From beffd6132234f55eeed8509e88ef32128fa6028d Mon Sep 17 00:00:00 2001 From: Koen Vossen Date: Mon, 2 Feb 2026 13:10:56 +0100 Subject: [PATCH 01/15] Test against mysql + fix for mysql --- .github/workflows/test.yml | 32 ++++++++++++- .../store/dataset/sqlalchemy/repository.py | 46 ++++++++++++++----- ingestify/tests/config.yaml | 5 +- ingestify/tests/conftest.py | 8 ++++ 4 files changed, 76 insertions(+), 15 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 4d1ddb5..d506104 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -15,6 +15,25 @@ jobs: matrix: os: [ubuntu-latest, macos-latest] python-version: [3.9, "3.10", "3.11", "3.12"] + database: [sqlite, mysql] + exclude: + # MySQL tests only run on ubuntu-latest to reduce CI time + - os: macos-latest + database: mysql + + services: + mysql: + image: ${{ matrix.database == 'mysql' && 'mysql:8.0' || '' }} + env: + MYSQL_ROOT_PASSWORD: root + MYSQL_DATABASE: ingestify_test + ports: + - 3306:3306 + options: >- + --health-cmd="mysqladmin ping" + --health-interval=10s + --health-timeout=5s + --health-retries=3 steps: - uses: actions/checkout@v4 @@ -26,10 +45,21 @@ jobs: run: | python -m pip install --upgrade pip pip install -e ".[test]" + - name: Install MySQL dependencies + if: matrix.database == 'mysql' + run: | + pip install mysqlclient - name: Code formatting run: | pip install black==22.3.0 black --check . - - name: Test with pytest + - name: Test with pytest (SQLite) + if: matrix.database == 'sqlite' + run: | + pytest --color=yes + - name: Test with pytest (MySQL) + if: matrix.database == 'mysql' + env: + INGESTIFY_TEST_DATABASE_URL: mysql://root:root@127.0.0.1:3306/ingestify_test run: | pytest --color=yes diff --git a/ingestify/infra/store/dataset/sqlalchemy/repository.py b/ingestify/infra/store/dataset/sqlalchemy/repository.py index 51d7551..b14b1d4 100644 --- a/ingestify/infra/store/dataset/sqlalchemy/repository.py +++ b/ingestify/infra/store/dataset/sqlalchemy/repository.py @@ -208,18 +208,39 @@ def _upsert( primary_key_columns = [column for column in table.columns if column.primary_key] - if immutable_rows: - stmt = stmt.on_conflict_do_nothing(index_elements=primary_key_columns) + if dialect == "mysql": + # MySQL uses ON DUPLICATE KEY UPDATE syntax + if immutable_rows: + # For MySQL, we simulate do_nothing by updating with the same values + # This prevents errors but doesn't actually change anything + set_ = { + name: table.c[name] + for name, column in table.columns.items() + if column not in primary_key_columns + } + stmt = stmt.on_duplicate_key_update(set_) + else: + # MySQL uses stmt.inserted instead of stmt.excluded + set_ = { + name: stmt.inserted[name] + for name, column in table.columns.items() + if column not in primary_key_columns + } + stmt = stmt.on_duplicate_key_update(set_) else: - set_ = { - name: getattr(stmt.excluded, name) - for name, column in table.columns.items() - if column not in primary_key_columns - } - - stmt = stmt.on_conflict_do_update( - index_elements=primary_key_columns, set_=set_ - ) + # PostgreSQL and SQLite use ON CONFLICT syntax + if immutable_rows: + stmt = stmt.on_conflict_do_nothing(index_elements=primary_key_columns) + else: + set_ = { + name: getattr(stmt.excluded, name) + for name, column in table.columns.items() + if column not in primary_key_columns + } + + stmt = stmt.on_conflict_do_update( + index_elements=primary_key_columns, set_=set_ + ) connection.execute(stmt) @@ -242,7 +263,8 @@ def _build_cte_sqlite(self, records, name: str) -> CTE: def _build_cte(self, records: list[dict], name: str) -> CTE: """Build a CTE from a list of dictionaries.""" - if self.dialect.name == "sqlite": + if self.dialect.name in ("sqlite", "mysql"): + # SQLite and MySQL don't support VALUES syntax, use UNION ALL instead return self._build_cte_sqlite(records, name) first_row = records[0] diff --git a/ingestify/tests/config.yaml b/ingestify/tests/config.yaml index 175032b..0b12337 100644 --- a/ingestify/tests/config.yaml +++ b/ingestify/tests/config.yaml @@ -1,6 +1,7 @@ main: - # Cannot use in memory data because database is shared between processes - metadata_url: !ENV "sqlite:///${TEST_DIR}/main.db" + # Database URL can be overridden via INGESTIFY_TEST_DATABASE_URL environment variable + # Defaults to SQLite if not set + metadata_url: !ENV ${INGESTIFY_TEST_DATABASE_URL} file_url: !ENV file://${TEST_DIR}/data default_bucket: main diff --git a/ingestify/tests/conftest.py b/ingestify/tests/conftest.py index 33faeae..cc1a31e 100644 --- a/ingestify/tests/conftest.py +++ b/ingestify/tests/conftest.py @@ -9,6 +9,14 @@ def datastore_dir(): with tempfile.TemporaryDirectory() as tmpdirname: os.environ["TEST_DIR"] = tmpdirname os.environ["INGESTIFY_RUN_EAGER"] = "true" + + # Allow database URL to be overridden via environment variable + # If INGESTIFY_TEST_DATABASE_URL is not set, use SQLite by default + if "INGESTIFY_TEST_DATABASE_URL" not in os.environ: + os.environ[ + "INGESTIFY_TEST_DATABASE_URL" + ] = f"sqlite:///{tmpdirname}/main.db" + yield tmpdirname From 3f7622e96702bebeedd57c00b78511bf34c4e879 Mon Sep 17 00:00:00 2001 From: Koen Vossen Date: Mon, 2 Feb 2026 13:37:24 +0100 Subject: [PATCH 02/15] Split CI workflow into separate SQLite and MySQL jobs - Avoid conditional service issues by having dedicated jobs - test-sqlite: runs on Ubuntu and macOS - test-mysql: runs on Ubuntu only with MySQL 8.0 service - Cleaner separation and no empty service image issues --- .github/workflows/test.yml | 41 +++++++++++++++++++++++++------------- 1 file changed, 27 insertions(+), 14 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index d506104..a83f11e 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -9,21 +9,40 @@ on: - main jobs: - build: + test-sqlite: runs-on: ${{ matrix.os }} strategy: matrix: os: [ubuntu-latest, macos-latest] python-version: [3.9, "3.10", "3.11", "3.12"] - database: [sqlite, mysql] - exclude: - # MySQL tests only run on ubuntu-latest to reduce CI time - - os: macos-latest - database: mysql + + steps: + - uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -e ".[test]" + - name: Code formatting + run: | + pip install black==22.3.0 + black --check . + - name: Test with pytest + run: | + pytest --color=yes + + test-mysql: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: [3.9, "3.10", "3.11", "3.12"] services: mysql: - image: ${{ matrix.database == 'mysql' && 'mysql:8.0' || '' }} + image: mysql:8.0 env: MYSQL_ROOT_PASSWORD: root MYSQL_DATABASE: ingestify_test @@ -46,19 +65,13 @@ jobs: python -m pip install --upgrade pip pip install -e ".[test]" - name: Install MySQL dependencies - if: matrix.database == 'mysql' run: | pip install mysqlclient - name: Code formatting run: | pip install black==22.3.0 black --check . - - name: Test with pytest (SQLite) - if: matrix.database == 'sqlite' - run: | - pytest --color=yes - - name: Test with pytest (MySQL) - if: matrix.database == 'mysql' + - name: Test with pytest env: INGESTIFY_TEST_DATABASE_URL: mysql://root:root@127.0.0.1:3306/ingestify_test run: | From 62d831c560fdfa1273dc35e133c18135525b307a Mon Sep 17 00:00:00 2001 From: Koen Vossen Date: Mon, 2 Feb 2026 13:46:57 +0100 Subject: [PATCH 03/15] Remove default database URL logic from conftest - Config now uses fallback syntax for INGESTIFY_TEST_DATABASE_URL - SQLite tests rely on TEST_DIR (no env var needed) - MySQL tests set INGESTIFY_TEST_DATABASE_URL explicitly - Simpler and more explicit configuration --- ingestify/tests/config.yaml | 6 +++--- ingestify/tests/conftest.py | 8 -------- 2 files changed, 3 insertions(+), 11 deletions(-) diff --git a/ingestify/tests/config.yaml b/ingestify/tests/config.yaml index 0b12337..3422df4 100644 --- a/ingestify/tests/config.yaml +++ b/ingestify/tests/config.yaml @@ -1,7 +1,7 @@ main: - # Database URL can be overridden via INGESTIFY_TEST_DATABASE_URL environment variable - # Defaults to SQLite if not set - metadata_url: !ENV ${INGESTIFY_TEST_DATABASE_URL} + # For MySQL tests, INGESTIFY_TEST_DATABASE_URL will be set + # For SQLite tests, falls back to using TEST_DIR + metadata_url: !ENV ${INGESTIFY_TEST_DATABASE_URL:-sqlite:///${TEST_DIR}/main.db} file_url: !ENV file://${TEST_DIR}/data default_bucket: main diff --git a/ingestify/tests/conftest.py b/ingestify/tests/conftest.py index cc1a31e..33faeae 100644 --- a/ingestify/tests/conftest.py +++ b/ingestify/tests/conftest.py @@ -9,14 +9,6 @@ def datastore_dir(): with tempfile.TemporaryDirectory() as tmpdirname: os.environ["TEST_DIR"] = tmpdirname os.environ["INGESTIFY_RUN_EAGER"] = "true" - - # Allow database URL to be overridden via environment variable - # If INGESTIFY_TEST_DATABASE_URL is not set, use SQLite by default - if "INGESTIFY_TEST_DATABASE_URL" not in os.environ: - os.environ[ - "INGESTIFY_TEST_DATABASE_URL" - ] = f"sqlite:///{tmpdirname}/main.db" - yield tmpdirname From 358773c395659950d20a7f0f311195ff570c2f50 Mon Sep 17 00:00:00 2001 From: Koen Vossen Date: Tue, 3 Feb 2026 09:35:11 +0100 Subject: [PATCH 04/15] WIP --- ingestify/tests/config.yaml | 2 +- ingestify/tests/conftest.py | 12 ++++++++++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/ingestify/tests/config.yaml b/ingestify/tests/config.yaml index 3422df4..8d368ec 100644 --- a/ingestify/tests/config.yaml +++ b/ingestify/tests/config.yaml @@ -1,7 +1,7 @@ main: # For MySQL tests, INGESTIFY_TEST_DATABASE_URL will be set # For SQLite tests, falls back to using TEST_DIR - metadata_url: !ENV ${INGESTIFY_TEST_DATABASE_URL:-sqlite:///${TEST_DIR}/main.db} + metadata_url: !ENV ${INGESTIFY_TEST_DATABASE_URL} file_url: !ENV file://${TEST_DIR}/data default_bucket: main diff --git a/ingestify/tests/conftest.py b/ingestify/tests/conftest.py index 33faeae..a9351b2 100644 --- a/ingestify/tests/conftest.py +++ b/ingestify/tests/conftest.py @@ -12,6 +12,14 @@ def datastore_dir(): yield tmpdirname -@pytest.fixture(scope="session") -def config_file(): +@pytest.fixture(scope="function", autouse=True) +def ingestify_test_database_url(datastore_dir): + if not os.environ.get("INGESTIFY_TEST_DATABASE_URL"): + tmp_dir = os.environ["TEST_DIR"] + os.environ["INGESTIFY_TEST_DATABASE_URL"] = f"sqlite:///${tmp_dir}/main.db" + + +@pytest.fixture(scope="function") +def config_file(ingestify_test_database_url): + # Depend on ingestify_test_database_url to make sure environment variables are set in time return os.path.abspath(os.path.dirname(__file__) + "/config.yaml") From d889fe0d3544fdc1be1333d6452bd9bbf420f5f3 Mon Sep 17 00:00:00 2001 From: Koen Vossen Date: Tue, 3 Feb 2026 09:42:14 +0100 Subject: [PATCH 05/15] Fix INGESTIFY_TEST_DATABASE_URL --- ingestify/tests/conftest.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/ingestify/tests/conftest.py b/ingestify/tests/conftest.py index a9351b2..51db07d 100644 --- a/ingestify/tests/conftest.py +++ b/ingestify/tests/conftest.py @@ -14,9 +14,11 @@ def datastore_dir(): @pytest.fixture(scope="function", autouse=True) def ingestify_test_database_url(datastore_dir): + # Only set and pop when not yet set if not os.environ.get("INGESTIFY_TEST_DATABASE_URL"): - tmp_dir = os.environ["TEST_DIR"] - os.environ["INGESTIFY_TEST_DATABASE_URL"] = f"sqlite:///${tmp_dir}/main.db" + os.environ["INGESTIFY_TEST_DATABASE_URL"] = f"sqlite:///{datastore_dir}/main.db" + yield + os.environ.pop("INGESTIFY_TEST_DATABASE_URL") @pytest.fixture(scope="function") From 6b602c1a03ca314964c09f225cfd41d291b99287 Mon Sep 17 00:00:00 2001 From: Koen Vossen Date: Tue, 3 Feb 2026 09:54:13 +0100 Subject: [PATCH 06/15] Make a cleaner implementation of ingestify_test_database_url --- ingestify/tests/conftest.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/ingestify/tests/conftest.py b/ingestify/tests/conftest.py index 51db07d..bce340d 100644 --- a/ingestify/tests/conftest.py +++ b/ingestify/tests/conftest.py @@ -1,7 +1,7 @@ import tempfile -import pytest import os +import pytest @pytest.fixture(scope="function", autouse=True) @@ -12,13 +12,16 @@ def datastore_dir(): yield tmpdirname -@pytest.fixture(scope="function", autouse=True) -def ingestify_test_database_url(datastore_dir): - # Only set and pop when not yet set - if not os.environ.get("INGESTIFY_TEST_DATABASE_URL"): - os.environ["INGESTIFY_TEST_DATABASE_URL"] = f"sqlite:///{datastore_dir}/main.db" - yield - os.environ.pop("INGESTIFY_TEST_DATABASE_URL") +@pytest.fixture(autouse=True) +def ingestify_test_database_url(datastore_dir, monkeypatch): + key = "INGESTIFY_TEST_DATABASE_URL" + + value = os.environ.get(key) + if value is None: + value = f"sqlite:///{datastore_dir}/main.db" + monkeypatch.setenv(key, value) + + return value @pytest.fixture(scope="function") From 7b25fbf697aa35c6c44530186451a36354a9ad78 Mon Sep 17 00:00:00 2001 From: Koen Vossen Date: Tue, 3 Feb 2026 10:03:21 +0100 Subject: [PATCH 07/15] Cleanup --- .../store/dataset/sqlalchemy/repository.py | 5 +++++ ingestify/tests/conftest.py | 17 +++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/ingestify/infra/store/dataset/sqlalchemy/repository.py b/ingestify/infra/store/dataset/sqlalchemy/repository.py index b14b1d4..5c40642 100644 --- a/ingestify/infra/store/dataset/sqlalchemy/repository.py +++ b/ingestify/infra/store/dataset/sqlalchemy/repository.py @@ -143,6 +143,11 @@ def close(self): if hasattr(self, "engine"): self.engine.dispose() + def drop_all_tables(self): + """Drop all tables in the database. Useful for test cleanup.""" + if hasattr(self, "metadata") and hasattr(self, "engine"): + self.metadata.drop_all(self.engine) + def get(self): return self.session() diff --git a/ingestify/tests/conftest.py b/ingestify/tests/conftest.py index bce340d..b684e60 100644 --- a/ingestify/tests/conftest.py +++ b/ingestify/tests/conftest.py @@ -24,6 +24,23 @@ def ingestify_test_database_url(datastore_dir, monkeypatch): return value +@pytest.fixture(autouse=True) +def clean_database(ingestify_test_database_url): + """Clean database after each test, especially important for MySQL.""" + yield # Let the test run first + + # Clean up after test for persistent databases (MySQL, PostgreSQL) + db_url = os.environ.get("INGESTIFY_TEST_DATABASE_URL", "") + if db_url.startswith("mysql://") or db_url.startswith("postgresql://"): + from ingestify.infra.store.dataset.sqlalchemy.repository import ( + SqlAlchemySessionProvider, + ) + + provider = SqlAlchemySessionProvider(db_url) + provider.drop_all_tables() + provider.close() + + @pytest.fixture(scope="function") def config_file(ingestify_test_database_url): # Depend on ingestify_test_database_url to make sure environment variables are set in time From cc0b8b866070ae16bdab02e5f97e2fd481ce0b6f Mon Sep 17 00:00:00 2001 From: Koen Vossen Date: Tue, 3 Feb 2026 12:32:53 +0100 Subject: [PATCH 08/15] WIP --- ingestify/domain/models/dataset/revision.py | 1 + .../store/dataset/sqlalchemy/repository.py | 19 ++++----- .../infra/store/dataset/sqlalchemy/tables.py | 21 +++++++++- ingestify/tests/config.yaml | 2 + ingestify/tests/conftest.py | 41 +++++++++++-------- ingestify/tests/test_auto_ingest.py | 28 ++++--------- ingestify/tests/test_engine.py | 39 +++++++----------- ingestify/tests/test_file_cache.py | 3 +- ingestify/tests/test_pagination.py | 6 +-- 9 files changed, 78 insertions(+), 82 deletions(-) diff --git a/ingestify/domain/models/dataset/revision.py b/ingestify/domain/models/dataset/revision.py index 29dbd0c..9e39e85 100644 --- a/ingestify/domain/models/dataset/revision.py +++ b/ingestify/domain/models/dataset/revision.py @@ -54,6 +54,7 @@ def is_changed( if file_id not in modified_files_map: return True + print("Test", modified_files_map[file_id].modified_at, last_modified) if modified_files_map[file_id].modified_at < last_modified: if dataset_last_modified_at < last_modified: # For StatsBomb we use last_modified of match for lineups, and events files. diff --git a/ingestify/infra/store/dataset/sqlalchemy/repository.py b/ingestify/infra/store/dataset/sqlalchemy/repository.py index 5c40642..c8ad484 100644 --- a/ingestify/infra/store/dataset/sqlalchemy/repository.py +++ b/ingestify/infra/store/dataset/sqlalchemy/repository.py @@ -122,7 +122,7 @@ def __setstate__(self, state): self.table_prefix = state.get("table_prefix", "") self._init_engine() - def __init__(self, url: str, table_prefix: str = ""): + def __init__(self, url: str, table_prefix: str = "", create_tables=True): url = self.fix_url(url) self.url = url @@ -130,7 +130,8 @@ def __init__(self, url: str, table_prefix: str = ""): self._init_engine() # Create all tables in the database - self.metadata.create_all(self.engine) + if create_tables: + self.metadata.create_all(self.engine) def __del__(self): self.close() @@ -216,14 +217,12 @@ def _upsert( if dialect == "mysql": # MySQL uses ON DUPLICATE KEY UPDATE syntax if immutable_rows: - # For MySQL, we simulate do_nothing by updating with the same values - # This prevents errors but doesn't actually change anything - set_ = { - name: table.c[name] - for name, column in table.columns.items() - if column not in primary_key_columns - } - stmt = stmt.on_duplicate_key_update(set_) + # For MySQL immutable rows, use INSERT IGNORE to skip duplicates + stmt = stmt.prefix_with("IGNORE") + print("Inserting") + for entity in entities: + print(entity) + print("Done") else: # MySQL uses stmt.inserted instead of stmt.excluded set_ = { diff --git a/ingestify/infra/store/dataset/sqlalchemy/tables.py b/ingestify/infra/store/dataset/sqlalchemy/tables.py index c164de9..fc9254b 100644 --- a/ingestify/infra/store/dataset/sqlalchemy/tables.py +++ b/ingestify/infra/store/dataset/sqlalchemy/tables.py @@ -51,14 +51,31 @@ class TZDateTime(TypeDecorator): LOCAL_TIMEZONE = datetime.datetime.utcnow().astimezone().tzinfo cache_ok = True + def __init__(self, fsp=None, **kwargs): + super().__init__(**kwargs) + self.fsp = fsp + + def load_dialect_impl(self, dialect): + # For MySQL, use DATETIME with fractional seconds precision + if dialect.name == "mysql" and self.fsp is not None: + from sqlalchemy.dialects.mysql import DATETIME as MySQL_DATETIME + + # Return the type without type_descriptor to ensure our process methods are called + return MySQL_DATETIME(fsp=self.fsp) + return super().load_dialect_impl(dialect) + def process_bind_param(self, value: Optional[datetime.datetime], dialect): if not value: return None if value.tzinfo is None: - value = value.astimezone(self.LOCAL_TIMEZONE) + # Assume naive datetimes are already in UTC + value = value.replace(tzinfo=datetime.timezone.utc) + else: + # Convert timezone-aware datetimes to UTC + value = value.astimezone(datetime.timezone.utc) - return value.astimezone(datetime.timezone.utc) + return value def process_result_value(self, value, dialect): if not value: diff --git a/ingestify/tests/config.yaml b/ingestify/tests/config.yaml index 8d368ec..1318297 100644 --- a/ingestify/tests/config.yaml +++ b/ingestify/tests/config.yaml @@ -2,6 +2,8 @@ main: # For MySQL tests, INGESTIFY_TEST_DATABASE_URL will be set # For SQLite tests, falls back to using TEST_DIR metadata_url: !ENV ${INGESTIFY_TEST_DATABASE_URL} + metadata_options: + table_prefix: !ENV ${INGESTIFY_TEST_DATABASE_PREFIX}_ file_url: !ENV file://${TEST_DIR}/data default_bucket: main diff --git a/ingestify/tests/conftest.py b/ingestify/tests/conftest.py index b684e60..6211a5a 100644 --- a/ingestify/tests/conftest.py +++ b/ingestify/tests/conftest.py @@ -1,8 +1,15 @@ import tempfile import os +import uuid + import pytest +from ingestify.infra.store.dataset.sqlalchemy.repository import ( + SqlAlchemySessionProvider, +) +from ingestify.main import get_engine + @pytest.fixture(scope="function", autouse=True) def datastore_dir(): @@ -12,7 +19,7 @@ def datastore_dir(): yield tmpdirname -@pytest.fixture(autouse=True) +@pytest.fixture(scope="function") def ingestify_test_database_url(datastore_dir, monkeypatch): key = "INGESTIFY_TEST_DATABASE_URL" @@ -21,27 +28,25 @@ def ingestify_test_database_url(datastore_dir, monkeypatch): value = f"sqlite:///{datastore_dir}/main.db" monkeypatch.setenv(key, value) + monkeypatch.setenv("INGESTIFY_TEST_DATABASE_PREFIX", "test_") + return value -@pytest.fixture(autouse=True) -def clean_database(ingestify_test_database_url): - """Clean database after each test, especially important for MySQL.""" - yield # Let the test run first +@pytest.fixture(scope="function") +def config_file(ingestify_test_database_url): + # Depend on ingestify_test_database_url to make sure environment variables are set in time, also make sure database is + # cleaned before ingestify opens a connection + return os.path.abspath(os.path.dirname(__file__) + "/config.yaml") - # Clean up after test for persistent databases (MySQL, PostgreSQL) - db_url = os.environ.get("INGESTIFY_TEST_DATABASE_URL", "") - if db_url.startswith("mysql://") or db_url.startswith("postgresql://"): - from ingestify.infra.store.dataset.sqlalchemy.repository import ( - SqlAlchemySessionProvider, - ) - provider = SqlAlchemySessionProvider(db_url) - provider.drop_all_tables() - provider.close() +@pytest.fixture(scope="function") +def engine(config_file, ingestify_test_database_url): + engine = get_engine(config_file, "main") + yield engine -@pytest.fixture(scope="function") -def config_file(ingestify_test_database_url): - # Depend on ingestify_test_database_url to make sure environment variables are set in time - return os.path.abspath(os.path.dirname(__file__) + "/config.yaml") + session_provider = getattr(engine.store.dataset_repository, "session_provider") + if session_provider: + session_provider.drop_all_tables() + # session_provider.close() diff --git a/ingestify/tests/test_auto_ingest.py b/ingestify/tests/test_auto_ingest.py index 0073b9a..eb90367 100644 --- a/ingestify/tests/test_auto_ingest.py +++ b/ingestify/tests/test_auto_ingest.py @@ -106,10 +106,8 @@ def discover_selectors(self, dataset_type: str): ] -def test_iter_datasets_basic_auto_ingest(config_file): +def test_iter_datasets_basic_auto_ingest(engine): """Test basic auto-ingest functionality.""" - engine = get_engine(config_file) - # Add a simple ingestion plan mock_source = MockSource(name="test_source") data_spec_versions = DataSpecVersionCollection.from_dict({"default": {"v1"}}) @@ -141,20 +139,16 @@ def test_iter_datasets_basic_auto_ingest(config_file): assert datasets[0].identifier["competition_id"] == 11 -def test_iter_datasets_auto_ingest_disabled(config_file): +def test_iter_datasets_auto_ingest_disabled(engine): """Test that auto_ingest=False returns only existing datasets.""" - engine = get_engine(config_file) - # Should only return existing datasets (none in empty store) datasets = list(engine.iter_datasets(competition_id=11, auto_ingest=False)) assert len(datasets) == 0 -def test_iter_datasets_outside_config_scope(config_file): +def test_iter_datasets_outside_config_scope(engine): """Test that requests outside IngestionPlan scope return nothing.""" - engine = get_engine(config_file) - # Add plan only for competition_id=11 mock_source = MockSource(name="test_source") data_spec_versions = DataSpecVersionCollection.from_dict({"default": {"v1"}}) @@ -180,10 +174,8 @@ def test_iter_datasets_outside_config_scope(config_file): assert len(datasets) == 0 -def test_iter_datasets_discover_selectors_with_filters(config_file): +def test_iter_datasets_discover_selectors_with_filters(engine): """Test that selector_filters are applied after discover_selectors runs.""" - engine = get_engine(config_file) - # Create an IngestionPlan with empty selector - this will trigger discover_selectors mock_source = MockSourceWithDiscoverSelectors(name="test_source_discover") data_spec_versions = DataSpecVersionCollection.from_dict({"default": {"v1"}}) @@ -216,10 +208,8 @@ def test_iter_datasets_discover_selectors_with_filters(config_file): assert datasets[0].name == "Mock match comp 11" -def test_iter_datasets_discover_selectors_multiple_matches(config_file): +def test_iter_datasets_discover_selectors_multiple_matches(engine): """Test that multiple discovered selectors can match the filters.""" - engine = get_engine(config_file) - # Create an IngestionPlan with empty selector - this will trigger discover_selectors mock_source = MockSourceWithDiscoverSelectors(name="test_source_discover") data_spec_versions = DataSpecVersionCollection.from_dict({"default": {"v1"}}) @@ -248,12 +238,10 @@ def test_iter_datasets_discover_selectors_multiple_matches(config_file): assert competition_ids == {11, 22} -def test_selector_filters_make_discovered_selectors_more_strict(config_file): +def test_selector_filters_make_discovered_selectors_more_strict(engine): """Test that when selector_filters are more strict than discovered selectors, we make the selectors more strict.""" from unittest.mock import Mock - engine = get_engine(config_file) - # Create a source that returns multiple matches per season class MockSourceMultipleMatches(Source): @property @@ -348,13 +336,11 @@ def discover_selectors(self, dataset_type): # Without this optimization, we'd call with match_id=None and fetch 3 matches instead of 1 -def test_iter_datasets_with_open_data_auto_discovery(config_file): +def test_iter_datasets_with_open_data_auto_discovery(engine): """Test that use_open_data=True auto-discovers open data sources without configuration.""" from unittest.mock import Mock from ingestify.application import loader - engine = get_engine(config_file) - # Create mock source class that inherits from Source class MockOpenDataSource(Source): def __init__(self, name): diff --git a/ingestify/tests/test_engine.py b/ingestify/tests/test_engine.py index a7ec4d1..00d89f8 100644 --- a/ingestify/tests/test_engine.py +++ b/ingestify/tests/test_engine.py @@ -273,9 +273,7 @@ def find_datasets( ) -def test_engine(config_file): - engine = get_engine(config_file, "main") - +def test_engine(engine): add_ingestion_plan( engine, SimpleFakeSource("fake-source"), competition_id=1, season_id=2 ) @@ -293,6 +291,9 @@ def test_engine(config_file): dataset = datasets.first() assert dataset.identifier == Identifier(competition_id=1, season_id=2, match_id=1) + for revision in dataset.revisions: + print("Rev", revision) + assert len(dataset.revisions) == 2 assert len(dataset.revisions[0].modified_files) == 3 assert len(dataset.revisions[1].modified_files) == 1 @@ -325,13 +326,11 @@ def test_engine(config_file): assert dataset.last_modified_at is not None -def test_iterator_source(config_file): +def test_iterator_source(engine): """Test when a Source returns a Iterator to do Batch processing. Every batch must be executed right away. """ - engine = get_engine(config_file, "main") - batch_source = None def callback(idx): @@ -339,7 +338,7 @@ def callback(idx): datasets = engine.store.get_dataset_collection() assert len(datasets) == idx - if idx == 1000: + if idx == 100: batch_source.should_stop = True batch_source = BatchSource("fake-source", callback) @@ -348,7 +347,7 @@ def callback(idx): engine.load() datasets = engine.store.get_dataset_collection() - assert len(datasets) == 1000 + assert len(datasets) == 100 for dataset in datasets: assert len(dataset.revisions) == 1 @@ -357,14 +356,14 @@ def callback(idx): batch_source.should_stop = False def callback(idx): - if idx == 1000: + if idx == 100: batch_source.should_stop = True batch_source.callback = callback engine.load() datasets = engine.store.get_dataset_collection() - assert len(datasets) == 1000 + assert len(datasets) == 100 for dataset in datasets: assert len(dataset.revisions) == 2 @@ -373,9 +372,7 @@ def callback(idx): deserialize(s) -def test_ingestion_plan_failing_task(config_file): - engine = get_engine(config_file, "main") - +def test_ingestion_plan_failing_task(engine): source = FailingLoadSource("fake-source") add_ingestion_plan(engine, source, competition_id=1, season_id=2) @@ -387,9 +384,7 @@ def test_ingestion_plan_failing_task(config_file): assert items[0].task_summaries[0].state == TaskState.FAILED -def test_ingestion_plan_failing_job(config_file): - engine = get_engine(config_file, "main") - +def test_ingestion_plan_failing_job(engine): source = FailingJobSource("fake-source") add_ingestion_plan(engine, source, competition_id=1, season_id=2) @@ -412,9 +407,7 @@ def test_change_partition_key_transformer(): """ -def test_serde(config_file): - engine = get_engine(config_file, "main") - +def test_serde(engine): add_ingestion_plan( engine, SimpleFakeSource("fake-source"), competition_id=1, season_id=2 ) @@ -434,10 +427,8 @@ def test_serde(config_file): assert event.model_dump_json() == deserialized_event.model_dump_json() -def test_empty_dataset_resource_id(config_file): +def test_empty_dataset_resource_id(engine): """When a empty DatasetResourceId is passed nothing should break""" - engine = get_engine(config_file, "main") - add_ingestion_plan(engine, EmptyDatasetResourceIdSource("fake-source")) engine.load() @@ -525,10 +516,8 @@ def test_post_load_files_hook(config_file): assert dataset2.state == DatasetState.COMPLETE -def test_force_save_creates_revision(config_file): +def test_force_save_creates_revision(engine): """Test that datasets get a revision even when no files are persisted.""" - engine = get_engine(config_file, "main") - # Create one dataset with files and one without add_ingestion_plan( engine, SimpleFakeSource("fake-source"), competition_id=1, season_id=2 diff --git a/ingestify/tests/test_file_cache.py b/ingestify/tests/test_file_cache.py index 3312a8e..cd7e560 100644 --- a/ingestify/tests/test_file_cache.py +++ b/ingestify/tests/test_file_cache.py @@ -8,10 +8,9 @@ from ingestify.domain.models.dataset.revision import RevisionSource, SourceType -def test_file_cache(config_file): +def test_file_cache(engine): """Test file caching with the with_file_cache context manager.""" # Get engine from the fixture - engine = get_engine(config_file, "main") store = engine.store # Create a timestamp for test data diff --git a/ingestify/tests/test_pagination.py b/ingestify/tests/test_pagination.py index 075e4f1..81813a2 100644 --- a/ingestify/tests/test_pagination.py +++ b/ingestify/tests/test_pagination.py @@ -6,10 +6,9 @@ from ingestify.main import get_engine -def test_iter_dataset_collection_batches(config_file): +def test_iter_dataset_collection_batches(engine): """Test iteration over datasets with batches using iter_dataset_collection_batches.""" # Get engine from the fixture - engine = get_engine(config_file, "main") store = engine.store bucket = store.bucket @@ -81,10 +80,9 @@ def test_iter_dataset_collection_batches(config_file): assert filtered_dataset_ids[0] == "dataset-5" -def test_dataset_state_filter(config_file): +def test_dataset_state_filter(engine): """Test filtering datasets by state.""" # Get engine from the fixture - engine = get_engine(config_file, "main") store = engine.store bucket = store.bucket From 292f2e9439904fe4a589ed53e2b2bba8536eb413 Mon Sep 17 00:00:00 2001 From: Koen Vossen Date: Tue, 3 Feb 2026 12:43:04 +0100 Subject: [PATCH 09/15] Remove debug prints --- ingestify/domain/models/dataset/revision.py | 1 - ingestify/infra/store/dataset/sqlalchemy/repository.py | 4 ---- ingestify/tests/test_engine.py | 2 -- 3 files changed, 7 deletions(-) diff --git a/ingestify/domain/models/dataset/revision.py b/ingestify/domain/models/dataset/revision.py index 9e39e85..29dbd0c 100644 --- a/ingestify/domain/models/dataset/revision.py +++ b/ingestify/domain/models/dataset/revision.py @@ -54,7 +54,6 @@ def is_changed( if file_id not in modified_files_map: return True - print("Test", modified_files_map[file_id].modified_at, last_modified) if modified_files_map[file_id].modified_at < last_modified: if dataset_last_modified_at < last_modified: # For StatsBomb we use last_modified of match for lineups, and events files. diff --git a/ingestify/infra/store/dataset/sqlalchemy/repository.py b/ingestify/infra/store/dataset/sqlalchemy/repository.py index c8ad484..a04ef7b 100644 --- a/ingestify/infra/store/dataset/sqlalchemy/repository.py +++ b/ingestify/infra/store/dataset/sqlalchemy/repository.py @@ -219,10 +219,6 @@ def _upsert( if immutable_rows: # For MySQL immutable rows, use INSERT IGNORE to skip duplicates stmt = stmt.prefix_with("IGNORE") - print("Inserting") - for entity in entities: - print(entity) - print("Done") else: # MySQL uses stmt.inserted instead of stmt.excluded set_ = { diff --git a/ingestify/tests/test_engine.py b/ingestify/tests/test_engine.py index 00d89f8..83f30be 100644 --- a/ingestify/tests/test_engine.py +++ b/ingestify/tests/test_engine.py @@ -291,8 +291,6 @@ def test_engine(engine): dataset = datasets.first() assert dataset.identifier == Identifier(competition_id=1, season_id=2, match_id=1) - for revision in dataset.revisions: - print("Rev", revision) assert len(dataset.revisions) == 2 assert len(dataset.revisions[0].modified_files) == 3 From d29544c01217f71a8f58c65b9792d60b15a9447b Mon Sep 17 00:00:00 2001 From: Koen Vossen Date: Tue, 3 Feb 2026 13:11:56 +0100 Subject: [PATCH 10/15] test --- .../store/dataset/sqlalchemy/repository.py | 5 ++++- ingestify/tests/conftest.py | 22 ++++++++++++++----- ingestify/tests/test_auto_ingest.py | 11 +++++----- ingestify/tests/test_store_version.py | 3 +-- 4 files changed, 27 insertions(+), 14 deletions(-) diff --git a/ingestify/infra/store/dataset/sqlalchemy/repository.py b/ingestify/infra/store/dataset/sqlalchemy/repository.py index a04ef7b..91c52cb 100644 --- a/ingestify/infra/store/dataset/sqlalchemy/repository.py +++ b/ingestify/infra/store/dataset/sqlalchemy/repository.py @@ -131,7 +131,7 @@ def __init__(self, url: str, table_prefix: str = "", create_tables=True): # Create all tables in the database if create_tables: - self.metadata.create_all(self.engine) + self.create_all_tables() def __del__(self): self.close() @@ -144,6 +144,9 @@ def close(self): if hasattr(self, "engine"): self.engine.dispose() + def create_all_tables(self): + self.metadata.create_all(self.engine) + def drop_all_tables(self): """Drop all tables in the database. Useful for test cleanup.""" if hasattr(self, "metadata") and hasattr(self, "engine"): diff --git a/ingestify/tests/conftest.py b/ingestify/tests/conftest.py index 6211a5a..8b2c371 100644 --- a/ingestify/tests/conftest.py +++ b/ingestify/tests/conftest.py @@ -28,8 +28,6 @@ def ingestify_test_database_url(datastore_dir, monkeypatch): value = f"sqlite:///{datastore_dir}/main.db" monkeypatch.setenv(key, value) - monkeypatch.setenv("INGESTIFY_TEST_DATABASE_PREFIX", "test_") - return value @@ -41,12 +39,24 @@ def config_file(ingestify_test_database_url): @pytest.fixture(scope="function") -def engine(config_file, ingestify_test_database_url): +def engine(config_file): + # Now create the engine for the test engine = get_engine(config_file, "main") + # session_provider = getattr(engine.store.dataset_repository, "session_provider", None) + # if session_provider: + # session_provider.close() + # + # session_provider.drop_all_tables() + # session_provider.create_all_tables() yield engine - - session_provider = getattr(engine.store.dataset_repository, "session_provider") + # + # # Close connections after test + session_provider = getattr( + engine.store.dataset_repository, "session_provider", None + ) if session_provider: + session_provider.session.remove() + session_provider.engine.dispose() + session_provider.drop_all_tables() - # session_provider.close() diff --git a/ingestify/tests/test_auto_ingest.py b/ingestify/tests/test_auto_ingest.py index eb90367..83bcdf0 100644 --- a/ingestify/tests/test_auto_ingest.py +++ b/ingestify/tests/test_auto_ingest.py @@ -8,6 +8,7 @@ from ingestify.domain.models.fetch_policy import FetchPolicy from ingestify.domain import Selector, DataSpecVersionCollection from ingestify import Source, DatasetResource +from ingestify.utils import utcnow class MockSource(Source): @@ -39,7 +40,7 @@ def find_datasets( url="http://test.com/match1", ).add_file( data_feed_key="test", - last_modified=datetime.datetime.now(), + last_modified=utcnow(), json_content={"blaat": "piet"}, ) @@ -75,7 +76,7 @@ def find_datasets( url="http://test.com/match1", ).add_file( data_feed_key="test", - last_modified=datetime.datetime.now(), + last_modified=utcnow(), json_content={"competition_id": 11}, ) elif competition_id == 22: @@ -91,7 +92,7 @@ def find_datasets( url="http://test.com/match2", ).add_file( data_feed_key="test", - last_modified=datetime.datetime.now(), + last_modified=utcnow(), json_content={"competition_id": 22}, ) @@ -279,7 +280,7 @@ def find_datasets( url=f"http://test.com/match{mid}", ).add_file( data_feed_key="test", - last_modified=datetime.datetime.now(), + last_modified=utcnow(), json_content={"match_id": mid}, ) return [] @@ -373,7 +374,7 @@ def find_datasets( url="http://open-data.com/match123", ).add_file( data_feed_key="test", - last_modified=datetime.datetime.now(), + last_modified=utcnow(), json_content={"match_id": 123}, ) diff --git a/ingestify/tests/test_store_version.py b/ingestify/tests/test_store_version.py index 8ae21c3..41cd5dd 100644 --- a/ingestify/tests/test_store_version.py +++ b/ingestify/tests/test_store_version.py @@ -54,9 +54,8 @@ def test_store_version_tracking_version_mismatch(config_file, caplog): assert "stored=1.0.0, current=2.0.0" in caplog.text -def test_store_version_methods(config_file): +def test_store_version_methods(engine): """Test the repository version methods directly.""" - engine = get_engine(config_file) repo = engine.store.dataset_repository from ingestify import __version__ From 58d82d4d34f7ae652ad0719c9dd9c8daf4760bc9 Mon Sep 17 00:00:00 2001 From: Koen Vossen Date: Tue, 3 Feb 2026 13:28:32 +0100 Subject: [PATCH 11/15] test --- ingestify/infra/store/dataset/sqlalchemy/repository.py | 7 ++++++- ingestify/tests/conftest.py | 4 ++-- ingestify/tests/test_store_version.py | 8 +++++--- 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/ingestify/infra/store/dataset/sqlalchemy/repository.py b/ingestify/infra/store/dataset/sqlalchemy/repository.py index 91c52cb..1d15992 100644 --- a/ingestify/infra/store/dataset/sqlalchemy/repository.py +++ b/ingestify/infra/store/dataset/sqlalchemy/repository.py @@ -122,13 +122,18 @@ def __setstate__(self, state): self.table_prefix = state.get("table_prefix", "") self._init_engine() - def __init__(self, url: str, table_prefix: str = "", create_tables=True): + def __init__( + self, url: str, table_prefix: str = "", create_tables=True, drop_tables=False + ): url = self.fix_url(url) self.url = url self.table_prefix = table_prefix self._init_engine() + if drop_tables: + self.drop_all_tables() + # Create all tables in the database if create_tables: self.create_all_tables() diff --git a/ingestify/tests/conftest.py b/ingestify/tests/conftest.py index 8b2c371..4b5056d 100644 --- a/ingestify/tests/conftest.py +++ b/ingestify/tests/conftest.py @@ -28,7 +28,7 @@ def ingestify_test_database_url(datastore_dir, monkeypatch): value = f"sqlite:///{datastore_dir}/main.db" monkeypatch.setenv(key, value) - return value + yield value @pytest.fixture(scope="function") @@ -50,6 +50,7 @@ def engine(config_file): # session_provider.create_all_tables() yield engine + # # # Close connections after test session_provider = getattr( @@ -58,5 +59,4 @@ def engine(config_file): if session_provider: session_provider.session.remove() session_provider.engine.dispose() - session_provider.drop_all_tables() diff --git a/ingestify/tests/test_store_version.py b/ingestify/tests/test_store_version.py index 41cd5dd..a4e9d3c 100644 --- a/ingestify/tests/test_store_version.py +++ b/ingestify/tests/test_store_version.py @@ -32,13 +32,15 @@ def test_store_version_tracking_existing_store_same_version(config_file): def test_store_version_tracking_version_mismatch(config_file, caplog): """Test that version mismatch is logged as warning.""" - # Initialize store with version 1.0.0 - with patch("ingestify.__version__", "1.0.0"): + # Use engine as fixture as this cleans up the database + + # Initialize store with version 1.0.1 + with patch("ingestify.__version__", "1.0.1"): engine1 = get_engine(config_file) store1 = engine1.store stored_version = store1.dataset_repository.get_store_version() - assert stored_version == "1.0.0" + assert stored_version == "1.0.1" # Open store with different version with patch("ingestify.__version__", "2.0.0"): From 9e1129f0d5b23162337890b75a0a6a07bc38f611 Mon Sep 17 00:00:00 2001 From: Koen Vossen Date: Tue, 3 Feb 2026 13:32:44 +0100 Subject: [PATCH 12/15] Fix tests --- ingestify/tests/conftest.py | 27 +++++++++++++++++---------- ingestify/tests/test_store_version.py | 16 +++++++++++----- 2 files changed, 28 insertions(+), 15 deletions(-) diff --git a/ingestify/tests/conftest.py b/ingestify/tests/conftest.py index 4b5056d..52b6a38 100644 --- a/ingestify/tests/conftest.py +++ b/ingestify/tests/conftest.py @@ -38,8 +38,23 @@ def config_file(ingestify_test_database_url): return os.path.abspath(os.path.dirname(__file__) + "/config.yaml") +@pytest.fixture +def db_cleanup(): + def do_cleanup(engine): + # # Close connections after test + session_provider = getattr( + engine.store.dataset_repository, "session_provider", None + ) + if session_provider: + session_provider.session.remove() + session_provider.engine.dispose() + session_provider.drop_all_tables() + + return do_cleanup + + @pytest.fixture(scope="function") -def engine(config_file): +def engine(config_file, db_cleanup): # Now create the engine for the test engine = get_engine(config_file, "main") # session_provider = getattr(engine.store.dataset_repository, "session_provider", None) @@ -51,12 +66,4 @@ def engine(config_file): yield engine - # - # # Close connections after test - session_provider = getattr( - engine.store.dataset_repository, "session_provider", None - ) - if session_provider: - session_provider.session.remove() - session_provider.engine.dispose() - session_provider.drop_all_tables() + db_cleanup(engine) diff --git a/ingestify/tests/test_store_version.py b/ingestify/tests/test_store_version.py index a4e9d3c..9a1861c 100644 --- a/ingestify/tests/test_store_version.py +++ b/ingestify/tests/test_store_version.py @@ -4,7 +4,7 @@ from ingestify.main import get_engine -def test_store_version_tracking_new_store(config_file): +def test_store_version_tracking_new_store(config_file, db_cleanup): """Test that a new store gets initialized with the current version.""" with patch("ingestify.__version__", "1.0.0"): engine = get_engine(config_file) @@ -13,8 +13,10 @@ def test_store_version_tracking_new_store(config_file): stored_version = engine.store.dataset_repository.get_store_version() assert stored_version == "1.0.0" + db_cleanup(engine) -def test_store_version_tracking_existing_store_same_version(config_file): + +def test_store_version_tracking_existing_store_same_version(config_file, db_cleanup): """Test that an existing store with same version doesn't cause issues.""" with patch("ingestify.__version__", "1.0.0"): # Initialize store first time @@ -29,8 +31,10 @@ def test_store_version_tracking_existing_store_same_version(config_file): stored_version = store2.dataset_repository.get_store_version() assert stored_version == "1.0.0" + db_cleanup(engine1) + -def test_store_version_tracking_version_mismatch(config_file, caplog): +def test_store_version_tracking_version_mismatch(config_file, caplog, db_cleanup): """Test that version mismatch is logged as warning.""" # Use engine as fixture as this cleans up the database @@ -49,11 +53,13 @@ def test_store_version_tracking_version_mismatch(config_file, caplog): # Version should still be the original one stored_version = store2.dataset_repository.get_store_version() - assert stored_version == "1.0.0" + assert stored_version == "1.0.1" # Should have logged a warning about version mismatch assert "Store version mismatch" in caplog.text - assert "stored=1.0.0, current=2.0.0" in caplog.text + assert "stored=1.0.1, current=2.0.0" in caplog.text + + db_cleanup(engine1) def test_store_version_methods(engine): From cbc72a17815e2413d9a4aa21668c806879ee5cca Mon Sep 17 00:00:00 2001 From: Koen Vossen Date: Tue, 3 Feb 2026 13:38:22 +0100 Subject: [PATCH 13/15] test --- ingestify/tests/test_engine.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ingestify/tests/test_engine.py b/ingestify/tests/test_engine.py index 83f30be..d87dce1 100644 --- a/ingestify/tests/test_engine.py +++ b/ingestify/tests/test_engine.py @@ -539,7 +539,9 @@ def test_force_save_creates_revision(engine): season_id=2 ).first() - dataset_without_files = engine.store.get_dataset_collection(metadata_only=True) + dataset_without_files = engine.store.get_dataset_collection( + season_id=2, metadata_only=True + ) assert ( dataset_without_files.metadata.last_modified == dataset_with_last_modified.last_modified_at From 72bad8bd0eb0d9736488d1fd4606613143e89721 Mon Sep 17 00:00:00 2001 From: Koen Vossen Date: Tue, 3 Feb 2026 13:56:32 +0100 Subject: [PATCH 14/15] fix --- ingestify/tests/test_engine.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ingestify/tests/test_engine.py b/ingestify/tests/test_engine.py index d87dce1..aab0ee0 100644 --- a/ingestify/tests/test_engine.py +++ b/ingestify/tests/test_engine.py @@ -25,7 +25,8 @@ from ingestify.domain.models.fetch_policy import FetchPolicy from ingestify.domain.models.task.task_summary import TaskState from ingestify.infra.serialization import serialize, deserialize -from ingestify.main import get_engine, get_dev_engine +from ingestify.main import get_dev_engine +from ingestify.utils import utcnow def add_ingestion_plan(engine: IngestionEngine, source: Source, **selector): @@ -78,7 +79,7 @@ def find_datasets( season_id, **kwargs, ): - last_modified = datetime.now(pytz.utc) + last_modified = utcnow() yield ( DatasetResource( @@ -498,9 +499,8 @@ def find_datasets( ) -def test_post_load_files_hook(config_file): +def test_post_load_files_hook(engine): """Test that post_load_files hook changes state from SCHEDULED to COMPLETE when content is not empty.""" - engine = get_engine(config_file, "main") add_ingestion_plan(engine, SourceWithHook("test"), competition_id=1, season_id=2) # First run: file contains '{}', state should remain SCHEDULED From 6da4ab5db7087dff28da8dbb9047a7262888f3d0 Mon Sep 17 00:00:00 2001 From: Koen Vossen Date: Tue, 3 Feb 2026 14:02:01 +0100 Subject: [PATCH 15/15] Cleanup --- ingestify/infra/store/dataset/sqlalchemy/repository.py | 10 ++-------- ingestify/tests/conftest.py | 10 ---------- 2 files changed, 2 insertions(+), 18 deletions(-) diff --git a/ingestify/infra/store/dataset/sqlalchemy/repository.py b/ingestify/infra/store/dataset/sqlalchemy/repository.py index 1d15992..7627ec5 100644 --- a/ingestify/infra/store/dataset/sqlalchemy/repository.py +++ b/ingestify/infra/store/dataset/sqlalchemy/repository.py @@ -122,21 +122,15 @@ def __setstate__(self, state): self.table_prefix = state.get("table_prefix", "") self._init_engine() - def __init__( - self, url: str, table_prefix: str = "", create_tables=True, drop_tables=False - ): + def __init__(self, url: str, table_prefix: str = ""): url = self.fix_url(url) self.url = url self.table_prefix = table_prefix self._init_engine() - if drop_tables: - self.drop_all_tables() - # Create all tables in the database - if create_tables: - self.create_all_tables() + self.create_all_tables() def __del__(self): self.close() diff --git a/ingestify/tests/conftest.py b/ingestify/tests/conftest.py index 52b6a38..d87cf56 100644 --- a/ingestify/tests/conftest.py +++ b/ingestify/tests/conftest.py @@ -1,13 +1,9 @@ import tempfile import os -import uuid import pytest -from ingestify.infra.store.dataset.sqlalchemy.repository import ( - SqlAlchemySessionProvider, -) from ingestify.main import get_engine @@ -57,12 +53,6 @@ def do_cleanup(engine): def engine(config_file, db_cleanup): # Now create the engine for the test engine = get_engine(config_file, "main") - # session_provider = getattr(engine.store.dataset_repository, "session_provider", None) - # if session_provider: - # session_provider.close() - # - # session_provider.drop_all_tables() - # session_provider.create_all_tables() yield engine