diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index d26ea5b..043e7f0 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -59,3 +59,11 @@ jobs: - name: Run BHE job scheduling test run: | .venv/bin/pytest tests/test_bhe_job_scheduling.py -v + + - name: Run preprocess lookup generation tests + run: | + .venv/bin/pytest tests/test_preproc.py -v + + - name: Run DuckDB lookup exception handling tests + run: | + .venv/bin/pytest tests/test_lookup.py -v diff --git a/pyproject.toml b/pyproject.toml index 97fd180..ea4d25d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -65,7 +65,7 @@ local_scheme = "no-local-version" [dependency-groups] dev = [ - "openhound-faker==0.0.4", + "openhound-faker==0.0.6", "ipython>=9.12.0", "pre-commit>=4.5.1", "pytest>=9.0.1", diff --git a/src/openhound/core/lookup.py b/src/openhound/core/lookup.py index 2e19c56..e96c795 100644 --- a/src/openhound/core/lookup.py +++ b/src/openhound/core/lookup.py @@ -1,5 +1,10 @@ +import logging + +import duckdb from duckdb import DuckDBPyConnection +logger = logging.getLogger(__name__) + class LookupManager: def __init__(self, client: DuckDBPyConnection, schema: str): @@ -18,9 +23,18 @@ def _find_all_objects(self, *args) -> list: Returns: list: Query result rows as a list of tuples. """ - self.client.execute(*args) - results = self.client.fetchall() - return results + try: + self.client.execute(*args) + results = self.client.fetchall() + return results + + except duckdb.CatalogException as err: + logger.error("DuckDB lookup failed, missing table: %s", err) + return [] + + except duckdb.Error as err: + logger.error("DuckDB lookup query failed: %s", err) + return [] def _find_single_object(self, *args) -> str | None: """Execute a query and return the ID of the matching row @@ -28,6 +42,15 @@ def _find_single_object(self, *args) -> str | None: Returns: str | None: The first column (ie. ID) value as a string or None if no result is found """ - self.client.execute(*args) - result = self.client.fetchone() - return str(result[0]) if result else None + try: + self.client.execute(*args) + result = self.client.fetchone() + return str(result[0]) if result else None + + except duckdb.CatalogException as err: + logger.error("DuckDB lookup failed, missing table: %s", err) + return None + + except duckdb.Error as err: + logger.error("DuckDB lookup query failed: %s", err) + return None diff --git a/src/openhound/core/preproc.py b/src/openhound/core/preproc.py index 0f35181..b029420 100644 --- a/src/openhound/core/preproc.py +++ b/src/openhound/core/preproc.py @@ -1,3 +1,4 @@ +import logging from dataclasses import dataclass from pathlib import Path from typing import Callable @@ -12,6 +13,33 @@ from openhound.core.progress import Progress from openhound.sources.resource_files.source import resource_files +logger = logging.getLogger(__name__) + + +def run_transform( + transform: Callable[..., None], + con: duckdb.DuckDBPyConnection, + *args, + **kwargs, +) -> None: + """A transformer helper function that handles DuckDB exceptions when generating a lookup""" + try: + transform(con, *args, **kwargs) + + except duckdb.CatalogException as err: + logger.error( + "DuckDB preprocessing transform '%s' failed due to missing table: %s", + transform.__name__, + err, + ) + + except duckdb.Error as err: + logger.error( + "DuckDB preprocessing transform '%s' failed: %s", + transform.__name__, + err, + ) + class PreProcessor(BasePipeline): def __init__( @@ -66,6 +94,12 @@ def run( con = duckdb.connect(str(self.output_file)) try: self.transformer(con) + except duckdb.CatalogException as err: + logger.error( + "DuckDB preprocessing failed due to missing table: %s", err + ) + except duckdb.Error as err: + logger.error("DuckDB preprocessing failed: %s", err) finally: con.close() diff --git a/tests/test_lookup.py b/tests/test_lookup.py new file mode 100644 index 0000000..430291a --- /dev/null +++ b/tests/test_lookup.py @@ -0,0 +1,41 @@ +import logging + +import duckdb + +from openhound.core.lookup import LookupManager + + +def test_find_single_object_returns_none_on_duckdb_error(caplog): + client = duckdb.connect(":memory:") + lookup = LookupManager(client, "main") + caplog.set_level(logging.ERROR, logger="openhound.core.lookup") + + try: + result = lookup._find_single_object("SELECT id FROM missing_table") + finally: + client.close() + + assert result is None + assert any( + "DuckDB lookup failed, missing table:" in record.message + and "missing_table" in record.message + for record in caplog.records + ) + + +def test_find_all_objects_returns_empty_list_on_duckdb_error(caplog): + client = duckdb.connect(":memory:") + lookup = LookupManager(client, "main") + caplog.set_level(logging.ERROR, logger="openhound.core.lookup") + + try: + result = lookup._find_all_objects("SELECT id FROM missing_table") + finally: + client.close() + + assert result == [] + assert any( + "DuckDB lookup failed, missing table:" in record.message + and "missing_table" in record.message + for record in caplog.records + ) diff --git a/tests/test_preproc.py b/tests/test_preproc.py new file mode 100644 index 0000000..f17d1eb --- /dev/null +++ b/tests/test_preproc.py @@ -0,0 +1,93 @@ +import logging +import os +from pathlib import Path + +import duckdb + +os.environ["RUNTIME__LOG_PATH"] = "/tmp/openhound-test-logs" + +from openhound.core.app import DEFAULT_LOOKUP_FILE, OpenHound +from openhound.core.preproc import PreProcessor, run_transform +from openhound.core.progress import Progress + + +def test_preproc_uses_default_lookup_file(monkeypatch, tmp_path): + captured: dict[str, Path] = {} + + def fake_run(self, resources, filters=None): + captured["output_file"] = self.output_file + captured["resources"] = resources + return "ok" + + monkeypatch.setattr(PreProcessor, "run", fake_run) + + app = OpenHound("test", "test") + + @app.preproc() + def preprocess(ctx): + return {"resource": "resource"} + + result = app.preprocessor( # type: ignore[misc] + input_path=tmp_path, + progress=Progress.log, + ) + + assert result == "ok" + assert captured["output_file"] == DEFAULT_LOOKUP_FILE + assert captured["resources"] == {"resource": "resource"} + + +def test_preproc_logs_duckdb_transform_errors(monkeypatch, tmp_path, caplog): + def fake_run(self, source, **kwargs): + return "ok" + + def missing_table_transform(con: duckdb.DuckDBPyConnection): + con.execute("SELECT * FROM missing_table") + + monkeypatch.setattr(PreProcessor, "_run", fake_run) + caplog.set_level(logging.ERROR, logger="openhound.core.preproc") + + preprocessor = PreProcessor( + name="test", + input_path=tmp_path, + output_file=tmp_path / "lookup.duckdb", + transformer=missing_table_transform, + ) + + result = preprocessor.run(resources={"resource": "resource"}) + + assert result == "ok" + assert any( + "DuckDB preprocessing failed due to missing table:" in record.message + and "missing_table" in record.message + for record in caplog.records + ) + + +def test_run_transform_logs_transform_name_and_continues(caplog): + called: list[str] = [] + + def missing_table_transform(con: duckdb.DuckDBPyConnection): + called.append("missing") + con.execute("SELECT * FROM missing_table") + + def successful_transform(con: duckdb.DuckDBPyConnection): + called.append("successful") + con.execute("SELECT 1") + + con = duckdb.connect(":memory:") + caplog.set_level(logging.ERROR, logger="openhound.core.preproc") + + try: + run_transform(missing_table_transform, con) + run_transform(successful_transform, con) + finally: + con.close() + + assert called == ["missing", "successful"] + assert any( + "DuckDB preprocessing transform 'missing_table_transform' failed due to missing table:" + in record.message + and "missing_table" in record.message + for record in caplog.records + ) diff --git a/tests/test_preproc_default_lookup.py b/tests/test_preproc_default_lookup.py deleted file mode 100644 index 2b87f92..0000000 --- a/tests/test_preproc_default_lookup.py +++ /dev/null @@ -1,34 +0,0 @@ -import os -from pathlib import Path - -os.environ["RUNTIME__LOG_PATH"] = "/tmp/openhound-test-logs" - -from openhound.core.app import DEFAULT_LOOKUP_FILE, OpenHound -from openhound.core.preproc import PreProcessor -from openhound.core.progress import Progress - - -def test_preproc_uses_default_lookup_file(monkeypatch, tmp_path): - captured: dict[str, Path] = {} - - def fake_run(self, resources, filters=None): - captured["output_file"] = self.output_file - captured["resources"] = resources - return "ok" - - monkeypatch.setattr(PreProcessor, "run", fake_run) - - app = OpenHound("test", "test") - - @app.preproc() - def preprocess(ctx): - return {"resource": "resource"} - - result = app.preprocessor( # type: ignore[misc] - input_path=tmp_path, - progress=Progress.log, - ) - - assert result == "ok" - assert captured["output_file"] == DEFAULT_LOOKUP_FILE - assert captured["resources"] == {"resource": "resource"}