From 42bc467b690383aaa62dd3a3123ce0480db8f3c6 Mon Sep 17 00:00:00 2001
From: saathviksheerla
Date: Tue, 17 Mar 2026 22:04:20 +0530
Subject: [PATCH 1/2] feat: add GET /run/list endpoint (#39)

---
 src/main.py                            |   2 +
 src/routers/openml/runs.py             | 137 ++++++++++++
 tests/routers/openml/runs_list_test.py | 276 +++++++++++++++++++++++++
 3 files changed, 415 insertions(+)
 create mode 100644 src/routers/openml/runs.py
 create mode 100644 tests/routers/openml/runs_list_test.py

diff --git a/src/main.py b/src/main.py
index 76a52ad3..0a0c5460 100644
--- a/src/main.py
+++ b/src/main.py
@@ -15,6 +15,7 @@
 from routers.openml.evaluations import router as evaluationmeasures_router
 from routers.openml.flows import router as flows_router
 from routers.openml.qualities import router as qualities_router
+from routers.openml.runs import router as run_router
 from routers.openml.setups import router as setup_router
 from routers.openml.study import router as study_router
 from routers.openml.tasks import router as task_router
@@ -69,6 +70,7 @@ def create_api() -> FastAPI:
     app.include_router(task_router)
     app.include_router(flows_router)
     app.include_router(study_router)
+    app.include_router(run_router)
     app.include_router(setup_router)
     return app
 
diff --git a/src/routers/openml/runs.py b/src/routers/openml/runs.py
new file mode 100644
index 00000000..70a7c511
--- /dev/null
+++ b/src/routers/openml/runs.py
@@ -0,0 +1,137 @@
+"""Endpoints for OpenML Run resources."""
+
+from typing import Annotated, Any
+
+from fastapi import APIRouter, Body, Depends
+from sqlalchemy import text
+from sqlalchemy.ext.asyncio import AsyncConnection
+
+from core.errors import NoResultsError
+from routers.dependencies import Pagination, expdb_connection
+from routers.types import SystemString64
+
+router = APIRouter(prefix="/run", tags=["run"])
+
+
+def _add_in_filter(
+    filters: list[str],
+    params: dict[str, Any],
+    column: str,
+    param_prefix: str,
+    values: list[int],
+) -> None:
+    """Append an IN filter clause and its bind parameters to the query builder.
+
+    Builds named placeholders (:prefix_0, :prefix_1, ...) for safe binding
+    of multiple integer values without SQL injection risk.
+
+    Args:
+        filters: List of WHERE clause fragments to append to.
+        params: Bind parameter dict to update in-place.
+        column: SQL column expression (e.g. "r.rid", "a.implementation_id").
+        param_prefix: Prefix for named bind params (e.g. "run_id", "flow_id").
+        values: List of integer values to filter by.
+
+    """
+    placeholders = ", ".join(f":{param_prefix}_{i}" for i in range(len(values)))
+    filters.append(f"{column} IN ({placeholders})")
+    params |= {f"{param_prefix}_{i}": v for i, v in enumerate(values)}
+
+
+@router.post(path="/list", description="Provided for convenience, same as `GET` endpoint.")
+@router.get(path="/list")
+async def list_runs(  # noqa: PLR0913
+    pagination: Annotated[Pagination, Body(default_factory=Pagination)],
+    run_id: Annotated[
+        list[int] | None,
+        Body(
+            description="The run(s) to include in the search. "
+            "If none are specified, all runs are included.",
+        ),
+    ] = None,
+    task_id: Annotated[
+        list[int] | None,
+        Body(description="Only include runs for these task id(s)."),
+    ] = None,
+    flow_id: Annotated[
+        list[int] | None,
+        Body(description="Only include runs using these flow id(s)."),
+    ] = None,
+    setup_id: Annotated[
+        list[int] | None,
+        Body(description="Only include runs with these setup id(s)."),
+    ] = None,
+    uploader: Annotated[
+        list[int] | None,
+        Body(description="Only include runs uploaded by these user id(s)."),
+    ] = None,
+    tag: Annotated[str | None, SystemString64] = None,
+    expdb: Annotated[AsyncConnection, Depends(expdb_connection)] = None,
+) -> list[dict[str, Any]]:
+    """List runs, optionally filtered by one or more criteria.
+
+    Filters are combinable — all provided filters are applied with AND logic.
+    List filters (run_id, task_id, flow_id, setup_id, uploader) accept multiple
+    values and are applied with IN logic within each filter.
+
+    Returns a flat list of run objects. Raises 404 if no runs match the filters.
+
+    PHP equivalent: GET /run/list/[run/{ids}][/task/{ids}][/flow/{ids}]...
+    Note: Unlike PHP (which requires at least one filter), this endpoint allows
+    an empty filter set and returns all runs paginated.
+    """
+    filters: list[str] = []
+    params: dict[str, Any] = {"limit": pagination.limit, "offset": pagination.offset}
+
+    # Each list filter maps a user-facing param to a SQL column.
+    # flow_id maps to algorithm_setup.implementation_id (aliased as `a`).
+    # setup_id maps to run.setup — the FK column stored on the run row.
+    if run_id:
+        _add_in_filter(filters, params, "r.rid", "run_id", run_id)
+    if task_id:
+        _add_in_filter(filters, params, "r.task_id", "task_id", task_id)
+    if flow_id:
+        _add_in_filter(filters, params, "a.implementation_id", "flow_id", flow_id)
+    if setup_id:
+        _add_in_filter(filters, params, "r.setup", "setup_id", setup_id)
+    if uploader:
+        _add_in_filter(filters, params, "r.uploader", "uploader", uploader)
+
+    if tag is not None:
+        # run_tag.id is the run FK (not a surrogate PK), so we join on run.rid
+        filters.append("r.rid IN (SELECT id FROM run_tag WHERE tag = :tag)")
+        params["tag"] = tag
+
+    where_clause = f"WHERE {' AND '.join(filters)}" if filters else ""
+
+    query = text(
+        f"""
+        SELECT
+            r.rid AS run_id,
+            r.task_id AS task_id,
+            r.setup AS setup_id,
+            a.implementation_id AS flow_id,
+            r.uploader AS uploader,
+            r.start_time AS upload_time,
+            IFNULL(r.error_message, '') AS error_message,
+            IFNULL(r.run_details, '') AS run_details
+        FROM run r
+        JOIN algorithm_setup a ON r.setup = a.sid
+        {where_clause}
+        LIMIT :limit OFFSET :offset
+        """,  # noqa: S608
+    )
+
+    result = await expdb.execute(query, params)
+    rows = result.mappings().all()
+
+    if not rows:
+        msg = "No runs match the search criteria."
+        raise NoResultsError(msg)
+
+    # SQLAlchemy returns start_time as a datetime object. Format to match PHP
+    # response shape: "YYYY-MM-DD HH:MM:SS" (no T separator, no timezone).
+    # dict unpacking with a later key overrides the earlier one from **row.
+    return [
+        {**row, "upload_time": row["upload_time"].strftime("%Y-%m-%d %H:%M:%S")} for row in rows
+    ]
diff --git a/tests/routers/openml/runs_list_test.py b/tests/routers/openml/runs_list_test.py
new file mode 100644
index 00000000..180056e1
--- /dev/null
+++ b/tests/routers/openml/runs_list_test.py
@@ -0,0 +1,276 @@
+"""Tests for GET/POST /run/list endpoint.
+
+Test data available in DB (verified):
+  run 24: task_id=115, setup_id=2, flow_id=19, uploader=1159
+  run 25: task_id=115, setup_id=3, flow_id=19, uploader=1159
+  run 26: task_id=11, setup_id=5, flow_id=24, uploader=1159
+  run 28: task_id=801, setup_id=24, flow_id=73, uploader=1159
+  ... (many more, all uploader=1159)
+
+All runs have tags: ["openml-python", "Sklearn_X.X.X."]
+"""
+
+import re
+from http import HTTPStatus
+
+import httpx
+
+RUN_ID_26 = 26
+RUN_ID_24 = 24
+RUN_ID_25 = 25
+RUN_ID_28 = 28
+
+TASK_ID_115 = 115
+TASK_ID_11 = 11
+
+FLOW_ID_19 = 19
+FLOW_ID_24 = 24
+
+SETUP_ID_2 = 2
+
+UPLOADER_1159 = 1159
+
+EXPECTED_FIELDS = {
+    "run_id",
+    "task_id",
+    "setup_id",
+    "flow_id",
+    "uploader",
+    "upload_time",
+    "error_message",
+    "run_details",
+}
+
+
+def assert_valid_run(run: dict[str, object]) -> None:
+    """Assert that a run dict has all expected fields with correct types."""
+    assert set(run.keys()) == EXPECTED_FIELDS, f"Unexpected fields: {set(run.keys())}"
+    assert isinstance(run["run_id"], int)
+    assert isinstance(run["task_id"], int)
+    assert isinstance(run["setup_id"], int)
+    assert isinstance(run["flow_id"], int)
+    assert isinstance(run["uploader"], int)
+    assert isinstance(run["upload_time"], str)
+    assert isinstance(run["error_message"], str)
+    assert isinstance(run["run_details"], str)
+    # upload_time must match PHP format: "YYYY-MM-DD HH:MM:SS" (no T, no timezone)
+    assert re.match(r"^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}$", run["upload_time"]), (
+        f"upload_time format mismatch: {run['upload_time']!r}"
+    )
+
+
+def assert_no_results_error(response: httpx.Response) -> None:
+    """Assert that a response is a 404 NoResultsError with code 372."""
+    assert response.status_code == HTTPStatus.NOT_FOUND
+    assert response.json()["code"] == "372"
+
+
+async def test_get_and_post_list_runs_return_same_results(py_api: httpx.AsyncClient) -> None:
+    """GET and POST /run/list with no filters must return identical results."""
+    get_resp = await py_api.get("/run/list")
+    post_resp = await py_api.post("/run/list", json={})
+    assert get_resp.status_code == HTTPStatus.OK
+    assert post_resp.status_code == HTTPStatus.OK
+    assert get_resp.json() == post_resp.json()
+
+
+async def test_list_runs_no_filter_returns_all_runs(py_api: httpx.AsyncClient) -> None:
+    """No filter returns all runs in DB, paginated by default limit."""
+    response = await py_api.get("/run/list")
+    assert response.status_code == HTTPStatus.OK
+    runs = response.json()
+    assert isinstance(runs, list)
+    assert len(runs) > 0
+    assert {RUN_ID_24, RUN_ID_25, RUN_ID_26, RUN_ID_28} <= {r["run_id"] for r in runs}
+
+
+async def test_list_runs_no_filter_all_fields_valid(py_api: httpx.AsyncClient) -> None:
+    """Every run in no-filter response must have all fields with correct types."""
+    response = await py_api.get("/run/list")
+    assert response.status_code == HTTPStatus.OK
+    for run in response.json():
+        assert_valid_run(run)
+
+
+async def test_list_runs_filter_single_run_id(py_api: httpx.AsyncClient) -> None:
+    """Filter by a single run_id returns exactly that run with correct field values."""
+    response = await py_api.post("/run/list", json={"run_id": [RUN_ID_26]})
+    assert response.status_code == HTTPStatus.OK
+    runs = response.json()
+    assert len(runs) == 1
+    assert runs[0] == {
+        "run_id": RUN_ID_26,
+        "task_id": TASK_ID_11,
+        "setup_id": 5,
+        "flow_id": FLOW_ID_24,
+        "uploader": UPLOADER_1159,
+        "upload_time": "2024-01-04 10:45:03",
+        "error_message": "",
+        "run_details": "",
+    }
+
+
+async def test_list_runs_filter_multiple_run_ids(py_api: httpx.AsyncClient) -> None:
+    """Filter by multiple run_ids returns exactly those runs."""
+    response = await py_api.post("/run/list", json={"run_id": [RUN_ID_24, RUN_ID_26]})
+    assert response.status_code == HTTPStatus.OK
+    assert {r["run_id"] for r in response.json()} == {RUN_ID_24, RUN_ID_26}
+
+
+async def test_list_runs_filter_run_id_not_found(py_api: httpx.AsyncClient) -> None:
+    """Non-existent run_id returns 404 NoResultsError."""
+    assert_no_results_error(await py_api.post("/run/list", json={"run_id": [999999]}))
+
+
+async def test_list_runs_filter_task_id(py_api: httpx.AsyncClient) -> None:
+    """Filter by task_id returns only runs for that task."""
+    response = await py_api.post("/run/list", json={"task_id": [TASK_ID_115]})
+    assert response.status_code == HTTPStatus.OK
+    runs = response.json()
+    assert all(r["task_id"] == TASK_ID_115 for r in runs)
+    assert {RUN_ID_24, RUN_ID_25} <= {r["run_id"] for r in runs}
+
+
+async def test_list_runs_filter_multiple_task_ids(py_api: httpx.AsyncClient) -> None:
+    """Filter by multiple task_ids returns runs for any of those tasks."""
+    response = await py_api.post("/run/list", json={"task_id": [TASK_ID_115, TASK_ID_11]})
+    assert response.status_code == HTTPStatus.OK
+    runs = response.json()
+    assert all(r["task_id"] in {TASK_ID_115, TASK_ID_11} for r in runs)
+    assert {RUN_ID_24, RUN_ID_25, RUN_ID_26} <= {r["run_id"] for r in runs}
+
+
+async def test_list_runs_filter_task_id_not_found(py_api: httpx.AsyncClient) -> None:
+    """Non-existent task_id returns 404."""
+    assert_no_results_error(await py_api.post("/run/list", json={"task_id": [999999]}))
+
+
+async def test_list_runs_filter_flow_id(py_api: httpx.AsyncClient) -> None:
+    """Filter by flow_id returns only runs using that flow."""
+    response = await py_api.post("/run/list", json={"flow_id": [FLOW_ID_19]})
+    assert response.status_code == HTTPStatus.OK
+    runs = response.json()
+    assert all(r["flow_id"] == FLOW_ID_19 for r in runs)
+    assert {RUN_ID_24, RUN_ID_25} <= {r["run_id"] for r in runs}
+
+
+async def test_list_runs_filter_flow_id_not_found(py_api: httpx.AsyncClient) -> None:
+    """Non-existent flow_id returns 404."""
+    assert_no_results_error(await py_api.post("/run/list", json={"flow_id": [999999]}))
+
+
+async def test_list_runs_filter_setup_id(py_api: httpx.AsyncClient) -> None:
+    """Filter by setup_id returns only runs with that setup."""
+    response = await py_api.post("/run/list", json={"setup_id": [SETUP_ID_2]})
+    assert response.status_code == HTTPStatus.OK
+    runs = response.json()
+    assert all(r["setup_id"] == SETUP_ID_2 for r in runs)
+    assert len(runs) == 1
+    assert runs[0]["run_id"] == RUN_ID_24
+
+
+async def test_list_runs_filter_setup_id_not_found(py_api: httpx.AsyncClient) -> None:
+    """Non-existent setup_id returns 404."""
+    assert_no_results_error(await py_api.post("/run/list", json={"setup_id": [999999]}))
+
+
+async def test_list_runs_filter_uploader(py_api: httpx.AsyncClient) -> None:
+    """Filter by uploader returns only runs from that user."""
+    response = await py_api.post("/run/list", json={"uploader": [UPLOADER_1159]})
+    assert response.status_code == HTTPStatus.OK
+    runs = response.json()
+    assert len(runs) > 0
+    assert all(r["uploader"] == UPLOADER_1159 for r in runs)
+
+
+async def test_list_runs_filter_uploader_not_found(py_api: httpx.AsyncClient) -> None:
+    """Non-existent uploader returns 404."""
+    assert_no_results_error(await py_api.post("/run/list", json={"uploader": [999999]}))
+
+
+async def test_list_runs_filter_tag(py_api: httpx.AsyncClient) -> None:
+    """Filter by tag returns only runs tagged with that value."""
+    response = await py_api.post("/run/list", json={"tag": "openml-python"})
+    assert response.status_code == HTTPStatus.OK
+    runs = response.json()
+    assert len(runs) > 0
+    assert {RUN_ID_24, RUN_ID_25, RUN_ID_26} <= {r["run_id"] for r in runs}
+
+
+async def test_list_runs_filter_tag_not_found(py_api: httpx.AsyncClient) -> None:
+    """Non-existent tag returns 404."""
+    assert_no_results_error(
+        await py_api.post("/run/list", json={"tag": "nonexistent-tag-xyz"}),
+    )
+
+
+async def test_list_runs_filter_tag_invalid_format(py_api: httpx.AsyncClient) -> None:
+    """Tag containing spaces (invalid per SystemString64) returns 422."""
+    response = await py_api.post("/run/list", json={"tag": "invalid tag with spaces"})
+    assert response.status_code == HTTPStatus.UNPROCESSABLE_ENTITY
+
+
+async def test_list_runs_combined_task_and_flow(py_api: httpx.AsyncClient) -> None:
+    """task_id + flow_id combined narrows results to intersection."""
+    response = await py_api.post(
+        "/run/list",
+        json={"task_id": [TASK_ID_115], "flow_id": [FLOW_ID_19]},
+    )
+    assert response.status_code == HTTPStatus.OK
+    runs = response.json()
+    assert all(r["task_id"] == TASK_ID_115 and r["flow_id"] == FLOW_ID_19 for r in runs)
+
+
+async def test_list_runs_combined_filters_no_match(py_api: httpx.AsyncClient) -> None:
+    """Filters with no common run return 404.
+
+    Runs with task_id=115 all have flow_id=19.
+    Run 26 has flow_id=24 but task_id=11, not 115.
+    No run satisfies both task_id=115 AND flow_id=24.
+    """
+    assert_no_results_error(
+        await py_api.post("/run/list", json={"task_id": [TASK_ID_115], "flow_id": [FLOW_ID_24]}),
+    )
+
+
+async def test_list_runs_combined_run_id_and_matching_task_id(py_api: httpx.AsyncClient) -> None:
+    """run_id + correct task_id returns the run."""
+    response = await py_api.post(
+        "/run/list",
+        json={"run_id": [RUN_ID_26], "task_id": [TASK_ID_11]},
+    )
+    assert response.status_code == HTTPStatus.OK
+    assert response.json()[0]["run_id"] == RUN_ID_26
+
+
+async def test_list_runs_combined_run_id_and_mismatched_task_id(py_api: httpx.AsyncClient) -> None:
+    """run_id + wrong task_id (AND logic) returns 404.
+
+    run 26 has task_id=11, not 115 — combination yields no rows.
+    """
+    assert_no_results_error(
+        await py_api.post("/run/list", json={"run_id": [RUN_ID_26], "task_id": [TASK_ID_115]}),
+    )
+
+
+async def test_list_runs_pagination_limit(py_api: httpx.AsyncClient) -> None:
+    """Pagination limit=1 returns exactly 1 run."""
+    response = await py_api.post("/run/list", json={"pagination": {"limit": 1, "offset": 0}})
+    assert response.status_code == HTTPStatus.OK
+    assert len(response.json()) == 1
+
+
+async def test_list_runs_pagination_offset(py_api: httpx.AsyncClient) -> None:
+    """Different offsets return different runs."""
+    resp_0 = await py_api.post("/run/list", json={"pagination": {"limit": 1, "offset": 0}})
+    resp_1 = await py_api.post("/run/list", json={"pagination": {"limit": 1, "offset": 1}})
+    assert resp_0.status_code == HTTPStatus.OK
+    assert resp_1.status_code == HTTPStatus.OK
+    assert resp_0.json()[0]["run_id"] != resp_1.json()[0]["run_id"]
+
+
+async def test_list_runs_pagination_offset_beyond_results(py_api: httpx.AsyncClient) -> None:
+    """Offset beyond total number of runs returns 404."""
+    assert_no_results_error(
+        await py_api.post("/run/list", json={"pagination": {"limit": 100, "offset": 999999}}),
+    )

From 366f27317a2030dcfb0f2ffdae66a464688c9da9 Mon Sep 17 00:00:00 2001
From: saathviksheerla
Date: Wed, 18 Mar 2026 10:33:09 +0530
Subject: [PATCH 2/2] fix: add ORDER BY r.rid for deterministic pagination in run list

---
 src/routers/openml/runs.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/routers/openml/runs.py b/src/routers/openml/runs.py
index 70a7c511..0cef25ea 100644
--- a/src/routers/openml/runs.py
+++ b/src/routers/openml/runs.py
@@ -118,6 +118,7 @@ async def list_runs(  # noqa: PLR0913
         FROM run r
         JOIN algorithm_setup a ON r.setup = a.sid
         {where_clause}
+        ORDER BY r.rid
         LIMIT :limit OFFSET :offset
         """,  # noqa: S608
     )