From 42bc467b690383aaa62dd3a3123ce0480db8f3c6 Mon Sep 17 00:00:00 2001
From: saathviksheerla <saathvik.sheerla@gmail.com>
Date: Tue, 17 Mar 2026 22:04:20 +0530
Subject: [PATCH 1/2] feat: add GET /run/list endpoint (#39)

---
 src/main.py                            |   2 +
 src/routers/openml/runs.py             | 137 ++++++++++++
 tests/routers/openml/runs_list_test.py | 276 +++++++++++++++++++++++++
 3 files changed, 415 insertions(+)
 create mode 100644 src/routers/openml/runs.py
 create mode 100644 tests/routers/openml/runs_list_test.py

diff --git a/src/main.py b/src/main.py
index 76a52ad3..0a0c5460 100644
--- a/src/main.py
+++ b/src/main.py
@@ -15,6 +15,7 @@
 from routers.openml.evaluations import router as evaluationmeasures_router
 from routers.openml.flows import router as flows_router
 from routers.openml.qualities import router as qualities_router
+from routers.openml.runs import router as run_router
 from routers.openml.setups import router as setup_router
 from routers.openml.study import router as study_router
 from routers.openml.tasks import router as task_router
@@ -69,6 +70,7 @@ def create_api() -> FastAPI:
     app.include_router(task_router)
     app.include_router(flows_router)
     app.include_router(study_router)
+    app.include_router(run_router)
     app.include_router(setup_router)
     return app
 
diff --git a/src/routers/openml/runs.py b/src/routers/openml/runs.py
new file mode 100644
index 00000000..70a7c511
--- /dev/null
+++ b/src/routers/openml/runs.py
@@ -0,0 +1,137 @@
+"""Endpoints for OpenML Run resources."""
+
+from typing import Annotated, Any
+
+from fastapi import APIRouter, Body, Depends
+from sqlalchemy import text
+from sqlalchemy.ext.asyncio import AsyncConnection
+
+from core.errors import NoResultsError
+from routers.dependencies import Pagination, expdb_connection
+from routers.types import SystemString64
+
+router = APIRouter(prefix="/run", tags=["run"])
+
+
+def _add_in_filter(
+    filters: list[str], params: dict[str, Any], column: str, param_prefix: str, values: list[int]
+) -> None:
+    """Append an IN filter clause and its bind parameters to the query builder.
+
+    Builds named placeholders (:prefix_0, :prefix_1, ...) for safe binding
+    of multiple integer values without SQL injection risk. An empty `values`
+    list appends an always-false predicate, since `IN ()` is invalid SQL.
+
+    Args:
+        filters: List of WHERE clause fragments to append to.
+        params: Bind parameter dict to update in-place.
+        column: SQL column expression (e.g. "r.rid", "a.implementation_id").
+        param_prefix: Prefix for named bind params (e.g. "run_id", "flow_id").
+        values: List of integer values to filter by.
+
+    """
+    if not values:
+        filters.append("1 = 0")
+        return
+    bound = {f"{param_prefix}_{i}": v for i, v in enumerate(values)}
+    filters.append(f"{column} IN ({', '.join(f':{name}' for name in bound)})")
+    params |= bound
+
+
+@router.post(path="/list", description="Provided for convenience, same as `GET` endpoint.")
+@router.get(path="/list")
+async def list_runs(  # noqa: PLR0913
+    pagination: Annotated[Pagination, Body(default_factory=Pagination)],
+    run_id: Annotated[
+        list[int] | None,
+        Body(
+            description="The run(s) to include in the search. "
+            "If none are specified, all runs are included.",
+        ),
+    ] = None,
+    task_id: Annotated[
+        list[int] | None,
+        Body(description="Only include runs for these task id(s)."),
+    ] = None,
+    flow_id: Annotated[
+        list[int] | None,
+        Body(description="Only include runs using these flow id(s)."),
+    ] = None,
+    setup_id: Annotated[
+        list[int] | None,
+        Body(description="Only include runs with these setup id(s)."),
+    ] = None,
+    uploader: Annotated[
+        list[int] | None,
+        Body(description="Only include runs uploaded by these user id(s)."),
+    ] = None,
+    tag: Annotated[str | None, SystemString64] = None,
+    expdb: Annotated[AsyncConnection, Depends(expdb_connection)] = None,
+) -> list[dict[str, Any]]:
+    """Return a paginated list of runs matching every supplied filter.
+
+    The id filters (run_id, task_id, flow_id, setup_id, uploader) each accept
+    several values and match a run whose value is any of them. All supplied
+    filters, `tag` included, are combined with AND; filters that are omitted
+    or empty are not applied.
+
+    Returns a flat list of run objects; raises NoResultsError if none match.
+
+    PHP equivalent: GET /run/list/[run/{ids}][/task/{ids}][/flow/{ids}]...
+    Unlike PHP, which requires at least one filter, no filter means all runs.
+    """
+    params: dict[str, Any] = {"limit": pagination.limit, "offset": pagination.offset}
+    filters: list[str] = []
+
+    # One (SQL column, bind-parameter prefix, requested ids) triple per id filter.
+    # flow_id is algorithm_setup.implementation_id (table aliased as `a`);
+    # setup_id is the `setup` column on the run row itself.
+    id_filters = (
+        ("r.rid", "run_id", run_id),
+        ("r.task_id", "task_id", task_id),
+        ("a.implementation_id", "flow_id", flow_id),
+        ("r.setup", "setup_id", setup_id),
+        ("r.uploader", "uploader", uploader),
+    )
+    for column, prefix, ids in id_filters:
+        if ids:
+            _add_in_filter(filters, params, column, prefix, ids)
+
+    if tag is not None:
+        # run_tag.id is matched against run.rid, i.e. it holds the tagged run's id.
+        filters.append("r.rid IN (SELECT id FROM run_tag WHERE tag = :tag)")
+        params["tag"] = tag
+
+    where_clause = ("WHERE " + " AND ".join(filters)) if filters else ""
+
+    query = text(
+        f"""
+        SELECT
+            r.rid                       AS run_id,
+            r.task_id                   AS task_id,
+            r.setup                     AS setup_id,
+            a.implementation_id         AS flow_id,
+            r.uploader                  AS uploader,
+            r.start_time                AS upload_time,
+            IFNULL(r.error_message, '') AS error_message,
+            IFNULL(r.run_details, '')   AS run_details
+        FROM run r
+        JOIN algorithm_setup a ON r.setup = a.sid
+        {where_clause}
+        LIMIT :limit OFFSET :offset
+        """,  # noqa: S608
+    )
+
+    rows = (await expdb.execute(query, params)).mappings().all()
+    if not rows:
+        msg = "No runs match the search criteria."
+        raise NoResultsError(msg)
+
+    # upload_time (run.start_time) is formatted via strftime as
+    # "YYYY-MM-DD HH:MM:SS" (no T separator, no timezone) to match PHP.
+    runs: list[dict[str, Any]] = []
+    for row in rows:
+        run = dict(row)
+        run["upload_time"] = row["upload_time"].strftime("%Y-%m-%d %H:%M:%S")
+        runs.append(run)
+    return runs
diff --git a/tests/routers/openml/runs_list_test.py b/tests/routers/openml/runs_list_test.py
new file mode 100644
index 00000000..180056e1
--- /dev/null
+++ b/tests/routers/openml/runs_list_test.py
@@ -0,0 +1,276 @@
+"""Tests for GET/POST /run/list endpoint.
+
+Test data available in DB (verified):
+  run 24: task_id=115, setup_id=2,  flow_id=19,  uploader=1159
+  run 25: task_id=115, setup_id=3,  flow_id=19,  uploader=1159
+  run 26: task_id=11,  setup_id=5,  flow_id=24,  uploader=1159
+  run 28: task_id=801, setup_id=24, flow_id=73,  uploader=1159
+  ... (many more, all uploader=1159)
+
+All runs have tags: ["openml-python", "Sklearn_X.X.X."]
+"""
+
+import re
+from http import HTTPStatus
+
+import httpx
+
+RUN_ID_24 = 24
+RUN_ID_25 = 25
+RUN_ID_26 = 26
+RUN_ID_28 = 28
+
+TASK_ID_11 = 11
+TASK_ID_115 = 115
+
+FLOW_ID_19 = 19
+FLOW_ID_24 = 24
+
+SETUP_ID_2 = 2
+
+UPLOADER_1159 = 1159
+
+EXPECTED_FIELDS = {
+    "run_id",
+    "task_id",
+    "setup_id",
+    "flow_id",
+    "uploader",
+    "upload_time",
+    "error_message",
+    "run_details",
+}
+
+
+def assert_valid_run(run: dict[str, object]) -> None:
+    """Assert that a run dict has exactly the expected fields and types.
+
+    `upload_time` must fully match the PHP format "YYYY-MM-DD HH:MM:SS" (no T, no timezone).
+    """
+    assert set(run.keys()) == EXPECTED_FIELDS, f"Unexpected fields: {set(run.keys())}"
+    for field in ("run_id", "task_id", "setup_id", "flow_id", "uploader"):
+        assert isinstance(run[field], int), f"{field} is not an int: {run[field]!r}"
+    for field in ("error_message", "run_details"):
+        assert isinstance(run[field], str), f"{field} is not a str: {run[field]!r}"
+    upload_time = run["upload_time"]
+    assert isinstance(upload_time, str), f"upload_time is not a str: {upload_time!r}"
+    assert re.fullmatch(r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}", upload_time), (
+        f"upload_time format mismatch: {upload_time!r}"
+    )
+
+
+def assert_no_results_error(response: httpx.Response) -> None:
+    """Assert that the response has status 404 and a JSON body whose "code" is "372"."""
+    assert response.status_code == HTTPStatus.NOT_FOUND
+    assert response.json()["code"] == "372"
+
+
+async def test_get_and_post_list_runs_return_same_results(py_api: httpx.AsyncClient) -> None:
+    """An unfiltered listing is identical whether requested via GET or POST."""
+    via_get = await py_api.get("/run/list")
+    via_post = await py_api.post("/run/list", json={})
+    assert via_get.status_code == HTTPStatus.OK
+    assert via_post.status_code == HTTPStatus.OK
+    assert via_get.json() == via_post.json()
+
+
+async def test_list_runs_no_filter_returns_all_runs(py_api: httpx.AsyncClient) -> None:
+    """Without filters, the default page of runs contains the known fixture runs."""
+    resp = await py_api.get("/run/list")
+    assert resp.status_code == HTTPStatus.OK
+    listed = resp.json()
+    assert isinstance(listed, list)
+    assert len(listed) > 0
+    assert {run["run_id"] for run in listed} >= {RUN_ID_24, RUN_ID_25, RUN_ID_26, RUN_ID_28}
+
+
+async def test_list_runs_no_filter_all_fields_valid(py_api: httpx.AsyncClient) -> None:
+    """Each run of an unfiltered listing passes the field and type checks."""
+    resp = await py_api.get("/run/list")
+    assert resp.status_code == HTTPStatus.OK
+    for listed_run in resp.json():
+        assert_valid_run(listed_run)
+
+
+async def test_list_runs_filter_single_run_id(py_api: httpx.AsyncClient) -> None:
+    """A single run_id selects exactly that run, with its known field values."""
+    resp = await py_api.post("/run/list", json={"run_id": [RUN_ID_26]})
+    assert resp.status_code == HTTPStatus.OK
+    expected_run = {
+        "run_id": RUN_ID_26,
+        "task_id": TASK_ID_11,
+        "setup_id": 5,
+        "flow_id": FLOW_ID_24,
+        "uploader": UPLOADER_1159,
+        "upload_time": "2024-01-04 10:45:03",
+        "error_message": "",
+        "run_details": "",
+    }
+    listed = resp.json()
+    assert listed == [expected_run]
+
+
+async def test_list_runs_filter_multiple_run_ids(py_api: httpx.AsyncClient) -> None:
+    """Several run_ids select exactly the runs with those ids."""
+    resp = await py_api.post("/run/list", json={"run_id": [RUN_ID_24, RUN_ID_26]})
+    assert resp.status_code == HTTPStatus.OK
+    assert {run["run_id"] for run in resp.json()} == {RUN_ID_24, RUN_ID_26}
+
+
+async def test_list_runs_filter_run_id_not_found(py_api: httpx.AsyncClient) -> None:
+    """A run_id that matches no run gives 404 NoResultsError."""
+    assert_no_results_error(await py_api.post("/run/list", json={"run_id": [999_999]}))
+
+
+async def test_list_runs_filter_task_id(py_api: httpx.AsyncClient) -> None:
+    """A task_id filter yields only that task's runs, including runs 24 and 25."""
+    resp = await py_api.post("/run/list", json={"task_id": [TASK_ID_115]})
+    assert resp.status_code == HTTPStatus.OK
+    listed = resp.json()
+    assert {run["task_id"] for run in listed} <= {TASK_ID_115}
+    assert {run["run_id"] for run in listed} >= {RUN_ID_24, RUN_ID_25}
+
+
+async def test_list_runs_filter_multiple_task_ids(py_api: httpx.AsyncClient) -> None:
+    """Several task_ids yield runs belonging to any of those tasks."""
+    resp = await py_api.post("/run/list", json={"task_id": [TASK_ID_115, TASK_ID_11]})
+    assert resp.status_code == HTTPStatus.OK
+    listed = resp.json()
+    assert {run["task_id"] for run in listed} <= {TASK_ID_115, TASK_ID_11}
+    assert {run["run_id"] for run in listed} >= {RUN_ID_24, RUN_ID_25, RUN_ID_26}
+
+
+async def test_list_runs_filter_task_id_not_found(py_api: httpx.AsyncClient) -> None:
+    """A task_id that no run belongs to gives 404."""
+    assert_no_results_error(await py_api.post("/run/list", json={"task_id": [999_999]}))
+
+
+async def test_list_runs_filter_flow_id(py_api: httpx.AsyncClient) -> None:
+    """A flow_id filter yields only that flow's runs, including runs 24 and 25."""
+    resp = await py_api.post("/run/list", json={"flow_id": [FLOW_ID_19]})
+    assert resp.status_code == HTTPStatus.OK
+    listed = resp.json()
+    assert {run["flow_id"] for run in listed} <= {FLOW_ID_19}
+    assert {run["run_id"] for run in listed} >= {RUN_ID_24, RUN_ID_25}
+
+
+async def test_list_runs_filter_flow_id_not_found(py_api: httpx.AsyncClient) -> None:
+    """A flow_id that no run uses gives 404."""
+    assert_no_results_error(await py_api.post("/run/list", json={"flow_id": [999_999]}))
+
+
+async def test_list_runs_filter_setup_id(py_api: httpx.AsyncClient) -> None:
+    """A setup_id filter yields only runs with that setup: here just run 24."""
+    resp = await py_api.post("/run/list", json={"setup_id": [SETUP_ID_2]})
+    assert resp.status_code == HTTPStatus.OK
+    listed = resp.json()
+    assert {run["setup_id"] for run in listed} <= {SETUP_ID_2}
+    assert len(listed) == 1
+    assert listed[0]["run_id"] == RUN_ID_24
+
+
+async def test_list_runs_filter_setup_id_not_found(py_api: httpx.AsyncClient) -> None:
+    """A setup_id that no run uses gives 404."""
+    assert_no_results_error(await py_api.post("/run/list", json={"setup_id": [999_999]}))
+
+
+async def test_list_runs_filter_uploader(py_api: httpx.AsyncClient) -> None:
+    """An uploader filter yields a non-empty list of that user's runs only."""
+    resp = await py_api.post("/run/list", json={"uploader": [UPLOADER_1159]})
+    assert resp.status_code == HTTPStatus.OK
+    listed = resp.json()
+    assert len(listed) > 0
+    assert {run["uploader"] for run in listed} <= {UPLOADER_1159}
+
+
+async def test_list_runs_filter_uploader_not_found(py_api: httpx.AsyncClient) -> None:
+    """An uploader id with no runs gives 404."""
+    assert_no_results_error(await py_api.post("/run/list", json={"uploader": [999_999]}))
+
+
+async def test_list_runs_filter_tag(py_api: httpx.AsyncClient) -> None:
+    """A tag filter returns runs carrying that tag, including runs 24, 25 and 26."""
+    resp = await py_api.post("/run/list", json={"tag": "openml-python"})
+    assert resp.status_code == HTTPStatus.OK
+    listed = resp.json()
+    assert len(listed) > 0
+    assert {run["run_id"] for run in listed} >= {RUN_ID_24, RUN_ID_25, RUN_ID_26}
+
+
+async def test_list_runs_filter_tag_not_found(py_api: httpx.AsyncClient) -> None:
+    """A tag that no run carries gives 404."""
+    unknown_tag = {"tag": "nonexistent-tag-xyz"}
+    resp = await py_api.post("/run/list", json=unknown_tag)
+    assert_no_results_error(resp)
+
+
+async def test_list_runs_filter_tag_invalid_format(py_api: httpx.AsyncClient) -> None:
+    """A tag with spaces is rejected with 422 (it is invalid per SystemString64)."""
+    resp = await py_api.post("/run/list", json={"tag": "invalid tag with spaces"})
+    assert resp.status_code == HTTPStatus.UNPROCESSABLE_ENTITY
+
+
+async def test_list_runs_combined_task_and_flow(py_api: httpx.AsyncClient) -> None:
+    """Combining task_id and flow_id keeps only runs matching both."""
+    both_filters = {"task_id": [TASK_ID_115], "flow_id": [FLOW_ID_19]}
+    resp = await py_api.post("/run/list", json=both_filters)
+    assert resp.status_code == HTTPStatus.OK
+    listed = resp.json()
+    for run in listed:
+        assert run["task_id"] == TASK_ID_115
+        assert run["flow_id"] == FLOW_ID_19
+
+
+async def test_list_runs_combined_filters_no_match(py_api: httpx.AsyncClient) -> None:
+    """Filters that no single run satisfies together give 404.
+
+    Every run of task 115 uses flow 19.
+    Flow 24 is used by run 26, which belongs to task 11.
+    Hence task_id=115 AND flow_id=24 matches nothing.
+    """
+    disjoint = {"task_id": [TASK_ID_115], "flow_id": [FLOW_ID_24]}
+    resp = await py_api.post("/run/list", json=disjoint)
+    assert_no_results_error(resp)
+
+
+async def test_list_runs_combined_run_id_and_matching_task_id(py_api: httpx.AsyncClient) -> None:
+    """A run_id combined with that run's own task_id still returns the run."""
+    # Run 26 belongs to task 11, so both filters agree.
+    matching = {"run_id": [RUN_ID_26], "task_id": [TASK_ID_11]}
+    resp = await py_api.post("/run/list", json=matching)
+    assert resp.status_code == HTTPStatus.OK
+    listed = resp.json()
+    assert listed[0]["run_id"] == RUN_ID_26
+
+
+async def test_list_runs_combined_run_id_and_mismatched_task_id(py_api: httpx.AsyncClient) -> None:
+    """A run_id combined with another task's id matches nothing (404).
+
+    Run 26 belongs to task 11, so also requiring task 115 excludes it.
+    """
+    conflicting = {"run_id": [RUN_ID_26], "task_id": [TASK_ID_115]}
+    resp = await py_api.post("/run/list", json=conflicting)
+    assert_no_results_error(resp)
+
+
+async def test_list_runs_pagination_limit(py_api: httpx.AsyncClient) -> None:
+    """With limit=1 the listing contains exactly one run."""
+    resp = await py_api.post("/run/list", json={"pagination": {"limit": 1, "offset": 0}})
+    assert resp.status_code == HTTPStatus.OK
+    assert len(resp.json()) == 1
+
+
+async def test_list_runs_pagination_offset(py_api: httpx.AsyncClient) -> None:
+    """Offsets 0 and 1 with limit=1 yield two different runs."""
+    page_0 = await py_api.post("/run/list", json={"pagination": {"limit": 1, "offset": 0}})
+    page_1 = await py_api.post("/run/list", json={"pagination": {"limit": 1, "offset": 1}})
+    assert page_0.status_code == HTTPStatus.OK
+    assert page_1.status_code == HTTPStatus.OK
+    assert page_0.json()[0]["run_id"] != page_1.json()[0]["run_id"]
+
+
+async def test_list_runs_pagination_offset_beyond_results(py_api: httpx.AsyncClient) -> None:
+    """An offset past the last run leaves nothing to return: 404."""
+    past_the_end = {"pagination": {"limit": 100, "offset": 999999}}
+    resp = await py_api.post("/run/list", json=past_the_end)
+    assert_no_results_error(resp)

From 366f27317a2030dcfb0f2ffdae66a464688c9da9 Mon Sep 17 00:00:00 2001
From: saathviksheerla <saathvik.sheerla@gmail.com>
Date: Wed, 18 Mar 2026 10:33:09 +0530
Subject: [PATCH 2/2] fix: add ORDER BY r.rid for deterministic pagination in
 run list

---
 src/routers/openml/runs.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/routers/openml/runs.py b/src/routers/openml/runs.py
index 70a7c511..0cef25ea 100644
--- a/src/routers/openml/runs.py
+++ b/src/routers/openml/runs.py
@@ -118,6 +118,7 @@ async def list_runs(  # noqa: PLR0913
         FROM run r
         JOIN algorithm_setup a ON r.setup = a.sid
         {where_clause}
+        ORDER BY r.rid
         LIMIT :limit OFFSET :offset
         """,  # noqa: S608
     )