TranslatorSRI · gaurav · Jan 8, 2026 · Jan 8, 2026 · Jan 8, 2026 · Jan 12, 2026
diff --git a/.gitignore b/.gitignore
@@ -1,3 +1,6 @@
+# Ignore the root .env file.
+/.env
+
 # Ignore all data files.
 data/
 

diff --git a/CLAUDE.md b/CLAUDE.md
@@ -46,21 +46,31 @@ cd website && npm install && npm run dev   # Dev server at localhost:4321
 
 ## Architecture
 
+### Library (`src/babel_validation/`)
+
+Shared library code used by the tests and potentially other consumers.
+
+- `core/testrow.py` — `TestRow` dataclass (models a single Google Sheet test row), `TestStatus` enum, `TestResult` dataclass
+- `services/nodenorm.py` — `CachedNodeNorm`: wraps the NodeNorm `get_normalized_nodes` API with per-instance caching
+- `services/nameres.py` — `CachedNameRes`: wraps the NameRes `lookup`/`bulk-lookup` APIs with per-instance caching
+- `sources/google_sheets/google_sheet_test_cases.py` — `GoogleSheetTestCases`: downloads and parses the shared Google Sheet into `TestRow` instances and pytest `ParameterSet` lists
+- `sources/github/github_issues_test_cases.py` — `GitHubIssueTest` and `GitHubIssuesTestCases`: pull test cases embedded in GitHub issues (wiki or YAML syntax) and evaluate them against NodeNorm/NameRes
+
 ### Test Framework (`tests/`)
 
 The core of this project. Tests validate NodeNorm and NameRes services across multiple deployment environments.
 
 **Target system:** `tests/targets.ini` defines endpoints for each environment (dev, prod, test, ci, exp, localhost). Tests use `target_info` fixture to get URLs. The `conftest.py` parametrizes tests across targets via `--target` CLI option; default is `dev`.
 
-**Google Sheet integration:** ~2000+ test cases are pulled from a [shared Google Sheet](https://docs.google.com/spreadsheets/d/11zebx8Qs1Tc3ShQR9nh4HRW8QSoo8k65w_xIaftN0no/). `tests/common/google_sheet_test_cases.py` fetches and parses these into `TestRow` dataclasses. Rows marked as not expected to pass are wrapped with `pytest.mark.xfail(strict=True)`. Tests are parametrized by row, with IDs like `gsheet:row=42`.
+**Google Sheet integration:** ~2000+ test cases are pulled from a [shared Google Sheet](https://docs.google.com/spreadsheets/d/11zebx8Qs1Tc3ShQR9nh4HRW8QSoo8k65w_xIaftN0no/). `src/babel_validation/sources/google_sheets/google_sheet_test_cases.py` fetches and parses these into `TestRow` dataclasses. Rows marked as not expected to pass are wrapped with `pytest.mark.xfail(strict=True)`. Tests are parametrized by row, with IDs like `gsheet:row=42`.
 
 **Category filtering:** Google Sheet rows have a Category column. The `test_category` fixture (from conftest.py) returns a callable that tests use to `pytest.skip()` rows not matching `--category`/`--category-exclude` filters.
 
 **Test modules:**
 - `tests/nodenorm/` — NodeNorm tests (normalization accuracy, preferred IDs/labels, Biolink types, conflation, descriptions, OpenAPI spec, setid endpoint)
 - `tests/nameres/` — NameRes tests (label lookup, autocomplete, Biolink type filtering, blocklist, taxon_specific flag)
-- `tests/nodenorm/by_issue/` — Tests tied to specific GitHub issues
-- `tests/common/` — Shared utilities (`GoogleSheetTestCases`, `TestRow`)
+- `tests/nodenorm/by_issue/` — Per-issue regression tests for NodeNorm (hand-written)
+- `tests/github_issues/` — Dynamically-generated tests pulled from GitHub issue bodies via `GitHubIssuesTestCases`
 
 ### Web Applications
 
@@ -79,4 +89,6 @@ When writing new tests:
 - Use the `target_info` fixture to get NodeNorm/NameRes URLs from targets.ini
 - For Google Sheet-based tests, parametrize with `gsheet.test_rows()` and use the `test_category` fixture for category filtering
 - Use `pytest.mark.xfail(strict=True)` for known failures (strict=True means unexpected passes also fail)
-- Issue-specific tests go in `tests/nodenorm/by_issue/` or `tests/github_issues/`
+- Hand-written per-issue regression tests go in `tests/nodenorm/by_issue/`
+- GitHub-issue-driven tests are picked up automatically by `tests/github_issues/test_github_issues.py` via `GitHubIssuesTestCases`
+- Import shared classes from `src.babel_validation.*` (e.g. `from src.babel_validation.services.nodenorm import CachedNodeNorm`)
diff --git a/pyproject.toml b/pyproject.toml
@@ -8,9 +8,14 @@ requires-python = ">=3.11"
 dependencies = [
     "black>=25.9.0",
     "deepdiff>=8.6.1",
+    "dotenv>=0.9.9",
     "openapi-spec-validator>=0.7.2",
-    "pytest>=8.4.2",
+    "pygithub>=2.8.1",
+    "pytest>=9.0",
+    "pytest-xdist[psutil]",
+    "filelock",
     "requests>=2.32.5",
+    "tqdm>=4.67.1",
 ]
 
 [project.urls]

diff --git a/tests/common/__init__.py → src/__init__.py b/tests/common/__init__.py → src/__init__.py
diff --git a/src/babel_validation/__init__.py b/src/babel_validation/__init__.py
diff --git a/src/babel_validation/assertions/README.md b/src/babel_validation/assertions/README.md
@@ -0,0 +1,186 @@
+<!-- AUTO-GENERATED — do not edit by hand.
+     Regenerate with: uv run python -m src.babel_validation.assertions.gen_docs -->
+
+# BabelTest Assertion Types
+
+This package defines the assertion types that can be embedded in GitHub issue bodies and evaluated against the NodeNorm and NameRes services.
+
+## Embedding Tests in Issues
+
+Two syntaxes are supported:
+
+**Wiki syntax** (one assertion per line):
+```
+{{BabelTest|AssertionType|param1|param2|...}}
+```
+
+**YAML syntax** (multiple assertions, multiple param sets):
+````
+```yaml
+babel_tests:
+  AssertionType:
+    - param1
+    - [param1, param2]
+```
+````
+
+Assertion names are case-insensitive.
+
+---
+
+## NodeNorm Assertions
+
+These assertions test the [NodeNorm](https://nodenorm.transltr.io/docs) service.
+
+### Resolves
+
+**Applies to:** NodeNorm
+
+Each CURIE in each param_set must resolve to a non-null result in NodeNorm.
+
+**Parameters:** One or more CURIEs per param_set.
+
+**Wiki syntax:**
+```
+{{BabelTest|Resolves|CHEBI:15365}}
+{{BabelTest|Resolves|MONDO:0005015|DOID:9351}}
+```
+
+**YAML syntax:**
+```yaml
+babel_tests:
+  Resolves:
+    - CHEBI:15365
+    - [MONDO:0005015, DOID:9351]
+```
+
+---
+
+### DoesNotResolve
+
+**Applies to:** NodeNorm
+
+Each CURIE in each param_set must fail to resolve (return null) in NodeNorm. Use this to confirm that an identifier is intentionally not normalizable.
+
+**Parameters:** One or more CURIEs per param_set.
+
+**Wiki syntax:**
+```
+{{BabelTest|DoesNotResolve|FAKENS:99999}}
+```
+
+**YAML syntax:**
+```yaml
+babel_tests:
+  DoesNotResolve:
+    - FAKENS:99999
+```
+
+---
+
+### ResolvesWith
+
+**Applies to:** NodeNorm
+
+All CURIEs within each param_set must resolve to the identical normalized result. Use this to assert that two identifiers are equivalent.
+
+**Parameters:** Two or more CURIEs per param_set. All must resolve to the same result.
+
+**Wiki syntax:**
+```
+{{BabelTest|ResolvesWith|CHEBI:15365|PUBCHEM.COMPOUND:1}}
+```
+
+**YAML syntax:**
+```yaml
+babel_tests:
+  ResolvesWith:
+    - [CHEBI:15365, PUBCHEM.COMPOUND:1]
+    - [MONDO:0005015, DOID:9351]
+```
+
+---
+
+### ResolvesWithType
+
+**Applies to:** NodeNorm
+
+Each param_set must have at least two elements: the first is the expected Biolink type (e.g. 'biolink:Gene'), and the remainder are CURIEs that must resolve with that type.
+
+**Parameters:** Each param_set: first element is the expected Biolink type (e.g. `biolink:Gene`), remaining elements are CURIEs.
+
+**Wiki syntax:**
+```
+{{BabelTest|ResolvesWithType|biolink:Gene|NCBIGene:1}}
+```
+
+**YAML syntax:**
+```yaml
+babel_tests:
+  ResolvesWithType:
+    - [biolink:Gene, NCBIGene:1, HGNC:5]
+```
+
+---
+
+## NameRes Assertions
+
+These assertions test the [NameRes](https://name-lookup.transltr.io/docs) service.
+
+### SearchByName
+
+**Applies to:** NameRes
+
+Each param_set must have at least two elements: a search query string and an expected CURIE. The test passes if the CURIE's normalized identifier appears within the top N results (default N=5) when NameRes looks up the search query.
+
+**Parameters:** Each param_set: the **search query string** and the **expected CURIE**. The CURIE is normalized via NodeNorm (drug/chemical conflation enabled) before matching.
+
+**Wiki syntax:**
+```
+{{BabelTest|SearchByName|water|CHEBI:15377}}
+```
+
+**YAML syntax:**
+```yaml
+babel_tests:
+  SearchByName:
+    - [water, CHEBI:15377]
+    - [diabetes, MONDO:0005015]
+```
+
+---
+
+## Special Assertions
+
+### Needed
+
+**Applies to:** NodeNorm and NameRes
+
+Marks an issue as needing a test — always fails as a reminder to add real assertions.
+
+**Wiki syntax:**
+```
+{{BabelTest|Needed}}
+```
+
+**YAML syntax:**
+```yaml
+babel_tests:
+  Needed:
+    - placeholder
+```
+
+---
+
+## Adding a New Assertion Type
+
+1. Choose the right module:
+   - `nodenorm.py` — for NodeNorm-only assertions (subclass `NodeNormTest`, override `test_param_set`)
+   - `nameres.py` — for NameRes-only assertions (subclass `NameResTest`, override `test_param_set`)
+   - `common.py` — for assertions that apply to both services (subclass `AssertionHandler`, override `test_with_nodenorm` and/or `test_with_nameres`)
+
+2. Define the class with `NAME`, `DESCRIPTION`, `PARAMETERS`, `WIKI_EXAMPLES`, `YAML_PARAMS`, and `test_param_set()` (or `test_with_nodenorm` and/or `test_with_nameres` when subclassing `AssertionHandler` directly).
+
+3. Import it in `__init__.py` and add an instance to `ASSERTION_HANDLERS`.
+
+4. Run `uv run python -m src.babel_validation.assertions.gen_docs` to regenerate `README.md`.
diff --git a/src/babel_validation/assertions/__init__.py b/src/babel_validation/assertions/__init__.py
@@ -0,0 +1,132 @@
+"""
+babel_validation.assertions
+===========================
+
+This package defines the assertion types that can be embedded in GitHub issue bodies
+and evaluated against the NodeNorm and NameRes services.
+
+Supported assertion types are registered in ASSERTION_HANDLERS. To see everything
+that is currently supported, scan that dict or read assertions/README.md (auto-generated).
+
+Adding a new assertion type
+---------------------------
+1. Create a subclass of NodeNormTest or NameResTest (or AssertionHandler for both)
+   in the appropriate module (nodenorm.py, nameres.py, or common.py).
+2. Set NAME and DESCRIPTION class attributes.
+3. Set PARAMETERS, WIKI_EXAMPLES, and YAML_PARAMS class attributes for documentation.
+4. Override test_param_set() (or test_with_nodenorm() and/or test_with_nameres() when subclassing AssertionHandler directly).
+5. Import it here and add an instance to ASSERTION_HANDLERS.
+6. Run `uv run python -m src.babel_validation.assertions.gen_docs` to regenerate README.md.
+"""
+
+from typing import Iterator
+
+from src.babel_validation.core.testrow import TestResult, TestStatus
+
+
+class AssertionHandler:
+    """Base class for all BabelTest assertion handlers.
+
+    Subclasses set the NAME and DESCRIPTION class attributes and override
+    test_with_nodenorm() and/or test_with_nameres(). The defaults defined here
+    produce no results, which is how a handler signals that it does not apply
+    to a given service.
+    """
+    NAME: str           # lowercase assertion name as used in issue bodies
+    DESCRIPTION: str    # one-line human-readable description
+
+    def passed(self, message: str) -> TestResult:
+        """Build a TestResult with status TestStatus.Passed and the given message."""
+        return TestResult(status=TestStatus.Passed, message=message)
+
+    def failed(self, message: str) -> TestResult:
+        """Build a TestResult with status TestStatus.Failed and the given message."""
+        return TestResult(status=TestStatus.Failed, message=message)
+
+    def test_with_nodenorm(self, param_sets: list[list[str]], nodenorm,
+                           label: str = "") -> Iterator[TestResult]:
+        """Evaluate this assertion against NodeNorm. Returns [] if not applicable.
+
+        :param param_sets: One list of string parameters per assertion instance.
+        :param nodenorm: The NodeNorm client to query (presumably a
+            CachedNodeNorm; the type is not declared here).
+        :param label: Description of where the assertion came from; unused by
+            this default implementation.
+        :return: This default returns an empty list.
+        """
+        # NOTE(review): the annotation says Iterator, but a list is returned;
+        # callers should only rely on being able to iterate over the result.
+        return []
+
+    def test_with_nameres(self, param_sets: list[list[str]], nodenorm, nameres,
+                          pass_if_found_in_top: int = 5,
+                          label: str = "") -> Iterator[TestResult]:
+        """Evaluate this assertion against NameRes. Returns [] if not applicable.
+
+        :param param_sets: One list of string parameters per assertion instance.
+        :param nodenorm: The NodeNorm client, passed alongside the NameRes one.
+        :param nameres: The NameRes client to query (presumably a
+            CachedNameRes; the type is not declared here).
+        :param pass_if_found_in_top: Number of top results to consider; unused
+            by this default implementation.
+        :param label: Description of where the assertion came from; unused by
+            this default implementation.
+        :return: This default returns an empty list.
+        """
+        # NOTE(review): the annotation says Iterator, but a list is returned;
+        # callers should only rely on being able to iterate over the result.
+        return []
+
+
+class NodeNormTest(AssertionHandler):
+    """Base class for assertions that test NodeNorm.
+
+    Subclasses implement test_param_set() instead of test_with_nodenorm().
+    """
+
+    def test_with_nodenorm(self, param_sets: list[list[str]], nodenorm,
+                           label: str = "") -> Iterator[TestResult]:
+        if not param_sets:
+            yield self.failed(f"No parameters provided in {label}")
+            return
+        # warm the cache for all CURIEs up front
+        nodenorm.normalize_curies([p for params in param_sets for p in params])
+        found = False
+        for index, params in enumerate(param_sets):
+            if not params:
+                yield self.failed(f"No parameters in param_set {index} in {label}")
+                found = True
+                continue
+            for result in self.test_param_set(params, nodenorm, label):
+                found = True
+                yield result
+        if not found:
+            yield self.failed(f"No test results returned in {label}")
+
+    def test_param_set(self, params: list[str], nodenorm, label: str = "") -> Iterator[TestResult]:
+        """Override this to implement the assertion. Called once per param_set."""
+        raise NotImplementedError
+
+    def resolved_message(self, curie: str, result: dict, nodenorm) -> str:
+        """Standard pass-message when a CURIE resolves."""
+        return (f"Resolved {curie} to {result['id']['identifier']} "
+                f"({result['type'][0]}, \"{result['id']['label']}\") "
+                f"with NodeNormalization service {nodenorm}")
+
+
+class NameResTest(AssertionHandler):
+    """Base class for assertions that test NameRes.
+
+    Subclasses implement test_param_set() instead of test_with_nameres().
+    """
+
+    def test_with_nameres(self, param_sets: list[list[str]], nodenorm, nameres,
+                          pass_if_found_in_top: int = 5,
+                          label: str = "") -> Iterator[TestResult]:
+        if not param_sets:
+            yield self.failed(f"No parameters provided in {label}")
+            return
+        found = False
+        for index, params in enumerate(param_sets):
+            if not params:
+                yield self.failed(f"No parameters in param_set {index} in {label}")
+                found = True
+                continue
+            for result in self.test_param_set(params, nodenorm, nameres, pass_if_found_in_top, label):
+                found = True
+                yield result
+        if not found:
+            yield self.failed(f"No test results returned in {label}")
+
+    def test_param_set(self, params: list[str], nodenorm, nameres,
+                       pass_if_found_in_top: int, label: str = "") -> Iterator[TestResult]:
+        """Override this to implement the assertion. Called once per param_set."""
+        raise NotImplementedError
+
+
+# Registry — import submodules after base classes are defined to avoid circular imports.
+from src.babel_validation.assertions.nodenorm import (  # noqa: E402
+    ResolvesHandler, DoesNotResolveHandler, ResolvesWithHandler, ResolvesWithTypeHandler,
+)
+from src.babel_validation.assertions.nameres import SearchByNameHandler  # noqa: E402
+from src.babel_validation.assertions.common import NeededHandler  # noqa: E402
+
+# Maps each handler's NAME to one shared handler instance. Entries are inserted
+# in the order listed; if two handlers declared the same NAME, the later one
+# would silently replace the earlier one.
+# NOTE(review): the README says assertion names are case-insensitive, so callers
+# presumably normalise the name's case before looking it up here — confirm.
+ASSERTION_HANDLERS: dict[str, AssertionHandler] = {
+    h.NAME: h for h in [
+        ResolvesHandler(),
+        DoesNotResolveHandler(),
+        ResolvesWithHandler(),
+        ResolvesWithTypeHandler(),
+        SearchByNameHandler(),
+        NeededHandler(),
+    ]
+}