From aa6d8cea05d46821a536772714e32ba44640ee66 Mon Sep 17 00:00:00 2001 From: mchen-sentry Date: Thu, 9 Apr 2026 13:19:36 -0700 Subject: [PATCH 1/8] feat(ci): two-tier test split with service classification - service_classifier.py: hybrid static + runtime classification plugin that maps each test to its service dependencies (Snuba, Kafka, etc.) - classify-services.yml: workflow to generate classification across 22 shards - split-tests-by-tier.py: splits classification into tier1 (postgres-only) and tier2 (full Snuba stack) test lists - backend.yml: add split-tiers + backend-light jobs, wire backend-test to use tier2 list when classification is available - Selective testing (PRs) and tiers (master) are mutually exclusive --- .github/workflows/backend.yml | 142 +++++++++++++++++- .github/workflows/classify-services.yml | 90 +++++++++++ .../workflows/scripts/split-tests-by-tier.py | 75 +++++++++ src/sentry/testutils/pytest/__init__.py | 1 + .../testutils/pytest/service_classifier.py | 139 +++++++++++++++++ 5 files changed, 445 insertions(+), 2 deletions(-) create mode 100644 .github/workflows/classify-services.yml create mode 100644 .github/workflows/scripts/split-tests-by-tier.py create mode 100644 src/sentry/testutils/pytest/service_classifier.py diff --git a/.github/workflows/backend.yml b/.github/workflows/backend.yml index e3a2b8c1aff9d3..aaa320aa644f8a 100644 --- a/.github/workflows/backend.yml +++ b/.github/workflows/backend.yml @@ -200,13 +200,142 @@ jobs: run: | python3 .github/workflows/scripts/calculate-backend-test-shards.py + split-tiers: + if: >- + always() && + !cancelled() && + needs.files-changed.outputs.backend == 'true' && + needs.select-tests.outputs.has-selected-tests != 'true' + needs: [files-changed, select-tests] + name: split tests into tiers + runs-on: ubuntu-24.04 + timeout-minutes: 5 + permissions: + contents: read + actions: read + outputs: + has-tiers: ${{ steps.split.outputs.has-tiers }} + steps: + - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + + - name: Download classification report + id: download + env: + GH_TOKEN: ${{ github.token }} + run: | + RUN_ID=$(gh run list --workflow=classify-services.yml --status=success --limit=1 --json databaseId --jq '.[0].databaseId') + if [ -z "$RUN_ID" ]; then + echo "No classify-services run found, skipping tiers" + echo "has-tiers=false" >> "$GITHUB_OUTPUT" + exit 0 + fi + gh run download "$RUN_ID" --name test-service-classification --dir /tmp/classification + echo "has-tiers=true" >> "$GITHUB_OUTPUT" + + - name: Split tests by tier + id: split + if: steps.download.outputs.has-tiers == 'true' + run: | + python3 .github/workflows/scripts/split-tests-by-tier.py \ + --classification /tmp/classification/test-service-classification.json \ + --tier tier1 --output /tmp/backend-light-tests.txt + python3 .github/workflows/scripts/split-tests-by-tier.py \ + --classification /tmp/classification/test-service-classification.json \ + --tier tier2 --output /tmp/backend-tests.txt + echo "has-tiers=true" >> "$GITHUB_OUTPUT" + + - uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4 + if: steps.split.outputs.has-tiers == 'true' + with: + name: tier-file-lists + path: | + /tmp/backend-light-tests.txt + /tmp/backend-tests.txt + retention-days: 1 + + backend-light: + if: >- + always() && + !cancelled() && + needs.files-changed.outputs.backend == 'true' && + needs.split-tiers.outputs.has-tiers == 'true' + needs: [files-changed, split-tiers] + name: 'backend-light (${{ matrix.instance }})' + runs-on: ubuntu-24.04 + timeout-minutes: 30 + permissions: + contents: read + id-token: write + strategy: + fail-fast: false + matrix: + instance: [0, 1, 2, 3, 4] + env: + MATRIX_INSTANCE_TOTAL: 5 + TEST_GROUP_STRATEGY: roundrobin + PYTHONHASHSEED: '0' + SENTRY_SKIP_SELENIUM_PLUGIN: '1' + steps: + - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + + - uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4 + with: + name: tier-file-lists + path: /tmp/ + + - name: Setup sentry env + uses: ./.github/actions/setup-sentry + id: setup + with: + mode: migrations + + - name: Download odiff binary + run: | + curl -sL https://registry.npmjs.org/odiff-bin/-/odiff-bin-4.3.2.tgz \ + | tar -xz --strip-components=2 package/raw_binaries/odiff-linux-x64 + sudo install -m 755 odiff-linux-x64 /usr/local/bin/odiff + rm odiff-linux-x64 + + - name: Run backend-light tests + env: + SELECTED_TESTS_FILE: /tmp/backend-light-tests.txt + run: | + python3 -b -m pytest tests \ + --reuse-db \ + -n 4 \ + --dist=loadfile \ + --ignore tests/acceptance \ + --ignore tests/apidocs \ + --ignore tests/js \ + --ignore tests/tools \ + --json-report \ + --json-report-file=".artifacts/pytest.json" \ + --json-report-omit=log \ + --junit-xml=.artifacts/pytest.junit.xml \ + -o junit_suite_name=pytest-backend-light + + - name: Inspect failure + if: failure() + run: devservices logs 2>/dev/null || true + + - name: Collect test data + uses: ./.github/actions/collect-test-data + if: ${{ !cancelled() }} + with: + artifact_path: .artifacts/pytest.json + gcs_bucket: ${{ secrets.COLLECT_TEST_DATA_GCS_BUCKET }} + gcp_project_id: ${{ secrets.COLLECT_TEST_DATA_GCP_PROJECT_ID }} + workload_identity_provider: ${{ secrets.SENTRY_GCP_DEV_WORKLOAD_IDENTITY_POOL }} + service_account_email: ${{ secrets.COLLECT_TEST_DATA_SERVICE_ACCOUNT_EMAIL }} + matrix_instance_number: ${{ steps.setup.outputs.matrix-instance-number }} + backend-test: # Use always() so this job runs even when select-tests is skipped (master) if: >- always() && !cancelled() && needs.files-changed.outputs.backend == 'true' && needs.calculate-shards.outputs.shard-count != '0' - needs: [files-changed, select-tests, calculate-shards] + needs: [files-changed, select-tests, calculate-shards, split-tiers] name: backend test runs-on: ubuntu-24.04 timeout-minutes: 60 @@ -258,9 +387,16 @@ jobs: name: selected-tests-${{ github.run_id }} path: .artifacts/ + - name: Download backend test list + if: needs.split-tiers.outputs.has-tiers == 'true' + uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4 + with: + name: tier-file-lists + path: /tmp/ + - name: Run backend test (${{ steps.setup.outputs.matrix-instance-number }} of ${{ steps.setup.outputs.matrix-instance-total }}) env: - SELECTED_TESTS_FILE: ${{ needs.select-tests.outputs.has-selected-tests == 'true' && '.artifacts/selected-tests.txt' || '' }} + SELECTED_TESTS_FILE: ${{ needs.select-tests.outputs.has-selected-tests == 'true' && '.artifacts/selected-tests.txt' || (needs.split-tiers.outputs.has-tiers == 'true' && '/tmp/backend-tests.txt' || '') }} run: | if [ -n "${XDIST_WORKERS}" ]; then export PYTEST_ADDOPTS="$PYTEST_ADDOPTS -n ${XDIST_WORKERS} --dist=loadfile" @@ -620,6 +756,8 @@ jobs: [ api-docs, backend-test, + backend-light, + split-tiers, backend-migration-tests, calculate-shards, cli, diff --git a/.github/workflows/classify-services.yml b/.github/workflows/classify-services.yml new file mode 100644 index 00000000000000..b049962716b054 --- /dev/null +++ b/.github/workflows/classify-services.yml @@ -0,0 +1,90 @@ +name: classify test services + +on: + workflow_dispatch: + +jobs: + classify: + name: classify (${{ matrix.instance }}) + runs-on: ubuntu-24.04 + timeout-minutes: 60 + permissions: + contents: read + strategy: + fail-fast: false + matrix: + instance: + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21] + + env: + MATRIX_INSTANCE_TOTAL: 22 + TEST_GROUP_STRATEGY: roundrobin + + steps: + - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + + - name: Setup sentry env + uses: ./.github/actions/setup-sentry + with: + mode: backend-ci + + - name: Run tests with classification + env: + PYTEST_ADDOPTS: '${{ env.PYTEST_ADDOPTS }} --classify-services --classification-output=test-service-classification.json' + run: | + python3 -b -m pytest tests \ + --ignore tests/acceptance \ + --ignore tests/apidocs \ + --ignore tests/js \ + --ignore tests/tools \ + || true + + - name: Upload classification report + if: always() + uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4 + with: + name: classification-shard-${{ matrix.instance }} + path: test-service-classification.json + retention-days: 90 + + merge-reports: + name: merge classification reports + needs: classify + if: always() + runs-on: ubuntu-24.04 + timeout-minutes: 5 + steps: + - uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4 + with: + pattern: classification-shard-* + path: shards/ + + - name: Merge shard reports + run: | + python3 - <<'SCRIPT' + import json + from collections import defaultdict + from pathlib import Path + + merged = defaultdict(set) + shard_dirs = sorted(Path("shards").iterdir()) + + for shard_dir in shard_dirs: + report = shard_dir / "test-service-classification.json" + if report.exists(): + for tid, svcs in json.loads(report.read_text()).get("tests", {}).items(): + merged[tid].update(svcs) + + Path("test-service-classification.json").write_text(json.dumps( + {"total_tests": len(merged), "tests": {k: sorted(v) for k, v in sorted(merged.items())}}, + indent=2, + ) + "\n") + print(f"Merged {len(merged)} tests from {len(shard_dirs)} shards") + SCRIPT + + - name: Upload merged report + uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4 + with: + name: test-service-classification + path: test-service-classification.json + retention-days: 90 diff --git a/.github/workflows/scripts/split-tests-by-tier.py b/.github/workflows/scripts/split-tests-by-tier.py new file mode 100644 index 00000000000000..38d818de2b6cc6 --- /dev/null +++ b/.github/workflows/scripts/split-tests-by-tier.py @@ -0,0 +1,75 @@ +#!/usr/bin/env python3 +from __future__ import annotations + +import argparse +import json +import sys +from collections import defaultdict +from pathlib import Path + +TIER2_SERVICES = {"snuba", "kafka", "symbolicator", "objectstore", "bigtable"} + + +def _scope_key(test_id: str, granularity: str) -> str: + if granularity == "file": + return test_id.split("::")[0] + elif granularity == "class": + return "::".join(test_id.split("::")[:2]) + return test_id + + +def split(classification: dict, granularity: str = "file") -> dict[str, set[str]]: + scope_services: dict[str, set[str]] = defaultdict(set) + for test_id, services in classification.get("tests", {}).items(): + scope = _scope_key(test_id, granularity) + if isinstance(services, list): + scope_services[scope].update(services) + else: + scope_services[scope].add(services) + + tier1: set[str] = set() + tier2: set[str] = set() + for scope, services in scope_services.items(): + (tier2 if services & TIER2_SERVICES else tier1).add(scope) + + return {"tier1": tier1, "tier2": tier2} + + +def main() -> int: + parser = argparse.ArgumentParser() + parser.add_argument("--classification", required=True) + parser.add_argument("--tier", choices=["tier1", "tier2"], required=True) + parser.add_argument("--output", required=True) + parser.add_argument("--granularity", choices=["file", "class"], default="file") + args = parser.parse_args() + + with open(args.classification) as f: + classification = json.load(f) + + tests = classification.get("tests", {}) + if not tests: + print("Error: classification JSON has no 'tests' key or is empty", file=sys.stderr) + return 1 + + tiers = split(classification, granularity=args.granularity) + if not tiers["tier1"] and not tiers["tier2"]: + print("Error: classification produced 0 scopes in both tiers", file=sys.stderr) + return 1 + + if not tiers["tier1"] or not tiers["tier2"]: + print( + f"Warning: tier1={len(tiers['tier1'])} tier2={len(tiers['tier2'])} — one tier is empty", + file=sys.stderr, + ) + + scopes = sorted(tiers[args.tier]) + Path(args.output).write_text("\n".join(scopes) + "\n") + print( + f"tier1={len(tiers['tier1'])} tier2={len(tiers['tier2'])} → wrote {len(scopes)} to {args.output}", + file=sys.stderr, + ) + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/src/sentry/testutils/pytest/__init__.py b/src/sentry/testutils/pytest/__init__.py index 231eee6a140712..9fa6342b5259d1 100644 --- a/src/sentry/testutils/pytest/__init__.py +++ b/src/sentry/testutils/pytest/__init__.py @@ -12,6 +12,7 @@ "sentry.testutils.pytest.json_report_reruns", "sentry.testutils.pytest.show_flaky_failures", "sentry.testutils.thread_leaks.pytest", + "sentry.testutils.pytest.service_classifier", ] if os.environ.get("SENTRY_SKIP_SELENIUM_PLUGIN") != "1": diff --git a/src/sentry/testutils/pytest/service_classifier.py b/src/sentry/testutils/pytest/service_classifier.py new file mode 100644 index 00000000000000..5f34aff1b285d8 --- /dev/null +++ b/src/sentry/testutils/pytest/service_classifier.py @@ -0,0 +1,139 @@ +from __future__ import annotations + +import socket +import time +from collections import defaultdict +from pathlib import Path +from typing import Any + +import pytest + +from sentry.utils import json + +SERVICE_PORTS: dict[int, str] = { + 1218: "snuba", + 3021: "symbolicator", + 8086: "bigtable", + 8888: "objectstore", +} + +FIXTURE_SERVICE_MAP: dict[str, str] = { + "_requires_snuba": "snuba", + "_requires_kafka": "kafka", + "_requires_symbolicator": "symbolicator", + "_requires_objectstore": "objectstore", +} + +_original_send: Any = None +_original_sendall: Any = None +_current_test: str | None = None +_test_services: dict[str, set[str]] = defaultdict(set) +_enabled: bool = False + + +def _classify_socket(sock: socket.socket) -> None: + if not _current_test: + return + try: + service = SERVICE_PORTS.get(sock.getpeername()[1]) + if service: + _test_services[_current_test].add(service) + except (OSError, AttributeError, IndexError): + pass + + +def _patched_send(self: socket.socket, *args: Any, **kwargs: Any) -> Any: + _classify_socket(self) + return _original_send(self, *args, **kwargs) # type: ignore[misc] + + +def _patched_sendall(self: socket.socket, *args: Any, **kwargs: Any) -> Any: + _classify_socket(self) + return _original_sendall(self, *args, **kwargs) # type: ignore[misc] + + +def _install_socket_patches() -> None: + global _original_send, _original_sendall + _original_send = socket.socket.send + _original_sendall = socket.socket.sendall + socket.socket.send = _patched_send # type: ignore[assignment] + socket.socket.sendall = _patched_sendall # type: ignore[assignment] + + +def _uninstall_socket_patches() -> None: + if _original_send is not None: + socket.socket.send = _original_send # type: ignore[assignment] + if _original_sendall is not None: + socket.socket.sendall = _original_sendall # type: ignore[assignment] + + +def _detect_static_services(item: pytest.Item) -> set[str]: + services: set[str] = set() + + if getattr(item, "cls", None) is not None: + services.add("postgres") + elif hasattr(item, "fixturenames"): + if {"db", "transactional_db", "django_db_reset_sequences"} & set(item.fixturenames): + services.add("postgres") + + if hasattr(item, "fixturenames"): + for fixture, service in FIXTURE_SERVICE_MAP.items(): + if fixture in item.fixturenames: + services.add(service) + + for marker in item.iter_markers("usefixtures"): + for name in marker.args: + if name in FIXTURE_SERVICE_MAP: + services.add(FIXTURE_SERVICE_MAP[name]) + + return services + + +def pytest_addoption(parser: pytest.Parser) -> None: + group = parser.getgroup("service-classifier") + group.addoption("--classify-services", action="store_true", default=False) + group.addoption("--classification-output", default="test-service-classification.json") + + +def pytest_configure(config: pytest.Config) -> None: + global _enabled + _enabled = config.getoption("--classify-services", default=False) + if _enabled: + _install_socket_patches() + + +def pytest_unconfigure(config: pytest.Config) -> None: + if _enabled: + _uninstall_socket_patches() + + +def pytest_collection_modifyitems(config: pytest.Config, items: list[pytest.Item]) -> None: + if _enabled: + for item in items: + _test_services[item.nodeid].update(_detect_static_services(item)) + + +@pytest.hookimpl(tryfirst=True) +def pytest_runtest_setup(item: pytest.Item) -> None: + global _current_test + if _enabled: + _current_test = item.nodeid + + +@pytest.hookimpl(trylast=True) +def pytest_runtest_teardown(item: pytest.Item, nextitem: pytest.Item | None) -> None: + global _current_test + if _enabled: + _current_test = None + + +def pytest_sessionfinish(session: pytest.Session, exitstatus: int) -> None: + if _enabled: + report = { + "generated_at": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()), + "total_tests": len(_test_services), + "tests": {nid: sorted(svcs) for nid, svcs in sorted(_test_services.items())}, + } + Path(session.config.getoption("--classification-output")).write_text( + json.dumps(report) + "\n" + ) From f9df32894357dc44bc53a784c6ef0bc21f2a65fa Mon Sep 17 00:00:00 2001 From: mchen-sentry Date: Thu, 9 Apr 2026 13:51:40 -0700 Subject: [PATCH 2/8] fix(ci): correct mypy type ignore codes in service_classifier --- src/sentry/testutils/pytest/service_classifier.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/sentry/testutils/pytest/service_classifier.py b/src/sentry/testutils/pytest/service_classifier.py index 5f34aff1b285d8..7c8485cd5e3473 100644 --- a/src/sentry/testutils/pytest/service_classifier.py +++ b/src/sentry/testutils/pytest/service_classifier.py @@ -44,27 +44,27 @@ def _classify_socket(sock: socket.socket) -> None: def _patched_send(self: socket.socket, *args: Any, **kwargs: Any) -> Any: _classify_socket(self) - return _original_send(self, *args, **kwargs) # type: ignore[misc] + return _original_send(self, *args, **kwargs) def _patched_sendall(self: socket.socket, *args: Any, **kwargs: Any) -> Any: _classify_socket(self) - return _original_sendall(self, *args, **kwargs) # type: ignore[misc] + return _original_sendall(self, *args, **kwargs) def _install_socket_patches() -> None: global _original_send, _original_sendall _original_send = socket.socket.send _original_sendall = socket.socket.sendall - socket.socket.send = _patched_send # type: ignore[assignment] - socket.socket.sendall = _patched_sendall # type: ignore[assignment] + socket.socket.send = _patched_send # type: ignore[method-assign] + socket.socket.sendall = _patched_sendall # type: ignore[method-assign] def _uninstall_socket_patches() -> None: if _original_send is not None: - socket.socket.send = _original_send # type: ignore[assignment] + socket.socket.send = _original_send # type: ignore[method-assign] if _original_sendall is not None: - socket.socket.sendall = _original_sendall # type: ignore[assignment] + socket.socket.sendall = _original_sendall # type: ignore[method-assign] def _detect_static_services(item: pytest.Item) -> set[str]: From 4731854e363aedf699b8638d3a7afaf01de06c0f Mon Sep 17 00:00:00 2001 From: mchen-sentry Date: Thu, 9 Apr 2026 14:12:36 -0700 Subject: [PATCH 3/8] fix(ci): broaden mypy ignores for socket monkey-patching --- src/sentry/testutils/pytest/service_classifier.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/sentry/testutils/pytest/service_classifier.py b/src/sentry/testutils/pytest/service_classifier.py index 7c8485cd5e3473..18c705e8935af1 100644 --- a/src/sentry/testutils/pytest/service_classifier.py +++ b/src/sentry/testutils/pytest/service_classifier.py @@ -56,8 +56,8 @@ def _install_socket_patches() -> None: global _original_send, _original_sendall _original_send = socket.socket.send _original_sendall = socket.socket.sendall - socket.socket.send = _patched_send # type: ignore[method-assign] - socket.socket.sendall = _patched_sendall # type: ignore[method-assign] + socket.socket.send = _patched_send # type: ignore[assignment,method-assign] + socket.socket.sendall = _patched_sendall # type: ignore[assignment,method-assign] def _uninstall_socket_patches() -> None: From 86349c4cd797f683a950518043755bff2c527d98 Mon Sep 17 00:00:00 2001 From: mchen-sentry Date: Thu, 9 Apr 2026 13:59:21 -0700 Subject: [PATCH 4/8] fix(ci): add redis-cluster/kafka service containers to backend-light --- .github/workflows/backend.yml | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/.github/workflows/backend.yml b/.github/workflows/backend.yml index aaa320aa644f8a..31a2a2404a9faf 100644 --- a/.github/workflows/backend.yml +++ b/.github/workflows/backend.yml @@ -266,6 +266,24 @@ jobs: permissions: contents: read id-token: write + services: + redis-cluster: + image: ghcr.io/getsentry/docker-redis-cluster:7.0.10 + ports: ['7000:7000', '7001:7001', '7002:7002', '7003:7003', '7004:7004', '7005:7005'] + env: + IP: 0.0.0.0 + zookeeper: + image: ghcr.io/getsentry/image-mirror-confluentinc-cp-zookeeper:6.2.0 + env: + ZOOKEEPER_CLIENT_PORT: 2181 + kafka: + image: ghcr.io/getsentry/image-mirror-confluentinc-cp-kafka:6.2.0 + env: + KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181 + KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://127.0.0.1:9092 + KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1 + KAFKA_OFFSETS_TOPIC_NUM_PARTITIONS: 1 + ports: ['9092:9092'] strategy: fail-fast: false matrix: From b8623330d9e68f659dcb290f4fcae8a464726033 Mon Sep 17 00:00:00 2001 From: mchen-sentry Date: Fri, 10 Apr 2026 12:56:52 -0700 Subject: [PATCH 5/8] fix(ci): reduce backend-test to 17 shards when tiers active (5+17=22) --- .github/workflows/backend.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/backend.yml b/.github/workflows/backend.yml index 31a2a2404a9faf..ebe2dd017635ed 100644 --- a/.github/workflows/backend.yml +++ b/.github/workflows/backend.yml @@ -367,12 +367,12 @@ jobs: # and reducing the risk that one of many runs would turn red again (read: intermittent tests) fail-fast: false matrix: - # Dynamic matrix from calculate-shards - instance: ${{ fromJSON(needs.calculate-shards.outputs.shard-indices) }} + # When tiers are active, backend-test only runs tier2 (17 shards); + # the remaining 5 shards run as backend-light. Total stays at 22. + instance: ${{ needs.split-tiers.outputs.has-tiers == 'true' && fromJSON('[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]') || fromJSON(needs.calculate-shards.outputs.shard-indices) }} env: - # Dynamic total from calculate-shards - MATRIX_INSTANCE_TOTAL: ${{ needs.calculate-shards.outputs.shard-count }} + MATRIX_INSTANCE_TOTAL: ${{ needs.split-tiers.outputs.has-tiers == 'true' && '17' || needs.calculate-shards.outputs.shard-count }} TEST_GROUP_STRATEGY: roundrobin PYTHONHASHSEED: '0' XDIST_PER_WORKER_SNUBA: '1' From f7c4dae4e0106d618397f7eaae360096dac4c187 Mon Sep 17 00:00:00 2001 From: mchen-sentry Date: Mon, 27 Apr 2026 10:45:16 -0700 Subject: [PATCH 6/8] fix(ci): filter classify runs by conclusion via jq, not --status flag --- .github/workflows/backend.yml | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/.github/workflows/backend.yml b/.github/workflows/backend.yml index ebe2dd017635ed..c5cd35db5ca6f9 100644 --- a/.github/workflows/backend.yml +++ b/.github/workflows/backend.yml @@ -223,13 +223,18 @@ jobs: env: GH_TOKEN: ${{ github.token }} run: | - RUN_ID=$(gh run list --workflow=classify-services.yml --status=success --limit=1 --json databaseId --jq '.[0].databaseId') + # Filter by conclusion=success via jq — gh CLI's --status flag is unreliable for this. + RUN_ID=$(gh run list --workflow=classify-services.yml --limit=10 --json databaseId,conclusion --jq '[.[] | select(.conclusion == "success")][0].databaseId') if [ -z "$RUN_ID" ]; then echo "No classify-services run found, skipping tiers" echo "has-tiers=false" >> "$GITHUB_OUTPUT" exit 0 fi - gh run download "$RUN_ID" --name test-service-classification --dir /tmp/classification + if ! gh run download "$RUN_ID" --name test-service-classification --dir /tmp/classification; then + echo "Classification artifact unavailable (may be expired), skipping tiers" + echo "has-tiers=false" >> "$GITHUB_OUTPUT" + exit 0 + fi echo "has-tiers=true" >> "$GITHUB_OUTPUT" - name: Split tests by tier From f798c1e44e74d456e389fc099c870243f5b5e101 Mon Sep 17 00:00:00 2001 From: mchen-sentry Date: Mon, 27 Apr 2026 14:42:05 -0700 Subject: [PATCH 7/8] perf(ci): tier 2 uses --dist=load, tier 1 keeps --dist=loadfile Hybrid distribution mode based on experiment data: --dist=load cuts tier 2 shard-time variance by 54% (179s -> 82s spread) by load-balancing individual tests across workers, but hurts tier 1 (where small fast tests benefit from fixture reuse via loadfile). Apply load only when tiers are active. Backend-test without tiers (selective PRs, master without classification) keeps --dist=loadfile for backwards compatibility. --- .github/workflows/backend.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/backend.yml b/.github/workflows/backend.yml index c5cd35db5ca6f9..b9434d1af7cbc3 100644 --- a/.github/workflows/backend.yml +++ b/.github/workflows/backend.yml @@ -420,9 +420,13 @@ jobs: - name: Run backend test (${{ steps.setup.outputs.matrix-instance-number }} of ${{ steps.setup.outputs.matrix-instance-total }}) env: SELECTED_TESTS_FILE: ${{ needs.select-tests.outputs.has-selected-tests == 'true' && '.artifacts/selected-tests.txt' || (needs.split-tiers.outputs.has-tiers == 'true' && '/tmp/backend-tests.txt' || '') }} + # tier 2 (snuba-heavy) gets --dist=load to balance individual-test latency + # across workers — variance is much higher than the no-tiers case where + # --dist=loadfile averages out via larger file count per shard. + XDIST_DIST: ${{ needs.split-tiers.outputs.has-tiers == 'true' && 'load' || 'loadfile' }} run: | if [ -n "${XDIST_WORKERS}" ]; then - export PYTEST_ADDOPTS="$PYTEST_ADDOPTS -n ${XDIST_WORKERS} --dist=loadfile" + export PYTEST_ADDOPTS="$PYTEST_ADDOPTS -n ${XDIST_WORKERS} --dist=${XDIST_DIST}" timeout 1200 make test-python-ci || { rc=$? if [ "$rc" -eq 124 ]; then From a8916c4da291b10c3f3a6b40a509f8d9e8951376 Mon Sep 17 00:00:00 2001 From: mchen-sentry Date: Tue, 12 May 2026 14:28:25 -0700 Subject: [PATCH 8/8] feat(ci): two-tier test split with service classification --- .github/workflows/backend.yml | 17 +++------- .../workflows/scripts/split-tests-by-tier.py | 34 ++++++------------- 2 files changed, 14 insertions(+), 37 deletions(-) diff --git a/.github/workflows/backend.yml b/.github/workflows/backend.yml index b9434d1af7cbc3..f3d91ac2e78330 100644 --- a/.github/workflows/backend.yml +++ b/.github/workflows/backend.yml @@ -274,7 +274,8 @@ jobs: services: redis-cluster: image: ghcr.io/getsentry/docker-redis-cluster:7.0.10 - ports: ['7000:7000', '7001:7001', '7002:7002', '7003:7003', '7004:7004', '7005:7005'] + ports: + ['7000:7000', '7001:7001', '7002:7002', '7003:7003', '7004:7004', '7005:7005'] env: IP: 0.0.0.0 zookeeper: @@ -312,13 +313,6 @@ jobs: with: mode: migrations - - name: Download odiff binary - run: | - curl -sL https://registry.npmjs.org/odiff-bin/-/odiff-bin-4.3.2.tgz \ - | tar -xz --strip-components=2 package/raw_binaries/odiff-linux-x64 - sudo install -m 755 odiff-linux-x64 /usr/local/bin/odiff - rm odiff-linux-x64 - - name: Run backend-light tests env: SELECTED_TESTS_FILE: /tmp/backend-light-tests.txt @@ -372,8 +366,7 @@ jobs: # and reducing the risk that one of many runs would turn red again (read: intermittent tests) fail-fast: false matrix: - # When tiers are active, backend-test only runs tier2 (17 shards); - # the remaining 5 shards run as backend-light. Total stays at 22. + # With tiers: 17 shards for tier2; 5 more run as backend-light (22 total). instance: ${{ needs.split-tiers.outputs.has-tiers == 'true' && fromJSON('[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]') || fromJSON(needs.calculate-shards.outputs.shard-indices) }} env: @@ -420,9 +413,7 @@ jobs: - name: Run backend test (${{ steps.setup.outputs.matrix-instance-number }} of ${{ steps.setup.outputs.matrix-instance-total }}) env: SELECTED_TESTS_FILE: ${{ needs.select-tests.outputs.has-selected-tests == 'true' && '.artifacts/selected-tests.txt' || (needs.split-tiers.outputs.has-tiers == 'true' && '/tmp/backend-tests.txt' || '') }} - # tier 2 (snuba-heavy) gets --dist=load to balance individual-test latency - # across workers — variance is much higher than the no-tiers case where - # --dist=loadfile averages out via larger file count per shard. + # tier2 uses --dist=load; snuba-heavy tests have high per-test variance that load-balances better than loadfile. XDIST_DIST: ${{ needs.split-tiers.outputs.has-tiers == 'true' && 'load' || 'loadfile' }} run: | if [ -n "${XDIST_WORKERS}" ]; then diff --git a/.github/workflows/scripts/split-tests-by-tier.py b/.github/workflows/scripts/split-tests-by-tier.py index 38d818de2b6cc6..feb7e18e5c44aa 100644 --- a/.github/workflows/scripts/split-tests-by-tier.py +++ b/.github/workflows/scripts/split-tests-by-tier.py @@ -10,27 +10,15 @@ TIER2_SERVICES = {"snuba", "kafka", "symbolicator", "objectstore", "bigtable"} -def _scope_key(test_id: str, granularity: str) -> str: - if granularity == "file": - return test_id.split("::")[0] - elif granularity == "class": - return "::".join(test_id.split("::")[:2]) - return test_id - - -def split(classification: dict, granularity: str = "file") -> dict[str, set[str]]: - scope_services: dict[str, set[str]] = defaultdict(set) +def split(classification: dict) -> dict[str, set[str]]: + file_services: dict[str, set[str]] = defaultdict(set) for test_id, services in classification.get("tests", {}).items(): - scope = _scope_key(test_id, granularity) - if isinstance(services, list): - scope_services[scope].update(services) - else: - scope_services[scope].add(services) + file_services[test_id.split("::")[0]].update(services) tier1: set[str] = set() tier2: set[str] = set() - for scope, services in scope_services.items(): - (tier2 if services & TIER2_SERVICES else tier1).add(scope) + for path, services in file_services.items(): + (tier2 if services & TIER2_SERVICES else tier1).add(path) return {"tier1": tier1, "tier2": tier2} @@ -40,20 +28,18 @@ def main() -> int: parser.add_argument("--classification", required=True) parser.add_argument("--tier", choices=["tier1", "tier2"], required=True) parser.add_argument("--output", required=True) - parser.add_argument("--granularity", choices=["file", "class"], default="file") args = parser.parse_args() with open(args.classification) as f: classification = json.load(f) - tests = classification.get("tests", {}) - if not tests: - print("Error: classification JSON has no 'tests' key or is empty", file=sys.stderr) + if not classification.get("tests"): + print("Error: classification JSON has no tests", file=sys.stderr) return 1 - tiers = split(classification, granularity=args.granularity) + tiers = split(classification) if not tiers["tier1"] and not tiers["tier2"]: - print("Error: classification produced 0 scopes in both tiers", file=sys.stderr) + print("Error: classification produced 0 files in both tiers", file=sys.stderr) return 1 if not tiers["tier1"] or not tiers["tier2"]: @@ -65,7 +51,7 @@ def main() -> int: scopes = sorted(tiers[args.tier]) Path(args.output).write_text("\n".join(scopes) + "\n") print( - f"tier1={len(tiers['tier1'])} tier2={len(tiers['tier2'])} → wrote {len(scopes)} to {args.output}", + f"tier1={len(tiers['tier1'])} tier2={len(tiers['tier2'])} -> wrote {len(scopes)} to {args.output}", file=sys.stderr, ) return 0