From aa6d8cea05d46821a536772714e32ba44640ee66 Mon Sep 17 00:00:00 2001
From: mchen-sentry <ming.chen@sentry.io>
Date: Thu, 9 Apr 2026 13:19:36 -0700
Subject: [PATCH 1/8] feat(ci): two-tier test split with service classification

- service_classifier.py: hybrid static + runtime classification plugin
  that maps each test to its service dependencies (Snuba, Kafka, etc.)
- classify-services.yml: workflow to generate classification across 22 shards
- split-tests-by-tier.py: splits classification into tier1 (postgres-only)
  and tier2 (full Snuba stack) test lists
- backend.yml: add split-tiers + backend-light jobs, wire backend-test
  to use tier2 list when classification is available
- Selective testing (PRs) and tiers (master) are mutually exclusive
---
 .github/workflows/backend.yml                 | 142 +++++++++++++++++-
 .github/workflows/classify-services.yml       |  90 +++++++++++
 .../workflows/scripts/split-tests-by-tier.py  |  75 +++++++++
 src/sentry/testutils/pytest/__init__.py       |   1 +
 .../testutils/pytest/service_classifier.py    | 139 +++++++++++++++++
 5 files changed, 445 insertions(+), 2 deletions(-)
 create mode 100644 .github/workflows/classify-services.yml
 create mode 100644 .github/workflows/scripts/split-tests-by-tier.py
 create mode 100644 src/sentry/testutils/pytest/service_classifier.py

diff --git a/.github/workflows/backend.yml b/.github/workflows/backend.yml
index e3a2b8c1aff9d3..aaa320aa644f8a 100644
--- a/.github/workflows/backend.yml
+++ b/.github/workflows/backend.yml
@@ -200,13 +200,142 @@ jobs:
         run: |
           python3 .github/workflows/scripts/calculate-backend-test-shards.py
 
+  split-tiers:
+    if: >-
+      always() &&
+      !cancelled() &&
+      needs.files-changed.outputs.backend == 'true' &&
+      needs.select-tests.outputs.has-selected-tests != 'true'
+    needs: [files-changed, select-tests]
+    name: split tests into tiers
+    runs-on: ubuntu-24.04
+    timeout-minutes: 5
+    permissions:
+      contents: read
+      actions: read
+    outputs:
+      has-tiers: ${{ steps.split.outputs.has-tiers }}
+    steps:
+      - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
+
+      - name: Download classification report
+        id: download
+        env:
+          GH_TOKEN: ${{ github.token }}
+        run: |
+          RUN_ID=$(gh run list --workflow=classify-services.yml --status=success --limit=1 --json databaseId --jq '.[0].databaseId')
+          if [ -z "$RUN_ID" ]; then
+            echo "No classify-services run found, skipping tiers"
+            echo "has-tiers=false" >> "$GITHUB_OUTPUT"
+            exit 0
+          fi
+          gh run download "$RUN_ID" --name test-service-classification --dir /tmp/classification
+          echo "has-tiers=true" >> "$GITHUB_OUTPUT"
+
+      - name: Split tests by tier
+        id: split
+        if: steps.download.outputs.has-tiers == 'true'
+        run: |
+          python3 .github/workflows/scripts/split-tests-by-tier.py \
+            --classification /tmp/classification/test-service-classification.json \
+            --tier tier1 --output /tmp/backend-light-tests.txt
+          python3 .github/workflows/scripts/split-tests-by-tier.py \
+            --classification /tmp/classification/test-service-classification.json \
+            --tier tier2 --output /tmp/backend-tests.txt
+          echo "has-tiers=true" >> "$GITHUB_OUTPUT"
+
+      - uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
+        if: steps.split.outputs.has-tiers == 'true'
+        with:
+          name: tier-file-lists
+          path: |
+            /tmp/backend-light-tests.txt
+            /tmp/backend-tests.txt
+          retention-days: 1
+
+  backend-light:
+    if: >-
+      always() &&
+      !cancelled() &&
+      needs.files-changed.outputs.backend == 'true' &&
+      needs.split-tiers.outputs.has-tiers == 'true'
+    needs: [files-changed, split-tiers]
+    name: 'backend-light (${{ matrix.instance }})'
+    runs-on: ubuntu-24.04
+    timeout-minutes: 30
+    permissions:
+      contents: read
+      id-token: write
+    strategy:
+      fail-fast: false
+      matrix:
+        instance: [0, 1, 2, 3, 4]
+    env:
+      MATRIX_INSTANCE_TOTAL: 5
+      TEST_GROUP_STRATEGY: roundrobin
+      PYTHONHASHSEED: '0'
+      SENTRY_SKIP_SELENIUM_PLUGIN: '1'
+    steps:
+      - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
+
+      - uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
+        with:
+          name: tier-file-lists
+          path: /tmp/
+
+      - name: Setup sentry env
+        uses: ./.github/actions/setup-sentry
+        id: setup
+        with:
+          mode: migrations
+
+      - name: Download odiff binary
+        run: |
+          curl -sL https://registry.npmjs.org/odiff-bin/-/odiff-bin-4.3.2.tgz \
+            | tar -xz --strip-components=2 package/raw_binaries/odiff-linux-x64
+          sudo install -m 755 odiff-linux-x64 /usr/local/bin/odiff
+          rm odiff-linux-x64
+
+      - name: Run backend-light tests
+        env:
+          SELECTED_TESTS_FILE: /tmp/backend-light-tests.txt
+        run: |
+          python3 -b -m pytest tests \
+            --reuse-db \
+            -n 4 \
+            --dist=loadfile \
+            --ignore tests/acceptance \
+            --ignore tests/apidocs \
+            --ignore tests/js \
+            --ignore tests/tools \
+            --json-report \
+            --json-report-file=".artifacts/pytest.json" \
+            --json-report-omit=log \
+            --junit-xml=.artifacts/pytest.junit.xml \
+            -o junit_suite_name=pytest-backend-light
+
+      - name: Inspect failure
+        if: failure()
+        run: devservices logs 2>/dev/null || true
+
+      - name: Collect test data
+        uses: ./.github/actions/collect-test-data
+        if: ${{ !cancelled() }}
+        with:
+          artifact_path: .artifacts/pytest.json
+          gcs_bucket: ${{ secrets.COLLECT_TEST_DATA_GCS_BUCKET }}
+          gcp_project_id: ${{ secrets.COLLECT_TEST_DATA_GCP_PROJECT_ID }}
+          workload_identity_provider: ${{ secrets.SENTRY_GCP_DEV_WORKLOAD_IDENTITY_POOL }}
+          service_account_email: ${{ secrets.COLLECT_TEST_DATA_SERVICE_ACCOUNT_EMAIL }}
+          matrix_instance_number: ${{ steps.setup.outputs.matrix-instance-number }}
+
   backend-test:
     # Use always() so this job runs even when select-tests is skipped (master)
     if: >-
       always() &&
       !cancelled() &&
       needs.files-changed.outputs.backend == 'true' && needs.calculate-shards.outputs.shard-count != '0'
-    needs: [files-changed, select-tests, calculate-shards]
+    needs: [files-changed, select-tests, calculate-shards, split-tiers]
     name: backend test
     runs-on: ubuntu-24.04
     timeout-minutes: 60
@@ -258,9 +387,16 @@ jobs:
           name: selected-tests-${{ github.run_id }}
           path: .artifacts/
 
+      - name: Download backend test list
+        if: needs.split-tiers.outputs.has-tiers == 'true'
+        uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
+        with:
+          name: tier-file-lists
+          path: /tmp/
+
       - name: Run backend test (${{ steps.setup.outputs.matrix-instance-number }} of ${{ steps.setup.outputs.matrix-instance-total }})
         env:
-          SELECTED_TESTS_FILE: ${{ needs.select-tests.outputs.has-selected-tests == 'true' && '.artifacts/selected-tests.txt' || '' }}
+          SELECTED_TESTS_FILE: ${{ needs.select-tests.outputs.has-selected-tests == 'true' && '.artifacts/selected-tests.txt' || (needs.split-tiers.outputs.has-tiers == 'true' && '/tmp/backend-tests.txt' || '') }}
         run: |
           if [ -n "${XDIST_WORKERS}" ]; then
             export PYTEST_ADDOPTS="$PYTEST_ADDOPTS -n ${XDIST_WORKERS} --dist=loadfile"
@@ -620,6 +756,8 @@ jobs:
       [
         api-docs,
         backend-test,
+        backend-light,
+        split-tiers,
         backend-migration-tests,
         calculate-shards,
         cli,
diff --git a/.github/workflows/classify-services.yml b/.github/workflows/classify-services.yml
new file mode 100644
index 00000000000000..b049962716b054
--- /dev/null
+++ b/.github/workflows/classify-services.yml
@@ -0,0 +1,90 @@
+name: classify test services
+
+on:
+  workflow_dispatch:
+
+jobs:
+  classify:
+    name: classify (${{ matrix.instance }})
+    runs-on: ubuntu-24.04
+    timeout-minutes: 60
+    permissions:
+      contents: read
+    strategy:
+      fail-fast: false
+      matrix:
+        instance:
+          [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]
+
+    env:
+      MATRIX_INSTANCE_TOTAL: 22
+      TEST_GROUP_STRATEGY: roundrobin
+
+    steps:
+      - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
+
+      - name: Setup sentry env
+        uses: ./.github/actions/setup-sentry
+        with:
+          mode: backend-ci
+
+      - name: Run tests with classification
+        env:
+          PYTEST_ADDOPTS: '${{ env.PYTEST_ADDOPTS }} --classify-services --classification-output=test-service-classification.json'
+        run: |
+          python3 -b -m pytest tests \
+            --ignore tests/acceptance \
+            --ignore tests/apidocs \
+            --ignore tests/js \
+            --ignore tests/tools \
+            || true
+
+      - name: Upload classification report
+        if: always()
+        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
+        with:
+          name: classification-shard-${{ matrix.instance }}
+          path: test-service-classification.json
+          retention-days: 90
+
+  merge-reports:
+    name: merge classification reports
+    needs: classify
+    if: always()
+    runs-on: ubuntu-24.04
+    timeout-minutes: 5
+    steps:
+      - uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
+        with:
+          pattern: classification-shard-*
+          path: shards/
+
+      - name: Merge shard reports
+        run: |
+          python3 - <<'SCRIPT'
+          import json
+          from collections import defaultdict
+          from pathlib import Path
+
+          merged = defaultdict(set)
+          shard_dirs = sorted(Path("shards").iterdir())
+
+          for shard_dir in shard_dirs:
+              report = shard_dir / "test-service-classification.json"
+              if report.exists():
+                  for tid, svcs in json.loads(report.read_text()).get("tests", {}).items():
+                      merged[tid].update(svcs)
+
+          Path("test-service-classification.json").write_text(json.dumps(
+              {"total_tests": len(merged), "tests": {k: sorted(v) for k, v in sorted(merged.items())}},
+              indent=2,
+          ) + "\n")
+          print(f"Merged {len(merged)} tests from {len(shard_dirs)} shards")
+          SCRIPT
+
+      - name: Upload merged report
+        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4
+        with:
+          name: test-service-classification
+          path: test-service-classification.json
+          retention-days: 90
diff --git a/.github/workflows/scripts/split-tests-by-tier.py b/.github/workflows/scripts/split-tests-by-tier.py
new file mode 100644
index 00000000000000..38d818de2b6cc6
--- /dev/null
+++ b/.github/workflows/scripts/split-tests-by-tier.py
@@ -0,0 +1,75 @@
+#!/usr/bin/env python3
+from __future__ import annotations
+
+import argparse
+import json
+import sys
+from collections import defaultdict
+from pathlib import Path
+
+TIER2_SERVICES = {"snuba", "kafka", "symbolicator", "objectstore", "bigtable"}
+
+
+def _scope_key(test_id: str, granularity: str) -> str:
+    if granularity == "file":
+        return test_id.split("::")[0]
+    elif granularity == "class":
+        return "::".join(test_id.split("::")[:2])
+    return test_id
+
+
+def split(classification: dict, granularity: str = "file") -> dict[str, set[str]]:
+    scope_services: dict[str, set[str]] = defaultdict(set)
+    for test_id, services in classification.get("tests", {}).items():
+        scope = _scope_key(test_id, granularity)
+        if isinstance(services, list):
+            scope_services[scope].update(services)
+        else:
+            scope_services[scope].add(services)
+
+    tier1: set[str] = set()
+    tier2: set[str] = set()
+    for scope, services in scope_services.items():
+        (tier2 if services & TIER2_SERVICES else tier1).add(scope)
+
+    return {"tier1": tier1, "tier2": tier2}
+
+
+def main() -> int:
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--classification", required=True)
+    parser.add_argument("--tier", choices=["tier1", "tier2"], required=True)
+    parser.add_argument("--output", required=True)
+    parser.add_argument("--granularity", choices=["file", "class"], default="file")
+    args = parser.parse_args()
+
+    with open(args.classification) as f:
+        classification = json.load(f)
+
+    tests = classification.get("tests", {})
+    if not tests:
+        print("Error: classification JSON has no 'tests' key or is empty", file=sys.stderr)
+        return 1
+
+    tiers = split(classification, granularity=args.granularity)
+    if not tiers["tier1"] and not tiers["tier2"]:
+        print("Error: classification produced 0 scopes in both tiers", file=sys.stderr)
+        return 1
+
+    if not tiers["tier1"] or not tiers["tier2"]:
+        print(
+            f"Warning: tier1={len(tiers['tier1'])} tier2={len(tiers['tier2'])} — one tier is empty",
+            file=sys.stderr,
+        )
+
+    scopes = sorted(tiers[args.tier])
+    Path(args.output).write_text("\n".join(scopes) + "\n")
+    print(
+        f"tier1={len(tiers['tier1'])} tier2={len(tiers['tier2'])} → wrote {len(scopes)} to {args.output}",
+        file=sys.stderr,
+    )
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/src/sentry/testutils/pytest/__init__.py b/src/sentry/testutils/pytest/__init__.py
index 231eee6a140712..9fa6342b5259d1 100644
--- a/src/sentry/testutils/pytest/__init__.py
+++ b/src/sentry/testutils/pytest/__init__.py
@@ -12,6 +12,7 @@
     "sentry.testutils.pytest.json_report_reruns",
     "sentry.testutils.pytest.show_flaky_failures",
     "sentry.testutils.thread_leaks.pytest",
+    "sentry.testutils.pytest.service_classifier",
 ]
 
 if os.environ.get("SENTRY_SKIP_SELENIUM_PLUGIN") != "1":
diff --git a/src/sentry/testutils/pytest/service_classifier.py b/src/sentry/testutils/pytest/service_classifier.py
new file mode 100644
index 00000000000000..5f34aff1b285d8
--- /dev/null
+++ b/src/sentry/testutils/pytest/service_classifier.py
@@ -0,0 +1,139 @@
+from __future__ import annotations
+
+import socket
+import time
+from collections import defaultdict
+from pathlib import Path
+from typing import Any
+
+import pytest
+
+from sentry.utils import json
+
+SERVICE_PORTS: dict[int, str] = {
+    1218: "snuba",
+    3021: "symbolicator",
+    8086: "bigtable",
+    8888: "objectstore",
+}
+
+FIXTURE_SERVICE_MAP: dict[str, str] = {
+    "_requires_snuba": "snuba",
+    "_requires_kafka": "kafka",
+    "_requires_symbolicator": "symbolicator",
+    "_requires_objectstore": "objectstore",
+}
+
+_original_send: Any = None
+_original_sendall: Any = None
+_current_test: str | None = None
+_test_services: dict[str, set[str]] = defaultdict(set)
+_enabled: bool = False
+
+
+def _classify_socket(sock: socket.socket) -> None:
+    if not _current_test:
+        return
+    try:
+        service = SERVICE_PORTS.get(sock.getpeername()[1])
+        if service:
+            _test_services[_current_test].add(service)
+    except (OSError, AttributeError, IndexError):
+        pass
+
+
+def _patched_send(self: socket.socket, *args: Any, **kwargs: Any) -> Any:
+    _classify_socket(self)
+    return _original_send(self, *args, **kwargs)  # type: ignore[misc]
+
+
+def _patched_sendall(self: socket.socket, *args: Any, **kwargs: Any) -> Any:
+    _classify_socket(self)
+    return _original_sendall(self, *args, **kwargs)  # type: ignore[misc]
+
+
+def _install_socket_patches() -> None:
+    global _original_send, _original_sendall
+    _original_send = socket.socket.send
+    _original_sendall = socket.socket.sendall
+    socket.socket.send = _patched_send  # type: ignore[assignment]
+    socket.socket.sendall = _patched_sendall  # type: ignore[assignment]
+
+
+def _uninstall_socket_patches() -> None:
+    if _original_send is not None:
+        socket.socket.send = _original_send  # type: ignore[assignment]
+    if _original_sendall is not None:
+        socket.socket.sendall = _original_sendall  # type: ignore[assignment]
+
+
+def _detect_static_services(item: pytest.Item) -> set[str]:
+    services: set[str] = set()
+
+    if getattr(item, "cls", None) is not None:
+        services.add("postgres")
+    elif hasattr(item, "fixturenames"):
+        if {"db", "transactional_db", "django_db_reset_sequences"} & set(item.fixturenames):
+            services.add("postgres")
+
+    if hasattr(item, "fixturenames"):
+        for fixture, service in FIXTURE_SERVICE_MAP.items():
+            if fixture in item.fixturenames:
+                services.add(service)
+
+    for marker in item.iter_markers("usefixtures"):
+        for name in marker.args:
+            if name in FIXTURE_SERVICE_MAP:
+                services.add(FIXTURE_SERVICE_MAP[name])
+
+    return services
+
+
+def pytest_addoption(parser: pytest.Parser) -> None:
+    group = parser.getgroup("service-classifier")
+    group.addoption("--classify-services", action="store_true", default=False)
+    group.addoption("--classification-output", default="test-service-classification.json")
+
+
+def pytest_configure(config: pytest.Config) -> None:
+    global _enabled
+    _enabled = config.getoption("--classify-services", default=False)
+    if _enabled:
+        _install_socket_patches()
+
+
+def pytest_unconfigure(config: pytest.Config) -> None:
+    if _enabled:
+        _uninstall_socket_patches()
+
+
+def pytest_collection_modifyitems(config: pytest.Config, items: list[pytest.Item]) -> None:
+    if _enabled:
+        for item in items:
+            _test_services[item.nodeid].update(_detect_static_services(item))
+
+
+@pytest.hookimpl(tryfirst=True)
+def pytest_runtest_setup(item: pytest.Item) -> None:
+    global _current_test
+    if _enabled:
+        _current_test = item.nodeid
+
+
+@pytest.hookimpl(trylast=True)
+def pytest_runtest_teardown(item: pytest.Item, nextitem: pytest.Item | None) -> None:
+    global _current_test
+    if _enabled:
+        _current_test = None
+
+
+def pytest_sessionfinish(session: pytest.Session, exitstatus: int) -> None:
+    if _enabled:
+        report = {
+            "generated_at": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
+            "total_tests": len(_test_services),
+            "tests": {nid: sorted(svcs) for nid, svcs in sorted(_test_services.items())},
+        }
+        Path(session.config.getoption("--classification-output")).write_text(
+            json.dumps(report) + "\n"
+        )

From f9df32894357dc44bc53a784c6ef0bc21f2a65fa Mon Sep 17 00:00:00 2001
From: mchen-sentry <ming.chen@sentry.io>
Date: Thu, 9 Apr 2026 13:51:40 -0700
Subject: [PATCH 2/8] fix(ci): correct mypy type ignore codes in
 service_classifier

---
 src/sentry/testutils/pytest/service_classifier.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/sentry/testutils/pytest/service_classifier.py b/src/sentry/testutils/pytest/service_classifier.py
index 5f34aff1b285d8..7c8485cd5e3473 100644
--- a/src/sentry/testutils/pytest/service_classifier.py
+++ b/src/sentry/testutils/pytest/service_classifier.py
@@ -44,27 +44,27 @@ def _classify_socket(sock: socket.socket) -> None:
 
 def _patched_send(self: socket.socket, *args: Any, **kwargs: Any) -> Any:
     _classify_socket(self)
-    return _original_send(self, *args, **kwargs)  # type: ignore[misc]
+    return _original_send(self, *args, **kwargs)
 
 
 def _patched_sendall(self: socket.socket, *args: Any, **kwargs: Any) -> Any:
     _classify_socket(self)
-    return _original_sendall(self, *args, **kwargs)  # type: ignore[misc]
+    return _original_sendall(self, *args, **kwargs)
 
 
 def _install_socket_patches() -> None:
     global _original_send, _original_sendall
     _original_send = socket.socket.send
     _original_sendall = socket.socket.sendall
-    socket.socket.send = _patched_send  # type: ignore[assignment]
-    socket.socket.sendall = _patched_sendall  # type: ignore[assignment]
+    socket.socket.send = _patched_send  # type: ignore[method-assign]
+    socket.socket.sendall = _patched_sendall  # type: ignore[method-assign]
 
 
 def _uninstall_socket_patches() -> None:
     if _original_send is not None:
-        socket.socket.send = _original_send  # type: ignore[assignment]
+        socket.socket.send = _original_send  # type: ignore[method-assign]
     if _original_sendall is not None:
-        socket.socket.sendall = _original_sendall  # type: ignore[assignment]
+        socket.socket.sendall = _original_sendall  # type: ignore[method-assign]
 
 
 def _detect_static_services(item: pytest.Item) -> set[str]:

From 4731854e363aedf699b8638d3a7afaf01de06c0f Mon Sep 17 00:00:00 2001
From: mchen-sentry <ming.chen@sentry.io>
Date: Thu, 9 Apr 2026 14:12:36 -0700
Subject: [PATCH 3/8] fix(ci): broaden mypy ignores for socket monkey-patching

---
 src/sentry/testutils/pytest/service_classifier.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/sentry/testutils/pytest/service_classifier.py b/src/sentry/testutils/pytest/service_classifier.py
index 7c8485cd5e3473..18c705e8935af1 100644
--- a/src/sentry/testutils/pytest/service_classifier.py
+++ b/src/sentry/testutils/pytest/service_classifier.py
@@ -56,8 +56,8 @@ def _install_socket_patches() -> None:
     global _original_send, _original_sendall
     _original_send = socket.socket.send
     _original_sendall = socket.socket.sendall
-    socket.socket.send = _patched_send  # type: ignore[method-assign]
-    socket.socket.sendall = _patched_sendall  # type: ignore[method-assign]
+    socket.socket.send = _patched_send  # type: ignore[assignment,method-assign]
+    socket.socket.sendall = _patched_sendall  # type: ignore[assignment,method-assign]
 
 
 def _uninstall_socket_patches() -> None:

From 86349c4cd797f683a950518043755bff2c527d98 Mon Sep 17 00:00:00 2001
From: mchen-sentry <ming.chen@sentry.io>
Date: Thu, 9 Apr 2026 13:59:21 -0700
Subject: [PATCH 4/8] fix(ci): add redis-cluster/kafka service containers to
 backend-light

---
 .github/workflows/backend.yml | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/.github/workflows/backend.yml b/.github/workflows/backend.yml
index aaa320aa644f8a..31a2a2404a9faf 100644
--- a/.github/workflows/backend.yml
+++ b/.github/workflows/backend.yml
@@ -266,6 +266,24 @@ jobs:
     permissions:
       contents: read
       id-token: write
+    services:
+      redis-cluster:
+        image: ghcr.io/getsentry/docker-redis-cluster:7.0.10
+        ports: ['7000:7000', '7001:7001', '7002:7002', '7003:7003', '7004:7004', '7005:7005']
+        env:
+          IP: 0.0.0.0
+      zookeeper:
+        image: ghcr.io/getsentry/image-mirror-confluentinc-cp-zookeeper:6.2.0
+        env:
+          ZOOKEEPER_CLIENT_PORT: 2181
+      kafka:
+        image: ghcr.io/getsentry/image-mirror-confluentinc-cp-kafka:6.2.0
+        env:
+          KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181
+          KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://127.0.0.1:9092
+          KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
+          KAFKA_OFFSETS_TOPIC_NUM_PARTITIONS: 1
+        ports: ['9092:9092']
     strategy:
       fail-fast: false
       matrix:

From b8623330d9e68f659dcb290f4fcae8a464726033 Mon Sep 17 00:00:00 2001
From: mchen-sentry <ming.chen@sentry.io>
Date: Fri, 10 Apr 2026 12:56:52 -0700
Subject: [PATCH 5/8] fix(ci): reduce backend-test to 17 shards when tiers
 active (5+17=22)

---
 .github/workflows/backend.yml | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/backend.yml b/.github/workflows/backend.yml
index 31a2a2404a9faf..ebe2dd017635ed 100644
--- a/.github/workflows/backend.yml
+++ b/.github/workflows/backend.yml
@@ -367,12 +367,12 @@ jobs:
       # and reducing the risk that one of many runs would turn red again (read: intermittent tests)
       fail-fast: false
       matrix:
-        # Dynamic matrix from calculate-shards
-        instance: ${{ fromJSON(needs.calculate-shards.outputs.shard-indices) }}
+        # When tiers are active, backend-test only runs tier2 (17 shards);
+        # the remaining 5 shards run as backend-light. Total stays at 22.
+        instance: ${{ needs.split-tiers.outputs.has-tiers == 'true' && fromJSON('[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]') || fromJSON(needs.calculate-shards.outputs.shard-indices) }}
 
     env:
-      # Dynamic total from calculate-shards
-      MATRIX_INSTANCE_TOTAL: ${{ needs.calculate-shards.outputs.shard-count }}
+      MATRIX_INSTANCE_TOTAL: ${{ needs.split-tiers.outputs.has-tiers == 'true' && '17' || needs.calculate-shards.outputs.shard-count }}
       TEST_GROUP_STRATEGY: roundrobin
       PYTHONHASHSEED: '0'
       XDIST_PER_WORKER_SNUBA: '1'

From f7c4dae4e0106d618397f7eaae360096dac4c187 Mon Sep 17 00:00:00 2001
From: mchen-sentry <ming.chen@sentry.io>
Date: Mon, 27 Apr 2026 10:45:16 -0700
Subject: [PATCH 6/8] fix(ci): filter classify runs by conclusion via jq, not
 --status flag

---
 .github/workflows/backend.yml | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/backend.yml b/.github/workflows/backend.yml
index ebe2dd017635ed..c5cd35db5ca6f9 100644
--- a/.github/workflows/backend.yml
+++ b/.github/workflows/backend.yml
@@ -223,13 +223,18 @@ jobs:
         env:
           GH_TOKEN: ${{ github.token }}
         run: |
-          RUN_ID=$(gh run list --workflow=classify-services.yml --status=success --limit=1 --json databaseId --jq '.[0].databaseId')
+          # Filter by conclusion=success via jq — gh CLI's --status flag is unreliable for this.
+          RUN_ID=$(gh run list --workflow=classify-services.yml --limit=10 --json databaseId,conclusion --jq '[.[] | select(.conclusion == "success")][0].databaseId')
           if [ -z "$RUN_ID" ]; then
             echo "No classify-services run found, skipping tiers"
             echo "has-tiers=false" >> "$GITHUB_OUTPUT"
             exit 0
           fi
-          gh run download "$RUN_ID" --name test-service-classification --dir /tmp/classification
+          if ! gh run download "$RUN_ID" --name test-service-classification --dir /tmp/classification; then
+            echo "Classification artifact unavailable (may be expired), skipping tiers"
+            echo "has-tiers=false" >> "$GITHUB_OUTPUT"
+            exit 0
+          fi
           echo "has-tiers=true" >> "$GITHUB_OUTPUT"
 
       - name: Split tests by tier

From f798c1e44e74d456e389fc099c870243f5b5e101 Mon Sep 17 00:00:00 2001
From: mchen-sentry <ming.chen@sentry.io>
Date: Mon, 27 Apr 2026 14:42:05 -0700
Subject: [PATCH 7/8] perf(ci): tier 2 uses --dist=load, tier 1 keeps
 --dist=loadfile

Hybrid distribution mode based on experiment data: --dist=load cuts the tier 2
shard-time spread by 54% (179s -> 82s) by load-balancing individual tests
across workers, but hurts tier 1, where small fast tests benefit from fixture
reuse via loadfile. Apply --dist=load to backend-test only when tiers are
active; backend-light (tier 1) keeps --dist=loadfile.

Backend-test without tiers (selective PRs, master without classification)
keeps --dist=loadfile for backwards compatibility.
---
 .github/workflows/backend.yml | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/backend.yml b/.github/workflows/backend.yml
index c5cd35db5ca6f9..b9434d1af7cbc3 100644
--- a/.github/workflows/backend.yml
+++ b/.github/workflows/backend.yml
@@ -420,9 +420,13 @@ jobs:
       - name: Run backend test (${{ steps.setup.outputs.matrix-instance-number }} of ${{ steps.setup.outputs.matrix-instance-total }})
         env:
           SELECTED_TESTS_FILE: ${{ needs.select-tests.outputs.has-selected-tests == 'true' && '.artifacts/selected-tests.txt' || (needs.split-tiers.outputs.has-tiers == 'true' && '/tmp/backend-tests.txt' || '') }}
+          # tier 2 (snuba-heavy) gets --dist=load to balance individual-test latency
+          # across workers — variance is much higher than the no-tiers case where
+          # --dist=loadfile averages out via larger file count per shard.
+          XDIST_DIST: ${{ needs.split-tiers.outputs.has-tiers == 'true' && 'load' || 'loadfile' }}
         run: |
           if [ -n "${XDIST_WORKERS}" ]; then
-            export PYTEST_ADDOPTS="$PYTEST_ADDOPTS -n ${XDIST_WORKERS} --dist=loadfile"
+            export PYTEST_ADDOPTS="$PYTEST_ADDOPTS -n ${XDIST_WORKERS} --dist=${XDIST_DIST}"
             timeout 1200 make test-python-ci || {
               rc=$?
               if [ "$rc" -eq 124 ]; then

From a8916c4da291b10c3f3a6b40a509f8d9e8951376 Mon Sep 17 00:00:00 2001
From: mchen-sentry <ming.chen@sentry.io>
Date: Tue, 12 May 2026 14:28:25 -0700
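Subject: [PATCH 8/8] ref(ci): simplify tier split script and backend-light job

- split-tests-by-tier.py: drop the --granularity option and the _scope_key
  helper; tests are always grouped by file
- backend.yml: remove the odiff download step from backend-light
- backend.yml: condense the tier-related comments and reflow the
  redis-cluster ports list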
---
 .github/workflows/backend.yml                 | 17 +++-------
 .../workflows/scripts/split-tests-by-tier.py  | 34 ++++++-------------
 2 files changed, 14 insertions(+), 37 deletions(-)

diff --git a/.github/workflows/backend.yml b/.github/workflows/backend.yml
index b9434d1af7cbc3..f3d91ac2e78330 100644
--- a/.github/workflows/backend.yml
+++ b/.github/workflows/backend.yml
@@ -274,7 +274,8 @@ jobs:
     services:
       redis-cluster:
         image: ghcr.io/getsentry/docker-redis-cluster:7.0.10
-        ports: ['7000:7000', '7001:7001', '7002:7002', '7003:7003', '7004:7004', '7005:7005']
+        ports:
+          ['7000:7000', '7001:7001', '7002:7002', '7003:7003', '7004:7004', '7005:7005']
         env:
           IP: 0.0.0.0
       zookeeper:
@@ -312,13 +313,6 @@ jobs:
         with:
           mode: migrations
 
-      - name: Download odiff binary
-        run: |
-          curl -sL https://registry.npmjs.org/odiff-bin/-/odiff-bin-4.3.2.tgz \
-            | tar -xz --strip-components=2 package/raw_binaries/odiff-linux-x64
-          sudo install -m 755 odiff-linux-x64 /usr/local/bin/odiff
-          rm odiff-linux-x64
-
       - name: Run backend-light tests
         env:
           SELECTED_TESTS_FILE: /tmp/backend-light-tests.txt
@@ -372,8 +366,7 @@ jobs:
       # and reducing the risk that one of many runs would turn red again (read: intermittent tests)
       fail-fast: false
       matrix:
-        # When tiers are active, backend-test only runs tier2 (17 shards);
-        # the remaining 5 shards run as backend-light. Total stays at 22.
+        # With tiers: 17 shards for tier2; 5 more run as backend-light (22 total).
         instance: ${{ needs.split-tiers.outputs.has-tiers == 'true' && fromJSON('[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]') || fromJSON(needs.calculate-shards.outputs.shard-indices) }}
 
     env:
@@ -420,9 +413,7 @@ jobs:
       - name: Run backend test (${{ steps.setup.outputs.matrix-instance-number }} of ${{ steps.setup.outputs.matrix-instance-total }})
         env:
           SELECTED_TESTS_FILE: ${{ needs.select-tests.outputs.has-selected-tests == 'true' && '.artifacts/selected-tests.txt' || (needs.split-tiers.outputs.has-tiers == 'true' && '/tmp/backend-tests.txt' || '') }}
-          # tier 2 (snuba-heavy) gets --dist=load to balance individual-test latency
-          # across workers — variance is much higher than the no-tiers case where
-          # --dist=loadfile averages out via larger file count per shard.
+          # tier2 uses --dist=load: snuba-heavy tests vary widely in per-test duration, so balancing individual tests across workers works better than loadfile.
           XDIST_DIST: ${{ needs.split-tiers.outputs.has-tiers == 'true' && 'load' || 'loadfile' }}
         run: |
           if [ -n "${XDIST_WORKERS}" ]; then
diff --git a/.github/workflows/scripts/split-tests-by-tier.py b/.github/workflows/scripts/split-tests-by-tier.py
index 38d818de2b6cc6..feb7e18e5c44aa 100644
--- a/.github/workflows/scripts/split-tests-by-tier.py
+++ b/.github/workflows/scripts/split-tests-by-tier.py
@@ -10,27 +10,15 @@
 TIER2_SERVICES = {"snuba", "kafka", "symbolicator", "objectstore", "bigtable"}
 
 
-def _scope_key(test_id: str, granularity: str) -> str:
-    if granularity == "file":
-        return test_id.split("::")[0]
-    elif granularity == "class":
-        return "::".join(test_id.split("::")[:2])
-    return test_id
-
-
-def split(classification: dict, granularity: str = "file") -> dict[str, set[str]]:
-    scope_services: dict[str, set[str]] = defaultdict(set)
+def split(classification: dict) -> dict[str, set[str]]:
+    file_services: dict[str, set[str]] = defaultdict(set)
     for test_id, services in classification.get("tests", {}).items():
-        scope = _scope_key(test_id, granularity)
-        if isinstance(services, list):
-            scope_services[scope].update(services)
-        else:
-            scope_services[scope].add(services)
+        file_services[test_id.split("::")[0]].update(services)
 
     tier1: set[str] = set()
     tier2: set[str] = set()
-    for scope, services in scope_services.items():
-        (tier2 if services & TIER2_SERVICES else tier1).add(scope)
+    for path, services in file_services.items():
+        (tier2 if services & TIER2_SERVICES else tier1).add(path)
 
     return {"tier1": tier1, "tier2": tier2}
 
@@ -40,20 +28,18 @@ def main() -> int:
     parser.add_argument("--classification", required=True)
     parser.add_argument("--tier", choices=["tier1", "tier2"], required=True)
     parser.add_argument("--output", required=True)
-    parser.add_argument("--granularity", choices=["file", "class"], default="file")
     args = parser.parse_args()
 
     with open(args.classification) as f:
         classification = json.load(f)
 
-    tests = classification.get("tests", {})
-    if not tests:
-        print("Error: classification JSON has no 'tests' key or is empty", file=sys.stderr)
+    if not classification.get("tests"):
+        print("Error: classification JSON has no tests", file=sys.stderr)
         return 1
 
-    tiers = split(classification, granularity=args.granularity)
+    tiers = split(classification)
     if not tiers["tier1"] and not tiers["tier2"]:
-        print("Error: classification produced 0 scopes in both tiers", file=sys.stderr)
+        print("Error: classification produced 0 files in both tiers", file=sys.stderr)
         return 1
 
     if not tiers["tier1"] or not tiers["tier2"]:
@@ -65,7 +51,7 @@ def main() -> int:
     scopes = sorted(tiers[args.tier])
     Path(args.output).write_text("\n".join(scopes) + "\n")
     print(
-        f"tier1={len(tiers['tier1'])} tier2={len(tiers['tier2'])} → wrote {len(scopes)} to {args.output}",
+        f"tier1={len(tiers['tier1'])} tier2={len(tiers['tier2'])} -> wrote {len(scopes)} to {args.output}",
         file=sys.stderr,
     )
     return 0