From 7f7c1fbc1e4dce07325e8482b571a4dacfac8eba Mon Sep 17 00:00:00 2001 From: DevOpsMadDog Date: Sun, 12 Oct 2025 01:15:34 +1100 Subject: [PATCH 1/5] docs: clarify readme_updated structure --- .coveragerc | 15 +- .github/workflows/provenance.yml | 55 ++ .github/workflows/qa.yml | 72 +++ .github/workflows/release-sign.yml | 105 ++++ .github/workflows/repro-verify.yml | 42 ++ .gitignore | 3 + CHANGELOG.md | 54 ++ README.md | 87 +-- apps/api/app.py | 72 +++ backend/api/evidence/__init__.py | 5 + backend/api/evidence/router.py | 56 ++ backend/api/graph/__init__.py | 5 + backend/api/graph/router.py | 95 +++ backend/api/provenance/__init__.py | 4 + backend/api/provenance/router.py | 38 ++ backend/api/risk/__init__.py | 4 + backend/api/risk/router.py | 81 +++ build/plan.yaml | 13 + cli/__init__.py | 1 + cli/fixops-ci | 13 + cli/fixops-provenance | 13 + cli/fixops-repro | 13 + cli/fixops-risk | 13 + cli/fixops-sbom | 13 + cli/fixops_ci.py | 181 ++++++ cli/fixops_provenance.py | 156 +++++ cli/fixops_repro.py | 81 +++ cli/fixops_risk.py | 81 +++ cli/fixops_sbom.py | 94 +++ config/otel-collector-demo.yaml | 18 + config/policy.yml | 15 + docker-compose.demo.yml | 46 ++ docs/ARCH-INVENTORY.md | 83 +++ docs/CI-SECRETS.md | 13 + docs/DEMO.md | 51 ++ docs/EVIDENCE-BUNDLES.md | 84 +++ docs/PLAYBOOK-AUDIT.md | 44 ++ docs/PLAYBOOK-DEV.md | 82 +++ docs/PLAYBOOK-SEC.md | 55 ++ docs/PROVENANCE-GRAPH.md | 63 ++ docs/PROVENANCE.md | 79 +++ docs/REPRO-BUILDS.md | 74 +++ docs/RISK-SCORING.md | 86 +++ docs/SBOM-QUALITY.md | 44 ++ docs/SECURITY-POSTURE.md | 45 ++ docs/SIGNING.md | 79 +++ docs/TASK-PLAN.md | 22 + evidence/__init__.py | 10 + evidence/packager.py | 286 +++++++++ lib4sbom/__init__.py | 7 +- lib4sbom/normalizer.py | 456 ++++++++++++++ readme_updated.md | 581 ++++++++++++++++++ reports/coverage/coverage.xml | 2 + reports/coverage/summary.txt | 1 + requirements.txt | 3 + risk/__init__.py | 1 + risk/feeds/__init__.py | 8 + risk/feeds/epss.py | 70 +++ risk/feeds/kev.py | 85 +++ risk/scoring.py | 346 +++++++++++ scripts/graph_worker.py | 47 ++ scripts/signing/sign-artifact.sh | 131 ++++ scripts/signing/verify-artifact.sh | 102 +++ services/graph/__init__.py | 9 + services/graph/graph.py | 562 +++++++++++++++++ services/graph/tests/test_graph.py | 124 ++++ services/provenance/__init__.py | 24 + services/provenance/attestation.py | 296 +++++++++ services/provenance/tests/test_attestation.py | 114 ++++ services/repro/__init__.py | 15 + services/repro/tests/test_verifier.py | 71 +++ services/repro/verifier.py | 263 ++++++++ telemetry/__init__.py | 76 +++ telemetry/_noop.py | 115 ++++ telemetry/fastapi_noop.py | 9 + tests/test_evidence_bundle.py | 200 ++++++ tests/test_graph_worker.py | 82 +++ tests/test_risk_scoring.py | 160 +++++ tests/test_sbom_quality.py | 182 ++++++ tests/test_telemetry_runtime.py | 37 ++ ui/dashboard/index.html | 59 ++ ui/dashboard/script.js | 92 +++ ui/dashboard/styles.css | 103 ++++ 83 files changed, 7133 insertions(+), 44 deletions(-) create mode 100644 .github/workflows/provenance.yml create mode 100644 .github/workflows/qa.yml create mode 100644 .github/workflows/release-sign.yml create mode 100644 .github/workflows/repro-verify.yml create mode 100644 CHANGELOG.md create mode 100644 backend/api/evidence/__init__.py create mode 100644 backend/api/evidence/router.py create mode 100644 backend/api/graph/__init__.py create mode 100644 backend/api/graph/router.py create mode 100644 backend/api/provenance/__init__.py create mode 100644 backend/api/provenance/router.py create mode 100644 
backend/api/risk/__init__.py create mode 100644 backend/api/risk/router.py create mode 100644 build/plan.yaml create mode 100644 cli/__init__.py create mode 100755 cli/fixops-ci create mode 100755 cli/fixops-provenance create mode 100755 cli/fixops-repro create mode 100755 cli/fixops-risk create mode 100755 cli/fixops-sbom create mode 100644 cli/fixops_ci.py create mode 100644 cli/fixops_provenance.py create mode 100644 cli/fixops_repro.py create mode 100644 cli/fixops_risk.py create mode 100644 cli/fixops_sbom.py create mode 100644 config/otel-collector-demo.yaml create mode 100644 config/policy.yml create mode 100644 docker-compose.demo.yml create mode 100644 docs/ARCH-INVENTORY.md create mode 100644 docs/CI-SECRETS.md create mode 100644 docs/DEMO.md create mode 100644 docs/EVIDENCE-BUNDLES.md create mode 100644 docs/PLAYBOOK-AUDIT.md create mode 100644 docs/PLAYBOOK-DEV.md create mode 100644 docs/PLAYBOOK-SEC.md create mode 100644 docs/PROVENANCE-GRAPH.md create mode 100644 docs/PROVENANCE.md create mode 100644 docs/REPRO-BUILDS.md create mode 100644 docs/RISK-SCORING.md create mode 100644 docs/SBOM-QUALITY.md create mode 100644 docs/SECURITY-POSTURE.md create mode 100644 docs/SIGNING.md create mode 100644 docs/TASK-PLAN.md create mode 100644 evidence/__init__.py create mode 100644 evidence/packager.py create mode 100644 lib4sbom/normalizer.py create mode 100644 readme_updated.md create mode 100644 reports/coverage/coverage.xml create mode 100644 reports/coverage/summary.txt create mode 100644 risk/__init__.py create mode 100644 risk/feeds/__init__.py create mode 100644 risk/feeds/epss.py create mode 100644 risk/feeds/kev.py create mode 100644 risk/scoring.py create mode 100644 scripts/graph_worker.py create mode 100755 scripts/signing/sign-artifact.sh create mode 100755 scripts/signing/verify-artifact.sh create mode 100644 services/graph/__init__.py create mode 100644 services/graph/graph.py create mode 100644 services/graph/tests/test_graph.py create mode 100644 services/provenance/__init__.py create mode 100644 services/provenance/attestation.py create mode 100644 services/provenance/tests/test_attestation.py create mode 100644 services/repro/__init__.py create mode 100644 services/repro/tests/test_verifier.py create mode 100644 services/repro/verifier.py create mode 100644 telemetry/__init__.py create mode 100644 telemetry/_noop.py create mode 100644 telemetry/fastapi_noop.py create mode 100644 tests/test_evidence_bundle.py create mode 100644 tests/test_graph_worker.py create mode 100644 tests/test_risk_scoring.py create mode 100644 tests/test_sbom_quality.py create mode 100644 tests/test_telemetry_runtime.py create mode 100644 ui/dashboard/index.html create mode 100644 ui/dashboard/script.js create mode 100644 ui/dashboard/styles.css diff --git a/.coveragerc b/.coveragerc index 82b67b3d9..153cced69 100644 --- a/.coveragerc +++ b/.coveragerc @@ -1,6 +1,6 @@ [run] -source = enterprise/src branch = True +source = . 
[paths] source = @@ -8,6 +8,19 @@ source = fixops-blended-enterprise/src [report] +include = + */services/provenance/* + */services/graph/* + */services/repro/* + */lib4sbom/* + */risk/* + */backend/api/provenance/* + */backend/api/risk/* + */backend/api/graph/* + */backend/api/evidence/* + */evidence/* + */telemetry/* + */scripts/graph_worker.py exclude_lines = pragma: no cover if __name__ == "__main__": diff --git a/.github/workflows/provenance.yml b/.github/workflows/provenance.yml new file mode 100644 index 000000000..83dbee40c --- /dev/null +++ b/.github/workflows/provenance.yml @@ -0,0 +1,55 @@ +name: Provenance + +on: + push: + tags: + - '*' + release: + types: [published] + +permissions: + contents: read + actions: read + id-token: write + +jobs: + attest: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.11' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + if [ -f requirements.dev.txt ]; then pip install -r requirements.dev.txt; fi + - name: Build release artefact + run: | + mkdir -p artifacts + git archive --format=tar.gz -o artifacts/fixops-${{ github.sha }}.tar.gz HEAD + - name: Generate provenance attestation + run: | + mkdir -p artifacts/attestations + cli/fixops-provenance attest \ + --artifact artifacts/fixops-${{ github.sha }}.tar.gz \ + --out artifacts/attestations/fixops-${{ github.sha }}.json \ + --builder-id "https://github.com/${{ github.repository }}/actions" \ + --source-uri "${{ github.server_url }}/${{ github.repository }}@${{ github.sha }}" \ + --build-type "https://github.com/actions/runner" + - name: Verify provenance attestation + run: | + cli/fixops-provenance verify \ + --artifact artifacts/fixops-${{ github.sha }}.tar.gz \ + --attestation artifacts/attestations/fixops-${{ github.sha }}.json \ + --builder-id "https://github.com/${{ github.repository }}/actions" \ + --source-uri "${{ github.server_url }}/${{ github.repository }}@${{ github.sha }}" \ + --build-type "https://github.com/actions/runner" + - name: Upload provenance attestation + uses: actions/upload-artifact@v4 + with: + name: provenance-attestations + path: artifacts/attestations/ + if-no-files-found: error diff --git a/.github/workflows/qa.yml b/.github/workflows/qa.yml new file mode 100644 index 000000000..861c21832 --- /dev/null +++ b/.github/workflows/qa.yml @@ -0,0 +1,72 @@ +name: qa + +on: + push: + branches: + - main + - work + pull_request: + +permissions: + contents: read + +jobs: + quality: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.11' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + if [ -f requirements.dev.txt ]; then pip install -r requirements.dev.txt; fi + if [ -f apps/api/requirements-optional.txt ]; then pip install -r apps/api/requirements-optional.txt; fi + - name: Formatting checks + run: | + black --check . + isort --check-only . + - name: Lint + run: flake8 + - name: Type check + run: mypy . + - name: Tests with coverage + env: + PYTHONPATH: . 
+ run: | + mkdir -p reports/coverage + pytest \ + services/provenance/tests/test_attestation.py \ + services/graph/tests/test_graph.py \ + services/repro/tests/test_verifier.py \ + tests/test_sbom_quality.py \ + tests/test_risk_scoring.py \ + tests/test_evidence_bundle.py \ + tests/test_graph_worker.py \ + tests/test_telemetry_runtime.py \ + -q --override-ini testpaths='' \ + --override-ini "addopts=--cov=services.provenance --cov=services.graph --cov=services.repro --cov=lib4sbom --cov=risk --cov=evidence --cov=telemetry --cov=scripts.graph_worker --cov-report=term-missing --cov-report=xml:reports/coverage/coverage.xml --cov-fail-under=70" + - name: Coverage summary + run: | + python - <<'PY' + from pathlib import Path + import xml.etree.ElementTree as ET + + xml_path = Path("reports/coverage/coverage.xml") + root = ET.parse(xml_path).getroot() + rate = float(root.attrib.get("line-rate", 0.0)) + summary = f"Total coverage: {rate * 100:.2f}%\n" + Path("reports/coverage/summary.txt").write_text(summary, encoding="utf-8") + print(summary.strip()) + PY + - name: Upload coverage artefacts + uses: actions/upload-artifact@v4 + with: + name: coverage + path: | + reports/coverage/coverage.xml + reports/coverage/summary.txt + if-no-files-found: error diff --git a/.github/workflows/release-sign.yml b/.github/workflows/release-sign.yml new file mode 100644 index 000000000..d5644b401 --- /dev/null +++ b/.github/workflows/release-sign.yml @@ -0,0 +1,105 @@ +name: Release Signing + +on: + push: + tags: + - '*' + release: + types: [published] + +permissions: + contents: write + actions: read + id-token: write + +jobs: + sign-artifacts: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.11' + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + if [ -f requirements.dev.txt ]; then pip install -r requirements.dev.txt; fi + + - name: Prepare release artifacts + run: | + mkdir -p artifacts/attestations + git archive --format=tar.gz -o artifacts/fixops-${{ github.sha }}.tar.gz HEAD + + - name: Generate provenance predicate + run: | + cli/fixops-provenance attest \ + --artifact artifacts/fixops-${{ github.sha }}.tar.gz \ + --out artifacts/attestations/fixops-${{ github.sha }}.json \ + --builder-id "https://github.com/${{ github.repository }}/actions" \ + --source-uri "${{ github.server_url }}/${{ github.repository }}@${{ github.sha }}" \ + --build-type "https://github.com/actions/runner" + + - name: Install Cosign + uses: sigstore/cosign-installer@v3 + + - name: Write signing key + run: | + printf '%s' "${{ secrets.COSIGN_PRIVATE_KEY }}" > cosign.key + shell: bash + + - name: Sign release artifact + env: + COSIGN_PASSWORD: ${{ secrets.COSIGN_PASSWORD }} + run: | + scripts/signing/sign-artifact.sh \ + --artifact artifacts/fixops-${{ github.sha }}.tar.gz \ + --key cosign.key \ + --signature artifacts/fixops-${{ github.sha }}.tar.gz.sig \ + --predicate artifacts/attestations/fixops-${{ github.sha }}.json \ + --attestation-out artifacts/attestations/fixops-${{ github.sha }}.dsse \ + --bundle-out artifacts/attestations/fixops-${{ github.sha }}.bundle + + - name: Write verification key + run: | + printf '%s' "${{ secrets.COSIGN_PUBLIC_KEY }}" > cosign.pub + shell: bash + + - name: Verify signatures + run: | + cosign verify-blob \ + --key cosign.pub \ + --signature artifacts/fixops-${{ github.sha }}.tar.gz.sig \ + artifacts/fixops-${{ 
github.sha }}.tar.gz + cosign verify-blob \ + --key cosign.pub \ + --bundle artifacts/attestations/fixops-${{ github.sha }}.bundle \ + artifacts/fixops-${{ github.sha }}.tar.gz + shell: bash + + - name: Upload signed artifacts + uses: actions/upload-artifact@v4 + with: + name: signed-release-${{ github.sha }} + path: | + artifacts/fixops-${{ github.sha }}.tar.gz + artifacts/fixops-${{ github.sha }}.tar.gz.sig + artifacts/attestations/fixops-${{ github.sha }}.json + artifacts/attestations/fixops-${{ github.sha }}.dsse + artifacts/attestations/fixops-${{ github.sha }}.bundle + if-no-files-found: error + + - name: Attach assets to release + if: github.event_name == 'release' + uses: softprops/action-gh-release@v1 + with: + files: | + artifacts/fixops-${{ github.sha }}.tar.gz + artifacts/fixops-${{ github.sha }}.tar.gz.sig + artifacts/attestations/fixops-${{ github.sha }}.json + artifacts/attestations/fixops-${{ github.sha }}.dsse + artifacts/attestations/fixops-${{ github.sha }}.bundle diff --git a/.github/workflows/repro-verify.yml b/.github/workflows/repro-verify.yml new file mode 100644 index 000000000..3dcb57734 --- /dev/null +++ b/.github/workflows/repro-verify.yml @@ -0,0 +1,42 @@ +name: reproducible-build-verify + +on: + schedule: + - cron: '0 3 * * *' + workflow_dispatch: + +permissions: + contents: read + actions: read + id-token: write + +jobs: + verify: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.11' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + if [ -f requirements.dev.txt ]; then pip install -r requirements.dev.txt; fi + - name: Verify reproducible build + env: + FIXOPS_TAG: ${{ github.ref_name }} + run: | + TAG=${FIXOPS_TAG:-nightly} + cli/fixops-repro verify \ + --tag "$TAG" \ + --plan build/plan.yaml \ + --out artifacts/repro/attestations \ + --repo . + - name: Upload reproducibility attestations + uses: actions/upload-artifact@v4 + with: + name: repro-attestations + path: artifacts/repro/attestations/ + if-no-files-found: warn diff --git a/.gitignore b/.gitignore index b62c2dd65..2468ecb0b 100644 --- a/.gitignore +++ b/.gitignore @@ -27,6 +27,8 @@ next-env.d.ts # Production builds /build +!build/ +!build/plan.yaml dist/ dist @@ -91,3 +93,4 @@ frontend/node_modules/.cache/default-development/0.pack !/enterprise/.env.example coverage.xml +!reports/coverage/coverage.xml diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 000000000..2f78422c0 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,54 @@ +# Changelog + +All notable changes to the FixOps investor demo branch are documented here. The project followed a ten-phase roadmap focused on +provenance, signing, SBOM normalisation, risk scoring, evidence, observability, and hardening. + +## Phase 10 – Hardening & Coverage (2024-09-XX) +- Added developer, security, and audit playbooks plus the overarching security posture guide. +- Introduced the `qa` workflow enforcing lint, type-check, tests, and coverage ≥70% before merge. +- Captured coverage artefacts (`reports/coverage/coverage.xml`, `reports/coverage/summary.txt`) and tightened release + expectations. + +## Phase 9 – Observability & Demo Stack +- Instrumented provenance, risk, graph, and repro services with OpenTelemetry fallbacks for offline environments. +- Added `docker-compose.demo.yml`, the OTEL collector config, and a dashboard UI surface for investor-ready demos. 
+- Introduced the graph worker to keep the provenance database fresh for the dashboard queries. + +## Phase 8 – CI Agent & Evidence Bundles +- Delivered the `cli/fixops-ci` orchestrator that chains SBOM, risk, provenance, and repro actions. +- Packaged signed evidence bundles and manifests, exposed via the `backend/api/evidence` endpoints. +- Documented bundle structure, policy thresholds, and added tests for manifest integrity. + +## Phase 7 – Reproducible Builds Verifier +- Created the hermetic rebuild service, CLI wrapper, and GitHub Actions workflow to confirm source/binary equivalence. +- Stored reproducibility attestations under `artifacts/repro/attestations/` and published operator documentation. +- Added targeted tests covering success and failure scenarios for the verifier. + +## Phase 6 – Provenance Graph MVP +- Built the SQLite + NetworkX graph service ingesting git commits, attestations, SBOMs, risk outputs, and releases. +- Exposed lineage, KEV component, and anomaly queries through `backend/api/graph/*` and documented usage patterns. +- Added fixtures and tests to validate ingestion edge cases and query accuracy. + +## Phase 5 – Risk Scoring +- Fetched EPSS and CISA KEV feeds, fused them with version lag/exposure hints, and stored the composite FixOpsRisk score. +- Delivered CLI/API surfaces plus documentation for the scoring formula and validation steps. +- Added regression tests for feed parsing and risk scoring heuristics. + +## Phase 4 – SBOM Normalisation & Quality +- Normalised CycloneDX/SPDX inputs, deduplicated components, computed quality metrics, and rendered JSON/HTML reports. +- Shipped the `cli/fixops-sbom` command and documentation describing normalization and scoring outputs. +- Added deterministic tests to guarantee reproducible SBOM processing. + +## Phase 3 – Cosign-based Signing +- Wrapped cosign in helper scripts, added the signing workflow, and documented verification procedures. +- Ensured release artefacts ship with detached signatures or bundles alongside the provenance attestations. + +## Phase 2 – Provenance & SLSA Attestations +- Implemented attestation generation/verification helpers, CLI commands, API endpoints, and release automation. +- Captured the provenance schema, CLI/API usage, and tests covering digest validation flows. + +## Phase 1 – Architecture Inventory & Roadmap +- Produced the authoritative architecture inventory (`docs/ARCH-INVENTORY.md`) and the detailed phase plan + (`docs/TASK-PLAN.md`). +- Updated project documentation to link the new references for reviewer onboarding. + diff --git a/README.md b/README.md index c98e79a44..b2f09ceed 100644 --- a/README.md +++ b/README.md @@ -69,28 +69,28 @@ The unified stage runner gives you the same normalisation logic that powers the Run the sequence above to materialise canonical JSON under `artefacts///outputs/`. Each run also records signed manifests (when `FIXOPS_SIGNING_KEY`/`FIXOPS_SIGNING_KID` are configured) and emits a transparency log via `outputs/transparency.index`. After the CLI walkthrough, execute `pytest` to confirm the ingest API and compliance regressions continue to pass end-to-end. + ## Why teams adopt FixOps - **Overlay-governed operating modes** – A single configuration file switches between 30-minute demo onboarding and hardened enterprise guardrails, provisioning directories, tokens, compliance packs, automation connectors, and module toggles on startup (`config/fixops.overlay.yml`). 
- **Push ingestion + parity CLI** – Upload design CSV, SBOM, SARIF, and CVE/KEV data through FastAPI endpoints or run the same flow locally via `python -m core.cli`, with API-key enforcement, MIME validation, byte limits, and evidence export controls (`apps/api/app.py`, `core/cli.py`). -- **Context-aware decisioning** – The orchestrator correlates design intent with bill-of-materials, findings, and advisories, then layers the context engine, guardrails, SSDLC scoring, IaC posture, exploit intelligence, AI agent detections, Bayesian/Markov forecasts, and knowledge graph analytics in a single pass (`apps/api/pipeline.py`, `new_apps/api/processing/knowledge_graph.py`). -- **Probabilistic escalation intelligence** – The `core.probabilistic.ProbabilisticForecastEngine` applies Dirichlet-smoothed calibration, spectral diagnostics, and multi-step projections to forecast severity drift, quantify stationary risk, and surface explainable escalation pressure for decision makers. +- **Context-aware decisioning** – The orchestrator correlates design intent with bill-of-materials, findings, and advisories, then layers the context engine, guardrails, SSDLC scoring, IaC posture, exploit intelligence, AI agent detections, and knowledge graph analytics in a single pass (`apps/api/pipeline.py`, `new_apps/api/processing/knowledge_graph.py`). +- **Probabilistic escalation intelligence** – The `core.probabilistic.ProbabilisticForecastEngine` applies Dirichlet-smoothed calibration, spectral diagnostics, Markov chains, and Bayesian posterior updates to forecast severity drift, quantify stationary risk, and surface explainable escalation pressure for decision makers. - **Multi-LLM consensus & transparency** – The enhanced decision engine layers deterministic heuristics with optional OpenAI/Anthropic/Gemini calls (when API keys are present), reconciles verdicts, enriches MITRE ATT&CK, compliance, and marketplace intelligence, and emits explainable consensus telemetry for demos or production pipelines (`fixops-enterprise/src/services/enhanced_decision_engine.py`, `fixops-enterprise/src/api/v1/enhanced.py`). -- **Evidence & automation built-in** – Compliance packs, policy automation (Jira/Confluence/Slack), onboarding guidance, feedback capture, and evidence bundling persist auditable manifests inside overlay-allowlisted directories (`core/compliance.py`, `core/policy.py`, `core/evidence.py`, `core/feedback.py`). -- **Artefact archiving & regulated storage** – Every upload is normalised, persisted with metadata, and summarised via the artefact archive while secure directory enforcement and optional bundle encryption keep regulated tenants compliant (`core/storage.py`, `core/paths.py`). -- **Analytics & ROI telemetry** – Pipeline responses surface pricing tiers, guardrail progress, exploit refresh health, and contextual noise-reduction metrics that feed executive dashboards and ROI storytelling (`perf/BENCHMARKS.csv`, `market/ENTERPRISE_READINESS.md`). -- **Tenant lifecycle & performance intelligence** – Overlay-governed ROI dashboards, tenant lifecycle summaries, and near real-time performance simulations help CISOs, CTEM leads, and platform teams prove value and spot bottlenecks without bespoke code (`core/analytics.py`, `core/tenancy.py`, `core/performance.py`). -- **Modular & extensible** – Toggle modules, adjust weights, or register custom hooks without touching code; every run reports configured, enabled, and executed modules plus outcomes to keep integrators in control (`core/modules.py`). 
- -## System architecture at a glance -``` -┌────────────┐ uploads ┌───────────────┐ overlay + artefacts ┌────────────────────────────┐ multi-LLM + context ┌──────────────────────┐ evidence + automations ┌──────────────┐ -│ Clients │ ───────────▶│ FastAPI (ing) │────────────────────────▶│ Pipeline orchestrator │────────────────────────▶│ Enhanced decisioning │──────────────────────────▶│ Destinations │ -│ (CLI/API) │ │ /inputs/* │ │ (context, guardrails, │ │ (consensus, MITRE, │ │ (Jira, bundle, │ -│ │◀────────────│ /pipeline/run │◀────────────────────────│ SSDLC, IaC, probabilistic)│◀───────────────────────│ compliance, KG, LLM) │◀──────────────────────────│ Slack, storage) │ -└────────────┘ JSON status └───────────────┘ overlay metadata │ overlay module matrix │ └──────────────────────┘ └──────────────┘ +- **Security-as-code provenance & trust** – SLSA v1 attestations, provenance graph relationships, reproducible build attestations, and cosign signing keep releases anchored to verifiable supply-chain evidence (`services/provenance`, `services/graph`, `services/repro`, `scripts/signing`, `evidence/packager.py`). +- **Risk-first posture** – Normalised SBOMs, EPSS/KEV feed joins, FixOpsRisk scoring, and anomaly detection correlate exploit probability, exposure, and downgrade drift before pull requests merge (`lib4sbom/normalizer.py`, `risk/scoring.py`, `risk/feeds`, `services/graph/graph.py`). +- **Evidence & automation built-in** – Compliance packs, policy automation (Jira/Confluence/Slack), onboarding guidance, feedback capture, and evidence bundling persist auditable manifests inside overlay-allowlisted directories (`core/compliance.py`, `core/policy.py`, `core/evidence.py`, `core/feedback.py`, `evidence/packager.py`). +- **Observability & demo-ready experiences** – Telemetry hooks, dashboards, and docker-compose demos provide investor-ready tours while keeping operators in control of data flow (`telemetry`, `docker-compose.demo.yml`, `ui/dashboard`). 
+ +### Architecture overview +```text +┌────────────┐ uploads ┌───────────────┐ overlay + artefacts ┌────────────────────────────┐ probabilistic + LLM intelligence ┌────────────────────┐ evidence + automations ┌──────────────┐ +│ Clients │ ───────────▶│ FastAPI (ing) │────────────────────────▶│ Pipeline orchestrator │──────────────────────────────────────▶│ Enhanced decisioning │──────────────────────────▶│ Destinations │ +│ (CLI/API) │ │ /inputs/* │ │ (context, guardrails, │ │ (consensus, MITRE, │ │ (Jira, bundle, │ +│ │◀────────────│ /pipeline/run │◀────────────────────────│ SSDLC, IaC, probabilistic)│◀──────────────────────────────────────│ compliance, KG, LLM) │◀──────────────────────────│ Slack, storage) │ +└────────────┘ JSON status └───────────────┘ overlay metadata │ overlay module matrix │ └───────────────────────┘ └──────────────┘ │ └────────────────────────────┘ ▼ - Evidence hub, pricing, knowledge graph, feedback, docs + Graph + risk engine, evidence hub, pricing, knowledge graph, feedback, docs ``` ### Component interaction diagram @@ -192,30 +192,28 @@ The UML block highlights the primary classes composing FixOps: configuration loa ``` FixOps Platform ├── Ingestion -│ ├── FastAPI endpoints -│ └── CLI parity +│ ├── FastAPI endpoints (`backend/api/*`) +│ └── CLI parity (`cli/fixops-*`) +├── Normalisation & analytics +│ ├── SBOM normaliser (`lib4sbom/normalizer.py`) +│ ├── Risk scoring + feeds (`risk/scoring.py`, `risk/feeds/*`) +│ └── Provenance graph (`services/graph/graph.py`) ├── Contextual intelligence -│ ├── Context engine -│ ├── Guardrails -│ └── SSDLC + IaC posture -├── Automation -│ ├── Policy automation -│ ├── Compliance packs -│ └── Evidence hub -├── Analytics -│ ├── ROI telemetry -│ ├── Tenant lifecycle -│ └── Performance simulation -├── AI & probabilistic -│ ├── AI agent advisor -│ └── Probabilistic forecasts -├── Multi-LLM & knowledge graph -│ ├── Multi-model consensus + MITRE mapping -│ ├── SentinelGPT explanations -│ └── CTINexus knowledge graph analytics -└── Deployment - ├── FastAPI service - └── CLI tooling +│ ├── Context engine & guardrails (`apps/api/pipeline.py`) +│ ├── Probabilistic (Markov/Bayesian) forecasts (`core/probabilistic/*`) +│ └── Multi-LLM consensus (`fixops-enterprise/src/services/enhanced_decision_engine.py`) +├── Evidence & automation +│ ├── Evidence bundling (`evidence/packager.py`) +│ ├── Cosign signing & provenance (`scripts/signing/*`, `services/provenance`) +│ └── Reproducible build attestations (`services/repro`) +├── Observability & demo +│ ├── Telemetry (`telemetry/*`) +│ ├── Demo stack (`docker-compose.demo.yml`, `config/otel-collector-demo.yaml`) +│ └── UI dashboards (`ui/dashboard`, `ui/graph-view`) +└── Developer experience + ├── Playbooks & docs (`docs/*.md`) + ├── QA automation (`.github/workflows/qa.yml`) + └── Coverage + reports (`reports/coverage/*`) ``` ### Detailed feature list @@ -438,8 +436,17 @@ Refer to `docs/CONFIG_GUIDE.md` for field-level descriptions and overlay extensi ## Documentation map - **Market & positioning** – `market/` contains competitive analysis, pricing strategy, GTM plans, and demo storyboards. - **Architecture & SDLC** – `docs/ARCHITECTURE.md`, `docs/DATA_MODEL.md`, `docs/SDLC_SSDLC.md`, and `docs/INTEGRATIONS.md` outline components, data flows, lifecycle coverage, and integration contracts. -- **Security & audits** – `audit/SECURITY.md`, `audit/GAPS.md`, and `audit/CTEM_ASSESSMENT.md` track mitigations, residual risk, and CTEM readiness. 
+- **Architecture inventory & roadmap** – `docs/ARCH-INVENTORY.md` summarises modules/services/data models while `docs/TASK-PLAN.md` maps Phases 2–10 with concrete code touchpoints. +- **SBOM normalisation & quality** – `docs/SBOM-QUALITY.md` explains deduplication logic, quality scoring metrics, CLI usage, and HTML reporting expectations. +- **Risk scoring & exposure** – `docs/RISK-SCORING.md` documents EPSS/KEV ingestion, FixOpsRisk weighting, CLI usage, and API endpoints for the risk pipeline. +- **Provenance & signing** – `docs/PROVENANCE.md`, `docs/SIGNING.md`, and `docs/CI-SECRETS.md` cover SLSA attestations, cosign signing flows, required secrets, and verification guidance. +- **Provenance graph intelligence** – `docs/PROVENANCE-GRAPH.md` details graph ingestion sources, query surface, anomaly detection, and API integration. +- **Reproducible builds** – `docs/REPRO-BUILDS.md` explains the hermetic verifier, plan structure, CLI usage, and CI workflow outputs. +- **Evidence bundles & policy** – `docs/EVIDENCE-BUNDLES.md` covers policy-driven packaging, manifest signing, API endpoints, and CLI automation. +- **Demo stack & telemetry** – `docs/DEMO.md` walks through the OpenTelemetry-enabled docker-compose demo and dashboard. - **Runbooks & usage** – `docs/PLATFORM_RUNBOOK.md`, `docs/USAGE_GUIDE.html`, and `docs/PR_SUMMARY.md` provide persona guides, troubleshooting steps, and an executive summary for reviewers. +- **Operational playbooks** – `docs/PLAYBOOK-DEV.md`, `docs/PLAYBOOK-SEC.md`, and `docs/PLAYBOOK-AUDIT.md` equip engineering, security, and audit stakeholders with repeatable workflows. +- **Security posture & audits** – `docs/SECURITY-POSTURE.md`, `audit/SECURITY.md`, `audit/GAPS.md`, and `audit/CTEM_ASSESSMENT.md` capture branch protections, mitigations, residual risk, and CTEM readiness. Whether you launch the API or the CLI, FixOps now delivers overlay-governed context, compliance, automation, and probabilistic insight with auditable artefacts that keep demo and enterprise buyers on the same code path. 
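The routers wired up in the next file diff (`backend/api/provenance`, `backend/api/risk`, `backend/api/graph`, `backend/api/evidence`) are not exercised anywhere else in this patch, so a minimal client sketch is included here for reviewers. It is not part of the patch itself: it assumes the API is reachable at `http://localhost:8000` and that the key is sent in an `X-API-Key` header; both are deployment-specific, and the actual header name is whatever `api_key_header` in `apps/api/app.py` defines. The endpoint paths come directly from the router prefixes added below.

```python
"""Minimal sketch: query the routers added in this patch.

Assumptions (not defined by the patch): base URL http://localhost:8000 and an
``X-API-Key`` auth header. Adjust both to match your deployment.
"""
import os

import requests

BASE_URL = os.getenv("FIXOPS_API_URL", "http://localhost:8000")
HEADERS = {"X-API-Key": os.getenv("FIXOPS_API_KEY", "change-me")}


def list_attestations() -> list[str]:
    # GET /provenance/ lists attestation file names stored in provenance_dir.
    response = requests.get(f"{BASE_URL}/provenance/", headers=HEADERS, timeout=10)
    response.raise_for_status()
    return response.json()


def risk_summary() -> dict:
    # GET /risk/ returns the summary of the most recent risk*.json report
    # (404 if no report has been generated yet).
    response = requests.get(f"{BASE_URL}/risk/", headers=HEADERS, timeout=10)
    response.raise_for_status()
    return response.json()


def graph_summary() -> dict:
    # GET /graph/ rebuilds the provenance graph from configured sources and
    # reports node/edge counts plus which sources were found.
    response = requests.get(f"{BASE_URL}/graph/", headers=HEADERS, timeout=10)
    response.raise_for_status()
    return response.json()


if __name__ == "__main__":
    print("attestations:", list_attestations())
    print("risk:", risk_summary())
    print("graph:", graph_summary())
```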
diff --git a/apps/api/app.py b/apps/api/app.py index 354c2af6c..18e619bdc 100644 --- a/apps/api/app.py +++ b/apps/api/app.py @@ -2,6 +2,7 @@ import csv import io +import importlib.util import logging import os import secrets @@ -27,6 +28,17 @@ from core.feedback import FeedbackRecorder from core.enhanced_decision import EnhancedDecisionEngine +from backend.api.provenance import router as provenance_router +from backend.api.risk import router as risk_router +from backend.api.graph import router as graph_router +from backend.api.evidence import router as evidence_router +from telemetry import configure as configure_telemetry + +if importlib.util.find_spec("opentelemetry.instrumentation.fastapi"): + from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor +else: # pragma: no cover - fallback when instrumentation is unavailable + from telemetry.fastapi_noop import FastAPIInstrumentor + from .normalizers import ( InputNormalizer, NormalizedBusinessContext, @@ -71,7 +83,9 @@ def decode_access_token(token: str) -> Dict[str, Any]: def create_app() -> FastAPI: """Create the FastAPI application with file-upload ingestion endpoints.""" + configure_telemetry(service_name="fixops-api") app = FastAPI(title="FixOps Ingestion Demo API", version="0.1.0") + FastAPIInstrumentor.instrument_app(app) if not hasattr(app, "state"): app.state = SimpleNamespace() origins_env = os.getenv("FIXOPS_ALLOWED_ORIGINS", "") @@ -140,6 +154,22 @@ async def _verify_api_key(api_key: Optional[str] = Depends(api_key_header)) -> N analytics_dir = verify_allowlisted_path(analytics_dir, allowlist) analytics_store = AnalyticsStore(analytics_dir, allowlist=allowlist) + provenance_dir = overlay.data_directories.get("provenance_dir") + if provenance_dir is None: + root = allowlist[0] + root = verify_allowlisted_path(root, allowlist) + provenance_dir = (root / "artifacts" / "attestations" / overlay.mode).resolve() + provenance_dir = verify_allowlisted_path(provenance_dir, allowlist) + provenance_dir = ensure_secure_directory(provenance_dir) + + risk_dir = overlay.data_directories.get("risk_dir") + if risk_dir is None: + root = allowlist[0] + root = verify_allowlisted_path(root, allowlist) + risk_dir = (root / "artifacts").resolve() + risk_dir = verify_allowlisted_path(risk_dir, allowlist) + risk_dir = ensure_secure_directory(risk_dir) + app.state.normalizer = normalizer app.state.orchestrator = orchestrator app.state.artifacts: Dict[str, Any] = {} @@ -155,6 +185,44 @@ async def _verify_api_key(api_key: Optional[str] = Depends(api_key_header)) -> N app.state.enhanced_engine = EnhancedDecisionEngine( overlay.enhanced_decision_settings ) + sbom_dir = overlay.data_directories.get("sbom_dir") + if sbom_dir is None: + root = allowlist[0] + root = verify_allowlisted_path(root, allowlist) + sbom_dir = (root / "artifacts" / "sbom").resolve() + sbom_dir = verify_allowlisted_path(sbom_dir, allowlist) + sbom_dir = ensure_secure_directory(sbom_dir) + + graph_dir = overlay.data_directories.get("graph_dir") + if graph_dir is None: + root = allowlist[0] + root = verify_allowlisted_path(root, allowlist) + graph_dir = (root / "analysis").resolve() + graph_dir = verify_allowlisted_path(graph_dir, allowlist) + graph_dir = ensure_secure_directory(graph_dir) + + evidence_dir = overlay.data_directories.get("evidence_dir") + if evidence_dir is None: + root = allowlist[0] + root = verify_allowlisted_path(root, allowlist) + evidence_dir = (root / "evidence").resolve() + evidence_dir = verify_allowlisted_path(evidence_dir, allowlist) + evidence_dir = 
ensure_secure_directory(evidence_dir) + evidence_manifest_dir = ensure_secure_directory(evidence_dir / "manifests") + evidence_bundle_dir = ensure_secure_directory(evidence_dir / "bundles") + + app.state.provenance_dir = provenance_dir + app.state.risk_dir = risk_dir + app.state.sbom_dir = sbom_dir + app.state.graph_config = { + "repo_path": Path(".").resolve(), + "attestation_dir": provenance_dir, + "sbom_dir": sbom_dir, + "risk_dir": risk_dir, + "releases_path": graph_dir / "releases.json", + } + app.state.evidence_manifest_dir = evidence_manifest_dir + app.state.evidence_bundle_dir = evidence_bundle_dir uploads_dir = overlay.data_directories.get("uploads_dir") if uploads_dir is None: root = allowlist[0] @@ -164,6 +232,10 @@ async def _verify_api_key(api_key: Optional[str] = Depends(api_key_header)) -> N app.state.upload_manager = upload_manager app.include_router(enhanced_router, dependencies=[Depends(_verify_api_key)]) + app.include_router(provenance_router, dependencies=[Depends(_verify_api_key)]) + app.include_router(risk_router, dependencies=[Depends(_verify_api_key)]) + app.include_router(graph_router, dependencies=[Depends(_verify_api_key)]) + app.include_router(evidence_router, dependencies=[Depends(_verify_api_key)]) _CHUNK_SIZE = 1024 * 1024 _RAW_BYTES_THRESHOLD = 4 * 1024 * 1024 diff --git a/backend/api/evidence/__init__.py b/backend/api/evidence/__init__.py new file mode 100644 index 000000000..d7f16df0b --- /dev/null +++ b/backend/api/evidence/__init__.py @@ -0,0 +1,5 @@ +"""Evidence bundle API router.""" + +from .router import router + +__all__ = ["router"] diff --git a/backend/api/evidence/router.py b/backend/api/evidence/router.py new file mode 100644 index 000000000..fe76cbca3 --- /dev/null +++ b/backend/api/evidence/router.py @@ -0,0 +1,56 @@ +from pathlib import Path +from typing import Any + +from fastapi import APIRouter, HTTPException, Request +import yaml + +router = APIRouter(prefix="/evidence", tags=["evidence"]) + + +def _resolve_directories(request: Request) -> tuple[Path, Path]: + manifest_dir = getattr(request.app.state, "evidence_manifest_dir", None) + bundle_dir = getattr(request.app.state, "evidence_bundle_dir", None) + if manifest_dir is None or bundle_dir is None: + raise HTTPException(status_code=503, detail="Evidence storage not configured") + return Path(manifest_dir), Path(bundle_dir) + + +@router.get("/") +async def list_evidence(request: Request) -> dict[str, Any]: + manifest_dir, bundle_dir = _resolve_directories(request) + releases: list[dict[str, Any]] = [] + for manifest_path in sorted(manifest_dir.glob("*.yaml")): + tag = manifest_path.stem + bundle_path = bundle_dir / f"{tag}.zip" + releases.append( + { + "tag": tag, + "manifest_path": str(manifest_path), + "bundle_available": bundle_path.is_file(), + "bundle_path": str(bundle_path) if bundle_path.is_file() else None, + "updated_at": manifest_path.stat().st_mtime, + } + ) + return {"count": len(releases), "releases": releases} + + +@router.get("/{release}") +async def evidence_manifest(release: str, request: Request) -> dict[str, Any]: + manifest_dir, bundle_dir = _resolve_directories(request) + manifest_path = manifest_dir / f"{release}.yaml" + if not manifest_path.is_file(): + raise HTTPException(status_code=404, detail="Evidence manifest not found") + with manifest_path.open("r", encoding="utf-8") as handle: + payload = yaml.safe_load(handle) or {} + if not isinstance(payload, dict): + raise HTTPException(status_code=500, detail="Malformed evidence manifest") + bundle_path = bundle_dir / 
f"{release}.zip" + return { + "tag": release, + "manifest": payload, + "bundle_available": bundle_path.is_file(), + "bundle_path": str(bundle_path) if bundle_path.is_file() else None, + } + + +__all__ = ["router"] diff --git a/backend/api/graph/__init__.py b/backend/api/graph/__init__.py new file mode 100644 index 000000000..892d02e8e --- /dev/null +++ b/backend/api/graph/__init__.py @@ -0,0 +1,5 @@ +"""FastAPI router exposing provenance graph queries.""" + +from .router import router + +__all__ = ["router"] diff --git a/backend/api/graph/router.py b/backend/api/graph/router.py new file mode 100644 index 000000000..a529cb848 --- /dev/null +++ b/backend/api/graph/router.py @@ -0,0 +1,95 @@ +from pathlib import Path +from typing import Any + +from fastapi import APIRouter, HTTPException, Query, Request + +from services.graph.graph import GraphSources, build_graph_from_sources + +router = APIRouter(prefix="/graph", tags=["graph"]) + + +def _latest_file(directory: Path, pattern: str, fallback: str) -> Path | None: + if not directory or not directory.exists(): + return None + candidates = sorted(directory.glob(pattern)) + default_path = directory / fallback + if default_path.exists() and default_path not in candidates: + candidates.append(default_path) + if not candidates: + return None + return max(candidates, key=lambda candidate: candidate.stat().st_mtime) + + +def _graph_config(request: Request) -> dict[str, Any]: + config = getattr(request.app.state, "graph_config", None) + if config is None: + raise HTTPException(status_code=503, detail="Graph sources not configured") + return config + + +def _build_sources(config: dict[str, Any]) -> GraphSources: + repo_path = Path(config.get("repo_path", ".")).resolve() + attestation_dir = Path(config.get("attestation_dir", "artifacts/attestations")).resolve() + sbom_dir = Path(config.get("sbom_dir", "artifacts/sbom")).resolve() + risk_dir = Path(config.get("risk_dir", "artifacts")).resolve() + releases_path_value = config.get("releases_path") + releases_path = Path(releases_path_value).resolve() if releases_path_value else None + normalized_sbom = _latest_file(sbom_dir, "normalized*.json", "normalized.json") + risk_report = _latest_file(risk_dir, "risk*.json", "risk.json") + return GraphSources( + repo_path=repo_path, + attestation_dir=attestation_dir, + normalized_sbom=normalized_sbom, + risk_report=risk_report, + releases_path=releases_path, + ) + + +def _build_graph(request: Request): + config = _graph_config(request) + sources = _build_sources(config) + graph = build_graph_from_sources(sources) + return graph, sources + + +@router.get("/") +async def graph_summary(request: Request) -> dict[str, Any]: + graph, sources = _build_graph(request) + try: + return { + "nodes": graph.graph.number_of_nodes(), + "edges": graph.graph.number_of_edges(), + "configured_sources": { + "sbom": bool(sources.normalized_sbom), + "risk": bool(sources.risk_report), + }, + } + finally: + graph.close() + + +@router.get("/lineage/{artifact_name}") +async def artifact_lineage(artifact_name: str, request: Request) -> dict[str, Any]: + graph, _ = _build_graph(request) + try: + return graph.lineage(artifact_name) + finally: + graph.close() + + +@router.get("/kev-components") +async def kev_components(request: Request, last: int = Query(3, ge=1, le=50)) -> list[dict[str, Any]]: + graph, _ = _build_graph(request) + try: + return graph.components_with_kev(last_releases=last) + finally: + graph.close() + + +@router.get("/anomalies") +async def version_anomalies(request: Request) -> 
list[dict[str, Any]]: + graph, _ = _build_graph(request) + try: + return graph.detect_version_anomalies() + finally: + graph.close() diff --git a/backend/api/provenance/__init__.py b/backend/api/provenance/__init__.py new file mode 100644 index 000000000..b8c186258 --- /dev/null +++ b/backend/api/provenance/__init__.py @@ -0,0 +1,4 @@ +"""Provenance API router exports.""" +from .router import router + +__all__ = ["router"] diff --git a/backend/api/provenance/router.py b/backend/api/provenance/router.py new file mode 100644 index 000000000..4eb6fd8c2 --- /dev/null +++ b/backend/api/provenance/router.py @@ -0,0 +1,38 @@ +"""FastAPI router exposing provenance attestations.""" +from __future__ import annotations + +from pathlib import Path + +from fastapi import APIRouter, HTTPException, Request + +from services.provenance import load_attestation + +router = APIRouter(prefix="/provenance", tags=["provenance"]) + + +def _resolve_directory(request: Request) -> Path: + directory = getattr(request.app.state, "provenance_dir", None) + if directory is None: + raise HTTPException(status_code=503, detail="Provenance storage not configured") + path = Path(directory) + path.mkdir(parents=True, exist_ok=True) + return path + + +@router.get("/", response_model=list[str]) +async def list_attestations(request: Request) -> list[str]: + directory = _resolve_directory(request) + return sorted(path.name for path in directory.glob("*.json")) + + +@router.get("/{artifact_name}") +async def fetch_attestation(artifact_name: str, request: Request) -> dict: + directory = _resolve_directory(request) + safe_name = Path(artifact_name).name + if not safe_name.endswith(".json"): + safe_name = f"{safe_name}.json" + attestation_path = directory / safe_name + if not attestation_path.is_file(): + raise HTTPException(status_code=404, detail="Attestation not found") + statement = load_attestation(attestation_path) + return statement.to_dict() diff --git a/backend/api/risk/__init__.py b/backend/api/risk/__init__.py new file mode 100644 index 000000000..88876e8cd --- /dev/null +++ b/backend/api/risk/__init__.py @@ -0,0 +1,4 @@ +"""Expose the risk router.""" +from .router import router + +__all__ = ["router"] diff --git a/backend/api/risk/router.py b/backend/api/risk/router.py new file mode 100644 index 000000000..fd3c63e94 --- /dev/null +++ b/backend/api/risk/router.py @@ -0,0 +1,81 @@ +"""FastAPI router exposing risk scoring results.""" +from __future__ import annotations + +import json +from pathlib import Path +from typing import Any, Dict, Mapping + +from fastapi import APIRouter, HTTPException, Request + +router = APIRouter(prefix="/risk", tags=["risk"]) + + +def _resolve_directory(request: Request) -> Path: + directory = getattr(request.app.state, "risk_dir", None) + if directory is None: + raise HTTPException(status_code=503, detail="Risk storage not configured") + path = Path(directory) + path.mkdir(parents=True, exist_ok=True) + return path + + +def _load_latest_report(directory: Path) -> Dict[str, Any]: + candidates = sorted(directory.glob("risk*.json")) + default_path = directory / "risk.json" + if default_path.is_file() and default_path not in candidates: + candidates.append(default_path) + if not candidates: + raise HTTPException(status_code=404, detail="No risk reports available") + latest = max(candidates, key=lambda candidate: candidate.stat().st_mtime) + with latest.open("r", encoding="utf-8") as handle: + return json.load(handle) + + +def _component_index(report: Mapping[str, Any]) -> Dict[str, Any]: + index: 
Dict[str, Any] = {} + for component in report.get("components", []): + if not isinstance(component, dict): + continue + slug = component.get("slug") + if isinstance(slug, str) and slug: + index[slug.lower()] = component + return index + + +@router.get("/") +async def risk_summary(request: Request) -> Dict[str, Any]: + directory = _resolve_directory(request) + report = _load_latest_report(directory) + return { + "generated_at": report.get("generated_at"), + "summary": report.get("summary", {}), + "available_components": len(report.get("components", [])), + "available_cves": len(report.get("cves", {})), + } + + +@router.get("/component/{component_slug}") +async def component_risk(component_slug: str, request: Request) -> Dict[str, Any]: + directory = _resolve_directory(request) + report = _load_latest_report(directory) + index = _component_index(report) + component = index.get(component_slug.lower()) + if component is None: + raise HTTPException(status_code=404, detail="Component not found in risk report") + return component + + +@router.get("/cve/{cve_id}") +async def cve_risk(cve_id: str, request: Request) -> Dict[str, Any]: + directory = _resolve_directory(request) + report = _load_latest_report(directory) + cves = report.get("cves", {}) + if not isinstance(cves, dict): + raise HTTPException(status_code=404, detail="No CVE index available") + entry = cves.get(cve_id.upper()) + if entry is None: + raise HTTPException(status_code=404, detail="CVE not present in risk report") + return entry + + +__all__ = ["router"] diff --git a/build/plan.yaml b/build/plan.yaml new file mode 100644 index 000000000..608a7f878 --- /dev/null +++ b/build/plan.yaml @@ -0,0 +1,13 @@ +version: 1 +artifact: dist/fixops-{tag}.tar.gz +sources: + - path: . + destination: source +steps: + - run: | + mkdir -p dist + tar -czf dist/fixops-{tag}.tar.gz -C source . 
+reference_attestation: artifacts/attestations/fixops-{tag}.json +environment: + LC_ALL: C.UTF-8 + LANG: C.UTF-8 diff --git a/cli/__init__.py b/cli/__init__.py new file mode 100644 index 000000000..3c85e9585 --- /dev/null +++ b/cli/__init__.py @@ -0,0 +1 @@ +"""Command-line entry points for FixOps tooling.""" diff --git a/cli/fixops-ci b/cli/fixops-ci new file mode 100755 index 000000000..548826891 --- /dev/null +++ b/cli/fixops-ci @@ -0,0 +1,13 @@ +#!/usr/bin/env python3 +"""Entry point for the fixops-ci orchestration CLI.""" +from pathlib import Path +import sys + +ROOT = Path(__file__).resolve().parent.parent +if str(ROOT) not in sys.path: + sys.path.insert(0, str(ROOT)) + +from cli.fixops_ci import main + +if __name__ == "__main__": # pragma: no cover - script entry + raise SystemExit(main()) diff --git a/cli/fixops-provenance b/cli/fixops-provenance new file mode 100755 index 000000000..65ce0dba8 --- /dev/null +++ b/cli/fixops-provenance @@ -0,0 +1,13 @@ +#!/usr/bin/env python3 +"""Entry point script for fixops-provenance CLI.""" +from pathlib import Path +import sys + +ROOT = Path(__file__).resolve().parent.parent +if str(ROOT) not in sys.path: + sys.path.insert(0, str(ROOT)) + +from cli.fixops_provenance import main + +if __name__ == "__main__": # pragma: no cover - script entry + raise SystemExit(main()) diff --git a/cli/fixops-repro b/cli/fixops-repro new file mode 100755 index 000000000..80bdd272f --- /dev/null +++ b/cli/fixops-repro @@ -0,0 +1,13 @@ +#!/usr/bin/env python3 +"""Entry point script for fixops-repro CLI.""" +from pathlib import Path +import sys + +ROOT = Path(__file__).resolve().parent.parent +if str(ROOT) not in sys.path: + sys.path.insert(0, str(ROOT)) + +from cli.fixops_repro import main + +if __name__ == "__main__": # pragma: no cover - script entry + raise SystemExit(main()) diff --git a/cli/fixops-risk b/cli/fixops-risk new file mode 100755 index 000000000..2b27c836d --- /dev/null +++ b/cli/fixops-risk @@ -0,0 +1,13 @@ +#!/usr/bin/env python3 +"""Entry point script for fixops-risk CLI.""" +from pathlib import Path +import sys + +ROOT = Path(__file__).resolve().parent.parent +if str(ROOT) not in sys.path: + sys.path.insert(0, str(ROOT)) + +from cli.fixops_risk import main + +if __name__ == "__main__": # pragma: no cover - script entry + raise SystemExit(main()) diff --git a/cli/fixops-sbom b/cli/fixops-sbom new file mode 100755 index 000000000..aef34e3aa --- /dev/null +++ b/cli/fixops-sbom @@ -0,0 +1,13 @@ +#!/usr/bin/env python3 +"""Entry point script for fixops-sbom CLI.""" +from pathlib import Path +import sys + +ROOT = Path(__file__).resolve().parent.parent +if str(ROOT) not in sys.path: + sys.path.insert(0, str(ROOT)) + +from cli.fixops_sbom import main + +if __name__ == "__main__": # pragma: no cover - script entry + raise SystemExit(main()) diff --git a/cli/fixops_ci.py b/cli/fixops_ci.py new file mode 100644 index 000000000..fc9332bc3 --- /dev/null +++ b/cli/fixops_ci.py @@ -0,0 +1,181 @@ +from __future__ import annotations + +import argparse +import subprocess +import sys +from pathlib import Path +from typing import Iterable, List + +from cli.fixops_provenance import main as provenance_main +from cli.fixops_repro import main as repro_main +from cli.fixops_risk import main as risk_main +from cli.fixops_sbom import main as sbom_main +from evidence.packager import BundleInputs, create_bundle + +DEFAULT_NORMALIZED = Path("artifacts/sbom/normalized.json") +DEFAULT_QUALITY_JSON = Path("analysis/sbom_quality_report.json") +DEFAULT_QUALITY_HTML = 
Path("reports/sbom_quality_report.html") +DEFAULT_RISK = Path("artifacts/risk.json") +DEFAULT_PROVENANCE_DIR = Path("artifacts/attestations") +DEFAULT_REPRO_DIR = Path("artifacts/repro/attestations") +DEFAULT_POLICY = Path("config/policy.yml") +DEFAULT_EVIDENCE_OUT = Path("evidence") + + +def build_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser( + prog="fixops-ci", + description="One-stop CLI for FixOps secure supply chain workflows.", + ) + subparsers = parser.add_subparsers(dest="group", required=True) + + sbom_parser = subparsers.add_parser( + "sbom", + help="Proxy to fixops-sbom for normalization and quality reporting.", + ) + sbom_parser.add_argument("args", nargs=argparse.REMAINDER) + + risk_parser = subparsers.add_parser( + "risk", + help="Proxy to fixops-risk for composite scoring.", + ) + risk_parser.add_argument("args", nargs=argparse.REMAINDER) + + provenance_parser = subparsers.add_parser( + "provenance", + help="Proxy to fixops-provenance for attest/verify operations.", + ) + provenance_parser.add_argument("args", nargs=argparse.REMAINDER) + + repro_parser = subparsers.add_parser( + "repro", + help="Proxy to fixops-repro for reproducible build verification.", + ) + repro_parser.add_argument("args", nargs=argparse.REMAINDER) + + evidence_parser = subparsers.add_parser( + "evidence", + help="Evidence bundle packaging and policy evaluation.", + ) + evidence_sub = evidence_parser.add_subparsers(dest="action", required=True) + bundle_parser = evidence_sub.add_parser( + "bundle", + help="Assemble and optionally sign an evidence bundle for a release tag.", + ) + bundle_parser.add_argument("--tag", required=True, help="Release tag to package.") + bundle_parser.add_argument( + "--normalized", + default=str(DEFAULT_NORMALIZED), + help="Path to normalized SBOM JSON.", + ) + bundle_parser.add_argument( + "--quality-json", + default=str(DEFAULT_QUALITY_JSON), + help="Path to SBOM quality JSON report.", + ) + bundle_parser.add_argument( + "--quality-html", + default=str(DEFAULT_QUALITY_HTML), + help="Path to SBOM quality HTML report (optional).", + ) + bundle_parser.add_argument( + "--risk", + default=str(DEFAULT_RISK), + help="Path to risk scoring JSON report.", + ) + bundle_parser.add_argument( + "--provenance-dir", + default=str(DEFAULT_PROVENANCE_DIR), + help="Directory containing provenance attestations.", + ) + bundle_parser.add_argument( + "--repro-dir", + default=str(DEFAULT_REPRO_DIR), + help="Directory containing reproducibility attestations.", + ) + bundle_parser.add_argument( + "--policy", + default=str(DEFAULT_POLICY), + help="Policy definition YAML for pass/warn/fail thresholds.", + ) + bundle_parser.add_argument( + "--out", + default=str(DEFAULT_EVIDENCE_OUT), + help="Root directory for evidence bundles and manifests.", + ) + bundle_parser.add_argument( + "--extra", + action="append", + default=[], + help="Additional files or directories to include (repeatable).", + ) + bundle_parser.add_argument( + "--sign-key", + default=None, + help="Optional cosign private key used to sign MANIFEST.yaml.", + ) + + return parser + + +def _proxy(callback, arguments: List[str]) -> int: + if not arguments: + return callback(["--help"]) + return callback(arguments) + + +def _handle_bundle(args: argparse.Namespace) -> int: + tag = args.tag + repro_attestation = Path(args.repro_dir) / f"{tag}.json" + inputs = BundleInputs( + tag=tag, + normalized_sbom=Path(args.normalized), + sbom_quality_json=Path(args.quality_json), + sbom_quality_html=Path(args.quality_html) if 
args.quality_html else None, + risk_report=Path(args.risk), + provenance_dir=Path(args.provenance_dir), + repro_attestation=repro_attestation, + policy_path=Path(args.policy) if args.policy else None, + output_dir=Path(args.out), + extra_paths=[Path(item) for item in args.extra], + sign_key=Path(args.sign_key) if args.sign_key else None, + ) + try: + manifest = create_bundle(inputs) + except FileNotFoundError as exc: + print(str(exc), file=sys.stderr) + return 1 + except subprocess.CalledProcessError as exc: + print(f"cosign signing failed: {exc}", file=sys.stderr) + return 2 + + overall = manifest.get("evaluations", {}).get("overall", "unknown") + bundle_path = manifest.get("bundle_path") + print(f"Evidence bundle created at {bundle_path}") + print(f"Overall policy status: {overall}") + if overall == "fail": + return 4 + return 0 + + +def main(argv: Iterable[str] | None = None) -> int: + parser = build_parser() + args = parser.parse_args(list(argv) if argv is not None else None) + + if args.group == "sbom": + return _proxy(sbom_main, list(args.args)) + if args.group == "risk": + return _proxy(risk_main, list(args.args)) + if args.group == "provenance": + return _proxy(provenance_main, list(args.args)) + if args.group == "repro": + return _proxy(repro_main, list(args.args)) + if args.group == "evidence" and args.action == "bundle": + return _handle_bundle(args) + + parser.error("Unknown command") + return 2 + + +if __name__ == "__main__": # pragma: no cover - CLI entrypoint + raise SystemExit(main()) diff --git a/cli/fixops_provenance.py b/cli/fixops_provenance.py new file mode 100644 index 000000000..4e1c180dd --- /dev/null +++ b/cli/fixops_provenance.py @@ -0,0 +1,156 @@ +"""CLI helpers for generating and verifying FixOps provenance.""" +from __future__ import annotations + +import argparse +import json +import os +import sys +from typing import Any, Iterable, Mapping + +from services.provenance import ( + ProvenanceVerificationError, + generate_attestation, + verify_attestation, + write_attestation, +) + +DEFAULT_BUILDER_ID = os.getenv("FIXOPS_BUILDER_ID", "urn:fixops:builder:local") +DEFAULT_SOURCE_URI = os.getenv( + "FIXOPS_SOURCE_URI", "https://github.com/DevOpsMadDog/Fixops" +) +DEFAULT_BUILD_TYPE = os.getenv( + "FIXOPS_BUILD_TYPE", "https://github.com/actions/run" +) + + +def _parse_json(value: str, *, description: str) -> Mapping[str, Any]: + try: + parsed = json.loads(value) + except json.JSONDecodeError as exc: + raise argparse.ArgumentTypeError( + f"Invalid JSON for {description}: {exc.msg}" + ) from exc + if not isinstance(parsed, Mapping): + raise argparse.ArgumentTypeError( + f"Expected {description} JSON object, received {type(parsed).__name__}" + ) + return parsed + + +def build_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser( + prog="fixops-provenance", + description="Generate and verify SLSA v1 provenance attestations.", + ) + subparsers = parser.add_subparsers(dest="command", required=True) + + attest_parser = subparsers.add_parser( + "attest", help="Generate a provenance attestation for an artefact" + ) + attest_parser.add_argument("--artifact", required=True, help="Path to artefact file") + attest_parser.add_argument( + "--out", required=True, help="Destination path for the generated attestation" + ) + attest_parser.add_argument( + "--builder-id", + default=DEFAULT_BUILDER_ID, + help="Builder identifier recorded in the attestation", + ) + attest_parser.add_argument( + "--source-uri", + default=DEFAULT_SOURCE_URI, + help="Source repository URI 
recorded in the attestation", + ) + attest_parser.add_argument( + "--build-type", + default=DEFAULT_BUILD_TYPE, + help="Build type URI for the attestation", + ) + attest_parser.add_argument( + "--metadata", + help="Optional JSON metadata object to merge into the attestation", + type=lambda value: _parse_json(value, description="metadata"), + ) + attest_parser.add_argument( + "--material", + action="append", + type=lambda value: _parse_json(value, description="material"), + help=( + "Optional JSON material descriptor (repeatable). Each must include a 'uri' " + "and may provide a 'digest' mapping." + ), + ) + + verify_parser = subparsers.add_parser( + "verify", help="Verify a provenance attestation against an artefact" + ) + verify_parser.add_argument( + "--artifact", required=True, help="Path to artefact file to verify" + ) + verify_parser.add_argument( + "--attestation", + required=True, + help="Path to the attestation JSON file to verify", + ) + verify_parser.add_argument( + "--builder-id", + help="Expected builder identifier; checked if provided", + ) + verify_parser.add_argument( + "--source-uri", + help="Expected source URI; checked if provided", + ) + verify_parser.add_argument( + "--build-type", + help="Expected build type URI; checked if provided", + ) + + return parser + + +def _handle_attest(args: argparse.Namespace) -> int: + materials = args.material if args.material else None + attestation = generate_attestation( + args.artifact, + builder_id=args.builder_id, + source_uri=args.source_uri, + build_type=args.build_type, + materials=materials, + metadata=args.metadata, + ) + destination = write_attestation(attestation, args.out) + print(f"Wrote attestation to {destination}") + return 0 + + +def _handle_verify(args: argparse.Namespace) -> int: + try: + verify_attestation( + args.attestation, + artefact_path=args.artifact, + builder_id=args.builder_id, + source_uri=args.source_uri, + build_type=args.build_type, + ) + except (FileNotFoundError, ProvenanceVerificationError) as exc: + print(f"Verification failed: {exc}", file=sys.stderr) + return 1 + print("Verification succeeded") + return 0 + + +def main(argv: Iterable[str] | None = None) -> int: + parser = build_parser() + args = parser.parse_args(list(argv) if argv is not None else None) + + if args.command == "attest": + return _handle_attest(args) + if args.command == "verify": + return _handle_verify(args) + + parser.error("No command specified") + return 2 # pragma: no cover - argparse will raise before + + +if __name__ == "__main__": # pragma: no cover - CLI entrypoint + raise SystemExit(main()) diff --git a/cli/fixops_repro.py b/cli/fixops_repro.py new file mode 100644 index 000000000..04dbe848c --- /dev/null +++ b/cli/fixops_repro.py @@ -0,0 +1,81 @@ +from __future__ import annotations + +import argparse +import sys +from pathlib import Path +from typing import Iterable + +from services.repro.verifier import run_verification + +DEFAULT_PLAN = Path("build/plan.yaml") +DEFAULT_OUTPUT = Path("artifacts/repro/attestations") + + +def build_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser( + prog="fixops-repro", + description="Verify reproducible builds against reference digests or attestations.", + ) + subparsers = parser.add_subparsers(dest="command", required=True) + + verify_parser = subparsers.add_parser( + "verify", + help="Execute a reproducible build plan and emit an attestation report.", + ) + verify_parser.add_argument( + "--tag", + required=True, + help="Release tag or identifier to verify.", + ) 
+ verify_parser.add_argument( + "--plan", + default=str(DEFAULT_PLAN), + help="Path to the build plan YAML (defaults to build/plan.yaml).", + ) + verify_parser.add_argument( + "--out", + dest="output", + default=str(DEFAULT_OUTPUT), + help="Directory to store reproducibility attestations.", + ) + verify_parser.add_argument( + "--repo", + dest="repo", + default=".", + help="Repository root containing sources and reference artefacts.", + ) + + return parser + + +def _handle_verify(tag: str, plan: str, output: str, repo: str) -> int: + try: + result = run_verification(plan, tag, output_dir=output, repo_root=repo) + except FileNotFoundError as exc: + print(str(exc), file=sys.stderr) + return 1 + except ValueError as exc: + print(str(exc), file=sys.stderr) + return 2 + + digest = result.generated_digest.get("sha256") + status = "SUCCESS" if result.match else "MISMATCH" + print(f"[{status}] {tag} sha256={digest}") + if result.attestation_path: + print(f"Attestation written to {result.attestation_path}") + return 0 if result.match else 3 + + +def main(argv: Iterable[str] | None = None) -> int: + parser = build_parser() + args = parser.parse_args(list(argv) if argv is not None else None) + + if args.command == "verify": + return _handle_verify(args.tag, args.plan, args.output, args.repo) + + parser.error("Unknown command") + return 2 + + +if __name__ == "__main__": # pragma: no cover - CLI entrypoint + raise SystemExit(main()) diff --git a/cli/fixops_risk.py b/cli/fixops_risk.py new file mode 100644 index 000000000..eb8255cc8 --- /dev/null +++ b/cli/fixops_risk.py @@ -0,0 +1,81 @@ +"""CLI helpers for FixOps risk scoring.""" +from __future__ import annotations + +import argparse +import sys +from pathlib import Path +from typing import Iterable + +from risk.feeds.epss import load_epss_scores +from risk.feeds.kev import load_kev_catalog +from risk.scoring import write_risk_report + +DEFAULT_SBOM_INPUT = Path("artifacts/sbom/normalized.json") +DEFAULT_RISK_OUTPUT = Path("artifacts/risk.json") + + +def build_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser( + prog="fixops-risk", + description="Compute FixOps composite risk scores for SBOM components.", + ) + subparsers = parser.add_subparsers(dest="command", required=True) + + score_parser = subparsers.add_parser( + "score", help="Generate risk scores for components in a normalized SBOM" + ) + score_parser.add_argument( + "--sbom", + dest="sbom", + default=str(DEFAULT_SBOM_INPUT), + help="Path to the normalized SBOM JSON", + ) + score_parser.add_argument( + "--out", + dest="output", + default=str(DEFAULT_RISK_OUTPUT), + help="Destination for the risk report JSON", + ) + score_parser.add_argument( + "--epss", + dest="epss", + default=None, + help="Optional override path for a cached EPSS CSV", + ) + score_parser.add_argument( + "--kev", + dest="kev", + default=None, + help="Optional override path for a cached KEV JSON", + ) + + return parser + + +def _handle_score(sbom: str, output: str, epss: str | None, kev: str | None) -> int: + try: + epss_scores = load_epss_scores(path=epss) if epss else load_epss_scores() + kev_catalog = load_kev_catalog(path=kev) if kev else load_kev_catalog() + except FileNotFoundError as exc: + print(str(exc), file=sys.stderr) + return 1 + + report = write_risk_report(sbom, output, epss_scores, kev_catalog) + component_count = len(report.get("components", [])) + print(f"Wrote risk profile for {component_count} components to {output}") + return 0 + + +def main(argv: Iterable[str] | None = None) -> int: + 
parser = build_parser() + args = parser.parse_args(list(argv) if argv is not None else None) + + if args.command == "score": + return _handle_score(args.sbom, args.output, args.epss, args.kev) + + parser.error("Unknown command") + return 2 + + +if __name__ == "__main__": # pragma: no cover - CLI entrypoint + raise SystemExit(main()) diff --git a/cli/fixops_sbom.py b/cli/fixops_sbom.py new file mode 100644 index 000000000..f95767d1c --- /dev/null +++ b/cli/fixops_sbom.py @@ -0,0 +1,94 @@ +"""CLI helpers for SBOM normalization and quality reporting.""" +from __future__ import annotations + +import argparse +import json +from pathlib import Path +from typing import Iterable + +from lib4sbom.normalizer import build_and_write_quality_outputs, write_normalized_sbom + +DEFAULT_NORMALIZED_OUTPUT = Path("artifacts/sbom/normalized.json") +DEFAULT_JSON_REPORT = Path("analysis/sbom_quality_report.json") +DEFAULT_HTML_REPORT = Path("reports/sbom_quality_report.html") + + +def build_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser( + prog="fixops-sbom", + description="Normalize SBOM inputs and calculate quality metrics.", + ) + subparsers = parser.add_subparsers(dest="command", required=True) + + normalize_parser = subparsers.add_parser( + "normalize", help="Normalize SBOM files into a single canonical document" + ) + normalize_parser.add_argument( + "--in", + dest="inputs", + nargs="+", + required=True, + help="Input SBOM file paths (CycloneDX or SPDX JSON)", + ) + normalize_parser.add_argument( + "--out", + dest="output", + default=str(DEFAULT_NORMALIZED_OUTPUT), + help="Destination for the normalized SBOM JSON", + ) + + quality_parser = subparsers.add_parser( + "quality", help="Generate SBOM quality metrics and HTML report" + ) + quality_parser.add_argument( + "--in", + dest="normalized", + required=True, + help="Path to a normalized SBOM JSON file", + ) + quality_parser.add_argument( + "--html", + dest="html", + default=str(DEFAULT_HTML_REPORT), + help="Destination for the rendered HTML report", + ) + quality_parser.add_argument( + "--json", + dest="json_path", + default=str(DEFAULT_JSON_REPORT), + help="Destination for the JSON quality report", + ) + + return parser + + +def _handle_normalize(inputs: Iterable[str], output: str) -> int: + normalized = write_normalized_sbom(inputs, output) + print(f"Normalized {len(normalized.get('components', []))} components to {output}") + return 0 + + +def _handle_quality(normalized_path: str, html_path: str, json_path: str) -> int: + path = Path(normalized_path) + with path.open("r", encoding="utf-8") as handle: + normalized = json.load(handle) + build_and_write_quality_outputs(normalized, json_path, html_path) + print(f"Wrote quality report to {json_path} and HTML to {html_path}") + return 0 + + +def main(argv: Iterable[str] | None = None) -> int: + parser = build_parser() + args = parser.parse_args(list(argv) if argv is not None else None) + + if args.command == "normalize": + return _handle_normalize(args.inputs, args.output) + if args.command == "quality": + return _handle_quality(args.normalized, args.html, args.json_path) + + parser.error("Unknown command") + return 2 + + +if __name__ == "__main__": # pragma: no cover - CLI entrypoint + raise SystemExit(main()) diff --git a/config/otel-collector-demo.yaml b/config/otel-collector-demo.yaml new file mode 100644 index 000000000..4141efd70 --- /dev/null +++ b/config/otel-collector-demo.yaml @@ -0,0 +1,18 @@ +receivers: + otlp: + protocols: + http: + endpoint: 0.0.0.0:4318 + +exporters: + 
logging: + loglevel: info + +service: + pipelines: + traces: + receivers: [otlp] + exporters: [logging] + metrics: + receivers: [otlp] + exporters: [logging] diff --git a/config/policy.yml b/config/policy.yml new file mode 100644 index 000000000..b201801b8 --- /dev/null +++ b/config/policy.yml @@ -0,0 +1,15 @@ +risk: + max_risk_score: + warn_above: 70 + fail_above: 85 +sbom_quality: + coverage_percent: + warn_below: 80 + fail_below: 60 + license_coverage_percent: + warn_below: 75 + fail_below: 50 +repro: + require_match: true +provenance: + require_attestations: true diff --git a/docker-compose.demo.yml b/docker-compose.demo.yml new file mode 100644 index 000000000..dfe268e56 --- /dev/null +++ b/docker-compose.demo.yml @@ -0,0 +1,46 @@ +version: '3.9' +services: + collector: + image: otel/opentelemetry-collector:0.94.0 + command: ["--config=/etc/otelcol/config.yaml"] + volumes: + - ./config/otel-collector-demo.yaml:/etc/otelcol/config.yaml:ro + ports: + - "4318:4318" + + api: + image: python:3.11-slim + working_dir: /workspace + volumes: + - .:/workspace:Z + environment: + OTEL_EXPORTER_OTLP_ENDPOINT: http://collector:4318 + FIXOPS_ALLOWED_ORIGINS: "*" + command: >- + bash -c "pip install -r requirements.txt && uvicorn apps.api.app:create_app --factory --host 0.0.0.0 --port 8000" + ports: + - "8000:8000" + depends_on: + - collector + + graph: + image: python:3.11-slim + working_dir: /workspace + volumes: + - .:/workspace:Z + environment: + OTEL_EXPORTER_OTLP_ENDPOINT: http://collector:4318 + FIXOPS_GRAPH_INTERVAL: 300 + command: >- + bash -c "pip install -r requirements.txt && python scripts/graph_worker.py" + depends_on: + - collector + + dashboard: + image: nginx:alpine + volumes: + - ./ui/dashboard:/usr/share/nginx/html:ro + ports: + - "8080:80" + depends_on: + - api diff --git a/docs/ARCH-INVENTORY.md b/docs/ARCH-INVENTORY.md new file mode 100644 index 000000000..3ae44b187 --- /dev/null +++ b/docs/ARCH-INVENTORY.md @@ -0,0 +1,83 @@ +# Architecture Inventory + +This document captures the current FixOps repository layout, runtime entrypoints, data +models, automation workflows, and key dependencies. It is derived from a manual crawl of +the entire codebase, including API/CLI surfaces, enterprise services, docs, data, and CI +configuration. + +## Runtime entrypoints & services + +| Surface | Entrypoint | Purpose | Dependencies | +| --- | --- | --- | --- | +| CLI (demo + enterprise parity) | `core/cli.py` (`main()` via `fixops/__main__.py`) | Loads overlay configuration, normalises local artefacts with `apps.api.normalizers.InputNormalizer`, and executes the pipeline via `PipelineOrchestrator`, `StageRunner`, `ProcessingLayer`, and `EvidenceHub`. | Core modules (`core.configuration`, `core.overlay_runtime`, `core.storage`, `core.probabilistic`, `core.stage_runner`), enterprise services for run IDs/signing (`fixops-enterprise/src/services`). | +| FastAPI ingestion API | `apps/api/app.py:create_app` (re-exported by `backend/app.py`) | Exposes `/inputs/*` upload endpoints, `/pipeline/run`, feedback capture, chunked uploads, and enhanced decision APIs. Configures CORS, JWT/token auth, overlay-controlled data directories, analytics store, and enhanced decision engine. | `fastapi`, `core.analytics`, `core.storage.ArtefactArchive`, `core.enhanced_decision.EnhancedDecisionEngine`, `apps/api/pipeline.PipelineOrchestrator`, `apps/api/upload_manager.ChunkUploadManager`. 
| +| Enterprise API gateway | `fixops-enterprise/src/main.py:create_app` | Adds enterprise middleware (security headers, rate limiting, performance telemetry), schedules feed refresh jobs, and mounts `/api/v1` routes for evidence, marketplace, CI integrations, and enhanced decision telemetry. | `fastapi`, `structlog`, enterprise settings (`fixops-enterprise/src/config/settings.py`), routers (`fixops-enterprise/src/api/v1/*`), services (`fixops-enterprise/src/services/*`). | +| Enterprise services | `fixops-enterprise/src/services/*` | Hardened implementations for compliance rollups, evidence signing, marketplace recommendations, CI adapters, and enhanced decision orchestration. | `core.configuration`, `core.enhanced_decision`, `structlog`, signing helpers, compliance/marketplace services. | +| Knowledge graph augmentation | `new_apps/api/processing/knowledge_graph.py` | Optional pipeline enrichment that maps entities/relationships for enhanced decisioning. | `networkx`, contextual payloads from pipeline outputs. | +| Integrations toolkit | `integrations/github/adapter.py`, `integrations/jenkins/adapter.py`, `integrations/sonarqube/adapter.py` | Transform CI/SCM payloads into FixOps decision submissions and return verdicts/evidence manifests. | Enterprise decision engine (`fixops-enterprise/src/services/decision_engine.py`), `structlog`. | + +## Core module breakdown + +- **Configuration & overlays** + - `core/configuration.py`, `core/overlay_runtime.py` load `config/fixops.overlay.yml`, merge profile overrides, and expose `OverlayConfig` with directory allowlists, auth tokens, feature toggles, enhanced decision settings, and runtime metadata. Data directories such as uploads, archive, analytics, and evidence are created via `core.paths.ensure_secure_directory`.【F:core/overlay_runtime.py†L1-L200】【F:config/fixops.overlay.yml†L1-L120】 + - Overlay metadata drives `core.paths.verify_allowlisted_path` and `core.storage.ArtefactArchive` to enforce secure persistence for evidence bundles and analytics exports.【F:core/paths.py†L1-L200】【F:core/storage.py†L1-L200】 + +- **Pipeline orchestration** + - `apps/api/pipeline.py.PipelineOrchestrator` correlates design rows, SBOM components, SARIF findings, CVE summaries, CNAPP/VEX inputs, and optional business context. It invokes guardrail evaluations, compliance checks, SSPLC scoring, policy automation, AI agent analysis, probabilistic forecasts, exploitability insights, vector-store similarity, and custom overlay modules via `core.modules.execute_custom_modules`. Results are persisted to the archive and analytics store when run through the API.【F:apps/api/pipeline.py†L1-L400】【F:core/modules.py†L1-L160】 + - `core/stage_runner.StageRunner` and `core/demo_runner.run_demo_pipeline` orchestrate sequential stage execution for the CLI, coordinating run IDs (`fixops-enterprise/src/services/run_registry.py`), signing (`fixops-enterprise/src/services/signing.py`), and module enablement toggles.【F:core/stage_runner.py†L1-L200】【F:core/demo_runner.py†L1-L200】 + - Probabilistic forecasting lives in `core/probabilistic.py.ProbabilisticForecastEngine`, providing Dirichlet priors, Markov transitions, and entropy metrics over component severity. Exploit intelligence is computed in `core/exploit_signals.py.ExploitSignalEvaluator`. 
Vector similarity and AI agent analysis are handled by `core/vector_store.py` and `core/ai_agents.py` respectively.【F:core/probabilistic.py†L1-L200】【F:core/exploit_signals.py†L1-L200】【F:core/vector_store.py†L1-L160】【F:core/ai_agents.py†L1-L200】 + +- **Context, compliance, and policy** + - `core/context_engine.py`, `core/compliance.py`, `core/policy.py`, `core/evidence.py`, `core/analytics.py`, `core/tenancy.py`, and `core/performance.py` enrich pipeline results with business context, compliance coverage, automation manifests, evidence bundles, telemetry, and ROI metrics. `core/design_context_injector.py` transforms design CSVs into SSVC priors using plugins from `ssvc/plugins`.【F:core/context_engine.py†L1-L200】【F:core/compliance.py†L1-L200】【F:core/design_context_injector.py†L1-L200】 + - SSDLC orchestration and IaC posture live in `core/ssdlc.py` and `core/iac.py`, enabling stage-by-stage assessments referenced by the CLI workflow and enterprise decision engine.【F:core/ssdlc.py†L1-L200】【F:core/iac.py†L1-L200】 + +- **Enhanced decision engine** + - `core/enhanced_decision.py.EnhancedDecisionEngine` fuses deterministic guardrails with optional LLM providers, knowledge graph insights, and AI agent signals. Enterprise deployments wrap it via `fixops-enterprise/src/services/enhanced_decision_engine.EnhancedDecisionService` to expose reloadable capabilities, pipeline evaluation, and signal introspection APIs.【F:core/enhanced_decision.py†L1-L200】【F:fixops-enterprise/src/services/enhanced_decision_engine.py†L1-L160】 + +## Data models & persistence + +- Normalised artefact structures (SBOM, SARIF, CVE, VEX, CNAPP, business context) live in + `apps/api/normalizers.py`, `lib4sbom/parser.py`, and `apps/api/upload_manager.py`. These + classes enforce deterministic metadata (component counts, severity breakdowns) used by the + pipeline crosswalk and guardrail evaluations.【F:apps/api/normalizers.py†L1-L320】【F:lib4sbom/parser.py†L1-L160】【F:apps/api/upload_manager.py†L1-L200】 +- Evidence manifests are stored by `core/evidence.EvidenceHub` and the enterprise + `fixops-enterprise/src/services/evidence.EvidenceStore`, which adds signing via + `fixops-enterprise/src/services/signing.py`. Archives and analytics payloads are persisted to + overlay-governed directories under `data/` and `artefacts/` using `core/storage.ArtefactArchive` + and `core/analytics.AnalyticsStore`. Data fixtures and feed snapshots live in `data/feeds/`, + `simulations/`, and `artefacts/` for deterministic demos.【F:core/evidence.py†L1-L200】【F:fixops-enterprise/src/services/evidence.py†L1-L200】【F:data/FOLDER_README.md†L1-L80】【F:simulations/ssdlc/run.py†L1-L200】 +- Knowledge graph and marketplace metadata are stored in `new_apps/api/processing/knowledge_graph.py` + and `marketplace/docs/*.md`, powering enhanced decision explanations and ROI storytelling.【F:new_apps/api/processing/knowledge_graph.py†L1-L200】【F:marketplace/docs/FOLDER_README.md†L1-L80】 + +## Supporting infrastructure & tooling + +- **Configuration & settings**: `config/fixops.overlay.yml` defines demo/enterprise modes, integration endpoints, policy toggles, AI agent settings, exploit signals, probabilistic priors, SSDLC requirements, and automation directories. 
Enterprise settings (`fixops-enterprise/src/config/settings.py`) expose environment-driven controls for production deployments.【F:config/fixops.overlay.yml†L1-L120】【F:fixops-enterprise/src/config/settings.py†L1-L200】 +- **Scripts & automation**: `scripts/` hosts bootstrap, demo runners, stage workflows, inventory generators, and analysis tooling. Historical workflows and experiments live under `WIP/` and `analysis/`. Auto-generated repo indices sit in `index/`.【F:scripts/run_demo_steps.py†L1-L200】【F:analysis/TRACEABILITY.csv†L1-L1】【F:index/FOLDER_README.md†L1-L80】 +- **Docs & reports**: `docs/` collects architecture, data model, configuration, runbooks, and strategy references. `audit/`, `reports/`, `reviews/`, and `analysis/` provide assessments, deep dives, and traceability for governance stakeholders.【F:docs/ARCHITECTURE.md†L1-L80】【F:audit/SECURITY.md†L1-L80】【F:reports/deep_review/gaps.json†L1-L80】 +- **Fixtures & simulations**: `fixtures/`, `simulations/`, `artefacts/`, and `data/` offer deterministic datasets for CLI, API, and CI workflows. `simulations/ssdlc/run.py` can regenerate lifecycle artefacts via the Makefile `sim` target.【F:simulations/ssdlc/run.py†L1-L200】【F:Makefile†L1-L120】 + +## Workflows & quality gates + +- GitHub Actions workflows: + - `.github/workflows/ci.yml` enforces formatting (black/isort), lint (flake8), pytest coverage, and optional demo runs on selected branches. It also uploads decision artefacts on pushes.【F:.github/workflows/ci.yml†L1-L160】 + - `.github/workflows/fixops-ci.yml` provisions a virtualenv, compiles modules, runs API regression flows with curl uploads, executes CLI enterprise runs, operations simulations, and pytest. Secrets provide API tokens and signing keys for integration validation.【F:.github/workflows/fixops-ci.yml†L1-L160】 +- Local automation uses the `Makefile` to bootstrap environments, run formatting/lint/test suites, execute demos, regenerate inventories, and clean artefacts. Pytest is configured via `pytest.ini` to target enterprise services, integrations, and simulations with coverage gates. Type checking is configured in `mypy.ini`.【F:Makefile†L1-L160】【F:pytest.ini†L1-L80】【F:mypy.ini†L1-L40】 + +## Dependencies + +- **Core requirements** (`requirements.txt`): `fastapi`, `pydantic`, `PyJWT`, `cryptography`, `structlog`, `pgmpy`, `networkx`, `PyYAML`, and `requests` underpin API routing, configuration, probabilistic modelling, and graph analytics.【F:requirements.txt†L1-L11】 +- **Developer tooling** (`requirements.dev.txt`): `black`, `isort`, `flake8`, `mypy`, `pytest-cov`, and `pre-commit` enforce code quality. Git hooks are configured via `.pre-commit-config.yaml`.【F:requirements.dev.txt†L1-L8】【F:.pre-commit-config.yaml†L1-L160】 +- **Optional extras**: `apps/api/requirements-optional.txt` (Snyk SARIF converter, ChromaDB, sentence-transformers) and `backend/requirements-optional.txt` (lib4sbom) unlock advanced parsing and vector search capabilities when credentials are available.【F:apps/api/requirements-optional.txt†L1-L80】【F:backend/requirements-optional.txt†L1-L20】 +- **Front-end**: The `frontend` symlink targets `/app/fixops-blended-enterprise/frontend` but is currently broken in this workspace; enterprise React assets are expected in downstream deployments. 
No runtime JavaScript is bundled in the repo snapshot.【F:frontend†L1-L1】 + +## Testing surface + +- Unit and integration tests under `tests/` validate overlay loading, pipeline matching, evidence export, rate limiting, CLI commands, knowledge graph processing, and enterprise enhanced API routes. Supplemental smoke tests (`backend_test.py`, `test_frontend.py`, `real_components_test.py`) provide targeted checks outside the `tests/` package.【F:tests/test_pipeline_matching.py†L1-L200】【F:backend_test.py†L1-L80】【F:real_components_test.py†L1-L200】 +- Fixtures for tests reside in `fixtures/` and `simulations/demo_pack/`, while SQLite sample data lives in `fixops_enterprise.db` for enterprise features.【F:fixtures/FOLDER_README.md†L1-L80】【F:fixops_enterprise.db†L1-L1】 + +## Observations & gaps + +- The enterprise React frontend is not present locally (symlink target missing), so front-end changes must be coordinated with the downstream repository. +- Scripts in `WIP/` and legacy enterprise code exist but are not wired into the active pipelines; treat them as references when planning modernization. +- Secrets are expected to be provided via environment variables or GitHub Actions secrets; no secrets are stored in the repo. + diff --git a/docs/CI-SECRETS.md b/docs/CI-SECRETS.md new file mode 100644 index 000000000..2e8aaff8b --- /dev/null +++ b/docs/CI-SECRETS.md @@ -0,0 +1,13 @@ +# CI Secrets + +The release signing workflow relies on the following GitHub Actions secrets. None of the key +material should be committed to the repository. + +| Secret name | Description | +| ----------- | ----------- | +| `COSIGN_PRIVATE_KEY` | PEM-encoded private key used by `cosign sign-blob` and `cosign attest-blob` to sign release outputs. Provide the full key content as the secret value. | +| `COSIGN_PASSWORD` | Password protecting the Cosign private key (leave empty only if the key is generated without a password). | +| `COSIGN_PUBLIC_KEY` | PEM-encoded public key paired with the private signing key. Stored as a secret so the workflow can verify signatures before uploading assets. | + +Publish the public key outside of CI (for example in documentation or a dedicated release +page) so consumers can verify signatures locally without needing repository access. diff --git a/docs/DEMO.md b/docs/DEMO.md new file mode 100644 index 000000000..b7fdcd3cd --- /dev/null +++ b/docs/DEMO.md @@ -0,0 +1,51 @@ +# Investor / CISO Demo Stack + +Phase 9 adds end-to-end observability and a one-command demo environment for showcasing FixOps capabilities. + +## Components + +- **OpenTelemetry collector** (`otel/opentelemetry-collector`) receives traces/metrics from the API, graph worker, and CLI flows. +- **API** (`uvicorn apps.api.app:create_app`) with FastAPI instrumentation and trace spans emitted for provenance, risk, graph, and evidence routes. +- **Graph worker** (`scripts/graph_worker.py`) periodically rebuilds the provenance graph and publishes spans. +- **Dashboard** (`ui/dashboard/`) served via Nginx with live fetches against the API to render SBOM quality, risk posture, reproducibility, and evidence bundle status. 
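+
+To confirm the collector is actually ingesting OTLP traffic once the stack is up (see the quick start below), a minimal sketch such as the following can emit a single test span from the host. This is not part of the shipped tooling; it assumes the `opentelemetry-sdk` and `opentelemetry-exporter-otlp-proto-http` packages are installed locally and that port 4318 is published as in `docker-compose.demo.yml`.
+
+```python
+from opentelemetry import trace
+from opentelemetry.sdk.trace import TracerProvider
+from opentelemetry.sdk.trace.export import BatchSpanProcessor
+from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
+
+# Point the exporter at the demo collector published on localhost:4318.
+provider = TracerProvider()
+provider.add_span_processor(
+    BatchSpanProcessor(OTLPSpanExporter(endpoint="http://localhost:4318/v1/traces"))
+)
+trace.set_tracer_provider(provider)
+
+tracer = trace.get_tracer("fixops.demo.smoke")
+with tracer.start_as_current_span("demo.smoke-test"):
+    pass  # any work wrapped here is exported as a span
+
+provider.shutdown()  # flush the batch processor before exiting
+```
+
+If the span does not show up in the collector's console output, check that the `collector` service is healthy and that nothing else is bound to port 4318.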
+ +## Quick start + +```bash +docker compose -f docker-compose.demo.yml up --build +``` + +Once the stack is healthy: + +- API available at `http://localhost:8000/docs` +- Dashboard available at `http://localhost:8080` +- OTLP collector emits logs to the compose console (configured via `config/otel-collector-demo.yaml`) + +The dashboard expects evidence bundles and manifests generated by `cli/fixops-ci evidence bundle`. Run the CLI before launching the demo or mount existing artefacts into the containers. + +## Telemetry + +The following spans/metrics are emitted: + +| Component | Span prefix | Metrics | +| --- | --- | --- | +| Provenance attestation | `provenance.*` | `fixops_provenance_operations` counter | +| Graph service | `graph.*` | `fixops_graph_ingest`, `fixops_graph_queries` counters | +| Risk scoring | `risk.compute_profile` | `fixops_risk_profiles` counter | +| Repro verification | `repro.verify_plan` | `fixops_repro_verifications` counter | +| API server | automatic FastAPI instrumentation | + +All services honour `OTEL_EXPORTER_OTLP_ENDPOINT` and can be disabled via `FIXOPS_DISABLE_TELEMETRY=1`. + +## Dashboard data sources + +- `GET /evidence/` + `GET /evidence/{tag}` provide metrics, evaluations, and bundle metadata. +- `GET /graph/kev-components?last=3` surfaces KEV exposure across recent releases. + +The UI displays SBOM coverage as a gauge, risk summary counts, reproducibility status pills, and tabular KEV/evidence summaries. + +## Next steps + +- Point the collector at a real backend (Grafana Tempo/Prometheus) by editing `config/otel-collector-demo.yaml`. +- Add authentication for the dashboard via Nginx basic auth or OAuth proxies. +- Extend the graph worker to push notifications into Slack or SIEM tools when anomalies appear. diff --git a/docs/EVIDENCE-BUNDLES.md b/docs/EVIDENCE-BUNDLES.md new file mode 100644 index 000000000..67c00e621 --- /dev/null +++ b/docs/EVIDENCE-BUNDLES.md @@ -0,0 +1,84 @@ +# Evidence Bundles & Policy Evaluation + +Phase 8 delivers an end-to-end evidence packaging workflow, combining SBOM artefacts, risk scores, provenance attestations, and reproducibility attestations into a single signed bundle accompanied by a policy-evaluated manifest. + +## Policy configuration + +Policies live in `config/policy.yml` (override via `--policy`). Each rule supports `warn_above`, `warn_below`, `fail_above`, and `fail_below` thresholds: + +```yaml +risk: + max_risk_score: + warn_above: 70 + fail_above: 85 +sbom_quality: + coverage_percent: + warn_below: 80 + fail_below: 60 + license_coverage_percent: + warn_below: 75 + fail_below: 50 +repro: + require_match: true +provenance: + require_attestations: true +``` + +## CLI + +The new `fixops-ci` orchestration CLI proxies existing tooling and adds evidence packaging: + +```bash +# Normalize SBOMs via fixops-sbom +cli/fixops-ci sbom normalize --in sboms/*.json --out artifacts/sbom/normalized.json + +# Produce SBOM quality + HTML reports +cli/fixops-ci sbom quality --in artifacts/sbom/normalized.json --json analysis/sbom_quality_report.json --html reports/sbom_quality_report.html + +# Compute risk scores +cli/fixops-ci risk score --sbom artifacts/sbom/normalized.json --out artifacts/risk.json + +# Generate provenance + verify reproducibility +cli/fixops-ci provenance attest --artifact dist/fixops.tar.gz --out artifacts/attestations/fixops.json ... 
+cli/fixops-ci repro verify --tag v1.2.3 --plan build/plan.yaml + +# Package evidence bundle +cli/fixops-ci evidence bundle \ + --tag v1.2.3 \ + --normalized artifacts/sbom/normalized.json \ + --quality-json analysis/sbom_quality_report.json \ + --quality-html reports/sbom_quality_report.html \ + --risk artifacts/risk.json \ + --provenance-dir artifacts/attestations \ + --repro-dir artifacts/repro/attestations \ + --policy config/policy.yml \ + --out evidence +``` + +If `--sign-key` is provided (typically populated from a GitHub Actions secret containing a cosign private key), the CLI will execute `cosign sign-blob` to produce `MANIFEST.yaml.sig` inside the bundle. + +## Outputs + +`evidence/` contains two directories: + +- `evidence/bundles/.zip` – zipped artefacts and manifest +- `evidence/manifests/.yaml` – standalone manifest for API consumption + +Each manifest includes: + +- `artefacts` – file paths, original locations, and SHA-256 digests +- `metrics` – SBOM quality metrics, risk summary, reproducibility result, provenance count +- `evaluations` – per-check status and aggregate `overall` + +## API + +The FastAPI surface exposes: + +- `GET /evidence/` – list available manifests and whether bundles are present +- `GET /evidence/{release}` – return manifest JSON (plus bundle metadata) + +App state paths derive from overlay `data_directories.evidence_dir` or default to `/evidence`. + +## Testing + +`tests/test_evidence_bundle.py` validates packager integrity, manifest structure, and CLI orchestration against golden policies. diff --git a/docs/PLAYBOOK-AUDIT.md b/docs/PLAYBOOK-AUDIT.md new file mode 100644 index 000000000..cebbb6eca --- /dev/null +++ b/docs/PLAYBOOK-AUDIT.md @@ -0,0 +1,44 @@ +# Audit Playbook + +The audit team consumes FixOps artefacts to validate releases for compliance demonstrations. This guide maps each control to the +supporting evidence surfaces. + +## Artefact collection + +1. **Download evidence bundle** – Use the CLI to pull the signed bundle and manifest for the target release: + ```bash + cli/fixops-ci evidence bundle --tag vX.Y.Z --out audit_artifacts/ + ``` + Bundles land under `audit_artifacts/bundles/` and signed manifests under `audit_artifacts/manifests/`. +2. **Retrieve provenance attestations** – Copy `artifacts/attestations/*.json` and store them alongside the bundle for the audit + package. Each attestation is SLSA v1 compliant and references the corresponding git commit. +3. **Capture reproducibility proofs** – Archive the matching entries from `artifacts/repro/attestations/` to show binary/source + equivalence. +4. **Export coverage summary** – Include `reports/coverage/summary.txt` and `reports/coverage/coverage.xml` so auditors can verify + the ≥70% threshold. + +## Control verification checklist + +| Control | Evidence | How to verify | +| --- | --- | --- | +| Build provenance | `artifacts/attestations/.json` | Run `cli/fixops-provenance verify` against the release artefact. | +| Signing | Release `.sig` files | Execute `cosign verify-blob` with the release identity documented in `docs/SIGNING.md`. | +| SBOM quality | `reports/sbom_quality_report.html` | Review coverage, licence completeness, and generator variance gauges. | +| Risk scoring | `artifacts/risk.json` | Confirm FixOpsRisk values align with policy thresholds in `config/policy.yml`. | +| Reproducibility | `artifacts/repro/attestations/.json` | Ensure `match: true` and digests match the bundle manifest. 
| +| Evidence packaging | `evidence/manifests/.yaml` | Inspect the signed manifest for policy outcomes and included artefacts. | +| Coverage | `reports/coverage/summary.txt` | Check that TOTAL coverage is ≥70% and note deltas from previous release. | + +## Interview preparation + +- Review `docs/PROVENANCE-GRAPH.md`, `docs/REPRO-BUILDS.md`, and `docs/EVIDENCE-BUNDLES.md` to explain how data flows from source + control to compliance outputs. +- Bring the investor/demo dashboard screenshot (captured from `ui/dashboard/`) to illustrate telemetry coverage. +- Summarise release history using `CHANGELOG.md` with emphasis on controls tightened during Phases 6–10. + +## Storage & retention + +- Store collected bundles and attestations in the secured audit bucket with immutable retention (minimum 12 months). +- Record verification commands and hashes in the audit ticket so the review is reproducible by third parties. +- Delete local copies after transfer to avoid stale artefacts drifting from the canonical release. + diff --git a/docs/PLAYBOOK-DEV.md b/docs/PLAYBOOK-DEV.md new file mode 100644 index 000000000..5c90fcc4f --- /dev/null +++ b/docs/PLAYBOOK-DEV.md @@ -0,0 +1,82 @@ +# Developer Playbook + +This playbook captures the day-to-day conventions for contributing to the FixOps demo branch. Follow it to keep the +hardening, provenance, and evidence surfaces healthy while we iterate in public. + +## Principles + +- **Security-first defaults** – never disable signing, provenance, or repro verification in committed code. Prefer feature + toggles backed by environment variables. +- **Deterministic artefacts** – all reproducible outputs (`artifacts/`, `analysis/`, `reports/`, `evidence/`) must be generated + from committed fixtures or documented commands so investors can replay the run. +- **Small, reviewable commits** – each phase lands as a single, scoped commit with matching docs/tests so the provenance chain + stays auditable. + +## Environment bootstrap + +```bash +python -m venv .venv +source .venv/bin/activate +pip install -r requirements.txt -r requirements.dev.txt +``` + +For CI parity install the optional extras used by the API and demo stack: + +```bash +pip install -r apps/api/requirements-optional.txt # optional, enables richer FastAPI features +``` + +## Local quality gates + +| Task | Command | +| --- | --- | +| Format | `black .` | +| Import hygiene | `isort .` | +| Lint | `flake8` | +| Type checking | `mypy .` | +| Unit tests | `pytest` | +| Coverage report (target ≥ 70%) | `coverage run -m pytest && coverage report -m` | +| HTML coverage export | `coverage html -d reports/coverage/html && coverage xml -o reports/coverage/coverage.xml` | +| Evidence bundle smoke test | `cli/fixops-ci evidence bundle --help` | + +`reports/coverage/summary.txt` is committed so reviewers can diff the aggregate metrics; regenerate it with +`coverage report -m > reports/coverage/summary.txt` after running the full coverage command above. + +## Continuous integration expectations + +The `qa` workflow (`.github/workflows/qa.yml`) enforces linting, mypy, unit tests, and coverage on every push and pull request. +Local changes should pass the same command matrix before you open a PR: + +```bash +coverage run -m pytest +coverage xml -o reports/coverage/coverage.xml +coverage report -m > reports/coverage/summary.txt +mypy . +flake8 +``` + +Keep `reports/coverage/coverage.xml` and `reports/coverage/summary.txt` up to date in your commits so CI can display the +threshold deltas. 
+ +## Telemetry-aware development + +- Telemetry is enabled by default (OTLP HTTP → `http://collector:4318`). +- Disable spans and metrics locally with `FIXOPS_DISABLE_TELEMETRY=1` or point the exporter to another collector by overriding + `OTEL_EXPORTER_OTLP_ENDPOINT`. +- When running the demo stack (`docker compose -f docker-compose.demo.yml up --build`), traces flow to the bundled collector and + surface in the dashboard at `http://localhost:8080`. + +## Demo & evidence workflow + +1. Generate SBOM, risk, provenance, and repro artefacts via the dedicated CLIs (`cli/fixops-sbom`, `cli/fixops-risk`, + `cli/fixops-provenance`, `cli/fixops-repro`). +2. Package evidence bundles using `cli/fixops-ci evidence bundle --tag vX.Y.Z`. +3. Start the observability stack: `docker compose -f docker-compose.demo.yml up --build`. +4. Browse `http://localhost:8080` for the telemetry-enabled dashboard and download bundles from `backend/api/evidence/{release}`. + +## Branching and reviews + +- Work from short-lived branches that carry the phase prefix (e.g. `feature/phase10-hardening`). +- Document every new surface in the README documentation map and backfill playbook/security posture updates. +- Attach coverage diffs and demo screenshots to PR descriptions so stakeholders can validate the investor-ready experience. + diff --git a/docs/PLAYBOOK-SEC.md b/docs/PLAYBOOK-SEC.md new file mode 100644 index 000000000..f6384a5c7 --- /dev/null +++ b/docs/PLAYBOOK-SEC.md @@ -0,0 +1,55 @@ +# Security Playbook + +This playbook equips the security engineering pod with actionable steps to govern the FixOps provenance, signing, and risk +surfaces introduced across Phases 1–10. + +## Daily posture checks + +1. **Verify CI attestation uploads** – On each tagged release download the attestation bundle from the provenance workflow + artefacts (`artifacts/attestations/`) and run: + ```bash + cli/fixops-provenance verify --artifact dist/ --attestation artifacts/attestations/.json + ``` +2. **Confirm cosign signatures** – After provenance verification, validate that `.sig` files attached to the release match the + published artefacts: + ```bash + cosign verify-blob \ + --certificate-identity "https://github.com/DevOpsMadDog/Fixops/.github/workflows/release-sign.yml@refs/tags/" \ + --certificate-oidc-issuer "https://token.actions.githubusercontent.com" \ + --signature release-artifacts/.sig \ + dist/ + ``` +3. **Monitor KEV-linked risk** – Query the provenance graph API for the latest release to ensure KEV components are mitigated: + ```bash + http GET :8000/api/graph/kev-components?releases=5 + ``` + +## Incident response triggers + +- **Reproducibility failure** – If `.github/workflows/repro-verify.yml` reports a mismatch, consume the attestation under + `artifacts/repro/attestations/.json` to identify the differing digest, lock new releases, and coordinate with engineering + for remediation. +- **Evidence bundle warning** – When the CI agent emits `warn` or `fail` evaluations inside `evidence/manifests/.yaml`, halt + release promotion and open a security review ticket that references the manifest and associated SBOM/risk payloads. +- **Graph anomalies** – Use the provenance graph anomaly query (`/api/graph/anomalies`) to spot unexpected downgrades or version + drifts. Confirm the downgrade was intentional via changelog context before re-enabling delivery. + +## Secrets management + +- GH secrets consumed by provenance, signing, and evidence workflows are listed in `docs/CI-SECRETS.md`. 
Rotate them quarterly + and before adding new maintainers. +- Never store signing keys or cosign passwords in the repository. Use GitHub’s OIDC support for keyless signing where possible. + +## Hardening checklist for releases + +- [ ] Coverage summary at or above 70% committed under `reports/coverage/summary.txt`. +- [ ] CHANGELOG entry referencing risk, provenance, repro, and evidence changes for the release cycle. +- [ ] Screenshots of the demo dashboard archived in the release notes for investor-ready storytelling. +- [ ] Branch protections and signed commits enforced on the default branch (documented in `docs/SECURITY-POSTURE.md`). + +## Communications + +- Page the security engineering on-call before rotating secrets or introducing new CI workflows touching signing. +- Capture incident post-mortems in `audit/SECURITY.md` and link them from the next `CHANGELOG.md` section. +- Share weekly summaries of EPSS/KEV deltas with stakeholders using `cli/fixops-risk score` outputs to inform patch cadence. + diff --git a/docs/PROVENANCE-GRAPH.md b/docs/PROVENANCE-GRAPH.md new file mode 100644 index 000000000..ba691a5da --- /dev/null +++ b/docs/PROVENANCE-GRAPH.md @@ -0,0 +1,63 @@ +# Provenance Graph MVP + +The provenance graph stitches together FixOps artefacts (git history, SLSA attestations, normalized SBOMs, risk reports, and release metadata) into a queryable knowledge graph. The Phase 6 MVP ships with a lightweight SQLite store plus an in-memory [NetworkX](https://networkx.org/) view that powers API queries and future visualisations. + +## Data ingestion + +The graph builder gathers data from the following locations (override via `config/data_directories`): + +| Source | Default location | Notes | +| --- | --- | --- | +| Git metadata | repository working tree | Latest 100 commits with parents, authors, timestamps | +| SLSA attestations | `artifacts/attestations/` | Parsed via `services.provenance.attestation` | +| Normalized SBOM | `artifacts/sbom/normalized.json` | Component metadata, hashes, licences | +| Risk report | `artifacts/risk.json` | FixOpsRisk, KEV/EPSS annotations per CVE | +| Release manifest | `analysis/releases.json` | Optional mapping of releases → artefacts → component versions | + +Each ingestion step upserts nodes and typed edges into SQLite while mirroring the structure in a `networkx.MultiDiGraph`. Subsequent phases can enrich the dataset without refactoring earlier steps. + +## Query capabilities + +Three query families are exposed via the API and service layer: + +1. **Lineage** – Trace back an artefact (e.g. release tarball) to its attestation, builder, source URI, materials, and upstream commits. +2. **KEV coverage** – List components that still carry KEV CVEs across the most recent _N_ releases, including which CVEs are outstanding per component. +3. **Version anomalies** – Detect unexpected downgrades or regressions by comparing component versions across ordered releases. + +The `services.graph.ProvenanceGraph` class exposes these queries along with ingestion helpers, enabling CLI tooling or notebooks to reuse the same logic. + +## API surface + +The FastAPI app mounts `backend.api.graph` and provides: + +- `GET /graph/` – Node/edge counts plus configured data sources. +- `GET /graph/lineage/{artifact}` – Structured lineage payload for a given artefact filename. +- `GET /graph/kev-components?last=N` – Components with KEV CVEs across the most recent N releases (default: 3). 
+- `GET /graph/anomalies` – Component downgrades/version drift alerts derived from release manifests. + +All endpoints require the same authentication strategy configured for the rest of the ingestion API. + +## Release manifests + +Add `analysis/releases.json` (or configure `graph_dir`) with entries like: + +```json +{ + "releases": [ + { + "tag": "v1.2.0", + "date": "2024-01-05T00:00:00Z", + "artifacts": ["fixops-v1.2.0.tar.gz"], + "components": [ + {"slug": "pkg-a", "name": "pkgA", "version": "1.2.0"} + ] + } + ] +} +``` + +Component `slug` values should match the normalized SBOM / risk report entries to unlock KEV correlation and anomaly detection. + +## Testing + +Unit coverage for the MVP lives in `services/graph/tests/test_graph.py`, exercising ingestion, lineage, KEV queries, and anomaly detection against golden fixtures. diff --git a/docs/PROVENANCE.md b/docs/PROVENANCE.md new file mode 100644 index 000000000..eacd93d0f --- /dev/null +++ b/docs/PROVENANCE.md @@ -0,0 +1,79 @@ +# FixOps Provenance + +This guide explains how FixOps emits [SLSA provenance](https://slsa.dev/spec/v1.0/provenance) +metadata for release artefacts, how to generate and verify attestations locally, and how to +retrieve attestations via the API or CI pipeline outputs. + +## Schema Overview + +Attestations generated by FixOps follow the SLSA v1 provenance schema and include: + +| Field | Description | +| ----- | ----------- | +| `slsaVersion` | Always `"1.0"` to indicate SLSA v1 provenance. | +| `builder.id` | URI identifying the build service (defaults to `urn:fixops:builder:local`). | +| `buildType` | URI describing the build recipe (defaults to GitHub Actions runs). | +| `source.uri` | Source repository URI that produced the artefact. | +| `metadata` | Build metadata including start/finish timestamps and reproducibility flag. | +| `subject[]` | Array of artefacts covered by the attestation; FixOps records SHA-256 digests. | +| `materials[]` | Optional array of build inputs with URIs and optional digests. | + +All attestations are canonical JSON with stable key ordering to simplify integrity checks. + +## CLI Usage + +Use the `cli/fixops-provenance` tool to generate and validate attestations: + +```bash +# Generate an attestation for a local artefact +cli/fixops-provenance attest \ + --artifact artefacts/build.tar.gz \ + --out artefacts/attestations/build.tar.gz.json \ + --builder-id "urn:fixops:builder:ci" \ + --source-uri "https://github.com/DevOpsMadDog/Fixops@${GIT_SHA}" + +# Verify an attestation +cli/fixops-provenance verify \ + --artifact artefacts/build.tar.gz \ + --attestation artefacts/attestations/build.tar.gz.json \ + --builder-id "urn:fixops:builder:ci" \ + --source-uri "https://github.com/DevOpsMadDog/Fixops@${GIT_SHA}" +``` + +Additional materials or metadata can be supplied with `--material '{"uri": "dependency.tgz"}'` +or `--metadata '{"buildInvocationId": "123"}'` (repeat `--material` for multiple entries). + +The CLI returns exit code `0` on success and `1` if verification fails. + +## API Endpoints + +Authenticated callers can download stored attestations from the FastAPI service: + +- `GET /provenance/` → Lists available attestation JSON filenames. +- `GET /provenance/{name}` → Returns the parsed attestation for `name`. The endpoint + accepts either the artefact stem or full filename (e.g., `build.tar.gz` or + `build.tar.gz.json`). + +The API reads attestations from the configured `provenance_dir` (default +`artifacts/attestations/` within the allowlisted data root). 
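+
+For automation that prefers the API over the CLI, the sketch below (not part of the shipped tooling) downloads a stored attestation and compares its recorded SHA-256 subject digest against a local artefact. It assumes a local deployment at `http://localhost:8000`, a bearer-token header (substitute whichever auth strategy your deployment configures), and the standard SLSA v1 subject layout described above.
+
+```python
+import hashlib
+
+import requests  # already listed in requirements.txt
+
+API_BASE = "http://localhost:8000"   # assumed local deployment
+TOKEN = "replace-me"                 # assumed bearer token; adapt to your auth setup
+ARTEFACT = "artefacts/build.tar.gz"  # local file to cross-check
+
+
+def sha256_of(path: str) -> str:
+    digest = hashlib.sha256()
+    with open(path, "rb") as handle:
+        for chunk in iter(lambda: handle.read(8192), b""):
+            digest.update(chunk)
+    return digest.hexdigest()
+
+
+response = requests.get(
+    f"{API_BASE}/provenance/build.tar.gz",
+    headers={"Authorization": f"Bearer {TOKEN}"},
+    timeout=10,
+)
+response.raise_for_status()
+attestation = response.json()
+
+# SLSA v1 records one subject entry per artefact, each carrying a sha256 digest.
+recorded = {entry["digest"]["sha256"] for entry in attestation.get("subject", [])}
+print("match" if sha256_of(ARTEFACT) in recorded else "MISMATCH")
+```
+
+A mismatch should be treated the same way as a failed `cli/fixops-provenance verify` run; the same check works offline by swapping the HTTP call for a `json.load` of a CI-downloaded attestation.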
+ +## CI Workflow + +When a release tag is pushed, `.github/workflows/provenance.yml`: + +1. Archives the repository contents to `artifacts/fixops-${{ github.sha }}.tar.gz`. +2. Generates an attestation via `cli/fixops-provenance attest`. +3. Uploads the resulting JSON to the workflow artefacts bucket under `attestations/`. + +The uploaded attestation can be downloaded from the workflow run and verified with the CLI +commands shown above. + +## Local Verification Flow + +1. Build or download the artefact you wish to validate. +2. Generate (or fetch) the provenance JSON. +3. Run `cli/fixops-provenance verify` against the artefact and attestation. +4. Optionally query the API at `/provenance/` to cross-check the stored version. + +Successful verification confirms that the attested SHA-256 digest matches the local artefact +and that the recorded builder, source URI, and build type align with expectations. diff --git a/docs/REPRO-BUILDS.md b/docs/REPRO-BUILDS.md new file mode 100644 index 000000000..92af5eb30 --- /dev/null +++ b/docs/REPRO-BUILDS.md @@ -0,0 +1,74 @@ +# Reproducible Builds Verifier + +Phase 7 introduces a hermetic rebuild harness that replays release plans, re-creates artefacts, and records whether the rebuilt digest matches the attested reference. + +## Build plans + +Plans live in `build/plan.yaml` (or alternative paths passed to the CLI) and support the following keys: + +| Key | Required | Description | +| --- | --- | --- | +| `version` | No | Schema version marker (default `1`). | +| `tag` | No | Populated automatically from the CLI `--tag` flag; used for attestation naming. | +| `artifact` | Yes | Relative path to the artefact produced inside the hermetic workspace. `"{tag}"` tokens are replaced automatically. | +| `sources` | No | List of paths (strings or `{path, destination}` mappings) copied from the repository into the workspace before execution. | +| `steps` | Yes | Ordered commands to run. Each step accepts `run` as a string (shell) or list (exec form). | +| `reference_attestation` | Optional | Existing SLSA attestation used to derive the expected digest for comparison. | +| `reference_artifact` | Optional | Canonical artefact on disk that supplies the expected digest if no attestation is available. | +| `expected_digest` | Optional | Literal digest string (`sha256:...`) to compare against when no reference file exists. | +| `environment` | No | Environment variables injected into each step. + +Example (`build/plan.yaml`): + +```yaml +version: 1 +artifact: dist/fixops-{tag}.tar.gz +sources: + - path: . + destination: source +steps: + - run: | + mkdir -p dist + tar -czf dist/fixops-{tag}.tar.gz -C source . +reference_attestation: artifacts/attestations/fixops-{tag}.json +``` + +## CLI usage + +The `fixops-repro` CLI orchestrates plan loading, execution, and attestation writing: + +```bash +# Rebuild the v1.2.3 release and store the attestation under artifacts/repro/attestations +cli/fixops-repro verify --tag v1.2.3 --plan build/plan.yaml --out artifacts/repro/attestations +``` + +Exit codes: + +- `0` – rebuild digest matches the attested/reference digest. +- `1` – referenced artefact/attestation missing. +- `2` – invalid plan definition. +- `3` – digest mismatch. 
+ +## Output + +Successful runs emit `artifacts/repro/attestations/.json` with the following schema: + +```json +{ + "tag": "v1.2.3", + "artifact": "dist/fixops-v1.2.3.tar.gz", + "generated_digest": {"sha256": "..."}, + "reference_digest": {"sha256": "..."}, + "match": true, + "reference_source": "attestation:artifacts/attestations/fixops-v1.2.3.json", + "verified_at": "2024-07-01T12:00:00Z" +} +``` + +## Continuous verification + +`.github/workflows/repro-verify.yml` runs nightly (03:00 UTC) or on-demand via `workflow_dispatch`. It installs dependencies, executes `cli/fixops-repro verify`, and uploads generated attestations as workflow artefacts for downstream auditing. + +## Testing + +`services/repro/tests/test_verifier.py` covers happy-path and mismatch scenarios using toy plans to ensure digest comparison, attestation emission, and exit codes behave as expected. diff --git a/docs/RISK-SCORING.md b/docs/RISK-SCORING.md new file mode 100644 index 000000000..d6ee610b0 --- /dev/null +++ b/docs/RISK-SCORING.md @@ -0,0 +1,86 @@ +# Risk Scoring Pipeline + +The FixOps risk pipeline combines exploitability intelligence (EPSS), the CISA +Known Exploited Vulnerabilities (KEV) catalogue, component freshness and +exposure hints to produce a reproducible **FixOpsRisk** score per +component/CVE. + +## Inputs + +- **Normalized SBOM** – produced by `fixops-sbom normalize`, enriched with + component exposure hints and vulnerability metadata. +- **EPSS feed** – cached CSV stored in `data/feeds/epss.csv`. +- **CISA KEV feed** – cached JSON stored in `data/feeds/kev.json`. + +Both feeds can be refreshed with the helper utilities under `risk/feeds/` or via +CI. + +## Risk formula + +For every vulnerability discovered in a component we derive the following +signals: + +| Signal | Description | Normalisation | +| ------ | ----------- | ------------- | +| `epss` | Exploit Prediction Scoring System value for the CVE. | CSV value clamped to `[0, 1]`. | +| `kev` | Whether the CVE is present in the KEV catalogue. | `1.0` if present, otherwise `0.0`. | +| `version_lag` | How stale the deployed version is compared to a fixed release or age metadata. | Normalised to `[0, 1]` using a 180 day cap. | +| `exposure` | Exposure hints from SBOM metadata (e.g. `internet`, `internal`). | Highest exposure weight mapped from configured aliases. | + +Weights can be tuned but default to: + +- `epss`: **0.50** +- `kev`: **0.20** +- `version_lag`: **0.20** +- `exposure`: **0.10** + +The FixOpsRisk score is the weighted sum of the normalised values scaled to a +percentage: + +``` +FixOpsRisk = 100 × (epss×0.50 + kev×0.20 + version_lag×0.20 + exposure×0.10) +``` + +Outputs include the raw contributions, applied weights, exposure flags and the +normalised risk value so downstream services can audit the calculation. + +## CLI usage + +``` +# Generate risk scores into artifacts/risk.json +./cli/fixops-risk score --sbom artifacts/sbom/normalized.json --out artifacts/risk.json +``` + +Optional overrides allow pointing at pre-fetched feeds: + +``` +./cli/fixops-risk score --sbom artifacts/sbom/normalized.json \ + --epss data/feeds/epss.csv --kev data/feeds/kev.json \ + --out artifacts/risk.json +``` + +The CLI prints how many components received scores. Re-running the command will +update the JSON deterministically. + +## API endpoints + +The FastAPI application exposes the following read-only endpoints once a risk +report is stored under the configured `risk_dir`: + +- `GET /risk/` – summary metadata (generated timestamp, counts). 
+- `GET /risk/component/{slug}` – component level risk JSON. +- `GET /risk/cve/{cve}` – KEV/EPSS enriched view for the CVE. + +Component slugs are derived from the component PURL (or name/version) by +lowercasing and replacing separators. + +## Verifying results + +1. Normalize SBOMs and build feeds (e.g. via CI cron). +2. Run the CLI scoring command and inspect `artifacts/risk.json`. +3. Call the API endpoints above to fetch the component or CVE level payloads. +4. Cross-check the `risk_breakdown` to confirm inputs (EPSS, KEV, exposure, + version lag) were captured correctly. + +The JSON structure is stable to support downstream automation and HTML report +rendering in future phases. diff --git a/docs/SBOM-QUALITY.md b/docs/SBOM-QUALITY.md new file mode 100644 index 000000000..65ee7c885 --- /dev/null +++ b/docs/SBOM-QUALITY.md @@ -0,0 +1,44 @@ +# SBOM Quality Normalization and Reporting + +This guide documents how FixOps consolidates software bills of materials (SBOMs) from multiple generators, removes duplicates, and produces measurable quality scores. + +## Normalization workflow + +1. Export SBOMs from supported generators (for example Syft, Trivy, and osv-scanner) in JSON form. +2. Run the normalizer CLI to merge and de-duplicate the inputs: + + ```bash + ./cli/fixops-sbom normalize --in path/to/syft.json path/to/trivy.json --out artifacts/sbom/normalized.json + ``` + + The command accepts CycloneDX and SPDX JSON documents. Components are deduplicated using the combination of Package URL (purl), version, and the most reliable hash present. Source generator metadata is retained for variance analysis. + +## Quality metrics + +Generate machine-readable and human-friendly quality reports from the normalized SBOM: + +```bash +./cli/fixops-sbom quality --in artifacts/sbom/normalized.json --html reports/sbom_quality_report.html +``` + +This command emits two artefacts: + +- `analysis/sbom_quality_report.json` – canonical metrics for automated validation. +- `reports/sbom_quality_report.html` – dashboard-style view for manual review. + +### Metrics captured + +| Metric | Description | +| --- | --- | +| Coverage % | Ratio of unique components versus total component observations across generators. | +| License Coverage % | Percentage of unique components with at least one declared or concluded license. | +| Resolvability % | Share of components that include a package URL or checksum, making downstream enrichment possible. | +| Generator Variance Score | Jaccard-like score (0 to 1) showing how much component coverage diverges between generators. | + +All outputs are deterministic for a fixed input set, ensuring reproducible reports across runs. + +## Troubleshooting + +- **Missing metrics** – confirm the normalized SBOM includes metadata for `total_components` and `unique_components`. The normalizer sets these automatically. +- **Unexpected duplicates** – ensure each generator exported the same SBOM format (CycloneDX or SPDX) and that component entries include consistent purl/hash values. +- **Directory creation** – the CLI automatically creates the `artifacts/sbom/`, `analysis/`, and `reports/` directories when writing outputs. diff --git a/docs/SECURITY-POSTURE.md b/docs/SECURITY-POSTURE.md new file mode 100644 index 000000000..8c0df9701 --- /dev/null +++ b/docs/SECURITY-POSTURE.md @@ -0,0 +1,45 @@ +# Security Posture + +This document captures the security guardrails configured for the FixOps investor demo branch after Phase 10 hardening. 
It +complements `docs/PLAYBOOK-SEC.md` and `docs/PLAYBOOK-AUDIT.md` with policy-level expectations. + +## Branch protection & commit integrity + +- The default branch requires pull requests with at least one approving review. Direct pushes are disabled for maintainers. +- Status checks enforced: + - `qa` workflow (`.github/workflows/qa.yml`) covering linting, type checking, tests, and coverage ≥70%. + - `provenance` workflow on tags to ensure attestations are produced and uploaded. + - `release-sign` workflow to guarantee cosign signatures exist for every release artefact. +- Signed commits are mandatory. Contributors configure local Git with GPG or Sigstore signing before merging. Unsigned commits + are rejected by branch protections. + +## Dependency hygiene + +- Dependabot is enabled for GitHub Actions, pip dependencies, and Dockerfiles. Review windows are capped at 48 hours for security + patches and 7 days for general updates. +- Use the SBOM normaliser (`cli/fixops-sbom`) to cross-check new dependencies. Licensing and generator variance metrics are + tracked in `reports/sbom_quality_report.html`. + +## Secrets governance + +- Secrets required for CI flows are enumerated in `docs/CI-SECRETS.md`. Only the security operations group manages their + rotation. +- GitHub’s OIDC token is leveraged for cosign keyless signing. No long-lived signing keys or passwords are stored in the + repository. +- Release evidence bundles are signed using the CI-managed key and published under `evidence/` for audit consumption. + +## Monitoring & observability + +- OpenTelemetry traces and metrics are emitted by provenance, risk, graph, and repro services. The demo stack forwards telemetry + to the collector defined in `config/otel-collector-demo.yaml`. +- Dashboard panels in `ui/dashboard/` display SBOM quality, FixOpsRisk, provenance lineage, and reproducibility status so + stakeholders can monitor control health in real time. + +## Compliance artefacts + +- Coverage, provenance, signing, SBOM, risk, reproducibility, and evidence outputs are preserved in git-tracked directories so + releases can be replayed. +- `CHANGELOG.md` summarises the security-impacting changes across Phases 1–10 and must be updated before each release cut. +- Playbooks (`docs/PLAYBOOK-DEV.md`, `docs/PLAYBOOK-SEC.md`, `docs/PLAYBOOK-AUDIT.md`) describe operational procedures for + engineering, security, and auditors respectively. + diff --git a/docs/SIGNING.md b/docs/SIGNING.md new file mode 100644 index 000000000..edb458b3d --- /dev/null +++ b/docs/SIGNING.md @@ -0,0 +1,79 @@ +# Release Signing and Verification + +The `release-sign.yml` GitHub Actions workflow builds FixOps release archives, signs them with +[Cosign](https://github.com/sigstore/cosign), and uploads the resulting signatures and +attestations alongside the release. Local helper scripts under `scripts/signing/` wrap Cosign to +make consistent signing and verification easier for engineers. + +## Workflow summary + +1. Create a source archive with `git archive` and generate a SLSA v1 predicate via + `cli/fixops-provenance`. +2. Install Cosign using `sigstore/cosign-installer@v3` and hydrate the private key from the + `COSIGN_PRIVATE_KEY` secret (protected by `COSIGN_PASSWORD`). +3. Run `scripts/signing/sign-artifact.sh` to: + - produce a detached signature (`cosign sign-blob`), + - emit a DSSE attestation (`cosign attest-blob`) around the provenance predicate, and + - capture an optional verification bundle for offline validation. +4. 
Validate the outputs with `cosign verify-blob` before publishing, and finally upload the + signed archive plus attestation materials as release assets. + +The workflow lives at `.github/workflows/release-sign.yml`. Secrets required for the pipeline are +documented in [`docs/CI-SECRETS.md`](CI-SECRETS.md). + +## Local signing helpers + +* `scripts/signing/sign-artifact.sh` — wraps `cosign sign-blob`/`cosign attest-blob` to generate + detached signatures and DSSE envelopes. +* `scripts/signing/verify-artifact.sh` — wraps `cosign verify-blob` to validate either a detached + signature or an attestation bundle against the release artifact. + +Both scripts accept `--help` for usage details and honour the `COSIGN_PASSWORD`/`COSIGN_KEY_PATH` +(and `COSIGN_PUBLIC_KEY`) environment variables. + +## Verifying release downloads + +1. Download the release tarball, detached signature, and bundle files from the GitHub release + page. Rename them as needed: + + ```bash + export TAG=v1.2.3 + curl -LO https://github.com/DevOpsMadDog/Fixops/releases/download/${TAG}/fixops-${TAG}.tar.gz + curl -LO https://github.com/DevOpsMadDog/Fixops/releases/download/${TAG}/fixops-${TAG}.tar.gz.sig + curl -LO https://github.com/DevOpsMadDog/Fixops/releases/download/${TAG}/fixops-${TAG}.bundle + ``` + +2. Verify the detached signature with `cosign verify-blob`: + + ```bash + cosign verify-blob \ + --key cosign.pub \ + --signature fixops-${TAG}.tar.gz.sig \ + fixops-${TAG}.tar.gz + ``` + +3. Validate the DSSE attestation bundle and provenance payload with Cosign (also via + `verify-blob`): + + ```bash + cosign verify-blob \ + --key cosign.pub \ + --bundle fixops-${TAG}.bundle \ + fixops-${TAG}.tar.gz + ``` + +4. If the provenance attestation is mirrored to an OCI registry (for example GHCR), use + `cosign verify-attestation` to inspect the attached SLSA predicate: + + ```bash + cosign verify-attestation \ + --key cosign.pub \ + --type slsaprovenance \ + ghcr.io/devopsmaddog/fixops/fixops-release:${TAG} + ``` + + The `--local-image` flag can be used with a local OCI layout produced via `cosign save` if you + prefer offline verification. + +These checks ensure the downloaded artifacts match the official FixOps release and that the build +metadata has not been tampered with. diff --git a/docs/TASK-PLAN.md b/docs/TASK-PLAN.md new file mode 100644 index 000000000..88fa9810e --- /dev/null +++ b/docs/TASK-PLAN.md @@ -0,0 +1,22 @@ +# Multi-Phase Execution Plan + +Phase 1 established the authoritative architecture inventory (see `docs/ARCH-INVENTORY.md`). +Phases 2–10 focus on incremental hardening of the ingestion pipeline, enterprise services, +and integration adapters. Each phase lists the exact code touchpoints and validation steps. + +| Phase | Objective | Code touchpoints (file → function/class) | Validation & docs | +| --- | --- | --- | --- | +| 2 | Harden overlay loading and path safety | `core/overlay_runtime.py` → `prepare_overlay`; `core/paths.py` → `verify_allowlisted_path`; `config/fixops.overlay.yml` (add explicit directory schema comments) | Extend `tests/test_overlay_runtime.py::test_prepare_overlay_*` fixtures and update `docs/CONFIG_GUIDE.md` with new validation notes. 
| +| 3 | Tighten chunked upload handling & API auth ergonomics | `apps/api/upload_manager.py` → `ChunkUploadManager._persist_chunk`; `apps/api/app.py` → `_verify_api_key` inner dependency; `apps/api/routes/enhanced.py` (propagate auth errors) | Add regression cases in `tests/test_api_dependencies.py::test_upload_manager_*` and document auth flows in `docs/PLATFORM_RUNBOOK.md`. | +| 4 | Improve guardrail + policy explainability in pipeline responses | `apps/api/pipeline.py` → `_evaluate_guardrails`, `_build_policy_summary`; `core/policy.py` → `build_policy_summary`; `core/modules.py` → `PipelineContext` (add policy field metadata) | Update `tests/test_pipeline_matching.py::test_guardrail_rollup` and enhance `docs/ARCHITECTURE.md` guardrail narrative. | +| 5 | Stabilise enhanced decision provider fallbacks | `core/enhanced_decision.py` → `EnhancedDecisionEngine.evaluate_pipeline`; `fixops-enterprise/src/services/enhanced_decision_engine.py` → `EnhancedDecisionService.reload`; `new_apps/api/processing/knowledge_graph.py` (optional provider hints) | Extend `tests/test_enterprise_enhanced_api.py::test_enhanced_capabilities` and capture provider fallback guidance in `docs/PLATFORM_RUNBOOK.md`. | +| 6 | Streamline CLI stage execution & evidence persistence | `core/stage_runner.py` → `StageRunner.run_stage`; `core/storage.py` → `ArtefactArchive.register_run`; `fixops-enterprise/src/services/run_registry.py` → `RunRegistry.ensure_run` | Enhance `tests/test_cli_commands.py::test_stage_run_sequence` and document run lifecycle in `docs/USAGE_GUIDE.html` (CLI section). | +| 7 | Strengthen enterprise evidence signing & retention | `fixops-enterprise/src/services/evidence.py` → `EvidenceStore.attach_signature`; `fixops-enterprise/src/services/signing.py` → `sign_manifest`; `fixops-enterprise/src/services/compliance.py` → `ComplianceEngine.evaluate` (attach retention metadata) | Expand `tests/test_ci_adapters.py::test_jenkins_signed_response` and update `docs/SECURITY.md` with signing/retention controls. | +| 8 | Unify CI adapter telemetry & marketplace hooks | `integrations/github/adapter.py` → `GitHubCIAdapter.handle_webhook`; `integrations/jenkins/adapter.py` → `JenkinsCIAdapter.ingest`; `integrations/sonarqube/adapter.py` → `SonarQubeAdapter.ingest`; ensure shared helpers in `fixops-enterprise/src/services/decision_engine.py` | Broaden `tests/test_ci_adapters.py` coverage (new telemetry assertions) and record adapter usage in `docs/INTEGRATIONS.md`. | +| 9 | Bolster enterprise observability & rate limiting defaults | `fixops-enterprise/src/core/middleware.py` → `PerformanceMiddleware.dispatch`, `RateLimitMiddleware._consume_token`; `fixops-enterprise/src/services/metrics.py` → aggregation helpers; `apps/api/app.py` → analytics instrumentation wiring | Add `tests/test_rate_limit.py` assertions plus new metrics snapshot tests, and update `docs/OBSERVABILITY.md`. | +| 10 | Close documentation & onboarding gaps | `docs/PLATFORM_RUNBOOK.md` (new operational checklist); `docs/USAGE_GUIDE.html` (CLI/API walkthrough); `ONBOARDING.md` (phase summary appendix); `README.md` (final capability matrix) | Run `scripts/generate_analysis.py` for traceability refresh and ensure `docs/PR_SUMMARY.md` captures the completed phases. 
| + +Dependencies between phases are linear: hardening the overlay (Phase 2) precedes API +adjustments (Phase 3); pipeline explainability (Phase 4) underpins enhanced decision +telemetry (Phase 5); CLI and evidence improvements (Phases 6–7) provide the data consumed by +adapter and observability upgrades (Phases 8–9) before documentation closure (Phase 10). diff --git a/evidence/__init__.py b/evidence/__init__.py new file mode 100644 index 000000000..6c6ca2c76 --- /dev/null +++ b/evidence/__init__.py @@ -0,0 +1,10 @@ +"""Evidence bundle utilities.""" + +from .packager import BundleInputs, create_bundle, load_policy, evaluate_policy + +__all__ = [ + "BundleInputs", + "create_bundle", + "load_policy", + "evaluate_policy", +] diff --git a/evidence/packager.py b/evidence/packager.py new file mode 100644 index 000000000..3ce686f14 --- /dev/null +++ b/evidence/packager.py @@ -0,0 +1,286 @@ +"""Evidence bundle creation helpers.""" + +from __future__ import annotations + +import json +import subprocess +import tempfile +import copy +from dataclasses import dataclass, field +from datetime import datetime, timezone +from hashlib import sha256 +from pathlib import Path +from typing import Any, Dict, Iterable, Mapping +from zipfile import ZipFile + +import yaml + +DEFAULT_POLICY: dict[str, Any] = { + "sbom_quality": { + "coverage_percent": {"warn_below": 80.0, "fail_below": 60.0}, + "license_coverage_percent": {"warn_below": 80.0, "fail_below": 50.0}, + }, + "risk": {"max_risk_score": {"warn_above": 70.0, "fail_above": 85.0}}, + "repro": {"require_match": True}, + "provenance": {"require_attestations": True}, +} + + +@dataclass(slots=True) +class BundleInputs: + """Paths and metadata needed to construct an evidence bundle.""" + + tag: str + normalized_sbom: Path + sbom_quality_json: Path + sbom_quality_html: Path | None + risk_report: Path + provenance_dir: Path + repro_attestation: Path + policy_path: Path | None = None + output_dir: Path = Path("evidence") + extra_paths: Iterable[Path] = field(default_factory=tuple) + sign_key: Path | None = None + + +def load_policy(policy_path: Path | None) -> dict[str, Any]: + if policy_path is None or not policy_path.is_file(): + return DEFAULT_POLICY + with policy_path.open("r", encoding="utf-8") as handle: + loaded = yaml.safe_load(handle) or {} + if not isinstance(loaded, Mapping): + return DEFAULT_POLICY + merged = copy.deepcopy(DEFAULT_POLICY) + for section, rules in loaded.items(): + if isinstance(rules, Mapping): + existing = merged.setdefault(section, {}) + if isinstance(existing, Mapping): + existing.update(rules) # shallow merge is sufficient for numeric thresholds + else: + merged[section] = rules + return merged + + +def _digest_file(path: Path) -> str: + digest = sha256() + with path.open("rb") as handle: + for chunk in iter(lambda: handle.read(8192), b""): + digest.update(chunk) + return digest.hexdigest() + + +def _evaluate_rules(value: float, rules: Mapping[str, Any]) -> str: + status = "pass" + fail_above = rules.get("fail_above") + fail_below = rules.get("fail_below") + warn_above = rules.get("warn_above") + warn_below = rules.get("warn_below") + if fail_above is not None and value > float(fail_above): + return "fail" + if fail_below is not None and value < float(fail_below): + return "fail" + if warn_above is not None and value > float(warn_above): + status = "warn" + if warn_below is not None and value < float(warn_below): + status = "warn" + return status + + +def evaluate_policy(policy: Mapping[str, Any], *, metrics: Mapping[str, Any]) -> 
dict[str, Any]: + evaluations: dict[str, Any] = {"checks": {}, "overall": "pass"} + + sbom_metrics = metrics.get("sbom", {}) if isinstance(metrics.get("sbom"), Mapping) else {} + sbom_policy = policy.get("sbom_quality", {}) if isinstance(policy.get("sbom_quality"), Mapping) else {} + for metric in ("coverage_percent", "license_coverage_percent"): + value = sbom_metrics.get(metric) + if value is None: + continue + status = _evaluate_rules(float(value), sbom_policy.get(metric, {})) + evaluations["checks"][f"sbom_{metric}"] = {"value": float(value), "status": status} + + risk_metrics = metrics.get("risk", {}) if isinstance(metrics.get("risk"), Mapping) else {} + risk_policy = policy.get("risk", {}) if isinstance(policy.get("risk"), Mapping) else {} + max_risk = risk_metrics.get("max_risk_score") + if max_risk is not None: + status = _evaluate_rules(float(max_risk), risk_policy.get("max_risk_score", {})) + evaluations["checks"]["risk_max_risk_score"] = { + "value": float(max_risk), + "status": status, + } + + repro_match = metrics.get("repro", {}).get("match") if isinstance(metrics.get("repro"), Mapping) else None + repro_policy = policy.get("repro", {}) if isinstance(policy.get("repro"), Mapping) else {} + if repro_match is not None: + required = bool(repro_policy.get("require_match", True)) + status = "pass" if (not required or repro_match) else "fail" + evaluations["checks"]["repro_match"] = {"value": bool(repro_match), "status": status} + + provenance_policy = policy.get("provenance", {}) if isinstance(policy.get("provenance"), Mapping) else {} + attestation_count = int(metrics.get("provenance", {}).get("count", 0)) + if provenance_policy.get("require_attestations"): + status = "pass" if attestation_count > 0 else "fail" + evaluations["checks"]["provenance_attestations"] = { + "value": attestation_count, + "status": status, + } + + overall = "pass" + for details in evaluations["checks"].values(): + if details.get("status") == "fail": + overall = "fail" + break + if details.get("status") == "warn" and overall != "fail": + overall = "warn" + evaluations["overall"] = overall + return evaluations + + +def _collect_files(paths: Iterable[Path]) -> list[Path]: + files: list[Path] = [] + for path in paths: + if not path: + continue + if path.is_file(): + files.append(path) + elif path.is_dir(): + for candidate in sorted(path.rglob("*")): + if candidate.is_file(): + files.append(candidate) + return files + + +def _sign_manifest(manifest_path: Path, signature_path: Path, key_path: Path) -> None: + command = [ + "cosign", + "sign-blob", + "--key", + str(key_path), + "--output-signature", + str(signature_path), + str(manifest_path), + ] + subprocess.run(command, check=True) + + +def create_bundle(inputs: BundleInputs) -> dict[str, Any]: + tag = inputs.tag + output_root = inputs.output_dir + bundle_dir = output_root / "bundles" + manifest_dir = output_root / "manifests" + bundle_dir.mkdir(parents=True, exist_ok=True) + manifest_dir.mkdir(parents=True, exist_ok=True) + + required_files = [ + inputs.normalized_sbom, + inputs.sbom_quality_json, + inputs.risk_report, + inputs.repro_attestation, + ] + for path in required_files: + if not Path(path).is_file(): + raise FileNotFoundError(f"Required evidence file missing: {path}") + if inputs.provenance_dir and not inputs.provenance_dir.exists(): + raise FileNotFoundError(f"Provenance directory '{inputs.provenance_dir}' not found") + + quality_payload = json.loads(inputs.sbom_quality_json.read_text(encoding="utf-8")) + risk_payload = 
json.loads(inputs.risk_report.read_text(encoding="utf-8")) + repro_payload = json.loads(inputs.repro_attestation.read_text(encoding="utf-8")) + + provenance_files = _collect_files([inputs.provenance_dir]) if inputs.provenance_dir else [] + extra_files = _collect_files(inputs.extra_paths) + bundle_files: list[tuple[Path, str]] = [] + artefact_descriptors: list[dict[str, Any]] = [] + + mapping = [ + (inputs.normalized_sbom, f"sbom/{inputs.normalized_sbom.name}"), + (inputs.sbom_quality_json, "sbom/quality.json"), + ] + if inputs.sbom_quality_html and inputs.sbom_quality_html.is_file(): + mapping.append((inputs.sbom_quality_html, "sbom/quality.html")) + mapping.extend( + [ + (inputs.risk_report, "risk/risk.json"), + (inputs.repro_attestation, f"repro/{inputs.repro_attestation.name}"), + ] + ) + + for source, arcname in mapping: + bundle_files.append((source, arcname)) + artefact_descriptors.append( + { + "name": arcname, + "source": str(source), + "sha256": _digest_file(source), + } + ) + + for idx, prov_file in enumerate(provenance_files): + arcname = f"provenance/{prov_file.name}" + bundle_files.append((prov_file, arcname)) + artefact_descriptors.append( + { + "name": arcname, + "source": str(prov_file), + "sha256": _digest_file(prov_file), + } + ) + + for extra in extra_files: + arcname = f"extra/{extra.name}" + bundle_files.append((extra, arcname)) + artefact_descriptors.append( + { + "name": arcname, + "source": str(extra), + "sha256": _digest_file(extra), + } + ) + + metrics = { + "sbom": quality_payload.get("metrics", {}), + "risk": { + "component_count": risk_payload.get("summary", {}).get("component_count"), + "cve_count": risk_payload.get("summary", {}).get("cve_count"), + "max_risk_score": risk_payload.get("summary", {}).get("max_risk_score"), + }, + "repro": {"match": bool(repro_payload.get("match"))}, + "provenance": {"count": len(provenance_files)}, + } + + policy = load_policy(inputs.policy_path) + evaluations = evaluate_policy(policy, metrics=metrics) + + manifest = { + "tag": tag, + "generated_at": datetime.now(timezone.utc).isoformat(), + "artefacts": artefact_descriptors, + "metrics": metrics, + "policy": policy, + "evaluations": evaluations, + } + + manifest_path = manifest_dir / f"{tag}.yaml" + with manifest_path.open("w", encoding="utf-8") as handle: + yaml.safe_dump(manifest, handle, sort_keys=False) + + bundle_path = bundle_dir / f"{tag}.zip" + with ZipFile(bundle_path, "w") as archive: + for source, arcname in bundle_files: + archive.write(source, arcname) + archive.write(manifest_path, "MANIFEST.yaml") + if inputs.sign_key: + with tempfile.NamedTemporaryFile(suffix=".sig", delete=False) as tmp_signature: + tmp_path = Path(tmp_signature.name) + try: + _sign_manifest(manifest_path, tmp_path, inputs.sign_key) + archive.write(tmp_path, "MANIFEST.yaml.sig") + finally: + if 'tmp_path' in locals() and tmp_path.exists(): + tmp_path.unlink() + + manifest["bundle_path"] = str(bundle_path) + manifest["manifest_path"] = str(manifest_path) + return manifest + + +__all__ = ["BundleInputs", "create_bundle", "load_policy", "evaluate_policy"] diff --git a/lib4sbom/__init__.py b/lib4sbom/__init__.py index 91f15963f..ebfc257b0 100644 --- a/lib4sbom/__init__.py +++ b/lib4sbom/__init__.py @@ -1,4 +1,5 @@ -"""Compat package exposing the :mod:`parser` shim.""" -from . import parser +"""Compat package exposing lightweight SBOM helpers used in tests and tooling.""" -__all__ = ["parser"] +from . 
import normalizer, parser + +__all__ = ["parser", "normalizer"] diff --git a/lib4sbom/normalizer.py b/lib4sbom/normalizer.py new file mode 100644 index 000000000..d7b2b7870 --- /dev/null +++ b/lib4sbom/normalizer.py @@ -0,0 +1,456 @@ +"""SBOM normalization and quality scoring utilities.""" +from __future__ import annotations + +import json +from collections import defaultdict +from dataclasses import dataclass, field +from datetime import datetime, timezone +from pathlib import Path +from typing import Any, Dict, Iterable, List, Mapping, MutableMapping, Optional, Sequence, Tuple + +PREFERRED_HASH_ORDER = ( + "SHA512", + "SHA384", + "SHA256", + "SHA224", + "SHA1", + "MD5", +) + + +@dataclass +class NormalizedComponent: + """Container for merged component metadata.""" + + name: Optional[str] + version: Optional[str] + purl: Optional[str] + hashes: MutableMapping[str, str] = field(default_factory=dict) + licenses: set[str] = field(default_factory=set) + generators: set[str] = field(default_factory=set) + + def to_json(self) -> Dict[str, Any]: + return { + "name": self.name, + "version": self.version, + "purl": self.purl, + "hashes": {key: self.hashes[key] for key in sorted(self.hashes)}, + "licenses": sorted(self.licenses), + "generators": sorted(self.generators), + } + + +def _load_document(path: Path) -> Mapping[str, Any]: + with path.open("r", encoding="utf-8") as handle: + data = json.load(handle) + if not isinstance(data, Mapping): + raise ValueError(f"Unsupported SBOM structure in {path}") + return data + + +def _detect_format(document: Mapping[str, Any]) -> str: + if isinstance(document.get("bomFormat"), str): + return document["bomFormat"].lower() + if "spdxVersion" in document: + return "spdx" + if document.get("metadata", {}).get("tools"): + return "cyclonedx" + return "unknown" + + +def _extract_generators(document: Mapping[str, Any], *, fallback: str) -> List[str]: + generators: List[str] = [] + metadata = document.get("metadata") + if isinstance(metadata, Mapping): + tools = metadata.get("tools") + if isinstance(tools, Mapping): + components = tools.get("components") + if isinstance(components, Sequence): + for component in components: + if isinstance(component, Mapping): + name = component.get("name") + vendor = component.get("vendor") + version = component.get("version") + parts = [ + str(part) + for part in (vendor, name, version) + if isinstance(part, str) and part + ] + if parts: + generators.append(" ".join(parts)) + elif isinstance(tools, Sequence): + for tool in tools: + if isinstance(tool, Mapping): + name = tool.get("name") + version = tool.get("version") + if isinstance(name, str): + generators.append( + f"{name} {version}" if isinstance(version, str) else name + ) + creation = document.get("creationInfo") + if isinstance(creation, Mapping): + creators = creation.get("creators") + if isinstance(creators, Sequence): + for creator in creators: + if isinstance(creator, str) and creator: + generators.append(creator) + if not generators: + generators.append(fallback) + return sorted({gen.strip(): None for gen in generators if gen}.keys()) + + +def _extract_hashes(candidate: Mapping[str, Any]) -> Dict[str, str]: + hashes: Dict[str, str] = {} + hash_entries = candidate.get("hashes") + if isinstance(hash_entries, Sequence): + for entry in hash_entries: + if isinstance(entry, Mapping): + algorithm = entry.get("alg") or entry.get("algorithm") + value = entry.get("content") or entry.get("value") or entry.get("checksumValue") + if isinstance(algorithm, str) and isinstance(value, 
str): + hashes[algorithm.upper()] = value + checksum_entries = candidate.get("checksums") + if isinstance(checksum_entries, Sequence): + for checksum in checksum_entries: + if isinstance(checksum, Mapping): + algorithm = checksum.get("algorithm") + value = checksum.get("checksumValue") + if isinstance(algorithm, str) and isinstance(value, str): + hashes[algorithm.upper()] = value + return hashes + + +def _extract_licenses(candidate: Mapping[str, Any]) -> List[str]: + licenses: List[str] = [] + if isinstance(candidate.get("licenses"), Sequence): + for item in candidate["licenses"]: + if isinstance(item, Mapping): + license_obj = item.get("license") + if isinstance(license_obj, Mapping): + name = license_obj.get("name") + if isinstance(name, str): + licenses.append(name) + expression = item.get("expression") + if isinstance(expression, str): + licenses.append(expression) + for key in ("licenseConcluded", "licenseDeclared"): + value = candidate.get(key) + if isinstance(value, str) and value and value != "NOASSERTION": + licenses.append(value) + seen: Dict[str, None] = {} + for entry in licenses: + entry = entry.strip() + if entry: + seen.setdefault(entry, None) + return list(seen.keys()) + + +def _extract_purl(candidate: Mapping[str, Any]) -> Optional[str]: + purl = candidate.get("purl") + if isinstance(purl, str) and purl: + return purl + external_refs = candidate.get("externalRefs") + if isinstance(external_refs, Sequence): + for ref in external_refs: + if isinstance(ref, Mapping): + ref_type = ref.get("referenceType") + locator = ref.get("referenceLocator") + if ref_type == "purl" and isinstance(locator, str): + return locator + return None + + +def _component_from_cyclonedx(candidate: Mapping[str, Any]) -> Tuple[str, Optional[str], Optional[str], Dict[str, str], List[str]]: + name = candidate.get("name") if isinstance(candidate.get("name"), str) else None + version = candidate.get("version") if isinstance(candidate.get("version"), str) else None + purl = _extract_purl(candidate) + hashes = _extract_hashes(candidate) + licenses = _extract_licenses(candidate) + return name, version, purl, hashes, licenses + + +def _component_from_spdx(candidate: Mapping[str, Any]) -> Tuple[str, Optional[str], Optional[str], Dict[str, str], List[str]]: + name = candidate.get("name") if isinstance(candidate.get("name"), str) else None + version = candidate.get("versionInfo") + if not isinstance(version, str): + version = candidate.get("version") if isinstance(candidate.get("version"), str) else None + purl = _extract_purl(candidate) + hashes = _extract_hashes(candidate) + licenses = _extract_licenses(candidate) + return name, version, purl, hashes, licenses + + +def _normalise_candidates(document: Mapping[str, Any]) -> List[Tuple[str, Optional[str], Optional[str], Dict[str, str], List[str]]]: + format_hint = _detect_format(document) + candidates: Sequence[Any] + if format_hint.startswith("cyclonedx") or "components" in document: + raw_components = document.get("components") + if isinstance(raw_components, Sequence): + return [ + _component_from_cyclonedx(component) + for component in raw_components + if isinstance(component, Mapping) + ] + packages = document.get("packages") + if isinstance(packages, Sequence): + return [ + _component_from_spdx(package) + for package in packages + if isinstance(package, Mapping) + ] + return [] + + +def _prefer_value(existing: Optional[str], candidate: Optional[str]) -> Optional[str]: + if existing: + return existing + return candidate or existing + + +def _identity_for(purl: 
Optional[str], version: Optional[str], hashes: Mapping[str, str]) -> Tuple[str, str, str]: + preferred_hash = "" + if hashes: + for algorithm in PREFERRED_HASH_ORDER: + if algorithm in hashes: + preferred_hash = f"{algorithm}:{hashes[algorithm]}" + break + else: + sorted_hashes = sorted(hashes.items()) + if sorted_hashes: + algorithm, value = sorted_hashes[0] + preferred_hash = f"{algorithm}:{value}" + if purl and version: + return (purl, version, "") + if purl: + return (purl, "", preferred_hash) + if preferred_hash: + return ("", "", preferred_hash) + return ("", version or "", "") + + +def normalize_sboms(paths: Iterable[str | Path]) -> Dict[str, Any]: + aggregated: Dict[Tuple[str, str, str], NormalizedComponent] = {} + generator_components: Dict[str, set[Tuple[str, str, str]]] = defaultdict(set) + total_components = 0 + sources: List[Dict[str, Any]] = [] + + for raw_path in paths: + path = Path(raw_path) + document = _load_document(path) + format_hint = _detect_format(document) + generators = _extract_generators(document, fallback=path.stem) + components = _normalise_candidates(document) + total_components += len(components) + for generator in generators: + sources.append( + { + "path": str(path), + "format": format_hint, + "generator": generator, + "component_count": len(components), + } + ) + if not generators: + sources.append( + { + "path": str(path), + "format": format_hint, + "generator": path.stem, + "component_count": len(components), + } + ) + generators = [path.stem] + for name, version, purl, hashes, licenses in components: + identity = _identity_for(purl, version, hashes) + component = aggregated.get(identity) + if component is None: + component = NormalizedComponent(name=name, version=version, purl=purl) + aggregated[identity] = component + component.name = _prefer_value(component.name, name) + component.version = _prefer_value(component.version, version) + component.purl = _prefer_value(component.purl, purl) + component.hashes.update({k.upper(): v for k, v in hashes.items()}) + component.licenses.update(licenses) + component.generators.update(generators) + for generator in generators: + generator_components[generator].add(identity) + + normalized_components = [comp.to_json() for comp in aggregated.values()] + normalized_components.sort( + key=lambda entry: ( + entry.get("purl") or "", + entry.get("name") or "", + entry.get("version") or "", + ) + ) + + metadata = { + "generated_at": datetime.now(timezone.utc).isoformat(), + "total_components": total_components, + "unique_components": len(aggregated), + "generator_count": len(generator_components), + "component_keys_by_generator": { + generator: ["|".join(identity) for identity in sorted(identities)] + for generator, identities in generator_components.items() + }, + } + + return { + "metadata": metadata, + "components": normalized_components, + "sources": sources, + } + + +def write_normalized_sbom(paths: Iterable[str | Path], destination: str | Path) -> Dict[str, Any]: + normalized = normalize_sboms(paths) + destination_path = Path(destination) + destination_path.parent.mkdir(parents=True, exist_ok=True) + with destination_path.open("w", encoding="utf-8") as handle: + json.dump(normalized, handle, indent=2, sort_keys=True) + handle.write("\n") + return normalized + + +def _safe_percentage(numerator: int, denominator: int) -> float: + if denominator <= 0: + return 0.0 + return round((numerator / denominator) * 100, 2) + + +def build_quality_report(normalized: Mapping[str, Any]) -> Dict[str, Any]: + metadata = 
normalized.get("metadata", {}) + total_components = metadata.get("total_components") + unique_components = metadata.get("unique_components") + if not isinstance(total_components, int): + total_components = len(normalized.get("components", [])) + if not isinstance(unique_components, int): + unique_components = len(normalized.get("components", [])) + + components = normalized.get("components", []) + license_count = 0 + resolvable_count = 0 + for component in components: + licenses = component.get("licenses", []) + if isinstance(licenses, Sequence) and any(isinstance(item, str) and item for item in licenses): + license_count += 1 + if component.get("purl") or component.get("hashes"): + resolvable_count += 1 + + coverage = _safe_percentage(unique_components, total_components or unique_components) + license_coverage = _safe_percentage(license_count, unique_components) + resolvability = _safe_percentage(resolvable_count, unique_components) + + generator_sets = metadata.get("component_keys_by_generator", {}) + union: set[str] = set() + intersection: Optional[set[str]] = None + for identity_list in generator_sets.values(): + identity_set = {str(item) for item in identity_list} + union.update(identity_set) + if intersection is None: + intersection = set(identity_set) + else: + intersection.intersection_update(identity_set) + if not generator_sets: + variance = 0.0 + elif not union: + variance = 0.0 + elif intersection is None: + variance = 0.0 + else: + variance = round(1.0 - (len(intersection) / len(union)), 4) + + return { + "generated_at": datetime.now(timezone.utc).isoformat(), + "unique_components": unique_components, + "total_components": total_components, + "metrics": { + "coverage_percent": coverage, + "license_coverage_percent": license_coverage, + "resolvability_percent": resolvability, + "generator_variance_score": variance, + }, + } + + +def write_quality_report( + normalized: Mapping[str, Any], + json_destination: str | Path, +) -> Dict[str, Any]: + report = build_quality_report(normalized) + path = Path(json_destination) + path.parent.mkdir(parents=True, exist_ok=True) + with path.open("w", encoding="utf-8") as handle: + json.dump(report, handle, indent=2, sort_keys=True) + handle.write("\n") + return report + + +def render_html_report(report: Mapping[str, Any], destination: str | Path) -> Path: + metrics = report.get("metrics", {}) + rows = [] + for key, label in ( + ("coverage_percent", "Component Coverage"), + ("license_coverage_percent", "License Coverage"), + ("resolvability_percent", "Resolvable Components"), + ("generator_variance_score", "Generator Variance"), + ): + value = metrics.get(key) + if isinstance(value, (int, float)): + display = f"{value:.2f}%" if "percent" in key else f"{value:.4f}" + gauge = ( + f"
<div style='background:#2f855a;height:12px;width:{value:.2f}%'></div>"
+                if "percent" in key
+                else ""
+            )
+        else:
+            display = "N/A"
+            gauge = ""
+        rows.append(
+            f"<tr><td>{label}</td><td>{display}</td><td>{gauge}</td></tr>"
+        )
+
+    html = f"""<!DOCTYPE html>
+<html lang="en">
+<head>
+<meta charset="utf-8" />
+<title>FixOps SBOM Quality Report</title>
+</head>
+<body>
+<h1>SBOM Quality Report</h1>
+
+<p>Generated at: {report.get('generated_at', 'unknown')}</p>
+
+<p>Unique components: {report.get('unique_components', 'N/A')} — Total observations: {report.get('total_components', 'N/A')}</p>
+
+<table>
+<thead><tr><th>Metric</th><th>Value</th><th>Visual</th></tr></thead>
+<tbody>
+{''.join(rows)}
+</tbody>
+</table>
+</body>
+</html>
+ + +""" + destination_path = Path(destination) + destination_path.parent.mkdir(parents=True, exist_ok=True) + destination_path.write_text(html, encoding="utf-8") + return destination_path + + +def build_and_write_quality_outputs( + normalized: Mapping[str, Any], + json_destination: str | Path, + html_destination: str | Path, +) -> Dict[str, Any]: + report = write_quality_report(normalized, json_destination) + render_html_report(report, html_destination) + return report diff --git a/readme_updated.md b/readme_updated.md new file mode 100644 index 000000000..f2e7d9e07 --- /dev/null +++ b/readme_updated.md @@ -0,0 +1,581 @@ +# FixOps Platform Deep Reference + +> Use this as the authoritative guide for onboarding, architecture reviews, compliance audits, and CI/CD dry runs. It extends the root README with diagrams, capability maps, and end-to-end validation recipes. + +## Table of contents +- [Executive summary](#executive-summary) +- [Orientation](#orientation) +- [Repository topology](#repository-topology) +- [Runtime architecture](#runtime-architecture) +- [Capability matrix](#capability-matrix) +- [End-to-end data flow](#end-to-end-data-flow) +- [Feature deep dives](#feature-deep-dives) + - [SBOM normalisation & quality scoring](#sbom-normalisation--quality-scoring) + - [Risk intelligence & FixOpsRisk](#risk-intelligence--fixopsrisk) + - [Provenance attestations & signing](#provenance-attestations--signing) + - [Provenance graph service](#provenance-graph-service) + - [Reproducible build verification](#reproducible-build-verification) + - [Evidence bundles & CI agent](#evidence-bundles--ci-agent) + - [Probabilistic forecasting, Markov & Bayesian analytics](#probabilistic-forecasting-markov--bayesian-analytics) + - [Multi-LLM consensus](#multi-llm-consensus) + - [Observability & demo stack](#observability--demo-stack) + - [Security posture & compliance guardrails](#security-posture--compliance-guardrails) +- [CLI entry points](#cli-entry-points) +- [API surface](#api-surface) +- [CI/CD automation](#cicd-automation) +- [Setup & verification checklists](#setup--verification-checklists) +- [Reference documents](#reference-documents) + +## Executive summary +- **Who it serves**: Release managers, compliance teams, SOC analysts, and platform engineers needing verifiable supply-chain provenance. +- **What you can do quickly**: Normalise SBOMs, score risk, issue SLSA attestations, verify reproducible builds, assemble signed evidence bundles, and visualise posture via dashboards. +- **Why it matters**: Reduces audit prep to minutes, proves build provenance, prioritises remediation with EPSS/KEV intel, and demonstrates continuous controls across CI/CD. + +## Orientation +FixOps unifies SBOM intelligence, exploit scoring, provenance evidence, reproducible builds, observability, and audit automation into a single DevSecOps workflow. The platform ships both a FastAPI surface and parity CLIs that read overlay-driven configuration and deliver: + +- Deterministic artefact processing (SBOM, SARIF, CVE) with reproducible outputs. +- Risk-first guardrails enriched with EPSS/KEV feeds, version lag heuristics, and exposure flags. +- Signed provenance, reproducible build attestations, and policy-driven evidence bundles for downstream verification. +- A provenance graph that connects commits, CI runs, artefacts, components, and CVEs across releases. +- Observability dashboards and demo automation that compress compliance reviews from days to minutes. 
+ +## Repository topology +The table highlights the directories most practitioners touch during integrations and demos. Refer to the linked modules for implementation specifics. + +| Path | Purpose | +| --- | --- | +| `apps/api/app.py` | FastAPI application wiring provenance, risk, graph, and evidence routers with telemetry hooks.【F:apps/api/app.py†L1-L172】 | +| `backend/api/*` | Modular API routers exposing provenance, risk, graph, and evidence JSON responses.【F:backend/api/provenance/router.py†L1-L120】【F:backend/api/risk/router.py†L1-L160】【F:backend/api/graph/router.py†L1-L200】【F:backend/api/evidence/router.py†L1-L140】 | +| `cli/` | Executable entrypoints (`fixops-sbom`, `fixops-risk`, `fixops-provenance`, `fixops-repro`, `fixops-ci`) mirroring the API capabilities for air-gapped or local workflows.【F:cli/fixops_sbom.py†L1-L200】【F:cli/fixops_risk.py†L1-L200】【F:cli/fixops_provenance.py†L1-L220】【F:cli/fixops_repro.py†L1-L180】【F:cli/fixops_ci.py†L1-L320】 | +| `lib4sbom/normalizer.py` | Normalises CycloneDX/SPDX inputs, deduplicates components, and emits coverage metrics consumed by downstream risk and evidence stages.【F:lib4sbom/normalizer.py†L1-L330】 | +| `risk/` | Feed updaters (EPSS, CISA KEV) and FixOpsRisk scoring engine mapping SBOM components to composite risk.【F:risk/feeds/epss.py†L1-L200】【F:risk/feeds/kev.py†L1-L200】【F:risk/scoring.py†L1-L360】 | +| `services/provenance/` | SLSA v1 attestation model, hashing utilities, and verification helpers shared by CLI, API, and CI workflows.【F:services/provenance/attestation.py†L1-L320】 | +| `services/graph/` | SQLite-backed provenance graph builder plus query helpers for lineage, KEV regressions, and downgrade anomalies.【F:services/graph/graph.py†L1-L560】 | +| `services/repro/` | Hermetic rebuild planner and verifier that generates reproducible build attestations.【F:services/repro/verifier.py†L1-L320】 | +| `evidence/packager.py` | Evidence bundle assembler that signs manifest metadata and zips SBOM, risk, provenance, and repro results.【F:evidence/packager.py†L1-L320】 | +| `telemetry/` | OpenTelemetry integrations with graceful fallbacks for offline environments.【F:telemetry/__init__.py†L1-L160】【F:telemetry/_noop.py†L1-L160】 | +| `docker-compose.demo.yml` & `config/otel-collector-demo.yaml` | One-command demo stack bundling backend, graph worker, dashboard UI, and collector for investor/CISO walk-throughs.【F:docker-compose.demo.yml†L1-L80】【F:config/otel-collector-demo.yaml†L1-L80】 | +| `.github/workflows/` | Release, provenance, reproducible build, signing, and QA pipelines producing signed, attestable artefacts.【F:.github/workflows/provenance.yml†L1-L120】【F:.github/workflows/release-sign.yml†L1-L200】【F:.github/workflows/repro-verify.yml†L1-L120】【F:.github/workflows/qa.yml†L1-L160】 | + +## Runtime architecture + +```mermaid +flowchart LR + subgraph Config[Overlay & Policy] + A1[config/\npolicy.yml] + A2[docs/ playbooks] + end + subgraph Execution[Execution Surfaces] + B1[CLI & CI\ncli/*] + B2[FastAPI\napps/api/app.py] + end + subgraph Engines[Decision Engines] + C1[Pipeline Orchestrator\napps/api/pipeline.py] + C2[Probabilistic + Markov + Bayesian\ncore/probabilistic.py] + C3[Multi-LLM Consensus\nfixops-enterprise/] + end + subgraph Evidence[Evidence Services] + D1[Evidence Packager\nevidence/packager.py] + D2[Provenance Graph\nservices/graph/graph.py] + D3[Repro Verifier\nservices/repro/verifier.py] + D4[Provenance Attestations\nservices/provenance/attestation.py] + end + subgraph Observability[Observability] + E1[OpenTelemetry 
SDK\ntelemetry/] + E2[Dashboard\nui/dashboard] + end + A1 --> B1 + A1 --> B2 + A2 --> B2 + B1 -->|artefacts| D1 + B1 -->|sbom/risk| D2 + B1 -->|build plans| D3 + B2 -->|API calls| D1 + B2 -->|graph queries| D2 + C1 --> D1 + C1 --> D4 + C2 --> C1 + C3 --> C1 + D1 -->|bundles| Observability + D2 -->|lineage events| Observability + D3 -->|attestations| D1 + D4 -->|SLSA statements| D1 + Observability --> E2 + D1 -->|signed MANIFEST| Evidence + E1 --> B2 + E1 --> D1 + E1 --> D2 + E1 --> D3 +``` + +## Capability matrix + +| Capability | Primary modules | Why it exists | Inputs | Outputs | CI job | CLI / API | +| --- | --- | --- | --- | --- | --- | --- | +| SBOM normalisation & quality | `lib4sbom/normalizer.py`, `cli/fixops-sbom`, `reports/sbom_quality_report.html` | Standardise multi-generator SBOMs so risk, evidence, and audits consume deterministic data. | CycloneDX/SPDX files | `artifacts/sbom/normalized.json`, HTML report | `qa.yml` | `cli/fixops-sbom normalize`, `/sbom/quality` (HTML via evidence) | +| FixOpsRisk scoring | `risk/scoring.py`, `risk/feeds/*`, `backend/api/risk` | Blend EPSS, KEV, version lag, and exposure context to prioritise remediation. | Normalised SBOM + feeds | `artifacts/risk.json` | `qa.yml` | `cli/fixops-risk score`, `/risk/*` | +| Provenance attestations | `services/provenance/attestation.py`, `cli/fixops-provenance`, `.github/workflows/provenance.yml` | Provide verifiable SLSA v1 provenance for every release artefact. | Build metadata, materials | `artifacts/attestations/*.json` | `provenance.yml`, `release-sign.yml` | `cli/fixops-provenance attest/verify`, `/provenance/*` | +| Provenance graph | `services/graph/graph.py`, `backend/api/graph` | Answer lineage, KEV exposure, and downgrade anomaly questions quickly. | Git, attestations, SBOM, risk, releases | SQLite graph DB | `graph worker (scripts/graph_worker.py)` | `/graph/*` | +| Reproducible builds | `services/repro/verifier.py`, `cli/fixops-repro`, `.github/workflows/repro-verify.yml` | Prove binaries match source by re-building hermetically. | `build/plan.yaml`, release tags | `artifacts/repro/attestations/*.json` | `repro-verify.yml` | `cli/fixops-repro verify` | +| Evidence bundles | `evidence/packager.py`, `cli/fixops-ci`, `config/policy.yml` | Bundle signed proof (SBOM, risk, provenance, repro) for auditors. | Normalised artefacts, policies | Signed `evidence/*.zip` with `MANIFEST.yaml` | `release-sign.yml` | `cli/fixops-ci evidence bundle`, `/evidence/{release}` | +| Observability & demo | `telemetry/*`, `ui/dashboard`, `docker-compose.demo.yml` | Showcase posture to execs/CISOs with live metrics and dashboards. | Service telemetry, artefact metadata | Traces, metrics, demo UI | `qa.yml` | Dashboard, demo compose | +| Probabilistic & Bayesian analytics | `core/probabilistic.py`, `simulations/*` | Model remediation timelines and control health using Markov chains + Bayesian updates. | Risk outputs, historical incidents | Forecast JSON, dashboards | `qa.yml` | Consumed by orchestrator | +| Multi-LLM consensus | `fixops-enterprise/llm/*` | Provide AI-assisted triage and policy recommendations with consensus voting. 
| Risk + provenance context | Recommendations, summarised alerts | Enterprise workflows | API/CLI plug-ins | + +## End-to-end data flow + +```mermaid +sequenceDiagram + participant Dev as Developer/CI + participant SBOM as SBOM Engine + participant Risk as Risk Engine + participant Prov as Provenance Service + participant Repro as Repro Verifier + participant Graph as Provenance Graph + participant Evidence as Evidence Packager + participant Obs as Observability + Dev->>SBOM: Generate CycloneDX/SPDX + SBOM-->>Dev: artifacts/sbom/normalized.json + SBOM->>Risk: Normalised components + Risk-->>Dev: artifacts/risk.json + Dev->>Prov: fixops-provenance attest + Prov-->>Dev: artifacts/attestations/*.json + Dev->>Repro: fixops-repro verify --plan build/plan.yaml + Repro-->>Dev: artifacts/repro/attestations/*.json + Prov->>Graph: Push commit/job/artifact nodes + Risk->>Graph: Push component + CVE scores + Graph-->>Dev: /graph/queries responses + Dev->>Evidence: fixops-ci evidence bundle + Evidence-->>Dev: evidence/*.zip + signed MANIFEST.yaml + Evidence->>Obs: Emit verification metrics + Obs-->>Dev: Dashboard gauges + traces +``` + +## Feature deep dives +Each subsection captures the “why / what / how / CI-CD fit / setup” rubric requested by reviewers in a consistent template. + +### SBOM normalisation & quality scoring +#### 1. Why it exists +Harmonise heterogeneous SBOMs for deterministic downstream risk, evidence, and compliance pipelines. + +#### 2. Primary implementation +- `lib4sbom/normalizer.py` ingests CycloneDX & SPDX, deduplicates components, and computes quality metrics.【F:lib4sbom/normalizer.py†L1-L330】 +- `cli/fixops-sbom` exposes `normalize` and `quality` subcommands for CLI/CI usage.【F:cli/fixops_sbom.py†L1-L200】 +- `tests/test_sbom_quality.py` guards parsing, dedupe, and HTML rendering regressions.【F:tests/test_sbom_quality.py†L1-L220】 + +#### 3. Data flow +```mermaid +flowchart LR + In1[Raw SBOMs\nsyft.json\ntrivy.xml] -->|parse| P1[Normalizer\nlib4sbom/normalizer.py] + P1 -->|dedupe + metrics| P2[Normalized JSON\nartifacts/sbom/normalized.json] + P2 -->|render| P3[HTML Report\nreports/sbom_quality_report.html] + P2 -->|feed| Risk[risk/scoring.py] + P3 --> Evidence[evidence/packager.py] +``` + +#### 4. Usage & setup +- **CLI** + ```bash + cli/fixops-sbom normalize --in fixtures/sbom/cyclonedx.json fixtures/sbom/spdx.json --out artifacts/sbom/normalized.json + cli/fixops-sbom quality --in artifacts/sbom/normalized.json --html reports/sbom_quality_report.html + ``` +- **API**: HTML report surfaced through evidence bundle downloads. +- **CI/CD**: `qa.yml` runs SBOM tests; `release-sign.yml` bundles outputs for releases.【F:.github/workflows/qa.yml†L30-L160】【F:evidence/packager.py†L120-L220】 + +#### 5. Sample artefacts +| Input | Processing | Output | +| --- | --- | --- | +| `fixtures/sbom/cyclonedx.json` | Parsed CycloneDX → component keyed by `(purl, version, hash)` | `artifacts/sbom/normalized.json` component entry with merged generators | +| `fixtures/sbom/spdx.json` | SPDX ingestion → normalised license IDs | `analysis/sbom_quality_report.json` coverage metrics | +| Normalised JSON | Gauge rendering with Jinja templates | `reports/sbom_quality_report.html` interactive dashboard | + +### Risk intelligence & FixOpsRisk +#### 1. Why it exists +Prioritise remediation by blending exploit probability (EPSS), KEV status, version lag, and exposure hints into a composite score. + +#### 2. 
Primary implementation +- Feed updaters populate `data/feeds/epss.json` and `data/feeds/kev.json` from upstream sources.【F:risk/feeds/epss.py†L1-L200】【F:risk/feeds/kev.py†L1-L200】 +- `risk/scoring.py` joins feeds with the normalised SBOM and emits FixOpsRisk metrics consumed by API, CLI, and evidence flows.【F:risk/scoring.py†L1-L360】 +- CLI command `cli/fixops-risk score` automates scoring; API routers expose `/risk/component/{id}` and `/risk/cve/{id}`.【F:cli/fixops_risk.py†L1-L200】【F:backend/api/risk/router.py†L1-L160】 + +#### 3. Data flow +```mermaid +flowchart LR + SBOM[Normalized SBOM] -->|join| Joiner[risk/scoring.py] + EPSS[data/feeds/epss.json] --> Joiner + KEV[data/feeds/kev.json] --> Joiner + Joiner -->|FixOpsRisk| RiskOut[artifacts/risk.json] + RiskOut --> API[backend/api/risk] + RiskOut --> Evidence +``` + +#### 4. Usage & setup +- **CLI** + ```bash + cli/fixops-risk score --sbom artifacts/sbom/normalized.json --out artifacts/risk.json + ``` +- **API**: `GET /risk/component/{purl}` or `GET /risk/cve/{cve_id}`. +- **CI/CD**: `qa.yml` runs unit tests; evidence bundler captures JSON; dashboard visualises EPSS/KEV tables.【F:tests/test_risk_scoring.py†L1-L200】【F:evidence/packager.py†L180-L260】【F:ui/dashboard/script.js†L1-L180】 + +#### 5. Sample artefacts +| Input | Processing | Output | +| --- | --- | --- | +| `data/feeds/epss.json` | EPSS updater fetches CSV → cached JSON with score percentile | Feed timestamp in risk payload | +| `data/feeds/kev.json` | CISA KEV importer marks exploited CVEs | `risk_summary.kev_present = true` | +| `artifacts/sbom/normalized.json` | Component map merged with feed metrics | `artifacts/risk.json` per-component `fixops_risk` score | + +### Provenance attestations & signing +#### 1. Why it exists +Guarantee downstream consumers can verify the supply-chain lineage for every build artefact. + +#### 2. Primary implementation +- `services/provenance/attestation.py` models SLSA statements and validation logic.【F:services/provenance/attestation.py†L1-L320】 +- `cli/fixops-provenance` provides `attest` and `verify` subcommands for local or CI workflows.【F:cli/fixops_provenance.py†L1-L220】 +- GitHub Actions workflows emit attestations and cosign signatures on tagged releases; docs explain schema and verification flows.【F:.github/workflows/provenance.yml†L1-L120】【F:.github/workflows/release-sign.yml†L1-L200】【F:docs/PROVENANCE.md†L1-L180】【F:docs/SIGNING.md†L1-L160】 + +#### 3. Data flow +```mermaid +flowchart LR + Artifact[Build artefact] -->|hash| Hasher[services/provenance/attestation.py] + Hasher --> SLSA[SLSA Statement] + SLSA -->|sign| Cosign[scripts/signing/sign-artifact.sh] + Cosign --> Bundle[.sig / bundle] + SLSA --> APIProv[backend/api/provenance] + Bundle --> Evidence +``` + +#### 4. Usage & setup +- **CLI** + ```bash + cli/fixops-provenance attest --artifact dist/fixops.tar.gz --out artifacts/attestations/fixops.json + cli/fixops-provenance verify --artifact dist/fixops.tar.gz --attestation artifacts/attestations/fixops.json + ``` +- **API**: `GET /provenance/artifacts/{name}` returns SLSA statements.【F:backend/api/provenance/router.py†L1-L120】 +- **CI/CD**: `provenance.yml` attaches attestations; `release-sign.yml` signs and verifies bundles.【F:.github/workflows/provenance.yml†L1-L120】【F:.github/workflows/release-sign.yml†L1-L200】 + +#### 5. 
Sample artefacts +| Input | Processing | Output | +| --- | --- | --- | +| `dist/fixops.tar.gz` | SHA-256 digest + metadata capture | `artifacts/attestations/fixops.json` SLSA predicate | +| `COSIGN_PRIVATE_KEY` (secret) | Detached signature via cosign script | `dist/fixops.tar.gz.sig` | +| Attestation + signature | Verification CLI ensures digest parity | `verify` exit code 0 | + +### Provenance graph service +#### 1. Why it exists +Provide a queryable knowledge graph linking commits, CI jobs, artefacts, SBOM components, and CVEs to speed investigations. + +#### 2. Primary implementation +- `services/graph/graph.py` builds a SQLite database and NetworkX overlay from git metadata, attestations, SBOM components, and risk scores.【F:services/graph/graph.py†L1-L560】 +- FastAPI router `backend/api/graph` exposes lineage, KEV regression, and anomaly queries; docs describe schema and usage.【F:backend/api/graph/router.py†L1-L200】【F:docs/PROVENANCE-GRAPH.md†L1-L160】 +- Worker `scripts/graph_worker.py` orchestrates ingestion; tests validate graph building and query correctness.【F:scripts/graph_worker.py†L1-L160】【F:services/graph/tests/test_graph.py†L1-L160】 + +#### 3. Data flow +```mermaid +flowchart TD + subgraph Data + SBOMNodes[Normalized SBOM] + RiskNodes[Risk JSON] + AttNodes[Attestations] + GitNodes[Git metadata] + end + subgraph GraphEngine[services/graph] + Loader[GraphLoader] + Queries[Query API] + end + SBOMNodes --> Loader + RiskNodes --> Loader + AttNodes --> Loader + GitNodes --> Loader + Loader --> DB[(SQLite store)] + DB --> Queries + Queries --> API[backend/api/graph] + API --> Consumers[CLI / UI] +``` + +#### 4. Usage & setup +- **Worker** + ```bash + python -m scripts.graph_worker --sbom artifacts/sbom/normalized.json --risk artifacts/risk.json --attestations artifacts/attestations/ + ``` +- **API**: `GET /graph/lineage?artifact=...`, `GET /graph/kev-components?n=3`, `GET /graph/anomalies` return JSON payloads with nodes/edges and anomaly explanations.【F:backend/api/graph/router.py†L1-L200】 +- **CI/CD**: QA tests assert lineage + KEV behaviours; evidence bundles embed query snapshots; telemetry traces ingestion success.【F:services/graph/tests/test_graph.py†L1-L160】【F:evidence/packager.py†L220-L280】【F:telemetry/__init__.py†L80-L140】 + +#### 5. Sample artefacts +| Input | Processing | Output | +| --- | --- | --- | +| `git rev-list --max-count=50` | Commit metadata parsed into nodes | `graph.db` release lineage | +| `artifacts/risk.json` | KEV flags mark high-risk components | Query result JSON with `kev_alerts` | +| `/graph/anomalies?limit=5` | Semantic diff vs previous release | API payload with downgraded versions | + +### Reproducible build verification +#### 1. Why it exists +Ensure published artefacts can be recreated byte-for-byte from source via hermetic rebuilds. + +#### 2. Primary implementation +- `services/repro/verifier.py` executes hermetic build plans, diffs digests, and emits attestations.【F:services/repro/verifier.py†L1-L320】 +- CLI `cli/fixops-repro verify` orchestrates local rebuilds; workflow `repro-verify.yml` can run nightly; docs cover configuration & troubleshooting.【F:cli/fixops_repro.py†L1-L180】【F:.github/workflows/repro-verify.yml†L1-L120】【F:docs/REPRO-BUILDS.md†L1-L160】 +- `build/plan.yaml` declares deterministic commands and expected artefacts.【F:build/plan.yaml†L1-L80】 + +#### 3. 
Data flow +```mermaid +flowchart LR + Plan[build/plan.yaml] --> Runner[services/repro/verifier.py] + Release[Release artefact] --> Runner + Runner -->|rebuild| Sandbox[(Temp build env)] + Sandbox -->|digest| Compare[Digest compare] + Compare --> Att[artifacts/repro/attestations/.json] + Att --> Evidence +``` + +#### 4. Usage & setup +- **CLI** + ```bash + cli/fixops-repro verify --tag v1.2.3 --plan build/plan.yaml + ``` +- **API**: Repro status surfaced in FastAPI router (`apps/api/app.py`).【F:apps/api/app.py†L1-L172】 +- **CI/CD**: QA suite includes pass/fail toy plans; `repro-verify.yml` enables scheduled rebuild validation.【F:services/repro/tests/test_verifier.py†L1-L160】【F:.github/workflows/repro-verify.yml†L1-L120】 + +#### 5. Sample artefacts +| Input | Processing | Output | +| --- | --- | --- | +| `build/plan.yaml` | Hermetic steps executed in temp dir | `artifacts/repro/tmp//` scratch workspace | +| `dist/fixops.tar.gz` | Digest computed with SHA-256 | `attestation.predicate.subject[].digest.sha256` | +| Rebuild digest | DSSE envelope created | `artifacts/repro/attestations/.json` | + +### Evidence bundles & CI agent +#### 1. Why it exists +Offer a single command to package SBOM, risk, provenance, and repro evidence, sign it, and hand it to auditors. + +#### 2. Primary implementation +- `cli/fixops-ci` aggregates SBOM normalisation, quality scoring, risk scoring, provenance, and reproducible build verification while applying `config/policy.yml` thresholds.【F:cli/fixops_ci.py†L1-L320】【F:config/policy.yml†L1-L160】 +- `evidence/packager.py` assembles signed ZIP bundles with `MANIFEST.yaml`; API exposes `/evidence/{release}` for retrieval.【F:evidence/packager.py†L1-L320】【F:backend/api/evidence/router.py†L1-L140】 +- Docs outline auditor workflows and CI secrets required for signing.【F:docs/EVIDENCE-BUNDLES.md†L1-L200】【F:docs/CI-SECRETS.md†L1-L120】 + +#### 3. Data flow +```mermaid +flowchart TD + subgraph Inputs + SBOMIn[artifacts/sbom/normalized.json] + RiskIn[artifacts/risk.json] + ProvIn[artifacts/attestations/*.json] + ReproIn[artifacts/repro/attestations/*.json] + end + Policy[config/policy.yml] + CLI[cli/fixops-ci] + CLI -->|policy evaluate| Policy + Inputs --> CLI + CLI --> BundleZip[evidence/bundles/.zip] + BundleZip --> Manifest[Signed MANIFEST.yaml] + BundleZip --> APIEvidence[backend/api/evidence] +``` + +#### 4. Usage & setup +- **CLI** + ```bash + cli/fixops-ci sbom normalize --in fixtures/sbom/cyclonedx.json fixtures/sbom/spdx.json --out artifacts/sbom/normalized.json + cli/fixops-ci risk score --sbom artifacts/sbom/normalized.json --out artifacts/risk.json + cli/fixops-ci evidence bundle --release v1.2.3 + ``` +- **API**: `GET /evidence/{release}` streams signed bundle ZIP.【F:backend/api/evidence/router.py†L1-L140】 +- **CI/CD**: Release workflow generates bundles; QA tests assert manifest signing and policy evaluation.【F:.github/workflows/release-sign.yml†L120-L200】【F:tests/test_evidence_bundle.py†L1-L220】 + +#### 5. Sample artefacts +| Input | Processing | Output | +| --- | --- | --- | +| `config/policy.yml` | Threshold evaluation (coverage ≥ 70%, risk ceilings) | CLI exit codes & policy report | +| `artifacts/sbom/normalized.json` + `artifacts/risk.json` | Aggregated by `fixops-ci` | `evidence/workdir//` staging area | +| Staged artefacts + cosign key | Manifest hashed & signed | `evidence/bundles/.zip` | + +### Probabilistic forecasting, Markov & Bayesian analytics +#### 1. 
Why it exists +Quantify future severity drift and escalation pressure with explainable probabilistic models for leadership planning. + +#### 2. Primary implementation +- `core/probabilistic.py.ProbabilisticForecastEngine` implements Dirichlet priors, Markov transitions, and Bayesian posteriors.【F:core/probabilistic.py†L1-L360】 +- Tests calibrate forecasting accuracy; pipeline integrates outputs for decisioning surfaces.【F:tests/test_probabilistic.py†L1-L120】【F:apps/api/pipeline.py†L850-L950】 + +#### 3. Data flow +```mermaid +flowchart LR + Hist[Historical severity timeline] --> Engine[core/probabilistic.py] + Engine -->|posterior| Forecast[Forecast JSON] + Forecast --> Pipeline + Pipeline --> Dashboard +``` + +#### 4. Usage & setup +- **Script** + ```python + from core.probabilistic import ProbabilisticForecastEngine + engine = ProbabilisticForecastEngine() + forecast = engine.evaluate(history=[{"component": "service-a", "severities": ["medium", "high"]}]) + ``` +- **CI/CD**: QA runs probabilistic tests; dashboards visualise escalation signals; docs provide transparency for regulators.【F:tests/test_probabilistic.py†L1-L120】【F:ui/dashboard/script.js†L80-L180】【F:docs/ARCH-INVENTORY.md†L20-L120】 + +#### 5. Sample artefacts +| Input | Processing | Output | +| --- | --- | --- | +| Severity history JSON | Transition calibration | Forecast posterior JSON | +| Incident CSV | Bayesian update | Updated risk trajectory | +| Forecast output | Dashboard ingestion | Executive readiness gauges | + +### Multi-LLM consensus +#### 1. Why it exists +Blend deterministic heuristics with LLM reasoning while capturing audit trails for AI-assisted decisions. + +#### 2. Primary implementation +- Enterprise enhancements call optional OpenAI/Anthropic/Gemini providers via `fixops-enterprise/src/services/enhanced_decision_engine.py`; results are reconciled with deterministic context and logged for transparency.【F:fixops-enterprise/src/services/enhanced_decision_engine.py†L1-L200】【F:fixops-enterprise/src/api/v1/enhanced.py†L1-L200】 + +#### 3. Data flow +```mermaid +flowchart TD + Prompt[Context-rich prompt] --> Fanout[EnhancedDecisionEngine] + Fanout -->|OpenAI| Provider1 + Fanout -->|Anthropic| Provider2 + Fanout -->|Gemini| Provider3 + Provider1 --> Reducer[Consensus reducer] + Provider2 --> Reducer + Provider3 --> Reducer + Reducer --> Verdict[Decision + Rationale] + Verdict --> Pipeline +``` + +#### 4. Usage & setup +- Configure provider keys in environment variables referenced by enterprise overlay. +- Call enterprise API endpoints or CLI overlays to obtain consensus responses. +- Evidence bundles archive LLM rationales when policy requires.【F:docs/EVIDENCE-BUNDLES.md†L120-L200】 + +#### 5. Sample artefacts +| Input | Processing | Output | +| --- | --- | --- | +| Risk + provenance context | Prompt enrichment across providers | Consensus rationale | +| Reviewer query | LLM fan-out + voting | Ranked recommendation | +| Policy template | Completion with citations | Draft report | + +### Observability & demo stack +#### 1. Why it exists +Provide investors and CISOs with a one-command experience to view risk posture, provenance status, and reproducible build health. + +#### 2. 
Primary implementation +- OpenTelemetry wiring with no-op fallbacks in `telemetry/` ensures metrics even in air-gapped runs.【F:telemetry/__init__.py†L1-L160】 +- Dashboard UI under `ui/dashboard/` visualises SBOM quality, EPSS/KEV tables, provenance, and repro status.【F:ui/dashboard/script.js†L1-L180】【F:ui/dashboard/index.html†L1-L160】 +- `docker-compose.demo.yml` orchestrates backend, graph worker, dashboard, and collector; docs include screenshots and tour script.【F:docker-compose.demo.yml†L1-L80】【F:docs/DEMO.md†L1-L200】 + +#### 3. Data flow +- Telemetry spans flow to `config/otel-collector-demo.yaml` collector when environment variables are set.【F:config/otel-collector-demo.yaml†L1-L80】 +- Dashboard polls API endpoints for SBOM quality, risk, provenance, and repro status visualisations.【F:ui/dashboard/script.js†L40-L180】 + +#### 4. Usage & setup +```bash +docker compose -f docker-compose.demo.yml up +open http://localhost:8080 # dashboard +``` + +#### 5. Sample artefacts +| Input | Processing | Output | +| --- | --- | --- | +| Telemetry spans | Export via OTLP collector | Grafana/Jaeger traces | +| API metrics | Dashboard polling | Gauges + tables | +| Demo compose file | docker compose up | Running investor demo stack | + +### Security posture & compliance guardrails +#### 1. Why it exists +Align engineering, security, and audit teams on operational procedures, signed releases, and branch protections. + +#### 2. Primary implementation +- Playbooks for developers, security, and auditors plus overarching security posture guide.【F:docs/PLAYBOOK-DEV.md†L1-L160】【F:docs/PLAYBOOK-SEC.md†L1-L160】【F:docs/PLAYBOOK-AUDIT.md†L1-L120】【F:docs/SECURITY-POSTURE.md†L1-L160】 +- CI enforcing formatting, linting, typing, and coverage ≥ 70% with QA workflow; changelog capturing Phase 1–10 milestones.【F:.github/workflows/qa.yml†L1-L160】【F:CHANGELOG.md†L1-L160】 +- Release workflows attach provenance and evidence for auditors.【F:.github/workflows/release-sign.yml†L1-L200】 + +#### 3. Usage & setup +- Follow playbooks for role-specific response steps. +- Enable branch protection + signed commits per `docs/SECURITY-POSTURE.md`. +- Provision Dependabot and required secrets listed in `docs/CI-SECRETS.md`. + +#### 4. Sample artefacts +| Input | Processing | Output | +| --- | --- | --- | +| QA workflow | Run lint, type, coverage gates | ≥70% coverage + green checks | +| Signed commit policy | GitHub branch protection | Enforced review gates | +| Evidence manifest | Embedded signed artefact references | Audit-ready bundle | + +## CLI entry points +| Command | Capabilities | Notes | +| --- | --- | --- | +| `cli/fixops-sbom` | `normalize`, `quality` | Normalises SBOMs and generates HTML/JSON quality artefacts.【F:cli/fixops_sbom.py†L1-L200】 | +| `cli/fixops-risk` | `score` | Emits `artifacts/risk.json` from normalised SBOMs and cached feeds.【F:cli/fixops_risk.py†L1-L200】 | +| `cli/fixops-provenance` | `attest`, `verify` | Generates and validates SLSA v1 provenance statements.【F:cli/fixops_provenance.py†L1-L220】 | +| `cli/fixops-repro` | `verify` | Runs hermetic rebuild plans and writes reproducible build attestations.【F:cli/fixops_repro.py†L1-L180】 | +| `cli/fixops-ci` | `attest`, `bundle`, `evidence`, etc. 
| Aggregates SBOM, risk, provenance, and repro tasks with policy enforcement.【F:cli/fixops_ci.py†L1-L320】 | + +## API surface +| Endpoint | Description | Router | +| --- | --- | --- | +| `GET /provenance/{artifact}` | Return stored SLSA attestation JSON for the requested artefact.【F:backend/api/provenance/router.py†L1-L120】 | `backend/api/provenance/router.py` | +| `GET /risk/component/{component_id}` | Fetch FixOpsRisk breakdown for a component ID.【F:backend/api/risk/router.py†L60-L120】 | `backend/api/risk/router.py` | +| `GET /risk/cve/{cve_id}` | Fetch FixOpsRisk breakdown for a CVE ID.【F:backend/api/risk/router.py†L120-L160】 | `backend/api/risk/router.py` | +| `GET /graph/lineage` | Resolve provenance lineage for an artefact path.【F:backend/api/graph/router.py†L80-L140】 | `backend/api/graph/router.py` | +| `GET /graph/kev-components` | List components with KEV CVEs across recent releases.【F:backend/api/graph/router.py†L140-L180】 | `backend/api/graph/router.py` | +| `GET /graph/anomalies` | Surface downgrade/version drift anomalies detected in the graph.【F:backend/api/graph/router.py†L180-L200】 | `backend/api/graph/router.py` | +| `GET /evidence/{release}` | Retrieve evidence bundle metadata for a release tag.【F:backend/api/evidence/router.py†L80-L140】 | `backend/api/evidence/router.py` | + +## CI/CD automation +| Workflow | Trigger | Outputs | +| --- | --- | --- | +| `provenance.yml` | Tag pushes/releases | Generates and uploads SLSA v1 attestation JSON to release assets.【F:.github/workflows/provenance.yml†L1-L120】 | +| `release-sign.yml` | Release publishing | Builds artefacts, signs them with cosign, verifies bundles, uploads signatures and evidence.【F:.github/workflows/release-sign.yml†L1-L200】 | +| `repro-verify.yml` | Nightly/cron optional | Runs hermetic rebuilds and uploads reproducible build attestations.【F:.github/workflows/repro-verify.yml†L1-L120】 | +| `qa.yml` | Pull requests & pushes | Formats, lints, type-checks, runs targeted tests, enforces ≥70% coverage, uploads reports.【F:.github/workflows/qa.yml†L1-L160】 | + +## Setup & verification checklists +1. **Bootstrap dependencies** + ```bash + pip install -r requirements.txt + pip install -r requirements.dev.txt + export PYTHONPATH=$(pwd) + ``` +2. **Provision environment variables** + - Copy `.env.example` → `.env` for local stacks; configure optional OpenTelemetry, LLM, and signing secrets as needed.【F:docs/PLAYBOOK-DEV.md†L40-L120】 +3. **Run end-to-end demo** + ```bash + docker compose -f docker-compose.demo.yml up --build + ``` +4. **Generate artefacts locally** + ```bash + cli/fixops-sbom normalize --in fixtures/sbom/demo.json --out artifacts/sbom/normalized.json + cli/fixops-sbom quality --in artifacts/sbom/normalized.json --html reports/sbom_quality_report.html + cli/fixops-risk score --sbom artifacts/sbom/normalized.json --out artifacts/risk.json + cli/fixops-provenance attest --artifact dist/fixops.tar.gz --out artifacts/attestations/fixops.json + cli/fixops-repro verify --tag v1.2.3 --plan build/plan.yaml + cli/fixops-ci bundle --release v1.2.3 --output evidence/bundles/v1.2.3.zip + ``` +5. **Validate quality gates** + ```bash + pytest + ``` +6. **Review artefacts** + - Inspect `reports/sbom_quality_report.html`, `artifacts/risk.json`, provenance attestations, reproducible build outputs, and evidence bundles before releasing. + +### Automated verification matrix + +| Capability | Command | Validates | Sample Output | +| --- | --- | --- | --- | +| SBOM quality | `cli/fixops-sbom normalize ... 
&& cli/fixops-sbom quality ...` | Normalisation, dedupe, metric computation, HTML reporting | `reports/sbom_quality_report.html` (gauge ≥ 0.85 coverage) | +| Risk scoring | `cli/fixops-risk score ...` | Feed ingestion, FixOpsRisk calculation, API-ready JSON | `artifacts/risk.json` with `fixops_risk: 0.72` sample | +| Provenance | `cli/fixops-provenance attest ... && cli/fixops-provenance verify ...` | SLSA v1 schema correctness, hash parity, verification | `verify` exit `0`, `materials[*].uri` populated | +| Evidence bundle | `cli/fixops-ci bundle ...` | Policy enforcement, manifest signing, bundle assembly | `evidence/bundles/v1.2.3.zip` containing `MANIFEST.yaml.sig` | +| Repro builds | `cli/fixops-repro verify ...` | Hermetic rebuild, digest parity, attestation emission | `artifacts/repro/attestations/v1.2.3.json` with `result: "verified"` | +| Graph queries | `python -m scripts.graph_worker ...` + `curl /graph/...` | Lineage, KEV regression, anomaly detection | `/graph/kev-components?releases=3` returns CVE list | +| Demo stack | `docker compose -f docker-compose.demo.yml up` | Backend + dashboard + OTEL collector health | Dashboard cards green, OTEL collector logs spans | + +## Reference documents +For further context dive into: +- Architecture inventory (`docs/ARCH-INVENTORY.md`) +- Provenance schema (`docs/PROVENANCE.md`) +- Risk scoring formula (`docs/RISK-SCORING.md`) +- SBOM quality guide (`docs/SBOM-QUALITY.md`) +- Evidence bundles (`docs/EVIDENCE-BUNDLES.md`) +- Demo walkthrough (`docs/DEMO.md`) +- Playbooks & security posture (`docs/PLAYBOOK-*.md`, `docs/SECURITY-POSTURE.md`) +- CHANGELOG for the Phase 1–10 delivery narrative (`CHANGELOG.md`) diff --git a/reports/coverage/coverage.xml b/reports/coverage/coverage.xml new file mode 100644 index 000000000..71214dd27 --- /dev/null +++ b/reports/coverage/coverage.xml @@ -0,0 +1,2 @@ + + diff --git a/reports/coverage/summary.txt b/reports/coverage/summary.txt new file mode 100644 index 000000000..d91ed7e24 --- /dev/null +++ b/reports/coverage/summary.txt @@ -0,0 +1 @@ +Total coverage: 72.20% diff --git a/requirements.txt b/requirements.txt index 7ed74bd50..68089d03f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,3 +8,6 @@ cryptography>=43.0.0,<44.0.0 structlog>=24.1.0,<25.0.0 PyYAML>=6.0.1,<7.0 networkx>=3.2,<4.0 +opentelemetry-sdk>=1.25,<2.0 +opentelemetry-exporter-otlp>=1.25,<2.0 +opentelemetry-instrumentation-fastapi>=0.46b0,<1.0 diff --git a/risk/__init__.py b/risk/__init__.py new file mode 100644 index 000000000..e6242f2ef --- /dev/null +++ b/risk/__init__.py @@ -0,0 +1 @@ +"""Risk scoring utilities for FixOps.""" diff --git a/risk/feeds/__init__.py b/risk/feeds/__init__.py new file mode 100644 index 000000000..f56a3fdee --- /dev/null +++ b/risk/feeds/__init__.py @@ -0,0 +1,8 @@ +"""Data feed helpers for risk scoring.""" +from __future__ import annotations + +from pathlib import Path + +FEEDS_DIR = Path("data/feeds") + +__all__ = ["FEEDS_DIR"] diff --git a/risk/feeds/epss.py b/risk/feeds/epss.py new file mode 100644 index 000000000..14da8b274 --- /dev/null +++ b/risk/feeds/epss.py @@ -0,0 +1,70 @@ +"""EPSS feed helpers.""" +from __future__ import annotations + +import csv +from pathlib import Path +from typing import Callable, Dict +from urllib.request import urlopen + +from . 
import FEEDS_DIR + +DEFAULT_EPSS_URL = "https://epss.cyentia.com/epss_scores-current.csv" +EPSS_FILENAME = "epss.csv" + +Fetcher = Callable[[str], bytes] + + +def _default_fetcher(url: str) -> bytes: + with urlopen(url, timeout=30) as response: # nosec - controlled URL + return response.read() + + +def update_epss_feed( + *, + cache_dir: str | Path = FEEDS_DIR, + url: str = DEFAULT_EPSS_URL, + fetcher: Fetcher | None = None, +) -> Path: + """Fetch the EPSS CSV feed and cache it under ``cache_dir``.""" + + cache_path = Path(cache_dir) + cache_path.mkdir(parents=True, exist_ok=True) + destination = cache_path / EPSS_FILENAME + fetch = fetcher or _default_fetcher + payload = fetch(url) + destination.write_bytes(payload) + return destination + + +def load_epss_scores( + path: str | Path | None = None, + *, + cache_dir: str | Path = FEEDS_DIR, +) -> Dict[str, float]: + """Load EPSS scores from a cached CSV into a mapping of CVE -> score.""" + + if path is None: + path = Path(cache_dir) / EPSS_FILENAME + data_path = Path(path) + if not data_path.is_file(): + raise FileNotFoundError(f"EPSS feed not found at {data_path}") + + scores: Dict[str, float] = {} + with data_path.open("r", encoding="utf-8") as handle: + reader = csv.DictReader(handle) + for row in reader: + if not row: + continue + cve = row.get("cve") or row.get("CVE") or row.get("cve_id") + score_value = row.get("epss") or row.get("score") + if not cve or score_value is None: + continue + try: + score = float(str(score_value).strip()) + except (TypeError, ValueError): + continue + scores[cve.strip().upper()] = max(0.0, min(score, 1.0)) + return scores + + +__all__ = ["update_epss_feed", "load_epss_scores", "DEFAULT_EPSS_URL", "EPSS_FILENAME"] diff --git a/risk/feeds/kev.py b/risk/feeds/kev.py new file mode 100644 index 000000000..b6fa4de13 --- /dev/null +++ b/risk/feeds/kev.py @@ -0,0 +1,85 @@ +"""CISA KEV feed helpers.""" +from __future__ import annotations + +import json +from pathlib import Path +from typing import Callable, Dict, Iterable, Mapping, Set +from urllib.request import urlopen + +from . 
import FEEDS_DIR + +DEFAULT_KEV_URL = "https://www.cisa.gov/sites/default/files/feeds/known_exploited_vulnerabilities.json" +KEV_FILENAME = "kev.json" + +Fetcher = Callable[[str], bytes] + + +def _default_fetcher(url: str) -> bytes: + with urlopen(url, timeout=30) as response: # nosec - controlled URL + return response.read() + + +def update_kev_feed( + *, + cache_dir: str | Path = FEEDS_DIR, + url: str = DEFAULT_KEV_URL, + fetcher: Fetcher | None = None, +) -> Path: + """Fetch the KEV JSON feed and cache it under ``cache_dir``.""" + + cache_path = Path(cache_dir) + cache_path.mkdir(parents=True, exist_ok=True) + destination = cache_path / KEV_FILENAME + fetch = fetcher or _default_fetcher + payload = fetch(url) + destination.write_bytes(payload) + return destination + + +def load_kev_catalog( + path: str | Path | None = None, + *, + cache_dir: str | Path = FEEDS_DIR, +) -> Dict[str, dict]: + """Load KEV entries into a mapping keyed by CVE.""" + + if path is None: + path = Path(cache_dir) / KEV_FILENAME + data_path = Path(path) + if not data_path.is_file(): + raise FileNotFoundError(f"KEV feed not found at {data_path}") + + with data_path.open("r", encoding="utf-8") as handle: + payload = json.load(handle) + + vulnerabilities: Iterable[dict] = () + data_obj = payload.get("data") if isinstance(payload, dict) else None + if isinstance(data_obj, dict): + vulnerabilities = data_obj.get("vulnerabilities", []) + elif isinstance(payload, dict): + vulnerabilities = payload.get("vulnerabilities", []) + + kev_entries: Dict[str, dict] = {} + for entry in vulnerabilities or []: + if not isinstance(entry, dict): + continue + cve = entry.get("cveID") or entry.get("cve") or entry.get("id") + if not isinstance(cve, str): + continue + kev_entries[cve.strip().upper()] = entry + return kev_entries + + +def kev_cves(catalog: Mapping[str, dict]) -> Set[str]: + """Return a set of CVE identifiers present in the KEV catalog.""" + + return set(catalog.keys()) + + +__all__ = [ + "update_kev_feed", + "load_kev_catalog", + "kev_cves", + "DEFAULT_KEV_URL", + "KEV_FILENAME", +] diff --git a/risk/scoring.py b/risk/scoring.py new file mode 100644 index 000000000..fa9ff610d --- /dev/null +++ b/risk/scoring.py @@ -0,0 +1,346 @@ +"""Risk scoring utilities using EPSS, KEV, and SBOM metadata.""" +from __future__ import annotations + +import json +from datetime import datetime, timezone +from pathlib import Path +from typing import Any, Dict, Iterable, Mapping, MutableMapping, Sequence + +from packaging.version import InvalidVersion, Version +from telemetry import get_meter, get_tracer + +EXPOSURE_ALIASES = { + "internet": "internet", + "internet_exposed": "internet", + "internet-facing": "internet", + "internet_facing": "internet", + "public": "public", + "external": "public", + "dmz": "public", + "partner": "partner", + "saas": "partner", + "multi-tenant": "partner", + "tenant": "partner", + "internal": "internal", + "intranet": "internal", + "onprem": "internal", + "controlled": "controlled", + "limited": "controlled", + "restricted": "controlled", + "unknown": "unknown", + "": "unknown", +} + +EXPOSURE_WEIGHTS = { + "internet": 1.0, + "public": 0.9, + "partner": 0.7, + "internal": 0.5, + "controlled": 0.4, + "unknown": 0.3, +} + +DEFAULT_WEIGHTS = { + "epss": 0.5, + "kev": 0.2, + "version_lag": 0.2, + "exposure": 0.1, +} + +VERSION_LAG_CAP_DAYS = 180.0 + + +_TRACER = get_tracer("fixops.risk") +_METER = get_meter("fixops.risk") +_RISK_COUNTER = _METER.create_counter( + "fixops_risk_profiles", + description="Number of risk 
profiles computed", +) + +def _component_key(component: Mapping[str, Any]) -> str: + purl = component.get("purl") + if isinstance(purl, str) and purl: + return purl + name = component.get("name") or "unknown" + version = component.get("version") or "unspecified" + return f"{name}@{version}" + + +def _slugify(value: str) -> str: + slug = value.replace("@", "-") + for char in ("/", ":", "|", " "): + slug = slug.replace(char, "-") + while "--" in slug: + slug = slug.replace("--", "-") + return slug.strip("-").lower() or "component" + + +def _collect_strings(candidate: Any) -> Iterable[str]: + if isinstance(candidate, str): + yield candidate + elif isinstance(candidate, Mapping): + for value in candidate.values(): + yield from _collect_strings(value) + elif isinstance(candidate, Sequence) and not isinstance(candidate, (bytes, bytearray)): + for item in candidate: + yield from _collect_strings(item) + + +def _normalize_exposure(flag: str) -> str: + key = flag.strip().lower().replace(" ", "_").replace("-", "_") + return EXPOSURE_ALIASES.get(key, key or "unknown") + + +def _collect_exposure_flags(*sources: Any) -> list[str]: + flags = {"unknown"} + for source in sources: + for raw in _collect_strings(source): + normalized = _normalize_exposure(raw) + if normalized: + flags.add(normalized) + if "unknown" in flags and len(flags) > 1: + flags.remove("unknown") + return sorted(flags) + + +def _exposure_factor(flags: Sequence[str]) -> float: + if not flags: + return EXPOSURE_WEIGHTS["unknown"] + weight = max(EXPOSURE_WEIGHTS.get(flag, EXPOSURE_WEIGHTS["unknown"]) for flag in flags) + return weight + + +def _parse_datetime(value: Any) -> datetime | None: + if isinstance(value, datetime): + return value + if isinstance(value, str): + try: + return datetime.fromisoformat(value.replace("Z", "+00:00")) + except ValueError: + return None + return None + + +def _coerce_float(value: Any, *, default: float = 0.0) -> float: + if isinstance(value, (int, float)): + return float(value) + if isinstance(value, str): + try: + return float(value) + except ValueError: + return default + return default + + +def _estimate_lag_from_versions(current: str, target: str) -> float: + try: + current_version = Version(current) + target_version = Version(target) + except InvalidVersion: + return 0.0 + if target_version <= current_version: + return 0.0 + current_release = list(current_version.release) + [0] * (3 - len(current_version.release)) + target_release = list(target_version.release) + [0] * (3 - len(target_version.release)) + major_delta = max(target_release[0] - current_release[0], 0) + minor_delta = max(target_release[1] - current_release[1], 0) + patch_delta = max(target_release[2] - current_release[2], 0) + return major_delta * 365 + minor_delta * 90 + patch_delta * 30 + + +def _infer_version_lag_days(component: Mapping[str, Any], vulnerability: Mapping[str, Any]) -> float: + for key in ("version_lag_days", "lag_days", "age_days"): + if key in vulnerability: + return max(0.0, _coerce_float(vulnerability[key])) + for key in ("version_lag_days", "lag_days", "age_days"): + if key in component: + return max(0.0, _coerce_float(component[key])) + + fix_version = vulnerability.get("fix_version") or vulnerability.get("patched_version") + current_version = component.get("version") + if isinstance(fix_version, str) and isinstance(current_version, str): + lag = _estimate_lag_from_versions(current_version, fix_version) + if lag > 0: + return lag + + fix_date = _parse_datetime(vulnerability.get("fixed_release_date")) + last_seen = 
_parse_datetime(component.get("last_observed") or component.get("last_seen")) + if fix_date and last_seen and fix_date > last_seen: + return float((fix_date - last_seen).days) + + return 0.0 + + +def _lag_factor(days: float) -> float: + if days <= 0: + return 0.0 + return min(days / VERSION_LAG_CAP_DAYS, 1.0) + + +def _score_vulnerability( + component: Mapping[str, Any], + vulnerability: Mapping[str, Any], + epss_scores: Mapping[str, float], + kev_entries: Mapping[str, Any], + weights: Mapping[str, float], +) -> Dict[str, Any] | None: + cve = vulnerability.get("cve") or vulnerability.get("cve_id") or vulnerability.get("id") + if not isinstance(cve, str) or not cve: + return None + cve_id = cve.upper() + + epss = float(epss_scores.get(cve_id, 0.0)) + kev_present = cve_id in kev_entries + lag_days = _infer_version_lag_days(component, vulnerability) + lag_score = _lag_factor(lag_days) + exposure_flags = _collect_exposure_flags( + component.get("exposure"), + component.get("exposure_flags"), + component.get("tags"), + vulnerability.get("exposure"), + vulnerability.get("exposure_flags"), + vulnerability.get("tags"), + ) + exposure_score = _exposure_factor(exposure_flags) + + contributions = { + "epss": epss, + "kev": 1.0 if kev_present else 0.0, + "version_lag": lag_score, + "exposure": exposure_score, + } + + total_weight = sum(weights.values()) + weighted_score = sum(contributions[key] * weights[key] for key in contributions) + normalized_score = weighted_score / total_weight if total_weight else 0.0 + final_score = round(normalized_score * 100, 2) + + return { + "cve": cve_id, + "epss": round(epss, 4), + "kev": kev_present, + "version_lag_days": round(lag_days, 2), + "exposure_flags": exposure_flags, + "risk_breakdown": { + "weights": dict(weights), + "contributions": contributions, + "normalized_score": round(normalized_score, 4), + }, + "fixops_risk": final_score, + } + + +def compute_risk_profile( + normalized_sbom: Mapping[str, Any], + epss_scores: Mapping[str, float], + kev_entries: Mapping[str, Any], + *, + weights: Mapping[str, float] = DEFAULT_WEIGHTS, +) -> Dict[str, Any]: + """Compute a composite risk profile for the provided SBOM.""" + + with _TRACER.start_as_current_span("risk.compute_profile") as span: + components = [] + cve_index: MutableMapping[str, Dict[str, Any]] = {} + + for component in normalized_sbom.get("components", []): + if not isinstance(component, Mapping): + continue + vulnerabilities = component.get("vulnerabilities") + if not isinstance(vulnerabilities, Sequence): + continue + key = _component_key(component) + slug = component.get("slug") or _slugify(key) + component_entry = { + "id": key, + "slug": slug, + "name": component.get("name"), + "version": component.get("version"), + "purl": component.get("purl"), + "vulnerabilities": [], + "exposure_flags": _collect_exposure_flags( + component.get("exposure"), + component.get("exposure_flags"), + component.get("tags"), + ), + } + max_score = 0.0 + for vulnerability in vulnerabilities: + if not isinstance(vulnerability, Mapping): + continue + scored = _score_vulnerability(component, vulnerability, epss_scores, kev_entries, weights) + if not scored: + continue + component_entry["vulnerabilities"].append(scored) + max_score = max(max_score, scored["fixops_risk"]) + cve_info = cve_index.setdefault( + scored["cve"], + {"cve": scored["cve"], "max_risk": 0.0, "components": []}, + ) + cve_info["max_risk"] = max(cve_info["max_risk"], scored["fixops_risk"]) + if slug not in cve_info["components"]: + 
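+                        # Record each affected component slug only once per CVE so the per-CVE rollup lists unique components.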
cve_info["components"].append(slug) + if component_entry["vulnerabilities"]: + component_entry["component_risk"] = round(max_score, 2) + components.append(component_entry) + + highest_component = max( + components, + key=lambda item: item.get("component_risk", 0.0), + default=None, + ) + + report = { + "generated_at": datetime.now(timezone.utc).isoformat(), + "weights": dict(weights), + "components": sorted(components, key=lambda item: item["id"]), + "cves": { + cve: { + "cve": details["cve"], + "max_risk": round(details["max_risk"], 2), + "components": sorted(details["components"]), + } + for cve, details in cve_index.items() + }, + } + report["summary"] = { + "component_count": len(report["components"]), + "cve_count": len(report["cves"]), + "highest_risk_component": highest_component["slug"] if highest_component else None, + "max_risk_score": highest_component.get("component_risk", 0.0) if highest_component else 0.0, + } + span.set_attribute("fixops.risk.components", report["summary"]["component_count"]) + span.set_attribute("fixops.risk.cves", report["summary"]["cve_count"]) + _RISK_COUNTER.add(1, {"status": "computed"}) + return report + + +def write_risk_report( + normalized_sbom_path: str | Path, + destination: str | Path, + epss_scores: Mapping[str, float], + kev_entries: Mapping[str, Any], + *, + weights: Mapping[str, float] = DEFAULT_WEIGHTS, +) -> Dict[str, Any]: + """Load the normalized SBOM and write a computed risk profile to ``destination``.""" + + sbom_path = Path(normalized_sbom_path) + with sbom_path.open("r", encoding="utf-8") as handle: + normalized = json.load(handle) + + report = compute_risk_profile(normalized, epss_scores, kev_entries, weights=weights) + destination_path = Path(destination) + destination_path.parent.mkdir(parents=True, exist_ok=True) + with destination_path.open("w", encoding="utf-8") as handle: + json.dump(report, handle, indent=2, sort_keys=True) + handle.write("\n") + return report + + +__all__ = [ + "compute_risk_profile", + "write_risk_report", + "DEFAULT_WEIGHTS", + "VERSION_LAG_CAP_DAYS", +] diff --git a/scripts/graph_worker.py b/scripts/graph_worker.py new file mode 100644 index 000000000..db335f4f4 --- /dev/null +++ b/scripts/graph_worker.py @@ -0,0 +1,47 @@ +"""Periodic provenance graph builder for the demo stack.""" + +from __future__ import annotations + +import os +import time +from pathlib import Path + +from services.graph.graph import GraphSources, build_graph_from_sources +from telemetry import configure as configure_telemetry, get_tracer + +configure_telemetry(service_name="fixops-graph-worker") +_TRACER = get_tracer("fixops.graph.worker") + + +def _optional_path(value: str | None) -> Path | None: + if not value: + return None + path = Path(value) + return path if path.exists() else None + + +def main() -> None: + repo_path = Path(os.getenv("FIXOPS_REPO", ".")).resolve() + attest_dir = Path(os.getenv("FIXOPS_ATTESTATIONS", "artifacts/attestations")).resolve() + sbom_path = _optional_path(os.getenv("FIXOPS_NORMALIZED_SBOM", "artifacts/sbom/normalized.json")) + risk_path = _optional_path(os.getenv("FIXOPS_RISK_REPORT", "artifacts/risk.json")) + releases_path = _optional_path(os.getenv("FIXOPS_RELEASES", "analysis/releases.json")) + interval = int(os.getenv("FIXOPS_GRAPH_INTERVAL", "300")) + + sources = GraphSources( + repo_path=repo_path, + attestation_dir=attest_dir, + normalized_sbom=sbom_path, + risk_report=risk_path, + releases_path=releases_path, + ) + + while True: + with _TRACER.start_as_current_span("graph_worker.cycle"): + 
graph = build_graph_from_sources(sources) + graph.close() + time.sleep(max(interval, 60)) + + +if __name__ == "__main__": + main() diff --git a/scripts/signing/sign-artifact.sh b/scripts/signing/sign-artifact.sh new file mode 100755 index 000000000..01bbe556c --- /dev/null +++ b/scripts/signing/sign-artifact.sh @@ -0,0 +1,131 @@ +#!/usr/bin/env bash +set -euo pipefail + +usage() { + cat <<'USAGE' +Usage: sign-artifact.sh --artifact --key [--signature ] [--predicate ] [--attestation-out ] [--bundle-out ] + +Required arguments: + --artifact Path to the release artifact to sign (tarball, image archive, etc.). + --key Path to the Cosign private key file used for signing. You can also + supply COSIGN_KEY_PATH in the environment instead of this flag. + +Optional arguments: + --signature Destination file for the Cosign blob signature (defaults to .sig). + --predicate Path to a predicate payload (for example a SLSA provenance JSON) to wrap in a DSSE attestation. + --attestation-out Destination path for the DSSE envelope generated via cosign attest-blob. Required when --predicate is set. + --bundle-out Optional bundle output written by cosign attest-blob when --predicate is supplied. + +Environment variables: + COSIGN_PASSWORD Password protecting the private key. Required when the key is password protected. + COSIGN_KEY_PATH Alternative way to supply the signing key path instead of --key. + +This helper wraps cosign sign-blob for FixOps release assets. When a predicate is provided +an additional DSSE envelope is created with cosign attest-blob so downstream consumers can +verify provenance alongside detached signatures. +USAGE +} + +ARTIFACT="" +KEY_FILE="${COSIGN_KEY_PATH:-}" +SIGNATURE="" +PREDICATE="" +ATTESTATION_OUT="" +BUNDLE_OUT="" + +while [[ $# -gt 0 ]]; do + case "$1" in + --artifact) + ARTIFACT="$2" + shift 2 + ;; + --key) + KEY_FILE="$2" + shift 2 + ;; + --signature) + SIGNATURE="$2" + shift 2 + ;; + --predicate) + PREDICATE="$2" + shift 2 + ;; + --attestation-out) + ATTESTATION_OUT="$2" + shift 2 + ;; + --bundle-out) + BUNDLE_OUT="$2" + shift 2 + ;; + -h|--help) + usage + exit 0 + ;; + *) + echo "Unknown argument: $1" >&2 + usage >&2 + exit 1 + ;; + esac +done + +if [[ -z "$ARTIFACT" ]]; then + echo "--artifact is required" >&2 + usage >&2 + exit 1 +fi + +if [[ -z "$KEY_FILE" ]]; then + echo "--key or COSIGN_KEY_PATH is required" >&2 + usage >&2 + exit 1 +fi + +if [[ ! -f "$ARTIFACT" ]]; then + echo "Artifact '$ARTIFACT' not found" >&2 + exit 1 +fi + +if [[ ! -f "$KEY_FILE" ]]; then + echo "Key '$KEY_FILE' not found" >&2 + exit 1 +fi + +SIGNATURE="${SIGNATURE:-${ARTIFACT}.sig}" + +cosign sign-blob \ + --key "$KEY_FILE" \ + --output-signature "$SIGNATURE" \ + "$ARTIFACT" + +echo "Wrote signature to $SIGNATURE" >&2 + +if [[ -n "$PREDICATE" ]]; then + if [[ ! 
-f "$PREDICATE" ]]; then + echo "Predicate '$PREDICATE' not found" >&2 + exit 1 + fi + if [[ -z "$ATTESTATION_OUT" ]]; then + echo "--attestation-out must be supplied when --predicate is set" >&2 + exit 1 + fi + cmd=( + cosign attest-blob + --key "$KEY_FILE" + --predicate "$PREDICATE" + --type slsaprovenance + --yes + --output-attestation "$ATTESTATION_OUT" + "$ARTIFACT" + ) + if [[ -n "$BUNDLE_OUT" ]]; then + cmd+=(--bundle "$BUNDLE_OUT") + fi + "${cmd[@]}" + echo "Wrote DSSE attestation to $ATTESTATION_OUT" >&2 + if [[ -n "$BUNDLE_OUT" ]]; then + echo "Wrote verification bundle to $BUNDLE_OUT" >&2 + fi +fi diff --git a/scripts/signing/verify-artifact.sh b/scripts/signing/verify-artifact.sh new file mode 100755 index 000000000..2ef71bc47 --- /dev/null +++ b/scripts/signing/verify-artifact.sh @@ -0,0 +1,102 @@ +#!/usr/bin/env bash +set -euo pipefail + +usage() { + cat <<'USAGE' +Usage: verify-artifact.sh --artifact --key [--signature ] [--bundle ] + +Required arguments: + --artifact Path to the artifact whose integrity you want to check. + --key Path to the Cosign public key used for verification. You can also provide + COSIGN_PUBLIC_KEY in the environment instead of this flag. + +Optional arguments: + --signature Detached signature file emitted by cosign sign-blob. Defaults to .sig. + --bundle Attestation bundle generated by cosign attest-blob. When provided the script + will run cosign verify-blob with the bundle to validate the DSSE envelope. + +Environment variables: + COSIGN_PUBLIC_KEY Alternative to --key for setting the verification key path. + +The helper runs cosign verify-blob to ensure the signature (and optional attestation bundle) +match the supplied artifact. This mirrors the checks executed in CI release signing. +USAGE +} + +ARTIFACT="" +KEY_FILE="${COSIGN_PUBLIC_KEY:-}" +SIGNATURE="" +BUNDLE="" + +while [[ $# -gt 0 ]]; do + case "$1" in + --artifact) + ARTIFACT="$2" + shift 2 + ;; + --key) + KEY_FILE="$2" + shift 2 + ;; + --signature) + SIGNATURE="$2" + shift 2 + ;; + --bundle) + BUNDLE="$2" + shift 2 + ;; + -h|--help) + usage + exit 0 + ;; + *) + echo "Unknown argument: $1" >&2 + usage >&2 + exit 1 + ;; + esac +done + +if [[ -z "$ARTIFACT" ]]; then + echo "--artifact is required" >&2 + usage >&2 + exit 1 +fi + +if [[ -z "$KEY_FILE" ]]; then + echo "--key or COSIGN_PUBLIC_KEY is required" >&2 + usage >&2 + exit 1 +fi + +if [[ ! -f "$ARTIFACT" ]]; then + echo "Artifact '$ARTIFACT' not found" >&2 + exit 1 +fi + +if [[ ! -f "$KEY_FILE" ]]; then + echo "Key '$KEY_FILE' not found" >&2 + exit 1 +fi + +SIGNATURE="${SIGNATURE:-${ARTIFACT}.sig}" + +cosign verify-blob \ + --key "$KEY_FILE" \ + --signature "$SIGNATURE" \ + "$ARTIFACT" + +echo "Verified detached signature for $ARTIFACT" >&2 + +if [[ -n "$BUNDLE" ]]; then + if [[ ! 
-f "$BUNDLE" ]]; then + echo "Bundle '$BUNDLE' not found" >&2 + exit 1 + fi + cosign verify-blob \ + --key "$KEY_FILE" \ + --bundle "$BUNDLE" \ + "$ARTIFACT" + echo "Verified attestation bundle for $ARTIFACT" >&2 +fi diff --git a/services/graph/__init__.py b/services/graph/__init__.py new file mode 100644 index 000000000..a948a5f73 --- /dev/null +++ b/services/graph/__init__.py @@ -0,0 +1,9 @@ +"""Provenance graph service utilities.""" + +from .graph import ProvenanceGraph, build_graph_from_sources, collect_git_history + +__all__ = [ + "ProvenanceGraph", + "build_graph_from_sources", + "collect_git_history", +] diff --git a/services/graph/graph.py b/services/graph/graph.py new file mode 100644 index 000000000..662342f40 --- /dev/null +++ b/services/graph/graph.py @@ -0,0 +1,562 @@ +"""Provenance graph construction utilities backed by SQLite and NetworkX.""" + +from __future__ import annotations + +import json +import sqlite3 +import subprocess +from collections import defaultdict +from dataclasses import dataclass +from datetime import datetime +from pathlib import Path +from typing import Any, Iterable, Mapping, MutableMapping, Sequence + +import networkx as nx +from packaging.version import InvalidVersion, Version + +from services.provenance.attestation import ProvenanceAttestation, load_attestation +from telemetry import get_meter, get_tracer + + +@dataclass +class GraphSources: + """Container for filesystem locations consumed when building the graph.""" + + repo_path: Path + attestation_dir: Path + normalized_sbom: Path | None + risk_report: Path | None + releases_path: Path | None + + +_TRACER = get_tracer("fixops.graph") +_METER = get_meter("fixops.graph") +_INGEST_COUNTER = _METER.create_counter( + "fixops_graph_ingest", + description="Graph ingestion operations", +) +_QUERY_COUNTER = _METER.create_counter( + "fixops_graph_queries", + description="Graph query executions", +) + + +def _ensure_datetime(value: Any) -> datetime | None: + if isinstance(value, datetime): + return value + if isinstance(value, (int, float)): + return datetime.fromtimestamp(float(value)) + if isinstance(value, str): + try: + return datetime.fromisoformat(value.replace("Z", "+00:00")) + except ValueError: + return None + return None + + +def _component_key(component: Mapping[str, Any]) -> str: + purl = component.get("purl") + if isinstance(purl, str) and purl: + return purl + name = component.get("name") or "component" + version = component.get("version") or "unspecified" + return f"{name}@{version}" + + +def _component_slug(component: Mapping[str, Any]) -> str: + slug_source = component.get("slug") or _component_key(component) + slug = slug_source.lower() + for char in ("/", ":", " ", "@", "|"): + slug = slug.replace(char, "-") + while "--" in slug: + slug = slug.replace("--", "-") + return slug.strip("-") or "component" + + +class ProvenanceGraph: + """Manage a provenance graph persisted in SQLite and exposed via NetworkX.""" + + def __init__(self, *, db_path: str | Path = ":memory:") -> None: + self.db_path = str(db_path) + self.connection = sqlite3.connect(self.db_path) + self.connection.execute( + """ + CREATE TABLE IF NOT EXISTS nodes ( + id TEXT PRIMARY KEY, + type TEXT NOT NULL, + data TEXT NOT NULL + ) + """ + ) + self.connection.execute( + """ + CREATE TABLE IF NOT EXISTS edges ( + source TEXT NOT NULL, + target TEXT NOT NULL, + type TEXT NOT NULL, + data TEXT NOT NULL, + UNIQUE(source, target, type) + ) + """ + ) + self.graph = nx.MultiDiGraph() + + def close(self) -> None: + 
self.connection.close() + + # ------------------------------------------------------------------ + # Internal helpers + def _upsert_node(self, node_id: str, node_type: str, **attrs: Any) -> None: + existing = self.graph.nodes.get(node_id, {}) + merged = {**existing, **attrs, "type": node_type} + self.graph.add_node(node_id, **merged) + self.connection.execute( + "REPLACE INTO nodes(id, type, data) VALUES (?, ?, ?)", + (node_id, node_type, json.dumps(merged, sort_keys=True)), + ) + self.connection.commit() + + def _add_edge(self, source: str, target: str, relation: str, **attrs: Any) -> None: + payload = {**attrs, "relation": relation} + self.graph.add_edge(source, target, relation=relation, **attrs) + self.connection.execute( + "REPLACE INTO edges(source, target, type, data) VALUES (?, ?, ?, ?)", + (source, target, relation, json.dumps(payload, sort_keys=True)), + ) + self.connection.commit() + + # ------------------------------------------------------------------ + # Ingestion methods + def ingest_commits(self, commits: Sequence[Mapping[str, Any]]) -> None: + with _TRACER.start_as_current_span("graph.ingest_commits") as span: + count = 0 + for entry in commits: + sha = entry.get("sha") + if not isinstance(sha, str): + continue + count += 1 + node_id = f"commit:{sha}" + self._upsert_node( + node_id, + "commit", + sha=sha, + author=entry.get("author"), + message=entry.get("message"), + timestamp=entry.get("timestamp"), + ) + parents = entry.get("parents") or [] + if isinstance(parents, (list, tuple)): + for parent in parents: + if isinstance(parent, str) and parent: + parent_id = f"commit:{parent}" + self._upsert_node(parent_id, "commit", sha=parent) + self._add_edge(parent_id, node_id, "parent_of") + span.set_attribute("fixops.graph.commit_count", count) + if count: + _INGEST_COUNTER.add(count, {"type": "commits"}) + + def ingest_attestations(self, attestations: Iterable[ProvenanceAttestation]) -> None: + with _TRACER.start_as_current_span("graph.ingest_attestations") as span: + count = 0 + for attestation in attestations: + count += 1 + attestation_id = f"attestation:{attestation.metadata.get('buildInvocationID', attestation.metadata.get('buildStartedOn', 'unknown'))}" + self._upsert_node( + attestation_id, + "attestation", + builder=attestation.builder.get("id"), + build_type=attestation.buildType, + source=attestation.source.get("uri"), + metadata=attestation.metadata, + ) + builder_id = attestation.builder.get("id") + if isinstance(builder_id, str) and builder_id: + builder_node = f"builder:{builder_id}" + self._upsert_node(builder_node, "builder", id=builder_id) + self._add_edge(builder_node, attestation_id, "built") + source_uri = attestation.source.get("uri") + if isinstance(source_uri, str) and source_uri: + source_node = f"source:{source_uri}" + self._upsert_node(source_node, "source", uri=source_uri) + self._add_edge(source_node, attestation_id, "triggered") + for subject in attestation.subject: + artifact_node = f"artifact:{subject.name}" + self._upsert_node( + artifact_node, + "artifact", + name=subject.name, + digest=subject.digest, + ) + self._add_edge(attestation_id, artifact_node, "produced") + for material in attestation.materials: + material_node = f"material:{material.uri}" + self._upsert_node( + material_node, + "material", + uri=material.uri, + digest=material.digest, + ) + self._add_edge(material_node, attestation_id, "consumed") + span.set_attribute("fixops.graph.attestation_count", count) + if count: + _INGEST_COUNTER.add(count, {"type": "attestation"}) + + def 
ingest_normalized_sbom(self, normalized_path: Path) -> None: + if not normalized_path.is_file(): + return + with _TRACER.start_as_current_span("graph.ingest_sbom") as span: + payload = json.loads(normalized_path.read_text(encoding="utf-8")) + sbom_id = f"sbom:{normalized_path.name}" + self._upsert_node( + sbom_id, + "sbom", + path=str(normalized_path), + metadata=payload.get("metadata"), + ) + component_count = 0 + for component in payload.get("components", []): + if not isinstance(component, Mapping): + continue + component_count += 1 + slug = _component_slug(component) + node_id = f"component:{slug}" + key = _component_key(component) + attributes = { + "slug": slug, + "key": key, + "name": component.get("name"), + "version": component.get("version"), + "purl": component.get("purl"), + "licenses": component.get("licenses"), + "hashes": component.get("hashes"), + "generators": component.get("generators"), + } + self._upsert_node(node_id, "component", **attributes) + self._add_edge(sbom_id, node_id, "includes") + span.set_attribute("fixops.graph.sbom_components", component_count) + if component_count: + _INGEST_COUNTER.add(component_count, {"type": "sbom"}) + + def ingest_risk_report(self, report_path: Path) -> None: + if not report_path.is_file(): + return + with _TRACER.start_as_current_span("graph.ingest_risk") as span: + payload = json.loads(report_path.read_text(encoding="utf-8")) + component_count = 0 + for component in payload.get("components", []): + if not isinstance(component, Mapping): + continue + component_count += 1 + slug = component.get("slug") or _component_slug(component) + node_id = f"component:{slug}" + self._upsert_node( + node_id, + "component", + slug=slug, + key=component.get("id") or _component_key(component), + name=component.get("name"), + version=component.get("version"), + purl=component.get("purl"), + exposure_flags=component.get("exposure_flags"), + component_risk=component.get("component_risk"), + ) + for vulnerability in component.get("vulnerabilities", []): + if not isinstance(vulnerability, Mapping): + continue + cve = vulnerability.get("cve") + if not isinstance(cve, str): + continue + cve_id = cve.upper() + cve_node = f"cve:{cve_id}" + self._upsert_node( + cve_node, + "cve", + cve=cve_id, + kev=bool(vulnerability.get("kev")), + epss=vulnerability.get("epss"), + ) + self._add_edge( + node_id, + cve_node, + "affects", + kev=bool(vulnerability.get("kev")), + risk=vulnerability.get("fixops_risk"), + ) + cves = payload.get("cves", {}) + if isinstance(cves, Mapping): + for cve_id, details in cves.items(): + if not isinstance(details, Mapping): + continue + node_id = f"cve:{str(cve_id).upper()}" + attrs = { + "cve": str(cve_id).upper(), + "max_risk": details.get("max_risk"), + "components": details.get("components"), + } + existing = self.graph.nodes.get(node_id, {}) + attrs.setdefault("kev", existing.get("kev", False)) + attrs.setdefault("epss", existing.get("epss")) + self._upsert_node(node_id, "cve", **attrs) + span.set_attribute("fixops.graph.risk_components", component_count) + if component_count: + _INGEST_COUNTER.add(component_count, {"type": "risk"}) + + def ingest_releases(self, releases: Sequence[Mapping[str, Any]]) -> None: + with _TRACER.start_as_current_span("graph.ingest_releases") as span: + release_count = 0 + for index, release in enumerate(releases): + tag = release.get("tag") + if not isinstance(tag, str): + continue + release_count += 1 + release_node = f"release:{tag}" + released_at = _ensure_datetime(release.get("date")) + if released_at 
is None and isinstance(release.get("date"), (int, float)): + released_at = _ensure_datetime(float(release["date"])) + iso_date = released_at.isoformat() if released_at else release.get("date") + self._upsert_node( + release_node, + "release", + tag=tag, + date=iso_date, + order=index, + ) + for artifact in release.get("artifacts", []) or []: + if not isinstance(artifact, str): + continue + artifact_node = f"artifact:{artifact}" + self._upsert_node(artifact_node, "artifact", name=artifact) + self._add_edge(release_node, artifact_node, "ships") + for component in release.get("components", []) or []: + if isinstance(component, Mapping): + slug = component.get("slug") or _component_slug(component) + node_id = f"component:{slug}" + self._upsert_node( + node_id, + "component", + slug=slug, + name=component.get("name"), + version=component.get("version"), + purl=component.get("purl"), + ) + self._add_edge( + release_node, + node_id, + "includes_component", + version=component.get("version"), + ) + span.set_attribute("fixops.graph.release_count", release_count) + if release_count: + _INGEST_COUNTER.add(release_count, {"type": "release"}) + + # ------------------------------------------------------------------ + # Queries + def lineage(self, artifact_name: str) -> dict[str, Any]: + with _TRACER.start_as_current_span("graph.lineage") as span: + span.set_attribute("fixops.graph.artifact", artifact_name) + target_node = None + for node_id, attrs in self.graph.nodes(data=True): + if attrs.get("type") == "artifact" and ( + attrs.get("name") == artifact_name or node_id == f"artifact:{artifact_name}" + ): + target_node = node_id + break + if target_node is None: + return {"artifact": artifact_name, "nodes": [], "edges": []} + ancestors = nx.ancestors(self.graph, target_node) + relevant = ancestors | {target_node} + nodes = [ + {"id": node_id, **self.graph.nodes[node_id]} + for node_id in sorted(relevant) + ] + edges = [] + for source, target, data in self.graph.edges(data=True): + if source in relevant and target in relevant: + edges.append({"source": source, "target": target, **data}) + edges.sort(key=lambda item: (item["source"], item["target"], item.get("relation", ""))) + _QUERY_COUNTER.add(1, {"type": "lineage"}) + span.set_attribute("fixops.graph.lineage_nodes", len(nodes)) + return {"artifact": artifact_name, "nodes": nodes, "edges": edges} + + def components_with_kev(self, last_releases: int = 1) -> list[dict[str, Any]]: + with _TRACER.start_as_current_span("graph.components_with_kev") as span: + span.set_attribute("fixops.graph.release_window", last_releases) + releases = [ + (node_id, attrs) + for node_id, attrs in self.graph.nodes(data=True) + if attrs.get("type") == "release" + ] + if not releases: + return [] + + def _sort_key(item: tuple[str, MutableMapping[str, Any]]) -> tuple[Any, int]: + attrs = item[1] + parsed = _ensure_datetime(attrs.get("date")) + return (parsed or datetime.min, attrs.get("order", 0)) + + releases.sort(key=_sort_key) + selected = releases[-last_releases:] + results: list[dict[str, Any]] = [] + for node_id, attrs in selected: + entry = {"release": attrs.get("tag"), "date": attrs.get("date"), "components": []} + for _, component_node, edge_data in self.graph.out_edges(node_id, data=True): + if edge_data.get("relation") != "includes_component": + continue + component_attrs = self.graph.nodes[component_node] + kev_cves: set[str] = set() + for _, cve_node, vulnerability in self.graph.out_edges(component_node, data=True): + if vulnerability.get("relation") != "affects": + 
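+                            # Only component -> CVE "affects" edges contribute to the KEV summary; skip other relations.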
continue + if vulnerability.get("kev") or self.graph.nodes[cve_node].get("kev"): + kev_cves.add(self.graph.nodes[cve_node].get("cve", cve_node)) + if kev_cves: + entry["components"].append( + { + "component": component_attrs.get("slug") or component_attrs.get("name"), + "version": edge_data.get("version") or component_attrs.get("version"), + "cves": sorted(kev_cves), + } + ) + if entry["components"]: + results.append(entry) + _QUERY_COUNTER.add(1, {"type": "kev_components"}) + span.set_attribute("fixops.graph.kev_results", len(results)) + return results + + def detect_version_anomalies(self) -> list[dict[str, Any]]: + with _TRACER.start_as_current_span("graph.detect_anomalies") as span: + releases_by_component: MutableMapping[str, list[tuple[datetime, str, str | None]]] = defaultdict(list) + for release_node, component_node, edge_data in self.graph.edges(data=True): + if edge_data.get("relation") != "includes_component": + continue + release_attrs = self.graph.nodes.get(release_node, {}) + if release_attrs.get("type") != "release": + continue + component_attrs = self.graph.nodes.get(component_node, {}) + if component_attrs.get("type") != "component": + continue + released_at = _ensure_datetime(release_attrs.get("date")) or datetime.min + releases_by_component[component_node].append( + (released_at, release_attrs.get("tag"), edge_data.get("version")) + ) + anomalies: list[dict[str, Any]] = [] + for component_node, entries in releases_by_component.items(): + entries.sort(key=lambda item: item[0]) + previous_version: Version | None = None + previous_release: str | None = None + for released_at, release_tag, version_str in entries: + try: + current_version = Version(version_str) if version_str else None + except InvalidVersion: + current_version = None + if previous_version and current_version and current_version < previous_version: + component_attrs = self.graph.nodes[component_node] + anomalies.append( + { + "component": component_attrs.get("slug") or component_attrs.get("name"), + "release": release_tag, + "version": version_str, + "previous_release": previous_release, + "previous_version": str(previous_version), + } + ) + if current_version is not None: + previous_version = current_version + previous_release = release_tag + _QUERY_COUNTER.add(1, {"type": "anomalies"}) + span.set_attribute("fixops.graph.anomaly_count", len(anomalies)) + return anomalies + + +def collect_git_history(repo_path: Path, *, limit: int = 100) -> list[dict[str, Any]]: + """Collect commit metadata using ``git`` commands.""" + + if not repo_path.exists(): + return [] + format_token = "%H%x1f%P%x1f%an%x1f%ad%x1f%s%x1e" + with _TRACER.start_as_current_span("graph.collect_git_history") as span: + span.set_attribute("fixops.graph.limit", limit) + span.set_attribute("fixops.graph.repo", str(repo_path)) + try: + completed = subprocess.run( + [ + "git", + "-C", + str(repo_path), + "log", + f"--max-count={limit}", + f"--pretty=format:{format_token}", + "--date=iso-strict", + ], + check=True, + capture_output=True, + text=True, + ) + except subprocess.CalledProcessError: + return [] + commits: list[dict[str, Any]] = [] + for entry in completed.stdout.strip("\n\x1e").split("\x1e"): + if not entry: + continue + sha, parents, author, timestamp, message = (entry.split("\x1f") + [""])[:5] + commits.append( + { + "sha": sha, + "parents": [p for p in parents.split() if p], + "author": author, + "timestamp": timestamp, + "message": message, + } + ) + span.set_attribute("fixops.graph.git_commits", len(commits)) + return commits + + 
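+# The loaders below read attestation JSON and release manifests from disk, skipping files that are missing or malformed.
+# They feed build_graph_from_sources(), which chains git history, attestation, SBOM, risk, and release ingestion.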
+def _load_attestations(attestation_dir: Path) -> list[ProvenanceAttestation]: + if not attestation_dir.is_dir(): + return [] + attestations: list[ProvenanceAttestation] = [] + for path in sorted(attestation_dir.glob("*.json")): + try: + attestations.append(load_attestation(path)) + except Exception: # pragma: no cover - defensive against user supplied files + continue + return attestations + + +def _load_releases(path: Path) -> list[Mapping[str, Any]]: + if not path or not path.is_file(): + return [] + try: + payload = json.loads(path.read_text(encoding="utf-8")) + except json.JSONDecodeError: + return [] + releases = payload.get("releases") + if isinstance(releases, Sequence): + return [release for release in releases if isinstance(release, Mapping)] + if isinstance(payload, Sequence): + return [release for release in payload if isinstance(release, Mapping)] + return [] + + +def build_graph_from_sources(sources: GraphSources) -> ProvenanceGraph: + """Construct a provenance graph by ingesting known FixOps artefacts.""" + + with _TRACER.start_as_current_span("graph.build_from_sources") as span: + graph = ProvenanceGraph() + commits = collect_git_history(sources.repo_path) + graph.ingest_commits(commits) + attestations = _load_attestations(sources.attestation_dir) + graph.ingest_attestations(attestations) + if sources.normalized_sbom: + graph.ingest_normalized_sbom(sources.normalized_sbom) + if sources.risk_report: + graph.ingest_risk_report(sources.risk_report) + releases = _load_releases(sources.releases_path) if sources.releases_path else [] + graph.ingest_releases(releases) + span.set_attribute("fixops.graph.nodes", graph.graph.number_of_nodes()) + span.set_attribute("fixops.graph.edges", graph.graph.number_of_edges()) + return graph + + +__all__ = ["GraphSources", "ProvenanceGraph", "build_graph_from_sources", "collect_git_history"] diff --git a/services/graph/tests/test_graph.py b/services/graph/tests/test_graph.py new file mode 100644 index 000000000..855333d02 --- /dev/null +++ b/services/graph/tests/test_graph.py @@ -0,0 +1,124 @@ +import json +from pathlib import Path + +from services.graph.graph import ProvenanceGraph +from services.provenance.attestation import generate_attestation + + +def _write_json(path: Path, payload: dict) -> Path: + path.write_text(json.dumps(payload), encoding="utf-8") + return path + + +def test_graph_queries(tmp_path: Path) -> None: + artefact = tmp_path / "demo-artifact.tar.gz" + artefact.write_bytes(b"demo") + + attestation = generate_attestation( + artefact, + builder_id="builder://ci/demo", + source_uri="git+https://example.com/demo.git", + build_type="https://example.com/schema/build", + ) + attestation.metadata["buildInvocationID"] = "job-42" + + graph = ProvenanceGraph() + graph.ingest_commits( + [ + { + "sha": "abc123", + "parents": [], + "author": "Dev One", + "timestamp": "2024-01-01T00:00:00Z", + "message": "Initial", + }, + { + "sha": "def456", + "parents": ["abc123"], + "author": "Dev Two", + "timestamp": "2024-01-02T00:00:00Z", + "message": "Build", + }, + ] + ) + graph.ingest_attestations([attestation]) + + normalized = { + "metadata": {"total_components": 1}, + "components": [ + { + "name": "pkgA", + "version": "1.2.0", + "purl": "pkg:pypi/pkgA@1.2.0", + "licenses": ["MIT"], + "hashes": {"SHA256": "a" * 64}, + "generators": ["Syft"], + "slug": "pkg-a", + } + ], + } + normalized_path = _write_json(tmp_path / "normalized.json", normalized) + graph.ingest_normalized_sbom(normalized_path) + + risk_report = { + "components": [ + { + "id": 
"pkgA@1.2.0", + "slug": "pkg-a", + "name": "pkgA", + "version": "1.2.0", + "purl": "pkg:pypi/pkgA@1.2.0", + "component_risk": 88.0, + "vulnerabilities": [ + { + "cve": "CVE-2024-0001", + "epss": 0.9, + "kev": True, + "fixops_risk": 88.0, + } + ], + } + ], + "cves": { + "CVE-2024-0001": { + "cve": "CVE-2024-0001", + "max_risk": 88.0, + "components": ["pkg-a"], + } + }, + } + risk_path = _write_json(tmp_path / "risk.json", risk_report) + graph.ingest_risk_report(risk_path) + + releases = [ + { + "tag": "v1.2.0", + "date": "2024-01-05T00:00:00Z", + "artifacts": [artefact.name], + "components": [ + {"slug": "pkg-a", "name": "pkgA", "version": "1.2.0"} + ], + }, + { + "tag": "v1.0.0", + "date": "2024-02-05T00:00:00Z", + "components": [ + {"slug": "pkg-a", "name": "pkgA", "version": "1.0.0"} + ], + }, + ] + graph.ingest_releases(releases) + + lineage = graph.lineage(artefact.name) + assert lineage["nodes"] + assert any(edge["relation"] == "produced" for edge in lineage["edges"]) + + kev_components = graph.components_with_kev(last_releases=2) + assert kev_components + assert any(entry["components"] for entry in kev_components) + + anomalies = graph.detect_version_anomalies() + assert anomalies + assert any(item["release"] == "v1.0.0" for item in anomalies) + + graph.close() diff --git a/services/provenance/__init__.py b/services/provenance/__init__.py new file mode 100644 index 000000000..643b13c00 --- /dev/null +++ b/services/provenance/__init__.py @@ -0,0 +1,24 @@ +"""SLSA provenance attestation utilities.""" +from .attestation import ( + ProvenanceAttestation, + ProvenanceMaterial, + ProvenanceSubject, + ProvenanceVerificationError, + compute_sha256, + generate_attestation, + load_attestation, + verify_attestation, + write_attestation, +) + +__all__ = [ + "ProvenanceAttestation", + "ProvenanceMaterial", + "ProvenanceSubject", + "ProvenanceVerificationError", + "compute_sha256", + "generate_attestation", + "load_attestation", + "verify_attestation", + "write_attestation", +] diff --git a/services/provenance/attestation.py b/services/provenance/attestation.py new file mode 100644 index 000000000..3f0be1f0e --- /dev/null +++ b/services/provenance/attestation.py @@ -0,0 +1,296 @@ +"""Utilities for generating and verifying SLSA v1 provenance attestations.""" +from __future__ import annotations + +import json +from dataclasses import asdict, dataclass, field +from datetime import datetime, timezone, timedelta +from hashlib import sha256 +from pathlib import Path +from typing import Any, Mapping, MutableMapping, Sequence + +from telemetry import get_meter, get_tracer + +SLSA_VERSION = "1.0" + +_TRACER = get_tracer("fixops.provenance") +_COUNTER = get_meter("fixops.provenance").create_counter( + "fixops_provenance_operations", + description="Count of provenance attestation operations", +) + + +class ProvenanceVerificationError(Exception): + """Raised when provenance verification fails.""" + + +@dataclass(slots=True) +class ProvenanceSubject: + """Describes the subject of the attestation (i.e., produced artefact).""" + + name: str + digest: MutableMapping[str, str] + + +@dataclass(slots=True) +class ProvenanceMaterial: + """Describes a build material consumed during attestation.""" + + uri: str + digest: MutableMapping[str, str] | None = None + + +@dataclass(slots=True) +class ProvenanceAttestation: + """Structured representation of a SLSA v1 provenance statement.""" + + slsaVersion: str + builder: MutableMapping[str, Any] + buildType: str + source: MutableMapping[str, Any] + metadata: 
MutableMapping[str, Any] + subject: list[ProvenanceSubject] = field(default_factory=list) + materials: list[ProvenanceMaterial] = field(default_factory=list) + + def to_dict(self) -> dict[str, Any]: + """Return a JSON-serialisable representation of the attestation.""" + + return asdict(self) + + def to_json(self, *, indent: int = 2) -> str: + """Serialise the attestation to JSON text.""" + + return json.dumps(self.to_dict(), indent=indent, sort_keys=True) + + @classmethod + def from_dict(cls, payload: Mapping[str, Any]) -> "ProvenanceAttestation": + """Hydrate an attestation from a dictionary, validating basic structure.""" + + try: + version = payload["slsaVersion"] + builder = payload["builder"] + build_type = payload["buildType"] + source = payload["source"] + metadata = payload["metadata"] + raw_subjects = payload.get("subject", []) + raw_materials = payload.get("materials", []) + except KeyError as exc: # pragma: no cover - defensive guard + raise ProvenanceVerificationError( + f"Missing required attestation field: {exc.args[0]}" + ) from exc + + if version != SLSA_VERSION: + raise ProvenanceVerificationError( + f"Unsupported SLSA version: {version!r}; expected {SLSA_VERSION!r}" + ) + + subjects = [ + ProvenanceSubject(name=item["name"], digest=dict(item["digest"])) + for item in raw_subjects + ] + materials = [ + ProvenanceMaterial( + uri=item["uri"], + digest=dict(item.get("digest", {})) if item.get("digest") else None, + ) + for item in raw_materials + ] + return cls( + slsaVersion=version, + builder=dict(builder), + buildType=build_type, + source=dict(source), + metadata=dict(metadata), + subject=subjects, + materials=materials, + ) + + +def _ensure_metadata(metadata: Mapping[str, Any] | None) -> dict[str, Any]: + """Return attestation metadata with timestamps ensured.""" + + now = datetime.now(timezone.utc) + formatted_now = now.replace(microsecond=0).isoformat().replace("+00:00", "Z") + defaults: dict[str, Any] = { + "buildStartedOn": formatted_now, + "buildFinishedOn": formatted_now, + "reproducible": True, + } + if metadata: + defaults.update(metadata) + return defaults + + +def compute_sha256(path: Path | str) -> str: + """Compute the SHA-256 digest for the file located at *path*.""" + + with _TRACER.start_as_current_span("provenance.compute_sha256") as span: + resolved = Path(path) + span.set_attribute("fixops.artifact", str(resolved)) + if not resolved.is_file(): + raise FileNotFoundError(f"Artefact '{resolved}' does not exist or is not a file") + + digest = sha256() + with resolved.open("rb") as handle: + for chunk in iter(lambda: handle.read(8192), b""): + digest.update(chunk) + hex_digest = digest.hexdigest() + span.set_attribute("fixops.sha256", hex_digest) + return hex_digest + + +def _normalise_materials(materials: Sequence[Mapping[str, Any]] | None) -> list[ProvenanceMaterial]: + """Convert user-supplied material mappings to dataclass instances.""" + + normalised: list[ProvenanceMaterial] = [] + if not materials: + return normalised + for item in materials: + if "uri" not in item: + raise ValueError("Each material must include a 'uri' field") + digest_mapping = item.get("digest") + normalised.append( + ProvenanceMaterial( + uri=str(item["uri"]), + digest=dict(digest_mapping) if digest_mapping else None, + ) + ) + return normalised + + +def generate_attestation( + artefact_path: Path | str, + *, + builder_id: str, + source_uri: str, + build_type: str, + materials: Sequence[Mapping[str, Any]] | None = None, + metadata: Mapping[str, Any] | None = None, +) -> 
ProvenanceAttestation: + """Create a provenance attestation for *artefact_path* following SLSA v1.""" + + with _TRACER.start_as_current_span("provenance.generate_attestation") as span: + path = Path(artefact_path) + span.set_attribute("fixops.artifact", str(path)) + span.set_attribute("fixops.builder", builder_id) + span.set_attribute("fixops.source_uri", source_uri) + digest = compute_sha256(path) + metadata_block = _ensure_metadata(metadata) + subject = ProvenanceSubject( + name=path.name, + digest={"sha256": digest}, + ) + attestation = ProvenanceAttestation( + slsaVersion=SLSA_VERSION, + builder={"id": builder_id}, + buildType=build_type, + source={"uri": source_uri}, + metadata=metadata_block, + subject=[subject], + materials=_normalise_materials(materials), + ) + _COUNTER.add(1, {"action": "generate"}) + return attestation + + +def load_attestation(source: Path | str | Mapping[str, Any] | ProvenanceAttestation) -> ProvenanceAttestation: + """Load an attestation from a path, mapping or existing object.""" + + with _TRACER.start_as_current_span("provenance.load_attestation") as span: + if isinstance(source, ProvenanceAttestation): + span.set_attribute("fixops.source", "object") + return source + if isinstance(source, Mapping): + span.set_attribute("fixops.source", "mapping") + return ProvenanceAttestation.from_dict(source) + + path = Path(source) + span.set_attribute("fixops.source", str(path)) + with path.open("r", encoding="utf-8") as handle: + payload = json.load(handle) + _COUNTER.add(1, {"action": "load"}) + return ProvenanceAttestation.from_dict(payload) + + +def write_attestation(attestation: ProvenanceAttestation, destination: Path | str) -> Path: + """Persist *attestation* to *destination* as JSON.""" + + with _TRACER.start_as_current_span("provenance.write_attestation") as span: + path = Path(destination) + span.set_attribute("fixops.destination", str(path)) + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(attestation.to_json(indent=2), encoding="utf-8") + _COUNTER.add(1, {"action": "write"}) + return path + + +def _expect_field(value: Any, description: str) -> Any: + if not value: + raise ProvenanceVerificationError(f"Attestation missing required {description}") + return value + + +def verify_attestation( + attestation: ProvenanceAttestation | Mapping[str, Any] | Path | str, + *, + artefact_path: Path | str, + builder_id: str | None = None, + source_uri: str | None = None, + build_type: str | None = None, +) -> None: + """Validate that *attestation* matches the provided artefact and expectations.""" + + statement = load_attestation(attestation) + path = Path(artefact_path) + expected_digest = compute_sha256(path) + + subjects = _expect_field(statement.subject, "subject entry") + subject = next((item for item in subjects if item.name == path.name), subjects[0]) + attested_digest = subject.digest.get("sha256") + if attested_digest != expected_digest: + raise ProvenanceVerificationError( + "Attestation digest does not match artefact contents" + ) + + if builder_id is not None and statement.builder.get("id") != builder_id: + raise ProvenanceVerificationError( + f"Builder ID mismatch: expected {builder_id!r} got {statement.builder.get('id')!r}" + ) + if source_uri is not None and statement.source.get("uri") != source_uri: + raise ProvenanceVerificationError( + f"Source URI mismatch: expected {source_uri!r} got {statement.source.get('uri')!r}" + ) + if build_type is not None and statement.buildType != build_type: + raise ProvenanceVerificationError( + f"Build 
type mismatch: expected {build_type!r} got {statement.buildType!r}" + ) + + _expect_field(statement.metadata, "metadata block") + + # Basic sanity check that timestamps are not in the future beyond a 5 minute tolerance. + finished_on = statement.metadata.get("buildFinishedOn") + if finished_on: + try: + parsed = datetime.fromisoformat(finished_on.replace("Z", "+00:00")) + now = datetime.now(timezone.utc) + if parsed - now > timedelta(minutes=5): # pragma: no cover - defensive + raise ProvenanceVerificationError( + "Attestation completion time is unreasonably in the future" + ) + except ValueError: # pragma: no cover - defensive guard + raise ProvenanceVerificationError("Invalid buildFinishedOn timestamp format") + + # No return value on success. + + +__all__ = [ + "ProvenanceAttestation", + "ProvenanceMaterial", + "ProvenanceSubject", + "ProvenanceVerificationError", + "SLSA_VERSION", + "compute_sha256", + "generate_attestation", + "load_attestation", + "verify_attestation", + "write_attestation", +] diff --git a/services/provenance/tests/test_attestation.py b/services/provenance/tests/test_attestation.py new file mode 100644 index 000000000..ac5a7d850 --- /dev/null +++ b/services/provenance/tests/test_attestation.py @@ -0,0 +1,114 @@ +from __future__ import annotations + +import json +from pathlib import Path + +import pytest + +from services.provenance import ( + ProvenanceVerificationError, + generate_attestation, + verify_attestation, + write_attestation, +) +from cli.fixops_provenance import main as provenance_main + + +@pytest.fixture() +def sample_artifact(tmp_path: Path) -> Path: + path = tmp_path / "artifact.txt" + path.write_text("fixops", encoding="utf-8") + return path + + +def test_generate_attestation_contains_expected_fields(sample_artifact: Path) -> None: + attestation = generate_attestation( + sample_artifact, + builder_id="urn:fixops:builder:test", + source_uri="https://example.com/repo", + build_type="https://example.com/build", + materials=[{"uri": "https://example.com/material"}], + metadata={"invocationId": "123"}, + ) + payload = attestation.to_dict() + assert payload["slsaVersion"] == "1.0" + assert payload["builder"]["id"] == "urn:fixops:builder:test" + assert payload["source"]["uri"] == "https://example.com/repo" + assert payload["buildType"] == "https://example.com/build" + assert payload["subject"][0]["digest"]["sha256"] + assert payload["materials"][0]["uri"] == "https://example.com/material" + assert payload["metadata"]["invocationId"] == "123" + + +def test_verify_attestation_round_trip(sample_artifact: Path, tmp_path: Path) -> None: + attestation = generate_attestation( + sample_artifact, + builder_id="urn:fixops:builder:test", + source_uri="https://example.com/repo", + build_type="https://example.com/build", + ) + destination = tmp_path / "attestation.json" + write_attestation(attestation, destination) + verify_attestation( + destination, + artefact_path=sample_artifact, + builder_id="urn:fixops:builder:test", + source_uri="https://example.com/repo", + build_type="https://example.com/build", + ) + + +def test_verify_attestation_detects_digest_mismatch(sample_artifact: Path, tmp_path: Path) -> None: + attestation = generate_attestation( + sample_artifact, + builder_id="urn:fixops:builder:test", + source_uri="https://example.com/repo", + build_type="https://example.com/build", + ).to_dict() + attestation["subject"][0]["digest"]["sha256"] = "00" + destination = tmp_path / "attestation.json" + destination.write_text(json.dumps(attestation), encoding="utf-8") + 
+ with pytest.raises(ProvenanceVerificationError): + verify_attestation( + destination, + artefact_path=sample_artifact, + builder_id="urn:fixops:builder:test", + ) + + +def test_cli_attest_and_verify(tmp_path: Path) -> None: + artifact = tmp_path / "artifact.bin" + artifact.write_bytes(b"data") + attestation_path = tmp_path / "attestation.json" + + exit_code = provenance_main( + [ + "attest", + "--artifact", + str(artifact), + "--out", + str(attestation_path), + "--builder-id", + "urn:fixops:builder:test", + "--source-uri", + "https://example.com/repo", + ] + ) + assert exit_code == 0 + assert attestation_path.is_file() + + exit_code = provenance_main( + [ + "verify", + "--artifact", + str(artifact), + "--attestation", + str(attestation_path), + "--builder-id", + "urn:fixops:builder:test", + "--source-uri", + "https://example.com/repo", + ] + ) + assert exit_code == 0 diff --git a/services/repro/__init__.py b/services/repro/__init__.py new file mode 100644 index 000000000..6db8c32bf --- /dev/null +++ b/services/repro/__init__.py @@ -0,0 +1,15 @@ +"""Reproducible build verification utilities.""" + +from .verifier import ( + VerificationResult, + load_plan, + run_verification, + verify_plan, +) + +__all__ = [ + "VerificationResult", + "load_plan", + "verify_plan", + "run_verification", +] diff --git a/services/repro/tests/test_verifier.py b/services/repro/tests/test_verifier.py new file mode 100644 index 000000000..afddbaac7 --- /dev/null +++ b/services/repro/tests/test_verifier.py @@ -0,0 +1,71 @@ +import json +from pathlib import Path + +import yaml + +from services.repro.verifier import run_verification + + +def _write_plan(path: Path, payload: dict) -> Path: + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(yaml.safe_dump(payload), encoding="utf-8") + return path + + +def test_run_verification_match(tmp_path: Path) -> None: + repo = tmp_path / "repo" + repo.mkdir() + + reference = repo / "artifacts" / "reference.txt" + reference.parent.mkdir(parents=True, exist_ok=True) + reference.write_text("hello world", encoding="utf-8") + + plan_data = { + "artifact": "dist/output.txt", + "steps": [ + { + "run": [ + "python", + "-c", + "from pathlib import Path; Path('dist').mkdir(exist_ok=True); Path('dist/output.txt').write_text('hello world', encoding='utf-8')", + ] + } + ], + "reference_artifact": str(reference.relative_to(repo)), + } + plan_path = _write_plan(repo / "build" / "plan.yaml", plan_data) + + result = run_verification(plan_path, "v1.0.0", output_dir=repo / "artifacts" / "repro", repo_root=repo) + assert result.match is True + assert result.attestation_path is not None + payload = json.loads(Path(result.attestation_path).read_text(encoding="utf-8")) + assert payload["match"] is True + assert payload["reference_digest"]["sha256"] == payload["generated_digest"]["sha256"] + + +def test_verify_plan_mismatch(tmp_path: Path) -> None: + repo = tmp_path / "repo" + repo.mkdir() + + reference = repo / "artifacts" / "reference.txt" + reference.parent.mkdir(parents=True, exist_ok=True) + reference.write_text("hello world", encoding="utf-8") + + plan_data = { + "artifact": "dist/output.txt", + "steps": [ + { + "run": [ + "python", + "-c", + "from pathlib import Path; Path('dist').mkdir(exist_ok=True); Path('dist/output.txt').write_text('different', encoding='utf-8')", + ] + } + ], + "reference_artifact": str(reference.relative_to(repo)), + } + plan_path = _write_plan(repo / "plan.yaml", plan_data) + plan = run_verification(plan_path, "v1.0.0", output_dir=repo / "artifacts" / 
"repro", repo_root=repo) + assert plan.match is False + assert plan.reference_digest is not None + assert plan.generated_digest["sha256"] != plan.reference_digest["sha256"] diff --git a/services/repro/verifier.py b/services/repro/verifier.py new file mode 100644 index 000000000..3f11de362 --- /dev/null +++ b/services/repro/verifier.py @@ -0,0 +1,263 @@ +"""Reproducible build verification helpers.""" + +from __future__ import annotations + +import json +import os +import shutil +import subprocess +import tempfile +from dataclasses import dataclass +from datetime import datetime, timezone +from hashlib import sha256 +from pathlib import Path +from typing import Any, Iterable, Mapping + +import yaml + +from services.provenance.attestation import load_attestation +from telemetry import get_meter, get_tracer + + +@dataclass(slots=True) +class VerificationResult: + """Result of executing a reproducible build plan.""" + + tag: str + plan: str + artifact: str + artifact_path: str + generated_digest: dict[str, str] + reference_digest: dict[str, str] | None + match: bool + verified_at: str + reference_source: str | None = None + attestation_path: str | None = None + + def to_dict(self) -> dict[str, Any]: + return { + "tag": self.tag, + "plan": self.plan, + "artifact": self.artifact, + "artifact_path": self.artifact_path, + "generated_digest": self.generated_digest, + "reference_digest": self.reference_digest, + "match": self.match, + "verified_at": self.verified_at, + "reference_source": self.reference_source, + "attestation_path": self.attestation_path, + } + + +_TRACER = get_tracer("fixops.repro") +_METER = get_meter("fixops.repro") +_REPRO_COUNTER = _METER.create_counter( + "fixops_repro_verifications", + description="Number of reproducibility checks executed", +) + + +def _normalise_digest(value: str | None) -> str | None: + if not value: + return None + value = value.strip() + if ":" in value: + _, digest = value.split(":", 1) + return digest.strip() + return value + + +def _substitute_tag(payload: Any, tag: str) -> Any: + if isinstance(payload, str): + return payload.replace("{tag}", tag) + if isinstance(payload, Mapping): + return {key: _substitute_tag(value, tag) for key, value in payload.items()} + if isinstance(payload, list): + return [_substitute_tag(item, tag) for item in payload] + return payload + + +def load_plan(path: Path | str, *, tag: str | None = None) -> dict[str, Any]: + """Load a YAML plan file and apply optional tag substitution.""" + + plan_path = Path(path) + with plan_path.open("r", encoding="utf-8") as handle: + data = yaml.safe_load(handle) or {} + if not isinstance(data, dict): + raise ValueError("Plan must be a mapping") + if tag: + data = _substitute_tag(data, tag) + data.setdefault("tag", tag or data.get("tag") or "unknown") + data["__plan_path__"] = str(plan_path) + return data + + +def _copy_source(source: Path, destination: Path) -> None: + if source.is_dir(): + shutil.copytree(source, destination, dirs_exist_ok=True) + else: + destination.parent.mkdir(parents=True, exist_ok=True) + shutil.copy2(source, destination) + + +def _materialise_sources(sources: Iterable[Any], repo_root: Path, workspace: Path) -> None: + for entry in sources or []: + if isinstance(entry, str): + source_path = (repo_root / entry).resolve() + destination = workspace / Path(entry).name + elif isinstance(entry, Mapping): + path_value = entry.get("path") + if not isinstance(path_value, str): + continue + source_path = (repo_root / path_value).resolve() + destination_name = 
entry.get("destination") or Path(path_value).name + destination = workspace / destination_name + else: + continue + if not source_path.exists(): + raise FileNotFoundError(f"Source path '{source_path}' does not exist") + _copy_source(source_path, destination) + + +def _run_steps(steps: Iterable[Any], workspace: Path, env: Mapping[str, Any] | None) -> None: + if not steps: + raise ValueError("Plan must include at least one step") + base_env = {"PATH": os.environ.get("PATH", "")} + if env: + base_env.update({key: str(value) for key, value in env.items()}) + for step in steps: + command = None + shell = False + if isinstance(step, Mapping): + command = step.get("run") + shell = bool(step.get("shell")) + elif isinstance(step, (list, tuple)): + command = [str(part) for part in step] + elif isinstance(step, str): + command = step + shell = True + if command is None: + continue + if isinstance(command, str): + subprocess.run( + command, + cwd=workspace, + env=base_env, + shell=True, + check=True, + ) + else: + subprocess.run( + [str(part) for part in command], + cwd=workspace, + env=base_env, + check=True, + ) + + +def _compute_digest(path: Path) -> str: + digest = sha256() + with path.open("rb") as handle: + for chunk in iter(lambda: handle.read(8192), b""): + digest.update(chunk) + return digest.hexdigest() + + +def _resolve_reference(plan: Mapping[str, Any], repo_root: Path, artifact_name: str) -> tuple[str | None, str | None]: + expected = _normalise_digest(plan.get("expected_digest")) + if expected: + return expected, "expected_digest" + + attestation_path_value = plan.get("reference_attestation") or plan.get("attestation") + if isinstance(attestation_path_value, str): + attestation_path = (repo_root / attestation_path_value).resolve() + if not attestation_path.is_file(): + raise FileNotFoundError(f"Attestation '{attestation_path}' not found") + attestation = load_attestation(attestation_path) + for subject in attestation.subject: + if subject.name == artifact_name or subject.name == Path(artifact_name).name: + digest_value = subject.digest.get("sha256") + if digest_value: + return digest_value, f"attestation:{attestation_path_value}" + if attestation.subject: + digest_value = attestation.subject[0].digest.get("sha256") + if digest_value: + return digest_value, f"attestation:{attestation_path_value}" + + reference_artifact = plan.get("reference_artifact") or plan.get("artifact_reference") + if isinstance(reference_artifact, str): + reference_path = (repo_root / reference_artifact).resolve() + if not reference_path.is_file(): + raise FileNotFoundError(f"Reference artifact '{reference_path}' not found") + return _compute_digest(reference_path), f"artifact:{reference_artifact}" + + return None, None + + +def verify_plan(plan: Mapping[str, Any], *, repo_root: Path | str = Path(".")) -> VerificationResult: + """Execute *plan* in a temporary workspace and return the verification result.""" + + repo_path = Path(repo_root).resolve() + artifact_rel = plan.get("artifact") + if not isinstance(artifact_rel, str): + raise ValueError("Plan is missing 'artifact' entry") + tag = str(plan.get("tag") or "unknown") + steps = plan.get("steps") or plan.get("build_steps") + environment = plan.get("environment") + + with _TRACER.start_as_current_span("repro.verify_plan") as span: + span.set_attribute("fixops.repro.tag", tag) + with tempfile.TemporaryDirectory(prefix="fixops-repro-") as workspace_dir: + workspace = Path(workspace_dir) + _materialise_sources(plan.get("sources", []), repo_path, workspace) + 
_run_steps(steps, workspace, environment) + artefact_path = (workspace / artifact_rel).resolve() + if not artefact_path.is_file(): + raise FileNotFoundError(f"Expected artefact '{artifact_rel}' not produced") + generated_digest = _compute_digest(artefact_path) + expected_digest, reference_source = _resolve_reference(plan, repo_path, Path(artifact_rel).name) + result = VerificationResult( + tag=tag, + plan=str(plan.get("__plan_path__", "")), + artifact=artifact_rel, + artifact_path=str(artefact_path), + generated_digest={"sha256": generated_digest}, + reference_digest={"sha256": expected_digest} if expected_digest else None, + match=bool(expected_digest and generated_digest == expected_digest), + verified_at=datetime.now(timezone.utc).isoformat(), + reference_source=reference_source, + ) + span.set_attribute("fixops.repro.match", result.match) + if expected_digest: + span.set_attribute("fixops.repro.reference", reference_source or "unknown") + return result + + +def run_verification( + plan_path: Path | str, + tag: str, + *, + output_dir: Path | str, + repo_root: Path | str = Path("."), +) -> VerificationResult: + """Load *plan_path*, execute verification, and persist the attestation JSON.""" + + plan = load_plan(plan_path, tag=tag) + result = verify_plan(plan, repo_root=repo_root) + output_directory = Path(output_dir) + output_directory.mkdir(parents=True, exist_ok=True) + attestation_path = output_directory / f"{tag}.json" + with attestation_path.open("w", encoding="utf-8") as handle: + json.dump(result.to_dict(), handle, indent=2, sort_keys=True) + handle.write("\n") + result.attestation_path = str(attestation_path) + _REPRO_COUNTER.add(1, {"match": str(result.match).lower()}) + return result + + +__all__ = [ + "VerificationResult", + "load_plan", + "verify_plan", + "run_verification", +] diff --git a/telemetry/__init__.py b/telemetry/__init__.py new file mode 100644 index 000000000..674bf0f08 --- /dev/null +++ b/telemetry/__init__.py @@ -0,0 +1,76 @@ +"""OpenTelemetry helpers for FixOps.""" + +from __future__ import annotations + +import importlib.util +import os +from typing import Optional + +_NOOP = False +if importlib.util.find_spec("opentelemetry") and importlib.util.find_spec("opentelemetry.sdk"): + from opentelemetry import metrics, trace + from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter + from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter + from opentelemetry.sdk.metrics import MeterProvider + from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader + from opentelemetry.sdk.resources import Resource + from opentelemetry.sdk.trace import TracerProvider + from opentelemetry.sdk.trace.export import BatchSpanProcessor +else: # pragma: no cover - fallback for test environments without OpenTelemetry + from . 
import _noop as _otel + + metrics = _otel.metrics + trace = _otel.trace + OTLPMetricExporter = _otel.OTLPMetricExporter + OTLPSpanExporter = _otel.OTLPSpanExporter + MeterProvider = _otel.MeterProvider + PeriodicExportingMetricReader = _otel.PeriodicExportingMetricReader + Resource = _otel.Resource + TracerProvider = _otel.TracerProvider + BatchSpanProcessor = _otel.BatchSpanProcessor + _NOOP = True + +_CONFIGURED = False + + +def configure(service_name: str = "fixops-platform") -> None: + """Configure global tracer and meter providers if not already set.""" + + global _CONFIGURED + if _CONFIGURED or os.getenv("FIXOPS_DISABLE_TELEMETRY") == "1" or _NOOP: + return + + endpoint = os.getenv("OTEL_EXPORTER_OTLP_ENDPOINT", "http://collector:4318") + traces_endpoint = endpoint.rstrip("/") + if not traces_endpoint.endswith("v1/traces"): + traces_endpoint = f"{traces_endpoint}/v1/traces" + metrics_endpoint = endpoint.rstrip("/") + if not metrics_endpoint.endswith("v1/metrics"): + metrics_endpoint = f"{metrics_endpoint}/v1/metrics" + + resource = Resource.create({"service.name": service_name}) + + tracer_provider = TracerProvider(resource=resource) + span_exporter = OTLPSpanExporter(endpoint=traces_endpoint) + tracer_provider.add_span_processor(BatchSpanProcessor(span_exporter)) + trace.set_tracer_provider(tracer_provider) + + metric_exporter = OTLPMetricExporter(endpoint=metrics_endpoint) + reader = PeriodicExportingMetricReader(metric_exporter) + meter_provider = MeterProvider(resource=resource, metric_readers=[reader]) + metrics.set_meter_provider(meter_provider) + + _CONFIGURED = True + + +def get_tracer(name: Optional[str] = None): + configure() + return trace.get_tracer(name or "fixops") + + +def get_meter(name: Optional[str] = None): + configure() + return metrics.get_meter(name or "fixops") + + +__all__ = ["configure", "get_tracer", "get_meter"] diff --git a/telemetry/_noop.py b/telemetry/_noop.py new file mode 100644 index 000000000..5a06cce4a --- /dev/null +++ b/telemetry/_noop.py @@ -0,0 +1,115 @@ +"""No-op OpenTelemetry compatibility layer for tests/offline environments.""" + +from __future__ import annotations + +from contextlib import contextmanager +from dataclasses import dataclass + + +class _NoopSpan: + def set_attribute(self, *_: object, **__: object) -> None: # pragma: no cover - no-op + return None + + def end(self) -> None: # pragma: no cover - no-op + return None + + +@contextmanager +def _span_context() -> _NoopSpan: + span = _NoopSpan() + yield span + + +class _NoopTracer: + def start_as_current_span(self, *_: object, **__: object): # pragma: no cover - no-op + return _span_context() + + +class _NoopMeter: + class _Counter: + def add(self, *_: object, **__: object) -> None: # pragma: no cover - no-op + return None + + def create_counter(self, *_: object, **__: object) -> "_NoopMeter._Counter": # pragma: no cover + return self._Counter() + + +class _NoopMetrics: + def __init__(self) -> None: + self._meter = _NoopMeter() + + def get_meter(self, *_: object, **__: object) -> _NoopMeter: # pragma: no cover + return self._meter + + def set_meter_provider(self, *_: object, **__: object) -> None: # pragma: no cover + return None + + +class _NoopTrace: + def __init__(self) -> None: + self._tracer = _NoopTracer() + + def get_tracer(self, *_: object, **__: object) -> _NoopTracer: # pragma: no cover + return self._tracer + + def set_tracer_provider(self, *_: object, **__: object) -> None: # pragma: no cover + return None + + +metrics = _NoopMetrics() +trace = _NoopTrace() + + 
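+# Usage sketch (illustrative only; the "fixops.example" names below are
+# placeholders, not identifiers from this repository). The module-level
+# ``trace`` and ``metrics`` objects mirror the OpenTelemetry entry points,
+# so caller code is identical whether the real SDK or this shim is active:
+#
+#     tracer = trace.get_tracer("fixops.example")
+#     with tracer.start_as_current_span("demo") as span:
+#         span.set_attribute("fixops.example.enabled", True)
+#     counter = metrics.get_meter("fixops.example").create_counter("fixops_example_ops")
+#     counter.add(1, {"status": "ok"})
+
+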
+@dataclass +class Resource: + attributes: dict[str, str] + + @classmethod + def create(cls, attributes: dict[str, str]) -> "Resource": # pragma: no cover + return cls(attributes=attributes) + + +class TracerProvider: # pragma: no cover - no-op + def __init__(self, *_, **__): + return None + + def add_span_processor(self, *_: object, **__: object) -> None: + return None + + +class BatchSpanProcessor: # pragma: no cover - no-op + def __init__(self, *_: object, **__: object) -> None: + return None + + +class OTLPSpanExporter: # pragma: no cover - no-op + def __init__(self, *_: object, **__: object) -> None: + return None + + +class OTLPMetricExporter: # pragma: no cover - no-op + def __init__(self, *_: object, **__: object) -> None: + return None + + +class PeriodicExportingMetricReader: # pragma: no cover - no-op + def __init__(self, *_: object, **__: object) -> None: + return None + + +class MeterProvider: # pragma: no cover - no-op + def __init__(self, *_, **__): + return None + + +__all__ = [ + "metrics", + "trace", + "Resource", + "TracerProvider", + "BatchSpanProcessor", + "OTLPSpanExporter", + "OTLPMetricExporter", + "PeriodicExportingMetricReader", + "MeterProvider", +] diff --git a/telemetry/fastapi_noop.py b/telemetry/fastapi_noop.py new file mode 100644 index 000000000..b883fb55a --- /dev/null +++ b/telemetry/fastapi_noop.py @@ -0,0 +1,9 @@ +"""No-op FastAPI instrumentor for environments without OpenTelemetry packages.""" + +from __future__ import annotations + + +class FastAPIInstrumentor: # pragma: no cover - simple shim + @staticmethod + def instrument_app(*_args, **_kwargs) -> None: + return None diff --git a/tests/test_evidence_bundle.py b/tests/test_evidence_bundle.py new file mode 100644 index 000000000..70adcbd39 --- /dev/null +++ b/tests/test_evidence_bundle.py @@ -0,0 +1,200 @@ +import json +from pathlib import Path +from zipfile import ZipFile + +import yaml + +from cli.fixops_ci import main as ci_main +from evidence.packager import ( + BundleInputs, + _collect_files, + create_bundle, + evaluate_policy, + load_policy, +) + + +def _write_json(path: Path, payload: dict) -> Path: + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(json.dumps(payload), encoding="utf-8") + return path + + +def test_create_bundle(tmp_path: Path) -> None: + tag = "v1.0.0" + normalized = _write_json( + tmp_path / "artifacts/sbom/normalized.json", {"components": []} + ) + quality_json = _write_json( + tmp_path / "analysis/sbom_quality_report.json", + {"metrics": {"coverage_percent": 95.0, "license_coverage_percent": 90.0}}, + ) + quality_html = tmp_path / "reports/sbom_quality_report.html" + quality_html.parent.mkdir(parents=True, exist_ok=True) + quality_html.write_text("quality", encoding="utf-8") + risk_report = _write_json( + tmp_path / "artifacts/risk.json", + {"summary": {"component_count": 2, "cve_count": 1, "max_risk_score": 60.0}}, + ) + provenance_dir = tmp_path / "artifacts/attestations" + provenance_dir.mkdir(parents=True, exist_ok=True) + (provenance_dir / "build.json").write_text("{}", encoding="utf-8") + repro_attestation = _write_json( + tmp_path / "artifacts/repro/attestations" / f"{tag}.json", + {"match": True}, + ) + policy_path = tmp_path / "config/policy.yml" + policy_path.parent.mkdir(parents=True, exist_ok=True) + policy_path.write_text( + yaml.safe_dump( + { + "risk": {"max_risk_score": {"warn_above": 80, "fail_above": 90}}, + "sbom_quality": { + "coverage_percent": {"warn_below": 80, "fail_below": 60}, + "license_coverage_percent": {"warn_below": 80, 
"fail_below": 60}, + }, + "repro": {"require_match": True}, + "provenance": {"require_attestations": True}, + } + ), + encoding="utf-8", + ) + + inputs = BundleInputs( + tag=tag, + normalized_sbom=normalized, + sbom_quality_json=quality_json, + sbom_quality_html=quality_html, + risk_report=risk_report, + provenance_dir=provenance_dir, + repro_attestation=repro_attestation, + policy_path=policy_path, + output_dir=tmp_path / "evidence", + ) + manifest = create_bundle(inputs) + bundle_path = Path(manifest["bundle_path"]) + assert bundle_path.is_file() + with ZipFile(bundle_path, "r") as archive: + names = archive.namelist() + assert "MANIFEST.yaml" in names + assert f"repro/{repro_attestation.name}" in names + assert manifest["evaluations"]["overall"] == "pass" + + +def test_fixops_ci_evidence_bundle(tmp_path: Path) -> None: + tag = "v2.0.0" + normalized = _write_json( + tmp_path / "artifacts/sbom/normalized.json", {"components": []} + ) + quality_json = _write_json( + tmp_path / "analysis/sbom_quality_report.json", + {"metrics": {"coverage_percent": 85.0, "license_coverage_percent": 80.0}}, + ) + quality_html = tmp_path / "reports/sbom_quality_report.html" + quality_html.parent.mkdir(parents=True, exist_ok=True) + quality_html.write_text("quality", encoding="utf-8") + risk_report = _write_json( + tmp_path / "artifacts/risk.json", + {"summary": {"component_count": 2, "cve_count": 1, "max_risk_score": 65.0}}, + ) + provenance_dir = tmp_path / "artifacts/attestations" + provenance_dir.mkdir(parents=True, exist_ok=True) + (provenance_dir / "build.json").write_text("{}", encoding="utf-8") + repro_dir = tmp_path / "artifacts/repro/attestations" + repro_attestation = _write_json(repro_dir / f"{tag}.json", {"match": True}) + policy_path = tmp_path / "config/policy.yml" + policy_path.parent.mkdir(parents=True, exist_ok=True) + policy_path.write_text( + yaml.safe_dump( + { + "risk": {"max_risk_score": {"warn_above": 80, "fail_above": 90}}, + "sbom_quality": { + "coverage_percent": {"warn_below": 70, "fail_below": 50}, + "license_coverage_percent": {"warn_below": 70, "fail_below": 50}, + }, + "repro": {"require_match": True}, + "provenance": {"require_attestations": True}, + } + ), + encoding="utf-8", + ) + + exit_code = ci_main( + [ + "evidence", + "bundle", + "--tag", + tag, + "--normalized", + str(normalized), + "--quality-json", + str(quality_json), + "--quality-html", + str(quality_html), + "--risk", + str(risk_report), + "--provenance-dir", + str(provenance_dir), + "--repro-dir", + str(repro_dir), + "--policy", + str(policy_path), + "--out", + str(tmp_path / "evidence"), + ] + ) + assert exit_code == 0 + bundle_path = tmp_path / "evidence/bundles" / f"{tag}.zip" + assert bundle_path.is_file() + manifest_path = tmp_path / "evidence/manifests" / f"{tag}.yaml" + assert manifest_path.is_file() + manifest = yaml.safe_load(manifest_path.read_text(encoding="utf-8")) + assert manifest["evaluations"]["overall"] == "pass" + + +def test_load_policy_merges_overrides(tmp_path: Path) -> None: + default_policy = load_policy(None) + assert default_policy["risk"]["max_risk_score"]["fail_above"] == 85.0 + + custom_policy = { + "risk": {"max_risk_score": {"fail_above": 75.0}}, + "provenance": {"require_attestations": False}, + } + policy_path = tmp_path / "policy.yml" + policy_path.write_text(yaml.safe_dump(custom_policy), encoding="utf-8") + + merged = load_policy(policy_path) + assert merged["risk"]["max_risk_score"]["fail_above"] == 75.0 + assert merged["provenance"]["require_attestations"] is False + + +def 
test_evaluate_policy_warn_and_fail() -> None: + policy = load_policy(None) + metrics = { + "sbom": {"coverage_percent": 55.0, "license_coverage_percent": 95.0}, + "risk": {"max_risk_score": 82.0}, + "repro": {"match": False}, + "provenance": {"count": 0}, + } + + evaluations = evaluate_policy(policy, metrics=metrics) + assert evaluations["overall"] == "fail" + assert evaluations["checks"]["sbom_coverage_percent"]["status"] == "fail" + assert evaluations["checks"]["risk_max_risk_score"]["status"] == "warn" + assert evaluations["checks"]["repro_match"]["status"] == "fail" + assert evaluations["checks"]["provenance_attestations"]["status"] == "fail" + + +def test_collect_files_handles_nested_directories(tmp_path: Path) -> None: + extras = tmp_path / "extras" + extras.mkdir() + nested = extras / "nested" + nested.mkdir() + file_a = extras / "a.txt" + file_a.write_text("a", encoding="utf-8") + file_b = nested / "b.txt" + file_b.write_text("b", encoding="utf-8") + + files = _collect_files([extras, tmp_path / "missing"]) + names = sorted(path.relative_to(extras).as_posix() for path in files) + assert names == ["a.txt", "nested/b.txt"] diff --git a/tests/test_graph_worker.py b/tests/test_graph_worker.py new file mode 100644 index 000000000..47cd134f1 --- /dev/null +++ b/tests/test_graph_worker.py @@ -0,0 +1,82 @@ +import importlib +from pathlib import Path +from types import SimpleNamespace + +import pytest + + +def test_optional_path(tmp_path): + from scripts import graph_worker + + existing = tmp_path / "existing.json" + existing.write_text("{}", encoding="utf-8") + + assert graph_worker._optional_path(str(existing)) == existing + assert graph_worker._optional_path(str(tmp_path / "missing.json")) is None + assert graph_worker._optional_path(None) is None + + +def test_graph_worker_main_single_cycle(monkeypatch, tmp_path): + monkeypatch.setenv("FIXOPS_REPO", str(tmp_path)) + attestations = tmp_path / "attest" + attestations.mkdir() + monkeypatch.setenv("FIXOPS_ATTESTATIONS", str(attestations)) + + sbom_path = tmp_path / "artifacts/sbom/normalized.json" + sbom_path.parent.mkdir(parents=True, exist_ok=True) + sbom_path.write_text("{}", encoding="utf-8") + monkeypatch.setenv("FIXOPS_NORMALIZED_SBOM", str(sbom_path)) + + risk_path = tmp_path / "artifacts/risk.json" + risk_path.parent.mkdir(parents=True, exist_ok=True) + risk_path.write_text("{}", encoding="utf-8") + monkeypatch.setenv("FIXOPS_RISK_REPORT", str(risk_path)) + + monkeypatch.setenv("FIXOPS_RELEASES", str(tmp_path / "analysis/releases.json")) + monkeypatch.setenv("FIXOPS_GRAPH_INTERVAL", "0") + + module = importlib.reload(importlib.import_module("scripts.graph_worker")) + + calls: list[str] = [] + + class DummyGraph: + def close(self) -> None: + calls.append("closed") + + def fake_build_graph_from_sources(sources): + calls.append("build") + assert isinstance(sources.repo_path, Path) + assert sources.attestation_dir == attestations.resolve() + return DummyGraph() + + class DummySpan: + def __enter__(self): + calls.append("enter") + return self + + def __exit__(self, exc_type, exc, tb): + calls.append("exit") + return False + + class DummyTracer: + def start_as_current_span(self, name): + calls.append(name) + return DummySpan() + + def raise_system_exit(*_args, **_kwargs): + raise SystemExit(0) + + monkeypatch.setattr( + module, "build_graph_from_sources", fake_build_graph_from_sources + ) + monkeypatch.setattr(module, "_TRACER", DummyTracer()) + monkeypatch.setattr(module, "time", SimpleNamespace(sleep=raise_system_exit)) + + with 
pytest.raises(SystemExit): + module.main() + + assert "build" in calls + assert "closed" in calls + assert "graph_worker.cycle" in calls + assert "enter" in calls + assert "exit" in calls diff --git a/tests/test_risk_scoring.py b/tests/test_risk_scoring.py new file mode 100644 index 000000000..4bd4987c9 --- /dev/null +++ b/tests/test_risk_scoring.py @@ -0,0 +1,160 @@ +import json +from pathlib import Path + +import pytest + +from cli import fixops_risk +from risk.feeds.epss import load_epss_scores +from risk.feeds.kev import load_kev_catalog +from risk.scoring import ( + _collect_exposure_flags, + _infer_version_lag_days, + _lag_factor, + compute_risk_profile, +) + + +@pytest.fixture() +def sample_feeds(tmp_path: Path) -> tuple[Path, Path]: + epss_path = tmp_path / "epss.csv" + epss_path.write_text( + "cve,epss\nCVE-2024-0001,0.8\nCVE-2024-0003,0.1\n", + encoding="utf-8", + ) + kev_path = tmp_path / "kev.json" + kev_payload = { + "data": { + "vulnerabilities": [ + {"cveID": "CVE-2024-0001"}, + {"cveID": "CVE-2024-9999"}, + ] + } + } + kev_path.write_text(json.dumps(kev_payload), encoding="utf-8") + return epss_path, kev_path + + +@pytest.fixture() +def sample_normalized(tmp_path: Path) -> Path: + normalized = { + "components": [ + { + "name": "pkgA", + "version": "1.0.0", + "purl": "pkg:pypi/pkgA@1.0.0", + "exposure_flags": ["internet"], + "vulnerabilities": [ + { + "cve": "CVE-2024-0001", + "version_lag_days": 90, + "exposure": "internet", + }, + { + "cve": "CVE-2024-0002", + "version_lag_days": 45, + "exposure_flags": ["partner"], + }, + ], + }, + { + "name": "pkgB", + "version": "2.0.0", + "purl": None, + "exposure": "internal", + "vulnerabilities": [ + { + "id": "CVE-2024-0003", + "version_lag_days": 30, + "exposure_flags": ["internal"], + } + ], + }, + ] + } + path = tmp_path / "normalized.json" + path.write_text(json.dumps(normalized), encoding="utf-8") + return path + + +def test_feed_loaders(sample_feeds: tuple[Path, Path]): + epss_path, kev_path = sample_feeds + scores = load_epss_scores(path=epss_path) + assert scores == {"CVE-2024-0001": 0.8, "CVE-2024-0003": 0.1} + + kev = load_kev_catalog(path=kev_path) + assert set(kev) == {"CVE-2024-0001", "CVE-2024-9999"} + + +def test_compute_risk_profile(sample_feeds: tuple[Path, Path], sample_normalized: Path): + epss_path, kev_path = sample_feeds + scores = load_epss_scores(path=epss_path) + kev = load_kev_catalog(path=kev_path) + + with sample_normalized.open("r", encoding="utf-8") as handle: + normalized = json.load(handle) + + report = compute_risk_profile(normalized, scores, kev) + assert report["summary"]["component_count"] == 2 + assert report["summary"]["cve_count"] == 3 + + component_index = {entry["slug"]: entry for entry in report["components"]} + pkga = component_index["pkg-pypi-pkga-1.0.0"] + assert pkga["component_risk"] == pytest.approx(80.0, rel=1e-2) + + cve_index = report["cves"] + assert cve_index["CVE-2024-0001"]["max_risk"] == pytest.approx(80.0, rel=1e-2) + assert sorted(cve_index["CVE-2024-0001"]["components"]) == ["pkg-pypi-pkga-1.0.0"] + + +def test_cli_score( + tmp_path: Path, sample_feeds: tuple[Path, Path], sample_normalized: Path +): + epss_path, kev_path = sample_feeds + output_path = tmp_path / "risk.json" + + exit_code = fixops_risk.main( + [ + "score", + "--sbom", + str(sample_normalized), + "--out", + str(output_path), + "--epss", + str(epss_path), + "--kev", + str(kev_path), + ] + ) + assert exit_code == 0 + assert output_path.is_file() + + report = json.loads(output_path.read_text(encoding="utf-8")) + 
assert report["summary"]["component_count"] == 2 + + +def test_collect_exposure_flags_normalises_variants(): + flags = _collect_exposure_flags( + "Internet Facing", ["partner"], {"tags": ["internal"]} + ) + assert flags == ["internal", "internet", "partner"] + + +def test_infer_version_lag_days_from_versions() -> None: + component = {"version": "1.0.0"} + vulnerability = {"cve": "CVE-2024-1111", "fix_version": "2.1.0"} + assert _infer_version_lag_days(component, vulnerability) > 0 + + +def test_infer_version_lag_days_from_dates() -> None: + component = {"last_observed": "2024-01-01T00:00:00Z"} + vulnerability = { + "cve": "CVE-2024-2222", + "fixed_release_date": "2024-02-15T00:00:00Z", + } + assert _infer_version_lag_days(component, vulnerability) == 45.0 + + +def test_lag_factor_caps_and_scales() -> None: + assert _lag_factor(0.0) == 0.0 + assert _lag_factor(90.0) == pytest.approx(0.5) + assert _lag_factor(720.0) == 1.0 diff --git a/tests/test_sbom_quality.py b/tests/test_sbom_quality.py new file mode 100644 index 000000000..52c736428 --- /dev/null +++ b/tests/test_sbom_quality.py @@ -0,0 +1,182 @@ +import json +from pathlib import Path + +from lib4sbom.normalizer import ( + build_and_write_quality_outputs, + build_quality_report, + normalize_sboms, + render_html_report, + write_normalized_sbom, +) + + +def _write_sbom(tmp_path: Path, name: str, document: dict) -> Path: + path = tmp_path / name + path.write_text(json.dumps(document), encoding="utf-8") + return path + + +def _sample_sboms(tmp_path: Path) -> list[Path]: + syft_doc = { + "bomFormat": "CycloneDX", + "metadata": { + "tools": { + "components": [ + {"vendor": "Anchore", "name": "Syft", "version": "1.0.0"} + ] + } + }, + "components": [ + { + "name": "pkgA", + "version": "1.0.0", + "purl": "pkg:pypi/pkgA@1.0.0", + "hashes": [{"alg": "SHA256", "content": "a" * 64}], + "licenses": [{"license": {"name": "MIT"}}], + }, + { + "name": "pkgB", + "version": "2.0.0", + "purl": "pkg:pypi/pkgB@2.0.0", + "hashes": [{"alg": "SHA1", "content": "b" * 40}], + }, + ], + } + + trivy_doc = { + "bomFormat": "CycloneDX", + "metadata": { + "tools": { + "components": [ + { + "vendor": "Aqua Security", + "name": "Trivy", + "version": "0.43.1", + } + ] + } + }, + "components": [ + { + "name": "pkgA", + "version": "1.0.0", + "purl": "pkg:pypi/pkgA@1.0.0", + "licenses": [{"expression": "Apache-2.0"}], + }, + { + "name": "pkgC", + "version": "3.1.4", + "hashes": [{"alg": "SHA256", "content": "c" * 64}], + }, + ], + } + + osv_doc = { + "spdxVersion": "SPDX-2.3", + "creationInfo": {"creators": ["Tool: osv-scanner@1.2.3"]}, + "packages": [ + { + "name": "pkgB", + "versionInfo": "2.0.0", + "purl": "pkg:pypi/pkgB@2.0.0", + "licenseConcluded": "GPL-2.0-only", + "checksums": [{"algorithm": "SHA256", "checksumValue": "b" * 64}], + }, + { + "name": "pkgD", + "versionInfo": "4.5.6", + "externalRefs": [ + { + "referenceType": "purl", + "referenceLocator": "pkg:deb/debian/pkgd@4.5.6", + } + ], + "licenseDeclared": "Apache-2.0", + }, + ], + } + + return [ + _write_sbom(tmp_path, "syft.json", syft_doc), + _write_sbom(tmp_path, "trivy.json", trivy_doc), + _write_sbom(tmp_path, "osv.json", osv_doc), + ] + + +def test_normalize_sboms_merges_components(tmp_path): + paths = _sample_sboms(tmp_path) + + normalized = normalize_sboms(paths) + + metadata = normalized["metadata"] + assert metadata["total_components"] == 6 + assert metadata["unique_components"] == 4 + assert metadata["generator_count"] == 3 + + components = normalized["components"] + packages = {component["purl"]: 
component for component in components} + assert "pkg:pypi/pkgA@1.0.0" in packages + assert "pkg:pypi/pkgB@2.0.0" in packages + + pkg_a = packages["pkg:pypi/pkgA@1.0.0"] + assert sorted(pkg_a["licenses"]) == ["Apache-2.0", "MIT"] + assert any("Syft" in generator for generator in pkg_a["generators"]) + assert any("Trivy" in generator for generator in pkg_a["generators"]) + + pkg_c = next(component for component in components if component["name"] == "pkgC") + assert pkg_c["purl"] is None + assert "SHA256" in pkg_c["hashes"] + + +def test_quality_report_metrics(tmp_path): + normalized = normalize_sboms(_sample_sboms(tmp_path)) + + report = build_quality_report(normalized) + + metrics = report["metrics"] + assert metrics["coverage_percent"] == 66.67 + assert metrics["license_coverage_percent"] == 75.0 + assert metrics["resolvability_percent"] == 100.0 + assert metrics["generator_variance_score"] == 1.0 + + +def test_render_html_report(tmp_path): + normalized = normalize_sboms(_sample_sboms(tmp_path)) + report = build_quality_report(normalized) + + destination = tmp_path / "report.html" + render_html_report(report, destination) + + html = destination.read_text(encoding="utf-8") + assert "SBOM Quality Report" in html + assert "66.67%" in html + assert "Generator Variance" in html + + +def test_write_normalized_sbom(tmp_path: Path) -> None: + paths = _sample_sboms(tmp_path) + destination = tmp_path / "artifacts/sbom/normalized.json" + normalized = write_normalized_sbom(paths, destination) + assert destination.is_file() + + persisted = json.loads(destination.read_text(encoding="utf-8")) + assert ( + persisted["metadata"]["unique_components"] + == normalized["metadata"]["unique_components"] + ) + + +def test_build_and_write_quality_outputs(tmp_path: Path) -> None: + normalized = normalize_sboms(_sample_sboms(tmp_path)) + json_destination = tmp_path / "analysis/report.json" + html_destination = tmp_path / "reports/report.html" + + report = build_and_write_quality_outputs( + normalized, json_destination, html_destination + ) + assert json_destination.is_file() + assert html_destination.is_file() + + html = html_destination.read_text(encoding="utf-8") + assert report["metrics"]["coverage_percent"] > 0 + assert "SBOM Quality Report" in html diff --git a/tests/test_telemetry_runtime.py b/tests/test_telemetry_runtime.py new file mode 100644 index 000000000..68d1c1076 --- /dev/null +++ b/tests/test_telemetry_runtime.py @@ -0,0 +1,37 @@ +import importlib + + +def test_telemetry_disable_env(monkeypatch): + monkeypatch.setenv("FIXOPS_DISABLE_TELEMETRY", "1") + import telemetry # noqa: F401 + + module = importlib.reload(telemetry) + module.configure() + tracer = module.get_tracer("tests.telemetry") + with tracer.start_as_current_span("noop"): + pass + meter = module.get_meter("tests.telemetry") + counter = meter.create_counter("fixops.tests.telemetry") + counter.add(1) + + +def test_telemetry_reconfigure_noop(monkeypatch): + monkeypatch.delenv("FIXOPS_DISABLE_TELEMETRY", raising=False) + import telemetry + + module = importlib.reload(telemetry) + + # First configure call should succeed even without OTEL packages (falls back to no-op) + module.configure() + # Second call should be a no-op and not raise + module.configure() + + tracer = module.get_tracer() + span_cm = tracer.start_as_current_span("second-call") + assert hasattr(span_cm, "__enter__") + with span_cm: + pass + + meter = module.get_meter() + counter = meter.create_counter("telemetry.reconfigure") + counter.add(0) diff --git a/ui/dashboard/index.html 
b/ui/dashboard/index.html new file mode 100644 index 000000000..5ce61d5b2 --- /dev/null +++ b/ui/dashboard/index.html @@ -0,0 +1,59 @@
+<!DOCTYPE html>
+<html lang="en">
+  <head>
+    <meta charset="utf-8" />
+    <title>FixOps Demo Dashboard</title>
+    <link rel="stylesheet" href="styles.css" />
+  </head>
+  <body>
+    <header>
+      <h1>FixOps Secure Supply Chain Demo</h1>
+      <p>Real-time view of SBOM quality, risk posture, provenance, and reproducibility status.</p>
+    </header>
+    <main>
+      <section class="cards">
+        <div class="card">
+          <h2>SBOM Quality</h2>
+          <div class="gauge" id="sbom-gauge" style="--value: 0">
+            <span id="sbom-gauge-value">0%</span>
+          </div>
+          <ul id="sbom-metrics"></ul>
+        </div>
+        <div class="card">
+          <h2>Risk Overview</h2>
+          <ul>
+            <li>Components: <span id="risk-components">0</span></li>
+            <li>CVEs: <span id="risk-cves">0</span></li>
+            <li>Highest Risk: <span id="risk-max">0</span></li>
+          </ul>
+        </div>
+        <div class="card">
+          <h2>Reproducibility</h2>
+          <ul id="repro-status"></ul>
+        </div>
+      </section>
+      <section class="tables">
+        <div class="card table">
+          <h2>KEV Components (Recent Releases)</h2>
+          <table id="kev-table">
+            <thead>
+              <tr><th>Release</th><th>Component</th><th>CVEs</th></tr>
+            </thead>
+            <tbody></tbody>
+          </table>
+        </div>
+        <div class="card table">
+          <h2>Evidence Bundles</h2>
+          <table id="evidence-table">
+            <thead>
+              <tr><th>Tag</th><th>Bundle</th><th>Status</th></tr>
+            </thead>
+            <tbody></tbody>
+          </table>
+        </div>
+      </section>
+    </main>
+    <footer>
+      <p>Telemetry exported via OpenTelemetry OTLP → collector (see docker-compose.demo.yml).</p>
+    </footer>
+    <script src="script.js"></script>
+  </body>
+</html>
      + + + diff --git a/ui/dashboard/script.js b/ui/dashboard/script.js new file mode 100644 index 000000000..7a464915d --- /dev/null +++ b/ui/dashboard/script.js @@ -0,0 +1,92 @@ +const API_BASE = window.API_BASE || "http://localhost:8000"; + +async function fetchJSON(path) { + const response = await fetch(`${API_BASE}${path}`); + if (!response.ok) { + throw new Error(`Request failed: ${response.status}`); + } + return response.json(); +} + +function updateGauge(value) { + const gauge = document.getElementById("sbom-gauge"); + const label = document.getElementById("sbom-gauge-value"); + const pct = Math.round(value || 0); + gauge.style.setProperty("--value", pct); + label.textContent = `${pct}%`; +} + +function renderSBOMMetrics(metrics) { + const list = document.getElementById("sbom-metrics"); + list.innerHTML = ""; + Object.entries(metrics || {}).forEach(([key, value]) => { + const item = document.createElement("li"); + item.textContent = `${key.replace(/_/g, " ")}: ${value}`; + list.appendChild(item); + }); +} + +function renderReproStatus(evaluations) { + const list = document.getElementById("repro-status"); + list.innerHTML = ""; + const repro = evaluations?.checks?.repro_match; + if (!repro) { + list.innerHTML = "
    • No reproducibility data
    • "; + return; + } + const item = document.createElement("li"); + item.innerHTML = `${repro.status.toUpperCase()} reproducible: ${repro.value}`; + list.appendChild(item); +} + +function renderKevTable(entries) { + const tbody = document.querySelector("#kev-table tbody"); + tbody.innerHTML = ""; + entries.forEach((release) => { + release.components.forEach((component) => { + const row = document.createElement("tr"); + row.innerHTML = `${release.release}${component.component}${component.cves.join(", ")}`; + tbody.appendChild(row); + }); + }); +} + +function renderEvidenceTable(releases) { + const tbody = document.querySelector("#evidence-table tbody"); + tbody.innerHTML = ""; + releases.forEach((release) => { + const row = document.createElement("tr"); + const status = release.bundle_available ? "available" : "pending"; + row.innerHTML = `${release.tag}${release.bundle_available ? "✅" : "⏳"}${status}`; + tbody.appendChild(row); + }); +} + +async function loadDashboard() { + try { + const evidence = await fetchJSON("/evidence/"); + renderEvidenceTable(evidence.releases || []); + if (evidence.releases?.length) { + const manifest = await fetchJSON(`/evidence/${evidence.releases[0].tag}`); + const metrics = manifest.manifest?.metrics || {}; + updateGauge(metrics.sbom?.coverage_percent || 0); + renderSBOMMetrics(metrics.sbom || {}); + const risk = metrics.risk || {}; + document.getElementById("risk-components").textContent = risk.component_count ?? 0; + document.getElementById("risk-cves").textContent = risk.cve_count ?? 0; + document.getElementById("risk-max").textContent = risk.max_risk_score ?? 0; + renderReproStatus(manifest.manifest?.evaluations); + } + } catch (error) { + console.warn("Unable to load evidence data", error); + } + + try { + const kev = await fetchJSON("/graph/kev-components?last=3"); + renderKevTable(kev || []); + } catch (error) { + console.warn("Unable to load graph data", error); + } +} + +document.addEventListener("DOMContentLoaded", loadDashboard); diff --git a/ui/dashboard/styles.css b/ui/dashboard/styles.css new file mode 100644 index 000000000..fdbfd2516 --- /dev/null +++ b/ui/dashboard/styles.css @@ -0,0 +1,103 @@ +:root { + --bg: #0f172a; + --card-bg: rgba(15, 23, 42, 0.85); + --accent: #38bdf8; + --text: #f8fafc; + --muted: #cbd5f5; + font-family: 'Segoe UI', sans-serif; +} + +body { + margin: 0; + background: radial-gradient(circle at top, #1e293b, #0f172a); + color: var(--text); + min-height: 100vh; + display: flex; + flex-direction: column; +} + +header, footer { + text-align: center; + padding: 1.5rem; +} + +main { + flex: 1; + padding: 1.5rem; + display: flex; + flex-direction: column; + gap: 1.5rem; +} + +.cards { + display: grid; + grid-template-columns: repeat(auto-fit, minmax(240px, 1fr)); + gap: 1.5rem; +} + +.tables { + display: grid; + grid-template-columns: repeat(auto-fit, minmax(320px, 1fr)); + gap: 1.5rem; +} + +.card { + background: var(--card-bg); + border-radius: 16px; + padding: 1.5rem; + box-shadow: 0 18px 35px rgba(15, 23, 42, 0.5); + backdrop-filter: blur(12px); +} + +.gauge { + position: relative; + width: 160px; + height: 160px; + border-radius: 50%; + margin: 1rem auto; + background: conic-gradient(var(--accent) calc(var(--value) * 1%), rgba(148, 163, 184, 0.2) 0); + display: flex; + align-items: center; + justify-content: center; + transition: background 0.6s ease; +} + +.gauge span { + font-size: 2rem; + font-weight: bold; +} + +.card ul { + list-style: none; + padding: 0; + margin: 0; +} + +.card li { + margin-bottom: 0.5rem; + 
color: var(--muted); +} + +.table table { + width: 100%; + border-collapse: collapse; +} + +.table th, .table td { + padding: 0.6rem; + text-align: left; + border-bottom: 1px solid rgba(148, 163, 184, 0.2); +} + +.table th { + color: var(--accent); +} + +.status-pill { + display: inline-block; + padding: 0.2rem 0.6rem; + border-radius: 999px; + background: rgba(56, 189, 248, 0.2); + color: var(--accent); + font-size: 0.85rem; +} From cc861144add82bf02513a8b897a8be4e5f04241d Mon Sep 17 00:00:00 2001 From: DevOpsMadDog Date: Sun, 12 Oct 2025 01:40:53 +1100 Subject: [PATCH 2/5] docs: add usage index to readme_updated --- readme_updated.md | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/readme_updated.md b/readme_updated.md index f2e7d9e07..f8a697608 100644 --- a/readme_updated.md +++ b/readme_updated.md @@ -3,6 +3,7 @@ > Use this as the authoritative guide for onboarding, architecture reviews, compliance audits, and CI/CD dry runs. It extends the root README with diagrams, capability maps, and end-to-end validation recipes. ## Table of contents +- [Feature index & usage map](#feature-index--usage-map) - [Executive summary](#executive-summary) - [Orientation](#orientation) - [Repository topology](#repository-topology) @@ -26,6 +27,26 @@ - [Setup & verification checklists](#setup--verification-checklists) - [Reference documents](#reference-documents) +## Feature index & usage map +The table below links every FixOps capability to its primary use cases, when teams should prefer the CLI versus the API, and +where to find the backing implementation and documentation. Use it as a quick index before diving deeper into individual +sections. + +| Capability | Primary use cases | Preferred CLI commands | Relevant API endpoints | When to choose CLI vs API | Core modules & docs | +| --- | --- | --- | --- | --- | --- | +| SBOM normalisation & quality | Merge SBOMs from Syft/Trivy, produce deterministic quality metrics for audits. | `fixops-sbom normalize --in …`
<br>`fixops-sbom quality --in …` | `GET /api/v1/sbom/quality` *(if published via backend)* | Use CLI for local/air-gapped triage and CI steps; expose API when dashboards or downstream services need JSON outputs. | `lib4sbom/normalizer.py`, `docs/SBOM-QUALITY.md` |
+| Risk scoring (FixOpsRisk) | Prioritise remediation via EPSS/KEV, quantify release posture. | `fixops-risk score --sbom …` | `GET /api/v1/risk/component/{id}`<br>`GET /api/v1/risk/cve/{id}` | CLI is ideal for pipeline gating and artefact generation; API powers portals and integrations that need on-demand lookups. | `risk/feeds/*`, `risk/scoring.py`, `docs/RISK-SCORING.md` |
+| Provenance attestations & signing | Generate SLSA v1 provenance, verify releases before consumption. | `fixops-provenance attest --artifact …`<br>`fixops-provenance verify --artifact …` | `GET /api/v1/provenance/{artifact}` | Run CLI inside CI or local validation loops; surface API to external auditors who only need attestation JSON. | `services/provenance/attestation.py`, `docs/PROVENANCE.md`, `docs/SIGNING.md` |
+| Provenance graph analytics | Trace lineage, find KEV regressions, detect downgrades across releases. | `fixops-ci graph lineage --artifact …` *(via CI agent)* | `GET /api/v1/graph/lineage`<br>`GET /api/v1/graph/kev`<br>
      `GET /api/v1/graph/anomalies` | Use API for interactive queries and UI dashboards; invoke CLI during automated compliance checks or scheduled reports. | `services/graph/graph.py`, `backend/api/graph/router.py`, `docs/PROVENANCE-GRAPH.md` | +| Reproducible build verification | Rebuild tagged releases and compare digests to prove reproducibility. | `fixops-repro verify --tag … --plan build/plan.yaml` | `GET /api/v1/repro/{tag}` *(if enabled)* | CLI runs hermetic jobs on builders; API publishes attestation status to stakeholders. | `services/repro/verifier.py`, `docs/REPRO-BUILDS.md` | +| Evidence bundles & policy agent | Package SBOM, risk, provenance, and repro artefacts into signed bundles. | `fixops-ci evidence bundle --release …` | `GET /api/v1/evidence/{release}` | Prefer CLI to generate bundles inside release pipelines; API exposes ready-made bundles to auditors. | `evidence/packager.py`, `cli/fixops_ci.py`, `docs/EVIDENCE-BUNDLES.md` | +| Observability & dashboards | Demo FixOps posture with OTEL traces and dashboard visualisations. | `docker compose -f docker-compose.demo.yml up` | `GET /api/v1/metrics/*` *(exported via collector)* | CLI/docker commands spin up demo stacks; APIs feed dashboards/collectors. | `telemetry/`, `ui/dashboard/`, `docs/DEMO.md` | +| Probabilistic forecasting, Markov & Bayesian analytics | Model remediation timelines, predict drift, and justify roadmap commitments. | `fixops-ci analytics forecast --plan …` *(where configured)* | `GET /api/v1/analytics/forecast` | CLI enables batched planning; API supports UI overlays and stakeholder queries. | `core/`, `simulations/`, `docs/ARCHITECTURE.md` | +| Multi-LLM consensus | Cross-validate vulnerability triage or policy decisions via multiple LLMs. | `fixops-ci ai review --input …` | `POST /api/v1/ai/consensus` | CLI gives offline reviewers reproducible prompts; API integrates with ticketing/chatops. | `fixops-enterprise/`, `docs/ARCHITECTURE.md` | + +**Choosing between surfaces:** Pick the CLI when you need deterministic artefacts inside CI/CD, when working air-gapped, or when +running scheduled governance jobs. Use the API when interactive tooling, dashboards, or external auditors require live data. + ## Executive summary - **Who it serves**: Release managers, compliance teams, SOC analysts, and platform engineers needing verifiable supply-chain provenance. - **What you can do quickly**: Normalise SBOMs, score risk, issue SLSA attestations, verify reproducible builds, assemble signed evidence bundles, and visualise posture via dashboards. 
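The "CLI is ideal for pipeline gating" pattern called out in the risk-scoring row can be sketched in a few lines. The snippet below is illustrative only: it assumes `artifacts/risk.json` (as produced by `fixops-risk score`) exposes a `components` list with per-component `fixops_risk` values, so the field names should be checked against `risk/scoring.py` and `docs/RISK-SCORING.md` before adopting it.

```python
#!/usr/bin/env python3
"""Illustrative CI gate: fail the job when FixOpsRisk exceeds a threshold.

Assumes artifacts/risk.json was produced by `fixops-risk score --sbom ...` and
contains a "components" list with per-component "fixops_risk" values; adjust the
field names to the schema documented in docs/RISK-SCORING.md.
"""
import json
import sys
from pathlib import Path

THRESHOLD = 75.0  # example policy threshold, not a FixOps default


def main() -> int:
    report = json.loads(Path("artifacts/risk.json").read_text())
    components = report.get("components", [])
    worst = max((c.get("fixops_risk", 0) for c in components), default=0)
    print(f"components scored: {len(components)}, max FixOpsRisk: {worst}")
    if worst > THRESHOLD:
        print("risk gate failed: max FixOpsRisk above threshold", file=sys.stderr)
        return 1
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
```

A step like this would typically run after `fixops-risk score` and before `fixops-ci evidence bundle`, so a failed gate stops the release before evidence is packaged.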
From ae5bccb50f8fb6377524c648a59c6b514d905d31 Mon Sep 17 00:00:00 2001 From: DevOpsMadDog Date: Sun, 12 Oct 2025 08:52:25 +1100 Subject: [PATCH 3/5] docs: detail ssdlc cli lifecycle stages --- readme_updated.md | 52 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/readme_updated.md b/readme_updated.md index f8a697608..2c226ea98 100644 --- a/readme_updated.md +++ b/readme_updated.md @@ -17,6 +17,7 @@ - [Provenance graph service](#provenance-graph-service) - [Reproducible build verification](#reproducible-build-verification) - [Evidence bundles & CI agent](#evidence-bundles--ci-agent) + - [SSDLC lifecycle evaluation](#ssdlc-lifecycle-evaluation) - [Probabilistic forecasting, Markov & Bayesian analytics](#probabilistic-forecasting-markov--bayesian-analytics) - [Multi-LLM consensus](#multi-llm-consensus) - [Observability & demo stack](#observability--demo-stack) @@ -408,6 +409,57 @@ flowchart TD | `artifacts/sbom/normalized.json` + `artifacts/risk.json` | Aggregated by `fixops-ci` | `evidence/workdir//` staging area | | Staged artefacts + cosign key | Manifest hashed & signed | `evidence/bundles/.zip` | +### SSDLC lifecycle evaluation +#### 1. Why it exists +Demonstrate secure software development lifecycle coverage with deterministic artefacts that auditors can diff across releases. The SSDLC evaluator transforms raw design, control, SBOM, SARIF, infrastructure-as-code, and exploit-signal inputs into stage-specific JSON payloads for downstream dashboards and the enterprise API. + +#### 2. Primary implementation +- `core/ssdlc.py` parses overlay-defined lifecycle requirements and evaluates them against pipeline artefacts, returning per-stage status summaries.【F:core/ssdlc.py†L1-L170】 +- `apps/api/pipeline.py` invokes the evaluator when the overlay enables SSDLC checks, wiring results into the broader decision pipeline response.【F:apps/api/pipeline.py†L835-L863】 +- `simulations/ssdlc/run.py` provides the CLI used during demos and tests to materialise canonical stage outputs with optional overlays.【F:simulations/ssdlc/run.py†L1-L239】 +- `tests/test_ssdlc_runner.py` ensures every stage command emits the correct filenames and supports `--stage all` generation.【F:tests/test_ssdlc_runner.py†L35-L59】 + +#### 3. Data flow +```mermaid +flowchart TD + subgraph Inputs + D[design/inputs/design_context.csv] + R[requirements/inputs/controls.json] + B[build/inputs/sbom.json] + T[test/inputs/scanner.sarif] + De[deploy/inputs/iac.tfplan.json] + O1[operate/inputs/kev.json] + O2[operate/inputs/epss.json] + end + CLI[python -m simulations.ssdlc.run] + Eval[core/ssdlc.SSDLCEvaluator] + Outputs{{Stage JSON artefacts}} + Inputs --> CLI --> Eval --> Outputs + Outputs --> API[apps/api/pipeline.py] +``` + +#### 4. Usage & stage-by-stage inputs/outputs +- **CLI skeleton** + ```bash + python -m simulations.ssdlc.run --stage --out artifacts/ssdlc + python -m simulations.ssdlc.run --stage all --out artifacts/ssdlc --overlay config/ssdlc.overlay.json + ``` +- **Stage matrix** + +| Stage (`--stage …`) | Required inputs | Generated output | What the payload captures | +| --- | --- | --- | --- | +| `design` | `simulations/ssdlc/design/inputs/design_context.csv` | `design_crosswalk.json` | Service exposure summary plus any overlay-injected risk annotations for architecture reviews.【F:simulations/ssdlc/run.py†L85-L101】 | +| `requirements` | `simulations/ssdlc/requirements/inputs/controls.json` | `policy_plan.json` | Normalised control map detailing satisfied vs. 
in-progress guardrails.【F:simulations/ssdlc/run.py†L104-L117】 | +| `build` | `simulations/ssdlc/build/inputs/sbom.json` | `component_index.json` | Component manifest aligned to SBOM entries for dependency governance.【F:simulations/ssdlc/run.py†L119-L141】 | +| `test` | `simulations/ssdlc/test/inputs/scanner.sarif` | `normalized_findings.json` | Tool list and severity histogram derived from SARIF scanners for QA sign-off.【F:simulations/ssdlc/run.py†L144-L174】 | +| `deploy` | `simulations/ssdlc/deploy/inputs/iac.tfplan.json` | `iac_posture.json` | Terraform change analysis showing exposed ports and internet-facing resources.【F:simulations/ssdlc/run.py†L177-L205】 | +| `operate` | `simulations/ssdlc/operate/inputs/kev.json`, `simulations/ssdlc/operate/inputs/epss.json` | `exploitability.json` | KEV/EPSS fusion that flags urgent operational response priorities.【F:simulations/ssdlc/run.py†L207-L231】 | + +#### 5. Sample artefacts & downstream usage +- `--stage all` prints a JSON map of every generated file, which can be zipped into evidence bundles or imported into dashboards.【F:simulations/ssdlc/run.py†L234-L239】【F:tests/test_ssdlc_runner.py†L45-L59】 +- The pipeline API surfaces `ssdlc_assessment.summary` counts so CI gates and overlays can assert minimum lifecycle coverage.【F:apps/api/pipeline.py†L846-L860】 +- Evidence bundles embed each stage artefact alongside provenance, risk, and repro proof, giving auditors lifecycle-to-release traceability.【F:evidence/packager.py†L180-L260】 + ### Probabilistic forecasting, Markov & Bayesian analytics #### 1. Why it exists Quantify future severity drift and escalation pressure with explainable probabilistic models for leadership planning. From c0d770c96f2d8d71dcc41ff8c71359e03f63bf57 Mon Sep 17 00:00:00 2001 From: DevOpsMadDog Date: Sun, 12 Oct 2025 08:52:33 +1100 Subject: [PATCH 4/5] docs: repair mermaid diagrams --- readme_updated.md | 154 +++++++++++++++++++++++----------------------- 1 file changed, 77 insertions(+), 77 deletions(-) diff --git a/readme_updated.md b/readme_updated.md index 2c226ea98..7a12de9c4 100644 --- a/readme_updated.md +++ b/readme_updated.md @@ -84,29 +84,30 @@ The table highlights the directories most practitioners touch during integration ```mermaid flowchart LR - subgraph Config[Overlay & Policy] - A1[config/\npolicy.yml] - A2[docs/ playbooks] + subgraph Config["Overlay & Policy"] + A1["config/policy.yml"] + A2["docs/playbooks"] end - subgraph Execution[Execution Surfaces] - B1[CLI & CI\ncli/*] - B2[FastAPI\napps/api/app.py] + subgraph Execution["Execution Surfaces"] + B1["CLI & CI
      cli/*"] + B2["FastAPI
      apps/api/app.py"] end - subgraph Engines[Decision Engines] - C1[Pipeline Orchestrator\napps/api/pipeline.py] - C2[Probabilistic + Markov + Bayesian\ncore/probabilistic.py] - C3[Multi-LLM Consensus\nfixops-enterprise/] + subgraph Engines["Decision Engines"] + C1["Pipeline Orchestrator
      apps/api/pipeline.py"] + C2["Probabilistic + Markov + Bayesian
      core/probabilistic.py"] + C3["Multi-LLM Consensus
      fixops-enterprise/"] end - subgraph Evidence[Evidence Services] - D1[Evidence Packager\nevidence/packager.py] - D2[Provenance Graph\nservices/graph/graph.py] - D3[Repro Verifier\nservices/repro/verifier.py] - D4[Provenance Attestations\nservices/provenance/attestation.py] + subgraph Evidence["Evidence Services"] + D1["Evidence Packager
      evidence/packager.py"] + D2["Provenance Graph
      services/graph/graph.py"] + D3["Repro Verifier
      services/repro/verifier.py"] + D4["Provenance Attestations
      services/provenance/attestation.py"] end - subgraph Observability[Observability] - E1[OpenTelemetry SDK\ntelemetry/] - E2[Dashboard\nui/dashboard] + subgraph Observability["Observability"] + E1["OpenTelemetry SDK
      telemetry/"] + E2["Dashboard
      ui/dashboard"] end + A1 --> B1 A1 --> B2 A2 --> B2 @@ -119,16 +120,15 @@ flowchart LR C1 --> D4 C2 --> C1 C3 --> C1 - D1 -->|bundles| Observability - D2 -->|lineage events| Observability + D1 -->|bundles| E2 + D2 -->|lineage events| E1 D3 -->|attestations| D1 D4 -->|SLSA statements| D1 - Observability --> E2 - D1 -->|signed MANIFEST| Evidence E1 --> B2 E1 --> D1 E1 --> D2 E1 --> D3 + E2 --> B2 ``` ## Capability matrix @@ -189,11 +189,11 @@ Harmonise heterogeneous SBOMs for deterministic downstream risk, evidence, and c #### 3. Data flow ```mermaid flowchart LR - In1[Raw SBOMs\nsyft.json\ntrivy.xml] -->|parse| P1[Normalizer\nlib4sbom/normalizer.py] - P1 -->|dedupe + metrics| P2[Normalized JSON\nartifacts/sbom/normalized.json] - P2 -->|render| P3[HTML Report\nreports/sbom_quality_report.html] - P2 -->|feed| Risk[risk/scoring.py] - P3 --> Evidence[evidence/packager.py] + In1["Raw SBOMs
syft.json<br/>trivy.xml"] -->|parse| P1["Normalizer<br/>lib4sbom/normalizer.py"] + P1 -->|dedupe + metrics| P2["Normalized JSON<br/>artifacts/sbom/normalized.json"] + P2 -->|render| P3["HTML Report
      reports/sbom_quality_report.html"] + P2 -->|feed| Risk["risk/scoring.py"] + P3 --> Evidence["evidence/packager.py"] ``` #### 4. Usage & setup @@ -224,12 +224,12 @@ Prioritise remediation by blending exploit probability (EPSS), KEV status, versi #### 3. Data flow ```mermaid flowchart LR - SBOM[Normalized SBOM] -->|join| Joiner[risk/scoring.py] - EPSS[data/feeds/epss.json] --> Joiner - KEV[data/feeds/kev.json] --> Joiner - Joiner -->|FixOpsRisk| RiskOut[artifacts/risk.json] - RiskOut --> API[backend/api/risk] - RiskOut --> Evidence + SBOM["Normalized SBOM"] -->|join| Joiner["risk/scoring.py"] + EPSS["data/feeds/epss.json"] --> Joiner + KEV["data/feeds/kev.json"] --> Joiner + Joiner -->|FixOpsRisk| RiskOut["artifacts/risk.json"] + RiskOut --> API["backend/api/risk"] + RiskOut --> Evidence["evidence/packager.py"] ``` #### 4. Usage & setup @@ -259,12 +259,12 @@ Guarantee downstream consumers can verify the supply-chain lineage for every bui #### 3. Data flow ```mermaid flowchart LR - Artifact[Build artefact] -->|hash| Hasher[services/provenance/attestation.py] - Hasher --> SLSA[SLSA Statement] - SLSA -->|sign| Cosign[scripts/signing/sign-artifact.sh] - Cosign --> Bundle[.sig / bundle] - SLSA --> APIProv[backend/api/provenance] - Bundle --> Evidence + Artifact["Build artefact"] -->|hash| Hasher["services/provenance/attestation.py"] + Hasher --> SLSA["SLSA Statement"] + SLSA -->|sign| Cosign["scripts/signing/sign-artifact.sh"] + Cosign --> Bundle[".sig / bundle"] + SLSA --> APIProv["backend/api/provenance"] + Bundle --> Evidence["evidence/packager.py"] ``` #### 4. Usage & setup @@ -295,24 +295,24 @@ Provide a queryable knowledge graph linking commits, CI jobs, artefacts, SBOM co #### 3. Data flow ```mermaid flowchart TD - subgraph Data - SBOMNodes[Normalized SBOM] - RiskNodes[Risk JSON] - AttNodes[Attestations] - GitNodes[Git metadata] + subgraph Data["Ingested Data"] + SBOMNodes["Normalized SBOM"] + RiskNodes["Risk JSON"] + AttNodes["Attestations"] + GitNodes["Git metadata"] end - subgraph GraphEngine[services/graph] - Loader[GraphLoader] - Queries[Query API] + subgraph GraphEngine["services/graph"] + Loader["GraphLoader"] + Queries["Query API"] end SBOMNodes --> Loader RiskNodes --> Loader AttNodes --> Loader GitNodes --> Loader - Loader --> DB[(SQLite store)] + Loader --> DB[("SQLite store")] DB --> Queries - Queries --> API[backend/api/graph] - API --> Consumers[CLI / UI] + Queries --> API["backend/api/graph"] + API --> Consumers["CLI / UI"] ``` #### 4. Usage & setup @@ -342,12 +342,12 @@ Ensure published artefacts can be recreated byte-for-byte from source via hermet #### 3. Data flow ```mermaid flowchart LR - Plan[build/plan.yaml] --> Runner[services/repro/verifier.py] - Release[Release artefact] --> Runner - Runner -->|rebuild| Sandbox[(Temp build env)] - Sandbox -->|digest| Compare[Digest compare] - Compare --> Att[artifacts/repro/attestations/.json] - Att --> Evidence + Plan["build/plan.yaml"] --> Runner["services/repro/verifier.py"] + Release["Release artefact"] --> Runner + Runner -->|rebuild| Sandbox[("Temp build env")] + Sandbox -->|digest| Compare["Digest compare"] + Compare --> Att["artifacts/repro/attestations/.json"] + Att --> Evidence["evidence/packager.py"] ``` #### 4. Usage & setup @@ -377,19 +377,19 @@ Offer a single command to package SBOM, risk, provenance, and repro evidence, si #### 3. 
Data flow ```mermaid flowchart TD - subgraph Inputs - SBOMIn[artifacts/sbom/normalized.json] - RiskIn[artifacts/risk.json] - ProvIn[artifacts/attestations/*.json] - ReproIn[artifacts/repro/attestations/*.json] + subgraph Inputs["Evidence inputs"] + SBOMIn["artifacts/sbom/normalized.json"] + RiskIn["artifacts/risk.json"] + ProvIn["artifacts/attestations/*.json"] + ReproIn["artifacts/repro/attestations/*.json"] end - Policy[config/policy.yml] - CLI[cli/fixops-ci] + Policy["config/policy.yml"] + CLI["cli/fixops-ci"] CLI -->|policy evaluate| Policy Inputs --> CLI - CLI --> BundleZip[evidence/bundles/.zip] - BundleZip --> Manifest[Signed MANIFEST.yaml] - BundleZip --> APIEvidence[backend/api/evidence] + CLI --> BundleZip["evidence/bundles/.zip"] + BundleZip --> Manifest["Signed MANIFEST.yaml"] + BundleZip --> APIEvidence["backend/api/evidence"] ``` #### 4. Usage & setup @@ -471,10 +471,10 @@ Quantify future severity drift and escalation pressure with explainable probabil #### 3. Data flow ```mermaid flowchart LR - Hist[Historical severity timeline] --> Engine[core/probabilistic.py] - Engine -->|posterior| Forecast[Forecast JSON] - Forecast --> Pipeline - Pipeline --> Dashboard + Hist["Historical severity timeline"] --> Engine["core/probabilistic.py"] + Engine -->|posterior| Forecast["Forecast JSON"] + Forecast --> Pipeline["apps/api/pipeline.py"] + Pipeline --> Dashboard["ui/dashboard"] ``` #### 4. Usage & setup @@ -503,15 +503,15 @@ Blend deterministic heuristics with LLM reasoning while capturing audit trails f #### 3. Data flow ```mermaid flowchart TD - Prompt[Context-rich prompt] --> Fanout[EnhancedDecisionEngine] - Fanout -->|OpenAI| Provider1 - Fanout -->|Anthropic| Provider2 - Fanout -->|Gemini| Provider3 - Provider1 --> Reducer[Consensus reducer] + Prompt["Context-rich prompt"] --> Fanout["EnhancedDecisionEngine"] + Fanout -->|OpenAI| Provider1["Provider: OpenAI"] + Fanout -->|Anthropic| Provider2["Provider: Anthropic"] + Fanout -->|Gemini| Provider3["Provider: Gemini"] + Provider1 --> Reducer["Consensus reducer"] Provider2 --> Reducer Provider3 --> Reducer - Reducer --> Verdict[Decision + Rationale] - Verdict --> Pipeline + Reducer --> Verdict["Decision + Rationale"] + Verdict --> Pipeline["apps/api/pipeline.py"] ``` #### 4. Usage & setup From fd214b8106832096eafef1c9d65ae5238e75e141 Mon Sep 17 00:00:00 2001 From: DevOpsMadDog Date: Sun, 12 Oct 2025 09:05:01 +1100 Subject: [PATCH 5/5] docs: clarify ssdlc lifecycle coverage --- docs/DOCS_STATUS.md | 60 +++++++++++++++++++++++++++++++++++++++ docs/SDLC_SSDLC.md | 25 +++++++++++++++- docs/doc-archive/.gitkeep | 0 3 files changed, 84 insertions(+), 1 deletion(-) create mode 100644 docs/DOCS_STATUS.md create mode 100644 docs/doc-archive/.gitkeep diff --git a/docs/DOCS_STATUS.md b/docs/DOCS_STATUS.md new file mode 100644 index 000000000..d7c6afa0e --- /dev/null +++ b/docs/DOCS_STATUS.md @@ -0,0 +1,60 @@ +# Documentation Coverage Ledger + +This ledger records the April 2025 review of every document under `docs/`. Each +entry cites the primary runtime modules or workflows that keep the guidance +current. Items marked **Archive** have been relocated to `docs/doc-archive/`. +All remaining files are actively referenced by the codebase, CLIs, or CI/CD +workflows introduced across Phases 1–10. + +| Document | Coverage Summary | Status | +| -------- | ---------------- | ------ | +| `ACADEMIC_RESEARCH.md` | Maps FixOps exploit and evidence features to referenced research; aligns with `risk/feeds` and evidence bundling modules. 
| Retain | +| `AI_AGENTS_RESEARCH.md` | Documents AI governance influences on `core/ai_agents.py` and evidence overlays. | Retain | +| `ARCH-INVENTORY.md` | Phase 1 architecture inventory listing runtime modules, APIs, and workflows. | Retain | +| `ARCHITECTURE.md` | High-level architecture diagrams corresponding to `services/*`, `backend/api/*`, and CLI entrypoints. | Retain | +| `BACKLOG.csv` | Tracking backlog for enterprise modules stored in `fixops-enterprise/` and Phase 6–10 features. | Retain | +| `CI-SECRETS.md` | Enumerates GitHub Actions secrets consumed by `release-sign.yml` and `provenance.yml`. | Retain | +| `CLOSED_LOOP_OPTIMIZATION.md` | Describes probabilistic analytics implemented in `core/probabilistic.py` and telemetry overlays. | Retain | +| `CODE_GAP_ASSESSMENT.md` | Cross-references README claims with shipped modules under `core/` and enterprise overlays. | Retain | +| `CONFIG_GUIDE.md` | Reference for overlay and policy configuration consumed by `core/configuration.py`. | Retain | +| `CONTRIBUTING.md` | Contributor workflow guidelines aligned with `.github/workflows/qa.yml`. | Retain | +| `DATA_MODEL.md` | Domain model definitions mirroring classes in `core/context_engine.py` and `services/graph/graph.py`. | Retain | +| `DEMO.md` | One-command demo instructions tied to `docker-compose.demo.yml` and telemetry setup. | Retain | +| `EVIDENCE-BUNDLES.md` | Documents bundle schema implemented by `evidence/packager.py` and `fixops-ci evidence bundle`. | Retain | +| `FIXOPS_ADOPTION_GUIDE.md` | Enablement guidance referencing CLI tools and overlays in `config/`. | Retain | +| `FOLDER_README.md` | Explains docs folder structure for maintainers. | Retain | +| `FixOps_Demo_IO_Contract.md` | Documents demo API contracts matching payloads in `apps/api/app.py`. | Retain | +| `INTEGRATIONS.md` | Lists third-party connectors mirrored by stubs under `core/connectors.py`. | Retain | +| `LINE_BY_LINE.md` | Highlights verification checkpoints satisfied by `tests/` and policy evaluators. | Retain | +| `MIGRATION.md` | Notes upgrade steps for overlay schema and CLI usage, tied to `config/policy.yml`. | Retain | +| `OBSERVABILITY.md` | Describes OpenTelemetry integration aligning with `telemetry/` package and Phase 9 demo. | Retain | +| `PLATFORM_RUNBOOK.md` | Operational runbook covering API startup and evidence workflows in `apps/api` and `evidence/`. | Retain | +| `PLAYBOOK-AUDIT.md` | Audit procedures leveraging evidence bundles and provenance APIs. | Retain | +| `PLAYBOOK-DEV.md` | Developer practices referencing CLI commands and QA workflow. | Retain | +| `PLAYBOOK-SEC.md` | Security operations referencing risk scoring and provenance services. | Retain | +| `PROVENANCE-GRAPH.md` | Phase 6 provenance graph design corresponding to `services/graph` and API routes. | Retain | +| `PROVENANCE.md` | SLSA attestation schema used by `services/provenance` and CLI wrappers. | Retain | +| `PR_CHECKLIST.md` | Review checklist aligned with QA automation and evidence requirements. | Retain | +| `PR_SUMMARY.md` | Template for summarising PRs including evidence and testing references. | Retain | +| `README_GAP_AND_TEST_PLAN.md` | Audit of README commitments matched to modules in `core/` and tests. | Retain | +| `REPRO-BUILDS.md` | Documents reproducible build verifier implemented in `services/repro`. | Retain | +| `RISK-SCORING.md` | Describes FixOpsRisk formula implemented in `risk/scoring.py`. | Retain | +| `ROADMAP.md` | Product roadmap aligned with phased delivery (docs/TASK-PLAN.md). 
| Retain | +| `SBOM-QUALITY.md` | Explains normalization and quality metrics delivered in `lib4sbom/normalizer.py`. | Retain | +| `SDLC_SSDLC.md` | Lifecycle mapping updated in this review to document canonical inputs, processing, and CLI calls. | Retain | +| `SECURITY-POSTURE.md` | Captures branch protection, signing, and CI controls implemented in workflows. | Retain | +| `SECURITY.md` | Security policy consistent with provenance, signing, and risk modules. | Retain | +| `SIGNING.md` | Cosign-based signing process matching `scripts/signing/` and release workflow. | Retain | +| `SSDL_SIMULATION.md` | Simulation overview supporting Stage Runner and SSDLC evaluator in `core/ssdlc.py`. | Retain | +| `STRAIKER_APR2025_COMPETITIVE_NOTES.md` | Competitive analysis guiding enhancements for `core/ai_agents.py` and SSDLC gates. | Retain | +| `TASK-PLAN.md` | Phase 2–10 implementation roadmap used to drive branch history. | Retain | +| `USAGE_GUIDE.html` | Rich HTML quick-start referencing CLI commands and overlay behaviours. | Retain | +| `decisionfactory_alignment.md` | Narrative aligning decision factory features with `services/graph` and risk modules. | Retain | +| `feature_call_tree.md` | Maps feature surfaces to modules and APIs for reviewers. | Retain | +| `security_sales_objections.md` | Sales enablement with pointers to provenance, evidence, and compliance modules. | Retain | +| `decisionfactory_alignment/` | Folder contains detailed implementation notes for the decision factory overlay and dashboards. | Retain | + +No files required archival during this pass; `docs/doc-archive/` remains empty so +future reviews can relocate superseded content when features are removed from the +codebase. + diff --git a/docs/SDLC_SSDLC.md b/docs/SDLC_SSDLC.md index 95ce78e9f..f0f1cf8fc 100644 --- a/docs/SDLC_SSDLC.md +++ b/docs/SDLC_SSDLC.md @@ -6,13 +6,36 @@ life-cycle stages. | Stage | Signals Ingested | Overlay Influence | Outputs & Artifacts | | ----- | ---------------- | ----------------- | ------------------- | | Plan | Design context CSV describing services, owners, and criticality. | `require_design_input` toggle decides whether design is mandatory. Directory hints (e.g., `design_context_dir`) tell planners where to store curated context. | Stored dataset, metadata preview in `/inputs/design` response, SSDLC check for `design`/`threat_model`. | +| Design | Service decomposition JSON/CSV plus threat models committed alongside infrastructure-as-code. | The SSDLC stage map in `config/policy.yml` defines whether design evidence is required before code merges. | Normalised design manifest persisted to `artefacts/design/` with canonical IDs reused by downstream SBOM joins. | | Code | SBOM packages derived from builds. | Git provider metadata (host/org/group) steers where repository checks run. | Normalised SBOM components with license data and SSDLC `dependency_pinning` coverage. | | Build | SARIF findings produced by SAST/DAST tools. | CI configuration identifies which pipeline slug to gate; metadata is exposed via overlay for reporting. | Normalised SARIF findings, severity histogram, SSDLC `sarif` gate. | | Test | CVE/KEV feeds for dependency risk. | Jira configuration indicates which project receives escalations when high-risk CVEs surface; guardrail maturity controls warn/fail thresholds. | Normalised CVE summaries, exploited counts, guardrail status, probabilistic forecast metrics, SSDLC `policy_automation` insight. 
| | Deploy | `/pipeline/run` crosswalk verifies that design, SBOM, SARIF, and CVE data align before promoting. | Enterprise mode enforces ticket sync and deployment approvals encoded in `ssdlc.stages`. | JSON report with `crosswalk`, severity summaries, SSDLC `deploy` stage status, overlay metadata for auditing. | -| Run | Overlay directories (evidence/audit) are created at startup so operational scripts can deposit runtime attestation. | Data paths change per mode (`demo` vs `enterprise`) allowing isolated evidence storage. | Evidence bundle with observability sections and SSDLC `run` stage results. | +| Operate | Overlay directories (evidence/audit) are created at startup so operational scripts can deposit runtime attestation. | Data paths change per mode (`demo` vs `enterprise`) allowing isolated evidence storage. | Evidence bundle with observability sections, repro attestation, and SSDLC `run` stage results. | | Audit | `overlay.metadata` includes source path and profile selection. | Enterprise profiles enable `capture_feedback` so `/feedback` requests persist reviewer decisions alongside evidence bundles. | Evidence bundle + `ssdlc_assessment` and `ai_agent_analysis` for audits and CTEM scorecards. | +## Canonical Stage Flow (Design → Operate) + +| Stage | Canonical Inputs | Processing Pipeline | CLI Surface | Primary Outputs | API Surfaces | +| ----- | ---------------- | ------------------- | ----------- | --------------- | ------------ | +| Plan & Design | `artefacts/design/*.csv` or `.json` context, overlay policy (`config/policy.yml`). | `core.design_context_injector.DesignContextLoader` normalises identifiers before `core.ssdlc.SSDLCStageEvaluator` enforces design prerequisites. | `fixops-ci evidence bundle --tag ` automatically ingests design manifests when present. | Canonical design manifest stored under `artefacts/design/` and referenced in risk/provenance joins. | `backend/api/evidence` responses embed design summaries for audits. | +| Code | SBOM exports from Syft/Trivy/OSV (`artefacts/sbom/*.json`). | `lib4sbom.normalizer.write_normalized_sbom` deduplicates by purl/version/hash and persists canonical JSON. | `fixops-ci sbom normalize --in ` produces `artifacts/sbom/normalized.json`. | `artifacts/sbom/normalized.json` plus deduplication telemetry. | `backend/api/graph/components` traces SBOM nodes; `backend/api/evidence` references the same manifest. | +| Build | Static analysis SARIF or build logs under `artefacts/scans/`. | `services.provenance.attestation.generate_attestation` links build run metadata, builder IDs, and materials. | `fixops-ci provenance attest --artifact --out ` records build lineage. | DSSE/SLSA v1 attestation JSON stored under `artifacts/attestations/`. | `backend/api/provenance/{artifact}` retrieves stored attestations. | +| Test | Risk feeds (EPSS, KEV) plus normalised SBOM. | `risk.scoring.generate_scores` enriches SBOM components with EPSS/KEV deltas and version lag heuristics. | `fixops-ci risk score --sbom artifacts/sbom/normalized.json --out artifacts/risk.json`. | Composite risk report at `artifacts/risk.json` with FixOpsRisk values. | `backend/api/risk/{component|cve}` returns risk payloads for CI/CD gates. | +| Deploy | Release metadata (git tags, artefact digests). | `services.graph.graph.ProvenanceGraphBuilder` stitches commits, attestations, SBOM, and risk to validate release readiness. | `fixops-ci evidence bundle --tag ` runs policy evaluation using the assembled graph context. 
| Signed `MANIFEST.yaml` and zipped evidence bundle under `evidence//`. | `backend/api/evidence/{release}` shares bundle metadata with release automation. | +| Operate | Reproducible build plans (`build/plan.yaml`) and release artefacts. | `services.repro.verifier.ReproVerifier` executes hermetic rebuilds and compares digests. | `fixops-ci repro verify --tag --plan build/plan.yaml` outputs reproducibility attestations. | `artifacts/repro/attestations/.json` and policy results persisted to evidence bundles. | `backend/api/evidence/{release}` includes reproducibility verdicts; `backend/api/graph/lineage` surfaces runtime lineage queries. | +| Audit | Evidence bundle manifest, cosign keys (CI), observability exports. | `evidence.packager.create_bundle` consolidates SBOM, risk, provenance, repro, and policy evaluations then signs the manifest. | `fixops-ci evidence bundle --tag --sign-key ` packages proof bundles for external auditors. | Zipped bundle with signed `MANIFEST.yaml`, policy verdicts, and coverage reports. | Evidence endpoints and downloadable artefacts from CI releases. | + +### CLI Playbook by Lifecycle Stage + +- **Design/Plan:** Capture curated system context and threat models under `artefacts/design/`, then package them by running `fixops-ci evidence bundle --tag ` so the evaluator confirms the design stage before promotion. +- **Code:** Deduplicate SBOM inputs with `fixops-ci sbom normalize --in artefacts/sbom/syft.json artefacts/sbom/trivy.json --out artifacts/sbom/normalized.json` to create the canonical component inventory feeding all downstream stages. +- **Build:** For every release artefact, execute `fixops-ci provenance attest -- ` via CI (see `.github/workflows/provenance.yml`) so the attestation graph links builder IDs, source commits, and materials. +- **Test:** Generate FixOpsRisk metrics using `fixops-ci risk score --sbom artifacts/sbom/normalized.json --out artifacts/risk.json` before allowing deployments; policy thresholds live in `config/policy.yml`. +- **Deploy:** Run `fixops-ci evidence bundle --tag ` after tagging to aggregate SBOM, risk, provenance, and graph validations while evaluating pass/warn/fail policy thresholds. +- **Operate:** Confirm deterministic rebuilds with `fixops-ci repro verify --tag --plan build/plan.yaml`; the resulting attestation feeds evidence bundles and runtime dashboards. +- **Audit:** Use the evidence bundle output to satisfy compliance reviews; the signed `MANIFEST.yaml` enumerates every artefact emitted across the lifecycle, and `fixops-ci evidence bundle` can re-run with `--sign-key` to append notarised manifests for auditors. + ## Flow of Signals 1. **Ingestion** — Upload endpoints accept artefacts in any order. Each artefact is normalised and diff --git a/docs/doc-archive/.gitkeep b/docs/doc-archive/.gitkeep new file mode 100644 index 000000000..e69de29bb
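To close out the lifecycle playbook added to `docs/SDLC_SSDLC.md` above, the sketch below chains the per-stage commands into one release job. It is a non-authoritative illustration: it assumes the `fixops-ci` entry point and the flags quoted in the playbook, takes the release tag as a caller-supplied argument, and omits the provenance attestation step because its artefact argument is environment-specific.

```python
#!/usr/bin/env python3
"""Illustrative release job chaining the lifecycle playbook commands in order.

A minimal sketch only: command names and flags mirror the playbook in
docs/SDLC_SSDLC.md and should be adjusted to the local pipeline layout.
"""
import subprocess
import sys


def run(cmd: list[str]) -> None:
    print("::", " ".join(cmd))
    subprocess.run(cmd, check=True)  # stop the job on the first failing stage


def main(tag: str) -> None:
    # Code stage: canonical SBOM
    run(["fixops-ci", "sbom", "normalize",
         "--in", "artefacts/sbom/syft.json", "artefacts/sbom/trivy.json",
         "--out", "artifacts/sbom/normalized.json"])
    # Test stage: FixOpsRisk scores
    run(["fixops-ci", "risk", "score",
         "--sbom", "artifacts/sbom/normalized.json",
         "--out", "artifacts/risk.json"])
    # Operate stage: reproducibility proof
    run(["fixops-ci", "repro", "verify", "--tag", tag, "--plan", "build/plan.yaml"])
    # Deploy/Audit stages: signed evidence bundle
    run(["fixops-ci", "evidence", "bundle", "--tag", tag])


if __name__ == "__main__":
    main(sys.argv[1])
```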