diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 87f438f..a1b93b8 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -17,6 +17,7 @@ concurrency: env: LC_ALL: C.UTF-8 LANG: C.UTF-8 + CI_DEFER_ARTIFACT_LINK_RESOLUTION: "1" defaults: run: diff --git a/.github/workflows/code-analysis-evidence.yml b/.github/workflows/code-analysis-evidence.yml new file mode 100644 index 0000000..07ffbae --- /dev/null +++ b/.github/workflows/code-analysis-evidence.yml @@ -0,0 +1,46 @@ +name: code-analysis-evidence + +on: + push: + branches: ["main"] + pull_request: + branches: ["main"] + workflow_dispatch: + +permissions: + contents: read + +jobs: + code-analysis-evidence: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup .NET + uses: actions/setup-dotnet@v4 + with: + dotnet-version: | + 8.0.x + 10.0.102 + + - name: Verify code analysis evidence + run: bash tools/audit/verify-code-analysis-evidence.sh + + - name: Validate result schema + if: always() + run: dotnet restore --locked-mode tools/ci/checks/ResultSchemaValidator/ResultSchemaValidator.csproj && dotnet build -c Release tools/ci/checks/ResultSchemaValidator/ResultSchemaValidator.csproj && dotnet tools/ci/checks/ResultSchemaValidator/bin/Release/net10.0/ResultSchemaValidator.dll --schema tools/ci/schema/result.schema.json --result artifacts/ci/code-analysis-evidence/result.json + + - name: Upload Artifact + if: always() + uses: actions/upload-artifact@v4 + with: + name: ci-code-analysis-evidence + path: | + artifacts/ci/code-analysis-evidence/ + artifacts/audit/code_inventory.json + artifacts/audit/callgraph_inventory.json + artifacts/audit/dead_code_candidates.json + artifacts/audit/redundancy_candidates.json + artifacts/audit/hardening_candidates.json + if-no-files-found: error diff --git a/.github/workflows/nuget-online-convergence.yml b/.github/workflows/nuget-online-convergence.yml index 820f134..1af3a78 100644 --- 
a/.github/workflows/nuget-online-convergence.yml +++ b/.github/workflows/nuget-online-convergence.yml @@ -11,7 +11,7 @@ permissions: jobs: verify-convergence: - if: github.event.workflow_run.conclusion == 'success' + if: github.event.workflow_run.conclusion == 'success' && github.event.workflow_run.head_repository.full_name == github.repository && github.event.workflow_run.event == 'push' runs-on: ubuntu-latest permissions: contents: read diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 4803a47..035ea25 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -7,6 +7,8 @@ on: permissions: contents: read + attestations: write + id-token: write jobs: version-policy: @@ -91,6 +93,7 @@ jobs: permissions: contents: read id-token: write + attestations: write steps: - name: Checkout uses: actions/checkout@v4 @@ -139,6 +142,28 @@ jobs: NUGET_API_KEY: ${{ steps.nuget_login.outputs.NUGET_API_KEY }} run: bash tools/ci/release/publish_nuget.sh "${{ steps.nupkg.outputs.path }}" + - name: Attest package provenance + uses: actions/attest-build-provenance@v2 + with: + subject-path: "${{ steps.nupkg.outputs.path }}" + + - name: Verify package attestation + env: + GH_TOKEN: ${{ github.token }} + shell: bash + run: | + set -euo pipefail + mkdir -p artifacts/nuget + gh attestation verify "${{ steps.nupkg.outputs.path }}" --repo "${{ github.repository }}" > artifacts/nuget/attestation-verify.txt + + - name: Upload attestation verification artifact + if: always() + uses: actions/upload-artifact@v4 + with: + name: nuget-attestation-verify + path: artifacts/nuget/attestation-verify.txt + if-no-files-found: error + - name: Gate 4 - SVT post-publish (git version == package version == nuget version) shell: bash run: bash tools/ci/release/gate4_verify_postpublish.sh "${GITHUB_REF_NAME#v}" "${{ steps.nupkg.outputs.path }}" diff --git a/.github/workflows/scorecard.yml b/.github/workflows/scorecard.yml new file mode 100644 index 
0000000..fc7893e --- /dev/null +++ b/.github/workflows/scorecard.yml @@ -0,0 +1,45 @@ +name: scorecard + +on: + branch_protection_rule: + schedule: + - cron: '23 3 * * 1' + push: + branches: ["main"] + workflow_dispatch: + +permissions: + security-events: write + id-token: write + contents: read + actions: read + +jobs: + scorecard: + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Prepare scorecard artifact directory + run: mkdir -p artifacts/ci/scorecard + + - name: Run OpenSSF Scorecard + uses: ossf/scorecard-action@v2.3.3 + with: + results_file: artifacts/ci/scorecard/results.sarif + results_format: sarif + publish_results: true + + - name: Upload SARIF to code scanning + uses: github/codeql-action/upload-sarif@v3 + with: + sarif_file: artifacts/ci/scorecard/results.sarif + + - name: Upload Artifact + if: always() + uses: actions/upload-artifact@v4 + with: + name: ci-scorecard + path: artifacts/ci/scorecard/ + if-no-files-found: error diff --git a/.github/workflows/security-claims-evidence.yml b/.github/workflows/security-claims-evidence.yml new file mode 100644 index 0000000..037ac55 --- /dev/null +++ b/.github/workflows/security-claims-evidence.yml @@ -0,0 +1,46 @@ +name: security-claims-evidence + +on: + push: + branches: ["main"] + pull_request: + branches: ["main"] + workflow_dispatch: + +permissions: + contents: read + +jobs: + security-claims-evidence: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup .NET + uses: actions/setup-dotnet@v4 + with: + dotnet-version: | + 8.0.x + 10.0.102 + + - name: Assert required CLI tools + run: command -v gh && command -v jq + + - name: Verify security claims + env: + GH_TOKEN: ${{ secrets.SECURITY_CLAIMS_TOKEN }} + GITHUB_REPOSITORY: ${{ github.repository }} + run: bash tools/audit/verify-security-claims.sh + + - name: Validate result schema + if: always() + run: dotnet restore --locked-mode 
tools/ci/checks/ResultSchemaValidator/ResultSchemaValidator.csproj && dotnet build -c Release tools/ci/checks/ResultSchemaValidator/ResultSchemaValidator.csproj && dotnet tools/ci/checks/ResultSchemaValidator/bin/Release/net10.0/ResultSchemaValidator.dll --schema tools/ci/schema/result.schema.json --result artifacts/ci/security-claims-evidence/result.json + + - name: Upload Artifact + if: always() + uses: actions/upload-artifact@v4 + with: + name: ci-security-claims-evidence + path: artifacts/ci/security-claims-evidence/ + if-no-files-found: error diff --git a/README.md b/README.md index dae4136..72ff271 100644 --- a/README.md +++ b/README.md @@ -150,6 +150,7 @@ Delegationshinweis: `ArchiveProcessing` ist bei path-basierten Archivpfaden eine - [Dokumentationsindex](https://github.com/tomtastisch/FileClassifier/blob/main/docs/001_INDEX_CORE.MD) - [API-Kernübersicht](https://github.com/tomtastisch/FileClassifier/blob/main/docs/010_API_CORE.MD) - [Architektur und Flows](https://github.com/tomtastisch/FileClassifier/blob/main/docs/020_ARCH_CORE.MD) +- [Audit Index](https://github.com/tomtastisch/FileClassifier/blob/main/docs/audit/000_INDEX.MD) - [HMAC Key Setup (SSOT)](https://github.com/tomtastisch/FileClassifier/blob/main/docs/secure/001_HMAC_KEY_SETUP.MD) - [Migration: Hashing Rename](https://github.com/tomtastisch/FileClassifier/blob/main/docs/migrations/001_HASHING_RENAME.MD) - [Governance und Policies](https://github.com/tomtastisch/FileClassifier/blob/main/docs/governance/001_POLICY_CI.MD) @@ -167,6 +168,9 @@ Delegationshinweis: `ArchiveProcessing` ist bei path-basierten Archivpfaden eine dotnet build FileClassifier.sln -v minimal dotnet test tests/FileTypeDetectionLib.Tests/FileTypeDetectionLib.Tests.csproj -c Release -v minimal python3 tools/check-docs.py +bash tools/audit/verify-security-claims.sh +bash tools/audit/generate-code-analysis-json.sh +gh attestation verify artifacts/nuget/*.nupkg --repo tomtastisch/FileClassifier python3 tools/check-policy-roc.py 
--out artifacts/policy_roc_matrix.tsv bash tools/ci/bin/run.sh versioning-svt bash tools/ci/bin/run.sh naming-snt diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 0000000..1cc33bd --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,106 @@ +# Sicherheitsrichtlinie (SECURITY.md) + +## 1. Zweck und Geltungsbereich +Diese Richtlinie beschreibt die Meldung, Bearbeitung und koordinierte Offenlegung von +Sicherheitsluecken im Repository `tomtastisch/FileClassifier`. + +Die Ausgestaltung ist an bewaehrten Prozessen orientiert, insbesondere: +- ISO/IEC 29147 (Vulnerability Disclosure) +- ISO/IEC 30111 (Vulnerability Handling Processes) + +Hinweis: Diese Richtlinie ist ein operatives Projekt-Policy-Dokument und kein +Zertifizierungs- oder Rechtsgutachten. + +## 2. Unterstuetzte Versionen (Security Fixes) +Security-Fixes werden nur fuer den aktuell unterstuetzten Major bereitgestellt. + +| Version | Security-Support | +| ------- | ---------------- | +| 5.x | Ja | +| < 5.0 | Nein | + +## 3. Meldung einer Sicherheitsluecke +Bitte melde Sicherheitsluecken **nicht** ueber oeffentliche Issues. + +Primaerer Meldeweg: +- GitHub Private Vulnerability Reporting / Security Advisory: + [Repository Security](https://github.com/tomtastisch/FileClassifier/security) + (dort "Report a vulnerability" verwenden) + +Wenn die Plattform technisch nicht verfuegbar ist, bitte einen Issue ohne technische +Exploit-Details erstellen und auf vertraulichen Kontakt hinweisen. + +## 4. Erforderliche Angaben in der Meldung +Bitte liefere nach Moeglichkeit: +- betroffene Version(en) und Umgebung +- klare Reproduktionsschritte +- erwartetes vs. tatsaechliches Verhalten +- Impact-Einschaetzung (Vertraulichkeit, Integritaet, Verfuegbarkeit) +- Proof-of-Concept in minimaler, sicherer Form +- bekannte Mitigations/Workarounds + +## 5. 
Prozess und Reaktionszeiten (kompaktes SLA) +- Eingangsbestaetigung: in der Regel innerhalb von **5 Werktagen** +- Triage und Priorisierung: risikobasiert (Schweregrad, Ausnutzbarkeit, Reichweite) +- Behebungsplanung und Kommunikation: nach Risiko, Komplexitaet und Release-Zyklus + +Es besteht kein Anspruch auf sofortige Behebung; wir arbeiten risikoorientiert und +koordinieren die Kommunikation transparent im Advisory-Prozess. + +## 6. Safe Harbor fuer gutglaeubige Sicherheitsforschung +Wir begruessen verantwortungsvolle, gutglaeubige Forschung innerhalb folgender Leitplanken: +- keine absichtliche Datenexfiltration, Datenveraenderung oder dauerhafte Stoerung +- keine Denial-of-Service-Tests oder Lastspitzen gegen Produktions-/fremde Systeme +- kein Social Engineering, kein Phishing, keine physische Angriffe +- keine automatisierten Massen-Scans ohne vorherige Abstimmung +- nur notwendige, minimale Testtiefe zur Nachweisfuehrung +- unverzuegliche vertrauliche Meldung bei Fund + +Wenn du in gutem Glauben und im Rahmen dieser Leitplanken handelst, betrachten wir das +als verantwortungsvolle Forschung und streben eine kooperative Loesung an. + +## 7. Koordinierte Offenlegung +Wir verfolgen koordiniertes Disclosure: +- Oeffentliche Details erst nach verfuegbarem Fix oder abgestimmter Mitigation +- Zeitfenster werden fallbezogen zwischen Maintainern und meldender Person abgestimmt +- Credits werden auf Wunsch im Advisory genannt + +## 8. Nicht unterstuetzte Meldungskanaele +- Oeffentliche GitHub Issues/Discussions fuer ungepatchte Schwachstellen +- Veroeffentlichung von Exploit-Details vor abgestimmter Offenlegung + +## 9. Nachweisbarkeit und Einsatz in sicherheitsrelevanten Umgebungen +Dieses Repository trifft **keine** Aussage ueber formale Zertifizierung (z. B. ISO 27001, +IEC 62443, Common Criteria) des Produkts oder eines Betreiber-ISMS. 
+ +Der Einsatz in sicherheitsrelevanten oder systemkritischen Architekturen ist nur +verantwortbar, wenn die betreibende Organisation zusaetzliche, eigene Kontrollen +nachweisbar umsetzt (z. B. Threat Modeling, Haertung, Betriebsmonitoring, Incident Response, +Schluesselmanagement, Netzwerksegmentierung, Backup/Restore-Tests, Change-Management). + +Nachweisbare, repo-seitige Sicherheitsmechanismen (Stand dieses Projekts): +- Security-Vulnerability-Meldeweg via GitHub Repository Security +- CI-Gate `security-nuget` fuer Vulnerability- und Deprecation-Scans +- Branch-Protection mit festen Required Contexts auf `main` +- Release-Publish via OIDC Trusted Publishing fuer NuGet (kein statischer API-Key im + regulaeren Publish-Pfad) + +Empfohlener Mindestnachweis vor Produktiveinsatz: +```bash +dotnet build FileClassifier.sln -v minimal +dotnet test tests/FileTypeDetectionLib.Tests/FileTypeDetectionLib.Tests.csproj -c Release -v minimal +bash tools/ci/bin/run.sh security-nuget +python3 tools/check-docs.py +``` + +Optional fuer erweiterten Nachweis: +```bash +bash tools/ci/bin/run.sh tests-bdd-coverage +bash tools/ci/bin/run.sh api-contract +bash tools/ci/bin/run.sh pack +bash tools/ci/bin/run.sh consumer-smoke +bash tools/ci/bin/run.sh package-backed-tests +``` + +Vielen Dank fuer verantwortungsvolle Meldungen und die Unterstuetzung der +Sicherheit von FileClassifier. diff --git a/docs/audit/000_INDEX.MD b/docs/audit/000_INDEX.MD new file mode 100644 index 0000000..31f962a --- /dev/null +++ b/docs/audit/000_INDEX.MD @@ -0,0 +1,41 @@ +# Audit Index + +## Scope +Central index for evidence-hardening artifacts that support claims in `SECURITY.md` without modifying `SECURITY.md` itself. 
+ +## Documents +- `docs/audit/000_HASHING_BASELINE.MD` +- `docs/audit/000_INDEX.MD` +- `docs/audit/002_AUDIT_CONTRACT_AND_GUARDRAILS.MD` +- `docs/audit/011_SECURITY_BENCHMARK.MD` +- `docs/audit/003_SECURITY_ASSERTION_TRACEABILITY.MD` +- `docs/audit/004_CERTIFICATION_AND_ATTESTATION_ROADMAP.MD` +- `docs/audit/005_CODE_ANALYSIS_METHOD.MD` +- `docs/audit/006_CODE_REVIEW_FINDINGS.MD` +- `docs/audit/007_THREAT_MODEL.MD` +- `docs/audit/008_INCIDENT_RESPONSE_RUNBOOK.MD` +- `docs/audit/009_SUPPLY_CHAIN_BASELINE.MD` +- `docs/audit/010_REFACTOR_BACKLOG.MD` + +## Machine Evidence +- `artifacts/ci/security-claims-evidence/` +- `artifacts/ci/code-analysis-evidence/` +- `artifacts/audit/code_inventory.json` +- `artifacts/audit/callgraph_inventory.json` +- `artifacts/audit/dead_code_candidates.json` +- `artifacts/audit/redundancy_candidates.json` +- `artifacts/audit/hardening_candidates.json` + +## Repro Commands +```bash +bash tools/audit/verify-security-claims.sh +bash tools/audit/verify-code-analysis-evidence.sh +bash tools/audit/generate-code-analysis-json.sh +python3 tools/check-docs.py +``` + +## External Assurance +- OpenSSF Scorecard workflow: `.github/workflows/scorecard.yml` +- Artifact attestations in release workflow: `.github/workflows/release.yml` +- Deep analysis evidence workflow: `.github/workflows/code-analysis-evidence.yml` +- GitHub Code Scanning Default Setup (configured): `gh api repos/tomtastisch/FileClassifier/code-scanning/default-setup` diff --git a/docs/audit/002_AUDIT_CONTRACT_AND_GUARDRAILS.MD b/docs/audit/002_AUDIT_CONTRACT_AND_GUARDRAILS.MD new file mode 100644 index 0000000..f4706a1 --- /dev/null +++ b/docs/audit/002_AUDIT_CONTRACT_AND_GUARDRAILS.MD @@ -0,0 +1,37 @@ +# Audit Contract And Guardrails + +## 1. Purpose +Define deterministic evidence rules for `SECURITY.md` claim verification. + +## 2. Status Model +Result contract status values: +- `pass`: no blocking violations. +- `fail`: at least one blocking violation. +- `warn`: report-only findings. 
+ +Operational tri-state mapping used by claim engine: +- `pass` -> schema `pass` +- `fail` -> schema `fail` +- `unknown` -> schema `warn` with explicit reason code in `rule_violations[].message` + +## 3. Blocking Policy +Blocking checks are allowed only if deterministic. +Live GitHub API checks are blocker-eligible with strict anti-flake controls: +- 3 retries +- exponential backoff (2s, 4s, 8s) +- current implementation emits generic API failure messages after retries + +If retries are exhausted: +- blocker claims fail with reason code +- report-only claims emit warn + +## 4. Evidence Contract +Required artifacts per check: +- `raw.log` +- `summary.md` +- `result.json` (must match `tools/ci/schema/result.schema.json`) + +## 5. Drift Handling +- Policy drift is fail-closed for blocker claims. +- Unknown states are never silently ignored. +- Every failure must include an evidence path and a reproducible command. diff --git a/docs/audit/003_SECURITY_ASSERTION_TRACEABILITY.MD b/docs/audit/003_SECURITY_ASSERTION_TRACEABILITY.MD new file mode 100644 index 0000000..a91873f --- /dev/null +++ b/docs/audit/003_SECURITY_ASSERTION_TRACEABILITY.MD @@ -0,0 +1,37 @@ +# Security Assertion Traceability + +## Scope +Mapping of claims in `SECURITY.md` to evidence sources and verification commands. + +| Claim ID | SECURITY Anchor | Claim Summary | Evidence Source | Verification Command | Pass Criteria | Blocker | +|---|---|---|---|---|---|---| +| SEC-CLAIM-001 | 2. Unterstuetzte Versionen | Security support is tied to major 5 | `src/FileTypeDetection/FileTypeDetectionLib.vbproj` | `sed -n 's:.*<Version>\([^<]*\)</Version>.*:\1:p' src/FileTypeDetection/FileTypeDetectionLib.vbproj` | Version major is `5` | yes | +| SEC-CLAIM-002 | 3. Meldung | Private vulnerability reporting path exists | GitHub API `private-vulnerability-reporting` | `gh api repos/$REPO/private-vulnerability-reporting` | `.enabled == true` | yes | +| SEC-CLAIM-003 | 9. 
Nachweisbarkeit | Dependabot security updates enabled | GitHub API (`security_and_analysis` oder `automated-security-fixes`) | `gh api repos/$REPO --jq '.security_and_analysis.dependabot_security_updates.status' || gh api repos/$REPO/automated-security-fixes --jq '.enabled'` | `enabled` oder `true` | yes | +| SEC-CLAIM-004 | 9. Nachweisbarkeit | Secret scanning enabled | GitHub API (`security_and_analysis` oder Secret-Scanning-Alerts Endpoint) | `gh api repos/$REPO --jq '.security_and_analysis.secret_scanning.status' || gh api 'repos/$REPO/secret-scanning/alerts?per_page=1'` | `enabled` oder Endpoint erfolgreich erreichbar | yes | +| SEC-CLAIM-005 | 9. Nachweisbarkeit | Branch protection required contexts fixed | GitHub API branch rules (primär), branch protection (Fallback) | `gh api repos/$REPO/rules/branches/main || gh api repos/$REPO/branches/main/protection` | expected context list present | yes | +| SEC-CLAIM-006 | 9. Nachweisbarkeit | `security-nuget` CI gate exists | `.github/workflows/ci.yml` | `rg -n "security-nuget|run\.sh security-nuget" .github/workflows/ci.yml` | gate + runner call both exist | yes | +| SEC-CLAIM-007 | 9. Nachweisbarkeit | OIDC trusted publishing configured | `.github/workflows/release.yml` | `rg -n "NuGet/login@v1|assert OIDC temp key present" .github/workflows/release.yml` | both markers present | yes | +| SEC-CLAIM-008 | 5. SLA | response target is 5 business days | policy declaration | manual policy review + incident log review | maintained process evidence exists | report-only | +| SEC-CLAIM-009 | 6. Safe Harbor | safe-harbor constraints are documented | `SECURITY.md` + runbook | `rg -n "Safe Harbor|gutglaeubige" SECURITY.md` | section present + runbook aligns | report-only | +| SEC-CLAIM-010 | 7. Koordinierte Offenlegung | coordinated disclosure process exists | runbook + advisory usage | review `docs/audit/008_INCIDENT_RESPONSE_RUNBOOK.MD` | workflow documented and actionable | report-only | +| SEC-CLAIM-011 | 3. 
Meldung | public issue tracker is not a primary disclosure channel | `SECURITY.md` policy text | `rg -n "nicht.*oeffentliche Issues|Nicht unterstuetzte" SECURITY.md` | policy text exists | report-only | +| SEC-CLAIM-012 | 4. Erforderliche Angaben | minimum report fields are explicitly listed | `SECURITY.md` section 4 | `rg -n "Erforderliche Angaben|Reproduktionsschritte|Impact" SECURITY.md` | section and required bullets exist | report-only | +| SEC-CLAIM-013 | 9. Nachweisbarkeit | local minimum evidence commands are executable | build/test + claim script | `dotnet build FileClassifier.sln -v minimal && dotnet test tests/FileTypeDetectionLib.Tests/FileTypeDetectionLib.Tests.csproj -c Release -v minimal && bash tools/audit/verify-security-claims.sh` | all commands exit 0 | yes | +| SEC-CLAIM-014 | 9. Nachweisbarkeit | optional extended check IDs are available | CI runner implementation | `rg -n "tests-bdd-coverage|api-contract|pack|consumer-smoke|package-backed-tests" tools/ci/bin/run.sh` | all IDs resolvable | yes | +| SEC-CLAIM-015 | 1. Zweck/Geltungsbereich | ISO/IEC 29147 and 30111 orientation is documented | policy + roadmap docs | `rg -n "29147|30111" SECURITY.md docs/audit/004_CERTIFICATION_AND_ATTESTATION_ROADMAP.MD` | references present with non-certification language | report-only | +| SEC-CLAIM-016 | 9. Zertifizierungsgrenze | no formal product certification claim is made | `SECURITY.md` section 9 | `rg -n "keine.*Zertifizierung|kein.*Rechtsgutachten" SECURITY.md` | explicit non-claim present | yes | + +## Full Coverage Note +Claims that are normative process statements (policy intent) are classified as `report-only` unless they can be turned into deterministic machine checks. + +## CI Claim Mapping +The verification script uses `CI-SEC-CLAIM-*` rule IDs. 
Mapping to normative claims: +- `CI-SEC-CLAIM-001` -> `SEC-CLAIM-002` (repository/reporting context resolvable) +- `CI-SEC-CLAIM-002` -> `SEC-CLAIM-001` (supported major version claim) +- `CI-SEC-CLAIM-003` -> `SEC-CLAIM-006` (`security-nuget` CI gate exists) +- `CI-SEC-CLAIM-004` -> `SEC-CLAIM-007` (OIDC trusted publishing markers) +- `CI-SEC-CLAIM-005` -> `SEC-CLAIM-003` (Dependabot security updates enabled/equivalent endpoint) +- `CI-SEC-CLAIM-006` -> `SEC-CLAIM-004` (secret scanning enabled/equivalent endpoint) +- `CI-SEC-CLAIM-007` -> `SEC-CLAIM-002` (private vulnerability reporting enabled) +- `CI-SEC-CLAIM-008` -> `SEC-CLAIM-005` (required status checks baseline) diff --git a/docs/audit/004_CERTIFICATION_AND_ATTESTATION_ROADMAP.MD b/docs/audit/004_CERTIFICATION_AND_ATTESTATION_ROADMAP.MD new file mode 100644 index 0000000..9c78277 --- /dev/null +++ b/docs/audit/004_CERTIFICATION_AND_ATTESTATION_ROADMAP.MD @@ -0,0 +1,39 @@ +# Certification And Attestation Roadmap + +## 1. Positioning +This project distinguishes: +- Assurance/attestation evidence (technical, reproducible) +- Formal third-party certification (external audit, out of current scope) + +No formal certification claim is made by this repository. + +## 2. Minimum Set (Wave 1) +1. OpenSSF Scorecard (free, external, GitHub-native) +2. GitHub Artifact Attestations for release artifacts +3. Documented provenance verification commands + +## 3. Optional Set +- OpenSSF Best Practices Badge (self-attested program, externally visible) + +## 4. NuGet-Relevant Evidence +- Verify package signatures: +```bash +dotnet nuget verify artifacts/nuget/*.nupkg +``` +- Verify build provenance attestation: +```bash +gh attestation verify artifacts/nuget/*.nupkg --repo tomtastisch/FileClassifier +``` + +## 5. Acceptance Criteria +- Scorecard workflow runs and stores artifacts. +- Release workflow emits attestations for nupkg artifacts. +- Verification commands are documented and reproducible. + +## 6. 
Code Scanning Mode Constraint +This repository currently uses GitHub Code Scanning Default Setup (state: `configured`). +When Default Setup is enabled, additional advanced CodeQL workflow uploads can be rejected by GitHub. +Operational rule for this repo: +- Keep Default Setup active for CodeQL until an explicit migration decision is made. +- Do not run parallel advanced CodeQL workflow uploads in the same repository state. +- Non-CodeQL SARIF uploads (for example OpenSSF Scorecard SARIF) are permitted. diff --git a/docs/audit/005_CODE_ANALYSIS_METHOD.MD b/docs/audit/005_CODE_ANALYSIS_METHOD.MD new file mode 100644 index 0000000..a029eb7 --- /dev/null +++ b/docs/audit/005_CODE_ANALYSIS_METHOD.MD @@ -0,0 +1,43 @@ +# Code Analysis Method + +## 1. Goal +Provide reproducible, file-level and line-level analysis artifacts without changing public APIs. + +## 2. Machine Outputs (JSON only) +Generated by `tools/audit/generate-code-analysis-json.sh`: +- `artifacts/audit/code_inventory.json` +- `artifacts/audit/callgraph_inventory.json` +- `artifacts/audit/dead_code_candidates.json` +- `artifacts/audit/redundancy_candidates.json` +- `artifacts/audit/hardening_candidates.json` + +CI evidence contract for cluster-7 execution: +- `artifacts/ci/code-analysis-evidence/raw.log` +- `artifacts/ci/code-analysis-evidence/summary.md` +- `artifacts/ci/code-analysis-evidence/result.json` + +## 3. Current Analysis Mode +Heuristic baseline (wave 1): +- declaration extraction by regex +- repository symbol reference counts +- repeated long-line redundancy heuristic +- broad exception catch detection (only explicit generic forms: `As Exception`) + +## 4. Escalation Path +For high-confidence refactors, each candidate must be validated by: +1. direct code inspection +2. failing/targeted test before change (where applicable) +3. passing regression tests after change + +## 5. 
Repro +```bash +bash tools/audit/generate-code-analysis-json.sh +bash tools/audit/verify-code-analysis-evidence.sh +``` + +## 6. Baseline Snapshot (2026-02-13) +- inventory files (`src/*.vb|*.cs`): `22` +- method declarations (heuristic): `698` +- dead code candidates (heuristic): `0` +- redundancy candidates (heuristic): `18` +- hardening candidates (broad catches): `12` diff --git a/docs/audit/006_CODE_REVIEW_FINDINGS.MD b/docs/audit/006_CODE_REVIEW_FINDINGS.MD new file mode 100644 index 0000000..917f893 --- /dev/null +++ b/docs/audit/006_CODE_REVIEW_FINDINGS.MD @@ -0,0 +1,53 @@ +# Code Review Findings + +## Scope +Wave 1 baseline findings from JSON analysis artifacts. + +## Current State +Package A (Cluster 7C) has been started and reduced generic entry-point catches. +Evidence-backed candidates are tracked in: +- `artifacts/audit/dead_code_candidates.json` +- `artifacts/audit/redundancy_candidates.json` +- `artifacts/audit/hardening_candidates.json` +- `artifacts/ci/code-analysis-evidence/result.json` + +## Baseline Findings (Wave 1) +1. Generic `Catch ex As Exception` usage was reduced in Package A for entry-point files (`FileTypeDetector`, `FileMaterializer`, `EvidenceHashing`). + Remaining broad catches still require follow-up in infrastructure internals. + - Examples: + - `src/FileTypeDetection/Infrastructure/CoreInternals.vb:58` + - `src/FileTypeDetection/Infrastructure/ArchiveManagedInternals.vb:70` + - `src/FileTypeDetection/Infrastructure/ArchiveInternals.vb:99` + - `src/FileTypeDetection/Infrastructure/ArchiveInternals.vb:201` + - `src/FileTypeDetection/Infrastructure/ArchiveInternals.vb:546` + - Evidence: `artifacts/audit/hardening_candidates.json` + +2. Repeated long-line patterns indicate potential guard/path handling duplication across archive and detector internals. 
+ - Examples: + - `src/FileTypeDetection/Infrastructure/CoreInternals.vb:66` + - `src/FileTypeDetection/Infrastructure/ArchiveManagedInternals.vb:29` + - `src/FileTypeDetection/Infrastructure/ArchiveInternals.vb:109` + - Evidence: `artifacts/audit/redundancy_candidates.json` + +3. Dead-code candidate scan currently returned no low-reference source declarations in `src/` under the implemented heuristic. + - Evidence: `artifacts/audit/dead_code_candidates.json` + +## Repro Evidence Commands +```bash +bash tools/audit/verify-code-analysis-evidence.sh +jq '.candidates | length' artifacts/audit/hardening_candidates.json +jq '.candidates | length' artifacts/audit/redundancy_candidates.json +``` + +## Findings Format (for next PRs) +- ID +- file + line +- reproducible evidence command +- impact/risk +- proposed change +- required regression tests + +## Priority Policy +- P1: security correctness or behavioral risk +- P2: likely dead/redundant code with medium confidence +- P3: readability/perf opportunities with low risk diff --git a/docs/audit/007_THREAT_MODEL.MD b/docs/audit/007_THREAT_MODEL.MD new file mode 100644 index 0000000..dfc3c6b --- /dev/null +++ b/docs/audit/007_THREAT_MODEL.MD @@ -0,0 +1,69 @@ +# Threat Model + +## 1. Scope +In scope: +- library behavior in `src/FileTypeDetection/*` +- CI/CD and release controls in `.github/workflows/*` +- security reporting and evidence lifecycle in `SECURITY.md` and `docs/audit/*` + +Out of scope: +- downstream deployment hardening of consuming systems +- host-level controls outside this repository + +## 2. Assets +- A1: untrusted file payloads entering detection/extraction paths +- A2: extracted archive entries in memory +- A3: hash/evidence artifacts used for integrity assertions +- A4: produced NuGet package (`.nupkg`) and its provenance +- A5: vulnerability reports and coordinated disclosure state + +## 3. 
Trust Boundaries +- B1: caller input -> detector/archive internals (`src/FileTypeDetection/FileTypeDetector.vb`) +- B2: repository source -> CI execution (`.github/workflows/ci.yml`) +- B3: CI-produced package -> NuGet publication (`.github/workflows/release.yml`) +- B4: external reporter -> maintainer triage (GitHub private vulnerability reporting) + +## 4. Attacker Capabilities +- C1: submit crafted files/archives to trigger unsafe parsing or extraction logic +- C2: attempt path traversal or resource exhaustion through archive payloads +- C3: attempt supply-chain tampering between build and publication +- C4: attempt disclosure abuse via public channels before fix readiness + +## 5. Threat Scenarios And Controls +- T1: archive traversal (zip-slip style path escape) + - Impact: write/read outside intended scope during extraction. + - Current controls: fail-closed archive safety gates in archive internals and detector paths; extraction APIs constrained to safe flows. + - Evidence: `tests/FileTypeDetectionLib.Tests/*` plus CI job `tests-bdd-coverage` in `.github/workflows/ci.yml`. +- T2: decompression/resource exhaustion + - Impact: memory/CPU pressure, possible denial-of-service. + - Current controls: guarded archive processing paths and bounded validation/extraction flows. + - Evidence: source paths under `src/FileTypeDetection/Infrastructure/*` and CI execution traces. +- T3: release artifact provenance tampering + - Impact: consumers install untrusted package build output. + - Current controls: OIDC trusted publishing and provenance attestation in `.github/workflows/release.yml`. + - Evidence: release workflow step `actions/attest-build-provenance@v2` and `gh attestation verify` output artifact. +- T4: vulnerable dependency ingress + - Impact: known vulnerable transitive components in shipped package. + - Current controls: `security-nuget` gate in CI (`dotnet list ... --vulnerable --include-transitive`) with High/Critical fail behavior. 
+ - Evidence: `tools/ci/bin/run.sh` (`run_security_nuget`) and CI artifacts `artifacts/ci/security-nuget/*`. +- T5: uncoordinated public disclosure before patch + - Impact: exploitability window increases before mitigation is available. + - Current controls: private vulnerability reporting path and coordinated disclosure process as declared in `SECURITY.md`. + - Evidence: `tools/audit/verify-security-claims.sh` claim checks and `docs/audit/008_INCIDENT_RESPONSE_RUNBOOK.MD`. + +## 6. Assumptions +- Maintainer access controls and repository permissions remain correctly configured. +- GitHub-hosted security features remain available for the repository. +- Consumers execute their own runtime hardening controls; this repository cannot enforce them. + +## 7. Residual Risks +- R1: runtime denial-of-service risk can only be reduced, not eliminated, for hostile inputs. +- R2: organizational response speed depends on maintainer availability. +- R3: third-party platform outages can delay triage/release actions. + +## 8. Reproducible Verification +```bash +bash tools/audit/verify-security-claims.sh +bash tools/ci/bin/run.sh security-nuget +gh attestation verify artifacts/nuget/*.nupkg --repo tomtastisch/FileClassifier +``` diff --git a/docs/audit/008_INCIDENT_RESPONSE_RUNBOOK.MD b/docs/audit/008_INCIDENT_RESPONSE_RUNBOOK.MD new file mode 100644 index 0000000..dae747a --- /dev/null +++ b/docs/audit/008_INCIDENT_RESPONSE_RUNBOOK.MD @@ -0,0 +1,73 @@ +# Incident Response Runbook + +## 1. Trigger Events +- private vulnerability report submitted via GitHub security reporting +- CI security gate failure with exploitable impact (`security-nuget`, provenance verification, or equivalent) +- credible external report of active exploitation + +## 2. Roles +- Incident owner: repository maintainer on duty +- Technical triage: maintainer with code-area ownership +- Release owner: maintainer with release permissions +- Communications owner: maintainer coordinating disclosure/advisory text + +## 3. 
SLA And Time Targets +- Intake acknowledgement: within 5 business days (per `SECURITY.md`) +- Triage decision target: as early as possible after reproducibility is confirmed +- Fix and disclosure timeline: risk- and complexity-based, coordinated with reporter + +## 4. Severity Model (Operational) +- Sev-1: confirmed critical exploitability or active exploitation +- Sev-2: high impact with realistic exploitation path +- Sev-3: medium/low impact or hard-to-exploit conditions + +## 5. Procedure +1. Intake +- Confirm report channel and classify duplicate/new case. +- Create internal tracking entry with timestamp and reporter context. + +2. Triage +- Reproduce in isolated environment using minimal PoC. +- Determine affected versions and impact surface. + +3. Containment +- Define temporary mitigations (config/workaround/documentation guardrails) if fix is not immediate. +- Avoid public issue disclosure before coordinated plan is agreed. + +4. Remediation +- Implement minimal-risk patch in focused commit(s). +- Add or adapt regression tests for exploit path. + +5. Verification +- Run security-relevant checks and document exact command outputs: +```bash +dotnet build FileClassifier.sln -v minimal +dotnet test tests/FileTypeDetectionLib.Tests/FileTypeDetectionLib.Tests.csproj -c Release -v minimal +bash tools/ci/bin/run.sh security-nuget +bash tools/audit/verify-security-claims.sh +``` + +6. Release And Disclosure +- Prepare advisory/release notes with affected/fixed versions. +- Publish coordinated disclosure once fix availability is confirmed. + +7. Closure +- Record post-incident summary, root cause, and follow-up tasks. +- Link fix commit(s), tests, and evidence artifacts. + +## 6. Evidence Checklist +- timeline with UTC timestamps +- severity assignment and rationale +- affected version range and fixed version +- reproduction steps and verification commands +- commit SHAs and PR references +- advisory/disclosure reference + +## 7. 
Evidence Storage +- CI artifacts: `artifacts/ci/*` +- claim evidence: `artifacts/ci/security-claims-evidence/*` +- package/release evidence: `artifacts/nuget/*` when available + +## 8. Non-Goals +- This runbook is not legal advice. +- This runbook does not claim formal certification or regulatory coverage by itself. diff --git a/docs/audit/009_SUPPLY_CHAIN_BASELINE.MD b/docs/audit/009_SUPPLY_CHAIN_BASELINE.MD new file mode 100644 index 0000000..d0fd252 --- /dev/null +++ b/docs/audit/009_SUPPLY_CHAIN_BASELINE.MD @@ -0,0 +1,50 @@ +# Supply Chain Baseline + +## 1. Objective +Define minimum reproducible controls for source-to-package integrity in this repository. + +## 2. Control Baseline +- S1 Source integrity: + - branch protections and required status checks on default branch + - deterministic CI gates (`preflight`, `build`, `security-nuget`, `summary`) +- S2 Build integrity: + - `dotnet restore --locked-mode` in CI/release paths + - fail-closed policy checks in `tools/ci/bin/run.sh` +- S3 Release integrity: + - OIDC trusted publishing in `.github/workflows/release.yml` + - provenance attestation via `actions/attest-build-provenance@v2` +- S4 Vulnerability hygiene: + - NuGet vulnerability gate (`security-nuget`) + - security claims verification (`security-claims-evidence`) + +## 3. Evidence Mapping +- E1 CI workflow evidence: + - `.github/workflows/ci.yml` + - `artifacts/ci/*` +- E2 Security claim evidence: + - `.github/workflows/security-claims-evidence.yml` + - `artifacts/ci/security-claims-evidence/result.json` +- E3 Code analysis evidence: + - `.github/workflows/code-analysis-evidence.yml` + - `artifacts/ci/code-analysis-evidence/result.json` +- E4 Release/provenance evidence: + - `.github/workflows/release.yml` + - `artifacts/nuget/attestation-verify.txt` (when release workflow runs) + +## 4. 
Verification Commands
+```bash
+bash tools/ci/bin/run.sh security-nuget
+bash tools/audit/verify-security-claims.sh
+bash tools/audit/verify-code-analysis-evidence.sh
+dotnet nuget verify artifacts/nuget/*.nupkg
+gh attestation verify artifacts/nuget/*.nupkg --repo tomtastisch/FileClassifier
+```
+
+## 5. Operational Cadence
+- Every PR: CI, security claims, and code-analysis evidence workflows
+- Every release: attestation generation and verification
+- Regular review: refresh baseline docs when controls or workflows change
+
+## 6. Boundaries And Limits
+- This baseline provides assurance evidence, not formal third-party certification.
+- Downstream runtime hardening remains the responsibility of deployers/operators.
diff --git a/docs/audit/010_REFACTOR_BACKLOG.MD b/docs/audit/010_REFACTOR_BACKLOG.MD
new file mode 100644
index 0000000..4f40f06
--- /dev/null
+++ b/docs/audit/010_REFACTOR_BACKLOG.MD
@@ -0,0 +1,75 @@
+# Refactor Backlog (Cluster 7C)
+
+## 1. Source
+Backlog is derived from:
+- `artifacts/audit/hardening_candidates.json`
+- `artifacts/audit/redundancy_candidates.json`
+- `docs/audit/006_CODE_REVIEW_FINDINGS.MD`
+
+Current baseline snapshot:
+- hardening candidates: `12`
+- redundancy candidates: `18`
+- dead-code candidates: `0`
+
+## 2. 
Prioritized PR Packages + +### Package A (P1) - Narrow broad exception catches in high-traffic entry points +Scope candidates: +- `src/FileTypeDetection/FileTypeDetector.vb` +- `src/FileTypeDetection/FileMaterializer.vb` +- `src/FileTypeDetection/EvidenceHashing.vb` + +Rules: +- replace generic `Catch ex As Exception` with narrower exception handling where deterministically possible +- keep fail-closed behavior intact +- preserve current public API and return contracts + +Required checks: +```bash +dotnet build FileClassifier.sln -v minimal +dotnet test tests/FileTypeDetectionLib.Tests/FileTypeDetectionLib.Tests.csproj -c Release -v minimal +bash tools/ci/bin/run.sh tests-bdd-coverage +``` + +### Package B (P2) - Archive/path guard duplication reduction +Scope candidates: +- `src/FileTypeDetection/Infrastructure/CoreInternals.vb` +- `src/FileTypeDetection/Infrastructure/ArchiveInternals.vb` +- `src/FileTypeDetection/Infrastructure/ArchiveManagedInternals.vb` + +Rules: +- consolidate duplicated guard fragments only when behavior remains byte-for-byte equivalent on outputs +- add focused regression tests for guard edge cases + +Required checks: +```bash +dotnet test tests/FileTypeDetectionLib.Tests/FileTypeDetectionLib.Tests.csproj -c Release --filter "Category=ApiContract" -v minimal +bash tools/ci/bin/run.sh package-backed-tests +bash tools/ci/bin/run.sh consumer-smoke +``` + +### Package C (P3) - Exception reason code normalization +Scope candidates: +- broad catch sites that remain intentionally generic after Package A + +Rules: +- enrich exception-to-result mapping with deterministic reason codes +- no hidden behavior changes in success path + +Required checks: +```bash +dotnet build FileClassifier.sln -v minimal +dotnet test tests/FileTypeDetectionLib.Tests/FileTypeDetectionLib.Tests.csproj -c Release -v minimal +bash tools/audit/verify-code-analysis-evidence.sh +``` + +## 3. Execution Order +1. Package A +2. Package B +3. Package C + +## 4. 
Definition Of Done Per Package +- targeted code changes are limited to package scope +- regression tests added/updated for changed behavior branches +- full CI on PR is green +- `docs/audit/006_CODE_REVIEW_FINDINGS.MD` updated with closed/moved findings diff --git a/docs/audit/011_SECURITY_BENCHMARK.MD b/docs/audit/011_SECURITY_BENCHMARK.MD new file mode 100644 index 0000000..1fbebc5 --- /dev/null +++ b/docs/audit/011_SECURITY_BENCHMARK.MD @@ -0,0 +1,92 @@ +# Security-Policy Benchmark (Stand: 2026-02-13) + +## 1. Ziel und Scope +Vergleich der Security-Policy-Reife von `tomtastisch/FileClassifier` (PR-Branch `tomtastisch-patch-1`) mit verbreiteten .NET-Open-Source-Repositories anhand nachweisbarer GitHub- und Repository-Fakten. +Dieser Benchmark ist ein Snapshot vor Merge in `main` (Stand 2026-02-13). + +Verglichene Repositories: +- `tomtastisch/FileClassifier` +- `dotnet/runtime` +- `dotnet/aspnetcore` +- `JamesNK/Newtonsoft.Json` +- `DapperLib/Dapper` +- `serilog/serilog` +- `AutoMapper/AutoMapper` +- `App-vNext/Polly` +- `FluentValidation/FluentValidation` +- `xunit/xunit` +- `NLog/NLog` + +## 2. Methodik (nur faktenbasiert) +Erhoben via GitHub API und lokale Dateiinspektion: +- Vorhandensein `SECURITY.md` (`SECURITY.md` oder `.github/SECURITY.md`) +- Status `private-vulnerability-reporting` +- Sichtbare `security_and_analysis`-Felder (`dependabot_security_updates`, `secret_scanning`) +- Vorhandensein `.github/dependabot.yml` +- Vorhandensein von Workflow-Dateien mit `codeql` im Dateinamen +- Inhaltsmerkmale der `SECURITY.md`: Support-Tabelle, Reporting, SLA-Zeitangaben, Safe Harbor, ISO/IEC 29147/30111, koordinierte Offenlegung + +## 3. 
Ergebnis A - Plattform-/Repo-Merkmale +| Repository | SECURITY.md | Private Vulnerability Reporting | Dependabot Security Updates | Secret Scanning | CodeQL Workflow-Datei | dependabot.yml | +|---|---|---|---|---|---|---| +| tomtastisch/FileClassifier | nein (Snapshot vor Merge in `main`) | true | enabled | enabled | nein | nein | +| dotnet/runtime | ja | false_or_unavailable | unknown | unknown | nein | ja | +| dotnet/aspnetcore | ja | false_or_unavailable | unknown | unknown | nein | ja | +| JamesNK/Newtonsoft.Json | nein | false_or_unavailable | unknown | unknown | ja | nein | +| DapperLib/Dapper | nein | false_or_unavailable | unknown | unknown | nein | nein | +| serilog/serilog | ja | true | unknown | unknown | nein | nein | +| AutoMapper/AutoMapper | nein | true | unknown | unknown | nein | nein | +| App-vNext/Polly | ja | true | unknown | unknown | nein | ja | +| FluentValidation/FluentValidation | nein | false_or_unavailable | unknown | unknown | nein | nein | +| xunit/xunit | nein | false_or_unavailable | unknown | unknown | nein | nein | +| NLog/NLog | ja | true | unknown | unknown | nein | ja | + +Hinweis: +- `unknown` bedeutet: Feld wird ueber GitHub API fuer das betrachtete Fremdrepo nicht veroeffentlicht bzw. nicht sichtbar geliefert. +- `false_or_unavailable` bedeutet: API liefert fuer den Endpunkt keine aktive Konfiguration. + +## 4. Ergebnis B - SECURITY.md Inhaltsvergleich +| Repository | Support-Versionen | Reporting-Abschnitt | SLA-Zeitwerte | Safe Harbor | ISO/IEC 29147/30111 | Koordinierte Offenlegung | +|---|---|---|---|---|---|---| +| dotnet/runtime | ja | ja | ja | nein | nein | nein | +| dotnet/aspnetcore | ja | ja | ja | nein | nein | nein | +| serilog/serilog | ja | ja | nein | nein | nein | nein | +| App-vNext/Polly | nein | ja | ja | nein | nein | nein | +| NLog/NLog | ja | ja | nein | nein | nein | nein | +| tomtastisch/FileClassifier (lokal, PR) | ja | ja | ja | ja | ja | ja | + +## 5. 
Interpretation (faktennah) +- Die neue `SECURITY.md` von FileClassifier ist inhaltlich umfangreicher als die untersuchten Vergleichsdateien (insbesondere Safe Harbor, ISO/IEC-Orientierung, koordinierte Offenlegung). +- Snapshot-Hinweis: Zum Erhebungszeitpunkt lag die Datei im PR-Branch `tomtastisch-patch-1` und noch nicht in `main`. +- Repo-seitige Sicherheitsmechanismen sind fuer FileClassifier nachweisbar aktiv: + - Private Vulnerability Reporting (`enabled=true`) + - Dependabot Security Updates (`enabled`) + - Secret Scanning (`enabled`) + - CI-Sicherheitsgate `security-nuget` (lokal mit `pass` verifiziert) + +## 6. Rest-Gaps fuer "zertifizierungsnahe" Einsatzfaehigkeit +Diese Punkte sind ausserhalb einer einzelnen `SECURITY.md`, aber notwendig fuer belastbare Audit-/Zertifizierungsfaehigkeit: +- formaler SDLC-Nachweis (dokumentierte Rollen, Freigaben, Change-Management) +- Threat-Modeling-Artefakte mit regelmaessiger Aktualisierung +- Incident-Response-Runbooks mit Uebungen und Nachweisen +- SBOM/Provenance-Strategie fuer Releases (inkl. Aufbewahrung) +- formaler Nachweis fuer Produktionsbetriebskontrollen beim Betreiber + +## 7. 
Reproduzierbarkeit +Verwendete Kommandos (Auszug): +```bash +gh api 'repos//' +gh api 'repos///private-vulnerability-reporting' +gh api 'repos///contents/SECURITY.md' +gh api 'repos///contents/.github/SECURITY.md' +gh api 'repos///contents/.github/workflows' +gh api 'repos///contents/.github/dependabot.yml' +``` + +Lokale Verifikation fuer FileClassifier: +```bash +python3 tools/check-docs.py +dotnet build FileClassifier.sln -v minimal +dotnet test tests/FileTypeDetectionLib.Tests/FileTypeDetectionLib.Tests.csproj -c Release -v minimal +bash tools/ci/bin/run.sh security-nuget +``` diff --git a/src/FileTypeDetection/EvidenceHashing.vb b/src/FileTypeDetection/EvidenceHashing.vb index aee90b1..13d9336 100644 --- a/src/FileTypeDetection/EvidenceHashing.vb +++ b/src/FileTypeDetection/EvidenceHashing.vb @@ -524,12 +524,26 @@ Namespace Global.Tomtastisch.FileClassifier End Using End Using Return True + Catch ex As UnauthorizedAccessException + Return SetReadFileError(ex, errorMessage) + Catch ex As Global.System.IO.IOException + Return SetReadFileError(ex, errorMessage) + Catch ex As Global.System.IO.InvalidDataException + Return SetReadFileError(ex, errorMessage) + Catch ex As NotSupportedException + Return SetReadFileError(ex, errorMessage) + Catch ex As ArgumentException + Return SetReadFileError(ex, errorMessage) Catch ex As Exception - errorMessage = $"Datei konnte nicht gelesen werden: {ex.Message}" - Return False + Return SetReadFileError(ex, errorMessage) End Try End Function + Private Shared Function SetReadFileError(ex As Exception, ByRef errorMessage As String) As Boolean + errorMessage = $"Datei konnte nicht gelesen werden: {ex.Message}" + Return False + End Function + Private NotInheritable Class NormalizedEntry Friend ReadOnly Property RelativePath As String Friend ReadOnly Property Content As Byte() diff --git a/src/FileTypeDetection/FileMaterializer.vb b/src/FileTypeDetection/FileMaterializer.vb index 7ce54c6..10f5f38 100644 --- 
a/src/FileTypeDetection/FileMaterializer.vb +++ b/src/FileTypeDetection/FileMaterializer.vb @@ -32,6 +32,21 @@ Namespace Global.Tomtastisch.FileClassifier Dim destinationFull As String Try destinationFull = Path.GetFullPath(destinationPath) + Catch ex As ArgumentException + LogGuard.Warn(opt.Logger, $"[Materialize] Ungueltiger Zielpfad: {ex.Message}") + Return False + Catch ex As UnauthorizedAccessException + LogGuard.Warn(opt.Logger, $"[Materialize] Ungueltiger Zielpfad: {ex.Message}") + Return False + Catch ex As NotSupportedException + LogGuard.Warn(opt.Logger, $"[Materialize] Ungueltiger Zielpfad: {ex.Message}") + Return False + Catch ex As PathTooLongException + LogGuard.Warn(opt.Logger, $"[Materialize] Ungueltiger Zielpfad: {ex.Message}") + Return False + Catch ex As IOException + LogGuard.Warn(opt.Logger, $"[Materialize] Ungueltiger Zielpfad: {ex.Message}") + Return False Catch ex As Exception LogGuard.Warn(opt.Logger, $"[Materialize] Ungueltiger Zielpfad: {ex.Message}") Return False @@ -75,6 +90,21 @@ Namespace Global.Tomtastisch.FileClassifier End Using Return True + Catch ex As UnauthorizedAccessException + LogGuard.Error(opt.Logger, "[Materialize] Byte-Persistenz fehlgeschlagen.", ex) + Return False + Catch ex As IOException + LogGuard.Error(opt.Logger, "[Materialize] Byte-Persistenz fehlgeschlagen.", ex) + Return False + Catch ex As InvalidDataException + LogGuard.Error(opt.Logger, "[Materialize] Byte-Persistenz fehlgeschlagen.", ex) + Return False + Catch ex As NotSupportedException + LogGuard.Error(opt.Logger, "[Materialize] Byte-Persistenz fehlgeschlagen.", ex) + Return False + Catch ex As ArgumentException + LogGuard.Error(opt.Logger, "[Materialize] Byte-Persistenz fehlgeschlagen.", ex) + Return False Catch ex As Exception LogGuard.Error(opt.Logger, "[Materialize] Byte-Persistenz fehlgeschlagen.", ex) Return False @@ -91,6 +121,21 @@ Namespace Global.Tomtastisch.FileClassifier Using ms As New MemoryStream(data, writable:=False) Return 
ArchiveExtractor.TryExtractArchiveStream(ms, destinationFull, opt, descriptor) End Using + Catch ex As UnauthorizedAccessException + LogGuard.Error(opt.Logger, "[Materialize] Archiv-Extraktion fehlgeschlagen.", ex) + Return False + Catch ex As IOException + LogGuard.Error(opt.Logger, "[Materialize] Archiv-Extraktion fehlgeschlagen.", ex) + Return False + Catch ex As InvalidDataException + LogGuard.Error(opt.Logger, "[Materialize] Archiv-Extraktion fehlgeschlagen.", ex) + Return False + Catch ex As NotSupportedException + LogGuard.Error(opt.Logger, "[Materialize] Archiv-Extraktion fehlgeschlagen.", ex) + Return False + Catch ex As ArgumentException + LogGuard.Error(opt.Logger, "[Materialize] Archiv-Extraktion fehlgeschlagen.", ex) + Return False Catch ex As Exception LogGuard.Error(opt.Logger, "[Materialize] Archiv-Extraktion fehlgeschlagen.", ex) Return False diff --git a/src/FileTypeDetection/FileTypeDetector.vb b/src/FileTypeDetection/FileTypeDetector.vb index 1186a1a..a378d6f 100644 --- a/src/FileTypeDetection/FileTypeDetector.vb +++ b/src/FileTypeDetection/FileTypeDetector.vb @@ -74,9 +74,18 @@ Namespace Global.Tomtastisch.FileClassifier Return ms.ToArray() End Using End Using + Catch ex As UnauthorizedAccessException + Return LogReadFileSafeFailure(opt, ex) + Catch ex As IOException + Return LogReadFileSafeFailure(opt, ex) + Catch ex As InvalidDataException + Return LogReadFileSafeFailure(opt, ex) + Catch ex As NotSupportedException + Return LogReadFileSafeFailure(opt, ex) + Catch ex As ArgumentException + Return LogReadFileSafeFailure(opt, ex) Catch ex As Exception - LogGuard.Error(opt.Logger, "[Detect] ReadFileSafe Fehler.", ex) - Return Array.Empty(Of Byte)() + Return LogReadFileSafeFailure(opt, ex) End Try End Function @@ -203,10 +212,18 @@ Namespace Global.Tomtastisch.FileClassifier Dim header = ReadHeader(fs, opt.SniffBytes, opt.MaxBytes) Return ResolveByHeaderForPath(header, opt, trace, fs) End Using + Catch ex As UnauthorizedAccessException + Return 
LogDetectFailure(opt, trace, ex) + Catch ex As IOException + Return LogDetectFailure(opt, trace, ex) + Catch ex As InvalidDataException + Return LogDetectFailure(opt, trace, ex) + Catch ex As NotSupportedException + Return LogDetectFailure(opt, trace, ex) + Catch ex As ArgumentException + Return LogDetectFailure(opt, trace, ex) Catch ex As Exception - LogGuard.Error(opt.Logger, "[Detect] Ausnahme, fail-closed.", ex) - trace.ReasonCode = ReasonException - Return UnknownType() + Return LogDetectFailure(opt, trace, ex) End Try End Function @@ -250,9 +267,18 @@ Namespace Global.Tomtastisch.FileClassifier If payload.Length = 0 Then Return False Return _ FileMaterializer.Persist(payload, destinationDirectory, overwrite:=False, secureExtract:=True) + Catch ex As UnauthorizedAccessException + Return LogArchiveExtractFailure(opt, ex) + Catch ex As IOException + Return LogArchiveExtractFailure(opt, ex) + Catch ex As InvalidDataException + Return LogArchiveExtractFailure(opt, ex) + Catch ex As NotSupportedException + Return LogArchiveExtractFailure(opt, ex) + Catch ex As ArgumentException + Return LogArchiveExtractFailure(opt, ex) Catch ex As Exception - LogGuard.Error(opt.Logger, "[ArchiveExtract] Ausnahme, fail-closed.", ex) - Return False + Return LogArchiveExtractFailure(opt, ex) End Try End Function @@ -277,9 +303,18 @@ Namespace Global.Tomtastisch.FileClassifier InternalIoDefaults.FileStreamBufferSize, FileOptions.SequentialScan) Return ArchiveExtractor.TryExtractArchiveStreamToMemory(fs, opt) End Using + Catch ex As UnauthorizedAccessException + Return LogArchiveExtractFailure(opt, ex, emptyResult) + Catch ex As IOException + Return LogArchiveExtractFailure(opt, ex, emptyResult) + Catch ex As InvalidDataException + Return LogArchiveExtractFailure(opt, ex, emptyResult) + Catch ex As NotSupportedException + Return LogArchiveExtractFailure(opt, ex, emptyResult) + Catch ex As ArgumentException + Return LogArchiveExtractFailure(opt, ex, emptyResult) Catch ex As Exception - 
LogGuard.Error(opt.Logger, "[ArchiveExtract] Ausnahme, fail-closed.", ex) - Return emptyResult + Return LogArchiveExtractFailure(opt, ex, emptyResult) End Try End Function @@ -293,9 +328,18 @@ Namespace Global.Tomtastisch.FileClassifier Try Dim trace As DetectionTrace = DetectionTrace.Empty Return ResolveByHeaderForBytes(data, opt, trace, data) + Catch ex As UnauthorizedAccessException + Return LogDetectFailure(opt, ex) + Catch ex As IOException + Return LogDetectFailure(opt, ex) + Catch ex As InvalidDataException + Return LogDetectFailure(opt, ex) + Catch ex As NotSupportedException + Return LogDetectFailure(opt, ex) + Catch ex As ArgumentException + Return LogDetectFailure(opt, ex) Catch ex As Exception - LogGuard.Error(opt.Logger, "[Detect] Ausnahme, fail-closed.", ex) - Return UnknownType() + Return LogDetectFailure(opt, ex) End Try End Function @@ -545,6 +589,35 @@ Namespace Global.Tomtastisch.FileClassifier Return FileTypeRegistry.Resolve(FileKind.Unknown) End Function + Private Shared Function LogReadFileSafeFailure(opt As FileTypeProjectOptions, ex As Exception) As Byte() + LogGuard.Error(opt.Logger, "[Detect] ReadFileSafe Fehler.", ex) + Return Array.Empty(Of Byte)() + End Function + + Private Shared Function LogDetectFailure(opt As FileTypeProjectOptions, ex As Exception) As FileType + LogGuard.Error(opt.Logger, "[Detect] Ausnahme, fail-closed.", ex) + Return UnknownType() + End Function + + Private Shared Function LogDetectFailure(opt As FileTypeProjectOptions, ByRef trace As DetectionTrace, + ex As Exception) As FileType + LogGuard.Error(opt.Logger, "[Detect] Ausnahme, fail-closed.", ex) + trace.ReasonCode = ReasonException + Return UnknownType() + End Function + + Private Shared Function LogArchiveExtractFailure(opt As FileTypeProjectOptions, ex As Exception) As Boolean + LogGuard.Error(opt.Logger, "[ArchiveExtract] Ausnahme, fail-closed.", ex) + Return False + End Function + + Private Shared Function LogArchiveExtractFailure(opt As 
FileTypeProjectOptions, ex As Exception, + emptyResult As IReadOnlyList(Of ZipExtractedEntry)) _ + As IReadOnlyList(Of ZipExtractedEntry) + LogGuard.Error(opt.Logger, "[ArchiveExtract] Ausnahme, fail-closed.", ex) + Return emptyResult + End Function + Private Shared Function CreateReadOnlyMemoryStream(data As Byte()) As MemoryStream Return New MemoryStream(data, 0, data.Length, writable:=False, publiclyVisible:=False) End Function diff --git a/src/FileTypeDetection/README.md b/src/FileTypeDetection/README.md index 9d94f26..674e7a4 100644 --- a/src/FileTypeDetection/README.md +++ b/src/FileTypeDetection/README.md @@ -29,7 +29,14 @@ flowchart LR - [Dokumentationsindex](https://github.com/tomtastisch/FileClassifier/blob/main/docs/001_INDEX_CORE.MD) - [API-Kernübersicht](https://github.com/tomtastisch/FileClassifier/blob/main/docs/010_API_CORE.MD) - [Architektur und Flows](https://github.com/tomtastisch/FileClassifier/blob/main/docs/020_ARCH_CORE.MD) +- [Audit Index](https://github.com/tomtastisch/FileClassifier/blob/main/docs/audit/000_INDEX.MD) - [Detektion-Submodul](https://github.com/tomtastisch/FileClassifier/blob/main/src/FileTypeDetection/Detection/README.md) - [Infrastruktur-Submodul](https://github.com/tomtastisch/FileClassifier/blob/main/src/FileTypeDetection/Infrastructure/README.md) - [Konfiguration-Submodul](https://github.com/tomtastisch/FileClassifier/blob/main/src/FileTypeDetection/Configuration/README.md) - [Abstractions-Submodul](https://github.com/tomtastisch/FileClassifier/blob/main/src/FileTypeDetection/Abstractions/README.md) + +## 7. 
Provenance Verification +```bash +dotnet nuget verify artifacts/nuget/*.nupkg +gh attestation verify artifacts/nuget/*.nupkg --repo tomtastisch/FileClassifier +``` diff --git a/tools/audit/generate-code-analysis-json.sh b/tools/audit/generate-code-analysis-json.sh new file mode 100755 index 0000000..53f6ed5 --- /dev/null +++ b/tools/audit/generate-code-analysis-json.sh @@ -0,0 +1,144 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")/../.." && pwd)" +OUT_DIR="${ROOT_DIR}/artifacts/audit" +mkdir -p "${OUT_DIR}" + +INVENTORY_JSON="${OUT_DIR}/code_inventory.json" +CALLGRAPH_JSON="${OUT_DIR}/callgraph_inventory.json" +DEAD_JSON="${OUT_DIR}/dead_code_candidates.json" +REDUND_JSON="${OUT_DIR}/redundancy_candidates.json" +HARD_JSON="${OUT_DIR}/hardening_candidates.json" + +# Inventory +python3 - "$ROOT_DIR" "$INVENTORY_JSON" <<'PY' +import datetime, hashlib, json, pathlib, sys +root=pathlib.Path(sys.argv[1]) +out=pathlib.Path(sys.argv[2]) +files=[] +for p in sorted((root/'src').rglob('*')): + if p.suffix.lower() not in {'.vb','.cs'} or not p.is_file(): + continue + if '/obj/' in p.as_posix() or '/bin/' in p.as_posix(): + continue + rel=p.relative_to(root).as_posix() + data=p.read_bytes() + txt=data.decode('utf-8', errors='replace') + files.append({ + 'path': rel, + 'language': 'vb' if p.suffix.lower()=='.vb' else 'cs', + 'loc': len(txt.splitlines()), + 'sha256': hashlib.sha256(data).hexdigest(), + }) +generated_at=datetime.datetime.now(datetime.timezone.utc).replace(microsecond=0).isoformat().replace('+00:00','Z') +out.write_text(json.dumps({'generated_at': generated_at, 'files': files}, ensure_ascii=True), encoding='utf-8') +PY + +python3 - "$ROOT_DIR" "$CALLGRAPH_JSON" "$DEAD_JSON" "$REDUND_JSON" "$HARD_JSON" <<'PY' +import json, pathlib, re, sys, collections, datetime +root=pathlib.Path(sys.argv[1]) +callgraph_path=pathlib.Path(sys.argv[2]) +dead_path=pathlib.Path(sys.argv[3]) +redund_path=pathlib.Path(sys.argv[4]) 
+hard_path=pathlib.Path(sys.argv[5]) + +decls=[] +vb_decl=re.compile(r'^\s*(Public|Private|Friend|Protected)?\s*(Shared\s+)?(Function|Sub)\s+([A-Za-z_][A-Za-z0-9_]*)') +cs_decl=re.compile(r'^\s*(public|private|internal|protected)\s+(?:static\s+|virtual\s+|override\s+|sealed\s+|async\s+|unsafe\s+|new\s+|partial\s+)*(?:[\w<>\[\],\.\?]+\s+)+([A-Za-z_][A-Za-z0-9_]*)\s*\(') + +# Reference counts +source_texts={} +for p in list((root/'src').rglob('*.vb'))+list((root/'src').rglob('*.cs'))+list((root/'tests').rglob('*.cs')): + if p.is_file(): + if '/obj/' in p.as_posix() or '/bin/' in p.as_posix(): + continue + rel=p.relative_to(root).as_posix() + txt=p.read_text(encoding='utf-8', errors='replace') + source_texts[rel]=txt + lines=txt.splitlines() + if p.suffix.lower()=='.vb': + for i,line in enumerate(lines, start=1): + m=vb_decl.match(line) + if m: + decls.append({'file':rel,'line':i,'symbol':m.group(4),'language':'vb'}) + elif p.suffix.lower()=='.cs': + for i,line in enumerate(lines, start=1): + m=cs_decl.match(line) + if m: + decls.append({'file':rel,'line':i,'symbol':m.group(2),'language':'cs'}) + +ref_counts=[] +for d in decls: + sym=d['symbol'] + pattern=re.compile(r'\b'+re.escape(sym)+r'\b') + count=0 + for txt in source_texts.values(): + count += len(pattern.findall(txt)) + external_ref_count=max(count - 1, 0) + ref_counts.append({**d,'reference_count':count,'reference_count_excluding_self':external_ref_count}) + +dead=[] +for r in ref_counts: + if r['reference_count_excluding_self']==0 and '/src/' in ('/'+r['file']): + dead.append({ + 'type':'potential_dead_code', + 'file':r['file'], + 'line':r['line'], + 'symbol':r['symbol'], + 'evidence':'symbol has no detected references outside declaration in repository text search', + 'confidence':'low', + }) + +# Redundancy heuristic: repeated line fragments in src +line_hits=collections.Counter() +line_locations=collections.defaultdict(list) +for fp,txt in source_texts.items(): + if not fp.startswith('src/'): + continue + 
for i,line in enumerate(txt.splitlines(), start=1): + norm=line.strip() + if len(norm)<40: + continue + if norm.startswith('Namespace ') or norm.startswith("'''") or 'LogGuard.' in norm: + continue + line_hits[norm]+=1 + if len(line_locations[norm])<5: + line_locations[norm].append({'file':fp,'line':i}) + +redund=[] +for norm,count in line_hits.items(): + if count>=4: + redund.append({ + 'type':'potential_redundancy', + 'snippet':norm[:180], + 'occurrences':count, + 'sample_locations':line_locations[norm], + 'confidence':'low', + }) + +# Hardening candidates: broad catch blocks in src +# Only classify explicit generic catch forms (`As Exception`) as broad. +hard=[] +broad_catch=re.compile(r'Catch\s+\w+\s+As\s+(?:Global\.System\.)?Exception\b') +for fp,txt in source_texts.items(): + if not fp.startswith('src/'): + continue + for i,line in enumerate(txt.splitlines(), start=1): + if broad_catch.search(line): + hard.append({ + 'type':'broad_exception_catch', + 'file':fp, + 'line':i, + 'evidence':line.strip()[:220], + 'confidence':'medium', + }) + +generated_at=datetime.datetime.now(datetime.timezone.utc).replace(microsecond=0).isoformat().replace('+00:00','Z') +callgraph_path.write_text(json.dumps({'generated_at':generated_at,'method_declarations':decls,'symbol_reference_counts':ref_counts,'edges':[],'notes':['heuristic baseline: declaration + repository symbol counts only']}, ensure_ascii=True), encoding='utf-8') +dead_path.write_text(json.dumps({'generated_at':generated_at,'candidates':dead}, ensure_ascii=True), encoding='utf-8') +redund_path.write_text(json.dumps({'generated_at':generated_at,'candidates':redund}, ensure_ascii=True), encoding='utf-8') +hard_path.write_text(json.dumps({'generated_at':generated_at,'candidates':hard}, ensure_ascii=True), encoding='utf-8') +PY + +echo "Generated JSON artifacts in ${OUT_DIR}" >&2 diff --git a/tools/audit/verify-code-analysis-evidence.sh b/tools/audit/verify-code-analysis-evidence.sh new file mode 100755 index 
0000000..0915b06 --- /dev/null +++ b/tools/audit/verify-code-analysis-evidence.sh @@ -0,0 +1,169 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")/../.." && pwd)" +CHECK_ID="code-analysis-evidence" +OUT_DIR_REL="artifacts/ci/${CHECK_ID}" +OUT_DIR="${ROOT_DIR}/${OUT_DIR_REL}" +RAW_LOG="${OUT_DIR}/raw.log" +SUMMARY_MD="${OUT_DIR}/summary.md" +RESULT_JSON="${OUT_DIR}/result.json" + +mkdir -p "${OUT_DIR}" +: > "${RAW_LOG}" + +STARTED_AT="$(date -u +"%Y-%m-%dT%H:%M:%SZ")" +START_EPOCH="$(date -u +%s)" + +VIOLATIONS_JSONL="${OUT_DIR}/.violations.jsonl" +: > "${VIOLATIONS_JSONL}" + +PASS_COUNT=0 +WARN_COUNT=0 +FAIL_COUNT=0 + +log() { + printf '%s\n' "$*" | tee -a "${RAW_LOG}" >/dev/null +} + +add_violation() { + local rule_id="$1" + local severity="$2" + local message="$3" + local evidence="$4" + + if [[ "${severity}" == "warn" ]]; then + WARN_COUNT=$((WARN_COUNT + 1)) + else + FAIL_COUNT=$((FAIL_COUNT + 1)) + fi + + python3 - "$rule_id" "$severity" "$message" "$evidence" >> "${VIOLATIONS_JSONL}" <<'PY' +import json,sys +if len(sys.argv) < 5: + raise SystemExit("expected 4 arguments: rule_id severity message evidence") +rule_id,severity,message,evidence=sys.argv[1:5] +print(json.dumps({ + "rule_id": rule_id, + "severity": severity, + "message": message, + "evidence_paths": [evidence], +}, ensure_ascii=True)) +PY +} + +add_pass() { + PASS_COUNT=$((PASS_COUNT + 1)) +} + +run_cmd_capture() { + local label="$1" + shift + log "# ${label}" + log "$ $*" + if "$@" >> "${RAW_LOG}" 2>&1; then + return 0 + fi + return 1 +} + +require_tool() { + local t="$1" + if ! command -v "${t}" >/dev/null 2>&1; then + add_violation "CI-CODE-ANALYSIS-003" "fail" "Missing required tool '${t}'" "tools/audit/verify-code-analysis-evidence.sh" + return 1 + fi + return 0 +} + +validate_json_doc() { + local file_path="$1" + local jq_expr="$2" + if [[ ! 
-f "${file_path}" ]]; then + add_violation "CI-CODE-ANALYSIS-001" "fail" "Missing JSON artifact: ${file_path}" "${OUT_DIR_REL}/raw.log" + return 1 + fi + if ! jq -e "${jq_expr}" "${file_path}" >> "${RAW_LOG}" 2>&1; then + add_violation "CI-CODE-ANALYSIS-002" "fail" "Invalid JSON structure in ${file_path}" "${file_path#${ROOT_DIR}/}" + return 1 + fi + add_pass + return 0 +} + +require_tool jq + +if ! run_cmd_capture "Generate code analysis JSON artifacts" bash "${ROOT_DIR}/tools/audit/generate-code-analysis-json.sh"; then + add_violation "CI-CODE-ANALYSIS-000" "fail" "Generator script failed" "tools/audit/generate-code-analysis-json.sh" +fi + +validate_json_doc "${ROOT_DIR}/artifacts/audit/code_inventory.json" '.generated_at and (.files | type=="array")' || true +validate_json_doc "${ROOT_DIR}/artifacts/audit/callgraph_inventory.json" '.generated_at and (.method_declarations | type=="array") and (.symbol_reference_counts | type=="array") and (.edges | type=="array")' || true +validate_json_doc "${ROOT_DIR}/artifacts/audit/dead_code_candidates.json" '.generated_at and (.candidates | type=="array")' || true +validate_json_doc "${ROOT_DIR}/artifacts/audit/redundancy_candidates.json" '.generated_at and (.candidates | type=="array")' || true +validate_json_doc "${ROOT_DIR}/artifacts/audit/hardening_candidates.json" '.generated_at and (.candidates | type=="array")' || true + +if [[ "${FAIL_COUNT}" -gt 0 ]]; then + STATUS="fail" +elif [[ "${WARN_COUNT}" -gt 0 ]]; then + STATUS="warn" +else + STATUS="pass" +fi + +FINISHED_AT="$(date -u +"%Y-%m-%dT%H:%M:%SZ")" +END_EPOCH="$(date -u +%s)" +DURATION_MS="$(( (END_EPOCH - START_EPOCH) * 1000 ))" + +{ + echo "# Code Analysis Evidence" + echo + echo "- status: ${STATUS}" + echo "- pass_count: ${PASS_COUNT}" + echo "- warn_count: ${WARN_COUNT}" + echo "- fail_count: ${FAIL_COUNT}" + echo "- started_at: ${STARTED_AT}" + echo "- finished_at: ${FINISHED_AT}" + echo + echo "Evidence root: ${OUT_DIR_REL}" +} > "${SUMMARY_MD}" + +python3 
- "${VIOLATIONS_JSONL}" "${RESULT_JSON}" "${STATUS}" "${STARTED_AT}" "${FINISHED_AT}" "${DURATION_MS}" "${OUT_DIR_REL}" <<'PY' +import json +import pathlib +import sys + +if len(sys.argv) < 8: + raise SystemExit("expected 7 arguments: violations result status started finished duration out_rel") +viol_path, result_path, status, started_at, finished_at, duration_ms, out_rel = sys.argv[1:8] +violations = [] +vp = pathlib.Path(viol_path) +if vp.exists(): + for line in vp.read_text(encoding="utf-8").splitlines(): + line = line.strip() + if line: + violations.append(json.loads(line)) + +result = { + "schema_version": 1, + "check_id": "code-analysis-evidence", + "status": status, + "rule_violations": violations, + "evidence_paths": [f"{out_rel}/raw.log", f"{out_rel}/summary.md"], + "artifacts": [ + f"{out_rel}/raw.log", + f"{out_rel}/summary.md", + f"{out_rel}/result.json", + ], + "timing": { + "started_at": started_at, + "finished_at": finished_at, + "duration_ms": int(duration_ms), + }, +} +pathlib.Path(result_path).write_text(json.dumps(result, ensure_ascii=True), encoding="utf-8") +PY + +if [[ "${STATUS}" != "pass" ]]; then + exit 1 +fi diff --git a/tools/audit/verify-security-claims.sh b/tools/audit/verify-security-claims.sh new file mode 100755 index 0000000..9c9430b --- /dev/null +++ b/tools/audit/verify-security-claims.sh @@ -0,0 +1,314 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")/../.." 
&& pwd)" +CHECK_ID="security-claims-evidence" +OUT_DIR_REL="artifacts/ci/${CHECK_ID}" +OUT_DIR="${ROOT_DIR}/${OUT_DIR_REL}" +RAW_LOG="${OUT_DIR}/raw.log" +SUMMARY_MD="${OUT_DIR}/summary.md" +RESULT_JSON="${OUT_DIR}/result.json" + +mkdir -p "${OUT_DIR}" +: > "${RAW_LOG}" + +STARTED_AT="$(date -u +"%Y-%m-%dT%H:%M:%SZ")" +START_EPOCH="$(date -u +%s)" + +VIOLATIONS_JSONL="${OUT_DIR}/.violations.jsonl" +: > "${VIOLATIONS_JSONL}" + +PASS_COUNT=0 +WARN_COUNT=0 +FAIL_COUNT=0 + +log() { + printf '%s\n' "$*" | tee -a "${RAW_LOG}" >/dev/null +} + +add_violation() { + local rule_id="$1" + local severity="$2" + local message="$3" + local evidence="$4" + + if [[ "${severity}" == "warn" ]]; then + WARN_COUNT=$((WARN_COUNT + 1)) + else + FAIL_COUNT=$((FAIL_COUNT + 1)) + fi + + python3 - "$rule_id" "$severity" "$message" "$evidence" >> "${VIOLATIONS_JSONL}" <<'PY' +import json,sys +if len(sys.argv) < 5: + raise SystemExit("expected 4 arguments: rule_id severity message evidence") +rule_id,severity,message,evidence=sys.argv[1:5] +print(json.dumps({ + "rule_id": rule_id, + "severity": severity, + "message": message, + "evidence_paths": [evidence], +}, ensure_ascii=True)) +PY +} + +add_pass() { + PASS_COUNT=$((PASS_COUNT + 1)) +} + +run_cmd_capture() { + local label="$1" + shift + log "# ${label}" + log "$ $*" + if "$@" >> "${RAW_LOG}" 2>&1; then + return 0 + fi + return 1 +} + +retry_gh_api() { + local endpoint="$1" + local out_file="$2" + local attempts=3 + local delay=2 + local i + + for i in $(seq 1 "${attempts}"); do + if gh api "${endpoint}" > "${out_file}" 2>> "${RAW_LOG}"; then + return 0 + fi + if [[ "${i}" -lt "${attempts}" ]]; then + sleep "${delay}" + delay=$((delay * 2)) + fi + done + return 1 +} + +retry_gh_api_optional() { + local endpoint="$1" + local out_file="$2" + if retry_gh_api "${endpoint}" "${out_file}"; then + return 0 + fi + return 1 +} + +require_tool() { + local t="$1" + if ! 
command -v "${t}" >/dev/null 2>&1; then
+    add_violation "CI-SEC-CLAIM-000" "fail" "Missing required tool '${t}'" "tools/audit/verify-security-claims.sh"
+    return 1
+  fi
+  return 0
+}
+
+require_tool gh
+require_tool jq
+
+has_rg() {
+  command -v rg >/dev/null 2>&1
+}
+
+match_file() {
+  local pattern="$1"
+  local file="$2"
+  if has_rg; then
+    rg -q "$pattern" "$file"
+  else
+    grep -Eq "$pattern" "$file"
+  fi
+}
+
+REPO_FULL="${GITHUB_REPOSITORY:-}"
+if [[ -z "${REPO_FULL}" ]]; then
+  origin_url="$(git -C "${ROOT_DIR}" remote get-url origin 2>/dev/null || true)"
+  origin_url="${origin_url%/}"
+  if [[ "${origin_url}" =~ github.com[:/]([^/]+/[^/.]+)(\.git)?$ ]]; then
+    REPO_FULL="${BASH_REMATCH[1]}"
+  fi
+fi
+
+if [[ -z "${REPO_FULL}" ]]; then
+  add_violation "CI-SEC-CLAIM-001" "fail" "Unable to determine GitHub repository slug" "SECURITY.md"
+fi
+
+# Claim: 5.x supported and <5.0 unsupported maps to current package major = 5
+pkg_ver="$(sed -n 's:.*<Version>\([^<]*\)</Version>.*:\1:p' "${ROOT_DIR}/src/FileTypeDetection/FileTypeDetectionLib.vbproj" | head -n1)"
+if [[ -z "${pkg_ver}" ]]; then
+  add_violation "CI-SEC-CLAIM-002" "fail" "Package version not found" "src/FileTypeDetection/FileTypeDetectionLib.vbproj"
+else
+  major="${pkg_ver%%.*}"
+  if [[ "${major}" == "5" ]]; then
+    add_pass
+  else
+    add_violation "CI-SEC-CLAIM-002" "fail" "Expected package major 5 for SECURITY.md support claim, found ${pkg_ver}" "src/FileTypeDetection/FileTypeDetectionLib.vbproj"
+  fi
+fi
+
+# Claim: security-nuget gate exists in CI
+if match_file "^[[:space:]]+security-nuget:" "${ROOT_DIR}/.github/workflows/ci.yml" && match_file "run\\.sh security-nuget" "${ROOT_DIR}/.github/workflows/ci.yml"; then
+  add_pass
+else
+  add_violation "CI-SEC-CLAIM-003" "fail" "security-nuget gate missing from CI workflow" ".github/workflows/ci.yml"
+fi
+
+# Claim: OIDC trusted publishing present in release workflow
+if match_file "NuGet/login@v1" "${ROOT_DIR}/.github/workflows/release.yml" && match_file "assert OIDC temp key
present" "${ROOT_DIR}/.github/workflows/release.yml"; then + add_pass +else + add_violation "CI-SEC-CLAIM-004" "fail" "OIDC trusted publishing markers missing" ".github/workflows/release.yml" +fi + +if [[ -n "${REPO_FULL}" ]]; then + tmp_repo_json="${OUT_DIR}/.repo.json" + tmp_pvr_json="${OUT_DIR}/.pvr.json" + tmp_branch_rules_json="${OUT_DIR}/.branch-rules.json" + tmp_branch_protection_json="${OUT_DIR}/.branch-protection.json" + tmp_asf_json="${OUT_DIR}/.automated-security-fixes.json" + tmp_secret_scanning_json="${OUT_DIR}/.secret-scanning-alerts.json" + + if retry_gh_api "repos/${REPO_FULL}" "${tmp_repo_json}"; then + dep_status="$(jq -r '.security_and_analysis.dependabot_security_updates.status // empty' "${tmp_repo_json}")" + sec_status="$(jq -r '.security_and_analysis.secret_scanning.status // empty' "${tmp_repo_json}")" + default_branch="$(jq -r '.default_branch' "${tmp_repo_json}")" + + if [[ "${dep_status}" == "enabled" ]]; then + add_pass + elif retry_gh_api_optional "repos/${REPO_FULL}/automated-security-fixes" "${tmp_asf_json}" && [[ "$(jq -r '.enabled // false' "${tmp_asf_json}")" == "true" ]]; then + add_pass + else + dep_effective="${dep_status:-unknown}" + add_violation "CI-SEC-CLAIM-005" "fail" "Dependabot security updates expected enabled, got ${dep_effective}" "${OUT_DIR_REL}/raw.log" + fi + + if [[ "${sec_status}" == "enabled" ]]; then + add_pass + elif retry_gh_api_optional "repos/${REPO_FULL}/secret-scanning/alerts?per_page=1" "${tmp_secret_scanning_json}"; then + add_pass + else + sec_effective="${sec_status:-unknown}" + add_violation "CI-SEC-CLAIM-006" "fail" "Secret scanning expected enabled, got ${sec_effective}" "${OUT_DIR_REL}/raw.log" + fi + + if retry_gh_api "repos/${REPO_FULL}/private-vulnerability-reporting" "${tmp_pvr_json}"; then + pvr_enabled="$(jq -r '.enabled // false' "${tmp_pvr_json}")" + if [[ "${pvr_enabled}" == "true" ]]; then + add_pass + else + add_violation "CI-SEC-CLAIM-007" "fail" "Private vulnerability reporting 
expected true" "${OUT_DIR_REL}/raw.log" + fi + else + add_violation "CI-SEC-CLAIM-007" "fail" "GitHub API failed for private-vulnerability-reporting after retries" "${OUT_DIR_REL}/raw.log" + fi + + if retry_gh_api_optional "repos/${REPO_FULL}/rules/branches/${default_branch}" "${tmp_branch_rules_json}"; then + required_contexts=("preflight" "version-policy" "build" "api-contract" "pack" "consumer-smoke" "package-backed-tests" "security-nuget" "tests-bdd-coverage") + missing=0 + for ctx in "${required_contexts[@]}"; do + if ! jq -e --arg ctx "${ctx}" ' + map(select(.type=="required_status_checks")) + | map(.parameters.required_status_checks // []) + | add // [] + | map(.context) + | index($ctx) + ' "${tmp_branch_rules_json}" >/dev/null; then + log "Missing branch protection context: ${ctx}" + missing=1 + fi + done + if [[ "${missing}" -eq 0 ]]; then + add_pass + else + add_violation "CI-SEC-CLAIM-008" "fail" "Branch protection contexts do not match SECURITY evidence baseline" "${OUT_DIR_REL}/raw.log" + fi + elif retry_gh_api_optional "repos/${REPO_FULL}/branches/${default_branch}/protection" "${tmp_branch_protection_json}"; then + required_contexts=("preflight" "version-policy" "build" "api-contract" "pack" "consumer-smoke" "package-backed-tests" "security-nuget" "tests-bdd-coverage") + missing=0 + for ctx in "${required_contexts[@]}"; do + if ! 
jq -e --arg ctx "${ctx}" '.required_status_checks.contexts | index($ctx)' "${tmp_branch_protection_json}" >/dev/null; then + log "Missing branch protection context: ${ctx}" + missing=1 + fi + done + if [[ "${missing}" -eq 0 ]]; then + add_pass + else + add_violation "CI-SEC-CLAIM-008" "fail" "Branch protection contexts do not match SECURITY evidence baseline" "${OUT_DIR_REL}/raw.log" + fi + else + add_violation "CI-SEC-CLAIM-008" "fail" "GitHub API failed for branch rules/protection after retries" "${OUT_DIR_REL}/raw.log" + fi + else + add_violation "CI-SEC-CLAIM-005" "fail" "GitHub API failed for repository metadata after retries" "${OUT_DIR_REL}/raw.log" + add_violation "CI-SEC-CLAIM-006" "fail" "GitHub API failed for repository metadata after retries" "${OUT_DIR_REL}/raw.log" + add_violation "CI-SEC-CLAIM-007" "fail" "GitHub API failed for repository metadata after retries" "${OUT_DIR_REL}/raw.log" + add_violation "CI-SEC-CLAIM-008" "fail" "GitHub API failed for repository metadata after retries" "${OUT_DIR_REL}/raw.log" + fi +fi + +if [[ "${FAIL_COUNT}" -gt 0 ]]; then + STATUS="fail" +elif [[ "${WARN_COUNT}" -gt 0 ]]; then + STATUS="warn" +else + STATUS="pass" +fi + +FINISHED_AT="$(date -u +"%Y-%m-%dT%H:%M:%SZ")" +END_EPOCH="$(date -u +%s)" +DURATION_MS="$(( (END_EPOCH - START_EPOCH) * 1000 ))" + +{ + echo "# Security Claims Evidence" + echo + echo "- status: ${STATUS}" + echo "- pass_count: ${PASS_COUNT}" + echo "- warn_count: ${WARN_COUNT}" + echo "- fail_count: ${FAIL_COUNT}" + echo "- started_at: ${STARTED_AT}" + echo "- finished_at: ${FINISHED_AT}" + echo + echo "Evidence root: ${OUT_DIR_REL}" +} > "${SUMMARY_MD}" + +python3 - "${VIOLATIONS_JSONL}" "${RESULT_JSON}" "${STATUS}" "${STARTED_AT}" "${FINISHED_AT}" "${DURATION_MS}" "${OUT_DIR_REL}" <<'PY' +import json +import pathlib +import sys + +if len(sys.argv) < 8: + raise SystemExit("expected 7 arguments: violations result status started finished duration out_rel") +viol_path, result_path, status, 
started_at, finished_at, duration_ms, out_rel = sys.argv[1:8] +violations = [] +vp = pathlib.Path(viol_path) +if vp.exists(): + for line in vp.read_text(encoding="utf-8").splitlines(): + line = line.strip() + if line: + violations.append(json.loads(line)) + +result = { + "schema_version": 1, + "check_id": "security-claims-evidence", + "status": status, + "rule_violations": violations, + "evidence_paths": [f"{out_rel}/raw.log", f"{out_rel}/summary.md"], + "artifacts": [ + f"{out_rel}/raw.log", + f"{out_rel}/summary.md", + f"{out_rel}/result.json", + ], + "timing": { + "started_at": started_at, + "finished_at": finished_at, + "duration_ms": int(duration_ms), + }, +} +pathlib.Path(result_path).write_text(json.dumps(result, ensure_ascii=True), encoding="utf-8") +PY + +if [[ "${STATUS}" != "pass" ]]; then + exit 1 +fi