From 04df57a806f61e8f2ca456567b3429387b07ef5f Mon Sep 17 00:00:00 2001 From: James Devine Date: Thu, 21 May 2026 16:00:53 +0100 Subject: [PATCH 1/2] feat(audit): add `ado-aw audit ` command MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Single-run audit: download a build's artifacts, run every analyzer (firewall, MCP gateway, OTel, safe outputs, detection verdict, build timeline, missing tools/data/noops), and emit a Markdown or JSON report. ADO-side counterpart to `gh aw audit`. New module tree under `src/audit/`: - `model.rs` — `AuditData` (drift-compatible with gh-aw's top-level contract; adds ADO-specific `detection_analysis`, `safe_output_execution`, `rejected_safe_outputs` sections). - `url.rs` — parses bare IDs, dev.azure.com URLs, legacy visualstudio.com URLs, and on-prem Azure DevOps Server URLs (with optional `&j=`/`&t=`/`&s=` job/step anchors). - `cache.rs` — CLI-version-keyed `run-summary.json` with atomic writes. - `analyzers/{firewall,policy,mcp,otel,safe_outputs,detection,missing,jobs}.rs` — eight defensive NDJSON/REST analyzers. - `findings.rs` — eight heuristic rules emitting severity-rated findings + recommendations. - `render/{console,json}.rs` — two renderers; JSON shape is the public contract. - `cli.rs` — orchestration: URL parse → auth → metadata fetch → artifact download → analyzers → findings → cache → render. Unified rejection trace: when the aggregate `THREAT_DETECTION_RESULT` has any threat flag set, every proposal lands in `not_processed_due_to_aggregate_gate` carrying the aggregate `reasons[]`, exactly one severity-`high` `KeyFinding` is emitted, and a `rejected_safe_outputs` rollup appears at the top level. Pipeline-side runtime additions (so an `ado-aw audit` of an existing build has the data it needs): - `src/data/*-base.yml` (via `AdoAwMarkerExtension`): emits `staging/aw_info.json` at runtime with engine, model, agent name, source path, target, compiler version, and ADO build context. 
- `src/execute.rs`: writes a per-item `safe-outputs-executed.ndjson` in `` so the audit can show the proposed → detection → executed trace. CLI surface: ado-aw audit -o, --output # default ./logs --json --org / --project / --pat --artifacts --no-cache New dependencies: `zip` (artifact unpack), `wiremock` (dev only — integration test mock server). Tests: 80 new audit unit tests + 3 integration tests against a fake ADO REST server (happy path, permission-denied, cache hit) using a thin `ADO_AW_TEST_ORG_URL` test seam. 1740 total tests pass. Docs: new `docs/audit.md`; updates to `docs/cli.md`, `README.md`, `AGENTS.md` index, and `prompts/debug-ado-agentic-workflow.md` (Step 1 first-move + new Step 2a-prime + `AuditData` reference + jq-diff fallback). Out of scope (explicit follow-ups): diff mode, cross-run trends, `--parse` log.md/firewall.md, job/step-anchored audit, MCP-exposed audit, per-item detection verdict (upstream coordination with gh-aw), partial-approval gating, AWF policy-manifest plumbing, AWF token-usage.jsonl, `audit-manifest.json` build inventory. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- AGENTS.md | 5 +- Cargo.lock | 154 +++ Cargo.toml | 2 + README.md | 3 + docs/audit.md | 141 +++ docs/cli.md | 10 + prompts/debug-ado-agentic-workflow.md | 65 ++ src/ado/mod.rs | 505 +++++++- src/audit/analyzers/detection.rs | 320 +++++ src/audit/analyzers/firewall.rs | 387 +++++++ src/audit/analyzers/jobs.rs | 283 +++++ src/audit/analyzers/mcp.rs | 607 ++++++++++ src/audit/analyzers/missing.rs | 289 +++++ src/audit/analyzers/mod.rs | 14 + src/audit/analyzers/otel.rs | 228 ++++ src/audit/analyzers/policy.rs | 398 +++++++ src/audit/analyzers/safe_outputs.rs | 915 +++++++++++++++ src/audit/cache.rs | 298 +++++ src/audit/cli.rs | 720 ++++++++++++ src/audit/findings.rs | 814 +++++++++++++ src/audit/mod.rs | 15 + src/audit/model.rs | 1066 +++++++++++++++++ src/audit/render/console.rs | 1415 +++++++++++++++++++++++ src/audit/render/json.rs | 300 +++++ src/audit/render/mod.rs | 4 + src/audit/url.rs | 308 +++++ src/compile/extensions/ado_aw_marker.rs | 268 +++-- src/execute.rs | 388 ++++++- src/main.rs | 51 + src/ndjson.rs | 3 + tests/audit_it.rs | 279 +++++ tests/bash_lint_tests.rs | 40 +- tests/compiler_tests.rs | 79 +- 33 files changed, 10183 insertions(+), 191 deletions(-) create mode 100644 docs/audit.md create mode 100644 src/audit/analyzers/detection.rs create mode 100644 src/audit/analyzers/firewall.rs create mode 100644 src/audit/analyzers/jobs.rs create mode 100644 src/audit/analyzers/mcp.rs create mode 100644 src/audit/analyzers/missing.rs create mode 100644 src/audit/analyzers/mod.rs create mode 100644 src/audit/analyzers/otel.rs create mode 100644 src/audit/analyzers/policy.rs create mode 100644 src/audit/analyzers/safe_outputs.rs create mode 100644 src/audit/cache.rs create mode 100644 src/audit/cli.rs create mode 100644 src/audit/findings.rs create mode 100644 src/audit/mod.rs create mode 100644 src/audit/model.rs create mode 100644 src/audit/render/console.rs create mode 100644 
src/audit/render/json.rs create mode 100644 src/audit/render/mod.rs create mode 100644 src/audit/url.rs create mode 100644 tests/audit_it.rs diff --git a/AGENTS.md b/AGENTS.md index 8d6cfd7f..12823ed9 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -227,7 +227,10 @@ index to jump to the right page. in `src/data/base.yml`, `src/data/1es-base.yml`, `src/data/job-base.yml`, and `src/data/stage-base.yml` and how it is replaced. - [`docs/cli.md`](docs/cli.md) — `ado-aw` CLI commands (`init`, `compile`, `check`, `mcp`, `mcp-http`, `execute`, `secrets`, `enable`, `disable`, - `remove`, `list`, `status`, `run`; `configure` is a deprecated hidden alias). + `remove`, `list`, `status`, `run`, `audit`; `configure` is a deprecated hidden alias). +- [`docs/audit.md`](docs/audit.md) — `ado-aw audit`: accepted build-id / URL + forms, artifact layout, cache behavior, rejection tracing, and `AuditData` + report shape. - [`docs/mcp.md`](docs/mcp.md) — MCP server configuration (stdio containers, HTTP servers, env passthrough). - [`docs/mcpg.md`](docs/mcpg.md) — MCP Gateway architecture and pipeline diff --git a/Cargo.lock b/Cargo.lock index f788df59..0f5d6833 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,12 @@ # It is not intended for manual editing. 
version = 4 +[[package]] +name = "adler2" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" + [[package]] name = "ado-aw" version = "0.30.2" @@ -33,6 +39,8 @@ dependencies = [ "tempfile", "tokio", "url", + "wiremock", + "zip", ] [[package]] @@ -118,6 +126,16 @@ version = "1.0.100" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61" +[[package]] +name = "assert-json-diff" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47e4f2b81832e72834d7518d8487a0396a28cc408186a2e8854c0f98011faf12" +dependencies = [ + "serde", + "serde_json", +] + [[package]] name = "async-trait" version = "0.1.89" @@ -363,6 +381,15 @@ dependencies = [ "libc", ] +[[package]] +name = "crc32fast" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" +dependencies = [ + "cfg-if", +] + [[package]] name = "crossterm" version = "0.29.0" @@ -433,6 +460,24 @@ dependencies = [ "syn", ] +[[package]] +name = "deadpool" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0be2b1d1d6ec8d846f05e137292d0b89133caf95ef33695424c09568bdd39b1b" +dependencies = [ + "deadpool-runtime", + "lazy_static", + "num_cpus", + "tokio", +] + +[[package]] +name = "deadpool-runtime" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "092966b41edc516079bdf31ec78a2e0588d1d0c08f78b91d8307215928642b2b" + [[package]] name = "derive_more" version = "2.1.1" @@ -573,6 +618,16 @@ version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8591b0bcc8a98a64310a2fae1bb3e9b8564dd10e381e6e28010fde8e8e8568db" +[[package]] +name = "flate2" +version = "1.1.9" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "843fba2746e448b37e26a819579957415c8cef339bf08564fe8b7ddbd959573c" +dependencies = [ + "miniz_oxide", + "zlib-rs", +] + [[package]] name = "fnv" version = "1.0.7" @@ -790,6 +845,12 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" +[[package]] +name = "hermit-abi" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" + [[package]] name = "http" version = "1.4.0" @@ -1151,6 +1212,12 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + [[package]] name = "leb128fmt" version = "0.1.0" @@ -1224,6 +1291,16 @@ version = "0.3.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" +[[package]] +name = "miniz_oxide" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" +dependencies = [ + "adler2", + "simd-adler32", +] + [[package]] name = "mio" version = "1.1.1" @@ -1262,6 +1339,16 @@ dependencies = [ "autocfg", ] +[[package]] +name = "num_cpus" +version = "1.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91df4bbde75afed763b708b7eee1e8e7651e02d97f6d5dd763e89367e957b23b" +dependencies = [ + "hermit-abi", + "libc", +] + [[package]] name = "once_cell" version = "1.21.3" @@ -1907,6 +1994,12 @@ dependencies = [ "libc", ] +[[package]] +name = "simd-adler32" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"703d5c7ef118737c72f1af64ad2f6f8c5e1921f818cdcb97b8fe6fc69bf66214" + [[package]] name = "similar" version = "3.1.0" @@ -2229,6 +2322,12 @@ version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" +[[package]] +name = "typed-path" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e28f89b80c87b8fb0cf04ab448d5dd0dd0ade2f8891bae878de66a75a28600e" + [[package]] name = "typenum" version = "1.20.0" @@ -2696,6 +2795,29 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" +[[package]] +name = "wiremock" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08db1edfb05d9b3c1542e521aea074442088292f00b5f28e435c714a98f85031" +dependencies = [ + "assert-json-diff", + "base64", + "deadpool", + "futures", + "http", + "http-body-util", + "hyper", + "hyper-util", + "log", + "once_cell", + "regex", + "serde", + "serde_json", + "tokio", + "url", +] + [[package]] name = "wit-bindgen" version = "0.51.0" @@ -2873,8 +2995,40 @@ dependencies = [ "syn", ] +[[package]] +name = "zip" +version = "8.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d04a6b5381502aa6087c94c669499eb1602eb9c5e8198e534de571f7154809b" +dependencies = [ + "crc32fast", + "flate2", + "indexmap", + "memchr", + "typed-path", + "zopfli", +] + +[[package]] +name = "zlib-rs" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3be3d40e40a133f9c916ee3f9f4fa2d9d63435b5fbe1bfc6d9dae0aa0ada1513" + [[package]] name = "zmij" version = "1.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "02aae0f83f69aafc94776e879363e9771d7ecbffe2c7fbb6c14c5e00dfe88439" + +[[package]] +name = "zopfli" +version = "0.8.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "f05cd8797d63865425ff89b5c4a48804f35ba0ce8d125800027ad6017d2b5249" +dependencies = [ + "bumpalo", + "crc32fast", + "log", + "simd-adler32", +] diff --git a/Cargo.toml b/Cargo.toml index 01b1ee1b..c547b945 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -35,6 +35,8 @@ base64 = "0.22.1" glob-match = "0.2.1" similar = "3.1.0" sha2 = "0.11.0" +zip = { version = "8.6.0", default-features = false, features = ["deflate"] } [dev-dependencies] reqwest = { version = "0.12", features = ["blocking"] } +wiremock = "0.6" diff --git a/README.md b/README.md index d6b8e8c0..4cd1286f 100644 --- a/README.md +++ b/README.md @@ -484,6 +484,8 @@ network: ## CLI Reference +- `audit ` - Audit a single Azure DevOps build: download artifacts, analyze logs, render Markdown or JSON report. See [`docs/audit.md`](docs/audit.md). + ``` ado-aw [OPTIONS] @@ -501,6 +503,7 @@ Commands: list List matched ADO definitions with their latest-run state status Per-pipeline status block for matched ADO definitions run Queue builds for matched ADO definitions (optionally poll to completion) + audit Audit a single Azure DevOps build: download artifacts, analyze logs, render a report Options: -v, --verbose Enable info-level logging diff --git a/docs/audit.md b/docs/audit.md new file mode 100644 index 00000000..d2c139d8 --- /dev/null +++ b/docs/audit.md @@ -0,0 +1,141 @@ +# Auditing Pipelines with `ado-aw audit` + +_Part of the [ado-aw documentation](../AGENTS.md)._ + +## Overview + +`ado-aw audit` audits one Azure DevOps build at a time. It downloads the selected build artifacts, runs the built-in analyzers (firewall, MCP gateway, OTel, safe outputs, detection verdict, build timeline, and missing-tool / missing-data / noop extraction), and renders a structured console report or the raw `AuditData` JSON. The MVP is single-run only; diff mode and cross-run trend reporting are follow-ups. 
+ +## Usage + +`ado-aw audit [options]` + +## Accepted input formats + +| Input | Example | +|---|---| +| Numeric build ID | `12345` | +| dev.azure.com URL | `https://dev.azure.com/my-org/My%20Project/_build/results?buildId=12345` | +| dev.azure.com URL with job/step anchors | `...?buildId=12345&j=&t=` (accepted; the MVP audits the parent build) | +| Legacy visualstudio.com URL | `https://my-org.visualstudio.com/proj/_build/results?buildId=12345` | +| On-prem Azure DevOps Server URL | `https://onprem.example.com/DefaultCollection/MyProject/_build/results?buildId=12345` | + +URL-encoded project segments are decoded before the ADO context is resolved. `t=` and `s=` are both accepted as step anchors. + +## Flags + +| Flag | Default | Behavior | +| --- | --- | --- | +| `-o, --output ` | `./logs` | Directory under which `/build-/` is written. | +| `--json` | off | Emit the full `AuditData` as JSON to stdout (suppresses the trailing `Audit complete` stderr line). | +| `--org ` | auto | Azure DevOps organization override for bare build IDs. Full build URLs provide the host / org directly. | +| `--project ` | auto | Azure DevOps project override for bare build IDs. Full build URLs provide the project directly. | +| `--pat ` | env | Personal Access Token. Also reads `AZURE_DEVOPS_EXT_PAT`. Falls back to the existing Azure CLI auth chain when omitted. | +| `--artifacts ` | all | Restrict download + analysis to a subset of artifact sets. Valid values: `agent`, `detection`, `safe-outputs` (`safe_outputs` alias also accepted). | +| `--no-cache` | off | Force re-processing even if `/build-/run-summary.json` already exists. | + +## Behavior + +- The command resolves `` first. Bare IDs use `--org` / `--project` or git-remote auto-detection; full build URLs contribute host, org, and project, and those URL-derived values win. +- Only the three audit artifact families are in scope: `agent_outputs*`, `analyzed_outputs*`, and `safe_outputs*`. 
Other published build artifacts are ignored. +- Artifact refresh is cache-preserving. If a matching local artifact directory already exists, it is renamed aside before re-download and restored if the download fails. +- Analyzer failures are soft. The command records a warning, keeps any successfully-derived sections, and still renders the report. +- When multiple local directories share one recognized prefix, the lexicographically last match is used. + +## Output layout + +```text +/build-/ +├── run-summary.json # Cached AuditData, CLI-version-keyed +├── agent_outputs[_]/ # Downloaded artifact (Agent stage) +│ ├── staging/ +│ │ ├── safe_outputs.ndjson # Agent's safe-output proposals +│ │ ├── aw_info.json # Runtime engine / agent / source metadata +│ │ └── otel.jsonl # Copilot OTel (when emitted) +│ └── logs/ +│ ├── firewall/ # AWF Squid proxy logs +│ ├── mcpg/ # MCP Gateway logs +│ ├── safeoutputs.log # SafeOutputs HTTP server log +│ └── agent-output.txt # Filtered agent stdout +├── analyzed_outputs[_]/ # Downloaded artifact (Detection stage) +│ ├── threat-analysis.json # Aggregate verdict + reasons +│ └── threat-analysis-output.txt +└── safe_outputs[_]/ # Downloaded artifact (SafeOutputs stage) + └── safe-outputs-executed.ndjson # Per-item execution log +``` + +`aw_info.json`, `otel.jsonl`, and `safe_outputs.ndjson` are searched in `staging/` first and then at the artifact top level so older layouts still audit cleanly. + +## Report shape (`AuditData`) + +Current top-level keys include the following. Optional sections are omitted from `--json` when empty. + +| Key | Source | +| --- | --- | +| `overview` | ADO build metadata + `aw_info.json` (engine, model, agent name, source, target). | +| `task_domain` | Audit heuristics over the run's prompts and outputs. | +| `behavior_fingerprint` | Higher-level audit heuristics over the run's behavior. | +| `agentic_assessments` | Higher-level audit assessments emitted by the analyzers. 
| +| `metrics` | OTel JSONL (`otel.jsonl`) plus audit-time warning / error counts. | +| `key_findings` | Heuristic rules + analyzer-emitted findings (for example aggregate-gate rejection). | +| `recommendations` | Follow-up actions derived from findings. | +| `performance_metrics` | Derived from `metrics`, runtime duration, tool usage, and firewall counts. | +| `engine_config` | Runtime engine configuration derived from `aw_info.json`. | +| `safe_output_summary` | Counts of proposed / executed / rejected / not processed items. | +| `safe_output_execution` | Per-item trace joining proposal + detection + execution. | +| `rejected_safe_outputs` | Rollup of rejections by reason / threat flag. | +| `detection_analysis` | `threat-analysis.json`. | +| `mcp_server_health` | MCPG logs aggregated per server. | +| `mcp_tool_usage` | MCPG logs aggregated per `(server, tool)`. | +| `mcp_failures` | MCPG `tool_error` / `server_error` events. | +| `jobs` | ADO `/timeline` records filtered to `type: Job`. | +| `firewall_analysis` | AWF Squid proxy logs aggregated by domain. | +| `policy_analysis` | AWF policy artifacts aggregated into allow / deny summaries. | +| `missing_tools` / `missing_data` / `noops` | NDJSON entries from the corresponding SafeOutputs MCP tools. | +| `downloaded_files` | One entry per file under `/build-/`. | +| `errors` / `warnings` | Run-level error / warning aggregates. | +| `tool_usage` | High-level runtime tool-usage rollups derived from telemetry. | +| `created_items` | Successful `executed` items with extracted id / url / title. 
| + +## Rejected safe-output trace + +When `threat-analysis.json` reports any threat flag, the audit treats the SafeOutputs batch as rejected by the aggregate gate and records each proposal with: + +- `status: not_processed_due_to_aggregate_gate` +- `applies_to_whole_batch: true` +- `rejection_reason`: the aggregate `reasons[]` from `threat-analysis.json`, joined with `; ` + +Additionally, exactly one severity-`high` finding is emitted summarizing the gate decision: which threat flags fired, how many proposals were dropped, and the full aggregate reasons. + +Per-item detection verdicts are not currently available. `threat-analysis.md` emits an aggregate verdict only; per-item verdicts are a follow-up that should stay aligned with gh-aw. + +## Cache behavior + +`/build-/run-summary.json` is written after a successful run. On subsequent invocations against the same build: + +- If the cached `ado_aw_version` matches the current CLI version, the report is rendered from cache and download / analysis is skipped. The cache-hit info line is printed only in console mode. +- If the cached file is missing, cannot be parsed, or was written by a different `ado-aw` version, it is ignored and the build is processed again. +- `--no-cache` always re-processes. + +## Permission failures + +- The initial build-metadata fetch is live ADO-only. A 401 / 403 at that step is fatal. +- If artifact listing or artifact download returns 401 / 403 and the run directory already contains at least one recognized artifact family, the audit continues from local cache and records a warning. +- If artifact listing or download returns 401 / 403 and no local artifact cache exists, the command emits a structured error pointing at `az pipelines runs artifact download --run-id --path ` as the manual escape hatch. + +## Out-of-scope (planned follow-ups) + +- **Diff mode** (`ado-aw audit `) — domain / MCP / metrics diffs. +- **Cross-run trends** (`ado-aw audit --last N`) — trend report over recent builds. 
+- **`--parse`** — Rust-native `log.md` / `firewall.md` renderers. +- **Job / step audit** — pin to a specific timeline record. +- **MCP-exposed audit** — `agentic-pipelines` MCP tool for in-pipeline self-audit. +- **Per-item detection verdict** — coordinated upstream with gh-aw. +- **Additional pipeline inventory artifacts** — graceful-degradation gaps such as richer AWF policy / firewall inventories. + +## Related Documentation + +- [CLI Commands](cli.md) — full CLI reference +- [Front Matter](front-matter.md) — agent file format +- [Safe Outputs](safe-outputs.md) — what proposals look like +- [Network](network.md) — AWF firewall configuration diff --git a/docs/cli.md b/docs/cli.md index 7dce7954..94b84371 100644 --- a/docs/cli.md +++ b/docs/cli.md @@ -116,3 +116,13 @@ Both flags route through `ado-aw`'s `discover_ado_aw_pipelines` machinery, which - `--poll-interval ` - Polling period when `--wait` is set (default 10). - `--timeout ` - Hard cap on the polling loop when `--wait` is set (default 1800). - `--dry-run` - Print the planned `templateParameters` body without calling the ADO API. + +- `audit ` - Audit a single Azure DevOps build: download the known stage artifacts, run the audit analyzers, and render a structured console report or `AuditData` JSON. + - `-o, --output ` - Output directory for downloaded artifacts and reports. Defaults to `./logs`; the run is stored under `/build-/`. + - `--json` - Emit machine-readable JSON (`AuditData`) instead of the console report. Suppresses the trailing `Audit complete` stderr line. + - `--org ` - Override: Azure DevOps organization (used when the input is a bare build ID). Full build URLs provide the host / org directly. + - `--project ` - Override: Azure DevOps project name (used when the input is a bare build ID). Full build URLs provide the project directly. + - `--pat ` / `AZURE_DEVOPS_EXT_PAT` env var - PAT for ADO API authentication (Azure CLI fallback if omitted). 
+ - `--artifacts ` - Restrict download + analysis to the named sets: `agent`, `detection`, `safe-outputs` (`safe_outputs` is also accepted). Defaults to all three. + - `--no-cache` - Ignore `/build-/run-summary.json` and re-process the build. + - See [`audit.md`](audit.md) for accepted build-reference formats, output layout, cache semantics, and the `AuditData` report shape. diff --git a/prompts/debug-ado-agentic-workflow.md b/prompts/debug-ado-agentic-workflow.md index 41bff089..d2315422 100644 --- a/prompts/debug-ado-agentic-workflow.md +++ b/prompts/debug-ado-agentic-workflow.md @@ -52,10 +52,33 @@ You need minimal context from the user: - Error messages or log snippets from the failing step - The agent source `.md` file (or path) and the compiled `.lock.yml` (or path) +**Fastest first move when a build ID or URL is available:** run `ado-aw audit --json`. It downloads the build's artifacts, runs every analyzer (firewall, MCP gateway, OTel, safe outputs, detection verdict, timeline, missing tools/data/noops), and emits a structured JSON report you can read directly — much faster than paging through raw logs. The audit caches its results under `./logs/build-/run-summary.json` so re-running is free. + ### Step 2: Investigate If Azure DevOps MCP pipeline tools are available, follow this sequence: +#### 2a-prime. Run `ado-aw audit` (when you have local CLI access) + +If you can run `ado-aw` locally and have the build ID: + +```bash +ado-aw audit --json > audit.json +``` + +The output JSON contains the full `AuditData` (see [What `ado-aw audit` extracts](#what-ado-aw-audit-extracts) below). 
Map each section to the stage that produced it: + +- `overview` / `metrics` / `engine_config` / `performance_metrics` → Agent-stage runtime characteristics +- `firewall_analysis` / `policy_analysis` → Agent-stage AWF network behavior +- `mcp_server_health` / `mcp_tool_usage` / `mcp_failures` → Agent-stage MCP gateway behavior +- `safe_output_summary` / `safe_output_execution` / `rejected_safe_outputs` / `created_items` → cross-stage proposal → detection → execution trace +- `detection_analysis` → Detection-stage threat-analysis verdict +- `missing_tools` / `missing_data` / `noops` → agent-self-reported signals +- `jobs` → ADO build timeline (use this to see which stage failed) +- `key_findings` / `recommendations` → heuristic summaries (severity high/critical findings are usually the root-cause signal) + +If the CLI is not available, fall through to the MCP-based steps below. + #### 2a. Find the Pipeline Definition Use `mcp_ado_pipelines_get_build_definitions` to locate the pipeline by name or definition ID. @@ -110,6 +133,16 @@ This is often the fastest path to root cause for regressions: - Pipeline variables or service connection configuration - Pool or agent image configuration +Until the planned `ado-aw audit ` diff mode is available, the lightweight stand-in is: + +```bash +ado-aw audit --json > base.json +ado-aw audit --json > comp.json +diff <(jq -S . base.json) <(jq -S . comp.json) | less +``` + +This won't surface domain/MCP-tool diffs as cleanly as a structured diff, but it does highlight changes in `key_findings`, `metrics`, `mcp_failures`, `firewall_analysis.denied_count`, and the per-item `safe_output_execution`. + #### 2g. 
Check Local Files (if accessible) If you have access to the user's local repository: @@ -442,6 +475,38 @@ If downloads fail: --- +## What `ado-aw audit` extracts + +| Section | What it contains / Source | +|---|---| +| `overview` | High-level build and pipeline metadata from Azure DevOps APIs, timeline data, and `staging/aw_info.json`. | +| `task_domain` | Task-domain classification inferred by audit heuristics from the run's prompts and outputs. | +| `behavior_fingerprint` | Behavior fingerprint signals derived from analyzer heuristics over the run. | +| `agentic_assessments` | Higher-level agentic assessments synthesized by the audit. | +| `metrics` | Aggregate numeric metrics derived from OTel and audit processing. | +| `key_findings` | Important findings synthesized from analyzer output. | +| `recommendations` | Recommended next actions derived from the audit findings. | +| `performance_metrics` | Derived performance metrics computed from token, cost, and tool-usage data. | +| `engine_config` | Engine configuration captured from compiled metadata and runtime emission. | +| `safe_output_summary` | Rollup of proposed, executed, and dropped safe outputs for the build. | +| `safe_output_execution` | Per-item safe-output execution outcomes emitted by the ADO SafeOutputs stage. | +| `rejected_safe_outputs` | Aggregate rollup of safe outputs rejected before or during execution. | +| `detection_analysis` | Threat-detection verdict information from `analyzed_outputs_`. | +| `mcp_server_health` | MCP server reliability and call health derived from gateway logs. | +| `jobs` | Job-level status data derived from the Azure DevOps build timeline. | +| `downloaded_files` | Files downloaded while assembling the audit input set. | +| `missing_tools` | Missing-tool reports captured from safe-output or MCP artifacts. | +| `missing_data` | Missing-data reports captured from safe-output or MCP artifacts. | +| `noops` | No-op reports emitted by runtime tools during the build. 
| +| `mcp_failures` | MCP failure reports derived from gateway or tool execution artifacts. | +| `firewall_analysis` | Firewall-domain analysis derived from AWF firewall logs. | +| `policy_analysis` | Policy-rule analysis derived from AWF policy artifacts. | +| `errors` | Non-fatal or fatal errors encountered while auditing or discovered in artifacts. | +| `warnings` | Warning rows surfaced during audit processing. | +| `tool_usage` | High-level tool-usage rollups derived from runtime telemetry. | +| `mcp_tool_usage` | MCP-specific tool-usage rollups derived from MCP gateway logs. | +| `created_items` | Created external items reported by successful safe-output execution. | + ## Diagnostic Report Template **Redact secrets before including any log content** — tokens, PATs, bearer headers, SAS URLs, service connection identifiers, private repo URLs, internal hostnames, customer data. Summarize redacted sections instead of quoting them. Use `Unknown` for values you couldn't obtain. diff --git a/src/ado/mod.rs b/src/ado/mod.rs index f27f8859..86f2d487 100644 --- a/src/ado/mod.rs +++ b/src/ado/mod.rs @@ -12,6 +12,7 @@ use anyhow::{Context, Result}; use log::{debug, info, warn}; use serde::Deserialize; +use std::io::Write; use std::path::Path; use crate::detect; @@ -29,20 +30,30 @@ pub async fn try_azure_cli_token() -> Result { let output = if cfg!(windows) { tokio::process::Command::new("cmd") .args([ - "/C", "az", "account", "get-access-token", - "--resource", ADO_RESOURCE_ID, - "--query", "accessToken", - "-o", "tsv", + "/C", + "az", + "account", + "get-access-token", + "--resource", + ADO_RESOURCE_ID, + "--query", + "accessToken", + "-o", + "tsv", ]) .output() .await } else { tokio::process::Command::new("az") .args([ - "account", "get-access-token", - "--resource", ADO_RESOURCE_ID, - "--query", "accessToken", - "-o", "tsv", + "account", + "get-access-token", + "--resource", + ADO_RESOURCE_ID, + "--query", + "accessToken", + "-o", + "tsv", ]) .output() .await @@ 
-119,8 +130,7 @@ pub fn parse_ado_remote(remote_url: &str) -> Result { // HTTPS format: https://dev.azure.com/{org}/{project}/_git/{repo} if url.contains("dev.azure.com") { - let url_parsed = - url::Url::parse(url).with_context(|| format!("Invalid URL: {}", url))?; + let url_parsed = url::Url::parse(url).with_context(|| format!("Invalid URL: {}", url))?; let segments: Vec<&str> = url_parsed .path_segments() .map(|s| s.collect()) @@ -139,8 +149,7 @@ pub fn parse_ado_remote(remote_url: &str) -> Result { // Legacy format: https://{org}.visualstudio.com/{project}/_git/{repo} if url.contains(".visualstudio.com") { - let url_parsed = - url::Url::parse(url).with_context(|| format!("Invalid URL: {}", url))?; + let url_parsed = url::Url::parse(url).with_context(|| format!("Invalid URL: {}", url))?; let host = url_parsed.host_str().unwrap_or(""); let org = host.strip_suffix(".visualstudio.com").unwrap_or(host); let segments: Vec<&str> = url_parsed @@ -319,7 +328,8 @@ pub async fn list_definitions( debug!("Listing definitions: {}", base_url); - let mut request = auth.apply(client.get(&base_url)) + let mut request = auth + .apply(client.get(&base_url)) .query(&[("includeAllProperties", "true"), ("api-version", "7.1")]); if let Some(ref token) = continuation_token { request = request.query(&[("continuationToken", token)]); @@ -347,16 +357,18 @@ pub async fn list_definitions( .and_then(|v| v.to_str().ok()) .map(|s| s.to_string()); - let body = resp.text().await.context("Failed to read definitions response body")?; - let response: DefinitionListResponse = serde_json::from_str(&body) - .with_context(|| { - let snippet: String = body.chars().take(500).collect(); - format!( - "Failed to parse definitions response as JSON. 
\ + let body = resp + .text() + .await + .context("Failed to read definitions response body")?; + let response: DefinitionListResponse = serde_json::from_str(&body).with_context(|| { + let snippet: String = body.chars().take(500).collect(); + format!( + "Failed to parse definitions response as JSON. \ This usually means the PAT is invalid or expired. \ Response body (first 500 chars):\n{snippet}" - ) - })?; + ) + })?; all_definitions.extend(response.value); @@ -387,7 +399,10 @@ pub enum FuzzyMatchResult { /// Checks if any definition name contains the agent name (with hyphens also /// tried as spaces). Returns `Single(index)` for an unambiguous match, /// `Ambiguous` when multiple definitions match, or `None` when nothing matches. -pub fn fuzzy_match_by_name(agent_name: &str, definitions: &[DefinitionSummary]) -> FuzzyMatchResult { +pub fn fuzzy_match_by_name( + agent_name: &str, + definitions: &[DefinitionSummary], +) -> FuzzyMatchResult { if agent_name.is_empty() { return FuzzyMatchResult::None; } @@ -442,7 +457,8 @@ pub fn match_definitions_in( .map(|f| normalize_ado_yaml_path(f)); debug!( "ADO definition: '{}' (id={}) yamlFilename={:?} normalized={:?}", - def.name, def.id, + def.name, + def.id, def.process.as_ref().and_then(|p| p.yaml_filename.as_ref()), yaml_path ); @@ -565,7 +581,10 @@ pub async fn get_definition_name( let resp = match auth.apply(client.get(&url)).send().await { Ok(r) => r, Err(e) => { - debug!("Failed to fetch name for definition {}: {:?}", definition_id, e); + debug!( + "Failed to fetch name for definition {}: {:?}", + definition_id, e + ); return None; } }; @@ -582,7 +601,10 @@ pub async fn get_definition_name( let body: serde_json::Value = match resp.json().await { Ok(b) => b, Err(e) => { - debug!("Failed to parse response for definition {}: {:?}", definition_id, e); + debug!( + "Failed to parse response for definition {}: {:?}", + definition_id, e + ); return None; } }; @@ -604,7 +626,10 @@ pub async fn get_definition_name( /// while the 
literal mask would overwrite it. Normalize the masked form /// before mutating the definition and PUTting it back. pub(crate) fn normalize_masked_secret_variable_values(definition: &mut serde_json::Value) { - let Some(vars) = definition.get_mut("variables").and_then(|v| v.as_object_mut()) else { + let Some(vars) = definition + .get_mut("variables") + .and_then(|v| v.as_object_mut()) + else { return; }; @@ -631,7 +656,12 @@ pub async fn update_pipeline_variable( ) -> Result<()> { let mut definition = get_definition_full(client, ctx, auth, definition_id) .await - .with_context(|| format!("Failed to fetch definition {} before updating", definition_id))?; + .with_context(|| { + format!( + "Failed to fetch definition {} before updating", + definition_id + ) + })?; normalize_masked_secret_variable_values(&mut definition); // Ensure variables object exists @@ -703,7 +733,10 @@ pub async fn resolve_auth(pat: Option<&str>) -> Result { Ok(AdoAuth::Bearer(token)) } Err(e) => { - warn!("Azure CLI auth failed: {:#}. Falling back to interactive prompt.", e); + warn!( + "Azure CLI auth failed: {:#}. 
Falling back to interactive prompt.", + e + ); let pat = inquire::Password::new("Enter your Azure DevOps PAT:") .without_confirmation() .prompt() @@ -753,21 +786,18 @@ pub async fn resolve_ado_context( org: Option<&str>, project: Option<&str>, ) -> Result { - let remote_ctx = get_git_remote_url(repo_path) - .await - .ok() - .and_then(|url| { - info!("Git remote: {}", url); - match parse_ado_remote(&url) { - Ok(ctx) => Some(ctx), - Err(e) => { - debug!("Git remote is not an ADO URL: {:#}", e); - None - } + let remote_ctx = get_git_remote_url(repo_path).await.ok().and_then(|url| { + info!("Git remote: {}", url); + match parse_ado_remote(&url) { + Ok(ctx) => Some(ctx), + Err(e) => { + debug!("Git remote is not an ADO URL: {:#}", e); + None } - }); + } + }); - match (remote_ctx, org, project) { + let mut ctx = match (remote_ctx, org, project) { // Git remote parsed — apply overrides (Some(mut ctx), org, project) => { if let Some(org) = org { @@ -776,16 +806,16 @@ pub async fn resolve_ado_context( if let Some(project) = project { ctx.project = project.to_string(); } - Ok(ctx) + ctx } // No usable remote — require explicit --org and --project (None, Some(org), Some(project)) => { info!("No ADO git remote; using --org and --project"); - Ok(AdoContext { + AdoContext { org_url: normalize_org_url(org), project: project.to_string(), repo_name: String::new(), - }) + } } (None, _, _) => { anyhow::bail!( @@ -793,6 +823,19 @@ pub async fn resolve_ado_context( When using --definition-ids outside an ADO repo, both --org and --project are required." 
); } + }; + + apply_test_org_url_override(&mut ctx); + Ok(ctx) +} + +#[doc(hidden)] +fn apply_test_org_url_override(ctx: &mut AdoContext) { + if let Ok(org_url) = std::env::var("ADO_AW_TEST_ORG_URL") { + let org_url = org_url.trim().trim_end_matches('/'); + if !org_url.is_empty() { + ctx.org_url = org_url.to_string(); + } } } @@ -1016,7 +1059,10 @@ pub async fn patch_queue_status( id ); - debug!("PUT definition {} with queueStatus={}: {}", id, status, put_url); + debug!( + "PUT definition {} with queueStatus={}: {}", + id, status, put_url + ); let resp = auth .apply(client.put(&put_url)) @@ -1245,11 +1291,12 @@ pub async fn get_latest_build( debug!("GET latest build for definition {}: {}", definition_id, url); - let resp = auth - .apply(client.get(&url)) - .send() - .await - .with_context(|| format!("Failed to fetch latest build for definition {}", definition_id))?; + let resp = auth.apply(client.get(&url)).send().await.with_context(|| { + format!( + "Failed to fetch latest build for definition {}", + definition_id + ) + })?; let status = resp.status(); if !status.is_success() { @@ -1274,6 +1321,288 @@ pub async fn get_latest_build( .cloned()) } +/// A single build artifact returned by the ADO REST API. +/// +/// Shape comes from `GET _apis/build/builds/{buildId}/artifacts`. +/// We surface only the fields the audit consumes; unknown fields are +/// dropped on deserialization. +#[derive(Debug, Clone, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct BuildArtifact { + pub id: u64, + pub name: String, + pub source: Option, + pub resource: BuildArtifactResource, +} + +#[derive(Debug, Clone, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct BuildArtifactResource { + /// "PipelineArtifact" for `- publish:` steps, "Container" for legacy. + #[serde(rename = "type")] + pub kind: String, + pub data: Option, + pub properties: Option, + pub url: Option, + pub download_url: Option, +} + +/// List all artifacts published by a build. 
+/// +/// Calls `GET /_apis/build/builds/{buildId}/artifacts?api-version=7.1`. +/// Returns the full `value` array — callers filter by `name` themselves. +/// +/// Returns an empty vec when the build has not published any artifacts +/// (HTTP 200 with `value: []`) — that is NOT an error. +/// +/// Mirrors the style of `get_build` (status-code check, body capture, +/// debug! logging). +pub async fn list_build_artifacts( + client: &reqwest::Client, + ctx: &AdoContext, + auth: &AdoAuth, + build_id: u64, +) -> Result> { + #[derive(Deserialize)] + struct BuildArtifactListResponse { + value: Vec, + } + + let url = format!( + "{}/{}/_apis/build/builds/{}/artifacts?api-version=7.1", + ctx.org_url.trim_end_matches('/'), + percent_encoding::utf8_percent_encode(&ctx.project, PATH_SEGMENT), + build_id + ); + + debug!("GET build artifacts for build {}: {}", build_id, url); + + let resp = auth + .apply(client.get(&url)) + .send() + .await + .with_context(|| format!("Failed to fetch build artifacts for build {}", build_id))?; + + let status = resp.status(); + if !status.is_success() { + let body = resp.text().await.unwrap_or_default(); + if status == reqwest::StatusCode::UNAUTHORIZED || status == reqwest::StatusCode::FORBIDDEN { + return Err(anyhow::anyhow!( + "ADO API returned {} when listing build artifacts for build {}: {}. This call requires PAT scopes Build (Read) and Build Artifacts (Read). 
As a manual alternative, try `az pipelines runs artifact list --run-id {}`.", + status, + build_id, + body, + build_id + )); + } + anyhow::bail!( + "ADO API returned {} when listing build artifacts for build {}: {}", + status, + build_id, + body + ); + } + + let body = resp.text().await.with_context(|| { + format!( + "Failed to read build artifacts response body for build {}", + build_id + ) + })?; + let response: BuildArtifactListResponse = serde_json::from_str(&body).with_context(|| { + let snippet: String = body.chars().take(500).collect(); + format!( + "Failed to parse build artifacts response for build {} as JSON. Response body (first 500 chars):\n{snippet}", + build_id + ) + })?; + + Ok(response.value) +} + +/// Download a single build artifact and unzip it into `dest_dir`. +/// +/// ADO PipelineArtifacts are delivered as a zip; this helper follows the +/// signed `downloadUrl`, streams the response, and extracts it under +/// `dest_dir/{artifact.name}/...`. +/// +/// On HTTP 401/403, returns a structured error whose message lists the +/// required PAT scopes (`Build (Read)`, `Build Artifacts (Read)`) and +/// suggests the `az pipelines runs artifact download --run-id +/// --artifact-name --path ` escape hatch. +/// +/// If `artifact.resource.download_url` is `None`, returns an error +/// explaining that the artifact resource type is not downloadable +/// (legacy `Container` artifacts use a different endpoint we do not +/// support yet). +pub async fn download_build_artifact( + client: &reqwest::Client, + auth: &AdoAuth, + artifact: &BuildArtifact, + dest_dir: &std::path::Path, +) -> Result<()> { + let download_url = artifact.resource.download_url.as_deref().with_context(|| { + format!( + "Build artifact '{}' has no download URL. 
Artifact resource type '{}' is not downloadable via this helper yet (legacy Container artifacts use a different endpoint).", + artifact.name, + artifact.resource.kind + ) + })?; + + std::fs::create_dir_all(dest_dir).with_context(|| { + format!( + "Failed to create artifact destination directory '{}'", + dest_dir.display() + ) + })?; + + let artifact_dir = dest_dir.join(&artifact.name); + if artifact_dir.exists() { + std::fs::remove_dir_all(&artifact_dir).with_context(|| { + format!( + "Failed to remove existing artifact directory '{}'", + artifact_dir.display() + ) + })?; + } + std::fs::create_dir_all(&artifact_dir).with_context(|| { + format!( + "Failed to create artifact extraction directory '{}'", + artifact_dir.display() + ) + })?; + + debug!( + "Downloading build artifact '{}' from {}", + artifact.name, download_url + ); + + let mut resp = client + .get(download_url) + .send() + .await + .with_context(|| format!("Failed to download build artifact '{}'", artifact.name))?; + + let status = resp.status(); + if !status.is_success() { + let body = resp.text().await.unwrap_or_default(); + if status == reqwest::StatusCode::UNAUTHORIZED || status == reqwest::StatusCode::FORBIDDEN { + let run_id_hint = artifact.source.as_deref().unwrap_or(""); + let _ = auth; + return Err(anyhow::anyhow!( + "ADO API returned {} when downloading build artifact '{}': {}. This call requires PAT scopes Build (Read) and Build Artifacts (Read). 
As a manual alternative, try `az pipelines runs artifact download --run-id {} --artifact-name {} --path {}`.", + status, + artifact.name, + body, + run_id_hint, + artifact.name, + dest_dir.display() + )); + } + anyhow::bail!( + "ADO API returned {} when downloading build artifact '{}': {}", + status, + artifact.name, + body + ); + } + + let mut temp_zip = tempfile::Builder::new() + .prefix(&format!(".tmp-{}-", artifact.id)) + .suffix(".zip") + .tempfile_in(dest_dir) + .with_context(|| { + format!( + "Failed to create temp zip for build artifact '{}'", + artifact.name + ) + })?; + + while let Some(chunk) = resp + .chunk() + .await + .with_context(|| format!("Failed to stream build artifact '{}'", artifact.name))? + { + temp_zip.write_all(&chunk).with_context(|| { + format!( + "Failed to write temp zip for build artifact '{}'", + artifact.name + ) + })?; + } + temp_zip.flush().with_context(|| { + format!( + "Failed to flush temp zip for build artifact '{}'", + artifact.name + ) + })?; + + let archive_file = temp_zip.reopen().with_context(|| { + format!( + "Failed to reopen temp zip for build artifact '{}'", + artifact.name + ) + })?; + let mut archive = zip::ZipArchive::new(archive_file).with_context(|| { + format!( + "Failed to read downloaded zip for build artifact '{}'", + artifact.name + ) + })?; + + for index in 0..archive.len() { + let mut entry = archive.by_index(index).with_context(|| { + format!( + "Failed to read zip entry {} from build artifact '{}'", + index, artifact.name + ) + })?; + let entry_name = entry.name().to_string(); + let relative_path = entry + .enclosed_name() + .map(|path| path.to_owned()) + .ok_or_else(|| { + anyhow::anyhow!( + "Refusing to extract unsafe path '{}' from build artifact '{}'", + entry_name, + artifact.name + ) + })?; + let output_path = artifact_dir.join(&relative_path); + + if entry.is_dir() { + std::fs::create_dir_all(&output_path).with_context(|| { + format!( + "Failed to create extracted directory '{}'", + 
output_path.display() + ) + })?; + continue; + } + + if let Some(parent) = output_path.parent() { + std::fs::create_dir_all(parent).with_context(|| { + format!("Failed to create parent directory '{}'", parent.display()) + })?; + } + + let mut output = std::fs::File::create(&output_path).with_context(|| { + format!( + "Failed to create extracted file '{}'", + output_path.display() + ) + })?; + std::io::copy(&mut entry, &mut output).with_context(|| { + format!( + "Failed to extract '{}' from build artifact '{}'", + entry_name, artifact.name + ) + })?; + } + + Ok(()) +} + #[cfg(test)] mod tests { use super::*; @@ -1361,10 +1690,7 @@ mod tests { #[test] fn normalize_org_url_accepts_bare_name() { - assert_eq!( - normalize_org_url("myorg"), - "https://dev.azure.com/myorg" - ); + assert_eq!(normalize_org_url("myorg"), "https://dev.azure.com/myorg"); } #[test] @@ -1618,9 +1944,76 @@ mod tests { #[test] fn path_segment_handles_non_ascii() { - let encoded = - percent_encoding::utf8_percent_encode("café-π", PATH_SEGMENT).to_string(); + let encoded = percent_encoding::utf8_percent_encode("café-π", PATH_SEGMENT).to_string(); // Non-ASCII bytes get encoded per UTF-8. 
assert_eq!(encoded, "caf%C3%A9-%CF%80"); } + + #[test] + fn build_artifact_deserializes_pipeline_artifact_response() { + #[derive(Deserialize)] + struct BuildArtifactListResponse { + value: Vec, + } + + let raw = serde_json::json!({ + "count": 1, + "value": [ + { + "id": 1, + "name": "agent_outputs_42", + "source": "42", + "resource": { + "type": "PipelineArtifact", + "data": "#/123/agent_outputs_42", + "url": "https://dev.azure.com/example/project/_apis/build/builds/42/artifacts?artifactName=agent_outputs_42", + "downloadUrl": "https://example.invalid/download/agent_outputs_42.zip" + } + } + ] + }); + + let response: BuildArtifactListResponse = serde_json::from_value(raw).unwrap(); + let artifact = &response.value[0]; + assert_eq!(artifact.id, 1); + assert_eq!(artifact.name, "agent_outputs_42"); + assert_eq!(artifact.source.as_deref(), Some("42")); + assert_eq!(artifact.resource.kind, "PipelineArtifact"); + assert_eq!( + artifact.resource.download_url.as_deref(), + Some("https://example.invalid/download/agent_outputs_42.zip") + ); + } + + #[tokio::test] + async fn download_build_artifact_errors_when_download_url_is_missing() { + let artifact = BuildArtifact { + id: 1, + name: "safe_outputs".to_string(), + source: Some("42".to_string()), + resource: BuildArtifactResource { + kind: "Container".to_string(), + data: None, + properties: None, + url: None, + download_url: None, + }, + }; + let client = reqwest::Client::new(); + let temp_dir = tempfile::tempdir().unwrap(); + + let error = download_build_artifact( + &client, + &AdoAuth::Pat("test".to_string()), + &artifact, + temp_dir.path(), + ) + .await + .unwrap_err(); + let message = error.to_string(); + assert!( + message.contains("no download URL") || message.contains("not downloadable"), + "unexpected error message: {message}" + ); + } } diff --git a/src/audit/analyzers/detection.rs b/src/audit/analyzers/detection.rs new file mode 100644 index 00000000..f5d82600 --- /dev/null +++ b/src/audit/analyzers/detection.rs 
@@ -0,0 +1,320 @@ +use anyhow::Result; +use log::{debug, warn}; +use serde_json::Value; +use std::io::ErrorKind; +use std::path::{Path, PathBuf}; + +use crate::audit::model::{DetectionAnalysis, DetectionThreats}; + +/// Read the detection verdict from `analyzed_outputs_/threat-analysis.json`. +/// +/// Returns `Ok(None)` when the file is absent (detection didn't run, or +/// failed before writing the verdict). +/// +/// Returns `Ok(Some(DetectionAnalysis { ..., safe_to_process: }))` +/// where `safe_to_process = !(prompt_injection || secret_leak || malicious_patch)`. +/// The `verdict_path` field is set to the relative path of the verdict +/// file from `download_root` so the audit renderer can link to it. +pub async fn analyze_detection(download_root: &Path) -> Result> { + let Some(verdict_path) = find_verdict_path(download_root).await else { + return Ok(None); + }; + + let verdict_bytes = match tokio::fs::read(&verdict_path).await { + Ok(bytes) => bytes, + Err(err) if err.kind() == ErrorKind::NotFound => return Ok(None), + Err(err) => { + warn!( + "Failed to read detection verdict {}: {err}", + verdict_path.display() + ); + return Ok(None); + } + }; + + let verdict_json: Value = match serde_json::from_slice(&verdict_bytes) { + Ok(value) => value, + Err(err) => { + warn!( + "Failed to parse detection verdict {} as JSON: {err}", + verdict_path.display() + ); + return Ok(None); + } + }; + + let prompt_injection = extract_bool(&verdict_json, "prompt_injection"); + let secret_leak = extract_bool(&verdict_json, "secret_leak"); + let malicious_patch = extract_bool(&verdict_json, "malicious_patch"); + let reasons = extract_reasons(&verdict_json, &verdict_path); + let safe_to_process = !(prompt_injection || secret_leak || malicious_patch); + let verdict_path = verdict_path + .strip_prefix(download_root) + .ok() + .map(|path| path.to_string_lossy().into_owned()); + + Ok(Some(DetectionAnalysis { + threats: DetectionThreats { + prompt_injection, + secret_leak, + 
malicious_patch, + }, + reasons, + safe_to_process, + verdict_path, + })) +} + +async fn find_verdict_path(download_root: &Path) -> Option { + let mut entries = match tokio::fs::read_dir(download_root).await { + Ok(entries) => entries, + Err(err) if err.kind() == ErrorKind::NotFound => return None, + Err(err) => { + warn!( + "Failed to read detection download root {}: {err}", + download_root.display() + ); + return None; + } + }; + + let mut latest_dir: Option<(String, PathBuf)> = None; + + loop { + let entry = match entries.next_entry().await { + Ok(Some(entry)) => entry, + Ok(None) => break, + Err(err) => { + warn!( + "Failed to enumerate detection download root {}: {err}", + download_root.display() + ); + return None; + } + }; + + let file_type = match entry.file_type().await { + Ok(file_type) => file_type, + Err(err) => { + debug!( + "Skipping detection artifact entry {} after file-type error: {err}", + entry.path().display() + ); + continue; + } + }; + + if !file_type.is_dir() { + continue; + } + + let name = entry.file_name().to_string_lossy().into_owned(); + if !name.starts_with("analyzed_outputs_") { + continue; + } + + let path = entry.path(); + match &latest_dir { + Some((current_name, _)) if name <= *current_name => {} + _ => latest_dir = Some((name, path)), + } + } + + latest_dir.map(|(_, dir)| dir.join("threat-analysis.json")) +} + +fn extract_bool(v: &Value, key: &str) -> bool { + match v.get(key) { + Some(Value::Bool(b)) => *b, + Some(Value::String(s)) => s.eq_ignore_ascii_case("true"), + _ => false, + } +} + +fn extract_reasons(v: &Value, verdict_path: &Path) -> Vec { + match v.get("reasons") { + None | Some(Value::Null) => Vec::new(), + Some(Value::Array(reasons)) => reasons + .iter() + .map(|reason| match reason { + Value::String(reason) => reason.clone(), + other => { + debug!( + "Detection verdict {} contains non-string reason entry: {:?}", + verdict_path.display(), + other + ); + String::new() + } + }) + .collect(), + Some(other) => { + debug!( 
+ "Detection verdict {} contains non-array reasons field: {:?}", + verdict_path.display(), + other + ); + Vec::new() + } + } +} + +#[cfg(test)] +mod tests { + use super::analyze_detection; + use crate::audit::model::DetectionThreats; + use tempfile::TempDir; + + fn expected_verdict_path(dir_name: &str) -> String { + std::path::Path::new(dir_name) + .join("threat-analysis.json") + .to_string_lossy() + .into_owned() + } + + async fn create_analyzed_outputs_dir(temp_dir: &TempDir, dir_name: &str) { + tokio::fs::create_dir_all(temp_dir.path().join(dir_name)) + .await + .unwrap(); + } + + async fn write_verdict(temp_dir: &TempDir, dir_name: &str, contents: &str) { + let dir = temp_dir.path().join(dir_name); + tokio::fs::create_dir_all(&dir).await.unwrap(); + tokio::fs::write(dir.join("threat-analysis.json"), contents) + .await + .unwrap(); + } + + #[tokio::test] + async fn returns_none_when_download_root_is_absent() { + let temp_dir = TempDir::new().unwrap(); + let missing_root = temp_dir.path().join("missing"); + + let analysis = analyze_detection(&missing_root).await.unwrap(); + + assert!(analysis.is_none()); + } + + #[tokio::test] + async fn returns_none_when_verdict_file_is_missing() { + let temp_dir = TempDir::new().unwrap(); + create_analyzed_outputs_dir(&temp_dir, "analyzed_outputs_42").await; + + let analysis = analyze_detection(temp_dir.path()).await.unwrap(); + + assert!(analysis.is_none()); + } + + #[tokio::test] + async fn parses_clean_verdict() { + let temp_dir = TempDir::new().unwrap(); + write_verdict( + &temp_dir, + "analyzed_outputs_42", + r#"{"prompt_injection":false,"secret_leak":false,"malicious_patch":false,"reasons":[]}"#, + ) + .await; + + let analysis = analyze_detection(temp_dir.path()).await.unwrap().unwrap(); + + assert_eq!(analysis.threats, DetectionThreats::default()); + assert!(analysis.reasons.is_empty()); + assert!(analysis.safe_to_process); + assert_eq!( + analysis.verdict_path, + Some(expected_verdict_path("analyzed_outputs_42")) + ); + 
} + + #[tokio::test] + async fn marks_run_unsafe_when_any_threat_is_true() { + let temp_dir = TempDir::new().unwrap(); + write_verdict( + &temp_dir, + "analyzed_outputs_42", + r#"{"prompt_injection":true,"secret_leak":false,"malicious_patch":false,"reasons":["prompt injection detected"]}"#, + ) + .await; + + let analysis = analyze_detection(temp_dir.path()).await.unwrap().unwrap(); + + assert!(!analysis.safe_to_process); + assert!(analysis.threats.prompt_injection); + assert!(!analysis.threats.secret_leak); + assert!(!analysis.threats.malicious_patch); + assert_eq!( + analysis.reasons, + vec![String::from("prompt injection detected")] + ); + } + + #[tokio::test] + async fn accepts_string_booleans() { + let temp_dir = TempDir::new().unwrap(); + write_verdict( + &temp_dir, + "analyzed_outputs_42", + r#"{"prompt_injection":"true","secret_leak":"false","malicious_patch":"false","reasons":[]}"#, + ) + .await; + + let analysis = analyze_detection(temp_dir.path()).await.unwrap().unwrap(); + + assert!(analysis.threats.prompt_injection); + assert!(!analysis.threats.secret_leak); + assert!(!analysis.threats.malicious_patch); + assert!(!analysis.safe_to_process); + } + + #[tokio::test] + async fn defaults_missing_reasons_to_empty() { + let temp_dir = TempDir::new().unwrap(); + write_verdict( + &temp_dir, + "analyzed_outputs_42", + r#"{"prompt_injection":false,"secret_leak":false,"malicious_patch":false}"#, + ) + .await; + + let analysis = analyze_detection(temp_dir.path()).await.unwrap().unwrap(); + + assert!(analysis.reasons.is_empty()); + } + + #[tokio::test] + async fn returns_none_for_malformed_json() { + let temp_dir = TempDir::new().unwrap(); + write_verdict(&temp_dir, "analyzed_outputs_42", "{not valid json").await; + + let analysis = analyze_detection(temp_dir.path()).await.unwrap(); + + assert!(analysis.is_none()); + } + + #[tokio::test] + async fn uses_lexicographically_last_analyzed_outputs_directory() { + let temp_dir = TempDir::new().unwrap(); + write_verdict( + 
&temp_dir, + "analyzed_outputs_41", + r#"{"prompt_injection":false,"secret_leak":false,"malicious_patch":false,"reasons":[]}"#, + ) + .await; + write_verdict( + &temp_dir, + "analyzed_outputs_42", + r#"{"prompt_injection":false,"secret_leak":false,"malicious_patch":true,"reasons":["malicious patch detected"]}"#, + ) + .await; + + let analysis = analyze_detection(temp_dir.path()).await.unwrap().unwrap(); + + assert!(!analysis.safe_to_process); + assert!(analysis.threats.malicious_patch); + assert_eq!( + analysis.verdict_path, + Some(expected_verdict_path("analyzed_outputs_42")) + ); + } +} diff --git a/src/audit/analyzers/firewall.rs b/src/audit/analyzers/firewall.rs new file mode 100644 index 00000000..0baea89e --- /dev/null +++ b/src/audit/analyzers/firewall.rs @@ -0,0 +1,387 @@ +//! AWF firewall log analyzer for `ado-aw audit`. + +use anyhow::Context; +use serde_json::Value; +use std::collections::BTreeMap; +use std::io::ErrorKind; +use std::path::Path; + +use crate::audit::model::{DomainStat, FirewallAnalysis}; + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum Verdict { + Allowed, + Denied, + Unknown, +} + +#[derive(Debug, Default)] +struct DomainAccumulator { + request_count: u64, + allowed_count: u64, + denied_count: u64, + first_seen: Option, + last_seen: Option, +} + +impl DomainAccumulator { + fn record(&mut self, verdict: Verdict, timestamp: Option<&str>) { + self.request_count += 1; + match verdict { + Verdict::Allowed => self.allowed_count += 1, + Verdict::Denied => self.denied_count += 1, + Verdict::Unknown => {} + } + update_first_seen(&mut self.first_seen, timestamp); + update_last_seen(&mut self.last_seen, timestamp); + } + + fn status(&self) -> String { + if self.request_count > 0 && self.allowed_count == self.request_count { + "allowed".to_string() + } else if self.request_count > 0 && self.denied_count == self.request_count { + "denied".to_string() + } else { + "mixed".to_string() + } + } +} + +/// Analyze AWF firewall logs in 
`/logs/firewall/`. +/// +/// Scans every `*.jsonl` / `*.log` file in the directory, parses each +/// line as JSON, and aggregates per-domain request counts + allow/deny +/// verdicts. +/// +/// Returns `Ok(None)` if the directory does not exist (the agent ran +/// without AWF, e.g. on a target that does not network-isolate). +/// Returns `Ok(Some(empty))` if the directory exists but contains no +/// recognisable entries — surfacing the empty state lets the audit +/// renderer distinguish "AWF disabled" from "AWF ran but logged nothing". +pub async fn analyze_firewall_logs( + firewall_logs_dir: &std::path::Path, +) -> anyhow::Result> { + match tokio::fs::metadata(firewall_logs_dir).await { + Ok(metadata) => { + anyhow::ensure!( + metadata.is_dir(), + "Firewall logs path is not a directory: {}", + firewall_logs_dir.display() + ); + } + Err(error) if error.kind() == ErrorKind::NotFound => return Ok(None), + Err(error) => { + return Err(error) + .with_context(|| format!("Failed to stat {}", firewall_logs_dir.display())); + } + } + + let mut totals = FirewallAnalysis::default(); + let mut per_domain = BTreeMap::::new(); + let mut entries = tokio::fs::read_dir(firewall_logs_dir) + .await + .with_context(|| format!("Failed to read {}", firewall_logs_dir.display()))?; + + while let Some(entry) = entries + .next_entry() + .await + .with_context(|| format!("Failed to iterate {}", firewall_logs_dir.display()))? 
+ { + let file_type = entry + .file_type() + .await + .with_context(|| format!("Failed to inspect {}", entry.path().display()))?; + if !file_type.is_file() { + continue; + } + + let path = entry.path(); + if !is_firewall_log_file(&path) { + continue; + } + + let contents = tokio::fs::read_to_string(&path) + .await + .with_context(|| format!("Failed to read firewall log {}", path.display()))?; + + for line in contents.lines() { + let trimmed = line.trim(); + if trimmed.is_empty() { + continue; + } + + let value: Value = match serde_json::from_str(trimmed) { + Ok(value) => value, + Err(_) => continue, + }; + + let Some(domain) = extract_string_field(&value, &["host", "domain", "hostname"]) else { + continue; + }; + + let verdict = extract_verdict(&value); + let timestamp = extract_string_field(&value, &["timestamp", "time", "@timestamp"]); + + let domain_entry = per_domain.entry(domain).or_default(); + domain_entry.record(verdict, timestamp.as_deref()); + + match verdict { + Verdict::Allowed => totals.allowed_count += 1, + Verdict::Denied => totals.denied_count += 1, + Verdict::Unknown => {} + } + } + } + + totals.domains = per_domain + .into_iter() + .map(|(domain, stats)| DomainStat { + domain, + status: stats.status(), + request_count: stats.request_count, + first_seen: stats.first_seen, + last_seen: stats.last_seen, + }) + .collect(); + + totals.total_requests = totals + .domains + .iter() + .map(|domain| domain.request_count) + .sum(); + totals.domains.sort_by(|left, right| { + right + .request_count + .cmp(&left.request_count) + .then_with(|| left.domain.cmp(&right.domain)) + }); + + Ok(Some(totals)) +} + +fn is_firewall_log_file(path: &Path) -> bool { + path.extension() + .and_then(|extension| extension.to_str()) + .map(|extension| { + extension.eq_ignore_ascii_case("jsonl") || extension.eq_ignore_ascii_case("log") + }) + .unwrap_or(false) +} + +fn extract_string_field(value: &Value, keys: &[&str]) -> Option { + keys.iter() + .filter_map(|key| 
value.get(*key).and_then(Value::as_str)) + .map(str::trim) + .find(|candidate| !candidate.is_empty()) + .map(ToOwned::to_owned) +} + +fn extract_verdict(value: &Value) -> Verdict { + let Some(raw_verdict) = extract_string_field(value, &["verdict", "status", "action"]) else { + return Verdict::Unknown; + }; + + match raw_verdict.to_ascii_lowercase().as_str() { + "allow" | "allowed" | "pass" => Verdict::Allowed, + "deny" | "denied" | "block" | "blocked" => Verdict::Denied, + _ => Verdict::Unknown, + } +} + +fn update_first_seen(current: &mut Option, candidate: Option<&str>) { + let Some(candidate) = candidate.filter(|candidate| !candidate.is_empty()) else { + return; + }; + + match current { + Some(existing) if existing.as_str() <= candidate => {} + _ => *current = Some(candidate.to_string()), + } +} + +fn update_last_seen(current: &mut Option, candidate: Option<&str>) { + let Some(candidate) = candidate.filter(|candidate| !candidate.is_empty()) else { + return; + }; + + match current { + Some(existing) if existing.as_str() >= candidate => {} + _ => *current = Some(candidate.to_string()), + } +} + +#[cfg(test)] +mod tests { + use super::*; + use tempfile::TempDir; + + fn expected_mixed_analysis() -> FirewallAnalysis { + FirewallAnalysis { + domains: vec![ + DomainStat { + domain: "api.github.com".to_string(), + status: "allowed".to_string(), + request_count: 2, + first_seen: Some("2026-01-01T00:00:00Z".to_string()), + last_seen: Some("2026-01-01T00:00:02Z".to_string()), + }, + DomainStat { + domain: "evil.example.com".to_string(), + status: "denied".to_string(), + request_count: 2, + first_seen: Some("2026-01-01T00:00:01Z".to_string()), + last_seen: Some("2026-01-01T00:00:03Z".to_string()), + }, + DomainStat { + domain: "unknown-verdict.example".to_string(), + status: "mixed".to_string(), + request_count: 1, + first_seen: Some("2026-01-01T00:00:04Z".to_string()), + last_seen: Some("2026-01-01T00:00:04Z".to_string()), + }, + ], + total_requests: 5, + allowed_count: 2, 
+ denied_count: 2, + } + } + + fn mixed_fixture_lines() -> &'static str { + concat!( + "{\"timestamp\":\"2026-01-01T00:00:00Z\",\"host\":\"api.github.com\",\"verdict\":\"allow\",\"method\":\"GET\",\"url\":\"https://api.github.com/repos/o/r\",\"status_code\":200}\n", + "{\"timestamp\":\"2026-01-01T00:00:02Z\",\"host\":\"api.github.com\",\"verdict\":\"allowed\",\"method\":\"GET\",\"url\":\"https://api.github.com/user\",\"status_code\":200}\n", + "{\"timestamp\":\"2026-01-01T00:00:01Z\",\"host\":\"evil.example.com\",\"verdict\":\"deny\",\"method\":\"CONNECT\",\"url\":\"https://evil.example.com\",\"status_code\":403}\n", + "{\"timestamp\":\"2026-01-01T00:00:03Z\",\"host\":\"evil.example.com\",\"verdict\":\"blocked\",\"method\":\"CONNECT\",\"url\":\"https://evil.example.com/admin\",\"status_code\":403}\n", + "{\"timestamp\":\"2026-01-01T00:00:04Z\",\"host\":\"unknown-verdict.example\",\"verdict\":\"mystery\",\"method\":\"GET\",\"url\":\"https://unknown-verdict.example\",\"status_code\":200}\n" + ) + } + + async fn write_log_file(dir: &Path, name: &str, contents: &str) { + tokio::fs::create_dir_all(dir).await.unwrap(); + tokio::fs::write(dir.join(name), contents).await.unwrap(); + } + + #[tokio::test] + async fn returns_none_when_directory_absent() { + let temp_dir = TempDir::new().unwrap(); + let firewall_dir = temp_dir.path().join("logs").join("firewall"); + + let result = analyze_firewall_logs(&firewall_dir).await.unwrap(); + + assert_eq!(result, None); + } + + #[tokio::test] + async fn returns_empty_analysis_when_directory_exists_but_has_no_entries() { + let temp_dir = TempDir::new().unwrap(); + let firewall_dir = temp_dir.path().join("logs").join("firewall"); + tokio::fs::create_dir_all(&firewall_dir).await.unwrap(); + + let result = analyze_firewall_logs(&firewall_dir).await.unwrap(); + + assert_eq!(result, Some(FirewallAnalysis::default())); + } + + #[tokio::test] + async fn aggregates_mixed_log_fixture() { + let temp_dir = TempDir::new().unwrap(); + let 
firewall_dir = temp_dir.path().join("logs").join("firewall"); + write_log_file(&firewall_dir, "firewall.jsonl", mixed_fixture_lines()).await; + + let result = analyze_firewall_logs(&firewall_dir).await.unwrap(); + + assert_eq!(result, Some(expected_mixed_analysis())); + } + + #[tokio::test] + async fn aggregates_across_multiple_files() { + let temp_dir = TempDir::new().unwrap(); + let firewall_dir = temp_dir.path().join("logs").join("firewall"); + write_log_file( + &firewall_dir, + "firewall-001.jsonl", + concat!( + "{\"timestamp\":\"2026-01-01T00:00:00Z\",\"host\":\"api.github.com\",\"verdict\":\"allow\"}\n", + "{\"timestamp\":\"2026-01-01T00:00:02Z\",\"host\":\"api.github.com\",\"verdict\":\"allowed\"}\n", + "{\"timestamp\":\"2026-01-01T00:00:01Z\",\"host\":\"evil.example.com\",\"verdict\":\"deny\"}\n" + ), + ) + .await; + write_log_file( + &firewall_dir, + "firewall-002.jsonl", + concat!( + "{\"timestamp\":\"2026-01-01T00:00:03Z\",\"host\":\"evil.example.com\",\"verdict\":\"blocked\"}\n", + "{\"timestamp\":\"2026-01-01T00:00:04Z\",\"host\":\"unknown-verdict.example\",\"verdict\":\"mystery\"}\n" + ), + ) + .await; + + let result = analyze_firewall_logs(&firewall_dir).await.unwrap(); + + assert_eq!(result, Some(expected_mixed_analysis())); + } + + #[tokio::test] + async fn uses_field_name_fallbacks() { + let temp_dir = TempDir::new().unwrap(); + let firewall_dir = temp_dir.path().join("logs").join("firewall"); + write_log_file( + &firewall_dir, + "fallbacks.log", + "{\"time\":\"2026-01-01T00:00:05Z\",\"hostname\":\"packages.example.org\",\"action\":\"block\"}\n", + ) + .await; + + let result = analyze_firewall_logs(&firewall_dir).await.unwrap(); + + assert_eq!( + result, + Some(FirewallAnalysis { + domains: vec![DomainStat { + domain: "packages.example.org".to_string(), + status: "denied".to_string(), + request_count: 1, + first_seen: Some("2026-01-01T00:00:05Z".to_string()), + last_seen: Some("2026-01-01T00:00:05Z".to_string()), + }], + total_requests: 1, + 
allowed_count: 0, + denied_count: 1, + }) + ); + } + + #[tokio::test] + async fn skips_malformed_lines() { + let temp_dir = TempDir::new().unwrap(); + let firewall_dir = temp_dir.path().join("logs").join("firewall"); + write_log_file( + &firewall_dir, + "malformed.jsonl", + concat!( + "not-json\n", + "{\"timestamp\":\"2026-01-01T00:00:06Z\",\"host\":\"api.github.com\",\"verdict\":\"allow\"}\n" + ), + ) + .await; + + let result = analyze_firewall_logs(&firewall_dir).await.unwrap(); + + assert_eq!( + result, + Some(FirewallAnalysis { + domains: vec![DomainStat { + domain: "api.github.com".to_string(), + status: "allowed".to_string(), + request_count: 1, + first_seen: Some("2026-01-01T00:00:06Z".to_string()), + last_seen: Some("2026-01-01T00:00:06Z".to_string()), + }], + total_requests: 1, + allowed_count: 1, + denied_count: 0, + }) + ); + } +} diff --git a/src/audit/analyzers/jobs.rs b/src/audit/analyzers/jobs.rs new file mode 100644 index 00000000..2cb746ec --- /dev/null +++ b/src/audit/analyzers/jobs.rs @@ -0,0 +1,283 @@ +//! ADO build-timeline analyzer for `ado-aw audit`. + +use anyhow::Context; +use chrono::DateTime; +use log::debug; +use serde_json::Value; +use std::cmp::Ordering; + +use crate::ado::{AdoAuth, AdoContext, PATH_SEGMENT}; +use crate::audit::model::JobData; + +/// Fetch the build timeline JSON from ADO. 
+pub async fn fetch_timeline( + client: &reqwest::Client, + ctx: &AdoContext, + auth: &AdoAuth, + build_id: u64, +) -> anyhow::Result { + let url = format!( + "{}/{}/_apis/build/builds/{}/timeline?api-version=7.1", + ctx.org_url.trim_end_matches('/'), + percent_encoding::utf8_percent_encode(&ctx.project, PATH_SEGMENT), + build_id + ); + + debug!("GET build {} timeline: {}", build_id, url); + + let resp = auth + .apply(client.get(&url)) + .send() + .await + .with_context(|| format!("Failed to fetch build {} timeline", build_id))?; + + let status = resp.status(); + if !status.is_success() { + let body = resp.text().await.unwrap_or_default(); + anyhow::bail!( + "ADO API returned {} when fetching build {} timeline: {}", + status, + build_id, + body + ); + } + + resp.json() + .await + .with_context(|| format!("Failed to parse build {} timeline response", build_id)) +} + +/// Map a timeline JSON `value` into a sorted `Vec` for the +/// audit report. Filters to records of `type: "Job"` (skips stages +/// and tasks). Sorts by `startTime` ascending; records with no start +/// time go last. 
+pub fn timeline_to_jobs(timeline: &Value) -> Vec { + let Some(records) = timeline.get("records").and_then(Value::as_array) else { + return Vec::new(); + }; + + let mut jobs: Vec = records.iter().filter_map(record_to_job).collect(); + jobs.sort_by(compare_jobs_by_start_time); + jobs +} + +fn record_to_job(record: &Value) -> Option { + let record_type = string_field(record, "type")?; + if !record_type.eq_ignore_ascii_case("job") { + return None; + } + + let status = string_field(record, "state").unwrap_or_default(); + let started_at = string_field(record, "startTime"); + let finished_at = string_field(record, "finishTime"); + + Some(JobData { + name: string_field(record, "name").unwrap_or_default(), + result: if status.eq_ignore_ascii_case("completed") { + string_field(record, "result") + } else { + None + }, + duration: format_duration(started_at.as_deref(), finished_at.as_deref()), + started_at, + finished_at, + status, + }) +} + +fn string_field(value: &Value, key: &str) -> Option { + value + .get(key) + .and_then(Value::as_str) + .map(str::trim) + .filter(|value| !value.is_empty()) + .map(ToOwned::to_owned) +} + +fn format_duration(started_at: Option<&str>, finished_at: Option<&str>) -> Option { + let start = DateTime::parse_from_rfc3339(started_at?).ok()?; + let finish = DateTime::parse_from_rfc3339(finished_at?).ok()?; + let delta = finish.signed_duration_since(start); + if delta.num_seconds() < 0 { + return None; + } + + let total_seconds = delta.num_seconds(); + Some(format!("{}m {}s", total_seconds / 60, total_seconds % 60)) +} + +fn compare_jobs_by_start_time(left: &JobData, right: &JobData) -> Ordering { + match (left.started_at.as_deref(), right.started_at.as_deref()) { + (Some(left_start), Some(right_start)) => compare_timestamp_strings(left_start, right_start) + .then_with(|| left.name.cmp(&right.name)), + (Some(_), None) => Ordering::Less, + (None, Some(_)) => Ordering::Greater, + (None, None) => left.name.cmp(&right.name), + } +} + +fn 
compare_timestamp_strings(left: &str, right: &str) -> Ordering { + match ( + DateTime::parse_from_rfc3339(left), + DateTime::parse_from_rfc3339(right), + ) { + (Ok(left_dt), Ok(right_dt)) => left_dt.cmp(&right_dt), + _ => left.cmp(right), + } +} + +#[cfg(test)] +mod tests { + use super::*; + use serde_json::json; + + #[test] + fn timeline_to_jobs_empty_input_returns_empty_vec() { + assert!(timeline_to_jobs(&json!({})).is_empty()); + assert!(timeline_to_jobs(&json!({ "records": [] })).is_empty()); + } + + #[test] + fn timeline_to_jobs_filters_non_job_records() { + let timeline = json!({ + "records": [ + { + "name": "Build Stage", + "type": "Stage", + "state": "completed", + "result": "succeeded" + }, + { + "name": "Agent", + "type": "Job", + "state": "completed", + "result": "succeeded", + "startTime": "2026-01-01T00:00:00Z", + "finishTime": "2026-01-01T00:01:00Z" + }, + { + "name": "Checkout", + "type": "Task", + "state": "completed", + "result": "succeeded" + } + ] + }); + + let jobs = timeline_to_jobs(&timeline); + + assert_eq!(jobs.len(), 1); + assert_eq!(jobs[0].name, "Agent"); + } + + #[test] + fn timeline_to_jobs_computes_duration() { + let timeline = json!({ + "records": [{ + "name": "Agent", + "type": "Job", + "state": "completed", + "result": "succeeded", + "startTime": "2026-01-01T00:00:00Z", + "finishTime": "2026-01-01T00:01:30Z" + }] + }); + + let jobs = timeline_to_jobs(&timeline); + + assert_eq!(jobs[0].duration.as_deref(), Some("1m 30s")); + } + + #[test] + fn timeline_to_jobs_omits_result_and_duration_for_unfinished_job() { + let timeline = json!({ + "records": [{ + "name": "Detection", + "type": "Job", + "state": "inProgress", + "result": "succeeded", + "startTime": "2026-01-01T00:00:00Z" + }] + }); + + let jobs = timeline_to_jobs(&timeline); + + assert_eq!(jobs[0].result, None); + assert_eq!(jobs[0].duration, None); + } + + #[test] + fn timeline_to_jobs_sorts_by_start_time() { + let timeline = json!({ + "records": [ + { + "name": "A", + "type": 
"Job", + "state": "completed", + "result": "succeeded", + "startTime": "2026-01-01T00:02:00Z" + }, + { + "name": "B", + "type": "Job", + "state": "completed", + "result": "succeeded", + "startTime": "2026-01-01T00:01:00Z" + } + ] + }); + + let jobs = timeline_to_jobs(&timeline); + + assert_eq!( + jobs.iter().map(|job| job.name.as_str()).collect::>(), + vec!["B", "A"] + ); + } + + #[test] + fn timeline_to_jobs_parses_real_ado_shape() { + let timeline = json!({ + "records": [ + { + "id": "1", + "name": "Agent", + "type": "Job", + "state": "completed", + "result": "succeeded", + "startTime": "2026-01-01T00:00:00Z", + "finishTime": "2026-01-01T00:01:00Z", + "parentId": "stage-1" + }, + { + "id": "2", + "name": "Detection", + "type": "Job", + "state": "completed", + "result": "succeeded", + "startTime": "2026-01-01T00:01:01Z", + "finishTime": "2026-01-01T00:02:00Z", + "parentId": "stage-2" + }, + { + "id": "3", + "name": "SafeOutputs", + "type": "Job", + "state": "completed", + "result": "succeeded", + "startTime": "2026-01-01T00:02:01Z", + "finishTime": "2026-01-01T00:03:00Z", + "parentId": "stage-3" + } + ] + }); + + let jobs = timeline_to_jobs(&timeline); + + assert_eq!(jobs.len(), 3); + assert_eq!( + jobs.iter().map(|job| job.name.as_str()).collect::>(), + vec!["Agent", "Detection", "SafeOutputs"] + ); + } +} diff --git a/src/audit/analyzers/mcp.rs b/src/audit/analyzers/mcp.rs new file mode 100644 index 00000000..9a08767a --- /dev/null +++ b/src/audit/analyzers/mcp.rs @@ -0,0 +1,607 @@ +//! MCP gateway log analyzer for `ado-aw audit`. 
+ +use anyhow::{Context, Result}; +use serde_json::Value; +use std::collections::{BTreeMap, BTreeSet}; +use std::io::ErrorKind; +use std::path::{Path, PathBuf}; + +use crate::audit::model::{ + MCPFailureReport, MCPServerHealth, MCPServerStats, MCPToolSummary, MCPToolUsageData, +}; + +#[derive(Debug, Default)] +struct ToolAccumulator { + call_count: u64, + error_count: u64, + max_input_size: u64, + max_output_size: u64, +} + +#[derive(Debug, Default)] +struct AnalyzeAllResult { + tool_usage: Option, + server_health: Option, + failures: Vec, +} + +/// Aggregate per-tool MCP usage from gateway logs. +pub async fn analyze_mcp_tool_usage( + mcpg_logs_dir: &std::path::Path, +) -> anyhow::Result> { + Ok(analyze_all(mcpg_logs_dir).await?.tool_usage) +} + +/// Aggregate per-server MCP health from gateway logs. +pub async fn analyze_mcp_server_health( + mcpg_logs_dir: &std::path::Path, +) -> anyhow::Result> { + Ok(analyze_all(mcpg_logs_dir).await?.server_health) +} + +/// Extract MCP failure reports (tool_error events) from gateway logs. 
+pub async fn extract_mcp_failures( + mcpg_logs_dir: &std::path::Path, +) -> anyhow::Result> { + Ok(analyze_all(mcpg_logs_dir).await?.failures) +} + +async fn analyze_all(mcpg_logs_dir: &Path) -> Result { + match tokio::fs::metadata(mcpg_logs_dir).await { + Ok(metadata) => { + anyhow::ensure!( + metadata.is_dir(), + "MCPG logs path is not a directory: {}", + mcpg_logs_dir.display() + ); + } + Err(error) if error.kind() == ErrorKind::NotFound => return Ok(AnalyzeAllResult::default()), + Err(error) => { + return Err(error) + .with_context(|| format!("Failed to stat {}", mcpg_logs_dir.display())); + } + } + + let file_paths = read_log_file_paths(mcpg_logs_dir).await?; + let mut saw_recognizable_event = false; + let mut per_tool = BTreeMap::<(String, String), ToolAccumulator>::new(); + let mut observed_servers = BTreeSet::::new(); + let mut server_error_counts = BTreeMap::::new(); + let mut failures = Vec::new(); + + for path in file_paths { + let contents = tokio::fs::read_to_string(&path) + .await + .with_context(|| format!("Failed to read MCP gateway log {}", path.display()))?; + + for line in contents.lines() { + let trimmed = line.trim(); + if trimmed.is_empty() { + continue; + } + + let value: Value = match serde_json::from_str(trimmed) { + Ok(value) => value, + Err(_) => continue, + }; + + let Some(event_kind) = extract_string_field(&value, &["event", "kind", "type"]) + .map(|kind| kind.to_ascii_lowercase()) + else { + continue; + }; + + match event_kind.as_str() { + "tool_call" => { + saw_recognizable_event = true; + let server = + extract_string_field(&value, &["server", "mcp_server", "provider"]) + .unwrap_or_default(); + let tool = extract_string_field(&value, &["tool", "name"]); + + if !server.is_empty() { + observed_servers.insert(server.clone()); + } + + if let Some(tool) = tool.filter(|tool| !tool.is_empty()) { + let entry = per_tool.entry((server, tool)).or_default(); + update_tool_sizes(entry, &value); + entry.call_count += 1; + } + } + "tool_error" => 
{ + saw_recognizable_event = true; + let server = + extract_string_field(&value, &["server", "mcp_server", "provider"]) + .unwrap_or_default(); + let tool = extract_string_field(&value, &["tool", "name"]); + + if !server.is_empty() { + observed_servers.insert(server.clone()); + } + + if let Some(tool_name) = tool.clone().filter(|tool| !tool.is_empty()) { + let entry = per_tool.entry((server, tool_name)).or_default(); + update_tool_sizes(entry, &value); + entry.error_count += 1; + } + + failures.push(MCPFailureReport { + tool: tool.filter(|tool| !tool.is_empty()), + context: None, + reason: extract_stringish_field(&value, &["error"]), + timestamp: extract_string_field( + &value, + &["ts", "time", "timestamp", "@timestamp"], + ), + extra: value, + }); + } + "server_error" => { + saw_recognizable_event = true; + let server = + extract_string_field(&value, &["server", "mcp_server", "provider"]) + .unwrap_or_default(); + + if !server.is_empty() { + observed_servers.insert(server.clone()); + *server_error_counts.entry(server).or_default() += 1; + } + + failures.push(MCPFailureReport { + tool: None, + context: None, + reason: extract_stringish_field(&value, &["error"]), + timestamp: extract_string_field( + &value, + &["ts", "time", "timestamp", "@timestamp"], + ), + extra: value, + }); + } + "server_start" | "server_stop" => { + saw_recognizable_event = true; + if let Some(server) = + extract_string_field(&value, &["server", "mcp_server", "provider"]) + { + observed_servers.insert(server); + } + } + _ => {} + } + } + } + + if !saw_recognizable_event { + return Ok(AnalyzeAllResult::default()); + } + + let mut tools: Vec = per_tool + .iter() + .map(|((server, tool), stats)| MCPToolSummary { + name: format_tool_name(server, tool), + call_count: stats.call_count, + error_count: stats.error_count, + max_input_size: stats.max_input_size, + max_output_size: stats.max_output_size, + }) + .collect(); + tools.sort_by(|left, right| { + right + .call_count + .cmp(&left.call_count) + 
.then_with(|| left.name.cmp(&right.name)) + }); + + let mut server_rollups = BTreeMap::::new(); + for server in observed_servers { + server_rollups.insert( + server.clone(), + MCPServerStats { + name: server, + ..MCPServerStats::default() + }, + ); + } + + for ((server, _tool), stats) in &per_tool { + if server.is_empty() { + continue; + } + let server_entry = server_rollups + .entry(server.clone()) + .or_insert_with(|| MCPServerStats { + name: server.clone(), + ..MCPServerStats::default() + }); + server_entry.total_calls += stats.call_count; + server_entry.error_count += stats.error_count; + } + + for (server, error_count) in server_error_counts { + let server_entry = server_rollups + .entry(server.clone()) + .or_insert_with(|| MCPServerStats { + name: server, + ..MCPServerStats::default() + }); + server_entry.error_count += error_count; + } + + let mut servers: Vec = server_rollups + .into_values() + .map(|mut stats| { + stats.error_rate = if stats.total_calls == 0 { + 0.0 + } else { + stats.error_count as f64 / stats.total_calls as f64 + }; + stats.unreliable = stats.error_rate > 0.10 && stats.total_calls >= 5; + stats + }) + .collect(); + servers.sort_by(|left, right| { + right + .total_calls + .cmp(&left.total_calls) + .then_with(|| left.name.cmp(&right.name)) + }); + + Ok(AnalyzeAllResult { + tool_usage: Some(MCPToolUsageData { tools }), + server_health: Some(MCPServerHealth { servers }), + failures, + }) +} + +async fn read_log_file_paths(dir: &Path) -> Result> { + let mut entries = tokio::fs::read_dir(dir) + .await + .with_context(|| format!("Failed to read {}", dir.display()))?; + let mut paths = Vec::new(); + + while let Some(entry) = entries + .next_entry() + .await + .with_context(|| format!("Failed to iterate {}", dir.display()))? 
/// Whether `path` has a `.jsonl` or `.log` extension (ASCII case-insensitive).
///
/// Paths with no extension, or with an extension that is not valid UTF-8, are
/// rejected.
fn is_mcp_log_file(path: &Path) -> bool {
    path.extension()
        .and_then(|extension| extension.to_str())
        .is_some_and(|extension| {
            extension.eq_ignore_ascii_case("jsonl") || extension.eq_ignore_ascii_case("log")
        })
}

/// Display name for a tool: `server.tool`, or just `tool` when the server is
/// unknown (empty).
fn format_tool_name(server: &str, tool: &str) -> String {
    if server.is_empty() {
        tool.to_string()
    } else {
        format!("{server}.{tool}")
    }
}
.as_i64() + .and_then(|value| (value >= 0).then_some(value as u64)) + }), + Value::String(value) => value.trim().parse::().ok(), + _ => None, + }) +} + +#[cfg(test)] +mod tests { + use super::*; + use tempfile::TempDir; + + async fn write_log_file(dir: &Path, name: &str, contents: &str) { + tokio::fs::create_dir_all(dir).await.unwrap(); + tokio::fs::write(dir.join(name), contents).await.unwrap(); + } + + #[tokio::test] + async fn absent_directory_returns_none_or_empty() { + let temp_dir = TempDir::new().unwrap(); + let mcpg_dir = temp_dir.path().join("logs").join("mcpg"); + + assert_eq!(analyze_mcp_tool_usage(&mcpg_dir).await.unwrap(), None); + assert_eq!(analyze_mcp_server_health(&mcpg_dir).await.unwrap(), None); + assert_eq!(extract_mcp_failures(&mcpg_dir).await.unwrap(), Vec::new()); + } + + #[tokio::test] + async fn empty_directory_returns_none_or_empty() { + let temp_dir = TempDir::new().unwrap(); + let mcpg_dir = temp_dir.path().join("logs").join("mcpg"); + tokio::fs::create_dir_all(&mcpg_dir).await.unwrap(); + + assert_eq!(analyze_mcp_tool_usage(&mcpg_dir).await.unwrap(), None); + assert_eq!(analyze_mcp_server_health(&mcpg_dir).await.unwrap(), None); + assert_eq!(extract_mcp_failures(&mcpg_dir).await.unwrap(), Vec::new()); + } + + #[tokio::test] + async fn aggregates_two_tool_calls_for_one_tool() { + let temp_dir = TempDir::new().unwrap(); + let mcpg_dir = temp_dir.path().join("logs").join("mcpg"); + write_log_file( + &mcpg_dir, + "mcpg.jsonl", + concat!( + "{\"ts\":\"2026-01-01T00:00:00Z\",\"server\":\"github\",\"tool\":\"create_issue\",\"event\":\"tool_call\",\"input_size\":100,\"output_size\":200}\n", + "{\"ts\":\"2026-01-01T00:00:01Z\",\"server\":\"github\",\"tool\":\"create_issue\",\"event\":\"tool_call\",\"input_size\":500,\"output_size\":1000}\n" + ), + ) + .await; + + assert_eq!( + analyze_mcp_tool_usage(&mcpg_dir).await.unwrap(), + Some(MCPToolUsageData { + tools: vec![MCPToolSummary { + name: "github.create_issue".to_string(), + call_count: 2, + 
error_count: 0, + max_input_size: 500, + max_output_size: 1000, + }], + }) + ); + } + + #[tokio::test] + async fn server_health_aggregates_tool_and_server_errors() { + let temp_dir = TempDir::new().unwrap(); + let mcpg_dir = temp_dir.path().join("logs").join("mcpg"); + write_log_file( + &mcpg_dir, + "health.jsonl", + concat!( + "{\"ts\":\"2026-01-01T00:00:00Z\",\"server\":\"github\",\"tool\":\"create_issue\",\"event\":\"tool_call\"}\n", + "{\"ts\":\"2026-01-01T00:00:01Z\",\"server\":\"github\",\"tool\":\"create_issue\",\"event\":\"tool_call\"}\n", + "{\"ts\":\"2026-01-01T00:00:02Z\",\"server\":\"github\",\"tool\":\"create_issue\",\"event\":\"tool_error\",\"error\":\"rate-limit exceeded\"}\n", + "{\"ts\":\"2026-01-01T00:00:03Z\",\"server\":\"github\",\"event\":\"server_error\",\"error\":\"gateway restart\"}\n" + ), + ) + .await; + + assert_eq!( + analyze_mcp_server_health(&mcpg_dir).await.unwrap(), + Some(MCPServerHealth { + servers: vec![MCPServerStats { + name: "github".to_string(), + total_calls: 2, + error_count: 2, + error_rate: 1.0, + unreliable: false, + }], + }) + ); + + assert_eq!( + extract_mcp_failures(&mcpg_dir).await.unwrap(), + vec![ + MCPFailureReport { + tool: Some("create_issue".to_string()), + context: None, + reason: Some("rate-limit exceeded".to_string()), + timestamp: Some("2026-01-01T00:00:02Z".to_string()), + extra: serde_json::json!({ + "ts": "2026-01-01T00:00:02Z", + "server": "github", + "tool": "create_issue", + "event": "tool_error", + "error": "rate-limit exceeded" + }), + }, + MCPFailureReport { + tool: None, + context: None, + reason: Some("gateway restart".to_string()), + timestamp: Some("2026-01-01T00:00:03Z".to_string()), + extra: serde_json::json!({ + "ts": "2026-01-01T00:00:03Z", + "server": "github", + "event": "server_error", + "error": "gateway restart" + }), + }, + ] + ); + } + + #[tokio::test] + async fn unreliable_flag_respects_rate_and_sample_size() { + let temp_dir = TempDir::new().unwrap(); + let mcpg_dir = 
temp_dir.path().join("logs").join("mcpg"); + let mut contents = String::new(); + + for index in 0..10 { + contents.push_str(&format!( + "{{\"ts\":\"2026-01-01T00:00:{index:02}Z\",\"server\":\"azdo\",\"tool\":\"queue_build\",\"event\":\"tool_call\"}}\n" + )); + } + for index in 10..20 { + contents.push_str(&format!( + "{{\"ts\":\"2026-01-01T00:00:{index:02}Z\",\"server\":\"github\",\"tool\":\"create_issue\",\"event\":\"tool_call\"}}\n" + )); + } + for index in 20..22 { + contents.push_str(&format!( + "{{\"ts\":\"2026-01-01T00:00:{index:02}Z\",\"server\":\"github\",\"tool\":\"create_issue\",\"event\":\"tool_error\",\"error\":\"boom\"}}\n" + )); + } + for index in 22..25 { + contents.push_str(&format!( + "{{\"ts\":\"2026-01-01T00:00:{index:02}Z\",\"server\":\"local\",\"tool\":\"echo\",\"event\":\"tool_call\"}}\n" + )); + } + for index in 25..27 { + contents.push_str(&format!( + "{{\"ts\":\"2026-01-01T00:00:{index:02}Z\",\"server\":\"local\",\"tool\":\"echo\",\"event\":\"tool_error\",\"error\":\"boom\"}}\n" + )); + } + + write_log_file(&mcpg_dir, "unreliable.jsonl", &contents).await; + + assert_eq!( + analyze_mcp_server_health(&mcpg_dir).await.unwrap(), + Some(MCPServerHealth { + servers: vec![ + MCPServerStats { + name: "azdo".to_string(), + total_calls: 10, + error_count: 0, + error_rate: 0.0, + unreliable: false, + }, + MCPServerStats { + name: "github".to_string(), + total_calls: 10, + error_count: 2, + error_rate: 0.2, + unreliable: true, + }, + MCPServerStats { + name: "local".to_string(), + total_calls: 3, + error_count: 2, + error_rate: 2.0 / 3.0, + unreliable: false, + }, + ], + }) + ); + } + + #[tokio::test] + async fn supports_field_name_fallbacks() { + let temp_dir = TempDir::new().unwrap(); + let mcpg_dir = temp_dir.path().join("logs").join("mcpg"); + write_log_file( + &mcpg_dir, + "fallbacks.log", + "{\"time\":\"2026-01-01T00:00:05Z\",\"mcp_server\":\"github\",\"name\":\"search_code\",\"kind\":\"tool_error\",\"error\":\"oops\"}\n", + ) + .await; + + 
assert_eq!( + analyze_mcp_tool_usage(&mcpg_dir).await.unwrap(), + Some(MCPToolUsageData { + tools: vec![MCPToolSummary { + name: "github.search_code".to_string(), + call_count: 0, + error_count: 1, + max_input_size: 0, + max_output_size: 0, + }], + }) + ); + assert_eq!( + extract_mcp_failures(&mcpg_dir).await.unwrap(), + vec![MCPFailureReport { + tool: Some("search_code".to_string()), + context: None, + reason: Some("oops".to_string()), + timestamp: Some("2026-01-01T00:00:05Z".to_string()), + extra: serde_json::json!({ + "time": "2026-01-01T00:00:05Z", + "mcp_server": "github", + "name": "search_code", + "kind": "tool_error", + "error": "oops" + }), + }] + ); + } + + #[tokio::test] + async fn skips_malformed_lines_silently() { + let temp_dir = TempDir::new().unwrap(); + let mcpg_dir = temp_dir.path().join("logs").join("mcpg"); + write_log_file( + &mcpg_dir, + "malformed.jsonl", + concat!( + "not-json\n", + "{\"ts\":\"2026-01-01T00:00:06Z\",\"server\":\"github\",\"tool\":\"create_issue\",\"event\":\"tool_call\",\"input_size\":64,\"output_size\":128}\n" + ), + ) + .await; + + assert_eq!( + analyze_mcp_tool_usage(&mcpg_dir).await.unwrap(), + Some(MCPToolUsageData { + tools: vec![MCPToolSummary { + name: "github.create_issue".to_string(), + call_count: 1, + error_count: 0, + max_input_size: 64, + max_output_size: 128, + }], + }) + ); + } +} diff --git a/src/audit/analyzers/missing.rs b/src/audit/analyzers/missing.rs new file mode 100644 index 00000000..ce488374 --- /dev/null +++ b/src/audit/analyzers/missing.rs @@ -0,0 +1,289 @@ +//! Missing-tool, missing-data, and noop audit analyzers backed by +//! `safe_outputs.ndjson` proposal artifacts. +//! +//! This module intentionally does not export an `extract_mcp_failures` function. +//! If agents ever emit `name == "mcp_failure"` safe outputs, they follow the +//! same record shape handled here, but public MCP failure extraction is owned by +//! `crate::audit::analyzers::mcp`. 
+ +use std::collections::VecDeque; +use std::path::{Path, PathBuf}; + +use anyhow::Result; +use serde_json::Value; +use tokio::fs; + +use crate::audit::model::{MissingDataReport, MissingToolReport, NoopReport}; + +const AGENT_OUTPUTS_PREFIX: &str = "agent_outputs_"; +const SAFE_OUTPUTS_FILE: &str = "safe_outputs.ndjson"; + +pub async fn extract_missing_tools(download_root: &Path) -> Result> { + let values = read_safe_outputs_ndjson(download_root).await; + Ok(values + .into_iter() + .filter(|value| matches_signal(value, "missing_tool")) + .map(|value| MissingToolReport { + tool: string_field(&value, "tool"), + context: string_field(&value, "context"), + reason: reason_field(&value), + timestamp: string_field(&value, "timestamp"), + extra: value, + }) + .collect()) +} + +pub async fn extract_missing_data(download_root: &Path) -> Result> { + let values = read_safe_outputs_ndjson(download_root).await; + Ok(values + .into_iter() + .filter(|value| matches_signal(value, "missing_data")) + .map(|value| MissingDataReport { + tool: string_field(&value, "tool"), + context: string_field(&value, "context"), + reason: reason_field(&value), + timestamp: string_field(&value, "timestamp"), + extra: value, + }) + .collect()) +} + +pub async fn extract_noops(download_root: &Path) -> Result> { + let values = read_safe_outputs_ndjson(download_root).await; + Ok(values + .into_iter() + .filter(|value| matches_signal(value, "noop")) + .map(|value| NoopReport { + tool: None, + context: string_field(&value, "context"), + reason: reason_field(&value), + timestamp: string_field(&value, "timestamp"), + extra: value, + }) + .collect()) +} + +async fn read_safe_outputs_ndjson(download_root: &Path) -> Vec { + let mut dirs = VecDeque::from([download_root.to_path_buf()]); + let mut safe_output_paths = Vec::new(); + + while let Some(dir) = dirs.pop_front() { + if is_agent_outputs_dir(&dir) { + if let Some(path) = preferred_safe_outputs_path(&dir).await { + safe_output_paths.push(path); + } + 
continue; + } + + let mut entries = match fs::read_dir(&dir).await { + Ok(entries) => entries, + Err(_) => continue, + }; + + loop { + let Some(entry) = (match entries.next_entry().await { + Ok(entry) => entry, + Err(_) => None, + }) else { + break; + }; + + let file_type = match entry.file_type().await { + Ok(file_type) => file_type, + Err(_) => continue, + }; + + if file_type.is_dir() { + dirs.push_back(entry.path()); + } + } + } + + safe_output_paths.sort(); + safe_output_paths.dedup(); + + let mut values = Vec::new(); + for path in safe_output_paths { + let Ok(contents) = fs::read_to_string(&path).await else { + continue; + }; + + values.extend(contents.lines().filter_map(|line| { + let trimmed = line.trim(); + if trimmed.is_empty() { + None + } else { + serde_json::from_str(trimmed).ok() + } + })); + } + + values +} + +async fn preferred_safe_outputs_path(agent_outputs_dir: &Path) -> Option { + let staging = agent_outputs_dir.join("staging").join(SAFE_OUTPUTS_FILE); + if fs::metadata(&staging).await.is_ok() { + return Some(staging); + } + + let fallback = agent_outputs_dir.join(SAFE_OUTPUTS_FILE); + if fs::metadata(&fallback).await.is_ok() { + return Some(fallback); + } + + None +} + +fn is_agent_outputs_dir(path: &Path) -> bool { + path.file_name() + .and_then(|name| name.to_str()) + .is_some_and(|name| name.starts_with(AGENT_OUTPUTS_PREFIX)) +} + +fn matches_signal(value: &Value, signal: &str) -> bool { + let Some(name) = value.get("name").and_then(Value::as_str) else { + return false; + }; + + name == signal || name == signal.replace('_', "-") +} + +fn string_field(value: &Value, key: &str) -> Option { + value + .get(key) + .and_then(Value::as_str) + .map(ToOwned::to_owned) +} + +fn reason_field(value: &Value) -> Option { + string_field(value, "reason").or_else(|| string_field(value, "description")) +} + +#[cfg(test)] +mod tests { + use std::fs as stdfs; + + use serde_json::json; + use tempfile::TempDir; + + use super::*; + + #[tokio::test] + async fn 
no_safe_outputs_ndjson_returns_empty_reports() { + let temp_dir = TempDir::new().unwrap(); + + assert!( + extract_missing_tools(temp_dir.path()) + .await + .unwrap() + .is_empty() + ); + assert!( + extract_missing_data(temp_dir.path()) + .await + .unwrap() + .is_empty() + ); + assert!(extract_noops(temp_dir.path()).await.unwrap().is_empty()); + } + + #[tokio::test] + async fn mixed_ndjson_file_filters_each_signal_and_ignores_unrelated_entries() { + let temp_dir = TempDir::new().unwrap(); + write_safe_outputs( + &temp_dir, + true, + &[ + r#"{"name":"missing_tool","tool":"bash","context":"ctx-1","reason":"missing bash","timestamp":"2026-05-21T12:01:00Z"}"#, + r#"{"name":"missing-tool","tool":"python","context":"ctx-2","reason":"missing python","timestamp":"2026-05-21T12:02:00Z"}"#, + r#"{"name":"missing_data","tool":"create_work_item","context":"ctx-3","reason":"missing title","timestamp":"2026-05-21T12:03:00Z"}"#, + r#"{"name":"noop","tool":"ignored","context":"ctx-4","reason":"nothing to do","timestamp":"2026-05-21T12:04:00Z"}"#, + r#"{"name":"noop","context":"ctx-5","reason":"already complete","timestamp":"2026-05-21T12:05:00Z"}"#, + r#"{"name":"noop","context":"ctx-6","description":"skipped","timestamp":"2026-05-21T12:06:00Z"}"#, + r#"{"name":"create_pull_request"}"#, + r#"{"name":"add_pr_comment"}"#, + r#"{"name":"report_incomplete"}"#, + r#"{"name":"mcp_failure"}"#, + r#"{"name":"other"}"#, + ], + ); + + let missing_tools = extract_missing_tools(temp_dir.path()).await.unwrap(); + let missing_data = extract_missing_data(temp_dir.path()).await.unwrap(); + let noops = extract_noops(temp_dir.path()).await.unwrap(); + + assert_eq!(missing_tools.len(), 2); + assert_eq!(missing_data.len(), 1); + assert_eq!(noops.len(), 3); + assert_eq!(missing_tools[0].tool.as_deref(), Some("bash")); + assert_eq!(missing_tools[1].tool.as_deref(), Some("python")); + assert!(noops.iter().all(|report| report.tool.is_none())); + } + + #[tokio::test] + async fn 
description_field_falls_back_to_reason() { + let temp_dir = TempDir::new().unwrap(); + write_safe_outputs( + &temp_dir, + false, + &[r#"{"name":"missing_data","description":"need schema details"}"#], + ); + + let reports = extract_missing_data(temp_dir.path()).await.unwrap(); + assert_eq!(reports.len(), 1); + assert_eq!(reports[0].reason.as_deref(), Some("need schema details")); + } + + #[tokio::test] + async fn malformed_lines_are_skipped_silently() { + let temp_dir = TempDir::new().unwrap(); + write_safe_outputs( + &temp_dir, + true, + &[ + r#"{"name":"noop","context":"first"}"#, + r#"{"name":"noop","context": }"#, + r#"{"name":"noop","context":"second"}"#, + ], + ); + + let reports = extract_noops(temp_dir.path()).await.unwrap(); + assert_eq!(reports.len(), 2); + assert_eq!(reports[0].context.as_deref(), Some("first")); + assert_eq!(reports[1].context.as_deref(), Some("second")); + } + + #[tokio::test] + async fn extra_payload_is_preserved_on_report() { + let temp_dir = TempDir::new().unwrap(); + let original = json!({ + "name": "missing_tool", + "tool": "azure-devops", + "context": "work-item-sync", + "reason": "Tool not configured", + "timestamp": "2026-05-21T12:03:00Z", + "nested": { + "required": true, + "attempts": [1, 2, 3] + } + }); + write_safe_outputs(&temp_dir, true, &[&original.to_string()]); + + let reports = extract_missing_tools(temp_dir.path()).await.unwrap(); + assert_eq!(reports.len(), 1); + assert_eq!(reports[0].extra, original); + } + + fn write_safe_outputs(temp_dir: &TempDir, use_staging_path: bool, lines: &[&str]) { + let base_dir = temp_dir.path().join("agent_outputs_42"); + let file_path = if use_staging_path { + base_dir.join("staging").join(SAFE_OUTPUTS_FILE) + } else { + base_dir.join(SAFE_OUTPUTS_FILE) + }; + + stdfs::create_dir_all(file_path.parent().unwrap()).unwrap(); + stdfs::write(file_path, lines.join("\n")).unwrap(); + } +} diff --git a/src/audit/analyzers/mod.rs b/src/audit/analyzers/mod.rs new file mode 100644 index 
00000000..e389b914 --- /dev/null +++ b/src/audit/analyzers/mod.rs @@ -0,0 +1,14 @@ +//! Analyzers that consume artifact files produced by ado-aw pipelines +//! and populate sections of [`crate::audit::model::AuditData`]. +//! +//! Each submodule owns one signal: firewall, mcp, otel, safe-outputs, +//! detection, missing-tools/data/noops, build timeline. + +pub mod detection; +pub mod firewall; +pub mod jobs; +pub mod mcp; +pub mod missing; +pub mod otel; +pub mod policy; +pub mod safe_outputs; diff --git a/src/audit/analyzers/otel.rs b/src/audit/analyzers/otel.rs new file mode 100644 index 00000000..0c80920d --- /dev/null +++ b/src/audit/analyzers/otel.rs @@ -0,0 +1,228 @@ +use anyhow::Context; +use log::debug; +use std::path::{Path, PathBuf}; + +use crate::agent_stats::AgentStats; +use crate::audit::model::{AuditEngineConfig, AwInfo, MetricsData, PerformanceMetrics}; + +/// Combined OTel + aw_info analysis result. +#[derive(Debug, Clone, Default)] +pub struct OtelAnalysis { + pub metrics: MetricsData, + pub engine_config: Option, + pub performance: Option, + pub aw_info: Option, +} + +/// Read `staging/otel.jsonl` + `staging/aw_info.json` from an agent +/// outputs directory and produce metrics, engine config, performance +/// metrics, and aw_info for the audit report. +/// +/// `agent_outputs_dir` should be the path to the extracted artifact +/// root (e.g. `/build-/agent_outputs_/`). +/// +/// Both files are optional: +/// - OTel may be absent (non-Copilot engine, or older builds). +/// - aw_info may be absent (older builds; will become standard once the +/// `audit-pipeline-awinfo` change merges). +/// The function never errors on absence; it logs a `debug!` and leaves +/// the corresponding field empty / None. 
+pub async fn analyze_otel(agent_outputs_dir: &std::path::Path) -> anyhow::Result { + let mut analysis = OtelAnalysis::default(); + + if let Some(otel_path) = locate_agent_output_file(agent_outputs_dir, "otel.jsonl") { + let stats = AgentStats::from_otel_file(&otel_path, "audit") + .await + .with_context(|| format!("Failed to analyze OTel file: {}", otel_path.display()))?; + + let total_tokens = stats.input_tokens + stats.output_tokens; + analysis.metrics = MetricsData { + token_usage: total_tokens, + effective_tokens: total_tokens, + estimated_cost: 0.0, + turns: stats.turns, + error_count: 0, + warning_count: 0, + }; + + let tokens_per_minute = if stats.duration_seconds > 0.0 { + Some(total_tokens as f64 / (stats.duration_seconds / 60.0)) + } else { + None + }; + + if let Some(tokens_per_minute) = tokens_per_minute { + analysis.performance = Some(PerformanceMetrics { + tokens_per_minute: Some(tokens_per_minute), + cost_efficiency: None, + most_used_tool: None, + network_requests: None, + }); + } + } else { + debug!( + "No otel.jsonl found under {} (checked staging/ and top-level)", + agent_outputs_dir.display() + ); + } + + if let Some(aw_info_path) = locate_agent_output_file(agent_outputs_dir, "aw_info.json") { + let aw_info_contents = tokio::fs::read_to_string(&aw_info_path) + .await + .with_context(|| format!("Failed to read aw_info file: {}", aw_info_path.display()))?; + let aw_info = serde_json::from_str::(&aw_info_contents) + .with_context(|| format!("Failed to parse aw_info file: {}", aw_info_path.display()))?; + + analysis.engine_config = Some(AuditEngineConfig { + engine: aw_info.engine.clone().unwrap_or_default(), + model: aw_info.model.clone(), + version: aw_info.compiler_version.clone(), + timeout_minutes: None, + }); + analysis.aw_info = Some(aw_info); + } else { + debug!( + "No aw_info.json found under {} (checked staging/ and top-level)", + agent_outputs_dir.display() + ); + } + + Ok(analysis) +} + +fn locate_agent_output_file(agent_outputs_dir: 
&Path, file_name: &str) -> Option { + [ + agent_outputs_dir.join("staging").join(file_name), + agent_outputs_dir.join(file_name), + ] + .into_iter() + .find(|path| path.is_file()) +} + +#[cfg(test)] +mod tests { + use super::*; + use tempfile::TempDir; + + const AW_INFO_JSON: &str = r#"{"schema":"ado-aw/aw_info/1","engine":"copilot","model":"claude-sonnet-4.5","agent_name":"test","target":"standalone","source":"agents/test.md","compiler_version":"0.30.0"}"#; + const COPILOT_OTEL_FIXTURE: &str = include_str!("../../../tests/fixtures/copilot-otel.jsonl"); + + async fn write_file(path: &Path, contents: &str) { + if let Some(parent) = path.parent() { + tokio::fs::create_dir_all(parent).await.unwrap(); + } + tokio::fs::write(path, contents).await.unwrap(); + } + + #[tokio::test] + async fn analyze_otel_returns_defaults_when_files_absent() { + let temp_dir = TempDir::new().unwrap(); + + let analysis = analyze_otel(temp_dir.path()).await.unwrap(); + + assert_eq!(analysis.metrics, MetricsData::default()); + assert!(analysis.engine_config.is_none()); + assert!(analysis.performance.is_none()); + assert!(analysis.aw_info.is_none()); + } + + #[tokio::test] + async fn analyze_otel_reads_aw_info_only() { + let temp_dir = TempDir::new().unwrap(); + let aw_info_path = temp_dir.path().join("staging").join("aw_info.json"); + write_file(&aw_info_path, AW_INFO_JSON).await; + + let analysis = analyze_otel(temp_dir.path()).await.unwrap(); + + assert_eq!(analysis.metrics, MetricsData::default()); + assert!(analysis.performance.is_none()); + + let aw_info = analysis.aw_info.expect("expected aw_info"); + assert_eq!(aw_info.engine.as_deref(), Some("copilot")); + assert_eq!(aw_info.model.as_deref(), Some("claude-sonnet-4.5")); + assert_eq!(aw_info.compiler_version.as_deref(), Some("0.30.0")); + + let engine_config = analysis.engine_config.expect("expected engine config"); + assert_eq!(engine_config.engine, "copilot"); + assert_eq!(engine_config.model.as_deref(), Some("claude-sonnet-4.5")); + 
assert_eq!(engine_config.version.as_deref(), Some("0.30.0")); + assert_eq!(engine_config.timeout_minutes, None); + } + + #[tokio::test] + async fn analyze_otel_reads_otel_only() { + let temp_dir = TempDir::new().unwrap(); + let otel_path = temp_dir.path().join("staging").join("otel.jsonl"); + write_file(&otel_path, COPILOT_OTEL_FIXTURE).await; + + let analysis = analyze_otel(temp_dir.path()).await.unwrap(); + + assert!(analysis.metrics.token_usage > 0); + assert_eq!( + analysis.metrics.effective_tokens, + analysis.metrics.token_usage + ); + assert!(analysis.metrics.turns > 0); + assert!(analysis.engine_config.is_none()); + assert!(analysis.aw_info.is_none()); + } + + #[tokio::test] + async fn analyze_otel_reads_both_files() { + let temp_dir = TempDir::new().unwrap(); + let staging_dir = temp_dir.path().join("staging"); + write_file(&staging_dir.join("otel.jsonl"), COPILOT_OTEL_FIXTURE).await; + write_file(&staging_dir.join("aw_info.json"), AW_INFO_JSON).await; + + let analysis = analyze_otel(temp_dir.path()).await.unwrap(); + + assert!(analysis.metrics.token_usage > 0); + assert!(analysis.metrics.turns > 0); + assert_eq!( + analysis + .engine_config + .as_ref() + .and_then(|config| config.model.as_deref()), + Some("claude-sonnet-4.5") + ); + assert_eq!( + analysis + .aw_info + .as_ref() + .and_then(|info| info.engine.as_deref()), + Some("copilot") + ); + assert!( + analysis + .performance + .as_ref() + .and_then(|performance| performance.tokens_per_minute) + .is_some_and(|value| value > 0.0) + ); + } + + #[tokio::test] + async fn analyze_otel_falls_back_to_top_level_files() { + let temp_dir = TempDir::new().unwrap(); + write_file(&temp_dir.path().join("otel.jsonl"), COPILOT_OTEL_FIXTURE).await; + write_file(&temp_dir.path().join("aw_info.json"), AW_INFO_JSON).await; + + let analysis = analyze_otel(temp_dir.path()).await.unwrap(); + + assert!(analysis.metrics.token_usage > 0); + assert_eq!( + analysis + .aw_info + .as_ref() + .and_then(|info| 
info.engine.as_deref()), + Some("copilot") + ); + assert_eq!( + analysis + .engine_config + .as_ref() + .map(|config| config.engine.as_str()), + Some("copilot") + ); + } +} diff --git a/src/audit/analyzers/policy.rs b/src/audit/analyzers/policy.rs new file mode 100644 index 00000000..3857a6e6 --- /dev/null +++ b/src/audit/analyzers/policy.rs @@ -0,0 +1,398 @@ +//! AWF policy artifact analyzer for `ado-aw audit`. + +use anyhow::{Context, Result}; +use log::warn; +use serde_json::Value; +use std::collections::BTreeMap; +use std::path::Path; +use tokio::io::{AsyncBufReadExt, BufReader}; + +use crate::audit::model::{PolicyAnalysis, PolicyRule}; + +const MANIFEST_FILENAME: &str = "policy-manifest.json"; +const AUDIT_FILENAME: &str = "audit.jsonl"; +const UNMATCHED_PATTERN: &str = "(unmatched)"; +const UNKNOWN_VERDICT: &str = "(unknown)"; + +/// Analyze AWF policy artifacts in `/logs/firewall/`. +/// +/// Reads `policy-manifest.json` for the static rule list and `audit.jsonl` +/// for per-rule hit counts. Either file may be absent — the analyzer +/// returns `Ok(None)` only when BOTH are absent. When only one is +/// present, it produces a partial result (rules with zero hits, or +/// hit counts against synthetic unmatched rules). 
+pub async fn analyze_policy(firewall_logs_dir: &Path) -> Result> { + let manifest = read_policy_manifest(&firewall_logs_dir.join(MANIFEST_FILENAME)).await?; + let audit_hits = read_policy_audit(&firewall_logs_dir.join(AUDIT_FILENAME)).await?; + + if manifest.is_none() && audit_hits.is_none() { + return Ok(None); + } + + let mut policies = manifest.unwrap_or_default(); + let mut unmatched_hits: BTreeMap = BTreeMap::new(); + + if let Some(audit_hits) = audit_hits { + for (rule_pattern, line_verdict) in audit_hits { + if let Some(policy) = policies + .iter_mut() + .find(|policy| policy.pattern == rule_pattern) + { + policy.hit_count += 1; + } else { + *unmatched_hits.entry(line_verdict).or_default() += 1; + } + } + } + + for (verdict, hit_count) in unmatched_hits { + policies.push(PolicyRule { + pattern: String::from(UNMATCHED_PATTERN), + verdict, + hit_count, + }); + } + + policies.sort_by(|left, right| { + right + .hit_count + .cmp(&left.hit_count) + .then_with(|| left.pattern.cmp(&right.pattern)) + .then_with(|| left.verdict.cmp(&right.verdict)) + }); + + let allow_count = policies + .iter() + .filter(|policy| normalize_verdict(&policy.verdict) == Some(NormalizedVerdict::Allowed)) + .map(|policy| policy.hit_count) + .sum(); + let deny_count = policies + .iter() + .filter(|policy| normalize_verdict(&policy.verdict) == Some(NormalizedVerdict::Denied)) + .map(|policy| policy.hit_count) + .sum(); + + Ok(Some(PolicyAnalysis { + policies, + allow_count, + deny_count, + })) +} + +async fn read_policy_manifest(path: &Path) -> Result>> { + let contents = match tokio::fs::read_to_string(path).await { + Ok(contents) => contents, + Err(error) if error.kind() == std::io::ErrorKind::NotFound => return Ok(None), + Err(error) => { + return Err(error) + .with_context(|| format!("Failed to read policy manifest: {}", path.display())); + } + }; + + let manifest: Value = match serde_json::from_str(&contents) { + Ok(manifest) => manifest, + Err(error) => { + warn!( + "Failed to parse 
policy manifest '{}': {}", + path.display(), + error + ); + return Ok(None); + } + }; + + let mut policies = Vec::new(); + if let Some(rules) = manifest.get("rules").and_then(Value::as_array) { + for rule in rules { + let Some(pattern) = extract_string(rule, &["pattern", "host", "domain"]) else { + continue; + }; + + policies.push(PolicyRule { + pattern, + verdict: extract_string(rule, &["verdict", "action", "status"]).unwrap_or_default(), + hit_count: 0, + }); + } + } + + Ok(Some(policies)) +} + +async fn read_policy_audit(path: &Path) -> Result>> { + let file = match tokio::fs::File::open(path).await { + Ok(file) => file, + Err(error) if error.kind() == std::io::ErrorKind::NotFound => return Ok(None), + Err(error) => { + return Err(error) + .with_context(|| format!("Failed to open policy audit log: {}", path.display())); + } + }; + + let reader = BufReader::new(file); + let mut lines = reader.lines(); + let mut hits = Vec::new(); + + while let Some(line) = lines + .next_line() + .await + .with_context(|| format!("Failed to read policy audit log: {}", path.display()))? 
/// Canonical outcome of a policy verdict string.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum NormalizedVerdict {
    Allowed,
    Denied,
}

/// Maps a free-form verdict string onto a [`NormalizedVerdict`].
///
/// Surrounding whitespace and ASCII case are ignored. `allow`, `allowed` and
/// `pass` normalize to [`NormalizedVerdict::Allowed`]; `deny`, `denied`,
/// `block` and `blocked` normalize to [`NormalizedVerdict::Denied`]. Any
/// other input, including the empty string, yields `None`.
fn normalize_verdict(verdict: &str) -> Option<NormalizedVerdict> {
    const ALLOWED: &[&str] = &["allow", "allowed", "pass"];
    const DENIED: &[&str] = &["deny", "denied", "block", "blocked"];

    let verdict = verdict.trim();
    // Case-insensitive comparison avoids allocating a lowercased copy.
    let is_one_of = |spellings: &[&str]| {
        spellings
            .iter()
            .any(|spelling| verdict.eq_ignore_ascii_case(spelling))
    };

    if is_one_of(ALLOWED) {
        Some(NormalizedVerdict::Allowed)
    } else if is_one_of(DENIED) {
        Some(NormalizedVerdict::Denied)
    } else {
        None
    }
}
fn manifest_only_returns_zero_hit_rules() { + let dir = tempfile::tempdir().expect("tempdir"); + write_fixture( + &dir, + MANIFEST_FILENAME, + r#"{ + "version": 1, + "rules": [ + {"pattern": "z.example.com", "verdict": "deny"}, + {"host": "a.example.com", "action": "allow"} + ] +}"#, + ); + + let analysis = analyze_policy(dir.path()) + .await + .expect("analysis should succeed") + .expect("analysis should exist"); + + assert_eq!(analysis.allow_count, 0); + assert_eq!(analysis.deny_count, 0); + assert_eq!( + analysis.policies, + vec![ + PolicyRule { + pattern: String::from("a.example.com"), + verdict: String::from("allow"), + hit_count: 0, + }, + PolicyRule { + pattern: String::from("z.example.com"), + verdict: String::from("deny"), + hit_count: 0, + }, + ] + ); + } + + #[tokio::test] + async fn audit_only_returns_synthetic_unmatched_rules() { + let dir = tempfile::tempdir().expect("tempdir"); + write_fixture( + &dir, + AUDIT_FILENAME, + r#"{"rule":"api.github.com","verdict":"allow"} +not json +{"rule":"evil.example.com","status":"deny"} +{"pattern":"another.example.com","action":"allow"} +"#, + ); + + let analysis = analyze_policy(dir.path()) + .await + .expect("analysis should succeed") + .expect("analysis should exist"); + + assert_eq!(analysis.allow_count, 2); + assert_eq!(analysis.deny_count, 1); + assert_eq!( + analysis.policies, + vec![ + PolicyRule { + pattern: String::from(UNMATCHED_PATTERN), + verdict: String::from("allow"), + hit_count: 2, + }, + PolicyRule { + pattern: String::from(UNMATCHED_PATTERN), + verdict: String::from("deny"), + hit_count: 1, + }, + ] + ); + } + + #[tokio::test] + async fn combines_manifest_rules_and_audit_hits() { + let dir = tempfile::tempdir().expect("tempdir"); + write_fixture( + &dir, + MANIFEST_FILENAME, + r#"{ + "version": 1, + "rules": [ + {"pattern": "api.github.com", "verdict": "allow"}, + {"pattern": "*.azure.com", "verdict": "allow"}, + {"domain": "*", "status": "deny"} + ] +}"#, + ); + write_fixture( + &dir, + 
AUDIT_FILENAME, + r#"{"timestamp":"2025-01-01T00:00:00Z","host":"api.github.com","rule":"api.github.com","verdict":"allow"} +{"timestamp":"2025-01-01T00:00:01Z","host":"foo.azure.com","rule":"*.azure.com","verdict":"allow"} +{"timestamp":"2025-01-01T00:00:02Z","host":"bar.azure.com","rule":"*.azure.com","verdict":"allow"} +{"timestamp":"2025-01-01T00:00:03Z","host":"api.github.com","rule":"api.github.com","verdict":"allow"} +{"timestamp":"2025-01-01T00:00:04Z","host":"evil.example.com","rule":"*","verdict":"deny"} +{"timestamp":"2025-01-01T00:00:05Z","host":"other.example.com","rule":"missing-allow","verdict":"allow"} +{"timestamp":"2025-01-01T00:00:06Z","host":"worse.example.com","rule":"missing-deny","status":"deny"} +"#, + ); + + let analysis = analyze_policy(dir.path()) + .await + .expect("analysis should succeed") + .expect("analysis should exist"); + + assert_eq!(analysis.allow_count, 5); + assert_eq!(analysis.deny_count, 2); + assert_eq!(analysis.policies.len(), 5); + assert_eq!( + find_policy(&analysis, "api.github.com", "allow").hit_count, + 2 + ); + assert_eq!(find_policy(&analysis, "*.azure.com", "allow").hit_count, 2); + assert_eq!(find_policy(&analysis, "*", "deny").hit_count, 1); + assert_eq!( + find_policy(&analysis, UNMATCHED_PATTERN, "allow").hit_count, + 1 + ); + assert_eq!( + find_policy(&analysis, UNMATCHED_PATTERN, "deny").hit_count, + 1 + ); + assert_eq!( + analysis + .policies + .iter() + .map(|policy| ( + policy.pattern.as_str(), + policy.verdict.as_str(), + policy.hit_count + )) + .collect::>(), + vec![ + ("*.azure.com", "allow", 2), + ("api.github.com", "allow", 2), + (UNMATCHED_PATTERN, "allow", 1), + (UNMATCHED_PATTERN, "deny", 1), + ("*", "deny", 1), + ] + ); + } + + #[tokio::test] + async fn malformed_manifest_is_treated_as_absent() { + let dir = tempfile::tempdir().expect("tempdir"); + write_fixture(&dir, MANIFEST_FILENAME, "not json"); + write_fixture( + &dir, + AUDIT_FILENAME, + r#"{"rule":"missing-rule","verdict":"deny"} +"#, + ); 
+ + let analysis = analyze_policy(dir.path()) + .await + .expect("analysis should succeed") + .expect("analysis should exist"); + + assert_eq!(analysis.allow_count, 0); + assert_eq!(analysis.deny_count, 1); + assert_eq!( + analysis.policies, + vec![PolicyRule { + pattern: String::from(UNMATCHED_PATTERN), + verdict: String::from("deny"), + hit_count: 1, + }] + ); + } +} diff --git a/src/audit/analyzers/safe_outputs.rs b/src/audit/analyzers/safe_outputs.rs new file mode 100644 index 00000000..8bce7fbd --- /dev/null +++ b/src/audit/analyzers/safe_outputs.rs @@ -0,0 +1,915 @@ +//! Safe-output trace analyzer for `ado-aw audit`. + +use anyhow::Context; +use serde::Deserialize; +use serde_json::Value; +use std::collections::{BTreeMap, VecDeque}; +use std::fs; +use std::io::ErrorKind; +use std::path::{Path, PathBuf}; + +use crate::audit::model::{ + CreatedItemReport, Finding, RejectedSafeOutputsRollup, SafeOutputExecution, + SafeOutputExecutionItem, SafeOutputStatus, SafeOutputSummary, Severity, +}; +use crate::ndjson::{EXECUTED_NDJSON_FILENAME, SAFE_OUTPUT_FILENAME, read_ndjson_file}; + +/// Combined safe-output analysis result. +#[derive(Debug, Clone, Default)] +pub struct SafeOutputAnalysis { + pub summary: Option, + pub execution: Option, + pub rollup: Option, + pub created_items: Vec, + /// Severity-`high` findings emitted when proposals were rejected by + /// the aggregate detection gate. At most one finding per audit run. 
+ pub findings: Vec, +} + +#[derive(Debug, Clone, Default, Deserialize)] +#[serde(default)] +struct DetectionVerdict { + prompt_injection: bool, + secret_leak: bool, + malicious_patch: bool, + reasons: Vec, +} + +#[derive(Debug, Clone, Default, Deserialize)] +#[serde(default)] +struct ExecutionRecord { + name: String, + status: String, + context: Option, + result: Option, + error: Option, +} + +#[derive(Debug, Clone)] +struct ProposalRecord { + index: usize, + name: String, + context: Option, + proposal: Value, +} + +#[derive(Debug, Clone)] +struct IndexedExecutionRecord { + index: usize, + record: ExecutionRecord, +} + +/// Analyze safe-output proposal, detection, and execution artifacts under one download root. +pub async fn analyze_safe_outputs( + download_root: &std::path::Path, +) -> anyhow::Result { + let proposals_path = find_proposals_file(download_root)?; + let detection_path = find_detection_file(download_root)?; + let executions_path = find_execution_file(download_root)?; + + let proposals = load_proposals(proposals_path.as_deref()).await?; + let detection = load_detection_verdict(detection_path.as_deref()).await?; + let executions = load_execution_records(executions_path.as_deref()).await?; + let detection_gate_fired = detection.as_ref().is_some_and(DetectionVerdict::gate_fired); + + let items = if detection_gate_fired { + proposals + .iter() + .map(|proposal| { + build_gate_rejected_item(proposal, detection.as_ref().expect("gate-fired verdict")) + }) + .collect() + } else { + build_execution_items(&proposals, &executions) + }; + + let proposed_count = proposals.len() as u64; + let executed_count = items + .iter() + .filter(|item| item.status == SafeOutputStatus::Executed) + .count() as u64; + let rejected_by_execution_count = items + .iter() + .filter(|item| { + matches!( + item.status, + SafeOutputStatus::RejectedByExecution + | SafeOutputStatus::BudgetExhausted + | SafeOutputStatus::Skipped + ) + }) + .count() as u64; + let not_processed_count = 
items + .iter() + .filter(|item| item.status == SafeOutputStatus::NotProcessedDueToAggregateGate) + .count() as u64; + + let summary = if proposed_count == 0 && items.is_empty() { + None + } else { + Some(SafeOutputSummary { + proposed_count, + executed_count, + rejected_by_execution_count, + not_processed_count, + }) + }; + + let execution = (!items.is_empty()).then_some(SafeOutputExecution { items }); + let created_items = execution + .as_ref() + .map(|execution| { + execution + .items + .iter() + .filter_map(created_item_from_execution_item) + .collect() + }) + .unwrap_or_default(); + let rollup = build_rollup(summary.as_ref(), execution.as_ref(), detection.as_ref()); + let findings = if detection_gate_fired && proposed_count > 0 { + vec![build_detection_finding( + detection.as_ref().expect("gate-fired verdict"), + proposed_count, + )] + } else { + Vec::new() + }; + + Ok(SafeOutputAnalysis { + summary, + execution, + rollup, + created_items, + findings, + }) +} + +impl DetectionVerdict { + fn gate_fired(&self) -> bool { + self.prompt_injection || self.secret_leak || self.malicious_patch + } + + fn flags(&self) -> Vec<&'static str> { + let mut flags = Vec::new(); + if self.prompt_injection { + flags.push("prompt_injection"); + } + if self.secret_leak { + flags.push("secret_leak"); + } + if self.malicious_patch { + flags.push("malicious_patch"); + } + flags + } +} + +async fn load_proposals(path: Option<&Path>) -> anyhow::Result> { + let Some(path) = path else { + return Ok(Vec::new()); + }; + + let values = read_ndjson_file(path).await?; + Ok(values + .into_iter() + .enumerate() + .map(|(index, proposal)| ProposalRecord { + index, + name: extract_string_field(&proposal, &["name"]).unwrap_or_default(), + context: extract_string_field(&proposal, &["context"]), + proposal, + }) + .collect()) +} + +async fn load_detection_verdict(path: Option<&Path>) -> anyhow::Result> { + let Some(path) = path else { + return Ok(None); + }; + + let contents = 
tokio::fs::read_to_string(path) + .await + .with_context(|| format!("Failed to read detection verdict {}", path.display()))?; + let verdict = serde_json::from_str::(&contents) + .with_context(|| format!("Failed to parse detection verdict {}", path.display()))?; + Ok(Some(verdict)) +} + +async fn load_execution_records( + path: Option<&Path>, +) -> anyhow::Result> { + let Some(path) = path else { + return Ok(Vec::new()); + }; + + let values = read_ndjson_file(path).await?; + values + .into_iter() + .enumerate() + .map(|(index, value)| { + let mut record = + serde_json::from_value::(value).with_context(|| { + format!( + "Failed to parse execution record {} from {}", + index, + path.display() + ) + })?; + record.name = record.name.trim().to_string(); + record.status = record.status.trim().to_string(); + record.context = normalize_optional_string(record.context); + record.error = normalize_optional_string(record.error); + Ok(IndexedExecutionRecord { index, record }) + }) + .collect() +} + +fn build_execution_items( + proposals: &[ProposalRecord], + executions: &[IndexedExecutionRecord], +) -> Vec { + let mut proposal_to_execution = vec![None; proposals.len()]; + let mut execution_matched = vec![false; executions.len()]; + let mut context_index = BTreeMap::<(String, String), VecDeque>::new(); + + for proposal in proposals { + let Some(context) = proposal.context.clone() else { + continue; + }; + context_index + .entry((proposal.name.clone(), context)) + .or_default() + .push_back(proposal.index); + } + + for execution in executions { + let Some(context) = execution.record.context.clone() else { + continue; + }; + let Some(proposal_indexes) = + context_index.get_mut(&(execution.record.name.clone(), context)) + else { + continue; + }; + while let Some(proposal_index) = proposal_indexes.pop_front() { + if proposal_to_execution[proposal_index].is_none() { + proposal_to_execution[proposal_index] = Some(execution.index); + execution_matched[execution.index] = true; + break; + 
} + } + } + + for execution in executions { + if execution_matched[execution.index] || execution.record.context.is_some() { + continue; + } + + let Some(proposal) = proposals.get(execution.index) else { + continue; + }; + if proposal_to_execution[proposal.index].is_some() { + continue; + } + if proposal.context.is_none() && proposal.name == execution.record.name { + proposal_to_execution[proposal.index] = Some(execution.index); + execution_matched[execution.index] = true; + } + } + + let mut items = Vec::with_capacity(proposals.len() + executions.len()); + for proposal in proposals { + let item = match proposal_to_execution[proposal.index] { + Some(execution_index) => { + build_item_from_execution(proposal, &executions[execution_index].record) + } + None => build_missing_execution_item(proposal), + }; + items.push(item); + } + + for execution in executions { + if execution_matched[execution.index] { + continue; + } + items.push(build_unmatched_execution_item(&execution.record)); + } + + items +} + +fn build_item_from_execution( + proposal: &ProposalRecord, + execution: &ExecutionRecord, +) -> SafeOutputExecutionItem { + let status = map_execution_status(&execution.status); + let error = execution.error.clone(); + SafeOutputExecutionItem { + context: proposal + .context + .clone() + .or_else(|| execution.context.clone()), + tool: if proposal.name.is_empty() { + execution.name.clone() + } else { + proposal.name.clone() + }, + status, + proposal: proposal.proposal.clone(), + error: error.clone(), + result: execution.result.clone(), + rejection_reason: rejection_reason_for_status(status, error), + applies_to_whole_batch: false, + } +} + +fn build_missing_execution_item(proposal: &ProposalRecord) -> SafeOutputExecutionItem { + let error = Some(String::from("no execution record found")); + SafeOutputExecutionItem { + context: proposal.context.clone(), + tool: proposal.name.clone(), + status: SafeOutputStatus::Skipped, + proposal: proposal.proposal.clone(), + error: 
error.clone(), + result: None, + rejection_reason: error, + applies_to_whole_batch: false, + } +} + +fn build_unmatched_execution_item(execution: &ExecutionRecord) -> SafeOutputExecutionItem { + let status = map_execution_status(&execution.status); + let error = execution.error.clone(); + SafeOutputExecutionItem { + context: execution.context.clone(), + tool: execution.name.clone(), + status, + proposal: Value::Null, + error: error.clone(), + result: execution.result.clone(), + rejection_reason: rejection_reason_for_status(status, error), + applies_to_whole_batch: false, + } +} + +fn build_gate_rejected_item( + proposal: &ProposalRecord, + detection: &DetectionVerdict, +) -> SafeOutputExecutionItem { + SafeOutputExecutionItem { + context: proposal.context.clone(), + tool: proposal.name.clone(), + status: SafeOutputStatus::NotProcessedDueToAggregateGate, + proposal: proposal.proposal.clone(), + error: None, + result: None, + rejection_reason: Some(aggregate_reason_key(detection)), + applies_to_whole_batch: true, + } +} + +fn map_execution_status(status: &str) -> SafeOutputStatus { + match status.trim().to_ascii_lowercase().as_str() { + "succeeded" => SafeOutputStatus::Executed, + "failed" => SafeOutputStatus::RejectedByExecution, + "skipped" => SafeOutputStatus::Skipped, + "budget_exhausted" => SafeOutputStatus::BudgetExhausted, + _ => SafeOutputStatus::Skipped, + } +} + +fn rejection_reason_for_status(status: SafeOutputStatus, error: Option) -> Option { + match status { + SafeOutputStatus::Executed => None, + SafeOutputStatus::RejectedByExecution + | SafeOutputStatus::Skipped + | SafeOutputStatus::BudgetExhausted => error, + SafeOutputStatus::NotProcessedDueToAggregateGate => None, + } +} + +fn build_rollup( + summary: Option<&SafeOutputSummary>, + execution: Option<&SafeOutputExecution>, + detection: Option<&DetectionVerdict>, +) -> Option { + let Some(summary) = summary else { + return None; + }; + + let total_rejected = summary.rejected_by_execution_count + 
summary.not_processed_count; + if total_rejected == 0 { + return None; + } + + let mut by_reason = BTreeMap::new(); + let mut by_threat = BTreeMap::new(); + + if summary.not_processed_count > 0 { + if let Some(detection) = detection { + by_reason.insert(aggregate_reason_key(detection), summary.not_processed_count); + for flag in detection.flags() { + by_threat.insert(flag.to_string(), summary.not_processed_count); + } + } + } else if let Some(execution) = execution { + for item in &execution.items { + let reason_key = match item.status { + SafeOutputStatus::RejectedByExecution => truncate_reason( + item.error + .clone() + .unwrap_or_else(|| String::from("execution_failed")), + 200, + ), + SafeOutputStatus::BudgetExhausted => String::from("budget_exhausted"), + SafeOutputStatus::Skipped => String::from("skipped"), + SafeOutputStatus::Executed | SafeOutputStatus::NotProcessedDueToAggregateGate => { + continue; + } + }; + *by_reason.entry(reason_key).or_insert(0) += 1; + } + } + + Some(RejectedSafeOutputsRollup { + total_rejected, + by_reason, + by_threat, + }) +} + +fn build_detection_finding(detection: &DetectionVerdict, proposed_count: u64) -> Finding { + let flags = detection.flags().join(","); + let reasons = if detection.reasons.is_empty() { + String::from("- (none provided)") + } else { + detection + .reasons + .iter() + .map(|reason| format!("- {reason}")) + .collect::>() + .join("\n") + }; + + Finding { + severity: Severity::High, + category: String::from("safe_outputs"), + title: format!("Detection rejected {proposed_count} safe output(s)"), + description: format!( + "The threat-analysis verdict had {flags} set. 
All {proposed_count} proposed safe outputs were dropped by the aggregate gate.\n\nReasons:\n{reasons}" + ), + impact: Some(String::from( + "No items were created; the agent's work is not visible to downstream consumers.", + )), + } +} + +fn created_item_from_execution_item(item: &SafeOutputExecutionItem) -> Option { + if item.status != SafeOutputStatus::Executed { + return None; + } + + let result = item.result.as_ref()?; + Some(CreatedItemReport { + kind: item.tool.clone(), + id: extract_string_field(result, &["id", "work_item_id", "number", "pr_number"]), + url: extract_string_field(result, &["url", "html_url", "web_url"]), + title: extract_string_field(result, &["title", "name", "subject"]), + }) +} + +fn extract_string_field(value: &Value, keys: &[&str]) -> Option { + keys.iter().find_map(|key| { + value.get(*key).and_then(|candidate| match candidate { + Value::String(text) => { + let trimmed = text.trim(); + (!trimmed.is_empty()).then(|| trimmed.to_string()) + } + Value::Number(number) => Some(number.to_string()), + Value::Bool(boolean) => Some(boolean.to_string()), + _ => None, + }) + }) +} + +fn normalize_optional_string(value: Option) -> Option { + value.and_then(|value| { + let trimmed = value.trim(); + (!trimmed.is_empty()).then(|| trimmed.to_string()) + }) +} + +fn aggregate_reason_key(detection: &DetectionVerdict) -> String { + let joined = detection + .reasons + .iter() + .map(String::as_str) + .map(str::trim) + .filter(|reason| !reason.is_empty()) + .collect::>() + .join("; "); + if joined.is_empty() { + format!("detection: {}", detection.flags().join(",")) + } else { + joined + } +} + +fn truncate_reason(reason: String, max_chars: usize) -> String { + let mut chars = reason.chars(); + let truncated: String = chars.by_ref().take(max_chars).collect(); + if chars.next().is_some() { + truncated + } else { + reason + } +} + +fn find_proposals_file(download_root: &Path) -> anyhow::Result> { + for directory in top_level_dirs_with_prefix(download_root, 
"agent_outputs_")? { + for candidate in [ + directory.join("staging").join(SAFE_OUTPUT_FILENAME), + directory.join(SAFE_OUTPUT_FILENAME), + ] { + if candidate.is_file() { + return Ok(Some(candidate)); + } + } + } + Ok(None) +} + +fn find_detection_file(download_root: &Path) -> anyhow::Result> { + for directory in top_level_dirs_with_prefix(download_root, "analyzed_outputs_")? { + let candidate = directory.join("threat-analysis.json"); + if candidate.is_file() { + return Ok(Some(candidate)); + } + } + Ok(None) +} + +fn find_execution_file(download_root: &Path) -> anyhow::Result> { + let preferred = download_root + .join("safe_outputs") + .join(EXECUTED_NDJSON_FILENAME); + if preferred.is_file() { + return Ok(Some(preferred)); + } + + let mut matches = Vec::new(); + collect_named_files(download_root, EXECUTED_NDJSON_FILENAME, &mut matches)?; + matches.sort(); + Ok(matches.into_iter().next()) +} + +fn top_level_dirs_with_prefix(root: &Path, prefix: &str) -> anyhow::Result> { + let entries = match fs::read_dir(root) { + Ok(entries) => entries, + Err(error) if error.kind() == ErrorKind::NotFound => return Ok(Vec::new()), + Err(error) => { + return Err(error) + .with_context(|| format!("Failed to read directory {}", root.display())); + } + }; + + let mut matches = Vec::new(); + for entry in entries { + let entry = entry.with_context(|| format!("Failed to iterate {}", root.display()))?; + let file_type = entry + .file_type() + .with_context(|| format!("Failed to inspect {}", entry.path().display()))?; + if !file_type.is_dir() { + continue; + } + + let Some(name) = entry.file_name().to_str().map(str::to_owned) else { + continue; + }; + if name.starts_with(prefix) { + matches.push(entry.path()); + } + } + matches.sort(); + Ok(matches) +} + +fn collect_named_files( + root: &Path, + file_name: &str, + matches: &mut Vec, +) -> anyhow::Result<()> { + let entries = match fs::read_dir(root) { + Ok(entries) => entries, + Err(error) if error.kind() == ErrorKind::NotFound => return 
Ok(()), + Err(error) => { + return Err(error) + .with_context(|| format!("Failed to read directory {}", root.display())); + } + }; + + for entry in entries { + let entry = entry.with_context(|| format!("Failed to iterate {}", root.display()))?; + let path = entry.path(); + let file_type = entry + .file_type() + .with_context(|| format!("Failed to inspect {}", path.display()))?; + if file_type.is_dir() { + collect_named_files(&path, file_name, matches)?; + } else if file_type.is_file() + && path.file_name().and_then(|name| name.to_str()) == Some(file_name) + { + matches.push(path); + } + } + + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::{ + CreatedItemReport, EXECUTED_NDJSON_FILENAME, SAFE_OUTPUT_FILENAME, SafeOutputStatus, + Severity, analyze_safe_outputs, + }; + use serde_json::{Value, json}; + use std::fs; + use std::path::Path; + use tempfile::TempDir; + + #[tokio::test] + async fn empty_download_root_returns_default_analysis() { + let temp_dir = TempDir::new().expect("create temp dir"); + + let analysis = analyze_safe_outputs(temp_dir.path()) + .await + .expect("analyze empty root"); + + assert!(analysis.summary.is_none()); + assert!(analysis.execution.is_none()); + assert!(analysis.rollup.is_none()); + assert!(analysis.created_items.is_empty()); + assert!(analysis.findings.is_empty()); + } + + #[tokio::test] + async fn proposals_with_successful_executions_are_reported_as_executed() { + let temp_dir = TempDir::new().expect("create temp dir"); + write_ndjson( + &temp_dir + .path() + .join("agent_outputs_42") + .join("staging") + .join(SAFE_OUTPUT_FILENAME), + &[ + json!({"name": "noop", "context": "noop-1"}), + json!({"name": "create_pull_request", "context": "pr-1"}), + ], + ); + write_ndjson( + &temp_dir + .path() + .join("safe_outputs") + .join(EXECUTED_NDJSON_FILENAME), + &[ + json!({"name": "noop", "status": "succeeded", "context": "noop-1", "result": {"status": "ok"}}), + json!({"name": "create_pull_request", "status": "succeeded", "context": 
"pr-1", "result": {"number": 7}}), + ], + ); + + let analysis = analyze_safe_outputs(temp_dir.path()) + .await + .expect("analyze successful safe outputs"); + + let summary = analysis.summary.expect("summary"); + assert_eq!(summary.proposed_count, 2); + assert_eq!(summary.executed_count, 2); + assert_eq!(summary.rejected_by_execution_count, 0); + assert_eq!(summary.not_processed_count, 0); + + let execution = analysis.execution.expect("execution"); + assert_eq!(execution.items.len(), 2); + assert!( + execution + .items + .iter() + .all(|item| item.status == SafeOutputStatus::Executed) + ); + assert!(analysis.rollup.is_none()); + assert!(analysis.findings.is_empty()); + } + + #[tokio::test] + async fn aggregate_detection_gate_rejects_all_proposals() { + let temp_dir = TempDir::new().expect("create temp dir"); + write_ndjson( + &temp_dir + .path() + .join("agent_outputs_77") + .join("staging") + .join(SAFE_OUTPUT_FILENAME), + &[ + json!({"name": "noop", "context": "noop-1"}), + json!({"name": "create_pull_request", "context": "pr-1"}), + ], + ); + write_json( + &temp_dir + .path() + .join("analyzed_outputs_77") + .join("threat-analysis.json"), + &json!({ + "prompt_injection": true, + "secret_leak": false, + "malicious_patch": false, + "reasons": ["evil"] + }), + ); + write_ndjson( + &temp_dir + .path() + .join("safe_outputs") + .join(EXECUTED_NDJSON_FILENAME), + &[json!({"name": "noop", "status": "succeeded", "context": "noop-1"})], + ); + + let analysis = analyze_safe_outputs(temp_dir.path()) + .await + .expect("analyze gate-rejected safe outputs"); + + let execution = analysis.execution.expect("execution"); + assert_eq!(execution.items.len(), 2); + assert!(execution.items.iter().all(|item| { + item.status == SafeOutputStatus::NotProcessedDueToAggregateGate + && item.applies_to_whole_batch + })); + + let rollup = analysis.rollup.expect("rollup"); + assert_eq!(rollup.total_rejected, 2); + assert_eq!(rollup.by_reason.get("evil"), Some(&2)); + 
assert_eq!(rollup.by_threat.get("prompt_injection"), Some(&2)); + + assert_eq!(analysis.findings.len(), 1); + assert_eq!(analysis.findings[0].severity, Severity::High); + } + + #[tokio::test] + async fn mixed_execution_outcomes_are_rolled_up() { + let temp_dir = TempDir::new().expect("create temp dir"); + write_ndjson( + &temp_dir + .path() + .join("agent_outputs_11") + .join("staging") + .join(SAFE_OUTPUT_FILENAME), + &[ + json!({"name": "noop"}), + json!({"name": "create_pull_request", "context": "pr-ctx"}), + json!({"name": "create_issue"}), + ], + ); + write_ndjson( + &temp_dir + .path() + .join("safe_outputs") + .join(EXECUTED_NDJSON_FILENAME), + &[ + json!({"name": "noop", "status": "succeeded", "result": {"status": "ok"}}), + json!({ + "name": "create_pull_request", + "status": "failed", + "context": "pr-ctx", + "error": "permission denied" + }), + ], + ); + + let analysis = analyze_safe_outputs(temp_dir.path()) + .await + .expect("analyze mixed execution outcomes"); + + let summary = analysis.summary.expect("summary"); + assert_eq!(summary.proposed_count, 3); + assert_eq!(summary.executed_count, 1); + assert_eq!(summary.rejected_by_execution_count, 2); + assert_eq!(summary.not_processed_count, 0); + + let execution = analysis.execution.expect("execution"); + assert_eq!( + execution + .items + .iter() + .map(|item| item.status) + .collect::>(), + vec![ + SafeOutputStatus::Executed, + SafeOutputStatus::RejectedByExecution, + SafeOutputStatus::Skipped, + ] + ); + + let rollup = analysis.rollup.expect("rollup"); + assert_eq!(rollup.by_reason.get("permission denied"), Some(&1)); + assert_eq!(rollup.by_reason.get("skipped"), Some(&1)); + } + + #[tokio::test] + async fn created_item_report_uses_field_fallbacks() { + let temp_dir = TempDir::new().expect("create temp dir"); + write_ndjson( + &temp_dir + .path() + .join("agent_outputs_1") + .join("staging") + .join(SAFE_OUTPUT_FILENAME), + &[json!({"name": "create_pull_request", "context": "pr-42"})], + ); + 
write_ndjson( + &temp_dir + .path() + .join("safe_outputs") + .join(EXECUTED_NDJSON_FILENAME), + &[json!({ + "name": "create_pull_request", + "status": "succeeded", + "context": "pr-42", + "result": { + "url": "https://example.invalid/pr/42", + "number": 42, + "title": "Fix" + } + })], + ); + + let analysis = analyze_safe_outputs(temp_dir.path()) + .await + .expect("analyze created item report"); + + assert_eq!(analysis.created_items.len(), 1); + assert_eq!( + analysis.created_items[0], + CreatedItemReport { + kind: String::from("create_pull_request"), + url: Some(String::from("https://example.invalid/pr/42")), + id: Some(String::from("42")), + title: Some(String::from("Fix")), + } + ); + } + + #[tokio::test] + async fn created_item_report_falls_back_to_work_item_id() { + let temp_dir = TempDir::new().expect("create temp dir"); + write_ndjson( + &temp_dir + .path() + .join("agent_outputs_2") + .join("staging") + .join(SAFE_OUTPUT_FILENAME), + &[json!({"name": "create_work_item", "context": "wi-99"})], + ); + write_ndjson( + &temp_dir + .path() + .join("safe_outputs") + .join(EXECUTED_NDJSON_FILENAME), + &[json!({ + "name": "create_work_item", + "status": "succeeded", + "context": "wi-99", + "result": {"work_item_id": 99} + })], + ); + + let analysis = analyze_safe_outputs(temp_dir.path()) + .await + .expect("analyze created work item report"); + + assert_eq!(analysis.created_items.len(), 1); + assert_eq!(analysis.created_items[0].id.as_deref(), Some("99")); + } + + fn write_ndjson(path: &Path, values: &[Value]) { + let contents = values + .iter() + .map(|value| serde_json::to_string(value).expect("serialize ndjson value")) + .collect::>() + .join("\n"); + write_text(path, &(contents + "\n")); + } + + fn write_json(path: &Path, value: &Value) { + write_text( + path, + &serde_json::to_string(value).expect("serialize json value"), + ); + } + + fn write_text(path: &Path, contents: &str) { + if let Some(parent) = path.parent() { + 
fs::create_dir_all(parent).expect("create parent directories"); + } + fs::write(path, contents).expect("write test file"); + } +} diff --git a/src/audit/cache.rs b/src/audit/cache.rs new file mode 100644 index 00000000..68221368 --- /dev/null +++ b/src/audit/cache.rs @@ -0,0 +1,298 @@ +use anyhow::{Context, Result}; +use chrono::{DateTime, SecondsFormat, Utc}; +use log::{debug, warn}; +use std::path::{Path, PathBuf}; +use tokio::fs; + +use crate::audit::model::AuditData; + +const CURRENT_ADO_AW_VERSION: &str = env!("CARGO_PKG_VERSION"); + +/// On-disk run summary written under `/build-/run-summary.json`. +/// CLI-version-keyed so a new ado-aw release transparently re-processes. +#[derive(Debug, Clone, PartialEq)] +pub struct RunSummary { + pub ado_aw_version: String, + pub build_id: u64, + pub processed_at: DateTime, + pub audit_data: AuditData, +} + +/// Filename for the cached run summary stored inside each audited run directory. +pub const RUN_SUMMARY_FILENAME: &str = "run-summary.json"; + +fn run_summary_path(run_dir: &Path) -> PathBuf { + run_dir.join(RUN_SUMMARY_FILENAME) +} + +fn temp_run_summary_path(run_dir: &Path) -> PathBuf { + run_dir.join(format!(".{RUN_SUMMARY_FILENAME}.tmp")) +} + +// `chrono` is present without its `serde` feature in Cargo.toml, so we keep the +// public `DateTime` field and serialize it explicitly as RFC 3339. 
+impl serde::Serialize for RunSummary { + fn serialize(&self, serializer: S) -> std::result::Result + where + S: serde::Serializer, + { + #[derive(serde::Serialize)] + struct RunSummaryDisk<'a> { + ado_aw_version: &'a str, + build_id: u64, + processed_at: String, + audit_data: &'a AuditData, + } + + RunSummaryDisk { + ado_aw_version: &self.ado_aw_version, + build_id: self.build_id, + processed_at: self + .processed_at + .to_rfc3339_opts(SecondsFormat::AutoSi, true), + audit_data: &self.audit_data, + } + .serialize(serializer) + } +} + +impl<'de> serde::Deserialize<'de> for RunSummary { + fn deserialize(deserializer: D) -> std::result::Result + where + D: serde::Deserializer<'de>, + { + #[derive(serde::Deserialize)] + struct RunSummaryDisk { + ado_aw_version: String, + build_id: u64, + processed_at: String, + audit_data: AuditData, + } + + let disk = RunSummaryDisk::deserialize(deserializer)?; + let processed_at = chrono::DateTime::parse_from_rfc3339(&disk.processed_at) + .map_err(serde::de::Error::custom)? + .with_timezone(&Utc); + + Ok(Self { + ado_aw_version: disk.ado_aw_version, + build_id: disk.build_id, + processed_at, + audit_data: disk.audit_data, + }) + } +} + +/// Save a run summary to `/run-summary.json`. +/// Creates parent dirs as needed. Atomic write via temp-file + rename. 
+pub async fn save_run_summary(run_dir: &Path, summary: &RunSummary) -> Result<()> { + fs::create_dir_all(run_dir) + .await + .with_context(|| format!("create run summary cache directory {}", run_dir.display()))?; + + let summary_path = run_summary_path(run_dir); + let temp_path = temp_run_summary_path(run_dir); + let bytes = serde_json::to_vec_pretty(summary).context("serialize run summary cache")?; + + fs::write(&temp_path, bytes) + .await + .with_context(|| format!("write temporary run summary cache {}", temp_path.display()))?; + + if let Err(error) = fs::rename(&temp_path, &summary_path).await { + let _ = fs::remove_file(&temp_path).await; + return Err(anyhow::Error::new(error).context(format!( + "rename temporary run summary cache {} to {}", + temp_path.display(), + summary_path.display() + ))); + } + + debug!("Saved run summary cache to {}", summary_path.display()); + Ok(()) +} + +/// Load a run summary from `/run-summary.json`. +/// +/// Returns: +/// Ok(Some(summary)) — file present, parsed, AND `ado_aw_version` matches +/// the current CLI version (`env!("CARGO_PKG_VERSION")`). +/// Ok(None) — file absent, OR `ado_aw_version` mismatch, OR JSON parse error. +/// In the mismatch/parse-error cases, logs a `warn!` so the +/// operator sees that the cache was skipped. +/// Err(...) — only for I/O errors other than NotFound. 
+pub async fn load_run_summary(run_dir: &Path) -> Result> { + let summary_path = run_summary_path(run_dir); + let bytes = match fs::read(&summary_path).await { + Ok(bytes) => bytes, + Err(error) if error.kind() == std::io::ErrorKind::NotFound => { + debug!("No run summary cache found at {}", summary_path.display()); + return Ok(None); + } + Err(error) => { + return Err(anyhow::Error::new(error) + .context(format!("read run summary cache {}", summary_path.display()))); + } + }; + + let summary = match serde_json::from_slice::(&bytes) { + Ok(summary) => summary, + Err(error) => { + warn!( + "Ignoring run summary cache at {} because it could not be parsed: {}", + summary_path.display(), + error + ); + return Ok(None); + } + }; + + if summary.ado_aw_version != CURRENT_ADO_AW_VERSION { + warn!( + "Ignoring run summary cache at {} because it was written by ado-aw {} instead of {}", + summary_path.display(), + summary.ado_aw_version, + CURRENT_ADO_AW_VERSION + ); + return Ok(None); + } + + debug!("Loaded run summary cache from {}", summary_path.display()); + Ok(Some(summary)) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::audit::model::{ErrorInfo, Finding, MetricsData, OverviewData, Severity}; + use chrono::TimeZone; + use tempfile::TempDir; + + fn sample_summary(version: &str) -> RunSummary { + RunSummary { + ado_aw_version: version.to_string(), + build_id: 12345, + processed_at: Utc + .with_ymd_and_hms(2026, 5, 21, 12, 0, 0) + .single() + .unwrap(), + audit_data: AuditData { + overview: OverviewData { + build_id: 12345, + pipeline_name: "agentic-audit".to_string(), + status: "completed".to_string(), + result: Some("succeeded".to_string()), + source_branch: Some("refs/heads/main".to_string()), + url: Some( + "https://dev.azure.com/example/project/_build/results?buildId=12345" + .to_string(), + ), + ..Default::default() + }, + metrics: MetricsData { + token_usage: 2048, + effective_tokens: 1536, + estimated_cost: 1.25, + turns: 8, + warning_count: 1, + 
..Default::default() + }, + key_findings: vec![Finding { + category: "tooling".to_string(), + severity: Severity::High, + title: "Missing validation artifact".to_string(), + description: "The run completed without publishing a validation artifact." + .to_string(), + impact: Some("Audit confidence is reduced for this run.".to_string()), + }], + warnings: vec![ErrorInfo { + source: "artifact-download".to_string(), + message: "safe outputs artifact missing; rendered partial report".to_string(), + timestamp: Some("2026-05-21T12:01:00Z".to_string()), + }], + ..Default::default() + }, + } + } + + #[tokio::test] + async fn round_trip_run_summary() { + let temp_dir = TempDir::new().expect("create temp dir"); + let run_dir = temp_dir.path().join("build-12345"); + let summary = sample_summary(CURRENT_ADO_AW_VERSION); + + save_run_summary(&run_dir, &summary) + .await + .expect("save run summary"); + + let loaded = load_run_summary(&run_dir) + .await + .expect("load run summary") + .expect("run summary should exist"); + + assert_eq!(loaded, summary); + } + + #[tokio::test] + async fn version_mismatch_returns_none() { + let temp_dir = TempDir::new().expect("create temp dir"); + let run_dir = temp_dir.path().join("build-12345"); + + save_run_summary(&run_dir, &sample_summary("999.0.0")) + .await + .expect("save mismatched run summary"); + + assert!( + load_run_summary(&run_dir) + .await + .expect("load run summary") + .is_none() + ); + } + + #[tokio::test] + async fn missing_file_returns_none() { + let temp_dir = TempDir::new().expect("create temp dir"); + let run_dir = temp_dir.path().join("build-12345"); + + assert!( + load_run_summary(&run_dir) + .await + .expect("load missing run summary") + .is_none() + ); + } + + #[tokio::test] + async fn corrupt_json_returns_none() { + let temp_dir = TempDir::new().expect("create temp dir"); + let run_dir = temp_dir.path().join("build-12345"); + let summary_path = run_summary_path(&run_dir); + + fs::create_dir_all(&run_dir) + .await + 
.expect("create run summary dir"); + fs::write(&summary_path, b"{ definitely not json }") + .await + .expect("write corrupt run summary"); + + assert!( + load_run_summary(&run_dir) + .await + .expect("load corrupt run summary") + .is_none() + ); + } + + #[tokio::test] + async fn save_cleans_up_temporary_file() { + let temp_dir = TempDir::new().expect("create temp dir"); + let run_dir = temp_dir.path().join("build-12345"); + let temp_path = temp_run_summary_path(&run_dir); + + save_run_summary(&run_dir, &sample_summary(CURRENT_ADO_AW_VERSION)) + .await + .expect("save run summary"); + + assert!(fs::metadata(&temp_path).await.is_err()); + } +} diff --git a/src/audit/cli.rs b/src/audit/cli.rs new file mode 100644 index 00000000..afad8e59 --- /dev/null +++ b/src/audit/cli.rs @@ -0,0 +1,720 @@ +use std::io; +use std::path::{Path, PathBuf}; +use std::time::Duration; + +use anyhow::{Context, Result}; +use chrono::{DateTime, Utc}; + +use crate::ado::{ + AdoContext, PATH_SEGMENT, download_build_artifact, get_build, list_build_artifacts, + resolve_ado_context, resolve_auth, +}; +use crate::audit::analyzers::{ + detection, firewall, jobs, mcp, missing, otel, policy, safe_outputs, +}; +use crate::audit::cache::{RunSummary, load_run_summary, save_run_summary}; +use crate::audit::findings; +use crate::audit::model::{AuditData, ErrorInfo, FileInfo, OverviewData}; +use crate::audit::render; +use crate::audit::url::{ParsedBuildRef, parse_build_ref}; + +pub struct AuditOptions<'a> { + pub build_id_or_url: &'a str, + pub output: &'a Path, + pub json: bool, + pub org: Option<&'a str>, + pub project: Option<&'a str>, + pub pat: Option<&'a str>, + pub artifacts: Option<&'a [String]>, + pub no_cache: bool, +} + +pub async fn dispatch(opts: AuditOptions<'_>) -> Result<()> { + let parsed = parse_build_ref(opts.build_id_or_url)?; + let artifact_filters = normalize_artifact_filters(opts.artifacts)?; + let cwd = tokio::fs::canonicalize(".") + .await + .context("Could not resolve current 
directory")?; + let ctx = resolve_audit_context(&cwd, opts.org, opts.project, &parsed).await?; + let auth = resolve_auth(opts.pat).await?; + + let run_dir = opts.output.join(format!("build-{}", parsed.build_id)); + tokio::fs::create_dir_all(&run_dir) + .await + .with_context(|| format!("create audit output directory {}", run_dir.display()))?; + + if !opts.no_cache + && let Some(summary) = load_run_summary(&run_dir).await? + { + if !opts.json { + eprintln!( + "Using cached audit from {}", + summary.processed_at.to_rfc3339() + ); + } + render_audit(&summary.audit_data, opts.json)?; + return Ok(()); + } + + let client = reqwest::Client::builder() + .timeout(Duration::from_secs(60)) + .build() + .context("Failed to create HTTP client")?; + + let build = get_build(&client, &ctx, &auth, parsed.build_id).await?; + let mut audit = AuditData { + overview: build_overview(&build, &ctx, parsed.build_id, &run_dir), + ..AuditData::default() + }; + + let mut saw_artifact_auth_error = false; + match list_build_artifacts(&client, &ctx, &auth, parsed.build_id).await { + Ok(artifacts) => { + let selected: Vec<_> = artifacts + .into_iter() + .filter(|artifact| { + artifact_matches_selected(&artifact.name, artifact_filters.as_deref()) + }) + .collect(); + + if selected.is_empty() { + let message = if artifact_filters.is_some() { + "no matching artifacts were published for the selected --artifacts filter" + .to_string() + } else { + "no artifacts were published for this build".to_string() + }; + warn_and_record(&mut audit, "audit::artifacts", message); + } + + for artifact in selected { + match download_artifact_preserving_cache(&client, &auth, &artifact, &run_dir).await + { + Ok(()) => {} + Err(error) if is_authz_error(&error) => { + saw_artifact_auth_error = true; + warn_and_record( + &mut audit, + "audit::artifacts", + format!( + "failed to download artifact '{}': {:#}; using any local copy already present", + artifact.name, error + ), + ); + } + Err(error) => { + warn_and_record( + 
&mut audit, + "audit::artifacts", + format!( + "failed to download artifact '{}': {:#}", + artifact.name, error + ), + ); + } + } + } + } + Err(error) if is_authz_error(&error) => { + saw_artifact_auth_error = true; + warn_and_record( + &mut audit, + "audit::artifacts", + format!( + "failed to list build artifacts: {:#}; using any local cache already present", + error + ), + ); + } + Err(error) => { + return Err(error).context(format!( + "failed to list artifacts for build {}", + parsed.build_id + )); + } + } + + if saw_artifact_auth_error && !has_any_local_artifacts(&run_dir).await { + anyhow::bail!( + "failed to download artifacts and no local cache. Use 'az pipelines runs artifact download --run-id {}' to fetch them manually, then re-run.", + parsed.build_id + ); + } + + match collect_downloaded_files(&run_dir, artifact_filters.as_deref()).await { + Ok(files) => audit.downloaded_files = files, + Err(error) => warn_and_record( + &mut audit, + "audit::artifacts", + format!("failed to enumerate downloaded files: {:#}", error), + ), + } + + if let Some(agent_outputs_dir) = find_artifact_dir(&run_dir, "agent_outputs").await { + let firewall_dir = agent_outputs_dir.join("logs").join("firewall"); + match firewall::analyze_firewall_logs(&firewall_dir).await { + Ok(result) => audit.firewall_analysis = result, + Err(error) => warn_and_record( + &mut audit, + "audit::firewall", + format!("firewall analysis failed: {:#}", error), + ), + } + match policy::analyze_policy(&firewall_dir).await { + Ok(result) => audit.policy_analysis = result, + Err(error) => warn_and_record( + &mut audit, + "audit::policy", + format!("policy analysis failed: {:#}", error), + ), + } + + let mcpg_dir = agent_outputs_dir.join("logs").join("mcpg"); + match mcp::analyze_mcp_tool_usage(&mcpg_dir).await { + Ok(result) => audit.mcp_tool_usage = result, + Err(error) => warn_and_record( + &mut audit, + "audit::mcp", + format!("MCP tool-usage analysis failed: {:#}", error), + ), + } + match 
mcp::analyze_mcp_server_health(&mcpg_dir).await { + Ok(result) => audit.mcp_server_health = result, + Err(error) => warn_and_record( + &mut audit, + "audit::mcp", + format!("MCP server-health analysis failed: {:#}", error), + ), + } + match mcp::extract_mcp_failures(&mcpg_dir).await { + Ok(result) => audit.mcp_failures = result, + Err(error) => warn_and_record( + &mut audit, + "audit::mcp", + format!("MCP failure extraction failed: {:#}", error), + ), + } + + match otel::analyze_otel(&agent_outputs_dir).await { + Ok(result) => { + audit.metrics = result.metrics; + audit.engine_config = result.engine_config; + audit.performance_metrics = result.performance; + audit.overview.aw_info = result.aw_info; + } + Err(error) => warn_and_record( + &mut audit, + "audit::otel", + format!("OTel analysis failed: {:#}", error), + ), + } + } + + match safe_outputs::analyze_safe_outputs(&run_dir).await { + Ok(result) => { + audit.safe_output_summary = result.summary; + audit.safe_output_execution = result.execution; + audit.rejected_safe_outputs = result.rollup; + audit.created_items = result.created_items; + audit.key_findings.extend(result.findings); + } + Err(error) => warn_and_record( + &mut audit, + "audit::safe_outputs", + format!("safe-output analysis failed: {:#}", error), + ), + } + + match detection::analyze_detection(&run_dir).await { + Ok(result) => audit.detection_analysis = result, + Err(error) => warn_and_record( + &mut audit, + "audit::detection", + format!("detection analysis failed: {:#}", error), + ), + } + + match missing::extract_missing_tools(&run_dir).await { + Ok(result) => audit.missing_tools = result, + Err(error) => warn_and_record( + &mut audit, + "audit::missing_tools", + format!("missing-tool extraction failed: {:#}", error), + ), + } + match missing::extract_missing_data(&run_dir).await { + Ok(result) => audit.missing_data = result, + Err(error) => warn_and_record( + &mut audit, + "audit::missing_data", + format!("missing-data extraction failed: {:#}", 
error), + ), + } + match missing::extract_noops(&run_dir).await { + Ok(result) => audit.noops = result, + Err(error) => warn_and_record( + &mut audit, + "audit::noops", + format!("noop extraction failed: {:#}", error), + ), + } + + match jobs::fetch_timeline(&client, &ctx, &auth, parsed.build_id).await { + Ok(timeline) => audit.jobs = jobs::timeline_to_jobs(&timeline), + Err(error) => warn_and_record( + &mut audit, + "audit::jobs", + format!("job timeline analysis failed: {:#}", error), + ), + } + + if let Some(firewall_analysis) = &audit.firewall_analysis { + let performance = audit.performance_metrics.get_or_insert_default(); + if performance.network_requests.is_none() { + performance.network_requests = Some(firewall_analysis.total_requests); + } + } + if let Some(mcp_tool_usage) = &audit.mcp_tool_usage + && let Some(tool) = mcp_tool_usage.tools.first() + { + let performance = audit.performance_metrics.get_or_insert_default(); + if performance.most_used_tool.is_none() && !tool.name.is_empty() { + performance.most_used_tool = Some(tool.name.clone()); + } + } + + audit.metrics.error_count = audit.errors.len() as u64; + audit.metrics.warning_count = audit.warnings.len() as u64; + findings::derive_findings(&mut audit); + + save_run_summary( + &run_dir, + &RunSummary { + ado_aw_version: env!("CARGO_PKG_VERSION").to_string(), + build_id: parsed.build_id, + processed_at: Utc::now(), + audit_data: audit.clone(), + }, + ) + .await?; + + render_audit(&audit, opts.json)?; + if !opts.json { + eprintln!("✓ Audit complete. 
Reports in {}", run_dir.display()); + } + Ok(()) +} + +async fn resolve_audit_context( + cwd: &Path, + org: Option<&str>, + project: Option<&str>, + parsed: &ParsedBuildRef, +) -> Result { + if parsed.org.is_some() && parsed.project.is_some() && parsed.host.is_some() { + let mut ctx = resolve_ado_context(cwd, org, project) + .await + .unwrap_or_else(|_| AdoContext { + org_url: String::new(), + project: String::new(), + repo_name: String::new(), + }); + apply_parsed_context_overrides(&mut ctx, parsed); + return Ok(ctx); + } + + resolve_ado_context(cwd, org, project).await +} + +fn apply_parsed_context_overrides(ctx: &mut AdoContext, parsed: &ParsedBuildRef) { + if let Some(org_url) = parsed_org_url(parsed) { + ctx.org_url = org_url; + } + if let Some(project) = &parsed.project { + ctx.project = project.clone(); + } +} + +fn parsed_org_url(parsed: &ParsedBuildRef) -> Option { + let org = parsed.org.as_deref()?; + let host = parsed.host.as_deref()?; + + if host.eq_ignore_ascii_case("dev.azure.com") { + Some(format!("https://{host}/{org}")) + } else if host.to_ascii_lowercase().ends_with(".visualstudio.com") { + Some(format!("https://{host}")) + } else { + Some(format!("https://{host}/{org}")) + } +} + +fn build_overview( + build: &serde_json::Value, + ctx: &AdoContext, + build_id: u64, + run_dir: &Path, +) -> OverviewData { + let started_at = string_field(build, &["startTime"]); + let finished_at = string_field(build, &["finishTime"]); + + OverviewData { + build_id: build + .get("id") + .and_then(serde_json::Value::as_u64) + .unwrap_or(build_id), + pipeline_name: build + .get("definition") + .and_then(|value| value.get("name")) + .and_then(serde_json::Value::as_str) + .unwrap_or_default() + .to_string(), + status: build + .get("status") + .and_then(serde_json::Value::as_str) + .unwrap_or_default() + .to_string(), + result: string_field(build, &["result"]), + created_at: string_field(build, &["queueTime", "createdDate", "creationTime"]), + started_at: 
started_at.clone(), + finished_at: finished_at.clone(), + duration: format_duration(started_at.as_deref(), finished_at.as_deref()), + source_branch: string_field(build, &["sourceBranch"]), + source_version: string_field(build, &["sourceVersion"]), + url: Some(build_audit_url(ctx, build_id)), + logs_path: Some(run_dir.display().to_string()), + aw_info: None, + } +} + +fn string_field(value: &serde_json::Value, keys: &[&str]) -> Option { + keys.iter() + .filter_map(|key| value.get(*key).and_then(serde_json::Value::as_str)) + .map(str::trim) + .find(|value| !value.is_empty()) + .map(ToOwned::to_owned) +} + +fn build_audit_url(ctx: &AdoContext, build_id: u64) -> String { + format!( + "{}/{}/_build/results?buildId={}", + ctx.org_url.trim_end_matches('/'), + percent_encoding::utf8_percent_encode(&ctx.project, PATH_SEGMENT), + build_id + ) +} + +fn format_duration(started_at: Option<&str>, finished_at: Option<&str>) -> Option { + let start = DateTime::parse_from_rfc3339(started_at?).ok()?; + let finish = DateTime::parse_from_rfc3339(finished_at?).ok()?; + let delta = finish.signed_duration_since(start); + if delta.num_seconds() < 0 { + return None; + } + Some(format!( + "{}m {}s", + delta.num_seconds() / 60, + delta.num_seconds() % 60 + )) +} + +fn normalize_artifact_filters(filters: Option<&[String]>) -> Result>> { + let Some(filters) = filters else { + return Ok(None); + }; + + let mut normalized = Vec::new(); + for filter in filters { + let filter = filter.trim().to_ascii_lowercase(); + let canonical = match filter.as_str() { + "agent" => "agent", + "detection" => "detection", + "safe-outputs" | "safe_outputs" => "safe-outputs", + _ => anyhow::bail!( + "Invalid --artifacts value '{}'. 
/// Maps an artifact name onto the canonical prefix of its artifact family.
///
/// A name belongs to a family when it equals the prefix exactly or continues
/// it with an underscore (for example `agent_outputs_42`). Names that match no
/// family yield `None`.
fn artifact_name_to_prefix(name: &str) -> Option<&'static str> {
    const KNOWN_PREFIXES: [&str; 3] = ["agent_outputs", "analyzed_outputs", "safe_outputs"];

    KNOWN_PREFIXES.iter().copied().find(|prefix| {
        name.strip_prefix(*prefix)
            .is_some_and(|rest| rest.is_empty() || rest.starts_with('_'))
    })
}
/// Translates normalized `--artifacts` filter values into artifact directory prefixes.
///
/// With no filter every known prefix is returned. Otherwise each recognized
/// filter (`agent`, `detection`, `safe-outputs`) contributes its prefix once,
/// in first-seen order; unrecognized values are ignored.
fn selected_prefixes(filters: Option<&[String]>) -> Vec<&'static str> {
    const ALL_PREFIXES: [&str; 3] = ["agent_outputs", "analyzed_outputs", "safe_outputs"];

    let Some(requested) = filters else {
        return ALL_PREFIXES.to_vec();
    };

    let mapped = requested.iter().filter_map(|filter| match filter.as_str() {
        "agent" => Some("agent_outputs"),
        "detection" => Some("analyzed_outputs"),
        "safe-outputs" => Some("safe_outputs"),
        _ => None,
    });

    let mut chosen: Vec<&'static str> = Vec::new();
    for prefix in mapped {
        if !chosen.contains(&prefix) {
            chosen.push(prefix);
        }
    }
    chosen
}
+ { + let path = entry.path(); + let file_type = entry + .file_type() + .await + .with_context(|| format!("inspect artifact path {}", path.display()))?; + if file_type.is_dir() { + stack.push(path); + continue; + } + if !file_type.is_file() { + continue; + } + + let metadata = entry + .metadata() + .await + .with_context(|| format!("stat artifact file {}", path.display()))?; + let relative = path + .strip_prefix(run_dir) + .unwrap_or(path.as_path()) + .display() + .to_string(); + files.push(FileInfo { + path: relative, + size_bytes: metadata.len(), + sha256: None, + }); + } + } + + Ok(files) +} + +async fn find_artifact_dir(run_dir: &Path, prefix: &str) -> Option { + let mut entries = tokio::fs::read_dir(run_dir).await.ok()?; + let mut hits = Vec::new(); + while let Ok(Some(entry)) = entries.next_entry().await { + if entry.file_type().await.map(|t| t.is_dir()).unwrap_or(false) + && let Some(name) = entry.file_name().to_str() + && (name == prefix || name.starts_with(&format!("{}_", prefix))) + { + hits.push(entry.path()); + } + } + hits.sort(); + hits.pop() +} + +fn is_authz_error(error: &anyhow::Error) -> bool { + let message = error.to_string().to_ascii_lowercase(); + message.contains("ado api returned 401") || message.contains("ado api returned 403") +} + +fn warn_and_record(audit: &mut AuditData, source: &str, message: String) { + eprintln!("warning: {message}"); + audit.warnings.push(ErrorInfo { + source: source.to_string(), + message, + timestamp: None, + }); +} + +fn render_audit(audit: &AuditData, json: bool) -> Result<()> { + if json { + let mut stdout = io::stdout().lock(); + render::json::render_json(audit, &mut stdout)?; + } else { + print!("{}", render::console::render_console(audit)); + } + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn url_context_overrides_flag_org() { + let parsed = + parse_build_ref("https://dev.azure.com/url-org/My%20Project/_build/results?buildId=42") + .expect("parse build ref"); + let mut ctx = 
AdoContext { + org_url: String::from("https://dev.azure.com/flag-org"), + project: String::from("FlagProject"), + repo_name: String::from("repo"), + }; + + apply_parsed_context_overrides(&mut ctx, &parsed); + + assert_eq!(ctx.org_url, "https://dev.azure.com/url-org"); + assert_eq!(ctx.project, "My Project"); + } + + #[tokio::test] + async fn find_artifact_dir_picks_lexicographically_last_match() { + let temp_dir = tempfile::tempdir().expect("tempdir"); + tokio::fs::create_dir_all(temp_dir.path().join("agent_outputs_001")) + .await + .expect("create first dir"); + tokio::fs::create_dir_all(temp_dir.path().join("agent_outputs_999")) + .await + .expect("create second dir"); + tokio::fs::create_dir_all(temp_dir.path().join("safe_outputs")) + .await + .expect("create safe outputs dir"); + + let found = find_artifact_dir(temp_dir.path(), "agent_outputs") + .await + .expect("find artifact dir"); + + assert_eq!( + found.file_name().and_then(|name| name.to_str()), + Some("agent_outputs_999") + ); + } + + #[test] + fn artifact_filter_mapping_matches_expected_sets() { + let filters = vec![ + String::from("agent"), + String::from("detection"), + String::from("safe-outputs"), + ]; + let normalized = normalize_artifact_filters(Some(&filters)).expect("normalize filters"); + let normalized = normalized.as_deref(); + + assert!(artifact_matches_selected("agent_outputs_42", normalized)); + assert!(artifact_matches_selected("analyzed_outputs_42", normalized)); + assert!(artifact_matches_selected("safe_outputs", normalized)); + + let agent_only = vec![String::from("agent")]; + let agent_only = + normalize_artifact_filters(Some(&agent_only)).expect("normalize agent filter"); + let agent_only = agent_only.as_deref(); + assert!(artifact_matches_selected("agent_outputs_42", agent_only)); + assert!(!artifact_matches_selected( + "analyzed_outputs_42", + agent_only + )); + assert!(!artifact_matches_selected("safe_outputs", agent_only)); + } +} diff --git a/src/audit/findings.rs 
b/src/audit/findings.rs new file mode 100644 index 00000000..f47e99c2 --- /dev/null +++ b/src/audit/findings.rs @@ -0,0 +1,814 @@ +use std::collections::BTreeMap; + +use crate::audit::model::{AuditData, Finding, Recommendation, Severity}; + +/// Aggregate findings + recommendations from every populated section +/// of `AuditData`. Pure function; does not mutate the input. +/// +/// Findings already emitted by individual analyzers (e.g. the +/// detection-gate Finding produced by `analyzers::safe_outputs`) +/// are PRESERVED — this function appends to whatever is already in +/// `audit.key_findings` / `audit.recommendations`, never replaces. +pub fn derive_findings(audit: &mut AuditData) { + let mut findings = audit.key_findings.clone(); + let mut recommendations = audit.recommendations.clone(); + + add_elevated_mcp_error_rate(audit, &mut findings, &mut recommendations); + add_denied_network_domains(audit, &mut findings, &mut recommendations); + add_high_token_usage(audit, &mut findings, &mut recommendations); + add_repeated_noops(audit, &mut findings, &mut recommendations); + add_missing_tools_cluster(audit, &mut findings, &mut recommendations); + add_missing_data_cluster(audit, &mut findings, &mut recommendations); + add_no_safe_outputs_proposed(audit, &mut findings, &mut recommendations); + add_error_count_findings(audit, &mut findings, &mut recommendations); + + audit.key_findings = findings; + audit.recommendations = recommendations; +} + +fn add_elevated_mcp_error_rate( + audit: &AuditData, + findings: &mut Vec, + recommendations: &mut Vec, +) { + let Some(health) = &audit.mcp_server_health else { + return; + }; + + for server in &health.servers { + if !server.unreliable { + continue; + } + + let finding = Finding { + category: String::from("mcp"), + severity: Severity::High, + title: format!("MCP server '{}' is unreliable", server.name), + description: format!( + "{} has an error rate of {:.1}% across {} calls (threshold: 10% with ≥5 calls).", + server.name, 
+ server.error_rate * 100.0, + server.total_calls + ), + impact: Some(format!( + "Agent calls to {} may fail intermittently; consider retry or fallback strategy.", + server.name + )), + }; + push_finding(findings, finding); + + let recommendation = Recommendation { + priority: String::from("high"), + action: format!("Inspect MCPG logs for {}", server.name), + reason: String::from("Recurring errors degrade agent reliability."), + example: None, + }; + push_recommendation(recommendations, recommendation); + } +} + +fn add_denied_network_domains( + audit: &AuditData, + findings: &mut Vec, + recommendations: &mut Vec, +) { + let Some(firewall) = &audit.firewall_analysis else { + return; + }; + if firewall.denied_count == 0 { + return; + } + + let mut denied_domains = BTreeMap::::new(); + for domain in &firewall.domains { + let domain_name = domain.domain.trim(); + if domain_name.is_empty() { + continue; + } + + let status = domain.status.trim().to_ascii_lowercase(); + if status == "allowed" { + continue; + } + + *denied_domains.entry(domain_name.to_string()).or_default() += domain.request_count; + } + + if denied_domains.is_empty() { + return; + } + + let mut denied_domains = denied_domains.into_iter().collect::>(); + denied_domains.sort_by(|(left_domain, left_count), (right_domain, right_count)| { + right_count + .cmp(left_count) + .then_with(|| left_domain.cmp(right_domain)) + }); + + let additional_domains = denied_domains.len().saturating_sub(5); + for (index, (domain, request_count)) in denied_domains.into_iter().take(5).enumerate() { + let finding = Finding { + category: String::from("network"), + severity: if index == 0 { + Severity::High + } else { + Severity::Medium + }, + title: format!("Network access denied: {domain}"), + description: format!("AWF firewall denied {request_count} request(s) to {domain}."), + impact: None, + }; + push_finding(findings, finding); + + let recommendation = Recommendation { + priority: String::from("medium"), + action: 
format!("Add '{domain}' to network.allowed if access is legitimate."), + reason: String::from( + "Denied requests suggest the workflow may need an expanded allowlist.", + ), + example: None, + }; + push_recommendation(recommendations, recommendation); + } + + if additional_domains > 0 { + push_finding( + findings, + Finding { + category: String::from("network"), + severity: Severity::Medium, + title: format!("{additional_domains} additional domains were denied"), + description: format!( + "AWF firewall denied requests to {additional_domains} additional domain(s) beyond the top 5 offenders." + ), + impact: None, + }, + ); + } +} + +fn add_high_token_usage( + audit: &AuditData, + findings: &mut Vec, + recommendations: &mut Vec, +) { + if audit.metrics.token_usage <= 100_000 { + return; + } + + let finding = Finding { + category: String::from("cost"), + severity: Severity::Medium, + title: format!("High token usage: {} tokens", audit.metrics.token_usage), + description: format!( + "Run consumed {} tokens across {} turn(s). Consider reducing context size or pruning history.", + audit.metrics.token_usage, audit.metrics.turns + ), + impact: None, + }; + push_finding(findings, finding); + + let recommendation = Recommendation { + priority: String::from("medium"), + action: String::from("Review the agent prompt and tool usage for unnecessary context."), + reason: String::from("Large prompts and long histories increase cost and latency."), + example: None, + }; + push_recommendation(recommendations, recommendation); +} + +fn add_repeated_noops( + audit: &AuditData, + findings: &mut Vec, + recommendations: &mut Vec, +) { + let noop_count = audit.noops.len(); + if noop_count < 3 { + return; + } + + let finding = Finding { + category: String::from("agent_behavior"), + severity: Severity::Low, + title: format!("Agent reported {} noop outcomes", noop_count), + description: format!( + "The agent declined to act {} time(s) without proposing safe outputs. 
Frequent noops may indicate prompt or tool gaps.", + noop_count + ), + impact: None, + }; + push_finding(findings, finding); + + let recommendation = Recommendation { + priority: String::from("low"), + action: String::from( + "Review noop reasons to determine whether tools or prompt need adjustment.", + ), + reason: String::from( + "Repeated noop outcomes often signal unclear instructions or missing capabilities.", + ), + example: None, + }; + push_recommendation(recommendations, recommendation); +} + +fn add_missing_tools_cluster( + audit: &AuditData, + findings: &mut Vec, + recommendations: &mut Vec, +) { + if audit.missing_tools.len() < 2 { + return; + } + + let mut grouped = BTreeMap::>::new(); + for report in &audit.missing_tools { + let Some(tool) = report.tool.as_deref() else { + continue; + }; + let tool = tool.trim(); + if tool.is_empty() { + continue; + } + + let reasons = grouped.entry(tool.to_string()).or_default(); + if let Some(reason) = report.reason.as_deref() { + let reason = reason.trim(); + if !reason.is_empty() { + reasons.push(reason.to_string()); + } + } + } + + for (tool, reasons) in grouped { + let occurrence_count = audit + .missing_tools + .iter() + .filter(|report| report.tool.as_deref().map(str::trim) == Some(tool.as_str())) + .count(); + if occurrence_count < 2 { + continue; + } + + let reasons_summary = if reasons.is_empty() { + String::from("none provided") + } else { + reasons.into_iter().take(3).collect::>().join("; ") + }; + + let finding = Finding { + category: String::from("tooling"), + severity: Severity::Medium, + title: format!("Tool '{tool}' marked missing {occurrence_count} times"), + description: format!( + "The agent attempted to use '{tool}' but it was not available. Reasons reported: {reasons_summary}." 
+ ), + impact: None, + }; + push_finding(findings, finding); + + let recommendation = Recommendation { + priority: String::from("medium"), + action: format!( + "Add '{tool}' to the agent's tools: section if the use case is legitimate." + ), + reason: String::from( + "Repeated missing-tool reports suggest a tooling configuration gap.", + ), + example: None, + }; + push_recommendation(recommendations, recommendation); + } +} + +fn add_missing_data_cluster( + audit: &AuditData, + findings: &mut Vec, + _recommendations: &mut Vec, +) { + let missing_data_count = audit.missing_data.len(); + if missing_data_count < 3 { + return; + } + + push_finding( + findings, + Finding { + category: String::from("inputs"), + severity: Severity::Low, + title: format!("{missing_data_count} missing-data reports"), + description: format!( + "The agent reported missing data {missing_data_count} time(s). This often means an input file, work-item, or prior context was unavailable." + ), + impact: None, + }, + ); +} + +fn add_no_safe_outputs_proposed( + audit: &AuditData, + findings: &mut Vec, + recommendations: &mut Vec, +) { + let Some(summary) = &audit.safe_output_summary else { + return; + }; + if summary.proposed_count != 0 || !audit.noops.is_empty() { + return; + } + + let finding = Finding { + category: String::from("runtime"), + severity: Severity::Info, + title: String::from("Agent produced no safe outputs and no noop record"), + description: String::from( + "The agent completed without proposing any safe outputs and without using the noop tool. 
This may indicate an early exit, an unhandled error, or a misconfigured agent.", + ), + impact: None, + }; + push_finding(findings, finding); + + let recommendation = Recommendation { + priority: String::from("medium"), + action: String::from("Check the agent stdout (agent-output.txt) for early-exit signals."), + reason: String::from( + "Without outputs or an explicit noop, the raw agent log is the fastest way to explain the silent run.", + ), + example: None, + }; + push_recommendation(recommendations, recommendation); +} + +fn add_error_count_findings( + audit: &AuditData, + findings: &mut Vec, + _recommendations: &mut Vec, +) { + if audit.metrics.error_count == 0 { + return; + } + + push_finding( + findings, + Finding { + category: String::from("runtime"), + severity: Severity::Medium, + title: format!("{} error(s) detected", audit.metrics.error_count), + description: format!( + "Audit detected {} error event(s) in the run logs.", + audit.metrics.error_count + ), + impact: None, + }, + ); +} + +fn push_finding(findings: &mut Vec, finding: Finding) { + if !findings.contains(&finding) { + findings.push(finding); + } +} + +fn push_recommendation(recommendations: &mut Vec, recommendation: Recommendation) { + if !recommendations.contains(&recommendation) { + recommendations.push(recommendation); + } +} + +#[cfg(test)] +mod tests { + use super::derive_findings; + use crate::audit::model::{ + AuditData, DomainStat, Finding, FirewallAnalysis, MCPServerHealth, MCPServerStats, + MetricsData, MissingDataReport, MissingToolReport, NoopReport, Recommendation, + SafeOutputSummary, Severity, + }; + + fn finding_by_title<'a>(audit: &'a AuditData, title: &str) -> &'a Finding { + audit + .key_findings + .iter() + .find(|finding| finding.title == title) + .unwrap_or_else(|| panic!("missing finding: {title}")) + } + + fn recommendation_by_action<'a>(audit: &'a AuditData, action: &str) -> &'a Recommendation { + audit + .recommendations + .iter() + .find(|recommendation| 
recommendation.action == action) + .unwrap_or_else(|| panic!("missing recommendation: {action}")) + } + + #[test] + fn empty_audit_adds_no_findings_or_recommendations() { + let mut audit = AuditData::default(); + + derive_findings(&mut audit); + + assert!(audit.key_findings.is_empty()); + assert!(audit.recommendations.is_empty()); + } + + #[test] + fn detection_finding_is_preserved_without_duplication() { + let detection_finding = Finding { + category: String::from("safe_outputs"), + severity: Severity::High, + title: String::from("Detection rejected 2 safe output(s)"), + description: String::from("The threat-analysis verdict dropped the batch."), + impact: Some(String::from("No items were created.")), + }; + let mut audit = AuditData { + key_findings: vec![detection_finding.clone()], + ..Default::default() + }; + + derive_findings(&mut audit); + + assert!(audit.key_findings.contains(&detection_finding)); + assert_eq!( + audit + .key_findings + .iter() + .filter(|finding| finding.title == detection_finding.title) + .count(), + 1 + ); + } + + #[test] + fn elevated_mcp_error_rate_rule_emits_finding_and_recommendation() { + let mut audit = AuditData { + mcp_server_health: Some(MCPServerHealth { + servers: vec![MCPServerStats { + name: String::from("github"), + total_calls: 10, + error_count: 2, + error_rate: 0.2, + unreliable: true, + }], + }), + ..Default::default() + }; + + derive_findings(&mut audit); + + let finding = finding_by_title(&audit, "MCP server 'github' is unreliable"); + assert_eq!(finding.severity, Severity::High); + assert_eq!( + finding.description, + "github has an error rate of 20.0% across 10 calls (threshold: 10% with ≥5 calls)." + ); + assert_eq!( + finding.impact.as_deref(), + Some( + "Agent calls to github may fail intermittently; consider retry or fallback strategy." 
+ ) + ); + + let recommendation = recommendation_by_action(&audit, "Inspect MCPG logs for github"); + assert_eq!(recommendation.priority, "high"); + assert_eq!( + recommendation.reason, + "Recurring errors degrade agent reliability." + ); + } + + #[test] + fn denied_network_domains_rule_emits_finding_and_recommendation() { + let mut audit = AuditData { + firewall_analysis: Some(FirewallAnalysis { + domains: vec![DomainStat { + domain: String::from("example.com"), + status: String::from("denied"), + request_count: 3, + ..Default::default() + }], + denied_count: 3, + ..Default::default() + }), + ..Default::default() + }; + + derive_findings(&mut audit); + + let finding = finding_by_title(&audit, "Network access denied: example.com"); + assert_eq!(finding.severity, Severity::High); + assert_eq!( + finding.description, + "AWF firewall denied 3 request(s) to example.com." + ); + + let recommendation = recommendation_by_action( + &audit, + "Add 'example.com' to network.allowed if access is legitimate.", + ); + assert_eq!(recommendation.priority, "medium"); + } + + #[test] + fn high_token_usage_rule_emits_finding_and_recommendation() { + let mut audit = AuditData { + metrics: MetricsData { + token_usage: 100_001, + turns: 7, + ..Default::default() + }, + ..Default::default() + }; + + derive_findings(&mut audit); + + let finding = finding_by_title(&audit, "High token usage: 100001 tokens"); + assert_eq!(finding.severity, Severity::Medium); + assert_eq!( + finding.description, + "Run consumed 100001 tokens across 7 turn(s). Consider reducing context size or pruning history." 
+ ); + + let recommendation = recommendation_by_action( + &audit, + "Review the agent prompt and tool usage for unnecessary context.", + ); + assert_eq!(recommendation.priority, "medium"); + } + + #[test] + fn repeated_noops_rule_emits_finding_and_recommendation() { + let mut audit = AuditData { + noops: vec![ + NoopReport::default(), + NoopReport::default(), + NoopReport::default(), + ], + ..Default::default() + }; + + derive_findings(&mut audit); + + let finding = finding_by_title(&audit, "Agent reported 3 noop outcomes"); + assert_eq!(finding.severity, Severity::Low); + assert_eq!( + finding.description, + "The agent declined to act 3 time(s) without proposing safe outputs. Frequent noops may indicate prompt or tool gaps." + ); + + let recommendation = recommendation_by_action( + &audit, + "Review noop reasons to determine whether tools or prompt need adjustment.", + ); + assert_eq!(recommendation.priority, "low"); + } + + #[test] + fn missing_tools_cluster_rule_emits_finding_and_recommendation() { + let mut audit = AuditData { + missing_tools: vec![ + MissingToolReport { + tool: Some(String::from("azure-devops")), + reason: Some(String::from("tool not configured")), + ..Default::default() + }, + MissingToolReport { + tool: Some(String::from("azure-devops")), + reason: Some(String::from("tool omitted from workflow")), + ..Default::default() + }, + ], + ..Default::default() + }; + + derive_findings(&mut audit); + + let finding = finding_by_title(&audit, "Tool 'azure-devops' marked missing 2 times"); + assert_eq!(finding.severity, Severity::Medium); + assert_eq!( + finding.description, + "The agent attempted to use 'azure-devops' but it was not available. Reasons reported: tool not configured; tool omitted from workflow." 
+ ); + + let recommendation = recommendation_by_action( + &audit, + "Add 'azure-devops' to the agent's tools: section if the use case is legitimate.", + ); + assert_eq!(recommendation.priority, "medium"); + } + + #[test] + fn missing_data_cluster_rule_emits_finding_only() { + let mut audit = AuditData { + missing_data: vec![ + MissingDataReport::default(), + MissingDataReport::default(), + MissingDataReport::default(), + ], + ..Default::default() + }; + + derive_findings(&mut audit); + + let finding = finding_by_title(&audit, "3 missing-data reports"); + assert_eq!(finding.severity, Severity::Low); + assert_eq!( + finding.description, + "The agent reported missing data 3 time(s). This often means an input file, work-item, or prior context was unavailable." + ); + assert!(audit.recommendations.is_empty()); + } + + #[test] + fn no_safe_outputs_proposed_rule_emits_finding_and_recommendation() { + let mut audit = AuditData { + safe_output_summary: Some(SafeOutputSummary::default()), + ..Default::default() + }; + + derive_findings(&mut audit); + + let finding = finding_by_title(&audit, "Agent produced no safe outputs and no noop record"); + assert_eq!(finding.severity, Severity::Info); + assert_eq!( + finding.description, + "The agent completed without proposing any safe outputs and without using the noop tool. This may indicate an early exit, an unhandled error, or a misconfigured agent." 
+ ); + + let recommendation = recommendation_by_action( + &audit, + "Check the agent stdout (agent-output.txt) for early-exit signals.", + ); + assert_eq!(recommendation.priority, "medium"); + } + + #[test] + fn error_count_rule_emits_finding_only() { + let mut audit = AuditData { + metrics: MetricsData { + error_count: 2, + ..Default::default() + }, + ..Default::default() + }; + + derive_findings(&mut audit); + + let finding = finding_by_title(&audit, "2 error(s) detected"); + assert_eq!(finding.severity, Severity::Medium); + assert_eq!( + finding.description, + "Audit detected 2 error event(s) in the run logs." + ); + assert!(audit.recommendations.is_empty()); + } + + #[test] + fn combined_findings_are_appended_and_preserved_across_passes() { + let mut audit = AuditData { + mcp_server_health: Some(MCPServerHealth { + servers: vec![MCPServerStats { + name: String::from("cache-memory"), + total_calls: 8, + error_count: 2, + error_rate: 0.25, + unreliable: true, + }], + }), + firewall_analysis: Some(FirewallAnalysis { + domains: vec![ + DomainStat { + domain: String::from("packages.example.com"), + status: String::from("denied"), + request_count: 6, + ..Default::default() + }, + DomainStat { + domain: String::from("artifacts.example.com"), + status: String::from("mixed"), + request_count: 4, + ..Default::default() + }, + ], + denied_count: 10, + ..Default::default() + }), + safe_output_summary: Some(SafeOutputSummary::default()), + ..Default::default() + }; + + derive_findings(&mut audit); + audit.noops = vec![ + NoopReport::default(), + NoopReport::default(), + NoopReport::default(), + ]; + derive_findings(&mut audit); + + assert!(audit.key_findings.len() >= 5); + assert!( + audit + .key_findings + .iter() + .any(|finding| finding.title == "MCP server 'cache-memory' is unreliable") + ); + assert!( + audit + .key_findings + .iter() + .any(|finding| finding.title == "Network access denied: packages.example.com") + ); + assert!( + audit + .key_findings + .iter() + 
.any(|finding| finding.title == "Network access denied: artifacts.example.com") + ); + assert!(audit + .key_findings + .iter() + .any(|finding| finding.title == "Agent produced no safe outputs and no noop record")); + assert!( + audit + .key_findings + .iter() + .any(|finding| finding.title == "Agent reported 3 noop outcomes") + ); + } + + #[test] + fn denied_domains_rule_caps_detailed_findings_and_adds_rollup() { + let mut audit = AuditData { + firewall_analysis: Some(FirewallAnalysis { + domains: vec![ + DomainStat { + domain: String::from("d1.example.com"), + status: String::from("denied"), + request_count: 7, + ..Default::default() + }, + DomainStat { + domain: String::from("d2.example.com"), + status: String::from("denied"), + request_count: 6, + ..Default::default() + }, + DomainStat { + domain: String::from("d3.example.com"), + status: String::from("denied"), + request_count: 5, + ..Default::default() + }, + DomainStat { + domain: String::from("d4.example.com"), + status: String::from("denied"), + request_count: 4, + ..Default::default() + }, + DomainStat { + domain: String::from("d5.example.com"), + status: String::from("denied"), + request_count: 3, + ..Default::default() + }, + DomainStat { + domain: String::from("d6.example.com"), + status: String::from("denied"), + request_count: 2, + ..Default::default() + }, + DomainStat { + domain: String::from("d7.example.com"), + status: String::from("denied"), + request_count: 1, + ..Default::default() + }, + ], + denied_count: 28, + ..Default::default() + }), + ..Default::default() + }; + + derive_findings(&mut audit); + + assert_eq!(audit.key_findings.len(), 6); + assert!( + audit + .key_findings + .iter() + .any(|finding| finding.title == "Network access denied: d1.example.com") + ); + assert!( + audit + .key_findings + .iter() + .any(|finding| finding.title == "Network access denied: d5.example.com") + ); + assert!( + audit + .key_findings + .iter() + .any(|finding| finding.title == "2 additional domains were 
denied") + ); + assert_eq!(audit.recommendations.len(), 5); + } +} diff --git a/src/audit/mod.rs b/src/audit/mod.rs new file mode 100644 index 00000000..8ac1d045 --- /dev/null +++ b/src/audit/mod.rs @@ -0,0 +1,15 @@ +/// Shared audit data types for `ado-aw audit`. +/// +/// This module defines the public report model that analyzers populate and renderers +/// consume for single-build Azure DevOps audit output. +pub mod analyzers; +pub mod cache; +pub mod cli; +pub mod findings; +pub mod model; +pub mod render; +pub mod url; + +pub use cli::{AuditOptions, dispatch}; +#[allow(unused_imports)] +pub use model::*; diff --git a/src/audit/model.rs b/src/audit/model.rs new file mode 100644 index 00000000..2e7068be --- /dev/null +++ b/src/audit/model.rs @@ -0,0 +1,1066 @@ +#![allow(dead_code)] + +use serde::{Deserialize, Serialize}; +use serde_json::Value; +use std::collections::BTreeMap; + +fn is_zero_u64(value: &u64) -> bool { + *value == 0 +} + +fn is_zero_f64(value: &f64) -> bool { + *value == 0.0 +} + +/// Top-level audit report for a single Azure DevOps build. +/// +/// This model is populated from build metadata plus downloaded stage artifacts such as +/// `agent_outputs_`, `analyzed_outputs_`, and `safe_outputs`. +#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)] +#[serde(default)] +pub struct AuditData { + /// High-level build and pipeline metadata resolved from Azure DevOps APIs and staged metadata files. + pub overview: OverviewData, + /// Task-domain classification derived from audit heuristics over the run's prompts and outputs. + #[serde(skip_serializing_if = "Option::is_none")] + pub task_domain: Option, + /// Behavior fingerprint information derived from analyzer heuristics over the run. + #[serde(skip_serializing_if = "Option::is_none")] + pub behavior_fingerprint: Option, + /// Agentic assessments emitted by higher-level audit heuristics. 
+ #[serde(skip_serializing_if = "Vec::is_empty", default)] + pub agentic_assessments: Vec, + /// Aggregate numeric metrics derived from OTel and audit processing. + pub metrics: MetricsData, + /// Important findings synthesized from analyzer output. + #[serde(skip_serializing_if = "Vec::is_empty", default)] + pub key_findings: Vec, + /// Recommended next actions derived from the audit findings. + #[serde(skip_serializing_if = "Vec::is_empty", default)] + pub recommendations: Vec, + /// Optional derived performance metrics computed from token, cost, and tool usage data. + #[serde(skip_serializing_if = "Option::is_none")] + pub performance_metrics: Option, + /// Engine configuration captured from compiled metadata and runtime emission. + #[serde(skip_serializing_if = "Option::is_none")] + pub engine_config: Option, + /// Rollup of proposed, executed, and dropped safe outputs for the build. + #[serde(skip_serializing_if = "Option::is_none")] + pub safe_output_summary: Option, + /// Per-item safe-output execution outcomes emitted by the ADO SafeOutputs stage. + #[serde(skip_serializing_if = "Option::is_none")] + pub safe_output_execution: Option, + /// Aggregate rollup of safe outputs rejected before or during execution. + #[serde(skip_serializing_if = "Option::is_none")] + pub rejected_safe_outputs: Option, + /// Threat-detection verdict information from `analyzed_outputs_`. + #[serde(skip_serializing_if = "Option::is_none")] + pub detection_analysis: Option, + /// MCP server reliability and call health derived from gateway logs. + #[serde(skip_serializing_if = "Option::is_none")] + pub mcp_server_health: Option, + /// Job-level status data derived from the Azure DevOps build timeline. + #[serde(skip_serializing_if = "Vec::is_empty", default)] + pub jobs: Vec, + /// Files downloaded while assembling the audit input set. + #[serde(default)] + pub downloaded_files: Vec, + /// Missing-tool reports captured from safe-output or MCP artifacts. 
+ #[serde(skip_serializing_if = "Vec::is_empty", default)] + pub missing_tools: Vec, + /// Missing-data reports captured from safe-output or MCP artifacts. + #[serde(skip_serializing_if = "Vec::is_empty", default)] + pub missing_data: Vec, + /// No-op reports emitted by runtime tools during the build. + #[serde(skip_serializing_if = "Vec::is_empty", default)] + pub noops: Vec, + /// MCP failure reports derived from gateway or tool execution artifacts. + #[serde(skip_serializing_if = "Vec::is_empty", default)] + pub mcp_failures: Vec, + /// Firewall-domain analysis derived from AWF firewall logs. + #[serde(skip_serializing_if = "Option::is_none")] + pub firewall_analysis: Option, + /// Policy-rule analysis derived from AWF policy artifacts. + #[serde(skip_serializing_if = "Option::is_none")] + pub policy_analysis: Option, + /// Non-fatal or fatal errors encountered while auditing or discovered in artifacts. + #[serde(skip_serializing_if = "Vec::is_empty", default)] + pub errors: Vec, + /// Warning rows surfaced during audit processing. + #[serde(skip_serializing_if = "Vec::is_empty", default)] + pub warnings: Vec, + /// High-level tool usage rollups derived from runtime telemetry. + #[serde(skip_serializing_if = "Vec::is_empty", default)] + pub tool_usage: Vec, + /// MCP-specific tool usage rollups derived from MCP gateway logs. + #[serde(skip_serializing_if = "Option::is_none")] + pub mcp_tool_usage: Option, + /// Created external items reported by successful safe-output execution. + #[serde(skip_serializing_if = "Vec::is_empty", default)] + pub created_items: Vec, +} + +/// Overview metadata for the audited build. +/// +/// This is sourced from Azure DevOps build APIs, timeline data, and `staging/aw_info.json`. +#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)] +#[serde(default)] +pub struct OverviewData { + /// Azure DevOps build identifier. 
+ #[serde(default, skip_serializing_if = "is_zero_u64")] + pub build_id: u64, + /// Azure DevOps pipeline definition name. + #[serde(default, skip_serializing_if = "String::is_empty")] + pub pipeline_name: String, + /// Build lifecycle status such as `completed` or `inProgress`. + #[serde(default, skip_serializing_if = "String::is_empty")] + pub status: String, + /// Final build result using Azure DevOps terminology. + #[serde(skip_serializing_if = "Option::is_none")] + pub result: Option, + /// Build creation timestamp from the Azure DevOps build record. + #[serde(skip_serializing_if = "Option::is_none")] + pub created_at: Option, + /// Build start timestamp from the Azure DevOps build record or timeline. + #[serde(skip_serializing_if = "Option::is_none")] + pub started_at: Option, + /// Build completion timestamp from the Azure DevOps build record or timeline. + #[serde(skip_serializing_if = "Option::is_none")] + pub finished_at: Option, + /// Human-readable build duration derived from build timestamps. + #[serde(skip_serializing_if = "Option::is_none")] + pub duration: Option, + /// Source branch recorded for the audited run. + #[serde(skip_serializing_if = "Option::is_none")] + pub source_branch: Option, + /// Source commit or version recorded for the audited run. + #[serde(skip_serializing_if = "Option::is_none")] + pub source_version: Option, + /// Human-facing URL for the audited build. + #[serde(skip_serializing_if = "Option::is_none")] + pub url: Option, + /// Local path where build logs or downloaded artifacts were stored. + #[serde(skip_serializing_if = "Option::is_none")] + pub logs_path: Option, + /// Runtime-emitted AW metadata from `staging/aw_info.json`. + #[serde(skip_serializing_if = "Option::is_none")] + pub aw_info: Option, +} + +/// Runtime-emitted agentic workflow metadata. +/// +/// This is read from `staging/aw_info.json`, which mirrors the compiled marker metadata plus runtime context. 
+#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)] +#[serde(default)] +pub struct AwInfo { + /// Configured engine name for the run. + #[serde(skip_serializing_if = "Option::is_none")] + pub engine: Option, + /// Model identifier used by the agent runtime. + #[serde(skip_serializing_if = "Option::is_none")] + pub model: Option, + /// Agent name emitted by the compiled workflow metadata. + #[serde(skip_serializing_if = "Option::is_none")] + pub agent_name: Option, + /// Source markdown path for the compiled workflow. + #[serde(skip_serializing_if = "Option::is_none")] + pub source: Option, + /// Compile target for the workflow, such as `standalone` or `stage`. + #[serde(skip_serializing_if = "Option::is_none")] + pub target: Option, + /// Compiler version that produced the workflow. + #[serde(skip_serializing_if = "Option::is_none")] + pub compiler_version: Option, +} + +/// Aggregate numeric metrics for the audited run. +/// +/// These values are typically sourced from OTel logs plus audit-time counting of warnings and errors. +#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)] +#[serde(default)] +pub struct MetricsData { + /// Total tokens consumed by the run. + #[serde(default, skip_serializing_if = "is_zero_u64")] + pub token_usage: u64, + /// Effective billed or normalized tokens used for costing. + #[serde(default, skip_serializing_if = "is_zero_u64")] + pub effective_tokens: u64, + /// Estimated run cost in provider currency units. + #[serde(default, skip_serializing_if = "is_zero_f64")] + pub estimated_cost: f64, + /// Total model turns captured for the run. + #[serde(default, skip_serializing_if = "is_zero_u64")] + pub turns: u64, + /// Number of error rows captured in the audit. + #[serde(default, skip_serializing_if = "is_zero_u64")] + pub error_count: u64, + /// Number of warning rows captured in the audit. 
+ #[serde(default, skip_serializing_if = "is_zero_u64")] + pub warning_count: u64, +} + +/// A notable audit finding synthesized from one or more analyzer results. +/// +/// Findings are rendered in the report and originate from analyzer heuristics over downloaded artifacts. +#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)] +#[serde(default)] +pub struct Finding { + /// Logical finding category such as `security`, `cost`, or `tooling`. + #[serde(default, skip_serializing_if = "String::is_empty")] + pub category: String, + /// Severity assigned by the audit heuristics. + pub severity: Severity, + /// Short human-readable title for the finding. + #[serde(default, skip_serializing_if = "String::is_empty")] + pub title: String, + /// Longer explanation of the finding. + #[serde(default, skip_serializing_if = "String::is_empty")] + pub description: String, + /// Optional impact statement explaining why the finding matters. + #[serde(skip_serializing_if = "Option::is_none")] + pub impact: Option, +} + +/// Severity assigned to an audit finding. +/// +/// This matches the lowercase wire format used by gh-aw-compatible JSON consumers. +#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "lowercase")] +pub enum Severity { + /// Critical issue requiring immediate attention. + Critical, + /// High-severity issue with significant impact. + High, + /// Medium-severity issue with moderate impact. + Medium, + /// Low-severity issue with limited impact. + Low, + /// Informational observation. + #[default] + Info, +} + +/// Recommended follow-up action emitted by the audit. +/// +/// Recommendations are synthesized from findings and intended for humans or automation consuming the report. +#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)] +#[serde(default)] +pub struct Recommendation { + /// Recommendation priority, typically `high`, `medium`, or `low`. 
+ #[serde(default, skip_serializing_if = "String::is_empty")] + pub priority: String, + /// Recommended action to take. + #[serde(default, skip_serializing_if = "String::is_empty")] + pub action: String, + /// Reason the recommendation was emitted. + #[serde(default, skip_serializing_if = "String::is_empty")] + pub reason: String, + /// Optional example command, config, or remediation snippet. + #[serde(skip_serializing_if = "Option::is_none")] + pub example: Option, +} + +/// Derived performance-oriented metrics for the run. +/// +/// These values are computed from audit data such as token usage, tool calls, and network activity. +#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)] +#[serde(default)] +pub struct PerformanceMetrics { + /// Throughput estimate derived from tokens and duration. + #[serde(skip_serializing_if = "Option::is_none")] + pub tokens_per_minute: Option, + /// Human-readable description of cost efficiency. + #[serde(skip_serializing_if = "Option::is_none")] + pub cost_efficiency: Option, + /// Most frequently used tool name. + #[serde(skip_serializing_if = "Option::is_none")] + pub most_used_tool: Option, + /// Number of observed network requests. + #[serde(skip_serializing_if = "Option::is_none")] + pub network_requests: Option, +} + +/// Engine configuration recorded for the audited run. +/// +/// This is sourced from compiled workflow metadata and runtime-emitted AW info. +#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)] +#[serde(default)] +pub struct AuditEngineConfig { + /// Engine identifier such as `copilot`. + #[serde(default, skip_serializing_if = "String::is_empty")] + pub engine: String, + /// Model identifier configured for the run. + #[serde(skip_serializing_if = "Option::is_none")] + pub model: Option, + /// Engine version string, when emitted. + #[serde(skip_serializing_if = "Option::is_none")] + pub version: Option, + /// Timeout configured for the run in minutes. 
+ #[serde(skip_serializing_if = "Option::is_none")] + pub timeout_minutes: Option, +} + +/// Job-level status information for one stage in the build timeline. +/// +/// This is derived from Azure DevOps timeline records for the audited build. +#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)] +#[serde(default)] +pub struct JobData { + /// Job display name. + #[serde(default, skip_serializing_if = "String::is_empty")] + pub name: String, + /// Job status such as `completed` or `inProgress`. + #[serde(default, skip_serializing_if = "String::is_empty")] + pub status: String, + /// Final job result using Azure DevOps terminology. + #[serde(skip_serializing_if = "Option::is_none")] + pub result: Option, + /// Human-readable job duration. + #[serde(skip_serializing_if = "Option::is_none")] + pub duration: Option, + /// Job start timestamp. + #[serde(skip_serializing_if = "Option::is_none")] + pub started_at: Option, + /// Job finish timestamp. + #[serde(skip_serializing_if = "Option::is_none")] + pub finished_at: Option, +} + +/// Metadata about a file downloaded while assembling the audit. +/// +/// These rows are produced by the artifact download phase for traceability and caching. +#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)] +#[serde(default)] +pub struct FileInfo { + /// Relative or absolute file path on disk. + #[serde(default, skip_serializing_if = "String::is_empty")] + pub path: String, + /// File size in bytes. + #[serde(default, skip_serializing_if = "is_zero_u64")] + pub size_bytes: u64, + /// Optional SHA-256 digest of the downloaded file. + #[serde(skip_serializing_if = "Option::is_none")] + pub sha256: Option, +} + +/// Report describing a requested tool that was unavailable to the agent. +/// +/// These rows are typically sourced from missing-tool safe-output or MCP artifacts. 
+#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)] +#[serde(default)] +pub struct MissingToolReport { + /// Tool name, when known. + #[serde(skip_serializing_if = "Option::is_none")] + pub tool: Option, + /// Optional contextual identifier for where the problem occurred. + #[serde(skip_serializing_if = "Option::is_none")] + pub context: Option, + /// Optional human-readable reason for the report. + #[serde(skip_serializing_if = "Option::is_none")] + pub reason: Option, + /// Optional timestamp for when the report was emitted. + #[serde(skip_serializing_if = "Option::is_none")] + pub timestamp: Option, + /// Forward-compatible payload preserved from the source artifact. + #[serde(default, skip_serializing_if = "Value::is_null")] + pub extra: Value, +} + +/// Report describing required data that was unavailable to the agent. +/// +/// These rows are typically sourced from missing-data safe-output or MCP artifacts. +#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)] +#[serde(default)] +pub struct MissingDataReport { + /// Tool name associated with the missing data, when known. + #[serde(skip_serializing_if = "Option::is_none")] + pub tool: Option, + /// Optional contextual identifier for where the problem occurred. + #[serde(skip_serializing_if = "Option::is_none")] + pub context: Option, + /// Optional human-readable reason for the report. + #[serde(skip_serializing_if = "Option::is_none")] + pub reason: Option, + /// Optional timestamp for when the report was emitted. + #[serde(skip_serializing_if = "Option::is_none")] + pub timestamp: Option, + /// Forward-compatible payload preserved from the source artifact. + #[serde(default, skip_serializing_if = "Value::is_null")] + pub extra: Value, +} + +/// Report describing a tool invocation that intentionally performed no action. +/// +/// These rows are typically sourced from noop safe-output artifacts. 
+#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)] +#[serde(default)] +pub struct NoopReport { + /// Tool name, when known. + #[serde(skip_serializing_if = "Option::is_none")] + pub tool: Option, + /// Optional contextual identifier for where the noop occurred. + #[serde(skip_serializing_if = "Option::is_none")] + pub context: Option, + /// Optional human-readable reason for the noop. + #[serde(skip_serializing_if = "Option::is_none")] + pub reason: Option, + /// Optional timestamp for when the noop was emitted. + #[serde(skip_serializing_if = "Option::is_none")] + pub timestamp: Option, + /// Forward-compatible payload preserved from the source artifact. + #[serde(default, skip_serializing_if = "Value::is_null")] + pub extra: Value, +} + +/// Report describing a failed MCP interaction. +/// +/// These rows are sourced from MCP gateway logs or failure artifacts emitted during the run. +#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)] +#[serde(default)] +pub struct MCPFailureReport { + /// Tool name involved in the failure, when known. + #[serde(skip_serializing_if = "Option::is_none")] + pub tool: Option, + /// Optional contextual identifier for where the failure occurred. + #[serde(skip_serializing_if = "Option::is_none")] + pub context: Option, + /// Optional human-readable failure reason. + #[serde(skip_serializing_if = "Option::is_none")] + pub reason: Option, + /// Optional timestamp for when the failure was emitted. + #[serde(skip_serializing_if = "Option::is_none")] + pub timestamp: Option, + /// Forward-compatible payload preserved from the source artifact. + #[serde(default, skip_serializing_if = "Value::is_null")] + pub extra: Value, +} + +/// Firewall-domain activity analysis for the audited run. +/// +/// This section is derived from AWF firewall or proxy logs in the agent artifact set. 
+#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)] +#[serde(default)] +pub struct FirewallAnalysis { + /// Per-domain firewall statistics. + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub domains: Vec, + /// Total observed firewall requests. + #[serde(default, skip_serializing_if = "is_zero_u64")] + pub total_requests: u64, + /// Number of allowed requests. + #[serde(default, skip_serializing_if = "is_zero_u64")] + pub allowed_count: u64, + /// Number of denied requests. + #[serde(default, skip_serializing_if = "is_zero_u64")] + pub denied_count: u64, +} + +/// Firewall statistics for a single domain. +/// +/// These values are derived from AWF firewall request logs. +#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)] +#[serde(default)] +pub struct DomainStat { + /// Domain name observed in firewall logs. + #[serde(default, skip_serializing_if = "String::is_empty")] + pub domain: String, + /// Aggregate status such as `allowed`, `denied`, or `mixed`. + #[serde(default, skip_serializing_if = "String::is_empty")] + pub status: String, + /// Number of observed requests for the domain. + #[serde(default, skip_serializing_if = "is_zero_u64")] + pub request_count: u64, + /// First-observed timestamp for the domain. + #[serde(skip_serializing_if = "Option::is_none")] + pub first_seen: Option, + /// Last-observed timestamp for the domain. + #[serde(skip_serializing_if = "Option::is_none")] + pub last_seen: Option, +} + +/// Policy-analysis summary for the audited run. +/// +/// This section is derived from AWF policy manifests and audit logs when those artifacts are present. +#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)] +#[serde(default)] +pub struct PolicyAnalysis { + /// Per-rule policy statistics. + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub policies: Vec, + /// Count of allow verdicts observed. 
+ #[serde(default, skip_serializing_if = "is_zero_u64")] + pub allow_count: u64, + /// Count of deny verdicts observed. + #[serde(default, skip_serializing_if = "is_zero_u64")] + pub deny_count: u64, +} + +/// Hit statistics for a single policy rule. +/// +/// These values are derived from AWF policy evaluation artifacts. +#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)] +#[serde(default)] +pub struct PolicyRule { + /// Rule pattern or selector. + #[serde(default, skip_serializing_if = "String::is_empty")] + pub pattern: String, + /// Final verdict such as `allow` or `deny`. + #[serde(default, skip_serializing_if = "String::is_empty")] + pub verdict: String, + /// Number of observed hits for the rule. + #[serde(default, skip_serializing_if = "is_zero_u64")] + pub hit_count: u64, +} + +/// Reliability summary for MCP servers observed during the run. +/// +/// This section is derived from MCP gateway logs collected in the agent artifact set. +#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)] +#[serde(default)] +pub struct MCPServerHealth { + /// Per-server call and error statistics. + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub servers: Vec, +} + +/// Aggregate statistics for one MCP server. +/// +/// These values are derived from MCP gateway request and error logs. +#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)] +#[serde(default)] +pub struct MCPServerStats { + /// MCP server name. + #[serde(default, skip_serializing_if = "String::is_empty")] + pub name: String, + /// Total number of calls routed to the server. + #[serde(default, skip_serializing_if = "is_zero_u64")] + pub total_calls: u64, + /// Number of calls that failed. + #[serde(default, skip_serializing_if = "is_zero_u64")] + pub error_count: u64, + /// Fraction of calls that failed. + #[serde(default, skip_serializing_if = "is_zero_f64")] + pub error_rate: f64, + /// Whether the server should be considered unreliable. 
+ #[serde(default)] + pub unreliable: bool, +} + +/// MCP-tool usage summary for the run. +/// +/// This section is derived from MCP gateway logs and summarizes calls per tool. +#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)] +#[serde(default)] +pub struct MCPToolUsageData { + /// Per-tool usage statistics. + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub tools: Vec, +} + +/// Aggregate usage statistics for one MCP tool. +/// +/// These values are derived from MCP gateway logs. +#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)] +#[serde(default)] +pub struct MCPToolSummary { + /// MCP tool name. + #[serde(default, skip_serializing_if = "String::is_empty")] + pub name: String, + /// Number of times the tool was called. + #[serde(default, skip_serializing_if = "is_zero_u64")] + pub call_count: u64, + /// Number of failed tool calls. + #[serde(default, skip_serializing_if = "is_zero_u64")] + pub error_count: u64, + /// Largest observed input payload size. + #[serde(default, skip_serializing_if = "is_zero_u64")] + pub max_input_size: u64, + /// Largest observed output payload size. + #[serde(default, skip_serializing_if = "is_zero_u64")] + pub max_output_size: u64, +} + +/// Aggregate usage information for any tool seen during the run. +/// +/// This is derived from runtime telemetry such as OTel or analyzer-specific timing data. +#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)] +#[serde(default)] +pub struct ToolUsageInfo { + /// Tool name. + #[serde(default, skip_serializing_if = "String::is_empty")] + pub name: String, + /// Number of times the tool was called. + #[serde(default, skip_serializing_if = "is_zero_u64")] + pub call_count: u64, + /// Total observed duration in milliseconds. + #[serde(skip_serializing_if = "Option::is_none")] + pub total_duration_ms: Option, +} + +/// Error or warning entry captured during the audit. 
+/// +/// These rows come from analyzer failures, audit warnings, or surfaced runtime diagnostics. +#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)] +#[serde(default)] +pub struct ErrorInfo { + /// Source component that emitted the row. + #[serde(default, skip_serializing_if = "String::is_empty")] + pub source: String, + /// Human-readable error or warning message. + #[serde(default, skip_serializing_if = "String::is_empty")] + pub message: String, + /// Optional timestamp for the row. + #[serde(skip_serializing_if = "Option::is_none")] + pub timestamp: Option, +} + +/// External item created as a result of safe-output execution. +/// +/// These rows are derived from successful Stage 3 execution results. +#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)] +#[serde(default)] +pub struct CreatedItemReport { + /// Created item kind such as `pull_request` or `work_item`. + #[serde(default, skip_serializing_if = "String::is_empty")] + pub kind: String, + /// URL of the created item, when one exists. + #[serde(skip_serializing_if = "Option::is_none")] + pub url: Option, + /// Stable identifier of the created item, when one exists. + #[serde(skip_serializing_if = "Option::is_none")] + pub id: Option, + /// Human-readable title of the created item, when one exists. + #[serde(skip_serializing_if = "Option::is_none")] + pub title: Option, +} + +/// Aggregate safe-output counts for the audited build. +/// +/// This summary is derived by correlating proposed, analyzed, and executed safe-output artifacts. +#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)] +#[serde(default)] +pub struct SafeOutputSummary { + /// Number of safe outputs proposed by the agent. + #[serde(default, skip_serializing_if = "is_zero_u64")] + pub proposed_count: u64, + /// Number of safe outputs executed successfully. 
+ #[serde(default, skip_serializing_if = "is_zero_u64")] + pub executed_count: u64, + /// Number of safe outputs rejected during execution. + #[serde(default, skip_serializing_if = "is_zero_u64")] + pub rejected_by_execution_count: u64, + /// Number of safe outputs left unprocessed. + #[serde(default, skip_serializing_if = "is_zero_u64")] + pub not_processed_count: u64, +} + +/// Per-item safe-output execution details for the build. +/// +/// This section is sourced from execution manifests written by the SafeOutputs stage. +#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)] +#[serde(default)] +pub struct SafeOutputExecution { + /// Itemized execution outcomes. + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub items: Vec, +} + +/// Execution outcome for one safe-output proposal. +/// +/// This row is derived by joining proposal, detection, and execution artifacts for a single proposal context. +#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)] +#[serde(default)] +pub struct SafeOutputExecutionItem { + /// Optional proposal context used to correlate artifacts. + #[serde(skip_serializing_if = "Option::is_none")] + pub context: Option, + /// Safe-output tool name. + #[serde(default, skip_serializing_if = "String::is_empty")] + pub tool: String, + /// Final processing status for the proposal. + pub status: SafeOutputStatus, + /// Original proposal payload captured from the agent artifact. + pub proposal: Value, + /// Optional execution error string. + #[serde(skip_serializing_if = "Option::is_none")] + pub error: Option, + /// Optional execution result payload. + #[serde(skip_serializing_if = "Option::is_none")] + pub result: Option, + /// Optional rejection reason emitted by detection or execution. + #[serde(skip_serializing_if = "Option::is_none")] + pub rejection_reason: Option, + /// Whether the status applies to the entire batch rather than a single proposal. 
+ #[serde(default)] + pub applies_to_whole_batch: bool, +} + +/// Final processing status for a safe-output proposal. +/// +/// The snake_case wire format matches the ADO audit JSON contract. +#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum SafeOutputStatus { + /// Proposal was executed successfully. NOTE(review): this is also the `Default` variant, so a `SafeOutputExecutionItem` deserialized without a `status` key reports `executed` - confirm this fail-open default is intended. + #[default] + Executed, + /// Proposal was rejected while executing. + RejectedByExecution, + /// Proposal was not processed because aggregate threat detection gated the batch. + NotProcessedDueToAggregateGate, + /// Proposal was intentionally skipped. + Skipped, + /// Proposal could not be processed because execution budget was exhausted. + BudgetExhausted, +} + +/// Aggregate rollup of rejected safe outputs. +/// +/// This section is derived from detection verdicts and execution results for JSON consumers that prefer summary data. +#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)] +#[serde(default)] +pub struct RejectedSafeOutputsRollup { + /// Total number of rejected proposals. + #[serde(default, skip_serializing_if = "is_zero_u64")] + pub total_rejected: u64, + /// Rejection counts grouped by reason string. + #[serde(default, skip_serializing_if = "BTreeMap::is_empty")] + pub by_reason: BTreeMap, + /// Rejection counts grouped by threat kind. + #[serde(default, skip_serializing_if = "BTreeMap::is_empty")] + pub by_threat: BTreeMap, +} + +/// Aggregate threat-detection verdict for the run. +/// +/// This data is read from detection-stage artifacts such as `threat-analysis.json`. +#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)] +#[serde(default)] +pub struct DetectionAnalysis { + /// Threat flags reported by the detection stage. + pub threats: DetectionThreats, + /// Human-readable reasons emitted by threat detection. 
+ #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub reasons: Vec, + /// Whether the run's safe outputs were considered safe to process. + #[serde(default)] + pub safe_to_process: bool, + /// Optional path to the stored verdict artifact. + #[serde(skip_serializing_if = "Option::is_none")] + pub verdict_path: Option, +} + +/// Threat flags produced by the detection stage. +/// +/// These booleans come from the aggregate threat-detection verdict emitted for the run. +#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)] +#[serde(default)] +pub struct DetectionThreats { + /// Whether prompt injection was detected. + #[serde(default)] + pub prompt_injection: bool, + /// Whether a secret leak was detected. + #[serde(default)] + pub secret_leak: bool, + /// Whether a malicious patch was detected. + #[serde(default)] + pub malicious_patch: bool, +} + +/// Placeholder task-domain summary for the MVP audit contract. +/// +/// This opaque section keeps the JSON shape compatible until richer heuristics land. +#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)] +#[serde(default)] +pub struct TaskDomainInfo { + /// Human-readable summary of the inferred task domain. + #[serde(default, skip_serializing_if = "String::is_empty")] + pub summary: String, + /// Opaque analyzer-specific payload for future expansion. + #[serde(default, skip_serializing_if = "Value::is_null")] + pub data: Value, +} + +/// Placeholder behavior-fingerprint summary for the MVP audit contract. +/// +/// This opaque section keeps the JSON shape compatible until richer heuristics land. +#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)] +#[serde(default)] +pub struct BehaviorFingerprint { + /// Human-readable summary of the inferred behavior pattern. + #[serde(default, skip_serializing_if = "String::is_empty")] + pub summary: String, + /// Opaque analyzer-specific payload for future expansion. 
+ #[serde(default, skip_serializing_if = "Value::is_null")] + pub data: Value, +} + +/// Placeholder agentic assessment summary for the MVP audit contract. +/// +/// This opaque section keeps the JSON shape compatible until richer heuristics land. +#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)] +#[serde(default)] +pub struct AgenticAssessment { + /// Human-readable summary of the assessment. + #[serde(default, skip_serializing_if = "String::is_empty")] + pub summary: String, + /// Opaque analyzer-specific payload for future expansion. + #[serde(default, skip_serializing_if = "Value::is_null")] + pub data: Value, +} + +#[cfg(test)] +mod tests { + use super::*; + use serde_json::json; + + fn populated_audit_data() -> AuditData { + let mut by_reason = BTreeMap::new(); + by_reason.insert(String::from("aggregate_gate"), 1); + + let mut by_threat = BTreeMap::new(); + by_threat.insert(String::from("prompt_injection"), 1); + by_threat.insert(String::from("secret_leak"), 0); + by_threat.insert(String::from("malicious_patch"), 0); + + AuditData { + overview: OverviewData { + build_id: 42, + pipeline_name: String::from("agentic-pipeline"), + status: String::from("completed"), + result: Some(String::from("succeeded")), + created_at: Some(String::from("2026-05-21T12:00:00Z")), + started_at: Some(String::from("2026-05-21T12:01:00Z")), + finished_at: Some(String::from("2026-05-21T12:06:00Z")), + duration: Some(String::from("5m")), + source_branch: Some(String::from("refs/heads/main")), + source_version: Some(String::from("abcdef123456")), + url: Some(String::from( + "https://dev.azure.com/example/project/_build/results?buildId=42", + )), + logs_path: Some(String::from("logs\\build-42")), + aw_info: Some(AwInfo { + engine: Some(String::from("copilot")), + model: Some(String::from("gpt-5.4")), + agent_name: Some(String::from("agentic-auditor")), + source: Some(String::from("agents/security-scan.md")), + target: Some(String::from("standalone")), + 
compiler_version: Some(String::from("0.30.2")), + }), + }, + task_domain: Some(TaskDomainInfo { + summary: String::from("Security review workflow"), + data: json!({"domain": "security"}), + }), + behavior_fingerprint: Some(BehaviorFingerprint { + summary: String::from("High tool usage with safe outputs"), + data: json!({"pattern": "tool-heavy"}), + }), + agentic_assessments: vec![AgenticAssessment { + summary: String::from("Agent produced actionable changes"), + data: json!({"score": 0.92}), + }], + metrics: MetricsData { + token_usage: 1200, + effective_tokens: 1000, + estimated_cost: 1.23, + turns: 12, + error_count: 1, + warning_count: 2, + }, + key_findings: vec![Finding { + category: String::from("security"), + severity: Severity::High, + title: String::from("Detection gate tripped"), + description: String::from("Threat detection blocked the safe-output batch."), + impact: Some(String::from("No proposed changes were executed.")), + }], + recommendations: vec![Recommendation { + priority: String::from("high"), + action: String::from("Review the detection-stage verdict"), + reason: String::from("The aggregate gate prevented execution."), + example: Some(String::from( + "Inspect analyzed_outputs_42\\threat-analysis.json", + )), + }], + performance_metrics: Some(PerformanceMetrics { + tokens_per_minute: Some(240.0), + cost_efficiency: Some(String::from("moderate")), + most_used_tool: Some(String::from("edit")), + network_requests: Some(18), + }), + engine_config: Some(AuditEngineConfig { + engine: String::from("copilot"), + model: Some(String::from("gpt-5.4")), + version: Some(String::from("2026.05")), + timeout_minutes: Some(30), + }), + safe_output_summary: Some(SafeOutputSummary { + proposed_count: 2, + executed_count: 1, + rejected_by_execution_count: 0, + not_processed_count: 1, + }), + safe_output_execution: Some(SafeOutputExecution { + items: vec![SafeOutputExecutionItem { + context: Some(String::from("pr-1")), + tool: String::from("create_pull_request"), + 
status: SafeOutputStatus::NotProcessedDueToAggregateGate, + proposal: json!({"title": "Fix pipeline", "repository": "repo"}), + error: Some(String::from("Batch blocked by detection gate")), + result: Some(json!({"status": "blocked"})), + rejection_reason: Some(String::from("prompt_injection")), + applies_to_whole_batch: true, + }], + }), + rejected_safe_outputs: Some(RejectedSafeOutputsRollup { + total_rejected: 1, + by_reason, + by_threat, + }), + detection_analysis: Some(DetectionAnalysis { + threats: DetectionThreats { + prompt_injection: true, + secret_leak: false, + malicious_patch: false, + }, + reasons: vec![String::from("Suspicious instruction in fetched content")], + safe_to_process: false, + verdict_path: Some(String::from("analyzed_outputs_42\\threat-analysis.json")), + }), + mcp_server_health: Some(MCPServerHealth { + servers: vec![MCPServerStats { + name: String::from("github-mcp"), + total_calls: 8, + error_count: 1, + error_rate: 0.125, + unreliable: true, + }], + }), + jobs: vec![JobData { + name: String::from("Agent"), + status: String::from("completed"), + result: Some(String::from("succeeded")), + duration: Some(String::from("4m")), + started_at: Some(String::from("2026-05-21T12:01:00Z")), + finished_at: Some(String::from("2026-05-21T12:05:00Z")), + }], + downloaded_files: vec![FileInfo { + path: String::from("logs\\build-42\\agent_outputs_42\\otel.jsonl"), + size_bytes: 2048, + sha256: Some(String::from("abc123")), + }], + missing_tools: vec![MissingToolReport { + tool: Some(String::from("azure-devops")), + context: Some(String::from("work-item-sync")), + reason: Some(String::from("Tool not configured")), + timestamp: Some(String::from("2026-05-21T12:03:00Z")), + extra: json!({"required": true}), + }], + missing_data: vec![MissingDataReport { + tool: Some(String::from("create_work_item")), + context: Some(String::from("wi-1")), + reason: Some(String::from("missing title")), + timestamp: Some(String::from("2026-05-21T12:03:10Z")), + extra: 
json!({"field": "title"}), + }], + noops: vec![NoopReport { + tool: Some(String::from("noop")), + context: Some(String::from("noop-1")), + reason: Some(String::from("Nothing to do")), + timestamp: Some(String::from("2026-05-21T12:03:20Z")), + extra: json!({"kind": "noop"}), + }], + mcp_failures: vec![MCPFailureReport { + tool: Some(String::from("github.search_code")), + context: Some(String::from("call-17")), + reason: Some(String::from("HTTP 502")), + timestamp: Some(String::from("2026-05-21T12:03:30Z")), + extra: json!({"retryable": true}), + }], + firewall_analysis: Some(FirewallAnalysis { + domains: vec![DomainStat { + domain: String::from("api.github.com"), + status: String::from("allowed"), + request_count: 7, + first_seen: Some(String::from("2026-05-21T12:01:10Z")), + last_seen: Some(String::from("2026-05-21T12:04:55Z")), + }], + total_requests: 10, + allowed_count: 9, + denied_count: 1, + }), + policy_analysis: Some(PolicyAnalysis { + policies: vec![PolicyRule { + pattern: String::from("https://api.github.com/**"), + verdict: String::from("allow"), + hit_count: 7, + }], + allow_count: 1, + deny_count: 1, + }), + errors: vec![ErrorInfo { + source: String::from("audit::detection"), + message: String::from("Threat detection blocked execution"), + timestamp: Some(String::from("2026-05-21T12:05:00Z")), + }], + warnings: vec![ErrorInfo { + source: String::from("audit::firewall"), + message: String::from("One request was denied"), + timestamp: Some(String::from("2026-05-21T12:04:00Z")), + }], + tool_usage: vec![ToolUsageInfo { + name: String::from("edit"), + call_count: 5, + total_duration_ms: Some(1500), + }], + mcp_tool_usage: Some(MCPToolUsageData { + tools: vec![MCPToolSummary { + name: String::from("github.search_code"), + call_count: 3, + error_count: 1, + max_input_size: 512, + max_output_size: 4096, + }], + }), + created_items: vec![CreatedItemReport { + kind: String::from("pull_request"), + url: Some(String::from( + 
"https://dev.azure.com/example/project/_git/repo/pullrequest/123", + )), + id: Some(String::from("123")), + title: Some(String::from("Fix pipeline")), + }], + } + } + + #[test] + fn populated_audit_data_round_trips_through_json() { + let original = populated_audit_data(); + let json = serde_json::to_string_pretty(&original).expect("serialize populated audit data"); + let round_tripped: AuditData = + serde_json::from_str(&json).expect("deserialize populated audit data"); + + assert_eq!(round_tripped, original); + } + + #[test] + fn default_audit_data_round_trips_with_only_required_top_level_keys() { + let original = AuditData::default(); + let json = serde_json::to_string_pretty(&original).expect("serialize default audit data"); + let round_tripped: AuditData = + serde_json::from_str(&json).expect("deserialize default audit data"); + let value: Value = serde_json::from_str(&json).expect("parse default audit JSON"); + let keys: Vec<_> = value + .as_object() + .expect("top-level JSON object") + .keys() + .cloned() + .collect(); + + assert_eq!(round_tripped, original); + let mut keys_sorted = keys.clone(); + keys_sorted.sort(); + assert_eq!(keys_sorted, vec!["downloaded_files", "metrics", "overview"]); + } +} diff --git a/src/audit/render/console.rs b/src/audit/render/console.rs new file mode 100644 index 00000000..ec2986a7 --- /dev/null +++ b/src/audit/render/console.rs @@ -0,0 +1,1415 @@ +//! Console renderer for `ado-aw audit`. + +use crate::audit::model::{self, Severity}; + +/// Render `AuditData` as a Markdown-flavored text report suitable for +/// terminal output. Pure function; no I/O. 
+/// +/// Section ordering mirrors gh-aw's audit report: +/// Overview → Metrics → Key Findings → Recommendations → +/// Safe Output Summary → Rejected Safe Outputs → +/// MCP Server Health → Firewall Analysis → Policy Analysis → +/// Detection Analysis → Jobs → Downloaded Files → +/// Missing Tools → Missing Data → Noops → MCP Failures → +/// Errors → Warnings → Tool Usage → MCP Tool Usage → +/// Created Items. +/// +/// Sections that are empty/None are omitted entirely (no empty headers). +pub fn render_console(audit: &crate::audit::model::AuditData) -> String { + let mut sections = vec![ + render_overview_section(&audit.overview, audit.engine_config.as_ref()), + render_metrics_section(&audit.metrics, audit.performance_metrics.as_ref()), + ]; + + if let Some(section) = render_key_findings_section(&audit.key_findings) { + sections.push(section); + } + if let Some(section) = render_recommendations_section(&audit.recommendations) { + sections.push(section); + } + if let Some(section) = render_safe_output_summary_section(audit.safe_output_summary.as_ref()) { + sections.push(section); + } + if let Some(section) = + render_rejected_safe_outputs_section(audit.rejected_safe_outputs.as_ref()) + { + sections.push(section); + } + if let Some(section) = render_mcp_server_health_section(audit.mcp_server_health.as_ref()) { + sections.push(section); + } + if let Some(section) = render_firewall_analysis_section(audit.firewall_analysis.as_ref()) { + sections.push(section); + } + if let Some(section) = render_policy_analysis_section(audit.policy_analysis.as_ref()) { + sections.push(section); + } + if let Some(section) = render_detection_analysis_section(audit.detection_analysis.as_ref()) { + sections.push(section); + } + if let Some(section) = render_jobs_section(&audit.jobs) { + sections.push(section); + } + if let Some(section) = render_downloaded_files_section(&audit.downloaded_files) { + sections.push(section); + } + if let Some(section) = 
render_missing_tools_section(&audit.missing_tools) { + sections.push(section); + } + if let Some(section) = render_missing_data_section(&audit.missing_data) { + sections.push(section); + } + if let Some(section) = render_noops_section(&audit.noops) { + sections.push(section); + } + if let Some(section) = render_mcp_failures_section(&audit.mcp_failures) { + sections.push(section); + } + if let Some(section) = render_errors_section(&audit.errors) { + sections.push(section); + } + if let Some(section) = render_warnings_section(&audit.warnings) { + sections.push(section); + } + if let Some(section) = render_tool_usage_section(&audit.tool_usage) { + sections.push(section); + } + if let Some(section) = render_mcp_tool_usage_section(audit.mcp_tool_usage.as_ref()) { + sections.push(section); + } + if let Some(section) = render_created_items_section(&audit.created_items) { + sections.push(section); + } + + let mut out = sections.join("\n\n"); + out.push('\n'); + out +} + +fn render_overview_section( + overview: &model::OverviewData, + engine_config: Option<&model::AuditEngineConfig>, +) -> String { + let aw_info = overview.aw_info.as_ref(); + let mut rows = Vec::new(); + + if overview.build_id > 0 { + rows.push(("build_id".to_string(), format_number(overview.build_id))); + } + push_non_empty_row(&mut rows, "pipeline", &overview.pipeline_name); + push_non_empty_row(&mut rows, "status", &overview.status); + push_opt_row(&mut rows, "result", overview.result.as_deref()); + push_opt_row(&mut rows, "branch", overview.source_branch.as_deref()); + push_opt_row(&mut rows, "commit", overview.source_version.as_deref()); + if let Some(duration) = overview.duration.as_deref() { + rows.push(("duration".to_string(), normalize_duration(duration))); + } + push_opt_row(&mut rows, "url", overview.url.as_deref()); + push_opt_row(&mut rows, "created_at", overview.created_at.as_deref()); + push_opt_row(&mut rows, "started_at", overview.started_at.as_deref()); + push_opt_row(&mut rows, 
"finished_at", overview.finished_at.as_deref()); + push_opt_row(&mut rows, "logs_path", overview.logs_path.as_deref()); + + let engine = aw_info + .and_then(|info| info.engine.as_deref()) + .filter(|value| !value.is_empty()) + .map(str::to_string) + .or_else(|| { + engine_config + .filter(|config| !config.engine.is_empty()) + .map(|config| config.engine.clone()) + }); + let model = aw_info + .and_then(|info| info.model.as_deref()) + .filter(|value| !value.is_empty()) + .map(str::to_string) + .or_else(|| engine_config.and_then(|config| config.model.clone())); + + push_opt_owned_row(&mut rows, "engine", engine); + push_opt_owned_row(&mut rows, "model", model); + push_opt_owned_row( + &mut rows, + "agent", + aw_info.and_then(|info| info.agent_name.clone()), + ); + push_opt_owned_row( + &mut rows, + "source", + aw_info.and_then(|info| info.source.clone()), + ); + push_opt_owned_row( + &mut rows, + "target", + aw_info.and_then(|info| info.target.clone()), + ); + push_opt_owned_row( + &mut rows, + "compiler_version", + aw_info.and_then(|info| info.compiler_version.clone()), + ); + push_opt_owned_row( + &mut rows, + "engine_version", + engine_config.and_then(|config| config.version.clone()), + ); + if let Some(timeout_minutes) = engine_config.and_then(|config| config.timeout_minutes) { + rows.push(( + "timeout_minutes".to_string(), + format_number(timeout_minutes), + )); + } + + render_kv_section("Overview", rows, true) +} + +fn render_metrics_section( + metrics: &model::MetricsData, + performance_metrics: Option<&model::PerformanceMetrics>, +) -> String { + let mut rows = vec![ + ( + "token_usage".to_string(), + format_number(metrics.token_usage), + ), + ( + "effective_tokens".to_string(), + format_number(metrics.effective_tokens), + ), + ( + "estimated_cost".to_string(), + format_cost(metrics.estimated_cost), + ), + ("turns".to_string(), format_number(metrics.turns)), + ("errors".to_string(), format_number(metrics.error_count)), + ("warnings".to_string(), 
format_number(metrics.warning_count)), + ]; + + if let Some(performance_metrics) = performance_metrics { + if let Some(tokens_per_minute) = performance_metrics.tokens_per_minute { + rows.push(( + "tokens_per_minute".to_string(), + format_float(tokens_per_minute), + )); + } + push_opt_row( + &mut rows, + "cost_efficiency", + performance_metrics.cost_efficiency.as_deref(), + ); + push_opt_row( + &mut rows, + "most_used_tool", + performance_metrics.most_used_tool.as_deref(), + ); + if let Some(network_requests) = performance_metrics.network_requests { + rows.push(( + "network_requests".to_string(), + format_number(network_requests), + )); + } + } + + render_kv_section("Metrics", rows, true) +} + +fn render_key_findings_section(findings: &[model::Finding]) -> Option { + if findings.is_empty() { + return None; + } + + let lines = findings.iter().map(format_finding).collect(); + Some(render_lines_section("Key Findings", lines, false)) +} + +fn render_recommendations_section(recommendations: &[model::Recommendation]) -> Option { + if recommendations.is_empty() { + return None; + } + + let lines = recommendations.iter().map(format_recommendation).collect(); + Some(render_lines_section("Recommendations", lines, false)) +} + +fn render_safe_output_summary_section( + summary: Option<&model::SafeOutputSummary>, +) -> Option { + let summary = summary?; + let rows = vec![ + ( + "proposed".to_string(), + format_number(summary.proposed_count), + ), + ( + "executed".to_string(), + format_number(summary.executed_count), + ), + ( + "rejected_by_execution".to_string(), + format_number(summary.rejected_by_execution_count), + ), + ( + "not_processed".to_string(), + format_number(summary.not_processed_count), + ), + ]; + Some(render_kv_section("Safe Output Summary", rows, false)) +} + +fn render_rejected_safe_outputs_section( + rollup: Option<&model::RejectedSafeOutputsRollup>, +) -> Option { + let rollup = rollup?; + let by_reason = positive_count_lines(&rollup.by_reason); + let 
by_threat = positive_count_lines(&rollup.by_threat); + + if rollup.total_rejected == 0 && by_reason.is_empty() && by_threat.is_empty() { + return None; + } + + let mut lines = Vec::new(); + if !by_reason.is_empty() { + lines.push("By reason:".to_string()); + lines.extend(by_reason); + } + if !by_threat.is_empty() { + lines.push("By threat:".to_string()); + lines.extend(by_threat); + } + + Some(render_lines_section( + format!( + "Rejected Safe Outputs ({} total)", + format_number(rollup.total_rejected) + ), + lines, + false, + )) +} + +fn render_mcp_server_health_section(health: Option<&model::MCPServerHealth>) -> Option { + let health = health?; + if health.servers.is_empty() { + return None; + } + + let lines = health + .servers + .iter() + .map(|server| { + let mut line = format!( + "- {} {} calls, {} errors ({})", + fallback_text(&server.name, "(unnamed server)"), + format_number(server.total_calls), + format_number(server.error_count), + format_percentage(server.error_rate), + ); + if server.unreliable { + line.push_str(" [unreliable]"); + } + line + }) + .collect(); + + Some(render_lines_section("MCP Server Health", lines, false)) +} + +fn render_firewall_analysis_section(analysis: Option<&model::FirewallAnalysis>) -> Option { + let analysis = analysis?; + if analysis.domains.is_empty() + && analysis.total_requests == 0 + && analysis.allowed_count == 0 + && analysis.denied_count == 0 + { + return None; + } + + let width = analysis + .domains + .iter() + .map(|domain| domain.domain.chars().count()) + .max() + .unwrap_or(0); + let lines = analysis + .domains + .iter() + .map(|domain| { + let name = fallback_text(&domain.domain, "(unknown domain)"); + format!( + "- {:) -> Option { + let analysis = analysis?; + if analysis.policies.is_empty() && analysis.allow_count == 0 && analysis.deny_count == 0 { + return None; + } + + let width = analysis + .policies + .iter() + .map(|policy| policy.pattern.chars().count()) + .max() + .unwrap_or(0); + let lines = analysis + 
.policies + .iter() + .map(|policy| { + let pattern = fallback_text(&policy.pattern, "(unnamed rule)"); + format!( + "- {:, +) -> Option { + let analysis = analysis?; + let mut rows = vec![( + "safe_to_process".to_string(), + analysis.safe_to_process.to_string(), + )]; + + rows.push(( + "threats".to_string(), + format_detection_threats(&analysis.threats), + )); + for reason in &analysis.reasons { + if !reason.trim().is_empty() { + rows.push(("reason".to_string(), reason.clone())); + } + } + push_opt_row(&mut rows, "verdict_path", analysis.verdict_path.as_deref()); + + Some(render_kv_section("Detection Analysis", rows, false)) +} + +fn render_jobs_section(jobs: &[model::JobData]) -> Option { + if jobs.is_empty() { + return None; + } + + let width = jobs + .iter() + .map(|job| job.name.chars().count()) + .max() + .unwrap_or(0); + let lines = jobs + .iter() + .map(|job| { + let mut state = fallback_text(&job.status, "unknown").to_string(); + if let Some(result) = job.result.as_deref().filter(|value| !value.is_empty()) { + state.push('/'); + state.push_str(result); + } + + let mut line = format!( + "- {: Option { + if files.is_empty() { + return None; + } + + let lines = files + .iter() + .map(|file| { + let mut line = format!( + "- {} {}", + fallback_text(&file.path, "(unknown path)"), + format_bytes(file.size_bytes), + ); + if let Some(sha256) = file.sha256.as_deref().filter(|value| !value.is_empty()) { + line.push_str(" sha256: "); + line.push_str(sha256); + } + line + }) + .collect(); + + Some(render_lines_section("Downloaded Files", lines, false)) +} + +fn render_missing_tools_section(reports: &[model::MissingToolReport]) -> Option { + if reports.is_empty() { + return None; + } + + let lines = reports + .iter() + .map(|report| { + format_named_report( + report.tool.as_deref(), + report.context.as_deref(), + report.reason.as_deref(), + report.timestamp.as_deref(), + "(unknown tool)", + ) + }) + .collect(); + + Some(render_lines_section("Missing Tools", lines, 
false)) +} + +fn render_missing_data_section(reports: &[model::MissingDataReport]) -> Option { + if reports.is_empty() { + return None; + } + + let lines = reports + .iter() + .map(|report| { + format_named_report( + report.tool.as_deref(), + report.context.as_deref(), + report.reason.as_deref(), + report.timestamp.as_deref(), + "(unknown tool)", + ) + }) + .collect(); + + Some(render_lines_section("Missing Data", lines, false)) +} + +fn render_noops_section(reports: &[model::NoopReport]) -> Option { + if reports.is_empty() { + return None; + } + + let lines = reports + .iter() + .map(|report| { + format_named_report( + report.tool.as_deref(), + report.context.as_deref(), + report.reason.as_deref(), + report.timestamp.as_deref(), + "(unknown tool)", + ) + }) + .collect(); + + Some(render_lines_section("Noops", lines, false)) +} + +fn render_mcp_failures_section(reports: &[model::MCPFailureReport]) -> Option { + if reports.is_empty() { + return None; + } + + let lines = reports + .iter() + .map(|report| { + format_named_report( + report.tool.as_deref(), + report.context.as_deref(), + report.reason.as_deref(), + report.timestamp.as_deref(), + "(unknown MCP tool)", + ) + }) + .collect(); + + Some(render_lines_section("MCP Failures", lines, false)) +} + +fn render_errors_section(errors: &[model::ErrorInfo]) -> Option { + if errors.is_empty() { + return None; + } + + let lines = errors + .iter() + .map(|error| { + format_message_report(&error.source, &error.message, error.timestamp.as_deref()) + }) + .collect(); + + Some(render_lines_section("Errors", lines, false)) +} + +fn render_warnings_section(warnings: &[model::ErrorInfo]) -> Option { + if warnings.is_empty() { + return None; + } + + let lines = warnings + .iter() + .map(|warning| { + format_message_report( + &warning.source, + &warning.message, + warning.timestamp.as_deref(), + ) + }) + .collect(); + + Some(render_lines_section("Warnings", lines, false)) +} + +fn render_tool_usage_section(tool_usage: 
&[model::ToolUsageInfo]) -> Option { + if tool_usage.is_empty() { + return None; + } + + let lines = tool_usage + .iter() + .map(|tool| { + let mut line = format!( + "- {} {} calls", + fallback_text(&tool.name, "(unnamed tool)"), + format_number(tool.call_count), + ); + if let Some(total_duration_ms) = tool.total_duration_ms { + line.push_str(" "); + line.push_str(&format_total_duration_ms(total_duration_ms)); + } + line + }) + .collect(); + + Some(render_lines_section("Tool Usage", lines, false)) +} + +fn render_mcp_tool_usage_section(usage: Option<&model::MCPToolUsageData>) -> Option { + let usage = usage?; + if usage.tools.is_empty() { + return None; + } + + let lines = usage + .tools + .iter() + .map(|tool| { + format!( + "- {} {} calls, {} errors, max input {}, max output {}", + fallback_text(&tool.name, "(unnamed MCP tool)"), + format_number(tool.call_count), + format_number(tool.error_count), + format_bytes(tool.max_input_size), + format_bytes(tool.max_output_size), + ) + }) + .collect(); + + Some(render_lines_section("MCP Tool Usage", lines, false)) +} + +fn render_created_items_section(items: &[model::CreatedItemReport]) -> Option { + if items.is_empty() { + return None; + } + + let lines = items + .iter() + .map(|item| { + let mut parts = vec![fallback_text(&item.kind, "(unknown kind)").to_string()]; + if let Some(id) = item.id.as_deref().filter(|value| !value.is_empty()) { + parts.push(format_created_item_id(&item.kind, id)); + } + if let Some(title) = item.title.as_deref().filter(|value| !value.is_empty()) { + parts.push(format!("\"{}\"", title.replace('"', "\\\""))); + } + if let Some(url) = item.url.as_deref().filter(|value| !value.is_empty()) { + parts.push(url.to_string()); + } + format!("- {}", parts.join(" ")) + }) + .collect(); + + Some(render_lines_section( + format!("Created Items ({})", format_number(items.len() as u64)), + lines, + false, + )) +} + +fn render_kv_section(title: impl Into, rows: Vec<(String, String)>, force: bool) -> String { + 
/// Formats key/value rows as `- key: value` bullets with the values aligned
/// in a single column.
///
/// Alignment is computed from the longest key, measured in `char`s: every
/// key's colon is followed by enough spaces that all values start at the same
/// offset, with at least one space after the colon. An empty `rows` slice
/// yields an empty vector.
fn format_kv_lines(rows: &[(String, String)]) -> Vec<String> {
    // First pass: find the widest key.
    let mut widest = 0;
    for (key, _) in rows {
        widest = widest.max(key.chars().count());
    }

    // Second pass: emit each row padded out to the shared value column.
    let mut lines = Vec::with_capacity(rows.len());
    for (key, value) in rows {
        let gap = widest.saturating_sub(key.chars().count()) + 1;
        lines.push(format!("- {}:{}{}", key, " ".repeat(gap), value));
    }
    lines
}
/// Formats one report entry (missing tool, missing data, noop, MCP failure)
/// as a bullet of the form `- <name> [<context>]: <reason>`, optionally
/// followed by a `Timestamp:` continuation line.
///
/// * `primary` – the entry's name; `fallback` is shown when it is absent or blank.
/// * `context`, `reason`, `timestamp` – optional details; each is omitted
///   when absent or blank.
///
/// A value counts as blank when it is empty or whitespace-only. This matches
/// `fallback_text` and `push_opt_row`, so a whitespace-only field is never
/// rendered as an invisible name, an empty `[ ]` or an empty timestamp line.
/// Non-blank values are emitted as given, without trimming.
fn format_named_report(
    primary: Option<&str>,
    context: Option<&str>,
    reason: Option<&str>,
    timestamp: Option<&str>,
    fallback: &str,
) -> String {
    /// Keeps a value only if it contains at least one non-whitespace character.
    fn non_blank(text: Option<&str>) -> Option<&str> {
        text.filter(|value| !value.trim().is_empty())
    }

    let mut line = format!("- {}", non_blank(primary).unwrap_or(fallback));
    if let Some(context) = non_blank(context) {
        line.push_str(" [");
        line.push_str(context);
        line.push(']');
    }
    if let Some(reason) = non_blank(reason) {
        line.push_str(": ");
        line.push_str(reason);
    }
    if let Some(timestamp) = non_blank(timestamp) {
        line.push('\n');
        line.push_str(" Timestamp: ");
        line.push_str(timestamp);
    }
    line
}
/// Formats an unsigned integer with `,` as the thousands separator,
/// e.g. `12345` becomes `"12,345"`. Values below 1,000 are returned without
/// any separator.
fn format_number(value: u64) -> String {
    let digits = value.to_string();
    let len = digits.len();
    let mut grouped = String::with_capacity(len + len / 3);

    for (position, digit) in digits.chars().enumerate() {
        // A separator goes before every digit that has a whole number of
        // three-digit groups remaining after it (but never at the front).
        if position > 0 && (len - position) % 3 == 0 {
            grouped.push(',');
        }
        grouped.push(digit);
    }

    grouped
}
/// Normalizes a human-readable duration such as `"3m 12s"` into the
/// canonical `"<minutes>m <seconds>s"` form.
///
/// The input is trimmed and split on whitespace. Each token must be an
/// unsigned integer followed by `m` (minutes) or `s` (seconds); a missing
/// component is rendered as `0`, so `"4m"` becomes `"4m 0s"` and `"12s"`
/// becomes `"0m 12s"`. If a unit appears more than once, the last occurrence
/// wins.
///
/// Returns an empty string for blank input. If any token is not recognised
/// (for example `1h` or `500ms`), the trimmed input is returned unchanged.
/// Dropping the unknown token would silently misreport the duration
/// (`"1h 5m"` must not be shown as `"5m 0s"`).
fn normalize_duration(value: &str) -> String {
    let trimmed = value.trim();
    if trimmed.is_empty() {
        return String::new();
    }

    let mut minutes: Option<u64> = None;
    let mut seconds: Option<u64> = None;
    for part in trimmed.split_whitespace() {
        if let Some(parsed) = part
            .strip_suffix('m')
            .and_then(|number| number.parse::<u64>().ok())
        {
            minutes = Some(parsed);
        } else if let Some(parsed) = part
            .strip_suffix('s')
            .and_then(|number| number.parse::<u64>().ok())
        {
            seconds = Some(parsed);
        } else {
            // Unknown unit or malformed number: we cannot faithfully express
            // this duration in minutes/seconds, so leave it untouched.
            return trimmed.to_string();
        }
    }

    match (minutes, seconds) {
        (Some(minutes), Some(seconds)) => format!("{minutes}m {seconds}s"),
        (Some(minutes), None) => format!("{minutes}m 0s"),
        (None, Some(seconds)) => format!("0m {seconds}s"),
        // Unreachable for non-blank input (every token sets one of the two),
        // but kept as a safe fallback.
        (None, None) => trimmed.to_string(),
    }
}
empty_audit_data_renders_only_overview_and_metrics() { + let out = render_console(&AuditData::default()); + let headings: Vec<_> = out.lines().filter(|line| line.starts_with("## ")).collect(); + + assert!(out.contains("## Overview")); + assert!(out.contains("## Metrics")); + assert_eq!(headings, vec!["## Overview", "## Metrics"]); + } + + #[test] + fn full_audit_data_renders_inline_snapshot() { + let audit = populated_audit_data(); + let expected = r#"## Overview +- build_id: 12,345 +- pipeline: my-pipeline +- status: completed +- result: succeeded +- branch: refs/heads/main +- commit: abcdef123456 +- duration: 3m 12s +- url: https://dev.azure.com/example/project/_build/results?buildId=12345 +- created_at: 2026-05-21T12:00:00Z +- started_at: 2026-05-21T12:01:00Z +- finished_at: 2026-05-21T12:04:12Z +- logs_path: logs\build-12345 +- engine: copilot +- model: claude-sonnet-4.5 +- agent: my-agent.md +- source: agents/my-agent.md +- target: standalone +- compiler_version: 0.30.2 +- engine_version: 2026.05 +- timeout_minutes: 30 + +## Metrics +- token_usage: 12,345 +- effective_tokens: 12,345 +- estimated_cost: $0.00 +- turns: 7 +- errors: 1 +- warnings: 1 +- tokens_per_minute: 64.3 +- cost_efficiency: good +- most_used_tool: edit +- network_requests: 42 + +## Key Findings +- [high] safe_outputs: Detection rejected 3 safe output(s) + Description: The threat-analysis verdict had prompt_injection set. + Impact: No items were created; the agent's work is not visible to downstream consumers. + +## Recommendations +- [high] Review the detection-stage verdict + Reason: The aggregate gate prevented execution. 
+ Example: Inspect analyzed_outputs_12345\threat-analysis.json + +## Safe Output Summary +- proposed: 5 +- executed: 3 +- rejected_by_execution: 1 +- not_processed: 1 + +## Rejected Safe Outputs (4 total) +By reason: +- "evil prompt detected": 2 +- "permission denied": 1 +- "skipped": 1 +By threat: +- "prompt_injection": 2 + +## MCP Server Health +- github-mcp 8 calls, 1 errors (12.50%) [unreliable] + +## Firewall Analysis (total: 42 requests, allowed: 40, denied: 2) +- api.github.com [allowed] 35 requests + +## Policy Analysis (allow: 1, deny: 1) +- https://api.github.com/** [allow] 35 hits + +## Detection Analysis +- safe_to_process: false +- threats: prompt_injection +- reason: Suspicious instruction in fetched content +- verdict_path: analyzed_outputs_12345\threat-analysis.json + +## Jobs +- Agent [completed/succeeded] 2m 30s +- Detection [completed/succeeded] 0m 30s +- SafeOutputs [completed/succeeded] 0m 12s + +## Downloaded Files +- logs\build-12345\agent_outputs_12345\otel.jsonl 2,048 B sha256: abc123 + +## Missing Tools +- azure-devops [work-item-sync]: Tool not configured + Timestamp: 2026-05-21T12:03:00Z + +## Missing Data +- create_work_item [wi-1]: missing title + Timestamp: 2026-05-21T12:03:10Z + +## Noops +- noop [noop-1]: Nothing to do + Timestamp: 2026-05-21T12:03:20Z + +## MCP Failures +- github.search_code [call-17]: HTTP 502 + Timestamp: 2026-05-21T12:03:30Z + +## Errors +- audit::detection: Threat detection blocked execution + Timestamp: 2026-05-21T12:05:00Z + +## Warnings +- audit::firewall: One request was denied + Timestamp: 2026-05-21T12:04:00Z + +## Tool Usage +- edit 5 calls 0m 2s + +## MCP Tool Usage +- github.search_code 3 calls, 1 errors, max input 512 B, max output 4,096 B + +## Created Items (1) +- pull_request #42 "Fix bug" https://dev.azure.com/example/project/_git/repo/pullrequest/42 +"#; + + assert_eq!(render_console(&audit), expected); + } + + #[test] + fn rejected_rollup_renders_totals_and_grouping() { + let mut by_reason = 
BTreeMap::new(); + by_reason.insert("permission denied".to_string(), 1); + + let mut by_threat = BTreeMap::new(); + by_threat.insert("prompt_injection".to_string(), 2); + + let audit = AuditData { + rejected_safe_outputs: Some(RejectedSafeOutputsRollup { + total_rejected: 3, + by_reason, + by_threat, + }), + ..AuditData::default() + }; + + let out = render_console(&audit); + assert!(out.contains("## Rejected Safe Outputs (3 total)")); + assert!(out.contains("By reason:")); + assert!(out.contains("By threat:")); + } + + #[test] + fn headings_follow_documented_order() { + let out = render_console(&populated_audit_data()); + let headings = [ + "## Overview", + "## Metrics", + "## Key Findings", + "## Recommendations", + "## Safe Output Summary", + "## Rejected Safe Outputs (4 total)", + "## MCP Server Health", + "## Firewall Analysis (total: 42 requests, allowed: 40, denied: 2)", + "## Policy Analysis (allow: 1, deny: 1)", + "## Detection Analysis", + "## Jobs", + "## Downloaded Files", + "## Missing Tools", + "## Missing Data", + "## Noops", + "## MCP Failures", + "## Errors", + "## Warnings", + "## Tool Usage", + "## MCP Tool Usage", + "## Created Items (1)", + ]; + + let mut last_index = 0; + for heading in headings { + let index = out.find(heading).expect("expected heading to be present"); + assert!(index >= last_index, "heading {heading} was out of order"); + last_index = index; + } + } + + fn populated_audit_data() -> AuditData { + let mut by_reason = BTreeMap::new(); + by_reason.insert("evil prompt detected".to_string(), 2); + by_reason.insert("permission denied".to_string(), 1); + by_reason.insert("skipped".to_string(), 1); + + let mut by_threat = BTreeMap::new(); + by_threat.insert("prompt_injection".to_string(), 2); + by_threat.insert("secret_leak".to_string(), 0); + + AuditData { + overview: crate::audit::model::OverviewData { + build_id: 12_345, + pipeline_name: "my-pipeline".to_string(), + status: "completed".to_string(), + result: 
Some("succeeded".to_string()), + created_at: Some("2026-05-21T12:00:00Z".to_string()), + started_at: Some("2026-05-21T12:01:00Z".to_string()), + finished_at: Some("2026-05-21T12:04:12Z".to_string()), + duration: Some("3m 12s".to_string()), + source_branch: Some("refs/heads/main".to_string()), + source_version: Some("abcdef123456".to_string()), + url: Some( + "https://dev.azure.com/example/project/_build/results?buildId=12345" + .to_string(), + ), + logs_path: Some("logs\\build-12345".to_string()), + aw_info: Some(AwInfo { + engine: Some("copilot".to_string()), + model: Some("claude-sonnet-4.5".to_string()), + agent_name: Some("my-agent.md".to_string()), + source: Some("agents/my-agent.md".to_string()), + target: Some("standalone".to_string()), + compiler_version: Some("0.30.2".to_string()), + }), + }, + task_domain: Some(TaskDomainInfo { + summary: "security review workflow".to_string(), + data: json!({"domain": "security"}), + }), + behavior_fingerprint: Some(BehaviorFingerprint { + summary: "tool-heavy".to_string(), + data: json!({"pattern": "tool-heavy"}), + }), + agentic_assessments: vec![AgenticAssessment { + summary: "produced actionable changes".to_string(), + data: json!({"score": 0.92}), + }], + metrics: MetricsData { + token_usage: 12_345, + effective_tokens: 12_345, + estimated_cost: 0.0, + turns: 7, + error_count: 1, + warning_count: 1, + }, + key_findings: vec![Finding { + category: "safe_outputs".to_string(), + severity: Severity::High, + title: "Detection rejected 3 safe output(s)".to_string(), + description: "The threat-analysis verdict had prompt_injection set.".to_string(), + impact: Some( + "No items were created; the agent's work is not visible to downstream consumers." 
+ .to_string(), + ), + }], + recommendations: vec![Recommendation { + priority: "high".to_string(), + action: "Review the detection-stage verdict".to_string(), + reason: "The aggregate gate prevented execution.".to_string(), + example: Some("Inspect analyzed_outputs_12345\\threat-analysis.json".to_string()), + }], + performance_metrics: Some(PerformanceMetrics { + tokens_per_minute: Some(64.3), + cost_efficiency: Some("good".to_string()), + most_used_tool: Some("edit".to_string()), + network_requests: Some(42), + }), + engine_config: Some(AuditEngineConfig { + engine: "copilot".to_string(), + model: Some("claude-sonnet-4.5".to_string()), + version: Some("2026.05".to_string()), + timeout_minutes: Some(30), + }), + safe_output_summary: Some(SafeOutputSummary { + proposed_count: 5, + executed_count: 3, + rejected_by_execution_count: 1, + not_processed_count: 1, + }), + safe_output_execution: Some(SafeOutputExecution { + items: vec![SafeOutputExecutionItem { + context: Some("pr-1".to_string()), + tool: "create_pull_request".to_string(), + status: SafeOutputStatus::NotProcessedDueToAggregateGate, + proposal: json!({"title": "Fix bug"}), + error: Some("Blocked by detection gate".to_string()), + result: Some(json!({"status": "blocked"})), + rejection_reason: Some("prompt_injection".to_string()), + applies_to_whole_batch: true, + }], + }), + rejected_safe_outputs: Some(RejectedSafeOutputsRollup { + total_rejected: 4, + by_reason, + by_threat, + }), + detection_analysis: Some(DetectionAnalysis { + threats: DetectionThreats { + prompt_injection: true, + secret_leak: false, + malicious_patch: false, + }, + reasons: vec!["Suspicious instruction in fetched content".to_string()], + safe_to_process: false, + verdict_path: Some("analyzed_outputs_12345\\threat-analysis.json".to_string()), + }), + mcp_server_health: Some(MCPServerHealth { + servers: vec![MCPServerStats { + name: "github-mcp".to_string(), + total_calls: 8, + error_count: 1, + error_rate: 0.125, + unreliable: true, + 
}], + }), + jobs: vec![ + JobData { + name: "Agent".to_string(), + status: "completed".to_string(), + result: Some("succeeded".to_string()), + duration: Some("2m 30s".to_string()), + started_at: Some("2026-05-21T12:01:00Z".to_string()), + finished_at: Some("2026-05-21T12:03:30Z".to_string()), + }, + JobData { + name: "Detection".to_string(), + status: "completed".to_string(), + result: Some("succeeded".to_string()), + duration: Some("30s".to_string()), + started_at: Some("2026-05-21T12:03:30Z".to_string()), + finished_at: Some("2026-05-21T12:04:00Z".to_string()), + }, + JobData { + name: "SafeOutputs".to_string(), + status: "completed".to_string(), + result: Some("succeeded".to_string()), + duration: Some("12s".to_string()), + started_at: Some("2026-05-21T12:04:00Z".to_string()), + finished_at: Some("2026-05-21T12:04:12Z".to_string()), + }, + ], + downloaded_files: vec![FileInfo { + path: "logs\\build-12345\\agent_outputs_12345\\otel.jsonl".to_string(), + size_bytes: 2_048, + sha256: Some("abc123".to_string()), + }], + missing_tools: vec![MissingToolReport { + tool: Some("azure-devops".to_string()), + context: Some("work-item-sync".to_string()), + reason: Some("Tool not configured".to_string()), + timestamp: Some("2026-05-21T12:03:00Z".to_string()), + extra: json!({"required": true}), + }], + missing_data: vec![MissingDataReport { + tool: Some("create_work_item".to_string()), + context: Some("wi-1".to_string()), + reason: Some("missing title".to_string()), + timestamp: Some("2026-05-21T12:03:10Z".to_string()), + extra: json!({"field": "title"}), + }], + noops: vec![NoopReport { + tool: Some("noop".to_string()), + context: Some("noop-1".to_string()), + reason: Some("Nothing to do".to_string()), + timestamp: Some("2026-05-21T12:03:20Z".to_string()), + extra: json!({"kind": "noop"}), + }], + mcp_failures: vec![MCPFailureReport { + tool: Some("github.search_code".to_string()), + context: Some("call-17".to_string()), + reason: Some("HTTP 502".to_string()), + timestamp: 
Some("2026-05-21T12:03:30Z".to_string()), + extra: json!({"retryable": true}), + }], + firewall_analysis: Some(FirewallAnalysis { + domains: vec![DomainStat { + domain: "api.github.com".to_string(), + status: "allowed".to_string(), + request_count: 35, + first_seen: Some("2026-05-21T12:01:10Z".to_string()), + last_seen: Some("2026-05-21T12:04:10Z".to_string()), + }], + total_requests: 42, + allowed_count: 40, + denied_count: 2, + }), + policy_analysis: Some(PolicyAnalysis { + policies: vec![PolicyRule { + pattern: "https://api.github.com/**".to_string(), + verdict: "allow".to_string(), + hit_count: 35, + }], + allow_count: 1, + deny_count: 1, + }), + errors: vec![ErrorInfo { + source: "audit::detection".to_string(), + message: "Threat detection blocked execution".to_string(), + timestamp: Some("2026-05-21T12:05:00Z".to_string()), + }], + warnings: vec![ErrorInfo { + source: "audit::firewall".to_string(), + message: "One request was denied".to_string(), + timestamp: Some("2026-05-21T12:04:00Z".to_string()), + }], + tool_usage: vec![ToolUsageInfo { + name: "edit".to_string(), + call_count: 5, + total_duration_ms: Some(2_000), + }], + mcp_tool_usage: Some(MCPToolUsageData { + tools: vec![MCPToolSummary { + name: "github.search_code".to_string(), + call_count: 3, + error_count: 1, + max_input_size: 512, + max_output_size: 4_096, + }], + }), + created_items: vec![CreatedItemReport { + kind: "pull_request".to_string(), + url: Some( + "https://dev.azure.com/example/project/_git/repo/pullrequest/42" + .to_string(), + ), + id: Some("42".to_string()), + title: Some("Fix bug".to_string()), + }], + } + } +} diff --git a/src/audit/render/json.rs b/src/audit/render/json.rs new file mode 100644 index 00000000..81281f98 --- /dev/null +++ b/src/audit/render/json.rs @@ -0,0 +1,300 @@ +use anyhow::Context; + +/// Render `AuditData` as pretty-printed JSON to a writer. +/// +/// Used when the CLI is invoked with `--json`. 
The JSON shape is the +/// public contract documented in `docs/audit.md` — top-level field +/// names are stable; nested fields may be extended but never removed +/// without deprecation. +pub fn render_json( + audit: &crate::audit::model::AuditData, + writer: &mut W, +) -> anyhow::Result<()> { + serde_json::to_writer_pretty(&mut *writer, audit) + .context("failed to serialize audit report as JSON")?; + std::io::Write::write_all(writer, b"\n") + .context("failed to write trailing newline for JSON audit report")?; + Ok(()) +} + +/// Convenience: render to a `String`. +pub fn render_json_to_string(audit: &crate::audit::model::AuditData) -> anyhow::Result { + let mut json = + serde_json::to_string_pretty(audit).context("failed to serialize audit report as JSON")?; + json.push('\n'); + Ok(json) +} + +#[cfg(test)] +mod tests { + use super::{render_json, render_json_to_string}; + use crate::audit::model::*; + use serde_json::{Value, json}; + use std::collections::BTreeMap; + + fn populated_audit_data() -> AuditData { + let mut by_reason = BTreeMap::new(); + by_reason.insert(String::from("aggregate_gate"), 1); + + let mut by_threat = BTreeMap::new(); + by_threat.insert(String::from("prompt_injection"), 1); + by_threat.insert(String::from("secret_leak"), 0); + by_threat.insert(String::from("malicious_patch"), 0); + + AuditData { + overview: OverviewData { + build_id: 42, + pipeline_name: String::from("agentic-pipeline"), + status: String::from("completed"), + result: Some(String::from("succeeded")), + created_at: Some(String::from("2026-05-21T12:00:00Z")), + started_at: Some(String::from("2026-05-21T12:01:00Z")), + finished_at: Some(String::from("2026-05-21T12:06:00Z")), + duration: Some(String::from("5m")), + source_branch: Some(String::from("refs/heads/main")), + source_version: Some(String::from("abcdef123456")), + url: Some(String::from( + "https://dev.azure.com/example/project/_build/results?buildId=42", + )), + logs_path: Some(String::from("logs\\build-42")), + 
aw_info: Some(AwInfo { + engine: Some(String::from("copilot")), + model: Some(String::from("gpt-5.4")), + agent_name: Some(String::from("agentic-auditor")), + source: Some(String::from("agents/security-scan.md")), + target: Some(String::from("standalone")), + compiler_version: Some(String::from("0.30.2")), + }), + }, + task_domain: Some(TaskDomainInfo { + summary: String::from("Security review workflow"), + data: json!({"domain": "security"}), + }), + behavior_fingerprint: Some(BehaviorFingerprint { + summary: String::from("High tool usage with safe outputs"), + data: json!({"pattern": "tool-heavy"}), + }), + agentic_assessments: vec![AgenticAssessment { + summary: String::from("Agent produced actionable changes"), + data: json!({"score": 0.92}), + }], + metrics: MetricsData { + token_usage: 1200, + effective_tokens: 1000, + estimated_cost: 1.23, + turns: 12, + error_count: 1, + warning_count: 2, + }, + key_findings: vec![Finding { + category: String::from("security"), + severity: Severity::High, + title: String::from("Detection gate tripped"), + description: String::from("Threat detection blocked the safe-output batch."), + impact: Some(String::from("No proposed changes were executed.")), + }], + recommendations: vec![Recommendation { + priority: String::from("high"), + action: String::from("Review the detection-stage verdict"), + reason: String::from("The aggregate gate prevented execution."), + example: Some(String::from( + "Inspect analyzed_outputs_42\\threat-analysis.json", + )), + }], + performance_metrics: Some(PerformanceMetrics { + tokens_per_minute: Some(240.0), + cost_efficiency: Some(String::from("moderate")), + most_used_tool: Some(String::from("edit")), + network_requests: Some(18), + }), + engine_config: Some(AuditEngineConfig { + engine: String::from("copilot"), + model: Some(String::from("gpt-5.4")), + version: Some(String::from("2026.05")), + timeout_minutes: Some(30), + }), + safe_output_summary: Some(SafeOutputSummary { + proposed_count: 2, + 
executed_count: 1, + rejected_by_execution_count: 0, + not_processed_count: 1, + }), + safe_output_execution: Some(SafeOutputExecution { + items: vec![SafeOutputExecutionItem { + context: Some(String::from("pr-1")), + tool: String::from("create_pull_request"), + status: SafeOutputStatus::NotProcessedDueToAggregateGate, + proposal: json!({"title": "Fix pipeline", "repository": "repo"}), + error: Some(String::from("Batch blocked by detection gate")), + result: Some(json!({"status": "blocked"})), + rejection_reason: Some(String::from("prompt_injection")), + applies_to_whole_batch: true, + }], + }), + rejected_safe_outputs: Some(RejectedSafeOutputsRollup { + total_rejected: 1, + by_reason, + by_threat, + }), + detection_analysis: Some(DetectionAnalysis { + threats: DetectionThreats { + prompt_injection: true, + secret_leak: false, + malicious_patch: false, + }, + reasons: vec![String::from("Suspicious instruction in fetched content")], + safe_to_process: false, + verdict_path: Some(String::from("analyzed_outputs_42\\threat-analysis.json")), + }), + mcp_server_health: Some(MCPServerHealth { + servers: vec![MCPServerStats { + name: String::from("github-mcp"), + total_calls: 8, + error_count: 1, + error_rate: 0.125, + unreliable: true, + }], + }), + jobs: vec![JobData { + name: String::from("Agent"), + status: String::from("completed"), + result: Some(String::from("succeeded")), + duration: Some(String::from("4m")), + started_at: Some(String::from("2026-05-21T12:01:00Z")), + finished_at: Some(String::from("2026-05-21T12:05:00Z")), + }], + downloaded_files: vec![FileInfo { + path: String::from("logs\\build-42\\agent_outputs_42\\otel.jsonl"), + size_bytes: 2048, + sha256: Some(String::from("abc123")), + }], + missing_tools: vec![MissingToolReport { + tool: Some(String::from("azure-devops")), + context: Some(String::from("work-item-sync")), + reason: Some(String::from("Tool not configured")), + timestamp: Some(String::from("2026-05-21T12:03:00Z")), + extra: 
json!({"required": true}), + }], + missing_data: vec![MissingDataReport { + tool: Some(String::from("create_work_item")), + context: Some(String::from("wi-1")), + reason: Some(String::from("missing title")), + timestamp: Some(String::from("2026-05-21T12:03:10Z")), + extra: json!({"field": "title"}), + }], + noops: vec![NoopReport { + tool: Some(String::from("noop")), + context: Some(String::from("noop-1")), + reason: Some(String::from("Nothing to do")), + timestamp: Some(String::from("2026-05-21T12:03:20Z")), + extra: json!({"kind": "noop"}), + }], + mcp_failures: vec![MCPFailureReport { + tool: Some(String::from("github.search_code")), + context: Some(String::from("call-17")), + reason: Some(String::from("HTTP 502")), + timestamp: Some(String::from("2026-05-21T12:03:30Z")), + extra: json!({"retryable": true}), + }], + firewall_analysis: Some(FirewallAnalysis { + domains: vec![DomainStat { + domain: String::from("api.github.com"), + status: String::from("allowed"), + request_count: 7, + first_seen: Some(String::from("2026-05-21T12:01:10Z")), + last_seen: Some(String::from("2026-05-21T12:04:55Z")), + }], + total_requests: 10, + allowed_count: 9, + denied_count: 1, + }), + policy_analysis: Some(PolicyAnalysis { + policies: vec![PolicyRule { + pattern: String::from("https://api.github.com/**"), + verdict: String::from("allow"), + hit_count: 7, + }], + allow_count: 1, + deny_count: 1, + }), + errors: vec![ErrorInfo { + source: String::from("audit::detection"), + message: String::from("Threat detection blocked execution"), + timestamp: Some(String::from("2026-05-21T12:05:00Z")), + }], + warnings: vec![ErrorInfo { + source: String::from("audit::firewall"), + message: String::from("One request was denied"), + timestamp: Some(String::from("2026-05-21T12:04:00Z")), + }], + tool_usage: vec![ToolUsageInfo { + name: String::from("edit"), + call_count: 5, + total_duration_ms: Some(1500), + }], + mcp_tool_usage: Some(MCPToolUsageData { + tools: vec![MCPToolSummary { + name: 
String::from("github.search_code"), + call_count: 3, + error_count: 1, + max_input_size: 512, + max_output_size: 4096, + }], + }), + created_items: vec![CreatedItemReport { + kind: String::from("pull_request"), + url: Some(String::from( + "https://dev.azure.com/example/project/_git/repo/pullrequest/123", + )), + id: Some(String::from("123")), + title: Some(String::from("Fix pipeline")), + }], + } + } + + #[test] + fn default_audit_data_round_trips_through_json() { + let original = AuditData::default(); + let mut rendered = Vec::new(); + + render_json(&original, &mut rendered).expect("render default audit data as JSON"); + + let round_tripped: AuditData = + serde_json::from_slice(&rendered).expect("deserialize default audit data"); + assert_eq!(round_tripped, original); + } + + #[test] + fn populated_audit_data_round_trips_through_json() { + let original = populated_audit_data(); + let rendered = render_json_to_string(&original).expect("render populated audit data"); + + let round_tripped: AuditData = + serde_json::from_str(&rendered).expect("deserialize populated audit data"); + assert_eq!(round_tripped, original); + } + + #[test] + fn render_json_to_string_appends_trailing_newline() { + let rendered = + render_json_to_string(&AuditData::default()).expect("render default audit data"); + + assert!(rendered.ends_with('\n')); + } + + #[test] + fn default_audit_data_emits_required_top_level_keys() { + let rendered = + render_json_to_string(&AuditData::default()).expect("render default audit data"); + let value: Value = serde_json::from_str(&rendered).expect("parse rendered JSON"); + let mut keys: Vec<_> = value + .as_object() + .expect("top-level JSON object") + .keys() + .cloned() + .collect(); + + keys.sort(); + assert_eq!(keys, vec!["downloaded_files", "metrics", "overview"]); + } +} diff --git a/src/audit/render/mod.rs b/src/audit/render/mod.rs new file mode 100644 index 00000000..26598a04 --- /dev/null +++ b/src/audit/render/mod.rs @@ -0,0 +1,4 @@ +//! 
Output renderers for `ado-aw audit`. + +pub mod console; +pub mod json; diff --git a/src/audit/url.rs b/src/audit/url.rs new file mode 100644 index 00000000..92c1fea7 --- /dev/null +++ b/src/audit/url.rs @@ -0,0 +1,308 @@ +#![allow(dead_code)] + +use anyhow::{Result, bail}; +use percent_encoding::percent_decode_str; +use url::Url; + +const ACCEPTED_FORMATS: &str = "Accepted formats:\n- Bare numeric ID: 1234567890\n- dev.azure.com URL: https://dev.azure.com/{org}/{project}/_build/results?buildId=N\n- Legacy visualstudio.com URL: https://{org}.visualstudio.com/{project}/_build/results?buildId=N\n- On-prem URL: https://{server}/{collection}/{project}/_build/results?buildId=N"; + +/// Parsed form of an Azure DevOps build identifier (bare ID or URL). +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct ParsedBuildRef { + /// The numeric build ID. + pub build_id: u64, + /// Organization name (e.g. "my-org") or full collection URL host segment. + /// `None` if the input was a bare ID and the caller must resolve it from + /// git remote / `--org`. + pub org: Option, + /// Project name (URL-decoded). `None` if the input was a bare ID. + pub project: Option, + /// Host of the ADO instance (e.g. "dev.azure.com" or an on-prem hostname). + /// `None` if the input was a bare ID. + pub host: Option, + /// Job timeline ID if the URL pinned a specific job (`&j=`). + /// MVP normalizes to the parent build but the value is preserved for + /// future job-mode audit. + pub job_id: Option, + /// Task/step timeline ID if the URL pinned a specific step (`&t=` + /// or `&s=`). + pub step_id: Option, +} + +/// Parse any accepted form of an Azure DevOps build identifier into a +/// `ParsedBuildRef`. 
+/// +/// Accepted shapes: +/// - Bare numeric ID: `1234567890` +/// - dev.azure.com URL: `https://dev.azure.com/{org}/{project}/_build/results?buildId=N` +/// with optional `&view=logs&j=&t=` anchors +/// - On-prem URL: `https://{server}/{collection}/{project}/_build/results?buildId=N` +/// with the same optional anchors +/// - URL-encoded project segments (e.g. `My%20Project`) are decoded +/// - Trailing slashes and case-insensitive `_build/results` match +/// +/// Returns a structured error when the input is malformed (with a hint +/// listing the accepted formats). +pub fn parse_build_ref(input: &str) -> Result { + let input = input.trim(); + if input.is_empty() { + return invalid_build_ref(input); + } + + if input.bytes().all(|byte| byte.is_ascii_digit()) { + return Ok(ParsedBuildRef { + build_id: input + .parse() + .map_err(|_| anyhow::anyhow!(invalid_build_ref_message(input)))?, + org: None, + project: None, + host: None, + job_id: None, + step_id: None, + }); + } + + let url = match Url::parse(input) { + Ok(url) => url, + Err(_) => return invalid_build_ref(input), + }; + + let host = match url.host_str() { + Some(host) => host.to_string(), + None => return invalid_build_ref(input), + }; + let path_segments: Vec<&str> = match url.path_segments() { + Some(segments) => segments.filter(|segment| !segment.is_empty()).collect(), + None => return invalid_build_ref(input), + }; + + let (org, project) = parse_location(&host, &path_segments, input)?; + let (build_id, job_id, step_id) = parse_query(&url, input)?; + + Ok(ParsedBuildRef { + build_id, + org: Some(org), + project: Some(project), + host: Some(host), + job_id, + step_id, + }) +} + +fn parse_location(host: &str, path_segments: &[&str], input: &str) -> Result<(String, String)> { + if host.eq_ignore_ascii_case("dev.azure.com") { + if matches_build_results(path_segments, 2) { + return Ok(( + path_segments[0].to_string(), + decode_path_segment(path_segments[1]), + )); + } + return invalid_build_ref(input); + } + 
+ if host.to_ascii_lowercase().ends_with(".visualstudio.com") { + if matches_build_results(path_segments, 1) { + let org = host + .split('.') + .next() + .filter(|segment| !segment.is_empty()) + .map(str::to_string); + if let Some(org) = org { + return Ok((org, decode_path_segment(path_segments[0]))); + } + } + return invalid_build_ref(input); + } + + if matches_build_results(path_segments, 2) { + return Ok(( + path_segments[0].to_string(), + decode_path_segment(path_segments[1]), + )); + } + + invalid_build_ref(input) +} + +fn parse_query(url: &Url, input: &str) -> Result<(u64, Option, Option)> { + let mut build_id = None; + let mut job_id = None; + let mut step_id = None; + + for (key, value) in url.query_pairs() { + match key.as_ref() { + "buildId" if build_id.is_none() => { + build_id = Some( + value + .parse::() + .map_err(|_| anyhow::anyhow!(invalid_build_ref_message(input)))?, + ); + } + "j" if job_id.is_none() => job_id = Some(value.into_owned()), + "t" | "s" if step_id.is_none() => step_id = Some(value.into_owned()), + _ => {} + } + } + + match build_id { + Some(build_id) => Ok((build_id, job_id, step_id)), + None => invalid_build_ref(input), + } +} + +fn matches_build_results(path_segments: &[&str], prefix_len: usize) -> bool { + path_segments.len() == prefix_len + 2 + && path_segments[prefix_len].eq_ignore_ascii_case("_build") + && path_segments[prefix_len + 1].eq_ignore_ascii_case("results") +} + +fn decode_path_segment(segment: &str) -> String { + percent_decode_str(segment).decode_utf8_lossy().into_owned() +} + +fn invalid_build_ref(input: &str) -> Result { + bail!("{}", invalid_build_ref_message(input)); +} + +fn invalid_build_ref_message(input: &str) -> String { + format!( + "Malformed Azure DevOps build reference: {:?}\n{}", + input, ACCEPTED_FORMATS + ) +} + +#[cfg(test)] +mod tests { + use super::{ParsedBuildRef, parse_build_ref}; + + struct SuccessCase { + input: &'static str, + expected: ParsedBuildRef, + } + + struct ErrorCase { + input: 
&'static str, + } + + #[test] + fn parses_supported_build_references() { + let cases = vec![ + SuccessCase { + input: "1234567890", + expected: ParsedBuildRef { + build_id: 1_234_567_890, + org: None, + project: None, + host: None, + job_id: None, + step_id: None, + }, + }, + SuccessCase { + input: "https://dev.azure.com/my-org/My%20Project/_build/results?buildId=42", + expected: ParsedBuildRef { + build_id: 42, + org: Some("my-org".to_string()), + project: Some("My Project".to_string()), + host: Some("dev.azure.com".to_string()), + job_id: None, + step_id: None, + }, + }, + SuccessCase { + input: "https://dev.azure.com/org/proj/_build/results?buildId=99&view=logs&j=abc-123&t=def-456", + expected: ParsedBuildRef { + build_id: 99, + org: Some("org".to_string()), + project: Some("proj".to_string()), + host: Some("dev.azure.com".to_string()), + job_id: Some("abc-123".to_string()), + step_id: Some("def-456".to_string()), + }, + }, + SuccessCase { + input: "https://dev.azure.com/org/proj/_build/results?buildId=7&s=step-guid", + expected: ParsedBuildRef { + build_id: 7, + org: Some("org".to_string()), + project: Some("proj".to_string()), + host: Some("dev.azure.com".to_string()), + job_id: None, + step_id: Some("step-guid".to_string()), + }, + }, + SuccessCase { + input: "https://my-org.visualstudio.com/proj/_build/results?buildId=5", + expected: ParsedBuildRef { + build_id: 5, + org: Some("my-org".to_string()), + project: Some("proj".to_string()), + host: Some("my-org.visualstudio.com".to_string()), + job_id: None, + step_id: None, + }, + }, + SuccessCase { + input: "https://onprem.example.com/DefaultCollection/MyProject/_build/results?buildId=11", + expected: ParsedBuildRef { + build_id: 11, + org: Some("DefaultCollection".to_string()), + project: Some("MyProject".to_string()), + host: Some("onprem.example.com".to_string()), + job_id: None, + step_id: None, + }, + }, + SuccessCase { + input: "https://dev.azure.com/org/proj/_BUILD/RESULTS/?buildId=1", + expected: 
ParsedBuildRef { + build_id: 1, + org: Some("org".to_string()), + project: Some("proj".to_string()), + host: Some("dev.azure.com".to_string()), + job_id: None, + step_id: None, + }, + }, + ]; + + for case in cases { + match parse_build_ref(case.input) { + Ok(actual) => assert_eq!(actual, case.expected, "input: {:?}", case.input), + Err(err) => panic!("expected success for {:?}: {err}", case.input), + } + } + } + + #[test] + fn rejects_malformed_build_references() { + let cases = vec![ + ErrorCase { input: "" }, + ErrorCase { input: "abc" }, + ErrorCase { + input: "https://dev.azure.com/org/proj/_build/results", + }, + ErrorCase { + input: "https://dev.azure.com/org/proj/_build/results?buildId=notanum", + }, + ErrorCase { + input: "https://dev.azure.com/org/proj/_other/results?buildId=1", + }, + ]; + + for case in cases { + match parse_build_ref(case.input) { + Ok(parsed) => panic!("expected error for {:?}, got {:?}", case.input, parsed), + Err(err) => { + let message = err.to_string(); + assert!( + message.contains("Accepted formats:"), + "missing accepted formats hint for {:?}: {}", + case.input, + message + ); + } + } + } + } +} diff --git a/src/compile/extensions/ado_aw_marker.rs b/src/compile/extensions/ado_aw_marker.rs index 7ed7b247..9d1309ff 100644 --- a/src/compile/extensions/ado_aw_marker.rs +++ b/src/compile/extensions/ado_aw_marker.rs @@ -1,9 +1,9 @@ //! Always-on ado-aw marker extension. //! -//! Injects a single informational step into the prepare phase of the -//! Agent job of every compiled pipeline. The step's bash body carries a -//! machine-readable JSON metadata blob keyed by a `# ado-aw-metadata:` -//! prefix, plus a runtime `echo` for build-log visibility. +//! Injects two informational steps into the prepare phase of the +//! Agent job of every compiled pipeline. One step carries the existing +//! `# ado-aw-metadata:` discovery marker, and the other writes a +//! machine-readable `staging/aw_info.json` runtime artifact for audit. //! //! 
Why `prepare_steps` (Agent job) and not `setup_steps` (Setup job): //! a Setup-job injection would force every compiled pipeline to spin @@ -28,7 +28,7 @@ use super::{CompileContext, CompilerExtension, ExtensionPhase}; /// Always-on internal extension that embeds machine-readable /// `# ado-aw-metadata: {…}` JSON inside an injected Agent-job prepare -/// step. +/// step and writes a matching `staging/aw_info.json` artifact. /// /// The metadata is the canonical surface consumed by Preview-driven /// project-scope discovery in [`crate::ado`]. Discovery enumerates ADO @@ -50,54 +50,17 @@ impl CompilerExtension for AdoAwMarkerExtension { } fn prepare_steps(&self, ctx: &CompileContext) -> Vec { - // Inject the marker step into the Agent job's prepare phase + // Inject the marker steps into the Agent job's prepare phase // (NOT a separate Setup job). Setup-job injection would force // every compiled pipeline to spin up an extra agent pool job - // just to emit a metadata comment — wasteful for pipelines - // that have no other reason to need a Setup job. prepare_steps - // lands inside the always-present Agent job's - // `{{ prepare_steps }}` block, so it costs zero extra - // jobs/agents/pool time. - // - // In unit-test contexts that build a CompileContext without an - // input_path (e.g. CompileContext::for_test), skip the marker. - // Production paths always populate input_path via - // CompileContext::new. - let Some(input_path) = ctx.input_path else { + // just to emit metadata — wasteful for pipelines that have no + // other reason to need a Setup job. prepare_steps lands inside + // the always-present Agent job's `{{ prepare_steps }}` block, + // so it costs zero extra jobs/agents/pool time. 
+ let Some(metadata) = CompileMetadata::from_ctx(ctx) else { return vec![]; }; - let source = super::super::common::normalize_source_path(input_path); - let version = env!("CARGO_PKG_VERSION"); - let target = ctx.front_matter.target.as_str(); - - // ADO origin of the source markdown — disambiguates the - // `source` field when two repos in the same project happen to - // have files of the same name (e.g. both define `agents/foo.md`). - // Lower-cased so case-insensitive ADO identifiers compare cleanly. - // Empty strings when no ADO context could be inferred — production - // runs always have one thanks to the non-GitHub-remote guard, but - // unit-test contexts via `CompileContext::for_test` will not. - let org = ctx - .ado_org() - .map(|s| s.to_ascii_lowercase()) - .unwrap_or_default(); - let repo = ctx - .ado_context - .as_ref() - .map(|c| c.repo_name.to_ascii_lowercase()) - .unwrap_or_default(); - - let metadata_json = serde_json::json!({ - "schema": 1, - "source": source, - "org": org, - "repo": repo, - "version": version, - "target": target, - }) - .to_string(); - // The `# ado-aw-metadata:` line is the parse target for // discovery. The `echo` makes the same information visible in // the build log at runtime, which is a free human-discoverability @@ -114,7 +77,7 @@ impl CompilerExtension for AdoAwMarkerExtension { // the echoed source path. Reusing the canonical helper keeps // this in sync with the rest of the sanitisation surfaces. // - // 2. `bash_single_quote_escape` applies the `'\''` idiom so a + // 2. `bash_single_quote_escape` applies the `\''` idiom so a // filename containing `'` (e.g. `agents/foo's.md`) doesn't // produce syntactically broken bash. `version` and `target` // are controlled inputs and can't contain either. 
@@ -122,26 +85,117 @@ impl CompilerExtension for AdoAwMarkerExtension { // `org` and `repo` are derived from ADO remote parsing, which // already restricts them to a safe character set, but we apply // the same defence-in-depth pattern for consistency. - let echo_source = - bash_single_quote_escape(&crate::sanitize::neutralize_pipeline_commands(&source)); - let echo_org = - bash_single_quote_escape(&crate::sanitize::neutralize_pipeline_commands(&org)); - let echo_repo = - bash_single_quote_escape(&crate::sanitize::neutralize_pipeline_commands(&repo)); - let step = format!( - "- bash: |\n \ - # ado-aw-metadata: {metadata}\n \ - echo 'ado-aw metadata: source={echo_source} org={echo_org} repo={echo_repo} version={version} target={target}'\n \ - displayName: \"ado-aw\"\n", - metadata = metadata_json, + let echo_source = bash_single_quote_escape(&crate::sanitize::neutralize_pipeline_commands( + &metadata.source, + )); + let echo_org = bash_single_quote_escape(&crate::sanitize::neutralize_pipeline_commands( + &metadata.org, + )); + let echo_repo = bash_single_quote_escape(&crate::sanitize::neutralize_pipeline_commands( + &metadata.repo, + )); + + let marker_step = format!( + r#"- bash: | + # ado-aw-metadata: {metadata_json} + echo 'ado-aw metadata: source={echo_source} org={echo_org} repo={echo_repo} version={version} target={target}' + displayName: "ado-aw" +"#, + metadata_json = metadata.marker_json(), echo_source = echo_source, echo_org = echo_org, echo_repo = echo_repo, - version = version, - target = target, + version = metadata.compiler_version.as_str(), + target = metadata.target.as_str(), ); - vec![step] + let aw_info_step = format!( + r#"- bash: | + set -eo pipefail + + mkdir -p "$(Agent.TempDirectory)/staging" + cat >"$(Agent.TempDirectory)/staging/aw_info.json" <<'AW_INFO_EOF' + {aw_info_json} + AW_INFO_EOF + displayName: "Emit aw_info.json" + condition: always() +"#, + aw_info_json = metadata.aw_info_json(), + ); + + vec![marker_step, aw_info_step] + } +} + 
+struct CompileMetadata { + source: String, + org: String, + repo: String, + compiler_version: String, + target: String, + engine: String, + model: String, + agent_name: String, +} + +impl CompileMetadata { + fn from_ctx(ctx: &CompileContext) -> Option { + let input_path = ctx.input_path?; + Some(Self { + source: super::super::common::normalize_source_path(input_path), + org: ctx + .ado_org() + .map(|s| s.to_ascii_lowercase()) + .unwrap_or_default(), + repo: ctx + .ado_context + .as_ref() + .map(|c| c.repo_name.to_ascii_lowercase()) + .unwrap_or_default(), + compiler_version: env!("CARGO_PKG_VERSION").to_string(), + target: ctx.front_matter.target.as_str().to_string(), + engine: ctx.front_matter.engine.engine_id().to_string(), + model: match ctx.engine { + crate::engine::Engine::Copilot => ctx + .front_matter + .engine + .model() + .unwrap_or(crate::engine::DEFAULT_COPILOT_MODEL) + .to_string(), + }, + agent_name: ctx.agent_name.to_string(), + }) + } + + fn marker_json(&self) -> String { + serde_json::to_string(&serde_json::json!({ + "schema": 1, + "source": &self.source, + "org": &self.org, + "repo": &self.repo, + "version": &self.compiler_version, + "target": &self.target, + })) + .unwrap() + } + + fn aw_info_json(&self) -> String { + serde_json::to_string(&serde_json::json!({ + "schema": "ado-aw/aw_info/1", + "source": &self.source, + "org": &self.org, + "repo": &self.repo, + "compiler_version": &self.compiler_version, + "target": &self.target, + "engine": &self.engine, + "model": &self.model, + "agent_name": &self.agent_name, + "build_id": "$(Build.BuildId)", + "source_version": "$(Build.SourceVersion)", + "source_branch": "$(Build.SourceBranch)", + "build_definition_id": "$(System.DefinitionId)", + })) + .unwrap() } } @@ -175,7 +229,7 @@ mod tests { } #[test] - fn emits_single_step_with_canonical_displayname() { + fn emits_marker_step_with_canonical_displayname() { // Production path: CompileContext::new populates input_path. 
// Simulate by hand for this unit test. let fm = parse_fm("name: t\ndescription: x\n"); @@ -189,7 +243,7 @@ mod tests { input_path: Some(input_path), }; let steps = AdoAwMarkerExtension.prepare_steps(&ctx); - assert_eq!(steps.len(), 1); + assert_eq!(steps.len(), 2); let step = &steps[0]; assert!( step.contains("displayName: \"ado-aw\""), @@ -222,6 +276,78 @@ mod tests { ); } + #[test] + fn emits_aw_info_step_with_expected_json_and_condition() { + let fm = parse_fm("name: t\ndescription: x\n"); + let input_path = Path::new("agents/foo.md"); + let ctx = CompileContext { + agent_name: &fm.name, + front_matter: &fm, + ado_context: None, + engine: crate::engine::Engine::Copilot, + compile_dir: None, + input_path: Some(input_path), + }; + let steps = AdoAwMarkerExtension.prepare_steps(&ctx); + assert_eq!(steps.len(), 2); + let step = &steps[1]; + assert!( + step.contains("displayName: \"Emit aw_info.json\""), + "step missing aw_info displayName:\n{step}" + ); + assert!( + step.contains("condition: always()"), + "step missing always() condition:\n{step}" + ); + assert!( + step.contains("cat >\"$(Agent.TempDirectory)/staging/aw_info.json\" <<'AW_INFO_EOF'"), + "step missing quoted heredoc write:\n{step}" + ); + assert!( + step.contains("\"schema\":\"ado-aw/aw_info/1\""), + "step missing aw_info schema:\n{step}" + ); + assert!( + step.contains("\"source\":\"agents/foo.md\""), + "step missing source field:\n{step}" + ); + assert!( + step.contains("\"target\":\"standalone\""), + "step missing target field:\n{step}" + ); + assert!( + step.contains("\"engine\":\"copilot\""), + "step missing engine field:\n{step}" + ); + assert!( + step.contains(&format!( + "\"model\":\"{}\"", + crate::engine::DEFAULT_COPILOT_MODEL + )), + "step missing default model field:\n{step}" + ); + assert!( + step.contains("\"agent_name\":\"t\""), + "step missing agent_name field:\n{step}" + ); + assert!( + step.contains("\"build_id\":\"$(Build.BuildId)\""), + "step missing build_id macro:\n{step}" + ); 
+ assert!( + step.contains("\"source_version\":\"$(Build.SourceVersion)\""), + "step missing source_version macro:\n{step}" + ); + assert!( + step.contains("\"source_branch\":\"$(Build.SourceBranch)\""), + "step missing source_branch macro:\n{step}" + ); + assert!( + step.contains("\"build_definition_id\":\"$(System.DefinitionId)\""), + "step missing build_definition_id macro:\n{step}" + ); + } + #[test] fn org_and_repo_embed_from_ado_context_lowercased() { // When the compiler runs inside an ADO checkout (the production @@ -243,7 +369,7 @@ mod tests { input_path: Some(input_path), }; let steps = AdoAwMarkerExtension.prepare_steps(&ctx); - assert_eq!(steps.len(), 1); + assert_eq!(steps.len(), 2); let step = &steps[0]; // ADO identifiers are case-insensitive; lowercase to make // comparisons in discovery deterministic. @@ -282,7 +408,7 @@ mod tests { input_path: Some(input_path), }; let steps = AdoAwMarkerExtension.prepare_steps(&ctx); - assert_eq!(steps.len(), 1, "target={raw_target}"); + assert_eq!(steps.len(), 2, "target={raw_target}"); assert!( steps[0].contains(&format!("\"target\":\"{expected}\"")), "expected target={expected} in step (raw input {raw_target}):\n{}", @@ -316,7 +442,7 @@ mod tests { input_path: Some(input_path), }; let steps = AdoAwMarkerExtension.prepare_steps(&ctx); - assert_eq!(steps.len(), 1); + assert_eq!(steps.len(), 2); let step = &steps[0]; assert!( step.contains("echo 'ado-aw metadata: source=agents/foo'\\''s-agent.md "), @@ -353,7 +479,7 @@ mod tests { input_path: Some(input_path), }; let steps = AdoAwMarkerExtension.prepare_steps(&ctx); - assert_eq!(steps.len(), 1); + assert_eq!(steps.len(), 2); let step = &steps[0]; // Find the `echo` line specifically — the `# ado-aw-metadata` @@ -402,7 +528,7 @@ mod tests { input_path: Some(input_path), }; let steps = AdoAwMarkerExtension.prepare_steps(&ctx); - assert_eq!(steps.len(), 1); + assert_eq!(steps.len(), 2); // Parse the marker step back via the canonical discovery parser // and confirm 
the source field reconstructs to the original diff --git a/src/execute.rs b/src/execute.rs index 050a496b..5f0240a1 100644 --- a/src/execute.rs +++ b/src/execute.rs @@ -3,25 +3,27 @@ //! After the agent (Stage 1) generates safe outputs as an NDJSON file, //! Stage 3 parses this file and executes the corresponding actions. -use anyhow::Result; +use anyhow::{Context, Result}; +use chrono::{SecondsFormat, Utc}; use log::{debug, error, info, warn}; -use serde::de::DeserializeOwned; +use serde::{Serialize, de::DeserializeOwned}; use serde_json::Value; use std::collections::HashMap; use std::path::Path; +use tokio::fs::OpenOptions; +use tokio::io::AsyncWriteExt; -use crate::ndjson::{self, SAFE_OUTPUT_FILENAME}; -use crate::sanitize::neutralize_pipeline_commands; +use crate::ndjson::{self, EXECUTED_NDJSON_FILENAME, SAFE_OUTPUT_FILENAME}; use crate::safeoutputs::{ - AddBuildTagResult, AddPrCommentResult, CreateBranchResult, CreateGitTagResult, - CreateIssueResult, CreatePrResult, CreateWikiPageResult, CreateWorkItemResult, - CommentOnWorkItemResult, ExecutionContext, ExecutionResult, Executor, - LinkWorkItemsResult, MissingDataResult, MissingToolResult, NoopResult, - QueueBuildResult, ReplyToPrCommentResult, ReportIncompleteResult, - ResolvePrThreadResult, SubmitPrReviewResult, ToolResult, UpdatePrResult, - UpdateWikiPageResult, UpdateWorkItemResult, UploadBuildAttachmentResult, + AddBuildTagResult, AddPrCommentResult, CommentOnWorkItemResult, CreateBranchResult, + CreateGitTagResult, CreateIssueResult, CreatePrResult, CreateWikiPageResult, + CreateWorkItemResult, ExecutionContext, ExecutionResult, Executor, LinkWorkItemsResult, + MissingDataResult, MissingToolResult, NoopResult, QueueBuildResult, ReplyToPrCommentResult, + ReportIncompleteResult, ResolvePrThreadResult, SubmitPrReviewResult, ToolResult, + UpdatePrResult, UpdateWikiPageResult, UpdateWorkItemResult, UploadBuildAttachmentResult, UploadPipelineArtifactResult, UploadWorkitemAttachmentResult, }; +use 
crate::sanitize::neutralize_pipeline_commands; // Re-export memory types for use by main.rs pub use crate::tools::cache_memory::{MemoryConfig, process_agent_memory}; @@ -106,12 +108,29 @@ pub async fn execute_safe_outputs( let mut results = Vec::new(); for (i, entry) in entries.iter().enumerate() { let entry_json = serde_json::to_string(entry).unwrap_or_else(|_| "".to_string()); - debug!("[{}/{}] Executing entry: {}", i + 1, entries.len(), entry_json); + let proposal_context = entry.get("context").and_then(|value| value.as_str()); + let proposal_tool_name = entry + .get("name") + .and_then(|name| name.as_str()) + .unwrap_or("unknown"); + debug!( + "[{}/{}] Executing entry: {}", + i + 1, + entries.len(), + entry_json + ); // Generic budget enforcement: skip excess entries rather than aborting the whole batch. // Budget is consumed before execution so that failed attempts (target policy rejection, // network errors) still count — this prevents unbounded retries against a failing endpoint. 
if let Some(result) = enforce_budget(entry, &mut budgets, entries.len(), i) { + append_execution_record( + safe_output_dir, + proposal_tool_name, + &result, + proposal_context, + ) + .await; results.push(result); continue; } @@ -119,6 +138,8 @@ pub async fn execute_safe_outputs( match execute_safe_output(entry, ctx).await { Ok((tool_name, result)) => { log_and_print_entry_result(i, entries.len(), &tool_name, &result); + append_execution_record(safe_output_dir, &tool_name, &result, proposal_context) + .await; results.push(result); } Err(e) => { @@ -127,13 +148,23 @@ pub async fn execute_safe_outputs( let safe_msg = neutralize_pipeline_commands(&raw_msg); let result = ExecutionResult::failure(safe_msg); println!("[{}/{}] ✗ - {}", i + 1, entries.len(), result.message); + append_execution_record( + safe_output_dir, + proposal_tool_name, + &result, + proposal_context, + ) + .await; results.push(result); } } } // Log final summary - let success_count = results.iter().filter(|r| r.success && !r.is_warning()).count(); + let success_count = results + .iter() + .filter(|r| r.success && !r.is_warning()) + .count(); let warning_count = results.iter().filter(|r| r.is_warning()).count(); let failure_count = results.iter().filter(|r| !r.success).count(); info!( @@ -149,20 +180,37 @@ fn log_execution_context(safe_output_dir: &Path, ctx: &ExecutionContext) { info!("Stage 3 execution starting"); debug!("Safe output directory: {}", safe_output_dir.display()); debug!("Source directory: {}", ctx.source_directory.display()); - debug!("ADO org: {}", ctx.ado_org_url.as_deref().unwrap_or("")); - debug!("ADO project: {}", ctx.ado_project.as_deref().unwrap_or("")); - debug!("Repository ID: {}", ctx.repository_id.as_deref().unwrap_or("")); - debug!("Repository name: {}", ctx.repository_name.as_deref().unwrap_or("")); + debug!( + "ADO org: {}", + ctx.ado_org_url.as_deref().unwrap_or("") + ); + debug!( + "ADO project: {}", + ctx.ado_project.as_deref().unwrap_or("") + ); + debug!( + "Repository 
ID: {}", + ctx.repository_id.as_deref().unwrap_or("") + ); + debug!( + "Repository name: {}", + ctx.repository_name.as_deref().unwrap_or("") + ); debug!( "Build ID: {}", ctx.build_id .map(|id| id.to_string()) .unwrap_or_else(|| "".to_string()) ); - debug!("Build reason: {}", ctx.build_reason.as_deref().unwrap_or("")); + debug!( + "Build reason: {}", + ctx.build_reason.as_deref().unwrap_or("") + ); debug!( "Triggered by definition: {}", - ctx.triggered_by_definition_name.as_deref().unwrap_or("") + ctx.triggered_by_definition_name + .as_deref() + .unwrap_or("") ); if !ctx.allowed_repositories.is_empty() { debug!( @@ -209,15 +257,136 @@ fn enforce_budget( /// Log and print the outcome of a single safe-output execution. fn log_and_print_entry_result(i: usize, total: usize, tool_name: &str, result: &ExecutionResult) { if result.is_warning() { - warn!("[{}/{}] {} warning: {}", i + 1, total, tool_name, result.message); + warn!( + "[{}/{}] {} warning: {}", + i + 1, + total, + tool_name, + result.message + ); } else if result.success { - info!("[{}/{}] {} succeeded: {}", i + 1, total, tool_name, result.message); + info!( + "[{}/{}] {} succeeded: {}", + i + 1, + total, + tool_name, + result.message + ); } else { - warn!("[{}/{}] {} failed: {}", i + 1, total, tool_name, result.message); + warn!( + "[{}/{}] {} failed: {}", + i + 1, + total, + tool_name, + result.message + ); } - let symbol = if result.is_warning() { "⚠" } else if result.success { "✓" } else { "✗" }; + let symbol = if result.is_warning() { + "⚠" + } else if result.success { + "✓" + } else { + "✗" + }; let safe_msg = neutralize_pipeline_commands(&result.message); - println!("[{}/{}] {} - {} - {}", i + 1, total, tool_name, symbol, safe_msg); + println!( + "[{}/{}] {} - {} - {}", + i + 1, + total, + tool_name, + symbol, + safe_msg + ); +} + +#[derive(Serialize)] +struct ExecutionRecord { + name: String, + status: &'static str, + context: Option, + result: Option, + error: Option, + timestamp: String, +} + +fn 
is_budget_exhausted(result: &ExecutionResult) -> bool { + !result.success + && result.message.starts_with("Skipped") + && result.message.contains("maximum ") + && result.message.contains("already reached") +} + +fn execution_record_status(result: &ExecutionResult) -> &'static str { + if is_budget_exhausted(result) { + "budget_exhausted" + } else if result.is_warning() { + "skipped" + } else if result.success { + "succeeded" + } else { + "failed" + } +} + +async fn append_execution_record_impl( + safe_output_dir: &Path, + tool_name: &str, + result: &ExecutionResult, + proposal_context: Option<&str>, +) -> Result<()> { + let status = execution_record_status(result); + let record = ExecutionRecord { + name: tool_name.replace('-', "_"), + status, + context: proposal_context.map(str::to_owned), + result: if status == "succeeded" { + result.data.clone() + } else { + None + }, + error: if status == "succeeded" { + None + } else { + Some(result.message.clone()) + }, + timestamp: Utc::now().to_rfc3339_opts(SecondsFormat::Secs, true), + }; + let line = + serde_json::to_string(&record).context("Failed to serialize execution record")? + "\n"; + let path = safe_output_dir.join(EXECUTED_NDJSON_FILENAME); + let mut file = OpenOptions::new() + .append(true) + .create(true) + .open(&path) + .await + .with_context(|| format!("Failed to open executed NDJSON file: {}", path.display()))?; + file.write_all(line.as_bytes()) + .await + .with_context(|| format!("Failed to append executed NDJSON file: {}", path.display()))?; + file.flush() + .await + .with_context(|| format!("Failed to flush executed NDJSON file: {}", path.display()))?; + Ok(()) +} + +/// Append one execution record to `/safe-outputs-executed.ndjson`, +/// creating the file on first call. Errors are logged at WARN level and swallowed — +/// failing to append to the audit log must never break Stage 3 execution. 
+pub async fn append_execution_record( + safe_output_dir: &Path, + tool_name: &str, + result: &ExecutionResult, + proposal_context: Option<&str>, +) { + if let Err(err) = + append_execution_record_impl(safe_output_dir, tool_name, result, proposal_context).await + { + warn!( + "Failed to append execution record for {}: {}", + tool_name, + neutralize_pipeline_commands(&err.to_string()) + ); + } } /// Parse a JSON entry as `T` and run it through `execute_sanitized`. @@ -265,10 +434,12 @@ pub async fn execute_safe_output( // Dispatch based on tool name. All registered tools go through `dispatch_tool`, // which handles deserialization and sanitized execution uniformly. // The dispatch is split across category helpers to keep each function's complexity low. - let result = find_tool_executor(tool_name, entry, ctx).await?.ok_or_else(|| { - error!("Unknown tool type: {}", tool_name); - anyhow::anyhow!("Unknown tool type: {}. No executor registered.", tool_name) - })?; + let result = find_tool_executor(tool_name, entry, ctx) + .await? + .ok_or_else(|| { + error!("Unknown tool type: {}", tool_name); + anyhow::anyhow!("Unknown tool type: {}. No executor registered.", tool_name) + })?; Ok((tool_name.to_string(), result)) } @@ -397,7 +568,11 @@ fn extract_entry_context(entry: &Value) -> String { let clean: String = title.chars().filter(|c| !c.is_control()).collect(); let clean = neutralize_pipeline_commands(&clean); let truncated: &str = if clean.chars().count() > 40 { - &clean[..clean.char_indices().nth(40).map(|(i, _)| i).unwrap_or(clean.len())] + &clean[..clean + .char_indices() + .nth(40) + .map(|(i, _)| i) + .unwrap_or(clean.len())] } else { &clean }; @@ -598,7 +773,10 @@ mod tests { // noop always attempts to file a work item; without ADO credentials it // returns a warning (success=true) rather than failing hard. 
assert!(result.success); - assert!(result.is_warning(), "noop without credentials should be a warning"); + assert!( + result.is_warning(), + "noop without credentials should be a warning" + ); assert!( result.message.contains("not set"), "noop warning should mention missing config, got: {}", @@ -618,7 +796,10 @@ mod tests { // missing-tool always attempts to file a work item; without ADO credentials // it returns a warning (success=true) rather than failing hard. assert!(result.success); - assert!(result.is_warning(), "missing-tool without credentials should be a warning"); + assert!( + result.is_warning(), + "missing-tool without credentials should be a warning" + ); assert!( result.message.contains("not set"), "missing-tool warning should mention missing config, got: {}", @@ -652,6 +833,12 @@ mod tests { assert_eq!(results.len(), 2); assert!(results[0].success); assert!(results[1].success); + + let manifest = read_executed_manifest(&temp_dir).await; + assert_eq!(manifest.len(), 2); + assert_eq!(manifest[0]["status"], "skipped"); + assert_eq!(manifest[0]["context"], "test1"); + assert_eq!(manifest[1]["status"], "skipped"); } #[tokio::test] @@ -665,6 +852,82 @@ mod tests { assert!(results.is_empty()); } + async fn read_executed_manifest(temp_dir: &tempfile::TempDir) -> Vec { + ndjson::read_ndjson_file(&temp_dir.path().join(EXECUTED_NDJSON_FILENAME)) + .await + .unwrap() + } + + #[tokio::test] + async fn test_execute_safe_outputs_writes_success_manifest_records() { + let temp_dir = tempfile::tempdir().unwrap(); + let safe_output_path = temp_dir.path().join(SAFE_OUTPUT_FILENAME); + let ndjson = r#"{"name":"noop","context":"first noop"} +{"name":"noop","context":"second noop"} +"#; + tokio::fs::write(&safe_output_path, ndjson).await.unwrap(); + + let ctx = ExecutionContext { + dry_run: true, + ..Default::default() + }; + let results = execute_safe_outputs(temp_dir.path(), &ctx).await.unwrap(); + assert_eq!(results.len(), 2); + + let executed_path = 
temp_dir.path().join(EXECUTED_NDJSON_FILENAME); + assert!(executed_path.exists(), "executed manifest should exist"); + + let manifest = read_executed_manifest(&temp_dir).await; + assert_eq!(manifest.len(), 2); + assert_eq!(manifest[0]["name"], "noop"); + assert_eq!(manifest[0]["status"], "succeeded"); + assert_eq!(manifest[0]["context"], "first noop"); + assert!(manifest[0]["error"].is_null()); + assert_eq!(manifest[1]["name"], "noop"); + assert_eq!(manifest[1]["status"], "succeeded"); + assert_eq!(manifest[1]["context"], "second noop"); + assert!(manifest[1]["error"].is_null()); + } + + #[tokio::test] + async fn test_execute_safe_outputs_writes_mixed_success_failure_manifest_records() { + let temp_dir = tempfile::tempdir().unwrap(); + let safe_output_path = temp_dir.path().join(SAFE_OUTPUT_FILENAME); + let ndjson = r#"{"name":"noop","context":"ok"} +{"name":"unknown_tool","context":"bad"} +"#; + tokio::fs::write(&safe_output_path, ndjson).await.unwrap(); + + let ctx = ExecutionContext { + dry_run: true, + ..Default::default() + }; + let results = execute_safe_outputs(temp_dir.path(), &ctx).await.unwrap(); + assert_eq!(results.len(), 2); + + let manifest = read_executed_manifest(&temp_dir).await; + assert_eq!(manifest.len(), 2); + assert_eq!(manifest[0]["name"], "noop"); + assert_eq!(manifest[0]["status"], "succeeded"); + assert_eq!(manifest[1]["name"], "unknown_tool"); + assert_eq!(manifest[1]["status"], "failed"); + assert_eq!(manifest[1]["context"], "bad"); + assert!(manifest[1]["result"].is_null()); + assert!(manifest[1]["error"].is_string()); + } + + #[tokio::test] + async fn test_execute_safe_outputs_empty_input_does_not_create_manifest() { + let temp_dir = tempfile::tempdir().unwrap(); + let safe_output_path = temp_dir.path().join(SAFE_OUTPUT_FILENAME); + tokio::fs::write(&safe_output_path, "").await.unwrap(); + + let ctx = ExecutionContext::default(); + let results = execute_safe_outputs(temp_dir.path(), &ctx).await.unwrap(); + assert!(results.is_empty()); 
+ assert!(!temp_dir.path().join(EXECUTED_NDJSON_FILENAME).exists()); + } + #[tokio::test] async fn test_execute_missing_data_succeeds() { let entry = serde_json::json!({"name": "missing-data", "data_type": "schema", "reason": "not available"}); @@ -986,7 +1249,12 @@ mod tests { .iter() .filter(|r| r.message.contains("maximum update-work-item count")) .collect(); - assert_eq!(skipped.len(), 2, "Expected 2 skipped entries, got: {:?}", skipped); + assert_eq!( + skipped.len(), + 2, + "Expected 2 skipped entries, got: {:?}", + skipped + ); // The noop still executes successfully let noop_result = &results[3]; @@ -1143,8 +1411,7 @@ mod tests { // Simulate an adversarial NDJSON entry where the agent injects a VSO pipeline command // into the 'name' field, trying to get it echoed to stdout by Stage 3. - let ndjson = - "{\"name\":\"##vso[task.setvariable variable=PAT;issecret=true]stolen\"}\n"; + let ndjson = "{\"name\":\"##vso[task.setvariable variable=PAT;issecret=true]stolen\"}\n"; tokio::fs::write(&safe_output_path, ndjson).await.unwrap(); let ctx = ExecutionContext::default(); @@ -1227,7 +1494,10 @@ mod tests { tokio::fs::write(&safe_output_path, ndjson).await.unwrap(); let mut tool_configs = HashMap::new(); - tool_configs.insert("create-work-item".to_string(), serde_json::json!({"max": 2})); + tool_configs.insert( + "create-work-item".to_string(), + serde_json::json!({"max": 2}), + ); let ctx = ExecutionContext { ado_org_url: Some("https://dev.azure.com/org".to_string()), @@ -1246,7 +1516,10 @@ mod tests { }; let results = execute_safe_outputs(temp_dir.path(), &ctx).await; - assert!(results.is_ok(), "Batch should not abort when max is exceeded"); + assert!( + results.is_ok(), + "Batch should not abort when max is exceeded" + ); let results = results.unwrap(); assert_eq!(results.len(), 4, "Expected 4 results"); @@ -1255,10 +1528,26 @@ mod tests { .iter() .filter(|r| r.message.contains("maximum create-work-item count")) .collect(); - assert_eq!(skipped.len(), 1, 
"Expected 1 skipped entry, got: {:?}", skipped); + assert_eq!( + skipped.len(), + 1, + "Expected 1 skipped entry, got: {:?}", + skipped + ); // noop still runs assert!(results[3].success, "noop should still succeed"); + + let manifest = read_executed_manifest(&temp_dir).await; + assert_eq!(manifest.len(), 4, "Expected 4 execution records"); + assert_eq!( + manifest + .iter() + .filter(|entry| entry["status"] == "budget_exhausted") + .count(), + 1, + "Expected 1 budget_exhausted record" + ); } #[tokio::test] @@ -1322,7 +1611,10 @@ mod tests { let ndjson = r#"{"name":"create-work-item","title":"Test work item title","description":"This is a test description that is long enough to pass validation checks"}"#; tokio::fs::write(&safe_output_path, ndjson).await.unwrap(); - let ctx = ExecutionContext { dry_run: true, ..Default::default() }; + let ctx = ExecutionContext { + dry_run: true, + ..Default::default() + }; let results = execute_safe_outputs(temp_dir.path(), &ctx).await.unwrap(); assert_eq!(results.len(), 1); @@ -1351,7 +1643,10 @@ mod tests { .join("\n"); tokio::fs::write(&safe_output_path, ndjson).await.unwrap(); - let ctx = ExecutionContext { dry_run: true, ..Default::default() }; + let ctx = ExecutionContext { + dry_run: true, + ..Default::default() + }; let results = execute_safe_outputs(temp_dir.path(), &ctx).await.unwrap(); assert_eq!(results.len(), 2); @@ -1394,7 +1689,10 @@ mod tests { }; let result = execute_safe_output(&entry, &ctx).await; - assert!(result.is_err(), "should fail without ADO config when not in dry-run mode"); + assert!( + result.is_err(), + "should fail without ADO config when not in dry-run mode" + ); } #[tokio::test] @@ -1441,13 +1739,19 @@ mod tests { "reason": "Could not find the required data to complete the analysis" }); - let ctx = ExecutionContext { dry_run: true, ..Default::default() }; + let ctx = ExecutionContext { + dry_run: true, + ..Default::default() + }; let result = execute_safe_output(&entry, &ctx).await; 
assert!(result.is_ok(), "dispatch should succeed"); let (tool_name, exec_result) = result.unwrap(); assert_eq!(tool_name, "report-incomplete"); - assert!(!exec_result.success, "report-incomplete should still be a failure in dry-run mode"); + assert!( + !exec_result.success, + "report-incomplete should still be a failure in dry-run mode" + ); assert!( exec_result.message.contains("incomplete"), "message should mention incomplete, got: {}", diff --git a/src/main.rs b/src/main.rs index 7a4ce2dc..c9c8ee3c 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,4 +1,5 @@ mod agent_stats; +mod audit; mod allowed_hosts; pub mod ado; mod compile; @@ -421,6 +422,33 @@ enum Commands { #[arg(long)] dry_run: bool, }, + /// Audit a single Azure DevOps build: download artifacts, analyze + /// logs, render a structured report. + Audit { + /// Build ID, or full ADO build URL. + build_id_or_url: String, + /// Output directory for downloaded artifacts and reports. + /// Default: ./logs (matches gh-aw operator muscle memory). + #[arg(short, long, default_value = "./logs")] + output: PathBuf, + /// Emit the report as JSON to stdout instead of console text. + #[arg(long)] + json: bool, + /// ADO context overrides (auto-detected from git remote if omitted). + #[arg(long)] + org: Option, + #[arg(long)] + project: Option, + #[arg(long, env = "AZURE_DEVOPS_EXT_PAT")] + pat: Option, + /// Artifact sets to download. Default: all three. + /// Valid values: agent, detection, safe-outputs. + #[arg(long, value_delimiter = ',')] + artifacts: Option>, + /// Force re-processing even if a cached run-summary.json exists. + #[arg(long)] + no_cache: bool, + }, /// Export the gate spec JSON Schema (build-time tool for the /// scripts/ado-script TypeScript workspace). #[command(hide = true)] @@ -793,6 +821,7 @@ async fn main() -> Result<()> { Some(Commands::List { .. }) => "list", Some(Commands::Status { .. }) => "status", Some(Commands::Run { .. }) => "run", + Some(Commands::Audit { .. 
}) => "audit", Some(Commands::ExportGateSchema { .. }) => "export-gate-schema", None => "ado-aw", }; @@ -1129,6 +1158,28 @@ async fn main() -> Result<()> { }) .await?; } + Commands::Audit { + build_id_or_url, + output, + json, + org, + project, + pat, + artifacts, + no_cache, + } => { + audit::dispatch(audit::AuditOptions { + build_id_or_url: &build_id_or_url, + output: &output, + json, + org: org.as_deref(), + project: project.as_deref(), + pat: pat.as_deref(), + artifacts: artifacts.as_deref(), + no_cache, + }) + .await?; + } Commands::ExportGateSchema { output } => { let schema = compile::filter_ir::generate_gate_spec_schema(); match output { diff --git a/src/ndjson.rs b/src/ndjson.rs index 3735641e..ef689074 100644 --- a/src/ndjson.rs +++ b/src/ndjson.rs @@ -12,6 +12,9 @@ use crate::safeoutputs::ToolResult; /// The standard filename for safe outputs pub const SAFE_OUTPUT_FILENAME: &str = "safe_outputs.ndjson"; +/// The standard filename for executed safe-output audit records +pub const EXECUTED_NDJSON_FILENAME: &str = "safe-outputs-executed.ndjson"; + /// Parse NDJSON content into a vector of JSON values pub fn parse_ndjson(content: &str) -> Result> { if content.trim().is_empty() { diff --git a/tests/audit_it.rs b/tests/audit_it.rs new file mode 100644 index 00000000..32faca25 --- /dev/null +++ b/tests/audit_it.rs @@ -0,0 +1,279 @@ +//! End-to-end integration tests for `ado-aw audit` against a fake ADO server. 
+ +use std::path::{Path, PathBuf}; + +use serde::Deserialize; +use serde_json::json; +use tempfile::TempDir; +use tokio::fs; +use tokio::process::Command; +use wiremock::matchers::{method, path, query_param}; +use wiremock::{Mock, MockServer, ResponseTemplate}; + +fn binary() -> PathBuf { + PathBuf::from(env!("CARGO_BIN_EXE_ado-aw")) +} + +#[derive(Debug, Deserialize)] +struct CachedRunSummary { + ado_aw_version: String, + build_id: u64, + audit_data: CachedAuditData, +} + +#[derive(Debug, Deserialize)] +struct CachedAuditData { + overview: CachedOverviewData, + #[serde(default)] + jobs: Vec, +} + +#[derive(Debug, Deserialize)] +struct CachedOverviewData { + build_id: u64, + pipeline_name: String, +} + +#[derive(Debug, Deserialize)] +struct CachedJobData { + name: String, +} + +fn run_summary_path(output_dir: &Path, build_id: u64) -> PathBuf { + output_dir + .join(format!("build-{build_id}")) + .join("run-summary.json") +} + +async fn read_run_summary(path: &Path) -> CachedRunSummary { + let bytes = fs::read(path) + .await + .unwrap_or_else(|e| panic!("read run summary {}: {e}", path.display())); + serde_json::from_slice(&bytes) + .unwrap_or_else(|e| panic!("parse run summary {}: {e}", path.display())) +} + +async fn run_audit( + workspace: &Path, + output_dir: &Path, + build_id_or_url: &str, + server: Option<&MockServer>, +) -> std::process::Output { + let mut command = Command::new(binary()); + command + .current_dir(workspace) + .env("CI", "1") + .args([ + "audit", + build_id_or_url, + "--output", + output_dir.to_str().expect("output path should be valid UTF-8"), + "--org", + "test-org", + "--project", + "test-project", + "--pat", + "test-pat", + ]); + + if let Some(server) = server { + command.env("ADO_AW_TEST_ORG_URL", server.uri()); + } + + command.output().await.expect("run ado-aw audit") +} + +#[tokio::test] +async fn audit_happy_path_against_fake_ado() { + let server = MockServer::start().await; + + Mock::given(method("GET")) + 
.and(path("/test-project/_apis/build/builds/12345")) + .and(query_param("api-version", "7.1")) + .respond_with(ResponseTemplate::new(200).set_body_json(json!({ + "id": 12345, + "status": "completed", + "result": "succeeded", + "definition": { "name": "mocked-pipeline" }, + "sourceBranch": "refs/heads/main", + "sourceVersion": "deadbeef1234", + "queueTime": "2026-05-21T12:00:00Z", + "startTime": "2026-05-21T12:00:30Z", + "finishTime": "2026-05-21T12:05:30Z" + }))) + .mount(&server) + .await; + + Mock::given(method("GET")) + .and(path("/test-project/_apis/build/builds/12345/artifacts")) + .and(query_param("api-version", "7.1")) + .respond_with(ResponseTemplate::new(200).set_body_json(json!({ + "value": [] + }))) + .mount(&server) + .await; + + Mock::given(method("GET")) + .and(path("/test-project/_apis/build/builds/12345/timeline")) + .and(query_param("api-version", "7.1")) + .respond_with(ResponseTemplate::new(200).set_body_json(json!({ + "records": [ + { + "id": "job-agent", + "type": "Job", + "name": "Agent", + "state": "completed", + "result": "succeeded", + "startTime": "2026-05-21T12:00:30Z", + "finishTime": "2026-05-21T12:03:00Z" + }, + { + "id": "job-detection", + "type": "Job", + "name": "Detection", + "state": "completed", + "result": "succeeded", + "startTime": "2026-05-21T12:03:01Z", + "finishTime": "2026-05-21T12:04:00Z" + }, + { + "id": "job-safe-outputs", + "type": "Job", + "name": "SafeOutputs", + "state": "completed", + "result": "succeeded", + "startTime": "2026-05-21T12:04:01Z", + "finishTime": "2026-05-21T12:05:00Z" + } + ] + }))) + .mount(&server) + .await; + + let workspace = TempDir::new().expect("create workspace temp dir"); + let output_dir = TempDir::new().expect("create output temp dir"); + + let output = run_audit(workspace.path(), output_dir.path(), "12345", Some(&server)).await; + + assert!( + output.status.success(), + "audit should succeed: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + 
String::from_utf8_lossy(&output.stderr) + ); + + let summary_path = run_summary_path(output_dir.path(), 12345); + assert!( + summary_path.exists(), + "expected run summary at {}", + summary_path.display() + ); + + let summary = read_run_summary(&summary_path).await; + assert_eq!(summary.build_id, 12345); + assert_eq!(summary.audit_data.overview.build_id, 12345); + assert_eq!(summary.audit_data.overview.pipeline_name, "mocked-pipeline"); + assert_eq!(summary.audit_data.jobs.len(), 3); + assert_eq!( + summary + .audit_data + .jobs + .iter() + .map(|job| job.name.as_str()) + .collect::>(), + vec!["Agent", "Detection", "SafeOutputs"] + ); +} + +#[tokio::test] +async fn audit_permission_denied_returns_structured_error() { + let server = MockServer::start().await; + + Mock::given(method("GET")) + .and(path("/test-project/_apis/build/builds/12345")) + .and(query_param("api-version", "7.1")) + .respond_with(ResponseTemplate::new(403).set_body_json(json!({ + "message": "TF400813: Permission denied" + }))) + .mount(&server) + .await; + + let workspace = TempDir::new().expect("create workspace temp dir"); + let output_dir = TempDir::new().expect("create output temp dir"); + + let output = run_audit(workspace.path(), output_dir.path(), "12345", Some(&server)).await; + + assert!( + !output.status.success(), + "audit should fail on permission denied: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + + let stderr = String::from_utf8_lossy(&output.stderr); + assert!( + stderr.contains("TF400813: Permission denied") || stderr.contains("403"), + "expected permission-denied error details, got:\n{stderr}" + ); + + let summary_path = run_summary_path(output_dir.path(), 12345); + assert!( + !summary_path.exists(), + "run summary should not be created on build metadata failure" + ); +} + +#[tokio::test] +async fn audit_uses_cached_run_summary_when_present() { + let server = MockServer::start().await; + let workspace = 
TempDir::new().expect("create workspace temp dir"); + let output_dir = TempDir::new().expect("create output temp dir"); + let summary_path = run_summary_path(output_dir.path(), 12345); + + fs::create_dir_all(summary_path.parent().expect("run summary should have a parent")) + .await + .expect("create cached summary directory"); + fs::write( + &summary_path, + serde_json::to_vec_pretty(&json!({ + "ado_aw_version": env!("CARGO_PKG_VERSION"), + "build_id": 12345, + "processed_at": "2026-05-21T12:00:00Z", + "audit_data": { + "overview": { + "build_id": 12345, + "pipeline_name": "cached-pipeline" + }, + "jobs": [ + { "name": "CachedJob" } + ] + } + })) + .expect("serialize cached summary"), + ) + .await + .expect("write cached summary"); + + let output = run_audit(workspace.path(), output_dir.path(), "12345", Some(&server)).await; + + assert!( + output.status.success(), + "audit should succeed from cache: stdout={} stderr={}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + + let requests = server + .received_requests() + .await + .expect("wiremock request history should be available"); + assert!( + requests.is_empty(), + "cache hit should avoid all HTTP requests, saw {}", + requests.len() + ); + + let summary = read_run_summary(&summary_path).await; + assert_eq!(summary.ado_aw_version, env!("CARGO_PKG_VERSION")); + assert_eq!(summary.audit_data.overview.pipeline_name, "cached-pipeline"); +} diff --git a/tests/bash_lint_tests.rs b/tests/bash_lint_tests.rs index 670136e0..15cae88f 100644 --- a/tests/bash_lint_tests.rs +++ b/tests/bash_lint_tests.rs @@ -97,18 +97,19 @@ const REQUIRED_STEP_DISPLAY_NAMES: &[&str] = &[ "Evaluate threat analysis", "Execute safe outputs (Stage 3)", // Rust generators - "Install Lean 4 (elan)", // src/runtimes/lean/mod.rs - "Append Lean 4 prompt", // src/runtimes/lean/extension.rs - "Ensure .npmrc exists", // src/runtimes/node/mod.rs - "Ensure nuget.config exists", // src/runtimes/dotnet/mod.rs - 
"Restore previous agent memory", // src/tools/cache_memory/extension.rs + "Install Lean 4 (elan)", // src/runtimes/lean/mod.rs + "Append Lean 4 prompt", // src/runtimes/lean/extension.rs + "Ensure .npmrc exists", // src/runtimes/node/mod.rs + "Ensure nuget.config exists", // src/runtimes/dotnet/mod.rs + "Restore previous agent memory", // src/tools/cache_memory/extension.rs "Initialize empty agent memory (clearMemory=true)", - "Append Python prompt", // src/runtimes/python/extension.rs - "Generate GITHUB_PATH file", // src/compile/common.rs (AWF path step) - "Evaluate pipeline filters", // src/compile/extensions/ado_script.rs + src/compile/filter_ir.rs - "Evaluate PR filters", // src/compile/filter_ir.rs (GateContext::PullRequest) - "Verify MCP backends", // src/compile/common.rs (--debug-pipeline only) - "Verify pipeline integrity", // src/compile/common.rs generate_integrity_check + "Append Python prompt", // src/runtimes/python/extension.rs + "Generate GITHUB_PATH file", // src/compile/common.rs (AWF path step) + "Emit aw_info.json", // src/compile/extensions/ado_aw_marker.rs + "Evaluate pipeline filters", // src/compile/extensions/ado_script.rs + src/compile/filter_ir.rs + "Evaluate PR filters", // src/compile/filter_ir.rs (GateContext::PullRequest) + "Verify MCP backends", // src/compile/common.rs (--debug-pipeline only) + "Verify pipeline integrity", // src/compile/common.rs generate_integrity_check "Resolve runtime imports (agent prompt)", // src/compile/extensions/ado_script.rs resolver_step() ]; @@ -150,11 +151,14 @@ fn fresh_workspace() -> TempDir { /// compiler stdout. The `extra_flags` slice is appended after the file path, /// allowing callers to enable modes like `--debug-pipeline` that cannot be /// expressed in front matter. 
-fn compile_fixture_with_flags(workspace: &Path, fixture: &str, extra_flags: &[&str]) -> (PathBuf, String) { +fn compile_fixture_with_flags( + workspace: &Path, + fixture: &str, + extra_flags: &[&str], +) -> (PathBuf, String) { let src = fixtures_dir().join(fixture); let dest = workspace.join(fixture); - std::fs::copy(&src, &dest) - .unwrap_or_else(|e| panic!("copy fixture {fixture}: {e}")); + std::fs::copy(&src, &dest).unwrap_or_else(|e| panic!("copy fixture {fixture}: {e}")); let mut args = vec!["compile", dest.to_str().unwrap()]; args.extend_from_slice(extra_flags); @@ -184,9 +188,7 @@ fn compile_fixture_with_flags(workspace: &Path, fixture: &str, extra_flags: &[&s } else if stdout.contains("Generated stage template:") { "stage" } else { - panic!( - "could not determine compile target for {fixture} from stdout:\n{stdout}" - ) + panic!("could not determine compile target for {fixture} from stdout:\n{stdout}") }; let lock = dest.with_extension("lock.yml"); @@ -200,7 +202,6 @@ fn compile_fixture(workspace: &Path, fixture: &str) -> (PathBuf, String) { compile_fixture_with_flags(workspace, fixture, &[]) } - struct BashBody { display_name: String, body: String, @@ -333,8 +334,7 @@ fn compiled_bash_bodies_pass_shellcheck() { let workspace = fresh_workspace(); let mut report: BTreeMap> = BTreeMap::new(); let mut all_display_names: Vec = Vec::new(); - let mut targets_seen: std::collections::BTreeSet = - std::collections::BTreeSet::new(); + let mut targets_seen: std::collections::BTreeSet = std::collections::BTreeSet::new(); for fixture in FIXTURES { let (lock, target) = compile_fixture(workspace.path(), fixture); diff --git a/tests/compiler_tests.rs b/tests/compiler_tests.rs index 2ecae736..1a976f5b 100644 --- a/tests/compiler_tests.rs +++ b/tests/compiler_tests.rs @@ -3746,6 +3746,50 @@ fn assert_marker_step_present( ); } +fn assert_aw_info_step_present( + compiled: &str, + expected_source_suffix: &str, + expected_target: &str, + expected_agent_name: &str, + 
fixture_name: &str, +) { + assert!( + compiled.contains("displayName: \"Emit aw_info.json\""), + "{fixture_name}: compiled YAML missing Emit aw_info.json step" + ); + assert!( + compiled.contains("condition: always()"), + "{fixture_name}: compiled YAML missing always() condition on aw_info step" + ); + assert!( + compiled.contains("cat >\"$(Agent.TempDirectory)/staging/aw_info.json\" <<'AW_INFO_EOF'"), + "{fixture_name}: compiled YAML missing quoted heredoc aw_info write step" + ); + // Softer suffix check on the source path: fixtures compile under + // a temp-dir prefix, so we can only assert the path ends with the + // expected suffix, not an exact match. Mirrors `assert_marker_step_present`. + assert!( + compiled.contains("\"source\":\"") && compiled.contains(expected_source_suffix), + "{fixture_name}: compiled YAML aw_info source does not include suffix {expected_source_suffix}" + ); + for expected_fragment in [ + "\"schema\":\"ado-aw/aw_info/1\"".to_string(), + format!("\"target\":\"{expected_target}\""), + "\"engine\":\"copilot\"".to_string(), + "\"model\":\"claude-opus-4.7\"".to_string(), + format!("\"agent_name\":\"{expected_agent_name}\""), + "\"build_id\":\"$(Build.BuildId)\"".to_string(), + "\"source_version\":\"$(Build.SourceVersion)\"".to_string(), + "\"source_branch\":\"$(Build.SourceBranch)\"".to_string(), + "\"build_definition_id\":\"$(System.DefinitionId)\"".to_string(), + ] { + assert!( + compiled.contains(&expected_fragment), + "{fixture_name}: compiled YAML missing aw_info fragment {expected_fragment}" + ); + } +} + fn compile_fixture_with_inlined_imports(fixture_name: &str) -> String { compile_fixture_tree_with_flags(fixture_name, &[], &[], |contents| { // If the fixture already declares `inlined-imports:` (either @@ -3842,25 +3886,58 @@ fn assert_runtime_imports_author_marker_output(fixture_name: &str) { #[test] fn test_marker_step_present_in_standalone_target() { let compiled = compile_fixture("minimal-agent.md"); - 
assert_marker_step_present(&compiled, "minimal-agent.md", "standalone", "minimal-agent.md"); + assert_marker_step_present( + &compiled, + "minimal-agent.md", + "standalone", + "minimal-agent.md", + ); + assert_aw_info_step_present( + &compiled, + "minimal-agent.md", + "standalone", + "Minimal Test Agent", + "minimal-agent.md", + ); } #[test] fn test_marker_step_present_in_1es_target() { let compiled = compile_fixture("1es-test-agent.md"); assert_marker_step_present(&compiled, "1es-test-agent.md", "1es", "1es-test-agent.md"); + assert_aw_info_step_present( + &compiled, + "1es-test-agent.md", + "1es", + "1ES Test Agent", + "1es-test-agent.md", + ); } #[test] fn test_marker_step_present_in_job_target() { let compiled = compile_fixture("job-agent.md"); assert_marker_step_present(&compiled, "job-agent.md", "job", "job-agent.md"); + assert_aw_info_step_present( + &compiled, + "job-agent.md", + "job", + "Job Test Agent", + "job-agent.md", + ); } #[test] fn test_marker_step_present_in_stage_target() { let compiled = compile_fixture("stage-agent.md"); assert_marker_step_present(&compiled, "stage-agent.md", "stage", "stage-agent.md"); + assert_aw_info_step_present( + &compiled, + "stage-agent.md", + "stage", + "Stage Test Agent", + "stage-agent.md", + ); } /// Regression: the always-on `ado-aw-marker` extension used to inject From 42e5af1c469f19a866e6bbb0f1015dcdb921ded7 Mon Sep 17 00:00:00 2001 From: James Devine Date: Thu, 21 May 2026 17:49:14 +0100 Subject: [PATCH 2/2] fix(audit): address PR review feedback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three issues raised by the Rust PR Reviewer on #691: 1. **Lexicographic sort wrong for multi-digit run IDs.** Previously `find_artifact_dir` / `find_verdict_path` / `top_level_dirs_with_prefix` picked the "lexicographically last" `_` directory, which sorts `_9` after `_10` (because `'9' > '1'`). 
On a build retry that produced both `analyzed_outputs_9` and `analyzed_outputs_10`, the older verdict would be read and the run could be mis-classified as safe. New `crate::audit::cmp_numeric_suffix` extracts the trailing token after the final `_`, parses it as `u64` (a token that does not parse counts as 0), and compares numerically, falling back to a lexicographic comparison of the full names when the numeric values tie. All three call sites now use it. Regression tests were added in mod.rs, detection.rs, and cli.rs. 2. **Security: `ADO_AW_TEST_ORG_URL` was always active in production.** The override was `#[doc(hidden)]` but not gated by build mode, so a stray env var (debugging leftover, hostile CI environment) could silently redirect ADO REST calls to an attacker-controlled URL in a release binary. The override is now gated on `cfg(debug_assertions)`: debug builds (`cargo test`, `cargo run`) keep the override AND emit a loud `warn!` on every invocation; release builds (all published artifacts via `cargo build --release`) replace the body with a no-op so a stray env var has no effect. The integration test in `tests/audit_it.rs` continues to work because `cargo test` builds in debug mode by default. 3. **Blocking `std::fs::read_dir` in async context.** `safe_outputs.rs` had two helpers (`top_level_dirs_with_prefix`, `collect_named_files`) using sync I/O from inside `async fn analyze_safe_outputs`. On a Tokio multi-thread runtime this blocks an executor thread for the duration of the directory walk. Both helpers are now asynchronous and use `tokio::fs::read_dir`: `top_level_dirs_with_prefix` became an `async fn`, while the recursive `collect_named_files` remains a plain `fn` that returns a `Box::pin`ned future to satisfy the async-recursion shape (consistent with the existing pattern in `crate::detect::scan_directory`). Tests: 1745 unit tests + 3 integration tests pass (up from 1740 — 5 new regression tests for the numeric-suffix bug). 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/ado/mod.rs | 28 +++++- src/audit/analyzers/detection.rs | 33 ++++++- src/audit/analyzers/safe_outputs.rs | 129 +++++++++++++++++----------- src/audit/cli.rs | 34 ++++++-- src/audit/mod.rs | 55 ++++++++++++ 5 files changed, 223 insertions(+), 56 deletions(-) diff --git a/src/ado/mod.rs b/src/ado/mod.rs index 86f2d487..13f4fdb7 100644 --- a/src/ado/mod.rs +++ b/src/ado/mod.rs @@ -829,16 +829,42 @@ pub async fn resolve_ado_context( Ok(ctx) } -#[doc(hidden)] +/// Test-only override that lets the integration tests in `tests/audit_it.rs` +/// redirect ADO REST calls at a mock server via the `ADO_AW_TEST_ORG_URL` +/// environment variable. +/// +/// **Compiled out of release builds.** All published artifacts ship with +/// `cargo build --release`, which sets `debug_assertions = false` and +/// replaces the body of this function with a no-op via the +/// `#[cfg(not(debug_assertions))]` branch below. This prevents an +/// attacker-controlled env var (a leftover from a debugging session, a +/// hostile CI environment, etc.) from silently redirecting production +/// ADO API calls. Debug builds — used by `cargo test`, integration +/// tests, and `cargo run` during development — keep the override +/// available, and emit a `warn!` on every invocation so the override is +/// loud and obvious in logs. 
+#[cfg(debug_assertions)] fn apply_test_org_url_override(ctx: &mut AdoContext) { if let Ok(org_url) = std::env::var("ADO_AW_TEST_ORG_URL") { let org_url = org_url.trim().trim_end_matches('/'); if !org_url.is_empty() { + log::warn!( + "ADO_AW_TEST_ORG_URL test override active: redirecting ADO REST calls \ + from {} to {} (this branch is compiled out of release builds)", + ctx.org_url, + org_url + ); ctx.org_url = org_url.to_string(); } } } +#[cfg(not(debug_assertions))] +fn apply_test_org_url_override(_: &mut AdoContext) { + // Release builds intentionally ignore ADO_AW_TEST_ORG_URL so that a + // stray env var cannot redirect production ADO API calls. +} + /// Builds the list of definitions to update from explicit IDs or auto-detection. /// Returns `None` when auto-detection finds no agentic pipelines (caller should exit cleanly). pub async fn resolve_definitions( diff --git a/src/audit/analyzers/detection.rs b/src/audit/analyzers/detection.rs index f5d82600..49432ee0 100644 --- a/src/audit/analyzers/detection.rs +++ b/src/audit/analyzers/detection.rs @@ -115,7 +115,9 @@ async fn find_verdict_path(download_root: &Path) -> Option { let path = entry.path(); match &latest_dir { - Some((current_name, _)) if name <= *current_name => {} + Some((current_name, _)) + if crate::audit::cmp_numeric_suffix(&name, current_name) + != std::cmp::Ordering::Greater => {} _ => latest_dir = Some((name, path)), } } @@ -293,7 +295,7 @@ mod tests { } #[tokio::test] - async fn uses_lexicographically_last_analyzed_outputs_directory() { + async fn uses_highest_numbered_analyzed_outputs_directory() { let temp_dir = TempDir::new().unwrap(); write_verdict( &temp_dir, @@ -317,4 +319,31 @@ mod tests { Some(expected_verdict_path("analyzed_outputs_42")) ); } + + /// Regression: lexicographic sort would pick `analyzed_outputs_9` + /// here. Numeric-suffix sort must pick `analyzed_outputs_10`. 
+ #[tokio::test] + async fn picks_highest_numeric_suffix_not_lexicographic() { + let temp_dir = TempDir::new().unwrap(); + write_verdict( + &temp_dir, + "analyzed_outputs_9", + r#"{"prompt_injection":false,"secret_leak":false,"malicious_patch":false,"reasons":[]}"#, + ) + .await; + write_verdict( + &temp_dir, + "analyzed_outputs_10", + r#"{"prompt_injection":true,"secret_leak":false,"malicious_patch":false,"reasons":["newer verdict"]}"#, + ) + .await; + + let analysis = analyze_detection(temp_dir.path()).await.unwrap().unwrap(); + + assert!(analysis.threats.prompt_injection); + assert_eq!( + analysis.verdict_path, + Some(expected_verdict_path("analyzed_outputs_10")) + ); + } } diff --git a/src/audit/analyzers/safe_outputs.rs b/src/audit/analyzers/safe_outputs.rs index 8bce7fbd..3c780677 100644 --- a/src/audit/analyzers/safe_outputs.rs +++ b/src/audit/analyzers/safe_outputs.rs @@ -4,9 +4,9 @@ use anyhow::Context; use serde::Deserialize; use serde_json::Value; use std::collections::{BTreeMap, VecDeque}; -use std::fs; use std::io::ErrorKind; use std::path::{Path, PathBuf}; +use tokio::fs; use crate::audit::model::{ CreatedItemReport, Finding, RejectedSafeOutputsRollup, SafeOutputExecution, @@ -63,9 +63,9 @@ struct IndexedExecutionRecord { pub async fn analyze_safe_outputs( download_root: &std::path::Path, ) -> anyhow::Result { - let proposals_path = find_proposals_file(download_root)?; - let detection_path = find_detection_file(download_root)?; - let executions_path = find_execution_file(download_root)?; + let proposals_path = find_proposals_file(download_root).await?; + let detection_path = find_detection_file(download_root).await?; + let executions_path = find_execution_file(download_root).await?; let proposals = load_proposals(proposals_path.as_deref()).await?; let detection = load_detection_verdict(detection_path.as_deref()).await?; @@ -526,13 +526,17 @@ fn truncate_reason(reason: String, max_chars: usize) -> String { } } -fn find_proposals_file(download_root: 
&Path) -> anyhow::Result> { - for directory in top_level_dirs_with_prefix(download_root, "agent_outputs_")? { +async fn find_proposals_file(download_root: &Path) -> anyhow::Result> { + for directory in top_level_dirs_with_prefix(download_root, "agent_outputs_").await? { for candidate in [ directory.join("staging").join(SAFE_OUTPUT_FILENAME), directory.join(SAFE_OUTPUT_FILENAME), ] { - if candidate.is_file() { + if fs::metadata(&candidate) + .await + .map(|m| m.is_file()) + .unwrap_or(false) + { return Ok(Some(candidate)); } } @@ -540,32 +544,40 @@ fn find_proposals_file(download_root: &Path) -> anyhow::Result> Ok(None) } -fn find_detection_file(download_root: &Path) -> anyhow::Result> { - for directory in top_level_dirs_with_prefix(download_root, "analyzed_outputs_")? { +async fn find_detection_file(download_root: &Path) -> anyhow::Result> { + for directory in top_level_dirs_with_prefix(download_root, "analyzed_outputs_").await? { let candidate = directory.join("threat-analysis.json"); - if candidate.is_file() { + if fs::metadata(&candidate) + .await + .map(|m| m.is_file()) + .unwrap_or(false) + { return Ok(Some(candidate)); } } Ok(None) } -fn find_execution_file(download_root: &Path) -> anyhow::Result> { +async fn find_execution_file(download_root: &Path) -> anyhow::Result> { let preferred = download_root .join("safe_outputs") .join(EXECUTED_NDJSON_FILENAME); - if preferred.is_file() { + if fs::metadata(&preferred) + .await + .map(|m| m.is_file()) + .unwrap_or(false) + { return Ok(Some(preferred)); } let mut matches = Vec::new(); - collect_named_files(download_root, EXECUTED_NDJSON_FILENAME, &mut matches)?; + collect_named_files(download_root, EXECUTED_NDJSON_FILENAME, &mut matches).await?; matches.sort(); Ok(matches.into_iter().next()) } -fn top_level_dirs_with_prefix(root: &Path, prefix: &str) -> anyhow::Result> { - let entries = match fs::read_dir(root) { +async fn top_level_dirs_with_prefix(root: &Path, prefix: &str) -> anyhow::Result> { + let mut entries = 
match fs::read_dir(root).await { Ok(entries) => entries, Err(error) if error.kind() == ErrorKind::NotFound => return Ok(Vec::new()), Err(error) => { @@ -574,11 +586,20 @@ fn top_level_dirs_with_prefix(root: &Path, prefix: &str) -> anyhow::Result = Vec::new(); + loop { + let entry = match entries.next_entry().await { + Ok(Some(entry)) => entry, + Ok(None) => break, + Err(error) => { + return Err(error) + .with_context(|| format!("Failed to iterate {}", root.display())); + } + }; + let file_type = entry .file_type() + .await .with_context(|| format!("Failed to inspect {}", entry.path().display()))?; if !file_type.is_dir() { continue; @@ -588,43 +609,55 @@ fn top_level_dirs_with_prefix(root: &Path, prefix: &str) -> anyhow::Result, -) -> anyhow::Result<()> { - let entries = match fs::read_dir(root) { - Ok(entries) => entries, - Err(error) if error.kind() == ErrorKind::NotFound => return Ok(()), - Err(error) => { - return Err(error) - .with_context(|| format!("Failed to read directory {}", root.display())); - } - }; +fn collect_named_files<'a>( + root: &'a Path, + file_name: &'a str, + matches: &'a mut Vec, +) -> std::pin::Pin> + Send + 'a>> { + Box::pin(async move { + let mut entries = match fs::read_dir(root).await { + Ok(entries) => entries, + Err(error) if error.kind() == ErrorKind::NotFound => return Ok(()), + Err(error) => { + return Err(error) + .with_context(|| format!("Failed to read directory {}", root.display())); + } + }; - for entry in entries { - let entry = entry.with_context(|| format!("Failed to iterate {}", root.display()))?; - let path = entry.path(); - let file_type = entry - .file_type() - .with_context(|| format!("Failed to inspect {}", path.display()))?; - if file_type.is_dir() { - collect_named_files(&path, file_name, matches)?; - } else if file_type.is_file() - && path.file_name().and_then(|name| name.to_str()) == Some(file_name) - { - matches.push(path); + loop { + let entry = match entries.next_entry().await { + Ok(Some(entry)) => entry, + 
Ok(None) => break, + Err(error) => { + return Err(error) + .with_context(|| format!("Failed to iterate {}", root.display())); + } + }; + let path = entry.path(); + let file_type = entry + .file_type() + .await + .with_context(|| format!("Failed to inspect {}", path.display()))?; + if file_type.is_dir() { + collect_named_files(&path, file_name, matches).await?; + } else if file_type.is_file() + && path.file_name().and_then(|name| name.to_str()) == Some(file_name) + { + matches.push(path); + } } - } - Ok(()) + Ok(()) + }) } #[cfg(test)] diff --git a/src/audit/cli.rs b/src/audit/cli.rs index afad8e59..dfff70a5 100644 --- a/src/audit/cli.rs +++ b/src/audit/cli.rs @@ -611,17 +611,19 @@ async fn collect_files_under(run_dir: &Path, start_dir: &Path) -> Result Option { let mut entries = tokio::fs::read_dir(run_dir).await.ok()?; - let mut hits = Vec::new(); + let mut hits: Vec<(String, PathBuf)> = Vec::new(); while let Ok(Some(entry)) = entries.next_entry().await { if entry.file_type().await.map(|t| t.is_dir()).unwrap_or(false) && let Some(name) = entry.file_name().to_str() && (name == prefix || name.starts_with(&format!("{}_", prefix))) { - hits.push(entry.path()); + hits.push((name.to_string(), entry.path())); } } - hits.sort(); - hits.pop() + // Numeric-suffix sort so `agent_outputs_10` outranks + // `agent_outputs_9` (lexicographic sort gets this wrong). 
+ hits.sort_by(|(a, _), (b, _)| crate::audit::cmp_numeric_suffix(a, b)); + hits.pop().map(|(_, path)| path) } fn is_authz_error(error: &anyhow::Error) -> bool { @@ -670,7 +672,7 @@ mod tests { } #[tokio::test] - async fn find_artifact_dir_picks_lexicographically_last_match() { + async fn find_artifact_dir_picks_highest_numbered_match() { let temp_dir = tempfile::tempdir().expect("tempdir"); tokio::fs::create_dir_all(temp_dir.path().join("agent_outputs_001")) .await @@ -692,6 +694,28 @@ mod tests { ); } + /// Regression test: lexicographic sort would pick `agent_outputs_9` + /// here (because `'9' > '1'`); numeric-suffix sort must pick + /// `agent_outputs_10` instead. + #[tokio::test] + async fn find_artifact_dir_orders_multi_digit_suffixes_numerically() { + let temp_dir = tempfile::tempdir().expect("tempdir"); + for suffix in ["1", "2", "9", "10", "100"] { + tokio::fs::create_dir_all(temp_dir.path().join(format!("agent_outputs_{suffix}"))) + .await + .expect("create dir"); + } + + let found = find_artifact_dir(temp_dir.path(), "agent_outputs") + .await + .expect("find artifact dir"); + + assert_eq!( + found.file_name().and_then(|name| name.to_str()), + Some("agent_outputs_100") + ); + } + #[test] fn artifact_filter_mapping_matches_expected_sets() { let filters = vec![ diff --git a/src/audit/mod.rs b/src/audit/mod.rs index 8ac1d045..77778e0e 100644 --- a/src/audit/mod.rs +++ b/src/audit/mod.rs @@ -13,3 +13,58 @@ pub mod url; pub use cli::{AuditOptions, dispatch}; #[allow(unused_imports)] pub use model::*; + +/// Compare two `_` directory names by their trailing +/// integer suffix, falling back to a full lexicographic comparison +/// when the suffix isn't a u64. +/// +/// Plain string sort treats `"agent_outputs_9"` as greater than +/// `"agent_outputs_10"` because `'9' > '1'`. When ADO produces +/// multi-digit build IDs (which happens after the very first builds), +/// the lexicographic "last" is the wrong directory — usually older. 
+/// This comparator parses the trailing token after the final `_` and +/// compares numerically so the highest-numbered build wins. +pub(crate) fn cmp_numeric_suffix(a: &str, b: &str) -> std::cmp::Ordering { + fn suffix(s: &str) -> u64 { + s.rsplit('_').next().and_then(|s| s.parse().ok()).unwrap_or(0) + } + suffix(a).cmp(&suffix(b)).then_with(|| a.cmp(b)) +} + +#[cfg(test)] +mod numeric_suffix_tests { + use super::cmp_numeric_suffix; + use std::cmp::Ordering; + + #[test] + fn double_digit_outranks_single_digit() { + assert_eq!( + cmp_numeric_suffix("agent_outputs_10", "agent_outputs_9"), + Ordering::Greater + ); + assert_eq!( + cmp_numeric_suffix("analyzed_outputs_42", "analyzed_outputs_41"), + Ordering::Greater + ); + } + + #[test] + fn non_numeric_suffix_falls_back_to_lexicographic() { + // Both suffixes parse to 0; tie-break is lexicographic on the + // full name. + assert_eq!( + cmp_numeric_suffix("agent_outputs_alpha", "agent_outputs_beta"), + Ordering::Less + ); + } + + #[test] + fn no_suffix_compares_as_zero() { + // "agent_outputs" -> last token "outputs" -> parse fails -> 0. + // "agent_outputs_5" -> 5. So the numeric one wins. + assert_eq!( + cmp_numeric_suffix("agent_outputs", "agent_outputs_5"), + Ordering::Less + ); + } +}