diff --git a/.semgrepignore b/.semgrepignore new file mode 100644 index 0000000..ec979a6 --- /dev/null +++ b/.semgrepignore @@ -0,0 +1,10 @@ +# Intentionally minimal — overrides semgrep's built-in default +# .semgrepignore (which excludes test/, tests/, vendor/, etc.). +# +# IVE owns workspace traversal: the daemon already filters node_modules, +# target, .git, and .ive in scanner::walk_workspace. We don't want +# semgrep also applying its own opinion about which files are worth +# scanning, especially since our test/fixtures/ai-slop/ tree literally +# exists to be scanned by the semgrep analyzer. +# +# If you add real exclusions here, document why. diff --git a/daemon/src/analyzers/semgrep.rs b/daemon/src/analyzers/semgrep.rs index 028b669..ec51fa4 100644 --- a/daemon/src/analyzers/semgrep.rs +++ b/daemon/src/analyzers/semgrep.rs @@ -13,6 +13,7 @@ use crate::contracts::{Diagnostic, DiagnosticSource, Location, Range, Severity}; use std::path::{Path, PathBuf}; use std::process::Command; use std::time::Duration; +use tracing::warn; pub fn binary_present() -> bool { if std::env::var("IVE_SKIP_SEMGREP").is_ok() { @@ -58,17 +59,59 @@ pub fn scan_path(target: &Path, rules: &Path) -> Option<Vec<Diagnostic>> { // Semgrep ≥1.x exits non-zero when it finds issues — we consume // stdout either way and don't pass the flag that older versions used // for this (it was renamed/removed across versions). + // `--no-git-ignore` is load-bearing: without it, semgrep auto-detects + // when the target lives inside a git repo and silently restricts the + // scan to files tracked by git from semgrep's own vantage point. That + // produces 0 findings on subdirectory targets even when the files are + // tracked at the repo root (the daemon's case). We never want that + // behaviour — IVE owns workspace traversal — so always opt out. 
let output = Command::new("semgrep") .arg("--config") .arg(rules) .arg("--json") .arg("--timeout") .arg("10") + .arg("--no-git-ignore") .arg(target) .output() .ok()?; - let parsed: serde_json::Value = serde_json::from_slice(&output.stdout).ok()?; - let results = parsed.get("results")?.as_array()?; + let parsed: serde_json::Value = match serde_json::from_slice(&output.stdout) { + Ok(v) => v, + Err(e) => { + warn!( + error = %e, + stderr = %String::from_utf8_lossy(&output.stderr), + "semgrep stdout was not valid JSON" + ); + return None; + } + }; + let results = match parsed.get("results").and_then(|r| r.as_array()) { + Some(r) => r, + None => { + warn!( + stderr = %String::from_utf8_lossy(&output.stderr), + "semgrep JSON had no `results` array" + ); + return None; + } + }; + if results.is_empty() { + // 0 findings is a legitimate outcome, but on the AI-slop fixtures + // it's almost always a misconfig. Surface semgrep's own errors so + // the failure is debuggable. + let errors = parsed + .get("errors") + .map(|e| e.to_string()) + .unwrap_or_else(|| "[]".into()); + let stderr_tail = String::from_utf8_lossy(&output.stderr); + warn!( + errors = %errors, + stderr_len = stderr_tail.len(), + stderr_tail = %stderr_tail.lines().rev().take(5).collect::<Vec<_>>().join(" | "), + "semgrep returned 0 results" + ); + } let mut diagnostics = Vec::with_capacity(results.len()); for r in results { if let Some(d) = result_to_diagnostic(r, target) { diff --git a/test/run_fixtures.sh b/test/run_fixtures.sh index ca83b4d..d565736 100755 --- a/test/run_fixtures.sh +++ b/test/run_fixtures.sh @@ -1,12 +1,16 @@ #!/usr/bin/env bash # Integration harness for test/fixtures/ai-slop/. # -# For each subdirectory under fixtures/ai-slop, run `ive-daemon scan` and -# check the returned JSON matches the expectations in the YAML sidecar. -# This runner is deliberately shell+jq-only (no extra deps) and only checks -# the invariants that exist in v1. 
+# For each subdirectory, run `ive-daemon scan` and check the returned JSON +# satisfies a generic non-green invariant. Detailed per-analyzer expectations +# live in daemon/tests/fixtures.rs — this script is the end-to-end smoke +# check that the released binary's wiring still works. # -# Exit code is non-zero on the first mismatch. +# Fixtures that depend on an external analyzer binary (rust-analyzer, +# semgrep, pyright, tsc) are skipped when that binary isn't on PATH so a +# minimal install doesn't fail CI for missing optional capabilities. +# +# Exit code is non-zero on the first hard mismatch. set -euo pipefail @@ -18,35 +22,68 @@ if [[ ! -x "$DAEMON" ]]; then exit 2 fi +# Map fixture name → required external binary. Empty string means the +# fixture relies only on the daemon's built-in analyzers (hallucination, +# crossfile, binding) and must always produce diagnostics. +required_binary() { + case "$1" in + rust_analyzer) echo rust-analyzer ;; + semgrep) echo semgrep ;; + pyright) echo pyright ;; + tsc) echo tsc ;; + *) echo "" ;; + esac +} + FAIL=0 for fixture_dir in "$ROOT"/test/fixtures/ai-slop/*/; do name="$(basename "$fixture_dir")" echo "── fixture: $name" - summary="$("$DAEMON" scan --workspace "$fixture_dir" 2>/dev/null)" + + required="$(required_binary "$name")" + # `command -v` is not enough: rustup ships a rust-analyzer shim that + # is on PATH but errors out unless `rustup component add rust-analyzer` + # has been run. Mirror the daemon's own check (`<binary> --version`). + if [[ -n "$required" ]] && ! "$required" --version >/dev/null 2>&1; then + echo " ⤳ skipped: '$required' not usable (binary missing or rustup shim without component)" + continue + fi + + # Capture stderr so a daemon panic or analyzer error is visible on failure. 
+ stderr_log="$(mktemp)" + summary="$("$DAEMON" scan --workspace "$fixture_dir" 2>"$stderr_log")" files_total="$(echo "$summary" | grep -o '"files":[[:space:]]*[0-9]*' | head -1 | awk '{print $2}')" diagnostics="$(echo "$summary" | grep -o '"diagnostics":[[:space:]]*[0-9]*' | head -1 | awk '{print $2}')" red="$(echo "$summary" | grep -o '"redFiles":[[:space:]]*[0-9]*' | head -1 | awk '{print $2}')" yellow="$(echo "$summary" | grep -o '"yellowFiles":[[:space:]]*[0-9]*' | head -1 | awk '{print $2}')" - if [[ "${files_total:-0}" == "0" ]]; then - echo " ✗ expected at least one file in $name" + fail_with() { + echo " ✗ $1 in $name" + if [[ -s "$stderr_log" ]]; then + echo " ── daemon stderr ──" + sed 's/^/ /' "$stderr_log" + fi + rm -f "$stderr_log" FAIL=1 + } + + if [[ "${files_total:-0}" == "0" ]]; then + fail_with "expected at least one file" continue fi if [[ "${diagnostics:-0}" == "0" ]]; then - echo " ✗ expected hallucinated-import diagnostic in $name" - FAIL=1 + fail_with "expected at least one diagnostic" continue fi if [[ "${red:-0}" == "0" && "${yellow:-0}" == "0" ]]; then - echo " ✗ expected at least one non-green file in $name" - FAIL=1 + fail_with "expected at least one non-green file" continue fi + rm -f "$stderr_log" echo " ✓ $name ($files_total files, $diagnostics diagnostics, $red red, $yellow yellow)" done