Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions .semgrepignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Intentionally minimal — overrides semgrep's built-in default
# .semgrepignore (which excludes test/, tests/, vendor/, etc.).
#
# IVE owns workspace traversal: the daemon already filters node_modules,
# target, .git, and .ive in scanner::walk_workspace. We don't want
# semgrep also applying its own opinion about which files are worth
# scanning, especially since our test/fixtures/ai-slop/ tree literally
# exists to be scanned by the semgrep analyzer.
#
# If you add real exclusions here, document why.
47 changes: 45 additions & 2 deletions daemon/src/analyzers/semgrep.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ use crate::contracts::{Diagnostic, DiagnosticSource, Location, Range, Severity};
use std::path::{Path, PathBuf};
use std::process::Command;
use std::time::Duration;
use tracing::warn;

pub fn binary_present() -> bool {
if std::env::var("IVE_SKIP_SEMGREP").is_ok() {
Expand Down Expand Up @@ -58,17 +59,59 @@ pub fn scan_path(target: &Path, rules: &Path) -> Option<Vec<Diagnostic>> {
// Semgrep ≥1.x exits non-zero when it finds issues — we consume
// stdout either way and don't pass the flag that older versions used
// for this (it was renamed/removed across versions).
// `--no-git-ignore` is load-bearing: without it, semgrep auto-detects
// when the target lives inside a git repo and silently restricts the
// scan to files tracked by git from semgrep's own vantage point. That
// produces 0 findings on subdirectory targets even when the files are
// tracked at the repo root (the daemon's case). We never want that
// behaviour — IVE owns workspace traversal — so always opt out.
let output = Command::new("semgrep")
.arg("--config")
.arg(rules)
.arg("--json")
.arg("--timeout")
.arg("10")
.arg("--no-git-ignore")
.arg(target)
.output()
.ok()?;
let parsed: serde_json::Value = serde_json::from_slice(&output.stdout).ok()?;
let results = parsed.get("results")?.as_array()?;
let parsed: serde_json::Value = match serde_json::from_slice(&output.stdout) {
Ok(v) => v,
Err(e) => {
warn!(
error = %e,
stderr = %String::from_utf8_lossy(&output.stderr),
"semgrep stdout was not valid JSON"
);
return None;
}
};
let results = match parsed.get("results").and_then(|r| r.as_array()) {
Some(r) => r,
None => {
warn!(
stderr = %String::from_utf8_lossy(&output.stderr),
"semgrep JSON had no `results` array"
);
return None;
}
};
if results.is_empty() {
// 0 findings is a legitimate outcome, but on the AI-slop fixtures
// it's almost always a misconfig. Surface semgrep's own errors so
// the failure is debuggable.
let errors = parsed
.get("errors")
.map(|e| e.to_string())
.unwrap_or_else(|| "[]".into());
let stderr_tail = String::from_utf8_lossy(&output.stderr);
warn!(
errors = %errors,
stderr_len = stderr_tail.len(),
stderr_tail = %stderr_tail.lines().rev().take(5).collect::<Vec<_>>().join(" | "),
"semgrep returned 0 results"
);
}
let mut diagnostics = Vec::with_capacity(results.len());
for r in results {
if let Some(d) = result_to_diagnostic(r, target) {
Expand Down
61 changes: 49 additions & 12 deletions test/run_fixtures.sh
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
#!/usr/bin/env bash
# Integration harness for test/fixtures/ai-slop/.
#
# For each subdirectory under fixtures/ai-slop, run `ive-daemon scan` and
# check the returned JSON matches the expectations in the YAML sidecar.
# This runner is deliberately shell+jq-only (no extra deps) and only checks
# the invariants that exist in v1.
# For each subdirectory, run `ive-daemon scan` and check the returned JSON
# satisfies a generic non-green invariant. Detailed per-analyzer expectations
# live in daemon/tests/fixtures.rs — this script is the end-to-end smoke
# check that the released binary's wiring still works.
#
# Exit code is non-zero on the first mismatch.
# Fixtures that depend on an external analyzer binary (rust-analyzer,
# semgrep, pyright, tsc) are skipped when that binary isn't on PATH so a
# minimal install doesn't fail CI for missing optional capabilities.
#
# Exit code is non-zero if any fixture hits a hard mismatch.

set -euo pipefail

Expand All @@ -18,35 +22,68 @@ if [[ ! -x "$DAEMON" ]]; then
exit 2
fi

# required_binary FIXTURE_NAME
#
# Prints the external analyzer executable that the named fixture depends on,
# or an empty line when the fixture needs none. An empty result means the
# fixture relies only on the daemon's built-in analyzers (hallucination,
# crossfile, binding) and must always produce diagnostics.
required_binary() {
  local fixture="$1"
  local tool=""
  case "$fixture" in
    # The fixture directory uses an underscore; the executable uses a hyphen.
    rust_analyzer) tool="rust-analyzer" ;;
    # For these the fixture name and the executable name coincide.
    semgrep | pyright | tsc) tool="$fixture" ;;
  esac
  printf '%s\n' "$tool"
}

FAIL=0

for fixture_dir in "$ROOT"/test/fixtures/ai-slop/*/; do
name="$(basename "$fixture_dir")"
echo "── fixture: $name"
summary="$("$DAEMON" scan --workspace "$fixture_dir" 2>/dev/null)"

required="$(required_binary "$name")"
# `command -v` is not enough: rustup ships a rust-analyzer shim that
# is on PATH but errors out unless `rustup component add rust-analyzer`
# has been run. Mirror the daemon's own check (`<bin> --version`).
if [[ -n "$required" ]] && ! "$required" --version >/dev/null 2>&1; then
echo " ⤳ skipped: '$required' not usable (binary missing or rustup shim without component)"
continue
fi

# Capture stderr so a daemon panic or analyzer error is visible on failure.
stderr_log="$(mktemp)"
summary="$("$DAEMON" scan --workspace "$fixture_dir" 2>"$stderr_log")"
files_total="$(echo "$summary" | grep -o '"files":[[:space:]]*[0-9]*' | head -1 | awk '{print $2}')"
diagnostics="$(echo "$summary" | grep -o '"diagnostics":[[:space:]]*[0-9]*' | head -1 | awk '{print $2}')"
red="$(echo "$summary" | grep -o '"redFiles":[[:space:]]*[0-9]*' | head -1 | awk '{print $2}')"
yellow="$(echo "$summary" | grep -o '"yellowFiles":[[:space:]]*[0-9]*' | head -1 | awk '{print $2}')"

if [[ "${files_total:-0}" == "0" ]]; then
echo " ✗ expected at least one file in $name"
# fail_with REASON
#
# Reports a failed check for the current fixture and marks the run as failed.
# Reads the enclosing loop's $name and $stderr_log, prints the captured daemon
# stderr (prefixed per line) when that log is non-empty, deletes the log, and
# sets the global FAIL flag to 1.
fail_with() {
  local reason="$1"
  printf '%s\n' " ✗ $reason in $name"
  # Only show the stderr section when the daemon actually wrote something.
  if [[ -s "$stderr_log" ]]; then
    printf '%s\n' " ── daemon stderr ──"
    sed 's/^/ /' "$stderr_log"
  fi
  # The temp log is no longer needed once it has been reported.
  rm -f "$stderr_log"
  FAIL=1
}

if [[ "${files_total:-0}" == "0" ]]; then
fail_with "expected at least one file"
continue
fi

if [[ "${diagnostics:-0}" == "0" ]]; then
echo " ✗ expected hallucinated-import diagnostic in $name"
FAIL=1
fail_with "expected at least one diagnostic"
continue
fi

if [[ "${red:-0}" == "0" && "${yellow:-0}" == "0" ]]; then
echo " ✗ expected at least one non-green file in $name"
FAIL=1
fail_with "expected at least one non-green file"
continue
fi

rm -f "$stderr_log"
echo " ✓ $name ($files_total files, $diagnostics diagnostics, $red red, $yellow yellow)"
done

Expand Down
Loading