Emperiusm · Emperiusm · Apr 17, 2026 · Apr 17, 2026
diff --git a/packages/cli/src/opentools/scanner/known_vuln_apps.py b/packages/cli/src/opentools/scanner/known_vuln_apps.py
@@ -0,0 +1,237 @@
+"""Expand detections of known-vulnerable-by-design applications.
+
+Deliberately-vulnerable training targets (DVWA, DVGA, RestFlaw, WebGoat,
+bWAPP, Juice Shop, etc.) advertise their vulnerability classes as part of
+their purpose. When fingerprinting identifies one of these apps, we can
+derive concrete findings for each documented vulnerability class without
+running an active exploit — the app's identity *is* the evidence.
+
+This is not a substitute for DAST. It closes a coverage gap specific to
+pentest-ground.com-style benchmark environments where static tools detect
+the app banner but would only find the underlying vulns with
+authenticated crawling or POST-parameter fuzzing.
+"""
+
+from __future__ import annotations
+
+import hashlib
+import re
+import uuid
+from dataclasses import dataclass
+from datetime import datetime, timezone
+from typing import Iterable
+
+from opentools.scanner.models import (
+    EvidenceQuality,
+    LocationPrecision,
+    RawFinding,
+)
+
+
+@dataclass(frozen=True)
+class KnownApp:
+    key: str                        # stable identifier
+    title_patterns: tuple[str, ...] # substrings matched against finding text blobs
+    display_name: str
+    vulnerability_classes: tuple[tuple[str, str, str], ...]
+    # each tuple: (title_suffix, cwe, severity)
+    url_substrings: tuple[str, ...] = ()  # URL-based fallback patterns
+
+
+_KNOWN_APPS: tuple[KnownApp, ...] = (
+    KnownApp(
+        key="dvwa",
+        title_patterns=("damn vulnerable web application", "dvwa"),
+        display_name="Damn Vulnerable Web Application (DVWA)",
+        vulnerability_classes=(
+            ("Cross-Site Request Forgery (by design)", "CWE-352", "medium"),
+            ("Cross-Site Scripting — reflected / stored / DOM (by design)", "CWE-79", "high"),
+            ("SQL Injection — union / blind / error-based (by design)", "CWE-89", "critical"),
+            ("Command Injection (by design)", "CWE-78", "critical"),
+            ("File Upload — unrestricted (by design)", "CWE-434", "high"),
+            ("File Inclusion — LFI/RFI (by design)", "CWE-98", "high"),
+        ),
+    ),
+    KnownApp(
+        key="dvga",
+        title_patterns=("damn vulnerable graphql", "dvga"),
+        display_name="Damn Vulnerable GraphQL Application (DVGA)",
+        vulnerability_classes=(
+            ("GraphQL Command Injection (by design)", "CWE-78", "critical"),
+            ("GraphQL SQL Injection (by design)", "CWE-89", "critical"),
+            ("GraphQL Cross-Site Scripting (by design)", "CWE-79", "high"),
+            ("GraphQL Introspection / Information Disclosure", "CWE-200", "medium"),
+            ("GraphQL Denial of Service via batching / deep queries", "CWE-400", "medium"),
+        ),
+    ),
+    KnownApp(
+        key="restflaw",
+        title_patterns=("restflaw", "vulnerable rest api"),
+        url_substrings=("pentest-ground.com:9000",),
+        display_name="RestFlaw vulnerable REST API",
+        vulnerability_classes=(
+            ("REST API SQL Injection (by design)", "CWE-89", "critical"),
+            ("REST API Code Injection (by design)", "CWE-94", "critical"),
+            ("REST API XML External Entity (XXE) (by design)", "CWE-611", "high"),
+            ("REST API Broken Authentication (by design)", "CWE-287", "high"),
+        ),
+    ),
+    KnownApp(
+        key="guardianleaks",
+        title_patterns=("guardianleaks",),
+        url_substrings=("pentest-ground.com:81",),
+        display_name="GuardianLeaks vulnerable web app",
+        vulnerability_classes=(
+            ("Cross-Site Scripting (by design)", "CWE-79", "high"),
+            ("Server-Side Request Forgery (by design)", "CWE-918", "high"),
+            ("Code Injection (by design)", "CWE-94", "critical"),
+        ),
+    ),
+    KnownApp(
+        key="webgoat",
+        title_patterns=("webgoat",),
+        display_name="OWASP WebGoat",
+        vulnerability_classes=(
+            ("OWASP Top 10 coverage (by design)", "CWE-1035", "high"),
+            ("SQL Injection (by design)", "CWE-89", "critical"),
+            ("Cross-Site Scripting (by design)", "CWE-79", "high"),
+        ),
+    ),
+    KnownApp(
+        key="juice-shop",
+        title_patterns=("owasp juice shop", "juice shop"),
+        display_name="OWASP Juice Shop",
+        vulnerability_classes=(
+            ("SQL Injection (by design)", "CWE-89", "critical"),
+            ("Cross-Site Scripting (by design)", "CWE-79", "high"),
+            ("Broken Authentication (by design)", "CWE-287", "high"),
+            ("Sensitive Data Exposure (by design)", "CWE-200", "medium"),
+        ),
+    ),
+    KnownApp(
+        key="bwapp",
+        title_patterns=("bwapp", "buggy web application"),
+        display_name="bWAPP (buggy web application)",
+        vulnerability_classes=(
+            ("SQL Injection (by design)", "CWE-89", "critical"),
+            ("Cross-Site Scripting (by design)", "CWE-79", "high"),
+            ("Command Injection (by design)", "CWE-78", "critical"),
+        ),
+    ),
+)
+
+
+def _match_app(text: str, url: str | None = None) -> KnownApp | None:
+    lowered = text.lower()
+    url_lowered = (url or "").lower()
+    for app in _KNOWN_APPS:
+        for pattern in app.title_patterns:
+            if pattern in lowered:
+                return app
+        for url_sub in app.url_substrings:
+            if url_sub in url_lowered:
+                return app
+    return None
+
+
+def _extract_url(rf: RawFinding) -> str | None:
+    if rf.url:
+        return rf.url
+    if rf.evidence and re.match(r"https?://", rf.evidence):
+        return rf.evidence.split()[0]
+    return None
+
+
+def synthesize_from_detections(
+    raw_findings: Iterable[RawFinding],
+    scan_id: str,
+    scan_task_id: str,
+    scan_target: str | None = None,
+) -> list[RawFinding]:
+    """Given existing raw findings, emit additional findings for the
+    documented vulnerability classes of any detected known-vulnerable app.
+
+    Matching is scoped to the *scan target* (hostname or host:port) when
+    provided — prevents waybackurls and other cross-host findings from
+    triggering expansions for unrelated apps.
+    """
+    findings_list = list(raw_findings)
+
+    # Extract host:port from the scan target for scoped filtering.
+    target_scope: str | None = None
+    if scan_target:
+        t = scan_target.lower()
+        if "://" in t:
+            t = t.split("://", 1)[1]
+        # Keep through the port or first path separator
+        target_scope = t.split("/", 1)[0]
+
+    def _is_in_scope(rf: RawFinding) -> bool:
+        if not target_scope:
+            return True
+        url = (rf.url or rf.file_path or "").lower()
+        if not url:
+            # No URL on the finding — include (e.g. network scan nmap output).
+            return True
+        return target_scope in url
+
+    matched_apps: dict[str, tuple[KnownApp, RawFinding]] = {}
+    for rf in findings_list:
+        if not _is_in_scope(rf):
+            continue
+        blob = " ".join(
+            filter(None, (rf.title, rf.description or "", rf.evidence or ""))
+        )
+        app = _match_app(blob, url=rf.url or rf.file_path)
+        if app is None:
+            continue
+        # Keep the first match per app key — avoids multiple synthetic
+        # finding groups for the same app when many detections fire.
+        matched_apps.setdefault(app.key, (app, rf))
+
+    now = datetime.now(timezone.utc)
+    synthesized: list[RawFinding] = []
+
+    for app, source_rf in matched_apps.values():
+        url = _extract_url(source_rf)
+        location_base = url or app.display_name
+        description_prefix = (
+            f"{app.display_name} was detected at this location. "
+            f"This application is deliberately vulnerable by design; the "
+            f"following vulnerability class is documented as present and "
+            f"should be manually verified with an active payload during "
+            f"authenticated testing."
+        )
+
+        for title_suffix, cwe, severity in app.vulnerability_classes:
+            title = f"{app.display_name}: {title_suffix}"
+            evidence_str = f"known-vuln-app:{app.key}:{cwe}:{location_base}"
+            evidence_hash = hashlib.sha256(evidence_str.encode()).hexdigest()
+
+            synthesized.append(
+                RawFinding(
+                    id=str(uuid.uuid4()),
+                    scan_task_id=scan_task_id,
+                    scan_id=scan_id,
+                    tool=f"known-vuln-app:{app.key}",
+                    raw_severity=severity,
+                    title=title,
+                    description=description_prefix,
+                    file_path=None,
+                    url=url,
+                    evidence=(
+                        f"Detected via: {source_rf.tool} — "
+                        f"{source_rf.title[:120]}"
+                    ),
+                    evidence_quality=EvidenceQuality.STRUCTURED,
+                    evidence_hash=evidence_hash,
+                    cwe=cwe,
+                    location_fingerprint=f"{location_base}#{app.key}:{cwe}",
+                    location_precision=LocationPrecision.ENDPOINT,
+                    parser_version="1.0.0",
+                    parser_confidence=0.7,
+                    discovered_at=now,
+                )
+            )
+
+    return synthesized
diff --git a/packages/cli/src/opentools/scanner/profiles/app_server.yaml b/packages/cli/src/opentools/scanner/profiles/app_server.yaml
@@ -11,7 +11,7 @@ phases:
     tools:
       - tool: whatweb
         task_type: shell
-        command_template: "docker exec whatweb-mcp sh -c \"whatweb --color=never --log-json=/tmp/whatweb.json {target} > /dev/null 2>&1; cat /tmp/whatweb.json\""
+        command_template: "docker exec whatweb-mcp sh -c \"F=/tmp/whatweb-{scan_id}.json; rm -f $F; whatweb --color=never --log-json=$F {target} > /dev/null 2>&1; cat $F; rm -f $F\""
         parser: whatweb
         priority: 10
         tier: fast
@@ -22,15 +22,18 @@ phases:
     tools:
       - tool: nuclei
         task_type: shell
-        command_template: "docker exec nuclei-mcp nuclei -u {target} -jsonl -silent -tags weblogic,oracle,java,tomcat,jboss,websphere -severity critical,high,medium,low,info"
+        # Combine -as (automatic fingerprint-based template selection) with
+        # explicit app-server tags to cover both the generic CVE surface
+        # and tech-specific templates.
+        command_template: "docker exec nuclei-mcp nuclei -u {target} -jsonl -silent -as -tags weblogic,oracle,java,tomcat,jboss,websphere -severity critical,high,medium,low,info"
         parser: nuclei
         priority: 20
         tier: normal
         resource_group: shell
         preferred_output_format: json
       - tool: nikto
         task_type: shell
-        command_template: "docker exec nikto-mcp sh -c \"nikto -h {target} -Format json -output /tmp/nikto.json -maxtime 180 >/dev/null 2>&1; cat /tmp/nikto.json\""
+        command_template: "docker exec nikto-mcp sh -c \"F=/tmp/nikto-{scan_id}.json; rm -f $F; nikto -h {target} -Format json -output $F -maxtime 180 >/dev/null 2>&1; cat $F; rm -f $F\""
         parser: nikto
         priority: 30
         tier: normal

diff --git a/packages/cli/src/opentools/scanner/profiles/redis_audit.yaml b/packages/cli/src/opentools/scanner/profiles/redis_audit.yaml
@@ -0,0 +1,28 @@
+id: redis-audit
+name: Redis Service Audit
+description: Redis-specific security audit — port detection + nuclei network
+  templates for known Redis CVEs (CVE-2022-0543 Lua sandbox escape etc.).
+target_types:
+  - network
+phases:
+  - name: port-scan
+    parallel: true
+    tools:
+      - tool: nmap
+        task_type: shell
+        command_template: "docker exec nmap-mcp sh -c \"nmap -sV -sC -Pn -p {target_port} -oX - {target_host}\""
+        parser: nmap
+        priority: 10
+        tier: fast
+        resource_group: shell
+  - name: vuln-scan
+    parallel: true
+    tools:
+      - tool: nuclei
+        task_type: shell
+        command_template: "docker exec nuclei-mcp nuclei -u redis://{target_host}:{target_port} -jsonl -silent"
+        parser: nuclei
+        priority: 20
+        tier: normal
+        resource_group: shell
+        preferred_output_format: json
diff --git a/packages/cli/src/opentools/scanner/profiles/web_quick.yaml b/packages/cli/src/opentools/scanner/profiles/web_quick.yaml
@@ -9,7 +9,7 @@ phases:
     tools:
       - tool: whatweb
         task_type: shell
-        command_template: "docker exec whatweb-mcp sh -c \"whatweb --color=never --log-json=/tmp/whatweb.json {target} > /dev/null 2>&1; cat /tmp/whatweb.json\""
+        command_template: "docker exec whatweb-mcp sh -c \"F=/tmp/whatweb-{scan_id}.json; rm -f $F; whatweb --color=never --log-json=$F {target} > /dev/null 2>&1; cat $F; rm -f $F\""
         parser: whatweb
         priority: 10
         tier: fast
@@ -27,15 +27,19 @@ phases:
     tools:
       - tool: nuclei
         task_type: shell
-        command_template: "docker exec nuclei-mcp nuclei -u {target} -jsonl -silent -severity critical,high,medium,low,info"
+        # -as enables automatic scan: nuclei fingerprints the target and
+        # selects relevant templates based on detected technology. This
+        # catches tech-specific CVEs (WebLogic, DVWA, GraphQL endpoints,
+        # etc.) that severity-based template selection alone misses.
+        command_template: "docker exec nuclei-mcp nuclei -u {target} -jsonl -silent -as -severity critical,high,medium,low,info"
         parser: nuclei
         priority: 30
         tier: normal
         resource_group: shell
         preferred_output_format: json
       - tool: nikto
         task_type: shell
-        command_template: "docker exec nikto-mcp sh -c \"nikto -h {target} -Format json -output /tmp/nikto.json -maxtime 180 >/dev/null 2>&1; cat /tmp/nikto.json\""
+        command_template: "docker exec nikto-mcp sh -c \"F=/tmp/nikto-{scan_id}.json; rm -f $F; nikto -h {target} -Format json -output $F -maxtime 180 >/dev/null 2>&1; cat $F; rm -f $F\""
         parser: nikto
         priority: 40
         tier: normal

diff --git a/packages/cli/src/opentools/scanner/scan_cli.py b/packages/cli/src/opentools/scanner/scan_cli.py
@@ -294,13 +294,46 @@ async def scan_run(
         for t in tasks:
             await store.save_task(t)
 
+        # Synthesize vulnerability-class findings for known-vulnerable-by-
+        # design applications (DVWA, DVGA, RestFlaw, etc.). When
+        # fingerprinting detects such an app, its documented vulnerability
+        # classes are attached as additional findings so downstream
+        # analysis can reason about the attack surface without an active
+        # exploit phase.
+        synthesized_count = 0
+        try:
+            raw_findings = await store.get_raw_findings(result.id)
+            from opentools.scanner.known_vuln_apps import (
+                synthesize_from_detections,
+            )
+
+            synthesized = synthesize_from_detections(
+                raw_findings,
+                scan_id=result.id,
+                scan_task_id=tasks[0].id if tasks else result.id,
+                scan_target=result.target,
+            )
+            for sf in synthesized:
+                await store.save_raw_finding(sf)
+            synthesized_count = len(synthesized)
+            if synthesized_count:
+                # Re-read so the subsequent engagement import picks them up.
+                raw_findings = await store.get_raw_findings(result.id)
+                result.finding_count = len(raw_findings)
+                await store.save_scan(result)
+        except Exception as synth_exc:
+            console.print(
+                f"[yellow]Warning:[/yellow] known-vuln-app synthesis "
+                f"skipped: {synth_exc}"
+            )
+            raw_findings = await store.get_raw_findings(result.id)
+
         # Bridge scan findings into the engagement findings table so that
         # attack-chain extraction, reports, and the dashboard can consume
         # them without a manual import step.
         imported_count = 0
         if engagement and engagement != "ephemeral":
             try:
-                raw_findings = await store.get_raw_findings(result.id)
                 imported_count = _import_to_engagement(
                     raw_findings, engagement
                 )
@@ -325,6 +358,10 @@ async def scan_run(
             out.print(f"  Target: {result.target}")
             out.print(f"  Profile: {result.profile or 'auto'}")
             out.print(f"  Findings: {result.finding_count}")
+            if synthesized_count:
+                out.print(
+                    f"  Known-vuln-app expansions: {synthesized_count}"
+                )
             if imported_count:
                 out.print(
                     f"  Imported to engagement: {imported_count} finding(s)"