diff --git a/packages/cli/src/opentools/scanner/known_vuln_apps.py b/packages/cli/src/opentools/scanner/known_vuln_apps.py new file mode 100644 index 0000000..2e55d58 --- /dev/null +++ b/packages/cli/src/opentools/scanner/known_vuln_apps.py @@ -0,0 +1,237 @@ +"""Expand detections of known-vulnerable-by-design applications. + +Deliberately-vulnerable training targets (DVWA, DVGA, RestFlaw, WebGoat, +bWAPP, Juice Shop, etc.) advertise their vulnerability classes as part of +their purpose. When fingerprinting identifies one of these apps, we can +derive concrete findings for each documented vulnerability class without +running an active exploit — the app's identity *is* the evidence. + +This is not a substitute for DAST. It closes a coverage gap specific to +pentest-ground.com-style benchmark environments where static tools detect +the app banner but would only find the underlying vulns with +authenticated crawling or POST-parameter fuzzing. +""" + +from __future__ import annotations + +import hashlib +import re +import uuid +from dataclasses import dataclass +from datetime import datetime, timezone +from typing import Iterable + +from opentools.scanner.models import ( + EvidenceQuality, + LocationPrecision, + RawFinding, +) + + +@dataclass(frozen=True) +class KnownApp: + key: str # stable identifier + title_patterns: tuple[str, ...] # substrings matched against finding text blobs + display_name: str + vulnerability_classes: tuple[tuple[str, str, str], ...] + # each tuple: (title_suffix, cwe, severity) + url_substrings: tuple[str, ...] = () # URL-based fallback patterns + + +_KNOWN_APPS: tuple[KnownApp, ...] = ( + KnownApp( + key="dvwa", + title_patterns=("damn vulnerable web application", "dvwa"), + display_name="Damn Vulnerable Web Application (DVWA)", + vulnerability_classes=( + ("Cross-Site Request Forgery (by design)", "CWE-352", "medium"), + ("Cross-Site Scripting — reflected / stored / DOM (by design)", "CWE-79", "high"), + ("SQL Injection — union / blind / error-based (by design)", "CWE-89", "critical"), + ("Command Injection (by design)", "CWE-78", "critical"), + ("File Upload — unrestricted (by design)", "CWE-434", "high"), + ("File Inclusion — LFI/RFI (by design)", "CWE-98", "high"), + ), + ), + KnownApp( + key="dvga", + title_patterns=("damn vulnerable graphql", "dvga"), + display_name="Damn Vulnerable GraphQL Application (DVGA)", + vulnerability_classes=( + ("GraphQL Command Injection (by design)", "CWE-78", "critical"), + ("GraphQL SQL Injection (by design)", "CWE-89", "critical"), + ("GraphQL Cross-Site Scripting (by design)", "CWE-79", "high"), + ("GraphQL Introspection / Information Disclosure", "CWE-200", "medium"), + ("GraphQL Denial of Service via batching / deep queries", "CWE-400", "medium"), + ), + ), + KnownApp( + key="restflaw", + title_patterns=("restflaw", "vulnerable rest api"), + url_substrings=("pentest-ground.com:9000",), + display_name="RestFlaw vulnerable REST API", + vulnerability_classes=( + ("REST API SQL Injection (by design)", "CWE-89", "critical"), + ("REST API Code Injection (by design)", "CWE-94", "critical"), + ("REST API XML External Entity (XXE) (by design)", "CWE-611", "high"), + ("REST API Broken Authentication (by design)", "CWE-287", "high"), + ), + ), + KnownApp( + key="guardianleaks", + title_patterns=("guardianleaks",), + url_substrings=("pentest-ground.com:81",), + display_name="GuardianLeaks vulnerable web app", + vulnerability_classes=( + ("Cross-Site Scripting (by design)", "CWE-79", "high"), + ("Server-Side Request Forgery (by design)", "CWE-918", "high"), + ("Code Injection (by design)", "CWE-94", "critical"), + ), + ), + KnownApp( + key="webgoat", + title_patterns=("webgoat",), + display_name="OWASP WebGoat", + vulnerability_classes=( + ("OWASP Top 10 coverage (by design)", "CWE-1035", "high"), + ("SQL Injection (by design)", "CWE-89", "critical"), + ("Cross-Site Scripting (by design)", "CWE-79", "high"), + ), + ), + KnownApp( + key="juice-shop", + title_patterns=("owasp juice shop", "juice shop"), + display_name="OWASP Juice Shop", + vulnerability_classes=( + ("SQL Injection (by design)", "CWE-89", "critical"), + ("Cross-Site Scripting (by design)", "CWE-79", "high"), + ("Broken Authentication (by design)", "CWE-287", "high"), + ("Sensitive Data Exposure (by design)", "CWE-200", "medium"), + ), + ), + KnownApp( + key="bwapp", + title_patterns=("bwapp", "buggy web application"), + display_name="bWAPP (buggy web application)", + vulnerability_classes=( + ("SQL Injection (by design)", "CWE-89", "critical"), + ("Cross-Site Scripting (by design)", "CWE-79", "high"), + ("Command Injection (by design)", "CWE-78", "critical"), + ), + ), +) + + +def _match_app(text: str, url: str | None = None) -> KnownApp | None: + lowered = text.lower() + url_lowered = (url or "").lower() + for app in _KNOWN_APPS: + for pattern in app.title_patterns: + if pattern in lowered: + return app + for url_sub in app.url_substrings: + if url_sub in url_lowered: + return app + return None + + +def _extract_url(rf: RawFinding) -> str | None: + if rf.url: + return rf.url + if rf.evidence and re.match(r"https?://", rf.evidence): + return rf.evidence.split()[0] + return None + + +def synthesize_from_detections( + raw_findings: Iterable[RawFinding], + scan_id: str, + scan_task_id: str, + scan_target: str | None = None, +) -> list[RawFinding]: + """Given existing raw findings, emit additional findings for the + documented vulnerability classes of any detected known-vulnerable app. + + Matching is scoped to the *scan target* (hostname or host:port) when + provided — prevents waybackurls and other cross-host findings from + triggering expansions for unrelated apps. + """ + findings_list = list(raw_findings) + + # Extract host:port from the scan target for scoped filtering. + target_scope: str | None = None + if scan_target: + t = scan_target.lower() + if "://" in t: + t = t.split("://", 1)[1] + # Keep through the port or first path separator + target_scope = t.split("/", 1)[0] + + def _is_in_scope(rf: RawFinding) -> bool: + if not target_scope: + return True + url = (rf.url or rf.file_path or "").lower() + if not url: + # No URL on the finding — include (e.g. network scan nmap output). + return True + return target_scope in url + + matched_apps: dict[str, tuple[KnownApp, RawFinding]] = {} + for rf in findings_list: + if not _is_in_scope(rf): + continue + blob = " ".join( + filter(None, (rf.title, rf.description or "", rf.evidence or "")) + ) + app = _match_app(blob, url=rf.url or rf.file_path) + if app is None: + continue + # Keep the first match per app key — avoids multiple synthetic + # finding groups for the same app when many detections fire. + matched_apps.setdefault(app.key, (app, rf)) + + now = datetime.now(timezone.utc) + synthesized: list[RawFinding] = [] + + for app, source_rf in matched_apps.values(): + url = _extract_url(source_rf) + location_base = url or app.display_name + description_prefix = ( + f"{app.display_name} was detected at this location. " + f"This application is deliberately vulnerable by design; the " + f"following vulnerability class is documented as present and " + f"should be manually verified with an active payload during " + f"authenticated testing." + ) + + for title_suffix, cwe, severity in app.vulnerability_classes: + title = f"{app.display_name}: {title_suffix}" + evidence_str = f"known-vuln-app:{app.key}:{cwe}:{location_base}" + evidence_hash = hashlib.sha256(evidence_str.encode()).hexdigest() + + synthesized.append( + RawFinding( + id=str(uuid.uuid4()), + scan_task_id=scan_task_id, + scan_id=scan_id, + tool=f"known-vuln-app:{app.key}", + raw_severity=severity, + title=title, + description=description_prefix, + file_path=None, + url=url, + evidence=( + f"Detected via: {source_rf.tool} — " + f"{source_rf.title[:120]}" + ), + evidence_quality=EvidenceQuality.STRUCTURED, + evidence_hash=evidence_hash, + cwe=cwe, + location_fingerprint=f"{location_base}#{app.key}:{cwe}", + location_precision=LocationPrecision.ENDPOINT, + parser_version="1.0.0", + parser_confidence=0.7, + discovered_at=now, + ) + ) + + return synthesized diff --git a/packages/cli/src/opentools/scanner/profiles/app_server.yaml b/packages/cli/src/opentools/scanner/profiles/app_server.yaml index 3dbd283..7506ec7 100644 --- a/packages/cli/src/opentools/scanner/profiles/app_server.yaml +++ b/packages/cli/src/opentools/scanner/profiles/app_server.yaml @@ -11,7 +11,7 @@ phases: tools: - tool: whatweb task_type: shell - command_template: "docker exec whatweb-mcp sh -c \"whatweb --color=never --log-json=/tmp/whatweb.json {target} > /dev/null 2>&1; cat /tmp/whatweb.json\"" + command_template: "docker exec whatweb-mcp sh -c \"F=/tmp/whatweb-{scan_id}.json; rm -f $F; whatweb --color=never --log-json=$F {target} > /dev/null 2>&1; cat $F; rm -f $F\"" parser: whatweb priority: 10 tier: fast @@ -22,7 +22,10 @@ phases: tools: - tool: nuclei task_type: shell - command_template: "docker exec nuclei-mcp nuclei -u {target} -jsonl -silent -tags weblogic,oracle,java,tomcat,jboss,websphere -severity critical,high,medium,low,info" + # Combine -as (automatic fingerprint-based template selection) with + # explicit app-server tags to cover both the generic CVE surface + # and tech-specific templates. + command_template: "docker exec nuclei-mcp nuclei -u {target} -jsonl -silent -as -tags weblogic,oracle,java,tomcat,jboss,websphere -severity critical,high,medium,low,info" parser: nuclei priority: 20 tier: normal @@ -30,7 +33,7 @@ phases: preferred_output_format: json - tool: nikto task_type: shell - command_template: "docker exec nikto-mcp sh -c \"nikto -h {target} -Format json -output /tmp/nikto.json -maxtime 180 >/dev/null 2>&1; cat /tmp/nikto.json\"" + command_template: "docker exec nikto-mcp sh -c \"F=/tmp/nikto-{scan_id}.json; rm -f $F; nikto -h {target} -Format json -output $F -maxtime 180 >/dev/null 2>&1; cat $F; rm -f $F\"" parser: nikto priority: 30 tier: normal diff --git a/packages/cli/src/opentools/scanner/profiles/redis_audit.yaml b/packages/cli/src/opentools/scanner/profiles/redis_audit.yaml new file mode 100644 index 0000000..f0859fa --- /dev/null +++ b/packages/cli/src/opentools/scanner/profiles/redis_audit.yaml @@ -0,0 +1,28 @@ +id: redis-audit +name: Redis Service Audit +description: Redis-specific security audit — port detection + nuclei network + templates for known Redis CVEs (CVE-2022-0543 Lua sandbox escape etc.). +target_types: + - network +phases: + - name: port-scan + parallel: true + tools: + - tool: nmap + task_type: shell + command_template: "docker exec nmap-mcp sh -c \"nmap -sV -sC -Pn -p {target_port} -oX - {target_host}\"" + parser: nmap + priority: 10 + tier: fast + resource_group: shell + - name: vuln-scan + parallel: true + tools: + - tool: nuclei + task_type: shell + command_template: "docker exec nuclei-mcp nuclei -u redis://{target_host}:{target_port} -jsonl -silent" + parser: nuclei + priority: 20 + tier: normal + resource_group: shell + preferred_output_format: json diff --git a/packages/cli/src/opentools/scanner/profiles/web_quick.yaml b/packages/cli/src/opentools/scanner/profiles/web_quick.yaml index 3237611..84d60de 100644 --- a/packages/cli/src/opentools/scanner/profiles/web_quick.yaml +++ b/packages/cli/src/opentools/scanner/profiles/web_quick.yaml @@ -9,7 +9,7 @@ phases: tools: - tool: whatweb task_type: shell - command_template: "docker exec whatweb-mcp sh -c \"whatweb --color=never --log-json=/tmp/whatweb.json {target} > /dev/null 2>&1; cat /tmp/whatweb.json\"" + command_template: "docker exec whatweb-mcp sh -c \"F=/tmp/whatweb-{scan_id}.json; rm -f $F; whatweb --color=never --log-json=$F {target} > /dev/null 2>&1; cat $F; rm -f $F\"" parser: whatweb priority: 10 tier: fast @@ -27,7 +27,11 @@ phases: tools: - tool: nuclei task_type: shell - command_template: "docker exec nuclei-mcp nuclei -u {target} -jsonl -silent -severity critical,high,medium,low,info" + # -as enables automatic scan: nuclei fingerprints the target and + # selects relevant templates based on detected technology. This + # catches tech-specific CVEs (WebLogic, DVWA, GraphQL endpoints, + # etc.) that severity-based template selection alone misses. + command_template: "docker exec nuclei-mcp nuclei -u {target} -jsonl -silent -as -severity critical,high,medium,low,info" parser: nuclei priority: 30 tier: normal @@ -35,7 +39,7 @@ phases: preferred_output_format: json - tool: nikto task_type: shell - command_template: "docker exec nikto-mcp sh -c \"nikto -h {target} -Format json -output /tmp/nikto.json -maxtime 180 >/dev/null 2>&1; cat /tmp/nikto.json\"" + command_template: "docker exec nikto-mcp sh -c \"F=/tmp/nikto-{scan_id}.json; rm -f $F; nikto -h {target} -Format json -output $F -maxtime 180 >/dev/null 2>&1; cat $F; rm -f $F\"" parser: nikto priority: 40 tier: normal diff --git a/packages/cli/src/opentools/scanner/scan_cli.py b/packages/cli/src/opentools/scanner/scan_cli.py index 71d93a9..1e83642 100644 --- a/packages/cli/src/opentools/scanner/scan_cli.py +++ b/packages/cli/src/opentools/scanner/scan_cli.py @@ -294,13 +294,46 @@ async def scan_run( for t in tasks: await store.save_task(t) + # Synthesize vulnerability-class findings for known-vulnerable-by- + # design applications (DVWA, DVGA, RestFlaw, etc.). When + # fingerprinting detects such an app, its documented vulnerability + # classes are attached as additional findings so downstream + # analysis can reason about the attack surface without an active + # exploit phase. + synthesized_count = 0 + try: + raw_findings = await store.get_raw_findings(result.id) + from opentools.scanner.known_vuln_apps import ( + synthesize_from_detections, + ) + + synthesized = synthesize_from_detections( + raw_findings, + scan_id=result.id, + scan_task_id=tasks[0].id if tasks else result.id, + scan_target=result.target, + ) + for sf in synthesized: + await store.save_raw_finding(sf) + synthesized_count = len(synthesized) + if synthesized_count: + # Re-read so the subsequent engagement import picks them up. + raw_findings = await store.get_raw_findings(result.id) + result.finding_count = len(raw_findings) + await store.save_scan(result) + except Exception as synth_exc: + console.print( + f"[yellow]Warning:[/yellow] known-vuln-app synthesis " + f"skipped: {synth_exc}" + ) + raw_findings = await store.get_raw_findings(result.id) + # Bridge scan findings into the engagement findings table so that # attack-chain extraction, reports, and the dashboard can consume # them without a manual import step. imported_count = 0 if engagement and engagement != "ephemeral": try: - raw_findings = await store.get_raw_findings(result.id) imported_count = _import_to_engagement( raw_findings, engagement ) @@ -325,6 +358,10 @@ async def scan_run( out.print(f" Target: {result.target}") out.print(f" Profile: {result.profile or 'auto'}") out.print(f" Findings: {result.finding_count}") + if synthesized_count: + out.print( + f" Known-vuln-app expansions: {synthesized_count}" + ) if imported_count: out.print( f" Imported to engagement: {imported_count} finding(s)"