def _default_db_path() -> Path:
    """Locate the database file that chain commands should open.

    The lookup mirrors the main CLI's ``_get_store`` so that chain and
    engagement commands operate on one shared database:

    1. ``<plugin_dir>/../../engagements/opentools.db`` when
       ``discover_plugin_dir()`` succeeds (``OPENTOOLS_PLUGIN_DIR`` is set
       or the plugin directory can be discovered).
    2. ``~/.opentools/engagements.db`` otherwise, which covers stand-alone
       use outside a repo checkout.

    Returns:
        The resolved database path. Nothing is created or opened here.
    """
    try:
        # Imported lazily: any failure to import or to discover the plugin
        # directory selects the per-user fallback instead of raising.
        from opentools.plugin import discover_plugin_dir

        checkout_root = discover_plugin_dir().parent.parent
        db_path = checkout_root / "engagements" / "opentools.db"
    except Exception:
        db_path = Path.home() / ".opentools" / "engagements.db"
    return db_path
def import_scan_findings(
    raw_findings: Iterable[RawFinding],
    engagement_id: Optional[str],
    engagement_store: EngagementStore,
) -> int:
    """Import raw scan findings into an engagement's findings table.

    Each raw finding is converted with ``_raw_to_finding`` and inserted via
    ``engagement_store.add_finding`` unless a finding with the same
    ``_dedup_key`` already exists, either stored before this call or
    inserted earlier in the same call.

    The import is best-effort and never raises for store or conversion
    errors, but failures are logged so that a broken bridge can be told
    apart from "nothing to import":

    * if the existing findings cannot be read, a warning is logged and
      nothing is imported;
    * a row that cannot be converted or inserted is skipped, logged at
      debug level with its traceback, and counted in one summary warning.

    Args:
        raw_findings: Scanner findings to bridge into the engagement.
        engagement_id: Target engagement id. A falsy value disables the
            import.
        engagement_store: Store providing ``get_findings(engagement_id)``
            and ``add_finding(finding)``.

    Returns:
        The number of findings newly inserted.
    """
    if not engagement_id:
        return 0

    import logging

    log = logging.getLogger(__name__)

    try:
        existing = engagement_store.get_findings(engagement_id)
    except Exception:
        log.warning(
            "Could not read findings for engagement %s; "
            "scan findings were not imported",
            engagement_id,
            exc_info=True,
        )
        return 0

    seen_keys = {_dedup_key(f) for f in existing}
    now = datetime.now(timezone.utc)

    inserted = 0
    failed = 0
    for rf in raw_findings:
        try:
            # Conversion sits under the same guard as the insert so that
            # one malformed row cannot abort the rest of the batch.
            candidate = _raw_to_finding(rf, engagement_id, now)
            key = _dedup_key(candidate)
            if key in seen_keys:
                continue
            engagement_store.add_finding(candidate)
        except Exception:
            failed += 1
            log.debug(
                "Skipping scan finding that could not be imported",
                exc_info=True,
            )
            continue
        # Remember the key so duplicates within this batch are skipped too.
        seen_keys.add(key)
        inserted += 1

    if failed:
        log.warning(
            "%d scan finding(s) could not be imported into engagement %s",
            failed,
            engagement_id,
        )
    return inserted
# Scan profile for application servers and enterprise middleware.
#
# Two phases: a technology fingerprint (whatweb), then vulnerability
# scanners (nuclei, nikto). Every tool is invoked through `docker exec`
# in its own container and is expected to print JSON on stdout for the
# parser named in `parser`.
#
# `{target}` and `{scan_id}` are placeholders from the planner's
# replacement table.
# NOTE(review): assumes every occurrence of a placeholder in a template is
# substituted — confirm against the planner.
# NOTE(review): `{target}` is spliced unquoted into an `sh -c` string;
# presumably the target is validated upstream — confirm, since shell
# metacharacters in a target would otherwise be interpreted.
id: app-server
name: Application Server Scan
description: Focused scan for Java app servers (WebLogic, Tomcat, JBoss) and
  similar enterprise middleware — combines nuclei templates tagged for the
  relevant technology with nikto for misconfiguration surface.
target_types:
  - url
phases:
  - name: fingerprint
    parallel: true
    tools:
      - tool: whatweb
        task_type: shell
        # The JSON log is written to a per-scan file and then printed.
        # A fixed path would be shared by every scan run in the same
        # container, so concurrent scans could read each other's output and
        # a failed run could print a previous scan's results.
        command_template: "docker exec whatweb-mcp sh -c \"whatweb --color=never --log-json=/tmp/whatweb-{scan_id}.json {target} > /dev/null 2>&1; cat /tmp/whatweb-{scan_id}.json\""
        parser: whatweb
        priority: 10
        tier: fast
        resource_group: shell
        preferred_output_format: json
  - name: vuln-scan
    parallel: true
    tools:
      - tool: nuclei
        task_type: shell
        # Restricts templates to app-server technology tags; all severity
        # levels are requested explicitly.
        command_template: "docker exec nuclei-mcp nuclei -u {target} -jsonl -silent -tags weblogic,oracle,java,tomcat,jboss,websphere -severity critical,high,medium,low,info"
        parser: nuclei
        priority: 20
        tier: normal
        resource_group: shell
        preferred_output_format: json
      - tool: nikto
        task_type: shell
        # Per-scan report file for the same reason as whatweb above.
        # `-maxtime 180` bounds the run (presumably seconds — confirm).
        command_template: "docker exec nikto-mcp sh -c \"nikto -h {target} -Format json -output /tmp/nikto-{scan_id}.json -maxtime 180 >/dev/null 2>&1; cat /tmp/nikto-{scan_id}.json\""
        parser: nikto
        priority: 30
        tier: normal
        resource_group: shell
        preferred_output_format: json
def _engagement_db_path() -> Optional[Path]:
    """Return the engagement database path used by the main CLI.

    The path is ``<plugin_dir>/../../engagements/opentools.db``, where
    ``plugin_dir`` comes from ``discover_plugin_dir()``.

    Returns:
        The database path, or ``None`` when the plugin directory cannot be
        discovered (e.g. outside a repo checkout). Callers should skip
        bridging in that case.
    """
    try:
        from opentools.plugin import discover_plugin_dir

        plugin_dir = discover_plugin_dir()
        return plugin_dir.parent.parent / "engagements" / "opentools.db"
    except Exception:
        return None


def _resolve_engagement_ref(engagements: list, engagement_ref: str):
    """Pick the engagement designated by ``engagement_ref``.

    Candidates are objects exposing ``id`` and ``name``. Precedence is
    exact id, then exact name, then id prefix. A prefix only counts when
    it identifies exactly one engagement, so a short or ambiguous
    reference can never select an arbitrary engagement.

    Returns:
        The matching engagement, or ``None`` when the reference is empty,
        unknown, or an ambiguous prefix.
    """
    if not engagement_ref:
        # An empty prefix would "match" every id.
        return None

    candidates = list(engagements)
    for candidate in candidates:
        if candidate.id == engagement_ref:
            return candidate
    for candidate in candidates:
        if candidate.name == engagement_ref:
            return candidate

    by_prefix = [c for c in candidates if c.id.startswith(engagement_ref)]
    if len(by_prefix) == 1:
        return by_prefix[0]
    return None


def _import_to_engagement(
    raw_findings: list,
    engagement_ref: str,
) -> int:
    """Bridge scanner RawFindings into the engagement findings table.

    Args:
        raw_findings: Raw findings produced by a scan.
        engagement_ref: An engagement id, a unique id prefix, or a name.
            Exact id and exact name matches take priority over prefixes.

    Returns:
        The number of findings imported. ``0`` when the reference is empty,
        the engagement database cannot be located, or the reference does
        not identify exactly one engagement.
    """
    if not engagement_ref:
        return 0

    db = _engagement_db_path()
    if db is None:
        return 0

    # Deferred until the cheap early exits have passed.
    from opentools.engagement.store import EngagementStore
    from opentools.scanner.engagement_bridge import import_scan_findings

    es = EngagementStore(db_path=db)
    match = _resolve_engagement_ref(es.list_all(), engagement_ref)
    if match is None:
        return 0
    return import_scan_findings(raw_findings, match.id, es)
pass + # Hostname:port pattern (e.g. ``example.com:6379``). Must have a dot in + # the hostname and a numeric port. This is how users express a TCP + # service target like Redis, SSH, or Postgres. + match = re.match(r"^([\w\-]+(?:\.[\w\-]+)+):(\d{1,5})$", target) + if match: + port = int(match.group(2)) + if 1 <= port <= 65535: + return True return False def _is_docker_image(self, target: str) -> bool: