diff --git a/CHANGELOG.md b/CHANGELOG.md index dfbd8e3..2ecc8c9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -36,11 +36,21 @@ Initial release of SlimStack - Dependency hygiene and waste elimination CLI tool - **CLI Commands** - `slim version` - Display version - `slim help` - Show usage help + - `slim man` - Display detailed manual - `slim scan -py` - Scan Python dependencies - `slim scan -node` - Scan Node.js dependencies - `slim prune -py` - Remove unused Python packages - `slim prune -node` - Remove unused Node packages - `slim disk` - Disk usage analysis + - `slim docker` - Dockerfile security and optimization analysis + +- **Dockerfile Analysis** (NEW) + - Security anti-pattern detection (secrets in ENV, running as root, etc.) + - Hardened image recommendations (Chainguard, Alpine, distroless) + - Multi-stage build detection + - Best practice suggestions (HEALTHCHECK, COPY vs ADD, etc.) + - Severity filtering and security-only mode + - JSON output for CI/CD pipelines - **Safety Features** - Read-only scan operations by default @@ -57,6 +67,7 @@ Initial release of SlimStack - Dependency hygiene and waste elimination CLI tool - **Documentation** - Comprehensive README with usage examples - Unix man page (`man slim`) + - Built-in manual (`slim man`) ## [Unreleased] diff --git a/README.md b/README.md index 47a0f36..8c7c7e9 100644 --- a/README.md +++ b/README.md @@ -144,6 +144,60 @@ Node.js node_modules: 324.1 MB api-server/node_modules ``` +### Docker Commands + +```bash +# Analyze Dockerfile for security and optimization issues +slim docker + +# Output as JSON (for CI/CD) +slim docker --json + +# Only show security issues +slim docker --security-only + +# Filter by severity (critical, warning, info) +slim docker --severity warning +``` + +**Example output:** +```text +SlimStack Dockerfile Analysis +================================ + +Dockerfile: Dockerfile +Base images: 1 +Multi-stage: No +Runs as non-root: No +Has HEALTHCHECK: No + +Issues Found (4): + + 🔴 
Line 2: Potential secret exposed in ENV instruction + Category: security + → Use Docker secrets or mount secrets at runtime instead of ENV + + 🟡 Line 1: Container runs as root (no USER instruction) + Category: security + → Add 'USER nonroot' or 'USER 1000' to run as non-root user + +Image Recommendations: + + Current: python:3.12 + Recommended: python:3.12-slim + Reason: smaller - Debian slim variant, ~100MB smaller + Size: ~150MB + + Current: python:3.12 + Recommended: cgr.dev/chainguard/python:latest + Reason: hardened - Chainguard hardened image, zero CVEs + Size: ~50MB + +──────────────────────────────── +Summary: 1 critical, 2 warnings, 1 info + 2 image recommendations +``` + ## Safety SlimStack is designed with safety as a priority: diff --git a/man/slim.1 b/man/slim.1 index a2075a4..524dd5a 100644 --- a/man/slim.1 +++ b/man/slim.1 @@ -94,6 +94,22 @@ Scan a specific directory instead of current directory. .TP .B slim disk \-\-json Output disk usage as JSON. +.SS "Docker Commands" +.TP +.B slim docker +Analyze Dockerfile for security issues and optimization opportunities. Suggests hardened image alternatives. +.TP +.B slim docker \-\-json +Output analysis results as JSON for CI/CD pipelines. +.TP +.B slim docker \-\-path \fIPATH\fR +Analyze Dockerfile in a specific directory. +.TP +.B slim docker \-\-severity \fILEVEL\fR +Filter issues by minimum severity (critical, warning, info). +.TP +.B slim docker \-\-security\-only +Only show security-related issues. 
.SH OPTIONS .TP .B \-py, \-\-py diff --git a/slim/cli.py b/slim/cli.py index 45dff5e..33c7882 100644 --- a/slim/cli.py +++ b/slim/cli.py @@ -20,6 +20,9 @@ slim disk Show disk usage by ecosystem slim disk --by project Show disk usage by project slim disk --top 10 Limit results + + slim docker Analyze Dockerfile for issues + slim docker --json Output as JSON """ import argparse @@ -673,6 +676,121 @@ def cmd_disk(args: argparse.Namespace) -> int: return 0 +def cmd_docker_scan(args: argparse.Namespace) -> int: + """Scan Dockerfile for security issues and optimization opportunities.""" + from slim.scanners.docker_scanner import scan_dockerfile, get_scan_result_dict + + project_path = Path(args.path) if args.path else None + result = scan_dockerfile(project_path=project_path) + + if args.json: + output_json(get_scan_result_dict(result)) + return 0 + + # Human-readable output + colors_enabled = is_tty() + + print() + if colors_enabled: + print(f"{Colors.BOLD}SlimStack Dockerfile Analysis{Colors.RESET}") + else: + print("SlimStack Dockerfile Analysis") + print("=" * 32) + + # Check if Dockerfile was found + if not result.base_images: + if result.issues and result.issues[0].message == "No Dockerfile found": + error("No Dockerfile found in project.") + return 1 + + # Show basic info + print(f"\nDockerfile: {result.dockerfile_path}") + print(f"Base images: {len(result.base_images)}") + print(f"Multi-stage: {'Yes' if result.multi_stage else 'No'}") + print(f"Runs as non-root: {'Yes' if result.has_user_instruction else 'No'}") + print(f"Has HEALTHCHECK: {'Yes' if result.has_healthcheck else 'No'}") + + # Filter issues by severity if specified + issues = result.issues + min_severity = getattr(args, 'severity', None) + if min_severity: + severity_order = {"critical": 0, "warning": 1, "info": 2} + min_level = severity_order.get(min_severity.lower(), 2) + issues = [i for i in issues if severity_order.get(i.severity, 2) <= min_level] + + # Filter by security only if specified + if 
getattr(args, 'security_only', False): + issues = [i for i in issues if i.category == "security"] + + # Show issues + if issues: + print() + if colors_enabled: + print(f"{Colors.YELLOW}Issues Found ({len(issues)}):{Colors.RESET}") + else: + print(f"Issues Found ({len(issues)}):") + + for issue in issues: + # Severity icon and color + if issue.severity == "critical": + icon = "🔴" if colors_enabled else "[CRITICAL]" + color = Colors.RED if colors_enabled else "" + elif issue.severity == "warning": + icon = "🟡" if colors_enabled else "[WARNING]" + color = Colors.YELLOW if colors_enabled else "" + else: + icon = "🔵" if colors_enabled else "[INFO]" + color = Colors.CYAN if colors_enabled else "" + + reset = Colors.RESET if colors_enabled else "" + + print(f"\n {icon} {color}Line {issue.line_number}: {issue.message}{reset}") + print(f" Category: {issue.category}") + if colors_enabled: + print(f" {Colors.DIM}→ {issue.suggestion}{Colors.RESET}") + else: + print(f" → {issue.suggestion}") + else: + print(f"\n{Colors.GREEN if colors_enabled else ''}✓ No issues found!{Colors.RESET if colors_enabled else ''}") + + # Show recommendations + if result.recommendations: + print() + if colors_enabled: + print(f"{Colors.CYAN}Image Recommendations:{Colors.RESET}") + else: + print("Image Recommendations:") + + shown = set() + for rec in result.recommendations: + if rec.recommended_image in shown: + continue + shown.add(rec.recommended_image) + + print(f"\n Current: {rec.current_image}") + if colors_enabled: + print(f" {Colors.GREEN}Recommended: {rec.recommended_image}{Colors.RESET}") + else: + print(f" Recommended: {rec.recommended_image}") + print(f" Reason: {rec.reason} - {rec.description}") + if rec.size_estimate: + print(f" Size: {rec.size_estimate}") + + # Summary + critical = sum(1 for i in result.issues if i.severity == "critical") + warning = sum(1 for i in result.issues if i.severity == "warning") + info_count = sum(1 for i in result.issues if i.severity == "info") + + 
print(f"\n{'─' * 32}") + print(f"Summary: {critical} critical, {warning} warnings, {info_count} info") + + if result.recommendations: + print(f" {len(result.recommendations)} image recommendations") + + # Return non-zero if critical issues found + return 1 if critical > 0 else 0 + + def create_parser() -> argparse.ArgumentParser: """Create the argument parser.""" parser = argparse.ArgumentParser( @@ -724,6 +842,16 @@ def create_parser() -> argparse.ArgumentParser: disk_parser.add_argument("--path", "-p", help="Path to scan (default: current directory)") disk_parser.set_defaults(func=cmd_disk) + # docker scan command + docker_parser = subparsers.add_parser("docker", help="Dockerfile analysis and optimization") + docker_parser.add_argument("--json", action="store_true", help="Output as JSON") + docker_parser.add_argument("--path", "-p", help="Project path (default: current directory)") + docker_parser.add_argument("--severity", choices=["critical", "warning", "info"], + help="Minimum severity to report") + docker_parser.add_argument("--security-only", action="store_true", dest="security_only", + help="Only show security-related issues") + docker_parser.set_defaults(func=cmd_docker_scan) + return parser diff --git a/slim/core/docker_images.py b/slim/core/docker_images.py new file mode 100644 index 0000000..324fa32 --- /dev/null +++ b/slim/core/docker_images.py @@ -0,0 +1,280 @@ + +""" +Docker image alternatives and hardened image mappings. + +Advisory-only: recommendations are heuristic and non-authoritative. 
+""" + +from dataclasses import dataclass +from enum import Enum +from typing import Optional + + +class AlternativeReason(str, Enum): + HARDENED = "hardened" + SMALLER = "smaller" + DISTROLESS = "distroless" + ALPINE = "alpine" + + +@dataclass(frozen=True) +class ImageAlternative: + """A recommended alternative for a base image.""" + image: str + reason: AlternativeReason + description: str + size_estimate: Optional[str] = None # heuristic only + + +# ----------------------------- +# Static recommendation mapping +# ----------------------------- + +IMAGE_ALTERNATIVES: dict[str, list[ImageAlternative]] = { + "python": [ + ImageAlternative( + image="{name}:{version}-slim", + reason=AlternativeReason.SMALLER, + description="Debian slim variant", + size_estimate="~150MB", + ), + ImageAlternative( + image="{name}:{version}-alpine", + reason=AlternativeReason.ALPINE, + description="Alpine Linux base", + size_estimate="~50MB", + ), + ImageAlternative( + image="cgr.dev/chainguard/python", + reason=AlternativeReason.HARDENED, + description="Chainguard hardened, distroless", + size_estimate="~50MB", + ), + ], + "node": [ + ImageAlternative( + image="{name}:{version}-slim", + reason=AlternativeReason.SMALLER, + description="Debian slim variant", + size_estimate="~200MB", + ), + ImageAlternative( + image="{name}:{version}-alpine", + reason=AlternativeReason.ALPINE, + description="Alpine Linux base", + size_estimate="~130MB", + ), + ImageAlternative( + image="cgr.dev/chainguard/node", + reason=AlternativeReason.HARDENED, + description="Chainguard hardened Node.js", + size_estimate="~100MB", + ), + ], + "golang": [ + ImageAlternative( + image="{name}:{version}-alpine", + reason=AlternativeReason.ALPINE, + description="Alpine build image", + size_estimate="~250MB", + ), + ImageAlternative( + image="gcr.io/distroless/static-debian12", + reason=AlternativeReason.DISTROLESS, + description="Distroless runtime for static Go binaries", + size_estimate="~2MB", + ), + ImageAlternative( 
+ image="cgr.dev/chainguard/go", + reason=AlternativeReason.HARDENED, + description="Chainguard hardened Go build image", + size_estimate="~200MB", + ), + ], + "openjdk": [ + ImageAlternative( + image="eclipse-temurin:{version}-jre-alpine", + reason=AlternativeReason.ALPINE, + description="Temurin JRE on Alpine", + size_estimate="~150MB", + ), + ImageAlternative( + image="gcr.io/distroless/java17-debian12", + reason=AlternativeReason.DISTROLESS, + description="Distroless Java runtime", + size_estimate="~200MB", + ), + ImageAlternative( + image="cgr.dev/chainguard/jre", + reason=AlternativeReason.HARDENED, + description="Chainguard hardened JRE", + size_estimate="~100MB", + ), + ], + "ubuntu": [ + ImageAlternative( + image="ubuntu:{version}-minimal", + reason=AlternativeReason.SMALLER, + description="Ubuntu minimal", + size_estimate="~30MB", + ), + ImageAlternative( + image="debian:{version}-slim", + reason=AlternativeReason.SMALLER, + description="Debian slim", + size_estimate="~25MB", + ), + ImageAlternative( + image="cgr.dev/chainguard/wolfi-base", + reason=AlternativeReason.HARDENED, + description="Wolfi security-focused base", + size_estimate="~15MB", + ), + ], + "debian": [ + ImageAlternative( + image="debian:{version}-slim", + reason=AlternativeReason.SMALLER, + description="Debian slim", + size_estimate="~25MB", + ), + ImageAlternative( + image="cgr.dev/chainguard/wolfi-base", + reason=AlternativeReason.HARDENED, + description="Wolfi security-focused base", + size_estimate="~15MB", + ), + ], + "nginx": [ + ImageAlternative( + image="nginx:{version}-alpine", + reason=AlternativeReason.ALPINE, + description="Alpine variant", + size_estimate="~25MB", + ), + ImageAlternative( + image="cgr.dev/chainguard/nginx", + reason=AlternativeReason.HARDENED, + description="Chainguard hardened nginx", + size_estimate="~15MB", + ), + ], + "redis": [ + ImageAlternative( + image="redis:{version}-alpine", + reason=AlternativeReason.ALPINE, + description="Alpine variant", + 
size_estimate="~30MB", + ), + ], + "postgres": [ + ImageAlternative( + image="postgres:{version}-alpine", + reason=AlternativeReason.ALPINE, + description="Alpine variant", + size_estimate="~80MB", + ), + ], +} + + +# ----------------------------- +# Risk & optimization signals +# ----------------------------- + +RISKY_TAGS = {"latest", "dev", "development", "beta", "rc", "nightly", "unstable"} + +HARDENED_REGISTRIES = { + "cgr.dev/chainguard", + "gcr.io/distroless", +} + +OPTIMIZED_TAG_MARKERS = {"slim", "alpine", "minimal", "distroless"} + + +# ----------------------------- +# Parsing helpers +# ----------------------------- + +def _strip_digest(image: str) -> str: + return image.split("@", 1)[0] + + +def get_base_image_name(image: str) -> str: + image = _strip_digest(image) + last_segment = image.rsplit("/", 1)[-1] + if ":" in last_segment: + last_segment = last_segment.rsplit(":", 1)[0] + return last_segment.lower() + + +def get_image_tag(image: str) -> str: + image = _strip_digest(image) + last_segment = image.rsplit("/", 1)[-1] + if ":" in last_segment: + return last_segment.rsplit(":", 1)[1] + return "latest" + + +def get_image_registry(image: str) -> Optional[str]: + parts = image.split("/") + if len(parts) > 1 and "." 
in parts[0]: + return parts[0] + return None + + +# ----------------------------- +# Classification +# ----------------------------- + +def has_risky_tag(image: str) -> bool: + if "@" in image: + return False # digest-pinned + return get_image_tag(image).lower() in RISKY_TAGS + + +def is_hardened_image(image: str) -> bool: + registry = get_image_registry(image) + return registry in HARDENED_REGISTRIES + + +def is_optimized_image(image: str) -> bool: + tag = get_image_tag(image).lower() + return any(marker in tag for marker in OPTIMIZED_TAG_MARKERS) + + +# ----------------------------- +# Recommendation engine +# ----------------------------- + +def _extract_version(tag: str) -> Optional[str]: + if tag in RISKY_TAGS: + return None + return tag.split("-", 1)[0] + + +def get_alternatives(image: str) -> list[ImageAlternative]: + base_name = get_base_image_name(image) + tag = get_image_tag(image).lower() + version = _extract_version(tag) + + alternatives = IMAGE_ALTERNATIVES.get(base_name, []) + results: list[ImageAlternative] = [] + + for alt in alternatives: + if "{version}" in alt.image and not version: + continue + + formatted = ( + alt.image.format(name=base_name, version=version) + if "{version}" in alt.image + else alt.image + ) + + # Do not recommend the same image + if formatted == image: + continue + + # Return the formatted image name, not the raw "{name}:{version}" template + results.append(ImageAlternative( + image=formatted, + reason=alt.reason, + description=alt.description, + size_estimate=alt.size_estimate, + )) + + return results diff --git a/slim/scanners/docker_scanner.py b/slim/scanners/docker_scanner.py new file mode 100644 index 0000000..02ac2ca --- /dev/null +++ b/slim/scanners/docker_scanner.py @@ -0,0 +1,404 @@ +"""Dockerfile scanner for security and optimization analysis.""" + +import re +from dataclasses import dataclass, field +from pathlib import Path +from typing import Iterator + +from slim.core.docker_images import ( + get_alternatives, + is_optimized_image, + has_risky_tag, + get_image_tag, + ImageAlternative, +) +from slim.core.utils import format_size + + +@dataclass +class DockerfileIssue: + """An issue found in a Dockerfile."""
+ line_number: int + severity: str # "critical", "warning", "info" + category: str # "security", "size", "best-practice" + message: str + suggestion: str + + def to_dict(self) -> dict: + """Convert to JSON-serializable dictionary.""" + return { + "line_number": self.line_number, + "severity": self.severity, + "category": self.category, + "message": self.message, + "suggestion": self.suggestion, + } + + +@dataclass +class ImageRecommendation: + """A recommended alternative for a base image.""" + current_image: str + recommended_image: str + reason: str + description: str + line_number: int + size_estimate: str | None = None + + def to_dict(self) -> dict: + """Convert to JSON-serializable dictionary.""" + return { + "current_image": self.current_image, + "recommended_image": self.recommended_image, + "reason": self.reason, + "description": self.description, + "line_number": self.line_number, + "size_estimate": self.size_estimate, + } + + +@dataclass +class DockerScanResult: + """Result of scanning a Dockerfile.""" + dockerfile_path: Path + base_images: list[tuple[int, str]] = field(default_factory=list) # (line_num, image) + issues: list[DockerfileIssue] = field(default_factory=list) + recommendations: list[ImageRecommendation] = field(default_factory=list) + multi_stage: bool = False + stage_count: int = 0 + has_user_instruction: bool = False + has_healthcheck: bool = False + total_lines: int = 0 + + def to_dict(self) -> dict: + """Convert to JSON-serializable dictionary.""" + return { + "dockerfile_path": str(self.dockerfile_path), + "base_images": [ + {"line": line, "image": img} for line, img in self.base_images + ], + "issues": [issue.to_dict() for issue in self.issues], + "recommendations": [rec.to_dict() for rec in self.recommendations], + "multi_stage": self.multi_stage, + "stage_count": self.stage_count, + "has_user_instruction": self.has_user_instruction, + "has_healthcheck": self.has_healthcheck, + "total_lines": self.total_lines, + "summary": { + 
"total_issues": len(self.issues), + "critical": sum(1 for i in self.issues if i.severity == "critical"), + "warning": sum(1 for i in self.issues if i.severity == "warning"), + "info": sum(1 for i in self.issues if i.severity == "info"), + "recommendations_count": len(self.recommendations), + }, + } + + +# Patterns for detecting issues +PATTERNS = { + # FROM instruction - captures image name with optional AS alias + "from": re.compile(r"^\s*FROM\s+(\S+)(?:\s+AS\s+\S+)?", re.IGNORECASE), + + # USER instruction + "user": re.compile(r"^\s*USER\s+", re.IGNORECASE), + + # HEALTHCHECK instruction + "healthcheck": re.compile(r"^\s*HEALTHCHECK\s+", re.IGNORECASE), + + # ADD instruction (potential issue if used for local files) + "add": re.compile(r"^\s*ADD\s+(?!https?://|--from=)(\S+)", re.IGNORECASE), + + # ENV with potential secrets + "env_secret": re.compile( + r"^\s*ENV\s+\S*(PASSWORD|SECRET|KEY|TOKEN|CREDENTIAL|API_KEY)\s*=", + re.IGNORECASE + ), + + # ARG with potential secrets + "arg_secret": re.compile( + r"^\s*ARG\s+\S*(PASSWORD|SECRET|KEY|TOKEN|CREDENTIAL|API_KEY)", + re.IGNORECASE + ), + + # RUN with curl/wget piped to shell (risky) + "run_pipe_shell": re.compile( + r"^\s*RUN\s+.*(?:curl|wget)\s+.*\|\s*(?:bash|sh)", + re.IGNORECASE + ), + + # RUN apt-get without cleanup + "apt_no_cleanup": re.compile( + r"^\s*RUN\s+.*apt-get\s+install(?!.*rm\s+-rf\s+/var/lib/apt/lists)", + re.IGNORECASE + ), + + # RUN npm install without cache clean + "npm_no_cleanup": re.compile( + r"^\s*RUN\s+.*npm\s+install(?!.*npm\s+cache\s+clean)", + re.IGNORECASE + ), + + # RUN pip install without cache optimization + "pip_no_cache": re.compile( + r"^\s*RUN\s+.*pip\s+install(?!.*--no-cache-dir)", + re.IGNORECASE + ), + + # EXPOSE for common sensitive ports + "expose_sensitive": re.compile( + r"^\s*EXPOSE\s+(22|23|3389|5432|3306|27017|6379)\b", + re.IGNORECASE + ), + + # COPY . . 
at root level (copies everything including .git, secrets) + "copy_all": re.compile(r"^\s*COPY\s+\.\s+\.", re.IGNORECASE), +} + + +def find_dockerfile(project_path: Path | None = None) -> Path | None: + """Find Dockerfile in the project.""" + if project_path is None: + project_path = Path.cwd() + + # Look for Dockerfile (case-insensitive on Windows, case-sensitive on Unix) + dockerfile_names = ["Dockerfile", "dockerfile", "Dockerfile.prod", "Dockerfile.dev"] + + for name in dockerfile_names: + dockerfile = project_path / name + if dockerfile.exists(): + return dockerfile + + return None + + +def parse_dockerfile(dockerfile_path: Path) -> list[tuple[int, str]]: + """Parse a Dockerfile and return list of (line_number, line_content).""" + lines = [] + try: + content = dockerfile_path.read_text(encoding="utf-8") + for i, line in enumerate(content.splitlines(), start=1): + lines.append((i, line)) + except OSError as e: + # Log or handle file access errors appropriately + import logging + logging.warning(f"Failed to read Dockerfile {dockerfile_path}: {e}") + return lines + + +def analyze_dockerfile(lines: list[tuple[int, str]]) -> DockerScanResult: + """Analyze Dockerfile lines for issues and recommendations.""" + result = DockerScanResult( + dockerfile_path=Path("Dockerfile"), + total_lines=len(lines), + ) + + # Track state + from_images: list[tuple[int, str]] = [] + has_user = False + has_healthcheck = False + last_from_line = 0 + + for line_num, line in lines: + # Skip comments and empty lines + stripped = line.strip() + if not stripped or stripped.startswith("#"): + continue + + # Check FROM instruction + from_match = PATTERNS["from"].match(stripped) + if from_match: + image = from_match.group(1) + from_images.append((line_num, image)) + last_from_line = line_num + + # Check for risky tag + if has_risky_tag(image): + result.issues.append(DockerfileIssue( + line_number=line_num, + severity="warning", + category="security", + message=f"Image uses unpinned 
'{get_image_tag(image)}' tag", + suggestion="Pin to a specific version tag for reproducible builds", + )) + + # Get alternatives if not already optimized + if not is_optimized_image(image): + alternatives = get_alternatives(image) + for alt in alternatives[:2]: # Top 2 recommendations + result.recommendations.append(ImageRecommendation( + current_image=image, + recommended_image=alt.image, + reason=alt.reason, + description=alt.description, + line_number=line_num, + size_estimate=alt.size_estimate, + )) + + # Check USER instruction + if PATTERNS["user"].match(stripped): + has_user = True + + # Check HEALTHCHECK instruction + if PATTERNS["healthcheck"].match(stripped): + has_healthcheck = True + + # Check ADD for local files (should use COPY) + add_match = PATTERNS["add"].match(stripped) + if add_match: + result.issues.append(DockerfileIssue( + line_number=line_num, + severity="info", + category="best-practice", + message="Using ADD for local files", + suggestion="Use COPY instead of ADD for local files (ADD has extra features that may be unexpected)", + )) + + # Check for secrets in ENV + if PATTERNS["env_secret"].match(stripped): + result.issues.append(DockerfileIssue( + line_number=line_num, + severity="critical", + category="security", + message="Potential secret exposed in ENV instruction", + suggestion="Use Docker secrets or mount secrets at runtime instead of ENV", + )) + + # Check for secrets in ARG + if PATTERNS["arg_secret"].match(stripped): + result.issues.append(DockerfileIssue( + line_number=line_num, + severity="warning", + category="security", + message="Potential secret in ARG instruction", + suggestion="ARG values are visible in image history.
Use build secrets (--secret) instead", + )) + + # Check for curl/wget piped to shell + if PATTERNS["run_pipe_shell"].match(stripped): + result.issues.append(DockerfileIssue( + line_number=line_num, + severity="warning", + category="security", + message="Piping remote script to shell", + suggestion="Download and verify scripts before executing, or use package managers", + )) + + # Check for apt-get without cleanup + if PATTERNS["apt_no_cleanup"].match(stripped): + result.issues.append(DockerfileIssue( + line_number=line_num, + severity="info", + category="size", + message="apt-get install without cleanup", + suggestion="Add 'rm -rf /var/lib/apt/lists/*' to reduce image size", + )) + + # Check for pip without --no-cache-dir + if PATTERNS["pip_no_cache"].match(stripped): + result.issues.append(DockerfileIssue( + line_number=line_num, + severity="info", + category="size", + message="pip install without cache optimization", + suggestion="Add '--no-cache-dir' to pip install to reduce image size", + )) + + # Check COPY . . (copies everything) + if PATTERNS["copy_all"].match(stripped): + result.issues.append(DockerfileIssue( + line_number=line_num, + severity="warning", + category="security", + message="COPY . . 
may include sensitive files (.git, .env, secrets)", + suggestion="Use .dockerignore or copy specific files/directories", + )) + + # Set result properties + result.base_images = from_images + result.stage_count = len(from_images) + result.multi_stage = len(from_images) > 1 + result.has_user_instruction = has_user + result.has_healthcheck = has_healthcheck + + # Add issue if running as root (no USER instruction) + if not has_user and from_images: + result.issues.append(DockerfileIssue( + line_number=last_from_line, + severity="warning", + category="security", + message="Container runs as root (no USER instruction)", + suggestion="Add 'USER nonroot' or 'USER 1000' to run as non-root user", + )) + + # Add info if no HEALTHCHECK + if not has_healthcheck and from_images: + result.issues.append(DockerfileIssue( + line_number=last_from_line, + severity="info", + category="best-practice", + message="No HEALTHCHECK instruction", + suggestion="Add HEALTHCHECK for container orchestration and health monitoring", + )) + + # Suggest multi-stage build for non-multi-stage Dockerfiles with build tools + if not result.multi_stage and from_images: + base_image = from_images[0][1].lower() + if any(lang in base_image for lang in ["golang", "rust", "node", "python", "java", "maven", "gradle"]): + result.issues.append(DockerfileIssue( + line_number=from_images[0][0], + severity="info", + category="size", + message="Consider using multi-stage build", + suggestion="Multi-stage builds can significantly reduce final image size by excluding build tools", + )) + + # Sort issues by severity + severity_order = {"critical": 0, "warning": 1, "info": 2} + result.issues.sort(key=lambda x: severity_order.get(x.severity, 3)) + + return result + + +def scan_dockerfile( + project_path: Path | None = None, + dockerfile_path: Path | None = None, +) -> DockerScanResult: + """ + Scan a Dockerfile for security issues and optimization opportunities. 
+ + Args: + project_path: Path to the project directory (looks for Dockerfile) + dockerfile_path: Direct path to a Dockerfile (overrides project_path) + + Returns: + DockerScanResult with issues and recommendations + """ + # Find the Dockerfile + if dockerfile_path is None: + dockerfile_path = find_dockerfile(project_path) + + if dockerfile_path is None or not dockerfile_path.exists(): + # Return empty result with path set + result = DockerScanResult( + dockerfile_path=Path(project_path or Path.cwd()) / "Dockerfile" + ) + result.issues.append(DockerfileIssue( + line_number=0, + severity="critical", + category="best-practice", + message="No Dockerfile found", + suggestion="Create a Dockerfile in the project root", + )) + return result + + # Parse and analyze + lines = parse_dockerfile(dockerfile_path) + result = analyze_dockerfile(lines) + result.dockerfile_path = dockerfile_path + + return result + + +def get_scan_result_dict(result: DockerScanResult) -> dict: + """Convert scan result to JSON-serializable dictionary.""" + return result.to_dict()