From 53941859710a7688f585d7c2fc8323f0bfb12dd3 Mon Sep 17 00:00:00 2001 From: Shreya Dutta Date: Tue, 27 Jan 2026 23:25:53 +0530 Subject: [PATCH 1/5] feat: add Dockerfile security and optimization analysis --- CHANGELOG.md | 11 + README.md | 54 +++++ man/slim.1 | 16 ++ slim/cli.py | 128 ++++++++++ slim/core/docker_images.py | 274 ++++++++++++++++++++++ slim/scanners/docker_scanner.py | 402 ++++++++++++++++++++++++++++++++ 6 files changed, 885 insertions(+) create mode 100644 slim/core/docker_images.py create mode 100644 slim/scanners/docker_scanner.py diff --git a/CHANGELOG.md b/CHANGELOG.md index dfbd8e3..2ecc8c9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -36,11 +36,21 @@ Initial release of SlimStack - Dependency hygiene and waste elimination CLI tool - **CLI Commands** - `slim version` - Display version - `slim help` - Show usage help + - `slim man` - Display detailed manual - `slim scan -py` - Scan Python dependencies - `slim scan -node` - Scan Node.js dependencies - `slim prune -py` - Remove unused Python packages - `slim prune -node` - Remove unused Node packages - `slim disk` - Disk usage analysis + - `slim docker` - Dockerfile security and optimization analysis + +- **Dockerfile Analysis** (NEW) + - Security anti-pattern detection (secrets in ENV, running as root, etc.) + - Hardened image recommendations (Chainguard, Alpine, distroless) + - Multi-stage build detection + - Best practice suggestions (HEALTHCHECK, COPY vs ADD, etc.) + - Severity filtering and security-only mode + - JSON output for CI/CD pipelines - **Safety Features** - Read-only scan operations by default @@ -57,6 +67,7 @@ Initial release of SlimStack - Dependency hygiene and waste elimination CLI tool - **Documentation** - Comprehensive README with usage examples - Unix man page (`man slim`) + - Built-in manual (`slim man`) ## [Unreleased] diff --git a/README.md b/README.md index 47a0f36..79b8287 100644 --- a/README.md +++ b/README.md @@ -144,6 +144,60 @@ Node.js node_modules: 324.1 MB api-server/node_modules ``` +### Docker Commands + +```bash +# Analyze Dockerfile for security and optimization issues +slim docker + +# Output as JSON (for CI/CD) +slim docker --json + +# Only show security issues +slim docker --security-only + +# Filter by severity (critical, warning, info) +slim docker --severity warning +``` + +**Example output:** +``` +SlimStack Dockerfile Analysis +================================ + +Dockerfile: Dockerfile +Base images: 1 +Multi-stage: No +Runs as non-root: No +Has HEALTHCHECK: No + +Issues Found (4): + + 🔴 Line 2: Potential secret exposed in ENV instruction + Category: security + → Use Docker secrets or mount secrets at runtime instead of ENV + + 🟡 Line 1: Container runs as root (no USER instruction) + Category: security + → Add 'USER nonroot' or 'USER 1000' to run as non-root user + +Image Recommendations: + + Current: python:3.12 + Recommended: python:3.12-slim + Reason: smaller - Debian slim variant, ~100MB smaller + Size: ~150MB + + Current: python:3.12 + Recommended: cgr.dev/chainguard/python:latest + Reason: hardened - Chainguard hardened image, zero CVEs + Size: ~50MB + +──────────────────────────────── +Summary: 1 critical, 2 warnings, 1 info + 2 image recommendations +``` + ## Safety SlimStack is designed with safety as a priority: diff --git a/man/slim.1 b/man/slim.1 index a2075a4..524dd5a 100644 --- a/man/slim.1 +++ b/man/slim.1 @@ -94,6 +94,22 @@ Scan a specific directory instead of current directory. .TP .B slim disk \-\-json Output disk usage as JSON. +.SS "Docker Commands" +.TP +.B slim docker +Analyze Dockerfile for security issues and optimization opportunities. Suggests hardened image alternatives. +.TP +.B slim docker \-\-json +Output analysis results as JSON for CI/CD pipelines. +.TP +.B slim docker \-\-path \fIPATH\fR +Analyze Dockerfile in a specific directory. +.TP +.B slim docker \-\-severity \fILEVEL\fR +Filter issues by minimum severity (critical, warning, info). +.TP +.B slim docker \-\-security\-only +Only show security-related issues. .SH OPTIONS .TP .B \-py, \-\-py diff --git a/slim/cli.py b/slim/cli.py index 45dff5e..33c7882 100644 --- a/slim/cli.py +++ b/slim/cli.py @@ -20,6 +20,9 @@ slim disk Show disk usage by ecosystem slim disk --by project Show disk usage by project slim disk --top 10 Limit results + + slim docker Analyze Dockerfile for issues + slim docker --json Output as JSON """ import argparse @@ -673,6 +676,121 @@ def cmd_disk(args: argparse.Namespace) -> int: return 0 +def cmd_docker_scan(args: argparse.Namespace) -> int: + """Scan Dockerfile for security issues and optimization opportunities.""" + from slim.scanners.docker_scanner import scan_dockerfile, get_scan_result_dict + + project_path = Path(args.path) if args.path else None + result = scan_dockerfile(project_path=project_path) + + if args.json: + output_json(get_scan_result_dict(result)) + return 0 + + # Human-readable output + colors_enabled = is_tty() + + print() + if colors_enabled: + print(f"{Colors.BOLD}SlimStack Dockerfile Analysis{Colors.RESET}") + else: + print("SlimStack Dockerfile Analysis") + print("=" * 32) + + # Check if Dockerfile was found + if not result.base_images: + if result.issues and result.issues[0].message == "No Dockerfile found": + error("No Dockerfile found in project.") + return 1 + + # Show basic info + print(f"\nDockerfile: {result.dockerfile_path}") + print(f"Base images: {len(result.base_images)}") + print(f"Multi-stage: {'Yes' if result.multi_stage else 'No'}") + print(f"Runs as non-root: {'Yes' if result.has_user_instruction else 'No'}") + print(f"Has HEALTHCHECK: {'Yes' if result.has_healthcheck else 'No'}") + + # Filter issues by severity if specified + issues = result.issues + min_severity = getattr(args, 'severity', None) + if min_severity: + severity_order = {"critical": 0, "warning": 1, "info": 2} + min_level = severity_order.get(min_severity.lower(), 2) + issues = [i for i in issues if severity_order.get(i.severity, 2) <= min_level] + + # Filter by security only if specified + if getattr(args, 'security_only', False): + issues = [i for i in issues if i.category == "security"] + + # Show issues + if issues: + print() + if colors_enabled: + print(f"{Colors.YELLOW}Issues Found ({len(issues)}):{Colors.RESET}") + else: + print(f"Issues Found ({len(issues)}):") + + for issue in issues: + # Severity icon and color + if issue.severity == "critical": + icon = "🔴" if colors_enabled else "[CRITICAL]" + color = Colors.RED if colors_enabled else "" + elif issue.severity == "warning": + icon = "🟡" if colors_enabled else "[WARNING]" + color = Colors.YELLOW if colors_enabled else "" + else: + icon = "🔵" if colors_enabled else "[INFO]" + color = Colors.CYAN if colors_enabled else "" + + reset = Colors.RESET if colors_enabled else "" + + print(f"\n {icon} {color}Line {issue.line_number}: {issue.message}{reset}") + print(f" Category: {issue.category}") + if colors_enabled: + print(f" {Colors.DIM}→ {issue.suggestion}{Colors.RESET}") + else: + print(f" → {issue.suggestion}") + else: + print(f"\n{Colors.GREEN if colors_enabled else ''}✓ No issues found!{Colors.RESET if colors_enabled else ''}") + + # Show recommendations + if result.recommendations: + print() + if colors_enabled: + print(f"{Colors.CYAN}Image Recommendations:{Colors.RESET}") + else: + print("Image Recommendations:") + + shown = set() + for rec in result.recommendations: + if rec.recommended_image in shown: + continue + shown.add(rec.recommended_image) + + print(f"\n Current: {rec.current_image}") + if colors_enabled: + print(f" {Colors.GREEN}Recommended: {rec.recommended_image}{Colors.RESET}") + else: + print(f" Recommended: {rec.recommended_image}") + print(f" Reason: {rec.reason} - {rec.description}") + if rec.size_estimate: + print(f" Size: {rec.size_estimate}") + + # Summary + critical = sum(1 for i in result.issues if i.severity == "critical") + warning = sum(1 for i in result.issues if i.severity == "warning") + info_count = sum(1 for i in result.issues if i.severity == "info") + + print(f"\n{'─' * 32}") + print(f"Summary: {critical} critical, {warning} warnings, {info_count} info") + + if result.recommendations: + print(f" {len(result.recommendations)} image recommendations") + + # Return non-zero if critical issues found + return 1 if critical > 0 else 0 + + def create_parser() -> argparse.ArgumentParser: """Create the argument parser.""" parser = argparse.ArgumentParser( @@ -724,6 +842,16 @@ def create_parser() -> argparse.ArgumentParser: disk_parser.add_argument("--path", "-p", help="Path to scan (default: current directory)") disk_parser.set_defaults(func=cmd_disk) + # docker scan command + docker_parser = subparsers.add_parser("docker", help="Dockerfile analysis and optimization") + docker_parser.add_argument("--json", action="store_true", help="Output as JSON") + docker_parser.add_argument("--path", "-p", help="Project path (default: current directory)") + docker_parser.add_argument("--severity", choices=["critical", "warning", "info"], + help="Minimum severity to report") + docker_parser.add_argument("--security-only", action="store_true", dest="security_only", + help="Only show security-related issues") + docker_parser.set_defaults(func=cmd_docker_scan) + return parser diff --git a/slim/core/docker_images.py b/slim/core/docker_images.py new file mode 100644 index 0000000..3fa5b20 --- /dev/null +++ b/slim/core/docker_images.py @@ -0,0 +1,274 @@ +"""Docker image alternatives and hardened image mappings.""" + +from dataclasses import dataclass + + +@dataclass +class ImageAlternative: + """A recommended alternative for a base image.""" + image: str + reason: str # "hardened", "smaller", "distroless", "alpine" + description: str + size_estimate: str | None = None # e.g., "~50MB" or "~800MB savings" + + +# Common base images and their recommended alternatives +# Organized by base image family +IMAGE_ALTERNATIVES: dict[str, list[ImageAlternative]] = { + # Python images + "python": [ + ImageAlternative( + image="{name}:{version}-slim", + reason="smaller", + description="Debian slim variant, ~100MB smaller", + size_estimate="~150MB", + ), + ImageAlternative( + image="{name}:{version}-alpine", + reason="alpine", + description="Alpine Linux base, minimal footprint", + size_estimate="~50MB", + ), + ImageAlternative( + image="cgr.dev/chainguard/python:latest", + reason="hardened", + description="Chainguard hardened image, zero CVEs, distroless", + size_estimate="~50MB", + ), + ], + + # Node.js images + "node": [ + ImageAlternative( + image="{name}:{version}-slim", + reason="smaller", + description="Debian slim variant", + size_estimate="~200MB", + ), + ImageAlternative( + image="{name}:{version}-alpine", + reason="alpine", + description="Alpine Linux base, minimal footprint", + size_estimate="~130MB", + ), + ImageAlternative( + image="cgr.dev/chainguard/node:latest", + reason="hardened", + description="Chainguard hardened image, zero CVEs", + size_estimate="~100MB", + ), + ], + + # Golang images + "golang": [ + ImageAlternative( + image="{name}:{version}-alpine", + reason="alpine", + description="Alpine variant for building", + size_estimate="~250MB", + ), + ImageAlternative( + image="gcr.io/distroless/static-debian12", + reason="distroless", + description="For final stage - static Go binaries only", + size_estimate="~2MB", + ), + ImageAlternative( + image="cgr.dev/chainguard/go:latest", + reason="hardened", + description="Chainguard hardened Go build image", + size_estimate="~200MB", + ), + ], + + # Java images + "openjdk": [ + ImageAlternative( + image="eclipse-temurin:{version}-jre-alpine", + reason="alpine", + description="Eclipse Temurin JRE on Alpine", + size_estimate="~150MB", + ), + ImageAlternative( + image="gcr.io/distroless/java17-debian12", + reason="distroless", + description="Google distroless Java runtime", + size_estimate="~200MB", + ), + ImageAlternative( + image="cgr.dev/chainguard/jre:latest", + reason="hardened", + description="Chainguard hardened JRE", + size_estimate="~100MB", + ), + ], + + # Ubuntu/Debian images + "ubuntu": [ + ImageAlternative( + image="ubuntu:{version}-minimal", + reason="smaller", + description="Ubuntu minimal variant", + size_estimate="~30MB", + ), + ImageAlternative( + image="debian:{version}-slim", + reason="smaller", + description="Debian slim variant", + size_estimate="~25MB", + ), + ImageAlternative( + image="cgr.dev/chainguard/wolfi-base", + reason="hardened", + description="Chainguard Wolfi base, security-focused", + size_estimate="~15MB", + ), + ], + + "debian": [ + ImageAlternative( + image="debian:{version}-slim", + reason="smaller", + description="Debian slim variant", + size_estimate="~25MB", + ), + ImageAlternative( + image="cgr.dev/chainguard/wolfi-base", + reason="hardened", + description="Chainguard Wolfi base, security-focused", + size_estimate="~15MB", + ), + ], + + # Nginx + "nginx": [ + ImageAlternative( + image="nginx:{version}-alpine", + reason="alpine", + description="Alpine variant", + size_estimate="~25MB", + ), + ImageAlternative( + image="cgr.dev/chainguard/nginx:latest", + reason="hardened", + description="Chainguard hardened nginx", + size_estimate="~15MB", + ), + ], + + # Redis + "redis": [ + ImageAlternative( + image="redis:{version}-alpine", + reason="alpine", + description="Alpine variant", + size_estimate="~30MB", + ), + ], + + # PostgreSQL + "postgres": [ + ImageAlternative( + image="postgres:{version}-alpine", + reason="alpine", + description="Alpine variant", + size_estimate="~80MB", + ), + ], +} + +# Images that are already optimized/hardened +ALREADY_OPTIMIZED = { + "alpine", + "distroless", + "chainguard", + "wolfi", + "scratch", + "busybox", + "gcr.io/distroless", + "cgr.dev/chainguard", +} + +# Tags that indicate security risk +RISKY_TAGS = { + "latest", + "dev", + "development", + "beta", + "rc", + "nightly", + "unstable", +} + + +def get_base_image_name(image: str) -> str: + """Extract the base image name without registry, tag, or variant.""" + # Remove registry prefix + if "/" in image: + parts = image.split("/") + image = parts[-1] + + # Remove tag + if ":" in image: + image = image.split(":")[0] + + return image.lower() + + +def get_image_tag(image: str) -> str: + """Extract the tag from an image string.""" + if ":" in image: + return image.split(":")[-1] + return "latest" # Default tag + + +def is_already_optimized(image: str) -> bool: + """Check if an image is already considered optimized.""" + image_lower = image.lower() + + # Check for optimized registries/prefixes + for prefix in ALREADY_OPTIMIZED: + if prefix in image_lower: + return True + + # Check for slim/alpine variants in tag + tag = get_image_tag(image).lower() + if any(variant in tag for variant in ["slim", "alpine", "minimal", "distroless"]): + return True + + return False + + +def has_risky_tag(image: str) -> bool: + """Check if an image uses a risky tag.""" + tag = get_image_tag(image).lower() + return tag in RISKY_TAGS + + +def get_alternatives(image: str) -> list[ImageAlternative]: + """Get recommended alternatives for a given image.""" + if is_already_optimized(image): + return [] + + base_name = get_base_image_name(image) + tag = get_image_tag(image) + + # Extract version from tag (e.g., "3.12-bookworm" -> "3.12") + version = tag.split("-")[0] if "-" in tag else tag + if version == "latest": + version = "latest" + + alternatives = IMAGE_ALTERNATIVES.get(base_name, []) + + # Format alternatives with actual version + result = [] + for alt in alternatives: + formatted_image = alt.image.format(name=base_name, version=version) + result.append(ImageAlternative( + image=formatted_image, + reason=alt.reason, + description=alt.description, + size_estimate=alt.size_estimate, + )) + + return result diff --git a/slim/scanners/docker_scanner.py b/slim/scanners/docker_scanner.py new file mode 100644 index 0000000..ccb7e07 --- /dev/null +++ b/slim/scanners/docker_scanner.py @@ -0,0 +1,402 @@ +"""Dockerfile scanner for security and optimization analysis.""" + +import re +from dataclasses import dataclass, field +from pathlib import Path +from typing import Iterator + +from slim.core.docker_images import ( + get_alternatives, + is_already_optimized, + has_risky_tag, + get_image_tag, + ImageAlternative, +) +from slim.core.utils import format_size + + +@dataclass +class DockerfileIssue: + """An issue found in a Dockerfile.""" + line_number: int + severity: str # "critical", "warning", "info" + category: str # "security", "size", "best-practice" + message: str + suggestion: str + + def to_dict(self) -> dict: + """Convert to JSON-serializable dictionary.""" + return { + "line_number": self.line_number, + "severity": self.severity, + "category": self.category, + "message": self.message, + "suggestion": self.suggestion, + } + + +@dataclass +class ImageRecommendation: + """A recommended alternative for a base image.""" + current_image: str + recommended_image: str + reason: str + description: str + line_number: int + size_estimate: str | None = None + + def to_dict(self) -> dict: + """Convert to JSON-serializable dictionary.""" + return { + "current_image": self.current_image, + "recommended_image": self.recommended_image, + "reason": self.reason, + "description": self.description, + "line_number": self.line_number, + "size_estimate": self.size_estimate, + } + + +@dataclass +class DockerScanResult: + """Result of scanning a Dockerfile.""" + dockerfile_path: Path + base_images: list[tuple[int, str]] = field(default_factory=list) # (line_num, image) + issues: list[DockerfileIssue] = field(default_factory=list) + recommendations: list[ImageRecommendation] = field(default_factory=list) + multi_stage: bool = False + stage_count: int = 0 + has_user_instruction: bool = False + has_healthcheck: bool = False + total_lines: int = 0 + + def to_dict(self) -> dict: + """Convert to JSON-serializable dictionary.""" + return { + "dockerfile_path": str(self.dockerfile_path), + "base_images": [ + {"line": line, "image": img} for line, img in self.base_images + ], + "issues": [issue.to_dict() for issue in self.issues], + "recommendations": [rec.to_dict() for rec in self.recommendations], + "multi_stage": self.multi_stage, + "stage_count": self.stage_count, + "has_user_instruction": self.has_user_instruction, + "has_healthcheck": self.has_healthcheck, + "total_lines": self.total_lines, + "summary": { + "total_issues": len(self.issues), + "critical": sum(1 for i in self.issues if i.severity == "critical"), + "warning": sum(1 for i in self.issues if i.severity == "warning"), + "info": sum(1 for i in self.issues if i.severity == "info"), + "recommendations_count": len(self.recommendations), + }, + } + + +# Patterns for detecting issues +PATTERNS = { + # FROM instruction - captures image name with optional AS alias + "from": re.compile(r"^\s*FROM\s+(\S+)(?:\s+AS\s+\S+)?", re.IGNORECASE), + + # USER instruction + "user": re.compile(r"^\s*USER\s+", re.IGNORECASE), + + # HEALTHCHECK instruction + "healthcheck": re.compile(r"^\s*HEALTHCHECK\s+", re.IGNORECASE), + + # ADD instruction (potential issue if used for local files) + "add": re.compile(r"^\s*ADD\s+(?!https?://|--from=)(\S+)", re.IGNORECASE), + + # ENV with potential secrets + "env_secret": re.compile( + r"^\s*ENV\s+\S*(PASSWORD|SECRET|KEY|TOKEN|CREDENTIAL|API_KEY)\s*=", + re.IGNORECASE + ), + + # ARG with potential secrets + "arg_secret": re.compile( + r"^\s*ARG\s+\S*(PASSWORD|SECRET|KEY|TOKEN|CREDENTIAL|API_KEY)", + re.IGNORECASE + ), + + # RUN with curl/wget piped to shell (risky) + "run_pipe_shell": re.compile( + r"^\s*RUN\s+.*(?:curl|wget)\s+.*\|\s*(?:bash|sh)", + re.IGNORECASE + ), + + # RUN apt-get without cleanup + "apt_no_cleanup": re.compile( + r"^\s*RUN\s+.*apt-get\s+install(?!.*rm\s+-rf\s+/var/lib/apt/lists)", + re.IGNORECASE + ), + + # RUN npm install without cache clean + "npm_no_cleanup": re.compile( + r"^\s*RUN\s+.*npm\s+install(?!.*npm\s+cache\s+clean)", + re.IGNORECASE + ), + + # RUN pip install without cache optimization + "pip_no_cache": re.compile( + r"^\s*RUN\s+.*pip\s+install(?!.*--no-cache-dir)", + re.IGNORECASE + ), + + # EXPOSE for common sensitive ports + "expose_sensitive": re.compile( + r"^\s*EXPOSE\s+(22|23|3389|5432|3306|27017|6379)\b", + re.IGNORECASE + ), + + # COPY . . at root level (copies everything including .git, secrets) + "copy_all": re.compile(r"^\s*COPY\s+\.\s+\.", re.IGNORECASE), +} + + +def find_dockerfile(project_path: Path | None = None) -> Path | None: + """Find Dockerfile in the project.""" + if project_path is None: + project_path = Path.cwd() + + # Look for Dockerfile (case-insensitive on Windows, case-sensitive on Unix) + dockerfile_names = ["Dockerfile", "dockerfile", "Dockerfile.prod", "Dockerfile.dev"] + + for name in dockerfile_names: + dockerfile = project_path / name + if dockerfile.exists(): + return dockerfile + + return None + + +def parse_dockerfile(dockerfile_path: Path) -> list[tuple[int, str]]: + """Parse a Dockerfile and return list of (line_number, line_content).""" + lines = [] + try: + content = dockerfile_path.read_text(encoding="utf-8") + for i, line in enumerate(content.splitlines(), start=1): + lines.append((i, line)) + except Exception: + pass + return lines + + +def analyze_dockerfile(lines: list[tuple[int, str]]) -> DockerScanResult: + """Analyze Dockerfile lines for issues and recommendations.""" + result = DockerScanResult( + dockerfile_path=Path("Dockerfile"), + total_lines=len(lines), + ) + + # Track state + from_images: list[tuple[int, str]] = [] + has_user = False + has_healthcheck = False + last_from_line = 0 + + for line_num, line in lines: + # Skip comments and empty lines + stripped = line.strip() + if not stripped or stripped.startswith("#"): + continue + + # Check FROM instruction + from_match = PATTERNS["from"].match(stripped) + if from_match: + image = from_match.group(1) + from_images.append((line_num, image)) + last_from_line = line_num + + # Check for risky tag + if has_risky_tag(image): + result.issues.append(DockerfileIssue( + line_number=line_num, + severity="warning", + category="security", + message=f"Image uses unpinned '{get_image_tag(image)}' tag", + suggestion="Pin to a specific version tag for reproducible builds", + )) + + # Get alternatives if not already optimized + if not is_already_optimized(image): + alternatives = get_alternatives(image) + for alt in alternatives[:2]: # Top 2 recommendations + result.recommendations.append(ImageRecommendation( + current_image=image, + recommended_image=alt.image, + reason=alt.reason, + description=alt.description, + line_number=line_num, + size_estimate=alt.size_estimate, + )) + + # Check USER instruction + if PATTERNS["user"].match(stripped): + has_user = True + + # Check HEALTHCHECK instruction + if PATTERNS["healthcheck"].match(stripped): + has_healthcheck = True + + # Check ADD for local files (should use COPY) + add_match = PATTERNS["add"].match(stripped) + if add_match: + result.issues.append(DockerfileIssue( + line_number=line_num, + severity="info", + category="best-practice", + message="Using ADD for local files", + suggestion="Use COPY instead of ADD for local files (ADD has extra features that may be unexpected)", + )) + + # Check for secrets in ENV + if PATTERNS["env_secret"].match(stripped): + result.issues.append(DockerfileIssue( + line_number=line_num, + severity="critical", + category="security", + message="Potential secret exposed in ENV instruction", + suggestion="Use Docker secrets or mount secrets at runtime instead of ENV", + )) + + # Check for secrets in ARG + if PATTERNS["arg_secret"].match(stripped): + result.issues.append(DockerfileIssue( + line_number=line_num, + severity="warning", + category="security", + message="Potential secret in ARG instruction", + suggestion="ARG values are visible in image history. Use build secrets (--secret) instead", + )) + + # Check for curl/wget piped to shell + if PATTERNS["run_pipe_shell"].match(stripped): + result.issues.append(DockerfileIssue( + line_number=line_num, + severity="warning", + category="security", + message="Piping remote script to shell", + suggestion="Download and verify scripts before executing, or use package managers", + )) + + # Check for apt-get without cleanup + if PATTERNS["apt_no_cleanup"].match(stripped): + result.issues.append(DockerfileIssue( + line_number=line_num, + severity="info", + category="size", + message="apt-get install without cleanup", + suggestion="Add 'rm -rf /var/lib/apt/lists/*' to reduce image size", + )) + + # Check for pip without --no-cache-dir + if PATTERNS["pip_no_cache"].match(stripped): + result.issues.append(DockerfileIssue( + line_number=line_num, + severity="info", + category="size", + message="pip install without cache optimization", + suggestion="Add '--no-cache-dir' to pip install to reduce image size", + )) + + # Check COPY . . (copies everything) + if PATTERNS["copy_all"].match(stripped): + result.issues.append(DockerfileIssue( + line_number=line_num, + severity="warning", + category="security", + message="COPY . . may include sensitive files (.git, .env, secrets)", + suggestion="Use .dockerignore or copy specific files/directories", + )) + + # Set result properties + result.base_images = from_images + result.stage_count = len(from_images) + result.multi_stage = len(from_images) > 1 + result.has_user_instruction = has_user + result.has_healthcheck = has_healthcheck + + # Add issue if running as root (no USER instruction) + if not has_user and from_images: + result.issues.append(DockerfileIssue( + line_number=last_from_line, + severity="warning", + category="security", + message="Container runs as root (no USER instruction)", + suggestion="Add 'USER nonroot' or 'USER 1000' to run as non-root user", + )) + + # Add info if no HEALTHCHECK + if not has_healthcheck and from_images: + result.issues.append(DockerfileIssue( + line_number=last_from_line, + severity="info", + category="best-practice", + message="No HEALTHCHECK instruction", + suggestion="Add HEALTHCHECK for container orchestration and health monitoring", + )) + + # Suggest multi-stage build for non-multi-stage Dockerfiles with build tools + if not result.multi_stage and from_images: + base_image = from_images[0][1].lower() + if any(lang in base_image for lang in ["golang", "rust", "node", "python", "java", "maven", "gradle"]): + result.issues.append(DockerfileIssue( + line_number=from_images[0][0], + severity="info", + category="size", + message="Consider using multi-stage build", + suggestion="Multi-stage builds can significantly reduce final image size by excluding build tools", + )) + + # Sort issues by severity + severity_order = {"critical": 0, "warning": 1, "info": 2} + result.issues.sort(key=lambda x: severity_order.get(x.severity, 3)) + + return result + + +def scan_dockerfile( + project_path: Path | None = None, + dockerfile_path: Path | None = None, +) -> DockerScanResult: + """ + Scan a Dockerfile for security issues and optimization opportunities. + + Args: + project_path: Path to the project directory (looks for Dockerfile) + dockerfile_path: Direct path to a Dockerfile (overrides project_path) + + Returns: + DockerScanResult with issues and recommendations + """ + # Find the Dockerfile + if dockerfile_path is None: + dockerfile_path = find_dockerfile(project_path) + + if dockerfile_path is None or not dockerfile_path.exists(): + # Return empty result with path set + result = DockerScanResult( + dockerfile_path=Path(project_path or Path.cwd()) / "Dockerfile" + ) + result.issues.append(DockerfileIssue( + line_number=0, + severity="critical", + category="best-practice", + message="No Dockerfile found", + suggestion="Create a Dockerfile in the project root", + )) + return result + + # Parse and analyze + lines = parse_dockerfile(dockerfile_path) + result = analyze_dockerfile(lines) + result.dockerfile_path = dockerfile_path + + return result + + +def get_scan_result_dict(result: DockerScanResult) -> dict: + """Convert scan result to JSON-serializable dictionary.""" + return result.to_dict() From e7ce169f06cb545a1413b136142926f431a3635b Mon Sep 17 00:00:00 2001 From: Shreya Dutta Date: Tue, 27 Jan 2026 23:49:04 +0530 Subject: [PATCH 2/5] nitpicks cleared --- README.md | 2 +- slim/core/docker_images.py | 2 -- slim/scanners/docker_scanner.py | 6 ++++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 79b8287..8c7c7e9 100644 --- a/README.md +++ b/README.md @@ -161,7 +161,7 @@ slim docker --severity warning ``` **Example output:** -``` +```text SlimStack Dockerfile Analysis ================================ diff --git a/slim/core/docker_images.py b/slim/core/docker_images.py index 3fa5b20..1d24369 100644 --- a/slim/core/docker_images.py +++ b/slim/core/docker_images.py @@ -255,8 +255,6 @@ def get_alternatives(image: str) -> list[ImageAlternative]: # Extract version from tag (e.g., "3.12-bookworm" -> "3.12") version = tag.split("-")[0] if "-" in tag else tag - if version == "latest": - version = "latest" alternatives = IMAGE_ALTERNATIVES.get(base_name, []) diff --git a/slim/scanners/docker_scanner.py b/slim/scanners/docker_scanner.py index ccb7e07..02ac2ca 100644 --- a/slim/scanners/docker_scanner.py +++ b/slim/scanners/docker_scanner.py @@ -178,8 +178,10 @@ def parse_dockerfile(dockerfile_path: Path) -> list[tuple[int, str]]: content = dockerfile_path.read_text(encoding="utf-8") for i, line in enumerate(content.splitlines(), start=1): lines.append((i, line)) - except Exception: - pass + except OSError as e: + # Log or handle file access errors appropriately + import logging + logging.warning(f"Failed to read Dockerfile {dockerfile_path}: {e}") return lines From f4865229a3b5db1669d7027f74f74fc4a3942560 Mon Sep 17 00:00:00 2001 From: Shreya Dutta <136884688+arceuzvx@users.noreply.github.com> Date: Tue, 27 Jan 2026 23:58:50 +0530 Subject: [PATCH 3/5] Update slim/core/docker_images.py Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> --- slim/core/docker_images.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/slim/core/docker_images.py b/slim/core/docker_images.py index 1d24369..e964f47 100644 --- a/slim/core/docker_images.py +++ b/slim/core/docker_images.py @@ -256,11 +256,18 @@ def get_alternatives(image: str) -> list[ImageAlternative]: # Extract version from tag (e.g., "3.12-bookworm" -> "3.12") version = tag.split("-")[0] if "-" in tag else tag + # Skip version templating for 'latest' - use a placeholder or skip templated alternatives + if version == "latest": + version = "latest" # Hardcoded alternatives will still work + alternatives = IMAGE_ALTERNATIVES.get(base_name, []) # Format alternatives with actual version result = [] for alt in alternatives: + # Skip templated alternatives when version is 'latest' + if version == "latest" and "{version}" in alt.image: + continue formatted_image = alt.image.format(name=base_name, version=version) result.append(ImageAlternative( image=formatted_image, @@ -270,3 +277,4 @@ def get_alternatives(image: str) -> list[ImageAlternative]: )) return result + return result From 5e9e987308d6d6a42fb3d99cad3fa03c8902ca86 Mon Sep 17 00:00:00 2001 From: Shreya Dutta Date: Wed, 28 Jan 2026 00:11:29 +0530 Subject: [PATCH 4/5] nitpicks cleared 2.0 --- slim/core/docker_images.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/slim/core/docker_images.py b/slim/core/docker_images.py index 1d24369..5d077df 100644 --- a/slim/core/docker_images.py +++ b/slim/core/docker_images.py @@ -203,22 +203,22 @@ class ImageAlternative: def get_base_image_name(image: str) -> str: """Extract the base image name without registry, tag, or variant.""" - # Remove registry prefix - if "/" in image: - parts = image.split("/") - image = parts[-1] - - # Remove tag - if ":" in image: - image = image.split(":")[0] - - return image.lower() + # Drop digest if present + image = image.split("@", 1)[0] + # Isolate last path segment (handles registries with ports) + last_segment = image.rsplit("/", 1)[-1] + # Remove tag only if it appears after the last "/" + if ":" in last_segment: + last_segment = last_segment.rsplit(":", 1)[0] + return last_segment.lower() def get_image_tag(image: str) -> str: - """Extract the tag from an image string.""" - if ":" in image: - return image.split(":")[-1] + # Drop digest if present + image = image.split("@", 1)[0] + last_segment = image.rsplit("/", 1)[-1] + if ":" in last_segment: + return last_segment.rsplit(":", 1)[1] return "latest" # Default tag From 69d120d36d90731a0d10c5aaa6b7e98fa535834c Mon Sep 17 00:00:00 2001 From: Shreya Dutta Date: Wed, 28 Jan 2026 00:32:14 +0530 Subject: [PATCH 5/5] big changes in docker_images file --- slim/core/docker_images.py | 272 ++++++++++++++++++------------------- 1 file changed, 136 insertions(+), 136 deletions(-) diff --git a/slim/core/docker_images.py b/slim/core/docker_images.py index b146742..324fa32 100644 --- a/slim/core/docker_images.py +++ b/slim/core/docker_images.py @@ -1,280 +1,280 @@ -"""Docker image alternatives and hardened image mappings.""" + +""" +Docker image alternatives and hardened image mappings. + +Advisory-only: recommendations are heuristic and non-authoritative. +""" from dataclasses import dataclass +from enum import Enum +from typing import Optional + +class AlternativeReason(str, Enum): + HARDENED = "hardened" + SMALLER = "smaller" + DISTROLESS = "distroless" + ALPINE = "alpine" -@dataclass + +@dataclass(frozen=True) class ImageAlternative: """A recommended alternative for a base image.""" image: str - reason: str # "hardened", "smaller", "distroless", "alpine" + reason: AlternativeReason description: str - size_estimate: str | None = None # e.g., "~50MB" or "~800MB savings" + size_estimate: Optional[str] = None # heuristic only + +# ----------------------------- +# Static recommendation mapping +# ----------------------------- -# Common base images and their recommended alternatives -# Organized by base image family IMAGE_ALTERNATIVES: dict[str, list[ImageAlternative]] = { - # Python images "python": [ ImageAlternative( image="{name}:{version}-slim", - reason="smaller", - description="Debian slim variant, ~100MB smaller", + reason=AlternativeReason.SMALLER, + description="Debian slim variant", size_estimate="~150MB", ), ImageAlternative( image="{name}:{version}-alpine", - reason="alpine", - description="Alpine Linux base, minimal footprint", + reason=AlternativeReason.ALPINE, + description="Alpine Linux base", size_estimate="~50MB", ), ImageAlternative( - image="cgr.dev/chainguard/python:latest", - reason="hardened", - description="Chainguard hardened image, zero CVEs, distroless", + image="cgr.dev/chainguard/python", + reason=AlternativeReason.HARDENED, + description="Chainguard hardened, distroless", size_estimate="~50MB", ), ], - - # Node.js images "node": [ ImageAlternative( image="{name}:{version}-slim", - reason="smaller", + reason=AlternativeReason.SMALLER, description="Debian slim variant", size_estimate="~200MB", ), ImageAlternative( image="{name}:{version}-alpine", - reason="alpine", - description="Alpine Linux base, minimal footprint", + reason=AlternativeReason.ALPINE, + description="Alpine Linux base", size_estimate="~130MB", ), ImageAlternative( - image="cgr.dev/chainguard/node:latest", - reason="hardened", - description="Chainguard hardened image, zero CVEs", + image="cgr.dev/chainguard/node", + reason=AlternativeReason.HARDENED, + description="Chainguard hardened Node.js", size_estimate="~100MB", ), ], - - # Golang images "golang": [ ImageAlternative( image="{name}:{version}-alpine", - reason="alpine", - description="Alpine variant for building", + reason=AlternativeReason.ALPINE, + description="Alpine build image", size_estimate="~250MB", ), ImageAlternative( image="gcr.io/distroless/static-debian12", - reason="distroless", - description="For final stage - static Go binaries only", + reason=AlternativeReason.DISTROLESS, + description="Distroless runtime for static Go binaries", size_estimate="~2MB", ), ImageAlternative( - image="cgr.dev/chainguard/go:latest", - reason="hardened", + image="cgr.dev/chainguard/go", + reason=AlternativeReason.HARDENED, description="Chainguard hardened Go build image", size_estimate="~200MB", ), ], - - # Java images "openjdk": [ ImageAlternative( image="eclipse-temurin:{version}-jre-alpine", - reason="alpine", - description="Eclipse Temurin JRE on Alpine", + reason=AlternativeReason.ALPINE, + description="Temurin JRE on Alpine", size_estimate="~150MB", ), ImageAlternative( image="gcr.io/distroless/java17-debian12", - reason="distroless", - description="Google distroless Java runtime", + reason=AlternativeReason.DISTROLESS, + description="Distroless Java runtime", size_estimate="~200MB", ), ImageAlternative( - image="cgr.dev/chainguard/jre:latest", - reason="hardened", + image="cgr.dev/chainguard/jre", + reason=AlternativeReason.HARDENED, description="Chainguard hardened JRE", size_estimate="~100MB", ), ], - - # Ubuntu/Debian images "ubuntu": [ ImageAlternative( image="ubuntu:{version}-minimal", - reason="smaller", - description="Ubuntu minimal variant", + reason=AlternativeReason.SMALLER, + description="Ubuntu minimal", size_estimate="~30MB", ), ImageAlternative( image="debian:{version}-slim", - reason="smaller", - description="Debian slim variant", + reason=AlternativeReason.SMALLER, + description="Debian slim", size_estimate="~25MB", ), ImageAlternative( image="cgr.dev/chainguard/wolfi-base", - reason="hardened", - description="Chainguard Wolfi base, security-focused", + reason=AlternativeReason.HARDENED, + description="Wolfi security-focused base", size_estimate="~15MB", ), ], - "debian": [ ImageAlternative( image="debian:{version}-slim", - reason="smaller", - description="Debian slim variant", + reason=AlternativeReason.SMALLER, + description="Debian slim", size_estimate="~25MB", ), ImageAlternative( image="cgr.dev/chainguard/wolfi-base", - reason="hardened", - description="Chainguard Wolfi base, security-focused", + reason=AlternativeReason.HARDENED, + description="Wolfi security-focused base", size_estimate="~15MB", ), ], - - # Nginx "nginx": [ ImageAlternative( image="nginx:{version}-alpine", - reason="alpine", + reason=AlternativeReason.ALPINE, description="Alpine variant", size_estimate="~25MB", ), ImageAlternative( - image="cgr.dev/chainguard/nginx:latest", - reason="hardened", + image="cgr.dev/chainguard/nginx", + reason=AlternativeReason.HARDENED, description="Chainguard hardened nginx", size_estimate="~15MB", ), ], - - # Redis "redis": [ ImageAlternative( image="redis:{version}-alpine", - reason="alpine", + reason=AlternativeReason.ALPINE, description="Alpine variant", size_estimate="~30MB", ), ], - - # PostgreSQL "postgres": [ ImageAlternative( image="postgres:{version}-alpine", - reason="alpine", + reason=AlternativeReason.ALPINE, description="Alpine variant", size_estimate="~80MB", ), ], } -# Images that are already optimized/hardened -ALREADY_OPTIMIZED = { - "alpine", - "distroless", - "chainguard", - "wolfi", - "scratch", - "busybox", - "gcr.io/distroless", + +# ----------------------------- +# Risk & optimization signals +# ----------------------------- + +RISKY_TAGS = {"latest", "dev", "development", "beta", "rc", "nightly", "unstable"} + +HARDENED_REGISTRIES = { "cgr.dev/chainguard", + "gcr.io/distroless", } -# Tags that indicate security risk -RISKY_TAGS = { - "latest", - "dev", - "development", - "beta", - "rc", - "nightly", - "unstable", -} +OPTIMIZED_TAG_MARKERS = {"slim", "alpine", "minimal", "distroless"} + + +# ----------------------------- +# Parsing helpers +# ----------------------------- + +def _strip_digest(image: str) -> str: + return image.split("@", 1)[0] def get_base_image_name(image: str) -> str: - """Extract the base image name without registry, tag, or variant.""" - # Drop digest if present - image = image.split("@", 1)[0] - # Isolate last path segment (handles registries with ports) + image = _strip_digest(image) last_segment = image.rsplit("/", 1)[-1] - # Remove tag only if it appears after the last "/" if ":" in last_segment: last_segment = last_segment.rsplit(":", 1)[0] return last_segment.lower() def get_image_tag(image: str) -> str: - # Drop digest if present - image = image.split("@", 1)[0] + image = _strip_digest(image) last_segment = image.rsplit("/", 1)[-1] if ":" in last_segment: return last_segment.rsplit(":", 1)[1] - return "latest" # Default tag - - -def is_already_optimized(image: str) -> bool: - """Check if an image is already considered optimized.""" - image_lower = image.lower() - - # Check for optimized registries/prefixes - for prefix in ALREADY_OPTIMIZED: - if prefix in image_lower: - return True - - # Check for slim/alpine variants in tag - tag = get_image_tag(image).lower() - if any(variant in tag for variant in ["slim", "alpine", "minimal", "distroless"]): - return True - - return False + return "latest" +def get_image_registry(image: str) -> Optional[str]: + parts = image.split("/") + if len(parts) > 1 and "." in parts[0]: + return parts[0] + return None + + +# ----------------------------- +# Classification +# ----------------------------- + def has_risky_tag(image: str) -> bool: - """Check if an image uses a risky tag.""" + if "@" in image: + return False # digest-pinned + return get_image_tag(image).lower() in RISKY_TAGS + + +def is_hardened_image(image: str) -> bool: + registry = get_image_registry(image) + return registry in HARDENED_REGISTRIES + + +def is_optimized_image(image: str) -> bool: tag = get_image_tag(image).lower() - return tag in RISKY_TAGS + return any(marker in tag for marker in OPTIMIZED_TAG_MARKERS) + + +# ----------------------------- +# Recommendation engine +# ----------------------------- + +def _extract_version(tag: str) -> Optional[str]: + if tag in RISKY_TAGS: + return None + return tag.split("-", 1)[0] def get_alternatives(image: str) -> list[ImageAlternative]: - """Get recommended alternatives for a given image.""" - if is_already_optimized(image): - return [] - base_name = get_base_image_name(image) - tag = get_image_tag(image) - - # Extract version from tag (e.g., "3.12-bookworm" -> "3.12") - version = tag.split("-")[0] if "-" in tag else tag - - # Skip version templating for 'latest' - use a placeholder or skip templated alternatives - if version == "latest": - version = "latest" # Hardcoded alternatives will still work - + tag = get_image_tag(image).lower() + version = _extract_version(tag) + alternatives = IMAGE_ALTERNATIVES.get(base_name, []) - - # Format alternatives with actual version - result = [] + results: list[ImageAlternative] = [] + for alt in alternatives: - # Skip templated alternatives when version is 'latest' - if version == "latest" and "{version}" in alt.image: + if "{version}" in alt.image and not version: continue - formatted_image = alt.image.format(name=base_name, version=version) - result.append(ImageAlternative( - image=formatted_image, - reason=alt.reason, - description=alt.description, - size_estimate=alt.size_estimate, - )) - - return result - return result + + formatted = ( + alt.image.format(name=base_name, version=version) + if "{version}" in alt.image + else alt.image + ) + + # Do not recommend the same image + if formatted == image: + continue + + results.append(alt) + + return results