PSPDFKit-labs · matej · Feb 9, 2026 · Feb 9, 2026 · Feb 10, 2026 · Feb 10, 2026
diff --git a/.claude/agents/compliance.md b/.claude/agents/compliance.md
@@ -0,0 +1,8 @@
+---
+name: compliance
+model: claude-sonnet-4-5
+description: CLAUDE.md compliance specialist
+---
+
+Audit changed files against relevant CLAUDE.md guidance.
+Return only JSON findings with concrete rule references.
diff --git a/.claude/agents/quality.md b/.claude/agents/quality.md
@@ -0,0 +1,8 @@
+---
+name: quality
+model: claude-opus-4-6
+description: Code quality specialist for correctness and reliability
+---
+
+Find high-signal correctness, reliability, and performance issues.
+Return only JSON findings.
diff --git a/.claude/agents/security.md b/.claude/agents/security.md
@@ -0,0 +1,8 @@
+---
+name: security
+model: claude-opus-4-6
+description: Security specialist for exploitable vulnerabilities
+---
+
+Find exploitable vulnerabilities in changed code with concrete attack paths.
+Return only JSON findings including exploit preconditions and trust boundary.
diff --git a/.claude/agents/triage.md b/.claude/agents/triage.md
@@ -0,0 +1,8 @@
+---
+name: triage
+model: claude-haiku-4-5
+description: Fast PR triage for skip/continue decisions
+---
+
+Determine whether review can be skipped safely.
+Return only JSON with `skip_review`, `reason`, and `risk_level`.
diff --git a/.claude/agents/validator.md b/.claude/agents/validator.md
@@ -0,0 +1,8 @@
+---
+name: validator
+model: claude-sonnet-4-5
+description: Finding validation and deduplication specialist
+---
+
+Validate candidate findings with strict confidence and impact criteria.
+Return only JSON decisions for keep/drop.
diff --git a/.claude/commands/review.md b/.claude/commands/review.md
@@ -40,6 +40,12 @@ To do this, follow these steps precisely:
    Agent 4: Opus security agent
    Look for security vulnerabilities in the introduced code. This includes injection, auth bypass, data exposure, unsafe deserialization, or other exploitable issues. Only look for issues that fall within the changed code.
 
+   Security evidence requirements for every reported issue:
+   - Include a concrete exploit or abuse path.
+   - Include attacker preconditions.
+   - Identify the impacted trust boundary or sensitive asset.
+   - Provide an actionable mitigation.
+
    **CRITICAL: We only want HIGH SIGNAL issues.** Flag issues where:
    - The code will fail to compile or parse (syntax errors, type errors, missing imports, unresolved references)
    - The code will definitely produce wrong results regardless of inputs (clear logic errors)
@@ -52,6 +58,7 @@ To do this, follow these steps precisely:
    - Subjective suggestions or improvements
    - Security issues that depend on speculative inputs or unverified assumptions
    - Denial of Service (DoS) or rate limiting issues without concrete exploitability
+   - Findings based only on diff snippets without validating surrounding repository context
 
    If you are not certain an issue is real, do not flag it. False positives erode trust and waste reviewer time.
 

diff --git a/.github/workflows/code-review.yml b/.github/workflows/code-review.yml
@@ -29,3 +29,4 @@ jobs:
           comment-pr: true
           upload-results: true
           claude-api-key: ${{ secrets.CLAUDE_API_KEY }}
+          trigger-on-commit: true
diff --git a/.gitignore b/.gitignore
@@ -1,11 +1,22 @@
+# OS-generated files
+.DS_Store
+Thumbs.db
+
 # Cache directories
 .cache/
+.pytest_cache/
 
 # Python
 __pycache__/
 *.py[cod]
 *$py.class
 *.pyc
+.python-version
+.mypy_cache/
+.ruff_cache/
+.coverage
+.coverage.*
+htmlcov/
 
 # Output files
 *.csv
@@ -21,4 +32,17 @@ env/
 claudecode/claudecode-prompt.txt
 eval_results/
 
-.env
+.env
+.env.*
+
+# Editor / IDE
+.idea/
+.vscode/
+*.swp
+*.swo
+
+# Node / Bun
+node_modules/
+
+# Logs
+*.log
diff --git a/README.md b/README.md
@@ -1,6 +1,6 @@
 # Nutrient Code Reviewer
 
-An AI-powered code review GitHub Action using Claude to analyze code changes. Uses a unified multi-agent approach for both code quality (correctness, reliability, performance, maintainability, testing) and security in a single pass. This action provides intelligent, context-aware review for pull requests using Anthropic's Claude Code tool for deep semantic analysis.
+An AI-powered code review GitHub Action using Claude to analyze code changes. Uses a unified multi-agent, multi-phase approach for both code quality (correctness, reliability, performance, maintainability, testing) and security. This action provides intelligent, context-aware review for pull requests using Anthropic's Claude Code tool for deep semantic analysis.
 
 Based on the original work from [anthropics/claude-code-security-review](https://github.com/anthropics/claude-code-security-review).
 
@@ -111,7 +111,12 @@ This action is not hardened against prompt injection attacks and should only be
 | `comment-pr` | Whether to comment on PRs with findings | `true` | No |
 | `upload-results` | Whether to upload results as artifacts | `true` | No |
 | `exclude-directories` | Comma-separated list of directories to exclude from scanning | None | No |
-| `claude-model` | Claude [model name](https://docs.anthropic.com/en/docs/about-claude/models/overview#model-names) to use. Defaults to Opus 4.5. | `claude-opus-4-5-20251101` | No |
+| `claude-model` | Claude [model name](https://docs.anthropic.com/en/docs/about-claude/models/overview#model-names) to use. Defaults to Opus 4.6. | `claude-opus-4-6` | No |
+| `model-triage` | Model used for the triage phase (skip/continue decision). | `claude-haiku-4-5` | No |
+| `model-compliance` | Model used for the CLAUDE.md compliance phase. | `claude-sonnet-4-5` | No |
+| `model-quality` | Model used for the code quality phase. | `claude-opus-4-6` | No |
+| `model-security` | Model used for the security phase. | `claude-opus-4-6` | No |
+| `model-validation` | Model used for the finding validation phase. | `claude-sonnet-4-5` | No |
 | `claudecode-timeout` | Timeout for ClaudeCode analysis in minutes | `20` | No |
 | `run-every-commit` | Run ClaudeCode on every commit (skips cache check). Warning: May increase false positives on PRs with many commits. **Deprecated**: Use `trigger-on-commit` instead. | `false` | No |
 | `trigger-on-open` | Run review when PR is first opened | `true` | No |
@@ -127,6 +132,7 @@ This action is not hardened against prompt injection attacks and should only be
 | `skip-draft-prs` | Skip code review on draft pull requests | `true` | No |
 | `app-slug` | GitHub App slug for bot mention detection. If using `actions/create-github-app-token@v1.9.0+`, pass `${{ steps.app-token.outputs.app-slug }}`. Otherwise defaults to `github-actions`. | `github-actions` | No |
 | `require-label` | Only run review if this label is present. Leave empty to review all PRs. Add `labeled` to your workflow `pull_request` types to trigger on label addition. | None | No |
+| `max-diff-lines` | Maximum number of diff lines embedded inline in the prompt as an anchor; reading the repository through tools is still required in all cases. | `5000` | No |
 
 ### Action Outputs
 
@@ -294,11 +300,12 @@ claudecode/
 
 ### Workflow
 
-1. **PR Analysis**: When a pull request is opened, Claude analyzes the diff to understand what changed
-2. **Contextual Review**: Claude examines the code changes in context, understanding the purpose and potential impacts
-3. **Finding Generation**: Issues are identified with detailed explanations, severity ratings, and remediation guidance
-4. **False Positive Filtering**: Advanced filtering removes low-impact or false positive prone findings to reduce noise
-5. **PR Comments**: Findings are posted as review comments on the specific lines of code
+1. **Triage Phase**: A fast triage pass determines if review should proceed.
+2. **Context Discovery**: Claude discovers relevant CLAUDE.md files, hotspots, and risky code paths.
+3. **Specialist Review**: Dedicated compliance, quality, and security phases run with configurable models.
+4. **Validation Phase**: Candidate findings are validated and deduplicated for high signal.
+5. **False Positive Filtering**: Additional filtering removes low-impact noise.
+6. **PR Comments**: Findings are posted as review comments on specific lines in the PR.
 
 ## Review Capabilities
 

diff --git a/action.yml b/action.yml
@@ -29,10 +29,35 @@ inputs:
     default: ''
 
   claude-model:
-    description: 'Claude model to use for code review analysis (e.g., claude-sonnet-4-20250514)'
+    description: 'Claude model to use for code review analysis (e.g., claude-sonnet-4-5)'
     required: false
     default: ''
 
+  model-triage:
+    description: 'Model for triage phase'
+    required: false
+    default: 'claude-haiku-4-5'
+
+  model-compliance:
+    description: 'Model for CLAUDE.md compliance phase'
+    required: false
+    default: 'claude-sonnet-4-5'
+
+  model-quality:
+    description: 'Model for code quality phase'
+    required: false
+    default: 'claude-opus-4-6'
+
+  model-security:
+    description: 'Model for security phase'
+    required: false
+    default: 'claude-opus-4-6'
+
+  model-validation:
+    description: 'Model for validation phase'
+    required: false
+    default: 'claude-sonnet-4-5'
+
   run-every-commit:
     description: 'DEPRECATED: Use trigger-on-commit instead. Run ClaudeCode on every commit (skips cache check). Warning: This may lead to more false positives on PRs with many commits as the AI analyzes the same code multiple times.'
     required: false
@@ -351,6 +376,11 @@ runs:
         CUSTOM_REVIEW_INSTRUCTIONS: ${{ inputs.custom-review-instructions }}
         CUSTOM_SECURITY_SCAN_INSTRUCTIONS: ${{ inputs.custom-security-scan-instructions }}
         CLAUDE_MODEL: ${{ inputs.claude-model }}
+        MODEL_TRIAGE: ${{ inputs.model-triage }}
+        MODEL_COMPLIANCE: ${{ inputs.model-compliance }}
+        MODEL_QUALITY: ${{ inputs.model-quality }}
+        MODEL_SECURITY: ${{ inputs.model-security }}
+        MODEL_VALIDATION: ${{ inputs.model-validation }}
         CLAUDECODE_TIMEOUT: ${{ inputs.claudecode-timeout }}
         MAX_DIFF_LINES: ${{ inputs.max-diff-lines }}
         ACTION_PATH: ${{ github.action_path }}

diff --git a/claudecode/__init__.py b/claudecode/__init__.py
@@ -12,11 +12,16 @@
 from claudecode.github_action_audit import (
     GitHubActionClient,
     SimpleClaudeRunner,
+    get_review_model_config,
     main
 )
+from claudecode.review_orchestrator import ReviewModelConfig, ReviewOrchestrator
 
 __all__ = [
     "GitHubActionClient",
     "SimpleClaudeRunner",
+    "ReviewModelConfig",
+    "ReviewOrchestrator",
+    "get_review_model_config",
     "main"
 ]
diff --git a/claudecode/claude_api_client.py b/claudecode/claude_api_client.py
@@ -59,7 +59,7 @@ def validate_api_access(self) -> Tuple[bool, str]:
         try:
             # Simple test call to verify API access
             self.client.messages.create(
-                model="claude-3-5-haiku-20241022",
+                model="claude-haiku-4-5",
                 max_tokens=10,
                 messages=[{"role": "user", "content": "Hello"}],
                 timeout=10

diff --git a/claudecode/constants.py b/claudecode/constants.py
@@ -5,7 +5,7 @@
 import os
 
 # API Configuration
-DEFAULT_CLAUDE_MODEL = os.environ.get('CLAUDE_MODEL') or 'claude-opus-4-5-20251101'
+DEFAULT_CLAUDE_MODEL = os.environ.get('CLAUDE_MODEL') or 'claude-opus-4-6'
 DEFAULT_TIMEOUT_SECONDS = 180  # 3 minutes
 DEFAULT_MAX_RETRIES = 3
 RATE_LIMIT_BACKOFF_MAX = 30  # Maximum backoff time for rate limits
@@ -20,4 +20,3 @@
 
 # Subprocess Configuration
 SUBPROCESS_TIMEOUT = 1200  # 20 minutes for Claude Code execution
-
diff --git a/claudecode/example_utils.py b/claudecode/example_utils.py
@@ -0,0 +1,50 @@
+"""Example utilities with intentional issues for testing code review."""
+
+import pickle
+import subprocess
+import os
+
+
+def load_user_data(serialized_data):
+    """Load user data from serialized format."""
+    # Security issue: unsafe pickle deserialization
+    return pickle.loads(serialized_data)
-    return pickle.loads(serialized_data)
+    return json.loads(serialized_data)
+
+
+def run_command(user_input):
+    """Run a shell command based on user input."""
+    # Security issue: command injection
+    result = subprocess.run(f"echo {user_input}", shell=True, capture_output=True)
-    result = subprocess.run(f"echo {user_input}", shell=True, capture_output=True)
+    result = subprocess.run(["echo", user_input], capture_output=True)
+    return result.stdout.decode()
+
+
+def read_file(filename):
+    """Read a file from disk."""
+    # Security issue: path traversal
+    path = f"/data/{filename}"
+    with open(path, "r") as f:
-    path = f"/data/{filename}"
-    with open(path, "r") as f:
+    safe_filename = os.path.basename(filename)
+    path = f"/data/{safe_filename}"
+    with open(path, "r") as f:
+        return f.read()
+
+
+def divide_numbers(a, b):
+    """Divide two numbers."""
+    # Code quality issue: no zero division check
+    return a / b
+
+
+def process_items(items):
+    """Process a list of items."""
+    results = []
+    for i in range(len(items)):
+        # Code quality issue: inefficient iteration
+        for j in range(len(items)):
+            if items[i] == items[j]:
+                results.append(items[i])
+    return results
+
+
+def get_user_by_id(user_id, connection):
+    """Get user from database."""
+    # Security issue: SQL injection
+    query = f"SELECT * FROM users WHERE id = {user_id}"
+    return connection.execute(query)
-    query = f"SELECT * FROM users WHERE id = {user_id}"
-    return connection.execute(query)
+    query = "SELECT * FROM users WHERE id = ?"
+    return connection.execute(query, (user_id,))
diff --git a/claudecode/findings_merge.py b/claudecode/findings_merge.py
@@ -0,0 +1,61 @@
+"""Utilities for merging and deduplicating findings from multiple phases."""
+
+from typing import Any, Dict, List, Tuple
+
+
+def _normalize_text(value: Any) -> str:
+    return str(value or "").strip().lower()
+
+
+def _finding_key(finding: Dict[str, Any]) -> Tuple[str, int, str, str]:
+    file_path = _normalize_text(finding.get("file"))
+    line = finding.get("line")
+    try:
+        line_no = int(line)
+    except (TypeError, ValueError):
+        line_no = 1
+    category = _normalize_text(finding.get("category"))
+    title = _normalize_text(finding.get("title"))
+    return file_path, line_no, category, title
+
+
+def _severity_rank(value: Any) -> int:
+    sev = _normalize_text(value).upper()
+    if sev == "HIGH":
+        return 3
+    if sev == "MEDIUM":
+        return 2
+    if sev == "LOW":
+        return 1
+    return 0
+
+
+def _confidence_value(value: Any) -> float:
+    try:
+        return float(value)
+    except (TypeError, ValueError):
+        return 0.0
+
+
+def merge_findings(findings: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+    """Merge duplicate findings and keep the strongest candidate."""
+    merged: Dict[Tuple[str, int, str, str], Dict[str, Any]] = {}
+
+    for finding in findings:
+        if not isinstance(finding, dict):
+            continue
+
+        key = _finding_key(finding)
+        existing = merged.get(key)
+
+        if existing is None:
+            merged[key] = finding
+            continue
+
+        incoming_score = (_severity_rank(finding.get("severity")), _confidence_value(finding.get("confidence")))
+        existing_score = (_severity_rank(existing.get("severity")), _confidence_value(existing.get("confidence")))
+
+        if incoming_score > existing_score:
+            merged[key] = finding
+
+    return list(merged.values())