From a9eae23cda8e6c657ca810cf94e971fb27063e65 Mon Sep 17 00:00:00 2001
From: HungKNguyen <hungnguyenkhac7@gmail.com>
Date: Fri, 13 Feb 2026 19:19:54 +0700
Subject: [PATCH 1/4] draft

---
 action.yml                              |  39 ++-
 claudecode/evals/eval_engine.py         |   9 +-
 claudecode/github_action_audit.py       | 339 +++++++++++++++++-----
 claudecode/prompts.py                   |  63 ++++-
 claudecode/test_github_client.py        | 358 ++++++++++++++++++------
 claudecode/test_main_function.py        | 157 +++++++++--
 claudecode/test_workflow_integration.py |  53 ++--
 7 files changed, 796 insertions(+), 222 deletions(-)

diff --git a/action.yml b/action.yml
index 2c582ca..680e15e 100644
--- a/action.yml
+++ b/action.yml
@@ -142,6 +142,12 @@ runs:
             exit 1
           fi
 
+          if ! PR_BASE_SHA=$(echo "$PR_DATA" | jq -r '.base.sha' 2>&1); then
+            echo "Error: Failed to parse PR base SHA from API response"
+            echo "is_pr=false" >> $GITHUB_OUTPUT
+            exit 1
+          fi
+
           # Extract PR labels (array of label names)
           if ! PR_LABELS=$(echo "$PR_DATA" | jq -c '[.labels[].name]' 2>&1); then
             echo "Error: Failed to parse PR labels from API response"
@@ -158,11 +164,12 @@ runs:
 
           echo "pr_number=${{ github.event.issue.number }}" >> $GITHUB_OUTPUT
           echo "pr_sha=$PR_HEAD_SHA" >> $GITHUB_OUTPUT
+          echo "pr_base_sha=$PR_BASE_SHA" >> $GITHUB_OUTPUT
           echo "pr_labels=$PR_LABELS" >> $GITHUB_OUTPUT
           echo "is_draft=$IS_DRAFT" >> $GITHUB_OUTPUT
           echo "is_pr=true" >> $GITHUB_OUTPUT
 
-          echo "Issue comment is on PR #${{ github.event.issue.number }} with SHA $PR_HEAD_SHA"
+          echo "Issue comment is on PR #${{ github.event.issue.number }} with base SHA $PR_BASE_SHA, head SHA $PR_HEAD_SHA"
         else
           echo "is_pr=false" >> $GITHUB_OUTPUT
           echo "Issue comment is not on a PR, skipping"
@@ -357,6 +364,36 @@ runs:
       with:
         node-version: '18'
     
+    - name: Setup git for diffing
+      if: steps.claudecode-check.outputs.enable_claudecode == 'true'
+      shell: bash
+      env:
+        BASE_SHA: ${{ github.event.pull_request.base.sha || steps.pr-info.outputs.pr_base_sha }}
+        HEAD_SHA: ${{ github.event.pull_request.head.sha || steps.pr-info.outputs.pr_sha }}
+      run: |
+        echo "::group::Prepare repository so 'git diff' shows PR changes"
+
+        BASE_COMMIT="$BASE_SHA"
+        HEAD_COMMIT="$HEAD_SHA"
+
+        echo "Base SHA (PR snapshot): $BASE_COMMIT"
+        echo "Head SHA (current PR): $HEAD_COMMIT"
+
+        # Fetch both commits
+        git fetch origin "$BASE_COMMIT" --depth=1
+        git fetch origin "$HEAD_COMMIT" --depth=1
+
+        # Reset to base commit (B)
+        git checkout "$BASE_COMMIT"
+
+        # Restore PR files to working directory (E) - NOT staged
+        git restore --source="$HEAD_COMMIT" --worktree .
+
+        # Now: HEAD=B, index=B, worktree=E
+        # So: git diff shows B → E (PR changes)
+        echo "Repository prepared. Plain 'git diff' now shows PR changes."
+        echo "::endgroup::"
+
     - name: Install dependencies
       if: steps.claudecode-check.outputs.enable_claudecode == 'true'
       shell: bash
diff --git a/claudecode/evals/eval_engine.py b/claudecode/evals/eval_engine.py
index 6da935d..4ebff7b 100644
--- a/claudecode/evals/eval_engine.py
+++ b/claudecode/evals/eval_engine.py
@@ -451,7 +451,14 @@ def _run_code_review(self, test_case: EvalCase, repo_path: str) -> Tuple[bool, s
             )
             
             output = result.stdout
-            
+
+            # Display stderr in verbose mode (contains logging from github_action_audit.py)
+            if self.verbose and result.stderr:
+                self.log("Code review stderr output:", prefix="[AUDIT]")
+                for line in result.stderr.splitlines():
+                    if line.strip():
+                        print(f"[AUDIT] {line}", file=sys.stderr)
+
             # Parse the JSON output first to see if we got valid results
             success, parsed_results = parse_json_with_fallbacks(output)
             if not success:
diff --git a/claudecode/github_action_audit.py b/claudecode/github_action_audit.py
index dbf6766..a55e205 100644
--- a/claudecode/github_action_audit.py
+++ b/claudecode/github_action_audit.py
@@ -126,82 +126,237 @@ def __init__(self):
             print(f"[Debug] User excluded directories: {user_excluded_dirs}", file=sys.stderr)
         print(f"[Debug] Total excluded directories: {self.excluded_dirs}", file=sys.stderr)
     
-    def get_pr_data(self, repo_name: str, pr_number: int) -> Dict[str, Any]:
-        """Get PR metadata and files from GitHub API.
-        
+    def get_pr_data(self, repo_name: str, pr_number: int, max_diff_lines: int = 5000) -> Dict[str, Any]:
+        """Get PR metadata and construct diff in one pass with early termination.
+
+        Fetches files page-by-page while building the diff. Stops fetching when
+        max_diff_lines is reached, saving API calls for large PRs.
+
+        When max_diff_lines == 0 (agentic mode), only fetches PR metadata without files,
+        as Claude will use git commands to explore changes.
+
         Args:
             repo_name: Repository name in format "owner/repo"
             pr_number: Pull request number
-            
+            max_diff_lines: Maximum diff lines (0 = agentic mode, don't fetch files)
+
         Returns:
-            Dictionary containing PR data
+            Dictionary containing:
+            - PR metadata (number, title, body, user, etc.)
+            - files: List of files fetched (empty if agentic mode)
+            - pr_diff: Constructed diff text (empty if agentic mode)
+            - is_truncated: Whether we stopped fetching due to max_lines
+            - diff_stats: {files_included, total_files from PR metadata}
         """
-        # Get PR metadata
+        # Get PR metadata first (contains total changed_files count)
         pr_url = f"https://api.github.com/repos/{repo_name}/pulls/{pr_number}"
         response = requests.get(pr_url, headers=self.headers)
         response.raise_for_status()
-        pr_data = response.json()
-        
-        # Get PR files with pagination support
-        files_url = f"https://api.github.com/repos/{repo_name}/pulls/{pr_number}/files?per_page=100"
-        response = requests.get(files_url, headers=self.headers)
-        response.raise_for_status()
-        files_data = response.json()
-        
+        pr_metadata = response.json()
+
+        # Extract total files from PR metadata
+        total_files_in_pr = pr_metadata['changed_files']
+
+        # Agentic mode: Don't fetch files, Claude uses git commands
+        if max_diff_lines == 0:
+            logger.info(f"Agentic mode enabled (MAX_DIFF_LINES=0): Skipping file fetch for PR #{pr_number} ({total_files_in_pr} files in PR)")
+            return {
+                'number': pr_metadata['number'],
+                'title': pr_metadata['title'],
+                'body': pr_metadata.get('body', ''),
+                'user': pr_metadata['user']['login'],
+                'created_at': pr_metadata['created_at'],
+                'updated_at': pr_metadata['updated_at'],
+                'state': pr_metadata['state'],
+                'head': {
+                    'ref': pr_metadata['head']['ref'],
+                    'sha': pr_metadata['head']['sha'],
+                    'repo': {
+                        'full_name': pr_metadata['head']['repo']['full_name'] if pr_metadata['head']['repo'] else repo_name
+                    }
+                },
+                'base': {
+                    'ref': pr_metadata['base']['ref'],
+                    'sha': pr_metadata['base']['sha']
+                },
+                'files': [],
+                'additions': pr_metadata['additions'],
+                'deletions': pr_metadata['deletions'],
+                'changed_files': pr_metadata['changed_files'],
+                # Diff data (empty for agentic mode)
+                'pr_diff': '',
+                'is_truncated': False,
+                'diff_stats': {'files_included': 0, 'total_files': total_files_in_pr}
+            }
+
+        # Setup for incremental diff construction
+        diff_sections = []
+        current_lines = 0
+        files_with_patches = 0
+        is_truncated = False
+        fetched_files = []
+
+        # Fetch files page-by-page, building diff incrementally
+        page = 1
+        per_page = 100
+
+        while True:
+            files_url = f"https://api.github.com/repos/{repo_name}/pulls/{pr_number}/files"
+            params = {'per_page': per_page, 'page': page}
+
+            try:
+                response = requests.get(files_url, headers=self.headers, params=params)
+                response.raise_for_status()
+                files_data = response.json()
+
+                if not files_data:
+                    break
+
+                # Process each file in this page
+                for file_data in files_data:
+                    filename = file_data['filename']
+
+                    # Skip excluded files
+                    if self._is_excluded(filename):
+                        continue
+
+                    # Store file metadata
+                    file_obj = {
+                        'filename': filename,
+                        'status': file_data['status'],
+                        'additions': file_data['additions'],
+                        'deletions': file_data['deletions'],
+                        'changes': file_data['changes'],
+                        'patch': file_data.get('patch', ''),
+                        'previous_filename': file_data.get('previous_filename')
+                    }
+                    fetched_files.append(file_obj)
+
+                    # Try to add to diff (only files with patches)
+                    patch = file_data.get('patch', '')
+                    if not patch:
+                        logger.debug(f"Skipping {filename} (no patch - binary/rename-only)")
+                        continue  # Skip files without patches (binaries, etc.)
+
+                    # Build diff section
+                    diff_section = self._format_file_diff(file_obj)
+                    section_lines = diff_section.count('\n')
+
+                    # Check if adding this would exceed limit
+                    if current_lines + section_lines > max_diff_lines:
+                        is_truncated = True
+                        # Early termination - stop fetching more files
+                        logger.info(f"Diff truncated at {files_with_patches} files ({current_lines} lines, would exceed max {max_diff_lines})")
+                        break
+
+                    # Add to diff
+                    diff_sections.append(diff_section)
+                    current_lines += section_lines
+                    files_with_patches += 1
+                    logger.debug(f"Added {filename} to diff ({section_lines} lines, total: {current_lines}/{max_diff_lines})")
+
+                # If truncated, stop pagination
+                if is_truncated:
+                    break
+
+                # GitHub API supports up to 3000 files
+                # Stop if no more pages
+                if len(files_data) < per_page:
+                    break
+
+                page += 1
+
+            except requests.RequestException as e:
+                logger.warning(f"Failed to fetch files page {page}: {e}")
+                # Continue with files we have so far rather than failing completely
+                break
+
+        logger.info(f"Fetched {len(fetched_files)} files for PR #{pr_number} (total in PR: {total_files_in_pr})")
+
+        pr_diff = '\n'.join(diff_sections)
+        diff_line_count = len(pr_diff.splitlines()) if pr_diff else 0
+
+        diff_stats = {
+            'files_included': files_with_patches,
+            'total_files': total_files_in_pr  # From PR metadata, not fetched count
+        }
+
+        # Log diff construction summary
+        skipped_files = len(fetched_files) - files_with_patches
+        logger.info(f"Diff construction complete: {files_with_patches} files in diff ({diff_line_count} lines), "
+                   f"{skipped_files} files skipped (no patch), {total_files_in_pr - len(fetched_files)} files not fetched")
+
         return {
-            'number': pr_data['number'],
-            'title': pr_data['title'],
-            'body': pr_data.get('body', ''),
-            'user': pr_data['user']['login'],
-            'created_at': pr_data['created_at'],
-            'updated_at': pr_data['updated_at'],
-            'state': pr_data['state'],
+            'number': pr_metadata['number'],
+            'title': pr_metadata['title'],
+            'body': pr_metadata.get('body', ''),
+            'user': pr_metadata['user']['login'],
+            'created_at': pr_metadata['created_at'],
+            'updated_at': pr_metadata['updated_at'],
+            'state': pr_metadata['state'],
             'head': {
-                'ref': pr_data['head']['ref'],
-                'sha': pr_data['head']['sha'],
+                'ref': pr_metadata['head']['ref'],
+                'sha': pr_metadata['head']['sha'],
                 'repo': {
-                    'full_name': pr_data['head']['repo']['full_name'] if pr_data['head']['repo'] else repo_name
+                    'full_name': pr_metadata['head']['repo']['full_name'] if pr_metadata['head']['repo'] else repo_name
                 }
             },
             'base': {
-                'ref': pr_data['base']['ref'],
-                'sha': pr_data['base']['sha']
+                'ref': pr_metadata['base']['ref'],
+                'sha': pr_metadata['base']['sha']
             },
-            'files': [
-                {
-                    'filename': f['filename'],
-                    'status': f['status'],
-                    'additions': f['additions'],
-                    'deletions': f['deletions'],
-                    'changes': f['changes'],
-                    'patch': f.get('patch', '')
-                }
-                for f in files_data
-                if not self._is_excluded(f['filename'])
-            ],
-            'additions': pr_data['additions'],
-            'deletions': pr_data['deletions'],
-            'changed_files': pr_data['changed_files']
+            'files': fetched_files,
+            'additions': pr_metadata['additions'],
+            'deletions': pr_metadata['deletions'],
+            'changed_files': pr_metadata['changed_files'],
+            # Diff data
+            'pr_diff': pr_diff,
+            'is_truncated': is_truncated,
+            'diff_stats': diff_stats
         }
-    
-    def get_pr_diff(self, repo_name: str, pr_number: int) -> str:
-        """Get complete PR diff in unified format.
+
+    def _format_file_diff(self, file_data: Dict[str, Any]) -> str:
+        """Format unified diff section for a single file.
+
+        Handles all file statuses: added, removed, modified, renamed, etc.
 
         Args:
-            repo_name: Repository name in format "owner/repo"
-            pr_number: Pull request number
+            file_data: File dictionary with filename, status, patch, etc.
 
         Returns:
-            Complete PR diff in unified format
+            Formatted unified diff section for this file
         """
-        url = f"https://api.github.com/repos/{repo_name}/pulls/{pr_number}"
-        headers = dict(self.headers)
-        headers['Accept'] = 'application/vnd.github.diff'
-
-        response = requests.get(url, headers=headers)
-        response.raise_for_status()
-
-        return self._filter_generated_files(response.text)
+        filename = file_data['filename']
+        status = file_data.get('status', 'modified')
+        previous_filename = file_data.get('previous_filename')
+        patch = file_data.get('patch', '')
+
+        # Build diff header based on file status
+        diff_lines = []
+        diff_lines.append(f"diff --git a/{previous_filename or filename} b/{filename}")
+
+        if status == 'added':
+            diff_lines.append("new file mode 100644")
+            diff_lines.append("--- /dev/null")
+            diff_lines.append(f"+++ b/{filename}")
+        elif status == 'removed':
+            diff_lines.append("deleted file mode 100644")
+            diff_lines.append(f"--- a/{filename}")
+            diff_lines.append("+++ /dev/null")
+        elif status == 'renamed':
+            diff_lines.append("similarity index 100%")
+            diff_lines.append(f"rename from {previous_filename}")
+            diff_lines.append(f"rename to {filename}")
+            diff_lines.append(f"--- a/{previous_filename}")
+            diff_lines.append(f"+++ b/{filename}")
+        else:  # modified, changed, copied, unchanged
+            diff_lines.append(f"--- a/{filename}")
+            diff_lines.append(f"+++ b/{filename}")
+
+        # Add patch content
+        diff_lines.append(patch)
+
+        return '\n'.join(diff_lines) + '\n'
 
     def get_pr_comments(self, repo_name: str, pr_number: int) -> List[Dict[str, Any]]:
         """Get all review comments for a PR with pagination.
@@ -736,14 +891,41 @@ def main():
             print(json.dumps({'error': f'Claude Code not available: {claude_error}'}))
             sys.exit(EXIT_GENERAL_ERROR)
         
-        # Get PR data
+        # Parse max diff lines setting
+        max_diff_lines_str = os.environ.get('MAX_DIFF_LINES', '5000')
+        try:
+            max_diff_lines = int(max_diff_lines_str)
+        except ValueError:
+            max_diff_lines = 5000
+
+        # Get PR data with integrated diff construction
         try:
-            pr_data = github_client.get_pr_data(repo_name, pr_number)
-            pr_diff = github_client.get_pr_diff(repo_name, pr_number)
+            pr_data = github_client.get_pr_data(repo_name, pr_number, max_diff_lines)
         except Exception as e:
             print(json.dumps({'error': f'Failed to fetch PR data: {str(e)}'}))
             sys.exit(EXIT_GENERAL_ERROR)
 
+        # Extract diff data from pr_data
+        pr_diff = pr_data['pr_diff']
+        is_truncated = pr_data['is_truncated']
+        diff_stats = pr_data['diff_stats']
+
+        diff_line_count = len(pr_diff.splitlines()) if pr_diff else 0
+
+        # Determine review mode
+        force_agentic_mode = max_diff_lines == 0  # User explicitly requested agentic mode
+        use_partial_diff_mode = is_truncated and not force_agentic_mode
+
+        # Log mode selection
+        if force_agentic_mode:
+            logger.info(f"Using full agentic mode (MAX_DIFF_LINES=0)")
+        elif use_partial_diff_mode:
+            included = diff_stats['files_included']
+            total = diff_stats['total_files']
+            logger.info(f"Using partial diff mode ({included}/{total} files, {diff_line_count} lines)")
+        else:
+            logger.info(f"Using full diff mode ({diff_line_count} lines, {diff_stats['files_included']} files)")
+
         # Fetch PR comments and build review context
         review_context = None
         try:
@@ -781,26 +963,19 @@ def main():
             logger.warning(f"Failed to fetch review context (continuing without it): {e}")
             review_context = None
 
-        # Determine whether to embed diff or use agentic file reading
-        max_diff_lines_str = os.environ.get('MAX_DIFF_LINES', '5000')
-        try:
-            max_diff_lines = int(max_diff_lines_str)
-        except ValueError:
-            max_diff_lines = 5000
-
-        diff_line_count = len(pr_diff.splitlines())
-        use_agentic_mode = max_diff_lines == 0 or diff_line_count > max_diff_lines
-
-        if use_agentic_mode:
-            print(f"[Info] Using agentic file reading mode (diff has {diff_line_count} lines, threshold: {max_diff_lines})", file=sys.stderr)
-        else:
-            print(f"[Debug] Embedding diff in prompt ({diff_line_count} lines)", file=sys.stderr)
+        # Prepare diff metadata for prompt
+        diff_metadata = None
+        if use_partial_diff_mode:
+            diff_metadata = {
+                'is_truncated': True,
+                'stats': diff_stats
+            }
 
         # Get repo directory from environment or use current directory
         repo_path = os.environ.get('REPO_PATH')
         repo_dir = Path(repo_path) if repo_path else Path.cwd()
 
-        def run_review(include_diff: bool):
+        def run_review(include_diff: bool, diff_metadata=None):
             prompt_text = get_unified_review_prompt(
                 pr_data,
                 pr_diff if include_diff else None,
@@ -808,6 +983,7 @@ def run_review(include_diff: bool):
                 custom_review_instructions=custom_review_instructions,
                 custom_security_instructions=custom_security_instructions,
                 review_context=review_context,
+                diff_metadata=diff_metadata,
             )
             return claude_runner.run_code_review(repo_dir, prompt_text), len(prompt_text)
 
@@ -815,11 +991,22 @@ def run_review(include_diff: bool):
         pr_summary_from_review = {}
 
         try:
-            (success, error_msg, review_results), prompt_len = run_review(include_diff=not use_agentic_mode)
+            if force_agentic_mode:
+                # Full agentic mode - no diff
+                (success, error_msg, review_results), prompt_len = run_review(include_diff=False)
+            elif use_partial_diff_mode:
+                # Partial diff mode - include truncated diff with metadata
+                (success, error_msg, review_results), prompt_len = run_review(
+                    include_diff=True,
+                    diff_metadata=diff_metadata
+                )
+            else:
+                # Full diff mode
+                (success, error_msg, review_results), prompt_len = run_review(include_diff=True)
 
-            # Fallback to agentic mode if prompt still too long
+            # Fallback to full agentic if prompt still too long
             if not success and error_msg == "PROMPT_TOO_LONG":
-                print(f"[Info] Prompt too long ({prompt_len} chars), falling back to agentic mode", file=sys.stderr)
+                logger.info(f"Prompt too long ({prompt_len} chars), falling back to full agentic mode")
                 (success, error_msg, review_results), prompt_len = run_review(include_diff=False)
 
             if not success:
diff --git a/claudecode/prompts.py b/claudecode/prompts.py
index 6786858..da480cf 100644
--- a/claudecode/prompts.py
+++ b/claudecode/prompts.py
@@ -6,10 +6,44 @@ def _format_files_changed(pr_data):
     return "\n".join([f"- {f['filename']}" for f in pr_data['files']])
 
 
-def _build_diff_section(pr_diff, include_diff):
-    """Build prompt section for inline diff or agentic file reading."""
+def _build_diff_section(pr_diff, include_diff, diff_metadata=None):
+    """Build prompt section for inline diff, partial diff, or agentic file reading."""
     if pr_diff and include_diff:
-        return f"""
+        # Check if this is a partial diff
+        is_truncated = diff_metadata and diff_metadata.get('is_truncated', False)
+
+        if is_truncated:
+            # PARTIAL DIFF MODE
+            stats = diff_metadata.get('stats', {})
+            files_included = stats.get('files_included', 0)
+            total_files = stats.get('total_files', 0)
+            remaining_files = total_files - files_included
+
+            return f"""
+
+IMPORTANT - PARTIAL DIFF PROVIDED:
+The diff below shows {files_included} of {total_files} changed files.
+The remaining {remaining_files} files are not included due to size limits.
+
+To see all changes in this PR:
+1. Run: git diff --stat
+   This shows all changed files with line counts
+2. Run: git diff <filename>
+   To see specific file changes
+3. Use the Read tool to examine files in detail
+
+Note: The repository is configured so plain 'git diff' shows PR changes.
+
+PR DIFF CONTENT ({files_included}/{total_files} files):
+```
+{pr_diff}
+```
+
+The diff above is partial. Use git diff --stat and git diff <filename> to explore
+the remaining {remaining_files} files not shown above.
+"""
+        else:
+            return f"""
 
 PR DIFF CONTENT:
 ```
@@ -22,13 +56,22 @@ def _build_diff_section(pr_diff, include_diff):
     return """
 
 IMPORTANT - FILE READING INSTRUCTIONS:
-You have access to the repository files. For each file listed above, use the Read tool to examine the changes.
-Focus on the files that are most likely to contain issues based on the PR context.
+No diff is provided. You have access to the repository and git tools to explore the changes.
+
+To see all changes in this PR:
+1. Run: git diff --stat
+   This shows all changed files with line counts
+2. Run: git diff <filename>
+   To see specific file changes
+3. Use the Read tool to examine files in detail
+
+Note: The repository is configured so plain 'git diff' shows PR changes.
 
 To review effectively:
-1. Read each modified file to understand the current code
-2. Look at surrounding code context when needed to understand the changes
-3. Check related files if you need to understand dependencies or usage patterns
+1. Start with git diff --stat to understand the scope of changes
+2. Use git diff <filename> to see changes for files most likely to have issues
+3. Use the Read tool to examine complete files when you need full context
+4. Check related files if you need to understand dependencies or usage patterns
 """
 
 
@@ -39,6 +82,7 @@ def get_unified_review_prompt(
     custom_review_instructions=None,
     custom_security_instructions=None,
     review_context=None,
+    diff_metadata=None,
 ):
     """Generate unified code review + security prompt for Claude Code.
 
@@ -52,13 +96,14 @@ def get_unified_review_prompt(
         custom_review_instructions: Optional custom review instructions to append
         custom_security_instructions: Optional custom security instructions to append
         review_context: Optional previous review context (bot findings and user replies)
+        diff_metadata: Optional metadata about diff truncation (for partial diff mode)
 
     Returns:
         Formatted prompt string
     """
 
     files_changed = _format_files_changed(pr_data)
-    diff_section = _build_diff_section(pr_diff, include_diff)
+    diff_section = _build_diff_section(pr_diff, include_diff, diff_metadata)
 
     custom_review_section = ""
     if custom_review_instructions:
diff --git a/claudecode/test_github_client.py b/claudecode/test_github_client.py
index ee2268d..709aaac 100644
--- a/claudecode/test_github_client.py
+++ b/claudecode/test_github_client.py
@@ -96,9 +96,11 @@ def test_get_pr_data_success(self, mock_get):
             'https://api.github.com/repos/owner/repo/pulls/123',
             headers=client.headers
         )
+        # Check for paginated files request with params
         mock_get.assert_any_call(
-            'https://api.github.com/repos/owner/repo/pulls/123/files?per_page=100',
-            headers=client.headers
+            'https://api.github.com/repos/owner/repo/pulls/123/files',
+            headers=client.headers,
+            params={'per_page': 100, 'page': 1}
         )
         
         # Verify result structure
@@ -106,8 +108,13 @@ def test_get_pr_data_success(self, mock_get):
         assert result['title'] == 'Test PR'
         assert result['user'] == 'testuser'
         assert len(result['files']) == 2
+        assert result['diff_stats']['total_files'] == 3  # From PR metadata
         assert result['files'][0]['filename'] == 'src/main.py'
         assert result['files'][1]['status'] == 'added'
+        # Verify diff data is included
+        assert 'pr_diff' in result
+        assert 'is_truncated' in result
+        assert 'diff_stats' in result
     
     @patch('requests.get')
     def test_get_pr_data_null_head_repo(self, mock_get):
@@ -160,86 +167,7 @@ def test_get_pr_data_api_error(self, mock_get):
             client = GitHubActionClient()
             with pytest.raises(Exception, match="API Error"):
                 client.get_pr_data('owner/repo', 123)
-    
-    @patch('requests.get')
-    def test_get_pr_diff_success(self, mock_get):
-        """Test successful PR diff retrieval."""
-        diff_content = """diff --git a/src/main.py b/src/main.py
-index abc123..def456 100644
---- a/src/main.py
-+++ b/src/main.py
-@@ -1,5 +1,10 @@
-+import os
- def main():
-     print("Hello")
-+    # New feature
-+    process_data()
-"""
-        
-        mock_response = Mock()
-        mock_response.text = diff_content
-        mock_response.raise_for_status.return_value = None
-        mock_get.return_value = mock_response
-        
-        with patch.dict(os.environ, {'GITHUB_TOKEN': 'test-token'}):
-            client = GitHubActionClient()
-            result = client.get_pr_diff('owner/repo', 123)
-        
-        # Verify API call
-        mock_get.assert_called_once()
-        call_args = mock_get.call_args
-        assert call_args[0][0] == 'https://api.github.com/repos/owner/repo/pulls/123'
-        assert call_args[1]['headers']['Accept'] == 'application/vnd.github.diff'
-        
-        # Verify result
-        assert 'import os' in result
-        assert 'process_data()' in result
-    
-    @patch('requests.get')
-    def test_get_pr_diff_filters_generated_files(self, mock_get):
-        """Test that generated files are filtered from diff."""
-        diff_with_generated = """diff --git a/src/main.py b/src/main.py
-index abc123..def456 100644
---- a/src/main.py
-+++ b/src/main.py
-@@ -1,5 +1,10 @@
-+import os
- def main():
-     print("Hello")
-diff --git a/generated/code.py b/generated/code.py
-index 111..222 100644
---- a/generated/code.py
-+++ b/generated/code.py
-@@ -1,3 +1,5 @@
-# @generated by protoc
-+# More generated code
-+print("generated")
-diff --git a/src/feature.py b/src/feature.py
-index 333..444 100644
---- a/src/feature.py
-+++ b/src/feature.py
-@@ -1,3 +1,5 @@
-+# Real code
- def feature():
-     pass
-"""
-        
-        mock_response = Mock()
-        mock_response.text = diff_with_generated
-        mock_response.raise_for_status.return_value = None
-        mock_get.return_value = mock_response
-        
-        with patch.dict(os.environ, {'GITHUB_TOKEN': 'test-token'}):
-            client = GitHubActionClient()
-            result = client.get_pr_diff('owner/repo', 123)
-        
-        # Verify generated file is filtered out
-        assert 'src/main.py' in result
-        assert 'src/feature.py' in result
-        assert 'generated/code.py' not in result
-        assert '@generated' not in result
-        assert 'More generated code' not in result
-    
+
     def test_filter_generated_files_edge_cases(self):
         """Test edge cases in generated file filtering."""
         with patch.dict(os.environ, {'GITHUB_TOKEN': 'test-token'}):
@@ -270,9 +198,261 @@ def test_filter_generated_files_edge_cases(self):
             assert 'c.py' not in result
 
 
+class TestDiffSectionBuilding:
+    """Test unified diff section construction for different file types."""
+
+    def test_format_file_diff_modified_file(self):
+        """Test diff section for modified file."""
+        with patch.dict(os.environ, {'GITHUB_TOKEN': 'test-token'}):
+            client = GitHubActionClient()
+
+            file_data = {
+                'filename': 'src/main.py',
+                'status': 'modified',
+                'patch': '@@ -1,3 +1,3 @@\n-old line\n+new line'
+            }
+
+            diff_section = client._format_file_diff(file_data)
+
+            assert 'diff --git a/src/main.py b/src/main.py' in diff_section
+            assert '--- a/src/main.py' in diff_section
+            assert '+++ b/src/main.py' in diff_section
+            assert '@@ -1,3 +1,3 @@' in diff_section
+            assert '-old line' in diff_section
+            assert '+new line' in diff_section
+
+    def test_format_file_diff_added_file(self):
+        """Test diff section for added file."""
+        with patch.dict(os.environ, {'GITHUB_TOKEN': 'test-token'}):
+            client = GitHubActionClient()
+
+            file_data = {
+                'filename': 'src/new.py',
+                'status': 'added',
+                'patch': '@@ -0,0 +1,5 @@\n+def hello():\n+    pass'
+            }
+
+            diff_section = client._format_file_diff(file_data)
+
+            assert 'diff --git a/src/new.py b/src/new.py' in diff_section
+            assert 'new file mode 100644' in diff_section
+            assert '--- /dev/null' in diff_section
+            assert '+++ b/src/new.py' in diff_section
+            assert '+def hello():' in diff_section
+
+    def test_format_file_diff_removed_file(self):
+        """Test diff section for removed file."""
+        with patch.dict(os.environ, {'GITHUB_TOKEN': 'test-token'}):
+            client = GitHubActionClient()
+
+            file_data = {
+                'filename': 'src/old.py',
+                'status': 'removed',
+                'patch': '@@ -1,5 +0,0 @@\n-def old():\n-    pass'
+            }
+
+            diff_section = client._format_file_diff(file_data)
+
+            assert 'diff --git a/src/old.py b/src/old.py' in diff_section
+            assert 'deleted file mode 100644' in diff_section
+            assert '--- a/src/old.py' in diff_section
+            assert '+++ /dev/null' in diff_section
+            assert '-def old():' in diff_section
+
+    def test_format_file_diff_renamed_file(self):
+        """Test diff section for renamed file."""
+        with patch.dict(os.environ, {'GITHUB_TOKEN': 'test-token'}):
+            client = GitHubActionClient()
+
+            file_data = {
+                'filename': 'src/new_name.py',
+                'previous_filename': 'src/old_name.py',
+                'status': 'renamed',
+                'patch': '@@ -1,3 +1,3 @@\n-old line\n+new line'
+            }
+
+            diff_section = client._format_file_diff(file_data)
+
+            assert 'diff --git a/src/old_name.py b/src/new_name.py' in diff_section
+            assert 'similarity index 100%' in diff_section
+            assert 'rename from src/old_name.py' in diff_section
+            assert 'rename to src/new_name.py' in diff_section
+            assert '--- a/src/old_name.py' in diff_section
+            assert '+++ b/src/new_name.py' in diff_section
+
+    def test_format_file_diff_without_status(self):
+        """Test diff section defaults to modified when status not provided."""
+        with patch.dict(os.environ, {'GITHUB_TOKEN': 'test-token'}):
+            client = GitHubActionClient()
+
+            file_data = {
+                'filename': 'src/file.py',
+                'patch': '@@ -1,1 +1,1 @@\n-old\n+new'
+            }
+
+            diff_section = client._format_file_diff(file_data)
+
+            # Should default to modified
+            assert '--- a/src/file.py' in diff_section
+            assert '+++ b/src/file.py' in diff_section
+            assert 'new file mode' not in diff_section
+            assert 'deleted file' not in diff_section
+
+
+class TestIntegratedDiffConstruction:
+    """Test integrated diff construction during file fetching."""
+
+    @patch('requests.get')
+    def test_get_pr_data_builds_diff_incrementally(self, mock_get):
+        """Test that get_pr_data builds diff while fetching files."""
+        pr_response = Mock()
+        pr_response.json.return_value = {
+            'number': 123,
+            'title': 'Test PR',
+            'body': '',
+            'user': {'login': 'testuser'},
+            'created_at': '2024-01-01T00:00:00Z',
+            'updated_at': '2024-01-01T01:00:00Z',
+            'state': 'open',
+            'head': {'ref': 'feature', 'sha': 'abc123', 'repo': {'full_name': 'owner/repo'}},
+            'base': {'ref': 'main', 'sha': 'def456'},
+            'additions': 20,
+            'deletions': 5,
+            'changed_files': 2
+        }
+
+        files_response = Mock()
+        files_response.json.return_value = [
+            {
+                'filename': 'added.py',
+                'status': 'added',
+                'additions': 10,
+                'deletions': 0,
+                'changes': 10,
+                'patch': '@@ -0,0 +1,2 @@\n+def new():\n+    pass'
+            },
+            {
+                'filename': 'modified.py',
+                'status': 'modified',
+                'additions': 10,
+                'deletions': 5,
+                'changes': 15,
+                'patch': '@@ -1,1 +1,1 @@\n-old\n+new'
+            }
+        ]
+
+        # Empty response to stop pagination
+        empty_response = Mock()
+        empty_response.json.return_value = []
+
+        mock_get.side_effect = [pr_response, files_response, empty_response]
+
+        with patch.dict(os.environ, {'GITHUB_TOKEN': 'test-token'}):
+            client = GitHubActionClient()
+            result = client.get_pr_data('owner/repo', 123, max_diff_lines=5000)
+
+        # Verify diff was constructed
+        assert 'pr_diff' in result
+        diff = result['pr_diff']
+
+        # Should contain both files
+        assert 'new file mode 100644' in diff  # added.py
+        assert '--- /dev/null' in diff
+        assert '+++ b/added.py' in diff
+
+        assert '--- a/modified.py' in diff  # modified.py
+        assert '+++ b/modified.py' in diff
+
+        # Verify stats
+        assert result['is_truncated'] == False
+        assert result['diff_stats']['files_included'] == 2
+        assert result['diff_stats']['total_files'] == 2
+
+    @patch('requests.get')
+    def test_get_pr_data_truncates_at_max_lines(self, mock_get):
+        """Test that get_pr_data stops fetching when max_lines reached."""
+        pr_response = Mock()
+        pr_response.json.return_value = {
+            'number': 123,
+            'title': 'Large PR',
+            'body': '',
+            'user': {'login': 'testuser'},
+            'created_at': '2024-01-01T00:00:00Z',
+            'updated_at': '2024-01-01T01:00:00Z',
+            'state': 'open',
+            'head': {'ref': 'feature', 'sha': 'abc123', 'repo': {'full_name': 'owner/repo'}},
+            'base': {'ref': 'main', 'sha': 'def456'},
+            'additions': 1000,
+            'deletions': 500,
+            'changed_files': 100  # Total files in PR
+        }
+
+        # First page with files
+        files_page1 = Mock()
+        files_page1.json.return_value = [
+            {
+                'filename': f'file{i}.py',
+                'status': 'modified',
+                'additions': 5,
+                'deletions': 2,
+                'changes': 7,
+                'patch': '@@ -1,1 +1,1 @@\n-old\n+new'
+            }
+            for i in range(10)
+        ]
+
+        mock_get.side_effect = [pr_response, files_page1]
+
+        with patch.dict(os.environ, {'GITHUB_TOKEN': 'test-token'}):
+            client = GitHubActionClient()
+            # Very low max_lines to trigger truncation
+            result = client.get_pr_data('owner/repo', 123, max_diff_lines=50)
+
+        # Should be truncated
+        assert result['is_truncated'] == True
+        # Should have included some files but not all
+        assert result['diff_stats']['files_included'] < 10
+        assert result['diff_stats']['total_files'] == 100  # From PR metadata
+
+    @patch('requests.get')
+    def test_get_pr_data_agentic_mode_skips_files(self, mock_get):
+        """Test that max_diff_lines=0 (agentic mode) doesn't fetch files."""
+        pr_response = Mock()
+        pr_response.json.return_value = {
+            'number': 123,
+            'title': 'Test PR',
+            'body': '',
+            'user': {'login': 'testuser'},
+            'created_at': '2024-01-01T00:00:00Z',
+            'updated_at': '2024-01-01T01:00:00Z',
+            'state': 'open',
+            'head': {'ref': 'feature', 'sha': 'abc123', 'repo': {'full_name': 'owner/repo'}},
+            'base': {'ref': 'main', 'sha': 'def456'},
+            'additions': 100,
+            'deletions': 50,
+            'changed_files': 50
+        }
+
+        mock_get.side_effect = [pr_response]
+
+        with patch.dict(os.environ, {'GITHUB_TOKEN': 'test-token'}):
+            client = GitHubActionClient()
+            result = client.get_pr_data('owner/repo', 123, max_diff_lines=0)
+
+        # Should only call PR metadata endpoint, not files endpoint
+        assert mock_get.call_count == 1
+
+        # Should have empty diff data
+        assert result['pr_diff'] == ''
+        assert result['is_truncated'] == False
+        assert result['files'] == []
+        assert result['diff_stats']['files_included'] == 0
+        assert result['diff_stats']['total_files'] == 50  # From PR metadata
+
+
 class TestGitHubAPIIntegration:
     """Test GitHub API integration scenarios."""
-    
+
     @patch('requests.get')
     def test_rate_limit_handling(self, mock_get):
         """Test that rate limit headers are respected."""
@@ -325,10 +505,14 @@ def test_pagination_not_needed_for_pr_files(self, mock_get):
             'changed_files': 100
         }
         
-        files_response = Mock()
-        files_response.json.return_value = large_file_list
-        
-        mock_get.side_effect = [pr_response, files_response]
+        files_response_page1 = Mock()
+        files_response_page1.json.return_value = large_file_list
+
+        # Empty response for page 2 to stop pagination
+        files_response_page2 = Mock()
+        files_response_page2.json.return_value = []
+
+        mock_get.side_effect = [pr_response, files_response_page1, files_response_page2]
         
         with patch.dict(os.environ, {'GITHUB_TOKEN': 'test-token'}):
             client = GitHubActionClient()
diff --git a/claudecode/test_main_function.py b/claudecode/test_main_function.py
index 0b2bdae..b4207de 100644
--- a/claudecode/test_main_function.py
+++ b/claudecode/test_main_function.py
@@ -12,6 +12,41 @@
 from claudecode.github_action_audit import main
 
 
+def make_pr_data(pr_diff='diff', is_truncated=False, files_included=1, total_files=1, files=None, **kwargs):
+    """Helper to create complete PR data mock with all required fields."""
+    base_data = {
+        'number': 123,
+        'title': 'Test PR',
+        'body': '',
+        'user': 'testuser',
+        'created_at': '2024-01-01T00:00:00Z',
+        'updated_at': '2024-01-01T01:00:00Z',
+        'state': 'open',
+        'head': {
+            'ref': 'feature',
+            'sha': 'abc123',
+            'repo': {'full_name': 'owner/repo'}
+        },
+        'base': {
+            'ref': 'main',
+            'sha': 'def456'
+        },
+        'files': files or [],
+        'additions': 10,
+        'deletions': 5,
+        'changed_files': total_files,
+        # Diff data (new fields)
+        'pr_diff': pr_diff,
+        'is_truncated': is_truncated,
+        'diff_stats': {
+            'files_included': files_included,
+            'total_files': total_files
+        }
+    }
+    base_data.update(kwargs)
+    return base_data
+
+
 class TestMainFunction:
     """Test main function execution flow."""
 
@@ -211,6 +246,78 @@ def test_main_pr_data_fetch_failure(self, mock_client_class, mock_runner_class,
             assert 'Failed to fetch PR data' in output['error']
             assert 'API error' in output['error']
 
+    @patch('pathlib.Path.cwd')
+    @patch('claudecode.github_action_audit.get_unified_review_prompt')
+    @patch('claudecode.github_action_audit.FindingsFilter')
+    @patch('claudecode.github_action_audit.SimpleClaudeRunner')
+    @patch('claudecode.github_action_audit.GitHubActionClient')
+    def test_main_partial_diff_mode(self, mock_client_class, mock_runner_class,
+                                     mock_filter_class, mock_prompt_func,
+                                     mock_cwd, capsys):
+        """Test partial diff mode when diff exceeds MAX_DIFF_LINES."""
+        # Setup mocks
+        mock_client = Mock()
+        mock_client.get_pr_data.return_value = make_pr_data(
+            pr_diff="partial diff content",
+            is_truncated=True,
+            files_included=50,
+            total_files=300,
+            title='Large PR',
+            body='Large PR with many files'
+        )
+        mock_client.get_pr_comments.return_value = []
+        mock_client_class.return_value = mock_client
+
+        mock_runner = Mock()
+        mock_runner.validate_claude_available.return_value = (True, "")
+        mock_runner.run_code_review.return_value = (
+            True,
+            "",
+            {
+                'findings': [],
+                'analysis_summary': {
+                    'files_reviewed': 300,
+                    'high_severity': 0,
+                    'medium_severity': 0,
+                    'low_severity': 0
+                }
+            }
+        )
+        mock_runner_class.return_value = mock_runner
+
+        mock_filter = Mock()
+        mock_filter.filter_findings.return_value = (
+            True,
+            {
+                'filtered_findings': [],
+                'excluded_findings': [],
+                'analysis_summary': {}
+            },
+            Mock()
+        )
+        mock_filter_class.return_value = mock_filter
+        mock_cwd.return_value = Path('/test/repo')
+
+        with patch.dict(os.environ, {
+            'GITHUB_REPOSITORY': 'owner/repo',
+            'PR_NUMBER': '123',
+            'GITHUB_TOKEN': 'test-token'
+        }):
+            with pytest.raises(SystemExit) as exc_info:
+                main()
+
+            assert exc_info.value.code == 0  # Successful completion
+
+            # Verify prompt was called with partial diff metadata
+            call_args = mock_prompt_func.call_args
+            assert call_args is not None
+            # Diff should be included but with metadata
+            assert call_args[0][1] == "partial diff content"
+            assert call_args[1]['include_diff'] is True
+            # Check diff_metadata is passed
+            assert call_args[1]['diff_metadata'] is not None
+            assert call_args[1]['diff_metadata']['is_truncated'] is True
+
     @patch('pathlib.Path.cwd')
     @patch('claudecode.github_action_audit.get_unified_review_prompt')
     @patch('claudecode.github_action_audit.FindingsFilter')
@@ -222,12 +329,12 @@ def test_main_successful_audit_no_findings(self, mock_client_class, mock_runner_
         """Test successful audit with no findings."""
         # Setup mocks
         mock_client = Mock()
-        mock_client.get_pr_data.return_value = {
-            'number': 123,
-            'title': 'Test PR',
-            'body': 'Description'
-        }
-        mock_client.get_pr_diff.return_value = "diff content"
+        mock_client.get_pr_data.return_value = make_pr_data(
+            pr_diff="diff content",
+            title='Test PR',
+            body='Description'
+        )
+        mock_client.get_pr_comments.return_value = []
         mock_client_class.return_value = mock_client
 
         mock_runner = Mock()
@@ -292,12 +399,11 @@ def test_main_successful_audit_with_findings(self, mock_client_class, mock_runne
         """Test successful audit with high severity findings."""
         # Setup mocks
         mock_client = Mock()
-        mock_client.get_pr_data.return_value = {
-            'number': 123,
-            'title': 'Test PR',
-            'body': 'Description'
-        }
-        mock_client.get_pr_diff.return_value = "diff content"
+        mock_client.get_pr_data.return_value = make_pr_data(
+            title='Test PR',
+            body='Description'
+        )
+        mock_client.get_pr_comments.return_value = []
         mock_client._is_excluded.return_value = False  # Don't exclude any files in tests
         mock_client_class.return_value = mock_client
 
@@ -384,12 +490,11 @@ def test_main_with_full_filter(self, mock_client_class, mock_runner_class,
         """Test main with full FindingsFilter (LLM-based)."""
         # Setup mocks
         mock_client = Mock()
-        mock_client.get_pr_data.return_value = {
-            'number': 123,
-            'title': 'Test PR',
-            'body': 'Description'
-        }
-        mock_client.get_pr_diff.return_value = "diff content"
+        mock_client.get_pr_data.return_value = make_pr_data(
+            title='Test PR',
+            body='Description'
+        )
+        mock_client.get_pr_comments.return_value = []
         mock_client._is_excluded.return_value = False  # Don't exclude any files in tests
         mock_client_class.return_value = mock_client
 
@@ -447,8 +552,8 @@ def test_main_filter_failure_keeps_all_findings(self, mock_client_class, mock_ru
         """Test that filter failure keeps all findings with SimpleFindingsFilter."""
         # Setup mocks
         mock_client = Mock()
-        mock_client.get_pr_data.return_value = {'number': 123, 'title': 'Test', 'body': ''}
-        mock_client.get_pr_diff.return_value = "diff"
+        mock_client.get_pr_data.return_value = make_pr_data()
+        mock_client.get_pr_comments.return_value = []
         mock_client._is_excluded.return_value = False  # Don't exclude any files in tests
         mock_client_class.return_value = mock_client
 
@@ -533,8 +638,8 @@ def test_audit_failure(self, mock_client_class, mock_runner_class,
                           mock_cwd, capsys):
         """Test when code review fails."""
         mock_client = Mock()
-        mock_client.get_pr_data.return_value = {'number': 123}
-        mock_client.get_pr_diff.return_value = "diff"
+        mock_client.get_pr_data.return_value = make_pr_data()
+        mock_client.get_pr_comments.return_value = []
         mock_client_class.return_value = mock_client
 
         mock_runner = Mock()
@@ -579,8 +684,8 @@ def test_findings_have_correct_review_type(self, mock_client_class, mock_runner_
                                                mock_cwd, capsys):
         """Test that findings get review_type based on category."""
         mock_client = Mock()
-        mock_client.get_pr_data.return_value = {'number': 123, 'title': 'Test', 'body': ''}
-        mock_client.get_pr_diff.return_value = "diff"
+        mock_client.get_pr_data.return_value = make_pr_data()
+        mock_client.get_pr_comments.return_value = []
         mock_client._is_excluded.return_value = False
         mock_client_class.return_value = mock_client
 
@@ -641,8 +746,8 @@ def test_review_type_not_overwritten_if_already_set(self, mock_client_class, moc
                                                         mock_cwd, capsys):
         """Test that review_type is not overwritten if finding already has it."""
         mock_client = Mock()
-        mock_client.get_pr_data.return_value = {'number': 123, 'title': 'Test', 'body': ''}
-        mock_client.get_pr_diff.return_value = "diff"
+        mock_client.get_pr_data.return_value = make_pr_data()
+        mock_client.get_pr_comments.return_value = []
         mock_client._is_excluded.return_value = False
         mock_client_class.return_value = mock_client
 
diff --git a/claudecode/test_workflow_integration.py b/claudecode/test_workflow_integration.py
index e5410d6..35c19aa 100644
--- a/claudecode/test_workflow_integration.py
+++ b/claudecode/test_workflow_integration.py
@@ -198,7 +198,11 @@ def test_full_workflow_with_real_pr_structure(self, mock_get, mock_run):
             }
         ]
 
-        mock_get.side_effect = [pr_response, files_response, diff_response, comments_response, reactions_response]
+        # Empty response for files page 2 to stop pagination
+        files_response_page2 = Mock()
+        files_response_page2.json.return_value = []
+
+        mock_get.side_effect = [pr_response, files_response, files_response_page2, comments_response, reactions_response]
 
         # Setup Claude response
         claude_response = {
@@ -279,8 +283,10 @@ def test_full_workflow_with_real_pr_structure(self, mock_get, mock_run):
                 assert exc_info.value.code == 1
         
         # Verify API calls
-        # 5 calls: PR data, files, diff, comments, reactions for bot comment
-        assert mock_get.call_count == 5
+        # 3 calls: PR data, files page 1 (stops since <100 files), comments page 1
+        # Note: comments makes 1 call, gets empty list, stops
+        # Reactions not called since no bot comments in this test
+        assert mock_get.call_count == 3
         assert mock_run.call_count == 2  # 1 version check + 1 unified review
         
         # Verify the audit was run with proper prompt
@@ -485,25 +491,28 @@ def test_workflow_with_massive_pr(self, mock_get, mock_run):
             'changed_files': 500
         }
         
-        files_response = Mock()
-        files_response.json.return_value = large_files
-        
-        # Create massive diff
-        diff_parts = []
-        for i in range(500):
-            diff_parts.append(f'''diff --git a/src/file{i}.py b/src/file{i}.py
-new file mode 100644
-index 0000000..1234567
---- /dev/null
-+++ b/src/file{i}.py
-@@ -0,0 +1,100 @@
-+# File {i}
-''' + '\n'.join([f'+line {j}' for j in range(99)]))
-        
-        diff_response = Mock()
-        diff_response.text = '\n'.join(diff_parts)
-        
-        mock_get.side_effect = [pr_response, files_response, diff_response]
+        # Paginate files (100 per page)
+        files_page1 = Mock()
+        files_page1.json.return_value = large_files[:100]
+
+        files_page2 = Mock()
+        files_page2.json.return_value = large_files[100:200]
+
+        files_page3 = Mock()
+        files_page3.json.return_value = large_files[200:300]
+
+        files_page4 = Mock()
+        files_page4.json.return_value = large_files[300:400]
+
+        files_page5 = Mock()
+        files_page5.json.return_value = large_files[400:500]
+
+        # Empty page to stop pagination
+        files_page6 = Mock()
+        files_page6.json.return_value = []
+
+        # No diff_response needed - diff is now constructed from files
+        mock_get.side_effect = [pr_response, files_page1, files_page2, files_page3, files_page4, files_page5, files_page6]
         
         # Claude handles it gracefully
         mock_run.side_effect = [

From e1fb8a687c52ddb7ee8ef197311552365d833ee4 Mon Sep 17 00:00:00 2001
From: HungKNguyen <hungnguyenkhac7@gmail.com>
Date: Fri, 13 Feb 2026 21:30:41 +0700
Subject: [PATCH 2/4] change max size limit

---
 action.yml                        |  20 +++++-
 claudecode/constants.py           |   5 ++
 claudecode/github_action_audit.py | 100 +++++++++++++++++++-----------
 claudecode/logger.py              |   9 ++-
 claudecode/test_github_client.py  |  63 +++++++++++++++++--
 5 files changed, 152 insertions(+), 45 deletions(-)

diff --git a/action.yml b/action.yml
index 680e15e..a68a207 100644
--- a/action.yml
+++ b/action.yml
@@ -64,10 +64,25 @@ inputs:
     required: false
     default: ''
 
+  max-diff-chars:
+    description: |
+      Maximum diff characters to embed in prompt (default: 400000 = 400k chars).
+      Larger diffs use agentic file reading instead. Set to 0 to always use agentic mode.
+
+      IMPORTANT: For large limits (>400k), use a model with larger context like:
+      - claude-sonnet-4-5-20250929 (1M context) for diffs up to 800k chars
+      - Set via 'claude-model' input parameter
+
+      Note: ~400k chars fits comfortably in 200k token models (Opus/Sonnet standard).
+    required: false
+    default: '400000'
+
   max-diff-lines:
-    description: 'Maximum diff lines to embed in prompt. Larger diffs use agentic file reading instead. Set to 0 to always use agentic mode. Default: 5000'
+    description: |
+      [DEPRECATED] Use 'max-diff-chars' instead. This converts lines to chars (line * 80).
+      Kept for backward compatibility only. If both set, max-diff-chars takes precedence.
     required: false
-    default: '5000'
+    default: ''
 
   trigger-on-open:
     description: 'Run review when PR is first opened'
@@ -423,6 +438,7 @@ runs:
         CUSTOM_SECURITY_SCAN_INSTRUCTIONS: ${{ inputs.custom-security-scan-instructions }}
         CLAUDE_MODEL: ${{ inputs.claude-model }}
         CLAUDECODE_TIMEOUT: ${{ inputs.claudecode-timeout }}
+        MAX_DIFF_CHARS: ${{ inputs.max-diff-chars }}
         MAX_DIFF_LINES: ${{ inputs.max-diff-lines }}
         ACTION_PATH: ${{ github.action_path }}
       run: |
diff --git a/claudecode/constants.py b/claudecode/constants.py
index bbf7988..a1e0e32 100644
--- a/claudecode/constants.py
+++ b/claudecode/constants.py
@@ -13,6 +13,11 @@
 # Token Limits
 PROMPT_TOKEN_LIMIT = 16384  # 16k tokens max for claude-opus-4
 
+# Diff Construction Limits
+DEFAULT_MAX_DIFF_CHARS = 400000  # 400k characters (suitable for 200k token models)
+# Conversion factor for deprecated MAX_DIFF_LINES -> MAX_DIFF_CHARS
+CHARS_PER_LINE_ESTIMATE = 80  # Average characters per line for conversion
+
 # Exit Codes
 EXIT_SUCCESS = 0
 EXIT_GENERAL_ERROR = 1
diff --git a/claudecode/github_action_audit.py b/claudecode/github_action_audit.py
index a55e205..001ace7 100644
--- a/claudecode/github_action_audit.py
+++ b/claudecode/github_action_audit.py
@@ -24,7 +24,9 @@
     DEFAULT_CLAUDE_MODEL,
     EXIT_SUCCESS,
     EXIT_GENERAL_ERROR,
-    SUBPROCESS_TIMEOUT
+    SUBPROCESS_TIMEOUT,
+    DEFAULT_MAX_DIFF_CHARS,
+    CHARS_PER_LINE_ESTIMATE
 )
 from claudecode.logger import get_logger
 from claudecode.review_schema import REVIEW_OUTPUT_SCHEMA
@@ -126,26 +128,26 @@ def __init__(self):
             print(f"[Debug] User excluded directories: {user_excluded_dirs}", file=sys.stderr)
         print(f"[Debug] Total excluded directories: {self.excluded_dirs}", file=sys.stderr)
     
-    def get_pr_data(self, repo_name: str, pr_number: int, max_diff_lines: int = 5000) -> Dict[str, Any]:
+    def get_pr_data(self, repo_name: str, pr_number: int, max_diff_chars: int = 400000) -> Dict[str, Any]:
         """Get PR metadata and construct diff in one pass with early termination.
 
         Fetches files page-by-page while building the diff. Stops fetching when
-        max_diff_lines is reached, saving API calls for large PRs.
+        max_diff_chars is reached, saving API calls for large PRs.
 
-        When max_diff_lines == 0 (agentic mode), only fetches PR metadata without files,
+        When max_diff_chars == 0 (agentic mode), only fetches PR metadata without files,
         as Claude will use git commands to explore changes.
 
         Args:
             repo_name: Repository name in format "owner/repo"
             pr_number: Pull request number
-            max_diff_lines: Maximum diff lines (0 = agentic mode, don't fetch files)
+            max_diff_chars: Maximum diff characters (0 = agentic mode, don't fetch files)
 
         Returns:
             Dictionary containing:
             - PR metadata (number, title, body, user, etc.)
             - files: List of files fetched (empty if agentic mode)
             - pr_diff: Constructed diff text (empty if agentic mode)
-            - is_truncated: Whether we stopped fetching due to max_lines
+            - is_truncated: Whether we stopped fetching due to max_chars
             - diff_stats: {files_included, total_files from PR metadata}
         """
         # Get PR metadata first (contains total changed_files count)
@@ -158,8 +160,8 @@ def get_pr_data(self, repo_name: str, pr_number: int, max_diff_lines: int = 5000
         total_files_in_pr = pr_metadata['changed_files']
 
         # Agentic mode: Don't fetch files, Claude uses git commands
-        if max_diff_lines == 0:
-            logger.info(f"Agentic mode enabled (MAX_DIFF_LINES=0): Skipping file fetch for PR #{pr_number} ({total_files_in_pr} files in PR)")
+        if max_diff_chars == 0:
+            logger.info(f"Agentic mode enabled (MAX_DIFF_CHARS=0): Skipping file fetch for PR #{pr_number} ({total_files_in_pr} files in PR)")
             return {
                 'number': pr_metadata['number'],
                 'title': pr_metadata['title'],
@@ -186,15 +188,16 @@ def get_pr_data(self, repo_name: str, pr_number: int, max_diff_lines: int = 5000
                 # Diff data (empty for agentic mode)
                 'pr_diff': '',
                 'is_truncated': False,
-                'diff_stats': {'files_included': 0, 'total_files': total_files_in_pr}
+                'diff_stats': {'files_included': 0, 'total_files': total_files_in_pr, 'included_file_list': []}
             }
 
         # Setup for incremental diff construction
         diff_sections = []
-        current_lines = 0
+        current_chars = 0
         files_with_patches = 0
         is_truncated = False
         fetched_files = []
+        included_files = []  # Track which files made it into the diff
 
         # Fetch files page-by-page, building diff incrementally
         page = 1
@@ -240,20 +243,21 @@ def get_pr_data(self, repo_name: str, pr_number: int, max_diff_lines: int = 5000
 
                     # Build diff section
                     diff_section = self._format_file_diff(file_obj)
-                    section_lines = diff_section.count('\n')
+                    section_chars = len(diff_section)
 
                     # Check if adding this would exceed limit
-                    if current_lines + section_lines > max_diff_lines:
+                    if current_chars + section_chars > max_diff_chars:
                         is_truncated = True
                         # Early termination - stop fetching more files
-                        logger.info(f"Diff truncated at {files_with_patches} files ({current_lines} lines, would exceed max {max_diff_lines})")
+                        logger.info(f"Diff truncated at {files_with_patches} files ({current_chars} chars, would exceed max {max_diff_chars})")
                         break
 
                     # Add to diff
                     diff_sections.append(diff_section)
-                    current_lines += section_lines
+                    current_chars += section_chars
                     files_with_patches += 1
-                    logger.debug(f"Added {filename} to diff ({section_lines} lines, total: {current_lines}/{max_diff_lines})")
+                    included_files.append(filename)
+                    logger.debug(f"Added {filename} to diff ({section_chars} chars, total: {current_chars}/{max_diff_chars})")
 
                 # If truncated, stop pagination
                 if is_truncated:
@@ -274,16 +278,17 @@ def get_pr_data(self, repo_name: str, pr_number: int, max_diff_lines: int = 5000
         logger.info(f"Fetched {len(fetched_files)} files for PR #{pr_number} (total in PR: {total_files_in_pr})")
 
         pr_diff = '\n'.join(diff_sections)
-        diff_line_count = len(pr_diff.splitlines()) if pr_diff else 0
+        diff_char_count = len(pr_diff)
 
         diff_stats = {
             'files_included': files_with_patches,
-            'total_files': total_files_in_pr  # From PR metadata, not fetched count
+            'total_files': total_files_in_pr,  # From PR metadata, not fetched count
+            'included_file_list': included_files  # Actual list of files in the diff
         }
 
         # Log diff construction summary
         skipped_files = len(fetched_files) - files_with_patches
-        logger.info(f"Diff construction complete: {files_with_patches} files in diff ({diff_line_count} lines), "
+        logger.info(f"Diff construction complete: {files_with_patches} files in diff ({diff_char_count} chars), "
                    f"{skipped_files} files skipped (no patch), {total_files_in_pr - len(fetched_files)} files not fetched")
 
         return {
@@ -891,16 +896,32 @@ def main():
             print(json.dumps({'error': f'Claude Code not available: {claude_error}'}))
             sys.exit(EXIT_GENERAL_ERROR)
         
-        # Parse max diff lines setting
-        max_diff_lines_str = os.environ.get('MAX_DIFF_LINES', '5000')
-        try:
-            max_diff_lines = int(max_diff_lines_str)
-        except ValueError:
-            max_diff_lines = 5000
+        # Parse max diff chars setting (with backward compatibility for max_diff_lines)
+        max_diff_chars_str = os.environ.get('MAX_DIFF_CHARS', '')
+        max_diff_lines_str = os.environ.get('MAX_DIFF_LINES', '')
+
+        if max_diff_chars_str:
+            # New parameter takes precedence
+            try:
+                max_diff_chars = int(max_diff_chars_str)
+            except ValueError:
+                max_diff_chars = DEFAULT_MAX_DIFF_CHARS
+        elif max_diff_lines_str:
+            # Backward compatibility: convert lines to chars
+            try:
+                max_diff_lines = int(max_diff_lines_str)
+                max_diff_chars = max_diff_lines * CHARS_PER_LINE_ESTIMATE
+                logger.info(f"[DEPRECATED] MAX_DIFF_LINES used ({max_diff_lines} lines). "
+                           f"Converted to {max_diff_chars} chars. Please use MAX_DIFF_CHARS instead.")
+            except ValueError:
+                max_diff_chars = DEFAULT_MAX_DIFF_CHARS
+        else:
+            # Use default
+            max_diff_chars = DEFAULT_MAX_DIFF_CHARS
 
         # Get PR data with integrated diff construction
         try:
-            pr_data = github_client.get_pr_data(repo_name, pr_number, max_diff_lines)
+            pr_data = github_client.get_pr_data(repo_name, pr_number, max_diff_chars)
         except Exception as e:
             print(json.dumps({'error': f'Failed to fetch PR data: {str(e)}'}))
             sys.exit(EXIT_GENERAL_ERROR)
@@ -910,21 +931,21 @@ def main():
         is_truncated = pr_data['is_truncated']
         diff_stats = pr_data['diff_stats']
 
-        diff_line_count = len(pr_diff.splitlines()) if pr_diff else 0
+        diff_char_count = len(pr_diff)
 
         # Determine review mode
-        force_agentic_mode = max_diff_lines == 0  # User explicitly requested agentic mode
+        force_agentic_mode = max_diff_chars == 0  # User explicitly requested agentic mode
         use_partial_diff_mode = is_truncated and not force_agentic_mode
 
         # Log mode selection
         if force_agentic_mode:
-            logger.info(f"Using full agentic mode (MAX_DIFF_LINES=0)")
+            logger.info(f"Using full agentic mode (MAX_DIFF_CHARS=0)")
         elif use_partial_diff_mode:
             included = diff_stats['files_included']
             total = diff_stats['total_files']
-            logger.info(f"Using partial diff mode ({included}/{total} files, {diff_line_count} lines)")
+            logger.info(f"Using partial diff mode ({included}/{total} files, {diff_char_count:,} chars)")
         else:
-            logger.info(f"Using full diff mode ({diff_line_count} lines, {diff_stats['files_included']} files)")
+            logger.info(f"Using full diff mode ({diff_char_count:,} chars, {diff_stats['files_included']} files)")
 
         # Fetch PR comments and build review context
         review_context = None
@@ -1052,19 +1073,26 @@ def severity_counts(findings_list):
 
         high_count, medium_count, low_count = severity_counts(kept_findings)
 
-        # Calculate files_reviewed from pr_summary.file_changes
-        files_reviewed = 0
+        # Calculate files_reviewed by merging:
+        # 1. Files included in embedded diff (from diff_stats)
+        # 2. Additional files mentioned in pr_summary (Claude explored via git commands)
+        all_reviewed_files = set()
+
+        # Add files from embedded diff
+        if diff_stats and 'included_file_list' in diff_stats:
+            all_reviewed_files.update(diff_stats['included_file_list'])
+
+        # Add files from pr_summary (files Claude explored beyond the embedded diff)
         if isinstance(pr_summary_from_review, dict):
             file_changes = pr_summary_from_review.get('file_changes', [])
             if isinstance(file_changes, list):
-                # Count unique files from all 'files' arrays
-                all_files = set()
                 for entry in file_changes:
                     if isinstance(entry, dict):
                         files_list = entry.get('files', [])
                         if isinstance(files_list, list):
-                            all_files.update(files_list)
-                files_reviewed = len(all_files)
+                            all_reviewed_files.update(files_list)
+
+        files_reviewed = len(all_reviewed_files)
 
         # Prepare output
         output = {
diff --git a/claudecode/logger.py b/claudecode/logger.py
index 018408b..1f30d3a 100644
--- a/claudecode/logger.py
+++ b/claudecode/logger.py
@@ -43,6 +43,11 @@ def get_logger(name: str) -> logging.Logger:
         formatter = logging.Formatter(format_str)
         handler.setFormatter(formatter)
         logger.addHandler(handler)
-        logger.setLevel(logging.INFO)
-    
+
+        # Enable DEBUG logging in eval mode for detailed file-by-file tracking
+        if os.environ.get('EVAL_MODE') == '1':
+            logger.setLevel(logging.DEBUG)
+        else:
+            logger.setLevel(logging.INFO)
+
     return logger
\ No newline at end of file
diff --git a/claudecode/test_github_client.py b/claudecode/test_github_client.py
index 709aaac..fa9c164 100644
--- a/claudecode/test_github_client.py
+++ b/claudecode/test_github_client.py
@@ -349,7 +349,7 @@ def test_get_pr_data_builds_diff_incrementally(self, mock_get):
 
         with patch.dict(os.environ, {'GITHUB_TOKEN': 'test-token'}):
             client = GitHubActionClient()
-            result = client.get_pr_data('owner/repo', 123, max_diff_lines=5000)
+            result = client.get_pr_data('owner/repo', 123, max_diff_chars=400000)
 
         # Verify diff was constructed
         assert 'pr_diff' in result
@@ -367,6 +367,7 @@ def test_get_pr_data_builds_diff_incrementally(self, mock_get):
         assert result['is_truncated'] == False
         assert result['diff_stats']['files_included'] == 2
         assert result['diff_stats']['total_files'] == 2
+        assert result['diff_stats']['included_file_list'] == ['added.py', 'modified.py']
 
     @patch('requests.get')
     def test_get_pr_data_truncates_at_max_lines(self, mock_get):
@@ -405,8 +406,8 @@ def test_get_pr_data_truncates_at_max_lines(self, mock_get):
 
         with patch.dict(os.environ, {'GITHUB_TOKEN': 'test-token'}):
             client = GitHubActionClient()
-            # Very low max_lines to trigger truncation
-            result = client.get_pr_data('owner/repo', 123, max_diff_lines=50)
+            # Very low max_chars to trigger truncation (each file ~150 chars)
+            result = client.get_pr_data('owner/repo', 123, max_diff_chars=200)
 
         # Should be truncated
         assert result['is_truncated'] == True
@@ -416,7 +417,7 @@ def test_get_pr_data_truncates_at_max_lines(self, mock_get):
 
     @patch('requests.get')
     def test_get_pr_data_agentic_mode_skips_files(self, mock_get):
-        """Test that max_diff_lines=0 (agentic mode) doesn't fetch files."""
+        """Test that max_diff_chars=0 (agentic mode) doesn't fetch files."""
         pr_response = Mock()
         pr_response.json.return_value = {
             'number': 123,
@@ -437,7 +438,7 @@ def test_get_pr_data_agentic_mode_skips_files(self, mock_get):
 
         with patch.dict(os.environ, {'GITHUB_TOKEN': 'test-token'}):
             client = GitHubActionClient()
-            result = client.get_pr_data('owner/repo', 123, max_diff_lines=0)
+            result = client.get_pr_data('owner/repo', 123, max_diff_chars=0)
 
         # Should only call PR metadata endpoint, not files endpoint
         assert mock_get.call_count == 1
@@ -450,6 +451,58 @@ def test_get_pr_data_agentic_mode_skips_files(self, mock_get):
         assert result['diff_stats']['total_files'] == 50  # From PR metadata
 
 
+class TestBackwardCompatibility:
+    """Test backward compatibility with deprecated MAX_DIFF_LINES."""
+
+    @patch('requests.get')
+    def test_max_diff_lines_converts_to_chars(self, mock_get):
+        """Test that max_diff_lines parameter still works via character conversion."""
+        # This tests backward compatibility: max_diff_lines * 80 = max_diff_chars
+        # If we pass max_diff_lines=10, it should convert to 800 chars
+
+        pr_response = Mock()
+        pr_response.json.return_value = {
+            'number': 123,
+            'title': 'Test PR',
+            'body': '',
+            'user': {'login': 'testuser'},
+            'created_at': '2024-01-01T00:00:00Z',
+            'updated_at': '2024-01-01T01:00:00Z',
+            'state': 'open',
+            'head': {'ref': 'feature', 'sha': 'abc123', 'repo': {'full_name': 'owner/repo'}},
+            'base': {'ref': 'main', 'sha': 'def456'},
+            'additions': 10,
+            'deletions': 5,
+            'changed_files': 1
+        }
+
+        files_response = Mock()
+        # Create file with ~100 chars per section to test truncation
+        files_response.json.return_value = [
+            {
+                'filename': 'short.txt',
+                'status': 'modified',
+                'additions': 1,
+                'deletions': 1,
+                'changes': 2,
+                'patch': '@@ -1,1 +1,1 @@\n-old\n+new'  # ~50 chars
+            }
+        ]
+
+        empty_response = Mock()
+        empty_response.json.return_value = []
+
+        mock_get.side_effect = [pr_response, files_response, empty_response]
+
+        with patch.dict(os.environ, {'GITHUB_TOKEN': 'test-token'}):
+            client = GitHubActionClient()
+            # Using character limit directly (new way)
+            result = client.get_pr_data('owner/repo', 123, max_diff_chars=100)
+
+        # Should include the file since diff is ~85 chars
+        assert result['diff_stats']['files_included'] == 1
+
+
 class TestGitHubAPIIntegration:
     """Test GitHub API integration scenarios."""
 

From 7ed380d3c2b76cd46fdb57b4f91723c134b78210 Mon Sep 17 00:00:00 2001
From: HungKNguyen <hungnguyenkhac7@gmail.com>
Date: Fri, 13 Feb 2026 21:53:32 +0700
Subject: [PATCH 3/4] update readme.md

---
 README.md | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 61 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 19d6591..aadcd47 100644
--- a/README.md
+++ b/README.md
@@ -65,8 +65,10 @@ This action is not hardened against prompt injection attacks and should only be
 | `comment-pr` | Whether to comment on PRs with findings | `true` | No |
 | `upload-results` | Whether to upload results as artifacts | `true` | No |
 | `exclude-directories` | Comma-separated list of directories to exclude from scanning | None | No |
-| `claude-model` | Claude [model name](https://docs.anthropic.com/en/docs/about-claude/models/overview#model-names) to use. Defaults to Opus 4.5. | `claude-opus-4-5-20251101` | No |
+| `claude-model` | Claude [model name](https://docs.anthropic.com/en/docs/about-claude/models/overview#model-names) to use. Defaults to Opus 4.5. For large PRs (>400k char diffs), consider using `claude-sonnet-4-5-20250929` (1M context). | `claude-opus-4-5-20251101` | No |
 | `claudecode-timeout` | Timeout for ClaudeCode analysis in minutes | `20` | No |
+| `max-diff-chars` | Maximum diff characters to include in prompt. Set to `0` for agentic mode (Claude uses git commands to explore). See [Diff Size Configuration](#diff-size-configuration) below. | `400000` | No |
+| `max-diff-lines` | **[DEPRECATED]** Use `max-diff-chars` instead. Converts lines to chars (line × 80). | None | No |
 | `run-every-commit` | Run ClaudeCode on every commit (skips cache check). Warning: May increase false positives on PRs with many commits. **Deprecated**: Use `trigger-on-commit` instead. | `false` | No |
 | `trigger-on-open` | Run review when PR is first opened | `true` | No |
 | `trigger-on-commit` | Run review on every new commit | `false` | No |
@@ -88,6 +90,64 @@ This action is not hardened against prompt injection attacks and should only be
 | `findings-count` | Total number of code review findings |
 | `results-file` | Path to the results JSON file |
 
+### Diff Size Configuration
+
+The action handles PRs of any size using three review modes:
+
+#### Review Modes
+
+1. **Full Diff Mode** (default for small PRs)
+   - Entire diff embedded in prompt
+   - Fastest and most comprehensive
+   - Works for diffs up to ~400k characters
+
+2. **Partial Diff Mode** (automatic for large PRs)
+   - First N files embedded in prompt
+   - Claude uses git commands to explore remaining files
+   - Balances embedded context with agentic exploration
+
+3. **Full Agentic Mode** (set `max-diff-chars: 0`)
+   - No diff embedded
+   - Claude uses git commands to explore all changes
+   - Most flexible for massive PRs (1000+ files)
+
+#### Configuration
+
+**`max-diff-chars`** - Maximum diff characters to embed (default: 400,000)
+
+```yaml
+# Default: 400k chars (fits in 200k token models)
+- uses: PSPDFKit-labs/nutrient-code-review@main
+  with:
+    claude-api-key: ${{ secrets.CLAUDE_API_KEY }}
+    max-diff-chars: 400000  # ~100k tokens
+
+# Large PRs: Use 1M context model with higher limit
+- uses: PSPDFKit-labs/nutrient-code-review@main
+  with:
+    claude-api-key: ${{ secrets.CLAUDE_API_KEY }}
+    claude-model: claude-sonnet-4-5-20250929  # 1M context
+    max-diff-chars: 800000  # ~200k tokens
+
+# Always use agentic mode (no embedded diff)
+- uses: PSPDFKit-labs/nutrient-code-review@main
+  with:
+    claude-api-key: ${{ secrets.CLAUDE_API_KEY }}
+    max-diff-chars: 0  # Force agentic exploration
+```
+
+**Model Selection for Large Diffs:**
+
+| Diff Size | Recommended Model | Context Window |
+|-----------|-------------------|----------------|
+| < 400k chars | `claude-opus-4-5-20251101` (default) | 200k tokens |
+| 400k - 800k chars | `claude-sonnet-4-5-20250929` | 1M tokens |
+| > 800k chars | Set `max-diff-chars: 0` (agentic mode) | Any model |
+
+**Backward Compatibility:**
+
+`max-diff-lines` is deprecated but still supported. If used, it converts to characters: `max_diff_chars = max_diff_lines × 80`
+
 ### Re-Review Trigger Configuration
 
 The action supports multiple triggers for when reviews should be run, allowing fine-grained control over bot behavior:

From cee72b0bfa98440953bf825739932b8b475bd95f Mon Sep 17 00:00:00 2001
From: HungKNguyen <hungnguyenkhac7@gmail.com>
Date: Fri, 13 Feb 2026 22:10:20 +0700
Subject: [PATCH 4/4] polish

---
 claudecode/logger.py             | 2 +-
 claudecode/test_main_function.py | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/claudecode/logger.py b/claudecode/logger.py
index 1f30d3a..8fa0f52 100644
--- a/claudecode/logger.py
+++ b/claudecode/logger.py
@@ -50,4 +50,4 @@ def get_logger(name: str) -> logging.Logger:
         else:
             logger.setLevel(logging.INFO)
 
-    return logger
\ No newline at end of file
+    return logger
diff --git a/claudecode/test_main_function.py b/claudecode/test_main_function.py
index b4207de..b2d9cd4 100644
--- a/claudecode/test_main_function.py
+++ b/claudecode/test_main_function.py
@@ -40,7 +40,8 @@ def make_pr_data(pr_diff='diff', is_truncated=False, files_included=1, total_fil
         'is_truncated': is_truncated,
         'diff_stats': {
             'files_included': files_included,
-            'total_files': total_files
+            'total_files': total_files,
+            'included_file_list': [f['filename'] for f in (files or [])] if files else []
         }
     }
     base_data.update(kwargs)