# RuffNormalizer._convert_ruff_finding() - Lines ~232
location = Location(
file="/opt/andela/genai/patchpro-bot-test-bafecd1/src/patchpro_bot/__init__.py", # ❌ ABSOLUTE PATH
line=3,
column=1,
...
)

Problem: The analyzer stores ABSOLUTE PATHS in findings.json.
- Expected: "src/patchpro_bot/__init__.py"
- Actual: "/opt/andela/genai/patchpro-bot-test-bafecd1/src/patchpro_bot/__init__.py"
# build_batch_diff_prompt()
for file_path, findings in file_fixes.items():
prompt += f"""
## File: `{file_path}` # ❌ LLM sees ABSOLUTE PATH
"""

Problem: The LLM receives absolute paths in the prompt:
## File: `/opt/andela/genai/patchpro-bot-test-bafecd1/src/patchpro_bot/__init__.py`
{
"patches": [
{
"file_path": "/opt/andela/genai/patchpro-bot-test-bafecd1/src/patchpro_bot/__init__.py",
"diff_content": "diff --git a/analyzer.py b/analyzer.py\n...",
"summary": "Fix imports"
}
]
}

Problem: The LLM returns an ABSOLUTE PATH in `file_path` but generates a diff with a TRUNCATED PATH.
- Why? The LLM tries to be "helpful" and normalize paths, but does so incorrectly.
- Result: `file_path` is absolute, but `diff_content` has a truncated path.
def generate_diff_from_patch(self, diff_patch: DiffPatch) -> str:
file_path = diff_patch.file_path # ❌ Absolute: "/opt/.../analyzer.py"
relative_path = self._make_relative_path(file_path) # ✅ Tries to fix it
# BUT the diff_content already has truncated paths from LLM!
if diff_content.startswith('diff --git'):
# Lines 185-197: Tries to fix headers
for line in lines:
if line.startswith('diff --git '):
fixed_lines.append(f'diff --git a/{relative_path} b/{relative_path}')

Problem: Even though `_make_relative_path()` exists, the LLM's diff already has wrong paths.
- LLM generated: `diff --git a/analyzer.py b/analyzer.py`
- Should be: `diff --git a/src/patchpro_bot/analyzer.py b/src/patchpro_bot/analyzer.py`
def _get_git_root(self, file_path: Optional[str] = None) -> Optional[Path]:
try:
# ❌ BUG HERE - Uses file's parent directory as cwd!
cwd = Path(file_path).parent if file_path else Path.cwd()
# In multiprocessing context, this executes from WRONG directory
result = subprocess.run(
['git', 'rev-parse', '--show-toplevel'],
cwd=str(cwd), # ❌ Points to file's directory, not repo root!
...
)

Problem in Multiprocessing Context:
- Single file: cwd = /opt/andela/genai/patchpro-bot-test-bafecd1 ✅ Works
- Many files: cwd = /opt/andela/genai/patchpro-bot-test-bafecd1/src/patchpro_bot ❌ Wrong!
- Result: `git rev-parse` returns the correct root, but the path calculation fails
- Location: `analyzer.py` lines ~232 (RuffNormalizer) and ~388 (SemgrepNormalizer)
- Code: Uses `ruff_finding['filename']` directly without normalization
- Impact: findings.json has absolute paths instead of relative paths
- Location: `prompts.py` line 192 - the prompt includes absolute paths
- Code: prompt += f"## File: `{file_path}`"
- Impact: The LLM sees an absolute path, tries to normalize it, and generates wrong diff headers
- Single file/sync mode:
  - Process runs from repo root
  - `Path.cwd()` = /opt/andela/genai/patchpro-bot-test-bafecd1
  - Absolute paths get normalized correctly
- Many files/async mode:
  - Multiprocessing spawns separate processes
  - Each process may have a different working directory
  - `Path.cwd()` might be /opt/andela/genai/patchpro-bot-test-bafecd1/src/patchpro_bot
  - Path normalization fails or produces truncated paths
Rationale: Prevent absolute paths from EVER entering the system
# In RuffNormalizer._convert_ruff_finding() - Line ~232
location = Location(
file=self._normalize_file_path(ruff_finding['filename']), # ✅ Fix here
line=location_data["row"],
...
)
def _normalize_file_path(self, file_path: str) -> str:
"""Convert absolute path to relative path from git root."""
if not Path(file_path).is_absolute():
return file_path # Already relative
# Get git root WITHOUT using cwd parameter (avoid multiprocessing bug)
try:
result = subprocess.run(
['git', 'rev-parse', '--show-toplevel'],
# ✅ Don't pass cwd - use current process directory
capture_output=True,
text=True,
check=True,
)
git_root = Path(result.stdout.strip())
abs_path = Path(file_path).resolve()
return str(abs_path.relative_to(git_root))
except:
return file_path # Fallback

Benefits:
- ✅ Single point of normalization
- ✅ Prevents absolute paths in findings.json
- ✅ LLM receives clean relative paths
- ✅ No downstream fixes needed
- ✅ Works in all modes (sync, async, multiprocessing)
Rationale: Fixing symptoms downstream
# Would need to fix _get_git_root() to not use cwd=file_dir
def _get_git_root(self) -> Optional[Path]:
result = subprocess.run(
['git', 'rev-parse', '--show-toplevel'],
# ✅ Remove cwd parameter
capture_output=True,
...
)

Problems:
- ❌ findings.json still has absolute paths
- ❌ LLM still sees absolute paths
- ❌ Two normalization points (analyzer + generator)
- ❌ Harder to debug
- ❌ More likely to have edge cases
- Worktree: /opt/andela/genai/patchpro-bot-test-bafecd1
- Commit: bafecd1 (20 files changed)
- Trigger: `git commit` → post-commit hook → async analysis
- Tools: Ruff + Semgrep with LLM patch generation
# findings.json shows absolute paths
"file": "/opt/andela/genai/patchpro-bot-test-bafecd1/src/patchpro_bot/__init__.py"
# patch_combined_20251004_214105.diff shows truncated paths
diff --git a/analyzer.py b/analyzer.py # ❌ Should be src/patchpro_bot/analyzer.py
# git apply fails
error: analyzer.py: No such file or directory

git apply --check .patchpro/patch_combined_*.diff
# Expected: Success
# Actual: Error - file not found

- Add `_normalize_file_path()` method to RuffNormalizer
- Add `_normalize_file_path()` method to SemgrepNormalizer
- Update `Location` creation to use normalized paths
- Remove `cwd=file_dir` parameter (if it exists)
- Test with single file (verify still works)
- Test with 20 files (verify bug fixed)
- Re-run test in worktree
- Verify findings.json has relative paths
- Verify patches have correct paths
- Verify `git apply` succeeds
- Consider removing `_make_relative_path()` from generator.py (redundant)
- Or keep it as defensive programming (belt + suspenders)
- /opt/andela/genai/patchpro-bot-agent-dev/src/patchpro_bot/analyzer.py - Lines ~232, ~388
- /opt/andela/genai/patchpro-bot-agent-dev/src/patchpro_bot/diff/generator.py - Line 45
- cli.py - Line 1039 (multiprocessing.Process spawn)
- agent_core.py - Line 863 (patch writing)
- llm/prompts.py - Line 192 (prompt building)
- llm/response_parser.py - Line 48 (DiffPatch dataclass)
- diff/patch_writer.py - Line 115 (combined patch creation)
- Blocks patch application in production async mode
- Only manifests under load (many files)
- Hard to reproduce in manual testing
- Affects: Async/multiprocessing mode with multiple files
- Does NOT affect: Single file commits, manual analysis
- Developers see patches generated but can't apply them
- `git apply` fails with "No such file or directory"
- Requires manual intervention
- Test with realistic workloads: Single file tests miss multiprocessing bugs
- Avoid `cwd` parameters: Especially in multiprocessing contexts
- Normalize early: Fix data at entry point, not at exit point
- Git hooks are essential: Manual commands don't reproduce real workflow
- Absolute vs relative paths: Critical for diff generation
Status: Bug confirmed and analyzed
Next Step: Implement Option A (normalize in analyzer.py)
Issue: #12
PR: #11