diff --git a/pyproject.toml b/pyproject.toml index 640c1d2..d5e5cd2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "slopometry" -version = "2026.3.11" +version = "2026.3.20" description = "Opinionated code quality metrics for code agents and humans" readme = "README.md" requires-python = ">=3.13" diff --git a/src/slopometry/core/code_analyzer.py b/src/slopometry/core/code_analyzer.py index c77ec98..ce61582 100644 --- a/src/slopometry/core/code_analyzer.py +++ b/src/slopometry/core/code_analyzer.py @@ -117,7 +117,7 @@ def analyze_file(self, file_path: Path) -> FileAnalysisResult: tokens=count_file_tokens(file_path), ) except Exception as e: - logger.warning("Failed to analyze %s: %s", file_path, e) + logger.debug("Failed to analyze %s: %s", file_path, e) return FileAnalysisResult( path=str(file_path), complexity=0, diff --git a/src/slopometry/core/compact_analyzer.py b/src/slopometry/core/compact_analyzer.py index 1070f6d..1a357e7 100644 --- a/src/slopometry/core/compact_analyzer.py +++ b/src/slopometry/core/compact_analyzer.py @@ -82,7 +82,7 @@ def analyze_transcript(self, transcript_path: Path) -> list[CompactEvent]: compact_events.append(compact_event) except OSError as e: - logger.warning(f"Failed to read transcript file {transcript_path}: {e}") + logger.debug(f"Failed to read transcript file {transcript_path}: {e}") return compact_events @@ -110,13 +110,13 @@ def _create_compact_event( timestamp_str = boundary.timestamp or summary.timestamp if not timestamp_str: - logger.warning(f"Compact event at line {line_number} missing timestamp, skipping") + logger.debug(f"Compact event at line {line_number} missing timestamp, skipping") return None try: timestamp = datetime.fromisoformat(timestamp_str.replace("Z", "+00:00")) except ValueError: - logger.warning(f"Compact event at line {line_number} has invalid timestamp '{timestamp_str}', skipping") + logger.debug(f"Compact event at line {line_number} has invalid timestamp '{timestamp_str}', skipping") return None return CompactEvent( @@ -179,7 +179,7 @@ def _transcript_matches_project(transcript_path: Path, working_directory: Path) return False return Path(cwd).resolve() == working_directory except (OSError, json.JSONDecodeError) as e: - logger.warning(f"Failed to read transcript {transcript_path} for project matching: {e}") + logger.debug(f"Failed to read transcript {transcript_path} for project matching: {e}") return False @@ -217,7 +217,7 @@ def find_compact_instructions(transcript_path: Path, compact_line_number: int, l continue except OSError as e: - logger.warning(f"Failed to read transcript {transcript_path} for compact instructions: {e}") + logger.debug(f"Failed to read transcript {transcript_path} for compact instructions: {e}") return None diff --git a/src/slopometry/core/complexity_analyzer.py b/src/slopometry/core/complexity_analyzer.py index 27f95fe..ba730e3 100644 --- a/src/slopometry/core/complexity_analyzer.py +++ b/src/slopometry/core/complexity_analyzer.py @@ -245,7 +245,7 @@ def _build_files_by_loc(self, python_files: list[Path], target_dir: Path) -> dic relative_path = self._get_relative_path(file_path, target_dir) files_by_loc[relative_path] = code_loc except (OSError, UnicodeDecodeError) as e: - logger.warning(f"Skipping unreadable file {file_path}: {e}") + logger.debug(f"Skipping unreadable file {file_path}: {e}") continue return files_by_loc @@ -294,7 +294,7 @@ def _analyze_files_parallel(self, files: list[Path], max_workers: int | None = N results.append(result) except Exception as e: file_path = futures[future] - logger.warning(f"Failed to analyze {file_path}: {e}") + logger.debug(f"Failed to analyze {file_path}: {e}") results.append( FileAnalysisResult( path=str(file_path), diff --git a/src/slopometry/core/hook_handler.py b/src/slopometry/core/hook_handler.py index cecd5c0..8f88b53 100644 --- a/src/slopometry/core/hook_handler.py +++ b/src/slopometry/core/hook_handler.py @@ -13,6 +13,7 @@ from slopometry.core.lock import SlopometryLock from slopometry.core.models.complexity import ComplexityDelta, ExtendedComplexityMetrics from slopometry.core.models.hook import ( + FeedbackCacheState, HookEvent, HookEventType, HookInputUnion, @@ -262,23 +263,18 @@ def _get_feedback_cache_path(working_directory: str) -> Path: return cache_dir / "feedback_cache.json" -def _compute_feedback_cache_key(working_directory: str, edited_files: set[str], feedback_hash: str) -> str: - """Compute a cache key for the current state. +def _compute_working_tree_cache_key(working_directory: str) -> str: + """Compute a cache key based solely on working tree state. - Uses language-aware change detection to avoid cache invalidation from - non-source file changes (like uv.lock, submodules, build artifacts, etc.). - - The languages parameter defaults to None (all supported languages). - Currently only Python is supported; future languages will be auto-detected - via LanguageDetector when added to the registry. + The cache key depends only on the git commit and source file contents, + making it stable across sessions and independent of smell analysis output. + This ensures the hook fires exactly once per code state change. Args: working_directory: Path to the working directory - edited_files: Set of edited file paths - feedback_hash: Hash of the feedback content Returns: - Cache key string + Cache key string (BLAKE2b hex digest) """ tracker = GitTracker(Path(working_directory)) git_state = tracker.get_git_state() @@ -288,42 +284,38 @@ def _compute_feedback_cache_key(working_directory: str, edited_files: set[str], has_source_changes = bool(wt_calculator._get_modified_source_files_from_git()) working_tree_hash = wt_calculator.calculate_working_tree_hash(commit_sha) if has_source_changes else "clean" - files_key = ",".join(sorted(edited_files)) - key_parts = f"{commit_sha}:{working_tree_hash}:{files_key}:{feedback_hash}" + key_parts = f"{commit_sha}:{working_tree_hash}" return hashlib.blake2b(key_parts.encode(), digest_size=8).hexdigest() -def _is_feedback_cached(working_directory: str, cache_key: str) -> bool: - """Check if the feedback for this state was already shown. - - Args: - working_directory: Path to the working directory - cache_key: Cache key to check +def _load_feedback_cache(working_directory: str) -> FeedbackCacheState | None: + """Load the feedback cache state from disk. Returns: - True if feedback was already shown for this state + FeedbackCacheState if cache exists and is valid, None otherwise """ cache_path = _get_feedback_cache_path(working_directory) if not cache_path.exists(): - return False + return None try: - cache_data = json.loads(cache_path.read_text()) - return cache_data.get("last_key") == cache_key - except (json.JSONDecodeError, OSError): - return False + return FeedbackCacheState.model_validate_json(cache_path.read_text()) + except (json.JSONDecodeError, OSError, ValueError): + return None -def _save_feedback_cache(working_directory: str, cache_key: str) -> None: - """Save the feedback cache key. +def _save_feedback_cache(working_directory: str, cache_key: str, file_hashes: dict[str, str]) -> None: + """Save the feedback cache state with per-file content hashes. Args: working_directory: Path to the working directory - cache_key: Cache key to save + cache_key: Working tree cache key + file_hashes: Per-file content hashes at the time of this cache save """ cache_path = _get_feedback_cache_path(working_directory) try: - cache_path.write_text(json.dumps({"last_key": cache_key})) + state = FeedbackCacheState(last_key=cache_key, file_hashes=file_hashes) + cache_path.write_text(state.model_dump_json()) except OSError as e: logger.debug(f"Failed to save feedback cache: {e}") @@ -356,16 +348,26 @@ def handle_stop_event(session_id: str, parsed_input: "StopInput | SubagentStopIn current_metrics, delta = db.calculate_extended_complexity_metrics(stats.working_directory) - feedback_parts: list[str] = [] - cache_stable_parts: list[str] = [] + # Determine which files changed since the last time feedback was shown. + # Uses per-file content hashes from the feedback cache to filter out + # pre-existing uncommitted changes that haven't changed. + wt_calculator = WorkingTreeStateCalculator(stats.working_directory, languages=None) + cached_state = _load_feedback_cache(stats.working_directory) - # Get edited files from git (more reliable than transcript-based context coverage) - try: - wt_calculator = WorkingTreeStateCalculator(stats.working_directory, languages=None) + # Early exit: if working tree state hasn't changed since last feedback, skip + cache_key = _compute_working_tree_cache_key(stats.working_directory) + if cached_state is not None and cached_state.last_key == cache_key: + return 0 + + current_file_hashes = wt_calculator.get_source_file_content_hashes() + + if cached_state is not None: + edited_files = wt_calculator.get_files_changed_since(cached_state.file_hashes) + else: + # No cache yet (first run) — treat all modified source files as edited edited_files = wt_calculator.get_modified_source_file_paths() - except (ValueError, OSError) as e: - logger.debug(f"Failed to get modified source files: {e}") - edited_files = set() + + feedback_parts: list[str] = [] # Smell feedback: split into code-based (stable) and context-derived (unstable) # Context-derived smells (e.g., unread_related_tests) change with every transcript @@ -381,14 +383,12 @@ def handle_stop_event(session_id: str, parsed_input: "StopInput | SubagentStopIn code_feedback, has_code_smells, _ = format_code_smell_feedback(code_smells, session_id) if has_code_smells: feedback_parts.append(code_feedback) - cache_stable_parts.append(code_feedback) context_smell_feedback, has_context_smells, _ = format_code_smell_feedback(context_smells, session_id) if has_context_smells: feedback_parts.append(context_smell_feedback) # Context coverage - informational but NOT stable (changes with every Read/Glob/Grep) - # Excluded from cache hash to avoid invalidation on tool calls if settings.enable_complexity_feedback and stats.context_coverage and stats.context_coverage.has_gaps: context_feedback = format_context_coverage_feedback(stats.context_coverage) if context_feedback: @@ -399,25 +399,17 @@ def handle_stop_event(session_id: str, parsed_input: "StopInput | SubagentStopIn if dev_guidelines: feedback_parts.append(f"\n**Project Development Guidelines:**\n{dev_guidelines}") + # Save cache with current file hashes regardless of whether feedback is shown. + # This ensures the next stop event compares against this point in time. + _save_feedback_cache(stats.working_directory, cache_key, current_file_hashes) + if feedback_parts: feedback = "\n\n".join(feedback_parts) - # Cache key uses only code-based smell feedback — context coverage - # changes with every tool call and would invalidate cache - cache_content = "\n\n".join(cache_stable_parts) if cache_stable_parts else "" - feedback_hash = hashlib.blake2b(cache_content.encode(), digest_size=8).hexdigest() - feedback += ( f"\n\n---\n**Session**: `{session_id}` | Details: `slopometry solo show {session_id} --smell-details`" ) - cache_key = _compute_feedback_cache_key(stats.working_directory, edited_files, feedback_hash) - - if _is_feedback_cached(stats.working_directory, cache_key): - return 0 - - _save_feedback_cache(stats.working_directory, cache_key) - hook_output = {"decision": "block", "reason": feedback} print(json.dumps(hook_output)) return 2 diff --git a/src/slopometry/core/language_detector.py b/src/slopometry/core/language_detector.py index 7e6b6c3..4da9825 100644 --- a/src/slopometry/core/language_detector.py +++ b/src/slopometry/core/language_detector.py @@ -70,7 +70,7 @@ def _get_tracked_files(self) -> list[str]: return [line for line in result.stdout.strip().split("\n") if line] except subprocess.TimeoutExpired: - logger.warning("Language detection timed out for %s", self.repo_path) + logger.debug("Language detection timed out for %s", self.repo_path) return [] except FileNotFoundError: logger.debug("git not found, cannot detect languages in %s", self.repo_path) diff --git a/src/slopometry/core/models/hook.py b/src/slopometry/core/models/hook.py index 5135baa..1d89a9a 100644 --- a/src/slopometry/core/models/hook.py +++ b/src/slopometry/core/models/hook.py @@ -122,6 +122,21 @@ class GitState(BaseModel): commit_sha: str | None = None +class FeedbackCacheState(BaseModel): + """Persisted state of the feedback cache for change-based firing. + + Stored in .slopometry/feedback_cache.json. The hook only fires when + the working tree state changes since the last time feedback was shown. + Per-file content hashes enable computing which specific files changed. + """ + + last_key: str = Field(description="Cache key from last fire: commit_sha:working_tree_hash") + file_hashes: dict[str, str] = Field( + default_factory=dict, + description="Per-file content hashes (rel_path -> BLAKE2b hex) at time of last fire", + ) + + class HookEvent(BaseModel): """Represents a single hook invocation event.""" diff --git a/src/slopometry/core/python_feature_analyzer.py b/src/slopometry/core/python_feature_analyzer.py index 32e3b4c..e2050c6 100644 --- a/src/slopometry/core/python_feature_analyzer.py +++ b/src/slopometry/core/python_feature_analyzer.py @@ -39,7 +39,7 @@ class FeatureStats(BaseModel): orphan_comment_count: int = SmellField( label="Orphan Comments", files_field="orphan_comment_files", - guidance="Make sure inline code comments add meaningful information about non-obvious design tradeoffs or explain tech debt or performance implications. Consider if these could be docstrings or field descriptors instead", + guidance="Make sure inline code comments add meaningful information about non-obvious design tradeoffs or explain tech debt or performance implications. Consider if these could be docstrings or field descriptors instead. Prefix with `# NOTE:`, `# REASON:`, `# PERF:`, `# SAFETY:`, `# WORKAROUND:`, `# CAVEAT:`, `# COMPAT:`, or `# IMPORTANT:` to mark intentional design decisions (these are excluded from the count)", ) untracked_todo_count: int = SmellField( label="Untracked TODOs", @@ -166,7 +166,6 @@ def _analyze_single_file_features(file_path: Path) -> FeatureStats | None: total_loc, code_loc = _count_loc(content) path_str = str(file_path) - # 4 smells come from non-AST analysis; rest from FeatureVisitor non_ast_counts: dict[str, int] = { "orphan_comment_count": orphan_comments, "untracked_todo_count": untracked_todos, @@ -331,7 +330,7 @@ def _analyze_files_parallel(self, files: list[Path], max_workers: int | None = N results.append(result) except Exception as e: file_path = futures[future] - logger.warning(f"Failed to analyze features for {file_path}: {e}") + logger.debug(f"Failed to analyze features for {file_path}: {e}") results.append(None) return results diff --git a/src/slopometry/core/tokenizer.py b/src/slopometry/core/tokenizer.py index 70d935d..6851280 100644 --- a/src/slopometry/core/tokenizer.py +++ b/src/slopometry/core/tokenizer.py @@ -59,7 +59,7 @@ def count_file_tokens(file_path: Path) -> int | TokenCountError: content = file_path.read_text(encoding="utf-8") return count_tokens(content) except Exception as e: - logger.warning("Failed to read file for token counting %s: %s", file_path, e) + logger.debug("Failed to read file for token counting %s: %s", file_path, e) return TokenCountError(message=str(e), path=str(file_path)) diff --git a/src/slopometry/core/transcript_token_analyzer.py b/src/slopometry/core/transcript_token_analyzer.py index e6e7f34..0f0ebd5 100644 --- a/src/slopometry/core/transcript_token_analyzer.py +++ b/src/slopometry/core/transcript_token_analyzer.py @@ -66,10 +66,10 @@ def extract_transcript_metadata(transcript_path: Path) -> TranscriptMetadata: if agent_version is not None and model is not None and git_branch is not None: break except OSError as e: - logger.warning(f"Failed to read transcript for metadata: {e}") + logger.debug(f"Failed to read transcript for metadata: {e}") if skipped_lines: - logger.warning(f"Skipped {skipped_lines} unparseable line(s) in metadata extraction from {transcript_path}") + logger.debug(f"Skipped {skipped_lines} unparseable line(s) in metadata extraction from {transcript_path}") return TranscriptMetadata(agent_version=agent_version, model=model, git_branch=git_branch) @@ -144,10 +144,10 @@ def analyze_transcript(self, transcript_path: Path) -> TokenUsage: self._process_event(event, usage) except OSError as e: - logger.warning(f"Failed to read transcript file {transcript_path}: {e}") + logger.debug(f"Failed to read transcript file {transcript_path}: {e}") if skipped_lines: - logger.warning(f"Skipped {skipped_lines} unparseable line(s) in {transcript_path}") + logger.debug(f"Skipped {skipped_lines} unparseable line(s) in {transcript_path}") usage.final_context_input_tokens = self._latest_raw_input_tokens usage.subagent_tokens = usage.explore_subagent_tokens + usage.non_explore_subagent_tokens diff --git a/src/slopometry/core/working_tree_state.py b/src/slopometry/core/working_tree_state.py index 8b5a8ea..b7b2ca1 100644 --- a/src/slopometry/core/working_tree_state.py +++ b/src/slopometry/core/working_tree_state.py @@ -33,6 +33,54 @@ def __init__( self.working_directory = Path(working_directory).resolve() self.languages = languages + def get_source_file_content_hashes(self) -> dict[str, str]: + """Get per-file content hashes for all modified source files. + + Returns a mapping of relative path to BLAKE2b content hash for each + source file that git reports as modified (staged + unstaged). + Filters by language config and ignore patterns. + + Returns: + Dict mapping relative path strings to 16-char hex BLAKE2b hashes + """ + modified_source_files = self._get_modified_source_files_from_git() + result: dict[str, str] = {} + + for source_file in sorted(modified_source_files): + try: + content_hash = hashlib.blake2b(source_file.read_bytes(), digest_size=8).hexdigest() + rel_path = str(source_file.relative_to(self.working_directory)) + result[rel_path] = content_hash + except (OSError, ValueError): + continue + + return result + + def get_files_changed_since(self, previous_hashes: dict[str, str]) -> set[str]: + """Compute source files that changed since a previous state. + + Compares current modified source files against a previous set of + content hashes (e.g., from the last time the hook fired). A file + is considered changed if it: + - Is currently modified AND was not modified previously (new dirty file) + - Is currently modified AND has a different content hash than previously + + Args: + previous_hashes: File content hashes from the previous state + + Returns: + Set of relative path strings for files that changed + """ + current_hashes = self.get_source_file_content_hashes() + changed: set[str] = set() + + for rel_path, current_hash in current_hashes.items(): + previous_hash = previous_hashes.get(rel_path) + if previous_hash is None or previous_hash != current_hash: + changed.add(rel_path) + + return changed + def calculate_working_tree_hash(self, commit_sha: str) -> str: """Calculate a hash representing the current working tree state. @@ -48,21 +96,12 @@ def calculate_working_tree_hash(self, commit_sha: str) -> str: Returns: Unique hash representing current working tree state """ - modified_source_files = self._get_modified_source_files_from_git() + file_hashes = self.get_source_file_content_hashes() hash_components = [commit_sha] - - for source_file in sorted(modified_source_files): - try: - # Use content hash, not mtime - filters out touch/checkout false positives - # BLAKE2b with digest_size=8 gives 16 hex chars, fast on arm64/amd64 - content_hash = hashlib.blake2b(source_file.read_bytes(), digest_size=8).hexdigest() - rel_path = source_file.relative_to(self.working_directory) - hash_components.append(f"{rel_path}:{content_hash}") - except (OSError, ValueError): - continue - - hash_components.append(f"file_count:{len(modified_source_files)}") + for rel_path in sorted(file_hashes): + hash_components.append(f"{rel_path}:{file_hashes[rel_path]}") + hash_components.append(f"file_count:{len(file_hashes)}") combined = "|".join(hash_components) return hashlib.blake2b(combined.encode("utf-8"), digest_size=8).hexdigest() @@ -166,7 +205,7 @@ def get_current_commit_sha(self) -> str | None: if result.returncode == 0: return result.stdout.strip() except (subprocess.TimeoutExpired, subprocess.SubprocessError, OSError) as e: - logger.warning(f"Failed to get current commit SHA: {e}") + logger.debug(f"Failed to get current commit SHA: {e}") return None def has_uncommitted_changes(self) -> bool: @@ -186,5 +225,5 @@ def has_uncommitted_changes(self) -> bool: if result.returncode == 0: return bool(result.stdout.strip()) except (subprocess.TimeoutExpired, subprocess.SubprocessError, OSError) as e: - logger.warning(f"Failed to check for uncommitted changes: {e}") + logger.debug(f"Failed to check for uncommitted changes: {e}") return False diff --git a/src/slopometry/summoner/services/current_impact_service.py b/src/slopometry/summoner/services/current_impact_service.py index dd1223f..665c84a 100644 --- a/src/slopometry/summoner/services/current_impact_service.py +++ b/src/slopometry/summoner/services/current_impact_service.py @@ -1,11 +1,20 @@ """Current (uncommitted) impact analysis service.""" +from __future__ import annotations + import logging import shutil +import subprocess from datetime import datetime from pathlib import Path +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from slopometry.core.git_tracker import GitTracker from slopometry.core.complexity_analyzer import ComplexityAnalyzer +from slopometry.core.context_coverage_analyzer import ContextCoverageAnalyzer +from slopometry.core.git_tracker import GitOperationError from slopometry.core.database import EventDatabase from slopometry.core.models.baseline import CurrentChangesAnalysis, GalenMetrics, QPEScore, RepoBaseline, SmellAdvantage from slopometry.core.models.complexity import ComplexityDelta, ExtendedComplexityMetrics @@ -67,8 +76,6 @@ def analyze_uncommitted_changes( assessment = self.impact_calculator.calculate_impact(current_delta, baseline) - from slopometry.core.context_coverage_analyzer import ContextCoverageAnalyzer - coverage_analyzer = ContextCoverageAnalyzer(repo_path) blind_spots = coverage_analyzer.get_affected_dependents(set(changed_files)) @@ -121,12 +128,19 @@ def get_token_count(path_str: str) -> int: smell_advantages=smell_advantages, ) + # Maximum number of commits to walk back when searching for code changes + MAX_COMMIT_WALKBACK = 10 + def analyze_previous_commit( self, repo_path: Path, baseline: RepoBaseline, ) -> CurrentChangesAnalysis | None: - """Analyze the previous commit (HEAD) against its parent (HEAD~1). + """Analyze the most recent commit with code file changes. + + Walks back from HEAD looking for a commit pair where Python/Rust files + changed (up to MAX_COMMIT_WALKBACK commits). This handles cases where + recent commits only touched non-code files (e.g., pyproject.toml, docs). Used as fallback when there are no uncommitted changes. @@ -135,9 +149,9 @@ def analyze_previous_commit( baseline: Pre-computed repository baseline Returns: - CurrentChangesAnalysis or None if analysis fails + CurrentChangesAnalysis or None if no code changes found """ - from slopometry.core.git_tracker import GitOperationError, GitTracker + from slopometry.core.git_tracker import GitTracker repo_path = repo_path.resolve() git_tracker = GitTracker(repo_path) @@ -146,40 +160,78 @@ def analyze_previous_commit( if not git_tracker.has_previous_commit(): return None - head_sha = git_tracker._get_current_commit_sha() - if not head_sha: - return None + for offset in range(self.MAX_COMMIT_WALKBACK): + child_ref = f"HEAD~{offset}" + parent_ref = f"HEAD~{offset + 1}" - import subprocess + try: + child_result = subprocess.run( + ["git", "rev-parse", child_ref], + cwd=repo_path, + capture_output=True, + text=True, + timeout=5, + ) + parent_result = subprocess.run( + ["git", "rev-parse", parent_ref], + cwd=repo_path, + capture_output=True, + text=True, + timeout=5, + ) + if child_result.returncode != 0 or parent_result.returncode != 0: + logger.debug(f"git rev-parse failed at offset {offset}, stopping walkback") + return None + child_sha = child_result.stdout.strip() + parent_sha = parent_result.stdout.strip() + except Exception as e: + logger.debug(f"Failed to resolve commit at offset {offset}: {e}") + return None - try: - result = subprocess.run( - ["git", "rev-parse", "HEAD~1"], - cwd=repo_path, - capture_output=True, - text=True, - timeout=5, - ) - if result.returncode != 0: - logger.debug(f"git rev-parse HEAD~1 failed: {result.stderr.strip()}") + try: + changed_files = git_tracker.get_changed_python_files(parent_sha, child_sha) + except GitOperationError as e: + logger.debug(f"Failed to get changed files between {parent_sha[:8]} and {child_sha[:8]}: {e}") return None - parent_sha = result.stdout.strip() - except Exception as e: - logger.debug(f"Failed to get parent commit SHA: {e}") - return None - try: - changed_files = git_tracker.get_changed_python_files(parent_sha, head_sha) - except GitOperationError as e: - logger.debug(f"Failed to get changed files between {parent_sha[:8]} and {head_sha[:8]}: {e}") - return None + if not changed_files: + logger.debug(f"No code file changes in {child_sha[:8]}, walking back") + continue - if not changed_files: - return None + return self._analyze_commit_pair( + repo_path, analyzer, git_tracker, baseline, parent_sha, child_sha, changed_files + ) + + logger.debug(f"No code changes found in last {self.MAX_COMMIT_WALKBACK} commits") + return None + + def _analyze_commit_pair( + self, + repo_path: Path, + analyzer: ComplexityAnalyzer, + git_tracker: GitTracker, + baseline: RepoBaseline, + parent_sha: str, + child_sha: str, + changed_files: list[str], + ) -> CurrentChangesAnalysis | None: + """Analyze complexity between two commits. + Args: + repo_path: Path to the repository + analyzer: Complexity analyzer instance + git_tracker: Git tracker instance + baseline: Pre-computed repository baseline + parent_sha: Parent commit SHA + child_sha: Child commit SHA + changed_files: List of changed code file paths + + Returns: + CurrentChangesAnalysis or None if extraction/analysis fails + """ try: with git_tracker.extract_files_from_commit_ctx(parent_sha) as parent_dir: - with git_tracker.extract_files_from_commit_ctx(head_sha) as head_dir: + with git_tracker.extract_files_from_commit_ctx(child_sha) as head_dir: if not parent_dir or not head_dir: logger.debug( f"Failed to extract files from commits: parent_dir={parent_dir}, head_dir={head_dir}" @@ -189,14 +241,12 @@ def analyze_previous_commit( parent_metrics = analyzer.analyze_extended_complexity(parent_dir) head_metrics = analyzer.analyze_extended_complexity(head_dir) except GitOperationError as e: - logger.debug(f"Failed to extract or analyze commits {parent_sha[:8]}..{head_sha[:8]}: {e}") + logger.debug(f"Failed to extract or analyze commits {parent_sha[:8]}..{child_sha[:8]}: {e}") return None current_delta, smell_advantages, _, _ = self._compute_delta(parent_metrics, head_metrics) assessment = self.impact_calculator.calculate_impact(current_delta, baseline) - from slopometry.core.context_coverage_analyzer import ContextCoverageAnalyzer - coverage_analyzer = ContextCoverageAnalyzer(repo_path) blind_spots = coverage_analyzer.get_affected_dependents(set(changed_files)) @@ -217,7 +267,7 @@ def analyze_previous_commit( repository_path=str(repo_path), analysis_timestamp=datetime.now(), source=AnalysisSource.PREVIOUS_COMMIT, - analyzed_commit_sha=head_sha[:8], + analyzed_commit_sha=child_sha[:8], base_commit_sha=parent_sha[:8], changed_files=changed_files, current_metrics=head_metrics, @@ -225,7 +275,7 @@ def analyze_previous_commit( assessment=assessment, baseline=baseline, blind_spots=blind_spots, - filtered_coverage=None, # Coverage not meaningful for committed changes + filtered_coverage=None, blind_spot_tokens=blind_spot_tokens, changed_files_tokens=changed_files_tokens, complete_picture_context_size=complete_picture_context_size, diff --git a/tests/test_feedback_cache.py b/tests/test_feedback_cache.py index 546faa9..7589106 100644 --- a/tests/test_feedback_cache.py +++ b/tests/test_feedback_cache.py @@ -1,16 +1,14 @@ """Tests for feedback cache functionality to prevent repeated feedback display.""" -import hashlib -import json import subprocess import tempfile import time from pathlib import Path from slopometry.core.hook_handler import ( - _compute_feedback_cache_key, + _compute_working_tree_cache_key, _get_feedback_cache_path, - _is_feedback_cached, + _load_feedback_cache, _save_feedback_cache, ) from slopometry.core.working_tree_state import WorkingTreeStateCalculator @@ -38,31 +36,23 @@ def _commit_all(path: Path, message: str = "commit") -> None: subprocess.run(["git", "commit", "-m", message], cwd=path, capture_output=True) -class TestFeedbackCacheKeyComputation: - """Tests for _compute_feedback_cache_key function.""" +class TestWorkingTreeCacheKeyComputation: + """Tests for _compute_working_tree_cache_key function.""" - def test_compute_feedback_cache_key__same_feedback_different_sessions_same_key(self): - """Verify that identical feedback with different session_ids produces same cache key. - - This is the primary bug fix - session_id should not affect the cache key. - """ + def test_compute_working_tree_cache_key__stable_across_calls(self): + """Verify repeated calls with same state produce same cache key.""" with tempfile.TemporaryDirectory() as tmpdir: tmppath = Path(tmpdir) _init_git_repo(tmppath) (tmppath / "test.py").write_text("def foo(): pass") _commit_all(tmppath) - # Same feedback content - feedback_content = "Code smells detected: orphan comments" - feedback_hash = hashlib.blake2b(feedback_content.encode(), digest_size=8).hexdigest() + key1 = _compute_working_tree_cache_key(str(tmppath)) + key2 = _compute_working_tree_cache_key(str(tmppath)) - # Compute cache key - key1 = _compute_feedback_cache_key(str(tmppath), set(), feedback_hash) - key2 = _compute_feedback_cache_key(str(tmppath), set(), feedback_hash) + assert key1 == key2, "Same state should produce same cache key" - assert key1 == key2, "Same feedback should produce same cache key" - - def test_compute_feedback_cache_key__uv_lock_changes_dont_invalidate(self): + def test_compute_working_tree_cache_key__uv_lock_changes_dont_invalidate(self): """Verify non-Python file changes (uv.lock) don't cause cache key changes.""" with tempfile.TemporaryDirectory() as tmpdir: tmppath = Path(tmpdir) @@ -70,17 +60,16 @@ def test_compute_feedback_cache_key__uv_lock_changes_dont_invalidate(self): (tmppath / "test.py").write_text("def foo(): pass") _commit_all(tmppath) - feedback_hash = "feedbackhash1234" - key_before = _compute_feedback_cache_key(str(tmppath), set(), feedback_hash) + key_before = _compute_working_tree_cache_key(str(tmppath)) # Modify uv.lock (non-Python file) (tmppath / "uv.lock").write_text("some lock content") - key_after = _compute_feedback_cache_key(str(tmppath), set(), feedback_hash) + key_after = _compute_working_tree_cache_key(str(tmppath)) assert key_before == key_after, "uv.lock changes should not invalidate cache" - def test_compute_feedback_cache_key__pycache_changes_dont_invalidate(self): + def test_compute_working_tree_cache_key__pycache_changes_dont_invalidate(self): """Verify __pycache__/*.pyc files don't affect the cache key.""" with tempfile.TemporaryDirectory() as tmpdir: tmppath = Path(tmpdir) @@ -88,19 +77,18 @@ def test_compute_feedback_cache_key__pycache_changes_dont_invalidate(self): (tmppath / "test.py").write_text("def foo(): pass") _commit_all(tmppath) - feedback_hash = "feedbackhash1234" - key_before = _compute_feedback_cache_key(str(tmppath), set(), feedback_hash) + key_before = _compute_working_tree_cache_key(str(tmppath)) # Create __pycache__ with .pyc file pycache = tmppath / "__pycache__" pycache.mkdir() (pycache / "test.cpython-312.pyc").write_bytes(b"\x00\x00\x00\x00") - key_after = _compute_feedback_cache_key(str(tmppath), set(), feedback_hash) + key_after = _compute_working_tree_cache_key(str(tmppath)) assert key_before == key_after, "__pycache__ should not invalidate cache" - def test_compute_feedback_cache_key__compiled_extensions_dont_invalidate(self): + def test_compute_working_tree_cache_key__compiled_extensions_dont_invalidate(self): """Verify compiled extensions (.so, .pyd) don't affect the cache key.""" with tempfile.TemporaryDirectory() as tmpdir: tmppath = Path(tmpdir) @@ -108,18 +96,17 @@ def test_compute_feedback_cache_key__compiled_extensions_dont_invalidate(self): (tmppath / "test.py").write_text("def foo(): pass") _commit_all(tmppath) - feedback_hash = "feedbackhash1234" - key_before = _compute_feedback_cache_key(str(tmppath), set(), feedback_hash) + key_before = _compute_working_tree_cache_key(str(tmppath)) # Create compiled extension files (tmppath / "module.so").write_bytes(b"\x7fELF") (tmppath / "module.pyd").write_bytes(b"MZ") - key_after = _compute_feedback_cache_key(str(tmppath), set(), feedback_hash) + key_after = _compute_working_tree_cache_key(str(tmppath)) assert key_before == key_after, "Compiled extensions should not invalidate cache" - def test_compute_feedback_cache_key__python_content_changes_invalidate(self): + def test_compute_working_tree_cache_key__python_content_changes_invalidate(self): """Verify actual Python code changes invalidate the cache.""" with tempfile.TemporaryDirectory() as tmpdir: tmppath = Path(tmpdir) @@ -127,17 +114,16 @@ def test_compute_feedback_cache_key__python_content_changes_invalidate(self): (tmppath / "test.py").write_text("def foo(): pass") _commit_all(tmppath) - feedback_hash = "feedbackhash1234" - key_before = _compute_feedback_cache_key(str(tmppath), set(), feedback_hash) + key_before = _compute_working_tree_cache_key(str(tmppath)) # Modify Python file content (tmppath / "test.py").write_text("def foo(): return 42") - key_after = _compute_feedback_cache_key(str(tmppath), set(), feedback_hash) + key_after = _compute_working_tree_cache_key(str(tmppath)) assert key_before != key_after, "Python content changes should invalidate cache" - def test_compute_feedback_cache_key__empty_edited_files_stable_key(self): + def test_compute_working_tree_cache_key__stable_when_no_modifications(self): """Verify cache key is stable when no Python files are modified.""" with tempfile.TemporaryDirectory() as tmpdir: tmppath = Path(tmpdir) @@ -145,12 +131,9 @@ def test_compute_feedback_cache_key__empty_edited_files_stable_key(self): (tmppath / "test.py").write_text("def foo(): pass") _commit_all(tmppath) - feedback_hash = "feedbackhash1234" - - # Multiple calls with empty edited_files - key1 = _compute_feedback_cache_key(str(tmppath), set(), feedback_hash) - key2 = _compute_feedback_cache_key(str(tmppath), set(), feedback_hash) - key3 = _compute_feedback_cache_key(str(tmppath), set(), feedback_hash) + key1 = _compute_working_tree_cache_key(str(tmppath)) + key2 = _compute_working_tree_cache_key(str(tmppath)) + key3 = _compute_working_tree_cache_key(str(tmppath)) assert key1 == key2 == key3, "Cache key should be stable" @@ -211,10 +194,10 @@ def test_working_tree_hash__actual_content_change_invalidates(self): class TestFeedbackCachePersistence: - """Tests for feedback cache persistence.""" + """Tests for feedback cache persistence using FeedbackCacheState.""" - def test_feedback_cache__persists_across_sessions(self): - """Verify cache file persists and works across multiple calls.""" + def test_feedback_cache__persists_and_loads_correctly(self): + """Verify cache file persists with file hashes and loads correctly.""" with tempfile.TemporaryDirectory() as tmpdir: tmppath = Path(tmpdir) _init_git_repo(tmppath) @@ -222,24 +205,26 @@ def test_feedback_cache__persists_across_sessions(self): _commit_all(tmppath) cache_key = "test_cache_key_123" + file_hashes = {"src/app.py": "abcdef1234567890"} - # First call - should not be cached - assert not _is_feedback_cached(str(tmppath), cache_key) + # First load - should return None + assert _load_feedback_cache(str(tmppath)) is None # Save to cache - _save_feedback_cache(str(tmppath), cache_key) + _save_feedback_cache(str(tmppath), cache_key, file_hashes) - # Second call - should be cached - assert _is_feedback_cached(str(tmppath), cache_key) + # Second load - should return state + loaded = _load_feedback_cache(str(tmppath)) + assert loaded is not None + assert loaded.last_key == cache_key + assert loaded.file_hashes == file_hashes # Verify cache file exists cache_path = _get_feedback_cache_path(str(tmppath)) assert cache_path.exists() - cache_data = json.loads(cache_path.read_text()) - assert cache_data["last_key"] == cache_key - def test_feedback_cache__different_key_not_cached(self): - """Verify that a different cache key is not considered cached.""" + def test_feedback_cache__different_key_detected(self): + """Verify that a different cache key is detected as a change.""" with tempfile.TemporaryDirectory() as tmpdir: tmppath = Path(tmpdir) _init_git_repo(tmppath) @@ -247,10 +232,34 @@ def test_feedback_cache__different_key_not_cached(self): _commit_all(tmppath) # Save one key - _save_feedback_cache(str(tmppath), "key1") + _save_feedback_cache(str(tmppath), "key1", {}) + + # Load and check — key mismatch means working tree changed + loaded = _load_feedback_cache(str(tmppath)) + assert loaded is not None + assert loaded.last_key != "key2" + + def test_feedback_cache__file_hashes_enable_change_detection(self): + """Verify saved file hashes allow detecting which files changed.""" + with tempfile.TemporaryDirectory() as tmpdir: + tmppath = Path(tmpdir) + _init_git_repo(tmppath) + (tmppath / "test.py").write_text("def foo(): pass") + _commit_all(tmppath) + + # Save cache with file hashes + file_hashes = {"app.py": "hash1", "utils.py": "hash2"} + _save_feedback_cache(str(tmppath), "cache_key", file_hashes) - # Check different key - should not be cached - assert not _is_feedback_cached(str(tmppath), "key2") + loaded = _load_feedback_cache(str(tmppath)) + assert loaded is not None + assert loaded.file_hashes == file_hashes + + # Use file hashes with WorkingTreeStateCalculator.get_files_changed_since + calculator = WorkingTreeStateCalculator(str(tmppath)) + changed = calculator.get_files_changed_since(loaded.file_hashes) + # No actual git changes, so nothing should be "changed" + assert changed == set() class TestModifiedPythonFilesDetection: @@ -364,8 +373,7 @@ def test_feedback_cache__submodule_changes_dont_invalidate(self): ) _commit_all(main_repo, "add submodule") - feedback_hash = "feedbackhash1234" - key_before = _compute_feedback_cache_key(str(main_repo), set(), feedback_hash) + key_before = _compute_working_tree_cache_key(str(main_repo)) # Update submodule (creates a change in main repo's git status) subprocess.run( @@ -374,7 +382,7 @@ def test_feedback_cache__submodule_changes_dont_invalidate(self): capture_output=True, ) - key_after = _compute_feedback_cache_key(str(main_repo), set(), feedback_hash) + key_after = _compute_working_tree_cache_key(str(main_repo)) assert key_before == key_after, "Submodule changes should not invalidate cache" @@ -382,12 +390,11 @@ def test_feedback_cache__submodule_changes_dont_invalidate(self): class TestNewUntrackedFiles: """Tests for new untracked Python file handling.""" - def test_feedback_cache__new_untracked_python_files_invalidate(self): - """Verify that new Python files (even untracked) invalidate cache. + def test_feedback_cache__new_untracked_python_files_dont_invalidate(self): + """Verify that new untracked Python files don't invalidate cache. - Note: New untracked files won't appear in git diff, but they will be - detected by git ls-files if not gitignored. The behavior here depends - on whether git considers them "changed" - typically they won't be. + New untracked files won't appear in git diff, so the working tree + cache key remains unchanged. Only tracked file changes matter. """ with tempfile.TemporaryDirectory() as tmpdir: tmppath = Path(tmpdir) @@ -395,16 +402,13 @@ def test_feedback_cache__new_untracked_python_files_invalidate(self): (tmppath / "existing.py").write_text("def existing(): pass") _commit_all(tmppath) - feedback_hash = "feedbackhash1234" - key_before = _compute_feedback_cache_key(str(tmppath), set(), feedback_hash) + key_before = _compute_working_tree_cache_key(str(tmppath)) # Add new untracked Python file (tmppath / "new_file.py").write_text("def new(): pass") - key_after = _compute_feedback_cache_key(str(tmppath), set(), feedback_hash) + key_after = _compute_working_tree_cache_key(str(tmppath)) - # New untracked files don't show in git diff, so key should be same - # This is expected behavior - only tracked file changes matter assert key_before == key_after, "Untracked files don't appear in git diff" @@ -420,14 +424,13 @@ def test_feedback_cache__dist_directory_ignored(self): (tmppath / "src" / "module.py").write_text("def foo(): pass") _commit_all(tmppath) - feedback_hash = "feedbackhash1234" - key_before = _compute_feedback_cache_key(str(tmppath), set(), feedback_hash) + key_before = _compute_working_tree_cache_key(str(tmppath)) # Create dist directory with Python file (shouldn't affect cache) (tmppath / "dist").mkdir() (tmppath / "dist" / "generated.py").write_text("# Generated") - key_after = _compute_feedback_cache_key(str(tmppath), set(), feedback_hash) + key_after = _compute_working_tree_cache_key(str(tmppath)) assert key_before == key_after, "dist/ directory should be ignored" @@ -440,15 +443,14 @@ def test_feedback_cache__build_directory_ignored(self): (tmppath / "src" / "module.py").write_text("def foo(): pass") _commit_all(tmppath) - feedback_hash = "feedbackhash1234" - key_before = _compute_feedback_cache_key(str(tmppath), set(), feedback_hash) + key_before = _compute_working_tree_cache_key(str(tmppath)) # Create build directory with Python file (shouldn't affect cache) (tmppath / "build").mkdir() (tmppath / "build" / "lib").mkdir() (tmppath / "build" / "lib" / "module.py").write_text("# Built") - key_after = _compute_feedback_cache_key(str(tmppath), set(), feedback_hash) + key_after = _compute_working_tree_cache_key(str(tmppath)) assert key_before == key_after, "build/ directory should be ignored" @@ -461,14 +463,13 @@ def test_feedback_cache__egg_info_directory_ignored(self): (tmppath / "src" / "module.py").write_text("def foo(): pass") _commit_all(tmppath) - feedback_hash = "feedbackhash1234" - key_before = _compute_feedback_cache_key(str(tmppath), set(), feedback_hash) + key_before = _compute_working_tree_cache_key(str(tmppath)) # Create egg-info directory (shouldn't affect cache) (tmppath / "package.egg-info").mkdir() (tmppath / "package.egg-info" / "PKG-INFO").write_text("Name: package") - key_after = _compute_feedback_cache_key(str(tmppath), set(), feedback_hash) + key_after = _compute_working_tree_cache_key(str(tmppath)) assert key_before == key_after, "*.egg-info directory should be ignored" @@ -542,12 +543,7 @@ def test_get_modified_source_file_paths__returns_relative_string_paths(self): def test_feedback_cache__slopometry_dir_visibility_does_not_affect_key(): """Verify cache key is stable whether .slopometry/ is in gitignore or not. - This is the critical test: the cache key should remain stable when: - 1. .slopometry/ is NOT in gitignore (shows as untracked) - 2. .slopometry/ IS in gitignore (hidden from git) - 3. .gitignore is modified but not committed - - The key only depends on: commit SHA, Python file content, edited files, and feedback hash. + The key only depends on: commit SHA and source file content hashes. """ with tempfile.TemporaryDirectory() as tmpdir: tmppath = Path(tmpdir) @@ -561,21 +557,19 @@ def test_feedback_cache__slopometry_dir_visibility_does_not_affect_key(): # Modify Python file (uncommitted) (tmppath / "test.py").write_text("def foo(): return 1") - feedback_hash = "test_feedback_hash" - # Scenario 1: .slopometry NOT in gitignore - key1 = _compute_feedback_cache_key(str(tmppath), {"test.py"}, feedback_hash) + key1 = _compute_working_tree_cache_key(str(tmppath)) # Save cache (creates .slopometry/ directory) - _save_feedback_cache(str(tmppath), key1) + _save_feedback_cache(str(tmppath), key1, {"test.py": "somehash"}) # Scenario 2: Add .slopometry to gitignore (uncommitted) (tmppath / ".gitignore").write_text("__pycache__/\n.slopometry/\n") - key2 = _compute_feedback_cache_key(str(tmppath), {"test.py"}, feedback_hash) + key2 = _compute_working_tree_cache_key(str(tmppath)) # Scenario 3: Remove .slopometry from gitignore (tmppath / ".gitignore").write_text("__pycache__/\n") - key3 = _compute_feedback_cache_key(str(tmppath), {"test.py"}, feedback_hash) + key3 = _compute_working_tree_cache_key(str(tmppath)) assert key1 == key2, "Adding .slopometry to gitignore should not change cache key" assert key2 == key3, "Removing .slopometry from gitignore should not change cache key" @@ -591,13 +585,12 @@ def test_feedback_cache__gitignore_modification_does_not_invalidate(): (tmppath / ".gitignore").write_text("*.pyc\n") _commit_all(tmppath) - feedback_hash = "feedbackhash1234" - key_before = _compute_feedback_cache_key(str(tmppath), set(), feedback_hash) + key_before = _compute_working_tree_cache_key(str(tmppath)) # Modify .gitignore (uncommitted) (tmppath / ".gitignore").write_text("*.pyc\n.slopometry/\n__pycache__/\n") - key_after = _compute_feedback_cache_key(str(tmppath), set(), feedback_hash) + key_after = _compute_working_tree_cache_key(str(tmppath)) assert key_before == key_after, ".gitignore modifications should not invalidate cache" @@ -611,13 +604,12 @@ def test_feedback_cache__env_file_changes_dont_invalidate(): (tmppath / ".env").write_text("SECRET=old") _commit_all(tmppath) - feedback_hash = "feedbackhash1234" - key_before = _compute_feedback_cache_key(str(tmppath), set(), feedback_hash) + key_before = _compute_working_tree_cache_key(str(tmppath)) # Modify .env (tracked but non-source) (tmppath / ".env").write_text("SECRET=new") - key_after = _compute_feedback_cache_key(str(tmppath), set(), feedback_hash) + key_after = _compute_working_tree_cache_key(str(tmppath)) assert key_before == key_after, ".env changes should not invalidate cache" @@ -631,12 +623,11 @@ def test_feedback_cache__markdown_changes_dont_invalidate(): (tmppath / "README.md").write_text("# Old readme") _commit_all(tmppath) - feedback_hash = "feedbackhash1234" - key_before = _compute_feedback_cache_key(str(tmppath), set(), feedback_hash) + key_before = _compute_working_tree_cache_key(str(tmppath)) # Modify markdown (tracked but non-source) (tmppath / "README.md").write_text("# New readme with changes") - key_after = _compute_feedback_cache_key(str(tmppath), set(), feedback_hash) + key_after = _compute_working_tree_cache_key(str(tmppath)) assert key_before == key_after, ".md changes should not invalidate cache" diff --git a/tests/test_working_tree_state.py b/tests/test_working_tree_state.py index e582645..67fe144 100644 --- a/tests/test_working_tree_state.py +++ b/tests/test_working_tree_state.py @@ -1,3 +1,4 @@ +import logging import subprocess import tempfile import time @@ -168,26 +169,175 @@ def test_has_uncommitted_changes__detects_status(): def test_get_current_commit_sha__handles_git_failure(caplog): - """Test git failure returns None and logs warning.""" - with patch("subprocess.run") as mock_run: - mock_run.side_effect = subprocess.TimeoutExpired("git rev-parse", 5) + """Test git failure returns None and logs debug message.""" + with caplog.at_level(logging.DEBUG, logger="slopometry.core.working_tree_state"): + with patch("subprocess.run") as mock_run: + mock_run.side_effect = subprocess.TimeoutExpired("git rev-parse", 5) - with tempfile.TemporaryDirectory() as temp_dir: - calculator = WorkingTreeStateCalculator(temp_dir) - result = calculator.get_current_commit_sha() + with tempfile.TemporaryDirectory() as temp_dir: + calculator = WorkingTreeStateCalculator(temp_dir) + result = calculator.get_current_commit_sha() assert result is None assert "Failed to get current commit SHA" in caplog.text def test_has_uncommitted_changes__handles_git_failure(caplog): - """Test git failure returns False and logs warning.""" - with patch("subprocess.run") as mock_run: - mock_run.side_effect = subprocess.SubprocessError("git status failed") + """Test git failure returns False and logs debug message.""" + with caplog.at_level(logging.DEBUG, logger="slopometry.core.working_tree_state"): + with patch("subprocess.run") as mock_run: + mock_run.side_effect = subprocess.SubprocessError("git status failed") - with tempfile.TemporaryDirectory() as temp_dir: - calculator = WorkingTreeStateCalculator(temp_dir) - result = calculator.has_uncommitted_changes() + with tempfile.TemporaryDirectory() as temp_dir: + calculator = WorkingTreeStateCalculator(temp_dir) + result = calculator.has_uncommitted_changes() assert result is False assert "Failed to check for uncommitted changes" in caplog.text + + +def test_get_source_file_content_hashes__returns_hashes_for_modified_files(): + """Test returns content hashes for files that git reports as modified.""" + with tempfile.TemporaryDirectory() as temp_dir: + temp_path = Path(temp_dir) + _init_git_repo(temp_path) + (temp_path / "module.py").write_text("def foo(): pass") + _commit_all(temp_path) + + # Modify the file to create an uncommitted change + (temp_path / "module.py").write_text("def foo(): return 42") + + calculator = WorkingTreeStateCalculator(temp_dir) + hashes = calculator.get_source_file_content_hashes() + + assert "module.py" in hashes + assert len(hashes["module.py"]) == 16 # BLAKE2b digest_size=8 -> 16 hex chars + + +def test_get_source_file_content_hashes__empty_when_no_changes(): + """Test returns empty dict when no files are modified.""" + with tempfile.TemporaryDirectory() as temp_dir: + temp_path = Path(temp_dir) + _init_git_repo(temp_path) + (temp_path / "module.py").write_text("def foo(): pass") + _commit_all(temp_path) + + calculator = WorkingTreeStateCalculator(temp_dir) + hashes = calculator.get_source_file_content_hashes() + + assert hashes == {} + + +def test_get_files_changed_since__detects_new_modifications(): + """Test detects files modified after baseline was captured.""" + with tempfile.TemporaryDirectory() as temp_dir: + temp_path = Path(temp_dir) + _init_git_repo(temp_path) + (temp_path / "module.py").write_text("def foo(): pass") + _commit_all(temp_path) + + calculator = WorkingTreeStateCalculator(temp_dir) + + # Capture baseline (no uncommitted changes) + baseline_hashes = calculator.get_source_file_content_hashes() + assert baseline_hashes == {} + + # Modify a file during the "session" + (temp_path / "module.py").write_text("def foo(): return 42") + + session_edits = calculator.get_files_changed_since(baseline_hashes) + assert "module.py" in session_edits + + +def test_get_files_changed_since__detects_content_changes_to_preexisting_dirty_files(): + """Test detects further edits to files that were already dirty at session start.""" + with tempfile.TemporaryDirectory() as temp_dir: + temp_path = Path(temp_dir) + _init_git_repo(temp_path) + (temp_path / "module.py").write_text("def foo(): pass") + _commit_all(temp_path) + + # Pre-existing uncommitted change + (temp_path / "module.py").write_text("def foo(): return 1") + + calculator = WorkingTreeStateCalculator(temp_dir) + baseline_hashes = calculator.get_source_file_content_hashes() + assert "module.py" in baseline_hashes + + # Further edit during session + (temp_path / "module.py").write_text("def foo(): return 99") + + session_edits = calculator.get_files_changed_since(baseline_hashes) + assert "module.py" in session_edits + + +def test_get_files_changed_since__excludes_unchanged_preexisting_modifications(): + """Test pre-existing uncommitted changes are NOT treated as session edits. + + This is the primary regression test for the bug: repos with pre-existing + uncommitted changes in subdirectories (like ghost_town/) should not trigger + smell feedback when the session only reads files. + """ + with tempfile.TemporaryDirectory() as temp_dir: + temp_path = Path(temp_dir) + _init_git_repo(temp_path) + + # Create files in a subdirectory (simulating ghost_town/) + subdir = temp_path / "ghost_town" / "src" + subdir.mkdir(parents=True) + (subdir / "app.py").write_text("def main(): pass") + (subdir / "utils.py").write_text("def helper(): pass") + _commit_all(temp_path) + + # Pre-existing uncommitted changes (made before this session) + (subdir / "app.py").write_text("def main(): return True") + (subdir / "utils.py").write_text("def helper(): return 42") + + calculator = WorkingTreeStateCalculator(temp_dir) + baseline_hashes = calculator.get_source_file_content_hashes() + assert len(baseline_hashes) == 2 + + # Session does NOT modify any files (read-only session) + session_edits = calculator.get_files_changed_since(baseline_hashes) + assert session_edits == set(), "Pre-existing changes should not be treated as session edits" + + +def test_get_files_changed_since__empty_when_all_preexisting(): + """Test returns empty set when all modifications existed at session start.""" + with tempfile.TemporaryDirectory() as temp_dir: + temp_path = Path(temp_dir) + _init_git_repo(temp_path) + (temp_path / "a.py").write_text("x = 1") + (temp_path / "b.py").write_text("y = 2") + _commit_all(temp_path) + + # Pre-existing changes + (temp_path / "a.py").write_text("x = 10") + (temp_path / "b.py").write_text("y = 20") + + calculator = WorkingTreeStateCalculator(temp_dir) + baseline_hashes = calculator.get_source_file_content_hashes() + + session_edits = calculator.get_files_changed_since(baseline_hashes) + assert session_edits == set() + + +def test_calculate_working_tree_hash__refactored_produces_consistent_results(): + """Test the refactored calculate_working_tree_hash produces stable results. + + Regression test to verify the DRY refactor (using get_source_file_content_hashes) + doesn't change the hash output behavior. + """ + with tempfile.TemporaryDirectory() as temp_dir: + temp_path = Path(temp_dir) + _init_git_repo(temp_path) + (temp_path / "test.py").write_text("content") + _commit_all(temp_path) + (temp_path / "test.py").write_text("modified content") + + calculator = WorkingTreeStateCalculator(temp_dir) + hash1 = calculator.calculate_working_tree_hash("abc123") + hash2 = calculator.calculate_working_tree_hash("abc123") + + assert hash1 == hash2, "Refactored hash should be deterministic" + assert len(hash1) == 16 diff --git a/uv.lock b/uv.lock index 722872f..9a923c4 100644 --- a/uv.lock +++ b/uv.lock @@ -2749,7 +2749,7 @@ wheels = [ [[package]] name = "slopometry" -version = "2026.3.4" +version = "2026.3.20" source = { editable = "." } dependencies = [ { name = "click" },