Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
95 changes: 57 additions & 38 deletions src/agentready/assessors/stub_assessors.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,12 @@
enhanced later with more sophisticated detection and scoring logic.
"""

from pathlib import Path

from ..models.attribute import Attribute
from ..models.finding import Citation, Finding, Remediation
from ..models.repository import Repository
from ..utils.subprocess_utils import safe_subprocess_run
from .base import BaseAssessor


Expand Down Expand Up @@ -539,51 +542,67 @@ def assess(self, repository: Repository) -> Finding:
- 100: All files <500 lines
- 75-99: Some files 500-1000 lines
- 0-74: Files >1000 lines exist

Note: Uses git ls-files to respect .gitignore (fixes issue #245).
"""
# Count files by size
large_files = [] # 500-1000 lines
huge_files = [] # >1000 lines
large_files: list[tuple[Path, int]] = [] # 500-1000 lines
huge_files: list[tuple[Path, int]] = [] # >1000 lines
total_files = 0

# Check common source file extensions
extensions = {
".py",
".js",
".ts",
".jsx",
".tsx",
".go",
".java",
".rb",
".rs",
".cpp",
".c",
".h",
}
extensions = [
"py",
"js",
"ts",
"jsx",
"tsx",
"go",
"java",
"rb",
"rs",
"cpp",
"c",
"h",
]

for ext in extensions:
pattern = f"**/*{ext}"
try:
# Get git-tracked files (respects .gitignore)
# This fixes issue #245 where .venv files were incorrectly scanned
try:
patterns = [f"*.{ext}" for ext in extensions]
result = safe_subprocess_run(
["git", "ls-files"] + patterns,
cwd=repository.path,
capture_output=True,
text=True,
timeout=30,
check=True,
)
tracked_files = [f for f in result.stdout.strip().split("\n") if f]
except Exception:
# Fallback for non-git repos: use glob (less accurate)
tracked_files = []
for ext in extensions:
tracked_files.extend(
str(f.relative_to(repository.path))
for f in repository.path.rglob(f"*.{ext}")
if f.is_file()
)

for file_path in repository.path.glob(pattern):
if file_path.is_file():
try:
with open(file_path, "r", encoding="utf-8") as f:
lines = len(f.readlines())
total_files += 1

if lines > 1000:
huge_files.append(
(file_path.relative_to(repository.path), lines)
)
elif lines > 500:
large_files.append(
(file_path.relative_to(repository.path), lines)
)
except (OSError, UnicodeDecodeError):
# Skip files we can't read
pass
except Exception:
# Count lines in tracked files
for rel_path in tracked_files:
file_path = repository.path / rel_path
try:
with open(file_path, "r", encoding="utf-8") as f:
lines = len(f.readlines())
total_files += 1

if lines > 1000:
huge_files.append((Path(rel_path), lines))
elif lines > 500:
large_files.append((Path(rel_path), lines))
except (OSError, UnicodeDecodeError):
# Skip files we can't read
pass

if total_files == 0:
Expand Down
176 changes: 176 additions & 0 deletions tests/unit/test_assessors_stub.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from agentready.assessors.stub_assessors import (
DependencyPinningAssessor,
FileSizeLimitsAssessor,
GitignoreAssessor,
)
from agentready.models.repository import Repository
Expand Down Expand Up @@ -513,3 +514,178 @@ def test_no_languages_detected(self, tmp_path):

# Should still give points if file exists with content
assert finding.score > 0


class TestFileSizeLimitsAssessor:
    """Tests for FileSizeLimitsAssessor - Issue #245 fix.

    Each test builds a throwaway git repository under ``tmp_path``, stages
    only the files that should be assessed, and checks the resulting finding.
    Files that exist on disk but are not staged (e.g. inside ``.venv`` or
    ``node_modules``) must not influence the result.
    """

    @staticmethod
    def _init_git_repo(path) -> None:
        """Initialise an empty git repository in *path*."""
        subprocess.run(["git", "init"], cwd=path, capture_output=True, check=True)

    @staticmethod
    def _git_add(path, *rel_paths: str) -> None:
        """Stage *rel_paths* (relative to *path*).

        ``check=True`` makes a failed ``git add`` abort the test right here
        instead of surfacing later as a confusing assertion failure.
        """
        subprocess.run(
            ["git", "add", *rel_paths],
            cwd=path,
            capture_output=True,
            check=True,
        )

    @staticmethod
    def _make_repository(
        path,
        languages: dict[str, int],
        total_files: int,
        total_lines: int,
    ) -> Repository:
        """Build the Repository model used by every test in this class."""
        return Repository(
            path=path,
            name="test-repo",
            url=None,
            branch="main",
            commit_hash="abc123",
            languages=languages,
            total_files=total_files,
            total_lines=total_lines,
        )

    def test_respects_gitignore_venv(self, tmp_path):
        """Verify .venv files are NOT counted (fixes issue #245)."""
        self._init_git_repo(tmp_path)

        # Create .gitignore with .venv/
        gitignore = tmp_path / ".gitignore"
        gitignore.write_text(".venv/\n")

        # Create .venv directory with large file (should be IGNORED)
        venv_dir = tmp_path / ".venv"
        venv_dir.mkdir()
        large_venv_file = venv_dir / "large_module.py"
        large_venv_file.write_text("x = 1\n" * 2000)  # 2000 lines - huge

        # Create src directory with small file (should be counted)
        src_dir = tmp_path / "src"
        src_dir.mkdir()
        small_file = src_dir / "main.py"
        small_file.write_text("print('hello')\n" * 50)  # 50 lines

        # Add only the tracked file to git
        self._git_add(tmp_path, "src/main.py")

        repo = self._make_repository(
            tmp_path, languages={"Python": 1}, total_files=1, total_lines=50
        )

        assessor = FileSizeLimitsAssessor()
        finding = assessor.assess(repo)

        # Should pass because .venv file is ignored
        assert finding.status == "pass"
        assert finding.score == 100.0
        # Evidence should NOT mention the 2000-line file
        assert "2000" not in str(finding.evidence)

    def test_no_source_files_returns_not_applicable(self, tmp_path):
        """Test not_applicable when no source files exist."""
        self._init_git_repo(tmp_path)

        # Create only non-source files
        readme = tmp_path / "README.md"
        readme.write_text("# Test\n")
        self._git_add(tmp_path, "README.md")

        repo = self._make_repository(
            tmp_path, languages={"Markdown": 1}, total_files=1, total_lines=1
        )

        assessor = FileSizeLimitsAssessor()
        finding = assessor.assess(repo)

        assert finding.status == "not_applicable"

    def test_huge_files_detected(self, tmp_path):
        """Test that files >1000 lines are flagged."""
        self._init_git_repo(tmp_path)

        # Create a huge file
        huge_file = tmp_path / "huge_module.py"
        huge_file.write_text("x = 1\n" * 1500)  # 1500 lines
        self._git_add(tmp_path, "huge_module.py")

        repo = self._make_repository(
            tmp_path, languages={"Python": 1}, total_files=1, total_lines=1500
        )

        assessor = FileSizeLimitsAssessor()
        finding = assessor.assess(repo)

        assert finding.status == "fail"
        assert finding.score < 70
        evidence = str(finding.evidence)
        assert "1500" in evidence or ">1000" in evidence

    def test_small_files_pass(self, tmp_path):
        """Test that all files <500 lines gives perfect score."""
        self._init_git_repo(tmp_path)

        # Create small files
        for i in range(5):
            small_file = tmp_path / f"module_{i}.py"
            small_file.write_text("x = 1\n" * 100)  # 100 lines each
            self._git_add(tmp_path, f"module_{i}.py")

        repo = self._make_repository(
            tmp_path, languages={"Python": 5}, total_files=5, total_lines=500
        )

        assessor = FileSizeLimitsAssessor()
        finding = assessor.assess(repo)

        assert finding.status == "pass"
        assert finding.score == 100.0
        assert "All 5 source files are <500 lines" in str(finding.evidence)

    def test_respects_gitignore_node_modules(self, tmp_path):
        """Verify node_modules files are NOT counted."""
        self._init_git_repo(tmp_path)

        # Create .gitignore with node_modules/
        gitignore = tmp_path / ".gitignore"
        gitignore.write_text("node_modules/\n")

        # Create node_modules directory with large JS file (should be IGNORED)
        nm_dir = tmp_path / "node_modules"
        nm_dir.mkdir()
        large_js = nm_dir / "large_lib.js"
        large_js.write_text("var x = 1;\n" * 3000)  # 3000 lines

        # Create src directory with small JS file (should be counted)
        src_dir = tmp_path / "src"
        src_dir.mkdir()
        small_js = src_dir / "app.js"
        small_js.write_text("console.log('hi');\n" * 30)  # 30 lines

        self._git_add(tmp_path, "src/app.js")

        repo = self._make_repository(
            tmp_path, languages={"JavaScript": 1}, total_files=1, total_lines=30
        )

        assessor = FileSizeLimitsAssessor()
        finding = assessor.assess(repo)

        assert finding.status == "pass"
        assert "3000" not in str(finding.evidence)
Loading