forked from yusufkaraaslan/Skill_Seekers
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtest_codebase_scraper.py
More file actions
206 lines (157 loc) · 6.67 KB
/
test_codebase_scraper.py
File metadata and controls
206 lines (157 loc) · 6.67 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
#!/usr/bin/env python3
"""
Tests for codebase_scraper.py - Standalone codebase analysis CLI.
Test Coverage:
- Language detection
- Directory exclusion
- File walking
- .gitignore loading
"""
import os
import shutil
import sys
import tempfile
import unittest
from pathlib import Path
# Add src to path
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "src"))
from skill_seekers.cli.codebase_scraper import (
DEFAULT_EXCLUDED_DIRS,
detect_language,
load_gitignore,
should_exclude_dir,
walk_directory,
)
class TestLanguageDetection(unittest.TestCase):
    """Check the language name detect_language reports for file extensions."""

    def _assert_detected(self, expected, *filenames):
        """Assert that each of *filenames* is detected as *expected*.

        Filenames are checked in the order given and the first mismatch
        fails the test.
        """
        for filename in filenames:
            self.assertEqual(detect_language(Path(filename)), expected)

    def test_python_detection(self):
        """``.py`` is reported as Python."""
        self._assert_detected("Python", "test.py")

    def test_javascript_detection(self):
        """``.js`` and ``.jsx`` are reported as JavaScript."""
        self._assert_detected("JavaScript", "test.js", "test.jsx")

    def test_typescript_detection(self):
        """``.ts`` and ``.tsx`` are reported as TypeScript."""
        self._assert_detected("TypeScript", "test.ts", "test.tsx")

    def test_cpp_detection(self):
        """``.cpp``, ``.h`` and ``.hpp`` are reported as C++."""
        self._assert_detected("C++", "test.cpp", "test.h", "test.hpp")

    def test_csharp_detection(self):
        """``.cs`` is reported as C#."""
        self._assert_detected("C#", "test.cs")

    def test_go_detection(self):
        """``.go`` is reported as Go."""
        self._assert_detected("Go", "test.go")

    def test_rust_detection(self):
        """``.rs`` is reported as Rust."""
        self._assert_detected("Rust", "test.rs")

    def test_java_detection(self):
        """``.java`` is reported as Java."""
        self._assert_detected("Java", "test.java")

    def test_ruby_detection(self):
        """``.rb`` is reported as Ruby."""
        self._assert_detected("Ruby", "test.rb")

    def test_php_detection(self):
        """``.php`` is reported as PHP."""
        self._assert_detected("PHP", "test.php")

    def test_unknown_language(self):
        """Extensions this test expects to be unmapped yield "Unknown"."""
        self._assert_detected("Unknown", "test.swift", "test.txt")
class TestDirectoryExclusion(unittest.TestCase):
    """Check should_exclude_dir against DEFAULT_EXCLUDED_DIRS."""

    def test_node_modules_excluded(self):
        """``node_modules`` must be excluded."""
        excluded = should_exclude_dir("node_modules", DEFAULT_EXCLUDED_DIRS)
        self.assertTrue(excluded)

    def test_venv_excluded(self):
        """``venv`` must be excluded."""
        excluded = should_exclude_dir("venv", DEFAULT_EXCLUDED_DIRS)
        self.assertTrue(excluded)

    def test_git_excluded(self):
        """``.git`` must be excluded."""
        excluded = should_exclude_dir(".git", DEFAULT_EXCLUDED_DIRS)
        self.assertTrue(excluded)

    def test_normal_dir_not_excluded(self):
        """Ordinary project directories must not be excluded."""
        for dirname in ("src", "tests"):
            self.assertFalse(should_exclude_dir(dirname, DEFAULT_EXCLUDED_DIRS))
class TestDirectoryWalking(unittest.TestCase):
    """Exercise walk_directory against throwaway directory trees."""

    def setUp(self):
        """Create a fresh temporary directory and expose it as ``self.root``."""
        self.temp_dir = tempfile.mkdtemp()
        self.root = Path(self.temp_dir)

    def tearDown(self):
        """Delete the temporary directory, ignoring any removal errors."""
        shutil.rmtree(self.temp_dir, ignore_errors=True)

    def test_walk_empty_directory(self):
        """An empty directory yields no files."""
        found = walk_directory(self.root)
        self.assertEqual(len(found), 0)

    def test_walk_with_python_files(self):
        """Only the Python files are returned; the .txt file is skipped."""
        fixtures = {
            "test1.py": 'print("test")',
            "test2.py": 'print("test2")',
            "readme.txt": "readme",
        }
        for filename, content in fixtures.items():
            (self.root / filename).write_text(content)

        found = walk_directory(self.root)

        # Two .py files were written; readme.txt must not be reported.
        self.assertEqual(len(found), 2)
        self.assertTrue(all(path.suffix == ".py" for path in found))

    def test_walk_excludes_node_modules(self):
        """Files under ``node_modules`` are not returned."""
        (self.root / "test.py").write_text("test")

        # A source file the walker would otherwise pick up, placed in
        # a directory that is expected to be excluded.
        vendored = self.root / "node_modules"
        vendored.mkdir()
        (vendored / "package.js").write_text("test")

        found = walk_directory(self.root)

        # Only the top-level test.py should remain.
        self.assertEqual(len(found), 1)
        self.assertEqual(found[0].name, "test.py")

    def test_walk_with_subdirectories(self):
        """Files in nested, non-excluded directories are all returned."""
        layout = (
            ("src", "module.py"),
            ("tests", "test_module.py"),
        )
        for dirname, filename in layout:
            subdir = self.root / dirname
            subdir.mkdir()
            (subdir / filename).write_text("test")

        found = walk_directory(self.root)

        # One file per subdirectory.
        self.assertEqual(len(found), 2)
        names = [path.name for path in found]
        self.assertIn("module.py", names)
        self.assertIn("test_module.py", names)
class TestGitignoreLoading(unittest.TestCase):
    """Tests for .gitignore loading via load_gitignore."""

    def setUp(self):
        """Create a fresh temporary directory and expose it as ``self.root``."""
        self.temp_dir = tempfile.mkdtemp()
        self.root = Path(self.temp_dir)

    def tearDown(self):
        """Delete the temporary directory, ignoring any removal errors."""
        shutil.rmtree(self.temp_dir, ignore_errors=True)

    def test_no_gitignore(self):
        """load_gitignore returns None when the directory has no .gitignore."""
        spec = load_gitignore(self.root)
        self.assertIsNone(spec)

    def test_load_gitignore(self):
        """A valid .gitignore is loaded into a usable spec.

        The test is skipped (rather than silently passing) when the optional
        ``pathspec`` package is not installed, because load_gitignore is
        expected to return None in that case.
        """
        # Imported locally so the module-level imports stay untouched.
        import importlib.util

        if importlib.util.find_spec("pathspec") is None:
            self.skipTest("pathspec is not installed")

        gitignore_path = self.root / ".gitignore"
        gitignore_path.write_text("*.log\ntemp/\n", encoding="utf-8")

        spec = load_gitignore(self.root)

        # With pathspec available, a missing spec is a real failure.  The
        # previous version only asserted inside ``if spec is not None``,
        # which could never fail.
        self.assertIsNotNone(spec)

        # NOTE(review): assumes the returned object is a pathspec.PathSpec
        # (as the original test's comment stated) and so offers match_file.
        self.assertTrue(spec.match_file("debug.log"))
        self.assertFalse(spec.match_file("main.py"))
if __name__ == "__main__":
    # verbosity=2 makes the runner print each test name with its result.
    unittest.main(verbosity=2)