forked from yusufkaraaslan/Skill_Seekers
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtest_unified_analyzer.py
More file actions
428 lines (334 loc) · 16.3 KB
/
test_unified_analyzer.py
File metadata and controls
428 lines (334 loc) · 16.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
"""
Tests for Unified Codebase Analyzer
Tests the unified analyzer that works with:
- GitHub URLs (uses three-stream fetcher)
- Local paths (analyzes directly)
Analysis modes:
- basic: Fast, shallow analysis
- c3x: Deep C3.x analysis
"""
import os
from unittest.mock import Mock, patch
import pytest
from skill_seekers.cli.github_fetcher import CodeStream, DocsStream, InsightsStream, ThreeStreamData
from skill_seekers.cli.unified_codebase_analyzer import AnalysisResult, UnifiedCodebaseAnalyzer
# Skip marker: decorated tests run only when a non-empty GITHUB_TOKEN is exported.
requires_github = pytest.mark.skipif(
    not os.getenv("GITHUB_TOKEN"),
    reason="GITHUB_TOKEN not set - skipping tests that require GitHub access",
)
class TestAnalysisResult:
    """Construction checks for the AnalysisResult container."""

    def test_analysis_result_basic(self):
        """Build a result from the core fields only and read every field back."""
        outcome = AnalysisResult(
            code_analysis={"files": []},
            source_type="local",
            analysis_depth="basic",
        )

        assert outcome.code_analysis == {"files": []}
        assert outcome.source_type == "local"
        assert outcome.analysis_depth == "basic"
        # The GitHub-specific fields were not supplied, so both are expected to be None.
        assert outcome.github_docs is None
        assert outcome.github_insights is None

    def test_analysis_result_with_github(self):
        """Build a result that also carries GitHub docs and insights."""
        fields = {
            "code_analysis": {"files": []},
            "github_docs": {"readme": "# README"},
            "github_insights": {"metadata": {"stars": 1234}},
            "source_type": "github",
            "analysis_depth": "c3x",
        }
        outcome = AnalysisResult(**fields)

        assert outcome.github_docs is not None
        assert outcome.github_insights is not None
        assert outcome.source_type == "github"
class TestURLDetection:
    """Checks for UnifiedCodebaseAnalyzer.is_github_url."""

    def test_is_github_url_https(self):
        """An https://github.com/... URL is recognised."""
        https_url = "https://github.com/facebook/react"
        assert UnifiedCodebaseAnalyzer().is_github_url(https_url) is True

    def test_is_github_url_ssh(self):
        """A git@github.com:... SSH remote is recognised."""
        ssh_url = "git@github.com:facebook/react.git"
        assert UnifiedCodebaseAnalyzer().is_github_url(ssh_url) is True

    def test_is_github_url_local_path(self):
        """Absolute and relative filesystem paths are rejected."""
        detector = UnifiedCodebaseAnalyzer()
        for local_path in ("/path/to/local/repo", "./relative/path"):
            assert detector.is_github_url(local_path) is False

    def test_is_github_url_other_git(self):
        """A URL on another git host (GitLab) is rejected."""
        gitlab_url = "https://gitlab.com/user/repo"
        assert UnifiedCodebaseAnalyzer().is_github_url(gitlab_url) is False
class TestBasicAnalysis:
    """Exercise the "basic" depth and the analyzer helper methods.

    Each test builds its own input tree under pytest's ``tmp_path`` fixture
    and then calls one public method of ``UnifiedCodebaseAnalyzer`` on it.
    """

    def test_basic_analysis_local(self, tmp_path):
        """Analyse a local directory at depth "basic" and check the result labels."""
        # Three files of different types; all of them are expected in the listing.
        (tmp_path / "main.py").write_text("import os\nprint('hello')")
        (tmp_path / "utils.js").write_text("function test() {}")
        (tmp_path / "README.md").write_text("# README")

        analyzer = UnifiedCodebaseAnalyzer()
        result = analyzer.analyze(source=str(tmp_path), depth="basic")

        assert result.source_type == "local"
        assert result.analysis_depth == "basic"
        assert result.code_analysis["analysis_type"] == "basic"
        assert len(result.code_analysis["files"]) >= 3

    def test_list_files(self, tmp_path):
        """list_files returns one entry per file, including files in subdirectories."""
        (tmp_path / "file1.py").write_text("code")
        (tmp_path / "file2.js").write_text("code")
        (tmp_path / "subdir").mkdir()
        (tmp_path / "subdir" / "file3.ts").write_text("code")

        analyzer = UnifiedCodebaseAnalyzer()
        files = analyzer.list_files(tmp_path)

        assert len(files) == 3
        # Each entry is indexed by "path"; the nested file is expected under "subdir/".
        paths = [f["path"] for f in files]
        assert "file1.py" in paths
        assert "file2.js" in paths
        assert "subdir/file3.ts" in paths

    def test_get_directory_structure(self, tmp_path):
        """get_directory_structure returns a directory node listing its children."""
        (tmp_path / "src").mkdir()
        (tmp_path / "src" / "main.py").write_text("code")
        (tmp_path / "tests").mkdir()  # left empty on purpose; it must still be listed
        (tmp_path / "README.md").write_text("# README")

        analyzer = UnifiedCodebaseAnalyzer()
        structure = analyzer.get_directory_structure(tmp_path)

        assert structure["type"] == "directory"
        assert len(structure["children"]) >= 3
        child_names = [c["name"] for c in structure["children"]]
        assert "src" in child_names
        assert "tests" in child_names
        assert "README.md" in child_names

    def test_extract_imports_python(self, tmp_path):
        """extract_imports reports Python import statements under the ".py" key."""
        # The fixture must be valid Python: the body of main() is indented so
        # that an extractor which parses the file does not hit a syntax error.
        (tmp_path / "main.py").write_text("""
import os
import sys
from pathlib import Path
from typing import List, Dict
def main():
    pass
""")

        analyzer = UnifiedCodebaseAnalyzer()
        imports = analyzer.extract_imports(tmp_path)

        assert ".py" in imports
        python_imports = imports[".py"]
        assert any("import os" in imp for imp in python_imports)
        assert any("from pathlib import Path" in imp for imp in python_imports)

    def test_extract_imports_javascript(self, tmp_path):
        """extract_imports reports JavaScript imports under the ".js" key."""
        (tmp_path / "app.js").write_text("""
import React from 'react';
import { useState } from 'react';
const fs = require('fs');
function App() {}
""")

        analyzer = UnifiedCodebaseAnalyzer()
        imports = analyzer.extract_imports(tmp_path)

        assert ".js" in imports
        js_imports = imports[".js"]
        assert any("import React" in imp for imp in js_imports)

    def test_find_entry_points(self, tmp_path):
        """find_entry_points reports main.py, setup.py and package.json."""
        (tmp_path / "main.py").write_text("print('hello')")
        (tmp_path / "setup.py").write_text("from setuptools import setup")
        (tmp_path / "package.json").write_text('{"name": "test"}')

        analyzer = UnifiedCodebaseAnalyzer()
        entry_points = analyzer.find_entry_points(tmp_path)

        assert "main.py" in entry_points
        assert "setup.py" in entry_points
        assert "package.json" in entry_points

    def test_compute_statistics(self, tmp_path):
        """compute_statistics counts files, bytes, extensions and languages."""
        # ASCII-only contents, so character counts equal byte counts.
        (tmp_path / "file1.py").write_text("a" * 100)
        (tmp_path / "file2.py").write_text("b" * 200)
        (tmp_path / "file3.js").write_text("c" * 150)

        analyzer = UnifiedCodebaseAnalyzer()
        stats = analyzer.compute_statistics(tmp_path)

        assert stats["total_files"] == 3
        assert stats["total_size_bytes"] == 450  # 100 + 200 + 150
        assert stats["file_types"][".py"] == 2
        assert stats["file_types"][".js"] == 1
        assert stats["languages"]["Python"] == 2
        assert stats["languages"]["JavaScript"] == 1
class TestC3xAnalysis:
    """Checks for the deep "c3x" analysis depth on local directories."""

    # Keys the c3x result is expected to add on top of the basic fields.
    _C3X_SECTIONS = (
        "c3_1_patterns",
        "c3_2_examples",
        "c3_3_guides",
        "c3_4_configs",
        "c3_7_architecture",
    )

    # Keys produced by the basic analysis that c3x must still expose.
    _BASIC_SECTIONS = ("files", "structure", "imports", "entry_points", "statistics")

    def test_c3x_analysis_local(self, tmp_path):
        """Run c3x on a one-file directory and check every C3.x section is a list."""
        # A minimal Python file gives the analysis something to look at.
        (tmp_path / "main.py").write_text("import os\nprint('hello')")

        outcome = UnifiedCodebaseAnalyzer().analyze(source=str(tmp_path), depth="c3x")
        analysis = outcome.code_analysis

        assert outcome.source_type == "local"
        assert outcome.analysis_depth == "c3x"
        assert analysis["analysis_type"] == "c3x"

        # Every C3.x section must be present ...
        for section in self._C3X_SECTIONS:
            assert section in analysis

        # ... and must be a list; an empty list is fine when nothing was found.
        for section in self._C3X_SECTIONS:
            assert isinstance(analysis[section], list)

    def test_c3x_includes_basic_analysis(self, tmp_path):
        """A c3x result still carries all of the basic-analysis fields."""
        (tmp_path / "main.py").write_text("code")

        outcome = UnifiedCodebaseAnalyzer().analyze(source=str(tmp_path), depth="c3x")

        for section in self._BASIC_SECTIONS:
            assert section in outcome.code_analysis
class TestGitHubAnalysis:
    """GitHub-source analysis with the three-stream fetcher patched out."""

    @staticmethod
    def _install_fake_fetcher(fetcher_cls, repo_dir, files, readme, metadata):
        """Make the patched fetcher class return canned three-stream data.

        The instance produced by ``fetcher_cls()`` is a plain ``Mock`` whose
        ``fetch()`` returns a ``ThreeStreamData`` built from the arguments.
        """
        fake_fetcher = Mock()
        fetcher_cls.return_value = fake_fetcher
        fake_fetcher.fetch.return_value = ThreeStreamData(
            CodeStream(directory=repo_dir, files=files),
            DocsStream(readme=readme, contributing=None, docs_files=[]),
            InsightsStream(
                metadata=metadata, common_problems=[], known_solutions=[], top_labels=[]
            ),
        )

    @requires_github
    @patch("skill_seekers.cli.unified_codebase_analyzer.GitHubThreeStreamFetcher")
    def test_analyze_github_basic(self, mock_fetcher_class, tmp_path):
        """Basic depth on a GitHub URL surfaces the fetched docs and insights."""
        self._install_fake_fetcher(
            mock_fetcher_class,
            tmp_path,
            files=[tmp_path / "main.py"],
            readme="# README",
            metadata={"stars": 1234},
        )
        # The code stream points at tmp_path, so give it a file to analyse.
        (tmp_path / "main.py").write_text("print('hello')")

        outcome = UnifiedCodebaseAnalyzer().analyze(
            source="https://github.com/test/repo",
            depth="basic",
            fetch_github_metadata=True,
        )

        assert outcome.source_type == "github"
        assert outcome.analysis_depth == "basic"
        assert outcome.github_docs is not None
        assert outcome.github_insights is not None
        assert outcome.github_docs["readme"] == "# README"
        assert outcome.github_insights["metadata"]["stars"] == 1234

    @requires_github
    @patch("skill_seekers.cli.unified_codebase_analyzer.GitHubThreeStreamFetcher")
    def test_analyze_github_c3x(self, mock_fetcher_class, tmp_path):
        """c3x depth on a GitHub URL is labelled as c3x in the result."""
        self._install_fake_fetcher(
            mock_fetcher_class, tmp_path, files=[], readme="# README", metadata={}
        )
        (tmp_path / "main.py").write_text("code")

        outcome = UnifiedCodebaseAnalyzer().analyze(
            source="https://github.com/test/repo", depth="c3x"
        )

        assert outcome.analysis_depth == "c3x"
        assert outcome.code_analysis["analysis_type"] == "c3x"

    @requires_github
    @patch("skill_seekers.cli.unified_codebase_analyzer.GitHubThreeStreamFetcher")
    def test_analyze_github_without_metadata(self, mock_fetcher_class, tmp_path):
        """With fetch_github_metadata=False the docs and insights stay None."""
        self._install_fake_fetcher(
            mock_fetcher_class, tmp_path, files=[], readme=None, metadata={}
        )
        (tmp_path / "main.py").write_text("code")

        outcome = UnifiedCodebaseAnalyzer().analyze(
            source="https://github.com/test/repo",
            depth="basic",
            fetch_github_metadata=False,
        )

        # Metadata fetching was switched off, so neither GitHub field is filled in.
        assert outcome.github_docs is None
        assert outcome.github_insights is None
class TestErrorHandling:
    """Failure modes of UnifiedCodebaseAnalyzer.analyze."""

    def test_invalid_depth_mode(self, tmp_path):
        """An unrecognised depth raises ValueError mentioning "Unknown depth"."""
        (tmp_path / "main.py").write_text("code")
        subject = UnifiedCodebaseAnalyzer()

        with pytest.raises(ValueError, match="Unknown depth"):
            subject.analyze(source=str(tmp_path), depth="invalid")

    def test_nonexistent_directory(self):
        """A source path that does not exist raises FileNotFoundError."""
        subject = UnifiedCodebaseAnalyzer()

        with pytest.raises(FileNotFoundError):
            subject.analyze(source="/nonexistent/path", depth="basic")

    def test_file_instead_of_directory(self, tmp_path):
        """Pointing the analyzer at a regular file raises NotADirectoryError."""
        lone_file = tmp_path / "file.py"
        lone_file.write_text("code")
        subject = UnifiedCodebaseAnalyzer()

        with pytest.raises(NotADirectoryError):
            subject.analyze(source=str(lone_file), depth="basic")
class TestTokenHandling:
    """How the GitHub token reaches the three-stream fetcher."""

    @requires_github
    @patch.dict("os.environ", {"GITHUB_TOKEN": "test_token"})
    @patch("skill_seekers.cli.unified_codebase_analyzer.GitHubThreeStreamFetcher")
    def test_github_token_from_env(self, mock_fetcher_class, tmp_path):
        """Without an explicit token, the GITHUB_TOKEN value is handed to the fetcher."""
        fake_fetcher = Mock()
        mock_fetcher_class.return_value = fake_fetcher
        fake_fetcher.fetch.return_value = ThreeStreamData(
            CodeStream(directory=tmp_path, files=[]),
            DocsStream(readme=None, contributing=None, docs_files=[]),
            InsightsStream(
                metadata={}, common_problems=[], known_solutions=[], top_labels=[]
            ),
        )
        (tmp_path / "main.py").write_text("code")

        UnifiedCodebaseAnalyzer().analyze(
            source="https://github.com/test/repo", depth="basic"
        )

        # The fetcher class must be instantiated exactly once, with the token
        # as its second positional argument.
        mock_fetcher_class.assert_called_once()
        positional = mock_fetcher_class.call_args[0]
        assert positional[1] == "test_token"

    @requires_github
    @patch("skill_seekers.cli.unified_codebase_analyzer.GitHubThreeStreamFetcher")
    def test_github_token_explicit(self, mock_fetcher_class, tmp_path):
        """A token given to the analyzer constructor is handed to the fetcher."""
        fake_fetcher = Mock()
        mock_fetcher_class.return_value = fake_fetcher
        fake_fetcher.fetch.return_value = ThreeStreamData(
            CodeStream(directory=tmp_path, files=[]),
            DocsStream(readme=None, contributing=None, docs_files=[]),
            InsightsStream(
                metadata={}, common_problems=[], known_solutions=[], top_labels=[]
            ),
        )
        (tmp_path / "main.py").write_text("code")

        UnifiedCodebaseAnalyzer(github_token="custom_token").analyze(
            source="https://github.com/test/repo", depth="basic"
        )

        mock_fetcher_class.assert_called_once()
        positional = mock_fetcher_class.call_args[0]
        assert positional[1] == "custom_token"
class TestIntegration:
    """End-to-end check of the result layout."""

    # Keys every basic analysis result is expected to expose.
    _CORE_KEYS = ("files", "structure", "imports")

    def test_local_to_github_consistency(self, tmp_path):
        """Check the core keys of a local basic analysis.

        Only the local path is exercised here; no GitHub source is analysed.
        """
        (tmp_path / "main.py").write_text("import os\nprint('hello')")
        (tmp_path / "README.md").write_text("# README")

        # Run the analysis against the local directory.
        local_outcome = UnifiedCodebaseAnalyzer().analyze(source=str(tmp_path), depth="basic")
        analysis = local_outcome.code_analysis

        for key in self._CORE_KEYS:
            assert key in analysis
        assert analysis["analysis_type"] == "basic"