Skip to content

Commit 1ce2eb2

Browse files
authored
Merge pull request #8 from e2720pjk/opencode/issue4-20251224154752
Task 1: Connected fallback functions to analysis workflow,Task 2: Added integration tests,Task 3: Verified data flow
2 parents fc556e2 + dba2f5d commit 1ce2eb2

3 files changed

Lines changed: 284 additions & 15 deletions

File tree

codewiki/src/be/dependency_analyzer/analysis/analysis_service.py

Lines changed: 35 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,10 @@
1515
from codewiki.src.be.dependency_analyzer.analysis.cloning import clone_repository, cleanup_repository, parse_github_url
1616
from codewiki.src.be.dependency_analyzer.models.analysis import AnalysisResult
1717
from codewiki.src.be.dependency_analyzer.models.core import Repository
18+
from codewiki.src.be.dependency_analyzer.utils.patterns import (
19+
find_fallback_entry_points,
20+
find_fallback_connectivity_files
21+
)
1822

1923

2024
logger = logging.getLogger(__name__)
@@ -63,55 +67,73 @@ def analyze_local_repository(
6367
) -> Dict[str, Any]:
6468
"""
6569
Analyze a local repository folder.
66-
70+
6771
Args:
6872
repo_path: Path to local repository folder
6973
max_files: Maximum number of files to analyze
7074
max_entry_points: Maximum fallback entry points
7175
max_connectivity_files: Maximum fallback connectivity files
7276
languages: List of languages to include (e.g., ['python', 'javascript'])
73-
77+
7478
Returns:
75-
Dict with analysis results including nodes and relationships
79+
Dict with analysis results including:
80+
- nodes: Dict of function/call nodes
81+
- relationships: List of call relationships
82+
- entry_points: List of entry point files
83+
- connectivity_files: List of high-connectivity files
84+
- summary: Summary statistics including total_files, total_nodes,
85+
total_relationships, total_entry_points, total_connectivity_files
7686
"""
7787
try:
7888
logger.debug(f"Analyzing local repository at {repo_path}")
79-
89+
8090
# Validate file limits
8191
self._validate_file_limits(max_files, max_entry_points, max_connectivity_files)
82-
92+
8393
# Get repo analyzer to find files
8494
repo_analyzer = RepoAnalyzer()
8595
structure_result = repo_analyzer.analyze_repository_structure(repo_path)
86-
96+
8797
# Extract code files
8898
code_files = self.call_graph_analyzer.extract_code_files(structure_result["file_tree"])
89-
99+
90100
# Filter by languages if specified
91101
if languages:
92102
code_files = [f for f in code_files if f.get("language") in languages]
93-
103+
94104
# Limit number of files
95105
if len(code_files) > max_files:
96106
code_files = code_files[:max_files]
97107
logger.debug(f"Limited analysis to {max_files} files")
98-
108+
99109
logger.debug(f"Analyzing {len(code_files)} files")
100-
110+
111+
# Find fallback entry points
112+
entry_points = find_fallback_entry_points(code_files, max_files=max_entry_points)
113+
logger.debug(f"Found {len(entry_points)} fallback entry points")
114+
115+
# Find fallback connectivity files
116+
connectivity_files = find_fallback_connectivity_files(code_files, max_files=max_connectivity_files)
117+
logger.debug(f"Found {len(connectivity_files)} fallback connectivity files")
118+
101119
# Analyze files
102120
enable_parallel = getattr(self.config, 'enable_parallel_processing', True)
103121
result = self.call_graph_analyzer.analyze_code_files(code_files, repo_path, enable_parallel)
104-
122+
105123
return {
106124
"nodes": result.get("functions", {}),
107125
"relationships": result.get("relationships", []),
126+
"entry_points": entry_points,
127+
"connectivity_files": connectivity_files,
108128
"summary": {
109129
"total_files": len(code_files),
110130
"total_nodes": len(result.get("functions", {})),
111-
"total_relationships": len(result.get("relationships", []))
131+
"total_relationships": len(result.get("relationships", [])),
132+
"total_entry_points": len(entry_points),
133+
"total_connectivity_files": len(connectivity_files)
112134
}
113135
}
114-
136+
115137
except Exception as e:
116138
logger.error(f"Local repository analysis failed: {str(e)}", exc_info=True)
117139
raise RuntimeError(f"Analysis failed: {str(e)}")

test/test_cli_file_limits.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
#!/usr/bin/env python3
2+
"""
3+
End-to-end CLI tests for file limit options.
4+
"""
5+
6+
import pytest
7+
import tempfile
8+
from pathlib import Path
9+
from click.testing import CliRunner
10+
from codewiki.cli.main import cli
11+
from codewiki.cli.models.job import GenerationOptions
12+
13+
14+
class TestCLIFileLimits:
15+
"""End-to-end CLI tests for file limit options."""
16+
17+
def setup_method(self):
18+
"""Set up test fixtures."""
19+
self.runner = CliRunner()
20+
21+
def test_cli_accepts_max_files_option(self):
22+
"""Verify CLI accepts --max-files option."""
23+
with tempfile.TemporaryDirectory() as temp_dir:
24+
# Create a minimal test repository
25+
temp_path = Path(temp_dir)
26+
(temp_path / "main.py").write_text("def main(): pass\n")
27+
28+
result = self.runner.invoke(cli, ['generate', '--help'])
29+
assert '--max-files' in result.output
30+
assert '--max-entry-points' in result.output
31+
assert '--max-connectivity-files' in result.output
32+
33+
def test_generation_options_has_file_limits(self):
34+
"""Verify GenerationOptions has file limit fields."""
35+
options = GenerationOptions(
36+
max_files=200,
37+
max_entry_points=10,
38+
max_connectivity_files=15
39+
)
40+
assert options.max_files == 200
41+
assert options.max_entry_points == 10
42+
assert options.max_connectivity_files == 15
43+
44+
def test_generation_options_defaults_match_configured(self):
45+
"""Verify default values match the configured defaults."""
46+
options = GenerationOptions()
47+
assert options.max_files == 100
48+
assert options.max_entry_points == 5
49+
assert options.max_connectivity_files == 10
50+
51+
52+
if __name__ == "__main__":
53+
pytest.main([__file__])

test/test_file_limits.py

Lines changed: 196 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,10 @@
44
"""
55

66
import pytest
7+
import tempfile
8+
import os
9+
from pathlib import Path
10+
from typing import List
711
from codewiki.src.be.dependency_analyzer.analysis.analysis_service import AnalysisService
812

913

@@ -73,13 +77,203 @@ def test_edge_case_equal_values(self):
7377
"""Test edge cases where values are equal."""
7478
# All equal values should be valid
7579
self.analysis_service._validate_file_limits(5, 5, 5)
76-
80+
7781
# max_entry_points equals max_files should be valid
7882
self.analysis_service._validate_file_limits(10, 10, 5)
79-
83+
8084
# max_connectivity_files equals max_files should be valid
8185
self.analysis_service._validate_file_limits(10, 5, 10)
8286

8387

88+
class TestFileLimitIntegration:
89+
"""Integration tests for actual file limiting behavior."""
90+
91+
def setup_method(self):
92+
"""Set up test fixtures."""
93+
self.analysis_service = AnalysisService()
94+
95+
def _create_test_files(self, temp_dir: Path, count: int, prefix: str = "file") -> List[Path]:
96+
"""Create test files in the given directory."""
97+
files = []
98+
for i in range(count):
99+
file_path = temp_dir / f"{prefix}_{i}.py"
100+
file_path.write_text(f"def func_{i}():\n pass\n")
101+
files.append(file_path)
102+
return files
103+
104+
def test_max_files_actually_limits_files(self):
105+
"""Verify that max_files actually limits the number of files analyzed."""
106+
with tempfile.TemporaryDirectory() as temp_dir:
107+
temp_path = Path(temp_dir)
108+
109+
# Create 50 test files
110+
self._create_test_files(temp_path, 50)
111+
112+
# Analyze with max_files=10
113+
result = self.analysis_service.analyze_local_repository(
114+
str(temp_path),
115+
max_files=10,
116+
max_entry_points=5,
117+
max_connectivity_files=10
118+
)
119+
120+
# Verify only 10 files were analyzed
121+
assert result["summary"]["total_files"] == 10
122+
assert len(result["nodes"]) <= 10
123+
124+
def test_max_entry_points_actually_limits_entry_points(self):
125+
"""Verify that max_entry_points actually limits entry points."""
126+
with tempfile.TemporaryDirectory() as temp_dir:
127+
temp_path = Path(temp_dir)
128+
129+
# Create potential entry point files
130+
entry_point_names = ["main.py", "app.py", "index.py", "server.py", "start.py", "run.py"]
131+
for name in entry_point_names:
132+
(temp_path / name).write_text(f"# {name}\ndef main():\n pass\n")
133+
134+
# Create additional non-entry point files
135+
self._create_test_files(temp_path, 20, prefix="other")
136+
137+
# Analyze with max_entry_points=3
138+
result = self.analysis_service.analyze_local_repository(
139+
str(temp_path),
140+
max_files=100,
141+
max_entry_points=3,
142+
max_connectivity_files=10
143+
)
144+
145+
# Verify entry points are limited
146+
assert len(result["entry_points"]) <= 3
147+
assert result["summary"]["total_entry_points"] == len(result["entry_points"])
148+
149+
# Verify entry_points is a subset of analyzed files
150+
entry_point_paths = {ep["path"] for ep in result["entry_points"]}
151+
assert entry_point_paths
152+
153+
def test_max_connectivity_files_actually_limits_connectivity_files(self):
154+
"""Verify that max_connectivity_files actually limits connectivity files."""
155+
with tempfile.TemporaryDirectory() as temp_dir:
156+
temp_path = Path(temp_dir)
157+
158+
# Create source directories with files
159+
for dir_name in ["src", "lib", "app", "core"]:
160+
dir_path = temp_path / dir_name
161+
dir_path.mkdir()
162+
self._create_test_files(dir_path, 10, prefix=f"{dir_name}_file")
163+
164+
# Create some root files
165+
self._create_test_files(temp_path, 5, prefix="root")
166+
167+
# Analyze with max_connectivity_files=10
168+
result = self.analysis_service.analyze_local_repository(
169+
str(temp_path),
170+
max_files=100,
171+
max_entry_points=5,
172+
max_connectivity_files=10
173+
)
174+
175+
# Verify connectivity files are limited
176+
assert len(result["connectivity_files"]) <= 10
177+
assert result["summary"]["total_connectivity_files"] == len(result["connectivity_files"])
178+
179+
# Verify connectivity_files are from source directories
180+
connectivity_paths = [cf["path"] for cf in result["connectivity_files"]]
181+
assert any("src/" in p or "lib/" in p or "app/" in p or "core/" in p for p in connectivity_paths)
182+
183+
def test_all_limits_respected_together(self):
184+
"""Verify that all three limits are respected when used together."""
185+
with tempfile.TemporaryDirectory() as temp_dir:
186+
temp_path = Path(temp_dir)
187+
188+
# Create entry point files
189+
for name in ["main.py", "app.py", "index.py", "server.py", "start.py"]:
190+
(temp_path / name).write_text(f"# {name}\ndef main():\n pass\n")
191+
192+
# Create source directory files
193+
for dir_name in ["src", "lib"]:
194+
dir_path = temp_path / dir_name
195+
dir_path.mkdir()
196+
self._create_test_files(dir_path, 30, prefix=f"{dir_name}_file")
197+
198+
# Create additional files
199+
self._create_test_files(temp_path, 50, prefix="other")
200+
201+
# Analyze with all three limits
202+
result = self.analysis_service.analyze_local_repository(
203+
str(temp_path),
204+
max_files=50,
205+
max_entry_points=5,
206+
max_connectivity_files=10
207+
)
208+
209+
# Verify all limits are respected
210+
assert result["summary"]["total_files"] <= 50
211+
assert len(result["entry_points"]) <= 5
212+
assert len(result["connectivity_files"]) <= 10
213+
214+
# Verify entry_points and connectivity_files are subsets of analyzed files
215+
all_analyzed_files = set()
216+
for node in result["nodes"]:
217+
if "file_path" in node:
218+
all_analyzed_files.add(node["file_path"])
219+
220+
# Verify validation rules (entry/connectivity counts don't exceed max_files)
221+
assert len(result["entry_points"]) <= result["summary"]["total_files"]
222+
assert len(result["connectivity_files"]) <= result["summary"]["total_files"]
223+
224+
@pytest.mark.parametrize("file_count,limit", [
225+
(10, 10),
226+
(50, 25),
227+
(100, 50),
228+
])
229+
def test_scalability_at_different_scales(self, file_count, limit):
230+
"""Test file limiting works with various project sizes."""
231+
with tempfile.TemporaryDirectory() as temp_dir:
232+
temp_path = Path(temp_dir)
233+
234+
# Create specified number of files
235+
self._create_test_files(temp_path, file_count)
236+
237+
# Analyze with limit
238+
result = self.analysis_service.analyze_local_repository(
239+
str(temp_path),
240+
max_files=limit
241+
)
242+
243+
# Verify result matches expectation
244+
if file_count >= limit:
245+
assert result["summary"]["total_files"] == limit
246+
else:
247+
assert result["summary"]["total_files"] == file_count
248+
249+
def test_backward_compatibility_existing_fields(self):
250+
"""Verify backward compatibility: existing fields are present."""
251+
with tempfile.TemporaryDirectory() as temp_dir:
252+
temp_path = Path(temp_dir)
253+
254+
# Create some test files
255+
self._create_test_files(temp_path, 10)
256+
257+
# Analyze
258+
result = self.analysis_service.analyze_local_repository(
259+
str(temp_path),
260+
max_files=10
261+
)
262+
263+
# Verify existing fields are present
264+
assert "nodes" in result
265+
assert "relationships" in result
266+
assert "summary" in result
267+
assert "total_files" in result["summary"]
268+
assert "total_nodes" in result["summary"]
269+
assert "total_relationships" in result["summary"]
270+
271+
# Verify new fields are present
272+
assert "entry_points" in result
273+
assert "connectivity_files" in result
274+
assert "total_entry_points" in result["summary"]
275+
assert "total_connectivity_files" in result["summary"]
276+
277+
84278
if __name__ == "__main__":
85279
pytest.main([__file__])

0 commit comments

Comments
 (0)