e2720pjk · opencode-agent · Jan 5, 2026 · Jan 6, 2026 · Dec 27, 2025 · Jan 4, 2026
diff --git a/.gitignore b/.gitignore
@@ -38,7 +38,9 @@ MANIFEST
 htmlcov/
 .tox/
 .hypothesis/
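+# Ignore tests/ directory but allow A/B tests. NOTE(review): git cannot re-include a path whose parent directory is excluded, so "tests/" must become "tests/*" for the negation below to take effect.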
 tests/
+!tests/ab_testing/
 
 # Jupyter
 *.ipynb

diff --git a/codewiki/cli/commands/config.py b/codewiki/cli/commands/config.py
@@ -57,6 +57,12 @@ def config_group():
     default=None,
     help="Maximum tokens per leaf module (default: 16000, range: 500-100000)",
 )
+@click.option(
+    "--cache-size",
+    type=click.IntRange(min=100, max=10000),
+    default=None,
+    help="LLM cache size - number of cached prompts (default: 1000, range: 100-10000)",
+)
 def config_set(
     api_key: Optional[str],
     base_url: Optional[str],
@@ -67,6 +73,7 @@ def config_set(
     concurrency_limit: Optional[int],
     max_tokens_per_module: Optional[int],
     max_tokens_per_leaf: Optional[int],
+    cache_size: Optional[int],
 ):
     """
     Set configuration values for CodeWiki.
@@ -100,6 +107,7 @@ def config_set(
                 concurrency_limit is not None,
                 max_tokens_per_module is not None,
                 max_tokens_per_leaf is not None,
+                cache_size is not None,
             ]
         ):
             click.echo("No options provided. Use --help for usage information.")
@@ -135,6 +143,9 @@ def config_set(
         if max_tokens_per_leaf is not None:
             validated_data["max_tokens_per_leaf"] = max_tokens_per_leaf
 
+        if cache_size is not None:
+            validated_data["cache_size"] = cache_size
+
         # Create config manager and save
         manager = ConfigManager()
         manager.load()  # Load existing config if present
@@ -149,6 +160,7 @@ def config_set(
             concurrency_limit=validated_data.get("concurrency_limit"),
             max_tokens_per_module=validated_data.get("max_tokens_per_module"),
             max_tokens_per_leaf=validated_data.get("max_tokens_per_leaf"),
+            cache_size=validated_data.get("cache_size"),
         )
 
         # Display success messages
@@ -197,6 +209,9 @@ def config_set(
         if max_tokens_per_leaf is not None:
             click.secho(f"✓ Max tokens per leaf: {max_tokens_per_leaf}", fg="green")
 
+        if cache_size is not None:
+            click.secho(f"✓ Cache size: {cache_size}", fg="green")
+
         click.echo("\n" + click.style("Configuration updated successfully.", fg="green", bold=True))
 
     except ConfigurationError as e:

diff --git a/codewiki/cli/config_manager.py b/codewiki/cli/config_manager.py
@@ -93,6 +93,7 @@ def save(
         concurrency_limit: Optional[int] = None,
         max_tokens_per_module: Optional[int] = None,
         max_tokens_per_leaf: Optional[int] = None,
+        cache_size: Optional[int] = None,
     ):
         """
         Save configuration to file and keyring.
@@ -108,6 +109,7 @@ def save(
             concurrency_limit: Maximum concurrent API calls (1-10)
             max_tokens_per_module: Maximum tokens per module
             max_tokens_per_leaf: Maximum tokens per leaf module
+            cache_size: LLM cache size (number of cached prompts)
         """
         # Ensure config directory exists
         try:
@@ -150,6 +152,9 @@ def save(
         if max_tokens_per_leaf is not None:
             self._config.max_tokens_per_leaf = max_tokens_per_leaf
 
+        if cache_size is not None:
+            self._config.cache_size = cache_size
+
         # Validate configuration
         self._config.validate()
 

diff --git a/codewiki/cli/models/config.py b/codewiki/cli/models/config.py
@@ -20,19 +20,83 @@ class Configuration:
     """
     CodeWiki configuration data model.
 
+    This configuration represents persistent user settings stored in ~/.codewiki/config.json.
+    These settings are converted to the backend Config class when running documentation generation.
+
     Attributes:
-        base_url: LLM API base URL
+        base_url: LLM API base URL (e.g., https://api.anthropic.com)
+                  Required for all LLM operations.
+
         main_model: Primary model for documentation generation
+                   (e.g., claude-sonnet-4, gpt-4o)
+                   This model generates most documentation content.
+
         cluster_model: Model for module clustering
+                      A top-tier model is recommended for better clustering quality
+                      (e.g., claude-sonnet-4, gpt-4o)
+                      Used only for module organization, not documentation generation.
+
         fallback_model: Fallback model for documentation generation
-        default_output: Default output directory
+                        (e.g., glm-4p5, gpt-4-turbo)
+                        Used when main model fails or is unavailable.
+                        Default: glm-4p5
+
+        default_output: Default output directory for generated docs
+                      Relative path or absolute path
+                      Default: "docs"
+
         max_files: Maximum number of files to analyze
+                   Range: 1-5000
+                   Default: 100
+                   Limits analysis to prevent out-of-memory (OOM) errors on large repositories
+                   Higher values = more comprehensive analysis but slower and more memory
+
         max_entry_points: Maximum fallback entry points
+                          Range: 1-max_files
+                          Default: 5
+                          Number of entry files to identify when no obvious entry point exists
+                          Used for repository structure analysis
+                          Higher values = more entry points detected, but some may be irrelevant
+
         max_connectivity_files: Maximum fallback connectivity files
-        max_tokens_per_module: Maximum tokens per module (keeps default)
-        max_tokens_per_leaf: Maximum tokens per leaf module (keeps default)
-        enable_parallel_processing: Enable parallel processing
+                               Range: 1-max_files
+                               Default: 10
+                               Number of high-connectivity files to identify
+                               Used for dependency graph construction
+                               Higher values = more nodes in dependency graph but slower analysis
+
+        max_tokens_per_module: Maximum tokens per module
+                              Range: 1000-200000
+                              Default: 36369
+                              Controls module clustering and documentation generation size
+                              Higher values = larger modules with more content but potentially less focused
+
+        max_tokens_per_leaf: Maximum tokens per leaf module
+                            Range: 500-100000
+                            Default: 16000
+                            Controls individual documentation file size
+                            Higher values = longer documentation files but potentially overwhelming
+
+        enable_parallel_processing: Enable parallel processing of leaf modules
+                                 Type: boolean
+                                 Default: True
+                                 Improves performance on multi-core systems
+                                 Set to False on systems with limited CPU or memory
+                                 Parallel processing uses ThreadPoolExecutor with configurable workers
+
         concurrency_limit: Maximum concurrent API calls
+                         Range: 1-10
+                         Default: 5
+                         Controls parallelism for LLM API calls
+                         Higher values = faster documentation generation but higher API load
+                         Consider API rate limits and system resources when adjusting
+
+        cache_size: LLM cache size (number of cached prompts)
+                    Range: 100-10000
+                    Default: 1000
+                    Controls memory usage and cache hit rate for LLM prompts
+                    Higher values = more cache hits but higher memory usage
+                    Adjust based on available system memory
     """
 
     base_url: str
@@ -47,6 +111,7 @@ class Configuration:
     max_tokens_per_leaf: int = 16000  # Keep default as requested
     enable_parallel_processing: bool = True
     concurrency_limit: int = 5
+    cache_size: int = 1000
 
     def validate(self):
         """
@@ -60,6 +125,31 @@ def validate(self):
         validate_model_name(self.cluster_model)
         validate_model_name(self.fallback_model)
 
+        if not (1 <= self.max_files <= 5000):
+            raise ValueError(f"max_files must be between 1 and 5000, got {self.max_files}")
+        if not (1 <= self.max_entry_points <= self.max_files):
+            raise ValueError(
+                f"max_entry_points must be between 1 and max_files ({self.max_files}), got {self.max_entry_points}"
+            )
+        if not (1 <= self.max_connectivity_files <= self.max_files):
+            raise ValueError(
+                f"max_connectivity_files must be between 1 and max_files ({self.max_files}), got {self.max_connectivity_files}"
+            )
+        if not (1000 <= self.max_tokens_per_module <= 200000):
+            raise ValueError(
+                f"max_tokens_per_module must be between 1000 and 200000, got {self.max_tokens_per_module}"
+            )
+        if not (500 <= self.max_tokens_per_leaf <= 100000):
+            raise ValueError(
+                f"max_tokens_per_leaf must be between 500 and 100000, got {self.max_tokens_per_leaf}"
+            )
+        if not (1 <= self.concurrency_limit <= 10):
+            raise ValueError(
+                f"concurrency_limit must be between 1 and 10, got {self.concurrency_limit}"
+            )
+        if not (100 <= self.cache_size <= 10000):
+            raise ValueError(f"cache_size must be between 100 and 10000, got {self.cache_size}")
+
     def to_dict(self) -> dict:
         """Convert to dictionary."""
         return asdict(self)
@@ -88,6 +178,7 @@ def from_dict(cls, data: dict) -> "Configuration":
             max_tokens_per_leaf=data.get("max_tokens_per_leaf", 16000),
             enable_parallel_processing=data.get("enable_parallel_processing", True),
             concurrency_limit=data.get("concurrency_limit", 5),
+            cache_size=data.get("cache_size", 1000),
         )
 
     def is_complete(self) -> bool:
@@ -119,6 +210,7 @@ def to_backend_config(self, repo_path: str, output_dir: str, api_key: str):
             max_connectivity_files=self.max_connectivity_files,
             enable_parallel_processing=self.enable_parallel_processing,
             concurrency_limit=self.concurrency_limit,
+            cache_size=self.cache_size,
         )
 
         return Config.from_cli(

diff --git a/codewiki/cli/models/job.py b/codewiki/cli/models/job.py
@@ -32,6 +32,7 @@ class AnalysisOptions:
     concurrency_limit: int = 5
     enable_llm_cache: bool = True
     agent_retries: int = 3
+    cache_size: int = 1000
 
 
 @dataclass

diff --git a/codewiki/src/be/agent_orchestrator.py b/codewiki/src/be/agent_orchestrator.py
@@ -1,62 +1,32 @@
 from pydantic_ai import Agent
 
-# import logfire
-import logging
 import os
 import traceback
 from typing import Dict, List, Any
 
-# Configure logging and monitoring
-
-logger = logging.getLogger(__name__)
-
-# try:
-#     # Configure logfire with environment variables for Docker compatibility
-#     logfire_token = os.getenv('LOGFIRE_TOKEN')
-#     logfire_project = os.getenv('LOGFIRE_PROJECT_NAME', 'default')
-#     logfire_service = os.getenv('LOGFIRE_SERVICE_NAME', 'default')
-
-#     if logfire_token:
-#         # Configure with explicit token (for Docker)
-#         logfire.configure(
-#             token=logfire_token,
-#             project_name=logfire_project,
-#             service_name=logfire_service,
-#         )
-#     else:
-#         # Use default configuration (for local development with logfire auth)
-#         logfire.configure(
-#             project_name=logfire_project,
-#             service_name=logfire_service,
-#         )
-
-#     logfire.instrument_pydantic_ai()
-#     logger.debug(f"Logfire configured successfully for project: {logfire_project}")
-
-# except Exception as e:
-#     logger.warning(f"Failed to configure logfire: {e}")
-
-# Local imports (placed after logging configuration)
-from codewiki.src.be.agent_tools.deps import CodeWikiDeps  # noqa: E402
-from codewiki.src.be.agent_tools.read_code_components import read_code_components_tool  # noqa: E402
-from codewiki.src.be.agent_tools.str_replace_editor import str_replace_editor_tool  # noqa: E402
-from codewiki.src.be.agent_tools.generate_sub_module_documentations import (  # noqa: E402
+from codewiki.src.be.logging_config import get_logger
+from codewiki.src.be.agent_tools.deps import CodeWikiDeps
+from codewiki.src.be.agent_tools.read_code_components import read_code_components_tool
+from codewiki.src.be.agent_tools.str_replace_editor import str_replace_editor_tool
+from codewiki.src.be.agent_tools.generate_sub_module_documentations import (
     generate_sub_module_documentation_tool,
 )
-from codewiki.src.be.llm_services import create_fallback_models  # noqa: E402
-from codewiki.src.be.prompt_template import (  # noqa: E402
+from codewiki.src.be.llm_services import create_fallback_models
+from codewiki.src.be.prompt_template import (
     SYSTEM_PROMPT,
     LEAF_SYSTEM_PROMPT,
     format_user_prompt,
 )
-from codewiki.src.be.utils import is_complex_module  # noqa: E402
-from codewiki.src.config import (  # noqa: E402
+from codewiki.src.be.utils import is_complex_module
+from codewiki.src.config import (
     Config,
     MODULE_TREE_FILENAME,
     OVERVIEW_FILENAME,
 )
-from codewiki.src.utils import file_manager  # noqa: E402
-from codewiki.src.be.dependency_analyzer.models.core import Node  # noqa: E402
+from codewiki.src.utils import file_manager
+from codewiki.src.be.dependency_analyzer.models.core import Node
+
+logger = get_logger(__name__)
 
 
 class AgentOrchestrator:

diff --git a/codewiki/src/be/cluster_modules.py b/codewiki/src/be/cluster_modules.py
@@ -1,16 +1,16 @@
 from typing import List, Dict, Any, Optional
 from collections import defaultdict
-import logging
 import traceback
 import ast
 
-logger = logging.getLogger(__name__)
+from codewiki.src.be.logging_config import get_logger
+from codewiki.src.be.dependency_analyzer.models.core import Node
+from codewiki.src.be.llm_services import call_llm
+from codewiki.src.be.utils import count_tokens
+from codewiki.src.config import MAX_TOKEN_PER_MODULE, Config
+from codewiki.src.be.prompt_template import format_cluster_prompt
 
-from codewiki.src.be.dependency_analyzer.models.core import Node  # noqa: E402
-from codewiki.src.be.llm_services import call_llm  # noqa: E402
-from codewiki.src.be.utils import count_tokens  # noqa: E402
-from codewiki.src.config import MAX_TOKEN_PER_MODULE, Config  # noqa: E402
-from codewiki.src.be.prompt_template import format_cluster_prompt  # noqa: E402
+logger = get_logger(__name__)
 
 
 def format_potential_core_components(

diff --git a/codewiki/src/be/dependency_analyzer/analysis/call_graph_analyzer.py b/codewiki/src/be/dependency_analyzer/analysis/call_graph_analyzer.py
@@ -8,6 +8,7 @@
 
 from typing import Dict, List
 import logging
+import os
 import traceback
 from pathlib import Path
 from concurrent.futures import ThreadPoolExecutor, as_completed
@@ -20,6 +21,8 @@
 
 logger = logging.getLogger(__name__)
 
+MAX_WORKERS = 8
+
 
 class CallGraphAnalyzer:
     def __init__(self):
@@ -72,7 +75,10 @@ def _analyze_parallel(self, code_files: List[Dict], base_dir: str) -> Dict:
         self.call_relationships = []
 
         # Process languages in parallel
-        max_workers = min(4, len(files_by_language))  # Conservative limit
+        max_workers = min(os.cpu_count() or 4, len(files_by_language), MAX_WORKERS)
+        logger.debug(
+            f"Using {max_workers} workers for parallel analysis (CPU cores: {os.cpu_count()}, language groups: {len(files_by_language)})"
+        )
         with ThreadPoolExecutor(max_workers=max_workers) as executor:
             # Submit tasks for each language group
             future_to_language = {