diff --git a/README.md b/README.md
index 3b9cf99..8b0d4fb 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
# BaseAgent - SDK 3.0
-High-performance autonomous agent for [Term Challenge](https://term.challenge). **Does NOT use term_sdk** - fully autonomous with litellm.
+High-performance autonomous agent for [Term Challenge](https://term.challenge). **Does NOT use term_sdk** - fully autonomous with Chutes API.
## Installation
@@ -36,7 +36,7 @@ my-agent/
│ │ ├── loop.py # Main loop
│ │ └── compaction.py # Context management (MANDATORY)
│ ├── llm/
-│ │ └── client.py # LLM client (litellm)
+│ │ └── client.py # LLM client (Chutes API)
│ └── tools/
│ └── ... # Available tools
├── requirements.txt # Dependencies
@@ -77,13 +77,13 @@ AUTO_COMPACT_THRESHOLD = 0.85
## Features
-### LLM Client (litellm)
+### LLM Client (Chutes API)
```python
-from src.llm.client import LiteLLMClient
+from src.llm.client import LLMClient
-llm = LiteLLMClient(
- model="openrouter/anthropic/claude-opus-4.5",
+llm = LLMClient(
+ model="moonshotai/Kimi-K2.5-TEE",
temperature=0.0,
max_tokens=16384,
)
@@ -129,7 +129,7 @@ See `src/config/defaults.py`:
```python
CONFIG = {
- "model": "openrouter/anthropic/claude-opus-4.5",
+ "model": "moonshotai/Kimi-K2.5-TEE",
"max_tokens": 16384,
"max_iterations": 200,
"auto_compact_threshold": 0.85,
@@ -142,7 +142,7 @@ CONFIG = {
| Variable | Description |
|----------|-------------|
-| `OPENROUTER_API_KEY` | OpenRouter API key |
+| `CHUTES_API_TOKEN` | Chutes API token |
## Documentation
@@ -151,7 +151,7 @@ CONFIG = {
See [rules/](rules/) for comprehensive guides:
- [Architecture Patterns](rules/02-architecture-patterns.md) - **Mandatory project structure**
-- [LLM Usage Guide](rules/06-llm-usage-guide.md) - **Using litellm**
+- [LLM Usage Guide](rules/06-llm-usage-guide.md) - **Using Chutes API**
- [Best Practices](rules/05-best-practices.md)
- [Error Handling](rules/08-error-handling.md)
diff --git a/agent.py b/agent.py
index 710edb1..db76ffb 100644
--- a/agent.py
+++ b/agent.py
@@ -3,7 +3,7 @@
SuperAgent for Term Challenge - Entry Point (SDK 3.0 Compatible).
This agent accepts --instruction from the validator and runs autonomously.
-Uses litellm for LLM calls instead of term_sdk.
+Uses Chutes API for LLM calls instead of term_sdk.
Installation:
pip install . # via pyproject.toml
@@ -16,20 +16,20 @@
from __future__ import annotations
import argparse
-import sys
-import time
import os
import subprocess
+import sys
+import time
from pathlib import Path
# Add parent to path for imports
sys.path.insert(0, str(Path(__file__).parent))
+
# Auto-install dependencies if missing
def ensure_dependencies():
"""Install dependencies if not present."""
try:
- import litellm
import httpx
import pydantic
except ImportError:
@@ -37,23 +37,28 @@ def ensure_dependencies():
agent_dir = Path(__file__).parent
req_file = agent_dir / "requirements.txt"
if req_file.exists():
- subprocess.run([sys.executable, "-m", "pip", "install", "-r", str(req_file), "-q"], check=True)
+ subprocess.run(
+ [sys.executable, "-m", "pip", "install", "-r", str(req_file), "-q"], check=True
+ )
else:
- subprocess.run([sys.executable, "-m", "pip", "install", str(agent_dir), "-q"], check=True)
+ subprocess.run(
+ [sys.executable, "-m", "pip", "install", str(agent_dir), "-q"], check=True
+ )
print("[setup] Dependencies installed", file=sys.stderr)
+
ensure_dependencies()
from src.config.defaults import CONFIG
from src.core.loop import run_agent_loop
+from src.llm.client import CostLimitExceeded, LLMClient
+from src.output.jsonl import ErrorEvent, emit
from src.tools.registry import ToolRegistry
-from src.output.jsonl import emit, ErrorEvent
-from src.llm.client import LiteLLMClient, CostLimitExceeded
class AgentContext:
"""Minimal context for agent execution (replaces term_sdk.AgentContext)."""
-
+
def __init__(self, instruction: str, cwd: str = None):
self.instruction = instruction
self.cwd = cwd or os.getcwd()
@@ -61,11 +66,11 @@ def __init__(self, instruction: str, cwd: str = None):
self.is_done = False
self.history = []
self._start_time = time.time()
-
+
@property
def elapsed_secs(self) -> float:
return time.time() - self._start_time
-
+
def shell(self, cmd: str, timeout: int = 120) -> "ShellResult":
"""Execute a shell command."""
self.step += 1
@@ -86,20 +91,22 @@ def shell(self, cmd: str, timeout: int = 120) -> "ShellResult":
except Exception as e:
output = f"[ERROR] {e}"
exit_code = -1
-
+
shell_result = ShellResult(output=output, exit_code=exit_code)
- self.history.append({
- "step": self.step,
- "command": cmd,
- "output": output[:1000],
- "exit_code": exit_code,
- })
+ self.history.append(
+ {
+ "step": self.step,
+ "command": cmd,
+ "output": output[:1000],
+ "exit_code": exit_code,
+ }
+ )
return shell_result
-
+
def done(self):
"""Mark task as complete."""
self.is_done = True
-
+
def log(self, msg: str):
"""Log a message."""
timestamp = time.strftime("%H:%M:%S")
@@ -108,13 +115,13 @@ def log(self, msg: str):
class ShellResult:
"""Result from shell command."""
-
+
def __init__(self, output: str, exit_code: int):
self.output = output
self.stdout = output
self.stderr = ""
self.exit_code = exit_code
-
+
def has(self, text: str) -> bool:
return text in self.output
@@ -129,29 +136,29 @@ def main():
parser = argparse.ArgumentParser(description="SuperAgent for Term Challenge SDK 3.0")
parser.add_argument("--instruction", required=True, help="Task instruction from validator")
args = parser.parse_args()
-
+
_log("=" * 60)
- _log("SuperAgent Starting (SDK 3.0 - litellm)")
+ _log("SuperAgent Starting (SDK 3.0 - Chutes API)")
_log("=" * 60)
_log(f"Model: {CONFIG['model']}")
_log(f"Reasoning effort: {CONFIG.get('reasoning_effort', 'default')}")
_log(f"Instruction: {args.instruction[:200]}...")
_log("-" * 60)
-
+
# Initialize components
start_time = time.time()
-
- llm = LiteLLMClient(
+
+ llm = LLMClient(
model=CONFIG["model"],
temperature=CONFIG.get("temperature"),
max_tokens=CONFIG.get("max_tokens", 16384),
)
-
+
tools = ToolRegistry()
ctx = AgentContext(instruction=args.instruction)
-
+
_log("Components initialized")
-
+
try:
run_agent_loop(
llm=llm,
diff --git a/astuces/08-cost-optimization.md b/astuces/08-cost-optimization.md
index 1230292..ae8bd9a 100644
--- a/astuces/08-cost-optimization.md
+++ b/astuces/08-cost-optimization.md
@@ -2,21 +2,20 @@
## Cost Breakdown
-For Claude Sonnet via OpenRouter:
+Typical LLM pricing (varies by model):
-| Token Type | Cost per 1M |
-|------------|-------------|
-| Input tokens | $3.00 |
-| Cached input | $0.30 (90% off) |
-| Output tokens | $15.00 |
+| Token Type | Typical Cost per 1M |
+|------------|---------------------|
+| Input tokens | $1.00 - $15.00 |
+| Cached input | 10-50% of input |
+| Output tokens | $2.00 - $60.00 |
For a typical task:
- 50 turns
- 100k context average
- 500 output tokens per turn
-**Without optimization**: 50 × 100k × $3/1M = **$15 per task**
-**With 90% caching**: 50 × 100k × $0.30/1M = **$1.50 per task**
+Costs vary significantly by model choice. Kimi K2.5-TEE offers a good balance of performance and cost.
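+
+A back-of-envelope estimate for the task profile above (per-token prices are placeholders; substitute your model's actual rates):
+
+```python
+turns = 50
+avg_context_tokens = 100_000
+output_per_turn = 500
+
+input_price = 1.00 / 1_000_000    # $/input token (assumed)
+output_price = 4.00 / 1_000_000   # $/output token (assumed)
+cache_discount = 0.10             # cached input billed at ~10% (assumed)
+
+uncached_input = turns * avg_context_tokens * input_price
+cached_input = uncached_input * cache_discount
+output_cost = turns * output_per_turn * output_price
+
+print(f"input, no caching: ${uncached_input:.2f}")  # $5.00
+print(f"input, cached:     ${cached_input:.2f}")    # $0.50
+print(f"output:            ${output_cost:.2f}")     # $0.10
+```
+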
## Optimization Strategies
diff --git a/docs/README.md b/docs/README.md
new file mode 100644
index 0000000..3700151
--- /dev/null
+++ b/docs/README.md
@@ -0,0 +1,125 @@
+# BaseAgent Documentation
+
+> **Professional documentation for the BaseAgent autonomous coding assistant**
+
+BaseAgent is a high-performance autonomous agent designed for the [Term Challenge](https://term.challenge). It leverages LLM-driven decision making with advanced context management and cost optimization techniques.
+
+---
+
+## Table of Contents
+
+### Getting Started
+- [Overview](./overview.md) - What is BaseAgent and core design principles
+- [Installation](./installation.md) - Prerequisites and setup instructions
+- [Quick Start](./quickstart.md) - Your first task in 5 minutes
+
+### Core Concepts
+- [Architecture](./architecture.md) - Technical architecture and system design
+- [Configuration](./configuration.md) - All configuration options explained
+- [Usage Guide](./usage.md) - Command-line interface and options
+
+### Reference
+- [Tools Reference](./tools.md) - Available tools and their parameters
+- [Context Management](./context-management.md) - Token management and compaction
+- [Best Practices](./best-practices.md) - Optimal usage patterns
+
+### LLM Providers
+- [Chutes API Integration](./chutes-integration.md) - Using Chutes as your LLM provider
+
+---
+
+## Quick Navigation
+
+| Document | Description |
+|----------|-------------|
+| [Overview](./overview.md) | High-level introduction and design principles |
+| [Installation](./installation.md) | Step-by-step setup guide |
+| [Quick Start](./quickstart.md) | Get running in minutes |
+| [Architecture](./architecture.md) | Technical deep-dive with diagrams |
+| [Configuration](./configuration.md) | Environment variables and settings |
+| [Usage](./usage.md) | CLI commands and examples |
+| [Tools](./tools.md) | Complete tools reference |
+| [Context Management](./context-management.md) | Memory and token optimization |
+| [Best Practices](./best-practices.md) | Tips for optimal performance |
+| [Chutes Integration](./chutes-integration.md) | Chutes API setup and usage |
+
+---
+
+## Architecture at a Glance
+
+```mermaid
+graph TB
+ subgraph User["User Interface"]
+ CLI["CLI (agent.py)"]
+ end
+
+ subgraph Core["Core Engine"]
+ Loop["Agent Loop"]
+ Context["Context Manager"]
+ Cache["Prompt Cache"]
+ end
+
+ subgraph LLM["LLM Layer"]
+ Client["LiteLLM Client"]
+ Provider["Provider (Chutes/OpenRouter)"]
+ end
+
+ subgraph Tools["Tool System"]
+ Registry["Tool Registry"]
+ Shell["shell_command"]
+ Files["read_file / write_file"]
+ Search["grep_files / list_dir"]
+ end
+
+ CLI --> Loop
+ Loop --> Context
+ Loop --> Cache
+ Loop --> Client
+ Client --> Provider
+ Loop --> Registry
+ Registry --> Shell
+ Registry --> Files
+ Registry --> Search
+```
+
+---
+
+## Key Features
+
+- **Fully Autonomous** - No user confirmation required; makes decisions independently
+- **LLM-Driven** - All decisions made by the language model, not hardcoded logic
+- **Prompt Caching** - 90%+ cache hit rate for significant cost reduction
+- **Context Management** - Intelligent pruning and compaction for long tasks
+- **Self-Verification** - Automatic validation before task completion
+- **Multi-Provider** - Supports Chutes AI, OpenRouter, and other OpenAI-compatible providers
+
+---
+
+## Project Structure
+
+```
+baseagent/
+├── agent.py # Entry point
+├── src/
+│ ├── core/
+│ │ ├── loop.py # Main agent loop
+│ │ └── compaction.py # Context management
+│ ├── llm/
+│   │   └── client.py        # LLM client (Chutes API)
+│ ├── config/
+│ │ └── defaults.py # Configuration
+│ ├── tools/ # Tool implementations
+│ ├── prompts/
+│ │ └── system.py # System prompt
+│ └── output/
+│ └── jsonl.py # JSONL event emission
+├── rules/ # Development guidelines
+├── astuces/ # Implementation techniques
+└── docs/ # This documentation
+```
+
+---
+
+## License
+
+MIT License - See [LICENSE](../LICENSE) for details.
diff --git a/docs/architecture.md b/docs/architecture.md
new file mode 100644
index 0000000..772b5ee
--- /dev/null
+++ b/docs/architecture.md
@@ -0,0 +1,435 @@
+# Technical Architecture
+
+> **Deep dive into BaseAgent's system design, components, and data flow**
+
+## System Overview
+
+BaseAgent follows a modular architecture with clear separation of concerns:
+
+```mermaid
+graph TB
+ subgraph Entry["Entry Layer"]
+ agent["agent.py
CLI Entry Point"]
+ end
+
+ subgraph Core["Core Layer"]
+ loop["loop.py
Agent Loop"]
+ compact["compaction.py
Context Manager"]
+ end
+
+ subgraph LLM["LLM Layer"]
+ client["client.py
LiteLLM Client"]
+ end
+
+ subgraph Config["Configuration"]
+ defaults["defaults.py
Settings"]
+ prompts["system.py
System Prompt"]
+ end
+
+ subgraph Tools["Tool Layer"]
+ registry["registry.py
Tool Registry"]
+ shell["shell.py"]
+ read["read_file.py"]
+ write["write_file.py"]
+ patch["apply_patch.py"]
+ grep["grep_files.py"]
+ list["list_dir.py"]
+ end
+
+ subgraph Output["Output Layer"]
+ jsonl["jsonl.py
Event Emitter"]
+ end
+
+ agent --> loop
+ loop --> compact
+ loop --> client
+ loop --> registry
+ loop --> jsonl
+ client --> defaults
+ loop --> prompts
+ registry --> shell & read & write & patch & grep & list
+
+ style loop fill:#4CAF50,color:#fff
+ style client fill:#2196F3,color:#fff
+ style compact fill:#FF9800,color:#fff
+```
+
+---
+
+## Component Diagram
+
+```mermaid
+classDiagram
+ class AgentContext {
+ +instruction: str
+ +cwd: str
+ +step: int
+ +is_done: bool
+ +history: List
+ +shell(cmd, timeout) ShellResult
+ +done()
+ +log(msg)
+ }
+
+    class LLMClient {
+ +model: str
+ +temperature: float
+ +max_tokens: int
+ +cost_limit: float
+ +chat(messages, tools) LLMResponse
+ +get_stats() Dict
+ }
+
+ class LLMResponse {
+ +text: str
+ +function_calls: List~FunctionCall~
+ +tokens: Dict
+ +has_function_calls() bool
+ }
+
+ class FunctionCall {
+ +id: str
+ +name: str
+ +arguments: Dict
+ }
+
+ class ToolRegistry {
+ +tools: Dict
+ +execute(ctx, name, args) ToolResult
+ +get_tools_for_llm() List
+ }
+
+ class ToolResult {
+ +success: bool
+ +output: str
+ +inject_content: Optional
+ }
+
+    AgentContext --> LLMClient : uses
+    LLMClient --> LLMResponse : returns
+ LLMResponse --> FunctionCall : contains
+ AgentContext --> ToolRegistry : uses
+ ToolRegistry --> ToolResult : returns
+```
+
+---
+
+## Agent Loop Workflow
+
+The heart of BaseAgent is the agent loop in `src/core/loop.py`:
+
+```mermaid
+flowchart TB
+ Start([Start]) --> Init[Initialize Session]
+ Init --> BuildMsg[Build Initial Messages]
+ BuildMsg --> GetState[Get Terminal State]
+
+ GetState --> LoopStart{Iteration < Max?}
+
+    LoopStart -->|Yes| ManageCtx[Manage Context<br/>Prune/Compact if needed]
+ ManageCtx --> ApplyCache[Apply Prompt Caching]
+ ApplyCache --> CallLLM[Call LLM with Tools]
+
+ CallLLM --> HasCalls{Has Tool Calls?}
+
+ HasCalls -->|Yes| ResetPending[Reset pending_completion]
+ ResetPending --> ExecTools[Execute Tool Calls]
+ ExecTools --> AddResults[Add Results to Messages]
+ AddResults --> LoopStart
+
+ HasCalls -->|No| CheckPending{pending_completion?}
+
+ CheckPending -->|No| SetPending[Set pending_completion = true]
+ SetPending --> InjectVerify[Inject Verification Prompt]
+ InjectVerify --> LoopStart
+
+ CheckPending -->|Yes| Complete[Task Complete]
+
+ LoopStart -->|No| Timeout[Max Iterations Reached]
+
+ Complete --> Emit[Emit turn.completed]
+ Timeout --> Emit
+ Emit --> End([End])
+
+ style ManageCtx fill:#FF9800,color:#fff
+ style ApplyCache fill:#9C27B0,color:#fff
+ style CallLLM fill:#2196F3,color:#fff
+ style ExecTools fill:#4CAF50,color:#fff
+ style InjectVerify fill:#E91E63,color:#fff
+```
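+
+A hedged Python skeleton of that loop (method names follow the class diagram above; `VERIFICATION_PROMPT` and the context helpers are assumptions, and the real code lives in `src/core/loop.py`):
+
+```python
+pending_completion = False
+for iteration in range(CONFIG["max_iterations"]):
+    messages = manage_context(messages)   # prune/compact if needed
+    messages = apply_caching(messages)    # mark cache breakpoints
+    response = llm.chat(messages, tools=tools.get_tools_for_llm())
+
+    if response.has_function_calls():
+        pending_completion = False        # new work resets the completion gate
+        for call in response.function_calls:
+            result = tools.execute(ctx, call.name, call.arguments)
+            messages.append(
+                {"role": "tool", "tool_call_id": call.id, "content": result.output}
+            )
+    elif not pending_completion:
+        pending_completion = True         # first quiet turn -> ask to verify
+        messages.append({"role": "user", "content": VERIFICATION_PROMPT})
+    else:
+        break                             # verified quiet turn -> complete
+```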
+
+---
+
+## Data Flow
+
+### Request Flow
+
+```mermaid
+sequenceDiagram
+ participant User
+ participant Entry as agent.py
+ participant Loop as loop.py
+ participant Context as compaction.py
+ participant Cache as Prompt Cache
+    participant LLM as LLM Client
+ participant Provider as API Provider
+ participant Tools as Tool Registry
+
+ User->>Entry: --instruction "Create hello.txt"
+ Entry->>Entry: Initialize AgentContext
+    Entry->>Entry: Initialize LLMClient
+ Entry->>Loop: run_agent_loop()
+
+ Loop->>Loop: Build messages [system, user, state]
+
+ rect rgb(255, 240, 220)
+ Note over Loop,Provider: Iteration Loop
+ Loop->>Context: manage_context(messages)
+ Context-->>Loop: Managed messages
+
+ Loop->>Cache: apply_caching(messages)
+ Cache-->>Loop: Cached messages
+
+ Loop->>LLM: chat(messages, tools)
+ LLM->>Provider: API Request
+ Provider-->>LLM: Response
+ LLM-->>Loop: LLMResponse
+
+ alt Has tool_calls
+ Loop->>Tools: execute(ctx, tool_name, args)
+ Tools-->>Loop: ToolResult
+ Loop->>Loop: Append to messages
+ end
+ end
+
+ Loop-->>Entry: Complete
+ Entry-->>User: JSONL output
+```
+
+### Message Structure
+
+Messages accumulate through the session:
+
+```python
+messages = [
+ # 1. System prompt (stable, cached)
+ {"role": "system", "content": SYSTEM_PROMPT},
+
+ # 2. User instruction
+ {"role": "user", "content": "Create hello.txt with 'Hello World'"},
+
+ # 3. Initial state
+ {"role": "user", "content": "Current directory:\n```\n...\n```"},
+
+ # 4. Assistant response with tool calls
+ {
+ "role": "assistant",
+ "content": "Creating the file...",
+ "tool_calls": [
+ {"id": "call_1", "type": "function", "function": {...}}
+ ]
+ },
+
+ # 5. Tool result
+ {"role": "tool", "tool_call_id": "call_1", "content": "File created"},
+
+ # ... continues until completion
+]
+```
+
+---
+
+## Module Descriptions
+
+### `src/core/loop.py` - Agent Loop
+
+The main orchestration module that:
+- Initializes the session and emits JSONL events
+- Manages the iterative Observe→Think→Act cycle
+- Applies prompt caching for cost optimization
+- Handles LLM errors with retry logic
+- Triggers self-verification before completion
+
+### `src/core/compaction.py` - Context Manager
+
+Intelligent context management that:
+- Estimates token usage (4 chars ≈ 1 token)
+- Detects context overflow at 85% of usable window
+- Prunes old tool outputs (protects last 40K tokens)
+- Runs AI compaction when pruning is insufficient
+- Preserves critical information through summarization
+
+### `src/llm/client.py` - LLM Client
+
+Chutes API client (OpenAI-compatible) that:
+- Supports multiple providers (Chutes, OpenRouter, etc.)
+- Tracks token usage and costs
+- Handles tool/function calling format
+- Enforces cost limits
+- Provides usage statistics
+
+### `src/tools/registry.py` - Tool Registry
+
+Centralized tool management that:
+- Registers all available tools
+- Provides tool specs for LLM
+- Executes tools with proper context
+- Handles tool output truncation
+- Manages image injection for `view_image`
+
+### `src/prompts/system.py` - System Prompt
+
+System prompt configuration that:
+- Defines agent personality and behavior
+- Specifies coding guidelines
+- Includes AGENTS.md support
+- Configures autonomous operation mode
+- Provides environment context
+
+### `src/config/defaults.py` - Configuration
+
+Central configuration containing:
+- Model settings (model name, tokens, temperature)
+- Context management thresholds
+- Tool output limits
+- Prompt caching settings
+- Execution limits
+
+---
+
+## Context Management Pipeline
+
+```mermaid
+flowchart LR
+ subgraph Input
+        Msgs[Messages<br/>~150K tokens]
+    end
+
+    subgraph Detection
+        Est[Estimate Tokens]
+        Check{"> 85% of<br/>168K usable?"}
+    end
+
+    subgraph Pruning
+        Scan[Scan backwards]
+        Protect[Protect last 40K<br/>tool tokens]
+ Clear[Clear old outputs]
+ end
+
+ subgraph Compaction
+ CheckAgain{Still > 85%?}
+ Summarize[AI Summarization]
+ NewMsgs[Compacted Messages]
+ end
+
+ subgraph Output
+ Result[Managed Messages]
+ end
+
+ Msgs --> Est --> Check
+ Check -->|No| Result
+ Check -->|Yes| Scan --> Protect --> Clear
+ Clear --> CheckAgain
+ CheckAgain -->|No| Result
+ CheckAgain -->|Yes| Summarize --> NewMsgs --> Result
+```
+
+---
+
+## Tool Execution Flow
+
+```mermaid
+flowchart TB
+ subgraph LLM["LLM Response"]
+ Calls["tool_calls: [
{name: 'shell_command', args: {command: 'ls'}},
{name: 'read_file', args: {file_path: 'README.md'}}
]"]
+ end
+
+ subgraph Registry["Tool Registry"]
+ direction TB
+ Lookup[Lookup Tool]
+ Execute[Execute with Context]
+        Truncate[Truncate Output<br/>max 2500 tokens]
+ end
+
+ subgraph Tools["Tool Implementations"]
+ Shell[shell_command]
+ Read[read_file]
+ Write[write_file]
+ Patch[apply_patch]
+ Grep[grep_files]
+ List[list_dir]
+ end
+
+ subgraph Output["Results"]
+ Results["tool results added
to messages"]
+ end
+
+ Calls --> Lookup
+ Lookup --> Execute
+ Execute --> Shell & Read & Write & Patch & Grep & List
+ Shell & Read & Write & Patch & Grep & List --> Truncate
+ Truncate --> Results
+```
+
+---
+
+## JSONL Event Emission
+
+BaseAgent emits structured JSONL events throughout execution:
+
+```mermaid
+sequenceDiagram
+ participant Loop as Agent Loop
+ participant JSONL as Event Emitter
+ participant stdout as Standard Output
+
+ Loop->>JSONL: emit(ThreadStartedEvent)
+ JSONL->>stdout: {"type": "thread.started", ...}
+
+ Loop->>JSONL: emit(TurnStartedEvent)
+ JSONL->>stdout: {"type": "turn.started", ...}
+
+ loop Each Tool Call
+ Loop->>JSONL: emit(ItemStartedEvent)
+ JSONL->>stdout: {"type": "item.started", ...}
+ Loop->>JSONL: emit(ItemCompletedEvent)
+ JSONL->>stdout: {"type": "item.completed", ...}
+ end
+
+ Loop->>JSONL: emit(TurnCompletedEvent)
+ JSONL->>stdout: {"type": "turn.completed", "usage": {...}}
+```
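+
+A minimal sketch of that emission pattern (the event class and its fields are assumptions based on the names above; the real definitions live in `src/output/jsonl.py`):
+
+```python
+import json
+from dataclasses import asdict, dataclass
+
+@dataclass
+class TurnCompletedEvent:
+    usage: dict
+    type: str = "turn.completed"
+
+def emit(event) -> None:
+    # One JSON object per line on stdout (JSONL)
+    print(json.dumps(asdict(event)), flush=True)
+
+emit(TurnCompletedEvent(usage={"input_tokens": 50_000, "output_tokens": 500}))
+```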
+
+---
+
+## Error Handling Strategy
+
+```mermaid
+flowchart TB
+ Error[Error Occurs] --> Type{Error Type?}
+
+    Type -->|CostLimitExceeded| Abort[Emit TurnFailed<br/>Abort Session]
+
+ Type -->|Authentication| Abort
+
+ Type -->|Rate Limit| Retry{Attempt < 5?}
+ Retry -->|Yes| Wait[Wait 10s × attempt]
+ Wait --> TryAgain[Retry Request]
+ Retry -->|No| Abort
+
+ Type -->|Timeout/504| Retry
+
+ Type -->|Other| Retry
+
+ TryAgain --> Success{Success?}
+ Success -->|Yes| Continue[Continue Loop]
+ Success -->|No| Retry
+```
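+
+A sketch of that policy (the exception class is illustrative, not the repo's exact type):
+
+```python
+import time
+
+class RetryableError(Exception):
+    """Rate limits, timeouts/504s, and other transient failures."""
+
+def call_with_retry(send, max_attempts=5):
+    for attempt in range(1, max_attempts + 1):
+        try:
+            return send()
+        except RetryableError:
+            if attempt == max_attempts:
+                raise
+            time.sleep(10 * attempt)  # wait 10s x attempt, per the diagram
+```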
+
+---
+
+## Next Steps
+
+- [Configuration Reference](./configuration.md) - All settings explained
+- [Tools Reference](./tools.md) - Detailed tool documentation
+- [Context Management](./context-management.md) - Deep dive into memory management
diff --git a/docs/best-practices.md b/docs/best-practices.md
new file mode 100644
index 0000000..7fa098a
--- /dev/null
+++ b/docs/best-practices.md
@@ -0,0 +1,408 @@
+# Best Practices
+
+> **Strategies for optimal performance, cost efficiency, and reliable results**
+
+## Core Principles
+
+BaseAgent follows these fundamental principles:
+
+1. **Explore First** - Always gather context before acting
+2. **Iterate** - Never try to solve everything in one shot
+3. **Verify** - Double-confirm before completing
+4. **Fail Gracefully** - Handle errors and retry
+5. **Stay Focused** - Complete exactly what's asked
+
+---
+
+## Explore-First Pattern
+
+Before making any changes, always understand the context:
+
+```mermaid
+flowchart LR
+ subgraph Bad["❌ Bad Pattern"]
+ B1[Receive Task] --> B2[Start Coding]
+ B2 --> B3[Hit Problems]
+ B3 --> B4[Backtrack]
+ end
+
+ subgraph Good["✅ Good Pattern"]
+ G1[Receive Task] --> G2[Explore Codebase]
+ G2 --> G3[Understand Patterns]
+ G3 --> G4[Plan Approach]
+ G4 --> G5[Implement]
+ end
+```
+
+### Exploration Steps
+
+1. **Read README** - Understand project purpose
+2. **List directory** - See project structure
+3. **Find similar code** - Match existing patterns
+4. **Check tests** - Understand expected behavior
+5. **Review AGENTS.md** - Follow project instructions
+
+---
+
+## Self-Verification
+
+BaseAgent automatically verifies work before completion:
+
+```mermaid
+sequenceDiagram
+ participant Agent
+ participant Verify as Verification
+ participant LLM as LLM
+
+ Agent->>Agent: No more tool calls
+ Agent->>Verify: Inject verification prompt
+ Verify->>LLM: Re-read instruction
+ LLM->>LLM: List requirements
+ LLM->>LLM: Verify each requirement
+
+ alt All verified
+ LLM-->>Agent: Confirm completion
+ else Something missing
+ LLM-->>Agent: Continue working
+ end
+```
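+
+In code, the injection step looks roughly like this (variable names and prompt wording are illustrative; the actual prompt lives in the agent loop):
+
+```python
+VERIFY_PROMPT = (
+    "Before completing: re-read the ORIGINAL instruction, list every explicit "
+    "and implicit requirement, and run commands to verify each one. "
+    "If anything is missing, keep working instead of finishing."
+)
+
+if not response.has_function_calls():
+    if not pending_completion:
+        pending_completion = True
+        messages.append({"role": "user", "content": VERIFY_PROMPT})
+    else:
+        task_complete = True  # no tool calls after verification prompt
+```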
+
+### Verification Checklist
+
+The agent automatically asks:
+- ✅ Did I read the ENTIRE original instruction?
+- ✅ Did I list ALL requirements (explicit and implicit)?
+- ✅ Did I run commands to VERIFY each requirement?
+- ✅ Did I fix any issues found during verification?
+
+---
+
+## Prompt Caching
+
+Achieve **90%+ cache hit rate** for massive cost savings:
+
+```mermaid
+graph TB
+ subgraph Strategy["Caching Strategy"]
+ S1["Cache first 2 system messages"]
+ S2["Cache last 2 non-system messages"]
+ S3["Up to 4 breakpoints total"]
+ end
+
+ subgraph Effect["Effect"]
+ E1["Request 1: Cache miss (create)"]
+ E2["Request 2: Cache HIT (90% saved)"]
+ E3["Request 3: Cache HIT (90% saved)"]
+ E4["Request N: Cache HIT (90% saved)"]
+ end
+
+ S1 --> E1
+ S2 --> E1
+ E1 --> E2 --> E3 --> E4
+
+ style E2 fill:#4CAF50,color:#fff
+ style E3 fill:#4CAF50,color:#fff
+ style E4 fill:#4CAF50,color:#fff
+```
+
+### How It Works
+
+```python
+# Messages structure
+messages = [
+ {"role": "system", "content": "...", "cache_control": {"type": "ephemeral"}}, # ✓ Cached
+ {"role": "user", "content": "original instruction"},
+ {"role": "assistant", "content": "...", "tool_calls": [...]},
+ {"role": "tool", "content": "..."},
+ {"role": "assistant", "content": "...", "cache_control": {"type": "ephemeral"}}, # ✓ Cached
+ {"role": "user", "content": "verification", "cache_control": {"type": "ephemeral"}}, # ✓ Cached
+]
+```
+
+### Cost Impact
+
+| Scenario | Example cost per 1M input tokens |
+|----------|----------------------------------|
+| Uncached input | $3.00 (illustrative Claude-class rate) |
+| Cached input (90%+ hit rate) | $0.30 |
+| **Savings on cached reads** | **~90%** |
+
+---
+
+## Cost Optimization
+
+### Set Cost Limits
+
+```bash
+export LLM_COST_LIMIT="5.0" # Max $5 per session
+```
+
+### Monitor Usage
+
+Watch the logs for token counts:
+```
+[14:30:17] [loop] Tokens: 50000 input, 45000 cached, 500 output
+```
+
+### Optimize Instructions
+
+```bash
+# ❌ Vague (causes exploration loops)
+python3 agent.py --instruction "Fix the bugs"
+
+# ✅ Specific (direct action)
+python3 agent.py --instruction "Fix the TypeError in src/api/handlers.py:42"
+```
+
+### Use Targeted Tools
+
+```bash
+# ❌ Wasteful
+ls -laR / # Lists entire filesystem
+
+# ✅ Efficient
+list_dir(dir_path="src/", depth=2)
+```
+
+---
+
+## Git Hygiene
+
+BaseAgent follows strict git rules:
+
+### ✅ Allowed
+
+- `git status` - Check current state
+- `git log` - View history
+- `git blame` - Understand code origins
+- `git diff` - Review changes
+- `git add` - Stage changes (when asked)
+- `git commit` - Commit changes (when asked)
+
+### ❌ Forbidden
+
+- `git reset --hard` - Destructive
+- `git checkout --` - Loses changes
+- Reverting changes you didn't make
+- Amending commits without permission
+- Pushing without explicit request
+
+### Safe Practices
+
+```bash
+# Always check state first
+git status
+
+# Review before committing
+git diff
+
+# Stage specific files
+git add src/specific_file.py
+
+# Never force operations
+# ❌ git push --force
+```
+
+---
+
+## Writing Effective Instructions
+
+### Be Specific
+
+```bash
+# ❌ Too vague
+"Fix the code"
+
+# ✅ Specific
+"Fix the NullPointerException in UserService.java:85 when user.email is null"
+```
+
+### Provide Context
+
+```bash
+# ❌ Missing context
+"Add authentication"
+
+# ✅ With context
+"Add JWT authentication to the /api/users endpoint using the existing AuthService"
+```
+
+### Request Verification
+
+```bash
+# ✅ Ask for verification
+"Create a sorting algorithm and verify it works with [5, 2, 8, 1, 9]"
+```
+
+### Break Down Complex Tasks
+
+```bash
+# ❌ Too complex for one instruction
+"Build a complete e-commerce platform"
+
+# ✅ Incremental
+"Create the product catalog data model with name, price, and description fields"
+```
+
+---
+
+## Tool Usage Patterns
+
+### Shell Commands
+
+```python
+# ✅ Use workdir
+{"command": "ls -la", "workdir": "/workspace/src"}
+
+# ❌ Avoid cd chains
+{"command": "cd /workspace && cd src && ls"}
+```
+
+### File Reading
+
+```python
+# ✅ Read specific sections
+{"file_path": "large.py", "offset": 100, "limit": 50}
+
+# ❌ Read entire large files
+{"file_path": "large.py"} # May overwhelm context
+```
+
+### Searching
+
+```python
+# ✅ Use grep_files for discovery
+{"pattern": "def calculate", "include": "*.py", "path": "src/"}
+
+# Then read specific files found
+{"file_path": "src/billing/calculator.py"}
+```
+
+### Editing
+
+```python
+# ✅ Use apply_patch for surgical edits
+{"patch": "*** Update File: src/utils.py\n@@ def old_func:\n- old\n+ new"}
+
+# ✅ Use write_file for new files
+{"file_path": "new_module.py", "content": "..."}
+```
+
+---
+
+## Handling Long Tasks
+
+For complex, multi-step tasks:
+
+### 1. Use update_plan
+
+```python
+{
+ "steps": [
+ {"description": "Analyze existing code", "status": "completed"},
+ {"description": "Design new module", "status": "in_progress"},
+ {"description": "Implement core logic", "status": "pending"},
+ {"description": "Add unit tests", "status": "pending"},
+ {"description": "Update documentation", "status": "pending"}
+ ]
+}
+```
+
+### 2. Monitor Context
+
+Watch for compaction events:
+```
+[compaction] Context overflow detected, managing...
+```
+
+### 3. Save Progress
+
+If context compaction occurs, the summary preserves:
+- Current progress
+- Key decisions
+- Remaining work
+- Modified files
+
+---
+
+## Error Handling
+
+BaseAgent handles errors gracefully:
+
+### Automatic Retry
+
+```mermaid
+flowchart TB
+ Error[Error Occurs] --> Type{Error Type}
+
+ Type -->|Rate Limit| Wait[Wait + Retry]
+ Type -->|Timeout| Wait
+ Type -->|Server Error| Wait
+
+ Type -->|Auth Error| Fail[Abort]
+ Type -->|Cost Limit| Fail
+
+ Wait --> Attempt{Attempt < 5?}
+ Attempt -->|Yes| Retry[Retry Request]
+ Attempt -->|No| Fail
+
+ Retry --> Success{Success?}
+ Success -->|Yes| Continue[Continue]
+ Success -->|No| Attempt
+```
+
+### Recovery Strategies
+
+1. **Try alternatives** - If one approach fails, try another
+2. **Check documentation** - Read AGENTS.md, README.md
+3. **Simplify** - Break complex operations into steps
+4. **Report issues** - Note blockers in final message
+
+---
+
+## Performance Tips
+
+### Reduce Iterations
+
+1. Give specific, complete instructions
+2. Provide necessary context upfront
+3. Avoid vague requirements
+
+### Minimize Token Usage
+
+1. Search before reading entire files
+2. Use targeted directory listings
+3. Keep tool outputs focused
+
+### Maximize Cache Hits
+
+1. Keep system prompt stable
+2. Don't modify early messages
+3. Let the agent handle caching automatically
+
+---
+
+## Checklist
+
+Before running the agent:
+
+- [ ] Clear, specific instruction
+- [ ] Necessary context provided
+- [ ] API key configured
+- [ ] Cost limit set appropriately
+- [ ] Working directory correct
+
+After completion:
+
+- [ ] Verify output matches requirements
+- [ ] Check for any error messages
+- [ ] Review modified files
+- [ ] Run relevant tests
+
+---
+
+## Next Steps
+
+- [Configuration](./configuration.md) - Tune settings
+- [Context Management](./context-management.md) - Memory optimization
+- [Tools Reference](./tools.md) - Detailed tool docs
diff --git a/docs/chutes-integration.md b/docs/chutes-integration.md
new file mode 100644
index 0000000..75b4955
--- /dev/null
+++ b/docs/chutes-integration.md
@@ -0,0 +1,378 @@
+# Chutes API Integration
+
+> **Using Chutes AI as your LLM provider for BaseAgent**
+
+## Overview
+
+[Chutes AI](https://chutes.ai) provides access to advanced language models through a simple API. BaseAgent supports Chutes as a first-class provider, offering access to the **Kimi K2.5-TEE** model with its powerful thinking capabilities.
+
+---
+
+## Chutes API Features
+
+| Feature | Value |
+|---------|-------|
+| **API Base URL** | `https://llm.chutes.ai/v1` |
+| **Default Model** | `moonshotai/Kimi-K2.5-TEE` |
+| **Model Parameters** | 1T total, 32B activated |
+| **Context Window** | 256K tokens |
+| **Thinking Mode** | Enabled by default |
+
+---
+
+## Quick Setup
+
+### Step 1: Get Your API Token
+
+1. Visit [chutes.ai](https://chutes.ai)
+2. Create an account or sign in
+3. Navigate to API settings
+4. Generate an API token
+
+### Step 2: Configure Environment
+
+```bash
+# Required: API token
+export CHUTES_API_TOKEN="your-token-from-chutes.ai"
+
+# Optional: Explicitly set provider and model
+export LLM_PROVIDER="chutes"
+export LLM_MODEL="moonshotai/Kimi-K2.5-TEE"
+```
+
+### Step 3: Run BaseAgent
+
+```bash
+python3 agent.py --instruction "Your task description"
+```
+
+---
+
+## Authentication Flow
+
+```mermaid
+sequenceDiagram
+ participant Agent as BaseAgent
+    participant Client as LLM Client
+ participant Chutes as Chutes API
+
+ Agent->>Client: Initialize with CHUTES_API_TOKEN
+    Client->>Client: Configure base URL and auth header
+
+ loop Each Request
+ Agent->>Client: chat(messages, tools)
+ Client->>Chutes: POST /v1/chat/completions
+ Note over Client,Chutes: Authorization: Bearer $CHUTES_API_TOKEN
+ Chutes-->>Client: Response with tokens
+ Client-->>Agent: LLMResponse
+ end
+```
+
+---
+
+## Model Details: Kimi K2.5-TEE
+
+The **moonshotai/Kimi-K2.5-TEE** model offers:
+
+### Architecture
+- **Total Parameters**: 1 Trillion (1T)
+- **Activated Parameters**: 32 Billion (32B)
+- **Architecture**: Mixture of Experts (MoE)
+- **Context Length**: 256,000 tokens
+
+### Thinking Mode
+
+Kimi K2.5-TEE supports a "thinking mode" where the model shows its reasoning process:
+
+```mermaid
+sequenceDiagram
+ participant User
+ participant Model as Kimi K2.5-TEE
+ participant Response
+
+ User->>Model: Complex task instruction
+
+ rect rgb(230, 240, 255)
+ Note over Model: Thinking Mode Active
+ Model->>Model: Analyze problem
+ Model->>Model: Consider approaches
+ Model->>Model: Evaluate options
+ end
+
+ Model->>Response: Reasoning process...
+ Model->>Response: Final answer/action
+```
+
+### Temperature Settings
+
+| Mode | Temperature | Top-p | Description |
+|------|-------------|-------|-------------|
+| **Thinking** | 1.0 | 0.95 | More exploratory reasoning |
+| **Instant** | 0.6 | 0.95 | Faster, more deterministic |
+
+---
+
+## Configuration Options
+
+### Basic Configuration
+
+```python
+# src/config/defaults.py
+CONFIG = {
+ "model": os.environ.get("LLM_MODEL", "moonshotai/Kimi-K2.5-TEE"),
+ "provider": "chutes",
+ "temperature": 1.0, # For thinking mode
+ "max_tokens": 16384,
+}
+```
+
+### Environment Variables
+
+| Variable | Required | Default | Description |
+|----------|----------|---------|-------------|
+| `CHUTES_API_TOKEN` | Yes | - | API token from chutes.ai |
+| `LLM_PROVIDER` | No | `openrouter` | Set to `chutes` |
+| `LLM_MODEL` | No | `moonshotai/Kimi-K2.5-TEE` | Model identifier |
+| `LLM_COST_LIMIT` | No | `10.0` | Max cost in USD |
+
+---
+
+## Thinking Mode Processing
+
+When thinking mode is enabled, responses include `<think>` tags:
+
+```xml
+<think>
+The user wants to create a file with specific content.
+I should:
+1. Check if the file already exists
+2. Create the file with the requested content
+3. Verify the file was created correctly
+</think>
+
+I'll create the file for you now.
+```
+
+BaseAgent can be configured to:
+- **Parse and strip** the thinking tags (show only final answer)
+- **Keep** the thinking content (useful for debugging)
+- **Log** thinking to stderr while showing final answer
+
+### Parsing Example
+
+```python
+import re
+
+def parse_thinking(response_text: str) -> tuple[str, str]:
+ """Extract thinking and final response."""
+    think_pattern = r'<think>(.*?)</think>'
+ match = re.search(think_pattern, response_text, re.DOTALL)
+
+ if match:
+ thinking = match.group(1).strip()
+ final = re.sub(think_pattern, '', response_text, flags=re.DOTALL).strip()
+ return thinking, final
+
+ return "", response_text
+```
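+
+Example usage (the response text is hypothetical):
+
+```python
+raw = "<think>Check whether the file exists first.</think>The file looks fine."
+thinking, final = parse_thinking(raw)
+print(thinking)  # Check whether the file exists first.
+print(final)     # The file looks fine.
+```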
+
+---
+
+## API Request Format
+
+Chutes API follows OpenAI-compatible format:
+
+```bash
+curl -X POST https://llm.chutes.ai/v1/chat/completions \
+ -H "Authorization: Bearer $CHUTES_API_TOKEN" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "model": "moonshotai/Kimi-K2.5-TEE",
+ "messages": [
+ {"role": "system", "content": "You are a helpful assistant."},
+ {"role": "user", "content": "Hello!"}
+ ],
+ "max_tokens": 1024,
+ "temperature": 1.0,
+ "top_p": 0.95
+ }'
+```
+
+---
+
+## Fallback to OpenRouter
+
+If Chutes is unavailable, BaseAgent can fall back to OpenRouter:
+
+```mermaid
+flowchart TB
+ Start[API Request] --> Check{Chutes Available?}
+
+ Check -->|Yes| Chutes[Send to Chutes API]
+ Chutes --> Success{Success?}
+ Success -->|Yes| Done[Return Response]
+ Success -->|No| Retry{Retry Count < 3?}
+
+ Retry -->|Yes| Chutes
+ Retry -->|No| Fallback[Use OpenRouter]
+
+ Check -->|No| Fallback
+ Fallback --> Done
+```
+
+### Configuration for Fallback
+
+```bash
+# Primary: Chutes
+export CHUTES_API_TOKEN="..."
+export LLM_PROVIDER="chutes"
+
+# Fallback: OpenRouter
+export OPENROUTER_API_KEY="..."
+```
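+
+A sketch of that selection order (the helper is hypothetical; the env var names are the ones documented on this page):
+
+```python
+import os
+
+def pick_provider() -> tuple[str, str]:
+    """Return (provider, base_url), preferring Chutes when configured."""
+    if os.environ.get("CHUTES_API_TOKEN"):
+        return "chutes", "https://llm.chutes.ai/v1"
+    if os.environ.get("OPENROUTER_API_KEY"):
+        return "openrouter", "https://openrouter.ai/api/v1"
+    raise RuntimeError("No LLM credentials configured")
+```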
+
+### Switching Providers
+
+```bash
+# Switch to OpenRouter
+export LLM_PROVIDER="openrouter"
+export LLM_MODEL="openrouter/anthropic/claude-sonnet-4-20250514"
+
+# Switch back to Chutes
+export LLM_PROVIDER="chutes"
+export LLM_MODEL="moonshotai/Kimi-K2.5-TEE"
+```
+
+---
+
+## Cost Considerations
+
+### Pricing (Approximate)
+
+| Metric | Cost |
+|--------|------|
+| Input tokens | Varies by model |
+| Output tokens | Varies by model |
+| Cached input | Reduced rate |
+
+### Cost Management
+
+```bash
+# Set cost limit
+export LLM_COST_LIMIT="5.0" # Max $5.00 per session
+```
+
+BaseAgent tracks costs and will abort if the limit is exceeded:
+
+```python
+# In src/llm/client.py
+if self._total_cost >= self.cost_limit:
+ raise CostLimitExceeded(
+ f"Cost limit exceeded: ${self._total_cost:.4f}",
+ used=self._total_cost,
+ limit=self.cost_limit,
+ )
+```
+
+---
+
+## Troubleshooting
+
+### Authentication Errors
+
+```
+LLMError: authentication_error
+```
+
+**Solution**: Verify your token is correct and exported:
+
+```bash
+echo $CHUTES_API_TOKEN # Should show your token
+export CHUTES_API_TOKEN="correct-token"
+```
+
+### Rate Limiting
+
+```
+LLMError: rate_limit
+```
+
+**Solution**: BaseAgent automatically retries with exponential backoff. You can also:
+- Wait a few minutes before retrying
+- Reduce request frequency
+- Check your API plan limits
+
+### Model Not Found
+
+```
+LLMError: Model 'xyz' not found
+```
+
+**Solution**: Use the correct model identifier:
+
+```bash
+export LLM_MODEL="moonshotai/Kimi-K2.5-TEE"
+```
+
+### Connection Timeouts
+
+```
+LLMError: timeout
+```
+
+**Solution**: BaseAgent retries automatically. If persistent:
+- Check your internet connection
+- Verify Chutes API status
+- Consider using OpenRouter as fallback
+
+---
+
+## Direct API Integration
+
+BaseAgent calls the Chutes OpenAI-compatible endpoint directly over HTTP; litellm is no longer a dependency. A minimal sketch using httpx (which this project already depends on):
+
+```python
+# src/llm/client.py (illustrative sketch, not the exact implementation)
+import os
+
+import httpx
+
+# messages: list of {"role": ..., "content": ...} dicts
+response = httpx.post(
+    "https://llm.chutes.ai/v1/chat/completions",
+    headers={"Authorization": f"Bearer {os.environ['CHUTES_API_TOKEN']}"},
+    json={
+        "model": "moonshotai/Kimi-K2.5-TEE",
+        "messages": messages,
+        "max_tokens": 16384,
+    },
+    timeout=120.0,
+)
+response.raise_for_status()
+data = response.json()
+```
+
+---
+
+## Best Practices
+
+### For Optimal Performance
+
+1. **Enable thinking mode** for complex reasoning tasks
+2. **Use appropriate temperature** (1.0 for exploration, 0.6 for precision)
+3. **Leverage the 256K context** for large codebases
+4. **Monitor costs** with `LLM_COST_LIMIT`
+
+### For Reliability
+
+1. **Set up fallback** to OpenRouter
+2. **Handle rate limits** gracefully (automatic in BaseAgent)
+3. **Log responses** for debugging complex tasks
+
+### For Cost Efficiency
+
+1. **Enable prompt caching** (reduces costs by 90%)
+2. **Use context management** to avoid token waste
+3. **Set reasonable cost limits** for testing
+
+---
+
+## Next Steps
+
+- [Configuration Reference](./configuration.md) - All settings explained
+- [Best Practices](./best-practices.md) - Optimization tips
+- [Usage Guide](./usage.md) - Command-line options
diff --git a/docs/configuration.md b/docs/configuration.md
new file mode 100644
index 0000000..492f074
--- /dev/null
+++ b/docs/configuration.md
@@ -0,0 +1,304 @@
+# Configuration Reference
+
+> **Complete guide to all configuration options in BaseAgent**
+
+## Overview
+
+BaseAgent configuration is centralized in `src/config/defaults.py`. Settings can be customized via environment variables or by modifying the configuration file directly.
+
+---
+
+## Configuration File
+
+The main configuration is stored in the `CONFIG` dictionary:
+
+```python
+# src/config/defaults.py
+CONFIG = {
+ # Model Settings
+ "model": "openrouter/anthropic/claude-sonnet-4-20250514",
+ "provider": "openrouter",
+ "temperature": 0.0,
+ "max_tokens": 16384,
+ "reasoning_effort": "none",
+
+ # Agent Execution
+ "max_iterations": 200,
+ "max_output_tokens": 2500,
+ "shell_timeout": 60,
+
+ # Context Management
+ "model_context_limit": 200_000,
+ "output_token_max": 32_000,
+ "auto_compact_threshold": 0.85,
+ "prune_protect": 40_000,
+ "prune_minimum": 20_000,
+
+ # Prompt Caching
+ "cache_enabled": True,
+
+ # Execution Flags
+ "bypass_approvals": True,
+ "bypass_sandbox": True,
+ "skip_git_check": True,
+ "unified_exec": True,
+ "json_output": True,
+
+ # Completion
+ "require_completion_confirmation": False,
+}
+```
+
+---
+
+## Environment Variables
+
+### LLM Provider Settings
+
+| Variable | Default | Description |
+|----------|---------|-------------|
+| `LLM_MODEL` | `openrouter/anthropic/claude-sonnet-4-20250514` | Model identifier |
+| `LLM_PROVIDER` | `openrouter` | Provider name (`chutes`, `openrouter`, etc.) |
+| `LLM_COST_LIMIT` | `10.0` | Maximum cost in USD before aborting |
+
+### API Keys
+
+| Variable | Provider | Description |
+|----------|----------|-------------|
+| `CHUTES_API_TOKEN` | Chutes AI | Token from chutes.ai |
+| `OPENROUTER_API_KEY` | OpenRouter | API key from openrouter.ai |
+| `ANTHROPIC_API_KEY` | Anthropic | Direct Anthropic API key |
+| `OPENAI_API_KEY` | OpenAI | OpenAI API key |
+
+### Example Setup
+
+```bash
+# For Chutes AI
+export CHUTES_API_TOKEN="your-token"
+export LLM_PROVIDER="chutes"
+export LLM_MODEL="moonshotai/Kimi-K2.5-TEE"
+
+# For OpenRouter
+export OPENROUTER_API_KEY="sk-or-v1-..."
+export LLM_MODEL="openrouter/anthropic/claude-sonnet-4-20250514"
+```
+
+---
+
+## Configuration Sections
+
+### Model Settings
+
+```mermaid
+graph LR
+ subgraph Model["Model Configuration"]
+ M1["model
Model identifier"]
+ M2["provider
API provider"]
+ M3["temperature
Response randomness"]
+ M4["max_tokens
Max output tokens"]
+ M5["reasoning_effort
Reasoning depth"]
+ end
+```
+
+| Setting | Type | Default | Description |
+|---------|------|---------|-------------|
+| `model` | `str` | `openrouter/anthropic/claude-sonnet-4-20250514` | Full model identifier with provider prefix |
+| `provider` | `str` | `openrouter` | LLM provider name |
+| `temperature` | `float` | `0.0` | Response randomness (0 = deterministic) |
+| `max_tokens` | `int` | `16384` | Maximum tokens in LLM response |
+| `reasoning_effort` | `str` | `none` | Reasoning depth: `none`, `minimal`, `low`, `medium`, `high`, `xhigh` |
+
+### Agent Execution Settings
+
+```mermaid
+graph LR
+ subgraph Execution["Execution Limits"]
+ E1["max_iterations
200 iterations"]
+ E2["max_output_tokens
2500 tokens"]
+ E3["shell_timeout
60 seconds"]
+ end
+```
+
+| Setting | Type | Default | Description |
+|---------|------|---------|-------------|
+| `max_iterations` | `int` | `200` | Maximum loop iterations before stopping |
+| `max_output_tokens` | `int` | `2500` | Max tokens for tool output truncation |
+| `shell_timeout` | `int` | `60` | Shell command timeout in seconds |
+
+### Context Management
+
+```mermaid
+graph TB
+ subgraph Context["Context Window Management"]
+ C1["model_context_limit: 200K"]
+ C2["output_token_max: 32K"]
+ C3["Usable: 168K"]
+ C4["auto_compact_threshold: 85%"]
+ C5["Trigger: ~143K"]
+ end
+
+ C1 --> C3
+ C2 --> C3
+ C3 --> C4
+ C4 --> C5
+```
+
+| Setting | Type | Default | Description |
+|---------|------|---------|-------------|
+| `model_context_limit` | `int` | `200000` | Total model context window (tokens) |
+| `output_token_max` | `int` | `32000` | Tokens reserved for output |
+| `auto_compact_threshold` | `float` | `0.85` | Trigger compaction at this % of usable context |
+| `prune_protect` | `int` | `40000` | Protect this many tokens of recent tool output |
+| `prune_minimum` | `int` | `20000` | Only prune if recovering at least this many tokens |
+
+### Prompt Caching
+
+| Setting | Type | Default | Description |
+|---------|------|---------|-------------|
+| `cache_enabled` | `bool` | `True` | Enable Anthropic prompt caching |
+
+> **Note**: Prompt caching requires minimum token thresholds per breakpoint:
+> - Claude Opus 4.5 on Bedrock: 4096 tokens
+> - Claude Sonnet/other: 1024 tokens
+
+### Execution Flags
+
+| Setting | Type | Default | Description |
+|---------|------|---------|-------------|
+| `bypass_approvals` | `bool` | `True` | Skip user approval prompts |
+| `bypass_sandbox` | `bool` | `True` | Bypass sandbox restrictions |
+| `skip_git_check` | `bool` | `True` | Skip git repository validation |
+| `unified_exec` | `bool` | `True` | Enable unified execution mode |
+| `json_output` | `bool` | `True` | Always emit JSONL output |
+| `require_completion_confirmation` | `bool` | `False` | Require double-confirm before completing |
+
+---
+
+## Provider-Specific Configuration
+
+### Chutes AI
+
+```python
+# Environment
+CHUTES_API_TOKEN="your-token"
+LLM_PROVIDER="chutes"
+LLM_MODEL="moonshotai/Kimi-K2.5-TEE"
+
+# Model features
+# - 1T parameters, 32B activated
+# - 256K context window
+# - Thinking mode enabled by default
+# - Temperature: 1.0 (thinking), 0.6 (instant)
+```
+
+### OpenRouter
+
+```python
+# Environment
+OPENROUTER_API_KEY="sk-or-v1-..."
+LLM_MODEL="openrouter/anthropic/claude-sonnet-4-20250514"
+
+# Requires openrouter/ prefix for litellm
+```
+
+### Direct Anthropic
+
+```python
+# Environment
+ANTHROPIC_API_KEY="sk-ant-..."
+LLM_MODEL="claude-3-5-sonnet-20241022"
+
+# No prefix needed for direct API
+```
+
+---
+
+## Configuration Workflow
+
+```mermaid
+flowchart TB
+ subgraph Load["Configuration Loading"]
+ Env[Environment Variables]
+ File[defaults.py]
+ Merge[Merged Config]
+ end
+
+ subgraph Apply["Configuration Application"]
+ Loop[Agent Loop]
+ LLM[LLM Client]
+ Context[Context Manager]
+ Tools[Tool Registry]
+ end
+
+ Env --> Merge
+ File --> Merge
+ Merge --> Loop
+ Merge --> LLM
+ Merge --> Context
+ Merge --> Tools
+```
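+
+A sketch of the merge implied by the diagram (environment variables win over `defaults.py`; the helper name is hypothetical):
+
+```python
+import os
+
+def load_config(defaults: dict) -> dict:
+    cfg = dict(defaults)
+    if "LLM_MODEL" in os.environ:
+        cfg["model"] = os.environ["LLM_MODEL"]
+    if "LLM_PROVIDER" in os.environ:
+        cfg["provider"] = os.environ["LLM_PROVIDER"]
+    if "LLM_COST_LIMIT" in os.environ:
+        cfg["cost_limit"] = float(os.environ["LLM_COST_LIMIT"])
+    return cfg
+```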
+
+---
+
+## Computed Values
+
+Some values are computed from configuration:
+
+```python
+# Usable context window
+usable_context = model_context_limit - output_token_max
+# Default: 200,000 - 32,000 = 168,000 tokens
+
+# Compaction trigger threshold
+compaction_trigger = usable_context * auto_compact_threshold
+# Default: 168,000 * 0.85 = 142,800 tokens
+
+# Token estimation
+chars_per_token = 4 # Heuristic
+tokens = len(text) // 4
+```
+
+---
+
+## Best Practices
+
+### For Cost Optimization
+
+```bash
+# Lower cost limit for testing
+export LLM_COST_LIMIT="1.0"
+
+# Use smaller context for simple tasks
+# (edit defaults.py)
+"model_context_limit": 100_000
+```
+
+### For Long Tasks
+
+```bash
+# Increase iterations
+# (edit defaults.py)
+"max_iterations": 500
+
+# Lower compaction threshold for aggressive memory management
+"auto_compact_threshold": 0.70
+```
+
+### For Debugging
+
+```bash
+# Disable caching to see full API calls
+# (edit defaults.py)
+"cache_enabled": False
+
+# Increase output limits for more context
+"max_output_tokens": 5000
+```
+
+---
+
+## Next Steps
+
+- [Chutes Integration](./chutes-integration.md) - Configure Chutes API
+- [Context Management](./context-management.md) - Understand memory management
+- [Best Practices](./best-practices.md) - Optimization tips
diff --git a/docs/context-management.md b/docs/context-management.md
new file mode 100644
index 0000000..2f26e75
--- /dev/null
+++ b/docs/context-management.md
@@ -0,0 +1,412 @@
+# Context Management
+
+> **How BaseAgent manages memory and prevents token overflow**
+
+## Why Context Management Matters
+
+Large Language Models have finite context windows. Without proper management:
+- "Context too long" errors terminate sessions
+- Critical information gets lost
+- Response quality degrades
+- Costs increase unnecessarily
+
+BaseAgent implements sophisticated context management inspired by OpenCode and Codex.
+
+---
+
+## Context Window Overview
+
+```mermaid
+graph TB
+    subgraph Window["Configured Context Window (200K tokens)"]
+        Output["Reserved for Output<br/>32K tokens"]
+        Usable["Usable Context<br/>168K tokens"]
+    end
+
+    subgraph Thresholds["Management Thresholds"]
+        Safe["Safe Zone<br/>< 85% (143K)"]
+        Warning["Warning Zone<br/>85-100%"]
+        Overflow["Overflow<br/>> 168K"]
+    end
+ end
+
+ Usable --> Safe
+ Usable --> Warning
+ Usable --> Overflow
+
+ style Safe fill:#4CAF50,color:#fff
+ style Warning fill:#FF9800,color:#fff
+ style Overflow fill:#F44336,color:#fff
+```
+
+### Key Numbers
+
+| Metric | Value | Description |
+|--------|-------|-------------|
+| Total context | 200,000 | Model's full context window |
+| Output reserve | 32,000 | Reserved for LLM response |
+| Usable context | 168,000 | Available for messages |
+| Compaction threshold | 85% | Trigger at 142,800 tokens |
+| Prune protect | 40,000 | Recent tool output to keep |
+| Prune minimum | 20,000 | Minimum savings to prune |
+
+---
+
+## Token Estimation
+
+BaseAgent estimates tokens using a simple heuristic:
+
+```python
+# 1 token ≈ 4 characters
+def estimate_tokens(text: str) -> int:
+ return len(text) // 4
+```
+
+### Message Token Components
+
+```mermaid
+graph LR
+ subgraph Message["Message Token Estimation"]
+ Content["Content
(text / 4)"]
+ Images["Images
(~1000 each)"]
+ ToolCalls["Tool Calls
(name + args)"]
+ Overhead["Role Overhead
(~4 tokens)"]
+ end
+
+ Content --> Total["Total Tokens"]
+ Images --> Total
+ ToolCalls --> Total
+ Overhead --> Total
+```
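+
+A sketch combining those components (the 4-chars-per-token and ~1000-tokens-per-image figures come from the diagram; the `images` field name is an assumption):
+
+```python
+def estimate_message_tokens(msg: dict) -> int:
+    tokens = 4  # per-message role overhead (heuristic)
+    content = msg.get("content") or ""
+    if isinstance(content, str):
+        tokens += len(content) // 4
+    for call in msg.get("tool_calls", []):
+        fn = call.get("function", {})
+        tokens += len(fn.get("name", "")) // 4
+        tokens += len(str(fn.get("arguments", ""))) // 4
+    tokens += 1000 * len(msg.get("images", []))  # rough per-image cost
+    return tokens
+```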
+
+---
+
+## Context Management Pipeline
+
+```mermaid
+flowchart TB
+ subgraph Input["Every Iteration"]
+ Messages["Current Messages"]
+ end
+
+ subgraph Detection["1. Detection"]
+ Estimate["Estimate Total Tokens"]
+ Check{"Above 85%
Threshold?"}
+ end
+
+ subgraph Pruning["2. Pruning (First Pass)"]
+ Scan["Scan Backwards"]
+ Protect["Protect Last 40K
Tool Output Tokens"]
+ Clear["Clear Old Tool Outputs"]
+ CheckAgain{"Still Above
Threshold?"}
+ end
+
+ subgraph Compaction["3. AI Compaction (Second Pass)"]
+ Summary["Generate Summary
via LLM"]
+ Rebuild["Rebuild Messages:
System + Summary"]
+ end
+
+ subgraph Output["Continue Loop"]
+ Managed["Managed Messages"]
+ end
+
+ Messages --> Estimate --> Check
+ Check -->|No| Managed
+ Check -->|Yes| Scan --> Protect --> Clear --> CheckAgain
+ CheckAgain -->|No| Managed
+ CheckAgain -->|Yes| Summary --> Rebuild --> Managed
+
+ style Pruning fill:#FF9800,color:#fff
+ style Compaction fill:#9C27B0,color:#fff
+```
+
+---
+
+## Stage 1: Tool Output Pruning
+
+The first defense against context overflow is pruning old tool outputs.
+
+### Strategy
+
+1. Scan messages **backwards** (most recent first)
+2. Skip the first 2 user turns (most recent)
+3. Accumulate tool output tokens
+4. After 40K tokens accumulated, mark older outputs for pruning
+5. Only prune if savings exceed 20K tokens
+
+### Implementation
+
+```python
+PRUNE_PROTECT = 40_000   # keep the most recent 40K tokens of tool output
+PRUNE_MINIMUM = 20_000   # prune only if at least 20K tokens are recovered
+
+def prune_old_tool_outputs(messages, protect_last_turns=2):
+ total = 0 # Total tool output tokens seen
+ pruned = 0 # Tokens to be pruned
+ to_prune = []
+ turns = 0
+
+ for i in range(len(messages) - 1, -1, -1):
+ msg = messages[i]
+
+ if msg["role"] == "user":
+ turns += 1
+
+ if turns < protect_last_turns:
+ continue
+
+ if msg["role"] == "tool":
+ content = msg.get("content", "")
+ estimate = len(content) // 4
+ total += estimate
+
+ if total > PRUNE_PROTECT: # 40K
+ pruned += estimate
+ to_prune.append(i)
+
+ if pruned > PRUNE_MINIMUM: # 20K
+ # Replace content with marker
+ for idx in to_prune:
+ messages[idx]["content"] = "[Old tool result content cleared]"
+
+ return messages
+```
+
+### Visual Example
+
+```mermaid
+graph TB
+ subgraph Before["Before Pruning (150K tokens)"]
+ S1["System Prompt
5K tokens"]
+ U1["User Instruction
1K tokens"]
+ A1["Assistant + Tools
10K tokens"]
+ T1["Tool Results (old)
50K tokens"]
+ A2["Assistant + Tools
10K tokens"]
+ T2["Tool Results (old)
40K tokens"]
+ A3["Assistant + Tools
10K tokens"]
+ T3["Tool Results (recent)
24K tokens"]
+ end
+
+ subgraph After["After Pruning (60K tokens)"]
+ S2["System Prompt
5K tokens"]
+ U2["User Instruction
1K tokens"]
+ A4["Assistant + Tools
10K tokens"]
+ T4["[cleared]
~0 tokens"]
+ A5["Assistant + Tools
10K tokens"]
+ T5["[cleared]
~0 tokens"]
+ A6["Assistant + Tools
10K tokens"]
+ T6["Tool Results (protected)
24K tokens"]
+ end
+
+ T1 -.-> T4
+ T2 -.-> T5
+ T3 --> T6
+
+ style T4 fill:#FF9800,color:#fff
+ style T5 fill:#FF9800,color:#fff
+ style T6 fill:#4CAF50,color:#fff
+```
+
+---
+
+## Stage 2: AI Compaction
+
+When pruning isn't enough, BaseAgent uses the LLM to summarize the conversation.
+
+### Compaction Process
+
+```mermaid
+sequenceDiagram
+ participant Loop as Agent Loop
+ participant Compact as Compaction
+ participant LLM as LLM API
+
+ Loop->>Compact: Context still too large
+ Compact->>Compact: Add compaction prompt
+ Compact->>LLM: Request summary
+ LLM-->>Compact: Summary response
+ Compact->>Compact: Build new messages
+ Compact-->>Loop: [System, Summary]
+```
+
+### Compaction Prompt
+
+```python
+COMPACTION_PROMPT = """
+You are performing a CONTEXT CHECKPOINT COMPACTION.
+Create a handoff summary for another LLM that will resume the task.
+
+Include:
+- Current progress and key decisions made
+- Important context, constraints, or user preferences
+- What remains to be done (clear next steps)
+- Any critical data, examples, or references needed to continue
+- Which files were modified and how
+- Any errors encountered and how they were resolved
+
+Be concise, structured, and focused on helping the next LLM
+seamlessly continue the work. Use bullet points and clear sections.
+"""
+```
+
+### Result
+
+The compacted messages are:
+
+```python
+compacted = [
+ {"role": "system", "content": original_system_prompt},
+ {"role": "user", "content": SUMMARY_PREFIX + llm_summary},
+]
+```
+
+### Summary Prefix
+
+```python
+SUMMARY_PREFIX = """
+Another language model started to solve this problem and produced
+a summary of its thinking process. You also have access to the state
+of the tools that were used. Use this to build on the work that has
+already been done and avoid duplicating work.
+
+Here is the summary from the previous context:
+
+"""
+```
+
+---
+
+## Middle-Out Truncation
+
+For individual tool outputs, BaseAgent uses middle-out truncation:
+
+```mermaid
+graph LR
+ subgraph Original["Original Output"]
+ O1["Start
(headers, definitions)"]
+ O2["Middle
(repetitive data)"]
+ O3["End
(results, errors)"]
+ end
+
+ subgraph Truncated["Truncated Output"]
+ T1["Start
(preserved)"]
+ T2["[...truncated...]"]
+ T3["End
(preserved)"]
+ end
+
+ O1 --> T1
+ O2 -.-> T2
+ O3 --> T3
+
+ style O2 fill:#FF9800,color:#fff
+ style T2 fill:#FF9800,color:#fff
+```
+
+### Implementation
+
+```python
+def middle_out_truncate(text: str, max_tokens: int = 2500) -> str:
+ max_chars = max_tokens * 4 # 4 chars per token
+
+ if len(text) <= max_chars:
+ return text
+
+ keep = max_chars // 2 - 50 # Room for marker
+ return f"{text[:keep]}\n\n[...truncated...]\n\n{text[-keep:]}"
+```
+
+### Why Middle-Out?
+
+| Section | Contains | Value |
+|---------|----------|-------|
+| **Start** | Headers, imports, definitions | High |
+| **Middle** | Repetitive data, logs | Low |
+| **End** | Results, errors, summaries | High |
+
+---
+
+## Configuration Options
+
+| Setting | Default | Description |
+|---------|---------|-------------|
+| `model_context_limit` | 200,000 | Total context window |
+| `output_token_max` | 32,000 | Reserved for output |
+| `auto_compact_threshold` | 0.85 | Trigger threshold |
+| `prune_protect` | 40,000 | Recent tool tokens to keep |
+| `prune_minimum` | 20,000 | Minimum savings to prune |
+| `max_output_tokens` | 2,500 | Per-tool output limit |
+
+### Tuning Guidelines
+
+**For Long Tasks:**
+```python
+"auto_compact_threshold": 0.70, # More aggressive
+"prune_protect": 30_000, # Protect less
+```
+
+**For Complex Tasks (need more context):**
+```python
+"auto_compact_threshold": 0.90, # Less aggressive
+"prune_protect": 60_000, # Protect more
+```
+
+---
+
+## Monitoring Context Usage
+
+BaseAgent logs context status each iteration:
+
+```
+[14:30:16] [compaction] Context: 45000 tokens (26.8% of 168000)
+[14:35:22] [compaction] Context: 125000 tokens (74.4% of 168000)
+[14:38:45] [compaction] Context: 148000 tokens (88.1% of 168000)
+[14:38:45] [compaction] Context overflow detected, managing...
+[14:38:45] [compaction] Prune scan: 95000 total tokens, 55000 prunable
+[14:38:45] [compaction] Pruning 12 tool outputs, recovering ~55000 tokens
+[14:38:46] [compaction] Pruning sufficient: 148000 -> 93000 tokens
+```
+
+---
+
+## Best Practices
+
+### 1. Keep Tool Outputs Focused
+
+```bash
+# ❌ Too much output
+ls -laR / # Lists entire filesystem
+
+# ✅ Targeted
+ls -la /workspace/src/ # Just what's needed
+```
+
+### 2. Use Appropriate Search Patterns
+
+```bash
+# ❌ Too broad
+grep "function" # Matches everything
+
+# ✅ Specific
+grep "def calculate_total" src/billing.py
+```
+
+### 3. Read Sections, Not Entire Files
+
+```json
+// ❌ Entire large file
+{"name": "read_file", "arguments": {"file_path": "huge.py"}}
+
+// ✅ Specific section
+{"name": "read_file", "arguments": {"file_path": "huge.py", "offset": 100, "limit": 50}}
+```
+
+### 4. Monitor Long Sessions
+
+For tasks exceeding 50 iterations, watch for:
+- Repeated compaction events
+- Context oscillating near threshold
+- Loss of important context after compaction
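+
+A quick way to spot repeated compaction is to count overflow events in a captured log (a sketch; assumes stderr was redirected to `agent.log`):
+
+```python
+# Count how often the context manager had to intervene (illustrative).
+overflows = 0
+with open("agent.log") as fh:
+    for line in fh:
+        if "[compaction] Context overflow detected" in line:
+            overflows += 1
+print(f"compaction events: {overflows}")
+```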
+
+---
+
+## Next Steps
+
+- [Best Practices](./best-practices.md) - Optimization strategies
+- [Configuration](./configuration.md) - Tuning options
+- [Architecture](./architecture.md) - System design
diff --git a/docs/installation.md b/docs/installation.md
new file mode 100644
index 0000000..24d6700
--- /dev/null
+++ b/docs/installation.md
@@ -0,0 +1,249 @@
+# Installation Guide
+
+> **Step-by-step instructions for setting up BaseAgent**
+
+## Prerequisites
+
+Before installing BaseAgent, ensure you have:
+
+| Requirement | Version | Notes |
+|-------------|---------|-------|
+| Python | 3.9+ | Python 3.11+ recommended |
+| pip | Latest | Python package manager |
+| Git | 2.x | For cloning the repository |
+
+### Optional but Recommended
+
+| Tool | Purpose |
+|------|---------|
+| `ripgrep` (`rg`) | Fast file searching (used by `grep_files` tool) |
+| `tree` | Directory visualization |
+
+---
+
+## Installation Methods
+
+### Method 1: Using pyproject.toml (Recommended)
+
+```bash
+# Clone the repository
+git clone https://github.com/your-org/baseagent.git
+cd baseagent
+
+# Install with pip
+pip install .
+```
+
+This installs BaseAgent as a package with all dependencies.
+
+### Method 2: Using requirements.txt
+
+```bash
+# Clone the repository
+git clone https://github.com/your-org/baseagent.git
+cd baseagent
+
+# Install dependencies
+pip install -r requirements.txt
+```
+
+### Method 3: Development Installation
+
+For development with editable installs:
+
+```bash
+git clone https://github.com/your-org/baseagent.git
+cd baseagent
+
+# Editable install
+pip install -e .
+```
+
+---
+
+## Dependencies
+
+BaseAgent requires these Python packages:
+
+```
+httpx>=0.24.0       # HTTP client (Chutes API)
+pydantic>=2.0.0     # Data validation
+rich>=13.0          # Terminal output
+typer>=0.12.0       # CLI framework
+```
+
+These are automatically installed via pip.
+
+---
+
+## Environment Setup
+
+### 1. Choose Your LLM Provider
+
+BaseAgent supports multiple LLM providers. Choose one:
+
+#### Option A: Chutes AI (Recommended)
+
+```bash
+# Set your Chutes API token
+export CHUTES_API_TOKEN="your-token-from-chutes.ai"
+
+# Configure provider
+export LLM_PROVIDER="chutes"
+export LLM_MODEL="moonshotai/Kimi-K2.5-TEE"
+```
+
+Get your token at [chutes.ai](https://chutes.ai)
+
+#### Option B: OpenRouter
+
+```bash
+# Set your OpenRouter API key
+export OPENROUTER_API_KEY="sk-or-v1-..."
+
+# Model is auto-configured for OpenRouter
+```
+
+Get your key at [openrouter.ai](https://openrouter.ai)
+
+#### Option C: Direct Provider APIs
+
+```bash
+# For Anthropic
+export ANTHROPIC_API_KEY="sk-ant-..."
+
+# For OpenAI
+export OPENAI_API_KEY="sk-..."
+```
+
+### 2. Create a Configuration File (Optional)
+
+Create `.env` in the project root:
+
+```bash
+# .env file
+CHUTES_API_TOKEN=your-token-here
+LLM_PROVIDER=chutes
+LLM_MODEL=moonshotai/Kimi-K2.5-TEE
+LLM_COST_LIMIT=10.0
+```
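+
+BaseAgent reads its configuration from environment variables (see `src/config/defaults.py`), so a `.env` file must be exported into the environment before launch. One option is python-dotenv (an assumption here; it is not a BaseAgent dependency):
+
+```python
+# Assumes `pip install python-dotenv`; BaseAgent itself only reads os.environ.
+from dotenv import load_dotenv
+
+load_dotenv()  # exports .env entries into this process's environment
+```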
+
+---
+
+## Verification
+
+### Step 1: Verify Python Installation
+
+```bash
+python3 --version
+# Expected: Python 3.9 or newer (3.11+ recommended)
+```
+
+### Step 2: Verify Dependencies
+
+```bash
+python3 -c "import litellm; print('litellm:', litellm.__version__)"
+python3 -c "import httpx; print('httpx:', httpx.__version__)"
+python3 -c "import pydantic; print('pydantic:', pydantic.__version__)"
+```
+
+### Step 3: Verify BaseAgent Installation
+
+```bash
+python3 -c "from src.core.loop import run_agent_loop; print('BaseAgent: OK')"
+```
+
+### Step 4: Test Run
+
+```bash
+python3 agent.py --instruction "Print 'Hello, BaseAgent!'"
+```
+
+Expected output: JSONL events showing the agent executing your instruction.
+
+---
+
+## Directory Structure After Installation
+
+```
+baseagent/
+├── agent.py # ✓ Entry point
+├── src/
+│ ├── core/
+│ │ ├── loop.py # ✓ Agent loop
+│ │ └── compaction.py # ✓ Context manager
+│ ├── llm/
+│ │ └── client.py # ✓ LLM client
+│ ├── config/
+│ │ └── defaults.py # ✓ Configuration
+│ ├── tools/ # ✓ Tool implementations
+│ ├── prompts/
+│ │ └── system.py # ✓ System prompt
+│ └── output/
+│ └── jsonl.py # ✓ Event emission
+├── requirements.txt # ✓ Dependencies
+├── pyproject.toml # ✓ Package config
+├── docs/ # ✓ Documentation
+├── rules/ # Development guidelines
+└── astuces/ # Implementation techniques
+```
+
+---
+
+## Troubleshooting
+
+### Issue: `ModuleNotFoundError: No module named 'httpx'`
+
+**Solution**: Install dependencies
+
+```bash
+pip install -r requirements.txt
+# or
+pip install httpx pydantic
+```
+
+### Issue: `ImportError: cannot import name 'run_agent_loop'`
+
+**Solution**: Ensure you're in the project root directory
+
+```bash
+cd /path/to/baseagent
+python3 agent.py --instruction "..."
+```
+
+### Issue: API Key Errors
+
+**Solution**: Verify your environment variables are set
+
+```bash
+# Check if variables are set
+echo $CHUTES_API_TOKEN
+echo $OPENROUTER_API_KEY
+
+# Re-export if needed
+export CHUTES_API_TOKEN="your-token"
+```
+
+### Issue: `rg` (ripgrep) Not Found
+
+The `grep_files` tool will fall back to `grep` if `rg` is not available, but ripgrep is much faster.
+
+**Solution**: Install ripgrep
+
+```bash
+# Ubuntu/Debian
+apt-get install ripgrep
+
+# macOS
+brew install ripgrep
+
+# Or via cargo
+cargo install ripgrep
+```
+
+---
+
+## Next Steps
+
+- [Quick Start](./quickstart.md) - Run your first task
+- [Configuration](./configuration.md) - Customize settings
+- [Chutes Integration](./chutes-integration.md) - Set up Chutes API
diff --git a/docs/overview.md b/docs/overview.md
new file mode 100644
index 0000000..c05a533
--- /dev/null
+++ b/docs/overview.md
@@ -0,0 +1,214 @@
+# BaseAgent Overview
+
+> **A high-performance autonomous coding agent built for generalist problem-solving**
+
+## What is BaseAgent?
+
+BaseAgent is an autonomous coding agent designed for the [Term Challenge](https://term.challenge). Unlike traditional scripted automation, BaseAgent uses Large Language Models (LLMs) to reason about tasks and make decisions dynamically.
+
+The agent receives natural language instructions and autonomously:
+- Explores the codebase
+- Plans and executes solutions
+- Validates its own work
+- Handles errors and edge cases
+
+---
+
+## Core Design Principles
+
+### 1. No Hardcoding
+
+BaseAgent follows the **Golden Rule**: all decisions are made by the LLM, not by conditional logic.
+
+```python
+# ❌ FORBIDDEN - Hardcoded task routing
+if "file" in instruction:
+ create_file()
+elif "compile" in instruction:
+ compile_code()
+
+# ✅ REQUIRED - LLM-driven decisions
+response = llm.chat(messages, tools=tools)
+execute(response.tool_calls)
+```
+
+### 2. Single Code Path
+
+Every task, regardless of complexity or domain, flows through the same agent loop:
+
+```mermaid
+graph LR
+ A[Receive Instruction] --> B[Build Context]
+ B --> C[LLM Decides]
+ C --> D[Execute Tools]
+ D --> E{Complete?}
+ E -->|No| C
+ E -->|Yes| F[Verify & Return]
+```
+
+### 3. Iterative Execution
+
+BaseAgent never tries to solve everything in one shot. Instead, it:
+- Observes the current state
+- Thinks about the next step
+- Acts by calling tools
+- Repeats until the task is complete
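+
+In code terms, that cycle reduces to something like the following (an illustrative sketch, not the actual `src/core/loop.py`):
+
+```python
+def observe_think_act(llm, tools, messages, max_iterations: int = 200):
+    """Simplified observe-think-act loop (illustrative only)."""
+    for _ in range(max_iterations):
+        response = llm.chat(messages, tools=tools)  # think
+        if not response.tool_calls:                 # nothing left to do
+            return response
+        for call in response.tool_calls:            # act
+            result = tools.execute(call.name, call.arguments)
+            # observe (real tool messages also carry a tool_call_id)
+            messages.append({"role": "tool", "content": result.output})
+```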
+
+### 4. Self-Verification
+
+Before declaring a task complete, the agent automatically:
+1. Re-reads the original instruction
+2. Lists all requirements (explicit and implicit)
+3. Verifies each requirement with actual commands
+4. Only completes if all verifications pass
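+
+A simplified sketch of that final gate (names are illustrative, not the real loop code):
+
+```python
+def verified_complete(llm, messages, tools) -> bool:
+    """Ask the model to re-verify before accepting completion (illustrative)."""
+    prompt = ("Re-read the original instruction, list every explicit and "
+              "implicit requirement, and verify each one with a concrete "
+              "command before declaring the task complete.")
+    response = llm.chat(messages + [{"role": "user", "content": prompt}], tools=tools)
+    # If verification still produces tool calls, more work remains.
+    return not response.tool_calls
+```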
+
+---
+
+## High-Level Architecture
+
+```mermaid
+graph TB
+ subgraph Interface["User Interface"]
+ CLI["python agent.py --instruction '...'"]
+ end
+
+ subgraph Engine["Core Engine"]
+ direction TB
+ Loop["Agent Loop
(src/core/loop.py)"]
+ Context["Context Manager
(src/core/compaction.py)"]
+ Prompt["System Prompt
(src/prompts/system.py)"]
+ end
+
+ subgraph LLM["LLM Layer"]
+ Client["LiteLLM Client
(src/llm/client.py)"]
+ API["Provider API
(Chutes/OpenRouter)"]
+ end
+
+ subgraph Tools["Tool System"]
+ Registry["Tool Registry"]
+ Exec["Execution Engine"]
+ end
+
+ CLI --> Loop
+ Loop --> Context
+ Loop --> Prompt
+ Loop --> Client
+ Client --> API
+ Loop --> Registry
+ Registry --> Exec
+
+ style Loop fill:#4CAF50,color:#fff
+ style Client fill:#2196F3,color:#fff
+```
+
+---
+
+## Key Features
+
+### Autonomous Operation
+
+BaseAgent runs in **fully autonomous mode**:
+- No user confirmations required
+- Makes reasonable decisions when faced with ambiguity
+- Handles errors by trying alternative approaches
+- Never asks questions - just executes
+
+### Prompt Caching
+
+Achieves **90%+ cache hit rates** with prompt caching (where the provider supports it):
+- System prompt cached for stability
+- Last 2 messages cached to extend the prefix
+- Cache reads are billed at roughly a tenth of the normal input price, cutting costs by up to 90%
+
+### Context Management
+
+Intelligent memory management for long tasks:
+- Token-based overflow detection
+- Tool output pruning (protects recent outputs)
+- AI-powered compaction when needed
+- Middle-out truncation for large outputs
+
+### Comprehensive Tooling
+
+Eight specialized tools for coding tasks:
+
+| Tool | Purpose |
+|------|---------|
+| `shell_command` | Execute shell commands |
+| `read_file` | Read files with line numbers |
+| `write_file` | Create or overwrite files |
+| `apply_patch` | Surgical file modifications |
+| `grep_files` | Fast file content search |
+| `list_dir` | Directory exploration |
+| `view_image` | Image analysis |
+| `update_plan` | Progress tracking |
+
+---
+
+## Workflow Overview
+
+```mermaid
+sequenceDiagram
+ participant User
+ participant CLI as agent.py
+ participant Loop as Agent Loop
+ participant LLM as LLM (Chutes/OpenRouter)
+ participant Tools as Tool Registry
+
+ User->>CLI: python agent.py --instruction "..."
+ CLI->>Loop: Initialize session
+
+ loop Until task complete
+ Loop->>Loop: Manage context (prune/compact)
+ Loop->>Loop: Apply prompt caching
+ Loop->>LLM: Send messages + tools
+ LLM-->>Loop: Response (text + tool_calls)
+
+ alt Has tool calls
+ Loop->>Tools: Execute tool calls
+ Tools-->>Loop: Tool results
+ else No tool calls
+ Loop->>Loop: Self-verification check
+ end
+ end
+
+ Loop-->>CLI: Task complete
+ CLI-->>User: JSONL output
+```
+
+---
+
+## What Makes BaseAgent a "Generalist"?
+
+| Characteristic | Description |
+|----------------|-------------|
+| **Single code path** | Same logic handles ALL tasks |
+| **LLM-driven decisions** | LLM chooses actions, not if-statements |
+| **No task keywords** | Zero references to specific task content |
+| **Iterative execution** | Observe → Think → Act loop |
+
+### The Generalist Test
+
+Ask yourself: *"Would this code behave differently if I changed the task instruction?"*
+
+If **YES** and it's not because of LLM reasoning → it's hardcoding → **FORBIDDEN**
+
+---
+
+## Design Philosophy
+
+BaseAgent is built on these principles:
+
+1. **Explore First** - Always gather context before acting
+2. **Iterate** - Never try to do everything in one shot
+3. **Verify** - Double-confirm before completing
+4. **Fail Gracefully** - Handle errors and retry
+5. **Stay Focused** - Complete the task, nothing more
+
+---
+
+## Next Steps
+
+- [Installation Guide](./installation.md) - Set up BaseAgent
+- [Quick Start](./quickstart.md) - Run your first task
+- [Architecture](./architecture.md) - Deep dive into the system design
diff --git a/docs/quickstart.md b/docs/quickstart.md
new file mode 100644
index 0000000..f8a9326
--- /dev/null
+++ b/docs/quickstart.md
@@ -0,0 +1,242 @@
+# Quick Start Guide
+
+> **Get BaseAgent running in 5 minutes**
+
+## Prerequisites
+
+Before starting, ensure you have:
+- Python 3.9+ installed
+- An LLM API key (Chutes, OpenRouter, or Anthropic)
+- BaseAgent installed (see [Installation](./installation.md))
+
+---
+
+## Step 1: Set Up Your API Key
+
+Choose your provider and set the environment variable:
+
+```bash
+# For Chutes AI (recommended)
+export CHUTES_API_TOKEN="your-token-from-chutes.ai"
+
+# OR for OpenRouter
+export OPENROUTER_API_KEY="sk-or-v1-..."
+```
+
+---
+
+## Step 2: Run Your First Task
+
+Navigate to the BaseAgent directory and run:
+
+```bash
+python3 agent.py --instruction "Create a file called hello.txt with the content 'Hello, World!'"
+```
+
+### Expected Output
+
+You'll see JSONL events as the agent works:
+
+```json
+{"type": "thread.started", "thread_id": "sess_1234567890"}
+{"type": "turn.started"}
+{"type": "item.started", "item": {"type": "command_execution", "command": "write_file"}}
+{"type": "item.completed", "item": {"type": "command_execution", "status": "completed"}}
+{"type": "turn.completed", "usage": {"input_tokens": 5000, "output_tokens": 200}}
+```
+
+And the file `hello.txt` will be created:
+
+```bash
+cat hello.txt
+# Output: Hello, World!
+```
+
+---
+
+## Step 3: Try More Examples
+
+### Example: Explore a Codebase
+
+```bash
+python3 agent.py --instruction "Explore this repository and describe its structure"
+```
+
+### Example: Find and Read Files
+
+```bash
+python3 agent.py --instruction "Find all Python files and show me the main entry point"
+```
+
+### Example: Create a Simple Script
+
+```bash
+python3 agent.py --instruction "Create a Python script that prints the Fibonacci sequence up to 100"
+```
+
+### Example: Modify Existing Code
+
+```bash
+python3 agent.py --instruction "Add a docstring to all functions in src/core/loop.py"
+```
+
+---
+
+## Understanding the Output
+
+BaseAgent emits JSONL (JSON Lines) format for machine-readable output:
+
+```mermaid
+sequenceDiagram
+ participant User
+ participant Agent
+ participant stdout as Output
+
+ User->>Agent: --instruction "..."
+ Agent->>stdout: {"type": "thread.started", ...}
+ Agent->>stdout: {"type": "turn.started"}
+
+ loop Tool Execution
+ Agent->>stdout: {"type": "item.started", ...}
+ Agent->>stdout: {"type": "item.completed", ...}
+ end
+
+ Agent->>stdout: {"type": "turn.completed", "usage": {...}}
+```
+
+### Key Event Types
+
+| Event | Description |
+|-------|-------------|
+| `thread.started` | Session begins with unique ID |
+| `turn.started` | Agent begins processing |
+| `item.started` | Tool execution begins |
+| `item.completed` | Tool execution finished |
+| `turn.completed` | Agent finished with usage stats |
+| `turn.failed` | Error occurred |
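+
+If you capture a run to `output.jsonl` (see the command reference below), a few lines of Python summarize the events:
+
+```python
+import json
+
+# Print one line per event from a captured run (illustrative).
+with open("output.jsonl") as fh:
+    for line in fh:
+        line = line.strip()
+        if not line.startswith("{"):
+            continue  # skip blank or non-JSON lines defensively
+        event = json.loads(line)
+        print(event["type"], event.get("item", {}).get("type", ""))
+```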
+
+---
+
+## Quick Command Reference
+
+```bash
+# Basic usage
+python3 agent.py --instruction "Your task description"
+
+# With environment variables inline
+CHUTES_API_TOKEN="..." python3 agent.py --instruction "..."
+
+# Redirect JSONL events to a file (logs stay on stderr)
+python3 agent.py --instruction "..." > output.jsonl
+```
+
+---
+
+## Agent Workflow
+
+Here's what happens when you run a task:
+
+```mermaid
+flowchart TB
+ subgraph Input
+ Cmd["python3 agent.py --instruction '...'"]
+ end
+
+ subgraph Init["Initialization"]
+ Parse[Parse Arguments]
+ Config[Load Configuration]
+ LLM[Initialize LLM Client]
+ Tools[Register Tools]
+ end
+
+ subgraph Loop["Agent Loop"]
+ Context[Manage Context]
+ Cache[Apply Caching]
+ Call[Call LLM]
+ Execute[Execute Tools]
+ Verify[Self-Verify]
+ end
+
+ subgraph Output
+ JSONL[Emit JSONL Events]
+ Done[Task Complete]
+ end
+
+ Cmd --> Parse --> Config --> LLM --> Tools
+ Tools --> Context --> Cache --> Call
+ Call --> Execute --> Context
+ Execute --> Verify --> Done
+ Context & Call & Execute --> JSONL
+```
+
+---
+
+## Tips for Effective Instructions
+
+### Be Specific
+
+```bash
+# ❌ Too vague
+python3 agent.py --instruction "Fix the bug"
+
+# ✅ Specific
+python3 agent.py --instruction "Fix the TypeError in src/utils.py line 42 where x is None"
+```
+
+### Provide Context
+
+```bash
+# ❌ Missing context
+python3 agent.py --instruction "Add tests"
+
+# ✅ With context
+python3 agent.py --instruction "Add unit tests for the calculate_total function in src/billing.py"
+```
+
+### Request Verification
+
+```bash
+# ✅ Ask for verification
+python3 agent.py --instruction "Create a Python script for sorting and verify it works with sample data"
+```
+
+---
+
+## Troubleshooting
+
+### Agent Not Finding Files
+
+The agent starts in the current directory. Ensure you're in the right location:
+
+```bash
+pwd # Check current directory
+ls # List files
+cd /path/to/project
+python3 /path/to/baseagent/agent.py --instruction "..."
+```
+
+### API Rate Limits
+
+If you hit rate limits, the agent will automatically retry with exponential backoff. You can also:
+
+```bash
+# Set a cost limit
+export LLM_COST_LIMIT="5.0"
+```
+
+### Long-Running Tasks
+
+For complex tasks, the agent may iterate many times. Monitor progress through the JSONL output:
+
+```bash
+python3 agent.py --instruction "..." 2>&1 | grep "item.completed"
+```
+
+---
+
+## Next Steps
+
+- [Usage Guide](./usage.md) - Detailed command-line options
+- [Configuration](./configuration.md) - Customize behavior
+- [Tools Reference](./tools.md) - Available tools
+- [Best Practices](./best-practices.md) - Optimization tips
diff --git a/docs/tools.md b/docs/tools.md
new file mode 100644
index 0000000..78cd143
--- /dev/null
+++ b/docs/tools.md
@@ -0,0 +1,509 @@
+# Tools Reference
+
+> **Complete documentation for all available tools in BaseAgent**
+
+## Overview
+
+BaseAgent provides eight specialized tools for autonomous task execution. Each tool is designed for a specific purpose and follows consistent patterns for input and output.
+
+---
+
+## Tool Summary
+
+| Tool | Purpose | Key Parameters |
+|------|---------|----------------|
+| `shell_command` | Execute shell commands | `command`, `workdir`, `timeout_ms` |
+| `read_file` | Read file contents | `file_path`, `offset`, `limit` |
+| `write_file` | Create/overwrite files | `file_path`, `content` |
+| `apply_patch` | Surgical file edits | `patch` |
+| `grep_files` | Search file contents | `pattern`, `include`, `path` |
+| `list_dir` | List directory contents | `dir_path`, `depth`, `limit` |
+| `view_image` | Analyze images | `path` |
+| `update_plan` | Track progress | `steps`, `explanation` |
+
+---
+
+## Tool Architecture
+
+```mermaid
+graph TB
+ subgraph Registry["Tool Registry (registry.py)"]
+ Lookup["Tool Lookup"]
+ Execute["Execution Engine"]
+ Truncate["Output Truncation"]
+ end
+
+ subgraph Tools["Tool Implementations"]
+ Shell["shell_command"]
+ Read["read_file"]
+ Write["write_file"]
+ Patch["apply_patch"]
+ Grep["grep_files"]
+ List["list_dir"]
+ Image["view_image"]
+ Plan["update_plan"]
+ end
+
+ subgraph Output["Results"]
+ Success["ToolResult(success=True)"]
+ Failure["ToolResult(success=False)"]
+ end
+
+ Lookup --> Shell & Read & Write & Patch & Grep & List & Image & Plan
+ Shell & Read & Write & Patch & Grep & List & Image & Plan --> Execute
+ Execute --> Truncate
+ Truncate --> Success & Failure
+```
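+
+Conceptually, the registry maps tool names to callables and wraps every execution in a `ToolResult`, so failures become results rather than crashes. A minimal sketch of that contract (names assumed from the diagram, not the actual `registry.py`):
+
+```python
+from dataclasses import dataclass
+from typing import Any, Callable, Dict
+
+@dataclass
+class ToolResult:
+    success: bool
+    output: str
+
+class ToolRegistry:
+    """Illustrative name -> callable dispatch with failure capture."""
+
+    def __init__(self) -> None:
+        self._tools: Dict[str, Callable[..., ToolResult]] = {}
+
+    def register(self, name: str, fn: Callable[..., ToolResult]) -> None:
+        self._tools[name] = fn
+
+    def execute(self, name: str, arguments: Dict[str, Any]) -> ToolResult:
+        if name not in self._tools:
+            return ToolResult(False, f"Unknown tool: {name}")
+        try:
+            return self._tools[name](**arguments)
+        except Exception as exc:  # failures become results, not crashes
+            return ToolResult(False, str(exc))
+```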
+
+---
+
+## shell_command
+
+Execute shell commands in the terminal.
+
+### Parameters
+
+| Parameter | Type | Required | Default | Description |
+|-----------|------|----------|---------|-------------|
+| `command` | string | Yes | - | Shell command to execute |
+| `workdir` | string | No | Current dir | Working directory |
+| `timeout_ms` | number | No | 60000 | Timeout in milliseconds |
+
+### Example Usage
+
+```json
+{
+ "name": "shell_command",
+ "arguments": {
+ "command": "ls -la",
+ "workdir": "/workspace",
+ "timeout_ms": 30000
+ }
+}
+```
+
+### Best Practices
+
+- Always set `workdir` to avoid directory confusion
+- Use `rg` (ripgrep) instead of `grep` for faster searches
+- Set appropriate timeouts for long-running commands
+- Prefer specific commands over `cd && command`
+
+### Output Format
+
+```
+total 40
+drwxr-xr-x 7 root root 4096 Feb 3 13:16 .
+drwxr-xr-x 1 root root 4096 Feb 3 12:00 ..
+-rw-r--r-- 1 root root 5432 Feb 3 13:16 agent.py
+drwxr-xr-x 4 root root 4096 Feb 3 13:16 src
+```
+
+---
+
+## read_file
+
+Read file contents with line numbers.
+
+### Parameters
+
+| Parameter | Type | Required | Default | Description |
+|-----------|------|----------|---------|-------------|
+| `file_path` | string | Yes | - | Path to the file |
+| `offset` | number | No | 1 | Starting line (1-indexed) |
+| `limit` | number | No | 2000 | Maximum lines to return |
+
+### Example Usage
+
+```json
+{
+ "name": "read_file",
+ "arguments": {
+ "file_path": "src/core/loop.py",
+ "offset": 1,
+ "limit": 100
+ }
+}
+```
+
+### Output Format
+
+```
+L1: """
+L2: Main agent loop - the heart of the SuperAgent system.
+L3: """
+L4:
+L5: from __future__ import annotations
+L6: import time
+```
+
+### Best Practices
+
+- Use `offset` and `limit` for large files
+- Prefer `grep_files` to find specific content first
+- Read relevant sections, not entire large files
+
+---
+
+## write_file
+
+Create or overwrite a file.
+
+### Parameters
+
+| Parameter | Type | Required | Default | Description |
+|-----------|------|----------|---------|-------------|
+| `file_path` | string | Yes | - | Path to the file |
+| `content` | string | Yes | - | Content to write |
+
+### Example Usage
+
+```json
+{
+ "name": "write_file",
+ "arguments": {
+ "file_path": "hello.txt",
+ "content": "Hello, World!\n"
+ }
+}
+```
+
+### Best Practices
+
+- Use for new files or complete rewrites
+- Prefer `apply_patch` for surgical edits
+- Parent directories are created automatically
+- Include trailing newlines for proper file endings
+
+---
+
+## apply_patch
+
+Apply surgical file modifications using patch format.
+
+### Parameters
+
+| Parameter | Type | Required | Default | Description |
+|-----------|------|----------|---------|-------------|
+| `patch` | string | Yes | - | Patch content |
+
+### Patch Format
+
+```
+*** Begin Patch
+*** Add File: path/to/new/file.py
++line 1
++line 2
+*** Update File: path/to/existing/file.py
+@@ def existing_function():
+- old_line
++ new_line
+*** Delete File: path/to/delete.py
+*** End Patch
+```
+
+### Example Usage
+
+```json
+{
+ "name": "apply_patch",
+ "arguments": {
+ "patch": "*** Begin Patch\n*** Update File: src/utils.py\n@@ def calculate(x):\n- return x\n+ return x * 2\n*** End Patch"
+ }
+}
+```
+
+### Patch Rules
+
+1. Use `@@ context line` to identify location
+2. Prefix new lines with `+`
+3. Prefix removed lines with `-`
+4. Include 3 lines of context before and after changes
+5. File paths must be relative (never absolute)
+
+### Operations
+
+| Operation | Format | Description |
+|-----------|--------|-------------|
+| Add file | `*** Add File: path` | Create new file |
+| Update file | `*** Update File: path` | Modify existing file |
+| Delete file | `*** Delete File: path` | Remove file |
+
+---
+
+## grep_files
+
+Search file contents using patterns.
+
+### Parameters
+
+| Parameter | Type | Required | Default | Description |
+|-----------|------|----------|---------|-------------|
+| `pattern` | string | Yes | - | Regex pattern to search |
+| `include` | string | No | - | Glob filter (e.g., `*.py`) |
+| `path` | string | No | Current dir | Search path |
+| `limit` | number | No | 100 | Max files to return |
+
+### Example Usage
+
+```json
+{
+ "name": "grep_files",
+ "arguments": {
+ "pattern": "def.*token",
+ "include": "*.py",
+ "path": "src/",
+ "limit": 50
+ }
+}
+```
+
+### Output Format
+
+```
+src/llm/client.py
+src/core/compaction.py
+src/utils/truncate.py
+```
+
+### Best Practices
+
+- Use ripgrep regex syntax
+- Filter with `include` for faster searches
+- Search specific directories when possible
+- Results sorted by modification time
+
+---
+
+## list_dir
+
+List directory contents with type indicators.
+
+### Parameters
+
+| Parameter | Type | Required | Default | Description |
+|-----------|------|----------|---------|-------------|
+| `dir_path` | string | Yes | - | Directory path |
+| `offset` | number | No | 1 | Starting entry (1-indexed) |
+| `limit` | number | No | 50 | Max entries to return |
+| `depth` | number | No | 2 | Max directory depth |
+
+### Example Usage
+
+```json
+{
+ "name": "list_dir",
+ "arguments": {
+ "dir_path": "src/",
+ "depth": 3,
+ "limit": 100
+ }
+}
+```
+
+### Output Format
+
+```
+src/
+ core/
+ loop.py
+ compaction.py
+ llm/
+ client.py
+ tools/
+ shell.py
+ read_file.py
+```
+
+### Type Indicators
+
+| Indicator | Meaning |
+|-----------|---------|
+| `/` | Directory |
+| `@` | Symbolic link |
+| (none) | Regular file |
+
+---
+
+## view_image
+
+Load and analyze an image from the filesystem.
+
+### Parameters
+
+| Parameter | Type | Required | Default | Description |
+|-----------|------|----------|---------|-------------|
+| `path` | string | Yes | - | Path to image file |
+
+### Supported Formats
+
+- PNG
+- JPEG
+- GIF
+- WebP
+- BMP
+
+### Example Usage
+
+```json
+{
+ "name": "view_image",
+ "arguments": {
+ "path": "screenshots/error.png"
+ }
+}
+```
+
+### How It Works
+
+```mermaid
+sequenceDiagram
+ participant Agent
+ participant Tool as view_image
+ participant LLM as LLM API
+
+ Agent->>Tool: view_image(path)
+ Tool->>Tool: Load image file
+ Tool->>Tool: Encode as base64
+ Tool-->>Agent: ToolResult with inject_content
+ Agent->>Agent: Add image to messages
+ Agent->>LLM: Messages with image content
+ LLM-->>Agent: Analysis response
+```
+
+### Best Practices
+
+- Only use for images the user mentioned
+- Don't use if image is already in conversation
+- Large images are automatically resized
+- Images count as roughly 1,000 tokens in context
+
+---
+
+## update_plan
+
+Track task progress with a visible plan.
+
+### Parameters
+
+| Parameter | Type | Required | Default | Description |
+|-----------|------|----------|---------|-------------|
+| `steps` | array | Yes | - | List of step objects |
+| `explanation` | string | No | - | Why the plan changed |
+
+### Step Object
+
+```json
+{
+ "description": "Create helper functions",
+ "status": "completed"
+}
+```
+
+### Status Values
+
+| Status | Description |
+|--------|-------------|
+| `pending` | Not started |
+| `in_progress` | Currently working |
+| `completed` | Finished |
+
+### Example Usage
+
+```json
+{
+ "name": "update_plan",
+ "arguments": {
+ "steps": [
+ {"description": "Read existing code", "status": "completed"},
+ {"description": "Create helper module", "status": "in_progress"},
+ {"description": "Write unit tests", "status": "pending"},
+ {"description": "Update documentation", "status": "pending"}
+ ],
+ "explanation": "Starting implementation after code review"
+ }
+}
+```
+
+### Best Practices
+
+- Keep descriptions to 5-7 words
+- Mark steps completed as you go
+- Update plan when approach changes
+- Use for complex multi-step tasks
+
+---
+
+## Tool Output Limits
+
+All tool outputs are truncated to prevent context overflow:
+
+| Setting | Default | Description |
+|---------|---------|-------------|
+| `max_output_tokens` | 2500 | Maximum tokens per tool output |
+| Truncation strategy | Middle-out | Keeps start and end, removes middle |
+
+### Middle-Out Truncation
+
+```mermaid
+graph LR
+ subgraph Original["Original Output (10K tokens)"]
+ Start["First 1250 tokens"]
+ Middle["Middle section
(removed)"]
+ End["Last 1250 tokens"]
+ end
+
+ subgraph Truncated["Truncated Output (2500 tokens)"]
+ TStart["First 1250 tokens"]
+ Marker["[...truncated...]"]
+ TEnd["Last 1250 tokens"]
+ end
+
+ Start --> TStart
+ End --> TEnd
+```
+
+**Why middle-out?**
+- Start contains headers, definitions
+- End contains results, errors
+- Middle is often repetitive
+
+---
+
+## Tool Execution Flow
+
+```mermaid
+flowchart TB
+ subgraph Request["LLM Request"]
+ ToolCall["tool_call: {name, arguments}"]
+ end
+
+ subgraph Registry["Tool Registry"]
+ Lookup["Lookup Tool"]
+ Validate["Validate Arguments"]
+ Execute["Execute Tool"]
+ end
+
+ subgraph Processing["Post-Processing"]
+ Truncate["Truncate Output"]
+ Format["Format Result"]
+ end
+
+ subgraph Response["Tool Result"]
+ Success["success: true/false"]
+ Output["output: string"]
+ Inject["inject_content (images)"]
+ end
+
+ ToolCall --> Lookup --> Validate --> Execute
+ Execute --> Truncate --> Format
+ Format --> Success & Output & Inject
+```
+
+---
+
+## Next Steps
+
+- [Usage Guide](./usage.md) - How to use the agent
+- [Context Management](./context-management.md) - Memory optimization
+- [Best Practices](./best-practices.md) - Effective tool usage
diff --git a/docs/usage.md b/docs/usage.md
new file mode 100644
index 0000000..d234c54
--- /dev/null
+++ b/docs/usage.md
@@ -0,0 +1,341 @@
+# Agent Usage Guide
+
+> **Complete guide to running BaseAgent and interpreting its output**
+
+## Command-Line Interface
+
+### Basic Syntax
+
+```bash
+python3 agent.py --instruction "Your task description"
+```
+
+### Required Arguments
+
+| Argument | Type | Description |
+|----------|------|-------------|
+| `--instruction` | string | The task for the agent to complete |
+
+---
+
+## Running the Agent
+
+### Simple Tasks
+
+```bash
+# Create a file
+python3 agent.py --instruction "Create a file called hello.txt with 'Hello, World!'"
+
+# Read and explain code
+python3 agent.py --instruction "Read src/core/loop.py and explain what it does"
+
+# Find files
+python3 agent.py --instruction "Find all Python files that contain 'import json'"
+```
+
+### Complex Tasks
+
+```bash
+# Multi-step task
+python3 agent.py --instruction "Create a Python module in src/utils/helpers.py with functions for string manipulation, then write tests for it"
+
+# Code modification
+python3 agent.py --instruction "Add error handling to all functions in src/api/client.py that make HTTP requests"
+
+# Investigation task
+python3 agent.py --instruction "Find the bug causing the TypeError in the test output and fix it"
+```
+
+---
+
+## Environment Variables
+
+Configure the agent's behavior with environment variables:
+
+```bash
+# LLM Provider (Chutes)
+export CHUTES_API_TOKEN="your-token"
+export LLM_PROVIDER="chutes"
+export LLM_MODEL="moonshotai/Kimi-K2.5-TEE"
+
+# LLM Provider (OpenRouter)
+export OPENROUTER_API_KEY="sk-or-v1-..."
+export LLM_MODEL="openrouter/anthropic/claude-sonnet-4-20250514"
+
+# Cost management
+export LLM_COST_LIMIT="10.0"
+
+# Run with inline variables
+LLM_COST_LIMIT="5.0" python3 agent.py --instruction "..."
+```
+
+---
+
+## Output Format
+
+BaseAgent emits JSONL (JSON Lines) events to stdout:
+
+```mermaid
+sequenceDiagram
+ participant Agent
+ participant stdout as Standard Output
+
+ Agent->>stdout: {"type": "thread.started", "thread_id": "sess_..."}
+ Agent->>stdout: {"type": "turn.started"}
+
+ loop Tool Execution
+ Agent->>stdout: {"type": "item.started", "item": {...}}
+ Agent->>stdout: {"type": "item.completed", "item": {...}}
+ end
+
+ Agent->>stdout: {"type": "turn.completed", "usage": {...}}
+```
+
+### Event Types
+
+| Event | Description |
+|-------|-------------|
+| `thread.started` | Session begins, includes unique thread ID |
+| `turn.started` | Agent begins processing the instruction |
+| `item.started` | A tool call is starting |
+| `item.completed` | A tool call has completed |
+| `turn.completed` | Agent finished, includes token usage |
+| `turn.failed` | An error occurred |
+
+### Example Output
+
+```json
+{"type": "thread.started", "thread_id": "sess_1706890123456"}
+{"type": "turn.started"}
+{"type": "item.started", "item": {"type": "command_execution", "id": "1", "command": "shell_command({command: 'ls -la'})", "status": "in_progress"}}
+{"type": "item.completed", "item": {"type": "command_execution", "id": "1", "command": "shell_command", "status": "completed", "aggregated_output": "total 40\ndrwxr-xr-x...", "exit_code": 0}}
+{"type": "item.completed", "item": {"type": "agent_message", "id": "2", "content": "I found the files. Now creating hello.txt..."}}
+{"type": "item.started", "item": {"type": "command_execution", "id": "3", "command": "write_file({file_path: 'hello.txt', content: 'Hello, World!'})", "status": "in_progress"}}
+{"type": "item.completed", "item": {"type": "command_execution", "id": "3", "command": "write_file", "status": "completed", "exit_code": 0}}
+{"type": "turn.completed", "usage": {"input_tokens": 5432, "cached_input_tokens": 4890, "output_tokens": 256}}
+```
+
+---
+
+## Logging Output
+
+Agent logs go to stderr:
+
+```
+[14:30:15] [superagent] ============================================================
+[14:30:15] [superagent] SuperAgent Starting (SDK 3.0 - Chutes API)
+[14:30:15] [superagent] ============================================================
+[14:30:15] [superagent] Model: moonshotai/Kimi-K2.5-TEE
+[14:30:15] [superagent] Instruction: Create hello.txt with 'Hello World'...
+[14:30:15] [loop] Getting initial state...
+[14:30:16] [loop] Iteration 1/200
+[14:30:16] [compaction] Context: 5432 tokens (3.2% of 168000)
+[14:30:16] [loop] Prompt caching: 1 system + 2 final messages marked (3 breakpoints)
+[14:30:17] [loop] Executing tool: write_file
+[14:30:17] [loop] Iteration 2/200
+[14:30:18] [loop] No tool calls in response
+[14:30:18] [loop] Requesting self-verification before completion
+```
+
+### Separating Output Streams
+
+```bash
+# Send JSONL to file, logs to terminal
+python3 agent.py --instruction "..." > output.jsonl
+
+# Send logs to file, JSONL to terminal
+python3 agent.py --instruction "..." 2> agent.log
+
+# Both to separate files
+python3 agent.py --instruction "..." > output.jsonl 2> agent.log
+```
+
+---
+
+## Processing Output
+
+### Parse JSONL with jq
+
+```bash
+# Get all completed items
+python3 agent.py --instruction "..." | jq 'select(.type == "item.completed")'
+
+# Get final usage stats
+python3 agent.py --instruction "..." | jq 'select(.type == "turn.completed") | .usage'
+
+# Get all agent messages
+python3 agent.py --instruction "..." | jq 'select(.item.type == "agent_message") | .item.content'
+```
+
+### Parse with Python
+
+```python
+import json
+import subprocess
+
+# Run agent and capture output
+result = subprocess.run(
+ ["python3", "agent.py", "--instruction", "Your task"],
+ capture_output=True,
+ text=True
+)
+
+# Parse JSONL output
+events = [json.loads(line) for line in result.stdout.strip().split('\n') if line]
+
+# Find usage stats
+for event in events:
+ if event.get("type") == "turn.completed":
+ print(f"Input tokens: {event['usage']['input_tokens']}")
+ print(f"Output tokens: {event['usage']['output_tokens']}")
+```
+
+---
+
+## Agent Workflow
+
+```mermaid
+flowchart TB
+ subgraph Input["Input Phase"]
+ Cmd["python3 agent.py --instruction '...'"]
+ Parse["Parse Arguments"]
+ Init["Initialize Components"]
+ end
+
+ subgraph Explore["Exploration Phase"]
+ State["Get Current State"]
+ Context["Build Initial Context"]
+ end
+
+ subgraph Execute["Execution Phase"]
+ Loop["Agent Loop"]
+ Tools["Execute Tools"]
+ Verify["Self-Verification"]
+ end
+
+ subgraph Output["Output Phase"]
+ JSONL["Emit JSONL Events"]
+ Stats["Report Statistics"]
+ end
+
+ Cmd --> Parse --> Init
+ Init --> State --> Context
+ Context --> Loop
+ Loop --> Tools --> Loop
+ Loop --> Verify
+ Verify --> Stats
+ Loop --> JSONL
+```
+
+---
+
+## Example Tasks
+
+### File Operations
+
+```bash
+# Create a file
+python3 agent.py --instruction "Create config.yaml with database settings for PostgreSQL"
+
+# Read and summarize
+python3 agent.py --instruction "Read README.md and create a one-paragraph summary"
+
+# Modify a file
+python3 agent.py --instruction "Add a new function to src/utils.py that validates email addresses"
+```
+
+### Code Analysis
+
+```bash
+# Explain code
+python3 agent.py --instruction "Explain how the authentication system works in src/auth/"
+
+# Find patterns
+python3 agent.py --instruction "Find all API endpoints and list them with their HTTP methods"
+
+# Review code
+python3 agent.py --instruction "Review src/api/handlers.py for potential security issues"
+```
+
+### Debugging
+
+```bash
+# Investigate error
+python3 agent.py --instruction "Find why 'test_user_creation' is failing and fix it"
+
+# Trace behavior
+python3 agent.py --instruction "Trace the data flow from user input to database in the signup process"
+```
+
+### Project Tasks
+
+```bash
+# Setup
+python3 agent.py --instruction "Create a Python project structure with src/, tests/, and setup.py"
+
+# Add feature
+python3 agent.py --instruction "Add logging to all functions in src/core/ using Python's logging module"
+
+# Refactor
+python3 agent.py --instruction "Refactor src/utils.py to follow the single responsibility principle"
+```
+
+---
+
+## Session Management
+
+Each agent run creates a new session with a unique ID:
+
+```json
+{"type": "thread.started", "thread_id": "sess_1706890123456"}
+```
+
+### Session Lifecycle
+
+```mermaid
+stateDiagram-v2
+ [*] --> Initializing: python3 agent.py
+ Initializing --> Running: thread.started
+ Running --> Iterating: turn.started
+ Iterating --> Executing: item.started
+ Executing --> Iterating: item.completed
+ Iterating --> Verifying: No tool calls
+ Verifying --> Iterating: Needs more work
+ Verifying --> Complete: Verified
+ Iterating --> Failed: Error
+ Complete --> [*]: turn.completed
+ Failed --> [*]: turn.failed
+```
+
+---
+
+## Performance Tips
+
+### Optimize Token Usage
+
+```bash
+# Set lower cost limit for testing
+export LLM_COST_LIMIT="2.0"
+```
+
+### Monitor Progress
+
+```bash
+# Watch tool executions in real-time
+python3 agent.py --instruction "..." 2>&1 | grep -E "Executing tool|Iteration"
+```
+
+### Debug Issues
+
+```bash
+# Full verbose output
+python3 agent.py --instruction "..." 2>&1 | tee agent_debug.log
+```
+
+---
+
+## Next Steps
+
+- [Tools Reference](./tools.md) - Available tools and their parameters
+- [Configuration](./configuration.md) - Customize agent behavior
+- [Best Practices](./best-practices.md) - Tips for effective usage
diff --git a/pyproject.toml b/pyproject.toml
index 864644a..41d9205 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -26,7 +26,6 @@ dependencies = [
"tomli-w>=1.0",
"rich>=13.0",
"typer>=0.12.0",
- "litellm>=1.50.0",
]
[project.optional-dependencies]
diff --git a/requirements.txt b/requirements.txt
index 02cebfd..c4242ac 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,4 +4,3 @@ tomli>=2.0;python_version<'3.11'
tomli-w>=1.0
rich>=13.0
typer>=0.12.0
-litellm>=1.50.0
diff --git a/rules/02-architecture-patterns.md b/rules/02-architecture-patterns.md
index 933b156..2ab44f6 100644
--- a/rules/02-architecture-patterns.md
+++ b/rules/02-architecture-patterns.md
@@ -20,7 +20,7 @@ my-agent/
│ │ ├── loop.py # Main loop
│ │ └── compaction.py # Context management (MANDATORY)
│ ├── llm/
-│ │ └── client.py # LLM client (litellm)
+│ │ └── client.py # LLM client (Chutes API)
│ └── tools/
│ └── ... # Tools
├── requirements.txt # Dependencies
@@ -275,7 +275,7 @@ flowchart TB
### Implementation
```python
-# Définition des outils (format OpenAI/litellm)
+# Tool definition (OpenAI-compatible format)
TOOLS = [
{
"name": "run_command",
diff --git a/rules/06-llm-usage-guide.md b/rules/06-llm-usage-guide.md
index 417375a..bd6365e 100644
--- a/rules/06-llm-usage-guide.md
+++ b/rules/06-llm-usage-guide.md
@@ -1,6 +1,6 @@
-# 06 - LLM Usage Guide (SDK 3.0 - litellm)
+# 06 - LLM Usage Guide (SDK 3.0 - Chutes API)
-This guide covers using LLMs with **litellm** (no more term_sdk).
+This guide covers using LLMs with **Chutes API** via httpx (no more term_sdk).
---
@@ -9,11 +9,11 @@ This guide covers using LLMs with **litellm** (no more term_sdk).
### Initialization
```python
-from src.llm.client import LiteLLMClient, LLMError, CostLimitExceeded
+from src.llm.client import LLMClient, LLMError, CostLimitExceeded
# Create the LLM client
-llm = LiteLLMClient(
- model="openrouter/anthropic/claude-opus-4.5",
+llm = LLMClient(
+ model="moonshotai/Kimi-K2.5-TEE",
temperature=0.0, # 0 = deterministic
max_tokens=16384,
cost_limit=10.0 # Cost limit in $
@@ -342,7 +342,7 @@ def run(self, ctx: Any):
### Defining Tools
```python
-# Format OpenAI/litellm pour les outils
+# Tool format (OpenAI-compatible)
TOOLS = [
Tool(
@@ -502,11 +502,9 @@ Community fine-tuned models are **forbidden** because they may:
```python
def setup(self):
- # Any official foundation model works
- # Examples: claude-3.5-sonnet, gpt-4o, deepseek-v3, llama-3, etc.
- self.llm = LLM(
- provider="openrouter", # or any supported provider
- default_model="anthropic/claude-3.5-sonnet",
+ # Default model: Kimi K2.5-TEE (1T params, 32B activated)
+ self.llm = LLMClient(
+ model="moonshotai/Kimi-K2.5-TEE",
temperature=0.3
)
```
@@ -542,74 +540,24 @@ def run(self, ctx: Any):
## Prompt Caching
-Prompt caching significantly reduces costs and latency by reusing previously processed prompts. The Term SDK supports caching via the `cache=True` parameter.
+Prompt caching significantly reduces costs and latency by reusing previously processed prompts.
-### Enabling Caching in Term SDK
+### Enabling Caching
```python
-from src.llm.client import LiteLLMClient
-
-class MyAgent(Agent):
- def setup(self):
- self.llm = LLM(
- provider="openrouter",
- default_model="anthropic/claude-3.5-sonnet",
- cache=True # Enable prompt caching
- )
-```
-
-### How Caching Works by Provider
-
-| Provider | Caching | Configuration |
-|----------|---------|---------------|
-| **OpenAI** | Automatic | No config needed, min 1024 tokens |
-| **Anthropic** | Manual | Requires `cache_control` breakpoints |
-| **DeepSeek** | Automatic | No config needed |
-| **Google Gemini** | Automatic | No config needed, min 4096 tokens |
-| **Groq** | Automatic | No config needed |
-
-### Anthropic Cache Control (Important!)
-
-Anthropic requires explicit `cache_control` breakpoints. This is critical for cost savings:
-
-**Pricing:**
-- **Cache writes**: 1.25x input price (slightly more expensive)
-- **Cache reads**: 0.1x input price (90% savings!)
+from src.llm.client import LLMClient
-**TTL Options:**
-- Default: 5 minutes
-- Extended: 1 hour with `"ttl": "1h"`
+# Caching is handled at the message level
+llm = LLMClient(
+ model="moonshotai/Kimi-K2.5-TEE",
+)
-### Anthropic Caching Example
+# The system manages caching automatically through message preparation
+```
-```python
-# Structure messages with cache_control for large content
-messages = [
- {
- "role": "system",
- "content": [
- {
- "type": "text",
- "text": "You are a task-solving agent."
- },
- {
- "type": "text",
- "text": LARGE_SYSTEM_PROMPT, # Cache this!
- "cache_control": {
- "type": "ephemeral",
- "ttl": "1h" # Optional: extend to 1 hour
- }
- }
- ]
- },
- {
- "role": "user",
- "content": f"Task: {ctx.instruction}"
- }
-]
+### How Caching Works
-response = self.llm.chat(messages)
-```
+Caching behavior depends on the model and provider. The client handles cache_control markers automatically, stripping them for providers that don't support them.
### What to Cache
@@ -625,48 +573,22 @@ response = self.llm.chat(messages)
- Changing context
- Small prompts (under 1024 tokens)
-### Cache Placement Strategy
-
-```python
-# Put static content FIRST, dynamic content LAST
-messages = [
- {
- "role": "system",
- "content": [
- # Static: Cache this large prompt
- {
- "type": "text",
- "text": STATIC_SYSTEM_PROMPT,
- "cache_control": {"type": "ephemeral"}
- }
- ]
- },
- # Dynamic: User instruction (changes each task)
- {"role": "user", "content": ctx.instruction},
- # Dynamic: Previous outputs (change each iteration)
- {"role": "assistant", "content": last_response},
- {"role": "user", "content": command_output}
-]
-```
-
### Inspecting Cache Usage
```python
-response = self.llm.chat(messages, usage=True)
+response = llm.chat(messages)
-# Check cache statistics
-if response.usage:
- cached_tokens = response.usage.get("cached_tokens", 0)
- cache_discount = response.usage.get("cache_discount", 0)
- print(f"Cached: {cached_tokens} tokens, saved: ${cache_discount:.4f}")
+# Check cache statistics from response tokens
+if response.tokens:
+ cached_tokens = response.tokens.get("cached", 0)
+ print(f"Cached: {cached_tokens} tokens")
```
### Cost Optimization Tips
1. **Keep static content first** - Cache hits require matching prefixes
-2. **Use 1-hour TTL for long sessions** - Avoids repeated cache writes
-3. **Batch related requests** - Maximize cache hits within TTL window
-4. **Monitor cache_discount** - Negative = cache write, positive = savings
+2. **Batch related requests** - Maximize cache hits within TTL window
+3. **Monitor token usage** - Track cached vs uncached tokens
---
@@ -681,4 +603,4 @@ if response.usage:
| Token awareness | Truncate long outputs |
| Clear prompts | Specific format requirements |
| Tool definitions | Well-documented parameters |
-| **Prompt caching** | Enable `cache=True`, use cache_control for Anthropic |
+| **Prompt caching** | Use static prompts first for better cache hits |
diff --git a/rules/08-error-handling.md b/rules/08-error-handling.md
index 24137c5..ed2751c 100644
--- a/rules/08-error-handling.md
+++ b/rules/08-error-handling.md
@@ -47,12 +47,12 @@ Errors parsing LLM responses:
```python
import time
-from src.llm.client import LiteLLMClient, LLMError, CostLimitExceeded
+from src.llm.client import LLMClient, LLMError, CostLimitExceeded
class RobustLLMClient:
def __init__(self, ctx: Any):
self.ctx = ctx
- self.llm = LLM(default_model="anthropic/claude-3.5-sonnet")
+ self.llm = LLMClient(model="moonshotai/Kimi-K2.5-TEE")
self.max_retries = 3
self.base_delay = 5
diff --git a/src/__init__.py b/src/__init__.py
index 40cb881..2d5c793 100644
--- a/src/__init__.py
+++ b/src/__init__.py
@@ -4,7 +4,7 @@
Inspired by OpenAI Codex CLI, BaseAgent is designed to solve
terminal-based coding tasks autonomously using LLMs.
-SDK 3.0 Compatible - Uses litellm instead of term_sdk.
+SDK 3.0 Compatible - Uses Chutes API via httpx instead of term_sdk.
Usage:
python agent.py --instruction "Your task here..."
@@ -15,8 +15,8 @@
# Import main components for convenience
from src.config.defaults import CONFIG
-from src.tools.registry import ToolRegistry
from src.output.jsonl import emit
+from src.tools.registry import ToolRegistry
__all__ = [
"CONFIG",
diff --git a/src/api/retry.py b/src/api/retry.py
index 13f9830..7515f27 100644
--- a/src/api/retry.py
+++ b/src/api/retry.py
@@ -12,13 +12,13 @@
from src.config.models import RetryConfig
-
T = TypeVar("T")
@dataclass
class RetryState:
"""State of a retry operation."""
+
attempt: int
last_error: Optional[Exception]
last_status_code: Optional[int]
@@ -27,57 +27,57 @@ class RetryState:
class RetryHandler:
"""Handles retry logic with exponential backoff."""
-
+
def __init__(self, config: RetryConfig):
self.config = config
-
+
def calculate_delay(self, attempt: int) -> float:
"""Calculate delay with exponential backoff and jitter.
-
+
Args:
attempt: Current attempt number (1-indexed)
-
+
Returns:
Delay in seconds with jitter
"""
# Exponential backoff: base_delay * 2^(attempt-1)
exp_delay = self.config.base_delay * (2 ** (attempt - 1))
-
+
# Cap at max_delay
delay = min(exp_delay, self.config.max_delay)
-
+
# Add jitter (0.9 to 1.1 multiplier)
jitter = random.uniform(0.9, 1.1)
-
+
return delay * jitter
-
+
def should_retry(self, error: Exception, attempt: int) -> bool:
"""Determine if we should retry based on the error.
-
+
Args:
error: The exception that occurred
attempt: Current attempt number
-
+
Returns:
True if we should retry
"""
if attempt >= self.config.max_attempts:
return False
-
+
# Check for HTTP status codes
if isinstance(error, httpx.HTTPStatusError):
return error.response.status_code in self.config.retry_on_status
-
+
# Retry on connection errors
if isinstance(error, (httpx.ConnectError, httpx.TimeoutException)):
return True
-
+
# Retry on specific exception types
if isinstance(error, (ConnectionError, TimeoutError)):
return True
-
+
return False
-
+
def execute(
self,
func: Callable[..., T],
@@ -86,62 +86,64 @@ def execute(
**kwargs: Any,
) -> T:
"""Execute a function with retry logic.
-
+
Args:
func: Function to execute
*args: Positional arguments for func
on_retry: Optional callback called before each retry
**kwargs: Keyword arguments for func
-
+
Returns:
Result of func
-
+
Raises:
The last exception if all retries fail
"""
state = RetryState(attempt=0, last_error=None, last_status_code=None, total_delay=0)
-
+
while True:
state.attempt += 1
-
+
try:
return func(*args, **kwargs)
except Exception as e:
state.last_error = e
-
+
# Extract status code if available
if isinstance(e, httpx.HTTPStatusError):
state.last_status_code = e.response.status_code
-
+
# Check if we should retry
if not self.should_retry(e, state.attempt):
raise
-
+
# Calculate and apply delay
delay = self.calculate_delay(state.attempt)
state.total_delay += delay
-
+
# Call retry callback
if on_retry:
on_retry(state)
-
+
time.sleep(delay)
def with_retry(config: RetryConfig) -> Callable[[Callable[..., T]], Callable[..., T]]:
"""Decorator to add retry logic to a function.
-
+
Args:
config: Retry configuration
-
+
Returns:
Decorator function
"""
handler = RetryHandler(config)
-
+
def decorator(func: Callable[..., T]) -> Callable[..., T]:
@wraps(func)
def wrapper(*args: Any, **kwargs: Any) -> T:
return handler.execute(func, *args, **kwargs)
+
return wrapper
+
return decorator
diff --git a/src/config/__init__.py b/src/config/__init__.py
index c986030..08c8595 100644
--- a/src/config/__init__.py
+++ b/src/config/__init__.py
@@ -1,5 +1,5 @@
"""Configuration module."""
-from src.config.defaults import CONFIG, get_config, get
+from src.config.defaults import CONFIG, get, get_config
__all__ = ["CONFIG", "get_config", "get"]
diff --git a/src/config/defaults.py b/src/config/defaults.py
index da7615f..e86f74b 100644
--- a/src/config/defaults.py
+++ b/src/config/defaults.py
@@ -17,91 +17,64 @@
import os
from typing import Any, Dict
-
# Main configuration - simulates Codex exec benchmark mode
CONFIG: Dict[str, Any] = {
# ==========================================================================
# Model Settings (simulates --model gpt-5.2 -c model_reasoning_effort=xhigh)
# ==========================================================================
-
- # Model to use via OpenRouter (prefix with openrouter/ for litellm)
- "model": os.environ.get("LLM_MODEL", "openrouter/anthropic/claude-sonnet-4-20250514"),
-
+ # Model to use via Chutes API (OpenAI-compatible)
+ "model": os.environ.get("LLM_MODEL", "moonshotai/Kimi-K2.5-TEE"),
# Provider
- "provider": "openrouter",
-
+ "provider": "chutes",
# Reasoning effort: none, minimal, low, medium, high, xhigh (not used for Claude)
"reasoning_effort": "none",
-
# Token limits
"max_tokens": 16384,
-
# Temperature (0 = deterministic)
"temperature": 0.0,
-
# ==========================================================================
# Agent Execution Settings
# ==========================================================================
-
# Maximum iterations before stopping
"max_iterations": 200,
-
# Maximum tokens for tool output truncation (middle-out strategy)
"max_output_tokens": 2500, # ~10KB
-
# Timeout for shell commands (seconds)
"shell_timeout": 60,
-
# ==========================================================================
# Context Management (like OpenCode/Codex)
# ==========================================================================
-
# Model context window (Claude Opus 4.5 = 200K)
"model_context_limit": 200_000,
-
# Reserved tokens for output
"output_token_max": 32_000,
-
# Trigger compaction at this % of usable context (85%)
"auto_compact_threshold": 0.85,
-
# Tool output pruning constants (from OpenCode)
- "prune_protect": 40_000, # Protect this many tokens of recent tool output
- "prune_minimum": 20_000, # Only prune if we can recover at least this many
-
+ "prune_protect": 40_000, # Protect this many tokens of recent tool output
+ "prune_minimum": 20_000, # Only prune if we can recover at least this many
# ==========================================================================
- # Prompt Caching (Anthropic via OpenRouter/Bedrock)
+ # Prompt Caching
# ==========================================================================
-
# Enable prompt caching
"cache_enabled": True,
-
- # Note: Anthropic caching requires minimum tokens per breakpoint:
- # - Claude Opus 4.5 on Bedrock: 4096 tokens minimum
- # - Claude Sonnet/other: 1024 tokens minimum
- # System prompt should be large enough to meet this threshold
-
+ # Note: Caching behavior depends on the model/provider
+ # System prompt should be large enough to meet provider thresholds
# ==========================================================================
# Simulated Codex Flags (all enabled/bypassed for benchmark)
# ==========================================================================
-
# --dangerously-bypass-approvals-and-sandbox
"bypass_approvals": True,
"bypass_sandbox": True,
-
# --skip-git-repo-check
"skip_git_check": True,
-
# --enable unified_exec
"unified_exec": True,
-
# --json (always JSONL output)
"json_output": True,
-
# ==========================================================================
# Double Confirmation for Task Completion
# ==========================================================================
-
# Require double confirmation before marking task complete
# Disabled for fully autonomous operation in evaluation mode
"require_completion_confirmation": False,
diff --git a/src/config/loader.py b/src/config/loader.py
index 7111850..31213d6 100644
--- a/src/config/loader.py
+++ b/src/config/loader.py
@@ -29,7 +29,7 @@ def _flatten_dict(d: dict[str, Any], parent_key: str = "", sep: str = "_") -> di
def _nest_dict(flat: dict[str, Any]) -> dict[str, Any]:
"""Convert a flat dictionary with underscores to nested structure."""
result: dict[str, Any] = {}
-
+
# Map of flat keys to nested paths
mappings = {
"agent_model": ["model"],
@@ -59,7 +59,7 @@ def _nest_dict(flat: dict[str, Any]) -> dict[str, Any]:
"paths_readable_roots": ["paths", "readable_roots"],
"paths_writable_roots": ["paths", "writable_roots"],
}
-
+
for flat_key, value in flat.items():
if flat_key in mappings:
path = mappings[flat_key]
@@ -69,34 +69,34 @@ def _nest_dict(flat: dict[str, Any]) -> dict[str, Any]:
current[part] = {}
current = current[part]
current[path[-1]] = value
-
+
return result
def load_config_from_file(path: Path) -> AgentConfig:
"""Load configuration from a TOML file.
-
+
Args:
path: Path to the TOML configuration file.
-
+
Returns:
AgentConfig instance with loaded configuration.
-
+
Raises:
FileNotFoundError: If the config file doesn't exist.
ValueError: If the config file is invalid.
"""
if not path.exists():
raise FileNotFoundError(f"Config file not found: {path}")
-
+
with open(path, "rb") as f:
raw_config = tomllib.load(f)
-
+
# TOML structure: [agent], [cache], [retry], etc.
# We need to transform it to match our Pydantic model structure
flat = _flatten_dict(raw_config)
nested = _nest_dict(flat)
-
+
# Also handle direct keys from [agent] section
if "agent" in raw_config:
for key, value in raw_config["agent"].items():
@@ -104,12 +104,12 @@ def load_config_from_file(path: Path) -> AgentConfig:
nested[key] = value
if key == "reasoning" and isinstance(value, dict):
nested["reasoning"] = value
-
+
# Handle other top-level sections directly
for section in ["cache", "retry", "tools", "output", "paths"]:
if section in raw_config and section not in nested:
nested[section] = raw_config[section]
-
+
return AgentConfig(**nested)
@@ -118,31 +118,31 @@ def load_config(
overrides: Optional[dict[str, Any]] = None,
) -> AgentConfig:
"""Load configuration with optional overrides.
-
+
Args:
config_path: Optional path to a TOML config file.
overrides: Optional dictionary of configuration overrides.
-
+
Returns:
AgentConfig instance.
"""
# Start with defaults
config_dict: dict[str, Any] = {}
-
+
# Load from file if provided
if config_path and config_path.exists():
with open(config_path, "rb") as f:
raw_config = tomllib.load(f)
-
+
# Transform TOML structure
if "agent" in raw_config:
for key, value in raw_config["agent"].items():
config_dict[key] = value
-
+
for section in ["cache", "retry", "tools", "output", "paths"]:
if section in raw_config:
config_dict[section] = raw_config[section]
-
+
# Apply overrides
if overrides:
for key, value in overrides.items():
@@ -157,19 +157,19 @@ def load_config(
current[parts[-1]] = value
else:
config_dict[key] = value
-
+
return AgentConfig(**config_dict)
def find_config_file() -> Optional[Path]:
"""Find the configuration file in standard locations.
-
+
Searches in order:
1. ./config.toml
2. ./superagent.toml
3. ~/.config/superagent/config.toml
4. ~/.superagent/config.toml
-
+
Returns:
Path to the config file if found, None otherwise.
"""
@@ -179,9 +179,9 @@ def find_config_file() -> Optional[Path]:
Path.home() / ".config" / "superagent" / "config.toml",
Path.home() / ".superagent" / "config.toml",
]
-
+
for path in search_paths:
if path.exists():
return path
-
+
return None
diff --git a/src/config/models.py b/src/config/models.py
index 172efe4..ef5c889 100644
--- a/src/config/models.py
+++ b/src/config/models.py
@@ -5,13 +5,13 @@
import os
from enum import Enum
from pathlib import Path
-from typing import Literal, Optional
from pydantic import BaseModel, Field, field_validator
class ReasoningEffort(str, Enum):
"""Reasoning effort levels for the model."""
+
NONE = "none"
MINIMAL = "minimal"
LOW = "low"
@@ -22,40 +22,47 @@ class ReasoningEffort(str, Enum):
class OutputMode(str, Enum):
"""Output mode for the agent."""
+
HUMAN = "human"
JSON = "json"
class Provider(str, Enum):
"""LLM provider."""
- OPENROUTER = "openrouter"
+
+ CHUTES = "chutes"
OPENAI = "openai"
ANTHROPIC = "anthropic"
class ReasoningConfig(BaseModel):
"""Configuration for model reasoning."""
- effort: ReasoningEffort = Field(default=ReasoningEffort.HIGH, description="Reasoning effort level")
+
+ effort: ReasoningEffort = Field(
+ default=ReasoningEffort.HIGH, description="Reasoning effort level"
+ )
class CacheConfig(BaseModel):
"""Configuration for prompt caching."""
+
enabled: bool = Field(default=True, description="Enable prompt caching")
class RetryConfig(BaseModel):
"""Configuration for retry logic."""
+
max_attempts: int = Field(default=5, description="Maximum retry attempts")
base_delay: float = Field(default=1.0, description="Base delay in seconds")
max_delay: float = Field(default=60.0, description="Maximum delay in seconds")
retry_on_status: list[int] = Field(
- default=[429, 500, 502, 503, 504],
- description="HTTP status codes to retry on"
+ default=[429, 500, 502, 503, 504], description="HTTP status codes to retry on"
)
class ToolsConfig(BaseModel):
"""Configuration for available tools."""
+
shell_enabled: bool = Field(default=True, description="Enable shell execution")
shell_timeout: int = Field(default=30, description="Shell timeout in seconds")
file_ops_enabled: bool = Field(default=True, description="Enable file operations")
@@ -66,6 +73,7 @@ class ToolsConfig(BaseModel):
class OutputConfig(BaseModel):
"""Configuration for output formatting."""
+
mode: OutputMode = Field(default=OutputMode.HUMAN, description="Output mode")
streaming: bool = Field(default=True, description="Enable streaming output")
colors: bool = Field(default=True, description="Enable colored output")
@@ -73,10 +81,11 @@ class OutputConfig(BaseModel):
class PathsConfig(BaseModel):
"""Configuration for file paths."""
+
cwd: str = Field(default="", description="Working directory")
readable_roots: list[str] = Field(default=[], description="Additional readable directories")
writable_roots: list[str] = Field(default=[], description="Additional writable directories")
-
+
@field_validator("cwd", mode="before")
@classmethod
def resolve_cwd(cls, v: str) -> str:
@@ -88,21 +97,15 @@ def resolve_cwd(cls, v: str) -> str:
class AgentConfig(BaseModel):
"""Main configuration for the SuperAgent."""
-
+
# Model settings
- model: str = Field(
- default="anthropic/claude-opus-4-20250514",
- description="Model to use"
- )
- provider: Provider = Field(
- default=Provider.OPENROUTER,
- description="LLM provider"
- )
+ model: str = Field(default="anthropic/claude-opus-4-20250514", description="Model to use")
+ provider: Provider = Field(default=Provider.CHUTES, description="LLM provider")
max_iterations: int = Field(default=50, description="Maximum iterations")
timeout: int = Field(default=120, description="Timeout per LLM call in seconds")
temperature: float = Field(default=0.7, description="Generation temperature")
max_tokens: int = Field(default=16384, description="Maximum tokens for response")
-
+
# Sub-configurations
reasoning: ReasoningConfig = Field(default_factory=ReasoningConfig)
cache: CacheConfig = Field(default_factory=CacheConfig)
@@ -110,32 +113,34 @@ class AgentConfig(BaseModel):
tools: ToolsConfig = Field(default_factory=ToolsConfig)
output: OutputConfig = Field(default_factory=OutputConfig)
paths: PathsConfig = Field(default_factory=PathsConfig)
-
+
@property
def working_directory(self) -> Path:
"""Get the working directory as a Path object."""
return Path(self.paths.cwd or os.getcwd())
-
+
def get_api_key(self) -> str:
"""Get the API key for the configured provider."""
env_vars = {
- Provider.OPENROUTER: ["OPENROUTER_API_KEY"],
+ Provider.CHUTES: ["CHUTES_API_KEY"],
Provider.OPENAI: ["OPENAI_API_KEY"],
Provider.ANTHROPIC: ["ANTHROPIC_API_KEY"],
}
-
+
for var in env_vars.get(self.provider, []):
key = os.environ.get(var)
if key:
return key
-
- raise ValueError(f"No API key found for provider {self.provider}. "
- f"Set one of: {env_vars.get(self.provider, [])}")
-
+
+ raise ValueError(
+ f"No API key found for provider {self.provider}. "
+ f"Set one of: {env_vars.get(self.provider, [])}"
+ )
+
def get_base_url(self) -> str:
"""Get the base URL for the configured provider."""
urls = {
- Provider.OPENROUTER: "https://openrouter.ai/api/v1",
+ Provider.CHUTES: "https://api.chutes.ai/v1",
Provider.OPENAI: "https://api.openai.com/v1",
Provider.ANTHROPIC: "https://api.anthropic.com/v1",
}
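
A short illustration of the provider plumbing above; the key value is a placeholder, and the printed base URL is the `Provider.CHUTES` entry from `get_base_url()`.

```python
import os

from src.config.models import AgentConfig, Provider

os.environ["CHUTES_API_KEY"] = "example-key"  # placeholder, not a real key

cfg = AgentConfig(provider=Provider.CHUTES)
print(cfg.get_base_url())  # https://api.chutes.ai/v1
print(cfg.get_api_key())   # example-key

# With no key set, get_api_key() raises ValueError naming the expected variable.
```
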
diff --git a/src/core/__init__.py b/src/core/__init__.py
index f4f1956..f1e9bdb 100644
--- a/src/core/__init__.py
+++ b/src/core/__init__.py
@@ -1,28 +1,27 @@
"""Core module - agent loop, session management, and context compaction."""
-from src.core.executor import (
- AgentExecutor,
- ExecutionResult,
- RiskLevel,
- SandboxPolicy,
-)
-
# Compaction module (like OpenCode/Codex context management)
from src.core.compaction import (
- manage_context,
- estimate_tokens,
+ AUTO_COMPACT_THRESHOLD,
+ MODEL_CONTEXT_LIMIT,
+ OUTPUT_TOKEN_MAX,
+ PRUNE_MARKER,
+ PRUNE_MINIMUM,
+ PRUNE_PROTECT,
estimate_message_tokens,
+ estimate_tokens,
estimate_total_tokens,
is_overflow,
+ manage_context,
needs_compaction,
prune_old_tool_outputs,
run_compaction,
- MODEL_CONTEXT_LIMIT,
- OUTPUT_TOKEN_MAX,
- AUTO_COMPACT_THRESHOLD,
- PRUNE_PROTECT,
- PRUNE_MINIMUM,
- PRUNE_MARKER,
+)
+from src.core.executor import (
+ AgentExecutor,
+ ExecutionResult,
+ RiskLevel,
+ SandboxPolicy,
)
# Import run_agent_loop
diff --git a/src/core/agent.py b/src/core/agent.py
index a6725bb..e946829 100644
--- a/src/core/agent.py
+++ b/src/core/agent.py
@@ -2,11 +2,10 @@
from __future__ import annotations
-import sys
from pathlib import Path
-from typing import Any, Callable, Optional
+from typing import Callable, Optional
-from src.api.client import LLMClient, LLMResponse, FunctionCall
+from src.api.client import FunctionCall, LLMClient, LLMResponse
from src.config.models import AgentConfig
from src.core.session import Session
from src.output.processor import OutputProcessor
@@ -16,14 +15,14 @@
class Agent:
"""Main agent that runs the LLM loop with tool execution.
-
+
This implements the core agent loop similar to Codex CLI:
1. Send messages to LLM
2. If LLM returns tool calls, execute them
3. Feed results back to LLM
4. Repeat until no more tool calls (needs_follow_up = False)
"""
-
+
def __init__(
self,
config: Optional[AgentConfig] = None,
@@ -31,7 +30,7 @@ def __init__(
output_processor: Optional[OutputProcessor] = None,
):
"""Initialize the agent.
-
+
Args:
config: Agent configuration
cwd: Working directory (defaults to current)
@@ -39,15 +38,15 @@ def __init__(
"""
self.config = config or AgentConfig()
self.cwd = cwd or Path(self.config.paths.cwd or ".").resolve()
-
+
# Initialize components
self.client = LLMClient(self.config)
self.tools = ToolRegistry(self.cwd)
self.output = output_processor or OutputProcessor(self.config)
-
+
# Session state
self.session: Optional[Session] = None
-
+
def run(
self,
prompt: str,
@@ -55,59 +54,59 @@ def run(
on_tool_call: Optional[Callable[[str, dict], None]] = None,
) -> str:
"""Run the agent with a user prompt.
-
+
Args:
prompt: User's instruction/prompt
on_message: Optional callback for assistant messages
on_tool_call: Optional callback for tool calls
-
+
Returns:
Final assistant message
"""
# Create session
self.session = Session(config=self.config, cwd=self.cwd)
-
+
# Add system prompt
system_prompt = get_system_prompt(cwd=self.cwd)
self.session.add_system_message(system_prompt)
-
+
# Add user message
self.session.add_user_message(prompt)
-
+
# Emit session started
self.output.emit_turn_started(self.session)
-
+
# Run the agent loop
try:
final_message = self._run_loop(on_message, on_tool_call)
self.session.mark_done(final_message)
self.output.emit_turn_completed(self.session, final_message)
return final_message
-
+
except Exception as e:
error_msg = f"Agent error: {e}"
self.output.emit_error(error_msg)
self.session.mark_done(error_msg)
raise
-
+
finally:
self.client.close()
-
+
def _run_loop(
self,
on_message: Optional[Callable[[str], None]] = None,
on_tool_call: Optional[Callable[[str, dict], None]] = None,
) -> str:
"""Run the main agent loop.
-
+
Returns:
Final assistant message
"""
if not self.session:
raise RuntimeError("No session initialized")
-
+
last_message = ""
-
+
while True:
# Check iteration limit
if not self.session.increment_iteration():
@@ -115,42 +114,42 @@ def _run_loop(
f"Reached maximum iterations ({self.config.max_iterations})"
)
break
-
+
# Get tools for the LLM
tools = self.tools.get_tools_for_llm()
-
+
# Call the LLM
self.output.emit_thinking()
-
+
response = self.client.chat(
messages=self.session.get_messages_for_api(),
tools=tools,
)
-
+
# Update token usage
self.session.update_usage(
response.input_tokens,
response.output_tokens,
response.cached_tokens,
)
-
+
# Process the response
needs_follow_up = self._process_response(
response,
on_message,
on_tool_call,
)
-
+
# Store last message
if response.text:
last_message = response.text
-
+
# If no tool calls, we're done
if not needs_follow_up:
break
-
+
return last_message
-
+
def _process_response(
self,
response: LLMResponse,
@@ -158,84 +157,86 @@ def _process_response(
on_tool_call: Optional[Callable[[str, dict], None]] = None,
) -> bool:
"""Process an LLM response.
-
+
Args:
response: The LLM response
on_message: Callback for messages
on_tool_call: Callback for tool calls
-
+
Returns:
True if follow-up is needed (tool calls were made)
"""
if not self.session:
raise RuntimeError("No session initialized")
-
+
# Handle text response
if response.text:
self.output.emit_assistant_message(response.text)
if on_message:
on_message(response.text)
-
+
# Check for tool calls
if not response.has_function_calls:
# No tool calls - add response and we're done
self.session.add_assistant_message(response.text)
return False
-
+
# Build tool_calls format for the message
tool_calls_data = []
for call in response.function_calls:
- tool_calls_data.append({
- "id": call.id,
- "type": "function",
- "function": {
- "name": call.name,
- "arguments": str(call.arguments),
- },
- })
-
+ tool_calls_data.append(
+ {
+ "id": call.id,
+ "type": "function",
+ "function": {
+ "name": call.name,
+ "arguments": str(call.arguments),
+ },
+ }
+ )
+
# Add assistant message with tool calls
self.session.add_assistant_message(
response.text or "",
tool_calls=tool_calls_data,
)
-
+
# Execute each tool call
for call in response.function_calls:
result = self._execute_tool_call(call, on_tool_call)
-
+
# Add tool result to conversation
self.session.add_tool_result(
tool_call_id=call.id,
name=call.name,
content=result.to_message(),
)
-
+
# Need follow-up since we executed tools
return True
-
+
def _execute_tool_call(
self,
call: FunctionCall,
on_tool_call: Optional[Callable[[str, dict], None]] = None,
) -> ToolResult:
"""Execute a single tool call.
-
+
Args:
call: The function call to execute
on_tool_call: Optional callback
-
+
Returns:
ToolResult from execution
"""
self.output.emit_tool_call_start(call.name, call.arguments)
-
+
if on_tool_call:
on_tool_call(call.name, call.arguments)
-
+
# Execute the tool
result = self.tools.execute(call.name, call.arguments)
-
+
self.output.emit_tool_call_end(call.name, result)
-
+
return result
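
A hedged usage sketch for the `Agent` class above: the prompt and callbacks are illustrative, and a valid API key for the configured provider (e.g. `CHUTES_API_KEY`) is assumed to be present in the environment.

```python
from pathlib import Path

from src.config.models import AgentConfig
from src.core.agent import Agent

agent = Agent(config=AgentConfig(max_iterations=10), cwd=Path.cwd())

# run() drives the loop: LLM call -> tool execution -> results fed back,
# repeating until a response arrives with no tool calls.
final = agent.run(
    "List the Python files in this project",
    on_message=lambda text: print(f"[assistant] {text}"),
    on_tool_call=lambda name, args: print(f"[tool] {name}({args})"),
)
print(final)
```
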
diff --git a/src/core/compaction.py b/src/core/compaction.py
index 6876605..4ed4ae3 100644
--- a/src/core/compaction.py
+++ b/src/core/compaction.py
@@ -13,10 +13,10 @@
import sys
import time
-from typing import Any, Dict, List, Optional, TYPE_CHECKING
+from typing import TYPE_CHECKING, Any, Dict, List, Optional
if TYPE_CHECKING:
- from src.llm.client import LiteLLMClient
+ from src.llm.client import LLMClient
# =============================================================================
# Constants (matching OpenCode)
@@ -59,6 +59,7 @@
# Token Estimation
# =============================================================================
+
def estimate_tokens(text: str) -> int:
"""Estimate tokens from text length (4 chars per token heuristic)."""
return max(0, len(text or "") // APPROX_CHARS_PER_TOKEN)
@@ -67,7 +68,7 @@ def estimate_tokens(text: str) -> int:
def estimate_message_tokens(msg: Dict[str, Any]) -> int:
"""Estimate tokens for a single message."""
tokens = 0
-
+
# Content tokens
content = msg.get("content")
if isinstance(content, str):
@@ -79,17 +80,17 @@ def estimate_message_tokens(msg: Dict[str, Any]) -> int:
             # Images count as roughly 1000 tokens
if part.get("type") == "image_url":
tokens += 1000
-
+
# Tool calls tokens (function name + arguments)
tool_calls = msg.get("tool_calls", [])
for tc in tool_calls:
func = tc.get("function", {})
tokens += estimate_tokens(func.get("name", ""))
tokens += estimate_tokens(func.get("arguments", ""))
-
+
# Role overhead (~4 tokens)
tokens += 4
-
+
return tokens
@@ -102,6 +103,7 @@ def estimate_total_tokens(messages: List[Dict[str, Any]]) -> int:
# Overflow Detection
# =============================================================================
+
def get_usable_context() -> int:
"""Get usable context window (total - reserved for output)."""
return MODEL_CONTEXT_LIMIT - OUTPUT_TOKEN_MAX
@@ -123,6 +125,7 @@ def needs_compaction(messages: List[Dict[str, Any]]) -> bool:
# Tool Output Pruning
# =============================================================================
+
def _log(msg: str) -> None:
"""Log to stderr."""
timestamp = time.strftime("%H:%M:%S")
@@ -135,80 +138,82 @@ def prune_old_tool_outputs(
) -> List[Dict[str, Any]]:
"""
Prune old tool outputs to save tokens.
-
+
Strategy (exactly like OpenCode compaction.ts lines 49-89):
1. Go backwards through messages
2. Skip first 2 user turns (most recent)
3. Accumulate tool output tokens
4. Once we've accumulated PRUNE_PROTECT (40K) tokens, start marking for prune
5. Only actually prune if we can recover > PRUNE_MINIMUM (20K) tokens
-
+
Args:
messages: List of messages
protect_last_turns: Number of recent user turns to skip (default: 2)
-
+
Returns:
Messages with old tool outputs pruned (content replaced with PRUNE_MARKER)
"""
if not messages:
return messages
-
+
total = 0 # Total tool output tokens seen (going backwards)
pruned = 0 # Tokens that will be pruned
to_prune: List[int] = [] # Indices to prune
turns = 0 # User turn counter
-
+
# Go backwards through messages (like OpenCode)
for msg_index in range(len(messages) - 1, -1, -1):
msg = messages[msg_index]
-
+
# Count user turns
if msg.get("role") == "user":
turns += 1
-
+
# Skip the first N user turns (most recent)
if turns < protect_last_turns:
continue
-
+
# Process tool messages
if msg.get("role") == "tool":
content = msg.get("content", "")
-
+
# Skip already pruned
if content == PRUNE_MARKER:
# Already compacted, stop here (like OpenCode: break loop)
break
-
+
estimate = estimate_tokens(content)
total += estimate
-
+
# Once we've accumulated more than PRUNE_PROTECT tokens,
# start marking older outputs for pruning
if total > PRUNE_PROTECT:
pruned += estimate
to_prune.append(msg_index)
-
+
_log(f"Prune scan: {total} total tokens, {pruned} prunable")
-
+
# Only prune if we can recover enough tokens
if pruned <= PRUNE_MINIMUM:
_log(f"Prune skipped: only {pruned} tokens recoverable (min: {PRUNE_MINIMUM})")
return messages
-
+
_log(f"Pruning {len(to_prune)} tool outputs, recovering ~{pruned} tokens")
-
+
# Create new messages with pruned content
indices_to_prune = set(to_prune)
result = []
for i, msg in enumerate(messages):
if i in indices_to_prune:
- result.append({
- **msg,
- "content": PRUNE_MARKER,
- })
+ result.append(
+ {
+ **msg,
+ "content": PRUNE_MARKER,
+ }
+ )
else:
result.append(msg)
-
+
return result
@@ -216,15 +221,16 @@ def prune_old_tool_outputs(
# AI Compaction
# =============================================================================
+
def run_compaction(
- llm: "LiteLLMClient",
+ llm: "LLMClient",
messages: List[Dict[str, Any]],
system_prompt: str,
model: Optional[str] = None,
) -> List[Dict[str, Any]]:
"""
Compact conversation history using AI summarization.
-
+
Process (like Codex):
1. Send all messages + compaction prompt to LLM
2. Get summary response
@@ -232,25 +238,27 @@ def run_compaction(
- Original system prompt
- Summary as user message (with prefix)
- Ready for continuation
-
+
Args:
llm: LLM client for summarization
messages: Current message history
system_prompt: Original system prompt to preserve
model: Model to use (defaults to current)
-
+
Returns:
Compacted message list
"""
_log("Starting AI compaction...")
-
+
# Build compaction request
compaction_messages = messages.copy()
- compaction_messages.append({
- "role": "user",
- "content": COMPACTION_PROMPT,
- })
-
+ compaction_messages.append(
+ {
+ "role": "user",
+ "content": COMPACTION_PROMPT,
+ }
+ )
+
try:
# Call LLM for summary (no tools, just text)
response = llm.chat(
@@ -258,24 +266,24 @@ def run_compaction(
model=model,
max_tokens=4096, # Summary should be concise
)
-
+
summary = response.text or ""
-
+
if not summary:
_log("Compaction failed: empty response")
return messages
-
+
summary_tokens = estimate_tokens(summary)
_log(f"Compaction complete: {summary_tokens} token summary")
-
+
# Build new message list
compacted = [
{"role": "system", "content": system_prompt},
{"role": "user", "content": SUMMARY_PREFIX + summary},
]
-
+
return compacted
-
+
except Exception as e:
_log(f"Compaction failed: {e}")
# Return original messages if compaction fails
@@ -286,57 +294,58 @@ def run_compaction(
# Main Context Management
# =============================================================================
+
def manage_context(
messages: List[Dict[str, Any]],
system_prompt: str,
- llm: "LiteLLMClient",
+ llm: "LLMClient",
force_compaction: bool = False,
) -> List[Dict[str, Any]]:
"""
Main context management function.
-
+
Called before each LLM request to ensure context fits.
-
+
Strategy:
1. Estimate current token usage
2. If under threshold, return as-is
3. Try pruning old tool outputs first
4. If still over threshold, run AI compaction
-
+
Args:
messages: Current message history
system_prompt: Original system prompt (preserved through compaction)
llm: LLM client (for compaction)
force_compaction: Force compaction even if under threshold
-
+
Returns:
Managed message list (possibly compacted)
"""
total_tokens = estimate_total_tokens(messages)
usable = get_usable_context()
usage_pct = (total_tokens / usable) * 100
-
+
_log(f"Context: {total_tokens} tokens ({usage_pct:.1f}% of {usable})")
-
+
# Check if we need to do anything
if not force_compaction and not is_overflow(total_tokens):
return messages
-
- _log(f"Context overflow detected, managing...")
-
+
+ _log("Context overflow detected, managing...")
+
# Step 1: Try pruning old tool outputs
pruned = prune_old_tool_outputs(messages)
pruned_tokens = estimate_total_tokens(pruned)
-
+
if not is_overflow(pruned_tokens) and not force_compaction:
_log(f"Pruning sufficient: {total_tokens} -> {pruned_tokens} tokens")
return pruned
-
+
# Step 2: Run AI compaction
_log(f"Pruning insufficient ({pruned_tokens} tokens), running AI compaction...")
compacted = run_compaction(llm, pruned, system_prompt)
compacted_tokens = estimate_total_tokens(compacted)
-
+
_log(f"Compaction result: {total_tokens} -> {compacted_tokens} tokens")
-
+
return compacted
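
To make the 4-chars-per-token heuristic concrete, a quick check against the estimators above (the numbers follow directly from the code shown):

```python
from src.core.compaction import estimate_message_tokens, estimate_tokens

tool_msg = {"role": "tool", "content": "x" * 4000}
print(estimate_tokens(tool_msg["content"]))  # 1000 -> 4000 chars // 4
print(estimate_message_tokens(tool_msg))     # 1004 -> content + ~4 tokens role overhead
```

Pruning then only fires once more than `PRUNE_PROTECT` tokens of old tool output have accumulated and at least `PRUNE_MINIMUM` tokens are recoverable; AI compaction is the fallback when pruning alone cannot bring the history back under the overflow threshold.
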
diff --git a/src/core/executor.py b/src/core/executor.py
index c2b5b68..c9be8f1 100644
--- a/src/core/executor.py
+++ b/src/core/executor.py
@@ -2,7 +2,7 @@
This module wraps ToolRegistry with:
- Timeout enforcement
-- Execution tracking
+- Execution tracking
- Batch execution support
- Risk assessment for commands
@@ -14,48 +14,51 @@
import concurrent.futures
import json
import time
-from dataclasses import dataclass, field
+from dataclasses import dataclass
from enum import Enum, auto
from pathlib import Path
-from typing import Any, Dict, List, Optional, Tuple, TYPE_CHECKING
+from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple
if TYPE_CHECKING:
pass # AgentContext is duck-typed
-from src.tools.registry import ToolRegistry, ExecutorConfig, ExecutorStats
from src.tools.base import ToolResult
+from src.tools.registry import ExecutorConfig, ExecutorStats, ToolRegistry
class RiskLevel(Enum):
"""Risk level for tool operations."""
- SAFE = auto() # Read-only operations
- LOW = auto() # Network/environment access
- MEDIUM = auto() # File modifications
- HIGH = auto() # Destructive operations
+
+ SAFE = auto() # Read-only operations
+ LOW = auto() # Network/environment access
+ MEDIUM = auto() # File modifications
+ HIGH = auto() # Destructive operations
CRITICAL = auto() # System destruction potential
class SandboxPolicy(Enum):
"""Policy for sandbox enforcement."""
- STRICT = auto() # Block all risky operations
- PROMPT = auto() # Prompt user for risky operations
+
+ STRICT = auto() # Block all risky operations
+ PROMPT = auto() # Prompt user for risky operations
PERMISSIVE = auto() # Allow most operations
@dataclass
class ExecutionResult:
"""Result of a tool execution with timing and metadata."""
+
tool_name: str
result: ToolResult
duration_ms: int
cached: bool = False
risk_level: Optional[RiskLevel] = None
-
+
@property
def success(self) -> bool:
"""Whether the execution was successful."""
return self.result.success
-
+
@property
def output(self) -> str:
"""The output from the tool."""
@@ -65,9 +68,10 @@ def output(self) -> str:
@dataclass
class CachedExecutionResult:
"""A cached execution result with timestamp."""
+
result: ExecutionResult
cached_at: float
-
+
def is_valid(self, ttl: float) -> bool:
"""Check if the cached result is still valid."""
return (time.time() - self.cached_at) < ttl
@@ -76,21 +80,21 @@ def is_valid(self, ttl: float) -> bool:
class AgentExecutor:
"""
High-level executor for agent tool calls.
-
+
Wraps ToolRegistry with:
- Timeout enforcement
- Execution tracking
- Batch execution support
- Risk assessment
- Result caching
-
+
Example:
executor = AgentExecutor(cwd=Path("/project"))
result = executor.execute(ctx, "read_file", {"file_path": "main.py"})
if result.success:
print(result.output)
"""
-
+
def __init__(
self,
cwd: Optional[Path] = None,
@@ -98,7 +102,7 @@ def __init__(
sandbox_policy: SandboxPolicy = SandboxPolicy.PROMPT,
):
"""Initialize the executor.
-
+
Args:
cwd: Working directory for tool operations
config: Executor configuration (timeouts, concurrency, etc.)
@@ -109,22 +113,22 @@ def __init__(
self.registry._config = config
self._sandbox_policy = sandbox_policy
self._execution_cache: Dict[str, CachedExecutionResult] = {}
-
+
@property
def config(self) -> ExecutorConfig:
"""Get the executor configuration."""
return self.registry._config
-
+
@property
def cwd(self) -> Path:
"""Get the current working directory."""
return self.registry.cwd
-
+
@cwd.setter
def cwd(self, value: Path) -> None:
"""Set the current working directory."""
self.registry.cwd = value
-
+
def execute(
self,
ctx: "AgentContext",
@@ -134,39 +138,35 @@ def execute(
) -> ExecutionResult:
"""
Execute a single tool with timeout.
-
+
Args:
ctx: Agent context with shell() method
tool_name: Name of tool to execute
arguments: Tool arguments
timeout: Optional timeout override (seconds)
-
+
Returns:
ExecutionResult with result and timing
"""
start = time.time()
-
+
# Assess risk level
risk = self.assess_risk(tool_name, arguments)
-
+
# Use config timeout if not specified
effective_timeout = timeout or self.registry._config.default_timeout
-
+
# Execute with timeout
cached = False
try:
- result = self._execute_with_timeout(
- ctx, tool_name, arguments, effective_timeout
- )
+ result = self._execute_with_timeout(ctx, tool_name, arguments, effective_timeout)
except TimeoutError:
- result = ToolResult.fail(
- f"Tool {tool_name} timed out after {effective_timeout}s"
- )
+ result = ToolResult.fail(f"Tool {tool_name} timed out after {effective_timeout}s")
except Exception as e:
result = ToolResult.fail(f"Tool {tool_name} failed: {e}")
-
+
duration_ms = int((time.time() - start) * 1000)
-
+
return ExecutionResult(
tool_name=tool_name,
result=result,
@@ -174,7 +174,7 @@ def execute(
cached=cached,
risk_level=risk,
)
-
+
def _execute_with_timeout(
self,
ctx: "AgentContext",
@@ -183,28 +183,26 @@ def _execute_with_timeout(
timeout: float,
) -> ToolResult:
"""Execute with timeout using threading.
-
+
Args:
ctx: Agent context
tool_name: Name of tool to execute
arguments: Tool arguments
timeout: Timeout in seconds
-
+
Returns:
ToolResult from the tool
-
+
Raises:
TimeoutError: If execution exceeds timeout
"""
with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
- future = executor.submit(
- self.registry.execute, ctx, tool_name, arguments
- )
+ future = executor.submit(self.registry.execute, ctx, tool_name, arguments)
try:
return future.result(timeout=timeout)
except concurrent.futures.TimeoutError:
raise TimeoutError(f"Execution timed out after {timeout}s")
-
+
def execute_batch(
self,
ctx: "AgentContext",
@@ -213,27 +211,24 @@ def execute_batch(
) -> List[ExecutionResult]:
"""
Execute multiple tools.
-
+
Args:
ctx: Agent context
calls: List of (tool_name, arguments) tuples
parallel: If True, execute in parallel (up to max_concurrent)
-
+
Returns:
List of ExecutionResults in same order as calls
"""
if not calls:
return []
-
+
if not parallel:
- return [
- self.execute(ctx, name, args)
- for name, args in calls
- ]
-
+ return [self.execute(ctx, name, args) for name, args in calls]
+
# Parallel execution with ordering preserved
results: List[Optional[ExecutionResult]] = [None] * len(calls)
-
+
with concurrent.futures.ThreadPoolExecutor(
max_workers=self.registry._config.max_concurrent
) as executor:
@@ -242,7 +237,7 @@ def execute_batch(
executor.submit(self.execute, ctx, name, args): i
for i, (name, args) in enumerate(calls)
}
-
+
for future in concurrent.futures.as_completed(future_to_index):
index = future_to_index[future]
try:
@@ -256,9 +251,9 @@ def execute_batch(
duration_ms=0,
cached=False,
)
-
+
return results # type: ignore
-
+
def execute_sequential(
self,
ctx: "AgentContext",
@@ -266,16 +261,16 @@ def execute_sequential(
) -> List[ExecutionResult]:
"""
Execute tools sequentially (alias for execute_batch with parallel=False).
-
+
Args:
ctx: Agent context
calls: List of (tool_name, arguments) tuples
-
+
Returns:
List of ExecutionResults in same order as calls
"""
return self.execute_batch(ctx, calls, parallel=False)
-
+
def assess_risk(
self,
tool_name: str,
@@ -283,11 +278,11 @@ def assess_risk(
) -> RiskLevel:
"""
Assess risk level of a tool call.
-
+
Args:
tool_name: Name of the tool
arguments: Tool arguments
-
+
Returns:
RiskLevel indicating the risk of this operation
"""
@@ -295,35 +290,35 @@ def assess_risk(
if tool_name == "shell_command":
cmd = arguments.get("command", "")
return self._assess_command_risk(cmd)
-
+
# Default risk by tool category
if tool_name in ("read_file", "list_dir", "grep_files", "view_image"):
return RiskLevel.SAFE
-
+
if tool_name == "write_file":
return RiskLevel.MEDIUM
-
+
if tool_name == "apply_patch":
return RiskLevel.MEDIUM
-
+
if tool_name == "update_plan":
return RiskLevel.SAFE
-
+
# Unknown tools get medium risk
return RiskLevel.MEDIUM
-
+
def _assess_command_risk(self, command: str) -> RiskLevel:
"""
Assess risk of a shell command.
-
+
Args:
command: Shell command string
-
+
Returns:
RiskLevel for the command
"""
cmd = command.lower().strip()
-
+
# Critical: system destruction
if (
(cmd.startswith("rm -rf /") and (cmd == "rm -rf /" or cmd.startswith("rm -rf / ")))
@@ -332,7 +327,7 @@ def _assess_command_risk(self, command: str) -> RiskLevel:
or "mkfs" in cmd
):
return RiskLevel.CRITICAL
-
+
# High: destructive operations
if (
"rm -rf" in cmd
@@ -346,7 +341,7 @@ def _assess_command_risk(self, command: str) -> RiskLevel:
or ("wget" in cmd and "| sh" in cmd)
):
return RiskLevel.HIGH
-
+
# Medium: file modifications
if (
"mv " in cmd
@@ -358,17 +353,11 @@ def _assess_command_risk(self, command: str) -> RiskLevel:
or "pip install" in cmd
):
return RiskLevel.MEDIUM
-
+
# Low: network or environment access
- if (
- "curl" in cmd
- or "wget" in cmd
- or "ssh" in cmd
- or "env" in cmd
- or "export" in cmd
- ):
+ if "curl" in cmd or "wget" in cmd or "ssh" in cmd or "env" in cmd or "export" in cmd:
return RiskLevel.LOW
-
+
# Safe: read-only operations
if (
cmd.startswith("ls")
@@ -384,9 +373,9 @@ def _assess_command_risk(self, command: str) -> RiskLevel:
or cmd.startswith("git diff")
):
return RiskLevel.SAFE
-
+
return RiskLevel.MEDIUM
-
+
def can_auto_approve(
self,
tool_name: str,
@@ -394,40 +383,40 @@ def can_auto_approve(
) -> bool:
"""
Check if a tool call can be auto-approved based on risk and policy.
-
+
Args:
tool_name: Name of the tool
arguments: Tool arguments
-
+
Returns:
True if the call can be auto-approved
"""
risk = self.assess_risk(tool_name, arguments)
-
+
if self._sandbox_policy == SandboxPolicy.STRICT:
return risk == RiskLevel.SAFE
elif self._sandbox_policy == SandboxPolicy.PROMPT:
return risk in (RiskLevel.SAFE, RiskLevel.LOW)
else: # PERMISSIVE
return risk != RiskLevel.CRITICAL
-
+
def stats(self) -> ExecutorStats:
"""Get execution statistics."""
return self.registry.stats()
-
+
def clear_cache(self) -> None:
"""Clear the result cache."""
self.registry.clear_cache()
self._execution_cache.clear()
-
+
def cache_size(self) -> int:
"""Get the current cache size."""
return len(self.registry._cache)
-
+
def get_tools_for_llm(self) -> list:
"""Get tool specs for LLM."""
return self.registry.get_tools_for_llm()
-
+
def get_plan(self) -> list:
"""Get the current execution plan."""
return self.registry.get_plan()
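
An illustrative walk through the risk ladder above; the commands are hypothetical, and the expected results follow from `_assess_command_risk()` as shown.

```python
from pathlib import Path

from src.core.executor import AgentExecutor, SandboxPolicy

executor = AgentExecutor(cwd=Path.cwd(), sandbox_policy=SandboxPolicy.PROMPT)

print(executor.assess_risk("shell_command", {"command": "git status"}))    # RiskLevel.SAFE
print(executor.assess_risk("shell_command", {"command": "rm -rf build"}))  # RiskLevel.HIGH

# Under PROMPT, only SAFE and LOW are auto-approved.
print(executor.can_auto_approve("shell_command", {"command": "curl example.com"}))  # True (LOW)
print(executor.can_auto_approve("shell_command", {"command": "rm -rf build"}))      # False
```
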
diff --git a/src/core/loop.py b/src/core/loop.py
index 93715ec..1e136e4 100644
--- a/src/core/loop.py
+++ b/src/core/loop.py
@@ -3,7 +3,7 @@
Implements the agentic loop that:
1. Receives instruction via --instruction argument
-2. Calls LLM with tools (using litellm)
+2. Calls LLM with tools (using Chutes API)
3. Executes tool calls
4. Loops until task is complete
5. Emits JSONL events throughout
@@ -17,38 +17,33 @@
from __future__ import annotations
-import time
import sys
+import time
from pathlib import Path
-from typing import Any, Dict, List, Optional, TYPE_CHECKING
-
-from src.llm.client import LLMError, CostLimitExceeded
+from typing import TYPE_CHECKING, Any, Dict, List
+from src.core.compaction import (
+ manage_context,
+)
+from src.llm.client import CostLimitExceeded, LLMError
from src.output.jsonl import (
- emit,
- next_item_id,
- reset_item_counter,
+ ItemCompletedEvent,
+ ItemStartedEvent,
ThreadStartedEvent,
- TurnStartedEvent,
TurnCompletedEvent,
TurnFailedEvent,
- ItemStartedEvent,
- ItemCompletedEvent,
- ErrorEvent,
+ TurnStartedEvent,
+ emit,
make_agent_message_item,
make_command_execution_item,
- make_file_change_item,
+ next_item_id,
+ reset_item_counter,
)
from src.prompts.system import get_system_prompt
-from src.utils.truncate import middle_out_truncate, APPROX_BYTES_PER_TOKEN
-from src.core.compaction import (
- manage_context,
- estimate_total_tokens,
- needs_compaction,
-)
+from src.utils.truncate import middle_out_truncate
if TYPE_CHECKING:
- from src.llm.client import LiteLLMClient
+ from src.llm.client import LLMClient
from src.tools.registry import ToolRegistry
@@ -64,15 +59,12 @@ def _add_cache_control_to_message(
) -> Dict[str, Any]:
"""Add cache_control to a message, converting to multipart if needed."""
content = msg.get("content")
-
+
if isinstance(content, list):
- has_cache = any(
- isinstance(p, dict) and "cache_control" in p
- for p in content
- )
+ has_cache = any(isinstance(p, dict) and "cache_control" in p for p in content)
if has_cache:
return msg
-
+
new_content = list(content)
for i in range(len(new_content) - 1, -1, -1):
part = new_content[i]
@@ -80,7 +72,7 @@ def _add_cache_control_to_message(
new_content[i] = {**part, "cache_control": cache_control}
break
return {**msg, "content": new_content}
-
+
if isinstance(content, str):
return {
**msg,
@@ -92,7 +84,7 @@ def _add_cache_control_to_message(
}
],
}
-
+
return msg
@@ -104,48 +96,48 @@ def _apply_caching(
Apply prompt caching like OpenCode does:
- Cache first 2 system messages (stable prefix)
- Cache last 2 non-system messages (extends cache to cover conversation history)
-
+
How Anthropic caching works:
- Cache is based on IDENTICAL PREFIX
- A cache_control breakpoint tells Anthropic to cache everything BEFORE it
- By marking the last messages, we cache the entire conversation history
- Each new request only adds new messages after the cached prefix
-
+
Anthropic limits:
- Maximum 4 cache_control breakpoints
- Minimum tokens per breakpoint: 1024 (Sonnet), 4096 (Opus 4.5 on Bedrock)
-
+
Reference: OpenCode transform.ts applyCaching()
"""
if not enabled or not messages:
return messages
-
+
cache_control = {"type": "ephemeral"}
-
+
# Separate system and non-system message indices
system_indices = []
non_system_indices = []
-
+
for i, msg in enumerate(messages):
if msg.get("role") == "system":
system_indices.append(i)
else:
non_system_indices.append(i)
-
+
# Determine which messages to cache:
# 1. First 2 system messages (stable system prompt)
# 2. Last 2 non-system messages (extends cache to conversation history)
# Total: up to 4 breakpoints (Anthropic limit)
indices_to_cache = set()
-
+
# Add first 2 system messages
for idx in system_indices[:2]:
indices_to_cache.add(idx)
-
+
# Add last 2 non-system messages
for idx in non_system_indices[-2:]:
indices_to_cache.add(idx)
-
+
# Build result with cache_control added to selected messages
result = []
for i, msg in enumerate(messages):
@@ -153,79 +145,83 @@ def _apply_caching(
result.append(_add_cache_control_to_message(msg, cache_control))
else:
result.append(msg)
-
+
cached_system = len([i for i in indices_to_cache if i in system_indices])
cached_final = len([i for i in indices_to_cache if i in non_system_indices])
-
+
if indices_to_cache:
- _log(f"Prompt caching: {cached_system} system + {cached_final} final messages marked ({len(indices_to_cache)} breakpoints)")
-
+ _log(
+ f"Prompt caching: {cached_system} system + {cached_final} final messages marked ({len(indices_to_cache)} breakpoints)"
+ )
+
return result
def run_agent_loop(
- llm: "LiteLLMClient",
+ llm: "LLMClient",
tools: "ToolRegistry",
ctx: Any,
config: Dict[str, Any],
) -> None:
"""
Run the main agent loop.
-
+
Args:
- llm: LiteLLM client
+ llm: LLM client
tools: Tool registry with available tools
ctx: Agent context with instruction, shell(), done()
config: Configuration dictionary
"""
# Reset item counter for fresh session
reset_item_counter()
-
+
# Generate session ID
session_id = f"sess_{int(time.time() * 1000)}"
-
+
# 1. Emit thread.started
emit(ThreadStartedEvent(thread_id=session_id))
-
+
# 2. Emit turn.started
emit(TurnStartedEvent())
-
+
# 3. Build initial messages
cwd = Path(ctx.cwd)
system_prompt = get_system_prompt(cwd=cwd)
-
+
messages: List[Dict[str, Any]] = [
{"role": "system", "content": system_prompt},
{"role": "user", "content": ctx.instruction},
]
-
+
# 4. Get initial terminal state
_log("Getting initial state...")
initial_result = ctx.shell("pwd && ls -la")
max_output_tokens = config.get("max_output_tokens", 2500)
initial_state = middle_out_truncate(initial_result.output, max_tokens=max_output_tokens)
-
- messages.append({
- "role": "user",
- "content": f"Current directory and files:\n```\n{initial_state}\n```",
- })
-
+
+ messages.append(
+ {
+ "role": "user",
+ "content": f"Current directory and files:\n```\n{initial_state}\n```",
+ }
+ )
+
# 5. Initialize tracking
total_input_tokens = 0
total_output_tokens = 0
total_cached_tokens = 0
pending_completion = False
last_agent_message = ""
-
+
max_iterations = config.get("max_iterations", 200)
cache_enabled = config.get("cache_enabled", True)
-
+
# 6. Main loop
iteration = 0
while iteration < max_iterations:
iteration += 1
_log(f"Iteration {iteration}/{max_iterations}")
-
+
try:
# ================================================================
# Context Management (replaces sliding window)
@@ -236,27 +232,27 @@ def run_agent_loop(
system_prompt=system_prompt,
llm=llm,
)
-
+
# If compaction happened, update our messages reference
if len(context_messages) < len(messages):
_log(f"Context compacted: {len(messages)} -> {len(context_messages)} messages")
messages = context_messages
-
+
# ================================================================
             # Apply caching (system prefix plus the last messages; see _apply_caching)
# ================================================================
cached_messages = _apply_caching(context_messages, enabled=cache_enabled)
-
+
# Get tool specs
tool_specs = tools.get_tools_for_llm()
-
+
# ================================================================
# Call LLM with retry logic
# ================================================================
max_retries = 5
response = None
last_error = None
-
+
for attempt in range(1, max_retries + 1):
try:
response = llm.chat(
@@ -267,7 +263,7 @@ def run_agent_loop(
"reasoning": {"effort": config.get("reasoning_effort", "xhigh")},
},
)
-
+
# Track token usage from response
if hasattr(response, "tokens") and response.tokens:
tokens = response.tokens
@@ -275,84 +271,89 @@ def run_agent_loop(
total_input_tokens += tokens.get("input", 0)
total_output_tokens += tokens.get("output", 0)
total_cached_tokens += tokens.get("cached", 0)
-
+
break # Success, exit retry loop
-
+
except CostLimitExceeded:
raise # Don't retry cost limit errors
-
+
except LLMError as e:
last_error = e
- error_msg = str(e.message) if hasattr(e, 'message') else str(e)
+ error_msg = str(e.message) if hasattr(e, "message") else str(e)
_log(f"LLM error (attempt {attempt}/{max_retries}): {e.code} - {error_msg}")
-
+
# Don't retry authentication errors
if e.code in ("authentication_error", "invalid_api_key"):
raise
-
+
# Check if it's a retryable error
- is_retryable = any(x in error_msg.lower() for x in [
- "504", "timeout", "empty response", "overloaded", "rate_limit"
- ])
-
+ is_retryable = any(
+ x in error_msg.lower()
+ for x in ["504", "timeout", "empty response", "overloaded", "rate_limit"]
+ )
+
if attempt < max_retries and is_retryable:
wait_time = 10 * attempt # 10s, 20s, 30s, 40s
_log(f"Retrying in {wait_time} seconds...")
time.sleep(wait_time)
else:
raise
-
+
except Exception as e:
last_error = e
error_msg = str(e)
- _log(f"Unexpected error (attempt {attempt}/{max_retries}): {type(e).__name__}: {error_msg}")
-
+ _log(
+ f"Unexpected error (attempt {attempt}/{max_retries}): {type(e).__name__}: {error_msg}"
+ )
+
is_retryable = any(x in error_msg.lower() for x in ["504", "timeout"])
-
+
if attempt < max_retries and is_retryable:
wait_time = 10 * attempt
_log(f"Retrying in {wait_time} seconds...")
time.sleep(wait_time)
else:
raise
-
+
except CostLimitExceeded as e:
_log(f"Cost limit exceeded: {e}")
emit(TurnFailedEvent(error={"message": f"Cost limit exceeded: {e}"}))
ctx.done()
return
-
+
except LLMError as e:
_log(f"LLM error (fatal): {e.code} - {e.message}")
emit(TurnFailedEvent(error={"message": str(e)}))
ctx.done()
return
-
+
except Exception as e:
_log(f"Unexpected error (fatal): {type(e).__name__}: {e}")
emit(TurnFailedEvent(error={"message": str(e)}))
ctx.done()
return
-
+
# Process response text
response_text = response.text or ""
-
+
if response_text:
last_agent_message = response_text
-
+
# Emit agent message
item_id = next_item_id()
- emit(ItemCompletedEvent(
- item=make_agent_message_item(item_id, response_text)
- ))
-
+ emit(ItemCompletedEvent(item=make_agent_message_item(item_id, response_text)))
+
# Check for function calls
- has_function_calls = response.has_function_calls() if hasattr(response, "has_function_calls") else bool(response.function_calls)
-
+ has_function_calls = (
+ response.has_function_calls()
+ if hasattr(response, "has_function_calls")
+ else bool(response.function_calls)
+ )
+
if not has_function_calls:
# No tool calls - agent thinks it's done
_log("No tool calls in response")
-
+
# Always do verification before completing (self-questioning)
if pending_completion:
# Agent already verified - complete the task
@@ -362,7 +363,7 @@ def run_agent_loop(
# First time without tool calls - ask for self-verification
pending_completion = True
messages.append({"role": "assistant", "content": response_text})
-
+
# Build verification prompt with original instruction
verification_prompt = f"""
# Self-Verification Required - CRITICAL
@@ -405,96 +406,118 @@ def run_agent_loop(
Proceed with verification now.
"""
-
- messages.append({
- "role": "user",
- "content": verification_prompt,
- })
+
+ messages.append(
+ {
+ "role": "user",
+ "content": verification_prompt,
+ }
+ )
_log("Requesting self-verification before completion")
continue
-
+
# Reset pending completion flag (agent is still working)
pending_completion = False
-
+
# Add assistant message with tool calls
assistant_msg: Dict[str, Any] = {"role": "assistant", "content": response_text}
-
+
# Build tool_calls for message history
tool_calls_data = []
for call in response.function_calls:
- tool_calls_data.append({
- "id": call.id,
- "type": "function",
- "function": {
- "name": call.name,
- "arguments": str(call.arguments) if isinstance(call.arguments, dict) else call.arguments,
- },
- })
-
+ tool_calls_data.append(
+ {
+ "id": call.id,
+ "type": "function",
+ "function": {
+ "name": call.name,
+ "arguments": (
+ str(call.arguments)
+ if isinstance(call.arguments, dict)
+ else call.arguments
+ ),
+ },
+ }
+ )
+
if tool_calls_data:
assistant_msg["tool_calls"] = tool_calls_data
-
+
messages.append(assistant_msg)
-
+
# Execute each tool call
for call in response.function_calls:
tool_name = call.name
tool_args = call.arguments if isinstance(call.arguments, dict) else {}
-
+
_log(f"Executing tool: {tool_name}")
-
+
# Emit item.started
item_id = next_item_id()
- emit(ItemStartedEvent(
- item=make_command_execution_item(
- item_id=item_id,
- command=f"{tool_name}({tool_args})",
- status="in_progress",
+ emit(
+ ItemStartedEvent(
+ item=make_command_execution_item(
+ item_id=item_id,
+ command=f"{tool_name}({tool_args})",
+ status="in_progress",
+ )
)
- ))
-
+ )
+
# Execute tool
result = tools.execute(ctx, tool_name, tool_args)
-
+
# Truncate output using middle-out (keeps beginning and end)
output = middle_out_truncate(result.output, max_tokens=max_output_tokens)
-
+
# Emit item.completed
- emit(ItemCompletedEvent(
- item=make_command_execution_item(
- item_id=item_id,
- command=f"{tool_name}",
- status="completed" if result.success else "failed",
- aggregated_output=output,
- exit_code=0 if result.success else 1,
+ emit(
+ ItemCompletedEvent(
+ item=make_command_execution_item(
+ item_id=item_id,
+ command=f"{tool_name}",
+ status="completed" if result.success else "failed",
+ aggregated_output=output,
+ exit_code=0 if result.success else 1,
+ )
)
- ))
-
+ )
+
# Handle image injection
if result.inject_content:
# Add image to next user message
- messages.append({
- "role": "user",
- "content": [
- {"type": "text", "text": f"Image from {tool_name}:"},
- result.inject_content,
- ],
- })
-
+ messages.append(
+ {
+ "role": "user",
+ "content": [
+ {"type": "text", "text": f"Image from {tool_name}:"},
+ result.inject_content,
+ ],
+ }
+ )
+
# Add tool result to messages
- messages.append({
- "role": "tool",
- "tool_call_id": call.id,
- "content": output,
- })
-
+ messages.append(
+ {
+ "role": "tool",
+ "tool_call_id": call.id,
+ "content": output,
+ }
+ )
+
# 7. Emit turn.completed
- emit(TurnCompletedEvent(usage={
- "input_tokens": total_input_tokens,
- "cached_input_tokens": total_cached_tokens,
- "output_tokens": total_output_tokens,
- }))
-
+ emit(
+ TurnCompletedEvent(
+ usage={
+ "input_tokens": total_input_tokens,
+ "cached_input_tokens": total_cached_tokens,
+ "output_tokens": total_output_tokens,
+ }
+ )
+ )
+
_log(f"Loop complete after {iteration} iterations")
- _log(f"Tokens: {total_input_tokens} input, {total_cached_tokens} cached, {total_output_tokens} output")
+ _log(
+ f"Tokens: {total_input_tokens} input, {total_cached_tokens} cached, {total_output_tokens} output"
+ )
ctx.done()
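
A sketch of the breakpoint placement performed by `_apply_caching()` above: the first two system messages and the last two non-system messages receive an ephemeral `cache_control` marker, staying within Anthropic's four-breakpoint limit. (The retry schedule in the loop is linear, `10 * attempt` seconds, i.e. 10s/20s/30s/40s across the four retryable attempts.)

```python
from src.core.loop import _apply_caching

msgs = [
    {"role": "system", "content": "You are an agent."},
    {"role": "user", "content": "step 1"},
    {"role": "assistant", "content": "ok"},
    {"role": "user", "content": "step 2"},
]

for m in _apply_caching(msgs, enabled=True):
    content = m["content"]
    marked = isinstance(content, list) and any("cache_control" in p for p in content)
    print(m["role"], marked)
# system True / user False / assistant True / user True
```
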
diff --git a/src/core/session.py b/src/core/session.py
index 6a93734..065c2ef 100644
--- a/src/core/session.py
+++ b/src/core/session.py
@@ -14,14 +14,15 @@
@dataclass
class TokenUsage:
"""Token usage tracking."""
+
input_tokens: int = 0
output_tokens: int = 0
cached_tokens: int = 0
-
+
@property
def total_tokens(self) -> int:
return self.input_tokens + self.output_tokens
-
+
def add(self, other: "TokenUsage") -> None:
"""Add usage from another TokenUsage instance."""
self.input_tokens += other.input_tokens
@@ -32,115 +33,120 @@ def add(self, other: "TokenUsage") -> None:
@dataclass
class Message:
"""A message in the conversation history."""
+
role: str # "system", "user", "assistant", "tool"
content: str
tool_call_id: Optional[str] = None
tool_calls: Optional[list[dict[str, Any]]] = None
name: Optional[str] = None # For tool messages
-
+
def to_dict(self) -> dict[str, Any]:
"""Convert to API format."""
msg: dict[str, Any] = {"role": self.role, "content": self.content}
-
+
if self.tool_call_id:
msg["tool_call_id"] = self.tool_call_id
-
+
if self.tool_calls:
msg["tool_calls"] = self.tool_calls
-
+
if self.name:
msg["name"] = self.name
-
+
return msg
@dataclass
class Session:
"""Manages the state of an agent session."""
-
+
id: str = field(default_factory=lambda: str(uuid.uuid4()))
config: AgentConfig = field(default_factory=AgentConfig)
cwd: Path = field(default_factory=Path.cwd)
-
+
# Conversation history
messages: list[Message] = field(default_factory=list)
-
+
# Token usage
usage: TokenUsage = field(default_factory=TokenUsage)
-
+
# Iteration tracking
iteration: int = 0
-
+
# Timestamps
started_at: datetime = field(default_factory=datetime.now)
last_activity: datetime = field(default_factory=datetime.now)
-
+
# Status
is_done: bool = False
final_message: Optional[str] = None
-
+
def add_system_message(self, content: str) -> None:
"""Add a system message."""
self.messages.append(Message(role="system", content=content))
self._update_activity()
-
+
def add_user_message(self, content: str) -> None:
"""Add a user message."""
self.messages.append(Message(role="user", content=content))
self._update_activity()
-
+
def add_assistant_message(
self,
content: str,
tool_calls: Optional[list[dict[str, Any]]] = None,
) -> None:
"""Add an assistant message."""
- self.messages.append(Message(
- role="assistant",
- content=content,
- tool_calls=tool_calls,
- ))
+ self.messages.append(
+ Message(
+ role="assistant",
+ content=content,
+ tool_calls=tool_calls,
+ )
+ )
self._update_activity()
-
+
def add_tool_result(self, tool_call_id: str, name: str, content: str) -> None:
"""Add a tool result message."""
- self.messages.append(Message(
- role="tool",
- content=content,
- tool_call_id=tool_call_id,
- name=name,
- ))
+ self.messages.append(
+ Message(
+ role="tool",
+ content=content,
+ tool_call_id=tool_call_id,
+ name=name,
+ )
+ )
self._update_activity()
-
+
def get_messages_for_api(self) -> list[dict[str, Any]]:
"""Get messages formatted for the API."""
return [msg.to_dict() for msg in self.messages]
-
+
def update_usage(self, input_tokens: int, output_tokens: int, cached_tokens: int = 0) -> None:
"""Update token usage."""
self.usage.input_tokens += input_tokens
self.usage.output_tokens += output_tokens
self.usage.cached_tokens += cached_tokens
-
+
def increment_iteration(self) -> bool:
"""Increment iteration and check if we should continue.
-
+
Returns:
True if we can continue, False if max iterations reached
"""
self.iteration += 1
return self.iteration < self.config.max_iterations
-
+
def mark_done(self, final_message: Optional[str] = None) -> None:
"""Mark the session as done."""
self.is_done = True
self.final_message = final_message
self._update_activity()
-
+
def _update_activity(self) -> None:
"""Update last activity timestamp."""
self.last_activity = datetime.now()
-
+
@property
def elapsed_time(self) -> float:
"""Get elapsed time in seconds."""
diff --git a/src/exec/__init__.py b/src/exec/__init__.py
index 7929338..9fa1293 100644
--- a/src/exec/__init__.py
+++ b/src/exec/__init__.py
@@ -5,16 +5,16 @@
"""
from .runner import (
- OutputChunk,
+ DEFAULT_TIMEOUT,
+ MAX_OUTPUT_SIZE,
+ SENSITIVE_PATTERNS,
ExecOptions,
ExecOutput,
+ OutputChunk,
+ build_safe_environment,
execute_command,
execute_command_streaming,
- build_safe_environment,
truncate_output,
- DEFAULT_TIMEOUT,
- MAX_OUTPUT_SIZE,
- SENSITIVE_PATTERNS,
)
__all__ = [
diff --git a/src/exec/runner.py b/src/exec/runner.py
index f264a19..8055564 100644
--- a/src/exec/runner.py
+++ b/src/exec/runner.py
@@ -15,7 +15,7 @@
from dataclasses import dataclass, field
from enum import Enum, auto
from pathlib import Path
-from typing import Callable, Dict, List, Optional, Union
+from typing import Callable, Dict, List, Optional
# =============================================================================
# Constants
@@ -27,12 +27,12 @@
# Patterns in variable names that indicate sensitive data (case-insensitive).
# These will be excluded from the environment passed to child processes.
SENSITIVE_PATTERNS: List[str] = [
- "KEY", # API_KEY, SSH_KEY, etc.
- "SECRET", # AWS_SECRET, etc.
- "TOKEN", # AUTH_TOKEN, etc.
- "PASSWORD", # DB_PASSWORD, etc.
- "CREDENTIAL", # GOOGLE_CREDENTIALS, etc.
- "PRIVATE", # PRIVATE_KEY, etc.
+ "KEY", # API_KEY, SSH_KEY, etc.
+ "SECRET", # AWS_SECRET, etc.
+ "TOKEN", # AUTH_TOKEN, etc.
+ "PASSWORD", # DB_PASSWORD, etc.
+ "CREDENTIAL", # GOOGLE_CREDENTIALS, etc.
+ "PRIVATE", # PRIVATE_KEY, etc.
]
@@ -40,8 +40,10 @@
# Output Types
# =============================================================================
+
class OutputChunkType(Enum):
"""Type of output chunk."""
+
STDOUT = auto()
STDERR = auto()
@@ -49,26 +51,27 @@ class OutputChunkType(Enum):
@dataclass
class OutputChunk:
"""Output chunk from streaming execution.
-
+
Represents a single chunk of output from either stdout or stderr.
"""
+
chunk_type: OutputChunkType
data: str
-
+
@classmethod
def stdout(cls, data: str) -> "OutputChunk":
"""Create a stdout chunk."""
return cls(chunk_type=OutputChunkType.STDOUT, data=data)
-
+
@classmethod
def stderr(cls, data: str) -> "OutputChunk":
"""Create a stderr chunk."""
return cls(chunk_type=OutputChunkType.STDERR, data=data)
-
+
def is_stdout(self) -> bool:
"""Check if this is a stdout chunk."""
return self.chunk_type == OutputChunkType.STDOUT
-
+
def is_stderr(self) -> bool:
"""Check if this is a stderr chunk."""
return self.chunk_type == OutputChunkType.STDERR
@@ -78,21 +81,23 @@ def is_stderr(self) -> bool:
# Options and Output
# =============================================================================
+
@dataclass
class ExecOptions:
"""Options for command execution.
-
+
Attributes:
cwd: Working directory for command execution.
timeout: Maximum execution time in seconds.
env: Additional environment variables to set.
capture_output: Whether to capture stdout/stderr.
"""
+
cwd: Path = field(default_factory=Path.cwd)
timeout: float = DEFAULT_TIMEOUT
env: Dict[str, str] = field(default_factory=dict)
capture_output: bool = True
-
+
def __post_init__(self):
"""Ensure cwd is a Path object."""
if isinstance(self.cwd, str):
@@ -102,7 +107,7 @@ def __post_init__(self):
@dataclass
class ExecOutput:
"""Output from command execution.
-
+
Attributes:
stdout: Standard output content.
stderr: Standard error content.
@@ -111,6 +116,7 @@ class ExecOutput:
duration: Execution duration in seconds.
timed_out: Whether the command timed out.
"""
+
stdout: str
stderr: str
aggregated: str
@@ -123,30 +129,31 @@ class ExecOutput:
# Environment Building
# =============================================================================
+
def build_safe_environment(overrides: Optional[Dict[str, str]] = None) -> Dict[str, str]:
"""Build a safe environment for command execution.
-
+
- Inherits ALL environment variables from parent process
- Excludes variables containing sensitive patterns (KEY, SECRET, TOKEN, etc.)
- Forces non-interactive mode for common tools
- Applies any custom overrides
-
+
Args:
         overrides: Custom environment variables to set (applied after filtering, so they can reintroduce filtered names).
-
+
Returns:
Dictionary of safe environment variables.
"""
# Start with filtered parent environment
env: Dict[str, str] = {}
-
+
for key, value in os.environ.items():
# Exclude variables with sensitive patterns (case-insensitive)
key_upper = key.upper()
is_sensitive = any(pattern in key_upper for pattern in SENSITIVE_PATTERNS)
if not is_sensitive:
env[key] = value
-
+
# Force non-interactive mode for common tools
# This prevents commands from hanging waiting for user input
env["CI"] = "true" # npm/yarn/pnpm/create-* use this
@@ -155,12 +162,12 @@ def build_safe_environment(overrides: Optional[Dict[str, str]] = None) -> Dict[s
env["YARN_ENABLE_IMMUTABLE_INSTALLS"] = "false" # yarn
env["NO_COLOR"] = "1" # disable color codes
env["TERM"] = "dumb" # simple terminal
-
+
# Apply custom overrides
if overrides:
for key, value in overrides.items():
env[key] = value
-
+
return env
@@ -168,21 +175,22 @@ def build_safe_environment(overrides: Optional[Dict[str, str]] = None) -> Dict[s
# Output Truncation
# =============================================================================
+
def truncate_output(data: bytes) -> str:
"""Truncate output if it exceeds MAX_OUTPUT_SIZE.
-
+
Args:
data: Raw bytes from subprocess output.
-
+
Returns:
Decoded string, truncated if necessary with a notice.
"""
# Decode with replacement for invalid UTF-8
text = data.decode("utf-8", errors="replace")
-
+
if len(text) > MAX_OUTPUT_SIZE:
return f"{text[:MAX_OUTPUT_SIZE]}...\n[Output truncated, {len(text)} bytes total]"
-
+
return text
@@ -190,22 +198,23 @@ def truncate_output(data: bytes) -> str:
# Command Execution
# =============================================================================
+
async def execute_command(
command: List[str],
options: Optional[ExecOptions] = None,
) -> ExecOutput:
"""Execute a command with timeout and output capture.
-
+
Args:
command: Command and arguments as a list of strings.
options: Execution options (uses defaults if not provided).
-
+
Returns:
ExecOutput containing stdout, stderr, exit code, duration, etc.
"""
if options is None:
options = ExecOptions()
-
+
# Handle empty command
if not command:
return ExecOutput(
@@ -216,15 +225,15 @@ async def execute_command(
duration=0.0,
timed_out=False,
)
-
+
program = command[0]
args = command[1:]
-
+
start_time = time.monotonic()
-
+
# Build safe environment
env = build_safe_environment(options.env)
-
+
try:
# Create subprocess
process = await asyncio.create_subprocess_exec(
@@ -236,21 +245,21 @@ async def execute_command(
stderr=asyncio.subprocess.PIPE,
env=env,
)
-
+
try:
# Wait for completion with timeout
stdout_bytes, stderr_bytes = await asyncio.wait_for(
process.communicate(),
timeout=options.timeout,
)
-
+
duration = time.monotonic() - start_time
exit_code = process.returncode if process.returncode is not None else -1
-
+
# Truncate outputs if necessary
stdout = truncate_output(stdout_bytes)
stderr = truncate_output(stderr_bytes)
-
+
# Build aggregated output
aggregated_parts = []
if stdout:
@@ -258,7 +267,7 @@ async def execute_command(
if stderr:
aggregated_parts.append(stderr)
aggregated = "\n".join(aggregated_parts)
-
+
return ExecOutput(
stdout=stdout,
stderr=stderr,
@@ -267,17 +276,17 @@ async def execute_command(
duration=duration,
timed_out=False,
)
-
+
except asyncio.TimeoutError:
# Timeout - kill the process
duration = time.monotonic() - start_time
-
+
try:
process.kill()
await process.wait()
except ProcessLookupError:
pass # Process already terminated
-
+
return ExecOutput(
stdout="",
stderr="",
@@ -286,7 +295,7 @@ async def execute_command(
duration=duration,
timed_out=True,
)
-
+
except FileNotFoundError:
duration = time.monotonic() - start_time
return ExecOutput(
@@ -325,20 +334,20 @@ async def execute_command_streaming(
callback: Optional[Callable[[OutputChunk], None]] = None,
) -> ExecOutput:
"""Execute a command with streaming output.
-
+
Reads stdout and stderr line by line, calling the callback for each chunk.
-
+
Args:
command: Command and arguments as a list of strings.
options: Execution options (uses defaults if not provided).
callback: Function called with each OutputChunk as it arrives.
-
+
Returns:
ExecOutput containing full stdout, stderr, exit code, duration, etc.
"""
if options is None:
options = ExecOptions()
-
+
# Handle empty command
if not command:
return ExecOutput(
@@ -349,19 +358,19 @@ async def execute_command_streaming(
duration=0.0,
timed_out=False,
)
-
+
program = command[0]
args = command[1:]
-
+
start_time = time.monotonic()
-
+
# Build safe environment
env = build_safe_environment(options.env)
-
+
# Accumulators
stdout_acc: List[str] = []
stderr_acc: List[str] = []
-
+
try:
# Create subprocess
process = await asyncio.create_subprocess_exec(
@@ -373,7 +382,7 @@ async def execute_command_streaming(
stderr=asyncio.subprocess.PIPE,
env=env,
)
-
+
async def read_stdout():
"""Read stdout line by line."""
if process.stdout is None:
@@ -386,7 +395,7 @@ async def read_stdout():
stdout_acc.append(decoded)
if callback:
callback(OutputChunk.stdout(decoded))
-
+
async def read_stderr():
"""Read stderr line by line."""
if process.stderr is None:
@@ -399,7 +408,7 @@ async def read_stderr():
stderr_acc.append(decoded)
if callback:
callback(OutputChunk.stderr(decoded))
-
+
try:
# Read streams concurrently with timeout
await asyncio.wait_for(
@@ -410,20 +419,24 @@ async def read_stderr():
),
timeout=options.timeout,
)
-
+
duration = time.monotonic() - start_time
exit_code = process.returncode if process.returncode is not None else -1
-
+
# Join accumulated output
stdout = "".join(stdout_acc)
stderr = "".join(stderr_acc)
-
+
# Truncate if necessary
if len(stdout) > MAX_OUTPUT_SIZE:
- stdout = f"{stdout[:MAX_OUTPUT_SIZE]}...\n[Output truncated, {len(stdout)} bytes total]"
+ stdout = (
+ f"{stdout[:MAX_OUTPUT_SIZE]}...\n[Output truncated, {len(stdout)} bytes total]"
+ )
if len(stderr) > MAX_OUTPUT_SIZE:
- stderr = f"{stderr[:MAX_OUTPUT_SIZE]}...\n[Output truncated, {len(stderr)} bytes total]"
-
+ stderr = (
+ f"{stderr[:MAX_OUTPUT_SIZE]}...\n[Output truncated, {len(stderr)} bytes total]"
+ )
+
# Build aggregated output
aggregated_parts = []
if stdout:
@@ -431,7 +444,7 @@ async def read_stderr():
if stderr:
aggregated_parts.append(stderr)
aggregated = "\n".join(aggregated_parts)
-
+
return ExecOutput(
stdout=stdout,
stderr=stderr,
@@ -440,21 +453,21 @@ async def read_stderr():
duration=duration,
timed_out=False,
)
-
+
except asyncio.TimeoutError:
# Timeout - kill the process
duration = time.monotonic() - start_time
-
+
try:
process.kill()
await process.wait()
except ProcessLookupError:
pass
-
+
# Return what we accumulated before timeout
stdout = "".join(stdout_acc)
stderr = "".join(stderr_acc)
-
+
return ExecOutput(
stdout=stdout,
stderr=stderr,
@@ -463,7 +476,7 @@ async def read_stderr():
duration=duration,
timed_out=True,
)
-
+
except FileNotFoundError:
duration = time.monotonic() - start_time
return ExecOutput(
@@ -500,16 +513,17 @@ async def read_stderr():
# Synchronous Wrappers (convenience)
# =============================================================================
+
def execute_command_sync(
command: List[str],
options: Optional[ExecOptions] = None,
) -> ExecOutput:
"""Synchronous wrapper for execute_command.
-
+
Args:
command: Command and arguments as a list of strings.
options: Execution options.
-
+
Returns:
ExecOutput containing stdout, stderr, exit code, etc.
"""
@@ -522,12 +536,12 @@ def execute_command_streaming_sync(
callback: Optional[Callable[[OutputChunk], None]] = None,
) -> ExecOutput:
"""Synchronous wrapper for execute_command_streaming.
-
+
Args:
command: Command and arguments as a list of strings.
options: Execution options.
callback: Function called with each OutputChunk.
-
+
Returns:
ExecOutput containing stdout, stderr, exit code, etc.
"""
diff --git a/src/images/__init__.py b/src/images/__init__.py
index 770cc8d..c662b3c 100644
--- a/src/images/__init__.py
+++ b/src/images/__init__.py
@@ -1,11 +1,11 @@
"""Image handling module for SuperAgent."""
from src.images.loader import (
+ MAX_HEIGHT,
+ MAX_WIDTH,
load_image_as_data_uri,
load_image_bytes,
resize_image,
- MAX_WIDTH,
- MAX_HEIGHT,
)
__all__ = [
diff --git a/src/images/loader.py b/src/images/loader.py
index cf0179d..407eeef 100644
--- a/src/images/loader.py
+++ b/src/images/loader.py
@@ -17,7 +17,7 @@
from functools import lru_cache
from io import BytesIO
from pathlib import Path
-from typing import Optional, Tuple
+from typing import Tuple
# Maximum image dimensions (like Codex)
MAX_WIDTH = 2048
@@ -26,6 +26,7 @@
# Try to import PIL for image processing
try:
from PIL import Image
+
HAS_PIL = True
except ImportError:
HAS_PIL = False
@@ -55,10 +56,10 @@ def _file_hash(path: Path) -> str:
def load_image_bytes(path: Path) -> Tuple[bytes, str]:
"""
Load image bytes from disk.
-
+
Args:
path: Path to image file
-
+
Returns:
Tuple of (bytes, mime_type)
"""
@@ -76,33 +77,33 @@ def resize_image(
) -> Tuple[bytes, str]:
"""
Resize image if it exceeds max dimensions.
-
+
Args:
data: Image bytes
mime: MIME type
max_width: Maximum width
max_height: Maximum height
-
+
Returns:
Tuple of (resized_bytes, mime_type)
"""
if not HAS_PIL:
# Can't resize without PIL, return as-is
return data, mime
-
+
try:
img = Image.open(BytesIO(data))
-
+
# Check if resize needed
if img.width <= max_width and img.height <= max_height:
return data, mime
-
+
# Resize maintaining aspect ratio
img.thumbnail((max_width, max_height), Image.Resampling.LANCZOS)
-
+
# Encode back to bytes
output = BytesIO()
-
+
# Use PNG for transparency, JPEG for photos
if img.mode in ("RGBA", "LA") or mime == "image/png":
img.save(output, format="PNG", optimize=True)
@@ -113,7 +114,7 @@ def resize_image(
img = img.convert("RGB")
img.save(output, format="JPEG", quality=85, optimize=True)
return output.getvalue(), "image/jpeg"
-
+
except Exception:
# On any error, return original
return data, mime
@@ -123,46 +124,46 @@ def resize_image(
def _load_cached(path_str: str, file_hash: str) -> str:
"""Load and cache image as data URI (internal)."""
path = Path(path_str)
-
+
# Load raw bytes
data, mime = load_image_bytes(path)
-
+
# Resize if needed
data, mime = resize_image(data, mime)
-
+
# Encode as base64
b64 = base64.b64encode(data).decode("ascii")
-
+
return f"data:{mime};base64,{b64}"
def load_image_as_data_uri(path: Path) -> str:
"""
Load image, resize if needed, encode as base64 data URI.
-
+
Uses LRU cache based on file path and content hash.
-
+
Args:
path: Path to image file
-
+
Returns:
Data URI string (data:image/png;base64,...)
-
+
Raises:
FileNotFoundError: If image doesn't exist
ValueError: If file is not a valid image
"""
path = Path(path).resolve()
-
+
if not path.exists():
raise FileNotFoundError(f"Image not found: {path}")
-
+
if not path.is_file():
raise ValueError(f"Not a file: {path}")
-
+
# Get file hash for cache key
file_hash = _file_hash(path)
-
+
# Load with caching
return _load_cached(str(path), file_hash)
@@ -170,10 +171,10 @@ def load_image_as_data_uri(path: Path) -> str:
def make_image_content(data_uri: str) -> dict:
"""
Create image content block for LLM API.
-
+
Args:
data_uri: Base64 data URI
-
+
Returns:
Content block dict for API
"""
diff --git a/src/llm/__init__.py b/src/llm/__init__.py
index 8a00106..39affcb 100644
--- a/src/llm/__init__.py
+++ b/src/llm/__init__.py
@@ -1,5 +1,12 @@
-"""LLM module using litellm."""
+"""LLM module using httpx for Chutes API."""
-from .client import LiteLLMClient, LLMResponse, FunctionCall, CostLimitExceeded, LLMError
+from .client import CostLimitExceeded, FunctionCall, LiteLLMClient, LLMClient, LLMError, LLMResponse
-__all__ = ["LiteLLMClient", "LLMResponse", "FunctionCall", "CostLimitExceeded", "LLMError"]
+__all__ = [
+ "LLMClient",
+ "LiteLLMClient",
+ "LLMResponse",
+ "FunctionCall",
+ "CostLimitExceeded",
+ "LLMError",
+]
diff --git a/src/llm/client.py b/src/llm/client.py
index 72e048b..2bf0f9d 100644
--- a/src/llm/client.py
+++ b/src/llm/client.py
@@ -1,17 +1,18 @@
-"""LLM Client using litellm - replaces term_sdk dependency."""
+"""LLM Client using httpx for Chutes API (OpenAI-compatible)."""
from __future__ import annotations
import json
import os
-import sys
-import time
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional
+import httpx
+
class CostLimitExceeded(Exception):
"""Raised when cost limit is exceeded."""
+
def __init__(self, message: str, used: float = 0, limit: float = 0):
super().__init__(message)
self.used = used
@@ -20,6 +21,7 @@ def __init__(self, message: str, used: float = 0, limit: float = 0):
class LLMError(Exception):
"""LLM API error."""
+
def __init__(self, message: str, code: str = "unknown"):
super().__init__(message)
self.message = message
@@ -29,21 +31,22 @@ def __init__(self, message: str, code: str = "unknown"):
@dataclass
class FunctionCall:
"""Represents a function/tool call from the LLM."""
+
id: str
name: str
arguments: Dict[str, Any]
-
+
@classmethod
def from_openai(cls, call: Dict[str, Any]) -> "FunctionCall":
"""Parse from OpenAI tool_calls format."""
func = call.get("function", {})
args_str = func.get("arguments", "{}")
-
+
try:
args = json.loads(args_str)
except json.JSONDecodeError:
args = {"raw": args_str}
-
+
return cls(
id=call.get("id", ""),
name=func.get("name", ""),
@@ -54,49 +57,67 @@ def from_openai(cls, call: Dict[str, Any]) -> "FunctionCall":
@dataclass
class LLMResponse:
"""Response from the LLM."""
+
text: str = ""
function_calls: List[FunctionCall] = field(default_factory=list)
tokens: Optional[Dict[str, int]] = None
model: str = ""
finish_reason: str = ""
raw: Optional[Dict[str, Any]] = None
-
+
def has_function_calls(self) -> bool:
"""Check if response contains function calls."""
return len(self.function_calls) > 0
-class LiteLLMClient:
- """LLM Client using litellm."""
-
+class LLMClient:
+ """LLM Client using httpx for Chutes API (OpenAI-compatible format)."""
+
+ # Default Chutes API configuration
+ DEFAULT_BASE_URL = "https://api.chutes.ai/v1"
+ DEFAULT_API_KEY_ENV = "CHUTES_API_KEY"
+
def __init__(
self,
model: str,
temperature: Optional[float] = None,
max_tokens: int = 16384,
cost_limit: Optional[float] = None,
+ base_url: Optional[str] = None,
+ api_key: Optional[str] = None,
+ timeout: float = 120.0,
):
self.model = model
self.temperature = temperature
self.max_tokens = max_tokens
self.cost_limit = cost_limit or float(os.environ.get("LLM_COST_LIMIT", "10.0"))
-
+ self.base_url = base_url or os.environ.get("CHUTES_BASE_URL", self.DEFAULT_BASE_URL)
+ self.timeout = timeout
+
+ # Get API key
+ self._api_key = api_key or os.environ.get(self.DEFAULT_API_KEY_ENV)
+ if not self._api_key:
+ raise ValueError(
+ f"API key required. Set {self.DEFAULT_API_KEY_ENV} environment variable or pass api_key parameter."
+ )
+
self._total_cost = 0.0
self._total_tokens = 0
self._request_count = 0
self._input_tokens = 0
self._output_tokens = 0
self._cached_tokens = 0
-
- # Import litellm
- try:
- import litellm
- self._litellm = litellm
- # Configure litellm
- litellm.drop_params = True # Drop unsupported params silently
- except ImportError:
- raise ImportError("litellm not installed. Run: pip install litellm")
-
+
+ # Create httpx client with timeout
+ self._client = httpx.Client(
+ base_url=self.base_url,
+ headers={
+ "Authorization": f"Bearer {self._api_key}",
+ "Content-Type": "application/json",
+ },
+ timeout=httpx.Timeout(timeout, connect=30.0),
+ )
+
def _supports_temperature(self, model: str) -> bool:
"""Check if model supports temperature parameter."""
model_lower = model.lower()
@@ -104,32 +125,35 @@ def _supports_temperature(self, model: str) -> bool:
if any(x in model_lower for x in ["o1", "o3", "deepseek-r1"]):
return False
return True
-
+
def _build_tools(self, tools: Optional[List[Dict[str, Any]]]) -> Optional[List[Dict[str, Any]]]:
"""Build tools in OpenAI format."""
if not tools:
return None
-
+
result = []
for tool in tools:
- result.append({
- "type": "function",
- "function": {
- "name": tool["name"],
- "description": tool.get("description", ""),
- "parameters": tool.get("parameters", {"type": "object", "properties": {}}),
- },
- })
+ result.append(
+ {
+ "type": "function",
+ "function": {
+ "name": tool["name"],
+ "description": tool.get("description", ""),
+ "parameters": tool.get("parameters", {"type": "object", "properties": {}}),
+ },
+ }
+ )
return result
-
+
def chat(
self,
messages: List[Dict[str, Any]],
tools: Optional[List[Dict[str, Any]]] = None,
max_tokens: Optional[int] = None,
extra_body: Optional[Dict[str, Any]] = None,
+ model: Optional[str] = None,
) -> LLMResponse:
- """Send a chat request."""
+ """Send a chat request to Chutes API."""
# Check cost limit
if self._total_cost >= self.cost_limit:
raise CostLimitExceeded(
@@ -137,102 +161,145 @@ def chat(
used=self._total_cost,
limit=self.cost_limit,
)
-
- # Build request
- kwargs: Dict[str, Any] = {
- "model": self.model,
- "messages": messages,
+
+ # Build request payload
+ payload: Dict[str, Any] = {
+ "model": model or self.model,
+ "messages": self._prepare_messages(messages),
"max_tokens": max_tokens or self.max_tokens,
}
-
- if self._supports_temperature(self.model) and self.temperature is not None:
- kwargs["temperature"] = self.temperature
-
+
+ if self._supports_temperature(payload["model"]) and self.temperature is not None:
+ payload["temperature"] = self.temperature
+
if tools:
- kwargs["tools"] = self._build_tools(tools)
- kwargs["tool_choice"] = "auto"
-
- # Add extra body params (like reasoning effort)
+ payload["tools"] = self._build_tools(tools)
+ payload["tool_choice"] = "auto"
+
+        # Add extra body params (like reasoning effort); some may be ignored by the API
if extra_body:
- kwargs.update(extra_body)
-
+ payload.update(extra_body)
+
try:
- response = self._litellm.completion(**kwargs)
+ response = self._client.post("/chat/completions", json=payload)
self._request_count += 1
- except Exception as e:
- error_msg = str(e)
- if "authentication" in error_msg.lower() or "api_key" in error_msg.lower():
- raise LLMError(error_msg, code="authentication_error")
- elif "rate" in error_msg.lower() or "limit" in error_msg.lower():
- raise LLMError(error_msg, code="rate_limit")
- else:
- raise LLMError(error_msg, code="api_error")
-
+
+ # Handle HTTP errors
+ if response.status_code != 200:
+ error_body = response.text
+ try:
+ error_json = response.json()
+ error_msg = error_json.get("error", {}).get("message", error_body)
+            except (json.JSONDecodeError, KeyError, AttributeError):
+ error_msg = error_body
+
+ # Map status codes to error codes
+ if response.status_code == 401:
+ raise LLMError(error_msg, code="authentication_error")
+ elif response.status_code == 429:
+ raise LLMError(error_msg, code="rate_limit")
+ elif response.status_code >= 500:
+ raise LLMError(error_msg, code="server_error")
+ else:
+ raise LLMError(f"HTTP {response.status_code}: {error_msg}", code="api_error")
+
+ data = response.json()
+
+ except httpx.TimeoutException as e:
+ raise LLMError(f"Request timed out: {e}", code="timeout")
+ except httpx.ConnectError as e:
+ raise LLMError(f"Connection error: {e}", code="connection_error")
+ except httpx.HTTPError as e:
+ raise LLMError(f"HTTP error: {e}", code="api_error")
+
# Parse response
- result = LLMResponse(raw=response.model_dump() if hasattr(response, "model_dump") else None)
-
+ result = LLMResponse(raw=data)
+
# Extract usage
- if hasattr(response, "usage") and response.usage:
- usage = response.usage
- input_tokens = getattr(usage, "prompt_tokens", 0) or 0
- output_tokens = getattr(usage, "completion_tokens", 0) or 0
+ usage = data.get("usage", {})
+ if usage:
+ input_tokens = usage.get("prompt_tokens", 0) or 0
+ output_tokens = usage.get("completion_tokens", 0) or 0
cached_tokens = 0
-
- # Check for cached tokens
- if hasattr(usage, "prompt_tokens_details"):
- details = usage.prompt_tokens_details
- if details and hasattr(details, "cached_tokens"):
- cached_tokens = details.cached_tokens or 0
-
+
+ # Check for cached tokens (OpenAI format)
+ prompt_details = usage.get("prompt_tokens_details", {})
+ if prompt_details:
+ cached_tokens = prompt_details.get("cached_tokens", 0) or 0
+
self._input_tokens += input_tokens
self._output_tokens += output_tokens
self._cached_tokens += cached_tokens
self._total_tokens += input_tokens + output_tokens
-
+
result.tokens = {
"input": input_tokens,
"output": output_tokens,
"cached": cached_tokens,
}
-
- # Calculate cost using litellm
- try:
- cost = self._litellm.completion_cost(completion_response=response)
+
+            # Estimate cost (generic pricing, adjust per model if needed)
+            # Using conservative estimates: $3/1M input, $15/1M output
+            cost = (input_tokens * 3.0 / 1_000_000) + (output_tokens * 15.0 / 1_000_000)
self._total_cost += cost
- except Exception:
- pass # Cost calculation may fail for some models
-
+
# Extract model
- result.model = getattr(response, "model", self.model)
-
+ result.model = data.get("model", self.model)
+
# Extract choices
- if hasattr(response, "choices") and response.choices:
- choice = response.choices[0]
- message = choice.message
-
- result.finish_reason = getattr(choice, "finish_reason", "") or ""
- result.text = getattr(message, "content", "") or ""
-
+ choices = data.get("choices", [])
+ if choices:
+ choice = choices[0]
+ message = choice.get("message", {})
+
+ result.finish_reason = choice.get("finish_reason", "") or ""
+ result.text = message.get("content", "") or ""
+
# Extract function calls
- tool_calls = getattr(message, "tool_calls", None)
+ tool_calls = message.get("tool_calls", [])
if tool_calls:
for call in tool_calls:
- if hasattr(call, "function"):
- func = call.function
- args_str = getattr(func, "arguments", "{}")
- try:
- args = json.loads(args_str) if isinstance(args_str, str) else args_str
- except json.JSONDecodeError:
- args = {"raw": args_str}
-
- result.function_calls.append(FunctionCall(
- id=getattr(call, "id", "") or "",
- name=getattr(func, "name", "") or "",
+ func = call.get("function", {})
+ args_str = func.get("arguments", "{}")
+
+ try:
+ args = json.loads(args_str) if isinstance(args_str, str) else args_str
+ except json.JSONDecodeError:
+ args = {"raw": args_str}
+
+ result.function_calls.append(
+ FunctionCall(
+ id=call.get("id", "") or "",
+ name=func.get("name", "") or "",
arguments=args if isinstance(args, dict) else {},
- ))
-
+ )
+ )
+
return result
-
+
+ def _prepare_messages(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+ """Prepare messages for the API, cleaning up any incompatible fields."""
+ prepared = []
+ for msg in messages:
+ new_msg = dict(msg)
+
+ # Handle content with cache_control (Anthropic-specific, strip for OpenAI compat)
+ content = new_msg.get("content")
+ if isinstance(content, list):
+ # Convert multipart format, removing cache_control
+ cleaned_parts = []
+ for part in content:
+ if isinstance(part, dict):
+ cleaned_part = {k: v for k, v in part.items() if k != "cache_control"}
+ cleaned_parts.append(cleaned_part)
+ else:
+ cleaned_parts.append(part)
+ new_msg["content"] = cleaned_parts
+
+ prepared.append(new_msg)
+
+ return prepared
+
def get_stats(self) -> Dict[str, Any]:
"""Get usage statistics."""
return {
@@ -243,7 +310,18 @@ def get_stats(self) -> Dict[str, Any]:
"total_cost": self._total_cost,
"request_count": self._request_count,
}
-
+
def close(self):
- """Close client (no-op for litellm)."""
- pass
+ """Close the HTTP client."""
+ self._client.close()
+
+ def __enter__(self):
+ return self
+
+ def __exit__(self, exc_type, exc_val, exc_tb):
+ self.close()
+ return False
+
+
+# Alias for backward compatibility
+LiteLLMClient = LLMClient
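
A minimal end-to-end sketch of the new client (requires `CHUTES_API_KEY`; the `shell` tool spec is illustrative, not part of this diff):

```python
from src.llm.client import LLMClient

# Context-manager form relies on the __enter__/__exit__ added above.
with LLMClient(model="moonshotai/Kimi-K2.5-TEE", temperature=0.0) as llm:
    resp = llm.chat(
        messages=[{"role": "user", "content": "List the files in /tmp"}],
        tools=[
            {
                "name": "shell",  # illustrative tool schema
                "description": "Run a shell command",
                "parameters": {"type": "object", "properties": {"cmd": {"type": "string"}}},
            }
        ],
    )
    if resp.has_function_calls():
        call = resp.function_calls[0]
        print(call.name, call.arguments)
    print(llm.get_stats())  # token counts, estimated cost, request count
```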
diff --git a/src/main.py b/src/main.py
index b73454f..43ccf33 100644
--- a/src/main.py
+++ b/src/main.py
@@ -3,7 +3,6 @@
from __future__ import annotations
import os
-import sys
from pathlib import Path
from typing import Optional
@@ -11,8 +10,8 @@
from rich.console import Console
from src import __version__
-from src.config.loader import load_config, find_config_file
-from src.config.models import AgentConfig, Provider, OutputMode
+from src.config.loader import find_config_file, load_config
+from src.config.models import OutputMode, Provider
from src.core.agent import Agent
from src.output.processor import OutputProcessor
@@ -49,11 +48,9 @@ def main(
@app.command("exec")
def exec_command(
prompt: str = typer.Argument(..., help="The task/prompt for the agent"),
-
# Model/Provider options
model: Optional[str] = typer.Option(None, "--model", "-m", help="Model to use"),
provider: Optional[Provider] = typer.Option(None, "--provider", "-p", help="LLM provider"),
-
# Config options
config_file: Optional[Path] = typer.Option(
None,
@@ -61,27 +58,24 @@ def exec_command(
"-c",
help="Path to config file",
),
-
# Output options
json_mode: bool = typer.Option(False, "--json", help="Output in JSONL format"),
verbose: bool = typer.Option(False, "--verbose", "-v", help="Verbose output"),
-
# Execution options
workdir: Optional[Path] = typer.Option(None, "--workdir", "-w", help="Working directory"),
max_iterations: Optional[int] = typer.Option(None, help="Maximum iterations"),
-
# Danger mode (compatibility with Codex CLI)
dangerously_bypass_approvals: bool = typer.Option(
- False,
+ False,
"--dangerously-bypass-approvals-and-sandbox",
help="Run without sandbox/approvals (default behavior in SuperAgent)",
),
):
"""Execute a task with the agent."""
-
+
# Load configuration
config_path = config_file or find_config_file()
-
+
overrides = {}
if model:
overrides["model"] = model
@@ -93,42 +87,42 @@ def exec_command(
overrides["max_iterations"] = max_iterations
if workdir:
overrides["paths.cwd"] = str(workdir)
-
+
try:
config = load_config(config_path, overrides)
except Exception as e:
console.print(f"[red]Error loading configuration: {e}[/red]")
raise typer.Exit(1)
-
+
# Setup working directory
cwd = Path(config.paths.cwd or os.getcwd()).resolve()
if not cwd.exists():
console.print(f"[red]Working directory does not exist: {cwd}[/red]")
raise typer.Exit(1)
-
+
# Initialize output processor
output = OutputProcessor(config)
-
+
# Run agent
try:
agent = Agent(config=config, cwd=cwd, output_processor=output)
-
+
# In JSON mode, we don't print "Starting..." messages to stdout
if not json_mode:
console.print(f"[bold blue]SuperAgent v{__version__}[/bold blue]")
console.print(f"Model: [cyan]{config.model}[/cyan] ({config.provider})")
console.print(f"Working directory: [cyan]{cwd}[/cyan]")
console.print()
-
+
final_message = agent.run(prompt)
-
+
# In human mode, print the final message clearly
if not json_mode and final_message:
console.print()
console.print("[bold green]Final Result:[/bold green]")
output.print_final(final_message)
-
- except Exception as e:
+
+ except Exception:
if verbose:
console.print_exception()
else:
@@ -149,7 +143,7 @@ def show_config(
else:
console.print("No config file found, using defaults")
config = load_config()
-
+
console.print(config.model_dump_json(indent=2))
diff --git a/src/output/__init__.py b/src/output/__init__.py
index 0b96b30..9dc5ec2 100644
--- a/src/output/__init__.py
+++ b/src/output/__init__.py
@@ -1,44 +1,43 @@
"""Output module - JSONL event emission and streaming."""
from src.output.jsonl import (
- emit,
- emit_raw,
- next_item_id,
- reset_item_counter,
+ ErrorEvent,
+ ItemCompletedEvent,
+ ItemStartedEvent,
+ ItemUpdatedEvent,
ThreadStartedEvent,
- TurnStartedEvent,
TurnCompletedEvent,
TurnFailedEvent,
- ItemStartedEvent,
- ItemUpdatedEvent,
- ItemCompletedEvent,
- ErrorEvent,
+ TurnStartedEvent,
+ emit,
+ emit_raw,
make_agent_message_item,
make_command_execution_item,
+ make_error_item,
make_file_change_item,
make_todo_list_item,
- make_error_item,
+ next_item_id,
+ reset_item_counter,
)
-
from src.output.streaming import (
- StreamState,
- StreamEvent,
- StartEvent,
- TextDeltaEvent,
- ToolCallStartEvent,
- ToolCallDeltaEvent,
- ToolCallCompleteEvent,
- TokenUsageEvent,
CompleteEvent,
+ SentenceBuffer,
+ StartEvent,
+ StreamBuffer,
+ StreamCollector,
StreamContent,
+ StreamEvent,
+ StreamProcessor,
+ StreamState,
+ StreamStats,
StreamToolCall,
+ TextDeltaEvent,
TokenCounts,
- StreamStats,
- StreamProcessor,
- StreamBuffer,
+ TokenUsageEvent,
+ ToolCallCompleteEvent,
+ ToolCallDeltaEvent,
+ ToolCallStartEvent,
WordBuffer,
- SentenceBuffer,
- StreamCollector,
)
__all__ = [
diff --git a/src/output/events.py b/src/output/events.py
index dac2c7c..5c6805d 100644
--- a/src/output/events.py
+++ b/src/output/events.py
@@ -10,30 +10,32 @@
class EventType(str, Enum):
"""Types of events that can be emitted."""
+
TURN_STARTED = "turn.started"
TURN_COMPLETED = "turn.completed"
TURN_FAILED = "turn.failed"
-
+
ITEM_STARTED = "item.started"
ITEM_UPDATED = "item.updated"
ITEM_COMPLETED = "item.completed"
-
+
MESSAGE = "message"
THINKING = "thinking"
-
+
TOOL_CALL_START = "tool.call.start"
TOOL_CALL_END = "tool.call.end"
-
+
ERROR = "error"
@dataclass
class Event:
"""An event from the agent."""
+
type: EventType
timestamp: datetime = field(default_factory=datetime.now)
data: dict[str, Any] = field(default_factory=dict)
-
+
def to_dict(self) -> dict[str, Any]:
"""Convert to JSON-serializable dict."""
return {
@@ -41,7 +43,7 @@ def to_dict(self) -> dict[str, Any]:
"timestamp": self.timestamp.isoformat(),
**self.data,
}
-
+
@classmethod
def turn_started(cls, session_id: str) -> "Event":
"""Create a turn started event."""
@@ -49,7 +51,7 @@ def turn_started(cls, session_id: str) -> "Event":
type=EventType.TURN_STARTED,
data={"session_id": session_id},
)
-
+
@classmethod
def turn_completed(
cls,
@@ -72,7 +74,7 @@ def turn_completed(
},
},
)
-
+
@classmethod
def message(cls, content: str, role: str = "assistant") -> "Event":
"""Create a message event."""
@@ -80,12 +82,12 @@ def message(cls, content: str, role: str = "assistant") -> "Event":
type=EventType.MESSAGE,
data={"content": content, "role": role},
)
-
+
@classmethod
def thinking(cls) -> "Event":
"""Create a thinking event."""
return cls(type=EventType.THINKING)
-
+
@classmethod
def tool_call_start(cls, name: str, arguments: dict[str, Any]) -> "Event":
"""Create a tool call start event."""
@@ -93,7 +95,7 @@ def tool_call_start(cls, name: str, arguments: dict[str, Any]) -> "Event":
type=EventType.TOOL_CALL_START,
data={"name": name, "arguments": arguments},
)
-
+
@classmethod
def tool_call_end(
cls,
@@ -112,7 +114,7 @@ def tool_call_end(
"error": error,
},
)
-
+
@classmethod
def error(cls, message: str, details: Optional[dict[str, Any]] = None) -> "Event":
"""Create an error event."""
diff --git a/src/output/jsonl.py b/src/output/jsonl.py
index 4e85090..5d164bf 100644
--- a/src/output/jsonl.py
+++ b/src/output/jsonl.py
@@ -18,19 +18,18 @@
from __future__ import annotations
import json
-import sys
-import time
-from dataclasses import dataclass, field, asdict
+from dataclasses import asdict, dataclass, field
from typing import Any, Dict, List, Optional
-
# =============================================================================
# Thread Events
# =============================================================================
+
@dataclass
class ThreadStartedEvent:
"""Emitted when a new thread/session is started."""
+
thread_id: str
type: str = field(default="thread.started", init=False)
@@ -39,15 +38,18 @@ class ThreadStartedEvent:
# Turn Events
# =============================================================================
+
@dataclass
class TurnStartedEvent:
"""Emitted when a turn is started (user sends message)."""
+
type: str = field(default="turn.started", init=False)
@dataclass
class Usage:
"""Token usage statistics."""
+
input_tokens: int = 0
cached_input_tokens: int = 0
output_tokens: int = 0
@@ -56,6 +58,7 @@ class Usage:
@dataclass
class TurnCompletedEvent:
"""Emitted when a turn is completed successfully."""
+
usage: Dict[str, int]
type: str = field(default="turn.completed", init=False)
@@ -63,6 +66,7 @@ class TurnCompletedEvent:
@dataclass
class TurnFailedEvent:
"""Emitted when a turn fails."""
+
error: Dict[str, str]
type: str = field(default="turn.failed", init=False)
@@ -71,9 +75,11 @@ class TurnFailedEvent:
# Item Events
# =============================================================================
+
@dataclass
class ItemStartedEvent:
"""Emitted when an item starts processing."""
+
item: Dict[str, Any]
type: str = field(default="item.started", init=False)
@@ -81,6 +87,7 @@ class ItemStartedEvent:
@dataclass
class ItemUpdatedEvent:
"""Emitted when an item is updated (e.g., todo list)."""
+
item: Dict[str, Any]
type: str = field(default="item.updated", init=False)
@@ -88,6 +95,7 @@ class ItemUpdatedEvent:
@dataclass
class ItemCompletedEvent:
"""Emitted when an item completes processing."""
+
item: Dict[str, Any]
type: str = field(default="item.completed", init=False)
@@ -96,9 +104,11 @@ class ItemCompletedEvent:
# Error Events
# =============================================================================
+
@dataclass
class ErrorEvent:
"""Emitted for fatal errors."""
+
message: str
type: str = field(default="error", init=False)
@@ -107,6 +117,7 @@ class ErrorEvent:
# Item Types (for item.started/completed payloads)
# =============================================================================
+
def make_agent_message_item(item_id: str, text: str) -> Dict[str, Any]:
"""Create an agent_message item."""
return {
@@ -203,7 +214,7 @@ def reset_item_counter() -> None:
def emit(event) -> None:
"""
Emit a single JSONL event to stdout.
-
+
Args:
event: Dataclass event to emit
"""
@@ -220,7 +231,7 @@ def emit(event) -> None:
def emit_raw(data: Dict[str, Any]) -> None:
"""
Emit a raw dictionary as JSONL.
-
+
Args:
data: Dictionary to emit
"""
diff --git a/src/output/processor.py b/src/output/processor.py
index 274f783..00a32dd 100644
--- a/src/output/processor.py
+++ b/src/output/processor.py
@@ -8,8 +8,6 @@
from rich.console import Console
from rich.panel import Panel
-from rich.syntax import Syntax
-from rich.text import Text
from src.config.models import AgentConfig, OutputMode
from src.core.session import Session
@@ -19,7 +17,7 @@
class OutputProcessor:
"""Processes and formats agent output."""
-
+
def __init__(
self,
config: AgentConfig,
@@ -27,7 +25,7 @@ def __init__(
stderr: TextIO = sys.stderr,
):
"""Initialize the output processor.
-
+
Args:
config: Agent configuration
stdout: Standard output stream
@@ -36,20 +34,20 @@ def __init__(
self.config = config
self.stdout = stdout
self.stderr = stderr
-
+
# Rich console for human-readable output
self.console = Console(
file=stderr,
force_terminal=config.output.colors,
no_color=not config.output.colors,
)
-
+
# JSON mode outputs to stdout
self.json_mode = config.output.mode == OutputMode.JSON
-
+
def emit(self, event: Event) -> None:
"""Emit an event.
-
+
Args:
event: Event to emit
"""
@@ -57,19 +55,19 @@ def emit(self, event: Event) -> None:
self._emit_json(event)
else:
self._emit_human(event)
-
+
def _emit_json(self, event: Event) -> None:
"""Emit event as JSON line to stdout."""
line = json.dumps(event.to_dict())
print(line, file=self.stdout, flush=True)
-
+
def _emit_human(self, event: Event) -> None:
"""Emit event in human-readable format to stderr."""
from src.output.events import EventType
-
+
if event.type == EventType.TURN_STARTED:
self.console.print("[dim]Session started[/dim]")
-
+
elif event.type == EventType.TURN_COMPLETED:
usage = event.data.get("usage", {})
self.console.print()
@@ -78,28 +76,28 @@ def _emit_human(self, event: Event) -> None:
f"{usage.get('output_tokens', 0)} out "
f"(cached: {usage.get('cached_tokens', 0)})[/dim]"
)
-
+
elif event.type == EventType.MESSAGE:
content = event.data.get("content", "")
if content:
self.console.print()
self.console.print(Panel(content, border_style="blue"))
-
+
elif event.type == EventType.THINKING:
self.console.print("[dim]Thinking...[/dim]", end="\r")
-
+
elif event.type == EventType.TOOL_CALL_START:
name = event.data.get("name", "unknown")
self.console.print(f"[yellow]> {name}[/yellow]")
-
+
elif event.type == EventType.TOOL_CALL_END:
name = event.data.get("name", "unknown")
success = event.data.get("success", False)
output = event.data.get("output", "")
-
+
status = "[green]OK[/green]" if success else "[red]FAILED[/red]"
self.console.print(f"[dim] {status}[/dim]")
-
+
# Show truncated output
if output:
lines = output.split("\n")
@@ -108,56 +106,60 @@ def _emit_human(self, event: Event) -> None:
else:
display = output
self.console.print(f"[dim]{display}[/dim]")
-
+
elif event.type == EventType.ERROR:
message = event.data.get("message", "Unknown error")
self.console.print(f"[red]Error: {message}[/red]")
-
+
# Convenience methods
-
+
def emit_turn_started(self, session: Session) -> None:
"""Emit turn started event."""
self.emit(Event.turn_started(session.id))
-
+
def emit_turn_completed(self, session: Session, final_message: str) -> None:
"""Emit turn completed event."""
- self.emit(Event.turn_completed(
- session_id=session.id,
- final_message=final_message,
- input_tokens=session.usage.input_tokens,
- output_tokens=session.usage.output_tokens,
- cached_tokens=session.usage.cached_tokens,
- ))
-
+ self.emit(
+ Event.turn_completed(
+ session_id=session.id,
+ final_message=final_message,
+ input_tokens=session.usage.input_tokens,
+ output_tokens=session.usage.output_tokens,
+ cached_tokens=session.usage.cached_tokens,
+ )
+ )
+
def emit_message(self, content: str, role: str = "assistant") -> None:
"""Emit a message event."""
self.emit(Event.message(content, role))
-
+
def emit_assistant_message(self, content: str) -> None:
"""Emit an assistant message."""
self.emit(Event.message(content, "assistant"))
-
+
def emit_thinking(self) -> None:
"""Emit a thinking event."""
self.emit(Event.thinking())
-
+
def emit_tool_call_start(self, name: str, arguments: dict[str, Any]) -> None:
"""Emit tool call start event."""
self.emit(Event.tool_call_start(name, arguments))
-
+
def emit_tool_call_end(self, name: str, result: ToolResult) -> None:
"""Emit tool call end event."""
- self.emit(Event.tool_call_end(
- name=name,
- success=result.success,
- output=result.output,
- error=result.error,
- ))
-
+ self.emit(
+ Event.tool_call_end(
+ name=name,
+ success=result.success,
+ output=result.output,
+ error=result.error,
+ )
+ )
+
def emit_error(self, message: str, details: Optional[dict[str, Any]] = None) -> None:
"""Emit an error event."""
self.emit(Event.error(message, details))
-
+
def print_final(self, message: str) -> None:
"""Print the final message to stdout."""
if self.json_mode:
diff --git a/src/output/streaming.py b/src/output/streaming.py
index 82e092b..d9210d4 100644
--- a/src/output/streaming.py
+++ b/src/output/streaming.py
@@ -6,16 +6,17 @@
"""
from __future__ import annotations
-from dataclasses import dataclass, field
-from enum import Enum, auto
-from typing import List, Optional, Callable, Any
+
import time
-import asyncio
from collections import deque
+from dataclasses import dataclass, field
+from enum import Enum, auto
+from typing import List, Optional
class StreamState(Enum):
"""State of the stream processor."""
+
IDLE = auto()
STREAMING_TEXT = auto()
STREAMING_TOOL_CALL = auto()
@@ -26,24 +27,28 @@ class StreamState(Enum):
@dataclass
class StreamEvent:
"""Base class for stream events."""
+
pass
@dataclass
class StartEvent(StreamEvent):
"""Event indicating stream start."""
+
pass
-@dataclass
+@dataclass
class TextDeltaEvent(StreamEvent):
"""Event containing a text delta."""
+
delta: str
@dataclass
class ToolCallStartEvent(StreamEvent):
"""Event indicating start of a tool call."""
+
id: str
name: str
@@ -51,6 +56,7 @@ class ToolCallStartEvent(StreamEvent):
@dataclass
class ToolCallDeltaEvent(StreamEvent):
"""Event containing tool call argument delta."""
+
id: str
arguments: str
@@ -58,12 +64,14 @@ class ToolCallDeltaEvent(StreamEvent):
@dataclass
class ToolCallCompleteEvent(StreamEvent):
"""Event indicating tool call completion."""
+
id: str
@dataclass
class TokenUsageEvent(StreamEvent):
"""Event containing token usage information."""
+
prompt: int
completion: int
@@ -71,21 +79,24 @@ class TokenUsageEvent(StreamEvent):
@dataclass
class CompleteEvent(StreamEvent):
"""Event indicating stream completion."""
+
pass
@dataclass
class ErrorEvent(StreamEvent):
"""Event indicating an error occurred."""
+
message: str
@dataclass
class TokenCounts:
"""Token usage counts for prompt and completion."""
+
prompt: int = 0
completion: int = 0
-
+
def total(self) -> int:
"""Return total token count."""
return self.prompt + self.completion
@@ -94,15 +105,17 @@ def total(self) -> int:
@dataclass
class StreamToolCall:
"""Represents a tool call being streamed."""
+
id: str
name: str
arguments: str = ""
complete: bool = False
-
+
def parse_arguments(self) -> Optional[dict]:
"""Parse arguments as JSON if complete."""
if self.complete:
import json
+
try:
return json.loads(self.arguments)
-            except:
+            except json.JSONDecodeError:
@@ -113,32 +126,33 @@ def parse_arguments(self) -> Optional[dict]:
@dataclass
class StreamContent:
"""Accumulated content from a stream."""
+
text: str = ""
tool_calls: List[StreamToolCall] = field(default_factory=list)
tokens: TokenCounts = field(default_factory=TokenCounts)
-
+
def append_text(self, delta: str):
"""Append text delta to content."""
self.text += delta
-
+
def start_tool_call(self, id: str, name: str):
"""Start a new tool call."""
self.tool_calls.append(StreamToolCall(id=id, name=name))
-
+
def append_tool_call(self, id: str, arguments: str):
"""Append arguments to an existing tool call."""
for tc in self.tool_calls:
if tc.id == id:
tc.arguments += arguments
break
-
+
def complete_tool_call(self, id: str):
"""Mark a tool call as complete."""
for tc in self.tool_calls:
if tc.id == id:
tc.complete = True
break
-
+
def has_content(self) -> bool:
"""Check if any content has been accumulated."""
return bool(self.text) or bool(self.tool_calls)
@@ -147,6 +161,7 @@ def has_content(self) -> bool:
@dataclass
class StreamStats:
"""Statistics about a stream."""
+
state: StreamState
event_count: int
text_length: int
@@ -158,7 +173,7 @@ class StreamStats:
class StreamProcessor:
"""Process stream events and accumulate content."""
-
+
def __init__(self):
self.state = StreamState.IDLE
self.content = StreamContent()
@@ -167,70 +182,70 @@ def __init__(self):
self.first_token_time: Optional[float] = None
self.last_event_time: Optional[float] = None
self.event_count = 0
-
+
def process(self, event: StreamEvent):
"""Process a stream event."""
now = time.time()
-
+
if self.start_time is None:
self.start_time = now
-
+
self.last_event_time = now
self.event_count += 1
-
+
if isinstance(event, StartEvent):
self.state = StreamState.STREAMING_TEXT
-
+
elif isinstance(event, TextDeltaEvent):
if self.first_token_time is None:
self.first_token_time = now
self.content.append_text(event.delta)
self.state = StreamState.STREAMING_TEXT
-
+
elif isinstance(event, ToolCallStartEvent):
self.content.start_tool_call(event.id, event.name)
self.state = StreamState.STREAMING_TOOL_CALL
-
+
elif isinstance(event, ToolCallDeltaEvent):
self.content.append_tool_call(event.id, event.arguments)
-
+
elif isinstance(event, ToolCallCompleteEvent):
self.content.complete_tool_call(event.id)
-
+
elif isinstance(event, TokenUsageEvent):
self.content.tokens.prompt = event.prompt
self.content.tokens.completion = event.completion
-
+
elif isinstance(event, CompleteEvent):
self.state = StreamState.COMPLETE
-
+
elif isinstance(event, ErrorEvent):
self.state = StreamState.ERROR
-
+
self.buffer.append(event)
-
+
def time_to_first_token(self) -> Optional[float]:
"""Get time to first token in seconds."""
if self.start_time and self.first_token_time:
return self.first_token_time - self.start_time
return None
-
+
def elapsed(self) -> Optional[float]:
"""Get elapsed time since stream start."""
if self.start_time:
return time.time() - self.start_time
return None
-
+
def is_complete(self) -> bool:
"""Check if stream is complete or errored."""
return self.state in (StreamState.COMPLETE, StreamState.ERROR)
-
+
def drain_events(self) -> List[StreamEvent]:
"""Drain and return all buffered events."""
events = list(self.buffer)
self.buffer.clear()
return events
-
+
def stats(self) -> StreamStats:
"""Get current stream statistics."""
return StreamStats(
@@ -246,16 +261,16 @@ def stats(self) -> StreamStats:
class StreamBuffer:
"""Buffer for rate limiting output."""
-
+
def __init__(self, min_interval: float = 0.01):
self.buffer = ""
self.min_interval = min_interval
self.last_flush = time.time()
-
+
def push(self, text: str):
"""Push text to buffer."""
self.buffer += text
-
+
def flush_if_ready(self) -> Optional[str]:
"""Flush buffer if minimum interval has passed."""
if time.time() - self.last_flush >= self.min_interval and self.buffer:
@@ -264,14 +279,14 @@ def flush_if_ready(self) -> Optional[str]:
self.buffer = ""
return result
return None
-
+
def flush(self) -> str:
"""Force flush all buffered content."""
self.last_flush = time.time()
result = self.buffer
self.buffer = ""
return result
-
+
def is_empty(self) -> bool:
"""Check if buffer is empty."""
return not self.buffer
@@ -279,27 +294,27 @@ def is_empty(self) -> bool:
class WordBuffer:
"""Buffer for word-boundary aligned output."""
-
+
def __init__(self, min_words: int = 3):
self.buffer = ""
self.min_words = min_words
-
+
def push(self, text: str):
"""Push text to buffer."""
self.buffer += text
-
+
def flush_words(self) -> Optional[str]:
"""Flush complete words if minimum word count reached."""
word_count = len(self.buffer.split())
if word_count >= self.min_words:
# Find last whitespace
- pos = self.buffer.rfind(' ')
+ pos = self.buffer.rfind(" ")
if pos > 0:
- result = self.buffer[:pos+1]
- self.buffer = self.buffer[pos+1:]
+ result = self.buffer[: pos + 1]
+ self.buffer = self.buffer[pos + 1 :]
return result
return None
-
+
def flush(self) -> str:
"""Force flush all buffered content."""
result = self.buffer
@@ -309,32 +324,32 @@ def flush(self) -> str:
class SentenceBuffer:
"""Buffer for sentence-boundary aligned output."""
-
+
def __init__(self):
self.buffer = ""
-
+
def push(self, text: str):
"""Push text to buffer."""
self.buffer += text
-
+
def flush_sentences(self) -> Optional[str]:
"""Flush complete sentences."""
endings = [". ", "! ", "? ", ".\n", "!\n", "?\n"]
last_end = 0
-
+
for ending in endings:
pos = self.buffer.rfind(ending)
if pos >= 0:
end = pos + len(ending)
if end > last_end:
last_end = end
-
+
if last_end > 0:
result = self.buffer[:last_end]
self.buffer = self.buffer[last_end:]
return result
return None
-
+
def flush(self) -> str:
"""Force flush all buffered content."""
result = self.buffer
@@ -344,18 +359,18 @@ def flush(self) -> str:
class StreamCollector:
"""Collect all stream content."""
-
+
def __init__(self):
self.processor = StreamProcessor()
-
+
def process(self, event: StreamEvent):
"""Process a stream event."""
self.processor.process(event)
-
+
def is_complete(self) -> bool:
"""Check if stream is complete."""
return self.processor.is_complete()
-
+
def result(self) -> dict:
"""Get collected results."""
return {
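
A sketch of the stream-processing flow above, pairing `StreamProcessor` with `WordBuffer` for word-aligned printing:

```python
from src.output.streaming import (
    CompleteEvent,
    StartEvent,
    StreamProcessor,
    TextDeltaEvent,
    WordBuffer,
)

proc = StreamProcessor()
buf = WordBuffer(min_words=2)

events = [StartEvent(), TextDeltaEvent("Hello "), TextDeltaEvent("streaming "), TextDeltaEvent("world"), CompleteEvent()]
for ev in events:
    proc.process(ev)
    if isinstance(ev, TextDeltaEvent):
        buf.push(ev.delta)
        chunk = buf.flush_words()  # flushes only on word boundaries
        if chunk:
            print(chunk, end="")
print(buf.flush())  # drain the remainder

assert proc.is_complete() and proc.content.text == "Hello streaming world"
```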
diff --git a/src/prompts/system.py b/src/prompts/system.py
index 48b11c0..6bb21c1 100644
--- a/src/prompts/system.py
+++ b/src/prompts/system.py
@@ -13,7 +13,6 @@
from pathlib import Path
from typing import Dict, List, Optional
-
# =============================================================================
# Context Strings
# =============================================================================
@@ -67,15 +66,16 @@
# Token Estimation
# =============================================================================
+
def estimate_tokens(text: str) -> int:
"""Estimate token count for text.
-
+
Uses a simple heuristic based on character count.
More accurate estimation would require a tokenizer.
-
+
Args:
text: Text to estimate tokens for.
-
+
Returns:
Estimated token count.
"""
@@ -89,39 +89,41 @@ def estimate_tokens(text: str) -> int:
# Data Classes
# =============================================================================
+
@dataclass
class PromptSection:
"""A section of the system prompt.
-
+
Attributes:
name: Section name (used as header).
content: Section content.
enabled: Whether this section is enabled.
priority: Priority (higher = earlier in prompt).
"""
+
name: str
content: str
enabled: bool = True
priority: int = 0
-
+
def with_priority(self, priority: int) -> PromptSection:
"""Set priority and return self for chaining.
-
+
Args:
priority: Priority value (higher = earlier).
-
+
Returns:
Self for method chaining.
"""
self.priority = priority
return self
-
+
def set_enabled(self, enabled: bool) -> PromptSection:
"""Set enabled state and return self for chaining.
-
+
Args:
enabled: Whether section is enabled.
-
+
Returns:
Self for method chaining.
"""
@@ -132,10 +134,10 @@ def set_enabled(self, enabled: bool) -> PromptSection:
@dataclass
class SystemPrompt:
"""System prompt configuration.
-
+
Supports base prompts, sections, variables, capability contexts,
custom instructions, and personas.
-
+
Attributes:
base: Base prompt text.
sections: Sections to include.
@@ -146,6 +148,7 @@ class SystemPrompt:
custom_instructions: Custom instructions.
persona: Persona/role.
"""
+
base: Optional[str] = None
sections: List[PromptSection] = field(default_factory=list)
variables: Dict[str, str] = field(default_factory=dict)
@@ -155,133 +158,130 @@ class SystemPrompt:
custom_instructions: Optional[str] = None
persona: Optional[str] = None
_token_count: int = 0
-
+
@classmethod
def new(cls) -> SystemPrompt:
"""Create a new system prompt.
-
+
Returns:
New SystemPrompt instance.
"""
return cls()
-
+
@classmethod
def with_base(cls, base: str) -> SystemPrompt:
"""Create with base text.
-
+
Args:
base: Base prompt text.
-
+
Returns:
New SystemPrompt with base set.
"""
prompt = cls(base=base)
prompt._recalculate_tokens()
return prompt
-
+
def set_base(self, base: str) -> None:
"""Set base prompt.
-
+
Args:
base: Base prompt text.
"""
self.base = base
self._recalculate_tokens()
-
+
def add_section(self, section: PromptSection) -> None:
"""Add a section.
-
+
Args:
section: Section to add.
"""
self.sections.append(section)
self._recalculate_tokens()
-
+
def remove_section(self, name: str) -> None:
"""Remove a section by name.
-
+
Args:
name: Name of section to remove.
"""
self.sections = [s for s in self.sections if s.name != name]
self._recalculate_tokens()
-
+
def set_variable(self, key: str, value: str) -> None:
"""Set a variable.
-
+
Args:
key: Variable name.
value: Variable value.
"""
self.variables[key] = value
self._recalculate_tokens()
-
+
def set_persona(self, persona: str) -> None:
"""Set persona.
-
+
Args:
persona: Persona/role description.
"""
self.persona = persona
self._recalculate_tokens()
-
+
def set_custom_instructions(self, instructions: str) -> None:
"""Set custom instructions.
-
+
Args:
instructions: Custom instructions text.
"""
self.custom_instructions = instructions
self._recalculate_tokens()
-
+
def enable_code_execution(self) -> None:
"""Enable code execution context."""
self.code_execution = True
self._recalculate_tokens()
-
+
def enable_file_operations(self) -> None:
"""Enable file operations context."""
self.file_operations = True
self._recalculate_tokens()
-
+
def enable_web_search(self) -> None:
"""Enable web search context."""
self.web_search = True
self._recalculate_tokens()
-
+
def token_count(self) -> int:
"""Get token count estimate.
-
+
Returns:
Estimated token count.
"""
return self._token_count
-
+
def render(self) -> Optional[str]:
"""Render the full system prompt.
-
+
Combines persona, base, sections (sorted by priority),
capability contexts, and custom instructions.
-
+
Returns:
Rendered prompt string, or None if empty.
"""
parts: List[str] = []
-
+
# Persona
if self.persona:
parts.append(self.persona)
-
+
# Base prompt
if self.base:
rendered = self._render_template(self.base)
parts.append(rendered)
-
+
# Sections (sorted by priority, higher first)
- sorted_sections = sorted(
- self.sections,
- key=lambda s: -s.priority
- )
+ sorted_sections = sorted(self.sections, key=lambda s: -s.priority)
for section in sorted_sections:
if section.enabled:
content = self._render_template(section.content)
@@ -289,7 +289,7 @@ def render(self) -> Optional[str]:
parts.append(f"## {section.name}\n{content}")
else:
parts.append(content)
-
+
# Capability contexts
if self.code_execution:
parts.append(CODE_EXECUTION_CONTEXT)
@@ -297,24 +297,24 @@ def render(self) -> Optional[str]:
parts.append(FILE_OPERATIONS_CONTEXT)
if self.web_search:
parts.append(WEB_SEARCH_CONTEXT)
-
+
# Custom instructions
if self.custom_instructions:
parts.append(f"## Custom Instructions\n{self.custom_instructions}")
-
+
if not parts:
return None
-
+
return "\n\n".join(parts)
-
+
def _render_template(self, template: str) -> str:
"""Render template with variables.
-
+
Supports both {{key}} and ${key} syntax.
-
+
Args:
template: Template string.
-
+
Returns:
Rendered string with variables substituted.
"""
@@ -325,7 +325,7 @@ def _render_template(self, template: str) -> str:
# Support ${key} syntax
result = result.replace(f"${{{key}}}", value)
return result
-
+
def _recalculate_tokens(self) -> None:
"""Recalculate token count estimate."""
rendered = self.render()
@@ -339,11 +339,12 @@ def _recalculate_tokens(self) -> None:
# Builder Pattern
# =============================================================================
+
class SystemPromptBuilder:
"""Builder for system prompts.
-
+
Provides a fluent interface for constructing SystemPrompt instances.
-
+
Example:
prompt = (SystemPromptBuilder()
.persona("You are a helpful assistant.")
@@ -352,118 +353,109 @@ class SystemPromptBuilder:
.code_execution()
.build())
"""
-
+
def __init__(self) -> None:
"""Create a new builder."""
self._prompt = SystemPrompt()
-
+
def base(self, base: str) -> SystemPromptBuilder:
"""Set base prompt.
-
+
Args:
base: Base prompt text.
-
+
Returns:
Self for method chaining.
"""
self._prompt.base = base
return self
-
+
def persona(self, persona: str) -> SystemPromptBuilder:
"""Set persona.
-
+
Args:
persona: Persona/role description.
-
+
Returns:
Self for method chaining.
"""
self._prompt.persona = persona
return self
-
+
def section(
- self,
- name: str,
- content: str,
- priority: int = 0,
- enabled: bool = True
+ self, name: str, content: str, priority: int = 0, enabled: bool = True
) -> SystemPromptBuilder:
"""Add a section.
-
+
Args:
name: Section name (used as header).
content: Section content.
priority: Priority (higher = earlier in prompt).
enabled: Whether section is enabled.
-
+
Returns:
Self for method chaining.
"""
self._prompt.sections.append(
- PromptSection(
- name=name,
- content=content,
- priority=priority,
- enabled=enabled
- )
+ PromptSection(name=name, content=content, priority=priority, enabled=enabled)
)
return self
-
+
def variable(self, key: str, value: str) -> SystemPromptBuilder:
"""Add a variable.
-
+
Args:
key: Variable name.
value: Variable value.
-
+
Returns:
Self for method chaining.
"""
self._prompt.variables[key] = value
return self
-
+
def custom_instructions(self, instructions: str) -> SystemPromptBuilder:
"""Set custom instructions.
-
+
Args:
instructions: Custom instructions text.
-
+
Returns:
Self for method chaining.
"""
self._prompt.custom_instructions = instructions
return self
-
+
def code_execution(self) -> SystemPromptBuilder:
"""Enable code execution context.
-
+
Returns:
Self for method chaining.
"""
self._prompt.code_execution = True
return self
-
+
def file_operations(self) -> SystemPromptBuilder:
"""Enable file operations context.
-
+
Returns:
Self for method chaining.
"""
self._prompt.file_operations = True
return self
-
+
def web_search(self) -> SystemPromptBuilder:
"""Enable web search context.
-
+
Returns:
Self for method chaining.
"""
self._prompt.web_search = True
return self
-
+
def build(self) -> SystemPrompt:
"""Build the system prompt.
-
+
Returns:
Configured SystemPrompt instance.
"""
@@ -475,59 +467,64 @@ def build(self) -> SystemPrompt:
# Presets
# =============================================================================
+
class Presets:
"""Predefined system prompts for common use cases."""
-
+
@staticmethod
def coding_assistant() -> SystemPrompt:
"""Default coding assistant prompt.
-
+
Returns:
SystemPrompt configured for coding assistance.
"""
- return (SystemPromptBuilder()
+ return (
+ SystemPromptBuilder()
.persona("You are Fabric, an expert AI coding assistant.")
.base(CODING_ASSISTANT_BASE)
.code_execution()
.file_operations()
- .build())
-
+ .build()
+ )
+
@staticmethod
def research_assistant() -> SystemPrompt:
"""Research assistant prompt.
-
+
Returns:
SystemPrompt configured for research assistance.
"""
- return (SystemPromptBuilder()
+ return (
+ SystemPromptBuilder()
.persona("You are a helpful research assistant with access to web search.")
.base("Help the user find and analyze information. Cite sources when possible.")
.web_search()
- .build())
-
+ .build()
+ )
+
@staticmethod
def code_reviewer() -> SystemPrompt:
"""Code review prompt.
-
+
Returns:
SystemPrompt configured for code review.
"""
- return (SystemPromptBuilder()
+ return (
+ SystemPromptBuilder()
.persona("You are an expert code reviewer.")
.base(CODE_REVIEWER_BASE)
.file_operations()
- .build())
-
+ .build()
+ )
+
@staticmethod
def minimal() -> SystemPrompt:
"""Minimal assistant prompt.
-
+
Returns:
SystemPrompt with minimal configuration.
"""
- return (SystemPromptBuilder()
- .base("You are a helpful assistant. Be concise.")
- .build())
+ return SystemPromptBuilder().base("You are a helpful assistant. Be concise.").build()
# =============================================================================
@@ -779,28 +776,28 @@ def get_system_prompt(
shell: Optional[str] = None,
) -> str:
"""Get the full system prompt with environment context.
-
+
Uses the SYSTEM_PROMPT constant which includes autonomous behavior
and mandatory verification plan instructions.
-
+
Args:
cwd: Current working directory.
shell: Shell being used.
-
+
Returns:
Complete system prompt string.
"""
# Use the SYSTEM_PROMPT constant directly (includes all autonomous behavior instructions)
cwd_str = str(cwd) if cwd else "/app"
shell_str = shell or "/bin/sh"
-
+
# Add environment section
env_lines = [
f"- Working directory: {cwd_str}",
f"- Platform: {platform.system()}",
f"- Shell: {shell_str}",
]
-
+
return f"{SYSTEM_PROMPT}\n\n# Environment\n" + "\n".join(env_lines)
diff --git a/src/tools/__init__.py b/src/tools/__init__.py
index 38a99c4..a80d77f 100644
--- a/src/tools/__init__.py
+++ b/src/tools/__init__.py
@@ -1,22 +1,21 @@
"""Tools module - registry and tool implementations."""
-from src.tools.base import ToolResult, BaseTool, ToolMetadata
+# Individual tools
+from src.tools.apply_patch import ApplyPatchTool
+from src.tools.base import BaseTool, ToolMetadata, ToolResult
+from src.tools.list_dir import ListDirTool
+from src.tools.read_file import ReadFileTool
from src.tools.registry import (
- ToolRegistry,
+ CachedResult,
ExecutorConfig,
ExecutorStats,
+ ToolRegistry,
ToolStats,
- CachedResult,
)
-from src.tools.specs import get_all_tools, get_tool_spec, TOOL_SPECS
-
-# Individual tools
-from src.tools.apply_patch import ApplyPatchTool
-from src.tools.read_file import ReadFileTool
-from src.tools.write_file import WriteFileTool
-from src.tools.list_dir import ListDirTool
from src.tools.search_files import SearchFilesTool
+from src.tools.specs import TOOL_SPECS, get_all_tools, get_tool_spec
from src.tools.view_image import view_image
+from src.tools.write_file import WriteFileTool
__all__ = [
# Base
diff --git a/src/tools/apply_patch.py b/src/tools/apply_patch.py
index cfff32c..593c6a8 100644
--- a/src/tools/apply_patch.py
+++ b/src/tools/apply_patch.py
@@ -7,23 +7,22 @@
from __future__ import annotations
-import os
import re
from dataclasses import dataclass, field
-from enum import Enum
from pathlib import Path
from typing import Any, List, Optional, Tuple
from src.tools.base import BaseTool, ToolResult
-
# =============================================================================
# Data Structures
# =============================================================================
+
@dataclass
class HunkLine:
"""A single line in a hunk."""
+
type: str # "context", "add", "remove"
content: str
@@ -31,6 +30,7 @@ class HunkLine:
@dataclass
class Hunk:
"""A parsed hunk from a unified diff."""
+
old_start: int
old_count: int
new_start: int
@@ -41,6 +41,7 @@ class Hunk:
@dataclass
class FileChange:
"""A file change from a unified diff."""
+
old_path: Optional[Path]
new_path: Optional[Path]
hunks: List[Hunk] = field(default_factory=list)
@@ -53,40 +54,41 @@ class FileChange:
# Unified Diff Parser (matches fabric-core)
# =============================================================================
+
def parse_file_path(path_str: str) -> Optional[Path]:
"""Parse a file path from diff header.
-
+
Handles formats: a/path, b/path, or just path
"""
path = path_str.strip()
-
+
# Remove a/ or b/ prefix
if path.startswith("a/"):
path = path[2:]
elif path.startswith("b/"):
path = path[2:]
-
+
# Remove timestamp if present (e.g., "file.txt\t2024-01-01 00:00:00")
path = path.split("\t")[0].strip()
-
+
if path == "/dev/null":
return Path("/dev/null")
-
+
return Path(path) if path else None
def parse_hunk_header(line: str) -> Optional[Hunk]:
"""Parse a hunk header like '@@ -1,5 +1,6 @@'.
-
+
Returns Hunk with start/count info but empty lines.
"""
# Strip @@ markers
line = line.strip("@").strip()
parts = line.split()
-
+
if len(parts) < 2:
return None
-
+
def parse_range(s: str) -> Tuple[int, int]:
"""Parse range like '1,5' or '1' into (start, count)."""
s = s.lstrip("-+")
@@ -94,7 +96,7 @@ def parse_range(s: str) -> Tuple[int, int]:
parts = s.split(",")
return int(parts[0]), int(parts[1])
return int(s), 1
-
+
try:
old_start, old_count = parse_range(parts[0])
new_start, new_count = parse_range(parts[1])
@@ -110,19 +112,19 @@ def parse_range(s: str) -> Tuple[int, int]:
def parse_unified_diff(patch: str) -> List[FileChange]:
"""Parse a unified diff into file changes.
-
+
Matches fabric-core parse_unified_diff() implementation.
"""
file_changes: List[FileChange] = []
current_change: Optional[FileChange] = None
current_hunk: Optional[Hunk] = None
-
+
lines = patch.splitlines()
i = 0
-
+
while i < len(lines):
line = lines[i]
-
+
# Detect file header: --- a/path
if line.startswith("--- "):
# Save previous change
@@ -131,16 +133,16 @@ def parse_unified_diff(patch: str) -> List[FileChange]:
current_change.hunks.append(current_hunk)
current_hunk = None
file_changes.append(current_change)
-
+
old_path = parse_file_path(line[4:])
-
+
# Look for +++ line
if i + 1 < len(lines) and lines[i + 1].startswith("+++ "):
new_path = parse_file_path(lines[i + 1][4:])
-
+
is_new_file = old_path is not None and str(old_path) == "/dev/null"
is_deleted = new_path is not None and str(new_path) == "/dev/null"
-
+
current_change = FileChange(
old_path=None if is_new_file else old_path,
new_path=None if is_deleted else new_path,
@@ -149,17 +151,17 @@ def parse_unified_diff(patch: str) -> List[FileChange]:
)
i += 2
continue
-
+
# Detect hunk header: @@ -1,5 +1,6 @@
if line.startswith("@@ "):
# Save previous hunk
if current_change is not None and current_hunk is not None:
current_change.hunks.append(current_hunk)
-
+
current_hunk = parse_hunk_header(line)
i += 1
continue
-
+
# Parse hunk lines
if current_hunk is not None:
if line.startswith("+") and not line.startswith("+++"):
@@ -172,15 +174,15 @@ def parse_unified_diff(patch: str) -> List[FileChange]:
elif line.startswith("\\"):
# "\ No newline at end of file" - ignore
pass
-
+
i += 1
-
+
# Save final change and hunk
if current_change is not None:
if current_hunk is not None:
current_change.hunks.append(current_hunk)
file_changes.append(current_change)
-
+
return file_changes
@@ -188,15 +190,16 @@ def parse_unified_diff(patch: str) -> List[FileChange]:
# Hunk Application (matches fabric-core with fuzzy matching)
# =============================================================================
+
def matches_at_position(lines: List[str], match_lines: List[str], start: int) -> bool:
"""Check if lines match at a given position (with whitespace tolerance)."""
if start + len(match_lines) > len(lines):
return False
-
+
for i, expected in enumerate(match_lines):
if lines[start + i].strip() != expected.strip():
return False
-
+
return True
@@ -206,22 +209,19 @@ def find_hunk_position(
suggested_start: int,
) -> int:
"""Find the best position to apply a hunk, with fuzzy matching.
-
+
Matches fabric-core find_hunk_position() - searches ±50 lines.
"""
# Extract context and remove lines for matching
- match_lines = [
- hl.content for hl in hunk.lines
- if hl.type in ("context", "remove")
- ]
-
+ match_lines = [hl.content for hl in hunk.lines if hl.type in ("context", "remove")]
+
if not match_lines:
return suggested_start
-
+
# Try exact position first
if matches_at_position(lines, match_lines, suggested_start):
return suggested_start
-
+
# Search nearby positions (within 50 lines)
for offset in range(1, 51):
# Try before
@@ -229,16 +229,16 @@ def find_hunk_position(
pos = suggested_start - offset
if matches_at_position(lines, match_lines, pos):
return pos
-
+
# Try after
pos = suggested_start + offset
if pos < len(lines) and matches_at_position(lines, match_lines, pos):
return pos
-
+
# If we can't find a match but position is valid, use it anyway
if suggested_start <= len(lines):
return suggested_start
-
+
raise ValueError(f"Could not find matching context for hunk at line {hunk.old_start}")
@@ -247,39 +247,33 @@ def apply_hunks_to_lines(
hunks: List[Hunk],
) -> str:
"""Apply hunks to existing lines.
-
+
Applies hunks in reverse order to maintain line numbers.
"""
result_lines = list(original_lines)
-
+
# Apply in reverse order
for hunk in reversed(hunks):
start_idx = hunk.old_start - 1 if hunk.old_start > 0 else 0
-
+
# Find actual position
actual_start = find_hunk_position(result_lines, hunk, start_idx)
-
+
# Count lines to remove
- lines_to_remove = sum(
- 1 for hl in hunk.lines
- if hl.type in ("remove", "context")
- )
-
+ lines_to_remove = sum(1 for hl in hunk.lines if hl.type in ("remove", "context"))
+
# Build replacement
- replacement = [
- hl.content for hl in hunk.lines
- if hl.type in ("add", "context")
- ]
-
+ replacement = [hl.content for hl in hunk.lines if hl.type in ("add", "context")]
+
# Apply replacement
end_idx = min(actual_start + lines_to_remove, len(result_lines))
result_lines = result_lines[:actual_start] + replacement + result_lines[end_idx:]
-
+
# Join with newlines
content = "\n".join(result_lines)
if content and not content.endswith("\n"):
content += "\n"
-
+
return content
@@ -290,7 +284,7 @@ def build_new_content(hunks: List[Hunk]) -> str:
for hl in hunk.lines:
if hl.type in ("add", "context"):
lines.append(hl.content)
-
+
content = "\n".join(lines)
if content and not content.endswith("\n"):
content += "\n"
@@ -301,27 +295,28 @@ def build_new_content(hunks: List[Hunk]) -> str:
# File Change Application
# =============================================================================
+
def apply_file_change(
change: FileChange,
cwd: Path,
dry_run: bool = False,
) -> str:
"""Apply a single file change."""
-
+
# Handle deletion
if change.is_deleted and change.old_path:
full_path = cwd / change.old_path
if not dry_run:
full_path.unlink()
return f" D {change.old_path}"
-
+
# Get target path
target_path = change.new_path or change.old_path
if not target_path:
raise ValueError("No file path specified")
-
+
full_path = cwd / target_path
-
+
# Handle new file
if change.is_new_file:
content = build_new_content(change.hunks)
@@ -329,16 +324,16 @@ def apply_file_change(
full_path.parent.mkdir(parents=True, exist_ok=True)
full_path.write_text(content, encoding="utf-8")
return f" A {target_path}"
-
+
# Handle modification
original_content = full_path.read_text(encoding="utf-8")
original_lines = original_content.splitlines()
-
+
new_content = apply_hunks_to_lines(original_lines, change.hunks)
-
+
if not dry_run:
full_path.write_text(new_content, encoding="utf-8")
-
+
return f" M {target_path}"
@@ -348,23 +343,23 @@ def apply_unified_diff(
dry_run: bool = False,
) -> str:
"""Apply a unified diff to the filesystem.
-
+
Main entry point matching fabric-core apply_unified_diff().
"""
file_changes = parse_unified_diff(patch)
-
+
if not file_changes:
return "No changes to apply"
-
+
report = []
modified_files = []
-
+
for change in file_changes:
result = apply_file_change(change, cwd, dry_run)
report.append(result)
if change.new_path:
modified_files.append(str(change.new_path))
-
+
action = "Would apply" if dry_run else "Applied"
return f"{action} changes to {len(modified_files)} file(s):\n" + "\n".join(report)
@@ -373,30 +368,31 @@ def apply_unified_diff(
# Legacy Format Support (*** Begin Patch)
# =============================================================================
+
def parse_legacy_patch(patch: str) -> List[FileChange]:
"""Parse legacy *** Begin Patch format."""
file_changes: List[FileChange] = []
-
+
# Extract content between markers
match = re.search(r"\*\*\* Begin Patch\s*\n(.*?)\*\*\* End Patch", patch, re.DOTALL)
if not match:
return []
-
+
content = match.group(1)
-
+
# Split into file operations
file_pattern = r"\*\*\* (Add|Delete|Update) File: (.+?)(?=\n\*\*\* (?:Add|Delete|Update)|$)"
-
+
for file_match in re.finditer(file_pattern, content, re.DOTALL):
op_type = file_match.group(1).lower()
file_path = file_match.group(2).strip()
-
+
# Get content after header
start = file_match.end()
remaining = content[start:]
next_file = re.search(r"\*\*\* (?:Add|Delete|Update) File:", remaining)
- file_content = remaining[:next_file.start()] if next_file else remaining
-
+ file_content = remaining[: next_file.start()] if next_file else remaining
+
if op_type == "add":
change = FileChange(
old_path=None,
@@ -413,14 +409,16 @@ def parse_legacy_patch(patch: str) -> List[FileChange]:
if hunk.lines:
change.hunks.append(hunk)
file_changes.append(change)
-
+
elif op_type == "delete":
- file_changes.append(FileChange(
- old_path=Path(file_path),
- new_path=None,
- is_deleted=True,
- ))
-
+ file_changes.append(
+ FileChange(
+ old_path=Path(file_path),
+ new_path=None,
+ is_deleted=True,
+ )
+ )
+
elif op_type == "update":
change = FileChange(
old_path=Path(file_path),
@@ -443,7 +441,7 @@ def parse_legacy_patch(patch: str) -> List[FileChange]:
if current_hunk:
change.hunks.append(current_hunk)
file_changes.append(change)
-
+
return file_changes
@@ -451,33 +449,34 @@ def parse_legacy_patch(patch: str) -> List[FileChange]:
# Tool Implementation
# =============================================================================
+
class ApplyPatchTool(BaseTool):
"""Tool for applying file patches.
-
+
Supports both standard unified diff format and legacy *** Begin Patch format.
"""
-
+
name = "apply_patch"
description = "Applies file patches using unified diff or custom format."
-
+
def execute(self, **kwargs: Any) -> ToolResult:
"""Apply a patch.
-
+
Args:
**kwargs: Tool arguments
- patch: The patch content (unified diff or *** Begin Patch format)
- dry_run: If True, don't actually modify files
-
+
Returns:
ToolResult with success/failure info
"""
# Extract parameters from kwargs
patch: str = kwargs.get("patch", "")
dry_run: bool = kwargs.get("dry_run", False)
-
+
if not patch:
return ToolResult.fail("Missing required parameter: patch")
-
+
try:
# Detect format and parse
if "*** Begin Patch" in patch:
@@ -485,26 +484,26 @@ def execute(self, **kwargs: Any) -> ToolResult:
file_changes = parse_legacy_patch(patch)
if not file_changes:
return ToolResult.fail("No valid operations in patch")
-
+
report = []
for change in file_changes:
result = apply_file_change(change, self.cwd, dry_run)
report.append(result)
-
+
action = "Would apply" if dry_run else "Applied"
return ToolResult.ok(f"{action} changes:\n" + "\n".join(report))
-
+
elif "---" in patch and "+++" in patch:
# Standard unified diff
result = apply_unified_diff(patch, self.cwd, dry_run)
return ToolResult.ok(result)
-
+
else:
return ToolResult.fail(
"Invalid patch format. Use unified diff (--- / +++) "
"or custom format (*** Begin Patch)"
)
-
+
except FileNotFoundError as e:
return ToolResult.fail(f"File not found: {e}")
except PermissionError as e:
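
A quick way to sanity-check the reformatted patch pipeline end to end - a minimal sketch (not part of this diff) that assumes only the imports shown above. Note that `find_hunk_position()` tolerates hunk headers whose line numbers have drifted by up to 50 lines:

```python
from pathlib import Path

from src.tools.apply_patch import ApplyPatchTool, parse_unified_diff

# One-hunk unified diff over a throwaway file.
PATCH = """\
--- a/hello.txt
+++ b/hello.txt
@@ -1,3 +1,3 @@
 line one
-line two
+line TWO
 line three
"""

workdir = Path("/tmp/patch-demo")
workdir.mkdir(exist_ok=True)
(workdir / "hello.txt").write_text("line one\nline two\nline three\n")

# Parse only: one FileChange containing one Hunk.
changes = parse_unified_diff(PATCH)
assert len(changes) == 1 and len(changes[0].hunks) == 1

tool = ApplyPatchTool(cwd=workdir)
print(tool.execute(patch=PATCH, dry_run=True).output)  # "Would apply ...  M hello.txt"
print(tool.execute(patch=PATCH).output)                # "Applied ..."
```
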
diff --git a/src/tools/base.py b/src/tools/base.py
index c0729ba..2bc8e64 100644
--- a/src/tools/base.py
+++ b/src/tools/base.py
@@ -5,12 +5,13 @@
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from pathlib import Path
-from typing import Any, Optional, List, Dict
+from typing import Any, Dict, List, Optional
@dataclass
class ToolMetadata:
"""Metadata about a tool execution."""
+
duration_ms: int = 0
exit_code: Optional[int] = None
files_modified: List[str] = field(default_factory=list)
@@ -20,28 +21,29 @@ class ToolMetadata:
@dataclass
class ToolResult:
"""Result of a tool execution."""
+
success: bool
output: str
error: Optional[str] = None
data: Optional[dict[str, Any]] = None
inject_content: Optional[dict[str, Any]] = None # For injecting images into context
metadata: Optional[ToolMetadata] = None
-
+
@classmethod
def ok(cls, output: str, data: Optional[dict[str, Any]] = None) -> "ToolResult":
"""Create a successful result."""
return cls(success=True, output=output, data=data)
-
+
@classmethod
def fail(cls, error: str, output: str = "") -> "ToolResult":
"""Create a failed result."""
return cls(success=False, output=output, error=error)
-
+
def with_metadata(self, metadata: ToolMetadata) -> "ToolResult":
"""Add metadata to this result."""
self.metadata = metadata
return self
-
+
def to_message(self) -> str:
"""Convert to message format for the LLM."""
if self.success:
@@ -52,36 +54,36 @@ def to_message(self) -> str:
class BaseTool(ABC):
"""Base class for all tools."""
-
+
name: str
description: str
-
+
def __init__(self, cwd: Path):
"""Initialize the tool.
-
+
Args:
cwd: Current working directory for the tool
"""
self.cwd = cwd
-
+
@abstractmethod
def execute(self, **kwargs: Any) -> ToolResult:
"""Execute the tool with the given arguments.
-
+
Args:
**kwargs: Tool-specific arguments
-
+
Returns:
ToolResult with success status and output
"""
pass
-
+
def resolve_path(self, path: str) -> Path:
"""Resolve a path relative to the working directory.
-
+
Args:
path: Path string (absolute or relative)
-
+
Returns:
Resolved absolute Path
"""
@@ -89,11 +91,11 @@ def resolve_path(self, path: str) -> Path:
if p.is_absolute():
return p
return (self.cwd / p).resolve()
-
+
@classmethod
def get_spec(cls) -> dict[str, Any]:
"""Get the tool specification for the LLM.
-
+
Returns:
Tool specification dict
"""
diff --git a/src/tools/grep_files.py b/src/tools/grep_files.py
index ff02283..07c2db2 100644
--- a/src/tools/grep_files.py
+++ b/src/tools/grep_files.py
@@ -2,26 +2,25 @@
from __future__ import annotations
-import os
import re
import subprocess
from pathlib import Path
-from typing import Any, Optional
+from typing import Optional
from src.tools.base import BaseTool, ToolResult
class GrepFilesTool(BaseTool):
"""Tool for searching file contents using patterns."""
-
+
name = "grep_files"
description = "Finds files whose contents match the pattern."
-
+
# Default limits
DEFAULT_LIMIT = 100
MAX_LIMIT = 2000
TIMEOUT_SECONDS = 30
-
+
def execute(
self,
pattern: str,
@@ -30,33 +29,33 @@ def execute(
limit: int = DEFAULT_LIMIT,
) -> ToolResult:
"""Search for files matching a pattern.
-
+
Args:
pattern: Regex pattern to search for
include: Glob pattern to filter files
path: Directory to search in
limit: Maximum number of results
-
+
Returns:
ToolResult with matching file paths
"""
# Resolve search path
search_path = self.resolve_path(path) if path else self.cwd
-
+
if not search_path.exists():
return ToolResult.fail(f"Path not found: {search_path}")
-
+
# Cap limit
limit = min(limit, self.MAX_LIMIT)
-
+
# Try ripgrep first (fastest)
rg_result = self._search_with_ripgrep(pattern, include, search_path, limit)
if rg_result is not None:
return rg_result
-
+
# Fallback to Python implementation
return self._search_with_python(pattern, include, search_path, limit)
-
+
def _search_with_ripgrep(
self,
pattern: str,
@@ -65,17 +64,17 @@ def _search_with_ripgrep(
limit: int,
) -> Optional[ToolResult]:
"""Search using ripgrep (rg).
-
+
Returns None if ripgrep is not available.
"""
cmd = ["rg", "--files-with-matches", "--no-heading"]
-
+
if include:
# Convert glob to rg glob format
cmd.extend(["--glob", include])
-
+
cmd.extend([pattern, str(search_path)])
-
+
try:
result = subprocess.run(
cmd,
@@ -83,34 +82,34 @@ def _search_with_ripgrep(
text=True,
timeout=self.TIMEOUT_SECONDS,
)
-
+
if result.returncode == 0:
files = result.stdout.strip().split("\n") if result.stdout.strip() else []
files = files[:limit]
-
+
if not files:
return ToolResult.ok("No matching files found.")
-
+
output = f"Found {len(files)} matching files:\n" + "\n".join(files)
return ToolResult.ok(output, data={"count": len(files), "files": files})
-
+
elif result.returncode == 1:
# No matches
return ToolResult.ok("No matching files found.")
-
+
elif result.returncode == 2:
# Error - might be bad pattern
return ToolResult.fail(f"Search error: {result.stderr.strip()}")
-
+
return None # Try fallback
-
+
except FileNotFoundError:
return None # rg not installed, use fallback
except subprocess.TimeoutExpired:
return ToolResult.fail(f"Search timed out after {self.TIMEOUT_SECONDS}s")
except Exception:
return None # Use fallback
-
+
def _search_with_python(
self,
pattern: str,
@@ -123,10 +122,10 @@ def _search_with_python(
regex = re.compile(pattern)
except re.error as e:
return ToolResult.fail(f"Invalid regex pattern: {e}")
-
+
matching_files: list[str] = []
errors: list[str] = []
-
+
# Convert include glob to regex if provided
include_regex = None
if include:
@@ -143,21 +142,21 @@ def _search_with_python(
include_regex = re.compile(f"^{include_pattern}$", re.IGNORECASE)
except re.error:
pass
-
+
def should_include(file_path: Path) -> bool:
if include_regex is None:
return True
return include_regex.match(file_path.name) is not None
-
+
def search_dir(dir_path: Path) -> None:
if len(matching_files) >= limit:
return
-
+
try:
for item in dir_path.iterdir():
if len(matching_files) >= limit:
return
-
+
if item.is_file() and should_include(item):
try:
content = item.read_text(encoding="utf-8", errors="ignore")
@@ -165,15 +164,15 @@ def search_dir(dir_path: Path) -> None:
matching_files.append(str(item))
except (PermissionError, OSError):
pass
-
+
elif item.is_dir() and not item.is_symlink():
# Skip hidden directories
if not item.name.startswith("."):
search_dir(item)
-
+
except PermissionError:
errors.append(f"Permission denied: {dir_path}")
-
+
if search_path.is_file():
try:
content = search_path.read_text(encoding="utf-8", errors="ignore")
@@ -183,13 +182,13 @@ def search_dir(dir_path: Path) -> None:
return ToolResult.fail(f"Cannot read file: {e}")
else:
search_dir(search_path)
-
+
if not matching_files:
return ToolResult.ok("No matching files found.")
-
+
output = f"Found {len(matching_files)} matching files:\n" + "\n".join(matching_files)
-
+
if errors:
- output += f"\n\nWarnings:\n" + "\n".join(errors[:5])
-
+ output += "\n\nWarnings:\n" + "\n".join(errors[:5])
+
return ToolResult.ok(output, data={"count": len(matching_files), "files": matching_files})
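
The two-tier strategy above (ripgrep first, pure-Python walk as fallback) is invisible to callers. A minimal usage sketch, assuming it runs from the repo root:

```python
from pathlib import Path

from src.tools.grep_files import GrepFilesTool

tool = GrepFilesTool(cwd=Path("."))
# Uses `rg --files-with-matches` when available; if rg is missing or
# errors out, _search_with_python() walks the tree instead.
result = tool.execute(pattern=r"def execute", include="*.py", path="src/tools")
print(result.output)  # "Found N matching files: ..."
```
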
diff --git a/src/tools/list_dir.py b/src/tools/list_dir.py
index a3a2c13..5cbaa29 100644
--- a/src/tools/list_dir.py
+++ b/src/tools/list_dir.py
@@ -2,161 +2,158 @@
from __future__ import annotations
-import os
from pathlib import Path
-from typing import Any, Optional, List
+from typing import Any, List, Optional
-from .base import BaseTool, ToolResult, ToolMetadata
+from .base import BaseTool, ToolMetadata, ToolResult
class ListDirTool(BaseTool):
"""Tool to list directory contents."""
-
+
name = "list_dir"
description = "List the contents of a directory"
-
+
def execute(
self,
directory_path: str = ".",
recursive: bool = False,
include_hidden: bool = False,
ignore_patterns: Optional[List[str]] = None,
- **kwargs: Any
+ **kwargs: Any,
) -> ToolResult:
"""List directory contents.
-
+
Args:
directory_path: Path to the directory to list
recursive: Whether to list recursively
include_hidden: Whether to include hidden files/directories
ignore_patterns: List of patterns to ignore
-
+
Returns:
ToolResult with directory listing and metadata
"""
import time
+
start_time = time.time()
-
+
resolved_path = self.resolve_path(directory_path)
-
+
if not resolved_path.exists():
return ToolResult.fail(f"Directory not found: {directory_path}")
-
+
if not resolved_path.is_dir():
return ToolResult.fail(f"Not a directory: {directory_path}")
-
+
ignore_patterns = ignore_patterns or []
entries = []
output_lines = []
-
+
try:
if recursive:
items = self._list_recursive(resolved_path, include_hidden, ignore_patterns)
else:
items = self._list_flat(resolved_path, include_hidden, ignore_patterns)
-
- for item_path, item_type, item_size in sorted(items, key=lambda x: (x[1] != "dir", x[0].lower())):
+
+ for item_path, item_type, item_size in sorted(
+ items, key=lambda x: (x[1] != "dir", x[0].lower())
+ ):
if item_type == "dir":
output_lines.append(f"dir {item_path}")
else:
output_lines.append(f"file {item_path}")
-
- entries.append({
- "name": item_path,
- "type": item_type,
- "size": item_size,
- })
-
+
+ entries.append(
+ {
+ "name": item_path,
+ "type": item_type,
+ "size": item_size,
+ }
+ )
+
if not entries:
- output = f"Directory '{directory_path}' is empty (no files or subdirectories found)."
+ output = (
+ f"Directory '{directory_path}' is empty (no files or subdirectories found)."
+ )
else:
output = "\n".join(output_lines)
-
+
duration_ms = int((time.time() - start_time) * 1000)
metadata = ToolMetadata(
duration_ms=duration_ms,
data={
"path": str(resolved_path),
"entries": entries,
- }
+ },
)
-
+
result = ToolResult.ok(output)
return result.with_metadata(metadata)
-
+
except PermissionError:
return ToolResult.fail(f"Permission denied: {directory_path}")
except Exception as e:
return ToolResult.fail(f"Error listing directory: {e}")
-
+
def _should_ignore(self, name: str, include_hidden: bool, ignore_patterns: List[str]) -> bool:
"""Check if a file/directory should be ignored."""
# Check hidden files
if not include_hidden and name.startswith("."):
return True
-
+
# Check ignore patterns (simple glob matching)
for pattern in ignore_patterns:
if self._match_pattern(name, pattern):
return True
-
+
return False
-
+
def _match_pattern(self, name: str, pattern: str) -> bool:
"""Simple glob pattern matching with * and ?."""
import fnmatch
+
return fnmatch.fnmatch(name, pattern)
-
+
def _list_flat(
- self,
- path: Path,
- include_hidden: bool,
- ignore_patterns: List[str]
+ self, path: Path, include_hidden: bool, ignore_patterns: List[str]
) -> List[tuple[str, str, int]]:
"""List directory contents non-recursively."""
items = []
-
+
for entry in path.iterdir():
if self._should_ignore(entry.name, include_hidden, ignore_patterns):
continue
-
+
item_type = "dir" if entry.is_dir() else "file"
item_size = 0 if entry.is_dir() else entry.stat().st_size
items.append((entry.name, item_type, item_size))
-
+
return items
-
+
def _list_recursive(
- self,
- path: Path,
- include_hidden: bool,
- ignore_patterns: List[str],
- prefix: str = ""
+ self, path: Path, include_hidden: bool, ignore_patterns: List[str], prefix: str = ""
) -> List[tuple[str, str, int]]:
"""List directory contents recursively."""
items = []
-
+
for entry in path.iterdir():
if self._should_ignore(entry.name, include_hidden, ignore_patterns):
continue
-
+
relative_name = f"{prefix}{entry.name}" if prefix else entry.name
item_type = "dir" if entry.is_dir() else "file"
item_size = 0 if entry.is_dir() else entry.stat().st_size
items.append((relative_name, item_type, item_size))
-
+
if entry.is_dir():
# Recurse into subdirectory
sub_items = self._list_recursive(
- entry,
- include_hidden,
- ignore_patterns,
- prefix=f"{relative_name}/"
+ entry, include_hidden, ignore_patterns, prefix=f"{relative_name}/"
)
items.extend(sub_items)
-
+
return items
-
+
@classmethod
def get_spec(cls) -> dict[str, Any]:
"""Get the tool specification for the LLM."""
@@ -169,24 +166,24 @@ def get_spec(cls) -> dict[str, Any]:
"directory_path": {
"type": "string",
"description": "Path to the directory to list",
- "default": "."
+ "default": ".",
},
"recursive": {
"type": "boolean",
"description": "Whether to list recursively",
- "default": False
+ "default": False,
},
"include_hidden": {
"type": "boolean",
"description": "Whether to include hidden files/directories",
- "default": False
+ "default": False,
},
"ignore_patterns": {
"type": "array",
"items": {"type": "string"},
- "description": "List of patterns to ignore"
- }
+ "description": "List of patterns to ignore",
+ },
},
- "required": []
- }
+ "required": [],
+ },
}
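
A usage sketch for the reworked listing tool (assuming a non-empty `src/` under the working directory; the paths are hypothetical):

```python
from pathlib import Path

from src.tools.list_dir import ListDirTool

tool = ListDirTool(cwd=Path("."))
result = tool.execute(
    directory_path="src",
    recursive=True,
    ignore_patterns=["__pycache__", "*.pyc"],
)
print(result.output)  # "dir ..." / "file ..." lines, directories first
print(len(result.metadata.data["entries"]), "entries")
```
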
diff --git a/src/tools/read_file.py b/src/tools/read_file.py
index 6efd51d..5a446d8 100644
--- a/src/tools/read_file.py
+++ b/src/tools/read_file.py
@@ -2,47 +2,42 @@
from __future__ import annotations
-import os
-from pathlib import Path
from typing import Any, Optional
-from .base import BaseTool, ToolResult, ToolMetadata
+from .base import BaseTool, ToolMetadata, ToolResult
class ReadFileTool(BaseTool):
"""Tool to read file contents with line numbers."""
-
+
name = "read_file"
description = "Read the contents of a file with line numbers"
-
+
def execute(
- self,
- file_path: str,
- offset: int = 0,
- limit: Optional[int] = None,
- **kwargs: Any
+ self, file_path: str, offset: int = 0, limit: Optional[int] = None, **kwargs: Any
) -> ToolResult:
"""Read file contents.
-
+
Args:
file_path: Path to the file to read
offset: Line offset to start from (0-based)
limit: Maximum number of lines to read (None for all)
-
+
Returns:
ToolResult with file contents and metadata
"""
import time
+
start_time = time.time()
-
+
resolved_path = self.resolve_path(file_path)
-
+
if not resolved_path.exists():
return ToolResult.fail(f"File not found: {file_path}")
-
+
if not resolved_path.is_file():
return ToolResult.fail(f"Not a file: {file_path}")
-
+
try:
content = resolved_path.read_text(encoding="utf-8")
except UnicodeDecodeError:
@@ -53,10 +48,10 @@ def execute(
return ToolResult.fail(f"Cannot read file: {e}")
except Exception as e:
return ToolResult.fail(f"Error reading file: {e}")
-
+
lines = content.splitlines()
total_lines = len(lines)
-
+
# Handle empty file
if total_lines == 0 or (total_lines == 1 and lines[0] == ""):
duration_ms = int((time.time() - start_time) * 1000)
@@ -72,30 +67,30 @@ def execute(
"offset": offset,
"truncated": False,
"empty": True,
- }
+ },
)
result = ToolResult.ok("(empty file)")
return result.with_metadata(metadata)
-
+
# Apply offset and limit
if offset >= total_lines:
return ToolResult.fail(f"Offset {offset} exceeds total lines {total_lines}")
-
+
end_index = total_lines
if limit is not None:
end_index = min(offset + limit, total_lines)
-
+
selected_lines = lines[offset:end_index]
shown_lines = len(selected_lines)
truncated = end_index < total_lines
-
+
# Format with line numbers
formatted_lines = []
for i, line in enumerate(selected_lines, start=offset + 1):
formatted_lines.append(f"L{i}: {line}")
-
+
output = "\n".join(formatted_lines)
-
+
duration_ms = int((time.time() - start_time) * 1000)
metadata = ToolMetadata(
duration_ms=duration_ms,
@@ -109,12 +104,12 @@ def execute(
"offset": offset,
"truncated": truncated,
"empty": False,
- }
+ },
)
-
+
result = ToolResult.ok(output)
return result.with_metadata(metadata)
-
+
@classmethod
def get_spec(cls) -> dict[str, Any]:
"""Get the tool specification for the LLM."""
@@ -124,20 +119,17 @@ def get_spec(cls) -> dict[str, Any]:
"parameters": {
"type": "object",
"properties": {
- "file_path": {
- "type": "string",
- "description": "Path to the file to read"
- },
+ "file_path": {"type": "string", "description": "Path to the file to read"},
"offset": {
"type": "integer",
"description": "Line offset to start from (0-based)",
- "default": 0
+ "default": 0,
},
"limit": {
"type": "integer",
- "description": "Maximum number of lines to read (optional)"
- }
+ "description": "Maximum number of lines to read (optional)",
+ },
},
- "required": ["file_path"]
- }
+ "required": ["file_path"],
+ },
}
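
Note that this tool's `offset` is 0-based, while the registry's built-in reader (below) treats `offset` as 1-based. A paging sketch against this tool, assuming a repo-root working directory:

```python
from pathlib import Path

from src.tools.read_file import ReadFileTool

tool = ReadFileTool(cwd=Path("."))
offset = 0
while True:
    result = tool.execute(file_path="src/tools/registry.py", offset=offset, limit=50)
    if not result.success:
        break
    print(result.output)  # "L<n>: ..." numbered lines
    if not result.metadata.data["truncated"]:
        break
    offset += 50
```
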
diff --git a/src/tools/registry.py b/src/tools/registry.py
index b96409f..deb1ef2 100644
--- a/src/tools/registry.py
+++ b/src/tools/registry.py
@@ -5,12 +5,11 @@
import hashlib
import json
import subprocess
-import sys
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
from dataclasses import dataclass, field
from pathlib import Path
-from typing import Any, Callable, Dict, List, Optional, Tuple, TYPE_CHECKING
+from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple
from src.tools.base import ToolResult
from src.tools.specs import get_all_tools
@@ -22,6 +21,7 @@
@dataclass
class ExecutorConfig:
"""Configuration for tool execution."""
+
max_concurrent: int = 4
default_timeout: float = 120.0
cache_enabled: bool = True
@@ -31,9 +31,10 @@ class ExecutorConfig:
@dataclass
class CachedResult:
"""A cached tool result with timestamp."""
+
result: ToolResult
cached_at: float # timestamp from time.time()
-
+
def is_valid(self, ttl: float) -> bool:
"""Check if the cached result is still valid."""
return (time.time() - self.cached_at) < ttl
@@ -42,16 +43,17 @@ def is_valid(self, ttl: float) -> bool:
@dataclass
class ToolStats:
"""Per-tool execution statistics."""
+
executions: int = 0
successes: int = 0
total_ms: int = 0
-
+
def success_rate(self) -> float:
"""Get the success rate for this tool."""
if self.executions == 0:
return 0.0
return self.successes / self.executions
-
+
def avg_ms(self) -> float:
"""Get average execution time in milliseconds."""
if self.executions == 0:
@@ -62,25 +64,26 @@ def avg_ms(self) -> float:
@dataclass
class ExecutorStats:
"""Aggregate execution statistics."""
+
total_executions: int = 0
successful_executions: int = 0
failed_executions: int = 0
cache_hits: int = 0
total_duration_ms: int = 0
by_tool: Dict[str, ToolStats] = field(default_factory=dict)
-
+
def success_rate(self) -> float:
"""Get overall success rate."""
if self.total_executions == 0:
return 0.0
return self.successful_executions / self.total_executions
-
+
def cache_hit_rate(self) -> float:
"""Get cache hit rate."""
if self.total_executions == 0:
return 0.0
return self.cache_hits / self.total_executions
-
+
def avg_duration_ms(self) -> float:
"""Get average execution duration in milliseconds."""
if self.total_executions == 0:
@@ -90,18 +93,18 @@ def avg_duration_ms(self) -> float:
class ToolRegistry:
"""Registry for managing and dispatching tool calls.
-
+
Tools receive AgentContext for shell execution.
Includes caching and execution statistics.
"""
-
+
def __init__(
self,
cwd: Optional[Path] = None,
config: Optional[ExecutorConfig] = None,
):
"""Initialize the registry.
-
+
Args:
cwd: Current working directory for tools (optional, can be set later)
config: Executor configuration (optional, uses defaults)
@@ -111,7 +114,7 @@ def __init__(
self._config = config or ExecutorConfig()
self._cache: Dict[str, CachedResult] = {}
self._stats = ExecutorStats()
-
+
def execute(
self,
ctx: "AgentContext",
@@ -119,17 +122,17 @@ def execute(
arguments: dict[str, Any],
) -> ToolResult:
"""Execute a tool by name.
-
+
Args:
ctx: Agent context with shell() method
name: Tool name
arguments: Tool arguments
-
+
Returns:
ToolResult from the tool execution
"""
start_time = time.time()
-
+
# Check cache first if enabled
if self._config.cache_enabled:
cache_key = self._cache_key(name, arguments)
@@ -138,9 +141,9 @@ def execute(
duration_ms = int((time.time() - start_time) * 1000)
self._record_execution(name, duration_ms, success=True, cached=True)
return cached
-
- cwd = Path(ctx.cwd) if hasattr(ctx, 'cwd') else self.cwd
-
+
+ cwd = Path(ctx.cwd) if hasattr(ctx, "cwd") else self.cwd
+
try:
if name == "shell_command":
result = self._execute_shell(ctx, cwd, arguments)
@@ -160,21 +163,21 @@ def execute(
result = self._execute_update_plan(arguments)
else:
result = ToolResult.fail(f"Unknown tool: {name}")
-
+
except Exception as e:
result = ToolResult.fail(f"Tool {name} failed: {e}")
-
+
# Record execution stats
duration_ms = int((time.time() - start_time) * 1000)
self._record_execution(name, duration_ms, success=result.success, cached=False)
-
+
# Cache successful results
if self._config.cache_enabled and result.success:
cache_key = self._cache_key(name, arguments)
self._cache_result(cache_key, result)
-
+
return result
-
+
def _execute_shell(
self,
ctx: "AgentContext",
@@ -185,18 +188,18 @@ def _execute_shell(
command = args.get("command", "")
workdir = args.get("workdir")
timeout_ms = args.get("timeout_ms", 60000)
-
+
if not command:
return ToolResult.fail("No command provided")
-
+
# Resolve working directory
effective_cwd = cwd
if workdir:
wd = Path(workdir)
effective_cwd = wd if wd.is_absolute() else cwd / wd
-
+
timeout_sec = max(1, timeout_ms // 1000)
-
+
try:
result = subprocess.run(
["sh", "-c", command],
@@ -205,19 +208,19 @@ def _execute_shell(
text=True,
timeout=timeout_sec,
)
-
+
output = result.stdout
if result.stderr:
output += f"\n{result.stderr}"
-
+
if result.returncode != 0:
output += f"\n[exit code: {result.returncode}]"
-
+
return ToolResult(
success=result.returncode == 0,
output=output.strip(),
)
-
+
except subprocess.TimeoutExpired:
return ToolResult(
success=False,
@@ -225,106 +228,106 @@ def _execute_shell(
)
except Exception as e:
return ToolResult.fail(str(e))
-
+
def _execute_read_file(self, cwd: Path, args: dict[str, Any]) -> ToolResult:
"""Read file contents."""
file_path = args.get("file_path", "")
offset = args.get("offset", 1)
limit = args.get("limit", 2000)
-
+
if not file_path:
return ToolResult.fail("No file_path provided")
-
+
path = Path(file_path)
if not path.is_absolute():
path = cwd / path
-
+
if not path.exists():
return ToolResult.fail(f"File not found: {path}")
-
+
if not path.is_file():
return ToolResult.fail(f"Not a file: {path}")
-
+
try:
with open(path, "r", encoding="utf-8", errors="replace") as f:
lines = f.readlines()
-
+
# Apply offset and limit (1-indexed)
start = max(0, offset - 1)
end = start + limit
selected = lines[start:end]
-
+
# Format with line numbers
output_lines = []
for i, line in enumerate(selected, start=start + 1):
output_lines.append(f"L{i}: {line.rstrip()}")
-
+
output = "\n".join(output_lines)
-
+
if len(lines) > end:
output += f"\n\n[... {len(lines) - end} more lines ...]"
-
+
return ToolResult.ok(output)
-
+
except Exception as e:
return ToolResult.fail(f"Failed to read file: {e}")
-
+
def _execute_write_file(self, cwd: Path, args: dict[str, Any]) -> ToolResult:
"""Write content to a file."""
file_path = args.get("file_path", "")
content = args.get("content", "")
-
+
if not file_path:
return ToolResult.fail("No file_path provided")
-
+
path = Path(file_path)
if not path.is_absolute():
path = cwd / path
-
+
try:
# Ensure parent directory exists
path.parent.mkdir(parents=True, exist_ok=True)
-
+
with open(path, "w", encoding="utf-8") as f:
f.write(content)
-
+
return ToolResult.ok(f"Wrote {len(content)} bytes to {path}")
-
+
except Exception as e:
return ToolResult.fail(f"Failed to write file: {e}")
-
+
def _execute_list_dir(self, cwd: Path, args: dict[str, Any]) -> ToolResult:
"""List directory contents."""
dir_path = args.get("dir_path", ".")
depth = args.get("depth", 2)
limit = args.get("limit", 50)
-
+
path = Path(dir_path)
if not path.is_absolute():
path = cwd / path
-
+
if not path.exists():
return ToolResult.fail(f"Directory not found: {path}")
-
+
if not path.is_dir():
return ToolResult.fail(f"Not a directory: {path}")
-
+
try:
entries = []
self._list_recursive(path, path, entries, depth, limit)
-
+
if not entries:
return ToolResult.ok("(empty directory)")
-
+
output = "\n".join(entries[:limit])
if len(entries) > limit:
output += f"\n\n[... {len(entries) - limit} more entries ...]"
-
+
return ToolResult.ok(output)
-
+
except Exception as e:
return ToolResult.fail(f"Failed to list directory: {e}")
-
+
def _list_recursive(
self,
base: Path,
@@ -337,27 +340,29 @@ def _list_recursive(
"""Recursively list directory contents."""
if current_depth > max_depth or len(entries) >= max_entries:
return
-
+
try:
items = sorted(current.iterdir(), key=lambda x: (not x.is_dir(), x.name.lower()))
-
+
for item in items:
if len(entries) >= max_entries:
break
-
+
rel_path = item.relative_to(base)
-
+
if item.is_dir():
entries.append(f"{rel_path}/")
- self._list_recursive(base, item, entries, max_depth, max_entries, current_depth + 1)
+ self._list_recursive(
+ base, item, entries, max_depth, max_entries, current_depth + 1
+ )
elif item.is_symlink():
entries.append(f"{rel_path}@")
else:
entries.append(str(rel_path))
-
+
except PermissionError:
pass
-
+
def _execute_grep(
self,
ctx: "AgentContext",
@@ -369,21 +374,21 @@ def _execute_grep(
include = args.get("include", "")
search_path = args.get("path", ".")
limit = args.get("limit", 100)
-
+
if not pattern:
return ToolResult.fail("No pattern provided")
-
+
# Build ripgrep command
cmd_parts = ["rg", "-l", "--color=never"]
-
+
if include:
cmd_parts.extend(["-g", include])
-
+
cmd_parts.append(pattern)
cmd_parts.append(search_path)
-
+
cmd = " ".join(f'"{p}"' if " " in p else p for p in cmd_parts)
-
+
try:
result = subprocess.run(
["sh", "-c", cmd],
@@ -392,52 +397,53 @@ def _execute_grep(
text=True,
timeout=30,
)
-
+
files = [f for f in result.stdout.strip().split("\n") if f]
-
+
if not files:
return ToolResult.ok("No matches found")
-
+
output = "\n".join(files[:limit])
if len(files) > limit:
output += f"\n\n[... {len(files) - limit} more files ...]"
-
+
return ToolResult.ok(output)
-
+
except subprocess.TimeoutExpired:
return ToolResult.fail("Search timed out")
except Exception as e:
return ToolResult.fail(f"Search failed: {e}")
-
+
def _execute_apply_patch(self, cwd: Path, args: dict[str, Any]) -> ToolResult:
"""Apply a patch to files."""
patch = args.get("patch", "")
-
+
if not patch:
return ToolResult.fail("No patch provided")
-
+
from src.tools.apply_patch import ApplyPatchTool
-
+
tool = ApplyPatchTool(cwd)
return tool.execute(patch=patch)
-
+
def _execute_view_image(self, cwd: Path, args: dict[str, Any]) -> ToolResult:
"""View an image file."""
path = args.get("path", "")
-
+
if not path:
return ToolResult.fail("No path provided")
-
+
from src.tools.view_image import view_image
+
return view_image(path, cwd)
-
+
def _execute_update_plan(self, args: dict[str, Any]) -> ToolResult:
"""Update the task plan."""
steps = args.get("steps", [])
explanation = args.get("explanation")
-
+
self._plan = steps
-
+
# Format plan for output
lines = ["Plan updated:"]
for i, step in enumerate(steps, 1):
@@ -447,55 +453,56 @@ def _execute_update_plan(self, args: dict[str, Any]) -> ToolResult:
"completed": "[x]",
}.get(step.get("status", "pending"), "[ ]")
lines.append(f" {status_icon} {i}. {step.get('description', '')}")
-
+
if explanation:
lines.append(f"\nReason: {explanation}")
-
+
return ToolResult.ok("\n".join(lines))
-
+
# -------------------------------------------------------------------------
# Caching methods
# -------------------------------------------------------------------------
-
+
def _cache_key(self, name: str, arguments: dict[str, Any]) -> str:
"""Generate a cache key for a tool call."""
args_json = json.dumps(arguments, sort_keys=True, default=str)
content = f"{name}:{args_json}"
return hashlib.sha256(content.encode()).hexdigest()[:32]
-
+
def _get_cached(self, key: str) -> Optional[ToolResult]:
"""Get a cached result if valid."""
cached = self._cache.get(key)
if cached is not None and cached.is_valid(self._config.cache_ttl):
return cached.result
return None
-
+
def _cache_result(self, key: str, result: ToolResult) -> None:
"""Cache a tool result."""
self._cache[key] = CachedResult(result=result, cached_at=time.time())
-
+
# Evict old entries if cache is too large
if len(self._cache) > 1000:
self._evict_expired_cache()
-
+
def _evict_expired_cache(self) -> None:
"""Remove expired entries from cache."""
now = time.time()
expired_keys = [
- key for key, cached in self._cache.items()
+ key
+ for key, cached in self._cache.items()
if not cached.is_valid(self._config.cache_ttl)
]
for key in expired_keys:
del self._cache[key]
-
+
def clear_cache(self) -> None:
"""Clear the entire cache."""
self._cache.clear()
-
+
# -------------------------------------------------------------------------
# Statistics methods
# -------------------------------------------------------------------------
-
+
def _record_execution(
self,
tool_name: str,
@@ -506,91 +513,93 @@ def _record_execution(
"""Record execution statistics."""
self._stats.total_executions += 1
self._stats.total_duration_ms += duration_ms
-
+
if success:
self._stats.successful_executions += 1
else:
self._stats.failed_executions += 1
-
+
if cached:
self._stats.cache_hits += 1
-
+
# Per-tool stats
if tool_name not in self._stats.by_tool:
self._stats.by_tool[tool_name] = ToolStats()
-
+
tool_stats = self._stats.by_tool[tool_name]
tool_stats.executions += 1
tool_stats.total_ms += duration_ms
if success:
tool_stats.successes += 1
-
+
def stats(self) -> ExecutorStats:
"""Get execution statistics."""
return self._stats
-
+
# -------------------------------------------------------------------------
# Batch execution
# -------------------------------------------------------------------------
-
+
def execute_batch(
self,
ctx: "AgentContext",
calls: List[Tuple[str, dict]],
) -> List[ToolResult]:
"""Execute multiple tool calls in parallel.
-
+
Args:
ctx: Agent context with shell() method
calls: List of (tool_name, arguments) tuples
-
+
Returns:
List of ToolResults in the same order as input calls
"""
if not calls:
return []
-
+
# For single call, just execute directly
if len(calls) == 1:
name, args = calls[0]
return [self.execute(ctx, name, args)]
-
+
# Execute in parallel using ThreadPoolExecutor
results: List[Optional[ToolResult]] = [None] * len(calls)
-
+
with ThreadPoolExecutor(max_workers=self._config.max_concurrent) as executor:
future_to_index = {
executor.submit(self.execute, ctx, name, args): i
for i, (name, args) in enumerate(calls)
}
-
+
for future in as_completed(future_to_index):
index = future_to_index[future]
try:
results[index] = future.result()
except Exception as e:
results[index] = ToolResult.fail(f"Batch execution failed: {e}")
-
+
# Ensure all results are filled (shouldn't happen, but just in case)
return [r if r is not None else ToolResult.fail("No result") for r in results]
-
+
def get_plan(self) -> list[dict[str, str]]:
"""Get the current plan."""
return self._plan.copy()
-
+
def get_tools_for_llm(self) -> list:
"""Get tool specifications formatted for the LLM.
-
- Returns tools in OpenAI-compatible format for litellm.
+
+ Returns tools in OpenAI-compatible format.
"""
specs = get_all_tools()
tools = []
-
+
for spec in specs:
- tools.append({
- "name": spec["name"],
- "description": spec.get("description", ""),
- "parameters": spec.get("parameters", {}),
- })
-
+ tools.append(
+ {
+ "name": spec["name"],
+ "description": spec.get("description", ""),
+ "parameters": spec.get("parameters", {}),
+ }
+ )
+
return tools
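
To see the caching, statistics, and batch paths together - a minimal sketch with a stand-in context object (`execute()` only reads `ctx.cwd` here), assuming the `ExecutorConfig` fields referenced above:

```python
from pathlib import Path

from src.tools.registry import ExecutorConfig, ToolRegistry


class Ctx:
    """Stand-in for AgentContext; the registry only reads .cwd here."""

    cwd = str(Path.cwd())


registry = ToolRegistry(config=ExecutorConfig(max_concurrent=2, cache_ttl=60.0))
ctx = Ctx()

# The second identical call is served from the SHA-256-keyed cache.
# Note: results are keyed only on (name, arguments), so a repeated
# shell command can return stale output within the TTL.
registry.execute(ctx, "shell_command", {"command": "echo hello"})
registry.execute(ctx, "shell_command", {"command": "echo hello"})

# Independent calls fan out across the ThreadPoolExecutor.
registry.execute_batch(ctx, [
    ("list_dir", {"dir_path": "."}),
    ("read_file", {"file_path": "README.md"}),
])

stats = registry.stats()
print(f"hit rate {stats.cache_hit_rate():.0%}, avg {stats.avg_duration_ms():.0f} ms")
```
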
diff --git a/src/tools/search_files.py b/src/tools/search_files.py
index 658181f..e29dea7 100644
--- a/src/tools/search_files.py
+++ b/src/tools/search_files.py
@@ -5,49 +5,56 @@
import fnmatch
import os
from pathlib import Path
-from typing import Any, Optional, List
+from typing import Any, Optional
-from .base import BaseTool, ToolResult, ToolMetadata
+from .base import BaseTool, ToolMetadata, ToolResult
class SearchFilesTool(BaseTool):
"""Tool to search for files using glob patterns."""
-
+
name = "search_files"
description = "Search for files matching a glob pattern"
-
+
# Default directories to skip
- DEFAULT_SKIP_DIRS = {".git", "node_modules", "target", "__pycache__", ".venv", "venv", ".tox", "dist", "build"}
-
+ DEFAULT_SKIP_DIRS = {
+ ".git",
+ "node_modules",
+ "target",
+ "__pycache__",
+ ".venv",
+ "venv",
+ ".tox",
+ "dist",
+ "build",
+ }
+
def execute(
- self,
- pattern: str,
- path: str = ".",
- content_pattern: Optional[str] = None,
- **kwargs: Any
+ self, pattern: str, path: str = ".", content_pattern: Optional[str] = None, **kwargs: Any
) -> ToolResult:
"""Search for files matching a pattern.
-
+
Args:
pattern: Glob pattern to match files (e.g., "*.py", "**/*.js")
path: Base path to search from
content_pattern: Optional regex pattern to match file contents
-
+
Returns:
ToolResult with list of matching file paths
"""
- import time
import re
+ import time
+
start_time = time.time()
-
+
resolved_path = self.resolve_path(path)
-
+
if not resolved_path.exists():
return ToolResult.fail(f"Path not found: {path}")
-
+
if not resolved_path.is_dir():
return ToolResult.fail(f"Not a directory: {path}")
-
+
# Compile content pattern if provided
content_regex = None
if content_pattern:
@@ -55,47 +62,46 @@ def execute(
content_regex = re.compile(content_pattern)
except re.error as e:
return ToolResult.fail(f"Invalid content pattern: {e}")
-
+
matches = []
-
+
try:
# Walk the directory tree
for root, dirs, files in os.walk(resolved_path):
# Skip hidden directories and default skip dirs
dirs[:] = [
- d for d in dirs
- if not d.startswith(".") and d not in self.DEFAULT_SKIP_DIRS
+ d for d in dirs if not d.startswith(".") and d not in self.DEFAULT_SKIP_DIRS
]
-
+
root_path = Path(root)
-
+
for filename in files:
# Skip hidden files
if filename.startswith("."):
continue
-
+
file_path = root_path / filename
relative_path = file_path.relative_to(resolved_path)
-
+
# Check glob pattern match
if not self._match_glob(str(relative_path), pattern):
continue
-
+
# Check content pattern if provided
if content_regex:
if not self._match_content(file_path, content_regex):
continue
-
+
matches.append(str(relative_path))
-
+
# Sort matches
matches.sort()
-
+
if not matches:
output = f"No files found matching pattern '{pattern}'"
else:
output = "\n".join(matches)
-
+
duration_ms = int((time.time() - start_time) * 1000)
metadata = ToolMetadata(
duration_ms=duration_ms,
@@ -104,20 +110,20 @@ def execute(
"base_path": str(resolved_path),
"matches": matches,
"count": len(matches),
- }
+ },
)
-
+
result = ToolResult.ok(output)
return result.with_metadata(metadata)
-
+
except PermissionError:
return ToolResult.fail(f"Permission denied while searching: {path}")
except Exception as e:
return ToolResult.fail(f"Error searching files: {e}")
-
+
def _match_glob(self, filepath: str, pattern: str) -> bool:
"""Match a filepath against a glob pattern.
-
+
Supports:
- * matches any characters except path separator
- ? matches exactly one character
@@ -126,7 +132,7 @@ def _match_glob(self, filepath: str, pattern: str) -> bool:
# Normalize path separators
filepath = filepath.replace("\\", "/")
pattern = pattern.replace("\\", "/")
-
+
# Handle ** pattern (recursive matching)
if "**" in pattern:
# Split pattern at **
@@ -135,19 +141,19 @@ def _match_glob(self, filepath: str, pattern: str) -> bool:
prefix, suffix = parts
prefix = prefix.rstrip("/")
suffix = suffix.lstrip("/")
-
+
# Check prefix if it exists
if prefix and not filepath.startswith(prefix):
return False
-
+
# Check suffix against any part of the path
if suffix:
return fnmatch.fnmatch(filepath, f"*{suffix}")
return True
-
+
# Simple glob matching for * and ?
return fnmatch.fnmatch(filepath, pattern)
-
+
def _match_content(self, file_path: Path, regex: Any) -> bool:
"""Check if file content matches the regex pattern."""
try:
@@ -155,7 +161,7 @@ def _match_content(self, file_path: Path, regex: Any) -> bool:
return bool(regex.search(content))
except Exception:
return False
-
+
@classmethod
def get_spec(cls) -> dict[str, Any]:
"""Get the tool specification for the LLM."""
@@ -167,18 +173,18 @@ def get_spec(cls) -> dict[str, Any]:
"properties": {
"pattern": {
"type": "string",
- "description": "Glob pattern to match files (e.g., '*.py', '**/*.js')"
+ "description": "Glob pattern to match files (e.g., '*.py', '**/*.js')",
},
"path": {
"type": "string",
"description": "Base path to search from",
- "default": "."
+ "default": ".",
},
"content_pattern": {
"type": "string",
- "description": "Optional regex pattern to match file contents"
- }
+ "description": "Optional regex pattern to match file contents",
+ },
},
- "required": ["pattern"]
- }
+ "required": ["pattern"],
+ },
}
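
A sketch of the glob semantics implemented above - `**` splits the pattern into a prefix check plus an `fnmatch` on the tail, and `content_pattern` adds a second regex filter:

```python
from pathlib import Path

from src.tools.search_files import SearchFilesTool

tool = SearchFilesTool(cwd=Path("."))
# "**/*.py" -> empty prefix, suffix "*.py" matched via fnmatch;
# content_pattern then keeps only files whose text matches the regex.
result = tool.execute(pattern="**/*.py", path="src", content_pattern=r"class \w+Tool")
print(result.output)
```
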
diff --git a/src/tools/shell.py b/src/tools/shell.py
index b1edf3d..8240826 100644
--- a/src/tools/shell.py
+++ b/src/tools/shell.py
@@ -5,38 +5,37 @@
import os
import platform
import subprocess
-import sys
from pathlib import Path
-from typing import Any, Optional
+from typing import Optional
from src.tools.base import BaseTool, ToolResult
class ShellCommandTool(BaseTool):
"""Tool for executing shell commands."""
-
+
name = "shell_command"
description = "Runs a shell command and returns its output."
-
+
# Default timeout in milliseconds
DEFAULT_TIMEOUT_MS = 30000
-
+
# Maximum output size
MAX_OUTPUT_SIZE = 100000 # 100KB
-
+
def __init__(self, cwd: Path, timeout_ms: int = DEFAULT_TIMEOUT_MS):
"""Initialize the shell command tool.
-
+
Args:
cwd: Working directory
timeout_ms: Default timeout in milliseconds
"""
super().__init__(cwd)
self.default_timeout_ms = timeout_ms
-
+
def _get_shell(self) -> tuple[str, list[str]]:
"""Get the shell and shell arguments for the current platform.
-
+
Returns:
Tuple of (shell executable, shell arguments)
"""
@@ -47,7 +46,7 @@ def _get_shell(self) -> tuple[str, list[str]]:
# Use bash on Unix with login shell
shell = os.environ.get("SHELL", "/bin/bash")
return shell, ["-lc"]
-
+
def execute(
self,
command: str,
@@ -55,12 +54,12 @@ def execute(
timeout_ms: Optional[int] = None,
) -> ToolResult:
"""Execute a shell command.
-
+
Args:
command: The command to execute
workdir: Working directory (defaults to cwd)
timeout_ms: Timeout in milliseconds
-
+
Returns:
ToolResult with command output
"""
@@ -69,16 +68,16 @@ def execute(
work_path = self.resolve_path(workdir)
else:
work_path = self.cwd
-
+
if not work_path.exists():
return ToolResult.fail(f"Working directory does not exist: {work_path}")
-
+
# Get timeout
timeout_s = (timeout_ms or self.default_timeout_ms) / 1000
-
+
# Build command
shell, shell_args = self._get_shell()
-
+
try:
# Run the command
result = subprocess.run(
@@ -89,40 +88,46 @@ def execute(
timeout=timeout_s,
env={**os.environ, "TERM": "dumb"}, # Disable color codes
)
-
+
# Combine stdout and stderr
output_parts = []
-
+
if result.stdout:
stdout = result.stdout
if len(stdout) > self.MAX_OUTPUT_SIZE:
- stdout = stdout[:self.MAX_OUTPUT_SIZE] + "\n... (output truncated)"
+ stdout = stdout[: self.MAX_OUTPUT_SIZE] + "\n... (output truncated)"
output_parts.append(stdout)
-
+
if result.stderr:
stderr = result.stderr
if len(stderr) > self.MAX_OUTPUT_SIZE:
- stderr = stderr[:self.MAX_OUTPUT_SIZE] + "\n... (stderr truncated)"
+ stderr = stderr[: self.MAX_OUTPUT_SIZE] + "\n... (stderr truncated)"
if output_parts:
output_parts.append(f"\nstderr:\n{stderr}")
else:
output_parts.append(stderr)
-
+
output = "".join(output_parts).strip()
-
+
# Add exit code info if non-zero
if result.returncode != 0:
- output = f"{output}\n\nExit code: {result.returncode}" if output else f"Exit code: {result.returncode}"
-
+ output = (
+ f"{output}\n\nExit code: {result.returncode}"
+ if output
+ else f"Exit code: {result.returncode}"
+ )
+
if not output:
output = "(no output)"
-
+
# Return result based on exit code
if result.returncode == 0:
return ToolResult.ok(output)
else:
- return ToolResult.ok(output) # Still "ok" - we return the output even on non-zero exit
-
+ return ToolResult.ok(
+ output
+ ) # Still "ok" - we return the output even on non-zero exit
+
except subprocess.TimeoutExpired:
return ToolResult.fail(
f"Command timed out after {timeout_s}s",
diff --git a/src/tools/specs.py b/src/tools/specs.py
index fa4381d..69140e9 100644
--- a/src/tools/specs.py
+++ b/src/tools/specs.py
@@ -243,7 +243,7 @@
def get_all_tools() -> list[dict[str, Any]]:
"""Get all tool specifications as a list.
-
+
Returns:
List of tool specification dicts
"""
@@ -252,10 +252,10 @@ def get_all_tools() -> list[dict[str, Any]]:
def get_tool_spec(name: str) -> dict[str, Any] | None:
"""Get a specific tool specification.
-
+
Args:
name: Name of the tool
-
+
Returns:
Tool specification dict or None if not found
"""
diff --git a/src/tools/view_image.py b/src/tools/view_image.py
index 65398e5..ac65d48 100644
--- a/src/tools/view_image.py
+++ b/src/tools/view_image.py
@@ -10,41 +10,41 @@
from pathlib import Path
from typing import Any, Dict, Optional, Tuple
-from src.tools.base import ToolResult
from src.images.loader import load_image_as_data_uri, make_image_content
+from src.tools.base import ToolResult
def get_image_dimensions(data: bytes) -> Optional[Tuple[int, int]]:
"""Parse image dimensions from raw bytes without PIL."""
if len(data) < 24:
return None
-
+
# PNG: signature 0x89 PNG, dimensions at offset 16-23
- if data[:4] == b'\x89PNG' and len(data) >= 24:
- width = int.from_bytes(data[16:20], 'big')
- height = int.from_bytes(data[20:24], 'big')
+ if data[:4] == b"\x89PNG" and len(data) >= 24:
+ width = int.from_bytes(data[16:20], "big")
+ height = int.from_bytes(data[20:24], "big")
return (width, height)
-
+
# JPEG: signature 0xFF 0xD8 0xFF, parse SOF markers
- if data[:3] == b'\xff\xd8\xff':
+ if data[:3] == b"\xff\xd8\xff":
return _parse_jpeg_dimensions(data)
-
+
# GIF: signature GIF87a or GIF89a, dimensions at offset 6-9 (little-endian)
- if data[:6] in (b'GIF87a', b'GIF89a') and len(data) >= 10:
- width = int.from_bytes(data[6:8], 'little')
- height = int.from_bytes(data[8:10], 'little')
+ if data[:6] in (b"GIF87a", b"GIF89a") and len(data) >= 10:
+ width = int.from_bytes(data[6:8], "little")
+ height = int.from_bytes(data[8:10], "little")
return (width, height)
-
+
# BMP: signature BM, dimensions at offset 18-25 (little-endian, signed)
- if data[:2] == b'BM' and len(data) >= 26:
- width = abs(int.from_bytes(data[18:22], 'little', signed=True))
- height = abs(int.from_bytes(data[22:26], 'little', signed=True))
+ if data[:2] == b"BM" and len(data) >= 26:
+ width = abs(int.from_bytes(data[18:22], "little", signed=True))
+ height = abs(int.from_bytes(data[22:26], "little", signed=True))
return (width, height)
-
+
# WebP: RIFF....WEBP
- if len(data) >= 30 and data[:4] == b'RIFF' and data[8:12] == b'WEBP':
+ if len(data) >= 30 and data[:4] == b"RIFF" and data[8:12] == b"WEBP":
return _parse_webp_dimensions(data)
-
+
return None
@@ -55,44 +55,43 @@ def _parse_jpeg_dimensions(data: bytes) -> Optional[Tuple[int, int]]:
if data[i] != 0xFF:
i += 1
continue
-
+
marker = data[i + 1]
-
+
# SOF markers: C0, C1, C2, C3, C5, C6, C7, C9, CA, CB, CD, CE, CF
- if marker in (0xC0, 0xC1, 0xC2, 0xC3, 0xC5, 0xC6, 0xC7,
- 0xC9, 0xCA, 0xCB, 0xCD, 0xCE, 0xCF):
+ if marker in (0xC0, 0xC1, 0xC2, 0xC3, 0xC5, 0xC6, 0xC7, 0xC9, 0xCA, 0xCB, 0xCD, 0xCE, 0xCF):
if i + 9 < len(data):
- height = int.from_bytes(data[i+5:i+7], 'big')
- width = int.from_bytes(data[i+7:i+9], 'big')
+ height = int.from_bytes(data[i + 5 : i + 7], "big")
+ width = int.from_bytes(data[i + 7 : i + 9], "big")
return (width, height)
-
+
# Skip to next marker
if marker in (0xFF, 0x00, 0x01) or 0xD0 <= marker <= 0xD9:
i += 2
elif i + 3 < len(data):
- length = int.from_bytes(data[i+2:i+4], 'big')
+ length = int.from_bytes(data[i + 2 : i + 4], "big")
i += 2 + length
else:
break
-
+
return None
def _parse_webp_dimensions(data: bytes) -> Optional[Tuple[int, int]]:
"""Parse WebP dimensions (VP8 and VP8L formats)."""
# VP8 format
- if data[12:16] == b'VP8 ' and len(data) >= 30:
- width = (int.from_bytes(data[26:28], 'little') & 0x3FFF)
- height = (int.from_bytes(data[28:30], 'little') & 0x3FFF)
+ if data[12:16] == b"VP8 " and len(data) >= 30:
+ width = int.from_bytes(data[26:28], "little") & 0x3FFF
+ height = int.from_bytes(data[28:30], "little") & 0x3FFF
return (width, height)
-
+
# VP8L format
- if data[12:16] == b'VP8L' and len(data) >= 25:
+ if data[12:16] == b"VP8L" and len(data) >= 25:
b0, b1, b2, b3 = data[21], data[22], data[23], data[24]
width = ((b1 & 0x3F) << 8 | b0) + 1
height = ((b3 & 0x0F) << 10 | b2 << 2 | (b1 >> 6)) + 1
return (width, height)
-
+
return None
@@ -102,11 +101,11 @@ def view_image(
) -> ToolResult:
"""
Load a local image and return it for the model context.
-
+
Args:
file_path: Path to the image file (relative or absolute)
cwd: Current working directory
-
+
Returns:
ToolResult with success status and optional image content
"""
@@ -115,20 +114,20 @@ def view_image(
if not path.is_absolute():
path = cwd / path
path = path.resolve()
-
+
# Check if file exists
if not path.exists():
return ToolResult(
success=False,
output=f"Image not found: {path}",
)
-
+
if not path.is_file():
return ToolResult(
success=False,
output=f"Not a file: {path}",
)
-
+
# Check if it's an image file
valid_extensions = {".png", ".jpg", ".jpeg", ".gif", ".webp", ".bmp"}
if path.suffix.lower() not in valid_extensions:
@@ -136,31 +135,31 @@ def view_image(
success=False,
output=f"Not a valid image file: {path} (supported: {', '.join(valid_extensions)})",
)
-
+
try:
# Read raw bytes first to get dimensions
image_data = path.read_bytes()
dimensions = get_image_dimensions(image_data)
-
+
# Load and encode the image
data_uri = load_image_as_data_uri(path)
-
+
# Create content block for injection
image_content = make_image_content(data_uri)
-
+
# Build output message with dimensions if available
if dimensions:
width, height = dimensions
output_msg = f"attached local image: {path.name} ({width}x{height})"
else:
output_msg = f"attached local image: {path.name}"
-
+
return ToolResult(
success=True,
output=output_msg,
inject_content=image_content,
)
-
+
except FileNotFoundError:
return ToolResult(
success=False,
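
The header parsers above are easy to verify without real image files - a hand-built 24-byte PNG prefix is enough, since the dimensions sit at fixed offsets 16-23:

```python
from src.tools.view_image import get_image_dimensions

# 8-byte PNG signature, IHDR chunk length + type, then width/height
# as big-endian u32 values at offsets 16-19 and 20-23.
png_header = (
    b"\x89PNG\r\n\x1a\n"
    + (13).to_bytes(4, "big")   # IHDR length
    + b"IHDR"
    + (640).to_bytes(4, "big")  # width
    + (480).to_bytes(4, "big")  # height
)
assert get_image_dimensions(png_header) == (640, 480)
assert get_image_dimensions(b"\x00" * 24) is None  # unrecognized format
```
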
diff --git a/src/tools/write_file.py b/src/tools/write_file.py
index 6a0bb1a..8aa9696 100644
--- a/src/tools/write_file.py
+++ b/src/tools/write_file.py
@@ -2,54 +2,48 @@
from __future__ import annotations
-import os
-from pathlib import Path
from typing import Any
-from .base import BaseTool, ToolResult, ToolMetadata
+from .base import BaseTool, ToolMetadata, ToolResult
class WriteFileTool(BaseTool):
"""Tool to write content to a file."""
-
+
name = "write_file"
description = "Write content to a file, creating parent directories if needed"
-
- def execute(
- self,
- file_path: str,
- content: str,
- **kwargs: Any
- ) -> ToolResult:
+
+ def execute(self, file_path: str, content: str, **kwargs: Any) -> ToolResult:
"""Write content to a file.
-
+
Args:
file_path: Path to the file to write
content: Content to write to the file
-
+
Returns:
ToolResult with write status and metadata
"""
import time
+
start_time = time.time()
-
+
resolved_path = self.resolve_path(file_path)
-
+
try:
# Create parent directories if they don't exist
resolved_path.parent.mkdir(parents=True, exist_ok=True)
-
+
# Write the content
resolved_path.write_text(content, encoding="utf-8")
-
+
# Get file stats
file_size = resolved_path.stat().st_size
-
+
# Create content preview (max 500 chars)
content_preview = content[:500]
if len(content) > 500:
content_preview += "..."
-
+
duration_ms = int((time.time() - start_time) * 1000)
metadata = ToolMetadata(
duration_ms=duration_ms,
@@ -60,17 +54,17 @@ def execute(
"extension": resolved_path.suffix,
"size": file_size,
"content_preview": content_preview,
- }
+ },
)
-
+
result = ToolResult.ok(f"Successfully wrote {file_size} bytes to {file_path}")
return result.with_metadata(metadata)
-
+
except PermissionError:
return ToolResult.fail(f"Permission denied: {file_path}")
except Exception as e:
return ToolResult.fail(f"Error writing file: {e}")
-
+
@classmethod
def get_spec(cls) -> dict[str, Any]:
"""Get the tool specification for the LLM."""
@@ -80,15 +74,9 @@ def get_spec(cls) -> dict[str, Any]:
"parameters": {
"type": "object",
"properties": {
- "file_path": {
- "type": "string",
- "description": "Path to the file to write"
- },
- "content": {
- "type": "string",
- "description": "Content to write to the file"
- }
+ "file_path": {"type": "string", "description": "Path to the file to write"},
+ "content": {"type": "string", "description": "Content to write to the file"},
},
- "required": ["file_path", "content"]
- }
+ "required": ["file_path", "content"],
+ },
}
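+
+
+# Usage sketch (assumes BaseTool needs no constructor arguments; the path
+# below is illustrative):
+#   tool = WriteFileTool()
+#   result = tool.execute(file_path="notes/todo.txt", content="hello\n")
+#   result.success  # -> True once the file is written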
diff --git a/src/utils/__init__.py b/src/utils/__init__.py
index fe7c9f6..62930e0 100644
--- a/src/utils/__init__.py
+++ b/src/utils/__init__.py
@@ -1,33 +1,30 @@
"""Utility functions."""
# Legacy API (simple interface)
+# Full fabric-core API
from src.utils.truncate import (
- limit_output,
- limit_lines,
- smart_truncate,
- limit_output_bytes,
- truncate_output,
- estimate_tokens,
APPROX_BYTES_PER_TOKEN,
DEFAULT_MAX_TOKENS,
-)
-
-# Full fabric-core API
-from src.utils.truncate import (
- TruncateStrategy,
- TruncateConfig,
- TruncateResult,
TokenEstimator,
TruncateBuilder,
+ TruncateConfig,
+ TruncateResult,
+ TruncateStrategy,
+ estimate_tokens,
+ limit_lines,
+ limit_output,
+ limit_output_bytes,
+ smart_truncate,
truncate,
- truncate_file,
truncate_batch,
+ truncate_file,
+ truncate_output,
)
__all__ = [
# Legacy
"limit_output",
- "limit_lines",
+ "limit_lines",
"smart_truncate",
"limit_output_bytes",
"truncate_output",
diff --git a/src/utils/files.py b/src/utils/files.py
index 1516d55..141e918 100644
--- a/src/utils/files.py
+++ b/src/utils/files.py
@@ -2,37 +2,36 @@
from __future__ import annotations
-import os
from pathlib import Path
-from typing import Union
+from typing import Optional, Union
def resolve_path(path: Union[str, Path], cwd: Optional[Path] = None) -> Path:
"""Resolve a path relative to CWD.
-
+
Args:
path: Path to resolve
cwd: Current working directory (defaults to os.getcwd())
-
+
Returns:
Resolved absolute path
"""
if cwd is None:
cwd = Path.cwd()
-
+
p = Path(path)
if p.is_absolute():
return p.resolve()
-
+
return (cwd / p).resolve()
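+
+
+# Example (cwd is illustrative): with cwd=Path("/home/user"),
+# resolve_path("a/b.txt", cwd) -> Path("/home/user/a/b.txt"), while an
+# absolute input such as "/etc/hosts" is returned resolved as-is.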
def is_binary_file(path: Path) -> bool:
"""Check if a file is binary.
-
+
Args:
path: Path to file
-
+
Returns:
True if file appears to be binary
"""
@@ -46,28 +45,28 @@ def is_binary_file(path: Path) -> bool:
def read_file_safely(path: Path, max_size: int = 10 * 1024 * 1024) -> str:
"""Read a file safely with size limit.
-
+
Args:
path: Path to file
max_size: Maximum size in bytes
-
+
Returns:
File content
-
+
Raises:
ValueError: If file is too large or binary
"""
if not path.exists():
raise FileNotFoundError(f"File not found: {path}")
-
+
if not path.is_file():
raise ValueError(f"Not a file: {path}")
-
+
size = path.stat().st_size
if size > max_size:
raise ValueError(f"File too large: {size} bytes (max {max_size})")
-
+
if is_binary_file(path):
raise ValueError("File appears to be binary")
-
+
return path.read_text(encoding="utf-8", errors="replace")
diff --git a/src/utils/tokens.py b/src/utils/tokens.py
index d57ab4d..0132969 100644
--- a/src/utils/tokens.py
+++ b/src/utils/tokens.py
@@ -5,17 +5,17 @@
def estimate_tokens(text: str) -> int:
"""Estimate the number of tokens in a string.
-
+
This uses a simple heuristic (4 chars per token) which is commonly used
as a rough approximation for English text when a tokenizer isn't available.
-
+
Args:
text: Input text
-
+
Returns:
Estimated token count
"""
if not text:
return 0
-
+
return len(text) // 4
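+
+
+# Quick check of the 4-chars-per-token heuristic (approximate by design):
+#   estimate_tokens("hello world, how are you?")  # 25 chars -> 6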
diff --git a/src/utils/truncate.py b/src/utils/truncate.py
index 717219f..89e6c09 100644
--- a/src/utils/truncate.py
+++ b/src/utils/truncate.py
@@ -17,7 +17,7 @@
class TruncateStrategy(Enum):
"""Truncation strategy."""
-
+
# Truncate from the end (keep beginning)
END = "end"
# Truncate from the beginning (keep end)
@@ -33,7 +33,7 @@ class TruncateStrategy(Enum):
@dataclass
class TruncateConfig:
"""Truncation configuration."""
-
+
# Maximum length in characters
max_chars: int = 10000
# Maximum length in tokens (approximate)
@@ -59,7 +59,7 @@ class TruncateConfig:
@dataclass
class TruncateResult:
"""Truncation result."""
-
+
# Resulting text
text: str
# Whether truncation occurred
@@ -74,13 +74,13 @@ class TruncateResult:
final_tokens: int
# Strategy that was used
strategy_used: TruncateStrategy
-
+
def reduction_percent(self) -> float:
"""Get reduction percentage."""
if self.original_chars == 0:
return 0.0
return (1.0 - (self.final_chars / self.original_chars)) * 100.0
-
+
def is_ok(self) -> bool:
"""Check if truncation was successful."""
return len(self.text) > 0
@@ -89,62 +89,62 @@ def is_ok(self) -> bool:
def estimate_tokens(text: str) -> int:
"""
Estimate token count (rough approximation).
-
+
Rough estimate: ~4 chars per token for English.
This is a simplification - real tokenization varies by model.
"""
char_count = len(text)
word_count = len(text.split())
-
+
# Average of character-based and word-based estimates
return (char_count // 4 + word_count) // 2
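+
+
+# Worked example: "one two three four" has 18 chars and 4 words, so the
+# blended estimate is (18 // 4 + 4) // 2 = 4 tokens.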
class TokenEstimator:
"""More accurate token estimation with caching."""
-
+
def __init__(self, chars_per_token: float = 4.0):
"""Create a new estimator."""
self._cache: Dict[str, int] = {}
self._chars_per_token = chars_per_token
-
+
@classmethod
def with_ratio(cls, chars_per_token: float) -> "TokenEstimator":
"""Create with custom ratio."""
return cls(chars_per_token=chars_per_token)
-
+
def estimate(self, text: str) -> int:
"""Estimate tokens for text."""
# Create hash for caching
text_hash = hashlib.md5(text.encode(), usedforsecurity=False).hexdigest()
-
+
if text_hash in self._cache:
return self._cache[text_hash]
-
+
estimate = self._calculate(text)
-
+
# Cache if not too large
if len(self._cache) < 10000:
self._cache[text_hash] = estimate
-
+
return estimate
-
+
def _calculate(self, text: str) -> int:
"""Calculate token estimate."""
char_count = len(text)
-        return int((char_count / self._chars_per_token) + 0.5)  # ceil-like rounding
+        return int((char_count / self._chars_per_token) + 0.5)  # round half up
-
+
def calibrate(self, samples: List[Tuple[str, int]]) -> None:
"""Calibrate ratio based on actual token counts."""
if not samples:
return
-
+
total_chars = sum(len(text) for text, _ in samples)
total_tokens = sum(tokens for _, tokens in samples)
-
+
if total_tokens > 0:
self._chars_per_token = total_chars / total_tokens
-
+
def clear_cache(self) -> None:
"""Clear cache."""
self._cache.clear()
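+
+
+# Calibration sketch (sample token counts are made up):
+#   est = TokenEstimator()
+#   est.calibrate([("hello world", 2), ("foo bar baz", 3)])
+#   # chars_per_token becomes (11 + 11) / (2 + 3) = 4.4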
@@ -154,12 +154,12 @@ def truncate(text: str, config: TruncateConfig) -> TruncateResult:
"""Truncate text according to configuration."""
original_len = len(text)
original_tokens = estimate_tokens(text)
-
+
# Check if truncation needed
needs_truncation = original_len > config.max_chars
if config.max_tokens is not None:
needs_truncation = needs_truncation or (original_tokens > config.max_tokens)
-
+
if not needs_truncation:
return TruncateResult(
text=text,
@@ -170,7 +170,7 @@ def truncate(text: str, config: TruncateConfig) -> TruncateResult:
final_tokens=original_tokens,
strategy_used=config.strategy,
)
-
+
# Apply truncation strategy
if config.strategy == TruncateStrategy.END:
truncated_text = truncate_end(text, config)
@@ -184,10 +184,10 @@ def truncate(text: str, config: TruncateConfig) -> TruncateResult:
truncated_text = _truncate_summarize(text, config)
else:
truncated_text = truncate_end(text, config)
-
+
final_len = len(truncated_text)
final_tokens = estimate_tokens(truncated_text)
-
+
return TruncateResult(
text=truncated_text,
truncated=True,
@@ -202,12 +202,12 @@ def truncate(text: str, config: TruncateConfig) -> TruncateResult:
def truncate_end(text: str, config: TruncateConfig) -> str:
"""Simple truncation from end."""
target_len = max(0, config.max_chars - len(config.suffix))
-
+
if len(text) <= target_len:
return text
-
+
end = target_len
-
+
# Align to boundary
if config.sentence_boundary:
end = find_sentence_boundary(text, end, forward=False)
@@ -215,19 +215,19 @@ def truncate_end(text: str, config: TruncateConfig) -> str:
end = find_line_boundary(text, end, forward=False)
elif config.word_boundary:
end = find_word_boundary(text, end, forward=False)
-
+
return f"{text[:end]}{config.suffix}"
def truncate_start(text: str, config: TruncateConfig) -> str:
"""Truncation from start (keep end)."""
target_len = max(0, config.max_chars - len(config.prefix))
-
+
if len(text) <= target_len:
return text
-
+
start = len(text) - target_len
-
+
# Align to boundary
if config.sentence_boundary:
start = find_sentence_boundary(text, start, forward=True)
@@ -235,7 +235,7 @@ def truncate_start(text: str, config: TruncateConfig) -> str:
start = find_line_boundary(text, start, forward=True)
elif config.word_boundary:
start = find_word_boundary(text, start, forward=True)
-
+
return f"{config.prefix}{text[start:]}"
@@ -243,14 +243,14 @@ def truncate_middle(text: str, config: TruncateConfig) -> str:
"""Truncation from middle (keep both ends)."""
separator = "\n\n[...content omitted...]\n\n"
target_len = max(0, config.max_chars - len(separator))
-
+
if len(text) <= target_len:
return text
-
+
keep_each = target_len // 2
start_end = _find_boundary(text, keep_each, forward=True, config=config)
end_start = len(text) - _find_boundary(text, keep_each, forward=False, config=config)
-
+
return f"{text[:start_end]}{separator}{text[end_start:]}"
@@ -260,7 +260,7 @@ def truncate_smart(text: str, config: TruncateConfig) -> str:
has_code = "```" in text or " " in text
has_lists = "\n- " in text or "\n* " in text or "\n1." in text
has_headers = "\n#" in text or "\n==" in text
-
+
# Choose strategy based on content
if has_code and config.preserve_code:
return truncate_preserve_code(text, config)
@@ -276,15 +276,15 @@ def truncate_preserve_code(text: str, config: TruncateConfig) -> str:
remaining = config.max_chars
in_code_block = False
code_block_content: List[str] = []
-
- for line in text.split('\n'):
+
+ for line in text.split("\n"):
if line.startswith("```"):
if in_code_block:
# End of code block - add it if it fits
code_block_content.append(line)
- code_block_content.append('') # For the newline
-
- block_text = '\n'.join(code_block_content)
+ code_block_content.append("") # For the newline
+
+ block_text = "\n".join(code_block_content)
if len(block_text) <= remaining:
result.append(block_text)
remaining -= len(block_text)
@@ -300,15 +300,15 @@ def truncate_preserve_code(text: str, config: TruncateConfig) -> str:
line_len = len(line) + 1 # +1 for newline
if line_len <= remaining:
result.append(line)
- result.append('') # For newline
+ result.append("") # For newline
remaining -= line_len
else:
break
-
- result_text = '\n'.join(result)
+
+ result_text = "\n".join(result)
if len(result_text) < len(text):
result_text += config.suffix
-
+
return result_text
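+
+
+# Behavior sketch: fenced ``` blocks are treated atomically, so truncated
+# output never ends mid-code-block; a block that exceeds the remaining
+# budget is dropped whole rather than split line-by-line.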
@@ -318,17 +318,13 @@ def truncate_preserve_structure(text: str, config: TruncateConfig) -> str:
remaining = max(0, config.max_chars - len(config.suffix))
current_section: List[str] = []
section_header = ""
-
- for line in text.split('\n'):
- is_header = (
- line.startswith('#') or
- line.startswith("==") or
- line.startswith("--")
- )
-
+
+ for line in text.split("\n"):
+ is_header = line.startswith("#") or line.startswith("==") or line.startswith("--")
+
if is_header:
# Flush previous section
- section_content = '\n'.join(current_section)
+ section_content = "\n".join(current_section)
total_section = section_header + section_content
if current_section and len(total_section) <= remaining:
result.append(total_section)
@@ -337,21 +333,21 @@ def truncate_preserve_structure(text: str, config: TruncateConfig) -> str:
current_section.clear()
else:
current_section.append(line)
-
+
if remaining == 0:
break
-
+
# Add last section if it fits
if current_section:
- section_content = '\n'.join(current_section)
+ section_content = "\n".join(current_section)
total_section = section_header + section_content
if len(total_section) <= remaining:
result.append(total_section)
-
-    result_text = ''.join(result)
+
+    # Join with newlines so consecutive sections don't run together
+    result_text = "\n".join(result)
if len(result_text) < len(text):
result_text += config.suffix
-
+
return result_text
@@ -378,30 +374,30 @@ def find_word_boundary(text: str, pos: int, forward: bool) -> int:
"""Find word boundary near position."""
if pos >= len(text):
return len(text)
-
+
if forward:
# Search forward for space or newline
for i in range(pos, min(len(text), pos + 50)):
- if text[i] in ' \n':
+ if text[i] in " \n":
return i
# Search backward if nothing found
for i in range(max(0, pos - 50), pos):
idx = pos - 1 - (i - max(0, pos - 50))
- if idx >= 0 and text[idx] in ' \n':
+ if idx >= 0 and text[idx] in " \n":
return idx + 1
else:
# Search backward
for i in range(min(pos, len(text)) - 1, max(0, pos - 50) - 1, -1):
- if text[i] in ' \n':
+ if text[i] in " \n":
return i + 1
-
+
return pos
def find_sentence_boundary(text: str, pos: int, forward: bool) -> int:
"""Find sentence boundary near position."""
sentence_ends = [". ", "! ", "? ", ".\n", "!\n", "?\n"]
-
+
if forward:
for i in range(pos, min(len(text), pos + 200)):
for end in sentence_ends:
@@ -412,19 +408,19 @@ def find_sentence_boundary(text: str, pos: int, forward: bool) -> int:
for end in sentence_ends:
if text[i:].startswith(end):
return i + len(end)
-
+
return find_word_boundary(text, pos, forward)
def find_line_boundary(text: str, pos: int, forward: bool) -> int:
"""Find line boundary near position."""
if forward:
- idx = text.find('\n', pos)
+ idx = text.find("\n", pos)
if idx != -1:
return idx + 1
return pos
else:
- idx = text.rfind('\n', 0, pos)
+ idx = text.rfind("\n", 0, pos)
if idx != -1:
return idx + 1
return 0
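+
+
+# Worked example: find_word_boundary("alpha beta", 7, forward=True) finds
+# no space ahead of position 7, falls back to the space at index 5, and
+# returns 6 (the start of "beta").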
@@ -434,14 +430,14 @@ def truncate_file(content: str, file_type: str, max_chars: int) -> str:
"""Truncate file content intelligently."""
code_types = {"rs", "py", "js", "ts", "go", "c", "cpp", "java", "rb", "php"}
markdown_types = {"md", "markdown"}
-
+
config = TruncateConfig(
max_chars=max_chars,
preserve_code=file_type in code_types,
preserve_markdown=file_type in markdown_types,
strategy=TruncateStrategy.SMART,
)
-
+
return truncate(content, config).text
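+
+
+# Usage sketch (file type and budget are illustrative):
+#   preview = truncate_file(source_text, file_type="py", max_chars=4000)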
@@ -449,84 +445,84 @@ def truncate_batch(items: List[str], total_chars: int) -> List[str]:
"""Truncate multiple strings to fit total budget."""
if not items:
return []
-
+
total_len = sum(len(s) for s in items)
-
+
if total_len <= total_chars:
return list(items)
-
+
# Proportional allocation
ratio = total_chars / total_len
-
+
result: List[str] = []
for item in items:
target = int(len(item) * ratio)
config = TruncateConfig(max_chars=target)
result.append(truncate(item, config).text)
-
+
return result
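+
+
+# Proportional-budget example: items of 600 and 400 chars squeezed into a
+# 500-char total get per-item targets of 300 and 200 respectively.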
@dataclass
class TruncateBuilder:
"""Builder for truncation configuration."""
-
+
_config: TruncateConfig = field(default_factory=TruncateConfig)
-
+
def max_chars(self, max_val: int) -> "TruncateBuilder":
"""Set maximum characters."""
self._config.max_chars = max_val
return self
-
+
def max_tokens(self, max_val: int) -> "TruncateBuilder":
"""Set maximum tokens."""
self._config.max_tokens = max_val
return self
-
+
def strategy(self, strategy: TruncateStrategy) -> "TruncateBuilder":
"""Set strategy."""
self._config.strategy = strategy
return self
-
+
def suffix(self, suffix: str) -> "TruncateBuilder":
"""Set suffix."""
self._config.suffix = suffix
return self
-
+
def prefix(self, prefix: str) -> "TruncateBuilder":
"""Set prefix."""
self._config.prefix = prefix
return self
-
+
def word_boundary(self, enabled: bool) -> "TruncateBuilder":
"""Enable word boundary alignment."""
self._config.word_boundary = enabled
return self
-
+
def sentence_boundary(self, enabled: bool) -> "TruncateBuilder":
"""Enable sentence boundary alignment."""
self._config.sentence_boundary = enabled
return self
-
+
def line_boundary(self, enabled: bool) -> "TruncateBuilder":
"""Enable line boundary alignment."""
self._config.line_boundary = enabled
return self
-
+
def preserve_code(self, enabled: bool) -> "TruncateBuilder":
"""Preserve code blocks."""
self._config.preserve_code = enabled
return self
-
+
def preserve_markdown(self, enabled: bool) -> "TruncateBuilder":
"""Preserve markdown structure."""
self._config.preserve_markdown = enabled
return self
-
+
def build(self) -> TruncateConfig:
"""Build configuration."""
return self._config
-
+
def truncate(self, text: str) -> TruncateResult:
"""Truncate text with built configuration."""
return truncate(text, self._config)
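+
+
+# Builder usage sketch:
+#   result = (
+#       TruncateBuilder()
+#       .max_chars(2000)
+#       .strategy(TruncateStrategy.SMART)
+#       .word_boundary(True)
+#       .truncate(long_text)
+#   )
+#   result.text  # truncated string; result.truncated reports if it shrank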
@@ -546,6 +542,7 @@ def truncate(self, text: str) -> TruncateResult:
@dataclass
class LegacyTruncateResult:
"""Result of truncation operation (legacy)."""
+
text: str
truncated: bool
original_bytes: int
@@ -560,17 +557,17 @@ def truncate_output(
) -> LegacyTruncateResult:
"""
Truncate output to max tokens, keeping head and tail.
-
+
Matches Codex behavior:
- Uses token-based (not byte-based) limits
- Truncates middle, keeping equal head/tail
- Format: "{N} tokens truncated"
- Prepends "Total output lines: {N}" when truncated
-
+
Args:
output: The output string to truncate
max_tokens: Maximum tokens to keep (default: 2500 = ~10KB)
-
+
Returns:
LegacyTruncateResult with truncated text and metadata
"""
@@ -583,12 +580,12 @@ def truncate_output(
tokens_truncated=0,
total_lines=0,
)
-
+
output_bytes = output.encode("utf-8")
original_bytes = len(output_bytes)
original_tokens = original_bytes // APPROX_BYTES_PER_TOKEN
- total_lines = output.count('\n') + (1 if output and not output.endswith('\n') else 0)
-
+ total_lines = output.count("\n") + (1 if output and not output.endswith("\n") else 0)
+
if original_tokens <= max_tokens:
return LegacyTruncateResult(
text=output,
@@ -598,35 +595,35 @@ def truncate_output(
tokens_truncated=0,
total_lines=total_lines,
)
-
+
# Calculate bytes to keep (convert tokens back to bytes)
max_bytes = max_tokens * APPROX_BYTES_PER_TOKEN
-
+
# Split evenly between head and tail
head_bytes = max_bytes // 2
tail_bytes = max_bytes - head_bytes
-
+
# Get head portion
head_raw = output_bytes[:head_bytes]
- # Get tail portion
+ # Get tail portion
tail_raw = output_bytes[-tail_bytes:]
-
+
# Decode, handling UTF-8 boundary issues
head = head_raw.decode("utf-8", errors="ignore")
tail = tail_raw.decode("utf-8", errors="ignore")
-
+
# Calculate truncated tokens
kept_bytes = len(head.encode()) + len(tail.encode())
tokens_truncated = (original_bytes - kept_bytes) // APPROX_BYTES_PER_TOKEN
-
+
# Build truncation message matching Codex format
truncation_msg = f"\n...{tokens_truncated} tokens truncated...\n"
-
+
# Prepend total lines info
lines_prefix = f"Total output lines: {total_lines}\n"
-
+
truncated_text = f"{lines_prefix}{head}{truncation_msg}{tail}"
-
+
return LegacyTruncateResult(
text=truncated_text,
truncated=True,
@@ -643,11 +640,11 @@ def limit_output(
) -> str:
"""
Simple interface: truncate and return just the text.
-
+
Args:
output: The output string to truncate
max_tokens: Maximum tokens to keep
-
+
Returns:
Truncated string
"""
@@ -661,33 +658,33 @@ def limit_lines(
) -> str:
"""
Limit output to max lines, keeping first and last portions.
-
+
Args:
output: The output string to truncate
max_lines: Maximum lines to keep
head_lines: Number of lines to keep from the start
-
+
Returns:
Truncated string with message if truncated
"""
if not output:
return output
-
+
lines = output.splitlines(keepends=True)
total_lines = len(lines)
-
+
if total_lines <= max_lines:
return output
-
+
tail_lines = max_lines - head_lines
omitted = total_lines - max_lines
-
+
head = "".join(lines[:head_lines])
tail = "".join(lines[-tail_lines:]) if tail_lines > 0 else ""
-
+
# Match Codex message format
truncation_msg = f"\n...{omitted} lines omitted...\n"
-
+
return f"Total output lines: {total_lines}\n{head}{truncation_msg}{tail}"
@@ -698,21 +695,21 @@ def smart_truncate(
) -> str:
"""
Smart truncation: applies both token and line limits.
-
+
Args:
output: The output to truncate
max_tokens: Maximum tokens
max_lines: Maximum lines
-
+
Returns:
Truncated output
"""
# First limit by lines (faster check)
result = limit_lines(output, max_lines)
-
+
# Then limit by tokens
result = limit_output(result, max_tokens)
-
+
return result
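+
+
+# Usage sketch (limits are illustrative):
+#   clipped = smart_truncate(command_output, max_tokens=2500, max_lines=256)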
@@ -729,48 +726,48 @@ def middle_out_truncate(
) -> str:
"""
Middle-out truncation like Codex.
-
+
Keeps beginning and end, removes middle.
More useful than head-only because:
- Beginning often has context/headers
- End often has results/conclusions
-
+
Args:
text: Text to truncate
max_tokens: Maximum tokens to keep
-
+
Returns:
Truncated text with marker in middle
"""
if not text:
return text
-
+
text_bytes = text.encode("utf-8")
original_bytes = len(text_bytes)
original_tokens = original_bytes // APPROX_BYTES_PER_TOKEN
-
+
if original_tokens <= max_tokens:
return text
-
+
# Calculate bytes to keep
max_bytes = max_tokens * APPROX_BYTES_PER_TOKEN
-
+
# Split 50/50 between head and tail
head_bytes = max_bytes // 2
tail_bytes = max_bytes - head_bytes
-
+
# Extract portions
head_raw = text_bytes[:head_bytes]
tail_raw = text_bytes[-tail_bytes:]
-
+
# Decode safely (handle UTF-8 boundary issues)
head = head_raw.decode("utf-8", errors="ignore")
tail = tail_raw.decode("utf-8", errors="ignore")
-
+
# Calculate removed tokens
kept_bytes = len(head.encode()) + len(tail.encode())
removed_tokens = (original_bytes - kept_bytes) // APPROX_BYTES_PER_TOKEN
-
+
return f"{head}\n\n...{removed_tokens} tokens truncated...\n\n{tail}"