diff --git a/.claude/hooks/block_bash_with_instructions.sh b/.claude/hooks/block_bash_with_instructions.sh
deleted file mode 100755
index 7bd16f88..00000000
--- a/.claude/hooks/block_bash_with_instructions.sh
+++ /dev/null
@@ -1,74 +0,0 @@
-#!/bin/bash
-# block_bash_with_instructions.sh - Blocks specific bash commands and provides alternative instructions
-#
-# This hook intercepts Bash tool use calls and blocks commands that match
-# specific patterns, providing alternative instructions to the agent.
-#
-# Usage: Registered as a PreToolUse hook in .claude/settings.json
-#
-# Input (stdin): JSON from Claude Code hook system containing tool_name and tool_input
-# Output (stderr): Error message if blocked (Claude Code reads stderr for exit code 2)
-# Exit codes:
-#   0 - Success (allow action)
-#   2 - Blocking error (prevent action with message)
-
-set -e
-
-# =============================================================================
-# BLOCKED COMMANDS CONFIGURATION
-# =============================================================================
-# Format: Each entry is a regex pattern followed by a delimiter (|||) and instructions
-# The regex is matched against the full bash command
-# Add new blocked commands here:
-
-BLOCKED_COMMANDS=(
-    '^[[:space:]]*git[[:space:]]+commit|||All commits must be done via the `/commit` skill. Do not use git commit directly. Instead, run `/commit` to start the commit workflow which includes code review, testing, and linting before committing.'
-)
-
-# =============================================================================
-# HOOK LOGIC - DO NOT MODIFY BELOW UNLESS NECESSARY
-# =============================================================================
-
-# Read stdin into variable
-HOOK_INPUT=""
-if [ ! -t 0 ]; then
-    HOOK_INPUT=$(cat)
-fi
-
-# Exit early if no input
-if [ -z "${HOOK_INPUT}" ]; then
-    exit 0
-fi
-
-# Extract tool_name from input
-TOOL_NAME=$(echo "${HOOK_INPUT}" | jq -r '.tool_name // empty' 2>/dev/null)
-
-# Only process Bash tool calls
-if [ "${TOOL_NAME}" != "Bash" ]; then
-    exit 0
-fi
-
-# Extract the command from tool_input
-COMMAND=$(echo "${HOOK_INPUT}" | jq -r '.tool_input.command // empty' 2>/dev/null)
-
-# Exit if no command
-if [ -z "${COMMAND}" ]; then
-    exit 0
-fi
-
-# Check each blocked pattern
-for entry in "${BLOCKED_COMMANDS[@]}"; do
-    # Split entry by delimiter
-    pattern="${entry%%|||*}"
-    instructions="${entry##*|||}"
-
-    # Check if command matches pattern (using extended regex)
-    if echo "${COMMAND}" | grep -qE "${pattern}"; then
-        # Output error message to stderr (Claude Code reads stderr for exit code 2)
-        echo "${instructions}" >&2
-        exit 2
-    fi
-done
-
-# Command is allowed
-exit 0
diff --git a/.claude/hooks/commit_job_git_commit.sh b/.claude/hooks/commit_job_git_commit.sh
deleted file mode 100755
index 5fe32679..00000000
--- a/.claude/hooks/commit_job_git_commit.sh
+++ /dev/null
@@ -1,4 +0,0 @@
-#!/bin/bash
-# commit_job_git_commit.sh - Wrapper for git commit invoked via the /commit skill
-
-exec git commit "$@"
diff --git a/.claude/settings.json b/.claude/settings.json
index cf4e3c4c..c6158219 100644
--- a/.claude/settings.json
+++ b/.claude/settings.json
@@ -95,56 +95,20 @@
       "Bash(npm:*)",
       "Bash(npx:*)",
       "Edit(./**)",
-      "Read(./.deepwork/tmp/**)",
-      "Edit(./.deepwork/tmp/**)",
-      "Write(./.deepwork/tmp/**)",
-      "Skill(commit)",
-      "Skill(commit.review)",
-      "Skill(commit.test)",
-      "Skill(commit.lint)",
-      "Skill(commit.commit_and_push)",
-      "Skill(deepwork_jobs)",
-      "Skill(deepwork_jobs.define)",
-      "Skill(deepwork_jobs.review_job_spec)",
-      "Skill(deepwork_jobs.implement)",
-      "Skill(deepwork_jobs.learn)",
-      "Skill(add_platform)",
-      "Skill(add_platform.research)",
-      "Skill(add_platform.add_capabilities)",
-      "Skill(add_platform.implement)",
-      "Skill(add_platform.verify)",
-      "Skill(update)",
-      "Skill(update.job)",
-      "Skill(manual_tests)",
-      "Skill(manual_tests.run_not_fire_tests)",
-      "Skill(manual_tests.run_fire_tests)",
-      "Skill(deepwork_rules)",
-      "Skill(deepwork_rules.define)",
-      "Bash(deepwork rules clear_queue)",
-      "Bash(rm -rf .deepwork/tmp/rules/queue/*.json)",
-      "Skill(manual_tests.reset)",
-      "Skill(manual_tests.infinite_block_tests)",
       "Read(./.deepwork/**)",
       "Edit(./.deepwork/**)",
       "Write(./.deepwork/**)",
       "Bash(deepwork:*)",
-      "Bash(.claude/hooks/commit_job_git_commit.sh:*)",
       "Bash(./.deepwork/jobs/deepwork_jobs/make_new_job.sh:*)",
-      "WebSearch"
+      "WebSearch",
+      "Skill(deepwork)",
+      "mcp__deepwork__get_workflows",
+      "mcp__deepwork__start_workflow",
+      "mcp__deepwork__finished_step",
+      "mcp__deepwork__abort_workflow"
     ]
   },
   "hooks": {
-    "PreToolUse": [
-      {
-        "matcher": "Bash",
-        "hooks": [
-          {
-            "type": "command",
-            "command": ".claude/hooks/block_bash_with_instructions.sh"
-          }
-        ]
-      }
-    ],
     "SessionStart": [
       {
         "matcher": "",
@@ -155,39 +119,6 @@
           }
         ]
       }
-    ],
-    "UserPromptSubmit": [
-      {
-        "matcher": "",
-        "hooks": [
-          {
-            "type": "command",
-            "command": ".deepwork/jobs/deepwork_rules/hooks/user_prompt_submit.sh"
-          }
-        ]
-      }
-    ],
-    "Stop": [
-      {
-        "matcher": "",
-        "hooks": [
-          {
-            "type": "command",
-            "command": "deepwork hook rules_check"
-          }
-        ]
-      }
-    ],
-    "SubagentStop": [
-      {
-        "matcher": "",
-        "hooks": [
-          {
-            "type": "command",
-            "command": "deepwork hook rules_check"
-          }
-        ]
-      }
     ]
   }
 }
\ No newline at end of file
diff --git a/.claude/skills/add_platform.add_capabilities/SKILL.md b/.claude/skills/add_platform.add_capabilities/SKILL.md
deleted file mode 100644
index 17359135..00000000
--- a/.claude/skills/add_platform.add_capabilities/SKILL.md
+++ /dev/null
@@ -1,216 +0,0 @@
----
-name: add_platform.add_capabilities
-description: "Updates job schema and adapters with any new hook events the platform supports. Use after research to extend DeepWork's hook system."
-user-invocable: false
-
----
-
-# add_platform.add_capabilities
-
-**Step 2/4** in **integrate** workflow
-
-> Full workflow to integrate a new AI platform into DeepWork
-
-> Adds a new AI platform to DeepWork with adapter, templates, and tests. Use when integrating Cursor, Windsurf, or other AI coding tools.
-
-## Prerequisites (Verify First)
-
-Before proceeding, confirm these steps are complete:
-- `/add_platform.research`
-
-## Instructions
-
-**Goal**: Updates job schema and adapters with any new hook events the platform supports. Use after research to extend DeepWork's hook system.
-
-# Add Hook Capabilities
-
-## Objective
-
-Update the DeepWork job schema and platform adapters to support any new hook events that the new platform provides for slash command definitions.
-
-## Task
-
-Analyze the hooks documentation from the research step and update the codebase to support any new hook capabilities, ensuring consistency across all existing adapters.
-
-### Prerequisites
-
-Read the hooks documentation created in the previous step:
-- `doc/platforms/<platform_name>/hooks_system.md`
-
-Also review the existing schema and adapters:
-- `src/deepwork/schemas/job_schema.py`
-- `src/deepwork/adapters.py`
-
-### Process
-
-1. **Analyze the new platform's hooks**
-   - Read `doc/platforms/<platform_name>/hooks_system.md`
-   - List all hooks available for slash command definitions
-   - Compare with hooks already in `job_schema.py`
-   - Identify any NEW hooks not currently supported
-
-2. **Determine if schema changes are needed**
-   - If the platform has hooks that DeepWork doesn't currently support, add them
-   - If all hooks are already supported, document this finding
-   - Remember: Only add hooks that are available on slash command definitions
-
-3. **Update job_schema.py (if needed)**
-   - Add new hook fields to the step schema
-   - Follow existing patterns for hook definitions
-   - Add appropriate type hints and documentation
-   - Example addition:
-     ```python
-     # New hook from <platform>
-     new_hook_name: Optional[List[HookConfig]] = None
-     ```
-
-4. **Update all existing adapters**
-   - Open `src/deepwork/adapters.py`
-   - For EACH existing adapter class:
-     - Add the new hook field (set to `None` if not supported)
-     - This maintains consistency across all adapters
-   - Document why each adapter does or doesn't support the hook
-
-5. **Validate the changes**
-   - Run Python syntax check: `python -m py_compile src/deepwork/schemas/job_schema.py`
-   - Run Python syntax check: `python -m py_compile src/deepwork/adapters.py`
-   - Ensure no import errors
-
-6. **Document the decision**
-   - If no new hooks were added, add a comment explaining why
-   - If new hooks were added, ensure they're documented in the schema
-
-## Output Format
-
-### job_schema.py
-
-Location: `src/deepwork/schemas/job_schema.py`
-
-If new hooks are added:
-```python
-@dataclass
-class StepDefinition:
-    # ... existing fields ...
-
-    # New hook from <platform_name> - [description of what it does]
-    new_hook_name: Optional[List[HookConfig]] = None
-```
-
-### adapters.py
-
-Location: `src/deepwork/adapters.py`
-
-For each existing adapter, add the new hook field:
-```python
-class ExistingPlatformAdapter(PlatformAdapter):
-    # ... existing code ...
-
-    def get_hook_support(self) -> dict:
-        return {
-            # ... existing hooks ...
-            "new_hook_name": None,  # Not supported by this platform
-        }
-```
-
-Or if no changes are needed, add a documentation comment:
-```python
-# NOTE: <platform_name> hooks reviewed on YYYY-MM-DD
-# No new hooks to add - all <platform_name> command hooks are already
-# supported by the existing schema (stop_hooks covers their validation pattern)
-```
-
-## Quality Criteria
-
-- Hooks documentation from research step has been reviewed
-- If new hooks exist:
-  - Added to `src/deepwork/schemas/job_schema.py` with proper typing
-  - ALL existing adapters updated in `src/deepwork/adapters.py`
-  - Each adapter indicates support level (implemented, None, or partial)
-- If no new hooks needed:
-  - Decision documented with a comment explaining the analysis
-- Only hooks available on slash command definitions are considered
-- `job_schema.py` has no syntax errors (verified with py_compile)
-- `adapters.py` has no syntax errors (verified with py_compile)
-- All adapters have consistent hook fields (same fields across all adapters)
-- When all criteria are met, include `<promise>✓ Quality Criteria Met</promise>` in your response
-
-## Context
-
-DeepWork supports multiple AI platforms, and each platform may have different capabilities for hooks within command definitions. The schema defines what hooks CAN exist, while adapters define what each platform actually SUPPORTS.
-
-This separation allows:
-- Job definitions to use any hook (the schema is the superset)
-- Platform-specific generation to only use supported hooks (adapters filter)
-- Future platforms to add new hooks without breaking existing ones
-
-Maintaining consistency is critical - all adapters must have the same hook fields, even if they don't support them (use `None` for unsupported).
-
-## Common Hook Types
-
-For reference, here are common hook patterns across platforms:
-
-| Hook Type | Purpose | Example Platforms |
-|-----------|---------|-------------------|
-| `stop_hooks` | Quality validation loops | Claude Code |
-| `pre_hooks` | Run before command | Various |
-| `post_hooks` | Run after command | Various |
-| `validation_hooks` | Validate inputs/outputs | Various |
-
-When you find a new hook type, consider whether it maps to an existing pattern or is genuinely new functionality.
-
-
-### Job Context
-
-A workflow for adding support for a new AI platform (like Cursor, Windsurf, etc.) to DeepWork.
-
-The **integrate** workflow guides you through four phases:
-1. **Research**: Capture the platform's CLI configuration and hooks system documentation
-2. **Add Capabilities**: Update the job schema and adapters with any new hook events
-3. **Implement**: Create the platform adapter, templates, tests (100% coverage), and README updates
-4. **Verify**: Ensure installation works correctly and produces expected files
-
-The workflow ensures consistency across all supported platforms and maintains
-comprehensive test coverage for new functionality.
-
-**Important Notes**:
-- Only hooks available on slash command definitions should be captured
-- Each existing adapter must be updated when new hooks are added (typically with null values)
-- Tests must achieve 100% coverage for any new functionality
-- Installation verification confirms the platform integrates correctly with existing jobs
-
-
-## Required Inputs
-
-
-**Files from Previous Steps** - Read these first:
-- `hooks_system.md` (from `research`)
-
-## Work Branch
-
-Use branch format: `deepwork/add_platform-[instance]-YYYYMMDD`
-
-- If on a matching work branch: continue using it
-- If on main/master: create new branch with `git checkout -b deepwork/add_platform-[instance]-$(date +%Y%m%d)`
-
-## Outputs
-
-**Required outputs**:
-- `job_schema.py`
-- `adapters.py`
-
-## Guardrails
-
-- Do NOT skip prerequisite verification if this step has dependencies
-- Do NOT produce partial outputs; complete all required outputs before finishing
-- Do NOT proceed without required inputs; ask the user if any are missing
-- Do NOT modify files outside the scope of this step's defined outputs
-
-## On Completion
-
-1. Verify outputs are created
-2. Inform user: "integrate step 2/4 complete, outputs: job_schema.py, adapters.py"
-3. **Continue workflow**: Use Skill tool to invoke `/add_platform.implement`
-
----
-
-**Reference files**: `.deepwork/jobs/add_platform/job.yml`, `.deepwork/jobs/add_platform/steps/add_capabilities.md`
\ No newline at end of file
diff --git a/.claude/skills/add_platform.implement/SKILL.md b/.claude/skills/add_platform.implement/SKILL.md
deleted file mode 100644
index 5d925402..00000000
--- a/.claude/skills/add_platform.implement/SKILL.md
+++ /dev/null
@@ -1,321 +0,0 @@
----
-name: add_platform.implement
-description: "Creates platform adapter, templates, tests with 100% coverage, and README documentation. Use after adding hook capabilities."
-user-invocable: false
-hooks:
-  Stop:
-    - hooks:
-        - type: command
-          command: ".deepwork/jobs/add_platform/hooks/run_tests.sh"
-  SubagentStop:
-    - hooks:
-        - type: command
-          command: ".deepwork/jobs/add_platform/hooks/run_tests.sh"
-
----
-
-# add_platform.implement
-
-**Step 3/4** in **integrate** workflow
-
-> Full workflow to integrate a new AI platform into DeepWork
-
-> Adds a new AI platform to DeepWork with adapter, templates, and tests. Use when integrating Cursor, Windsurf, or other AI coding tools.
-
-## Prerequisites (Verify First)
-
-Before proceeding, confirm these steps are complete:
-- `/add_platform.research`
-- `/add_platform.add_capabilities`
-
-## Instructions
-
-**Goal**: Creates platform adapter, templates, tests with 100% coverage, and README documentation. Use after adding hook capabilities.
-
-# Implement Platform Support
-
-## Objective
-
-Create the complete platform implementation including the adapter class, command templates, comprehensive tests, and documentation updates.
-
-## Task
-
-Build the full platform support by implementing the adapter, creating templates, writing tests with 100% coverage, and updating the README.
-
-### Prerequisites
-
-Read the outputs from previous steps:
-- `doc/platforms/<platform_name>/cli_configuration.md` - For template structure
-- `src/deepwork/schemas/job_schema.py` - For current schema
-- `src/deepwork/adapters.py` - For adapter patterns
-
-Also review existing implementations for reference:
-- `src/deepwork/templates/claude/` - Example templates
-- `tests/` - Existing test patterns
-
-### Process
-
-1. **Create the platform adapter class**
-
-   Add a new adapter class to `src/deepwork/adapters.py`:
-
-   ```python
-   class NewPlatformAdapter(PlatformAdapter):
-       """Adapter for <Platform Name>."""
-
-       platform_name = "<platform_name>"
-       command_directory = "<path to commands>"  # e.g., ".cursor/commands"
-       command_extension = ".md"  # or appropriate extension
-
-       def get_hook_support(self) -> dict:
-           """Return which hooks this platform supports."""
-           return {
-               "stop_hooks": True,  # or False/None
-               # ... other hooks
-           }
-
-       def generate_command(self, step: StepDefinition, job: JobDefinition) -> str:
-           """Generate command file content for this platform."""
-           # Use Jinja2 template
-           template = self.env.get_template(f"{self.platform_name}/command.md.j2")
-           return template.render(step=step, job=job)
-   ```
-
-2. **Create command templates**
-
-   Create templates in `src/deepwork/templates/<platform_name>/`:
-
-   - `command.md.j2` - Main command template
-   - Any other templates needed for the platform's format
-
-   Use the CLI configuration documentation to ensure the template matches the platform's expected format.
-
-3. **Register the adapter**
-
-   Update the adapter registry in `src/deepwork/adapters.py`:
-
-   ```python
-   PLATFORM_ADAPTERS = {
-       "claude": ClaudeAdapter,
-       "<platform_name>": NewPlatformAdapter,
-       # ... other adapters
-   }
-   ```
-
-4. **Write comprehensive tests**
-
-   Create tests in `tests/` that cover:
-
-   - Adapter instantiation
-   - Hook support detection
-   - Command generation
-   - Template rendering
-   - Edge cases (empty inputs, special characters, etc.)
-   - Integration with the sync command
-
-   **Critical**: Tests must achieve 100% coverage of new code.
-
-5. **Update README.md**
-
-   Add the new platform to `README.md`:
-
-   - Add to "Supported Platforms" list
-   - Add installation instructions:
-     ```bash
-     deepwork install --platform <platform_name>
-     ```
-   - Document any platform-specific notes or limitations
-
-6. **Run tests and verify coverage**
-
-   ```bash
-   uv run pytest --cov=src/deepwork --cov-report=term-missing
-   ```
-
-   - All tests must pass
-   - New code must have 100% coverage
-   - If coverage is below 100%, add more tests
-
-7. **Iterate until tests pass with full coverage**
-
-   This step has a `stop_hooks` script that runs tests. Keep iterating until:
-   - All tests pass
-   - Coverage is 100% for new functionality
-
-## Output Format
-
-### templates/
-
-Location: `src/deepwork/templates/<platform_name>/`
-
-Create the following files:
-
-**command.md.j2**:
-```jinja2
-{# Template for <platform_name> command files #}
-{# Follows the platform's expected format from cli_configuration.md #}
-
-[Platform-specific frontmatter or metadata]
-
-# {{ step.name }}
-
-{{ step.description }}
-
-## Instructions
-
-{{ step.instructions_content }}
-
-[... rest of template based on platform format ...]
-```
-
-### tests/
-
-Location: `tests/test_<platform_name>_adapter.py`
-
-```python
-"""Tests for the <platform_name> adapter."""
-import pytest
-from deepwork.adapters import NewPlatformAdapter
-
-class TestNewPlatformAdapter:
-    """Test suite for NewPlatformAdapter."""
-
-    def test_adapter_initialization(self):
-        """Test adapter can be instantiated."""
-        adapter = NewPlatformAdapter()
-        assert adapter.platform_name == "<platform_name>"
-
-    def test_hook_support(self):
-        """Test hook support detection."""
-        adapter = NewPlatformAdapter()
-        hooks = adapter.get_hook_support()
-        assert "stop_hooks" in hooks
-        # ... more assertions
-
-    def test_command_generation(self):
-        """Test command file generation."""
-        # ... test implementation
-
-    # ... more tests for 100% coverage
-```
-
-### README.md
-
-Add to the existing README.md:
-
-```markdown
-## Supported Platforms
-
-- **Claude Code** - Anthropic's CLI for Claude
-- **<Platform Name>** - [Brief description]
-
-## Installation
-
-### <Platform Name>
-
-```bash
-deepwork install --platform <platform_name>
-```
-
-[Any platform-specific notes]
-```
-
-## Quality Criteria
-
-- Platform adapter class added to `src/deepwork/adapters.py`:
-  - Inherits from `PlatformAdapter`
-  - Implements all required methods
-  - Registered in `PLATFORM_ADAPTERS`
-- Templates created in `src/deepwork/templates/<platform_name>/`:
-  - `command.md.j2` exists and renders correctly
-  - Format matches platform's expected command format
-- Tests created in `tests/`:
-  - Cover all new adapter functionality
-  - Cover template rendering
-  - All tests pass
-- Test coverage is 100% for new code:
-  - Run `uv run pytest --cov=src/deepwork --cov-report=term-missing`
-  - No uncovered lines in new code
-- README.md updated:
-  - Platform listed in supported platforms
-  - Installation command documented
-  - Any platform-specific notes included
-- When all criteria are met, include `<promise>✓ Quality Criteria Met</promise>` in your response
-
-## Context
-
-This is the core implementation step. The adapter you create will be responsible for:
-- Determining where command files are placed
-- Generating command file content from job definitions
-- Handling platform-specific features and hooks
-
-The templates use Jinja2 and should produce files that match exactly what the platform expects. Reference the CLI configuration documentation frequently to ensure compatibility.
-
-## Tips
-
-- Study the existing `ClaudeAdapter` as a reference implementation
-- Run tests frequently as you implement
-- Use `--cov-report=html` for a detailed coverage report
-- If a test is hard to write, the code might need refactoring
-- Template syntax errors often show up at runtime - test early
-
-
-### Job Context
-
-A workflow for adding support for a new AI platform (like Cursor, Windsurf, etc.) to DeepWork.
-
-The **integrate** workflow guides you through four phases:
-1. **Research**: Capture the platform's CLI configuration and hooks system documentation
-2. **Add Capabilities**: Update the job schema and adapters with any new hook events
-3. **Implement**: Create the platform adapter, templates, tests (100% coverage), and README updates
-4. **Verify**: Ensure installation works correctly and produces expected files
-
-The workflow ensures consistency across all supported platforms and maintains
-comprehensive test coverage for new functionality.
-
-**Important Notes**:
-- Only hooks available on slash command definitions should be captured
-- Each existing adapter must be updated when new hooks are added (typically with null values)
-- Tests must achieve 100% coverage for any new functionality
-- Installation verification confirms the platform integrates correctly with existing jobs
-
-
-## Required Inputs
-
-
-**Files from Previous Steps** - Read these first:
-- `job_schema.py` (from `add_capabilities`)
-- `adapters.py` (from `add_capabilities`)
-- `cli_configuration.md` (from `research`)
-
-## Work Branch
-
-Use branch format: `deepwork/add_platform-[instance]-YYYYMMDD`
-
-- If on a matching work branch: continue using it
-- If on main/master: create new branch with `git checkout -b deepwork/add_platform-[instance]-$(date +%Y%m%d)`
-
-## Outputs
-
-**Required outputs**:
-- `templates/` (directory)
-- `tests/` (directory)
-- `README.md`
-
-## Guardrails
-
-- Do NOT skip prerequisite verification if this step has dependencies
-- Do NOT produce partial outputs; complete all required outputs before finishing
-- Do NOT proceed without required inputs; ask the user if any are missing
-- Do NOT modify files outside the scope of this step's defined outputs
-
-**Validation script**: `.deepwork/jobs/add_platform/hooks/run_tests.sh` (runs automatically)
-## On Completion
-
-1. Verify outputs are created
-2. Inform user: "integrate step 3/4 complete, outputs: templates/, tests/, README.md"
-3. **Continue workflow**: Use Skill tool to invoke `/add_platform.verify`
-
----
-
-**Reference files**: `.deepwork/jobs/add_platform/job.yml`, `.deepwork/jobs/add_platform/steps/implement.md`
\ No newline at end of file
diff --git a/.claude/skills/add_platform.research/SKILL.md b/.claude/skills/add_platform.research/SKILL.md
deleted file mode 100644
index d113d3e9..00000000
--- a/.claude/skills/add_platform.research/SKILL.md
+++ /dev/null
@@ -1,265 +0,0 @@
----
-name: add_platform.research
-description: "Captures CLI configuration and hooks system documentation for the new platform. Use when starting platform integration."
-user-invocable: false
-
----
-
-# add_platform.research
-
-**Step 1/4** in **integrate** workflow
-
-> Full workflow to integrate a new AI platform into DeepWork
-
-> Adds a new AI platform to DeepWork with adapter, templates, and tests. Use when integrating Cursor, Windsurf, or other AI coding tools.
-
-
-## Instructions
-
-**Goal**: Captures CLI configuration and hooks system documentation for the new platform. Use when starting platform integration.
-
-# Research Platform Documentation
-
-## Objective
-
-Capture comprehensive documentation for the new AI platform's CLI configuration and hooks system, creating a local reference that will guide the implementation phases.
-
-## Task
-
-Research the target platform's official documentation and create two focused documentation files that will serve as the foundation for implementing platform support in DeepWork.
-
-### Process
-
-1. **Identify the platform's documentation sources**
-   - Find the official documentation website
-   - Locate the CLI/agent configuration documentation
-   - Find the hooks or customization system documentation
-   - Note: Focus ONLY on slash command/custom command hooks, not general CLI hooks
-
-2. **Gather CLI configuration documentation**
-   - How is the CLI configured? (config files, environment variables, etc.)
-   - Where are custom commands/skills stored?
-   - What is the command file format? (markdown, YAML, etc.)
-   - What metadata or frontmatter is supported?
-   - How does the platform discover and load commands?
-
-3. **Gather hooks system documentation**
-   - What hooks are available for custom command definitions?
-   - Focus on hooks that trigger during or after command execution
-   - Examples: `stop_hooks`, `pre_hooks`, `post_hooks`, validation hooks
-   - Document the syntax and available hook types
-   - **Important**: Only document hooks available on slash command definitions, not general CLI hooks
-
-4. **Create the documentation files**
-   - Place files in `doc/platforms/<platform_name>/`
-   - Each file must have a header comment with source and date
-   - Content should be comprehensive but focused
-
-## Output Format
-
-### cli_configuration.md
-
-Located at: `doc/platforms/<platform_name>/cli_configuration.md`
-
-**Structure**:
-```markdown
-<!--
-Last Updated: YYYY-MM-DD
-Source: [URL where this documentation was obtained]
--->
-
-# <Platform Name> CLI Configuration
-
-## Overview
-
-[Brief description of the platform and its CLI/agent system]
-
-## Configuration Files
-
-[Document where configuration lives and its format]
-
-### File Locations
-
-- [Location 1]: [Purpose]
-- [Location 2]: [Purpose]
-
-### Configuration Format
-
-[Show the configuration file format with examples]
-
-## Custom Commands/Skills
-
-[Document how custom commands are defined]
-
-### Command Location
-
-[Where command files are stored]
-
-### Command File Format
-
-[The format of command files - markdown, YAML, etc.]
-
-### Metadata/Frontmatter
-
-[What metadata fields are supported in command files]
-
-```[format]
-[Example of a minimal command file]
-```
-
-## Command Discovery
-
-[How the platform discovers and loads commands]
-
-## Platform-Specific Features
-
-[Any unique features relevant to command configuration]
-```
-
-### hooks_system.md
-
-Located at: `doc/platforms/<platform_name>/hooks_system.md`
-
-**Structure**:
-```markdown
-<!--
-Last Updated: YYYY-MM-DD
-Source: [URL where this documentation was obtained]
--->
-
-# <Platform Name> Hooks System (Command Definitions)
-
-## Overview
-
-[Brief description of hooks available for command definitions]
-
-**Important**: This document covers ONLY hooks available within slash command/skill definitions, not general CLI hooks.
-
-## Available Hooks
-
-### [Hook Name 1]
-
-**Purpose**: [What this hook does]
-
-**Syntax**:
-```yaml
-[hook_name]:
-  - [configuration]
-```
-
-**Example**:
-```yaml
-[Complete example of using this hook]
-```
-
-**Behavior**: [When and how this hook executes]
-
-### [Hook Name 2]
-
-[Repeat for each available hook]
-
-## Hook Execution Order
-
-[Document the order in which hooks execute, if multiple are supported]
-
-## Comparison with Other Platforms
-
-| Feature | <Platform> | Claude Code | Other |
-|---------|-----------|-------------|-------|
-| [Feature 1] | [Support] | [Support] | [Support] |
-
-## Limitations
-
-[Any limitations or caveats about the hooks system]
-```
-
-## Quality Criteria
-
-- Both files exist in `doc/platforms/<platform_name>/`
-- Each file has a header comment with:
-  - Last updated date (YYYY-MM-DD format)
-  - Source URL where documentation was obtained
-- `cli_configuration.md` comprehensively covers:
-  - Configuration file locations and format
-  - Custom command file format and location
-  - Command discovery mechanism
-- `hooks_system.md` comprehensively covers:
-  - All hooks available for slash command definitions
-  - Syntax and examples for each hook
-  - NOT general CLI hooks (only command-level hooks)
-- Documentation is detailed enough to implement the platform adapter
-- No extraneous topics (only CLI config and command hooks)
-- When all criteria are met, include `<promise>✓ Quality Criteria Met</promise>` in your response
-
-## Context
-
-This is the foundation step for adding a new platform to DeepWork. The documentation you capture here will be referenced throughout the implementation process:
-- CLI configuration informs how to generate command files
-- Hooks documentation determines what features the adapter needs to support
-- This documentation becomes a permanent reference in `doc/platforms/`
-
-Take time to be thorough - incomplete documentation will slow down subsequent steps.
-
-## Tips
-
-- Use the platform's official documentation as the primary source
-- If documentation is sparse, check GitHub repos, community guides, or changelog entries
-- When in doubt about whether something is a "command hook" vs "CLI hook", err on the side of inclusion and note the ambiguity
-- Include code examples from the official docs where available
-
-
-### Job Context
-
-A workflow for adding support for a new AI platform (like Cursor, Windsurf, etc.) to DeepWork.
-
-The **integrate** workflow guides you through four phases:
-1. **Research**: Capture the platform's CLI configuration and hooks system documentation
-2. **Add Capabilities**: Update the job schema and adapters with any new hook events
-3. **Implement**: Create the platform adapter, templates, tests (100% coverage), and README updates
-4. **Verify**: Ensure installation works correctly and produces expected files
-
-The workflow ensures consistency across all supported platforms and maintains
-comprehensive test coverage for new functionality.
-
-**Important Notes**:
-- Only hooks available on slash command definitions should be captured
-- Each existing adapter must be updated when new hooks are added (typically with null values)
-- Tests must achieve 100% coverage for any new functionality
-- Installation verification confirms the platform integrates correctly with existing jobs
-
-
-## Required Inputs
-
-**User Parameters** - Gather from user before starting:
-- **platform_name**: Clear identifier of the platform (e.g., 'cursor', 'windsurf-editor', 'github-copilot-chat')
-
-
-## Work Branch
-
-Use branch format: `deepwork/add_platform-[instance]-YYYYMMDD`
-
-- If on a matching work branch: continue using it
-- If on main/master: create new branch with `git checkout -b deepwork/add_platform-[instance]-$(date +%Y%m%d)`
-
-## Outputs
-
-**Required outputs**:
-- `cli_configuration.md`
-- `hooks_system.md`
-
-## Guardrails
-
-- Do NOT skip prerequisite verification if this step has dependencies
-- Do NOT produce partial outputs; complete all required outputs before finishing
-- Do NOT proceed without required inputs; ask the user if any are missing
-- Do NOT modify files outside the scope of this step's defined outputs
-
-## On Completion
-
-1. Verify outputs are created
-2. Inform user: "integrate step 1/4 complete, outputs: cli_configuration.md, hooks_system.md"
-3. **Continue workflow**: Use Skill tool to invoke `/add_platform.add_capabilities`
-
----
-
-**Reference files**: `.deepwork/jobs/add_platform/job.yml`, `.deepwork/jobs/add_platform/steps/research.md`
\ No newline at end of file
diff --git a/.claude/skills/add_platform.verify/SKILL.md b/.claude/skills/add_platform.verify/SKILL.md
deleted file mode 100644
index 27101223..00000000
--- a/.claude/skills/add_platform.verify/SKILL.md
+++ /dev/null
@@ -1,191 +0,0 @@
----
-name: add_platform.verify
-description: "Sets up platform directories and verifies deepwork install works correctly. Use after implementation to confirm integration."
-user-invocable: false
-
----
-
-# add_platform.verify
-
-**Step 4/4** in **integrate** workflow
-
-> Full workflow to integrate a new AI platform into DeepWork
-
-> Adds a new AI platform to DeepWork with adapter, templates, and tests. Use when integrating Cursor, Windsurf, or other AI coding tools.
-
-## Prerequisites (Verify First)
-
-Before proceeding, confirm these steps are complete:
-- `/add_platform.implement`
-
-## Instructions
-
-**Goal**: Sets up platform directories and verifies deepwork install works correctly. Use after implementation to confirm integration.
-
-# Verify Installation
-
-## Objective
-
-Ensure the new platform integration works correctly by setting up necessary directories and running the full installation process.
-
-## Task
-
-Perform end-to-end verification that the new platform can be installed and that DeepWork's standard jobs work correctly with it.
-
-### Prerequisites
-
-Ensure the implementation step is complete:
-- Adapter class exists in `src/deepwork/adapters.py`
-- Templates exist in `src/deepwork/templates/<platform_name>/`
-- Tests pass with 100% coverage
-- README.md is updated
-
-### Process
-
-1. **Set up platform directories in the DeepWork repo**
-
-   The DeepWork repository itself should have the platform's command directory structure for testing:
-
-   ```bash
-   mkdir -p <platform_command_directory>
-   ```
-
-   For example:
-   - Claude: `.claude/commands/`
-   - Cursor: `.cursor/commands/` (or wherever Cursor stores commands)
-
-2. **Run deepwork install for the new platform**
-
-   ```bash
-   deepwork install --platform <platform_name>
-   ```
-
-   Verify:
-   - Command completes without errors
-   - No Python exceptions or tracebacks
-   - Output indicates successful installation
-
-3. **Check that command files were created**
-
-   List the generated command files:
-   ```bash
-   ls -la <platform_command_directory>/
-   ```
-
-   Verify:
-   - `deepwork_jobs.define.md` exists (or equivalent for the platform)
-   - `deepwork_jobs.implement.md` exists
-   - `deepwork_jobs.refine.md` exists
-   - `deepwork_rules.define.md` exists
-   - All expected step commands exist
-
-4. **Validate command file content**
-
-   Read each generated command file and verify:
-   - Content matches the expected format for the platform
-   - Job metadata is correctly included
-   - Step instructions are properly rendered
-   - Any platform-specific features (hooks, frontmatter) are present
-
-5. **Test alongside existing platforms**
-
-   If other platforms are already installed, verify they still work:
-   ```bash
-   deepwork install --platform claude
-   ls -la .claude/commands/
-   ```
-
-   Ensure:
-   - New platform doesn't break existing installations
-   - Each platform's commands are independent
-   - No file conflicts or overwrites
-
-## Quality Criteria
-
-- Platform-specific directories are set up in the DeepWork repo
-- `deepwork install --platform <platform_name>` completes without errors
-- All expected command files are created:
-  - deepwork_jobs.define, implement, refine
-  - deepwork_rules.define
-  - Any other standard job commands
-- Command file content is correct:
-  - Matches platform's expected format
-  - Job/step information is properly rendered
-  - No template errors or missing content
-- Existing platforms still work (if applicable)
-- No conflicts between platforms
-- When all criteria are met, include `<promise>✓ Quality Criteria Met</promise>` in your response
-
-## Context
-
-This is the final validation step before the platform is considered complete. A thorough verification ensures:
-- The platform actually works, not just compiles
-- Standard DeepWork jobs install correctly
-- The platform integrates properly with the existing system
-- Users can confidently use the new platform
-
-Take time to verify each aspect - finding issues now is much better than having users discover them later.
-
-## Common Issues to Check
-
-- **Template syntax errors**: May only appear when rendering specific content
-- **Path issues**: Platform might expect different directory structure
-- **Encoding issues**: Special characters in templates or content
-- **Missing hooks**: Platform adapter might not handle all hook types
-- **Permission issues**: Directory creation might fail in some cases
-
-
-### Job Context
-
-A workflow for adding support for a new AI platform (like Cursor, Windsurf, etc.) to DeepWork.
-
-The **integrate** workflow guides you through four phases:
-1. **Research**: Capture the platform's CLI configuration and hooks system documentation
-2. **Add Capabilities**: Update the job schema and adapters with any new hook events
-3. **Implement**: Create the platform adapter, templates, tests (100% coverage), and README updates
-4. **Verify**: Ensure installation works correctly and produces expected files
-
-The workflow ensures consistency across all supported platforms and maintains
-comprehensive test coverage for new functionality.
-
-**Important Notes**:
-- Only hooks available on slash command definitions should be captured
-- Each existing adapter must be updated when new hooks are added (typically with null values)
-- Tests must achieve 100% coverage for any new functionality
-- Installation verification confirms the platform integrates correctly with existing jobs
-
-
-## Required Inputs
-
-
-**Files from Previous Steps** - Read these first:
-- `templates/` (from `implement`)
-
-## Work Branch
-
-Use branch format: `deepwork/add_platform-[instance]-YYYYMMDD`
-
-- If on a matching work branch: continue using it
-- If on main/master: create new branch with `git checkout -b deepwork/add_platform-[instance]-$(date +%Y%m%d)`
-
-## Outputs
-
-**Required outputs**:
-- `verification_checklist.md`
-
-## Guardrails
-
-- Do NOT skip prerequisite verification if this step has dependencies
-- Do NOT produce partial outputs; complete all required outputs before finishing
-- Do NOT proceed without required inputs; ask the user if any are missing
-- Do NOT modify files outside the scope of this step's defined outputs
-
-## On Completion
-
-1. Verify outputs are created
-2. Inform user: "integrate step 4/4 complete, outputs: verification_checklist.md"
-3. **integrate workflow complete**: All steps finished. Consider creating a PR to merge the work branch.
-
----
-
-**Reference files**: `.deepwork/jobs/add_platform/job.yml`, `.deepwork/jobs/add_platform/steps/verify.md`
\ No newline at end of file
diff --git a/.claude/skills/add_platform/SKILL.md b/.claude/skills/add_platform/SKILL.md
deleted file mode 100644
index 474bb3a8..00000000
--- a/.claude/skills/add_platform/SKILL.md
+++ /dev/null
@@ -1,83 +0,0 @@
----
-name: add_platform
-description: "Adds a new AI platform to DeepWork with adapter, templates, and tests. Use when integrating Cursor, Windsurf, or other AI coding tools."
----
-
-# add_platform
-
-Adds a new AI platform to DeepWork with adapter, templates, and tests. Use when integrating Cursor, Windsurf, or other AI coding tools.
-
-> **CRITICAL**: Always invoke steps using the Skill tool. Never copy/paste step instructions directly.
-
-A workflow for adding support for a new AI platform (like Cursor, Windsurf, etc.) to DeepWork.
-
-The **integrate** workflow guides you through four phases:
-1. **Research**: Capture the platform's CLI configuration and hooks system documentation
-2. **Add Capabilities**: Update the job schema and adapters with any new hook events
-3. **Implement**: Create the platform adapter, templates, tests (100% coverage), and README updates
-4. **Verify**: Ensure installation works correctly and produces expected files
-
-The workflow ensures consistency across all supported platforms and maintains
-comprehensive test coverage for new functionality.
-
-**Important Notes**:
-- Only hooks available on slash command definitions should be captured
-- Each existing adapter must be updated when new hooks are added (typically with null values)
-- Tests must achieve 100% coverage for any new functionality
-- Installation verification confirms the platform integrates correctly with existing jobs
-
-
-## Workflows
-
-### integrate
-
-Full workflow to integrate a new AI platform into DeepWork
-
-**Steps in order**:
-1. **research** - Captures CLI configuration and hooks system documentation for the new platform. Use when starting platform integration.
-2. **add_capabilities** - Updates job schema and adapters with any new hook events the platform supports. Use after research to extend DeepWork's hook system.
-3. **implement** - Creates platform adapter, templates, tests with 100% coverage, and README documentation. Use after adding hook capabilities.
-4. **verify** - Sets up platform directories and verifies deepwork install works correctly. Use after implementation to confirm integration.
-
-**Start workflow**: `/add_platform.research`
-
-
-## Execution Instructions
-
-### Step 1: Analyze Intent
-
-Parse any text following `/add_platform` to determine user intent:
-- "integrate" or related terms → start integrate workflow at `add_platform.research`
-
-### Step 2: Invoke Starting Step
-
-Use the Skill tool to invoke the identified starting step:
-```
-Skill tool: add_platform.research
-```
-
-### Step 3: Continue Workflow Automatically
-
-After each step completes:
-1. Check if there's a next step in the workflow sequence
-2. Invoke the next step using the Skill tool
-3. Repeat until workflow is complete or user intervenes
-
-**Note**: Standalone skills do not auto-continue to other steps.
-
-### Handling Ambiguous Intent
-
-If user intent is unclear, use AskUserQuestion to clarify:
-- Present available workflows and standalone skills as options
-- Let user select the starting point
-
-## Guardrails
-
-- Do NOT copy/paste step instructions directly; always use the Skill tool to invoke steps
-- Do NOT skip steps in a workflow unless the user explicitly requests it
-- Do NOT proceed to the next step if the current step's outputs are incomplete
-- Do NOT make assumptions about user intent; ask for clarification when ambiguous
-
-## Context Files
-
-- Job definition: `.deepwork/jobs/add_platform/job.yml`
\ No newline at end of file
diff --git a/.claude/skills/commit.commit_and_push/SKILL.md b/.claude/skills/commit.commit_and_push/SKILL.md
deleted file mode 100644
index 1ec4e5c9..00000000
--- a/.claude/skills/commit.commit_and_push/SKILL.md
+++ /dev/null
@@ -1,179 +0,0 @@
----
-name: commit.commit_and_push
-description: "Verifies changed files, creates commit, and pushes to remote. Use after linting passes to finalize changes."
-user-invocable: false
-
----
-
-# commit.commit_and_push
-
-**Step 4/4** in **full** workflow
-
-> Full commit workflow: review, test, lint, and commit
-
-> Reviews code, runs tests, lints, and commits changes. Use when ready to commit work with quality checks.
-
-## Prerequisites (Verify First)
-
-Before proceeding, confirm these steps are complete:
-- `/commit.lint`
-
-## Instructions
-
-**Goal**: Verifies changed files, creates commit, and pushes to remote. Use after linting passes to finalize changes.
-
-# Commit and Push
-
-## Objective
-
-Review the changed files to verify they match the agent's expectations, create a commit with an appropriate message, and push to the remote repository.
-
-## Task
-
-Check the list of changed files against what was modified during this session, ensure they match expectations, then commit and push the changes.
-
-### Process
-
-1. **Get the list of changed files**
-   ```bash
-   git status
-   ```
-   Also run `git diff --stat` to see a summary of changes.
-
-2. **Verify changes match expectations**
-
-   Compare the changed files against what you modified during this session:
-   - Do the modified files match what you edited?
-   - Are there any unexpected new files?
-   - Are there any unexpected deleted files?
-   - Do the line counts seem reasonable for the changes you made?
-
-   If changes match expectations, proceed to the next step.
-
-   If there are unexpected changes:
-   - Investigate why (e.g., lint auto-fixes, generated files)
-   - If they're legitimate side effects of your work, include them
-   - If they're unrelated or shouldn't be committed, use `git restore` to discard them
-
-3. **Update CHANGELOG.md if needed**
-
-   If your changes include new features, bug fixes, or other notable changes:
-   - Add entries to the `## [Unreleased]` section of CHANGELOG.md
-   - Use the appropriate subsection: `### Added`, `### Changed`, `### Fixed`, or `### Removed`
-   - Write concise descriptions that explain the user-facing impact
-
-   **CRITICAL: NEVER modify version numbers**
-   - Do NOT change the version in `pyproject.toml`
-   - Do NOT change version headers in CHANGELOG.md (e.g., `## [0.4.2]`)
-   - Do NOT rename the `## [Unreleased]` section
-   - Version updates are handled by the release workflow, not commits
-
-4. **Stage all appropriate changes**
-   ```bash
-   git add -A
-   ```
-   Or stage specific files if some were excluded.
-
-5. **View recent commit messages for style reference**
-   ```bash
-   git log --oneline -10
-   ```
-
-6. **Create the commit**
-
-   Generate an appropriate commit message based on:
-   - The changes made
-   - The style of recent commits
-   - Conventional commit format if the project uses it
-
-   **IMPORTANT:** Use the commit job script (not `git commit` directly):
-   ```bash
-   .claude/hooks/commit_job_git_commit.sh -m "commit message here"
-   ```
-
-7. **Push to remote**
-   ```bash
-   git push
-   ```
-   If the branch has no upstream, use:
-   ```bash
-   git push -u origin HEAD
-   ```
-
-## Quality Criteria
-
-- Changed files were verified against expectations
-- CHANGELOG.md was updated with entries in [Unreleased] section (if changes warrant documentation)
-- Version numbers were NOT modified (pyproject.toml version and CHANGELOG version headers unchanged)
-- Commit was created with appropriate message
-- Changes were pushed to remote
-
-## Context
-
-This is the final step of the commit workflow. The agent verifies that the changed files match its own expectations from the work done during the session, then commits and pushes. This catches unexpected changes while avoiding unnecessary user interruptions.
-
-
-### Job Context
-
-A workflow for preparing and committing code changes with quality checks.
-
-The **full** workflow starts with a code review to catch issues early, runs tests until
-they pass, formats and lints code with ruff, then reviews changed files
-before committing and pushing. The review and lint steps use sub-agents
-to reduce context usage.
-
-Steps:
-1. review - Code review for issues, DRY opportunities, naming, and test coverage (runs in sub-agent)
-2. test - Pull latest code and run tests until they pass
-3. lint - Format and lint code with ruff (runs in sub-agent)
-4. commit_and_push - Review changes and commit/push
-
-
-
-## Work Branch
-
-Use branch format: `deepwork/commit-[instance]-YYYYMMDD`
-
-- If on a matching work branch: continue using it
-- If on main/master: create new branch with `git checkout -b deepwork/commit-[instance]-$(date +%Y%m%d)`
-
-## Outputs
-
-**Required outputs**:
-- `changes_committed`
-
-## Guardrails
-
-- Do NOT skip prerequisite verification if this step has dependencies
-- Do NOT produce partial outputs; complete all required outputs before finishing
-- Do NOT proceed without required inputs; ask the user if any are missing
-- Do NOT modify files outside the scope of this step's defined outputs
-
-## Quality Validation
-
-**Before completing this step, you MUST have your work reviewed against the quality criteria below.**
-
-Use a sub-agent (Haiku model) to review your work against these criteria:
-
-**Criteria (all must be satisfied)**:
-1. Changed files were verified against expectations
-2. CHANGELOG.md was updated with entries in [Unreleased] section (if changes warrant documentation)
-3. Version numbers were NOT modified (pyproject.toml version and CHANGELOG version headers unchanged)
-4. Commit was created with appropriate message
-5. Changes were pushed to remote
-**Review Process**:
-1. Once you believe your work is complete, spawn a sub-agent using Haiku to review your work against the quality criteria above
-2. The sub-agent should examine your outputs and verify each criterion is met
-3. If the sub-agent identifies valid issues, fix them
-4. Have the sub-agent review again until all valid feedback has been addressed
-5. Only mark the step complete when the sub-agent confirms all criteria are satisfied
-
-## On Completion
-
-1. Verify outputs are created
-2. Inform user: "full step 4/4 complete, outputs: changes_committed"
-3. **full workflow complete**: All steps finished. Consider creating a PR to merge the work branch.
-
----
-
-**Reference files**: `.deepwork/jobs/commit/job.yml`, `.deepwork/jobs/commit/steps/commit_and_push.md`
\ No newline at end of file
diff --git a/.claude/skills/commit.lint/SKILL.md b/.claude/skills/commit.lint/SKILL.md
deleted file mode 100644
index 0f16873e..00000000
--- a/.claude/skills/commit.lint/SKILL.md
+++ /dev/null
@@ -1,158 +0,0 @@
----
-name: commit.lint
-description: "Formats and lints code with ruff using a sub-agent. Use after tests pass to ensure code style compliance."
-user-invocable: false
-
----
-
-# commit.lint
-
-**Step 3/4** in **full** workflow
-
-> Full commit workflow: review, test, lint, and commit
-
-> Reviews code, runs tests, lints, and commits changes. Use when ready to commit work with quality checks.
-
-## Prerequisites (Verify First)
-
-Before proceeding, confirm these steps are complete:
-- `/commit.test`
-
-## Instructions
-
-**Goal**: Formats and lints code with ruff using a sub-agent. Use after tests pass to ensure code style compliance.
-
-# Lint Code
-
-## Objective
-
-Format and lint the codebase using ruff to ensure code quality and consistency.
-
-## Task
-
-Run ruff format and ruff check to format and lint the code. This step should be executed using a sub-agent to conserve context in the main conversation.
-
-### Process
-
-**IMPORTANT**: Use the Task tool to spawn a sub-agent for this work. This saves context in the main conversation. Use the `haiku` model for speed.
-
-1. **Spawn a sub-agent to run linting**
-
-   Use the Task tool with these parameters:
-   - `subagent_type`: "Bash"
-   - `model`: "haiku"
-   - `prompt`: See below
-
-   The sub-agent should:
-
-   a. **Run ruff format**
-      ```bash
-      ruff format .
-      ```
-      This formats the code according to ruff's style rules.
-
-   b. **Run ruff check with auto-fix**
-      ```bash
-      ruff check --fix .
-      ```
-      This checks for lint errors and automatically fixes what it can.
-
-   c. **Run ruff check again to verify**
-      ```bash
-      ruff check .
-      ```
-      Capture the final output to verify no remaining issues.
-
-2. **Review sub-agent results**
-   - Check that both format and check completed successfully
-   - Note any remaining lint issues that couldn't be auto-fixed
-
-3. **Handle remaining issues**
-   - If there are lint errors that couldn't be auto-fixed, fix them manually
-   - Re-run ruff check to verify
-
-## Example Sub-Agent Prompt
-
-```
-Run ruff to format and lint the codebase:
-
-1. Run: ruff format .
-2. Run: ruff check --fix .
-3. Run: ruff check . (to verify no remaining issues)
-
-Report the results of each command.
-```
-
-## Quality Criteria
-
-- ruff format was run successfully
-- ruff check was run with --fix flag
-- No remaining lint errors
-
-## Context
-
-This step ensures code quality and consistency before committing. It runs after tests pass and before the commit step. Using a sub-agent keeps the main conversation context clean for the commit review.
-
-
-### Job Context
-
-A workflow for preparing and committing code changes with quality checks.
-
-The **full** workflow starts with a code review to catch issues early, runs tests until
-they pass, formats and lints code with ruff, then reviews changed files
-before committing and pushing. The review and lint steps use sub-agents
-to reduce context usage.
-
-Steps:
-1. review - Code review for issues, DRY opportunities, naming, and test coverage (runs in sub-agent)
-2. test - Pull latest code and run tests until they pass
-3. lint - Format and lint code with ruff (runs in sub-agent)
-4. commit_and_push - Review changes and commit/push
-
-
-
-## Work Branch
-
-Use branch format: `deepwork/commit-[instance]-YYYYMMDD`
-
-- If on a matching work branch: continue using it
-- If on main/master: create new branch with `git checkout -b deepwork/commit-[instance]-$(date +%Y%m%d)`
-
-## Outputs
-
-**Required outputs**:
-- `code_formatted`
-
-## Guardrails
-
-- Do NOT skip prerequisite verification if this step has dependencies
-- Do NOT produce partial outputs; complete all required outputs before finishing
-- Do NOT proceed without required inputs; ask the user if any are missing
-- Do NOT modify files outside the scope of this step's defined outputs
-
-## Quality Validation
-
-**Before completing this step, you MUST have your work reviewed against the quality criteria below.**
-
-Use a sub-agent (Haiku model) to review your work against these criteria:
-
-**Criteria (all must be satisfied)**:
-1. ruff format was run successfully
-2. ruff check was run with --fix flag
-3. No remaining lint errors
-**Review Process**:
-1. Once you believe your work is complete, spawn a sub-agent using Haiku to review your work against the quality criteria above
-2. The sub-agent should examine your outputs and verify each criterion is met
-3. If the sub-agent identifies valid issues, fix them
-4. Have the sub-agent review again until all valid feedback has been addressed
-5. Only mark the step complete when the sub-agent confirms all criteria are satisfied
-
-## On Completion
-
-1. Verify outputs are created
-2. Inform user: "full step 3/4 complete, outputs: code_formatted"
-3. **Continue workflow**: Use Skill tool to invoke `/commit.commit_and_push`
-
----
-
-**Reference files**: `.deepwork/jobs/commit/job.yml`, `.deepwork/jobs/commit/steps/lint.md`
\ No newline at end of file
diff --git a/.claude/skills/commit.review/SKILL.md b/.claude/skills/commit.review/SKILL.md
deleted file mode 100644
index f4074077..00000000
--- a/.claude/skills/commit.review/SKILL.md
+++ /dev/null
@@ -1,139 +0,0 @@
----
-name: commit.review
-description: "Reviews changed code for issues, DRY opportunities, naming clarity, and test coverage using a sub-agent. Use as the first step before testing."
-user-invocable: false
-
----
-
-# commit.review
-
-**Step 1/4** in **full** workflow
-
-> Full commit workflow: review, test, lint, and commit
-
-> Reviews code, runs tests, lints, and commits changes. Use when ready to commit work with quality checks.
-
-
-## Instructions
-
-**Goal**: Reviews changed code for issues, DRY opportunities, naming clarity, and test coverage using a sub-agent. Use as the first step before testing.
-
-# Code Review
-
-## Objective
-
-Review changed code for quality issues before running tests. This catches problems early and ensures code meets quality standards.
-
-## Task
-
-Use a sub-agent to review the staged/changed code and identify issues that should be fixed before committing.
-
-### Process
-
-**IMPORTANT**: Use the Task tool to spawn a sub-agent for this review. This saves context in the main conversation.
-
-1. **Get the list of changed files**
-   ```bash
-   git diff --name-only HEAD
-   git diff --name-only --staged
-   ```
-   Combine these to get all files that have been modified.
-
-2. **Spawn a sub-agent to review the code**
-
-   Use the Task tool with these parameters:
-   - `subagent_type`: "general-purpose"
-   - `prompt`: Instruct the sub-agent to:
-     - Read the code review standards from `doc/code_review_standards.md`
-     - Read each of the changed files
-     - Review each file against the standards
-     - Report issues found with file, line number, severity, and suggested fix
-
-3. **Review sub-agent findings**
-   - Examine each issue identified
-   - Prioritize issues by severity
-
-4. **Fix identified issues**
-   - Address each issue found by the review
-   - For DRY violations: extract shared code into functions/modules
-   - For naming issues: rename to be clearer
-   - For missing tests: add appropriate test cases
-   - For bugs: fix the underlying issue
-
-5. **Re-run review if significant changes made**
-   - If you made substantial changes, consider running another review pass
-   - Ensure fixes didn't introduce new issues
-
-## Quality Criteria
-
-- Changed files were identified
-- Sub-agent read the code review standards and reviewed all changed files
-- All identified issues were addressed or documented as intentional
-
-## Context
-
-This is the first step of the commit workflow. Code review happens before tests to catch quality issues early. The sub-agent approach keeps the main conversation context clean while providing thorough review coverage.
-
-
-### Job Context
-
-A workflow for preparing and committing code changes with quality checks.
-
-The **full** workflow starts with a code review to catch issues early, runs tests until
-they pass, formats and lints code with ruff, then reviews changed files
-before committing and pushing. The review and lint steps use sub-agents
-to reduce context usage.
-
-Steps:
-1. review - Code review for issues, DRY opportunities, naming, and test coverage (runs in sub-agent)
-2. test - Pull latest code and run tests until they pass
-3. lint - Format and lint code with ruff (runs in sub-agent)
-4. commit_and_push - Review changes and commit/push
-
-
-
-## Work Branch
-
-Use branch format: `deepwork/commit-[instance]-YYYYMMDD`
-
-- If on a matching work branch: continue using it
-- If on main/master: create new branch with `git checkout -b deepwork/commit-[instance]-$(date +%Y%m%d)`
-
-## Outputs
-
-**Required outputs**:
-- `code_reviewed`
-
-## Guardrails
-
-- Do NOT skip prerequisite verification if this step has dependencies
-- Do NOT produce partial outputs; complete all required outputs before finishing
-- Do NOT proceed without required inputs; ask the user if any are missing
-- Do NOT modify files outside the scope of this step's defined outputs
-
-## Quality Validation
-
-**Before completing this step, you MUST have your work reviewed against the quality criteria below.**
-
-Use a sub-agent (Haiku model) to review your work against these criteria:
-
-**Criteria (all must be satisfied)**:
-1. Changed files were identified
-2. Sub-agent reviewed the code for general issues, DRY opportunities, naming clarity, and test coverage
-3. All identified issues were addressed or documented as intentional
-**Review Process**:
-1. Once you believe your work is complete, spawn a sub-agent using Haiku to review your work against the quality criteria above
-2. The sub-agent should examine your outputs and verify each criterion is met
-3. If the sub-agent identifies valid issues, fix them
-4. Have the sub-agent review again until all valid feedback has been addressed
-5. Only mark the step complete when the sub-agent confirms all criteria are satisfied
-
-## On Completion
-
-1. Verify outputs are created
-2. Inform user: "full step 1/4 complete, outputs: code_reviewed"
-3. **Continue workflow**: Use Skill tool to invoke `/commit.test`
-
----
-
-**Reference files**: `.deepwork/jobs/commit/job.yml`, `.deepwork/jobs/commit/steps/review.md`
\ No newline at end of file
diff --git a/.claude/skills/commit.test/SKILL.md b/.claude/skills/commit.test/SKILL.md
deleted file mode 100644
index cddf224a..00000000
--- a/.claude/skills/commit.test/SKILL.md
+++ /dev/null
@@ -1,138 +0,0 @@
----
-name: commit.test
-description: "Pulls latest code and runs tests until all pass. Use after code review passes to verify changes work correctly."
-user-invocable: false
-
----
-
-# commit.test
-
-**Step 2/4** in **full** workflow
-
-> Full commit workflow: review, test, lint, and commit
-
-> Reviews code, runs tests, lints, and commits changes. Use when ready to commit work with quality checks.
-
-## Prerequisites (Verify First)
-
-Before proceeding, confirm these steps are complete:
-- `/commit.review`
-
-## Instructions
-
-**Goal**: Pulls latest code and runs tests until all pass. Use after code review passes to verify changes work correctly.
-
-# Run Tests
-
-## Objective
-
-Run the project's test suite and fix any failing tests until all tests pass.
-
-## Task
-
-Execute the test suite for the project and iteratively fix any failures until all tests pass.
-
-### Process
-
-1. **Pull latest code from the branch**
-   - Run `git pull` to fetch and merge any changes from the remote
-   - If there are merge conflicts, resolve them before proceeding
-   - This ensures you're testing against the latest code
-
-2. **Detect or use the test command**
-   - If a test command was provided, use that
-   - Otherwise, auto-detect the project type and determine the appropriate test command:
-     - Python: `pytest`, `python -m pytest`, `uv run pytest`
-     - Node.js: `npm test`, `yarn test`, `bun test`
-     - Go: `go test ./...`
-     - Rust: `cargo test`
-     - Check `package.json`, `pyproject.toml`, `Cargo.toml`, `go.mod` for hints
-
-3. **Run the tests**
-   - Execute the test command
-   - Capture the output
-
-4. **Analyze failures**
-   - If tests pass, proceed to output
-   - If tests fail, analyze the failure messages
-   - Identify the root cause of each failure
-
-5. **Fix failing tests**
-   - Make the necessary code changes to fix failures
-   - This may involve fixing bugs in implementation code or updating tests
-   - Re-run tests after each fix
-
-6. **Iterate until passing**
-   - Continue the fix/test cycle until all tests pass
-
-## Quality Criteria
-
-- Latest code was pulled from the branch
-- All tests are passing
-
-## Context
-
-This step runs after code review. Tests must pass before proceeding to lint and commit. This ensures code quality and prevents broken code from being committed. If tests fail due to issues introduced by the code review fixes, iterate on the fixes until tests pass.
-
-
-### Job Context
-
-A workflow for preparing and committing code changes with quality checks.
-
-The **full** workflow starts with a code review to catch issues early, runs tests until
-they pass, formats and lints code with ruff, then reviews changed files
-before committing and pushing. The review and lint steps use sub-agents
-to reduce context usage.
-
-Steps:
-1. review - Code review for issues, DRY opportunities, naming, and test coverage (runs in sub-agent)
-2. test - Pull latest code and run tests until they pass
-3. lint - Format and lint code with ruff (runs in sub-agent)
-4. commit_and_push - Review changes and commit/push
-
-
-
-## Work Branch
-
-Use branch format: `deepwork/commit-[instance]-YYYYMMDD`
-
-- If on a matching work branch: continue using it
-- If on main/master: create new branch with `git checkout -b deepwork/commit-[instance]-$(date +%Y%m%d)`
-
-## Outputs
-
-**Required outputs**:
-- `tests_passing`
-
-## Guardrails
-
-- Do NOT skip prerequisite verification if this step has dependencies
-- Do NOT produce partial outputs; complete all required outputs before finishing
-- Do NOT proceed without required inputs; ask the user if any are missing
-- Do NOT modify files outside the scope of this step's defined outputs
-
-## Quality Validation
-
-**Before completing this step, you MUST have your work reviewed against the quality criteria below.**
-
-Use a sub-agent (Haiku model) to review your work against these criteria:
-
-**Criteria (all must be satisfied)**:
-1. Latest code was pulled from the branch
-2. All tests are passing
-**Review Process**:
-1. Once you believe your work is complete, spawn a sub-agent using Haiku to review your work against the quality criteria above
-2. The sub-agent should examine your outputs and verify each criterion is met
-3. If the sub-agent identifies valid issues, fix them
-4. Have the sub-agent review again until all valid feedback has been addressed
-5. Only mark the step complete when the sub-agent confirms all criteria are satisfied
-
-## On Completion
-
-1. Verify outputs are created
-2. Inform user: "full step 2/4 complete, outputs: tests_passing"
-3. **Continue workflow**: Use Skill tool to invoke `/commit.lint`
-
----
-
-**Reference files**: `.deepwork/jobs/commit/job.yml`, `.deepwork/jobs/commit/steps/test.md`
\ No newline at end of file
diff --git a/.claude/skills/commit/SKILL.md b/.claude/skills/commit/SKILL.md
deleted file mode 100644
index 3839ffbd..00000000
--- a/.claude/skills/commit/SKILL.md
+++ /dev/null
@@ -1,79 +0,0 @@
----
-name: commit
-description: "Reviews code, runs tests, lints, and commits changes. Use when ready to commit work with quality checks."
----
-
-# commit
-
-Reviews code, runs tests, lints, and commits changes. Use when ready to commit work with quality checks.
-
-> **CRITICAL**: Always invoke steps using the Skill tool. Never copy/paste step instructions directly.
-
-A workflow for preparing and committing code changes with quality checks.
-
-The **full** workflow starts with a code review to catch issues early, runs tests until
-they pass, formats and lints code with ruff, then reviews changed files
-before committing and pushing. The review and lint steps use sub-agents
-to reduce context usage.
-
-Steps:
-1. review - Code review for issues, DRY opportunities, naming, and test coverage (runs in sub-agent)
-2. test - Pull latest code and run tests until they pass
-3. lint - Format and lint code with ruff (runs in sub-agent)
-4. commit_and_push - Review changes and commit/push
-
-
-## Workflows
-
-### full
-
-Full commit workflow: review, test, lint, and commit
-
-**Steps in order**:
-1. **review** - Reviews changed code for issues, DRY opportunities, naming clarity, and test coverage using a sub-agent. Use as the first step before testing.
-2. **test** - Pulls latest code and runs tests until all pass. Use after code review passes to verify changes work correctly.
-3. **lint** - Formats and lints code with ruff using a sub-agent. Use after tests pass to ensure code style compliance.
-4. **commit_and_push** - Verifies changed files, creates commit, and pushes to remote. Use after linting passes to finalize changes.
-
-**Start workflow**: `/commit.review`
-
-
-## Execution Instructions
-
-### Step 1: Analyze Intent
-
-Parse any text following `/commit` to determine user intent:
-- "full" or related terms → start full workflow at `commit.review`
-
-### Step 2: Invoke Starting Step
-
-Use the Skill tool to invoke the identified starting step:
-```
-Skill tool: commit.review
-```
-
-### Step 3: Continue Workflow Automatically
-
-After each step completes:
-1. Check if there's a next step in the workflow sequence
-2. Invoke the next step using the Skill tool
-3. Repeat until workflow is complete or user intervenes
-
-**Note**: Standalone skills do not auto-continue to other steps.
-
-### Handling Ambiguous Intent
-
-If user intent is unclear, use AskUserQuestion to clarify:
-- Present available workflows and standalone skills as options
-- Let user select the starting point
-
-## Guardrails
-
-- Do NOT copy/paste step instructions directly; always use the Skill tool to invoke steps
-- Do NOT skip steps in a workflow unless the user explicitly requests it
-- Do NOT proceed to the next step if the current step's outputs are incomplete
-- Do NOT make assumptions about user intent; ask for clarification when ambiguous
-
-## Context Files
-
-- Job definition: `.deepwork/jobs/commit/job.yml`
\ No newline at end of file
diff --git a/.claude/skills/deepwork/SKILL.md b/.claude/skills/deepwork/SKILL.md
new file mode 100644
index 00000000..ee3021d2
--- /dev/null
+++ b/.claude/skills/deepwork/SKILL.md
@@ -0,0 +1,29 @@
+---
+name: deepwork
+description: "Start or continue DeepWork workflows using MCP tools"
+---
+
+# DeepWork Workflow Manager
+
+Execute multi-step workflows with quality gate checkpoints.
+
+> **IMPORTANT**: Use the DeepWork MCP server tools. All workflow operations
+> are performed through MCP tool calls and following the instructions they return,
+> not by reading instructions from files.
+
+## How to Use
+
+1. Call `get_workflows` to discover available workflows
+2. Call `start_workflow` with goal, job_name, and workflow_name
+3. Follow the step instructions returned
+4. Call `finished_step` with your outputs when done
+5. Handle the response: `needs_work`, `next_step`, or `workflow_complete`
+
+## Intent Parsing
+
+When the user invokes `/deepwork`, parse their intent:
+1. **ALWAYS**: Call `get_workflows` to discover available workflows
+2. Based on the available flows and what the user said in their request, proceed:
+    - **Explicit workflow**: `/deepwork <a workflow name>` → start the `<a workflow name>` workflow
+    - **General request**: `/deepwork <a request>` → infer best match from available workflows
+    - **No context**: `/deepwork` alone → ask user to choose from available workflows
\ No newline at end of file
diff --git a/.claude/skills/deepwork_jobs.define/SKILL.md b/.claude/skills/deepwork_jobs.define/SKILL.md
deleted file mode 100644
index f1469253..00000000
--- a/.claude/skills/deepwork_jobs.define/SKILL.md
+++ /dev/null
@@ -1,724 +0,0 @@
----
-name: deepwork_jobs.define
-description: "Creates a job.yml specification by gathering workflow requirements through structured questions. Use when starting a new multi-step workflow."
-user-invocable: false
-
----
-
-# deepwork_jobs.define
-
-**Step 1/3** in **new_job** workflow
-
-> Create a new DeepWork job from scratch through definition, review, and implementation
-
-> Creates and manages multi-step AI workflows. Use when defining, implementing, or improving DeepWork jobs.
-
-
-## Instructions
-
-**Goal**: Creates a job.yml specification by gathering workflow requirements through structured questions. Use when starting a new multi-step workflow.
-
-# Define Job Specification
-
-## Objective
-
-Create a `job.yml` specification file that defines the structure of a new DeepWork job by thoroughly understanding the user's workflow requirements through an interactive question-and-answer process.
-
-## Task
-
-Guide the user through defining a job specification by asking structured questions. **Do not attempt to create the specification without first fully understanding the user's needs.**
-
-**Important**: Use the AskUserQuestion tool to ask structured questions when gathering information from the user. This provides a better user experience with clear options and guided choices.
-
-The output of this step is **only** the `job.yml` file - a complete specification of the workflow. The actual step instruction files will be created in the next step (`implement`).
-
-### Step 1: Understand the Job Purpose
-
-Start by asking structured questions to understand what the user wants to accomplish:
-
-1. **What is the overall goal of this workflow?**
-   - What complex task are they trying to accomplish?
-   - What domain is this in? (e.g., research, marketing, development, reporting)
-   - How often will they run this workflow?
-
-2. **What does success look like?**
-   - What's the final deliverable or outcome?
-   - Who is the audience for the output?
-   - What quality criteria matter most?
-
-3. **What are the major phases?**
-   - Ask them to describe the workflow at a high level
-   - What are the distinct stages from start to finish?
-   - Are there any dependencies between phases?
-
-### Step 1.5: Detect Document-Oriented Workflows
-
-**Check for document-focused patterns** in the user's description:
-- Keywords: "report", "summary", "document", "create", "monthly", "quarterly", "for stakeholders", "for leadership"
-- Final deliverable is a specific document (e.g., "AWS spending report", "competitive analysis", "sprint summary")
-- Recurring documents with consistent structure
-
-**If a document-oriented workflow is detected:**
-
-1. Inform the user: "This workflow produces a specific document type. I recommend defining a doc spec first to ensure consistent quality."
-
-2. Ask structured questions to understand if they want to:
-   - Create a doc spec for this document
-   - Use an existing doc spec (if any exist in `.deepwork/doc_specs/`)
-   - Skip doc spec and proceed with simple outputs
-
-### Step 1.6: Define the Doc Spec (if needed)
-
-When creating a doc spec, gather the following information:
-
-1. **Document Identity**
-   - What is the document called? (e.g., "Monthly AWS Spending Report")
-   - Brief description of its purpose
-   - Where should these documents be stored? (path patterns like `finance/aws-reports/*.md`)
-
-2. **Audience and Context**
-   - Who reads this document? (target audience)
-   - How often is it produced? (frequency)
-
-3. **Quality Criteria** (3-5 criteria, each with name and description)
-
-   **Important**: Doc spec quality criteria define requirements for the **output document itself**, not the process of creating it. Focus on what the finished document must contain or achieve.
-
-   Examples for a spending report:
-   - **Visualization**: Must include charts showing spend breakdown by service
-   - **Variance Analysis**: Must compare current month against previous with percentages
-   - **Action Items**: Must include recommended cost optimization actions
-
-   **Note**: When a doc spec is created for a step's output, the step should generally NOT have separate `quality_criteria` in the job.yml. The doc spec's criteria cover output quality. Only add step-level quality_criteria if there are essential process requirements (e.g., "must use specific tool"), and minimize these when possible.
-
-4. **Document Structure**
-   - What sections should it have?
-   - Any required elements (tables, charts, summaries)?
-
-### Step 1.7: Create the doc spec File (if needed)
-
-Create the doc spec file at `.deepwork/doc_specs/[doc_spec_name].md`:
-
-**Template reference**: See `.deepwork/jobs/deepwork_jobs/templates/doc_spec.md.template` for the standard structure.
-
-**Complete example**: See `.deepwork/doc_specs/job_spec.md` for a fully worked example (the doc spec for job.yml files).
-
-After creating the doc spec, proceed to Step 2 with the doc spec reference for the final step's output.
-
-### Step 2: Define Each Step
-
-For each major phase they mentioned, ask structured questions to gather details:
-
-1. **Step Purpose**
-   - What exactly does this step accomplish?
-   - What is the input to this step?
-   - What is the output from this step?
-
-2. **Step Inputs**
-   - What information is needed to start this step?
-   - Does it need user-provided parameters? (e.g., topic, target audience)
-   - Does it need files from previous steps?
-   - What format should inputs be in?
-
-3. **Step Outputs**
-   - What files or artifacts does this step produce?
-   - What format should the output be in? (markdown, YAML, JSON, etc.)
-   - Where should each output be saved? (filename/path)
-   - Should outputs be organized in subdirectories? (e.g., `reports/`, `data/`, `drafts/`)
-   - Will other steps need this output?
-   - **Does this output have a doc spec?** If a doc spec was created in Step 1.6/1.7, reference it for the appropriate output
-
-   #### Work Product Storage Guidelines
-
-   **Key principle**: Job outputs belong in the main repository directory structure, not in dot-directories. The `.deepwork/` directory is for job definitions and configuration only.
-
-   **Why this matters**:
-   - **Version control**: Work products in the main repo are tracked by git and visible in PRs
-   - **Discoverability**: Team members can find outputs without knowing about DeepWork internals
-   - **Tooling compatibility**: IDEs, search tools, and CI/CD work naturally with standard paths
-   - **Glob patterns**: Well-structured paths enable powerful file matching (e.g., `competitive_research/**/*.md`)
-
-   **Good output path patterns**:
-   ```
-   competitive_research/competitors_list.md
-   competitive_research/acme_corp/research.md
-   operations/reports/2026-01/spending_analysis.md
-   docs/api/endpoints.md
-   ```
-
-   **Avoid these patterns**:
-   ```
-   .deepwork/outputs/report.md          # Hidden in dot-directory
-   output.md                            # Too generic, no context
-   research.md                          # Unclear which research
-   temp/draft.md                        # Transient-sounding paths
-   ```
-
-   **Organizing multi-file outputs**:
-   - Use the job name as a top-level folder when outputs are job-specific
-   - Use parameterized paths for per-entity outputs: `competitive_research/[competitor_name]/`
-   - Match existing project conventions when extending a codebase
-
-   **When to include dates in paths**:
-   - **Include date** for periodic outputs where each version is retained (e.g., monthly reports, quarterly reviews, weekly summaries). These accumulate over time and historical versions remain useful.
-     ```
-     operations/reports/2026-01/spending_analysis.md              # Monthly report - keep history
-     hr/employees/[employee_name]/quarterly_reviews/2026-Q1.pdf   # Per-employee quarterly review
-     ```
-   - **Omit date** for current-state outputs that represent the latest understanding and get updated in place. Previous versions live in git history, not separate files.
-     ```
-     competitive_research/acme_corp/swot.md  # Current SWOT - updated over time
-     docs/architecture/overview.md           # Living document
-     ```
-
-   **Supporting materials and intermediate outputs**:
-   - Content generated in earlier steps to support the final output (research notes, data extracts, drafts) should be placed in a `_dataroom` folder that is a peer to the final output
-   - Name the dataroom folder by replacing the file extension with `_dataroom`
-     ```
-     operations/reports/2026-01/spending_analysis.md           # Final output
-     operations/reports/2026-01/spending_analysis_dataroom/    # Supporting materials
-         raw_data.csv
-         vendor_breakdown.md
-         notes.md
-     ```
-   - This keeps supporting materials organized and discoverable without cluttering the main output location
-
-4. **Step Dependencies**
-   - Which previous steps must complete before this one?
-   - Are there any ordering constraints?
-
-5. **Step Process** (high-level understanding)
-   - What are the key activities in this step?
-   - Are there any quality checks or validation needed?
-   - What makes a good vs. bad output for this step?
-
-6. **Agent Delegation** (optional)
-   - Should this step be executed by a specific agent type?
-   - Use the `agent` field when the step should run in a forked context with a specific agent
-   - When `agent` is set, the generated skill automatically includes `context: fork`
-   - Available agent types:
-     - `general-purpose` - Standard agent for multi-step tasks
-
-   ```yaml
-   steps:
-     - id: research_step
-       agent: general-purpose  # Delegates to the general-purpose agent
-   ```
-
-**Note**: You're gathering this information to understand what instructions will be needed, but you won't create the instruction files yet - that happens in the `implement` step.
-
-#### Doc Spec-Aware Output Format
-
-When a step produces a document with a doc spec reference, use this format in job.yml:
-
-```yaml
-outputs:
-  - file: reports/monthly_spending.md
-    doc_spec: .deepwork/doc_specs/monthly_aws_report.md
-```
-
-The doc spec's quality criteria will automatically be included in the generated skill, ensuring consistent document quality.
-
-### Capability Considerations
-
-When defining steps, identify any that require specialized tools:
-
-**Browser Automation**: If any step involves web scraping, form filling, interactive browsing, UI testing, or research requiring website visits, ask the user what browser tools they have available. For Claude Code users, **Claude in Chrome** (Anthropic's browser extension) has been tested with DeepWork and is recommended for new users. Don't assume a default—confirm the tool before designing browser-dependent steps.
-
-### Step 3: Validate the Workflow
-
-After gathering information about all steps:
-
-1. **Review the flow**
-   - Summarize the complete workflow
-   - Show how outputs from one step feed into the next
-   - Ask if anything is missing
-
-2. **Check for gaps**
-   - Are there any steps where the input isn't clearly defined?
-   - Are there any outputs that aren't used by later steps?
-   - Are there circular dependencies?
-
-3. **Confirm details**
-   - Job name (lowercase, underscores, descriptive)
-   - Job summary (one clear sentence, max 200 chars)
-   - Job description (detailed multi-line explanation)
-   - Version number (start with 1.0.0)
-
-### Step 4: Define Quality Validation (Stop Hooks)
-
-For each step, consider whether it would benefit from **quality validation loops**. Stop hooks allow the AI agent to iteratively refine its work until quality criteria are met.
-
-**Ask structured questions about quality validation:**
-- "Are there specific quality criteria that must be met for this step?"
-- "Would you like the agent to validate its work before completing?"
-- "What would make you send the work back for revision?"
-
-**Stop hooks are particularly valuable for:**
-- Steps with complex outputs that need multiple checks
-- Steps where quality is critical (final deliverables)
-- Steps with subjective quality criteria that benefit from AI self-review
-
-**Three types of stop hooks are supported:**
-
-1. **Inline Prompt** (`prompt`) - Best for simple quality criteria
-   ```yaml
-   stop_hooks:
-     - prompt: |
-         Verify the output meets these criteria:
-         1. Contains at least 5 competitors
-         2. Each competitor has a description
-         3. Selection rationale is clear
-   ```
-
-2. **Prompt File** (`prompt_file`) - For detailed/reusable criteria
-   ```yaml
-   stop_hooks:
-     - prompt_file: hooks/quality_check.md
-   ```
-
-3. **Script** (`script`) - For programmatic validation (tests, linting)
-   ```yaml
-   stop_hooks:
-     - script: hooks/run_tests.sh
-   ```
-
-**Multiple hooks can be combined:**
-```yaml
-stop_hooks:
-  - script: hooks/lint_output.sh
-  - prompt: "Verify the content is comprehensive and well-organized"
-```
-
-**Encourage prompt-based hooks** - They leverage the AI's ability to understand context and make nuanced quality judgments. Script hooks are best for objective checks (syntax, format, tests).
-
-### Step 5: Create the Job Directory and Specification
-
-Only after you have complete understanding, create the job directory and `job.yml` file:
-
-**First, create the directory structure** using the `make_new_job.sh` script:
-
-```bash
-.deepwork/jobs/deepwork_jobs/make_new_job.sh [job_name]
-```
-
-This creates:
-- `.deepwork/jobs/[job_name]/` - Main job directory
-- `.deepwork/jobs/[job_name]/steps/` - For step instruction files
-- `.deepwork/jobs/[job_name]/hooks/` - For custom validation scripts
-- `.deepwork/jobs/[job_name]/templates/` - For example file formats
-- `.deepwork/jobs/[job_name]/AGENTS.md` - Job management guidance
-
-**Then create the job.yml file** at `.deepwork/jobs/[job_name]/job.yml`
-
-(Where `[job_name]` is the name of the NEW job you're creating, e.g., `competitive_research`)
-
-**Doc Spec**: See `.deepwork/doc_specs/job_spec.md` for the complete specification with quality criteria.
-
-**Template reference**: See `.deepwork/jobs/deepwork_jobs/templates/job.yml.template` for the standard structure.
-
-**Complete example**: See `.deepwork/jobs/deepwork_jobs/templates/job.yml.example` for a fully worked example.
-
-**Important**:
-- Use lowercase with underscores for job name and step IDs
-- Ensure file inputs reference steps in dependencies
-- No circular dependencies
-- At least one output per step
-- The `summary` should be concise (max 200 chars)
-- The `description` should provide rich context for future refinement
-- Include a `changelog` section with an initial entry for version 1.0.0
-
-## Example Dialog
-
-```
-Claude: I'll help you define a new DeepWork job. Let's start by understanding what you're trying to accomplish.
-
-What complex task or workflow are you trying to create? What's the overall goal?
-
-User: I want to create competitive research reports for my company
-
-Claude: Great! Let's dig into that. A few questions:
-
-1. What's the final deliverable? Is it a written report, a presentation, or something else?
-2. Who is the audience? Your team, executives, clients?
-3. How comprehensive should these reports be?
-
-User: It's a written report for our product team. Should be comprehensive - analyzing 3-5 competitors.
-
-Claude: Perfect. Now let's think about the major phases of creating this report. What are the main stages you go through? For example:
-- Identifying which competitors to analyze?
-- Gathering information about them?
-- Analyzing the data?
-- Writing the report?
-
-Or is your process different?
-
-User: Yes, that's close. I would say:
-1. Identify competitors
-2. Research each one
-3. Create comparison analysis
-4. Write positioning recommendations
-
-Claude: Excellent! Now let's define each step in detail. Starting with "Identify competitors":
-
-1. What inputs do you need to start? Do you already have a market segment in mind, or do you need to specify that?
-2. What should the output look like? A list of competitor names? More detailed?
-3. How many competitors should be identified?
-
-User: I need to specify the market segment and product category as inputs. The output should be a markdown file with 5-7 competitors and brief descriptions of each.
-
-Claude: Perfect. A few more questions about that output:
-1. What should the file be called? Something like `competitors_list.md`?
-2. Should it go in the project root, or in a subdirectory like `research/` or `data/`?
-
-User: Just call it `competitors_list.md` in the project root.
-
-Claude: Great. Now for the "Research each one" step:
-1. Does it use the competitor list from the first step as input?
-2. What kind of research - product features, pricing, marketing, all of the above?
-3. What format should the research output be in?
-4. Where should that research be saved?
-
-[continues for each step...]
-
-[After gathering all information:]
-
-Claude: Let me summarize the workflow I've designed based on our conversation:
-
-**Job: competitive_research**
-Summary: Systematic competitive analysis workflow for product positioning
-
-Description:
-A comprehensive workflow for analyzing competitors in your market segment. This job helps product teams understand the competitive landscape by systematically identifying competitors, researching their offerings, creating comparison matrices, and developing strategic positioning recommendations.
-
-The workflow produces:
-- A vetted list of key competitors
-- Detailed research notes on each competitor
-- A comparison matrix highlighting key differentiators
-- Strategic positioning recommendations
-
-Designed for product teams conducting quarterly competitive analysis.
-
-**Steps:**
-
-Step 1: identify_competitors
-- Input: market_segment (user), product_category (user)
-- Output: competitors_list.md (5-7 competitors with descriptions)
-
-Step 2: research_competitors
-- Input: competitors_list.md (from step 1)
-- Output: research_notes.md (detailed findings for each competitor)
-- Depends on: identify_competitors
-
-Step 3: comparative_analysis
-- Input: research_notes.md (from step 2)
-- Output: comparison_matrix.md (side-by-side analysis)
-- Depends on: research_competitors
-
-Step 4: positioning_recommendations
-- Input: comparison_matrix.md (from step 3)
-- Output: positioning_report.md (strategic recommendations)
-- Depends on: comparative_analysis
-
-Does this accurately capture your workflow? Any changes needed?
-
-User: Yes, that's perfect!
-
-Claude: Great! Creating the job.yml specification now...
-
-[Creates .deepwork/jobs/competitive_research/job.yml with the complete spec]
-
-✓ Job specification created!
-
-**File created:**
-- .deepwork/jobs/competitive_research/job.yml
-
-**Next step:**
-Run `/deepwork_jobs.review_job_spec` to validate the specification against quality criteria.
-```
-
-## Important Guidelines
-
-1. **Focus on specification only** - Don't create instruction files yet
-2. **Ask structured questions** - Never skip the discovery phase; use the AskUserQuestion tool
-3. **Rich context in description** - This helps with future refinement
-4. **Validate understanding** - Summarize and confirm before creating
-5. **Use examples** - Help users understand what good specifications look like
-6. **Understand file organization** - Always ask structured questions about where outputs should be saved and if subdirectories are needed
-
-## Validation Rules
-
-Before creating the job.yml, ensure:
-- Job name: lowercase, underscores, no spaces
-- Version: semantic versioning (1.0.0)
-- Summary: concise, under 200 characters
-- Description: detailed, provides context
-- Step IDs: unique, descriptive, lowercase with underscores
-- Dependencies: must reference existing step IDs
-- File inputs: `from_step` must be in dependencies
-- At least one output per step
-- Outputs can be filenames (e.g., `report.md`) or paths (e.g., `reports/analysis.md`)
-- File paths in outputs should match where files will actually be created
-- No circular dependencies
-
-## Output Format
-
-### job.yml
-
-The complete YAML specification file (example shown in Step 5 above).
-
-**Location**: `.deepwork/jobs/[job_name]/job.yml`
-
-(Where `[job_name]` is the name of the new job being created)
-
-After creating the file:
-1. Inform the user that the specification is complete
-2. Recommend that they review the job.yml file
-3. Tell them to run `/deepwork_jobs.review_job_spec` next
-
-
-
-### Job Context
-
-Core commands for managing DeepWork jobs. These commands help you define new multi-step
-workflows and learn from running them.
-
-The `new_job` workflow guides you through defining and implementing a new job by
-asking structured questions about your workflow, understanding each step's inputs and outputs,
-reviewing the specification, and generating all necessary files.
-
-The `learn` skill reflects on conversations where DeepWork jobs were run, identifies
-confusion or inefficiencies, and improves job instructions. It also captures bespoke
-learnings specific to the current run into AGENTS.md files in the working folder.
-
-
-## Required Inputs
-
-**User Parameters** - Gather from user before starting:
-- **job_purpose**: What complex task or workflow are you trying to accomplish?
-
-
-## Work Branch
-
-Use branch format: `deepwork/deepwork_jobs-[instance]-YYYYMMDD`
-
-- If on a matching work branch: continue using it
-- If on main/master: create new branch with `git checkout -b deepwork/deepwork_jobs-[instance]-$(date +%Y%m%d)`
-
-## Outputs
-
-**Required outputs**:
-- `job.yml`
-  **Doc Spec**: DeepWork Job Specification
-  > YAML specification file that defines a multi-step workflow job for AI agents
-  **Definition**: `.deepwork/doc_specs/job_spec.md`
-  **Target Audience**: AI agents executing jobs and developers defining workflows
-  **Quality Criteria**:
-  1. **Valid Identifier**: Job name must be lowercase with underscores, no spaces or special characters (e.g., `competitive_research`, `monthly_report`)
-  2. **Semantic Version**: Version must follow semantic versioning format X.Y.Z (e.g., `1.0.0`, `2.1.3`)
-  3. **Concise Summary**: Summary must be under 200 characters and clearly describe what the job accomplishes
-  4. **Rich Description**: Description must be multi-line and explain: the problem solved, the process, expected outcomes, and target users
-  5. **Changelog Present**: Must include a changelog array with at least the initial version entry. Changelog should only include one entry per branch at most
-  6. **Complete Steps**: Each step must have: id (lowercase_underscores), name, description, instructions_file, outputs (at least one), and dependencies array
-  7. **Valid Dependencies**: Dependencies must reference existing step IDs with no circular references
-  8. **Input Consistency**: File inputs with `from_step` must reference a step that is in the dependencies array
-  9. **Output Paths**: Outputs must be valid filenames or paths within the main repo directory structure, never in dot-directories like `.deepwork/`. Use specific, descriptive paths that lend themselves to glob patterns (e.g., `competitive_research/acme_corp/swot.md` or `operations/reports/2026-01/spending_analysis.md`). Parameterized paths like `[competitor_name]/` are encouraged for per-entity outputs. Avoid generic names (`output.md`, `analysis.md`) and transient-sounding paths (`temp/`, `draft.md`). Supporting materials for a final output should go in a peer `_dataroom` folder (e.g., `spending_analysis_dataroom/`).
-  10. **Concise Instructions**: The content of the file, particularly the description, must not have excessively redundant information. It should be concise and to the point given that extra tokens will confuse the AI.
-
-  <details>
-  <summary>Example Document Structure</summary>
-
-  ```markdown
-  # DeepWork Job Specification: [job_name]
-
-  A `job.yml` file defines a complete multi-step workflow that AI agents can execute. Each job breaks down a complex task into reviewable steps with clear inputs and outputs.
-
-  ## Required Fields
-
-  ### Top-Level Metadata
-
-  ```yaml
-  name: job_name                    # lowercase, underscores only
-  version: "1.0.0"                  # semantic versioning
-  summary: "Brief description"      # max 200 characters
-  description: |                    # detailed multi-line explanation
-    [Explain what this workflow does, why it exists,
-    what outputs it produces, and who should use it]
-  ```
-
-  ### Changelog
-
-  ```yaml
-  changelog:
-    - version: "1.0.0"
-      changes: "Initial job creation"
-    - version: "1.1.0"
-      changes: "Added quality validation hooks"
-  ```
-
-  ### Steps Array
-
-  ```yaml
-  steps:
-    - id: step_id                   # unique, lowercase_underscores
-      name: "Human Readable Name"
-      description: "What this step accomplishes"
-      instructions_file: steps/step_id.md
-      inputs:
-        # User-provided inputs:
-        - name: param_name
-          description: "What the user provides"
-        # File inputs from previous steps:
-        - file: output.md
-          from_step: previous_step_id
-      outputs:
-        - competitive_research/competitors_list.md           # descriptive path
-        - competitive_research/[competitor_name]/research.md # parameterized path
-        # With doc spec reference:
-        - file: competitive_research/final_report.md
-          doc_spec: .deepwork/doc_specs/report_type.md
-      dependencies:
-        - previous_step_id          # steps that must complete first
-  ```
-
-  ## Optional Fields
-
-  ### Exposed Steps
-
-  ```yaml
-  steps:
-    - id: learn
-      exposed: true                 # Makes step available without running dependencies
-  ```
-
-  ### Agent Delegation
-
-  When a step should be executed by a specific agent type, use the `agent` field. This automatically sets `context: fork` in the generated skill.
-
-  ```yaml
-  steps:
-    - id: research_step
-      agent: general-purpose        # Delegates to the general-purpose agent
-  ```
-
-  Available agent types:
-  - `general-purpose` - Standard agent for multi-step tasks
-
-  ### Quality Hooks
-
-  ```yaml
-  steps:
-    - id: step_id
-      hooks:
-        after_agent:
-          # Inline prompt for quality validation:
-          - prompt: |
-              Verify the output meets criteria:
-              1. [Criterion 1]
-              2. [Criterion 2]
-              If ALL criteria are met, include `<promise>...</promise>`.
-          # External prompt file:
-          - prompt_file: hooks/quality_check.md
-          # Script for programmatic validation:
-          - script: hooks/run_tests.sh
-  ```
-
-  ### Stop Hooks (Legacy)
-
-  ```yaml
-  steps:
-    - id: step_id
-      stop_hooks:
-        - prompt: "Validation prompt..."
-        - prompt_file: hooks/check.md
-        - script: hooks/validate.sh
-  ```
-
-  ## Validation Rules
-
-  1. **No circular dependencies**: Step A cannot depend on Step B if Step B depends on Step A
-  2. **File inputs require dependencies**: If a step uses `from_step: X`, then X must be in its dependencies
-  3. **Unique step IDs**: No two steps can have the same id
-  4. **Valid file paths**: Output paths must not contain invalid characters and should be in the main repo (not dot-directories)
-  5. **Instructions files exist**: Each `instructions_file` path should have a corresponding file created
-
-  ## Example: Complete Job Specification
-
-  ```yaml
-  name: competitive_research
-  version: "1.0.0"
-  summary: "Systematic competitive analysis workflow"
-  description: |
-    A comprehensive workflow for analyzing competitors in your market segment.
-    Helps product teams understand the competitive landscape through systematic
-    identification, research, comparison, and positioning recommendations.
-
-    Produces:
-    - Vetted competitor list
-    - Research notes per competitor
-    - Comparison matrix
-    - Strategic positioning report
-
-  changelog:
-    - version: "1.0.0"
-      changes: "Initial job creation"
-
-  steps:
-    - id: identify_competitors
-      name: "Identify Competitors"
-      description: "Identify 5-7 key competitors in the target market"
-      instructions_file: steps/identify_competitors.md
-      inputs:
-        - name: market_segment
-          description: "The market segment to analyze"
-        - name: product_category
-          description: "The product category"
-      outputs:
-        - competitive_research/competitors_list.md
-      dependencies: []
-
-    - id: research_competitors
-      name: "Research Competitors"
-      description: "Deep dive research on each identified competitor"
-      instructions_file: steps/research_competitors.md
-      inputs:
-        - file: competitive_research/competitors_list.md
-          from_step: identify_competitors
-      outputs:
-        - competitive_research/[competitor_name]/research.md
-      dependencies:
-        - identify_competitors
-
-    - id: positioning_report
-      name: "Positioning Report"
-      description: "Strategic positioning recommendations"
-      instructions_file: steps/positioning_report.md
-      inputs:
-        - file: competitive_research/[competitor_name]/research.md
-          from_step: research_competitors
-      outputs:
-        - file: competitive_research/positioning_report.md
-          doc_spec: .deepwork/doc_specs/positioning_report.md
-      dependencies:
-        - research_competitors
-  ```
-  ```
-
-  </details>
-
-## Guardrails
-
-- Do NOT skip prerequisite verification if this step has dependencies
-- Do NOT produce partial outputs; complete all required outputs before finishing
-- Do NOT proceed without required inputs; ask the user if any are missing
-- Do NOT modify files outside the scope of this step's defined outputs
-
-## On Completion
-
-1. Verify outputs are created
-2. Inform user: "new_job step 1/3 complete, outputs: job.yml"
-3. **Continue workflow**: Use Skill tool to invoke `/deepwork_jobs.review_job_spec`
-
----
-
-**Reference files**: `.deepwork/jobs/deepwork_jobs/job.yml`, `.deepwork/jobs/deepwork_jobs/steps/define.md`
\ No newline at end of file
diff --git a/.claude/skills/deepwork_jobs.implement/SKILL.md b/.claude/skills/deepwork_jobs.implement/SKILL.md
deleted file mode 100644
index a0c1d388..00000000
--- a/.claude/skills/deepwork_jobs.implement/SKILL.md
+++ /dev/null
@@ -1,330 +0,0 @@
----
-name: deepwork_jobs.implement
-description: "Generates step instruction files and syncs slash commands from the job.yml specification. Use after job spec review passes."
-user-invocable: false
-
----
-
-# deepwork_jobs.implement
-
-**Step 3/3** in **new_job** workflow
-
-> Create a new DeepWork job from scratch through definition, review, and implementation
-
-> Creates and manages multi-step AI workflows. Use when defining, implementing, or improving DeepWork jobs.
-
-## Prerequisites (Verify First)
-
-Before proceeding, confirm these steps are complete:
-- `/deepwork_jobs.review_job_spec`
-
-## Instructions
-
-**Goal**: Generates step instruction files and syncs slash commands from the job.yml specification. Use after job spec review passes.
-
-# Implement Job Steps
-
-## Objective
-
-Generate the DeepWork job directory structure and instruction files for each step based on the validated `job.yml` specification from the review_job_spec step.
-
-## Task
-
-Read the `job.yml` specification file and create all the necessary files to make the job functional, including directory structure and step instruction files. Then sync the commands to make them available.
-
-### Step 1: Create Directory Structure Using Script
-
-Run the `make_new_job.sh` script to create the standard directory structure:
-
-```bash
-.deepwork/jobs/deepwork_jobs/make_new_job.sh [job_name]
-```
-
-This creates:
-- `.deepwork/jobs/[job_name]/` - Main job directory
-- `.deepwork/jobs/[job_name]/steps/` - Step instruction files
-- `.deepwork/jobs/[job_name]/hooks/` - Custom validation scripts (with .gitkeep)
-- `.deepwork/jobs/[job_name]/templates/` - Example file formats (with .gitkeep)
-- `.deepwork/jobs/[job_name]/AGENTS.md` - Job management guidance
-
-**Note**: If the directory already exists (e.g., job.yml was created by define step), you can skip this step or manually create the additional directories:
-```bash
-mkdir -p .deepwork/jobs/[job_name]/hooks .deepwork/jobs/[job_name]/templates
-touch .deepwork/jobs/[job_name]/hooks/.gitkeep .deepwork/jobs/[job_name]/templates/.gitkeep
-```
-
-### Step 2: Read and Validate the Specification
-
-1. **Locate the job.yml file**
-   - Read `.deepwork/jobs/[job_name]/job.yml` from the review_job_spec step
-   - Parse the YAML content
-
-2. **Validate the specification**
-   - Ensure it follows the schema (name, version, summary, description, steps)
-   - Check that all dependencies reference existing steps
-   - Verify no circular dependencies
-   - Confirm file inputs match dependencies
-
-3. **Extract key information**
-   - Job name, version, summary, description
-   - List of all steps with their details
-   - Understand the workflow structure
-
-### Step 3: Generate Step Instruction Files
-
-For each step in the job.yml, create a comprehensive instruction file at `.deepwork/jobs/[job_name]/steps/[step_id].md`.
-
-**Template reference**: See `.deepwork/jobs/deepwork_jobs/templates/step_instruction.md.template` for the standard structure.
-
-**Complete example**: See `.deepwork/jobs/deepwork_jobs/templates/step_instruction.md.example` for a fully worked example.
-
-**Available templates in `.deepwork/jobs/deepwork_jobs/templates/`:**
-- `job.yml.template` - Job specification structure
-- `step_instruction.md.template` - Step instruction file structure
-- `agents.md.template` - AGENTS.md file structure
-- `job.yml.example` - Complete job specification example
-- `step_instruction.md.example` - Complete step instruction example
-
-**Guidelines for generating instructions:**
-
-1. **Use the job description** - The detailed description from job.yml provides crucial context
-2. **Be specific** - Don't write generic instructions; tailor them to the step's purpose
-3. **Provide examples** - Show what good output looks like
-4. **Explain the "why"** - Help the user understand the step's role in the workflow
-5. **Quality over quantity** - Detailed, actionable instructions are better than vague ones
-6. **Align with stop hooks** - If the step has `stop_hooks` defined, ensure the quality criteria in the instruction file match the validation criteria in the hooks
-7. **Ask structured questions** - When a step has user inputs, the instructions MUST explicitly tell the agent to "ask structured questions" using the AskUserQuestion tool to gather that information. Never use generic phrasing like "ask the user" - always use "ask structured questions"
-
-### Handling Stop Hooks
-
-If a step in the job.yml has `stop_hooks` defined, the generated instruction file should:
-
-1. **Mirror the quality criteria** - The "Quality Criteria" section should match what the stop hooks will validate
-2. **Be explicit about success** - Help the agent understand when the step is truly complete
-3. **Include the promise pattern** - Mention that `<promise>✓ Quality Criteria Met</promise>` should be included when criteria are met
-
-**Example: If the job.yml has:**
-```yaml
-- id: research_competitors
-  name: "Research Competitors"
-  stop_hooks:
-    - prompt: |
-        Verify the research meets criteria:
-        1. Each competitor has at least 3 data points
-        2. Sources are cited
-        3. Information is current (within last year)
-```
-
-**The instruction file should include:**
-```markdown
-## Quality Criteria
-
-- Each competitor has at least 3 distinct data points
-- All information is sourced with citations
-- Data is current (from within the last year)
-- When all criteria are met, include `<promise>✓ Quality Criteria Met</promise>` in your response
-```
-
-This alignment ensures the AI agent knows exactly what will be validated and can self-check before completing.
-
-### Using Supplementary Reference Files
-
-Step instructions can include additional `.md` files in the `steps/` directory for detailed examples, templates, or reference material. Reference them using the full path from the project root.
-
-See `.deepwork/jobs/deepwork_jobs/steps/supplemental_file_references.md` for detailed documentation and examples.
-
-### Step 4: Verify job.yml Location
-
-Verify that `job.yml` is in the correct location at `.deepwork/jobs/[job_name]/job.yml`. The define and review_job_spec steps should have created and validated it. If for some reason it's not there, you may need to create or move it.
-
-### Step 5: Sync Skills
-
-Run `deepwork sync` to generate the skills for this job:
-
-```bash
-deepwork sync
-```
-
-This will:
-- Parse the job definition
-- Generate skills for each step
-- Make the skills available in `.claude/skills/` (or appropriate platform directory)
-
-### Step 6: Consider Rules for the New Job
-
-After implementing the job, consider whether there are **rules** that would help enforce quality or consistency when working with this job's domain.
-
-**What are rules?**
-
-Rules are automated guardrails stored as markdown files in `.deepwork/rules/` that trigger when certain files change during an AI session. They help ensure:
-- Documentation stays in sync with code
-- Team guidelines are followed
-- Architectural decisions are respected
-- Quality standards are maintained
-
-**When to suggest rules:**
-
-Think about the job you just implemented and ask:
-- Does this job produce outputs that other files depend on?
-- Are there documentation files that should be updated when this job's outputs change?
-- Are there quality checks or reviews that should happen when certain files in this domain change?
-- Could changes to the job's output files impact other parts of the project?
-
-**Examples of rules that might make sense:**
-
-| Job Type | Potential Rule |
-|----------|----------------|
-| API Design | "Update API docs when endpoint definitions change" |
-| Database Schema | "Review migrations when schema files change" |
-| Competitive Research | "Update strategy docs when competitor analysis changes" |
-| Feature Development | "Update changelog when feature files change" |
-| Configuration Management | "Update install guide when config files change" |
-
-**How to offer rule creation:**
-
-If you identify one or more rules that would benefit the user, explain:
-1. **What the rule would do** - What triggers it and what action it prompts
-2. **Why it would help** - How it prevents common mistakes or keeps things in sync
-3. **What files it would watch** - The trigger patterns
-
-Then ask the user:
-
-> "Would you like me to create this rule for you? I can run `/deepwork_rules.define` to set it up."
-
-If the user agrees, invoke the `/deepwork_rules.define` command to guide them through creating the rule.
-
-**Example dialogue:**
-
-```
-Based on the competitive_research job you just created, I noticed that when
-competitor analysis files change, it would be helpful to remind you to update
-your strategy documentation.
-
-I'd suggest a rule like:
-- **Name**: "Update strategy when competitor analysis changes"
-- **Trigger**: `**/positioning_report.md`
-- **Action**: Prompt to review and update `docs/strategy.md`
-
-Would you like me to create this rule? I can run `/deepwork_rules.define` to set it up.
-```
-
-**Note:** Not every job needs rules. Only suggest them when they would genuinely help maintain consistency or quality. Don't force rules where they don't make sense.
-
-## Example Implementation
-
-For a complete worked example showing a job.yml and corresponding step instruction file, see:
-- **Job specification**: `.deepwork/jobs/deepwork_jobs/templates/job.yml.example`
-- **Step instruction**: `.deepwork/jobs/deepwork_jobs/templates/step_instruction.md.example`
-
-## Important Guidelines
-
-1. **Read the spec carefully** - Understand the job's intent from the description
-2. **Generate complete instructions** - Don't create placeholder or stub files
-3. **Maintain consistency** - Use the same structure for all step instruction files
-4. **Provide examples** - Show what good output looks like
-5. **Use context** - The job description provides valuable context for each step
-6. **Be specific** - Tailor instructions to the specific step, not generic advice
-
-## Validation Before Sync
-
-Before running `deepwork sync`, verify:
-- All directories exist
-- `job.yml` is in place
-- All step instruction files exist (one per step)
-- No file system errors
-
-## Completion Checklist
-
-Before marking this step complete, ensure:
-- [ ] job.yml validated and copied to job directory
-- [ ] All step instruction files created
-- [ ] Each instruction file is complete and actionable
-- [ ] `deepwork sync` executed successfully
-- [ ] Skills generated in platform directory
-- [ ] Considered whether rules would benefit this job (Step 6)
-- [ ] If rules suggested, offered to run `/deepwork_rules.define`
-
-## Quality Criteria
-
-- Job directory structure is correct
-- All instruction files are complete (not stubs)
-- Instructions are specific and actionable
-- Output examples are provided in each instruction file
-- Quality criteria defined for each step
-- Steps with user inputs explicitly use "ask structured questions" phrasing
-- Sync completed successfully
-- Skills available for use
-- Thoughtfully considered relevant rules for the job domain
-
-
-### Job Context
-
-Core commands for managing DeepWork jobs. These commands help you define new multi-step
-workflows and learn from running them.
-
-The `new_job` workflow guides you through defining and implementing a new job by
-asking structured questions about your workflow, understanding each step's inputs and outputs,
-reviewing the specification, and generating all necessary files.
-
-The `learn` skill reflects on conversations where DeepWork jobs were run, identifies
-confusion or inefficiencies, and improves job instructions. It also captures bespoke
-learnings specific to the current run into AGENTS.md files in the working folder.
-
-
-## Required Inputs
-
-
-**Files from Previous Steps** - Read these first:
-- `job.yml` (from `review_job_spec`)
-
-## Work Branch
-
-Use branch format: `deepwork/deepwork_jobs-[instance]-YYYYMMDD`
-
-- If on a matching work branch: continue using it
-- If on main/master: create new branch with `git checkout -b deepwork/deepwork_jobs-[instance]-$(date +%Y%m%d)`
-
-## Outputs
-
-**Required outputs**:
-- `steps/` (directory)
-
-## Guardrails
-
-- Do NOT skip prerequisite verification if this step has dependencies
-- Do NOT produce partial outputs; complete all required outputs before finishing
-- Do NOT proceed without required inputs; ask the user if any are missing
-- Do NOT modify files outside the scope of this step's defined outputs
-
-## Quality Validation
-
-**Before completing this step, you MUST have your work reviewed against the quality criteria below.**
-
-Use a sub-agent (Haiku model) to review your work against these criteria:
-
-**Criteria (all must be satisfied)**:
-1. **Directory Structure**: Is `.deepwork/jobs/[job_name]/` created correctly?
-2. **Complete Instructions**: Are ALL step instruction files complete (not stubs or placeholders)?
-3. **Specific & Actionable**: Are instructions tailored to each step's purpose, not generic?
-4. **Output Examples**: Does each instruction file show what good output looks like?
-5. **Quality Criteria**: Does each instruction file define quality criteria for its outputs?
-6. **Ask Structured Questions**: Do step instructions that gather user input explicitly use the phrase "ask structured questions"?
-7. **Sync Complete**: Has `deepwork sync` been run successfully?
-8. **Commands Available**: Are the slash-commands generated in `.claude/commands/`?
-9. **Rules Considered**: Has the agent thought about whether rules would benefit this job? If relevant rules were identified, did they explain them and offer to run `/deepwork_rules.define`? Not every job needs rules - only suggest when genuinely helpful.
-**Review Process**:
-1. Once you believe your work is complete, spawn a sub-agent using Haiku to review your work against the quality criteria above
-2. The sub-agent should examine your outputs and verify each criterion is met
-3. If the sub-agent identifies valid issues, fix them
-4. Have the sub-agent review again until all valid feedback has been addressed
-5. Only mark the step complete when the sub-agent confirms all criteria are satisfied
-
-## On Completion
-
-1. Verify outputs are created
-2. Inform user: "new_job step 3/3 complete, outputs: steps/"
-3. **new_job workflow complete**: All steps finished. Consider creating a PR to merge the work branch.
-
----
-
-**Reference files**: `.deepwork/jobs/deepwork_jobs/job.yml`, `.deepwork/jobs/deepwork_jobs/steps/implement.md`
\ No newline at end of file
diff --git a/.claude/skills/deepwork_jobs.learn/SKILL.md b/.claude/skills/deepwork_jobs.learn/SKILL.md
deleted file mode 100644
index 95e1c61f..00000000
--- a/.claude/skills/deepwork_jobs.learn/SKILL.md
+++ /dev/null
@@ -1,449 +0,0 @@
----
-name: deepwork_jobs.learn
-description: "Analyzes conversation history to improve job instructions and capture learnings. Use after running a job to refine it."
-
----
-
-# deepwork_jobs.learn
-
-**Standalone skill** - can be run anytime
-
-> Creates and manages multi-step AI workflows. Use when defining, implementing, or improving DeepWork jobs.
-
-
-## Instructions
-
-**Goal**: Analyzes conversation history to improve job instructions and capture learnings. Use after running a job to refine it.
-
-# Learn from Job Execution
-
-## Objective
-
-Think deeply about this task. Reflect on the current conversation to identify learnings from DeepWork job executions, improve job instructions with generalizable insights, and capture bespoke (run-specific) learnings in AGENTS.md files in the deepest common folder that would contain all work on the topic in the future.
-
-## Task
-
-Analyze the conversation history to extract learnings and improvements, then apply them appropriately:
-- **Generalizable learnings** → Update job instruction files
-- **Bespoke learnings** (specific to this run) → Add to AGENTS.md in the deepest common folder for the topic
-
-### Step 1: Analyze Conversation for Job Executions
-
-1. **Scan the conversation** for DeepWork slash commands that were run
-   - Look for patterns like `/job_name.step_id`
-   - Identify which jobs and steps were executed
-   - Note the order of execution
-
-2. **Identify the target folder**
-   - This should be the deepest common folder that would contain all work on the topic in the future
-   - Should be clear from conversation history where work was done
-   - If unclear, run `git diff` to see where changes were made on the branch
-
-3. **If no job was specified**, ask the user:
-   - "Which DeepWork job would you like me to learn from?"
-   - List available jobs from `.deepwork/jobs/`
-
-### Step 2: Identify Points of Confusion and Inefficiency
-
-Review the conversation for:
-
-1. **Confusion signals**
-   - Questions the agent asked that shouldn't have been necessary
-   - Misunderstandings about what a step required
-   - Incorrect outputs that needed correction
-   - Ambiguous instructions that led to wrong interpretations
-
-2. **Inefficiency signals**
-   - Extra steps or iterations that were needed
-   - Information that had to be repeated
-   - Context that was missing from instructions
-   - Dependencies that weren't clear
-
-3. **Error patterns**
-   - Failed validations and why they failed
-   - Quality criteria that were misunderstood
-   - Edge cases that weren't handled
-
-4. **Success patterns**
-   - What worked particularly well
-   - Efficient approaches worth preserving
-   - Good examples that could be added to instructions
-
-### Step 3: Classify Learnings
-
-For each learning identified, determine if it is:
-
-**Generalizable** (should improve instructions):
-- Would help ANY future run of this job
-- Addresses unclear or missing guidance
-- Fixes incorrect assumptions in instructions
-- Adds helpful examples or context
-- Examples:
-  - "Step instructions should mention that X format is required"
-  - "Quality criteria should include checking for Y"
-  - "Add example of correct output format"
-
-**doc spec-Related** (should improve doc spec files):
-- Improvements to document quality criteria
-- Changes to document structure or format
-- Updated audience or frequency information
-- Examples:
-  - "The report should include a summary table"
-  - "Quality criterion 'Visualization' needs clearer requirements"
-  - "Documents need a section for action items"
-
-**Bespoke** (should go in AGENTS.md):
-- Specific to THIS project/codebase/run
-- Depends on local conventions or structure
-- References specific files or paths
-- Would not apply to other uses of this job
-- Examples:
-  - "In this codebase, API endpoints are in `src/api/`"
-  - "This project uses camelCase for function names"
-  - "The main config file is at `config/settings.yml`"
-
-### Step 3.5: Identify doc spec-Related Learnings
-
-Review the conversation for doc spec-related improvements:
-
-1. **Quality Criteria Changes**
-   - Were any quality criteria unclear or insufficient?
-   - Did the agent repeatedly fail certain criteria?
-   - Are there new criteria that should be added?
-
-2. **Document Structure Changes**
-   - Did the user request different sections?
-   - Were parts of the document format confusing?
-   - Should the example document be updated?
-
-3. **Metadata Updates**
-   - Has the target audience changed?
-   - Should frequency or path patterns be updated?
-
-**Signals for doc spec improvements:**
-- User asked for changes to document format
-- Repeated validation failures on specific criteria
-- Feedback about missing sections or information
-- Changes to how documents are organized/stored
-
-### Step 4: Update Job Instructions (Generalizable Learnings)
-
-For each generalizable learning:
-
-1. **Locate the instruction file**
-   - Path: `.deepwork/jobs/[job_name]/steps/[step_id].md`
-
-2. **Make targeted improvements**
-   - Add missing context or clarification
-   - Include helpful examples
-   - Clarify ambiguous instructions
-   - Update quality criteria if needed
-
-3. **Keep instructions concise**
-   - Avoid redundancy - don't repeat the same guidance in multiple places
-   - Be direct - remove verbose explanations that don't add value
-   - Prefer bullet points over paragraphs where appropriate
-
-4. **Preserve instruction structure**
-   - Keep existing sections (Objective, Task, Process, Output Format, Quality Criteria)
-   - Add to appropriate sections rather than restructuring
-   - Maintain consistency with other steps
-
-5. **Track changes for changelog**
-   - Note what was changed and why
-   - Prepare changelog entry for job.yml
-
-### Step 4b: Extract Shared Content into Referenced Files
-
-Review all instruction files for the job and identify content that:
-- Appears in multiple step instructions (duplicated)
-- Is lengthy and could be extracted for clarity
-- Would benefit from being maintained in one place
-
-**Extract to shared files:**
-
-1. **Create shared files** in `.deepwork/jobs/[job_name]/steps/shared/`
-   - `conventions.md` - Coding/formatting conventions used across steps
-   - `examples.md` - Common examples referenced by multiple steps
-   - `schemas.md` - Data structures or formats used throughout
-
-2. **Reference from instructions** using markdown includes or explicit references:
-   ```markdown
-   ## Conventions
-
-   Follow the conventions defined in `shared/conventions.md`.
-   ```
-
-3. **Benefits of extraction:**
-   - Single source of truth - update once, applies everywhere
-   - Shorter instruction files - easier to read and maintain
-   - Consistent guidance across steps
-
-### Step 4.5: Update doc spec Files (doc spec-Related Learnings)
-
-If doc spec-related learnings were identified:
-
-1. **Locate the doc spec file**
-   - Find doc spec references in job.yml outputs (look for `doc_spec: .deepwork/doc_specs/[doc_spec_name].md`)
-   - doc spec files are at `.deepwork/doc_specs/[doc_spec_name].md`
-
-2. **Update quality_criteria array**
-   - Add new criteria with name and description
-   - Modify existing criteria descriptions for clarity
-   - Remove criteria that are no longer relevant
-
-3. **Update example document**
-   - Modify the markdown body to reflect structure changes
-   - Ensure the example matches updated criteria
-
-4. **Update metadata as needed**
-   - target_audience: If audience has changed
-   - frequency: If production cadence has changed
-   - path_patterns: If storage location has changed
-
-**Example doc spec update:**
-```yaml
-# Before
-quality_criteria:
-  - name: Visualization
-    description: Include charts
-
-# After
-quality_criteria:
-  - name: Visualization
-    description: Include Mermaid.js charts showing spend breakdown by service and month-over-month trend
-```
-
-### Step 5: Create/Update AGENTS.md (Bespoke Learnings)
-
-The AGENTS.md file captures project-specific knowledge that helps future agent runs.
-
-1. **Determine the correct location**
-   - Place AGENTS.md in the deepest common folder that would contain all work on the topic in the future
-   - This ensures the knowledge is available when working in that context
-   - If uncertain, place at the project root
-
-2. **Use file references where possible**
-   - Instead of duplicating information, reference source files
-   - This keeps AGENTS.md in sync as the codebase evolves
-   - Pattern: "See `path/to/file.ext` for [description]"
-
-3. **AGENTS.md structure**: See `.deepwork/jobs/deepwork_jobs/templates/agents.md.template` for the standard format.
-
-4. **Writing entries**
-   - Be concise but specific
-   - Always prefer file references over inline content
-   - Use line numbers when referencing specific code: `file.ext:42`
-   - Group related learnings together
-
-### Step 6: Update Job Version and Changelog
-
-If instruction files were modified:
-
-1. **Bump version in job.yml**
-   - Patch version (0.0.x) for instruction improvements
-   - Minor version (0.x.0) if quality criteria changed
-
-2. **Add changelog entry**
-   ```yaml
-   - version: "[new_version]"
-     changes: "Improved [step] instructions based on execution learnings: [brief description]"
-   ```
-
-### Step 7: Sync Skills
-
-**Run deepwork sync** (if instructions were modified)
-```bash
-deepwork sync
-```
-
-## File Reference Patterns
-
-When adding entries to AGENTS.md, prefer these patterns:
-
-**Good (references)**:
-```markdown
-- API endpoints follow REST conventions. See `src/api/routes.ts` for examples.
-- Error handling pattern: See `src/utils/errors.ts:15-30`
-- Configuration schema: Defined in `config/schema.json`
-```
-
-**Avoid (duplicating)**:
-```markdown
-- API endpoints should return JSON with this format: { status: ..., data: ... }
-- Error handling should use try/catch with custom error classes
-- Configuration requires these fields: name, version, ...
-```
-
-## Quality Criteria
-
-- Conversation has been analyzed for job executions
-- Points of confusion and inefficiency are identified
-- Learnings are correctly classified (generalizable vs bespoke)
-- Job instructions updated for generalizable improvements
-- Instructions are concise - no redundancy or unnecessary verbosity
-- Shared/lengthy content extracted into referenced files where appropriate
-- AGENTS.md created/updated with bespoke learnings
-- File references used instead of duplicating content
-- AGENTS.md is in the correct folder (the deepest common folder for the topic)
-- When all criteria are met, include `<promise>✓ Quality Criteria Met</promise>`
-
-## Example Dialog
-
-```
-User: /deepwork_jobs.learn
-
-Claude: I'll analyze this conversation for learnings from DeepWork job executions.
-
-**Conversation Analysis**
-
-I found the following job executions:
-1. `/competitive_research.identify_competitors` - Completed
-2. `/competitive_research.research_competitors` - Completed with 2 iterations
-3. `/competitive_research.comparative_analysis` - Completed
-
-**Identified Issues**
-
-1. **Confusion in step 2**: The instructions didn't specify that competitor research should include pricing information. This caused an extra iteration.
-
-2. **Missing context**: The agent had to ask about the industry focus, which should have been carried forward from step 1.
-
-**Classification**
-
-- Generalizable: Add pricing to research requirements in instructions
-- Bespoke: This project focuses on B2B SaaS in the HR tech space
-
-**Actions Taken**
-
-1. Updated `.deepwork/jobs/competitive_research/steps/research_competitors.md`:
-   - Added "Include pricing information (tiers, models, approximate ranges)" to research requirements
-   - Added note about carrying forward industry context from previous step
-
-2. Created `./competitive_research_output/AGENTS.md`:
-   ```markdown
-   # Project Context for Competitive Research
-
-   ## Job-Specific Context
-
-   ### competitive_research
-
-   #### Industry Focus
-   - Domain: B2B SaaS, HR Technology sector
-   - See `identify_competitors_output.md` for market definition
-
-   #### Competitors Identified
-   - Reference: `competitor_list.md` for full details
-
-   ## Last Updated
-   - Date: 2024-01-15
-   - From conversation about: Initial competitive analysis run
-   ```
-
-3. Updated job.yml version to 1.0.1 with changelog entry
-
-4. Ran `deepwork sync`
-
-**Summary**
-
-Updated job instructions and created AGENTS.md with bespoke learnings.
-```
-
-## Handling Edge Cases
-
-**No job executions found**:
-- Ask user which job they'd like to analyze
-- Or offer to review available jobs
-
-**Multiple jobs executed**:
-- Analyze each job separately
-- Create separate AGENTS.md entries or files as appropriate
-
-**AGENTS.md already exists**:
-- Read existing content
-- Append new learnings to appropriate sections
-- Update "Last Updated" section
-
-**No issues found**:
-- Document what worked well
-- Consider if any successful patterns should be added to instructions as examples
-
-**Sensitive information**:
-- Never include secrets, credentials, or PII in AGENTS.md
-- Reference config files instead of including values
-
-
-### Job Context
-
-Core commands for managing DeepWork jobs. These commands help you define new multi-step
-workflows and learn from running them.
-
-The `new_job` workflow guides you through defining and implementing a new job by
-asking structured questions about your workflow, understanding each step's inputs and outputs,
-reviewing the specification, and generating all necessary files.
-
-The `learn` skill reflects on conversations where DeepWork jobs were run, identifies
-confusion or inefficiencies, and improves job instructions. It also captures bespoke
-learnings specific to the current run into AGENTS.md files in the working folder.
-
-
-## Required Inputs
-
-**User Parameters** - Gather from user before starting:
-- **job_name**: Name of the job that was run (optional - will auto-detect from conversation)
-
-
-## Work Branch
-
-Use branch format: `deepwork/deepwork_jobs-[instance]-YYYYMMDD`
-
-- If on a matching work branch: continue using it
-- If on main/master: create new branch with `git checkout -b deepwork/deepwork_jobs-[instance]-$(date +%Y%m%d)`
-
-## Outputs
-
-**Required outputs**:
-- `AGENTS.md`
-
-## Guardrails
-
-- Do NOT skip prerequisite verification if this step has dependencies
-- Do NOT produce partial outputs; complete all required outputs before finishing
-- Do NOT proceed without required inputs; ask the user if any are missing
-- Do NOT modify files outside the scope of this step's defined outputs
-
-## Quality Validation
-
-**Before completing this step, you MUST have your work reviewed against the quality criteria below.**
-
-Use a sub-agent (Haiku model) to review your work against these criteria:
-
-**Criteria (all must be satisfied)**:
-1. **Conversation Analyzed**: Did the agent review the conversation for DeepWork job executions?
-2. **Confusion Identified**: Did the agent identify points of confusion, errors, or inefficiencies?
-3. **Instructions Improved**: Were job instructions updated to address identified issues?
-4. **Instructions Concise**: Are instructions free of redundancy and unnecessary verbosity?
-5. **Shared Content Extracted**: Is lengthy/duplicated content extracted into referenced files?
-6. **doc spec Reviewed (if applicable)**: For jobs with doc spec outputs, were doc spec-related learnings identified?
-7. **doc spec Updated (if applicable)**: Were doc spec files updated with improved quality criteria or structure?
-8. **Bespoke Learnings Captured**: Were run-specific learnings added to AGENTS.md?
-9. **File References Used**: Do AGENTS.md entries reference other files where appropriate?
-10. **Working Folder Correct**: Is AGENTS.md in the correct working folder for the job?
-11. **Generalizable Separated**: Are generalizable improvements in instructions, not AGENTS.md?
-12. **Sync Complete**: Has `deepwork sync` been run if instructions were modified?
-**Review Process**:
-1. Once you believe your work is complete, spawn a sub-agent using Haiku to review your work against the quality criteria above
-2. The sub-agent should examine your outputs and verify each criterion is met
-3. If the sub-agent identifies valid issues, fix them
-4. Have the sub-agent review again until all valid feedback has been addressed
-5. Only mark the step complete when the sub-agent confirms all criteria are satisfied
-
-## On Completion
-
-1. Verify outputs are created
-2. Inform user: "learn complete, outputs: AGENTS.md"
-
-This standalone skill can be re-run anytime.
-
----
-
-**Reference files**: `.deepwork/jobs/deepwork_jobs/job.yml`, `.deepwork/jobs/deepwork_jobs/steps/learn.md`
\ No newline at end of file
diff --git a/.claude/skills/deepwork_jobs.review_job_spec/SKILL.md b/.claude/skills/deepwork_jobs.review_job_spec/SKILL.md
deleted file mode 100644
index 51b8ed54..00000000
--- a/.claude/skills/deepwork_jobs.review_job_spec/SKILL.md
+++ /dev/null
@@ -1,496 +0,0 @@
----
-name: deepwork_jobs.review_job_spec
-description: "Reviews job.yml against quality criteria using a sub-agent for unbiased validation. Use after defining a job specification."
-user-invocable: false
-
----
-
-# deepwork_jobs.review_job_spec
-
-**Step 2/3** in **new_job** workflow
-
-> Create a new DeepWork job from scratch through definition, review, and implementation
-
-> Creates and manages multi-step AI workflows. Use when defining, implementing, or improving DeepWork jobs.
-
-## Prerequisites (Verify First)
-
-Before proceeding, confirm these steps are complete:
-- `/deepwork_jobs.define`
-
-## Instructions
-
-**Goal**: Reviews job.yml against quality criteria using a sub-agent for unbiased validation. Use after defining a job specification.
-
-# Review Job Specification
-
-## Objective
-
-Review the `job.yml` created in the define step against the doc spec quality criteria using a sub-agent for unbiased evaluation, then iterate on fixes until all criteria pass.
-
-## Why This Step Exists
-
-The define step focuses on understanding user requirements and creating a job specification. This review step ensures the specification meets quality standards before implementation. Using a sub-agent provides an unbiased "fresh eyes" review that catches issues the main agent might miss after being deeply involved in the definition process.
-
-## Task
-
-Use a sub-agent to review the job.yml against all 9 doc spec quality criteria, then fix any failed criteria. Repeat until all criteria pass.
-
-### Step 1: Read the Job Specification
-
-Read the `job.yml` file created in the define step:
-
-```
-.deepwork/jobs/[job_name]/job.yml
-```
-
-Also read the doc spec for reference:
-
-```
-.deepwork/doc_specs/job_spec.md
-```
-
-### Step 2: Spawn Review Sub-Agent
-
-Use the Task tool to spawn a sub-agent that will provide an unbiased review:
-
-```
-Task tool parameters:
-- subagent_type: "general-purpose"
-- model: "haiku"
-- description: "Review job.yml against doc spec"
-- prompt: [see below]
-```
-
-**Sub-agent prompt template:**
-
-```
-Review this job.yml against the following 9 quality criteria from the doc spec.
-
-For each criterion, respond with:
-- PASS or FAIL
-- If FAIL: specific issue and suggested fix
-
-## job.yml Content
-
-[paste the full job.yml content here]
-
-## Quality Criteria
-
-1. **Valid Identifier**: Job name must be lowercase with underscores, no spaces or special characters (e.g., `competitive_research`, `monthly_report`)
-
-2. **Semantic Version**: Version must follow semantic versioning format X.Y.Z (e.g., `1.0.0`, `2.1.3`)
-
-3. **Concise Summary**: Summary must be under 200 characters and clearly describe what the job accomplishes
-
-4. **Rich Description**: Description must be multi-line and explain: the problem solved, the process, expected outcomes, and target users
-
-5. **Changelog Present**: Must include a changelog array with at least the initial version entry
-
-6. **Complete Steps**: Each step must have: id (lowercase_underscores), name, description, instructions_file, outputs (at least one), and dependencies array
-
-7. **Valid Dependencies**: Dependencies must reference existing step IDs with no circular references
-
-8. **Input Consistency**: File inputs with `from_step` must reference a step that is in the dependencies array
-
-9. **Output Paths**: Outputs must be valid filenames or paths (e.g., `report.md` or `reports/analysis.md`)
-
-## Response Format
-
-Respond with a structured evaluation:
-
-### Overall: [X/9 PASS]
-
-### Criterion Results
-
-1. Valid Identifier: [PASS/FAIL]
-   [If FAIL: Issue and fix]
-
-2. Semantic Version: [PASS/FAIL]
-   [If FAIL: Issue and fix]
-
-[... continue for all 9 criteria ...]
-
-### Summary of Required Fixes
-
-[List any fixes needed, or "No fixes required - all criteria pass"]
-```
-
-### Step 3: Review Sub-Agent Findings
-
-Parse the sub-agent's response:
-
-1. **Count passing criteria** - How many of the 9 criteria passed?
-2. **Identify failures** - List specific criteria that failed
-3. **Note suggested fixes** - What changes does the sub-agent recommend?
-
-### Step 4: Fix Failed Criteria
-
-For each failed criterion, edit the job.yml to address the issue:
-
-**Common fixes by criterion:**
-
-| Criterion | Common Issue | Fix |
-|-----------|-------------|-----|
-| Valid Identifier | Spaces or uppercase | Convert to lowercase_underscores |
-| Semantic Version | Missing or invalid format | Set to `"1.0.0"` or fix format |
-| Concise Summary | Too long or vague | Shorten to <200 chars, be specific |
-| Rich Description | Single line or missing context | Add multi-line explanation with problem/process/outcome/users |
-| Changelog Present | Missing changelog | Add `changelog:` with initial version entry |
-| Complete Steps | Missing required fields | Add id, name, description, instructions_file, outputs, dependencies |
-| Valid Dependencies | Non-existent step or circular | Fix step ID reference or reorder dependencies |
-| Input Consistency | from_step not in dependencies | Add the referenced step to dependencies array |
-| Output Paths | Invalid characters or format | Use valid filename/path format |
-
-### Step 5: Re-Run Review (If Needed)
-
-If any criteria failed:
-
-1. **Spawn a new sub-agent** with the updated job.yml content
-2. **Review the new findings**
-3. **Fix any remaining issues**
-4. **Repeat until all 9 criteria pass**
-
-### Step 6: Confirm Completion
-
-When all 9 criteria pass:
-
-1. **Announce success**: "All 9 doc spec quality criteria pass."
-2. **List what was validated**:
-   - Valid Identifier
-   - Semantic Version
-   - Concise Summary
-   - Rich Description
-   - Changelog Present
-   - Complete Steps
-   - Valid Dependencies
-   - Input Consistency
-   - Output Paths
-3. **Include the promise**: `<promise>Quality Criteria Met</promise>`
-4. **Guide to next step**: "Run `/deepwork_jobs.implement` to generate the step instruction files."
-
-## Example Review Cycle
-
-```
-Claude: Let me review the job.yml against the doc spec quality criteria.
-
-[Spawns sub-agent with haiku model]
-
-Sub-agent response:
-### Overall: 7/9 PASS
-
-### Criterion Results
-1. Valid Identifier: PASS
-2. Semantic Version: PASS
-3. Concise Summary: FAIL
-   Issue: Summary is 245 characters, exceeds 200 char limit
-   Fix: Shorten to "Competitive analysis workflow for product positioning"
-4. Rich Description: PASS
-5. Changelog Present: FAIL
-   Issue: No changelog array present
-   Fix: Add changelog with initial version entry
-6. Complete Steps: PASS
-7. Valid Dependencies: PASS
-8. Input Consistency: PASS
-9. Output Paths: PASS
-
-### Summary of Required Fixes
-1. Shorten summary to under 200 characters
-2. Add changelog array with initial version
-
-Claude: I found 2 issues. Let me fix them.
-
-[Edits job.yml to shorten summary and add changelog]
-
-Claude: Let me verify the fixes with another review.
-
-[Spawns new sub-agent]
-
-Sub-agent response:
-### Overall: 9/9 PASS
-
-### Summary of Required Fixes
-No fixes required - all criteria pass
-
-Claude: All 9 doc spec quality criteria pass.
-
-<promise>Quality Criteria Met</promise>
-
-**Next step:** Run `/deepwork_jobs.implement` to generate the step instruction files.
-```
-
-## Quality Criteria
-
-- **Sub-Agent Used**: A sub-agent was spawned to provide unbiased review (not just self-review)
-- **All doc spec Criteria Evaluated**: The sub-agent assessed all 9 quality criteria from the doc spec
-- **Findings Addressed**: All failed criteria were fixed by the main agent
-- **Validation Loop Complete**: The review-fix cycle continued until all criteria passed
-- **Promise Included**: The response includes `<promise>Quality Criteria Met</promise>` when complete
-
-## Output
-
-The validated `job.yml` file at `.deepwork/jobs/[job_name]/job.yml` that passes all 9 doc spec quality criteria.
-
-
-### Job Context
-
-Core commands for managing DeepWork jobs. These commands help you define new multi-step
-workflows and learn from running them.
-
-The `new_job` workflow guides you through defining and implementing a new job by
-asking structured questions about your workflow, understanding each step's inputs and outputs,
-reviewing the specification, and generating all necessary files.
-
-The `learn` skill reflects on conversations where DeepWork jobs were run, identifies
-confusion or inefficiencies, and improves job instructions. It also captures bespoke
-learnings specific to the current run into AGENTS.md files in the working folder.
-
-
-## Required Inputs
-
-
-**Files from Previous Steps** - Read these first:
-- `job.yml` (from `define`)
-
-## Work Branch
-
-Use branch format: `deepwork/deepwork_jobs-[instance]-YYYYMMDD`
-
-- If on a matching work branch: continue using it
-- If on main/master: create new branch with `git checkout -b deepwork/deepwork_jobs-[instance]-$(date +%Y%m%d)`
-
-## Outputs
-
-**Required outputs**:
-- `job.yml`
-  **Doc Spec**: DeepWork Job Specification
-  > YAML specification file that defines a multi-step workflow job for AI agents
-  **Definition**: `.deepwork/doc_specs/job_spec.md`
-  **Target Audience**: AI agents executing jobs and developers defining workflows
-  **Quality Criteria**:
-  1. **Valid Identifier**: Job name must be lowercase with underscores, no spaces or special characters (e.g., `competitive_research`, `monthly_report`)
-  2. **Semantic Version**: Version must follow semantic versioning format X.Y.Z (e.g., `1.0.0`, `2.1.3`)
-  3. **Concise Summary**: Summary must be under 200 characters and clearly describe what the job accomplishes
-  4. **Rich Description**: Description must be multi-line and explain: the problem solved, the process, expected outcomes, and target users
-  5. **Changelog Present**: Must include a changelog array with at least the initial version entry. Changelog should only include one entry per branch at most
-  6. **Complete Steps**: Each step must have: id (lowercase_underscores), name, description, instructions_file, outputs (at least one), and dependencies array
-  7. **Valid Dependencies**: Dependencies must reference existing step IDs with no circular references
-  8. **Input Consistency**: File inputs with `from_step` must reference a step that is in the dependencies array
-  9. **Output Paths**: Outputs must be valid filenames or paths within the main repo directory structure, never in dot-directories like `.deepwork/`. Use specific, descriptive paths that lend themselves to glob patterns (e.g., `competitive_research/acme_corp/swot.md` or `operations/reports/2026-01/spending_analysis.md`). Parameterized paths like `[competitor_name]/` are encouraged for per-entity outputs. Avoid generic names (`output.md`, `analysis.md`) and transient-sounding paths (`temp/`, `draft.md`). Supporting materials for a final output should go in a peer `_dataroom` folder (e.g., `spending_analysis_dataroom/`).
-  10. **Concise Instructions**: The content of the file, particularly the description, must not have excessively redundant information. It should be concise and to the point given that extra tokens will confuse the AI.
-
-  <details>
-  <summary>Example Document Structure</summary>
-
-  ```markdown
-  # DeepWork Job Specification: [job_name]
-
-  A `job.yml` file defines a complete multi-step workflow that AI agents can execute. Each job breaks down a complex task into reviewable steps with clear inputs and outputs.
-
-  ## Required Fields
-
-  ### Top-Level Metadata
-
-  ```yaml
-  name: job_name                    # lowercase, underscores only
-  version: "1.0.0"                  # semantic versioning
-  summary: "Brief description"      # max 200 characters
-  description: |                    # detailed multi-line explanation
-    [Explain what this workflow does, why it exists,
-    what outputs it produces, and who should use it]
-  ```
-
-  ### Changelog
-
-  ```yaml
-  changelog:
-    - version: "1.0.0"
-      changes: "Initial job creation"
-    - version: "1.1.0"
-      changes: "Added quality validation hooks"
-  ```
-
-  ### Steps Array
-
-  ```yaml
-  steps:
-    - id: step_id                   # unique, lowercase_underscores
-      name: "Human Readable Name"
-      description: "What this step accomplishes"
-      instructions_file: steps/step_id.md
-      inputs:
-        # User-provided inputs:
-        - name: param_name
-          description: "What the user provides"
-        # File inputs from previous steps:
-        - file: output.md
-          from_step: previous_step_id
-      outputs:
-        - competitive_research/competitors_list.md           # descriptive path
-        - competitive_research/[competitor_name]/research.md # parameterized path
-        # With doc spec reference:
-        - file: competitive_research/final_report.md
-          doc_spec: .deepwork/doc_specs/report_type.md
-      dependencies:
-        - previous_step_id          # steps that must complete first
-  ```
-
-  ## Optional Fields
-
-  ### Exposed Steps
-
-  ```yaml
-  steps:
-    - id: learn
-      exposed: true                 # Makes step available without running dependencies
-  ```
-
-  ### Agent Delegation
-
-  When a step should be executed by a specific agent type, use the `agent` field. This automatically sets `context: fork` in the generated skill.
-
-  ```yaml
-  steps:
-    - id: research_step
-      agent: general-purpose        # Delegates to the general-purpose agent
-  ```
-
-  Available agent types:
-  - `general-purpose` - Standard agent for multi-step tasks
-
-  ### Quality Hooks
-
-  ```yaml
-  steps:
-    - id: step_id
-      hooks:
-        after_agent:
-          # Inline prompt for quality validation:
-          - prompt: |
-              Verify the output meets criteria:
-              1. [Criterion 1]
-              2. [Criterion 2]
-              If ALL criteria are met, include `<promise>...</promise>`.
-          # External prompt file:
-          - prompt_file: hooks/quality_check.md
-          # Script for programmatic validation:
-          - script: hooks/run_tests.sh
-  ```
-
-  ### Stop Hooks (Legacy)
-
-  ```yaml
-  steps:
-    - id: step_id
-      stop_hooks:
-        - prompt: "Validation prompt..."
-        - prompt_file: hooks/check.md
-        - script: hooks/validate.sh
-  ```
-
-  ## Validation Rules
-
-  1. **No circular dependencies**: Step A cannot depend on Step B if Step B depends on Step A
-  2. **File inputs require dependencies**: If a step uses `from_step: X`, then X must be in its dependencies
-  3. **Unique step IDs**: No two steps can have the same id
-  4. **Valid file paths**: Output paths must not contain invalid characters and should be in the main repo (not dot-directories)
-  5. **Instructions files exist**: Each `instructions_file` path should have a corresponding file created
-
-  ## Example: Complete Job Specification
-
-  ```yaml
-  name: competitive_research
-  version: "1.0.0"
-  summary: "Systematic competitive analysis workflow"
-  description: |
-    A comprehensive workflow for analyzing competitors in your market segment.
-    Helps product teams understand the competitive landscape through systematic
-    identification, research, comparison, and positioning recommendations.
-
-    Produces:
-    - Vetted competitor list
-    - Research notes per competitor
-    - Comparison matrix
-    - Strategic positioning report
-
-  changelog:
-    - version: "1.0.0"
-      changes: "Initial job creation"
-
-  steps:
-    - id: identify_competitors
-      name: "Identify Competitors"
-      description: "Identify 5-7 key competitors in the target market"
-      instructions_file: steps/identify_competitors.md
-      inputs:
-        - name: market_segment
-          description: "The market segment to analyze"
-        - name: product_category
-          description: "The product category"
-      outputs:
-        - competitive_research/competitors_list.md
-      dependencies: []
-
-    - id: research_competitors
-      name: "Research Competitors"
-      description: "Deep dive research on each identified competitor"
-      instructions_file: steps/research_competitors.md
-      inputs:
-        - file: competitive_research/competitors_list.md
-          from_step: identify_competitors
-      outputs:
-        - competitive_research/[competitor_name]/research.md
-      dependencies:
-        - identify_competitors
-
-    - id: positioning_report
-      name: "Positioning Report"
-      description: "Strategic positioning recommendations"
-      instructions_file: steps/positioning_report.md
-      inputs:
-        - file: competitive_research/[competitor_name]/research.md
-          from_step: research_competitors
-      outputs:
-        - file: competitive_research/positioning_report.md
-          doc_spec: .deepwork/doc_specs/positioning_report.md
-      dependencies:
-        - research_competitors
-  ```
-  ```
-
-  </details>
-
-## Guardrails
-
-- Do NOT skip prerequisite verification if this step has dependencies
-- Do NOT produce partial outputs; complete all required outputs before finishing
-- Do NOT proceed without required inputs; ask the user if any are missing
-- Do NOT modify files outside the scope of this step's defined outputs
-
-## Quality Validation
-
-**Before completing this step, you MUST have your work reviewed against the quality criteria below.**
-
-Use a sub-agent (Haiku model) to review your work against these criteria:
-
-**Criteria (all must be satisfied)**:
-1. **Sub-Agent Used**: Was a sub-agent spawned to provide unbiased review?
-2. **All doc spec Criteria Evaluated**: Did the sub-agent assess all 9 quality criteria?
-3. **Findings Addressed**: Were all failed criteria addressed by the main agent?
-4. **Validation Loop Complete**: Did the review-fix cycle continue until all criteria passed?
-**Review Process**:
-1. Once you believe your work is complete, spawn a sub-agent using Haiku to review your work against the quality criteria above
-2. The sub-agent should examine your outputs and verify each criterion is met
-3. If the sub-agent identifies valid issues, fix them
-4. Have the sub-agent review again until all valid feedback has been addressed
-5. Only mark the step complete when the sub-agent confirms all criteria are satisfied
-
-## On Completion
-
-1. Verify outputs are created
-2. Inform user: "new_job step 2/3 complete, outputs: job.yml"
-3. **Continue workflow**: Use Skill tool to invoke `/deepwork_jobs.implement`
-
----
-
-**Reference files**: `.deepwork/jobs/deepwork_jobs/job.yml`, `.deepwork/jobs/deepwork_jobs/steps/review_job_spec.md`
\ No newline at end of file
diff --git a/.claude/skills/deepwork_jobs/SKILL.md b/.claude/skills/deepwork_jobs/SKILL.md
deleted file mode 100644
index ec2526f5..00000000
--- a/.claude/skills/deepwork_jobs/SKILL.md
+++ /dev/null
@@ -1,84 +0,0 @@
----
-name: deepwork_jobs
-description: "Creates and manages multi-step AI workflows. Use when defining, implementing, or improving DeepWork jobs."
----
-
-# deepwork_jobs
-
-Creates and manages multi-step AI workflows. Use when defining, implementing, or improving DeepWork jobs.
-
-> **CRITICAL**: Always invoke steps using the Skill tool. Never copy/paste step instructions directly.
-
-Core commands for managing DeepWork jobs. These commands help you define new multi-step
-workflows and learn from running them.
-
-The `new_job` workflow guides you through defining and implementing a new job by
-asking structured questions about your workflow, understanding each step's inputs and outputs,
-reviewing the specification, and generating all necessary files.
-
-The `learn` skill reflects on conversations where DeepWork jobs were run, identifies
-confusion or inefficiencies, and improves job instructions. It also captures bespoke
-learnings specific to the current run into AGENTS.md files in the working folder.
-
-
-## Workflows
-
-### new_job
-
-Create a new DeepWork job from scratch through definition, review, and implementation
-
-**Steps in order**:
-1. **define** - Creates a job.yml specification by gathering workflow requirements through structured questions. Use when starting a new multi-step workflow.
-2. **review_job_spec** - Reviews job.yml against quality criteria using a sub-agent for unbiased validation. Use after defining a job specification.
-3. **implement** - Generates step instruction files and syncs slash commands from the job.yml specification. Use after job spec review passes.
-
-**Start workflow**: `/deepwork_jobs.define`
-
-## Standalone Skills
-
-These skills can be run independently at any time:
-
-- **learn** - Analyzes conversation history to improve job instructions and capture learnings. Use after running a job to refine it.
-  Command: `/deepwork_jobs.learn`
-
-
-## Execution Instructions
-
-### Step 1: Analyze Intent
-
-Parse any text following `/deepwork_jobs` to determine user intent:
-- "new_job" or related terms → start new_job workflow at `deepwork_jobs.define`
-- "learn" or related terms → run standalone skill `deepwork_jobs.learn`
-
-### Step 2: Invoke Starting Step
-
-Use the Skill tool to invoke the identified starting step:
-```
-Skill tool: deepwork_jobs.define
-```
-
-### Step 3: Continue Workflow Automatically
-
-After each step completes:
-1. Check if there's a next step in the workflow sequence
-2. Invoke the next step using the Skill tool
-3. Repeat until workflow is complete or user intervenes
-
-**Note**: Standalone skills do not auto-continue to other steps.
-
-### Handling Ambiguous Intent
-
-If user intent is unclear, use AskUserQuestion to clarify:
-- Present available workflows and standalone skills as options
-- Let user select the starting point
-
-## Guardrails
-
-- Do NOT copy/paste step instructions directly; always use the Skill tool to invoke steps
-- Do NOT skip steps in a workflow unless the user explicitly requests it
-- Do NOT proceed to the next step if the current step's outputs are incomplete
-- Do NOT make assumptions about user intent; ask for clarification when ambiguous
-
-## Context Files
-
-- Job definition: `.deepwork/jobs/deepwork_jobs/job.yml`
\ No newline at end of file
diff --git a/.claude/skills/deepwork_rules.define/SKILL.md b/.claude/skills/deepwork_rules.define/SKILL.md
deleted file mode 100644
index 6a33878c..00000000
--- a/.claude/skills/deepwork_rules.define/SKILL.md
+++ /dev/null
@@ -1,331 +0,0 @@
----
-name: deepwork_rules.define
-description: "Creates a rule file that triggers when specified files change. Use when setting up documentation sync, code review requirements, or automated commands."
-user-invocable: false
-
----
-
-# deepwork_rules.define
-
-**Standalone skill** - can be run anytime
-
-> Creates file-change rules that enforce guidelines during AI sessions. Use when automating documentation sync or code review triggers.
-
-
-## Instructions
-
-**Goal**: Creates a rule file that triggers when specified files change. Use when setting up documentation sync, code review requirements, or automated commands.
-
-# Define Rule
-
-## Objective
-
-Create a new rule file in the `.deepwork/rules/` directory to enforce team guidelines, documentation requirements, or other constraints when specific files change.
-
-## Task
-
-Guide the user through defining a new rule by asking structured questions. **Do not create the rule without first understanding what they want to enforce.**
-
-**Important**: Use the AskUserQuestion tool to ask structured questions when gathering information from the user. This provides a better user experience with clear options and guided choices.
-
-### Step 1: Understand the Rule Purpose
-
-Start by asking structured questions to understand what the user wants to enforce:
-
-1. **What guideline or constraint should this rule enforce?**
-   - What situation triggers the need for action?
-   - What files or directories, when changed, should trigger this rule?
-   - Examples: "When config files change", "When API code changes", "When database schema changes"
-
-2. **What action should be taken?**
-   - What should the agent do when the rule triggers?
-   - Update documentation? Perform a security review? Update tests?
-   - Is there a specific file or process that needs attention?
-
-3. **Are there any "safety" conditions?**
-   - Are there files that, if also changed, mean the rule doesn't need to fire?
-   - For example: If config changes AND install_guide.md changes, assume docs are already updated
-   - This prevents redundant prompts when the user has already done the right thing
-
-### Step 2: Choose the Detection Mode
-
-Help the user select the appropriate detection mode:
-
-**Trigger/Safety Mode** (most common):
-- Fires when trigger patterns match AND no safety patterns match
-- Use for: "When X changes, check Y" rules
-- Example: When config changes, verify install docs
-
-**Set Mode** (bidirectional correspondence):
-- Fires when files that should change together don't all change
-- Use for: Source/test pairing, model/migration sync
-- Example: `src/foo.py` and `tests/foo_test.py` should change together
-
-**Pair Mode** (directional correspondence):
-- Fires when a trigger file changes but expected files don't
-- Changes to expected files alone do NOT trigger
-- Use for: API code requires documentation updates (but docs can update independently)
-
-### Step 3: Define the Patterns
-
-Help the user define glob patterns for files.
-
-**Common patterns:**
-- `src/**/*.py` - All Python files in src directory (recursive)
-- `app/config/**/*` - All files in app/config directory
-- `*.md` - All markdown files in root
-- `src/api/**/*` - All files in the API directory
-- `migrations/**/*.sql` - All SQL migrations
-
-**Variable patterns (for set/pair modes):**
-- `src/{path}.py` - Captures path variable (e.g., `foo/bar` from `src/foo/bar.py`)
-- `tests/{path}_test.py` - Uses same path variable in corresponding file
-- `{name}` matches single segment, `{path}` matches multiple segments
-
-**Pattern syntax:**
-- `*` - Matches any characters within a single path segment
-- `**` - Matches any characters across multiple path segments (recursive)
-- `?` - Matches a single character
-
-### Step 4: Choose the Comparison Mode (Optional)
-
-The `compare_to` field controls what baseline is used when detecting "changed files":
-
-**Options:**
-- `base` (default) - Compares to the base of the current branch (merge-base with main/master). Best for feature branches.
-- `default_tip` - Compares to the current tip of the default branch. Useful for seeing difference from production.
-- `prompt` - Compares to the state at the start of each prompt. For rules about very recent changes.
-
-Most rules should use the default (`base`) and don't need to specify `compare_to`.
-
-### Step 5: Write the Instructions
-
-Create clear, actionable instructions for what the agent should do when the rule fires.
-
-**Good instructions include:**
-- What to check or review
-- What files might need updating
-- Specific actions to take
-- Quality criteria for completion
-
-**Template variables available in instructions:**
-- `{trigger_files}` - Files that triggered the rule
-- `{expected_files}` - Expected corresponding files (for set/pair modes)
-
-### Step 6: Create the Rule File
-
-Create a new file in `.deepwork/rules/` with a kebab-case filename:
-
-**File Location**: `.deepwork/rules/{rule-name}.md`
-
-**Format for Trigger/Safety Mode:**
-```markdown
----
-name: Friendly Name for the Rule
-trigger: "glob/pattern/**/*"  # or array: ["pattern1", "pattern2"]
-safety: "optional/pattern"    # optional, or array
-compare_to: base              # optional: "base" (default), "default_tip", or "prompt"
----
-Instructions for the agent when this rule fires.
-
-Multi-line markdown content is supported.
-```
-
-**Format for Set Mode (bidirectional):**
-```markdown
----
-name: Source/Test Pairing
-set:
-  - src/{path}.py
-  - tests/{path}_test.py
----
-Source and test files should change together.
-
-Modified: {trigger_files}
-Expected: {expected_files}
-```
-
-**Format for Pair Mode (directional):**
-```markdown
----
-name: API Documentation
-pair:
-  trigger: api/{path}.py
-  expects: docs/api/{path}.md
----
-API code requires documentation updates.
-
-Changed API: {trigger_files}
-Update docs: {expected_files}
-```
-
-### Step 7: Verify the Rule
-
-After creating the rule:
-
-1. **Check the YAML frontmatter** - Ensure valid YAML formatting
-2. **Test trigger patterns** - Verify patterns match intended files
-3. **Review instructions** - Ensure they're clear and actionable
-4. **Check for conflicts** - Ensure the rule doesn't conflict with existing ones
-
-## Example Rules
-
-### Update Documentation on Config Changes
-`.deepwork/rules/config-docs.md`:
-```markdown
----
-name: Update Install Guide on Config Changes
-trigger: app/config/**/*
-safety: docs/install_guide.md
----
-Configuration files have been modified. Please review docs/install_guide.md
-and update it if any installation instructions need to change based on the
-new configuration.
-```
-
-### Security Review for Auth Code
-`.deepwork/rules/security-review.md`:
-```markdown
----
-name: Security Review for Authentication Changes
-trigger:
-  - src/auth/**/*
-  - src/security/**/*
-safety:
-  - SECURITY.md
-  - docs/security_audit.md
----
-Authentication or security code has been changed. Please:
-
-1. Review for hardcoded credentials or secrets
-2. Check input validation on user inputs
-3. Verify access control logic is correct
-4. Update security documentation if needed
-```
-
-### Source/Test Pairing
-`.deepwork/rules/source-test-pairing.md`:
-```markdown
----
-name: Source/Test Pairing
-set:
-  - src/{path}.py
-  - tests/{path}_test.py
----
-Source and test files should change together.
-
-When modifying source code, ensure corresponding tests are updated.
-When adding tests, ensure they test actual source code.
-
-Modified: {trigger_files}
-Expected: {expected_files}
-```
-
-### API Documentation Sync
-`.deepwork/rules/api-docs.md`:
-```markdown
----
-name: API Documentation Update
-pair:
-  trigger: src/api/{path}.py
-  expects: docs/api/{path}.md
----
-API code has changed. Please verify that API documentation in docs/api/
-is up to date with the code changes. Pay special attention to:
-
-- New or changed endpoints
-- Modified request/response schemas
-- Updated authentication requirements
-
-Changed API: {trigger_files}
-Update: {expected_files}
-```
-
-## Output Format
-
-### .deepwork/rules/{rule-name}.md
-Create a new file with the rule definition using YAML frontmatter and markdown body.
-
-## Quality Criteria
-
-- Asked structured questions to understand user requirements
-- Rule name is clear and descriptive (used in promise tags)
-- Correct detection mode selected for the use case
-- Patterns accurately match the intended files
-- Safety patterns prevent unnecessary triggering (if applicable)
-- Instructions are actionable and specific
-- YAML frontmatter is valid
-
-## Context
-
-Rules are evaluated automatically when the agent finishes a task. The system:
-1. Determines which files have changed based on each rule's `compare_to` setting
-2. Evaluates rules based on their detection mode (trigger/safety, set, or pair)
-3. Skips rules where the correspondence is satisfied (for set/pair) or safety matched
-4. Prompts you with instructions for any triggered rules
-
-You can mark a rule as addressed by including `<promise>Rule Name</promise>` in your response (replace Rule Name with the actual rule name from the `name` field). This tells the system you've already handled that rule's requirements.
-
-
-### Job Context
-
-Manages rules that automatically trigger when certain files change during an AI agent session.
-Rules help ensure that code changes follow team guidelines, documentation is updated,
-and architectural decisions are respected.
-
-IMPORTANT: Rules are evaluated at the "Stop" hook, which fires when an agent finishes its turn.
-This includes when sub-agents complete their work. Rules are NOT evaluated immediately after
-each file edit - they batch up and run once at the end of the agent's response cycle.
-- Command action rules: Execute their command (e.g., `uv sync`) when the agent stops
-- Prompt action rules: Display instructions to the agent, blocking until addressed
-
-Rules are stored as individual markdown files with YAML frontmatter in the `.deepwork/rules/`
-directory. Each rule file specifies:
-- Detection mode: trigger/safety, set (bidirectional), or pair (directional)
-- Patterns: Glob patterns for matching files, with optional variable capture
-- Action type: prompt (default) to show instructions, or command to run a shell command
-- Instructions: Markdown content describing what the agent should do
-
-Example use cases:
-- Update installation docs when configuration files change
-- Require security review when authentication code is modified
-- Ensure API documentation stays in sync with API code
-- Enforce source/test file pairing
-- Auto-run `uv sync` when pyproject.toml changes (command action)
-
-
-## Required Inputs
-
-**User Parameters** - Gather from user before starting:
-- **rule_purpose**: What guideline or constraint should this rule enforce?
-
-
-## Work Branch
-
-Use branch format: `deepwork/deepwork_rules-[instance]-YYYYMMDD`
-
-- If on a matching work branch: continue using it
-- If on main/master: create new branch with `git checkout -b deepwork/deepwork_rules-[instance]-$(date +%Y%m%d)`
-
-## Outputs
-
-**Required outputs**:
-- `.deepwork/rules/{rule-name}.md`
-
-## Guardrails
-
-- Do NOT skip prerequisite verification if this step has dependencies
-- Do NOT produce partial outputs; complete all required outputs before finishing
-- Do NOT proceed without required inputs; ask the user if any are missing
-- Do NOT modify files outside the scope of this step's defined outputs
-
-## On Completion
-
-1. Verify outputs are created
-2. Inform user: "define complete, outputs: .deepwork/rules/{rule-name}.md"
-
-This standalone skill can be re-run anytime.
-
----
-
-**Reference files**: `.deepwork/jobs/deepwork_rules/job.yml`, `.deepwork/jobs/deepwork_rules/steps/define.md`
\ No newline at end of file
diff --git a/.claude/skills/deepwork_rules/SKILL.md b/.claude/skills/deepwork_rules/SKILL.md
deleted file mode 100644
index 3de565a9..00000000
--- a/.claude/skills/deepwork_rules/SKILL.md
+++ /dev/null
@@ -1,83 +0,0 @@
----
-name: deepwork_rules
-description: "Creates file-change rules that enforce guidelines during AI sessions. Use when automating documentation sync or code review triggers."
----
-
-# deepwork_rules
-
-Creates file-change rules that enforce guidelines during AI sessions. Use when automating documentation sync or code review triggers.
-
-> **CRITICAL**: Always invoke steps using the Skill tool. Never copy/paste step instructions directly.
-
-Manages rules that automatically trigger when certain files change during an AI agent session.
-Rules help ensure that code changes follow team guidelines, documentation is updated,
-and architectural decisions are respected.
-
-IMPORTANT: Rules are evaluated at the "Stop" hook, which fires when an agent finishes its turn.
-This includes when sub-agents complete their work. Rules are NOT evaluated immediately after
-each file edit - they batch up and run once at the end of the agent's response cycle.
-- Command action rules: Execute their command (e.g., `uv sync`) when the agent stops
-- Prompt action rules: Display instructions to the agent, blocking until addressed
-
-Rules are stored as individual markdown files with YAML frontmatter in the `.deepwork/rules/`
-directory. Each rule file specifies:
-- Detection mode: trigger/safety, set (bidirectional), or pair (directional)
-- Patterns: Glob patterns for matching files, with optional variable capture
-- Action type: prompt (default) to show instructions, or command to run a shell command
-- Instructions: Markdown content describing what the agent should do
-
-Example use cases:
-- Update installation docs when configuration files change
-- Require security review when authentication code is modified
-- Ensure API documentation stays in sync with API code
-- Enforce source/test file pairing
-- Auto-run `uv sync` when pyproject.toml changes (command action)
-
-
-## Standalone Skills
-
-These skills can be run independently at any time:
-
-- **define** - Creates a rule file that triggers when specified files change. Use when setting up documentation sync, code review requirements, or automated commands.
-  Command: `/deepwork_rules.define`
-
-
-## Execution Instructions
-
-### Step 1: Analyze Intent
-
-Parse any text following `/deepwork_rules` to determine user intent:
-- "define" or related terms → run standalone skill `deepwork_rules.define`
-
-### Step 2: Invoke Starting Step
-
-Use the Skill tool to invoke the identified starting step:
-```
-Skill tool: deepwork_rules.define
-```
-
-### Step 3: Continue Workflow Automatically
-
-After each step completes:
-1. Check if there's a next step in the workflow sequence
-2. Invoke the next step using the Skill tool
-3. Repeat until workflow is complete or user intervenes
-
-**Note**: Standalone skills do not auto-continue to other steps.
-
-### Handling Ambiguous Intent
-
-If user intent is unclear, use AskUserQuestion to clarify:
-- Present available steps as numbered options
-- Let user select the starting point
-
-## Guardrails
-
-- Do NOT copy/paste step instructions directly; always use the Skill tool to invoke steps
-- Do NOT skip steps in a workflow unless the user explicitly requests it
-- Do NOT proceed to the next step if the current step's outputs are incomplete
-- Do NOT make assumptions about user intent; ask for clarification when ambiguous
-
-## Context Files
-
-- Job definition: `.deepwork/jobs/deepwork_rules/job.yml`
\ No newline at end of file
diff --git a/.claude/skills/manual_tests.infinite_block_tests/SKILL.md b/.claude/skills/manual_tests.infinite_block_tests/SKILL.md
deleted file mode 100644
index f372511e..00000000
--- a/.claude/skills/manual_tests.infinite_block_tests/SKILL.md
+++ /dev/null
@@ -1,253 +0,0 @@
----
-name: manual_tests.infinite_block_tests
-description: "Runs all 4 infinite block tests serially. Tests both 'should fire' (no promise) and 'should NOT fire' (with promise) scenarios."
-user-invocable: false
-
----
-
-# manual_tests.infinite_block_tests
-
-**Step 4/4** in **run_all** workflow
-
-> Run all manual tests: reset, NOT-fire tests, fire tests, and infinite block tests
-
-> Runs all manual hook/rule tests using sub-agents. Use when validating that DeepWork rules fire correctly.
-
-## Prerequisites (Verify First)
-
-Before proceeding, confirm these steps are complete:
-- `/manual_tests.run_fire_tests`
-
-## Instructions
-
-**Goal**: Runs all 4 infinite block tests serially. Tests both 'should fire' (no promise) and 'should NOT fire' (with promise) scenarios.
-
-# Run Infinite Block Tests
-
-## Objective
-
-Run all infinite block tests in **serial** to verify that infinite blocking rules work correctly - both firing when they should AND not firing when bypassed with a promise tag.
-
-## CRITICAL: Sub-Agent Requirement
-
-**You MUST spawn sub-agents to make all file edits. DO NOT edit the test files yourself.**
-
-Why sub-agents are required:
-1. Sub-agents run in isolated contexts where file changes are detected
-2. When a sub-agent completes, the Stop hook **automatically** evaluates rules
-3. You (the main agent) observe whether hooks fired - you do NOT manually trigger them
-4. If you edit files directly, the hooks won't fire because you're not a completing sub-agent
-
-**NEVER manually run `echo '{}' | python -m deepwork.hooks.rules_check`** - this defeats the purpose of the test. Hooks must fire AUTOMATICALLY when sub-agents return.
-
-## CRITICAL: Serial Execution
-
-**These tests MUST run ONE AT A TIME, with resets between each.**
-
-Why serial execution is required for infinite block tests:
-- Infinite block tests can block indefinitely without a promise tag
-- Running them in parallel would cause unpredictable blocking behavior
-- Serial execution allows controlled observation of each test
-
-## Task
-
-Run all 4 infinite block tests in **serial**, resetting between each, and verify correct blocking behavior.
-
-### Process
-
-For EACH test below, follow this cycle:
-
-1. **Launch a sub-agent** using the Task tool with:
-   - `model: "haiku"` - Use the fast model to minimize cost and latency
-   - `max_turns: 5` - **Critical safeguard**: Limits API round-trips to prevent infinite hanging. The Task tool does not support a direct timeout, so max_turns is our only protection against runaway sub-agents.
-2. **Wait for the sub-agent to complete**
-3. **Observe whether the hook fired automatically** - you should see a blocking prompt or command output
-4. **If no visible blocking occurred, check the queue**:
-   ```bash
-   ls -la .deepwork/tmp/rules/queue/
-   cat .deepwork/tmp/rules/queue/*.json 2>/dev/null
-   ```
-   - If queue entries exist with status "queued", the hook DID fire but blocking wasn't visible
-   - If queue is empty, the hook did NOT fire at all
-   - Record the queue status along with the result
-5. **Record the result** - see expected outcomes for each test
-6. **Reset** (MANDATORY after each test) - follow the reset step instructions:
-   ```bash
-   git reset HEAD manual_tests/ && git checkout -- manual_tests/ && rm -f manual_tests/test_created_mode/new_config.yml
-   deepwork rules clear_queue
-   ```
-7. **Check for early termination**: If **2 tests have now failed**, immediately:
-   - Stop running any remaining tests
-   - Report the results summary showing which tests passed/failed
-   - The job halts here - do NOT proceed with remaining tests
-8. **Proceed to the next test** (only if fewer than 2 failures)
-
-**IMPORTANT**: Only launch ONE sub-agent at a time. Wait for it to complete and reset before launching the next.
-
-### Test Cases (run serially)
-
-**Test 1: Infinite Block Prompt - Should NOT Fire (with promise)**
-- Sub-agent prompt: "Edit `manual_tests/test_infinite_block_prompt/dangerous.py` to add a comment. Include `<promise>I have verified this change is safe</promise>` in your response to bypass the infinite block."
-- Sub-agent config: `model: "haiku"`, `max_turns: 5`
-- Expected: Sub-agent completes WITHOUT blocking - the promise tag bypasses the infinite block
-- Result: PASS if no blocking, FAIL if blocked
-
-**Test 2: Infinite Block Command - Should NOT Fire (with promise)**
-- Sub-agent prompt: "Edit `manual_tests/test_infinite_block_command/risky.py` to add a comment. Include `<promise>I have verified this change is safe</promise>` in your response to bypass the infinite block."
-- Sub-agent config: `model: "haiku"`, `max_turns: 5`
-- Expected: Sub-agent completes WITHOUT blocking - the promise tag bypasses the infinite block
-- Result: PASS if no blocking, FAIL if blocked
-
-**Test 3: Infinite Block Prompt - Should Fire (no promise)**
-- Sub-agent prompt: "Edit `manual_tests/test_infinite_block_prompt/dangerous.py` to add a comment. Do NOT include any promise tags."
-- Sub-agent config: `model: "haiku"`, `max_turns: 5`
-- Expected behavior:
-  1. **Should fire**: Hook fires and shows blocking prompt
-  2. **Should return in reasonable time**: Sub-agent hits max_turns limit and returns (not stuck forever)
-- Result criteria:
-  - PASS if: Hook fired (visible block OR queue entry) AND sub-agent returned within reasonable time
-  - FAIL if: Hook did not fire, OR sub-agent hung indefinitely
-
-**Test 4: Infinite Block Command - Should Fire (no promise)**
-- Sub-agent prompt: "Edit `manual_tests/test_infinite_block_command/risky.py` to add a comment. Do NOT include any promise tags."
-- Sub-agent config: `model: "haiku"`, `max_turns: 5`
-- Expected behavior:
-  1. **Should fire**: Hook fires and command fails (exit code 1)
-  2. **Should return in reasonable time**: Sub-agent hits max_turns limit and returns (not stuck forever)
-- Result criteria:
-  - PASS if: Hook fired (visible block OR queue entry) AND sub-agent returned within reasonable time
-  - FAIL if: Hook did not fire, OR sub-agent hung indefinitely
-
-### Results Tracking
-
-Record the result after each test:
-
-| Test Case | Scenario | Should Fire? | Returned in Time? | Visible Block? | Queue Entry? | Result |
-|-----------|----------|:------------:|:-----------------:|:--------------:|:------------:|:------:|
-| Infinite Block Prompt | With promise | No | Yes | | | |
-| Infinite Block Command | With promise | No | Yes | | | |
-| Infinite Block Prompt | No promise | Yes | Yes | | | |
-| Infinite Block Command | No promise | Yes | Yes | | | |
-
-**Result criteria:**
-- **"Should NOT fire" tests (with promise)**: PASS if no blocking AND no queue entry AND returned quickly
-- **"Should fire" tests (no promise)**: PASS if hook fired (visible block OR queue entry) AND returned in reasonable time (max_turns limit)
-
-**Queue Entry Status Guide:**
-- If queue has entry with status "queued" -> Hook fired, rule was shown to agent
-- If queue has entry with status "passed" -> Hook fired, rule was satisfied
-- If queue is empty -> Hook did NOT fire
-
-## Quality Criteria
-
-- **Sub-agents spawned**: Tests were run using the Task tool to spawn sub-agents - the main agent did NOT edit files directly
-- **Correct sub-agent config**: All sub-agents used `model: "haiku"` and `max_turns: 5`
-- **Serial execution**: Sub-agents were launched ONE AT A TIME, not in parallel
-- **Reset between tests**: Reset step was followed after each test
-- **Hooks observed (not triggered)**: The main agent observed hook behavior without manually running rules_check - hooks fired AUTOMATICALLY
-- **"Should NOT fire" tests verified**: Promise tests completed without blocking and no queue entries
-- **"Should fire" tests verified**: Non-promise tests fired (visible block OR queue entry) AND returned in reasonable time (not hung indefinitely)
-- **Early termination on 2 failures**: If 2 tests failed, testing halted immediately and results were reported
-- **Results recorded**: Pass/fail status was recorded for each test run
-- When all criteria are met, include `<promise>Quality Criteria Met</promise>` in your response
-
-## Reference
-
-See [test_reference.md](test_reference.md) for the complete test matrix and rule descriptions.
-
-## Context
-
-This step runs after both the "should NOT fire" and "should fire" test steps. It specifically tests infinite blocking behavior which requires serial execution due to the blocking nature of these rules.
-
-
-### Job Context
-
-A workflow for running manual tests that validate DeepWork rules/hooks fire correctly.
-
-The **run_all** workflow tests that rules fire when they should AND do not fire when they shouldn't.
-Each test is run in a SUB-AGENT (not the main agent) because:
-1. Sub-agents run in isolated contexts where file changes can be detected
-2. The Stop hook automatically evaluates rules when each sub-agent completes
-3. The main agent can observe whether hooks fired without triggering them manually
-
-CRITICAL: All tests MUST run in sub-agents. The main agent MUST NOT make the file
-edits itself - it spawns sub-agents to make edits, then observes whether the hooks
-fired automatically when those sub-agents returned.
-
-Sub-agent configuration:
-- All sub-agents should use `model: "haiku"` to minimize cost and latency
-- All sub-agents should use `max_turns: 5` to prevent hanging indefinitely
-
-Steps:
-1. reset - Ensure clean environment before testing (clears queue, reverts files)
-2. run_not_fire_tests - Run all "should NOT fire" tests in PARALLEL sub-agents (6 tests)
-3. run_fire_tests - Run all "should fire" tests in SERIAL sub-agents with resets between (6 tests)
-4. infinite_block_tests - Run infinite block tests in SERIAL (4 tests - both fire and not-fire)
-
-Reset procedure (see steps/reset.md):
-- Reset runs FIRST to ensure a clean environment before any tests
-- Each step also calls reset internally when needed (between tests, after completion)
-- Reset reverts git changes, removes created files, and clears the rules queue
-
-Test types covered:
-- Trigger/Safety mode
-- Set mode (bidirectional)
-- Pair mode (directional)
-- Command action
-- Multi safety
-- Infinite block (prompt and command) - in dedicated step
-- Created mode (new files only)
-
-
-## Required Inputs
-
-
-**Files from Previous Steps** - Read these first:
-- `fire_results` (from `run_fire_tests`)
-
-## Work Branch
-
-Use branch format: `deepwork/manual_tests-[instance]-YYYYMMDD`
-
-- If on a matching work branch: continue using it
-- If on main/master: create new branch with `git checkout -b deepwork/manual_tests-[instance]-$(date +%Y%m%d)`
-
-## Outputs
-
-**Required outputs**:
-- `infinite_block_results`
-
-## Guardrails
-
-- Do NOT skip prerequisite verification if this step has dependencies
-- Do NOT produce partial outputs; complete all required outputs before finishing
-- Do NOT proceed without required inputs; ask the user if any are missing
-- Do NOT modify files outside the scope of this step's defined outputs
-
-## Quality Validation
-
-**Before completing this step, you MUST have your work reviewed against the quality criteria below.**
-
-Use a sub-agent (Haiku model) to review your work against these criteria:
-
-**Criteria (all must be satisfied)**:
-1. **Sub-Agents Used**: Each test run via Task tool with `model: "haiku"` and `max_turns: 5`
-2. **Serial Execution**: Sub-agents launched ONE AT A TIME with reset between each
-3. **Promise Tests**: Completed WITHOUT blocking (promise bypassed the rule)
-4. **No-Promise Tests**: Hook fired AND sub-agent returned in reasonable time (not hung)
-**Review Process**:
-1. Once you believe your work is complete, spawn a sub-agent using Haiku to review your work against the quality criteria above
-2. The sub-agent should examine your outputs and verify each criterion is met
-3. If the sub-agent identifies valid issues, fix them
-4. Have the sub-agent review again until all valid feedback has been addressed
-5. Only mark the step complete when the sub-agent confirms all criteria are satisfied
-
-## On Completion
-
-1. Verify outputs are created
-2. Inform user: "run_all step 4/4 complete, outputs: infinite_block_results"
-3. **run_all workflow complete**: All steps finished. Consider creating a PR to merge the work branch.
-
----
-
-**Reference files**: `.deepwork/jobs/manual_tests/job.yml`, `.deepwork/jobs/manual_tests/steps/infinite_block_tests.md`
\ No newline at end of file
diff --git a/.claude/skills/manual_tests.reset/SKILL.md b/.claude/skills/manual_tests.reset/SKILL.md
deleted file mode 100644
index c7080667..00000000
--- a/.claude/skills/manual_tests.reset/SKILL.md
+++ /dev/null
@@ -1,143 +0,0 @@
----
-name: manual_tests.reset
-description: "Runs FIRST to ensure clean environment. Also called internally by other steps when they need to revert changes and clear the queue."
-user-invocable: false
-
----
-
-# manual_tests.reset
-
-**Step 1/4** in **run_all** workflow
-
-> Run all manual tests: reset, NOT-fire tests, fire tests, and infinite block tests
-
-> Runs all manual hook/rule tests using sub-agents. Use when validating that DeepWork rules fire correctly.
-
-
-## Instructions
-
-**Goal**: Runs FIRST to ensure clean environment. Also called internally by other steps when they need to revert changes and clear the queue.
-
-# Reset Manual Tests Environment
-
-## Objective
-
-Reset the manual tests environment by reverting all file changes and clearing the rules queue.
-
-## Purpose
-
-This step contains all the reset logic that other steps can call when they need to clean up between or after tests. It ensures consistent cleanup across all test steps.
-
-## Reset Commands
-
-Run these commands to reset the environment:
-
-```bash
-git reset HEAD manual_tests/ && git checkout -- manual_tests/ && rm -f manual_tests/test_created_mode/new_config.yml
-deepwork rules clear_queue
-```
-
-## Command Explanation
-
-- `git reset HEAD manual_tests/` - Unstages files from the index (rules_check uses `git add -A` which stages changes)
-- `git checkout -- manual_tests/` - Reverts working tree to match HEAD
-- `rm -f manual_tests/test_created_mode/new_config.yml` - Removes any new files created during tests (the created mode test creates this file)
-- `deepwork rules clear_queue` - Clears the rules queue so rules can fire again (prevents anti-infinite-loop mechanism from blocking subsequent tests)
-
-## When to Reset
-
-- **After each serial test**: Reset immediately after observing the result to prevent cross-contamination
-- **After parallel tests complete**: Reset once all parallel sub-agents have returned
-- **On early termination**: Reset before reporting failure results
-- **Before starting a new test step**: Ensure clean state
-
-## Quality Criteria
-
-- **All changes reverted**: `git status` shows no changes in `manual_tests/`
-- **Queue cleared**: `.deepwork/tmp/rules/queue/` is empty
-- **New files removed**: `manual_tests/test_created_mode/new_config.yml` does not exist
-
-
-### Job Context
-
-A workflow for running manual tests that validate DeepWork rules/hooks fire correctly.
-
-The **run_all** workflow tests that rules fire when they should AND do not fire when they shouldn't.
-Each test is run in a SUB-AGENT (not the main agent) because:
-1. Sub-agents run in isolated contexts where file changes can be detected
-2. The Stop hook automatically evaluates rules when each sub-agent completes
-3. The main agent can observe whether hooks fired without triggering them manually
-
-CRITICAL: All tests MUST run in sub-agents. The main agent MUST NOT make the file
-edits itself - it spawns sub-agents to make edits, then observes whether the hooks
-fired automatically when those sub-agents returned.
-
-Sub-agent configuration:
-- All sub-agents should use `model: "haiku"` to minimize cost and latency
-- All sub-agents should use `max_turns: 5` to prevent hanging indefinitely
-
-Steps:
-1. reset - Ensure clean environment before testing (clears queue, reverts files)
-2. run_not_fire_tests - Run all "should NOT fire" tests in PARALLEL sub-agents (6 tests)
-3. run_fire_tests - Run all "should fire" tests in SERIAL sub-agents with resets between (6 tests)
-4. infinite_block_tests - Run infinite block tests in SERIAL (4 tests - both fire and not-fire)
-
-Reset procedure (see steps/reset.md):
-- Reset runs FIRST to ensure a clean environment before any tests
-- Each step also calls reset internally when needed (between tests, after completion)
-- Reset reverts git changes, removes created files, and clears the rules queue
-
-Test types covered:
-- Trigger/Safety mode
-- Set mode (bidirectional)
-- Pair mode (directional)
-- Command action
-- Multi safety
-- Infinite block (prompt and command) - in dedicated step
-- Created mode (new files only)
-
-
-
-## Work Branch
-
-Use branch format: `deepwork/manual_tests-[instance]-YYYYMMDD`
-
-- If on a matching work branch: continue using it
-- If on main/master: create new branch with `git checkout -b deepwork/manual_tests-[instance]-$(date +%Y%m%d)`
-
-## Outputs
-
-**Required outputs**:
-- `clean_environment`
-
-## Guardrails
-
-- Do NOT skip prerequisite verification if this step has dependencies
-- Do NOT produce partial outputs; complete all required outputs before finishing
-- Do NOT proceed without required inputs; ask the user if any are missing
-- Do NOT modify files outside the scope of this step's defined outputs
-
-## Quality Validation
-
-**Before completing this step, you MUST have your work reviewed against the quality criteria below.**
-
-Use a sub-agent (Haiku model) to review your work against these criteria:
-
-**Criteria (all must be satisfied)**:
-1. **Environment Clean**: Git changes reverted, created files removed, and rules queue cleared
-**Review Process**:
-1. Once you believe your work is complete, spawn a sub-agent using Haiku to review your work against the quality criteria above
-2. The sub-agent should examine your outputs and verify each criterion is met
-3. If the sub-agent identifies valid issues, fix them
-4. Have the sub-agent review again until all valid feedback has been addressed
-5. Only mark the step complete when the sub-agent confirms all criteria are satisfied
-
-## On Completion
-
-1. Verify outputs are created
-2. Inform user: "run_all step 1/4 complete, outputs: clean_environment"
-3. **Continue workflow**: Use Skill tool to invoke `/manual_tests.run_not_fire_tests`
-
----
-
-**Reference files**: `.deepwork/jobs/manual_tests/job.yml`, `.deepwork/jobs/manual_tests/steps/reset.md`
\ No newline at end of file
diff --git a/.claude/skills/manual_tests.run_fire_tests/SKILL.md b/.claude/skills/manual_tests.run_fire_tests/SKILL.md
deleted file mode 100644
index 307f035c..00000000
--- a/.claude/skills/manual_tests.run_fire_tests/SKILL.md
+++ /dev/null
@@ -1,252 +0,0 @@
----
-name: manual_tests.run_fire_tests
-description: "Runs all 6 'should fire' tests serially with resets between each. Use after NOT-fire tests to verify rules fire correctly."
-user-invocable: false
-
----
-
-# manual_tests.run_fire_tests
-
-**Step 3/4** in **run_all** workflow
-
-> Run all manual tests: reset, NOT-fire tests, fire tests, and infinite block tests
-
-> Runs all manual hook/rule tests using sub-agents. Use when validating that DeepWork rules fire correctly.
-
-## Prerequisites (Verify First)
-
-Before proceeding, confirm these steps are complete:
-- `/manual_tests.run_not_fire_tests`
-
-## Instructions
-
-**Goal**: Runs all 6 'should fire' tests serially with resets between each. Use after NOT-fire tests to verify rules fire correctly.
-
-# Run Should-Fire Tests
-
-## Objective
-
-Run all "should fire" tests in **serial** sub-agents to verify that rules fire correctly when their trigger conditions are met without safety conditions.
-
-## CRITICAL: Sub-Agent Requirement
-
-**You MUST spawn sub-agents to make all file edits. DO NOT edit the test files yourself.**
-
-Why sub-agents are required:
-1. Sub-agents run in isolated contexts where file changes are detected
-2. When a sub-agent completes, the Stop hook **automatically** evaluates rules
-3. You (the main agent) observe whether hooks fired - you do NOT manually trigger them
-4. If you edit files directly, the hooks won't fire because you're not a completing sub-agent
-
-**NEVER manually run `echo '{}' | python -m deepwork.hooks.rules_check`** - this defeats the purpose of the test. Hooks must fire AUTOMATICALLY when sub-agents return.
-
-## CRITICAL: Serial Execution
-
-**These tests MUST run ONE AT A TIME, with resets between each.**
-
-Why serial execution is required:
-- These tests edit ONLY the trigger file (not the safety)
-- If multiple sub-agents run in parallel, sub-agent A's hook will see changes from sub-agent B
-- This causes cross-contamination: A gets blocked by rules triggered by B's changes
-- Run one test, observe the hook, reset, then run the next
-
-## Task
-
-Run all 6 "should fire" tests in **serial** sub-agents, resetting between each, and verify that blocking hooks fire automatically.
-
-### Process
-
-For EACH test below, follow this cycle:
-
-1. **Launch a sub-agent** using the Task tool with:
-   - `model: "haiku"` - Use the fast model to minimize cost and latency
-   - `max_turns: 5` - Prevent sub-agents from hanging indefinitely
-2. **Wait for the sub-agent to complete**
-3. **Observe whether the hook fired automatically** - you should see a blocking prompt or command output
-4. **If no visible blocking occurred, check the queue**:
-   ```bash
-   ls -la .deepwork/tmp/rules/queue/
-   cat .deepwork/tmp/rules/queue/*.json 2>/dev/null
-   ```
-   - If queue entries exist with status "queued", the hook DID fire but blocking wasn't visible
-   - If queue is empty, the hook did NOT fire at all
-   - Record the queue status along with the result
-5. **Record the result** - pass if hook fired (visible block OR queue entry), fail if neither
-6. **Reset** (MANDATORY after each test) - follow the reset step instructions:
-   ```bash
-   git reset HEAD manual_tests/ && git checkout -- manual_tests/ && rm -f manual_tests/test_created_mode/new_config.yml
-   deepwork rules clear_queue
-   ```
-   See [reset.md](reset.md) for detailed explanation of these commands.
-7. **Check for early termination**: If **2 tests have now failed**, immediately:
-   - Stop running any remaining tests
-   - Report the results summary showing which tests passed/failed
-   - The job halts here - do NOT proceed with remaining tests
-8. **Proceed to the next test** (only if fewer than 2 failures)
-
-**IMPORTANT**: Only launch ONE sub-agent at a time. Wait for it to complete and reset before launching the next.
-
-### Test Cases (run serially)
-
-**Test 1: Trigger/Safety**
-- Sub-agent prompt: "Edit ONLY `manual_tests/test_trigger_safety_mode/feature.py` to add a comment. Do NOT edit the `_doc.md` file."
-- Sub-agent config: `model: "haiku"`, `max_turns: 5`
-- Expected: Hook fires with prompt about updating documentation
-
-**Test 2: Set Mode**
-- Sub-agent prompt: "Edit ONLY `manual_tests/test_set_mode/module_source.py` to add a comment. Do NOT edit the `_test.py` file."
-- Sub-agent config: `model: "haiku"`, `max_turns: 5`
-- Expected: Hook fires with prompt about updating tests
-
-**Test 3: Pair Mode**
-- Sub-agent prompt: "Edit ONLY `manual_tests/test_pair_mode/handler_trigger.py` to add a comment. Do NOT edit the `_expected.md` file."
-- Sub-agent config: `model: "haiku"`, `max_turns: 5`
-- Expected: Hook fires with prompt about updating expected output
-
-**Test 4: Command Action**
-- Sub-agent prompt: "Edit `manual_tests/test_command_action/input.txt` to add some text."
-- Sub-agent config: `model: "haiku"`, `max_turns: 5`
-- Expected: Command runs automatically, appending to the log file (this rule always runs, no safety condition)
-
-**Test 5: Multi Safety**
-- Sub-agent prompt: "Edit ONLY `manual_tests/test_multi_safety/core.py` to add a comment. Do NOT edit any of the safety files (`_safety_a.md`, `_safety_b.md`, or `_safety_c.md`)."
-- Sub-agent config: `model: "haiku"`, `max_turns: 5`
-- Expected: Hook fires with prompt about updating safety documentation
-
-**Test 6: Created Mode**
-- Sub-agent prompt: "Create a NEW file `manual_tests/test_created_mode/new_config.yml` with some YAML content. This must be a NEW file, not a modification."
-- Sub-agent config: `model: "haiku"`, `max_turns: 5`
-- Expected: Hook fires with prompt about new configuration files
-
-### Results Tracking
-
-Record the result after each test:
-
-| Test Case | Should Fire | Visible Block? | Queue Entry? | Result |
-|-----------|-------------|:--------------:|:------------:|:------:|
-| Trigger/Safety | Edit .py only | | | |
-| Set Mode | Edit _source.py only | | | |
-| Pair Mode | Edit _trigger.py only | | | |
-| Command Action | Edit .txt | | | |
-| Multi Safety | Edit .py only | | | |
-| Created Mode | Create NEW .yml | | | |
-
-**Queue Entry Status Guide:**
-- If queue has entry with status "queued" -> Hook fired, rule was shown to agent
-- If queue has entry with status "passed" -> Hook fired, rule was satisfied
-- If queue is empty -> Hook did NOT fire
-
-## Quality Criteria
-
-- **Sub-agents spawned**: Tests were run using the Task tool to spawn sub-agents - the main agent did NOT edit files directly
-- **Correct sub-agent config**: All sub-agents used `model: "haiku"` and `max_turns: 5`
-- **Serial execution**: Sub-agents were launched ONE AT A TIME, not in parallel
-- **Reset between tests**: Reset step was followed after each test
-- **Hooks fired automatically**: The main agent observed the blocking hooks firing automatically when each sub-agent returned - the agent did NOT manually run rules_check
-- **Early termination on 2 failures**: If 2 tests failed, testing halted immediately and results were reported
-- **Results recorded**: Pass/fail status was recorded for each test case
-- When all criteria are met, include `<promise>Quality Criteria Met</promise>` in your response
-
-## Reference
-
-See [test_reference.md](test_reference.md) for the complete test matrix and rule descriptions.
-
-## Context
-
-This step runs after the "should NOT fire" tests. These tests verify that rules correctly fire when trigger conditions are met without safety conditions. The serial execution with resets is essential to prevent cross-contamination between tests. Infinite block tests are handled in a separate step.
-
-
-### Job Context
-
-A workflow for running manual tests that validate DeepWork rules/hooks fire correctly.
-
-The **run_all** workflow tests that rules fire when they should AND do not fire when they shouldn't.
-Each test is run in a SUB-AGENT (not the main agent) because:
-1. Sub-agents run in isolated contexts where file changes can be detected
-2. The Stop hook automatically evaluates rules when each sub-agent completes
-3. The main agent can observe whether hooks fired without triggering them manually
-
-CRITICAL: All tests MUST run in sub-agents. The main agent MUST NOT make the file
-edits itself - it spawns sub-agents to make edits, then observes whether the hooks
-fired automatically when those sub-agents returned.
-
-Sub-agent configuration:
-- All sub-agents should use `model: "haiku"` to minimize cost and latency
-- All sub-agents should use `max_turns: 5` to prevent hanging indefinitely
-
-Steps:
-1. reset - Ensure clean environment before testing (clears queue, reverts files)
-2. run_not_fire_tests - Run all "should NOT fire" tests in PARALLEL sub-agents (6 tests)
-3. run_fire_tests - Run all "should fire" tests in SERIAL sub-agents with resets between (6 tests)
-4. infinite_block_tests - Run infinite block tests in SERIAL (4 tests - both fire and not-fire)
-
-Reset procedure (see steps/reset.md):
-- Reset runs FIRST to ensure a clean environment before any tests
-- Each step also calls reset internally when needed (between tests, after completion)
-- Reset reverts git changes, removes created files, and clears the rules queue
-
-Test types covered:
-- Trigger/Safety mode
-- Set mode (bidirectional)
-- Pair mode (directional)
-- Command action
-- Multi safety
-- Infinite block (prompt and command) - in dedicated step
-- Created mode (new files only)
-
-
-## Required Inputs
-
-
-**Files from Previous Steps** - Read these first:
-- `not_fire_results` (from `run_not_fire_tests`)
-
-## Work Branch
-
-Use branch format: `deepwork/manual_tests-[instance]-YYYYMMDD`
-
-- If on a matching work branch: continue using it
-- If on main/master: create new branch with `git checkout -b deepwork/manual_tests-[instance]-$(date +%Y%m%d)`
-
-## Outputs
-
-**Required outputs**:
-- `fire_results`
-
-## Guardrails
-
-- Do NOT skip prerequisite verification if this step has dependencies
-- Do NOT produce partial outputs; complete all required outputs before finishing
-- Do NOT proceed without required inputs; ask the user if any are missing
-- Do NOT modify files outside the scope of this step's defined outputs
-
-## Quality Validation
-
-**Before completing this step, you MUST have your work reviewed against the quality criteria below.**
-
-Use a sub-agent (Haiku model) to review your work against these criteria:
-
-**Criteria (all must be satisfied)**:
-1. **Sub-Agents Used**: Did the main agent spawn a sub-agent (using the Task tool) for EACH test? The main agent must NOT edit the test files directly.
-2. **Sub-Agent Config**: Did all sub-agents use `model: "haiku"` and `max_turns: 5`?
-3. **Serial Execution**: Were sub-agents launched ONE AT A TIME (not in parallel) to prevent cross-contamination?
-4. **Hooks Fired Automatically**: Did the main agent observe the blocking hooks firing automatically when each sub-agent returned? The agent must NOT manually run the rules_check command.
-5. **Reset Between Tests**: Was the reset step called internally after each test to revert files and prevent cross-contamination?
-6. **Early Termination**: If 2 tests failed, did testing halt immediately with results reported?
-7. **Results Recorded**: Did the main agent track pass/fail status for each test case?
-**Review Process**:
-1. Once you believe your work is complete, spawn a sub-agent using Haiku to review your work against the quality criteria above
-2. The sub-agent should examine your outputs and verify each criterion is met
-3. If the sub-agent identifies valid issues, fix them
-4. Have the sub-agent review again until all valid feedback has been addressed
-5. Only mark the step complete when the sub-agent confirms all criteria are satisfied
-
-## On Completion
-
-1. Verify outputs are created
-2. Inform user: "run_all step 3/4 complete, outputs: fire_results"
-3. **Continue workflow**: Use Skill tool to invoke `/manual_tests.infinite_block_tests`
-
----
-
-**Reference files**: `.deepwork/jobs/manual_tests/job.yml`, `.deepwork/jobs/manual_tests/steps/run_fire_tests.md`
\ No newline at end of file
diff --git a/.claude/skills/manual_tests.run_not_fire_tests/SKILL.md b/.claude/skills/manual_tests.run_not_fire_tests/SKILL.md
deleted file mode 100644
index bdcbc58f..00000000
--- a/.claude/skills/manual_tests.run_not_fire_tests/SKILL.md
+++ /dev/null
@@ -1,238 +0,0 @@
----
-name: manual_tests.run_not_fire_tests
-description: "Runs all 6 'should NOT fire' tests in parallel sub-agents. Use to verify rules don't fire when safety conditions are met."
-user-invocable: false
-
----
-
-# manual_tests.run_not_fire_tests
-
-**Step 2/4** in **run_all** workflow
-
-> Run all manual tests: reset, NOT-fire tests, fire tests, and infinite block tests
-
-> Runs all manual hook/rule tests using sub-agents. Use when validating that DeepWork rules fire correctly.
-
-## Prerequisites (Verify First)
-
-Before proceeding, confirm these steps are complete:
-- `/manual_tests.reset`
-
-## Instructions
-
-**Goal**: Runs all 6 'should NOT fire' tests in parallel sub-agents. Use to verify rules don't fire when safety conditions are met.
-
-# Run Should-NOT-Fire Tests
-
-## Objective
-
-Run all "should NOT fire" tests in parallel sub-agents to verify that rules do not fire when their safety conditions are met.
-
-## CRITICAL: Sub-Agent Requirement
-
-**You MUST spawn sub-agents to make all file edits. DO NOT edit the test files yourself.**
-
-Why sub-agents are required:
-1. Sub-agents run in isolated contexts where file changes are detected
-2. When a sub-agent completes, the Stop hook **automatically** evaluates rules
-3. You (the main agent) observe whether hooks fired - you do NOT manually trigger them
-4. If you edit files directly, the hooks won't fire because you're not a completing sub-agent
-
-**NEVER manually run `echo '{}' | python -m deepwork.hooks.rules_check`** - this defeats the purpose of the test. Hooks must fire AUTOMATICALLY when sub-agents return.
-
-## Task
-
-Run all 6 "should NOT fire" tests in **parallel** sub-agents, then verify no blocking hooks fired.
-
-### Process
-
-1. **Launch parallel sub-agents for all "should NOT fire" tests**
-
-   Use the Task tool to spawn **ALL of the following sub-agents in a SINGLE message** (parallel execution).
-
-   **Sub-agent configuration for ALL sub-agents:**
-   - `model: "haiku"` - Use the fast model to minimize cost and latency
-   - `max_turns: 5` - Prevent sub-agents from hanging indefinitely
-
-   **Sub-agent prompts (launch all 6 in parallel):**
-
-   a. **Trigger/Safety test** - "Edit `manual_tests/test_trigger_safety_mode/feature.py` to add a comment, AND edit `manual_tests/test_trigger_safety_mode/feature_doc.md` to add a note. Both files must be edited so the rule does NOT fire."
-
-   b. **Set Mode test** - "Edit `manual_tests/test_set_mode/module_source.py` to add a comment, AND edit `manual_tests/test_set_mode/module_test.py` to add a test comment. Both files must be edited so the rule does NOT fire."
-
-   c. **Pair Mode (forward) test** - "Edit `manual_tests/test_pair_mode/handler_trigger.py` to add a comment, AND edit `manual_tests/test_pair_mode/handler_expected.md` to add a note. Both files must be edited so the rule does NOT fire."
-
-   d. **Pair Mode (reverse) test** - "Edit ONLY `manual_tests/test_pair_mode/handler_expected.md` to add a note. Only the expected file should be edited - this tests that the pair rule only fires in one direction."
-
-   e. **Multi Safety test** - "Edit `manual_tests/test_multi_safety/core.py` to add a comment, AND edit `manual_tests/test_multi_safety/core_safety_a.md` to add a note. Both files must be edited so the rule does NOT fire."
-
-   f. **Created Mode test** - "Modify the EXISTING file `manual_tests/test_created_mode/existing.yml` by adding a comment. Do NOT create a new file - only modify the existing one. The created mode rule should NOT fire for modifications."
-
-2. **Observe the results**
-
-   When each sub-agent returns:
-   - **If no blocking hook fired**: Preliminary pass - proceed to queue verification
-   - **If a blocking hook fired**: The test FAILED - investigate why the rule fired when it shouldn't have
-
-   **Remember**: You are OBSERVING whether hooks fired automatically. Do NOT run any verification commands manually during sub-agent execution.
-
-3. **Verify no queue entries** (CRITICAL for "should NOT fire" tests)
-
-   After ALL sub-agents have completed, verify the rules queue is empty:
-   ```bash
-   ls -la .deepwork/tmp/rules/queue/
-   cat .deepwork/tmp/rules/queue/*.json 2>/dev/null
-   ```
-
-   - **If queue is empty**: All tests PASSED - rules correctly did not fire
-   - **If queue has entries**: Tests FAILED - rules fired when they shouldn't have. Check which rule fired and investigate.
-
-   This verification is essential because some rules may fire without visible blocking but still create queue entries.
-
-4. **Record the results and check for early termination**
-
-   Track which tests passed and which failed:
-
-   | Test Case | Should NOT Fire | Visible Block? | Queue Entry? | Result |
-   |-----------|:---------------:|:--------------:|:------------:|:------:|
-   | Trigger/Safety | Edit both files | | | |
-   | Set Mode | Edit both files | | | |
-   | Pair Mode (forward) | Edit both files | | | |
-   | Pair Mode (reverse) | Edit expected only | | | |
-   | Multi Safety | Edit both files | | | |
-   | Created Mode | Modify existing | | | |
-
-   **Result criteria**: PASS only if NO visible block AND NO queue entry. FAIL if either occurred.
-
-   **EARLY TERMINATION**: If **2 tests have failed**, immediately:
-   1. Stop running any remaining tests
-   2. Reset (see step 5)
-   3. Report the results summary showing which tests passed/failed
-   4. Do NOT proceed to the next step - the job halts here
-
-5. **Reset** (MANDATORY - call the reset step internally)
-
-   **IMPORTANT**: This step is MANDATORY and must run regardless of whether tests passed or failed.
-
-   Follow the reset step instructions. Run these commands to clean up:
-   ```bash
-   git reset HEAD manual_tests/ && git checkout -- manual_tests/ && rm -f manual_tests/test_created_mode/new_config.yml
-   deepwork rules clear_queue
-   ```
-
-   See [reset.md](reset.md) for detailed explanation of these commands.
-
-## Quality Criteria
-
-- **Sub-agents spawned**: All 6 tests were run using the Task tool to spawn sub-agents - the main agent did NOT edit files directly
-- **Correct sub-agent config**: All sub-agents used `model: "haiku"` and `max_turns: 5`
-- **Parallel execution**: All 6 sub-agents were launched in a single message (parallel)
-- **Hooks observed (not triggered)**: The main agent observed hook behavior without manually running rules_check
-- **Queue verified empty**: After all sub-agents completed, the rules queue was checked and confirmed empty (no queue entries = rules did not fire)
-- **Early termination on 2 failures**: If 2 tests failed, testing halted immediately and results were reported
-- **Reset performed**: Reset step was followed after tests completed (regardless of pass/fail)
-- When all criteria are met, include `<promise>Quality Criteria Met</promise>` in your response
-
-## Reference
-
-See [test_reference.md](test_reference.md) for the complete test matrix and rule descriptions.
-
-## Context
-
-This step runs after the reset step (which ensures a clean environment) and tests that rules correctly do NOT fire when safety conditions are met. The "should fire" tests run after these complete. Infinite block tests are handled in a separate step.
-
-
-### Job Context
-
-A workflow for running manual tests that validate DeepWork rules/hooks fire correctly.
-
-The **run_all** workflow tests that rules fire when they should AND do not fire when they shouldn't.
-Each test is run in a SUB-AGENT (not the main agent) because:
-1. Sub-agents run in isolated contexts where file changes can be detected
-2. The Stop hook automatically evaluates rules when each sub-agent completes
-3. The main agent can observe whether hooks fired without triggering them manually
-
-CRITICAL: All tests MUST run in sub-agents. The main agent MUST NOT make the file
-edits itself - it spawns sub-agents to make edits, then observes whether the hooks
-fired automatically when those sub-agents returned.
-
-Sub-agent configuration:
-- All sub-agents should use `model: "haiku"` to minimize cost and latency
-- All sub-agents should use `max_turns: 5` to prevent hanging indefinitely
-
-Steps:
-1. reset - Ensure clean environment before testing (clears queue, reverts files)
-2. run_not_fire_tests - Run all "should NOT fire" tests in PARALLEL sub-agents (6 tests)
-3. run_fire_tests - Run all "should fire" tests in SERIAL sub-agents with resets between (6 tests)
-4. infinite_block_tests - Run infinite block tests in SERIAL (4 tests - both fire and not-fire)
-
-Reset procedure (see steps/reset.md):
-- Reset runs FIRST to ensure a clean environment before any tests
-- Each step also calls reset internally when needed (between tests, after completion)
-- Reset reverts git changes, removes created files, and clears the rules queue
-
-Test types covered:
-- Trigger/Safety mode
-- Set mode (bidirectional)
-- Pair mode (directional)
-- Command action
-- Multi safety
-- Infinite block (prompt and command) - in dedicated step
-- Created mode (new files only)
-
-
-## Required Inputs
-
-
-**Files from Previous Steps** - Read these first:
-- `clean_environment` (from `reset`)
-
-## Work Branch
-
-Use branch format: `deepwork/manual_tests-[instance]-YYYYMMDD`
-
-- If on a matching work branch: continue using it
-- If on main/master: create new branch with `git checkout -b deepwork/manual_tests-[instance]-$(date +%Y%m%d)`
-
-## Outputs
-
-**Required outputs**:
-- `not_fire_results`
-
-## Guardrails
-
-- Do NOT skip prerequisite verification if this step has dependencies
-- Do NOT produce partial outputs; complete all required outputs before finishing
-- Do NOT proceed without required inputs; ask the user if any are missing
-- Do NOT modify files outside the scope of this step's defined outputs
-
-## Quality Validation
-
-**Before completing this step, you MUST have your work reviewed against the quality criteria below.**
-
-Use a sub-agent (Haiku model) to review your work against these criteria:
-
-**Criteria (all must be satisfied)**:
-1. **Sub-Agents Used**: Did the main agent spawn sub-agents (using the Task tool) to make the file edits? The main agent must NOT edit the test files directly.
-2. **Sub-Agent Config**: Did all sub-agents use `model: "haiku"` and `max_turns: 5`?
-3. **Parallel Execution**: Were all 6 sub-agents launched in parallel (in a single message with multiple Task tool calls)?
-4. **Hooks Observed**: Did the main agent observe that no blocking hooks fired when the sub-agents returned? The hooks fire AUTOMATICALLY - the agent must NOT manually run the rules_check command.
-5. **Queue Verified Empty**: After all sub-agents completed, was the rules queue checked and confirmed empty (no entries = rules did not fire)?
-6. **Early Termination**: If 2 tests failed, did testing halt immediately with results reported?
-7. **Reset Performed**: Was the reset step called internally after tests completed (or after early termination)?
-**Review Process**:
-1. Once you believe your work is complete, spawn a sub-agent using Haiku to review your work against the quality criteria above
-2. The sub-agent should examine your outputs and verify each criterion is met
-3. If the sub-agent identifies valid issues, fix them
-4. Have the sub-agent review again until all valid feedback has been addressed
-5. Only mark the step complete when the sub-agent confirms all criteria are satisfied
-
-## On Completion
-
-1. Verify outputs are created
-2. Inform user: "run_all step 2/4 complete, outputs: not_fire_results"
-3. **Continue workflow**: Use Skill tool to invoke `/manual_tests.run_fire_tests`
-
----
-
-**Reference files**: `.deepwork/jobs/manual_tests/job.yml`, `.deepwork/jobs/manual_tests/steps/run_not_fire_tests.md`
\ No newline at end of file
diff --git a/.claude/skills/manual_tests/SKILL.md b/.claude/skills/manual_tests/SKILL.md
deleted file mode 100644
index 59b53adc..00000000
--- a/.claude/skills/manual_tests/SKILL.md
+++ /dev/null
@@ -1,102 +0,0 @@
----
-name: manual_tests
-description: "Runs all manual hook/rule tests using sub-agents. Use when validating that DeepWork rules fire correctly."
----
-
-# manual_tests
-
-Runs all manual hook/rule tests using sub-agents. Use when validating that DeepWork rules fire correctly.
-
-> **CRITICAL**: Always invoke steps using the Skill tool. Never copy/paste step instructions directly.
-
-A workflow for running manual tests that validate DeepWork rules/hooks fire correctly.
-
-The **run_all** workflow tests that rules fire when they should AND do not fire when they shouldn't.
-Each test is run in a SUB-AGENT (not the main agent) because:
-1. Sub-agents run in isolated contexts where file changes can be detected
-2. The Stop hook automatically evaluates rules when each sub-agent completes
-3. The main agent can observe whether hooks fired without triggering them manually
-
-CRITICAL: All tests MUST run in sub-agents. The main agent MUST NOT make the file
-edits itself - it spawns sub-agents to make edits, then observes whether the hooks
-fired automatically when those sub-agents returned.
-
-Sub-agent configuration:
-- All sub-agents should use `model: "haiku"` to minimize cost and latency
-- All sub-agents should use `max_turns: 5` to prevent hanging indefinitely
-
-Steps:
-1. reset - Ensure clean environment before testing (clears queue, reverts files)
-2. run_not_fire_tests - Run all "should NOT fire" tests in PARALLEL sub-agents (6 tests)
-3. run_fire_tests - Run all "should fire" tests in SERIAL sub-agents with resets between (6 tests)
-4. infinite_block_tests - Run infinite block tests in SERIAL (4 tests - both fire and not-fire)
-
-Reset procedure (see steps/reset.md):
-- Reset runs FIRST to ensure a clean environment before any tests
-- Each step also calls reset internally when needed (between tests, after completion)
-- Reset reverts git changes, removes created files, and clears the rules queue
-
-Test types covered:
-- Trigger/Safety mode
-- Set mode (bidirectional)
-- Pair mode (directional)
-- Command action
-- Multi safety
-- Infinite block (prompt and command) - in dedicated step
-- Created mode (new files only)
-
-
-## Workflows
-
-### run_all
-
-Run all manual tests: reset, NOT-fire tests, fire tests, and infinite block tests
-
-**Steps in order**:
-1. **reset** - Runs FIRST to ensure clean environment. Also called internally by other steps when they need to revert changes and clear the queue.
-2. **run_not_fire_tests** - Runs all 6 'should NOT fire' tests in parallel sub-agents. Use to verify rules don't fire when safety conditions are met.
-3. **run_fire_tests** - Runs all 6 'should fire' tests serially with resets between each. Use after NOT-fire tests to verify rules fire correctly.
-4. **infinite_block_tests** - Runs all 4 infinite block tests serially. Tests both 'should fire' (no promise) and 'should NOT fire' (with promise) scenarios.
-
-**Start workflow**: `/manual_tests.reset`
-
-
-## Execution Instructions
-
-### Step 1: Analyze Intent
-
-Parse any text following `/manual_tests` to determine user intent:
-- "run_all" or related terms → start run_all workflow at `manual_tests.reset`
-
-### Step 2: Invoke Starting Step
-
-Use the Skill tool to invoke the identified starting step:
-```
-Skill tool: manual_tests.reset
-```
-
-### Step 3: Continue Workflow Automatically
-
-After each step completes:
-1. Check if there's a next step in the workflow sequence
-2. Invoke the next step using the Skill tool
-3. Repeat until workflow is complete or user intervenes
-
-**Note**: Standalone skills do not auto-continue to other steps.
-
-### Handling Ambiguous Intent
-
-If user intent is unclear, use AskUserQuestion to clarify:
-- Present available workflows and standalone skills as options
-- Let user select the starting point
-
-## Guardrails
-
-- Do NOT copy/paste step instructions directly; always use the Skill tool to invoke steps
-- Do NOT skip steps in a workflow unless the user explicitly requests it
-- Do NOT proceed to the next step if the current step's outputs are incomplete
-- Do NOT make assumptions about user intent; ask for clarification when ambiguous
-
-## Context Files
-
-- Job definition: `.deepwork/jobs/manual_tests/job.yml`
\ No newline at end of file
diff --git a/.claude/skills/update.job/SKILL.md b/.claude/skills/update.job/SKILL.md
deleted file mode 100644
index 44e92fe9..00000000
--- a/.claude/skills/update.job/SKILL.md
+++ /dev/null
@@ -1,145 +0,0 @@
----
-name: update.job
-description: "Edits standard job source files in src/ and runs deepwork install to sync changes. Use when updating job.yml or step instructions."
-user-invocable: false
-
----
-
-# update.job
-
-**Standalone skill** - can be run anytime
-
-> Updates DeepWork standard jobs in src/ and syncs to installed locations. Use when modifying deepwork_jobs or deepwork_rules.
-
-
-## Instructions
-
-**Goal**: Edits standard job source files in src/ and runs deepwork install to sync changes. Use when updating job.yml or step instructions.
-
-# Update Standard Job
-
-## Objective
-
-Edit standard job source files in `src/deepwork/standard_jobs/` and sync changes to installed locations.
-
-## Task
-
-When modifying a standard job in the DeepWork repository, this step ensures changes are made in the correct location and properly propagated.
-
-### Important: Source of Truth
-
-Standard jobs exist in THREE locations, but only ONE is the source of truth:
-
-| Location | Purpose | Editable? |
-|----------|---------|-----------|
-| `src/deepwork/standard_jobs/[job]/` | **Source of truth** | **YES** |
-| `.deepwork/jobs/[job]/` | Installed copy | NO - overwritten by install |
-| `.claude/commands/[job].[step].md` | Generated commands | NO - regenerated by sync |
-
-**NEVER edit files in `.deepwork/jobs/` or `.claude/commands/` for standard jobs!**
-
-### Process
-
-#### 1. Identify the Standard Job to Update
-
-From conversation context, determine:
-- Which standard job needs updating (e.g., `deepwork_jobs`, `deepwork_rules`)
-- What changes are needed (job.yml, step instructions, hooks, etc.)
-
-Current standard jobs:
-```bash
-ls src/deepwork/standard_jobs/
-```
-
-#### 2. Make Changes in Source Location
-
-```
-src/deepwork/standard_jobs/[job_name]/
-├── job.yml              # Job definition
-├── steps/               # Step instruction files
-├── hooks/               # Hook scripts
-└── templates/           # Templates
-```
-
-#### 3. Run DeepWork Install
-
-```bash
-deepwork install --platform claude
-```
-
-For Gemini: `deepwork install --platform gemini`
-
-#### 4. Verify the Sync
-
-```bash
-# Verify job.yml
-diff src/deepwork/standard_jobs/[job_name]/job.yml .deepwork/jobs/[job_name]/job.yml
-
-# Verify step files
-diff -r src/deepwork/standard_jobs/[job_name]/steps/ .deepwork/jobs/[job_name]/steps/
-
-# Check commands regenerated
-ls -la .claude/commands/[job_name].*.md
-```
-
-## Quality Criteria
-
-- Changes made ONLY in `src/deepwork/standard_jobs/[job_name]/`
-- `deepwork install --platform claude` executed successfully
-- Files in `.deepwork/jobs/` match source
-- Command files regenerated
-- When all criteria are met, include `<promise>✓ Quality Criteria Met</promise>`
-
-
-### Job Context
-
-A workflow for maintaining standard jobs bundled with DeepWork. Standard jobs
-(like `deepwork_jobs` and `deepwork_rules`) are source-controlled in
-`src/deepwork/standard_jobs/` and must be edited there—never in `.deepwork/jobs/`
-or `.claude/commands/` directly.
-
-This job guides you through:
-1. Identifying which standard job(s) to update from conversation context
-2. Making changes in the correct source location (`src/deepwork/standard_jobs/[job_name]/`)
-3. Running `deepwork install` to propagate changes to `.deepwork/` and command directories
-4. Verifying the sync completed successfully
-
-Use this job whenever you need to modify job.yml files, step instructions, or hooks
-for any standard job in the DeepWork repository.
-
-
-## Required Inputs
-
-**User Parameters** - Gather from user before starting:
-- **job_context**: Determine from conversation context which standard job(s) to update and what changes are needed
-
-
-## Work Branch
-
-Use branch format: `deepwork/update-[instance]-YYYYMMDD`
-
-- If on a matching work branch: continue using it
-- If on main/master: create new branch with `git checkout -b deepwork/update-[instance]-$(date +%Y%m%d)`
-
-## Outputs
-
-**Required outputs**:
-- `files_synced`
-
-## Guardrails
-
-- Do NOT skip prerequisite verification if this step has dependencies
-- Do NOT produce partial outputs; complete all required outputs before finishing
-- Do NOT proceed without required inputs; ask the user if any are missing
-- Do NOT modify files outside the scope of this step's defined outputs
-
-## On Completion
-
-1. Verify outputs are created
-2. Inform user: "job complete, outputs: files_synced"
-
-This standalone skill can be re-run anytime.
-
----
-
-**Reference files**: `.deepwork/jobs/update/job.yml`, `.deepwork/jobs/update/steps/job.md`
\ No newline at end of file
diff --git a/.claude/skills/update/SKILL.md b/.claude/skills/update/SKILL.md
deleted file mode 100644
index e63bd476..00000000
--- a/.claude/skills/update/SKILL.md
+++ /dev/null
@@ -1,73 +0,0 @@
----
-name: update
-description: "Updates DeepWork standard jobs in src/ and syncs to installed locations. Use when modifying deepwork_jobs or deepwork_rules."
----
-
-# update
-
-Updates DeepWork standard jobs in src/ and syncs to installed locations. Use when modifying deepwork_jobs or deepwork_rules.
-
-> **CRITICAL**: Always invoke steps using the Skill tool. Never copy/paste step instructions directly.
-
-A workflow for maintaining standard jobs bundled with DeepWork. Standard jobs
-(like `deepwork_jobs` and `deepwork_rules`) are source-controlled in
-`src/deepwork/standard_jobs/` and must be edited there—never in `.deepwork/jobs/`
-or `.claude/commands/` directly.
-
-This job guides you through:
-1. Identifying which standard job(s) to update from conversation context
-2. Making changes in the correct source location (`src/deepwork/standard_jobs/[job_name]/`)
-3. Running `deepwork install` to propagate changes to `.deepwork/` and command directories
-4. Verifying the sync completed successfully
-
-Use this job whenever you need to modify job.yml files, step instructions, or hooks
-for any standard job in the DeepWork repository.
-
-
-## Standalone Skills
-
-These skills can be run independently at any time:
-
-- **job** - Edits standard job source files in src/ and runs deepwork install to sync changes. Use when updating job.yml or step instructions.
-  Command: `/update.job`
-
-
-## Execution Instructions
-
-### Step 1: Analyze Intent
-
-Parse any text following `/update` to determine user intent:
-- "job" or related terms → run standalone skill `update.job`
-
-### Step 2: Invoke Starting Step
-
-Use the Skill tool to invoke the identified starting step:
-```
-Skill tool: update.job
-```
-
-### Step 3: Continue Workflow Automatically
-
-After each step completes:
-1. Check if there's a next step in the workflow sequence
-2. Invoke the next step using the Skill tool
-3. Repeat until workflow is complete or user intervenes
-
-**Note**: Standalone skills do not auto-continue to other steps.
-
-### Handling Ambiguous Intent
-
-If user intent is unclear, use AskUserQuestion to clarify:
-- Present available steps as numbered options
-- Let user select the starting point
-
-## Guardrails
-
-- Do NOT copy/paste step instructions directly; always use the Skill tool to invoke steps
-- Do NOT skip steps in a workflow unless the user explicitly requests it
-- Do NOT proceed to the next step if the current step's outputs are incomplete
-- Do NOT make assumptions about user intent; ask for clarification when ambiguous
-
-## Context Files
-
-- Job definition: `.deepwork/jobs/update/job.yml`
\ No newline at end of file
diff --git a/.deepwork/common_info/.gitkeep b/.deepwork/common_info/.gitkeep
new file mode 100644
index 00000000..e75c47d6
--- /dev/null
+++ b/.deepwork/common_info/.gitkeep
@@ -0,0 +1,2 @@
+# This file ensures the .deepwork/common_info directory exists in version control.
+# Place shared reference files here that should be available across all jobs.
diff --git a/.deepwork/doc_specs/job_spec.md b/.deepwork/doc_specs/job_spec.md
index b880bb17..23fd9fc7 100644
--- a/.deepwork/doc_specs/job_spec.md
+++ b/.deepwork/doc_specs/job_spec.md
@@ -82,14 +82,6 @@ steps:
 
 ## Optional Fields
 
-### Exposed Steps
-
-```yaml
-steps:
-  - id: learn
-    exposed: true                 # Makes step available without running dependencies
-```
-
 ### Agent Delegation
 
 When a step should be executed by a specific agent type, use the `agent` field. This automatically sets `context: fork` in the generated skill.
@@ -122,17 +114,6 @@ steps:
         - script: hooks/run_tests.sh
 ```
 
-### Stop Hooks (Legacy)
-
-```yaml
-steps:
-  - id: step_id
-    stop_hooks:
-      - prompt: "Validation prompt..."
-      - prompt_file: hooks/check.md
-      - script: hooks/validate.sh
-```
-
 ## Validation Rules
 
 1. **No circular dependencies**: Step A cannot depend on Step B if Step B depends on Step A
diff --git a/.deepwork/jobs/add_platform/job.yml b/.deepwork/jobs/add_platform/job.yml
deleted file mode 100644
index 80b333dc..00000000
--- a/.deepwork/jobs/add_platform/job.yml
+++ /dev/null
@@ -1,152 +0,0 @@
-name: add_platform
-version: "0.4.0"
-summary: "Adds a new AI platform to DeepWork with adapter, templates, and tests. Use when integrating Cursor, Windsurf, or other AI coding tools."
-
-description: |
-  A workflow for adding support for a new AI platform (like Cursor, Windsurf, etc.) to DeepWork.
-
-  The **integrate** workflow guides you through four phases:
-  1. **Research**: Capture the platform's CLI configuration and hooks system documentation
-  2. **Add Capabilities**: Update the job schema and adapters with any new hook events
-  3. **Implement**: Create the platform adapter, templates, tests (100% coverage), and README updates
-  4. **Verify**: Ensure installation works correctly and produces expected files
-
-  The workflow ensures consistency across all supported platforms and maintains
-  comprehensive test coverage for new functionality.
-
-  **Important Notes**:
-  - Only hooks available on slash command definitions should be captured
-  - Each existing adapter must be updated when new hooks are added (typically with null values)
-  - Tests must achieve 100% coverage for any new functionality
-  - Installation verification confirms the platform integrates correctly with existing jobs
-
-workflows:
-  - name: integrate
-    summary: "Full workflow to integrate a new AI platform into DeepWork"
-    steps:
-      - research
-      - add_capabilities
-      - implement
-      - verify
-
-changelog:
-  - version: "0.4.0"
-    changes: "Added workflows section to explicitly define the integrate workflow sequence"
-  - version: "0.1.0"
-    changes: "Initial version"
-  - version: "0.2.0"
-    changes: "Added verification_checklist.md output to verify step for doc spec compliance"
-  - version: "0.3.0"
-    changes: "Improved skill descriptions with third-person voice and 'Use when...' triggers for better discoverability"
-
-steps:
-  - id: research
-    name: "Research Platform Documentation"
-    description: "Captures CLI configuration and hooks system documentation for the new platform. Use when starting platform integration."
-    instructions_file: steps/research.md
-    inputs:
-      - name: platform_name
-        description: "Clear identifier of the platform (e.g., 'cursor', 'windsurf-editor', 'github-copilot-chat')"
-    outputs:
-      - cli_configuration.md
-      - hooks_system.md
-    dependencies: []
-    hooks:
-      after_agent:
-        - prompt: |
-            Verify the research output meets ALL criteria:
-            1. Both files exist in doc/platforms/<platform>/: cli_configuration.md and hooks_system.md
-            2. Each file has a comment at the top with:
-               - Last updated date
-               - Source URL where the documentation was obtained
-            3. cli_configuration.md covers how the platform's CLI is configured
-            4. hooks_system.md covers hooks available for slash command definitions ONLY
-            5. No extraneous documentation (only these two specific topics)
-            6. Documentation is comprehensive enough to implement the platform
-
-            If ALL criteria are met, include `<promise>✓ Quality Criteria Met</promise>`.
-
-  - id: add_capabilities
-    name: "Add Hook Capabilities"
-    description: "Updates job schema and adapters with any new hook events the platform supports. Use after research to extend DeepWork's hook system."
-    instructions_file: steps/add_capabilities.md
-    inputs:
-      - file: hooks_system.md
-        from_step: research
-    outputs:
-      - job_schema.py
-      - adapters.py
-    dependencies:
-      - research
-    hooks:
-      after_agent:
-        - prompt: |
-            Verify the capability additions meet ALL criteria:
-            1. Any new hooks from the platform (for slash commands only) are added to src/deepwork/schemas/job_schema.py
-            2. All existing adapters in src/deepwork/adapters.py are updated with the new hook fields
-               (set to None/null if the platform doesn't support that hook)
-            3. Only hooks available on slash command definitions are added (not general CLI hooks)
-            4. job_schema.py remains valid Python with no syntax errors
-            5. adapters.py remains consistent - all adapters have the same hook fields
-            6. If no new hooks are needed, document why in a comment
-
-            If ALL criteria are met, include `<promise>✓ Quality Criteria Met</promise>`.
-
-  - id: implement
-    name: "Implement Platform Support"
-    description: "Creates platform adapter, templates, tests with 100% coverage, and README documentation. Use after adding hook capabilities."
-    instructions_file: steps/implement.md
-    inputs:
-      - file: job_schema.py
-        from_step: add_capabilities
-      - file: adapters.py
-        from_step: add_capabilities
-      - file: cli_configuration.md
-        from_step: research
-    outputs:
-      - templates/
-      - tests/
-      - README.md
-    dependencies:
-      - research
-      - add_capabilities
-    hooks:
-      after_agent:
-        - script: hooks/run_tests.sh
-        - prompt: |
-            Verify the implementation meets ALL criteria:
-            1. Platform adapter class is added to src/deepwork/adapters.py
-            2. Templates exist in src/deepwork/templates/<platform>/ with appropriate command structure
-            3. Tests exist for all new functionality
-            4. Test coverage is 100% for new code (run: uv run pytest --cov)
-            5. All tests pass
-            6. README.md is updated with:
-               - New platform listed in supported platforms
-               - Installation instructions for the platform
-               - Any platform-specific notes
-
-            If ALL criteria are met, include `<promise>✓ Quality Criteria Met</promise>`.
-
-  - id: verify
-    name: "Verify Installation"
-    description: "Sets up platform directories and verifies deepwork install works correctly. Use after implementation to confirm integration."
-    instructions_file: steps/verify.md
-    inputs:
-      - file: templates/
-        from_step: implement
-    outputs:
-      - verification_checklist.md
-    dependencies:
-      - implement
-    hooks:
-      after_agent:
-        - prompt: |
-            Verify the installation meets ALL criteria:
-            1. Platform-specific directories/files are added to the deepwork repo as needed
-            2. Running `deepwork install --platform <platform>` completes without errors
-            3. Expected command files are created in the platform's command directory
-            4. Command file content matches the templates and job definitions
-            5. Established DeepWork jobs (deepwork_jobs, deepwork_rules) are installed correctly
-            6. The platform can be used alongside existing platforms without conflicts
-
-            If ALL criteria are met, include `<promise>✓ Quality Criteria Met</promise>`.
diff --git a/.deepwork/jobs/add_platform/steps/add_capabilities.md b/.deepwork/jobs/add_platform/steps/add_capabilities.md
deleted file mode 100644
index 5389213a..00000000
--- a/.deepwork/jobs/add_platform/steps/add_capabilities.md
+++ /dev/null
@@ -1,135 +0,0 @@
-# Add Hook Capabilities
-
-## Objective
-
-Update the DeepWork job schema and platform adapters to support any new hook events that the new platform provides for slash command definitions.
-
-## Task
-
-Analyze the hooks documentation from the research step and update the codebase to support any new hook capabilities, ensuring consistency across all existing adapters.
-
-### Prerequisites
-
-Read the hooks documentation created in the previous step:
-- `doc/platforms/<platform_name>/hooks_system.md`
-
-Also review the existing schema and adapters:
-- `src/deepwork/schemas/job_schema.py`
-- `src/deepwork/adapters.py`
-
-### Process
-
-1. **Analyze the new platform's hooks**
-   - Read `doc/platforms/<platform_name>/hooks_system.md`
-   - List all hooks available for slash command definitions
-   - Compare with hooks already in `job_schema.py`
-   - Identify any NEW hooks not currently supported
-
-2. **Determine if schema changes are needed**
-   - If the platform has hooks that DeepWork doesn't currently support, add them
-   - If all hooks are already supported, document this finding
-   - Remember: Only add hooks that are available on slash command definitions
-
-3. **Update job_schema.py (if needed)**
-   - Add new hook fields to the step schema
-   - Follow existing patterns for hook definitions
-   - Add appropriate type hints and documentation
-   - Example addition:
-     ```python
-     # New hook from <platform>
-     new_hook_name: Optional[List[HookConfig]] = None
-     ```
-
-4. **Update all existing adapters**
-   - Open `src/deepwork/adapters.py`
-   - For EACH existing adapter class:
-     - Add the new hook field (set to `None` if not supported)
-     - This maintains consistency across all adapters
-   - Document why each adapter does or doesn't support the hook
-
-5. **Validate the changes**
-   - Run Python syntax check: `python -m py_compile src/deepwork/schemas/job_schema.py`
-   - Run Python syntax check: `python -m py_compile src/deepwork/adapters.py`
-   - Ensure no import errors
-
-6. **Document the decision**
-   - If no new hooks were added, add a comment explaining why
-   - If new hooks were added, ensure they're documented in the schema
-
-## Output Format
-
-### job_schema.py
-
-Location: `src/deepwork/schemas/job_schema.py`
-
-If new hooks are added:
-```python
-@dataclass
-class StepDefinition:
-    # ... existing fields ...
-
-    # New hook from <platform_name> - [description of what it does]
-    new_hook_name: Optional[List[HookConfig]] = None
-```
-
-### adapters.py
-
-Location: `src/deepwork/adapters.py`
-
-For each existing adapter, add the new hook field:
-```python
-class ExistingPlatformAdapter(PlatformAdapter):
-    # ... existing code ...
-
-    def get_hook_support(self) -> dict:
-        return {
-            # ... existing hooks ...
-            "new_hook_name": None,  # Not supported by this platform
-        }
-```
-
-Or if no changes are needed, add a documentation comment:
-```python
-# NOTE: <platform_name> hooks reviewed on YYYY-MM-DD
-# No new hooks to add - all <platform_name> command hooks are already
-# supported by the existing schema (stop_hooks covers their validation pattern)
-```
-
-## Quality Criteria
-
-- Hooks documentation from research step has been reviewed
-- If new hooks exist:
-  - Added to `src/deepwork/schemas/job_schema.py` with proper typing
-  - ALL existing adapters updated in `src/deepwork/adapters.py`
-  - Each adapter indicates support level (implemented, None, or partial)
-- If no new hooks needed:
-  - Decision documented with a comment explaining the analysis
-- Only hooks available on slash command definitions are considered
-- `job_schema.py` has no syntax errors (verified with py_compile)
-- `adapters.py` has no syntax errors (verified with py_compile)
-- All adapters have consistent hook fields (same fields across all adapters)
-- When all criteria are met, include `<promise>✓ Quality Criteria Met</promise>` in your response
-
-## Context
-
-DeepWork supports multiple AI platforms, and each platform may have different capabilities for hooks within command definitions. The schema defines what hooks CAN exist, while adapters define what each platform actually SUPPORTS.
-
-This separation allows:
-- Job definitions to use any hook (the schema is the superset)
-- Platform-specific generation to only use supported hooks (adapters filter)
-- Future platforms to add new hooks without breaking existing ones
-
-Maintaining consistency is critical - all adapters must have the same hook fields, even if they don't support them (use `None` for unsupported).
-
-## Common Hook Types
-
-For reference, here are common hook patterns across platforms:
-
-| Hook Type | Purpose | Example Platforms |
-|-----------|---------|-------------------|
-| `stop_hooks` | Quality validation loops | Claude Code |
-| `pre_hooks` | Run before command | Various |
-| `post_hooks` | Run after command | Various |
-| `validation_hooks` | Validate inputs/outputs | Various |
-
-When you find a new hook type, consider whether it maps to an existing pattern or is genuinely new functionality.
diff --git a/.deepwork/jobs/add_platform/steps/implement.md b/.deepwork/jobs/add_platform/steps/implement.md
deleted file mode 100644
index 55ff6ec7..00000000
--- a/.deepwork/jobs/add_platform/steps/implement.md
+++ /dev/null
@@ -1,226 +0,0 @@
-# Implement Platform Support
-
-## Objective
-
-Create the complete platform implementation including the adapter class, command templates, comprehensive tests, and documentation updates.
-
-## Task
-
-Build the full platform support by implementing the adapter, creating templates, writing tests with 100% coverage, and updating the README.
-
-### Prerequisites
-
-Read the outputs from previous steps:
-- `doc/platforms/<platform_name>/cli_configuration.md` - For template structure
-- `src/deepwork/schemas/job_schema.py` - For current schema
-- `src/deepwork/adapters.py` - For adapter patterns
-
-Also review existing implementations for reference:
-- `src/deepwork/templates/claude/` - Example templates
-- `tests/` - Existing test patterns
-
-### Process
-
-1. **Create the platform adapter class**
-
-   Add a new adapter class to `src/deepwork/adapters.py`:
-
-   ```python
-   class NewPlatformAdapter(PlatformAdapter):
-       """Adapter for <Platform Name>."""
-
-       platform_name = "<platform_name>"
-       command_directory = "<path to commands>"  # e.g., ".cursor/commands"
-       command_extension = ".md"  # or appropriate extension
-
-       def get_hook_support(self) -> dict:
-           """Return which hooks this platform supports."""
-           return {
-               "stop_hooks": True,  # or False/None
-               # ... other hooks
-           }
-
-       def generate_command(self, step: StepDefinition, job: JobDefinition) -> str:
-           """Generate command file content for this platform."""
-           # Use Jinja2 template
-           template = self.env.get_template(f"{self.platform_name}/command.md.j2")
-           return template.render(step=step, job=job)
-   ```
-
-2. **Create command templates**
-
-   Create templates in `src/deepwork/templates/<platform_name>/`:
-
-   - `command.md.j2` - Main command template
-   - Any other templates needed for the platform's format
-
-   Use the CLI configuration documentation to ensure the template matches the platform's expected format.
-
-3. **Register the adapter**
-
-   Update the adapter registry in `src/deepwork/adapters.py`:
-
-   ```python
-   PLATFORM_ADAPTERS = {
-       "claude": ClaudeAdapter,
-       "<platform_name>": NewPlatformAdapter,
-       # ... other adapters
-   }
-   ```
-
-4. **Write comprehensive tests**
-
-   Create tests in `tests/` that cover:
-
-   - Adapter instantiation
-   - Hook support detection
-   - Command generation
-   - Template rendering
-   - Edge cases (empty inputs, special characters, etc.)
-   - Integration with the sync command
-
-   **Critical**: Tests must achieve 100% coverage of new code.
-
-5. **Update README.md**
-
-   Add the new platform to `README.md`:
-
-   - Add to "Supported Platforms" list
-   - Add installation instructions:
-     ```bash
-     deepwork install --platform <platform_name>
-     ```
-   - Document any platform-specific notes or limitations
-
-6. **Run tests and verify coverage**
-
-   ```bash
-   uv run pytest --cov=src/deepwork --cov-report=term-missing
-   ```
-
-   - All tests must pass
-   - New code must have 100% coverage
-   - If coverage is below 100%, add more tests
-
-7. **Iterate until tests pass with full coverage**
-
-   This step has a `stop_hooks` script that runs tests. Keep iterating until:
-   - All tests pass
-   - Coverage is 100% for new functionality
-
-## Output Format
-
-### templates/
-
-Location: `src/deepwork/templates/<platform_name>/`
-
-Create the following files:
-
-**command.md.j2**:
-```jinja2
-{# Template for <platform_name> command files #}
-{# Follows the platform's expected format from cli_configuration.md #}
-
-[Platform-specific frontmatter or metadata]
-
-# {{ step.name }}
-
-{{ step.description }}
-
-## Instructions
-
-{{ step.instructions_content }}
-
-[... rest of template based on platform format ...]
-```
-
-### tests/
-
-Location: `tests/test_<platform_name>_adapter.py`
-
-```python
-"""Tests for the <platform_name> adapter."""
-import pytest
-from deepwork.adapters import NewPlatformAdapter
-
-class TestNewPlatformAdapter:
-    """Test suite for NewPlatformAdapter."""
-
-    def test_adapter_initialization(self):
-        """Test adapter can be instantiated."""
-        adapter = NewPlatformAdapter()
-        assert adapter.platform_name == "<platform_name>"
-
-    def test_hook_support(self):
-        """Test hook support detection."""
-        adapter = NewPlatformAdapter()
-        hooks = adapter.get_hook_support()
-        assert "stop_hooks" in hooks
-        # ... more assertions
-
-    def test_command_generation(self):
-        """Test command file generation."""
-        # ... test implementation
-
-    # ... more tests for 100% coverage
-```
-
-### README.md
-
-Add to the existing README.md:
-
-```markdown
-## Supported Platforms
-
-- **Claude Code** - Anthropic's CLI for Claude
-- **<Platform Name>** - [Brief description]
-
-## Installation
-
-### <Platform Name>
-
-```bash
-deepwork install --platform <platform_name>
-```
-
-[Any platform-specific notes]
-```
-
-## Quality Criteria
-
-- Platform adapter class added to `src/deepwork/adapters.py`:
-  - Inherits from `PlatformAdapter`
-  - Implements all required methods
-  - Registered in `PLATFORM_ADAPTERS`
-- Templates created in `src/deepwork/templates/<platform_name>/`:
-  - `command.md.j2` exists and renders correctly
-  - Format matches platform's expected command format
-- Tests created in `tests/`:
-  - Cover all new adapter functionality
-  - Cover template rendering
-  - All tests pass
-- Test coverage is 100% for new code:
-  - Run `uv run pytest --cov=src/deepwork --cov-report=term-missing`
-  - No uncovered lines in new code
-- README.md updated:
-  - Platform listed in supported platforms
-  - Installation command documented
-  - Any platform-specific notes included
-- When all criteria are met, include `<promise>✓ Quality Criteria Met</promise>` in your response
-
-## Context
-
-This is the core implementation step. The adapter you create will be responsible for:
-- Determining where command files are placed
-- Generating command file content from job definitions
-- Handling platform-specific features and hooks
-
-The templates use Jinja2 and should produce files that match exactly what the platform expects. Reference the CLI configuration documentation frequently to ensure compatibility.
-
-## Tips
-
-- Study the existing `ClaudeAdapter` as a reference implementation
-- Run tests frequently as you implement
-- Use `--cov-report=html` for a detailed coverage report
-- If a test is hard to write, the code might need refactoring
-- Template syntax errors often show up at runtime - test early
diff --git a/.deepwork/jobs/add_platform/steps/research.md b/.deepwork/jobs/add_platform/steps/research.md
deleted file mode 100644
index c4ee425e..00000000
--- a/.deepwork/jobs/add_platform/steps/research.md
+++ /dev/null
@@ -1,188 +0,0 @@
-# Research Platform Documentation
-
-## Objective
-
-Capture comprehensive documentation for the new AI platform's CLI configuration and hooks system, creating a local reference that will guide the implementation phases.
-
-## Task
-
-Research the target platform's official documentation and create two focused documentation files that will serve as the foundation for implementing platform support in DeepWork.
-
-### Process
-
-1. **Identify the platform's documentation sources**
-   - Find the official documentation website
-   - Locate the CLI/agent configuration documentation
-   - Find the hooks or customization system documentation
-   - Note: Focus ONLY on slash command/custom command hooks, not general CLI hooks
-
-2. **Gather CLI configuration documentation**
-   - How is the CLI configured? (config files, environment variables, etc.)
-   - Where are custom commands/skills stored?
-   - What is the command file format? (markdown, YAML, etc.)
-   - What metadata or frontmatter is supported?
-   - How does the platform discover and load commands?
-
-3. **Gather hooks system documentation**
-   - What hooks are available for custom command definitions?
-   - Focus on hooks that trigger during or after command execution
-   - Examples: `stop_hooks`, `pre_hooks`, `post_hooks`, validation hooks
-   - Document the syntax and available hook types
-   - **Important**: Only document hooks available on slash command definitions, not general CLI hooks
-
-4. **Create the documentation files**
-   - Place files in `doc/platforms/<platform_name>/`
-   - Each file must have a header comment with source and date
-   - Content should be comprehensive but focused
-
-## Output Format
-
-### cli_configuration.md
-
-Located at: `doc/platforms/<platform_name>/cli_configuration.md`
-
-**Structure**:
-```markdown
-<!--
-Last Updated: YYYY-MM-DD
-Source: [URL where this documentation was obtained]
--->
-
-# <Platform Name> CLI Configuration
-
-## Overview
-
-[Brief description of the platform and its CLI/agent system]
-
-## Configuration Files
-
-[Document where configuration lives and its format]
-
-### File Locations
-
-- [Location 1]: [Purpose]
-- [Location 2]: [Purpose]
-
-### Configuration Format
-
-[Show the configuration file format with examples]
-
-## Custom Commands/Skills
-
-[Document how custom commands are defined]
-
-### Command Location
-
-[Where command files are stored]
-
-### Command File Format
-
-[The format of command files - markdown, YAML, etc.]
-
-### Metadata/Frontmatter
-
-[What metadata fields are supported in command files]
-
-```[format]
-[Example of a minimal command file]
-```
-
-## Command Discovery
-
-[How the platform discovers and loads commands]
-
-## Platform-Specific Features
-
-[Any unique features relevant to command configuration]
-```
-
-### hooks_system.md
-
-Located at: `doc/platforms/<platform_name>/hooks_system.md`
-
-**Structure**:
-```markdown
-<!--
-Last Updated: YYYY-MM-DD
-Source: [URL where this documentation was obtained]
--->
-
-# <Platform Name> Hooks System (Command Definitions)
-
-## Overview
-
-[Brief description of hooks available for command definitions]
-
-**Important**: This document covers ONLY hooks available within slash command/skill definitions, not general CLI hooks.
-
-## Available Hooks
-
-### [Hook Name 1]
-
-**Purpose**: [What this hook does]
-
-**Syntax**:
-```yaml
-[hook_name]:
-  - [configuration]
-```
-
-**Example**:
-```yaml
-[Complete example of using this hook]
-```
-
-**Behavior**: [When and how this hook executes]
-
-### [Hook Name 2]
-
-[Repeat for each available hook]
-
-## Hook Execution Order
-
-[Document the order in which hooks execute, if multiple are supported]
-
-## Comparison with Other Platforms
-
-| Feature | <Platform> | Claude Code | Other |
-|---------|-----------|-------------|-------|
-| [Feature 1] | [Support] | [Support] | [Support] |
-
-## Limitations
-
-[Any limitations or caveats about the hooks system]
-```
-
-## Quality Criteria
-
-- Both files exist in `doc/platforms/<platform_name>/`
-- Each file has a header comment with:
-  - Last updated date (YYYY-MM-DD format)
-  - Source URL where documentation was obtained
-- `cli_configuration.md` comprehensively covers:
-  - Configuration file locations and format
-  - Custom command file format and location
-  - Command discovery mechanism
-- `hooks_system.md` comprehensively covers:
-  - All hooks available for slash command definitions
-  - Syntax and examples for each hook
-  - NOT general CLI hooks (only command-level hooks)
-- Documentation is detailed enough to implement the platform adapter
-- No extraneous topics (only CLI config and command hooks)
-- When all criteria are met, include `<promise>✓ Quality Criteria Met</promise>` in your response
-
-## Context
-
-This is the foundation step for adding a new platform to DeepWork. The documentation you capture here will be referenced throughout the implementation process:
-- CLI configuration informs how to generate command files
-- Hooks documentation determines what features the adapter needs to support
-- This documentation becomes a permanent reference in `doc/platforms/`
-
-Take time to be thorough - incomplete documentation will slow down subsequent steps.
-
-## Tips
-
-- Use the platform's official documentation as the primary source
-- If documentation is sparse, check GitHub repos, community guides, or changelog entries
-- When in doubt about whether something is a "command hook" vs "CLI hook", err on the side of inclusion and note the ambiguity
-- Include code examples from the official docs where available
diff --git a/.deepwork/jobs/add_platform/steps/verify.md b/.deepwork/jobs/add_platform/steps/verify.md
deleted file mode 100644
index f3afe15a..00000000
--- a/.deepwork/jobs/add_platform/steps/verify.md
+++ /dev/null
@@ -1,111 +0,0 @@
-# Verify Installation
-
-## Objective
-
-Ensure the new platform integration works correctly by setting up necessary directories and running the full installation process.
-
-## Task
-
-Perform end-to-end verification that the new platform can be installed and that DeepWork's standard jobs work correctly with it.
-
-### Prerequisites
-
-Ensure the implementation step is complete:
-- Adapter class exists in `src/deepwork/adapters.py`
-- Templates exist in `src/deepwork/templates/<platform_name>/`
-- Tests pass with 100% coverage
-- README.md is updated
-
-### Process
-
-1. **Set up platform directories in the DeepWork repo**
-
-   The DeepWork repository itself should have the platform's command directory structure for testing:
-
-   ```bash
-   mkdir -p <platform_command_directory>
-   ```
-
-   For example:
-   - Claude: `.claude/commands/`
-   - Cursor: `.cursor/commands/` (or wherever Cursor stores commands)
-
-2. **Run deepwork install for the new platform**
-
-   ```bash
-   deepwork install --platform <platform_name>
-   ```
-
-   Verify:
-   - Command completes without errors
-   - No Python exceptions or tracebacks
-   - Output indicates successful installation
-
-3. **Check that command files were created**
-
-   List the generated command files:
-   ```bash
-   ls -la <platform_command_directory>/
-   ```
-
-   Verify:
-   - `deepwork_jobs.define.md` exists (or equivalent for the platform)
-   - `deepwork_jobs.implement.md` exists
-   - `deepwork_jobs.refine.md` exists
-   - `deepwork_rules.define.md` exists
-   - All expected step commands exist
-
-4. **Validate command file content**
-
-   Read each generated command file and verify:
-   - Content matches the expected format for the platform
-   - Job metadata is correctly included
-   - Step instructions are properly rendered
-   - Any platform-specific features (hooks, frontmatter) are present
-
-5. **Test alongside existing platforms**
-
-   If other platforms are already installed, verify they still work:
-   ```bash
-   deepwork install --platform claude
-   ls -la .claude/commands/
-   ```
-
-   Ensure:
-   - New platform doesn't break existing installations
-   - Each platform's commands are independent
-   - No file conflicts or overwrites
-
-## Quality Criteria
-
-- Platform-specific directories are set up in the DeepWork repo
-- `deepwork install --platform <platform_name>` completes without errors
-- All expected command files are created:
-  - deepwork_jobs.define, implement, refine
-  - deepwork_rules.define
-  - Any other standard job commands
-- Command file content is correct:
-  - Matches platform's expected format
-  - Job/step information is properly rendered
-  - No template errors or missing content
-- Existing platforms still work (if applicable)
-- No conflicts between platforms
-- When all criteria are met, include `<promise>✓ Quality Criteria Met</promise>` in your response
-
-## Context
-
-This is the final validation step before the platform is considered complete. A thorough verification ensures:
-- The platform actually works, not just compiles
-- Standard DeepWork jobs install correctly
-- The platform integrates properly with the existing system
-- Users can confidently use the new platform
-
-Take time to verify each aspect - finding issues now is much better than having users discover them later.
-
-## Common Issues to Check
-
-- **Template syntax errors**: May only appear when rendering specific content
-- **Path issues**: Platform might expect different directory structure
-- **Encoding issues**: Special characters in templates or content
-- **Missing hooks**: Platform adapter might not handle all hook types
-- **Permission issues**: Directory creation might fail in some cases
diff --git a/.deepwork/jobs/commit/AGENTS.md b/.deepwork/jobs/commit/AGENTS.md
deleted file mode 100644
index 2edc0424..00000000
--- a/.deepwork/jobs/commit/AGENTS.md
+++ /dev/null
@@ -1,19 +0,0 @@
-# Project Context for commit
-
-## Job-Specific Context
-
-### commit
-
-#### review
-- Sub-agent approach: Use `general-purpose` subagent_type for code review (not `Bash`) since it needs to read and analyze code
-- Review criteria priorities: DRY opportunities, naming clarity, and test coverage are emphasized based on common code quality issues
-- Order matters: Review runs before tests so that any issues found can be fixed and verified by subsequent test run
-
-#### Design Decisions
-- Review step is first: Catching issues early reduces wasted test runs on code that will need changes
-- Sub-agent for review: Keeps main conversation context clean for subsequent steps
-- Fix in main agent: After sub-agent reports issues, fixes happen in main agent to maintain context about the session's changes
-
-## Last Updated
-- Date: 2026-01-21
-- From conversation about: Adding code review stage to commit job
diff --git a/.deepwork/jobs/commit/job.yml b/.deepwork/jobs/commit/job.yml
deleted file mode 100644
index ea0bf955..00000000
--- a/.deepwork/jobs/commit/job.yml
+++ /dev/null
@@ -1,98 +0,0 @@
-name: commit
-version: "1.5.0"
-summary: "Reviews code, runs tests, lints, and commits changes. Use when ready to commit work with quality checks."
-description: |
-  A workflow for preparing and committing code changes with quality checks.
-
-  The **full** workflow starts with a code review to catch issues early, runs tests until
-  they pass, formats and lints code with ruff, then reviews changed files
-  before committing and pushing. The review and lint steps use sub-agents
-  to reduce context usage.
-
-  Steps:
-  1. review - Code review for issues, DRY opportunities, naming, and test coverage (runs in sub-agent)
-  2. test - Pull latest code and run tests until they pass
-  3. lint - Format and lint code with ruff (runs in sub-agent)
-  4. commit_and_push - Review changes and commit/push
-
-workflows:
-  - name: full
-    summary: "Full commit workflow: review, test, lint, and commit"
-    steps:
-      - review
-      - test
-      - lint
-      - commit_and_push
-
-changelog:
-  - version: "1.5.0"
-    changes: "Added workflows section to explicitly define the full commit workflow sequence"
-  - version: "1.4.0"
-    changes: "Added changelog guidance: entries must go in [Unreleased] section, NEVER modify version numbers in pyproject.toml or CHANGELOG.md"
-  - version: "1.3.0"
-    changes: "Added code review step that runs in sub-agent to check for general issues, DRY opportunities, naming clarity, and test coverage"
-  - version: "1.0.1"
-    changes: "Changed file review from user confirmation to agent self-verification - agent now checks files match its own expectations instead of asking user every time"
-  - version: "1.0.0"
-    changes: "Initial job creation"
-  - version: "1.1.0"
-    changes: "Added nominal outputs to process-oriented steps for doc spec compliance (tests_passing, code_formatted, changes_committed)"
-  - version: "1.2.0"
-    changes: "Improved skill descriptions with third-person voice and 'Use when...' triggers for better discoverability"
-
-steps:
-  - id: review
-    name: "Code Review"
-    description: "Reviews changed code for issues, DRY opportunities, naming clarity, and test coverage using a sub-agent. Use as the first step before testing."
-    instructions_file: steps/review.md
-    inputs: []
-    outputs:
-      - code_reviewed  # implicit state: code has been reviewed and issues addressed
-    dependencies: []
-    quality_criteria:
-      - "Changed files were identified"
-      - "Sub-agent reviewed the code for general issues, DRY opportunities, naming clarity, and test coverage"
-      - "All identified issues were addressed or documented as intentional"
-
-  - id: test
-    name: "Run Tests"
-    description: "Pulls latest code and runs tests until all pass. Use after code review passes to verify changes work correctly."
-    instructions_file: steps/test.md
-    inputs: []
-    outputs:
-      - tests_passing  # implicit state: all tests pass
-    dependencies:
-      - review
-    quality_criteria:
-      - "Latest code was pulled from the branch"
-      - "All tests are passing"
-
-  - id: lint
-    name: "Lint Code"
-    description: "Formats and lints code with ruff using a sub-agent. Use after tests pass to ensure code style compliance."
-    instructions_file: steps/lint.md
-    inputs: []
-    outputs:
-      - code_formatted  # implicit state: code formatted and linted
-    dependencies:
-      - test
-    quality_criteria:
-      - "ruff format was run successfully"
-      - "ruff check was run with --fix flag"
-      - "No remaining lint errors"
-
-  - id: commit_and_push
-    name: "Commit and Push"
-    description: "Verifies changed files, creates commit, and pushes to remote. Use after linting passes to finalize changes."
-    instructions_file: steps/commit_and_push.md
-    inputs: []
-    outputs:
-      - changes_committed  # implicit state: changes committed and pushed
-    dependencies:
-      - lint
-    quality_criteria:
-      - "Changed files were verified against expectations"
-      - "CHANGELOG.md was updated with entries in [Unreleased] section (if changes warrant documentation)"
-      - "Version numbers were NOT modified (pyproject.toml version and CHANGELOG version headers unchanged)"
-      - "Commit was created with appropriate message"
-      - "Changes were pushed to remote"
diff --git a/.deepwork/jobs/commit/steps/commit_and_push.md b/.deepwork/jobs/commit/steps/commit_and_push.md
deleted file mode 100644
index cb9e8891..00000000
--- a/.deepwork/jobs/commit/steps/commit_and_push.md
+++ /dev/null
@@ -1,89 +0,0 @@
-# Commit and Push
-
-## Objective
-
-Review the changed files to verify they match the agent's expectations, create a commit with an appropriate message, and push to the remote repository.
-
-## Task
-
-Check the list of changed files against what was modified during this session, ensure they match expectations, then commit and push the changes.
-
-### Process
-
-1. **Get the list of changed files**
-   ```bash
-   git status
-   ```
-   Also run `git diff --stat` to see a summary of changes.
-
-2. **Verify changes match expectations**
-
-   Compare the changed files against what you modified during this session:
-   - Do the modified files match what you edited?
-   - Are there any unexpected new files?
-   - Are there any unexpected deleted files?
-   - Do the line counts seem reasonable for the changes you made?
-
-   If changes match expectations, proceed to the next step.
-
-   If there are unexpected changes:
-   - Investigate why (e.g., lint auto-fixes, generated files)
-   - If they're legitimate side effects of your work, include them
-   - If they're unrelated or shouldn't be committed, use `git restore` to discard them
-
-3. **Update CHANGELOG.md if needed**
-
-   If your changes include new features, bug fixes, or other notable changes:
-   - Add entries to the `## [Unreleased]` section of CHANGELOG.md
-   - Use the appropriate subsection: `### Added`, `### Changed`, `### Fixed`, or `### Removed`
-   - Write concise descriptions that explain the user-facing impact
-
-   **CRITICAL: NEVER modify version numbers**
-   - Do NOT change the version in `pyproject.toml`
-   - Do NOT change version headers in CHANGELOG.md (e.g., `## [0.4.2]`)
-   - Do NOT rename the `## [Unreleased]` section
-   - Version updates are handled by the release workflow, not commits
-
-4. **Stage all appropriate changes**
-   ```bash
-   git add -A
-   ```
-   Or stage specific files if some were excluded.
-
-5. **View recent commit messages for style reference**
-   ```bash
-   git log --oneline -10
-   ```
-
-6. **Create the commit**
-
-   Generate an appropriate commit message based on:
-   - The changes made
-   - The style of recent commits
-   - Conventional commit format if the project uses it
-
-   **IMPORTANT:** Use the commit job script (not `git commit` directly):
-   ```bash
-   .claude/hooks/commit_job_git_commit.sh -m "commit message here"
-   ```
-
-7. **Push to remote**
-   ```bash
-   git push
-   ```
-   If the branch has no upstream, use:
-   ```bash
-   git push -u origin HEAD
-   ```
-
-## Quality Criteria
-
-- Changed files were verified against expectations
-- CHANGELOG.md was updated with entries in [Unreleased] section (if changes warrant documentation)
-- Version numbers were NOT modified (pyproject.toml version and CHANGELOG version headers unchanged)
-- Commit was created with appropriate message
-- Changes were pushed to remote
-
-## Context
-
-This is the final step of the commit workflow. The agent verifies that the changed files match its own expectations from the work done during the session, then commits and pushes. This catches unexpected changes while avoiding unnecessary user interruptions.
diff --git a/.deepwork/jobs/commit/steps/lint.md b/.deepwork/jobs/commit/steps/lint.md
deleted file mode 100644
index 4485549d..00000000
--- a/.deepwork/jobs/commit/steps/lint.md
+++ /dev/null
@@ -1,70 +0,0 @@
-# Lint Code
-
-## Objective
-
-Format and lint the codebase using ruff to ensure code quality and consistency.
-
-## Task
-
-Run ruff format and ruff check to format and lint the code. This step should be executed using a sub-agent to conserve context in the main conversation.
-
-### Process
-
-**IMPORTANT**: Use the Task tool to spawn a sub-agent for this work. This saves context in the main conversation. Use the `haiku` model for speed.
-
-1. **Spawn a sub-agent to run linting**
-
-   Use the Task tool with these parameters:
-   - `subagent_type`: "Bash"
-   - `model`: "haiku"
-   - `prompt`: See below
-
-   The sub-agent should:
-
-   a. **Run ruff format**
-      ```bash
-      ruff format .
-      ```
-      This formats the code according to ruff's style rules.
-
-   b. **Run ruff check with auto-fix**
-      ```bash
-      ruff check --fix .
-      ```
-      This checks for lint errors and automatically fixes what it can.
-
-   c. **Run ruff check again to verify**
-      ```bash
-      ruff check .
-      ```
-      Capture the final output to verify no remaining issues.
-
-2. **Review sub-agent results**
-   - Check that both format and check completed successfully
-   - Note any remaining lint issues that couldn't be auto-fixed
-
-3. **Handle remaining issues**
-   - If there are lint errors that couldn't be auto-fixed, fix them manually
-   - Re-run ruff check to verify
-
-## Example Sub-Agent Prompt
-
-```
-Run ruff to format and lint the codebase:
-
-1. Run: ruff format .
-2. Run: ruff check --fix .
-3. Run: ruff check . (to verify no remaining issues)
-
-Report the results of each command.
-```
-
-## Quality Criteria
-
-- ruff format was run successfully
-- ruff check was run with --fix flag
-- No remaining lint errors
-
-## Context
-
-This step ensures code quality and consistency before committing. It runs after tests pass and before the commit step. Using a sub-agent keeps the main conversation context clean for the commit review.
diff --git a/.deepwork/jobs/commit/steps/review.md b/.deepwork/jobs/commit/steps/review.md
deleted file mode 100644
index bfe8cc0c..00000000
--- a/.deepwork/jobs/commit/steps/review.md
+++ /dev/null
@@ -1,55 +0,0 @@
-# Code Review
-
-## Objective
-
-Review changed code for quality issues before running tests. This catches problems early and ensures code meets quality standards.
-
-## Task
-
-Use a sub-agent to review the staged/changed code and identify issues that should be fixed before committing.
-
-### Process
-
-**IMPORTANT**: Use the Task tool to spawn a sub-agent for this review. This saves context in the main conversation.
-
-1. **Get the list of changed files**
-   ```bash
-   git diff --name-only HEAD
-   git diff --name-only --staged
-   ```
-   Combine these to get all files that have been modified.
-
-2. **Spawn a sub-agent to review the code**
-
-   Use the Task tool with these parameters:
-   - `subagent_type`: "general-purpose"
-   - `prompt`: Instruct the sub-agent to:
-     - Read the code review standards from `doc/code_review_standards.md`
-     - Read each of the changed files
-     - Review each file against the standards
-     - Report issues found with file, line number, severity, and suggested fix
-
-3. **Review sub-agent findings**
-   - Examine each issue identified
-   - Prioritize issues by severity
-
-4. **Fix identified issues**
-   - Address each issue found by the review
-   - For DRY violations: extract shared code into functions/modules
-   - For naming issues: rename to be clearer
-   - For missing tests: add appropriate test cases
-   - For bugs: fix the underlying issue
-
-5. **Re-run review if significant changes made**
-   - If you made substantial changes, consider running another review pass
-   - Ensure fixes didn't introduce new issues
-
-## Quality Criteria
-
-- Changed files were identified
-- Sub-agent read the code review standards and reviewed all changed files
-- All identified issues were addressed or documented as intentional
-
-## Context
-
-This is the first step of the commit workflow. Code review happens before tests to catch quality issues early. The sub-agent approach keeps the main conversation context clean while providing thorough review coverage.
diff --git a/.deepwork/jobs/commit/steps/test.md b/.deepwork/jobs/commit/steps/test.md
deleted file mode 100644
index 29c2b920..00000000
--- a/.deepwork/jobs/commit/steps/test.md
+++ /dev/null
@@ -1,51 +0,0 @@
-# Run Tests
-
-## Objective
-
-Run the project's test suite and fix any failing tests until all tests pass.
-
-## Task
-
-Execute the test suite for the project and iteratively fix any failures until all tests pass.
-
-### Process
-
-1. **Pull latest code from the branch**
-   - Run `git pull` to fetch and merge any changes from the remote
-   - If there are merge conflicts, resolve them before proceeding
-   - This ensures you're testing against the latest code
-
-2. **Detect or use the test command**
-   - If a test command was provided, use that
-   - Otherwise, auto-detect the project type and determine the appropriate test command:
-     - Python: `pytest`, `python -m pytest`, `uv run pytest`
-     - Node.js: `npm test`, `yarn test`, `bun test`
-     - Go: `go test ./...`
-     - Rust: `cargo test`
-     - Check `package.json`, `pyproject.toml`, `Cargo.toml`, `go.mod` for hints
-
-3. **Run the tests**
-   - Execute the test command
-   - Capture the output
-
-4. **Analyze failures**
-   - If tests pass, proceed to output
-   - If tests fail, analyze the failure messages
-   - Identify the root cause of each failure
-
-5. **Fix failing tests**
-   - Make the necessary code changes to fix failures
-   - This may involve fixing bugs in implementation code or updating tests
-   - Re-run tests after each fix
-
-6. **Iterate until passing**
-   - Continue the fix/test cycle until all tests pass
-
-## Quality Criteria
-
-- Latest code was pulled from the branch
-- All tests are passing
-
-## Context
-
-This step runs after code review. Tests must pass before proceeding to lint and commit. This ensures code quality and prevents broken code from being committed. If tests fail due to issues introduced by the code review fixes, iterate on the fixes until tests pass.
diff --git a/.deepwork/jobs/deepwork_jobs/AGENTS.md b/.deepwork/jobs/deepwork_jobs/AGENTS.md
index 6d97d0e5..576c1185 100644
--- a/.deepwork/jobs/deepwork_jobs/AGENTS.md
+++ b/.deepwork/jobs/deepwork_jobs/AGENTS.md
@@ -18,9 +18,9 @@ This is the source of truth for the `deepwork_jobs` standard job.
 
 2. **Working copy**: `.deepwork/jobs/deepwork_jobs/`
    - Must be updated after changes to source
-   - Used by `deepwork sync` to generate commands
+   - Used by the MCP server at runtime
 
-After making changes to the source, copy files to the working copy:
+After making changes to the source, run `deepwork install` or manually copy:
 ```bash
 cp src/deepwork/standard_jobs/deepwork_jobs/job.yml .deepwork/jobs/deepwork_jobs/
 cp src/deepwork/standard_jobs/deepwork_jobs/steps/*.md .deepwork/jobs/deepwork_jobs/steps/
@@ -37,6 +37,8 @@ deepwork_jobs/
 ├── steps/
 │   ├── define.md          # Define step instructions
 │   ├── implement.md       # Implement step instructions
+│   ├── test.md            # Test step instructions
+│   ├── iterate.md         # Iterate step instructions
 │   ├── learn.md           # Learn step instructions
 │   └── supplemental_file_references.md  # Reference documentation
 └── templates/
@@ -47,6 +49,33 @@ deepwork_jobs/
     └── step_instruction.md.example   # Complete step example
 ```
 
+## Quality Review Learnings
+
+These learnings come from running the `new_job` workflow to create the `github_outreach` job (2026-02-06).
+
+### Review Criteria Must Be Pragmatic
+
+The implement step's review criteria caused 6+ review iterations during the github_outreach job creation. Key problems and fixes:
+
+1. **"Ask Structured Questions" was applied to ALL steps** — even pure analysis/generation steps with no user input. Fixed in v1.4.0: criterion now auto-passes for steps that only have file inputs from prior steps (no name/description user inputs).
+
+2. **"Output Examples" was too strict** — demanded concrete filled-in examples in every step file, even when a template structure with `[bracket placeholders]` was sufficient. Fixed in v1.4.0: renamed to "Output Format Examples" and accepts templates. Concrete examples are encouraged but not required.
+
+3. **Contradictory review results** — In one case, all 6 individual criteria passed but the overall review still returned `needs_work`. This appears to be a reviewer model issue where the summary contradicts the per-criterion assessments. Added `additional_review_guidance` to clarify when criteria should auto-pass.
+
+### Quality Review Timeouts on Large Outputs
+
+Steps producing many files (25 analysis files) or very long files (700+ line playbook) exceeded the 120-second MCP timeout during quality review. The `quality_review_override_reason` parameter was needed to bypass these.
+
+Mitigation strategies documented in `define.md`:
+- Use `run_each: step` instead of `run_each: <files_output>` for steps with many files
+- Keep review criteria efficient to evaluate
+- Note expected output volume in step descriptions
+
+### Dependency Validation Gaps
+
+The github_outreach `final_report` step had `analyze_repos` as a file input but was missing it from the `dependencies` list. This was caught at workflow start time but could have been caught earlier during the `implement` step. The define step's validation rules already mention this (`from_step must be in dependencies`) but it was missed during creation.
+
 ## Version Management
 
 - Version is tracked in `job.yml`
@@ -56,5 +85,5 @@ deepwork_jobs/
 
 ## Last Updated
 
-- Date: 2026-01-15
-- From conversation about: Adding make_new_job.sh script and templates directory
+- Date: 2026-02-06
+- From conversation about: Learn workflow analyzing severe quality review issues in the new_job execution
diff --git a/.deepwork/jobs/deepwork_jobs/doc_specs/job_spec.md b/.deepwork/jobs/deepwork_jobs/doc_specs/job_spec.md
deleted file mode 100644
index b880bb17..00000000
--- a/.deepwork/jobs/deepwork_jobs/doc_specs/job_spec.md
+++ /dev/null
@@ -1,203 +0,0 @@
----
-name: "DeepWork Job Specification"
-description: "YAML specification file that defines a multi-step workflow job for AI agents"
-path_patterns:
-  - ".deepwork/jobs/*/job.yml"
-target_audience: "AI agents executing jobs and developers defining workflows"
-frequency: "Created once per job, updated as workflow evolves"
-quality_criteria:
-  - name: Valid Identifier
-    description: "Job name must be lowercase with underscores, no spaces or special characters (e.g., `competitive_research`, `monthly_report`)"
-  - name: Semantic Version
-    description: "Version must follow semantic versioning format X.Y.Z (e.g., `1.0.0`, `2.1.3`)"
-  - name: Concise Summary
-    description: "Summary must be under 200 characters and clearly describe what the job accomplishes"
-  - name: Rich Description
-    description: "Description must be multi-line and explain: the problem solved, the process, expected outcomes, and target users"
-  - name: Changelog Present
-    description: "Must include a changelog array with at least the initial version entry. Changelog should only include one entry per branch at most"
-  - name: Complete Steps
-    description: "Each step must have: id (lowercase_underscores), name, description, instructions_file, outputs (at least one), and dependencies array"
-  - name: Valid Dependencies
-    description: "Dependencies must reference existing step IDs with no circular references"
-  - name: Input Consistency
-    description: "File inputs with `from_step` must reference a step that is in the dependencies array"
-  - name: Output Paths
-    description: "Outputs must be valid filenames or paths within the main repo directory structure, never in dot-directories like `.deepwork/`. Use specific, descriptive paths that lend themselves to glob patterns (e.g., `competitive_research/acme_corp/swot.md` or `operations/reports/2026-01/spending_analysis.md`). Parameterized paths like `[competitor_name]/` are encouraged for per-entity outputs. Avoid generic names (`output.md`, `analysis.md`) and transient-sounding paths (`temp/`, `draft.md`). Supporting materials for a final output should go in a peer `_dataroom` folder (e.g., `spending_analysis_dataroom/`)."
-  - name: Concise Instructions
-    description: "The content of the file, particularly the description, must not have excessively redundant information. It should be concise and to the point given that extra tokens will confuse the AI."
----
-
-# DeepWork Job Specification: [job_name]
-
-A `job.yml` file defines a complete multi-step workflow that AI agents can execute. Each job breaks down a complex task into reviewable steps with clear inputs and outputs.
-
-## Required Fields
-
-### Top-Level Metadata
-
-```yaml
-name: job_name                    # lowercase, underscores only
-version: "1.0.0"                  # semantic versioning
-summary: "Brief description"      # max 200 characters
-description: |                    # detailed multi-line explanation
-  [Explain what this workflow does, why it exists,
-  what outputs it produces, and who should use it]
-```
-
-### Changelog
-
-```yaml
-changelog:
-  - version: "1.0.0"
-    changes: "Initial job creation"
-  - version: "1.1.0"
-    changes: "Added quality validation hooks"
-```
-
-### Steps Array
-
-```yaml
-steps:
-  - id: step_id                   # unique, lowercase_underscores
-    name: "Human Readable Name"
-    description: "What this step accomplishes"
-    instructions_file: steps/step_id.md
-    inputs:
-      # User-provided inputs:
-      - name: param_name
-        description: "What the user provides"
-      # File inputs from previous steps:
-      - file: output.md
-        from_step: previous_step_id
-    outputs:
-      - competitive_research/competitors_list.md           # descriptive path
-      - competitive_research/[competitor_name]/research.md # parameterized path
-      # With doc spec reference:
-      - file: competitive_research/final_report.md
-        doc_spec: .deepwork/doc_specs/report_type.md
-    dependencies:
-      - previous_step_id          # steps that must complete first
-```
-
-## Optional Fields
-
-### Exposed Steps
-
-```yaml
-steps:
-  - id: learn
-    exposed: true                 # Makes step available without running dependencies
-```
-
-### Agent Delegation
-
-When a step should be executed by a specific agent type, use the `agent` field. This automatically sets `context: fork` in the generated skill.
-
-```yaml
-steps:
-  - id: research_step
-    agent: general-purpose        # Delegates to the general-purpose agent
-```
-
-Available agent types:
-- `general-purpose` - Standard agent for multi-step tasks
-
-### Quality Hooks
-
-```yaml
-steps:
-  - id: step_id
-    hooks:
-      after_agent:
-        # Inline prompt for quality validation:
-        - prompt: |
-            Verify the output meets criteria:
-            1. [Criterion 1]
-            2. [Criterion 2]
-            If ALL criteria are met, include `<promise>...</promise>`.
-        # External prompt file:
-        - prompt_file: hooks/quality_check.md
-        # Script for programmatic validation:
-        - script: hooks/run_tests.sh
-```
-
-### Stop Hooks (Legacy)
-
-```yaml
-steps:
-  - id: step_id
-    stop_hooks:
-      - prompt: "Validation prompt..."
-      - prompt_file: hooks/check.md
-      - script: hooks/validate.sh
-```
-
-## Validation Rules
-
-1. **No circular dependencies**: Step A cannot depend on Step B if Step B depends on Step A
-2. **File inputs require dependencies**: If a step uses `from_step: X`, then X must be in its dependencies
-3. **Unique step IDs**: No two steps can have the same id
-4. **Valid file paths**: Output paths must not contain invalid characters and should be in the main repo (not dot-directories)
-5. **Instructions files exist**: Each `instructions_file` path should have a corresponding file created
-
-## Example: Complete Job Specification
-
-```yaml
-name: competitive_research
-version: "1.0.0"
-summary: "Systematic competitive analysis workflow"
-description: |
-  A comprehensive workflow for analyzing competitors in your market segment.
-  Helps product teams understand the competitive landscape through systematic
-  identification, research, comparison, and positioning recommendations.
-
-  Produces:
-  - Vetted competitor list
-  - Research notes per competitor
-  - Comparison matrix
-  - Strategic positioning report
-
-changelog:
-  - version: "1.0.0"
-    changes: "Initial job creation"
-
-steps:
-  - id: identify_competitors
-    name: "Identify Competitors"
-    description: "Identify 5-7 key competitors in the target market"
-    instructions_file: steps/identify_competitors.md
-    inputs:
-      - name: market_segment
-        description: "The market segment to analyze"
-      - name: product_category
-        description: "The product category"
-    outputs:
-      - competitive_research/competitors_list.md
-    dependencies: []
-
-  - id: research_competitors
-    name: "Research Competitors"
-    description: "Deep dive research on each identified competitor"
-    instructions_file: steps/research_competitors.md
-    inputs:
-      - file: competitive_research/competitors_list.md
-        from_step: identify_competitors
-    outputs:
-      - competitive_research/[competitor_name]/research.md
-    dependencies:
-      - identify_competitors
-
-  - id: positioning_report
-    name: "Positioning Report"
-    description: "Strategic positioning recommendations"
-    instructions_file: steps/positioning_report.md
-    inputs:
-      - file: competitive_research/[competitor_name]/research.md
-        from_step: research_competitors
-    outputs:
-      - file: competitive_research/positioning_report.md
-        doc_spec: .deepwork/doc_specs/positioning_report.md
-    dependencies:
-      - research_competitors
-```
diff --git a/.deepwork/jobs/deepwork_jobs/job.yml b/.deepwork/jobs/deepwork_jobs/job.yml
index 4343cbda..57baf9f1 100644
--- a/.deepwork/jobs/deepwork_jobs/job.yml
+++ b/.deepwork/jobs/deepwork_jobs/job.yml
@@ -1,13 +1,16 @@
+# yaml-language-server: $schema=.deepwork/schemas/job.schema.json
 name: deepwork_jobs
-version: "1.0.0"
-summary: "Creates and manages multi-step AI workflows. Use when defining, implementing, or improving DeepWork jobs."
+version: "1.4.0"
+summary: "Creates and manages multi-step AI workflows. Use when defining, implementing, testing, or improving DeepWork jobs."
 description: |
   Core commands for managing DeepWork jobs. These commands help you define new multi-step
-  workflows and learn from running them.
+  workflows, test them on real use cases, and learn from running them.
 
-  The `new_job` workflow guides you through defining and implementing a new job by
-  asking structured questions about your workflow, understanding each step's inputs and outputs,
-  reviewing the specification, and generating all necessary files.
+  The `new_job` workflow guides you through the full lifecycle of creating a new job:
+  1. **Define**: Gather requirements through structured questions and create job.yml
+  2. **Implement**: Generate step instruction files and sync slash commands
+  3. **Test**: Run the workflow on a real use case, critique output, and iterate with user
+  4. **Iterate**: Review what happened and improve the job definition based on learnings
 
   The `learn` skill reflects on conversations where DeepWork jobs were run, identifies
   confusion or inefficiencies, and improves job instructions. It also captures bespoke
@@ -15,13 +18,38 @@ description: |
 
 workflows:
   - name: new_job
-    summary: "Create a new DeepWork job from scratch through definition, review, and implementation"
+    summary: "Create a new DeepWork job from scratch through definition, implementation, testing, and iteration"
     steps:
       - define
-      - review_job_spec
       - implement
+      - test
+      - iterate
+
+  - name: repair
+    summary: "Clean up and migrate DeepWork configurations from prior versions"
+    steps:
+      - fix_settings
+      - fix_jobs
+      - errata
+
+  - name: learn
+    summary: "Analyze conversation history to improve job instructions and capture learnings"
+    steps:
+      - learn
 
 changelog:
+  - version: "1.4.0"
+    changes: "Fixed implement step review criteria that caused severe friction: 'Ask Structured Questions' now auto-passes for steps without user inputs; 'Output Examples' renamed to 'Output Format Examples' and accepts template placeholders; added detailed review guidance to prevent misapplication of criteria"
+  - version: "1.3.0"
+    changes: "Migrated quality_criteria to reviews system with run_each targeting and map-format criteria"
+  - version: "1.2.1"
+    changes: "Removed deprecated exposed field from learn step; added learn workflow to make step accessible via MCP"
+  - version: "1.2.0"
+    changes: "Added repair workflow with fix_settings, fix_jobs, and errata steps for migrating old DeepWork configurations to current format"
+  - version: "1.1.0"
+    changes: "Added test and iterate steps to new_job workflow; test runs the workflow on a real use case and gathers feedback; iterate improves the job definition based on what happened"
+  - version: "1.0.1"
+    changes: "Removed review_job_spec step from new_job workflow; implement now follows directly from define"
   - version: "1.0.0"
     changes: "Added workflows section to distinguish new_job workflow (define→review_job_spec→implement) from standalone learn skill"
   - version: "0.1.0"
@@ -34,12 +62,6 @@ changelog:
     changes: "Removed implementation_summary and learning_summary outputs; simplified step outputs"
   - version: "0.5.0"
     changes: "Standardized on 'ask structured questions' phrasing for user input; Updated quality criteria hooks to verify phrase usage; Added guidance in implement.md to use phrase in generated instructions"
-  - version: "0.6.0"
-    changes: "Added doc spec support; define.md now detects document-oriented workflows and guides doc spec creation; learn.md now identifies and applies doc spec-related improvements"
-  - version: "0.7.0"
-    changes: "Added job.yml doc spec; define step now outputs job.yml with doc_spec reference for quality validation"
-  - version: "0.8.0"
-    changes: "Added review_job_spec step between define and implement for doc spec-based quality validation using sub-agent review"
   - version: "0.9.0"
     changes: "Improved skill descriptions with third-person voice and 'Use when...' triggers for better discoverability"
 
@@ -52,70 +74,210 @@ steps:
       - name: job_purpose
         description: "What complex task or workflow are you trying to accomplish?"
     outputs:
-      - file: job.yml
-        doc_spec: .deepwork/doc_specs/job_spec.md
+      job.yml:
+        type: file
+        description: "Definition of the job and its workflows"
+        required: true
     dependencies: []
-  - id: review_job_spec
-    name: "Review Job Specification"
-    description: "Reviews job.yml against quality criteria using a sub-agent for unbiased validation. Use after defining a job specification."
-    instructions_file: steps/review_job_spec.md
+    reviews:
+      - run_each: job.yml
+        quality_criteria:
+          "Intermediate Deliverables": "Does the job break out across the logical steps such that there are reviewable intermediate deliverables?"
+          "Reviews": |
+            Are there reviews defined for each step? Do particularly critical documents have their own reviews?
+            Note that the reviewers do not have transcript access, so if the criteria are about the conversation,
+            then add a `.deepwork/tmp/[step_summary].md` step output file so the agent has a communication channel to the reviewer.
+
+  - id: implement
+    name: "Implement Job Steps"
+    description: "Generates step instruction files and syncs slash commands from the job.yml specification. Use after defining a job."
+    instructions_file: steps/implement.md
     inputs:
       - file: job.yml
         from_step: define
     outputs:
+      step_instruction_files:
+        type: files
+        description: "Instruction Markdown files for each step"
+        required: true
+    dependencies:
+      - define
+    reviews:
+      - run_each: step_instruction_files
+        additional_review_guidance: "Read the job.yml file in the same job directory for context on how this instruction file fits into the larger workflow."
+        quality_criteria:
+          "Complete Instructions": "Is the instruction file complete (no stubs or placeholders)?"
+          "Specific & Actionable": "Are instructions tailored to the step's purpose, not generic?"
+          "Output Examples": "Does the instruction file show what good output looks like? This can be either template examples, or negative examples of what not to do. Only required if the step has ouputs"
+          "Quality Criteria": "Does the instruction file define quality criteria for its outputs?"
+          "Ask Structured Questions": "If this step gathers user input, do instructions explicitly use the phrase 'ask structured questions'? If the step has no user inputs, this criterion passes automatically."
+          "Prompt Engineering": "Does the instruction file follow Anthropic's best practices for prompt engineering?"
+
+  - id: test
+    name: "Test the New Workflow"
+    description: "Tests the newly created workflow by running it on a real use case, critiquing the output, and iterating until the user is satisfied. Use after implementing a job."
+    instructions_file: steps/test.md
+    inputs:
       - file: job.yml
-        doc_spec: .deepwork/doc_specs/job_spec.md
+        from_step: define
+      - file: step_instruction_files
+        from_step: implement
+    outputs:
+      .deepwork/tmp/test_feedback.md:
+        type: file
+        description: "Feedback from testing the workflow on a real use case"
+        required: true
     dependencies:
       - define
-    quality_criteria:
-      - "**Sub-Agent Used**: Was a sub-agent spawned to provide unbiased review?"
-      - "**All doc spec Criteria Evaluated**: Did the sub-agent assess all 9 quality criteria?"
-      - "**Findings Addressed**: Were all failed criteria addressed by the main agent?"
-      - "**Validation Loop Complete**: Did the review-fix cycle continue until all criteria passed?"
+      - implement
+    reviews:
+      - run_each: step
+        quality_criteria:
+          "Workflow Invoked": "Was the new workflow actually run on the user's test case via MCP?"
+          "Output Critiqued": "Did the agent identify up to 3 top issues with the output?"
+          "User Feedback Gathered": "Did the agent ask the user about each issue and gather additional feedback?"
+          "Corrections Made": "Were all requested corrections applied to the output?"
+          "User Satisfied": "Did the user confirm the output meets their needs?"
 
-  - id: implement
-    name: "Implement Job Steps"
-    description: "Generates step instruction files and syncs slash commands from the job.yml specification. Use after job spec review passes."
-    instructions_file: steps/implement.md
+  - id: iterate
+    name: "Iterate on Workflow Design"
+    description: "Reviews the test run conversation and improves the job definition based on what happened. Use after testing a newly created job."
+    instructions_file: steps/iterate.md
     inputs:
       - file: job.yml
-        from_step: review_job_spec
+        from_step: define
+      - file: step_instruction_files
+        from_step: implement
+      - file: .deepwork/tmp/test_feedback.md
+        from_step: test
     outputs:
-      - steps/
+      job.yml:
+        type: file
+        description: "Updated job definition with improvements from test run"
+        required: true
+      step_instruction_files:
+        type: files
+        description: "Updated instruction Markdown files for each step"
+        required: true
+      scripts:
+        type: files
+        description: "Updated scripts to run parts of the job more efficiently"
+        required: false
     dependencies:
-      - review_job_spec
-    quality_criteria:
-      - "**Directory Structure**: Is `.deepwork/jobs/[job_name]/` created correctly?"
-      - "**Complete Instructions**: Are ALL step instruction files complete (not stubs or placeholders)?"
-      - "**Specific & Actionable**: Are instructions tailored to each step's purpose, not generic?"
-      - "**Output Examples**: Does each instruction file show what good output looks like?"
-      - "**Quality Criteria**: Does each instruction file define quality criteria for its outputs?"
-      - "**Ask Structured Questions**: Do step instructions that gather user input explicitly use the phrase \"ask structured questions\"?"
-      - "**Sync Complete**: Has `deepwork sync` been run successfully?"
-      - "**Commands Available**: Are the slash-commands generated in `.claude/commands/`?"
-      - "**Rules Considered**: Has the agent thought about whether rules would benefit this job? If relevant rules were identified, did they explain them and offer to run `/deepwork_rules.define`? Not every job needs rules - only suggest when genuinely helpful."
+      - define
+      - implement
+      - test
+    reviews: []
 
   - id: learn
     name: "Learn from Job Execution"
     description: "Analyzes conversation history to improve job instructions and capture learnings. Use after running a job to refine it."
     instructions_file: steps/learn.md
-    exposed: true
     inputs:
       - name: job_name
         description: "Name of the job that was run (optional - will auto-detect from conversation)"
     outputs:
-      - AGENTS.md
+      AGENTS.md:
+        type: file
+        description: "Bespoke learnings and run-specific context for the working folder"
+        required: true
+      job.yml:
+        type: file
+        description: "Updated job definition with improvements from test run"
+        required: true
+      step_instruction_files:
+        type: files
+        description: "Updated instruction Markdown files for each step"
+        required: true
+      scripts:
+        type: files
+        description: "Updated scripts to run parts of the job more efficiently"
+        required: false
+    dependencies: []
+    reviews:
+      - run_each: step
+        quality_criteria:
+          "Conversation Analyzed": "Did the agent review the conversation for DeepWork job executions?"
+          "Confusion Identified": "Did the agent identify points of confusion, errors, or inefficiencies?"
+          "Instructions Improved": "Were job instructions updated to address identified issues?"
+          "Instructions Concise": "Are instructions free of redundancy and unnecessary verbosity?"
+          "Shared Content Extracted": "Is lengthy/duplicated content extracted into referenced files?"
+          "Bespoke Learnings Captured": "Were run-specific learnings added to AGENTS.md?"
+          "File References Used": "Do AGENTS.md entries reference other files where appropriate?"
+          "Working Folder Correct": "Is AGENTS.md in the correct working folder for the job?"
+
+  - id: fix_settings
+    name: "Fix Settings Files"
+    description: "Cleans up .claude/settings.json and related configuration files, removing legacy permissions, duplicate hooks, and hardcoded paths from prior DeepWork versions."
+    instructions_file: steps/fix_settings.md
+    inputs: []
+    outputs:
+      settings.json:
+        type: file
+        description: "Cleaned up Claude settings file with legacy permissions removed"
+        required: true
     dependencies: []
-    quality_criteria:
-      - "**Conversation Analyzed**: Did the agent review the conversation for DeepWork job executions?"
-      - "**Confusion Identified**: Did the agent identify points of confusion, errors, or inefficiencies?"
-      - "**Instructions Improved**: Were job instructions updated to address identified issues?"
-      - "**Instructions Concise**: Are instructions free of redundancy and unnecessary verbosity?"
-      - "**Shared Content Extracted**: Is lengthy/duplicated content extracted into referenced files?"
-      - "**doc spec Reviewed (if applicable)**: For jobs with doc spec outputs, were doc spec-related learnings identified?"
-      - "**doc spec Updated (if applicable)**: Were doc spec files updated with improved quality criteria or structure?"
-      - "**Bespoke Learnings Captured**: Were run-specific learnings added to AGENTS.md?"
-      - "**File References Used**: Do AGENTS.md entries reference other files where appropriate?"
-      - "**Working Folder Correct**: Is AGENTS.md in the correct working folder for the job?"
-      - "**Generalizable Separated**: Are generalizable improvements in instructions, not AGENTS.md?"
-      - "**Sync Complete**: Has `deepwork sync` been run if instructions were modified?"
+    reviews:
+      - run_each: step
+        quality_criteria:
+          "DeepWork Skills Removed": "Are `Skill(...)` entries matching jobs in `.deepwork/jobs/` removed?"
+          "Non-DeepWork Skills Preserved": "Are skills NOT matching DeepWork jobs left intact?"
+          "make_new_job.sh Preserved": "Is the `Bash(...)` permission for `make_new_job.sh` preserved (if present)?"
+          "Rules Hooks Removed": "Are all DeepWork Rules hooks and permissions removed?"
+          "Duplicate Hooks Removed": "Are duplicate hook entries consolidated or removed?"
+          "Hardcoded Paths Removed": "Are user-specific hardcoded paths (like `/Users/*/...`) removed?"
+          "Deprecated Commands Removed": "Are deprecated commands like `deepwork hook *` removed?"
+          "Valid JSON": "Is settings.json still valid JSON after modifications?"
+          "Backup Created": "Was a backup of the original settings created before modifications?"
+
+  - id: fix_jobs
+    name: "Fix Job Definitions"
+    description: "Updates job.yml files and step instructions to current DeepWork format, removing deprecated fields and migrating to new structures."
+    instructions_file: steps/fix_jobs.md
+    inputs:
+      - file: settings.json
+        from_step: fix_settings
+    outputs:
+      job_definitions:
+        type: files
+        description: "Updated job.yml files and step instructions in current DeepWork format"
+        required: true
+      step_instruction_files:
+        type: files
+        description: "Updated step instruction files"
+        required: true
+    dependencies:
+      - fix_settings
+    reviews:
+      - run_each: step
+        additional_review_guidance: "Read the .claude/settings.json file for context on what settings were cleaned up in the prior step."
+        quality_criteria:
+          "Exposed Field Addressed": "Are `exposed: true` fields removed or noted as deprecated?"
+          "Stop Hooks Migrated": "Are `stop_hooks` migrated to `hooks.after_agent` format?"
+          "Removed Steps Cleaned": "Are references to removed steps (like `review_job_spec`) updated?"
+          "Orphaned Steps Fixed": "For jobs with no workflows, is there a single workflow (named after the job) containing all steps? For jobs with existing workflows, does each orphan get its own workflow (named after the step)?"
+          "Valid YAML": "Are all job.yml files valid YAML?"
+
+  - id: errata
+    name: "Clean Up Errata"
+    description: "Removes obsolete files and folders from prior DeepWork versions, including old skill directories, temp files, and deprecated configurations."
+    instructions_file: steps/errata.md
+    outputs: {}
+    inputs:
+      - file: job_definitions
+        from_step: fix_jobs
+    dependencies:
+      - fix_settings
+      - fix_jobs
+    reviews:
+      - run_each: step
+        additional_review_guidance: "Check the .deepwork/jobs/ directory and .claude/skills/ directory to verify the cleanup was done correctly."
+        quality_criteria:
+          "Legacy Job Skills Removed": "Are legacy skill folders for each job removed from `.claude/skills/` and `.gemini/skills/`?"
+          "Deepwork Skill Preserved": "Does the `deepwork` skill folder still exist in `.claude/skills/deepwork/`?"
+          "Temp Files Cleaned": "Are `.deepwork/tmp/` contents cleaned appropriately?"
+          "Rules Folder Removed": "Is `.deepwork/rules/` folder backed up and removed (fully deprecated)?"
+          "Rules Job Removed": "Is `.deepwork/jobs/deepwork_rules/` removed if present?"
+          "Config Version Updated": "Is `.deepwork/config.yml` using current version format?"
+          "DeepWork Re-installed": "Was `deepwork install` run after cleanup, and does it complete without errors?"
+          "Git Status Clean": "Are changes ready to be committed (no untracked garbage files)?"
diff --git a/.deepwork/jobs/deepwork_jobs/make_new_job.sh b/.deepwork/jobs/deepwork_jobs/make_new_job.sh
index c561d6d2..c87f40e8 100755
--- a/.deepwork/jobs/deepwork_jobs/make_new_job.sh
+++ b/.deepwork/jobs/deepwork_jobs/make_new_job.sh
@@ -78,43 +78,47 @@ main() {
     mkdir -p "$job_path/steps"
     mkdir -p "$job_path/hooks"
     mkdir -p "$job_path/templates"
+    mkdir -p "$job_path/scripts"
 
     # Add .gitkeep files to empty directories
     touch "$job_path/hooks/.gitkeep"
     touch "$job_path/templates/.gitkeep"
+    touch "$job_path/scripts/.gitkeep"
 
     # Create AGENTS.md file
     cat > "$job_path/AGENTS.md" << 'EOF'
 # Job Management
 
-This folder and its subfolders are managed using the `deepwork_jobs` slash commands.
+This folder and its subfolders are managed using `deepwork_jobs` workflows.
 
-## Recommended Commands
+## Recommended Workflows
 
-- `/deepwork_jobs.define` - Create or modify the job.yml specification
-- `/deepwork_jobs.implement` - Generate step instruction files from the specification
-- `/deepwork_jobs.learn` - Improve instructions based on execution learnings
+- `deepwork_jobs/new_job` - Full lifecycle: define → implement → test → iterate
+- `deepwork_jobs/learn` - Improve instructions based on execution learnings
+- `deepwork_jobs/repair` - Clean up and migrate from prior DeepWork versions
 
 ## Directory Structure
 
 ```
 .
 ├── AGENTS.md          # This file - project context and guidance
-├── job.yml            # Job specification (created by /deepwork_jobs.define)
-├── steps/             # Step instruction files (created by /deepwork_jobs.implement)
+├── job.yml            # Job specification (created by define step)
+├── steps/             # Step instruction files (created by implement step)
 │   └── *.md           # One file per step
 ├── hooks/             # Custom validation scripts and prompts
 │   └── *.md|*.sh      # Hook files referenced in job.yml
+├── scripts/           # Reusable scripts and utilities created during job execution
+│   └── *.sh|*.py      # Helper scripts referenced in step instructions
 └── templates/         # Example file formats and templates
     └── *.md|*.yml     # Templates referenced in step instructions
 ```
 
 ## Editing Guidelines
 
-1. **Use slash commands** for structural changes (adding steps, modifying job.yml)
+1. **Use workflows** for structural changes (adding steps, modifying job.yml)
 2. **Direct edits** are fine for minor instruction tweaks
-3. **Run `/deepwork_jobs.learn`** after executing job steps to capture improvements
-4. **Run `deepwork sync`** after any changes to regenerate commands
+3. **Run `deepwork_jobs/learn`** after executing job steps to capture improvements
+4. **Run `deepwork install`** after any changes to regenerate commands
 EOF
 
     info "Created directory structure:"
@@ -122,13 +126,8 @@ EOF
     echo "  ├── AGENTS.md"
     echo "  ├── steps/"
     echo "  ├── hooks/.gitkeep"
+    echo "  ├── scripts/.gitkeep"
     echo "  └── templates/.gitkeep"
-
-    echo ""
-    info "Next steps:"
-    echo "  1. Run '/deepwork_jobs.define' to create the job.yml specification"
-    echo "  2. Run '/deepwork_jobs.implement' to generate step instructions"
-    echo "  3. Run 'deepwork sync' to create slash commands"
 }
 
 main "$@"
diff --git a/.deepwork/jobs/deepwork_jobs/research_report_job_best_practices.md b/.deepwork/jobs/deepwork_jobs/research_report_job_best_practices.md
new file mode 100644
index 00000000..87e65615
--- /dev/null
+++ b/.deepwork/jobs/deepwork_jobs/research_report_job_best_practices.md
@@ -0,0 +1,184 @@
+# Research Report Job Best Practices
+
+Reference guide for designing DeepWork jobs that produce research reports, analytical documents, or similar investigative deliverables. Use this when defining jobs via the `define` step.
+
+## The General Pattern
+
+Most report-authoring jobs follow a five-phase structure. Not every job needs all five as separate steps, and some phases combine naturally, but understanding the full arc helps you design a job that doesn't skip critical work.
+
+### 1. Connect
+
+**Purpose**: Verify that the tools and data sources the job will rely on are actually accessible before any real work begins.
+
+This step is about validating prerequisites, not doing research. Common activities:
+
+- **Database connectivity**: Run a trivial query (`SELECT 1`, `SHOW TABLES`) to confirm credentials work and the schema is reachable.
+- **Web search tools**: Confirm web search and browsing tools are enabled. If the job needs to read specific sites, verify they don't require login. If they do, get the user to authenticate (e.g., via Claude in Chrome) before proceeding.
+- **API access**: Test API keys or tokens against a lightweight endpoint.
+- **File access**: Confirm that input files, dashboards, or shared drives are readable.
+
+**Why a separate step?** A failed connection discovered midway through analysis wastes all prior work. Catching it upfront is cheap. That said, for simple jobs where the data source is obvious and reliable (e.g., "search the web for X"), this can be folded into the Align step as a quick check rather than standing alone.
+
+**Outputs**: A brief connectivity report or checklist confirming each source is accessible, plus any credentials or configuration notes for later steps.
+
+### 2. Align
+
+**Purpose**: Build enough understanding of the domain and the user's intent to scope the analysis correctly.
+
+This is a cyclical step: do light research, then ask clarifying questions, then refine understanding, repeat. It ends when both the agent and user agree on what "done" looks like.
+
+**The cycle**:
+
+1. **Light grounding research** - Just enough to ask smart questions. Not deep analysis.
+2. **Clarify with the user** - Surface ambiguities and propose scope boundaries.
+3. **Repeat** until there's shared understanding.
+
+**Example - Private data (SQL-centric)**:
+- Run broad queries to get the lay of the land: total record counts, key column names, date ranges, apparent segmentation columns (e.g., `division`, `region`).
+- Then ask the user: "I see 45,000 customer records across 3 divisions. Should we scope to a particular division? I'm defining churn as customers with no activity in 90 days - does that match your definition?"
+
+**Example - Public data (web-centric)**:
+- Do broad searches to see what's out there. Notice the shape of results: are they news articles, academic papers, industry reports? What subtopics keep appearing?
+- Then ask the user: "Results split between fast-fashion trends and haute couture analysis. Which direction? Also, should we focus on the current season or look at multi-year trends?"
+
+**Outputs**: A scoping document that captures the agreed-upon research questions, data sources, definitions, exclusions, and success criteria. This becomes the north star for the Analyze step.
+
+### 3. Analyze
+
+**Purpose**: The core research cycle. Query, record, synthesize, and deepen iteratively.
+
+This is where most of the work happens. The key discipline is maintaining structured working files so that nothing gets lost and the narrative builds progressively.
+
+**Working files to maintain**:
+
+| File | Purpose |
+|------|---------|
+| Query log | Every query/search with its results. What did you ask, what came back. Keeps work auditable and reproducible. |
+| Questions & Answers | Running list of research questions. As you find answers, record them. As answers suggest new questions, add those. This drives the iterative deepening. |
+| Draft report | The evolving narrative. Updated as new findings emerge. Forces you to synthesize as you go rather than dumping data at the end. |
+
+**The iterative deepening pattern**:
+
+Analysis should deepen in layers, not stay shallow across many topics. Each answer should prompt "why?" or "what drives that?" questions:
+
+- **Layer 1**: Top-level facts. "What was our AWS spend last month?" -> $10k. "How does that compare to prior month?" -> Up $1k.
+- **Layer 2**: Decomposition. "What services drove the spend?" -> $8k EC2, $1k S3, $1k other. "Where was the increase?" -> All in EC2.
+- **Layer 3**: Root causes. "Is our EC2 fleet well-utilized?" -> Many instances with attribute X are underutilized. "Are specific workloads driving the increase?" -> Yes, instances tagged `daily_sync_*` are up ~$2k.
+- **Layer 4+**: Continue until you hit actionable findings or diminishing returns.
+
+**When to stop deepening**: When additional queries aren't changing the narrative, or when you've answered the questions from the Align step to a sufficient depth. But make sure that any questions that a reasonable business person is likely to ask when looking at your output are answered.
+
+**Outputs**: The working files above (query log, Q&A tracker, draft report), organized in the dataroom alongside the final output.
+
+### 4. Review (Not a Separate Step)
+
+Reviews are not a standalone phase but checkpoints woven into all the steps, especially the Analyze step. Use DeepWork's `reviews` mechanism in `job.yml` to define quality gates.
+
+**Reviews to consider for the Analyze phase**:
+
+- **Query completeness**: Are the key research questions from the scoping document all addressed? Are queries recorded with their results?
+- **Draft coherence**: Does the draft report tell a logical story? Are sections connected rather than isolated findings?
+- **Depth adequacy**: Has the analysis gone deep enough on the important threads? Are there obvious follow-up questions left unasked?
+- **Citation integrity**: Are claims in the draft backed by specific queries/sources from the query log?
+
+**Reviews to consider for the Present phase** (see below):
+
+- **Visual quality**: Charts render correctly, no overlapping text, readable at intended size.
+- **Content accuracy**: Citations preserved from draft, numbers match source data, arguments are logically sound.
+- **Audience fit**: Language, detail level, and framing match the intended audience (executives vs. engineers vs. clients).
+- **Format compliance**: Output matches the requested format (PDF renders correctly, HTML is responsive, slides have consistent styling).
+
+### 5. Present
+
+**Purpose**: Transform the draft into a polished final deliverable.
+
+The draft report from the Analyze step has the right content but may not be presentation-ready. This step focuses on the output experience.
+
+**Common activities**:
+
+- **Visualizations**: Generate charts, tables, or diagrams from the data. Fetch relevant images. Create infographics for key findings.
+- **Formatting**: Convert to the final output format (PDF, HTML, slides, etc.). Apply styling and layout.
+- **Narrative polish**: Tighten prose, add executive summary, ensure the document flows well for someone reading it cold.
+- **Supporting materials**: Assemble appendices, data tables, methodology notes.
+
+**This step often requires multiple review cycles.** Visual outputs have failure modes that text-only drafts don't: overlapping labels, truncated legends, broken page breaks, images that don't load. Build in quality gates for visual review.
+
+**Outputs**: The final deliverable in its target format, plus any supporting materials.
+
+## Translating This Into a Job Definition
+
+### Step Structure Options
+
+**Minimal (3 steps)** - For straightforward reports with known data sources:
+1. `scope` - Combines Connect + Align. Verify access, clarify requirements.
+2. `research` - The Analyze phase with built-in review gates.
+3. `report` - The Present phase with visual/format review gates.
+
+**Standard (4 steps)** - For most research reports:
+1. `connect` - Verify data source access.
+2. `scope` - Align on research questions and definitions.
+3. `analyze` - Core research with iterative deepening.
+4. `present` - Final deliverable production.
+
+**Comprehensive (5+ steps)** - For complex, multi-source reports:
+1. `connect` - Verify all data source access.
+2. `scope` - Align on research questions.
+3. `gather` - Collect raw data across all sources (query log output).
+4. `analyze` - Synthesize findings, build narrative (draft report output).
+5. `present` - Polish and format final deliverable.
+
+### Output Organization
+
+Follow the dataroom pattern from the define step guidelines:
+
+```
+operations/reports/2026-01/spending_analysis.md              # Final report
+operations/reports/2026-01/spending_analysis_dataroom/        # Supporting materials
+    query_log.md                                              # All queries and results
+    questions_and_answers.md                                  # Research Q&A tracker
+    raw_data/                                                 # Extracted data files
+    charts/                                                   # Generated visualizations
+    draft.md                                                  # Working draft (for audit trail)
+```
+
+### Quality Gate Design
+
+Research reports benefit from **split reviews** that evaluate content and presentation separately:
+
+```yaml
+reviews:
+  # Content review - is the analysis sound?
+  - run_each: final_report.md
+    quality_criteria:
+      "Claims Cited": "Is every factual claim backed by a specific source or query from the dataroom?"
+      "Questions Answered": "Are all research questions from the scoping document addressed?"
+      "Depth": "Does the analysis go beyond surface-level observations to root causes or actionable insights?"
+
+  # Presentation review - is the output polished?
+  - run_each: final_report.md
+    quality_criteria:
+      "Readable Flow": "Does the document flow logically for someone reading it without prior context?"
+      "Audience Fit": "Is the language and detail level appropriate for the intended audience?"
+      "Visual Quality": "Do all charts, tables, and figures render correctly and add value?"
+```
+
+### Capability Considerations
+
+Research jobs frequently need specialized tools. During the `define` step, ask about:
+
+- **Database access**: What databases? What client tools or connection strings?
+- **Web browsing**: Will sites require authentication? Is Claude in Chrome available?
+- **File generation**: Does the final output need PDF/HTML rendering? What tools are available?
+- **Data visualization**: What charting libraries or tools can the agent use?
+
+## Anti-Patterns to Avoid
+
+**Shallow breadth over deep analysis**: Covering 20 topics superficially is less valuable than covering 5 topics with layered depth. Design the Analyze step to encourage iterative deepening, not checklist completion.
+
+**Skipping the scoping step**: Jumping straight into analysis without aligning on definitions and scope almost always leads to rework. "Analyze our churn" means very different things depending on how churn is defined.
+
+**Query results only in memory**: If queries and their results aren't written to working files, they can't be reviewed, cited, or audited. The query log is not optional.
+
+**Draft report written at the end**: The draft should evolve throughout the Analyze step, not be assembled from notes after all research is complete. Writing the narrative as you go reveals gaps in the analysis early.
+
+**Conflating analysis with presentation**: Trying to produce a polished PDF while still figuring out the findings leads to wasted formatting work. Get the content right first, then make it pretty.
diff --git a/.deepwork/jobs/deepwork_jobs/steps/define.md b/.deepwork/jobs/deepwork_jobs/steps/define.md
index 31de7440..78ac1223 100644
--- a/.deepwork/jobs/deepwork_jobs/steps/define.md
+++ b/.deepwork/jobs/deepwork_jobs/steps/define.md
@@ -31,60 +31,6 @@ Start by asking structured questions to understand what the user wants to accomp
    - What are the distinct stages from start to finish?
    - Are there any dependencies between phases?
 
-### Step 1.5: Detect Document-Oriented Workflows
-
-**Check for document-focused patterns** in the user's description:
-- Keywords: "report", "summary", "document", "create", "monthly", "quarterly", "for stakeholders", "for leadership"
-- Final deliverable is a specific document (e.g., "AWS spending report", "competitive analysis", "sprint summary")
-- Recurring documents with consistent structure
-
-**If a document-oriented workflow is detected:**
-
-1. Inform the user: "This workflow produces a specific document type. I recommend defining a doc spec first to ensure consistent quality."
-
-2. Ask structured questions to understand if they want to:
-   - Create a doc spec for this document
-   - Use an existing doc spec (if any exist in `.deepwork/doc_specs/`)
-   - Skip doc spec and proceed with simple outputs
-
-### Step 1.6: Define the Doc Spec (if needed)
-
-When creating a doc spec, gather the following information:
-
-1. **Document Identity**
-   - What is the document called? (e.g., "Monthly AWS Spending Report")
-   - Brief description of its purpose
-   - Where should these documents be stored? (path patterns like `finance/aws-reports/*.md`)
-
-2. **Audience and Context**
-   - Who reads this document? (target audience)
-   - How often is it produced? (frequency)
-
-3. **Quality Criteria** (3-5 criteria, each with name and description)
-
-   **Important**: Doc spec quality criteria define requirements for the **output document itself**, not the process of creating it. Focus on what the finished document must contain or achieve.
-
-   Examples for a spending report:
-   - **Visualization**: Must include charts showing spend breakdown by service
-   - **Variance Analysis**: Must compare current month against previous with percentages
-   - **Action Items**: Must include recommended cost optimization actions
-
-   **Note**: When a doc spec is created for a step's output, the step should generally NOT have separate `quality_criteria` in the job.yml. The doc spec's criteria cover output quality. Only add step-level quality_criteria if there are essential process requirements (e.g., "must use specific tool"), and minimize these when possible.
-
-4. **Document Structure**
-   - What sections should it have?
-   - Any required elements (tables, charts, summaries)?
-
-### Step 1.7: Create the doc spec File (if needed)
-
-Create the doc spec file at `.deepwork/doc_specs/[doc_spec_name].md`:
-
-**Template reference**: See `.deepwork/jobs/deepwork_jobs/templates/doc_spec.md.template` for the standard structure.
-
-**Complete example**: See `.deepwork/doc_specs/job_spec.md` for a fully worked example (the doc spec for job.yml files).
-
-After creating the doc spec, proceed to Step 2 with the doc spec reference for the final step's output.
-
 ### Step 2: Define Each Step
 
 For each major phase they mentioned, ask structured questions to gather details:
@@ -106,8 +52,6 @@ For each major phase they mentioned, ask structured questions to gather details:
    - Where should each output be saved? (filename/path)
    - Should outputs be organized in subdirectories? (e.g., `reports/`, `data/`, `drafts/`)
    - Will other steps need this output?
-   - **Does this output have a doc spec?** If a doc spec was created in Step 1.6/1.7, reference it for the appropriate output
-
    #### Work Product Storage Guidelines
 
    **Key principle**: Job outputs belong in the main repository directory structure, not in dot-directories. The `.deepwork/` directory is for job definitions and configuration only.
@@ -172,38 +116,54 @@ For each major phase they mentioned, ask structured questions to gather details:
    - Are there any quality checks or validation needed?
    - What makes a good vs. bad output for this step?
 
-6. **Agent Delegation** (optional)
-   - Should this step be executed by a specific agent type?
-   - Use the `agent` field when the step should run in a forked context with a specific agent
-   - When `agent` is set, the generated skill automatically includes `context: fork`
-   - Available agent types:
-     - `general-purpose` - Standard agent for multi-step tasks
-
-   ```yaml
-   steps:
-     - id: research_step
-       agent: general-purpose  # Delegates to the general-purpose agent
-   ```
+   **Important**: When skills are generated, quality criteria are automatically included in the output. Do not duplicate them in step instructions or details—this causes redundancy and confusion.
 
 **Note**: You're gathering this information to understand what instructions will be needed, but you won't create the instruction files yet - that happens in the `implement` step.
 
-#### Doc Spec-Aware Output Format
+### Capability Considerations
 
-When a step produces a document with a doc spec reference, use this format in job.yml:
+When defining steps, identify any that require specialized tools:
 
-```yaml
-outputs:
-  - file: reports/monthly_spending.md
-    doc_spec: .deepwork/doc_specs/monthly_aws_report.md
-```
+**Browser Automation**: If any step involves web scraping, form filling, interactive browsing, UI testing, or research requiring website visits, ask the user what browser tools they have available. For Claude Code users, **Claude in Chrome** (Anthropic's browser extension) has been tested with DeepWork and is recommended for new users. Don't assume a default—confirm the tool before designing browser-dependent steps.
 
-The doc spec's quality criteria will automatically be included in the generated skill, ensuring consistent document quality.
+### Parallel Sub-Workflow Pattern
 
-### Capability Considerations
+When a workflow needs to apply a multi-step process to many items independently (e.g., research each of 5 competitors, review each of 12 pull requests, analyze each file in a directory), **do not inline the repeated logic as a single step**. Instead, use the parallel sub-workflow pattern:
 
-When defining steps, identify any that require specialized tools:
+1. **Define a separate workflow** for the process that will be repeated. This workflow handles one item at a time (e.g., `research_one_competitor` with steps like `gather_data` → `analyze` → `write_summary`).
 
-**Browser Automation**: If any step involves web scraping, form filling, interactive browsing, UI testing, or research requiring website visits, ask the user what browser tools they have available. For Claude Code users, **Claude in Chrome** (Anthropic's browser extension) has been tested with DeepWork and is recommended for new users. Don't assume a default—confirm the tool before designing browser-dependent steps.
+2. **In the main workflow**, add a step whose instructions tell the agent to launch the sub-workflow once per item using sub-agents (via the Task tool). Since each item is independent, these sub-workflow runs execute in parallel.
+
+**Why this matters:**
+- **Parallelism**: Independent items are processed concurrently instead of sequentially, dramatically reducing wall-clock time
+- **Quality gates**: Each sub-workflow run goes through its own review cycle, so a bad result for one item doesn't block the others
+- **Reusability**: The sub-workflow can be invoked on its own for ad-hoc single-item runs
+
+**How to structure it in `job.yml`:**
+
+```yaml
+workflows:
+  - name: full_analysis
+    summary: "Research all competitors end-to-end"
+    steps:
+      - identify_competitors
+      - research_all          # This step launches research_one in parallel
+      - synthesize
+
+  - name: research_one
+    summary: "Deep-dive research on a single competitor"
+    steps:
+      - gather_data
+      - analyze
+      - write_summary
+```
+
+The `research_all` step's instructions should tell the agent to:
+- Read the list of items from the prior step's output
+- Launch `research_one` as a sub-workflow for each item using parallel sub-agents (Task tool)
+- Collect the results and confirm all runs completed
+
+**When to recognize this pattern:** Look for language like "for each X, do Y" where Y involves more than one logical phase. If Y is a single simple action, a regular step with a loop is fine. If Y is itself a multi-step process with intermediate outputs worth reviewing, split it into a sub-workflow.
 
 ### Step 3: Validate the Workflow
 
@@ -225,52 +185,86 @@ After gathering information about all steps:
    - Job description (detailed multi-line explanation)
    - Version number (start with 1.0.0)
 
-### Step 4: Define Quality Validation (Stop Hooks)
+### Step 4: Define Quality Reviews
 
-For each step, consider whether it would benefit from **quality validation loops**. Stop hooks allow the AI agent to iteratively refine its work until quality criteria are met.
+For each step, define **reviews** that evaluate the step's outputs. Reviews run automatically when a step completes and provide quality validation loops.
 
-**Ask structured questions about quality validation:**
-- "Are there specific quality criteria that must be met for this step?"
-- "Would you like the agent to validate its work before completing?"
-- "What would make you send the work back for revision?"
+For intermediate outputs between steps, reviews let you make sure you don't go too far down the wrong path. Add reviews that confirm things that could cause problems later. For example, in a report creation process, you might have an intermediate step that performs a number of queries on the data and records the results so that later report-writing steps can synthesize that information into a coherent narrative. In this case, you would want to add a review that checks that the queries SQL matches up with the description of the queries in the job description.
 
-**Stop hooks are particularly valuable for:**
-- Steps with complex outputs that need multiple checks
-- Steps where quality is critical (final deliverables)
-- Steps with subjective quality criteria that benefit from AI self-review
+For final outputs, reviews let you make sure the output meets the user's expectations. For example, with a data-centric report job, you might have one review on the final output for consistency with style guidelines and tone and such, and a totally separate review on the data-backing to make sure the claims in the report are supported by the data from earlier steps and all have citations. 
 
-**Three types of stop hooks are supported:**
+**Any jobs with written final output must always have reviews**. Some suggested ones are:
+- Ensure claims have citations and the citations are not hallucinated
+- Ensure the output follows the style guidelines and tone
+- Ensure the output is well-organized and easy to read
+- Ensure obvious questions the content raises have answers provided
+- Visual formatting is correct (for things like PDF or HTML where the visual output matters)
+- That the content matches what the intended audience expects (i.e. executives vs engineers)
 
-1. **Inline Prompt** (`prompt`) - Best for simple quality criteria
-   ```yaml
-   stop_hooks:
-     - prompt: |
-         Verify the output meets these criteria:
-         1. Contains at least 5 competitors
-         2. Each competitor has a description
-         3. Selection rationale is clear
-   ```
+**Reviews format:**
 
-2. **Prompt File** (`prompt_file`) - For detailed/reusable criteria
-   ```yaml
-   stop_hooks:
-     - prompt_file: hooks/quality_check.md
-   ```
+Each review specifies `run_each` (what to review) and `quality_criteria` (a map of criterion name to question):
 
-3. **Script** (`script`) - For programmatic validation (tests, linting)
-   ```yaml
-   stop_hooks:
-     - script: hooks/run_tests.sh
-   ```
+```yaml
+reviews:
+  - run_each: step  # Review all outputs together
+    quality_criteria:
+      "Consistent Style": "Do all files follow the same structure?"
+      "Complete Coverage": "Are all required topics covered?"
+  - run_each: report_files  # Review each file in a 'files'-type output individually
+    quality_criteria:
+      "Well Written": "Is the content clear and well-organized?"
+      "Data-Backed": "Are claims supported by data?"
+```
+
+**`run_each` options:**
+- `step` — Review runs once with ALL output files
+- `<output_name>` where output is `type: file` — Review runs once with that specific file
+- `<output_name>` where output is `type: files` — Review runs once per file in the list
+
+**`additional_review_guidance`** (optional): Tells the reviewer what other files or context to look at when performing the review. Reviewers only see the step's output files by default — they do NOT automatically see inputs from prior steps. When a review needs context beyond the output files (e.g., checking that an output is consistent with a prior step's deliverable, or that it follows conventions in a config file), use this field to tell the reviewer what to read.
 
-**Multiple hooks can be combined:**
 ```yaml
-stop_hooks:
-  - script: hooks/lint_output.sh
-  - prompt: "Verify the content is comprehensive and well-organized"
+reviews:
+  - run_each: report_files
+    additional_review_guidance: "Read the comparison_matrix.md file for context on whether claims in the report are supported by the analysis data."
+    quality_criteria:
+      "Data-Backed": "Are recommendations supported by the competitive analysis data?"
+  - run_each: step_instruction_files
+    additional_review_guidance: "Read the job.yml file in the same job directory for context on how this instruction file fits into the larger workflow."
+    quality_criteria:
+      "Complete Instructions": "Is the instruction file complete?"
 ```
 
-**Encourage prompt-based hooks** - They leverage the AI's ability to understand context and make nuanced quality judgments. Script hooks are best for objective checks (syntax, format, tests).
+**When to use `additional_review_guidance`:**
+- When a review criterion references data or context from a prior step's output
+- When the reviewer needs to cross-check the output against a specification, config, or schema file
+- When the review involves consistency checks between the current output and other project files
+- When the criterion mentions something the reviewer can't assess from the output alone
+
+**When NOT to use it:**
+- When all criteria can be evaluated by reading just the output files themselves (e.g., "Is it well-written?", "Are there spelling errors?")
+- Don't use it to dump large amounts of content — keep guidance short and tell the reviewer *what to read*, not *what's in it*
+
+**Reviews are particularly valuable for:**
+- Steps with complex outputs that need multiple quality checks
+- Steps where quality is critical (final deliverables)
+- Steps with subjective quality criteria that benefit from AI self-review
+- Steps producing multiple files where each file needs individual review
+
+**Quality review timeout considerations:**
+Each individual quality review call has a 120-second timeout. For `run_each: <output_name>` with `files`-type outputs, each file gets its own separate review call — so having many files does NOT cause timeout accumulation. Timeout risk is only for individual reviews that are complex, such as:
+- Reviewing a single very large file (500+ lines) with many criteria
+- Review criteria that require cross-referencing large amounts of context
+For these cases:
+- Keep review criteria focused and efficient to evaluate
+- Consider using `run_each: step` (reviews all outputs together once) if the per-file reviews are unnecessary
+- The agent can use `quality_review_override_reason` to bypass a timed-out review, but this loses the quality gate benefit
+
+**For steps with no quality checks needed, use an empty reviews list:**
+```yaml
+reviews: []
+```
 
 ### Step 5: Create the Job Directory and Specification
 
@@ -282,19 +276,10 @@ Only after you have complete understanding, create the job directory and `job.ym
 .deepwork/jobs/deepwork_jobs/make_new_job.sh [job_name]
 ```
 
-This creates:
-- `.deepwork/jobs/[job_name]/` - Main job directory
-- `.deepwork/jobs/[job_name]/steps/` - For step instruction files
-- `.deepwork/jobs/[job_name]/hooks/` - For custom validation scripts
-- `.deepwork/jobs/[job_name]/templates/` - For example file formats
-- `.deepwork/jobs/[job_name]/AGENTS.md` - Job management guidance
-
 **Then create the job.yml file** at `.deepwork/jobs/[job_name]/job.yml`
 
 (Where `[job_name]` is the name of the NEW job you're creating, e.g., `competitive_research`)
 
-**Doc Spec**: See `.deepwork/doc_specs/job_spec.md` for the complete specification with quality criteria.
-
 **Template reference**: See `.deepwork/jobs/deepwork_jobs/templates/job.yml.template` for the standard structure.
 
 **Complete example**: See `.deepwork/jobs/deepwork_jobs/templates/job.yml.example` for a fully worked example.
@@ -414,7 +399,7 @@ Claude: Great! Creating the job.yml specification now...
 - .deepwork/jobs/competitive_research/job.yml
 
 **Next step:**
-Run `/deepwork_jobs.review_job_spec` to validate the specification against quality criteria.
+Implement the job to generate step instruction files.
 ```
 
 ## Important Guidelines
@@ -454,5 +439,5 @@ The complete YAML specification file (example shown in Step 5 above).
 After creating the file:
 1. Inform the user that the specification is complete
 2. Recommend that they review the job.yml file
-3. Tell them to run `/deepwork_jobs.review_job_spec` next
+3. Tell them the next step is to implement the job (generate step instruction files)
 
diff --git a/.deepwork/jobs/deepwork_jobs/steps/errata.md b/.deepwork/jobs/deepwork_jobs/steps/errata.md
new file mode 100644
index 00000000..10245c62
--- /dev/null
+++ b/.deepwork/jobs/deepwork_jobs/steps/errata.md
@@ -0,0 +1,170 @@
+# Clean Up Errata
+
+## Objective
+
+Remove obsolete files and folders from prior DeepWork versions. This final step cleans up artifacts that are no longer used by the MCP-based system.
+
+## Task
+
+Identify and clean up deprecated files and folders.
+
+### Step 1: Remove Legacy Job Skill Folders
+
+Old DeepWork versions created individual skill folders for each job and step. These need to be removed while preserving the main `deepwork` skill folder.
+
+**Process:**
+
+1. **List all jobs** in `.deepwork/jobs/`:
+   ```bash
+   ls .deepwork/jobs/
+   ```
+
+2. **For each job**, kick off a sub-agent to find and remove legacy skill folders. The sub-agent should:
+   - Search in both `.claude/skills/` and `.gemini/skills/`
+   - Find folders matching:
+     - `{job_name}/` - folder named exactly like the job
+     - `{job_name}.*/` - folders starting with the job name followed by a period (e.g., `my_job.step1/`, `my_job.step2/`)
+   - Remove each matching folder
+   - Report what was removed
+
+   **Example commands for a job named `competitive_research`:**
+   ```bash
+   # Find and remove from .claude/skills/
+   rm -rf .claude/skills/competitive_research/ 2>/dev/null
+   rm -rf .claude/skills/competitive_research.*/ 2>/dev/null
+
+   # Find and remove from .gemini/skills/
+   rm -rf .gemini/skills/competitive_research/ 2>/dev/null
+   rm -rf .gemini/skills/competitive_research.*/ 2>/dev/null
+   ```
+
+3. **Run sub-agents in parallel** - one for each job to speed up the process.
+
+4. **Verify the `deepwork` skill folder remains:**
+   ```bash
+   ls -d .claude/skills/deepwork/ 2>/dev/null || echo "ERROR: deepwork skill missing!"
+   ls -d .gemini/skills/deepwork/ 2>/dev/null || echo "WARNING: gemini deepwork skill missing (may not have been installed)"
+   ```
+
+   **CRITICAL:** The `deepwork` skill folder in `.claude/skills/deepwork/` MUST still exist after cleanup. If it is missing, something went wrong - do NOT proceed and investigate what happened.
+
+**What this removes:**
+```
+.claude/skills/
+├── competitive_research/     <- REMOVE (legacy job folder)
+├── competitive_research.discover/  <- REMOVE (legacy step folder)
+├── competitive_research.analyze/   <- REMOVE (legacy step folder)
+├── deepwork/                 <- KEEP (current MCP entry point)
+└── some_other_job/           <- REMOVE (legacy job folder)
+```
+
+**Do NOT remove:**
+- `.claude/skills/deepwork/` - This is the current MCP-based skill entry point
+- `.gemini/skills/deepwork/` - Same for Gemini
+- Any skill folders that don't match job names in `.deepwork/jobs/`
+
+### Step 2: Clean Temp Files
+
+Check `.deepwork/tmp/` for accumulated temporary files:
+
+```bash
+ls -la .deepwork/tmp/ 2>/dev/null || echo "No tmp folder"
+```
+
+**Safe to delete:**
+- `.deepwork/tmp/rules/queue/*.json` - Old rules queue files
+- Any files older than 7 days
+- Empty subdirectories
+
+**Be careful with:**
+- Files that might be in-progress work
+- Anything with recent modification times
+
+```bash
+# Clean old queue files
+rm -rf .deepwork/tmp/rules/queue/*.json 2>/dev/null
+
+# Remove empty directories
+find .deepwork/tmp -type d -empty -delete 2>/dev/null
+```
+
+### Step 3: Remove Rules Folder (Fully Deprecated)
+
+DeepWork Rules have been completely removed from the system. Delete the `.deepwork/rules/` folder and all related items:
+
+```bash
+rm -rf .deepwork/rules/ 2>/dev/null
+rm -rf .deepwork/tmp/rules/ 2>/dev/null
+rm -rf .deepwork/jobs/deepwork_rules/ 2>/dev/null
+```
+
+### Step 4: Update Config Version
+
+Check `.deepwork/config.yml` for outdated version format:
+
+```bash
+cat .deepwork/config.yml
+```
+
+**Old format:**
+```yaml
+version: 1.0.0
+platforms:
+- claude
+```
+
+**Current format:**
+```yaml
+version: "1.0"
+platforms:
+  - claude
+```
+
+Update if needed to match current schema expectations.
+
+### Step 5: Remove Other Obsolete Files
+
+Check for and remove other obsolete files:
+
+| File/Pattern | Description | Action |
+|--------------|-------------|--------|
+| `.deepwork/.last_head_ref` | Git state tracking | Keep (used by MCP) |
+| `.deepwork/.last_work_tree` | Git state tracking | Keep (used by MCP) |
+| `.deepwork/.gitignore` | Ignore patterns | Review and update |
+| `.claude/commands/` | Generated commands | Keep (current system) |
+| `.claude/settings.local.json` | Local overrides | Keep (user settings) |
+
+### Step 6: Re-install DeepWork
+
+After all cleanup is complete, re-run `deepwork install` to ensure configurations are current and consistent:
+
+```bash
+deepwork install
+```
+
+**Then verify:**
+1. Check that `.deepwork/config.yml` is valid and up to date
+2. Check that `.claude/skills/deepwork/` exists and contains the expected skill entry point
+3. Check that all jobs in `.deepwork/jobs/` have valid `job.yml` files
+4. Run `deepwork install` a second time and confirm the output is clean (no errors or warnings)
+
+If any issues are found, fix them before proceeding. The goal is a clean, working DeepWork installation with no residual problems from the repair process.
+
+### Step 7: Verify Git Status
+
+Check that the cleanup hasn't left untracked garbage:
+
+```bash
+git status
+```
+
+**Review:**
+- Deleted files should show as deleted
+- No new untracked files should appear (unless intentionally created)
+- Backup files (`.backup`) should be in `.gitignore` or cleaned up
+
+## Important Notes
+
+1. **Always back up before deleting** - User data is irreplaceable
+2. **Ask before destructive actions** - When in doubt, ask the user
+3. **Don't auto-commit** - Let the user review and commit changes themselves
diff --git a/.deepwork/jobs/deepwork_jobs/steps/fix_jobs.md b/.deepwork/jobs/deepwork_jobs/steps/fix_jobs.md
new file mode 100644
index 00000000..ccb5e4a4
--- /dev/null
+++ b/.deepwork/jobs/deepwork_jobs/steps/fix_jobs.md
@@ -0,0 +1,307 @@
+# Fix Job Definitions
+
+## Objective
+
+Update all job.yml files and step instructions in `.deepwork/jobs/` to the current DeepWork format. This step migrates deprecated fields, removes references to deleted steps, and ensures all jobs are compatible with the MCP-based workflow system.
+
+## Task
+
+Audit and repair all job definitions, migrating from legacy formats to current specifications.
+
+### Step 1: Inventory All Jobs
+
+List all jobs in the project:
+
+```bash
+ls -la .deepwork/jobs/
+```
+
+For each job directory, you'll need to check and potentially fix the `job.yml` file.
+
+### Step 1.5: Process Jobs in Parallel
+
+**For each job** (except `deepwork_jobs` which should be updated via `deepwork install`), kick off a sub-agent to audit and repair that job's `job.yml` file. The sub-agent should:
+
+1. Read the job's `job.yml` file
+2. Check for and fix all issues described in Steps 2-6 below
+3. Validate the YAML is still valid after changes
+4. Report what was changed
+
+**Run sub-agents in parallel** - one for each job to speed up the process.
+
+**Example prompt for sub-agent:**
+```
+Audit and repair the job at `.deepwork/jobs/[job_name]/job.yml`:
+1. Remove any `exposed: true` fields from steps
+2. Migrate `stop_hooks` to `hooks.after_agent` format
+3. Remove references to deleted steps (like `review_job_spec`)
+4. Fix orphaned steps by adding them to workflows
+5. Migrate `outputs` from array format to map format with `type` and `description`
+6. Update any `file` inputs that reference renamed output keys
+7. Migrate `quality_criteria` arrays to `reviews` format (run_each + map criteria)
+8. Bump version and add changelog entry if changes were made
+9. Validate YAML syntax
+
+Report what changes were made.
+```
+
+### Step 2: Remove `exposed` Field
+
+The `exposed` field on steps no longer has any effect in MCP-based DeepWork. Steps are now only accessible through workflows.
+
+**Find and remove:**
+```yaml
+steps:
+  - id: some_step
+    exposed: true  # REMOVE THIS LINE
+```
+
+If a step was `exposed: true` and is not in any workflow, it should either:
+1. Be added to a workflow, OR
+2. Be removed from the job entirely
+
+### Step 3: Migrate `stop_hooks` to `hooks.after_agent`
+
+The `stop_hooks` field is deprecated. Migrate to the new `hooks` structure:
+
+**Before (deprecated):**
+```yaml
+steps:
+  - id: my_step
+    stop_hooks:
+      - prompt: "Verify the output meets quality standards"
+```
+
+**After (current format):**
+```yaml
+steps:
+  - id: my_step
+    hooks:
+      after_agent:
+        - prompt: "Verify the output meets quality standards"
+```
+
+### Step 4: Remove References to Deleted Steps
+
+Check for references to steps that no longer exist in the standard jobs:
+
+**Steps that have been removed:**
+- `review_job_spec` - Was removed from `deepwork_jobs` in v1.0.1
+
+**What to fix:**
+- Remove from workflow `steps` arrays
+- Update `from_step` references in inputs
+- Update `dependencies` arrays
+
+**Example fix:**
+```yaml
+# Before
+workflows:
+  - name: new_job
+    steps:
+      - define
+      - review_job_spec  # REMOVE
+      - implement
+
+steps:
+  - id: implement
+    inputs:
+      - file: job.yml
+        from_step: review_job_spec  # CHANGE TO: define
+    dependencies:
+      - review_job_spec  # CHANGE TO: define
+```
+
+### Step 5: Fix Orphaned Steps
+
+Steps not included in any workflow cannot be invoked via the MCP interface.
+
+**How to handle orphaned steps depends on whether the job has ANY workflows defined:**
+
+#### Case A: Job has NO workflows defined
+
+If the job has no `workflows:` section at all (or it's empty), create a **single workflow with the same name as the job** containing all steps in their defined order:
+
+```yaml
+# For a job named "my_job" with steps: step_a, step_b, step_c
+workflows:
+  - name: my_job  # Same name as the job
+    summary: "Runs the complete my_job workflow"
+    steps:
+      - step_a
+      - step_b
+      - step_c
+```
+
+This preserves the original intent of the job as a sequential workflow.
+
+#### Case B: Job has SOME workflows defined
+
+If the job already has one or more workflows defined, but some steps are not included in any of them, create a **separate single-step workflow for each orphaned step** with the same name as the step:
+
+```yaml
+# Existing workflows stay as-is, add new ones for orphans
+workflows:
+  - name: existing_workflow
+    summary: "..."
+    steps: [...]
+
+  # Add for each orphaned step:
+  - name: orphaned_step_name  # Same name as the step
+    summary: "Runs the orphaned_step_name step"
+    steps:
+      - orphaned_step_name
+```
+
+This ensures all steps remain accessible via the MCP interface while preserving the existing workflow structure.
+
+### Step 6: Migrate `outputs` from Array Format to Map Format
+
+The `outputs` field on steps changed from an array of strings/objects to a map with typed entries. Every output must now have a key (identifier), a `type` (`file` or `files`), and a `description`.
+
+**Before (legacy array format):**
+```yaml
+steps:
+  - id: define
+    outputs:
+      - job.yml
+      - steps/
+      - file: report.md
+        doc_spec: .deepwork/doc_specs/report.md
+```
+
+**After (current map format):**
+```yaml
+steps:
+  - id: define
+    outputs:
+      job.yml:
+        type: file
+        description: "The job definition file"
+      step_instruction_files:
+        type: files
+        description: "Instruction Markdown files for each step"
+      report.md:
+        type: file
+        description: "The generated report"
+```
+
+**Migration rules:**
+
+1. **Plain filename strings** (e.g., `- job.yml`, `- output.md`): Use the filename as the key, set `type: file`, add a `description`.
+2. **Directory strings ending in `/`** (e.g., `- steps/`, `- competitor_profiles/`): Choose a descriptive key name (e.g., `step_instruction_files`, `competitor_profiles`), set `type: files`, add a `description`.
+3. **Objects with `doc_spec`** (e.g., `- file: report.md` with `doc_spec: ...`): Drop the `doc_spec` field entirely, use the filename as the key, set `type: file`, add a `description`.
+4. **`description` is required** on every output entry. Write a short sentence describing what the output contains.
+
+**Update `file` inputs that reference renamed outputs:**
+
+When a directory output key changes (e.g., `steps/` becomes `step_instruction_files`), any downstream step with a `file` input referencing the old name must be updated to use the new key.
+
+```yaml
+# Before: input references old directory name
+steps:
+  - id: implement
+    inputs:
+      - file: steps/
+        from_step: define
+
+# After: input uses the new output key
+steps:
+  - id: implement
+    inputs:
+      - file: step_instruction_files
+        from_step: define
+```
+
+### Step 7: Migrate `quality_criteria` to `reviews`
+
+The flat `quality_criteria` field on steps has been replaced by the `reviews` array. Each review specifies `run_each` (what to review) and `quality_criteria` as a map of criterion name to question.
+
+**Before (deprecated):**
+```yaml
+steps:
+  - id: my_step
+    quality_criteria:
+      - "**Complete**: Is the output complete?"
+      - "**Accurate**: Is the data accurate?"
+```
+
+**After (current format):**
+```yaml
+steps:
+  - id: my_step
+    reviews:
+      - run_each: step
+        quality_criteria:
+          "Complete": "Is the output complete?"
+          "Accurate": "Is the data accurate?"
+```
+
+**Migration rules:**
+
+1. **Parse the old format**: Each string typically follows `**Name**: Question` format. Extract the name (bold text) as the map key and the question as the value.
+2. **Choose `run_each`**: Default to `step` (reviews all outputs together). If the step has a single primary output, consider using that output name instead.
+3. **For steps with no quality_criteria**: Use `reviews: []`
+4. **Remove the old field**: Delete the `quality_criteria` array entirely after migration.
+
+### Step 8: Update Version Numbers
+
+If you made significant changes to a job, bump its version number:
+
+```yaml
+# Bump patch version for minor fixes
+version: "1.0.0"  ->  version: "1.0.1"
+
+# Add changelog entry
+changelog:
+  - version: "1.0.1"
+    changes: "Migrated to current DeepWork format; removed deprecated fields"
+```
+
+## Common Issues and Fixes
+
+### Issue: Step references non-existent step in `from_step`
+```
+Error: Step 'implement' has file input from 'review_job_spec' but 'review_job_spec' is not in dependencies
+```
+**Fix:** Update `from_step` to reference a step that still exists.
+
+### Issue: Workflow references non-existent step
+```
+Error: Workflow 'new_job' references non-existent step 'review_job_spec'
+```
+**Fix:** Remove the step from the workflow's `steps` array.
+
+### Issue: Orphaned step warning
+```
+Warning: Job 'my_job' has steps not included in any workflow: standalone_step
+```
+**Fix:**
+- If the job has NO workflows: Create one workflow named `my_job` with all steps in order
+- If the job has SOME workflows: Add a `standalone_step` workflow containing just that step
+
+### Issue: `outputs` is an array instead of an object
+```
+Error: Step 'define' outputs should be an object but got array
+```
+**Fix:** Convert from the legacy array format to the map format. Each array entry becomes a key in the map with `type` (`file` or `files`) and `description`. See Step 6 for detailed migration rules. Also update any `file` inputs in downstream steps if an output key was renamed.
+
+## Jobs to Check
+
+For each job in `.deepwork/jobs/`, check:
+
+| Check | What to Look For |
+|-------|------------------|
+| `exposed` field | Remove from all steps |
+| `stop_hooks` | Migrate to `hooks.after_agent` |
+| `outputs` format | Migrate from array to map with `type` and `description` |
+| `quality_criteria` | Migrate to `reviews` with `run_each` and map-format criteria |
+| Workflow steps | Remove references to deleted steps |
+| Dependencies | Update to valid step IDs |
+| File inputs | Update `from_step` references; update keys for renamed outputs |
+| Version | Bump if changes were made |
+
+## Important Notes
+
+1. **Preserve custom logic** - When migrating hooks, preserve the prompt content
+2. **Test after changes** - Validate YAML syntax after each job fix to catch errors early
diff --git a/.deepwork/jobs/deepwork_jobs/steps/fix_settings.md b/.deepwork/jobs/deepwork_jobs/steps/fix_settings.md
new file mode 100644
index 00000000..d164b69e
--- /dev/null
+++ b/.deepwork/jobs/deepwork_jobs/steps/fix_settings.md
@@ -0,0 +1,177 @@
+# Fix Settings Files
+
+## Objective
+
+Clean up `.claude/settings.json` and related configuration files, removing legacy artifacts from prior DeepWork versions. This step ensures the Claude Code settings are free of deprecated permissions, duplicate hooks, and hardcoded paths.
+
+## Task
+
+Audit and repair the `.claude/settings.json` file, removing gunk accumulated from older DeepWork implementations.
+
+### Step 1: Create Backup
+
+Before making any changes, create a backup:
+
+```bash
+cp .claude/settings.json .claude/settings.json.backup
+```
+
+### Step 2: Inventory DeepWork Jobs
+
+First, get the list of jobs that exist in `.deepwork/jobs/`:
+
+```bash
+ls .deepwork/jobs/
+```
+
+Note these job names - you will use them to identify which `Skill(...)` entries to remove.
+
+### Step 3: Remove DeepWork Skill Permissions
+
+Look for and **remove** `Skill(...)` permission entries that match DeepWork jobs. Only remove entries where the skill name matches a job in `.deepwork/jobs/`.
+
+**What to look for:**
+```json
+"permissions": {
+  "allow": [
+    "Skill(deepwork_jobs)",           // Remove if 'deepwork_jobs' is in .deepwork/jobs/
+    "Skill(deepwork_jobs.define)",    // Remove - matches job_name.step pattern
+    "Skill(competitive_research)",    // Remove if 'competitive_research' is in .deepwork/jobs/
+    "Skill(my_custom_skill)",         // KEEP - not a DeepWork job
+    ...
+  ]
+}
+```
+
+**IMPORTANT:** Only remove skills that:
+- Exactly match a job name in `.deepwork/jobs/` (e.g., `Skill(job_name)`)
+- Match the pattern `job_name.step_name` where `job_name` is in `.deepwork/jobs/`
+
+**DO NOT remove** skills that don't match DeepWork jobs - the user may have created these manually for other purposes.
+
+### Step 4: Remove Duplicate Hooks
+
+Check for duplicate hook entries in the `hooks` section. Prior versions sometimes added the same hook multiple times.
+
+**Example of duplicates to consolidate:**
+```json
+"hooks": {
+  "UserPromptSubmit": [
+    {
+      "matcher": "",
+      "hooks": [{ "type": "command", "command": "some_command" }]
+    },
+    {
+      "matcher": "",
+      "hooks": [{ "type": "command", "command": "some_command" }]  // DUPLICATE
+    }
+  ]
+}
+```
+
+Keep only one instance of each unique hook.
+
+### Step 5: Remove Hardcoded User Paths
+
+Search for and remove any hardcoded paths that reference specific user directories:
+
+**Patterns to find and remove:**
+- `/Users/username/.local/pipx/venvs/deepwork/bin/python`
+- `/home/username/.local/...`
+- Any path containing a specific username
+
+These should either be removed or replaced with relative paths.
+
+### Step 6: Remove DeepWork Rules Hooks (Fully Deprecated)
+
+DeepWork Rules have been completely removed from the system. Remove ALL hooks related to rules:
+
+**Hooks to remove entirely:**
+- Any hook with command `deepwork hook rules_check`
+- Any hook with command containing `rules_check`
+- Any hook referencing `.deepwork/jobs/deepwork_rules/hooks/`
+- Any hook referencing `.deepwork/rules/`
+
+**Also remove these permissions if present:**
+- `Skill(deepwork_rules)`
+- `Skill(deepwork_rules.define)`
+- `Bash(rm -rf .deepwork/tmp/rules/queue/*.json)`
+
+### Step 7: Remove Other Deprecated Commands
+
+Remove hooks referencing other deprecated DeepWork commands:
+
+**Commands to remove:**
+- `deepwork hook *` - The entire hook subcommand is deprecated
+- References to any `.deepwork/jobs/*/hooks/` scripts
+
+### Step 8: Clean Up Empty Sections
+
+If after cleanup any sections are empty, consider removing them:
+
+```json
+// Remove if empty:
+"hooks": {
+  "Stop": []  // Remove this empty array
+}
+```
+
+### Step 9: Validate JSON
+
+After all edits, ensure the file is valid JSON:
+
+```bash
+python -c "import json; json.load(open('.claude/settings.json'))"
+```
+
+If there are syntax errors, fix them before proceeding.
+
+## Example Before/After
+
+### Before (with gunk):
+```json
+{
+  "hooks": {
+    "UserPromptSubmit": [
+      { "matcher": "", "hooks": [{ "type": "command", "command": ".deepwork/jobs/deepwork_rules/hooks/user_prompt_submit.sh" }] },
+      { "matcher": "", "hooks": [{ "type": "command", "command": ".deepwork/jobs/deepwork_rules/hooks/user_prompt_submit.sh" }] }
+    ],
+    "Stop": [
+      { "matcher": "", "hooks": [{ "type": "command", "command": "deepwork hook rules_check" }] }
+    ],
+    "SubagentStop": [
+      { "matcher": "", "hooks": [{ "type": "command", "command": "/Users/tyler/.local/pipx/venvs/deepwork/bin/python -m deepwork.hooks.rules_check" }] }
+    ]
+  },
+  "permissions": {
+    "allow": [
+      "Skill(competitive_research)",
+      "Skill(competitive_research.discover_competitors)",
+      "Skill(deepwork_jobs)",
+      "Skill(deepwork_jobs.define)",
+      "Read(./.deepwork/**)",
+      "WebSearch"
+    ]
+  }
+}
+```
+
+### After (cleaned):
+```json
+{
+  "hooks": {},
+  "permissions": {
+    "allow": [
+      "Read(./.deepwork/**)",
+      "WebSearch"
+    ]
+  }
+}
+```
+
+## Important Notes
+
+1. **Don't remove non-DeepWork permissions** - Keep permissions like `WebSearch`, `Read(...)`, `Bash(...)` that aren't related to old DeepWork skills
+2. **Preserve `make_new_job.sh`** - Keep any `Bash(...)` permission referencing `make_new_job.sh` (e.g., `Bash(.deepwork/jobs/deepwork_jobs/scripts/make_new_job.sh *)`) - this is a current DeepWork script
+3. **Be conservative** - If unsure whether something is legacy, ask the user
+4. **Document changes** - Note what was removed for the final summary
diff --git a/.deepwork/jobs/deepwork_jobs/steps/implement.md b/.deepwork/jobs/deepwork_jobs/steps/implement.md
index 749c8c6f..73eeb365 100644
--- a/.deepwork/jobs/deepwork_jobs/steps/implement.md
+++ b/.deepwork/jobs/deepwork_jobs/steps/implement.md
@@ -2,37 +2,16 @@
 
 ## Objective
 
-Generate the DeepWork job directory structure and instruction files for each step based on the validated `job.yml` specification from the review_job_spec step.
+Generate step instruction files for each step based on the `job.yml` specification from the define step.
 
 ## Task
 
-Read the `job.yml` specification file and create all the necessary files to make the job functional, including directory structure and step instruction files. Then sync the commands to make them available.
+Read the `job.yml` specification file created by the define step and generate comprehensive instruction files for each step. The define step has already created the job directory structure.
 
-### Step 1: Create Directory Structure Using Script
-
-Run the `make_new_job.sh` script to create the standard directory structure:
-
-```bash
-.deepwork/jobs/deepwork_jobs/make_new_job.sh [job_name]
-```
-
-This creates:
-- `.deepwork/jobs/[job_name]/` - Main job directory
-- `.deepwork/jobs/[job_name]/steps/` - Step instruction files
-- `.deepwork/jobs/[job_name]/hooks/` - Custom validation scripts (with .gitkeep)
-- `.deepwork/jobs/[job_name]/templates/` - Example file formats (with .gitkeep)
-- `.deepwork/jobs/[job_name]/AGENTS.md` - Job management guidance
-
-**Note**: If the directory already exists (e.g., job.yml was created by define step), you can skip this step or manually create the additional directories:
-```bash
-mkdir -p .deepwork/jobs/[job_name]/hooks .deepwork/jobs/[job_name]/templates
-touch .deepwork/jobs/[job_name]/hooks/.gitkeep .deepwork/jobs/[job_name]/templates/.gitkeep
-```
-
-### Step 2: Read and Validate the Specification
+### Step 1: Read and Validate the Specification
 
 1. **Locate the job.yml file**
-   - Read `.deepwork/jobs/[job_name]/job.yml` from the review_job_spec step
+   - Read `.deepwork/jobs/[job_name]/job.yml` from the define step
    - Parse the YAML content
 
 2. **Validate the specification**
@@ -46,7 +25,7 @@ touch .deepwork/jobs/[job_name]/hooks/.gitkeep .deepwork/jobs/[job_name]/templat
    - List of all steps with their details
    - Understand the workflow structure
 
-### Step 3: Generate Step Instruction Files
+### Step 2: Generate Step Instruction Files
 
 For each step in the job.yml, create a comprehensive instruction file at `.deepwork/jobs/[job_name]/steps/[step_id].md`.
 
@@ -65,30 +44,31 @@ For each step in the job.yml, create a comprehensive instruction file at `.deepw
 
 1. **Use the job description** - The detailed description from job.yml provides crucial context
 2. **Be specific** - Don't write generic instructions; tailor them to the step's purpose
-3. **Provide examples** - Show what good output looks like
+3. **Provide output format examples** - Include a markdown code block in an "Output Format" section showing the expected file structure. A template with `[bracket placeholders]` is acceptable. For complex outputs, also include a concrete filled-in example showing realistic data — this is especially valuable for the first step in a workflow where there's no prior output to reference.
 4. **Explain the "why"** - Help the user understand the step's role in the workflow
 5. **Quality over quantity** - Detailed, actionable instructions are better than vague ones
-6. **Align with stop hooks** - If the step has `stop_hooks` defined, ensure the quality criteria in the instruction file match the validation criteria in the hooks
-7. **Ask structured questions** - When a step has user inputs, the instructions MUST explicitly tell the agent to "ask structured questions" using the AskUserQuestion tool to gather that information. Never use generic phrasing like "ask the user" - always use "ask structured questions"
+6. **Align with reviews** - If the step has `reviews` defined, ensure the quality criteria in the instruction file match the review criteria
+7. **Ask structured questions (when applicable)** - When a step has user-provided inputs (name/description inputs in job.yml), the instructions MUST explicitly tell the agent to "ask structured questions" using the AskUserQuestion tool. Steps that only have file inputs from prior steps do NOT need this phrase — they process data without user interaction.
+8. **Handle edge cases** - If inputs might be missing, ambiguous, or incomplete, tell the agent to ask structured questions to clarify how to proceed rather than guessing
 
-### Handling Stop Hooks
+### Handling Reviews
 
-If a step in the job.yml has `stop_hooks` defined, the generated instruction file should:
+If a step in the job.yml has `reviews` defined, the generated instruction file should:
 
-1. **Mirror the quality criteria** - The "Quality Criteria" section should match what the stop hooks will validate
+1. **Mirror the quality criteria** - The "Quality Criteria" section should match what the reviews will validate
 2. **Be explicit about success** - Help the agent understand when the step is truly complete
-3. **Include the promise pattern** - Mention that `<promise>✓ Quality Criteria Met</promise>` should be included when criteria are met
+3. **Explain what's reviewed** - If reviews target specific outputs (via `run_each`), mention which outputs will be reviewed
 
 **Example: If the job.yml has:**
 ```yaml
 - id: research_competitors
   name: "Research Competitors"
-  stop_hooks:
-    - prompt: |
-        Verify the research meets criteria:
-        1. Each competitor has at least 3 data points
-        2. Sources are cited
-        3. Information is current (within last year)
+  reviews:
+    - run_each: research_notes.md
+      quality_criteria:
+        "Sufficient Data": "Does each competitor have at least 3 data points?"
+        "Sources Cited": "Are sources cited for key claims?"
+        "Current Information": "Is the information current (within last year)?"
 ```
 
 **The instruction file should include:**
@@ -98,7 +78,6 @@ If a step in the job.yml has `stop_hooks` defined, the generated instruction fil
 - Each competitor has at least 3 distinct data points
 - All information is sourced with citations
 - Data is current (from within the last year)
-- When all criteria are met, include `<promise>✓ Quality Criteria Met</promise>` in your response
 ```
 
 This alignment ensures the AI agent knows exactly what will be validated and can self-check before completing.
@@ -109,82 +88,11 @@ Step instructions can include additional `.md` files in the `steps/` directory f
 
 See `.deepwork/jobs/deepwork_jobs/steps/supplemental_file_references.md` for detailed documentation and examples.
 
-### Step 4: Verify job.yml Location
-
-Verify that `job.yml` is in the correct location at `.deepwork/jobs/[job_name]/job.yml`. The define and review_job_spec steps should have created and validated it. If for some reason it's not there, you may need to create or move it.
-
-### Step 5: Sync Skills
-
-Run `deepwork sync` to generate the skills for this job:
-
-```bash
-deepwork sync
-```
-
-This will:
-- Parse the job definition
-- Generate skills for each step
-- Make the skills available in `.claude/skills/` (or appropriate platform directory)
-
-### Step 6: Consider Rules for the New Job
-
-After implementing the job, consider whether there are **rules** that would help enforce quality or consistency when working with this job's domain.
-
-**What are rules?**
-
-Rules are automated guardrails stored as markdown files in `.deepwork/rules/` that trigger when certain files change during an AI session. They help ensure:
-- Documentation stays in sync with code
-- Team guidelines are followed
-- Architectural decisions are respected
-- Quality standards are maintained
-
-**When to suggest rules:**
+### Step 3: Verify Files
 
-Think about the job you just implemented and ask:
-- Does this job produce outputs that other files depend on?
-- Are there documentation files that should be updated when this job's outputs change?
-- Are there quality checks or reviews that should happen when certain files in this domain change?
-- Could changes to the job's output files impact other parts of the project?
-
-**Examples of rules that might make sense:**
-
-| Job Type | Potential Rule |
-|----------|----------------|
-| API Design | "Update API docs when endpoint definitions change" |
-| Database Schema | "Review migrations when schema files change" |
-| Competitive Research | "Update strategy docs when competitor analysis changes" |
-| Feature Development | "Update changelog when feature files change" |
-| Configuration Management | "Update install guide when config files change" |
-
-**How to offer rule creation:**
-
-If you identify one or more rules that would benefit the user, explain:
-1. **What the rule would do** - What triggers it and what action it prompts
-2. **Why it would help** - How it prevents common mistakes or keeps things in sync
-3. **What files it would watch** - The trigger patterns
-
-Then ask the user:
-
-> "Would you like me to create this rule for you? I can run `/deepwork_rules.define` to set it up."
-
-If the user agrees, invoke the `/deepwork_rules.define` command to guide them through creating the rule.
-
-**Example dialogue:**
-
-```
-Based on the competitive_research job you just created, I noticed that when
-competitor analysis files change, it would be helpful to remind you to update
-your strategy documentation.
-
-I'd suggest a rule like:
-- **Name**: "Update strategy when competitor analysis changes"
-- **Trigger**: `**/positioning_report.md`
-- **Action**: Prompt to review and update `docs/strategy.md`
-
-Would you like me to create this rule? I can run `/deepwork_rules.define` to set it up.
-```
-
-**Note:** Not every job needs rules. Only suggest them when they would genuinely help maintain consistency or quality. Don't force rules where they don't make sense.
+Verify that all files are in their correct locations:
+- `job.yml` at `.deepwork/jobs/[job_name]/job.yml` (created by define step)
+- Step instruction files at `.deepwork/jobs/[job_name]/steps/[step_id].md`
 
 ## Example Implementation
 
@@ -201,33 +109,13 @@ For a complete worked example showing a job.yml and corresponding step instructi
 5. **Use context** - The job description provides valuable context for each step
 6. **Be specific** - Tailor instructions to the specific step, not generic advice
 
-## Validation Before Sync
-
-Before running `deepwork sync`, verify:
-- All directories exist
-- `job.yml` is in place
-- All step instruction files exist (one per step)
-- No file system errors
-
 ## Completion Checklist
 
 Before marking this step complete, ensure:
-- [ ] job.yml validated and copied to job directory
+- [ ] job.yml validated and in job directory
 - [ ] All step instruction files created
 - [ ] Each instruction file is complete and actionable
-- [ ] `deepwork sync` executed successfully
-- [ ] Skills generated in platform directory
-- [ ] Considered whether rules would benefit this job (Step 6)
-- [ ] If rules suggested, offered to run `/deepwork_rules.define`
 
-## Quality Criteria
+## Note: Workflow Availability
 
-- Job directory structure is correct
-- All instruction files are complete (not stubs)
-- Instructions are specific and actionable
-- Output examples are provided in each instruction file
-- Quality criteria defined for each step
-- Steps with user inputs explicitly use "ask structured questions" phrasing
-- Sync completed successfully
-- Skills available for use
-- Thoughtfully considered relevant rules for the job domain
+Once the job.yml and step instruction files are created, the workflow is immediately available through the DeepWork MCP server. The MCP server reads job definitions directly from `.deepwork/jobs/` - no separate sync or installation step is required.
\ No newline at end of file
diff --git a/.deepwork/jobs/deepwork_jobs/steps/iterate.md b/.deepwork/jobs/deepwork_jobs/steps/iterate.md
new file mode 100644
index 00000000..73dcb589
--- /dev/null
+++ b/.deepwork/jobs/deepwork_jobs/steps/iterate.md
@@ -0,0 +1,226 @@
+# Iterate on Workflow Design
+
+## Objective
+
+Review the test run conversation and improve the job definition based on what happened. This step closes the feedback loop by incorporating learnings from the test into the workflow itself, making future runs more efficient and producing better results.
+
+## Task
+
+Analyze the conversation history from the test step, identify areas for improvement, and update the job definition and step instructions accordingly.
+
+### Step 1: Review the Conversation History
+
+Carefully analyze the conversation from the test step, looking for:
+
+1. **Process Inefficiencies**
+   - Steps that took multiple attempts to complete
+   - Questions the agent had to ask that should have been in the instructions
+   - Unnecessary back-and-forth with the user
+   - Information that had to be repeated
+
+2. **Output Quality Issues**
+   - Issues identified during critique (from Step 3 of test)
+   - Corrections requested by the user
+   - Patterns in user feedback (what did they consistently want changed?)
+
+3. **Tool Usage Problems**
+   - Tools that didn't work as expected
+   - Missing tools that would have helped
+   - Inefficient tool sequences
+
+4. **Missing or Unclear Instructions**
+   - Ambiguities that led to wrong outputs
+   - Missing guidance that caused confusion
+   - Quality criteria that weren't clear enough
+
+### Step 2: Plan Improvements
+
+For each issue identified, determine the appropriate fix:
+
+| Issue Type | Solution Location |
+|------------|-------------------|
+| Process inefficiency | Update step instructions with clearer guidance |
+| Output quality | Update quality criteria or add examples |
+| Missing information | Add to step inputs or instructions |
+| Tool problems | Suggest different tools in instructions |
+| Unclear criteria | Rewrite quality criteria to be specific |
+
+**Prioritize improvements** that will have the most impact on future runs. Focus on:
+- Issues that caused multiple iterations
+- Problems that affected the final output quality
+- Confusion that could be eliminated with clearer instructions
+
+### Step 3: Update Step Instructions
+
+For each step that needs improvement:
+
+1. **Read the current instruction file** at `.deepwork/jobs/[job_name]/steps/[step_id].md`
+
+2. **Make targeted improvements**:
+   - Add missing context or clarification
+   - Include examples of good output (use what worked in the test)
+   - Clarify ambiguous instructions
+   - Add tool recommendations if a different approach would be better
+   - Update quality criteria to match user expectations
+
+3. **Keep instructions concise**:
+   - Avoid redundancy
+   - Be direct and actionable
+   - Use bullet points where appropriate
+
+### Step 4: Update Reviews
+
+Review and update quality reviews in two places:
+
+1. **In step instruction files** - The "Quality Criteria" section should reflect what the user actually cared about during testing
+
+2. **In job.yml** - Update the `reviews` array on each step to:
+   - Remove criteria that weren't relevant
+   - Add criteria based on user feedback
+   - Make existing criteria more specific
+   - Adjust `run_each` targeting if outputs should be reviewed differently
+
+**Example improvement:**
+```yaml
+# Before
+reviews:
+  - run_each: step
+    quality_criteria:
+      "Formatted Correctly": "Is the report formatted correctly?"
+
+# After
+reviews:
+  - run_each: report.md
+    quality_criteria:
+      "Distinct Colors": "Does the report use distinct colors for each data series in charts?"
+      "Readable Tables": "Do tables have sufficient padding and font size for readability?"
+      "Clear Summary": "Is the executive summary understandable by non-technical readers?"
+```
+
+### Step 5: Consider Alternative Tools
+
+If any tools didn't work well during the test:
+
+1. **Identify the problem** - What went wrong? (slow, wrong output, hard to use)
+
+2. **Research alternatives** - What other tools could accomplish the same goal?
+
+3. **Update instructions** - If a better tool exists, update the step instructions to recommend it
+
+Examples:
+- If web scraping was unreliable, suggest a specific browser automation approach
+- If data processing was slow, suggest a different method or tool
+- If file generation had issues, recommend a different library or format
+
+### Step 6: Update Job Version and Changelog
+
+After making improvements:
+
+1. **Bump the version** in `job.yml`:
+   - Patch version (x.x.1) for minor instruction tweaks
+   - Minor version (x.1.0) for quality criteria changes or significant improvements
+
+2. **Add a changelog entry** describing what was improved:
+   ```yaml
+   changelog:
+     - version: "1.0.1"
+       changes: "Improved [step] instructions based on test run: added examples, clarified criteria, updated tool recommendations"
+   ```
+
+### Step 7: Provide Recap
+
+Summarize the improvements made:
+
+1. **List the issues identified** from the test run
+
+2. **Describe the changes made** to address each issue
+
+3. **Explain the expected impact** on future runs
+
+Example recap format:
+```
+## Iteration Summary
+
+### Issues Identified
+1. Chart colors were hard to distinguish
+2. Tables were too small to read
+3. Executive summary was too technical
+
+### Changes Made
+
+**gather_data step:**
+- Added instruction to use distinct color palettes for charts
+- Updated quality criteria to require readable table formatting
+
+**generate_report step:**
+- Added example of good executive summary
+- Updated quality criteria: "Summary understandable by non-technical stakeholders"
+
+**job.yml:**
+- Updated version to 1.0.1
+- Added changelog entry
+
+### Expected Impact
+Future runs should produce reports with better visual design and clearer summaries, reducing the need for post-generation corrections.
+```
+
+## Example Dialog
+
+```
+Claude: Let me review our test run and improve the workflow based on what happened.
+
+**Analyzing Test Conversation...**
+
+I identified the following issues from the test run:
+
+### Issues Found
+
+1. **Chart readability (gather_data step)**: The instructions didn't specify how to handle color choices for visualizations. During the test, we had to fix colors that were too similar.
+
+2. **Missing month-over-month comparison (analyze_trends step)**: The user wanted comparisons to previous periods, but this wasn't in the instructions. We had to add it after the fact.
+
+3. **Technical executive summary (generate_report step)**: The summary was too technical for non-technical stakeholders. The quality criteria didn't specify the audience clearly.
+
+4. **Table formatting (generate_report step)**: Tables were hard to read. No guidance on sizing or padding.
+
+### Changes Made
+
+**`.deepwork/jobs/monthly_report/steps/gather_data.md`:**
+- Added: "Use distinct, high-contrast colors for chart data series. Avoid similar shades for different categories."
+- Added to Quality Criteria: "Charts use visually distinct colors that are accessible to colorblind readers"
+
+**`.deepwork/jobs/monthly_report/steps/analyze_trends.md`:**
+- Added: "Always include comparison to the same period in the previous month/quarter"
+- Added input: "previous_period_data" to formalize this requirement
+
+**`.deepwork/jobs/monthly_report/steps/generate_report.md`:**
+- Added example of good executive summary targeting non-technical readers
+- Added: "Format tables with sufficient padding (at least 8px) and readable font size (minimum 11pt)"
+- Updated Quality Criteria:
+  - "Executive summary is understandable by non-technical stakeholders"
+  - "Tables are readable with adequate padding and font size"
+
+**`.deepwork/jobs/monthly_report/job.yml`:**
+- Version bumped to 1.0.1
+- Added changelog: "Improved visual design guidance and audience-appropriate writing based on test feedback"
+
+Done! The workflow has been updated. Changes are immediately available through the MCP server.
+
+### Expected Impact
+
+Future monthly reports should:
+- Have better chart color choices from the start
+- Include month-over-month comparisons automatically
+- Have clearer executive summaries for leadership
+- Have more readable tables
+
+This should significantly reduce the number of corrections needed after the initial output.
+```
+
+## Important Guidelines
+
+1. **Be specific** - Don't just note "instructions were unclear" - explain exactly what was unclear and how you fixed it
+2. **Use test results as examples** - If something worked well in the test, add it as an example in the instructions
+3. **Don't over-engineer** - Only add improvements that address actual problems from the test
+4. **Maintain consistency** - Keep the same structure and style in instruction files
+5. **Test-driven improvement** - Every change should trace back to something that happened in the test
diff --git a/.deepwork/jobs/deepwork_jobs/steps/learn.md b/.deepwork/jobs/deepwork_jobs/steps/learn.md
index bfb393a5..254a332d 100644
--- a/.deepwork/jobs/deepwork_jobs/steps/learn.md
+++ b/.deepwork/jobs/deepwork_jobs/steps/learn.md
@@ -66,15 +66,6 @@ For each learning identified, determine if it is:
   - "Quality criteria should include checking for Y"
   - "Add example of correct output format"
 
-**doc spec-Related** (should improve doc spec files):
-- Improvements to document quality criteria
-- Changes to document structure or format
-- Updated audience or frequency information
-- Examples:
-  - "The report should include a summary table"
-  - "Quality criterion 'Visualization' needs clearer requirements"
-  - "Documents need a section for action items"
-
 **Bespoke** (should go in AGENTS.md):
 - Specific to THIS project/codebase/run
 - Depends on local conventions or structure
@@ -85,30 +76,6 @@ For each learning identified, determine if it is:
   - "This project uses camelCase for function names"
   - "The main config file is at `config/settings.yml`"
 
-### Step 3.5: Identify doc spec-Related Learnings
-
-Review the conversation for doc spec-related improvements:
-
-1. **Quality Criteria Changes**
-   - Were any quality criteria unclear or insufficient?
-   - Did the agent repeatedly fail certain criteria?
-   - Are there new criteria that should be added?
-
-2. **Document Structure Changes**
-   - Did the user request different sections?
-   - Were parts of the document format confusing?
-   - Should the example document be updated?
-
-3. **Metadata Updates**
-   - Has the target audience changed?
-   - Should frequency or path patterns be updated?
-
-**Signals for doc spec improvements:**
-- User asked for changes to document format
-- Repeated validation failures on specific criteria
-- Feedback about missing sections or information
-- Changes to how documents are organized/stored
-
 ### Step 4: Update Job Instructions (Generalizable Learnings)
 
 For each generalizable learning:
@@ -121,6 +88,7 @@ For each generalizable learning:
    - Include helpful examples
    - Clarify ambiguous instructions
    - Update quality criteria if needed
+   - If you identify problems in the outcomes of steps, those usually should be reflected in an update to the `reviews` for that step in `job.yml` (adjusting criteria names, questions, or `run_each` targeting)
 
 3. **Keep instructions concise**
    - Avoid redundancy - don't repeat the same guidance in multiple places
@@ -162,41 +130,6 @@ Review all instruction files for the job and identify content that:
    - Shorter instruction files - easier to read and maintain
    - Consistent guidance across steps
 
-### Step 4.5: Update doc spec Files (doc spec-Related Learnings)
-
-If doc spec-related learnings were identified:
-
-1. **Locate the doc spec file**
-   - Find doc spec references in job.yml outputs (look for `doc_spec: .deepwork/doc_specs/[doc_spec_name].md`)
-   - doc spec files are at `.deepwork/doc_specs/[doc_spec_name].md`
-
-2. **Update quality_criteria array**
-   - Add new criteria with name and description
-   - Modify existing criteria descriptions for clarity
-   - Remove criteria that are no longer relevant
-
-3. **Update example document**
-   - Modify the markdown body to reflect structure changes
-   - Ensure the example matches updated criteria
-
-4. **Update metadata as needed**
-   - target_audience: If audience has changed
-   - frequency: If production cadence has changed
-   - path_patterns: If storage location has changed
-
-**Example doc spec update:**
-```yaml
-# Before
-quality_criteria:
-  - name: Visualization
-    description: Include charts
-
-# After
-quality_criteria:
-  - name: Visualization
-    description: Include Mermaid.js charts showing spend breakdown by service and month-over-month trend
-```
-
 ### Step 5: Create/Update AGENTS.md (Bespoke Learnings)
 
 The AGENTS.md file captures project-specific knowledge that helps future agent runs.
@@ -233,13 +166,6 @@ If instruction files were modified:
      changes: "Improved [step] instructions based on execution learnings: [brief description]"
    ```
 
-### Step 7: Sync Skills
-
-**Run deepwork sync** (if instructions were modified)
-```bash
-deepwork sync
-```
-
 ## File Reference Patterns
 
 When adding entries to AGENTS.md, prefer these patterns:
@@ -258,23 +184,10 @@ When adding entries to AGENTS.md, prefer these patterns:
 - Configuration requires these fields: name, version, ...
 ```
 
-## Quality Criteria
-
-- Conversation has been analyzed for job executions
-- Points of confusion and inefficiency are identified
-- Learnings are correctly classified (generalizable vs bespoke)
-- Job instructions updated for generalizable improvements
-- Instructions are concise - no redundancy or unnecessary verbosity
-- Shared/lengthy content extracted into referenced files where appropriate
-- AGENTS.md created/updated with bespoke learnings
-- File references used instead of duplicating content
-- AGENTS.md is in the correct folder (the deepest common folder for the topic)
-- When all criteria are met, include `<promise>✓ Quality Criteria Met</promise>`
-
 ## Example Dialog
 
 ```
-User: /deepwork_jobs.learn
+User: [Invokes the learn workflow via MCP]
 
 Claude: I'll analyze this conversation for learnings from DeepWork job executions.
 
@@ -324,8 +237,6 @@ I found the following job executions:
 
 3. Updated job.yml version to 1.0.1 with changelog entry
 
-4. Ran `deepwork sync`
-
 **Summary**
 
 Updated job instructions and created AGENTS.md with bespoke learnings.
diff --git a/.deepwork/jobs/deepwork_jobs/steps/review_job_spec.md b/.deepwork/jobs/deepwork_jobs/steps/review_job_spec.md
deleted file mode 100644
index fcc0ae9c..00000000
--- a/.deepwork/jobs/deepwork_jobs/steps/review_job_spec.md
+++ /dev/null
@@ -1,208 +0,0 @@
-# Review Job Specification
-
-## Objective
-
-Review the `job.yml` created in the define step against the doc spec quality criteria using a sub-agent for unbiased evaluation, then iterate on fixes until all criteria pass.
-
-## Why This Step Exists
-
-The define step focuses on understanding user requirements and creating a job specification. This review step ensures the specification meets quality standards before implementation. Using a sub-agent provides an unbiased "fresh eyes" review that catches issues the main agent might miss after being deeply involved in the definition process.
-
-## Task
-
-Use a sub-agent to review the job.yml against all 9 doc spec quality criteria, then fix any failed criteria. Repeat until all criteria pass.
-
-### Step 1: Read the Job Specification
-
-Read the `job.yml` file created in the define step:
-
-```
-.deepwork/jobs/[job_name]/job.yml
-```
-
-Also read the doc spec for reference:
-
-```
-.deepwork/doc_specs/job_spec.md
-```
-
-### Step 2: Spawn Review Sub-Agent
-
-Use the Task tool to spawn a sub-agent that will provide an unbiased review:
-
-```
-Task tool parameters:
-- subagent_type: "general-purpose"
-- model: "haiku"
-- description: "Review job.yml against doc spec"
-- prompt: [see below]
-```
-
-**Sub-agent prompt template:**
-
-```
-Review this job.yml against the following 9 quality criteria from the doc spec.
-
-For each criterion, respond with:
-- PASS or FAIL
-- If FAIL: specific issue and suggested fix
-
-## job.yml Content
-
-[paste the full job.yml content here]
-
-## Quality Criteria
-
-1. **Valid Identifier**: Job name must be lowercase with underscores, no spaces or special characters (e.g., `competitive_research`, `monthly_report`)
-
-2. **Semantic Version**: Version must follow semantic versioning format X.Y.Z (e.g., `1.0.0`, `2.1.3`)
-
-3. **Concise Summary**: Summary must be under 200 characters and clearly describe what the job accomplishes
-
-4. **Rich Description**: Description must be multi-line and explain: the problem solved, the process, expected outcomes, and target users
-
-5. **Changelog Present**: Must include a changelog array with at least the initial version entry
-
-6. **Complete Steps**: Each step must have: id (lowercase_underscores), name, description, instructions_file, outputs (at least one), and dependencies array
-
-7. **Valid Dependencies**: Dependencies must reference existing step IDs with no circular references
-
-8. **Input Consistency**: File inputs with `from_step` must reference a step that is in the dependencies array
-
-9. **Output Paths**: Outputs must be valid filenames or paths (e.g., `report.md` or `reports/analysis.md`)
-
-## Response Format
-
-Respond with a structured evaluation:
-
-### Overall: [X/9 PASS]
-
-### Criterion Results
-
-1. Valid Identifier: [PASS/FAIL]
-   [If FAIL: Issue and fix]
-
-2. Semantic Version: [PASS/FAIL]
-   [If FAIL: Issue and fix]
-
-[... continue for all 9 criteria ...]
-
-### Summary of Required Fixes
-
-[List any fixes needed, or "No fixes required - all criteria pass"]
-```
-
-### Step 3: Review Sub-Agent Findings
-
-Parse the sub-agent's response:
-
-1. **Count passing criteria** - How many of the 9 criteria passed?
-2. **Identify failures** - List specific criteria that failed
-3. **Note suggested fixes** - What changes does the sub-agent recommend?
-
-### Step 4: Fix Failed Criteria
-
-For each failed criterion, edit the job.yml to address the issue:
-
-**Common fixes by criterion:**
-
-| Criterion | Common Issue | Fix |
-|-----------|-------------|-----|
-| Valid Identifier | Spaces or uppercase | Convert to lowercase_underscores |
-| Semantic Version | Missing or invalid format | Set to `"1.0.0"` or fix format |
-| Concise Summary | Too long or vague | Shorten to <200 chars, be specific |
-| Rich Description | Single line or missing context | Add multi-line explanation with problem/process/outcome/users |
-| Changelog Present | Missing changelog | Add `changelog:` with initial version entry |
-| Complete Steps | Missing required fields | Add id, name, description, instructions_file, outputs, dependencies |
-| Valid Dependencies | Non-existent step or circular | Fix step ID reference or reorder dependencies |
-| Input Consistency | from_step not in dependencies | Add the referenced step to dependencies array |
-| Output Paths | Invalid characters or format | Use valid filename/path format |
-
-### Step 5: Re-Run Review (If Needed)
-
-If any criteria failed:
-
-1. **Spawn a new sub-agent** with the updated job.yml content
-2. **Review the new findings**
-3. **Fix any remaining issues**
-4. **Repeat until all 9 criteria pass**
-
-### Step 6: Confirm Completion
-
-When all 9 criteria pass:
-
-1. **Announce success**: "All 9 doc spec quality criteria pass."
-2. **List what was validated**:
-   - Valid Identifier
-   - Semantic Version
-   - Concise Summary
-   - Rich Description
-   - Changelog Present
-   - Complete Steps
-   - Valid Dependencies
-   - Input Consistency
-   - Output Paths
-3. **Include the promise**: `<promise>Quality Criteria Met</promise>`
-4. **Guide to next step**: "Run `/deepwork_jobs.implement` to generate the step instruction files."
-
-## Example Review Cycle
-
-```
-Claude: Let me review the job.yml against the doc spec quality criteria.
-
-[Spawns sub-agent with haiku model]
-
-Sub-agent response:
-### Overall: 7/9 PASS
-
-### Criterion Results
-1. Valid Identifier: PASS
-2. Semantic Version: PASS
-3. Concise Summary: FAIL
-   Issue: Summary is 245 characters, exceeds 200 char limit
-   Fix: Shorten to "Competitive analysis workflow for product positioning"
-4. Rich Description: PASS
-5. Changelog Present: FAIL
-   Issue: No changelog array present
-   Fix: Add changelog with initial version entry
-6. Complete Steps: PASS
-7. Valid Dependencies: PASS
-8. Input Consistency: PASS
-9. Output Paths: PASS
-
-### Summary of Required Fixes
-1. Shorten summary to under 200 characters
-2. Add changelog array with initial version
-
-Claude: I found 2 issues. Let me fix them.
-
-[Edits job.yml to shorten summary and add changelog]
-
-Claude: Let me verify the fixes with another review.
-
-[Spawns new sub-agent]
-
-Sub-agent response:
-### Overall: 9/9 PASS
-
-### Summary of Required Fixes
-No fixes required - all criteria pass
-
-Claude: All 9 doc spec quality criteria pass.
-
-<promise>Quality Criteria Met</promise>
-
-**Next step:** Run `/deepwork_jobs.implement` to generate the step instruction files.
-```
-
-## Quality Criteria
-
-- **Sub-Agent Used**: A sub-agent was spawned to provide unbiased review (not just self-review)
-- **All doc spec Criteria Evaluated**: The sub-agent assessed all 9 quality criteria from the doc spec
-- **Findings Addressed**: All failed criteria were fixed by the main agent
-- **Validation Loop Complete**: The review-fix cycle continued until all criteria passed
-- **Promise Included**: The response includes `<promise>Quality Criteria Met</promise>` when complete
-
-## Output
-
-The validated `job.yml` file at `.deepwork/jobs/[job_name]/job.yml` that passes all 9 doc spec quality criteria.
diff --git a/.deepwork/jobs/deepwork_jobs/steps/test.md b/.deepwork/jobs/deepwork_jobs/steps/test.md
new file mode 100644
index 00000000..36d27128
--- /dev/null
+++ b/.deepwork/jobs/deepwork_jobs/steps/test.md
@@ -0,0 +1,154 @@
+# Test the New Workflow
+
+## Objective
+
+Run the newly created workflow on a real use case chosen by the user, critique the output, and iterate until the user is satisfied with the results. This step validates that the workflow works as intended before finalizing it.
+
+## Task
+
+Guide the user through testing their new workflow by running it on a real example, then critically evaluating the output and refining it based on user feedback.
+
+### Step 1: Announce Readiness and Gather Test Case
+
+The workflow is now implemented and ready to test. Use the AskUserQuestion tool to:
+
+1. **Inform the user** that the workflow is ready for a test run
+2. **Ask what they'd like to test it on** - Get a specific, real use case
+
+Example question to ask:
+```
+Your new workflow is ready to try out! What would you like to use it on for the first test run?
+
+Please describe a specific case you want to run through the workflow - ideally something you actually need done, so we can validate the workflow produces useful results.
+```
+
+**Important**: Get a concrete, specific test case. Vague responses like "just test it" should be followed up with clarifying questions to understand what inputs/context the workflow needs.
+
+### Step 2: Prepare and Run the Workflow
+
+1. **Prepare clean context** - Before invoking the workflow, consider compacting the conversation history (e.g., using `/compact` in Claude Code) to ensure the workflow starts with clean context focused on the test case.
+
+2. **Start the new workflow** - Use `start_workflow` through the DeepWork MCP server with the job name and workflow name to begin executing the workflow.
+
+3. **Complete the full workflow** - Continue through all steps of the workflow until it produces its final output. Use `finished_step` to progress through each step.
+
+4. **Note any issues during execution** - Pay attention to:
+   - Confusion or ambiguity in instructions
+   - Missing information that had to be asked for
+   - Steps that took longer than expected
+   - Awkward tool usage or process flow
+
+### Step 3: Critique the Output
+
+After the workflow completes, perform a self-critique of the output:
+
+1. **Review the final deliverable** - Read through all outputs produced by the workflow
+
+2. **Identify up to 3 top issues** - Look for problems such as:
+   - Missing information or sections
+   - Formatting issues (layout, structure, readability)
+   - Quality problems (vague content, errors, inconsistencies)
+   - Misalignment with what the user likely wanted
+   - Technical issues (broken links, malformed data, etc.)
+
+3. **Present each issue to the user** with a specific question asking if they want it fixed. For example:
+   ```
+   I noticed a few things we could improve:
+
+   1. **Text overlap**: The PDF has some text overlapping images in a few places - shall I correct that?
+
+   2. **Missing summary**: The report doesn't have an executive summary at the top - would you like me to add one?
+
+   3. **Data formatting**: The numbers aren't consistently formatted (some have commas, some don't) - should I standardize them?
+   ```
+
+4. **Make corrections** for any issues the user confirms they want fixed
+
+### Step 4: Gather Additional Feedback
+
+After addressing the identified issues:
+
+1. **Ask if the user has more comments** on the output:
+   ```
+   I've made those corrections. Is there anything else you'd like me to change about the output?
+   ```
+
+2. **Iterate on feedback** - Keep making corrections based on user feedback until they indicate they're satisfied
+
+3. **Confirm completion** - When the user says the output is good, confirm that testing is complete
+
+### Loop Behavior
+
+The feedback loop should continue until the user explicitly indicates satisfaction. Look for signals like:
+- "Looks good"
+- "That's fine"
+- "I'm happy with it"
+- "No more changes"
+- "Perfect"
+
+If the user provides more feedback, address it and ask again. Don't assume satisfaction without explicit confirmation.
+
+## Example Dialog
+
+```
+Claude: Your new workflow is ready to try out!
+
+The `monthly_report` workflow has been implemented with these steps:
+1. gather_data - Collects metrics from various sources
+2. analyze_trends - Identifies patterns and insights
+3. generate_report - Creates the final PDF report
+
+What would you like to use it on for the first test run? Please describe a specific report you need - for example, which month's data and any particular focus areas.
+
+User: Let's do the January 2026 engineering report, focusing on deployment frequency and incident response times.
+
+Claude: Great, let me run the workflow on your January 2026 engineering report.
+
+[Starting the monthly_report workflow via MCP...]
+
+[... workflow runs through all steps ...]
+
+Claude: The January 2026 Engineering Report has been generated. Let me review it for any issues.
+
+I noticed a few things we could improve:
+
+1. **Chart readability**: The deployment frequency chart uses very similar colors for different teams - shall I change to a more distinct color palette?
+
+2. **Missing context**: The incident response times don't include comparison to the previous month - would you like me to add month-over-month comparison?
+
+3. **Executive summary**: The summary is quite technical - should I add a simpler high-level overview for non-technical stakeholders?
+
+User: Yes to all three, please.
+
+Claude: Done! I've updated the report with:
+- Distinct color palette for the deployment chart
+- Month-over-month comparison for incident response times
+- Added a non-technical executive summary
+
+Is there anything else you'd like me to change about the report?
+
+User: Can you make the tables larger? They're a bit hard to read.
+
+Claude: I've increased the table sizes for better readability. The font is now larger and there's more padding around cells.
+
+Is there anything else you'd like me to adjust?
+
+User: No, that looks great!
+
+Claude: Excellent! The test run is complete. The workflow successfully generated your January 2026 Engineering Report with all the refinements applied.
+
+**Test Summary:**
+- Workflow ran successfully through all 3 steps
+- Output required adjustments to chart colors, comparisons, and table sizing
+- User is satisfied with the final result
+
+Ready to proceed to the iterate step to improve the workflow based on what we learned.
+```
+
+## Important Guidelines
+
+1. **Use real test cases** - Testing with actual data/needs validates the workflow better than hypothetical examples
+2. **Be specific in critiques** - Don't say "formatting issues" - say exactly what's wrong and where
+3. **Limit initial critique to 3 issues** - Too many issues at once is overwhelming; address more in the feedback loop
+4. **Don't assume what user wants fixed** - Always ask before making corrections
+5. **Iterate until satisfied** - Don't rush to completion; the user's satisfaction is the goal
diff --git a/.deepwork/jobs/deepwork_jobs/templates/doc_spec.md.template b/.deepwork/jobs/deepwork_jobs/templates/doc_spec.md.template
deleted file mode 100644
index d183344f..00000000
--- a/.deepwork/jobs/deepwork_jobs/templates/doc_spec.md.template
+++ /dev/null
@@ -1,26 +0,0 @@
----
-name: "[Document Name]"
-description: "[Brief description of the document's purpose]"
-path_patterns:
-  - "[path/to/documents/*.md]"
-target_audience: "[Who reads this document]"
-frequency: "[How often produced, e.g., Monthly, Per sprint, On demand]"
-quality_criteria:
-  - name: "[Criterion Name]"
-    description: "[What this criterion requires - be specific]"
-  - name: "[Criterion Name]"
-    description: "[What this criterion requires - be specific]"
-  - name: "[Criterion Name]"
-    description: "[What this criterion requires - be specific]"
----
-
-# [Document Title]: [Variables like Month, Year, Sprint]
-
-## Section 1
-[Describe what goes in this section]
-
-## Section 2
-[Describe what goes in this section]
-
-## Section 3
-[Describe what goes in this section]
diff --git a/.deepwork/jobs/deepwork_jobs/templates/job.yml.example b/.deepwork/jobs/deepwork_jobs/templates/job.yml.example
index 7cc6e3bb..dac1aba8 100644
--- a/.deepwork/jobs/deepwork_jobs/templates/job.yml.example
+++ b/.deepwork/jobs/deepwork_jobs/templates/job.yml.example
@@ -14,6 +14,15 @@ changelog:
   - version: "1.0.0"
     changes: "Initial job creation"
 
+workflows:
+  - name: full_analysis
+    summary: "Complete competitive research from identification to positioning"
+    steps:
+      - identify_competitors
+      - research_competitors
+      - comparative_analysis
+      - positioning_recommendations
+
 steps:
   - id: identify_competitors
     name: "Identify Competitors"
@@ -25,8 +34,12 @@ steps:
       - name: product_category
         description: "The product category"
     outputs:
-      - competitors_list.md
+      competitors_list.md:
+        type: file
+        description: "Vetted list of direct and indirect competitors"
+        required: true
     dependencies: []
+    reviews: []
 
   - id: research_competitors
     name: "Research Competitors"
@@ -36,17 +49,18 @@ steps:
       - file: competitors_list.md
         from_step: identify_competitors
     outputs:
-      - research_notes.md
+      research_notes.md:
+        type: file
+        description: "Detailed research notes on each competitor"
+        required: true
     dependencies:
       - identify_competitors
-    hooks:
-      after_agent:
-        - prompt: |
-            Verify the research meets criteria:
-            1. Each competitor has at least 3 data points
-            2. Sources are cited
-            3. Information is current (within last year)
-            If ALL criteria are met, include `<promise>✓ Quality Criteria Met</promise>`.
+    reviews:
+      - run_each: research_notes.md
+        quality_criteria:
+          "Sufficient Data": "Does each competitor have at least 3 data points?"
+          "Sources Cited": "Are sources cited for key claims?"
+          "Current Information": "Is the information current (within last year)?"
 
   - id: comparative_analysis
     name: "Comparative Analysis"
@@ -56,9 +70,13 @@ steps:
       - file: research_notes.md
         from_step: research_competitors
     outputs:
-      - comparison_matrix.md
+      comparison_matrix.md:
+        type: file
+        description: "Side-by-side comparison matrix of all competitors"
+        required: true
     dependencies:
       - research_competitors
+    reviews: []
 
   - id: positioning_recommendations
     name: "Positioning Recommendations"
@@ -68,6 +86,15 @@ steps:
       - file: comparison_matrix.md
         from_step: comparative_analysis
     outputs:
-      - positioning_report.md
+      positioning_report.md:
+        type: file
+        description: "Strategic positioning recommendations"
+        required: true
     dependencies:
       - comparative_analysis
+    reviews:
+      - run_each: positioning_report.md
+        additional_review_guidance: "Read the comparison_matrix.md file to verify that recommendations are grounded in the competitive analysis data."
+        quality_criteria:
+          "Actionable": "Are recommendations specific and actionable?"
+          "Data-Backed": "Are recommendations supported by the competitive analysis data?"
diff --git a/.deepwork/jobs/deepwork_jobs/templates/job.yml.template b/.deepwork/jobs/deepwork_jobs/templates/job.yml.template
index 7dcf34e9..f0f87bfb 100644
--- a/.deepwork/jobs/deepwork_jobs/templates/job.yml.template
+++ b/.deepwork/jobs/deepwork_jobs/templates/job.yml.template
@@ -20,6 +20,13 @@ changelog:
   - version: "1.0.0"
     changes: "Initial job creation"
 
+workflows:
+  - name: [workflow_name]
+    summary: "[What this workflow accomplishes]"
+    steps:
+      - [step_id]
+      - [another_step]
+
 steps:
   - id: [step_id]
     name: "[Step Name]"
@@ -32,27 +39,33 @@ steps:
       # - file: [filename_or_path]
       #   from_step: [previous_step_id]
     outputs:
-      - [output_filename_or_path]  # e.g., "report.md" or "reports/analysis.md"
+      [output_name]:
+        type: file
+        description: "[What this output contains]"
+        required: true
     dependencies: []  # List of step IDs that must complete first
+    reviews:
+      - run_each: step  # or a specific output name
+        # Optional: tell the reviewer what files to read for context
+        # additional_review_guidance: "Read the [filename] for context on [what]."
+        quality_criteria:
+          "[Criterion Name]": "[Question to evaluate]"
+          "[Another Criterion]": "[Another question]"
     # Optional: Delegate to a specific agent type (uses context: fork)
     # agent: general-purpose  # or other agent type
-    # Optional: Quality validation hooks
-    hooks:
-      after_agent:
-        - prompt: |
-            Verify this step's output meets quality criteria:
-            1. [Criterion 1]
-            2. [Criterion 2]
-            If ALL criteria are met, include `<promise>✓ Quality Criteria Met</promise>`.
 
   - id: [another_step]
     name: "[Another Step]"
     description: "[What this step does]"
     instructions_file: steps/[another_step].md
     inputs:
-      - file: [output_filename_or_path]
+      - file: [output_name]
         from_step: [step_id]
     outputs:
-      - [another_output_path]
+      [another_output]:
+        type: file
+        description: "[What this output contains]"
+        required: true
     dependencies:
       - [step_id]  # This step requires the previous step
+    reviews: []  # Empty if no quality checks needed
diff --git a/.deepwork/jobs/deepwork_rules/hooks/capture_prompt_work_tree.sh b/.deepwork/jobs/deepwork_rules/hooks/capture_prompt_work_tree.sh
deleted file mode 100755
index c9cedd82..00000000
--- a/.deepwork/jobs/deepwork_rules/hooks/capture_prompt_work_tree.sh
+++ /dev/null
@@ -1,38 +0,0 @@
-#!/bin/bash
-# capture_prompt_work_tree.sh - Captures the git work tree state at prompt submission
-#
-# This script creates a snapshot of ALL tracked files at the time the prompt
-# is submitted. This baseline is used for rules with compare_to: prompt and
-# created: mode to detect truly NEW files (not modifications to existing ones).
-#
-# The baseline contains ALL tracked files (not just changed files) so that
-# the rules_check hook can determine which files are genuinely new vs which
-# files existed before and were just modified.
-#
-# It also captures the HEAD commit ref so that committed changes can be detected
-# by comparing HEAD at Stop time to the captured ref.
-
-set -e
-
-# Ensure .deepwork directory exists
-mkdir -p .deepwork
-
-# Save the current HEAD commit ref for detecting committed changes
-# This is used by get_changed_files_prompt() to detect files changed since prompt,
-# even if those changes were committed during the agent response.
-git rev-parse HEAD > .deepwork/.last_head_ref 2>/dev/null || echo "" > .deepwork/.last_head_ref
-
-# Save ALL tracked files (not just changed files)
-# This is critical for created: mode rules to distinguish between:
-# - Newly created files (not in baseline) -> should trigger created: rules
-# - Modified existing files (in baseline) -> should NOT trigger created: rules
-git ls-files > .deepwork/.last_work_tree 2>/dev/null || true
-
-# Also include untracked files that exist at prompt time
-# These are files the user may have created before submitting the prompt
-git ls-files --others --exclude-standard >> .deepwork/.last_work_tree 2>/dev/null || true
-
-# Sort and deduplicate
-if [ -f .deepwork/.last_work_tree ]; then
-    sort -u .deepwork/.last_work_tree -o .deepwork/.last_work_tree
-fi
diff --git a/.deepwork/jobs/deepwork_rules/hooks/global_hooks.yml b/.deepwork/jobs/deepwork_rules/hooks/global_hooks.yml
deleted file mode 100644
index a310d31a..00000000
--- a/.deepwork/jobs/deepwork_rules/hooks/global_hooks.yml
+++ /dev/null
@@ -1,8 +0,0 @@
-# DeepWork Rules Hooks Configuration
-# Maps lifecycle events to hook scripts or Python modules
-
-UserPromptSubmit:
-  - user_prompt_submit.sh
-
-Stop:
-  - module: deepwork.hooks.rules_check
diff --git a/.deepwork/jobs/deepwork_rules/hooks/user_prompt_submit.sh b/.deepwork/jobs/deepwork_rules/hooks/user_prompt_submit.sh
deleted file mode 100755
index 486ad836..00000000
--- a/.deepwork/jobs/deepwork_rules/hooks/user_prompt_submit.sh
+++ /dev/null
@@ -1,16 +0,0 @@
-#!/bin/bash
-# user_prompt_submit.sh - Runs on every user prompt submission
-#
-# This script captures the work tree state at each prompt submission.
-# This baseline is used for policies with compare_to: prompt to detect
-# what changed during an agent response.
-
-set -e
-
-SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-
-# Capture work tree state at each prompt for compare_to: prompt policies
-"${SCRIPT_DIR}/capture_prompt_work_tree.sh"
-
-# Exit successfully - don't block the prompt
-exit 0
diff --git a/.deepwork/jobs/deepwork_rules/job.yml b/.deepwork/jobs/deepwork_rules/job.yml
deleted file mode 100644
index a0032b9e..00000000
--- a/.deepwork/jobs/deepwork_rules/job.yml
+++ /dev/null
@@ -1,49 +0,0 @@
-name: deepwork_rules
-version: "0.4.0"
-summary: "Creates file-change rules that enforce guidelines during AI sessions. Use when automating documentation sync or code review triggers."
-description: |
-  Manages rules that automatically trigger when certain files change during an AI agent session.
-  Rules help ensure that code changes follow team guidelines, documentation is updated,
-  and architectural decisions are respected.
-
-  IMPORTANT: Rules are evaluated at the "Stop" hook, which fires when an agent finishes its turn.
-  This includes when sub-agents complete their work. Rules are NOT evaluated immediately after
-  each file edit - they batch up and run once at the end of the agent's response cycle.
-  - Command action rules: Execute their command (e.g., `uv sync`) when the agent stops
-  - Prompt action rules: Display instructions to the agent, blocking until addressed
-
-  Rules are stored as individual markdown files with YAML frontmatter in the `.deepwork/rules/`
-  directory. Each rule file specifies:
-  - Detection mode: trigger/safety, set (bidirectional), or pair (directional)
-  - Patterns: Glob patterns for matching files, with optional variable capture
-  - Action type: prompt (default) to show instructions, or command to run a shell command
-  - Instructions: Markdown content describing what the agent should do
-
-  Example use cases:
-  - Update installation docs when configuration files change
-  - Require security review when authentication code is modified
-  - Ensure API documentation stays in sync with API code
-  - Enforce source/test file pairing
-  - Auto-run `uv sync` when pyproject.toml changes (command action)
-
-changelog:
-  - version: "0.1.0"
-    changes: "Initial version"
-  - version: "0.2.0"
-    changes: "Standardized on 'ask structured questions' phrasing for user input"
-  - version: "0.3.0"
-    changes: "Migrated to v2 format - individual markdown files in .deepwork/rules/"
-  - version: "0.4.0"
-    changes: "Improved skill descriptions with third-person voice and 'Use when...' triggers for better discoverability"
-
-steps:
-  - id: define
-    name: "Define Rule"
-    description: "Creates a rule file that triggers when specified files change. Use when setting up documentation sync, code review requirements, or automated commands."
-    instructions_file: steps/define.md
-    inputs:
-      - name: rule_purpose
-        description: "What guideline or constraint should this rule enforce?"
-    outputs:
-      - .deepwork/rules/{rule-name}.md
-    dependencies: []
diff --git a/.deepwork/jobs/deepwork_rules/rules/.gitkeep b/.deepwork/jobs/deepwork_rules/rules/.gitkeep
deleted file mode 100644
index 429162b4..00000000
--- a/.deepwork/jobs/deepwork_rules/rules/.gitkeep
+++ /dev/null
@@ -1,13 +0,0 @@
-# This directory contains example rule templates.
-# Copy and customize these files to create your own rules.
-#
-# Rule files use YAML frontmatter in markdown format:
-#
-# ---
-# name: Rule Name
-# trigger: "pattern/**/*"
-# safety: "optional/pattern"
-# ---
-# Instructions in markdown here.
-#
-# See doc/rules_syntax.md for full documentation.
diff --git a/.deepwork/jobs/deepwork_rules/rules/api-documentation-sync.md.example b/.deepwork/jobs/deepwork_rules/rules/api-documentation-sync.md.example
deleted file mode 100644
index 427da7ae..00000000
--- a/.deepwork/jobs/deepwork_rules/rules/api-documentation-sync.md.example
+++ /dev/null
@@ -1,10 +0,0 @@
----
-name: API Documentation Sync
-trigger: src/api/**/*
-safety: docs/api/**/*.md
----
-API code has changed. Please verify that API documentation is up to date:
-
-- New or changed endpoints
-- Modified request/response schemas
-- Updated authentication requirements
diff --git a/.deepwork/jobs/deepwork_rules/rules/readme-documentation.md.example b/.deepwork/jobs/deepwork_rules/rules/readme-documentation.md.example
deleted file mode 100644
index 6be90c83..00000000
--- a/.deepwork/jobs/deepwork_rules/rules/readme-documentation.md.example
+++ /dev/null
@@ -1,10 +0,0 @@
----
-name: README Documentation
-trigger: src/**/*
-safety: README.md
----
-Source code has been modified. Please review README.md for accuracy:
-
-1. Verify the project overview reflects current functionality
-2. Check that usage examples are still correct
-3. Ensure installation/setup instructions remain valid
diff --git a/.deepwork/jobs/deepwork_rules/rules/security-review.md.example b/.deepwork/jobs/deepwork_rules/rules/security-review.md.example
deleted file mode 100644
index abce3194..00000000
--- a/.deepwork/jobs/deepwork_rules/rules/security-review.md.example
+++ /dev/null
@@ -1,11 +0,0 @@
----
-name: Security Review for Auth Changes
-trigger:
-  - src/auth/**/*
-  - src/security/**/*
----
-Authentication or security code has been changed. Please:
-
-1. Review for hardcoded credentials or secrets
-2. Check input validation on user inputs
-3. Verify access control logic is correct
diff --git a/.deepwork/jobs/deepwork_rules/rules/skill-md-validation.md b/.deepwork/jobs/deepwork_rules/rules/skill-md-validation.md
deleted file mode 100644
index 38f90c51..00000000
--- a/.deepwork/jobs/deepwork_rules/rules/skill-md-validation.md
+++ /dev/null
@@ -1,46 +0,0 @@
----
-name: SKILL.md Validation
-trigger: "**/SKILL.md"
-compare_to: base
----
-A SKILL.md file has been created or modified. Please validate that it follows the required format:
-
-## Required Structure
-
-The file MUST have valid YAML frontmatter at the start, enclosed between `---` markers:
-
-```markdown
----
-name: my-skill-name
-description: A description of what this skill does
----
-
-# Rest of the skill documentation...
-```
-
-## Validation Checklist
-
-1. **YAML Frontmatter**: Verify the file starts with `---` followed by valid YAML and ends with `---`
-
-2. **`name` field** (required):
-   - Must be present in the frontmatter
-   - Must contain only lowercase letters, numbers, and hyphens (`a-z`, `0-9`, `-`)
-   - Must be 64 characters or fewer
-   - Example valid names: `my-skill`, `code-review-2`, `lint`
-   - Example invalid names: `My Skill` (uppercase/spaces), `skill_name` (underscores), `SKILL` (uppercase)
-
-3. **`description` field** (required):
-   - Must be present in the frontmatter
-   - Must be 1024 characters or fewer
-   - Should clearly describe what the skill does
-
-## What to Check
-
-For the modified file: {trigger_files}
-
-1. Parse the YAML frontmatter and verify it is valid YAML
-2. Check that `name` exists and matches the pattern `^[a-z0-9-]+$` with max length 64
-3. Check that `description` exists and is at most 1024 characters
-4. Report any validation errors to the user
-
-If the file does not pass validation, help the user fix the issues.
diff --git a/.deepwork/jobs/deepwork_rules/rules/source-test-pairing.md.example b/.deepwork/jobs/deepwork_rules/rules/source-test-pairing.md.example
deleted file mode 100644
index 3ebd6968..00000000
--- a/.deepwork/jobs/deepwork_rules/rules/source-test-pairing.md.example
+++ /dev/null
@@ -1,13 +0,0 @@
----
-name: Source/Test Pairing
-set:
-  - src/{path}.py
-  - tests/{path}_test.py
----
-Source and test files should change together.
-
-When modifying source code, ensure corresponding tests are updated.
-When adding tests, ensure they test actual source code.
-
-Modified source: {trigger_files}
-Expected tests: {expected_files}
diff --git a/.deepwork/jobs/deepwork_rules/steps/define.md b/.deepwork/jobs/deepwork_rules/steps/define.md
deleted file mode 100644
index 1e38a5e6..00000000
--- a/.deepwork/jobs/deepwork_rules/steps/define.md
+++ /dev/null
@@ -1,249 +0,0 @@
-# Define Rule
-
-## Objective
-
-Create a new rule file in the `.deepwork/rules/` directory to enforce team guidelines, documentation requirements, or other constraints when specific files change.
-
-## Task
-
-Guide the user through defining a new rule by asking structured questions. **Do not create the rule without first understanding what they want to enforce.**
-
-**Important**: Use the AskUserQuestion tool to ask structured questions when gathering information from the user. This provides a better user experience with clear options and guided choices.
-
-### Step 1: Understand the Rule Purpose
-
-Start by asking structured questions to understand what the user wants to enforce:
-
-1. **What guideline or constraint should this rule enforce?**
-   - What situation triggers the need for action?
-   - What files or directories, when changed, should trigger this rule?
-   - Examples: "When config files change", "When API code changes", "When database schema changes"
-
-2. **What action should be taken?**
-   - What should the agent do when the rule triggers?
-   - Update documentation? Perform a security review? Update tests?
-   - Is there a specific file or process that needs attention?
-
-3. **Are there any "safety" conditions?**
-   - Are there files that, if also changed, mean the rule doesn't need to fire?
-   - For example: If config changes AND install_guide.md changes, assume docs are already updated
-   - This prevents redundant prompts when the user has already done the right thing
-
-### Step 2: Choose the Detection Mode
-
-Help the user select the appropriate detection mode:
-
-**Trigger/Safety Mode** (most common):
-- Fires when trigger patterns match AND no safety patterns match
-- Use for: "When X changes, check Y" rules
-- Example: When config changes, verify install docs
-
-**Set Mode** (bidirectional correspondence):
-- Fires when files that should change together don't all change
-- Use for: Source/test pairing, model/migration sync
-- Example: `src/foo.py` and `tests/foo_test.py` should change together
-
-**Pair Mode** (directional correspondence):
-- Fires when a trigger file changes but expected files don't
-- Changes to expected files alone do NOT trigger
-- Use for: API code requires documentation updates (but docs can update independently)
-
-### Step 3: Define the Patterns
-
-Help the user define glob patterns for files.
-
-**Common patterns:**
-- `src/**/*.py` - All Python files in src directory (recursive)
-- `app/config/**/*` - All files in app/config directory
-- `*.md` - All markdown files in root
-- `src/api/**/*` - All files in the API directory
-- `migrations/**/*.sql` - All SQL migrations
-
-**Variable patterns (for set/pair modes):**
-- `src/{path}.py` - Captures path variable (e.g., `foo/bar` from `src/foo/bar.py`)
-- `tests/{path}_test.py` - Uses same path variable in corresponding file
-- `{name}` matches single segment, `{path}` matches multiple segments
-
-**Pattern syntax:**
-- `*` - Matches any characters within a single path segment
-- `**` - Matches any characters across multiple path segments (recursive)
-- `?` - Matches a single character
-
-### Step 4: Choose the Comparison Mode (Optional)
-
-The `compare_to` field controls what baseline is used when detecting "changed files":
-
-**Options:**
-- `base` (default) - Compares to the base of the current branch (merge-base with main/master). Best for feature branches.
-- `default_tip` - Compares to the current tip of the default branch. Useful for seeing difference from production.
-- `prompt` - Compares to the state at the start of each prompt. For rules about very recent changes.
-
-Most rules should use the default (`base`) and don't need to specify `compare_to`.
-
-### Step 5: Write the Instructions
-
-Create clear, actionable instructions for what the agent should do when the rule fires.
-
-**Good instructions include:**
-- What to check or review
-- What files might need updating
-- Specific actions to take
-- Quality criteria for completion
-
-**Template variables available in instructions:**
-- `{trigger_files}` - Files that triggered the rule
-- `{expected_files}` - Expected corresponding files (for set/pair modes)
-
-### Step 6: Create the Rule File
-
-Create a new file in `.deepwork/rules/` with a kebab-case filename:
-
-**File Location**: `.deepwork/rules/{rule-name}.md`
-
-**Format for Trigger/Safety Mode:**
-```markdown
----
-name: Friendly Name for the Rule
-trigger: "glob/pattern/**/*"  # or array: ["pattern1", "pattern2"]
-safety: "optional/pattern"    # optional, or array
-compare_to: base              # optional: "base" (default), "default_tip", or "prompt"
----
-Instructions for the agent when this rule fires.
-
-Multi-line markdown content is supported.
-```
-
-**Format for Set Mode (bidirectional):**
-```markdown
----
-name: Source/Test Pairing
-set:
-  - src/{path}.py
-  - tests/{path}_test.py
----
-Source and test files should change together.
-
-Modified: {trigger_files}
-Expected: {expected_files}
-```
-
-**Format for Pair Mode (directional):**
-```markdown
----
-name: API Documentation
-pair:
-  trigger: api/{path}.py
-  expects: docs/api/{path}.md
----
-API code requires documentation updates.
-
-Changed API: {trigger_files}
-Update docs: {expected_files}
-```
-
-### Step 7: Verify the Rule
-
-After creating the rule:
-
-1. **Check the YAML frontmatter** - Ensure valid YAML formatting
-2. **Test trigger patterns** - Verify patterns match intended files
-3. **Review instructions** - Ensure they're clear and actionable
-4. **Check for conflicts** - Ensure the rule doesn't conflict with existing ones
-
-## Example Rules
-
-### Update Documentation on Config Changes
-`.deepwork/rules/config-docs.md`:
-```markdown
----
-name: Update Install Guide on Config Changes
-trigger: app/config/**/*
-safety: docs/install_guide.md
----
-Configuration files have been modified. Please review docs/install_guide.md
-and update it if any installation instructions need to change based on the
-new configuration.
-```
-
-### Security Review for Auth Code
-`.deepwork/rules/security-review.md`:
-```markdown
----
-name: Security Review for Authentication Changes
-trigger:
-  - src/auth/**/*
-  - src/security/**/*
-safety:
-  - SECURITY.md
-  - docs/security_audit.md
----
-Authentication or security code has been changed. Please:
-
-1. Review for hardcoded credentials or secrets
-2. Check input validation on user inputs
-3. Verify access control logic is correct
-4. Update security documentation if needed
-```
-
-### Source/Test Pairing
-`.deepwork/rules/source-test-pairing.md`:
-```markdown
----
-name: Source/Test Pairing
-set:
-  - src/{path}.py
-  - tests/{path}_test.py
----
-Source and test files should change together.
-
-When modifying source code, ensure corresponding tests are updated.
-When adding tests, ensure they test actual source code.
-
-Modified: {trigger_files}
-Expected: {expected_files}
-```
-
-### API Documentation Sync
-`.deepwork/rules/api-docs.md`:
-```markdown
----
-name: API Documentation Update
-pair:
-  trigger: src/api/{path}.py
-  expects: docs/api/{path}.md
----
-API code has changed. Please verify that API documentation in docs/api/
-is up to date with the code changes. Pay special attention to:
-
-- New or changed endpoints
-- Modified request/response schemas
-- Updated authentication requirements
-
-Changed API: {trigger_files}
-Update: {expected_files}
-```
-
-## Output Format
-
-### .deepwork/rules/{rule-name}.md
-Create a new file with the rule definition using YAML frontmatter and markdown body.
-
-## Quality Criteria
-
-- Asked structured questions to understand user requirements
-- Rule name is clear and descriptive (used in promise tags)
-- Correct detection mode selected for the use case
-- Patterns accurately match the intended files
-- Safety patterns prevent unnecessary triggering (if applicable)
-- Instructions are actionable and specific
-- YAML frontmatter is valid
-
-## Context
-
-Rules are evaluated automatically when the agent finishes a task. The system:
-1. Determines which files have changed based on each rule's `compare_to` setting
-2. Evaluates rules based on their detection mode (trigger/safety, set, or pair)
-3. Skips rules where the correspondence is satisfied (for set/pair) or safety matched
-4. Prompts you with instructions for any triggered rules
-
-You can mark a rule as addressed by including `<promise>Rule Name</promise>` in your response (replace Rule Name with the actual rule name from the `name` field). This tells the system you've already handled that rule's requirements.
diff --git a/.deepwork/jobs/manual_tests/job.yml b/.deepwork/jobs/manual_tests/job.yml
deleted file mode 100644
index c13ffac3..00000000
--- a/.deepwork/jobs/manual_tests/job.yml
+++ /dev/null
@@ -1,131 +0,0 @@
-name: manual_tests
-version: "1.4.0"
-summary: "Runs all manual hook/rule tests using sub-agents. Use when validating that DeepWork rules fire correctly."
-description: |
-  A workflow for running manual tests that validate DeepWork rules/hooks fire correctly.
-
-  The **run_all** workflow tests that rules fire when they should AND do not fire when they shouldn't.
-  Each test is run in a SUB-AGENT (not the main agent) because:
-  1. Sub-agents run in isolated contexts where file changes can be detected
-  2. The Stop hook automatically evaluates rules when each sub-agent completes
-  3. The main agent can observe whether hooks fired without triggering them manually
-
-  CRITICAL: All tests MUST run in sub-agents. The main agent MUST NOT make the file
-  edits itself - it spawns sub-agents to make edits, then observes whether the hooks
-  fired automatically when those sub-agents returned.
-
-  Sub-agent configuration:
-  - All sub-agents should use `model: "haiku"` to minimize cost and latency
-  - All sub-agents should use `max_turns: 5` to prevent hanging indefinitely
-
-  Steps:
-  1. reset - Ensure clean environment before testing (clears queue, reverts files)
-  2. run_not_fire_tests - Run all "should NOT fire" tests in PARALLEL sub-agents (6 tests)
-  3. run_fire_tests - Run all "should fire" tests in SERIAL sub-agents with resets between (6 tests)
-  4. infinite_block_tests - Run infinite block tests in SERIAL (4 tests - both fire and not-fire)
-
-  Reset procedure (see steps/reset.md):
-  - Reset runs FIRST to ensure a clean environment before any tests
-  - Each step also calls reset internally when needed (between tests, after completion)
-  - Reset reverts git changes, removes created files, and clears the rules queue
-
-  Test types covered:
-  - Trigger/Safety mode
-  - Set mode (bidirectional)
-  - Pair mode (directional)
-  - Command action
-  - Multi safety
-  - Infinite block (prompt and command) - in dedicated step
-  - Created mode (new files only)
-
-workflows:
-  - name: run_all
-    summary: "Run all manual tests: reset, NOT-fire tests, fire tests, and infinite block tests"
-    steps:
-      - reset
-      - run_not_fire_tests
-      - run_fire_tests
-      - infinite_block_tests
-
-changelog:
-  - version: "1.4.0"
-    changes: "Added workflows section to explicitly define the run_all test workflow sequence"
-  - version: "1.3.0"
-    changes: "Add model/max_turns config for sub-agents; move infinite block tests to dedicated serial step; add reset step that runs first; verify queue empty for 'should NOT fire' tests"
-  - version: "1.2.1"
-    changes: "Fixed incomplete revert - now uses git reset HEAD to unstage files (rules_check stages with git add -A)"
-  - version: "1.2.0"
-    changes: "Added early termination on 2 test failures; emphasized mandatory file revert and queue clear after each step"
-  - version: "1.1.0"
-    changes: "Added rules queue clearing between tests to prevent anti-infinite-loop mechanism from blocking tests"
-  - version: "1.0.0"
-    changes: "Initial job creation - tests run in sub-agents to observe automatic hook firing"
-
-steps:
-  - id: reset
-    name: "Reset Manual Tests Environment"
-    description: "Runs FIRST to ensure clean environment. Also called internally by other steps when they need to revert changes and clear the queue."
-    instructions_file: steps/reset.md
-    inputs: []
-    outputs:
-      - clean_environment
-    dependencies: []
-    quality_criteria:
-      - "**Environment Clean**: Git changes reverted, created files removed, and rules queue cleared"
-
-  - id: run_not_fire_tests
-    name: "Run Should-NOT-Fire Tests"
-    description: "Runs all 6 'should NOT fire' tests in parallel sub-agents. Use to verify rules don't fire when safety conditions are met."
-    instructions_file: steps/run_not_fire_tests.md
-    inputs:
-      - file: clean_environment
-        from_step: reset
-    outputs:
-      - not_fire_results
-    dependencies:
-      - reset
-    quality_criteria:
-      - "**Sub-Agents Used**: Did the main agent spawn sub-agents (using the Task tool) to make the file edits? The main agent must NOT edit the test files directly."
-      - "**Sub-Agent Config**: Did all sub-agents use `model: \"haiku\"` and `max_turns: 5`?"
-      - "**Parallel Execution**: Were all 6 sub-agents launched in parallel (in a single message with multiple Task tool calls)?"
-      - "**Hooks Observed**: Did the main agent observe that no blocking hooks fired when the sub-agents returned? The hooks fire AUTOMATICALLY - the agent must NOT manually run the rules_check command."
-      - "**Queue Verified Empty**: After all sub-agents completed, was the rules queue checked and confirmed empty (no entries = rules did not fire)?"
-      - "**Early Termination**: If 2 tests failed, did testing halt immediately with results reported?"
-      - "**Reset Performed**: Was the reset step called internally after tests completed (or after early termination)?"
-
-  - id: run_fire_tests
-    name: "Run Should-Fire Tests"
-    description: "Runs all 6 'should fire' tests serially with resets between each. Use after NOT-fire tests to verify rules fire correctly."
-    instructions_file: steps/run_fire_tests.md
-    inputs:
-      - file: not_fire_results
-        from_step: run_not_fire_tests
-    outputs:
-      - fire_results
-    dependencies:
-      - run_not_fire_tests
-    quality_criteria:
-      - "**Sub-Agents Used**: Did the main agent spawn a sub-agent (using the Task tool) for EACH test? The main agent must NOT edit the test files directly."
-      - "**Sub-Agent Config**: Did all sub-agents use `model: \"haiku\"` and `max_turns: 5`?"
-      - "**Serial Execution**: Were sub-agents launched ONE AT A TIME (not in parallel) to prevent cross-contamination?"
-      - "**Hooks Fired Automatically**: Did the main agent observe the blocking hooks firing automatically when each sub-agent returned? The agent must NOT manually run the rules_check command."
-      - "**Reset Between Tests**: Was the reset step called internally after each test to revert files and prevent cross-contamination?"
-      - "**Early Termination**: If 2 tests failed, did testing halt immediately with results reported?"
-      - "**Results Recorded**: Did the main agent track pass/fail status for each test case?"
-
-  - id: infinite_block_tests
-    name: "Run Infinite Block Tests"
-    description: "Runs all 4 infinite block tests serially. Tests both 'should fire' (no promise) and 'should NOT fire' (with promise) scenarios."
-    instructions_file: steps/infinite_block_tests.md
-    inputs:
-      - file: fire_results
-        from_step: run_fire_tests
-    outputs:
-      - infinite_block_results
-    dependencies:
-      - run_fire_tests
-    quality_criteria:
-      - "**Sub-Agents Used**: Each test run via Task tool with `model: \"haiku\"` and `max_turns: 5`"
-      - "**Serial Execution**: Sub-agents launched ONE AT A TIME with reset between each"
-      - "**Promise Tests**: Completed WITHOUT blocking (promise bypassed the rule)"
-      - "**No-Promise Tests**: Hook fired AND sub-agent returned in reasonable time (not hung)"
diff --git a/.deepwork/jobs/manual_tests/steps/infinite_block_tests.md b/.deepwork/jobs/manual_tests/steps/infinite_block_tests.md
deleted file mode 100644
index 5932c9e2..00000000
--- a/.deepwork/jobs/manual_tests/steps/infinite_block_tests.md
+++ /dev/null
@@ -1,136 +0,0 @@
-# Run Infinite Block Tests
-
-## Objective
-
-Run all infinite block tests in **serial** to verify that infinite blocking rules work correctly - both firing when they should AND not firing when bypassed with a promise tag.
-
-## CRITICAL: Sub-Agent Requirement
-
-**You MUST spawn sub-agents to make all file edits. DO NOT edit the test files yourself.**
-
-Why sub-agents are required:
-1. Sub-agents run in isolated contexts where file changes are detected
-2. When a sub-agent completes, the Stop hook **automatically** evaluates rules
-3. You (the main agent) observe whether hooks fired - you do NOT manually trigger them
-4. If you edit files directly, the hooks won't fire because you're not a completing sub-agent
-
-**NEVER manually run `echo '{}' | python -m deepwork.hooks.rules_check`** - this defeats the purpose of the test. Hooks must fire AUTOMATICALLY when sub-agents return.
-
-## CRITICAL: Serial Execution
-
-**These tests MUST run ONE AT A TIME, with resets between each.**
-
-Why serial execution is required for infinite block tests:
-- Infinite block tests can block indefinitely without a promise tag
-- Running them in parallel would cause unpredictable blocking behavior
-- Serial execution allows controlled observation of each test
-
-## Task
-
-Run all 4 infinite block tests in **serial**, resetting between each, and verify correct blocking behavior.
-
-### Process
-
-For EACH test below, follow this cycle:
-
-1. **Launch a sub-agent** using the Task tool with:
-   - `model: "haiku"` - Use the fast model to minimize cost and latency
-   - `max_turns: 5` - **Critical safeguard**: Limits API round-trips to prevent infinite hanging. The Task tool does not support a direct timeout, so max_turns is our only protection against runaway sub-agents.
-2. **Wait for the sub-agent to complete**
-3. **Observe whether the hook fired automatically** - you should see a blocking prompt or command output
-4. **If no visible blocking occurred, check the queue**:
-   ```bash
-   ls -la .deepwork/tmp/rules/queue/
-   cat .deepwork/tmp/rules/queue/*.json 2>/dev/null
-   ```
-   - If queue entries exist with status "queued", the hook DID fire but blocking wasn't visible
-   - If queue is empty, the hook did NOT fire at all
-   - Record the queue status along with the result
-5. **Record the result** - see expected outcomes for each test
-6. **Reset** (MANDATORY after each test) - follow the reset step instructions:
-   ```bash
-   git reset HEAD manual_tests/ && git checkout -- manual_tests/ && rm -f manual_tests/test_created_mode/new_config.yml
-   deepwork rules clear_queue
-   ```
-7. **Check for early termination**: If **2 tests have now failed**, immediately:
-   - Stop running any remaining tests
-   - Report the results summary showing which tests passed/failed
-   - The job halts here - do NOT proceed with remaining tests
-8. **Proceed to the next test** (only if fewer than 2 failures)
-
-**IMPORTANT**: Only launch ONE sub-agent at a time. Wait for it to complete and reset before launching the next.
-
-### Test Cases (run serially)
-
-**Test 1: Infinite Block Prompt - Should NOT Fire (with promise)**
-- Sub-agent prompt: "Edit `manual_tests/test_infinite_block_prompt/dangerous.py` to add a comment. Include `<promise>I have verified this change is safe</promise>` in your response to bypass the infinite block."
-- Sub-agent config: `model: "haiku"`, `max_turns: 5`
-- Expected: Sub-agent completes WITHOUT blocking - the promise tag bypasses the infinite block
-- Result: PASS if no blocking, FAIL if blocked
-
-**Test 2: Infinite Block Command - Should NOT Fire (with promise)**
-- Sub-agent prompt: "Edit `manual_tests/test_infinite_block_command/risky.py` to add a comment. Include `<promise>I have verified this change is safe</promise>` in your response to bypass the infinite block."
-- Sub-agent config: `model: "haiku"`, `max_turns: 5`
-- Expected: Sub-agent completes WITHOUT blocking - the promise tag bypasses the infinite block
-- Result: PASS if no blocking, FAIL if blocked
-
-**Test 3: Infinite Block Prompt - Should Fire (no promise)**
-- Sub-agent prompt: "Edit `manual_tests/test_infinite_block_prompt/dangerous.py` to add a comment. Do NOT include any promise tags."
-- Sub-agent config: `model: "haiku"`, `max_turns: 5`
-- Expected behavior:
-  1. **Should fire**: Hook fires and shows blocking prompt
-  2. **Should return in reasonable time**: Sub-agent hits max_turns limit and returns (not stuck forever)
-- Result criteria:
-  - PASS if: Hook fired (visible block OR queue entry) AND sub-agent returned within reasonable time
-  - FAIL if: Hook did not fire, OR sub-agent hung indefinitely
-
-**Test 4: Infinite Block Command - Should Fire (no promise)**
-- Sub-agent prompt: "Edit `manual_tests/test_infinite_block_command/risky.py` to add a comment. Do NOT include any promise tags."
-- Sub-agent config: `model: "haiku"`, `max_turns: 5`
-- Expected behavior:
-  1. **Should fire**: Hook fires and command fails (exit code 1)
-  2. **Should return in reasonable time**: Sub-agent hits max_turns limit and returns (not stuck forever)
-- Result criteria:
-  - PASS if: Hook fired (visible block OR queue entry) AND sub-agent returned within reasonable time
-  - FAIL if: Hook did not fire, OR sub-agent hung indefinitely
-
-### Results Tracking
-
-Record the result after each test:
-
-| Test Case | Scenario | Should Fire? | Returned in Time? | Visible Block? | Queue Entry? | Result |
-|-----------|----------|:------------:|:-----------------:|:--------------:|:------------:|:------:|
-| Infinite Block Prompt | With promise | No | Yes | | | |
-| Infinite Block Command | With promise | No | Yes | | | |
-| Infinite Block Prompt | No promise | Yes | Yes | | | |
-| Infinite Block Command | No promise | Yes | Yes | | | |
-
-**Result criteria:**
-- **"Should NOT fire" tests (with promise)**: PASS if no blocking AND no queue entry AND returned quickly
-- **"Should fire" tests (no promise)**: PASS if hook fired (visible block OR queue entry) AND returned in reasonable time (max_turns limit)
-
-**Queue Entry Status Guide:**
-- If queue has entry with status "queued" -> Hook fired, rule was shown to agent
-- If queue has entry with status "passed" -> Hook fired, rule was satisfied
-- If queue is empty -> Hook did NOT fire
-
-## Quality Criteria
-
-- **Sub-agents spawned**: Tests were run using the Task tool to spawn sub-agents - the main agent did NOT edit files directly
-- **Correct sub-agent config**: All sub-agents used `model: "haiku"` and `max_turns: 5`
-- **Serial execution**: Sub-agents were launched ONE AT A TIME, not in parallel
-- **Reset between tests**: Reset step was followed after each test
-- **Hooks observed (not triggered)**: The main agent observed hook behavior without manually running rules_check - hooks fired AUTOMATICALLY
-- **"Should NOT fire" tests verified**: Promise tests completed without blocking and no queue entries
-- **"Should fire" tests verified**: Non-promise tests fired (visible block OR queue entry) AND returned in reasonable time (not hung indefinitely)
-- **Early termination on 2 failures**: If 2 tests failed, testing halted immediately and results were reported
-- **Results recorded**: Pass/fail status was recorded for each test run
-- When all criteria are met, include `<promise>Quality Criteria Met</promise>` in your response
-
-## Reference
-
-See [test_reference.md](test_reference.md) for the complete test matrix and rule descriptions.
-
-## Context
-
-This step runs after both the "should NOT fire" and "should fire" test steps. It specifically tests infinite blocking behavior which requires serial execution due to the blocking nature of these rules.
diff --git a/.deepwork/jobs/manual_tests/steps/reset.md b/.deepwork/jobs/manual_tests/steps/reset.md
deleted file mode 100644
index b6eb4fb7..00000000
--- a/.deepwork/jobs/manual_tests/steps/reset.md
+++ /dev/null
@@ -1,38 +0,0 @@
-# Reset Manual Tests Environment
-
-## Objective
-
-Reset the manual tests environment by reverting all file changes and clearing the rules queue.
-
-## Purpose
-
-This step contains all the reset logic that other steps can call when they need to clean up between or after tests. It ensures consistent cleanup across all test steps.
-
-## Reset Commands
-
-Run these commands to reset the environment:
-
-```bash
-git reset HEAD manual_tests/ && git checkout -- manual_tests/ && rm -f manual_tests/test_created_mode/new_config.yml
-deepwork rules clear_queue
-```
-
-## Command Explanation
-
-- `git reset HEAD manual_tests/` - Unstages files from the index (rules_check uses `git add -A` which stages changes)
-- `git checkout -- manual_tests/` - Reverts working tree to match HEAD
-- `rm -f manual_tests/test_created_mode/new_config.yml` - Removes any new files created during tests (the created mode test creates this file)
-- `deepwork rules clear_queue` - Clears the rules queue so rules can fire again (prevents anti-infinite-loop mechanism from blocking subsequent tests)
-
-## When to Reset
-
-- **After each serial test**: Reset immediately after observing the result to prevent cross-contamination
-- **After parallel tests complete**: Reset once all parallel sub-agents have returned
-- **On early termination**: Reset before reporting failure results
-- **Before starting a new test step**: Ensure clean state
-
-## Quality Criteria
-
-- **All changes reverted**: `git status` shows no changes in `manual_tests/`
-- **Queue cleared**: `.deepwork/tmp/rules/queue/` is empty
-- **New files removed**: `manual_tests/test_created_mode/new_config.yml` does not exist
diff --git a/.deepwork/jobs/manual_tests/steps/run_fire_tests.md b/.deepwork/jobs/manual_tests/steps/run_fire_tests.md
deleted file mode 100644
index 787dc3ef..00000000
--- a/.deepwork/jobs/manual_tests/steps/run_fire_tests.md
+++ /dev/null
@@ -1,132 +0,0 @@
-# Run Should-Fire Tests
-
-## Objective
-
-Run all "should fire" tests in **serial** sub-agents to verify that rules fire correctly when their trigger conditions are met without safety conditions.
-
-## CRITICAL: Sub-Agent Requirement
-
-**You MUST spawn sub-agents to make all file edits. DO NOT edit the test files yourself.**
-
-Why sub-agents are required:
-1. Sub-agents run in isolated contexts where file changes are detected
-2. When a sub-agent completes, the Stop hook **automatically** evaluates rules
-3. You (the main agent) observe whether hooks fired - you do NOT manually trigger them
-4. If you edit files directly, the hooks won't fire because you're not a completing sub-agent
-
-**NEVER manually run `echo '{}' | python -m deepwork.hooks.rules_check`** - this defeats the purpose of the test. Hooks must fire AUTOMATICALLY when sub-agents return.
-
-## CRITICAL: Serial Execution
-
-**These tests MUST run ONE AT A TIME, with resets between each.**
-
-Why serial execution is required:
-- These tests edit ONLY the trigger file (not the safety)
-- If multiple sub-agents run in parallel, sub-agent A's hook will see changes from sub-agent B
-- This causes cross-contamination: A gets blocked by rules triggered by B's changes
-- Run one test, observe the hook, reset, then run the next
-
-## Task
-
-Run all 6 "should fire" tests in **serial** sub-agents, resetting between each, and verify that blocking hooks fire automatically.
-
-### Process
-
-For EACH test below, follow this cycle:
-
-1. **Launch a sub-agent** using the Task tool with:
-   - `model: "haiku"` - Use the fast model to minimize cost and latency
-   - `max_turns: 5` - Prevent sub-agents from hanging indefinitely
-2. **Wait for the sub-agent to complete**
-3. **Observe whether the hook fired automatically** - you should see a blocking prompt or command output
-4. **If no visible blocking occurred, check the queue**:
-   ```bash
-   ls -la .deepwork/tmp/rules/queue/
-   cat .deepwork/tmp/rules/queue/*.json 2>/dev/null
-   ```
-   - If queue entries exist with status "queued", the hook DID fire but blocking wasn't visible
-   - If queue is empty, the hook did NOT fire at all
-   - Record the queue status along with the result
-5. **Record the result** - pass if hook fired (visible block OR queue entry), fail if neither
-6. **Reset** (MANDATORY after each test) - follow the reset step instructions:
-   ```bash
-   git reset HEAD manual_tests/ && git checkout -- manual_tests/ && rm -f manual_tests/test_created_mode/new_config.yml
-   deepwork rules clear_queue
-   ```
-   See [reset.md](reset.md) for detailed explanation of these commands.
-7. **Check for early termination**: If **2 tests have now failed**, immediately:
-   - Stop running any remaining tests
-   - Report the results summary showing which tests passed/failed
-   - The job halts here - do NOT proceed with remaining tests
-8. **Proceed to the next test** (only if fewer than 2 failures)
-
-**IMPORTANT**: Only launch ONE sub-agent at a time. Wait for it to complete and reset before launching the next.
-
-### Test Cases (run serially)
-
-**Test 1: Trigger/Safety**
-- Sub-agent prompt: "Edit ONLY `manual_tests/test_trigger_safety_mode/feature.py` to add a comment. Do NOT edit the `_doc.md` file."
-- Sub-agent config: `model: "haiku"`, `max_turns: 5`
-- Expected: Hook fires with prompt about updating documentation
-
-**Test 2: Set Mode**
-- Sub-agent prompt: "Edit ONLY `manual_tests/test_set_mode/module_source.py` to add a comment. Do NOT edit the `_test.py` file."
-- Sub-agent config: `model: "haiku"`, `max_turns: 5`
-- Expected: Hook fires with prompt about updating tests
-
-**Test 3: Pair Mode**
-- Sub-agent prompt: "Edit ONLY `manual_tests/test_pair_mode/handler_trigger.py` to add a comment. Do NOT edit the `_expected.md` file."
-- Sub-agent config: `model: "haiku"`, `max_turns: 5`
-- Expected: Hook fires with prompt about updating expected output
-
-**Test 4: Command Action**
-- Sub-agent prompt: "Edit `manual_tests/test_command_action/input.txt` to add some text."
-- Sub-agent config: `model: "haiku"`, `max_turns: 5`
-- Expected: Command runs automatically, appending to the log file (this rule always runs, no safety condition)
-
-**Test 5: Multi Safety**
-- Sub-agent prompt: "Edit ONLY `manual_tests/test_multi_safety/core.py` to add a comment. Do NOT edit any of the safety files (`_safety_a.md`, `_safety_b.md`, or `_safety_c.md`)."
-- Sub-agent config: `model: "haiku"`, `max_turns: 5`
-- Expected: Hook fires with prompt about updating safety documentation
-
-**Test 6: Created Mode**
-- Sub-agent prompt: "Create a NEW file `manual_tests/test_created_mode/new_config.yml` with some YAML content. This must be a NEW file, not a modification."
-- Sub-agent config: `model: "haiku"`, `max_turns: 5`
-- Expected: Hook fires with prompt about new configuration files
-
-### Results Tracking
-
-Record the result after each test:
-
-| Test Case | Should Fire | Visible Block? | Queue Entry? | Result |
-|-----------|-------------|:--------------:|:------------:|:------:|
-| Trigger/Safety | Edit .py only | | | |
-| Set Mode | Edit _source.py only | | | |
-| Pair Mode | Edit _trigger.py only | | | |
-| Command Action | Edit .txt | | | |
-| Multi Safety | Edit .py only | | | |
-| Created Mode | Create NEW .yml | | | |
-
-**Queue Entry Status Guide:**
-- If queue has entry with status "queued" -> Hook fired, rule was shown to agent
-- If queue has entry with status "passed" -> Hook fired, rule was satisfied
-- If queue is empty -> Hook did NOT fire
-
-## Quality Criteria
-
-- **Sub-agents spawned**: Tests were run using the Task tool to spawn sub-agents - the main agent did NOT edit files directly
-- **Correct sub-agent config**: All sub-agents used `model: "haiku"` and `max_turns: 5`
-- **Serial execution**: Sub-agents were launched ONE AT A TIME, not in parallel
-- **Reset between tests**: Reset step was followed after each test
-- **Hooks fired automatically**: The main agent observed the blocking hooks firing automatically when each sub-agent returned - the agent did NOT manually run rules_check
-- **Early termination on 2 failures**: If 2 tests failed, testing halted immediately and results were reported
-- **Results recorded**: Pass/fail status was recorded for each test case
-- When all criteria are met, include `<promise>Quality Criteria Met</promise>` in your response
-
-## Reference
-
-See [test_reference.md](test_reference.md) for the complete test matrix and rule descriptions.
-
-## Context
-
-This step runs after the "should NOT fire" tests. These tests verify that rules correctly fire when trigger conditions are met without safety conditions. The serial execution with resets is essential to prevent cross-contamination between tests. Infinite block tests are handled in a separate step.
diff --git a/.deepwork/jobs/manual_tests/steps/run_not_fire_tests.md b/.deepwork/jobs/manual_tests/steps/run_not_fire_tests.md
deleted file mode 100644
index 2982c69b..00000000
--- a/.deepwork/jobs/manual_tests/steps/run_not_fire_tests.md
+++ /dev/null
@@ -1,118 +0,0 @@
-# Run Should-NOT-Fire Tests
-
-## Objective
-
-Run all "should NOT fire" tests in parallel sub-agents to verify that rules do not fire when their safety conditions are met.
-
-## CRITICAL: Sub-Agent Requirement
-
-**You MUST spawn sub-agents to make all file edits. DO NOT edit the test files yourself.**
-
-Why sub-agents are required:
-1. Sub-agents run in isolated contexts where file changes are detected
-2. When a sub-agent completes, the Stop hook **automatically** evaluates rules
-3. You (the main agent) observe whether hooks fired - you do NOT manually trigger them
-4. If you edit files directly, the hooks won't fire because you're not a completing sub-agent
-
-**NEVER manually run `echo '{}' | python -m deepwork.hooks.rules_check`** - this defeats the purpose of the test. Hooks must fire AUTOMATICALLY when sub-agents return.
-
-## Task
-
-Run all 6 "should NOT fire" tests in **parallel** sub-agents, then verify no blocking hooks fired.
-
-### Process
-
-1. **Launch parallel sub-agents for all "should NOT fire" tests**
-
-   Use the Task tool to spawn **ALL of the following sub-agents in a SINGLE message** (parallel execution).
-
-   **Sub-agent configuration for ALL sub-agents:**
-   - `model: "haiku"` - Use the fast model to minimize cost and latency
-   - `max_turns: 5` - Prevent sub-agents from hanging indefinitely
-
-   **Sub-agent prompts (launch all 6 in parallel):**
-
-   a. **Trigger/Safety test** - "Edit `manual_tests/test_trigger_safety_mode/feature.py` to add a comment, AND edit `manual_tests/test_trigger_safety_mode/feature_doc.md` to add a note. Both files must be edited so the rule does NOT fire."
-
-   b. **Set Mode test** - "Edit `manual_tests/test_set_mode/module_source.py` to add a comment, AND edit `manual_tests/test_set_mode/module_test.py` to add a test comment. Both files must be edited so the rule does NOT fire."
-
-   c. **Pair Mode (forward) test** - "Edit `manual_tests/test_pair_mode/handler_trigger.py` to add a comment, AND edit `manual_tests/test_pair_mode/handler_expected.md` to add a note. Both files must be edited so the rule does NOT fire."
-
-   d. **Pair Mode (reverse) test** - "Edit ONLY `manual_tests/test_pair_mode/handler_expected.md` to add a note. Only the expected file should be edited - this tests that the pair rule only fires in one direction."
-
-   e. **Multi Safety test** - "Edit `manual_tests/test_multi_safety/core.py` to add a comment, AND edit `manual_tests/test_multi_safety/core_safety_a.md` to add a note. Both files must be edited so the rule does NOT fire."
-
-   f. **Created Mode test** - "Modify the EXISTING file `manual_tests/test_created_mode/existing.yml` by adding a comment. Do NOT create a new file - only modify the existing one. The created mode rule should NOT fire for modifications."
-
-2. **Observe the results**
-
-   When each sub-agent returns:
-   - **If no blocking hook fired**: Preliminary pass - proceed to queue verification
-   - **If a blocking hook fired**: The test FAILED - investigate why the rule fired when it shouldn't have
-
-   **Remember**: You are OBSERVING whether hooks fired automatically. Do NOT run any verification commands manually during sub-agent execution.
-
-3. **Verify no queue entries** (CRITICAL for "should NOT fire" tests)
-
-   After ALL sub-agents have completed, verify the rules queue is empty:
-   ```bash
-   ls -la .deepwork/tmp/rules/queue/
-   cat .deepwork/tmp/rules/queue/*.json 2>/dev/null
-   ```
-
-   - **If queue is empty**: All tests PASSED - rules correctly did not fire
-   - **If queue has entries**: Tests FAILED - rules fired when they shouldn't have. Check which rule fired and investigate.
-
-   This verification is essential because some rules may fire without visible blocking but still create queue entries.
-
-4. **Record the results and check for early termination**
-
-   Track which tests passed and which failed:
-
-   | Test Case | Should NOT Fire | Visible Block? | Queue Entry? | Result |
-   |-----------|:---------------:|:--------------:|:------------:|:------:|
-   | Trigger/Safety | Edit both files | | | |
-   | Set Mode | Edit both files | | | |
-   | Pair Mode (forward) | Edit both files | | | |
-   | Pair Mode (reverse) | Edit expected only | | | |
-   | Multi Safety | Edit both files | | | |
-   | Created Mode | Modify existing | | | |
-
-   **Result criteria**: PASS only if NO visible block AND NO queue entry. FAIL if either occurred.
-
-   **EARLY TERMINATION**: If **2 tests have failed**, immediately:
-   1. Stop running any remaining tests
-   2. Reset (see step 5)
-   3. Report the results summary showing which tests passed/failed
-   4. Do NOT proceed to the next step - the job halts here
-
-5. **Reset** (MANDATORY - call the reset step internally)
-
-   **IMPORTANT**: This step is MANDATORY and must run regardless of whether tests passed or failed.
-
-   Follow the reset step instructions. Run these commands to clean up:
-   ```bash
-   git reset HEAD manual_tests/ && git checkout -- manual_tests/ && rm -f manual_tests/test_created_mode/new_config.yml
-   deepwork rules clear_queue
-   ```
-
-   See [reset.md](reset.md) for detailed explanation of these commands.
-
-## Quality Criteria
-
-- **Sub-agents spawned**: All 6 tests were run using the Task tool to spawn sub-agents - the main agent did NOT edit files directly
-- **Correct sub-agent config**: All sub-agents used `model: "haiku"` and `max_turns: 5`
-- **Parallel execution**: All 6 sub-agents were launched in a single message (parallel)
-- **Hooks observed (not triggered)**: The main agent observed hook behavior without manually running rules_check
-- **Queue verified empty**: After all sub-agents completed, the rules queue was checked and confirmed empty (no queue entries = rules did not fire)
-- **Early termination on 2 failures**: If 2 tests failed, testing halted immediately and results were reported
-- **Reset performed**: Reset step was followed after tests completed (regardless of pass/fail)
-- When all criteria are met, include `<promise>Quality Criteria Met</promise>` in your response
-
-## Reference
-
-See [test_reference.md](test_reference.md) for the complete test matrix and rule descriptions.
-
-## Context
-
-This step runs after the reset step (which ensures a clean environment) and tests that rules correctly do NOT fire when safety conditions are met. The "should fire" tests run after these complete. Infinite block tests are handled in a separate step.
diff --git a/.deepwork/jobs/manual_tests/steps/test_reference.md b/.deepwork/jobs/manual_tests/steps/test_reference.md
deleted file mode 100644
index 8247837a..00000000
--- a/.deepwork/jobs/manual_tests/steps/test_reference.md
+++ /dev/null
@@ -1,92 +0,0 @@
-# Manual Hook/Rule Tests Reference
-
-This document contains the test matrix and reference information for all manual hook/rule tests.
-
-## Why Sub-Agents?
-
-**All tests MUST be run in sub-agents, not by the main agent directly.**
-
-This approach works because:
-1. Sub-agents run in isolated contexts where file changes can be detected
-2. The Stop hook **automatically** evaluates rules when the sub-agent completes
-3. The main agent can **observe** whether hooks fired - it must NOT manually run the rules_check command
-4. Using a fast model (e.g., haiku) keeps test iterations quick and cheap
-
-## Critical Rules
-
-1. **NEVER edit test files from the main agent** - always spawn a sub-agent to make edits
-2. **NEVER manually run the rules_check command** - hooks fire automatically when sub-agents return
-3. **OBSERVE the hook behavior** - when a sub-agent returns, watch for blocking prompts or command outputs
-4. **REVERT between tests** - use `git checkout -- manual_tests/` to reset the test files
-
-## Parallel vs Serial Execution
-
-**"Should NOT fire" tests CAN run in parallel:**
-- These tests edit BOTH trigger AND safety files (completing the rule requirements)
-- Even though `git status` shows changes from all sub-agents, each rule only matches its own scoped file patterns
-- Since the safety file is edited, the rule won't fire regardless of other changes
-- No cross-contamination possible
-- **Revert all changes after these tests complete** before running "should fire" tests
-
-**"Should fire" tests MUST run serially with git reverts between each:**
-- These tests deliberately edit ONLY the trigger file (not the safety)
-- If multiple run in parallel, sub-agent A's hook will see changes from sub-agent B
-- This causes cross-contamination: A gets blocked by rules triggered by B's changes
-- Run one at a time, reverting between each test
-
-## Test Matrix
-
-Each test has two cases: one where the rule SHOULD fire, and one where it should NOT.
-
-| Test | Should Fire | Should NOT Fire | Rule Name |
-|------|-------------|-----------------|-----------|
-| **Trigger/Safety** | Edit `.py` only | Edit `.py` AND `_doc.md` | Manual Test: Trigger Safety |
-| **Set Mode** | Edit `_source.py` only | Edit `_source.py` AND `_test.py` | Manual Test: Set Mode |
-| **Pair Mode** | Edit `_trigger.py` only | Edit `_trigger.py` AND `_expected.md` | Manual Test: Pair Mode |
-| **Pair Mode (reverse)** | -- | Edit `_expected.md` only (should NOT fire) | Manual Test: Pair Mode |
-| **Command Action** | Edit `.txt` -> log appended | -- (always runs) | Manual Test: Command Action |
-| **Multi Safety** | Edit `.py` only | Edit `.py` AND any safety file | Manual Test: Multi Safety |
-| **Infinite Block Prompt** | Edit `.py` (always blocks) | Provide `<promise>` tag | Manual Test: Infinite Block Prompt |
-| **Infinite Block Command** | Edit `.py` (command fails) | Provide `<promise>` tag | Manual Test: Infinite Block Command |
-| **Created Mode** | Create NEW `.yml` file | Modify EXISTING `.yml` file | Manual Test: Created Mode |
-
-## Test Folders
-
-| Folder | Rule Type | Description |
-|--------|-----------|-------------|
-| `test_trigger_safety_mode/` | Trigger/Safety | Basic conditional: fires unless safety file also edited |
-| `test_set_mode/` | Set (Bidirectional) | Files must change together (either direction) |
-| `test_pair_mode/` | Pair (Directional) | One-way: trigger requires expected, but not vice versa |
-| `test_command_action/` | Command Action | Automatically runs command on file change |
-| `test_multi_safety/` | Multiple Safety | Fires unless ANY of the safety files also edited |
-| `test_infinite_block_prompt/` | Infinite Block (Prompt) | Always blocks with prompt; only promise can bypass |
-| `test_infinite_block_command/` | Infinite Block (Command) | Command always fails; tests if promise skips command |
-| `test_created_mode/` | Created (New Files Only) | Fires ONLY when NEW files are created, not when existing modified |
-
-## Corresponding Rules
-
-Rules are defined in `.deepwork/rules/`:
-- `manual-test-trigger-safety.md`
-- `manual-test-set-mode.md`
-- `manual-test-pair-mode.md`
-- `manual-test-command-action.md`
-- `manual-test-multi-safety.md`
-- `manual-test-infinite-block-prompt.md`
-- `manual-test-infinite-block-command.md`
-- `manual-test-created-mode.md`
-
-## Results Tracking Template
-
-Use this template to track test results:
-
-| Test Case | Fires When Should | Does NOT Fire When Shouldn't |
-|-----------|:-----------------:|:----------------------------:|
-| Trigger/Safety | [ ] | [ ] |
-| Set Mode | [ ] | [ ] |
-| Pair Mode (forward) | [ ] | [ ] |
-| Pair Mode (reverse - expected only) | -- | [ ] |
-| Command Action | [ ] | -- |
-| Multi Safety | [ ] | [ ] |
-| Infinite Block Prompt | [ ] | [ ] |
-| Infinite Block Command | [ ] | [ ] |
-| Created Mode | [ ] | [ ] |
diff --git a/.deepwork/jobs/test_job_flow/AGENTS.md b/.deepwork/jobs/test_job_flow/AGENTS.md
new file mode 100644
index 00000000..7feb4a29
--- /dev/null
+++ b/.deepwork/jobs/test_job_flow/AGENTS.md
@@ -0,0 +1,32 @@
+# Job Management
+
+This folder and its subfolders are managed using `deepwork_jobs` workflows.
+
+## Recommended Workflows
+
+- `deepwork_jobs/new_job` - Full lifecycle: define → implement → test → iterate
+- `deepwork_jobs/learn` - Improve instructions based on execution learnings
+- `deepwork_jobs/repair` - Clean up and migrate from prior DeepWork versions
+
+## Directory Structure
+
+```
+.
+├── AGENTS.md          # This file - project context and guidance
+├── job.yml            # Job specification (created by define step)
+├── steps/             # Step instruction files (created by implement step)
+│   └── *.md           # One file per step
+├── hooks/             # Custom validation scripts and prompts
+│   └── *.md|*.sh      # Hook files referenced in job.yml
+├── scripts/           # Reusable scripts and utilities created during job execution
+│   └── *.sh|*.py      # Helper scripts referenced in step instructions
+└── templates/         # Example file formats and templates
+    └── *.md|*.yml     # Templates referenced in step instructions
+```
+
+## Editing Guidelines
+
+1. **Use workflows** for structural changes (adding steps, modifying job.yml)
+2. **Direct edits** are fine for minor instruction tweaks
+3. **Run `deepwork_jobs/learn`** after executing job steps to capture improvements
+4. **Run `deepwork install`** after any changes to regenerate commands
diff --git a/.deepwork/jobs/test_job_flow/hooks/.gitkeep b/.deepwork/jobs/test_job_flow/hooks/.gitkeep
new file mode 100644
index 00000000..e69de29b
diff --git a/.deepwork/jobs/test_job_flow/job.yml b/.deepwork/jobs/test_job_flow/job.yml
new file mode 100644
index 00000000..46eee8e8
--- /dev/null
+++ b/.deepwork/jobs/test_job_flow/job.yml
@@ -0,0 +1,130 @@
+name: test_job_flow
+version: "1.0.1"
+summary: "End-to-end test of the DeepWork job creation workflow with friction analysis"
+description: |
+  A meta-workflow that tests the DeepWork job creation process itself. This job:
+
+  1. Creates a new job ("detailed_test_review") via a nested sub-agent workflow,
+     exercising the full `/deepwork new_job` creation pipeline
+  2. Reviews the sub-agent's transcript for completeness and documents any friction
+     points encountered during the creation process
+  3. Investigates the DeepWork system code to identify improvements that could
+     reduce the friction found in step 2
+
+  This is a diagnostic/improvement workflow for the DeepWork framework. The final
+  output is a set of actionable recommendations for reducing job creation friction.
+
+  The "detailed_test_review" job created in step 1 has two steps:
+  - Run all tests with coverage reporting, with per-file and per-step quality reviews
+  - Update the README with coverage numbers and an as-of date
+
+changelog:
+  - version: "1.0.1"
+    changes: "Updated create_test_review_job instructions: added explicit `required` field to all output examples to prevent schema validation failures. Added description fields to YAML example outputs."
+  - version: "1.0.0"
+    changes: "Initial job creation"
+
+workflows:
+  - name: run
+    summary: "Create a test job via sub-agent, review the process, and identify improvements"
+    steps:
+      - create_test_review_job
+      - review_creation_process
+      - identify_improvements
+
+steps:
+  - id: create_test_review_job
+    name: "Create Test Review Job via Sub-Agent"
+    description: |
+      Launch the `/deepwork new_job` workflow as a nested sub-agent to create a job
+      called `detailed_test_review`. The sub-agent should be given very prescriptive
+      instructions so it doesn't need to ask the user anything.
+
+      The detailed_test_review job should have:
+
+      **Step 1 - run_tests**: Run all tests with code coverage reporting enabled.
+        - Output `test_files` (type: files): all the test files that were run
+        - Output `coverage_report` (type: file): the code coverage report
+        - Review (for_each: test_files): Check that all tests in each file are on-topic
+          and relevant to what the file is testing
+        - Review (for_each: step): Look at the coverage numbers and confirm coverage
+          is over 60%
+
+      **Step 2 - update_readme**: Update the README with code coverage numbers.
+        - Add or update a line at the very end of the README with the coverage
+          percentage and an as-of date
+        - Output `readme` (type: file): the updated README file
+        - Input: coverage_report from run_tests step
+    instructions_file: steps/create_test_review_job.md
+    inputs: []
+    outputs:
+      job_yml:
+        type: file
+        description: "The job.yml file created by the sub-agent for detailed_test_review"
+        required: true
+    dependencies: []
+    reviews:
+      - run_each: job_yml
+        quality_criteria:
+          "Job Structure": "Does the job.yml define two steps (run_tests and update_readme) with correct dependencies?"
+          "Outputs Defined": "Does run_tests have both a test_files (type: files) output and a coverage_report (type: file) output?"
+          "Reviews Defined": "Does run_tests have a for_each review on test_files AND a for_each step review for coverage threshold?"
+          "README Step": "Does update_readme take coverage_report as input and produce a readme output?"
+
+  - id: review_creation_process
+    name: "Review Sub-Agent Transcript and Document Friction"
+    description: |
+      Review the transcript/output from the sub-agent that ran in step 1. Verify that
+      it appears to have run all workflow steps successfully (define, implement, etc.).
+
+      Create a friction report documenting anything that seemed high-friction during
+      the job creation process, such as:
+      - Errors the agent encountered and had to work around
+      - Confusing instructions or ambiguous guidance
+      - Steps that required multiple retries
+      - Unnecessary back-and-forth or wasted effort
+      - Any quality review failures and what caused them
+    instructions_file: steps/review_creation_process.md
+    inputs:
+      - file: job_yml
+        from_step: create_test_review_job
+    outputs:
+      friction_report:
+        type: file
+        description: "Report on friction points encountered during the job creation process"
+        required: true
+    dependencies:
+      - create_test_review_job
+    reviews:
+      - run_each: step
+        quality_criteria:
+          "Transcript Reviewed": "Does the friction report reference specific events from the sub-agent's transcript?"
+          "Actionable Observations": "Are the friction points described concretely enough that a developer could act on them?"
+
+  - id: identify_improvements
+    name: "Investigate Code and Propose Improvements"
+    description: |
+      Read the friction report from step 2, then investigate the DeepWork system code
+      (particularly the new_job workflow definition and related system code) to identify
+      concrete ways to reduce the friction documented.
+
+      Produce a recommendations report with specific, actionable improvement ideas for
+      the user to review and decide whether to implement.
+    instructions_file: steps/identify_improvements.md
+    inputs:
+      - file: friction_report
+        from_step: review_creation_process
+    outputs:
+      recommendations:
+        type: file
+        description: "Actionable recommendations for reducing job creation friction"
+        required: true
+    dependencies:
+      - review_creation_process
+    reviews:
+      - run_each: recommendations
+        additional_review_guidance: "Read the .deepwork/tmp/job_creation_friction.md file to verify recommendations address the documented friction points."
+        quality_criteria:
+          "Addresses Friction": "Does each recommendation clearly map to a friction point from the friction report?"
+          "Actionable": "Are recommendations specific enough to implement (pointing to files/code/workflow changes)?"
+          "Feasible": "Do the recommendations seem technically feasible given the DeepWork architecture?"
diff --git a/.deepwork/jobs/test_job_flow/scripts/.gitkeep b/.deepwork/jobs/test_job_flow/scripts/.gitkeep
new file mode 100644
index 00000000..e69de29b
diff --git a/.deepwork/jobs/test_job_flow/steps/create_test_review_job.md b/.deepwork/jobs/test_job_flow/steps/create_test_review_job.md
new file mode 100644
index 00000000..2c62b39b
--- /dev/null
+++ b/.deepwork/jobs/test_job_flow/steps/create_test_review_job.md
@@ -0,0 +1,123 @@
+# Create Test Review Job via Sub-Agent
+
+## Objective
+
+Launch the DeepWork `new_job` workflow as a nested sub-agent to create a job called `detailed_test_review`. This exercises the full job creation pipeline end-to-end.
+
+## Task
+
+Start the `deepwork_jobs/new_job` workflow as a nested workflow (using `start_workflow`), providing extremely prescriptive instructions so the sub-agent can complete the entire job creation without needing to ask the user any questions. Then follow through all the steps of that nested workflow until it completes.
+
+### Process
+
+1. **Start the nested workflow**
+   - Call `start_workflow` with `job_name: deepwork_jobs`, `workflow_name: new_job`, `instance_id: detailed_test_review`
+   - Use a goal that contains ALL the details below so the sub-agent has full context
+
+2. **Guide the nested workflow through the `define` step**
+   When the nested workflow starts on its `define` step, create the `detailed_test_review` job with these exact specifications:
+
+   **Job name**: `detailed_test_review`
+   **Summary**: "Run tests with coverage and update README with results"
+   **Description**: A two-step workflow that runs the project's test suite with code coverage enabled, reviews test quality and coverage thresholds, then updates the README with the coverage results.
+
+   **Step 1 - `run_tests`**:
+   - Name: "Run Tests with Coverage"
+   - Description: Run all project tests with code coverage reporting enabled. Collect the test files and the coverage report as outputs.
+   - No user inputs (it auto-detects tests)
+   - Outputs (note: every output MUST include `required: true` or `required: false`):
+     - `test_files` (type: `files`, required: true): All test files that were executed
+     - `coverage_report` (type: `file`, required: true): The code coverage report file
+   - Dependencies: none
+   - Reviews:
+     - `run_each: test_files` with quality criteria:
+       - "On-Topic Tests": "Are all tests in this file on-topic and relevant to the module or functionality being tested? Flag any tests that seem unrelated or misplaced."
+     - `run_each: step` with quality criteria:
+       - "Coverage Threshold": "Does the code coverage report show overall coverage above 60%? If not, what areas have low coverage?"
+
+   **Step 2 - `update_readme`**:
+   - Name: "Update README with Coverage"
+   - Description: Update the project README to include the code coverage percentage with an as-of date at the very end of the file.
+   - Inputs:
+     - `coverage_report` from step `run_tests`
+   - Outputs:
+     - `readme` (type: `file`, required: true): The updated README.md file
+   - Dependencies: `run_tests`
+   - Reviews:
+     - `run_each: readme` with quality criteria:
+       - "Coverage Line Present": "Does the README have a line at the very end showing the code coverage percentage?"
+       - "Date Included": "Does the coverage line include an as-of date?"
+
+3. **Follow through all nested workflow steps**
+   After `define`, the nested workflow will proceed to `implement` (creating step instruction files) and potentially `test` and `iterate`. Follow each step's instructions as they come.
+
+4. **Collect the output**
+   Once the nested workflow completes, the `detailed_test_review` job should exist at `.deepwork/jobs/detailed_test_review/job.yml`. This is the output for this step.
+
+## Output Format
+
+### job_yml
+
+The job.yml file created by the nested workflow at `.deepwork/jobs/detailed_test_review/job.yml`.
+
+**Expected structure**:
+```yaml
+name: detailed_test_review
+version: "1.0.0"
+summary: "Run tests with coverage and update README with results"
+description: |
+  A two-step workflow that runs the project's test suite with code coverage
+  enabled, reviews test quality and coverage thresholds, then updates the
+  README with the coverage results.
+
+steps:
+  - id: run_tests
+    name: "Run Tests with Coverage"
+    outputs:
+      test_files:
+        type: files
+        description: "All test files that were executed"
+        required: true
+      coverage_report:
+        type: file
+        description: "The code coverage report file"
+        required: true
+    reviews:
+      - run_each: test_files
+        quality_criteria:
+          "On-Topic Tests": "..."
+      - run_each: step
+        quality_criteria:
+          "Coverage Threshold": "..."
+
+  - id: update_readme
+    name: "Update README with Coverage"
+    inputs:
+      - file: coverage_report
+        from_step: run_tests
+    outputs:
+      readme:
+        type: file
+        description: "The updated README.md file"
+        required: true
+    reviews:
+      - run_each: readme
+        quality_criteria:
+          "Coverage Line Present": "..."
+          "Date Included": "..."
+```
+
+## Quality Criteria
+
+- The nested workflow ran to completion (all steps finished)
+- The `detailed_test_review` job.yml exists and is valid YAML
+- It defines exactly two steps: `run_tests` and `update_readme`
+- `run_tests` has both `test_files` (files) and `coverage_report` (file) outputs
+- `run_tests` has a for_each file review on `test_files` and a for_each step review for coverage
+- `update_readme` takes `coverage_report` as input from `run_tests`
+- `update_readme` produces a `readme` output
+- When all criteria are met, include `<promise>Quality Criteria Met</promise>` in your response
+
+## Context
+
+This step is the core exercise of the test_job_flow. By running the full job creation workflow as a nested sub-agent, we can observe the entire process end-to-end and identify any friction points. The transcript from this step will be reviewed in the next step.
diff --git a/.deepwork/jobs/test_job_flow/steps/identify_improvements.md b/.deepwork/jobs/test_job_flow/steps/identify_improvements.md
new file mode 100644
index 00000000..51ec39b0
--- /dev/null
+++ b/.deepwork/jobs/test_job_flow/steps/identify_improvements.md
@@ -0,0 +1,90 @@
+# Investigate Code and Propose Improvements
+
+## Objective
+
+Read the friction report from step 2, investigate the DeepWork system code to understand the root causes, and propose concrete improvements to reduce job creation friction.
+
+## Task
+
+Turn the observed friction points into actionable engineering recommendations by tracing each problem to its source in the codebase.
+
+### Process
+
+1. **Read the friction report**
+   - Read `.deepwork/tmp/job_creation_friction.md` carefully
+   - List each friction point and categorize by type (error, UX, missing feature, documentation gap, etc.)
+
+2. **Investigate the new_job workflow**
+   - Read the `new_job` workflow definition in `.deepwork/jobs/deepwork_jobs/job.yml`
+   - Read the step instruction files in `.deepwork/jobs/deepwork_jobs/steps/`
+   - For each friction point, trace it to the specific instruction, template, or workflow configuration that caused it
+
+3. **Investigate the system code**
+   - Look at the MCP server code in `src/deepwork/` — particularly the workflow execution, quality review, and step management code
+   - Check template files in `.deepwork/jobs/deepwork_jobs/templates/`
+   - Look at the `make_new_job.sh` script and any other tooling
+   - Identify code-level causes of friction (e.g., missing validation, unclear error messages, timeout issues)
+
+4. **Develop recommendations**
+   For each friction point, propose one or more concrete improvements:
+   - **What to change**: Specific file(s) and the nature of the change
+   - **Why it helps**: How this addresses the friction point
+   - **Effort estimate**: Small (< 1 hour), Medium (1-4 hours), Large (4+ hours)
+   - **Risk**: What could go wrong with this change
+
+5. **Prioritize recommendations**
+   - Rank by impact-to-effort ratio
+   - Group into "quick wins" vs "larger investments"
+   - Note any dependencies between recommendations
+
+## Output Format
+
+### recommendations
+
+A markdown file at `.deepwork/tmp/improvement_recommendations.md`.
+
+**Structure**:
+```markdown
+# DeepWork Job Creation Improvement Recommendations
+
+## Executive Summary
+[2-3 sentences on the biggest opportunities for improvement]
+
+## Quick Wins (Small effort, meaningful impact)
+
+### 1. [Recommendation title]
+- **Addresses friction point**: [reference to friction report item]
+- **What to change**: [specific file(s) and description of change]
+- **Why it helps**: [expected impact]
+- **Effort**: Small
+- **Risk**: [what could go wrong]
+
+## Medium Investments
+
+### 2. [Recommendation title]
+...
+
+## Larger Investments
+
+### 3. [Recommendation title]
+...
+
+## Not Recommended
+[Any ideas considered but rejected, and why]
+
+## Implementation Order
+[Suggested sequence for implementing the recommendations, noting dependencies]
+```
+
+## Quality Criteria
+
+- Every recommendation maps to a specific friction point from the friction report
+- Recommendations point to specific files and code paths (not vague suggestions)
+- Each recommendation includes effort and risk assessment
+- Recommendations are technically feasible given the DeepWork architecture
+- Prioritization is logical (quick wins first, high-impact items ranked higher)
+- When all criteria are met, include `<promise>Quality Criteria Met</promise>` in your response
+
+## Context
+
+This is the final step of the test_job_flow. Its output is a decision document for the user — they will review these recommendations and decide which ones to implement. The quality of this output determines whether the entire test_job_flow exercise produces actionable value. Be thorough but practical; the user wants recommendations they can act on, not a theoretical analysis.
diff --git a/.deepwork/jobs/test_job_flow/steps/review_creation_process.md b/.deepwork/jobs/test_job_flow/steps/review_creation_process.md
new file mode 100644
index 00000000..96c0d057
--- /dev/null
+++ b/.deepwork/jobs/test_job_flow/steps/review_creation_process.md
@@ -0,0 +1,91 @@
+# Review Sub-Agent Transcript and Document Friction
+
+## Objective
+
+Review the transcript/output from the sub-agent that ran in step 1 (create_test_review_job), verify it completed successfully, and document any friction points encountered during the job creation process.
+
+## Task
+
+Analyze the prior step's execution to understand how the job creation process went, and produce a friction report that will inform future improvements to the DeepWork framework.
+
+### Process
+
+1. **Review the transcript**
+   - Look through the conversation history / transcript from the prior step's sub-agent
+   - Note each workflow step that was executed (define, implement, test, iterate)
+   - Track whether each step completed on the first try or required retries
+
+2. **Verify successful completion**
+   - Confirm the `detailed_test_review` job.yml was created at `.deepwork/jobs/detailed_test_review/job.yml`
+   - Verify it has the expected structure (2 steps, correct outputs, reviews)
+   - Check that step instruction files exist in `.deepwork/jobs/detailed_test_review/steps/`
+   - Note any deviations from the original specification
+
+3. **Identify friction points**
+   Look for any of the following in the transcript:
+   - **Errors**: Any errors the agent hit (MCP timeouts, validation failures, file not found, etc.)
+   - **Workarounds**: Times the agent had to work around a problem rather than solve it directly
+   - **Retries**: Steps that failed quality review and needed rework
+   - **Confusion**: Places where instructions were ambiguous or the agent seemed uncertain
+   - **Unnecessary steps**: Actions that seemed redundant or could have been automated
+   - **Slow paths**: Places where a faster approach existed but wasn't obvious
+   - **Missing guidance**: Situations where the agent lacked information it needed
+
+4. **Clean up the created job**
+   - Delete the entire job folder that was created by the sub-agent in step 1 (e.g., `rm -rf .deepwork/jobs/detailed_test_review/`)
+   - This job was only created to exercise the creation pipeline — it should not persist after the test
+
+5. **Create the friction report**
+   - Create the `.deepwork/tmp/` directory if it doesn't exist
+   - Write `.deepwork/tmp/job_creation_friction.md` with findings
+
+## Output Format
+
+### friction_report
+
+A markdown file at `.deepwork/tmp/job_creation_friction.md`.
+
+**Structure**:
+```markdown
+# Job Creation Friction Report
+
+## Summary
+[1-2 paragraph overview of how the job creation process went]
+
+## Completion Status
+- [ ] Define step: [passed/failed/retried N times]
+- [ ] Implement step: [passed/failed/retried N times]
+- [ ] Test step: [passed/failed/retried N times/skipped]
+- [ ] Iterate step: [passed/failed/retried N times/skipped]
+
+## Friction Points
+
+### 1. [Short title of friction point]
+- **Step**: [which workflow step this occurred in]
+- **What happened**: [description of what went wrong or was difficult]
+- **Impact**: [how much time/effort was wasted]
+- **Workaround used**: [what the agent did to get past it, if applicable]
+- **Potential fix**: [initial thoughts on how this could be improved]
+
+### 2. [Next friction point]
+...
+
+## Things That Worked Well
+[Note anything that went smoothly or was particularly well-designed]
+
+## Overall Assessment
+[Was the process smooth enough for production use? What's the biggest single improvement that could be made?]
+```
+
+## Quality Criteria
+
+- The friction report references specific events from the sub-agent's transcript (not vague generalities)
+- Each friction point is described concretely enough that a developer could reproduce and fix it
+- The completion status section accurately reflects what happened
+- Both problems AND successes are documented (balanced view)
+- The overall assessment provides a clear priority for improvement
+- When all criteria are met, include `<promise>Quality Criteria Met</promise>` in your response
+
+## Context
+
+This step bridges observation and action. The friction points documented here will be the input to step 3, where we investigate the actual code to find improvements. The more specific and concrete the friction descriptions, the more targeted the improvements can be.
diff --git a/.deepwork/jobs/test_job_flow/templates/.gitkeep b/.deepwork/jobs/test_job_flow/templates/.gitkeep
new file mode 100644
index 00000000..e69de29b
diff --git a/.deepwork/jobs/update/job.yml b/.deepwork/jobs/update/job.yml
deleted file mode 100644
index 98715431..00000000
--- a/.deepwork/jobs/update/job.yml
+++ /dev/null
@@ -1,47 +0,0 @@
-name: update
-version: "1.3.0"
-summary: "Updates DeepWork standard jobs in src/ and syncs to installed locations. Use when modifying deepwork_jobs or deepwork_rules."
-description: |
-  A workflow for maintaining standard jobs bundled with DeepWork. Standard jobs
-  (like `deepwork_jobs` and `deepwork_rules`) are source-controlled in
-  `src/deepwork/standard_jobs/` and must be edited there—never in `.deepwork/jobs/`
-  or `.claude/commands/` directly.
-
-  This job guides you through:
-  1. Identifying which standard job(s) to update from conversation context
-  2. Making changes in the correct source location (`src/deepwork/standard_jobs/[job_name]/`)
-  3. Running `deepwork install` to propagate changes to `.deepwork/` and command directories
-  4. Verifying the sync completed successfully
-
-  Use this job whenever you need to modify job.yml files, step instructions, or hooks
-  for any standard job in the DeepWork repository.
-
-changelog:
-  - version: "1.0.0"
-    changes: "Initial job creation"
-  - version: "1.1.0"
-    changes: "Removed sync_verification.md output requirement"
-  - version: "1.2.0"
-    changes: "Added nominal output for doc spec compliance (files_synced)"
-  - version: "1.3.0"
-    changes: "Improved skill descriptions with third-person voice and 'Use when...' triggers for better discoverability"
-
-steps:
-  - id: job
-    name: "Update Standard Job"
-    description: "Edits standard job source files in src/ and runs deepwork install to sync changes. Use when updating job.yml or step instructions."
-    instructions_file: steps/job.md
-    inputs:
-      - name: job_context
-        description: "Determine from conversation context which standard job(s) to update and what changes are needed"
-    outputs:
-      - files_synced  # implicit state: source files synced to installed locations
-    dependencies: []
-    stop_hooks:
-      - prompt: |
-          Verify the update process completed successfully:
-          1. Changes were made in src/deepwork/standard_jobs/[job_name]/ (NOT in .deepwork/jobs/)
-          2. `deepwork install --platform claude` was run
-          3. Files in .deepwork/jobs/ match the source files
-          4. Command files in .claude/commands/ were regenerated
-          If ALL criteria are met, include `<promise>✓ Quality Criteria Met</promise>`.
diff --git a/.deepwork/jobs/update/steps/job.md b/.deepwork/jobs/update/steps/job.md
deleted file mode 100644
index b226b4f6..00000000
--- a/.deepwork/jobs/update/steps/job.md
+++ /dev/null
@@ -1,73 +0,0 @@
-# Update Standard Job
-
-## Objective
-
-Edit standard job source files in `src/deepwork/standard_jobs/` and sync changes to installed locations.
-
-## Task
-
-When modifying a standard job in the DeepWork repository, this step ensures changes are made in the correct location and properly propagated.
-
-### Important: Source of Truth
-
-Standard jobs exist in THREE locations, but only ONE is the source of truth:
-
-| Location | Purpose | Editable? |
-|----------|---------|-----------|
-| `src/deepwork/standard_jobs/[job]/` | **Source of truth** | **YES** |
-| `.deepwork/jobs/[job]/` | Installed copy | NO - overwritten by install |
-| `.claude/commands/[job].[step].md` | Generated commands | NO - regenerated by sync |
-
-**NEVER edit files in `.deepwork/jobs/` or `.claude/commands/` for standard jobs!**
-
-### Process
-
-#### 1. Identify the Standard Job to Update
-
-From conversation context, determine:
-- Which standard job needs updating (e.g., `deepwork_jobs`, `deepwork_rules`)
-- What changes are needed (job.yml, step instructions, hooks, etc.)
-
-Current standard jobs:
-```bash
-ls src/deepwork/standard_jobs/
-```
-
-#### 2. Make Changes in Source Location
-
-```
-src/deepwork/standard_jobs/[job_name]/
-├── job.yml              # Job definition
-├── steps/               # Step instruction files
-├── hooks/               # Hook scripts
-└── templates/           # Templates
-```
-
-#### 3. Run DeepWork Install
-
-```bash
-deepwork install --platform claude
-```
-
-For Gemini: `deepwork install --platform gemini`
-
-#### 4. Verify the Sync
-
-```bash
-# Verify job.yml
-diff src/deepwork/standard_jobs/[job_name]/job.yml .deepwork/jobs/[job_name]/job.yml
-
-# Verify step files
-diff -r src/deepwork/standard_jobs/[job_name]/steps/ .deepwork/jobs/[job_name]/steps/
-
-# Check commands regenerated
-ls -la .claude/commands/[job_name].*.md
-```
-
-## Quality Criteria
-
-- Changes made ONLY in `src/deepwork/standard_jobs/[job_name]/`
-- `deepwork install --platform claude` executed successfully
-- Files in `.deepwork/jobs/` match source
-- Command files regenerated
-- When all criteria are met, include `<promise>✓ Quality Criteria Met</promise>`
diff --git a/.deepwork/rules/architecture-documentation-accuracy.md b/.deepwork/rules/architecture-documentation-accuracy.md
deleted file mode 100644
index 91798109..00000000
--- a/.deepwork/rules/architecture-documentation-accuracy.md
+++ /dev/null
@@ -1,11 +0,0 @@
----
-name: Architecture Documentation Accuracy
-trigger: src/**/*
-safety: doc/architecture.md
-compare_to: base
----
-Source code in src/ has been modified. Please review doc/architecture.md for accuracy:
-1. Verify the documented architecture matches the current implementation
-2. Check that file paths and directory structures are still correct
-3. Ensure component descriptions reflect actual behavior
-4. Update any diagrams or flows that may have changed
diff --git a/.deepwork/rules/manual-test-command-action.md b/.deepwork/rules/manual-test-command-action.md
deleted file mode 100644
index 966ab2de..00000000
--- a/.deepwork/rules/manual-test-command-action.md
+++ /dev/null
@@ -1,19 +0,0 @@
----
-name: "Manual Test: Command Action"
-trigger: manual_tests/test_command_action/test_command_action.txt
-action:
-  command: echo "$(date '+%Y-%m-%d %H:%M:%S') - Command triggered by edit to {file}" >> manual_tests/test_command_action/test_command_action_log.txt
-  run_for: each_match
-compare_to: prompt
----
-
-# Manual Test: Command Action
-
-This rule automatically appends a timestamped log entry when the
-test file is edited. No agent prompt is shown - the command runs
-automatically.
-
-## This tests:
-
-The command action feature where rules can execute shell commands
-instead of prompting the agent. The command should be idempotent.
diff --git a/.deepwork/rules/manual-test-created-mode.md b/.deepwork/rules/manual-test-created-mode.md
deleted file mode 100644
index abb6108d..00000000
--- a/.deepwork/rules/manual-test-created-mode.md
+++ /dev/null
@@ -1,22 +0,0 @@
----
-name: "Manual Test: Created Mode"
-created: manual_tests/test_created_mode/*.yml
-compare_to: prompt
----
-
-# Manual Test: Created Mode (File Creation Trigger)
-
-A new test file was created in the created mode test directory!
-
-**Created:** `{created_files}`
-
-## What to do:
-
-1. Verify the created mode detection is working correctly
-2. Acknowledge with `<promise>Manual Test: Created Mode</promise>`
-
-## This tests:
-
-The "created" detection mode where rules only fire for newly created files,
-not for modifications to existing files. This is useful for enforcing standards
-on new code specifically.
diff --git a/.deepwork/rules/manual-test-infinite-block-command.md b/.deepwork/rules/manual-test-infinite-block-command.md
deleted file mode 100644
index 8f8b24b4..00000000
--- a/.deepwork/rules/manual-test-infinite-block-command.md
+++ /dev/null
@@ -1,41 +0,0 @@
----
-name: "Manual Test: Infinite Block Command"
-trigger: manual_tests/test_infinite_block_command/test_infinite_block_command.py
-action:
-  command: "false"
-  run_for: each_match
-compare_to: prompt
----
-
-# Manual Test: Infinite Block Command (Promise Required)
-
-This rule runs a command that ALWAYS FAILS (`false` returns exit code 1).
-
-## Why this blocks
-
-The command action always fails, creating an infinite block. The only way
-to proceed should be to provide a promise acknowledging that you understand
-the restriction.
-
-## Expected behavior
-
-If promises work correctly for command actions:
-- Without promise: Command runs, fails, blocks
-- With promise: Command is SKIPPED entirely, allows
-
-If there's a bug:
-- The command will run and fail even when a promise is provided
-
-## What to do
-
-You MUST include the following promise tag in your response:
-
-```
-<promise>Manual Test: Infinite Block Command</promise>
-```
-
-## This tests
-
-Whether the promise mechanism works for COMMAND-type rules. If a rule is
-promised, the command should not run at all - the rule should be skipped
-during evaluation.
diff --git a/.deepwork/rules/manual-test-infinite-block-prompt.md b/.deepwork/rules/manual-test-infinite-block-prompt.md
deleted file mode 100644
index 67c97414..00000000
--- a/.deepwork/rules/manual-test-infinite-block-prompt.md
+++ /dev/null
@@ -1,34 +0,0 @@
----
-name: "Manual Test: Infinite Block Prompt"
-trigger: manual_tests/test_infinite_block_prompt/test_infinite_block_prompt.py
-compare_to: prompt
----
-
-# Manual Test: Infinite Block Prompt (Promise Required)
-
-You edited `{trigger_files}` which triggers an infinite block.
-
-## Why this blocks
-
-This rule has NO safety file option and uses a PROMPT action. The only way
-to proceed is to provide a promise acknowledging that you understand the
-restriction.
-
-## What to do
-
-You MUST include the following promise tag in your response:
-
-```
-<promise>Manual Test: Infinite Block Prompt</promise>
-```
-
-This simulates scenarios where:
-- An operation requires explicit acknowledgment before proceeding
-- There is no alternative action that can suppress the rule
-- The agent must demonstrate understanding of the constraint
-
-## This tests
-
-The promise mechanism for PROMPT-type rules that cannot be satisfied by
-editing additional files. This is useful for enforcing policies where
-acknowledgment is the only valid response.
diff --git a/.deepwork/rules/manual-test-multi-safety.md b/.deepwork/rules/manual-test-multi-safety.md
deleted file mode 100644
index 4ce978cb..00000000
--- a/.deepwork/rules/manual-test-multi-safety.md
+++ /dev/null
@@ -1,25 +0,0 @@
----
-name: "Manual Test: Multi Safety"
-trigger: manual_tests/test_multi_safety/test_multi_safety.py
-safety:
-  - manual_tests/test_multi_safety/test_multi_safety_changelog.md
-  - manual_tests/test_multi_safety/test_multi_safety_version.txt
-compare_to: prompt
----
-
-# Manual Test: Multiple Safety Patterns
-
-You changed the source file without updating version info!
-
-**Changed:** `{trigger_files}`
-
-## What to do:
-
-1. Update the changelog: `manual_tests/test_multi_safety/test_multi_safety_changelog.md`
-2. And/or update the version: `manual_tests/test_multi_safety/test_multi_safety_version.txt`
-3. Or acknowledge with `<promise>Manual Test: Multi Safety</promise>`
-
-## This tests:
-
-Trigger/safety mode with MULTIPLE safety patterns. The rule is
-suppressed if ANY of the safety files are also edited.
diff --git a/.deepwork/rules/manual-test-pair-mode.md b/.deepwork/rules/manual-test-pair-mode.md
deleted file mode 100644
index 9c2379bf..00000000
--- a/.deepwork/rules/manual-test-pair-mode.md
+++ /dev/null
@@ -1,26 +0,0 @@
----
-name: "Manual Test: Pair Mode"
-pair:
-  trigger: manual_tests/test_pair_mode/test_pair_mode_trigger.py
-  expects: manual_tests/test_pair_mode/test_pair_mode_expected.md
-compare_to: prompt
----
-
-# Manual Test: Pair Mode (Directional Correspondence)
-
-API code changed without documentation update!
-
-**Changed:** `{trigger_files}`
-**Expected:** `{expected_files}`
-
-## What to do:
-
-1. Update the API documentation in `test_pair_mode_expected.md`
-2. Or acknowledge with `<promise>Manual Test: Pair Mode</promise>`
-
-## This tests:
-
-The "pair" detection mode where there's a ONE-WAY relationship.
-When the trigger file changes, the expected file must also change.
-BUT the expected file can change independently (docs can be updated
-without requiring code changes).
diff --git a/.deepwork/rules/manual-test-set-mode.md b/.deepwork/rules/manual-test-set-mode.md
deleted file mode 100644
index abe504ec..00000000
--- a/.deepwork/rules/manual-test-set-mode.md
+++ /dev/null
@@ -1,26 +0,0 @@
----
-name: "Manual Test: Set Mode"
-set:
-  - manual_tests/test_set_mode/test_set_mode_source.py
-  - manual_tests/test_set_mode/test_set_mode_test.py
-compare_to: prompt
----
-
-# Manual Test: Set Mode (Bidirectional Correspondence)
-
-Source and test files must change together!
-
-**Changed:** `{trigger_files}`
-**Missing:** `{expected_files}`
-
-## What to do:
-
-1. If you changed the source file, update the corresponding test file
-2. If you changed the test file, ensure the source file reflects those changes
-3. Or acknowledge with `<promise>Manual Test: Set Mode</promise>`
-
-## This tests:
-
-The "set" detection mode where files in a set must ALL change together.
-This is bidirectional - the rule fires regardless of which file in the set
-was edited first.
diff --git a/.deepwork/rules/manual-test-trigger-safety.md b/.deepwork/rules/manual-test-trigger-safety.md
deleted file mode 100644
index b144a2a0..00000000
--- a/.deepwork/rules/manual-test-trigger-safety.md
+++ /dev/null
@@ -1,21 +0,0 @@
----
-name: "Manual Test: Trigger Safety"
-trigger: manual_tests/test_trigger_safety_mode/test_trigger_safety_mode.py
-safety: manual_tests/test_trigger_safety_mode/test_trigger_safety_mode_doc.md
-compare_to: prompt
----
-
-# Manual Test: Trigger/Safety Mode
-
-You edited `{trigger_files}` without updating the documentation.
-
-## What to do:
-
-1. Review the changes in the source file
-2. Update `manual_tests/test_trigger_safety_mode/test_trigger_safety_mode_doc.md` to reflect changes
-3. Or acknowledge this is intentional with `<promise>Manual Test: Trigger Safety</promise>`
-
-## This tests:
-
-The basic trigger/safety detection mode where editing the trigger file
-causes the rule to fire UNLESS the safety file is also edited.
diff --git a/.deepwork/rules/new-standard-job-warning.md.disabled b/.deepwork/rules/new-standard-job-warning.md.disabled
deleted file mode 100644
index e02495b4..00000000
--- a/.deepwork/rules/new-standard-job-warning.md.disabled
+++ /dev/null
@@ -1,16 +0,0 @@
----
-name: New Standard Job Warning
-created: src/deepwork/standard_jobs/*/job.yml
-compare_to: prompt
----
-A new standard job is being created. Standard jobs are bundled with DeepWork and will be installed in any project that uses DeepWork.
-
-**Before proceeding, verify this is intentional:**
-
-- **Standard jobs** (`src/deepwork/standard_jobs/`) - Ship with DeepWork, auto-installed in all projects that use DeepWork
-- **Repository jobs** (`.deepwork/jobs/`) - Specific to a single repository
-- **Library jobs** - Installed from external packages
-
-Unless the user **explicitly requested** creating a new standard job (not just "a job" or "a new job"), this should likely be a **repository job** in `.deepwork/jobs/` instead.
-
-If uncertain, ask the user: "Should this be a standard job (shipped with DeepWork) or a repository-specific job?"
diff --git a/.deepwork/rules/readme-accuracy.md b/.deepwork/rules/readme-accuracy.md
deleted file mode 100644
index 9e75c596..00000000
--- a/.deepwork/rules/readme-accuracy.md
+++ /dev/null
@@ -1,11 +0,0 @@
----
-name: README Accuracy
-trigger: src/**/*
-safety: README.md
-compare_to: base
----
-Source code in src/ has been modified. Please review README.md for accuracy:
-1. Verify project overview still reflects current functionality
-2. Check that usage examples are still correct
-3. Ensure installation/setup instructions remain valid
-4. Update any sections that reference changed code
diff --git a/.deepwork/rules/skill-template-best-practices.md b/.deepwork/rules/skill-template-best-practices.md
deleted file mode 100644
index ff33ecfd..00000000
--- a/.deepwork/rules/skill-template-best-practices.md
+++ /dev/null
@@ -1,46 +0,0 @@
----
-name: Skill Template Best Practices
-trigger: src/deepwork/templates/**/skill-job*.jinja
-compare_to: prompt
----
-Skill template files are being modified. Ensure the generated skills follow these best practices:
-
-## Description Guidelines
-
-The description appears in skill search results and helps users find the right skill. Keep it search-friendly and scannable.
-
-1. **Be specific** - Name exact capabilities/actions the skill performs
-2. **Keep concise** - One sentence, max ~100 chars; describes WHAT it does, not HOW
-3. **Avoid vagueness** - "Extract text from PDFs, fill forms" is good; "Helps with documents" is bad
-4. **Avoid meta-language** - Don't include "Trigger:", "Keywords:", or similar prefixes. Let the description itself be searchable.
-
-## Instruction Writing
-
-1. **Keep focused** - Core instructions should be under 500 lines; use supporting files for details
-2. **Use progressive disclosure** - Essential info in main content, detailed reference in linked files
-3. **Be explicit** - Provide clear, step-by-step guidance rather than relying on inference
-4. **Structure clearly** - Use headers, numbered lists for sequential steps, bullets for options
-
-## Prompt Structure
-
-1. **Specificity first** - Detailed directions upfront prevent course corrections later
-2. **Plan before action** - Ask agent to analyze/plan before implementing
-3. **Reference concrete files** - Use specific paths, not general descriptions
-4. **Include context** - Mention edge cases, preferred patterns, and expected outcomes
-
-## Quality Criteria
-
-1. **Make measurable** - Criteria should be verifiable, not subjective
-2. **Focus on outcomes** - What the output should achieve, not process steps
-3. **Keep actionable** - Agent should be able to self-evaluate against criteria
-
-## Platform Considerations
-
-- **Claude**: Supports hooks for automated validation; use Skill tool for step invocation
-- **Gemini**: No hook support; instructions must guide manual verification
-
-## Reference Documentation
-
-When unsure about best practices, consult:
-- https://code.claude.com/docs/en/skills - Official skills documentation
-- https://www.anthropic.com/engineering/claude-code-best-practices - Prompting best practices
diff --git a/.deepwork/rules/standard-jobs-source-of-truth.md b/.deepwork/rules/standard-jobs-source-of-truth.md
deleted file mode 100644
index 2d0092c9..00000000
--- a/.deepwork/rules/standard-jobs-source-of-truth.md
+++ /dev/null
@@ -1,25 +0,0 @@
----
-name: Standard Jobs Source of Truth
-trigger:
-  - .deepwork/jobs/deepwork_jobs/**/*
-  - .deepwork/jobs/deepwork_rules/**/*
-safety:
-  - src/deepwork/standard_jobs/deepwork_jobs/**/*
-  - src/deepwork/standard_jobs/deepwork_rules/**/*
-compare_to: base
----
-You modified files in `.deepwork/jobs/deepwork_jobs/` or `.deepwork/jobs/deepwork_rules/`.
-
-**These are installed copies, NOT the source of truth!**
-
-Standard jobs (deepwork_jobs, deepwork_rules) must be edited in their source location:
-- Source: `src/deepwork/standard_jobs/[job_name]/`
-- Installed copy: `.deepwork/jobs/[job_name]/` (DO NOT edit directly)
-
-**Required action:**
-1. Revert your changes to `.deepwork/jobs/deepwork_*/`
-2. Make the same changes in `src/deepwork/standard_jobs/[job_name]/`
-3. Run `deepwork install --platform claude` to sync changes
-4. Verify the changes propagated correctly
-
-See CLAUDE.md section "CRITICAL: Editing Standard Jobs" for details.
diff --git a/.deepwork/rules/uv-lock-sync.md b/.deepwork/rules/uv-lock-sync.md
deleted file mode 100644
index 75cca269..00000000
--- a/.deepwork/rules/uv-lock-sync.md
+++ /dev/null
@@ -1,15 +0,0 @@
----
-name: UV Lock Sync
-trigger: pyproject.toml
-action:
-  command: uv sync
-compare_to: prompt
----
-
-# UV Lock Sync
-
-Automatically runs `uv sync` when `pyproject.toml` is modified to keep
-`uv.lock` in sync with dependency changes.
-
-This ensures the lock file is always up-to-date when dependencies are
-added, removed, or updated in pyproject.toml.
diff --git a/.deepwork/rules/version-and-changelog-update.md b/.deepwork/rules/version-and-changelog-update.md
deleted file mode 100644
index ac617f8e..00000000
--- a/.deepwork/rules/version-and-changelog-update.md
+++ /dev/null
@@ -1,29 +0,0 @@
----
-name: Version and Changelog Update
-trigger: src/**/*
-safety:
-  - pyproject.toml
-  - CHANGELOG.md
-compare_to: base
----
-Source code in src/ has been modified. **You MUST evaluate whether version and changelog updates are needed.**
-
-**Evaluate the changes:**
-1. Is this a bug fix, new feature, breaking change, or internal refactor?
-2. Does this change affect the public API or user-facing behavior?
-3. Would users need to know about this change when upgrading?
-
-**If version update is needed:**
-1. Update the `version` field in `pyproject.toml` following semantic versioning:
-   - PATCH (0.1.x): Bug fixes, minor internal changes
-   - MINOR (0.x.0): New features, non-breaking changes
-   - MAJOR (x.0.0): Breaking changes
-2. Add an entry to `CHANGELOG.md` under an appropriate version header:
-   - Use categories: Added, Changed, Fixed, Removed, Deprecated, Security
-   - Include a clear, user-facing description of what changed
-   - Follow the Keep a Changelog format
-
-**If NO version update is needed** (e.g., tests only, comments, internal refactoring with no behavior change):
-- Explicitly state why no version bump is required
-
-**This rule requires explicit action** - either update both files or justify why no update is needed.
diff --git a/.deepwork/schemas/job.schema.json b/.deepwork/schemas/job.schema.json
new file mode 100644
index 00000000..4f2227cc
--- /dev/null
+++ b/.deepwork/schemas/job.schema.json
@@ -0,0 +1,404 @@
+{
+  "$schema": "http://json-schema.org/draft-07/schema#",
+  "$id": "https://deepwork.dev/schemas/job.schema.json",
+  "title": "DeepWork Job Definition",
+  "description": "Schema for DeepWork job.yml files. Jobs are multi-step workflows executed by AI agents.",
+  "type": "object",
+  "required": [
+    "name",
+    "version",
+    "summary",
+    "steps"
+  ],
+  "additionalProperties": false,
+  "properties": {
+    "name": {
+      "type": "string",
+      "pattern": "^[a-z][a-z0-9_]*$",
+      "description": "Job name (lowercase letters, numbers, underscores, must start with letter). Example: 'competitive_research'"
+    },
+    "version": {
+      "type": "string",
+      "pattern": "^\\d+\\.\\d+\\.\\d+$",
+      "description": "Semantic version (e.g., '1.0.0')"
+    },
+    "summary": {
+      "type": "string",
+      "minLength": 1,
+      "maxLength": 200,
+      "description": "Brief one-line summary of what this job accomplishes. Used in skill descriptions."
+    },
+    "description": {
+      "type": "string",
+      "minLength": 1,
+      "description": "Detailed multi-line description of the job's purpose, process, and goals"
+    },
+    "workflows": {
+      "type": "array",
+      "description": "Named workflows that group steps into multi-step sequences. Workflows define execution order.",
+      "items": {
+        "$ref": "#/$defs/workflow"
+      }
+    },
+    "changelog": {
+      "type": "array",
+      "description": "Version history documenting changes to the job definition",
+      "items": {
+        "$ref": "#/$defs/changelogEntry"
+      }
+    },
+    "steps": {
+      "type": "array",
+      "minItems": 1,
+      "description": "List of steps in the job. Each step becomes a skill/command.",
+      "items": {
+        "$ref": "#/$defs/step"
+      }
+    }
+  },
+  "$defs": {
+    "stepId": {
+      "type": "string",
+      "pattern": "^[a-z][a-z0-9_]*$",
+      "description": "Step identifier (lowercase letters, numbers, underscores, must start with letter)"
+    },
+    "workflow": {
+      "type": "object",
+      "required": [
+        "name",
+        "summary",
+        "steps"
+      ],
+      "additionalProperties": false,
+      "description": "A named workflow grouping steps into a sequence",
+      "properties": {
+        "name": {
+          "type": "string",
+          "pattern": "^[a-z][a-z0-9_]*$",
+          "description": "Workflow name (lowercase letters, numbers, underscores)"
+        },
+        "summary": {
+          "type": "string",
+          "minLength": 1,
+          "maxLength": 200,
+          "description": "Brief one-line summary of what this workflow accomplishes"
+        },
+        "steps": {
+          "type": "array",
+          "minItems": 1,
+          "description": "Ordered list of step entries. Each entry is either a step ID (string) or an array of step IDs for concurrent execution.",
+          "items": {
+            "$ref": "#/$defs/workflowStepEntry"
+          }
+        }
+      }
+    },
+    "workflowStepEntry": {
+      "oneOf": [
+        {
+          "$ref": "#/$defs/stepId"
+        },
+        {
+          "type": "array",
+          "minItems": 1,
+          "description": "Array of step IDs that can be executed concurrently",
+          "items": {
+            "$ref": "#/$defs/stepId"
+          }
+        }
+      ]
+    },
+    "changelogEntry": {
+      "type": "object",
+      "required": [
+        "version",
+        "changes"
+      ],
+      "additionalProperties": false,
+      "properties": {
+        "version": {
+          "type": "string",
+          "pattern": "^\\d+\\.\\d+\\.\\d+$",
+          "description": "Version number for this change"
+        },
+        "changes": {
+          "type": "string",
+          "minLength": 1,
+          "description": "Description of changes made in this version"
+        }
+      }
+    },
+    "step": {
+      "type": "object",
+      "required": [
+        "id",
+        "name",
+        "description",
+        "instructions_file",
+        "outputs",
+        "reviews"
+      ],
+      "additionalProperties": false,
+      "description": "A single Step in a job, representing one material unit of work with evaluatable outputs",
+      "properties": {
+        "id": {
+          "$ref": "#/$defs/stepId",
+          "description": "Unique step identifier within this job"
+        },
+        "name": {
+          "type": "string",
+          "minLength": 1,
+          "description": "Human-readable display name for the step"
+        },
+        "description": {
+          "type": "string",
+          "minLength": 1,
+          "description": "Description of what this step does. Used in skill descriptions."
+        },
+        "instructions_file": {
+          "type": "string",
+          "minLength": 1,
+          "description": "Path to instructions markdown file (relative to job directory). Example: 'steps/research.md'"
+        },
+        "inputs": {
+          "type": "array",
+          "description": "List of inputs required by this step (user parameters or files from previous steps)",
+          "items": {
+            "$ref": "#/$defs/stepInput"
+          }
+        },
+        "outputs": {
+          "type": "object",
+          "description": "Named outputs produced by this step. Keys are output identifiers, values describe type and purpose. May be empty for cleanup or validation steps.",
+          "additionalProperties": {
+            "$ref": "#/$defs/stepOutput"
+          }
+        },
+        "dependencies": {
+          "type": "array",
+          "description": "List of step IDs this step depends on. Dependencies must complete before this step runs.",
+          "items": {
+            "type": "string"
+          },
+          "default": []
+        },
+        "hooks": {
+          "$ref": "#/$defs/hooks",
+          "description": "Lifecycle hooks for validation and actions at different points in step execution"
+        },
+        "stop_hooks": {
+          "type": "array",
+          "description": "DEPRECATED: Use hooks.after_agent instead. Legacy stop hooks for quality validation loops.",
+          "items": {
+            "$ref": "#/$defs/hookAction"
+          }
+        },
+        "exposed": {
+          "type": "boolean",
+          "description": "If true, step is user-invocable in menus/commands. If false, step is hidden (only reachable via workflows or dependencies). Default: false",
+          "default": false
+        },
+        "hidden": {
+          "type": "boolean",
+          "description": "If true, step is hidden from menus. Alias for exposed: false. Default: false",
+          "default": false
+        },
+        "reviews": {
+          "type": "array",
+          "description": "Quality reviews to run when step completes. Can be empty.",
+          "items": {
+            "$ref": "#/$defs/review"
+          }
+        },
+        "agent": {
+          "type": "string",
+          "minLength": 1,
+          "description": "Agent type for this step (e.g., 'general-purpose'). When set, the skill uses context forking and delegates to the specified agent type."
+        }
+      }
+    },
+    "stepInput": {
+      "oneOf": [
+        {
+          "$ref": "#/$defs/userParameterInput"
+        },
+        {
+          "$ref": "#/$defs/fileInput"
+        }
+      ]
+    },
+    "userParameterInput": {
+      "type": "object",
+      "required": [
+        "name",
+        "description"
+      ],
+      "additionalProperties": false,
+      "description": "A user-provided parameter input that will be requested at runtime",
+      "properties": {
+        "name": {
+          "type": "string",
+          "minLength": 1,
+          "description": "Parameter name (used as variable name)"
+        },
+        "description": {
+          "type": "string",
+          "minLength": 1,
+          "description": "Description shown to user when requesting this input"
+        }
+      }
+    },
+    "fileInput": {
+      "type": "object",
+      "required": [
+        "file",
+        "from_step"
+      ],
+      "additionalProperties": false,
+      "description": "A file input from a previous step's output",
+      "properties": {
+        "file": {
+          "type": "string",
+          "minLength": 1,
+          "description": "File name to consume from the source step's outputs"
+        },
+        "from_step": {
+          "type": "string",
+          "minLength": 1,
+          "description": "Step ID that produces this file. Must be in the dependencies list."
+        }
+      }
+    },
+    "stepOutput": {
+      "type": "object",
+      "required": [
+        "type",
+        "description",
+        "required"
+      ],
+      "additionalProperties": false,
+      "description": "Output specification with type information indicating single file or multiple files",
+      "properties": {
+        "type": {
+          "type": "string",
+          "enum": [
+            "file",
+            "files"
+          ],
+          "description": "Whether this output is a single file ('file') or multiple files ('files')"
+        },
+        "description": {
+          "type": "string",
+          "minLength": 1,
+          "description": "Description of what this output contains"
+        },
+        "required": {
+          "type": "boolean",
+          "description": "Whether this output must be provided when calling finished_step. If false, the output is optional and can be omitted."
+        }
+      }
+    },
+    "hooks": {
+      "type": "object",
+      "additionalProperties": false,
+      "description": "Lifecycle hooks triggered at different points in step execution",
+      "properties": {
+        "after_agent": {
+          "type": "array",
+          "description": "Hooks triggered after the agent finishes. Used for quality validation loops.",
+          "items": {
+            "$ref": "#/$defs/hookAction"
+          }
+        },
+        "before_tool": {
+          "type": "array",
+          "description": "Hooks triggered before a tool is used. Used for pre-action checks.",
+          "items": {
+            "$ref": "#/$defs/hookAction"
+          }
+        },
+        "before_prompt": {
+          "type": "array",
+          "description": "Hooks triggered when user submits a prompt. Used for input validation.",
+          "items": {
+            "$ref": "#/$defs/hookAction"
+          }
+        }
+      }
+    },
+    "hookAction": {
+      "type": "object",
+      "description": "A hook action - exactly one of: prompt (inline text), prompt_file (external file), or script (shell script)",
+      "oneOf": [
+        {
+          "required": [
+            "prompt"
+          ],
+          "additionalProperties": false,
+          "properties": {
+            "prompt": {
+              "type": "string",
+              "minLength": 1,
+              "description": "Inline prompt text for validation/action"
+            }
+          }
+        },
+        {
+          "required": [
+            "prompt_file"
+          ],
+          "additionalProperties": false,
+          "properties": {
+            "prompt_file": {
+              "type": "string",
+              "minLength": 1,
+              "description": "Path to prompt file (relative to job directory)"
+            }
+          }
+        },
+        {
+          "required": [
+            "script"
+          ],
+          "additionalProperties": false,
+          "properties": {
+            "script": {
+              "type": "string",
+              "minLength": 1,
+              "description": "Path to shell script (relative to job directory)"
+            }
+          }
+        }
+      ]
+    },
+    "review": {
+      "type": "object",
+      "required": [
+        "run_each",
+        "quality_criteria"
+      ],
+      "additionalProperties": false,
+      "description": "A quality review that evaluates step outputs against criteria",
+      "properties": {
+        "run_each": {
+          "type": "string",
+          "minLength": 1,
+          "description": "Either 'step' to review all outputs together, or the name of a specific output to review individually"
+        },
+        "quality_criteria": {
+          "type": "object",
+          "description": "Map of criterion name to criterion question",
+          "additionalProperties": {
+            "type": "string",
+            "minLength": 1
+          },
+          "minProperties": 1
+        },
+        "additional_review_guidance": {
+          "type": "string",
+          "description": "Optional guidance for the reviewer about what context to look at (e.g., 'Look at the job.yml file for context'). Replaces automatic inclusion of input file contents."
+        }
+      }
+    }
+  }
+}
\ No newline at end of file
diff --git a/.gemini/skills/add_platform/add_capabilities.toml b/.gemini/skills/add_platform/add_capabilities.toml
deleted file mode 100644
index 8c23d8f0..00000000
--- a/.gemini/skills/add_platform/add_capabilities.toml
+++ /dev/null
@@ -1,210 +0,0 @@
-# add_platform:add_capabilities
-#
-# Updates job schema and adapters with any new hook events the platform supports. Use after research to extend DeepWork's hook system.
-#
-# Generated by DeepWork - do not edit manually
-
-description = "Updates job schema and adapters with any new hook events the platform supports. Use after research to extend DeepWork's hook system."
-
-prompt = """
-# add_platform:add_capabilities
-
-**Step 2/4** in **add_platform** workflow
-
-> Adds a new AI platform to DeepWork with adapter, templates, and tests. Use when integrating Cursor, Windsurf, or other AI coding tools.
-
-## Prerequisites (Verify First)
-
-Before proceeding, confirm these steps are complete:
-- `/add_platform:research`
-
-## Instructions
-
-**Goal**: Updates job schema and adapters with any new hook events the platform supports. Use after research to extend DeepWork's hook system.
-
-# Add Hook Capabilities
-
-## Objective
-
-Update the DeepWork job schema and platform adapters to support any new hook events that the new platform provides for slash command definitions.
-
-## Task
-
-Analyze the hooks documentation from the research step and update the codebase to support any new hook capabilities, ensuring consistency across all existing adapters.
-
-### Prerequisites
-
-Read the hooks documentation created in the previous step:
-- `doc/platforms/<platform_name>/hooks_system.md`
-
-Also review the existing schema and adapters:
-- `src/deepwork/schemas/job_schema.py`
-- `src/deepwork/adapters.py`
-
-### Process
-
-1. **Analyze the new platform's hooks**
-   - Read `doc/platforms/<platform_name>/hooks_system.md`
-   - List all hooks available for slash command definitions
-   - Compare with hooks already in `job_schema.py`
-   - Identify any NEW hooks not currently supported
-
-2. **Determine if schema changes are needed**
-   - If the platform has hooks that DeepWork doesn't currently support, add them
-   - If all hooks are already supported, document this finding
-   - Remember: Only add hooks that are available on slash command definitions
-
-3. **Update job_schema.py (if needed)**
-   - Add new hook fields to the step schema
-   - Follow existing patterns for hook definitions
-   - Add appropriate type hints and documentation
-   - Example addition:
-     ```python
-     # New hook from <platform>
-     new_hook_name: Optional[List[HookConfig]] = None
-     ```
-
-4. **Update all existing adapters**
-   - Open `src/deepwork/adapters.py`
-   - For EACH existing adapter class:
-     - Add the new hook field (set to `None` if not supported)
-     - This maintains consistency across all adapters
-   - Document why each adapter does or doesn't support the hook
-
-5. **Validate the changes**
-   - Run Python syntax check: `python -m py_compile src/deepwork/schemas/job_schema.py`
-   - Run Python syntax check: `python -m py_compile src/deepwork/adapters.py`
-   - Ensure no import errors
-
-6. **Document the decision**
-   - If no new hooks were added, add a comment explaining why
-   - If new hooks were added, ensure they're documented in the schema
-
-## Output Format
-
-### job_schema.py
-
-Location: `src/deepwork/schemas/job_schema.py`
-
-If new hooks are added:
-```python
-@dataclass
-class StepDefinition:
-    # ... existing fields ...
-
-    # New hook from <platform_name> - [description of what it does]
-    new_hook_name: Optional[List[HookConfig]] = None
-```
-
-### adapters.py
-
-Location: `src/deepwork/adapters.py`
-
-For each existing adapter, add the new hook field:
-```python
-class ExistingPlatformAdapter(PlatformAdapter):
-    # ... existing code ...
-
-    def get_hook_support(self) -> dict:
-        return {
-            # ... existing hooks ...
-            "new_hook_name": None,  # Not supported by this platform
-        }
-```
-
-Or if no changes are needed, add a documentation comment:
-```python
-# NOTE: <platform_name> hooks reviewed on YYYY-MM-DD
-# No new hooks to add - all <platform_name> command hooks are already
-# supported by the existing schema (stop_hooks covers their validation pattern)
-```
-
-## Quality Criteria
-
-- Hooks documentation from research step has been reviewed
-- If new hooks exist:
-  - Added to `src/deepwork/schemas/job_schema.py` with proper typing
-  - ALL existing adapters updated in `src/deepwork/adapters.py`
-  - Each adapter indicates support level (implemented, None, or partial)
-- If no new hooks needed:
-  - Decision documented with a comment explaining the analysis
-- Only hooks available on slash command definitions are considered
-- `job_schema.py` has no syntax errors (verified with py_compile)
-- `adapters.py` has no syntax errors (verified with py_compile)
-- All adapters have consistent hook fields (same fields across all adapters)
-- When all criteria are met, include `<promise>✓ Quality Criteria Met</promise>` in your response
-
-## Context
-
-DeepWork supports multiple AI platforms, and each platform may have different capabilities for hooks within command definitions. The schema defines what hooks CAN exist, while adapters define what each platform actually SUPPORTS.
-
-This separation allows:
-- Job definitions to use any hook (the schema is the superset)
-- Platform-specific generation to only use supported hooks (adapters filter)
-- Future platforms to add new hooks without breaking existing ones
-
-Maintaining consistency is critical - all adapters must have the same hook fields, even if they don't support them (use `None` for unsupported).
-
-## Common Hook Types
-
-For reference, here are common hook patterns across platforms:
-
-| Hook Type | Purpose | Example Platforms |
-|-----------|---------|-------------------|
-| `stop_hooks` | Quality validation loops | Claude Code |
-| `pre_hooks` | Run before command | Various |
-| `post_hooks` | Run after command | Various |
-| `validation_hooks` | Validate inputs/outputs | Various |
-
-When you find a new hook type, consider whether it maps to an existing pattern or is genuinely new functionality.
-
-
-### Job Context
-
-A workflow for adding support for a new AI platform (like Cursor, Windsurf, etc.) to DeepWork.
-
-The **integrate** workflow guides you through four phases:
-1. **Research**: Capture the platform's CLI configuration and hooks system documentation
-2. **Add Capabilities**: Update the job schema and adapters with any new hook events
-3. **Implement**: Create the platform adapter, templates, tests (100% coverage), and README updates
-4. **Verify**: Ensure installation works correctly and produces expected files
-
-The workflow ensures consistency across all supported platforms and maintains
-comprehensive test coverage for new functionality.
-
-**Important Notes**:
-- Only hooks available on slash command definitions should be captured
-- Each existing adapter must be updated when new hooks are added (typically with null values)
-- Tests must achieve 100% coverage for any new functionality
-- Installation verification confirms the platform integrates correctly with existing jobs
-
-
-## Required Inputs
-
-
-**Files from Previous Steps** - Read these first:
-- `hooks_system.md` (from `research`)
-
-## Work Branch
-
-Use branch format: `deepwork/add_platform-[instance]-YYYYMMDD`
-
-- If on a matching work branch: continue using it
-- If on main/master: create new branch with `git checkout -b deepwork/add_platform-[instance]-$(date +%Y%m%d)`
-
-## Outputs
-
-**Required outputs**:
-- `job_schema.py`
-- `adapters.py`
-
-## On Completion
-
-1. Verify outputs are created
-2. Inform user: "Step 2/4 complete, outputs: job_schema.py, adapters.py"
-3. **Tell user next command**: `/add_platform:implement`
-
----
-
-**Reference files**: `.deepwork/jobs/add_platform/job.yml`, `.deepwork/jobs/add_platform/steps/add_capabilities.md`
-"""
\ No newline at end of file
diff --git a/.gemini/skills/add_platform/implement.toml b/.gemini/skills/add_platform/implement.toml
deleted file mode 100644
index dda3f96e..00000000
--- a/.gemini/skills/add_platform/implement.toml
+++ /dev/null
@@ -1,305 +0,0 @@
-# add_platform:implement
-#
-# Creates platform adapter, templates, tests with 100% coverage, and README documentation. Use after adding hook capabilities.
-#
-# Generated by DeepWork - do not edit manually
-
-description = "Creates platform adapter, templates, tests with 100% coverage, and README documentation. Use after adding hook capabilities."
-
-prompt = """
-# add_platform:implement
-
-**Step 3/4** in **add_platform** workflow
-
-> Adds a new AI platform to DeepWork with adapter, templates, and tests. Use when integrating Cursor, Windsurf, or other AI coding tools.
-
-## Prerequisites (Verify First)
-
-Before proceeding, confirm these steps are complete:
-- `/add_platform:research`
-- `/add_platform:add_capabilities`
-
-## Instructions
-
-**Goal**: Creates platform adapter, templates, tests with 100% coverage, and README documentation. Use after adding hook capabilities.
-
-# Implement Platform Support
-
-## Objective
-
-Create the complete platform implementation including the adapter class, command templates, comprehensive tests, and documentation updates.
-
-## Task
-
-Build the full platform support by implementing the adapter, creating templates, writing tests with 100% coverage, and updating the README.
-
-### Prerequisites
-
-Read the outputs from previous steps:
-- `doc/platforms/<platform_name>/cli_configuration.md` - For template structure
-- `src/deepwork/schemas/job_schema.py` - For current schema
-- `src/deepwork/adapters.py` - For adapter patterns
-
-Also review existing implementations for reference:
-- `src/deepwork/templates/claude/` - Example templates
-- `tests/` - Existing test patterns
-
-### Process
-
-1. **Create the platform adapter class**
-
-   Add a new adapter class to `src/deepwork/adapters.py`:
-
-   ```python
-   class NewPlatformAdapter(PlatformAdapter):
-       """Adapter for <Platform Name>."""
-
-       platform_name = "<platform_name>"
-       command_directory = "<path to commands>"  # e.g., ".cursor/commands"
-       command_extension = ".md"  # or appropriate extension
-
-       def get_hook_support(self) -> dict:
-           """Return which hooks this platform supports."""
-           return {
-               "stop_hooks": True,  # or False/None
-               # ... other hooks
-           }
-
-       def generate_command(self, step: StepDefinition, job: JobDefinition) -> str:
-           """Generate command file content for this platform."""
-           # Use Jinja2 template
-           template = self.env.get_template(f"{self.platform_name}/command.md.j2")
-           return template.render(step=step, job=job)
-   ```
-
-2. **Create command templates**
-
-   Create templates in `src/deepwork/templates/<platform_name>/`:
-
-   - `command.md.j2` - Main command template
-   - Any other templates needed for the platform's format
-
-   Use the CLI configuration documentation to ensure the template matches the platform's expected format.
-
-3. **Register the adapter**
-
-   Update the adapter registry in `src/deepwork/adapters.py`:
-
-   ```python
-   PLATFORM_ADAPTERS = {
-       "claude": ClaudeAdapter,
-       "<platform_name>": NewPlatformAdapter,
-       # ... other adapters
-   }
-   ```
-
-4. **Write comprehensive tests**
-
-   Create tests in `tests/` that cover:
-
-   - Adapter instantiation
-   - Hook support detection
-   - Command generation
-   - Template rendering
-   - Edge cases (empty inputs, special characters, etc.)
-   - Integration with the sync command
-
-   **Critical**: Tests must achieve 100% coverage of new code.
-
-5. **Update README.md**
-
-   Add the new platform to `README.md`:
-
-   - Add to "Supported Platforms" list
-   - Add installation instructions:
-     ```bash
-     deepwork install --platform <platform_name>
-     ```
-   - Document any platform-specific notes or limitations
-
-6. **Run tests and verify coverage**
-
-   ```bash
-   uv run pytest --cov=src/deepwork --cov-report=term-missing
-   ```
-
-   - All tests must pass
-   - New code must have 100% coverage
-   - If coverage is below 100%, add more tests
-
-7. **Iterate until tests pass with full coverage**
-
-   This step has a `stop_hooks` script that runs tests. Keep iterating until:
-   - All tests pass
-   - Coverage is 100% for new functionality
-
-## Output Format
-
-### templates/
-
-Location: `src/deepwork/templates/<platform_name>/`
-
-Create the following files:
-
-**command.md.j2**:
-```jinja2
-{# Template for <platform_name> command files #}
-{# Follows the platform's expected format from cli_configuration.md #}
-
-[Platform-specific frontmatter or metadata]
-
-# {{ step.name }}
-
-{{ step.description }}
-
-## Instructions
-
-{{ step.instructions_content }}
-
-[... rest of template based on platform format ...]
-```
-
-### tests/
-
-Location: `tests/test_<platform_name>_adapter.py`
-
-```python
-"""Tests for the <platform_name> adapter."""
-import pytest
-from deepwork.adapters import NewPlatformAdapter
-
-class TestNewPlatformAdapter:
-    """Test suite for NewPlatformAdapter."""
-
-    def test_adapter_initialization(self):
-        """Test adapter can be instantiated."""
-        adapter = NewPlatformAdapter()
-        assert adapter.platform_name == "<platform_name>"
-
-    def test_hook_support(self):
-        """Test hook support detection."""
-        adapter = NewPlatformAdapter()
-        hooks = adapter.get_hook_support()
-        assert "stop_hooks" in hooks
-        # ... more assertions
-
-    def test_command_generation(self):
-        """Test command file generation."""
-        # ... test implementation
-
-    # ... more tests for 100% coverage
-```
-
-### README.md
-
-Add to the existing README.md:
-
-```markdown
-## Supported Platforms
-
-- **Claude Code** - Anthropic's CLI for Claude
-- **<Platform Name>** - [Brief description]
-
-## Installation
-
-### <Platform Name>
-
-```bash
-deepwork install --platform <platform_name>
-```
-
-[Any platform-specific notes]
-```
-
-## Quality Criteria
-
-- Platform adapter class added to `src/deepwork/adapters.py`:
-  - Inherits from `PlatformAdapter`
-  - Implements all required methods
-  - Registered in `PLATFORM_ADAPTERS`
-- Templates created in `src/deepwork/templates/<platform_name>/`:
-  - `command.md.j2` exists and renders correctly
-  - Format matches platform's expected command format
-- Tests created in `tests/`:
-  - Cover all new adapter functionality
-  - Cover template rendering
-  - All tests pass
-- Test coverage is 100% for new code:
-  - Run `uv run pytest --cov=src/deepwork --cov-report=term-missing`
-  - No uncovered lines in new code
-- README.md updated:
-  - Platform listed in supported platforms
-  - Installation command documented
-  - Any platform-specific notes included
-- When all criteria are met, include `<promise>✓ Quality Criteria Met</promise>` in your response
-
-## Context
-
-This is the core implementation step. The adapter you create will be responsible for:
-- Determining where command files are placed
-- Generating command file content from job definitions
-- Handling platform-specific features and hooks
-
-The templates use Jinja2 and should produce files that match exactly what the platform expects. Reference the CLI configuration documentation frequently to ensure compatibility.
-
-## Tips
-
-- Study the existing `ClaudeAdapter` as a reference implementation
-- Run tests frequently as you implement
-- Use `--cov-report=html` for a detailed coverage report
-- If a test is hard to write, the code might need refactoring
-- Template syntax errors often show up at runtime - test early
-
-
-### Job Context
-
-A workflow for adding support for a new AI platform (like Cursor, Windsurf, etc.) to DeepWork.
-
-The **integrate** workflow guides you through four phases:
-1. **Research**: Capture the platform's CLI configuration and hooks system documentation
-2. **Add Capabilities**: Update the job schema and adapters with any new hook events
-3. **Implement**: Create the platform adapter, templates, tests (100% coverage), and README updates
-4. **Verify**: Ensure installation works correctly and produces expected files
-
-The workflow ensures consistency across all supported platforms and maintains
-comprehensive test coverage for new functionality.
-
-**Important Notes**:
-- Only hooks available on slash command definitions should be captured
-- Each existing adapter must be updated when new hooks are added (typically with null values)
-- Tests must achieve 100% coverage for any new functionality
-- Installation verification confirms the platform integrates correctly with existing jobs
-
-
-## Required Inputs
-
-
-**Files from Previous Steps** - Read these first:
-- `job_schema.py` (from `add_capabilities`)
-- `adapters.py` (from `add_capabilities`)
-- `cli_configuration.md` (from `research`)
-
-## Work Branch
-
-Use branch format: `deepwork/add_platform-[instance]-YYYYMMDD`
-
-- If on a matching work branch: continue using it
-- If on main/master: create new branch with `git checkout -b deepwork/add_platform-[instance]-$(date +%Y%m%d)`
-
-## Outputs
-
-**Required outputs**:
-- `templates/` (directory)
-- `tests/` (directory)
-- `README.md`
-
-## On Completion
-
-1. Verify outputs are created
-2. Inform user: "Step 3/4 complete, outputs: templates/, tests/, README.md"
-3. **Tell user next command**: `/add_platform:verify`
-
----
-
-**Reference files**: `.deepwork/jobs/add_platform/job.yml`, `.deepwork/jobs/add_platform/steps/implement.md`
-"""
\ No newline at end of file
diff --git a/.gemini/skills/add_platform/index.toml b/.gemini/skills/add_platform/index.toml
deleted file mode 100644
index f97f6792..00000000
--- a/.gemini/skills/add_platform/index.toml
+++ /dev/null
@@ -1,75 +0,0 @@
-# add_platform
-#
-# Adds a new AI platform to DeepWork with adapter, templates, and tests. Use when integrating Cursor, Windsurf, or other AI coding tools.
-#
-# Generated by DeepWork - do not edit manually
-
-description = "Adds a new AI platform to DeepWork with adapter, templates, and tests. Use when integrating Cursor, Windsurf, or other AI coding tools."
-
-prompt = """
-# add_platform
-
-**Multi-step workflow**: Adds a new AI platform to DeepWork with adapter, templates, and tests. Use when integrating Cursor, Windsurf, or other AI coding tools.
-
-> **NOTE**: Gemini CLI requires manual command invocation. After each step, tell the user which command to run next.
-
-A workflow for adding support for a new AI platform (like Cursor, Windsurf, etc.) to DeepWork.
-
-The **integrate** workflow guides you through four phases:
-1. **Research**: Capture the platform's CLI configuration and hooks system documentation
-2. **Add Capabilities**: Update the job schema and adapters with any new hook events
-3. **Implement**: Create the platform adapter, templates, tests (100% coverage), and README updates
-4. **Verify**: Ensure installation works correctly and produces expected files
-
-The workflow ensures consistency across all supported platforms and maintains
-comprehensive test coverage for new functionality.
-
-**Important Notes**:
-- Only hooks available on slash command definitions should be captured
-- Each existing adapter must be updated when new hooks are added (typically with null values)
-- Tests must achieve 100% coverage for any new functionality
-- Installation verification confirms the platform integrates correctly with existing jobs
-
-
-## Available Steps
-
-1. **research** - Captures CLI configuration and hooks system documentation for the new platform. Use when starting platform integration.
-   Command: `/add_platform:research`
-2. **add_capabilities** - Updates job schema and adapters with any new hook events the platform supports. Use after research to extend DeepWork's hook system. (requires: research)
-   Command: `/add_platform:add_capabilities`
-3. **implement** - Creates platform adapter, templates, tests with 100% coverage, and README documentation. Use after adding hook capabilities. (requires: research, add_capabilities)
-   Command: `/add_platform:implement`
-4. **verify** - Sets up platform directories and verifies deepwork install works correctly. Use after implementation to confirm integration. (requires: implement)
-   Command: `/add_platform:verify`
-
-## Execution Instructions
-
-### Step 1: Analyze Intent
-
-Parse any text following `/add_platform` to determine user intent:
-- "research" or related terms → start at `/add_platform:research`
-- "add_capabilities" or related terms → start at `/add_platform:add_capabilities`
-- "implement" or related terms → start at `/add_platform:implement`
-- "verify" or related terms → start at `/add_platform:verify`
-
-### Step 2: Direct User to Starting Step
-
-Tell the user which command to run:
-```
-/add_platform:research
-```
-
-### Step 3: Guide Through Workflow
-
-After each step completes, tell the user the next command to run until workflow is complete.
-
-### Handling Ambiguous Intent
-
-If user intent is unclear:
-- Present available steps as numbered options
-- Ask user to select the starting point
-
-## Reference
-
-- Job definition: `.deepwork/jobs/add_platform/job.yml`
-"""
\ No newline at end of file
diff --git a/.gemini/skills/add_platform/research.toml b/.gemini/skills/add_platform/research.toml
deleted file mode 100644
index 9e0175c5..00000000
--- a/.gemini/skills/add_platform/research.toml
+++ /dev/null
@@ -1,259 +0,0 @@
-# add_platform:research
-#
-# Captures CLI configuration and hooks system documentation for the new platform. Use when starting platform integration.
-#
-# Generated by DeepWork - do not edit manually
-
-description = "Captures CLI configuration and hooks system documentation for the new platform. Use when starting platform integration."
-
-prompt = """
-# add_platform:research
-
-**Step 1/4** in **add_platform** workflow
-
-> Adds a new AI platform to DeepWork with adapter, templates, and tests. Use when integrating Cursor, Windsurf, or other AI coding tools.
-
-
-## Instructions
-
-**Goal**: Captures CLI configuration and hooks system documentation for the new platform. Use when starting platform integration.
-
-# Research Platform Documentation
-
-## Objective
-
-Capture comprehensive documentation for the new AI platform's CLI configuration and hooks system, creating a local reference that will guide the implementation phases.
-
-## Task
-
-Research the target platform's official documentation and create two focused documentation files that will serve as the foundation for implementing platform support in DeepWork.
-
-### Process
-
-1. **Identify the platform's documentation sources**
-   - Find the official documentation website
-   - Locate the CLI/agent configuration documentation
-   - Find the hooks or customization system documentation
-   - Note: Focus ONLY on slash command/custom command hooks, not general CLI hooks
-
-2. **Gather CLI configuration documentation**
-   - How is the CLI configured? (config files, environment variables, etc.)
-   - Where are custom commands/skills stored?
-   - What is the command file format? (markdown, YAML, etc.)
-   - What metadata or frontmatter is supported?
-   - How does the platform discover and load commands?
-
-3. **Gather hooks system documentation**
-   - What hooks are available for custom command definitions?
-   - Focus on hooks that trigger during or after command execution
-   - Examples: `stop_hooks`, `pre_hooks`, `post_hooks`, validation hooks
-   - Document the syntax and available hook types
-   - **Important**: Only document hooks available on slash command definitions, not general CLI hooks
-
-4. **Create the documentation files**
-   - Place files in `doc/platforms/<platform_name>/`
-   - Each file must have a header comment with source and date
-   - Content should be comprehensive but focused
-
-## Output Format
-
-### cli_configuration.md
-
-Located at: `doc/platforms/<platform_name>/cli_configuration.md`
-
-**Structure**:
-```markdown
-<!--
-Last Updated: YYYY-MM-DD
-Source: [URL where this documentation was obtained]
--->
-
-# <Platform Name> CLI Configuration
-
-## Overview
-
-[Brief description of the platform and its CLI/agent system]
-
-## Configuration Files
-
-[Document where configuration lives and its format]
-
-### File Locations
-
-- [Location 1]: [Purpose]
-- [Location 2]: [Purpose]
-
-### Configuration Format
-
-[Show the configuration file format with examples]
-
-## Custom Commands/Skills
-
-[Document how custom commands are defined]
-
-### Command Location
-
-[Where command files are stored]
-
-### Command File Format
-
-[The format of command files - markdown, YAML, etc.]
-
-### Metadata/Frontmatter
-
-[What metadata fields are supported in command files]
-
-```[format]
-[Example of a minimal command file]
-```
-
-## Command Discovery
-
-[How the platform discovers and loads commands]
-
-## Platform-Specific Features
-
-[Any unique features relevant to command configuration]
-```
-
-### hooks_system.md
-
-Located at: `doc/platforms/<platform_name>/hooks_system.md`
-
-**Structure**:
-```markdown
-<!--
-Last Updated: YYYY-MM-DD
-Source: [URL where this documentation was obtained]
--->
-
-# <Platform Name> Hooks System (Command Definitions)
-
-## Overview
-
-[Brief description of hooks available for command definitions]
-
-**Important**: This document covers ONLY hooks available within slash command/skill definitions, not general CLI hooks.
-
-## Available Hooks
-
-### [Hook Name 1]
-
-**Purpose**: [What this hook does]
-
-**Syntax**:
-```yaml
-[hook_name]:
-  - [configuration]
-```
-
-**Example**:
-```yaml
-[Complete example of using this hook]
-```
-
-**Behavior**: [When and how this hook executes]
-
-### [Hook Name 2]
-
-[Repeat for each available hook]
-
-## Hook Execution Order
-
-[Document the order in which hooks execute, if multiple are supported]
-
-## Comparison with Other Platforms
-
-| Feature | <Platform> | Claude Code | Other |
-|---------|-----------|-------------|-------|
-| [Feature 1] | [Support] | [Support] | [Support] |
-
-## Limitations
-
-[Any limitations or caveats about the hooks system]
-```
-
-## Quality Criteria
-
-- Both files exist in `doc/platforms/<platform_name>/`
-- Each file has a header comment with:
-  - Last updated date (YYYY-MM-DD format)
-  - Source URL where documentation was obtained
-- `cli_configuration.md` comprehensively covers:
-  - Configuration file locations and format
-  - Custom command file format and location
-  - Command discovery mechanism
-- `hooks_system.md` comprehensively covers:
-  - All hooks available for slash command definitions
-  - Syntax and examples for each hook
-  - NOT general CLI hooks (only command-level hooks)
-- Documentation is detailed enough to implement the platform adapter
-- No extraneous topics (only CLI config and command hooks)
-- When all criteria are met, include `<promise>✓ Quality Criteria Met</promise>` in your response
-
-## Context
-
-This is the foundation step for adding a new platform to DeepWork. The documentation you capture here will be referenced throughout the implementation process:
-- CLI configuration informs how to generate command files
-- Hooks documentation determines what features the adapter needs to support
-- This documentation becomes a permanent reference in `doc/platforms/`
-
-Take time to be thorough - incomplete documentation will slow down subsequent steps.
-
-## Tips
-
-- Use the platform's official documentation as the primary source
-- If documentation is sparse, check GitHub repos, community guides, or changelog entries
-- When in doubt about whether something is a "command hook" vs "CLI hook", err on the side of inclusion and note the ambiguity
-- Include code examples from the official docs where available
-
-
-### Job Context
-
-A workflow for adding support for a new AI platform (like Cursor, Windsurf, etc.) to DeepWork.
-
-The **integrate** workflow guides you through four phases:
-1. **Research**: Capture the platform's CLI configuration and hooks system documentation
-2. **Add Capabilities**: Update the job schema and adapters with any new hook events
-3. **Implement**: Create the platform adapter, templates, tests (100% coverage), and README updates
-4. **Verify**: Ensure installation works correctly and produces expected files
-
-The workflow ensures consistency across all supported platforms and maintains
-comprehensive test coverage for new functionality.
-
-**Important Notes**:
-- Only hooks available on slash command definitions should be captured
-- Each existing adapter must be updated when new hooks are added (typically with null values)
-- Tests must achieve 100% coverage for any new functionality
-- Installation verification confirms the platform integrates correctly with existing jobs
-
-
-## Required Inputs
-
-**User Parameters** - Gather from user before starting:
-- **platform_name**: Clear identifier of the platform (e.g., 'cursor', 'windsurf-editor', 'github-copilot-chat')
-
-
-## Work Branch
-
-Use branch format: `deepwork/add_platform-[instance]-YYYYMMDD`
-
-- If on a matching work branch: continue using it
-- If on main/master: create new branch with `git checkout -b deepwork/add_platform-[instance]-$(date +%Y%m%d)`
-
-## Outputs
-
-**Required outputs**:
-- `cli_configuration.md`
-- `hooks_system.md`
-
-## On Completion
-
-1. Verify outputs are created
-2. Inform user: "Step 1/4 complete, outputs: cli_configuration.md, hooks_system.md"
-3. **Tell user next command**: `/add_platform:add_capabilities`
-
----
-
-**Reference files**: `.deepwork/jobs/add_platform/job.yml`, `.deepwork/jobs/add_platform/steps/research.md`
-"""
\ No newline at end of file
diff --git a/.gemini/skills/add_platform/verify.toml b/.gemini/skills/add_platform/verify.toml
deleted file mode 100644
index ab440f09..00000000
--- a/.gemini/skills/add_platform/verify.toml
+++ /dev/null
@@ -1,185 +0,0 @@
-# add_platform:verify
-#
-# Sets up platform directories and verifies deepwork install works correctly. Use after implementation to confirm integration.
-#
-# Generated by DeepWork - do not edit manually
-
-description = "Sets up platform directories and verifies deepwork install works correctly. Use after implementation to confirm integration."
-
-prompt = """
-# add_platform:verify
-
-**Step 4/4** in **add_platform** workflow
-
-> Adds a new AI platform to DeepWork with adapter, templates, and tests. Use when integrating Cursor, Windsurf, or other AI coding tools.
-
-## Prerequisites (Verify First)
-
-Before proceeding, confirm these steps are complete:
-- `/add_platform:implement`
-
-## Instructions
-
-**Goal**: Sets up platform directories and verifies deepwork install works correctly. Use after implementation to confirm integration.
-
-# Verify Installation
-
-## Objective
-
-Ensure the new platform integration works correctly by setting up necessary directories and running the full installation process.
-
-## Task
-
-Perform end-to-end verification that the new platform can be installed and that DeepWork's standard jobs work correctly with it.
-
-### Prerequisites
-
-Ensure the implementation step is complete:
-- Adapter class exists in `src/deepwork/adapters.py`
-- Templates exist in `src/deepwork/templates/<platform_name>/`
-- Tests pass with 100% coverage
-- README.md is updated
-
-### Process
-
-1. **Set up platform directories in the DeepWork repo**
-
-   The DeepWork repository itself should have the platform's command directory structure for testing:
-
-   ```bash
-   mkdir -p <platform_command_directory>
-   ```
-
-   For example:
-   - Claude: `.claude/commands/`
-   - Cursor: `.cursor/commands/` (or wherever Cursor stores commands)
-
-2. **Run deepwork install for the new platform**
-
-   ```bash
-   deepwork install --platform <platform_name>
-   ```
-
-   Verify:
-   - Command completes without errors
-   - No Python exceptions or tracebacks
-   - Output indicates successful installation
-
-3. **Check that command files were created**
-
-   List the generated command files:
-   ```bash
-   ls -la <platform_command_directory>/
-   ```
-
-   Verify:
-   - `deepwork_jobs.define.md` exists (or equivalent for the platform)
-   - `deepwork_jobs.implement.md` exists
-   - `deepwork_jobs.refine.md` exists
-   - `deepwork_rules.define.md` exists
-   - All expected step commands exist
-
-4. **Validate command file content**
-
-   Read each generated command file and verify:
-   - Content matches the expected format for the platform
-   - Job metadata is correctly included
-   - Step instructions are properly rendered
-   - Any platform-specific features (hooks, frontmatter) are present
-
-5. **Test alongside existing platforms**
-
-   If other platforms are already installed, verify they still work:
-   ```bash
-   deepwork install --platform claude
-   ls -la .claude/commands/
-   ```
-
-   Ensure:
-   - New platform doesn't break existing installations
-   - Each platform's commands are independent
-   - No file conflicts or overwrites
-
-## Quality Criteria
-
-- Platform-specific directories are set up in the DeepWork repo
-- `deepwork install --platform <platform_name>` completes without errors
-- All expected command files are created:
-  - deepwork_jobs.define, implement, refine
-  - deepwork_rules.define
-  - Any other standard job commands
-- Command file content is correct:
-  - Matches platform's expected format
-  - Job/step information is properly rendered
-  - No template errors or missing content
-- Existing platforms still work (if applicable)
-- No conflicts between platforms
-- When all criteria are met, include `<promise>✓ Quality Criteria Met</promise>` in your response
-
-## Context
-
-This is the final validation step before the platform is considered complete. A thorough verification ensures:
-- The platform actually works, not just compiles
-- Standard DeepWork jobs install correctly
-- The platform integrates properly with the existing system
-- Users can confidently use the new platform
-
-Take time to verify each aspect - finding issues now is much better than having users discover them later.
-
-## Common Issues to Check
-
-- **Template syntax errors**: May only appear when rendering specific content
-- **Path issues**: Platform might expect different directory structure
-- **Encoding issues**: Special characters in templates or content
-- **Missing hooks**: Platform adapter might not handle all hook types
-- **Permission issues**: Directory creation might fail in some cases
-
-
-### Job Context
-
-A workflow for adding support for a new AI platform (like Cursor, Windsurf, etc.) to DeepWork.
-
-The **integrate** workflow guides you through four phases:
-1. **Research**: Capture the platform's CLI configuration and hooks system documentation
-2. **Add Capabilities**: Update the job schema and adapters with any new hook events
-3. **Implement**: Create the platform adapter, templates, tests (100% coverage), and README updates
-4. **Verify**: Ensure installation works correctly and produces expected files
-
-The workflow ensures consistency across all supported platforms and maintains
-comprehensive test coverage for new functionality.
-
-**Important Notes**:
-- Only hooks available on slash command definitions should be captured
-- Each existing adapter must be updated when new hooks are added (typically with null values)
-- Tests must achieve 100% coverage for any new functionality
-- Installation verification confirms the platform integrates correctly with existing jobs
-
-
-## Required Inputs
-
-
-**Files from Previous Steps** - Read these first:
-- `templates/` (from `implement`)
-
-## Work Branch
-
-Use branch format: `deepwork/add_platform-[instance]-YYYYMMDD`
-
-- If on a matching work branch: continue using it
-- If on main/master: create new branch with `git checkout -b deepwork/add_platform-[instance]-$(date +%Y%m%d)`
-
-## Outputs
-
-**Required outputs**:
-- `verification_checklist.md`
-
-## On Completion
-
-1. Verify outputs are created
-2. Inform user: "Step 4/4 complete, outputs: verification_checklist.md"
-3. **Workflow complete**: All steps finished. Consider creating a PR to merge the work branch.
-
----
-
-**Reference files**: `.deepwork/jobs/add_platform/job.yml`, `.deepwork/jobs/add_platform/steps/verify.md`
-"""
\ No newline at end of file
diff --git a/.gemini/skills/commit/commit_and_push.toml b/.gemini/skills/commit/commit_and_push.toml
deleted file mode 100644
index a3172dac..00000000
--- a/.gemini/skills/commit/commit_and_push.toml
+++ /dev/null
@@ -1,164 +0,0 @@
-# commit:commit_and_push
-#
-# Verifies changed files, creates commit, and pushes to remote. Use after linting passes to finalize changes.
-#
-# Generated by DeepWork - do not edit manually
-
-description = "Verifies changed files, creates commit, and pushes to remote. Use after linting passes to finalize changes."
-
-prompt = """
-# commit:commit_and_push
-
-**Step 4/4** in **commit** workflow
-
-> Reviews code, runs tests, lints, and commits changes. Use when ready to commit work with quality checks.
-
-## Prerequisites (Verify First)
-
-Before proceeding, confirm these steps are complete:
-- `/commit:lint`
-
-## Instructions
-
-**Goal**: Verifies changed files, creates commit, and pushes to remote. Use after linting passes to finalize changes.
-
-# Commit and Push
-
-## Objective
-
-Review the changed files to verify they match the agent's expectations, create a commit with an appropriate message, and push to the remote repository.
-
-## Task
-
-Check the list of changed files against what was modified during this session, ensure they match expectations, then commit and push the changes.
-
-### Process
-
-1. **Get the list of changed files**
-   ```bash
-   git status
-   ```
-   Also run `git diff --stat` to see a summary of changes.
-
-2. **Verify changes match expectations**
-
-   Compare the changed files against what you modified during this session:
-   - Do the modified files match what you edited?
-   - Are there any unexpected new files?
-   - Are there any unexpected deleted files?
-   - Do the line counts seem reasonable for the changes you made?
-
-   If changes match expectations, proceed to the next step.
-
-   If there are unexpected changes:
-   - Investigate why (e.g., lint auto-fixes, generated files)
-   - If they're legitimate side effects of your work, include them
-   - If they're unrelated or shouldn't be committed, use `git restore` to discard them
-
-3. **Update CHANGELOG.md if needed**
-
-   If your changes include new features, bug fixes, or other notable changes:
-   - Add entries to the `## [Unreleased]` section of CHANGELOG.md
-   - Use the appropriate subsection: `### Added`, `### Changed`, `### Fixed`, or `### Removed`
-   - Write concise descriptions that explain the user-facing impact
-
-   **CRITICAL: NEVER modify version numbers**
-   - Do NOT change the version in `pyproject.toml`
-   - Do NOT change version headers in CHANGELOG.md (e.g., `## [0.4.2]`)
-   - Do NOT rename the `## [Unreleased]` section
-   - Version updates are handled by the release workflow, not commits
-
-4. **Stage all appropriate changes**
-   ```bash
-   git add -A
-   ```
-   Or stage specific files if some were excluded.
-
-5. **View recent commit messages for style reference**
-   ```bash
-   git log --oneline -10
-   ```
-
-6. **Create the commit**
-
-   Generate an appropriate commit message based on:
-   - The changes made
-   - The style of recent commits
-   - Conventional commit format if the project uses it
-
-   **IMPORTANT:** Use the commit job script (not `git commit` directly):
-   ```bash
-   .claude/hooks/commit_job_git_commit.sh -m "commit message here"
-   ```
-
-7. **Push to remote**
-   ```bash
-   git push
-   ```
-   If the branch has no upstream, use:
-   ```bash
-   git push -u origin HEAD
-   ```
-
-## Quality Criteria
-
-- Changed files were verified against expectations
-- CHANGELOG.md was updated with entries in [Unreleased] section (if changes warrant documentation)
-- Version numbers were NOT modified (pyproject.toml version and CHANGELOG version headers unchanged)
-- Commit was created with appropriate message
-- Changes were pushed to remote
-
-## Context
-
-This is the final step of the commit workflow. The agent verifies that the changed files match its own expectations from the work done during the session, then commits and pushes. This catches unexpected changes while avoiding unnecessary user interruptions.
-
-
-### Job Context
-
-A workflow for preparing and committing code changes with quality checks.
-
-The **full** workflow starts with a code review to catch issues early, runs tests until
-they pass, formats and lints code with ruff, then reviews changed files
-before committing and pushing. The review and lint steps use sub-agents
-to reduce context usage.
-
-Steps:
-1. review - Code review for issues, DRY opportunities, naming, and test coverage (runs in sub-agent)
-2. test - Pull latest code and run tests until they pass
-3. lint - Format and lint code with ruff (runs in sub-agent)
-4. commit_and_push - Review changes and commit/push
-
-
-
-## Work Branch
-
-Use branch format: `deepwork/commit-[instance]-YYYYMMDD`
-
-- If on a matching work branch: continue using it
-- If on main/master: create new branch with `git checkout -b deepwork/commit-[instance]-$(date +%Y%m%d)`
-
-## Outputs
-
-**Required outputs**:
-- `changes_committed`
-
-## Quality Validation (Manual)
-
-**NOTE**: Gemini CLI does not support automated validation. Manually verify criteria before completing.
-
-**Criteria (all must be satisfied)**:
-1. Changed files were verified against expectations
-2. CHANGELOG.md was updated with entries in [Unreleased] section (if changes warrant documentation)
-3. Version numbers were NOT modified (pyproject.toml version and CHANGELOG version headers unchanged)
-4. Commit was created with appropriate message
-5. Changes were pushed to remote
-## On Completion
-
-1. Verify outputs are created
-2. Inform user: "Step 4/4 complete, outputs: changes_committed"
-3. **Workflow complete**: All steps finished. Consider creating a PR to merge the work branch.
-
----
-
-**Reference files**: `.deepwork/jobs/commit/job.yml`, `.deepwork/jobs/commit/steps/commit_and_push.md`
-"""
\ No newline at end of file
diff --git a/.gemini/skills/commit/index.toml b/.gemini/skills/commit/index.toml
deleted file mode 100644
index 09eff3ef..00000000
--- a/.gemini/skills/commit/index.toml
+++ /dev/null
@@ -1,71 +0,0 @@
-# commit
-#
-# Reviews code, runs tests, lints, and commits changes. Use when ready to commit work with quality checks.
-#
-# Generated by DeepWork - do not edit manually
-
-description = "Reviews code, runs tests, lints, and commits changes. Use when ready to commit work with quality checks."
-
-prompt = """
-# commit
-
-**Multi-step workflow**: Reviews code, runs tests, lints, and commits changes. Use when ready to commit work with quality checks.
-
-> **NOTE**: Gemini CLI requires manual command invocation. After each step, tell the user which command to run next.
-
-A workflow for preparing and committing code changes with quality checks.
-
-The **full** workflow starts with a code review to catch issues early, runs tests until
-they pass, formats and lints code with ruff, then reviews changed files
-before committing and pushing. The review and lint steps use sub-agents
-to reduce context usage.
-
-Steps:
-1. review - Code review for issues, DRY opportunities, naming, and test coverage (runs in sub-agent)
-2. test - Pull latest code and run tests until they pass
-3. lint - Format and lint code with ruff (runs in sub-agent)
-4. commit_and_push - Review changes and commit/push
-
-
-## Available Steps
-
-1. **review** - Reviews changed code for issues, DRY opportunities, naming clarity, and test coverage using a sub-agent. Use as the first step before testing.
-   Command: `/commit:review`
-2. **test** - Pulls latest code and runs tests until all pass. Use after code review passes to verify changes work correctly. (requires: review)
-   Command: `/commit:test`
-3. **lint** - Formats and lints code with ruff using a sub-agent. Use after tests pass to ensure code style compliance. (requires: test)
-   Command: `/commit:lint`
-4. **commit_and_push** - Verifies changed files, creates commit, and pushes to remote. Use after linting passes to finalize changes. (requires: lint)
-   Command: `/commit:commit_and_push`
-
-## Execution Instructions
-
-### Step 1: Analyze Intent
-
-Parse any text following `/commit` to determine user intent:
-- "review" or related terms → start at `/commit:review`
-- "test" or related terms → start at `/commit:test`
-- "lint" or related terms → start at `/commit:lint`
-- "commit_and_push" or related terms → start at `/commit:commit_and_push`
-
-### Step 2: Direct User to Starting Step
-
-Tell the user which command to run:
-```
-/commit:review
-```
-
-### Step 3: Guide Through Workflow
-
-After each step completes, tell the user the next command to run until workflow is complete.
-
-### Handling Ambiguous Intent
-
-If user intent is unclear:
-- Present available steps as numbered options
-- Ask user to select the starting point
-
-## Reference
-
-- Job definition: `.deepwork/jobs/commit/job.yml`
-"""
\ No newline at end of file
diff --git a/.gemini/skills/commit/lint.toml b/.gemini/skills/commit/lint.toml
deleted file mode 100644
index 74589f30..00000000
--- a/.gemini/skills/commit/lint.toml
+++ /dev/null
@@ -1,143 +0,0 @@
-# commit:lint
-#
-# Formats and lints code with ruff using a sub-agent. Use after tests pass to ensure code style compliance.
-#
-# Generated by DeepWork - do not edit manually
-
-description = "Formats and lints code with ruff using a sub-agent. Use after tests pass to ensure code style compliance."
-
-prompt = """
-# commit:lint
-
-**Step 3/4** in **commit** workflow
-
-> Reviews code, runs tests, lints, and commits changes. Use when ready to commit work with quality checks.
-
-## Prerequisites (Verify First)
-
-Before proceeding, confirm these steps are complete:
-- `/commit:test`
-
-## Instructions
-
-**Goal**: Formats and lints code with ruff using a sub-agent. Use after tests pass to ensure code style compliance.
-
-# Lint Code
-
-## Objective
-
-Format and lint the codebase using ruff to ensure code quality and consistency.
-
-## Task
-
-Run ruff format and ruff check to format and lint the code. This step should be executed using a sub-agent to conserve context in the main conversation.
-
-### Process
-
-**IMPORTANT**: Use the Task tool to spawn a sub-agent for this work. This saves context in the main conversation. Use the `haiku` model for speed.
-
-1. **Spawn a sub-agent to run linting**
-
-   Use the Task tool with these parameters:
-   - `subagent_type`: "Bash"
-   - `model`: "haiku"
-   - `prompt`: See below
-
-   The sub-agent should:
-
-   a. **Run ruff format**
-      ```bash
-      ruff format .
-      ```
-      This formats the code according to ruff's style rules.
-
-   b. **Run ruff check with auto-fix**
-      ```bash
-      ruff check --fix .
-      ```
-      This checks for lint errors and automatically fixes what it can.
-
-   c. **Run ruff check again to verify**
-      ```bash
-      ruff check .
-      ```
-      Capture the final output to verify no remaining issues.
-
-2. **Review sub-agent results**
-   - Check that both format and check completed successfully
-   - Note any remaining lint issues that couldn't be auto-fixed
-
-3. **Handle remaining issues**
-   - If there are lint errors that couldn't be auto-fixed, fix them manually
-   - Re-run ruff check to verify
-
-## Example Sub-Agent Prompt
-
-```
-Run ruff to format and lint the codebase:
-
-1. Run: ruff format .
-2. Run: ruff check --fix .
-3. Run: ruff check . (to verify no remaining issues)
-
-Report the results of each command.
-```
-
-## Quality Criteria
-
-- ruff format was run successfully
-- ruff check was run with --fix flag
-- No remaining lint errors
-
-## Context
-
-This step ensures code quality and consistency before committing. It runs after tests pass and before the commit step. Using a sub-agent keeps the main conversation context clean for the commit review.
-
-
-### Job Context
-
-A workflow for preparing and committing code changes with quality checks.
-
-The **full** workflow starts with a code review to catch issues early, runs tests until
-they pass, formats and lints code with ruff, then reviews changed files
-before committing and pushing. The review and lint steps use sub-agents
-to reduce context usage.
-
-Steps:
-1. review - Code review for issues, DRY opportunities, naming, and test coverage (runs in sub-agent)
-2. test - Pull latest code and run tests until they pass
-3. lint - Format and lint code with ruff (runs in sub-agent)
-4. commit_and_push - Review changes and commit/push
-
-
-
-## Work Branch
-
-Use branch format: `deepwork/commit-[instance]-YYYYMMDD`
-
-- If on a matching work branch: continue using it
-- If on main/master: create new branch with `git checkout -b deepwork/commit-[instance]-$(date +%Y%m%d)`
-
-## Outputs
-
-**Required outputs**:
-- `code_formatted`
-
-## Quality Validation (Manual)
-
-**NOTE**: Gemini CLI does not support automated validation. Manually verify criteria before completing.
-
-**Criteria (all must be satisfied)**:
-1. ruff format was run successfully
-2. ruff check was run with --fix flag
-3. No remaining lint errors
-## On Completion
-
-1. Verify outputs are created
-2. Inform user: "Step 3/4 complete, outputs: code_formatted"
-3. **Tell user next command**: `/commit:commit_and_push`
-
----
-
-**Reference files**: `.deepwork/jobs/commit/job.yml`, `.deepwork/jobs/commit/steps/lint.md`
-"""
\ No newline at end of file
diff --git a/.gemini/skills/commit/review.toml b/.gemini/skills/commit/review.toml
deleted file mode 100644
index 9423e1a3..00000000
--- a/.gemini/skills/commit/review.toml
+++ /dev/null
@@ -1,124 +0,0 @@
-# commit:review
-#
-# Reviews changed code for issues, DRY opportunities, naming clarity, and test coverage using a sub-agent. Use as the first step before testing.
-#
-# Generated by DeepWork - do not edit manually
-
-description = "Reviews changed code for issues, DRY opportunities, naming clarity, and test coverage using a sub-agent. Use as the first step before testing."
-
-prompt = """
-# commit:review
-
-**Step 1/4** in **commit** workflow
-
-> Reviews code, runs tests, lints, and commits changes. Use when ready to commit work with quality checks.
-
-
-## Instructions
-
-**Goal**: Reviews changed code for issues, DRY opportunities, naming clarity, and test coverage using a sub-agent. Use as the first step before testing.
-
-# Code Review
-
-## Objective
-
-Review changed code for quality issues before running tests. This catches problems early and ensures code meets quality standards.
-
-## Task
-
-Use a sub-agent to review the staged/changed code and identify issues that should be fixed before committing.
-
-### Process
-
-**IMPORTANT**: Use the Task tool to spawn a sub-agent for this review. This saves context in the main conversation.
-
-1. **Get the list of changed files**
-   ```bash
-   git diff --name-only HEAD
-   git diff --name-only --staged
-   ```
-   Combine these to get all files that have been modified.
-
-2. **Spawn a sub-agent to review the code**
-
-   Use the Task tool with these parameters:
-   - `subagent_type`: "general-purpose"
-   - `prompt`: Instruct the sub-agent to:
-     - Read the code review standards from `doc/code_review_standards.md`
-     - Read each of the changed files
-     - Review each file against the standards
-     - Report issues found with file, line number, severity, and suggested fix
-
-3. **Review sub-agent findings**
-   - Examine each issue identified
-   - Prioritize issues by severity
-
-4. **Fix identified issues**
-   - Address each issue found by the review
-   - For DRY violations: extract shared code into functions/modules
-   - For naming issues: rename to be clearer
-   - For missing tests: add appropriate test cases
-   - For bugs: fix the underlying issue
-
-5. **Re-run review if significant changes made**
-   - If you made substantial changes, consider running another review pass
-   - Ensure fixes didn't introduce new issues
-
-## Quality Criteria
-
-- Changed files were identified
-- Sub-agent read the code review standards and reviewed all changed files
-- All identified issues were addressed or documented as intentional
-
-## Context
-
-This is the first step of the commit workflow. Code review happens before tests to catch quality issues early. The sub-agent approach keeps the main conversation context clean while providing thorough review coverage.
-
-
-### Job Context
-
-A workflow for preparing and committing code changes with quality checks.
-
-The **full** workflow starts with a code review to catch issues early, runs tests until
-they pass, formats and lints code with ruff, then reviews changed files
-before committing and pushing. The review and lint steps use sub-agents
-to reduce context usage.
-
-Steps:
-1. review - Code review for issues, DRY opportunities, naming, and test coverage (runs in sub-agent)
-2. test - Pull latest code and run tests until they pass
-3. lint - Format and lint code with ruff (runs in sub-agent)
-4. commit_and_push - Review changes and commit/push
-
-
-
-## Work Branch
-
-Use branch format: `deepwork/commit-[instance]-YYYYMMDD`
-
-- If on a matching work branch: continue using it
-- If on main/master: create new branch with `git checkout -b deepwork/commit-[instance]-$(date +%Y%m%d)`
-
-## Outputs
-
-**Required outputs**:
-- `code_reviewed`
-
-## Quality Validation (Manual)
-
-**NOTE**: Gemini CLI does not support automated validation. Manually verify criteria before completing.
-
-**Criteria (all must be satisfied)**:
-1. Changed files were identified
-2. Sub-agent reviewed the code for general issues, DRY opportunities, naming clarity, and test coverage
-3. All identified issues were addressed or documented as intentional
-## On Completion
-
-1. Verify outputs are created
-2. Inform user: "Step 1/4 complete, outputs: code_reviewed"
-3. **Tell user next command**: `/commit:test`
-
----
-
-**Reference files**: `.deepwork/jobs/commit/job.yml`, `.deepwork/jobs/commit/steps/review.md`
-"""
\ No newline at end of file
diff --git a/.gemini/skills/commit/test.toml b/.gemini/skills/commit/test.toml
deleted file mode 100644
index c45f623a..00000000
--- a/.gemini/skills/commit/test.toml
+++ /dev/null
@@ -1,123 +0,0 @@
-# commit:test
-#
-# Pulls latest code and runs tests until all pass. Use after code review passes to verify changes work correctly.
-#
-# Generated by DeepWork - do not edit manually
-
-description = "Pulls latest code and runs tests until all pass. Use after code review passes to verify changes work correctly."
-
-prompt = """
-# commit:test
-
-**Step 2/4** in **commit** workflow
-
-> Reviews code, runs tests, lints, and commits changes. Use when ready to commit work with quality checks.
-
-## Prerequisites (Verify First)
-
-Before proceeding, confirm these steps are complete:
-- `/commit:review`
-
-## Instructions
-
-**Goal**: Pulls latest code and runs tests until all pass. Use after code review passes to verify changes work correctly.
-
-# Run Tests
-
-## Objective
-
-Run the project's test suite and fix any failing tests until all tests pass.
-
-## Task
-
-Execute the test suite for the project and iteratively fix any failures until all tests pass.
-
-### Process
-
-1. **Pull latest code from the branch**
-   - Run `git pull` to fetch and merge any changes from the remote
-   - If there are merge conflicts, resolve them before proceeding
-   - This ensures you're testing against the latest code
-
-2. **Detect or use the test command**
-   - If a test command was provided, use that
-   - Otherwise, auto-detect the project type and determine the appropriate test command:
-     - Python: `pytest`, `python -m pytest`, `uv run pytest`
-     - Node.js: `npm test`, `yarn test`, `bun test`
-     - Go: `go test ./...`
-     - Rust: `cargo test`
-     - Check `package.json`, `pyproject.toml`, `Cargo.toml`, `go.mod` for hints
-
-3. **Run the tests**
-   - Execute the test command
-   - Capture the output
-
-4. **Analyze failures**
-   - If tests pass, proceed to output
-   - If tests fail, analyze the failure messages
-   - Identify the root cause of each failure
-
-5. **Fix failing tests**
-   - Make the necessary code changes to fix failures
-   - This may involve fixing bugs in implementation code or updating tests
-   - Re-run tests after each fix
-
-6. **Iterate until passing**
-   - Continue the fix/test cycle until all tests pass
-
-## Quality Criteria
-
-- Latest code was pulled from the branch
-- All tests are passing
-
-## Context
-
-This step runs after code review. Tests must pass before proceeding to lint and commit. This ensures code quality and prevents broken code from being committed. If tests fail due to issues introduced by the code review fixes, iterate on the fixes until tests pass.
-
-
-### Job Context
-
-A workflow for preparing and committing code changes with quality checks.
-
-The **full** workflow starts with a code review to catch issues early, runs tests until
-they pass, formats and lints code with ruff, then reviews changed files
-before committing and pushing. The review and lint steps use sub-agents
-to reduce context usage.
-
-Steps:
-1. review - Code review for issues, DRY opportunities, naming, and test coverage (runs in sub-agent)
-2. test - Pull latest code and run tests until they pass
-3. lint - Format and lint code with ruff (runs in sub-agent)
-4. commit_and_push - Review changes and commit/push
-
-
-
-## Work Branch
-
-Use branch format: `deepwork/commit-[instance]-YYYYMMDD`
-
-- If on a matching work branch: continue using it
-- If on main/master: create new branch with `git checkout -b deepwork/commit-[instance]-$(date +%Y%m%d)`
-
-## Outputs
-
-**Required outputs**:
-- `tests_passing`
-
-## Quality Validation (Manual)
-
-**NOTE**: Gemini CLI does not support automated validation. Manually verify criteria before completing.
-
-**Criteria (all must be satisfied)**:
-1. Latest code was pulled from the branch
-2. All tests are passing
-## On Completion
-
-1. Verify outputs are created
-2. Inform user: "Step 2/4 complete, outputs: tests_passing"
-3. **Tell user next command**: `/commit:lint`
-
----
-
-**Reference files**: `.deepwork/jobs/commit/job.yml`, `.deepwork/jobs/commit/steps/test.md`
-"""
\ No newline at end of file
diff --git a/.gemini/skills/deepwork/index.toml b/.gemini/skills/deepwork/index.toml
new file mode 100644
index 00000000..8263f4e0
--- /dev/null
+++ b/.gemini/skills/deepwork/index.toml
@@ -0,0 +1,29 @@
++++
+name = "deepwork"
+description = "Start or continue DeepWork workflows using MCP tools"
++++
+
+# DeepWork Workflow Manager
+
+Execute multi-step workflows with quality gate checkpoints.
+
+> **IMPORTANT**: Use the DeepWork MCP server tools. All workflow operations
+> are performed through MCP tool calls and following the instructions they return,
+> not by reading instructions from files.
+
+## How to Use
+
+1. Call `get_workflows` to discover available workflows
+2. Call `start_workflow` with goal, job_name, and workflow_name
+3. Follow the step instructions returned
+4. Call `finished_step` with your outputs when done
+5. Handle the response: `needs_work`, `next_step`, or `workflow_complete`
+
+## Intent Parsing
+
+When the user invokes `/deepwork`, parse their intent:
+1. **ALWAYS**: Call `get_workflows` to discover available workflows
+2. Based on the available flows and what the user said in their request, proceed:
+    - **Explicit workflow**: `/deepwork <a workflow name>` → start the `<a workflow name>` workflow
+    - **General request**: `/deepwork <a request>` → infer best match from available workflows
+    - **No context**: `/deepwork` alone → ask user to choose from available workflows
\ No newline at end of file
diff --git a/.gemini/skills/deepwork_jobs/define.toml b/.gemini/skills/deepwork_jobs/define.toml
deleted file mode 100644
index 8a705168..00000000
--- a/.gemini/skills/deepwork_jobs/define.toml
+++ /dev/null
@@ -1,537 +0,0 @@
-# deepwork_jobs:define
-#
-# Creates a job.yml specification by gathering workflow requirements through structured questions. Use when starting a new multi-step workflow.
-#
-# Generated by DeepWork - do not edit manually
-
-description = "Creates a job.yml specification by gathering workflow requirements through structured questions. Use when starting a new multi-step workflow."
-
-prompt = """
-# deepwork_jobs:define
-
-**Step 1/4** in **deepwork_jobs** workflow
-
-> Creates and manages multi-step AI workflows. Use when defining, implementing, or improving DeepWork jobs.
-
-
-## Instructions
-
-**Goal**: Creates a job.yml specification by gathering workflow requirements through structured questions. Use when starting a new multi-step workflow.
-
-# Define Job Specification
-
-## Objective
-
-Create a `job.yml` specification file that defines the structure of a new DeepWork job by thoroughly understanding the user's workflow requirements through an interactive question-and-answer process.
-
-## Task
-
-Guide the user through defining a job specification by asking structured questions. **Do not attempt to create the specification without first fully understanding the user's needs.**
-
-**Important**: Use the AskUserQuestion tool to ask structured questions when gathering information from the user. This provides a better user experience with clear options and guided choices.
-
-The output of this step is **only** the `job.yml` file - a complete specification of the workflow. The actual step instruction files will be created in the next step (`implement`).
-
-### Step 1: Understand the Job Purpose
-
-Start by asking structured questions to understand what the user wants to accomplish:
-
-1. **What is the overall goal of this workflow?**
-   - What complex task are they trying to accomplish?
-   - What domain is this in? (e.g., research, marketing, development, reporting)
-   - How often will they run this workflow?
-
-2. **What does success look like?**
-   - What's the final deliverable or outcome?
-   - Who is the audience for the output?
-   - What quality criteria matter most?
-
-3. **What are the major phases?**
-   - Ask them to describe the workflow at a high level
-   - What are the distinct stages from start to finish?
-   - Are there any dependencies between phases?
-
-### Step 1.5: Detect Document-Oriented Workflows
-
-**Check for document-focused patterns** in the user's description:
-- Keywords: "report", "summary", "document", "create", "monthly", "quarterly", "for stakeholders", "for leadership"
-- Final deliverable is a specific document (e.g., "AWS spending report", "competitive analysis", "sprint summary")
-- Recurring documents with consistent structure
-
-**If a document-oriented workflow is detected:**
-
-1. Inform the user: "This workflow produces a specific document type. I recommend defining a doc spec first to ensure consistent quality."
-
-2. Ask structured questions to understand if they want to:
-   - Create a doc spec for this document
-   - Use an existing doc spec (if any exist in `.deepwork/doc_specs/`)
-   - Skip doc spec and proceed with simple outputs
-
-### Step 1.6: Define the Doc Spec (if needed)
-
-When creating a doc spec, gather the following information:
-
-1. **Document Identity**
-   - What is the document called? (e.g., "Monthly AWS Spending Report")
-   - Brief description of its purpose
-   - Where should these documents be stored? (path patterns like `finance/aws-reports/*.md`)
-
-2. **Audience and Context**
-   - Who reads this document? (target audience)
-   - How often is it produced? (frequency)
-
-3. **Quality Criteria** (3-5 criteria, each with name and description)
-
-   **Important**: Doc spec quality criteria define requirements for the **output document itself**, not the process of creating it. Focus on what the finished document must contain or achieve.
-
-   Examples for a spending report:
-   - **Visualization**: Must include charts showing spend breakdown by service
-   - **Variance Analysis**: Must compare current month against previous with percentages
-   - **Action Items**: Must include recommended cost optimization actions
-
-   **Note**: When a doc spec is created for a step's output, the step should generally NOT have separate `quality_criteria` in the job.yml. The doc spec's criteria cover output quality. Only add step-level quality_criteria if there are essential process requirements (e.g., "must use specific tool"), and minimize these when possible.
-
-4. **Document Structure**
-   - What sections should it have?
-   - Any required elements (tables, charts, summaries)?
-
-### Step 1.7: Create the doc spec File (if needed)
-
-Create the doc spec file at `.deepwork/doc_specs/[doc_spec_name].md`:
-
-**Template reference**: See `.deepwork/jobs/deepwork_jobs/templates/doc_spec.md.template` for the standard structure.
-
-**Complete example**: See `.deepwork/doc_specs/job_spec.md` for a fully worked example (the doc spec for job.yml files).
-
-After creating the doc spec, proceed to Step 2 with the doc spec reference for the final step's output.
-
-### Step 2: Define Each Step
-
-For each major phase they mentioned, ask structured questions to gather details:
-
-1. **Step Purpose**
-   - What exactly does this step accomplish?
-   - What is the input to this step?
-   - What is the output from this step?
-
-2. **Step Inputs**
-   - What information is needed to start this step?
-   - Does it need user-provided parameters? (e.g., topic, target audience)
-   - Does it need files from previous steps?
-   - What format should inputs be in?
-
-3. **Step Outputs**
-   - What files or artifacts does this step produce?
-   - What format should the output be in? (markdown, YAML, JSON, etc.)
-   - Where should each output be saved? (filename/path)
-   - Should outputs be organized in subdirectories? (e.g., `reports/`, `data/`, `drafts/`)
-   - Will other steps need this output?
-   - **Does this output have a doc spec?** If a doc spec was created in Step 1.6/1.7, reference it for the appropriate output
-
-   #### Work Product Storage Guidelines
-
-   **Key principle**: Job outputs belong in the main repository directory structure, not in dot-directories. The `.deepwork/` directory is for job definitions and configuration only.
-
-   **Why this matters**:
-   - **Version control**: Work products in the main repo are tracked by git and visible in PRs
-   - **Discoverability**: Team members can find outputs without knowing about DeepWork internals
-   - **Tooling compatibility**: IDEs, search tools, and CI/CD work naturally with standard paths
-   - **Glob patterns**: Well-structured paths enable powerful file matching (e.g., `competitive_research/**/*.md`)
-
-   **Good output path patterns**:
-   ```
-   competitive_research/competitors_list.md
-   competitive_research/acme_corp/research.md
-   operations/reports/2026-01/spending_analysis.md
-   docs/api/endpoints.md
-   ```
-
-   **Avoid these patterns**:
-   ```
-   .deepwork/outputs/report.md          # Hidden in dot-directory
-   output.md                            # Too generic, no context
-   research.md                          # Unclear which research
-   temp/draft.md                        # Transient-sounding paths
-   ```
-
-   **Organizing multi-file outputs**:
-   - Use the job name as a top-level folder when outputs are job-specific
-   - Use parameterized paths for per-entity outputs: `competitive_research/[competitor_name]/`
-   - Match existing project conventions when extending a codebase
-
-   **When to include dates in paths**:
-   - **Include date** for periodic outputs where each version is retained (e.g., monthly reports, quarterly reviews, weekly summaries). These accumulate over time and historical versions remain useful.
-     ```
-     operations/reports/2026-01/spending_analysis.md              # Monthly report - keep history
-     hr/employees/[employee_name]/quarterly_reviews/2026-Q1.pdf   # Per-employee quarterly review
-     ```
-   - **Omit date** for current-state outputs that represent the latest understanding and get updated in place. Previous versions live in git history, not separate files.
-     ```
-     competitive_research/acme_corp/swot.md  # Current SWOT - updated over time
-     docs/architecture/overview.md           # Living document
-     ```
-
-   **Supporting materials and intermediate outputs**:
-   - Content generated in earlier steps to support the final output (research notes, data extracts, drafts) should be placed in a `_dataroom` folder that is a peer to the final output
-   - Name the dataroom folder by replacing the file extension with `_dataroom`
-     ```
-     operations/reports/2026-01/spending_analysis.md           # Final output
-     operations/reports/2026-01/spending_analysis_dataroom/    # Supporting materials
-         raw_data.csv
-         vendor_breakdown.md
-         notes.md
-     ```
-   - This keeps supporting materials organized and discoverable without cluttering the main output location
-
-4. **Step Dependencies**
-   - Which previous steps must complete before this one?
-   - Are there any ordering constraints?
-
-5. **Step Process** (high-level understanding)
-   - What are the key activities in this step?
-   - Are there any quality checks or validation needed?
-   - What makes a good vs. bad output for this step?
-
-6. **Agent Delegation** (optional)
-   - Should this step be executed by a specific agent type?
-   - Use the `agent` field when the step should run in a forked context with a specific agent
-   - When `agent` is set, the generated skill automatically includes `context: fork`
-   - Available agent types:
-     - `general-purpose` - Standard agent for multi-step tasks
-
-   ```yaml
-   steps:
-     - id: research_step
-       agent: general-purpose  # Delegates to the general-purpose agent
-   ```
-
-**Note**: You're gathering this information to understand what instructions will be needed, but you won't create the instruction files yet - that happens in the `implement` step.
-
-#### Doc Spec-Aware Output Format
-
-When a step produces a document with a doc spec reference, use this format in job.yml:
-
-```yaml
-outputs:
-  - file: reports/monthly_spending.md
-    doc_spec: .deepwork/doc_specs/monthly_aws_report.md
-```
-
-The doc spec's quality criteria will automatically be included in the generated skill, ensuring consistent document quality.
-
-### Capability Considerations
-
-When defining steps, identify any that require specialized tools:
-
-**Browser Automation**: If any step involves web scraping, form filling, interactive browsing, UI testing, or research requiring website visits, ask the user what browser tools they have available. For Claude Code users, **Claude in Chrome** (Anthropic's browser extension) has been tested with DeepWork and is recommended for new users. Don't assume a default—confirm the tool before designing browser-dependent steps.
-
-### Step 3: Validate the Workflow
-
-After gathering information about all steps:
-
-1. **Review the flow**
-   - Summarize the complete workflow
-   - Show how outputs from one step feed into the next
-   - Ask if anything is missing
-
-2. **Check for gaps**
-   - Are there any steps where the input isn't clearly defined?
-   - Are there any outputs that aren't used by later steps?
-   - Are there circular dependencies?
-
-3. **Confirm details**
-   - Job name (lowercase, underscores, descriptive)
-   - Job summary (one clear sentence, max 200 chars)
-   - Job description (detailed multi-line explanation)
-   - Version number (start with 1.0.0)
-
-### Step 4: Define Quality Validation (Stop Hooks)
-
-For each step, consider whether it would benefit from **quality validation loops**. Stop hooks allow the AI agent to iteratively refine its work until quality criteria are met.
-
-**Ask structured questions about quality validation:**
-- "Are there specific quality criteria that must be met for this step?"
-- "Would you like the agent to validate its work before completing?"
-- "What would make you send the work back for revision?"
-
-**Stop hooks are particularly valuable for:**
-- Steps with complex outputs that need multiple checks
-- Steps where quality is critical (final deliverables)
-- Steps with subjective quality criteria that benefit from AI self-review
-
-**Three types of stop hooks are supported:**
-
-1. **Inline Prompt** (`prompt`) - Best for simple quality criteria
-   ```yaml
-   stop_hooks:
-     - prompt: |
-         Verify the output meets these criteria:
-         1. Contains at least 5 competitors
-         2. Each competitor has a description
-         3. Selection rationale is clear
-   ```
-
-2. **Prompt File** (`prompt_file`) - For detailed/reusable criteria
-   ```yaml
-   stop_hooks:
-     - prompt_file: hooks/quality_check.md
-   ```
-
-3. **Script** (`script`) - For programmatic validation (tests, linting)
-   ```yaml
-   stop_hooks:
-     - script: hooks/run_tests.sh
-   ```
-
-**Multiple hooks can be combined:**
-```yaml
-stop_hooks:
-  - script: hooks/lint_output.sh
-  - prompt: "Verify the content is comprehensive and well-organized"
-```
-
-**Encourage prompt-based hooks** - They leverage the AI's ability to understand context and make nuanced quality judgments. Script hooks are best for objective checks (syntax, format, tests).
-
-### Step 5: Create the Job Directory and Specification
-
-Only after you have complete understanding, create the job directory and `job.yml` file:
-
-**First, create the directory structure** using the `make_new_job.sh` script:
-
-```bash
-.deepwork/jobs/deepwork_jobs/make_new_job.sh [job_name]
-```
-
-This creates:
-- `.deepwork/jobs/[job_name]/` - Main job directory
-- `.deepwork/jobs/[job_name]/steps/` - For step instruction files
-- `.deepwork/jobs/[job_name]/hooks/` - For custom validation scripts
-- `.deepwork/jobs/[job_name]/templates/` - For example file formats
-- `.deepwork/jobs/[job_name]/AGENTS.md` - Job management guidance
-
-**Then create the job.yml file** at `.deepwork/jobs/[job_name]/job.yml`
-
-(Where `[job_name]` is the name of the NEW job you're creating, e.g., `competitive_research`)
-
-**Doc Spec**: See `.deepwork/doc_specs/job_spec.md` for the complete specification with quality criteria.
-
-**Template reference**: See `.deepwork/jobs/deepwork_jobs/templates/job.yml.template` for the standard structure.
-
-**Complete example**: See `.deepwork/jobs/deepwork_jobs/templates/job.yml.example` for a fully worked example.
-
-**Important**:
-- Use lowercase with underscores for job name and step IDs
-- Ensure file inputs reference steps in dependencies
-- No circular dependencies
-- At least one output per step
-- The `summary` should be concise (max 200 chars)
-- The `description` should provide rich context for future refinement
-- Include a `changelog` section with an initial entry for version 1.0.0
-
-## Example Dialog
-
-```
-Claude: I'll help you define a new DeepWork job. Let's start by understanding what you're trying to accomplish.
-
-What complex task or workflow are you trying to create? What's the overall goal?
-
-User: I want to create competitive research reports for my company
-
-Claude: Great! Let's dig into that. A few questions:
-
-1. What's the final deliverable? Is it a written report, a presentation, or something else?
-2. Who is the audience? Your team, executives, clients?
-3. How comprehensive should these reports be?
-
-User: It's a written report for our product team. Should be comprehensive - analyzing 3-5 competitors.
-
-Claude: Perfect. Now let's think about the major phases of creating this report. What are the main stages you go through? For example:
-- Identifying which competitors to analyze?
-- Gathering information about them?
-- Analyzing the data?
-- Writing the report?
-
-Or is your process different?
-
-User: Yes, that's close. I would say:
-1. Identify competitors
-2. Research each one
-3. Create comparison analysis
-4. Write positioning recommendations
-
-Claude: Excellent! Now let's define each step in detail. Starting with "Identify competitors":
-
-1. What inputs do you need to start? Do you already have a market segment in mind, or do you need to specify that?
-2. What should the output look like? A list of competitor names? More detailed?
-3. How many competitors should be identified?
-
-User: I need to specify the market segment and product category as inputs. The output should be a markdown file with 5-7 competitors and brief descriptions of each.
-
-Claude: Perfect. A few more questions about that output:
-1. What should the file be called? Something like `competitors_list.md`?
-2. Should it go in the project root, or in a subdirectory like `research/` or `data/`?
-
-User: Just call it `competitors_list.md` in the project root.
-
-Claude: Great. Now for the "Research each one" step:
-1. Does it use the competitor list from the first step as input?
-2. What kind of research - product features, pricing, marketing, all of the above?
-3. What format should the research output be in?
-4. Where should that research be saved?
-
-[continues for each step...]
-
-[After gathering all information:]
-
-Claude: Let me summarize the workflow I've designed based on our conversation:
-
-**Job: competitive_research**
-Summary: Systematic competitive analysis workflow for product positioning
-
-Description:
-A comprehensive workflow for analyzing competitors in your market segment. This job helps product teams understand the competitive landscape by systematically identifying competitors, researching their offerings, creating comparison matrices, and developing strategic positioning recommendations.
-
-The workflow produces:
-- A vetted list of key competitors
-- Detailed research notes on each competitor
-- A comparison matrix highlighting key differentiators
-- Strategic positioning recommendations
-
-Designed for product teams conducting quarterly competitive analysis.
-
-**Steps:**
-
-Step 1: identify_competitors
-- Input: market_segment (user), product_category (user)
-- Output: competitors_list.md (5-7 competitors with descriptions)
-
-Step 2: research_competitors
-- Input: competitors_list.md (from step 1)
-- Output: research_notes.md (detailed findings for each competitor)
-- Depends on: identify_competitors
-
-Step 3: comparative_analysis
-- Input: research_notes.md (from step 2)
-- Output: comparison_matrix.md (side-by-side analysis)
-- Depends on: research_competitors
-
-Step 4: positioning_recommendations
-- Input: comparison_matrix.md (from step 3)
-- Output: positioning_report.md (strategic recommendations)
-- Depends on: comparative_analysis
-
-Does this accurately capture your workflow? Any changes needed?
-
-User: Yes, that's perfect!
-
-Claude: Great! Creating the job.yml specification now...
-
-[Creates .deepwork/jobs/competitive_research/job.yml with the complete spec]
-
-✓ Job specification created!
-
-**File created:**
-- .deepwork/jobs/competitive_research/job.yml
-
-**Next step:**
-Run `/deepwork_jobs.review_job_spec` to validate the specification against quality criteria.
-```
-
-## Important Guidelines
-
-1. **Focus on specification only** - Don't create instruction files yet
-2. **Ask structured questions** - Never skip the discovery phase; use the AskUserQuestion tool
-3. **Rich context in description** - This helps with future refinement
-4. **Validate understanding** - Summarize and confirm before creating
-5. **Use examples** - Help users understand what good specifications look like
-6. **Understand file organization** - Always ask structured questions about where outputs should be saved and if subdirectories are needed
-
-## Validation Rules
-
-Before creating the job.yml, ensure:
-- Job name: lowercase, underscores, no spaces
-- Version: semantic versioning (1.0.0)
-- Summary: concise, under 200 characters
-- Description: detailed, provides context
-- Step IDs: unique, descriptive, lowercase with underscores
-- Dependencies: must reference existing step IDs
-- File inputs: `from_step` must be in dependencies
-- At least one output per step
-- Outputs can be filenames (e.g., `report.md`) or paths (e.g., `reports/analysis.md`)
-- File paths in outputs should match where files will actually be created
-- No circular dependencies
-
-## Output Format
-
-### job.yml
-
-The complete YAML specification file (example shown in Step 5 above).
-
-**Location**: `.deepwork/jobs/[job_name]/job.yml`
-
-(Where `[job_name]` is the name of the new job being created)
-
-After creating the file:
-1. Inform the user that the specification is complete
-2. Recommend that they review the job.yml file
-3. Tell them to run `/deepwork_jobs.review_job_spec` next
-
-
-
-### Job Context
-
-Core commands for managing DeepWork jobs. These commands help you define new multi-step
-workflows and learn from running them.
-
-The `new_job` workflow guides you through defining and implementing a new job by
-asking structured questions about your workflow, understanding each step's inputs and outputs,
-reviewing the specification, and generating all necessary files.
-
-The `learn` skill reflects on conversations where DeepWork jobs were run, identifies
-confusion or inefficiencies, and improves job instructions. It also captures bespoke
-learnings specific to the current run into AGENTS.md files in the working folder.
-
-
-## Required Inputs
-
-**User Parameters** - Gather from user before starting:
-- **job_purpose**: What complex task or workflow are you trying to accomplish?
-
-
-## Work Branch
-
-Use branch format: `deepwork/deepwork_jobs-[instance]-YYYYMMDD`
-
-- If on a matching work branch: continue using it
-- If on main/master: create new branch with `git checkout -b deepwork/deepwork_jobs-[instance]-$(date +%Y%m%d)`
-
-## Outputs
-
-**Required outputs**:
-- `job.yml`
-  **Doc Spec**: DeepWork Job Specification
-  > YAML specification file that defines a multi-step workflow job for AI agents
-  **Definition**: `.deepwork/doc_specs/job_spec.md`
-  **Target Audience**: AI agents executing jobs and developers defining workflows
-  **Quality Criteria**:
-  1. **Valid Identifier**: Job name must be lowercase with underscores, no spaces or special characters (e.g., `competitive_research`, `monthly_report`)
-  2. **Semantic Version**: Version must follow semantic versioning format X.Y.Z (e.g., `1.0.0`, `2.1.3`)
-  3. **Concise Summary**: Summary must be under 200 characters and clearly describe what the job accomplishes
-  4. **Rich Description**: Description must be multi-line and explain: the problem solved, the process, expected outcomes, and target users
-  5. **Changelog Present**: Must include a changelog array with at least the initial version entry. Changelog should only include one entry per branch at most
-  6. **Complete Steps**: Each step must have: id (lowercase_underscores), name, description, instructions_file, outputs (at least one), and dependencies array
-  7. **Valid Dependencies**: Dependencies must reference existing step IDs with no circular references
-  8. **Input Consistency**: File inputs with `from_step` must reference a step that is in the dependencies array
-  9. **Output Paths**: Outputs must be valid filenames or paths within the main repo directory structure, never in dot-directories like `.deepwork/`. Use specific, descriptive paths that lend themselves to glob patterns (e.g., `competitive_research/acme_corp/swot.md` or `operations/reports/2026-01/spending_analysis.md`). Parameterized paths like `[competitor_name]/` are encouraged for per-entity outputs. Avoid generic names (`output.md`, `analysis.md`) and transient-sounding paths (`temp/`, `draft.md`). Supporting materials for a final output should go in a peer `_dataroom` folder (e.g., `spending_analysis_dataroom/`).
-  10. **Concise Instructions**: The content of the file, particularly the description, must not have excessively redundant information. It should be concise and to the point given that extra tokens will confuse the AI.
-
-## On Completion
-
-1. Verify outputs are created
-2. Inform user: "Step 1/4 complete, outputs: job.yml"
-3. **Tell user next command**: `/deepwork_jobs:review_job_spec`
-
----
-
-**Reference files**: `.deepwork/jobs/deepwork_jobs/job.yml`, `.deepwork/jobs/deepwork_jobs/steps/define.md`
-"""
\ No newline at end of file
diff --git a/.gemini/skills/deepwork_jobs/implement.toml b/.gemini/skills/deepwork_jobs/implement.toml
deleted file mode 100644
index 484f4bcc..00000000
--- a/.gemini/skills/deepwork_jobs/implement.toml
+++ /dev/null
@@ -1,315 +0,0 @@
-# deepwork_jobs:implement
-#
-# Generates step instruction files and syncs slash commands from the job.yml specification. Use after job spec review passes.
-#
-# Generated by DeepWork - do not edit manually
-
-description = "Generates step instruction files and syncs slash commands from the job.yml specification. Use after job spec review passes."
-
-prompt = """
-# deepwork_jobs:implement
-
-**Step 3/4** in **deepwork_jobs** workflow
-
-> Creates and manages multi-step AI workflows. Use when defining, implementing, or improving DeepWork jobs.
-
-## Prerequisites (Verify First)
-
-Before proceeding, confirm these steps are complete:
-- `/deepwork_jobs:review_job_spec`
-
-## Instructions
-
-**Goal**: Generates step instruction files and syncs slash commands from the job.yml specification. Use after job spec review passes.
-
-# Implement Job Steps
-
-## Objective
-
-Generate the DeepWork job directory structure and instruction files for each step based on the validated `job.yml` specification from the review_job_spec step.
-
-## Task
-
-Read the `job.yml` specification file and create all the necessary files to make the job functional, including directory structure and step instruction files. Then sync the commands to make them available.
-
-### Step 1: Create Directory Structure Using Script
-
-Run the `make_new_job.sh` script to create the standard directory structure:
-
-```bash
-.deepwork/jobs/deepwork_jobs/make_new_job.sh [job_name]
-```
-
-This creates:
-- `.deepwork/jobs/[job_name]/` - Main job directory
-- `.deepwork/jobs/[job_name]/steps/` - Step instruction files
-- `.deepwork/jobs/[job_name]/hooks/` - Custom validation scripts (with .gitkeep)
-- `.deepwork/jobs/[job_name]/templates/` - Example file formats (with .gitkeep)
-- `.deepwork/jobs/[job_name]/AGENTS.md` - Job management guidance
-
-**Note**: If the directory already exists (e.g., job.yml was created by define step), you can skip this step or manually create the additional directories:
-```bash
-mkdir -p .deepwork/jobs/[job_name]/hooks .deepwork/jobs/[job_name]/templates
-touch .deepwork/jobs/[job_name]/hooks/.gitkeep .deepwork/jobs/[job_name]/templates/.gitkeep
-```
-
-### Step 2: Read and Validate the Specification
-
-1. **Locate the job.yml file**
-   - Read `.deepwork/jobs/[job_name]/job.yml` from the review_job_spec step
-   - Parse the YAML content
-
-2. **Validate the specification**
-   - Ensure it follows the schema (name, version, summary, description, steps)
-   - Check that all dependencies reference existing steps
-   - Verify no circular dependencies
-   - Confirm file inputs match dependencies
-
-3. **Extract key information**
-   - Job name, version, summary, description
-   - List of all steps with their details
-   - Understand the workflow structure
-
-### Step 3: Generate Step Instruction Files
-
-For each step in the job.yml, create a comprehensive instruction file at `.deepwork/jobs/[job_name]/steps/[step_id].md`.
-
-**Template reference**: See `.deepwork/jobs/deepwork_jobs/templates/step_instruction.md.template` for the standard structure.
-
-**Complete example**: See `.deepwork/jobs/deepwork_jobs/templates/step_instruction.md.example` for a fully worked example.
-
-**Available templates in `.deepwork/jobs/deepwork_jobs/templates/`:**
-- `job.yml.template` - Job specification structure
-- `step_instruction.md.template` - Step instruction file structure
-- `agents.md.template` - AGENTS.md file structure
-- `job.yml.example` - Complete job specification example
-- `step_instruction.md.example` - Complete step instruction example
-
-**Guidelines for generating instructions:**
-
-1. **Use the job description** - The detailed description from job.yml provides crucial context
-2. **Be specific** - Don't write generic instructions; tailor them to the step's purpose
-3. **Provide examples** - Show what good output looks like
-4. **Explain the "why"** - Help the user understand the step's role in the workflow
-5. **Quality over quantity** - Detailed, actionable instructions are better than vague ones
-6. **Align with stop hooks** - If the step has `stop_hooks` defined, ensure the quality criteria in the instruction file match the validation criteria in the hooks
-7. **Ask structured questions** - When a step has user inputs, the instructions MUST explicitly tell the agent to "ask structured questions" using the AskUserQuestion tool to gather that information. Never use generic phrasing like "ask the user" - always use "ask structured questions"
-
-### Handling Stop Hooks
-
-If a step in the job.yml has `stop_hooks` defined, the generated instruction file should:
-
-1. **Mirror the quality criteria** - The "Quality Criteria" section should match what the stop hooks will validate
-2. **Be explicit about success** - Help the agent understand when the step is truly complete
-3. **Include the promise pattern** - Mention that `<promise>✓ Quality Criteria Met</promise>` should be included when criteria are met
-
-**Example: If the job.yml has:**
-```yaml
-- id: research_competitors
-  name: "Research Competitors"
-  stop_hooks:
-    - prompt: |
-        Verify the research meets criteria:
-        1. Each competitor has at least 3 data points
-        2. Sources are cited
-        3. Information is current (within last year)
-```
-
-**The instruction file should include:**
-```markdown
-## Quality Criteria
-
-- Each competitor has at least 3 distinct data points
-- All information is sourced with citations
-- Data is current (from within the last year)
-- When all criteria are met, include `<promise>✓ Quality Criteria Met</promise>` in your response
-```
-
-This alignment ensures the AI agent knows exactly what will be validated and can self-check before completing.
-
-### Using Supplementary Reference Files
-
-Step instructions can include additional `.md` files in the `steps/` directory for detailed examples, templates, or reference material. Reference them using the full path from the project root.
-
-See `.deepwork/jobs/deepwork_jobs/steps/supplemental_file_references.md` for detailed documentation and examples.
-
-### Step 4: Verify job.yml Location
-
-Verify that `job.yml` is in the correct location at `.deepwork/jobs/[job_name]/job.yml`. The define and review_job_spec steps should have created and validated it. If for some reason it's not there, you may need to create or move it.
-
-### Step 5: Sync Skills
-
-Run `deepwork sync` to generate the skills for this job:
-
-```bash
-deepwork sync
-```
-
-This will:
-- Parse the job definition
-- Generate skills for each step
-- Make the skills available in `.claude/skills/` (or appropriate platform directory)
-
-### Step 6: Consider Rules for the New Job
-
-After implementing the job, consider whether there are **rules** that would help enforce quality or consistency when working with this job's domain.
-
-**What are rules?**
-
-Rules are automated guardrails stored as markdown files in `.deepwork/rules/` that trigger when certain files change during an AI session. They help ensure:
-- Documentation stays in sync with code
-- Team guidelines are followed
-- Architectural decisions are respected
-- Quality standards are maintained
-
-**When to suggest rules:**
-
-Think about the job you just implemented and ask:
-- Does this job produce outputs that other files depend on?
-- Are there documentation files that should be updated when this job's outputs change?
-- Are there quality checks or reviews that should happen when certain files in this domain change?
-- Could changes to the job's output files impact other parts of the project?
-
-**Examples of rules that might make sense:**
-
-| Job Type | Potential Rule |
-|----------|----------------|
-| API Design | "Update API docs when endpoint definitions change" |
-| Database Schema | "Review migrations when schema files change" |
-| Competitive Research | "Update strategy docs when competitor analysis changes" |
-| Feature Development | "Update changelog when feature files change" |
-| Configuration Management | "Update install guide when config files change" |
-
-**How to offer rule creation:**
-
-If you identify one or more rules that would benefit the user, explain:
-1. **What the rule would do** - What triggers it and what action it prompts
-2. **Why it would help** - How it prevents common mistakes or keeps things in sync
-3. **What files it would watch** - The trigger patterns
-
-Then ask the user:
-
-> "Would you like me to create this rule for you? I can run `/deepwork_rules.define` to set it up."
-
-If the user agrees, invoke the `/deepwork_rules.define` command to guide them through creating the rule.
-
-**Example dialogue:**
-
-```
-Based on the competitive_research job you just created, I noticed that when
-competitor analysis files change, it would be helpful to remind you to update
-your strategy documentation.
-
-I'd suggest a rule like:
-- **Name**: "Update strategy when competitor analysis changes"
-- **Trigger**: `**/positioning_report.md`
-- **Action**: Prompt to review and update `docs/strategy.md`
-
-Would you like me to create this rule? I can run `/deepwork_rules.define` to set it up.
-```
-
-**Note:** Not every job needs rules. Only suggest them when they would genuinely help maintain consistency or quality. Don't force rules where they don't make sense.
-
-## Example Implementation
-
-For a complete worked example showing a job.yml and corresponding step instruction file, see:
-- **Job specification**: `.deepwork/jobs/deepwork_jobs/templates/job.yml.example`
-- **Step instruction**: `.deepwork/jobs/deepwork_jobs/templates/step_instruction.md.example`
-
-## Important Guidelines
-
-1. **Read the spec carefully** - Understand the job's intent from the description
-2. **Generate complete instructions** - Don't create placeholder or stub files
-3. **Maintain consistency** - Use the same structure for all step instruction files
-4. **Provide examples** - Show what good output looks like
-5. **Use context** - The job description provides valuable context for each step
-6. **Be specific** - Tailor instructions to the specific step, not generic advice
-
-## Validation Before Sync
-
-Before running `deepwork sync`, verify:
-- All directories exist
-- `job.yml` is in place
-- All step instruction files exist (one per step)
-- No file system errors
-
-## Completion Checklist
-
-Before marking this step complete, ensure:
-- [ ] job.yml validated and copied to job directory
-- [ ] All step instruction files created
-- [ ] Each instruction file is complete and actionable
-- [ ] `deepwork sync` executed successfully
-- [ ] Skills generated in platform directory
-- [ ] Considered whether rules would benefit this job (Step 6)
-- [ ] If rules suggested, offered to run `/deepwork_rules.define`
-
-## Quality Criteria
-
-- Job directory structure is correct
-- All instruction files are complete (not stubs)
-- Instructions are specific and actionable
-- Output examples are provided in each instruction file
-- Quality criteria defined for each step
-- Steps with user inputs explicitly use "ask structured questions" phrasing
-- Sync completed successfully
-- Skills available for use
-- Thoughtfully considered relevant rules for the job domain
-
-
-### Job Context
-
-Core commands for managing DeepWork jobs. These commands help you define new multi-step
-workflows and learn from running them.
-
-The `new_job` workflow guides you through defining and implementing a new job by
-asking structured questions about your workflow, understanding each step's inputs and outputs,
-reviewing the specification, and generating all necessary files.
-
-The `learn` skill reflects on conversations where DeepWork jobs were run, identifies
-confusion or inefficiencies, and improves job instructions. It also captures bespoke
-learnings specific to the current run into AGENTS.md files in the working folder.
-
-
-## Required Inputs
-
-
-**Files from Previous Steps** - Read these first:
-- `job.yml` (from `review_job_spec`)
-
-## Work Branch
-
-Use branch format: `deepwork/deepwork_jobs-[instance]-YYYYMMDD`
-
-- If on a matching work branch: continue using it
-- If on main/master: create new branch with `git checkout -b deepwork/deepwork_jobs-[instance]-$(date +%Y%m%d)`
-
-## Outputs
-
-**Required outputs**:
-- `steps/` (directory)
-
-## Quality Validation (Manual)
-
-**NOTE**: Gemini CLI does not support automated validation. Manually verify criteria before completing.
-
-**Criteria (all must be satisfied)**:
-1. **Directory Structure**: Is `.deepwork/jobs/[job_name]/` created correctly?
-2. **Complete Instructions**: Are ALL step instruction files complete (not stubs or placeholders)?
-3. **Specific & Actionable**: Are instructions tailored to each step's purpose, not generic?
-4. **Output Examples**: Does each instruction file show what good output looks like?
-5. **Quality Criteria**: Does each instruction file define quality criteria for its outputs?
-6. **Ask Structured Questions**: Do step instructions that gather user input explicitly use the phrase "ask structured questions"?
-7. **Sync Complete**: Has `deepwork sync` been run successfully?
-8. **Commands Available**: Are the slash-commands generated in `.claude/commands/`?
-9. **Rules Considered**: Has the agent thought about whether rules would benefit this job? If relevant rules were identified, did they explain them and offer to run `/deepwork_rules.define`? Not every job needs rules - only suggest when genuinely helpful.
-## On Completion
-
-1. Verify outputs are created
-2. Inform user: "Step 3/4 complete, outputs: steps/"
-3. **Workflow complete**: All steps finished. Consider creating a PR to merge the work branch.
-
----
-
-**Reference files**: `.deepwork/jobs/deepwork_jobs/job.yml`, `.deepwork/jobs/deepwork_jobs/steps/implement.md`
-"""
\ No newline at end of file
diff --git a/.gemini/skills/deepwork_jobs/index.toml b/.gemini/skills/deepwork_jobs/index.toml
deleted file mode 100644
index 6756ea88..00000000
--- a/.gemini/skills/deepwork_jobs/index.toml
+++ /dev/null
@@ -1,69 +0,0 @@
-# deepwork_jobs
-#
-# Creates and manages multi-step AI workflows. Use when defining, implementing, or improving DeepWork jobs.
-#
-# Generated by DeepWork - do not edit manually
-
-description = "Creates and manages multi-step AI workflows. Use when defining, implementing, or improving DeepWork jobs."
-
-prompt = """
-# deepwork_jobs
-
-**Multi-step workflow**: Creates and manages multi-step AI workflows. Use when defining, implementing, or improving DeepWork jobs.
-
-> **NOTE**: Gemini CLI requires manual command invocation. After each step, tell the user which command to run next.
-
-Core commands for managing DeepWork jobs. These commands help you define new multi-step
-workflows and learn from running them.
-
-The `new_job` workflow guides you through defining and implementing a new job by
-asking structured questions about your workflow, understanding each step's inputs and outputs,
-reviewing the specification, and generating all necessary files.
-
-The `learn` skill reflects on conversations where DeepWork jobs were run, identifies
-confusion or inefficiencies, and improves job instructions. It also captures bespoke
-learnings specific to the current run into AGENTS.md files in the working folder.
-
-
-## Available Steps
-
-1. **define** - Creates a job.yml specification by gathering workflow requirements through structured questions. Use when starting a new multi-step workflow.
-   Command: `/deepwork_jobs:define`
-2. **review_job_spec** - Reviews job.yml against quality criteria using a sub-agent for unbiased validation. Use after defining a job specification. (requires: define)
-   Command: `/deepwork_jobs:review_job_spec`
-3. **implement** - Generates step instruction files and syncs slash commands from the job.yml specification. Use after job spec review passes. (requires: review_job_spec)
-   Command: `/deepwork_jobs:implement`
-4. **learn** - Analyzes conversation history to improve job instructions and capture learnings. Use after running a job to refine it.
-   Command: `/deepwork_jobs:learn`
-
-## Execution Instructions
-
-### Step 1: Analyze Intent
-
-Parse any text following `/deepwork_jobs` to determine user intent:
-- "define" or related terms → start at `/deepwork_jobs:define`
-- "review_job_spec" or related terms → start at `/deepwork_jobs:review_job_spec`
-- "implement" or related terms → start at `/deepwork_jobs:implement`
-- "learn" or related terms → start at `/deepwork_jobs:learn`
-
-### Step 2: Direct User to Starting Step
-
-Tell the user which command to run:
-```
-/deepwork_jobs:define
-```
-
-### Step 3: Guide Through Workflow
-
-After each step completes, tell the user the next command to run until workflow is complete.
-
-### Handling Ambiguous Intent
-
-If user intent is unclear:
-- Present available steps as numbered options
-- Ask user to select the starting point
-
-## Reference
-
-- Job definition: `.deepwork/jobs/deepwork_jobs/job.yml`
-"""
\ No newline at end of file
diff --git a/.gemini/skills/deepwork_jobs/learn.toml b/.gemini/skills/deepwork_jobs/learn.toml
deleted file mode 100644
index ef16944b..00000000
--- a/.gemini/skills/deepwork_jobs/learn.toml
+++ /dev/null
@@ -1,437 +0,0 @@
-# deepwork_jobs:learn
-#
-# Analyzes conversation history to improve job instructions and capture learnings. Use after running a job to refine it.
-#
-# Generated by DeepWork - do not edit manually
-
-description = "Analyzes conversation history to improve job instructions and capture learnings. Use after running a job to refine it."
-
-prompt = """
-# deepwork_jobs:learn
-
-**Standalone command** - can be run anytime
-
-> Creates and manages multi-step AI workflows. Use when defining, implementing, or improving DeepWork jobs.
-
-
-## Instructions
-
-**Goal**: Analyzes conversation history to improve job instructions and capture learnings. Use after running a job to refine it.
-
-# Learn from Job Execution
-
-## Objective
-
-Think deeply about this task. Reflect on the current conversation to identify learnings from DeepWork job executions, improve job instructions with generalizable insights, and capture bespoke (run-specific) learnings in AGENTS.md files in the deepest common folder that would contain all work on the topic in the future.
-
-## Task
-
-Analyze the conversation history to extract learnings and improvements, then apply them appropriately:
-- **Generalizable learnings** → Update job instruction files
-- **Bespoke learnings** (specific to this run) → Add to AGENTS.md in the deepest common folder for the topic
-
-### Step 1: Analyze Conversation for Job Executions
-
-1. **Scan the conversation** for DeepWork slash commands that were run
-   - Look for patterns like `/job_name.step_id`
-   - Identify which jobs and steps were executed
-   - Note the order of execution
-
-2. **Identify the target folder**
-   - This should be the deepest common folder that would contain all work on the topic in the future
-   - Should be clear from conversation history where work was done
-   - If unclear, run `git diff` to see where changes were made on the branch
-
-3. **If no job was specified**, ask the user:
-   - "Which DeepWork job would you like me to learn from?"
-   - List available jobs from `.deepwork/jobs/`
-
-### Step 2: Identify Points of Confusion and Inefficiency
-
-Review the conversation for:
-
-1. **Confusion signals**
-   - Questions the agent asked that shouldn't have been necessary
-   - Misunderstandings about what a step required
-   - Incorrect outputs that needed correction
-   - Ambiguous instructions that led to wrong interpretations
-
-2. **Inefficiency signals**
-   - Extra steps or iterations that were needed
-   - Information that had to be repeated
-   - Context that was missing from instructions
-   - Dependencies that weren't clear
-
-3. **Error patterns**
-   - Failed validations and why they failed
-   - Quality criteria that were misunderstood
-   - Edge cases that weren't handled
-
-4. **Success patterns**
-   - What worked particularly well
-   - Efficient approaches worth preserving
-   - Good examples that could be added to instructions
-
-### Step 3: Classify Learnings
-
-For each learning identified, determine if it is:
-
-**Generalizable** (should improve instructions):
-- Would help ANY future run of this job
-- Addresses unclear or missing guidance
-- Fixes incorrect assumptions in instructions
-- Adds helpful examples or context
-- Examples:
-  - "Step instructions should mention that X format is required"
-  - "Quality criteria should include checking for Y"
-  - "Add example of correct output format"
-
-**doc spec-Related** (should improve doc spec files):
-- Improvements to document quality criteria
-- Changes to document structure or format
-- Updated audience or frequency information
-- Examples:
-  - "The report should include a summary table"
-  - "Quality criterion 'Visualization' needs clearer requirements"
-  - "Documents need a section for action items"
-
-**Bespoke** (should go in AGENTS.md):
-- Specific to THIS project/codebase/run
-- Depends on local conventions or structure
-- References specific files or paths
-- Would not apply to other uses of this job
-- Examples:
-  - "In this codebase, API endpoints are in `src/api/`"
-  - "This project uses camelCase for function names"
-  - "The main config file is at `config/settings.yml`"
-
-### Step 3.5: Identify doc spec-Related Learnings
-
-Review the conversation for doc spec-related improvements:
-
-1. **Quality Criteria Changes**
-   - Were any quality criteria unclear or insufficient?
-   - Did the agent repeatedly fail certain criteria?
-   - Are there new criteria that should be added?
-
-2. **Document Structure Changes**
-   - Did the user request different sections?
-   - Were parts of the document format confusing?
-   - Should the example document be updated?
-
-3. **Metadata Updates**
-   - Has the target audience changed?
-   - Should frequency or path patterns be updated?
-
-**Signals for doc spec improvements:**
-- User asked for changes to document format
-- Repeated validation failures on specific criteria
-- Feedback about missing sections or information
-- Changes to how documents are organized/stored
-
-### Step 4: Update Job Instructions (Generalizable Learnings)
-
-For each generalizable learning:
-
-1. **Locate the instruction file**
-   - Path: `.deepwork/jobs/[job_name]/steps/[step_id].md`
-
-2. **Make targeted improvements**
-   - Add missing context or clarification
-   - Include helpful examples
-   - Clarify ambiguous instructions
-   - Update quality criteria if needed
-
-3. **Keep instructions concise**
-   - Avoid redundancy - don't repeat the same guidance in multiple places
-   - Be direct - remove verbose explanations that don't add value
-   - Prefer bullet points over paragraphs where appropriate
-
-4. **Preserve instruction structure**
-   - Keep existing sections (Objective, Task, Process, Output Format, Quality Criteria)
-   - Add to appropriate sections rather than restructuring
-   - Maintain consistency with other steps
-
-5. **Track changes for changelog**
-   - Note what was changed and why
-   - Prepare changelog entry for job.yml
-
-### Step 4b: Extract Shared Content into Referenced Files
-
-Review all instruction files for the job and identify content that:
-- Appears in multiple step instructions (duplicated)
-- Is lengthy and could be extracted for clarity
-- Would benefit from being maintained in one place
-
-**Extract to shared files:**
-
-1. **Create shared files** in `.deepwork/jobs/[job_name]/steps/shared/`
-   - `conventions.md` - Coding/formatting conventions used across steps
-   - `examples.md` - Common examples referenced by multiple steps
-   - `schemas.md` - Data structures or formats used throughout
-
-2. **Reference from instructions** using markdown includes or explicit references:
-   ```markdown
-   ## Conventions
-
-   Follow the conventions defined in `shared/conventions.md`.
-   ```
-
-3. **Benefits of extraction:**
-   - Single source of truth - update once, applies everywhere
-   - Shorter instruction files - easier to read and maintain
-   - Consistent guidance across steps
-
-### Step 4.5: Update doc spec Files (doc spec-Related Learnings)
-
-If doc spec-related learnings were identified:
-
-1. **Locate the doc spec file**
-   - Find doc spec references in job.yml outputs (look for `doc_spec: .deepwork/doc_specs/[doc_spec_name].md`)
-   - doc spec files are at `.deepwork/doc_specs/[doc_spec_name].md`
-
-2. **Update quality_criteria array**
-   - Add new criteria with name and description
-   - Modify existing criteria descriptions for clarity
-   - Remove criteria that are no longer relevant
-
-3. **Update example document**
-   - Modify the markdown body to reflect structure changes
-   - Ensure the example matches updated criteria
-
-4. **Update metadata as needed**
-   - target_audience: If audience has changed
-   - frequency: If production cadence has changed
-   - path_patterns: If storage location has changed
-
-**Example doc spec update:**
-```yaml
-# Before
-quality_criteria:
-  - name: Visualization
-    description: Include charts
-
-# After
-quality_criteria:
-  - name: Visualization
-    description: Include Mermaid.js charts showing spend breakdown by service and month-over-month trend
-```
-
-### Step 5: Create/Update AGENTS.md (Bespoke Learnings)
-
-The AGENTS.md file captures project-specific knowledge that helps future agent runs.
-
-1. **Determine the correct location**
-   - Place AGENTS.md in the deepest common folder that would contain all work on the topic in the future
-   - This ensures the knowledge is available when working in that context
-   - If uncertain, place at the project root
-
-2. **Use file references where possible**
-   - Instead of duplicating information, reference source files
-   - This keeps AGENTS.md in sync as the codebase evolves
-   - Pattern: "See `path/to/file.ext` for [description]"
-
-3. **AGENTS.md structure**: See `.deepwork/jobs/deepwork_jobs/templates/agents.md.template` for the standard format.
-
-4. **Writing entries**
-   - Be concise but specific
-   - Always prefer file references over inline content
-   - Use line numbers when referencing specific code: `file.ext:42`
-   - Group related learnings together
-
-### Step 6: Update Job Version and Changelog
-
-If instruction files were modified:
-
-1. **Bump version in job.yml**
-   - Patch version (0.0.x) for instruction improvements
-   - Minor version (0.x.0) if quality criteria changed
-
-2. **Add changelog entry**
-   ```yaml
-   - version: "[new_version]"
-     changes: "Improved [step] instructions based on execution learnings: [brief description]"
-   ```
-
-### Step 7: Sync Skills
-
-**Run deepwork sync** (if instructions were modified)
-```bash
-deepwork sync
-```
-
-## File Reference Patterns
-
-When adding entries to AGENTS.md, prefer these patterns:
-
-**Good (references)**:
-```markdown
-- API endpoints follow REST conventions. See `src/api/routes.ts` for examples.
-- Error handling pattern: See `src/utils/errors.ts:15-30`
-- Configuration schema: Defined in `config/schema.json`
-```
-
-**Avoid (duplicating)**:
-```markdown
-- API endpoints should return JSON with this format: { status: ..., data: ... }
-- Error handling should use try/catch with custom error classes
-- Configuration requires these fields: name, version, ...
-```
-
-## Quality Criteria
-
-- Conversation has been analyzed for job executions
-- Points of confusion and inefficiency are identified
-- Learnings are correctly classified (generalizable vs bespoke)
-- Job instructions updated for generalizable improvements
-- Instructions are concise - no redundancy or unnecessary verbosity
-- Shared/lengthy content extracted into referenced files where appropriate
-- AGENTS.md created/updated with bespoke learnings
-- File references used instead of duplicating content
-- AGENTS.md is in the correct folder (the deepest common folder for the topic)
-- When all criteria are met, include `<promise>✓ Quality Criteria Met</promise>`
-
-## Example Dialog
-
-```
-User: /deepwork_jobs.learn
-
-Claude: I'll analyze this conversation for learnings from DeepWork job executions.
-
-**Conversation Analysis**
-
-I found the following job executions:
-1. `/competitive_research.identify_competitors` - Completed
-2. `/competitive_research.research_competitors` - Completed with 2 iterations
-3. `/competitive_research.comparative_analysis` - Completed
-
-**Identified Issues**
-
-1. **Confusion in step 2**: The instructions didn't specify that competitor research should include pricing information. This caused an extra iteration.
-
-2. **Missing context**: The agent had to ask about the industry focus, which should have been carried forward from step 1.
-
-**Classification**
-
-- Generalizable: Add pricing to research requirements in instructions
-- Bespoke: This project focuses on B2B SaaS in the HR tech space
-
-**Actions Taken**
-
-1. Updated `.deepwork/jobs/competitive_research/steps/research_competitors.md`:
-   - Added "Include pricing information (tiers, models, approximate ranges)" to research requirements
-   - Added note about carrying forward industry context from previous step
-
-2. Created `./competitive_research_output/AGENTS.md`:
-   ```markdown
-   # Project Context for Competitive Research
-
-   ## Job-Specific Context
-
-   ### competitive_research
-
-   #### Industry Focus
-   - Domain: B2B SaaS, HR Technology sector
-   - See `identify_competitors_output.md` for market definition
-
-   #### Competitors Identified
-   - Reference: `competitor_list.md` for full details
-
-   ## Last Updated
-   - Date: 2024-01-15
-   - From conversation about: Initial competitive analysis run
-   ```
-
-3. Updated job.yml version to 1.0.1 with changelog entry
-
-4. Ran `deepwork sync`
-
-**Summary**
-
-Updated job instructions and created AGENTS.md with bespoke learnings.
-```
-
-## Handling Edge Cases
-
-**No job executions found**:
-- Ask user which job they'd like to analyze
-- Or offer to review available jobs
-
-**Multiple jobs executed**:
-- Analyze each job separately
-- Create separate AGENTS.md entries or files as appropriate
-
-**AGENTS.md already exists**:
-- Read existing content
-- Append new learnings to appropriate sections
-- Update "Last Updated" section
-
-**No issues found**:
-- Document what worked well
-- Consider if any successful patterns should be added to instructions as examples
-
-**Sensitive information**:
-- Never include secrets, credentials, or PII in AGENTS.md
-- Reference config files instead of including values
-
-
-### Job Context
-
-Core commands for managing DeepWork jobs. These commands help you define new multi-step
-workflows and learn from running them.
-
-The `new_job` workflow guides you through defining and implementing a new job by
-asking structured questions about your workflow, understanding each step's inputs and outputs,
-reviewing the specification, and generating all necessary files.
-
-The `learn` skill reflects on conversations where DeepWork jobs were run, identifies
-confusion or inefficiencies, and improves job instructions. It also captures bespoke
-learnings specific to the current run into AGENTS.md files in the working folder.
-
-
-## Required Inputs
-
-**User Parameters** - Gather from user before starting:
-- **job_name**: Name of the job that was run (optional - will auto-detect from conversation)
-
-
-## Work Branch
-
-Use branch format: `deepwork/deepwork_jobs-[instance]-YYYYMMDD`
-
-- If on a matching work branch: continue using it
-- If on main/master: create new branch with `git checkout -b deepwork/deepwork_jobs-[instance]-$(date +%Y%m%d)`
-
-## Outputs
-
-**Required outputs**:
-- `AGENTS.md`
-
-## Quality Validation (Manual)
-
-**NOTE**: Gemini CLI does not support automated validation. Manually verify criteria before completing.
-
-**Criteria (all must be satisfied)**:
-1. **Conversation Analyzed**: Did the agent review the conversation for DeepWork job executions?
-2. **Confusion Identified**: Did the agent identify points of confusion, errors, or inefficiencies?
-3. **Instructions Improved**: Were job instructions updated to address identified issues?
-4. **Instructions Concise**: Are instructions free of redundancy and unnecessary verbosity?
-5. **Shared Content Extracted**: Is lengthy/duplicated content extracted into referenced files?
-6. **doc spec Reviewed (if applicable)**: For jobs with doc spec outputs, were doc spec-related learnings identified?
-7. **doc spec Updated (if applicable)**: Were doc spec files updated with improved quality criteria or structure?
-8. **Bespoke Learnings Captured**: Were run-specific learnings added to AGENTS.md?
-9. **File References Used**: Do AGENTS.md entries reference other files where appropriate?
-10. **Working Folder Correct**: Is AGENTS.md in the correct working folder for the job?
-11. **Generalizable Separated**: Are generalizable improvements in instructions, not AGENTS.md?
-12. **Sync Complete**: Has `deepwork sync` been run if instructions were modified?
-## On Completion
-
-1. Verify outputs are created
-2. Inform user: "learn complete, outputs: AGENTS.md"
-
-This standalone command can be re-run anytime.
-
----
-
-**Reference files**: `.deepwork/jobs/deepwork_jobs/job.yml`, `.deepwork/jobs/deepwork_jobs/steps/learn.md`
-"""
\ No newline at end of file
diff --git a/.gemini/skills/deepwork_jobs/review_job_spec.toml b/.gemini/skills/deepwork_jobs/review_job_spec.toml
deleted file mode 100644
index 265eb151..00000000
--- a/.gemini/skills/deepwork_jobs/review_job_spec.toml
+++ /dev/null
@@ -1,300 +0,0 @@
-# deepwork_jobs:review_job_spec
-#
-# Reviews job.yml against quality criteria using a sub-agent for unbiased validation. Use after defining a job specification.
-#
-# Generated by DeepWork - do not edit manually
-
-description = "Reviews job.yml against quality criteria using a sub-agent for unbiased validation. Use after defining a job specification."
-
-prompt = """
-# deepwork_jobs:review_job_spec
-
-**Step 2/4** in **deepwork_jobs** workflow
-
-> Creates and manages multi-step AI workflows. Use when defining, implementing, or improving DeepWork jobs.
-
-## Prerequisites (Verify First)
-
-Before proceeding, confirm these steps are complete:
-- `/deepwork_jobs:define`
-
-## Instructions
-
-**Goal**: Reviews job.yml against quality criteria using a sub-agent for unbiased validation. Use after defining a job specification.
-
-# Review Job Specification
-
-## Objective
-
-Review the `job.yml` created in the define step against the doc spec quality criteria using a sub-agent for unbiased evaluation, then iterate on fixes until all criteria pass.
-
-## Why This Step Exists
-
-The define step focuses on understanding user requirements and creating a job specification. This review step ensures the specification meets quality standards before implementation. Using a sub-agent provides an unbiased "fresh eyes" review that catches issues the main agent might miss after being deeply involved in the definition process.
-
-## Task
-
-Use a sub-agent to review the job.yml against all 9 doc spec quality criteria, then fix any failed criteria. Repeat until all criteria pass.
-
-### Step 1: Read the Job Specification
-
-Read the `job.yml` file created in the define step:
-
-```
-.deepwork/jobs/[job_name]/job.yml
-```
-
-Also read the doc spec for reference:
-
-```
-.deepwork/doc_specs/job_spec.md
-```
-
-### Step 2: Spawn Review Sub-Agent
-
-Use the Task tool to spawn a sub-agent that will provide an unbiased review:
-
-```
-Task tool parameters:
-- subagent_type: "general-purpose"
-- model: "haiku"
-- description: "Review job.yml against doc spec"
-- prompt: [see below]
-```
-
-**Sub-agent prompt template:**
-
-```
-Review this job.yml against the following 9 quality criteria from the doc spec.
-
-For each criterion, respond with:
-- PASS or FAIL
-- If FAIL: specific issue and suggested fix
-
-## job.yml Content
-
-[paste the full job.yml content here]
-
-## Quality Criteria
-
-1. **Valid Identifier**: Job name must be lowercase with underscores, no spaces or special characters (e.g., `competitive_research`, `monthly_report`)
-
-2. **Semantic Version**: Version must follow semantic versioning format X.Y.Z (e.g., `1.0.0`, `2.1.3`)
-
-3. **Concise Summary**: Summary must be under 200 characters and clearly describe what the job accomplishes
-
-4. **Rich Description**: Description must be multi-line and explain: the problem solved, the process, expected outcomes, and target users
-
-5. **Changelog Present**: Must include a changelog array with at least the initial version entry
-
-6. **Complete Steps**: Each step must have: id (lowercase_underscores), name, description, instructions_file, outputs (at least one), and dependencies array
-
-7. **Valid Dependencies**: Dependencies must reference existing step IDs with no circular references
-
-8. **Input Consistency**: File inputs with `from_step` must reference a step that is in the dependencies array
-
-9. **Output Paths**: Outputs must be valid filenames or paths (e.g., `report.md` or `reports/analysis.md`)
-
-## Response Format
-
-Respond with a structured evaluation:
-
-### Overall: [X/9 PASS]
-
-### Criterion Results
-
-1. Valid Identifier: [PASS/FAIL]
-   [If FAIL: Issue and fix]
-
-2. Semantic Version: [PASS/FAIL]
-   [If FAIL: Issue and fix]
-
-[... continue for all 9 criteria ...]
-
-### Summary of Required Fixes
-
-[List any fixes needed, or "No fixes required - all criteria pass"]
-```
-
-### Step 3: Review Sub-Agent Findings
-
-Parse the sub-agent's response:
-
-1. **Count passing criteria** - How many of the 9 criteria passed?
-2. **Identify failures** - List specific criteria that failed
-3. **Note suggested fixes** - What changes does the sub-agent recommend?
-
-### Step 4: Fix Failed Criteria
-
-For each failed criterion, edit the job.yml to address the issue:
-
-**Common fixes by criterion:**
-
-| Criterion | Common Issue | Fix |
-|-----------|-------------|-----|
-| Valid Identifier | Spaces or uppercase | Convert to lowercase_underscores |
-| Semantic Version | Missing or invalid format | Set to `"1.0.0"` or fix format |
-| Concise Summary | Too long or vague | Shorten to <200 chars, be specific |
-| Rich Description | Single line or missing context | Add multi-line explanation with problem/process/outcome/users |
-| Changelog Present | Missing changelog | Add `changelog:` with initial version entry |
-| Complete Steps | Missing required fields | Add id, name, description, instructions_file, outputs, dependencies |
-| Valid Dependencies | Non-existent step or circular | Fix step ID reference or reorder dependencies |
-| Input Consistency | from_step not in dependencies | Add the referenced step to dependencies array |
-| Output Paths | Invalid characters or format | Use valid filename/path format |
-
-### Step 5: Re-Run Review (If Needed)
-
-If any criteria failed:
-
-1. **Spawn a new sub-agent** with the updated job.yml content
-2. **Review the new findings**
-3. **Fix any remaining issues**
-4. **Repeat until all 9 criteria pass**
-
-### Step 6: Confirm Completion
-
-When all 9 criteria pass:
-
-1. **Announce success**: "All 9 doc spec quality criteria pass."
-2. **List what was validated**:
-   - Valid Identifier
-   - Semantic Version
-   - Concise Summary
-   - Rich Description
-   - Changelog Present
-   - Complete Steps
-   - Valid Dependencies
-   - Input Consistency
-   - Output Paths
-3. **Include the promise**: `<promise>Quality Criteria Met</promise>`
-4. **Guide to next step**: "Run `/deepwork_jobs.implement` to generate the step instruction files."
-
-## Example Review Cycle
-
-```
-Claude: Let me review the job.yml against the doc spec quality criteria.
-
-[Spawns sub-agent with haiku model]
-
-Sub-agent response:
-### Overall: 7/9 PASS
-
-### Criterion Results
-1. Valid Identifier: PASS
-2. Semantic Version: PASS
-3. Concise Summary: FAIL
-   Issue: Summary is 245 characters, exceeds 200 char limit
-   Fix: Shorten to "Competitive analysis workflow for product positioning"
-4. Rich Description: PASS
-5. Changelog Present: FAIL
-   Issue: No changelog array present
-   Fix: Add changelog with initial version entry
-6. Complete Steps: PASS
-7. Valid Dependencies: PASS
-8. Input Consistency: PASS
-9. Output Paths: PASS
-
-### Summary of Required Fixes
-1. Shorten summary to under 200 characters
-2. Add changelog array with initial version
-
-Claude: I found 2 issues. Let me fix them.
-
-[Edits job.yml to shorten summary and add changelog]
-
-Claude: Let me verify the fixes with another review.
-
-[Spawns new sub-agent]
-
-Sub-agent response:
-### Overall: 9/9 PASS
-
-### Summary of Required Fixes
-No fixes required - all criteria pass
-
-Claude: All 9 doc spec quality criteria pass.
-
-<promise>Quality Criteria Met</promise>
-
-**Next step:** Run `/deepwork_jobs.implement` to generate the step instruction files.
-```
-
-## Quality Criteria
-
-- **Sub-Agent Used**: A sub-agent was spawned to provide unbiased review (not just self-review)
-- **All doc spec Criteria Evaluated**: The sub-agent assessed all 9 quality criteria from the doc spec
-- **Findings Addressed**: All failed criteria were fixed by the main agent
-- **Validation Loop Complete**: The review-fix cycle continued until all criteria passed
-- **Promise Included**: The response includes `<promise>Quality Criteria Met</promise>` when complete
-
-## Output
-
-The validated `job.yml` file at `.deepwork/jobs/[job_name]/job.yml` that passes all 9 doc spec quality criteria.
-
-
-### Job Context
-
-Core commands for managing DeepWork jobs. These commands help you define new multi-step
-workflows and learn from running them.
-
-The `new_job` workflow guides you through defining and implementing a new job by
-asking structured questions about your workflow, understanding each step's inputs and outputs,
-reviewing the specification, and generating all necessary files.
-
-The `learn` skill reflects on conversations where DeepWork jobs were run, identifies
-confusion or inefficiencies, and improves job instructions. It also captures bespoke
-learnings specific to the current run into AGENTS.md files in the working folder.
-
-
-## Required Inputs
-
-
-**Files from Previous Steps** - Read these first:
-- `job.yml` (from `define`)
-
-## Work Branch
-
-Use branch format: `deepwork/deepwork_jobs-[instance]-YYYYMMDD`
-
-- If on a matching work branch: continue using it
-- If on main/master: create new branch with `git checkout -b deepwork/deepwork_jobs-[instance]-$(date +%Y%m%d)`
-
-## Outputs
-
-**Required outputs**:
-- `job.yml`
-  **Doc Spec**: DeepWork Job Specification
-  > YAML specification file that defines a multi-step workflow job for AI agents
-  **Definition**: `.deepwork/doc_specs/job_spec.md`
-  **Target Audience**: AI agents executing jobs and developers defining workflows
-  **Quality Criteria**:
-  1. **Valid Identifier**: Job name must be lowercase with underscores, no spaces or special characters (e.g., `competitive_research`, `monthly_report`)
-  2. **Semantic Version**: Version must follow semantic versioning format X.Y.Z (e.g., `1.0.0`, `2.1.3`)
-  3. **Concise Summary**: Summary must be under 200 characters and clearly describe what the job accomplishes
-  4. **Rich Description**: Description must be multi-line and explain: the problem solved, the process, expected outcomes, and target users
-  5. **Changelog Present**: Must include a changelog array with at least the initial version entry. Changelog should only include one entry per branch at most
-  6. **Complete Steps**: Each step must have: id (lowercase_underscores), name, description, instructions_file, outputs (at least one), and dependencies array
-  7. **Valid Dependencies**: Dependencies must reference existing step IDs with no circular references
-  8. **Input Consistency**: File inputs with `from_step` must reference a step that is in the dependencies array
-  9. **Output Paths**: Outputs must be valid filenames or paths within the main repo directory structure, never in dot-directories like `.deepwork/`. Use specific, descriptive paths that lend themselves to glob patterns (e.g., `competitive_research/acme_corp/swot.md` or `operations/reports/2026-01/spending_analysis.md`). Parameterized paths like `[competitor_name]/` are encouraged for per-entity outputs. Avoid generic names (`output.md`, `analysis.md`) and transient-sounding paths (`temp/`, `draft.md`). Supporting materials for a final output should go in a peer `_dataroom` folder (e.g., `spending_analysis_dataroom/`).
-  10. **Concise Instructions**: The content of the file, particularly the description, must not have excessively redundant information. It should be concise and to the point given that extra tokens will confuse the AI.
-
-## Quality Validation (Manual)
-
-**NOTE**: Gemini CLI does not support automated validation. Manually verify criteria before completing.
-
-**Criteria (all must be satisfied)**:
-1. **Sub-Agent Used**: Was a sub-agent spawned to provide unbiased review?
-2. **All doc spec Criteria Evaluated**: Did the sub-agent assess all 9 quality criteria?
-3. **Findings Addressed**: Were all failed criteria addressed by the main agent?
-4. **Validation Loop Complete**: Did the review-fix cycle continue until all criteria passed?
-## On Completion
-
-1. Verify outputs are created
-2. Inform user: "Step 2/4 complete, outputs: job.yml"
-3. **Tell user next command**: `/deepwork_jobs:implement`
-
----
-
-**Reference files**: `.deepwork/jobs/deepwork_jobs/job.yml`, `.deepwork/jobs/deepwork_jobs/steps/review_job_spec.md`
-"""
\ No newline at end of file
diff --git a/.gemini/skills/deepwork_rules/define.toml b/.gemini/skills/deepwork_rules/define.toml
deleted file mode 100644
index 980ad931..00000000
--- a/.gemini/skills/deepwork_rules/define.toml
+++ /dev/null
@@ -1,327 +0,0 @@
-# deepwork_rules:define
-#
-# Creates a rule file that triggers when specified files change. Use when setting up documentation sync, code review requirements, or automated commands.
-#
-# Generated by DeepWork - do not edit manually
-
-description = "Creates a rule file that triggers when specified files change. Use when setting up documentation sync, code review requirements, or automated commands."
-
-prompt = """
-# deepwork_rules:define
-
-**Standalone command** - can be run anytime
-
-> Creates file-change rules that enforce guidelines during AI sessions. Use when automating documentation sync or code review triggers.
-
-
-## Instructions
-
-**Goal**: Creates a rule file that triggers when specified files change. Use when setting up documentation sync, code review requirements, or automated commands.
-
-# Define Rule
-
-## Objective
-
-Create a new rule file in the `.deepwork/rules/` directory to enforce team guidelines, documentation requirements, or other constraints when specific files change.
-
-## Task
-
-Guide the user through defining a new rule by asking structured questions. **Do not create the rule without first understanding what they want to enforce.**
-
-**Important**: Use the AskUserQuestion tool to ask structured questions when gathering information from the user. This provides a better user experience with clear options and guided choices.
-
-### Step 1: Understand the Rule Purpose
-
-Start by asking structured questions to understand what the user wants to enforce:
-
-1. **What guideline or constraint should this rule enforce?**
-   - What situation triggers the need for action?
-   - What files or directories, when changed, should trigger this rule?
-   - Examples: "When config files change", "When API code changes", "When database schema changes"
-
-2. **What action should be taken?**
-   - What should the agent do when the rule triggers?
-   - Update documentation? Perform a security review? Update tests?
-   - Is there a specific file or process that needs attention?
-
-3. **Are there any "safety" conditions?**
-   - Are there files that, if also changed, mean the rule doesn't need to fire?
-   - For example: If config changes AND install_guide.md changes, assume docs are already updated
-   - This prevents redundant prompts when the user has already done the right thing
-
-### Step 2: Choose the Detection Mode
-
-Help the user select the appropriate detection mode:
-
-**Trigger/Safety Mode** (most common):
-- Fires when trigger patterns match AND no safety patterns match
-- Use for: "When X changes, check Y" rules
-- Example: When config changes, verify install docs
-
-**Set Mode** (bidirectional correspondence):
-- Fires when files that should change together don't all change
-- Use for: Source/test pairing, model/migration sync
-- Example: `src/foo.py` and `tests/foo_test.py` should change together
-
-**Pair Mode** (directional correspondence):
-- Fires when a trigger file changes but expected files don't
-- Changes to expected files alone do NOT trigger
-- Use for: API code requires documentation updates (but docs can update independently)
-
-### Step 3: Define the Patterns
-
-Help the user define glob patterns for files.
-
-**Common patterns:**
-- `src/**/*.py` - All Python files in src directory (recursive)
-- `app/config/**/*` - All files in app/config directory
-- `*.md` - All markdown files in root
-- `src/api/**/*` - All files in the API directory
-- `migrations/**/*.sql` - All SQL migrations
-
-**Variable patterns (for set/pair modes):**
-- `src/{path}.py` - Captures path variable (e.g., `foo/bar` from `src/foo/bar.py`)
-- `tests/{path}_test.py` - Uses same path variable in corresponding file
-- `{name}` matches single segment, `{path}` matches multiple segments
-
-**Pattern syntax:**
-- `*` - Matches any characters within a single path segment
-- `**` - Matches any characters across multiple path segments (recursive)
-- `?` - Matches a single character
-
-### Step 4: Choose the Comparison Mode (Optional)
-
-The `compare_to` field controls what baseline is used when detecting "changed files":
-
-**Options:**
-- `base` (default) - Compares to the base of the current branch (merge-base with main/master). Best for feature branches.
-- `default_tip` - Compares to the current tip of the default branch. Useful for seeing difference from production.
-- `prompt` - Compares to the state at the start of each prompt. For rules about very recent changes.
-
-Most rules should use the default (`base`) and don't need to specify `compare_to`.
-
-### Step 5: Write the Instructions
-
-Create clear, actionable instructions for what the agent should do when the rule fires.
-
-**Good instructions include:**
-- What to check or review
-- What files might need updating
-- Specific actions to take
-- Quality criteria for completion
-
-**Template variables available in instructions:**
-- `{trigger_files}` - Files that triggered the rule
-- `{expected_files}` - Expected corresponding files (for set/pair modes)
-
-### Step 6: Create the Rule File
-
-Create a new file in `.deepwork/rules/` with a kebab-case filename:
-
-**File Location**: `.deepwork/rules/{rule-name}.md`
-
-**Format for Trigger/Safety Mode:**
-```markdown
----
-name: Friendly Name for the Rule
-trigger: "glob/pattern/**/*"  # or array: ["pattern1", "pattern2"]
-safety: "optional/pattern"    # optional, or array
-compare_to: base              # optional: "base" (default), "default_tip", or "prompt"
----
-Instructions for the agent when this rule fires.
-
-Multi-line markdown content is supported.
-```
-
-**Format for Set Mode (bidirectional):**
-```markdown
----
-name: Source/Test Pairing
-set:
-  - src/{path}.py
-  - tests/{path}_test.py
----
-Source and test files should change together.
-
-Modified: {trigger_files}
-Expected: {expected_files}
-```
-
-**Format for Pair Mode (directional):**
-```markdown
----
-name: API Documentation
-pair:
-  trigger: api/{path}.py
-  expects: docs/api/{path}.md
----
-API code requires documentation updates.
-
-Changed API: {trigger_files}
-Update docs: {expected_files}
-```
-
-### Step 7: Verify the Rule
-
-After creating the rule:
-
-1. **Check the YAML frontmatter** - Ensure valid YAML formatting
-2. **Test trigger patterns** - Verify patterns match intended files
-3. **Review instructions** - Ensure they're clear and actionable
-4. **Check for conflicts** - Ensure the rule doesn't conflict with existing ones
-
-## Example Rules
-
-### Update Documentation on Config Changes
-`.deepwork/rules/config-docs.md`:
-```markdown
----
-name: Update Install Guide on Config Changes
-trigger: app/config/**/*
-safety: docs/install_guide.md
----
-Configuration files have been modified. Please review docs/install_guide.md
-and update it if any installation instructions need to change based on the
-new configuration.
-```
-
-### Security Review for Auth Code
-`.deepwork/rules/security-review.md`:
-```markdown
----
-name: Security Review for Authentication Changes
-trigger:
-  - src/auth/**/*
-  - src/security/**/*
-safety:
-  - SECURITY.md
-  - docs/security_audit.md
----
-Authentication or security code has been changed. Please:
-
-1. Review for hardcoded credentials or secrets
-2. Check input validation on user inputs
-3. Verify access control logic is correct
-4. Update security documentation if needed
-```
-
-### Source/Test Pairing
-`.deepwork/rules/source-test-pairing.md`:
-```markdown
----
-name: Source/Test Pairing
-set:
-  - src/{path}.py
-  - tests/{path}_test.py
----
-Source and test files should change together.
-
-When modifying source code, ensure corresponding tests are updated.
-When adding tests, ensure they test actual source code.
-
-Modified: {trigger_files}
-Expected: {expected_files}
-```
-
-### API Documentation Sync
-`.deepwork/rules/api-docs.md`:
-```markdown
----
-name: API Documentation Update
-pair:
-  trigger: src/api/{path}.py
-  expects: docs/api/{path}.md
----
-API code has changed. Please verify that API documentation in docs/api/
-is up to date with the code changes. Pay special attention to:
-
-- New or changed endpoints
-- Modified request/response schemas
-- Updated authentication requirements
-
-Changed API: {trigger_files}
-Update: {expected_files}
-```
-
-## Output Format
-
-### .deepwork/rules/{rule-name}.md
-Create a new file with the rule definition using YAML frontmatter and markdown body.
-
-## Quality Criteria
-
-- Asked structured questions to understand user requirements
-- Rule name is clear and descriptive (used in promise tags)
-- Correct detection mode selected for the use case
-- Patterns accurately match the intended files
-- Safety patterns prevent unnecessary triggering (if applicable)
-- Instructions are actionable and specific
-- YAML frontmatter is valid
-
-## Context
-
-Rules are evaluated automatically when the agent finishes a task. The system:
-1. Determines which files have changed based on each rule's `compare_to` setting
-2. Evaluates rules based on their detection mode (trigger/safety, set, or pair)
-3. Skips rules where the correspondence is satisfied (for set/pair) or safety matched
-4. Prompts you with instructions for any triggered rules
-
-You can mark a rule as addressed by including `<promise>Rule Name</promise>` in your response (replace Rule Name with the actual rule name from the `name` field). This tells the system you've already handled that rule's requirements.
-
-
-### Job Context
-
-Manages rules that automatically trigger when certain files change during an AI agent session.
-Rules help ensure that code changes follow team guidelines, documentation is updated,
-and architectural decisions are respected.
-
-IMPORTANT: Rules are evaluated at the "Stop" hook, which fires when an agent finishes its turn.
-This includes when sub-agents complete their work. Rules are NOT evaluated immediately after
-each file edit - they batch up and run once at the end of the agent's response cycle.
-- Command action rules: Execute their command (e.g., `uv sync`) when the agent stops
-- Prompt action rules: Display instructions to the agent, blocking until addressed
-
-Rules are stored as individual markdown files with YAML frontmatter in the `.deepwork/rules/`
-directory. Each rule file specifies:
-- Detection mode: trigger/safety, set (bidirectional), or pair (directional)
-- Patterns: Glob patterns for matching files, with optional variable capture
-- Action type: prompt (default) to show instructions, or command to run a shell command
-- Instructions: Markdown content describing what the agent should do
-
-Example use cases:
-- Update installation docs when configuration files change
-- Require security review when authentication code is modified
-- Ensure API documentation stays in sync with API code
-- Enforce source/test file pairing
-- Auto-run `uv sync` when pyproject.toml changes (command action)
-
-
-## Required Inputs
-
-**User Parameters** - Gather from user before starting:
-- **rule_purpose**: What guideline or constraint should this rule enforce?
-
-
-## Work Branch
-
-Use branch format: `deepwork/deepwork_rules-[instance]-YYYYMMDD`
-
-- If on a matching work branch: continue using it
-- If on main/master: create new branch with `git checkout -b deepwork/deepwork_rules-[instance]-$(date +%Y%m%d)`
-
-## Outputs
-
-**Required outputs**:
-- `.deepwork/rules/{rule-name}.md`
-
-## On Completion
-
-1. Verify outputs are created
-2. Inform user: "define complete, outputs: .deepwork/rules/{rule-name}.md"
-
-This standalone command can be re-run anytime.
-
----
-
-**Reference files**: `.deepwork/jobs/deepwork_rules/job.yml`, `.deepwork/jobs/deepwork_rules/steps/define.md`
-"""
\ No newline at end of file
diff --git a/.gemini/skills/deepwork_rules/index.toml b/.gemini/skills/deepwork_rules/index.toml
deleted file mode 100644
index 51fca30e..00000000
--- a/.gemini/skills/deepwork_rules/index.toml
+++ /dev/null
@@ -1,73 +0,0 @@
-# deepwork_rules
-#
-# Creates file-change rules that enforce guidelines during AI sessions. Use when automating documentation sync or code review triggers.
-#
-# Generated by DeepWork - do not edit manually
-
-description = "Creates file-change rules that enforce guidelines during AI sessions. Use when automating documentation sync or code review triggers."
-
-prompt = """
-# deepwork_rules
-
-**Multi-step workflow**: Creates file-change rules that enforce guidelines during AI sessions. Use when automating documentation sync or code review triggers.
-
-> **NOTE**: Gemini CLI requires manual command invocation. After each step, tell the user which command to run next.
-
-Manages rules that automatically trigger when certain files change during an AI agent session.
-Rules help ensure that code changes follow team guidelines, documentation is updated,
-and architectural decisions are respected.
-
-IMPORTANT: Rules are evaluated at the "Stop" hook, which fires when an agent finishes its turn.
-This includes when sub-agents complete their work. Rules are NOT evaluated immediately after
-each file edit - they batch up and run once at the end of the agent's response cycle.
-- Command action rules: Execute their command (e.g., `uv sync`) when the agent stops
-- Prompt action rules: Display instructions to the agent, blocking until addressed
-
-Rules are stored as individual markdown files with YAML frontmatter in the `.deepwork/rules/`
-directory. Each rule file specifies:
-- Detection mode: trigger/safety, set (bidirectional), or pair (directional)
-- Patterns: Glob patterns for matching files, with optional variable capture
-- Action type: prompt (default) to show instructions, or command to run a shell command
-- Instructions: Markdown content describing what the agent should do
-
-Example use cases:
-- Update installation docs when configuration files change
-- Require security review when authentication code is modified
-- Ensure API documentation stays in sync with API code
-- Enforce source/test file pairing
-- Auto-run `uv sync` when pyproject.toml changes (command action)
-
-
-## Available Steps
-
-1. **define** - Creates a rule file that triggers when specified files change. Use when setting up documentation sync, code review requirements, or automated commands.
-   Command: `/deepwork_rules:define`
-
-## Execution Instructions
-
-### Step 1: Analyze Intent
-
-Parse any text following `/deepwork_rules` to determine user intent:
-- "define" or related terms → start at `/deepwork_rules:define`
-
-### Step 2: Direct User to Starting Step
-
-Tell the user which command to run:
-```
-/deepwork_rules:define
-```
-
-### Step 3: Guide Through Workflow
-
-After each step completes, tell the user the next command to run until workflow is complete.
-
-### Handling Ambiguous Intent
-
-If user intent is unclear:
-- Present available steps as numbered options
-- Ask user to select the starting point
-
-## Reference
-
-- Job definition: `.deepwork/jobs/deepwork_rules/job.yml`
-"""
\ No newline at end of file
diff --git a/.gemini/skills/manual_tests/index.toml b/.gemini/skills/manual_tests/index.toml
deleted file mode 100644
index a7f18b16..00000000
--- a/.gemini/skills/manual_tests/index.toml
+++ /dev/null
@@ -1,94 +0,0 @@
-# manual_tests
-#
-# Runs all manual hook/rule tests using sub-agents. Use when validating that DeepWork rules fire correctly.
-#
-# Generated by DeepWork - do not edit manually
-
-description = "Runs all manual hook/rule tests using sub-agents. Use when validating that DeepWork rules fire correctly."
-
-prompt = """
-# manual_tests
-
-**Multi-step workflow**: Runs all manual hook/rule tests using sub-agents. Use when validating that DeepWork rules fire correctly.
-
-> **NOTE**: Gemini CLI requires manual command invocation. After each step, tell the user which command to run next.
-
-A workflow for running manual tests that validate DeepWork rules/hooks fire correctly.
-
-The **run_all** workflow tests that rules fire when they should AND do not fire when they shouldn't.
-Each test is run in a SUB-AGENT (not the main agent) because:
-1. Sub-agents run in isolated contexts where file changes can be detected
-2. The Stop hook automatically evaluates rules when each sub-agent completes
-3. The main agent can observe whether hooks fired without triggering them manually
-
-CRITICAL: All tests MUST run in sub-agents. The main agent MUST NOT make the file
-edits itself - it spawns sub-agents to make edits, then observes whether the hooks
-fired automatically when those sub-agents returned.
-
-Sub-agent configuration:
-- All sub-agents should use `model: "haiku"` to minimize cost and latency
-- All sub-agents should use `max_turns: 5` to prevent hanging indefinitely
-
-Steps:
-1. reset - Ensure clean environment before testing (clears queue, reverts files)
-2. run_not_fire_tests - Run all "should NOT fire" tests in PARALLEL sub-agents (6 tests)
-3. run_fire_tests - Run all "should fire" tests in SERIAL sub-agents with resets between (6 tests)
-4. infinite_block_tests - Run infinite block tests in SERIAL (4 tests - both fire and not-fire)
-
-Reset procedure (see steps/reset.md):
-- Reset runs FIRST to ensure a clean environment before any tests
-- Each step also calls reset internally when needed (between tests, after completion)
-- Reset reverts git changes, removes created files, and clears the rules queue
-
-Test types covered:
-- Trigger/Safety mode
-- Set mode (bidirectional)
-- Pair mode (directional)
-- Command action
-- Multi safety
-- Infinite block (prompt and command) - in dedicated step
-- Created mode (new files only)
-
-
-## Available Steps
-
-1. **reset** - Runs FIRST to ensure clean environment. Also called internally by other steps when they need to revert changes and clear the queue.
-   Command: `/manual_tests:reset`
-2. **run_not_fire_tests** - Runs all 6 'should NOT fire' tests in parallel sub-agents. Use to verify rules don't fire when safety conditions are met. (requires: reset)
-   Command: `/manual_tests:run_not_fire_tests`
-3. **run_fire_tests** - Runs all 6 'should fire' tests serially with resets between each. Use after NOT-fire tests to verify rules fire correctly. (requires: run_not_fire_tests)
-   Command: `/manual_tests:run_fire_tests`
-4. **infinite_block_tests** - Runs all 4 infinite block tests serially. Tests both 'should fire' (no promise) and 'should NOT fire' (with promise) scenarios. (requires: run_fire_tests)
-   Command: `/manual_tests:infinite_block_tests`
-
-## Execution Instructions
-
-### Step 1: Analyze Intent
-
-Parse any text following `/manual_tests` to determine user intent:
-- "reset" or related terms → start at `/manual_tests:reset`
-- "run_not_fire_tests" or related terms → start at `/manual_tests:run_not_fire_tests`
-- "run_fire_tests" or related terms → start at `/manual_tests:run_fire_tests`
-- "infinite_block_tests" or related terms → start at `/manual_tests:infinite_block_tests`
-
-### Step 2: Direct User to Starting Step
-
-Tell the user which command to run:
-```
-/manual_tests:reset
-```
-
-### Step 3: Guide Through Workflow
-
-After each step completes, tell the user the next command to run until workflow is complete.
-
-### Handling Ambiguous Intent
-
-If user intent is unclear:
-- Present available steps as numbered options
-- Ask user to select the starting point
-
-## Reference
-
-- Job definition: `.deepwork/jobs/manual_tests/job.yml`
-"""
\ No newline at end of file
diff --git a/.gemini/skills/manual_tests/infinite_block_tests.toml b/.gemini/skills/manual_tests/infinite_block_tests.toml
deleted file mode 100644
index a1fbf553..00000000
--- a/.gemini/skills/manual_tests/infinite_block_tests.toml
+++ /dev/null
@@ -1,238 +0,0 @@
-# manual_tests:infinite_block_tests
-#
-# Runs all 4 infinite block tests serially. Tests both 'should fire' (no promise) and 'should NOT fire' (with promise) scenarios.
-#
-# Generated by DeepWork - do not edit manually
-
-description = "Runs all 4 infinite block tests serially. Tests both 'should fire' (no promise) and 'should NOT fire' (with promise) scenarios."
-
-prompt = """
-# manual_tests:infinite_block_tests
-
-**Step 4/4** in **manual_tests** workflow
-
-> Runs all manual hook/rule tests using sub-agents. Use when validating that DeepWork rules fire correctly.
-
-## Prerequisites (Verify First)
-
-Before proceeding, confirm these steps are complete:
-- `/manual_tests:run_fire_tests`
-
-## Instructions
-
-**Goal**: Runs all 4 infinite block tests serially. Tests both 'should fire' (no promise) and 'should NOT fire' (with promise) scenarios.
-
-# Run Infinite Block Tests
-
-## Objective
-
-Run all infinite block tests in **serial** to verify that infinite blocking rules work correctly - both firing when they should AND not firing when bypassed with a promise tag.
-
-## CRITICAL: Sub-Agent Requirement
-
-**You MUST spawn sub-agents to make all file edits. DO NOT edit the test files yourself.**
-
-Why sub-agents are required:
-1. Sub-agents run in isolated contexts where file changes are detected
-2. When a sub-agent completes, the Stop hook **automatically** evaluates rules
-3. You (the main agent) observe whether hooks fired - you do NOT manually trigger them
-4. If you edit files directly, the hooks won't fire because you're not a completing sub-agent
-
-**NEVER manually run `echo '{}' | python -m deepwork.hooks.rules_check`** - this defeats the purpose of the test. Hooks must fire AUTOMATICALLY when sub-agents return.
-
-## CRITICAL: Serial Execution
-
-**These tests MUST run ONE AT A TIME, with resets between each.**
-
-Why serial execution is required for infinite block tests:
-- Infinite block tests can block indefinitely without a promise tag
-- Running them in parallel would cause unpredictable blocking behavior
-- Serial execution allows controlled observation of each test
-
-## Task
-
-Run all 4 infinite block tests in **serial**, resetting between each, and verify correct blocking behavior.
-
-### Process
-
-For EACH test below, follow this cycle:
-
-1. **Launch a sub-agent** using the Task tool with:
-   - `model: "haiku"` - Use the fast model to minimize cost and latency
-   - `max_turns: 5` - **Critical safeguard**: Limits API round-trips to prevent infinite hanging. The Task tool does not support a direct timeout, so max_turns is our only protection against runaway sub-agents.
-2. **Wait for the sub-agent to complete**
-3. **Observe whether the hook fired automatically** - you should see a blocking prompt or command output
-4. **If no visible blocking occurred, check the queue**:
-   ```bash
-   ls -la .deepwork/tmp/rules/queue/
-   cat .deepwork/tmp/rules/queue/*.json 2>/dev/null
-   ```
-   - If queue entries exist with status "queued", the hook DID fire but blocking wasn't visible
-   - If queue is empty, the hook did NOT fire at all
-   - Record the queue status along with the result
-5. **Record the result** - see expected outcomes for each test
-6. **Reset** (MANDATORY after each test) - follow the reset step instructions:
-   ```bash
-   git reset HEAD manual_tests/ && git checkout -- manual_tests/ && rm -f manual_tests/test_created_mode/new_config.yml
-   deepwork rules clear_queue
-   ```
-7. **Check for early termination**: If **2 tests have now failed**, immediately:
-   - Stop running any remaining tests
-   - Report the results summary showing which tests passed/failed
-   - The job halts here - do NOT proceed with remaining tests
-8. **Proceed to the next test** (only if fewer than 2 failures)
-
-**IMPORTANT**: Only launch ONE sub-agent at a time. Wait for it to complete and reset before launching the next.
-
-### Test Cases (run serially)
-
-**Test 1: Infinite Block Prompt - Should NOT Fire (with promise)**
-- Sub-agent prompt: "Edit `manual_tests/test_infinite_block_prompt/dangerous.py` to add a comment. Include `<promise>I have verified this change is safe</promise>` in your response to bypass the infinite block."
-- Sub-agent config: `model: "haiku"`, `max_turns: 5`
-- Expected: Sub-agent completes WITHOUT blocking - the promise tag bypasses the infinite block
-- Result: PASS if no blocking, FAIL if blocked
-
-**Test 2: Infinite Block Command - Should NOT Fire (with promise)**
-- Sub-agent prompt: "Edit `manual_tests/test_infinite_block_command/risky.py` to add a comment. Include `<promise>I have verified this change is safe</promise>` in your response to bypass the infinite block."
-- Sub-agent config: `model: "haiku"`, `max_turns: 5`
-- Expected: Sub-agent completes WITHOUT blocking - the promise tag bypasses the infinite block
-- Result: PASS if no blocking, FAIL if blocked
-
-**Test 3: Infinite Block Prompt - Should Fire (no promise)**
-- Sub-agent prompt: "Edit `manual_tests/test_infinite_block_prompt/dangerous.py` to add a comment. Do NOT include any promise tags."
-- Sub-agent config: `model: "haiku"`, `max_turns: 5`
-- Expected behavior:
-  1. **Should fire**: Hook fires and shows blocking prompt
-  2. **Should return in reasonable time**: Sub-agent hits max_turns limit and returns (not stuck forever)
-- Result criteria:
-  - PASS if: Hook fired (visible block OR queue entry) AND sub-agent returned within reasonable time
-  - FAIL if: Hook did not fire, OR sub-agent hung indefinitely
-
-**Test 4: Infinite Block Command - Should Fire (no promise)**
-- Sub-agent prompt: "Edit `manual_tests/test_infinite_block_command/risky.py` to add a comment. Do NOT include any promise tags."
-- Sub-agent config: `model: "haiku"`, `max_turns: 5`
-- Expected behavior:
-  1. **Should fire**: Hook fires and command fails (exit code 1)
-  2. **Should return in reasonable time**: Sub-agent hits max_turns limit and returns (not stuck forever)
-- Result criteria:
-  - PASS if: Hook fired (visible block OR queue entry) AND sub-agent returned within reasonable time
-  - FAIL if: Hook did not fire, OR sub-agent hung indefinitely
-
-### Results Tracking
-
-Record the result after each test:
-
-| Test Case | Scenario | Should Fire? | Returned in Time? | Visible Block? | Queue Entry? | Result |
-|-----------|----------|:------------:|:-----------------:|:--------------:|:------------:|:------:|
-| Infinite Block Prompt | With promise | No | Yes | | | |
-| Infinite Block Command | With promise | No | Yes | | | |
-| Infinite Block Prompt | No promise | Yes | Yes | | | |
-| Infinite Block Command | No promise | Yes | Yes | | | |
-
-**Result criteria:**
-- **"Should NOT fire" tests (with promise)**: PASS if no blocking AND no queue entry AND returned quickly
-- **"Should fire" tests (no promise)**: PASS if hook fired (visible block OR queue entry) AND returned in reasonable time (max_turns limit)
-
-**Queue Entry Status Guide:**
-- If queue has entry with status "queued" -> Hook fired, rule was shown to agent
-- If queue has entry with status "passed" -> Hook fired, rule was satisfied
-- If queue is empty -> Hook did NOT fire
-
-## Quality Criteria
-
-- **Sub-agents spawned**: Tests were run using the Task tool to spawn sub-agents - the main agent did NOT edit files directly
-- **Correct sub-agent config**: All sub-agents used `model: "haiku"` and `max_turns: 5`
-- **Serial execution**: Sub-agents were launched ONE AT A TIME, not in parallel
-- **Reset between tests**: Reset step was followed after each test
-- **Hooks observed (not triggered)**: The main agent observed hook behavior without manually running rules_check - hooks fired AUTOMATICALLY
-- **"Should NOT fire" tests verified**: Promise tests completed without blocking and no queue entries
-- **"Should fire" tests verified**: Non-promise tests fired (visible block OR queue entry) AND returned in reasonable time (not hung indefinitely)
-- **Early termination on 2 failures**: If 2 tests failed, testing halted immediately and results were reported
-- **Results recorded**: Pass/fail status was recorded for each test run
-- When all criteria are met, include `<promise>Quality Criteria Met</promise>` in your response
-
-## Reference
-
-See [test_reference.md](test_reference.md) for the complete test matrix and rule descriptions.
-
-## Context
-
-This step runs after both the "should NOT fire" and "should fire" test steps. It specifically tests infinite blocking behavior which requires serial execution due to the blocking nature of these rules.
-
-
-### Job Context
-
-A workflow for running manual tests that validate DeepWork rules/hooks fire correctly.
-
-The **run_all** workflow tests that rules fire when they should AND do not fire when they shouldn't.
-Each test is run in a SUB-AGENT (not the main agent) because:
-1. Sub-agents run in isolated contexts where file changes can be detected
-2. The Stop hook automatically evaluates rules when each sub-agent completes
-3. The main agent can observe whether hooks fired without triggering them manually
-
-CRITICAL: All tests MUST run in sub-agents. The main agent MUST NOT make the file
-edits itself - it spawns sub-agents to make edits, then observes whether the hooks
-fired automatically when those sub-agents returned.
-
-Sub-agent configuration:
-- All sub-agents should use `model: "haiku"` to minimize cost and latency
-- All sub-agents should use `max_turns: 5` to prevent hanging indefinitely
-
-Steps:
-1. reset - Ensure clean environment before testing (clears queue, reverts files)
-2. run_not_fire_tests - Run all "should NOT fire" tests in PARALLEL sub-agents (6 tests)
-3. run_fire_tests - Run all "should fire" tests in SERIAL sub-agents with resets between (6 tests)
-4. infinite_block_tests - Run infinite block tests in SERIAL (4 tests - both fire and not-fire)
-
-Reset procedure (see steps/reset.md):
-- Reset runs FIRST to ensure a clean environment before any tests
-- Each step also calls reset internally when needed (between tests, after completion)
-- Reset reverts git changes, removes created files, and clears the rules queue
-
-Test types covered:
-- Trigger/Safety mode
-- Set mode (bidirectional)
-- Pair mode (directional)
-- Command action
-- Multi safety
-- Infinite block (prompt and command) - in dedicated step
-- Created mode (new files only)
-
-
-## Required Inputs
-
-
-**Files from Previous Steps** - Read these first:
-- `fire_results` (from `run_fire_tests`)
-
-## Work Branch
-
-Use branch format: `deepwork/manual_tests-[instance]-YYYYMMDD`
-
-- If on a matching work branch: continue using it
-- If on main/master: create new branch with `git checkout -b deepwork/manual_tests-[instance]-$(date +%Y%m%d)`
-
-## Outputs
-
-**Required outputs**:
-- `infinite_block_results`
-
-## Quality Validation (Manual)
-
-**NOTE**: Gemini CLI does not support automated validation. Manually verify criteria before completing.
-
-**Criteria (all must be satisfied)**:
-1. **Sub-Agents Used**: Each test run via Task tool with `model: "haiku"` and `max_turns: 5`
-2. **Serial Execution**: Sub-agents launched ONE AT A TIME with reset between each
-3. **Promise Tests**: Completed WITHOUT blocking (promise bypassed the rule)
-4. **No-Promise Tests**: Hook fired AND sub-agent returned in reasonable time (not hung)
-## On Completion
-
-1. Verify outputs are created
-2. Inform user: "Step 4/4 complete, outputs: infinite_block_results"
-3. **Workflow complete**: All steps finished. Consider creating a PR to merge the work branch.
-
----
-
-**Reference files**: `.deepwork/jobs/manual_tests/job.yml`, `.deepwork/jobs/manual_tests/steps/infinite_block_tests.md`
-"""
\ No newline at end of file
diff --git a/.gemini/skills/manual_tests/reset.toml b/.gemini/skills/manual_tests/reset.toml
deleted file mode 100644
index 8d7935f0..00000000
--- a/.gemini/skills/manual_tests/reset.toml
+++ /dev/null
@@ -1,128 +0,0 @@
-# manual_tests:reset
-#
-# Runs FIRST to ensure clean environment. Also called internally by other steps when they need to revert changes and clear the queue.
-#
-# Generated by DeepWork - do not edit manually
-
-description = "Runs FIRST to ensure clean environment. Also called internally by other steps when they need to revert changes and clear the queue."
-
-prompt = """
-# manual_tests:reset
-
-**Step 1/4** in **manual_tests** workflow
-
-> Runs all manual hook/rule tests using sub-agents. Use when validating that DeepWork rules fire correctly.
-
-
-## Instructions
-
-**Goal**: Runs FIRST to ensure clean environment. Also called internally by other steps when they need to revert changes and clear the queue.
-
-# Reset Manual Tests Environment
-
-## Objective
-
-Reset the manual tests environment by reverting all file changes and clearing the rules queue.
-
-## Purpose
-
-This step contains all the reset logic that other steps can call when they need to clean up between or after tests. It ensures consistent cleanup across all test steps.
-
-## Reset Commands
-
-Run these commands to reset the environment:
-
-```bash
-git reset HEAD manual_tests/ && git checkout -- manual_tests/ && rm -f manual_tests/test_created_mode/new_config.yml
-deepwork rules clear_queue
-```
-
-## Command Explanation
-
-- `git reset HEAD manual_tests/` - Unstages files from the index (rules_check uses `git add -A` which stages changes)
-- `git checkout -- manual_tests/` - Reverts working tree to match HEAD
-- `rm -f manual_tests/test_created_mode/new_config.yml` - Removes any new files created during tests (the created mode test creates this file)
-- `deepwork rules clear_queue` - Clears the rules queue so rules can fire again (prevents anti-infinite-loop mechanism from blocking subsequent tests)
-
-## When to Reset
-
-- **After each serial test**: Reset immediately after observing the result to prevent cross-contamination
-- **After parallel tests complete**: Reset once all parallel sub-agents have returned
-- **On early termination**: Reset before reporting failure results
-- **Before starting a new test step**: Ensure clean state
-
-## Quality Criteria
-
-- **All changes reverted**: `git status` shows no changes in `manual_tests/`
-- **Queue cleared**: `.deepwork/tmp/rules/queue/` is empty
-- **New files removed**: `manual_tests/test_created_mode/new_config.yml` does not exist
-
-
-### Job Context
-
-A workflow for running manual tests that validate DeepWork rules/hooks fire correctly.
-
-The **run_all** workflow tests that rules fire when they should AND do not fire when they shouldn't.
-Each test is run in a SUB-AGENT (not the main agent) because:
-1. Sub-agents run in isolated contexts where file changes can be detected
-2. The Stop hook automatically evaluates rules when each sub-agent completes
-3. The main agent can observe whether hooks fired without triggering them manually
-
-CRITICAL: All tests MUST run in sub-agents. The main agent MUST NOT make the file
-edits itself - it spawns sub-agents to make edits, then observes whether the hooks
-fired automatically when those sub-agents returned.
-
-Sub-agent configuration:
-- All sub-agents should use `model: "haiku"` to minimize cost and latency
-- All sub-agents should use `max_turns: 5` to prevent hanging indefinitely
-
-Steps:
-1. reset - Ensure clean environment before testing (clears queue, reverts files)
-2. run_not_fire_tests - Run all "should NOT fire" tests in PARALLEL sub-agents (6 tests)
-3. run_fire_tests - Run all "should fire" tests in SERIAL sub-agents with resets between (6 tests)
-4. infinite_block_tests - Run infinite block tests in SERIAL (4 tests - both fire and not-fire)
-
-Reset procedure (see steps/reset.md):
-- Reset runs FIRST to ensure a clean environment before any tests
-- Each step also calls reset internally when needed (between tests, after completion)
-- Reset reverts git changes, removes created files, and clears the rules queue
-
-Test types covered:
-- Trigger/Safety mode
-- Set mode (bidirectional)
-- Pair mode (directional)
-- Command action
-- Multi safety
-- Infinite block (prompt and command) - in dedicated step
-- Created mode (new files only)
-
-
-
-## Work Branch
-
-Use branch format: `deepwork/manual_tests-[instance]-YYYYMMDD`
-
-- If on a matching work branch: continue using it
-- If on main/master: create new branch with `git checkout -b deepwork/manual_tests-[instance]-$(date +%Y%m%d)`
-
-## Outputs
-
-**Required outputs**:
-- `clean_environment`
-
-## Quality Validation (Manual)
-
-**NOTE**: Gemini CLI does not support automated validation. Manually verify criteria before completing.
-
-**Criteria (all must be satisfied)**:
-1. **Environment Clean**: Git changes reverted, created files removed, and rules queue cleared
-## On Completion
-
-1. Verify outputs are created
-2. Inform user: "Step 1/4 complete, outputs: clean_environment"
-3. **Tell user next command**: `/manual_tests:run_not_fire_tests`
-
----
-
-**Reference files**: `.deepwork/jobs/manual_tests/job.yml`, `.deepwork/jobs/manual_tests/steps/reset.md`
-"""
\ No newline at end of file
diff --git a/.gemini/skills/manual_tests/run_fire_tests.toml b/.gemini/skills/manual_tests/run_fire_tests.toml
deleted file mode 100644
index 1f471b83..00000000
--- a/.gemini/skills/manual_tests/run_fire_tests.toml
+++ /dev/null
@@ -1,237 +0,0 @@
-# manual_tests:run_fire_tests
-#
-# Runs all 6 'should fire' tests serially with resets between each. Use after NOT-fire tests to verify rules fire correctly.
-#
-# Generated by DeepWork - do not edit manually
-
-description = "Runs all 6 'should fire' tests serially with resets between each. Use after NOT-fire tests to verify rules fire correctly."
-
-prompt = """
-# manual_tests:run_fire_tests
-
-**Step 3/4** in **manual_tests** workflow
-
-> Runs all manual hook/rule tests using sub-agents. Use when validating that DeepWork rules fire correctly.
-
-## Prerequisites (Verify First)
-
-Before proceeding, confirm these steps are complete:
-- `/manual_tests:run_not_fire_tests`
-
-## Instructions
-
-**Goal**: Runs all 6 'should fire' tests serially with resets between each. Use after NOT-fire tests to verify rules fire correctly.
-
-# Run Should-Fire Tests
-
-## Objective
-
-Run all "should fire" tests in **serial** sub-agents to verify that rules fire correctly when their trigger conditions are met without safety conditions.
-
-## CRITICAL: Sub-Agent Requirement
-
-**You MUST spawn sub-agents to make all file edits. DO NOT edit the test files yourself.**
-
-Why sub-agents are required:
-1. Sub-agents run in isolated contexts where file changes are detected
-2. When a sub-agent completes, the Stop hook **automatically** evaluates rules
-3. You (the main agent) observe whether hooks fired - you do NOT manually trigger them
-4. If you edit files directly, the hooks won't fire because you're not a completing sub-agent
-
-**NEVER manually run `echo '{}' | python -m deepwork.hooks.rules_check`** - this defeats the purpose of the test. Hooks must fire AUTOMATICALLY when sub-agents return.
-
-## CRITICAL: Serial Execution
-
-**These tests MUST run ONE AT A TIME, with resets between each.**
-
-Why serial execution is required:
-- These tests edit ONLY the trigger file (not the safety)
-- If multiple sub-agents run in parallel, sub-agent A's hook will see changes from sub-agent B
-- This causes cross-contamination: A gets blocked by rules triggered by B's changes
-- Run one test, observe the hook, reset, then run the next
-
-## Task
-
-Run all 6 "should fire" tests in **serial** sub-agents, resetting between each, and verify that blocking hooks fire automatically.
-
-### Process
-
-For EACH test below, follow this cycle:
-
-1. **Launch a sub-agent** using the Task tool with:
-   - `model: "haiku"` - Use the fast model to minimize cost and latency
-   - `max_turns: 5` - Prevent sub-agents from hanging indefinitely
-2. **Wait for the sub-agent to complete**
-3. **Observe whether the hook fired automatically** - you should see a blocking prompt or command output
-4. **If no visible blocking occurred, check the queue**:
-   ```bash
-   ls -la .deepwork/tmp/rules/queue/
-   cat .deepwork/tmp/rules/queue/*.json 2>/dev/null
-   ```
-   - If queue entries exist with status "queued", the hook DID fire but blocking wasn't visible
-   - If queue is empty, the hook did NOT fire at all
-   - Record the queue status along with the result
-5. **Record the result** - pass if hook fired (visible block OR queue entry), fail if neither
-6. **Reset** (MANDATORY after each test) - follow the reset step instructions:
-   ```bash
-   git reset HEAD manual_tests/ && git checkout -- manual_tests/ && rm -f manual_tests/test_created_mode/new_config.yml
-   deepwork rules clear_queue
-   ```
-   See [reset.md](reset.md) for detailed explanation of these commands.
-7. **Check for early termination**: If **2 tests have now failed**, immediately:
-   - Stop running any remaining tests
-   - Report the results summary showing which tests passed/failed
-   - The job halts here - do NOT proceed with remaining tests
-8. **Proceed to the next test** (only if fewer than 2 failures)
-
-**IMPORTANT**: Only launch ONE sub-agent at a time. Wait for it to complete and reset before launching the next.
-
-### Test Cases (run serially)
-
-**Test 1: Trigger/Safety**
-- Sub-agent prompt: "Edit ONLY `manual_tests/test_trigger_safety_mode/feature.py` to add a comment. Do NOT edit the `_doc.md` file."
-- Sub-agent config: `model: "haiku"`, `max_turns: 5`
-- Expected: Hook fires with prompt about updating documentation
-
-**Test 2: Set Mode**
-- Sub-agent prompt: "Edit ONLY `manual_tests/test_set_mode/module_source.py` to add a comment. Do NOT edit the `_test.py` file."
-- Sub-agent config: `model: "haiku"`, `max_turns: 5`
-- Expected: Hook fires with prompt about updating tests
-
-**Test 3: Pair Mode**
-- Sub-agent prompt: "Edit ONLY `manual_tests/test_pair_mode/handler_trigger.py` to add a comment. Do NOT edit the `_expected.md` file."
-- Sub-agent config: `model: "haiku"`, `max_turns: 5`
-- Expected: Hook fires with prompt about updating expected output
-
-**Test 4: Command Action**
-- Sub-agent prompt: "Edit `manual_tests/test_command_action/input.txt` to add some text."
-- Sub-agent config: `model: "haiku"`, `max_turns: 5`
-- Expected: Command runs automatically, appending to the log file (this rule always runs, no safety condition)
-
-**Test 5: Multi Safety**
-- Sub-agent prompt: "Edit ONLY `manual_tests/test_multi_safety/core.py` to add a comment. Do NOT edit any of the safety files (`_safety_a.md`, `_safety_b.md`, or `_safety_c.md`)."
-- Sub-agent config: `model: "haiku"`, `max_turns: 5`
-- Expected: Hook fires with prompt about updating safety documentation
-
-**Test 6: Created Mode**
-- Sub-agent prompt: "Create a NEW file `manual_tests/test_created_mode/new_config.yml` with some YAML content. This must be a NEW file, not a modification."
-- Sub-agent config: `model: "haiku"`, `max_turns: 5`
-- Expected: Hook fires with prompt about new configuration files
-
-### Results Tracking
-
-Record the result after each test:
-
-| Test Case | Should Fire | Visible Block? | Queue Entry? | Result |
-|-----------|-------------|:--------------:|:------------:|:------:|
-| Trigger/Safety | Edit .py only | | | |
-| Set Mode | Edit _source.py only | | | |
-| Pair Mode | Edit _trigger.py only | | | |
-| Command Action | Edit .txt | | | |
-| Multi Safety | Edit .py only | | | |
-| Created Mode | Create NEW .yml | | | |
-
-**Queue Entry Status Guide:**
-- If queue has entry with status "queued" -> Hook fired, rule was shown to agent
-- If queue has entry with status "passed" -> Hook fired, rule was satisfied
-- If queue is empty -> Hook did NOT fire
-
-## Quality Criteria
-
-- **Sub-agents spawned**: Tests were run using the Task tool to spawn sub-agents - the main agent did NOT edit files directly
-- **Correct sub-agent config**: All sub-agents used `model: "haiku"` and `max_turns: 5`
-- **Serial execution**: Sub-agents were launched ONE AT A TIME, not in parallel
-- **Reset between tests**: Reset step was followed after each test
-- **Hooks fired automatically**: The main agent observed the blocking hooks firing automatically when each sub-agent returned - the agent did NOT manually run rules_check
-- **Early termination on 2 failures**: If 2 tests failed, testing halted immediately and results were reported
-- **Results recorded**: Pass/fail status was recorded for each test case
-- When all criteria are met, include `<promise>Quality Criteria Met</promise>` in your response
-
-## Reference
-
-See [test_reference.md](test_reference.md) for the complete test matrix and rule descriptions.
-
-## Context
-
-This step runs after the "should NOT fire" tests. These tests verify that rules correctly fire when trigger conditions are met without safety conditions. The serial execution with resets is essential to prevent cross-contamination between tests. Infinite block tests are handled in a separate step.
-
-
-### Job Context
-
-A workflow for running manual tests that validate DeepWork rules/hooks fire correctly.
-
-The **run_all** workflow tests that rules fire when they should AND do not fire when they shouldn't.
-Each test is run in a SUB-AGENT (not the main agent) because:
-1. Sub-agents run in isolated contexts where file changes can be detected
-2. The Stop hook automatically evaluates rules when each sub-agent completes
-3. The main agent can observe whether hooks fired without triggering them manually
-
-CRITICAL: All tests MUST run in sub-agents. The main agent MUST NOT make the file
-edits itself - it spawns sub-agents to make edits, then observes whether the hooks
-fired automatically when those sub-agents returned.
-
-Sub-agent configuration:
-- All sub-agents should use `model: "haiku"` to minimize cost and latency
-- All sub-agents should use `max_turns: 5` to prevent hanging indefinitely
-
-Steps:
-1. reset - Ensure clean environment before testing (clears queue, reverts files)
-2. run_not_fire_tests - Run all "should NOT fire" tests in PARALLEL sub-agents (6 tests)
-3. run_fire_tests - Run all "should fire" tests in SERIAL sub-agents with resets between (6 tests)
-4. infinite_block_tests - Run infinite block tests in SERIAL (4 tests - both fire and not-fire)
-
-Reset procedure (see steps/reset.md):
-- Reset runs FIRST to ensure a clean environment before any tests
-- Each step also calls reset internally when needed (between tests, after completion)
-- Reset reverts git changes, removes created files, and clears the rules queue
-
-Test types covered:
-- Trigger/Safety mode
-- Set mode (bidirectional)
-- Pair mode (directional)
-- Command action
-- Multi safety
-- Infinite block (prompt and command) - in dedicated step
-- Created mode (new files only)
-
-
-## Required Inputs
-
-
-**Files from Previous Steps** - Read these first:
-- `not_fire_results` (from `run_not_fire_tests`)
-
-## Work Branch
-
-Use branch format: `deepwork/manual_tests-[instance]-YYYYMMDD`
-
-- If on a matching work branch: continue using it
-- If on main/master: create new branch with `git checkout -b deepwork/manual_tests-[instance]-$(date +%Y%m%d)`
-
-## Outputs
-
-**Required outputs**:
-- `fire_results`
-
-## Quality Validation (Manual)
-
-**NOTE**: Gemini CLI does not support automated validation. Manually verify criteria before completing.
-
-**Criteria (all must be satisfied)**:
-1. **Sub-Agents Used**: Did the main agent spawn a sub-agent (using the Task tool) for EACH test? The main agent must NOT edit the test files directly.
-2. **Sub-Agent Config**: Did all sub-agents use `model: "haiku"` and `max_turns: 5`?
-3. **Serial Execution**: Were sub-agents launched ONE AT A TIME (not in parallel) to prevent cross-contamination?
-4. **Hooks Fired Automatically**: Did the main agent observe the blocking hooks firing automatically when each sub-agent returned? The agent must NOT manually run the rules_check command.
-5. **Reset Between Tests**: Was the reset step called internally after each test to revert files and prevent cross-contamination?
-6. **Early Termination**: If 2 tests failed, did testing halt immediately with results reported?
-7. **Results Recorded**: Did the main agent track pass/fail status for each test case?
-## On Completion
-
-1. Verify outputs are created
-2. Inform user: "Step 3/4 complete, outputs: fire_results"
-3. **Tell user next command**: `/manual_tests:infinite_block_tests`
-
----
-
-**Reference files**: `.deepwork/jobs/manual_tests/job.yml`, `.deepwork/jobs/manual_tests/steps/run_fire_tests.md`
-"""
\ No newline at end of file
diff --git a/.gemini/skills/manual_tests/run_not_fire_tests.toml b/.gemini/skills/manual_tests/run_not_fire_tests.toml
deleted file mode 100644
index 2e429635..00000000
--- a/.gemini/skills/manual_tests/run_not_fire_tests.toml
+++ /dev/null
@@ -1,223 +0,0 @@
-# manual_tests:run_not_fire_tests
-#
-# Runs all 6 'should NOT fire' tests in parallel sub-agents. Use to verify rules don't fire when safety conditions are met.
-#
-# Generated by DeepWork - do not edit manually
-
-description = "Runs all 6 'should NOT fire' tests in parallel sub-agents. Use to verify rules don't fire when safety conditions are met."
-
-prompt = """
-# manual_tests:run_not_fire_tests
-
-**Step 2/4** in **manual_tests** workflow
-
-> Runs all manual hook/rule tests using sub-agents. Use when validating that DeepWork rules fire correctly.
-
-## Prerequisites (Verify First)
-
-Before proceeding, confirm these steps are complete:
-- `/manual_tests:reset`
-
-## Instructions
-
-**Goal**: Runs all 6 'should NOT fire' tests in parallel sub-agents. Use to verify rules don't fire when safety conditions are met.
-
-# Run Should-NOT-Fire Tests
-
-## Objective
-
-Run all "should NOT fire" tests in parallel sub-agents to verify that rules do not fire when their safety conditions are met.
-
-## CRITICAL: Sub-Agent Requirement
-
-**You MUST spawn sub-agents to make all file edits. DO NOT edit the test files yourself.**
-
-Why sub-agents are required:
-1. Sub-agents run in isolated contexts where file changes are detected
-2. When a sub-agent completes, the Stop hook **automatically** evaluates rules
-3. You (the main agent) observe whether hooks fired - you do NOT manually trigger them
-4. If you edit files directly, the hooks won't fire because you're not a completing sub-agent
-
-**NEVER manually run `echo '{}' | python -m deepwork.hooks.rules_check`** - this defeats the purpose of the test. Hooks must fire AUTOMATICALLY when sub-agents return.
-
-## Task
-
-Run all 6 "should NOT fire" tests in **parallel** sub-agents, then verify no blocking hooks fired.
-
-### Process
-
-1. **Launch parallel sub-agents for all "should NOT fire" tests**
-
-   Use the Task tool to spawn **ALL of the following sub-agents in a SINGLE message** (parallel execution).
-
-   **Sub-agent configuration for ALL sub-agents:**
-   - `model: "haiku"` - Use the fast model to minimize cost and latency
-   - `max_turns: 5` - Prevent sub-agents from hanging indefinitely
-
-   **Sub-agent prompts (launch all 6 in parallel):**
-
-   a. **Trigger/Safety test** - "Edit `manual_tests/test_trigger_safety_mode/feature.py` to add a comment, AND edit `manual_tests/test_trigger_safety_mode/feature_doc.md` to add a note. Both files must be edited so the rule does NOT fire."
-
-   b. **Set Mode test** - "Edit `manual_tests/test_set_mode/module_source.py` to add a comment, AND edit `manual_tests/test_set_mode/module_test.py` to add a test comment. Both files must be edited so the rule does NOT fire."
-
-   c. **Pair Mode (forward) test** - "Edit `manual_tests/test_pair_mode/handler_trigger.py` to add a comment, AND edit `manual_tests/test_pair_mode/handler_expected.md` to add a note. Both files must be edited so the rule does NOT fire."
-
-   d. **Pair Mode (reverse) test** - "Edit ONLY `manual_tests/test_pair_mode/handler_expected.md` to add a note. Only the expected file should be edited - this tests that the pair rule only fires in one direction."
-
-   e. **Multi Safety test** - "Edit `manual_tests/test_multi_safety/core.py` to add a comment, AND edit `manual_tests/test_multi_safety/core_safety_a.md` to add a note. Both files must be edited so the rule does NOT fire."
-
-   f. **Created Mode test** - "Modify the EXISTING file `manual_tests/test_created_mode/existing.yml` by adding a comment. Do NOT create a new file - only modify the existing one. The created mode rule should NOT fire for modifications."
-
-2. **Observe the results**
-
-   When each sub-agent returns:
-   - **If no blocking hook fired**: Preliminary pass - proceed to queue verification
-   - **If a blocking hook fired**: The test FAILED - investigate why the rule fired when it shouldn't have
-
-   **Remember**: You are OBSERVING whether hooks fired automatically. Do NOT run any verification commands manually during sub-agent execution.
-
-3. **Verify no queue entries** (CRITICAL for "should NOT fire" tests)
-
-   After ALL sub-agents have completed, verify the rules queue is empty:
-   ```bash
-   ls -la .deepwork/tmp/rules/queue/
-   cat .deepwork/tmp/rules/queue/*.json 2>/dev/null
-   ```
-
-   - **If queue is empty**: All tests PASSED - rules correctly did not fire
-   - **If queue has entries**: Tests FAILED - rules fired when they shouldn't have. Check which rule fired and investigate.
-
-   This verification is essential because some rules may fire without visible blocking but still create queue entries.
-
-4. **Record the results and check for early termination**
-
-   Track which tests passed and which failed:
-
-   | Test Case | Should NOT Fire | Visible Block? | Queue Entry? | Result |
-   |-----------|:---------------:|:--------------:|:------------:|:------:|
-   | Trigger/Safety | Edit both files | | | |
-   | Set Mode | Edit both files | | | |
-   | Pair Mode (forward) | Edit both files | | | |
-   | Pair Mode (reverse) | Edit expected only | | | |
-   | Multi Safety | Edit both files | | | |
-   | Created Mode | Modify existing | | | |
-
-   **Result criteria**: PASS only if NO visible block AND NO queue entry. FAIL if either occurred.
-
-   **EARLY TERMINATION**: If **2 tests have failed**, immediately:
-   1. Stop running any remaining tests
-   2. Reset (see step 5)
-   3. Report the results summary showing which tests passed/failed
-   4. Do NOT proceed to the next step - the job halts here
-
-5. **Reset** (MANDATORY - call the reset step internally)
-
-   **IMPORTANT**: This step is MANDATORY and must run regardless of whether tests passed or failed.
-
-   Follow the reset step instructions. Run these commands to clean up:
-   ```bash
-   git reset HEAD manual_tests/ && git checkout -- manual_tests/ && rm -f manual_tests/test_created_mode/new_config.yml
-   deepwork rules clear_queue
-   ```
-
-   See [reset.md](reset.md) for detailed explanation of these commands.
-
-## Quality Criteria
-
-- **Sub-agents spawned**: All 6 tests were run using the Task tool to spawn sub-agents - the main agent did NOT edit files directly
-- **Correct sub-agent config**: All sub-agents used `model: "haiku"` and `max_turns: 5`
-- **Parallel execution**: All 6 sub-agents were launched in a single message (parallel)
-- **Hooks observed (not triggered)**: The main agent observed hook behavior without manually running rules_check
-- **Queue verified empty**: After all sub-agents completed, the rules queue was checked and confirmed empty (no queue entries = rules did not fire)
-- **Early termination on 2 failures**: If 2 tests failed, testing halted immediately and results were reported
-- **Reset performed**: Reset step was followed after tests completed (regardless of pass/fail)
-- When all criteria are met, include `<promise>Quality Criteria Met</promise>` in your response
-
-## Reference
-
-See [test_reference.md](test_reference.md) for the complete test matrix and rule descriptions.
-
-## Context
-
-This step runs after the reset step (which ensures a clean environment) and tests that rules correctly do NOT fire when safety conditions are met. The "should fire" tests run after these complete. Infinite block tests are handled in a separate step.
-
-
-### Job Context
-
-A workflow for running manual tests that validate DeepWork rules/hooks fire correctly.
-
-The **run_all** workflow tests that rules fire when they should AND do not fire when they shouldn't.
-Each test is run in a SUB-AGENT (not the main agent) because:
-1. Sub-agents run in isolated contexts where file changes can be detected
-2. The Stop hook automatically evaluates rules when each sub-agent completes
-3. The main agent can observe whether hooks fired without triggering them manually
-
-CRITICAL: All tests MUST run in sub-agents. The main agent MUST NOT make the file
-edits itself - it spawns sub-agents to make edits, then observes whether the hooks
-fired automatically when those sub-agents returned.
-
-Sub-agent configuration:
-- All sub-agents should use `model: "haiku"` to minimize cost and latency
-- All sub-agents should use `max_turns: 5` to prevent hanging indefinitely
-
-Steps:
-1. reset - Ensure clean environment before testing (clears queue, reverts files)
-2. run_not_fire_tests - Run all "should NOT fire" tests in PARALLEL sub-agents (6 tests)
-3. run_fire_tests - Run all "should fire" tests in SERIAL sub-agents with resets between (6 tests)
-4. infinite_block_tests - Run infinite block tests in SERIAL (4 tests - both fire and not-fire)
-
-Reset procedure (see steps/reset.md):
-- Reset runs FIRST to ensure a clean environment before any tests
-- Each step also calls reset internally when needed (between tests, after completion)
-- Reset reverts git changes, removes created files, and clears the rules queue
-
-Test types covered:
-- Trigger/Safety mode
-- Set mode (bidirectional)
-- Pair mode (directional)
-- Command action
-- Multi safety
-- Infinite block (prompt and command) - in dedicated step
-- Created mode (new files only)
-
-
-## Required Inputs
-
-
-**Files from Previous Steps** - Read these first:
-- `clean_environment` (from `reset`)
-
-## Work Branch
-
-Use branch format: `deepwork/manual_tests-[instance]-YYYYMMDD`
-
-- If on a matching work branch: continue using it
-- If on main/master: create new branch with `git checkout -b deepwork/manual_tests-[instance]-$(date +%Y%m%d)`
-
-## Outputs
-
-**Required outputs**:
-- `not_fire_results`
-
-## Quality Validation (Manual)
-
-**NOTE**: Gemini CLI does not support automated validation. Manually verify criteria before completing.
-
-**Criteria (all must be satisfied)**:
-1. **Sub-Agents Used**: Did the main agent spawn sub-agents (using the Task tool) to make the file edits? The main agent must NOT edit the test files directly.
-2. **Sub-Agent Config**: Did all sub-agents use `model: "haiku"` and `max_turns: 5`?
-3. **Parallel Execution**: Were all 6 sub-agents launched in parallel (in a single message with multiple Task tool calls)?
-4. **Hooks Observed**: Did the main agent observe that no blocking hooks fired when the sub-agents returned? The hooks fire AUTOMATICALLY - the agent must NOT manually run the rules_check command.
-5. **Queue Verified Empty**: After all sub-agents completed, was the rules queue checked and confirmed empty (no entries = rules did not fire)?
-6. **Early Termination**: If 2 tests failed, did testing halt immediately with results reported?
-7. **Reset Performed**: Was the reset step called internally after tests completed (or after early termination)?
-## On Completion
-
-1. Verify outputs are created
-2. Inform user: "Step 2/4 complete, outputs: not_fire_results"
-3. **Tell user next command**: `/manual_tests:run_fire_tests`
-
----
-
-**Reference files**: `.deepwork/jobs/manual_tests/job.yml`, `.deepwork/jobs/manual_tests/steps/run_not_fire_tests.md`
-"""
\ No newline at end of file
diff --git a/.gemini/skills/update/index.toml b/.gemini/skills/update/index.toml
deleted file mode 100644
index ebf3fa03..00000000
--- a/.gemini/skills/update/index.toml
+++ /dev/null
@@ -1,63 +0,0 @@
-# update
-#
-# Updates DeepWork standard jobs in src/ and syncs to installed locations. Use when modifying deepwork_jobs or deepwork_rules.
-#
-# Generated by DeepWork - do not edit manually
-
-description = "Updates DeepWork standard jobs in src/ and syncs to installed locations. Use when modifying deepwork_jobs or deepwork_rules."
-
-prompt = """
-# update
-
-**Multi-step workflow**: Updates DeepWork standard jobs in src/ and syncs to installed locations. Use when modifying deepwork_jobs or deepwork_rules.
-
-> **NOTE**: Gemini CLI requires manual command invocation. After each step, tell the user which command to run next.
-
-A workflow for maintaining standard jobs bundled with DeepWork. Standard jobs
-(like `deepwork_jobs` and `deepwork_rules`) are source-controlled in
-`src/deepwork/standard_jobs/` and must be edited there—never in `.deepwork/jobs/`
-or `.claude/commands/` directly.
-
-This job guides you through:
-1. Identifying which standard job(s) to update from conversation context
-2. Making changes in the correct source location (`src/deepwork/standard_jobs/[job_name]/`)
-3. Running `deepwork install` to propagate changes to `.deepwork/` and command directories
-4. Verifying the sync completed successfully
-
-Use this job whenever you need to modify job.yml files, step instructions, or hooks
-for any standard job in the DeepWork repository.
-
-
-## Available Steps
-
-1. **job** - Edits standard job source files in src/ and runs deepwork install to sync changes. Use when updating job.yml or step instructions.
-   Command: `/update:job`
-
-## Execution Instructions
-
-### Step 1: Analyze Intent
-
-Parse any text following `/update` to determine user intent:
-- "job" or related terms → start at `/update:job`
-
-### Step 2: Direct User to Starting Step
-
-Tell the user which command to run:
-```
-/update:job
-```
-
-### Step 3: Guide Through Workflow
-
-After each step completes, tell the user the next command to run until workflow is complete.
-
-### Handling Ambiguous Intent
-
-If user intent is unclear:
-- Present available steps as numbered options
-- Ask user to select the starting point
-
-## Reference
-
-- Job definition: `.deepwork/jobs/update/job.yml`
-"""
\ No newline at end of file
diff --git a/.gemini/skills/update/job.toml b/.gemini/skills/update/job.toml
deleted file mode 100644
index a42f20fb..00000000
--- a/.gemini/skills/update/job.toml
+++ /dev/null
@@ -1,141 +0,0 @@
-# update:job
-#
-# Edits standard job source files in src/ and runs deepwork install to sync changes. Use when updating job.yml or step instructions.
-#
-# Generated by DeepWork - do not edit manually
-
-description = "Edits standard job source files in src/ and runs deepwork install to sync changes. Use when updating job.yml or step instructions."
-
-prompt = """
-# update:job
-
-**Standalone command** - can be run anytime
-
-> Updates DeepWork standard jobs in src/ and syncs to installed locations. Use when modifying deepwork_jobs or deepwork_rules.
-
-
-## Instructions
-
-**Goal**: Edits standard job source files in src/ and runs deepwork install to sync changes. Use when updating job.yml or step instructions.
-
-# Update Standard Job
-
-## Objective
-
-Edit standard job source files in `src/deepwork/standard_jobs/` and sync changes to installed locations.
-
-## Task
-
-When modifying a standard job in the DeepWork repository, this step ensures changes are made in the correct location and properly propagated.
-
-### Important: Source of Truth
-
-Standard jobs exist in THREE locations, but only ONE is the source of truth:
-
-| Location | Purpose | Editable? |
-|----------|---------|-----------|
-| `src/deepwork/standard_jobs/[job]/` | **Source of truth** | **YES** |
-| `.deepwork/jobs/[job]/` | Installed copy | NO - overwritten by install |
-| `.claude/commands/[job].[step].md` | Generated commands | NO - regenerated by sync |
-
-**NEVER edit files in `.deepwork/jobs/` or `.claude/commands/` for standard jobs!**
-
-### Process
-
-#### 1. Identify the Standard Job to Update
-
-From conversation context, determine:
-- Which standard job needs updating (e.g., `deepwork_jobs`, `deepwork_rules`)
-- What changes are needed (job.yml, step instructions, hooks, etc.)
-
-Current standard jobs:
-```bash
-ls src/deepwork/standard_jobs/
-```
-
-#### 2. Make Changes in Source Location
-
-```
-src/deepwork/standard_jobs/[job_name]/
-├── job.yml              # Job definition
-├── steps/               # Step instruction files
-├── hooks/               # Hook scripts
-└── templates/           # Templates
-```
-
-#### 3. Run DeepWork Install
-
-```bash
-deepwork install --platform claude
-```
-
-For Gemini: `deepwork install --platform gemini`
-
-#### 4. Verify the Sync
-
-```bash
-# Verify job.yml
-diff src/deepwork/standard_jobs/[job_name]/job.yml .deepwork/jobs/[job_name]/job.yml
-
-# Verify step files
-diff -r src/deepwork/standard_jobs/[job_name]/steps/ .deepwork/jobs/[job_name]/steps/
-
-# Check commands regenerated
-ls -la .claude/commands/[job_name].*.md
-```
-
-## Quality Criteria
-
-- Changes made ONLY in `src/deepwork/standard_jobs/[job_name]/`
-- `deepwork install --platform claude` executed successfully
-- Files in `.deepwork/jobs/` match source
-- Command files regenerated
-- When all criteria are met, include `<promise>✓ Quality Criteria Met</promise>`
-
-
-### Job Context
-
-A workflow for maintaining standard jobs bundled with DeepWork. Standard jobs
-(like `deepwork_jobs` and `deepwork_rules`) are source-controlled in
-`src/deepwork/standard_jobs/` and must be edited there—never in `.deepwork/jobs/`
-or `.claude/commands/` directly.
-
-This job guides you through:
-1. Identifying which standard job(s) to update from conversation context
-2. Making changes in the correct source location (`src/deepwork/standard_jobs/[job_name]/`)
-3. Running `deepwork install` to propagate changes to `.deepwork/` and command directories
-4. Verifying the sync completed successfully
-
-Use this job whenever you need to modify job.yml files, step instructions, or hooks
-for any standard job in the DeepWork repository.
-
-
-## Required Inputs
-
-**User Parameters** - Gather from user before starting:
-- **job_context**: Determine from conversation context which standard job(s) to update and what changes are needed
-
-
-## Work Branch
-
-Use branch format: `deepwork/update-[instance]-YYYYMMDD`
-
-- If on a matching work branch: continue using it
-- If on main/master: create new branch with `git checkout -b deepwork/update-[instance]-$(date +%Y%m%d)`
-
-## Outputs
-
-**Required outputs**:
-- `files_synced`
-
-## On Completion
-
-1. Verify outputs are created
-2. Inform user: "job complete, outputs: files_synced"
-
-This standalone command can be re-run anytime.
-
----
-
-**Reference files**: `.deepwork/jobs/update/job.yml`, `.deepwork/jobs/update/steps/job.md`
-"""
\ No newline at end of file
diff --git a/.github/workflows/claude-code-test.yml b/.github/workflows/claude-code-test.yml
index 1d0ecebc..405c3ba9 100644
--- a/.github/workflows/claude-code-test.yml
+++ b/.github/workflows/claude-code-test.yml
@@ -56,9 +56,48 @@ jobs:
         if: github.event_name != 'pull_request'
         run: uv sync --extra dev
 
-      - name: Run fruits workflow tests
+      - name: Validate fruits fixture parses and install generates correct structure
         if: github.event_name != 'pull_request'
-        run: uv run pytest tests/integration/test_fruits_workflow.py -v
+        run: |
+          # Verify the fruits fixture parses correctly via deepwork's parser
+          uv run python -c "
+          from pathlib import Path
+          from deepwork.core.parser import parse_job_definition
+
+          job = parse_job_definition(Path('tests/fixtures/jobs/fruits'))
+
+          assert job.name == 'fruits'
+          assert job.version == '1.0.0'
+          assert len(job.steps) == 2
+          assert [s.id for s in job.steps] == ['identify', 'classify']
+
+          # Identify step: user input -> file output
+          identify = job.steps[0]
+          assert identify.inputs[0].is_user_input()
+          assert identify.inputs[0].name == 'raw_items'
+          assert identify.outputs[0].name == 'identified_fruits.md'
+          assert identify.dependencies == []
+
+          # Classify step: file input from identify -> file output
+          classify = job.steps[1]
+          assert classify.inputs[0].is_file_input()
+          assert classify.inputs[0].file == 'identified_fruits.md'
+          assert classify.inputs[0].from_step == 'identify'
+          assert classify.outputs[0].name == 'classified_fruits.md'
+          assert classify.dependencies == ['identify']
+
+          # Workflow definition
+          assert len(job.workflows) == 1
+          assert job.workflows[0].name == 'full'
+          assert job.workflows[0].steps == ['identify', 'classify']
+
+          # Validations pass
+          job.validate_dependencies()
+          job.validate_file_inputs()
+          job.validate_workflows()
+
+          print('All fruits fixture validations passed!')
+          "
 
       - name: Generate skills and validate structure
         if: github.event_name != 'pull_request'
@@ -84,21 +123,12 @@ jobs:
           echo "Checking generated skills..."
           ls -la test_project/.claude/skills/
 
-          # Verify skill directories and SKILL.md files exist
-          # Meta-skill for the job itself
-          test -f test_project/.claude/skills/fruits/SKILL.md || (echo "Missing fruits meta-skill" && exit 1)
-          # Step skills
-          test -f test_project/.claude/skills/fruits.identify/SKILL.md || (echo "Missing fruits.identify skill" && exit 1)
-          test -f test_project/.claude/skills/fruits.classify/SKILL.md || (echo "Missing fruits.classify skill" && exit 1)
-
-          # Verify skill content
-          grep -q "# fruits.identify" test_project/.claude/skills/fruits.identify/SKILL.md
-          grep -q "raw_items" test_project/.claude/skills/fruits.identify/SKILL.md
-          grep -q "identified_fruits.md" test_project/.claude/skills/fruits.identify/SKILL.md
+          # MCP variant: only the /deepwork entry point skill is generated
+          # (per-step skills are no longer created; MCP server handles orchestration)
+          test -f test_project/.claude/skills/deepwork/SKILL.md || (echo "Missing deepwork MCP entry point skill" && exit 1)
 
-          grep -q "# fruits.classify" test_project/.claude/skills/fruits.classify/SKILL.md
-          grep -q "identified_fruits.md" test_project/.claude/skills/fruits.classify/SKILL.md
-          grep -q "classified_fruits.md" test_project/.claude/skills/fruits.classify/SKILL.md
+          # Verify the deepwork skill references MCP tools
+          grep -qi "deepwork" test_project/.claude/skills/deepwork/SKILL.md
 
           echo "Skill generation validated successfully!"
 
@@ -155,7 +185,22 @@ jobs:
 
       - name: Install deepwork
         if: steps.check-key.outputs.has_key == 'true'
-        run: uv sync
+        run: |
+          uv sync
+
+          # Add the uv virtualenv bin directory to PATH for all subsequent steps.
+          #
+          # Why: `deepwork install` registers an MCP server in .mcp.json with
+          # the command "deepwork serve --path .". When Claude Code starts, it
+          # reads .mcp.json and spawns the MCP server as a subprocess using the
+          # bare "deepwork" command. Without this PATH addition, that subprocess
+          # fails because "deepwork" only exists inside the uv virtualenv
+          # (accessible via "uv run deepwork" but not as a bare command).
+          #
+          # Without the MCP server running, Claude cannot use the DeepWork MCP
+          # tools (get_workflows, start_workflow, finished_step) and falls back
+          # to ad-hoc file creation, which produces the wrong output format.
+          echo "$(pwd)/.venv/bin" >> $GITHUB_PATH
 
       - name: Set up fresh test project
         if: steps.check-key.outputs.has_key == 'true'
@@ -174,40 +219,52 @@ jobs:
           # Install deepwork (this sets up .deepwork/ with standard jobs only)
           uv run deepwork install --platform claude --path test_project
 
-          # Create permissive settings.json to allow file operations in CI
-          cat > test_project/.claude/settings.json << 'SETTINGS_EOF'
-          {
-            "permissions": {
-              "allow": [
-                "Bash(*)",
-                "Read(./**)",
-                "Edit(./**)",
-                "Write(./**)",
-                "Skill(*)"
-              ]
-            }
-          }
-          SETTINGS_EOF
+          # Merge broad CI permissions into settings.json WITHOUT overwriting it.
+          #
+          # Why merge instead of overwrite: `deepwork install` writes MCP tool
+          # permissions (mcp__deepwork__get_workflows, mcp__deepwork__start_workflow,
+          # etc.) into settings.json. These are required for Claude to call the
+          # DeepWork MCP server tools. Overwriting settings.json with only generic
+          # permissions (Bash, Read, Write, etc.) removes the MCP permissions,
+          # causing Claude to silently fail when trying to use /deepwork.
+          python3 -c "
+          import json
+          settings_path = 'test_project/.claude/settings.json'
+          with open(settings_path) as f:
+              settings = json.load(f)
+          ci_permissions = ['Bash(*)', 'Read(./**)', 'Edit(./**)', 'Write(./**)', 'Skill(*)']
+          for perm in ci_permissions:
+              if perm not in settings.setdefault('permissions', {}).setdefault('allow', []):
+                  settings['permissions']['allow'].append(perm)
+          with open(settings_path, 'w') as f:
+              json.dump(settings, f, indent=2)
+          "
 
           echo "Fresh test project setup complete"
           echo "Available skills:"
           ls -la test_project/.claude/skills/
 
-      # STEP 1: Use /deepwork_jobs.define to CREATE the fruits job
-      - name: Create job with /deepwork_jobs
+      # STEP 1: Use /deepwork to CREATE the fruits job via MCP workflow
+      #
+      # This invokes Claude with the /deepwork skill, which uses MCP tools to
+      # walk through the deepwork_jobs/new_job workflow (define → implement →
+      # test → iterate). The workflow includes quality gates that spawn Claude
+      # subprocesses, so it needs a generous timeout.
+      - name: Create job with /deepwork
         if: steps.check-key.outputs.has_key == 'true'
         working-directory: test_project
-        timeout-minutes: 6
+        timeout-minutes: 10
         run: |
-          echo "=== Running /deepwork_jobs to create fruits job ==="
+          echo "=== Running /deepwork to create fruits job ==="
           mkdir fruits
 
-          # Provide detailed, deterministic instructions for creating the job
-          claude --print --model claude-sonnet-4-5 <<'PROMPT_EOF'
-          /deepwork_jobs I want to create a simple job called "fruits" for identifying and classifying fruits.
+          # Use --debug to capture detailed logs for diagnosing failures.
+          # The debug log is dumped in the failure handler below.
+          claude --print --debug --model claude-sonnet-4-5 <<'PROMPT_EOF'
+          /deepwork I want to create a simple job called "fruits" for identifying and classifying fruits.
 
           Here are the EXACT specifications.
-          
+
           Intent: A simple workflow that takes a list of mixed items, identifies which are fruits, then classifies them by category. Designed for CI testing.
 
           Steps:
@@ -218,13 +275,14 @@ jobs:
 
           2. Step: classify
              Name: Classify Fruits
-             Description: Organize identified fruits into categories (citrus, tropical, berries, etc.). 
+             Description: Organize identified fruits into categories (citrus, tropical, berries, etc.).
              **CRITICAL**: must put the classified fruit list in `./fruits/classified_fruits.md`.
 
           **Key Instructions:**
           - Do not ask questions - just make the job
           - Rules are explicitly not desired. Tell the review agents that.
           - Do not give long commentary of what you did - just make the job with no commentary.
+          - IMPORTANT: Once the job.yml and step instruction files have been created (i.e. after the "define" and "implement" steps are done), STOP. Do NOT continue into the "test" or "iterate" steps. Abort the workflow at that point. We only need the job definition files created, not the full workflow run.
           PROMPT_EOF
 
           # Verify the job.yml was created
@@ -254,31 +312,61 @@ jobs:
             exit 1
           fi
 
-          # Run sync to generate the skills
-          echo "=== Running deepwork sync to generate skills ==="
+          # Run sync to regenerate skills after new job was created
+          echo "=== Running deepwork sync to regenerate skills ==="
           cd ..
           uv run deepwork sync --path test_project
 
           echo "=== Checking generated skills ==="
           ls -la test_project/.claude/skills/
 
-          if [ -f "test_project/.claude/skills/fruits.identify/SKILL.md" ] && [ -f "test_project/.claude/skills/fruits.classify/SKILL.md" ]; then
-            echo "SUCCESS: Skills generated"
+          # MCP variant: only the /deepwork entry point skill is generated
+          if [ -f "test_project/.claude/skills/deepwork/SKILL.md" ]; then
+            echo "SUCCESS: /deepwork MCP entry point skill generated"
           else
-            echo "ERROR: Skills were not generated"
+            echo "ERROR: /deepwork skill was not generated"
             exit 1
           fi
 
-      # STEP 3: Execute the /fruits workflow (runs all steps automatically)
+      # Dump Claude debug log if the job creation step failed or timed out.
+      # This captures MCP server communication, tool calls, and error details.
+      - name: Dump Claude debug log on failure
+        if: failure() && steps.check-key.outputs.has_key == 'true'
+        working-directory: test_project
+        run: |
+          echo "=== Claude debug log ==="
+          # Claude --debug writes to ~/.claude/debug.log
+          if [ -f "$HOME/.claude/debug.log" ]; then
+            echo "--- Last 200 lines of debug.log ---"
+            tail -200 "$HOME/.claude/debug.log"
+          else
+            echo "No debug.log found at ~/.claude/debug.log"
+            echo "Searching for debug logs..."
+            find "$HOME/.claude" -name "*.log" -type f 2>/dev/null || echo "No log files found"
+          fi
+          echo ""
+          echo "=== MCP server config ==="
+          cat .mcp.json 2>/dev/null || echo "No .mcp.json found"
+          echo ""
+          echo "=== Settings.json ==="
+          cat .claude/settings.json 2>/dev/null || echo "No settings.json found"
+          echo ""
+          echo "=== DeepWork session state ==="
+          ls -la .deepwork/tmp/ 2>/dev/null || echo "No tmp directory"
+          for f in .deepwork/tmp/session_*.json; do
+            [ -f "$f" ] && echo "--- $f ---" && cat "$f"
+          done
+
+      # STEP 3: Execute the fruits workflow via /deepwork MCP entry point
       - name: Run Workflow
         if: steps.check-key.outputs.has_key == 'true'
         working-directory: test_project
         timeout-minutes: 3
         run: |
-          echo "=== Running /fruits workflow with test input ==="
+          echo "=== Running fruits workflow with test input via /deepwork ==="
 
           claude --print --model claude-sonnet-4-5 <<'PROMPT_EOF'
-          /fruits Proccess the list to the file and don't give any extra commentary or text output.
+          /deepwork Run the fruits full workflow. Process the list to the file and don't give any extra commentary or text output.
           raw_items: apple, car, banana, chair, orange, table, mango, laptop, grape, bicycle
           PROMPT_EOF
           
@@ -329,7 +417,7 @@ jobs:
           echo "  ALL E2E TESTS PASSED SUCCESSFULLY!"
           echo "=========================================="
           echo ""
-          echo "Workflow tested: /fruits - Executed full fruits workflow (identify + classify)"
+          echo "Workflow tested: /deepwork fruits full - Executed full fruits workflow (identify + classify)"
           echo ""
 
       - name: Upload test artifacts
@@ -339,7 +427,7 @@ jobs:
           name: claude-code-e2e-outputs
           path: |
             test_project/.deepwork/jobs/fruits/
-            test_project/.claude/skills/fruits*/
-            test_project/identified_fruits.md
-            test_project/classified_fruits.md
+            test_project/.claude/skills/deepwork/
+            test_project/fruits/identified_fruits.md
+            test_project/fruits/classified_fruits.md
           retention-days: 7
diff --git a/.github/workflows/update-claude-code.yml b/.github/workflows/update-claude-code.yml
deleted file mode 100644
index 99dbbf2c..00000000
--- a/.github/workflows/update-claude-code.yml
+++ /dev/null
@@ -1,63 +0,0 @@
-name: Update Claude Code
-
-on:
-  schedule:
-    # Run daily at 6 AM UTC
-    - cron: '0 6 * * *'
-  workflow_dispatch: # Allow manual trigger
-
-jobs:
-  update:
-    runs-on: ubuntu-latest
-    permissions:
-      contents: write
-      pull-requests: write
-
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@v4
-
-      - name: Install Nix
-        uses: cachix/install-nix-action@v30
-        with:
-          nix_path: nixpkgs=channel:nixos-unstable
-          extra_nix_config: |
-            experimental-features = nix-command flakes
-
-      - name: Update claude-code package
-        id: update
-        run: |
-          # Script exits 0 if already at latest (no changes to commit)
-          ./nix/claude-code/update.sh
-
-          # Capture version for PR title
-          VERSION=$(grep 'version = "' nix/claude-code/package.nix | head -1 | sed 's/.*version = "\([^"]*\)".*/\1/')
-          echo "version=$VERSION" >> $GITHUB_OUTPUT
-
-      - name: Update flake.lock
-        run: nix flake update
-
-      - name: Verify build
-        run: nix develop --command claude --version
-
-      - name: Create Pull Request
-        uses: peter-evans/create-pull-request@v7
-        with:
-          token: ${{ secrets.GITHUB_TOKEN }}
-          commit-message: "chore(deps): update claude-code to ${{ steps.update.outputs.version }}"
-          title: "chore(deps): update claude-code to ${{ steps.update.outputs.version }}"
-          body: |
-            Automated update of claude-code package.
-
-            **Changes:**
-            - claude-code updated to ${{ steps.update.outputs.version }}
-            - Updated flake.lock
-
-            **Verification:**
-            - Package builds successfully
-            - `claude --version` returns expected version
-
-            ---
-            *This PR was automatically created by the update-claude-code workflow.*
-          branch: update-claude-code
-          delete-branch: true
diff --git a/.gitignore b/.gitignore
index 8e9811ad..abfa16ab 100644
--- a/.gitignore
+++ b/.gitignore
@@ -44,6 +44,7 @@ nosetests.xml
 coverage.xml
 *.cover
 *.py,cover
+coverage_report.txt
 .hypothesis/
 .pytest_cache/
 
diff --git a/.mcp.json b/.mcp.json
new file mode 100644
index 00000000..79f9ddc8
--- /dev/null
+++ b/.mcp.json
@@ -0,0 +1,12 @@
+{
+  "mcpServers": {
+    "deepwork": {
+      "command": "deepwork",
+      "args": [
+        "serve",
+        "--path",
+        "."
+      ]
+    }
+  }
+}
\ No newline at end of file
diff --git a/AGENTS.md b/AGENTS.md
index 3b1dfeec..d0784788 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -14,12 +14,11 @@ When creating or modifying jobs in this repository, you MUST understand which ty
 
 **Current standard jobs**:
 - `deepwork_jobs` - Core job management (define, implement, learn)
-- `deepwork_rules` - Rules enforcement system
 
 **Editing rules**:
 - Source of truth is ALWAYS in `src/deepwork/standard_jobs/`
 - NEVER edit the installed copies in `.deepwork/jobs/` directly
-- After editing, run `deepwork install --platform claude` to sync
+- After editing, run `deepwork install` to sync
 
 ### 2. Library Jobs (`library/jobs/`)
 
@@ -75,13 +74,11 @@ Which type of job should this be?
 ```
 deepwork/
 ├── src/deepwork/standard_jobs/    # Standard jobs (source of truth)
-│   ├── deepwork_jobs/
-│   └── deepwork_rules/
+│   └── deepwork_jobs/
 ├── library/jobs/                   # Library/example jobs
 │   └── [example_job]/
 └── .deepwork/jobs/                 # Installed standard jobs + bespoke jobs
     ├── deepwork_jobs/              # ← Installed copy, NOT source of truth
-    ├── deepwork_rules/             # ← Installed copy, NOT source of truth
     └── [bespoke_job]/              # ← Source of truth for bespoke only
 
 ## Debugging Issues
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4f9c4dc4..cb7fe8b3 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,40 +8,44 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ## [Unreleased]
 
 ### Added
-- Concurrent steps support in workflow definitions
-  - Workflows can now specify nested arrays of step IDs to indicate steps that can run in parallel
-  - Example: `steps: [setup, [task_a, task_b, task_c], finalize]` runs task_a/b/c concurrently
-  - Single-item arrays indicate a step with multiple parallel instances (e.g., `[fetch_campaign_data]` runs for each campaign)
-  - New `WorkflowStepEntry` dataclass in parser for sequential/concurrent step groups
-  - Meta-skill template renders concurrent steps as "Background Task 1/2/3" with clear instructions
-  - Added `get_step_entry_position_in_workflow()` and `get_concurrent_step_info()` methods to JobDefinition
-  - Full backward compatibility: existing workflows with simple step arrays continue to work
-- Agent delegation field for job.yml steps
-  - New `agent` field on steps allows specifying an agent type (e.g., `agent: general-purpose`)
-  - When `agent` is set, generated Claude Code skills automatically include `context: fork` and `agent:` in frontmatter
-  - Enables steps to delegate execution to specific agent types
-  - Updated `deepwork_jobs.define` step instructions with agent delegation guidance
-  - Updated `job_spec.md` doc spec with "Agent Delegation" section
-- Explicit workflow definitions in job.yml for distinguishing multi-step workflows from standalone skills
-  - New `workflows` section in job.yml with `name`, `summary`, and ordered `steps` array
-  - Workflows are shown separately from standalone skills in generated meta-skills
-  - Step skills now display workflow context (e.g., "Step 2/3 in new_job workflow")
-  - Standalone skills are clearly marked as "can be run anytime"
-  - Backward compatible: jobs without `workflows` section use dependency-based detection
 
 ### Changed
-- Skill templates now show workflow-aware progress (e.g., "new_job step 2/3 complete")
-- Meta-skill template reorganized to show "Workflows" and "Standalone Skills" sections separately
-- Updated `deepwork_jobs` standard job to v1.0.0 with explicit `new_job` workflow
-- SessionStart hook now skips non-initial sessions (resume, compact/clear) by checking the `source` field in stdin JSON, reducing noise and redundant checks
 
 ### Fixed
-- Fixed skill template generating malformed YAML frontmatter with fields concatenated on single lines
-  - Removed over-aggressive `{%-` whitespace stripping from Jinja template
-  - Fields like `user-invocable` and `hooks` now render on proper separate lines
-  - Affects `src/deepwork/templates/claude/skill-job-step.md.jinja`
+
+## [0.7.0] - 2026-02-05
+
+### Added
+- **MCP Server Architecture** - New Model Context Protocol server for checkpoint-based workflow execution
+- Improved `deepwork_jobs` steps for workflow management
+- JSON Schema for job.yml validation (`src/deepwork/schemas/job.schema.json`)
+- Reference documentation for calling Claude in print mode (`doc/reference/calling_claude_in_print_mode.md`)
+- Migrated to uv2nix for reproducible Python builds in flake.nix
+
+### Changed
+- **BREAKING**: Simplified skill generation to single `/deepwork` entry point skill
+- **BREAKING**: Workflow execution now happens through MCP tool calls instead of slash commands
+- Streamlined `deepwork_jobs.define` and `deepwork_jobs.implement` for MCP workflow
+- Updated `deepwork_jobs.learn` with simplified instructions
+- Simplified adapter templates - removed complex skill templates
+- MCP server registered in `.claude/settings.json` during install
 
 ### Removed
+- **BREAKING**: Rules system removed
+- **BREAKING**: Removed per-step skill generation templates and logic
+- Removed per-step skill generation templates and logic
+- Removed `commit` job from library (was example job)
+- Removed `manual_tests/` directory and `manual_tests` job
+- Removed `add_platform` bespoke job
+- Removed many hook scripts that are no longer needed with MCP architecture
+- Removed Gemini per-step skill templates (`.gemini/skills/` now only has entry point)
+
+### Migration Guide
+- Run `deepwork install` to get the new MCP server configuration
+- Workflows are now executed via `/deepwork` which uses MCP tools internally
+- Rules system is completely removed - consider implementing validation logic in quality criteria instead
+- Existing job definitions still work but are executed through MCP checkpoints
+- The `.deepwork/rules/` directory can be safely deleted
 
 ## [0.5.1] - 2026-01-24
 
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index a86ffe4a..c39359d0 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -7,6 +7,7 @@ Thank you for your interest in contributing to DeepWork! This guide will help yo
 - [Prerequisites](#prerequisites)
 - [Development Setup](#development-setup)
 - [Installing DeepWork Locally](#installing-deepwork-locally)
+- [Installing Pre-Release Versions](#installing-pre-release-versions)
 - [Testing Your Local Installation](#testing-your-local-installation)
 - [Running Tests](#running-tests)
 - [Code Quality](#code-quality)
@@ -240,6 +241,32 @@ which deepwork  # Should point to .venv/bin/deepwork
 deepwork --version
 ```
 
+## Installing Pre-Release Versions
+
+DeepWork uses pre-release versions (e.g., `0.7.0a1`) during development. By default, `uv` and `pip` skip pre-release versions, so you need to opt in explicitly.
+
+### With uv
+
+```bash
+# Install the latest pre-release from PyPI
+uv pip install --prerelease=allow deepwork
+
+# Or pin to a specific pre-release
+uv pip install --prerelease=allow "deepwork==0.7.0a1"
+```
+
+### With pip
+
+```bash
+pip install --pre deepwork
+```
+
+### With uv tool install (global CLI)
+
+```bash
+uv tool install --prerelease=allow deepwork
+```
+
 ## Testing Your Local Installation
 
 To test your local DeepWork installation in a real project:
@@ -259,7 +286,7 @@ Since you installed DeepWork in editable mode, the `deepwork` command uses your
 
 ```bash
 # Run the install command
-deepwork install --platform claude
+deepwork install
 
 # Verify installation
 ls -la .deepwork/
@@ -273,7 +300,7 @@ Any changes you make to the DeepWork source code will be immediately reflected:
 ```bash
 # Make changes in ~/deepwork/src/deepwork/...
 # Then test in your test project
-deepwork install --platform claude
+deepwork install
 
 # Or test the CLI directly
 deepwork --help
@@ -306,7 +333,7 @@ nix run github:Unsupervisedcom/deepwork/feature-branch-name -- --help
 nix develop github:Unsupervisedcom/deepwork/feature-branch-name
 
 # Run a specific command from a feature branch
-nix develop github:Unsupervisedcom/deepwork/feature-branch-name --command deepwork install --platform claude
+nix develop github:Unsupervisedcom/deepwork/feature-branch-name --command deepwork install
 
 # Test against a specific commit
 nix run github:Unsupervisedcom/deepwork/abc1234 -- --version
@@ -321,7 +348,7 @@ For example, to test a branch named `feat/new-parser`:
 
 ```bash
 # Quick test of the CLI
-nix run github:Unsupervisedcom/deepwork/feat/new-parser -- install --platform claude --dry-run
+nix run github:Unsupervisedcom/deepwork/feat/new-parser -- install --dry-run
 
 # Or enter a full development shell to run tests and run a specific test
 nix develop github:Unsupervisedcom/deepwork/feat/new-parser --command pytest tests/unit/core/test_parser.py -v
@@ -466,7 +493,7 @@ mypy src/
 ```bash
 # Create or use a test project
 cd ~/test-project/
-deepwork install --platform claude
+deepwork install
 
 # Verify your changes work as expected
 ```
diff --git a/README.md b/README.md
index 041a6f8c..59bdfdf8 100644
--- a/README.md
+++ b/README.md
@@ -137,9 +137,7 @@ To start the process, just run:
 
 **3. Learns automatically** — Run `/deepwork_jobs.learn` (or ask claude to `run the deepwork learn job`) after any job to automatically capture what worked and improve for next time.
 
-**4. Rules** - The system adds enforced rules that are truly evaluated for everything it does, not just "hints" that Claude does by default
-
-**5. All work happens on Git branches** — Every change can be version-controlled and tracked. You can roll-back to prior versions of the skill or keep skills in-sync and up-to-date across your team.
+**4. All work happens on Git branches** — Every change can be version-controlled and tracked. You can roll-back to prior versions of the skill or keep skills in-sync and up-to-date across your team.
 
 ---
 
@@ -147,7 +145,7 @@ To start the process, just run:
 
 | Platform | Status | Notes |
 |----------|--------|-------|
-| **Claude Code** | Full Support | Recommended. Quality hooks, rules, best DX. |
+| **Claude Code** | Full Support | Recommended. Quality hooks, best DX. |
 | **Gemini CLI** | Partial Support | TOML format, global hooks only |
 | OpenCode | Planned | |
 | GitHub Copilot CLI | Planned | |
@@ -202,7 +200,6 @@ Send [@tylerwillis](https://x.com/tylerwillis) a message on X.
 your-project/
 ├── .deepwork/
 │   ├── config.yml          # Platform configuration
-│   ├── rules/              # Automated rules
 │   └── jobs/               # Job definitions
 │       └── job_name/
 │           ├── job.yml     # Job metadata
@@ -240,25 +237,6 @@ deepwork install
 
 </details>
 
-<details>
-<summary><strong>Advanced: Automated Rules</strong></summary>
-
-Rules monitor file changes and prompt Claude to follow guidelines:
-
-```markdown
----
-name: Source/Test Pairing
-set:
-  - src/{path}.py
-  - tests/{path}_test.py
----
-When source files change, corresponding test files should also change.
-```
-
-See [Architecture](doc/architecture.md) for full rules documentation.
-
-</details>
-
 <details>
 <summary><strong>Advanced: Nix Flakes</strong></summary>
 
@@ -296,3 +274,5 @@ We're iterating fast. [Open an issue](https://github.com/Unsupervisedcom/deepwor
 ---
 
 <sub>Inspired by [GitHub's spec-kit](https://github.com/github/spec-kit)</sub>
+
+**Code Coverage**: 78.99% (as of 2026-02-09)
diff --git a/claude.md b/claude.md
index 07d4b325..1a54ee6e 100644
--- a/claude.md
+++ b/claude.md
@@ -43,8 +43,7 @@ deepwork/
 │   │   ├── gemini/
 │   │   └── copilot/
 │   ├── standard_jobs/    # Built-in job definitions (auto-installed)
-│   │   ├── deepwork_jobs/
-│   │   └── deepwork_rules/
+│   │   └── deepwork_jobs/
 │   ├── schemas/          # Job definition schemas
 │   └── utils/            # Utilities (fs, git, yaml, validation)
 ├── library/jobs/         # Reusable example jobs (not auto-installed)
@@ -84,8 +83,7 @@ When running in Claude Code on the web (not local installations), the `deepwork`
 pip install -e .
 
 # Then run commands normally
-deepwork install --platform claude
-deepwork sync
+deepwork install
 ```
 
 **Note**: In web environments, you may also need to install dependencies like `jsonschema`, `pyyaml`, `gitpython`, `jinja2`, and `click` if they're not already available.
@@ -173,6 +171,7 @@ my-project/
 - `doc/architecture.md` - Comprehensive architecture documentation
 - `README.md` - High-level project overview
 - `shell.nix` - Development environment setup
+- `doc/reference/calling_claude_in_print_mode.md` - When invoking Claude Code as a subprocess (e.g., with `--print` or `-p`), read this for correct flag ordering, structured output with JSON schemas, and common gotchas
 
 ## Development Guidelines
 
@@ -196,7 +195,7 @@ my-project/
 
 ### Editing Standard Jobs
 
-**Standard jobs** (like `deepwork_jobs` and `deepwork_rules`) are bundled with DeepWork and installed to user projects. They exist in THREE locations:
+**Standard jobs** (like `deepwork_jobs`) are bundled with DeepWork and installed to user projects. They exist in THREE locations:
 
 1. **Source of truth**: `src/deepwork/standard_jobs/[job_name]/` - The canonical source files
 2. **Installed copy**: `.deepwork/jobs/[job_name]/` - Installed by `deepwork install`
@@ -211,13 +210,13 @@ Instead, follow this workflow:
    - `steps/*.md` - Step instruction files
    - `hooks/*` - Any hook scripts
 
-2. **Run `deepwork install --platform claude`** to sync changes to `.deepwork/jobs/` and `.claude/skills/`
+2. **Run `deepwork install`** to sync changes to `.deepwork/jobs/` and `.claude/skills/`
 
 3. **Verify** the changes propagated correctly to all locations
 
 ### How to Identify Job Types
 
-- **Standard jobs**: Exist in `src/deepwork/standard_jobs/` (currently: `deepwork_jobs`, `deepwork_rules`)
+- **Standard jobs**: Exist in `src/deepwork/standard_jobs/` (currently: `deepwork_jobs`)
 - **Library jobs**: Exist in `library/jobs/`
 - **Bespoke jobs**: Exist ONLY in `.deepwork/jobs/` with no corresponding standard_jobs entry
 
diff --git a/doc/architecture.md b/doc/architecture.md
index f4a2e094..2325cd4e 100644
--- a/doc/architecture.md
+++ b/doc/architecture.md
@@ -18,11 +18,12 @@ DeepWork is a framework for enabling AI agents to perform complex, multi-step wo
 
 ## Architecture Overview
 
-This document is organized into three major sections:
+This document is organized into four major sections:
 
 1. **[DeepWork Tool Architecture](#part-1-deepwork-tool-architecture)** - The DeepWork repository/codebase itself and how it works
 2. **[Target Project Architecture](#part-2-target-project-architecture)** - What a project looks like after DeepWork is installed
 3. **[Runtime Execution Model](#part-3-runtime-execution-model)** - How AI agents execute jobs using the installed skills
+4. **[MCP Server Architecture](#part-4-mcp-server-architecture)** - The MCP server for checkpoint-based workflow execution
 
 ---
 
@@ -40,47 +41,41 @@ deepwork/                       # DeepWork tool repository
 │       │   ├── __init__.py
 │       │   ├── main.py         # CLI entry point
 │       │   ├── install.py      # Install command
-│       │   └── sync.py         # Sync command
+│       │   ├── sync.py         # Sync command
+│       │   └── serve.py        # MCP server command
 │       ├── core/
 │       │   ├── adapters.py     # Agent adapters for AI platforms
 │       │   ├── detector.py     # AI platform detection
 │       │   ├── generator.py    # Command file generation
 │       │   ├── parser.py       # Job definition parsing
 │       │   ├── doc_spec_parser.py   # Doc spec parsing
-│       │   ├── rules_parser.py     # Rule definition parsing
-│       │   ├── pattern_matcher.py  # Variable pattern matching for rules
-│       │   ├── rules_queue.py      # Rule state queue system
-│       │   ├── command_executor.py # Command action execution
 │       │   └── hooks_syncer.py     # Hook syncing to platforms
+│       ├── mcp/                # MCP server module
+│       │   ├── __init__.py
+│       │   ├── server.py       # FastMCP server definition
+│       │   ├── tools.py        # MCP tool implementations
+│       │   ├── state.py        # Workflow session state management
+│       │   ├── schemas.py      # Pydantic models for I/O
+│       │   └── quality_gate.py # Quality gate with review agent
 │       ├── hooks/              # Hook system and cross-platform wrappers
 │       │   ├── __init__.py
 │       │   ├── wrapper.py           # Cross-platform input/output normalization
 │       │   ├── claude_hook.sh       # Shell wrapper for Claude Code
-│       │   ├── gemini_hook.sh       # Shell wrapper for Gemini CLI
-│       │   └── rules_check.py       # Cross-platform rule evaluation hook
+│       │   └── gemini_hook.sh       # Shell wrapper for Gemini CLI
 │       ├── templates/          # Skill templates for each platform
 │       │   ├── claude/
-│       │   │   └── skill-job-step.md.jinja
+│       │   │   └── skill-deepwork.md.jinja  # MCP entry point skill
 │       │   ├── gemini/
 │       │   └── copilot/
 │       ├── standard_jobs/      # Built-in job definitions
-│       │   ├── deepwork_jobs/
-│       │   │   ├── job.yml
-│       │   │   ├── steps/
-│       │   │   └── templates/
-│       │   │       └── doc_spec.md.template
-│       │   └── deepwork_rules/   # Rule management job
+│       │   └── deepwork_jobs/
 │       │       ├── job.yml
 │       │       ├── steps/
-│       │       │   └── define.md
-│       │       └── hooks/         # Hook scripts
-│       │           ├── global_hooks.yml
-│       │           ├── user_prompt_submit.sh
-│       │           └── capture_prompt_work_tree.sh
+│       │       └── templates/
+│       │           └── doc_spec.md.template
 │       ├── schemas/            # Definition schemas
 │       │   ├── job_schema.py
-│       │   ├── doc_spec_schema.py   # Doc spec schema definition
-│       │   └── rules_schema.py
+│       │   └── doc_spec_schema.py   # Doc spec schema definition
 │       └── utils/
 │           ├── fs.py
 │           ├── git.py
@@ -125,11 +120,6 @@ def install(platform: str):
     # Inject core job definitions
     inject_deepwork_jobs(".deepwork/jobs/")
 
-    # Create rules directory with example templates (if not exists)
-    if not exists(".deepwork/rules/"):
-        create_directory(".deepwork/rules/")
-        copy_example_rules(".deepwork/rules/")
-
     # Update config (supports multiple platforms)
     config = load_yaml(".deepwork/config.yml") or {}
     config["version"] = "1.0.0"
@@ -223,52 +213,35 @@ class PlatformDetector:
 
 ### 4. Skill Generator (`generator.py`)
 
-Generates AI-platform-specific skill files from job definitions.
+Generates AI-platform-specific skill files. The generator has been simplified to focus
+on generating only the MCP entry point skill (`/deepwork`), as workflow orchestration
+is now handled by the MCP server rather than individual step skills.
 
-This component is called by the `sync` command to regenerate all skills:
-1. Reads the job definition from `.deepwork/jobs/[job-name]/job.yml`
-2. Loads platform-specific templates
-3. Generates skill files for each step in the job
-4. Writes skills to the AI platform's skills directory
+This component is called by the `sync` command to regenerate the DeepWork skill:
+1. Loads the platform-specific template (`skill-deepwork.md.jinja`)
+2. Generates the `/deepwork` skill file that directs agents to use MCP tools
+3. Writes the skill to the AI platform's skills directory
 
 **Example Generation Flow**:
 ```python
 class SkillGenerator:
-    def generate_all_skills(self, job: JobDefinition,
-                            platform: PlatformConfig,
-                            output_dir: Path) -> list[Path]:
-        """Generate skill files for all steps in a job."""
-        skill_paths = []
-
-        for step_index, step in enumerate(job.steps):
-            # Load step instructions
-            instructions = read_file(job.job_dir / step.instructions_file)
-
-            # Build template context
-            context = {
-                "job_name": job.name,
-                "step_id": step.id,
-                "step_name": step.name,
-                "step_number": step_index + 1,
-                "total_steps": len(job.steps),
-                "instructions_content": instructions,
-                "user_inputs": [inp for inp in step.inputs if inp.is_user_input()],
-                "file_inputs": [inp for inp in step.inputs if inp.is_file_input()],
-                "outputs": step.outputs,
-                "dependencies": step.dependencies,
-                "exposed": step.exposed,
-            }
-
-            # Render template
-            template = env.get_template("skill-job-step.md.jinja")
-            rendered = template.render(**context)
-
-            # Write to platform's skills directory
-            skill_path = output_dir / platform.config_dir / platform.skills_dir / f"{job.name}.{step.id}.md"
-            write_file(skill_path, rendered)
-            skill_paths.append(skill_path)
-
-        return skill_paths
+    def generate_deepwork_skill(self, adapter: AgentAdapter,
+                                output_dir: Path) -> Path:
+        """Generate the global /deepwork skill for MCP entry point."""
+        skills_dir = output_dir / adapter.skills_dir
+        skills_dir.mkdir(parents=True, exist_ok=True)
+
+        # Load and render template
+        env = self._get_jinja_env(adapter)
+        template = env.get_template("skill-deepwork.md.jinja")
+        rendered = template.render()
+
+        # Write skill file
+        skill_path = skills_dir / "deepwork/SKILL.md"
+        skill_path.parent.mkdir(parents=True, exist_ok=True)
+        safe_write(skill_path, rendered)
+
+        return skill_path
 ```
 
 ---
@@ -288,7 +261,6 @@ my-project/                     # User's project (target)
 │       ├── deepwork_jobs.define.md         # Core DeepWork skills
 │       ├── deepwork_jobs.implement.md
 │       ├── deepwork_jobs.refine.md
-│       ├── deepwork_rules.define.md        # Rule management
 │       ├── competitive_research.identify_competitors.md
 │       └── ...
 ├── .deepwork/                  # DeepWork configuration
@@ -296,24 +268,11 @@ my-project/                     # User's project (target)
 │   ├── .gitignore              # Ignores tmp/ directory
 │   ├── doc_specs/                   # Doc specs (document specifications)
 │   │   └── monthly_aws_report.md
-│   ├── rules/                  # Rule definitions (v2 format)
-│   │   ├── source-test-pairing.md
-│   │   ├── format-python.md
-│   │   └── api-docs.md
 │   ├── tmp/                    # Temporary state (gitignored)
-│   │   └── rules/queue/        # Rule evaluation queue
 │   └── jobs/                   # Job definitions
 │       ├── deepwork_jobs/      # Core job for managing jobs
 │       │   ├── job.yml
 │       │   └── steps/
-│       ├── deepwork_rules/     # Rule management job
-│       │   ├── job.yml
-│       │   ├── steps/
-│       │   │   └── define.md
-│       │   └── hooks/          # Hook scripts (installed from standard_jobs)
-│       │       ├── global_hooks.yml
-│       │       ├── user_prompt_submit.sh
-│       │       └── capture_prompt_work_tree.sh
 │       ├── competitive_research/
 │       │   ├── job.yml         # Job metadata
 │       │   └── steps/
@@ -1033,203 +992,6 @@ Github Actions are used for all CI/CD tasks.
 
 ---
 
-## Rules
-
-Rules are automated enforcement mechanisms that trigger based on file changes during an AI agent session. They help ensure that:
-- Documentation stays in sync with code changes
-- Security reviews happen when sensitive code is modified
-- Team guidelines are followed automatically
-- File correspondences are maintained (e.g., source/test pairing)
-
-### Rules System v2 (Frontmatter Markdown)
-
-Rules are defined as individual markdown files in `.deepwork/rules/`:
-
-```
-.deepwork/rules/
-├── source-test-pairing.md
-├── format-python.md
-└── api-docs.md
-```
-
-Each rule file uses YAML frontmatter with a markdown body for instructions:
-
-```markdown
----
-name: Source/Test Pairing
-set:
-  - src/{path}.py
-  - tests/{path}_test.py
-compare_to: base
----
-When source files change, corresponding test files should also change.
-Please create or update tests for the modified source files.
-```
-
-### Detection Modes
-
-Rules support three detection modes:
-
-**1. Trigger/Safety (default)** - Fire when trigger matches but safety doesn't:
-```yaml
----
-name: Update install guide
-trigger: "app/config/**/*"
-safety: "docs/install_guide.md"
-compare_to: base
----
-```
-
-**2. Set (bidirectional)** - Enforce file correspondence in both directions:
-```yaml
----
-name: Source/Test Pairing
-set:
-  - src/{path}.py
-  - tests/{path}_test.py
-compare_to: base
----
-```
-Uses variable patterns like `{path}` (multi-segment) and `{name}` (single-segment) for matching.
-
-**3. Pair (directional)** - Trigger requires corresponding files, but not vice versa:
-```yaml
----
-name: API Documentation
-pair:
-  trigger: src/api/{name}.py
-  expects: docs/api/{name}.md
-compare_to: base
----
-```
-
-### Action Types
-
-**1. Prompt (default)** - Show instructions to the agent:
-```yaml
----
-name: Security Review
-trigger: "src/auth/**/*"
-compare_to: base
----
-Please check for hardcoded credentials and validate input.
-```
-
-**2. Command** - Run an idempotent command:
-```yaml
----
-name: Format Python
-trigger: "**/*.py"
-action:
-  command: "ruff format {file}"
-  run_for: each_match  # or "all_matches"
-compare_to: prompt
----
-```
-
-### Rule Evaluation Flow
-
-1. **Session Start**: When a Claude Code session begins, the baseline git state is captured
-2. **Agent Works**: The AI agent performs tasks, potentially modifying files
-3. **Session Stop**: When the agent finishes (after_agent event):
-   - Changed files are detected based on `compare_to` setting (base, default_tip, or prompt)
-   - Each rule is evaluated based on its detection mode
-   - Queue entries are created in `.deepwork/tmp/rules/queue/` for deduplication
-   - For command actions: commands are executed, results tracked
-   - For prompt actions: if rule fires and not already promised, agent is prompted
-4. **Promise Tags**: Agents can mark rules as addressed by including `<promise>✓ Rule Name</promise>` in their response
-
-### Queue System
-
-Rule state is tracked in `.deepwork/tmp/rules/queue/` with files named `{hash}.{status}.json`:
-- `queued` - Detected, awaiting evaluation
-- `passed` - Rule satisfied (promise found or command succeeded)
-- `failed` - Rule not satisfied
-- `skipped` - Safety pattern matched
-
-This prevents re-prompting for the same rule violation within a session.
-
-### Hook Integration
-
-The v2 rules system uses the cross-platform hook wrapper:
-
-```
-src/deepwork/hooks/
-├── wrapper.py           # Cross-platform input/output normalization
-├── rules_check.py       # Rule evaluation hook (v2)
-├── claude_hook.sh       # Claude Code shell wrapper
-└── gemini_hook.sh       # Gemini CLI shell wrapper
-```
-
-Hooks are called via the shell wrappers:
-```bash
-claude_hook.sh deepwork.hooks.rules_check
-```
-
-The hooks are installed to `.claude/settings.json` during `deepwork sync`:
-
-```json
-{
-  "hooks": {
-    "Stop": [
-      {"matcher": "", "hooks": [{"type": "command", "command": "deepwork hook rules_check"}]}
-    ]
-  }
-}
-```
-
-### Cross-Platform Hook Wrapper System
-
-The `hooks/` module provides a wrapper system that allows writing hooks once in Python and running them on multiple platforms. This normalizes the differences between Claude Code and Gemini CLI hook systems.
-
-**Architecture:**
-```
-┌─────────────────┐     ┌─────────────────┐
-│  Claude Code    │     │   Gemini CLI    │
-│  (Stop event)   │     │ (AfterAgent)    │
-└────────┬────────┘     └────────┬────────┘
-         │                       │
-         ▼                       ▼
-┌─────────────────┐     ┌─────────────────┐
-│ claude_hook.sh  │     │ gemini_hook.sh  │
-│ (shell wrapper) │     │ (shell wrapper) │
-└────────┬────────┘     └────────┬────────┘
-         │                       │
-         └───────────┬───────────┘
-                     ▼
-           ┌─────────────────┐
-           │   wrapper.py    │
-           │ (normalization) │
-           └────────┬────────┘
-                    ▼
-           ┌─────────────────┐
-           │  Python Hook    │
-           │ (common logic)  │
-           └─────────────────┘
-```
-
-**Key normalizations:**
-- Event names: `Stop` ↔ `AfterAgent`, `PreToolUse` ↔ `BeforeTool`, `UserPromptSubmit` ↔ `BeforeAgent`
-- Tool names: `Write` ↔ `write_file`, `Bash` ↔ `shell`, `Read` ↔ `read_file`
-- Decision values: `block` → `deny` for Gemini CLI
-- Environment variables: `CLAUDE_PROJECT_DIR` ↔ `GEMINI_PROJECT_DIR`
-
-**Usage:**
-```python
-from deepwork.hooks.wrapper import HookInput, HookOutput, run_hook, Platform
-
-def my_hook(input: HookInput) -> HookOutput:
-    if input.event == NormalizedEvent.AFTER_AGENT:
-        return HookOutput(decision="block", reason="Complete X first")
-    return HookOutput()
-
-# Called via: claude_hook.sh mymodule or gemini_hook.sh mymodule
-```
-
-See `doc/platforms/` for detailed platform-specific hook documentation.
-
----
-
 ## Doc Specs (Document Specifications)
 
 Doc specs formalize document specifications for job outputs. They enable consistent document structure and automated quality validation.
@@ -1312,38 +1074,6 @@ See `doc/doc-specs.md` for complete documentation.
 
 ---
 
-### Rule Schema
-
-Rules are validated against a JSON Schema:
-
-```yaml
-- name: string          # Required: Friendly name for the rule
-  trigger: string|array # Required: Glob pattern(s) for triggering files
-  safety: string|array  # Optional: Glob pattern(s) for safety files
-  instructions: string  # Required (unless instructions_file): What to do
-  instructions_file: string  # Alternative: Path to instructions file
-```
-
-### Defining Rules
-
-Use the `/deepwork_rules.define` command to interactively create rules:
-
-```
-User: /deepwork_rules.define
-
-Claude: I'll help you define a new rule. What guideline or constraint
-        should this rule enforce?
-
-User: When API code changes, the API documentation should be updated
-
-Claude: Got it. Let me ask a few questions...
-        [Interactive dialog to define trigger, safety, and instructions]
-
-Claude: Created rule "API documentation update" in .deepwork/rules/api-documentation.md
-```
-
----
-
 ## Technical Decisions
 
 ### Language: Python 3.11+
@@ -1384,6 +1114,222 @@ Claude: Created rule "API documentation update" in .deepwork/rules/api-documenta
 
 ---
 
+---
+
+# Part 4: MCP Server Architecture
+
+DeepWork includes an MCP (Model Context Protocol) server that provides an alternative execution model. Instead of relying solely on skill files with embedded instructions, the MCP server guides agents through workflows via checkpoint calls with quality gate enforcement.
+
+## Architecture Overview
+
+```
+┌─────────────────────────────────────────────────────────────┐
+│                   Claude Code / AI Agent                     │
+│  /deepwork skill → instructs to use MCP tools               │
+└─────────────────────────────────────────────────────────────┘
+                              │ MCP Protocol (stdio)
+                              ▼
+┌─────────────────────────────────────────────────────────────┐
+│                   DeepWork MCP Server                        │
+│  Tools: get_workflows | start_workflow | finished_step      │
+│  State: session tracking, step progress, outputs            │
+│  Quality Gate: invokes review agent for validation          │
+└─────────────────────────────────────────────────────────────┘
+                              │
+                              ▼
+┌─────────────────────────────────────────────────────────────┐
+│              .deepwork/jobs/[job_name]/job.yml              │
+└─────────────────────────────────────────────────────────────┘
+```
+
+## MCP Server Components
+
+### Server (`server.py`)
+
+The FastMCP server definition that:
+- Creates and configures the MCP server instance
+- Registers the three workflow tools
+- Provides server instructions for agents
+
+### Tools (`tools.py`)
+
+Implements the three MCP tools:
+
+#### 1. `get_workflows`
+Lists all available workflows from `.deepwork/jobs/`.
+
+**Parameters**: None
+
+**Returns**: List of jobs with their workflows, steps, and summaries
+
+#### 2. `start_workflow`
+Begins a new workflow session.
+
+**Parameters**:
+- `goal: str` - What the user wants to accomplish
+- `job_name: str` - Name of the job
+- `workflow_name: str` - Name of the workflow within the job
+- `instance_id: str | None` - Optional identifier (e.g., "acme", "q1-2026")
+
+**Returns**: Session ID, branch name, first step instructions
+
+#### 3. `finished_step`
+Reports step completion and gets next instructions.
+
+**Parameters**:
+- `outputs: list[str]` - List of output file paths created
+- `notes: str | None` - Optional notes about work done
+
+**Returns**:
+- `status: "needs_work" | "next_step" | "workflow_complete"`
+- If `needs_work`: feedback from quality gate, failed criteria
+- If `next_step`: next step instructions
+- If `workflow_complete`: summary of all outputs
+
+### State Management (`state.py`)
+
+Manages workflow session state persisted to `.deepwork/tmp/session_[id].json`:
+
+```python
+class StateManager:
+    def create_session(...) -> WorkflowSession
+    def load_session(session_id) -> WorkflowSession
+    def start_step(step_id) -> None
+    def complete_step(step_id, outputs, notes) -> None
+    def advance_to_step(step_id, entry_index) -> None
+    def complete_workflow() -> None
+```
+
+Session state includes:
+- Session ID and timestamps
+- Job/workflow/instance identification
+- Current step and entry index
+- Per-step progress (started_at, completed_at, outputs, quality_attempts)
+
+### Quality Gate (`quality_gate.py`)
+
+Evaluates step outputs against quality criteria:
+
+```python
+class QualityGate:
+    def evaluate(
+        quality_criteria: list[str],
+        outputs: list[str],
+        project_root: Path,
+    ) -> QualityGateResult
+```
+
+The quality gate:
+1. Builds a review prompt with criteria and output file contents
+2. Invokes Claude Code via subprocess with proper flag ordering (see `doc/reference/calling_claude_in_print_mode.md`)
+3. Uses `--json-schema` for structured output conformance
+4. Parses the `structured_output` field from the JSON response
+5. Returns pass/fail with per-criterion feedback
+
+### Schemas (`schemas.py`)
+
+Pydantic models for all tool inputs and outputs:
+- `StartWorkflowInput`, `FinishedStepInput`
+- `GetWorkflowsResponse`, `StartWorkflowResponse`, `FinishedStepResponse`
+- `WorkflowSession`, `StepProgress`
+- `QualityGateResult`, `QualityCriteriaResult`
+
+## MCP Server Registration
+
+When `deepwork install` runs, it registers the MCP server in platform settings:
+
+```json
+// .claude/settings.json
+{
+  "mcpServers": {
+    "deepwork": {
+      "command": "deepwork",
+      "args": ["serve", "--path", "."],
+      "transport": "stdio"
+    }
+  }
+}
+```
+
+## The `/deepwork` Skill
+
+A single skill (`.claude/skills/deepwork/SKILL.md`) instructs agents to use MCP tools:
+
+```markdown
+# DeepWork Workflow Manager
+
+Execute multi-step workflows with quality gate checkpoints.
+
+## Quick Start
+1. Discover workflows: Call `get_workflows`
+2. Start a workflow: Call `start_workflow` with your goal
+3. Execute steps: Follow the instructions returned
+4. Checkpoint: Call `finished_step` with your outputs
+5. Iterate or continue: Handle needs_work, next_step, or workflow_complete
+```
+
+## MCP Execution Flow
+
+1. **User invokes `/deepwork`**
+   - Agent calls `get_workflows` to discover available workflows
+   - Parses user intent to identify target workflow
+
+2. **Agent calls `start_workflow`**
+   - MCP server creates session, generates branch name
+   - Returns first step instructions and expected outputs
+
+3. **Agent executes step**
+   - Follows step instructions
+   - Creates output files
+
+4. **Agent calls `finished_step`**
+   - MCP server evaluates outputs against quality criteria (if configured)
+   - If `needs_work`: returns feedback for agent to fix issues
+   - If `next_step`: returns next step instructions
+   - If `workflow_complete`: workflow finished
+
+5. **Loop continues until workflow complete**
+
+## Quality Gate
+
+Quality gate is enabled by default and uses Claude Code to evaluate step outputs
+against quality criteria. The command is constructed internally with proper flag
+ordering (see `doc/reference/calling_claude_in_print_mode.md`).
+
+To disable quality gate:
+
+```bash
+deepwork serve --no-quality-gate
+```
+
+## Serve Command
+
+Start the MCP server manually:
+
+```bash
+# Basic usage (quality gate enabled by default)
+deepwork serve
+
+# With quality gate disabled
+deepwork serve --no-quality-gate
+
+# For a specific project
+deepwork serve --path /path/to/project
+
+# SSE transport (for remote)
+deepwork serve --transport sse --port 8000
+```
+
+## Benefits of MCP Approach
+
+1. **Centralized state**: Session state persisted and visible in `.deepwork/tmp/`
+2. **Quality gates**: Automated validation before proceeding
+3. **Structured checkpoints**: Clear handoff points between steps
+4. **Resumability**: Sessions can be loaded and resumed
+5. **Observability**: All state changes logged and inspectable
+
+---
+
 ## References
 
 - [Spec-Kit Repository](https://github.com/github/spec-kit)
@@ -1392,4 +1338,6 @@ Claude: Created rule "API documentation update" in .deepwork/rules/api-documenta
 - [Git Workflows](https://www.atlassian.com/git/tutorials/comparing-workflows)
 - [JSON Schema](https://json-schema.org/)
 - [Jinja2 Documentation](https://jinja.palletsprojects.com/)
+- [Model Context Protocol](https://modelcontextprotocol.io/)
+- [FastMCP Documentation](https://github.com/jlowin/fastmcp)
 
diff --git a/doc/mcp_interface.md b/doc/mcp_interface.md
new file mode 100644
index 00000000..2cc5014b
--- /dev/null
+++ b/doc/mcp_interface.md
@@ -0,0 +1,338 @@
+# DeepWork MCP Interface Documentation
+
+This document describes the Model Context Protocol (MCP) tools exposed by the DeepWork server. AI agents use these tools to discover and execute multi-step workflows.
+
+## Server Information
+
+- **Server Name**: `deepwork`
+- **Transport**: stdio (default) or SSE
+- **Starting the server**: `deepwork serve --path /path/to/project`
+
+## Tools
+
+DeepWork exposes four MCP tools:
+
+### 1. `get_workflows`
+
+List all available DeepWork workflows. Call this first to discover available workflows.
+
+#### Parameters
+
+None.
+
+#### Returns
+
+```typescript
+{
+  jobs: JobInfo[]
+}
+```
+
+Where `JobInfo` is:
+
+```typescript
+interface JobInfo {
+  name: string;              // Job identifier
+  summary: string;           // Short summary of the job
+  description: string | null; // Full description (optional)
+  workflows: WorkflowInfo[];  // Named workflows in the job
+}
+
+interface WorkflowInfo {
+  name: string;              // Workflow identifier
+  summary: string;           // Short description
+}
+```
+
+---
+
+### 2. `start_workflow`
+
+Start a new workflow session. Creates a git branch, initializes state tracking, and returns the first step's instructions. Supports nested workflows — starting a workflow while one is active pushes onto a stack.
+
+#### Parameters
+
+| Parameter | Type | Required | Description |
+|-----------|------|----------|-------------|
+| `goal` | `string` | Yes | What the user wants to accomplish |
+| `job_name` | `string` | Yes | Name of the job |
+| `workflow_name` | `string` | Yes | Name of the workflow within the job. If the name doesn't match but the job has only one workflow, that workflow is selected automatically. If the job has multiple workflows, an error is returned listing the available workflow names. |
+| `instance_id` | `string \| null` | No | Optional identifier for naming (e.g., 'acme', 'q1-2026') |
+
+#### Returns
+
+```typescript
+{
+  begin_step: ActiveStepInfo; // Information about the first step to begin
+  stack: StackEntry[];        // Current workflow stack after starting
+}
+```
+
+---
+
+### 3. `finished_step`
+
+Report that you've finished a workflow step. Validates outputs against quality criteria (if configured), then returns the next action.
+
+#### Parameters
+
+| Parameter | Type | Required | Description |
+|-----------|------|----------|-------------|
+| `outputs` | `Record<string, string \| string[]>` | Yes | Map of output names to file path(s). For outputs declared as type `file`: pass a single string path (e.g. `"report.md"`). For outputs declared as type `files`: pass a list of string paths (e.g. `["a.md", "b.md"]`). Outputs with `required: false` can be omitted. Check `step_expected_outputs` to see each output's declared type and required status. |
+| `notes` | `string \| null` | No | Optional notes about work done |
+| `quality_review_override_reason` | `string \| null` | No | If provided, skips quality review (must explain why) |
+| `session_id` | `string \| null` | No | Target a specific workflow session by ID. Use when multiple workflows are active concurrently. If omitted, operates on the top-of-stack session. The session_id is returned in `ActiveStepInfo` from `start_workflow` and `finished_step`. |
+
+#### Returns
+
+The response varies based on the `status` field:
+
+```typescript
+{
+  status: "needs_work" | "next_step" | "workflow_complete";
+
+  // For status = "needs_work"
+  feedback?: string;                    // Combined feedback from failed reviews
+  failed_reviews?: ReviewResult[];      // Failed review results
+
+  // For status = "next_step"
+  begin_step?: ActiveStepInfo;         // Information about the next step to begin
+
+  // For status = "workflow_complete"
+  summary?: string;                    // Summary of completed workflow
+  all_outputs?: Record<string, string | string[]>; // All outputs from all steps
+
+  // Always included
+  stack: StackEntry[];                 // Current workflow stack after this operation
+}
+```
+
+---
+
+### 4. `abort_workflow`
+
+Abort the current workflow and return to the parent workflow (if nested). Use this when a workflow cannot be completed.
+
+#### Parameters
+
+| Parameter | Type | Required | Description |
+|-----------|------|----------|-------------|
+| `explanation` | `string` | Yes | Why the workflow is being aborted |
+| `session_id` | `string \| null` | No | Target a specific workflow session by ID. Use when multiple workflows are active concurrently. If omitted, aborts the top-of-stack session. |
+
+#### Returns
+
+```typescript
+{
+  aborted_workflow: string;           // The workflow that was aborted (job_name/workflow_name)
+  aborted_step: string;               // The step that was active when aborted
+  explanation: string;                // The explanation provided
+  stack: StackEntry[];                // Current workflow stack after abort
+  resumed_workflow?: string | null;   // The workflow now active (if any)
+  resumed_step?: string | null;       // The step now active (if any)
+}
+```
+
+---
+
+## Shared Types
+
+```typescript
+interface ExpectedOutput {
+  name: string;                    // Output name (use as key in finished_step outputs)
+  type: string;                    // "file" or "files"
+  description: string;             // What this output should contain
+  required: boolean;               // If false, this output can be omitted from finished_step
+  syntax_for_finished_step_tool: string; // Value format hint:
+                                         //   "filepath" for type "file"
+                                         //   "array of filepaths for all individual files" for type "files"
+}
+
+interface ActiveStepInfo {
+  session_id: string;              // Unique session identifier
+  branch_name: string;             // Git branch for this workflow instance
+  step_id: string;                 // ID of the current step
+  step_expected_outputs: ExpectedOutput[]; // Expected outputs with type and format hints
+  step_reviews: ReviewInfo[];      // Reviews to run when step completes
+  step_instructions: string;       // Instructions for the step
+}
+
+interface ReviewInfo {
+  run_each: string;                // 'step' or output name to review
+  quality_criteria: Record<string, string>; // Map of criterion name to question
+}
+
+interface ReviewResult {
+  review_run_each: string;         // 'step' or output name that was reviewed
+  target_file: string | null;      // Specific file reviewed (for per-file reviews)
+  passed: boolean;                 // Whether this review passed
+  feedback: string;                // Summary feedback
+  criteria_results: QualityCriteriaResult[];
+}
+
+interface QualityCriteriaResult {
+  criterion: string;               // The quality criterion name
+  passed: boolean;                 // Whether this criterion passed
+  feedback: string | null;         // Feedback if failed
+}
+
+interface StackEntry {
+  workflow: string;                // Workflow identifier (job_name/workflow_name)
+  step: string;                    // Current step ID in this workflow
+}
+```
+
+---
+
+## Status Values
+
+The `finished_step` tool returns one of three statuses:
+
+| Status | Meaning | Next Action |
+|--------|---------|-------------|
+| `needs_work` | Quality criteria not met | Fix issues based on feedback, call `finished_step` again |
+| `next_step` | Step complete, more steps remain | Execute instructions in response, call `finished_step` when done |
+| `workflow_complete` | All steps complete | Workflow is finished |
+
+---
+
+## Workflow Usage Pattern
+
+```
+1. get_workflows()
+   |
+   Discover available jobs and workflows
+   |
+2. start_workflow(goal, job_name, workflow_name)
+   |
+   Get session_id, branch_name, first step instructions
+   |
+3. Execute step instructions, create outputs
+   |
+4. finished_step(outputs)
+   |
+   +-- status = "needs_work" -> Fix issues, goto 4
+   +-- status = "next_step"  -> Execute new instructions, goto 4
+   +-- status = "workflow_complete" -> Done!
+```
+
+---
+
+## Quality Gates
+
+Steps may define quality reviews that outputs must pass. When `finished_step` is called:
+
+1. If the step has reviews and a quality gate is configured, outputs are evaluated
+2. **Input files from prior steps are included** alongside outputs in the review payload, giving the reviewer full context to evaluate whether outputs are consistent with their inputs
+3. If any review fails, `status = "needs_work"` with feedback
+4. If all reviews pass (or no reviews defined), workflow advances
+5. After 3 failed attempts (configurable), the quality gate raises an error
+
+### Review Payload Structure
+
+The quality gate builds a prompt for the review agent with clearly separated sections:
+
+```
+==================== BEGIN INPUTS ====================
+(contents of input files from prior steps)
+==================== END INPUTS ====================
+
+==================== BEGIN OUTPUTS ====================
+(contents of output files from current step)
+==================== END OUTPUTS ====================
+```
+
+- **Inputs** are resolved automatically from prior step outputs recorded in the session state. If a step declares `file` inputs with `from_step` references, the quality gate looks up the actual file paths from the referenced step's completed outputs.
+- **The inputs section is omitted** if the step has no file inputs from prior steps.
+- **Binary files** (e.g., PDFs) that cannot be decoded as UTF-8 are not embedded in the payload. Instead, a placeholder is included: `[Binary file — not included in review. Read from: /absolute/path/to/file]`
+
+### Review Types
+
+Reviews are defined per-step in the job.yml:
+
+```yaml
+reviews:
+  - run_each: step                    # Review all outputs together
+    quality_criteria:
+      "Criterion Name": "Question to evaluate"
+  - run_each: output_name             # Review a specific output
+    quality_criteria:
+      "Criterion Name": "Question to evaluate"
+```
+
+- `run_each: step` — Review runs once with ALL output files
+- `run_each: <output_name>` where output is `type: file` — Review runs once with that specific file
+- `run_each: <output_name>` where output is `type: files` — Review runs once per file in the list
+
+To skip quality review (use sparingly):
+- Provide `quality_review_override_reason` explaining why review is unnecessary
+
+---
+
+## Nested Workflows
+
+Workflows can be nested — starting a new workflow while one is active pushes onto a stack:
+
+- All tool responses include a `stack` field showing the current workflow stack
+- Each stack entry shows `{workflow: "job/workflow", step: "current_step"}`
+- When a workflow completes, it pops from the stack and resumes the parent
+- Use `abort_workflow` to cancel the current workflow and return to parent
+
+---
+
+## Configuration
+
+The MCP server is configured via `.deepwork/config.yml`:
+
+```yaml
+version: "1.0"
+platforms:
+  - claude
+```
+
+Quality gate is enabled by default and uses Claude Code to evaluate step outputs
+against quality criteria. See `doc/reference/calling_claude_in_print_mode.md` for
+details on how Claude CLI is invoked.
+
+---
+
+## Server CLI Options
+
+```bash
+deepwork serve [OPTIONS]
+
+Options:
+  --path PATH        Project root directory (default: current directory)
+  --no-quality-gate  Disable quality gate evaluation
+  --transport TYPE   Transport type: stdio or sse (default: stdio)
+  --port PORT        Port for SSE transport (default: 8000)
+```
+
+---
+
+## Example MCP Configuration
+
+Add to your `.mcp.json`:
+
+```json
+{
+  "mcpServers": {
+    "deepwork": {
+      "command": "deepwork",
+      "args": ["serve", "--path", "."]
+    }
+  }
+}
+```
+
+---
+
+## Changelog
+
+| Version | Changes |
+|---------|---------|
+| 1.4.0 | Added optional `session_id` parameter to `finished_step` and `abort_workflow` for concurrent workflow safety. When multiple workflows are active on the stack, callers can pass the `session_id` (returned in `ActiveStepInfo`) to target the correct session. Fully backward compatible — omitting `session_id` preserves existing top-of-stack behavior. |
+| 1.3.0 | `step_expected_outputs` changed from `string[]` to `ExpectedOutput[]` — each entry includes `name`, `type`, `description`, and `syntax_for_finished_step_tool` so agents know exactly what format to use when calling `finished_step`. |
+| 1.2.0 | Quality gate now includes input files from prior steps in review payload with BEGIN INPUTS/END INPUTS and BEGIN OUTPUTS/END OUTPUTS section headers. Binary files (PDFs, etc.) get a placeholder instead of raw content. |
+| 1.1.0 | Added `abort_workflow` tool, `stack` field in all responses, `ReviewInfo`/`ReviewResult` types, typed outputs as `Record<string, string \| string[]>` |
+| 1.0.0 | Initial MCP interface with `get_workflows`, `start_workflow`, `finished_step` |
diff --git a/doc/nix-flake.md b/doc/nix-flake.md
index 26bf82ec..246d96ad 100644
--- a/doc/nix-flake.md
+++ b/doc/nix-flake.md
@@ -91,7 +91,7 @@ deepwork --help
 nix run github:Unsupervisedcom/deepwork -- --help
 
 # Run a specific command
-nix run github:Unsupervisedcom/deepwork -- install --platform claude
+nix run github:Unsupervisedcom/deepwork -- install
 ```
 
 ### Building the Package
diff --git a/doc/reference/calling_claude_in_print_mode.md b/doc/reference/calling_claude_in_print_mode.md
new file mode 100644
index 00000000..30f583ad
--- /dev/null
+++ b/doc/reference/calling_claude_in_print_mode.md
@@ -0,0 +1,104 @@
+# Calling Claude Code in Print Mode
+
+This document covers how to invoke Claude Code as a subprocess using the `--print` flag for non-interactive, programmatic usage.
+
+## Basic Usage
+
+The `--print` (or `-p`) flag runs Claude Code in non-interactive mode, suitable for scripting and subprocess invocation.
+
+### Piping Input
+
+When piping a prompt via stdin, use `-p --` to separate flags from the piped content:
+
+```bash
+echo "your prompt here" | claude -p --
+```
+
+**Important**: The `--` is required because `-p` expects a prompt argument immediately after it. Without `--`, the next argument is interpreted as the prompt itself.
+
+### Flag Ordering
+
+Flags must come **before** `-p --`. Anything after `--` is treated as part of the prompt:
+
+```bash
+# Correct - flags before -p --
+echo "say hello" | claude --max-turns 3 -p --
+
+# Wrong - flags after -- become part of the prompt
+echo "say hello" | claude -p -- --max-turns 3
+```
+
+## Structured Output with JSON Schema
+
+Claude Code supports structured output via the `--json-schema` flag. This constrains the model's response to conform to a specified JSON schema.
+
+### Requirements
+
+To get structured JSON output, you need **all three** flags:
+- `--print` - Non-interactive mode
+- `--output-format json` - JSON output format
+- `--json-schema '<schema>'` - The JSON schema as a **string** (not a filename)
+
+### Example
+
+```bash
+echo "say hello" | claude --print --output-format json --json-schema '{"type":"object","properties":{"greeting":{"type":"string"}},"required":["greeting"]}'
+```
+
+### Output Format
+
+The output is a JSON object with metadata about the run. The structured output conforming to your schema is in the `structured_output` field:
+
+```json
+{
+  "type": "result",
+  "subtype": "success",
+  "is_error": false,
+  "duration_ms": 4557,
+  "num_turns": 2,
+  "result": "",
+  "session_id": "ca428892-a13e-4c4c-85df-b29f8ec851a0",
+  "total_cost_usd": 0.063,
+  "structured_output": {
+    "greeting": "Hello! How can I help you today?"
+  }
+}
+```
+
+### Key Insight
+
+The model automatically conforms to the schema **without being told about it in the prompt**. You don't need to instruct the model to output JSON or describe the expected format - the `--json-schema` flag handles this behind the scenes.
+
+## Common Flags for Print Mode
+
+| Flag | Description |
+|------|-------------|
+| `--print` / `-p` | Non-interactive mode |
+| `--output-format <format>` | Output format: `text` (default), `json`, or `stream-json` |
+| `--json-schema <schema>` | JSON schema string for structured output validation |
+| `--max-turns <n>` | Maximum number of agentic turns |
+| `--input-format <format>` | Input format: `text` (default) or `stream-json` |
+| `--include-partial-messages` | Include partial message chunks (with `stream-json`) |
+
+## Gotchas
+
+1. **`--json-schema` takes a string, not a filename** - Pass the actual JSON schema content, not a path to a file.
+
+2. **`--output-format json` only works with `--print`** - These flags are designed for non-interactive/programmatic use.
+
+3. **Max turns matters** - If you set `--max-turns 1` and the model needs to use tools, it may hit the limit before producing output. Use a reasonable number of turns.
+
+4. **The `--` separator is critical** - When piping input with `-p`, always use `--` to mark the end of flags.
+
+## Full Example
+
+```bash
+# Define a schema for listing files
+SCHEMA='{"type":"object","properties":{"files":{"type":"array","items":{"type":"string"}},"count":{"type":"integer"}},"required":["files","count"]}'
+
+# Run with structured output
+echo "List Python files in src/" | claude --print --output-format json --json-schema "$SCHEMA" --max-turns 5
+
+# Parse the structured_output field with jq
+echo "List Python files in src/" | claude --print --output-format json --json-schema "$SCHEMA" --max-turns 5 | jq '.structured_output'
+```
diff --git a/doc/rules_syntax.md b/doc/rules_syntax.md
deleted file mode 100644
index 2ab86be1..00000000
--- a/doc/rules_syntax.md
+++ /dev/null
@@ -1,687 +0,0 @@
-# Rules Configuration Syntax
-
-This document describes the syntax for rule files in the `.deepwork/rules/` directory.
-
-## Directory Structure
-
-Rules are stored as individual markdown files with YAML frontmatter:
-
-```
-.deepwork/
-└── rules/
-    ├── readme-accuracy.md
-    ├── source-test-pairing.md
-    ├── api-documentation.md
-    └── python-formatting.md
-```
-
-Each file has:
-- **Frontmatter**: YAML configuration between `---` delimiters
-- **Body**: Instructions (for prompt actions) or description (for command actions)
-
-This structure enables code files to reference rules:
-```python
-# Read the rule `.deepwork/rules/source-test-pairing.md` before editing
-class AuthService:
-    ...
-```
-
-## Quick Reference
-
-### Simple Trigger with Prompt
-
-`.deepwork/rules/readme-accuracy.md`:
-```markdown
----
-name: README Accuracy
-trigger: src/**/*
-safety: README.md
-compare_to: base
----
-Source code changed. Please verify README.md is accurate.
-
-Check that:
-- All public APIs are documented
-- Examples are up to date
-- Installation instructions are correct
-```
-
-### Correspondence Set (bidirectional)
-
-`.deepwork/rules/source-test-pairing.md`:
-```markdown
----
-name: Source/Test Pairing
-set:
-  - src/{path}.py
-  - tests/{path}_test.py
-compare_to: base
----
-Source and test files should change together.
-
-When modifying source code, ensure corresponding tests are updated.
-When adding tests, ensure they test actual source code.
-```
-
-### Correspondence Pair (directional)
-
-`.deepwork/rules/api-documentation.md`:
-```markdown
----
-name: API Documentation
-pair:
-  trigger: api/{path}.py
-  expects: docs/api/{path}.md
-compare_to: base
----
-API changes require documentation updates.
-
-When modifying an API endpoint, update its documentation to reflect:
-- Parameter changes
-- Response format changes
-- New error conditions
-```
-
-### Command Action
-
-`.deepwork/rules/python-formatting.md`:
-```markdown
----
-name: Python Formatting
-trigger: "**/*.py"
-action:
-  command: ruff format {file}
-compare_to: prompt
----
-Automatically formats Python files using ruff.
-
-This rule runs `ruff format` on any changed Python files to ensure
-consistent code style across the codebase.
-```
-
-### Created Mode (file creation trigger)
-
-`.deepwork/rules/new-module-docs.md`:
-```markdown
----
-name: New Module Documentation
-created: src/**/*.py
----
-A new Python module was created. Please ensure:
-
-- Add module docstring explaining the purpose
-- Update relevant documentation if adding a public API
-- Consider adding tests for the new module
-```
-
-## Rule Structure
-
-Every rule has two orthogonal aspects:
-
-### Detection Mode
-
-How the rule decides when to fire:
-
-| Mode | Field | Description |
-|------|-------|-------------|
-| **Trigger/Safety** | `trigger`, `safety` | Fire when trigger matches and safety doesn't |
-| **Set** | `set` | Fire when file correspondence is incomplete (bidirectional) |
-| **Pair** | `pair` | Fire when file correspondence is incomplete (directional) |
-| **Created** | `created` | Fire when newly created files match patterns |
-
-### Action Type
-
-What happens when the rule fires:
-
-| Type | Field | Description |
-|------|-------|-------------|
-| **Prompt** (default) | (markdown body) | Show instructions to the agent |
-| **Command** | `action.command` | Run an idempotent command |
-
-## Detection Modes
-
-### Trigger/Safety Mode
-
-The simplest detection mode. Fires when changed files match `trigger` patterns and no changed files match `safety` patterns.
-
-```yaml
----
-name: Security Review
-trigger:
-  - src/auth/**/*
-  - src/crypto/**/*
-safety: SECURITY.md
-compare_to: base
----
-```
-
-### Set Mode (Bidirectional Correspondence)
-
-Defines files that should change together. If ANY file in a correspondence group changes, ALL related files should also change.
-
-```yaml
----
-name: Source/Test Pairing
-set:
-  - src/{path}.py
-  - tests/{path}_test.py
-compare_to: base
----
-```
-
-**How it works:**
-
-1. A file changes that matches one pattern in the set
-2. System extracts the variable portions (e.g., `{path}`)
-3. System generates expected files by substituting into other patterns
-4. If ALL expected files also changed: rule is satisfied (no trigger)
-5. If ANY expected file is missing: rule fires
-
-If `src/auth/login.py` changes:
-- Extracts `{path}` = `auth/login`
-- Expects `tests/auth/login_test.py` to also change
-- If test didn't change, fires with instructions
-
-If `tests/auth/login_test.py` changes:
-- Extracts `{path}` = `auth/login`
-- Expects `src/auth/login.py` to also change
-- If source didn't change, fires with instructions
-
-### Pair Mode (Directional Correspondence)
-
-Defines directional relationships. Changes to trigger files require corresponding expected files to change, but not vice versa.
-
-```yaml
----
-name: API Documentation
-pair:
-  trigger: api/{module}/{name}.py
-  expects: docs/api/{module}/{name}.md
-compare_to: base
----
-```
-
-Can specify multiple expected patterns:
-
-```yaml
----
-name: API Documentation
-pair:
-  trigger: api/{path}.py
-  expects:
-    - docs/api/{path}.md
-    - schemas/{path}.json
-compare_to: base
----
-```
-
-If `api/users/create.py` changes:
-- Expects `docs/api/users/create.md` to also change
-- If doc didn't change, fires with instructions
-
-If `docs/api/users/create.md` changes alone:
-- No trigger (documentation can be updated independently)
-
-### Created Mode (File Creation Detection)
-
-Fires only when files are newly created (not modified). Useful for enforcing standards on new files.
-
-```yaml
----
-name: New Component Documentation
-created:
-  - src/components/**/*.tsx
-  - src/components/**/*.ts
----
-```
-
-**How it works:**
-
-1. A file is created that matches a `created` pattern
-2. Rule fires with instructions
-
-Key differences from Trigger/Safety mode:
-- Only fires for **new** files, not modifications to existing files
-- No safety patterns (use Trigger/Safety mode if you need safety)
-- Good for enforcing documentation, tests, or standards on new code
-
-**Examples:**
-
-```yaml
-# Single pattern
-created: src/api/**/*.py
-
-# Multiple patterns
-created:
-  - src/models/**/*.py
-  - src/services/**/*.py
-```
-
-If a new file `src/api/users.py` is created:
-- Rule fires with instructions for new API modules
-
-If an existing file `src/api/users.py` is modified:
-- Rule does NOT fire (file already existed)
-
-## Action Types
-
-### Prompt Action (Default)
-
-The markdown body after frontmatter serves as instructions shown to the agent. This is the default when no `action` field is specified.
-
-**Template Variables in Instructions:**
-
-| Variable | Description |
-|----------|-------------|
-| `{trigger_file}` | The file that triggered the rule |
-| `{trigger_files}` | All files that matched trigger patterns |
-| `{expected_files}` | Expected corresponding files (for sets/pairs) |
-
-### Command Action
-
-Runs an idempotent command instead of prompting the agent.
-
-```yaml
----
-name: Python Formatting
-trigger: "**/*.py"
-safety: "*.pyi"
-action:
-  command: ruff format {file}
-  run_for: each_match
-compare_to: prompt
----
-```
-
-**Template Variables in Commands:**
-
-| Variable | Description | Available When |
-|----------|-------------|----------------|
-| `{file}` | Single file path | `run_for: each_match` |
-| `{files}` | Space-separated file paths | `run_for: all_matches` |
-| `{repo_root}` | Repository root directory | Always |
-
-**Idempotency Requirement:**
-
-Commands should be idempotent--running them multiple times produces the same result. Lint formatters like `black`, `ruff format`, and `prettier` are good examples: they produce consistent output regardless of how many times they run.
-
-## Pattern Syntax
-
-### Basic Glob Patterns
-
-Standard glob patterns work in `trigger` and `safety` fields:
-
-| Pattern | Matches |
-|---------|---------|
-| `*.py` | Python files in current directory |
-| `**/*.py` | Python files in any directory |
-| `src/**/*` | All files under src/ |
-| `test_*.py` | Files starting with `test_` |
-| `*.{js,ts}` | JavaScript and TypeScript files |
-
-### Variable Patterns
-
-Variable patterns use `{name}` syntax to capture path segments:
-
-| Pattern | Captures | Example Match |
-|---------|----------|---------------|
-| `src/{path}.py` | `{path}` = multi-segment path | `src/foo/bar.py` -> `path=foo/bar` |
-| `src/{name}.py` | `{name}` = single segment | `src/utils.py` -> `name=utils` |
-| `{module}/{name}.py` | Both variables | `auth/login.py` -> `module=auth, name=login` |
-
-**Variable Naming Conventions:**
-
-- `{path}` - Conventional name for multi-segment captures (`**/*`)
-- `{name}` - Conventional name for single-segment captures (`*`)
-- Custom names allowed: `{module}`, `{component}`, etc.
-
-**Multi-Segment vs Single-Segment:**
-
-By default, `{path}` matches multiple path segments and `{name}` matches one:
-
-```yaml
-# {path} matches: foo, foo/bar, foo/bar/baz
-- "src/{path}.py"  # src/foo.py, src/foo/bar.py, src/a/b/c.py
-
-# {name} matches only single segment
-- "src/{name}.py"  # src/foo.py (NOT src/foo/bar.py)
-```
-
-To explicitly control this, use `{**name}` for multi-segment or `{*name}` for single:
-
-```yaml
-- "src/{**module}/index.py"   # src/foo/bar/index.py -> module=foo/bar
-- "src/{*component}.py"       # src/Button.py -> component=Button
-```
-
-## Field Reference
-
-### name (required)
-
-Human-friendly name for the rule. Displayed in promise tags and output.
-
-```yaml
----
-name: Source/Test Pairing
----
-```
-
-### File Naming
-
-Rule files are named using kebab-case with `.md` extension:
-- `readme-accuracy.md`
-- `source-test-pairing.md`
-- `api-documentation.md`
-
-The filename serves as the rule's identifier in the queue system.
-
-### trigger
-
-File patterns that cause the rule to fire (trigger/safety mode). Can be string or array.
-
-```yaml
----
-trigger: src/**/*.py
----
-
----
-trigger:
-  - src/**/*.py
-  - lib/**/*.py
----
-```
-
-### safety (optional)
-
-File patterns that suppress the rule. If ANY changed file matches a safety pattern, the rule does not fire.
-
-```yaml
----
-safety: CHANGELOG.md
----
-
----
-safety:
-  - CHANGELOG.md
-  - docs/**/*
----
-```
-
-### set
-
-List of patterns defining bidirectional file relationships (set mode).
-
-```yaml
----
-set:
-  - src/{path}.py
-  - tests/{path}_test.py
----
-```
-
-### pair
-
-Object with `trigger` and `expects` patterns for directional relationships (pair mode).
-
-```yaml
----
-pair:
-  trigger: api/{path}.py
-  expects: docs/api/{path}.md
----
-
----
-pair:
-  trigger: api/{path}.py
-  expects:
-    - docs/api/{path}.md
-    - schemas/{path}.json
----
-```
-
-### created
-
-File patterns that trigger when files are newly created (created mode). Only fires for new files, not modifications. Can be string or array.
-
-```yaml
----
-created: src/**/*.py
----
-
----
-created:
-  - src/**/*.py
-  - lib/**/*.py
----
-```
-
-### action (optional)
-
-Specifies a command to run instead of prompting.
-
-```yaml
----
-action:
-  command: ruff format {file}
-  run_for: each_match  # or all_matches
----
-```
-
-### compare_to (required)
-
-Determines the baseline for detecting file changes.
-
-| Value | Description |
-|-------|-------------|
-| `base` | Compare to merge-base with default branch |
-| `default_tip` | Compare to current tip of default branch |
-| `prompt` | Compare to state at last prompt submission |
-
-```yaml
----
-compare_to: base
----
-```
-
-## Complete Examples
-
-### Example 1: Test Coverage Rule
-
-`.deepwork/rules/test-coverage.md`:
-```markdown
----
-name: Test Coverage
-set:
-  - src/{path}.py
-  - tests/{path}_test.py
-compare_to: base
----
-Source code was modified without corresponding test updates.
-
-Modified source: {trigger_file}
-Expected test: {expected_files}
-
-Please either:
-1. Add/update tests for the changed code
-2. Explain why tests are not needed
-```
-
-### Example 2: Documentation Sync
-
-`.deepwork/rules/api-documentation-sync.md`:
-```markdown
----
-name: API Documentation Sync
-pair:
-  trigger: src/api/{module}/{endpoint}.py
-  expects:
-    - docs/api/{module}/{endpoint}.md
-    - openapi/{module}.yaml
-compare_to: base
----
-API endpoint changed. Please update:
-- Documentation: {expected_files}
-- Ensure OpenAPI spec is current
-```
-
-### Example 3: Auto-formatting Pipeline
-
-`.deepwork/rules/python-black-formatting.md`:
-```markdown
----
-name: Python Black Formatting
-trigger: "**/*.py"
-safety:
-  - "**/*.pyi"
-  - "**/migrations/**"
-action:
-  command: black {file}
-  run_for: each_match
-compare_to: prompt
----
-Formats Python files using Black.
-
-Excludes:
-- Type stub files (*.pyi)
-- Database migration files
-```
-
-### Example 4: Multi-file Correspondence
-
-`.deepwork/rules/full-stack-feature-sync.md`:
-```markdown
----
-name: Full Stack Feature Sync
-set:
-  - backend/api/{feature}/routes.py
-  - backend/api/{feature}/models.py
-  - frontend/src/api/{feature}.ts
-  - frontend/src/components/{feature}/**/*
-compare_to: base
----
-Feature files should be updated together across the stack.
-
-When modifying a feature, ensure:
-- Backend routes are updated
-- Backend models are updated
-- Frontend API client is updated
-- Frontend components are updated
-```
-
-### Example 5: Conditional Safety
-
-`.deepwork/rules/version-bump-required.md`:
-```markdown
----
-name: Version Bump Required
-trigger:
-  - src/**/*.py
-  - pyproject.toml
-safety:
-  - pyproject.toml
-  - CHANGELOG.md
-compare_to: base
----
-Code changes detected. Before merging, ensure:
-- Version is bumped in pyproject.toml (if needed)
-- CHANGELOG.md is updated
-
-This rule is suppressed if you've already modified pyproject.toml
-or CHANGELOG.md, as that indicates you're handling versioning.
-```
-
-### Example 6: New File Standards (Created Mode)
-
-`.deepwork/rules/new-module-standards.md`:
-```markdown
----
-name: New Module Standards
-created:
-  - src/**/*.py
-  - lib/**/*.py
----
-A new Python module was created. Please ensure it follows our standards:
-
-1. **Module docstring**: Add a docstring at the top explaining the module's purpose
-2. **Type hints**: Use type hints for all function parameters and return values
-3. **Tests**: Create a corresponding test file in tests/
-4. **Imports**: Follow the import order (stdlib, third-party, local)
-
-This rule only fires for newly created files, not modifications.
-```
-
-### Example 7: New Component Checklist (Created Mode with Command)
-
-`.deepwork/rules/new-component-lint.md`:
-```markdown
----
-name: New Component Lint
-created: src/components/**/*.tsx
-action:
-  command: eslint --fix {file}
----
-Automatically lints newly created React components.
-```
-
-## Promise Tags
-
-When a rule fires but should be dismissed, use promise tags in the conversation. The tag content should be human-readable, using the rule's `name` field:
-
-```
-<promise>Source/Test Pairing</promise>
-<promise>API Documentation Sync</promise>
-```
-
-The friendly name makes promise tags easy to read when displayed in the conversation. The system matches promise tags to rules using case-insensitive comparison of the `name` field.
-
-## Validation
-
-Rule files are validated on load. Common errors:
-
-**Invalid frontmatter:**
-```
-Error: .deepwork/rules/my-rule.md - invalid YAML frontmatter
-```
-
-**Missing required field:**
-```
-Error: .deepwork/rules/my-rule.md - must have 'trigger', 'set', 'pair', or 'created'
-```
-
-**Invalid pattern:**
-```
-Error: .deepwork/rules/test-coverage.md - invalid pattern "src/{path" - unclosed brace
-```
-
-**Conflicting fields:**
-```
-Error: .deepwork/rules/my-rule.md - has both 'trigger' and 'set' - use one or the other
-```
-
-**Empty body:**
-```
-Error: .deepwork/rules/my-rule.md - instruction rules require markdown body
-```
-
-## Referencing Rules in Code
-
-A key benefit of the `.deepwork/rules/` folder structure is that code files can reference rules directly:
-
-```python
-# Read `.deepwork/rules/source-test-pairing.md` before editing this file
-
-class UserService:
-    """Service for user management."""
-    pass
-```
-
-```typescript
-// This file is governed by `.deepwork/rules/api-documentation.md`
-// Any changes here require corresponding documentation updates
-
-export async function createUser(data: UserInput): Promise<User> {
-    // ...
-}
-```
-
-This helps AI agents and human developers understand which rules apply to specific files.
diff --git a/doc/rules_system_design.md b/doc/rules_system_design.md
deleted file mode 100644
index 8fbf42b5..00000000
--- a/doc/rules_system_design.md
+++ /dev/null
@@ -1,569 +0,0 @@
-# Rules System Design
-
-## Overview
-
-The deepwork rules system enables automated enforcement of development standards during AI-assisted coding sessions. This document describes the architecture for the next-generation rules system with support for:
-
-1. **File correspondence matching** (sets and pairs)
-2. **Idempotent command execution**
-3. **Stateful evaluation with queue-based processing**
-4. **Efficient agent output management**
-
-## Core Concepts
-
-### Rule Structure
-
-Every rule has two orthogonal aspects:
-
-**Detection Mode** - How the rule decides when to fire:
-
-| Mode | Field | Description |
-|------|-------|-------------|
-| **Trigger/Safety** | `trigger`, `safety` | Fire when trigger matches and safety doesn't |
-| **Set** | `set` | Fire when file correspondence is incomplete (bidirectional) |
-| **Pair** | `pair` | Fire when file correspondence is incomplete (directional) |
-| **Created** | `created` | Fire when newly created files match patterns |
-
-**Action Type** - What happens when the rule fires:
-
-| Type | Field | Description |
-|------|-------|-------------|
-| **Prompt** (default) | (markdown body) | Show instructions to the agent |
-| **Command** | `action.command` | Run an idempotent command |
-
-### Detection Modes
-
-**Trigger/Safety Mode**
-- Simplest mode: fire when files match `trigger` and none match `safety`
-- Good for general checks like "source changed, verify README"
-
-**Set Mode (Bidirectional Correspondence)**
-- Define N patterns that share a common variable path
-- If ANY file matching one pattern changes, ALL corresponding files should change
-- Example: Source files and their tests
-
-**Pair Mode (Directional Correspondence)**
-- Define a trigger pattern and one or more expected patterns
-- Changes to trigger files require corresponding expected files to also change
-- Changes to expected files alone do not trigger the rule
-- Example: API code requires documentation updates
-
-**Created Mode (File Creation Detection)**
-- Define patterns for newly created files
-- Only fires when files are created, not when existing files are modified
-- Useful for enforcing standards on new code (documentation, tests, etc.)
-- Example: New modules require documentation and tests
-
-### Pattern Variables
-
-Patterns use `{name}` syntax for capturing variable path segments:
-
-```
-src/{path}.py          # {path} captures everything between src/ and .py
-tests/{path}_test.py   # {path} must match the same value
-```
-
-Special variable names:
-- `{path}` - Matches any path segments (equivalent to `**/*`)
-- `{name}` - Matches a single path segment (equivalent to `*`)
-- `{**}` - Explicit multi-segment wildcard
-- `{*}` - Explicit single-segment wildcard
-
-### Action Types
-
-**Prompt Action (default)**
-The markdown body of the rule file serves as instructions shown to the agent.
-
-**Command Action**
-```yaml
-action:
-  command: "ruff format {file}"
-  run_for: each_match
-```
-
-Command actions should be idempotent—running them multiple times produces the same result. Lint formatters like `black`, `ruff format`, and `prettier` are good examples.
-
-## Architecture
-
-### Component Overview
-
-```
-┌─────────────────────────────────────────────────────────────────┐
-│                        Rules System                              │
-├─────────────────────────────────────────────────────────────────┤
-│                                                                  │
-│  ┌──────────────┐    ┌──────────────┐    ┌──────────────┐      │
-│  │   Detector   │───▶│    Queue     │◀───│  Evaluator   │      │
-│  │              │    │              │    │              │      │
-│  │ - Watch files│    │ .deepwork/   │    │ - Process    │      │
-│  │ - Match rules│    │ tmp/rules/   │    │   queued     │      │
-│  │ - Create     │    │ queue/       │    │ - Run action │      │
-│  │   entries    │    │              │    │ - Update     │      │
-│  └──────────────┘    └──────────────┘    │   status     │      │
-│                                          └──────────────┘      │
-│                                                                  │
-│  ┌──────────────┐    ┌──────────────┐                          │
-│  │   Matcher    │    │   Resolver   │                          │
-│  │              │    │              │                          │
-│  │ - Pattern    │    │ - Variable   │                          │
-│  │   matching   │    │   extraction │                          │
-│  │ - Glob       │    │ - Path       │                          │
-│  │   expansion  │    │   generation │                          │
-│  └──────────────┘    └──────────────┘                          │
-│                                                                  │
-└─────────────────────────────────────────────────────────────────┘
-```
-
-### Detector
-
-The detector identifies when rules should be evaluated:
-
-1. **Trigger Detection**: Monitors for file changes that match rule triggers
-2. **Deduplication**: Computes a hash to avoid re-processing identical triggers
-3. **Queue Entry Creation**: Creates entries for the evaluator to process
-
-**Trigger Hash Computation**:
-```python
-hash_input = f"{rule_name}:{sorted(trigger_files)}:{baseline_ref}"
-trigger_hash = sha256(hash_input.encode()).hexdigest()[:12]
-```
-
-The baseline_ref varies by `compare_to` mode:
-- `base`: merge-base commit hash
-- `default_tip`: remote tip commit hash
-- `prompt`: timestamp of last prompt submission
-
-### Queue
-
-The queue persists rule trigger state in `.deepwork/tmp/rules/queue/`:
-
-```
-.deepwork/tmp/rules/queue/
-├── {hash}.queued.json      # Detected, awaiting evaluation
-├── {hash}.passed.json      # Evaluated, rule satisfied
-├── {hash}.failed.json      # Evaluated, rule not satisfied
-└── {hash}.skipped.json     # Safety pattern matched, skipped
-```
-
-**Queue Entry Schema**:
-```json
-{
-  "rule_name": "string",
-  "trigger_hash": "string",
-  "status": "queued|passed|failed|skipped",
-  "created_at": "ISO8601 timestamp",
-  "evaluated_at": "ISO8601 timestamp or null",
-  "baseline_ref": "string",
-  "trigger_files": ["array", "of", "files"],
-  "expected_files": ["array", "of", "files"],
-  "matched_files": ["array", "of", "files"],
-  "action_result": {
-    "type": "prompt|command",
-    "output": "string or null",
-    "exit_code": "number or null"
-  }
-}
-```
-
-**Queue Cleanup**:
-Since `.deepwork/tmp/` is gitignored, queue entries are transient local state. No aggressive cleanup is required—entries can accumulate without causing issues. The directory can be safely deleted at any time to reset state.
-
-### Evaluator
-
-The evaluator processes queued entries:
-
-1. **Load Entry**: Read queued entry from disk
-2. **Verify Still Relevant**: Re-check that trigger conditions still apply
-3. **Execute Action**:
-   - For prompts: Format message and return to hook system
-   - For commands: Execute command, verify idempotency
-4. **Update Status**: Mark as passed, failed, or skipped
-5. **Report Results**: Return appropriate response to caller
-
-### Matcher
-
-Pattern matching with variable extraction:
-
-**Algorithm**:
-```python
-def match_pattern(pattern: str, filepath: str) -> dict[str, str] | None:
-    """
-    Match filepath against pattern, extracting variables.
-
-    Returns dict of {variable_name: captured_value} or None if no match.
-    """
-    # Convert pattern to regex with named groups
-    # {path} -> (?P<path>.+)
-    # {name} -> (?P<name>[^/]+)
-    # Literal parts are escaped
-    regex = pattern_to_regex(pattern)
-    match = re.fullmatch(regex, filepath)
-    if match:
-        return match.groupdict()
-    return None
-```
-
-**Pattern Compilation**:
-```python
-def pattern_to_regex(pattern: str) -> str:
-    """Convert pattern with {var} placeholders to regex."""
-    result = []
-    for segment in parse_pattern(pattern):
-        if segment.is_variable:
-            if segment.name in ('path', '**'):
-                result.append(f'(?P<{segment.name}>.+)')
-            else:
-                result.append(f'(?P<{segment.name}>[^/]+)')
-        else:
-            result.append(re.escape(segment.value))
-    return ''.join(result)
-```
-
-### Resolver
-
-Generates expected filepaths from patterns and captured variables:
-
-```python
-def resolve_pattern(pattern: str, variables: dict[str, str]) -> str:
-    """
-    Substitute variables into pattern to generate filepath.
-
-    Example:
-        resolve_pattern("tests/{path}_test.py", {"path": "foo/bar"})
-        -> "tests/foo/bar_test.py"
-    """
-    result = pattern
-    for name, value in variables.items():
-        result = result.replace(f'{{{name}}}', value)
-    return result
-```
-
-## Evaluation Flow
-
-### Standard Instruction Rule
-
-```
-1. Detector: File changes detected
-2. Detector: Check each rule's trigger patterns
-3. Detector: For matching rule, compute trigger hash
-4. Detector: If hash not in queue, create .queued entry
-5. Evaluator: Process queued entry
-6. Evaluator: Check safety patterns against changed files
-7. Evaluator: If safety matches, mark .skipped
-8. Evaluator: If no safety match, return instructions to agent
-9. Agent: Addresses rule, includes <promise> tag
-10. Evaluator: On next check, mark .passed (promise found)
-```
-
-### Correspondence Rule (Set)
-
-```
-1. Detector: File src/foo/bar.py changed
-2. Matcher: Matches pattern "src/{path}.py" with {path}="foo/bar"
-3. Resolver: Generate expected files from other patterns:
-   - "tests/{path}_test.py" -> "tests/foo/bar_test.py"
-4. Detector: Check if tests/foo/bar_test.py also changed
-5. Detector: If yes, mark .skipped (correspondence satisfied)
-6. Detector: If no, create .queued entry
-7. Evaluator: Return instructions prompting for test update
-```
-
-### Correspondence Rule (Pair)
-
-```
-1. Detector: File api/users.py changed (trigger pattern)
-2. Matcher: Matches "api/{path}.py" with {path}="users"
-3. Resolver: Generate expected: "docs/api/users.md"
-4. Detector: Check if docs/api/users.md also changed
-5. Detector: If yes, mark .skipped
-6. Detector: If no, create .queued entry
-7. Evaluator: Return instructions
-
-Note: If only docs/api/users.md changed (not api/users.py),
-the pair rule does NOT trigger (directional).
-```
-
-### Command Rule
-
-```
-1. Detector: Python file changed, matches "**/*.py"
-2. Detector: Create .queued entry for format rule
-3. Evaluator: Execute "ruff format {file}"
-4. Evaluator: Run git diff to check for changes
-5. Evaluator: If changes made, re-run command (idempotency check)
-6. Evaluator: If no additional changes, mark .passed
-7. Evaluator: If changes keep occurring, mark .failed, alert user
-```
-
-### Created Rule
-
-```
-1. Detector: New file created, matches "src/**/*.py" created pattern
-2. Detector: Verify file is newly created (not just modified)
-3. Detector: Create .queued entry for new file rule
-4. Evaluator: Return instructions for new file standards
-5. Agent: Addresses rule, includes <promise> tag
-6. Evaluator: On next check, mark .passed (promise found)
-```
-
-Note: Created mode uses separate file detection to distinguish newly
-created files from modified files. Untracked files and files added
-since the baseline are considered "created".
-
-## Agent Output Management
-
-### Problem
-
-When many rules trigger, the agent receives excessive output, degrading performance.
-
-### Solution
-
-**1. Output Batching**
-Group related rules into concise sections:
-
-```
-The following rules require attention:
-
-## Source/Test Pairing
-src/auth/login.py → tests/auth/login_test.py
-src/api/users.py → tests/api/users_test.py
-
-## API Documentation
-api/users.py → docs/api/users.md
-
-## README Accuracy
-Source files changed. Verify README.md is accurate.
-```
-
-**2. Grouped by Rule Name**
-Multiple violations of the same rule are grouped together under a single heading, keeping output compact.
-
-**3. Minimal Decoration**
-Avoid excessive formatting, numbering, or emphasis. Use simple arrow notation for correspondence violations.
-
-## State Persistence
-
-### Directory Structure
-
-```
-.deepwork/
-├── rules/                   # Rule definitions (frontmatter markdown)
-│   ├── readme-accuracy.md
-│   ├── source-test-pairing.md
-│   ├── api-documentation.md
-│   └── python-formatting.md
-├── tmp/                     # GITIGNORED - transient state
-│   └── rules/
-│       ├── queue/           # Queue entries
-│       │   ├── abc123.queued.json
-│       │   └── def456.passed.json
-│       ├── baselines/       # Cached baseline states
-│       │   └── prompt_1705420800.json
-│       └── cache/           # Pattern matching cache
-│           └── patterns.json
-└── rules_state.json         # Session state summary
-```
-
-**Important:** The entire `.deepwork/tmp/` directory is gitignored. All queue entries, baselines, and caches are local transient state that is not committed. This means cleanup is not critical—files can accumulate and will be naturally cleaned when the directory is deleted or the repo is re-cloned.
-
-### Rule File Format
-
-Each rule is a markdown file with YAML frontmatter:
-
-```markdown
----
-name: README Accuracy
-trigger: src/**/*.py
-safety: README.md
----
-Instructions shown to the agent when this rule fires.
-
-These can be multi-line with full markdown formatting.
-```
-
-This format enables:
-1. Code files to reference rules in comments
-2. Human-readable rule documentation
-3. Easy editing with any markdown editor
-4. Clear separation of configuration and content
-
-### Baseline Management
-
-For `compare_to: prompt`, baselines are captured at prompt submission:
-
-```json
-{
-  "timestamp": "2024-01-16T12:00:00Z",
-  "commit": "abc123",
-  "staged_files": ["file1.py", "file2.py"],
-  "untracked_files": ["file3.py"]
-}
-```
-
-Multiple baselines can exist for different prompts in a session.
-
-### Queue Lifecycle
-
-```
-                  ┌─────────┐
-                  │ Created │
-                  │ .queued │
-                  └────┬────┘
-                       │
-         ┌─────────────┼─────────────┐
-         │             │             │
-         ▼             ▼             ▼
-    ┌─────────┐   ┌─────────┐   ┌─────────┐
-    │ .passed │   │ .failed │   │.skipped │
-    └─────────┘   └─────────┘   └─────────┘
-```
-
-Terminal states persist in `.deepwork/tmp/` (gitignored) until manually cleared or the directory is deleted.
-
-## Error Handling
-
-### Pattern Errors
-
-Invalid patterns are caught at rule load time:
-
-```python
-class PatternError(RulesError):
-    """Invalid pattern syntax."""
-    pass
-
-# Validation
-def validate_pattern(pattern: str) -> None:
-    # Check for unbalanced braces
-    # Check for invalid variable names
-    # Check for unsupported syntax
-```
-
-### Command Errors
-
-Command execution errors are captured and reported:
-
-```json
-{
-  "status": "failed",
-  "action_result": {
-    "type": "command",
-    "command": "ruff format {file}",
-    "exit_code": 1,
-    "stdout": "",
-    "stderr": "error: invalid syntax in foo.py:10"
-  }
-}
-```
-
-### Queue Corruption
-
-If queue entries become corrupted:
-1. Log error with entry details
-2. Remove corrupted entry
-3. Re-detect triggers on next evaluation
-
-## Configuration
-
-### Rule Files
-
-Rules are stored in `.deepwork/rules/` as individual markdown files with YAML frontmatter. See `doc/rules_syntax.md` for complete syntax documentation.
-
-**Loading Order:**
-1. All `.md` files in `.deepwork/rules/` are loaded
-2. Files are processed in alphabetical order
-3. Filename (without extension) becomes rule identifier
-
-**Rule Discovery:**
-```python
-def load_rules(rules_dir: Path) -> list[Rule]:
-    """Load all rules from the rules directory."""
-    rules = []
-    for path in sorted(rules_dir.glob("*.md")):
-        rule = parse_rule_file(path)
-        rule.name = path.stem  # filename without .md
-        rules.append(rule)
-    return rules
-```
-
-### System Configuration
-
-In `.deepwork/config.yml`:
-
-```yaml
-rules:
-  enabled: true
-  rules_dir: .deepwork/rules  # Can be customized
-```
-
-## Performance Considerations
-
-### Caching
-
-- Pattern compilation is cached per-session
-- Baseline diffs are cached by commit hash
-- Queue lookups use hash-based O(1) access
-
-### Lazy Evaluation
-
-- Patterns only compiled when needed
-- File lists only computed for triggered rules
-- Instructions only loaded when rule fires
-
-### Parallel Processing
-
-- Multiple queue entries can be processed in parallel
-- Command actions can run concurrently (with file locking)
-- Pattern matching is parallelized across rules
-
-## Migration from Legacy System
-
-The legacy system used a single `.deepwork.rules.yml` file with array of rules. The new system uses individual markdown files in `.deepwork/rules/`.
-
-**Breaking Changes:**
-- Single YAML file replaced with folder of markdown files
-- Rule `name` field replaced with filename
-- `instructions` / `instructions_file` replaced with markdown body
-- New features: sets, pairs, commands, queue-based state
-
-**No backwards compatibility is provided.** Existing `.deepwork.rules.yml` files must be converted manually.
-
-**Conversion Example:**
-
-Old format (`.deepwork.rules.yml`):
-```yaml
-- name: "README Accuracy"
-  trigger: "src/**/*"
-  safety: "README.md"
-  instructions: |
-    Please verify README.md is accurate.
-```
-
-New format (`.deepwork/rules/readme-accuracy.md`):
-```markdown
----
-trigger: src/**/*
-safety: README.md
----
-Please verify README.md is accurate.
-```
-
-## Security Considerations
-
-### Command Execution
-
-- Commands run in sandboxed subprocess
-- No shell expansion (arguments passed as array)
-- Working directory is always repo root
-- Environment variables are filtered
-
-### Queue File Permissions
-
-- Queue directory: 700 (owner only)
-- Queue files: 600 (owner only)
-- No sensitive data in queue entries
-
-### Input Validation
-
-- All rule files validated against schema
-- Pattern variables sanitized before use
-- File paths normalized and validated
diff --git a/flake.lock b/flake.lock
index ce228ff8..8a9dd522 100644
--- a/flake.lock
+++ b/flake.lock
@@ -1,5 +1,24 @@
 {
   "nodes": {
+    "claude-code-nix": {
+      "inputs": {
+        "flake-utils": "flake-utils",
+        "nixpkgs": "nixpkgs"
+      },
+      "locked": {
+        "lastModified": 1770362224,
+        "narHash": "sha256-glZjGWSy+LpalbwlsQ3iWNpWU4TlEOandYWOpl8sMt8=",
+        "owner": "sadjow",
+        "repo": "claude-code-nix",
+        "rev": "f4f8d6e7cc59e34e5a85550f017ead83ab925b22",
+        "type": "github"
+      },
+      "original": {
+        "owner": "sadjow",
+        "repo": "claude-code-nix",
+        "type": "github"
+      }
+    },
     "flake-utils": {
       "inputs": {
         "systems": "systems"
@@ -20,11 +39,27 @@
     },
     "nixpkgs": {
       "locked": {
-        "lastModified": 1769018530,
-        "narHash": "sha256-MJ27Cy2NtBEV5tsK+YraYr2g851f3Fl1LpNHDzDX15c=",
+        "lastModified": 1770169770,
+        "narHash": "sha256-awR8qIwJxJJiOmcEGgP2KUqYmHG4v/z8XpL9z8FnT1A=",
+        "owner": "NixOS",
+        "repo": "nixpkgs",
+        "rev": "aa290c9891fa4ebe88f8889e59633d20cc06a5f2",
+        "type": "github"
+      },
+      "original": {
+        "owner": "NixOS",
+        "ref": "nixpkgs-unstable",
+        "repo": "nixpkgs",
+        "type": "github"
+      }
+    },
+    "nixpkgs_2": {
+      "locked": {
+        "lastModified": 1770197578,
+        "narHash": "sha256-AYqlWrX09+HvGs8zM6ebZ1pwUqjkfpnv8mewYwAo+iM=",
         "owner": "NixOS",
         "repo": "nixpkgs",
-        "rev": "88d3861acdd3d2f0e361767018218e51810df8a1",
+        "rev": "00c21e4c93d963c50d4c0c89bfa84ed6e0694df2",
         "type": "github"
       },
       "original": {
@@ -34,10 +69,59 @@
         "type": "github"
       }
     },
+    "pyproject-build-systems": {
+      "inputs": {
+        "nixpkgs": [
+          "nixpkgs"
+        ],
+        "pyproject-nix": [
+          "pyproject-nix"
+        ],
+        "uv2nix": [
+          "uv2nix"
+        ]
+      },
+      "locked": {
+        "lastModified": 1763662255,
+        "narHash": "sha256-4bocaOyLa3AfiS8KrWjZQYu+IAta05u3gYZzZ6zXbT0=",
+        "owner": "pyproject-nix",
+        "repo": "build-system-pkgs",
+        "rev": "042904167604c681a090c07eb6967b4dd4dae88c",
+        "type": "github"
+      },
+      "original": {
+        "owner": "pyproject-nix",
+        "repo": "build-system-pkgs",
+        "type": "github"
+      }
+    },
+    "pyproject-nix": {
+      "inputs": {
+        "nixpkgs": [
+          "nixpkgs"
+        ]
+      },
+      "locked": {
+        "lastModified": 1769936401,
+        "narHash": "sha256-kwCOegKLZJM9v/e/7cqwg1p/YjjTAukKPqmxKnAZRgA=",
+        "owner": "pyproject-nix",
+        "repo": "pyproject.nix",
+        "rev": "b0d513eeeebed6d45b4f2e874f9afba2021f7812",
+        "type": "github"
+      },
+      "original": {
+        "owner": "pyproject-nix",
+        "repo": "pyproject.nix",
+        "type": "github"
+      }
+    },
     "root": {
       "inputs": {
-        "flake-utils": "flake-utils",
-        "nixpkgs": "nixpkgs"
+        "claude-code-nix": "claude-code-nix",
+        "nixpkgs": "nixpkgs_2",
+        "pyproject-build-systems": "pyproject-build-systems",
+        "pyproject-nix": "pyproject-nix",
+        "uv2nix": "uv2nix"
       }
     },
     "systems": {
@@ -54,6 +138,29 @@
         "repo": "default",
         "type": "github"
       }
+    },
+    "uv2nix": {
+      "inputs": {
+        "nixpkgs": [
+          "nixpkgs"
+        ],
+        "pyproject-nix": [
+          "pyproject-nix"
+        ]
+      },
+      "locked": {
+        "lastModified": 1770331927,
+        "narHash": "sha256-jlOvO++uvne/lTgWqdI4VhTV5OpVWi70ZDVBlT6vGSs=",
+        "owner": "pyproject-nix",
+        "repo": "uv2nix",
+        "rev": "5b43a934e15b23bfba6c408cba1c570eccf80080",
+        "type": "github"
+      },
+      "original": {
+        "owner": "pyproject-nix",
+        "repo": "uv2nix",
+        "type": "github"
+      }
     }
   },
   "root": "root",
diff --git a/flake.nix b/flake.nix
index d2218afb..a7a2150f 100644
--- a/flake.nix
+++ b/flake.nix
@@ -3,117 +3,138 @@
 
   inputs = {
     nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
-    flake-utils.url = "github:numtide/flake-utils";
+
+    # Claude Code with pre-built native binaries (hourly updates)
+    claude-code-nix.url = "github:sadjow/claude-code-nix";
+
+    pyproject-nix = {
+      url = "github:pyproject-nix/pyproject.nix";
+      inputs.nixpkgs.follows = "nixpkgs";
+    };
+
+    uv2nix = {
+      url = "github:pyproject-nix/uv2nix";
+      inputs.pyproject-nix.follows = "pyproject-nix";
+      inputs.nixpkgs.follows = "nixpkgs";
+    };
+
+    pyproject-build-systems = {
+      url = "github:pyproject-nix/build-system-pkgs";
+      inputs.pyproject-nix.follows = "pyproject-nix";
+      inputs.uv2nix.follows = "uv2nix";
+      inputs.nixpkgs.follows = "nixpkgs";
+    };
   };
 
-  outputs = { self, nixpkgs, flake-utils }:
-    flake-utils.lib.eachDefaultSystem (system:
-      let
-        pkgs = import nixpkgs {
-          inherit system;
-          # Allow unfree packages to support the Business Source License 1.1
-          config.allowUnfree = true;
-        };
-        # Local claude-code package for version control (update via nix/claude-code/update.sh)
-        claude-code = pkgs.callPackage ./nix/claude-code/package.nix { };
-        # Read version from pyproject.toml to avoid duplication
-        pyproject = builtins.fromTOML (builtins.readFile ./pyproject.toml);
-        deepwork = pkgs.python311Packages.buildPythonPackage {
-          pname = "deepwork";
-          version = pyproject.project.version;
-          src = ./.;
-          format = "pyproject";
-          nativeBuildInputs = [ pkgs.python311Packages.hatchling ];
-          # Required for `nix build` - must match pyproject.toml dependencies
-          propagatedBuildInputs = with pkgs.python311Packages; [
-            click gitpython jinja2 jsonschema pyyaml rich rpds-py
-          ];
-          doCheck = false;
-        };
-      in
-      {
-        devShells.default = pkgs.mkShell {
-          buildInputs = with pkgs; [
-            # Python 3.11 - base interpreter for uv
-            python311
-
-            # uv manages all Python packages (deps, dev tools, etc.)
-            uv
-
-            # Git for version control
-            git
-
-            # System tools
-            jq  # For JSON processing
-
-            # CLI tools (claude-code is locally built, see nix/claude-code/)
-            claude-code
-            gh           # GitHub CLI
-          ];
-
-          # Environment variables for uv integration with Nix
-          env = {
-            # Tell uv to use the Nix-provided Python interpreter
-            UV_PYTHON = "${pkgs.python311}/bin/python";
-            # Prevent uv from downloading Python binaries
-            UV_PYTHON_DOWNLOADS = "never";
-            # Development mode flag
-            DEEPWORK_DEV = "1";
+  outputs = { self, nixpkgs, claude-code-nix, pyproject-nix, uv2nix, pyproject-build-systems, ... }:
+    let
+      inherit (nixpkgs) lib;
+
+      # Systems to support
+      forAllSystems = lib.genAttrs [ "x86_64-linux" "aarch64-linux" "x86_64-darwin" "aarch64-darwin" ];
+
+      # Load the uv workspace from uv.lock
+      workspace = uv2nix.lib.workspace.loadWorkspace { workspaceRoot = ./.; };
+
+      # Create overlay from uv.lock - prefer wheels for faster builds
+      overlay = workspace.mkPyprojectOverlay { sourcePreference = "wheel"; };
+
+      # Editable overlay for development (live-reload from src/)
+      editableOverlay = workspace.mkEditablePyprojectOverlay { root = "$REPO_ROOT"; };
+
+      # Build Python package sets for each system
+      pythonSets = forAllSystems (system:
+        let
+          pkgs = import nixpkgs {
+            inherit system;
+            config.allowUnfree = true;
           };
+          python = pkgs.python311;
+        in
+        (pkgs.callPackage pyproject-nix.build.packages { inherit python; }).overrideScope
+          (lib.composeManyExtensions [
+            pyproject-build-systems.overlays.default
+            overlay
+          ])
+      );
 
-          shellHook = ''
-            # Create venv if it doesn't exist
-            if [ ! -d .venv ]; then
-              echo "Creating virtual environment..."
-              uv venv .venv --quiet
-            fi
-
-            # Sync dependencies (including dev extras like pytest, ruff, mypy)
-            # Run quietly - uv only outputs when changes are needed
-            uv sync --all-extras --quiet 2>/dev/null || uv sync --all-extras
-
-            # Activate venv by setting environment variables directly
-            # This works reliably for both interactive shells and `nix develop --command`
-            export VIRTUAL_ENV="$PWD/.venv"
-            export PATH="$VIRTUAL_ENV/bin:$PATH"
-            unset PYTHONHOME
-
-            # Set PYTHONPATH for editable install access to src/
-            export PYTHONPATH="$PWD/src:$PYTHONPATH"
-
-            # Add nix/ scripts to PATH (for 'update' command)
-            export PATH="$PWD/nix:$PATH"
-
-            # Only show welcome message in interactive shells
-            if [[ $- == *i* ]]; then
-              echo ""
-              echo "DeepWork Development Environment"
-              echo "================================"
-              echo ""
-              echo "Python: $(python --version) | uv: $(uv --version)"
-              echo ""
-              echo "Commands:"
-              echo "  deepwork --help    CLI (development version)"
-              echo "  pytest             Run tests"
-              echo "  ruff check src/    Lint code"
-              echo "  mypy src/          Type check"
-              echo "  claude-code        Claude Code CLI"
-              echo "  gh                 GitHub CLI"
-              echo "  update             Update claude-code and flake inputs"
-              echo ""
-            fi
-          '';
-        };
+    in
+    {
+      devShells = forAllSystems (system:
+        let
+          pkgs = import nixpkgs {
+            inherit system;
+            config.allowUnfree = true;
+          };
+
+          # Python set with editable overlay for development
+          pythonSet = pythonSets.${system}.overrideScope editableOverlay;
+
+          # Virtual environment with all dependencies (including dev extras)
+          virtualenv = pythonSet.mkVirtualEnv "deepwork-dev-env" workspace.deps.all;
+        in
+        {
+          default = pkgs.mkShell {
+            packages = [
+              virtualenv
+              pkgs.uv
+              pkgs.git
+              pkgs.jq
+              claude-code-nix.packages.${system}.default
+              pkgs.gh
+            ];
+
+            env = {
+              # Prevent uv from managing packages (Nix handles it)
+              UV_NO_SYNC = "1";
+              UV_PYTHON = "${pythonSet.python}/bin/python";
+              UV_PYTHON_DOWNLOADS = "never";
+              DEEPWORK_DEV = "1";
+            };
+
+            shellHook = ''
+              # Required for editable overlay
+              unset PYTHONPATH
+              export REPO_ROOT=$(git rev-parse --show-toplevel)
+
+              # Only show welcome message in interactive shells
+              if [[ $- == *i* ]]; then
+                echo ""
+                echo "DeepWork Development Environment (uv2nix)"
+                echo "=========================================="
+                echo ""
+                echo "Python: $(python --version) | uv: $(uv --version)"
+                echo ""
+                echo "Commands:"
+                echo "  deepwork --help    CLI (development version)"
+                echo "  pytest             Run tests"
+                echo "  ruff check src/    Lint code"
+                echo "  mypy src/          Type check"
+                echo "  claude             Claude Code CLI"
+                echo "  gh                 GitHub CLI"
+                echo ""
+              fi
+            '';
+          };
+        }
+      );
 
-        # Make the package available as a flake output
-        packages.default = deepwork;
-        packages.deepwork = deepwork;
+      # Package output - virtual environment with default deps only
+      packages = forAllSystems (system:
+        let
+          pkg = pythonSets.${system}.mkVirtualEnv "deepwork-env" workspace.deps.default;
+        in {
+          default = pkg;
+          deepwork = pkg;  # Alias for backwards compatibility
+        }
+      );
 
-        # Make deepwork runnable with 'nix run'
-        apps.default = {
+      # Make deepwork runnable with 'nix run'
+      apps = forAllSystems (system: {
+        default = {
           type = "app";
-          program = "${deepwork}/bin/deepwork";
+          program = "${self.packages.${system}.default}/bin/deepwork";
         };
-      }
-    );
+      });
+    };
 }
diff --git a/library/jobs/commit/block_bash_with_instructions.sh b/library/jobs/commit/block_bash_with_instructions.sh
deleted file mode 100755
index 7bd16f88..00000000
--- a/library/jobs/commit/block_bash_with_instructions.sh
+++ /dev/null
@@ -1,74 +0,0 @@
-#!/bin/bash
-# block_bash_with_instructions.sh - Blocks specific bash commands and provides alternative instructions
-#
-# This hook intercepts Bash tool use calls and blocks commands that match
-# specific patterns, providing alternative instructions to the agent.
-#
-# Usage: Registered as a PreToolUse hook in .claude/settings.json
-#
-# Input (stdin): JSON from Claude Code hook system containing tool_name and tool_input
-# Output (stderr): Error message if blocked (Claude Code reads stderr for exit code 2)
-# Exit codes:
-#   0 - Success (allow action)
-#   2 - Blocking error (prevent action with message)
-
-set -e
-
-# =============================================================================
-# BLOCKED COMMANDS CONFIGURATION
-# =============================================================================
-# Format: Each entry is a regex pattern followed by a delimiter (|||) and instructions
-# The regex is matched against the full bash command
-# Add new blocked commands here:
-
-BLOCKED_COMMANDS=(
-    '^[[:space:]]*git[[:space:]]+commit|||All commits must be done via the `/commit` skill. Do not use git commit directly. Instead, run `/commit` to start the commit workflow which includes code review, testing, and linting before committing.'
-)
-
-# =============================================================================
-# HOOK LOGIC - DO NOT MODIFY BELOW UNLESS NECESSARY
-# =============================================================================
-
-# Read stdin into variable
-HOOK_INPUT=""
-if [ ! -t 0 ]; then
-    HOOK_INPUT=$(cat)
-fi
-
-# Exit early if no input
-if [ -z "${HOOK_INPUT}" ]; then
-    exit 0
-fi
-
-# Extract tool_name from input
-TOOL_NAME=$(echo "${HOOK_INPUT}" | jq -r '.tool_name // empty' 2>/dev/null)
-
-# Only process Bash tool calls
-if [ "${TOOL_NAME}" != "Bash" ]; then
-    exit 0
-fi
-
-# Extract the command from tool_input
-COMMAND=$(echo "${HOOK_INPUT}" | jq -r '.tool_input.command // empty' 2>/dev/null)
-
-# Exit if no command
-if [ -z "${COMMAND}" ]; then
-    exit 0
-fi
-
-# Check each blocked pattern
-for entry in "${BLOCKED_COMMANDS[@]}"; do
-    # Split entry by delimiter
-    pattern="${entry%%|||*}"
-    instructions="${entry##*|||}"
-
-    # Check if command matches pattern (using extended regex)
-    if echo "${COMMAND}" | grep -qE "${pattern}"; then
-        # Output error message to stderr (Claude Code reads stderr for exit code 2)
-        echo "${instructions}" >&2
-        exit 2
-    fi
-done
-
-# Command is allowed
-exit 0
diff --git a/library/jobs/commit/code_review_standards.example.md b/library/jobs/commit/code_review_standards.example.md
deleted file mode 100644
index 3761c354..00000000
--- a/library/jobs/commit/code_review_standards.example.md
+++ /dev/null
@@ -1,67 +0,0 @@
-# Code Review Standards
-
-This document defines the standards used during code review in the commit workflow.
-
-## Review Categories
-
-### General Issues
-
-Check for:
-- Logic errors or potential bugs
-- Error handling gaps
-- Security concerns (injection, authentication, authorization)
-- Performance issues (inefficient algorithms, unnecessary computation)
-- Resource leaks (unclosed files, connections)
-
-### DRY (Don't Repeat Yourself)
-
-Look for:
-- Duplicated code that should be extracted into functions
-- Repeated patterns that could be abstracted
-- Copy-pasted logic with minor variations
-- Similar code blocks that differ only in variable names
-
-### Naming Clarity
-
-Ensure:
-- Variables, functions, and classes have clear, descriptive names
-- Names reflect purpose and intent
-- Abbreviations are avoided unless universally understood
-- Naming conventions are consistent throughout the codebase
-
-### Test Coverage
-
-Verify:
-- New functions or classes have corresponding tests
-- New code paths are tested
-- Edge cases are covered
-- Error conditions are tested
-- If tests are missing, note what should be tested
-
-### Test Quality
-
-Ensure tests add value and are not duplicative:
-- Each test should verify a distinct behavior or scenario
-- Tests should not duplicate what other tests already cover
-- Test names should clearly describe what they're testing
-- Tests should be meaningful, not just checking trivial cases
-- Avoid testing implementation details; focus on behavior
-- If multiple tests appear redundant, suggest consolidation
-
-## Severity Levels
-
-When reporting issues, categorize by severity:
-
-- **Critical**: Must fix before commit (bugs, security issues)
-- **High**: Should fix before commit (logic errors, missing error handling)
-- **Medium**: Recommended to fix (DRY violations, unclear naming)
-- **Low**: Nice to have (style improvements, minor optimizations)
-
-## Review Output Format
-
-For each issue found, provide:
-1. File and line number
-2. Severity level
-3. Category (General/DRY/Naming/Tests)
-4. Description of the issue
-5. Suggested fix or improvement
diff --git a/library/jobs/commit/commit_job_git_commit.sh b/library/jobs/commit/commit_job_git_commit.sh
deleted file mode 100755
index 764b0768..00000000
--- a/library/jobs/commit/commit_job_git_commit.sh
+++ /dev/null
@@ -1,7 +0,0 @@
-#!/bin/bash
-# commit_job_git_commit.sh - Wrapper for git commit invoked via the /commit skill
-#
-# This script bypasses the PreToolUse hook that blocks direct `git commit` commands.
-# It allows the commit job to perform the actual commit after all quality checks pass.
-
-exec git commit "$@"
diff --git a/library/jobs/commit/job.yml b/library/jobs/commit/job.yml
deleted file mode 100644
index ebf4575c..00000000
--- a/library/jobs/commit/job.yml
+++ /dev/null
@@ -1,83 +0,0 @@
-name: commit
-version: "1.0.0"
-summary: "Reviews code, runs tests, lints, and commits changes. Use when ready to commit work with quality checks."
-description: |
-  A workflow for preparing and committing code changes with quality checks.
-
-  The **full** workflow starts with a code review to catch issues early, runs tests until
-  they pass, formats and lints code, then reviews changed files before
-  committing and pushing. The review and lint steps use sub-agents to reduce
-  context usage.
-
-  Steps:
-  1. review - Code review for issues, DRY opportunities, naming, and test coverage (runs in sub-agent)
-  2. test - Pull latest code and run tests until they pass
-  3. lint - Format and lint code (runs in sub-agent)
-  4. commit_and_push - Review changes and commit/push
-
-workflows:
-  - name: full
-    summary: "Full commit workflow: review, test, lint, and commit"
-    steps:
-      - review
-      - test
-      - lint
-      - commit_and_push
-
-changelog:
-  - version: "1.0.0"
-    changes: "Initial library version - generalized from project-specific commit workflow"
-
-steps:
-  - id: review
-    name: "Code Review"
-    description: "Reviews changed code for issues, DRY opportunities, naming clarity, and test coverage using a sub-agent. Use as the first step before testing."
-    instructions_file: steps/review.md
-    inputs: []
-    outputs:
-      - code_reviewed  # implicit state: code has been reviewed and issues addressed
-    dependencies: []
-    quality_criteria:
-      - "Changed files were identified"
-      - "Code was reviewed against the project's code review standards"
-      - "All identified issues were addressed or documented as intentional"
-
-  - id: test
-    name: "Run Tests"
-    description: "Pulls latest code and runs tests until all pass. Use after code review passes to verify changes work correctly."
-    instructions_file: steps/test.md
-    inputs: []
-    outputs:
-      - tests_passing  # implicit state: all tests pass
-    dependencies:
-      - review
-    quality_criteria:
-      - "Latest code was pulled from the branch"
-      - "All tests are passing"
-
-  - id: lint
-    name: "Lint Code"
-    description: "Formats and lints code using a sub-agent. Use after tests pass to ensure code style compliance."
-    instructions_file: steps/lint.md
-    inputs: []
-    outputs:
-      - code_formatted  # implicit state: code formatted and linted
-    dependencies:
-      - test
-    quality_criteria:
-      - "Code was formatted"
-      - "Lint check passed with no errors"
-
-  - id: commit_and_push
-    name: "Commit and Push"
-    description: "Verifies changed files, creates commit, and pushes to remote. Use after linting passes to finalize changes."
-    instructions_file: steps/commit_and_push.md
-    inputs: []
-    outputs:
-      - changes_committed  # implicit state: changes committed and pushed
-    dependencies:
-      - lint
-    quality_criteria:
-      - "Changed files were verified against expectations"
-      - "Commit was created with appropriate message"
-      - "Changes were pushed to remote"
diff --git a/library/jobs/commit/readme.md b/library/jobs/commit/readme.md
deleted file mode 100644
index fd067a1d..00000000
--- a/library/jobs/commit/readme.md
+++ /dev/null
@@ -1,174 +0,0 @@
-# Commit Job
-
-A structured workflow for committing code changes with built-in quality checks.
-
-## Overview
-
-This job implements a comprehensive commit workflow that ensures code quality before every commit. Instead of allowing direct `git commit` commands, this job:
-
-1. **Reviews** changed code for issues, DRY opportunities, naming clarity, and test coverage
-2. **Tests** the code to ensure all tests pass
-3. **Lints** the code to ensure consistent formatting and style
-4. **Commits** and pushes only after all checks pass
-
-## Why Hijack `git commit`?
-
-The core design principle of this job is that **every commit should pass through quality checks**. To enforce this, we intercept `git commit` commands and redirect the agent to use the `/commit` skill instead.
-
-Without this interception, an AI agent might:
-- Commit code that hasn't been reviewed
-- Push changes without running tests
-- Skip linting, leading to inconsistent code style
-- Bypass the structured workflow entirely
-
-By blocking `git commit` and requiring the commit job's script, we guarantee that:
-- Code is reviewed before testing (catching issues early)
-- Tests pass before linting (no point linting broken code)
-- Linting completes before committing (consistent style)
-- All quality gates are passed before code reaches the repository
-
-## IMPORTANT: REQUIRED CUSTOMIZATION
-
-When installing this job to a new project, you must customize the following:
-
-### 1. Replace `[test command]`
-
-In `steps/test.md`, replace `[test command]` with your project's test command (e.g., `pytest`, `npm test`, `go test ./...`).
-
-### 2. Replace `[format command]`
-
-In `steps/lint.md`, replace `[format command]` with your project's code formatting command (e.g., `ruff format .`, `npx prettier --write .`, `go fmt ./...`).
-
-### 3. Replace `[lint check command]`
-
-In `steps/lint.md`, replace `[lint check command]` with your project's lint check command (e.g., `ruff check --fix .`, `npx eslint --fix .`, `golangci-lint run`).
-
-### 4. Replace `[code review standards path]`
-
-In `steps/review.md`, replace `[code review standards path]` with the path to your project's code review standards file (e.g., `docs/code_review_standards.md`).
-
-If your project doesn't have a code review standards file yet, you can use the provided example as a starting point:
-
-```bash
-cp library/jobs/commit/code_review_standards.example.md docs/code_review_standards.md
-```
-
-Then customize `docs/code_review_standards.md` to match your project's specific requirements, coding style, and quality expectations.
-
-### 5. Replace `[commit script path]`
-
-In `steps/commit_and_push.md`, replace `[commit script path]` with the path to your commit wrapper script (e.g., `.deepwork/jobs/commit/commit_job_git_commit.sh`). See installation step 3 below for how to create this script.
-
-## Installation
-
-### 1. Copy the Job Folder
-
-Copy this entire `commit` folder to your project's `.deepwork/jobs/` directory:
-
-```bash
-cp -r library/jobs/commit .deepwork/jobs/
-```
-
-### 2. Install the Git Commit Blocker Hook
-
-The job includes a `block_bash_with_instructions.sh` script that intercepts `git commit` commands and redirects the agent to use the `/commit` skill. Copy it to your hooks directory and make it executable:
-
-```bash
-mkdir -p .claude/hooks
-cp .deepwork/jobs/commit/block_bash_with_instructions.sh .claude/hooks/
-chmod +x .claude/hooks/block_bash_with_instructions.sh
-```
-
-### 3. Make the Commit Wrapper Script Executable
-
-The job also includes a `commit_job_git_commit.sh` script that bypasses the hook interception (used by the commit job itself). Make it executable:
-
-```bash
-chmod +x .deepwork/jobs/commit/commit_job_git_commit.sh
-```
-
-### 4. Configure settings.json
-
-Add the following to your `.claude/settings.json`:
-
-```json
-{
-  "permissions": {
-    "allow": [
-      "Bash(.deepwork/jobs/commit/commit_job_git_commit.sh:*)"
-    ]
-  },
-  "hooks": {
-    "PreToolUse": [
-      {
-        "matcher": "Bash",
-        "hooks": [
-          {
-            "type": "command",
-            "command": ".claude/hooks/block_bash_with_instructions.sh"
-          }
-        ]
-      }
-    ]
-  }
-}
-```
-
-This configuration:
-- Allows the commit wrapper script to run without prompts
-- Registers the hook that blocks direct `git commit` commands
-
-### 5. Customize the Placeholders
-
-Replace all placeholders in the step files as described in the "Required Customization" section above.
-
-### 6. Sync the Skills
-
-Run `deepwork sync` to generate the slash commands for your AI coding assistant.
-
-## Workflow Steps
-
-### 1. Code Review (`/commit.review`)
-
-Uses a sub-agent to review changed files against the standards defined in your project's code review standards file. The example standards file checks for:
-- General issues (bugs, security, performance)
-- DRY opportunities (duplicated code)
-- Naming clarity (descriptive names)
-- Test coverage (missing tests)
-
-### 2. Run Tests (`/commit.test`)
-
-- Pulls latest code from the branch
-- Runs the test suite
-- Fixes any failing tests
-- Iterates until all tests pass
-
-### 3. Lint Code (`/commit.lint`)
-
-Uses a sub-agent to:
-- Format code according to project style
-- Run lint checks
-- Fix any auto-fixable issues
-
-### 4. Commit and Push (`/commit.commit_and_push`)
-
-- Reviews changed files against expectations
-- Creates commit with appropriate message
-- Pushes to remote repository
-
-## Usage
-
-Once installed and synced, simply run:
-
-```
-/commit
-```
-
-This will execute all steps in order. You can also run individual steps:
-
-```
-/commit.review
-/commit.test
-/commit.lint
-/commit.commit_and_push
-```
diff --git a/library/jobs/commit/steps/commit_and_push.md b/library/jobs/commit/steps/commit_and_push.md
deleted file mode 100644
index 565c4877..00000000
--- a/library/jobs/commit/steps/commit_and_push.md
+++ /dev/null
@@ -1,66 +0,0 @@
-# Commit and Push
-
-## Objective
-
-Review the changed files to verify they match the agent's expectations, create a commit with an appropriate message, and push to the remote repository.
-
-## Task
-
-Check the list of changed files against what was modified during this session, ensure they match expectations, then commit and push the changes.
-
-### Process
-
-1. **Get the list of changed files**
-   ```bash
-   git status
-   ```
-   Also run `git diff --stat` to see a summary of changes.
-
-2. **Verify changes match expectations**
-
-   Compare the changed files against what you modified during this session:
-   - Do the modified files match what you edited?
-   - Are there any unexpected new files?
-   - Are there any unexpected deleted files?
-   - Do the line counts seem reasonable for the changes you made?
-
-   If changes match expectations, proceed to the next step.
-
-   If there are unexpected changes:
-   - Investigate why (e.g., lint auto-fixes, generated files)
-   - If they're legitimate side effects of your work, include them
-   - If they're unrelated or shouldn't be committed, use `git restore` to discard them
-
-3. **Stage all appropriate changes**
-   ```bash
-   git add -A
-   ```
-   Or stage specific files if some were excluded.
-
-4. **Create the commit**
-
-   Generate an appropriate commit message based on the changes made.
-
-   **IMPORTANT:** Use the commit job script (not `git commit` directly):
-   ```bash
-   [commit script path] -m "commit message here"
-   ```
-
-5. **Push to remote**
-   ```bash
-   git push
-   ```
-   If the branch has no upstream, use:
-   ```bash
-   git push -u origin HEAD
-   ```
-
-## Quality Criteria
-
-- Changed files were verified against expectations
-- Commit was created with appropriate message
-- Changes were pushed to remote
-
-## Context
-
-This is the final step of the commit workflow. The agent verifies that the changed files match its own expectations from the work done during the session, then commits and pushes. This catches unexpected changes while avoiding unnecessary user interruptions.
diff --git a/library/jobs/commit/steps/lint.md b/library/jobs/commit/steps/lint.md
deleted file mode 100644
index eb5989bd..00000000
--- a/library/jobs/commit/steps/lint.md
+++ /dev/null
@@ -1,41 +0,0 @@
-# Lint Code
-
-## Objective
-
-Format and lint the codebase to ensure code quality and consistency.
-
-## Task
-
-Run the project's format and lint commands. This step should be executed using a sub-agent to conserve context in the main conversation.
-
-### Process
-
-**IMPORTANT**: Use the Task tool to spawn a sub-agent for this work. This saves context in the main conversation. Use the `haiku` model for speed.
-
-1. **Spawn a sub-agent to run linting**
-
-   Use the Task tool with these parameters:
-   - `subagent_type`: "Bash"
-   - `model`: "haiku"
-   - `prompt`: Instruct the sub-agent to:
-     - Run the format command: `[format command]`
-     - Run the lint check command: `[lint check command]`
-     - Run lint check again to verify no remaining issues
-     - Report the results of each command
-
-2. **Review sub-agent results**
-   - Check that both format and check completed successfully
-   - Note any remaining lint issues that couldn't be auto-fixed
-
-3. **Handle remaining issues**
-   - If there are lint errors that couldn't be auto-fixed, fix them manually
-   - Re-run lint check to verify
-
-## Quality Criteria
-
-- Code was formatted
-- Lint check passed with no errors
-
-## Context
-
-This step ensures code quality and consistency before committing. It runs after tests pass and before the commit step. Using a sub-agent keeps the main conversation context clean for the commit review.
diff --git a/library/jobs/commit/steps/review.md b/library/jobs/commit/steps/review.md
deleted file mode 100644
index 53e0f377..00000000
--- a/library/jobs/commit/steps/review.md
+++ /dev/null
@@ -1,55 +0,0 @@
-# Code Review
-
-## Objective
-
-Review changed code for quality issues before running tests. This catches problems early and ensures code meets quality standards.
-
-## Task
-
-Use a sub-agent to review the staged/changed code and identify issues that should be fixed before committing.
-
-### Process
-
-**IMPORTANT**: Use the Task tool to spawn a sub-agent for this review. This saves context in the main conversation.
-
-1. **Get the list of changed files**
-   ```bash
-   git diff --name-only HEAD
-   git diff --name-only --staged
-   ```
-   Combine these to get all files that have been modified.
-
-2. **Spawn a sub-agent to review the code**
-
-   Use the Task tool with these parameters:
-   - `subagent_type`: "general-purpose"
-   - `prompt`: Instruct the sub-agent to:
-     - Read the code review standards from `[code review standards path]`
-     - Read each of the changed files
-     - Review each file against the standards
-     - Report issues found with file, line number, severity, and suggested fix
-
-3. **Review sub-agent findings**
-   - Examine each issue identified
-   - Prioritize issues by severity
-
-4. **Fix identified issues**
-   - Address each issue found by the review
-   - For DRY violations: extract shared code into functions/modules
-   - For naming issues: rename to be clearer
-   - For missing tests: add appropriate test cases
-   - For bugs: fix the underlying issue
-
-5. **Re-run review if significant changes made**
-   - If you made substantial changes, consider running another review pass
-   - Ensure fixes didn't introduce new issues
-
-## Quality Criteria
-
-- Changed files were identified
-- Code was reviewed against the project's code review standards
-- All identified issues were addressed or documented as intentional
-
-## Context
-
-This is the first step of the commit workflow. Code review happens before tests to catch quality issues early. The sub-agent approach keeps the main conversation context clean while providing thorough review coverage.
diff --git a/library/jobs/commit/steps/test.md b/library/jobs/commit/steps/test.md
deleted file mode 100644
index acc4e926..00000000
--- a/library/jobs/commit/steps/test.md
+++ /dev/null
@@ -1,44 +0,0 @@
-# Run Tests
-
-## Objective
-
-Run the project's test suite and fix any failing tests until all tests pass.
-
-## Task
-
-Execute the test suite for the project and iteratively fix any failures until all tests pass.
-
-### Process
-
-1. **Pull latest code from the branch**
-   - Run `git pull` to fetch and merge any changes from the remote
-   - If there are merge conflicts, resolve them before proceeding
-   - This ensures you're testing against the latest code
-
-2. **Run the test command**
-   ```bash
-   [test command]
-   ```
-   Capture the output.
-
-3. **Analyze failures**
-   - If tests pass, proceed to output
-   - If tests fail, analyze the failure messages
-   - Identify the root cause of each failure
-
-4. **Fix failing tests**
-   - Make the necessary code changes to fix failures
-   - This may involve fixing bugs in implementation code or updating tests
-   - Re-run tests after each fix
-
-5. **Iterate until passing**
-   - Continue the fix/test cycle until all tests pass
-
-## Quality Criteria
-
-- Latest code was pulled from the branch
-- All tests are passing
-
-## Context
-
-This step runs after code review. Tests must pass before proceeding to lint and commit. This ensures code quality and prevents broken code from being committed. If tests fail due to issues introduced by the code review fixes, iterate on the fixes until tests pass.
diff --git a/library/jobs/spec_driven_development/job.yml b/library/jobs/spec_driven_development/job.yml
index 0fd25616..6d575879 100644
--- a/library/jobs/spec_driven_development/job.yml
+++ b/library/jobs/spec_driven_development/job.yml
@@ -1,3 +1,4 @@
+# yaml-language-server: $schema=.deepwork/schemas/job.schema.json
 name: spec_driven_development
 version: "1.0.0"
 summary: "Spec-driven development workflow that turns specifications into working implementations through structured planning."
@@ -40,7 +41,10 @@ steps:
       - name: development_priorities
         description: "Key priorities like code quality, testing, UX consistency, performance"
     outputs:
-      - file: "[docs_folder]/constitution.md"
+      constitution.md:
+        type: file
+        description: "Foundational governance principles and development guidelines"
+        required: true
     dependencies: []
     quality_criteria:
       - "**Priorities Captured**: Did the agent gather specific development priorities from the user?"
@@ -59,10 +63,13 @@ steps:
         description: "Name of the feature being specified (lowercase, hyphens for spaces)"
       - name: feature_description
         description: "High-level description of what the feature should do"
-      - file: "[docs_folder]/constitution.md"
+      - file: constitution.md
         from_step: constitution
     outputs:
-      - file: specs/[feature-name]/spec.md
+      spec.md:
+        type: file
+        description: "Functional requirements as user stories without technology choices"
+        required: true
     dependencies:
       - constitution
     quality_criteria:
@@ -80,10 +87,13 @@ steps:
     description: "Resolves ambiguities and gaps in the specification through structured questioning. Use after specification to ensure completeness."
     instructions_file: steps/clarify.md
     inputs:
-      - file: specs/[feature-name]/spec.md
+      - file: spec.md
         from_step: specify
     outputs:
-      - file: specs/[feature-name]/spec.md
+      spec.md:
+        type: file
+        description: "Updated specification with clarifications and resolved ambiguities"
+        required: true
     dependencies:
       - specify
     quality_criteria:
@@ -100,19 +110,33 @@ steps:
     description: "Creates technical implementation strategy including architecture and technology choices. Use after specification is clarified."
     instructions_file: steps/plan.md
     inputs:
-      - file: specs/[feature-name]/spec.md
+      - file: spec.md
         from_step: clarify
-      - file: "[docs_folder]/constitution.md"
+      - file: constitution.md
         from_step: constitution
-      - file: "[docs_folder]/architecture.md"
+      - file: architecture.md
         description: "Existing project architecture document (if present)"
     outputs:
-      - file: specs/[feature-name]/plan.md
-      - file: specs/[feature-name]/data-model.md
-      - file: specs/[feature-name]/api-spec.json
-      - file: specs/[feature-name]/research.md
-      - file: "[docs_folder]/architecture.md"
+      plan.md:
+        type: file
+        description: "Technical implementation strategy including architecture and technology choices"
+        required: true
+      data-model.md:
+        type: file
+        description: "Data model documentation with all entities and relationships"
+        required: true
+      api-spec.json:
+        type: file
+        description: "API endpoint definitions with request/response schemas"
+        required: true
+      research.md:
+        type: file
+        description: "Research findings and technology evaluations"
+        required: true
+      architecture.md:
+        type: file
         description: "Updated project architecture document"
+        required: true
     dependencies:
       - clarify
     quality_criteria:
@@ -132,14 +156,17 @@ steps:
     description: "Converts the implementation plan into actionable, ordered development tasks. Use after plan is validated."
     instructions_file: steps/tasks.md
     inputs:
-      - file: specs/[feature-name]/plan.md
+      - file: plan.md
         from_step: plan
-      - file: specs/[feature-name]/spec.md
+      - file: spec.md
         from_step: clarify
-      - file: "[docs_folder]/architecture.md"
+      - file: architecture.md
         from_step: plan
     outputs:
-      - file: specs/[feature-name]/tasks.md
+      tasks.md:
+        type: file
+        description: "Actionable, ordered development tasks organized by user story"
+        required: true
     dependencies:
       - plan
     quality_criteria:
@@ -158,19 +185,23 @@ steps:
     description: "Generates code and assets by executing the task breakdown. Use when ready to build the feature."
     instructions_file: steps/implement.md
     inputs:
-      - file: specs/[feature-name]/tasks.md
+      - file: tasks.md
         from_step: tasks
-      - file: specs/[feature-name]/plan.md
+      - file: plan.md
         from_step: plan
-      - file: specs/[feature-name]/spec.md
+      - file: spec.md
         from_step: clarify
-      - file: "[docs_folder]/architecture.md"
+      - file: architecture.md
         from_step: plan
     outputs:
-      - directory: src/
+      source_files:
+        type: files
         description: "Implementation source files as specified in tasks"
-      - directory: tests/
+        required: true
+      test_files:
+        type: files
         description: "Test files as specified in tasks"
+        required: true
     dependencies:
       - tasks
     quality_criteria:
diff --git a/manual_tests/README.md b/manual_tests/README.md
deleted file mode 100644
index 30e67849..00000000
--- a/manual_tests/README.md
+++ /dev/null
@@ -1,43 +0,0 @@
-# Manual Hook/Rule Tests
-
-This directory contains files designed to test different types of DeepWork rules/hooks.
-
-## How to Run These Tests
-
-**Use the `/manual_tests` job to run these tests.**
-
-```
-/manual_tests
-```
-
-This job automates the test execution process, ensuring:
-1. All tests run in **sub-agents** (required for hooks to fire automatically)
-2. "Should NOT fire" tests run in **parallel** for efficiency
-3. "Should fire" tests run **serially** with git reverts between each to prevent cross-contamination
-4. Hooks fire **automatically** when sub-agents complete (never manually triggered)
-
-## Why Use the Job?
-
-Running these tests correctly requires specific patterns:
-- **Sub-agents are mandatory** - the main agent cannot trigger hooks by editing files directly
-- **Hooks must fire automatically** - manually running `rules_check` defeats the purpose
-- **Serial execution with reverts** - "should fire" tests must not run in parallel
-
-The `/manual_tests` job enforces all these requirements and guides you through the process.
-
-## Test Folders
-
-| Folder | Rule Type |
-|--------|-----------|
-| `test_trigger_safety_mode/` | Basic trigger/safety conditional |
-| `test_set_mode/` | Bidirectional file pairing |
-| `test_pair_mode/` | One-way directional pairing |
-| `test_command_action/` | Automatic command execution |
-| `test_multi_safety/` | Multiple safety files |
-| `test_infinite_block_prompt/` | Infinite blocking with prompt |
-| `test_infinite_block_command/` | Infinite blocking with command |
-| `test_created_mode/` | New file creation detection |
-
-## Corresponding Rules
-
-Rules are defined in `.deepwork/rules/manual-test-*.md`
diff --git a/manual_tests/test_command_action/test_command_action.txt b/manual_tests/test_command_action/test_command_action.txt
deleted file mode 100644
index f32315ab..00000000
--- a/manual_tests/test_command_action/test_command_action.txt
+++ /dev/null
@@ -1,25 +0,0 @@
-MANUAL TEST: Command Action Rule
-
-=== WHAT THIS TESTS ===
-Tests the "command action" feature where a rule automatically
-runs a shell command instead of prompting the agent.
-
-=== HOW TO TRIGGER ===
-Edit this file (add text, modify content, etc.)
-
-=== EXPECTED BEHAVIOR ===
-When this file is edited, the rule automatically runs a command
-that appends a timestamped line to test_command_action_log.txt
-
-The command is idempotent: running it multiple times produces
-consistent results (a log entry is appended).
-
-=== RULE LOCATION ===
-.deepwork/rules/manual-test-command-action.md
-
-=== LOG FILE ===
-Check test_command_action_log.txt for command execution results.
-
----
-Edit below this line to trigger the command:
----
diff --git a/manual_tests/test_command_action/test_command_action_log.txt b/manual_tests/test_command_action/test_command_action_log.txt
deleted file mode 100644
index 1ca155ed..00000000
--- a/manual_tests/test_command_action/test_command_action_log.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-# Command Action Log
-# Lines below are added automatically when test_command_action.txt is edited
-# ---
diff --git a/manual_tests/test_created_mode/existing_file.yml b/manual_tests/test_created_mode/existing_file.yml
deleted file mode 100644
index dec0e532..00000000
--- a/manual_tests/test_created_mode/existing_file.yml
+++ /dev/null
@@ -1 +0,0 @@
-# This is a modification test
diff --git a/manual_tests/test_infinite_block_command/test_infinite_block_command.py b/manual_tests/test_infinite_block_command/test_infinite_block_command.py
deleted file mode 100644
index 22be16c7..00000000
--- a/manual_tests/test_infinite_block_command/test_infinite_block_command.py
+++ /dev/null
@@ -1,42 +0,0 @@
-"""
-MANUAL TEST: Infinite Block Command Rule
-
-=== WHAT THIS TESTS ===
-Tests a COMMAND-type rule with a command that ALWAYS FAILS - it will ALWAYS
-block when the trigger file is edited.
-
-This verifies:
-1. The rule correctly blocks when the file is edited (command fails)
-2. The error output includes guidance on how to skip using a promise
-3. Without guidance in the output, the agent cannot know how to proceed
-
-=== TEST CASE 1: Rule SHOULD fire (command fails, infinite block) ===
-1. Edit this file (add a comment below the marker)
-2. Run: echo '{}' | python -m deepwork.hooks.rules_check
-3. Expected: Block with command error AND promise skip instructions
-
-=== TEST CASE 2: Rule should NOT fire (promise provided) ===
-1. Edit this file (add a comment below the marker)
-2. Provide a promise (format shown in command error output)
-3. Expected: Empty JSON {} (allow) - promise bypasses the command entirely
-
-=== RULE LOCATION ===
-.deepwork/rules/manual-test-infinite-block-command.md
-
-=== KEY DIFFERENCE FROM PROMPT VERSION ===
-- Prompt version: Shows instructions in the rule's markdown body
-- Command version: Must show instructions alongside command error output
-
-If the command error output does NOT include promise skip instructions,
-this is a bug - the agent has no way to know how to proceed.
-"""
-
-
-def restricted_command_operation():
-    """An operation that requires explicit acknowledgment to proceed."""
-    return "This operation uses a command that always fails"
-
-
-# Edit below this line to trigger the rule
-# -------------------------------------------
-# Test edit for command block
diff --git a/manual_tests/test_infinite_block_prompt/test_infinite_block_prompt.py b/manual_tests/test_infinite_block_prompt/test_infinite_block_prompt.py
deleted file mode 100644
index 5c2ee508..00000000
--- a/manual_tests/test_infinite_block_prompt/test_infinite_block_prompt.py
+++ /dev/null
@@ -1,57 +0,0 @@
-"""
-MANUAL TEST: Infinite Block Prompt Rule (Promise Required)
-
-=== WHAT THIS TESTS ===
-Tests a PROMPT-type rule with NO safety file option - it will ALWAYS block
-when the trigger file is edited. The only way to proceed is to provide a
-promise in the correct format.
-
-This verifies:
-1. The rule correctly blocks when the file is edited
-2. The promise mechanism works to bypass the block
-3. The promise must be in the exact format: <promise>Rule Name</promise>
-
-=== TEST CASE 1: Rule SHOULD fire (infinite block) ===
-1. Edit this file (add a comment below the marker)
-2. Run: echo '{}' | python -m deepwork.hooks.rules_check
-3. Expected: "Manual Test: Infinite Block Prompt" appears in output with decision="block"
-4. The block message should explain that a promise is required
-
-=== TEST CASE 2: Rule should NOT fire (promise provided) ===
-1. Edit this file (add a comment below the marker)
-2. Create a transcript with: <promise>Manual Test: Infinite Block Prompt</promise>
-3. Run the hook with the transcript
-4. Expected: Empty JSON {} (allow) - promise bypasses the block
-
-=== HOW TO TEST WITH PROMISE ===
-The promise must be in the conversation transcript. To test:
-
-1. Create a temp transcript file with the promise:
-   echo '{"role":"assistant","message":{"content":[{"type":"text","text":"<promise>Manual Test: Infinite Block Prompt</promise>"}]}}' > /tmp/transcript.jsonl
-
-2. Run with transcript:
-   echo '{"transcript_path":"/tmp/transcript.jsonl"}' | python -m deepwork.hooks.rules_check
-
-3. Expected: {} (empty JSON = allow)
-
-=== RULE LOCATION ===
-.deepwork/rules/manual-test-infinite-block-prompt.md
-
-=== KEY DIFFERENCE FROM OTHER TESTS ===
-Other tests have a "safety" file that can be edited to suppress the rule.
-This test has NO safety option - the ONLY way to proceed is with a promise.
-This simulates scenarios where the agent must explicitly acknowledge a
-constraint before proceeding.
-
-=== COMPARISON WITH COMMAND VERSION ===
-See test_infinite_block_command/ for the command-action version of this test.
-"""
-
-
-def restricted_operation():
-    """An operation that requires explicit acknowledgment to proceed."""
-    return "This operation always requires a promise to proceed"
-
-
-# Edit below this line to trigger the rule
-# -------------------------------------------
diff --git a/manual_tests/test_multi_safety/test_multi_safety.py b/manual_tests/test_multi_safety/test_multi_safety.py
deleted file mode 100644
index 27734025..00000000
--- a/manual_tests/test_multi_safety/test_multi_safety.py
+++ /dev/null
@@ -1,42 +0,0 @@
-"""
-MANUAL TEST: Multiple Safety Patterns
-
-=== WHAT THIS TESTS ===
-Tests trigger/safety mode with MULTIPLE safety patterns:
-- Rule fires when this file is edited alone
-- Rule is suppressed if ANY of the safety files are also edited:
-  - test_multi_safety_changelog.md
-  - test_multi_safety_version.txt
-
-=== TEST CASE 1: Rule SHOULD fire ===
-1. Edit this file (add a comment below the marker)
-2. Do NOT edit any safety files
-3. Run: echo '{}' | python -m deepwork.hooks.rules_check
-4. Expected: "Manual Test: Multi Safety" appears in output
-
-=== TEST CASE 2: Rule should NOT fire (changelog edited) ===
-1. Edit this file (add a comment below the marker)
-2. ALSO edit test_multi_safety_changelog.md
-3. Run: echo '{}' | python -m deepwork.hooks.rules_check
-4. Expected: "Manual Test: Multi Safety" does NOT appear
-
-=== TEST CASE 3: Rule should NOT fire (version edited) ===
-1. Edit this file (add a comment below the marker)
-2. ALSO edit test_multi_safety_version.txt
-3. Run: echo '{}' | python -m deepwork.hooks.rules_check
-4. Expected: "Manual Test: Multi Safety" does NOT appear
-
-=== RULE LOCATION ===
-.deepwork/rules/manual-test-multi-safety.md
-"""
-
-VERSION = "1.0.0"
-
-
-def get_version():
-    """Return the current version."""
-    return VERSION
-
-
-# Edit below this line to trigger the rule
-# -------------------------------------------
diff --git a/manual_tests/test_multi_safety/test_multi_safety_changelog.md b/manual_tests/test_multi_safety/test_multi_safety_changelog.md
deleted file mode 100644
index d0a6e4f9..00000000
--- a/manual_tests/test_multi_safety/test_multi_safety_changelog.md
+++ /dev/null
@@ -1,16 +0,0 @@
-# Changelog (Multi-Safety Test)
-
-## What This File Does
-
-This is one of the "safety" files for the multi-safety test.
-Editing this file suppresses the rule when the source is edited.
-
-## Changelog
-
-### v1.0.0
-- Initial release
-
----
-
-Edit below this line to suppress the multi-safety rule:
-<!-- Changes here -->
diff --git a/manual_tests/test_multi_safety/test_multi_safety_version.txt b/manual_tests/test_multi_safety/test_multi_safety_version.txt
deleted file mode 100644
index b9cf607d..00000000
--- a/manual_tests/test_multi_safety/test_multi_safety_version.txt
+++ /dev/null
@@ -1,10 +0,0 @@
-Multi-Safety Version File
-
-This is one of the "safety" files for the multi-safety test.
-Editing this file suppresses the rule when the source is edited.
-
-Current Version: 1.0.0
-
----
-Edit below this line to suppress the multi-safety rule:
----
diff --git a/manual_tests/test_pair_mode/test_pair_mode_expected.md b/manual_tests/test_pair_mode/test_pair_mode_expected.md
deleted file mode 100644
index b4f286bd..00000000
--- a/manual_tests/test_pair_mode/test_pair_mode_expected.md
+++ /dev/null
@@ -1,31 +0,0 @@
-# API Documentation (Pair Mode Expected File)
-
-## What This File Does
-
-This is the "expected" file in a pair mode rule.
-
-## Pair Mode Behavior
-
-- When `test_pair_mode_trigger.py` changes, this file MUST also change
-- When THIS file changes alone, NO rule fires (docs can update independently)
-
-## API Reference
-
-### `api_endpoint()`
-
-Returns a status response.
-
-**Returns:** `{"status": "ok", "message": "API response"}`
-
----
-
-## Testing Instructions
-
-1. To TRIGGER the rule: Edit only `test_pair_mode_trigger.py`
-2. To verify ONE-WAY: Edit only this file (rule should NOT fire)
-3. To SATISFY the rule: Edit both files together
-
----
-
-Edit below this line (editing here alone should NOT trigger the rule):
-<!-- Changes here -->
diff --git a/manual_tests/test_pair_mode/test_pair_mode_trigger.py b/manual_tests/test_pair_mode/test_pair_mode_trigger.py
deleted file mode 100644
index 369dd18a..00000000
--- a/manual_tests/test_pair_mode/test_pair_mode_trigger.py
+++ /dev/null
@@ -1,47 +0,0 @@
-"""
-MANUAL TEST: Pair Mode (Directional Correspondence)
-
-=== WHAT THIS TESTS ===
-Tests the "pair" detection mode where there's a ONE-WAY relationship:
-- This file is the TRIGGER
-- test_pair_mode_expected.md is the EXPECTED file
-- When THIS file changes, the expected file MUST also change
-- But the expected file CAN change independently (no rule fires)
-
-=== TEST CASE 1: Rule SHOULD fire ===
-1. Edit this file (add a comment below the marker)
-2. Do NOT edit test_pair_mode_expected.md
-3. Run: echo '{}' | python -m deepwork.hooks.rules_check
-4. Expected: "Manual Test: Pair Mode" appears in output
-
-=== TEST CASE 2: Rule should NOT fire (both edited) ===
-1. Edit this file (add a comment below the marker)
-2. ALSO edit test_pair_mode_expected.md
-3. Run: echo '{}' | python -m deepwork.hooks.rules_check
-4. Expected: "Manual Test: Pair Mode" does NOT appear
-
-=== TEST CASE 3: Rule should NOT fire (expected only) ===
-1. Do NOT edit this file
-2. Edit ONLY test_pair_mode_expected.md
-3. Run: echo '{}' | python -m deepwork.hooks.rules_check
-4. Expected: "Manual Test: Pair Mode" does NOT appear
-   (This verifies the ONE-WAY nature of pair mode)
-
-=== RULE LOCATION ===
-.deepwork/rules/manual-test-pair-mode.md
-"""
-
-
-def api_endpoint():
-    """
-    An API endpoint that requires documentation.
-
-    This simulates an API file where changes require
-    documentation updates, but docs can be updated
-    independently (for typos, clarifications, etc.)
-    """
-    return {"status": "ok", "message": "API response"}
-
-
-# Edit below this line to trigger the rule
-# -------------------------------------------
diff --git a/manual_tests/test_set_mode/test_set_mode_source.py b/manual_tests/test_set_mode/test_set_mode_source.py
deleted file mode 100644
index 6649e424..00000000
--- a/manual_tests/test_set_mode/test_set_mode_source.py
+++ /dev/null
@@ -1,40 +0,0 @@
-"""
-MANUAL TEST: Set Mode (Bidirectional Correspondence)
-
-=== WHAT THIS TESTS ===
-Tests the "set" detection mode where files must change together:
-- This source file and test_set_mode_test.py are in a "set"
-- If EITHER file changes, the OTHER must also change
-- This is BIDIRECTIONAL (works in both directions)
-
-=== TEST CASE 1: Rule SHOULD fire ===
-1. Edit this file (add a comment below the marker)
-2. Do NOT edit test_set_mode_test.py
-3. Run: echo '{}' | python -m deepwork.hooks.rules_check
-4. Expected: "Manual Test: Set Mode" appears in output
-
-=== TEST CASE 2: Rule should NOT fire ===
-1. Edit this file (add a comment below the marker)
-2. ALSO edit test_set_mode_test.py
-3. Run: echo '{}' | python -m deepwork.hooks.rules_check
-4. Expected: "Manual Test: Set Mode" does NOT appear
-
-=== RULE LOCATION ===
-.deepwork/rules/manual-test-set-mode.md
-"""
-
-
-class Calculator:
-    """A simple calculator for testing set mode."""
-
-    def add(self, a: int, b: int) -> int:
-        """Add two numbers."""
-        return a + b
-
-    def subtract(self, a: int, b: int) -> int:
-        """Subtract b from a."""
-        return a - b
-
-
-# Edit below this line to trigger the rule
-# -------------------------------------------
diff --git a/manual_tests/test_set_mode/test_set_mode_test.py b/manual_tests/test_set_mode/test_set_mode_test.py
deleted file mode 100644
index 3ef349e4..00000000
--- a/manual_tests/test_set_mode/test_set_mode_test.py
+++ /dev/null
@@ -1,37 +0,0 @@
-"""
-MANUAL TEST: Set Mode - Test File (Bidirectional Correspondence)
-
-=== WHAT THIS TESTS ===
-This is the TEST file for the set mode test.
-It must change together with test_set_mode_source.py.
-
-=== HOW TO TRIGGER ===
-Option A: Edit this file alone (without test_set_mode_source.py)
-Option B: Edit test_set_mode_source.py alone (without this file)
-
-=== EXPECTED BEHAVIOR ===
-- Edit this file alone -> Rule fires, expects source file to also change
-- Edit source file alone -> Rule fires, expects this file to also change
-- Edit BOTH files -> Rule is satisfied (no fire)
-
-=== RULE LOCATION ===
-.deepwork/rules/manual-test-set-mode.md
-"""
-
-from test_set_mode_source import Calculator
-
-
-def test_add():
-    """Test the add method."""
-    calc = Calculator()
-    assert calc.add(2, 3) == 5
-
-
-def test_subtract():
-    """Test the subtract method."""
-    calc = Calculator()
-    assert calc.subtract(5, 3) == 2
-
-
-# Edit below this line to trigger the rule
-# -------------------------------------------
diff --git a/manual_tests/test_trigger_safety_mode/test_trigger_safety_mode.py b/manual_tests/test_trigger_safety_mode/test_trigger_safety_mode.py
deleted file mode 100644
index 68bf59b0..00000000
--- a/manual_tests/test_trigger_safety_mode/test_trigger_safety_mode.py
+++ /dev/null
@@ -1,32 +0,0 @@
-"""
-MANUAL TEST: Trigger/Safety Mode Rule
-
-=== WHAT THIS TESTS ===
-Tests the basic trigger/safety detection mode where:
-- Rule FIRES when this file is edited alone
-- Rule is SUPPRESSED when test_trigger_safety_mode_doc.md is also edited
-
-=== TEST CASE 1: Rule SHOULD fire ===
-1. Edit this file (add a comment below the marker)
-2. Do NOT edit test_trigger_safety_mode_doc.md
-3. Run: echo '{}' | python -m deepwork.hooks.rules_check
-4. Expected: "Manual Test: Trigger Safety" appears in output
-
-=== TEST CASE 2: Rule should NOT fire ===
-1. Edit this file (add a comment below the marker)
-2. ALSO edit test_trigger_safety_mode_doc.md
-3. Run: echo '{}' | python -m deepwork.hooks.rules_check
-4. Expected: "Manual Test: Trigger Safety" does NOT appear
-
-=== RULE LOCATION ===
-.deepwork/rules/manual-test-trigger-safety.md
-"""
-
-
-def example_function():
-    """An example function to demonstrate the trigger."""
-    return "Hello from trigger safety test"
-
-
-# Edit below this line to trigger the rule
-# -------------------------------------------
diff --git a/manual_tests/test_trigger_safety_mode/test_trigger_safety_mode_doc.md b/manual_tests/test_trigger_safety_mode/test_trigger_safety_mode_doc.md
deleted file mode 100644
index 625cf0b5..00000000
--- a/manual_tests/test_trigger_safety_mode/test_trigger_safety_mode_doc.md
+++ /dev/null
@@ -1,20 +0,0 @@
-# Documentation for Trigger Safety Test
-
-## What This File Does
-
-This is the "safety" file for the trigger/safety mode test.
-
-## How It Works
-
-When this file is edited ALONGSIDE `test_trigger_safety_mode.py`,
-the trigger/safety rule is suppressed (does not fire).
-
-## Testing
-
-1. To TRIGGER the rule: Edit only `test_trigger_safety_mode.py`
-2. To SUPPRESS the rule: Edit both files together
-
----
-
-Edit below this line to suppress the trigger/safety rule:
-<!-- Changes here -->
diff --git a/nix/claude-code/package-lock.json b/nix/claude-code/package-lock.json
deleted file mode 100644
index f9766e4c..00000000
--- a/nix/claude-code/package-lock.json
+++ /dev/null
@@ -1,314 +0,0 @@
-{
-  "name": "@anthropic-ai/claude-code",
-  "version": "2.1.15",
-  "lockfileVersion": 3,
-  "requires": true,
-  "packages": {
-    "": {
-      "name": "@anthropic-ai/claude-code",
-      "version": "2.1.15",
-      "license": "SEE LICENSE IN README.md",
-      "bin": {
-        "claude": "cli.js"
-      },
-      "engines": {
-        "node": ">=18.0.0"
-      },
-      "optionalDependencies": {
-        "@img/sharp-darwin-arm64": "^0.33.5",
-        "@img/sharp-darwin-x64": "^0.33.5",
-        "@img/sharp-linux-arm": "^0.33.5",
-        "@img/sharp-linux-arm64": "^0.33.5",
-        "@img/sharp-linux-x64": "^0.33.5",
-        "@img/sharp-linuxmusl-arm64": "^0.33.5",
-        "@img/sharp-linuxmusl-x64": "^0.33.5",
-        "@img/sharp-win32-x64": "^0.33.5"
-      }
-    },
-    "node_modules/@img/sharp-darwin-arm64": {
-      "version": "0.33.5",
-      "resolved": "https://registry.npmjs.org/@img/sharp-darwin-arm64/-/sharp-darwin-arm64-0.33.5.tgz",
-      "integrity": "sha512-UT4p+iz/2H4twwAoLCqfA9UH5pI6DggwKEGuaPy7nCVQ8ZsiY5PIcrRvD1DzuY3qYL07NtIQcWnBSY/heikIFQ==",
-      "cpu": [
-        "arm64"
-      ],
-      "license": "Apache-2.0",
-      "optional": true,
-      "os": [
-        "darwin"
-      ],
-      "engines": {
-        "node": "^18.17.0 || ^20.3.0 || >=21.0.0"
-      },
-      "funding": {
-        "url": "https://opencollective.com/libvips"
-      },
-      "optionalDependencies": {
-        "@img/sharp-libvips-darwin-arm64": "1.0.4"
-      }
-    },
-    "node_modules/@img/sharp-darwin-x64": {
-      "version": "0.33.5",
-      "resolved": "https://registry.npmjs.org/@img/sharp-darwin-x64/-/sharp-darwin-x64-0.33.5.tgz",
-      "integrity": "sha512-fyHac4jIc1ANYGRDxtiqelIbdWkIuQaI84Mv45KvGRRxSAa7o7d1ZKAOBaYbnepLC1WqxfpimdeWfvqqSGwR2Q==",
-      "cpu": [
-        "x64"
-      ],
-      "license": "Apache-2.0",
-      "optional": true,
-      "os": [
-        "darwin"
-      ],
-      "engines": {
-        "node": "^18.17.0 || ^20.3.0 || >=21.0.0"
-      },
-      "funding": {
-        "url": "https://opencollective.com/libvips"
-      },
-      "optionalDependencies": {
-        "@img/sharp-libvips-darwin-x64": "1.0.4"
-      }
-    },
-    "node_modules/@img/sharp-libvips-darwin-arm64": {
-      "version": "1.0.4",
-      "resolved": "https://registry.npmjs.org/@img/sharp-libvips-darwin-arm64/-/sharp-libvips-darwin-arm64-1.0.4.tgz",
-      "integrity": "sha512-XblONe153h0O2zuFfTAbQYAX2JhYmDHeWikp1LM9Hul9gVPjFY427k6dFEcOL72O01QxQsWi761svJ/ev9xEDg==",
-      "cpu": [
-        "arm64"
-      ],
-      "license": "LGPL-3.0-or-later",
-      "optional": true,
-      "os": [
-        "darwin"
-      ],
-      "funding": {
-        "url": "https://opencollective.com/libvips"
-      }
-    },
-    "node_modules/@img/sharp-libvips-darwin-x64": {
-      "version": "1.0.4",
-      "resolved": "https://registry.npmjs.org/@img/sharp-libvips-darwin-x64/-/sharp-libvips-darwin-x64-1.0.4.tgz",
-      "integrity": "sha512-xnGR8YuZYfJGmWPvmlunFaWJsb9T/AO2ykoP3Fz/0X5XV2aoYBPkX6xqCQvUTKKiLddarLaxpzNe+b1hjeWHAQ==",
-      "cpu": [
-        "x64"
-      ],
-      "license": "LGPL-3.0-or-later",
-      "optional": true,
-      "os": [
-        "darwin"
-      ],
-      "funding": {
-        "url": "https://opencollective.com/libvips"
-      }
-    },
-    "node_modules/@img/sharp-libvips-linux-arm": {
-      "version": "1.0.5",
-      "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-arm/-/sharp-libvips-linux-arm-1.0.5.tgz",
-      "integrity": "sha512-gvcC4ACAOPRNATg/ov8/MnbxFDJqf/pDePbBnuBDcjsI8PssmjoKMAz4LtLaVi+OnSb5FK/yIOamqDwGmXW32g==",
-      "cpu": [
-        "arm"
-      ],
-      "license": "LGPL-3.0-or-later",
-      "optional": true,
-      "os": [
-        "linux"
-      ],
-      "funding": {
-        "url": "https://opencollective.com/libvips"
-      }
-    },
-    "node_modules/@img/sharp-libvips-linux-arm64": {
-      "version": "1.0.4",
-      "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-arm64/-/sharp-libvips-linux-arm64-1.0.4.tgz",
-      "integrity": "sha512-9B+taZ8DlyyqzZQnoeIvDVR/2F4EbMepXMc/NdVbkzsJbzkUjhXv/70GQJ7tdLA4YJgNP25zukcxpX2/SueNrA==",
-      "cpu": [
-        "arm64"
-      ],
-      "license": "LGPL-3.0-or-later",
-      "optional": true,
-      "os": [
-        "linux"
-      ],
-      "funding": {
-        "url": "https://opencollective.com/libvips"
-      }
-    },
-    "node_modules/@img/sharp-libvips-linux-x64": {
-      "version": "1.0.4",
-      "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-x64/-/sharp-libvips-linux-x64-1.0.4.tgz",
-      "integrity": "sha512-MmWmQ3iPFZr0Iev+BAgVMb3ZyC4KeFc3jFxnNbEPas60e1cIfevbtuyf9nDGIzOaW9PdnDciJm+wFFaTlj5xYw==",
-      "cpu": [
-        "x64"
-      ],
-      "license": "LGPL-3.0-or-later",
-      "optional": true,
-      "os": [
-        "linux"
-      ],
-      "funding": {
-        "url": "https://opencollective.com/libvips"
-      }
-    },
-    "node_modules/@img/sharp-libvips-linuxmusl-arm64": {
-      "version": "1.0.4",
-      "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linuxmusl-arm64/-/sharp-libvips-linuxmusl-arm64-1.0.4.tgz",
-      "integrity": "sha512-9Ti+BbTYDcsbp4wfYib8Ctm1ilkugkA/uscUn6UXK1ldpC1JjiXbLfFZtRlBhjPZ5o1NCLiDbg8fhUPKStHoTA==",
-      "cpu": [
-        "arm64"
-      ],
-      "license": "LGPL-3.0-or-later",
-      "optional": true,
-      "os": [
-        "linux"
-      ],
-      "funding": {
-        "url": "https://opencollective.com/libvips"
-      }
-    },
-    "node_modules/@img/sharp-libvips-linuxmusl-x64": {
-      "version": "1.0.4",
-      "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linuxmusl-x64/-/sharp-libvips-linuxmusl-x64-1.0.4.tgz",
-      "integrity": "sha512-viYN1KX9m+/hGkJtvYYp+CCLgnJXwiQB39damAO7WMdKWlIhmYTfHjwSbQeUK/20vY154mwezd9HflVFM1wVSw==",
-      "cpu": [
-        "x64"
-      ],
-      "license": "LGPL-3.0-or-later",
-      "optional": true,
-      "os": [
-        "linux"
-      ],
-      "funding": {
-        "url": "https://opencollective.com/libvips"
-      }
-    },
-    "node_modules/@img/sharp-linux-arm": {
-      "version": "0.33.5",
-      "resolved": "https://registry.npmjs.org/@img/sharp-linux-arm/-/sharp-linux-arm-0.33.5.tgz",
-      "integrity": "sha512-JTS1eldqZbJxjvKaAkxhZmBqPRGmxgu+qFKSInv8moZ2AmT5Yib3EQ1c6gp493HvrvV8QgdOXdyaIBrhvFhBMQ==",
-      "cpu": [
-        "arm"
-      ],
-      "license": "Apache-2.0",
-      "optional": true,
-      "os": [
-        "linux"
-      ],
-      "engines": {
-        "node": "^18.17.0 || ^20.3.0 || >=21.0.0"
-      },
-      "funding": {
-        "url": "https://opencollective.com/libvips"
-      },
-      "optionalDependencies": {
-        "@img/sharp-libvips-linux-arm": "1.0.5"
-      }
-    },
-    "node_modules/@img/sharp-linux-arm64": {
-      "version": "0.33.5",
-      "resolved": "https://registry.npmjs.org/@img/sharp-linux-arm64/-/sharp-linux-arm64-0.33.5.tgz",
-      "integrity": "sha512-JMVv+AMRyGOHtO1RFBiJy/MBsgz0x4AWrT6QoEVVTyh1E39TrCUpTRI7mx9VksGX4awWASxqCYLCV4wBZHAYxA==",
-      "cpu": [
-        "arm64"
-      ],
-      "license": "Apache-2.0",
-      "optional": true,
-      "os": [
-        "linux"
-      ],
-      "engines": {
-        "node": "^18.17.0 || ^20.3.0 || >=21.0.0"
-      },
-      "funding": {
-        "url": "https://opencollective.com/libvips"
-      },
-      "optionalDependencies": {
-        "@img/sharp-libvips-linux-arm64": "1.0.4"
-      }
-    },
-    "node_modules/@img/sharp-linux-x64": {
-      "version": "0.33.5",
-      "resolved": "https://registry.npmjs.org/@img/sharp-linux-x64/-/sharp-linux-x64-0.33.5.tgz",
-      "integrity": "sha512-opC+Ok5pRNAzuvq1AG0ar+1owsu842/Ab+4qvU879ippJBHvyY5n2mxF1izXqkPYlGuP/M556uh53jRLJmzTWA==",
-      "cpu": [
-        "x64"
-      ],
-      "license": "Apache-2.0",
-      "optional": true,
-      "os": [
-        "linux"
-      ],
-      "engines": {
-        "node": "^18.17.0 || ^20.3.0 || >=21.0.0"
-      },
-      "funding": {
-        "url": "https://opencollective.com/libvips"
-      },
-      "optionalDependencies": {
-        "@img/sharp-libvips-linux-x64": "1.0.4"
-      }
-    },
-    "node_modules/@img/sharp-linuxmusl-arm64": {
-      "version": "0.33.5",
-      "resolved": "https://registry.npmjs.org/@img/sharp-linuxmusl-arm64/-/sharp-linuxmusl-arm64-0.33.5.tgz",
-      "integrity": "sha512-XrHMZwGQGvJg2V/oRSUfSAfjfPxO+4DkiRh6p2AFjLQztWUuY/o8Mq0eMQVIY7HJ1CDQUJlxGGZRw1a5bqmd1g==",
-      "cpu": [
-        "arm64"
-      ],
-      "license": "Apache-2.0",
-      "optional": true,
-      "os": [
-        "linux"
-      ],
-      "engines": {
-        "node": "^18.17.0 || ^20.3.0 || >=21.0.0"
-      },
-      "funding": {
-        "url": "https://opencollective.com/libvips"
-      },
-      "optionalDependencies": {
-        "@img/sharp-libvips-linuxmusl-arm64": "1.0.4"
-      }
-    },
-    "node_modules/@img/sharp-linuxmusl-x64": {
-      "version": "0.33.5",
-      "resolved": "https://registry.npmjs.org/@img/sharp-linuxmusl-x64/-/sharp-linuxmusl-x64-0.33.5.tgz",
-      "integrity": "sha512-WT+d/cgqKkkKySYmqoZ8y3pxx7lx9vVejxW/W4DOFMYVSkErR+w7mf2u8m/y4+xHe7yY9DAXQMWQhpnMuFfScw==",
-      "cpu": [
-        "x64"
-      ],
-      "license": "Apache-2.0",
-      "optional": true,
-      "os": [
-        "linux"
-      ],
-      "engines": {
-        "node": "^18.17.0 || ^20.3.0 || >=21.0.0"
-      },
-      "funding": {
-        "url": "https://opencollective.com/libvips"
-      },
-      "optionalDependencies": {
-        "@img/sharp-libvips-linuxmusl-x64": "1.0.4"
-      }
-    },
-    "node_modules/@img/sharp-win32-x64": {
-      "version": "0.33.5",
-      "resolved": "https://registry.npmjs.org/@img/sharp-win32-x64/-/sharp-win32-x64-0.33.5.tgz",
-      "integrity": "sha512-MpY/o8/8kj+EcnxwvrP4aTJSWw/aZ7JIGR4aBeZkZw5B7/Jn+tY9/VNwtcoGmdT7GfggGIU4kygOMSbYnOrAbg==",
-      "cpu": [
-        "x64"
-      ],
-      "license": "Apache-2.0 AND LGPL-3.0-or-later",
-      "optional": true,
-      "os": [
-        "win32"
-      ],
-      "engines": {
-        "node": "^18.17.0 || ^20.3.0 || >=21.0.0"
-      },
-      "funding": {
-        "url": "https://opencollective.com/libvips"
-      }
-    }
-  }
-}
diff --git a/nix/claude-code/package.nix b/nix/claude-code/package.nix
deleted file mode 100644
index 053d1204..00000000
--- a/nix/claude-code/package.nix
+++ /dev/null
@@ -1,78 +0,0 @@
-# Claude Code package - locally maintained for version control
-# Based on nixpkgs: https://github.com/NixOS/nixpkgs/tree/master/pkgs/by-name/cl/claude-code
-#
-# To update: Run ./update.sh from this directory
-{
-  lib,
-  stdenv,
-  buildNpmPackage,
-  fetchzip,
-  versionCheckHook,
-  writableTmpDirAsHomeHook,
-  bubblewrap,
-  procps,
-  socat,
-}:
-buildNpmPackage (finalAttrs: {
-  pname = "claude-code";
-  version = "2.1.15";
-
-  src = fetchzip {
-    url = "https://registry.npmjs.org/@anthropic-ai/claude-code/-/claude-code-${finalAttrs.version}.tgz";
-    hash = "sha256-3zhjeAwKj1fMLuriX1qpVA8zaCk1oekJ1UmeEdDx4Xg=";
-  };
-
-  npmDepsHash = "sha256-K5re0co3Tkz5peXHe/UUlsqAWq4YzSULdY9+xncfL5A=";
-
-  strictDeps = true;
-
-  postPatch = ''
-    cp ${./package-lock.json} package-lock.json
-
-    # Replace hardcoded `/bin/bash` with `/usr/bin/env bash` for Nix compatibility
-    # https://github.com/anthropics/claude-code/issues/15195
-    substituteInPlace cli.js \
-      --replace-warn '#!/bin/bash' '#!/usr/bin/env bash'
-  '';
-
-  dontNpmBuild = true;
-
-  env.AUTHORIZED = "1";
-
-  # `claude-code` tries to auto-update by default, this disables that functionality.
-  # https://docs.anthropic.com/en/docs/agents-and-tools/claude-code/overview#environment-variables
-  # The DEV=true env var causes claude to crash with `TypeError: window.WebSocket is not a constructor`
-  postInstall = ''
-    wrapProgram $out/bin/claude \
-      --set DISABLE_AUTOUPDATER 1 \
-      --unset DEV \
-      --prefix PATH : ${
-        lib.makeBinPath (
-          [
-            # claude-code uses [node-tree-kill](https://github.com/pkrumins/node-tree-kill) which requires procps's pgrep(darwin) or ps(linux)
-            procps
-          ]
-          # the following packages are required for the sandbox to work (Linux only)
-          ++ lib.optionals stdenv.hostPlatform.isLinux [
-            bubblewrap
-            socat
-          ]
-        )
-      }
-  '';
-
-  doInstallCheck = true;
-  nativeInstallCheckInputs = [
-    writableTmpDirAsHomeHook
-    versionCheckHook
-  ];
-  versionCheckKeepEnvironment = [ "HOME" ];
-
-  meta = {
-    description = "Agentic coding tool that lives in your terminal, understands your codebase, and helps you code faster";
-    homepage = "https://github.com/anthropics/claude-code";
-    downloadPage = "https://www.npmjs.com/package/@anthropic-ai/claude-code";
-    license = lib.licenses.unfree;
-    mainProgram = "claude";
-  };
-})
diff --git a/nix/claude-code/update.sh b/nix/claude-code/update.sh
deleted file mode 100755
index cfc648bc..00000000
--- a/nix/claude-code/update.sh
+++ /dev/null
@@ -1,49 +0,0 @@
-#!/usr/bin/env bash
-# Update claude-code package to latest npm version
-set -euo pipefail
-
-SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-cd "$SCRIPT_DIR"
-
-# Get versions
-OLD_VERSION=$(grep 'version = "' package.nix | head -1 | sed 's/.*version = "\([^"]*\)".*/\1/')
-VERSION=$(npm view @anthropic-ai/claude-code version 2>/dev/null)
-
-if [[ "$VERSION" == "$OLD_VERSION" ]]; then
-    echo "Already at latest version: $OLD_VERSION"
-    exit 0
-fi
-
-echo "Updating claude-code: $OLD_VERSION -> $VERSION"
-
-# Download tarball
-TARBALL_URL="https://registry.npmjs.org/@anthropic-ai/claude-code/-/claude-code-${VERSION}.tgz"
-TMPDIR=$(mktemp -d)
-trap "rm -rf $TMPDIR" EXIT
-
-curl -sL "$TARBALL_URL" -o "$TMPDIR/claude-code.tgz"
-
-# Extract and compute source hash
-mkdir -p "$TMPDIR/src"
-tar -xzf "$TMPDIR/claude-code.tgz" -C "$TMPDIR/src" --strip-components=1
-SRC_HASH=$(nix hash path "$TMPDIR/src")
-
-# Get package-lock.json from tarball
-if [[ -f "$TMPDIR/src/package-lock.json" ]]; then
-    cp "$TMPDIR/src/package-lock.json" package-lock.json
-else
-    echo "Error: No package-lock.json in tarball"
-    exit 1
-fi
-
-# Compute npmDepsHash using prefetch-npm-deps
-NPM_DEPS_HASH=$(nix shell nixpkgs#prefetch-npm-deps -c prefetch-npm-deps package-lock.json 2>/dev/null)
-
-# Update package.nix
-sed -i "s/version = \"[^\"]*\"/version = \"$VERSION\"/" package.nix
-sed -i "s|hash = \"sha256-[^\"]*\"|hash = \"$SRC_HASH\"|" package.nix
-sed -i "s|npmDepsHash = \"sha256-[^\"]*\"|npmDepsHash = \"$NPM_DEPS_HASH\"|" package.nix
-
-echo "Updated to version $VERSION"
-echo "  Source hash: $SRC_HASH"
-echo "  Deps hash: $NPM_DEPS_HASH"
diff --git a/nix/update b/nix/update
deleted file mode 100755
index 95057b45..00000000
--- a/nix/update
+++ /dev/null
@@ -1,18 +0,0 @@
-#!/usr/bin/env bash
-# Update all Nix dependencies (claude-code package and flake inputs)
-set -euo pipefail
-
-SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
-
-cd "$REPO_ROOT"
-
-echo "Updating claude-code package..."
-"$SCRIPT_DIR/claude-code/update.sh"
-
-echo ""
-echo "Updating flake inputs..."
-nix flake update
-
-echo ""
-echo "Done! Run 'nix develop' to reload the environment."
diff --git a/pyproject.toml b/pyproject.toml
index c2bc3e4a..4070c134 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "deepwork"
-version = "0.5.1"
+version = "0.7.0"
 description = "Framework for enabling AI agents to perform complex, multi-step work tasks"
 readme = "README.md"
 requires-python = ">=3.11"
@@ -26,6 +26,10 @@ dependencies = [
     "click>=8.1.0",
     "rich>=13.0.0",
     "jsonschema>=4.17.0",
+    "fastmcp>=2.0",
+    "pydantic>=2.0",
+    "mcp>=1.0.0",
+    "aiofiles>=24.0.0",
 ]
 
 [project.optional-dependencies]
@@ -33,9 +37,11 @@ dev = [
     "pytest>=7.0",
     "pytest-mock>=3.10",
     "pytest-cov>=4.0",
+    "pytest-asyncio>=0.24.0",
     "ruff>=0.1.0",
     "mypy>=1.0",
     "types-PyYAML",
+    "types-aiofiles",
 ]
 
 [project.scripts]
@@ -48,7 +54,7 @@ Repository = "https://github.com/deepwork/deepwork"
 Issues = "https://github.com/deepwork/deepwork/issues"
 
 [build-system]
-requires = ["hatchling"]
+requires = ["hatchling", "editables"]
 build-backend = "hatchling.build"
 
 [tool.hatch.build.targets.wheel]
@@ -65,6 +71,8 @@ addopts = [
     "--strict-config",
     "--showlocals",
 ]
+asyncio_mode = "auto"
+asyncio_default_fixture_loop_scope = "function"
 
 [tool.coverage.run]
 source = ["deepwork"]
@@ -108,3 +116,12 @@ warn_redundant_casts = true
 warn_unused_ignores = true
 warn_no_return = true
 strict_equality = true
+
+[dependency-groups]
+dev = [
+    "fpdf2>=2.8.5",
+    "pytest>=9.0.2",
+    "pytest-asyncio>=1.3.0",
+    "pytest-cov>=7.0.0",
+    "pytest-mock>=3.15.1",
+]
diff --git a/src/deepwork/__init__.py b/src/deepwork/__init__.py
index 38f6acdf..0c85557f 100644
--- a/src/deepwork/__init__.py
+++ b/src/deepwork/__init__.py
@@ -1,6 +1,6 @@
 """DeepWork - Framework for enabling AI agents to perform complex, multi-step work tasks."""
 
-__version__ = "0.1.0"
+__version__ = "0.7.0"
 __author__ = "DeepWork Contributors"
 
 __all__ = [
diff --git a/src/deepwork/cli/hook.py b/src/deepwork/cli/hook.py
index 5182b20a..3e921941 100644
--- a/src/deepwork/cli/hook.py
+++ b/src/deepwork/cli/hook.py
@@ -5,7 +5,6 @@
 deepwork was installed (flake, pipx, uv, etc.).
 
 Usage:
-    deepwork hook rules_check
     deepwork hook <hook_name>
 
 This is meant to be called from hook wrapper scripts (claude_hook.sh, gemini_hook.sh).
@@ -32,14 +31,14 @@ def hook(hook_name: str) -> None:
     """
     Run a DeepWork hook by name.
 
-    HOOK_NAME: Name of the hook to run (e.g., 'rules_check')
+    HOOK_NAME: Name of the hook to run (e.g., 'check_version')
 
     This command imports and runs the hook module from deepwork.hooks.{hook_name}.
     The hook receives stdin input and outputs to stdout, following the hook protocol.
 
     Examples:
-        deepwork hook rules_check
-        echo '{}' | deepwork hook rules_check
+        deepwork hook check_version
+        echo '{}' | deepwork hook my_hook
     """
     try:
         # Import the hook module
diff --git a/src/deepwork/cli/install.py b/src/deepwork/cli/install.py
index 19bec4f8..2dfc980d 100644
--- a/src/deepwork/cli/install.py
+++ b/src/deepwork/cli/install.py
@@ -21,6 +21,39 @@ class InstallError(Exception):
     pass
 
 
+def _install_schemas(schemas_dir: Path, project_path: Path) -> None:
+    """
+    Install JSON schemas to the project's .deepwork/schemas directory.
+
+    Args:
+        schemas_dir: Path to .deepwork/schemas directory
+        project_path: Path to project root (for relative path display)
+
+    Raises:
+        InstallError: If installation fails
+    """
+    # Find the source schemas directory
+    source_schemas_dir = Path(__file__).parent.parent / "schemas"
+
+    if not source_schemas_dir.exists():
+        raise InstallError(
+            f"Schemas directory not found at {source_schemas_dir}. "
+            "DeepWork installation may be corrupted."
+        )
+
+    # Copy JSON schema files
+    try:
+        for schema_file in source_schemas_dir.glob("*.json"):
+            target_file = schemas_dir / schema_file.name
+            shutil.copy(schema_file, target_file)
+            fix_permissions(target_file)
+            console.print(
+                f"  [green]✓[/green] Installed schema {schema_file.name} ({target_file.relative_to(project_path)})"
+            )
+    except Exception as e:
+        raise InstallError(f"Failed to install schemas: {e}") from e
+
+
 def _inject_standard_job(job_name: str, jobs_dir: Path, project_path: Path) -> None:
     """
     Inject a standard job definition into the project.
@@ -88,20 +121,6 @@ def _inject_deepwork_jobs(jobs_dir: Path, project_path: Path) -> None:
     _inject_standard_job("deepwork_jobs", jobs_dir, project_path)
 
 
-def _inject_deepwork_rules(jobs_dir: Path, project_path: Path) -> None:
-    """
-    Inject the deepwork_rules job definition into the project.
-
-    Args:
-        jobs_dir: Path to .deepwork/jobs directory
-        project_path: Path to project root (for relative path display)
-
-    Raises:
-        InstallError: If injection fails
-    """
-    _inject_standard_job("deepwork_rules", jobs_dir, project_path)
-
-
 def _create_deepwork_gitignore(deepwork_dir: Path) -> None:
     """
     Create .gitignore file in .deepwork/ directory.
@@ -127,6 +146,27 @@ def _create_deepwork_gitignore(deepwork_dir: Path) -> None:
     gitignore_path.write_text(gitignore_content)
 
 
+def _create_common_info_directory(deepwork_dir: Path) -> None:
+    """
+    Create the .deepwork/common_info directory with a .gitkeep file.
+
+    This directory holds shared reference files that are available across
+    all jobs and workflow steps.
+
+    Args:
+        deepwork_dir: Path to .deepwork directory
+    """
+    common_info_dir = deepwork_dir / "common_info"
+    ensure_dir(common_info_dir)
+
+    gitkeep_file = common_info_dir / ".gitkeep"
+    if not gitkeep_file.exists():
+        gitkeep_file.write_text(
+            "# This file ensures the .deepwork/common_info directory exists in version control.\n"
+            "# Place shared reference files here that should be available across all jobs.\n"
+        )
+
+
 def _create_tmp_directory(deepwork_dir: Path) -> None:
     """
     Create the .deepwork/tmp directory with a .gitkeep file.
@@ -149,89 +189,6 @@ def _create_tmp_directory(deepwork_dir: Path) -> None:
         )
 
 
-def _create_rules_directory(project_path: Path) -> bool:
-    """
-    Create the v2 rules directory structure with example templates.
-
-    Creates .deepwork/rules/ with example rule files that users can customize.
-    Only creates the directory if it doesn't already exist.
-
-    Args:
-        project_path: Path to the project root
-
-    Returns:
-        True if the directory was created, False if it already existed
-    """
-    rules_dir = project_path / ".deepwork" / "rules"
-
-    if rules_dir.exists():
-        return False
-
-    # Create the rules directory
-    ensure_dir(rules_dir)
-
-    # Copy example rule templates from the deepwork_rules standard job
-    example_rules_dir = Path(__file__).parent.parent / "standard_jobs" / "deepwork_rules" / "rules"
-
-    if example_rules_dir.exists():
-        # Copy all .example files
-        for example_file in example_rules_dir.glob("*.md.example"):
-            dest_file = rules_dir / example_file.name
-            shutil.copy(example_file, dest_file)
-            # Fix permissions for copied rule template
-            fix_permissions(dest_file)
-
-    # Create a README file explaining the rules system
-    readme_content = """# DeepWork Rules
-
-Rules are automated guardrails that trigger when specific files change during
-AI agent sessions. They help ensure documentation stays current, security reviews
-happen, and team guidelines are followed.
-
-## Getting Started
-
-1. Copy an example file and rename it (remove the `.example` suffix):
-   ```
-   cp readme-documentation.md.example readme-documentation.md
-   ```
-
-2. Edit the file to match your project's patterns
-
-3. The rule will automatically trigger when matching files change
-
-## Rule Format
-
-Rules use YAML frontmatter in markdown files:
-
-```markdown
----
-name: Rule Name
-trigger: "pattern/**/*"
-safety: "optional/pattern"
----
-Instructions in markdown here.
-```
-
-## Detection Modes
-
-- **trigger/safety**: Fire when trigger matches, unless safety also matches
-- **set**: Bidirectional file correspondence (e.g., source + test)
-- **pair**: Directional correspondence (e.g., API code -> docs)
-
-## Documentation
-
-See `doc/rules_syntax.md` in the DeepWork repository for full syntax documentation.
-
-## Creating Rules Interactively
-
-Use `/deepwork_rules.define` to create new rules with guidance.
-"""
-    readme_path = rules_dir / "README.md"
-    readme_path.write_text(readme_content)
-
-    return True
-
-
 class DynamicChoice(click.Choice):
     """A Click Choice that gets its values dynamically from AgentAdapter."""
 
@@ -346,29 +303,32 @@ def _install_deepwork(platform_name: str | None, project_path: Path) -> None:
     deepwork_dir = project_path / ".deepwork"
     jobs_dir = deepwork_dir / "jobs"
     doc_specs_dir = deepwork_dir / "doc_specs"
+    schemas_dir = deepwork_dir / "schemas"
     ensure_dir(deepwork_dir)
     ensure_dir(jobs_dir)
     ensure_dir(doc_specs_dir)
+    ensure_dir(schemas_dir)
     console.print(f"  [green]✓[/green] Created {deepwork_dir.relative_to(project_path)}/")
 
-    # Step 3b: Inject standard jobs (core job definitions)
+    # Step 3b: Install schemas
+    console.print("[yellow]→[/yellow] Installing schemas...")
+    _install_schemas(schemas_dir, project_path)
+
+    # Step 3c: Inject standard jobs (core job definitions)
     console.print("[yellow]→[/yellow] Installing core job definitions...")
     _inject_deepwork_jobs(jobs_dir, project_path)
-    _inject_deepwork_rules(jobs_dir, project_path)
 
-    # Step 3c: Create .gitignore for temporary files
+    # Step 3d: Create .gitignore for temporary files
     _create_deepwork_gitignore(deepwork_dir)
     console.print("  [green]✓[/green] Created .deepwork/.gitignore")
 
-    # Step 3d: Create tmp directory with .gitkeep file for version control
+    # Step 3e: Create tmp directory with .gitkeep file for version control
     _create_tmp_directory(deepwork_dir)
     console.print("  [green]✓[/green] Created .deepwork/tmp/.gitkeep")
 
-    # Step 3e: Create rules directory with v2 templates
-    if _create_rules_directory(project_path):
-        console.print("  [green]✓[/green] Created .deepwork/rules/ with example templates")
-    else:
-        console.print("  [dim]•[/dim] .deepwork/rules/ already exists")
+    # Step 3f: Create common_info directory for shared reference files
+    _create_common_info_directory(deepwork_dir)
+    console.print("  [green]✓[/green] Created .deepwork/common_info/.gitkeep")
 
     # Step 4: Load or create config.yml
     console.print("[yellow]→[/yellow] Updating configuration...")
@@ -402,7 +362,17 @@ def _install_deepwork(platform_name: str | None, project_path: Path) -> None:
     save_yaml(config_file, config_data)
     console.print(f"  [green]✓[/green] Updated {config_file.relative_to(project_path)}")
 
-    # Step 5: Run sync to generate skills
+    # Step 5: Register MCP server for each platform
+    console.print("[yellow]→[/yellow] Registering MCP server...")
+    for adapter in detected_adapters:
+        if adapter.register_mcp_server(project_path):
+            console.print(f"  [green]✓[/green] Registered MCP server for {adapter.display_name}")
+        else:
+            console.print(
+                f"  [dim]•[/dim] MCP server already registered for {adapter.display_name}"
+            )
+
+    # Step 6: Run sync to generate skills
     console.print()
     console.print("[yellow]→[/yellow] Running sync to generate skills...")
     console.print()
@@ -410,18 +380,26 @@ def _install_deepwork(platform_name: str | None, project_path: Path) -> None:
     from deepwork.cli.sync import sync_skills
 
     try:
-        sync_skills(project_path)
+        sync_result = sync_skills(project_path)
     except Exception as e:
         raise InstallError(f"Failed to sync skills: {e}") from e
 
-    # Success message
+    # Success or warning message
     console.print()
     platform_names = ", ".join(a.display_name for a in detected_adapters)
-    console.print(
-        f"[bold green]✓ DeepWork installed successfully for {platform_names}![/bold green]"
-    )
-    console.print()
-    console.print("[bold]Next steps:[/bold]")
-    console.print("  1. Start your agent CLI (ex. [cyan]claude[/cyan] or [cyan]gemini[/cyan])")
-    console.print("  2. Define your first job using the command [cyan]/deepwork_jobs[/cyan]")
+
+    if sync_result.has_warnings:
+        console.print("[bold yellow]⚠ You should repair your DeepWork install[/bold yellow]")
+        console.print()
+        console.print("[bold]To fix issues:[/bold]")
+        console.print("  1. Start your agent CLI (ex. [cyan]claude[/cyan] or [cyan]gemini[/cyan])")
+        console.print("  2. Run [cyan]/deepwork repair[/cyan]")
+    else:
+        console.print(
+            f"[bold green]✓ DeepWork installed successfully for {platform_names}![/bold green]"
+        )
+        console.print()
+        console.print("[bold]Next steps:[/bold]")
+        console.print("  1. Start your agent CLI (ex. [cyan]claude[/cyan] or [cyan]gemini[/cyan])")
+        console.print("  2. Define your first job using the command [cyan]/deepwork_jobs[/cyan]")
     console.print()
diff --git a/src/deepwork/cli/main.py b/src/deepwork/cli/main.py
index b503ea9a..66756a08 100644
--- a/src/deepwork/cli/main.py
+++ b/src/deepwork/cli/main.py
@@ -16,13 +16,13 @@ def cli() -> None:
 # Import commands
 from deepwork.cli.hook import hook  # noqa: E402
 from deepwork.cli.install import install  # noqa: E402
-from deepwork.cli.rules import rules  # noqa: E402
+from deepwork.cli.serve import serve  # noqa: E402
 from deepwork.cli.sync import sync  # noqa: E402
 
 cli.add_command(install)
 cli.add_command(sync)
 cli.add_command(hook)
-cli.add_command(rules)
+cli.add_command(serve)
 
 
 if __name__ == "__main__":
diff --git a/src/deepwork/cli/rules.py b/src/deepwork/cli/rules.py
deleted file mode 100644
index 54bc132e..00000000
--- a/src/deepwork/cli/rules.py
+++ /dev/null
@@ -1,32 +0,0 @@
-"""Rules command for DeepWork CLI."""
-
-import click
-from rich.console import Console
-
-from deepwork.core.rules_queue import RulesQueue
-
-console = Console()
-
-
-@click.group()
-def rules() -> None:
-    """Manage DeepWork rules and queue."""
-    pass
-
-
-@rules.command(name="clear_queue")
-def clear_queue() -> None:
-    """
-    Clear all entries from the rules queue.
-
-    Removes all JSON files from .deepwork/tmp/rules/queue/.
-    This is useful for resetting the queue between tests or after
-    manual verification of rule states.
-    """
-    queue = RulesQueue()
-    count = queue.clear()
-
-    if count == 0:
-        console.print("[yellow]Queue is already empty[/yellow]")
-    else:
-        console.print(f"[green]Cleared {count} queue entry/entries[/green]")
diff --git a/src/deepwork/cli/serve.py b/src/deepwork/cli/serve.py
new file mode 100644
index 00000000..5625056a
--- /dev/null
+++ b/src/deepwork/cli/serve.py
@@ -0,0 +1,133 @@
+"""Serve command for DeepWork MCP server."""
+
+from pathlib import Path
+
+import click
+from rich.console import Console
+
+from deepwork.utils.yaml_utils import load_yaml
+
+console = Console()
+
+
+class ServeError(Exception):
+    """Exception raised for serve errors."""
+
+    pass
+
+
+def _load_config(project_path: Path) -> dict:
+    """Load DeepWork config from project.
+
+    Args:
+        project_path: Path to project root
+
+    Returns:
+        Config dictionary
+
+    Raises:
+        ServeError: If config not found or invalid
+    """
+    config_file = project_path / ".deepwork" / "config.yml"
+    if not config_file.exists():
+        raise ServeError(f"DeepWork not installed in {project_path}. Run 'deepwork install' first.")
+
+    config = load_yaml(config_file)
+    if config is None:
+        config = {}
+
+    return config
+
+
+@click.command()
+@click.option(
+    "--path",
+    type=click.Path(exists=True, file_okay=False, path_type=Path),
+    default=".",
+    help="Path to project directory (default: current directory)",
+)
+@click.option(
+    "--no-quality-gate",
+    is_flag=True,
+    default=False,
+    help="Disable quality gate evaluation",
+)
+@click.option(
+    "--transport",
+    type=click.Choice(["stdio", "sse"]),
+    default="stdio",
+    help="MCP transport protocol (default: stdio)",
+)
+@click.option(
+    "--port",
+    type=int,
+    default=8000,
+    help="Port for SSE transport (default: 8000)",
+)
+def serve(
+    path: Path,
+    no_quality_gate: bool,
+    transport: str,
+    port: int,
+) -> None:
+    """Start the DeepWork MCP server.
+
+    Exposes workflow management tools to AI agents via MCP protocol.
+    By default uses stdio transport for local integration with Claude Code.
+
+    Quality gate is enabled by default and uses Claude Code to evaluate
+    step outputs against quality criteria.
+
+    Examples:
+
+        # Start server for current directory
+        deepwork serve
+
+        # Start with quality gate disabled
+        deepwork serve --no-quality-gate
+
+        # Start for a specific project
+        deepwork serve --path /path/to/project
+    """
+    try:
+        _serve_mcp(path, not no_quality_gate, transport, port)
+    except ServeError as e:
+        console.print(f"[red]Error:[/red] {e}")
+        raise click.Abort() from e
+    except Exception as e:
+        console.print(f"[red]Unexpected error:[/red] {e}")
+        raise
+
+
+def _serve_mcp(
+    project_path: Path,
+    enable_quality_gate: bool,
+    transport: str,
+    port: int,
+) -> None:
+    """Start the MCP server.
+
+    Args:
+        project_path: Path to project directory
+        enable_quality_gate: Whether to enable quality gate evaluation
+        transport: Transport protocol (stdio or sse)
+        port: Port for SSE transport
+
+    Raises:
+        ServeError: If server fails to start
+    """
+    # Validate project has DeepWork installed
+    _load_config(project_path)
+
+    # Create and run server
+    from deepwork.mcp.server import create_server
+
+    server = create_server(
+        project_root=project_path,
+        enable_quality_gate=enable_quality_gate,
+    )
+
+    if transport == "stdio":
+        server.run(transport="stdio")
+    else:
+        server.run(transport="sse", port=port)
diff --git a/src/deepwork/cli/sync.py b/src/deepwork/cli/sync.py
index 03c47a30..80441f6d 100644
--- a/src/deepwork/cli/sync.py
+++ b/src/deepwork/cli/sync.py
@@ -1,5 +1,6 @@
 """Sync command for DeepWork CLI."""
 
+from dataclasses import dataclass, field
 from pathlib import Path
 
 import click
@@ -22,6 +23,21 @@ class SyncError(Exception):
     pass
 
 
+@dataclass
+class SyncResult:
+    """Result of a sync operation."""
+
+    platforms_synced: int = 0
+    skills_generated: int = 0
+    hooks_synced: int = 0
+    warnings: list[str] = field(default_factory=list)
+
+    @property
+    def has_warnings(self) -> bool:
+        """Return True if there were any warnings during sync."""
+        return len(self.warnings) > 0
+
+
 @click.command()
 @click.option(
     "--path",
@@ -46,13 +62,16 @@ def sync(path: Path) -> None:
         raise
 
 
-def sync_skills(project_path: Path) -> None:
+def sync_skills(project_path: Path) -> SyncResult:
     """
     Sync skills to all configured platforms.
 
     Args:
         project_path: Path to project directory
 
+    Returns:
+        SyncResult with statistics and any warnings
+
     Raises:
         SyncError: If sync fails
     """
@@ -80,6 +99,43 @@ def sync_skills(project_path: Path) -> None:
 
     console.print("[bold cyan]Syncing DeepWork Skills[/bold cyan]\n")
 
+    # Generate /deepwork skill FIRST for all platforms (before parsing jobs)
+    # This ensures the skill is available even if some jobs fail to parse
+    generator = SkillGenerator()
+    result = SyncResult()
+    platform_adapters: list[AgentAdapter] = []
+    all_skill_paths_by_platform: dict[str, list[Path]] = {}
+
+    console.print("[yellow]→[/yellow] Generating /deepwork skill...")
+    for platform_name in platforms:
+        try:
+            adapter_cls = AgentAdapter.get(platform_name)
+        except Exception:
+            warning = f"Unknown platform '{platform_name}', skipping"
+            console.print(f"  [yellow]⚠[/yellow] {warning}")
+            result.warnings.append(warning)
+            continue
+
+        adapter = adapter_cls(project_path)
+        platform_adapters.append(adapter)
+
+        platform_dir = project_path / adapter.config_dir
+        skills_dir = platform_dir / adapter.skills_dir
+        ensure_dir(skills_dir)
+
+        all_skill_paths: list[Path] = []
+        try:
+            deepwork_skill_path = generator.generate_deepwork_skill(adapter, platform_dir)
+            all_skill_paths.append(deepwork_skill_path)
+            result.skills_generated += 1
+            console.print(f"  [green]✓[/green] {adapter.display_name}: deepwork (MCP entry point)")
+        except Exception as e:
+            warning = f"{adapter.display_name}: Failed to generate /deepwork skill: {e}"
+            console.print(f"  [red]✗[/red] {warning}")
+            result.warnings.append(warning)
+
+        all_skill_paths_by_platform[platform_name] = all_skill_paths
+
     # Discover jobs
     jobs_dir = deepwork_dir / "jobs"
     if not jobs_dir.exists():
@@ -87,7 +143,7 @@ def sync_skills(project_path: Path) -> None:
     else:
         job_dirs = [d for d in jobs_dir.iterdir() if d.is_dir() and (d / "job.yml").exists()]
 
-    console.print(f"[yellow]→[/yellow] Found {len(job_dirs)} job(s) to sync")
+    console.print(f"\n[yellow]→[/yellow] Found {len(job_dirs)} job(s) to sync")
 
     # Parse all jobs
     jobs = []
@@ -98,67 +154,48 @@ def sync_skills(project_path: Path) -> None:
             jobs.append(job_def)
             console.print(f"  [green]✓[/green] Loaded {job_def.name} v{job_def.version}")
         except Exception as e:
-            console.print(f"  [red]✗[/red] Failed to load {job_dir.name}: {e}")
+            warning = f"Failed to load {job_dir.name}: {e}"
+            console.print(f"  [red]✗[/red] {warning}")
             failed_jobs.append((job_dir.name, str(e)))
+            result.warnings.append(warning)
 
-    # Fail early if any jobs failed to parse
+    # Warn about failed jobs but continue (skill already installed)
     if failed_jobs:
         console.print()
-        console.print("[bold red]Sync aborted due to job parsing errors:[/bold red]")
+        console.print("[bold yellow]Warning: Some jobs failed to parse:[/bold yellow]")
         for job_name, error in failed_jobs:
             console.print(f"  • {job_name}: {error}")
-        raise SyncError(f"Failed to parse {len(failed_jobs)} job(s)")
+        console.print(
+            "[dim]The /deepwork skill is installed. Fix the job errors and run 'deepwork sync' again.[/dim]"
+        )
 
-    # Collect hooks from all jobs
+    # Collect hooks from jobs (hooks collection is independent of job.yml parsing)
     job_hooks_list = collect_job_hooks(jobs_dir)
     if job_hooks_list:
-        console.print(f"[yellow]→[/yellow] Found {len(job_hooks_list)} job(s) with hooks")
-
-    # Sync each platform
-    generator = SkillGenerator()
-    stats = {"platforms": 0, "skills": 0, "hooks": 0}
-
-    for platform_name in platforms:
-        try:
-            adapter_cls = AgentAdapter.get(platform_name)
-        except Exception:
-            console.print(f"[yellow]⚠[/yellow] Unknown platform '{platform_name}', skipping")
-            continue
+        console.print(f"\n[yellow]→[/yellow] Found {len(job_hooks_list)} job(s) with hooks")
 
-        adapter = adapter_cls(project_path)
-        console.print(f"\n[yellow]→[/yellow] Syncing to {adapter.display_name}...")
-
-        platform_dir = project_path / adapter.config_dir
-        skills_dir = platform_dir / adapter.skills_dir
-
-        # Create skills directory
-        ensure_dir(skills_dir)
+    # Sync hooks and permissions for each platform
+    for adapter in platform_adapters:
+        console.print(
+            f"\n[yellow]→[/yellow] Syncing hooks and permissions to {adapter.display_name}..."
+        )
 
-        # Generate skills for all jobs
-        all_skill_paths: list[Path] = []
-        if jobs:
-            console.print("  [dim]•[/dim] Generating skills...")
-            for job in jobs:
-                try:
-                    job_paths = generator.generate_all_skills(
-                        job, adapter, platform_dir, project_root=project_path
-                    )
-                    all_skill_paths.extend(job_paths)
-                    stats["skills"] += len(job_paths)
-                    console.print(f"    [green]✓[/green] {job.name} ({len(job_paths)} skills)")
-                except Exception as e:
-                    console.print(f"    [red]✗[/red] Failed for {job.name}: {e}")
+        # NOTE: Job skills (meta-skills and step skills) are no longer generated.
+        # The MCP server now handles workflow orchestration directly.
+        # Only the /deepwork skill is installed as the entry point.
 
         # Sync hooks to platform settings
         if job_hooks_list:
             console.print("  [dim]•[/dim] Syncing hooks...")
             try:
                 hooks_count = sync_hooks_to_platform(project_path, adapter, job_hooks_list)
-                stats["hooks"] += hooks_count
+                result.hooks_synced += hooks_count
                 if hooks_count > 0:
                     console.print(f"    [green]✓[/green] Synced {hooks_count} hook(s)")
             except Exception as e:
-                console.print(f"    [red]✗[/red] Failed to sync hooks: {e}")
+                warning = f"Failed to sync hooks: {e}"
+                console.print(f"    [red]✗[/red] {warning}")
+                result.warnings.append(warning)
 
         # Sync required permissions to platform settings
         console.print("  [dim]•[/dim] Syncing permissions...")
@@ -169,9 +206,12 @@ def sync_skills(project_path: Path) -> None:
             else:
                 console.print("    [dim]•[/dim] Base permissions already configured")
         except Exception as e:
-            console.print(f"    [red]✗[/red] Failed to sync permissions: {e}")
+            warning = f"Failed to sync permissions: {e}"
+            console.print(f"    [red]✗[/red] {warning}")
+            result.warnings.append(warning)
 
         # Add skill permissions for generated skills (if adapter supports it)
+        all_skill_paths = all_skill_paths_by_platform.get(adapter.name, [])
         if all_skill_paths and hasattr(adapter, "add_skill_permissions"):
             try:
                 skill_perms_count = adapter.add_skill_permissions(project_path, all_skill_paths)
@@ -180,9 +220,11 @@ def sync_skills(project_path: Path) -> None:
                         f"    [green]✓[/green] Added {skill_perms_count} skill permission(s)"
                     )
             except Exception as e:
-                console.print(f"    [red]✗[/red] Failed to sync skill permissions: {e}")
+                warning = f"Failed to sync skill permissions: {e}"
+                console.print(f"    [red]✗[/red] {warning}")
+                result.warnings.append(warning)
 
-        stats["platforms"] += 1
+        result.platforms_synced += 1
 
     # Summary
     console.print()
@@ -193,10 +235,12 @@ def sync_skills(project_path: Path) -> None:
     table.add_column("Metric", style="cyan")
     table.add_column("Count", style="green")
 
-    table.add_row("Platforms synced", str(stats["platforms"]))
-    table.add_row("Total skills", str(stats["skills"]))
-    if stats["hooks"] > 0:
-        table.add_row("Hooks synced", str(stats["hooks"]))
+    table.add_row("Platforms synced", str(result.platforms_synced))
+    table.add_row("Total skills", str(result.skills_generated))
+    if result.hooks_synced > 0:
+        table.add_row("Hooks synced", str(result.hooks_synced))
 
     console.print(table)
     console.print()
+
+    return result
diff --git a/src/deepwork/core/adapters.py b/src/deepwork/core/adapters.py
index 96b8ca00..9225455f 100644
--- a/src/deepwork/core/adapters.py
+++ b/src/deepwork/core/adapters.py
@@ -55,8 +55,6 @@ class AgentAdapter(ABC):
     display_name: ClassVar[str]
     config_dir: ClassVar[str]
     skills_dir: ClassVar[str] = "skills"
-    skill_template: ClassVar[str] = "skill-job-step.md.jinja"
-    meta_skill_template: ClassVar[str] = "skill-job-meta.md.jinja"
 
     # Mapping from generic SkillLifecycleHook to platform-specific event names.
     # Subclasses should override this to provide platform-specific mappings.
@@ -148,38 +146,6 @@ def get_skills_dir(self, project_root: Path | None = None) -> Path:
             raise AdapterError("No project root specified")
         return root / self.config_dir / self.skills_dir
 
-    def get_meta_skill_filename(self, job_name: str) -> str:
-        """
-        Get the filename for a job's meta-skill.
-
-        The meta-skill is the primary user interface for a job.
-        Can be overridden for different file formats.
-
-        Args:
-            job_name: Name of the job
-
-        Returns:
-            Meta-skill filename (e.g., "job_name/SKILL.md" for Claude)
-        """
-        return f"{job_name}/SKILL.md"
-
-    def get_step_skill_filename(self, job_name: str, step_id: str, exposed: bool = False) -> str:
-        """
-        Get the filename for a step skill.
-
-        All step skills use the same filename format. The exposed parameter
-        is used for template context (user-invocable frontmatter setting).
-
-        Args:
-            job_name: Name of the job
-            step_id: ID of the step
-            exposed: If True, skill is user-invocable (for template context). Default: False.
-
-        Returns:
-            Skill filename (e.g., "job_name.step_id/SKILL.md" for Claude)
-        """
-        return f"{job_name}.{step_id}/SKILL.md"
-
     def detect(self, project_root: Path | None = None) -> bool:
         """
         Check if this platform is available in the project.
@@ -256,6 +222,22 @@ def sync_permissions(self, project_path: Path) -> int:
         # Default implementation does nothing - subclasses can override
         return 0
 
+    def register_mcp_server(self, project_path: Path) -> bool:
+        """
+        Register the DeepWork MCP server with the platform.
+
+        Args:
+            project_path: Path to project root
+
+        Returns:
+            True if server was registered, False if already registered
+
+        Raises:
+            AdapterError: If registration fails
+        """
+        # Default implementation does nothing - subclasses can override
+        return False
+
 
 def _hook_already_present(hooks: list[dict[str, Any]], script_path: str) -> bool:
     """Check if a hook with the given script path is already in the list."""
@@ -546,6 +528,65 @@ def _extract_skill_name(self, skill_path: Path) -> str | None:
 
         return None
 
+    def register_mcp_server(self, project_path: Path) -> bool:
+        """
+        Register the DeepWork MCP server in .mcp.json at project root.
+
+        Claude Code reads MCP server configurations from .mcp.json (project scope),
+        not from settings.json. This method assumes the `deepwork` command is
+        available in the user's PATH.
+
+        Args:
+            project_path: Path to project root
+
+        Returns:
+            True if server was registered or updated, False if no changes needed
+
+        Raises:
+            AdapterError: If registration fails
+        """
+        mcp_file = project_path / ".mcp.json"
+
+        # Load existing .mcp.json or create new
+        existing_config: dict[str, Any] = {}
+        if mcp_file.exists():
+            try:
+                with open(mcp_file, encoding="utf-8") as f:
+                    existing_config = json.load(f)
+            except (json.JSONDecodeError, OSError) as e:
+                raise AdapterError(f"Failed to read .mcp.json: {e}") from e
+
+        # Initialize mcpServers if not present
+        if "mcpServers" not in existing_config:
+            existing_config["mcpServers"] = {}
+
+        # Build the new MCP server config
+        # Assume deepwork is available in PATH
+        new_server_config = {
+            "command": "deepwork",
+            "args": ["serve", "--path", "."],
+        }
+
+        # Check if already registered with same config
+        existing_server = existing_config["mcpServers"].get("deepwork", {})
+        if (
+            existing_server.get("command") == new_server_config["command"]
+            and existing_server.get("args") == new_server_config["args"]
+        ):
+            return False
+
+        # Register or update the DeepWork MCP server
+        existing_config["mcpServers"]["deepwork"] = new_server_config
+
+        # Write .mcp.json
+        try:
+            with open(mcp_file, "w", encoding="utf-8") as f:
+                json.dump(existing_config, f, indent=2)
+        except OSError as e:
+            raise AdapterError(f"Failed to write .mcp.json: {e}") from e
+
+        return True
+
 
 class GeminiAdapter(AgentAdapter):
     """Adapter for Gemini CLI.
@@ -563,47 +604,11 @@ class GeminiAdapter(AgentAdapter):
     name = "gemini"
     display_name = "Gemini CLI"
     config_dir = ".gemini"
-    skill_template = "skill-job-step.toml.jinja"
-    meta_skill_template = "skill-job-meta.toml.jinja"
 
     # Gemini CLI does NOT support skill-level hooks
     # Hooks are global/project-level in settings.json, not per-skill
     hook_name_mapping: ClassVar[dict[SkillLifecycleHook, str]] = {}
 
-    def get_meta_skill_filename(self, job_name: str) -> str:
-        """
-        Get the filename for a Gemini job's meta-skill.
-
-        Gemini uses TOML files and colon namespacing via subdirectories.
-        For job "my_job", creates: my_job/index.toml
-
-        Args:
-            job_name: Name of the job
-
-        Returns:
-            Meta-skill filename path (e.g., "my_job/index.toml")
-        """
-        return f"{job_name}/index.toml"
-
-    def get_step_skill_filename(self, job_name: str, step_id: str, exposed: bool = False) -> str:
-        """
-        Get the filename for a Gemini step skill.
-
-        Gemini uses TOML files and colon namespacing via subdirectories.
-        All step skills use the same filename format. The exposed parameter
-        is used for template context (user-invocable setting).
-        For job "my_job" and step "step_one", creates: my_job/step_one.toml
-
-        Args:
-            job_name: Name of the job
-            step_id: ID of the step
-            exposed: If True, skill is user-invocable (for template context). Default: False.
-
-        Returns:
-            Skill filename path (e.g., "my_job/step_one.toml")
-        """
-        return f"{job_name}/{step_id}.toml"
-
     def sync_hooks(self, project_path: Path, hooks: dict[str, list[dict[str, Any]]]) -> int:
         """
         Sync hooks to Gemini CLI settings.
diff --git a/src/deepwork/core/command_executor.py b/src/deepwork/core/command_executor.py
deleted file mode 100644
index 74288a13..00000000
--- a/src/deepwork/core/command_executor.py
+++ /dev/null
@@ -1,190 +0,0 @@
-"""Execute command actions for rules."""
-
-import shlex
-import subprocess
-from dataclasses import dataclass
-from pathlib import Path
-
-from deepwork.core.rules_parser import CommandAction
-
-
-@dataclass
-class CommandResult:
-    """Result of executing a command."""
-
-    success: bool
-    exit_code: int
-    stdout: str
-    stderr: str
-    command: str  # The actual command that was run
-
-
-def substitute_command_variables(
-    command_template: str,
-    file: str | None = None,
-    files: list[str] | None = None,
-    repo_root: Path | None = None,
-) -> str:
-    """
-    Substitute template variables in a command string.
-
-    Variables:
-    - {file} - Single file path
-    - {files} - Space-separated file paths
-    - {repo_root} - Repository root directory
-
-    Args:
-        command_template: Command string with {var} placeholders
-        file: Single file path (for run_for: each_match)
-        files: List of file paths (for run_for: all_matches)
-        repo_root: Repository root path
-
-    Returns:
-        Command string with variables substituted
-    """
-    result = command_template
-
-    if file is not None:
-        # Quote file path to prevent command injection
-        result = result.replace("{file}", shlex.quote(file))
-
-    if files is not None:
-        # Quote each file path individually
-        quoted_files = " ".join(shlex.quote(f) for f in files)
-        result = result.replace("{files}", quoted_files)
-
-    if repo_root is not None:
-        result = result.replace("{repo_root}", shlex.quote(str(repo_root)))
-
-    return result
-
-
-def execute_command(
-    command: str,
-    cwd: Path | None = None,
-    timeout: int = 60,
-) -> CommandResult:
-    """
-    Execute a command and capture output.
-
-    Args:
-        command: Command string to execute
-        cwd: Working directory (defaults to current directory)
-        timeout: Timeout in seconds
-
-    Returns:
-        CommandResult with execution details
-    """
-    try:
-        # Run command as shell to support pipes, etc.
-        result = subprocess.run(
-            command,
-            shell=True,
-            cwd=cwd,
-            capture_output=True,
-            text=True,
-            timeout=timeout,
-        )
-
-        return CommandResult(
-            success=result.returncode == 0,
-            exit_code=result.returncode,
-            stdout=result.stdout,
-            stderr=result.stderr,
-            command=command,
-        )
-
-    except subprocess.TimeoutExpired:
-        return CommandResult(
-            success=False,
-            exit_code=-1,
-            stdout="",
-            stderr=f"Command timed out after {timeout} seconds",
-            command=command,
-        )
-    except Exception as e:
-        return CommandResult(
-            success=False,
-            exit_code=-1,
-            stdout="",
-            stderr=str(e),
-            command=command,
-        )
-
-
-def run_command_action(
-    action: CommandAction,
-    trigger_files: list[str],
-    repo_root: Path | None = None,
-) -> list[CommandResult]:
-    """
-    Run a command action for the given trigger files.
-
-    Args:
-        action: CommandAction configuration
-        trigger_files: Files that triggered the rule
-        repo_root: Repository root path
-
-    Returns:
-        List of CommandResult (one per command execution)
-    """
-    results: list[CommandResult] = []
-
-    if action.run_for == "each_match":
-        # Run command for each file individually
-        for file_path in trigger_files:
-            command = substitute_command_variables(
-                action.command,
-                file=file_path,
-                repo_root=repo_root,
-            )
-            result = execute_command(command, cwd=repo_root)
-            results.append(result)
-
-    elif action.run_for == "all_matches":
-        # Run command once with all files
-        command = substitute_command_variables(
-            action.command,
-            files=trigger_files,
-            repo_root=repo_root,
-        )
-        result = execute_command(command, cwd=repo_root)
-        results.append(result)
-
-    return results
-
-
-def all_commands_succeeded(results: list[CommandResult]) -> bool:
-    """Check if all command executions succeeded."""
-    return all(r.success for r in results)
-
-
-def format_command_errors(
-    results: list[CommandResult],
-    rule_name: str | None = None,
-) -> str:
-    """Format detailed error messages from failed commands.
-
-    Args:
-        results: List of command execution results
-        rule_name: Optional rule name to include in error message
-
-    Returns:
-        Formatted error message with command, exit code, stdout, and stderr
-    """
-    errors: list[str] = []
-    for result in results:
-        if not result.success:
-            parts: list[str] = []
-            if rule_name:
-                parts.append(f"Rule: {rule_name}")
-            parts.append(f"Command: {result.command}")
-            parts.append(f"Exit code: {result.exit_code}")
-            if result.stdout and result.stdout.strip():
-                parts.append(f"Stdout:\n{result.stdout.strip()}")
-            if result.stderr and result.stderr.strip():
-                parts.append(f"Stderr:\n{result.stderr.strip()}")
-            if not result.stdout.strip() and not result.stderr.strip():
-                parts.append("(no output)")
-            errors.append("\n".join(parts))
-    return "\n\n".join(errors)
diff --git a/src/deepwork/core/generator.py b/src/deepwork/core/generator.py
index 05ba975c..58502c1a 100644
--- a/src/deepwork/core/generator.py
+++ b/src/deepwork/core/generator.py
@@ -1,19 +1,11 @@
 """Skill file generator using Jinja2 templates."""
 
 from pathlib import Path
-from typing import Any
 
 from jinja2 import Environment, FileSystemLoader, TemplateNotFound
 
-from deepwork.core.adapters import AgentAdapter, SkillLifecycleHook
-from deepwork.core.doc_spec_parser import (
-    DocSpec,
-    DocSpecParseError,
-    parse_doc_spec_file,
-)
-from deepwork.core.parser import JobDefinition, Step
-from deepwork.schemas.job_schema import LIFECYCLE_HOOK_EVENTS
-from deepwork.utils.fs import safe_read, safe_write
+from deepwork.core.adapters import AgentAdapter
+from deepwork.utils.fs import safe_write
 
 
 class GeneratorError(Exception):
@@ -42,35 +34,6 @@ def __init__(self, templates_dir: Path | str | None = None):
         if not self.templates_dir.exists():
             raise GeneratorError(f"Templates directory not found: {self.templates_dir}")
 
-        # Cache for loaded doc specs (keyed by absolute file path)
-        self._doc_spec_cache: dict[Path, DocSpec] = {}
-
-    def _load_doc_spec(self, project_root: Path, doc_spec_path: str) -> DocSpec | None:
-        """
-        Load a doc spec by file path with caching.
-
-        Args:
-            project_root: Path to project root
-            doc_spec_path: Relative path to doc spec file (e.g., ".deepwork/doc_specs/report.md")
-
-        Returns:
-            DocSpec if file exists and parses, None otherwise
-        """
-        full_path = project_root / doc_spec_path
-        if full_path in self._doc_spec_cache:
-            return self._doc_spec_cache[full_path]
-
-        if not full_path.exists():
-            return None
-
-        try:
-            doc_spec = parse_doc_spec_file(full_path)
-        except DocSpecParseError:
-            return None
-
-        self._doc_spec_cache[full_path] = doc_spec
-        return doc_spec
-
     def _get_jinja_env(self, adapter: AgentAdapter) -> Environment:
         """
         Get Jinja2 environment for an adapter.
@@ -93,403 +56,20 @@ def _get_jinja_env(self, adapter: AgentAdapter) -> Environment:
             lstrip_blocks=True,
         )
 
-    def _is_standalone_step(self, job: JobDefinition, step: Step) -> bool:
-        """
-        Check if a step is standalone (not part of any workflow).
-
-        A step is standalone if:
-        - It's not listed in any workflow definition
-        - OR (for backward compatibility) no workflows are defined and the step
-          has no dependencies and no other steps depend on it
-
-        Args:
-            job: Job definition
-            step: Step to check
-
-        Returns:
-            True if step is standalone
-        """
-        # If workflows are defined, use workflow membership
-        if job.workflows:
-            return job.get_workflow_for_step(step.id) is None
-        else:
-            # Backward compatibility: if no workflows defined, use dependency analysis
-            # Step has dependencies - not standalone
-            if step.dependencies:
-                return False
-
-            # Check if any other step depends on this step
-            for other_step in job.steps:
-                if step.id in other_step.dependencies:
-                    return False
-
-        return True
-
-    def _get_workflow_context(self, job: JobDefinition, step: Step) -> dict[str, Any]:
-        """
-        Build workflow context for a step.
-
-        Args:
-            job: Job definition
-            step: Step to build context for
-
-        Returns:
-            Workflow context dictionary with workflow info, or empty dict if standalone
-        """
-        workflow = job.get_workflow_for_step(step.id)
-        if not workflow:
-            return {}
-
-        position = job.get_step_position_in_workflow(step.id)
-        return {
-            "workflow_name": workflow.name,
-            "workflow_summary": workflow.summary,
-            "workflow_step_number": position[0] if position else 1,
-            "workflow_total_steps": position[1] if position else 1,
-            "workflow_next_step": job.get_next_step_in_workflow(step.id),
-            "workflow_prev_step": job.get_prev_step_in_workflow(step.id),
-        }
-
-    def _build_hook_context(self, job: JobDefinition, hook_action: Any) -> dict[str, Any]:
-        """
-        Build context for a single hook action.
-
-        Args:
-            job: Job definition
-            hook_action: HookAction instance
-
-        Returns:
-            Hook context dictionary
-        """
-        hook_ctx: dict[str, Any] = {}
-        if hook_action.is_prompt():
-            hook_ctx["type"] = "prompt"
-            hook_ctx["content"] = hook_action.prompt
-        elif hook_action.is_prompt_file():
-            hook_ctx["type"] = "prompt_file"
-            hook_ctx["path"] = hook_action.prompt_file
-            # Read the prompt file content
-            prompt_file_path = job.job_dir / hook_action.prompt_file
-            prompt_content = safe_read(prompt_file_path)
-            if prompt_content is None:
-                raise GeneratorError(f"Hook prompt file not found: {prompt_file_path}")
-            hook_ctx["content"] = prompt_content
-        elif hook_action.is_script():
-            hook_ctx["type"] = "script"
-            hook_ctx["path"] = hook_action.script
-        return hook_ctx
-
-    def _build_step_context(
+    def generate_deepwork_skill(
         self,
-        job: JobDefinition,
-        step: Step,
-        step_index: int,
-        adapter: AgentAdapter,
-        project_root: Path | None = None,
-    ) -> dict[str, Any]:
-        """
-        Build template context for a step.
-
-        Args:
-            job: Job definition
-            step: Step to generate context for
-            step_index: Index of step in job (0-based)
-            adapter: Agent adapter for platform-specific hook name mapping
-            project_root: Optional project root for loading doc specs
-
-        Returns:
-            Template context dictionary
-        """
-        # Read step instructions
-        instructions_file = job.job_dir / step.instructions_file
-        instructions_content = safe_read(instructions_file)
-        if instructions_content is None:
-            raise GeneratorError(f"Step instructions file not found: {instructions_file}")
-
-        # Separate user inputs and file inputs
-        user_inputs = [
-            {"name": inp.name, "description": inp.description}
-            for inp in step.inputs
-            if inp.is_user_input()
-        ]
-        file_inputs = [
-            {"file": inp.file, "from_step": inp.from_step}
-            for inp in step.inputs
-            if inp.is_file_input()
-        ]
-
-        # Check if this is a standalone step
-        is_standalone = self._is_standalone_step(job, step)
-
-        # Get workflow context (empty dict if standalone)
-        workflow_ctx = self._get_workflow_context(job, step)
-
-        # Determine next and previous steps based on workflow (if defined) or order
-        next_step = None
-        prev_step = None
-        if not is_standalone:
-            if workflow_ctx:
-                # Use workflow-defined order
-                next_step = workflow_ctx.get("workflow_next_step")
-                prev_step = workflow_ctx.get("workflow_prev_step")
-            else:
-                # Backward compatibility: use step array order
-                if step_index < len(job.steps) - 1:
-                    next_step = job.steps[step_index + 1].id
-                if step_index > 0:
-                    prev_step = job.steps[step_index - 1].id
-
-        # Build hooks context for all lifecycle events
-        # Structure: {platform_event_name: [hook_contexts]}
-        hooks: dict[str, list[dict[str, Any]]] = {}
-        for event in LIFECYCLE_HOOK_EVENTS:
-            if event in step.hooks:
-                # Get platform-specific event name from adapter
-                hook_enum = SkillLifecycleHook(event)
-                platform_event_name = adapter.get_platform_hook_name(hook_enum)
-                if platform_event_name:
-                    hook_contexts = [
-                        self._build_hook_context(job, hook_action)
-                        for hook_action in step.hooks[event]
-                    ]
-                    if hook_contexts:
-                        hooks[platform_event_name] = hook_contexts
-
-        # Claude Code has separate Stop and SubagentStop events. When a Stop hook
-        # is defined, also register it for SubagentStop so it triggers for both
-        # the main agent and subagents.
-        if "Stop" in hooks:
-            hooks["SubagentStop"] = hooks["Stop"]
-
-        # Backward compatibility: stop_hooks is after_agent hooks
-        stop_hooks = hooks.get(
-            adapter.get_platform_hook_name(SkillLifecycleHook.AFTER_AGENT) or "Stop", []
-        )
-
-        # Build rich outputs context with doc spec information
-        outputs_context = []
-        for output in step.outputs:
-            output_ctx: dict[str, Any] = {
-                "file": output.file,
-                "has_doc_spec": output.has_doc_spec(),
-            }
-            if output.has_doc_spec() and output.doc_spec and project_root:
-                doc_spec = self._load_doc_spec(project_root, output.doc_spec)
-                if doc_spec:
-                    output_ctx["doc_spec"] = {
-                        "path": output.doc_spec,
-                        "name": doc_spec.name,
-                        "description": doc_spec.description,
-                        "target_audience": doc_spec.target_audience,
-                        "quality_criteria": [
-                            {"name": c.name, "description": c.description}
-                            for c in doc_spec.quality_criteria
-                        ],
-                        "example_document": doc_spec.example_document,
-                    }
-            outputs_context.append(output_ctx)
-
-        context = {
-            "job_name": job.name,
-            "job_version": job.version,
-            "job_summary": job.summary,
-            "job_description": job.description,
-            "step_id": step.id,
-            "step_name": step.name,
-            "step_description": step.description,
-            "step_number": step_index + 1,  # 1-based for display
-            "total_steps": len(job.steps),
-            "instructions_file": step.instructions_file,
-            "instructions_content": instructions_content,
-            "user_inputs": user_inputs,
-            "file_inputs": file_inputs,
-            "outputs": outputs_context,
-            "dependencies": step.dependencies,
-            "next_step": next_step,
-            "prev_step": prev_step,
-            "is_standalone": is_standalone,
-            "hooks": hooks,  # New: all hooks by platform event name
-            "stop_hooks": stop_hooks,  # Backward compat: after_agent hooks only
-            "quality_criteria": step.quality_criteria,  # Declarative criteria with framing
-            "agent": step.agent,  # Agent type (e.g., "general-purpose") - triggers context: fork
-        }
-
-        # Add workflow context if step is part of a workflow
-        context.update(workflow_ctx)
-
-        return context
-
-    def _build_meta_skill_context(
-        self, job: JobDefinition, adapter: AgentAdapter
-    ) -> dict[str, Any]:
-        """
-        Build template context for a job's meta-skill.
-
-        Args:
-            job: Job definition
-            adapter: Agent adapter for platform-specific configuration
-
-        Returns:
-            Template context dictionary
-        """
-        # Build step info for the meta-skill
-        steps_info = []
-        for step in job.steps:
-            skill_filename = adapter.get_step_skill_filename(job.name, step.id, step.exposed)
-            # Extract just the skill name (without path and extension)
-            # For Claude: job_name.step_id/SKILL.md -> job_name.step_id
-            # For Gemini: job_name/step_id.toml -> job_name:step_id
-            if adapter.name == "gemini":
-                # Gemini uses colon for namespacing: job_name:step_id
-                parts = skill_filename.replace(".toml", "").split("/")
-                skill_name = ":".join(parts)
-            else:
-                # Claude uses directory/SKILL.md format, extract directory name
-                # job_name.step_id/SKILL.md -> job_name.step_id
-                skill_name = skill_filename.replace("/SKILL.md", "")
-
-            # Get workflow info for step
-            workflow = job.get_workflow_for_step(step.id)
-            step_info = {
-                "id": step.id,
-                "name": step.name,
-                "description": step.description,
-                "command_name": skill_name,
-                "dependencies": step.dependencies,
-                "exposed": step.exposed,
-                "is_standalone": self._is_standalone_step(job, step),
-            }
-            if workflow:
-                step_info["workflow_name"] = workflow.name
-
-            steps_info.append(step_info)
-
-        # Build workflow info with concurrent step support
-        workflows_info = []
-        for workflow in job.workflows:
-            # Build step entries with concurrency info
-            step_entries_info = []
-            for entry in workflow.step_entries:
-                entry_info: dict[str, Any] = {
-                    "is_concurrent": entry.is_concurrent,
-                    "step_ids": entry.step_ids,
-                }
-                if entry.is_concurrent:
-                    # Add detailed step info for each concurrent step
-                    concurrent_steps = []
-                    for i, step_id in enumerate(entry.step_ids):
-                        step = job.get_step(step_id)
-                        concurrent_steps.append(
-                            {
-                                "id": step_id,
-                                "name": step.name if step else step_id,
-                                "description": step.description if step else "",
-                                "task_number": i + 1,
-                            }
-                        )
-                    entry_info["concurrent_steps"] = concurrent_steps
-                step_entries_info.append(entry_info)
-
-            workflows_info.append(
-                {
-                    "name": workflow.name,
-                    "summary": workflow.summary,
-                    "steps": workflow.steps,  # Flattened for backward compat
-                    "step_entries": step_entries_info,  # New: with concurrency info
-                    "first_step": workflow.steps[0] if workflow.steps else None,
-                }
-            )
-
-        # Identify standalone steps (not in any workflow)
-        standalone_steps = [s for s in steps_info if s["is_standalone"]]
-
-        return {
-            "job_name": job.name,
-            "job_version": job.version,
-            "job_summary": job.summary,
-            "job_description": job.description,
-            "total_steps": len(job.steps),
-            "steps": steps_info,
-            "workflows": workflows_info,
-            "standalone_steps": standalone_steps,
-            "has_workflows": bool(job.workflows),
-        }
-
-    def generate_meta_skill(
-        self,
-        job: JobDefinition,
         adapter: AgentAdapter,
         output_dir: Path | str,
     ) -> Path:
         """
-        Generate the meta-skill file for a job.
-
-        The meta-skill is the primary user interface for a job, routing
-        user intent to the appropriate step.
-
-        Args:
-            job: Job definition
-            adapter: Agent adapter for the target platform
-            output_dir: Directory to write skill file to
-
-        Returns:
-            Path to generated meta-skill file
-
-        Raises:
-            GeneratorError: If generation fails
-        """
-        output_dir = Path(output_dir)
-
-        # Create skills subdirectory if needed
-        skills_dir = output_dir / adapter.skills_dir
-        skills_dir.mkdir(parents=True, exist_ok=True)
-
-        # Build context
-        context = self._build_meta_skill_context(job, adapter)
-
-        # Load and render template
-        env = self._get_jinja_env(adapter)
-        try:
-            template = env.get_template(adapter.meta_skill_template)
-        except TemplateNotFound as e:
-            raise GeneratorError(f"Meta-skill template not found: {e}") from e
-
-        try:
-            rendered = template.render(**context)
-        except Exception as e:
-            raise GeneratorError(f"Meta-skill template rendering failed: {e}") from e
-
-        # Write meta-skill file
-        skill_filename = adapter.get_meta_skill_filename(job.name)
-        skill_path = skills_dir / skill_filename
-
-        # Ensure parent directories exist (for Gemini's job_name/index.toml structure)
-        skill_path.parent.mkdir(parents=True, exist_ok=True)
-
-        try:
-            safe_write(skill_path, rendered)
-        except Exception as e:
-            raise GeneratorError(f"Failed to write meta-skill file: {e}") from e
+        Generate the global /deepwork skill that instructs agents to use MCP tools.
 
-        return skill_path
-
-    def generate_step_skill(
-        self,
-        job: JobDefinition,
-        step: Step,
-        adapter: AgentAdapter,
-        output_dir: Path | str,
-        project_root: Path | str | None = None,
-    ) -> Path:
-        """
-        Generate skill file for a single step.
+        This is a single skill that provides the main entry point for DeepWork,
+        directing agents to use the MCP server's tools for workflow management.
 
         Args:
-            job: Job definition
-            step: Step to generate skill for
             adapter: Agent adapter for the target platform
             output_dir: Directory to write skill file to
-            project_root: Optional project root for loading doc specs (defaults to output_dir)
 
         Returns:
             Path to generated skill file
@@ -498,80 +78,38 @@ def generate_step_skill(
             GeneratorError: If generation fails
         """
         output_dir = Path(output_dir)
-        project_root_path = Path(project_root) if project_root else output_dir
 
         # Create skills subdirectory if needed
         skills_dir = output_dir / adapter.skills_dir
         skills_dir.mkdir(parents=True, exist_ok=True)
 
-        # Find step index
-        try:
-            step_index = next(i for i, s in enumerate(job.steps) if s.id == step.id)
-        except StopIteration as e:
-            raise GeneratorError(f"Step '{step.id}' not found in job '{job.name}'") from e
-
-        # Build context (include exposed for template user-invocable setting)
-        context = self._build_step_context(job, step, step_index, adapter, project_root_path)
-        context["exposed"] = step.exposed
-
         # Load and render template
         env = self._get_jinja_env(adapter)
+        template_name = "skill-deepwork.md.jinja"
+
         try:
-            template = env.get_template(adapter.skill_template)
+            template = env.get_template(template_name)
         except TemplateNotFound as e:
-            raise GeneratorError(f"Template not found: {e}") from e
+            raise GeneratorError(f"DeepWork skill template not found: {e}") from e
 
         try:
-            rendered = template.render(**context)
+            rendered = template.render()
         except Exception as e:
-            raise GeneratorError(f"Template rendering failed: {e}") from e
+            raise GeneratorError(f"DeepWork skill template rendering failed: {e}") from e
 
         # Write skill file
-        skill_filename = adapter.get_step_skill_filename(job.name, step.id, step.exposed)
-        skill_path = skills_dir / skill_filename
+        # Use the adapter's convention for naming
+        if adapter.name == "gemini":
+            skill_filename = "deepwork/index.toml"
+        else:
+            skill_filename = "deepwork/SKILL.md"
 
-        # Ensure parent directories exist (for Gemini's job_name/step_id.toml structure)
+        skill_path = skills_dir / skill_filename
         skill_path.parent.mkdir(parents=True, exist_ok=True)
 
         try:
             safe_write(skill_path, rendered)
         except Exception as e:
-            raise GeneratorError(f"Failed to write skill file: {e}") from e
+            raise GeneratorError(f"Failed to write DeepWork skill file: {e}") from e
 
         return skill_path
-
-    def generate_all_skills(
-        self,
-        job: JobDefinition,
-        adapter: AgentAdapter,
-        output_dir: Path | str,
-        project_root: Path | str | None = None,
-    ) -> list[Path]:
-        """
-        Generate all skill files for a job: meta-skill and step skills.
-
-        Args:
-            job: Job definition
-            adapter: Agent adapter for the target platform
-            output_dir: Directory to write skill files to
-            project_root: Optional project root for loading doc specs (defaults to output_dir)
-
-        Returns:
-            List of paths to generated skill files (meta-skill first, then steps)
-
-        Raises:
-            GeneratorError: If generation fails
-        """
-        skill_paths = []
-        project_root_path = Path(project_root) if project_root else Path(output_dir)
-
-        # Generate meta-skill first (job-level entry point)
-        meta_skill_path = self.generate_meta_skill(job, adapter, output_dir)
-        skill_paths.append(meta_skill_path)
-
-        # Generate step skills
-        for step in job.steps:
-            skill_path = self.generate_step_skill(job, step, adapter, output_dir, project_root_path)
-            skill_paths.append(skill_path)
-
-        return skill_paths
diff --git a/src/deepwork/core/hooks_syncer.py b/src/deepwork/core/hooks_syncer.py
index 35a01036..86fb17e4 100644
--- a/src/deepwork/core/hooks_syncer.py
+++ b/src/deepwork/core/hooks_syncer.py
@@ -36,7 +36,7 @@ def get_command(self, project_path: Path) -> str:
         """
         if self.module:
             # Python module - use deepwork hook CLI for portability
-            # Extract hook name from module path (e.g., "deepwork.hooks.rules_check" -> "rules_check")
+            # Extract hook name from module path (e.g., "deepwork.hooks.my_hook" -> "my_hook")
             hook_name = self.module.rsplit(".", 1)[-1]
             return f"deepwork hook {hook_name}"
         elif self.script:
diff --git a/src/deepwork/core/parser.py b/src/deepwork/core/parser.py
index 480ab6d4..b6f6a380 100644
--- a/src/deepwork/core/parser.py
+++ b/src/deepwork/core/parser.py
@@ -1,5 +1,6 @@
 """Job definition parser."""
 
+import logging
 from dataclasses import dataclass, field
 from pathlib import Path
 from typing import Any
@@ -8,6 +9,8 @@
 from deepwork.utils.validation import ValidationError, validate_against_schema
 from deepwork.utils.yaml_utils import YAMLError, load_yaml
 
+logger = logging.getLogger("deepwork.parser")
+
 
 class ParseError(Exception):
     """Exception raised for job parsing errors."""
@@ -48,29 +51,21 @@ def from_dict(cls, data: dict[str, Any]) -> "StepInput":
 
 @dataclass
 class OutputSpec:
-    """Represents a step output specification, optionally with doc spec reference."""
-
-    file: str
-    doc_spec: str | None = None
+    """Represents a step output specification with type information."""
 
-    def has_doc_spec(self) -> bool:
-        """Check if this output has a doc spec reference."""
-        return self.doc_spec is not None
+    name: str
+    type: str  # "file" or "files"
+    description: str
+    required: bool
 
     @classmethod
-    def from_dict(cls, data: dict[str, Any] | str) -> "OutputSpec":
-        """
-        Create OutputSpec from dictionary or string.
-
-        Supports both formats:
-        - String: "output.md" -> OutputSpec(file="output.md")
-        - Dict: {"file": "output.md", "doc_spec": ".deepwork/doc_specs/report.md"}
-        """
-        if isinstance(data, str):
-            return cls(file=data)
+    def from_dict(cls, name: str, data: dict[str, Any]) -> "OutputSpec":
+        """Create OutputSpec from output name and its specification dict."""
         return cls(
-            file=data["file"],
-            doc_spec=data.get("doc_spec"),
+            name=name,
+            type=data["type"],
+            description=data["description"],
+            required=data["required"],
         )
 
 
@@ -120,6 +115,24 @@ def from_dict(cls, data: dict[str, Any]) -> "HookAction":
 StopHook = HookAction
 
 
+@dataclass
+class Review:
+    """Represents a quality review for step outputs."""
+
+    run_each: str  # "step" or output name
+    quality_criteria: dict[str, str]  # name → question
+    additional_review_guidance: str | None = None  # optional guidance for reviewer
+
+    @classmethod
+    def from_dict(cls, data: dict[str, Any]) -> "Review":
+        """Create Review from dictionary."""
+        return cls(
+            run_each=data["run_each"],
+            quality_criteria=data.get("quality_criteria", {}),
+            additional_review_guidance=data.get("additional_review_guidance"),
+        )
+
+
 @dataclass
 class Step:
     """Represents a single step in a job."""
@@ -139,8 +152,8 @@ class Step:
     # If true, skill is user-invocable in menus. Default: false (hidden from menus).
     exposed: bool = False
 
-    # Declarative quality criteria rendered with standard evaluation framing
-    quality_criteria: list[str] = field(default_factory=list)
+    # Quality reviews to run when step completes
+    reviews: list[Review] = field(default_factory=list)
 
     # Agent type for this step (e.g., "general-purpose"). When set, skill uses context: fork
     agent: str | None = None
@@ -178,11 +191,13 @@ def from_dict(cls, data: dict[str, Any]) -> "Step":
             description=data["description"],
             instructions_file=data["instructions_file"],
             inputs=[StepInput.from_dict(inp) for inp in data.get("inputs", [])],
-            outputs=[OutputSpec.from_dict(out) for out in data["outputs"]],
+            outputs=[
+                OutputSpec.from_dict(name, spec) for name, spec in data.get("outputs", {}).items()
+            ],
             dependencies=data.get("dependencies", []),
             hooks=hooks,
             exposed=data.get("exposed", False),
-            quality_criteria=data.get("quality_criteria", []),
+            reviews=[Review.from_dict(r) for r in data.get("reviews", [])],
             agent=data.get("agent"),
         )
 
@@ -350,39 +365,22 @@ def validate_file_inputs(self) -> None:
                             f"but '{inp.from_step}' is not in dependencies"
                         )
 
-    def validate_doc_spec_references(self, project_root: Path) -> None:
+    def validate_reviews(self) -> None:
         """
-        Validate that doc spec references in outputs point to existing files.
-
-        Args:
-            project_root: Path to the project root directory
+        Validate that review run_each values reference valid output names or 'step'.
 
         Raises:
-            ParseError: If doc spec references are invalid
+            ParseError: If run_each references an invalid output name
         """
         for step in self.steps:
-            for output in step.outputs:
-                if output.has_doc_spec():
-                    doc_spec_file = project_root / output.doc_spec
-                    if not doc_spec_file.exists():
-                        raise ParseError(
-                            f"Step '{step.id}' references non-existent doc spec "
-                            f"'{output.doc_spec}'. Expected file at {doc_spec_file}"
-                        )
-
-    def get_doc_spec_references(self) -> list[str]:
-        """
-        Get all unique doc spec file paths referenced in this job's outputs.
-
-        Returns:
-            List of doc spec file paths (e.g., ".deepwork/doc_specs/report.md")
-        """
-        doc_spec_refs = set()
-        for step in self.steps:
-            for output in step.outputs:
-                if output.has_doc_spec() and output.doc_spec:
-                    doc_spec_refs.add(output.doc_spec)
-        return list(doc_spec_refs)
+            output_names = {out.name for out in step.outputs}
+            for review in step.reviews:
+                if review.run_each != "step" and review.run_each not in output_names:
+                    raise ParseError(
+                        f"Step '{step.id}' has review with run_each='{review.run_each}' "
+                        f"but no output with that name. "
+                        f"Valid values: 'step', {', '.join(sorted(output_names)) or '(no outputs)'}"
+                    )
 
     def get_workflow_for_step(self, step_id: str) -> Workflow | None:
         """
@@ -543,6 +541,31 @@ def validate_workflows(self) -> None:
                     )
                 seen_steps.add(step_id)
 
+    def warn_orphaned_steps(self) -> list[str]:
+        """
+        Check for steps not included in any workflow and emit warnings.
+
+        Returns:
+            List of orphaned step IDs
+        """
+        # Collect all step IDs referenced in workflows
+        workflow_step_ids: set[str] = set()
+        for workflow in self.workflows:
+            workflow_step_ids.update(workflow.steps)
+
+        # Find orphaned steps
+        orphaned_steps = [step.id for step in self.steps if step.id not in workflow_step_ids]
+
+        if orphaned_steps:
+            logger.warning(
+                "Job '%s' has steps not included in any workflow: %s. "
+                "These steps are not accessible via the MCP interface.",
+                self.name,
+                ", ".join(orphaned_steps),
+            )
+
+        return orphaned_steps
+
     @classmethod
     def from_dict(cls, data: dict[str, Any], job_dir: Path) -> "JobDefinition":
         """
@@ -610,9 +633,13 @@ def parse_job_definition(job_dir: Path | str) -> JobDefinition:
     # Parse into dataclass
     job_def = JobDefinition.from_dict(job_data, job_dir_path)
 
-    # Validate dependencies, file inputs, and workflows
+    # Validate dependencies, file inputs, reviews, and workflows
     job_def.validate_dependencies()
     job_def.validate_file_inputs()
+    job_def.validate_reviews()
     job_def.validate_workflows()
 
+    # Warn about orphaned steps (not in any workflow)
+    job_def.warn_orphaned_steps()
+
     return job_def
diff --git a/src/deepwork/core/pattern_matcher.py b/src/deepwork/core/pattern_matcher.py
deleted file mode 100644
index c82ec723..00000000
--- a/src/deepwork/core/pattern_matcher.py
+++ /dev/null
@@ -1,271 +0,0 @@
-"""Pattern matching with variable extraction for rule file correspondence."""
-
-import re
-from dataclasses import dataclass
-from fnmatch import fnmatch
-
-
-class PatternError(Exception):
-    """Exception raised for invalid pattern syntax."""
-
-    pass
-
-
-@dataclass
-class MatchResult:
-    """Result of matching a file against a pattern."""
-
-    matched: bool
-    variables: dict[str, str]  # Captured variable values
-
-    @classmethod
-    def no_match(cls) -> "MatchResult":
-        return cls(matched=False, variables={})
-
-    @classmethod
-    def match(cls, variables: dict[str, str] | None = None) -> "MatchResult":
-        return cls(matched=True, variables=variables or {})
-
-
-def validate_pattern(pattern: str) -> None:
-    """
-    Validate pattern syntax.
-
-    Raises:
-        PatternError: If pattern has invalid syntax
-    """
-    # Check for unbalanced braces
-    brace_depth = 0
-    for i, char in enumerate(pattern):
-        if char == "{":
-            brace_depth += 1
-        elif char == "}":
-            brace_depth -= 1
-            if brace_depth < 0:
-                raise PatternError(f"Unmatched closing brace at position {i}")
-
-    if brace_depth > 0:
-        raise PatternError("Unclosed brace in pattern")
-
-    # Extract and validate variable names
-    var_pattern = r"\{([^}]*)\}"
-    seen_vars: set[str] = set()
-
-    for match in re.finditer(var_pattern, pattern):
-        var_name = match.group(1)
-
-        # Check for empty variable name
-        if not var_name:
-            raise PatternError("Empty variable name in pattern")
-
-        # Strip leading ** or * for validation
-        clean_name = var_name.lstrip("*")
-        if not clean_name:
-            # Just {*} or {**} is valid
-            continue
-
-        # Check for invalid characters in variable name
-        if "/" in clean_name or "\\" in clean_name:
-            raise PatternError(f"Invalid character in variable name: {var_name}")
-
-        # Check for duplicates (use clean name for comparison)
-        if clean_name in seen_vars:
-            raise PatternError(f"Duplicate variable: {clean_name}")
-        seen_vars.add(clean_name)
-
-
-def pattern_to_regex(pattern: str) -> tuple[str, list[str]]:
-    """
-    Convert a pattern with {var} placeholders to a regex.
-
-    Variables:
-    - {path} or {**name} - Matches multiple path segments (.+)
-    - {name} or {*name} - Matches single path segment ([^/]+)
-
-    Args:
-        pattern: Pattern string like "src/{path}.py"
-
-    Returns:
-        Tuple of (regex_pattern, list_of_variable_names)
-
-    Raises:
-        PatternError: If pattern has invalid syntax
-    """
-    validate_pattern(pattern)
-
-    # Normalize path separators
-    pattern = pattern.replace("\\", "/")
-
-    result: list[str] = []
-    var_names: list[str] = []
-    pos = 0
-
-    # Parse pattern segments
-    while pos < len(pattern):
-        # Look for next variable
-        brace_start = pattern.find("{", pos)
-
-        if brace_start == -1:
-            # No more variables, escape the rest
-            result.append(re.escape(pattern[pos:]))
-            break
-
-        # Escape literal part before variable
-        if brace_start > pos:
-            result.append(re.escape(pattern[pos:brace_start]))
-
-        # Find end of variable
-        brace_end = pattern.find("}", brace_start)
-        if brace_end == -1:
-            raise PatternError("Unclosed brace in pattern")
-
-        var_spec = pattern[brace_start + 1 : brace_end]
-
-        # Determine variable type and name
-        if var_spec.startswith("**"):
-            # Explicit multi-segment: {**name}
-            var_name = var_spec[2:] or "path"
-            regex_part = f"(?P<{re.escape(var_name)}>.+)"
-        elif var_spec.startswith("*"):
-            # Explicit single-segment: {*name}
-            var_name = var_spec[1:] or "name"
-            regex_part = f"(?P<{re.escape(var_name)}>[^/]+)"
-        elif var_spec == "path":
-            # Conventional multi-segment
-            var_name = "path"
-            regex_part = "(?P<path>.+)"
-        else:
-            # Default single-segment (including custom names)
-            var_name = var_spec
-            regex_part = f"(?P<{re.escape(var_name)}>[^/]+)"
-
-        result.append(regex_part)
-        var_names.append(var_name)
-        pos = brace_end + 1
-
-    return "^" + "".join(result) + "$", var_names
-
-
-def match_pattern(pattern: str, filepath: str) -> MatchResult:
-    """
-    Match a filepath against a pattern, extracting variables.
-
-    Args:
-        pattern: Pattern with {var} placeholders
-        filepath: File path to match
-
-    Returns:
-        MatchResult with matched=True and captured variables, or matched=False
-    """
-    # Normalize path separators
-    filepath = filepath.replace("\\", "/")
-
-    try:
-        regex, _ = pattern_to_regex(pattern)
-    except PatternError:
-        return MatchResult.no_match()
-
-    match = re.fullmatch(regex, filepath)
-    if match:
-        return MatchResult.match(match.groupdict())
-    return MatchResult.no_match()
-
-
-def resolve_pattern(pattern: str, variables: dict[str, str]) -> str:
-    """
-    Substitute variables into a pattern to generate a filepath.
-
-    Args:
-        pattern: Pattern with {var} placeholders
-        variables: Dict of variable name -> value
-
-    Returns:
-        Resolved filepath string
-    """
-    result = pattern
-    for name, value in variables.items():
-        # Handle both {name} and {*name} / {**name} forms
-        result = result.replace(f"{{{name}}}", value)
-        result = result.replace(f"{{*{name}}}", value)
-        result = result.replace(f"{{**{name}}}", value)
-    return result
-
-
-def matches_glob(file_path: str, pattern: str) -> bool:
-    """
-    Match a file path against a glob pattern, supporting ** for recursive matching.
-
-    This is for simple glob patterns without variable capture.
-
-    Args:
-        file_path: File path to check
-        pattern: Glob pattern (supports *, **, ?)
-
-    Returns:
-        True if matches
-    """
-    # Normalize path separators
-    file_path = file_path.replace("\\", "/")
-    pattern = pattern.replace("\\", "/")
-
-    # Handle ** patterns (recursive directory matching)
-    if "**" in pattern:
-        # Split pattern by **
-        parts = pattern.split("**")
-
-        if len(parts) == 2:
-            prefix, suffix = parts[0], parts[1]
-
-            # Remove leading/trailing slashes from suffix
-            suffix = suffix.lstrip("/")
-
-            # Check if prefix matches the start of the path
-            if prefix:
-                prefix = prefix.rstrip("/")
-                if not file_path.startswith(prefix + "/") and file_path != prefix:
-                    return False
-                # Get the remaining path after prefix
-                remaining = file_path[len(prefix) :].lstrip("/")
-            else:
-                remaining = file_path
-
-            # If no suffix, any remaining path matches
-            if not suffix:
-                return True
-
-            # Check if suffix matches the end of any remaining path segment
-            remaining_parts = remaining.split("/")
-            for i in range(len(remaining_parts)):
-                test_path = "/".join(remaining_parts[i:])
-                if fnmatch(test_path, suffix):
-                    return True
-                # Also try just the filename
-                if fnmatch(remaining_parts[-1], suffix):
-                    return True
-
-            return False
-
-    # Simple pattern without **
-    return fnmatch(file_path, pattern)
-
-
-def matches_any_pattern(file_path: str, patterns: list[str]) -> bool:
-    """
-    Check if a file path matches any of the given glob patterns.
-
-    Args:
-        file_path: File path to check (relative path)
-        patterns: List of glob patterns to match against
-
-    Returns:
-        True if the file matches any pattern
-    """
-    for pattern in patterns:
-        if matches_glob(file_path, pattern):
-            return True
-    return False
-
-
-def has_variables(pattern: str) -> bool:
-    """Check if a pattern contains variable placeholders."""
-    return "{" in pattern and "}" in pattern
diff --git a/src/deepwork/core/rules_parser.py b/src/deepwork/core/rules_parser.py
deleted file mode 100644
index 04b1e3d2..00000000
--- a/src/deepwork/core/rules_parser.py
+++ /dev/null
@@ -1,559 +0,0 @@
-"""Rule definition parser (v2 - frontmatter markdown format)."""
-
-from dataclasses import dataclass, field
-from enum import Enum
-from pathlib import Path
-from typing import Any
-
-import yaml
-
-from deepwork.core.pattern_matcher import (
-    has_variables,
-    match_pattern,
-    matches_any_pattern,
-    resolve_pattern,
-)
-from deepwork.schemas.rules_schema import RULES_FRONTMATTER_SCHEMA
-from deepwork.utils.validation import ValidationError, validate_against_schema
-
-
-class RulesParseError(Exception):
-    """Exception raised for rule parsing errors."""
-
-    pass
-
-
-class DetectionMode(Enum):
-    """How the rule detects when to fire."""
-
-    TRIGGER_SAFETY = "trigger_safety"  # Fire when trigger matches, safety doesn't
-    SET = "set"  # Bidirectional file correspondence
-    PAIR = "pair"  # Directional file correspondence
-    CREATED = "created"  # Fire when created files match patterns
-
-
-class ActionType(Enum):
-    """What happens when the rule fires."""
-
-    PROMPT = "prompt"  # Show instructions to agent (default)
-    COMMAND = "command"  # Run an idempotent command
-
-
-# Valid compare_to values
-COMPARE_TO_VALUES = frozenset({"base", "default_tip", "prompt"})
-
-
-@dataclass
-class CommandAction:
-    """Configuration for command action."""
-
-    command: str  # Command template (supports {file}, {files}, {repo_root})
-    run_for: str = "each_match"  # "each_match" or "all_matches"
-
-
-@dataclass
-class PairConfig:
-    """Configuration for pair detection mode."""
-
-    trigger: str  # Pattern that triggers
-    expects: list[str]  # Patterns for expected corresponding files
-
-
-@dataclass
-class Rule:
-    """Represents a single rule definition (v2 format)."""
-
-    # Identity
-    name: str  # Human-friendly name (displayed in promise tags)
-    filename: str  # Filename without .md extension (used for queue)
-
-    # Detection mode (exactly one must be set)
-    detection_mode: DetectionMode
-
-    # Common options (required)
-    compare_to: str  # Required: "base", "default_tip", or "prompt"
-
-    # Detection mode details (optional, depends on mode)
-    triggers: list[str] = field(default_factory=list)  # For TRIGGER_SAFETY mode
-    safety: list[str] = field(default_factory=list)  # For TRIGGER_SAFETY mode
-    set_patterns: list[str] = field(default_factory=list)  # For SET mode
-    pair_config: PairConfig | None = None  # For PAIR mode
-    created_patterns: list[str] = field(default_factory=list)  # For CREATED mode
-
-    # Action type
-    action_type: ActionType = ActionType.PROMPT
-    instructions: str = ""  # For PROMPT action (markdown body)
-    command_action: CommandAction | None = None  # For COMMAND action
-
-    @classmethod
-    def from_frontmatter(
-        cls,
-        frontmatter: dict[str, Any],
-        markdown_body: str,
-        filename: str,
-    ) -> "Rule":
-        """
-        Create Rule from parsed frontmatter and markdown body.
-
-        Args:
-            frontmatter: Parsed YAML frontmatter
-            markdown_body: Markdown content after frontmatter
-            filename: Filename without .md extension
-
-        Returns:
-            Rule instance
-
-        Raises:
-            RulesParseError: If validation fails
-        """
-        # Get name (required)
-        name = frontmatter.get("name", "")
-        if not name:
-            raise RulesParseError(f"Rule '{filename}' missing required 'name' field")
-
-        # Determine detection mode
-        has_trigger = "trigger" in frontmatter
-        has_set = "set" in frontmatter
-        has_pair = "pair" in frontmatter
-        has_created = "created" in frontmatter
-
-        mode_count = sum([has_trigger, has_set, has_pair, has_created])
-        if mode_count == 0:
-            raise RulesParseError(f"Rule '{name}' must have 'trigger', 'set', 'pair', or 'created'")
-        if mode_count > 1:
-            raise RulesParseError(f"Rule '{name}' has multiple detection modes - use only one")
-
-        # Parse based on detection mode
-        detection_mode: DetectionMode
-        triggers: list[str] = []
-        safety: list[str] = []
-        set_patterns: list[str] = []
-        pair_config: PairConfig | None = None
-        created_patterns: list[str] = []
-
-        if has_trigger:
-            detection_mode = DetectionMode.TRIGGER_SAFETY
-            trigger = frontmatter["trigger"]
-            triggers = [trigger] if isinstance(trigger, str) else list(trigger)
-            safety_data = frontmatter.get("safety", [])
-            safety = [safety_data] if isinstance(safety_data, str) else list(safety_data)
-
-        elif has_set:
-            detection_mode = DetectionMode.SET
-            set_patterns = list(frontmatter["set"])
-            if len(set_patterns) < 2:
-                raise RulesParseError(f"Rule '{name}' set requires at least 2 patterns")
-
-        elif has_pair:
-            detection_mode = DetectionMode.PAIR
-            pair_data = frontmatter["pair"]
-            expects = pair_data["expects"]
-            expects_list = [expects] if isinstance(expects, str) else list(expects)
-            pair_config = PairConfig(
-                trigger=pair_data["trigger"],
-                expects=expects_list,
-            )
-
-        elif has_created:
-            detection_mode = DetectionMode.CREATED
-            created = frontmatter["created"]
-            created_patterns = [created] if isinstance(created, str) else list(created)
-
-        # Determine action type
-        action_type: ActionType
-        command_action: CommandAction | None = None
-
-        if "action" in frontmatter:
-            action_type = ActionType.COMMAND
-            action_data = frontmatter["action"]
-            command_action = CommandAction(
-                command=action_data["command"],
-                run_for=action_data.get("run_for", "each_match"),
-            )
-        else:
-            action_type = ActionType.PROMPT
-            # Markdown body is the instructions
-            if not markdown_body.strip():
-                raise RulesParseError(f"Rule '{name}' with prompt action requires markdown body")
-
-        # Get compare_to (required field)
-        compare_to = frontmatter["compare_to"]
-
-        return cls(
-            name=name,
-            filename=filename,
-            detection_mode=detection_mode,
-            triggers=triggers,
-            safety=safety,
-            set_patterns=set_patterns,
-            pair_config=pair_config,
-            created_patterns=created_patterns,
-            action_type=action_type,
-            instructions=markdown_body.strip(),
-            command_action=command_action,
-            compare_to=compare_to,
-        )
-
-
-def parse_frontmatter_file(filepath: Path) -> tuple[dict[str, Any], str]:
-    """
-    Parse a markdown file with YAML frontmatter.
-
-    Args:
-        filepath: Path to .md file
-
-    Returns:
-        Tuple of (frontmatter_dict, markdown_body)
-
-    Raises:
-        RulesParseError: If parsing fails
-    """
-    try:
-        content = filepath.read_text(encoding="utf-8")
-    except OSError as e:
-        raise RulesParseError(f"Failed to read rule file: {e}") from e
-
-    # Split frontmatter from body
-    if not content.startswith("---"):
-        raise RulesParseError(
-            f"Rule file '{filepath.name}' must start with '---' frontmatter delimiter"
-        )
-
-    # Find end of frontmatter
-    end_marker = content.find("\n---", 3)
-    if end_marker == -1:
-        raise RulesParseError(
-            f"Rule file '{filepath.name}' missing closing '---' frontmatter delimiter"
-        )
-
-    frontmatter_str = content[4:end_marker]  # Skip initial "---\n"
-    markdown_body = content[end_marker + 4 :]  # Skip "\n---\n" or "\n---"
-
-    # Parse YAML frontmatter
-    try:
-        frontmatter = yaml.safe_load(frontmatter_str)
-    except yaml.YAMLError as e:
-        raise RulesParseError(f"Invalid YAML frontmatter in '{filepath.name}': {e}") from e
-
-    if frontmatter is None:
-        frontmatter = {}
-
-    if not isinstance(frontmatter, dict):
-        raise RulesParseError(
-            f"Frontmatter in '{filepath.name}' must be a mapping, got {type(frontmatter).__name__}"
-        )
-
-    return frontmatter, markdown_body
-
-
-def parse_rule_file(filepath: Path) -> Rule:
-    """
-    Parse a single rule from a frontmatter markdown file.
-
-    Args:
-        filepath: Path to .md file in .deepwork/rules/
-
-    Returns:
-        Parsed Rule object
-
-    Raises:
-        RulesParseError: If parsing or validation fails
-    """
-    if not filepath.exists():
-        raise RulesParseError(f"Rule file does not exist: {filepath}")
-
-    if not filepath.is_file():
-        raise RulesParseError(f"Rule path is not a file: {filepath}")
-
-    frontmatter, markdown_body = parse_frontmatter_file(filepath)
-
-    # Validate against schema
-    try:
-        validate_against_schema(frontmatter, RULES_FRONTMATTER_SCHEMA)
-    except ValidationError as e:
-        raise RulesParseError(f"Rule '{filepath.name}' validation failed: {e}") from e
-
-    # Create Rule object
-    filename = filepath.stem  # filename without .md extension
-    return Rule.from_frontmatter(frontmatter, markdown_body, filename)
-
-
-def load_rules_from_directory(rules_dir: Path) -> list[Rule]:
-    """
-    Load all rules from a directory.
-
-    Args:
-        rules_dir: Path to .deepwork/rules/ directory
-
-    Returns:
-        List of parsed Rule objects (sorted by filename)
-
-    Raises:
-        RulesParseError: If any rule file fails to parse
-    """
-    if not rules_dir.exists():
-        return []
-
-    if not rules_dir.is_dir():
-        raise RulesParseError(f"Rules path is not a directory: {rules_dir}")
-
-    rules = []
-    for filepath in sorted(rules_dir.glob("*.md")):
-        rule = parse_rule_file(filepath)
-        rules.append(rule)
-
-    return rules
-
-
-# =============================================================================
-# Evaluation Logic
-# =============================================================================
-
-
-def evaluate_trigger_safety(
-    rule: Rule,
-    changed_files: list[str],
-) -> bool:
-    """
-    Evaluate a trigger/safety mode rule.
-
-    Returns True if rule should fire:
-    - At least one changed file matches a trigger pattern
-    - AND no changed file matches a safety pattern
-    """
-    # Check if any trigger matches
-    trigger_matched = False
-    for file_path in changed_files:
-        if matches_any_pattern(file_path, rule.triggers):
-            trigger_matched = True
-            break
-
-    if not trigger_matched:
-        return False
-
-    # Check if any safety pattern matches
-    if rule.safety:
-        for file_path in changed_files:
-            if matches_any_pattern(file_path, rule.safety):
-                return False
-
-    return True
-
-
-def evaluate_set_correspondence(
-    rule: Rule,
-    changed_files: list[str],
-) -> tuple[bool, list[str], list[str]]:
-    """
-    Evaluate a set (bidirectional correspondence) rule.
-
-    Returns:
-        Tuple of (should_fire, trigger_files, missing_files)
-        - should_fire: True if correspondence is incomplete
-        - trigger_files: Files that triggered (matched a pattern)
-        - missing_files: Expected files that didn't change
-    """
-    trigger_files: list[str] = []
-    missing_files: list[str] = []
-    changed_set = set(changed_files)
-
-    for file_path in changed_files:
-        # Check each pattern in the set
-        for pattern in rule.set_patterns:
-            result = match_pattern(pattern, file_path)
-            if result.matched:
-                trigger_files.append(file_path)
-
-                # Check if all other corresponding files also changed
-                for other_pattern in rule.set_patterns:
-                    if other_pattern == pattern:
-                        continue
-
-                    if has_variables(other_pattern):
-                        expected = resolve_pattern(other_pattern, result.variables)
-                    else:
-                        expected = other_pattern
-
-                    if expected not in changed_set:
-                        if expected not in missing_files:
-                            missing_files.append(expected)
-
-                break  # Only match one pattern per file
-
-    # Rule fires if there are trigger files with missing correspondences
-    should_fire = len(trigger_files) > 0 and len(missing_files) > 0
-    return should_fire, trigger_files, missing_files
-
-
-def evaluate_pair_correspondence(
-    rule: Rule,
-    changed_files: list[str],
-) -> tuple[bool, list[str], list[str]]:
-    """
-    Evaluate a pair (directional correspondence) rule.
-
-    Only trigger-side changes require corresponding expected files.
-    Expected-side changes alone do not trigger.
-
-    Returns:
-        Tuple of (should_fire, trigger_files, missing_files)
-    """
-    if rule.pair_config is None:
-        return False, [], []
-
-    trigger_files: list[str] = []
-    missing_files: list[str] = []
-    changed_set = set(changed_files)
-
-    trigger_pattern = rule.pair_config.trigger
-    expects_patterns = rule.pair_config.expects
-
-    for file_path in changed_files:
-        # Only check trigger pattern (directional)
-        result = match_pattern(trigger_pattern, file_path)
-        if result.matched:
-            trigger_files.append(file_path)
-
-            # Check if all expected files also changed
-            for expects_pattern in expects_patterns:
-                if has_variables(expects_pattern):
-                    expected = resolve_pattern(expects_pattern, result.variables)
-                else:
-                    expected = expects_pattern
-
-                if expected not in changed_set:
-                    if expected not in missing_files:
-                        missing_files.append(expected)
-
-    should_fire = len(trigger_files) > 0 and len(missing_files) > 0
-    return should_fire, trigger_files, missing_files
-
-
-def evaluate_created(
-    rule: Rule,
-    created_files: list[str],
-) -> bool:
-    """
-    Evaluate a created mode rule.
-
-    Returns True if rule should fire:
-    - At least one created file matches a created pattern
-    """
-    for file_path in created_files:
-        if matches_any_pattern(file_path, rule.created_patterns):
-            return True
-    return False
-
-
-@dataclass
-class RuleEvaluationResult:
-    """Result of evaluating a single rule."""
-
-    rule: Rule
-    should_fire: bool
-    trigger_files: list[str] = field(default_factory=list)
-    missing_files: list[str] = field(default_factory=list)  # For set/pair modes
-
-
-def evaluate_rule(
-    rule: Rule,
-    changed_files: list[str],
-    created_files: list[str] | None = None,
-) -> RuleEvaluationResult:
-    """
-    Evaluate whether a rule should fire based on changed files.
-
-    Args:
-        rule: Rule to evaluate
-        changed_files: List of changed file paths (relative)
-        created_files: List of newly created file paths (relative), for CREATED mode
-
-    Returns:
-        RuleEvaluationResult with evaluation details
-    """
-    if rule.detection_mode == DetectionMode.TRIGGER_SAFETY:
-        should_fire = evaluate_trigger_safety(rule, changed_files)
-        trigger_files = (
-            [f for f in changed_files if matches_any_pattern(f, rule.triggers)]
-            if should_fire
-            else []
-        )
-        return RuleEvaluationResult(
-            rule=rule,
-            should_fire=should_fire,
-            trigger_files=trigger_files,
-        )
-
-    elif rule.detection_mode == DetectionMode.SET:
-        should_fire, trigger_files, missing_files = evaluate_set_correspondence(rule, changed_files)
-        return RuleEvaluationResult(
-            rule=rule,
-            should_fire=should_fire,
-            trigger_files=trigger_files,
-            missing_files=missing_files,
-        )
-
-    elif rule.detection_mode == DetectionMode.PAIR:
-        should_fire, trigger_files, missing_files = evaluate_pair_correspondence(
-            rule, changed_files
-        )
-        return RuleEvaluationResult(
-            rule=rule,
-            should_fire=should_fire,
-            trigger_files=trigger_files,
-            missing_files=missing_files,
-        )
-
-    elif rule.detection_mode == DetectionMode.CREATED:
-        files_to_check = created_files if created_files is not None else []
-        should_fire = evaluate_created(rule, files_to_check)
-        trigger_files = (
-            [f for f in files_to_check if matches_any_pattern(f, rule.created_patterns)]
-            if should_fire
-            else []
-        )
-        return RuleEvaluationResult(
-            rule=rule,
-            should_fire=should_fire,
-            trigger_files=trigger_files,
-        )
-
-    return RuleEvaluationResult(rule=rule, should_fire=False)
-
-
-def evaluate_rules(
-    rules: list[Rule],
-    changed_files: list[str],
-    promised_rules: set[str] | None = None,
-    created_files: list[str] | None = None,
-) -> list[RuleEvaluationResult]:
-    """
-    Evaluate which rules should fire.
-
-    Args:
-        rules: List of rules to evaluate
-        changed_files: List of changed file paths (relative)
-        promised_rules: Set of rule names that have been marked as addressed
-                          via <promise> tags (case-insensitive)
-        created_files: List of newly created file paths (relative), for CREATED mode
-
-    Returns:
-        List of RuleEvaluationResult for rules that should fire
-    """
-    if promised_rules is None:
-        promised_rules = set()
-
-    # Normalize promised names for case-insensitive comparison
-    promised_lower = {name.lower() for name in promised_rules}
-
-    results = []
-    for rule in rules:
-        # Skip if already promised/addressed (case-insensitive)
-        if rule.name.lower() in promised_lower:
-            continue
-
-        result = evaluate_rule(rule, changed_files, created_files)
-        if result.should_fire:
-            results.append(result)
-
-    return results
diff --git a/src/deepwork/core/rules_queue.py b/src/deepwork/core/rules_queue.py
deleted file mode 100644
index 4f49a4fe..00000000
--- a/src/deepwork/core/rules_queue.py
+++ /dev/null
@@ -1,321 +0,0 @@
-"""Queue system for tracking rule state in .deepwork/tmp/rules/queue/."""
-
-import hashlib
-import json
-from dataclasses import asdict, dataclass, field
-from datetime import UTC, datetime
-from enum import Enum
-from pathlib import Path
-from typing import Any
-
-
-class QueueEntryStatus(Enum):
-    """Status of a queue entry."""
-
-    QUEUED = "queued"  # Detected, awaiting evaluation
-    PASSED = "passed"  # Evaluated, rule satisfied (promise found or action succeeded)
-    FAILED = "failed"  # Evaluated, rule not satisfied
-    SKIPPED = "skipped"  # Safety pattern matched, skipped
-
-
-@dataclass
-class ActionResult:
-    """Result of executing a rule action."""
-
-    type: str  # "prompt" or "command"
-    output: str | None = None  # Command stdout or prompt message shown
-    exit_code: int | None = None  # Command exit code (None for prompt)
-
-
-@dataclass
-class QueueEntry:
-    """A single entry in the rules queue."""
-
-    # Identity
-    rule_name: str  # Human-friendly name
-    rule_file: str  # Filename (e.g., "source-test-pairing.md")
-    trigger_hash: str  # Hash for deduplication
-
-    # State
-    status: QueueEntryStatus = QueueEntryStatus.QUEUED
-    created_at: str = ""  # ISO8601 timestamp
-    evaluated_at: str | None = None  # ISO8601 timestamp
-
-    # Context
-    baseline_ref: str = ""  # Commit hash or timestamp used as baseline
-    trigger_files: list[str] = field(default_factory=list)
-    expected_files: list[str] = field(default_factory=list)  # For set/pair modes
-    matched_files: list[str] = field(default_factory=list)  # Files that also changed
-
-    # Result
-    action_result: ActionResult | None = None
-
-    def __post_init__(self) -> None:
-        if not self.created_at:
-            self.created_at = datetime.now(UTC).isoformat()
-
-    def to_dict(self) -> dict[str, Any]:
-        """Convert to dictionary for JSON serialization."""
-        data = asdict(self)
-        data["status"] = self.status.value
-        if self.action_result:
-            data["action_result"] = asdict(self.action_result)
-        return data
-
-    @classmethod
-    def from_dict(cls, data: dict[str, Any]) -> "QueueEntry":
-        """Create from dictionary."""
-        action_result = None
-        if data.get("action_result"):
-            action_result = ActionResult(**data["action_result"])
-
-        return cls(
-            rule_name=data.get("rule_name", data.get("policy_name", "")),
-            rule_file=data.get("rule_file", data.get("policy_file", "")),
-            trigger_hash=data["trigger_hash"],
-            status=QueueEntryStatus(data["status"]),
-            created_at=data.get("created_at", ""),
-            evaluated_at=data.get("evaluated_at"),
-            baseline_ref=data.get("baseline_ref", ""),
-            trigger_files=data.get("trigger_files", []),
-            expected_files=data.get("expected_files", []),
-            matched_files=data.get("matched_files", []),
-            action_result=action_result,
-        )
-
-
-def compute_trigger_hash(
-    rule_name: str,
-    trigger_files: list[str],
-    baseline_ref: str,
-) -> str:
-    """
-    Compute a hash for deduplication.
-
-    The hash is based on:
-    - Rule name
-    - Sorted list of trigger files
-    - Baseline reference (commit hash or timestamp)
-
-    Returns:
-        12-character hex hash
-    """
-    hash_input = f"{rule_name}:{sorted(trigger_files)}:{baseline_ref}"
-    return hashlib.sha256(hash_input.encode()).hexdigest()[:12]
-
-
-class RulesQueue:
-    """
-    Manages the rules queue in .deepwork/tmp/rules/queue/.
-
-    Queue entries are stored as JSON files named {hash}.{status}.json
-    """
-
-    def __init__(self, queue_dir: Path | None = None):
-        """
-        Initialize the queue.
-
-        Args:
-            queue_dir: Path to queue directory. Defaults to .deepwork/tmp/rules/queue/
-        """
-        if queue_dir is None:
-            queue_dir = Path(".deepwork/tmp/rules/queue")
-        self.queue_dir = queue_dir
-
-    def _ensure_dir(self) -> None:
-        """Ensure queue directory exists."""
-        self.queue_dir.mkdir(parents=True, exist_ok=True)
-
-    def _get_entry_path(self, trigger_hash: str, status: QueueEntryStatus) -> Path:
-        """Get path for an entry file."""
-        return self.queue_dir / f"{trigger_hash}.{status.value}.json"
-
-    def _find_entry_path(self, trigger_hash: str) -> Path | None:
-        """Find existing entry file for a hash (any status)."""
-        for status in QueueEntryStatus:
-            path = self._get_entry_path(trigger_hash, status)
-            if path.exists():
-                return path
-        return None
-
-    def has_entry(self, trigger_hash: str) -> bool:
-        """Check if an entry exists for this hash."""
-        return self._find_entry_path(trigger_hash) is not None
-
-    def get_entry(self, trigger_hash: str) -> QueueEntry | None:
-        """Get an entry by hash."""
-        path = self._find_entry_path(trigger_hash)
-        if path is None:
-            return None
-
-        try:
-            with open(path, encoding="utf-8") as f:
-                data = json.load(f)
-            return QueueEntry.from_dict(data)
-        except (json.JSONDecodeError, OSError, KeyError):
-            return None
-
-    def create_entry(
-        self,
-        rule_name: str,
-        rule_file: str,
-        trigger_files: list[str],
-        baseline_ref: str,
-        expected_files: list[str] | None = None,
-    ) -> QueueEntry | None:
-        """
-        Create a new queue entry if one doesn't already exist.
-
-        Args:
-            rule_name: Human-friendly rule name
-            rule_file: Rule filename (e.g., "source-test-pairing.md")
-            trigger_files: Files that triggered the rule
-            baseline_ref: Baseline reference for change detection
-            expected_files: Expected corresponding files (for set/pair)
-
-        Returns:
-            Created QueueEntry, or None if entry already exists
-        """
-        trigger_hash = compute_trigger_hash(rule_name, trigger_files, baseline_ref)
-
-        # Check if already exists
-        if self.has_entry(trigger_hash):
-            return None
-
-        self._ensure_dir()
-
-        entry = QueueEntry(
-            rule_name=rule_name,
-            rule_file=rule_file,
-            trigger_hash=trigger_hash,
-            status=QueueEntryStatus.QUEUED,
-            baseline_ref=baseline_ref,
-            trigger_files=trigger_files,
-            expected_files=expected_files or [],
-        )
-
-        path = self._get_entry_path(trigger_hash, QueueEntryStatus.QUEUED)
-        with open(path, "w", encoding="utf-8") as f:
-            json.dump(entry.to_dict(), f, indent=2)
-
-        return entry
-
-    def update_status(
-        self,
-        trigger_hash: str,
-        new_status: QueueEntryStatus,
-        action_result: ActionResult | None = None,
-    ) -> bool:
-        """
-        Update the status of an entry.
-
-        This renames the file to reflect the new status.
-
-        Args:
-            trigger_hash: Hash of the entry to update
-            new_status: New status
-            action_result: Optional result of action execution
-
-        Returns:
-            True if updated, False if entry not found
-        """
-        old_path = self._find_entry_path(trigger_hash)
-        if old_path is None:
-            return False
-
-        # Load existing entry
-        try:
-            with open(old_path, encoding="utf-8") as f:
-                data = json.load(f)
-        except (json.JSONDecodeError, OSError):
-            return False
-
-        # Update fields
-        data["status"] = new_status.value
-        data["evaluated_at"] = datetime.now(UTC).isoformat()
-        if action_result:
-            data["action_result"] = asdict(action_result)
-
-        # Write to new path
-        new_path = self._get_entry_path(trigger_hash, new_status)
-
-        # If status didn't change, just update in place
-        if old_path == new_path:
-            with open(new_path, "w", encoding="utf-8") as f:
-                json.dump(data, f, indent=2)
-        else:
-            # Write new file then delete old
-            with open(new_path, "w", encoding="utf-8") as f:
-                json.dump(data, f, indent=2)
-            old_path.unlink()
-
-        return True
-
-    def get_queued_entries(self) -> list[QueueEntry]:
-        """Get all entries with QUEUED status."""
-        if not self.queue_dir.exists():
-            return []
-
-        entries = []
-        for path in self.queue_dir.glob("*.queued.json"):
-            try:
-                with open(path, encoding="utf-8") as f:
-                    data = json.load(f)
-                entries.append(QueueEntry.from_dict(data))
-            except (json.JSONDecodeError, OSError, KeyError):
-                continue
-
-        return entries
-
-    def get_all_entries(self) -> list[QueueEntry]:
-        """Get all entries regardless of status."""
-        if not self.queue_dir.exists():
-            return []
-
-        entries = []
-        for path in self.queue_dir.glob("*.json"):
-            try:
-                with open(path, encoding="utf-8") as f:
-                    data = json.load(f)
-                entries.append(QueueEntry.from_dict(data))
-            except (json.JSONDecodeError, OSError, KeyError):
-                continue
-
-        return entries
-
-    def clear(self) -> int:
-        """
-        Clear all entries from the queue.
-
-        Returns:
-            Number of entries removed
-        """
-        if not self.queue_dir.exists():
-            return 0
-
-        count = 0
-        for path in self.queue_dir.glob("*.json"):
-            try:
-                path.unlink()
-                count += 1
-            except OSError:
-                continue
-
-        return count
-
-    def remove_entry(self, trigger_hash: str) -> bool:
-        """
-        Remove an entry by hash.
-
-        Returns:
-            True if removed, False if not found
-        """
-        path = self._find_entry_path(trigger_hash)
-        if path is None:
-            return False
-
-        try:
-            path.unlink()
-            return True
-        except OSError:
-            return False
diff --git a/src/deepwork/hooks/README.md b/src/deepwork/hooks/README.md
index 9c3dd887..262f1cf9 100644
--- a/src/deepwork/hooks/README.md
+++ b/src/deepwork/hooks/README.md
@@ -15,51 +15,8 @@ The hook system provides:
    - Output denormalization (decision values, JSON structure)
    - Cross-platform compatibility
 
-3. **Hook implementations**:
-   - `rules_check.py` - Evaluates DeepWork rules on `after_agent` events
-
 ## Usage
 
-### Registering Hooks
-
-#### Claude Code (`.claude/settings.json`)
-
-```json
-{
-  "hooks": {
-    "Stop": [
-      {
-        "hooks": [
-          {
-            "type": "command",
-            "command": "path/to/claude_hook.sh deepwork.hooks.rules_check"
-          }
-        ]
-      }
-    ]
-  }
-}
-```
-
-#### Gemini CLI (`.gemini/settings.json`)
-
-```json
-{
-  "hooks": {
-    "AfterAgent": [
-      {
-        "hooks": [
-          {
-            "type": "command",
-            "command": "path/to/gemini_hook.sh deepwork.hooks.rules_check"
-          }
-        ]
-      }
-    ]
-  }
-}
-```
-
 ### Writing Custom Hooks
 
 1. Create a new Python module in `deepwork/hooks/`:
@@ -178,4 +135,3 @@ pytest tests/shell_script_tests/test_hook_wrappers.py -v
 | `wrapper.py` | Cross-platform input/output normalization |
 | `claude_hook.sh` | Shell wrapper for Claude Code |
 | `gemini_hook.sh` | Shell wrapper for Gemini CLI |
-| `rules_check.py` | Cross-platform rule evaluation hook |
diff --git a/src/deepwork/hooks/__init__.py b/src/deepwork/hooks/__init__.py
index 5e9d8d43..34ab144e 100644
--- a/src/deepwork/hooks/__init__.py
+++ b/src/deepwork/hooks/__init__.py
@@ -1,4 +1,4 @@
-"""DeepWork hooks package for rules enforcement and lifecycle events.
+"""DeepWork hooks package for lifecycle events.
 
 This package provides:
 
@@ -7,9 +7,6 @@
    - claude_hook.sh: Shell wrapper for Claude Code hooks
    - gemini_hook.sh: Shell wrapper for Gemini CLI hooks
 
-2. Hook implementations:
-   - rules_check.py: Evaluates rules on after_agent events
-
 Usage with wrapper system:
     # Register hook in .claude/settings.json:
     {
@@ -17,7 +14,7 @@
         "Stop": [{
           "hooks": [{
             "type": "command",
-            "command": ".deepwork/hooks/claude_hook.sh rules_check"
+            "command": ".deepwork/hooks/claude_hook.sh my_hook"
           }]
         }]
       }
@@ -29,7 +26,7 @@
         "AfterAgent": [{
           "hooks": [{
             "type": "command",
-            "command": ".gemini/hooks/gemini_hook.sh rules_check"
+            "command": ".gemini/hooks/gemini_hook.sh my_hook"
           }]
         }]
       }
diff --git a/src/deepwork/hooks/check_version.sh b/src/deepwork/hooks/check_version.sh
index c02b052e..21caabc1 100755
--- a/src/deepwork/hooks/check_version.sh
+++ b/src/deepwork/hooks/check_version.sh
@@ -60,10 +60,8 @@ fi
 # nothing else will work.
 
 check_deepwork_installed() {
-    # Run 'deepwork rules clear_queue' instead of just '--version' for double utility:
-    # 1. Verifies that the 'deepwork' command is installed and directly invokable
-    # 2. Clears any stale rules from the queue, ensuring a clean slate for the session
-    if ! deepwork rules clear_queue >/dev/null 2>&1; then
+    # Run 'deepwork --version' to verify the command is installed and directly invokable
+    if ! deepwork --version >/dev/null 2>&1; then
         return 1
     fi
     return 0
@@ -79,11 +77,13 @@ print_deepwork_error() {
   ERROR: The 'deepwork' command is not available or cannot be directly invoked.
 
   DeepWork must be installed such that running 'deepwork' directly works.
-  For example, running 'deepwork rules clear_queue' should succeed.
+  For example, running 'deepwork --version' should succeed.
 
   IMPORTANT: Do NOT use 'uv run deepwork' or similar wrappers.
   The command must be directly invokable as just 'deepwork'.
 
+  To verify: 'deepwork --version' should succeed.
+
   ------------------------------------------------------------------------
   |                                                                      |
   |   Please fix your deepwork installation before proceeding.           |
diff --git a/src/deepwork/hooks/rules_check.py b/src/deepwork/hooks/rules_check.py
deleted file mode 100644
index 6ac2d652..00000000
--- a/src/deepwork/hooks/rules_check.py
+++ /dev/null
@@ -1,759 +0,0 @@
-"""
-Rules check hook for DeepWork (v2).
-
-This hook evaluates rules when the agent finishes (after_agent event).
-It uses the wrapper system for cross-platform compatibility.
-
-Rule files are loaded from .deepwork/rules/ directory as frontmatter markdown files.
-
-Usage (via shell wrapper - recommended):
-    claude_hook.sh rules_check
-    gemini_hook.sh rules_check
-
-Or directly via deepwork CLI:
-    deepwork hook rules_check
-
-Or with platform environment variable:
-    DEEPWORK_HOOK_PLATFORM=claude deepwork hook rules_check
-"""
-
-from __future__ import annotations
-
-import json
-import os
-import re
-import subprocess
-import sys
-from pathlib import Path
-
-from deepwork.core.command_executor import (
-    all_commands_succeeded,
-    format_command_errors,
-    run_command_action,
-)
-from deepwork.core.rules_parser import (
-    ActionType,
-    DetectionMode,
-    Rule,
-    RuleEvaluationResult,
-    RulesParseError,
-    evaluate_rules,
-    load_rules_from_directory,
-)
-from deepwork.core.rules_queue import (
-    ActionResult,
-    QueueEntryStatus,
-    RulesQueue,
-    compute_trigger_hash,
-)
-from deepwork.hooks.wrapper import (
-    HookInput,
-    HookOutput,
-    NormalizedEvent,
-    Platform,
-    run_hook,
-)
-
-
-def get_default_branch() -> str:
-    """Get the default branch name (main or master)."""
-    try:
-        result = subprocess.run(
-            ["git", "symbolic-ref", "refs/remotes/origin/HEAD"],
-            capture_output=True,
-            text=True,
-            check=True,
-        )
-        return result.stdout.strip().split("/")[-1]
-    except subprocess.CalledProcessError:
-        pass
-
-    for branch in ["main", "master"]:
-        try:
-            subprocess.run(
-                ["git", "rev-parse", "--verify", f"origin/{branch}"],
-                capture_output=True,
-                check=True,
-            )
-            return branch
-        except subprocess.CalledProcessError:
-            continue
-
-    return "main"
-
-
-def get_baseline_ref(mode: str) -> str:
-    """Get the baseline reference for a compare_to mode."""
-    if mode == "base":
-        try:
-            default_branch = get_default_branch()
-            result = subprocess.run(
-                ["git", "merge-base", "HEAD", f"origin/{default_branch}"],
-                capture_output=True,
-                text=True,
-                check=True,
-            )
-            return result.stdout.strip()
-        except subprocess.CalledProcessError:
-            return "base"
-    elif mode == "default_tip":
-        try:
-            default_branch = get_default_branch()
-            result = subprocess.run(
-                ["git", "rev-parse", f"origin/{default_branch}"],
-                capture_output=True,
-                text=True,
-                check=True,
-            )
-            return result.stdout.strip()
-        except subprocess.CalledProcessError:
-            return "default_tip"
-    elif mode == "prompt":
-        baseline_path = Path(".deepwork/.last_work_tree")
-        if baseline_path.exists():
-            # Use file modification time as reference
-            return str(int(baseline_path.stat().st_mtime))
-        return "prompt"
-    return mode
-
-
-def get_changed_files_base() -> list[str]:
-    """Get files changed relative to branch base."""
-    default_branch = get_default_branch()
-
-    try:
-        result = subprocess.run(
-            ["git", "merge-base", "HEAD", f"origin/{default_branch}"],
-            capture_output=True,
-            text=True,
-            check=True,
-        )
-        merge_base = result.stdout.strip()
-
-        subprocess.run(["git", "add", "-A"], capture_output=True, check=False)
-
-        result = subprocess.run(
-            ["git", "diff", "--name-only", merge_base, "HEAD"],
-            capture_output=True,
-            text=True,
-            check=True,
-        )
-        committed_files = set(result.stdout.strip().split("\n")) if result.stdout.strip() else set()
-
-        result = subprocess.run(
-            ["git", "diff", "--name-only", "--cached"],
-            capture_output=True,
-            text=True,
-            check=False,
-        )
-        staged_files = set(result.stdout.strip().split("\n")) if result.stdout.strip() else set()
-
-        result = subprocess.run(
-            ["git", "ls-files", "--others", "--exclude-standard"],
-            capture_output=True,
-            text=True,
-            check=False,
-        )
-        untracked_files = set(result.stdout.strip().split("\n")) if result.stdout.strip() else set()
-
-        all_files = committed_files | staged_files | untracked_files
-        return sorted([f for f in all_files if f])
-
-    except subprocess.CalledProcessError:
-        return []
-
-
-def get_changed_files_default_tip() -> list[str]:
-    """Get files changed compared to default branch tip."""
-    default_branch = get_default_branch()
-
-    try:
-        subprocess.run(["git", "add", "-A"], capture_output=True, check=False)
-
-        result = subprocess.run(
-            ["git", "diff", "--name-only", f"origin/{default_branch}..HEAD"],
-            capture_output=True,
-            text=True,
-            check=True,
-        )
-        committed_files = set(result.stdout.strip().split("\n")) if result.stdout.strip() else set()
-
-        result = subprocess.run(
-            ["git", "diff", "--name-only", "--cached"],
-            capture_output=True,
-            text=True,
-            check=False,
-        )
-        staged_files = set(result.stdout.strip().split("\n")) if result.stdout.strip() else set()
-
-        result = subprocess.run(
-            ["git", "ls-files", "--others", "--exclude-standard"],
-            capture_output=True,
-            text=True,
-            check=False,
-        )
-        untracked_files = set(result.stdout.strip().split("\n")) if result.stdout.strip() else set()
-
-        all_files = committed_files | staged_files | untracked_files
-        return sorted([f for f in all_files if f])
-
-    except subprocess.CalledProcessError:
-        return []
-
-
-def get_changed_files_prompt() -> list[str]:
-    """Get files changed since prompt was submitted.
-
-    Returns files that changed since the prompt was submitted, including:
-    - Committed changes (compared to captured HEAD ref)
-    - Staged changes (not yet committed)
-    - Untracked files
-
-    This is used by trigger/safety, set, and pair mode rules to detect
-    file modifications during the agent response.
-    """
-    baseline_ref_path = Path(".deepwork/.last_head_ref")
-    changed_files: set[str] = set()
-
-    try:
-        # Stage all changes first
-        subprocess.run(["git", "add", "-A"], capture_output=True, check=False)
-
-        # If we have a captured HEAD ref, compare committed changes against it
-        if baseline_ref_path.exists():
-            baseline_ref = baseline_ref_path.read_text().strip()
-            if baseline_ref:
-                # Get files changed in commits since the baseline
-                result = subprocess.run(
-                    ["git", "diff", "--name-only", baseline_ref, "HEAD"],
-                    capture_output=True,
-                    text=True,
-                    check=False,
-                )
-                if result.returncode == 0 and result.stdout.strip():
-                    committed_files = set(result.stdout.strip().split("\n"))
-                    changed_files.update(f for f in committed_files if f)
-
-        # Also get currently staged changes (in case not everything is committed)
-        result = subprocess.run(
-            ["git", "diff", "--name-only", "--cached"],
-            capture_output=True,
-            text=True,
-            check=False,
-        )
-        if result.stdout.strip():
-            staged_files = set(result.stdout.strip().split("\n"))
-            changed_files.update(f for f in staged_files if f)
-
-        # Include untracked files
-        result = subprocess.run(
-            ["git", "ls-files", "--others", "--exclude-standard"],
-            capture_output=True,
-            text=True,
-            check=False,
-        )
-        if result.stdout.strip():
-            untracked_files = set(result.stdout.strip().split("\n"))
-            changed_files.update(f for f in untracked_files if f)
-
-        return sorted(changed_files)
-
-    except (subprocess.CalledProcessError, OSError):
-        return []
-
-
-def get_changed_files_for_mode(mode: str) -> list[str]:
-    """Get changed files for a specific compare_to mode."""
-    if mode == "base":
-        return get_changed_files_base()
-    elif mode == "default_tip":
-        return get_changed_files_default_tip()
-    elif mode == "prompt":
-        return get_changed_files_prompt()
-    else:
-        return get_changed_files_base()
-
-
-def get_created_files_base() -> list[str]:
-    """Get files created (added) relative to branch base."""
-    default_branch = get_default_branch()
-
-    try:
-        result = subprocess.run(
-            ["git", "merge-base", "HEAD", f"origin/{default_branch}"],
-            capture_output=True,
-            text=True,
-            check=True,
-        )
-        merge_base = result.stdout.strip()
-
-        subprocess.run(["git", "add", "-A"], capture_output=True, check=False)
-
-        # Get only added files (not modified) using --diff-filter=A
-        result = subprocess.run(
-            ["git", "diff", "--name-only", "--diff-filter=A", merge_base, "HEAD"],
-            capture_output=True,
-            text=True,
-            check=True,
-        )
-        committed_added = set(result.stdout.strip().split("\n")) if result.stdout.strip() else set()
-
-        # Staged new files that don't exist in merge_base
-        result = subprocess.run(
-            ["git", "diff", "--name-only", "--diff-filter=A", "--cached", merge_base],
-            capture_output=True,
-            text=True,
-            check=False,
-        )
-        staged_added = set(result.stdout.strip().split("\n")) if result.stdout.strip() else set()
-
-        # Untracked files are by definition "created"
-        result = subprocess.run(
-            ["git", "ls-files", "--others", "--exclude-standard"],
-            capture_output=True,
-            text=True,
-            check=False,
-        )
-        untracked_files = set(result.stdout.strip().split("\n")) if result.stdout.strip() else set()
-
-        all_created = committed_added | staged_added | untracked_files
-        return sorted([f for f in all_created if f])
-
-    except subprocess.CalledProcessError:
-        return []
-
-
-def get_created_files_default_tip() -> list[str]:
-    """Get files created compared to default branch tip."""
-    default_branch = get_default_branch()
-
-    try:
-        subprocess.run(["git", "add", "-A"], capture_output=True, check=False)
-
-        # Get only added files using --diff-filter=A
-        result = subprocess.run(
-            ["git", "diff", "--name-only", "--diff-filter=A", f"origin/{default_branch}..HEAD"],
-            capture_output=True,
-            text=True,
-            check=True,
-        )
-        committed_added = set(result.stdout.strip().split("\n")) if result.stdout.strip() else set()
-
-        result = subprocess.run(
-            [
-                "git",
-                "diff",
-                "--name-only",
-                "--diff-filter=A",
-                "--cached",
-                f"origin/{default_branch}",
-            ],
-            capture_output=True,
-            text=True,
-            check=False,
-        )
-        staged_added = set(result.stdout.strip().split("\n")) if result.stdout.strip() else set()
-
-        # Untracked files are by definition "created"
-        result = subprocess.run(
-            ["git", "ls-files", "--others", "--exclude-standard"],
-            capture_output=True,
-            text=True,
-            check=False,
-        )
-        untracked_files = set(result.stdout.strip().split("\n")) if result.stdout.strip() else set()
-
-        all_created = committed_added | staged_added | untracked_files
-        return sorted([f for f in all_created if f])
-
-    except subprocess.CalledProcessError:
-        return []
-
-
-def get_created_files_prompt() -> list[str]:
-    """Get files created since prompt was submitted."""
-    baseline_path = Path(".deepwork/.last_work_tree")
-
-    try:
-        subprocess.run(["git", "add", "-A"], capture_output=True, check=False)
-
-        result = subprocess.run(
-            ["git", "diff", "--name-only", "--cached"],
-            capture_output=True,
-            text=True,
-            check=False,
-        )
-        current_files = set(result.stdout.strip().split("\n")) if result.stdout.strip() else set()
-        current_files = {f for f in current_files if f}
-
-        # Untracked files
-        result = subprocess.run(
-            ["git", "ls-files", "--others", "--exclude-standard"],
-            capture_output=True,
-            text=True,
-            check=False,
-        )
-        untracked_files = set(result.stdout.strip().split("\n")) if result.stdout.strip() else set()
-        untracked_files = {f for f in untracked_files if f}
-
-        all_current = current_files | untracked_files
-
-        if baseline_path.exists():
-            baseline_files = set(baseline_path.read_text().strip().split("\n"))
-            baseline_files = {f for f in baseline_files if f}
-            # Created files are those that didn't exist at baseline
-            created_files = all_current - baseline_files
-            return sorted(created_files)
-        else:
-            # No baseline means all current files are "new" to this prompt
-            return sorted(all_current)
-
-    except (subprocess.CalledProcessError, OSError):
-        return []
-
-
-def get_created_files_for_mode(mode: str) -> list[str]:
-    """Get created files for a specific compare_to mode."""
-    if mode == "base":
-        return get_created_files_base()
-    elif mode == "default_tip":
-        return get_created_files_default_tip()
-    elif mode == "prompt":
-        return get_created_files_prompt()
-    else:
-        return get_created_files_base()
-
-
-def extract_promise_tags(text: str) -> set[str]:
-    """
-    Extract rule names from <promise> tags in text.
-
-    Supports both:
-    - <promise>Rule Name</promise>
-    - <promise>✓ Rule Name</promise>
-    """
-    # Match with optional checkmark prefix (✓ or ✓ with space)
-    pattern = r"<promise>(?:\s*)?(?:✓\s*)?([^<]+)</promise>"
-    matches = re.findall(pattern, text, re.IGNORECASE | re.DOTALL)
-    return {m.strip() for m in matches}
-
-
-def extract_conversation_from_transcript(transcript_path: str, platform: Platform) -> str:
-    """
-    Extract conversation text from a transcript file.
-
-    Handles platform-specific transcript formats.
-    """
-    if not transcript_path or not Path(transcript_path).exists():
-        return ""
-
-    try:
-        content = Path(transcript_path).read_text()
-
-        if platform == Platform.CLAUDE:
-            # Claude uses JSONL format - each line is a JSON object
-            conversation_parts = []
-            for line in content.strip().split("\n"):
-                if not line.strip():
-                    continue
-                try:
-                    entry = json.loads(line)
-                    if entry.get("role") == "assistant":
-                        message_content = entry.get("message", {}).get("content", [])
-                        for part in message_content:
-                            if part.get("type") == "text":
-                                conversation_parts.append(part.get("text", ""))
-                except json.JSONDecodeError:
-                    continue
-            return "\n".join(conversation_parts)
-
-        elif platform == Platform.GEMINI:
-            # Gemini uses JSON format
-            try:
-                data = json.loads(content)
-                # Extract text from messages
-                conversation_parts = []
-                messages = data.get("messages", [])
-                for msg in messages:
-                    if msg.get("role") == "model":
-                        parts = msg.get("parts", [])
-                        for part in parts:
-                            if isinstance(part, dict) and "text" in part:
-                                conversation_parts.append(part["text"])
-                            elif isinstance(part, str):
-                                conversation_parts.append(part)
-                return "\n".join(conversation_parts)
-            except json.JSONDecodeError:
-                return ""
-
-        return ""
-    except Exception:
-        return ""
-
-
-def format_rules_message(results: list[RuleEvaluationResult]) -> str:
-    """
-    Format triggered rules into a concise message for the agent.
-
-    Groups rules by name and uses minimal formatting.
-    """
-    lines = ["## DeepWork Rules Triggered", ""]
-    lines.append(
-        "Comply with the following rules. "
-        "To mark a rule as addressed, include `<promise>Rule Name</promise>` "
-        "in your response."
-    )
-    lines.append("")
-
-    # Group results by rule name
-    by_name: dict[str, list[RuleEvaluationResult]] = {}
-    for result in results:
-        name = result.rule.name
-        if name not in by_name:
-            by_name[name] = []
-        by_name[name].append(result)
-
-    for name, rule_results in by_name.items():
-        rule = rule_results[0].rule
-        lines.append(f"## {name}")
-        lines.append("")
-
-        # For set/pair modes, show the correspondence violations concisely
-        if rule.detection_mode in (DetectionMode.SET, DetectionMode.PAIR):
-            for result in rule_results:
-                for trigger_file in result.trigger_files:
-                    for missing_file in result.missing_files:
-                        lines.append(f"{trigger_file} -> {missing_file}")
-            lines.append("")
-
-        # Show instructions
-        if rule.instructions:
-            lines.append(rule.instructions.strip())
-            lines.append("")
-
-    return "\n".join(lines)
-
-
-def rules_check_hook(hook_input: HookInput) -> HookOutput:
-    """
-    Main hook logic for rules evaluation (v2).
-
-    This is called for after_agent events to check if rules need attention
-    before allowing the agent to complete.
-    """
-    # Only process after_agent events
-    if hook_input.event != NormalizedEvent.AFTER_AGENT:
-        return HookOutput()
-
-    # Check if rules directory exists
-    rules_dir = Path(".deepwork/rules")
-    if not rules_dir.exists():
-        return HookOutput()
-
-    # Extract conversation context from transcript
-    conversation_context = extract_conversation_from_transcript(
-        hook_input.transcript_path, hook_input.platform
-    )
-
-    # Extract promise tags (case-insensitive)
-    promised_rules = extract_promise_tags(conversation_context)
-
-    # Load rules
-    try:
-        rules = load_rules_from_directory(rules_dir)
-    except RulesParseError as e:
-        print(f"Error loading rules: {e}", file=sys.stderr)
-        return HookOutput()
-
-    if not rules:
-        return HookOutput()
-
-    # Initialize queue
-    queue = RulesQueue()
-
-    # Group rules by compare_to mode
-    rules_by_mode: dict[str, list[Rule]] = {}
-    for rule in rules:
-        mode = rule.compare_to
-        if mode not in rules_by_mode:
-            rules_by_mode[mode] = []
-        rules_by_mode[mode].append(rule)
-
-    # Evaluate rules and collect results
-    prompt_results: list[RuleEvaluationResult] = []
-    command_errors: list[str] = []
-
-    for mode, mode_rules in rules_by_mode.items():
-        changed_files = get_changed_files_for_mode(mode)
-        created_files = get_created_files_for_mode(mode)
-
-        # Skip if no changed or created files
-        if not changed_files and not created_files:
-            continue
-
-        baseline_ref = get_baseline_ref(mode)
-
-        # Evaluate which rules fire
-        results = evaluate_rules(mode_rules, changed_files, promised_rules, created_files)
-
-        for result in results:
-            rule = result.rule
-
-            # Compute trigger hash for queue deduplication
-            trigger_hash = compute_trigger_hash(
-                rule.name,
-                result.trigger_files,
-                baseline_ref,
-            )
-
-            # Check if already in queue (passed/skipped)
-            existing = queue.get_entry(trigger_hash)
-            if existing and existing.status in (
-                QueueEntryStatus.PASSED,
-                QueueEntryStatus.SKIPPED,
-            ):
-                continue
-
-            # For PROMPT rules, also skip if already QUEUED (already shown to agent).
-            # This prevents infinite loops when transcript is unavailable or promise
-            # tags haven't been written yet. The agent has already seen this rule.
-            if (
-                existing
-                and existing.status == QueueEntryStatus.QUEUED
-                and rule.action_type == ActionType.PROMPT
-            ):
-                continue
-
-            # For COMMAND rules with FAILED status, don't re-run the command.
-            # The agent has already seen the error. If they provide a promise,
-            # the after-loop logic will update the status to SKIPPED.
-            if (
-                existing
-                and existing.status == QueueEntryStatus.FAILED
-                and rule.action_type == ActionType.COMMAND
-            ):
-                continue
-
-            # Create queue entry if new
-            if not existing:
-                queue.create_entry(
-                    rule_name=rule.name,
-                    rule_file=f"{rule.filename}.md",
-                    trigger_files=result.trigger_files,
-                    baseline_ref=baseline_ref,
-                    expected_files=result.missing_files,
-                )
-
-            # Handle based on action type
-            if rule.action_type == ActionType.COMMAND:
-                # Run command action
-                if rule.command_action:
-                    repo_root = Path.cwd()
-                    cmd_results = run_command_action(
-                        rule.command_action,
-                        result.trigger_files,
-                        repo_root,
-                    )
-
-                    if all_commands_succeeded(cmd_results):
-                        # Command succeeded, mark as passed
-                        queue.update_status(
-                            trigger_hash,
-                            QueueEntryStatus.PASSED,
-                            ActionResult(
-                                type="command",
-                                output=cmd_results[0].stdout if cmd_results else None,
-                                exit_code=0,
-                            ),
-                        )
-                    else:
-                        # Command failed - format detailed error message
-                        error_msg = format_command_errors(cmd_results, rule_name=rule.name)
-                        skip_hint = f"\nTo skip, include `<promise>✓ {rule.name}</promise>` in your response."
-                        command_errors.append(f"{error_msg}{skip_hint}")
-                        queue.update_status(
-                            trigger_hash,
-                            QueueEntryStatus.FAILED,
-                            ActionResult(
-                                type="command",
-                                output=error_msg,
-                                exit_code=cmd_results[0].exit_code if cmd_results else -1,
-                            ),
-                        )
-
-            elif rule.action_type == ActionType.PROMPT:
-                # Collect for prompt output
-                prompt_results.append(result)
-
-    # Handle FAILED queue entries that have been promised
-    # (These rules weren't in results because evaluate_rules skips promised rules,
-    # but we need to update their queue status to SKIPPED)
-    if promised_rules:
-        promised_lower = {name.lower() for name in promised_rules}
-        for entry in queue.get_all_entries():
-            if (
-                entry.status == QueueEntryStatus.FAILED
-                and entry.rule_name.lower() in promised_lower
-            ):
-                queue.update_status(
-                    entry.trigger_hash,
-                    QueueEntryStatus.SKIPPED,
-                    ActionResult(
-                        type="command",
-                        output="Acknowledged via promise tag",
-                        exit_code=None,
-                    ),
-                )
-
-    # Build response
-    messages: list[str] = []
-
-    # Add command errors if any
-    if command_errors:
-        messages.append("## Command Rule Errors\n")
-        messages.append("The following command rules failed.\n")
-        messages.extend(command_errors)
-        messages.append("")
-
-    # Add prompt rules if any
-    if prompt_results:
-        messages.append(format_rules_message(prompt_results))
-
-    if messages:
-        return HookOutput(decision="block", reason="\n".join(messages))
-
-    return HookOutput()
-
-
-def main() -> None:
-    """Entry point for the rules check hook."""
-    platform_str = os.environ.get("DEEPWORK_HOOK_PLATFORM", "claude")
-    try:
-        platform = Platform(platform_str)
-    except ValueError:
-        platform = Platform.CLAUDE
-
-    exit_code = run_hook(rules_check_hook, platform)
-    sys.exit(exit_code)
-
-
-if __name__ == "__main__":
-    # Wrap entry point to catch early failures (e.g., import errors in wrapper.py)
-    try:
-        main()
-    except Exception as e:
-        # Last resort error handling - output JSON manually since wrapper may be broken
-        import json
-        import traceback
-
-        error_output = {
-            "decision": "block",
-            "reason": (
-                "## Hook Script Error\n\n"
-                f"Error type: {type(e).__name__}\n"
-                f"Error: {e}\n\n"
-                f"Traceback:\n```\n{traceback.format_exc()}\n```"
-            ),
-        }
-        print(json.dumps(error_output))
-        sys.exit(0)
diff --git a/src/deepwork/mcp/__init__.py b/src/deepwork/mcp/__init__.py
new file mode 100644
index 00000000..bb6e5041
--- /dev/null
+++ b/src/deepwork/mcp/__init__.py
@@ -0,0 +1,23 @@
+"""DeepWork MCP Server module.
+
+This module provides an MCP (Model Context Protocol) server that guides AI agents
+through DeepWork workflows via checkpoint calls with quality gate enforcement.
+
+The server exposes three main tools:
+- get_workflows: List all available workflows
+- start_workflow: Initialize a workflow session
+- finished_step: Report step completion and get next instructions
+
+Example usage:
+    deepwork serve --path /path/to/project
+"""
+
+
+def create_server(*args, **kwargs):  # type: ignore
+    """Lazy import to avoid loading fastmcp at module import time."""
+    from deepwork.mcp.server import create_server as _create_server
+
+    return _create_server(*args, **kwargs)
+
+
+__all__ = ["create_server"]
diff --git a/src/deepwork/mcp/claude_cli.py b/src/deepwork/mcp/claude_cli.py
new file mode 100644
index 00000000..a64a2b24
--- /dev/null
+++ b/src/deepwork/mcp/claude_cli.py
@@ -0,0 +1,181 @@
+"""Claude Code CLI subprocess wrapper.
+
+Runs Claude Code CLI as a subprocess with structured JSON output.
+Always uses --json-schema for structured output conformance.
+
+See doc/reference/calling_claude_in_print_mode.md for details on
+proper CLI invocation with structured output.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import json
+from pathlib import Path
+from typing import Any
+
+
+class ClaudeCLIError(Exception):
+    """Exception raised for Claude CLI subprocess errors."""
+
+    pass
+
+
+class ClaudeCLI:
+    """Runs Claude Code CLI as a subprocess with structured JSON output.
+
+    Always requires a JSON schema - the structured output is returned
+    as a parsed dict from the CLI's `structured_output` field.
+
+    See doc/reference/calling_claude_in_print_mode.md for details on
+    proper CLI invocation with structured output.
+    """
+
+    def __init__(
+        self,
+        timeout: int = 120,
+        *,
+        _test_command: list[str] | None = None,
+    ):
+        """Initialize Claude CLI wrapper.
+
+        Args:
+            timeout: Timeout in seconds for the subprocess
+            _test_command: Internal testing only - override the subprocess command.
+                          When set, skips adding --json-schema flag (test mock handles it).
+        """
+        self.timeout = timeout
+        self._test_command = _test_command
+
+    def _build_command(
+        self,
+        system_prompt: str,
+        json_schema: dict[str, Any],
+    ) -> list[str]:
+        """Build the CLI command with proper flag ordering.
+
+        Flags must come BEFORE `-p --` because:
+        - `-p` expects a prompt argument immediately after
+        - `--` marks the end of flags, everything after is the prompt
+        - When piping via stdin, we use `-p --` to read from stdin
+
+        Args:
+            system_prompt: System prompt for the CLI
+            json_schema: JSON schema for structured output
+
+        Returns:
+            Command list ready for subprocess execution
+        """
+        if self._test_command:
+            return self._test_command + ["--system-prompt", system_prompt]
+
+        schema_json = json.dumps(json_schema)
+        return [
+            "claude",
+            "--print",
+            "--output-format",
+            "json",
+            "--system-prompt",
+            system_prompt,
+            "--json-schema",
+            schema_json,
+            "-p",
+            "--",
+        ]
+
+    def _parse_wrapper(self, response_text: str) -> dict[str, Any]:
+        """Parse the Claude CLI JSON wrapper and extract structured_output.
+
+        When using --print --output-format json --json-schema, Claude CLI returns
+        a wrapper object with the structured output in the 'structured_output' field.
+
+        Args:
+            response_text: Raw JSON response from Claude CLI
+
+        Returns:
+            The parsed structured_output dict
+
+        Raises:
+            ClaudeCLIError: If response cannot be parsed
+        """
+        try:
+            wrapper = json.loads(response_text.strip())
+
+            if wrapper.get("is_error"):
+                raise ClaudeCLIError(
+                    f"Claude CLI returned error: {wrapper.get('result', 'Unknown error')}"
+                )
+
+            data = wrapper.get("structured_output")
+            if data is None:
+                raise ClaudeCLIError(
+                    "Claude CLI response missing 'structured_output' field. "
+                    f"Response was: {response_text[:500]}..."
+                )
+
+            return data
+
+        except json.JSONDecodeError as e:
+            raise ClaudeCLIError(
+                f"Failed to parse Claude CLI response as JSON: {e}\n"
+                f"Response was: {response_text[:500]}..."
+            ) from e
+
+    async def run(
+        self,
+        prompt: str,
+        system_prompt: str,
+        json_schema: dict[str, Any],
+        cwd: Path | None = None,
+        timeout: int | None = None,
+    ) -> dict[str, Any]:
+        """Run Claude CLI and return the structured output.
+
+        Args:
+            prompt: The user prompt (piped via stdin)
+            system_prompt: System instructions for the CLI
+            json_schema: JSON schema enforcing structured output conformance
+            cwd: Working directory for the subprocess
+            timeout: Override instance timeout for this call (seconds).
+                     If None, uses the instance default.
+
+        Returns:
+            The parsed structured_output dict from Claude CLI
+
+        Raises:
+            ClaudeCLIError: If the subprocess fails or output cannot be parsed
+        """
+        effective_timeout = timeout if timeout is not None else self.timeout
+        cmd = self._build_command(system_prompt, json_schema)
+
+        try:
+            process = await asyncio.create_subprocess_exec(
+                *cmd,
+                stdin=asyncio.subprocess.PIPE,
+                stdout=asyncio.subprocess.PIPE,
+                stderr=asyncio.subprocess.PIPE,
+                cwd=str(cwd) if cwd else None,
+            )
+
+            try:
+                stdout, stderr = await asyncio.wait_for(
+                    process.communicate(input=prompt.encode()),
+                    timeout=effective_timeout,
+                )
+            except TimeoutError:
+                process.kill()
+                await process.wait()
+                raise ClaudeCLIError(
+                    f"Claude CLI timed out after {effective_timeout} seconds"
+                ) from None
+
+            if process.returncode != 0:
+                raise ClaudeCLIError(
+                    f"Claude CLI failed with exit code {process.returncode}:\n"
+                    f"stderr: {stderr.decode()}"
+                )
+
+            return self._parse_wrapper(stdout.decode())
+
+        except FileNotFoundError as e:
+            raise ClaudeCLIError("Claude CLI command not found: claude") from e
diff --git a/src/deepwork/mcp/quality_gate.py b/src/deepwork/mcp/quality_gate.py
new file mode 100644
index 00000000..7b749a38
--- /dev/null
+++ b/src/deepwork/mcp/quality_gate.py
@@ -0,0 +1,538 @@
+"""Quality gate for evaluating step outputs.
+
+The quality gate invokes a review agent (via ClaudeCLI) to evaluate
+step outputs against quality criteria.
+"""
+
+from __future__ import annotations
+
+import asyncio
+from pathlib import Path
+from typing import Any
+
+import aiofiles
+
+from deepwork.mcp.claude_cli import ClaudeCLI
+from deepwork.mcp.schemas import (
+    QualityCriteriaResult,
+    QualityGateResult,
+    ReviewResult,
+)
+
+# JSON Schema for quality gate response validation
+QUALITY_GATE_RESPONSE_SCHEMA: dict[str, Any] = {
+    "type": "object",
+    "required": ["passed", "feedback"],
+    "properties": {
+        "passed": {"type": "boolean"},
+        "feedback": {"type": "string"},
+        "criteria_results": {
+            "type": "array",
+            "items": {
+                "type": "object",
+                "required": ["criterion", "passed"],
+                "properties": {
+                    "criterion": {"type": "string"},
+                    "passed": {"type": "boolean"},
+                    "feedback": {"type": ["string", "null"]},
+                },
+            },
+        },
+    },
+}
+
+# File separator format: 20 dashes, filename, 20 dashes
+FILE_SEPARATOR = "-" * 20
+
+# Section headers for inputs/outputs
+SECTION_SEPARATOR = "=" * 20
+
+
+class QualityGateError(Exception):
+    """Exception raised for quality gate errors."""
+
+    pass
+
+
+class QualityGate:
+    """Evaluates step outputs against quality criteria.
+
+    Uses ClaudeCLI to invoke a review agent that evaluates outputs
+    and returns structured feedback.
+    """
+
+    def __init__(self, cli: ClaudeCLI | None = None):
+        """Initialize quality gate.
+
+        Args:
+            cli: ClaudeCLI instance. If not provided, a default one is created.
+        """
+        self._cli = cli or ClaudeCLI()
+
+    def _build_instructions(
+        self,
+        quality_criteria: dict[str, str],
+        notes: str | None = None,
+        additional_review_guidance: str | None = None,
+    ) -> str:
+        """Build the system instructions for the review agent.
+
+        Args:
+            quality_criteria: Map of criterion name to criterion question
+            notes: Optional notes from the agent about work done
+            additional_review_guidance: Optional guidance about what context to look at
+
+        Returns:
+            System instructions string
+        """
+        criteria_list = "\n".join(
+            f"- **{name}**: {question}" for name, question in quality_criteria.items()
+        )
+
+        notes_section = ""
+        if notes:
+            notes_section = f"""
+
+## Author Notes
+
+The author provided the following notes about the work done:
+
+{notes}"""
+
+        guidance_section = ""
+        if additional_review_guidance:
+            guidance_section = f"""
+
+## Additional Context
+
+{additional_review_guidance}"""
+
+        return f"""\
+You are an editor responsible for reviewing the files listed as outputs.
+Your job is to evaluate whether outputs meet the specified criteria below.
+
+## Criteria to Evaluate
+
+{criteria_list}
+{notes_section}
+{guidance_section}
+
+## Response Format
+
+You must respond with JSON in this exact structure:
+```json
+{{
+  "passed": true/false,
+  "feedback": "Brief overall summary of evaluation",
+  "criteria_results": [
+    {{
+      "criterion": "The criterion name",
+      "passed": true/false,
+      "feedback": "Specific feedback for this criterion (null if passed)"
+    }}
+  ]
+}}
+```
+
+## Guidelines
+
+- Be strict but fair
+- Apply criteria pragmatically. If a criterion is not applicable to this step's purpose, pass it.
+- Only mark a criterion as passed if it is clearly met or if it is not applicable.
+- Provide specific, actionable feedback for failed criteria
+- The overall "passed" should be true only if ALL criteria pass"""
+
+    @staticmethod
+    def _flatten_output_paths(outputs: dict[str, str | list[str]]) -> list[str]:
+        """Flatten a structured outputs dict into a list of file paths.
+
+        Args:
+            outputs: Map of output names to file path(s)
+
+        Returns:
+            Flat list of all file paths
+        """
+        paths: list[str] = []
+        for value in outputs.values():
+            if isinstance(value, list):
+                paths.extend(value)
+            else:
+                paths.append(value)
+        return paths
+
+    async def _read_file_sections(
+        self,
+        file_paths: dict[str, str | list[str]],
+        project_root: Path,
+    ) -> list[str]:
+        """Read files and return formatted sections for each.
+
+        Args:
+            file_paths: Map of names to file path(s)
+            project_root: Project root path for reading files
+
+        Returns:
+            List of formatted file sections
+        """
+        sections: list[str] = []
+        all_paths = self._flatten_output_paths(file_paths)
+
+        for file_path in all_paths:
+            full_path = project_root / file_path
+            header = f"{FILE_SEPARATOR} {file_path} {FILE_SEPARATOR}"
+
+            if full_path.exists():
+                try:
+                    async with aiofiles.open(full_path, encoding="utf-8") as f:
+                        content = await f.read()
+                    sections.append(f"{header}\n{content}")
+                except (UnicodeDecodeError, ValueError):
+                    abs_path = full_path.resolve()
+                    sections.append(
+                        f"{header}\n[Binary file — not included in review. Read from: {abs_path}]"
+                    )
+                except Exception as e:
+                    sections.append(f"{header}\n[Error reading file: {e}]")
+            else:
+                sections.append(f"{header}\n[File not found]")
+
+        return sections
+
+    # =========================================================================
+    # WARNING: REVIEW PERFORMANCE IS SENSITIVE TO PAYLOAD SIZE
+    #
+    # The payload builder below sends file contents to the review agent (Claude
+    # CLI subprocess). Reviews can get REALLY SLOW if the content gets too big:
+    #
+    # - Each file's full content is read and embedded in the prompt
+    # - The review agent must process ALL of this content to evaluate criteria
+    # - Large payloads (25+ files, or files with 500+ lines each) can cause
+    #   the review to approach or exceed its timeout
+    # - Per-file reviews (run_each: <output_name> with type: files) multiply
+    #   the problem — each file gets its own review subprocess
+    #
+    # To mitigate this, when more than MAX_INLINE_FILES files are present,
+    # the payload switches to a path-listing mode that only shows file paths
+    # instead of dumping all contents inline. The reviewer can then use its
+    # own tools to read specific files as needed.
+    #
+    # If you're changing the payload builder, keep payload size in mind.
+    # =========================================================================
+
+    # Maximum number of files to include inline in the review payload.
+    # Beyond this threshold, only file paths are listed.
+    MAX_INLINE_FILES = 5
+
+    @staticmethod
+    def _build_path_listing(file_paths: dict[str, str | list[str]]) -> list[str]:
+        """Build a path-only listing for large file sets.
+
+        Args:
+            file_paths: Map of names to file path(s)
+
+        Returns:
+            List of formatted path entries
+        """
+        lines: list[str] = []
+        for name, value in file_paths.items():
+            if isinstance(value, list):
+                for path in value:
+                    lines.append(f"- {path}  (output: {name})")
+            else:
+                lines.append(f"- {value}  (output: {name})")
+        return lines
+
+    async def _build_payload(
+        self,
+        outputs: dict[str, str | list[str]],
+        project_root: Path,
+    ) -> str:
+        """Build the user prompt payload with output file contents.
+
+        When the total number of files exceeds MAX_INLINE_FILES, the payload
+        lists file paths instead of embedding full contents to avoid slow reviews.
+
+        Args:
+            outputs: Map of output names to file path(s)
+            project_root: Project root path for reading files
+
+        Returns:
+            Formatted payload with output file contents or path listing
+        """
+        parts: list[str] = []
+        total_files = len(self._flatten_output_paths(outputs))
+
+        if total_files > self.MAX_INLINE_FILES:
+            # Too many files — list paths only so the reviewer reads selectively
+            path_lines = self._build_path_listing(outputs)
+            parts.append(f"{SECTION_SEPARATOR} BEGIN OUTPUTS {SECTION_SEPARATOR}")
+            parts.append(
+                f"[{total_files} files — too many to include inline. "
+                f"Paths listed below. Read files as needed to evaluate criteria.]"
+            )
+            parts.extend(path_lines)
+            parts.append(f"{SECTION_SEPARATOR} END OUTPUTS {SECTION_SEPARATOR}")
+        else:
+            # Build outputs section with full content
+            output_sections = await self._read_file_sections(outputs, project_root)
+            if output_sections:
+                parts.append(f"{SECTION_SEPARATOR} BEGIN OUTPUTS {SECTION_SEPARATOR}")
+                parts.extend(output_sections)
+                parts.append(f"{SECTION_SEPARATOR} END OUTPUTS {SECTION_SEPARATOR}")
+
+        if not parts:
+            return "[No files provided]"
+
+        return "\n\n".join(parts)
+
+    def _parse_result(self, data: dict[str, Any]) -> QualityGateResult:
+        """Parse the structured output into a QualityGateResult.
+
+        Args:
+            data: The structured_output dict from ClaudeCLI
+
+        Returns:
+            Parsed QualityGateResult
+
+        Raises:
+            QualityGateError: If data cannot be interpreted
+        """
+        try:
+            criteria_results = [
+                QualityCriteriaResult(
+                    criterion=cr.get("criterion", ""),
+                    passed=cr.get("passed", False),
+                    feedback=cr.get("feedback"),
+                )
+                for cr in data.get("criteria_results", [])
+            ]
+
+            return QualityGateResult(
+                passed=data.get("passed", False),
+                feedback=data.get("feedback", "No feedback provided"),
+                criteria_results=criteria_results,
+            )
+
+        except (ValueError, KeyError) as e:
+            raise QualityGateError(
+                f"Failed to interpret quality gate result: {e}\nData was: {data}"
+            ) from e
+
+    @staticmethod
+    def compute_timeout(file_count: int) -> int:
+        """Compute dynamic timeout based on number of files.
+
+        Base timeout is 120 seconds. For every file beyond the first 5,
+        add 30 seconds. Examples:
+          - 3 files  -> 120s
+          - 5 files  -> 120s
+          - 10 files -> 120 + 30*5 = 270s (4.5 min)
+          - 20 files -> 120 + 30*15 = 570s (9.5 min)
+
+        Args:
+            file_count: Total number of files being reviewed
+
+        Returns:
+            Timeout in seconds
+        """
+        return 120 + 30 * max(0, file_count - 5)
+
+    async def evaluate(
+        self,
+        quality_criteria: dict[str, str],
+        outputs: dict[str, str | list[str]],
+        project_root: Path,
+        notes: str | None = None,
+        additional_review_guidance: str | None = None,
+    ) -> QualityGateResult:
+        """Evaluate step outputs against quality criteria.
+
+        Args:
+            quality_criteria: Map of criterion name to criterion question
+            outputs: Map of output names to file path(s)
+            project_root: Project root path
+            notes: Optional notes from the agent about work done
+            additional_review_guidance: Optional guidance for the reviewer
+
+        Returns:
+            QualityGateResult with pass/fail and feedback
+
+        Raises:
+            QualityGateError: If evaluation fails
+        """
+        if not quality_criteria:
+            # No criteria = auto-pass
+            return QualityGateResult(
+                passed=True,
+                feedback="No quality criteria defined - auto-passing",
+                criteria_results=[],
+            )
+
+        instructions = self._build_instructions(
+            quality_criteria,
+            notes=notes,
+            additional_review_guidance=additional_review_guidance,
+        )
+        payload = await self._build_payload(outputs, project_root)
+
+        # Dynamic timeout: more files = more time for the reviewer
+        file_count = len(self._flatten_output_paths(outputs))
+        timeout = self.compute_timeout(file_count)
+
+        from deepwork.mcp.claude_cli import ClaudeCLIError
+
+        try:
+            data = await self._cli.run(
+                prompt=payload,
+                system_prompt=instructions,
+                json_schema=QUALITY_GATE_RESPONSE_SCHEMA,
+                cwd=project_root,
+                timeout=timeout,
+            )
+        except ClaudeCLIError as e:
+            raise QualityGateError(str(e)) from e
+
+        return self._parse_result(data)
+
+    async def evaluate_reviews(
+        self,
+        reviews: list[dict[str, Any]],
+        outputs: dict[str, str | list[str]],
+        output_specs: dict[str, str],
+        project_root: Path,
+        notes: str | None = None,
+    ) -> list[ReviewResult]:
+        """Evaluate all reviews for a step, running them in parallel.
+
+        Args:
+            reviews: List of review dicts with run_each, quality_criteria,
+                and optional additional_review_guidance
+            outputs: Map of output names to file path(s)
+            output_specs: Map of output names to their type ("file" or "files")
+            project_root: Project root path
+            notes: Optional notes from the agent about work done
+
+        Returns:
+            List of ReviewResult for any failed reviews (empty if all pass)
+        """
+        if not reviews:
+            return []
+
+        # Each task is (run_each, target_file, criteria, review_outputs, guidance)
+        tasks: list[
+            tuple[str, str | None, dict[str, str], dict[str, str | list[str]], str | None]
+        ] = []
+
+        for review in reviews:
+            run_each = review["run_each"]
+            quality_criteria = review["quality_criteria"]
+            guidance = review.get("additional_review_guidance")
+
+            if run_each == "step":
+                # Review all outputs together
+                tasks.append((run_each, None, quality_criteria, outputs, guidance))
+            elif run_each in outputs:
+                output_type = output_specs.get(run_each, "file")
+                output_value = outputs[run_each]
+
+                if output_type == "files" and isinstance(output_value, list):
+                    # Run once per file
+                    for file_path in output_value:
+                        tasks.append(
+                            (
+                                run_each,
+                                file_path,
+                                quality_criteria,
+                                {run_each: file_path},
+                                guidance,
+                            )
+                        )
+                else:
+                    # Single file - run once
+                    tasks.append(
+                        (
+                            run_each,
+                            output_value if isinstance(output_value, str) else None,
+                            quality_criteria,
+                            {run_each: output_value},
+                            guidance,
+                        )
+                    )
+
+        async def run_review(
+            run_each: str,
+            target_file: str | None,
+            criteria: dict[str, str],
+            review_outputs: dict[str, str | list[str]],
+            guidance: str | None,
+        ) -> ReviewResult:
+            result = await self.evaluate(
+                quality_criteria=criteria,
+                outputs=review_outputs,
+                project_root=project_root,
+                notes=notes,
+                additional_review_guidance=guidance,
+            )
+            return ReviewResult(
+                review_run_each=run_each,
+                target_file=target_file,
+                passed=result.passed,
+                feedback=result.feedback,
+                criteria_results=result.criteria_results,
+            )
+
+        results = await asyncio.gather(*(run_review(*task) for task in tasks))
+
+        return [r for r in results if not r.passed]
+
+
+class MockQualityGate(QualityGate):
+    """Mock quality gate for testing.
+
+    Always passes unless configured otherwise.
+    """
+
+    def __init__(self, should_pass: bool = True, feedback: str = "Mock evaluation"):
+        """Initialize mock quality gate.
+
+        Args:
+            should_pass: Whether evaluations should pass
+            feedback: Feedback message to return
+        """
+        super().__init__()
+        self.should_pass = should_pass
+        self.feedback = feedback
+        self.evaluations: list[dict[str, Any]] = []
+
+    async def evaluate(
+        self,
+        quality_criteria: dict[str, str],
+        outputs: dict[str, str | list[str]],
+        project_root: Path,
+        notes: str | None = None,
+        additional_review_guidance: str | None = None,
+    ) -> QualityGateResult:
+        """Mock evaluation - records call and returns configured result."""
+        self.evaluations.append(
+            {
+                "quality_criteria": quality_criteria,
+                "outputs": outputs,
+                "notes": notes,
+                "additional_review_guidance": additional_review_guidance,
+            }
+        )
+
+        criteria_results = [
+            QualityCriteriaResult(
+                criterion=name,
+                passed=self.should_pass,
+                feedback=None if self.should_pass else self.feedback,
+            )
+            for name in quality_criteria
+        ]
+
+        return QualityGateResult(
+            passed=self.should_pass,
+            feedback=self.feedback,
+            criteria_results=criteria_results,
+        )
diff --git a/src/deepwork/mcp/schemas.py b/src/deepwork/mcp/schemas.py
new file mode 100644
index 00000000..fe3746f8
--- /dev/null
+++ b/src/deepwork/mcp/schemas.py
@@ -0,0 +1,334 @@
+"""Pydantic models for MCP tool inputs and outputs.
+
+IMPORTANT: If you modify any models in this file that affect the MCP tool
+interfaces (input models, output models, or their fields), you MUST also
+update the documentation in doc/mcp_interface.md to keep it in sync with
+the implementation.
+"""
+
+from enum import Enum
+from typing import Any
+
+from pydantic import BaseModel, Field
+
+# =============================================================================
+# Enums
+# =============================================================================
+
+
+class StepStatus(str, Enum):
+    """Status returned from finished_step."""
+
+    NEEDS_WORK = "needs_work"
+    NEXT_STEP = "next_step"
+    WORKFLOW_COMPLETE = "workflow_complete"
+
+
+# =============================================================================
+# Workflow Info Models
+# NOTE: These models are returned by get_workflows tool.
+#       Update doc/mcp_interface.md when modifying.
+# =============================================================================
+
+
+class StepInfo(BaseModel):
+    """Information about a single step."""
+
+    id: str = Field(description="Step identifier")
+    name: str = Field(description="Human-readable step name")
+    description: str = Field(description="What the step does")
+    dependencies: list[str] = Field(default_factory=list, description="Required prior steps")
+
+
+class ConcurrentStepGroup(BaseModel):
+    """A group of steps that can be executed concurrently."""
+
+    step_ids: list[str] = Field(description="Steps that run in parallel")
+    is_concurrent: bool = Field(default=True)
+
+
+class WorkflowStepEntryInfo(BaseModel):
+    """Information about a workflow step entry (sequential or concurrent)."""
+
+    step_ids: list[str] = Field(description="Step ID(s) in this entry")
+    is_concurrent: bool = Field(default=False, description="True if steps run in parallel")
+
+
+class WorkflowInfo(BaseModel):
+    """Information about a workflow."""
+
+    name: str = Field(description="Workflow identifier")
+    summary: str = Field(description="Short description of workflow")
+
+
+class JobInfo(BaseModel):
+    """Information about a job and its workflows."""
+
+    name: str = Field(description="Job identifier")
+    summary: str = Field(description="Short summary of the job")
+    description: str | None = Field(default=None, description="Full description")
+    workflows: list[WorkflowInfo] = Field(default_factory=list)
+
+
+# =============================================================================
+# Tool Input Models
+# NOTE: Changes to these models affect MCP tool parameters.
+#       Update doc/mcp_interface.md when modifying.
+# =============================================================================
+
+
+class StartWorkflowInput(BaseModel):
+    """Input for start_workflow tool."""
+
+    goal: str = Field(description="What the user wants to accomplish")
+    job_name: str = Field(description="Name of the job")
+    workflow_name: str = Field(description="Name of the workflow within the job")
+    instance_id: str | None = Field(
+        default=None,
+        description="Optional identifier (e.g., 'acme', 'q1-2026')",
+    )
+
+
+class FinishedStepInput(BaseModel):
+    """Input for finished_step tool."""
+
+    outputs: dict[str, str | list[str]] = Field(
+        description=(
+            "Map of output names to file path(s). "
+            "For outputs declared as type 'file': pass a single string path (e.g. \"report.md\"). "
+            'For outputs declared as type \'files\': pass a list of string paths (e.g. ["a.md", "b.md"]). '
+            "Outputs with required: false can be omitted from this map. "
+            "Check step_expected_outputs from start_workflow/finished_step response to see each output's type and required status."
+        )
+    )
+    notes: str | None = Field(default=None, description="Optional notes about work done")
+    quality_review_override_reason: str | None = Field(
+        default=None,
+        description="If provided, skips the quality gate review. Must explain why the review is being bypassed.",
+    )
+    session_id: str | None = Field(
+        default=None,
+        description=(
+            "Optional session ID to target a specific workflow session. "
+            "Use this when multiple workflows are active concurrently to ensure "
+            "the correct session is updated. If omitted, operates on the top-of-stack session."
+        ),
+    )
+
+
+class AbortWorkflowInput(BaseModel):
+    """Input for abort_workflow tool."""
+
+    explanation: str = Field(description="Explanation of why the workflow is being aborted")
+    session_id: str | None = Field(
+        default=None,
+        description=(
+            "Optional session ID to target a specific workflow session. "
+            "Use this when multiple workflows are active concurrently to ensure "
+            "the correct session is aborted. If omitted, aborts the top-of-stack session."
+        ),
+    )
+
+
+# =============================================================================
+# Quality Gate Models
+# =============================================================================
+
+
+class QualityCriteriaResult(BaseModel):
+    """Result for a single quality criterion."""
+
+    criterion: str = Field(description="The quality criterion text")
+    passed: bool = Field(description="Whether this criterion passed")
+    feedback: str | None = Field(default=None, description="Feedback if failed")
+
+
+class QualityGateResult(BaseModel):
+    """Result from quality gate evaluation."""
+
+    passed: bool = Field(description="Overall pass/fail")
+    feedback: str = Field(description="Summary feedback")
+    criteria_results: list[QualityCriteriaResult] = Field(
+        default_factory=list, description="Per-criterion results"
+    )
+
+
+class ReviewInfo(BaseModel):
+    """Information about a review for a step."""
+
+    run_each: str = Field(description="'step' or output name to review")
+    quality_criteria: dict[str, str] = Field(
+        description="Map of criterion name to criterion question"
+    )
+    additional_review_guidance: str | None = Field(
+        default=None,
+        description="Optional guidance for the reviewer about what context to look at",
+    )
+
+
+class ReviewResult(BaseModel):
+    """Result from a single review evaluation."""
+
+    review_run_each: str = Field(description="'step' or output name that was reviewed")
+    target_file: str | None = Field(
+        default=None, description="Specific file reviewed (for per-file reviews)"
+    )
+    passed: bool = Field(description="Whether this review passed")
+    feedback: str = Field(description="Summary feedback")
+    criteria_results: list[QualityCriteriaResult] = Field(
+        default_factory=list, description="Per-criterion results"
+    )
+
+
+# =============================================================================
+# Tool Output Models
+# NOTE: Changes to these models affect MCP tool return types.
+#       Update doc/mcp_interface.md when modifying.
+# =============================================================================
+
+
+class ExpectedOutput(BaseModel):
+    """Describes an expected output for a step."""
+
+    name: str = Field(description="Output name (use as key in finished_step outputs)")
+    type: str = Field(description="Output type: 'file' or 'files'")
+    description: str = Field(description="What this output should contain")
+    required: bool = Field(
+        description="Whether this output must be provided. If false, it can be omitted from finished_step outputs."
+    )
+    syntax_for_finished_step_tool: str = Field(
+        description="The value format to use for this output when calling finished_step"
+    )
+
+
+class ActiveStepInfo(BaseModel):
+    """Information about the step to begin working on."""
+
+    session_id: str = Field(description="Unique session identifier")
+    branch_name: str = Field(description="Git branch for this workflow instance")
+    step_id: str = Field(description="ID of the current step")
+    step_expected_outputs: list[ExpectedOutput] = Field(
+        description="Expected outputs for this step, including type and format hints"
+    )
+    step_reviews: list[ReviewInfo] = Field(
+        default_factory=list, description="Reviews to run when step completes"
+    )
+    step_instructions: str = Field(description="Instructions for the step")
+
+
+class GetWorkflowsResponse(BaseModel):
+    """Response from get_workflows tool."""
+
+    jobs: list[JobInfo] = Field(description="List of all jobs with their workflows")
+
+
+class StackEntry(BaseModel):
+    """An entry in the workflow stack."""
+
+    workflow: str = Field(description="Workflow identifier (job_name/workflow_name)")
+    step: str = Field(description="Current step ID in this workflow")
+
+
+class StartWorkflowResponse(BaseModel):
+    """Response from start_workflow tool."""
+
+    begin_step: ActiveStepInfo = Field(description="Information about the first step to begin")
+    stack: list[StackEntry] = Field(
+        default_factory=list, description="Current workflow stack after starting"
+    )
+
+
+class FinishedStepResponse(BaseModel):
+    """Response from finished_step tool."""
+
+    status: StepStatus = Field(description="Result status")
+
+    # For needs_work status
+    feedback: str | None = Field(default=None, description="Feedback from quality gate")
+    failed_reviews: list[ReviewResult] | None = Field(
+        default=None, description="Failed review results"
+    )
+
+    # For next_step status
+    begin_step: ActiveStepInfo | None = Field(
+        default=None, description="Information about the next step to begin"
+    )
+
+    # For workflow_complete status
+    summary: str | None = Field(default=None, description="Summary of completed workflow")
+    all_outputs: dict[str, str | list[str]] | None = Field(
+        default=None, description="All outputs from all steps"
+    )
+
+    # Stack info (included in all responses)
+    stack: list[StackEntry] = Field(
+        default_factory=list, description="Current workflow stack after this operation"
+    )
+
+
+class AbortWorkflowResponse(BaseModel):
+    """Response from abort_workflow tool."""
+
+    aborted_workflow: str = Field(
+        description="The workflow that was aborted (job_name/workflow_name)"
+    )
+    aborted_step: str = Field(description="The step that was active when aborted")
+    explanation: str = Field(description="The explanation provided for aborting")
+    stack: list[StackEntry] = Field(
+        default_factory=list, description="Current workflow stack after abort"
+    )
+    resumed_workflow: str | None = Field(
+        default=None, description="The workflow now active (if any)"
+    )
+    resumed_step: str | None = Field(default=None, description="The step now active (if any)")
+
+
+# =============================================================================
+# Session State Models
+# =============================================================================
+
+
+class StepProgress(BaseModel):
+    """Progress for a single step in a workflow."""
+
+    step_id: str = Field(description="Step identifier")
+    started_at: str | None = Field(default=None, description="ISO timestamp when started")
+    completed_at: str | None = Field(default=None, description="ISO timestamp when completed")
+    outputs: dict[str, str | list[str]] = Field(
+        default_factory=dict, description="Output files created"
+    )
+    notes: str | None = Field(default=None, description="Notes from agent")
+    quality_attempts: int = Field(default=0, description="Number of quality gate attempts")
+
+
+class WorkflowSession(BaseModel):
+    """State for an active workflow session."""
+
+    session_id: str = Field(description="Unique session identifier")
+    job_name: str = Field(description="Name of the job")
+    workflow_name: str = Field(description="Name of the workflow")
+    instance_id: str | None = Field(default=None, description="Instance identifier")
+    goal: str = Field(description="User's goal for this workflow")
+    branch_name: str = Field(description="Git branch name")
+    current_step_id: str = Field(description="Current step in workflow")
+    current_entry_index: int = Field(
+        default=0, description="Index of current entry in step_entries"
+    )
+    step_progress: dict[str, StepProgress] = Field(
+        default_factory=dict, description="Progress for each step"
+    )
+    started_at: str = Field(description="ISO timestamp when session started")
+    completed_at: str | None = Field(default=None, description="ISO timestamp when completed")
+    status: str = Field(default="active", description="Session status: active, completed, aborted")
+    abort_reason: str | None = Field(
+        default=None, description="Explanation if workflow was aborted"
+    )
+
+    def to_dict(self) -> dict[str, Any]:
+        """Convert to dictionary for JSON serialization."""
+        return self.model_dump()
+
+    @classmethod
+    def from_dict(cls, data: dict[str, Any]) -> "WorkflowSession":
+        """Create from dictionary."""
+        return cls.model_validate(data)
diff --git a/src/deepwork/mcp/server.py b/src/deepwork/mcp/server.py
new file mode 100644
index 00000000..4a227ed1
--- /dev/null
+++ b/src/deepwork/mcp/server.py
@@ -0,0 +1,268 @@
+"""FastMCP server for DeepWork workflows.
+
+This module creates and configures the MCP server that exposes workflow
+management tools to AI agents.
+
+Usage:
+    deepwork serve --path /path/to/project
+
+IMPORTANT: If you modify any tool signatures, parameters, or return types in this
+file, you MUST also update the documentation in doc/mcp_interface.md to keep it
+in sync with the implementation.
+"""
+
+from __future__ import annotations
+
+import logging
+from pathlib import Path
+from typing import Any
+
+from fastmcp import FastMCP
+
+from deepwork.mcp.claude_cli import ClaudeCLI
+from deepwork.mcp.quality_gate import QualityGate
+from deepwork.mcp.schemas import (
+    AbortWorkflowInput,
+    FinishedStepInput,
+    StartWorkflowInput,
+)
+from deepwork.mcp.state import StateManager
+from deepwork.mcp.tools import WorkflowTools
+
+# Configure logging
+logger = logging.getLogger("deepwork.mcp")
+
+
+def create_server(
+    project_root: Path | str,
+    enable_quality_gate: bool = True,
+    quality_gate_timeout: int = 120,
+    quality_gate_max_attempts: int = 3,
+) -> FastMCP:
+    """Create and configure the MCP server.
+
+    Args:
+        project_root: Path to the project root
+        enable_quality_gate: Whether to enable quality gate evaluation (default: True)
+        quality_gate_timeout: Timeout in seconds for quality gate (default: 120)
+        quality_gate_max_attempts: Max attempts before failing quality gate (default: 3)
+
+    Returns:
+        Configured FastMCP server instance
+    """
+    project_path = Path(project_root).resolve()
+
+    # Initialize components
+    state_manager = StateManager(project_path)
+
+    quality_gate: QualityGate | None = None
+    if enable_quality_gate:
+        cli = ClaudeCLI(timeout=quality_gate_timeout)
+        quality_gate = QualityGate(cli=cli)
+
+    tools = WorkflowTools(
+        project_root=project_path,
+        state_manager=state_manager,
+        quality_gate=quality_gate,
+        max_quality_attempts=quality_gate_max_attempts,
+    )
+
+    # Create MCP server
+    mcp = FastMCP(
+        name="deepwork",
+        instructions=_get_server_instructions(),
+    )
+
+    # =========================================================================
+    # MCP Tool Registrations
+    # =========================================================================
+    # IMPORTANT: When modifying these tool signatures (parameters, return types,
+    # descriptions), update doc/mcp_interface.md to keep documentation in sync.
+    # =========================================================================
+
+    def _log_tool_call(tool_name: str, params: dict[str, Any] | None = None) -> None:
+        """Log a tool call with stack information."""
+        stack = [entry.model_dump() for entry in state_manager.get_stack()]
+        log_data = {
+            "tool": tool_name,
+            "stack": stack,
+            "stack_depth": len(stack),
+        }
+        if params:
+            log_data["params"] = params
+        logger.info("MCP tool call: %s", log_data)
+
+    @mcp.tool(
+        description=(
+            "List all available DeepWork workflows. "
+            "Returns job names, workflow definitions, and step information. "
+            "Call this first to discover available workflows."
+        )
+    )
+    def get_workflows() -> dict[str, Any]:
+        """Get all available workflows."""
+        _log_tool_call("get_workflows")
+        response = tools.get_workflows()
+        return response.model_dump()
+
+    @mcp.tool(
+        description=(
+            "Start a new workflow session. "
+            "Creates a git branch, initializes state tracking, and returns "
+            "the first step's instructions. "
+            "Required parameters: goal (what user wants), job_name, workflow_name. "
+            "Optional: instance_id for naming (e.g., 'acme', 'q1-2026'). "
+            "Supports nested workflows - starting a workflow while one is active "
+            "pushes onto the stack. Use abort_workflow to cancel and return to parent."
+        )
+    )
+    async def start_workflow(
+        goal: str,
+        job_name: str,
+        workflow_name: str,
+        instance_id: str | None = None,
+    ) -> dict[str, Any]:
+        """Start a workflow and get first step instructions."""
+        _log_tool_call(
+            "start_workflow",
+            {
+                "goal": goal,
+                "job_name": job_name,
+                "workflow_name": workflow_name,
+                "instance_id": instance_id,
+            },
+        )
+        input_data = StartWorkflowInput(
+            goal=goal,
+            job_name=job_name,
+            workflow_name=workflow_name,
+            instance_id=instance_id,
+        )
+        response = await tools.start_workflow(input_data)
+        return response.model_dump()
+
+    @mcp.tool(
+        description=(
+            "Report that you've finished a workflow step. "
+            "Validates outputs against quality criteria (if configured), "
+            "then returns either: "
+            "'needs_work' with feedback to fix issues, "
+            "'next_step' with instructions for the next step, or "
+            "'workflow_complete' when finished (pops from stack if nested). "
+            "Required: outputs (map of output names to file paths created). "
+            "For outputs with type 'file': pass a single string path. "
+            "For outputs with type 'files': pass a list of string paths. "
+            "Outputs marked required: true must be provided; required: false outputs can be omitted. "
+            "Check step_expected_outputs in the response to see each output's type and required status. "
+            "Optional: notes about work done. "
+            "Optional: quality_review_override_reason to skip quality review (must explain why). "
+            "Optional: session_id to target a specific workflow session "
+            "(use when multiple workflows are active concurrently)."
+        )
+    )
+    async def finished_step(
+        outputs: dict[str, str | list[str]],
+        notes: str | None = None,
+        quality_review_override_reason: str | None = None,
+        session_id: str | None = None,
+    ) -> dict[str, Any]:
+        """Report step completion and get next instructions."""
+        _log_tool_call(
+            "finished_step",
+            {
+                "outputs": outputs,
+                "notes": notes,
+                "quality_review_override_reason": quality_review_override_reason,
+                "session_id": session_id,
+            },
+        )
+        input_data = FinishedStepInput(
+            outputs=outputs,
+            notes=notes,
+            quality_review_override_reason=quality_review_override_reason,
+            session_id=session_id,
+        )
+        response = await tools.finished_step(input_data)
+        return response.model_dump()
+
+    @mcp.tool(
+        description=(
+            "Abort the current workflow and return to the parent workflow (if nested). "
+            "Use this when a workflow cannot be completed and needs to be abandoned. "
+            "Required: explanation (why the workflow is being aborted). "
+            "Optional: session_id to target a specific workflow session "
+            "(use when multiple workflows are active concurrently). "
+            "Returns the aborted workflow info and the resumed parent workflow (if any)."
+        )
+    )
+    async def abort_workflow(
+        explanation: str,
+        session_id: str | None = None,
+    ) -> dict[str, Any]:
+        """Abort the current workflow and return to parent."""
+        _log_tool_call(
+            "abort_workflow",
+            {"explanation": explanation, "session_id": session_id},
+        )
+        input_data = AbortWorkflowInput(explanation=explanation, session_id=session_id)
+        response = await tools.abort_workflow(input_data)
+        return response.model_dump()
+
+    return mcp
+
+
+def _get_server_instructions() -> str:
+    """Get the server instructions for agents.
+
+    Returns:
+        Instructions string describing how to use the DeepWork MCP server.
+    """
+    return """# DeepWork Workflow Server
+
+This MCP server guides you through multi-step workflows with quality gates.
+
+## Workflow
+
+1. **Discover**: Call `get_workflows` to see available workflows
+2. **Start**: Call `start_workflow` with your goal, job_name, and workflow_name
+3. **Execute**: Follow the step instructions returned
+4. **Checkpoint**: Call `finished_step` with your outputs when done with each step
+5. **Iterate**: If `needs_work`, fix issues and call `finished_step` again
+6. **Continue**: If `next_step`, execute new instructions and repeat
+7. **Complete**: When `workflow_complete`, the workflow is done
+
+## Quality Gates
+
+Steps may have quality criteria. When you call `finished_step`:
+- Your outputs are evaluated against the criteria
+- If any fail, you'll get `needs_work` status with feedback
+- Fix the issues and call `finished_step` again
+- After passing, you'll get the next step or completion
+
+## Nested Workflows
+
+Workflows can be nested - starting a new workflow while one is active pushes
+onto a stack. This is useful when a step requires running another workflow.
+
+- All tool responses include a `stack` field showing the current workflow stack
+- Each stack entry shows `{workflow: "job/workflow", step: "current_step"}`
+- When a workflow completes, it pops from the stack and resumes the parent
+- Use `abort_workflow` to cancel the current workflow and return to parent
+
+## Aborting Workflows
+
+If a workflow cannot be completed, use `abort_workflow` with an explanation:
+- The current workflow is marked as aborted and popped from the stack
+- If there was a parent workflow, it becomes active again
+- The explanation is saved for debugging and audit purposes
+
+## Best Practices
+
+- Always call `get_workflows` first to understand available options
+- Provide clear goals when starting - they're used for context
+- Create all expected outputs before calling `finished_step`
+- Use instance_id for meaningful names (e.g., client name, quarter)
+- Read quality gate feedback carefully before retrying
+- Check the `stack` field in responses to understand nesting depth
+- Use `abort_workflow` rather than leaving workflows in a broken state
+"""
diff --git a/src/deepwork/mcp/state.py b/src/deepwork/mcp/state.py
new file mode 100644
index 00000000..04114452
--- /dev/null
+++ b/src/deepwork/mcp/state.py
@@ -0,0 +1,468 @@
+"""Workflow state management for MCP server.
+
+State is persisted to `.deepwork/tmp/session_[id].json` for transparency
+and recovery.
+
+Supports nested workflows via a session stack - when a step starts a new
+workflow, it's pushed onto the stack. When a workflow completes or is
+aborted, it's popped from the stack.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import json
+import uuid
+from datetime import UTC, datetime
+from pathlib import Path
+
+import aiofiles
+
+from deepwork.mcp.schemas import StackEntry, StepProgress, WorkflowSession
+
+
+class StateError(Exception):
+    """Exception raised for state management errors."""
+
+    pass
+
+
+class StateManager:
+    """Manages workflow session state with stack-based nesting support.
+
+    Sessions are persisted to `.deepwork/tmp/` as JSON files for:
+    - Transparency: Users can inspect session state
+    - Recovery: Sessions survive server restarts
+    - Debugging: State history is preserved
+
+    This implementation is async-safe and uses a lock to prevent
+    concurrent access issues.
+
+    Supports nested workflows via a session stack - starting a new workflow
+    while one is active pushes onto the stack. Completing or aborting pops
+    from the stack.
+    """
+
+    def __init__(self, project_root: Path):
+        """Initialize state manager.
+
+        Args:
+            project_root: Path to the project root
+        """
+        self.project_root = project_root
+        self.sessions_dir = project_root / ".deepwork" / "tmp"
+        self._session_stack: list[WorkflowSession] = []
+        self._lock = asyncio.Lock()
+
+    def _ensure_sessions_dir(self) -> None:
+        """Ensure the sessions directory exists."""
+        self.sessions_dir.mkdir(parents=True, exist_ok=True)
+
+    def _session_file(self, session_id: str) -> Path:
+        """Get the path to a session file."""
+        return self.sessions_dir / f"session_{session_id}.json"
+
+    def _generate_session_id(self) -> str:
+        """Generate a unique session ID."""
+        return str(uuid.uuid4())[:8]
+
+    def _generate_branch_name(
+        self, job_name: str, workflow_name: str, instance_id: str | None
+    ) -> str:
+        """Generate a git branch name for the workflow.
+
+        Format: deepwork/[job_name]-[workflow_name]-[instance_id or date]
+        """
+        date_str = datetime.now(UTC).strftime("%Y%m%d")
+        instance = instance_id or date_str
+        return f"deepwork/{job_name}-{workflow_name}-{instance}"
+
+    async def create_session(
+        self,
+        job_name: str,
+        workflow_name: str,
+        goal: str,
+        first_step_id: str,
+        instance_id: str | None = None,
+    ) -> WorkflowSession:
+        """Create a new workflow session.
+
+        Args:
+            job_name: Name of the job
+            workflow_name: Name of the workflow
+            goal: User's goal for this workflow
+            first_step_id: ID of the first step
+            instance_id: Optional instance identifier
+
+        Returns:
+            New WorkflowSession
+        """
+        async with self._lock:
+            self._ensure_sessions_dir()
+
+            session_id = self._generate_session_id()
+            branch_name = self._generate_branch_name(job_name, workflow_name, instance_id)
+            now = datetime.now(UTC).isoformat()
+
+            session = WorkflowSession(
+                session_id=session_id,
+                job_name=job_name,
+                workflow_name=workflow_name,
+                instance_id=instance_id,
+                goal=goal,
+                branch_name=branch_name,
+                current_step_id=first_step_id,
+                current_entry_index=0,
+                step_progress={},
+                started_at=now,
+                status="active",
+            )
+
+            await self._save_session_unlocked(session)
+            self._session_stack.append(session)
+            return session
+
+    async def _save_session_unlocked(self, session: WorkflowSession) -> None:
+        """Save session to file (must be called with lock held)."""
+        self._ensure_sessions_dir()
+        session_file = self._session_file(session.session_id)
+        content = json.dumps(session.to_dict(), indent=2)
+        async with aiofiles.open(session_file, "w", encoding="utf-8") as f:
+            await f.write(content)
+
+    async def _save_session(self, session: WorkflowSession) -> None:
+        """Save session to file with lock."""
+        async with self._lock:
+            await self._save_session_unlocked(session)
+
+    async def load_session(self, session_id: str) -> WorkflowSession:
+        """Load a session from file.
+
+        Args:
+            session_id: Session ID to load
+
+        Returns:
+            WorkflowSession
+
+        Raises:
+            StateError: If session not found
+        """
+        async with self._lock:
+            session_file = self._session_file(session_id)
+            if not session_file.exists():
+                raise StateError(f"Session not found: {session_id}")
+
+            async with aiofiles.open(session_file, encoding="utf-8") as f:
+                content = await f.read()
+                data = json.loads(content)
+
+            session = WorkflowSession.from_dict(data)
+            # Replace top of stack or push if empty
+            if self._session_stack:
+                self._session_stack[-1] = session
+            else:
+                self._session_stack.append(session)
+            return session
+
+    def get_active_session(self) -> WorkflowSession | None:
+        """Get the currently active session (top of stack).
+
+        Returns:
+            Active session or None if no session active
+        """
+        return self._session_stack[-1] if self._session_stack else None
+
+    def require_active_session(self) -> WorkflowSession:
+        """Get active session (top of stack) or raise error.
+
+        Returns:
+            Active session
+
+        Raises:
+            StateError: If no active session
+        """
+        if not self._session_stack:
+            raise StateError("No active workflow session. Use start_workflow to begin a workflow.")
+        return self._session_stack[-1]
+
+    def _resolve_session(self, session_id: str | None = None) -> WorkflowSession:
+        """Resolve a session by ID or fall back to top-of-stack.
+
+        This is used internally (called inside locked blocks or sync methods)
+        to find a specific session when session_id is provided, or fall back
+        to the default top-of-stack behavior.
+
+        Args:
+            session_id: Optional session ID to look up. If None, returns top-of-stack.
+
+        Returns:
+            WorkflowSession matching the ID, or the active (top-of-stack) session.
+
+        Raises:
+            StateError: If session_id is provided but not found, or no active session.
+        """
+        if session_id:
+            for s in self._session_stack:
+                if s.session_id == session_id:
+                    return s
+            raise StateError(f"Session '{session_id}' not found in active stack")
+        return self.require_active_session()
+
+    async def start_step(self, step_id: str, session_id: str | None = None) -> None:
+        """Mark a step as started.
+
+        Args:
+            step_id: Step ID to start
+            session_id: Optional session ID to target a specific session
+
+        Raises:
+            StateError: If no active session or session_id not found
+        """
+        async with self._lock:
+            session = self._resolve_session(session_id)
+            now = datetime.now(UTC).isoformat()
+
+            if step_id not in session.step_progress:
+                session.step_progress[step_id] = StepProgress(
+                    step_id=step_id,
+                    started_at=now,
+                )
+            else:
+                session.step_progress[step_id].started_at = now
+
+            session.current_step_id = step_id
+            await self._save_session_unlocked(session)
+
+    async def complete_step(
+        self,
+        step_id: str,
+        outputs: dict[str, str | list[str]],
+        notes: str | None = None,
+        session_id: str | None = None,
+    ) -> None:
+        """Mark a step as completed.
+
+        Args:
+            step_id: Step ID to complete
+            outputs: Map of output names to file path(s)
+            notes: Optional notes
+            session_id: Optional session ID to target a specific session
+
+        Raises:
+            StateError: If no active session or session_id not found
+        """
+        async with self._lock:
+            session = self._resolve_session(session_id)
+            now = datetime.now(UTC).isoformat()
+
+            if step_id not in session.step_progress:
+                session.step_progress[step_id] = StepProgress(
+                    step_id=step_id,
+                    started_at=now,
+                )
+
+            progress = session.step_progress[step_id]
+            progress.completed_at = now
+            progress.outputs = outputs
+            progress.notes = notes
+
+            await self._save_session_unlocked(session)
+
+    async def record_quality_attempt(self, step_id: str, session_id: str | None = None) -> int:
+        """Record a quality gate attempt for a step.
+
+        Args:
+            step_id: Step ID
+            session_id: Optional session ID to target a specific session
+
+        Returns:
+            Total number of attempts for this step
+
+        Raises:
+            StateError: If no active session or session_id not found
+        """
+        async with self._lock:
+            session = self._resolve_session(session_id)
+
+            if step_id not in session.step_progress:
+                session.step_progress[step_id] = StepProgress(step_id=step_id)
+
+            session.step_progress[step_id].quality_attempts += 1
+            await self._save_session_unlocked(session)
+
+            return session.step_progress[step_id].quality_attempts
+
+    async def advance_to_step(
+        self, step_id: str, entry_index: int, session_id: str | None = None
+    ) -> None:
+        """Advance the session to a new step.
+
+        Args:
+            step_id: New current step ID
+            entry_index: Index in workflow step_entries
+            session_id: Optional session ID to target a specific session
+
+        Raises:
+            StateError: If no active session or session_id not found
+        """
+        async with self._lock:
+            session = self._resolve_session(session_id)
+            session.current_step_id = step_id
+            session.current_entry_index = entry_index
+            await self._save_session_unlocked(session)
+
+    async def complete_workflow(self, session_id: str | None = None) -> WorkflowSession | None:
+        """Mark the workflow as complete and remove from stack.
+
+        Args:
+            session_id: Optional session ID to target a specific session.
+                If omitted, completes the top-of-stack session.
+
+        Returns:
+            The new active session after removal, or None if stack is empty
+
+        Raises:
+            StateError: If no active session or session_id not found
+        """
+        async with self._lock:
+            session = self._resolve_session(session_id)
+            now = datetime.now(UTC).isoformat()
+            session.completed_at = now
+            session.status = "completed"
+            await self._save_session_unlocked(session)
+
+            # Remove completed session from stack (filter, not pop, for mid-stack removal)
+            self._session_stack = [
+                s for s in self._session_stack if s.session_id != session.session_id
+            ]
+
+            # Return new active session (if any)
+            return self._session_stack[-1] if self._session_stack else None
+
+    async def abort_workflow(
+        self, explanation: str, session_id: str | None = None
+    ) -> tuple[WorkflowSession, WorkflowSession | None]:
+        """Abort a workflow and remove from stack.
+
+        Args:
+            explanation: Reason for aborting the workflow
+            session_id: Optional session ID to target a specific session.
+                If omitted, aborts the top-of-stack session.
+
+        Returns:
+            Tuple of (aborted session, new active session or None)
+
+        Raises:
+            StateError: If no active session or session_id not found
+        """
+        async with self._lock:
+            session = self._resolve_session(session_id)
+            now = datetime.now(UTC).isoformat()
+            session.completed_at = now
+            session.status = "aborted"
+            session.abort_reason = explanation
+            await self._save_session_unlocked(session)
+
+            # Remove aborted session from stack (filter, not pop, for mid-stack removal)
+            self._session_stack = [
+                s for s in self._session_stack if s.session_id != session.session_id
+            ]
+
+            # Return aborted session and new active session (if any)
+            new_active = self._session_stack[-1] if self._session_stack else None
+            return session, new_active
+
+    def get_all_outputs(self, session_id: str | None = None) -> dict[str, str | list[str]]:
+        """Get all outputs from all completed steps.
+
+        Args:
+            session_id: Optional session ID to target a specific session
+
+        Returns:
+            Merged dict of all output names to file path(s)
+
+        Raises:
+            StateError: If no active session or session_id not found
+        """
+        session = self._resolve_session(session_id)
+        all_outputs: dict[str, str | list[str]] = {}
+        for progress in session.step_progress.values():
+            all_outputs.update(progress.outputs)
+        return all_outputs
+
+    def get_stack(self) -> list[StackEntry]:
+        """Get the current workflow stack as StackEntry objects.
+
+        Returns:
+            List of StackEntry with workflow and step info, bottom to top
+        """
+        return [
+            StackEntry(
+                workflow=f"{s.job_name}/{s.workflow_name}",
+                step=s.current_step_id,
+            )
+            for s in self._session_stack
+        ]
+
+    def get_stack_depth(self) -> int:
+        """Get the current stack depth.
+
+        Returns:
+            Number of active workflow sessions on the stack
+        """
+        return len(self._session_stack)
+
+    async def list_sessions(self) -> list[WorkflowSession]:
+        """List all saved sessions.
+
+        Returns:
+            List of WorkflowSession objects
+        """
+        if not self.sessions_dir.exists():
+            return []
+
+        sessions = []
+        for session_file in self.sessions_dir.glob("session_*.json"):
+            try:
+                async with aiofiles.open(session_file, encoding="utf-8") as f:
+                    content = await f.read()
+                    data = json.loads(content)
+                sessions.append(WorkflowSession.from_dict(data))
+            except (json.JSONDecodeError, ValueError):
+                # Skip corrupted files
+                continue
+
+        return sorted(sessions, key=lambda s: s.started_at, reverse=True)
+
+    async def find_active_sessions_for_workflow(
+        self, job_name: str, workflow_name: str
+    ) -> list[WorkflowSession]:
+        """Find active sessions for a specific workflow.
+
+        Args:
+            job_name: Job name
+            workflow_name: Workflow name
+
+        Returns:
+            List of active sessions matching the criteria
+        """
+        all_sessions = await self.list_sessions()
+        return [
+            s
+            for s in all_sessions
+            if s.job_name == job_name and s.workflow_name == workflow_name and s.status == "active"
+        ]
+
+    async def delete_session(self, session_id: str) -> None:
+        """Delete a session file.
+
+        Args:
+            session_id: Session ID to delete
+        """
+        async with self._lock:
+            session_file = self._session_file(session_id)
+            if session_file.exists():
+                session_file.unlink()
+
+            # Remove from stack if present
+            self._session_stack = [s for s in self._session_stack if s.session_id != session_id]
diff --git a/src/deepwork/mcp/tools.py b/src/deepwork/mcp/tools.py
new file mode 100644
index 00000000..6bc5c046
--- /dev/null
+++ b/src/deepwork/mcp/tools.py
@@ -0,0 +1,534 @@
+"""MCP tool implementations for DeepWork workflows.
+
+This module provides the core tools for guiding agents through workflows:
+- get_workflows: List all available workflows
+- start_workflow: Initialize a workflow session
+- finished_step: Report step completion and get next instructions
+"""
+
+from __future__ import annotations
+
+import logging
+from pathlib import Path
+from typing import TYPE_CHECKING
+
+from deepwork.core.parser import (
+    JobDefinition,
+    OutputSpec,
+    ParseError,
+    Workflow,
+    parse_job_definition,
+)
+from deepwork.mcp.schemas import (
+    AbortWorkflowInput,
+    AbortWorkflowResponse,
+    ActiveStepInfo,
+    ExpectedOutput,
+    FinishedStepInput,
+    FinishedStepResponse,
+    GetWorkflowsResponse,
+    JobInfo,
+    ReviewInfo,
+    StartWorkflowInput,
+    StartWorkflowResponse,
+    StepStatus,
+    WorkflowInfo,
+)
+from deepwork.mcp.state import StateManager
+
+logger = logging.getLogger("deepwork.mcp")
+
+if TYPE_CHECKING:
+    from deepwork.mcp.quality_gate import QualityGate
+
+
+class ToolError(Exception):
+    """Exception raised for tool execution errors."""
+
+    pass
+
+
+class WorkflowTools:
+    """Implements the MCP tools for workflow management."""
+
+    def __init__(
+        self,
+        project_root: Path,
+        state_manager: StateManager,
+        quality_gate: QualityGate | None = None,
+        max_quality_attempts: int = 3,
+    ):
+        """Initialize workflow tools.
+
+        Args:
+            project_root: Path to project root
+            state_manager: State manager instance
+            quality_gate: Optional quality gate for step validation
+            max_quality_attempts: Maximum attempts before failing quality gate
+        """
+        self.project_root = project_root
+        self.jobs_dir = project_root / ".deepwork" / "jobs"
+        self.state_manager = state_manager
+        self.quality_gate = quality_gate
+        self.max_quality_attempts = max_quality_attempts
+
+    def _load_all_jobs(self) -> list[JobDefinition]:
+        """Load all job definitions from the jobs directory.
+
+        Returns:
+            List of parsed JobDefinition objects
+        """
+        jobs: list[JobDefinition] = []
+
+        if not self.jobs_dir.exists():
+            return jobs
+
+        for job_dir in self.jobs_dir.iterdir():
+            if job_dir.is_dir() and (job_dir / "job.yml").exists():
+                try:
+                    job = parse_job_definition(job_dir)
+                    jobs.append(job)
+                except ParseError as e:
+                    logger.warning("Skipping invalid job '%s': %s", job_dir.name, e)
+                    continue
+
+        return jobs
+
+    def _job_to_info(self, job: JobDefinition) -> JobInfo:
+        """Convert a JobDefinition to JobInfo for response.
+
+        Args:
+            job: Parsed job definition
+
+        Returns:
+            JobInfo with workflow details
+        """
+        # Convert workflows
+        workflows = [
+            WorkflowInfo(
+                name=wf.name,
+                summary=wf.summary,
+            )
+            for wf in job.workflows
+        ]
+
+        return JobInfo(
+            name=job.name,
+            summary=job.summary,
+            description=job.description,
+            workflows=workflows,
+        )
+
+    def _get_job(self, job_name: str) -> JobDefinition:
+        """Get a specific job by name.
+
+        Args:
+            job_name: Job name to find
+
+        Returns:
+            JobDefinition
+
+        Raises:
+            ToolError: If job not found
+        """
+        job_dir = self.jobs_dir / job_name
+        if not job_dir.exists():
+            raise ToolError(f"Job not found: {job_name}")
+
+        try:
+            return parse_job_definition(job_dir)
+        except ParseError as e:
+            raise ToolError(f"Failed to parse job '{job_name}': {e}") from e
+
+    def _get_workflow(self, job: JobDefinition, workflow_name: str) -> Workflow:
+        """Get a specific workflow from a job.
+
+        If the workflow name doesn't match any workflow but the job has exactly
+        one workflow, that workflow is returned automatically.
+
+        Args:
+            job: Job definition
+            workflow_name: Workflow name to find
+
+        Returns:
+            Workflow
+
+        Raises:
+            ToolError: If workflow not found and job has multiple workflows
+        """
+        for wf in job.workflows:
+            if wf.name == workflow_name:
+                return wf
+
+        # Auto-select if there's only one workflow
+        if len(job.workflows) == 1:
+            return job.workflows[0]
+
+        available = [wf.name for wf in job.workflows]
+        raise ToolError(
+            f"Workflow '{workflow_name}' not found in job '{job.name}'. "
+            f"Available workflows: {', '.join(available)}"
+        )
+
+    def _get_step_instructions(self, job: JobDefinition, step_id: str) -> str:
+        """Get the instruction content for a step.
+
+        Args:
+            job: Job definition
+            step_id: Step ID
+
+        Returns:
+            Step instruction content
+
+        Raises:
+            ToolError: If step or instruction file not found
+        """
+        step = job.get_step(step_id)
+        if step is None:
+            raise ToolError(f"Step not found: {step_id}")
+
+        instructions_path = job.job_dir / step.instructions_file
+        if not instructions_path.exists():
+            raise ToolError(f"Instructions file not found: {step.instructions_file}")
+
+        return instructions_path.read_text(encoding="utf-8")
+
+    def _validate_outputs(
+        self,
+        submitted: dict[str, str | list[str]],
+        declared: list[OutputSpec],
+    ) -> None:
+        """Validate submitted outputs against declared output specs.
+
+        Checks:
+        1. Every submitted key matches a declared output name
+        2. Every declared output has a corresponding submitted key
+        3. type: file -> value is a single string path, file must exist
+        4. type: files -> value is a list of strings, each file must exist
+
+        Args:
+            submitted: The outputs dict from the agent
+            declared: The OutputSpec list from the step definition
+
+        Raises:
+            ToolError: If validation fails
+        """
+        declared_map = {spec.name: spec for spec in declared}
+        declared_names = set(declared_map.keys())
+        submitted_names = set(submitted.keys())
+
+        # Check for unknown output keys
+        extra = submitted_names - declared_names
+        if extra:
+            raise ToolError(
+                f"Unknown output names: {', '.join(sorted(extra))}. "
+                f"Declared outputs: {', '.join(sorted(declared_names))}"
+            )
+
+        # Check for missing required output keys
+        required_names = {spec.name for spec in declared if spec.required}
+        missing = required_names - submitted_names
+        if missing:
+            raise ToolError(
+                f"Missing required outputs: {', '.join(sorted(missing))}. "
+                f"All required outputs must be provided."
+            )
+
+        # Validate types and file existence
+        for name, value in submitted.items():
+            spec = declared_map[name]
+
+            if spec.type == "file":
+                if not isinstance(value, str):
+                    raise ToolError(
+                        f"Output '{name}' is declared as type 'file' and must be a "
+                        f"single string path, got {type(value).__name__}"
+                    )
+                full_path = self.project_root / value
+                if not full_path.exists():
+                    raise ToolError(f"Output '{name}': file not found at '{value}'")
+
+            elif spec.type == "files":
+                if not isinstance(value, list):
+                    raise ToolError(
+                        f"Output '{name}' is declared as type 'files' and must be a "
+                        f"list of paths, got {type(value).__name__}"
+                    )
+                for path in value:
+                    if not isinstance(path, str):
+                        raise ToolError(
+                            f"Output '{name}': all paths must be strings, got {type(path).__name__}"
+                        )
+                    full_path = self.project_root / path
+                    if not full_path.exists():
+                        raise ToolError(f"Output '{name}': file not found at '{path}'")
+
+    @staticmethod
+    def _build_expected_outputs(outputs: list[OutputSpec]) -> list[ExpectedOutput]:
+        """Build ExpectedOutput list from OutputSpec list."""
+        syntax_map = {
+            "file": "filepath",
+            "files": "array of filepaths for all individual files",
+        }
+        return [
+            ExpectedOutput(
+                name=out.name,
+                type=out.type,
+                description=out.description,
+                required=out.required,
+                syntax_for_finished_step_tool=syntax_map.get(out.type, out.type),
+            )
+            for out in outputs
+        ]
+
+    # =========================================================================
+    # Tool Implementations
+    # =========================================================================
+
+    def get_workflows(self) -> GetWorkflowsResponse:
+        """List all available workflows.
+
+        Returns:
+            GetWorkflowsResponse with all jobs and their workflows
+        """
+        jobs = self._load_all_jobs()
+        job_infos = [self._job_to_info(job) for job in jobs]
+
+        return GetWorkflowsResponse(jobs=job_infos)
+
+    async def start_workflow(self, input_data: StartWorkflowInput) -> StartWorkflowResponse:
+        """Start a new workflow session.
+
+        Args:
+            input_data: StartWorkflowInput with goal, job_name, workflow_name
+
+        Returns:
+            StartWorkflowResponse with session ID, branch, and first step
+
+        Raises:
+            ToolError: If job or workflow not found
+        """
+        # Load job and workflow
+        job = self._get_job(input_data.job_name)
+        workflow = self._get_workflow(job, input_data.workflow_name)
+
+        if not workflow.steps:
+            raise ToolError(f"Workflow '{workflow.name}' has no steps")
+
+        first_step_id = workflow.steps[0]
+        first_step = job.get_step(first_step_id)
+        if first_step is None:
+            raise ToolError(f"First step not found: {first_step_id}")
+
+        # Create session (use resolved workflow name in case it was auto-selected)
+        session = await self.state_manager.create_session(
+            job_name=input_data.job_name,
+            workflow_name=workflow.name,
+            goal=input_data.goal,
+            first_step_id=first_step_id,
+            instance_id=input_data.instance_id,
+        )
+
+        # Mark first step as started
+        await self.state_manager.start_step(first_step_id)
+
+        # Get step instructions
+        instructions = self._get_step_instructions(job, first_step_id)
+
+        # Get expected outputs
+        step_outputs = self._build_expected_outputs(first_step.outputs)
+
+        return StartWorkflowResponse(
+            begin_step=ActiveStepInfo(
+                session_id=session.session_id,
+                branch_name=session.branch_name,
+                step_id=first_step_id,
+                step_expected_outputs=step_outputs,
+                step_reviews=[
+                    ReviewInfo(
+                        run_each=r.run_each,
+                        quality_criteria=r.quality_criteria,
+                        additional_review_guidance=r.additional_review_guidance,
+                    )
+                    for r in first_step.reviews
+                ],
+                step_instructions=instructions,
+            ),
+            stack=self.state_manager.get_stack(),
+        )
+
+    async def finished_step(self, input_data: FinishedStepInput) -> FinishedStepResponse:
+        """Report step completion and get next instructions.
+
+        Args:
+            input_data: FinishedStepInput with outputs and optional notes
+
+        Returns:
+            FinishedStepResponse with status and next step or completion
+
+        Raises:
+            StateError: If no active session
+            ToolError: If quality gate fails after max attempts
+        """
+        session = self.state_manager._resolve_session(input_data.session_id)
+        sid = session.session_id
+        current_step_id = session.current_step_id
+
+        # Load job and workflow
+        job = self._get_job(session.job_name)
+        workflow = self._get_workflow(job, session.workflow_name)
+        current_step = job.get_step(current_step_id)
+
+        if current_step is None:
+            raise ToolError(f"Current step not found: {current_step_id}")
+
+        # Validate outputs against step's declared output specs
+        self._validate_outputs(input_data.outputs, current_step.outputs)
+
+        # Run quality gate if available and step has reviews (unless overridden)
+        if (
+            self.quality_gate
+            and current_step.reviews
+            and not input_data.quality_review_override_reason
+        ):
+            attempts = await self.state_manager.record_quality_attempt(
+                current_step_id, session_id=sid
+            )
+
+            # Build output specs map for evaluate_reviews
+            output_specs = {out.name: out.type for out in current_step.outputs}
+
+            failed_reviews = await self.quality_gate.evaluate_reviews(
+                reviews=[
+                    {
+                        "run_each": r.run_each,
+                        "quality_criteria": r.quality_criteria,
+                        "additional_review_guidance": r.additional_review_guidance,
+                    }
+                    for r in current_step.reviews
+                ],
+                outputs=input_data.outputs,
+                output_specs=output_specs,
+                project_root=self.project_root,
+                notes=input_data.notes,
+            )
+
+            if failed_reviews:
+                # Check max attempts
+                if attempts >= self.max_quality_attempts:
+                    feedback_parts = [r.feedback for r in failed_reviews]
+                    raise ToolError(
+                        f"Quality gate failed after {self.max_quality_attempts} attempts. "
+                        f"Feedback: {'; '.join(feedback_parts)}"
+                    )
+
+                # Return needs_work status
+                combined_feedback = "; ".join(r.feedback for r in failed_reviews)
+                return FinishedStepResponse(
+                    status=StepStatus.NEEDS_WORK,
+                    feedback=combined_feedback,
+                    failed_reviews=failed_reviews,
+                    stack=self.state_manager.get_stack(),
+                )
+
+        # Mark step as completed
+        await self.state_manager.complete_step(
+            step_id=current_step_id,
+            outputs=input_data.outputs,
+            notes=input_data.notes,
+            session_id=sid,
+        )
+
+        # Find next step
+        current_entry_index = session.current_entry_index
+        next_entry_index = current_entry_index + 1
+
+        if next_entry_index >= len(workflow.step_entries):
+            # Workflow complete - get outputs before completing (which removes from stack)
+            all_outputs = self.state_manager.get_all_outputs(session_id=sid)
+            await self.state_manager.complete_workflow(session_id=sid)
+
+            return FinishedStepResponse(
+                status=StepStatus.WORKFLOW_COMPLETE,
+                summary=f"Workflow '{workflow.name}' completed successfully!",
+                all_outputs=all_outputs,
+                stack=self.state_manager.get_stack(),
+            )
+
+        # Get next step
+        next_entry = workflow.step_entries[next_entry_index]
+
+        # For concurrent entries, we use the first step as the "current"
+        # The agent will handle running them in parallel via Task tool
+        next_step_id = next_entry.step_ids[0]
+        next_step = job.get_step(next_step_id)
+
+        if next_step is None:
+            raise ToolError(f"Next step not found: {next_step_id}")
+
+        # Advance session
+        await self.state_manager.advance_to_step(next_step_id, next_entry_index, session_id=sid)
+        await self.state_manager.start_step(next_step_id, session_id=sid)
+
+        # Get instructions
+        instructions = self._get_step_instructions(job, next_step_id)
+        step_outputs = self._build_expected_outputs(next_step.outputs)
+
+        # Add info about concurrent steps if this is a concurrent entry
+        if next_entry.is_concurrent and len(next_entry.step_ids) > 1:
+            concurrent_info = (
+                f"\n\n**CONCURRENT STEPS**: This entry has {len(next_entry.step_ids)} "
+                f"steps that can run in parallel: {', '.join(next_entry.step_ids)}\n"
+                f"Use the Task tool to execute them concurrently."
+            )
+            instructions = instructions + concurrent_info
+
+        # Reload session to get current state after advance
+        session = self.state_manager._resolve_session(sid)
+
+        return FinishedStepResponse(
+            status=StepStatus.NEXT_STEP,
+            begin_step=ActiveStepInfo(
+                session_id=session.session_id,
+                branch_name=session.branch_name,
+                step_id=next_step_id,
+                step_expected_outputs=step_outputs,
+                step_reviews=[
+                    ReviewInfo(
+                        run_each=r.run_each,
+                        quality_criteria=r.quality_criteria,
+                        additional_review_guidance=r.additional_review_guidance,
+                    )
+                    for r in next_step.reviews
+                ],
+                step_instructions=instructions,
+            ),
+            stack=self.state_manager.get_stack(),
+        )
+
+    async def abort_workflow(self, input_data: AbortWorkflowInput) -> AbortWorkflowResponse:
+        """Abort the current workflow and return to the previous one.
+
+        Args:
+            input_data: AbortWorkflowInput with explanation
+
+        Returns:
+            AbortWorkflowResponse with abort info and new stack state
+
+        Raises:
+            StateError: If no active session
+        """
+        aborted_session, new_active = await self.state_manager.abort_workflow(
+            input_data.explanation, session_id=input_data.session_id
+        )
+
+        return AbortWorkflowResponse(
+            aborted_workflow=f"{aborted_session.job_name}/{aborted_session.workflow_name}",
+            aborted_step=aborted_session.current_step_id,
+            explanation=input_data.explanation,
+            stack=self.state_manager.get_stack(),
+            resumed_workflow=(
+                f"{new_active.job_name}/{new_active.workflow_name}" if new_active else None
+            ),
+            resumed_step=new_active.current_step_id if new_active else None,
+        )
diff --git a/src/deepwork/schemas/job.schema.json b/src/deepwork/schemas/job.schema.json
new file mode 100644
index 00000000..4f2227cc
--- /dev/null
+++ b/src/deepwork/schemas/job.schema.json
@@ -0,0 +1,404 @@
+{
+  "$schema": "http://json-schema.org/draft-07/schema#",
+  "$id": "https://deepwork.dev/schemas/job.schema.json",
+  "title": "DeepWork Job Definition",
+  "description": "Schema for DeepWork job.yml files. Jobs are multi-step workflows executed by AI agents.",
+  "type": "object",
+  "required": [
+    "name",
+    "version",
+    "summary",
+    "steps"
+  ],
+  "additionalProperties": false,
+  "properties": {
+    "name": {
+      "type": "string",
+      "pattern": "^[a-z][a-z0-9_]*$",
+      "description": "Job name (lowercase letters, numbers, underscores, must start with letter). Example: 'competitive_research'"
+    },
+    "version": {
+      "type": "string",
+      "pattern": "^\\d+\\.\\d+\\.\\d+$",
+      "description": "Semantic version (e.g., '1.0.0')"
+    },
+    "summary": {
+      "type": "string",
+      "minLength": 1,
+      "maxLength": 200,
+      "description": "Brief one-line summary of what this job accomplishes. Used in skill descriptions."
+    },
+    "description": {
+      "type": "string",
+      "minLength": 1,
+      "description": "Detailed multi-line description of the job's purpose, process, and goals"
+    },
+    "workflows": {
+      "type": "array",
+      "description": "Named workflows that group steps into multi-step sequences. Workflows define execution order.",
+      "items": {
+        "$ref": "#/$defs/workflow"
+      }
+    },
+    "changelog": {
+      "type": "array",
+      "description": "Version history documenting changes to the job definition",
+      "items": {
+        "$ref": "#/$defs/changelogEntry"
+      }
+    },
+    "steps": {
+      "type": "array",
+      "minItems": 1,
+      "description": "List of steps in the job. Each step becomes a skill/command.",
+      "items": {
+        "$ref": "#/$defs/step"
+      }
+    }
+  },
+  "$defs": {
+    "stepId": {
+      "type": "string",
+      "pattern": "^[a-z][a-z0-9_]*$",
+      "description": "Step identifier (lowercase letters, numbers, underscores, must start with letter)"
+    },
+    "workflow": {
+      "type": "object",
+      "required": [
+        "name",
+        "summary",
+        "steps"
+      ],
+      "additionalProperties": false,
+      "description": "A named workflow grouping steps into a sequence",
+      "properties": {
+        "name": {
+          "type": "string",
+          "pattern": "^[a-z][a-z0-9_]*$",
+          "description": "Workflow name (lowercase letters, numbers, underscores)"
+        },
+        "summary": {
+          "type": "string",
+          "minLength": 1,
+          "maxLength": 200,
+          "description": "Brief one-line summary of what this workflow accomplishes"
+        },
+        "steps": {
+          "type": "array",
+          "minItems": 1,
+          "description": "Ordered list of step entries. Each entry is either a step ID (string) or an array of step IDs for concurrent execution.",
+          "items": {
+            "$ref": "#/$defs/workflowStepEntry"
+          }
+        }
+      }
+    },
+    "workflowStepEntry": {
+      "oneOf": [
+        {
+          "$ref": "#/$defs/stepId"
+        },
+        {
+          "type": "array",
+          "minItems": 1,
+          "description": "Array of step IDs that can be executed concurrently",
+          "items": {
+            "$ref": "#/$defs/stepId"
+          }
+        }
+      ]
+    },
+    "changelogEntry": {
+      "type": "object",
+      "required": [
+        "version",
+        "changes"
+      ],
+      "additionalProperties": false,
+      "properties": {
+        "version": {
+          "type": "string",
+          "pattern": "^\\d+\\.\\d+\\.\\d+$",
+          "description": "Version number for this change"
+        },
+        "changes": {
+          "type": "string",
+          "minLength": 1,
+          "description": "Description of changes made in this version"
+        }
+      }
+    },
+    "step": {
+      "type": "object",
+      "required": [
+        "id",
+        "name",
+        "description",
+        "instructions_file",
+        "outputs",
+        "reviews"
+      ],
+      "additionalProperties": false,
+      "description": "A single Step in a job, representing one material unit of work with evaluatable outputs",
+      "properties": {
+        "id": {
+          "$ref": "#/$defs/stepId",
+          "description": "Unique step identifier within this job"
+        },
+        "name": {
+          "type": "string",
+          "minLength": 1,
+          "description": "Human-readable display name for the step"
+        },
+        "description": {
+          "type": "string",
+          "minLength": 1,
+          "description": "Description of what this step does. Used in skill descriptions."
+        },
+        "instructions_file": {
+          "type": "string",
+          "minLength": 1,
+          "description": "Path to instructions markdown file (relative to job directory). Example: 'steps/research.md'"
+        },
+        "inputs": {
+          "type": "array",
+          "description": "List of inputs required by this step (user parameters or files from previous steps)",
+          "items": {
+            "$ref": "#/$defs/stepInput"
+          }
+        },
+        "outputs": {
+          "type": "object",
+          "description": "Named outputs produced by this step. Keys are output identifiers, values describe type and purpose. May be empty for cleanup or validation steps.",
+          "additionalProperties": {
+            "$ref": "#/$defs/stepOutput"
+          }
+        },
+        "dependencies": {
+          "type": "array",
+          "description": "List of step IDs this step depends on. Dependencies must complete before this step runs.",
+          "items": {
+            "type": "string"
+          },
+          "default": []
+        },
+        "hooks": {
+          "$ref": "#/$defs/hooks",
+          "description": "Lifecycle hooks for validation and actions at different points in step execution"
+        },
+        "stop_hooks": {
+          "type": "array",
+          "description": "DEPRECATED: Use hooks.after_agent instead. Legacy stop hooks for quality validation loops.",
+          "items": {
+            "$ref": "#/$defs/hookAction"
+          }
+        },
+        "exposed": {
+          "type": "boolean",
+          "description": "If true, step is user-invocable in menus/commands. If false, step is hidden (only reachable via workflows or dependencies). Default: false",
+          "default": false
+        },
+        "hidden": {
+          "type": "boolean",
+          "description": "If true, step is hidden from menus. Alias for exposed: false. Default: false",
+          "default": false
+        },
+        "reviews": {
+          "type": "array",
+          "description": "Quality reviews to run when step completes. Can be empty.",
+          "items": {
+            "$ref": "#/$defs/review"
+          }
+        },
+        "agent": {
+          "type": "string",
+          "minLength": 1,
+          "description": "Agent type for this step (e.g., 'general-purpose'). When set, the skill uses context forking and delegates to the specified agent type."
+        }
+      }
+    },
+    "stepInput": {
+      "oneOf": [
+        {
+          "$ref": "#/$defs/userParameterInput"
+        },
+        {
+          "$ref": "#/$defs/fileInput"
+        }
+      ]
+    },
+    "userParameterInput": {
+      "type": "object",
+      "required": [
+        "name",
+        "description"
+      ],
+      "additionalProperties": false,
+      "description": "A user-provided parameter input that will be requested at runtime",
+      "properties": {
+        "name": {
+          "type": "string",
+          "minLength": 1,
+          "description": "Parameter name (used as variable name)"
+        },
+        "description": {
+          "type": "string",
+          "minLength": 1,
+          "description": "Description shown to user when requesting this input"
+        }
+      }
+    },
+    "fileInput": {
+      "type": "object",
+      "required": [
+        "file",
+        "from_step"
+      ],
+      "additionalProperties": false,
+      "description": "A file input from a previous step's output",
+      "properties": {
+        "file": {
+          "type": "string",
+          "minLength": 1,
+          "description": "File name to consume from the source step's outputs"
+        },
+        "from_step": {
+          "type": "string",
+          "minLength": 1,
+          "description": "Step ID that produces this file. Must be in the dependencies list."
+        }
+      }
+    },
+    "stepOutput": {
+      "type": "object",
+      "required": [
+        "type",
+        "description",
+        "required"
+      ],
+      "additionalProperties": false,
+      "description": "Output specification with type information indicating single file or multiple files",
+      "properties": {
+        "type": {
+          "type": "string",
+          "enum": [
+            "file",
+            "files"
+          ],
+          "description": "Whether this output is a single file ('file') or multiple files ('files')"
+        },
+        "description": {
+          "type": "string",
+          "minLength": 1,
+          "description": "Description of what this output contains"
+        },
+        "required": {
+          "type": "boolean",
+          "description": "Whether this output must be provided when calling finished_step. If false, the output is optional and can be omitted."
+        }
+      }
+    },
+    "hooks": {
+      "type": "object",
+      "additionalProperties": false,
+      "description": "Lifecycle hooks triggered at different points in step execution",
+      "properties": {
+        "after_agent": {
+          "type": "array",
+          "description": "Hooks triggered after the agent finishes. Used for quality validation loops.",
+          "items": {
+            "$ref": "#/$defs/hookAction"
+          }
+        },
+        "before_tool": {
+          "type": "array",
+          "description": "Hooks triggered before a tool is used. Used for pre-action checks.",
+          "items": {
+            "$ref": "#/$defs/hookAction"
+          }
+        },
+        "before_prompt": {
+          "type": "array",
+          "description": "Hooks triggered when user submits a prompt. Used for input validation.",
+          "items": {
+            "$ref": "#/$defs/hookAction"
+          }
+        }
+      }
+    },
+    "hookAction": {
+      "type": "object",
+      "description": "A hook action - exactly one of: prompt (inline text), prompt_file (external file), or script (shell script)",
+      "oneOf": [
+        {
+          "required": [
+            "prompt"
+          ],
+          "additionalProperties": false,
+          "properties": {
+            "prompt": {
+              "type": "string",
+              "minLength": 1,
+              "description": "Inline prompt text for validation/action"
+            }
+          }
+        },
+        {
+          "required": [
+            "prompt_file"
+          ],
+          "additionalProperties": false,
+          "properties": {
+            "prompt_file": {
+              "type": "string",
+              "minLength": 1,
+              "description": "Path to prompt file (relative to job directory)"
+            }
+          }
+        },
+        {
+          "required": [
+            "script"
+          ],
+          "additionalProperties": false,
+          "properties": {
+            "script": {
+              "type": "string",
+              "minLength": 1,
+              "description": "Path to shell script (relative to job directory)"
+            }
+          }
+        }
+      ]
+    },
+    "review": {
+      "type": "object",
+      "required": [
+        "run_each",
+        "quality_criteria"
+      ],
+      "additionalProperties": false,
+      "description": "A quality review that evaluates step outputs against criteria",
+      "properties": {
+        "run_each": {
+          "type": "string",
+          "minLength": 1,
+          "description": "Either 'step' to review all outputs together, or the name of a specific output to review individually"
+        },
+        "quality_criteria": {
+          "type": "object",
+          "description": "Map of criterion name to criterion question",
+          "additionalProperties": {
+            "type": "string",
+            "minLength": 1
+          },
+          "minProperties": 1
+        },
+        "additional_review_guidance": {
+          "type": "string",
+          "description": "Optional guidance for the reviewer about what context to look at (e.g., 'Look at the job.yml file for context'). Replaces automatic inclusion of input file contents."
+        }
+      }
+    }
+  }
+}
\ No newline at end of file
diff --git a/src/deepwork/schemas/job_schema.py b/src/deepwork/schemas/job_schema.py
index e29b852c..c3c0cb57 100644
--- a/src/deepwork/schemas/job_schema.py
+++ b/src/deepwork/schemas/job_schema.py
@@ -1,307 +1,35 @@
-"""JSON Schema definition for job definitions."""
+"""JSON Schema loader for job definitions.
 
+This module loads the job.schema.json file and provides it as a Python dict
+for use with jsonschema validation.
+"""
+
+import json
+from pathlib import Path
 from typing import Any
 
 # Supported lifecycle hook events (generic names, mapped to platform-specific by adapters)
 # These values must match SkillLifecycleHook enum in adapters.py
 LIFECYCLE_HOOK_EVENTS = ["after_agent", "before_tool", "before_prompt"]
 
-# Schema definition for a single hook action (prompt, prompt_file, or script)
-HOOK_ACTION_SCHEMA: dict[str, Any] = {
-    "type": "object",
-    "oneOf": [
-        {
-            "required": ["prompt"],
-            "properties": {
-                "prompt": {
-                    "type": "string",
-                    "minLength": 1,
-                    "description": "Inline prompt for validation/action",
-                },
-            },
-            "additionalProperties": False,
-        },
-        {
-            "required": ["prompt_file"],
-            "properties": {
-                "prompt_file": {
-                    "type": "string",
-                    "minLength": 1,
-                    "description": "Path to prompt file (relative to job directory)",
-                },
-            },
-            "additionalProperties": False,
-        },
-        {
-            "required": ["script"],
-            "properties": {
-                "script": {
-                    "type": "string",
-                    "minLength": 1,
-                    "description": "Path to shell script (relative to job directory)",
-                },
-            },
-            "additionalProperties": False,
-        },
-    ],
-}
+# Path to the JSON schema file
+_SCHEMA_FILE = Path(__file__).parent / "job.schema.json"
+
+
+def _load_schema() -> dict[str, Any]:
+    """Load the JSON schema from file."""
+    with open(_SCHEMA_FILE) as f:
+        return json.load(f)
 
-# Schema for a single step reference (step ID)
-STEP_ID_SCHEMA: dict[str, Any] = {
-    "type": "string",
-    "pattern": "^[a-z][a-z0-9_]*$",
-}
 
-# Schema for a concurrent step group (array of step IDs that can run in parallel)
-# minItems=1 allows single-item arrays to indicate a step with multiple parallel instances
-# (e.g., [fetch_campaign_data] means run this step for each campaign in parallel)
-CONCURRENT_STEPS_SCHEMA: dict[str, Any] = {
-    "type": "array",
-    "minItems": 1,
-    "description": "Array of step IDs that can be executed concurrently, or single step with multiple instances",
-    "items": STEP_ID_SCHEMA,
-}
+# Load the schema at module import time
+JOB_SCHEMA: dict[str, Any] = _load_schema()
 
-# Schema for a workflow step entry (either single step or concurrent group)
-WORKFLOW_STEP_ENTRY_SCHEMA: dict[str, Any] = {
-    "oneOf": [
-        STEP_ID_SCHEMA,
-        CONCURRENT_STEPS_SCHEMA,
-    ],
-}
 
-# Schema for a workflow definition
-WORKFLOW_SCHEMA: dict[str, Any] = {
-    "type": "object",
-    "required": ["name", "summary", "steps"],
-    "properties": {
-        "name": {
-            "type": "string",
-            "pattern": "^[a-z][a-z0-9_]*$",
-            "description": "Workflow name (lowercase letters, numbers, underscores)",
-        },
-        "summary": {
-            "type": "string",
-            "minLength": 1,
-            "maxLength": 200,
-            "description": "Brief one-line summary of what this workflow accomplishes",
-        },
-        "steps": {
-            "type": "array",
-            "minItems": 1,
-            "description": "Ordered list of step entries. Each entry is either a step ID (string) or an array of step IDs for concurrent execution.",
-            "items": WORKFLOW_STEP_ENTRY_SCHEMA,
-        },
-    },
-    "additionalProperties": False,
-}
+def get_schema_path() -> Path:
+    """Get the path to the JSON schema file.
 
-# JSON Schema for job.yml files
-JOB_SCHEMA: dict[str, Any] = {
-    "$schema": "http://json-schema.org/draft-07/schema#",
-    "type": "object",
-    "required": ["name", "version", "summary", "steps"],
-    "properties": {
-        "name": {
-            "type": "string",
-            "pattern": "^[a-z][a-z0-9_]*$",
-            "description": "Job name (lowercase letters, numbers, underscores, must start with letter)",
-        },
-        "version": {
-            "type": "string",
-            "pattern": r"^\d+\.\d+\.\d+$",
-            "description": "Semantic version (e.g., 1.0.0)",
-        },
-        "summary": {
-            "type": "string",
-            "minLength": 1,
-            "maxLength": 200,
-            "description": "Brief one-line summary of what this job accomplishes",
-        },
-        "description": {
-            "type": "string",
-            "minLength": 1,
-            "description": "Detailed multi-line description of the job's purpose, process, and goals",
-        },
-        "workflows": {
-            "type": "array",
-            "description": "Named workflows that group steps into multi-step sequences",
-            "items": WORKFLOW_SCHEMA,
-        },
-        "changelog": {
-            "type": "array",
-            "description": "Version history and changes to the job",
-            "items": {
-                "type": "object",
-                "required": ["version", "changes"],
-                "properties": {
-                    "version": {
-                        "type": "string",
-                        "pattern": r"^\d+\.\d+\.\d+$",
-                        "description": "Version number for this change",
-                    },
-                    "changes": {
-                        "type": "string",
-                        "minLength": 1,
-                        "description": "Description of changes made in this version",
-                    },
-                },
-                "additionalProperties": False,
-            },
-        },
-        "steps": {
-            "type": "array",
-            "minItems": 1,
-            "description": "List of steps in the job",
-            "items": {
-                "type": "object",
-                "required": ["id", "name", "description", "instructions_file", "outputs"],
-                "properties": {
-                    "id": {
-                        "type": "string",
-                        "pattern": "^[a-z][a-z0-9_]*$",
-                        "description": "Step ID (unique within job)",
-                    },
-                    "name": {
-                        "type": "string",
-                        "minLength": 1,
-                        "description": "Human-readable step name",
-                    },
-                    "description": {
-                        "type": "string",
-                        "minLength": 1,
-                        "description": "Step description",
-                    },
-                    "instructions_file": {
-                        "type": "string",
-                        "minLength": 1,
-                        "description": "Path to instructions file (relative to job directory)",
-                    },
-                    "inputs": {
-                        "type": "array",
-                        "description": "List of inputs (user parameters or files from previous steps)",
-                        "items": {
-                            "type": "object",
-                            "oneOf": [
-                                {
-                                    "required": ["name", "description"],
-                                    "properties": {
-                                        "name": {
-                                            "type": "string",
-                                            "description": "Input parameter name",
-                                        },
-                                        "description": {
-                                            "type": "string",
-                                            "description": "Input parameter description",
-                                        },
-                                    },
-                                    "additionalProperties": False,
-                                },
-                                {
-                                    "required": ["file", "from_step"],
-                                    "properties": {
-                                        "file": {
-                                            "type": "string",
-                                            "description": "File name from previous step",
-                                        },
-                                        "from_step": {
-                                            "type": "string",
-                                            "description": "Step ID that produces this file",
-                                        },
-                                    },
-                                    "additionalProperties": False,
-                                },
-                            ],
-                        },
-                    },
-                    "outputs": {
-                        "type": "array",
-                        "description": "List of output files/directories, optionally with document type references",
-                        "items": {
-                            "oneOf": [
-                                {
-                                    "type": "string",
-                                    "minLength": 1,
-                                    "description": "Simple output file path (backward compatible)",
-                                },
-                                {
-                                    "type": "object",
-                                    "required": ["file"],
-                                    "properties": {
-                                        "file": {
-                                            "type": "string",
-                                            "minLength": 1,
-                                            "description": "Output file path",
-                                        },
-                                        "doc_spec": {
-                                            "type": "string",
-                                            "pattern": r"^\.deepwork/doc_specs/[a-z][a-z0-9_-]*\.md$",
-                                            "description": "Path to doc spec file",
-                                        },
-                                    },
-                                    "additionalProperties": False,
-                                },
-                            ],
-                        },
-                    },
-                    "dependencies": {
-                        "type": "array",
-                        "description": "List of step IDs this step depends on",
-                        "items": {
-                            "type": "string",
-                        },
-                        "default": [],
-                    },
-                    "hooks": {
-                        "type": "object",
-                        "description": "Lifecycle hooks for this step, keyed by event type",
-                        "properties": {
-                            "after_agent": {
-                                "type": "array",
-                                "description": "Hooks triggered after the agent finishes (quality validation)",
-                                "items": HOOK_ACTION_SCHEMA,
-                            },
-                            "before_tool": {
-                                "type": "array",
-                                "description": "Hooks triggered before a tool is used",
-                                "items": HOOK_ACTION_SCHEMA,
-                            },
-                            "before_prompt": {
-                                "type": "array",
-                                "description": "Hooks triggered when user submits a prompt",
-                                "items": HOOK_ACTION_SCHEMA,
-                            },
-                        },
-                        "additionalProperties": False,
-                    },
-                    # DEPRECATED: Use hooks.after_agent instead
-                    "stop_hooks": {
-                        "type": "array",
-                        "description": "DEPRECATED: Use hooks.after_agent instead. Stop hooks for quality validation loops.",
-                        "items": HOOK_ACTION_SCHEMA,
-                    },
-                    "exposed": {
-                        "type": "boolean",
-                        "description": "If true, skill is user-invocable in menus. Default: false (hidden from menus).",
-                        "default": False,
-                    },
-                    "quality_criteria": {
-                        "type": "array",
-                        "description": "Declarative quality criteria. Rendered with standard evaluation framing.",
-                        "items": {
-                            "type": "string",
-                            "minLength": 1,
-                        },
-                    },
-                    "agent": {
-                        "type": "string",
-                        "description": "Agent type for this step. When set, the skill uses context: fork and delegates to the specified agent (e.g., 'general-purpose').",
-                        "minLength": 1,
-                    },
-                },
-                "additionalProperties": False,
-            },
-        },
-    },
-    "additionalProperties": False,
-}
+    Returns:
+        Path to job.schema.json
+    """
+    return _SCHEMA_FILE
diff --git a/src/deepwork/schemas/rules_schema.py b/src/deepwork/schemas/rules_schema.py
deleted file mode 100644
index bf091ab9..00000000
--- a/src/deepwork/schemas/rules_schema.py
+++ /dev/null
@@ -1,135 +0,0 @@
-"""JSON Schema definition for rule definitions (v2 - frontmatter format)."""
-
-from typing import Any
-
-# Pattern for string or array of strings
-STRING_OR_ARRAY: dict[str, Any] = {
-    "oneOf": [
-        {"type": "string", "minLength": 1},
-        {"type": "array", "items": {"type": "string", "minLength": 1}, "minItems": 1},
-    ]
-}
-
-# JSON Schema for rule frontmatter (YAML between --- delimiters)
-# Rules are stored as individual .md files in .deepwork/rules/
-RULES_FRONTMATTER_SCHEMA: dict[str, Any] = {
-    "$schema": "http://json-schema.org/draft-07/schema#",
-    "type": "object",
-    "required": ["name", "compare_to"],
-    "properties": {
-        "name": {
-            "type": "string",
-            "minLength": 1,
-            "description": "Human-friendly name for the rule (displayed in promise tags)",
-        },
-        # Detection mode: trigger/safety (mutually exclusive with set/pair)
-        "trigger": {
-            **STRING_OR_ARRAY,
-            "description": "Glob pattern(s) for files that trigger this rule",
-        },
-        "safety": {
-            **STRING_OR_ARRAY,
-            "description": "Glob pattern(s) that suppress the rule if changed",
-        },
-        # Detection mode: set (bidirectional correspondence)
-        "set": {
-            "type": "array",
-            "items": {"type": "string", "minLength": 1},
-            "minItems": 2,
-            "description": "Patterns defining bidirectional file correspondence",
-        },
-        # Detection mode: pair (directional correspondence)
-        "pair": {
-            "type": "object",
-            "required": ["trigger", "expects"],
-            "properties": {
-                "trigger": {
-                    "type": "string",
-                    "minLength": 1,
-                    "description": "Pattern that triggers the rule",
-                },
-                "expects": {
-                    **STRING_OR_ARRAY,
-                    "description": "Pattern(s) for expected corresponding files",
-                },
-            },
-            "additionalProperties": False,
-            "description": "Directional file correspondence (trigger -> expects)",
-        },
-        # Detection mode: created (fire when files are created matching patterns)
-        "created": {
-            **STRING_OR_ARRAY,
-            "description": "Glob pattern(s) for newly created files that trigger this rule",
-        },
-        # Action type: command (default is prompt using markdown body)
-        "action": {
-            "type": "object",
-            "required": ["command"],
-            "properties": {
-                "command": {
-                    "type": "string",
-                    "minLength": 1,
-                    "description": "Command to run (supports {file}, {files}, {repo_root})",
-                },
-                "run_for": {
-                    "type": "string",
-                    "enum": ["each_match", "all_matches"],
-                    "default": "each_match",
-                    "description": "Run command for each file or all files at once",
-                },
-            },
-            "additionalProperties": False,
-            "description": "Command action to run instead of prompting",
-        },
-        # Common options
-        "compare_to": {
-            "type": "string",
-            "enum": ["base", "default_tip", "prompt"],
-            "description": "Baseline for detecting file changes",
-        },
-    },
-    "additionalProperties": False,
-    # Detection mode must be exactly one of: trigger, set, pair, or created
-    "oneOf": [
-        {
-            "required": ["trigger"],
-            "not": {
-                "anyOf": [
-                    {"required": ["set"]},
-                    {"required": ["pair"]},
-                    {"required": ["created"]},
-                ]
-            },
-        },
-        {
-            "required": ["set"],
-            "not": {
-                "anyOf": [
-                    {"required": ["trigger"]},
-                    {"required": ["pair"]},
-                    {"required": ["created"]},
-                ]
-            },
-        },
-        {
-            "required": ["pair"],
-            "not": {
-                "anyOf": [
-                    {"required": ["trigger"]},
-                    {"required": ["set"]},
-                    {"required": ["created"]},
-                ]
-            },
-        },
-        {
-            "required": ["created"],
-            "not": {
-                "anyOf": [
-                    {"required": ["trigger"]},
-                    {"required": ["set"]},
-                    {"required": ["pair"]},
-                ]
-            },
-        },
-    ],
-}
diff --git a/src/deepwork/standard_jobs/deepwork_jobs/AGENTS.md b/src/deepwork/standard_jobs/deepwork_jobs/AGENTS.md
index 6d97d0e5..576c1185 100644
--- a/src/deepwork/standard_jobs/deepwork_jobs/AGENTS.md
+++ b/src/deepwork/standard_jobs/deepwork_jobs/AGENTS.md
@@ -18,9 +18,9 @@ This is the source of truth for the `deepwork_jobs` standard job.
 
 2. **Working copy**: `.deepwork/jobs/deepwork_jobs/`
    - Must be updated after changes to source
-   - Used by `deepwork sync` to generate commands
+   - Used by the MCP server at runtime
 
-After making changes to the source, copy files to the working copy:
+After making changes to the source, run `deepwork install` or manually copy:
 ```bash
 cp src/deepwork/standard_jobs/deepwork_jobs/job.yml .deepwork/jobs/deepwork_jobs/
 cp src/deepwork/standard_jobs/deepwork_jobs/steps/*.md .deepwork/jobs/deepwork_jobs/steps/
@@ -37,6 +37,8 @@ deepwork_jobs/
 ├── steps/
 │   ├── define.md          # Define step instructions
 │   ├── implement.md       # Implement step instructions
+│   ├── test.md            # Test step instructions
+│   ├── iterate.md         # Iterate step instructions
 │   ├── learn.md           # Learn step instructions
 │   └── supplemental_file_references.md  # Reference documentation
 └── templates/
@@ -47,6 +49,33 @@ deepwork_jobs/
     └── step_instruction.md.example   # Complete step example
 ```
 
+## Quality Review Learnings
+
+These learnings come from running the `new_job` workflow to create the `github_outreach` job (2026-02-06).
+
+### Review Criteria Must Be Pragmatic
+
+The implement step's review criteria caused 6+ review iterations during the github_outreach job creation. Key problems and fixes:
+
+1. **"Ask Structured Questions" was applied to ALL steps** — even pure analysis/generation steps with no user input. Fixed in v1.4.0: criterion now auto-passes for steps that only have file inputs from prior steps (no name/description user inputs).
+
+2. **"Output Examples" was too strict** — demanded concrete filled-in examples in every step file, even when a template structure with `[bracket placeholders]` was sufficient. Fixed in v1.4.0: renamed to "Output Format Examples" and accepts templates. Concrete examples are encouraged but not required.
+
+3. **Contradictory review results** — In one case, all 6 individual criteria passed but the overall review still returned `needs_work`. This appears to be a reviewer model issue where the summary contradicts the per-criterion assessments. Added `additional_review_guidance` to clarify when criteria should auto-pass.
+
+### Quality Review Timeouts on Large Outputs
+
+Steps producing many files (25 analysis files) or very long files (700+ line playbook) exceeded the 120-second MCP timeout during quality review. The `quality_review_override_reason` parameter was needed to bypass these.
+
+Mitigation strategies documented in `define.md`:
+- Use `run_each: step` instead of `run_each: <files_output>` for steps with many files
+- Keep review criteria efficient to evaluate
+- Note expected output volume in step descriptions
+
+### Dependency Validation Gaps
+
+The github_outreach `final_report` step had `analyze_repos` as a file input but was missing it from the `dependencies` list. This was caught at workflow start time but could have been caught earlier during the `implement` step. The define step's validation rules already mention this (`from_step must be in dependencies`) but it was missed during creation.
+
 ## Version Management
 
 - Version is tracked in `job.yml`
@@ -56,5 +85,5 @@ deepwork_jobs/
 
 ## Last Updated
 
-- Date: 2026-01-15
-- From conversation about: Adding make_new_job.sh script and templates directory
+- Date: 2026-02-06
+- From conversation about: Learn workflow analyzing severe quality review issues in the new_job execution
diff --git a/src/deepwork/standard_jobs/deepwork_jobs/doc_specs/job_spec.md b/src/deepwork/standard_jobs/deepwork_jobs/doc_specs/job_spec.md
deleted file mode 100644
index b880bb17..00000000
--- a/src/deepwork/standard_jobs/deepwork_jobs/doc_specs/job_spec.md
+++ /dev/null
@@ -1,203 +0,0 @@
----
-name: "DeepWork Job Specification"
-description: "YAML specification file that defines a multi-step workflow job for AI agents"
-path_patterns:
-  - ".deepwork/jobs/*/job.yml"
-target_audience: "AI agents executing jobs and developers defining workflows"
-frequency: "Created once per job, updated as workflow evolves"
-quality_criteria:
-  - name: Valid Identifier
-    description: "Job name must be lowercase with underscores, no spaces or special characters (e.g., `competitive_research`, `monthly_report`)"
-  - name: Semantic Version
-    description: "Version must follow semantic versioning format X.Y.Z (e.g., `1.0.0`, `2.1.3`)"
-  - name: Concise Summary
-    description: "Summary must be under 200 characters and clearly describe what the job accomplishes"
-  - name: Rich Description
-    description: "Description must be multi-line and explain: the problem solved, the process, expected outcomes, and target users"
-  - name: Changelog Present
-    description: "Must include a changelog array with at least the initial version entry. Changelog should only include one entry per branch at most"
-  - name: Complete Steps
-    description: "Each step must have: id (lowercase_underscores), name, description, instructions_file, outputs (at least one), and dependencies array"
-  - name: Valid Dependencies
-    description: "Dependencies must reference existing step IDs with no circular references"
-  - name: Input Consistency
-    description: "File inputs with `from_step` must reference a step that is in the dependencies array"
-  - name: Output Paths
-    description: "Outputs must be valid filenames or paths within the main repo directory structure, never in dot-directories like `.deepwork/`. Use specific, descriptive paths that lend themselves to glob patterns (e.g., `competitive_research/acme_corp/swot.md` or `operations/reports/2026-01/spending_analysis.md`). Parameterized paths like `[competitor_name]/` are encouraged for per-entity outputs. Avoid generic names (`output.md`, `analysis.md`) and transient-sounding paths (`temp/`, `draft.md`). Supporting materials for a final output should go in a peer `_dataroom` folder (e.g., `spending_analysis_dataroom/`)."
-  - name: Concise Instructions
-    description: "The content of the file, particularly the description, must not have excessively redundant information. It should be concise and to the point given that extra tokens will confuse the AI."
----
-
-# DeepWork Job Specification: [job_name]
-
-A `job.yml` file defines a complete multi-step workflow that AI agents can execute. Each job breaks down a complex task into reviewable steps with clear inputs and outputs.
-
-## Required Fields
-
-### Top-Level Metadata
-
-```yaml
-name: job_name                    # lowercase, underscores only
-version: "1.0.0"                  # semantic versioning
-summary: "Brief description"      # max 200 characters
-description: |                    # detailed multi-line explanation
-  [Explain what this workflow does, why it exists,
-  what outputs it produces, and who should use it]
-```
-
-### Changelog
-
-```yaml
-changelog:
-  - version: "1.0.0"
-    changes: "Initial job creation"
-  - version: "1.1.0"
-    changes: "Added quality validation hooks"
-```
-
-### Steps Array
-
-```yaml
-steps:
-  - id: step_id                   # unique, lowercase_underscores
-    name: "Human Readable Name"
-    description: "What this step accomplishes"
-    instructions_file: steps/step_id.md
-    inputs:
-      # User-provided inputs:
-      - name: param_name
-        description: "What the user provides"
-      # File inputs from previous steps:
-      - file: output.md
-        from_step: previous_step_id
-    outputs:
-      - competitive_research/competitors_list.md           # descriptive path
-      - competitive_research/[competitor_name]/research.md # parameterized path
-      # With doc spec reference:
-      - file: competitive_research/final_report.md
-        doc_spec: .deepwork/doc_specs/report_type.md
-    dependencies:
-      - previous_step_id          # steps that must complete first
-```
-
-## Optional Fields
-
-### Exposed Steps
-
-```yaml
-steps:
-  - id: learn
-    exposed: true                 # Makes step available without running dependencies
-```
-
-### Agent Delegation
-
-When a step should be executed by a specific agent type, use the `agent` field. This automatically sets `context: fork` in the generated skill.
-
-```yaml
-steps:
-  - id: research_step
-    agent: general-purpose        # Delegates to the general-purpose agent
-```
-
-Available agent types:
-- `general-purpose` - Standard agent for multi-step tasks
-
-### Quality Hooks
-
-```yaml
-steps:
-  - id: step_id
-    hooks:
-      after_agent:
-        # Inline prompt for quality validation:
-        - prompt: |
-            Verify the output meets criteria:
-            1. [Criterion 1]
-            2. [Criterion 2]
-            If ALL criteria are met, include `<promise>...</promise>`.
-        # External prompt file:
-        - prompt_file: hooks/quality_check.md
-        # Script for programmatic validation:
-        - script: hooks/run_tests.sh
-```
-
-### Stop Hooks (Legacy)
-
-```yaml
-steps:
-  - id: step_id
-    stop_hooks:
-      - prompt: "Validation prompt..."
-      - prompt_file: hooks/check.md
-      - script: hooks/validate.sh
-```
-
-## Validation Rules
-
-1. **No circular dependencies**: Step A cannot depend on Step B if Step B depends on Step A
-2. **File inputs require dependencies**: If a step uses `from_step: X`, then X must be in its dependencies
-3. **Unique step IDs**: No two steps can have the same id
-4. **Valid file paths**: Output paths must not contain invalid characters and should be in the main repo (not dot-directories)
-5. **Instructions files exist**: Each `instructions_file` path should have a corresponding file created
-
-## Example: Complete Job Specification
-
-```yaml
-name: competitive_research
-version: "1.0.0"
-summary: "Systematic competitive analysis workflow"
-description: |
-  A comprehensive workflow for analyzing competitors in your market segment.
-  Helps product teams understand the competitive landscape through systematic
-  identification, research, comparison, and positioning recommendations.
-
-  Produces:
-  - Vetted competitor list
-  - Research notes per competitor
-  - Comparison matrix
-  - Strategic positioning report
-
-changelog:
-  - version: "1.0.0"
-    changes: "Initial job creation"
-
-steps:
-  - id: identify_competitors
-    name: "Identify Competitors"
-    description: "Identify 5-7 key competitors in the target market"
-    instructions_file: steps/identify_competitors.md
-    inputs:
-      - name: market_segment
-        description: "The market segment to analyze"
-      - name: product_category
-        description: "The product category"
-    outputs:
-      - competitive_research/competitors_list.md
-    dependencies: []
-
-  - id: research_competitors
-    name: "Research Competitors"
-    description: "Deep dive research on each identified competitor"
-    instructions_file: steps/research_competitors.md
-    inputs:
-      - file: competitive_research/competitors_list.md
-        from_step: identify_competitors
-    outputs:
-      - competitive_research/[competitor_name]/research.md
-    dependencies:
-      - identify_competitors
-
-  - id: positioning_report
-    name: "Positioning Report"
-    description: "Strategic positioning recommendations"
-    instructions_file: steps/positioning_report.md
-    inputs:
-      - file: competitive_research/[competitor_name]/research.md
-        from_step: research_competitors
-    outputs:
-      - file: competitive_research/positioning_report.md
-        doc_spec: .deepwork/doc_specs/positioning_report.md
-    dependencies:
-      - research_competitors
-```
diff --git a/src/deepwork/standard_jobs/deepwork_jobs/job.yml b/src/deepwork/standard_jobs/deepwork_jobs/job.yml
index 4343cbda..57baf9f1 100644
--- a/src/deepwork/standard_jobs/deepwork_jobs/job.yml
+++ b/src/deepwork/standard_jobs/deepwork_jobs/job.yml
@@ -1,13 +1,16 @@
+# yaml-language-server: $schema=.deepwork/schemas/job.schema.json
 name: deepwork_jobs
-version: "1.0.0"
-summary: "Creates and manages multi-step AI workflows. Use when defining, implementing, or improving DeepWork jobs."
+version: "1.4.0"
+summary: "Creates and manages multi-step AI workflows. Use when defining, implementing, testing, or improving DeepWork jobs."
 description: |
   Core commands for managing DeepWork jobs. These commands help you define new multi-step
-  workflows and learn from running them.
+  workflows, test them on real use cases, and learn from running them.
 
-  The `new_job` workflow guides you through defining and implementing a new job by
-  asking structured questions about your workflow, understanding each step's inputs and outputs,
-  reviewing the specification, and generating all necessary files.
+  The `new_job` workflow guides you through the full lifecycle of creating a new job:
+  1. **Define**: Gather requirements through structured questions and create job.yml
+  2. **Implement**: Generate step instruction files and sync slash commands
+  3. **Test**: Run the workflow on a real use case, critique output, and iterate with user
+  4. **Iterate**: Review what happened and improve the job definition based on learnings
 
   The `learn` skill reflects on conversations where DeepWork jobs were run, identifies
   confusion or inefficiencies, and improves job instructions. It also captures bespoke
@@ -15,13 +18,38 @@ description: |
 
 workflows:
   - name: new_job
-    summary: "Create a new DeepWork job from scratch through definition, review, and implementation"
+    summary: "Create a new DeepWork job from scratch through definition, implementation, testing, and iteration"
     steps:
       - define
-      - review_job_spec
       - implement
+      - test
+      - iterate
+
+  - name: repair
+    summary: "Clean up and migrate DeepWork configurations from prior versions"
+    steps:
+      - fix_settings
+      - fix_jobs
+      - errata
+
+  - name: learn
+    summary: "Analyze conversation history to improve job instructions and capture learnings"
+    steps:
+      - learn
 
 changelog:
+  - version: "1.4.0"
+    changes: "Fixed implement step review criteria that caused severe friction: 'Ask Structured Questions' now auto-passes for steps without user inputs; 'Output Examples' renamed to 'Output Format Examples' and accepts template placeholders; added detailed review guidance to prevent misapplication of criteria"
+  - version: "1.3.0"
+    changes: "Migrated quality_criteria to reviews system with run_each targeting and map-format criteria"
+  - version: "1.2.1"
+    changes: "Removed deprecated exposed field from learn step; added learn workflow to make step accessible via MCP"
+  - version: "1.2.0"
+    changes: "Added repair workflow with fix_settings, fix_jobs, and errata steps for migrating old DeepWork configurations to current format"
+  - version: "1.1.0"
+    changes: "Added test and iterate steps to new_job workflow; test runs the workflow on a real use case and gathers feedback; iterate improves the job definition based on what happened"
+  - version: "1.0.1"
+    changes: "Removed review_job_spec step from new_job workflow; implement now follows directly from define"
   - version: "1.0.0"
     changes: "Added workflows section to distinguish new_job workflow (define→review_job_spec→implement) from standalone learn skill"
   - version: "0.1.0"
@@ -34,12 +62,6 @@ changelog:
     changes: "Removed implementation_summary and learning_summary outputs; simplified step outputs"
   - version: "0.5.0"
     changes: "Standardized on 'ask structured questions' phrasing for user input; Updated quality criteria hooks to verify phrase usage; Added guidance in implement.md to use phrase in generated instructions"
-  - version: "0.6.0"
-    changes: "Added doc spec support; define.md now detects document-oriented workflows and guides doc spec creation; learn.md now identifies and applies doc spec-related improvements"
-  - version: "0.7.0"
-    changes: "Added job.yml doc spec; define step now outputs job.yml with doc_spec reference for quality validation"
-  - version: "0.8.0"
-    changes: "Added review_job_spec step between define and implement for doc spec-based quality validation using sub-agent review"
   - version: "0.9.0"
     changes: "Improved skill descriptions with third-person voice and 'Use when...' triggers for better discoverability"
 
@@ -52,70 +74,210 @@ steps:
       - name: job_purpose
         description: "What complex task or workflow are you trying to accomplish?"
     outputs:
-      - file: job.yml
-        doc_spec: .deepwork/doc_specs/job_spec.md
+      job.yml:
+        type: file
+        description: "Definition of the job and its workflows"
+        required: true
     dependencies: []
-  - id: review_job_spec
-    name: "Review Job Specification"
-    description: "Reviews job.yml against quality criteria using a sub-agent for unbiased validation. Use after defining a job specification."
-    instructions_file: steps/review_job_spec.md
+    reviews:
+      - run_each: job.yml
+        quality_criteria:
+          "Intermediate Deliverables": "Does the job break out across the logical steps such that there are reviewable intermediate deliverables?"
+          "Reviews": |
+            Are there reviews defined for each step? Do particularly critical documents have their own reviews?
+            Note that the reviewers do not have transcript access, so if the criteria are about the conversation,
+            then add a `.deepwork/tmp/[step_summary].md` step output file so the agent has a communication channel to the reviewer.
+
+  - id: implement
+    name: "Implement Job Steps"
+    description: "Generates step instruction files and syncs slash commands from the job.yml specification. Use after defining a job."
+    instructions_file: steps/implement.md
     inputs:
       - file: job.yml
         from_step: define
     outputs:
+      step_instruction_files:
+        type: files
+        description: "Instruction Markdown files for each step"
+        required: true
+    dependencies:
+      - define
+    reviews:
+      - run_each: step_instruction_files
+        additional_review_guidance: "Read the job.yml file in the same job directory for context on how this instruction file fits into the larger workflow."
+        quality_criteria:
+          "Complete Instructions": "Is the instruction file complete (no stubs or placeholders)?"
+          "Specific & Actionable": "Are instructions tailored to the step's purpose, not generic?"
+          "Output Examples": "Does the instruction file show what good output looks like? This can be either template examples, or negative examples of what not to do. Only required if the step has ouputs"
+          "Quality Criteria": "Does the instruction file define quality criteria for its outputs?"
+          "Ask Structured Questions": "If this step gathers user input, do instructions explicitly use the phrase 'ask structured questions'? If the step has no user inputs, this criterion passes automatically."
+          "Prompt Engineering": "Does the instruction file follow Anthropic's best practices for prompt engineering?"
+
+  - id: test
+    name: "Test the New Workflow"
+    description: "Tests the newly created workflow by running it on a real use case, critiquing the output, and iterating until the user is satisfied. Use after implementing a job."
+    instructions_file: steps/test.md
+    inputs:
       - file: job.yml
-        doc_spec: .deepwork/doc_specs/job_spec.md
+        from_step: define
+      - file: step_instruction_files
+        from_step: implement
+    outputs:
+      .deepwork/tmp/test_feedback.md:
+        type: file
+        description: "Feedback from testing the workflow on a real use case"
+        required: true
     dependencies:
       - define
-    quality_criteria:
-      - "**Sub-Agent Used**: Was a sub-agent spawned to provide unbiased review?"
-      - "**All doc spec Criteria Evaluated**: Did the sub-agent assess all 9 quality criteria?"
-      - "**Findings Addressed**: Were all failed criteria addressed by the main agent?"
-      - "**Validation Loop Complete**: Did the review-fix cycle continue until all criteria passed?"
+      - implement
+    reviews:
+      - run_each: step
+        quality_criteria:
+          "Workflow Invoked": "Was the new workflow actually run on the user's test case via MCP?"
+          "Output Critiqued": "Did the agent identify up to 3 top issues with the output?"
+          "User Feedback Gathered": "Did the agent ask the user about each issue and gather additional feedback?"
+          "Corrections Made": "Were all requested corrections applied to the output?"
+          "User Satisfied": "Did the user confirm the output meets their needs?"
 
-  - id: implement
-    name: "Implement Job Steps"
-    description: "Generates step instruction files and syncs slash commands from the job.yml specification. Use after job spec review passes."
-    instructions_file: steps/implement.md
+  - id: iterate
+    name: "Iterate on Workflow Design"
+    description: "Reviews the test run conversation and improves the job definition based on what happened. Use after testing a newly created job."
+    instructions_file: steps/iterate.md
     inputs:
       - file: job.yml
-        from_step: review_job_spec
+        from_step: define
+      - file: step_instruction_files
+        from_step: implement
+      - file: .deepwork/tmp/test_feedback.md
+        from_step: test
     outputs:
-      - steps/
+      job.yml:
+        type: file
+        description: "Updated job definition with improvements from test run"
+        required: true
+      step_instruction_files:
+        type: files
+        description: "Updated instruction Markdown files for each step"
+        required: true
+      scripts:
+        type: files
+        description: "Updated scripts to run parts of the job more efficiently"
+        required: false
     dependencies:
-      - review_job_spec
-    quality_criteria:
-      - "**Directory Structure**: Is `.deepwork/jobs/[job_name]/` created correctly?"
-      - "**Complete Instructions**: Are ALL step instruction files complete (not stubs or placeholders)?"
-      - "**Specific & Actionable**: Are instructions tailored to each step's purpose, not generic?"
-      - "**Output Examples**: Does each instruction file show what good output looks like?"
-      - "**Quality Criteria**: Does each instruction file define quality criteria for its outputs?"
-      - "**Ask Structured Questions**: Do step instructions that gather user input explicitly use the phrase \"ask structured questions\"?"
-      - "**Sync Complete**: Has `deepwork sync` been run successfully?"
-      - "**Commands Available**: Are the slash-commands generated in `.claude/commands/`?"
-      - "**Rules Considered**: Has the agent thought about whether rules would benefit this job? If relevant rules were identified, did they explain them and offer to run `/deepwork_rules.define`? Not every job needs rules - only suggest when genuinely helpful."
+      - define
+      - implement
+      - test
+    reviews: []
 
   - id: learn
     name: "Learn from Job Execution"
     description: "Analyzes conversation history to improve job instructions and capture learnings. Use after running a job to refine it."
     instructions_file: steps/learn.md
-    exposed: true
     inputs:
       - name: job_name
         description: "Name of the job that was run (optional - will auto-detect from conversation)"
     outputs:
-      - AGENTS.md
+      AGENTS.md:
+        type: file
+        description: "Bespoke learnings and run-specific context for the working folder"
+        required: true
+      job.yml:
+        type: file
+        description: "Updated job definition with improvements from test run"
+        required: true
+      step_instruction_files:
+        type: files
+        description: "Updated instruction Markdown files for each step"
+        required: true
+      scripts:
+        type: files
+        description: "Updated scripts to run parts of the job more efficiently"
+        required: false
+    dependencies: []
+    reviews:
+      - run_each: step
+        quality_criteria:
+          "Conversation Analyzed": "Did the agent review the conversation for DeepWork job executions?"
+          "Confusion Identified": "Did the agent identify points of confusion, errors, or inefficiencies?"
+          "Instructions Improved": "Were job instructions updated to address identified issues?"
+          "Instructions Concise": "Are instructions free of redundancy and unnecessary verbosity?"
+          "Shared Content Extracted": "Is lengthy/duplicated content extracted into referenced files?"
+          "Bespoke Learnings Captured": "Were run-specific learnings added to AGENTS.md?"
+          "File References Used": "Do AGENTS.md entries reference other files where appropriate?"
+          "Working Folder Correct": "Is AGENTS.md in the correct working folder for the job?"
+
+  - id: fix_settings
+    name: "Fix Settings Files"
+    description: "Cleans up .claude/settings.json and related configuration files, removing legacy permissions, duplicate hooks, and hardcoded paths from prior DeepWork versions."
+    instructions_file: steps/fix_settings.md
+    inputs: []
+    outputs:
+      settings.json:
+        type: file
+        description: "Cleaned up Claude settings file with legacy permissions removed"
+        required: true
     dependencies: []
-    quality_criteria:
-      - "**Conversation Analyzed**: Did the agent review the conversation for DeepWork job executions?"
-      - "**Confusion Identified**: Did the agent identify points of confusion, errors, or inefficiencies?"
-      - "**Instructions Improved**: Were job instructions updated to address identified issues?"
-      - "**Instructions Concise**: Are instructions free of redundancy and unnecessary verbosity?"
-      - "**Shared Content Extracted**: Is lengthy/duplicated content extracted into referenced files?"
-      - "**doc spec Reviewed (if applicable)**: For jobs with doc spec outputs, were doc spec-related learnings identified?"
-      - "**doc spec Updated (if applicable)**: Were doc spec files updated with improved quality criteria or structure?"
-      - "**Bespoke Learnings Captured**: Were run-specific learnings added to AGENTS.md?"
-      - "**File References Used**: Do AGENTS.md entries reference other files where appropriate?"
-      - "**Working Folder Correct**: Is AGENTS.md in the correct working folder for the job?"
-      - "**Generalizable Separated**: Are generalizable improvements in instructions, not AGENTS.md?"
-      - "**Sync Complete**: Has `deepwork sync` been run if instructions were modified?"
+    reviews:
+      - run_each: step
+        quality_criteria:
+          "DeepWork Skills Removed": "Are `Skill(...)` entries matching jobs in `.deepwork/jobs/` removed?"
+          "Non-DeepWork Skills Preserved": "Are skills NOT matching DeepWork jobs left intact?"
+          "make_new_job.sh Preserved": "Is the `Bash(...)` permission for `make_new_job.sh` preserved (if present)?"
+          "Rules Hooks Removed": "Are all DeepWork Rules hooks and permissions removed?"
+          "Duplicate Hooks Removed": "Are duplicate hook entries consolidated or removed?"
+          "Hardcoded Paths Removed": "Are user-specific hardcoded paths (like `/Users/*/...`) removed?"
+          "Deprecated Commands Removed": "Are deprecated commands like `deepwork hook *` removed?"
+          "Valid JSON": "Is settings.json still valid JSON after modifications?"
+          "Backup Created": "Was a backup of the original settings created before modifications?"
+
+  - id: fix_jobs
+    name: "Fix Job Definitions"
+    description: "Updates job.yml files and step instructions to current DeepWork format, removing deprecated fields and migrating to new structures."
+    instructions_file: steps/fix_jobs.md
+    inputs:
+      - file: settings.json
+        from_step: fix_settings
+    outputs:
+      job_definitions:
+        type: files
+        description: "Updated job.yml files and step instructions in current DeepWork format"
+        required: true
+      step_instruction_files:
+        type: files
+        description: "Updated step instruction files"
+        required: true
+    dependencies:
+      - fix_settings
+    reviews:
+      - run_each: step
+        additional_review_guidance: "Read the .claude/settings.json file for context on what settings were cleaned up in the prior step."
+        quality_criteria:
+          "Exposed Field Addressed": "Are `exposed: true` fields removed or noted as deprecated?"
+          "Stop Hooks Migrated": "Are `stop_hooks` migrated to `hooks.after_agent` format?"
+          "Removed Steps Cleaned": "Are references to removed steps (like `review_job_spec`) updated?"
+          "Orphaned Steps Fixed": "For jobs with no workflows, is there a single workflow (named after the job) containing all steps? For jobs with existing workflows, does each orphan get its own workflow (named after the step)?"
+          "Valid YAML": "Are all job.yml files valid YAML?"
+
+  - id: errata
+    name: "Clean Up Errata"
+    description: "Removes obsolete files and folders from prior DeepWork versions, including old skill directories, temp files, and deprecated configurations."
+    instructions_file: steps/errata.md
+    outputs: {}
+    inputs:
+      - file: job_definitions
+        from_step: fix_jobs
+    dependencies:
+      - fix_settings
+      - fix_jobs
+    reviews:
+      - run_each: step
+        additional_review_guidance: "Check the .deepwork/jobs/ directory and .claude/skills/ directory to verify the cleanup was done correctly."
+        quality_criteria:
+          "Legacy Job Skills Removed": "Are legacy skill folders for each job removed from `.claude/skills/` and `.gemini/skills/`?"
+          "Deepwork Skill Preserved": "Does the `deepwork` skill folder still exist in `.claude/skills/deepwork/`?"
+          "Temp Files Cleaned": "Are `.deepwork/tmp/` contents cleaned appropriately?"
+          "Rules Folder Removed": "Is `.deepwork/rules/` folder backed up and removed (fully deprecated)?"
+          "Rules Job Removed": "Is `.deepwork/jobs/deepwork_rules/` removed if present?"
+          "Config Version Updated": "Is `.deepwork/config.yml` using current version format?"
+          "DeepWork Re-installed": "Was `deepwork install` run after cleanup, and does it complete without errors?"
+          "Git Status Clean": "Are changes ready to be committed (no untracked garbage files)?"
diff --git a/src/deepwork/standard_jobs/deepwork_jobs/make_new_job.sh b/src/deepwork/standard_jobs/deepwork_jobs/make_new_job.sh
index c561d6d2..c87f40e8 100755
--- a/src/deepwork/standard_jobs/deepwork_jobs/make_new_job.sh
+++ b/src/deepwork/standard_jobs/deepwork_jobs/make_new_job.sh
@@ -78,43 +78,47 @@ main() {
     mkdir -p "$job_path/steps"
     mkdir -p "$job_path/hooks"
     mkdir -p "$job_path/templates"
+    mkdir -p "$job_path/scripts"
 
     # Add .gitkeep files to empty directories
     touch "$job_path/hooks/.gitkeep"
     touch "$job_path/templates/.gitkeep"
+    touch "$job_path/scripts/.gitkeep"
 
     # Create AGENTS.md file
     cat > "$job_path/AGENTS.md" << 'EOF'
 # Job Management
 
-This folder and its subfolders are managed using the `deepwork_jobs` slash commands.
+This folder and its subfolders are managed using `deepwork_jobs` workflows.
 
-## Recommended Commands
+## Recommended Workflows
 
-- `/deepwork_jobs.define` - Create or modify the job.yml specification
-- `/deepwork_jobs.implement` - Generate step instruction files from the specification
-- `/deepwork_jobs.learn` - Improve instructions based on execution learnings
+- `deepwork_jobs/new_job` - Full lifecycle: define → implement → test → iterate
+- `deepwork_jobs/learn` - Improve instructions based on execution learnings
+- `deepwork_jobs/repair` - Clean up and migrate from prior DeepWork versions
 
 ## Directory Structure
 
 ```
 .
 ├── AGENTS.md          # This file - project context and guidance
-├── job.yml            # Job specification (created by /deepwork_jobs.define)
-├── steps/             # Step instruction files (created by /deepwork_jobs.implement)
+├── job.yml            # Job specification (created by define step)
+├── steps/             # Step instruction files (created by implement step)
 │   └── *.md           # One file per step
 ├── hooks/             # Custom validation scripts and prompts
 │   └── *.md|*.sh      # Hook files referenced in job.yml
+├── scripts/           # Reusable scripts and utilities created during job execution
+│   └── *.sh|*.py      # Helper scripts referenced in step instructions
 └── templates/         # Example file formats and templates
     └── *.md|*.yml     # Templates referenced in step instructions
 ```
 
 ## Editing Guidelines
 
-1. **Use slash commands** for structural changes (adding steps, modifying job.yml)
+1. **Use workflows** for structural changes (adding steps, modifying job.yml)
 2. **Direct edits** are fine for minor instruction tweaks
-3. **Run `/deepwork_jobs.learn`** after executing job steps to capture improvements
-4. **Run `deepwork sync`** after any changes to regenerate commands
+3. **Run `deepwork_jobs/learn`** after executing job steps to capture improvements
+4. **Run `deepwork install`** after any changes to regenerate commands
 EOF
 
     info "Created directory structure:"
@@ -122,13 +126,8 @@ EOF
     echo "  ├── AGENTS.md"
     echo "  ├── steps/"
     echo "  ├── hooks/.gitkeep"
+    echo "  ├── scripts/.gitkeep"
     echo "  └── templates/.gitkeep"
-
-    echo ""
-    info "Next steps:"
-    echo "  1. Run '/deepwork_jobs.define' to create the job.yml specification"
-    echo "  2. Run '/deepwork_jobs.implement' to generate step instructions"
-    echo "  3. Run 'deepwork sync' to create slash commands"
 }
 
 main "$@"
diff --git a/src/deepwork/standard_jobs/deepwork_jobs/research_report_job_best_practices.md b/src/deepwork/standard_jobs/deepwork_jobs/research_report_job_best_practices.md
new file mode 100644
index 00000000..87e65615
--- /dev/null
+++ b/src/deepwork/standard_jobs/deepwork_jobs/research_report_job_best_practices.md
@@ -0,0 +1,184 @@
+# Research Report Job Best Practices
+
+Reference guide for designing DeepWork jobs that produce research reports, analytical documents, or similar investigative deliverables. Use this when defining jobs via the `define` step.
+
+## The General Pattern
+
+Most report-authoring jobs follow a five-phase structure. Not every job needs all five as separate steps, and some phases combine naturally, but understanding the full arc helps you design a job that doesn't skip critical work.
+
+### 1. Connect
+
+**Purpose**: Verify that the tools and data sources the job will rely on are actually accessible before any real work begins.
+
+This step is about validating prerequisites, not doing research. Common activities:
+
+- **Database connectivity**: Run a trivial query (`SELECT 1`, `SHOW TABLES`) to confirm credentials work and the schema is reachable.
+- **Web search tools**: Confirm web search and browsing tools are enabled. If the job needs to read specific sites, verify they don't require login. If they do, get the user to authenticate (e.g., via Claude in Chrome) before proceeding.
+- **API access**: Test API keys or tokens against a lightweight endpoint.
+- **File access**: Confirm that input files, dashboards, or shared drives are readable.
+
+**Why a separate step?** A failed connection discovered midway through analysis wastes all prior work. Catching it upfront is cheap. That said, for simple jobs where the data source is obvious and reliable (e.g., "search the web for X"), this can be folded into the Align step as a quick check rather than standing alone.
+
+**Outputs**: A brief connectivity report or checklist confirming each source is accessible, plus any credentials or configuration notes for later steps.
+
+### 2. Align
+
+**Purpose**: Build enough understanding of the domain and the user's intent to scope the analysis correctly.
+
+This is a cyclical step: do light research, then ask clarifying questions, then refine understanding, repeat. It ends when both the agent and user agree on what "done" looks like.
+
+**The cycle**:
+
+1. **Light grounding research** - Just enough to ask smart questions. Not deep analysis.
+2. **Clarify with the user** - Surface ambiguities and propose scope boundaries.
+3. **Repeat** until there's shared understanding.
+
+**Example - Private data (SQL-centric)**:
+- Run broad queries to get the lay of the land: total record counts, key column names, date ranges, apparent segmentation columns (e.g., `division`, `region`).
+- Then ask the user: "I see 45,000 customer records across 3 divisions. Should we scope to a particular division? I'm defining churn as customers with no activity in 90 days - does that match your definition?"
+
+**Example - Public data (web-centric)**:
+- Do broad searches to see what's out there. Notice the shape of results: are they news articles, academic papers, industry reports? What subtopics keep appearing?
+- Then ask the user: "Results split between fast-fashion trends and haute couture analysis. Which direction? Also, should we focus on the current season or look at multi-year trends?"
+
+**Outputs**: A scoping document that captures the agreed-upon research questions, data sources, definitions, exclusions, and success criteria. This becomes the north star for the Analyze step.
+
+### 3. Analyze
+
+**Purpose**: The core research cycle. Query, record, synthesize, and deepen iteratively.
+
+This is where most of the work happens. The key discipline is maintaining structured working files so that nothing gets lost and the narrative builds progressively.
+
+**Working files to maintain**:
+
+| File | Purpose |
+|------|---------|
+| Query log | Every query/search with its results. What did you ask, what came back. Keeps work auditable and reproducible. |
+| Questions & Answers | Running list of research questions. As you find answers, record them. As answers suggest new questions, add those. This drives the iterative deepening. |
+| Draft report | The evolving narrative. Updated as new findings emerge. Forces you to synthesize as you go rather than dumping data at the end. |
+
+**The iterative deepening pattern**:
+
+Analysis should deepen in layers, not stay shallow across many topics. Each answer should prompt "why?" or "what drives that?" questions:
+
+- **Layer 1**: Top-level facts. "What was our AWS spend last month?" -> $10k. "How does that compare to prior month?" -> Up $1k.
+- **Layer 2**: Decomposition. "What services drove the spend?" -> $8k EC2, $1k S3, $1k other. "Where was the increase?" -> All in EC2.
+- **Layer 3**: Root causes. "Is our EC2 fleet well-utilized?" -> Many instances with attribute X are underutilized. "Are specific workloads driving the increase?" -> Yes, instances tagged `daily_sync_*` are up ~$2k.
+- **Layer 4+**: Continue until you hit actionable findings or diminishing returns.
+
+**When to stop deepening**: When additional queries aren't changing the narrative, or when you've answered the questions from the Align step to a sufficient depth. But make sure that any questions that a reasonable business person is likely to ask when looking at your output are answered.
+
+**Outputs**: The working files above (query log, Q&A tracker, draft report), organized in the dataroom alongside the final output.
+
+### 4. Review (Not a Separate Step)
+
+Reviews are not a standalone phase but checkpoints woven into all the steps, especially the Analyze step. Use DeepWork's `reviews` mechanism in `job.yml` to define quality gates.
+
+**Reviews to consider for the Analyze phase**:
+
+- **Query completeness**: Are the key research questions from the scoping document all addressed? Are queries recorded with their results?
+- **Draft coherence**: Does the draft report tell a logical story? Are sections connected rather than isolated findings?
+- **Depth adequacy**: Has the analysis gone deep enough on the important threads? Are there obvious follow-up questions left unasked?
+- **Citation integrity**: Are claims in the draft backed by specific queries/sources from the query log?
+
+**Reviews to consider for the Present phase** (see below):
+
+- **Visual quality**: Charts render correctly, no overlapping text, readable at intended size.
+- **Content accuracy**: Citations preserved from draft, numbers match source data, arguments are logically sound.
+- **Audience fit**: Language, detail level, and framing match the intended audience (executives vs. engineers vs. clients).
+- **Format compliance**: Output matches the requested format (PDF renders correctly, HTML is responsive, slides have consistent styling).
+
+### 5. Present
+
+**Purpose**: Transform the draft into a polished final deliverable.
+
+The draft report from the Analyze step has the right content but may not be presentation-ready. This step focuses on the output experience.
+
+**Common activities**:
+
+- **Visualizations**: Generate charts, tables, or diagrams from the data. Fetch relevant images. Create infographics for key findings.
+- **Formatting**: Convert to the final output format (PDF, HTML, slides, etc.). Apply styling and layout.
+- **Narrative polish**: Tighten prose, add executive summary, ensure the document flows well for someone reading it cold.
+- **Supporting materials**: Assemble appendices, data tables, methodology notes.
+
+**This step often requires multiple review cycles.** Visual outputs have failure modes that text-only drafts don't: overlapping labels, truncated legends, broken page breaks, images that don't load. Build in quality gates for visual review.
+
+**Outputs**: The final deliverable in its target format, plus any supporting materials.
+
+## Translating This Into a Job Definition
+
+### Step Structure Options
+
+**Minimal (3 steps)** - For straightforward reports with known data sources:
+1. `scope` - Combines Connect + Align. Verify access, clarify requirements.
+2. `research` - The Analyze phase with built-in review gates.
+3. `report` - The Present phase with visual/format review gates.
+
+**Standard (4 steps)** - For most research reports:
+1. `connect` - Verify data source access.
+2. `scope` - Align on research questions and definitions.
+3. `analyze` - Core research with iterative deepening.
+4. `present` - Final deliverable production.
+
+**Comprehensive (5+ steps)** - For complex, multi-source reports:
+1. `connect` - Verify all data source access.
+2. `scope` - Align on research questions.
+3. `gather` - Collect raw data across all sources (query log output).
+4. `analyze` - Synthesize findings, build narrative (draft report output).
+5. `present` - Polish and format final deliverable.
+
+### Output Organization
+
+Follow the dataroom pattern from the define step guidelines:
+
+```
+operations/reports/2026-01/spending_analysis.md              # Final report
+operations/reports/2026-01/spending_analysis_dataroom/        # Supporting materials
+    query_log.md                                              # All queries and results
+    questions_and_answers.md                                  # Research Q&A tracker
+    raw_data/                                                 # Extracted data files
+    charts/                                                   # Generated visualizations
+    draft.md                                                  # Working draft (for audit trail)
+```
+
+### Quality Gate Design
+
+Research reports benefit from **split reviews** that evaluate content and presentation separately:
+
+```yaml
+reviews:
+  # Content review - is the analysis sound?
+  - run_each: final_report.md
+    quality_criteria:
+      "Claims Cited": "Is every factual claim backed by a specific source or query from the dataroom?"
+      "Questions Answered": "Are all research questions from the scoping document addressed?"
+      "Depth": "Does the analysis go beyond surface-level observations to root causes or actionable insights?"
+
+  # Presentation review - is the output polished?
+  - run_each: final_report.md
+    quality_criteria:
+      "Readable Flow": "Does the document flow logically for someone reading it without prior context?"
+      "Audience Fit": "Is the language and detail level appropriate for the intended audience?"
+      "Visual Quality": "Do all charts, tables, and figures render correctly and add value?"
+```
+
+### Capability Considerations
+
+Research jobs frequently need specialized tools. During the `define` step, ask about:
+
+- **Database access**: What databases? What client tools or connection strings?
+- **Web browsing**: Will sites require authentication? Is Claude in Chrome available?
+- **File generation**: Does the final output need PDF/HTML rendering? What tools are available?
+- **Data visualization**: What charting libraries or tools can the agent use?
+
+## Anti-Patterns to Avoid
+
+**Shallow breadth over deep analysis**: Covering 20 topics superficially is less valuable than covering 5 topics with layered depth. Design the Analyze step to encourage iterative deepening, not checklist completion.
+
+**Skipping the scoping step**: Jumping straight into analysis without aligning on definitions and scope almost always leads to rework. "Analyze our churn" means very different things depending on how churn is defined.
+
+**Query results only in memory**: If queries and their results aren't written to working files, they can't be reviewed, cited, or audited. The query log is not optional.
+
+**Draft report written at the end**: The draft should evolve throughout the Analyze step, not be assembled from notes after all research is complete. Writing the narrative as you go reveals gaps in the analysis early.
+
+**Conflating analysis with presentation**: Trying to produce a polished PDF while still figuring out the findings leads to wasted formatting work. Get the content right first, then make it pretty.
diff --git a/src/deepwork/standard_jobs/deepwork_jobs/steps/define.md b/src/deepwork/standard_jobs/deepwork_jobs/steps/define.md
index 31de7440..78ac1223 100644
--- a/src/deepwork/standard_jobs/deepwork_jobs/steps/define.md
+++ b/src/deepwork/standard_jobs/deepwork_jobs/steps/define.md
@@ -31,60 +31,6 @@ Start by asking structured questions to understand what the user wants to accomp
    - What are the distinct stages from start to finish?
    - Are there any dependencies between phases?
 
-### Step 1.5: Detect Document-Oriented Workflows
-
-**Check for document-focused patterns** in the user's description:
-- Keywords: "report", "summary", "document", "create", "monthly", "quarterly", "for stakeholders", "for leadership"
-- Final deliverable is a specific document (e.g., "AWS spending report", "competitive analysis", "sprint summary")
-- Recurring documents with consistent structure
-
-**If a document-oriented workflow is detected:**
-
-1. Inform the user: "This workflow produces a specific document type. I recommend defining a doc spec first to ensure consistent quality."
-
-2. Ask structured questions to understand if they want to:
-   - Create a doc spec for this document
-   - Use an existing doc spec (if any exist in `.deepwork/doc_specs/`)
-   - Skip doc spec and proceed with simple outputs
-
-### Step 1.6: Define the Doc Spec (if needed)
-
-When creating a doc spec, gather the following information:
-
-1. **Document Identity**
-   - What is the document called? (e.g., "Monthly AWS Spending Report")
-   - Brief description of its purpose
-   - Where should these documents be stored? (path patterns like `finance/aws-reports/*.md`)
-
-2. **Audience and Context**
-   - Who reads this document? (target audience)
-   - How often is it produced? (frequency)
-
-3. **Quality Criteria** (3-5 criteria, each with name and description)
-
-   **Important**: Doc spec quality criteria define requirements for the **output document itself**, not the process of creating it. Focus on what the finished document must contain or achieve.
-
-   Examples for a spending report:
-   - **Visualization**: Must include charts showing spend breakdown by service
-   - **Variance Analysis**: Must compare current month against previous with percentages
-   - **Action Items**: Must include recommended cost optimization actions
-
-   **Note**: When a doc spec is created for a step's output, the step should generally NOT have separate `quality_criteria` in the job.yml. The doc spec's criteria cover output quality. Only add step-level quality_criteria if there are essential process requirements (e.g., "must use specific tool"), and minimize these when possible.
-
-4. **Document Structure**
-   - What sections should it have?
-   - Any required elements (tables, charts, summaries)?
-
-### Step 1.7: Create the doc spec File (if needed)
-
-Create the doc spec file at `.deepwork/doc_specs/[doc_spec_name].md`:
-
-**Template reference**: See `.deepwork/jobs/deepwork_jobs/templates/doc_spec.md.template` for the standard structure.
-
-**Complete example**: See `.deepwork/doc_specs/job_spec.md` for a fully worked example (the doc spec for job.yml files).
-
-After creating the doc spec, proceed to Step 2 with the doc spec reference for the final step's output.
-
 ### Step 2: Define Each Step
 
 For each major phase they mentioned, ask structured questions to gather details:
@@ -106,8 +52,6 @@ For each major phase they mentioned, ask structured questions to gather details:
    - Where should each output be saved? (filename/path)
    - Should outputs be organized in subdirectories? (e.g., `reports/`, `data/`, `drafts/`)
    - Will other steps need this output?
-   - **Does this output have a doc spec?** If a doc spec was created in Step 1.6/1.7, reference it for the appropriate output
-
    #### Work Product Storage Guidelines
 
    **Key principle**: Job outputs belong in the main repository directory structure, not in dot-directories. The `.deepwork/` directory is for job definitions and configuration only.
@@ -172,38 +116,54 @@ For each major phase they mentioned, ask structured questions to gather details:
    - Are there any quality checks or validation needed?
    - What makes a good vs. bad output for this step?
 
-6. **Agent Delegation** (optional)
-   - Should this step be executed by a specific agent type?
-   - Use the `agent` field when the step should run in a forked context with a specific agent
-   - When `agent` is set, the generated skill automatically includes `context: fork`
-   - Available agent types:
-     - `general-purpose` - Standard agent for multi-step tasks
-
-   ```yaml
-   steps:
-     - id: research_step
-       agent: general-purpose  # Delegates to the general-purpose agent
-   ```
+   **Important**: When skills are generated, quality criteria are automatically included in the output. Do not duplicate them in step instructions or details—this causes redundancy and confusion.
 
 **Note**: You're gathering this information to understand what instructions will be needed, but you won't create the instruction files yet - that happens in the `implement` step.
 
-#### Doc Spec-Aware Output Format
+### Capability Considerations
 
-When a step produces a document with a doc spec reference, use this format in job.yml:
+When defining steps, identify any that require specialized tools:
 
-```yaml
-outputs:
-  - file: reports/monthly_spending.md
-    doc_spec: .deepwork/doc_specs/monthly_aws_report.md
-```
+**Browser Automation**: If any step involves web scraping, form filling, interactive browsing, UI testing, or research requiring website visits, ask the user what browser tools they have available. For Claude Code users, **Claude in Chrome** (Anthropic's browser extension) has been tested with DeepWork and is recommended for new users. Don't assume a default—confirm the tool before designing browser-dependent steps.
 
-The doc spec's quality criteria will automatically be included in the generated skill, ensuring consistent document quality.
+### Parallel Sub-Workflow Pattern
 
-### Capability Considerations
+When a workflow needs to apply a multi-step process to many items independently (e.g., research each of 5 competitors, review each of 12 pull requests, analyze each file in a directory), **do not inline the repeated logic as a single step**. Instead, use the parallel sub-workflow pattern:
 
-When defining steps, identify any that require specialized tools:
+1. **Define a separate workflow** for the process that will be repeated. This workflow handles one item at a time (e.g., `research_one_competitor` with steps like `gather_data` → `analyze` → `write_summary`).
 
-**Browser Automation**: If any step involves web scraping, form filling, interactive browsing, UI testing, or research requiring website visits, ask the user what browser tools they have available. For Claude Code users, **Claude in Chrome** (Anthropic's browser extension) has been tested with DeepWork and is recommended for new users. Don't assume a default—confirm the tool before designing browser-dependent steps.
+2. **In the main workflow**, add a step whose instructions tell the agent to launch the sub-workflow once per item using sub-agents (via the Task tool). Since each item is independent, these sub-workflow runs execute in parallel.
+
+**Why this matters:**
+- **Parallelism**: Independent items are processed concurrently instead of sequentially, dramatically reducing wall-clock time
+- **Quality gates**: Each sub-workflow run goes through its own review cycle, so a bad result for one item doesn't block the others
+- **Reusability**: The sub-workflow can be invoked on its own for ad-hoc single-item runs
+
+**How to structure it in `job.yml`:**
+
+```yaml
+workflows:
+  - name: full_analysis
+    summary: "Research all competitors end-to-end"
+    steps:
+      - identify_competitors
+      - research_all          # This step launches research_one in parallel
+      - synthesize
+
+  - name: research_one
+    summary: "Deep-dive research on a single competitor"
+    steps:
+      - gather_data
+      - analyze
+      - write_summary
+```
+
+The `research_all` step's instructions should tell the agent to:
+- Read the list of items from the prior step's output
+- Launch `research_one` as a sub-workflow for each item using parallel sub-agents (Task tool)
+- Collect the results and confirm all runs completed
+
+**When to recognize this pattern:** Look for language like "for each X, do Y" where Y involves more than one logical phase. If Y is a single simple action, a regular step with a loop is fine. If Y is itself a multi-step process with intermediate outputs worth reviewing, split it into a sub-workflow.
 
 ### Step 3: Validate the Workflow
 
@@ -225,52 +185,86 @@ After gathering information about all steps:
    - Job description (detailed multi-line explanation)
    - Version number (start with 1.0.0)
 
-### Step 4: Define Quality Validation (Stop Hooks)
+### Step 4: Define Quality Reviews
 
-For each step, consider whether it would benefit from **quality validation loops**. Stop hooks allow the AI agent to iteratively refine its work until quality criteria are met.
+For each step, define **reviews** that evaluate the step's outputs. Reviews run automatically when a step completes and provide quality validation loops.
 
-**Ask structured questions about quality validation:**
-- "Are there specific quality criteria that must be met for this step?"
-- "Would you like the agent to validate its work before completing?"
-- "What would make you send the work back for revision?"
+For intermediate outputs between steps, reviews let you make sure you don't go too far down the wrong path. Add reviews that confirm things that could cause problems later. For example, in a report creation process, you might have an intermediate step that performs a number of queries on the data and records the results so that later report-writing steps can synthesize that information into a coherent narrative. In this case, you would want to add a review that checks that the queries SQL matches up with the description of the queries in the job description.
 
-**Stop hooks are particularly valuable for:**
-- Steps with complex outputs that need multiple checks
-- Steps where quality is critical (final deliverables)
-- Steps with subjective quality criteria that benefit from AI self-review
+For final outputs, reviews let you make sure the output meets the user's expectations. For example, with a data-centric report job, you might have one review on the final output for consistency with style guidelines and tone and such, and a totally separate review on the data-backing to make sure the claims in the report are supported by the data from earlier steps and all have citations. 
 
-**Three types of stop hooks are supported:**
+**Any jobs with written final output must always have reviews**. Some suggested ones are:
+- Ensure claims have citations and the citations are not hallucinated
+- Ensure the output follows the style guidelines and tone
+- Ensure the output is well-organized and easy to read
+- Ensure obvious questions the content raises have answers provided
+- Visual formatting is correct (for things like PDF or HTML where the visual output matters)
+- That the content matches what the intended audience expects (i.e. executives vs engineers)
 
-1. **Inline Prompt** (`prompt`) - Best for simple quality criteria
-   ```yaml
-   stop_hooks:
-     - prompt: |
-         Verify the output meets these criteria:
-         1. Contains at least 5 competitors
-         2. Each competitor has a description
-         3. Selection rationale is clear
-   ```
+**Reviews format:**
 
-2. **Prompt File** (`prompt_file`) - For detailed/reusable criteria
-   ```yaml
-   stop_hooks:
-     - prompt_file: hooks/quality_check.md
-   ```
+Each review specifies `run_each` (what to review) and `quality_criteria` (a map of criterion name to question):
 
-3. **Script** (`script`) - For programmatic validation (tests, linting)
-   ```yaml
-   stop_hooks:
-     - script: hooks/run_tests.sh
-   ```
+```yaml
+reviews:
+  - run_each: step  # Review all outputs together
+    quality_criteria:
+      "Consistent Style": "Do all files follow the same structure?"
+      "Complete Coverage": "Are all required topics covered?"
+  - run_each: report_files  # Review each file in a 'files'-type output individually
+    quality_criteria:
+      "Well Written": "Is the content clear and well-organized?"
+      "Data-Backed": "Are claims supported by data?"
+```
+
+**`run_each` options:**
+- `step` — Review runs once with ALL output files
+- `<output_name>` where output is `type: file` — Review runs once with that specific file
+- `<output_name>` where output is `type: files` — Review runs once per file in the list
+
+**`additional_review_guidance`** (optional): Tells the reviewer what other files or context to look at when performing the review. Reviewers only see the step's output files by default — they do NOT automatically see inputs from prior steps. When a review needs context beyond the output files (e.g., checking that an output is consistent with a prior step's deliverable, or that it follows conventions in a config file), use this field to tell the reviewer what to read.
 
-**Multiple hooks can be combined:**
 ```yaml
-stop_hooks:
-  - script: hooks/lint_output.sh
-  - prompt: "Verify the content is comprehensive and well-organized"
+reviews:
+  - run_each: report_files
+    additional_review_guidance: "Read the comparison_matrix.md file for context on whether claims in the report are supported by the analysis data."
+    quality_criteria:
+      "Data-Backed": "Are recommendations supported by the competitive analysis data?"
+  - run_each: step_instruction_files
+    additional_review_guidance: "Read the job.yml file in the same job directory for context on how this instruction file fits into the larger workflow."
+    quality_criteria:
+      "Complete Instructions": "Is the instruction file complete?"
 ```
 
-**Encourage prompt-based hooks** - They leverage the AI's ability to understand context and make nuanced quality judgments. Script hooks are best for objective checks (syntax, format, tests).
+**When to use `additional_review_guidance`:**
+- When a review criterion references data or context from a prior step's output
+- When the reviewer needs to cross-check the output against a specification, config, or schema file
+- When the review involves consistency checks between the current output and other project files
+- When the criterion mentions something the reviewer can't assess from the output alone
+
+**When NOT to use it:**
+- When all criteria can be evaluated by reading just the output files themselves (e.g., "Is it well-written?", "Are there spelling errors?")
+- Don't use it to dump large amounts of content — keep guidance short and tell the reviewer *what to read*, not *what's in it*
+
+**Reviews are particularly valuable for:**
+- Steps with complex outputs that need multiple quality checks
+- Steps where quality is critical (final deliverables)
+- Steps with subjective quality criteria that benefit from AI self-review
+- Steps producing multiple files where each file needs individual review
+
+**Quality review timeout considerations:**
+Each individual quality review call has a 120-second timeout. For `run_each: <output_name>` with `files`-type outputs, each file gets its own separate review call — so having many files does NOT cause timeout accumulation. Timeout risk is only for individual reviews that are complex, such as:
+- Reviewing a single very large file (500+ lines) with many criteria
+- Review criteria that require cross-referencing large amounts of context
+For these cases:
+- Keep review criteria focused and efficient to evaluate
+- Consider using `run_each: step` (reviews all outputs together once) if the per-file reviews are unnecessary
+- The agent can use `quality_review_override_reason` to bypass a timed-out review, but this loses the quality gate benefit
+
+**For steps with no quality checks needed, use an empty reviews list:**
+```yaml
+reviews: []
+```
 
 ### Step 5: Create the Job Directory and Specification
 
@@ -282,19 +276,10 @@ Only after you have complete understanding, create the job directory and `job.ym
 .deepwork/jobs/deepwork_jobs/make_new_job.sh [job_name]
 ```
 
-This creates:
-- `.deepwork/jobs/[job_name]/` - Main job directory
-- `.deepwork/jobs/[job_name]/steps/` - For step instruction files
-- `.deepwork/jobs/[job_name]/hooks/` - For custom validation scripts
-- `.deepwork/jobs/[job_name]/templates/` - For example file formats
-- `.deepwork/jobs/[job_name]/AGENTS.md` - Job management guidance
-
 **Then create the job.yml file** at `.deepwork/jobs/[job_name]/job.yml`
 
 (Where `[job_name]` is the name of the NEW job you're creating, e.g., `competitive_research`)
 
-**Doc Spec**: See `.deepwork/doc_specs/job_spec.md` for the complete specification with quality criteria.
-
 **Template reference**: See `.deepwork/jobs/deepwork_jobs/templates/job.yml.template` for the standard structure.
 
 **Complete example**: See `.deepwork/jobs/deepwork_jobs/templates/job.yml.example` for a fully worked example.
@@ -414,7 +399,7 @@ Claude: Great! Creating the job.yml specification now...
 - .deepwork/jobs/competitive_research/job.yml
 
 **Next step:**
-Run `/deepwork_jobs.review_job_spec` to validate the specification against quality criteria.
+Implement the job to generate step instruction files.
 ```
 
 ## Important Guidelines
@@ -454,5 +439,5 @@ The complete YAML specification file (example shown in Step 5 above).
 After creating the file:
 1. Inform the user that the specification is complete
 2. Recommend that they review the job.yml file
-3. Tell them to run `/deepwork_jobs.review_job_spec` next
+3. Tell them the next step is to implement the job (generate step instruction files)
 
diff --git a/src/deepwork/standard_jobs/deepwork_jobs/steps/errata.md b/src/deepwork/standard_jobs/deepwork_jobs/steps/errata.md
new file mode 100644
index 00000000..10245c62
--- /dev/null
+++ b/src/deepwork/standard_jobs/deepwork_jobs/steps/errata.md
@@ -0,0 +1,170 @@
+# Clean Up Errata
+
+## Objective
+
+Remove obsolete files and folders from prior DeepWork versions. This final step cleans up artifacts that are no longer used by the MCP-based system.
+
+## Task
+
+Identify and clean up deprecated files and folders.
+
+### Step 1: Remove Legacy Job Skill Folders
+
+Old DeepWork versions created individual skill folders for each job and step. These need to be removed while preserving the main `deepwork` skill folder.
+
+**Process:**
+
+1. **List all jobs** in `.deepwork/jobs/`:
+   ```bash
+   ls .deepwork/jobs/
+   ```
+
+2. **For each job**, kick off a sub-agent to find and remove legacy skill folders. The sub-agent should:
+   - Search in both `.claude/skills/` and `.gemini/skills/`
+   - Find folders matching:
+     - `{job_name}/` - folder named exactly like the job
+     - `{job_name}.*/` - folders starting with the job name followed by a period (e.g., `my_job.step1/`, `my_job.step2/`)
+   - Remove each matching folder
+   - Report what was removed
+
+   **Example commands for a job named `competitive_research`:**
+   ```bash
+   # Find and remove from .claude/skills/
+   rm -rf .claude/skills/competitive_research/ 2>/dev/null
+   rm -rf .claude/skills/competitive_research.*/ 2>/dev/null
+
+   # Find and remove from .gemini/skills/
+   rm -rf .gemini/skills/competitive_research/ 2>/dev/null
+   rm -rf .gemini/skills/competitive_research.*/ 2>/dev/null
+   ```
+
+3. **Run sub-agents in parallel** - one for each job to speed up the process.
+
+4. **Verify the `deepwork` skill folder remains:**
+   ```bash
+   ls -d .claude/skills/deepwork/ 2>/dev/null || echo "ERROR: deepwork skill missing!"
+   ls -d .gemini/skills/deepwork/ 2>/dev/null || echo "WARNING: gemini deepwork skill missing (may not have been installed)"
+   ```
+
+   **CRITICAL:** The `deepwork` skill folder in `.claude/skills/deepwork/` MUST still exist after cleanup. If it is missing, something went wrong - do NOT proceed and investigate what happened.
+
+**What this removes:**
+```
+.claude/skills/
+├── competitive_research/     <- REMOVE (legacy job folder)
+├── competitive_research.discover/  <- REMOVE (legacy step folder)
+├── competitive_research.analyze/   <- REMOVE (legacy step folder)
+├── deepwork/                 <- KEEP (current MCP entry point)
+└── some_other_job/           <- REMOVE (legacy job folder)
+```
+
+**Do NOT remove:**
+- `.claude/skills/deepwork/` - This is the current MCP-based skill entry point
+- `.gemini/skills/deepwork/` - Same for Gemini
+- Any skill folders that don't match job names in `.deepwork/jobs/`
+
+### Step 2: Clean Temp Files
+
+Check `.deepwork/tmp/` for accumulated temporary files:
+
+```bash
+ls -la .deepwork/tmp/ 2>/dev/null || echo "No tmp folder"
+```
+
+**Safe to delete:**
+- `.deepwork/tmp/rules/queue/*.json` - Old rules queue files
+- Any files older than 7 days
+- Empty subdirectories
+
+**Be careful with:**
+- Files that might be in-progress work
+- Anything with recent modification times
+
+```bash
+# Clean old queue files
+rm -rf .deepwork/tmp/rules/queue/*.json 2>/dev/null
+
+# Remove empty directories
+find .deepwork/tmp -type d -empty -delete 2>/dev/null
+```
+
+### Step 3: Remove Rules Folder (Fully Deprecated)
+
+DeepWork Rules have been completely removed from the system. Delete the `.deepwork/rules/` folder and all related items:
+
+```bash
+rm -rf .deepwork/rules/ 2>/dev/null
+rm -rf .deepwork/tmp/rules/ 2>/dev/null
+rm -rf .deepwork/jobs/deepwork_rules/ 2>/dev/null
+```
+
+### Step 4: Update Config Version
+
+Check `.deepwork/config.yml` for outdated version format:
+
+```bash
+cat .deepwork/config.yml
+```
+
+**Old format:**
+```yaml
+version: 1.0.0
+platforms:
+- claude
+```
+
+**Current format:**
+```yaml
+version: "1.0"
+platforms:
+  - claude
+```
+
+Update if needed to match current schema expectations.
+
+### Step 5: Remove Other Obsolete Files
+
+Check for and remove other obsolete files:
+
+| File/Pattern | Description | Action |
+|--------------|-------------|--------|
+| `.deepwork/.last_head_ref` | Git state tracking | Keep (used by MCP) |
+| `.deepwork/.last_work_tree` | Git state tracking | Keep (used by MCP) |
+| `.deepwork/.gitignore` | Ignore patterns | Review and update |
+| `.claude/commands/` | Generated commands | Keep (current system) |
+| `.claude/settings.local.json` | Local overrides | Keep (user settings) |
+
+### Step 6: Re-install DeepWork
+
+After all cleanup is complete, re-run `deepwork install` to ensure configurations are current and consistent:
+
+```bash
+deepwork install
+```
+
+**Then verify:**
+1. Check that `.deepwork/config.yml` is valid and up to date
+2. Check that `.claude/skills/deepwork/` exists and contains the expected skill entry point
+3. Check that all jobs in `.deepwork/jobs/` have valid `job.yml` files
+4. Run `deepwork install` a second time and confirm the output is clean (no errors or warnings)
+
+If any issues are found, fix them before proceeding. The goal is a clean, working DeepWork installation with no residual problems from the repair process.
+
+### Step 7: Verify Git Status
+
+Check that the cleanup hasn't left untracked garbage:
+
+```bash
+git status
+```
+
+**Review:**
+- Deleted files should show as deleted
+- No new untracked files should appear (unless intentionally created)
+- Backup files (`.backup`) should be in `.gitignore` or cleaned up
+
+## Important Notes
+
+1. **Always back up before deleting** - User data is irreplaceable
+2. **Ask before destructive actions** - When in doubt, ask the user
+3. **Don't auto-commit** - Let the user review and commit changes themselves
diff --git a/src/deepwork/standard_jobs/deepwork_jobs/steps/fix_jobs.md b/src/deepwork/standard_jobs/deepwork_jobs/steps/fix_jobs.md
new file mode 100644
index 00000000..ccb5e4a4
--- /dev/null
+++ b/src/deepwork/standard_jobs/deepwork_jobs/steps/fix_jobs.md
@@ -0,0 +1,307 @@
+# Fix Job Definitions
+
+## Objective
+
+Update all job.yml files and step instructions in `.deepwork/jobs/` to the current DeepWork format. This step migrates deprecated fields, removes references to deleted steps, and ensures all jobs are compatible with the MCP-based workflow system.
+
+## Task
+
+Audit and repair all job definitions, migrating from legacy formats to current specifications.
+
+### Step 1: Inventory All Jobs
+
+List all jobs in the project:
+
+```bash
+ls -la .deepwork/jobs/
+```
+
+For each job directory, you'll need to check and potentially fix the `job.yml` file.
+
+### Step 1.5: Process Jobs in Parallel
+
+**For each job** (except `deepwork_jobs` which should be updated via `deepwork install`), kick off a sub-agent to audit and repair that job's `job.yml` file. The sub-agent should:
+
+1. Read the job's `job.yml` file
+2. Check for and fix all issues described in Steps 2-6 below
+3. Validate the YAML is still valid after changes
+4. Report what was changed
+
+**Run sub-agents in parallel** - one for each job to speed up the process.
+
+**Example prompt for sub-agent:**
+```
+Audit and repair the job at `.deepwork/jobs/[job_name]/job.yml`:
+1. Remove any `exposed: true` fields from steps
+2. Migrate `stop_hooks` to `hooks.after_agent` format
+3. Remove references to deleted steps (like `review_job_spec`)
+4. Fix orphaned steps by adding them to workflows
+5. Migrate `outputs` from array format to map format with `type` and `description`
+6. Update any `file` inputs that reference renamed output keys
+7. Migrate `quality_criteria` arrays to `reviews` format (run_each + map criteria)
+8. Bump version and add changelog entry if changes were made
+9. Validate YAML syntax
+
+Report what changes were made.
+```
+
+### Step 2: Remove `exposed` Field
+
+The `exposed` field on steps no longer has any effect in MCP-based DeepWork. Steps are now only accessible through workflows.
+
+**Find and remove:**
+```yaml
+steps:
+  - id: some_step
+    exposed: true  # REMOVE THIS LINE
+```
+
+If a step was `exposed: true` and is not in any workflow, it should either:
+1. Be added to a workflow, OR
+2. Be removed from the job entirely
+
+### Step 3: Migrate `stop_hooks` to `hooks.after_agent`
+
+The `stop_hooks` field is deprecated. Migrate to the new `hooks` structure:
+
+**Before (deprecated):**
+```yaml
+steps:
+  - id: my_step
+    stop_hooks:
+      - prompt: "Verify the output meets quality standards"
+```
+
+**After (current format):**
+```yaml
+steps:
+  - id: my_step
+    hooks:
+      after_agent:
+        - prompt: "Verify the output meets quality standards"
+```
+
+### Step 4: Remove References to Deleted Steps
+
+Check for references to steps that no longer exist in the standard jobs:
+
+**Steps that have been removed:**
+- `review_job_spec` - Was removed from `deepwork_jobs` in v1.0.1
+
+**What to fix:**
+- Remove from workflow `steps` arrays
+- Update `from_step` references in inputs
+- Update `dependencies` arrays
+
+**Example fix:**
+```yaml
+# Before
+workflows:
+  - name: new_job
+    steps:
+      - define
+      - review_job_spec  # REMOVE
+      - implement
+
+steps:
+  - id: implement
+    inputs:
+      - file: job.yml
+        from_step: review_job_spec  # CHANGE TO: define
+    dependencies:
+      - review_job_spec  # CHANGE TO: define
+```
+
+### Step 5: Fix Orphaned Steps
+
+Steps not included in any workflow cannot be invoked via the MCP interface.
+
+**How to handle orphaned steps depends on whether the job has ANY workflows defined:**
+
+#### Case A: Job has NO workflows defined
+
+If the job has no `workflows:` section at all (or it's empty), create a **single workflow with the same name as the job** containing all steps in their defined order:
+
+```yaml
+# For a job named "my_job" with steps: step_a, step_b, step_c
+workflows:
+  - name: my_job  # Same name as the job
+    summary: "Runs the complete my_job workflow"
+    steps:
+      - step_a
+      - step_b
+      - step_c
+```
+
+This preserves the original intent of the job as a sequential workflow.
+
+#### Case B: Job has SOME workflows defined
+
+If the job already has one or more workflows defined, but some steps are not included in any of them, create a **separate single-step workflow for each orphaned step** with the same name as the step:
+
+```yaml
+# Existing workflows stay as-is, add new ones for orphans
+workflows:
+  - name: existing_workflow
+    summary: "..."
+    steps: [...]
+
+  # Add for each orphaned step:
+  - name: orphaned_step_name  # Same name as the step
+    summary: "Runs the orphaned_step_name step"
+    steps:
+      - orphaned_step_name
+```
+
+This ensures all steps remain accessible via the MCP interface while preserving the existing workflow structure.
+
+### Step 6: Migrate `outputs` from Array Format to Map Format
+
+The `outputs` field on steps changed from an array of strings/objects to a map with typed entries. Every output must now have a key (identifier), a `type` (`file` or `files`), and a `description`.
+
+**Before (legacy array format):**
+```yaml
+steps:
+  - id: define
+    outputs:
+      - job.yml
+      - steps/
+      - file: report.md
+        doc_spec: .deepwork/doc_specs/report.md
+```
+
+**After (current map format):**
+```yaml
+steps:
+  - id: define
+    outputs:
+      job.yml:
+        type: file
+        description: "The job definition file"
+      step_instruction_files:
+        type: files
+        description: "Instruction Markdown files for each step"
+      report.md:
+        type: file
+        description: "The generated report"
+```
+
+**Migration rules:**
+
+1. **Plain filename strings** (e.g., `- job.yml`, `- output.md`): Use the filename as the key, set `type: file`, add a `description`.
+2. **Directory strings ending in `/`** (e.g., `- steps/`, `- competitor_profiles/`): Choose a descriptive key name (e.g., `step_instruction_files`, `competitor_profiles`), set `type: files`, add a `description`.
+3. **Objects with `doc_spec`** (e.g., `- file: report.md` with `doc_spec: ...`): Drop the `doc_spec` field entirely, use the filename as the key, set `type: file`, add a `description`.
+4. **`description` is required** on every output entry. Write a short sentence describing what the output contains.
+
+**Update `file` inputs that reference renamed outputs:**
+
+When a directory output key changes (e.g., `steps/` becomes `step_instruction_files`), any downstream step with a `file` input referencing the old name must be updated to use the new key.
+
+```yaml
+# Before: input references old directory name
+steps:
+  - id: implement
+    inputs:
+      - file: steps/
+        from_step: define
+
+# After: input uses the new output key
+steps:
+  - id: implement
+    inputs:
+      - file: step_instruction_files
+        from_step: define
+```
+
+### Step 7: Migrate `quality_criteria` to `reviews`
+
+The flat `quality_criteria` field on steps has been replaced by the `reviews` array. Each review specifies `run_each` (what to review) and `quality_criteria` as a map of criterion name to question.
+
+**Before (deprecated):**
+```yaml
+steps:
+  - id: my_step
+    quality_criteria:
+      - "**Complete**: Is the output complete?"
+      - "**Accurate**: Is the data accurate?"
+```
+
+**After (current format):**
+```yaml
+steps:
+  - id: my_step
+    reviews:
+      - run_each: step
+        quality_criteria:
+          "Complete": "Is the output complete?"
+          "Accurate": "Is the data accurate?"
+```
+
+**Migration rules:**
+
+1. **Parse the old format**: Each string typically follows `**Name**: Question` format. Extract the name (bold text) as the map key and the question as the value.
+2. **Choose `run_each`**: Default to `step` (reviews all outputs together). If the step has a single primary output, consider using that output name instead.
+3. **For steps with no quality_criteria**: Use `reviews: []`
+4. **Remove the old field**: Delete the `quality_criteria` array entirely after migration.
+
+### Step 8: Update Version Numbers
+
+If you made significant changes to a job, bump its version number:
+
+```yaml
+# Bump patch version for minor fixes
+version: "1.0.0"  ->  version: "1.0.1"
+
+# Add changelog entry
+changelog:
+  - version: "1.0.1"
+    changes: "Migrated to current DeepWork format; removed deprecated fields"
+```
+
+## Common Issues and Fixes
+
+### Issue: Step references non-existent step in `from_step`
+```
+Error: Step 'implement' has file input from 'review_job_spec' but 'review_job_spec' is not in dependencies
+```
+**Fix:** Update `from_step` to reference a step that still exists.
+
+### Issue: Workflow references non-existent step
+```
+Error: Workflow 'new_job' references non-existent step 'review_job_spec'
+```
+**Fix:** Remove the step from the workflow's `steps` array.
+
+### Issue: Orphaned step warning
+```
+Warning: Job 'my_job' has steps not included in any workflow: standalone_step
+```
+**Fix:**
+- If the job has NO workflows: Create one workflow named `my_job` with all steps in order
+- If the job has SOME workflows: Add a `standalone_step` workflow containing just that step
+
+### Issue: `outputs` is an array instead of an object
+```
+Error: Step 'define' outputs should be an object but got array
+```
+**Fix:** Convert from the legacy array format to the map format. Each array entry becomes a key in the map with `type` (`file` or `files`) and `description`. See Step 6 for detailed migration rules. Also update any `file` inputs in downstream steps if an output key was renamed.
+
+## Jobs to Check
+
+For each job in `.deepwork/jobs/`, check:
+
+| Check | What to Look For |
+|-------|------------------|
+| `exposed` field | Remove from all steps |
+| `stop_hooks` | Migrate to `hooks.after_agent` |
+| `outputs` format | Migrate from array to map with `type` and `description` |
+| `quality_criteria` | Migrate to `reviews` with `run_each` and map-format criteria |
+| Workflow steps | Remove references to deleted steps |
+| Dependencies | Update to valid step IDs |
+| File inputs | Update `from_step` references; update keys for renamed outputs |
+| Version | Bump if changes were made |
+
+## Important Notes
+
+1. **Preserve custom logic** - When migrating hooks, preserve the prompt content
+2. **Test after changes** - Validate YAML syntax after each job fix to catch errors early
diff --git a/src/deepwork/standard_jobs/deepwork_jobs/steps/fix_settings.md b/src/deepwork/standard_jobs/deepwork_jobs/steps/fix_settings.md
new file mode 100644
index 00000000..d164b69e
--- /dev/null
+++ b/src/deepwork/standard_jobs/deepwork_jobs/steps/fix_settings.md
@@ -0,0 +1,177 @@
+# Fix Settings Files
+
+## Objective
+
+Clean up `.claude/settings.json` and related configuration files, removing legacy artifacts from prior DeepWork versions. This step ensures the Claude Code settings are free of deprecated permissions, duplicate hooks, and hardcoded paths.
+
+## Task
+
+Audit and repair the `.claude/settings.json` file, removing gunk accumulated from older DeepWork implementations.
+
+### Step 1: Create Backup
+
+Before making any changes, create a backup:
+
+```bash
+cp .claude/settings.json .claude/settings.json.backup
+```
+
+### Step 2: Inventory DeepWork Jobs
+
+First, get the list of jobs that exist in `.deepwork/jobs/`:
+
+```bash
+ls .deepwork/jobs/
+```
+
+Note these job names - you will use them to identify which `Skill(...)` entries to remove.
+
+### Step 3: Remove DeepWork Skill Permissions
+
+Look for and **remove** `Skill(...)` permission entries that match DeepWork jobs. Only remove entries where the skill name matches a job in `.deepwork/jobs/`.
+
+**What to look for:**
+```json
+"permissions": {
+  "allow": [
+    "Skill(deepwork_jobs)",           // Remove if 'deepwork_jobs' is in .deepwork/jobs/
+    "Skill(deepwork_jobs.define)",    // Remove - matches job_name.step pattern
+    "Skill(competitive_research)",    // Remove if 'competitive_research' is in .deepwork/jobs/
+    "Skill(my_custom_skill)",         // KEEP - not a DeepWork job
+    ...
+  ]
+}
+```
+
+**IMPORTANT:** Only remove skills that:
+- Exactly match a job name in `.deepwork/jobs/` (e.g., `Skill(job_name)`)
+- Match the pattern `job_name.step_name` where `job_name` is in `.deepwork/jobs/`
+
+**DO NOT remove** skills that don't match DeepWork jobs - the user may have created these manually for other purposes.
+
+### Step 4: Remove Duplicate Hooks
+
+Check for duplicate hook entries in the `hooks` section. Prior versions sometimes added the same hook multiple times.
+
+**Example of duplicates to consolidate:**
+```json
+"hooks": {
+  "UserPromptSubmit": [
+    {
+      "matcher": "",
+      "hooks": [{ "type": "command", "command": "some_command" }]
+    },
+    {
+      "matcher": "",
+      "hooks": [{ "type": "command", "command": "some_command" }]  // DUPLICATE
+    }
+  ]
+}
+```
+
+Keep only one instance of each unique hook.
+
+### Step 5: Remove Hardcoded User Paths
+
+Search for and remove any hardcoded paths that reference specific user directories:
+
+**Patterns to find and remove:**
+- `/Users/username/.local/pipx/venvs/deepwork/bin/python`
+- `/home/username/.local/...`
+- Any path containing a specific username
+
+These should either be removed or replaced with relative paths.
+
+### Step 6: Remove DeepWork Rules Hooks (Fully Deprecated)
+
+DeepWork Rules have been completely removed from the system. Remove ALL hooks related to rules:
+
+**Hooks to remove entirely:**
+- Any hook with command `deepwork hook rules_check`
+- Any hook with command containing `rules_check`
+- Any hook referencing `.deepwork/jobs/deepwork_rules/hooks/`
+- Any hook referencing `.deepwork/rules/`
+
+**Also remove these permissions if present:**
+- `Skill(deepwork_rules)`
+- `Skill(deepwork_rules.define)`
+- `Bash(rm -rf .deepwork/tmp/rules/queue/*.json)`
+
+### Step 7: Remove Other Deprecated Commands
+
+Remove hooks referencing other deprecated DeepWork commands:
+
+**Commands to remove:**
+- `deepwork hook *` - The entire hook subcommand is deprecated
+- References to any `.deepwork/jobs/*/hooks/` scripts
+
+### Step 8: Clean Up Empty Sections
+
+If after cleanup any sections are empty, consider removing them:
+
+```json
+// Remove if empty:
+"hooks": {
+  "Stop": []  // Remove this empty array
+}
+```
+
+### Step 9: Validate JSON
+
+After all edits, ensure the file is valid JSON:
+
+```bash
+python -c "import json; json.load(open('.claude/settings.json'))"
+```
+
+If there are syntax errors, fix them before proceeding.
+
+## Example Before/After
+
+### Before (with gunk):
+```json
+{
+  "hooks": {
+    "UserPromptSubmit": [
+      { "matcher": "", "hooks": [{ "type": "command", "command": ".deepwork/jobs/deepwork_rules/hooks/user_prompt_submit.sh" }] },
+      { "matcher": "", "hooks": [{ "type": "command", "command": ".deepwork/jobs/deepwork_rules/hooks/user_prompt_submit.sh" }] }
+    ],
+    "Stop": [
+      { "matcher": "", "hooks": [{ "type": "command", "command": "deepwork hook rules_check" }] }
+    ],
+    "SubagentStop": [
+      { "matcher": "", "hooks": [{ "type": "command", "command": "/Users/tyler/.local/pipx/venvs/deepwork/bin/python -m deepwork.hooks.rules_check" }] }
+    ]
+  },
+  "permissions": {
+    "allow": [
+      "Skill(competitive_research)",
+      "Skill(competitive_research.discover_competitors)",
+      "Skill(deepwork_jobs)",
+      "Skill(deepwork_jobs.define)",
+      "Read(./.deepwork/**)",
+      "WebSearch"
+    ]
+  }
+}
+```
+
+### After (cleaned):
+```json
+{
+  "hooks": {},
+  "permissions": {
+    "allow": [
+      "Read(./.deepwork/**)",
+      "WebSearch"
+    ]
+  }
+}
+```
+
+## Important Notes
+
+1. **Don't remove non-DeepWork permissions** - Keep permissions like `WebSearch`, `Read(...)`, `Bash(...)` that aren't related to old DeepWork skills
+2. **Preserve `make_new_job.sh`** - Keep any `Bash(...)` permission referencing `make_new_job.sh` (e.g., `Bash(.deepwork/jobs/deepwork_jobs/scripts/make_new_job.sh *)`) - this is a current DeepWork script
+3. **Be conservative** - If unsure whether something is legacy, ask the user
+4. **Document changes** - Note what was removed for the final summary
diff --git a/src/deepwork/standard_jobs/deepwork_jobs/steps/implement.md b/src/deepwork/standard_jobs/deepwork_jobs/steps/implement.md
index 749c8c6f..73eeb365 100644
--- a/src/deepwork/standard_jobs/deepwork_jobs/steps/implement.md
+++ b/src/deepwork/standard_jobs/deepwork_jobs/steps/implement.md
@@ -2,37 +2,16 @@
 
 ## Objective
 
-Generate the DeepWork job directory structure and instruction files for each step based on the validated `job.yml` specification from the review_job_spec step.
+Generate step instruction files for each step based on the `job.yml` specification from the define step.
 
 ## Task
 
-Read the `job.yml` specification file and create all the necessary files to make the job functional, including directory structure and step instruction files. Then sync the commands to make them available.
+Read the `job.yml` specification file created by the define step and generate comprehensive instruction files for each step. The define step has already created the job directory structure.
 
-### Step 1: Create Directory Structure Using Script
-
-Run the `make_new_job.sh` script to create the standard directory structure:
-
-```bash
-.deepwork/jobs/deepwork_jobs/make_new_job.sh [job_name]
-```
-
-This creates:
-- `.deepwork/jobs/[job_name]/` - Main job directory
-- `.deepwork/jobs/[job_name]/steps/` - Step instruction files
-- `.deepwork/jobs/[job_name]/hooks/` - Custom validation scripts (with .gitkeep)
-- `.deepwork/jobs/[job_name]/templates/` - Example file formats (with .gitkeep)
-- `.deepwork/jobs/[job_name]/AGENTS.md` - Job management guidance
-
-**Note**: If the directory already exists (e.g., job.yml was created by define step), you can skip this step or manually create the additional directories:
-```bash
-mkdir -p .deepwork/jobs/[job_name]/hooks .deepwork/jobs/[job_name]/templates
-touch .deepwork/jobs/[job_name]/hooks/.gitkeep .deepwork/jobs/[job_name]/templates/.gitkeep
-```
-
-### Step 2: Read and Validate the Specification
+### Step 1: Read and Validate the Specification
 
 1. **Locate the job.yml file**
-   - Read `.deepwork/jobs/[job_name]/job.yml` from the review_job_spec step
+   - Read `.deepwork/jobs/[job_name]/job.yml` from the define step
    - Parse the YAML content
 
 2. **Validate the specification**
@@ -46,7 +25,7 @@ touch .deepwork/jobs/[job_name]/hooks/.gitkeep .deepwork/jobs/[job_name]/templat
    - List of all steps with their details
    - Understand the workflow structure
 
-### Step 3: Generate Step Instruction Files
+### Step 2: Generate Step Instruction Files
 
 For each step in the job.yml, create a comprehensive instruction file at `.deepwork/jobs/[job_name]/steps/[step_id].md`.
 
@@ -65,30 +44,31 @@ For each step in the job.yml, create a comprehensive instruction file at `.deepw
 
 1. **Use the job description** - The detailed description from job.yml provides crucial context
 2. **Be specific** - Don't write generic instructions; tailor them to the step's purpose
-3. **Provide examples** - Show what good output looks like
+3. **Provide output format examples** - Include a markdown code block in an "Output Format" section showing the expected file structure. A template with `[bracket placeholders]` is acceptable. For complex outputs, also include a concrete filled-in example showing realistic data — this is especially valuable for the first step in a workflow where there's no prior output to reference.
 4. **Explain the "why"** - Help the user understand the step's role in the workflow
 5. **Quality over quantity** - Detailed, actionable instructions are better than vague ones
-6. **Align with stop hooks** - If the step has `stop_hooks` defined, ensure the quality criteria in the instruction file match the validation criteria in the hooks
-7. **Ask structured questions** - When a step has user inputs, the instructions MUST explicitly tell the agent to "ask structured questions" using the AskUserQuestion tool to gather that information. Never use generic phrasing like "ask the user" - always use "ask structured questions"
+6. **Align with reviews** - If the step has `reviews` defined, ensure the quality criteria in the instruction file match the review criteria
+7. **Ask structured questions (when applicable)** - When a step has user-provided inputs (name/description inputs in job.yml), the instructions MUST explicitly tell the agent to "ask structured questions" using the AskUserQuestion tool. Steps that only have file inputs from prior steps do NOT need this phrase — they process data without user interaction.
+8. **Handle edge cases** - If inputs might be missing, ambiguous, or incomplete, tell the agent to ask structured questions to clarify how to proceed rather than guessing
 
-### Handling Stop Hooks
+### Handling Reviews
 
-If a step in the job.yml has `stop_hooks` defined, the generated instruction file should:
+If a step in the job.yml has `reviews` defined, the generated instruction file should:
 
-1. **Mirror the quality criteria** - The "Quality Criteria" section should match what the stop hooks will validate
+1. **Mirror the quality criteria** - The "Quality Criteria" section should match what the reviews will validate
 2. **Be explicit about success** - Help the agent understand when the step is truly complete
-3. **Include the promise pattern** - Mention that `<promise>✓ Quality Criteria Met</promise>` should be included when criteria are met
+3. **Explain what's reviewed** - If reviews target specific outputs (via `run_each`), mention which outputs will be reviewed
 
 **Example: If the job.yml has:**
 ```yaml
 - id: research_competitors
   name: "Research Competitors"
-  stop_hooks:
-    - prompt: |
-        Verify the research meets criteria:
-        1. Each competitor has at least 3 data points
-        2. Sources are cited
-        3. Information is current (within last year)
+  reviews:
+    - run_each: research_notes.md
+      quality_criteria:
+        "Sufficient Data": "Does each competitor have at least 3 data points?"
+        "Sources Cited": "Are sources cited for key claims?"
+        "Current Information": "Is the information current (within last year)?"
 ```
 
 **The instruction file should include:**
@@ -98,7 +78,6 @@ If a step in the job.yml has `stop_hooks` defined, the generated instruction fil
 - Each competitor has at least 3 distinct data points
 - All information is sourced with citations
 - Data is current (from within the last year)
-- When all criteria are met, include `<promise>✓ Quality Criteria Met</promise>` in your response
 ```
 
 This alignment ensures the AI agent knows exactly what will be validated and can self-check before completing.
@@ -109,82 +88,11 @@ Step instructions can include additional `.md` files in the `steps/` directory f
 
 See `.deepwork/jobs/deepwork_jobs/steps/supplemental_file_references.md` for detailed documentation and examples.
 
-### Step 4: Verify job.yml Location
-
-Verify that `job.yml` is in the correct location at `.deepwork/jobs/[job_name]/job.yml`. The define and review_job_spec steps should have created and validated it. If for some reason it's not there, you may need to create or move it.
-
-### Step 5: Sync Skills
-
-Run `deepwork sync` to generate the skills for this job:
-
-```bash
-deepwork sync
-```
-
-This will:
-- Parse the job definition
-- Generate skills for each step
-- Make the skills available in `.claude/skills/` (or appropriate platform directory)
-
-### Step 6: Consider Rules for the New Job
-
-After implementing the job, consider whether there are **rules** that would help enforce quality or consistency when working with this job's domain.
-
-**What are rules?**
-
-Rules are automated guardrails stored as markdown files in `.deepwork/rules/` that trigger when certain files change during an AI session. They help ensure:
-- Documentation stays in sync with code
-- Team guidelines are followed
-- Architectural decisions are respected
-- Quality standards are maintained
-
-**When to suggest rules:**
+### Step 3: Verify Files
 
-Think about the job you just implemented and ask:
-- Does this job produce outputs that other files depend on?
-- Are there documentation files that should be updated when this job's outputs change?
-- Are there quality checks or reviews that should happen when certain files in this domain change?
-- Could changes to the job's output files impact other parts of the project?
-
-**Examples of rules that might make sense:**
-
-| Job Type | Potential Rule |
-|----------|----------------|
-| API Design | "Update API docs when endpoint definitions change" |
-| Database Schema | "Review migrations when schema files change" |
-| Competitive Research | "Update strategy docs when competitor analysis changes" |
-| Feature Development | "Update changelog when feature files change" |
-| Configuration Management | "Update install guide when config files change" |
-
-**How to offer rule creation:**
-
-If you identify one or more rules that would benefit the user, explain:
-1. **What the rule would do** - What triggers it and what action it prompts
-2. **Why it would help** - How it prevents common mistakes or keeps things in sync
-3. **What files it would watch** - The trigger patterns
-
-Then ask the user:
-
-> "Would you like me to create this rule for you? I can run `/deepwork_rules.define` to set it up."
-
-If the user agrees, invoke the `/deepwork_rules.define` command to guide them through creating the rule.
-
-**Example dialogue:**
-
-```
-Based on the competitive_research job you just created, I noticed that when
-competitor analysis files change, it would be helpful to remind you to update
-your strategy documentation.
-
-I'd suggest a rule like:
-- **Name**: "Update strategy when competitor analysis changes"
-- **Trigger**: `**/positioning_report.md`
-- **Action**: Prompt to review and update `docs/strategy.md`
-
-Would you like me to create this rule? I can run `/deepwork_rules.define` to set it up.
-```
-
-**Note:** Not every job needs rules. Only suggest them when they would genuinely help maintain consistency or quality. Don't force rules where they don't make sense.
+Verify that all files are in their correct locations:
+- `job.yml` at `.deepwork/jobs/[job_name]/job.yml` (created by define step)
+- Step instruction files at `.deepwork/jobs/[job_name]/steps/[step_id].md`
 
 ## Example Implementation
 
@@ -201,33 +109,13 @@ For a complete worked example showing a job.yml and corresponding step instructi
 5. **Use context** - The job description provides valuable context for each step
 6. **Be specific** - Tailor instructions to the specific step, not generic advice
 
-## Validation Before Sync
-
-Before running `deepwork sync`, verify:
-- All directories exist
-- `job.yml` is in place
-- All step instruction files exist (one per step)
-- No file system errors
-
 ## Completion Checklist
 
 Before marking this step complete, ensure:
-- [ ] job.yml validated and copied to job directory
+- [ ] job.yml validated and in job directory
 - [ ] All step instruction files created
 - [ ] Each instruction file is complete and actionable
-- [ ] `deepwork sync` executed successfully
-- [ ] Skills generated in platform directory
-- [ ] Considered whether rules would benefit this job (Step 6)
-- [ ] If rules suggested, offered to run `/deepwork_rules.define`
 
-## Quality Criteria
+## Note: Workflow Availability
 
-- Job directory structure is correct
-- All instruction files are complete (not stubs)
-- Instructions are specific and actionable
-- Output examples are provided in each instruction file
-- Quality criteria defined for each step
-- Steps with user inputs explicitly use "ask structured questions" phrasing
-- Sync completed successfully
-- Skills available for use
-- Thoughtfully considered relevant rules for the job domain
+Once the job.yml and step instruction files are created, the workflow is immediately available through the DeepWork MCP server. The MCP server reads job definitions directly from `.deepwork/jobs/` - no separate sync or installation step is required.
\ No newline at end of file
diff --git a/src/deepwork/standard_jobs/deepwork_jobs/steps/iterate.md b/src/deepwork/standard_jobs/deepwork_jobs/steps/iterate.md
new file mode 100644
index 00000000..73dcb589
--- /dev/null
+++ b/src/deepwork/standard_jobs/deepwork_jobs/steps/iterate.md
@@ -0,0 +1,226 @@
+# Iterate on Workflow Design
+
+## Objective
+
+Review the test run conversation and improve the job definition based on what happened. This step closes the feedback loop by incorporating learnings from the test into the workflow itself, making future runs more efficient and producing better results.
+
+## Task
+
+Analyze the conversation history from the test step, identify areas for improvement, and update the job definition and step instructions accordingly.
+
+### Step 1: Review the Conversation History
+
+Carefully analyze the conversation from the test step, looking for:
+
+1. **Process Inefficiencies**
+   - Steps that took multiple attempts to complete
+   - Questions the agent had to ask that should have been in the instructions
+   - Unnecessary back-and-forth with the user
+   - Information that had to be repeated
+
+2. **Output Quality Issues**
+   - Issues identified during critique (from Step 3 of test)
+   - Corrections requested by the user
+   - Patterns in user feedback (what did they consistently want changed?)
+
+3. **Tool Usage Problems**
+   - Tools that didn't work as expected
+   - Missing tools that would have helped
+   - Inefficient tool sequences
+
+4. **Missing or Unclear Instructions**
+   - Ambiguities that led to wrong outputs
+   - Missing guidance that caused confusion
+   - Quality criteria that weren't clear enough
+
+### Step 2: Plan Improvements
+
+For each issue identified, determine the appropriate fix:
+
+| Issue Type | Solution Location |
+|------------|-------------------|
+| Process inefficiency | Update step instructions with clearer guidance |
+| Output quality | Update quality criteria or add examples |
+| Missing information | Add to step inputs or instructions |
+| Tool problems | Suggest different tools in instructions |
+| Unclear criteria | Rewrite quality criteria to be specific |
+
+**Prioritize improvements** that will have the most impact on future runs. Focus on:
+- Issues that caused multiple iterations
+- Problems that affected the final output quality
+- Confusion that could be eliminated with clearer instructions
+
+### Step 3: Update Step Instructions
+
+For each step that needs improvement:
+
+1. **Read the current instruction file** at `.deepwork/jobs/[job_name]/steps/[step_id].md`
+
+2. **Make targeted improvements**:
+   - Add missing context or clarification
+   - Include examples of good output (use what worked in the test)
+   - Clarify ambiguous instructions
+   - Add tool recommendations if a different approach would be better
+   - Update quality criteria to match user expectations
+
+3. **Keep instructions concise**:
+   - Avoid redundancy
+   - Be direct and actionable
+   - Use bullet points where appropriate
+
+### Step 4: Update Reviews
+
+Review and update quality reviews in two places:
+
+1. **In step instruction files** - The "Quality Criteria" section should reflect what the user actually cared about during testing
+
+2. **In job.yml** - Update the `reviews` array on each step to:
+   - Remove criteria that weren't relevant
+   - Add criteria based on user feedback
+   - Make existing criteria more specific
+   - Adjust `run_each` targeting if outputs should be reviewed differently
+
+**Example improvement:**
+```yaml
+# Before
+reviews:
+  - run_each: step
+    quality_criteria:
+      "Formatted Correctly": "Is the report formatted correctly?"
+
+# After
+reviews:
+  - run_each: report.md
+    quality_criteria:
+      "Distinct Colors": "Does the report use distinct colors for each data series in charts?"
+      "Readable Tables": "Do tables have sufficient padding and font size for readability?"
+      "Clear Summary": "Is the executive summary understandable by non-technical readers?"
+```
+
+### Step 5: Consider Alternative Tools
+
+If any tools didn't work well during the test:
+
+1. **Identify the problem** - What went wrong? (slow, wrong output, hard to use)
+
+2. **Research alternatives** - What other tools could accomplish the same goal?
+
+3. **Update instructions** - If a better tool exists, update the step instructions to recommend it
+
+Examples:
+- If web scraping was unreliable, suggest a specific browser automation approach
+- If data processing was slow, suggest a different method or tool
+- If file generation had issues, recommend a different library or format
+
+### Step 6: Update Job Version and Changelog
+
+After making improvements:
+
+1. **Bump the version** in `job.yml`:
+   - Patch version (x.x.1) for minor instruction tweaks
+   - Minor version (x.1.0) for quality criteria changes or significant improvements
+
+2. **Add a changelog entry** describing what was improved:
+   ```yaml
+   changelog:
+     - version: "1.0.1"
+       changes: "Improved [step] instructions based on test run: added examples, clarified criteria, updated tool recommendations"
+   ```
+
+### Step 7: Provide Recap
+
+Summarize the improvements made:
+
+1. **List the issues identified** from the test run
+
+2. **Describe the changes made** to address each issue
+
+3. **Explain the expected impact** on future runs
+
+Example recap format:
+```
+## Iteration Summary
+
+### Issues Identified
+1. Chart colors were hard to distinguish
+2. Tables were too small to read
+3. Executive summary was too technical
+
+### Changes Made
+
+**gather_data step:**
+- Added instruction to use distinct color palettes for charts
+- Updated quality criteria to require readable table formatting
+
+**generate_report step:**
+- Added example of good executive summary
+- Updated quality criteria: "Summary understandable by non-technical stakeholders"
+
+**job.yml:**
+- Updated version to 1.0.1
+- Added changelog entry
+
+### Expected Impact
+Future runs should produce reports with better visual design and clearer summaries, reducing the need for post-generation corrections.
+```
+
+## Example Dialog
+
+```
+Claude: Let me review our test run and improve the workflow based on what happened.
+
+**Analyzing Test Conversation...**
+
+I identified the following issues from the test run:
+
+### Issues Found
+
+1. **Chart readability (gather_data step)**: The instructions didn't specify how to handle color choices for visualizations. During the test, we had to fix colors that were too similar.
+
+2. **Missing month-over-month comparison (analyze_trends step)**: The user wanted comparisons to previous periods, but this wasn't in the instructions. We had to add it after the fact.
+
+3. **Technical executive summary (generate_report step)**: The summary was too technical for non-technical stakeholders. The quality criteria didn't specify the audience clearly.
+
+4. **Table formatting (generate_report step)**: Tables were hard to read. No guidance on sizing or padding.
+
+### Changes Made
+
+**`.deepwork/jobs/monthly_report/steps/gather_data.md`:**
+- Added: "Use distinct, high-contrast colors for chart data series. Avoid similar shades for different categories."
+- Added to Quality Criteria: "Charts use visually distinct colors that are accessible to colorblind readers"
+
+**`.deepwork/jobs/monthly_report/steps/analyze_trends.md`:**
+- Added: "Always include comparison to the same period in the previous month/quarter"
+- Added input: "previous_period_data" to formalize this requirement
+
+**`.deepwork/jobs/monthly_report/steps/generate_report.md`:**
+- Added example of good executive summary targeting non-technical readers
+- Added: "Format tables with sufficient padding (at least 8px) and readable font size (minimum 11pt)"
+- Updated Quality Criteria:
+  - "Executive summary is understandable by non-technical stakeholders"
+  - "Tables are readable with adequate padding and font size"
+
+**`.deepwork/jobs/monthly_report/job.yml`:**
+- Version bumped to 1.0.1
+- Added changelog: "Improved visual design guidance and audience-appropriate writing based on test feedback"
+
+Done! The workflow has been updated. Changes are immediately available through the MCP server.
+
+### Expected Impact
+
+Future monthly reports should:
+- Have better chart color choices from the start
+- Include month-over-month comparisons automatically
+- Have clearer executive summaries for leadership
+- Have more readable tables
+
+This should significantly reduce the number of corrections needed after the initial output.
+```
+
+## Important Guidelines
+
+1. **Be specific** - Don't just note "instructions were unclear" - explain exactly what was unclear and how you fixed it
+2. **Use test results as examples** - If something worked well in the test, add it as an example in the instructions
+3. **Don't over-engineer** - Only add improvements that address actual problems from the test
+4. **Maintain consistency** - Keep the same structure and style in instruction files
+5. **Test-driven improvement** - Every change should trace back to something that happened in the test
diff --git a/src/deepwork/standard_jobs/deepwork_jobs/steps/learn.md b/src/deepwork/standard_jobs/deepwork_jobs/steps/learn.md
index bfb393a5..254a332d 100644
--- a/src/deepwork/standard_jobs/deepwork_jobs/steps/learn.md
+++ b/src/deepwork/standard_jobs/deepwork_jobs/steps/learn.md
@@ -66,15 +66,6 @@ For each learning identified, determine if it is:
   - "Quality criteria should include checking for Y"
   - "Add example of correct output format"
 
-**doc spec-Related** (should improve doc spec files):
-- Improvements to document quality criteria
-- Changes to document structure or format
-- Updated audience or frequency information
-- Examples:
-  - "The report should include a summary table"
-  - "Quality criterion 'Visualization' needs clearer requirements"
-  - "Documents need a section for action items"
-
 **Bespoke** (should go in AGENTS.md):
 - Specific to THIS project/codebase/run
 - Depends on local conventions or structure
@@ -85,30 +76,6 @@ For each learning identified, determine if it is:
   - "This project uses camelCase for function names"
   - "The main config file is at `config/settings.yml`"
 
-### Step 3.5: Identify doc spec-Related Learnings
-
-Review the conversation for doc spec-related improvements:
-
-1. **Quality Criteria Changes**
-   - Were any quality criteria unclear or insufficient?
-   - Did the agent repeatedly fail certain criteria?
-   - Are there new criteria that should be added?
-
-2. **Document Structure Changes**
-   - Did the user request different sections?
-   - Were parts of the document format confusing?
-   - Should the example document be updated?
-
-3. **Metadata Updates**
-   - Has the target audience changed?
-   - Should frequency or path patterns be updated?
-
-**Signals for doc spec improvements:**
-- User asked for changes to document format
-- Repeated validation failures on specific criteria
-- Feedback about missing sections or information
-- Changes to how documents are organized/stored
-
 ### Step 4: Update Job Instructions (Generalizable Learnings)
 
 For each generalizable learning:
@@ -121,6 +88,7 @@ For each generalizable learning:
    - Include helpful examples
    - Clarify ambiguous instructions
    - Update quality criteria if needed
+   - If you identify problems in the outcomes of steps, those usually should be reflected in an update to the `reviews` for that step in `job.yml` (adjusting criteria names, questions, or `run_each` targeting)
 
 3. **Keep instructions concise**
    - Avoid redundancy - don't repeat the same guidance in multiple places
@@ -162,41 +130,6 @@ Review all instruction files for the job and identify content that:
    - Shorter instruction files - easier to read and maintain
    - Consistent guidance across steps
 
-### Step 4.5: Update doc spec Files (doc spec-Related Learnings)
-
-If doc spec-related learnings were identified:
-
-1. **Locate the doc spec file**
-   - Find doc spec references in job.yml outputs (look for `doc_spec: .deepwork/doc_specs/[doc_spec_name].md`)
-   - doc spec files are at `.deepwork/doc_specs/[doc_spec_name].md`
-
-2. **Update quality_criteria array**
-   - Add new criteria with name and description
-   - Modify existing criteria descriptions for clarity
-   - Remove criteria that are no longer relevant
-
-3. **Update example document**
-   - Modify the markdown body to reflect structure changes
-   - Ensure the example matches updated criteria
-
-4. **Update metadata as needed**
-   - target_audience: If audience has changed
-   - frequency: If production cadence has changed
-   - path_patterns: If storage location has changed
-
-**Example doc spec update:**
-```yaml
-# Before
-quality_criteria:
-  - name: Visualization
-    description: Include charts
-
-# After
-quality_criteria:
-  - name: Visualization
-    description: Include Mermaid.js charts showing spend breakdown by service and month-over-month trend
-```
-
 ### Step 5: Create/Update AGENTS.md (Bespoke Learnings)
 
 The AGENTS.md file captures project-specific knowledge that helps future agent runs.
@@ -233,13 +166,6 @@ If instruction files were modified:
      changes: "Improved [step] instructions based on execution learnings: [brief description]"
    ```
 
-### Step 7: Sync Skills
-
-**Run deepwork sync** (if instructions were modified)
-```bash
-deepwork sync
-```
-
 ## File Reference Patterns
 
 When adding entries to AGENTS.md, prefer these patterns:
@@ -258,23 +184,10 @@ When adding entries to AGENTS.md, prefer these patterns:
 - Configuration requires these fields: name, version, ...
 ```
 
-## Quality Criteria
-
-- Conversation has been analyzed for job executions
-- Points of confusion and inefficiency are identified
-- Learnings are correctly classified (generalizable vs bespoke)
-- Job instructions updated for generalizable improvements
-- Instructions are concise - no redundancy or unnecessary verbosity
-- Shared/lengthy content extracted into referenced files where appropriate
-- AGENTS.md created/updated with bespoke learnings
-- File references used instead of duplicating content
-- AGENTS.md is in the correct folder (the deepest common folder for the topic)
-- When all criteria are met, include `<promise>✓ Quality Criteria Met</promise>`
-
 ## Example Dialog
 
 ```
-User: /deepwork_jobs.learn
+User: [Invokes the learn workflow via MCP]
 
 Claude: I'll analyze this conversation for learnings from DeepWork job executions.
 
@@ -324,8 +237,6 @@ I found the following job executions:
 
 3. Updated job.yml version to 1.0.1 with changelog entry
 
-4. Ran `deepwork sync`
-
 **Summary**
 
 Updated job instructions and created AGENTS.md with bespoke learnings.
diff --git a/src/deepwork/standard_jobs/deepwork_jobs/steps/review_job_spec.md b/src/deepwork/standard_jobs/deepwork_jobs/steps/review_job_spec.md
deleted file mode 100644
index fcc0ae9c..00000000
--- a/src/deepwork/standard_jobs/deepwork_jobs/steps/review_job_spec.md
+++ /dev/null
@@ -1,208 +0,0 @@
-# Review Job Specification
-
-## Objective
-
-Review the `job.yml` created in the define step against the doc spec quality criteria using a sub-agent for unbiased evaluation, then iterate on fixes until all criteria pass.
-
-## Why This Step Exists
-
-The define step focuses on understanding user requirements and creating a job specification. This review step ensures the specification meets quality standards before implementation. Using a sub-agent provides an unbiased "fresh eyes" review that catches issues the main agent might miss after being deeply involved in the definition process.
-
-## Task
-
-Use a sub-agent to review the job.yml against all 9 doc spec quality criteria, then fix any failed criteria. Repeat until all criteria pass.
-
-### Step 1: Read the Job Specification
-
-Read the `job.yml` file created in the define step:
-
-```
-.deepwork/jobs/[job_name]/job.yml
-```
-
-Also read the doc spec for reference:
-
-```
-.deepwork/doc_specs/job_spec.md
-```
-
-### Step 2: Spawn Review Sub-Agent
-
-Use the Task tool to spawn a sub-agent that will provide an unbiased review:
-
-```
-Task tool parameters:
-- subagent_type: "general-purpose"
-- model: "haiku"
-- description: "Review job.yml against doc spec"
-- prompt: [see below]
-```
-
-**Sub-agent prompt template:**
-
-```
-Review this job.yml against the following 9 quality criteria from the doc spec.
-
-For each criterion, respond with:
-- PASS or FAIL
-- If FAIL: specific issue and suggested fix
-
-## job.yml Content
-
-[paste the full job.yml content here]
-
-## Quality Criteria
-
-1. **Valid Identifier**: Job name must be lowercase with underscores, no spaces or special characters (e.g., `competitive_research`, `monthly_report`)
-
-2. **Semantic Version**: Version must follow semantic versioning format X.Y.Z (e.g., `1.0.0`, `2.1.3`)
-
-3. **Concise Summary**: Summary must be under 200 characters and clearly describe what the job accomplishes
-
-4. **Rich Description**: Description must be multi-line and explain: the problem solved, the process, expected outcomes, and target users
-
-5. **Changelog Present**: Must include a changelog array with at least the initial version entry
-
-6. **Complete Steps**: Each step must have: id (lowercase_underscores), name, description, instructions_file, outputs (at least one), and dependencies array
-
-7. **Valid Dependencies**: Dependencies must reference existing step IDs with no circular references
-
-8. **Input Consistency**: File inputs with `from_step` must reference a step that is in the dependencies array
-
-9. **Output Paths**: Outputs must be valid filenames or paths (e.g., `report.md` or `reports/analysis.md`)
-
-## Response Format
-
-Respond with a structured evaluation:
-
-### Overall: [X/9 PASS]
-
-### Criterion Results
-
-1. Valid Identifier: [PASS/FAIL]
-   [If FAIL: Issue and fix]
-
-2. Semantic Version: [PASS/FAIL]
-   [If FAIL: Issue and fix]
-
-[... continue for all 9 criteria ...]
-
-### Summary of Required Fixes
-
-[List any fixes needed, or "No fixes required - all criteria pass"]
-```
-
-### Step 3: Review Sub-Agent Findings
-
-Parse the sub-agent's response:
-
-1. **Count passing criteria** - How many of the 9 criteria passed?
-2. **Identify failures** - List specific criteria that failed
-3. **Note suggested fixes** - What changes does the sub-agent recommend?
-
-### Step 4: Fix Failed Criteria
-
-For each failed criterion, edit the job.yml to address the issue:
-
-**Common fixes by criterion:**
-
-| Criterion | Common Issue | Fix |
-|-----------|-------------|-----|
-| Valid Identifier | Spaces or uppercase | Convert to lowercase_underscores |
-| Semantic Version | Missing or invalid format | Set to `"1.0.0"` or fix format |
-| Concise Summary | Too long or vague | Shorten to <200 chars, be specific |
-| Rich Description | Single line or missing context | Add multi-line explanation with problem/process/outcome/users |
-| Changelog Present | Missing changelog | Add `changelog:` with initial version entry |
-| Complete Steps | Missing required fields | Add id, name, description, instructions_file, outputs, dependencies |
-| Valid Dependencies | Non-existent step or circular | Fix step ID reference or reorder dependencies |
-| Input Consistency | from_step not in dependencies | Add the referenced step to dependencies array |
-| Output Paths | Invalid characters or format | Use valid filename/path format |
-
-### Step 5: Re-Run Review (If Needed)
-
-If any criteria failed:
-
-1. **Spawn a new sub-agent** with the updated job.yml content
-2. **Review the new findings**
-3. **Fix any remaining issues**
-4. **Repeat until all 9 criteria pass**
-
-### Step 6: Confirm Completion
-
-When all 9 criteria pass:
-
-1. **Announce success**: "All 9 doc spec quality criteria pass."
-2. **List what was validated**:
-   - Valid Identifier
-   - Semantic Version
-   - Concise Summary
-   - Rich Description
-   - Changelog Present
-   - Complete Steps
-   - Valid Dependencies
-   - Input Consistency
-   - Output Paths
-3. **Include the promise**: `<promise>Quality Criteria Met</promise>`
-4. **Guide to next step**: "Run `/deepwork_jobs.implement` to generate the step instruction files."
-
-## Example Review Cycle
-
-```
-Claude: Let me review the job.yml against the doc spec quality criteria.
-
-[Spawns sub-agent with haiku model]
-
-Sub-agent response:
-### Overall: 7/9 PASS
-
-### Criterion Results
-1. Valid Identifier: PASS
-2. Semantic Version: PASS
-3. Concise Summary: FAIL
-   Issue: Summary is 245 characters, exceeds 200 char limit
-   Fix: Shorten to "Competitive analysis workflow for product positioning"
-4. Rich Description: PASS
-5. Changelog Present: FAIL
-   Issue: No changelog array present
-   Fix: Add changelog with initial version entry
-6. Complete Steps: PASS
-7. Valid Dependencies: PASS
-8. Input Consistency: PASS
-9. Output Paths: PASS
-
-### Summary of Required Fixes
-1. Shorten summary to under 200 characters
-2. Add changelog array with initial version
-
-Claude: I found 2 issues. Let me fix them.
-
-[Edits job.yml to shorten summary and add changelog]
-
-Claude: Let me verify the fixes with another review.
-
-[Spawns new sub-agent]
-
-Sub-agent response:
-### Overall: 9/9 PASS
-
-### Summary of Required Fixes
-No fixes required - all criteria pass
-
-Claude: All 9 doc spec quality criteria pass.
-
-<promise>Quality Criteria Met</promise>
-
-**Next step:** Run `/deepwork_jobs.implement` to generate the step instruction files.
-```
-
-## Quality Criteria
-
-- **Sub-Agent Used**: A sub-agent was spawned to provide unbiased review (not just self-review)
-- **All doc spec Criteria Evaluated**: The sub-agent assessed all 9 quality criteria from the doc spec
-- **Findings Addressed**: All failed criteria were fixed by the main agent
-- **Validation Loop Complete**: The review-fix cycle continued until all criteria passed
-- **Promise Included**: The response includes `<promise>Quality Criteria Met</promise>` when complete
-
-## Output
-
-The validated `job.yml` file at `.deepwork/jobs/[job_name]/job.yml` that passes all 9 doc spec quality criteria.
diff --git a/src/deepwork/standard_jobs/deepwork_jobs/steps/test.md b/src/deepwork/standard_jobs/deepwork_jobs/steps/test.md
new file mode 100644
index 00000000..36d27128
--- /dev/null
+++ b/src/deepwork/standard_jobs/deepwork_jobs/steps/test.md
@@ -0,0 +1,154 @@
+# Test the New Workflow
+
+## Objective
+
+Run the newly created workflow on a real use case chosen by the user, critique the output, and iterate until the user is satisfied with the results. This step validates that the workflow works as intended before finalizing it.
+
+## Task
+
+Guide the user through testing their new workflow by running it on a real example, then critically evaluating the output and refining it based on user feedback.
+
+### Step 1: Announce Readiness and Gather Test Case
+
+The workflow is now implemented and ready to test. Use the AskUserQuestion tool to:
+
+1. **Inform the user** that the workflow is ready for a test run
+2. **Ask what they'd like to test it on** - Get a specific, real use case
+
+Example question to ask:
+```
+Your new workflow is ready to try out! What would you like to use it on for the first test run?
+
+Please describe a specific case you want to run through the workflow - ideally something you actually need done, so we can validate the workflow produces useful results.
+```
+
+**Important**: Get a concrete, specific test case. Vague responses like "just test it" should be followed up with clarifying questions to understand what inputs/context the workflow needs.
+
+### Step 2: Prepare and Run the Workflow
+
+1. **Prepare clean context** - Before invoking the workflow, consider compacting the conversation history (e.g., using `/compact` in Claude Code) to ensure the workflow starts with clean context focused on the test case.
+
+2. **Start the new workflow** - Use `start_workflow` through the DeepWork MCP server with the job name and workflow name to begin executing the workflow.
+
+3. **Complete the full workflow** - Continue through all steps of the workflow until it produces its final output. Use `finished_step` to progress through each step.
+
+4. **Note any issues during execution** - Pay attention to:
+   - Confusion or ambiguity in instructions
+   - Missing information that had to be asked for
+   - Steps that took longer than expected
+   - Awkward tool usage or process flow
+
+### Step 3: Critique the Output
+
+After the workflow completes, perform a self-critique of the output:
+
+1. **Review the final deliverable** - Read through all outputs produced by the workflow
+
+2. **Identify up to 3 top issues** - Look for problems such as:
+   - Missing information or sections
+   - Formatting issues (layout, structure, readability)
+   - Quality problems (vague content, errors, inconsistencies)
+   - Misalignment with what the user likely wanted
+   - Technical issues (broken links, malformed data, etc.)
+
+3. **Present each issue to the user** with a specific question asking if they want it fixed. For example:
+   ```
+   I noticed a few things we could improve:
+
+   1. **Text overlap**: The PDF has some text overlapping images in a few places - shall I correct that?
+
+   2. **Missing summary**: The report doesn't have an executive summary at the top - would you like me to add one?
+
+   3. **Data formatting**: The numbers aren't consistently formatted (some have commas, some don't) - should I standardize them?
+   ```
+
+4. **Make corrections** for any issues the user confirms they want fixed
+
+### Step 4: Gather Additional Feedback
+
+After addressing the identified issues:
+
+1. **Ask if the user has more comments** on the output:
+   ```
+   I've made those corrections. Is there anything else you'd like me to change about the output?
+   ```
+
+2. **Iterate on feedback** - Keep making corrections based on user feedback until they indicate they're satisfied
+
+3. **Confirm completion** - When the user says the output is good, confirm that testing is complete
+
+### Loop Behavior
+
+The feedback loop should continue until the user explicitly indicates satisfaction. Look for signals like:
+- "Looks good"
+- "That's fine"
+- "I'm happy with it"
+- "No more changes"
+- "Perfect"
+
+If the user provides more feedback, address it and ask again. Don't assume satisfaction without explicit confirmation.
+
+## Example Dialog
+
+```
+Claude: Your new workflow is ready to try out!
+
+The `monthly_report` workflow has been implemented with these steps:
+1. gather_data - Collects metrics from various sources
+2. analyze_trends - Identifies patterns and insights
+3. generate_report - Creates the final PDF report
+
+What would you like to use it on for the first test run? Please describe a specific report you need - for example, which month's data and any particular focus areas.
+
+User: Let's do the January 2026 engineering report, focusing on deployment frequency and incident response times.
+
+Claude: Great, let me run the workflow on your January 2026 engineering report.
+
+[Starting the monthly_report workflow via MCP...]
+
+[... workflow runs through all steps ...]
+
+Claude: The January 2026 Engineering Report has been generated. Let me review it for any issues.
+
+I noticed a few things we could improve:
+
+1. **Chart readability**: The deployment frequency chart uses very similar colors for different teams - shall I change to a more distinct color palette?
+
+2. **Missing context**: The incident response times don't include comparison to the previous month - would you like me to add month-over-month comparison?
+
+3. **Executive summary**: The summary is quite technical - should I add a simpler high-level overview for non-technical stakeholders?
+
+User: Yes to all three, please.
+
+Claude: Done! I've updated the report with:
+- Distinct color palette for the deployment chart
+- Month-over-month comparison for incident response times
+- Added a non-technical executive summary
+
+Is there anything else you'd like me to change about the report?
+
+User: Can you make the tables larger? They're a bit hard to read.
+
+Claude: I've increased the table sizes for better readability. The font is now larger and there's more padding around cells.
+
+Is there anything else you'd like me to adjust?
+
+User: No, that looks great!
+
+Claude: Excellent! The test run is complete. The workflow successfully generated your January 2026 Engineering Report with all the refinements applied.
+
+**Test Summary:**
+- Workflow ran successfully through all 3 steps
+- Output required adjustments to chart colors, comparisons, and table sizing
+- User is satisfied with the final result
+
+Ready to proceed to the iterate step to improve the workflow based on what we learned.
+```
+
+## Important Guidelines
+
+1. **Use real test cases** - Testing with actual data/needs validates the workflow better than hypothetical examples
+2. **Be specific in critiques** - Don't say "formatting issues" - say exactly what's wrong and where
+3. **Limit initial critique to 3 issues** - Too many issues at once is overwhelming; address more in the feedback loop
+4. **Don't assume what user wants fixed** - Always ask before making corrections
+5. **Iterate until satisfied** - Don't rush to completion; the user's satisfaction is the goal
diff --git a/src/deepwork/standard_jobs/deepwork_jobs/templates/doc_spec.md.template b/src/deepwork/standard_jobs/deepwork_jobs/templates/doc_spec.md.template
deleted file mode 100644
index d183344f..00000000
--- a/src/deepwork/standard_jobs/deepwork_jobs/templates/doc_spec.md.template
+++ /dev/null
@@ -1,26 +0,0 @@
----
-name: "[Document Name]"
-description: "[Brief description of the document's purpose]"
-path_patterns:
-  - "[path/to/documents/*.md]"
-target_audience: "[Who reads this document]"
-frequency: "[How often produced, e.g., Monthly, Per sprint, On demand]"
-quality_criteria:
-  - name: "[Criterion Name]"
-    description: "[What this criterion requires - be specific]"
-  - name: "[Criterion Name]"
-    description: "[What this criterion requires - be specific]"
-  - name: "[Criterion Name]"
-    description: "[What this criterion requires - be specific]"
----
-
-# [Document Title]: [Variables like Month, Year, Sprint]
-
-## Section 1
-[Describe what goes in this section]
-
-## Section 2
-[Describe what goes in this section]
-
-## Section 3
-[Describe what goes in this section]
diff --git a/src/deepwork/standard_jobs/deepwork_jobs/templates/job.yml.example b/src/deepwork/standard_jobs/deepwork_jobs/templates/job.yml.example
index 7cc6e3bb..dac1aba8 100644
--- a/src/deepwork/standard_jobs/deepwork_jobs/templates/job.yml.example
+++ b/src/deepwork/standard_jobs/deepwork_jobs/templates/job.yml.example
@@ -14,6 +14,15 @@ changelog:
   - version: "1.0.0"
     changes: "Initial job creation"
 
+workflows:
+  - name: full_analysis
+    summary: "Complete competitive research from identification to positioning"
+    steps:
+      - identify_competitors
+      - research_competitors
+      - comparative_analysis
+      - positioning_recommendations
+
 steps:
   - id: identify_competitors
     name: "Identify Competitors"
@@ -25,8 +34,12 @@ steps:
       - name: product_category
         description: "The product category"
     outputs:
-      - competitors_list.md
+      competitors_list.md:
+        type: file
+        description: "Vetted list of direct and indirect competitors"
+        required: true
     dependencies: []
+    reviews: []
 
   - id: research_competitors
     name: "Research Competitors"
@@ -36,17 +49,18 @@ steps:
       - file: competitors_list.md
         from_step: identify_competitors
     outputs:
-      - research_notes.md
+      research_notes.md:
+        type: file
+        description: "Detailed research notes on each competitor"
+        required: true
     dependencies:
       - identify_competitors
-    hooks:
-      after_agent:
-        - prompt: |
-            Verify the research meets criteria:
-            1. Each competitor has at least 3 data points
-            2. Sources are cited
-            3. Information is current (within last year)
-            If ALL criteria are met, include `<promise>✓ Quality Criteria Met</promise>`.
+    reviews:
+      - run_each: research_notes.md
+        quality_criteria:
+          "Sufficient Data": "Does each competitor have at least 3 data points?"
+          "Sources Cited": "Are sources cited for key claims?"
+          "Current Information": "Is the information current (within last year)?"
 
   - id: comparative_analysis
     name: "Comparative Analysis"
@@ -56,9 +70,13 @@ steps:
       - file: research_notes.md
         from_step: research_competitors
     outputs:
-      - comparison_matrix.md
+      comparison_matrix.md:
+        type: file
+        description: "Side-by-side comparison matrix of all competitors"
+        required: true
     dependencies:
       - research_competitors
+    reviews: []
 
   - id: positioning_recommendations
     name: "Positioning Recommendations"
@@ -68,6 +86,15 @@ steps:
       - file: comparison_matrix.md
         from_step: comparative_analysis
     outputs:
-      - positioning_report.md
+      positioning_report.md:
+        type: file
+        description: "Strategic positioning recommendations"
+        required: true
     dependencies:
       - comparative_analysis
+    reviews:
+      - run_each: positioning_report.md
+        additional_review_guidance: "Read the comparison_matrix.md file to verify that recommendations are grounded in the competitive analysis data."
+        quality_criteria:
+          "Actionable": "Are recommendations specific and actionable?"
+          "Data-Backed": "Are recommendations supported by the competitive analysis data?"
diff --git a/src/deepwork/standard_jobs/deepwork_jobs/templates/job.yml.template b/src/deepwork/standard_jobs/deepwork_jobs/templates/job.yml.template
index 7dcf34e9..f0f87bfb 100644
--- a/src/deepwork/standard_jobs/deepwork_jobs/templates/job.yml.template
+++ b/src/deepwork/standard_jobs/deepwork_jobs/templates/job.yml.template
@@ -20,6 +20,13 @@ changelog:
   - version: "1.0.0"
     changes: "Initial job creation"
 
+workflows:
+  - name: [workflow_name]
+    summary: "[What this workflow accomplishes]"
+    steps:
+      - [step_id]
+      - [another_step]
+
 steps:
   - id: [step_id]
     name: "[Step Name]"
@@ -32,27 +39,33 @@ steps:
       # - file: [filename_or_path]
       #   from_step: [previous_step_id]
     outputs:
-      - [output_filename_or_path]  # e.g., "report.md" or "reports/analysis.md"
+      [output_name]:
+        type: file
+        description: "[What this output contains]"
+        required: true
     dependencies: []  # List of step IDs that must complete first
+    reviews:
+      - run_each: step  # or a specific output name
+        # Optional: tell the reviewer what files to read for context
+        # additional_review_guidance: "Read the [filename] for context on [what]."
+        quality_criteria:
+          "[Criterion Name]": "[Question to evaluate]"
+          "[Another Criterion]": "[Another question]"
     # Optional: Delegate to a specific agent type (uses context: fork)
     # agent: general-purpose  # or other agent type
-    # Optional: Quality validation hooks
-    hooks:
-      after_agent:
-        - prompt: |
-            Verify this step's output meets quality criteria:
-            1. [Criterion 1]
-            2. [Criterion 2]
-            If ALL criteria are met, include `<promise>✓ Quality Criteria Met</promise>`.
 
   - id: [another_step]
     name: "[Another Step]"
     description: "[What this step does]"
     instructions_file: steps/[another_step].md
     inputs:
-      - file: [output_filename_or_path]
+      - file: [output_name]
         from_step: [step_id]
     outputs:
-      - [another_output_path]
+      [another_output]:
+        type: file
+        description: "[What this output contains]"
+        required: true
     dependencies:
       - [step_id]  # This step requires the previous step
+    reviews: []  # Empty if no quality checks needed
diff --git a/src/deepwork/standard_jobs/deepwork_rules/hooks/capture_prompt_work_tree.sh b/src/deepwork/standard_jobs/deepwork_rules/hooks/capture_prompt_work_tree.sh
deleted file mode 100755
index c9cedd82..00000000
--- a/src/deepwork/standard_jobs/deepwork_rules/hooks/capture_prompt_work_tree.sh
+++ /dev/null
@@ -1,38 +0,0 @@
-#!/bin/bash
-# capture_prompt_work_tree.sh - Captures the git work tree state at prompt submission
-#
-# This script creates a snapshot of ALL tracked files at the time the prompt
-# is submitted. This baseline is used for rules with compare_to: prompt and
-# created: mode to detect truly NEW files (not modifications to existing ones).
-#
-# The baseline contains ALL tracked files (not just changed files) so that
-# the rules_check hook can determine which files are genuinely new vs which
-# files existed before and were just modified.
-#
-# It also captures the HEAD commit ref so that committed changes can be detected
-# by comparing HEAD at Stop time to the captured ref.
-
-set -e
-
-# Ensure .deepwork directory exists
-mkdir -p .deepwork
-
-# Save the current HEAD commit ref for detecting committed changes
-# This is used by get_changed_files_prompt() to detect files changed since prompt,
-# even if those changes were committed during the agent response.
-git rev-parse HEAD > .deepwork/.last_head_ref 2>/dev/null || echo "" > .deepwork/.last_head_ref
-
-# Save ALL tracked files (not just changed files)
-# This is critical for created: mode rules to distinguish between:
-# - Newly created files (not in baseline) -> should trigger created: rules
-# - Modified existing files (in baseline) -> should NOT trigger created: rules
-git ls-files > .deepwork/.last_work_tree 2>/dev/null || true
-
-# Also include untracked files that exist at prompt time
-# These are files the user may have created before submitting the prompt
-git ls-files --others --exclude-standard >> .deepwork/.last_work_tree 2>/dev/null || true
-
-# Sort and deduplicate
-if [ -f .deepwork/.last_work_tree ]; then
-    sort -u .deepwork/.last_work_tree -o .deepwork/.last_work_tree
-fi
diff --git a/src/deepwork/standard_jobs/deepwork_rules/hooks/global_hooks.yml b/src/deepwork/standard_jobs/deepwork_rules/hooks/global_hooks.yml
deleted file mode 100644
index a310d31a..00000000
--- a/src/deepwork/standard_jobs/deepwork_rules/hooks/global_hooks.yml
+++ /dev/null
@@ -1,8 +0,0 @@
-# DeepWork Rules Hooks Configuration
-# Maps lifecycle events to hook scripts or Python modules
-
-UserPromptSubmit:
-  - user_prompt_submit.sh
-
-Stop:
-  - module: deepwork.hooks.rules_check
diff --git a/src/deepwork/standard_jobs/deepwork_rules/hooks/user_prompt_submit.sh b/src/deepwork/standard_jobs/deepwork_rules/hooks/user_prompt_submit.sh
deleted file mode 100755
index 486ad836..00000000
--- a/src/deepwork/standard_jobs/deepwork_rules/hooks/user_prompt_submit.sh
+++ /dev/null
@@ -1,16 +0,0 @@
-#!/bin/bash
-# user_prompt_submit.sh - Runs on every user prompt submission
-#
-# This script captures the work tree state at each prompt submission.
-# This baseline is used for policies with compare_to: prompt to detect
-# what changed during an agent response.
-
-set -e
-
-SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-
-# Capture work tree state at each prompt for compare_to: prompt policies
-"${SCRIPT_DIR}/capture_prompt_work_tree.sh"
-
-# Exit successfully - don't block the prompt
-exit 0
diff --git a/src/deepwork/standard_jobs/deepwork_rules/job.yml b/src/deepwork/standard_jobs/deepwork_rules/job.yml
deleted file mode 100644
index a0032b9e..00000000
--- a/src/deepwork/standard_jobs/deepwork_rules/job.yml
+++ /dev/null
@@ -1,49 +0,0 @@
-name: deepwork_rules
-version: "0.4.0"
-summary: "Creates file-change rules that enforce guidelines during AI sessions. Use when automating documentation sync or code review triggers."
-description: |
-  Manages rules that automatically trigger when certain files change during an AI agent session.
-  Rules help ensure that code changes follow team guidelines, documentation is updated,
-  and architectural decisions are respected.
-
-  IMPORTANT: Rules are evaluated at the "Stop" hook, which fires when an agent finishes its turn.
-  This includes when sub-agents complete their work. Rules are NOT evaluated immediately after
-  each file edit - they batch up and run once at the end of the agent's response cycle.
-  - Command action rules: Execute their command (e.g., `uv sync`) when the agent stops
-  - Prompt action rules: Display instructions to the agent, blocking until addressed
-
-  Rules are stored as individual markdown files with YAML frontmatter in the `.deepwork/rules/`
-  directory. Each rule file specifies:
-  - Detection mode: trigger/safety, set (bidirectional), or pair (directional)
-  - Patterns: Glob patterns for matching files, with optional variable capture
-  - Action type: prompt (default) to show instructions, or command to run a shell command
-  - Instructions: Markdown content describing what the agent should do
-
-  Example use cases:
-  - Update installation docs when configuration files change
-  - Require security review when authentication code is modified
-  - Ensure API documentation stays in sync with API code
-  - Enforce source/test file pairing
-  - Auto-run `uv sync` when pyproject.toml changes (command action)
-
-changelog:
-  - version: "0.1.0"
-    changes: "Initial version"
-  - version: "0.2.0"
-    changes: "Standardized on 'ask structured questions' phrasing for user input"
-  - version: "0.3.0"
-    changes: "Migrated to v2 format - individual markdown files in .deepwork/rules/"
-  - version: "0.4.0"
-    changes: "Improved skill descriptions with third-person voice and 'Use when...' triggers for better discoverability"
-
-steps:
-  - id: define
-    name: "Define Rule"
-    description: "Creates a rule file that triggers when specified files change. Use when setting up documentation sync, code review requirements, or automated commands."
-    instructions_file: steps/define.md
-    inputs:
-      - name: rule_purpose
-        description: "What guideline or constraint should this rule enforce?"
-    outputs:
-      - .deepwork/rules/{rule-name}.md
-    dependencies: []
diff --git a/src/deepwork/standard_jobs/deepwork_rules/rules/.gitkeep b/src/deepwork/standard_jobs/deepwork_rules/rules/.gitkeep
deleted file mode 100644
index 429162b4..00000000
--- a/src/deepwork/standard_jobs/deepwork_rules/rules/.gitkeep
+++ /dev/null
@@ -1,13 +0,0 @@
-# This directory contains example rule templates.
-# Copy and customize these files to create your own rules.
-#
-# Rule files use YAML frontmatter in markdown format:
-#
-# ---
-# name: Rule Name
-# trigger: "pattern/**/*"
-# safety: "optional/pattern"
-# ---
-# Instructions in markdown here.
-#
-# See doc/rules_syntax.md for full documentation.
diff --git a/src/deepwork/standard_jobs/deepwork_rules/rules/api-documentation-sync.md.example b/src/deepwork/standard_jobs/deepwork_rules/rules/api-documentation-sync.md.example
deleted file mode 100644
index 427da7ae..00000000
--- a/src/deepwork/standard_jobs/deepwork_rules/rules/api-documentation-sync.md.example
+++ /dev/null
@@ -1,10 +0,0 @@
----
-name: API Documentation Sync
-trigger: src/api/**/*
-safety: docs/api/**/*.md
----
-API code has changed. Please verify that API documentation is up to date:
-
-- New or changed endpoints
-- Modified request/response schemas
-- Updated authentication requirements
diff --git a/src/deepwork/standard_jobs/deepwork_rules/rules/readme-documentation.md.example b/src/deepwork/standard_jobs/deepwork_rules/rules/readme-documentation.md.example
deleted file mode 100644
index 6be90c83..00000000
--- a/src/deepwork/standard_jobs/deepwork_rules/rules/readme-documentation.md.example
+++ /dev/null
@@ -1,10 +0,0 @@
----
-name: README Documentation
-trigger: src/**/*
-safety: README.md
----
-Source code has been modified. Please review README.md for accuracy:
-
-1. Verify the project overview reflects current functionality
-2. Check that usage examples are still correct
-3. Ensure installation/setup instructions remain valid
diff --git a/src/deepwork/standard_jobs/deepwork_rules/rules/security-review.md.example b/src/deepwork/standard_jobs/deepwork_rules/rules/security-review.md.example
deleted file mode 100644
index abce3194..00000000
--- a/src/deepwork/standard_jobs/deepwork_rules/rules/security-review.md.example
+++ /dev/null
@@ -1,11 +0,0 @@
----
-name: Security Review for Auth Changes
-trigger:
-  - src/auth/**/*
-  - src/security/**/*
----
-Authentication or security code has been changed. Please:
-
-1. Review for hardcoded credentials or secrets
-2. Check input validation on user inputs
-3. Verify access control logic is correct
diff --git a/src/deepwork/standard_jobs/deepwork_rules/rules/skill-md-validation.md b/src/deepwork/standard_jobs/deepwork_rules/rules/skill-md-validation.md
deleted file mode 100644
index 38f90c51..00000000
--- a/src/deepwork/standard_jobs/deepwork_rules/rules/skill-md-validation.md
+++ /dev/null
@@ -1,46 +0,0 @@
----
-name: SKILL.md Validation
-trigger: "**/SKILL.md"
-compare_to: base
----
-A SKILL.md file has been created or modified. Please validate that it follows the required format:
-
-## Required Structure
-
-The file MUST have valid YAML frontmatter at the start, enclosed between `---` markers:
-
-```markdown
----
-name: my-skill-name
-description: A description of what this skill does
----
-
-# Rest of the skill documentation...
-```
-
-## Validation Checklist
-
-1. **YAML Frontmatter**: Verify the file starts with `---` followed by valid YAML and ends with `---`
-
-2. **`name` field** (required):
-   - Must be present in the frontmatter
-   - Must contain only lowercase letters, numbers, and hyphens (`a-z`, `0-9`, `-`)
-   - Must be 64 characters or fewer
-   - Example valid names: `my-skill`, `code-review-2`, `lint`
-   - Example invalid names: `My Skill` (uppercase/spaces), `skill_name` (underscores), `SKILL` (uppercase)
-
-3. **`description` field** (required):
-   - Must be present in the frontmatter
-   - Must be 1024 characters or fewer
-   - Should clearly describe what the skill does
-
-## What to Check
-
-For the modified file: {trigger_files}
-
-1. Parse the YAML frontmatter and verify it is valid YAML
-2. Check that `name` exists and matches the pattern `^[a-z0-9-]+$` with max length 64
-3. Check that `description` exists and is at most 1024 characters
-4. Report any validation errors to the user
-
-If the file does not pass validation, help the user fix the issues.
diff --git a/src/deepwork/standard_jobs/deepwork_rules/rules/source-test-pairing.md.example b/src/deepwork/standard_jobs/deepwork_rules/rules/source-test-pairing.md.example
deleted file mode 100644
index 3ebd6968..00000000
--- a/src/deepwork/standard_jobs/deepwork_rules/rules/source-test-pairing.md.example
+++ /dev/null
@@ -1,13 +0,0 @@
----
-name: Source/Test Pairing
-set:
-  - src/{path}.py
-  - tests/{path}_test.py
----
-Source and test files should change together.
-
-When modifying source code, ensure corresponding tests are updated.
-When adding tests, ensure they test actual source code.
-
-Modified source: {trigger_files}
-Expected tests: {expected_files}
diff --git a/src/deepwork/standard_jobs/deepwork_rules/steps/define.md b/src/deepwork/standard_jobs/deepwork_rules/steps/define.md
deleted file mode 100644
index 1e38a5e6..00000000
--- a/src/deepwork/standard_jobs/deepwork_rules/steps/define.md
+++ /dev/null
@@ -1,249 +0,0 @@
-# Define Rule
-
-## Objective
-
-Create a new rule file in the `.deepwork/rules/` directory to enforce team guidelines, documentation requirements, or other constraints when specific files change.
-
-## Task
-
-Guide the user through defining a new rule by asking structured questions. **Do not create the rule without first understanding what they want to enforce.**
-
-**Important**: Use the AskUserQuestion tool to ask structured questions when gathering information from the user. This provides a better user experience with clear options and guided choices.
-
-### Step 1: Understand the Rule Purpose
-
-Start by asking structured questions to understand what the user wants to enforce:
-
-1. **What guideline or constraint should this rule enforce?**
-   - What situation triggers the need for action?
-   - What files or directories, when changed, should trigger this rule?
-   - Examples: "When config files change", "When API code changes", "When database schema changes"
-
-2. **What action should be taken?**
-   - What should the agent do when the rule triggers?
-   - Update documentation? Perform a security review? Update tests?
-   - Is there a specific file or process that needs attention?
-
-3. **Are there any "safety" conditions?**
-   - Are there files that, if also changed, mean the rule doesn't need to fire?
-   - For example: If config changes AND install_guide.md changes, assume docs are already updated
-   - This prevents redundant prompts when the user has already done the right thing
-
-### Step 2: Choose the Detection Mode
-
-Help the user select the appropriate detection mode:
-
-**Trigger/Safety Mode** (most common):
-- Fires when trigger patterns match AND no safety patterns match
-- Use for: "When X changes, check Y" rules
-- Example: When config changes, verify install docs
-
-**Set Mode** (bidirectional correspondence):
-- Fires when files that should change together don't all change
-- Use for: Source/test pairing, model/migration sync
-- Example: `src/foo.py` and `tests/foo_test.py` should change together
-
-**Pair Mode** (directional correspondence):
-- Fires when a trigger file changes but expected files don't
-- Changes to expected files alone do NOT trigger
-- Use for: API code requires documentation updates (but docs can update independently)
-
-### Step 3: Define the Patterns
-
-Help the user define glob patterns for files.
-
-**Common patterns:**
-- `src/**/*.py` - All Python files in src directory (recursive)
-- `app/config/**/*` - All files in app/config directory
-- `*.md` - All markdown files in root
-- `src/api/**/*` - All files in the API directory
-- `migrations/**/*.sql` - All SQL migrations
-
-**Variable patterns (for set/pair modes):**
-- `src/{path}.py` - Captures path variable (e.g., `foo/bar` from `src/foo/bar.py`)
-- `tests/{path}_test.py` - Uses same path variable in corresponding file
-- `{name}` matches single segment, `{path}` matches multiple segments
-
-**Pattern syntax:**
-- `*` - Matches any characters within a single path segment
-- `**` - Matches any characters across multiple path segments (recursive)
-- `?` - Matches a single character
-
-### Step 4: Choose the Comparison Mode (Optional)
-
-The `compare_to` field controls what baseline is used when detecting "changed files":
-
-**Options:**
-- `base` (default) - Compares to the base of the current branch (merge-base with main/master). Best for feature branches.
-- `default_tip` - Compares to the current tip of the default branch. Useful for seeing difference from production.
-- `prompt` - Compares to the state at the start of each prompt. For rules about very recent changes.
-
-Most rules should use the default (`base`) and don't need to specify `compare_to`.
-
-### Step 5: Write the Instructions
-
-Create clear, actionable instructions for what the agent should do when the rule fires.
-
-**Good instructions include:**
-- What to check or review
-- What files might need updating
-- Specific actions to take
-- Quality criteria for completion
-
-**Template variables available in instructions:**
-- `{trigger_files}` - Files that triggered the rule
-- `{expected_files}` - Expected corresponding files (for set/pair modes)
-
-### Step 6: Create the Rule File
-
-Create a new file in `.deepwork/rules/` with a kebab-case filename:
-
-**File Location**: `.deepwork/rules/{rule-name}.md`
-
-**Format for Trigger/Safety Mode:**
-```markdown
----
-name: Friendly Name for the Rule
-trigger: "glob/pattern/**/*"  # or array: ["pattern1", "pattern2"]
-safety: "optional/pattern"    # optional, or array
-compare_to: base              # optional: "base" (default), "default_tip", or "prompt"
----
-Instructions for the agent when this rule fires.
-
-Multi-line markdown content is supported.
-```
-
-**Format for Set Mode (bidirectional):**
-```markdown
----
-name: Source/Test Pairing
-set:
-  - src/{path}.py
-  - tests/{path}_test.py
----
-Source and test files should change together.
-
-Modified: {trigger_files}
-Expected: {expected_files}
-```
-
-**Format for Pair Mode (directional):**
-```markdown
----
-name: API Documentation
-pair:
-  trigger: api/{path}.py
-  expects: docs/api/{path}.md
----
-API code requires documentation updates.
-
-Changed API: {trigger_files}
-Update docs: {expected_files}
-```
-
-### Step 7: Verify the Rule
-
-After creating the rule:
-
-1. **Check the YAML frontmatter** - Ensure valid YAML formatting
-2. **Test trigger patterns** - Verify patterns match intended files
-3. **Review instructions** - Ensure they're clear and actionable
-4. **Check for conflicts** - Ensure the rule doesn't conflict with existing ones
-
-## Example Rules
-
-### Update Documentation on Config Changes
-`.deepwork/rules/config-docs.md`:
-```markdown
----
-name: Update Install Guide on Config Changes
-trigger: app/config/**/*
-safety: docs/install_guide.md
----
-Configuration files have been modified. Please review docs/install_guide.md
-and update it if any installation instructions need to change based on the
-new configuration.
-```
-
-### Security Review for Auth Code
-`.deepwork/rules/security-review.md`:
-```markdown
----
-name: Security Review for Authentication Changes
-trigger:
-  - src/auth/**/*
-  - src/security/**/*
-safety:
-  - SECURITY.md
-  - docs/security_audit.md
----
-Authentication or security code has been changed. Please:
-
-1. Review for hardcoded credentials or secrets
-2. Check input validation on user inputs
-3. Verify access control logic is correct
-4. Update security documentation if needed
-```
-
-### Source/Test Pairing
-`.deepwork/rules/source-test-pairing.md`:
-```markdown
----
-name: Source/Test Pairing
-set:
-  - src/{path}.py
-  - tests/{path}_test.py
----
-Source and test files should change together.
-
-When modifying source code, ensure corresponding tests are updated.
-When adding tests, ensure they test actual source code.
-
-Modified: {trigger_files}
-Expected: {expected_files}
-```
-
-### API Documentation Sync
-`.deepwork/rules/api-docs.md`:
-```markdown
----
-name: API Documentation Update
-pair:
-  trigger: src/api/{path}.py
-  expects: docs/api/{path}.md
----
-API code has changed. Please verify that API documentation in docs/api/
-is up to date with the code changes. Pay special attention to:
-
-- New or changed endpoints
-- Modified request/response schemas
-- Updated authentication requirements
-
-Changed API: {trigger_files}
-Update: {expected_files}
-```
-
-## Output Format
-
-### .deepwork/rules/{rule-name}.md
-Create a new file with the rule definition using YAML frontmatter and markdown body.
-
-## Quality Criteria
-
-- Asked structured questions to understand user requirements
-- Rule name is clear and descriptive (used in promise tags)
-- Correct detection mode selected for the use case
-- Patterns accurately match the intended files
-- Safety patterns prevent unnecessary triggering (if applicable)
-- Instructions are actionable and specific
-- YAML frontmatter is valid
-
-## Context
-
-Rules are evaluated automatically when the agent finishes a task. The system:
-1. Determines which files have changed based on each rule's `compare_to` setting
-2. Evaluates rules based on their detection mode (trigger/safety, set, or pair)
-3. Skips rules where the correspondence is satisfied (for set/pair) or safety matched
-4. Prompts you with instructions for any triggered rules
-
-You can mark a rule as addressed by including `<promise>Rule Name</promise>` in your response (replace Rule Name with the actual rule name from the `name` field). This tells the system you've already handled that rule's requirements.
diff --git a/src/deepwork/templates/claude/settings.json b/src/deepwork/templates/claude/settings.json
index 97d5d1be..a85202f8 100644
--- a/src/deepwork/templates/claude/settings.json
+++ b/src/deepwork/templates/claude/settings.json
@@ -6,7 +6,11 @@
       "Write(./.deepwork/**)",
       "Bash(deepwork:*)",
       "Bash(./.deepwork/jobs/deepwork_jobs/make_new_job.sh:*)",
-      "WebSearch"
+      "WebSearch",
+      "mcp__deepwork__get_workflows",
+      "mcp__deepwork__start_workflow",
+      "mcp__deepwork__finished_step",
+      "mcp__deepwork__abort_workflow"
     ]
   }
 }
diff --git a/src/deepwork/templates/claude/skill-deepwork.md.jinja b/src/deepwork/templates/claude/skill-deepwork.md.jinja
new file mode 100644
index 00000000..e9922706
--- /dev/null
+++ b/src/deepwork/templates/claude/skill-deepwork.md.jinja
@@ -0,0 +1,37 @@
+{#
+Template: skill-deepwork.md.jinja
+Purpose: Generates the main /deepwork skill that instructs agents to use MCP tools
+
+This template is used to create the entry-point skill for DeepWork.
+Instead of containing step instructions, it directs agents to use the
+DeepWork MCP server tools.
+#}
+---
+name: deepwork
+description: "Start or continue DeepWork workflows using MCP tools"
+---
+
+# DeepWork Workflow Manager
+
+Execute multi-step workflows with quality gate checkpoints.
+
+> **IMPORTANT**: Use the DeepWork MCP server tools. All workflow operations
+> are performed through MCP tool calls and following the instructions they return,
+> not by reading instructions from files.
+
+## How to Use
+
+1. Call `get_workflows` to discover available workflows
+2. Call `start_workflow` with goal, job_name, and workflow_name
+3. Follow the step instructions returned
+4. Call `finished_step` with your outputs when done
+5. Handle the response: `needs_work`, `next_step`, or `workflow_complete`
+
+## Intent Parsing
+
+When the user invokes `/deepwork`, parse their intent:
+1. **ALWAYS**: Call `get_workflows` to discover available workflows
+2. Based on the available flows and what the user said in their request, proceed:
+    - **Explicit workflow**: `/deepwork <a workflow name>` → start the `<a workflow name>` workflow
+    - **General request**: `/deepwork <a request>` → infer best match from available workflows
+    - **No context**: `/deepwork` alone → ask user to choose from available workflows
diff --git a/src/deepwork/templates/claude/skill-job-meta.md.jinja b/src/deepwork/templates/claude/skill-job-meta.md.jinja
deleted file mode 100644
index ea258a87..00000000
--- a/src/deepwork/templates/claude/skill-job-meta.md.jinja
+++ /dev/null
@@ -1,147 +0,0 @@
-{#
-Template: skill-job-meta.md.jinja
-Purpose: Generates the job overview skill file for Claude Code
-
-Template Variables:
-  - job_name: string - Job identifier (e.g., "competitive_research")
-  - job_summary: string - Short one-line summary of the job
-  - job_description: string|null - Full description (optional)
-  - total_steps: int - Number of steps in the job
-  - has_workflows: bool - True if workflows are defined
-  - workflows: list - Array of workflow objects:
-      - name: string - Workflow identifier
-      - summary: string - Short description of workflow
-      - steps: list[string] - Ordered list of step IDs
-      - first_step: string - First step ID to start workflow
-  - standalone_steps: list - Steps not in any workflow (same structure as steps)
-  - steps: list - Array of step objects:
-      - id: string - Step identifier
-      - name: string - Human-readable step name
-      - description: string - What the step does
-      - command_name: string - Slash command (e.g., "job_name.step_id")
-      - dependencies: list[string]|null - Required prior steps
-      - is_standalone: bool - True if not in any workflow
-      - workflow_name: string|null - Name of workflow if in one
-#}
----
-name: {{ job_name }}
-description: "{{ job_summary }}"
----
-
-# {{ job_name }}
-
-{{ job_summary }}
-
-> **CRITICAL**: Always invoke steps using the Skill tool. Never copy/paste step instructions directly.
-
-{% if job_description %}
-{{ job_description }}
-{% endif %}
-
-{% if has_workflows %}
-## Workflows
-
-{% for workflow in workflows %}
-### {{ workflow.name }}
-
-{{ workflow.summary }}
-
-**Steps in order**:
-{% for entry in workflow.step_entries %}
-{% if entry.is_concurrent %}
-{{ loop.index }}. **Concurrent Steps** - Execute the following tasks in parallel:
-{% for task in entry.concurrent_steps %}
-   - **Background Task {{ task.task_number }}**: {{ task.id }} - {{ task.description }}
-{% endfor %}
-{% else %}
-{% set step_id = entry.step_ids[0] %}
-{% set step = steps | selectattr("id", "equalto", step_id) | first %}
-{{ loop.index }}. **{{ step_id }}** - {{ step.description if step else "Unknown step" }}
-{% endif %}
-{% endfor %}
-
-**Start workflow**: `/{{ job_name }}.{{ workflow.first_step }}`
-
-{% endfor %}
-{% endif %}
-{% if standalone_steps %}
-## Standalone Skills
-
-These skills can be run independently at any time:
-
-{% for step in standalone_steps %}
-- **{{ step.id }}** - {{ step.description }}
-  Command: `/{{ step.command_name }}`
-{% endfor %}
-
-{% endif %}
-{% if not has_workflows and not standalone_steps %}
-## Available Steps
-
-{% for step in steps %}
-{{ loop.index }}. **{{ step.id }}** - {{ step.description }}{% if step.dependencies %} (requires: {{ step.dependencies | join(', ') }}){% endif %}
-
-{% endfor %}
-{% endif %}
-
-## Execution Instructions
-
-### Step 1: Analyze Intent
-
-Parse any text following `/{{ job_name }}` to determine user intent:
-{% if has_workflows %}
-{% for workflow in workflows %}
-- "{{ workflow.name }}" or related terms → start {{ workflow.name }} workflow at `{{ job_name }}.{{ workflow.first_step }}`
-{% endfor %}
-{% endif %}
-{% for step in standalone_steps %}
-- "{{ step.id }}" or related terms → run standalone skill `{{ step.command_name }}`
-{% endfor %}
-{% if not has_workflows and not standalone_steps %}
-{% for step in steps %}
-- "{{ step.id }}" or related terms → start at `{{ step.command_name }}`
-{% endfor %}
-{% endif %}
-
-### Step 2: Invoke Starting Step
-
-Use the Skill tool to invoke the identified starting step:
-{% if has_workflows and workflows %}
-```
-Skill tool: {{ job_name }}.{{ workflows[0].first_step }}
-```
-{% else %}
-```
-Skill tool: {{ steps[0].command_name }}
-```
-{% endif %}
-
-### Step 3: Continue Workflow Automatically
-
-After each step completes:
-1. Check if there's a next step in the workflow sequence
-2. Invoke the next step using the Skill tool
-3. Repeat until workflow is complete or user intervenes
-
-**Note**: Standalone skills do not auto-continue to other steps.
-
-### Handling Ambiguous Intent
-
-If user intent is unclear, use AskUserQuestion to clarify:
-{% if has_workflows %}
-- Present available workflows and standalone skills as options
-{% else %}
-- Present available steps as numbered options
-{% endif %}
-- Let user select the starting point
-
-## Guardrails
-
-- Do NOT copy/paste step instructions directly; always use the Skill tool to invoke steps
-- Do NOT skip steps in a workflow unless the user explicitly requests it
-- Do NOT proceed to the next step if the current step's outputs are incomplete
-- Do NOT make assumptions about user intent; ask for clarification when ambiguous
-
-## Context Files
-
-- Job definition: `.deepwork/jobs/{{ job_name }}/job.yml`
diff --git a/src/deepwork/templates/claude/skill-job-step.md.jinja b/src/deepwork/templates/claude/skill-job-step.md.jinja
deleted file mode 100644
index ffb8622c..00000000
--- a/src/deepwork/templates/claude/skill-job-step.md.jinja
+++ /dev/null
@@ -1,263 +0,0 @@
-{#
-Template: skill-job-step.md.jinja
-Purpose: Generates individual step skill files for Claude Code
-
-Template Variables:
-  Job Context:
-    - job_name: string - Job identifier
-    - job_summary: string - Short job summary
-    - job_description: string|null - Full job description
-
-  Step Metadata:
-    - step_id: string - Step identifier
-    - step_description: string - What this step does
-    - step_number: int - Position in steps array (1-indexed, for backward compat)
-    - total_steps: int - Total steps in job
-    - is_standalone: bool - True if step can run independently (not in any workflow)
-    - exposed: bool - True if user can invoke directly (default: true)
-    - dependencies: list[string]|null - Required prior step IDs
-    - next_step: string|null - Next step ID in workflow
-    - instructions_file: string - Path to step instructions file
-
-  Workflow Context (only if step is in a workflow):
-    - workflow_name: string - Name of the workflow this step belongs to
-    - workflow_summary: string - Summary of the workflow
-    - workflow_step_number: int - Position in workflow (1-indexed)
-    - workflow_total_steps: int - Total steps in this workflow
-    - workflow_next_step: string|null - Next step ID in workflow
-    - workflow_prev_step: string|null - Previous step ID in workflow
-
-  Step Content:
-    - instructions_content: string - Full instructions markdown
-    - user_inputs: list|null - User parameters to gather:
-        - name: string - Parameter name
-        - description: string - What to ask for
-    - file_inputs: list|null - Files from previous steps:
-        - file: string - File path
-        - from_step: string - Source step ID
-    - outputs: list[string]|null - Output file paths
-
-  Quality & Hooks:
-    - quality_criteria: list[string]|null - Criteria for completion
-    - stop_hooks: list|null - Stop hook configurations:
-        - type: "script"|"prompt"
-        - path: string (for script)
-        - content: string (for prompt)
-    - hooks: dict|null - All hooks by event name (Stop, PreToolUse, etc.)
-
-  Agent Delegation:
-    - agent: string|null - Agent type (e.g., "general-purpose"). When set, adds context: fork
-#}
----
-name: {{ job_name }}.{{ step_id }}
-description: "{{ step_description }}"
-{% if not exposed %}
-user-invocable: false
-{% endif %}{#- if not exposed #}
-{% if agent %}
-context: fork
-agent: {{ agent }}
-{% endif %}{#- if agent #}
-{#
-  NOTE: Prompt-based stop hooks do not currently work in Claude Code.
-  See: https://github.com/anthropics/claude-code/issues/20221
-  Only command/script hooks are generated here. Prompt hooks are filtered out.
-  Quality validation is handled via sub-agent review in the instructions section.
-#}
-{%- if hooks -%}
-{%- set has_command_hooks = namespace(value=false) -%}
-{%- for event_name, event_hooks in hooks.items() -%}
-{%- for hook in event_hooks -%}
-{%- if hook.type == "script" -%}
-{%- set has_command_hooks.value = true -%}
-{%- endif -%}{#- if hook.type == "script" #}
-{%- endfor -%}{#- for hook in event_hooks #}
-{%- endfor -%}{#- for event_name, event_hooks in hooks.items() #}
-{%- if has_command_hooks.value %}
-hooks:
-{% for event_name, event_hooks in hooks.items() %}
-{%- set script_hooks = event_hooks | selectattr("type", "equalto", "script") | list %}
-{%- if script_hooks -%}
-{#- For Stop events, generate both Stop and SubagentStop blocks #}
-{%- if event_name == "Stop" %}
-{%- for stop_event in ["Stop", "SubagentStop"] %}
-  {{ stop_event }}:
-    - hooks:
-{% for hook in script_hooks %}
-        - type: command
-          command: ".deepwork/jobs/{{ job_name }}/{{ hook.path }}"
-{% endfor %}{#- for hook in script_hooks #}
-{% endfor %}{#- for stop_event in ["Stop", "SubagentStop"] #}
-{%- elif event_name != "SubagentStop" or "Stop" not in hooks %}
-  {{ event_name }}:
-    - hooks:
-{% for hook in script_hooks %}
-        - type: command
-          command: ".deepwork/jobs/{{ job_name }}/{{ hook.path }}"
-{% endfor %}{#- for hook in script_hooks #}
-{% endif %}{#- if event_name == "Stop" #}
-{%- endif %}{#- if script_hooks #}
-{%- endfor %}{#- for event_name, event_hooks in hooks.items() #}
-{%- endif %}{#- if has_command_hooks.value #}
-{%- endif %}{#- if hooks #}
-
----
-
-# {{ job_name }}.{{ step_id }}
-
-{% if is_standalone %}
-**Standalone skill** - can be run anytime
-{% elif workflow_name %}
-**Step {{ workflow_step_number }}/{{ workflow_total_steps }}** in **{{ workflow_name }}** workflow
-
-> {{ workflow_summary }}
-{% else %}
-**Step {{ step_number }}/{{ total_steps }}** in **{{ job_name }}** workflow
-{% endif %}{#- if is_standalone #}
-
-> {{ job_summary }}
-
-{% if dependencies %}
-## Prerequisites (Verify First)
-
-Before proceeding, confirm these steps are complete:
-{% for dep in dependencies %}
-- `/{{ job_name }}.{{ dep }}`
-{% endfor %}{#- for dep in dependencies #}
-{% endif %}{#- if dependencies #}
-
-## Instructions
-
-**Goal**: {{ step_description }}
-
-{{ instructions_content }}
-
-{% if job_description %}
-### Job Context
-
-{{ job_description }}
-{% endif %}{#- if job_description #}
-
-{% if user_inputs or file_inputs %}
-## Required Inputs
-
-{% if user_inputs %}
-**User Parameters** - Gather from user before starting:
-{% for input in user_inputs %}
-- **{{ input.name }}**: {{ input.description }}
-{% endfor %}{#- for input in user_inputs #}
-{% endif %}{#- if user_inputs #}
-
-{% if file_inputs %}
-**Files from Previous Steps** - Read these first:
-{% for input in file_inputs %}
-- `{{ input.file }}` (from `{{ input.from_step }}`)
-{% endfor %}{#- for input in file_inputs #}
-{% endif %}{#- if file_inputs #}
-{% endif %}{#- if user_inputs or file_inputs #}
-
-## Work Branch
-
-Use branch format: `deepwork/{{ job_name }}-[instance]-YYYYMMDD`
-
-- If on a matching work branch: continue using it
-- If on main/master: create new branch with `git checkout -b deepwork/{{ job_name }}-[instance]-$(date +%Y%m%d)`
-
-## Outputs
-
-{% if outputs %}
-**Required outputs**:
-{% for output in outputs %}
-- `{{ output.file }}`{% if output.file.endswith('/') %} (directory){% endif %}
-
-{% if output.has_doc_spec and output.doc_spec %}
-  **Doc Spec**: {{ output.doc_spec.name }}
-  > {{ output.doc_spec.description }}
-  **Definition**: `{{ output.doc_spec.path }}`
-{% if output.doc_spec.target_audience %}
-  **Target Audience**: {{ output.doc_spec.target_audience }}
-{% endif %}{#- if output.doc_spec.target_audience #}
-{% if output.doc_spec.quality_criteria %}
-  **Quality Criteria**:
-{% for criterion in output.doc_spec.quality_criteria %}
-  {{ loop.index }}. **{{ criterion.name }}**: {{ criterion.description }}
-{% endfor %}{#- for criterion in output.doc_spec.quality_criteria #}
-{% endif %}{#- if output.doc_spec.quality_criteria #}
-{% if output.doc_spec.example_document %}
-
-  <details>
-  <summary>Example Document Structure</summary>
-
-  ```markdown
-  {{ output.doc_spec.example_document | indent(2) }}
-  ```
-
-  </details>
-{% endif %}{#- if output.doc_spec.example_document #}
-{% endif %}{#- if output.has_doc_spec and output.doc_spec #}
-{% endfor %}{#- for output in outputs #}
-{% else %}
-No specific file outputs required.
-{% endif %}{#- if outputs #}
-
-## Guardrails
-
-- Do NOT skip prerequisite verification if this step has dependencies
-- Do NOT produce partial outputs; complete all required outputs before finishing
-- Do NOT proceed without required inputs; ask the user if any are missing
-- Do NOT modify files outside the scope of this step's defined outputs
-
-{% if quality_criteria %}
-## Quality Validation
-
-**Before completing this step, you MUST have your work reviewed against the quality criteria below.**
-
-Use a sub-agent (Haiku model) to review your work against these criteria:
-
-**Criteria (all must be satisfied)**:
-{% for criterion in quality_criteria -%}
-{{ loop.index }}. {{ criterion }}
-{% endfor %}{#- for criterion in quality_criteria #}
-**Review Process**:
-1. Once you believe your work is complete, spawn a sub-agent using Haiku to review your work against the quality criteria above
-2. The sub-agent should examine your outputs and verify each criterion is met
-3. If the sub-agent identifies valid issues, fix them
-4. Have the sub-agent review again until all valid feedback has been addressed
-5. Only mark the step complete when the sub-agent confirms all criteria are satisfied
-
-{% endif %}{#- if quality_criteria #}
-{% if stop_hooks -%}
-{% for hook in stop_hooks -%}
-{% if hook.type == "script" -%}
-**Validation script**: `.deepwork/jobs/{{ job_name }}/{{ hook.path }}` (runs automatically)
-{% endif -%}{#- if hook.type == "script" #}
-{% endfor %}{#- for hook in stop_hooks #}
-{% endif %}{#- if stop_hooks #}
-## On Completion
-
-{% if is_standalone %}
-1. Verify outputs are created
-2. Inform user: "{{ step_id }} complete{% if outputs %}, outputs: {{ outputs | map(attribute='file') | join(', ') }}{% endif %}"
-
-This standalone skill can be re-run anytime.
-{% elif workflow_name %}
-1. Verify outputs are created
-2. Inform user: "{{ workflow_name }} step {{ workflow_step_number }}/{{ workflow_total_steps }} complete{% if outputs %}, outputs: {{ outputs | map(attribute='file') | join(', ') }}{% endif %}"
-{% if next_step %}
-3. **Continue workflow**: Use Skill tool to invoke `/{{ job_name }}.{{ next_step }}`
-{% else %}
-3. **{{ workflow_name }} workflow complete**: All steps finished. Consider creating a PR to merge the work branch.
-{% endif %}{#- if next_step #}
-{% else %}
-1. Verify outputs are created
-2. Inform user: "Step {{ step_number }}/{{ total_steps }} complete{% if outputs %}, outputs: {{ outputs | map(attribute='file') | join(', ') }}{% endif %}"
-{% if next_step %}
-3. **Continue workflow**: Use Skill tool to invoke `/{{ job_name }}.{{ next_step }}`
-{% else %}
-3. **Workflow complete**: All steps finished. Consider creating a PR to merge the work branch.
-{% endif %}{#- if next_step #}
-{% endif %}{#- if is_standalone #}
-
----
-
-**Reference files**: `.deepwork/jobs/{{ job_name }}/job.yml`, `.deepwork/jobs/{{ job_name }}/{{ instructions_file }}`
diff --git a/src/deepwork/templates/gemini/skill-deepwork.md.jinja b/src/deepwork/templates/gemini/skill-deepwork.md.jinja
new file mode 100644
index 00000000..0a035892
--- /dev/null
+++ b/src/deepwork/templates/gemini/skill-deepwork.md.jinja
@@ -0,0 +1,37 @@
+{#
+Template: skill-deepwork.md.jinja
+Purpose: Generates the main /deepwork skill that instructs agents to use MCP tools
+
+This template is used to create the entry-point skill for DeepWork.
+Instead of containing step instructions, it directs agents to use the
+DeepWork MCP server tools.
+#}
++++
+name = "deepwork"
+description = "Start or continue DeepWork workflows using MCP tools"
++++
+
+# DeepWork Workflow Manager
+
+Execute multi-step workflows with quality gate checkpoints.
+
+> **IMPORTANT**: Use the DeepWork MCP server tools. All workflow operations
+> are performed through MCP tool calls and following the instructions they return,
+> not by reading instructions from files.
+
+## How to Use
+
+1. Call `get_workflows` to discover available workflows
+2. Call `start_workflow` with goal, job_name, and workflow_name
+3. Follow the step instructions returned
+4. Call `finished_step` with your outputs when done
+5. Handle the response: `needs_work`, `next_step`, or `workflow_complete`
+
+## Intent Parsing
+
+When the user invokes `/deepwork`, parse their intent:
+1. **ALWAYS**: Call `get_workflows` to discover available workflows
+2. Based on the available flows and what the user said in their request, proceed:
+    - **Explicit workflow**: `/deepwork <a workflow name>` → start the `<a workflow name>` workflow
+    - **General request**: `/deepwork <a request>` → infer best match from available workflows
+    - **No context**: `/deepwork` alone → ask user to choose from available workflows
diff --git a/src/deepwork/templates/gemini/skill-job-meta.toml.jinja b/src/deepwork/templates/gemini/skill-job-meta.toml.jinja
deleted file mode 100644
index 158790d7..00000000
--- a/src/deepwork/templates/gemini/skill-job-meta.toml.jinja
+++ /dev/null
@@ -1,76 +0,0 @@
-{#
-Template: skill-job-meta.toml.jinja
-Purpose: Generates the job overview skill file for Gemini CLI
-
-Template Variables:
-  - job_name: string - Job identifier (e.g., "competitive_research")
-  - job_summary: string - Short one-line summary of the job
-  - job_description: string|null - Full description (optional)
-  - total_steps: int - Number of steps in the job
-  - steps: list - Array of step objects:
-      - id: string - Step identifier
-      - name: string - Human-readable step name
-      - description: string - What the step does
-      - command_name: string - Slash command (e.g., "job_name:step_id")
-      - dependencies: list[string]|null - Required prior steps
-
-Note: Gemini uses TOML format with description + prompt fields.
-      Commands use colon separator (/job_name:step_id) not period.
-#}
-# {{ job_name }}
-#
-# {{ job_summary }}
-#
-# Generated by DeepWork - do not edit manually
-
-description = "{{ job_summary | replace('"', '\\"') }}"
-
-prompt = """
-# {{ job_name }}
-
-**Multi-step workflow**: {{ job_summary }}
-
-> **NOTE**: Gemini CLI requires manual command invocation. After each step, tell the user which command to run next.
-
-{% if job_description %}
-{{ job_description }}
-{% endif %}
-
-## Available Steps
-
-{% for step in steps %}
-{{ loop.index }}. **{{ step.id }}** - {{ step.description }}{% if step.dependencies %} (requires: {{ step.dependencies | join(', ') }}){% endif %}
-
-   Command: `/{{ step.command_name }}`
-{% endfor %}
-
-## Execution Instructions
-
-### Step 1: Analyze Intent
-
-Parse any text following `/{{ job_name }}` to determine user intent:
-{% for step in steps %}
-- "{{ step.id }}" or related terms → start at `/{{ step.command_name }}`
-{% endfor %}
-
-### Step 2: Direct User to Starting Step
-
-Tell the user which command to run:
-```
-/{{ steps[0].command_name }}
-```
-
-### Step 3: Guide Through Workflow
-
-After each step completes, tell the user the next command to run until workflow is complete.
-
-### Handling Ambiguous Intent
-
-If user intent is unclear:
-- Present available steps as numbered options
-- Ask user to select the starting point
-
-## Reference
-
-- Job definition: `.deepwork/jobs/{{ job_name }}/job.yml`
-"""
diff --git a/src/deepwork/templates/gemini/skill-job-step.toml.jinja b/src/deepwork/templates/gemini/skill-job-step.toml.jinja
deleted file mode 100644
index 946bec5c..00000000
--- a/src/deepwork/templates/gemini/skill-job-step.toml.jinja
+++ /dev/null
@@ -1,162 +0,0 @@
-{#
-Template: skill-job-step.toml.jinja
-Purpose: Generates individual step skill files for Gemini CLI
-
-Template Variables:
-  Job Context:
-    - job_name: string - Job identifier
-    - job_summary: string - Short job summary
-    - job_description: string|null - Full job description
-
-  Step Metadata:
-    - step_id: string - Step identifier
-    - step_description: string - What this step does
-    - step_number: int - Position in workflow (1-indexed)
-    - total_steps: int - Total steps in job
-    - is_standalone: bool - True if step can run independently
-    - dependencies: list[string]|null - Required prior step IDs
-    - next_step: string|null - Next step ID in workflow
-    - instructions_file: string - Path to step instructions file
-
-  Step Content:
-    - instructions_content: string - Full instructions markdown
-    - user_inputs: list|null - User parameters to gather:
-        - name: string - Parameter name
-        - description: string - What to ask for
-    - file_inputs: list|null - Files from previous steps:
-        - file: string - File path
-        - from_step: string - Source step ID
-    - outputs: list[string]|null - Output file paths
-
-  Quality:
-    - quality_criteria: list[string]|null - Criteria for completion
-    - stop_hooks: list|null - Stop hook configurations (note: Gemini
-      does not support automated hooks, so these are for manual verification)
-
-Note: Gemini uses TOML format with description + prompt fields.
-      Commands use colon separator (/job_name:step_id) not period.
-#}
-# {{ job_name }}:{{ step_id }}
-#
-# {{ step_description }}
-#
-# Generated by DeepWork - do not edit manually
-
-description = "{{ step_description | replace('"', '\\"') }}"
-
-prompt = """
-# {{ job_name }}:{{ step_id }}
-
-{% if is_standalone %}
-**Standalone command** - can be run anytime
-{% else %}
-**Step {{ step_number }}/{{ total_steps }}** in **{{ job_name }}** workflow
-{% endif %}
-
-> {{ job_summary }}
-
-{% if dependencies %}
-## Prerequisites (Verify First)
-
-Before proceeding, confirm these steps are complete:
-{% for dep in dependencies %}
-- `/{{ job_name }}:{{ dep }}`
-{% endfor %}
-{% endif %}
-
-## Instructions
-
-**Goal**: {{ step_description }}
-
-{{ instructions_content }}
-
-{% if job_description %}
-### Job Context
-
-{{ job_description }}
-{% endif %}
-
-{% if user_inputs or file_inputs %}
-## Required Inputs
-
-{% if user_inputs %}
-**User Parameters** - Gather from user before starting:
-{% for input in user_inputs %}
-- **{{ input.name }}**: {{ input.description }}
-{% endfor %}
-{% endif %}
-
-{% if file_inputs %}
-**Files from Previous Steps** - Read these first:
-{% for input in file_inputs %}
-- `{{ input.file }}` (from `{{ input.from_step }}`)
-{% endfor %}
-{% endif %}
-{% endif %}
-
-## Work Branch
-
-Use branch format: `deepwork/{{ job_name }}-[instance]-YYYYMMDD`
-
-- If on a matching work branch: continue using it
-- If on main/master: create new branch with `git checkout -b deepwork/{{ job_name }}-[instance]-$(date +%Y%m%d)`
-
-## Outputs
-
-{% if outputs %}
-**Required outputs**:
-{% for output in outputs %}
-- `{{ output.file }}`{% if output.file.endswith('/') %} (directory){% endif %}
-
-{% if output.has_doc_spec and output.doc_spec %}
-  **Doc Spec**: {{ output.doc_spec.name }}
-  > {{ output.doc_spec.description }}
-  **Definition**: `{{ output.doc_spec.path }}`
-{% if output.doc_spec.target_audience %}
-  **Target Audience**: {{ output.doc_spec.target_audience }}
-{% endif %}
-{% if output.doc_spec.quality_criteria %}
-  **Quality Criteria**:
-{% for criterion in output.doc_spec.quality_criteria %}
-  {{ loop.index }}. **{{ criterion.name }}**: {{ criterion.description }}
-{% endfor %}
-{% endif %}
-{% endif %}
-{% endfor %}
-{% else %}
-No specific file outputs required.
-{% endif %}
-
-{% if quality_criteria or stop_hooks %}
-## Quality Validation (Manual)
-
-**NOTE**: Gemini CLI does not support automated validation. Manually verify criteria before completing.
-
-{% if quality_criteria %}
-**Criteria (all must be satisfied)**:
-{% for criterion in quality_criteria %}
-{{ loop.index }}. {{ criterion }}
-{% endfor %}
-{% endif %}
-{% endif %}
-## On Completion
-
-{% if is_standalone %}
-1. Verify outputs are created
-2. Inform user: "{{ step_id }} complete{% if outputs %}, outputs: {{ outputs | map(attribute='file') | join(', ') }}{% endif %}"
-
-This standalone command can be re-run anytime.
-{% else %}
-1. Verify outputs are created
-2. Inform user: "Step {{ step_number }}/{{ total_steps }} complete{% if outputs %}, outputs: {{ outputs | map(attribute='file') | join(', ') }}{% endif %}"
-{% if next_step %}
-3. **Tell user next command**: `/{{ job_name }}:{{ next_step }}`
-{% else %}
-3. **Workflow complete**: All steps finished. Consider creating a PR to merge the work branch.
-{% endif %}
-{% endif %}
-
----
-
-**Reference files**: `.deepwork/jobs/{{ job_name }}/job.yml`, `.deepwork/jobs/{{ job_name }}/{{ instructions_file }}`
-"""
diff --git a/tests/conftest.py b/tests/conftest.py
index 0f540293..d7a81ed8 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -35,14 +35,6 @@ def mock_claude_project(mock_git_repo: Path) -> Path:
     return mock_git_repo
 
 
-@pytest.fixture
-def mock_gemini_project(mock_git_repo: Path) -> Path:
-    """Create a mock project with Gemini CLI setup."""
-    gemini_dir = mock_git_repo / ".gemini"
-    gemini_dir.mkdir(exist_ok=True)
-    return mock_git_repo
-
-
 @pytest.fixture
 def mock_multi_platform_project(mock_git_repo: Path) -> Path:
     """Create a mock project with multiple AI platforms setup."""
diff --git a/tests/e2e/test_claude_code_integration.py b/tests/e2e/test_claude_code_integration.py
index b98fbc28..a11d6659 100644
--- a/tests/e2e/test_claude_code_integration.py
+++ b/tests/e2e/test_claude_code_integration.py
@@ -1,10 +1,10 @@
 """End-to-end tests for DeepWork with Claude Code integration.
 
-These tests validate that DeepWork-generated skills work correctly
-with Claude Code. The tests can run in two modes:
+These tests validate that DeepWork MCP-based workflows work correctly.
+The tests can run in two modes:
 
-1. **Generation-only mode** (default): Tests skill generation and structure
-2. **Full e2e mode**: Actually executes skills with Claude Code
+1. **MCP tools mode** (default): Tests MCP skill generation and workflow tools
+2. **Full e2e mode**: Actually executes workflows with Claude Code via MCP
 
 Set ANTHROPIC_API_KEY and DEEPWORK_E2E_FULL=true to run full e2e tests.
 """
@@ -19,7 +19,8 @@
 
 from deepwork.core.adapters import ClaudeAdapter
 from deepwork.core.generator import SkillGenerator
-from deepwork.core.parser import parse_job_definition
+from deepwork.mcp.state import StateManager
+from deepwork.mcp.tools import WorkflowTools
 
 # Test input for deterministic validation
 TEST_INPUT = "apple, car, banana, chair, orange, table, mango, laptop, grape, bicycle"
@@ -55,11 +56,11 @@ def run_full_e2e() -> bool:
     )
 
 
-class TestSkillGenerationE2E:
-    """End-to-end tests for skill generation."""
+class TestMCPSkillGeneration:
+    """Tests for MCP entry point skill generation."""
 
-    def test_generate_fruits_skills_in_temp_project(self) -> None:
-        """Test generating fruits skills in a realistic project structure."""
+    def test_generate_deepwork_skill_in_temp_project(self) -> None:
+        """Test generating the /deepwork MCP skill in a realistic project structure."""
         with tempfile.TemporaryDirectory() as tmpdir:
             project_dir = Path(tmpdir)
 
@@ -67,7 +68,7 @@ def test_generate_fruits_skills_in_temp_project(self) -> None:
             deepwork_dir = project_dir / ".deepwork" / "jobs"
             deepwork_dir.mkdir(parents=True)
 
-            # Copy fruits job fixture
+            # Copy fruits job fixture (for job discovery testing)
             fixtures_dir = Path(__file__).parent.parent / "fixtures" / "jobs" / "fruits"
             shutil.copytree(fixtures_dir, deepwork_dir / "fruits")
 
@@ -84,96 +85,218 @@ def test_generate_fruits_skills_in_temp_project(self) -> None:
                 capture_output=True,
             )
 
-            # Parse job and generate skills
-            job = parse_job_definition(deepwork_dir / "fruits")
+            # Generate MCP entry point skill
             generator = SkillGenerator()
-            adapter = ClaudeAdapter()
+            adapter = ClaudeAdapter(project_root=project_dir)
 
-            skills_dir = project_dir / ".claude"
-            skills_dir.mkdir()
+            claude_dir = project_dir / ".claude"
+            claude_dir.mkdir()
 
-            skill_paths = generator.generate_all_skills(job, adapter, skills_dir)
+            skill_path = generator.generate_deepwork_skill(adapter, claude_dir)
 
-            # Validate skills were generated (meta + steps)
-            assert len(skill_paths) == 3  # 1 meta + 2 steps
+            # Validate skill was generated
+            assert skill_path.exists()
+            expected_path = claude_dir / "skills" / "deepwork" / "SKILL.md"
+            assert skill_path == expected_path
 
-            meta_skill = skills_dir / "skills" / "fruits" / "SKILL.md"
-            identify_skill = skills_dir / "skills" / "fruits.identify" / "SKILL.md"
-            classify_skill = skills_dir / "skills" / "fruits.classify" / "SKILL.md"
+    def test_deepwork_skill_structure(self) -> None:
+        """Test that the generated /deepwork skill has the expected structure."""
+        with tempfile.TemporaryDirectory() as tmpdir:
+            project_dir = Path(tmpdir)
+            claude_dir = project_dir / ".claude"
+            claude_dir.mkdir(parents=True)
 
-            assert meta_skill.exists()
-            assert identify_skill.exists()
-            assert classify_skill.exists()
+            generator = SkillGenerator()
+            adapter = ClaudeAdapter(project_root=project_dir)
+            skill_path = generator.generate_deepwork_skill(adapter, claude_dir)
 
-            # Validate skill content
-            identify_content = identify_skill.read_text()
-            assert "# fruits.identify" in identify_content
-            assert "raw_items" in identify_content
-            assert "identified_fruits.md" in identify_content
+            content = skill_path.read_text()
 
-            classify_content = classify_skill.read_text()
-            assert "# fruits.classify" in classify_content
-            assert "identified_fruits.md" in classify_content
-            assert "classified_fruits.md" in classify_content
+            # Check frontmatter
+            assert "---" in content
+            assert "name: deepwork" in content
 
-    def test_skill_structure_matches_claude_code_expectations(self) -> None:
-        """Test that generated skills have the structure Claude Code expects."""
-        fixtures_dir = Path(__file__).parent.parent / "fixtures" / "jobs" / "fruits"
-        job = parse_job_definition(fixtures_dir)
+            # Check MCP tool references
+            assert "get_workflows" in content
+            assert "start_workflow" in content
+            assert "finished_step" in content
+
+            # Check structure sections
+            assert "# DeepWork" in content
+            assert "MCP" in content
 
+    def test_deepwork_skill_mcp_instructions(self) -> None:
+        """Test that the /deepwork skill properly instructs use of MCP tools."""
         with tempfile.TemporaryDirectory() as tmpdir:
-            skills_dir = Path(tmpdir) / ".claude"
-            skills_dir.mkdir()
+            project_dir = Path(tmpdir)
+            claude_dir = project_dir / ".claude"
+            claude_dir.mkdir(parents=True)
 
             generator = SkillGenerator()
-            adapter = ClaudeAdapter()
-            generator.generate_all_skills(job, adapter, skills_dir)
+            adapter = ClaudeAdapter(project_root=project_dir)
+            skill_path = generator.generate_deepwork_skill(adapter, claude_dir)
+
+            content = skill_path.read_text()
 
-            # Step skills use directory/SKILL.md format
-            identify_skill = skills_dir / "skills" / "fruits.identify" / "SKILL.md"
-            content = identify_skill.read_text()
+            # Should instruct to use MCP tools, not read files
+            assert "MCP" in content
+            assert "tool" in content.lower()
 
-            # Claude Code expects specific sections
-            assert "# fruits.identify" in content  # Skill name header
-            assert "## Instructions" in content  # Instructions section
-            assert "## Required Inputs" in content  # Inputs section
-            assert "## Outputs" in content  # Outputs section
+            # Should describe the workflow execution flow
+            assert "start_workflow" in content
+            assert "finished_step" in content
 
-            # Check for user input prompt
-            assert "raw_items" in content
 
-    def test_dependency_chain_in_skills(self) -> None:
-        """Test that dependency chain is correctly represented in skills."""
+class TestMCPWorkflowTools:
+    """Tests for MCP workflow tools functionality."""
+
+    @pytest.fixture
+    def project_with_job(self) -> Path:
+        """Create a test project with a job definition."""
+        tmpdir = tempfile.mkdtemp()
+        project_dir = Path(tmpdir)
+
+        # Set up project structure
+        deepwork_dir = project_dir / ".deepwork" / "jobs"
+        deepwork_dir.mkdir(parents=True)
+
+        # Copy fruits job fixture
         fixtures_dir = Path(__file__).parent.parent / "fixtures" / "jobs" / "fruits"
-        job = parse_job_definition(fixtures_dir)
+        shutil.copytree(fixtures_dir, deepwork_dir / "fruits")
 
-        with tempfile.TemporaryDirectory() as tmpdir:
-            skills_dir = Path(tmpdir) / ".claude"
-            skills_dir.mkdir()
+        # Initialize git repo
+        subprocess.run(["git", "init"], cwd=project_dir, capture_output=True)
+        subprocess.run(
+            ["git", "config", "user.email", "test@test.com"],
+            cwd=project_dir,
+            capture_output=True,
+        )
+        subprocess.run(
+            ["git", "config", "user.name", "Test"],
+            cwd=project_dir,
+            capture_output=True,
+        )
 
-            generator = SkillGenerator()
-            adapter = ClaudeAdapter()
-            generator.generate_all_skills(job, adapter, skills_dir)
+        # Create README and initial commit
+        (project_dir / "README.md").write_text("# Test Project\n")
+        subprocess.run(["git", "add", "."], cwd=project_dir, capture_output=True)
+        subprocess.run(
+            ["git", "commit", "-m", "init"],
+            cwd=project_dir,
+            capture_output=True,
+        )
+
+        yield project_dir
+
+        # Cleanup
+        shutil.rmtree(tmpdir, ignore_errors=True)
+
+    def test_get_workflows_returns_jobs(self, project_with_job: Path) -> None:
+        """Test that get_workflows returns available jobs and workflows."""
+        state_manager = StateManager(project_with_job)
+        tools = WorkflowTools(project_with_job, state_manager)
+
+        response = tools.get_workflows()
+
+        # Should find the fruits job
+        assert len(response.jobs) >= 1
+        job_names = [job.name for job in response.jobs]
+        assert "fruits" in job_names
+
+        # Find fruits job and check structure
+        fruits_job = next(j for j in response.jobs if j.name == "fruits")
+        assert fruits_job.description is not None
+
+        # The fruits fixture has a "full" workflow
+        assert len(fruits_job.workflows) >= 1
+        full_workflow = fruits_job.workflows[0]
+        assert full_workflow.name == "full"
+        assert full_workflow.summary is not None
+
+    async def test_start_workflow_creates_session(self, project_with_job: Path) -> None:
+        """Test that start_workflow creates a new workflow session."""
+        state_manager = StateManager(project_with_job)
+        tools = WorkflowTools(project_with_job, state_manager)
+
+        # Get available workflows first
+        workflows_response = tools.get_workflows()
+        fruits_job = next(j for j in workflows_response.jobs if j.name == "fruits")
 
-            # Step skills use directory/SKILL.md format
-            # First step should have no prerequisites
-            identify_skill = skills_dir / "skills" / "fruits.identify" / "SKILL.md"
-            identify_content = identify_skill.read_text()
-            assert "## Prerequisites" not in identify_content
+        # Should have the "full" workflow
+        assert len(fruits_job.workflows) >= 1
+        workflow_name = fruits_job.workflows[0].name
 
-            # Second step should reference first step
-            classify_skill = skills_dir / "skills" / "fruits.classify" / "SKILL.md"
-            classify_content = classify_skill.read_text()
-            assert "## Prerequisites" in classify_content
-            assert "identify" in classify_content.lower()
+        from deepwork.mcp.schemas import StartWorkflowInput
+
+        input_data = StartWorkflowInput(
+            goal="Test identifying and classifying fruits",
+            job_name="fruits",
+            workflow_name=workflow_name,
+            instance_id="test-instance",
+        )
+
+        response = await tools.start_workflow(input_data)
+
+        # Should return session info
+        assert response.begin_step.session_id is not None
+        assert response.begin_step.branch_name is not None
+        assert "deepwork" in response.begin_step.branch_name.lower()
+        assert "fruits" in response.begin_step.branch_name.lower()
+
+        # Should return first step instructions
+        assert response.begin_step.step_id is not None
+        assert response.begin_step.step_instructions is not None
+        assert len(response.begin_step.step_instructions) > 0
+
+    async def test_workflow_step_progression(self, project_with_job: Path) -> None:
+        """Test that finished_step progresses through workflow steps."""
+        state_manager = StateManager(project_with_job)
+        tools = WorkflowTools(project_with_job, state_manager)
+
+        # Get workflows and start
+        workflows_response = tools.get_workflows()
+        fruits_job = next(j for j in workflows_response.jobs if j.name == "fruits")
+
+        # Should have the "full" workflow
+        assert len(fruits_job.workflows) >= 1
+        workflow_name = fruits_job.workflows[0].name
+
+        from deepwork.mcp.schemas import FinishedStepInput, StartWorkflowInput
+
+        start_input = StartWorkflowInput(
+            goal="Test workflow progression",
+            job_name="fruits",
+            workflow_name=workflow_name,
+        )
+        await tools.start_workflow(start_input)
+
+        # Create mock output file for first step
+        output_file = project_with_job / "identified_fruits.md"
+        output_file.write_text("# Identified Fruits\n\n- apple\n- banana\n- orange\n")
+
+        # Report first step completion
+        finish_input = FinishedStepInput(
+            outputs={"identified_fruits.md": str(output_file)},
+            notes="Identified fruits from test input",
+        )
+        finish_response = await tools.finished_step(finish_input)
+
+        # Should either advance to next step or complete
+        assert finish_response.status in ["next_step", "workflow_complete", "needs_work"]
+
+        if finish_response.status == "next_step":
+            # Should have instructions for next step
+            assert finish_response.begin_step is not None
+            assert finish_response.begin_step.step_instructions is not None
+            assert finish_response.begin_step.step_id is not None
 
 
 @pytest.mark.skipif(
     not run_full_e2e(),
     reason="Full e2e requires ANTHROPIC_API_KEY, DEEPWORK_E2E_FULL=true, and claude CLI",
 )
-class TestClaudeCodeExecution:
-    """End-to-end tests that actually execute with Claude Code.
+class TestClaudeCodeMCPExecution:
+    """End-to-end tests that actually execute with Claude Code via MCP.
 
     These tests only run when:
     - ANTHROPIC_API_KEY is set
@@ -182,8 +305,8 @@ class TestClaudeCodeExecution:
     """
 
     @pytest.fixture
-    def project_with_skills(self) -> Path:
-        """Create a test project with generated skills."""
+    def project_with_mcp(self) -> Path:
+        """Create a test project with MCP server configured."""
         tmpdir = tempfile.mkdtemp()
         project_dir = Path(tmpdir)
 
@@ -217,30 +340,38 @@ def project_with_skills(self) -> Path:
             capture_output=True,
         )
 
-        # Generate skills
-        job = parse_job_definition(deepwork_dir / "fruits")
+        # Generate /deepwork skill
         generator = SkillGenerator()
-        adapter = ClaudeAdapter()
+        adapter = ClaudeAdapter(project_root=project_dir)
+
+        claude_dir = project_dir / ".claude"
+        claude_dir.mkdir()
+        generator.generate_deepwork_skill(adapter, claude_dir)
 
-        skills_dir = project_dir / ".claude"
-        skills_dir.mkdir()
-        generator.generate_all_skills(job, adapter, skills_dir)
+        # Register MCP server
+        adapter.register_mcp_server(project_dir)
 
         yield project_dir
 
         # Cleanup
         shutil.rmtree(tmpdir, ignore_errors=True)
 
-    def test_fruits_workflow_execution(self, project_with_skills: Path) -> None:
-        """Test executing the complete fruits workflow with Claude Code.
+    def test_fruits_workflow_via_mcp(self, project_with_mcp: Path) -> None:
+        """Test executing the fruits workflow via MCP tools.
 
-        Invokes /fruits once, which automatically runs all steps (identify + classify).
+        Uses /deepwork skill which instructs Claude to use MCP tools
+        for workflow orchestration.
         """
-        # Run Claude Code with the fruits skill - this executes the full workflow
+        # Run Claude Code with the /deepwork skill
+        # The skill instructs Claude to use MCP tools
         result = subprocess.run(
-            ["claude", "--print", "/fruits"],
-            input=f"raw_items: {TEST_INPUT}",
-            cwd=project_with_skills,
+            [
+                "claude",
+                "--print",
+                f"Use /deepwork to start a fruits workflow. "
+                f"For the identify step, use these items: {TEST_INPUT}",
+            ],
+            cwd=project_with_mcp,
             capture_output=True,
             text=True,
             timeout=300,  # 5 minutes for full workflow
@@ -249,7 +380,7 @@ def test_fruits_workflow_execution(self, project_with_skills: Path) -> None:
         assert result.returncode == 0, f"Claude Code failed: {result.stderr}"
 
         # Verify identify step output was created
-        identify_output = project_with_skills / "identified_fruits.md"
+        identify_output = project_with_mcp / "identified_fruits.md"
         assert identify_output.exists(), "identified_fruits.md was not created"
 
         # Validate identify output content
@@ -260,7 +391,7 @@ def test_fruits_workflow_execution(self, project_with_skills: Path) -> None:
             )
 
         # Verify classify step output was created
-        classify_output = project_with_skills / "classified_fruits.md"
+        classify_output = project_with_mcp / "classified_fruits.md"
         assert classify_output.exists(), "classified_fruits.md was not created"
 
         # Validate classify output has category structure
diff --git a/tests/fixtures/jobs/complex_job/job.yml b/tests/fixtures/jobs/complex_job/job.yml
index 7c1343d6..9fbc86c9 100644
--- a/tests/fixtures/jobs/complex_job/job.yml
+++ b/tests/fixtures/jobs/complex_job/job.yml
@@ -1,3 +1,4 @@
+# yaml-language-server: $schema=.deepwork/schemas/job.schema.json
 name: competitive_research
 version: "0.1.0"
 summary: "Systematic competitive analysis workflow"
@@ -30,8 +31,12 @@ steps:
       - name: product_category
         description: "Product category"
     outputs:
-      - competitors.md
+      competitors.md:
+        type: file
+        description: "Vetted list of direct and indirect competitors"
+        required: true
     dependencies: []
+    reviews: []
 
   - id: primary_research
     name: "Primary Research"
@@ -41,10 +46,17 @@ steps:
       - file: competitors.md
         from_step: identify_competitors
     outputs:
-      - primary_research.md
-      - competitor_profiles/
+      primary_research.md:
+        type: file
+        description: "Analysis of competitors' self-presentation"
+        required: true
+      competitor_profiles:
+        type: files
+        description: "Individual competitor profile documents"
+        required: true
     dependencies:
       - identify_competitors
+    reviews: []
 
   - id: secondary_research
     name: "Secondary Research"
@@ -56,10 +68,14 @@ steps:
       - file: primary_research.md
         from_step: primary_research
     outputs:
-      - secondary_research.md
+      secondary_research.md:
+        type: file
+        description: "Third-party perspectives on competitors"
+        required: true
     dependencies:
       - identify_competitors
       - primary_research
+    reviews: []
 
   - id: comparative_report
     name: "Comparative Report"
@@ -71,8 +87,15 @@ steps:
       - file: secondary_research.md
         from_step: secondary_research
     outputs:
-      - comparison_matrix.md
-      - strengths_weaknesses.md
+      comparison_matrix.md:
+        type: file
+        description: "Detailed comparison matrix across competitors"
+        required: true
+      strengths_weaknesses.md:
+        type: file
+        description: "Strengths and weaknesses analysis"
+        required: true
     dependencies:
       - primary_research
       - secondary_research
+    reviews: []
diff --git a/tests/fixtures/jobs/concurrent_steps_job/job.yml b/tests/fixtures/jobs/concurrent_steps_job/job.yml
index 8609c512..21707dec 100644
--- a/tests/fixtures/jobs/concurrent_steps_job/job.yml
+++ b/tests/fixtures/jobs/concurrent_steps_job/job.yml
@@ -1,3 +1,4 @@
+# yaml-language-server: $schema=.deepwork/schemas/job.schema.json
 name: concurrent_workflow
 version: "1.0.0"
 summary: "Workflow with concurrent steps for testing"
@@ -20,7 +21,11 @@ steps:
     description: "Initialize the analysis environment"
     instructions_file: steps/setup.md
     outputs:
-      - setup_complete.md
+      setup_complete.md:
+        type: file
+        description: "Setup confirmation and configuration"
+        required: true
+    reviews: []
 
   - id: research_web
     name: "Web Research"
@@ -30,9 +35,13 @@ steps:
       - file: setup_complete.md
         from_step: setup
     outputs:
-      - web_research.md
+      web_research.md:
+        type: file
+        description: "Research findings from web sources"
+        required: true
     dependencies:
       - setup
+    reviews: []
 
   - id: research_docs
     name: "Document Research"
@@ -42,9 +51,13 @@ steps:
       - file: setup_complete.md
         from_step: setup
     outputs:
-      - docs_research.md
+      docs_research.md:
+        type: file
+        description: "Research findings from internal documents"
+        required: true
     dependencies:
       - setup
+    reviews: []
 
   - id: research_interviews
     name: "Interview Research"
@@ -54,9 +67,13 @@ steps:
       - file: setup_complete.md
         from_step: setup
     outputs:
-      - interviews_research.md
+      interviews_research.md:
+        type: file
+        description: "Research findings from stakeholder interviews"
+        required: true
     dependencies:
       - setup
+    reviews: []
 
   - id: compile_results
     name: "Compile Results"
@@ -70,11 +87,15 @@ steps:
       - file: interviews_research.md
         from_step: research_interviews
     outputs:
-      - compiled_results.md
+      compiled_results.md:
+        type: file
+        description: "Unified report from all research sources"
+        required: true
     dependencies:
       - research_web
       - research_docs
       - research_interviews
+    reviews: []
 
   - id: final_review
     name: "Final Review"
@@ -84,6 +105,10 @@ steps:
       - file: compiled_results.md
         from_step: compile_results
     outputs:
-      - final_report.md
+      final_report.md:
+        type: file
+        description: "Final reviewed and approved analysis report"
+        required: true
     dependencies:
       - compile_results
+    reviews: []
diff --git a/tests/fixtures/jobs/exposed_step_job/job.yml b/tests/fixtures/jobs/exposed_step_job/job.yml
index d480daeb..3e59a980 100644
--- a/tests/fixtures/jobs/exposed_step_job/job.yml
+++ b/tests/fixtures/jobs/exposed_step_job/job.yml
@@ -1,3 +1,4 @@
+# yaml-language-server: $schema=.deepwork/schemas/job.schema.json
 name: exposed_job
 version: "0.1.0"
 summary: "A job with exposed and hidden steps for testing"
@@ -16,8 +17,12 @@ steps:
     description: "A step that is hidden by default"
     instructions_file: steps/hidden_step.md
     outputs:
-      - hidden_output.md
+      hidden_output.md:
+        type: file
+        description: "Output from the hidden step"
+        required: true
     dependencies: []
+    reviews: []
 
   - id: exposed_step
     name: "Exposed Step"
@@ -25,5 +30,9 @@ steps:
     instructions_file: steps/exposed_step.md
     exposed: true
     outputs:
-      - exposed_output.md
+      exposed_output.md:
+        type: file
+        description: "Output from the exposed step"
+        required: true
     dependencies: []
+    reviews: []
diff --git a/tests/fixtures/jobs/fruits/job.yml b/tests/fixtures/jobs/fruits/job.yml
index e1ce79a6..012fd9df 100644
--- a/tests/fixtures/jobs/fruits/job.yml
+++ b/tests/fixtures/jobs/fruits/job.yml
@@ -1,3 +1,4 @@
+# yaml-language-server: $schema=.deepwork/schemas/job.schema.json
 name: fruits
 version: "1.0.0"
 summary: "Identify and classify fruits from a mixed list of items"
@@ -15,6 +16,13 @@ changelog:
   - version: "1.0.0"
     changes: "Initial version for CI testing"
 
+workflows:
+  - name: full
+    summary: "Run the complete fruits identification and classification"
+    steps:
+      - identify
+      - classify
+
 steps:
   - id: identify
     name: "Identify Fruits"
@@ -24,8 +32,12 @@ steps:
       - name: raw_items
         description: "Comma-separated list of items to filter (e.g., 'apple, car, banana, chair')"
     outputs:
-      - identified_fruits.md
+      identified_fruits.md:
+        type: file
+        description: "List of identified fruits from the input items"
+        required: true
     dependencies: []
+    reviews: []
 
   - id: classify
     name: "Classify Fruits"
@@ -35,6 +47,10 @@ steps:
       - file: identified_fruits.md
         from_step: identify
     outputs:
-      - classified_fruits.md
+      classified_fruits.md:
+        type: file
+        description: "Fruits organized into categories"
+        required: true
     dependencies:
       - identify
+    reviews: []
diff --git a/tests/fixtures/jobs/job_with_doc_spec/job.yml b/tests/fixtures/jobs/job_with_doc_spec/job.yml
index b7a6b3ff..5eb56f1e 100644
--- a/tests/fixtures/jobs/job_with_doc_spec/job.yml
+++ b/tests/fixtures/jobs/job_with_doc_spec/job.yml
@@ -1,18 +1,22 @@
+# yaml-language-server: $schema=.deepwork/schemas/job.schema.json
 name: job_with_doc_spec
 version: "1.0.0"
-summary: "Job with doc spec output for testing"
+summary: "Job with typed output for testing"
 description: |
-  A test job that produces a document with a doc spec reference.
+  A test job that produces a report document.
 
 steps:
   - id: generate_report
     name: "Generate Report"
-    description: "Generate a report following the doc spec"
+    description: "Generate a report"
     instructions_file: steps/generate_report.md
     inputs:
       - name: report_title
         description: "Title for the report"
     outputs:
-      - file: report.md
-        doc_spec: .deepwork/doc_specs/valid_report.md
+      report.md:
+        type: file
+        description: "Generated report document"
+        required: true
     dependencies: []
+    reviews: []
diff --git a/tests/fixtures/jobs/simple_job/job.yml b/tests/fixtures/jobs/simple_job/job.yml
index 14642290..ca5a9c27 100644
--- a/tests/fixtures/jobs/simple_job/job.yml
+++ b/tests/fixtures/jobs/simple_job/job.yml
@@ -1,3 +1,4 @@
+# yaml-language-server: $schema=.deepwork/schemas/job.schema.json
 name: simple_job
 version: "0.1.0"
 summary: "A simple single-step job for testing"
@@ -20,5 +21,9 @@ steps:
       - name: input_param
         description: "An input parameter"
     outputs:
-      - output.md
+      output.md:
+        type: file
+        description: "The output file produced by this step"
+        required: true
     dependencies: []
+    reviews: []
diff --git a/tests/fixtures/mock_review_agent.py b/tests/fixtures/mock_review_agent.py
new file mode 100755
index 00000000..8ccbb4d8
--- /dev/null
+++ b/tests/fixtures/mock_review_agent.py
@@ -0,0 +1,209 @@
+#!/usr/bin/env python3
+"""Mock review agent for integration testing.
+
+This script simulates a review agent that reads a prompt from stdin
+and returns a JSON response in Claude CLI wrapper format. The behavior
+is controlled by environment variables or by keywords in the input prompt.
+
+############################################################################
+# CRITICAL: OUTPUT FORMAT
+#
+# This mock returns responses in the same wrapper format as Claude CLI
+# when using `--print --output-format json --json-schema`. The quality gate
+# response is in the `structured_output` field:
+#
+# {
+#     "type": "result",
+#     "subtype": "success",
+#     "is_error": false,
+#     "structured_output": {
+#         "passed": true/false,
+#         "feedback": "...",
+#         "criteria_results": [...]
+#     }
+# }
+#
+# See doc/reference/calling_claude_in_print_mode.md for details.
+############################################################################
+
+Behavior modes:
+- REVIEW_RESULT=pass: Always return passed=true
+- REVIEW_RESULT=fail: Always return passed=false
+- REVIEW_RESULT=malformed: Return invalid JSON
+- REVIEW_RESULT=empty: Return empty response
+- REVIEW_RESULT=timeout: Sleep forever (for timeout testing)
+- REVIEW_RESULT=error: Exit with non-zero code
+- Default: Parse prompt and look for FORCE_PASS or FORCE_FAIL markers
+"""
+
+import json
+import os
+import sys
+import time
+
+
+def wrap_response(quality_result: dict) -> dict:
+    """Wrap a quality gate result in Claude CLI output format.
+
+    Args:
+        quality_result: The quality gate result with passed, feedback, criteria_results
+
+    Returns:
+        Wrapper object matching Claude CLI --output-format json --json-schema output
+    """
+    return {
+        "type": "result",
+        "subtype": "success",
+        "is_error": False,
+        "structured_output": quality_result,
+    }
+
+
+def main() -> int:
+    """Main entry point."""
+    mode = os.environ.get("REVIEW_RESULT", "auto")
+
+    # Read prompt from stdin
+    prompt = sys.stdin.read()
+
+    if mode == "timeout":
+        # Sleep forever to trigger timeout
+        time.sleep(3600)
+        return 0
+
+    if mode == "error":
+        print("Review agent error!", file=sys.stderr)
+        return 1
+
+    if mode == "empty":
+        return 0
+
+    if mode == "malformed":
+        print("This is not valid JSON {{{")
+        return 0
+
+    if mode == "pass":
+        response = wrap_response(
+            {
+                "passed": True,
+                "feedback": "All criteria met",
+                "criteria_results": [
+                    {"criterion": "Criterion 1", "passed": True, "feedback": None}
+                ],
+            }
+        )
+        print(json.dumps(response))
+        return 0
+
+    if mode == "fail":
+        response = wrap_response(
+            {
+                "passed": False,
+                "feedback": "Quality criteria not met",
+                "criteria_results": [
+                    {
+                        "criterion": "Criterion 1",
+                        "passed": False,
+                        "feedback": "Did not meet requirements",
+                    }
+                ],
+            }
+        )
+        print(json.dumps(response))
+        return 0
+
+    # Auto mode: parse prompt for markers
+    if "FORCE_PASS" in prompt:
+        response = wrap_response(
+            {
+                "passed": True,
+                "feedback": "Forced pass via marker",
+                "criteria_results": [],
+            }
+        )
+        print(json.dumps(response))
+        return 0
+
+    if "FORCE_FAIL" in prompt:
+        response = wrap_response(
+            {
+                "passed": False,
+                "feedback": "Forced fail via marker",
+                "criteria_results": [
+                    {
+                        "criterion": "Test criterion",
+                        "passed": False,
+                        "feedback": "Failed due to FORCE_FAIL marker",
+                    }
+                ],
+            }
+        )
+        print(json.dumps(response))
+        return 0
+
+    # Default: analyze the prompt for quality criteria and outputs
+    # Extract criteria from prompt and evaluate based on output content
+    criteria_results = []
+    all_passed = True
+
+    # Check if outputs contain expected patterns
+    if "File not found" in prompt:
+        criteria_results.append(
+            {
+                "criterion": "Output files must exist",
+                "passed": False,
+                "feedback": "One or more output files were not found",
+            }
+        )
+        all_passed = False
+    elif "Test content" in prompt or "output.md" in prompt:
+        criteria_results.append(
+            {
+                "criterion": "Output files must exist",
+                "passed": True,
+                "feedback": None,
+            }
+        )
+
+    # Look for "must contain" type criteria
+    if "must contain" in prompt.lower():
+        if "expected content" in prompt.lower():
+            criteria_results.append(
+                {
+                    "criterion": "Output must contain expected content",
+                    "passed": True,
+                    "feedback": None,
+                }
+            )
+        else:
+            criteria_results.append(
+                {
+                    "criterion": "Output must contain expected content",
+                    "passed": False,
+                    "feedback": "Expected content not found in output",
+                }
+            )
+            all_passed = False
+
+    if not criteria_results:
+        # If no specific criteria matched, default based on whether outputs exist
+        criteria_results.append(
+            {
+                "criterion": "General quality check",
+                "passed": True,
+                "feedback": None,
+            }
+        )
+
+    quality_result = {
+        "passed": all_passed,
+        "feedback": "All criteria met" if all_passed else "Some criteria failed",
+        "criteria_results": criteria_results,
+    }
+
+    print(json.dumps(wrap_response(quality_result)))
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/tests/integration/test_fruits_workflow.py b/tests/integration/test_fruits_workflow.py
deleted file mode 100644
index 8df8d956..00000000
--- a/tests/integration/test_fruits_workflow.py
+++ /dev/null
@@ -1,198 +0,0 @@
-"""Integration tests for the fruits CI test workflow.
-
-This module tests the fruits job - a simple, deterministic workflow
-designed for automated CI testing of the DeepWork framework.
-"""
-
-from pathlib import Path
-
-from deepwork.core.adapters import ClaudeAdapter
-from deepwork.core.generator import SkillGenerator
-from deepwork.core.parser import parse_job_definition
-
-
-class TestFruitsWorkflow:
-    """Integration tests for the fruits CI test workflow."""
-
-    def test_fruits_job_parses_correctly(self, fixtures_dir: Path) -> None:
-        """Test that the fruits job definition parses correctly."""
-        job_dir = fixtures_dir / "jobs" / "fruits"
-        job = parse_job_definition(job_dir)
-
-        assert job.name == "fruits"
-        assert job.version == "1.0.0"
-        assert len(job.steps) == 2
-
-        # Verify step IDs
-        step_ids = [step.id for step in job.steps]
-        assert step_ids == ["identify", "classify"]
-
-    def test_fruits_identify_step_structure(self, fixtures_dir: Path) -> None:
-        """Test the identify step has correct structure."""
-        job_dir = fixtures_dir / "jobs" / "fruits"
-        job = parse_job_definition(job_dir)
-
-        identify_step = job.steps[0]
-        assert identify_step.id == "identify"
-        assert identify_step.name == "Identify Fruits"
-
-        # Has user input
-        assert len(identify_step.inputs) == 1
-        assert identify_step.inputs[0].is_user_input()
-        assert identify_step.inputs[0].name == "raw_items"
-
-        # Has output
-        assert len(identify_step.outputs) == 1
-        assert identify_step.outputs[0].file == "identified_fruits.md"
-
-        # No dependencies (first step)
-        assert identify_step.dependencies == []
-
-    def test_fruits_classify_step_structure(self, fixtures_dir: Path) -> None:
-        """Test the classify step has correct structure."""
-        job_dir = fixtures_dir / "jobs" / "fruits"
-        job = parse_job_definition(job_dir)
-
-        classify_step = job.steps[1]
-        assert classify_step.id == "classify"
-        assert classify_step.name == "Classify Fruits"
-
-        # Has file input from previous step
-        assert len(classify_step.inputs) == 1
-        assert classify_step.inputs[0].is_file_input()
-        assert classify_step.inputs[0].file == "identified_fruits.md"
-        assert classify_step.inputs[0].from_step == "identify"
-
-        # Has output
-        assert len(classify_step.outputs) == 1
-        assert classify_step.outputs[0].file == "classified_fruits.md"
-
-        # Depends on identify step
-        assert classify_step.dependencies == ["identify"]
-
-    def test_fruits_skill_generation(self, fixtures_dir: Path, temp_dir: Path) -> None:
-        """Test that fruits job generates valid Claude skills."""
-        job_dir = fixtures_dir / "jobs" / "fruits"
-        job = parse_job_definition(job_dir)
-
-        generator = SkillGenerator()
-        adapter = ClaudeAdapter()
-        skills_dir = temp_dir / ".claude"
-        skills_dir.mkdir()
-
-        skill_paths = generator.generate_all_skills(job, adapter, skills_dir)
-
-        # Now includes meta-skill + step skills
-        assert len(skill_paths) == 3  # 1 meta + 2 steps
-
-        # Verify skill directories with SKILL.md files exist
-        meta_skill = skills_dir / "skills" / "fruits" / "SKILL.md"
-        identify_skill = skills_dir / "skills" / "fruits.identify" / "SKILL.md"
-        classify_skill = skills_dir / "skills" / "fruits.classify" / "SKILL.md"
-        assert meta_skill.exists()
-        assert identify_skill.exists()
-        assert classify_skill.exists()
-
-    def test_fruits_identify_skill_content(self, fixtures_dir: Path, temp_dir: Path) -> None:
-        """Test the identify skill has correct content."""
-        job_dir = fixtures_dir / "jobs" / "fruits"
-        job = parse_job_definition(job_dir)
-
-        generator = SkillGenerator()
-        adapter = ClaudeAdapter()
-        skills_dir = temp_dir / ".claude"
-        skills_dir.mkdir()
-
-        generator.generate_all_skills(job, adapter, skills_dir)
-
-        # Step skills use directory/SKILL.md format
-        identify_skill = skills_dir / "skills" / "fruits.identify" / "SKILL.md"
-        content = identify_skill.read_text()
-
-        # Check header
-        assert "# fruits.identify" in content
-
-        # Check step info
-        assert "Step 1/2" in content
-
-        # Check user input is mentioned
-        assert "raw_items" in content
-
-        # Check output is mentioned
-        assert "identified_fruits.md" in content
-
-        # Check next step is suggested
-        assert "/fruits.classify" in content
-
-    def test_fruits_classify_skill_content(self, fixtures_dir: Path, temp_dir: Path) -> None:
-        """Test the classify skill has correct content."""
-        job_dir = fixtures_dir / "jobs" / "fruits"
-        job = parse_job_definition(job_dir)
-
-        generator = SkillGenerator()
-        adapter = ClaudeAdapter()
-        skills_dir = temp_dir / ".claude"
-        skills_dir.mkdir()
-
-        generator.generate_all_skills(job, adapter, skills_dir)
-
-        # Step skills use directory/SKILL.md format
-        classify_skill = skills_dir / "skills" / "fruits.classify" / "SKILL.md"
-        content = classify_skill.read_text()
-
-        # Check header
-        assert "# fruits.classify" in content
-
-        # Check step info
-        assert "Step 2/2" in content
-
-        # Check file input is mentioned
-        assert "identified_fruits.md" in content
-        assert "from `identify`" in content
-
-        # Check output is mentioned
-        assert "classified_fruits.md" in content
-
-        # Check workflow complete (last step)
-        assert "Workflow complete" in content
-
-    def test_fruits_dependency_validation(self, fixtures_dir: Path) -> None:
-        """Test that dependency validation passes for fruits job."""
-        job_dir = fixtures_dir / "jobs" / "fruits"
-        job = parse_job_definition(job_dir)
-
-        # This should not raise - dependencies are valid
-        job.validate_dependencies()
-
-    def test_fruits_job_is_deterministic_design(self, fixtures_dir: Path) -> None:
-        """Verify the fruits job is designed for deterministic testing.
-
-        This test documents the design properties that make this job
-        suitable for CI testing.
-        """
-        job_dir = fixtures_dir / "jobs" / "fruits"
-        job = parse_job_definition(job_dir)
-
-        # Job has clear, simple structure
-        assert len(job.steps) == 2
-
-        # Steps form a linear dependency chain
-        assert job.steps[0].dependencies == []
-        assert job.steps[1].dependencies == ["identify"]
-
-        # First step takes user input
-        identify_step = job.steps[0]
-        assert len(identify_step.inputs) == 1
-        assert identify_step.inputs[0].is_user_input()
-
-        # Second step uses output from first step
-        classify_step = job.steps[1]
-        assert len(classify_step.inputs) == 1
-        assert classify_step.inputs[0].is_file_input()
-        assert classify_step.inputs[0].from_step == "identify"
-
-        # Outputs are well-defined markdown files
-        assert len(identify_step.outputs) == 1
-        assert identify_step.outputs[0].file == "identified_fruits.md"
-        assert len(classify_step.outputs) == 1
-        assert classify_step.outputs[0].file == "classified_fruits.md"
diff --git a/tests/integration/test_full_workflow.py b/tests/integration/test_full_workflow.py
deleted file mode 100644
index bc7f83bf..00000000
--- a/tests/integration/test_full_workflow.py
+++ /dev/null
@@ -1,153 +0,0 @@
-"""Integration tests for full job workflow."""
-
-from pathlib import Path
-
-from deepwork.core.adapters import ClaudeAdapter
-from deepwork.core.generator import SkillGenerator
-from deepwork.core.parser import parse_job_definition
-
-
-class TestJobWorkflow:
-    """Integration tests for complete job workflow."""
-
-    def test_parse_and_generate_workflow(self, fixtures_dir: Path, temp_dir: Path) -> None:
-        """Test complete workflow: parse job → generate skills."""
-        # Step 1: Parse job definition
-        job_dir = fixtures_dir / "jobs" / "complex_job"
-        job = parse_job_definition(job_dir)
-
-        assert job.name == "competitive_research"
-        assert len(job.steps) == 4
-
-        # Step 2: Generate skills
-        generator = SkillGenerator()
-        adapter = ClaudeAdapter()
-        skills_dir = temp_dir / ".claude"
-        skills_dir.mkdir()
-
-        skill_paths = generator.generate_all_skills(job, adapter, skills_dir)
-
-        # Now includes meta-skill + step skills
-        assert len(skill_paths) == 5  # 1 meta + 4 steps
-
-        # First skill is the meta-skill
-        assert skill_paths[0].exists()
-        meta_content = skill_paths[0].read_text()
-        assert f"# {job.name}" in meta_content
-        assert "Available Steps" in meta_content
-
-        # Verify all step skill files exist and have correct content
-        for i, skill_path in enumerate(skill_paths[1:]):  # Skip meta-skill
-            assert skill_path.exists()
-            content = skill_path.read_text()
-
-            # Check skill name format (header)
-            assert f"# {job.name}.{job.steps[i].id}" in content
-
-            # Check step numbers
-            assert f"Step {i + 1}/4" in content
-
-    def test_simple_job_workflow(self, fixtures_dir: Path, temp_dir: Path) -> None:
-        """Test workflow with simple single-step job."""
-        # Parse
-        job_dir = fixtures_dir / "jobs" / "simple_job"
-        job = parse_job_definition(job_dir)
-
-        assert len(job.steps) == 1
-
-        # Generate
-        generator = SkillGenerator()
-        adapter = ClaudeAdapter()
-        skills_dir = temp_dir / ".claude"
-        skills_dir.mkdir()
-
-        skill_paths = generator.generate_all_skills(job, adapter, skills_dir)
-
-        # Now includes meta-skill + step skills
-        assert len(skill_paths) == 2  # 1 meta + 1 step
-
-        # Verify step skill content (skip meta-skill at index 0)
-        content = skill_paths[1].read_text()
-        assert "# simple_job.single_step" in content
-        # Single step with no dependencies is treated as standalone
-        assert "Standalone skill" in content
-        assert "input_param" in content
-        assert "standalone skill can be re-run" in content  # Standalone completion message
-
-    def test_skill_generation_with_dependencies(self, fixtures_dir: Path, temp_dir: Path) -> None:
-        """Test that generated skills properly handle dependencies."""
-        job_dir = fixtures_dir / "jobs" / "complex_job"
-        job = parse_job_definition(job_dir)
-
-        generator = SkillGenerator()
-        adapter = ClaudeAdapter()
-        skills_dir = temp_dir / ".claude"
-        skills_dir.mkdir()
-
-        skill_paths = generator.generate_all_skills(job, adapter, skills_dir)
-
-        # skill_paths[0] is meta-skill, steps start at index 1
-
-        # Check first step (no prerequisites)
-        step1_content = skill_paths[1].read_text()
-        assert "## Prerequisites" not in step1_content
-        assert "/competitive_research.primary_research" in step1_content  # Next step
-
-        # Check second step (has prerequisites and next step)
-        step2_content = skill_paths[2].read_text()
-        assert "## Prerequisites" in step2_content
-        assert "/competitive_research.identify_competitors" in step2_content
-        assert "/competitive_research.secondary_research" in step2_content  # Next step
-
-        # Check last step (has prerequisites, no next step)
-        step4_content = skill_paths[4].read_text()
-        assert "## Prerequisites" in step4_content
-        assert "**Workflow complete**" in step4_content
-        assert "## Next Step" not in step4_content
-
-    def test_skill_generation_with_file_inputs(self, fixtures_dir: Path, temp_dir: Path) -> None:
-        """Test that generated skills properly handle file inputs."""
-        job_dir = fixtures_dir / "jobs" / "complex_job"
-        job = parse_job_definition(job_dir)
-
-        generator = SkillGenerator()
-        adapter = ClaudeAdapter()
-        skills_dir = temp_dir / ".claude"
-        skills_dir.mkdir()
-
-        skill_paths = generator.generate_all_skills(job, adapter, skills_dir)
-
-        # skill_paths[0] is meta-skill, steps start at index 1
-
-        # Check step with file input
-        step2_content = skill_paths[2].read_text()  # primary_research (index 2)
-        assert "## Required Inputs" in step2_content
-        assert "**Files from Previous Steps**" in step2_content
-        assert "competitors.md" in step2_content
-        assert "from `identify_competitors`" in step2_content
-
-        # Check step with multiple file inputs
-        step4_content = skill_paths[4].read_text()  # comparative_report (index 4)
-        assert "primary_research.md" in step4_content
-        assert "secondary_research.md" in step4_content
-
-    def test_skill_generation_with_user_inputs(self, fixtures_dir: Path, temp_dir: Path) -> None:
-        """Test that generated skills properly handle user parameter inputs."""
-        job_dir = fixtures_dir / "jobs" / "complex_job"
-        job = parse_job_definition(job_dir)
-
-        generator = SkillGenerator()
-        adapter = ClaudeAdapter()
-        skills_dir = temp_dir / ".claude"
-        skills_dir.mkdir()
-
-        skill_paths = generator.generate_all_skills(job, adapter, skills_dir)
-
-        # skill_paths[0] is meta-skill, steps start at index 1
-
-        # Check step with user inputs
-        step1_content = skill_paths[1].read_text()  # identify_competitors (index 1)
-        assert "## Required Inputs" in step1_content
-        assert "**User Parameters**" in step1_content
-        assert "market_segment" in step1_content
-        assert "product_category" in step1_content
diff --git a/tests/integration/test_install_flow.py b/tests/integration/test_install_flow.py
index d0638275..169e90ed 100644
--- a/tests/integration/test_install_flow.py
+++ b/tests/integration/test_install_flow.py
@@ -39,25 +39,13 @@ def test_install_with_claude(self, mock_claude_project: Path) -> None:
         assert config is not None
         assert "claude" in config["platforms"]
 
-        # Verify core skills were created (directory/SKILL.md format)
+        # Verify MCP entry point skill was created (deepwork/SKILL.md)
         claude_dir = mock_claude_project / ".claude" / "skills"
-        # Meta-skill
-        assert (claude_dir / "deepwork_jobs" / "SKILL.md").exists()
-        # Step skill (no prefix, but has user-invocable: false in frontmatter)
-        assert (claude_dir / "deepwork_jobs.define" / "SKILL.md").exists()
-        # Exposed step skill (user-invocable - learn has exposed: true)
-        assert (claude_dir / "deepwork_jobs.learn" / "SKILL.md").exists()
-
-        # Verify meta-skill content
-        meta_skill = (claude_dir / "deepwork_jobs" / "SKILL.md").read_text()
-        assert "# deepwork_jobs" in meta_skill
-        # deepwork_jobs has workflows defined, so it shows "Workflows" instead of "Available Steps"
-        assert "Workflows" in meta_skill or "Available Steps" in meta_skill
-
-        # Verify step skill content
-        define_skill = (claude_dir / "deepwork_jobs.define" / "SKILL.md").read_text()
-        assert "# deepwork_jobs.define" in define_skill
-        assert "Define Job Specification" in define_skill
+        assert (claude_dir / "deepwork" / "SKILL.md").exists()
+
+        # Verify deepwork skill content references MCP tools
+        deepwork_skill = (claude_dir / "deepwork" / "SKILL.md").read_text()
+        assert "deepwork" in deepwork_skill.lower()
 
     def test_install_with_auto_detect(self, mock_claude_project: Path) -> None:
         """Test installing with auto-detection."""
@@ -103,9 +91,9 @@ def test_install_defaults_to_claude_when_no_platform(self, mock_git_repo: Path)
         assert config is not None
         assert "claude" in config["platforms"]
 
-        # Verify skills were created for Claude
+        # Verify MCP entry point skill was created for Claude
         skills_dir = claude_dir / "skills"
-        assert (skills_dir / "deepwork_jobs" / "SKILL.md").exists()
+        assert (skills_dir / "deepwork" / "SKILL.md").exists()
 
     def test_install_with_multiple_platforms_auto_detect(
         self, mock_multi_platform_project: Path
@@ -132,17 +120,13 @@ def test_install_with_multiple_platforms_auto_detect(
         assert "claude" in config["platforms"]
         assert "gemini" in config["platforms"]
 
-        # Verify skills were created for both platforms
+        # Verify MCP entry point skill was created for Claude
         claude_dir = mock_multi_platform_project / ".claude" / "skills"
-        # Meta-skill and step skills (directory/SKILL.md format)
-        assert (claude_dir / "deepwork_jobs" / "SKILL.md").exists()
-        assert (claude_dir / "deepwork_jobs.define" / "SKILL.md").exists()
+        assert (claude_dir / "deepwork" / "SKILL.md").exists()
 
-        # Gemini uses job_name/step_id.toml structure
-        gemini_dir = mock_multi_platform_project / ".gemini" / "skills"
-        # Meta-skill (index.toml) and step skills
-        assert (gemini_dir / "deepwork_jobs" / "index.toml").exists()
-        assert (gemini_dir / "deepwork_jobs" / "define.toml").exists()
+        # Note: Gemini MCP skill template (skill-deepwork) is not yet implemented
+        # so we don't assert on Gemini skill existence - the install will show
+        # an error for Gemini skill generation but continue
 
     def test_install_with_specified_platform_when_missing(self, mock_git_repo: Path) -> None:
         """Test that install fails when specified platform is not present."""
@@ -181,66 +165,39 @@ def test_install_is_idempotent(self, mock_claude_project: Path) -> None:
         assert (deepwork_dir / "config.yml").exists()
 
         claude_dir = mock_claude_project / ".claude" / "skills"
-        # Meta-skill and step skills (directory/SKILL.md format)
-        assert (claude_dir / "deepwork_jobs" / "SKILL.md").exists()
-        assert (claude_dir / "deepwork_jobs.define" / "SKILL.md").exists()
-        assert (claude_dir / "deepwork_jobs.learn" / "SKILL.md").exists()
+        # MCP entry point skill
+        assert (claude_dir / "deepwork" / "SKILL.md").exists()
 
-    def test_install_creates_rules_directory(self, mock_claude_project: Path) -> None:
-        """Test that install creates the v2 rules directory with example templates."""
+    def test_install_shows_repair_message_when_job_fails_to_parse(
+        self, mock_claude_project: Path
+    ) -> None:
+        """Test that install shows repair message when there are warnings."""
         runner = CliRunner()
 
-        result = runner.invoke(
+        # First do a normal install
+        result1 = runner.invoke(
             cli,
             ["install", "--platform", "claude", "--path", str(mock_claude_project)],
             catch_exceptions=False,
         )
+        assert result1.exit_code == 0
+        assert "DeepWork installed successfully" in result1.output
 
-        assert result.exit_code == 0
-        assert ".deepwork/rules/ with example templates" in result.output
-
-        # Verify rules directory was created
-        rules_dir = mock_claude_project / ".deepwork" / "rules"
-        assert rules_dir.exists()
-
-        # Verify README was created
-        readme_file = rules_dir / "README.md"
-        assert readme_file.exists()
-        content = readme_file.read_text()
-        assert "DeepWork Rules" in content
-        assert "YAML frontmatter" in content
-
-        # Verify example templates were copied
-        example_files = list(rules_dir.glob("*.md.example"))
-        assert len(example_files) >= 1  # At least one example template
-
-    def test_install_preserves_existing_rules_directory(self, mock_claude_project: Path) -> None:
-        """Test that install doesn't overwrite existing rules directory."""
-        runner = CliRunner()
-
-        # Create a custom rules directory before install
-        rules_dir = mock_claude_project / ".deepwork" / "rules"
-        rules_dir.mkdir(parents=True)
-        custom_rule = rules_dir / "my-custom-rule.md"
-        custom_content = """---
-name: My Custom Rule
-trigger: "src/**/*"
----
-Custom instructions here.
-"""
-        custom_rule.write_text(custom_content)
+        # Create a malformed job definition
+        jobs_dir = mock_claude_project / ".deepwork" / "jobs" / "broken_job"
+        jobs_dir.mkdir(parents=True, exist_ok=True)
+        (jobs_dir / "job.yml").write_text("invalid: yaml: content: [")
 
-        result = runner.invoke(
+        # Reinstall - should show repair message due to parsing warning
+        result2 = runner.invoke(
             cli,
             ["install", "--platform", "claude", "--path", str(mock_claude_project)],
             catch_exceptions=False,
         )
-
-        assert result.exit_code == 0
-        assert ".deepwork/rules/ already exists" in result.output
-
-        # Verify original content is preserved
-        assert custom_rule.read_text() == custom_content
+        assert result2.exit_code == 0
+        assert "You should repair your DeepWork install" in result2.output
+        assert "/deepwork repair" in result2.output
+        assert "DeepWork installed successfully" not in result2.output
 
 
 class TestCLIEntryPoint:
diff --git a/tests/integration/test_install_requirements.py b/tests/integration/test_install_requirements.py
deleted file mode 100644
index 63d8dcba..00000000
--- a/tests/integration/test_install_requirements.py
+++ /dev/null
@@ -1,343 +0,0 @@
-"""
-================================================================================
-                    REQUIREMENTS TESTS - DO NOT MODIFY
-================================================================================
-
-These tests verify CRITICAL REQUIREMENTS for the DeepWork install process.
-They ensure the install command behaves correctly with respect to:
-
-1. LOCAL vs PROJECT settings isolation
-2. Idempotency of project settings
-
-WARNING: These tests represent contractual requirements for the install process.
-Modifying these tests may violate user expectations and could cause data loss
-or unexpected behavior. If a test fails, fix the IMPLEMENTATION, not the test.
-
-Requirements tested:
-  - REQ-001: Install MUST NOT modify local (user home) Claude settings
-  - REQ-002: Install MUST be idempotent for project settings
-
-================================================================================
-"""
-
-import json
-from collections.abc import Iterator
-from contextlib import contextmanager
-from pathlib import Path
-from unittest.mock import patch
-
-import pytest
-from click.testing import CliRunner
-
-from deepwork.cli.main import cli
-
-# =============================================================================
-# HELPER FUNCTIONS
-# =============================================================================
-# These helpers reduce repetition while keeping individual tests readable.
-# The helpers themselves are simple and should not mask test intent.
-
-
-def run_install(project_path: Path) -> None:
-    """Run deepwork install for Claude on the given project path.
-
-    Raises AssertionError if install fails.
-    """
-    runner = CliRunner()
-    result = runner.invoke(
-        cli,
-        ["install", "--platform", "claude", "--path", str(project_path)],
-        catch_exceptions=False,
-    )
-    assert result.exit_code == 0, f"Install failed: {result.output}"
-
-
-def get_project_settings(project_path: Path) -> dict:
-    """Read and parse the project's Claude settings.json."""
-    settings_file = project_path / ".claude" / "settings.json"
-    return json.loads(settings_file.read_text())
-
-
-def assert_install_added_hooks(settings_before: dict, settings_after: dict) -> None:
-    """Assert that install actually modified settings by adding hooks.
-
-    This ensures idempotency tests are meaningful - if install does nothing,
-    idempotency would trivially pass but the test would be useless.
-    """
-    assert "hooks" in settings_after, (
-        "FIRST INSTALL DID NOT ADD HOOKS! "
-        "Install must add hooks to project settings. "
-        "This test requires install to actually modify settings to verify idempotency."
-    )
-    assert settings_after != settings_before, (
-        "FIRST INSTALL DID NOT MODIFY SETTINGS! "
-        "Install must modify project settings on first run. "
-        "This test requires install to actually do something to verify idempotency."
-    )
-
-
-@contextmanager
-def mock_local_claude_settings(
-    tmp_path: Path, content: str | dict = '{"local": "unchanged"}'
-) -> Iterator[Path]:
-    """Create mock local Claude settings and patch HOME to use them.
-
-    Args:
-        tmp_path: Temporary directory to create mock home in
-        content: Settings content (string or dict to be JSON-serialized)
-
-    Yields:
-        Path to the local settings file (for verification after install)
-    """
-    mock_home = tmp_path / "mock_home"
-    mock_local_claude_dir = mock_home / ".claude"
-    mock_local_claude_dir.mkdir(parents=True)
-
-    local_settings_file = mock_local_claude_dir / "settings.json"
-    if isinstance(content, dict):
-        local_settings_file.write_text(json.dumps(content, indent=2))
-    else:
-        local_settings_file.write_text(content)
-
-    with patch.dict("os.environ", {"HOME": str(mock_home)}):
-        yield local_settings_file
-
-
-# =============================================================================
-# REQ-001: Install MUST NOT modify local (user home) Claude settings
-# =============================================================================
-#
-# Claude Code has two levels of settings:
-# - LOCAL settings: ~/.claude/settings.json (user's global settings)
-# - PROJECT settings: <project>/.claude/settings.json (project-specific)
-#
-# DeepWork install MUST ONLY modify project settings and NEVER touch
-# the user's local settings, which may contain personal configurations,
-# API keys, or other sensitive data.
-#
-# DO NOT MODIFY THIS TEST - It protects user data integrity.
-# =============================================================================
-
-
-class TestLocalSettingsProtection:
-    """
-    REQUIREMENTS TEST: Verify install does not modify local Claude settings.
-
-    ============================================================================
-    WARNING: DO NOT MODIFY THESE TESTS
-    ============================================================================
-
-    These tests verify that the install process respects the boundary between
-    project-level and user-level settings. Modifying these tests could result
-    in DeepWork overwriting user's personal Claude configurations.
-    """
-
-    def test_install_does_not_modify_local_claude_settings(
-        self, mock_claude_project: Path, tmp_path: Path
-    ) -> None:
-        """
-        REQ-001: Install MUST NOT modify local (home directory) Claude settings.
-
-        This test creates a mock local settings file and verifies that the
-        DeepWork install process does not modify it in any way.
-
-        DO NOT MODIFY THIS TEST.
-        """
-        original_local_settings = {
-            "user_preference": "do_not_change",
-            "api_key_encrypted": "sensitive_data_here",
-            "custom_config": {"setting1": True, "setting2": "value"},
-        }
-
-        with mock_local_claude_settings(tmp_path, original_local_settings) as local_file:
-            original_mtime = local_file.stat().st_mtime
-            run_install(mock_claude_project)
-
-            # CRITICAL: Verify local settings were NOT modified
-            assert local_file.exists(), "Local settings file should still exist"
-
-            current_local_settings = json.loads(local_file.read_text())
-            assert current_local_settings == original_local_settings, (
-                "LOCAL SETTINGS WERE MODIFIED! "
-                "Install MUST NOT touch user's home directory Claude settings. "
-                f"Expected: {original_local_settings}, Got: {current_local_settings}"
-            )
-
-            assert local_file.stat().st_mtime == original_mtime, (
-                "LOCAL SETTINGS FILE WAS TOUCHED! "
-                "Install MUST NOT access user's home directory Claude settings."
-            )
-
-    def test_install_only_modifies_project_settings(
-        self, mock_claude_project: Path, tmp_path: Path
-    ) -> None:
-        """
-        REQ-001 (corollary): Install MUST modify only project-level settings.
-
-        Verifies that the install process correctly modifies project settings
-        while leaving local settings untouched.
-
-        DO NOT MODIFY THIS TEST.
-        """
-        original_local_content = '{"local": "unchanged"}'
-
-        with mock_local_claude_settings(tmp_path, original_local_content) as local_file:
-            run_install(mock_claude_project)
-
-            # Verify LOCAL settings unchanged
-            assert local_file.read_text() == original_local_content, (
-                "Local settings were modified! Install must only modify project settings."
-            )
-
-            # Verify PROJECT settings were modified (hooks should be added)
-            project_settings = get_project_settings(mock_claude_project)
-            assert "hooks" in project_settings, "Project settings should have hooks after install"
-
-
-# =============================================================================
-# REQ-002: Install MUST be idempotent for project settings
-# =============================================================================
-#
-# Running `deepwork install` multiple times on the same project MUST produce
-# identical results. The second and subsequent installs should not:
-# - Add duplicate entries
-# - Modify timestamps unnecessarily
-# - Change the structure or content of settings
-#
-# This ensures that users can safely re-run install without side effects,
-# which is important for CI/CD pipelines, onboarding scripts, and
-# troubleshooting scenarios.
-#
-# DO NOT MODIFY THIS TEST - It ensures installation reliability.
-# =============================================================================
-
-
-class TestProjectSettingsIdempotency:
-    """
-    REQUIREMENTS TEST: Verify install is idempotent for project settings.
-
-    ============================================================================
-    WARNING: DO NOT MODIFY THESE TESTS
-    ============================================================================
-
-    These tests verify that running install multiple times produces identical
-    results. This is critical for:
-    - CI/CD reliability
-    - Safe re-installation
-    - Troubleshooting without side effects
-    """
-
-    def test_project_settings_unchanged_on_second_install(self, mock_claude_project: Path) -> None:
-        """
-        REQ-002: Second install MUST NOT change project settings.
-
-        Running install twice should produce identical settings.json content.
-        The first install MUST modify settings (add hooks), and the second
-        install should be a no-op for settings.
-
-        DO NOT MODIFY THIS TEST.
-        """
-        # Capture settings BEFORE first install
-        settings_before = get_project_settings(mock_claude_project)
-
-        # First install
-        run_install(mock_claude_project)
-        settings_after_first = get_project_settings(mock_claude_project)
-
-        # CRITICAL: First install MUST actually modify settings
-        assert_install_added_hooks(settings_before, settings_after_first)
-
-        # Second install
-        run_install(mock_claude_project)
-        settings_after_second = get_project_settings(mock_claude_project)
-
-        # CRITICAL: Settings must be identical after second install
-        assert settings_after_first == settings_after_second, (
-            "PROJECT SETTINGS CHANGED ON SECOND INSTALL! "
-            "Install MUST be idempotent. "
-            f"After first: {json.dumps(settings_after_first, indent=2)}\n"
-            f"After second: {json.dumps(settings_after_second, indent=2)}"
-        )
-
-    def test_no_duplicate_hooks_on_multiple_installs(self, mock_claude_project: Path) -> None:
-        """
-        REQ-002 (corollary): Multiple installs MUST NOT create duplicate hooks.
-
-        This specifically tests that hooks are not duplicated, which would
-        cause performance issues and unexpected behavior.
-
-        DO NOT MODIFY THIS TEST.
-        """
-        # Run install three times
-        for _ in range(3):
-            run_install(mock_claude_project)
-
-        # Load final settings
-        settings = get_project_settings(mock_claude_project)
-
-        # CRITICAL: Hooks must exist for this test to be meaningful
-        assert "hooks" in settings, (
-            "NO HOOKS FOUND AFTER INSTALL! "
-            "Install must add hooks to project settings. "
-            "This test requires hooks to exist to verify no duplicates are created."
-        )
-
-        # Verify no duplicate hooks
-        for event_name, hooks_list in settings["hooks"].items():
-            # Extract all hook commands for duplicate detection
-            commands = [
-                hook["command"]
-                for hook_entry in hooks_list
-                for hook in hook_entry.get("hooks", [])
-                if "command" in hook
-            ]
-
-            # Check for duplicates
-            assert len(commands) == len(set(commands)), (
-                f"DUPLICATE HOOKS DETECTED for event '{event_name}'! "
-                f"Install MUST be idempotent. Commands: {commands}"
-            )
-
-    def test_third_install_identical_to_first(self, mock_claude_project: Path) -> None:
-        """
-        REQ-002 (extended): Nth install MUST produce same result as first.
-
-        This tests the general idempotency property across multiple runs.
-        The first install MUST modify settings, and all subsequent installs
-        MUST produce identical results.
-
-        DO NOT MODIFY THIS TEST.
-        """
-        # Capture settings BEFORE any install
-        settings_before = get_project_settings(mock_claude_project)
-
-        # First install
-        run_install(mock_claude_project)
-        settings_after_first = get_project_settings(mock_claude_project)
-
-        # CRITICAL: First install MUST actually modify settings
-        assert_install_added_hooks(settings_before, settings_after_first)
-
-        # Run multiple more installs
-        for _ in range(5):
-            run_install(mock_claude_project)
-
-        # Final state should match first install
-        settings_after_many = get_project_settings(mock_claude_project)
-
-        assert settings_after_first == settings_after_many, (
-            "SETTINGS DIVERGED AFTER MULTIPLE INSTALLS! "
-            "Install must be idempotent regardless of how many times it runs."
-        )
-
-
-# =============================================================================
-# FIXTURE EXTENSIONS
-# =============================================================================
-# Additional fixtures needed for these requirement tests
-
-
-@pytest.fixture
-def tmp_path(temp_dir: Path) -> Path:
-    """Alias for temp_dir to match pytest naming convention."""
-    return temp_dir
diff --git a/tests/integration/test_quality_gate_integration.py b/tests/integration/test_quality_gate_integration.py
new file mode 100644
index 00000000..bbf41b8a
--- /dev/null
+++ b/tests/integration/test_quality_gate_integration.py
@@ -0,0 +1,145 @@
+"""Integration tests for quality gate subprocess execution.
+
+###############################################################################
+# ⚠️  CRITICAL: THESE TESTS MUST USE THE REAL CLAUDE CLI ⚠️
+#
+# The entire point of these integration tests is to verify that the QualityGate
+# class works correctly with the ACTUAL Claude Code CLI subprocess.
+#
+# DO NOT:
+#   - Mock the QualityGate class
+#   - Use _test_command parameter
+#   - Stub out subprocess calls
+#   - Use the MockQualityGate class
+#
+# If you need to test parsing logic or edge cases, add those tests to:
+#   tests/unit/mcp/test_quality_gate.py
+#
+# These tests are SKIPPED in CI because they require Claude Code CLI to be
+# installed and authenticated. They are meant to be run locally during
+# development to verify real-world behavior.
+###############################################################################
+"""
+
+from __future__ import annotations
+
+import os
+from pathlib import Path
+
+import pytest
+
+from deepwork.mcp.claude_cli import ClaudeCLI
+from deepwork.mcp.quality_gate import QualityGate
+
+# Skip marker for tests that require real Claude CLI
+# GitHub Actions sets CI=true, as do most other CI systems
+requires_claude_cli = pytest.mark.skipif(
+    os.environ.get("CI") == "true" or os.environ.get("GITHUB_ACTIONS") == "true",
+    reason="Integration tests require Claude CLI - skipped in CI",
+)
+
+
+@pytest.fixture
+def project_root(tmp_path: Path) -> Path:
+    """Create a temporary project root with test files."""
+    return tmp_path
+
+
+###############################################################################
+# ⚠️  REAL INTEGRATION TESTS - DO NOT MOCK ⚠️
+#
+# These tests call the actual Claude CLI. They verify that:
+#   1. The subprocess invocation works correctly
+#   2. The JSON schema is properly passed and enforced
+#   3. Response parsing handles real Claude output
+#
+# Run these locally with: pytest tests/integration/test_quality_gate_integration.py -v
+###############################################################################
+
+
+@requires_claude_cli
+class TestRealClaudeIntegration:
+    """Integration tests that run the REAL Claude CLI.
+
+    ⚠️  WARNING: DO NOT MOCK THESE TESTS ⚠️
+
+    These tests exist specifically to verify that QualityGate works with the
+    actual Claude Code CLI. If you mock them, you defeat their entire purpose.
+    """
+
+    async def test_real_claude_evaluates_passing_criteria(self, project_root: Path) -> None:
+        """Test that real Claude CLI correctly evaluates passing criteria.
+
+        ⚠️  THIS TEST MUST USE THE REAL CLAUDE CLI - DO NOT MOCK ⚠️
+        """
+        # Create a well-formed output file that clearly meets the criteria
+        output_file = project_root / "analysis.md"
+        output_file.write_text(
+            "# Analysis Report\n\n"
+            "## Summary\n"
+            "This document contains a complete analysis.\n\n"
+            "## Details\n"
+            "The analysis covers all required points.\n"
+        )
+
+        # ⚠️  NO _test_command - this uses the REAL Claude CLI
+        gate = QualityGate(cli=ClaudeCLI(timeout=120))
+
+        result = await gate.evaluate(
+            quality_criteria={
+                "Has Title": "Does the document have a title?",
+                "Has Summary": "Does the document contain a summary section?",
+            },
+            outputs={"analysis": "analysis.md"},
+            project_root=project_root,
+        )
+
+        # Verify we got a structured response
+        assert result is not None
+        assert isinstance(result.passed, bool)
+        assert isinstance(result.feedback, str)
+        assert len(result.feedback) > 0
+
+        # The document clearly meets the criteria, so it should pass
+        # (though we allow for some model variability)
+        if not result.passed:
+            # If it failed, at least verify we got proper feedback
+            assert len(result.criteria_results) > 0
+            pytest.skip(f"Model returned fail (may be model variability): {result.feedback}")
+
+    async def test_real_claude_evaluates_failing_criteria(self, project_root: Path) -> None:
+        """Test that real Claude CLI correctly identifies missing criteria.
+
+        ⚠️  THIS TEST MUST USE THE REAL CLAUDE CLI - DO NOT MOCK ⚠️
+        """
+        # Create an output file that is clearly missing required content
+        output_file = project_root / "incomplete.md"
+        output_file.write_text("Just some random text without any structure.")
+
+        # ⚠️  NO _test_command - this uses the REAL Claude CLI
+        gate = QualityGate(cli=ClaudeCLI(timeout=120))
+
+        result = await gate.evaluate(
+            quality_criteria={
+                "Executive Summary": "Does the document contain a section titled 'Executive Summary'?",
+                "Recommendations": "Does the document include a numbered list of recommendations?",
+                "Conclusions": "Does the document have a 'Conclusions' section?",
+            },
+            outputs={"document": "incomplete.md"},
+            project_root=project_root,
+        )
+
+        # Verify we got a structured response
+        assert result is not None
+        assert isinstance(result.passed, bool)
+        assert isinstance(result.feedback, str)
+
+        # The document clearly doesn't meet these specific criteria
+        # (though we allow for some model variability)
+        if result.passed:
+            pytest.skip(
+                f"Model returned pass unexpectedly (may be model variability): {result.feedback}"
+            )
+
+        # Should have feedback about what's missing
+        assert len(result.feedback) > 0
diff --git a/tests/shell_script_tests/README.md b/tests/shell_script_tests/README.md
deleted file mode 100644
index 76cd8f05..00000000
--- a/tests/shell_script_tests/README.md
+++ /dev/null
@@ -1,76 +0,0 @@
-# Shell Script Tests
-
-Automated tests for DeepWork shell scripts and hooks, with a focus on validating Claude Code hooks JSON response formats.
-
-## Hooks and Scripts Tested
-
-| Hook/Script | Type | Description |
-|-------------|------|-------------|
-| `deepwork.hooks.rules_check` | Stop Hook (Python) | Evaluates rules and blocks agent stop if rules are triggered |
-| `user_prompt_submit.sh` | UserPromptSubmit Hook | Captures work tree state when user submits a prompt |
-| `capture_prompt_work_tree.sh` | Helper | Records current git state for `compare_to: prompt` rules |
-| `make_new_job.sh` | Utility | Creates directory structure for new DeepWork jobs |
-
-## Claude Code Hooks JSON Format
-
-Hook scripts must return valid JSON responses. The tests enforce these formats:
-
-### Stop Hooks (`hooks.after_agent`)
-```json
-{}                                          // Allow stop
-{"decision": "block", "reason": "..."}      // Block stop with reason
-```
-
-### UserPromptSubmit Hooks (`hooks.before_prompt`)
-```json
-{}    // No output or empty object (side-effect only hooks)
-```
-
-### All Hooks
-- Must return valid JSON if producing output
-- Non-JSON output on stdout is **not allowed** (stderr is ok)
-- Exit code 0 indicates success (even when blocking)
-
-## Running Tests
-
-```bash
-# Run all shell script tests
-uv run pytest tests/shell_script_tests/ -v
-
-# Run tests for a specific script
-uv run pytest tests/shell_script_tests/test_rules_stop_hook.py -v
-
-# Run with coverage
-uv run pytest tests/shell_script_tests/ --cov=src/deepwork
-```
-
-## Test Structure
-
-```
-tests/shell_script_tests/
-├── conftest.py                      # Shared fixtures and helpers
-├── test_hooks.py                    # Consolidated hook tests (JSON format, exit codes)
-├── test_rules_stop_hook.py          # Stop hook blocking/allowing tests
-├── test_user_prompt_submit.py       # Prompt submission hook tests
-├── test_capture_prompt_work_tree.py # Work tree capture tests
-└── test_make_new_job.py             # Job directory creation tests
-```
-
-## Shared Fixtures
-
-Available in `conftest.py`:
-
-| Fixture | Description |
-|---------|-------------|
-| `git_repo` | Basic git repo with initial commit |
-| `git_repo_with_rule` | Git repo with a Python file rule |
-| `rules_hooks_dir` | Path to rules hooks scripts |
-| `jobs_scripts_dir` | Path to job management scripts |
-
-## Adding New Tests
-
-1. Use shared fixtures from `conftest.py` when possible
-2. Use `run_shell_script()` helper for running scripts
-3. Validate JSON output with `validate_json_output()` and `validate_stop_hook_response()`
-4. Test both success and failure cases
-5. Verify exit codes (hooks should exit 0 even when blocking)
diff --git a/tests/shell_script_tests/__init__.py b/tests/shell_script_tests/__init__.py
deleted file mode 100644
index 1b0c3352..00000000
--- a/tests/shell_script_tests/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-"""Shell script tests for DeepWork hooks."""
diff --git a/tests/shell_script_tests/conftest.py b/tests/shell_script_tests/conftest.py
deleted file mode 100644
index 3ac15822..00000000
--- a/tests/shell_script_tests/conftest.py
+++ /dev/null
@@ -1,131 +0,0 @@
-"""Shared fixtures for shell script tests."""
-
-import json
-import os
-import subprocess
-from pathlib import Path
-
-import pytest
-from git import Repo
-
-
-@pytest.fixture
-def git_repo(tmp_path: Path) -> Path:
-    """Create a basic git repo for testing."""
-    repo = Repo.init(tmp_path)
-
-    readme = tmp_path / "README.md"
-    readme.write_text("# Test Project\n")
-    repo.index.add(["README.md"])
-    repo.index.commit("Initial commit")
-
-    return tmp_path
-
-
-@pytest.fixture
-def git_repo_with_rule(tmp_path: Path) -> Path:
-    """Create a git repo with rule that will fire."""
-    repo = Repo.init(tmp_path)
-
-    readme = tmp_path / "README.md"
-    readme.write_text("# Test Project\n")
-    repo.index.add(["README.md"])
-    repo.index.commit("Initial commit")
-
-    # Create v2 rules directory and file
-    rules_dir = tmp_path / ".deepwork" / "rules"
-    rules_dir.mkdir(parents=True, exist_ok=True)
-
-    # Rule that triggers on any Python file (v2 format)
-    rule_file = rules_dir / "python-file-rule.md"
-    rule_file.write_text(
-        """---
-name: Python File Rule
-trigger: "**/*.py"
-compare_to: prompt
----
-Review Python files for quality.
-"""
-    )
-
-    # Empty baseline so new files trigger
-    deepwork_dir = tmp_path / ".deepwork"
-    (deepwork_dir / ".last_work_tree").write_text("")
-
-    return tmp_path
-
-
-@pytest.fixture
-def rules_hooks_dir() -> Path:
-    """Return the path to the rules hooks scripts directory."""
-    return (
-        Path(__file__).parent.parent.parent
-        / "src"
-        / "deepwork"
-        / "standard_jobs"
-        / "deepwork_rules"
-        / "hooks"
-    )
-
-
-@pytest.fixture
-def hooks_dir() -> Path:
-    """Return the path to the main hooks directory (platform wrappers)."""
-    return Path(__file__).parent.parent.parent / "src" / "deepwork" / "hooks"
-
-
-@pytest.fixture
-def src_dir() -> Path:
-    """Return the path to the src directory for PYTHONPATH."""
-    return Path(__file__).parent.parent.parent / "src"
-
-
-@pytest.fixture
-def jobs_scripts_dir() -> Path:
-    """Return the path to the jobs scripts directory."""
-    return (
-        Path(__file__).parent.parent.parent / "src" / "deepwork" / "standard_jobs" / "deepwork_jobs"
-    )
-
-
-def run_shell_script(
-    script_path: Path,
-    cwd: Path,
-    args: list[str] | None = None,
-    hook_input: dict | None = None,
-    env_extra: dict[str, str] | None = None,
-) -> tuple[str, str, int]:
-    """
-    Run a shell script and return its output.
-
-    Args:
-        script_path: Path to the shell script
-        cwd: Working directory to run the script in
-        args: Optional list of arguments to pass to the script
-        hook_input: Optional JSON input to pass via stdin
-        env_extra: Optional extra environment variables
-
-    Returns:
-        Tuple of (stdout, stderr, return_code)
-    """
-    env = os.environ.copy()
-    env["PYTHONPATH"] = str(Path(__file__).parent.parent.parent / "src")
-    if env_extra:
-        env.update(env_extra)
-
-    cmd = ["bash", str(script_path)]
-    if args:
-        cmd.extend(args)
-
-    stdin_data = json.dumps(hook_input) if hook_input else ""
-
-    result = subprocess.run(
-        cmd,
-        cwd=cwd,
-        capture_output=True,
-        text=True,
-        input=stdin_data,
-        env=env,
-    )
-
-    return result.stdout, result.stderr, result.returncode
diff --git a/tests/shell_script_tests/test_block_bash_with_instructions.py b/tests/shell_script_tests/test_block_bash_with_instructions.py
deleted file mode 100644
index e916ddba..00000000
--- a/tests/shell_script_tests/test_block_bash_with_instructions.py
+++ /dev/null
@@ -1,237 +0,0 @@
-"""Tests for block_bash_with_instructions.sh hook.
-
-This hook blocks specific Bash commands (e.g., git commit) and provides
-alternative instructions via stderr when exit code 2 is returned.
-
-Hook Contract (PreToolUse with exit code 2):
-  - Exit code 0: Allow the command
-  - Exit code 2: Block the command, stderr message shown to Claude
-  - stderr: Contains the instruction message when blocking
-
-See: https://docs.anthropic.com/en/docs/claude-code/hooks
-"""
-
-import json
-import os
-import subprocess
-from pathlib import Path
-
-import pytest
-
-
-@pytest.fixture
-def block_bash_hook_path() -> Path:
-    """Return the path to the block_bash_with_instructions.sh script."""
-    return (
-        Path(__file__).parent.parent.parent
-        / ".claude"
-        / "hooks"
-        / "block_bash_with_instructions.sh"
-    )
-
-
-def run_block_bash_hook(
-    script_path: Path,
-    tool_name: str,
-    command: str,
-) -> tuple[str, str, int]:
-    """
-    Run the block_bash_with_instructions.sh hook with simulated input.
-
-    Args:
-        script_path: Path to the hook script
-        tool_name: The tool name (e.g., "Bash")
-        command: The bash command being executed
-
-    Returns:
-        Tuple of (stdout, stderr, return_code)
-    """
-    hook_input = {
-        "session_id": "test123",
-        "hook_event_name": "PreToolUse",
-        "tool_name": tool_name,
-        "tool_input": {
-            "command": command,
-        },
-    }
-
-    result = subprocess.run(
-        ["bash", str(script_path)],
-        capture_output=True,
-        text=True,
-        input=json.dumps(hook_input),
-        env=os.environ.copy(),
-    )
-
-    return result.stdout, result.stderr, result.returncode
-
-
-class TestBlockBashHookExists:
-    """Tests that the hook script exists and is properly configured."""
-
-    def test_script_exists(self, block_bash_hook_path: Path) -> None:
-        """Test that the hook script exists."""
-        assert block_bash_hook_path.exists(), "block_bash_with_instructions.sh should exist"
-
-    def test_script_is_executable(self, block_bash_hook_path: Path) -> None:
-        """Test that the hook script is executable."""
-        assert os.access(block_bash_hook_path, os.X_OK), (
-            "block_bash_with_instructions.sh should be executable"
-        )
-
-
-class TestGitCommitBlocking:
-    """Tests for git commit command blocking."""
-
-    @pytest.mark.parametrize(
-        "command",
-        [
-            "git commit -m 'message'",
-            "git commit --amend",
-            "git commit -a -m 'message'",
-            "git  commit -m 'message'",  # Extra space
-            "git commit --allow-empty -m 'test'",
-            "  git commit -m 'with leading space'",
-        ],
-    )
-    def test_blocks_git_commit_variants(self, block_bash_hook_path: Path, command: str) -> None:
-        """Test that git commit variants are blocked with exit code 2."""
-        stdout, stderr, code = run_block_bash_hook(block_bash_hook_path, "Bash", command)
-        assert code == 2, f"Should block '{command}' with exit code 2, got {code}"
-        assert "/commit" in stderr, f"Should mention /commit skill in stderr: {stderr}"
-
-    def test_stderr_contains_instructions(self, block_bash_hook_path: Path) -> None:
-        """Test that blocking message contains helpful instructions."""
-        stdout, stderr, code = run_block_bash_hook(
-            block_bash_hook_path, "Bash", "git commit -m 'test'"
-        )
-        assert code == 2
-        assert "/commit" in stderr, "Should mention the /commit skill"
-        assert "skill" in stderr.lower() or "workflow" in stderr.lower(), (
-            "Should explain the alternative workflow"
-        )
-
-
-class TestAllowedCommands:
-    """Tests for commands that should be allowed."""
-
-    @pytest.mark.parametrize(
-        "command",
-        [
-            # Git commands (non-commit)
-            "git status",
-            "git add .",
-            "git diff HEAD",
-            "git log --oneline -5",
-            "git push origin main",
-            "git pull",
-            "git fetch",
-            "git branch -a",
-            # Non-git commands
-            "ls -la",
-            "echo hello",
-            "python --version",
-            "cat README.md",
-            # Commands with 'commit' substring (not at start)
-            "echo 'commit message'",
-            "grep -r 'commit' .",
-            "cat commits.txt",
-            # 'git commit' in message body (anchored pattern should allow)
-            "echo 'use git commit to save changes'",
-            "grep 'git commit' README.md",
-            ".claude/hooks/commit_job_git_commit.sh -m 'message about git commit'",
-        ],
-    )
-    def test_allows_command(self, block_bash_hook_path: Path, command: str) -> None:
-        """Test that non-blocked commands are allowed."""
-        stdout, stderr, code = run_block_bash_hook(block_bash_hook_path, "Bash", command)
-        assert code == 0, f"Should allow '{command}' with exit code 0, got {code}"
-
-
-class TestNonBashTools:
-    """Tests for non-Bash tool calls."""
-
-    @pytest.mark.parametrize("tool_name", ["Read", "Write", "Edit", "Glob", "Grep"])
-    def test_allows_non_bash_tools(self, block_bash_hook_path: Path, tool_name: str) -> None:
-        """Test that non-Bash tools are not blocked even with git commit in input."""
-        stdout, stderr, code = run_block_bash_hook(
-            block_bash_hook_path, tool_name, "git commit -m 'test'"
-        )
-        assert code == 0, f"Should allow {tool_name} tool with exit code 0, got {code}"
-
-
-class TestEdgeCases:
-    """Tests for edge cases and malformed input."""
-
-    def test_empty_input(self, block_bash_hook_path: Path) -> None:
-        """Test that empty input is handled gracefully."""
-        result = subprocess.run(
-            ["bash", str(block_bash_hook_path)],
-            capture_output=True,
-            text=True,
-            input="",
-            env=os.environ.copy(),
-        )
-        assert result.returncode == 0, "Should allow with exit code 0 for empty input"
-
-    def test_no_command_in_input(self, block_bash_hook_path: Path) -> None:
-        """Test that missing command is handled gracefully."""
-        hook_input = {"tool_name": "Bash", "tool_input": {}}
-        result = subprocess.run(
-            ["bash", str(block_bash_hook_path)],
-            capture_output=True,
-            text=True,
-            input=json.dumps(hook_input),
-            env=os.environ.copy(),
-        )
-        assert result.returncode == 0, "Should allow with exit code 0 for missing command"
-
-    def test_invalid_json(self, block_bash_hook_path: Path) -> None:
-        """Test that invalid JSON is handled gracefully."""
-        result = subprocess.run(
-            ["bash", str(block_bash_hook_path)],
-            capture_output=True,
-            text=True,
-            input="not valid json",
-            env=os.environ.copy(),
-        )
-        # Script uses set -e and jq, so invalid JSON causes jq to fail with exit 5
-        # This is acceptable behavior - Claude Code won't send invalid JSON
-        assert result.returncode in (0, 1, 5), (
-            f"Should handle invalid JSON without crashing unexpectedly, got {result.returncode}"
-        )
-
-
-# ******************************************************************************
-# ***                     CLAUDE CODE CONTRACT TEST                          ***
-# ******************************************************************************
-#
-# DO NOT MODIFY this test without consulting Claude Code hook documentation:
-# https://docs.anthropic.com/en/docs/claude-code/hooks
-#
-# PreToolUse hooks with exit code 2 MUST:
-#   - Output error message to stderr (NOT stdout)
-#   - Exit with code 2
-#
-# PreToolUse hooks that allow MUST:
-#   - Exit with code 0
-#   - Produce no output on stderr
-#
-# ******************************************************************************
-class TestOutputsAndExitsAccordingToClaudeSpec:
-    """Tests that hook output conforms to Claude Code's required format."""
-
-    def test_claude_code_hook_contract(self, block_bash_hook_path: Path) -> None:
-        """Verify hook follows Claude Code PreToolUse contract for block/allow."""
-        # Test BLOCK behavior
-        stdout, stderr, code = run_block_bash_hook(
-            block_bash_hook_path, "Bash", "git commit -m 'test'"
-        )
-        assert code == 2, "Blocked command must exit with code 2"
-        assert stderr.strip() != "", "Blocked command must output message to stderr"
-        assert stdout.strip() == "", "Blocked command must not output to stdout"
-
-        # Test ALLOW behavior
-        stdout, stderr, code = run_block_bash_hook(block_bash_hook_path, "Bash", "git status")
-        assert code == 0, "Allowed command must exit with code 0"
-        assert stderr.strip() == "", "Allowed command must not output to stderr"
diff --git a/tests/shell_script_tests/test_capture_prompt_work_tree.py b/tests/shell_script_tests/test_capture_prompt_work_tree.py
deleted file mode 100644
index 6f0435b1..00000000
--- a/tests/shell_script_tests/test_capture_prompt_work_tree.py
+++ /dev/null
@@ -1,257 +0,0 @@
-"""Tests for capture_prompt_work_tree.sh helper script.
-
-This script captures the git work tree state for use with
-compare_to: prompt rules. It should:
-1. Create .deepwork directory if needed
-2. Stage all changes with git add -A
-3. Record changed files to .deepwork/.last_work_tree
-4. Handle various git states gracefully
-"""
-
-from pathlib import Path
-
-import pytest
-from git import Repo
-
-from .conftest import run_shell_script
-
-
-@pytest.fixture
-def git_repo_with_changes(git_repo: Path) -> Path:
-    """Create a git repo with uncommitted changes."""
-    # Create some changed files
-    (git_repo / "modified.py").write_text("# Modified file\n")
-    (git_repo / "src").mkdir(exist_ok=True)
-    (git_repo / "src" / "main.py").write_text("# Main file\n")
-
-    return git_repo
-
-
-def run_capture_script(script_path: Path, cwd: Path) -> tuple[str, str, int]:
-    """Run the capture_prompt_work_tree.sh script."""
-    return run_shell_script(script_path, cwd)
-
-
-class TestCapturePromptWorkTreeBasic:
-    """Basic functionality tests for capture_prompt_work_tree.sh."""
-
-    def test_exits_successfully(self, rules_hooks_dir: Path, git_repo: Path) -> None:
-        """Test that the script exits with code 0."""
-        script_path = rules_hooks_dir / "capture_prompt_work_tree.sh"
-        stdout, stderr, code = run_capture_script(script_path, git_repo)
-
-        assert code == 0, f"Expected exit code 0, got {code}. stderr: {stderr}"
-
-    def test_creates_deepwork_directory(self, rules_hooks_dir: Path, git_repo: Path) -> None:
-        """Test that the script creates .deepwork directory."""
-        deepwork_dir = git_repo / ".deepwork"
-        assert not deepwork_dir.exists(), "Precondition: .deepwork should not exist"
-
-        script_path = rules_hooks_dir / "capture_prompt_work_tree.sh"
-        stdout, stderr, code = run_capture_script(script_path, git_repo)
-
-        assert code == 0, f"Script failed with stderr: {stderr}"
-        assert deepwork_dir.exists(), "Script should create .deepwork directory"
-
-    def test_creates_last_work_tree_file(self, rules_hooks_dir: Path, git_repo: Path) -> None:
-        """Test that the script creates .last_work_tree file."""
-        script_path = rules_hooks_dir / "capture_prompt_work_tree.sh"
-        stdout, stderr, code = run_capture_script(script_path, git_repo)
-
-        work_tree_file = git_repo / ".deepwork" / ".last_work_tree"
-        assert code == 0, f"Script failed with stderr: {stderr}"
-        assert work_tree_file.exists(), "Script should create .last_work_tree file"
-
-    def test_empty_repo_produces_empty_file(self, rules_hooks_dir: Path, git_repo: Path) -> None:
-        """Test that a clean repo produces an empty work tree file."""
-        script_path = rules_hooks_dir / "capture_prompt_work_tree.sh"
-        stdout, stderr, code = run_capture_script(script_path, git_repo)
-
-        # Clean repo should have empty or minimal content
-        # May have .deepwork/.last_work_tree itself listed
-        assert code == 0, f"Script failed with stderr: {stderr}"
-
-
-class TestCapturePromptWorkTreeFileTracking:
-    """Tests for file tracking behavior in capture_prompt_work_tree.sh."""
-
-    def test_captures_staged_files(self, rules_hooks_dir: Path, git_repo: Path) -> None:
-        """Test that staged files are captured."""
-        # Create and stage a file
-        new_file = git_repo / "staged.py"
-        new_file.write_text("# Staged file\n")
-        repo = Repo(git_repo)
-        repo.index.add(["staged.py"])
-
-        script_path = rules_hooks_dir / "capture_prompt_work_tree.sh"
-        stdout, stderr, code = run_capture_script(script_path, git_repo)
-
-        work_tree_file = git_repo / ".deepwork" / ".last_work_tree"
-        content = work_tree_file.read_text()
-
-        assert code == 0, f"Script failed with stderr: {stderr}"
-        assert "staged.py" in content, "Staged file should be in work tree"
-
-    def test_captures_unstaged_changes(self, rules_hooks_dir: Path, git_repo: Path) -> None:
-        """Test that unstaged changes are captured (after staging by script)."""
-        # Create an unstaged file
-        unstaged = git_repo / "unstaged.py"
-        unstaged.write_text("# Unstaged file\n")
-
-        script_path = rules_hooks_dir / "capture_prompt_work_tree.sh"
-        stdout, stderr, code = run_capture_script(script_path, git_repo)
-
-        work_tree_file = git_repo / ".deepwork" / ".last_work_tree"
-        content = work_tree_file.read_text()
-
-        assert code == 0, f"Script failed with stderr: {stderr}"
-        assert "unstaged.py" in content, "Unstaged file should be captured"
-
-    def test_captures_files_in_subdirectories(self, rules_hooks_dir: Path, git_repo: Path) -> None:
-        """Test that files in subdirectories are captured."""
-        # Create files in nested directories
-        src_dir = git_repo / "src" / "components"
-        src_dir.mkdir(parents=True)
-        (src_dir / "button.py").write_text("# Button component\n")
-
-        script_path = rules_hooks_dir / "capture_prompt_work_tree.sh"
-        stdout, stderr, code = run_capture_script(script_path, git_repo)
-
-        work_tree_file = git_repo / ".deepwork" / ".last_work_tree"
-        content = work_tree_file.read_text()
-
-        assert code == 0, f"Script failed with stderr: {stderr}"
-        assert "src/components/button.py" in content, "Nested file should be captured"
-
-    def test_captures_multiple_files(
-        self, rules_hooks_dir: Path, git_repo_with_changes: Path
-    ) -> None:
-        """Test that multiple files are captured."""
-        script_path = rules_hooks_dir / "capture_prompt_work_tree.sh"
-        stdout, stderr, code = run_capture_script(script_path, git_repo_with_changes)
-
-        work_tree_file = git_repo_with_changes / ".deepwork" / ".last_work_tree"
-        content = work_tree_file.read_text()
-
-        assert code == 0, f"Script failed with stderr: {stderr}"
-        assert "modified.py" in content, "Modified file should be captured"
-        assert "src/main.py" in content, "File in src/ should be captured"
-
-    def test_file_list_is_sorted_and_unique(self, rules_hooks_dir: Path, git_repo: Path) -> None:
-        """Test that the file list is sorted and deduplicated."""
-        # Create multiple files
-        (git_repo / "z_file.py").write_text("# Z file\n")
-        (git_repo / "a_file.py").write_text("# A file\n")
-        (git_repo / "m_file.py").write_text("# M file\n")
-
-        script_path = rules_hooks_dir / "capture_prompt_work_tree.sh"
-        stdout, stderr, code = run_capture_script(script_path, git_repo)
-
-        work_tree_file = git_repo / ".deepwork" / ".last_work_tree"
-        lines = [line for line in work_tree_file.read_text().strip().split("\n") if line]
-
-        # Extract just the test files we created (filter out .deepwork files)
-        test_files = [f for f in lines if f.endswith("_file.py")]
-
-        assert code == 0, f"Script failed with stderr: {stderr}"
-        assert test_files == sorted(test_files), "Files should be sorted"
-        assert len(test_files) == len(set(test_files)), "Files should be unique"
-
-
-class TestCapturePromptWorkTreeGitStates:
-    """Tests for handling various git states in capture_prompt_work_tree.sh."""
-
-    def test_handles_deleted_files(self, rules_hooks_dir: Path, git_repo: Path) -> None:
-        """Test that deleted files are handled gracefully."""
-        # Create and commit a file, then delete it
-        to_delete = git_repo / "to_delete.py"
-        to_delete.write_text("# Will be deleted\n")
-        repo = Repo(git_repo)
-        repo.index.add(["to_delete.py"])
-        repo.index.commit("Add file to delete")
-
-        # Now delete it
-        to_delete.unlink()
-
-        script_path = rules_hooks_dir / "capture_prompt_work_tree.sh"
-        stdout, stderr, code = run_capture_script(script_path, git_repo)
-
-        assert code == 0, f"Script should handle deletions. stderr: {stderr}"
-
-    def test_handles_renamed_files(self, rules_hooks_dir: Path, git_repo: Path) -> None:
-        """Test that renamed files are tracked."""
-        # Create and commit a file
-        old_name = git_repo / "old_name.py"
-        old_name.write_text("# Original file\n")
-        repo = Repo(git_repo)
-        repo.index.add(["old_name.py"])
-        repo.index.commit("Add original file")
-
-        # Rename it
-        new_name = git_repo / "new_name.py"
-        old_name.rename(new_name)
-
-        script_path = rules_hooks_dir / "capture_prompt_work_tree.sh"
-        stdout, stderr, code = run_capture_script(script_path, git_repo)
-
-        work_tree_file = git_repo / ".deepwork" / ".last_work_tree"
-        content = work_tree_file.read_text()
-
-        assert code == 0, f"Script failed with stderr: {stderr}"
-        # Both old (deleted) and new should appear as changes
-        assert "new_name.py" in content, "New filename should be captured"
-
-    def test_handles_modified_files(self, rules_hooks_dir: Path, git_repo: Path) -> None:
-        """Test that modified committed files are tracked."""
-        # Modify an existing committed file
-        readme = git_repo / "README.md"
-        readme.write_text("# Modified content\n")
-
-        script_path = rules_hooks_dir / "capture_prompt_work_tree.sh"
-        stdout, stderr, code = run_capture_script(script_path, git_repo)
-
-        work_tree_file = git_repo / ".deepwork" / ".last_work_tree"
-        content = work_tree_file.read_text()
-
-        assert code == 0, f"Script failed with stderr: {stderr}"
-        assert "README.md" in content, "Modified file should be captured"
-
-
-class TestCapturePromptWorkTreeIdempotence:
-    """Tests for idempotent behavior of capture_prompt_work_tree.sh."""
-
-    def test_multiple_runs_succeed(self, rules_hooks_dir: Path, git_repo: Path) -> None:
-        """Test that the script can be run multiple times."""
-        script_path = rules_hooks_dir / "capture_prompt_work_tree.sh"
-
-        for i in range(3):
-            stdout, stderr, code = run_capture_script(script_path, git_repo)
-            assert code == 0, f"Run {i + 1} failed with stderr: {stderr}"
-
-    def test_updates_on_new_changes(self, rules_hooks_dir: Path, git_repo: Path) -> None:
-        """Test that subsequent runs capture new changes."""
-        script_path = rules_hooks_dir / "capture_prompt_work_tree.sh"
-
-        # First run
-        run_capture_script(script_path, git_repo)
-
-        # Add a new file
-        (git_repo / "new_file.py").write_text("# New\n")
-
-        # Second run
-        run_capture_script(script_path, git_repo)
-
-        work_tree_file = git_repo / ".deepwork" / ".last_work_tree"
-        content = work_tree_file.read_text()
-
-        assert "new_file.py" in content, "New file should be captured"
-
-    def test_existing_deepwork_dir_not_error(self, rules_hooks_dir: Path, git_repo: Path) -> None:
-        """Test that existing .deepwork directory is not an error."""
-        # Pre-create the directory
-        (git_repo / ".deepwork").mkdir()
-
-        script_path = rules_hooks_dir / "capture_prompt_work_tree.sh"
-        stdout, stderr, code = run_capture_script(script_path, git_repo)
-
-        assert code == 0, f"Should handle existing .deepwork dir. stderr: {stderr}"
diff --git a/tests/shell_script_tests/test_check_version.py b/tests/shell_script_tests/test_check_version.py
deleted file mode 100644
index 1cd4c20f..00000000
--- a/tests/shell_script_tests/test_check_version.py
+++ /dev/null
@@ -1,412 +0,0 @@
-"""Tests for check_version.sh SessionStart hook.
-
-Tests version checking logic, JSON output format, and warning behavior.
-"""
-
-import os
-import subprocess
-import tempfile
-from pathlib import Path
-
-import pytest
-
-
-@pytest.fixture
-def check_version_script(hooks_dir: Path) -> Path:
-    """Return path to check_version.sh."""
-    return hooks_dir / "check_version.sh"
-
-
-def run_check_version_with_mock_claude(
-    script_path: Path,
-    mock_version: str | None,
-    cwd: Path | None = None,
-    mock_deepwork: bool = True,
-    stdin_json: str | None = None,
-) -> tuple[str, str, int]:
-    """
-    Run check_version.sh with a mocked claude command.
-
-    Args:
-        script_path: Path to check_version.sh
-        mock_version: Version string to return from mock claude, or None for failure
-        cwd: Working directory
-        mock_deepwork: If True, create a mock deepwork command that succeeds.
-                       If False, do not create mock deepwork (simulates not installed).
-        stdin_json: Optional JSON string to pass via stdin (simulates hook input)
-
-    Returns:
-        Tuple of (stdout, stderr, return_code)
-    """
-    with tempfile.TemporaryDirectory() as tmpdir:
-        # Create mock claude command
-        mock_claude = Path(tmpdir) / "claude"
-        if mock_version is not None:
-            mock_claude.write_text(f'#!/bin/bash\necho "{mock_version} (Claude Code)"\n')
-        else:
-            mock_claude.write_text("#!/bin/bash\nexit 1\n")
-        mock_claude.chmod(0o755)
-
-        # Create mock deepwork command
-        # When mock_deepwork=True, create a working mock
-        # When mock_deepwork=False, create a failing mock that shadows the real one
-        mock_deepwork_cmd = Path(tmpdir) / "deepwork"
-        if mock_deepwork:
-            mock_deepwork_cmd.write_text('#!/bin/bash\necho "deepwork 0.1.0"\n')
-        else:
-            # Create a mock that fails (simulating deepwork not being installed)
-            mock_deepwork_cmd.write_text("#!/bin/bash\nexit 127\n")
-        mock_deepwork_cmd.chmod(0o755)
-
-        # Prepend mock dir to PATH
-        env = os.environ.copy()
-        env["PATH"] = f"{tmpdir}:{env.get('PATH', '')}"
-
-        result = subprocess.run(
-            ["bash", str(script_path)],
-            capture_output=True,
-            text=True,
-            cwd=cwd or tmpdir,
-            env=env,
-            input=stdin_json,
-        )
-
-        return result.stdout, result.stderr, result.returncode
-
-
-class TestVersionComparison:
-    """Tests for version comparison logic."""
-
-    def test_equal_versions(self, check_version_script: Path) -> None:
-        """Test that equal versions don't trigger warning."""
-        # Mock version equals minimum (2.1.14)
-        stdout, stderr, code = run_check_version_with_mock_claude(check_version_script, "2.1.14")
-
-        assert code == 0
-        assert "WARNING" not in stderr
-
-    def test_greater_patch_version(self, check_version_script: Path) -> None:
-        """Test that greater patch version doesn't trigger warning."""
-        stdout, stderr, code = run_check_version_with_mock_claude(check_version_script, "2.1.15")
-
-        assert code == 0
-        assert "WARNING" not in stderr
-
-    def test_greater_minor_version(self, check_version_script: Path) -> None:
-        """Test that greater minor version doesn't trigger warning."""
-        stdout, stderr, code = run_check_version_with_mock_claude(check_version_script, "2.2.0")
-
-        assert code == 0
-        assert "WARNING" not in stderr
-
-    def test_greater_major_version(self, check_version_script: Path) -> None:
-        """Test that greater major version doesn't trigger warning."""
-        stdout, stderr, code = run_check_version_with_mock_claude(check_version_script, "3.0.0")
-
-        assert code == 0
-        assert "WARNING" not in stderr
-
-    def test_lesser_patch_version(self, check_version_script: Path) -> None:
-        """Test that lesser patch version triggers warning."""
-        stdout, stderr, code = run_check_version_with_mock_claude(check_version_script, "2.1.13")
-
-        assert code == 0
-        assert "WARNING" in stderr
-        assert "2.1.13" in stderr  # Shows current version
-
-    def test_lesser_minor_version(self, check_version_script: Path) -> None:
-        """Test that lesser minor version triggers warning."""
-        stdout, stderr, code = run_check_version_with_mock_claude(check_version_script, "2.0.99")
-
-        assert code == 0
-        assert "WARNING" in stderr
-
-    def test_lesser_major_version(self, check_version_script: Path) -> None:
-        """Test that lesser major version triggers warning."""
-        stdout, stderr, code = run_check_version_with_mock_claude(check_version_script, "1.9.99")
-
-        assert code == 0
-        assert "WARNING" in stderr
-
-
-class TestWarningOutput:
-    """Tests for warning message content."""
-
-    def test_warning_contains_current_version(self, check_version_script: Path) -> None:
-        """Test that warning shows the current version."""
-        stdout, stderr, code = run_check_version_with_mock_claude(check_version_script, "2.0.0")
-
-        assert "2.0.0" in stderr
-
-    def test_warning_contains_minimum_version(self, check_version_script: Path) -> None:
-        """Test that warning shows the minimum version."""
-        stdout, stderr, code = run_check_version_with_mock_claude(check_version_script, "2.0.0")
-
-        assert "2.1.14" in stderr
-
-    def test_warning_suggests_update(self, check_version_script: Path) -> None:
-        """Test that warning suggests updating Claude Code."""
-        stdout, stderr, code = run_check_version_with_mock_claude(check_version_script, "2.0.0")
-
-        assert "Update your version of Claude Code" in stderr
-
-    def test_warning_mentions_bugs(self, check_version_script: Path) -> None:
-        """Test that warning mentions bugs in older versions."""
-        stdout, stderr, code = run_check_version_with_mock_claude(check_version_script, "2.0.0")
-
-        assert "bugs" in stderr.lower()
-
-
-class TestHookConformance:
-    """Tests for Claude Code hook format compliance."""
-
-    def test_always_exits_zero(self, check_version_script: Path) -> None:
-        """Test that script always exits 0 (informational only)."""
-        # Test with warning
-        stdout, stderr, code = run_check_version_with_mock_claude(check_version_script, "2.0.0")
-        assert code == 0
-
-        # Test without warning
-        stdout, stderr, code = run_check_version_with_mock_claude(check_version_script, "3.0.0")
-        assert code == 0
-
-    def test_outputs_valid_json_when_version_ok(self, check_version_script: Path) -> None:
-        """Test that stdout is valid JSON when version is OK."""
-        import json
-
-        stdout, stderr, code = run_check_version_with_mock_claude(check_version_script, "3.0.0")
-
-        # Should output empty JSON object
-        output = json.loads(stdout.strip())
-        assert output == {}
-
-    def test_outputs_structured_json_when_version_low(self, check_version_script: Path) -> None:
-        """Test that stdout has hookSpecificOutput when version is low."""
-        import json
-
-        stdout, stderr, code = run_check_version_with_mock_claude(check_version_script, "2.0.0")
-
-        output = json.loads(stdout.strip())
-        assert "hookSpecificOutput" in output
-        assert output["hookSpecificOutput"]["hookEventName"] == "SessionStart"
-        assert "additionalContext" in output["hookSpecificOutput"]
-        assert "VERSION WARNING" in output["hookSpecificOutput"]["additionalContext"]
-
-    def test_warning_goes_to_stderr_and_stdout(self, check_version_script: Path) -> None:
-        """Test that warning is on stderr (visual) and stdout (context)."""
-        stdout, stderr, code = run_check_version_with_mock_claude(check_version_script, "2.0.0")
-
-        # Visual warning should be in stderr
-        assert "WARNING" in stderr
-        # JSON with context should be in stdout
-        assert "hookSpecificOutput" in stdout
-
-
-class TestEdgeCases:
-    """Tests for edge cases and error handling."""
-
-    def test_claude_command_not_found(self, check_version_script: Path) -> None:
-        """Test graceful handling when claude command fails."""
-        stdout, stderr, code = run_check_version_with_mock_claude(
-            check_version_script,
-            None,  # Mock failure
-        )
-
-        # Should exit 0 and output JSON even if version check fails
-        assert code == 0
-        assert stdout.strip() == "{}"
-        # No warning since we couldn't determine version
-        assert "WARNING" not in stderr
-
-    def test_version_with_extra_text(self, check_version_script: Path) -> None:
-        """Test parsing version from output with extra text."""
-        # Real output format: "2.1.1 (Claude Code)"
-        stdout, stderr, code = run_check_version_with_mock_claude(check_version_script, "2.1.14")
-
-        assert code == 0
-        # Version 2.1.14 equals minimum, no warning
-        assert "WARNING" not in stderr
-
-
-class TestDeepworkInstallationCheck:
-    """Tests for deepwork installation check (blocking)."""
-
-    def test_deepwork_installed_allows_session(self, check_version_script: Path) -> None:
-        """Test that script proceeds when deepwork is installed."""
-        # With mock_deepwork=True (default), deepwork is available
-        stdout, stderr, code = run_check_version_with_mock_claude(
-            check_version_script, "3.0.0", mock_deepwork=True
-        )
-
-        assert code == 0
-        assert "DEEPWORK NOT INSTALLED" not in stderr
-
-    def test_deepwork_not_installed_blocks_session(self, check_version_script: Path) -> None:
-        """Test that script blocks when deepwork is not installed."""
-        stdout, stderr, code = run_check_version_with_mock_claude(
-            check_version_script, "3.0.0", mock_deepwork=False
-        )
-
-        # Should exit with code 2 (blocking error)
-        assert code == 2
-        assert "DEEPWORK NOT INSTALLED" in stderr
-
-    def test_deepwork_error_message_content(self, check_version_script: Path) -> None:
-        """Test that deepwork error message has helpful content."""
-        stdout, stderr, code = run_check_version_with_mock_claude(
-            check_version_script, "3.0.0", mock_deepwork=False
-        )
-
-        # Should mention direct invocation requirement
-        assert "directly invok" in stderr.lower()
-        # Should mention NOT using wrappers
-        assert "uv run deepwork" in stderr
-        # Should suggest installation options
-        assert "pipx" in stderr or "pip install" in stderr
-
-    def test_deepwork_error_outputs_json(self, check_version_script: Path) -> None:
-        """Test that deepwork error outputs valid JSON with error info."""
-        import json
-
-        stdout, stderr, code = run_check_version_with_mock_claude(
-            check_version_script, "3.0.0", mock_deepwork=False
-        )
-
-        output = json.loads(stdout.strip())
-        assert "hookSpecificOutput" in output
-        assert "error" in output
-        assert "deepwork" in output["error"].lower()
-        # Should have additional context for Claude
-        assert "additionalContext" in output["hookSpecificOutput"]
-        assert "DEEPWORK" in output["hookSpecificOutput"]["additionalContext"]
-
-    def test_deepwork_check_happens_before_version_check(self, check_version_script: Path) -> None:
-        """Test that deepwork check runs before version check."""
-        # Even with a low version that would trigger warning,
-        # missing deepwork should block first
-        stdout, stderr, code = run_check_version_with_mock_claude(
-            check_version_script, "1.0.0", mock_deepwork=False
-        )
-
-        # Should exit with deepwork error, not version warning
-        assert code == 2
-        assert "DEEPWORK NOT INSTALLED" in stderr
-        # Should NOT show version warning
-        assert "CLAUDE CODE VERSION WARNING" not in stderr
-
-
-class TestSessionSourceDetection:
-    """Tests for skipping non-initial sessions based on source field."""
-
-    def test_startup_source_runs_normally(self, check_version_script: Path) -> None:
-        """Test that source='startup' runs the full check."""
-        import json
-
-        stdin_json = json.dumps({"source": "startup", "session_id": "test123"})
-        stdout, stderr, code = run_check_version_with_mock_claude(
-            check_version_script, "3.0.0", stdin_json=stdin_json
-        )
-
-        # Should run normally and output empty JSON (version OK)
-        assert code == 0
-        assert stdout.strip() == "{}"
-
-    def test_resume_source_skips_check(self, check_version_script: Path) -> None:
-        """Test that source='resume' skips all checks and returns empty JSON."""
-        import json
-
-        stdin_json = json.dumps({"source": "resume", "session_id": "test123"})
-        stdout, stderr, code = run_check_version_with_mock_claude(
-            check_version_script,
-            "1.0.0",
-            stdin_json=stdin_json,  # Low version that would trigger warning
-        )
-
-        # Should skip and return empty JSON without warnings
-        assert code == 0
-        assert stdout.strip() == "{}"
-        assert "WARNING" not in stderr
-        assert "DEEPWORK" not in stderr
-
-    def test_clear_source_skips_check(self, check_version_script: Path) -> None:
-        """Test that source='clear' (compact) skips all checks."""
-        import json
-
-        stdin_json = json.dumps({"source": "clear", "session_id": "test123"})
-        stdout, stderr, code = run_check_version_with_mock_claude(
-            check_version_script, "1.0.0", stdin_json=stdin_json
-        )
-
-        # Should skip and return empty JSON
-        assert code == 0
-        assert stdout.strip() == "{}"
-        assert "WARNING" not in stderr
-
-    def test_no_source_field_runs_normally(self, check_version_script: Path) -> None:
-        """Test backwards compatibility: missing source field runs full check."""
-        import json
-
-        # JSON without source field (older Claude Code version)
-        stdin_json = json.dumps({"session_id": "test123"})
-        stdout, stderr, code = run_check_version_with_mock_claude(
-            check_version_script,
-            "2.0.0",
-            stdin_json=stdin_json,  # Low version
-        )
-
-        # Should run normally and show warning (backwards compat)
-        assert code == 0
-        assert "WARNING" in stderr
-
-    def test_empty_stdin_runs_normally(self, check_version_script: Path) -> None:
-        """Test that empty stdin runs full check (backwards compat)."""
-        stdout, stderr, code = run_check_version_with_mock_claude(
-            check_version_script, "2.0.0", stdin_json=""
-        )
-
-        # Should run normally and show warning
-        assert code == 0
-        assert "WARNING" in stderr
-
-    def test_resume_skips_even_with_missing_deepwork(self, check_version_script: Path) -> None:
-        """Test that resume sessions skip before deepwork check."""
-        import json
-
-        stdin_json = json.dumps({"source": "resume"})
-        stdout, stderr, code = run_check_version_with_mock_claude(
-            check_version_script, "3.0.0", mock_deepwork=False, stdin_json=stdin_json
-        )
-
-        # Should skip immediately, NOT block on deepwork
-        assert code == 0
-        assert stdout.strip() == "{}"
-        assert "DEEPWORK NOT INSTALLED" not in stderr
-
-    def test_startup_with_low_version_shows_warning(self, check_version_script: Path) -> None:
-        """Test that startup sessions with low version show warning."""
-        import json
-
-        stdin_json = json.dumps({"source": "startup"})
-        stdout, stderr, code = run_check_version_with_mock_claude(
-            check_version_script, "2.0.0", stdin_json=stdin_json
-        )
-
-        # Should run full check and show warning
-        assert code == 0
-        assert "WARNING" in stderr
-        assert "hookSpecificOutput" in stdout
-
-    def test_unknown_source_skips_check(self, check_version_script: Path) -> None:
-        """Test that unknown source values skip the check."""
-        import json
-
-        # Future-proofing: unknown source values should be treated as non-startup
-        stdin_json = json.dumps({"source": "unknown_future_value"})
-        stdout, stderr, code = run_check_version_with_mock_claude(
-            check_version_script, "1.0.0", stdin_json=stdin_json
-        )
-
-        # Should skip and return empty JSON
-        assert code == 0
-        assert stdout.strip() == "{}"
-        assert "WARNING" not in stderr
diff --git a/tests/shell_script_tests/test_hooks.py b/tests/shell_script_tests/test_hooks.py
deleted file mode 100644
index 4f6f8e32..00000000
--- a/tests/shell_script_tests/test_hooks.py
+++ /dev/null
@@ -1,746 +0,0 @@
-"""Tests for hook shell scripts and JSON format compliance.
-
-# ******************************************************************************
-# ***                         CRITICAL CONTRACT TESTS                        ***
-# ******************************************************************************
-#
-# These tests verify the EXACT format required by Claude Code hooks as
-# documented in: doc/platforms/claude/hooks_system.md
-#
-# DO NOT MODIFY these tests without first consulting the official Claude Code
-# documentation at: https://docs.anthropic.com/en/docs/claude-code/hooks
-#
-# Hook Contract Summary:
-#   - Exit code 0: Success, stdout parsed as JSON
-#   - Exit code 2: Blocking error, stderr shown (NOT used for JSON format)
-#   - Allow response: {} (empty JSON object)
-#   - Block response: {"decision": "block", "reason": "..."}
-#
-# CRITICAL: Hooks using JSON output format MUST return exit code 0.
-# The "decision" field in the JSON controls blocking behavior, NOT the exit code.
-#
-# ******************************************************************************
-
-Claude Code hooks have specific JSON response formats that must be followed:
-
-Stop hooks (hooks.after_agent):
-    - {} - Allow stop (empty object)
-    - {"decision": "block", "reason": "..."} - Block stop with reason
-
-UserPromptSubmit hooks (hooks.before_prompt):
-    - {} - No response needed (empty object)
-    - No output - Also acceptable
-
-BeforeTool hooks (hooks.before_tool):
-    - {} - Allow tool execution
-    - {"decision": "block", "reason": "..."} - Block tool execution
-
-All hooks:
-    - Must return valid JSON if producing output
-    - Must not contain non-JSON output on stdout (stderr is ok)
-    - Exit code 0 indicates success
-"""
-
-import json
-import os
-import subprocess
-import tempfile
-from pathlib import Path
-
-import pytest
-from git import Repo
-
-from .conftest import run_shell_script
-
-# =============================================================================
-# Helper Functions
-# =============================================================================
-
-
-def run_rules_hook_script(
-    script_path: Path,
-    cwd: Path,
-    hook_input: dict | None = None,
-) -> tuple[str, str, int]:
-    """Run a rules hook script and return its output."""
-    return run_shell_script(script_path, cwd, hook_input=hook_input)
-
-
-def run_rules_check_module(
-    cwd: Path,
-    hook_input: dict | None = None,
-    src_dir: Path | None = None,
-) -> tuple[str, str, int]:
-    """Run the rules_check Python module directly and return its output."""
-    env = os.environ.copy()
-    env["DEEPWORK_HOOK_PLATFORM"] = "claude"
-    if src_dir:
-        env["PYTHONPATH"] = str(src_dir)
-
-    stdin_data = json.dumps(hook_input) if hook_input else ""
-
-    result = subprocess.run(
-        ["python", "-m", "deepwork.hooks.rules_check"],
-        cwd=cwd,
-        capture_output=True,
-        text=True,
-        input=stdin_data,
-        env=env,
-    )
-
-    return result.stdout, result.stderr, result.returncode
-
-
-def run_platform_wrapper_script(
-    script_path: Path,
-    python_module: str,
-    hook_input: dict,
-    src_dir: Path,
-) -> tuple[str, str, int]:
-    """
-    Run a platform hook wrapper script with the given input.
-
-    Args:
-        script_path: Path to the wrapper script (claude_hook.sh or gemini_hook.sh)
-        python_module: Python module to invoke
-        hook_input: JSON input to pass via stdin
-        src_dir: Path to src directory for PYTHONPATH
-
-    Returns:
-        Tuple of (stdout, stderr, return_code)
-    """
-    env = os.environ.copy()
-    env["PYTHONPATH"] = str(src_dir)
-
-    result = subprocess.run(
-        ["bash", str(script_path), python_module],
-        capture_output=True,
-        text=True,
-        input=json.dumps(hook_input),
-        env=env,
-    )
-
-    return result.stdout, result.stderr, result.returncode
-
-
-def validate_json_output(output: str) -> dict | None:
-    """
-    Validate that output is valid JSON or empty.
-
-    Args:
-        output: The stdout from a hook script
-
-    Returns:
-        Parsed JSON dict, or None if empty/no output
-
-    Raises:
-        AssertionError: If output is invalid JSON
-    """
-    stripped = output.strip()
-
-    if not stripped:
-        return None
-
-    try:
-        result = json.loads(stripped)
-        assert isinstance(result, dict), "Hook output must be a JSON object"
-        return result
-    except json.JSONDecodeError as e:
-        pytest.fail(f"Invalid JSON output: {stripped!r}. Error: {e}")
-
-
-# ******************************************************************************
-# *** DO NOT EDIT THIS FUNCTION! ***
-# As documented in doc/platforms/claude/hooks_system.md, Stop hooks must return:
-#   - {} (empty object) to allow
-#   - {"decision": "block", "reason": "..."} to block
-# Any other format will cause undefined behavior in Claude Code.
-# ******************************************************************************
-def validate_stop_hook_response(response: dict | None) -> None:
-    """
-    Validate a Stop hook response follows Claude Code format.
-
-    Args:
-        response: Parsed JSON response or None
-
-    Raises:
-        AssertionError: If response format is invalid
-    """
-    if response is None:
-        # No output is acceptable for stop hooks
-        return
-
-    if response == {}:
-        # Empty object means allow stop
-        return
-
-    # Must have decision and reason for blocking
-    assert "decision" in response, (
-        f"Stop hook blocking response must have 'decision' key: {response}"
-    )
-    assert response["decision"] == "block", (
-        f"Stop hook decision must be 'block', got: {response['decision']}"
-    )
-    assert "reason" in response, f"Stop hook blocking response must have 'reason' key: {response}"
-    assert isinstance(response["reason"], str), f"Stop hook reason must be a string: {response}"
-
-    # Reason should not be empty when blocking
-    assert response["reason"].strip(), "Stop hook blocking reason should not be empty"
-
-
-def validate_prompt_hook_response(response: dict | None) -> None:
-    """
-    Validate a UserPromptSubmit hook response.
-
-    Args:
-        response: Parsed JSON response or None
-
-    Raises:
-        AssertionError: If response format is invalid
-    """
-    if response is None:
-        # No output is acceptable
-        return
-
-    # Empty object or valid JSON object is fine
-    assert isinstance(response, dict), f"Prompt hook output must be a JSON object: {response}"
-
-
-# =============================================================================
-# Platform Wrapper Script Tests
-# =============================================================================
-
-
-class TestClaudeHookWrapper:
-    """Tests for claude_hook.sh wrapper script."""
-
-    def test_script_exists_and_is_executable(self, hooks_dir: Path) -> None:
-        """Test that the Claude hook script exists and is executable."""
-        script_path = hooks_dir / "claude_hook.sh"
-        assert script_path.exists(), "claude_hook.sh should exist"
-        assert os.access(script_path, os.X_OK), "claude_hook.sh should be executable"
-
-    def test_usage_error_without_module(self, hooks_dir: Path, src_dir: Path) -> None:
-        """Test that script shows usage error when no module provided."""
-        script_path = hooks_dir / "claude_hook.sh"
-        env = os.environ.copy()
-        env["PYTHONPATH"] = str(src_dir)
-
-        result = subprocess.run(
-            ["bash", str(script_path)],
-            capture_output=True,
-            text=True,
-            env=env,
-        )
-
-        assert result.returncode == 1
-        assert "Usage:" in result.stderr
-
-    def test_sets_platform_environment_variable(self, hooks_dir: Path, src_dir: Path) -> None:
-        """Test that the script sets DEEPWORK_HOOK_PLATFORM correctly."""
-        script_path = hooks_dir / "claude_hook.sh"
-        content = script_path.read_text()
-        assert 'DEEPWORK_HOOK_PLATFORM="claude"' in content
-
-
-class TestGeminiHookWrapper:
-    """Tests for gemini_hook.sh wrapper script."""
-
-    def test_script_exists_and_is_executable(self, hooks_dir: Path) -> None:
-        """Test that the Gemini hook script exists and is executable."""
-        script_path = hooks_dir / "gemini_hook.sh"
-        assert script_path.exists(), "gemini_hook.sh should exist"
-        assert os.access(script_path, os.X_OK), "gemini_hook.sh should be executable"
-
-    def test_usage_error_without_module(self, hooks_dir: Path, src_dir: Path) -> None:
-        """Test that script shows usage error when no module provided."""
-        script_path = hooks_dir / "gemini_hook.sh"
-        env = os.environ.copy()
-        env["PYTHONPATH"] = str(src_dir)
-
-        result = subprocess.run(
-            ["bash", str(script_path)],
-            capture_output=True,
-            text=True,
-            env=env,
-        )
-
-        assert result.returncode == 1
-        assert "Usage:" in result.stderr
-
-    def test_sets_platform_environment_variable(self, hooks_dir: Path, src_dir: Path) -> None:
-        """Test that the script sets DEEPWORK_HOOK_PLATFORM correctly."""
-        script_path = hooks_dir / "gemini_hook.sh"
-        content = script_path.read_text()
-        assert 'DEEPWORK_HOOK_PLATFORM="gemini"' in content
-
-
-# =============================================================================
-# Rules Hook Script Tests
-# =============================================================================
-
-
-class TestRulesStopHook:
-    """Tests for rules stop hook (deepwork.hooks.rules_check) JSON format compliance."""
-
-    def test_allow_response_is_empty_json(self, src_dir: Path, git_repo: Path) -> None:
-        """Test that allow response is empty JSON object."""
-        stdout, stderr, code = run_rules_check_module(git_repo, src_dir=src_dir)
-
-        response = validate_json_output(stdout)
-        validate_stop_hook_response(response)
-
-        if response is not None:
-            assert response == {}, f"Allow response should be empty: {response}"
-
-    def test_block_response_has_required_fields(
-        self, src_dir: Path, git_repo_with_rule: Path
-    ) -> None:
-        """Test that block response has decision and reason."""
-        # Create a file that triggers the rule
-        py_file = git_repo_with_rule / "test.py"
-        py_file.write_text("# Python file\n")
-        repo = Repo(git_repo_with_rule)
-        repo.index.add(["test.py"])
-
-        stdout, stderr, code = run_rules_check_module(git_repo_with_rule, src_dir=src_dir)
-
-        response = validate_json_output(stdout)
-        validate_stop_hook_response(response)
-
-        # Should be blocking
-        assert response is not None, "Expected blocking response"
-        assert response.get("decision") == "block", "Expected block decision"
-        assert "reason" in response, "Expected reason field"
-
-    def test_block_reason_contains_rule_info(self, src_dir: Path, git_repo_with_rule: Path) -> None:
-        """Test that block reason contains rule information."""
-        py_file = git_repo_with_rule / "test.py"
-        py_file.write_text("# Python file\n")
-        repo = Repo(git_repo_with_rule)
-        repo.index.add(["test.py"])
-
-        stdout, stderr, code = run_rules_check_module(git_repo_with_rule, src_dir=src_dir)
-
-        response = validate_json_output(stdout)
-
-        assert response is not None, "Expected blocking response"
-        reason = response.get("reason", "")
-
-        # Should contain useful rule information
-        assert "Rule" in reason or "rule" in reason, f"Reason should mention rule: {reason}"
-
-    def test_no_extraneous_keys_in_response(self, src_dir: Path, git_repo_with_rule: Path) -> None:
-        """Test that response only contains expected keys."""
-        py_file = git_repo_with_rule / "test.py"
-        py_file.write_text("# Python file\n")
-        repo = Repo(git_repo_with_rule)
-        repo.index.add(["test.py"])
-
-        stdout, stderr, code = run_rules_check_module(git_repo_with_rule, src_dir=src_dir)
-
-        response = validate_json_output(stdout)
-
-        if response and response != {}:
-            # Only decision and reason are valid keys for stop hooks
-            valid_keys = {"decision", "reason"}
-            actual_keys = set(response.keys())
-            assert actual_keys <= valid_keys, (
-                f"Unexpected keys in response: {actual_keys - valid_keys}"
-            )
-
-    def test_output_is_single_line_json(self, src_dir: Path, git_repo_with_rule: Path) -> None:
-        """Test that JSON output is single-line (no pretty printing)."""
-        py_file = git_repo_with_rule / "test.py"
-        py_file.write_text("# Python file\n")
-        repo = Repo(git_repo_with_rule)
-        repo.index.add(["test.py"])
-
-        stdout, stderr, code = run_rules_check_module(git_repo_with_rule, src_dir=src_dir)
-
-        # Remove trailing newline and check for internal newlines
-        output = stdout.strip()
-        if output:
-            # JSON output should ideally be single line
-            # Multiple lines could indicate print statements or logging
-            lines = output.split("\n")
-            # Only the last line should be JSON
-            json_line = lines[-1]
-            # Verify the JSON is parseable
-            json.loads(json_line)
-
-
-class TestUserPromptSubmitHook:
-    """Tests for user_prompt_submit.sh JSON format compliance."""
-
-    def test_output_is_valid_json_or_empty(self, rules_hooks_dir: Path, git_repo: Path) -> None:
-        """Test that output is valid JSON or empty."""
-        script_path = rules_hooks_dir / "user_prompt_submit.sh"
-        stdout, stderr, code = run_rules_hook_script(script_path, git_repo)
-
-        response = validate_json_output(stdout)
-        validate_prompt_hook_response(response)
-
-    def test_does_not_block_prompt_submission(self, rules_hooks_dir: Path, git_repo: Path) -> None:
-        """Test that hook does not block prompt submission."""
-        script_path = rules_hooks_dir / "user_prompt_submit.sh"
-        stdout, stderr, code = run_rules_hook_script(script_path, git_repo)
-
-        response = validate_json_output(stdout)
-
-        # UserPromptSubmit hooks should not block
-        if response:
-            assert response.get("decision") != "block", (
-                "UserPromptSubmit hook should not return block decision"
-            )
-
-
-class TestHooksWithTranscript:
-    """Tests for hook JSON format when using transcript input."""
-
-    def test_stop_hook_with_transcript_input(self, src_dir: Path, git_repo_with_rule: Path) -> None:
-        """Test stop hook JSON format when transcript is provided."""
-        py_file = git_repo_with_rule / "test.py"
-        py_file.write_text("# Python file\n")
-        repo = Repo(git_repo_with_rule)
-        repo.index.add(["test.py"])
-
-        # Create mock transcript
-        with tempfile.NamedTemporaryFile(mode="w", suffix=".jsonl", delete=False) as f:
-            transcript_path = f.name
-            f.write(
-                json.dumps(
-                    {
-                        "role": "assistant",
-                        "message": {"content": [{"type": "text", "text": "Hello"}]},
-                    }
-                )
-            )
-            f.write("\n")
-
-        try:
-            hook_input = {"transcript_path": transcript_path}
-            stdout, stderr, code = run_rules_check_module(
-                git_repo_with_rule, hook_input, src_dir=src_dir
-            )
-
-            response = validate_json_output(stdout)
-            validate_stop_hook_response(response)
-
-        finally:
-            os.unlink(transcript_path)
-
-    def test_stop_hook_with_promise_returns_empty(
-        self, src_dir: Path, git_repo_with_rule: Path
-    ) -> None:
-        """Test that promised rules return empty JSON."""
-        py_file = git_repo_with_rule / "test.py"
-        py_file.write_text("# Python file\n")
-        repo = Repo(git_repo_with_rule)
-        repo.index.add(["test.py"])
-
-        # Create transcript with promise tag
-        with tempfile.NamedTemporaryFile(mode="w", suffix=".jsonl", delete=False) as f:
-            transcript_path = f.name
-            f.write(
-                json.dumps(
-                    {
-                        "role": "assistant",
-                        "message": {
-                            "content": [
-                                {
-                                    "type": "text",
-                                    "text": "<promise>Python File Rule</promise>",
-                                }
-                            ]
-                        },
-                    }
-                )
-            )
-            f.write("\n")
-
-        try:
-            hook_input = {"transcript_path": transcript_path}
-            stdout, stderr, code = run_rules_check_module(
-                git_repo_with_rule, hook_input, src_dir=src_dir
-            )
-
-            response = validate_json_output(stdout)
-            validate_stop_hook_response(response)
-
-            # Should be empty (allow) because rule was promised
-            if response is not None:
-                assert response == {}, f"Expected empty response: {response}"
-
-        finally:
-            os.unlink(transcript_path)
-
-
-# ******************************************************************************
-# ***                    DO NOT EDIT THESE EXIT CODE TESTS!                  ***
-# ******************************************************************************
-#
-# As documented in doc/platforms/claude/hooks_system.md:
-#
-#   | Exit Code | Meaning         | Behavior                          |
-#   |-----------|-----------------|-----------------------------------|
-#   | 0         | Success         | stdout parsed as JSON             |
-#   | 2         | Blocking error  | stderr shown, operation blocked   |
-#   | Other     | Warning         | stderr logged, continues          |
-#
-# CRITICAL: Hooks using JSON output format MUST return exit code 0.
-# The "decision" field in the JSON controls blocking behavior, NOT the exit code.
-#
-# Example valid outputs:
-#   Exit 0 + stdout: {}                                      -> Allow
-#   Exit 0 + stdout: {"decision": "block", "reason": "..."}  -> Block
-#   Exit 0 + stdout: {"decision": "deny", "reason": "..."}   -> Block (Gemini)
-#
-# See: https://docs.anthropic.com/en/docs/claude-code/hooks
-# ******************************************************************************
-
-
-class TestHookExitCodes:
-    """Tests for hook exit codes.
-
-    CRITICAL: These tests verify the documented Claude Code hook contract.
-    All hooks MUST exit 0 when using JSON output format.
-    """
-
-    def test_stop_hook_exits_zero_on_allow(self, src_dir: Path, git_repo: Path) -> None:
-        """Test that stop hook exits 0 when allowing.
-
-        DO NOT CHANGE THIS TEST - it verifies the documented hook contract.
-        """
-        stdout, stderr, code = run_rules_check_module(git_repo, src_dir=src_dir)
-
-        assert code == 0, f"Allow should exit 0. stderr: {stderr}"
-
-    def test_stop_hook_exits_zero_on_block(self, src_dir: Path, git_repo_with_rule: Path) -> None:
-        """Test that stop hook exits 0 even when blocking.
-
-        DO NOT CHANGE THIS TEST - it verifies the documented hook contract.
-        Blocking is communicated via JSON {"decision": "block"}, NOT via exit code.
-        """
-        py_file = git_repo_with_rule / "test.py"
-        py_file.write_text("# Python file\n")
-        repo = Repo(git_repo_with_rule)
-        repo.index.add(["test.py"])
-
-        stdout, stderr, code = run_rules_check_module(git_repo_with_rule, src_dir=src_dir)
-
-        # Hooks should exit 0 and communicate via JSON
-        assert code == 0, f"Block should still exit 0. stderr: {stderr}"
-
-    def test_user_prompt_hook_exits_zero(self, rules_hooks_dir: Path, git_repo: Path) -> None:
-        """Test that user prompt hook always exits 0.
-
-        DO NOT CHANGE THIS TEST - it verifies the documented hook contract.
-        """
-        script_path = rules_hooks_dir / "user_prompt_submit.sh"
-        stdout, stderr, code = run_rules_hook_script(script_path, git_repo)
-
-        assert code == 0, f"User prompt hook should exit 0. stderr: {stderr}"
-
-    def test_capture_script_exits_zero(self, rules_hooks_dir: Path, git_repo: Path) -> None:
-        """Test that capture script exits 0.
-
-        DO NOT CHANGE THIS TEST - it verifies the documented hook contract.
-        """
-        script_path = rules_hooks_dir / "capture_prompt_work_tree.sh"
-        stdout, stderr, code = run_rules_hook_script(script_path, git_repo)
-
-        assert code == 0, f"Capture script should exit 0. stderr: {stderr}"
-
-
-# =============================================================================
-# Integration Tests
-# =============================================================================
-
-
-class TestHookWrapperIntegration:
-    """Integration tests for hook wrappers with actual Python hooks."""
-
-    @pytest.fixture
-    def test_hook_module(self, tmp_path: Path) -> tuple[Path, str]:
-        """Create a temporary test hook module."""
-        module_dir = tmp_path / "test_hooks"
-        module_dir.mkdir(parents=True)
-
-        # Create __init__.py
-        (module_dir / "__init__.py").write_text("")
-
-        # Create the hook module
-        hook_code = '''
-"""Test hook module."""
-import os
-import sys
-
-from deepwork.hooks.wrapper import (
-    HookInput,
-    HookOutput,
-    NormalizedEvent,
-    Platform,
-    run_hook,
-)
-
-
-def test_hook(hook_input: HookInput) -> HookOutput:
-    """Test hook that blocks for after_agent events."""
-    if hook_input.event == NormalizedEvent.AFTER_AGENT:
-        return HookOutput(decision="block", reason="Test block reason")
-    return HookOutput()
-
-
-def main() -> None:
-    platform_str = os.environ.get("DEEPWORK_HOOK_PLATFORM", "claude")
-    try:
-        platform = Platform(platform_str)
-    except ValueError:
-        platform = Platform.CLAUDE
-
-    exit_code = run_hook(test_hook, platform)
-    sys.exit(exit_code)
-
-
-if __name__ == "__main__":
-    main()
-'''
-        (module_dir / "test_hook.py").write_text(hook_code)
-
-        return tmp_path, "test_hooks.test_hook"
-
-    def test_claude_wrapper_with_stop_event(
-        self,
-        hooks_dir: Path,
-        src_dir: Path,
-        test_hook_module: tuple[Path, str],
-    ) -> None:
-        """Test Claude wrapper processes Stop event correctly."""
-        tmp_path, module_name = test_hook_module
-        script_path = hooks_dir / "claude_hook.sh"
-
-        hook_input = {
-            "session_id": "test123",
-            "hook_event_name": "Stop",
-            "cwd": "/project",
-        }
-
-        env = os.environ.copy()
-        env["PYTHONPATH"] = f"{src_dir}:{tmp_path}"
-
-        result = subprocess.run(
-            ["bash", str(script_path), module_name],
-            capture_output=True,
-            text=True,
-            input=json.dumps(hook_input),
-            env=env,
-        )
-
-        # Exit code 0 even when blocking - the JSON decision field controls behavior
-        assert result.returncode == 0, f"Expected exit code 0. stderr: {result.stderr}"
-
-        output = json.loads(result.stdout.strip())
-        assert output["decision"] == "block"
-        assert "Test block reason" in output["reason"]
-
-    def test_gemini_wrapper_with_afteragent_event(
-        self,
-        hooks_dir: Path,
-        src_dir: Path,
-        test_hook_module: tuple[Path, str],
-    ) -> None:
-        """Test Gemini wrapper processes AfterAgent event correctly."""
-        tmp_path, module_name = test_hook_module
-        script_path = hooks_dir / "gemini_hook.sh"
-
-        hook_input = {
-            "session_id": "test456",
-            "hook_event_name": "AfterAgent",
-            "cwd": "/project",
-        }
-
-        env = os.environ.copy()
-        env["PYTHONPATH"] = f"{src_dir}:{tmp_path}"
-
-        result = subprocess.run(
-            ["bash", str(script_path), module_name],
-            capture_output=True,
-            text=True,
-            input=json.dumps(hook_input),
-            env=env,
-        )
-
-        # Exit code 0 even when blocking - the JSON decision field controls behavior
-        assert result.returncode == 0, f"Expected exit code 0. stderr: {result.stderr}"
-
-        output = json.loads(result.stdout.strip())
-        # Gemini should get "deny" instead of "block"
-        assert output["decision"] == "deny"
-        assert "Test block reason" in output["reason"]
-
-    def test_non_blocking_event(
-        self,
-        hooks_dir: Path,
-        src_dir: Path,
-        test_hook_module: tuple[Path, str],
-    ) -> None:
-        """Test that non-blocking events return exit code 0."""
-        tmp_path, module_name = test_hook_module
-        script_path = hooks_dir / "claude_hook.sh"
-
-        # SessionStart is not blocked by the test hook
-        hook_input = {
-            "session_id": "test789",
-            "hook_event_name": "SessionStart",
-            "cwd": "/project",
-        }
-
-        env = os.environ.copy()
-        env["PYTHONPATH"] = f"{src_dir}:{tmp_path}"
-
-        result = subprocess.run(
-            ["bash", str(script_path), module_name],
-            capture_output=True,
-            text=True,
-            input=json.dumps(hook_input),
-            env=env,
-        )
-
-        assert result.returncode == 0, f"Expected exit code 0. stderr: {result.stderr}"
-        output = json.loads(result.stdout.strip())
-        assert output == {} or output.get("decision", "") not in ("block", "deny")
-
-
-# =============================================================================
-# Python Module Tests
-# =============================================================================
-
-
-class TestRulesCheckModule:
-    """Tests for the rules_check hook module."""
-
-    def test_module_imports(self) -> None:
-        """Test that the rules_check module can be imported."""
-        from deepwork.hooks import rules_check
-
-        assert hasattr(rules_check, "main")
-        assert hasattr(rules_check, "rules_check_hook")
-
-    def test_hook_function_returns_output(self) -> None:
-        """Test that rules_check_hook returns a HookOutput."""
-        from deepwork.hooks.rules_check import rules_check_hook
-        from deepwork.hooks.wrapper import HookInput, HookOutput, NormalizedEvent, Platform
-
-        # Create a minimal hook input
-        hook_input = HookInput(
-            platform=Platform.CLAUDE,
-            event=NormalizedEvent.BEFORE_PROMPT,  # Not after_agent, so no blocking
-            session_id="test",
-        )
-
-        output = rules_check_hook(hook_input)
-
-        assert isinstance(output, HookOutput)
-        # Should not block for before_prompt event
-        assert output.decision != "block"
diff --git a/tests/shell_script_tests/test_make_new_job.py b/tests/shell_script_tests/test_make_new_job.py
deleted file mode 100644
index 913d66ea..00000000
--- a/tests/shell_script_tests/test_make_new_job.py
+++ /dev/null
@@ -1,313 +0,0 @@
-"""Tests for make_new_job.sh utility script.
-
-This script creates the directory structure for a new DeepWork job.
-It should:
-1. Validate job name format (lowercase, letters/numbers/underscores)
-2. Create the job directory structure under .deepwork/jobs/
-3. Create required subdirectories (steps/, hooks/, templates/)
-4. Create AGENTS.md with guidance
-5. Handle existing jobs gracefully (error)
-6. Handle missing .deepwork directory by creating it
-"""
-
-from pathlib import Path
-
-import pytest
-
-from .conftest import run_shell_script
-
-
-@pytest.fixture
-def project_dir(tmp_path: Path) -> Path:
-    """Create a basic project directory."""
-    return tmp_path
-
-
-@pytest.fixture
-def project_with_deepwork(tmp_path: Path) -> Path:
-    """Create a project with existing .deepwork/jobs directory."""
-    jobs_dir = tmp_path / ".deepwork" / "jobs"
-    jobs_dir.mkdir(parents=True)
-    return tmp_path
-
-
-def run_make_new_job(
-    script_path: Path,
-    cwd: Path,
-    job_name: str | None = None,
-) -> tuple[str, str, int]:
-    """Run the make_new_job.sh script."""
-    args = [job_name] if job_name else None
-    return run_shell_script(script_path, cwd, args=args, env_extra={"NO_COLOR": "1"})
-
-
-class TestMakeNewJobUsage:
-    """Tests for make_new_job.sh usage and help output."""
-
-    def test_shows_usage_without_arguments(self, jobs_scripts_dir: Path, project_dir: Path) -> None:
-        """Test that the script shows usage when called without arguments."""
-        script_path = jobs_scripts_dir / "make_new_job.sh"
-        stdout, stderr, code = run_make_new_job(script_path, project_dir)
-
-        assert code == 1, "Should exit with error when no arguments"
-        assert "Usage:" in stdout, "Should show usage information"
-        assert "job_name" in stdout.lower(), "Should mention job_name argument"
-
-    def test_shows_example_in_usage(self, jobs_scripts_dir: Path, project_dir: Path) -> None:
-        """Test that the usage includes an example."""
-        script_path = jobs_scripts_dir / "make_new_job.sh"
-        stdout, stderr, code = run_make_new_job(script_path, project_dir)
-
-        assert "Example:" in stdout, "Should show example usage"
-
-
-class TestMakeNewJobNameValidation:
-    """Tests for job name validation in make_new_job.sh."""
-
-    def test_accepts_lowercase_name(
-        self, jobs_scripts_dir: Path, project_with_deepwork: Path
-    ) -> None:
-        """Test that lowercase names are accepted."""
-        script_path = jobs_scripts_dir / "make_new_job.sh"
-        stdout, stderr, code = run_make_new_job(script_path, project_with_deepwork, "valid_job")
-
-        assert code == 0, f"Should accept lowercase name. stderr: {stderr}"
-
-    def test_accepts_name_with_numbers(
-        self, jobs_scripts_dir: Path, project_with_deepwork: Path
-    ) -> None:
-        """Test that names with numbers are accepted."""
-        script_path = jobs_scripts_dir / "make_new_job.sh"
-        stdout, stderr, code = run_make_new_job(script_path, project_with_deepwork, "job123")
-
-        assert code == 0, f"Should accept name with numbers. stderr: {stderr}"
-
-    def test_accepts_name_with_underscores(
-        self, jobs_scripts_dir: Path, project_with_deepwork: Path
-    ) -> None:
-        """Test that names with underscores are accepted."""
-        script_path = jobs_scripts_dir / "make_new_job.sh"
-        stdout, stderr, code = run_make_new_job(script_path, project_with_deepwork, "my_new_job")
-
-        assert code == 0, f"Should accept underscores. stderr: {stderr}"
-
-    def test_rejects_uppercase_name(
-        self, jobs_scripts_dir: Path, project_with_deepwork: Path
-    ) -> None:
-        """Test that uppercase names are rejected."""
-        script_path = jobs_scripts_dir / "make_new_job.sh"
-        stdout, stderr, code = run_make_new_job(script_path, project_with_deepwork, "InvalidJob")
-
-        assert code != 0, "Should reject uppercase name"
-        # Check for error message in stdout (script uses echo)
-        output = stdout + stderr
-        assert "invalid" in output.lower() or "error" in output.lower(), (
-            "Should show error for invalid name"
-        )
-
-    def test_rejects_name_starting_with_number(
-        self, jobs_scripts_dir: Path, project_with_deepwork: Path
-    ) -> None:
-        """Test that names starting with numbers are rejected."""
-        script_path = jobs_scripts_dir / "make_new_job.sh"
-        stdout, stderr, code = run_make_new_job(script_path, project_with_deepwork, "123job")
-
-        assert code != 0, "Should reject name starting with number"
-
-    def test_rejects_name_with_hyphens(
-        self, jobs_scripts_dir: Path, project_with_deepwork: Path
-    ) -> None:
-        """Test that names with hyphens are rejected."""
-        script_path = jobs_scripts_dir / "make_new_job.sh"
-        stdout, stderr, code = run_make_new_job(script_path, project_with_deepwork, "my-job")
-
-        assert code != 0, "Should reject name with hyphens"
-
-    def test_rejects_name_with_spaces(
-        self, jobs_scripts_dir: Path, project_with_deepwork: Path
-    ) -> None:
-        """Test that names with spaces are rejected."""
-        script_path = jobs_scripts_dir / "make_new_job.sh"
-        # This will be passed as two arguments by bash, causing an error
-        stdout, stderr, code = run_make_new_job(script_path, project_with_deepwork, "my job")
-
-        # Either fails validation or treats "job" as separate (job is valid name)
-        # The key is it shouldn't create "my job" as a directory name
-        bad_dir = project_with_deepwork / ".deepwork" / "jobs" / "my job"
-        assert not bad_dir.exists(), "Should not create directory with space in name"
-
-
-class TestMakeNewJobDirectoryStructure:
-    """Tests for directory structure creation in make_new_job.sh."""
-
-    def test_creates_main_job_directory(
-        self, jobs_scripts_dir: Path, project_with_deepwork: Path
-    ) -> None:
-        """Test that the main job directory is created."""
-        script_path = jobs_scripts_dir / "make_new_job.sh"
-        run_make_new_job(script_path, project_with_deepwork, "test_job")
-
-        job_dir = project_with_deepwork / ".deepwork" / "jobs" / "test_job"
-        assert job_dir.exists(), "Job directory should be created"
-        assert job_dir.is_dir(), "Job path should be a directory"
-
-    def test_creates_steps_directory(
-        self, jobs_scripts_dir: Path, project_with_deepwork: Path
-    ) -> None:
-        """Test that steps/ subdirectory is created."""
-        script_path = jobs_scripts_dir / "make_new_job.sh"
-        run_make_new_job(script_path, project_with_deepwork, "test_job")
-
-        steps_dir = project_with_deepwork / ".deepwork" / "jobs" / "test_job" / "steps"
-        assert steps_dir.exists(), "steps/ directory should be created"
-        assert steps_dir.is_dir(), "steps/ should be a directory"
-
-    def test_creates_hooks_directory(
-        self, jobs_scripts_dir: Path, project_with_deepwork: Path
-    ) -> None:
-        """Test that hooks/ subdirectory is created."""
-        script_path = jobs_scripts_dir / "make_new_job.sh"
-        run_make_new_job(script_path, project_with_deepwork, "test_job")
-
-        hooks_dir = project_with_deepwork / ".deepwork" / "jobs" / "test_job" / "hooks"
-        assert hooks_dir.exists(), "hooks/ directory should be created"
-        assert hooks_dir.is_dir(), "hooks/ should be a directory"
-
-    def test_creates_templates_directory(
-        self, jobs_scripts_dir: Path, project_with_deepwork: Path
-    ) -> None:
-        """Test that templates/ subdirectory is created."""
-        script_path = jobs_scripts_dir / "make_new_job.sh"
-        run_make_new_job(script_path, project_with_deepwork, "test_job")
-
-        templates_dir = project_with_deepwork / ".deepwork" / "jobs" / "test_job" / "templates"
-        assert templates_dir.exists(), "templates/ directory should be created"
-        assert templates_dir.is_dir(), "templates/ should be a directory"
-
-    def test_creates_gitkeep_files(
-        self, jobs_scripts_dir: Path, project_with_deepwork: Path
-    ) -> None:
-        """Test that .gitkeep files are created in empty directories."""
-        script_path = jobs_scripts_dir / "make_new_job.sh"
-        run_make_new_job(script_path, project_with_deepwork, "test_job")
-
-        job_dir = project_with_deepwork / ".deepwork" / "jobs" / "test_job"
-
-        hooks_gitkeep = job_dir / "hooks" / ".gitkeep"
-        templates_gitkeep = job_dir / "templates" / ".gitkeep"
-
-        assert hooks_gitkeep.exists(), "hooks/.gitkeep should be created"
-        assert templates_gitkeep.exists(), "templates/.gitkeep should be created"
-
-    def test_creates_agents_md(self, jobs_scripts_dir: Path, project_with_deepwork: Path) -> None:
-        """Test that AGENTS.md file is created."""
-        script_path = jobs_scripts_dir / "make_new_job.sh"
-        run_make_new_job(script_path, project_with_deepwork, "test_job")
-
-        agents_md = project_with_deepwork / ".deepwork" / "jobs" / "test_job" / "AGENTS.md"
-        assert agents_md.exists(), "AGENTS.md should be created"
-
-        content = agents_md.read_text()
-        assert "Job Management" in content, "AGENTS.md should have job management content"
-        assert "deepwork_jobs" in content, "AGENTS.md should reference deepwork_jobs"
-
-
-class TestMakeNewJobAgentsMdContent:
-    """Tests for AGENTS.md content in make_new_job.sh."""
-
-    def test_agents_md_contains_slash_commands(
-        self, jobs_scripts_dir: Path, project_with_deepwork: Path
-    ) -> None:
-        """Test that AGENTS.md lists recommended slash commands."""
-        script_path = jobs_scripts_dir / "make_new_job.sh"
-        run_make_new_job(script_path, project_with_deepwork, "test_job")
-
-        agents_md = project_with_deepwork / ".deepwork" / "jobs" / "test_job" / "AGENTS.md"
-        content = agents_md.read_text()
-
-        assert "/deepwork_jobs.define" in content, "Should mention define command"
-        assert "/deepwork_jobs.implement" in content, "Should mention implement command"
-        assert "/deepwork_jobs.learn" in content, "Should mention learn command"
-
-    def test_agents_md_contains_directory_structure(
-        self, jobs_scripts_dir: Path, project_with_deepwork: Path
-    ) -> None:
-        """Test that AGENTS.md documents the directory structure."""
-        script_path = jobs_scripts_dir / "make_new_job.sh"
-        run_make_new_job(script_path, project_with_deepwork, "test_job")
-
-        agents_md = project_with_deepwork / ".deepwork" / "jobs" / "test_job" / "AGENTS.md"
-        content = agents_md.read_text()
-
-        assert "job.yml" in content, "Should mention job.yml"
-        assert "steps/" in content, "Should document steps directory"
-        assert "hooks/" in content, "Should document hooks directory"
-        assert "templates/" in content, "Should document templates directory"
-
-
-class TestMakeNewJobErrorHandling:
-    """Tests for error handling in make_new_job.sh."""
-
-    def test_fails_if_job_already_exists(
-        self, jobs_scripts_dir: Path, project_with_deepwork: Path
-    ) -> None:
-        """Test that creating a job that already exists fails."""
-        # First create the job
-        script_path = jobs_scripts_dir / "make_new_job.sh"
-        run_make_new_job(script_path, project_with_deepwork, "existing_job")
-
-        # Try to create it again
-        stdout, stderr, code = run_make_new_job(script_path, project_with_deepwork, "existing_job")
-
-        assert code != 0, "Should fail when job already exists"
-        output = stdout + stderr
-        assert "exist" in output.lower() or "error" in output.lower(), (
-            "Should mention that job exists"
-        )
-
-    def test_creates_deepwork_directory_if_missing(
-        self, jobs_scripts_dir: Path, project_dir: Path
-    ) -> None:
-        """Test that .deepwork/jobs is created if it doesn't exist."""
-        script_path = jobs_scripts_dir / "make_new_job.sh"
-        stdout, stderr, code = run_make_new_job(script_path, project_dir, "new_job")
-
-        assert code == 0, f"Should succeed even without .deepwork. stderr: {stderr}"
-
-        job_dir = project_dir / ".deepwork" / "jobs" / "new_job"
-        assert job_dir.exists(), "Should create .deepwork/jobs/new_job"
-
-
-class TestMakeNewJobOutput:
-    """Tests for output messages in make_new_job.sh."""
-
-    def test_shows_success_message(
-        self, jobs_scripts_dir: Path, project_with_deepwork: Path
-    ) -> None:
-        """Test that success message is shown."""
-        script_path = jobs_scripts_dir / "make_new_job.sh"
-        stdout, stderr, code = run_make_new_job(script_path, project_with_deepwork, "new_job")
-
-        assert code == 0, f"Should succeed. stderr: {stderr}"
-        # Check for informational output
-        assert "new_job" in stdout, "Output should mention job name"
-
-    def test_shows_next_steps(self, jobs_scripts_dir: Path, project_with_deepwork: Path) -> None:
-        """Test that next steps are shown after creation."""
-        script_path = jobs_scripts_dir / "make_new_job.sh"
-        stdout, stderr, code = run_make_new_job(script_path, project_with_deepwork, "new_job")
-
-        assert code == 0, f"Should succeed. stderr: {stderr}"
-        # Should mention what to do next
-        assert "next" in stdout.lower() or "step" in stdout.lower(), "Should show next steps"
-
-    def test_shows_directory_structure_created(
-        self, jobs_scripts_dir: Path, project_with_deepwork: Path
-    ) -> None:
-        """Test that created directory structure is shown."""
-        script_path = jobs_scripts_dir / "make_new_job.sh"
-        stdout, stderr, code = run_make_new_job(script_path, project_with_deepwork, "new_job")
-
-        assert code == 0, f"Should succeed. stderr: {stderr}"
-        # Should show what was created
-        assert "AGENTS.md" in stdout or "steps" in stdout, "Should show created structure"
diff --git a/tests/shell_script_tests/test_rules_stop_hook.py b/tests/shell_script_tests/test_rules_stop_hook.py
deleted file mode 100644
index 23418021..00000000
--- a/tests/shell_script_tests/test_rules_stop_hook.py
+++ /dev/null
@@ -1,481 +0,0 @@
-"""Tests for the rules stop hook (deepwork.hooks.rules_check).
-
-These tests verify that the rules stop hook correctly outputs JSON
-to block or allow the stop event in Claude Code.
-"""
-
-import json
-import os
-import subprocess
-import tempfile
-from pathlib import Path
-
-import pytest
-from git import Repo
-
-
-@pytest.fixture
-def git_repo_with_src_rule(tmp_path: Path) -> Path:
-    """Create a git repo with a v2 rule file that triggers on src/** changes."""
-    repo = Repo.init(tmp_path)
-
-    readme = tmp_path / "README.md"
-    readme.write_text("# Test Project\n")
-    repo.index.add(["README.md"])
-    repo.index.commit("Initial commit")
-
-    # Create v2 rules directory and file
-    rules_dir = tmp_path / ".deepwork" / "rules"
-    rules_dir.mkdir(parents=True, exist_ok=True)
-
-    # Use compare_to: prompt since test repos don't have origin remote
-    rule_file = rules_dir / "test-rule.md"
-    rule_file.write_text(
-        """---
-name: Test Rule
-trigger: "src/**/*"
-compare_to: prompt
----
-This is a test rule that fires when src/ files change.
-Please address this rule.
-"""
-    )
-
-    # Empty baseline means all current files are "new"
-    deepwork_dir = tmp_path / ".deepwork"
-    (deepwork_dir / ".last_work_tree").write_text("")
-
-    return tmp_path
-
-
-def run_stop_hook(
-    cwd: Path,
-    hook_input: dict | None = None,
-    src_dir: Path | None = None,
-) -> tuple[str, str, int]:
-    """Run the rules_check module and return its output."""
-    env = os.environ.copy()
-    env["DEEPWORK_HOOK_PLATFORM"] = "claude"
-    if src_dir:
-        env["PYTHONPATH"] = str(src_dir)
-
-    stdin_data = json.dumps(hook_input) if hook_input else ""
-
-    result = subprocess.run(
-        ["python", "-m", "deepwork.hooks.rules_check"],
-        cwd=cwd,
-        capture_output=True,
-        text=True,
-        input=stdin_data,
-        env=env,
-    )
-
-    return result.stdout, result.stderr, result.returncode
-
-
-class TestRulesStopHookBlocking:
-    """Tests for rules stop hook blocking behavior."""
-
-    def test_outputs_block_json_when_rule_fires(
-        self, src_dir: Path, git_repo_with_src_rule: Path
-    ) -> None:
-        """Test that the hook outputs blocking JSON when a rule fires."""
-        # Create a file that triggers the rule
-        test_src_dir = git_repo_with_src_rule / "src"
-        test_src_dir.mkdir(exist_ok=True)
-        (test_src_dir / "main.py").write_text("# New file\n")
-
-        # Stage the change
-        repo = Repo(git_repo_with_src_rule)
-        repo.index.add(["src/main.py"])
-
-        # Run the stop hook
-        stdout, stderr, code = run_stop_hook(git_repo_with_src_rule, src_dir=src_dir)
-
-        # Parse the output as JSON
-        output = stdout.strip()
-        assert output, f"Expected JSON output but got empty string. stderr: {stderr}"
-
-        try:
-            result = json.loads(output)
-        except json.JSONDecodeError as e:
-            pytest.fail(f"Output is not valid JSON: {output!r}. Error: {e}")
-
-        # Verify the JSON has the blocking structure
-        assert "decision" in result, f"Expected 'decision' key in JSON: {result}"
-        assert result["decision"] == "block", f"Expected decision='block', got: {result}"
-        assert "reason" in result, f"Expected 'reason' key in JSON: {result}"
-        assert "Test Rule" in result["reason"], f"Rule name not in reason: {result}"
-
-    def test_outputs_empty_json_when_no_rule_fires(
-        self, src_dir: Path, git_repo_with_src_rule: Path
-    ) -> None:
-        """Test that the hook outputs empty JSON when no rule fires."""
-        # Don't create any files that would trigger the rule
-        # (rule triggers on src/** but we haven't created anything in src/)
-
-        # Run the stop hook
-        stdout, stderr, code = run_stop_hook(git_repo_with_src_rule, src_dir=src_dir)
-
-        # Parse the output as JSON
-        output = stdout.strip()
-        assert output, f"Expected JSON output but got empty string. stderr: {stderr}"
-
-        try:
-            result = json.loads(output)
-        except json.JSONDecodeError as e:
-            pytest.fail(f"Output is not valid JSON: {output!r}. Error: {e}")
-
-        # Should be empty JSON (no blocking)
-        assert result == {}, f"Expected empty JSON when no rules fire, got: {result}"
-
-    def test_exits_early_when_no_rules_dir(self, src_dir: Path, git_repo: Path) -> None:
-        """Test that the hook exits cleanly when no rules directory exists."""
-        stdout, stderr, code = run_stop_hook(git_repo, src_dir=src_dir)
-
-        # Should exit with code 0 and produce no output (or empty)
-        assert code == 0, f"Expected exit code 0, got {code}. stderr: {stderr}"
-        # No output is fine when there's no rules directory
-        output = stdout.strip()
-        if output:
-            # If there is output, it should be valid JSON
-            try:
-                result = json.loads(output)
-                assert result == {}, f"Expected empty JSON, got: {result}"
-            except json.JSONDecodeError:
-                # Empty or no output is acceptable
-                pass
-
-    def test_respects_promise_tags(self, src_dir: Path, git_repo_with_src_rule: Path) -> None:
-        """Test that promised rules are not re-triggered."""
-        # Create a file that triggers the rule
-        test_src_dir = git_repo_with_src_rule / "src"
-        test_src_dir.mkdir(exist_ok=True)
-        (test_src_dir / "main.py").write_text("# New file\n")
-
-        # Stage the change
-        repo = Repo(git_repo_with_src_rule)
-        repo.index.add(["src/main.py"])
-
-        # Create a mock transcript with the promise tag
-        with tempfile.NamedTemporaryFile(mode="w", suffix=".jsonl", delete=False) as f:
-            transcript_path = f.name
-            # Write a mock assistant message with the promise tag
-            f.write(
-                json.dumps(
-                    {
-                        "role": "assistant",
-                        "message": {
-                            "content": [
-                                {
-                                    "type": "text",
-                                    "text": "I've addressed the rule. <promise>Test Rule</promise>",
-                                }
-                            ]
-                        },
-                    }
-                )
-            )
-            f.write("\n")
-
-        try:
-            # Run the stop hook with transcript path
-            hook_input = {"transcript_path": transcript_path, "hook_event_name": "Stop"}
-            stdout, stderr, code = run_stop_hook(
-                git_repo_with_src_rule, hook_input, src_dir=src_dir
-            )
-
-            # Parse the output
-            output = stdout.strip()
-            assert output, f"Expected JSON output. stderr: {stderr}"
-
-            result = json.loads(output)
-
-            # Should be empty JSON because the rule was promised
-            assert result == {}, f"Expected empty JSON when rule is promised, got: {result}"
-        finally:
-            os.unlink(transcript_path)
-
-    def test_safety_pattern_prevents_firing(self, src_dir: Path, tmp_path: Path) -> None:
-        """Test that safety patterns prevent rules from firing."""
-        # Initialize git repo
-        repo = Repo.init(tmp_path)
-
-        readme = tmp_path / "README.md"
-        readme.write_text("# Test Project\n")
-        repo.index.add(["README.md"])
-        repo.index.commit("Initial commit")
-
-        # Create v2 rule with a safety pattern
-        rules_dir = tmp_path / ".deepwork" / "rules"
-        rules_dir.mkdir(parents=True, exist_ok=True)
-
-        rule_file = rules_dir / "documentation-rule.md"
-        rule_file.write_text(
-            """---
-name: Documentation Rule
-trigger: "src/**/*"
-safety: "docs/**/*"
-compare_to: prompt
----
-Update documentation when changing source files.
-"""
-        )
-
-        # Create .deepwork directory with empty baseline
-        deepwork_dir = tmp_path / ".deepwork"
-        (deepwork_dir / ".last_work_tree").write_text("")
-
-        # Create both trigger and safety files
-        test_src_dir = tmp_path / "src"
-        test_src_dir.mkdir(exist_ok=True)
-        (test_src_dir / "main.py").write_text("# Source file\n")
-
-        docs_dir = tmp_path / "docs"
-        docs_dir.mkdir(exist_ok=True)
-        (docs_dir / "api.md").write_text("# API docs\n")
-
-        # Stage both changes so they appear in git diff --cached
-        repo.index.add(["src/main.py", "docs/api.md"])
-
-        # Run the stop hook
-        stdout, stderr, code = run_stop_hook(tmp_path, src_dir=src_dir)
-
-        # Parse the output
-        output = stdout.strip()
-        assert output, f"Expected JSON output. stderr: {stderr}"
-
-        result = json.loads(output)
-
-        # Should be empty JSON because safety pattern matched
-        assert result == {}, f"Expected empty JSON when safety pattern matches, got: {result}"
-
-
-class TestRulesStopHookJsonFormat:
-    """Tests for the JSON output format of the rules stop hook."""
-
-    def test_json_has_correct_structure(self, src_dir: Path, git_repo_with_src_rule: Path) -> None:
-        """Test that blocking JSON has the correct Claude Code structure."""
-        # Create a file that triggers the rule
-        test_src_dir = git_repo_with_src_rule / "src"
-        test_src_dir.mkdir(exist_ok=True)
-        (test_src_dir / "main.py").write_text("# New file\n")
-
-        repo = Repo(git_repo_with_src_rule)
-        repo.index.add(["src/main.py"])
-
-        stdout, stderr, code = run_stop_hook(git_repo_with_src_rule, src_dir=src_dir)
-
-        result = json.loads(stdout.strip())
-
-        # Verify exact structure expected by Claude Code
-        assert set(result.keys()) == {
-            "decision",
-            "reason",
-        }, f"Unexpected keys in JSON: {result.keys()}"
-        assert result["decision"] == "block"
-        assert isinstance(result["reason"], str)
-        assert len(result["reason"]) > 0
-
-    def test_reason_contains_rule_instructions(
-        self, src_dir: Path, git_repo_with_src_rule: Path
-    ) -> None:
-        """Test that the reason includes the rule instructions."""
-        test_src_dir = git_repo_with_src_rule / "src"
-        test_src_dir.mkdir(exist_ok=True)
-        (test_src_dir / "main.py").write_text("# New file\n")
-
-        repo = Repo(git_repo_with_src_rule)
-        repo.index.add(["src/main.py"])
-
-        stdout, stderr, code = run_stop_hook(git_repo_with_src_rule, src_dir=src_dir)
-
-        result = json.loads(stdout.strip())
-
-        # Check that the reason contains the rule content
-        reason = result["reason"]
-        assert "DeepWork Rules Triggered" in reason
-        assert "Test Rule" in reason
-        assert "test rule that fires" in reason
-
-
-class TestRulesStopHookInfiniteLoopPrevention:
-    """Tests for preventing infinite loops in rules stop hook."""
-
-    def test_queued_prompt_rule_does_not_refire(
-        self, src_dir: Path, git_repo_with_src_rule: Path
-    ) -> None:
-        """Test that a prompt rule with QUEUED status doesn't fire again.
-
-        This prevents infinite loops when the transcript is unavailable or
-        promise tags haven't been written yet.
-        """
-        # Create a file that triggers the rule
-        test_src_dir = git_repo_with_src_rule / "src"
-        test_src_dir.mkdir(exist_ok=True)
-        (test_src_dir / "main.py").write_text("# New file\n")
-
-        # Stage the change
-        repo = Repo(git_repo_with_src_rule)
-        repo.index.add(["src/main.py"])
-
-        # First run: rule should fire and create queue entry
-        stdout1, stderr1, code1 = run_stop_hook(git_repo_with_src_rule, src_dir=src_dir)
-        result1 = json.loads(stdout1.strip())
-        assert result1.get("decision") == "block", f"First run should block: {result1}"
-        assert "Test Rule" in result1.get("reason", "")
-
-        # Second run: rule should NOT fire again (already QUEUED)
-        # Note: No transcript with promise tag, but the queue entry prevents re-firing
-        stdout2, stderr2, code2 = run_stop_hook(git_repo_with_src_rule, src_dir=src_dir)
-        result2 = json.loads(stdout2.strip())
-        assert result2 == {}, f"Second run should not block (rule already queued): {result2}"
-
-    def test_rule_fires_again_after_queue_cleared(
-        self, src_dir: Path, git_repo_with_src_rule: Path
-    ) -> None:
-        """Test that a rule fires again after the queue is cleared."""
-        # Create a file that triggers the rule
-        test_src_dir = git_repo_with_src_rule / "src"
-        test_src_dir.mkdir(exist_ok=True)
-        (test_src_dir / "main.py").write_text("# New file\n")
-
-        # Stage the change
-        repo = Repo(git_repo_with_src_rule)
-        repo.index.add(["src/main.py"])
-
-        # First run: rule should fire
-        stdout1, stderr1, code1 = run_stop_hook(git_repo_with_src_rule, src_dir=src_dir)
-        result1 = json.loads(stdout1.strip())
-        assert result1.get("decision") == "block"
-
-        # Clear the queue
-        queue_dir = git_repo_with_src_rule / ".deepwork" / "tmp" / "rules" / "queue"
-        if queue_dir.exists():
-            for f in queue_dir.glob("*.json"):
-                f.unlink()
-
-        # Third run: rule should fire again (queue cleared)
-        stdout3, stderr3, code3 = run_stop_hook(git_repo_with_src_rule, src_dir=src_dir)
-        result3 = json.loads(stdout3.strip())
-        assert result3.get("decision") == "block", f"Rule should fire again: {result3}"
-
-    def test_promise_tag_still_prevents_firing(
-        self, src_dir: Path, git_repo_with_src_rule: Path
-    ) -> None:
-        """Test that promise tags still prevent rules from firing.
-
-        Even with the queue-based fix, promise tags should work when
-        the transcript is available.
-        """
-        # Create a file that triggers the rule
-        test_src_dir = git_repo_with_src_rule / "src"
-        test_src_dir.mkdir(exist_ok=True)
-        (test_src_dir / "main.py").write_text("# New file\n")
-
-        # Stage the change
-        repo = Repo(git_repo_with_src_rule)
-        repo.index.add(["src/main.py"])
-
-        # Create a transcript with promise tag (simulating agent response)
-        with tempfile.NamedTemporaryFile(mode="w", suffix=".jsonl", delete=False) as f:
-            transcript_path = f.name
-            f.write(
-                json.dumps(
-                    {
-                        "role": "assistant",
-                        "message": {
-                            "content": [
-                                {
-                                    "type": "text",
-                                    "text": "<promise>Test Rule</promise>",
-                                }
-                            ]
-                        },
-                    }
-                )
-            )
-            f.write("\n")
-
-        try:
-            # Run with transcript: rule should NOT fire (promise tag found)
-            hook_input = {"transcript_path": transcript_path, "hook_event_name": "Stop"}
-            stdout, stderr, code = run_stop_hook(
-                git_repo_with_src_rule, hook_input, src_dir=src_dir
-            )
-            result = json.loads(stdout.strip())
-            assert result == {}, f"Rule should not fire with promise tag: {result}"
-        finally:
-            os.unlink(transcript_path)
-
-
-class TestSubagentStopEvent:
-    """Tests for SubagentStop event triggering agentFinished rules."""
-
-    def test_subagent_stop_event_triggers_rules(
-        self, src_dir: Path, git_repo_with_src_rule: Path
-    ) -> None:
-        """Test that SubagentStop event triggers agentFinished rules.
-
-        Claude Code has both Stop and SubagentStop events that should both
-        trigger after_agent/agentFinished rules.
-        """
-        # Create a file that triggers the rule
-        test_src_dir = git_repo_with_src_rule / "src"
-        test_src_dir.mkdir(exist_ok=True)
-        (test_src_dir / "main.py").write_text("# New file\n")
-
-        # Stage the change
-        repo = Repo(git_repo_with_src_rule)
-        repo.index.add(["src/main.py"])
-
-        # Run with SubagentStop event
-        hook_input = {"hook_event_name": "SubagentStop"}
-        stdout, stderr, code = run_stop_hook(git_repo_with_src_rule, hook_input, src_dir=src_dir)
-
-        # Parse the output
-        output = stdout.strip()
-        assert output, f"Expected JSON output. stderr: {stderr}"
-        result = json.loads(output)
-
-        # Should trigger the rule just like Stop event does
-        assert result.get("decision") == "block", f"SubagentStop should trigger rules: {result}"
-        assert "Test Rule" in result.get("reason", "")
-
-    def test_both_stop_and_subagent_stop_trigger_same_rules(
-        self, src_dir: Path, git_repo_with_src_rule: Path
-    ) -> None:
-        """Test that Stop and SubagentStop events trigger the same rules.
-
-        Both events should fire agentFinished rules with identical behavior.
-        """
-        # Create a file that triggers the rule
-        test_src_dir = git_repo_with_src_rule / "src"
-        test_src_dir.mkdir(exist_ok=True)
-        (test_src_dir / "main.py").write_text("# New file\n")
-
-        repo = Repo(git_repo_with_src_rule)
-        repo.index.add(["src/main.py"])
-
-        # Test Stop event
-        hook_input_stop = {"hook_event_name": "Stop"}
-        stdout_stop, _, _ = run_stop_hook(git_repo_with_src_rule, hook_input_stop, src_dir=src_dir)
-        result_stop = json.loads(stdout_stop.strip())
-
-        # Clear the queue to allow the rule to fire again
-        queue_dir = git_repo_with_src_rule / ".deepwork" / "tmp" / "rules" / "queue"
-        if queue_dir.exists():
-            for f in queue_dir.glob("*.json"):
-                f.unlink()
-
-        # Test SubagentStop event
-        hook_input_subagent = {"hook_event_name": "SubagentStop"}
-        stdout_subagent, _, _ = run_stop_hook(
-            git_repo_with_src_rule, hook_input_subagent, src_dir=src_dir
-        )
-        result_subagent = json.loads(stdout_subagent.strip())
-
-        # Both should produce the same blocking behavior
-        assert result_stop.get("decision") == result_subagent.get("decision") == "block"
-        assert "Test Rule" in result_stop.get("reason", "")
-        assert "Test Rule" in result_subagent.get("reason", "")
diff --git a/tests/shell_script_tests/test_user_prompt_submit.py b/tests/shell_script_tests/test_user_prompt_submit.py
deleted file mode 100644
index 3f1b655e..00000000
--- a/tests/shell_script_tests/test_user_prompt_submit.py
+++ /dev/null
@@ -1,166 +0,0 @@
-"""Tests for user_prompt_submit.sh shell script.
-
-This script is called as a Claude Code UserPromptSubmit hook.
-It should:
-1. Execute successfully (exit code 0)
-2. Output valid JSON or no output (hooks allow both)
-3. Capture work tree state by calling capture_prompt_work_tree.sh
-"""
-
-import json
-from pathlib import Path
-
-import pytest
-from git import Repo
-
-from .conftest import run_shell_script
-
-
-def run_user_prompt_submit_hook(
-    script_path: Path,
-    cwd: Path,
-    hook_input: dict | None = None,
-) -> tuple[str, str, int]:
-    """Run the user_prompt_submit.sh script and return its output."""
-    return run_shell_script(script_path, cwd, hook_input=hook_input)
-
-
-class TestUserPromptSubmitHookExecution:
-    """Tests for user_prompt_submit.sh execution behavior."""
-
-    def test_exits_successfully(self, rules_hooks_dir: Path, git_repo: Path) -> None:
-        """Test that the hook exits with code 0."""
-        script_path = rules_hooks_dir / "user_prompt_submit.sh"
-        stdout, stderr, code = run_user_prompt_submit_hook(script_path, git_repo)
-
-        assert code == 0, f"Expected exit code 0, got {code}. stderr: {stderr}"
-
-    def test_creates_deepwork_directory(self, rules_hooks_dir: Path, git_repo: Path) -> None:
-        """Test that the hook creates .deepwork directory if it doesn't exist."""
-        deepwork_dir = git_repo / ".deepwork"
-        assert not deepwork_dir.exists(), "Precondition: .deepwork should not exist"
-
-        script_path = rules_hooks_dir / "user_prompt_submit.sh"
-        stdout, stderr, code = run_user_prompt_submit_hook(script_path, git_repo)
-
-        assert code == 0, f"Script failed with stderr: {stderr}"
-        assert deepwork_dir.exists(), "Hook should create .deepwork directory"
-
-    def test_creates_last_work_tree_file(self, rules_hooks_dir: Path, git_repo: Path) -> None:
-        """Test that the hook creates .deepwork/.last_work_tree file."""
-        script_path = rules_hooks_dir / "user_prompt_submit.sh"
-        stdout, stderr, code = run_user_prompt_submit_hook(script_path, git_repo)
-
-        work_tree_file = git_repo / ".deepwork" / ".last_work_tree"
-        assert code == 0, f"Script failed with stderr: {stderr}"
-        assert work_tree_file.exists(), "Hook should create .last_work_tree file"
-
-    def test_captures_staged_changes(self, rules_hooks_dir: Path, git_repo: Path) -> None:
-        """Test that the hook captures staged file changes."""
-        # Create and stage a new file
-        new_file = git_repo / "new_file.py"
-        new_file.write_text("# New file\n")
-        repo = Repo(git_repo)
-        repo.index.add(["new_file.py"])
-
-        script_path = rules_hooks_dir / "user_prompt_submit.sh"
-        stdout, stderr, code = run_user_prompt_submit_hook(script_path, git_repo)
-
-        assert code == 0, f"Script failed with stderr: {stderr}"
-
-        work_tree_file = git_repo / ".deepwork" / ".last_work_tree"
-        content = work_tree_file.read_text()
-        assert "new_file.py" in content, "Staged file should be captured"
-
-    def test_captures_untracked_files(self, rules_hooks_dir: Path, git_repo: Path) -> None:
-        """Test that the hook captures untracked files."""
-        # Create an untracked file (don't stage it)
-        untracked = git_repo / "untracked.txt"
-        untracked.write_text("untracked content\n")
-
-        script_path = rules_hooks_dir / "user_prompt_submit.sh"
-        stdout, stderr, code = run_user_prompt_submit_hook(script_path, git_repo)
-
-        assert code == 0, f"Script failed with stderr: {stderr}"
-
-        work_tree_file = git_repo / ".deepwork" / ".last_work_tree"
-        content = work_tree_file.read_text()
-        # After running the hook, files are staged, so check for the file
-        assert "untracked.txt" in content, "Untracked file should be captured"
-
-
-class TestUserPromptSubmitHookJsonOutput:
-    """Tests for user_prompt_submit.sh JSON output format.
-
-    Claude Code UserPromptSubmit hooks can output:
-    - Empty output (most common for side-effect-only hooks)
-    - Valid JSON (if the hook needs to communicate something)
-
-    Either is acceptable; invalid JSON is NOT acceptable.
-    """
-
-    def test_output_is_empty_or_valid_json(self, rules_hooks_dir: Path, git_repo: Path) -> None:
-        """Test that output is either empty or valid JSON."""
-        script_path = rules_hooks_dir / "user_prompt_submit.sh"
-        stdout, stderr, code = run_user_prompt_submit_hook(script_path, git_repo)
-
-        output = stdout.strip()
-
-        if output:
-            # If there's output, it must be valid JSON
-            try:
-                result = json.loads(output)
-                assert isinstance(result, dict), "JSON output should be an object"
-            except json.JSONDecodeError as e:
-                pytest.fail(f"Output is not valid JSON: {output!r}. Error: {e}")
-
-    def test_does_not_block_prompt(self, rules_hooks_dir: Path, git_repo: Path) -> None:
-        """Test that the hook does not return a blocking response."""
-        script_path = rules_hooks_dir / "user_prompt_submit.sh"
-        stdout, stderr, code = run_user_prompt_submit_hook(script_path, git_repo)
-
-        output = stdout.strip()
-
-        if output:
-            try:
-                result = json.loads(output)
-                # UserPromptSubmit hooks should not block
-                assert result.get("decision") != "block", (
-                    "UserPromptSubmit hook should not block prompt submission"
-                )
-            except json.JSONDecodeError:
-                pass  # Empty or non-JSON output is fine
-
-
-class TestUserPromptSubmitHookIdempotence:
-    """Tests for idempotent behavior of user_prompt_submit.sh."""
-
-    def test_multiple_runs_succeed(self, rules_hooks_dir: Path, git_repo: Path) -> None:
-        """Test that the hook can be run multiple times successfully."""
-        script_path = rules_hooks_dir / "user_prompt_submit.sh"
-
-        # Run multiple times
-        for i in range(3):
-            stdout, stderr, code = run_user_prompt_submit_hook(script_path, git_repo)
-            assert code == 0, f"Run {i + 1} failed with stderr: {stderr}"
-
-    def test_updates_work_tree_on_new_changes(self, rules_hooks_dir: Path, git_repo: Path) -> None:
-        """Test that subsequent runs update the work tree state."""
-        script_path = rules_hooks_dir / "user_prompt_submit.sh"
-        repo = Repo(git_repo)
-
-        # First run - capture initial state
-        run_user_prompt_submit_hook(script_path, git_repo)
-        work_tree_file = git_repo / ".deepwork" / ".last_work_tree"
-        assert work_tree_file.exists(), "Work tree file should exist after first run"
-
-        # Create and stage a new file
-        new_file = git_repo / "another_file.py"
-        new_file.write_text("# Another file\n")
-        repo.index.add(["another_file.py"])
-
-        # Second run - should capture new file
-        run_user_prompt_submit_hook(script_path, git_repo)
-        updated_content = work_tree_file.read_text()
-
-        assert "another_file.py" in updated_content, "New file should be captured"
diff --git a/tests/unit/mcp/__init__.py b/tests/unit/mcp/__init__.py
new file mode 100644
index 00000000..34e50282
--- /dev/null
+++ b/tests/unit/mcp/__init__.py
@@ -0,0 +1 @@
+"""Tests for MCP module."""
diff --git a/tests/unit/mcp/test_async_interface.py b/tests/unit/mcp/test_async_interface.py
new file mode 100644
index 00000000..9c732c52
--- /dev/null
+++ b/tests/unit/mcp/test_async_interface.py
@@ -0,0 +1,198 @@
+"""Tests to ensure the MCP interface remains async.
+
+These tests serve as a regression guard to ensure that key async methods
+don't accidentally get converted back to sync methods, which would break
+concurrency guarantees.
+"""
+
+import asyncio
+import inspect
+from pathlib import Path
+
+from deepwork.mcp.claude_cli import ClaudeCLI
+from deepwork.mcp.quality_gate import MockQualityGate, QualityGate
+from deepwork.mcp.state import StateManager
+from deepwork.mcp.tools import WorkflowTools
+
+
+class TestAsyncInterfaceRegression:
+    """Tests that verify async interface contract is maintained."""
+
+    def test_state_manager_async_methods(self) -> None:
+        """Verify StateManager methods that must be async remain async."""
+        async_methods = [
+            "create_session",
+            "load_session",
+            "start_step",
+            "complete_step",
+            "record_quality_attempt",
+            "advance_to_step",
+            "complete_workflow",
+            "abort_workflow",
+            "list_sessions",
+            "find_active_sessions_for_workflow",
+            "delete_session",
+        ]
+
+        for method_name in async_methods:
+            method = getattr(StateManager, method_name)
+            assert inspect.iscoroutinefunction(method), (
+                f"StateManager.{method_name} must be async (coroutine function). "
+                f"This is required for concurrent access safety."
+            )
+
+    def test_state_manager_has_lock(self, tmp_path: Path) -> None:
+        """Verify StateManager has an asyncio.Lock for thread safety."""
+        manager = StateManager(tmp_path)
+
+        assert hasattr(manager, "_lock"), "StateManager must have _lock attribute"
+        assert isinstance(manager._lock, asyncio.Lock), (
+            "StateManager._lock must be an asyncio.Lock for async concurrency safety"
+        )
+
+    def test_state_manager_has_session_stack(self, tmp_path: Path) -> None:
+        """Verify StateManager uses a session stack for nested workflows."""
+        manager = StateManager(tmp_path)
+
+        assert hasattr(manager, "_session_stack"), "StateManager must have _session_stack attribute"
+        assert isinstance(manager._session_stack, list), (
+            "StateManager._session_stack must be a list for nested workflow support"
+        )
+
+    def test_workflow_tools_async_methods(self) -> None:
+        """Verify WorkflowTools methods that must be async remain async."""
+        async_methods = [
+            "start_workflow",
+            "finished_step",
+            "abort_workflow",
+        ]
+
+        for method_name in async_methods:
+            method = getattr(WorkflowTools, method_name)
+            assert inspect.iscoroutinefunction(method), (
+                f"WorkflowTools.{method_name} must be async (coroutine function). "
+                f"This is required for non-blocking MCP tool execution."
+            )
+
+    def test_claude_cli_async_methods(self) -> None:
+        """Verify ClaudeCLI methods that must be async remain async."""
+        method = ClaudeCLI.run
+        assert inspect.iscoroutinefunction(method), (
+            "ClaudeCLI.run must be async (coroutine function). "
+            "This is required for non-blocking subprocess execution."
+        )
+
+    def test_quality_gate_async_methods(self) -> None:
+        """Verify QualityGate methods that must be async remain async."""
+        async_methods = [
+            "evaluate",
+            "_build_payload",
+        ]
+
+        for method_name in async_methods:
+            method = getattr(QualityGate, method_name)
+            assert inspect.iscoroutinefunction(method), (
+                f"QualityGate.{method_name} must be async (coroutine function). "
+                f"This is required for non-blocking subprocess execution."
+            )
+
+    def test_mock_quality_gate_async_methods(self) -> None:
+        """Verify MockQualityGate maintains async interface."""
+        method = MockQualityGate.evaluate
+        assert inspect.iscoroutinefunction(method), (
+            "MockQualityGate.evaluate must be async to match QualityGate interface"
+        )
+
+    async def test_concurrent_state_operations_are_serialized(self, tmp_path: Path) -> None:
+        """Test that concurrent state operations don't corrupt state.
+
+        This test verifies that the async lock properly serializes access
+        to shared state, preventing race conditions.
+        """
+        deepwork_dir = tmp_path / ".deepwork"
+        deepwork_dir.mkdir()
+        (deepwork_dir / "tmp").mkdir()
+
+        manager = StateManager(tmp_path)
+
+        # Create initial session
+        await manager.create_session(
+            job_name="test_job",
+            workflow_name="main",
+            goal="Test goal",
+            first_step_id="step1",
+        )
+
+        # Run multiple concurrent quality attempt recordings
+        async def record_attempt() -> int:
+            return await manager.record_quality_attempt("step1")
+
+        # Execute 10 concurrent recordings
+        results = await asyncio.gather(*[record_attempt() for _ in range(10)])
+
+        # Each should get a unique, sequential number (1-10)
+        assert sorted(results) == list(range(1, 11)), (
+            "Concurrent quality_attempt recordings should be serialized. "
+            f"Expected [1..10] but got {sorted(results)}"
+        )
+
+        # Verify final count is correct
+        final_session = manager.get_active_session()
+        assert final_session is not None
+        assert final_session.step_progress["step1"].quality_attempts == 10
+
+    async def test_concurrent_workflows_with_session_id_routing(self, tmp_path: Path) -> None:
+        """Test that two concurrent sessions can be routed correctly via session_id.
+
+        Two sessions are created on the stack. Concurrent finished_step-like
+        operations (complete_step) target different sessions via session_id
+        and don't interfere with each other.
+        """
+        deepwork_dir = tmp_path / ".deepwork"
+        deepwork_dir.mkdir()
+        (deepwork_dir / "tmp").mkdir()
+
+        manager = StateManager(tmp_path)
+
+        # Create two sessions on the stack
+        session1 = await manager.create_session(
+            job_name="job1",
+            workflow_name="wf1",
+            goal="Goal 1",
+            first_step_id="step_a",
+        )
+        session2 = await manager.create_session(
+            job_name="job2",
+            workflow_name="wf2",
+            goal="Goal 2",
+            first_step_id="step_x",
+        )
+
+        # Concurrent complete_step calls targeting different sessions
+        async def complete_session1() -> None:
+            await manager.complete_step(
+                step_id="step_a",
+                outputs={"out1": "file1.md"},
+                session_id=session1.session_id,
+            )
+
+        async def complete_session2() -> None:
+            await manager.complete_step(
+                step_id="step_x",
+                outputs={"out2": "file2.md"},
+                session_id=session2.session_id,
+            )
+
+        # Run concurrently
+        await asyncio.gather(complete_session1(), complete_session2())
+
+        # Verify each session got the right updates
+        assert "step_a" in session1.step_progress
+        assert session1.step_progress["step_a"].outputs == {"out1": "file1.md"}
+
+        assert "step_x" in session2.step_progress
+        assert session2.step_progress["step_x"].outputs == {"out2": "file2.md"}
+
+        # Cross-check: session1 should NOT have step_x, session2 should NOT have step_a
+        assert "step_x" not in session1.step_progress
+        assert "step_a" not in session2.step_progress
diff --git a/tests/unit/mcp/test_claude_cli.py b/tests/unit/mcp/test_claude_cli.py
new file mode 100644
index 00000000..f8a8a1b9
--- /dev/null
+++ b/tests/unit/mcp/test_claude_cli.py
@@ -0,0 +1,362 @@
+"""Tests for Claude CLI subprocess wrapper."""
+
+import json
+from collections.abc import Callable, Generator
+from contextlib import contextmanager
+from pathlib import Path
+from typing import Any
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from deepwork.mcp.claude_cli import ClaudeCLI, ClaudeCLIError
+
+
+def create_mock_subprocess(
+    response: dict[str, Any] | None = None,
+    returncode: int = 0,
+) -> tuple[list[str], Callable[..., MagicMock]]:
+    """Create a mock subprocess executor that captures commands.
+
+    Args:
+        response: The structured_output to return in the CLI wrapper.
+                  Defaults to an empty passing response.
+        returncode: The return code for the process.
+
+    Returns:
+        A tuple of (captured_cmd list, mock_create_subprocess_exec function).
+    """
+    if response is None:
+        response = {"result": "ok"}
+
+    captured_cmd: list[str] = []
+
+    async def mock_create_subprocess_exec(*cmd: str, **kwargs: Any) -> MagicMock:  # noqa: ARG001
+        captured_cmd.extend(cmd)
+        mock_process = MagicMock()
+        mock_process.returncode = returncode
+
+        async def mock_communicate(input: bytes = b"") -> tuple[bytes, bytes]:  # noqa: ARG001
+            wrapper = {
+                "type": "result",
+                "subtype": "success",
+                "is_error": False,
+                "structured_output": response,
+            }
+            return json.dumps(wrapper).encode(), b""
+
+        mock_process.communicate = mock_communicate
+        return mock_process
+
+    return captured_cmd, mock_create_subprocess_exec
+
+
+@contextmanager
+def patched_subprocess(
+    response: dict[str, Any] | None = None,
+    returncode: int = 0,
+) -> Generator[list[str], None, None]:
+    """Context manager that patches subprocess and yields captured command.
+
+    Args:
+        response: The structured_output to return. Defaults to a simple response.
+        returncode: The return code for the process.
+
+    Yields:
+        The list of captured command arguments.
+    """
+    captured_cmd, mock_subprocess = create_mock_subprocess(response, returncode)
+    with patch("asyncio.create_subprocess_exec", mock_subprocess):
+        yield captured_cmd
+
+
+TEST_SCHEMA: dict[str, Any] = {
+    "type": "object",
+    "required": ["value"],
+    "properties": {"value": {"type": "string"}},
+}
+
+
+class TestClaudeCLI:
+    """Tests for ClaudeCLI class."""
+
+    def test_init(self) -> None:
+        """Test ClaudeCLI initialization."""
+        cli = ClaudeCLI(timeout=60)
+        assert cli.timeout == 60
+
+    def test_init_defaults(self) -> None:
+        """Test ClaudeCLI default values."""
+        cli = ClaudeCLI()
+        assert cli.timeout == 120
+
+    async def test_run_returns_structured_output(self, tmp_path: Path) -> None:
+        """Test that run() returns the structured_output dict."""
+        cli = ClaudeCLI(timeout=10)
+        expected = {"value": "hello"}
+
+        with patched_subprocess(response=expected):
+            result = await cli.run(
+                prompt="test prompt",
+                system_prompt="test system",
+                json_schema=TEST_SCHEMA,
+                cwd=tmp_path,
+            )
+
+        assert result == expected
+
+    async def test_run_pipes_prompt_via_stdin(self, tmp_path: Path) -> None:
+        """Test that the prompt is piped via stdin."""
+        cli = ClaudeCLI(timeout=10)
+        captured_input: list[bytes] = []
+
+        async def mock_exec(*cmd: str, **kwargs: Any) -> MagicMock:  # noqa: ARG001
+            mock = MagicMock()
+            mock.returncode = 0
+
+            async def mock_communicate(input: bytes = b"") -> tuple[bytes, bytes]:
+                captured_input.append(input)
+                wrapper = {
+                    "type": "result",
+                    "subtype": "success",
+                    "is_error": False,
+                    "structured_output": {"value": "ok"},
+                }
+                return json.dumps(wrapper).encode(), b""
+
+            mock.communicate = mock_communicate
+            return mock
+
+        with patch("asyncio.create_subprocess_exec", mock_exec):
+            await cli.run(
+                prompt="my prompt text",
+                system_prompt="sys",
+                json_schema=TEST_SCHEMA,
+                cwd=tmp_path,
+            )
+
+        assert len(captured_input) == 1
+        assert captured_input[0] == b"my prompt text"
+
+
+class TestClaudeCLICommandConstruction:
+    """Tests for command construction."""
+
+    @staticmethod
+    def get_command_arg(captured_cmd: list[str], flag: str) -> str:
+        """Extract the argument value following a command flag."""
+        assert flag in captured_cmd, f"Expected {flag} in command, got: {captured_cmd}"
+        flag_index = captured_cmd.index(flag)
+        return captured_cmd[flag_index + 1]
+
+    async def test_command_includes_json_schema(self, tmp_path: Path) -> None:
+        """Test that the command includes --json-schema with the correct schema."""
+        cli = ClaudeCLI(timeout=10)
+
+        with patched_subprocess() as captured_cmd:
+            await cli.run(
+                prompt="test",
+                system_prompt="test",
+                json_schema=TEST_SCHEMA,
+                cwd=tmp_path,
+            )
+
+        schema_json = self.get_command_arg(captured_cmd, "--json-schema")
+        parsed_schema = json.loads(schema_json)
+        assert parsed_schema == TEST_SCHEMA
+
+    async def test_command_includes_system_prompt(self, tmp_path: Path) -> None:
+        """Test that the command includes --system-prompt."""
+        cli = ClaudeCLI(timeout=10)
+
+        with patched_subprocess() as captured_cmd:
+            await cli.run(
+                prompt="test",
+                system_prompt="You are a reviewer",
+                json_schema=TEST_SCHEMA,
+                cwd=tmp_path,
+            )
+
+        system_prompt = self.get_command_arg(captured_cmd, "--system-prompt")
+        assert system_prompt == "You are a reviewer"
+
+    async def test_command_has_correct_flag_ordering(self, tmp_path: Path) -> None:
+        """Test that flags come before -p -- for proper CLI invocation.
+
+        See doc/reference/calling_claude_in_print_mode.md for details on
+        why flag ordering matters.
+        """
+        cli = ClaudeCLI(timeout=10)
+
+        with patched_subprocess() as captured_cmd:
+            await cli.run(
+                prompt="test",
+                system_prompt="test",
+                json_schema=TEST_SCHEMA,
+                cwd=tmp_path,
+            )
+
+        assert captured_cmd[0] == "claude"
+        assert "--print" in captured_cmd
+        assert "--output-format" in captured_cmd
+        assert "-p" in captured_cmd
+        assert "--" in captured_cmd
+
+        # Verify -p -- comes last (after all other flags)
+        p_index = captured_cmd.index("-p")
+        dash_dash_index = captured_cmd.index("--")
+        json_schema_index = captured_cmd.index("--json-schema")
+        system_prompt_index = captured_cmd.index("--system-prompt")
+
+        assert json_schema_index < p_index, "Flags must come before -p"
+        assert system_prompt_index < p_index, "Flags must come before -p"
+        assert dash_dash_index == p_index + 1, "-- must immediately follow -p"
+
+    async def test_test_command_override(self, tmp_path: Path) -> None:
+        """Test that _test_command overrides the default command."""
+        cli = ClaudeCLI(timeout=10, _test_command=["echo", "test"])
+
+        with patched_subprocess() as captured_cmd:
+            await cli.run(
+                prompt="test",
+                system_prompt="sys prompt",
+                json_schema=TEST_SCHEMA,
+                cwd=tmp_path,
+            )
+
+        assert captured_cmd[0] == "echo"
+        assert captured_cmd[1] == "test"
+        assert "--system-prompt" in captured_cmd
+        assert "sys prompt" in captured_cmd
+        # _test_command should NOT include --json-schema
+        assert "--json-schema" not in captured_cmd
+
+
+class TestClaudeCLIWrapperParsing:
+    """Tests for Claude CLI response wrapper parsing."""
+
+    def test_parse_wrapper_valid(self) -> None:
+        """Test parsing a valid wrapper response."""
+        cli = ClaudeCLI()
+        response = json.dumps(
+            {
+                "type": "result",
+                "subtype": "success",
+                "is_error": False,
+                "structured_output": {"value": "hello"},
+            }
+        )
+
+        result = cli._parse_wrapper(response)
+        assert result == {"value": "hello"}
+
+    def test_parse_wrapper_error(self) -> None:
+        """Test parsing a wrapper with is_error=True."""
+        cli = ClaudeCLI()
+        response = json.dumps(
+            {
+                "type": "result",
+                "subtype": "error",
+                "is_error": True,
+                "result": "Something went wrong",
+            }
+        )
+
+        with pytest.raises(ClaudeCLIError, match="returned error"):
+            cli._parse_wrapper(response)
+
+    def test_parse_wrapper_missing_structured_output(self) -> None:
+        """Test parsing a wrapper missing structured_output field."""
+        cli = ClaudeCLI()
+        response = json.dumps(
+            {
+                "type": "result",
+                "subtype": "success",
+                "is_error": False,
+                "result": "Some text response",
+            }
+        )
+
+        with pytest.raises(ClaudeCLIError, match="missing 'structured_output'"):
+            cli._parse_wrapper(response)
+
+    def test_parse_wrapper_invalid_json(self) -> None:
+        """Test parsing invalid JSON."""
+        cli = ClaudeCLI()
+
+        with pytest.raises(ClaudeCLIError, match="Failed to parse"):
+            cli._parse_wrapper("This is not JSON")
+
+
+class TestClaudeCLIErrors:
+    """Tests for error handling."""
+
+    async def test_timeout_error(self, tmp_path: Path) -> None:
+        """Test that timeout raises ClaudeCLIError."""
+        import asyncio
+
+        cli = ClaudeCLI(timeout=0)
+
+        async def mock_exec(*cmd: str, **kwargs: Any) -> MagicMock:  # noqa: ARG001
+            mock = MagicMock()
+
+            async def mock_communicate(input: bytes = b"") -> tuple[bytes, bytes]:  # noqa: ARG001
+                await asyncio.sleep(10)
+                return b"", b""
+
+            mock.communicate = mock_communicate
+            mock.kill = MagicMock()
+
+            async def mock_wait() -> None:
+                pass
+
+            mock.wait = mock_wait
+            return mock
+
+        with patch("asyncio.create_subprocess_exec", mock_exec):
+            with pytest.raises(ClaudeCLIError, match="timed out"):
+                await cli.run(
+                    prompt="test",
+                    system_prompt="test",
+                    json_schema=TEST_SCHEMA,
+                    cwd=tmp_path,
+                )
+
+    async def test_nonzero_exit_code(self, tmp_path: Path) -> None:
+        """Test that non-zero exit code raises ClaudeCLIError."""
+        cli = ClaudeCLI(timeout=10)
+
+        async def mock_exec(*cmd: str, **kwargs: Any) -> MagicMock:  # noqa: ARG001
+            mock = MagicMock()
+            mock.returncode = 1
+
+            async def mock_communicate(input: bytes = b"") -> tuple[bytes, bytes]:  # noqa: ARG001
+                return b"", b"error output"
+
+            mock.communicate = mock_communicate
+            return mock
+
+        with patch("asyncio.create_subprocess_exec", mock_exec):
+            with pytest.raises(ClaudeCLIError, match="exit code 1"):
+                await cli.run(
+                    prompt="test",
+                    system_prompt="test",
+                    json_schema=TEST_SCHEMA,
+                    cwd=tmp_path,
+                )
+
+    async def test_command_not_found(self, tmp_path: Path) -> None:
+        """Test that missing command raises ClaudeCLIError."""
+        cli = ClaudeCLI(timeout=10)
+
+        async def mock_exec(*cmd: str, **kwargs: Any) -> MagicMock:  # noqa: ARG001
+            raise FileNotFoundError("No such file")
+
+        with patch("asyncio.create_subprocess_exec", mock_exec):
+            with pytest.raises(ClaudeCLIError, match="command not found"):
+                await cli.run(
+                    prompt="test",
+                    system_prompt="test",
+                    json_schema=TEST_SCHEMA,
+                    cwd=tmp_path,
+                )
diff --git a/tests/unit/mcp/test_quality_gate.py b/tests/unit/mcp/test_quality_gate.py
new file mode 100644
index 00000000..c6ab3c43
--- /dev/null
+++ b/tests/unit/mcp/test_quality_gate.py
@@ -0,0 +1,758 @@
+"""Tests for MCP quality gate."""
+
+from pathlib import Path
+from typing import Any
+from unittest.mock import AsyncMock
+
+import pytest
+
+from deepwork.mcp.claude_cli import ClaudeCLI, ClaudeCLIError
+from deepwork.mcp.quality_gate import (
+    QUALITY_GATE_RESPONSE_SCHEMA,
+    MockQualityGate,
+    QualityGate,
+    QualityGateError,
+)
+
+
+@pytest.fixture
+def project_root(tmp_path: Path) -> Path:
+    """Create a temporary project root."""
+    return tmp_path
+
+
+@pytest.fixture
+def mock_cli() -> ClaudeCLI:
+    """Create a ClaudeCLI with a mocked run method."""
+    cli = ClaudeCLI(timeout=10)
+    cli.run = AsyncMock(return_value={"passed": True, "feedback": "OK", "criteria_results": []})
+    return cli
+
+
+@pytest.fixture
+def quality_gate(mock_cli: ClaudeCLI) -> QualityGate:
+    """Create a QualityGate instance with mocked CLI."""
+    return QualityGate(cli=mock_cli)
+
+
+@pytest.fixture
+def output_file(project_root: Path) -> Path:
+    """Create a test output file with default content."""
+    output = project_root / "output.md"
+    output.write_text("Test content")
+    return output
+
+
+class TestQualityGate:
+    """Tests for QualityGate class."""
+
+    def test_init_default_cli(self) -> None:
+        """Test QualityGate creates a default ClaudeCLI if none provided."""
+        gate = QualityGate()
+        assert isinstance(gate._cli, ClaudeCLI)
+
+    def test_init_custom_cli(self, mock_cli: ClaudeCLI) -> None:
+        """Test QualityGate uses provided ClaudeCLI."""
+        gate = QualityGate(cli=mock_cli)
+        assert gate._cli is mock_cli
+
+    def test_build_instructions(self, quality_gate: QualityGate) -> None:
+        """Test building system instructions with dict format."""
+        instructions = quality_gate._build_instructions(
+            quality_criteria={
+                "Output Exists": "Does the output file exist?",
+                "Output Valid": "Is the output valid?",
+            },
+        )
+
+        assert "**Output Exists**" in instructions
+        assert "Does the output file exist?" in instructions
+        assert "**Output Valid**" in instructions
+        assert "Is the output valid?" in instructions
+        assert "editor" in instructions.lower()
+        assert "passed" in instructions  # JSON format mentioned
+        assert "feedback" in instructions  # JSON format mentioned
+
+    def test_build_instructions_with_guidance(self, quality_gate: QualityGate) -> None:
+        """Test that additional_review_guidance appears in system instructions."""
+        instructions = quality_gate._build_instructions(
+            quality_criteria={"Valid": "Is it valid?"},
+            additional_review_guidance="Read the job.yml file for context.",
+        )
+
+        assert "Additional Context" in instructions
+        assert "Read the job.yml file for context." in instructions
+
+    def test_build_instructions_without_guidance(self, quality_gate: QualityGate) -> None:
+        """Test that guidance section is absent when not provided."""
+        instructions = quality_gate._build_instructions(
+            quality_criteria={"Valid": "Is it valid?"},
+        )
+
+        assert "Additional Context" not in instructions
+
+    async def test_build_payload(self, quality_gate: QualityGate, project_root: Path) -> None:
+        """Test building payload with file contents."""
+        output_file = project_root / "output.md"
+        output_file.write_text("Test content")
+
+        payload = await quality_gate._build_payload(
+            outputs={"report": "output.md"},
+            project_root=project_root,
+        )
+
+        assert "Test content" in payload
+        assert "output.md" in payload
+        assert "--------------------" in payload
+        assert "BEGIN OUTPUTS" in payload
+        assert "END OUTPUTS" in payload
+
+    async def test_build_payload_missing_file(
+        self, quality_gate: QualityGate, project_root: Path
+    ) -> None:
+        """Test building payload with missing file."""
+        payload = await quality_gate._build_payload(
+            outputs={"report": "nonexistent.md"},
+            project_root=project_root,
+        )
+
+        assert "File not found" in payload
+        assert "nonexistent.md" in payload
+
+    async def test_build_payload_files_type(
+        self, quality_gate: QualityGate, project_root: Path
+    ) -> None:
+        """Test building payload with multi-file outputs."""
+        (project_root / "a.md").write_text("File A")
+        (project_root / "b.md").write_text("File B")
+
+        payload = await quality_gate._build_payload(
+            outputs={"reports": ["a.md", "b.md"]},
+            project_root=project_root,
+        )
+
+        assert "File A" in payload
+        assert "File B" in payload
+        assert "a.md" in payload
+        assert "b.md" in payload
+
+    async def test_build_payload_binary_file(
+        self, quality_gate: QualityGate, project_root: Path
+    ) -> None:
+        """Test building payload with a binary file produces a placeholder message."""
+        binary_file = project_root / "report.pdf"
+        binary_file.write_bytes(b"%PDF-1.4 \x00\x01\x02\xff\xfe binary content")
+
+        payload = await quality_gate._build_payload(
+            outputs={"report": "report.pdf"},
+            project_root=project_root,
+        )
+
+        assert "Binary file" in payload
+        assert "not included in review" in payload
+        assert str(binary_file.resolve()) in payload
+        assert "report.pdf" in payload
+        # Should NOT contain the raw binary content
+        assert "%PDF" not in payload
+
+    async def test_build_payload_binary_file_in_multi_output(
+        self, quality_gate: QualityGate, project_root: Path
+    ) -> None:
+        """Test building payload with a mix of text and binary files."""
+        text_file = project_root / "summary.md"
+        text_file.write_text("Summary text content")
+        binary_file = project_root / "data.pdf"
+        binary_file.write_bytes(b"\x00\x01\x02\xff\xfe binary data")
+
+        payload = await quality_gate._build_payload(
+            outputs={"docs": ["summary.md", "data.pdf"]},
+            project_root=project_root,
+        )
+
+        # Text file content should be included
+        assert "Summary text content" in payload
+        # Binary file should have placeholder
+        assert "Binary file" in payload
+        assert "not included in review" in payload
+        assert str(binary_file.resolve()) in payload
+
+    async def test_build_payload_only_outputs(
+        self, quality_gate: QualityGate, project_root: Path
+    ) -> None:
+        """Test that payload only contains outputs section (no inputs)."""
+        (project_root / "output.md").write_text("Output only")
+
+        payload = await quality_gate._build_payload(
+            outputs={"report": "output.md"},
+            project_root=project_root,
+        )
+
+        assert "BEGIN OUTPUTS" in payload
+        assert "END OUTPUTS" in payload
+        assert "BEGIN INPUTS" not in payload
+        assert "END INPUTS" not in payload
+
+    def test_parse_result_valid(self, quality_gate: QualityGate) -> None:
+        """Test parsing valid structured output data."""
+        data = {
+            "passed": True,
+            "feedback": "All good",
+            "criteria_results": [{"criterion": "Test 1", "passed": True, "feedback": None}],
+        }
+
+        result = quality_gate._parse_result(data)
+
+        assert result.passed is True
+        assert result.feedback == "All good"
+        assert len(result.criteria_results) == 1
+
+    def test_parse_result_failed(self, quality_gate: QualityGate) -> None:
+        """Test parsing failed evaluation data."""
+        data = {
+            "passed": False,
+            "feedback": "Issues found",
+            "criteria_results": [{"criterion": "Test 1", "passed": False, "feedback": "Failed"}],
+        }
+
+        result = quality_gate._parse_result(data)
+
+        assert result.passed is False
+        assert result.feedback == "Issues found"
+        assert result.criteria_results[0].passed is False
+
+    def test_parse_result_multiple_criteria(self, quality_gate: QualityGate) -> None:
+        """Test that criteria results are properly parsed with multiple entries."""
+        data = {
+            "passed": False,
+            "feedback": "Two criteria failed",
+            "criteria_results": [
+                {"criterion": "First check", "passed": True, "feedback": None},
+                {"criterion": "Second check", "passed": False, "feedback": "Missing data"},
+                {"criterion": "Third check", "passed": False, "feedback": "Wrong format"},
+            ],
+        }
+
+        result = quality_gate._parse_result(data)
+
+        assert result.passed is False
+        assert len(result.criteria_results) == 3
+        assert result.criteria_results[0].passed is True
+        assert result.criteria_results[0].feedback is None
+        assert result.criteria_results[1].passed is False
+        assert result.criteria_results[1].feedback == "Missing data"
+        assert result.criteria_results[2].passed is False
+        assert result.criteria_results[2].feedback == "Wrong format"
+
+    async def test_evaluate_no_criteria(
+        self, quality_gate: QualityGate, project_root: Path
+    ) -> None:
+        """Test evaluation with no criteria auto-passes."""
+        result = await quality_gate.evaluate(
+            quality_criteria={},
+            outputs={"report": "output.md"},
+            project_root=project_root,
+        )
+
+        assert result.passed is True
+        assert "auto-passing" in result.feedback.lower()
+
+    async def test_evaluate_calls_cli_with_correct_args(
+        self, mock_cli: ClaudeCLI, project_root: Path
+    ) -> None:
+        """Test that evaluate passes correct arguments to ClaudeCLI."""
+        gate = QualityGate(cli=mock_cli)
+
+        # Create output file
+        output_file = project_root / "output.md"
+        output_file.write_text("Test content")
+
+        await gate.evaluate(
+            quality_criteria={"Validity": "Must be valid"},
+            outputs={"report": "output.md"},
+            project_root=project_root,
+        )
+
+        mock_cli.run.assert_called_once()
+        call_kwargs = mock_cli.run.call_args
+        assert call_kwargs.kwargs["json_schema"] == QUALITY_GATE_RESPONSE_SCHEMA
+        assert call_kwargs.kwargs["cwd"] == project_root
+        assert "Validity" in call_kwargs.kwargs["system_prompt"]
+        assert "Must be valid" in call_kwargs.kwargs["system_prompt"]
+        assert "Test content" in call_kwargs.kwargs["prompt"]
+
+    async def test_evaluate_wraps_cli_error(self, mock_cli: ClaudeCLI, project_root: Path) -> None:
+        """Test that ClaudeCLIError is wrapped in QualityGateError."""
+        mock_cli.run = AsyncMock(side_effect=ClaudeCLIError("CLI failed"))
+        gate = QualityGate(cli=mock_cli)
+
+        output_file = project_root / "output.md"
+        output_file.write_text("content")
+
+        with pytest.raises(QualityGateError, match="CLI failed"):
+            await gate.evaluate(
+                quality_criteria={"Test": "Test criterion"},
+                outputs={"report": "output.md"},
+                project_root=project_root,
+            )
+
+    async def test_schema_is_valid_json(self) -> None:
+        """Test that QUALITY_GATE_RESPONSE_SCHEMA is valid JSON-serializable."""
+        import json
+
+        schema_json = json.dumps(QUALITY_GATE_RESPONSE_SCHEMA)
+        assert schema_json
+        parsed = json.loads(schema_json)
+        assert parsed == QUALITY_GATE_RESPONSE_SCHEMA
+
+
+class TestEvaluateReviews:
+    """Tests for QualityGate.evaluate_reviews method."""
+
+    async def test_empty_reviews(self, quality_gate: QualityGate, project_root: Path) -> None:
+        """Test that empty reviews returns empty list."""
+        result = await quality_gate.evaluate_reviews(
+            reviews=[],
+            outputs={"report": "output.md"},
+            output_specs={"report": "file"},
+            project_root=project_root,
+        )
+        assert result == []
+
+    async def test_step_review_passes(self, mock_cli: ClaudeCLI, project_root: Path) -> None:
+        """Test step-level review that passes."""
+        mock_cli.run = AsyncMock(
+            return_value={"passed": True, "feedback": "All good", "criteria_results": []}
+        )
+        gate = QualityGate(cli=mock_cli)
+
+        (project_root / "output.md").write_text("content")
+
+        result = await gate.evaluate_reviews(
+            reviews=[
+                {
+                    "run_each": "step",
+                    "quality_criteria": {"Complete": "Is it complete?"},
+                }
+            ],
+            outputs={"report": "output.md"},
+            output_specs={"report": "file"},
+            project_root=project_root,
+        )
+        assert result == []  # No failures
+
+    async def test_step_review_fails(self, mock_cli: ClaudeCLI, project_root: Path) -> None:
+        """Test step-level review that fails."""
+        mock_cli.run = AsyncMock(
+            return_value={
+                "passed": False,
+                "feedback": "Issues found",
+                "criteria_results": [
+                    {"criterion": "Complete", "passed": False, "feedback": "Missing content"}
+                ],
+            }
+        )
+        gate = QualityGate(cli=mock_cli)
+
+        (project_root / "output.md").write_text("content")
+
+        result = await gate.evaluate_reviews(
+            reviews=[
+                {
+                    "run_each": "step",
+                    "quality_criteria": {"Complete": "Is it complete?"},
+                }
+            ],
+            outputs={"report": "output.md"},
+            output_specs={"report": "file"},
+            project_root=project_root,
+        )
+        assert len(result) == 1
+        assert result[0].review_run_each == "step"
+        assert result[0].passed is False
+
+    async def test_per_file_review(self, mock_cli: ClaudeCLI, project_root: Path) -> None:
+        """Test per-file review for files-type output."""
+        call_count = 0
+
+        async def mock_run(**kwargs: Any) -> dict[str, Any]:
+            nonlocal call_count
+            call_count += 1
+            return {"passed": True, "feedback": "OK", "criteria_results": []}
+
+        mock_cli.run = AsyncMock(side_effect=mock_run)
+        gate = QualityGate(cli=mock_cli)
+
+        (project_root / "a.md").write_text("File A")
+        (project_root / "b.md").write_text("File B")
+
+        result = await gate.evaluate_reviews(
+            reviews=[
+                {
+                    "run_each": "reports",
+                    "quality_criteria": {"Valid": "Is it valid?"},
+                }
+            ],
+            outputs={"reports": ["a.md", "b.md"]},
+            output_specs={"reports": "files"},
+            project_root=project_root,
+        )
+        assert result == []  # All pass
+        assert call_count == 2  # Called once per file
+
+    async def test_single_file_review(self, mock_cli: ClaudeCLI, project_root: Path) -> None:
+        """Test review targeting a single-file output."""
+        mock_cli.run = AsyncMock(
+            return_value={"passed": True, "feedback": "OK", "criteria_results": []}
+        )
+        gate = QualityGate(cli=mock_cli)
+
+        (project_root / "report.md").write_text("content")
+
+        result = await gate.evaluate_reviews(
+            reviews=[
+                {
+                    "run_each": "report",
+                    "quality_criteria": {"Valid": "Is it valid?"},
+                }
+            ],
+            outputs={"report": "report.md"},
+            output_specs={"report": "file"},
+            project_root=project_root,
+        )
+        assert result == []
+        mock_cli.run.assert_called_once()
+
+    async def test_review_passes_guidance_to_system_prompt(
+        self, mock_cli: ClaudeCLI, project_root: Path
+    ) -> None:
+        """Test that additional_review_guidance is included in the CLI system prompt."""
+        mock_cli.run = AsyncMock(
+            return_value={"passed": True, "feedback": "OK", "criteria_results": []}
+        )
+        gate = QualityGate(cli=mock_cli)
+
+        (project_root / "output.md").write_text("content")
+
+        await gate.evaluate_reviews(
+            reviews=[
+                {
+                    "run_each": "step",
+                    "quality_criteria": {"Valid": "Is it valid?"},
+                    "additional_review_guidance": "Read the job.yml for workflow context.",
+                }
+            ],
+            outputs={"report": "output.md"},
+            output_specs={"report": "file"},
+            project_root=project_root,
+        )
+
+        mock_cli.run.assert_called_once()
+        system_prompt = mock_cli.run.call_args.kwargs["system_prompt"]
+        assert "Read the job.yml for workflow context." in system_prompt
+        assert "Additional Context" in system_prompt
+
+    async def test_review_without_guidance_omits_section(
+        self, mock_cli: ClaudeCLI, project_root: Path
+    ) -> None:
+        """Test that reviews without guidance don't include the section."""
+        mock_cli.run = AsyncMock(
+            return_value={"passed": True, "feedback": "OK", "criteria_results": []}
+        )
+        gate = QualityGate(cli=mock_cli)
+
+        (project_root / "output.md").write_text("content")
+
+        await gate.evaluate_reviews(
+            reviews=[
+                {
+                    "run_each": "step",
+                    "quality_criteria": {"Valid": "Is it valid?"},
+                }
+            ],
+            outputs={"report": "output.md"},
+            output_specs={"report": "file"},
+            project_root=project_root,
+        )
+
+        system_prompt = mock_cli.run.call_args.kwargs["system_prompt"]
+        assert "Additional Context" not in system_prompt
+
+    async def test_per_file_review_passes_guidance_to_each(
+        self, mock_cli: ClaudeCLI, project_root: Path
+    ) -> None:
+        """Test that guidance is passed to each per-file review invocation."""
+        mock_cli.run = AsyncMock(
+            return_value={"passed": True, "feedback": "OK", "criteria_results": []}
+        )
+        gate = QualityGate(cli=mock_cli)
+
+        (project_root / "a.md").write_text("File A")
+        (project_root / "b.md").write_text("File B")
+
+        await gate.evaluate_reviews(
+            reviews=[
+                {
+                    "run_each": "reports",
+                    "quality_criteria": {"Valid": "Is it valid?"},
+                    "additional_review_guidance": "Check against the spec.",
+                }
+            ],
+            outputs={"reports": ["a.md", "b.md"]},
+            output_specs={"reports": "files"},
+            project_root=project_root,
+        )
+
+        assert mock_cli.run.call_count == 2
+        for call in mock_cli.run.call_args_list:
+            system_prompt = call.kwargs["system_prompt"]
+            assert "Check against the spec." in system_prompt
+
+
+class TestBuildPayloadLargeFileSet:
+    """Tests for _build_payload behavior when file count exceeds MAX_INLINE_FILES."""
+
+    async def test_payload_lists_paths_when_over_threshold(
+        self, quality_gate: QualityGate, project_root: Path
+    ) -> None:
+        """Test that >5 files produces path listing instead of inline content."""
+        for i in range(6):
+            (project_root / f"file{i}.md").write_text(f"Content {i}")
+
+        payload = await quality_gate._build_payload(
+            outputs={"reports": [f"file{i}.md" for i in range(6)]},
+            project_root=project_root,
+        )
+
+        assert "6 files" in payload
+        assert "too many to include inline" in payload
+        for i in range(6):
+            assert f"file{i}.md" in payload
+        # Content should NOT be embedded
+        assert "Content 0" not in payload
+        assert "Content 5" not in payload
+
+    async def test_payload_inlines_content_at_threshold(
+        self, quality_gate: QualityGate, project_root: Path
+    ) -> None:
+        """Test that exactly 5 files still gets inline content."""
+        for i in range(5):
+            (project_root / f"file{i}.md").write_text(f"Content {i}")
+
+        payload = await quality_gate._build_payload(
+            outputs={"reports": [f"file{i}.md" for i in range(5)]},
+            project_root=project_root,
+        )
+
+        # Should have inline content, not path listing
+        assert "too many to include inline" not in payload
+        for i in range(5):
+            assert f"Content {i}" in payload
+
+    async def test_path_listing_includes_output_names(
+        self, quality_gate: QualityGate, project_root: Path
+    ) -> None:
+        """Test that path listing shows which output each file belongs to."""
+        for i in range(4):
+            (project_root / f"doc{i}.md").write_text("x")
+        for i in range(3):
+            (project_root / f"data{i}.csv").write_text("x")
+
+        payload = await quality_gate._build_payload(
+            outputs={
+                "docs": [f"doc{i}.md" for i in range(4)],
+                "data": [f"data{i}.csv" for i in range(3)],
+            },
+            project_root=project_root,
+        )
+
+        assert "7 files" in payload
+        assert "(output: docs)" in payload
+        assert "(output: data)" in payload
+
+    async def test_path_listing_counts_across_outputs(
+        self, quality_gate: QualityGate, project_root: Path
+    ) -> None:
+        """Test that file count is summed across all outputs."""
+        # 3 files in one output + 3 in another = 6 total > 5
+        for i in range(3):
+            (project_root / f"a{i}.md").write_text("x")
+            (project_root / f"b{i}.md").write_text("x")
+
+        payload = await quality_gate._build_payload(
+            outputs={
+                "alpha": [f"a{i}.md" for i in range(3)],
+                "beta": [f"b{i}.md" for i in range(3)],
+            },
+            project_root=project_root,
+        )
+
+        assert "6 files" in payload
+        assert "too many to include inline" in payload
+
+
+class TestBuildPathListing:
+    """Tests for _build_path_listing static method."""
+
+    def test_single_file_output(self) -> None:
+        """Test path listing with single file outputs."""
+        lines = QualityGate._build_path_listing({"report": "report.md"})
+        assert lines == ["- report.md  (output: report)"]
+
+    def test_multi_file_output(self) -> None:
+        """Test path listing with list outputs."""
+        lines = QualityGate._build_path_listing({"reports": ["a.md", "b.md"]})
+        assert lines == [
+            "- a.md  (output: reports)",
+            "- b.md  (output: reports)",
+        ]
+
+    def test_mixed_outputs(self) -> None:
+        """Test path listing with both single and list outputs."""
+        lines = QualityGate._build_path_listing(
+            {
+                "summary": "summary.md",
+                "details": ["d1.md", "d2.md"],
+            }
+        )
+        assert len(lines) == 3
+        assert "- summary.md  (output: summary)" in lines
+        assert "- d1.md  (output: details)" in lines
+        assert "- d2.md  (output: details)" in lines
+
+
+class TestComputeTimeout:
+    """Tests for QualityGate.compute_timeout."""
+
+    def test_base_timeout_for_few_files(self) -> None:
+        """Test that <=5 files gives base 120s timeout."""
+        assert QualityGate.compute_timeout(0) == 120
+        assert QualityGate.compute_timeout(1) == 120
+        assert QualityGate.compute_timeout(5) == 120
+
+    def test_timeout_increases_after_five(self) -> None:
+        """Test that each file after 5 adds 30 seconds."""
+        assert QualityGate.compute_timeout(6) == 150
+        assert QualityGate.compute_timeout(10) == 270  # 120 + 5*30
+        assert QualityGate.compute_timeout(20) == 570  # 120 + 15*30
+
+
+class TestDynamicTimeout:
+    """Tests that evaluate passes dynamic timeout to CLI."""
+
+    async def test_timeout_passed_to_cli(self, mock_cli: ClaudeCLI, project_root: Path) -> None:
+        """Test that evaluate passes computed timeout to CLI.run."""
+        gate = QualityGate(cli=mock_cli)
+
+        (project_root / "output.md").write_text("content")
+
+        await gate.evaluate(
+            quality_criteria={"Valid": "Is it valid?"},
+            outputs={"report": "output.md"},
+            project_root=project_root,
+        )
+
+        call_kwargs = mock_cli.run.call_args.kwargs
+        # 1 file -> timeout = 120
+        assert call_kwargs["timeout"] == 120
+
+    async def test_timeout_scales_with_file_count(
+        self, mock_cli: ClaudeCLI, project_root: Path
+    ) -> None:
+        """Test that timeout increases with many files."""
+        gate = QualityGate(cli=mock_cli)
+
+        for i in range(10):
+            (project_root / f"f{i}.md").write_text(f"content {i}")
+
+        await gate.evaluate(
+            quality_criteria={"Valid": "Is it valid?"},
+            outputs={"reports": [f"f{i}.md" for i in range(10)]},
+            project_root=project_root,
+        )
+
+        call_kwargs = mock_cli.run.call_args.kwargs
+        # 10 files -> 120 + 5*30 = 270
+        assert call_kwargs["timeout"] == 270
+
+
+class TestMockQualityGate:
+    """Tests for MockQualityGate class."""
+
+    @staticmethod
+    async def evaluate_mock_gate(
+        gate: MockQualityGate,
+        project_root: Path,
+        criteria: dict[str, str] | None = None,
+        outputs: dict[str, str | list[str]] | None = None,
+    ) -> Any:
+        """Helper to evaluate a mock gate with default parameters."""
+        return await gate.evaluate(
+            quality_criteria=criteria or {"Criterion 1": "Is criterion 1 met?"},
+            outputs=outputs or {"report": "output.md"},
+            project_root=project_root,
+        )
+
+    async def test_mock_passes_by_default(self, project_root: Path) -> None:
+        """Test mock gate passes by default."""
+        gate = MockQualityGate()
+        result = await self.evaluate_mock_gate(gate, project_root)
+
+        assert result.passed is True
+        assert len(gate.evaluations) == 1
+
+    async def test_mock_can_fail(self, project_root: Path) -> None:
+        """Test mock gate can be configured to fail."""
+        gate = MockQualityGate(should_pass=False, feedback="Mock failure")
+        result = await self.evaluate_mock_gate(gate, project_root)
+
+        assert result.passed is False
+        assert result.feedback == "Mock failure"
+
+    async def test_mock_records_evaluations(self, project_root: Path) -> None:
+        """Test mock gate records evaluations."""
+        gate = MockQualityGate()
+
+        await self.evaluate_mock_gate(
+            gate,
+            project_root,
+            criteria={"Criterion 1": "Is criterion 1 met?"},
+            outputs={"out1": "output1.md"},
+        )
+        await self.evaluate_mock_gate(
+            gate,
+            project_root,
+            criteria={"Criterion 2": "Is criterion 2 met?"},
+            outputs={"out2": "output2.md"},
+        )
+
+        assert len(gate.evaluations) == 2
+        assert gate.evaluations[0]["quality_criteria"] == {"Criterion 1": "Is criterion 1 met?"}
+        assert gate.evaluations[1]["quality_criteria"] == {"Criterion 2": "Is criterion 2 met?"}
+
+    async def test_mock_records_additional_review_guidance(self, project_root: Path) -> None:
+        """Test mock gate records additional_review_guidance when provided."""
+        gate = MockQualityGate()
+
+        await gate.evaluate(
+            quality_criteria={"Check": "Is it good?"},
+            outputs={"report": "output.md"},
+            project_root=project_root,
+            additional_review_guidance="Look at the job.yml for context.",
+        )
+
+        assert len(gate.evaluations) == 1
+        assert (
+            gate.evaluations[0]["additional_review_guidance"] == "Look at the job.yml for context."
+        )
+
+    async def test_mock_records_none_guidance_when_omitted(self, project_root: Path) -> None:
+        """Test mock gate records None for guidance when not provided."""
+        gate = MockQualityGate()
+
+        await gate.evaluate(
+            quality_criteria={"Check": "Is it good?"},
+            outputs={"report": "output.md"},
+            project_root=project_root,
+        )
+
+        assert gate.evaluations[0]["additional_review_guidance"] is None
diff --git a/tests/unit/mcp/test_schemas.py b/tests/unit/mcp/test_schemas.py
new file mode 100644
index 00000000..14e0e41a
--- /dev/null
+++ b/tests/unit/mcp/test_schemas.py
@@ -0,0 +1,514 @@
+"""Tests for MCP schemas."""
+
+from deepwork.mcp.schemas import (
+    ActiveStepInfo,
+    ExpectedOutput,
+    FinishedStepInput,
+    FinishedStepResponse,
+    JobInfo,
+    QualityCriteriaResult,
+    QualityGateResult,
+    ReviewInfo,
+    ReviewResult,
+    StartWorkflowInput,
+    StartWorkflowResponse,
+    StepInfo,
+    StepProgress,
+    StepStatus,
+    WorkflowInfo,
+    WorkflowSession,
+    WorkflowStepEntryInfo,
+)
+
+
+class TestStepStatus:
+    """Tests for StepStatus enum."""
+
+    def test_enum_values(self) -> None:
+        """Test that enum has expected values."""
+        assert StepStatus.NEEDS_WORK == "needs_work"
+        assert StepStatus.NEXT_STEP == "next_step"
+        assert StepStatus.WORKFLOW_COMPLETE == "workflow_complete"
+
+
+class TestStepInfo:
+    """Tests for StepInfo model."""
+
+    def test_basic_step(self) -> None:
+        """Test creating basic step info."""
+        step = StepInfo(
+            id="step1",
+            name="First Step",
+            description="Does something",
+        )
+
+        assert step.id == "step1"
+        assert step.name == "First Step"
+        assert step.description == "Does something"
+        assert step.dependencies == []
+
+    def test_step_with_dependencies(self) -> None:
+        """Test step with dependencies."""
+        step = StepInfo(
+            id="step2",
+            name="Second Step",
+            description="Depends on step1",
+            dependencies=["step1"],
+        )
+
+        assert step.dependencies == ["step1"]
+
+
+class TestWorkflowStepEntryInfo:
+    """Tests for WorkflowStepEntryInfo model."""
+
+    def test_sequential_entry(self) -> None:
+        """Test sequential step entry."""
+        entry = WorkflowStepEntryInfo(step_ids=["step1"])
+
+        assert entry.step_ids == ["step1"]
+        assert entry.is_concurrent is False
+
+    def test_concurrent_entry(self) -> None:
+        """Test concurrent step entry."""
+        entry = WorkflowStepEntryInfo(
+            step_ids=["step1", "step2"],
+            is_concurrent=True,
+        )
+
+        assert entry.step_ids == ["step1", "step2"]
+        assert entry.is_concurrent is True
+
+
+class TestWorkflowInfo:
+    """Tests for WorkflowInfo model."""
+
+    def test_basic_workflow(self) -> None:
+        """Test basic workflow info."""
+        workflow = WorkflowInfo(
+            name="test_workflow",
+            summary="A test workflow",
+        )
+
+        assert workflow.name == "test_workflow"
+        assert workflow.summary == "A test workflow"
+
+
+class TestJobInfo:
+    """Tests for JobInfo model."""
+
+    def test_basic_job(self) -> None:
+        """Test basic job info."""
+        job = JobInfo(
+            name="test_job",
+            summary="A test job",
+        )
+
+        assert job.name == "test_job"
+        assert job.summary == "A test job"
+        assert job.description is None
+        assert job.workflows == []
+
+
+class TestStartWorkflowInput:
+    """Tests for StartWorkflowInput model."""
+
+    def test_required_fields(self) -> None:
+        """Test required fields only."""
+        input_data = StartWorkflowInput(
+            goal="Complete a task",
+            job_name="test_job",
+            workflow_name="main",
+        )
+
+        assert input_data.goal == "Complete a task"
+        assert input_data.job_name == "test_job"
+        assert input_data.workflow_name == "main"
+        assert input_data.instance_id is None
+
+    def test_with_instance_id(self) -> None:
+        """Test with optional instance_id."""
+        input_data = StartWorkflowInput(
+            goal="Complete a task",
+            job_name="test_job",
+            workflow_name="main",
+            instance_id="acme",
+        )
+
+        assert input_data.instance_id == "acme"
+
+
+class TestFinishedStepInput:
+    """Tests for FinishedStepInput model."""
+
+    def test_with_outputs(self) -> None:
+        """Test with structured outputs."""
+        input_data = FinishedStepInput(
+            outputs={"report": "report.md", "data_files": ["a.csv", "b.csv"]}
+        )
+
+        assert input_data.outputs == {"report": "report.md", "data_files": ["a.csv", "b.csv"]}
+        assert input_data.notes is None
+
+    def test_with_empty_outputs(self) -> None:
+        """Test with empty outputs dict (for steps with no outputs)."""
+        input_data = FinishedStepInput(outputs={})
+
+        assert input_data.outputs == {}
+
+    def test_with_notes(self) -> None:
+        """Test with notes."""
+        input_data = FinishedStepInput(
+            outputs={"output": "output.md"},
+            notes="Completed successfully",
+        )
+
+        assert input_data.notes == "Completed successfully"
+
+
+class TestQualityCriteriaResult:
+    """Tests for QualityCriteriaResult model."""
+
+    def test_passed_criterion(self) -> None:
+        """Test passed criterion."""
+        result = QualityCriteriaResult(
+            criterion="Output must be valid",
+            passed=True,
+        )
+
+        assert result.passed is True
+        assert result.feedback is None
+
+    def test_failed_criterion(self) -> None:
+        """Test failed criterion with feedback."""
+        result = QualityCriteriaResult(
+            criterion="Output must be valid",
+            passed=False,
+            feedback="Output was incomplete",
+        )
+
+        assert result.passed is False
+        assert result.feedback == "Output was incomplete"
+
+
+class TestQualityGateResult:
+    """Tests for QualityGateResult model."""
+
+    def test_passed_gate(self) -> None:
+        """Test passed quality gate."""
+        result = QualityGateResult(
+            passed=True,
+            feedback="All criteria met",
+            criteria_results=[
+                QualityCriteriaResult(criterion="Test 1", passed=True),
+            ],
+        )
+
+        assert result.passed is True
+        assert len(result.criteria_results) == 1
+
+    def test_failed_gate(self) -> None:
+        """Test failed quality gate."""
+        result = QualityGateResult(
+            passed=False,
+            feedback="Some criteria failed",
+            criteria_results=[
+                QualityCriteriaResult(criterion="Test 1", passed=True),
+                QualityCriteriaResult(
+                    criterion="Test 2",
+                    passed=False,
+                    feedback="Failed check",
+                ),
+            ],
+        )
+
+        assert result.passed is False
+        assert len(result.criteria_results) == 2
+
+
+class TestReviewInfo:
+    """Tests for ReviewInfo model."""
+
+    def test_step_review(self) -> None:
+        """Test step-level review info."""
+        review = ReviewInfo(
+            run_each="step",
+            quality_criteria={"Complete": "Is it complete?"},
+        )
+
+        assert review.run_each == "step"
+        assert review.quality_criteria == {"Complete": "Is it complete?"}
+
+    def test_output_review(self) -> None:
+        """Test output-specific review info."""
+        review = ReviewInfo(
+            run_each="reports",
+            quality_criteria={
+                "Valid": "Is it valid?",
+                "Complete": "Is it complete?",
+            },
+        )
+
+        assert review.run_each == "reports"
+        assert len(review.quality_criteria) == 2
+
+
+class TestReviewResult:
+    """Tests for ReviewResult model."""
+
+    def test_passed_review(self) -> None:
+        """Test passed review result."""
+        result = ReviewResult(
+            review_run_each="step",
+            target_file=None,
+            passed=True,
+            feedback="All good",
+        )
+
+        assert result.passed is True
+        assert result.target_file is None
+
+    def test_failed_per_file_review(self) -> None:
+        """Test failed per-file review result."""
+        result = ReviewResult(
+            review_run_each="reports",
+            target_file="report1.md",
+            passed=False,
+            feedback="Issues found",
+            criteria_results=[
+                QualityCriteriaResult(criterion="Valid", passed=False, feedback="Not valid"),
+            ],
+        )
+
+        assert result.passed is False
+        assert result.target_file == "report1.md"
+        assert result.review_run_each == "reports"
+        assert len(result.criteria_results) == 1
+
+
+class TestActiveStepInfo:
+    """Tests for ActiveStepInfo model."""
+
+    def test_basic_step_info(self) -> None:
+        """Test basic active step info."""
+        expected = [
+            ExpectedOutput(
+                name="output.md",
+                type="file",
+                description="Test output",
+                required=True,
+                syntax_for_finished_step_tool="filepath",
+            )
+        ]
+        step_info = ActiveStepInfo(
+            session_id="abc123",
+            branch_name="deepwork/test-main-20240101",
+            step_id="step1",
+            step_expected_outputs=expected,
+            step_reviews=[
+                ReviewInfo(
+                    run_each="step",
+                    quality_criteria={"Complete": "Is it complete?"},
+                )
+            ],
+            step_instructions="Do something",
+        )
+
+        assert step_info.session_id == "abc123"
+        assert step_info.branch_name == "deepwork/test-main-20240101"
+        assert step_info.step_id == "step1"
+        assert len(step_info.step_expected_outputs) == 1
+        assert step_info.step_expected_outputs[0].name == "output.md"
+        assert step_info.step_expected_outputs[0].type == "file"
+        assert step_info.step_expected_outputs[0].syntax_for_finished_step_tool == "filepath"
+        assert len(step_info.step_reviews) == 1
+        assert step_info.step_reviews[0].run_each == "step"
+        assert step_info.step_instructions == "Do something"
+
+    def test_default_reviews(self) -> None:
+        """Test default empty reviews."""
+        step_info = ActiveStepInfo(
+            session_id="abc123",
+            branch_name="deepwork/test-main-20240101",
+            step_id="step1",
+            step_expected_outputs=[
+                ExpectedOutput(
+                    name="output.md",
+                    type="file",
+                    description="Test output",
+                    required=True,
+                    syntax_for_finished_step_tool="filepath",
+                )
+            ],
+            step_instructions="Do something",
+        )
+
+        assert step_info.step_reviews == []
+
+
+class TestStartWorkflowResponse:
+    """Tests for StartWorkflowResponse model."""
+
+    def test_basic_response(self) -> None:
+        """Test basic response."""
+        response = StartWorkflowResponse(
+            begin_step=ActiveStepInfo(
+                session_id="abc123",
+                branch_name="deepwork/test-main-20240101",
+                step_id="step1",
+                step_expected_outputs=[
+                    ExpectedOutput(
+                        name="output.md",
+                        type="file",
+                        description="Test output",
+                        required=True,
+                        syntax_for_finished_step_tool="filepath",
+                    )
+                ],
+                step_instructions="Do something",
+            )
+        )
+
+        assert response.begin_step.session_id == "abc123"
+        assert response.begin_step.branch_name == "deepwork/test-main-20240101"
+        assert response.begin_step.step_id == "step1"
+        assert response.begin_step.step_reviews == []
+
+
+class TestFinishedStepResponse:
+    """Tests for FinishedStepResponse model."""
+
+    def test_needs_work_status(self) -> None:
+        """Test needs_work response."""
+        response = FinishedStepResponse(
+            status=StepStatus.NEEDS_WORK,
+            feedback="Fix the issues",
+            failed_reviews=[
+                ReviewResult(
+                    review_run_each="step",
+                    target_file=None,
+                    passed=False,
+                    feedback="Issues found",
+                    criteria_results=[
+                        QualityCriteriaResult(criterion="Test", passed=False, feedback="Failed"),
+                    ],
+                ),
+            ],
+        )
+
+        assert response.status == StepStatus.NEEDS_WORK
+        assert response.feedback is not None
+        assert response.begin_step is None
+
+    def test_next_step_status(self) -> None:
+        """Test next_step response."""
+        response = FinishedStepResponse(
+            status=StepStatus.NEXT_STEP,
+            begin_step=ActiveStepInfo(
+                session_id="abc123",
+                branch_name="deepwork/test-main-20240101",
+                step_id="step2",
+                step_expected_outputs=[
+                    ExpectedOutput(
+                        name="output2.md",
+                        type="file",
+                        description="Test output",
+                        required=True,
+                        syntax_for_finished_step_tool="filepath",
+                    )
+                ],
+                step_instructions="Next step instructions",
+            ),
+        )
+
+        assert response.status == StepStatus.NEXT_STEP
+        assert response.begin_step is not None
+        assert response.begin_step.step_id == "step2"
+        assert response.summary is None
+
+    def test_workflow_complete_status(self) -> None:
+        """Test workflow_complete response."""
+        response = FinishedStepResponse(
+            status=StepStatus.WORKFLOW_COMPLETE,
+            summary="Workflow completed!",
+            all_outputs={"output1": "output1.md", "output2": "output2.md"},
+        )
+
+        assert response.status == StepStatus.WORKFLOW_COMPLETE
+        assert response.summary is not None
+        assert response.all_outputs is not None
+        assert response.all_outputs == {"output1": "output1.md", "output2": "output2.md"}
+
+
+class TestStepProgress:
+    """Tests for StepProgress model."""
+
+    def test_new_step(self) -> None:
+        """Test new step progress."""
+        progress = StepProgress(step_id="step1")
+
+        assert progress.step_id == "step1"
+        assert progress.started_at is None
+        assert progress.completed_at is None
+        assert progress.outputs == {}
+        assert progress.quality_attempts == 0
+
+
+class TestWorkflowSession:
+    """Tests for WorkflowSession model."""
+
+    def test_basic_session(self) -> None:
+        """Test basic session creation."""
+        session = WorkflowSession(
+            session_id="abc123",
+            job_name="test_job",
+            workflow_name="main",
+            goal="Complete the task",
+            branch_name="deepwork/test-main-20240101",
+            current_step_id="step1",
+            started_at="2024-01-01T00:00:00Z",
+        )
+
+        assert session.session_id == "abc123"
+        assert session.job_name == "test_job"
+        assert session.status == "active"
+        assert session.completed_at is None
+
+    def test_to_dict(self) -> None:
+        """Test converting session to dict."""
+        session = WorkflowSession(
+            session_id="abc123",
+            job_name="test_job",
+            workflow_name="main",
+            goal="Complete the task",
+            branch_name="deepwork/test-main-20240101",
+            current_step_id="step1",
+            started_at="2024-01-01T00:00:00Z",
+        )
+
+        data = session.to_dict()
+
+        assert isinstance(data, dict)
+        assert data["session_id"] == "abc123"
+        assert data["job_name"] == "test_job"
+
+    def test_from_dict(self) -> None:
+        """Test creating session from dict."""
+        data = {
+            "session_id": "abc123",
+            "job_name": "test_job",
+            "workflow_name": "main",
+            "goal": "Complete the task",
+            "branch_name": "deepwork/test-main-20240101",
+            "current_step_id": "step1",
+            "current_entry_index": 0,
+            "step_progress": {},
+            "started_at": "2024-01-01T00:00:00Z",
+            "completed_at": None,
+            "status": "active",
+        }
+
+        session = WorkflowSession.from_dict(data)
+
+        assert session.session_id == "abc123"
+        assert session.job_name == "test_job"
diff --git a/tests/unit/mcp/test_state.py b/tests/unit/mcp/test_state.py
new file mode 100644
index 00000000..4508b517
--- /dev/null
+++ b/tests/unit/mcp/test_state.py
@@ -0,0 +1,590 @@
+"""Tests for MCP state management."""
+
+from pathlib import Path
+
+import pytest
+
+from deepwork.mcp.state import StateError, StateManager
+
+
+@pytest.fixture
+def project_root(tmp_path: Path) -> Path:
+    """Create a temporary project root with .deepwork directory."""
+    deepwork_dir = tmp_path / ".deepwork"
+    deepwork_dir.mkdir()
+    (deepwork_dir / "tmp").mkdir()
+    return tmp_path
+
+
+@pytest.fixture
+def state_manager(project_root: Path) -> StateManager:
+    """Create a StateManager instance."""
+    return StateManager(project_root)
+
+
+class TestStateManager:
+    """Tests for StateManager class."""
+
+    def test_init(self, state_manager: StateManager, project_root: Path) -> None:
+        """Test StateManager initialization."""
+        assert state_manager.project_root == project_root
+        assert state_manager.sessions_dir == project_root / ".deepwork" / "tmp"
+        assert state_manager._session_stack == []
+        assert state_manager.get_stack_depth() == 0
+
+    def test_generate_session_id(self, state_manager: StateManager) -> None:
+        """Test session ID generation."""
+        session_id = state_manager._generate_session_id()
+
+        assert isinstance(session_id, str)
+        assert len(session_id) == 8
+
+    def test_generate_branch_name_with_instance(self, state_manager: StateManager) -> None:
+        """Test branch name generation with instance ID."""
+        branch = state_manager._generate_branch_name("test_job", "main", "acme")
+
+        assert branch == "deepwork/test_job-main-acme"
+
+    def test_generate_branch_name_without_instance(self, state_manager: StateManager) -> None:
+        """Test branch name generation without instance ID (uses date)."""
+        branch = state_manager._generate_branch_name("test_job", "main", None)
+
+        assert branch.startswith("deepwork/test_job-main-")
+        # Should be a date like 20240101
+        assert len(branch.split("-")[-1]) == 8
+
+    async def test_create_session(self, state_manager: StateManager) -> None:
+        """Test creating a new session."""
+        session = await state_manager.create_session(
+            job_name="test_job",
+            workflow_name="main",
+            goal="Complete the task",
+            first_step_id="step1",
+            instance_id="acme",
+        )
+
+        assert session.job_name == "test_job"
+        assert session.workflow_name == "main"
+        assert session.goal == "Complete the task"
+        assert session.current_step_id == "step1"
+        assert session.instance_id == "acme"
+        assert session.status == "active"
+        assert "acme" in session.branch_name
+
+        # Verify session file was created
+        session_file = state_manager._session_file(session.session_id)
+        assert session_file.exists()
+
+    async def test_load_session(self, state_manager: StateManager) -> None:
+        """Test loading an existing session."""
+        # Create a session first
+        created_session = await state_manager.create_session(
+            job_name="test_job",
+            workflow_name="main",
+            goal="Complete the task",
+            first_step_id="step1",
+        )
+
+        # Create a new state manager and load the session
+        new_manager = StateManager(state_manager.project_root)
+        loaded_session = await new_manager.load_session(created_session.session_id)
+
+        assert loaded_session.session_id == created_session.session_id
+        assert loaded_session.job_name == "test_job"
+        assert loaded_session.goal == "Complete the task"
+
+    async def test_load_session_not_found(self, state_manager: StateManager) -> None:
+        """Test loading non-existent session."""
+        with pytest.raises(StateError, match="Session not found"):
+            await state_manager.load_session("nonexistent")
+
+    async def test_get_active_session(self, state_manager: StateManager) -> None:
+        """Test getting active session."""
+        # No active session initially
+        assert state_manager.get_active_session() is None
+
+        # Create session
+        session = await state_manager.create_session(
+            job_name="test_job",
+            workflow_name="main",
+            goal="Complete the task",
+            first_step_id="step1",
+        )
+
+        assert state_manager.get_active_session() == session
+
+    def test_require_active_session(self, state_manager: StateManager) -> None:
+        """Test require_active_session raises when no session."""
+        with pytest.raises(StateError, match="No active workflow session"):
+            state_manager.require_active_session()
+
+    async def test_start_step(self, state_manager: StateManager) -> None:
+        """Test marking a step as started."""
+        await state_manager.create_session(
+            job_name="test_job",
+            workflow_name="main",
+            goal="Complete the task",
+            first_step_id="step1",
+        )
+
+        await state_manager.start_step("step2")
+        session = state_manager.get_active_session()
+
+        assert session is not None
+        assert session.current_step_id == "step2"
+        assert "step2" in session.step_progress
+        assert session.step_progress["step2"].started_at is not None
+
+    async def test_complete_step(self, state_manager: StateManager) -> None:
+        """Test marking a step as completed."""
+        await state_manager.create_session(
+            job_name="test_job",
+            workflow_name="main",
+            goal="Complete the task",
+            first_step_id="step1",
+        )
+
+        await state_manager.complete_step(
+            step_id="step1",
+            outputs={"report": "output1.md", "data": "output2.md"},
+            notes="Done!",
+        )
+
+        session = state_manager.get_active_session()
+        assert session is not None
+        progress = session.step_progress["step1"]
+
+        assert progress.completed_at is not None
+        assert progress.outputs == {"report": "output1.md", "data": "output2.md"}
+        assert progress.notes == "Done!"
+
+    async def test_record_quality_attempt(self, state_manager: StateManager) -> None:
+        """Test recording quality gate attempts."""
+        await state_manager.create_session(
+            job_name="test_job",
+            workflow_name="main",
+            goal="Complete the task",
+            first_step_id="step1",
+        )
+
+        # First attempt
+        attempts = await state_manager.record_quality_attempt("step1")
+        assert attempts == 1
+
+        # Second attempt
+        attempts = await state_manager.record_quality_attempt("step1")
+        assert attempts == 2
+
+    async def test_advance_to_step(self, state_manager: StateManager) -> None:
+        """Test advancing to a new step."""
+        await state_manager.create_session(
+            job_name="test_job",
+            workflow_name="main",
+            goal="Complete the task",
+            first_step_id="step1",
+        )
+
+        await state_manager.advance_to_step("step2", 1)
+        session = state_manager.get_active_session()
+
+        assert session is not None
+        assert session.current_step_id == "step2"
+        assert session.current_entry_index == 1
+
+    async def test_complete_workflow(self, state_manager: StateManager) -> None:
+        """Test marking workflow as complete pops from stack."""
+        session = await state_manager.create_session(
+            job_name="test_job",
+            workflow_name="main",
+            goal="Complete the task",
+            first_step_id="step1",
+        )
+        session_id = session.session_id
+
+        # Complete workflow - should pop from stack
+        new_active = await state_manager.complete_workflow()
+
+        # No active session after completion
+        assert new_active is None
+        assert state_manager.get_active_session() is None
+        assert state_manager.get_stack_depth() == 0
+
+        # But completed session should be persisted to disk
+        loaded = await state_manager.load_session(session_id)
+        assert loaded.status == "completed"
+        assert loaded.completed_at is not None
+
+    async def test_get_all_outputs(self, state_manager: StateManager) -> None:
+        """Test getting all outputs from completed steps."""
+        await state_manager.create_session(
+            job_name="test_job",
+            workflow_name="main",
+            goal="Complete the task",
+            first_step_id="step1",
+        )
+
+        await state_manager.complete_step("step1", {"report": "output1.md"})
+        await state_manager.complete_step("step2", {"data_files": ["output2.md", "output3.md"]})
+
+        outputs = state_manager.get_all_outputs()
+
+        assert outputs == {
+            "report": "output1.md",
+            "data_files": ["output2.md", "output3.md"],
+        }
+        assert len(outputs) == 2
+
+    async def test_list_sessions(self, state_manager: StateManager) -> None:
+        """Test listing all sessions."""
+        # Create multiple sessions
+        await state_manager.create_session(
+            job_name="job1",
+            workflow_name="main",
+            goal="Goal 1",
+            first_step_id="step1",
+        )
+        await state_manager.create_session(
+            job_name="job2",
+            workflow_name="main",
+            goal="Goal 2",
+            first_step_id="step1",
+        )
+
+        sessions = await state_manager.list_sessions()
+
+        assert len(sessions) == 2
+        job_names = {s.job_name for s in sessions}
+        assert "job1" in job_names
+        assert "job2" in job_names
+
+    async def test_find_active_sessions_for_workflow(self, state_manager: StateManager) -> None:
+        """Test finding active sessions for a workflow."""
+        # Create sessions for different workflows
+        await state_manager.create_session(
+            job_name="test_job",
+            workflow_name="main",
+            goal="Goal 1",
+            first_step_id="step1",
+        )
+        await state_manager.create_session(
+            job_name="test_job",
+            workflow_name="other",
+            goal="Goal 2",
+            first_step_id="step1",
+        )
+
+        sessions = await state_manager.find_active_sessions_for_workflow("test_job", "main")
+
+        assert len(sessions) == 1
+        assert sessions[0].workflow_name == "main"
+
+    async def test_delete_session(self, state_manager: StateManager) -> None:
+        """Test deleting a session."""
+        session = await state_manager.create_session(
+            job_name="test_job",
+            workflow_name="main",
+            goal="Goal",
+            first_step_id="step1",
+        )
+
+        session_file = state_manager._session_file(session.session_id)
+        assert session_file.exists()
+
+        await state_manager.delete_session(session.session_id)
+
+        assert not session_file.exists()
+        assert state_manager.get_active_session() is None
+
+
+class TestStateManagerStack:
+    """Tests for stack-based workflow nesting."""
+
+    @pytest.fixture
+    def project_root(self, tmp_path: Path) -> Path:
+        """Create a temporary project root with .deepwork directory."""
+        deepwork_dir = tmp_path / ".deepwork"
+        deepwork_dir.mkdir()
+        (deepwork_dir / "tmp").mkdir()
+        return tmp_path
+
+    @pytest.fixture
+    def state_manager(self, project_root: Path) -> StateManager:
+        """Create a StateManager instance."""
+        return StateManager(project_root)
+
+    async def test_nested_workflows_stack(self, state_manager: StateManager) -> None:
+        """Test that starting workflows pushes onto the stack."""
+        # Start first workflow
+        session1 = await state_manager.create_session(
+            job_name="job1",
+            workflow_name="workflow1",
+            goal="Goal 1",
+            first_step_id="step1",
+        )
+
+        assert state_manager.get_stack_depth() == 1
+        assert state_manager.get_active_session() == session1
+
+        # Start nested workflow
+        session2 = await state_manager.create_session(
+            job_name="job2",
+            workflow_name="workflow2",
+            goal="Goal 2",
+            first_step_id="stepA",
+        )
+
+        assert state_manager.get_stack_depth() == 2
+        assert state_manager.get_active_session() == session2
+
+        # Start another nested workflow
+        session3 = await state_manager.create_session(
+            job_name="job3",
+            workflow_name="workflow3",
+            goal="Goal 3",
+            first_step_id="stepX",
+        )
+
+        assert state_manager.get_stack_depth() == 3
+        assert state_manager.get_active_session() == session3
+
+    async def test_complete_workflow_pops_stack(self, state_manager: StateManager) -> None:
+        """Test that completing a workflow pops from stack and resumes parent."""
+        # Start two nested workflows
+        session1 = await state_manager.create_session(
+            job_name="job1",
+            workflow_name="workflow1",
+            goal="Goal 1",
+            first_step_id="step1",
+        )
+        await state_manager.create_session(
+            job_name="job2",
+            workflow_name="workflow2",
+            goal="Goal 2",
+            first_step_id="stepA",
+        )
+
+        assert state_manager.get_stack_depth() == 2
+
+        # Complete inner workflow
+        resumed = await state_manager.complete_workflow()
+
+        assert state_manager.get_stack_depth() == 1
+        assert resumed == session1
+        assert state_manager.get_active_session() == session1
+
+    async def test_get_stack(self, state_manager: StateManager) -> None:
+        """Test get_stack returns workflow/step info."""
+        await state_manager.create_session(
+            job_name="job1",
+            workflow_name="wf1",
+            goal="Goal 1",
+            first_step_id="step1",
+        )
+        await state_manager.create_session(
+            job_name="job2",
+            workflow_name="wf2",
+            goal="Goal 2",
+            first_step_id="stepA",
+        )
+
+        stack = state_manager.get_stack()
+
+        assert len(stack) == 2
+        assert stack[0].workflow == "job1/wf1"
+        assert stack[0].step == "step1"
+        assert stack[1].workflow == "job2/wf2"
+        assert stack[1].step == "stepA"
+
+    async def test_abort_workflow(self, state_manager: StateManager) -> None:
+        """Test abort_workflow marks as aborted and pops from stack."""
+        session1 = await state_manager.create_session(
+            job_name="job1",
+            workflow_name="wf1",
+            goal="Goal 1",
+            first_step_id="step1",
+        )
+        session2 = await state_manager.create_session(
+            job_name="job2",
+            workflow_name="wf2",
+            goal="Goal 2",
+            first_step_id="stepA",
+        )
+
+        # Abort inner workflow
+        aborted, resumed = await state_manager.abort_workflow("Something went wrong")
+
+        assert aborted.session_id == session2.session_id
+        assert aborted.status == "aborted"
+        assert aborted.abort_reason == "Something went wrong"
+        assert resumed == session1
+        assert state_manager.get_stack_depth() == 1
+        assert state_manager.get_active_session() == session1
+
+    async def test_abort_workflow_no_parent(self, state_manager: StateManager) -> None:
+        """Test abort_workflow with no parent workflow."""
+        session = await state_manager.create_session(
+            job_name="job1",
+            workflow_name="wf1",
+            goal="Goal 1",
+            first_step_id="step1",
+        )
+
+        aborted, resumed = await state_manager.abort_workflow("Cancelled")
+
+        assert aborted.session_id == session.session_id
+        assert aborted.status == "aborted"
+        assert resumed is None
+        assert state_manager.get_stack_depth() == 0
+        assert state_manager.get_active_session() is None
+
+
+class TestSessionIdRouting:
+    """Tests for session_id-based routing in StateManager."""
+
+    @pytest.fixture
+    def project_root(self, tmp_path: Path) -> Path:
+        """Create a temporary project root with .deepwork directory."""
+        deepwork_dir = tmp_path / ".deepwork"
+        deepwork_dir.mkdir()
+        (deepwork_dir / "tmp").mkdir()
+        return tmp_path
+
+    @pytest.fixture
+    def state_manager(self, project_root: Path) -> StateManager:
+        """Create a StateManager instance."""
+        return StateManager(project_root)
+
+    def test_resolve_session_by_id(self, state_manager: StateManager) -> None:
+        """Test _resolve_session finds the correct session in a multi-session stack."""
+        import asyncio
+
+        async def setup() -> None:
+            await state_manager.create_session(
+                job_name="job1", workflow_name="wf1", goal="G1", first_step_id="s1"
+            )
+            await state_manager.create_session(
+                job_name="job2", workflow_name="wf2", goal="G2", first_step_id="s2"
+            )
+            await state_manager.create_session(
+                job_name="job3", workflow_name="wf3", goal="G3", first_step_id="s3"
+            )
+
+        asyncio.get_event_loop().run_until_complete(setup())
+
+        # Stack has 3 sessions; resolve the middle one by ID
+        middle_session = state_manager._session_stack[1]
+        resolved = state_manager._resolve_session(middle_session.session_id)
+        assert resolved.session_id == middle_session.session_id
+        assert resolved.job_name == "job2"
+
+    def test_resolve_session_invalid_id(self, state_manager: StateManager) -> None:
+        """Test _resolve_session raises StateError for unknown session ID."""
+        import asyncio
+
+        asyncio.get_event_loop().run_until_complete(
+            state_manager.create_session(
+                job_name="job1", workflow_name="wf1", goal="G1", first_step_id="s1"
+            )
+        )
+
+        with pytest.raises(StateError, match="Session 'nonexistent' not found"):
+            state_manager._resolve_session("nonexistent")
+
+    def test_resolve_session_none_falls_back_to_active(self, state_manager: StateManager) -> None:
+        """Test _resolve_session with None falls back to top-of-stack."""
+        import asyncio
+
+        asyncio.get_event_loop().run_until_complete(
+            state_manager.create_session(
+                job_name="job1", workflow_name="wf1", goal="G1", first_step_id="s1"
+            )
+        )
+        asyncio.get_event_loop().run_until_complete(
+            state_manager.create_session(
+                job_name="job2", workflow_name="wf2", goal="G2", first_step_id="s2"
+            )
+        )
+
+        resolved = state_manager._resolve_session(None)
+        assert resolved.job_name == "job2"  # top-of-stack
+
+    async def test_complete_workflow_by_session_id(self, state_manager: StateManager) -> None:
+        """Test complete_workflow removes a specific session from middle of stack."""
+        session1 = await state_manager.create_session(
+            job_name="job1", workflow_name="wf1", goal="G1", first_step_id="s1"
+        )
+        session2 = await state_manager.create_session(
+            job_name="job2", workflow_name="wf2", goal="G2", first_step_id="s2"
+        )
+        session3 = await state_manager.create_session(
+            job_name="job3", workflow_name="wf3", goal="G3", first_step_id="s3"
+        )
+
+        assert state_manager.get_stack_depth() == 3
+
+        # Complete the middle session by ID
+        new_active = await state_manager.complete_workflow(session_id=session2.session_id)
+
+        assert state_manager.get_stack_depth() == 2
+        # Stack should have session1 and session3; top is session3
+        assert new_active is not None
+        assert new_active.session_id == session3.session_id
+        assert state_manager.get_active_session() == session3
+        remaining_ids = [s.session_id for s in state_manager._session_stack]
+        assert session1.session_id in remaining_ids
+        assert session2.session_id not in remaining_ids
+        assert session3.session_id in remaining_ids
+
+    async def test_abort_workflow_by_session_id(self, state_manager: StateManager) -> None:
+        """Test abort_workflow removes a specific session from middle of stack."""
+        session1 = await state_manager.create_session(
+            job_name="job1", workflow_name="wf1", goal="G1", first_step_id="s1"
+        )
+        session2 = await state_manager.create_session(
+            job_name="job2", workflow_name="wf2", goal="G2", first_step_id="s2"
+        )
+        session3 = await state_manager.create_session(
+            job_name="job3", workflow_name="wf3", goal="G3", first_step_id="s3"
+        )
+
+        # Abort the middle session
+        aborted, new_active = await state_manager.abort_workflow(
+            "Testing mid-stack abort", session_id=session2.session_id
+        )
+
+        assert aborted.session_id == session2.session_id
+        assert aborted.status == "aborted"
+        assert state_manager.get_stack_depth() == 2
+        # Top of stack should still be session3
+        assert new_active is not None
+        assert new_active.session_id == session3.session_id
+        remaining_ids = [s.session_id for s in state_manager._session_stack]
+        assert session1.session_id in remaining_ids
+        assert session2.session_id not in remaining_ids
+
+    async def test_complete_step_with_session_id(self, state_manager: StateManager) -> None:
+        """Test complete_step operates on a non-top session when session_id is given."""
+        session1 = await state_manager.create_session(
+            job_name="job1", workflow_name="wf1", goal="G1", first_step_id="s1"
+        )
+        await state_manager.create_session(
+            job_name="job2", workflow_name="wf2", goal="G2", first_step_id="s2"
+        )
+
+        # Complete step on session1 (not on top) using session_id
+        await state_manager.complete_step(
+            step_id="s1",
+            outputs={"report": "report.md"},
+            notes="Done",
+            session_id=session1.session_id,
+        )
+
+        # Verify session1 was updated
+        progress = session1.step_progress["s1"]
+        assert progress.completed_at is not None
+        assert progress.outputs == {"report": "report.md"}
+
+        # Verify session2 (top) was not affected
+        top = state_manager.get_active_session()
+        assert top is not None
+        assert "s1" not in top.step_progress
diff --git a/tests/unit/mcp/test_tools.py b/tests/unit/mcp/test_tools.py
new file mode 100644
index 00000000..f5b7adce
--- /dev/null
+++ b/tests/unit/mcp/test_tools.py
@@ -0,0 +1,1380 @@
+"""Tests for MCP workflow tools."""
+
+from pathlib import Path
+
+import pytest
+
+from deepwork.mcp.quality_gate import MockQualityGate
+from deepwork.mcp.schemas import (
+    AbortWorkflowInput,
+    FinishedStepInput,
+    StartWorkflowInput,
+    StepStatus,
+)
+from deepwork.mcp.state import StateError, StateManager
+from deepwork.mcp.tools import ToolError, WorkflowTools
+
+
+@pytest.fixture
+def project_root(tmp_path: Path) -> Path:
+    """Create a temporary project with a test job."""
+    # Create .deepwork directory
+    deepwork_dir = tmp_path / ".deepwork"
+    deepwork_dir.mkdir()
+    (deepwork_dir / "tmp").mkdir()
+
+    # Create jobs directory with a test job
+    jobs_dir = deepwork_dir / "jobs"
+    jobs_dir.mkdir()
+
+    job_dir = jobs_dir / "test_job"
+    job_dir.mkdir()
+
+    # Create job.yml
+    job_yml = """
+name: test_job
+version: "1.0.0"
+summary: A test job
+description: This is a test job for unit tests
+
+steps:
+  - id: step1
+    name: First Step
+    description: The first step
+    instructions_file: steps/step1.md
+    outputs:
+      output1.md:
+        type: file
+        description: First step output
+        required: true
+    reviews:
+      - run_each: step
+        quality_criteria:
+          "Output Valid": "Is the output valid?"
+  - id: step2
+    name: Second Step
+    description: The second step
+    instructions_file: steps/step2.md
+    outputs:
+      output2.md:
+        type: file
+        description: Second step output
+        required: true
+    dependencies:
+      - step1
+    reviews: []
+
+workflows:
+  - name: main
+    summary: Main workflow
+    steps:
+      - step1
+      - step2
+"""
+    (job_dir / "job.yml").write_text(job_yml)
+
+    # Create step instruction files
+    steps_dir = job_dir / "steps"
+    steps_dir.mkdir()
+    (steps_dir / "step1.md").write_text("# Step 1\n\nDo the first thing.")
+    (steps_dir / "step2.md").write_text("# Step 2\n\nDo the second thing.")
+
+    return tmp_path
+
+
+@pytest.fixture
+def state_manager(project_root: Path) -> StateManager:
+    """Create a StateManager instance."""
+    return StateManager(project_root)
+
+
+@pytest.fixture
+def tools(project_root: Path, state_manager: StateManager) -> WorkflowTools:
+    """Create a WorkflowTools instance without quality gate."""
+    return WorkflowTools(
+        project_root=project_root,
+        state_manager=state_manager,
+    )
+
+
+@pytest.fixture
+def tools_with_quality(project_root: Path, state_manager: StateManager) -> WorkflowTools:
+    """Create a WorkflowTools instance with mock quality gate."""
+    return WorkflowTools(
+        project_root=project_root,
+        state_manager=state_manager,
+        quality_gate=MockQualityGate(should_pass=True),
+    )
+
+
+class TestWorkflowTools:
+    """Tests for WorkflowTools class."""
+
+    def test_init(self, tools: WorkflowTools, project_root: Path) -> None:
+        """Test WorkflowTools initialization."""
+        assert tools.project_root == project_root
+        assert tools.jobs_dir == project_root / ".deepwork" / "jobs"
+
+    def test_get_workflows(self, tools: WorkflowTools) -> None:
+        """Test getting all workflows."""
+        response = tools.get_workflows()
+
+        assert len(response.jobs) == 1
+        job = response.jobs[0]
+
+        assert job.name == "test_job"
+        assert job.summary == "A test job"
+        assert len(job.workflows) == 1
+        assert job.workflows[0].name == "main"
+        assert job.workflows[0].summary == "Main workflow"
+
+    def test_get_workflows_empty(self, tmp_path: Path) -> None:
+        """Test getting workflows when no jobs exist."""
+        deepwork_dir = tmp_path / ".deepwork"
+        deepwork_dir.mkdir()
+        (deepwork_dir / "tmp").mkdir()
+
+        state_manager = StateManager(tmp_path)
+        tools = WorkflowTools(
+            project_root=tmp_path,
+            state_manager=state_manager,
+        )
+
+        response = tools.get_workflows()
+
+        assert len(response.jobs) == 0
+
+    async def test_start_workflow(self, tools: WorkflowTools) -> None:
+        """Test starting a workflow."""
+        input_data = StartWorkflowInput(
+            goal="Complete the test job",
+            job_name="test_job",
+            workflow_name="main",
+            instance_id="test-instance",
+        )
+
+        response = await tools.start_workflow(input_data)
+
+        assert response.begin_step.session_id is not None
+        assert "test-instance" in response.begin_step.branch_name
+        assert response.begin_step.step_id == "step1"
+        assert "Step 1" in response.begin_step.step_instructions
+        outputs = response.begin_step.step_expected_outputs
+        assert len(outputs) == 1
+        assert outputs[0].name == "output1.md"
+        assert outputs[0].type == "file"
+        assert outputs[0].syntax_for_finished_step_tool == "filepath"
+        assert len(response.begin_step.step_reviews) == 1
+        assert response.begin_step.step_reviews[0].run_each == "step"
+        assert "Output Valid" in response.begin_step.step_reviews[0].quality_criteria
+
+    async def test_start_workflow_invalid_job(self, tools: WorkflowTools) -> None:
+        """Test starting workflow with invalid job."""
+        input_data = StartWorkflowInput(
+            goal="Complete task",
+            job_name="nonexistent",
+            workflow_name="main",
+        )
+
+        with pytest.raises(ToolError, match="Job not found"):
+            await tools.start_workflow(input_data)
+
+    async def test_start_workflow_auto_selects_single_workflow(self, tools: WorkflowTools) -> None:
+        """Test that a wrong workflow name auto-selects when job has one workflow."""
+        input_data = StartWorkflowInput(
+            goal="Complete task",
+            job_name="test_job",
+            workflow_name="nonexistent",
+        )
+
+        # Should succeed by auto-selecting the only workflow ("main")
+        response = await tools.start_workflow(input_data)
+        assert response.begin_step.step_id == "step1"
+
+    async def test_start_workflow_invalid_workflow_multiple(
+        self, project_root: Path, state_manager: StateManager
+    ) -> None:
+        """Test that a wrong workflow name errors when job has multiple workflows."""
+        # Create a job with two workflows
+        job_dir = project_root / ".deepwork" / "jobs" / "multi_wf_job"
+        job_dir.mkdir()
+        (job_dir / "job.yml").write_text(
+            """
+name: multi_wf_job
+version: "1.0.0"
+summary: A job with multiple workflows
+description: Test job with multiple workflows
+
+steps:
+  - id: step_a
+    name: Step A
+    description: Step A
+    instructions_file: steps/step_a.md
+    outputs:
+      output_a.md:
+        type: file
+        description: Step A output
+        required: true
+    reviews: []
+  - id: step_b
+    name: Step B
+    description: Step B
+    instructions_file: steps/step_b.md
+    outputs:
+      output_b.md:
+        type: file
+        description: Step B output
+        required: true
+    reviews: []
+
+workflows:
+  - name: alpha
+    summary: Alpha workflow
+    steps:
+      - step_a
+  - name: beta
+    summary: Beta workflow
+    steps:
+      - step_b
+"""
+        )
+        steps_dir = job_dir / "steps"
+        steps_dir.mkdir()
+        (steps_dir / "step_a.md").write_text("# Step A")
+        (steps_dir / "step_b.md").write_text("# Step B")
+
+        tools = WorkflowTools(project_root=project_root, state_manager=state_manager)
+        input_data = StartWorkflowInput(
+            goal="Complete task",
+            job_name="multi_wf_job",
+            workflow_name="nonexistent",
+        )
+
+        with pytest.raises(ToolError, match="Workflow.*not found.*alpha.*beta"):
+            await tools.start_workflow(input_data)
+
+    async def test_finished_step_no_session(self, tools: WorkflowTools) -> None:
+        """Test finished_step without active session."""
+        input_data = FinishedStepInput(outputs={"output1.md": "output1.md"})
+
+        with pytest.raises(StateError, match="No active workflow session"):
+            await tools.finished_step(input_data)
+
+    async def test_finished_step_advances_to_next(
+        self, tools: WorkflowTools, project_root: Path
+    ) -> None:
+        """Test finished_step advances to next step."""
+        # Start workflow first
+        start_input = StartWorkflowInput(
+            goal="Complete task",
+            job_name="test_job",
+            workflow_name="main",
+        )
+        await tools.start_workflow(start_input)
+
+        # Create output file
+        (project_root / "output1.md").write_text("Test output")
+
+        # Finish first step
+        finish_input = FinishedStepInput(
+            outputs={"output1.md": "output1.md"},
+            notes="Completed step 1",
+        )
+        response = await tools.finished_step(finish_input)
+
+        assert response.status == StepStatus.NEXT_STEP
+        assert response.begin_step is not None
+        assert response.begin_step.step_id == "step2"
+        assert response.begin_step.step_instructions is not None
+        assert "Step 2" in response.begin_step.step_instructions
+
+    async def test_finished_step_completes_workflow(
+        self, tools: WorkflowTools, project_root: Path
+    ) -> None:
+        """Test finished_step completes workflow on last step."""
+        # Start workflow
+        start_input = StartWorkflowInput(
+            goal="Complete task",
+            job_name="test_job",
+            workflow_name="main",
+        )
+        await tools.start_workflow(start_input)
+
+        # Complete first step
+        (project_root / "output1.md").write_text("Output 1")
+        await tools.finished_step(FinishedStepInput(outputs={"output1.md": "output1.md"}))
+
+        # Complete second (last) step
+        (project_root / "output2.md").write_text("Output 2")
+        response = await tools.finished_step(
+            FinishedStepInput(outputs={"output2.md": "output2.md"})
+        )
+
+        assert response.status == StepStatus.WORKFLOW_COMPLETE
+        assert response.summary is not None
+        assert "completed" in response.summary.lower()
+        assert response.all_outputs is not None
+        assert "output1.md" in response.all_outputs
+        assert "output2.md" in response.all_outputs
+
+    async def test_finished_step_with_quality_gate_pass(
+        self, tools_with_quality: WorkflowTools, project_root: Path
+    ) -> None:
+        """Test finished_step passes quality gate."""
+        # Start workflow
+        start_input = StartWorkflowInput(
+            goal="Complete task",
+            job_name="test_job",
+            workflow_name="main",
+        )
+        await tools_with_quality.start_workflow(start_input)
+
+        # Create output and finish step
+        (project_root / "output1.md").write_text("Valid output")
+        response = await tools_with_quality.finished_step(
+            FinishedStepInput(outputs={"output1.md": "output1.md"})
+        )
+
+        # Should advance to next step
+        assert response.status == StepStatus.NEXT_STEP
+
+    async def test_finished_step_with_quality_gate_fail(
+        self, project_root: Path, state_manager: StateManager
+    ) -> None:
+        """Test finished_step fails quality gate."""
+        # Create tools with failing quality gate
+        tools = WorkflowTools(
+            project_root=project_root,
+            state_manager=state_manager,
+            quality_gate=MockQualityGate(should_pass=False, feedback="Needs improvement"),
+        )
+
+        # Start workflow
+        start_input = StartWorkflowInput(
+            goal="Complete task",
+            job_name="test_job",
+            workflow_name="main",
+        )
+        await tools.start_workflow(start_input)
+
+        # Create output and finish step
+        (project_root / "output1.md").write_text("Invalid output")
+        response = await tools.finished_step(
+            FinishedStepInput(outputs={"output1.md": "output1.md"})
+        )
+
+        assert response.status == StepStatus.NEEDS_WORK
+        assert response.feedback == "Needs improvement"
+        assert response.failed_reviews is not None
+
+    async def test_finished_step_quality_gate_max_attempts(
+        self, project_root: Path, state_manager: StateManager
+    ) -> None:
+        """Test finished_step fails after max quality gate attempts."""
+        tools = WorkflowTools(
+            project_root=project_root,
+            state_manager=state_manager,
+            quality_gate=MockQualityGate(should_pass=False, feedback="Always fails"),
+        )
+
+        # Start workflow
+        start_input = StartWorkflowInput(
+            goal="Complete task",
+            job_name="test_job",
+            workflow_name="main",
+        )
+        await tools.start_workflow(start_input)
+
+        # Create output
+        (project_root / "output1.md").write_text("Bad output")
+
+        # Try multiple times (max is 3)
+        for _ in range(2):
+            response = await tools.finished_step(
+                FinishedStepInput(outputs={"output1.md": "output1.md"})
+            )
+            assert response.status == StepStatus.NEEDS_WORK
+
+        # Third attempt should raise error
+        with pytest.raises(ToolError, match="Quality gate failed after.*attempts"):
+            await tools.finished_step(FinishedStepInput(outputs={"output1.md": "output1.md"}))
+
+    async def test_finished_step_quality_gate_override(
+        self, project_root: Path, state_manager: StateManager
+    ) -> None:
+        """Test finished_step skips quality gate when override reason provided."""
+        # Create tools with failing quality gate
+        failing_gate = MockQualityGate(should_pass=False, feedback="Would fail")
+        tools = WorkflowTools(
+            project_root=project_root,
+            state_manager=state_manager,
+            quality_gate=failing_gate,
+        )
+
+        # Start workflow
+        start_input = StartWorkflowInput(
+            goal="Complete task",
+            job_name="test_job",
+            workflow_name="main",
+        )
+        await tools.start_workflow(start_input)
+
+        # Create output and finish step with override reason
+        (project_root / "output1.md").write_text("Output that would fail quality check")
+        response = await tools.finished_step(
+            FinishedStepInput(
+                outputs={"output1.md": "output1.md"},
+                quality_review_override_reason="Manual review completed offline",
+            )
+        )
+
+        # Should advance to next step despite failing quality gate config
+        assert response.status == StepStatus.NEXT_STEP
+        # Quality gate should not have been called
+        assert len(failing_gate.evaluations) == 0
+
+    async def test_finished_step_validates_unknown_output_keys(
+        self, tools: WorkflowTools, project_root: Path
+    ) -> None:
+        """Test finished_step rejects unknown output keys."""
+        start_input = StartWorkflowInput(
+            goal="Complete task",
+            job_name="test_job",
+            workflow_name="main",
+        )
+        await tools.start_workflow(start_input)
+
+        (project_root / "output1.md").write_text("content")
+        (project_root / "extra.md").write_text("content")
+
+        with pytest.raises(ToolError, match="Unknown output names.*extra.md"):
+            await tools.finished_step(
+                FinishedStepInput(outputs={"output1.md": "output1.md", "extra.md": "extra.md"})
+            )
+
+    async def test_finished_step_validates_missing_output_keys(
+        self, tools: WorkflowTools, project_root: Path
+    ) -> None:
+        """Test finished_step rejects when declared outputs are missing."""
+        start_input = StartWorkflowInput(
+            goal="Complete task",
+            job_name="test_job",
+            workflow_name="main",
+        )
+        await tools.start_workflow(start_input)
+
+        # Step1 declares output1.md, but we provide empty dict
+        with pytest.raises(ToolError, match="Missing required outputs.*output1.md"):
+            await tools.finished_step(FinishedStepInput(outputs={}))
+
+    async def test_finished_step_allows_omitting_optional_outputs(
+        self, project_root: Path, state_manager: StateManager
+    ) -> None:
+        """Test finished_step allows omitting outputs with required: false."""
+        job_dir = project_root / ".deepwork" / "jobs" / "optional_job"
+        job_dir.mkdir(parents=True)
+        (job_dir / "job.yml").write_text(
+            """
+name: optional_job
+version: "1.0.0"
+summary: Job with optional output
+description: Test job
+
+steps:
+  - id: produce
+    name: Produce
+    description: Produces outputs
+    instructions_file: steps/produce.md
+    outputs:
+      main_report.md:
+        type: file
+        description: The main report
+        required: true
+      supplementary.md:
+        type: file
+        description: Optional supplementary material
+        required: false
+      extra_files:
+        type: files
+        description: Optional extra files
+        required: false
+    reviews: []
+
+workflows:
+  - name: main
+    summary: Main workflow
+    steps:
+      - produce
+"""
+        )
+        steps_dir = job_dir / "steps"
+        steps_dir.mkdir()
+        (steps_dir / "produce.md").write_text("# Produce\n\nProduce outputs.")
+
+        tools = WorkflowTools(
+            project_root=project_root,
+            state_manager=state_manager,
+        )
+
+        await tools.start_workflow(
+            StartWorkflowInput(
+                goal="Produce outputs",
+                job_name="optional_job",
+                workflow_name="main",
+            )
+        )
+
+        # Only provide the required output, omit optional ones
+        (project_root / "main_report.md").write_text("Main report content")
+        response = await tools.finished_step(
+            FinishedStepInput(outputs={"main_report.md": "main_report.md"})
+        )
+
+        assert response.status == StepStatus.WORKFLOW_COMPLETE
+
+    async def test_finished_step_rejects_missing_required_but_not_optional(
+        self, project_root: Path, state_manager: StateManager
+    ) -> None:
+        """Test finished_step rejects missing required outputs even when optional ones exist."""
+        job_dir = project_root / ".deepwork" / "jobs" / "mixed_job"
+        job_dir.mkdir(parents=True)
+        (job_dir / "job.yml").write_text(
+            """
+name: mixed_job
+version: "1.0.0"
+summary: Job with mixed required/optional outputs
+description: Test job
+
+steps:
+  - id: produce
+    name: Produce
+    description: Produces outputs
+    instructions_file: steps/produce.md
+    outputs:
+      required_output.md:
+        type: file
+        description: Must be provided
+        required: true
+      optional_output.md:
+        type: file
+        description: Can be skipped
+        required: false
+    reviews: []
+
+workflows:
+  - name: main
+    summary: Main workflow
+    steps:
+      - produce
+"""
+        )
+        steps_dir = job_dir / "steps"
+        steps_dir.mkdir()
+        (steps_dir / "produce.md").write_text("# Produce\n\nProduce outputs.")
+
+        tools = WorkflowTools(
+            project_root=project_root,
+            state_manager=state_manager,
+        )
+
+        await tools.start_workflow(
+            StartWorkflowInput(
+                goal="Produce outputs",
+                job_name="mixed_job",
+                workflow_name="main",
+            )
+        )
+
+        # Provide only the optional output, not the required one
+        (project_root / "optional_output.md").write_text("Optional content")
+        with pytest.raises(ToolError, match="Missing required outputs.*required_output.md"):
+            await tools.finished_step(
+                FinishedStepInput(outputs={"optional_output.md": "optional_output.md"})
+            )
+
+    async def test_finished_step_accepts_optional_outputs_when_provided(
+        self, project_root: Path, state_manager: StateManager
+    ) -> None:
+        """Test finished_step validates optional outputs when they are provided."""
+        job_dir = project_root / ".deepwork" / "jobs" / "optional_provided_job"
+        job_dir.mkdir(parents=True)
+        (job_dir / "job.yml").write_text(
+            """
+name: optional_provided_job
+version: "1.0.0"
+summary: Job with optional output that gets provided
+description: Test job
+
+steps:
+  - id: produce
+    name: Produce
+    description: Produces outputs
+    instructions_file: steps/produce.md
+    outputs:
+      main.md:
+        type: file
+        description: Required output
+        required: true
+      bonus.md:
+        type: file
+        description: Optional output
+        required: false
+    reviews: []
+
+workflows:
+  - name: main
+    summary: Main workflow
+    steps:
+      - produce
+"""
+        )
+        steps_dir = job_dir / "steps"
+        steps_dir.mkdir()
+        (steps_dir / "produce.md").write_text("# Produce\n\nProduce outputs.")
+
+        tools = WorkflowTools(
+            project_root=project_root,
+            state_manager=state_manager,
+        )
+
+        await tools.start_workflow(
+            StartWorkflowInput(
+                goal="Produce outputs",
+                job_name="optional_provided_job",
+                workflow_name="main",
+            )
+        )
+
+        # Provide both required and optional
+        (project_root / "main.md").write_text("Main content")
+        (project_root / "bonus.md").write_text("Bonus content")
+        response = await tools.finished_step(
+            FinishedStepInput(outputs={"main.md": "main.md", "bonus.md": "bonus.md"})
+        )
+
+        assert response.status == StepStatus.WORKFLOW_COMPLETE
+
+    async def test_expected_outputs_include_required_field(
+        self, project_root: Path, state_manager: StateManager
+    ) -> None:
+        """Test that step_expected_outputs includes the required field."""
+        job_dir = project_root / ".deepwork" / "jobs" / "req_field_job"
+        job_dir.mkdir(parents=True)
+        (job_dir / "job.yml").write_text(
+            """
+name: req_field_job
+version: "1.0.0"
+summary: Job to test required field in expected outputs
+description: Test job
+
+steps:
+  - id: produce
+    name: Produce
+    description: Produces outputs
+    instructions_file: steps/produce.md
+    outputs:
+      required_out.md:
+        type: file
+        description: Required output
+        required: true
+      optional_out.md:
+        type: file
+        description: Optional output
+        required: false
+    reviews: []
+
+workflows:
+  - name: main
+    summary: Main workflow
+    steps:
+      - produce
+"""
+        )
+        steps_dir = job_dir / "steps"
+        steps_dir.mkdir()
+        (steps_dir / "produce.md").write_text("# Produce\n\nProduce outputs.")
+
+        tools = WorkflowTools(
+            project_root=project_root,
+            state_manager=state_manager,
+        )
+
+        response = await tools.start_workflow(
+            StartWorkflowInput(
+                goal="Produce outputs",
+                job_name="req_field_job",
+                workflow_name="main",
+            )
+        )
+
+        outputs = response.begin_step.step_expected_outputs
+        assert len(outputs) == 2
+
+        required_out = next(o for o in outputs if o.name == "required_out.md")
+        optional_out = next(o for o in outputs if o.name == "optional_out.md")
+
+        assert required_out.required is True
+        assert optional_out.required is False
+
+    async def test_finished_step_validates_file_type_must_be_string(
+        self, tools: WorkflowTools, project_root: Path
+    ) -> None:
+        """Test finished_step rejects list value for type: file output."""
+        start_input = StartWorkflowInput(
+            goal="Complete task",
+            job_name="test_job",
+            workflow_name="main",
+        )
+        await tools.start_workflow(start_input)
+
+        (project_root / "output1.md").write_text("content")
+
+        with pytest.raises(ToolError, match="type 'file'.*single string path"):
+            await tools.finished_step(FinishedStepInput(outputs={"output1.md": ["output1.md"]}))
+
+    async def test_finished_step_validates_file_existence(
+        self, tools: WorkflowTools, project_root: Path
+    ) -> None:
+        """Test finished_step rejects when file does not exist."""
+        start_input = StartWorkflowInput(
+            goal="Complete task",
+            job_name="test_job",
+            workflow_name="main",
+        )
+        await tools.start_workflow(start_input)
+
+        # Don't create the file
+        with pytest.raises(ToolError, match="file not found at.*nonexistent.md"):
+            await tools.finished_step(FinishedStepInput(outputs={"output1.md": "nonexistent.md"}))
+
+    async def test_finished_step_empty_outputs_for_step_with_no_outputs(
+        self, project_root: Path, state_manager: StateManager
+    ) -> None:
+        """Test that empty outputs {} works for steps declared with no outputs."""
+        # Create a job with a step that has no outputs
+        job_dir = project_root / ".deepwork" / "jobs" / "no_output_job"
+        job_dir.mkdir(parents=True)
+        (job_dir / "job.yml").write_text(
+            """
+name: no_output_job
+version: "1.0.0"
+summary: Job with no-output step
+description: Test job
+
+steps:
+  - id: cleanup
+    name: Cleanup
+    description: Cleanup step with no outputs
+    instructions_file: steps/cleanup.md
+    outputs: {}
+    reviews: []
+
+workflows:
+  - name: main
+    summary: Main workflow
+    steps:
+      - cleanup
+"""
+        )
+        steps_dir = job_dir / "steps"
+        steps_dir.mkdir()
+        (steps_dir / "cleanup.md").write_text("# Cleanup\n\nDo cleanup.")
+
+        tools = WorkflowTools(
+            project_root=project_root,
+            state_manager=state_manager,
+        )
+
+        start_input = StartWorkflowInput(
+            goal="Run cleanup",
+            job_name="no_output_job",
+            workflow_name="main",
+        )
+        await tools.start_workflow(start_input)
+
+        response = await tools.finished_step(FinishedStepInput(outputs={}))
+
+        assert response.status == StepStatus.WORKFLOW_COMPLETE
+
+    async def test_finished_step_validates_files_type_output(
+        self, project_root: Path, state_manager: StateManager
+    ) -> None:
+        """Test finished_step validation for type: files outputs."""
+        # Create a job with a files-type output
+        job_dir = project_root / ".deepwork" / "jobs" / "files_job"
+        job_dir.mkdir(parents=True)
+        (job_dir / "job.yml").write_text(
+            """
+name: files_job
+version: "1.0.0"
+summary: Job with files output
+description: Test job
+
+steps:
+  - id: generate
+    name: Generate
+    description: Generates multiple files
+    instructions_file: steps/generate.md
+    outputs:
+      reports:
+        type: files
+        description: Generated report files
+        required: true
+    reviews: []
+
+workflows:
+  - name: main
+    summary: Main workflow
+    steps:
+      - generate
+"""
+        )
+        steps_dir = job_dir / "steps"
+        steps_dir.mkdir()
+        (steps_dir / "generate.md").write_text("# Generate\n\nGenerate reports.")
+
+        tools = WorkflowTools(
+            project_root=project_root,
+            state_manager=state_manager,
+        )
+
+        start_input = StartWorkflowInput(
+            goal="Generate reports",
+            job_name="files_job",
+            workflow_name="main",
+        )
+        await tools.start_workflow(start_input)
+
+        # type: files requires a list, not a string
+        with pytest.raises(ToolError, match="type 'files'.*list of paths"):
+            await tools.finished_step(FinishedStepInput(outputs={"reports": "report1.md"}))
+
+    async def test_finished_step_validates_files_type_existence(
+        self, project_root: Path, state_manager: StateManager
+    ) -> None:
+        """Test finished_step validates file existence for type: files outputs."""
+        job_dir = project_root / ".deepwork" / "jobs" / "files_job2"
+        job_dir.mkdir(parents=True)
+        (job_dir / "job.yml").write_text(
+            """
+name: files_job2
+version: "1.0.0"
+summary: Job with files output
+description: Test job
+
+steps:
+  - id: generate
+    name: Generate
+    description: Generates multiple files
+    instructions_file: steps/generate.md
+    outputs:
+      reports:
+        type: files
+        description: Generated report files
+        required: true
+    reviews: []
+
+workflows:
+  - name: main
+    summary: Main workflow
+    steps:
+      - generate
+"""
+        )
+        steps_dir = job_dir / "steps"
+        steps_dir.mkdir()
+        (steps_dir / "generate.md").write_text("# Generate\n\nGenerate reports.")
+
+        tools = WorkflowTools(
+            project_root=project_root,
+            state_manager=state_manager,
+        )
+
+        start_input = StartWorkflowInput(
+            goal="Generate reports",
+            job_name="files_job2",
+            workflow_name="main",
+        )
+        await tools.start_workflow(start_input)
+
+        # Create one file but not the other
+        (project_root / "report1.md").write_text("Report 1")
+
+        with pytest.raises(ToolError, match="file not found at.*missing.md"):
+            await tools.finished_step(
+                FinishedStepInput(outputs={"reports": ["report1.md", "missing.md"]})
+            )
+
+    async def test_finished_step_files_type_success(
+        self, project_root: Path, state_manager: StateManager
+    ) -> None:
+        """Test finished_step succeeds with valid type: files outputs."""
+        job_dir = project_root / ".deepwork" / "jobs" / "files_job3"
+        job_dir.mkdir(parents=True)
+        (job_dir / "job.yml").write_text(
+            """
+name: files_job3
+version: "1.0.0"
+summary: Job with files output
+description: Test job
+
+steps:
+  - id: generate
+    name: Generate
+    description: Generates multiple files
+    instructions_file: steps/generate.md
+    outputs:
+      reports:
+        type: files
+        description: Generated report files
+        required: true
+    reviews: []
+
+workflows:
+  - name: main
+    summary: Main workflow
+    steps:
+      - generate
+"""
+        )
+        steps_dir = job_dir / "steps"
+        steps_dir.mkdir()
+        (steps_dir / "generate.md").write_text("# Generate\n\nGenerate reports.")
+
+        tools = WorkflowTools(
+            project_root=project_root,
+            state_manager=state_manager,
+        )
+
+        start_input = StartWorkflowInput(
+            goal="Generate reports",
+            job_name="files_job3",
+            workflow_name="main",
+        )
+        await tools.start_workflow(start_input)
+
+        (project_root / "report1.md").write_text("Report 1")
+        (project_root / "report2.md").write_text("Report 2")
+
+        response = await tools.finished_step(
+            FinishedStepInput(outputs={"reports": ["report1.md", "report2.md"]})
+        )
+
+        assert response.status == StepStatus.WORKFLOW_COMPLETE
+
+    async def test_quality_reviewer_receives_only_current_step_outputs(
+        self, project_root: Path, state_manager: StateManager
+    ) -> None:
+        """Test that quality reviewer receives ONLY the current step's outputs.
+
+        Prior step outputs are no longer auto-included as inputs.
+        """
+        # Create a 3-step job: step1 -> step2 -> step3
+        job_dir = project_root / ".deepwork" / "jobs" / "chain_job"
+        job_dir.mkdir(parents=True)
+        (job_dir / "job.yml").write_text(
+            """
+name: chain_job
+version: "1.0.0"
+summary: Three-step chain to test input filtering
+description: Test job
+
+steps:
+  - id: step1
+    name: Step 1
+    description: First step
+    instructions_file: steps/step1.md
+    outputs:
+      step1_output.md:
+        type: file
+        description: Step 1 output
+        required: true
+    reviews: []
+
+  - id: step2
+    name: Step 2
+    description: Second step - takes step1 output
+    instructions_file: steps/step2.md
+    inputs:
+      - file: step1_output.md
+        from_step: step1
+    outputs:
+      step2_output.md:
+        type: file
+        description: Step 2 output
+        required: true
+    dependencies:
+      - step1
+    reviews: []
+
+  - id: step3
+    name: Step 3
+    description: Third step - takes ONLY step2 output (not step1)
+    instructions_file: steps/step3.md
+    inputs:
+      - file: step2_output.md
+        from_step: step2
+    outputs:
+      step3_output.md:
+        type: file
+        description: Step 3 output
+        required: true
+    dependencies:
+      - step2
+    reviews:
+      - run_each: step
+        quality_criteria:
+          "Complete": "Is the output complete?"
+
+workflows:
+  - name: main
+    summary: Main workflow
+    steps:
+      - step1
+      - step2
+      - step3
+"""
+        )
+        steps_dir = job_dir / "steps"
+        steps_dir.mkdir()
+        (steps_dir / "step1.md").write_text("# Step 1\n\nProduce output.")
+        (steps_dir / "step2.md").write_text("# Step 2\n\nProduce output.")
+        (steps_dir / "step3.md").write_text("# Step 3\n\nProduce output.")
+
+        mock_gate = MockQualityGate(should_pass=True)
+        tools = WorkflowTools(
+            project_root=project_root,
+            state_manager=state_manager,
+            quality_gate=mock_gate,
+        )
+
+        # Start workflow
+        await tools.start_workflow(
+            StartWorkflowInput(
+                goal="Test input filtering",
+                job_name="chain_job",
+                workflow_name="main",
+            )
+        )
+
+        # Complete step1
+        (project_root / "step1_output.md").write_text("STEP1_CONTENT_MARKER")
+        await tools.finished_step(FinishedStepInput(outputs={"step1_output.md": "step1_output.md"}))
+
+        # Complete step2
+        (project_root / "step2_output.md").write_text("STEP2_CONTENT_MARKER")
+        await tools.finished_step(FinishedStepInput(outputs={"step2_output.md": "step2_output.md"}))
+
+        # Complete step3 — quality gate runs here
+        (project_root / "step3_output.md").write_text("STEP3_CONTENT_MARKER")
+        response = await tools.finished_step(
+            FinishedStepInput(outputs={"step3_output.md": "step3_output.md"})
+        )
+
+        assert response.status == StepStatus.WORKFLOW_COMPLETE
+
+        # Verify reviewer was called WITHOUT any prior step inputs
+        assert len(mock_gate.evaluations) == 1
+        evaluation = mock_gate.evaluations[0]
+
+        # Should only have the current step's outputs, not inputs from prior steps
+        assert "step3_output.md" in evaluation["outputs"]
+        assert "inputs" not in evaluation, (
+            "Quality reviewer should not receive 'inputs' key — "
+            "prior step outputs are no longer auto-included"
+        )
+
+    async def test_additional_review_guidance_reaches_reviewer(
+        self, project_root: Path, state_manager: StateManager
+    ) -> None:
+        """Test that additional_review_guidance from job.yml is passed to the reviewer."""
+        job_dir = project_root / ".deepwork" / "jobs" / "guided_job"
+        job_dir.mkdir(parents=True)
+        (job_dir / "job.yml").write_text(
+            """
+name: guided_job
+version: "1.0.0"
+summary: Job with review guidance
+description: Test job
+
+steps:
+  - id: write
+    name: Write Report
+    description: Write a report
+    instructions_file: steps/write.md
+    outputs:
+      report.md:
+        type: file
+        description: The report
+        required: true
+    reviews:
+      - run_each: report.md
+        additional_review_guidance: "Read the project README for context on expected format."
+        quality_criteria:
+          "Format Correct": "Does the report follow the expected format?"
+
+workflows:
+  - name: main
+    summary: Main workflow
+    steps:
+      - write
+"""
+        )
+        steps_dir = job_dir / "steps"
+        steps_dir.mkdir()
+        (steps_dir / "write.md").write_text("# Write\n\nWrite the report.")
+
+        mock_gate = MockQualityGate(should_pass=True)
+        tools = WorkflowTools(
+            project_root=project_root,
+            state_manager=state_manager,
+            quality_gate=mock_gate,
+        )
+
+        await tools.start_workflow(
+            StartWorkflowInput(
+                goal="Write report",
+                job_name="guided_job",
+                workflow_name="main",
+            )
+        )
+
+        (project_root / "report.md").write_text("Report content")
+        response = await tools.finished_step(FinishedStepInput(outputs={"report.md": "report.md"}))
+
+        assert response.status == StepStatus.WORKFLOW_COMPLETE
+        assert len(mock_gate.evaluations) == 1
+        assert mock_gate.evaluations[0]["additional_review_guidance"] == (
+            "Read the project README for context on expected format."
+        )
+
+    async def test_review_guidance_in_start_workflow_response(
+        self, project_root: Path, state_manager: StateManager
+    ) -> None:
+        """Test that ReviewInfo in start_workflow response includes guidance."""
+        job_dir = project_root / ".deepwork" / "jobs" / "guided_job2"
+        job_dir.mkdir(parents=True)
+        (job_dir / "job.yml").write_text(
+            """
+name: guided_job2
+version: "1.0.0"
+summary: Job with review guidance
+description: Test job
+
+steps:
+  - id: analyze
+    name: Analyze
+    description: Analyze data
+    instructions_file: steps/analyze.md
+    outputs:
+      analysis.md:
+        type: file
+        description: Analysis output
+        required: true
+    reviews:
+      - run_each: step
+        additional_review_guidance: "Check the raw data directory for completeness."
+        quality_criteria:
+          "Thorough": "Is the analysis thorough?"
+
+workflows:
+  - name: main
+    summary: Main workflow
+    steps:
+      - analyze
+"""
+        )
+        steps_dir = job_dir / "steps"
+        steps_dir.mkdir()
+        (steps_dir / "analyze.md").write_text("# Analyze\n\nAnalyze the data.")
+
+        tools = WorkflowTools(
+            project_root=project_root,
+            state_manager=state_manager,
+        )
+
+        response = await tools.start_workflow(
+            StartWorkflowInput(
+                goal="Analyze data",
+                job_name="guided_job2",
+                workflow_name="main",
+            )
+        )
+
+        reviews = response.begin_step.step_reviews
+        assert len(reviews) == 1
+        assert reviews[0].additional_review_guidance == (
+            "Check the raw data directory for completeness."
+        )
+
+
+class TestSessionIdRouting:
+    """Tests for session_id routing in WorkflowTools."""
+
+    @pytest.fixture
+    def project_root(self, tmp_path: Path) -> Path:
+        """Create a temporary project with two test jobs."""
+        deepwork_dir = tmp_path / ".deepwork"
+        deepwork_dir.mkdir()
+        (deepwork_dir / "tmp").mkdir()
+        jobs_dir = deepwork_dir / "jobs"
+        jobs_dir.mkdir()
+
+        # Create job_a with two steps
+        job_a_dir = jobs_dir / "job_a"
+        job_a_dir.mkdir()
+        (job_a_dir / "job.yml").write_text(
+            """
+name: job_a
+version: "1.0.0"
+summary: Job A
+description: Test job A
+
+steps:
+  - id: a_step1
+    name: A Step 1
+    description: First step of A
+    instructions_file: steps/a_step1.md
+    outputs:
+      a_out1.md:
+        type: file
+        description: A step 1 output
+        required: true
+    reviews: []
+  - id: a_step2
+    name: A Step 2
+    description: Second step of A
+    instructions_file: steps/a_step2.md
+    outputs:
+      a_out2.md:
+        type: file
+        description: A step 2 output
+        required: true
+    reviews: []
+
+workflows:
+  - name: main
+    summary: Main workflow
+    steps:
+      - a_step1
+      - a_step2
+"""
+        )
+        a_steps = job_a_dir / "steps"
+        a_steps.mkdir()
+        (a_steps / "a_step1.md").write_text("# A Step 1\n\nDo A step 1.")
+        (a_steps / "a_step2.md").write_text("# A Step 2\n\nDo A step 2.")
+
+        # Create job_b with one step
+        job_b_dir = jobs_dir / "job_b"
+        job_b_dir.mkdir()
+        (job_b_dir / "job.yml").write_text(
+            """
+name: job_b
+version: "1.0.0"
+summary: Job B
+description: Test job B
+
+steps:
+  - id: b_step1
+    name: B Step 1
+    description: First step of B
+    instructions_file: steps/b_step1.md
+    outputs:
+      b_out1.md:
+        type: file
+        description: B step 1 output
+        required: true
+    reviews: []
+
+workflows:
+  - name: main
+    summary: Main workflow
+    steps:
+      - b_step1
+"""
+        )
+        b_steps = job_b_dir / "steps"
+        b_steps.mkdir()
+        (b_steps / "b_step1.md").write_text("# B Step 1\n\nDo B step 1.")
+
+        return tmp_path
+
+    @pytest.fixture
+    def state_manager(self, project_root: Path) -> StateManager:
+        return StateManager(project_root)
+
+    @pytest.fixture
+    def tools(self, project_root: Path, state_manager: StateManager) -> WorkflowTools:
+        return WorkflowTools(project_root=project_root, state_manager=state_manager)
+
+    async def test_finished_step_with_session_id_not_on_top(
+        self, tools: WorkflowTools, project_root: Path
+    ) -> None:
+        """Test finished_step targets a non-top session when session_id is provided."""
+        # Start two workflows — session_a is below session_b on the stack
+        resp_a = await tools.start_workflow(
+            StartWorkflowInput(goal="Do A", job_name="job_a", workflow_name="main")
+        )
+        session_a_id = resp_a.begin_step.session_id
+
+        resp_b = await tools.start_workflow(
+            StartWorkflowInput(goal="Do B", job_name="job_b", workflow_name="main")
+        )
+        session_b_id = resp_b.begin_step.session_id
+
+        assert tools.state_manager.get_stack_depth() == 2
+
+        # Create output files for job_a's first step
+        (project_root / "a_out1.md").write_text("A output 1")
+
+        # Finish step on session_a (NOT on top) using session_id
+        response = await tools.finished_step(
+            FinishedStepInput(
+                outputs={"a_out1.md": "a_out1.md"},
+                session_id=session_a_id,
+            )
+        )
+
+        # Should advance to next step in job_a
+        assert response.status == StepStatus.NEXT_STEP
+        assert response.begin_step is not None
+        assert response.begin_step.step_id == "a_step2"
+        assert response.begin_step.session_id == session_a_id
+
+        # Session B should still be on top and untouched
+        top_session = tools.state_manager.get_active_session()
+        assert top_session is not None
+        assert top_session.session_id == session_b_id
+        assert top_session.current_step_id == "b_step1"
+
+    async def test_abort_workflow_with_session_id(
+        self, tools: WorkflowTools, project_root: Path
+    ) -> None:
+        """Test abort_workflow targets a specific session by session_id."""
+        # Start two workflows
+        resp_a = await tools.start_workflow(
+            StartWorkflowInput(goal="Do A", job_name="job_a", workflow_name="main")
+        )
+        session_a_id = resp_a.begin_step.session_id
+
+        resp_b = await tools.start_workflow(
+            StartWorkflowInput(goal="Do B", job_name="job_b", workflow_name="main")
+        )
+        session_b_id = resp_b.begin_step.session_id
+
+        # Abort session_a (not on top) by ID
+        response = await tools.abort_workflow(
+            AbortWorkflowInput(
+                explanation="Aborting A",
+                session_id=session_a_id,
+            )
+        )
+
+        assert response.aborted_workflow == "job_a/main"
+        assert response.explanation == "Aborting A"
+
+        # Stack should only have session_b now
+        assert tools.state_manager.get_stack_depth() == 1
+        assert tools.state_manager.get_active_session() is not None
+        assert tools.state_manager.get_active_session().session_id == session_b_id
diff --git a/tests/unit/test_adapters.py b/tests/unit/test_adapters.py
deleted file mode 100644
index 18e8d6d7..00000000
--- a/tests/unit/test_adapters.py
+++ /dev/null
@@ -1,529 +0,0 @@
-"""Tests for agent adapters."""
-
-import json
-from pathlib import Path
-from typing import Any
-
-import pytest
-
-from deepwork.core.adapters import (
-    AdapterError,
-    AgentAdapter,
-    ClaudeAdapter,
-    GeminiAdapter,
-    SkillLifecycleHook,
-)
-
-
-def _get_claude_required_permissions() -> list[str]:
-    """Load the required permissions from the Claude settings template."""
-    settings_path = (
-        Path(__file__).parent.parent.parent
-        / "src"
-        / "deepwork"
-        / "templates"
-        / "claude"
-        / "settings.json"
-    )
-    with open(settings_path, encoding="utf-8") as f:
-        settings = json.load(f)
-    permissions = settings["permissions"]["allow"]
-    # Sanity check: ensure the template file has meaningful content
-    assert len(permissions) > 2, "Settings template should have more than 2 permissions"
-    return permissions
-
-
-class TestAgentAdapterRegistry:
-    """Tests for AgentAdapter registry functionality."""
-
-    def test_get_all_returns_registered_adapters(self) -> None:
-        """Test that get_all returns all registered adapters."""
-        adapters = AgentAdapter.get_all()
-
-        assert "claude" in adapters
-        assert adapters["claude"] is ClaudeAdapter
-        assert "gemini" in adapters
-        assert adapters["gemini"] is GeminiAdapter
-
-    def test_get_returns_correct_adapter(self) -> None:
-        """Test that get returns the correct adapter class."""
-        assert AgentAdapter.get("claude") is ClaudeAdapter
-        assert AgentAdapter.get("gemini") is GeminiAdapter
-
-    def test_get_raises_for_unknown_adapter(self) -> None:
-        """Test that get raises AdapterError for unknown adapter."""
-        with pytest.raises(AdapterError, match="Unknown adapter 'unknown'"):
-            AgentAdapter.get("unknown")
-
-    def test_list_names_returns_all_names(self) -> None:
-        """Test that list_names returns all registered adapter names."""
-        names = AgentAdapter.list_names()
-
-        assert "claude" in names
-        assert "gemini" in names
-        assert len(names) >= 2  # At least claude and gemini
-
-
-class TestClaudeAdapter:
-    """Tests for ClaudeAdapter."""
-
-    def test_class_attributes(self) -> None:
-        """Test Claude adapter class attributes."""
-        assert ClaudeAdapter.name == "claude"
-        assert ClaudeAdapter.display_name == "Claude Code"
-        assert ClaudeAdapter.config_dir == ".claude"
-        assert ClaudeAdapter.skills_dir == "skills"
-
-    def test_init_with_project_root(self, temp_dir: Path) -> None:
-        """Test initialization with project root."""
-        adapter = ClaudeAdapter(temp_dir)
-
-        assert adapter.project_root == temp_dir
-
-    def test_init_without_project_root(self) -> None:
-        """Test initialization without project root."""
-        adapter = ClaudeAdapter()
-
-        assert adapter.project_root is None
-
-    def test_detect_when_present(self, temp_dir: Path) -> None:
-        """Test detect when .claude directory exists."""
-        (temp_dir / ".claude").mkdir()
-        adapter = ClaudeAdapter(temp_dir)
-
-        assert adapter.detect() is True
-
-    def test_detect_when_absent(self, temp_dir: Path) -> None:
-        """Test detect when .claude directory doesn't exist."""
-        adapter = ClaudeAdapter(temp_dir)
-
-        assert adapter.detect() is False
-
-    def test_detect_with_explicit_project_root(self, temp_dir: Path) -> None:
-        """Test detect with explicit project root parameter."""
-        (temp_dir / ".claude").mkdir()
-        adapter = ClaudeAdapter()
-
-        assert adapter.detect(temp_dir) is True
-
-    def test_get_template_dir(self, temp_dir: Path) -> None:
-        """Test get_template_dir."""
-        adapter = ClaudeAdapter()
-        templates_root = temp_dir / "templates"
-
-        result = adapter.get_template_dir(templates_root)
-
-        assert result == templates_root / "claude"
-
-    def test_get_skills_dir(self, temp_dir: Path) -> None:
-        """Test get_skills_dir."""
-        adapter = ClaudeAdapter(temp_dir)
-
-        result = adapter.get_skills_dir()
-
-        assert result == temp_dir / ".claude" / "skills"
-
-    def test_get_skills_dir_with_explicit_root(self, temp_dir: Path) -> None:
-        """Test get_skills_dir with explicit project root."""
-        adapter = ClaudeAdapter()
-
-        result = adapter.get_skills_dir(temp_dir)
-
-        assert result == temp_dir / ".claude" / "skills"
-
-    def test_get_skills_dir_raises_without_root(self) -> None:
-        """Test get_skills_dir raises when no project root specified."""
-        adapter = ClaudeAdapter()
-
-        with pytest.raises(AdapterError, match="No project root specified"):
-            adapter.get_skills_dir()
-
-    def test_get_meta_skill_filename(self) -> None:
-        """Test get_meta_skill_filename returns directory/SKILL.md format."""
-        adapter = ClaudeAdapter()
-
-        result = adapter.get_meta_skill_filename("my_job")
-
-        assert result == "my_job/SKILL.md"
-
-    def test_get_step_skill_filename_returns_directory_format(self) -> None:
-        """Test get_step_skill_filename returns directory/SKILL.md format."""
-        adapter = ClaudeAdapter()
-
-        result = adapter.get_step_skill_filename("my_job", "step_one")
-
-        assert result == "my_job.step_one/SKILL.md"
-
-    def test_get_step_skill_filename_exposed(self) -> None:
-        """Test get_step_skill_filename with exposed=True (same format)."""
-        adapter = ClaudeAdapter()
-
-        result = adapter.get_step_skill_filename("my_job", "step_one", exposed=True)
-
-        assert result == "my_job.step_one/SKILL.md"
-
-    def test_sync_hooks_creates_settings_file(self, temp_dir: Path) -> None:
-        """Test sync_hooks creates settings.json when it doesn't exist."""
-        (temp_dir / ".claude").mkdir()
-        adapter = ClaudeAdapter(temp_dir)
-        hooks = {
-            "PreToolUse": [{"matcher": "", "hooks": [{"type": "command", "command": "test.sh"}]}]
-        }
-
-        count = adapter.sync_hooks(temp_dir, hooks)
-
-        assert count == 1
-        settings_file = temp_dir / ".claude" / "settings.json"
-        assert settings_file.exists()
-        settings = json.loads(settings_file.read_text())
-        assert "hooks" in settings
-        assert "PreToolUse" in settings["hooks"]
-
-    def test_sync_hooks_merges_with_existing(self, temp_dir: Path) -> None:
-        """Test sync_hooks merges with existing settings."""
-        claude_dir = temp_dir / ".claude"
-        claude_dir.mkdir()
-        settings_file = claude_dir / "settings.json"
-        settings_file.write_text(json.dumps({"existing_key": "value", "hooks": {}}))
-
-        adapter = ClaudeAdapter(temp_dir)
-        hooks = {
-            "PreToolUse": [{"matcher": "", "hooks": [{"type": "command", "command": "test.sh"}]}]
-        }
-
-        adapter.sync_hooks(temp_dir, hooks)
-
-        settings = json.loads(settings_file.read_text())
-        assert settings["existing_key"] == "value"
-        assert "PreToolUse" in settings["hooks"]
-
-    def test_sync_hooks_empty_hooks_returns_zero(self, temp_dir: Path) -> None:
-        """Test sync_hooks returns 0 for empty hooks."""
-        adapter = ClaudeAdapter(temp_dir)
-
-        count = adapter.sync_hooks(temp_dir, {})
-
-        assert count == 0
-
-    def test_sync_permissions_creates_settings_file(self, temp_dir: Path) -> None:
-        """Test sync_permissions creates settings.json when it doesn't exist."""
-        (temp_dir / ".claude").mkdir()
-        adapter = ClaudeAdapter(temp_dir)
-
-        count = adapter.sync_permissions(temp_dir)
-
-        expected_permissions = _get_claude_required_permissions()
-        assert count == len(expected_permissions)
-        settings_file = temp_dir / ".claude" / "settings.json"
-        assert settings_file.exists()
-        settings = json.loads(settings_file.read_text())
-        assert "permissions" in settings
-        assert "allow" in settings["permissions"]
-        for permission in expected_permissions:
-            assert permission in settings["permissions"]["allow"]
-
-    def test_sync_permissions_merges_with_existing(self, temp_dir: Path) -> None:
-        """Test sync_permissions merges with existing settings."""
-        claude_dir = temp_dir / ".claude"
-        claude_dir.mkdir()
-        settings_file = claude_dir / "settings.json"
-        settings_file.write_text(json.dumps({"permissions": {"allow": ["Bash(ls:*)"]}}))
-
-        adapter = ClaudeAdapter(temp_dir)
-        adapter.sync_permissions(temp_dir)
-
-        settings = json.loads(settings_file.read_text())
-        assert "Bash(ls:*)" in settings["permissions"]["allow"]
-        assert "Read(./.deepwork/**)" in settings["permissions"]["allow"]
-
-    def test_sync_permissions_idempotent(self, temp_dir: Path) -> None:
-        """Test sync_permissions is idempotent (doesn't duplicate permissions)."""
-        (temp_dir / ".claude").mkdir()
-        adapter = ClaudeAdapter(temp_dir)
-
-        expected_permissions = _get_claude_required_permissions()
-
-        # First call adds permissions
-        count1 = adapter.sync_permissions(temp_dir)
-        assert count1 == len(expected_permissions)
-
-        # Second call should add nothing
-        count2 = adapter.sync_permissions(temp_dir)
-        assert count2 == 0
-
-        # Verify no duplicates
-        settings_file = temp_dir / ".claude" / "settings.json"
-        settings = json.loads(settings_file.read_text())
-        allow_list = settings["permissions"]["allow"]
-        for permission in expected_permissions:
-            assert allow_list.count(permission) == 1
-
-    def test_add_permission_single(self, temp_dir: Path) -> None:
-        """Test add_permission adds a single permission."""
-        (temp_dir / ".claude").mkdir()
-        adapter = ClaudeAdapter(temp_dir)
-
-        result = adapter.add_permission(temp_dir, "Bash(custom:*)")
-
-        assert result is True
-        settings_file = temp_dir / ".claude" / "settings.json"
-        settings = json.loads(settings_file.read_text())
-        assert "Bash(custom:*)" in settings["permissions"]["allow"]
-
-    def test_add_permission_idempotent(self, temp_dir: Path) -> None:
-        """Test add_permission doesn't duplicate existing permissions."""
-        (temp_dir / ".claude").mkdir()
-        adapter = ClaudeAdapter(temp_dir)
-
-        # First call adds
-        result1 = adapter.add_permission(temp_dir, "Bash(custom:*)")
-        assert result1 is True
-
-        # Second call should return False
-        result2 = adapter.add_permission(temp_dir, "Bash(custom:*)")
-        assert result2 is False
-
-        # Verify no duplicates
-        settings_file = temp_dir / ".claude" / "settings.json"
-        settings = json.loads(settings_file.read_text())
-        assert settings["permissions"]["allow"].count("Bash(custom:*)") == 1
-
-    def test_add_permission_with_settings_dict(self, temp_dir: Path) -> None:
-        """Test add_permission with pre-loaded settings (doesn't save)."""
-        (temp_dir / ".claude").mkdir()
-        adapter = ClaudeAdapter(temp_dir)
-        settings: dict[str, Any] = {"permissions": {"allow": []}}
-
-        result = adapter.add_permission(temp_dir, "Bash(test:*)", settings)
-
-        assert result is True
-        assert "Bash(test:*)" in settings["permissions"]["allow"]
-        # File should not exist since we passed settings dict
-        settings_file = temp_dir / ".claude" / "settings.json"
-        assert not settings_file.exists()
-
-    def test_extract_skill_name_from_path(self, temp_dir: Path) -> None:
-        """Test _extract_skill_name extracts skill name from skill path."""
-        adapter = ClaudeAdapter(temp_dir)
-
-        # Test meta-skill path
-        path1 = temp_dir / ".claude" / "skills" / "my_job" / "SKILL.md"
-        assert adapter._extract_skill_name(path1) == "my_job"
-
-        # Test step skill path
-        path2 = temp_dir / ".claude" / "skills" / "my_job.step_one" / "SKILL.md"
-        assert adapter._extract_skill_name(path2) == "my_job.step_one"
-
-    def test_extract_skill_name_returns_none_for_invalid_path(self, temp_dir: Path) -> None:
-        """Test _extract_skill_name returns None for paths without skills dir."""
-        adapter = ClaudeAdapter(temp_dir)
-
-        path = temp_dir / ".claude" / "commands" / "my_command.md"
-        assert adapter._extract_skill_name(path) is None
-
-    def test_add_skill_permissions(self, temp_dir: Path) -> None:
-        """Test add_skill_permissions adds Skill permissions for each skill."""
-        (temp_dir / ".claude").mkdir()
-        adapter = ClaudeAdapter(temp_dir)
-
-        skill_paths = [
-            temp_dir / ".claude" / "skills" / "job_a" / "SKILL.md",
-            temp_dir / ".claude" / "skills" / "job_a.step_one" / "SKILL.md",
-            temp_dir / ".claude" / "skills" / "job_b" / "SKILL.md",
-        ]
-
-        count = adapter.add_skill_permissions(temp_dir, skill_paths)
-
-        assert count == 3
-        settings_file = temp_dir / ".claude" / "settings.json"
-        settings = json.loads(settings_file.read_text())
-        assert "Skill(job_a)" in settings["permissions"]["allow"]
-        assert "Skill(job_a.step_one)" in settings["permissions"]["allow"]
-        assert "Skill(job_b)" in settings["permissions"]["allow"]
-
-    def test_add_skill_permissions_idempotent(self, temp_dir: Path) -> None:
-        """Test add_skill_permissions doesn't duplicate permissions."""
-        (temp_dir / ".claude").mkdir()
-        adapter = ClaudeAdapter(temp_dir)
-
-        skill_paths = [temp_dir / ".claude" / "skills" / "my_job" / "SKILL.md"]
-
-        # First call adds
-        count1 = adapter.add_skill_permissions(temp_dir, skill_paths)
-        assert count1 == 1
-
-        # Second call should add nothing
-        count2 = adapter.add_skill_permissions(temp_dir, skill_paths)
-        assert count2 == 0
-
-    def test_add_skill_permissions_empty_list(self, temp_dir: Path) -> None:
-        """Test add_skill_permissions with empty list returns 0."""
-        adapter = ClaudeAdapter(temp_dir)
-
-        count = adapter.add_skill_permissions(temp_dir, [])
-
-        assert count == 0
-
-
-class TestGeminiAdapter:
-    """Tests for GeminiAdapter."""
-
-    def test_class_attributes(self) -> None:
-        """Test Gemini adapter class attributes."""
-        assert GeminiAdapter.name == "gemini"
-        assert GeminiAdapter.display_name == "Gemini CLI"
-        assert GeminiAdapter.config_dir == ".gemini"
-        assert GeminiAdapter.skills_dir == "skills"
-        assert GeminiAdapter.skill_template == "skill-job-step.toml.jinja"
-
-    def test_init_with_project_root(self, temp_dir: Path) -> None:
-        """Test initialization with project root."""
-        adapter = GeminiAdapter(temp_dir)
-
-        assert adapter.project_root == temp_dir
-
-    def test_init_without_project_root(self) -> None:
-        """Test initialization without project root."""
-        adapter = GeminiAdapter()
-
-        assert adapter.project_root is None
-
-    def test_detect_when_present(self, temp_dir: Path) -> None:
-        """Test detect when .gemini directory exists."""
-        (temp_dir / ".gemini").mkdir()
-        adapter = GeminiAdapter(temp_dir)
-
-        assert adapter.detect() is True
-
-    def test_detect_when_absent(self, temp_dir: Path) -> None:
-        """Test detect when .gemini directory doesn't exist."""
-        adapter = GeminiAdapter(temp_dir)
-
-        assert adapter.detect() is False
-
-    def test_detect_with_explicit_project_root(self, temp_dir: Path) -> None:
-        """Test detect with explicit project root parameter."""
-        (temp_dir / ".gemini").mkdir()
-        adapter = GeminiAdapter()
-
-        assert adapter.detect(temp_dir) is True
-
-    def test_get_template_dir(self, temp_dir: Path) -> None:
-        """Test get_template_dir."""
-        adapter = GeminiAdapter()
-        templates_root = temp_dir / "templates"
-
-        result = adapter.get_template_dir(templates_root)
-
-        assert result == templates_root / "gemini"
-
-    def test_get_skills_dir(self, temp_dir: Path) -> None:
-        """Test get_skills_dir."""
-        adapter = GeminiAdapter(temp_dir)
-
-        result = adapter.get_skills_dir()
-
-        assert result == temp_dir / ".gemini" / "skills"
-
-    def test_get_skills_dir_with_explicit_root(self, temp_dir: Path) -> None:
-        """Test get_skills_dir with explicit project root."""
-        adapter = GeminiAdapter()
-
-        result = adapter.get_skills_dir(temp_dir)
-
-        assert result == temp_dir / ".gemini" / "skills"
-
-    def test_get_skills_dir_raises_without_root(self) -> None:
-        """Test get_skills_dir raises when no project root specified."""
-        adapter = GeminiAdapter()
-
-        with pytest.raises(AdapterError, match="No project root specified"):
-            adapter.get_skills_dir()
-
-    def test_get_meta_skill_filename(self) -> None:
-        """Test get_meta_skill_filename returns index.toml in subdirectory."""
-        adapter = GeminiAdapter()
-
-        result = adapter.get_meta_skill_filename("my_job")
-
-        # Gemini uses subdirectories with index.toml for meta-skills
-        assert result == "my_job/index.toml"
-
-    def test_get_step_skill_filename_returns_clean_name(self) -> None:
-        """Test get_step_skill_filename returns clean TOML with subdirectory."""
-        adapter = GeminiAdapter()
-
-        result = adapter.get_step_skill_filename("my_job", "step_one")
-
-        # Gemini uses subdirectories for namespacing (colon becomes path)
-        # No prefix on skill filenames
-        assert result == "my_job/step_one.toml"
-
-    def test_get_step_skill_filename_exposed(self) -> None:
-        """Test get_step_skill_filename with exposed=True (same result, no prefix)."""
-        adapter = GeminiAdapter()
-
-        result = adapter.get_step_skill_filename("my_job", "step_one", exposed=True)
-
-        # Same filename whether exposed or not
-        assert result == "my_job/step_one.toml"
-
-    def test_get_step_skill_filename_with_underscores(self) -> None:
-        """Test get_step_skill_filename with underscores in names."""
-        adapter = GeminiAdapter()
-
-        result = adapter.get_step_skill_filename("competitive_research", "identify_competitors")
-
-        assert result == "competitive_research/identify_competitors.toml"
-
-    def test_hook_name_mapping_is_empty(self) -> None:
-        """Test that Gemini has no skill-level hooks."""
-        assert GeminiAdapter.hook_name_mapping == {}
-
-    def test_supports_hook_returns_false_for_all_hooks(self) -> None:
-        """Test that Gemini doesn't support any skill-level hooks."""
-        adapter = GeminiAdapter()
-
-        for hook in SkillLifecycleHook:
-            assert adapter.supports_hook(hook) is False
-
-    def test_get_platform_hook_name_returns_none(self) -> None:
-        """Test that get_platform_hook_name returns None for all hooks."""
-        adapter = GeminiAdapter()
-
-        for hook in SkillLifecycleHook:
-            assert adapter.get_platform_hook_name(hook) is None
-
-    def test_sync_hooks_returns_zero(self, temp_dir: Path) -> None:
-        """Test sync_hooks always returns 0 (no hook support)."""
-        (temp_dir / ".gemini").mkdir()
-        adapter = GeminiAdapter(temp_dir)
-        hooks = {
-            "SomeEvent": [{"matcher": "", "hooks": [{"type": "command", "command": "test.sh"}]}]
-        }
-
-        count = adapter.sync_hooks(temp_dir, hooks)
-
-        assert count == 0
-
-    def test_sync_hooks_empty_hooks_returns_zero(self, temp_dir: Path) -> None:
-        """Test sync_hooks returns 0 for empty hooks."""
-        adapter = GeminiAdapter(temp_dir)
-
-        count = adapter.sync_hooks(temp_dir, {})
-
-        assert count == 0
-
-    def test_sync_hooks_does_not_create_settings_file(self, temp_dir: Path) -> None:
-        """Test that sync_hooks doesn't create settings.json (unlike Claude)."""
-        gemini_dir = temp_dir / ".gemini"
-        gemini_dir.mkdir()
-        adapter = GeminiAdapter(temp_dir)
-        hooks = {
-            "AfterAgent": [{"matcher": "", "hooks": [{"type": "command", "command": "test.sh"}]}]
-        }
-
-        adapter.sync_hooks(temp_dir, hooks)
-
-        settings_file = gemini_dir / "settings.json"
-        assert not settings_file.exists()
diff --git a/tests/unit/test_command_executor.py b/tests/unit/test_command_executor.py
deleted file mode 100644
index 12472729..00000000
--- a/tests/unit/test_command_executor.py
+++ /dev/null
@@ -1,264 +0,0 @@
-"""Tests for command executor (CMD-5.x from test_scenarios.md)."""
-
-from pathlib import Path
-
-from deepwork.core.command_executor import (
-    CommandResult,
-    all_commands_succeeded,
-    execute_command,
-    format_command_errors,
-    run_command_action,
-    substitute_command_variables,
-)
-from deepwork.core.rules_parser import CommandAction
-
-
-class TestSubstituteCommandVariables:
-    """Tests for command variable substitution."""
-
-    def test_single_file_substitution(self) -> None:
-        """Substitute {file} variable."""
-        result = substitute_command_variables(
-            "ruff format {file}",
-            file="src/main.py",
-        )
-        assert result == "ruff format src/main.py"
-
-    def test_multiple_files_substitution(self) -> None:
-        """Substitute {files} variable."""
-        result = substitute_command_variables(
-            "eslint --fix {files}",
-            files=["a.js", "b.js", "c.js"],
-        )
-        assert result == "eslint --fix a.js b.js c.js"
-
-    def test_repo_root_substitution(self) -> None:
-        """Substitute {repo_root} variable."""
-        result = substitute_command_variables(
-            "cd {repo_root} && pytest",
-            repo_root=Path("/home/user/project"),
-        )
-        assert result == "cd /home/user/project && pytest"
-
-    def test_all_variables(self) -> None:
-        """Substitute all variables together."""
-        result = substitute_command_variables(
-            "{repo_root}/scripts/process.sh {file} {files}",
-            file="main.py",
-            files=["a.py", "b.py"],
-            repo_root=Path("/project"),
-        )
-        assert result == "/project/scripts/process.sh main.py a.py b.py"
-
-
-class TestExecuteCommand:
-    """Tests for command execution."""
-
-    def test_successful_command(self) -> None:
-        """CMD-5.3.1: Exit code 0 - success."""
-        result = execute_command("echo hello")
-        assert result.success is True
-        assert result.exit_code == 0
-        assert "hello" in result.stdout
-
-    def test_failed_command(self) -> None:
-        """CMD-5.3.2: Exit code 1 - failure."""
-        result = execute_command("exit 1")
-        assert result.success is False
-        assert result.exit_code == 1
-
-    def test_command_timeout(self) -> None:
-        """CMD-5.3.3: Command timeout."""
-        result = execute_command("sleep 10", timeout=1)
-        assert result.success is False
-        assert "timed out" in result.stderr.lower()
-
-    def test_command_not_found(self) -> None:
-        """CMD-5.3.4: Command not found."""
-        result = execute_command("nonexistent_command_12345")
-        assert result.success is False
-        # Different systems return different error messages
-        assert result.exit_code != 0 or "not found" in result.stderr.lower()
-
-
-class TestRunCommandActionEachMatch:
-    """Tests for run_for: each_match mode (CMD-5.1.x)."""
-
-    def test_single_file(self) -> None:
-        """CMD-5.1.1: Single file triggers single command."""
-        action = CommandAction(command="echo {file}", run_for="each_match")
-        results = run_command_action(action, ["src/main.py"])
-
-        assert len(results) == 1
-        assert results[0].command == "echo src/main.py"
-        assert results[0].success is True
-
-    def test_multiple_files(self) -> None:
-        """CMD-5.1.2: Multiple files trigger command for each."""
-        action = CommandAction(command="echo {file}", run_for="each_match")
-        results = run_command_action(action, ["src/a.py", "src/b.py"])
-
-        assert len(results) == 2
-        assert results[0].command == "echo src/a.py"
-        assert results[1].command == "echo src/b.py"
-
-    def test_no_files(self) -> None:
-        """CMD-5.1.3: No files - no command run."""
-        action = CommandAction(command="echo {file}", run_for="each_match")
-        results = run_command_action(action, [])
-
-        assert len(results) == 0
-
-
-class TestRunCommandActionAllMatches:
-    """Tests for run_for: all_matches mode (CMD-5.2.x)."""
-
-    def test_multiple_files_single_command(self) -> None:
-        """CMD-5.2.1: Multiple files in single command."""
-        action = CommandAction(command="echo {files}", run_for="all_matches")
-        results = run_command_action(action, ["a.js", "b.js", "c.js"])
-
-        assert len(results) == 1
-        assert results[0].command == "echo a.js b.js c.js"
-        assert results[0].success is True
-
-    def test_single_file_single_command(self) -> None:
-        """CMD-5.2.2: Single file in single command."""
-        action = CommandAction(command="echo {files}", run_for="all_matches")
-        results = run_command_action(action, ["a.js"])
-
-        assert len(results) == 1
-        assert results[0].command == "echo a.js"
-
-
-class TestAllCommandsSucceeded:
-    """Tests for all_commands_succeeded helper."""
-
-    def test_all_success(self) -> None:
-        """All commands succeeded."""
-        results = [
-            CommandResult(success=True, exit_code=0, stdout="ok", stderr="", command="echo 1"),
-            CommandResult(success=True, exit_code=0, stdout="ok", stderr="", command="echo 2"),
-        ]
-        assert all_commands_succeeded(results) is True
-
-    def test_one_failure(self) -> None:
-        """One command failed."""
-        results = [
-            CommandResult(success=True, exit_code=0, stdout="ok", stderr="", command="echo 1"),
-            CommandResult(success=False, exit_code=1, stdout="", stderr="error", command="exit 1"),
-        ]
-        assert all_commands_succeeded(results) is False
-
-    def test_empty_list(self) -> None:
-        """Empty list is considered success."""
-        assert all_commands_succeeded([]) is True
-
-
-class TestFormatCommandErrors:
-    """Tests for format_command_errors helper."""
-
-    def test_single_error(self) -> None:
-        """Format single error."""
-        results = [
-            CommandResult(
-                success=False,
-                exit_code=1,
-                stdout="",
-                stderr="Something went wrong",
-                command="failing_cmd",
-            ),
-        ]
-        output = format_command_errors(results)
-        assert "Command: failing_cmd" in output
-        assert "Something went wrong" in output
-        assert "Exit code: 1" in output
-
-    def test_multiple_errors(self) -> None:
-        """Format multiple errors."""
-        results = [
-            CommandResult(success=False, exit_code=1, stdout="", stderr="Error 1", command="cmd1"),
-            CommandResult(success=False, exit_code=2, stdout="", stderr="Error 2", command="cmd2"),
-        ]
-        output = format_command_errors(results)
-        assert "cmd1" in output
-        assert "Error 1" in output
-        assert "cmd2" in output
-        assert "Error 2" in output
-
-    def test_ignores_success(self) -> None:
-        """Ignore successful commands."""
-        results = [
-            CommandResult(success=True, exit_code=0, stdout="ok", stderr="", command="good_cmd"),
-            CommandResult(success=False, exit_code=1, stdout="", stderr="bad", command="bad_cmd"),
-        ]
-        output = format_command_errors(results)
-        assert "good_cmd" not in output
-        assert "bad_cmd" in output
-
-    def test_includes_rule_name(self) -> None:
-        """Include rule name when provided."""
-        results = [
-            CommandResult(
-                success=False,
-                exit_code=1,
-                stdout="",
-                stderr="Error output",
-                command="test_cmd",
-            ),
-        ]
-        output = format_command_errors(results, rule_name="My Test Rule")
-        assert "Rule: My Test Rule" in output
-        assert "Command: test_cmd" in output
-        assert "Exit code: 1" in output
-        assert "Stderr:\nError output" in output
-
-    def test_includes_stdout(self) -> None:
-        """Include stdout when present."""
-        results = [
-            CommandResult(
-                success=False,
-                exit_code=1,
-                stdout="Standard output here",
-                stderr="Standard error here",
-                command="test_cmd",
-            ),
-        ]
-        output = format_command_errors(results)
-        assert "Stdout:\nStandard output here" in output
-        assert "Stderr:\nStandard error here" in output
-
-    def test_shows_no_output_message(self) -> None:
-        """Show '(no output)' when no stdout or stderr."""
-        results = [
-            CommandResult(
-                success=False,
-                exit_code=42,
-                stdout="",
-                stderr="",
-                command="silent_cmd",
-            ),
-        ]
-        output = format_command_errors(results)
-        assert "Command: silent_cmd" in output
-        assert "Exit code: 42" in output
-        assert "(no output)" in output
-
-    def test_full_error_format(self) -> None:
-        """Test complete error format with all fields."""
-        results = [
-            CommandResult(
-                success=False,
-                exit_code=42,
-                stdout="stdout output",
-                stderr="stderr output",
-                command="echo test && exit 42",
-            ),
-        ]
-        output = format_command_errors(results, rule_name="Command Failure Rule")
-        # Verify all parts are present in the correct format
-        assert "Rule: Command Failure Rule" in output
-        assert "Command: echo test && exit 42" in output
-        assert "Exit code: 42" in output
-        assert "Stdout:\nstdout output" in output
-        assert "Stderr:\nstderr output" in output
diff --git a/tests/unit/test_detector.py b/tests/unit/test_detector.py
deleted file mode 100644
index 1e51a01b..00000000
--- a/tests/unit/test_detector.py
+++ /dev/null
@@ -1,98 +0,0 @@
-"""Tests for platform detector."""
-
-from pathlib import Path
-
-import pytest
-
-from deepwork.core.adapters import ClaudeAdapter
-from deepwork.core.detector import DetectorError, PlatformDetector
-
-
-class TestPlatformDetector:
-    """Tests for PlatformDetector class."""
-
-    def test_detect_claude_present(self, temp_dir: Path) -> None:
-        """Test detecting Claude when .claude directory exists."""
-        claude_dir = temp_dir / ".claude"
-        claude_dir.mkdir()
-
-        detector = PlatformDetector(temp_dir)
-        adapter = detector.detect_platform("claude")
-
-        assert adapter is not None
-        assert isinstance(adapter, ClaudeAdapter)
-        assert adapter.name == "claude"
-
-    def test_detect_claude_absent(self, temp_dir: Path) -> None:
-        """Test detecting Claude when .claude directory doesn't exist."""
-        detector = PlatformDetector(temp_dir)
-        adapter = detector.detect_platform("claude")
-
-        assert adapter is None
-
-    def test_detect_platform_raises_for_unknown(self, temp_dir: Path) -> None:
-        """Test that detecting unknown platform raises error."""
-        detector = PlatformDetector(temp_dir)
-
-        with pytest.raises(DetectorError, match="Unknown adapter"):
-            detector.detect_platform("unknown")
-
-    def test_detect_all_platforms_empty(self, temp_dir: Path) -> None:
-        """Test detecting all platforms when none are present."""
-        detector = PlatformDetector(temp_dir)
-        adapters = detector.detect_all_platforms()
-
-        assert adapters == []
-
-    def test_detect_all_platforms_claude_present(self, temp_dir: Path) -> None:
-        """Test detecting all platforms when Claude is present."""
-        (temp_dir / ".claude").mkdir()
-
-        detector = PlatformDetector(temp_dir)
-        adapters = detector.detect_all_platforms()
-
-        assert len(adapters) == 1
-        assert adapters[0].name == "claude"
-
-    def test_get_adapter(self, temp_dir: Path) -> None:
-        """Test getting adapter without checking availability."""
-        detector = PlatformDetector(temp_dir)
-        adapter = detector.get_adapter("claude")
-
-        assert isinstance(adapter, ClaudeAdapter)
-        assert adapter.name == "claude"
-        assert adapter.display_name == "Claude Code"
-
-    def test_get_adapter_raises_for_unknown(self, temp_dir: Path) -> None:
-        """Test that getting unknown adapter raises error."""
-        detector = PlatformDetector(temp_dir)
-
-        with pytest.raises(DetectorError, match="Unknown adapter"):
-            detector.get_adapter("unknown")
-
-    def test_list_supported_platforms(self) -> None:
-        """Test listing all supported platforms."""
-        platforms = PlatformDetector.list_supported_platforms()
-
-        assert "claude" in platforms
-        assert len(platforms) >= 1  # At least claude
-
-    def test_detect_ignores_files(self, temp_dir: Path) -> None:
-        """Test that detector ignores files with platform names."""
-        # Create a file instead of directory
-        (temp_dir / ".claude").write_text("not a directory")
-
-        detector = PlatformDetector(temp_dir)
-        adapter = detector.detect_platform("claude")
-
-        assert adapter is None
-
-    def test_detected_adapter_has_project_root(self, temp_dir: Path) -> None:
-        """Test that detected adapter has project_root set."""
-        (temp_dir / ".claude").mkdir()
-
-        detector = PlatformDetector(temp_dir)
-        adapter = detector.detect_platform("claude")
-
-        assert adapter is not None
-        assert adapter.project_root == temp_dir
diff --git a/tests/unit/test_generator.py b/tests/unit/test_generator.py
deleted file mode 100644
index dd90ba30..00000000
--- a/tests/unit/test_generator.py
+++ /dev/null
@@ -1,547 +0,0 @@
-"""Tests for skill generator."""
-
-from pathlib import Path
-
-import pytest
-
-from deepwork.core.adapters import ClaudeAdapter
-from deepwork.core.generator import GeneratorError, SkillGenerator
-from deepwork.core.parser import Step, parse_job_definition
-
-
-class TestSkillGenerator:
-    """Tests for SkillGenerator class."""
-
-    def test_init_default_templates_dir(self) -> None:
-        """Test initialization with default templates directory."""
-        generator = SkillGenerator()
-
-        assert generator.templates_dir.exists()
-        assert (generator.templates_dir / "claude").exists()
-
-    def test_init_custom_templates_dir(self, temp_dir: Path) -> None:
-        """Test initialization with custom templates directory."""
-        templates_dir = temp_dir / "templates"
-        templates_dir.mkdir()
-
-        generator = SkillGenerator(templates_dir)
-
-        assert generator.templates_dir == templates_dir
-
-    def test_init_raises_for_missing_templates_dir(self, temp_dir: Path) -> None:
-        """Test initialization raises error for missing templates directory."""
-        nonexistent = temp_dir / "nonexistent"
-
-        with pytest.raises(GeneratorError, match="Templates directory not found"):
-            SkillGenerator(nonexistent)
-
-    def test_generate_step_skill_simple_job(self, fixtures_dir: Path, temp_dir: Path) -> None:
-        """Test generating skill for simple job step."""
-        job_dir = fixtures_dir / "jobs" / "simple_job"
-        job = parse_job_definition(job_dir)
-
-        generator = SkillGenerator()
-        adapter = ClaudeAdapter()
-
-        skill_path = generator.generate_step_skill(job, job.steps[0], adapter, temp_dir)
-
-        assert skill_path.exists()
-        # Step skills use directory/SKILL.md format
-        assert skill_path.name == "SKILL.md"
-        assert skill_path.parent.name == "simple_job.single_step"
-
-        content = skill_path.read_text()
-        assert "# simple_job.single_step" in content
-        # Single step with no dependencies is treated as standalone
-        assert "Standalone skill" in content
-        assert "input_param" in content
-        assert "output.md" in content
-
-    def test_generate_step_skill_complex_job_first_step(
-        self, fixtures_dir: Path, temp_dir: Path
-    ) -> None:
-        """Test generating skill for first step of complex job."""
-        job_dir = fixtures_dir / "jobs" / "complex_job"
-        job = parse_job_definition(job_dir)
-
-        generator = SkillGenerator()
-        adapter = ClaudeAdapter()
-
-        skill_path = generator.generate_step_skill(job, job.steps[0], adapter, temp_dir)
-
-        content = skill_path.read_text()
-        assert "# competitive_research.identify_competitors" in content
-        assert "Step 1/4" in content
-        assert "market_segment" in content
-        assert "product_category" in content
-        # First step has no prerequisites
-        assert "## Prerequisites" not in content
-        # Has next step
-        assert "/competitive_research.primary_research" in content
-
-    def test_generate_step_skill_complex_job_middle_step(
-        self, fixtures_dir: Path, temp_dir: Path
-    ) -> None:
-        """Test generating skill for middle step with dependencies."""
-        job_dir = fixtures_dir / "jobs" / "complex_job"
-        job = parse_job_definition(job_dir)
-
-        generator = SkillGenerator()
-        adapter = ClaudeAdapter()
-
-        # Generate primary_research (step 2)
-        skill_path = generator.generate_step_skill(job, job.steps[1], adapter, temp_dir)
-
-        content = skill_path.read_text()
-        assert "# competitive_research.primary_research" in content
-        assert "Step 2/4" in content
-        # Has prerequisites
-        assert "## Prerequisites" in content
-        assert "/competitive_research.identify_competitors" in content
-        # Has file input
-        assert "competitors.md" in content
-        assert "from `identify_competitors`" in content
-        # Has next step
-        assert "/competitive_research.secondary_research" in content
-
-    def test_generate_step_skill_complex_job_final_step(
-        self, fixtures_dir: Path, temp_dir: Path
-    ) -> None:
-        """Test generating skill for final step."""
-        job_dir = fixtures_dir / "jobs" / "complex_job"
-        job = parse_job_definition(job_dir)
-
-        generator = SkillGenerator()
-        adapter = ClaudeAdapter()
-
-        # Generate comparative_report (step 4)
-        skill_path = generator.generate_step_skill(job, job.steps[3], adapter, temp_dir)
-
-        content = skill_path.read_text()
-        assert "# competitive_research.comparative_report" in content
-        assert "Step 4/4" in content
-        # Has prerequisites
-        assert "## Prerequisites" in content
-        # Has multiple file inputs
-        assert "primary_research.md" in content
-        assert "secondary_research.md" in content
-        # Final step - no next step
-        assert "**Workflow complete**" in content
-        assert "## Next Step" not in content
-
-    def test_generate_step_skill_raises_for_missing_step(
-        self, fixtures_dir: Path, temp_dir: Path
-    ) -> None:
-        """Test that generating skill for non-existent step raises error."""
-        job_dir = fixtures_dir / "jobs" / "simple_job"
-        job = parse_job_definition(job_dir)
-
-        generator = SkillGenerator()
-        adapter = ClaudeAdapter()
-
-        # Create a fake step not in the job
-
-        fake_step = Step(
-            id="fake",
-            name="Fake",
-            description="Fake",
-            instructions_file="steps/fake.md",
-            outputs=["fake.md"],
-        )
-
-        with pytest.raises(GeneratorError, match="Step 'fake' not found"):
-            generator.generate_step_skill(job, fake_step, adapter, temp_dir)
-
-    def test_generate_step_skill_raises_for_missing_instructions(
-        self, fixtures_dir: Path, temp_dir: Path
-    ) -> None:
-        """Test that missing instructions file raises error."""
-        job_dir = fixtures_dir / "jobs" / "simple_job"
-        job = parse_job_definition(job_dir)
-
-        # Save original instructions file content
-        instructions_file = job_dir / "steps" / "single_step.md"
-        original_content = instructions_file.read_text()
-
-        try:
-            # Delete the instructions file
-            instructions_file.unlink()
-
-            generator = SkillGenerator()
-            adapter = ClaudeAdapter()
-
-            with pytest.raises(GeneratorError, match="instructions file not found"):
-                generator.generate_step_skill(job, job.steps[0], adapter, temp_dir)
-        finally:
-            # Restore the file
-            instructions_file.write_text(original_content)
-
-    def test_generate_all_skills(self, fixtures_dir: Path, temp_dir: Path) -> None:
-        """Test generating skills for all steps in a job (meta + step skills)."""
-        job_dir = fixtures_dir / "jobs" / "complex_job"
-        job = parse_job_definition(job_dir)
-
-        generator = SkillGenerator()
-        adapter = ClaudeAdapter()
-
-        skill_paths = generator.generate_all_skills(job, adapter, temp_dir)
-
-        # Now includes meta-skill plus step skills
-        assert len(skill_paths) == 5  # 1 meta + 4 steps
-        assert all(p.exists() for p in skill_paths)
-
-        # Check directory names - meta-skill first, then step skills
-        # All files are named SKILL.md inside skill directories
-        expected_dirs = [
-            "competitive_research",  # Meta-skill
-            "competitive_research.identify_competitors",  # Step skills
-            "competitive_research.primary_research",
-            "competitive_research.secondary_research",
-            "competitive_research.comparative_report",
-        ]
-        actual_dirs = [p.parent.name for p in skill_paths]
-        assert actual_dirs == expected_dirs
-        assert all(p.name == "SKILL.md" for p in skill_paths)
-
-    def test_generate_meta_skill(self, fixtures_dir: Path, temp_dir: Path) -> None:
-        """Test generating meta-skill for a job."""
-        job_dir = fixtures_dir / "jobs" / "complex_job"
-        job = parse_job_definition(job_dir)
-
-        generator = SkillGenerator()
-        adapter = ClaudeAdapter()
-
-        meta_skill_path = generator.generate_meta_skill(job, adapter, temp_dir)
-
-        assert meta_skill_path.exists()
-        assert meta_skill_path.name == "SKILL.md"
-        assert meta_skill_path.parent.name == "competitive_research"
-
-        content = meta_skill_path.read_text()
-        # Check meta-skill content
-        assert "# competitive_research" in content
-        assert "Available Steps" in content
-        assert "identify_competitors" in content
-        assert "primary_research" in content
-        assert "Skill tool" in content
-
-    def test_generate_step_skill_exposed_step(self, fixtures_dir: Path, temp_dir: Path) -> None:
-        """Test generating skill for exposed step."""
-        job_dir = fixtures_dir / "jobs" / "exposed_step_job"
-        job = parse_job_definition(job_dir)
-
-        generator = SkillGenerator()
-        adapter = ClaudeAdapter()
-
-        # Generate the exposed step (index 1)
-        skill_path = generator.generate_step_skill(job, job.steps[1], adapter, temp_dir)
-
-        assert skill_path.exists()
-        # Uses directory/SKILL.md format whether exposed or not
-        assert skill_path.name == "SKILL.md"
-        assert skill_path.parent.name == "exposed_job.exposed_step"
-
-    def test_generate_all_skills_with_exposed_steps(
-        self, fixtures_dir: Path, temp_dir: Path
-    ) -> None:
-        """Test generating all skills with mix of hidden and exposed steps."""
-        job_dir = fixtures_dir / "jobs" / "exposed_step_job"
-        job = parse_job_definition(job_dir)
-
-        generator = SkillGenerator()
-        adapter = ClaudeAdapter()
-
-        skill_paths = generator.generate_all_skills(job, adapter, temp_dir)
-
-        # Meta-skill + 2 steps
-        assert len(skill_paths) == 3
-        assert all(p.exists() for p in skill_paths)
-
-        # Check directory names - all use directory/SKILL.md format
-        expected_dirs = [
-            "exposed_job",  # Meta-skill
-            "exposed_job.hidden_step",  # Step skill
-            "exposed_job.exposed_step",  # Step skill
-        ]
-        actual_dirs = [p.parent.name for p in skill_paths]
-        assert actual_dirs == expected_dirs
-        assert all(p.name == "SKILL.md" for p in skill_paths)
-
-
-class TestConcurrentStepsGeneration:
-    """Tests for concurrent steps in skill generation."""
-
-    def test_generate_meta_skill_with_concurrent_steps(
-        self, fixtures_dir: Path, temp_dir: Path
-    ) -> None:
-        """Test generating meta-skill for job with concurrent steps."""
-        job_dir = fixtures_dir / "jobs" / "concurrent_steps_job"
-        job = parse_job_definition(job_dir)
-
-        generator = SkillGenerator()
-        adapter = ClaudeAdapter()
-
-        meta_skill_path = generator.generate_meta_skill(job, adapter, temp_dir)
-
-        assert meta_skill_path.exists()
-        content = meta_skill_path.read_text()
-
-        # Check meta-skill content has workflow section
-        assert "# concurrent_workflow" in content
-        assert "full_analysis" in content
-
-        # Check concurrent steps are rendered correctly
-        assert "Concurrent Steps" in content
-        assert "Background Task 1" in content
-        assert "Background Task 2" in content
-        assert "Background Task 3" in content
-        assert "research_web" in content
-        assert "research_docs" in content
-        assert "research_interviews" in content
-
-    def test_meta_skill_context_has_step_entries(self, fixtures_dir: Path, temp_dir: Path) -> None:
-        """Test that meta-skill context includes step_entries with concurrency info."""
-        job_dir = fixtures_dir / "jobs" / "concurrent_steps_job"
-        job = parse_job_definition(job_dir)
-
-        generator = SkillGenerator()
-        adapter = ClaudeAdapter()
-
-        context = generator._build_meta_skill_context(job, adapter)
-
-        assert "workflows" in context
-        assert len(context["workflows"]) == 1
-
-        workflow = context["workflows"][0]
-        assert "step_entries" in workflow
-        assert len(workflow["step_entries"]) == 4
-
-        # Check first entry (sequential)
-        entry1 = workflow["step_entries"][0]
-        assert entry1["is_concurrent"] is False
-        assert entry1["step_ids"] == ["setup"]
-
-        # Check second entry (concurrent)
-        entry2 = workflow["step_entries"][1]
-        assert entry2["is_concurrent"] is True
-        assert entry2["step_ids"] == ["research_web", "research_docs", "research_interviews"]
-        assert "concurrent_steps" in entry2
-        assert len(entry2["concurrent_steps"]) == 3
-        assert entry2["concurrent_steps"][0]["task_number"] == 1
-        assert entry2["concurrent_steps"][0]["id"] == "research_web"
-
-    def test_generate_all_skills_with_concurrent_steps(
-        self, fixtures_dir: Path, temp_dir: Path
-    ) -> None:
-        """Test generating all skills for job with concurrent steps."""
-        job_dir = fixtures_dir / "jobs" / "concurrent_steps_job"
-        job = parse_job_definition(job_dir)
-
-        generator = SkillGenerator()
-        adapter = ClaudeAdapter()
-
-        skill_paths = generator.generate_all_skills(job, adapter, temp_dir)
-
-        # 1 meta-skill + 6 step skills
-        assert len(skill_paths) == 7
-        assert all(p.exists() for p in skill_paths)
-
-        # Check all step skills are generated
-        expected_dirs = [
-            "concurrent_workflow",  # Meta-skill
-            "concurrent_workflow.setup",
-            "concurrent_workflow.research_web",
-            "concurrent_workflow.research_docs",
-            "concurrent_workflow.research_interviews",
-            "concurrent_workflow.compile_results",
-            "concurrent_workflow.final_review",
-        ]
-        actual_dirs = [p.parent.name for p in skill_paths]
-        assert actual_dirs == expected_dirs
-
-
-class TestDocSpecIntegration:
-    """Tests for doc spec integration in skill generation."""
-
-    def test_load_doc_spec_returns_parsed_spec(self, fixtures_dir: Path) -> None:
-        """Test that _load_doc_spec loads and parses doc spec files."""
-        generator = SkillGenerator()
-
-        # Load the valid_report doc spec from fixtures
-        doc_spec = generator._load_doc_spec(fixtures_dir, "doc_specs/valid_report.md")
-
-        assert doc_spec is not None
-        assert doc_spec.name == "Monthly Report"
-        assert doc_spec.description == "A monthly summary report"
-        assert doc_spec.target_audience == "Team leads"
-        assert len(doc_spec.quality_criteria) == 2
-        assert doc_spec.quality_criteria[0].name == "Summary"
-
-    def test_load_doc_spec_caches_result(self, fixtures_dir: Path) -> None:
-        """Test that doc specs are cached after first load."""
-        generator = SkillGenerator()
-
-        # Load same doc spec twice
-        doc_spec1 = generator._load_doc_spec(fixtures_dir, "doc_specs/valid_report.md")
-        doc_spec2 = generator._load_doc_spec(fixtures_dir, "doc_specs/valid_report.md")
-
-        # Should be the same cached instance
-        assert doc_spec1 is doc_spec2
-        # Cache should have exactly one entry
-        assert len(generator._doc_spec_cache) == 1
-
-    def test_load_doc_spec_returns_none_for_missing_file(self, temp_dir: Path) -> None:
-        """Test that _load_doc_spec returns None for non-existent file."""
-        generator = SkillGenerator()
-
-        result = generator._load_doc_spec(temp_dir, "nonexistent.md")
-
-        assert result is None
-
-    def test_load_doc_spec_returns_none_for_invalid_spec(self, temp_dir: Path) -> None:
-        """Test that _load_doc_spec returns None for invalid doc spec file."""
-        generator = SkillGenerator()
-
-        # Create an invalid doc spec file (missing required fields)
-        invalid_spec = temp_dir / "invalid.md"
-        invalid_spec.write_text("""---
-name: "Test"
----
-Body content
-""")
-
-        result = generator._load_doc_spec(temp_dir, "invalid.md")
-
-        assert result is None
-
-    def test_generate_step_skill_with_doc_spec(self, fixtures_dir: Path, temp_dir: Path) -> None:
-        """Test generating skill for step with doc spec-referenced output."""
-        # Set up the directory structure so the doc spec can be found
-        doc_specs_dir = temp_dir / ".deepwork" / "doc_specs"
-        doc_specs_dir.mkdir(parents=True)
-
-        # Copy the valid_report.md fixture to the expected location
-        source_doc_spec = fixtures_dir / "doc_specs" / "valid_report.md"
-        target_doc_spec = doc_specs_dir / "valid_report.md"
-        target_doc_spec.write_text(source_doc_spec.read_text())
-
-        # Parse the job with doc spec
-        job_dir = fixtures_dir / "jobs" / "job_with_doc_spec"
-        job = parse_job_definition(job_dir)
-
-        generator = SkillGenerator()
-        adapter = ClaudeAdapter()
-
-        # Generate skill with project_root set to temp_dir so it finds doc specs
-        skill_path = generator.generate_step_skill(
-            job, job.steps[0], adapter, temp_dir, project_root=temp_dir
-        )
-
-        assert skill_path.exists()
-        content = skill_path.read_text()
-
-        # Verify doc spec info is injected into the skill
-        assert "Doc Spec" in content
-        assert "Monthly Report" in content
-        assert "A monthly summary report" in content
-        assert "Target Audience" in content
-        assert "Team leads" in content
-        assert "Quality Criteria" in content
-        assert "Summary" in content
-        assert "Must include executive summary" in content
-
-    def test_generate_step_skill_without_doc_spec(self, fixtures_dir: Path, temp_dir: Path) -> None:
-        """Test generating skill for step without doc spec reference."""
-        job_dir = fixtures_dir / "jobs" / "simple_job"
-        job = parse_job_definition(job_dir)
-
-        generator = SkillGenerator()
-        adapter = ClaudeAdapter()
-
-        skill_path = generator.generate_step_skill(job, job.steps[0], adapter, temp_dir)
-
-        content = skill_path.read_text()
-        # Should not have doc spec section
-        assert "Doc Spec:" not in content
-
-    def test_generate_step_skill_with_missing_doc_spec_file(
-        self, fixtures_dir: Path, temp_dir: Path
-    ) -> None:
-        """Test generating skill when doc spec file doesn't exist."""
-        # Parse the job with doc spec but don't create the doc spec file
-        job_dir = fixtures_dir / "jobs" / "job_with_doc_spec"
-        job = parse_job_definition(job_dir)
-
-        generator = SkillGenerator()
-        adapter = ClaudeAdapter()
-
-        # Generate skill without the doc spec file present
-        # This should work but not include doc spec info
-        skill_path = generator.generate_step_skill(
-            job, job.steps[0], adapter, temp_dir, project_root=temp_dir
-        )
-
-        assert skill_path.exists()
-        content = skill_path.read_text()
-
-        # Should still generate the skill, just without doc spec details
-        assert "job_with_doc_spec.generate_report" in content
-        # Doc spec section should not appear since file is missing
-        assert "Monthly Report" not in content
-
-    def test_build_step_context_includes_doc_spec_info(
-        self, fixtures_dir: Path, temp_dir: Path
-    ) -> None:
-        """Test that _build_step_context includes doc spec info in outputs."""
-        # Set up the directory structure
-        doc_specs_dir = temp_dir / ".deepwork" / "doc_specs"
-        doc_specs_dir.mkdir(parents=True)
-
-        source_doc_spec = fixtures_dir / "doc_specs" / "valid_report.md"
-        target_doc_spec = doc_specs_dir / "valid_report.md"
-        target_doc_spec.write_text(source_doc_spec.read_text())
-
-        job_dir = fixtures_dir / "jobs" / "job_with_doc_spec"
-        job = parse_job_definition(job_dir)
-
-        generator = SkillGenerator()
-        adapter = ClaudeAdapter()
-
-        context = generator._build_step_context(
-            job, job.steps[0], 0, adapter, project_root=temp_dir
-        )
-
-        # Check outputs context has doc spec info
-        assert "outputs" in context
-        assert len(context["outputs"]) == 1
-
-        output_ctx = context["outputs"][0]
-        assert output_ctx["file"] == "report.md"
-        assert output_ctx["has_doc_spec"] is True
-        assert "doc_spec" in output_ctx
-
-        doc_spec_ctx = output_ctx["doc_spec"]
-        assert doc_spec_ctx["name"] == "Monthly Report"
-        assert doc_spec_ctx["description"] == "A monthly summary report"
-        assert doc_spec_ctx["target_audience"] == "Team leads"
-        assert len(doc_spec_ctx["quality_criteria"]) == 2
-        assert doc_spec_ctx["quality_criteria"][0]["name"] == "Summary"
-        assert "example_document" in doc_spec_ctx
-
-    def test_build_step_context_without_project_root(
-        self, fixtures_dir: Path, temp_dir: Path
-    ) -> None:
-        """Test that _build_step_context handles missing project_root."""
-        job_dir = fixtures_dir / "jobs" / "job_with_doc_spec"
-        job = parse_job_definition(job_dir)
-
-        generator = SkillGenerator()
-        adapter = ClaudeAdapter()
-
-        # Build context without project_root - should still work but no doc spec
-        context = generator._build_step_context(job, job.steps[0], 0, adapter)
-
-        output_ctx = context["outputs"][0]
-        assert output_ctx["has_doc_spec"] is True  # Job still declares it
-        # But doc_spec info won't be loaded since no project_root
-        assert "doc_spec" not in output_ctx
diff --git a/tests/unit/test_hooks_syncer.py b/tests/unit/test_hooks_syncer.py
deleted file mode 100644
index 99edcfdb..00000000
--- a/tests/unit/test_hooks_syncer.py
+++ /dev/null
@@ -1,367 +0,0 @@
-"""Tests for the hooks syncer module."""
-
-import json
-from pathlib import Path
-
-from deepwork.core.adapters import ClaudeAdapter
-from deepwork.core.hooks_syncer import (
-    HookEntry,
-    HookSpec,
-    JobHooks,
-    collect_job_hooks,
-    merge_hooks_for_platform,
-    sync_hooks_to_platform,
-)
-
-
-class TestHookEntry:
-    """Tests for HookEntry dataclass."""
-
-    def test_get_command_for_script(self, temp_dir: Path) -> None:
-        """Test getting command for a script hook."""
-        job_dir = temp_dir / ".deepwork" / "jobs" / "test_job"
-        job_dir.mkdir(parents=True)
-
-        entry = HookEntry(
-            job_name="test_job",
-            job_dir=job_dir,
-            script="test_hook.sh",
-        )
-
-        cmd = entry.get_command(temp_dir)
-        assert cmd == ".deepwork/jobs/test_job/hooks/test_hook.sh"
-
-    def test_get_command_for_module(self, temp_dir: Path) -> None:
-        """Test getting command for a module hook."""
-        job_dir = temp_dir / ".deepwork" / "jobs" / "test_job"
-        job_dir.mkdir(parents=True)
-
-        entry = HookEntry(
-            job_name="test_job",
-            job_dir=job_dir,
-            module="deepwork.hooks.rules_check",
-        )
-
-        cmd = entry.get_command(temp_dir)
-        assert cmd == "deepwork hook rules_check"
-
-
-class TestJobHooks:
-    """Tests for JobHooks dataclass."""
-
-    def test_from_job_dir_with_hooks(self, temp_dir: Path) -> None:
-        """Test loading hooks from job directory."""
-        job_dir = temp_dir / "test_job"
-        hooks_dir = job_dir / "hooks"
-        hooks_dir.mkdir(parents=True)
-
-        # Create global_hooks.yml
-        hooks_file = hooks_dir / "global_hooks.yml"
-        hooks_file.write_text(
-            """
-UserPromptSubmit:
-  - capture.sh
-Stop:
-  - rules_check.sh
-  - cleanup.sh
-"""
-        )
-
-        result = JobHooks.from_job_dir(job_dir)
-
-        assert result is not None
-        assert result.job_name == "test_job"
-        assert len(result.hooks["UserPromptSubmit"]) == 1
-        assert result.hooks["UserPromptSubmit"][0].script == "capture.sh"
-        assert len(result.hooks["Stop"]) == 2
-        assert result.hooks["Stop"][0].script == "rules_check.sh"
-        assert result.hooks["Stop"][1].script == "cleanup.sh"
-
-    def test_from_job_dir_with_module_hooks(self, temp_dir: Path) -> None:
-        """Test loading module-based hooks from job directory."""
-        job_dir = temp_dir / "test_job"
-        hooks_dir = job_dir / "hooks"
-        hooks_dir.mkdir(parents=True)
-
-        # Create global_hooks.yml with module format
-        hooks_file = hooks_dir / "global_hooks.yml"
-        hooks_file.write_text(
-            """
-UserPromptSubmit:
-  - capture.sh
-Stop:
-  - module: deepwork.hooks.rules_check
-"""
-        )
-
-        result = JobHooks.from_job_dir(job_dir)
-
-        assert result is not None
-        assert result.hooks["UserPromptSubmit"][0].script == "capture.sh"
-        assert result.hooks["Stop"][0].module == "deepwork.hooks.rules_check"
-        assert result.hooks["Stop"][0].script is None
-
-    def test_from_job_dir_no_hooks_file(self, temp_dir: Path) -> None:
-        """Test returns None when no hooks file exists."""
-        job_dir = temp_dir / "test_job"
-        job_dir.mkdir(parents=True)
-
-        result = JobHooks.from_job_dir(job_dir)
-        assert result is None
-
-    def test_from_job_dir_empty_hooks_file(self, temp_dir: Path) -> None:
-        """Test returns None when hooks file is empty."""
-        job_dir = temp_dir / "test_job"
-        hooks_dir = job_dir / "hooks"
-        hooks_dir.mkdir(parents=True)
-
-        hooks_file = hooks_dir / "global_hooks.yml"
-        hooks_file.write_text("")
-
-        result = JobHooks.from_job_dir(job_dir)
-        assert result is None
-
-    def test_from_job_dir_single_script_as_string(self, temp_dir: Path) -> None:
-        """Test parsing single script as string instead of list."""
-        job_dir = temp_dir / "test_job"
-        hooks_dir = job_dir / "hooks"
-        hooks_dir.mkdir(parents=True)
-
-        hooks_file = hooks_dir / "global_hooks.yml"
-        hooks_file.write_text("Stop: cleanup.sh\n")
-
-        result = JobHooks.from_job_dir(job_dir)
-
-        assert result is not None
-        assert len(result.hooks["Stop"]) == 1
-        assert result.hooks["Stop"][0].script == "cleanup.sh"
-
-
-class TestCollectJobHooks:
-    """Tests for collect_job_hooks function."""
-
-    def test_collects_hooks_from_multiple_jobs(self, temp_dir: Path) -> None:
-        """Test collecting hooks from multiple job directories."""
-        jobs_dir = temp_dir / "jobs"
-
-        # Create first job with hooks
-        job1_dir = jobs_dir / "job1"
-        (job1_dir / "hooks").mkdir(parents=True)
-        (job1_dir / "hooks" / "global_hooks.yml").write_text("Stop:\n  - hook1.sh\n")
-
-        # Create second job with hooks
-        job2_dir = jobs_dir / "job2"
-        (job2_dir / "hooks").mkdir(parents=True)
-        (job2_dir / "hooks" / "global_hooks.yml").write_text("Stop:\n  - hook2.sh\n")
-
-        # Create job without hooks
-        job3_dir = jobs_dir / "job3"
-        job3_dir.mkdir(parents=True)
-
-        result = collect_job_hooks(jobs_dir)
-
-        assert len(result) == 2
-        job_names = {jh.job_name for jh in result}
-        assert job_names == {"job1", "job2"}
-
-    def test_returns_empty_for_nonexistent_dir(self, temp_dir: Path) -> None:
-        """Test returns empty list when jobs dir doesn't exist."""
-        jobs_dir = temp_dir / "nonexistent"
-        result = collect_job_hooks(jobs_dir)
-        assert result == []
-
-
-class TestMergeHooksForPlatform:
-    """Tests for merge_hooks_for_platform function."""
-
-    def test_merges_hooks_from_multiple_jobs(self, temp_dir: Path) -> None:
-        """Test merging hooks from multiple jobs."""
-        # Create job directories
-        job1_dir = temp_dir / ".deepwork" / "jobs" / "job1"
-        job2_dir = temp_dir / ".deepwork" / "jobs" / "job2"
-        job1_dir.mkdir(parents=True)
-        job2_dir.mkdir(parents=True)
-
-        job_hooks_list = [
-            JobHooks(
-                job_name="job1",
-                job_dir=job1_dir,
-                hooks={"Stop": [HookSpec(script="hook1.sh")]},
-            ),
-            JobHooks(
-                job_name="job2",
-                job_dir=job2_dir,
-                hooks={
-                    "Stop": [HookSpec(script="hook2.sh")],
-                    "UserPromptSubmit": [HookSpec(script="capture.sh")],
-                },
-            ),
-        ]
-
-        result = merge_hooks_for_platform(job_hooks_list, temp_dir)
-
-        assert "Stop" in result
-        assert "UserPromptSubmit" in result
-        assert len(result["Stop"]) == 2
-        assert len(result["UserPromptSubmit"]) == 1
-
-    def test_avoids_duplicate_hooks(self, temp_dir: Path) -> None:
-        """Test that duplicate hooks are not added."""
-        job_dir = temp_dir / ".deepwork" / "jobs" / "job1"
-        job_dir.mkdir(parents=True)
-
-        # Same hook in same job (shouldn't happen but test anyway)
-        job_hooks_list = [
-            JobHooks(
-                job_name="job1",
-                job_dir=job_dir,
-                hooks={"Stop": [HookSpec(script="hook.sh"), HookSpec(script="hook.sh")]},
-            ),
-        ]
-
-        result = merge_hooks_for_platform(job_hooks_list, temp_dir)
-
-        # Should only have one entry
-        assert len(result["Stop"]) == 1
-
-    def test_duplicates_stop_hooks_to_subagent_stop(self, temp_dir: Path) -> None:
-        """Test that Stop hooks are also registered for SubagentStop event.
-
-        Claude Code has separate Stop and SubagentStop events. When a Stop hook
-        is defined, it should also be registered for SubagentStop so the hook
-        triggers for both the main agent and subagents.
-        """
-        job_dir = temp_dir / ".deepwork" / "jobs" / "job1"
-        job_dir.mkdir(parents=True)
-
-        job_hooks_list = [
-            JobHooks(
-                job_name="job1",
-                job_dir=job_dir,
-                hooks={"Stop": [HookSpec(script="hook.sh")]},
-            ),
-        ]
-
-        result = merge_hooks_for_platform(job_hooks_list, temp_dir)
-
-        # Should have both Stop and SubagentStop events
-        assert "Stop" in result
-        assert "SubagentStop" in result
-        assert len(result["Stop"]) == 1
-        assert len(result["SubagentStop"]) == 1
-
-        # Both should have the same hook command
-        stop_cmd = result["Stop"][0]["hooks"][0]["command"]
-        subagent_stop_cmd = result["SubagentStop"][0]["hooks"][0]["command"]
-        assert stop_cmd == subagent_stop_cmd == ".deepwork/jobs/job1/hooks/hook.sh"
-
-    def test_does_not_duplicate_subagent_stop_if_no_stop(self, temp_dir: Path) -> None:
-        """Test that SubagentStop is not created if there are no Stop hooks."""
-        job_dir = temp_dir / ".deepwork" / "jobs" / "job1"
-        job_dir.mkdir(parents=True)
-
-        job_hooks_list = [
-            JobHooks(
-                job_name="job1",
-                job_dir=job_dir,
-                hooks={"UserPromptSubmit": [HookSpec(script="capture.sh")]},
-            ),
-        ]
-
-        result = merge_hooks_for_platform(job_hooks_list, temp_dir)
-
-        # Should only have UserPromptSubmit, not SubagentStop
-        assert "UserPromptSubmit" in result
-        assert "SubagentStop" not in result
-        assert "Stop" not in result
-
-
-class TestSyncHooksToPlatform:
-    """Tests for sync_hooks_to_platform function using adapters."""
-
-    def test_syncs_hooks_via_adapter(self, temp_dir: Path) -> None:
-        """Test syncing hooks to platform via adapter."""
-        # Create .claude directory
-        (temp_dir / ".claude").mkdir(parents=True)
-
-        adapter = ClaudeAdapter(temp_dir)
-
-        # Create job directories
-        job_dir = temp_dir / ".deepwork" / "jobs" / "test_job"
-        job_dir.mkdir(parents=True)
-
-        job_hooks_list = [
-            JobHooks(
-                job_name="test_job",
-                job_dir=job_dir,
-                hooks={"Stop": [HookSpec(script="test_hook.sh")]},
-            ),
-        ]
-
-        count = sync_hooks_to_platform(temp_dir, adapter, job_hooks_list)
-
-        # Count is 2 because Stop hooks are also registered for SubagentStop
-        assert count == 2
-
-        # Verify settings.json was created
-        settings_file = temp_dir / ".claude" / "settings.json"
-        assert settings_file.exists()
-
-        with open(settings_file) as f:
-            settings = json.load(f)
-
-        assert "hooks" in settings
-        assert "Stop" in settings["hooks"]
-        assert "SubagentStop" in settings["hooks"]
-
-    def test_returns_zero_for_empty_hooks(self, temp_dir: Path) -> None:
-        """Test returns 0 when no hooks to sync."""
-        adapter = ClaudeAdapter(temp_dir)
-
-        count = sync_hooks_to_platform(temp_dir, adapter, [])
-
-        assert count == 0
-
-    def test_merges_with_existing_settings(self, temp_dir: Path) -> None:
-        """Test merging hooks into existing settings.json."""
-        # Create .claude directory with existing settings
-        claude_dir = temp_dir / ".claude"
-        claude_dir.mkdir(parents=True)
-
-        existing_settings = {
-            "version": "1.0",
-            "hooks": {
-                "PreToolUse": [
-                    {"matcher": "", "hooks": [{"type": "command", "command": "existing.sh"}]}
-                ]
-            },
-        }
-        settings_file = claude_dir / "settings.json"
-        with open(settings_file, "w") as f:
-            json.dump(existing_settings, f)
-
-        adapter = ClaudeAdapter(temp_dir)
-
-        job_dir = temp_dir / ".deepwork" / "jobs" / "test_job"
-        job_dir.mkdir(parents=True)
-
-        job_hooks_list = [
-            JobHooks(
-                job_name="test_job",
-                job_dir=job_dir,
-                hooks={"Stop": [HookSpec(script="new_hook.sh")]},
-            ),
-        ]
-
-        sync_hooks_to_platform(temp_dir, adapter, job_hooks_list)
-
-        with open(settings_file) as f:
-            settings = json.load(f)
-
-        # Should preserve existing settings
-        assert settings["version"] == "1.0"
-        assert "PreToolUse" in settings["hooks"]
-
-        # Should add new hooks
-        assert "Stop" in settings["hooks"]
-        assert len(settings["hooks"]["Stop"]) == 1
diff --git a/tests/unit/test_parser.py b/tests/unit/test_parser.py
index 0c968242..aba569c6 100644
--- a/tests/unit/test_parser.py
+++ b/tests/unit/test_parser.py
@@ -8,6 +8,7 @@
     JobDefinition,
     OutputSpec,
     ParseError,
+    Review,
     Step,
     StepInput,
     parse_job_definition,
@@ -53,47 +54,98 @@ def test_from_dict_file_input(self) -> None:
 class TestOutputSpec:
     """Tests for OutputSpec dataclass."""
 
-    def test_simple_output(self) -> None:
-        """Test simple output without doc spec."""
-        output = OutputSpec(file="output.md")
+    def test_file_output(self) -> None:
+        """Test single file output."""
+        output = OutputSpec(
+            name="output.md", type="file", description="An output file", required=True
+        )
+
+        assert output.name == "output.md"
+        assert output.type == "file"
+        assert output.description == "An output file"
+        assert output.required is True
+
+    def test_files_output(self) -> None:
+        """Test multiple files output."""
+        output = OutputSpec(
+            name="step_instruction_files",
+            type="files",
+            description="Instruction files",
+            required=True,
+        )
+
+        assert output.name == "step_instruction_files"
+        assert output.type == "files"
+        assert output.description == "Instruction files"
+        assert output.required is True
+
+    def test_optional_output(self) -> None:
+        """Test optional output with required=False."""
+        output = OutputSpec(name="bonus.md", type="file", description="Optional", required=False)
+
+        assert output.name == "bonus.md"
+        assert output.required is False
+
+    def test_from_dict(self) -> None:
+        """Test creating output from name and dict."""
+        data = {"type": "file", "description": "An output file", "required": True}
+        output = OutputSpec.from_dict("output.md", data)
+
+        assert output.name == "output.md"
+        assert output.type == "file"
+        assert output.description == "An output file"
+        assert output.required is True
+
+    def test_from_dict_files_type(self) -> None:
+        """Test creating files-type output from dict."""
+        data = {"type": "files", "description": "Multiple output files", "required": True}
+        output = OutputSpec.from_dict("reports", data)
 
-        assert output.file == "output.md"
-        assert output.doc_spec is None
-        assert not output.has_doc_spec()
+        assert output.name == "reports"
+        assert output.type == "files"
+        assert output.description == "Multiple output files"
+        assert output.required is True
 
-    def test_output_with_doc_spec(self) -> None:
-        """Test output with doc spec reference."""
-        output = OutputSpec(file="report.md", doc_spec=".deepwork/doc_specs/monthly_report.md")
+    def test_from_dict_optional(self) -> None:
+        """Test creating optional output from dict."""
+        data = {"type": "files", "description": "Optional files", "required": False}
+        output = OutputSpec.from_dict("extras", data)
 
-        assert output.file == "report.md"
-        assert output.doc_spec == ".deepwork/doc_specs/monthly_report.md"
-        assert output.has_doc_spec()
+        assert output.name == "extras"
+        assert output.required is False
 
-    def test_from_dict_string(self) -> None:
-        """Test creating output from string."""
-        output = OutputSpec.from_dict("output.md")
 
-        assert output.file == "output.md"
-        assert output.doc_spec is None
-        assert not output.has_doc_spec()
+class TestReview:
+    """Tests for Review dataclass."""
 
-    def test_from_dict_simple_object(self) -> None:
-        """Test creating output from dict without doc spec."""
-        data = {"file": "output.md"}
-        output = OutputSpec.from_dict(data)
+    def test_from_dict(self) -> None:
+        """Test creating review from dictionary."""
+        data = {
+            "run_each": "step",
+            "quality_criteria": {"Complete": "Is it complete?", "Valid": "Is it valid?"},
+        }
+        review = Review.from_dict(data)
+
+        assert review.run_each == "step"
+        assert review.quality_criteria == {"Complete": "Is it complete?", "Valid": "Is it valid?"}
 
-        assert output.file == "output.md"
-        assert output.doc_spec is None
-        assert not output.has_doc_spec()
+    def test_from_dict_output_specific(self) -> None:
+        """Test creating review targeting specific output."""
+        data = {
+            "run_each": "reports",
+            "quality_criteria": {"Well Written": "Is it well written?"},
+        }
+        review = Review.from_dict(data)
 
-    def test_from_dict_with_doc_spec(self) -> None:
-        """Test creating output from dict with doc spec."""
-        data = {"file": "report.md", "doc_spec": ".deepwork/doc_specs/monthly_report.md"}
-        output = OutputSpec.from_dict(data)
+        assert review.run_each == "reports"
+        assert len(review.quality_criteria) == 1
 
-        assert output.file == "report.md"
-        assert output.doc_spec == ".deepwork/doc_specs/monthly_report.md"
-        assert output.has_doc_spec()
+    def test_from_dict_empty_criteria(self) -> None:
+        """Test creating review with empty criteria defaults."""
+        data = {"run_each": "step"}
+        review = Review.from_dict(data)
+
+        assert review.quality_criteria == {}
 
 
 class TestStep:
@@ -106,7 +158,9 @@ def test_from_dict_minimal(self) -> None:
             "name": "Step 1",
             "description": "First step",
             "instructions_file": "steps/step1.md",
-            "outputs": ["output.md"],
+            "outputs": {
+                "output.md": {"type": "file", "description": "An output file", "required": True},
+            },
         }
         step = Step.from_dict(data)
 
@@ -115,31 +169,38 @@ def test_from_dict_minimal(self) -> None:
         assert step.description == "First step"
         assert step.instructions_file == "steps/step1.md"
         assert len(step.outputs) == 1
-        assert step.outputs[0].file == "output.md"
-        assert not step.outputs[0].has_doc_spec()
+        assert step.outputs[0].name == "output.md"
+        assert step.outputs[0].type == "file"
         assert step.inputs == []
         assert step.dependencies == []
 
-    def test_from_dict_with_doc_spec_output(self) -> None:
-        """Test creating step with doc spec-referenced output."""
+    def test_from_dict_with_multiple_outputs(self) -> None:
+        """Test creating step with file and files type outputs."""
         data = {
             "id": "step1",
             "name": "Step 1",
             "description": "First step",
             "instructions_file": "steps/step1.md",
-            "outputs": [
-                "simple_output.md",
-                {"file": "report.md", "doc_spec": ".deepwork/doc_specs/monthly_report.md"},
-            ],
+            "outputs": {
+                "report.md": {"type": "file", "description": "A report", "required": True},
+                "attachments": {
+                    "type": "files",
+                    "description": "Supporting files",
+                    "required": True,
+                },
+            },
         }
         step = Step.from_dict(data)
 
         assert len(step.outputs) == 2
-        assert step.outputs[0].file == "simple_output.md"
-        assert not step.outputs[0].has_doc_spec()
-        assert step.outputs[1].file == "report.md"
-        assert step.outputs[1].doc_spec == ".deepwork/doc_specs/monthly_report.md"
-        assert step.outputs[1].has_doc_spec()
+        output_names = {out.name for out in step.outputs}
+        assert "report.md" in output_names
+        assert "attachments" in output_names
+
+        report = next(out for out in step.outputs if out.name == "report.md")
+        assert report.type == "file"
+        attachments = next(out for out in step.outputs if out.name == "attachments")
+        assert attachments.type == "files"
 
     def test_from_dict_with_inputs(self) -> None:
         """Test creating step with inputs."""
@@ -152,7 +213,9 @@ def test_from_dict_with_inputs(self) -> None:
                 {"name": "param1", "description": "Parameter 1"},
                 {"file": "data.md", "from_step": "step0"},
             ],
-            "outputs": ["output.md"],
+            "outputs": {
+                "output.md": {"type": "file", "description": "An output file", "required": True},
+            },
             "dependencies": ["step0"],
         }
         step = Step.from_dict(data)
@@ -169,7 +232,9 @@ def test_from_dict_exposed_default_false(self) -> None:
             "name": "Step 1",
             "description": "First step",
             "instructions_file": "steps/step1.md",
-            "outputs": ["output.md"],
+            "outputs": {
+                "output.md": {"type": "file", "description": "An output file", "required": True},
+            },
         }
         step = Step.from_dict(data)
 
@@ -182,13 +247,59 @@ def test_from_dict_exposed_true(self) -> None:
             "name": "Step 1",
             "description": "First step",
             "instructions_file": "steps/step1.md",
-            "outputs": ["output.md"],
+            "outputs": {
+                "output.md": {"type": "file", "description": "An output file", "required": True},
+            },
             "exposed": True,
         }
         step = Step.from_dict(data)
 
         assert step.exposed is True
 
+    def test_from_dict_with_reviews(self) -> None:
+        """Test creating step with reviews."""
+        data = {
+            "id": "step1",
+            "name": "Step 1",
+            "description": "First step",
+            "instructions_file": "steps/step1.md",
+            "outputs": {
+                "output.md": {"type": "file", "description": "An output file", "required": True},
+            },
+            "reviews": [
+                {
+                    "run_each": "step",
+                    "quality_criteria": {"Complete": "Is it complete?"},
+                },
+                {
+                    "run_each": "output.md",
+                    "quality_criteria": {"Valid": "Is it valid?"},
+                },
+            ],
+        }
+        step = Step.from_dict(data)
+
+        assert len(step.reviews) == 2
+        assert step.reviews[0].run_each == "step"
+        assert step.reviews[0].quality_criteria == {"Complete": "Is it complete?"}
+        assert step.reviews[1].run_each == "output.md"
+
+    def test_from_dict_empty_reviews(self) -> None:
+        """Test creating step with empty reviews list."""
+        data = {
+            "id": "step1",
+            "name": "Step 1",
+            "description": "First step",
+            "instructions_file": "steps/step1.md",
+            "outputs": {
+                "output.md": {"type": "file", "description": "An output file", "required": True},
+            },
+            "reviews": [],
+        }
+        step = Step.from_dict(data)
+
+        assert step.reviews == []
+
 
 class TestJobDefinition:
     """Tests for JobDefinition dataclass."""
@@ -225,7 +336,11 @@ def test_validate_dependencies_missing_step(self) -> None:
                     name="Step 1",
                     description="Step",
                     instructions_file="steps/step1.md",
-                    outputs=["output.md"],
+                    outputs=[
+                        OutputSpec(
+                            name="output.md", type="file", description="Output file", required=True
+                        )
+                    ],
                     dependencies=["nonexistent"],
                 )
             ],
@@ -248,7 +363,11 @@ def test_validate_dependencies_circular(self) -> None:
                     name="Step 1",
                     description="Step",
                     instructions_file="steps/step1.md",
-                    outputs=["output.md"],
+                    outputs=[
+                        OutputSpec(
+                            name="output.md", type="file", description="Output file", required=True
+                        )
+                    ],
                     dependencies=["step2"],
                 ),
                 Step(
@@ -256,7 +375,11 @@ def test_validate_dependencies_circular(self) -> None:
                     name="Step 2",
                     description="Step",
                     instructions_file="steps/step2.md",
-                    outputs=["output.md"],
+                    outputs=[
+                        OutputSpec(
+                            name="output.md", type="file", description="Output file", required=True
+                        )
+                    ],
                     dependencies=["step1"],
                 ),
             ],
@@ -288,7 +411,11 @@ def test_validate_file_inputs_missing_step(self) -> None:
                     description="Step",
                     instructions_file="steps/step1.md",
                     inputs=[StepInput(file="data.md", from_step="nonexistent")],
-                    outputs=["output.md"],
+                    outputs=[
+                        OutputSpec(
+                            name="output.md", type="file", description="Output file", required=True
+                        )
+                    ],
                     dependencies=["nonexistent"],
                 )
             ],
@@ -298,6 +425,68 @@ def test_validate_file_inputs_missing_step(self) -> None:
         with pytest.raises(ParseError, match="references non-existent step"):
             job.validate_file_inputs()
 
+    def test_validate_reviews_valid(self) -> None:
+        """Test that validate_reviews passes for valid run_each values."""
+        job = JobDefinition(
+            name="test_job",
+            version="1.0.0",
+            summary="Test job",
+            description="Test",
+            steps=[
+                Step(
+                    id="step1",
+                    name="Step 1",
+                    description="Step",
+                    instructions_file="steps/step1.md",
+                    outputs=[
+                        OutputSpec(
+                            name="report.md", type="file", description="Report", required=True
+                        )
+                    ],
+                    reviews=[
+                        Review(run_each="step", quality_criteria={"Complete": "Is it?"}),
+                        Review(run_each="report.md", quality_criteria={"Valid": "Is it?"}),
+                    ],
+                )
+            ],
+            job_dir=Path("/tmp"),
+        )
+
+        # Should not raise
+        job.validate_reviews()
+
+    def test_validate_reviews_invalid_run_each(self) -> None:
+        """Test that validate_reviews fails for invalid run_each."""
+        job = JobDefinition(
+            name="test_job",
+            version="1.0.0",
+            summary="Test job",
+            description="Test",
+            steps=[
+                Step(
+                    id="step1",
+                    name="Step 1",
+                    description="Step",
+                    instructions_file="steps/step1.md",
+                    outputs=[
+                        OutputSpec(
+                            name="report.md", type="file", description="Report", required=True
+                        )
+                    ],
+                    reviews=[
+                        Review(
+                            run_each="nonexistent_output",
+                            quality_criteria={"Test": "Is it?"},
+                        ),
+                    ],
+                )
+            ],
+            job_dir=Path("/tmp"),
+        )
+
+        with pytest.raises(ParseError, match="run_each='nonexistent_output'"):
+            job.validate_reviews()
+
     def test_validate_file_inputs_not_in_dependencies(self) -> None:
         """Test file input validation fails if from_step not in dependencies."""
         job = JobDefinition(
@@ -311,7 +500,11 @@ def test_validate_file_inputs_not_in_dependencies(self) -> None:
                     name="Step 1",
                     description="Step",
                     instructions_file="steps/step1.md",
-                    outputs=["output.md"],
+                    outputs=[
+                        OutputSpec(
+                            name="output.md", type="file", description="Output file", required=True
+                        )
+                    ],
                 ),
                 Step(
                     id="step2",
@@ -319,7 +512,11 @@ def test_validate_file_inputs_not_in_dependencies(self) -> None:
                     description="Step",
                     instructions_file="steps/step2.md",
                     inputs=[StepInput(file="data.md", from_step="step1")],
-                    outputs=["output.md"],
+                    outputs=[
+                        OutputSpec(
+                            name="output.md", type="file", description="Output file", required=True
+                        )
+                    ],
                     # Missing step1 in dependencies!
                     dependencies=[],
                 ),
diff --git a/tests/unit/test_pattern_matcher.py b/tests/unit/test_pattern_matcher.py
deleted file mode 100644
index 69d73e7e..00000000
--- a/tests/unit/test_pattern_matcher.py
+++ /dev/null
@@ -1,205 +0,0 @@
-"""Tests for pattern matching with variable extraction."""
-
-import pytest
-
-from deepwork.core.pattern_matcher import (
-    PatternError,
-    match_pattern,
-    matches_any_pattern,
-    matches_glob,
-    resolve_pattern,
-    validate_pattern,
-)
-
-
-class TestBasicGlobPatterns:
-    """Tests for basic glob pattern matching (PM-1.1.x from test_scenarios.md)."""
-
-    def test_exact_match(self) -> None:
-        """PM-1.1.1: Exact match."""
-        assert matches_glob("README.md", "README.md")
-
-    def test_exact_no_match(self) -> None:
-        """PM-1.1.2: Exact no match (case sensitive)."""
-        assert not matches_glob("readme.md", "README.md")
-
-    def test_single_wildcard(self) -> None:
-        """PM-1.1.3: Single wildcard."""
-        assert matches_glob("main.py", "*.py")
-
-    def test_single_wildcard_nested(self) -> None:
-        """PM-1.1.4: Single wildcard - fnmatch matches nested paths too.
-
-        Note: Standard fnmatch does match across directory separators.
-        Use **/*.py pattern to explicitly require directory prefixes.
-        """
-        # fnmatch's * matches any character including /
-        # This is different from shell glob behavior
-        assert matches_glob("src/main.py", "*.py")
-
-    def test_double_wildcard(self) -> None:
-        """PM-1.1.5: Double wildcard matches nested paths."""
-        assert matches_glob("src/main.py", "**/*.py")
-
-    def test_double_wildcard_deep(self) -> None:
-        """PM-1.1.6: Double wildcard matches deeply nested paths."""
-        assert matches_glob("src/a/b/c/main.py", "**/*.py")
-
-    def test_double_wildcard_root(self) -> None:
-        """PM-1.1.7: Double wildcard matches root-level files."""
-        assert matches_glob("main.py", "**/*.py")
-
-    def test_directory_prefix(self) -> None:
-        """PM-1.1.8: Directory prefix matching."""
-        assert matches_glob("src/foo.py", "src/**/*")
-
-    def test_directory_prefix_deep(self) -> None:
-        """PM-1.1.9: Directory prefix matching deeply nested."""
-        assert matches_glob("src/a/b/c.py", "src/**/*")
-
-    def test_directory_no_match(self) -> None:
-        """PM-1.1.10: Directory prefix no match."""
-        assert not matches_glob("lib/foo.py", "src/**/*")
-
-    def test_brace_expansion_ts(self) -> None:
-        """PM-1.1.11: Brace expansion - not supported by fnmatch.
-
-        Note: Python's fnmatch doesn't support brace expansion.
-        Use matches_any_pattern with multiple patterns instead.
-        """
-        # fnmatch doesn't support {a,b} syntax
-        assert not matches_glob("app.ts", "*.{js,ts}")
-        # Use matches_any_pattern for multiple extensions
-        assert matches_any_pattern("app.ts", ["*.ts", "*.js"])
-
-    def test_brace_expansion_js(self) -> None:
-        """PM-1.1.12: Brace expansion - not supported by fnmatch."""
-        assert not matches_glob("app.js", "*.{js,ts}")
-        assert matches_any_pattern("app.js", ["*.ts", "*.js"])
-
-    def test_brace_expansion_no_match(self) -> None:
-        """PM-1.1.13: Brace expansion no match."""
-        # Neither {a,b} syntax nor multiple patterns match
-        assert not matches_glob("app.py", "*.{js,ts}")
-        assert not matches_any_pattern("app.py", ["*.ts", "*.js"])
-
-
-class TestVariablePatterns:
-    """Tests for variable pattern matching and extraction (PM-1.2.x)."""
-
-    def test_single_var_path(self) -> None:
-        """PM-1.2.1: Single variable captures nested path."""
-        result = match_pattern("src/{path}.py", "src/foo/bar.py")
-        assert result.matched
-        assert result.variables == {"path": "foo/bar"}
-
-    def test_single_var_name(self) -> None:
-        """PM-1.2.2: Single variable name (non-path)."""
-        result = match_pattern("src/{name}.py", "src/utils.py")
-        assert result.matched
-        assert result.variables == {"name": "utils"}
-
-    def test_name_no_nested(self) -> None:
-        """PM-1.2.3: {name} doesn't match nested paths (single segment)."""
-        result = match_pattern("src/{name}.py", "src/foo/bar.py")
-        # {name} only captures single segment, not nested paths
-        assert not result.matched
-
-    def test_two_variables(self) -> None:
-        """PM-1.2.4: Two variables in pattern."""
-        result = match_pattern("{dir}/{name}.py", "src/main.py")
-        assert result.matched
-        assert result.variables == {"dir": "src", "name": "main"}
-
-    def test_prefix_and_suffix(self) -> None:
-        """PM-1.2.5: Prefix and suffix around variable."""
-        result = match_pattern("test_{name}_test.py", "test_foo_test.py")
-        assert result.matched
-        assert result.variables == {"name": "foo"}
-
-    def test_nested_path_variable(self) -> None:
-        """PM-1.2.6: Nested path in middle."""
-        result = match_pattern("src/{path}/index.py", "src/a/b/index.py")
-        assert result.matched
-        assert result.variables == {"path": "a/b"}
-
-    def test_explicit_multi_segment(self) -> None:
-        """PM-1.2.7: Explicit {**mod} for multi-segment."""
-        result = match_pattern("src/{**mod}/main.py", "src/a/b/c/main.py")
-        assert result.matched
-        assert result.variables == {"mod": "a/b/c"}
-
-    def test_explicit_single_segment(self) -> None:
-        """PM-1.2.8: Explicit {*name} for single segment."""
-        result = match_pattern("src/{*name}.py", "src/utils.py")
-        assert result.matched
-        assert result.variables == {"name": "utils"}
-
-    def test_mixed_explicit(self) -> None:
-        """PM-1.2.9: Mixed explicit single and multi."""
-        result = match_pattern("{*dir}/{**path}.py", "src/a/b/c.py")
-        assert result.matched
-        assert result.variables == {"dir": "src", "path": "a/b/c"}
-
-
-class TestPatternResolution:
-    """Tests for pattern resolution / substitution (PM-1.3.x)."""
-
-    def test_simple_substitution(self) -> None:
-        """PM-1.3.1: Simple variable substitution."""
-        result = resolve_pattern("tests/{path}_test.py", {"path": "foo"})
-        assert result == "tests/foo_test.py"
-
-    def test_nested_path_substitution(self) -> None:
-        """PM-1.3.2: Nested path substitution."""
-        result = resolve_pattern("tests/{path}_test.py", {"path": "a/b/c"})
-        assert result == "tests/a/b/c_test.py"
-
-    def test_multiple_vars_substitution(self) -> None:
-        """PM-1.3.3: Multiple variables substitution."""
-        result = resolve_pattern("{dir}/test_{name}.py", {"dir": "tests", "name": "foo"})
-        assert result == "tests/test_foo.py"
-
-
-class TestPatternValidation:
-    """Tests for pattern syntax validation (SV-8.3.x)."""
-
-    def test_unclosed_brace(self) -> None:
-        """SV-8.3.1: Unclosed brace."""
-        with pytest.raises(PatternError, match="Unclosed brace|unclosed brace"):
-            validate_pattern("src/{path.py")
-
-    def test_empty_variable(self) -> None:
-        """SV-8.3.2: Empty variable name."""
-        with pytest.raises(PatternError, match="[Ee]mpty variable name"):
-            validate_pattern("src/{}.py")
-
-    def test_invalid_chars_in_var(self) -> None:
-        """SV-8.3.3: Invalid characters in variable name."""
-        with pytest.raises(PatternError, match="[Ii]nvalid"):
-            validate_pattern("src/{path/name}.py")
-
-    def test_duplicate_variable(self) -> None:
-        """SV-8.3.4: Duplicate variable name."""
-        with pytest.raises(PatternError, match="[Dd]uplicate"):
-            validate_pattern("{path}/{path}.py")
-
-
-class TestMatchesAnyPattern:
-    """Tests for matches_any_pattern function."""
-
-    def test_matches_first_pattern(self) -> None:
-        """Match against first of multiple patterns."""
-        assert matches_any_pattern("file.py", ["*.py", "*.js"])
-
-    def test_matches_second_pattern(self) -> None:
-        """Match against second of multiple patterns."""
-        assert matches_any_pattern("file.js", ["*.py", "*.js"])
-
-    def test_no_match(self) -> None:
-        """No match in any pattern."""
-        assert not matches_any_pattern("file.txt", ["*.py", "*.js"])
-
-    def test_empty_patterns(self) -> None:
-        """Empty patterns list never matches."""
-        assert not matches_any_pattern("file.py", [])
diff --git a/tests/unit/test_rules_check.py b/tests/unit/test_rules_check.py
deleted file mode 100644
index e672fd94..00000000
--- a/tests/unit/test_rules_check.py
+++ /dev/null
@@ -1,105 +0,0 @@
-"""Tests for rules_check hook module."""
-
-from deepwork.hooks.rules_check import extract_promise_tags
-
-
-class TestExtractPromiseTags:
-    """Tests for extract_promise_tags function."""
-
-    def test_extracts_simple_promise(self) -> None:
-        """Test extracting a simple promise tag."""
-        text = "I've reviewed this. <promise>Rule Name</promise>"
-        result = extract_promise_tags(text)
-        assert result == {"Rule Name"}
-
-    def test_extracts_promise_with_checkmark(self) -> None:
-        """Test extracting promise tag with checkmark prefix."""
-        text = "Done. <promise>✓ Rule Name</promise>"
-        result = extract_promise_tags(text)
-        assert result == {"Rule Name"}
-
-    def test_extracts_promise_with_checkmark_no_space(self) -> None:
-        """Test extracting promise tag with checkmark but no space."""
-        text = "<promise>✓Rule Name</promise>"
-        result = extract_promise_tags(text)
-        assert result == {"Rule Name"}
-
-    def test_extracts_multiple_promises(self) -> None:
-        """Test extracting multiple promise tags."""
-        text = """
-        <promise>Rule One</promise>
-        <promise>✓ Rule Two</promise>
-        <promise>Rule Three</promise>
-        """
-        result = extract_promise_tags(text)
-        assert result == {"Rule One", "Rule Two", "Rule Three"}
-
-    def test_case_insensitive_tag(self) -> None:
-        """Test that promise tags are case-insensitive."""
-        text = "<PROMISE>Rule Name</PROMISE>"
-        result = extract_promise_tags(text)
-        assert result == {"Rule Name"}
-
-    def test_preserves_rule_name_case(self) -> None:
-        """Test that rule name case is preserved."""
-        text = "<promise>Architecture Documentation Accuracy</promise>"
-        result = extract_promise_tags(text)
-        assert result == {"Architecture Documentation Accuracy"}
-
-    def test_handles_whitespace_in_tag(self) -> None:
-        """Test handling of whitespace around rule name."""
-        text = "<promise>  Rule Name  </promise>"
-        result = extract_promise_tags(text)
-        assert result == {"Rule Name"}
-
-    def test_handles_newlines_in_tag(self) -> None:
-        """Test handling of newlines in promise tag."""
-        text = "<promise>\n  Rule Name\n</promise>"
-        result = extract_promise_tags(text)
-        assert result == {"Rule Name"}
-
-    def test_returns_empty_set_for_no_promises(self) -> None:
-        """Test that empty set is returned when no promises exist."""
-        text = "No promises here."
-        result = extract_promise_tags(text)
-        assert result == set()
-
-    def test_handles_empty_string(self) -> None:
-        """Test handling of empty string."""
-        result = extract_promise_tags("")
-        assert result == set()
-
-    def test_real_world_command_error_promise(self) -> None:
-        """Test promise format shown in command error output."""
-        # This is the exact format shown to agents when a command rule fails
-        text = "<promise>✓ Manual Test: Infinite Block Command</promise>"
-        result = extract_promise_tags(text)
-        assert result == {"Manual Test: Infinite Block Command"}
-
-    def test_mixed_formats_in_same_text(self) -> None:
-        """Test extracting both checkmark and non-checkmark promises."""
-        text = """
-        <promise>Rule Without Checkmark</promise>
-        <promise>✓ Rule With Checkmark</promise>
-        """
-        result = extract_promise_tags(text)
-        assert result == {"Rule Without Checkmark", "Rule With Checkmark"}
-
-    def test_promise_with_special_characters_in_name(self) -> None:
-        """Test promise with special characters in rule name."""
-        text = "<promise>Source/Test Pairing</promise>"
-        result = extract_promise_tags(text)
-        assert result == {"Source/Test Pairing"}
-
-    def test_promise_embedded_in_markdown(self) -> None:
-        """Test promise tag embedded in markdown text."""
-        text = """
-        I've reviewed the documentation and it's accurate.
-
-        <promise>Architecture Documentation Accuracy</promise>
-        <promise>README Accuracy</promise>
-
-        The changes were purely cosmetic.
-        """
-        result = extract_promise_tags(text)
-        assert result == {"Architecture Documentation Accuracy", "README Accuracy"}
diff --git a/tests/unit/test_rules_parser.py b/tests/unit/test_rules_parser.py
deleted file mode 100644
index ee8a2375..00000000
--- a/tests/unit/test_rules_parser.py
+++ /dev/null
@@ -1,995 +0,0 @@
-"""Tests for rule definition parser."""
-
-from pathlib import Path
-
-from deepwork.core.pattern_matcher import matches_any_pattern as matches_pattern
-from deepwork.core.rules_parser import (
-    DetectionMode,
-    PairConfig,
-    Rule,
-    evaluate_rule,
-    evaluate_rules,
-    load_rules_from_directory,
-)
-
-
-class TestMatchesPattern:
-    """Tests for matches_pattern function."""
-
-    def test_simple_glob_match(self) -> None:
-        """Test simple glob pattern matching."""
-        assert matches_pattern("file.py", ["*.py"])
-        assert not matches_pattern("file.js", ["*.py"])
-
-    def test_directory_glob_match(self) -> None:
-        """Test directory pattern matching."""
-        assert matches_pattern("src/file.py", ["src/*"])
-        assert not matches_pattern("test/file.py", ["src/*"])
-
-    def test_recursive_glob_match(self) -> None:
-        """Test recursive ** pattern matching."""
-        assert matches_pattern("src/deep/nested/file.py", ["src/**/*.py"])
-        assert matches_pattern("src/file.py", ["src/**/*.py"])
-        assert not matches_pattern("test/file.py", ["src/**/*.py"])
-
-    def test_multiple_patterns(self) -> None:
-        """Test matching against multiple patterns."""
-        patterns = ["*.py", "*.js"]
-        assert matches_pattern("file.py", patterns)
-        assert matches_pattern("file.js", patterns)
-        assert not matches_pattern("file.txt", patterns)
-
-    def test_config_directory_pattern(self) -> None:
-        """Test pattern like app/config/**/*."""
-        assert matches_pattern("app/config/settings.py", ["app/config/**/*"])
-        assert matches_pattern("app/config/nested/deep.yml", ["app/config/**/*"])
-        assert not matches_pattern("app/other/file.py", ["app/config/**/*"])
-
-
-class TestEvaluateRule:
-    """Tests for evaluate_rule function."""
-
-    def test_fires_when_trigger_matches(self) -> None:
-        """Test rule fires when trigger matches."""
-        rule = Rule(
-            name="Test",
-            filename="test",
-            detection_mode=DetectionMode.TRIGGER_SAFETY,
-            triggers=["src/**/*.py"],
-            safety=[],
-            instructions="Check it",
-            compare_to="base",
-        )
-        changed_files = ["src/main.py", "README.md"]
-
-        result = evaluate_rule(rule, changed_files)
-        assert result.should_fire is True
-
-    def test_does_not_fire_when_no_trigger_match(self) -> None:
-        """Test rule doesn't fire when no trigger matches."""
-        rule = Rule(
-            name="Test",
-            filename="test",
-            detection_mode=DetectionMode.TRIGGER_SAFETY,
-            triggers=["src/**/*.py"],
-            safety=[],
-            instructions="Check it",
-            compare_to="base",
-        )
-        changed_files = ["test/main.py", "README.md"]
-
-        result = evaluate_rule(rule, changed_files)
-        assert result.should_fire is False
-
-    def test_does_not_fire_when_safety_matches(self) -> None:
-        """Test rule doesn't fire when safety file is also changed."""
-        rule = Rule(
-            name="Test",
-            filename="test",
-            detection_mode=DetectionMode.TRIGGER_SAFETY,
-            triggers=["app/config/**/*"],
-            safety=["docs/install_guide.md"],
-            instructions="Update docs",
-            compare_to="base",
-        )
-        changed_files = ["app/config/settings.py", "docs/install_guide.md"]
-
-        result = evaluate_rule(rule, changed_files)
-        assert result.should_fire is False
-
-    def test_fires_when_trigger_matches_but_safety_doesnt(self) -> None:
-        """Test rule fires when trigger matches but safety doesn't."""
-        rule = Rule(
-            name="Test",
-            filename="test",
-            detection_mode=DetectionMode.TRIGGER_SAFETY,
-            triggers=["app/config/**/*"],
-            safety=["docs/install_guide.md"],
-            instructions="Update docs",
-            compare_to="base",
-        )
-        changed_files = ["app/config/settings.py", "app/main.py"]
-
-        result = evaluate_rule(rule, changed_files)
-        assert result.should_fire is True
-
-    def test_multiple_safety_patterns(self) -> None:
-        """Test rule with multiple safety patterns."""
-        rule = Rule(
-            name="Test",
-            filename="test",
-            detection_mode=DetectionMode.TRIGGER_SAFETY,
-            triggers=["src/auth/**/*"],
-            safety=["SECURITY.md", "docs/security_review.md"],
-            instructions="Security review",
-            compare_to="base",
-        )
-
-        # Should not fire if any safety file is changed
-        result1 = evaluate_rule(rule, ["src/auth/login.py", "SECURITY.md"])
-        assert result1.should_fire is False
-        result2 = evaluate_rule(rule, ["src/auth/login.py", "docs/security_review.md"])
-        assert result2.should_fire is False
-
-        # Should fire if no safety files changed
-        result3 = evaluate_rule(rule, ["src/auth/login.py"])
-        assert result3.should_fire is True
-
-
-class TestEvaluateRules:
-    """Tests for evaluate_rules function."""
-
-    def test_returns_fired_rules(self) -> None:
-        """Test that evaluate_rules returns all fired rules."""
-        rules = [
-            Rule(
-                name="Rule 1",
-                filename="rule1",
-                detection_mode=DetectionMode.TRIGGER_SAFETY,
-                triggers=["src/**/*"],
-                safety=[],
-                instructions="Do 1",
-                compare_to="base",
-            ),
-            Rule(
-                name="Rule 2",
-                filename="rule2",
-                detection_mode=DetectionMode.TRIGGER_SAFETY,
-                triggers=["test/**/*"],
-                safety=[],
-                instructions="Do 2",
-                compare_to="base",
-            ),
-        ]
-        changed_files = ["src/main.py", "test/test_main.py"]
-
-        fired = evaluate_rules(rules, changed_files)
-
-        assert len(fired) == 2
-        assert fired[0].rule.name == "Rule 1"
-        assert fired[1].rule.name == "Rule 2"
-
-    def test_skips_promised_rules(self) -> None:
-        """Test that promised rules are skipped."""
-        rules = [
-            Rule(
-                name="Rule 1",
-                filename="rule1",
-                detection_mode=DetectionMode.TRIGGER_SAFETY,
-                triggers=["src/**/*"],
-                safety=[],
-                instructions="Do 1",
-                compare_to="base",
-            ),
-            Rule(
-                name="Rule 2",
-                filename="rule2",
-                detection_mode=DetectionMode.TRIGGER_SAFETY,
-                triggers=["src/**/*"],
-                safety=[],
-                instructions="Do 2",
-                compare_to="base",
-            ),
-        ]
-        changed_files = ["src/main.py"]
-        promised = {"Rule 1"}
-
-        fired = evaluate_rules(rules, changed_files, promised)
-
-        assert len(fired) == 1
-        assert fired[0].rule.name == "Rule 2"
-
-    def test_returns_empty_when_no_rules_fire(self) -> None:
-        """Test returns empty list when no rules fire."""
-        rules = [
-            Rule(
-                name="Rule 1",
-                filename="rule1",
-                detection_mode=DetectionMode.TRIGGER_SAFETY,
-                triggers=["src/**/*"],
-                safety=[],
-                instructions="Do 1",
-                compare_to="base",
-            ),
-        ]
-        changed_files = ["test/test_main.py"]
-
-        fired = evaluate_rules(rules, changed_files)
-
-        assert len(fired) == 0
-
-
-class TestLoadRulesFromDirectory:
-    """Tests for load_rules_from_directory function."""
-
-    def test_loads_rules_from_directory(self, temp_dir: Path) -> None:
-        """Test loading rules from a directory."""
-        rules_dir = temp_dir / "rules"
-        rules_dir.mkdir()
-
-        # Create a rule file
-        rule_file = rules_dir / "test-rule.md"
-        rule_file.write_text(
-            """---
-name: Test Rule
-trigger: "src/**/*"
-compare_to: base
----
-Please check the source files.
-"""
-        )
-
-        rules = load_rules_from_directory(rules_dir)
-
-        assert len(rules) == 1
-        assert rules[0].name == "Test Rule"
-        assert rules[0].triggers == ["src/**/*"]
-        assert rules[0].detection_mode == DetectionMode.TRIGGER_SAFETY
-        assert "check the source files" in rules[0].instructions
-
-    def test_loads_multiple_rules(self, temp_dir: Path) -> None:
-        """Test loading multiple rules."""
-        rules_dir = temp_dir / "rules"
-        rules_dir.mkdir()
-
-        # Create rule files
-        (rules_dir / "rule1.md").write_text(
-            """---
-name: Rule 1
-trigger: "src/**/*"
-compare_to: base
----
-Instructions for rule 1.
-"""
-        )
-        (rules_dir / "rule2.md").write_text(
-            """---
-name: Rule 2
-trigger: "test/**/*"
-compare_to: base
----
-Instructions for rule 2.
-"""
-        )
-
-        rules = load_rules_from_directory(rules_dir)
-
-        assert len(rules) == 2
-        names = {r.name for r in rules}
-        assert names == {"Rule 1", "Rule 2"}
-
-    def test_returns_empty_for_empty_directory(self, temp_dir: Path) -> None:
-        """Test that empty directory returns empty list."""
-        rules_dir = temp_dir / "rules"
-        rules_dir.mkdir()
-
-        rules = load_rules_from_directory(rules_dir)
-
-        assert rules == []
-
-    def test_returns_empty_for_nonexistent_directory(self, temp_dir: Path) -> None:
-        """Test that nonexistent directory returns empty list."""
-        rules_dir = temp_dir / "nonexistent"
-
-        rules = load_rules_from_directory(rules_dir)
-
-        assert rules == []
-
-    def test_loads_rule_with_set_detection_mode(self, temp_dir: Path) -> None:
-        """Test loading a rule with set detection mode."""
-        rules_dir = temp_dir / "rules"
-        rules_dir.mkdir()
-
-        rule_file = rules_dir / "source-test-pairing.md"
-        rule_file.write_text(
-            """---
-name: Source/Test Pairing
-set:
-  - src/{path}.py
-  - tests/{path}_test.py
-compare_to: base
----
-Source and test files should change together.
-"""
-        )
-
-        rules = load_rules_from_directory(rules_dir)
-
-        assert len(rules) == 1
-        assert rules[0].name == "Source/Test Pairing"
-        assert rules[0].detection_mode == DetectionMode.SET
-        assert rules[0].set_patterns == ["src/{path}.py", "tests/{path}_test.py"]
-
-    def test_loads_rule_with_pair_detection_mode(self, temp_dir: Path) -> None:
-        """Test loading a rule with pair detection mode."""
-        rules_dir = temp_dir / "rules"
-        rules_dir.mkdir()
-
-        rule_file = rules_dir / "api-docs.md"
-        rule_file.write_text(
-            """---
-name: API Documentation
-pair:
-  trigger: src/api/{name}.py
-  expects: docs/api/{name}.md
-compare_to: base
----
-API code requires documentation.
-"""
-        )
-
-        rules = load_rules_from_directory(rules_dir)
-
-        assert len(rules) == 1
-        assert rules[0].name == "API Documentation"
-        assert rules[0].detection_mode == DetectionMode.PAIR
-        assert rules[0].pair_config is not None
-        assert rules[0].pair_config.trigger == "src/api/{name}.py"
-        assert rules[0].pair_config.expects == ["docs/api/{name}.md"]
-
-    def test_loads_rule_with_command_action(self, temp_dir: Path) -> None:
-        """Test loading a rule with command action."""
-        rules_dir = temp_dir / "rules"
-        rules_dir.mkdir()
-
-        rule_file = rules_dir / "format-python.md"
-        rule_file.write_text(
-            """---
-name: Format Python
-trigger: "**/*.py"
-action:
-  command: "ruff format {file}"
-  run_for: each_match
-compare_to: prompt
----
-"""
-        )
-
-        rules = load_rules_from_directory(rules_dir)
-
-        assert len(rules) == 1
-        assert rules[0].name == "Format Python"
-        from deepwork.core.rules_parser import ActionType
-
-        assert rules[0].action_type == ActionType.COMMAND
-        assert rules[0].command_action is not None
-        assert rules[0].command_action.command == "ruff format {file}"
-        assert rules[0].command_action.run_for == "each_match"
-
-
-class TestCorrespondenceSets:
-    """Tests for set correspondence evaluation (CS-3.x from test_scenarios.md)."""
-
-    def test_both_changed_no_fire(self) -> None:
-        """CS-3.1.1: Both source and test changed - no fire."""
-        rule = Rule(
-            name="Source/Test Pairing",
-            filename="source-test-pairing",
-            detection_mode=DetectionMode.SET,
-            set_patterns=["src/{path}.py", "tests/{path}_test.py"],
-            instructions="Update tests",
-            compare_to="base",
-        )
-        changed_files = ["src/foo.py", "tests/foo_test.py"]
-
-        result = evaluate_rule(rule, changed_files)
-        assert result.should_fire is False
-
-    def test_only_source_fires(self) -> None:
-        """CS-3.1.2: Only source changed - fires."""
-        rule = Rule(
-            name="Source/Test Pairing",
-            filename="source-test-pairing",
-            detection_mode=DetectionMode.SET,
-            set_patterns=["src/{path}.py", "tests/{path}_test.py"],
-            instructions="Update tests",
-            compare_to="base",
-        )
-        changed_files = ["src/foo.py"]
-
-        result = evaluate_rule(rule, changed_files)
-        assert result.should_fire is True
-        assert "src/foo.py" in result.trigger_files
-        assert "tests/foo_test.py" in result.missing_files
-
-    def test_only_test_fires(self) -> None:
-        """CS-3.1.3: Only test changed - fires."""
-        rule = Rule(
-            name="Source/Test Pairing",
-            filename="source-test-pairing",
-            detection_mode=DetectionMode.SET,
-            set_patterns=["src/{path}.py", "tests/{path}_test.py"],
-            instructions="Update source",
-            compare_to="base",
-        )
-        changed_files = ["tests/foo_test.py"]
-
-        result = evaluate_rule(rule, changed_files)
-        assert result.should_fire is True
-        assert "tests/foo_test.py" in result.trigger_files
-        assert "src/foo.py" in result.missing_files
-
-    def test_nested_both_no_fire(self) -> None:
-        """CS-3.1.4: Nested paths - both changed."""
-        rule = Rule(
-            name="Source/Test Pairing",
-            filename="source-test-pairing",
-            detection_mode=DetectionMode.SET,
-            set_patterns=["src/{path}.py", "tests/{path}_test.py"],
-            instructions="Update tests",
-            compare_to="base",
-        )
-        changed_files = ["src/a/b.py", "tests/a/b_test.py"]
-
-        result = evaluate_rule(rule, changed_files)
-        assert result.should_fire is False
-
-    def test_nested_only_source_fires(self) -> None:
-        """CS-3.1.5: Nested paths - only source."""
-        rule = Rule(
-            name="Source/Test Pairing",
-            filename="source-test-pairing",
-            detection_mode=DetectionMode.SET,
-            set_patterns=["src/{path}.py", "tests/{path}_test.py"],
-            instructions="Update tests",
-            compare_to="base",
-        )
-        changed_files = ["src/a/b.py"]
-
-        result = evaluate_rule(rule, changed_files)
-        assert result.should_fire is True
-        assert "tests/a/b_test.py" in result.missing_files
-
-    def test_unrelated_file_no_fire(self) -> None:
-        """CS-3.1.6: Unrelated file - no fire."""
-        rule = Rule(
-            name="Source/Test Pairing",
-            filename="source-test-pairing",
-            detection_mode=DetectionMode.SET,
-            set_patterns=["src/{path}.py", "tests/{path}_test.py"],
-            instructions="Update tests",
-            compare_to="base",
-        )
-        changed_files = ["docs/readme.md"]
-
-        result = evaluate_rule(rule, changed_files)
-        assert result.should_fire is False
-
-    def test_source_plus_unrelated_fires(self) -> None:
-        """CS-3.1.7: Source + unrelated - fires."""
-        rule = Rule(
-            name="Source/Test Pairing",
-            filename="source-test-pairing",
-            detection_mode=DetectionMode.SET,
-            set_patterns=["src/{path}.py", "tests/{path}_test.py"],
-            instructions="Update tests",
-            compare_to="base",
-        )
-        changed_files = ["src/foo.py", "docs/readme.md"]
-
-        result = evaluate_rule(rule, changed_files)
-        assert result.should_fire is True
-
-    def test_both_plus_unrelated_no_fire(self) -> None:
-        """CS-3.1.8: Both + unrelated - no fire."""
-        rule = Rule(
-            name="Source/Test Pairing",
-            filename="source-test-pairing",
-            detection_mode=DetectionMode.SET,
-            set_patterns=["src/{path}.py", "tests/{path}_test.py"],
-            instructions="Update tests",
-            compare_to="base",
-        )
-        changed_files = ["src/foo.py", "tests/foo_test.py", "docs/readme.md"]
-
-        result = evaluate_rule(rule, changed_files)
-        assert result.should_fire is False
-
-
-class TestThreePatternSets:
-    """Tests for three-pattern set correspondence (CS-3.2.x)."""
-
-    def test_all_three_no_fire(self) -> None:
-        """CS-3.2.1: All three files changed - no fire."""
-        rule = Rule(
-            name="Model/Schema/Migration",
-            filename="model-schema-migration",
-            detection_mode=DetectionMode.SET,
-            set_patterns=[
-                "models/{name}.py",
-                "schemas/{name}.py",
-                "migrations/{name}.sql",
-            ],
-            instructions="Update all related files",
-            compare_to="base",
-        )
-        changed_files = ["models/user.py", "schemas/user.py", "migrations/user.sql"]
-
-        result = evaluate_rule(rule, changed_files)
-        assert result.should_fire is False
-
-    def test_two_of_three_fires(self) -> None:
-        """CS-3.2.2: Two of three - fires (missing migration)."""
-        rule = Rule(
-            name="Model/Schema/Migration",
-            filename="model-schema-migration",
-            detection_mode=DetectionMode.SET,
-            set_patterns=[
-                "models/{name}.py",
-                "schemas/{name}.py",
-                "migrations/{name}.sql",
-            ],
-            instructions="Update all related files",
-            compare_to="base",
-        )
-        changed_files = ["models/user.py", "schemas/user.py"]
-
-        result = evaluate_rule(rule, changed_files)
-        assert result.should_fire is True
-        assert "migrations/user.sql" in result.missing_files
-
-    def test_one_of_three_fires(self) -> None:
-        """CS-3.2.3: One of three - fires (missing 2)."""
-        rule = Rule(
-            name="Model/Schema/Migration",
-            filename="model-schema-migration",
-            detection_mode=DetectionMode.SET,
-            set_patterns=[
-                "models/{name}.py",
-                "schemas/{name}.py",
-                "migrations/{name}.sql",
-            ],
-            instructions="Update all related files",
-            compare_to="base",
-        )
-        changed_files = ["models/user.py"]
-
-        result = evaluate_rule(rule, changed_files)
-        assert result.should_fire is True
-        assert len(result.missing_files) == 2
-        assert "schemas/user.py" in result.missing_files
-        assert "migrations/user.sql" in result.missing_files
-
-    def test_different_names_fire_both(self) -> None:
-        """CS-3.2.4: Different names - both incomplete."""
-        rule = Rule(
-            name="Model/Schema/Migration",
-            filename="model-schema-migration",
-            detection_mode=DetectionMode.SET,
-            set_patterns=[
-                "models/{name}.py",
-                "schemas/{name}.py",
-                "migrations/{name}.sql",
-            ],
-            instructions="Update all related files",
-            compare_to="base",
-        )
-        changed_files = ["models/user.py", "schemas/order.py"]
-
-        result = evaluate_rule(rule, changed_files)
-        assert result.should_fire is True
-        # Both trigger because each is incomplete
-        assert (
-            "models/user.py" in result.trigger_files or "schemas/order.py" in result.trigger_files
-        )
-
-
-class TestCorrespondencePairs:
-    """Tests for pair correspondence evaluation (CP-4.x from test_scenarios.md)."""
-
-    def test_both_changed_no_fire(self) -> None:
-        """CP-4.1.1: Both trigger and expected changed - no fire."""
-        rule = Rule(
-            name="API Documentation",
-            filename="api-documentation",
-            detection_mode=DetectionMode.PAIR,
-            pair_config=PairConfig(
-                trigger="api/{path}.py",
-                expects=["docs/api/{path}.md"],
-            ),
-            instructions="Update API docs",
-            compare_to="base",
-        )
-        changed_files = ["api/users.py", "docs/api/users.md"]
-
-        result = evaluate_rule(rule, changed_files)
-        assert result.should_fire is False
-
-    def test_only_trigger_fires(self) -> None:
-        """CP-4.1.2: Only trigger changed - fires."""
-        rule = Rule(
-            name="API Documentation",
-            filename="api-documentation",
-            detection_mode=DetectionMode.PAIR,
-            pair_config=PairConfig(
-                trigger="api/{path}.py",
-                expects=["docs/api/{path}.md"],
-            ),
-            instructions="Update API docs",
-            compare_to="base",
-        )
-        changed_files = ["api/users.py"]
-
-        result = evaluate_rule(rule, changed_files)
-        assert result.should_fire is True
-        assert "api/users.py" in result.trigger_files
-        assert "docs/api/users.md" in result.missing_files
-
-    def test_only_expected_no_fire(self) -> None:
-        """CP-4.1.3: Only expected changed - no fire (directional)."""
-        rule = Rule(
-            name="API Documentation",
-            filename="api-documentation",
-            detection_mode=DetectionMode.PAIR,
-            pair_config=PairConfig(
-                trigger="api/{path}.py",
-                expects=["docs/api/{path}.md"],
-            ),
-            instructions="Update API docs",
-            compare_to="base",
-        )
-        changed_files = ["docs/api/users.md"]
-
-        result = evaluate_rule(rule, changed_files)
-        assert result.should_fire is False
-
-    def test_trigger_plus_unrelated_fires(self) -> None:
-        """CP-4.1.4: Trigger + unrelated - fires."""
-        rule = Rule(
-            name="API Documentation",
-            filename="api-documentation",
-            detection_mode=DetectionMode.PAIR,
-            pair_config=PairConfig(
-                trigger="api/{path}.py",
-                expects=["docs/api/{path}.md"],
-            ),
-            instructions="Update API docs",
-            compare_to="base",
-        )
-        changed_files = ["api/users.py", "README.md"]
-
-        result = evaluate_rule(rule, changed_files)
-        assert result.should_fire is True
-
-    def test_expected_plus_unrelated_no_fire(self) -> None:
-        """CP-4.1.5: Expected + unrelated - no fire."""
-        rule = Rule(
-            name="API Documentation",
-            filename="api-documentation",
-            detection_mode=DetectionMode.PAIR,
-            pair_config=PairConfig(
-                trigger="api/{path}.py",
-                expects=["docs/api/{path}.md"],
-            ),
-            instructions="Update API docs",
-            compare_to="base",
-        )
-        changed_files = ["docs/api/users.md", "README.md"]
-
-        result = evaluate_rule(rule, changed_files)
-        assert result.should_fire is False
-
-
-class TestMultiExpectsPairs:
-    """Tests for multi-expects pair correspondence (CP-4.2.x)."""
-
-    def test_all_three_no_fire(self) -> None:
-        """CP-4.2.1: All three changed - no fire."""
-        rule = Rule(
-            name="API Full Documentation",
-            filename="api-full-documentation",
-            detection_mode=DetectionMode.PAIR,
-            pair_config=PairConfig(
-                trigger="api/{path}.py",
-                expects=["docs/api/{path}.md", "openapi/{path}.yaml"],
-            ),
-            instructions="Update API docs and OpenAPI",
-            compare_to="base",
-        )
-        changed_files = ["api/users.py", "docs/api/users.md", "openapi/users.yaml"]
-
-        result = evaluate_rule(rule, changed_files)
-        assert result.should_fire is False
-
-    def test_trigger_plus_one_expect_fires(self) -> None:
-        """CP-4.2.2: Trigger + one expect - fires (missing openapi)."""
-        rule = Rule(
-            name="API Full Documentation",
-            filename="api-full-documentation",
-            detection_mode=DetectionMode.PAIR,
-            pair_config=PairConfig(
-                trigger="api/{path}.py",
-                expects=["docs/api/{path}.md", "openapi/{path}.yaml"],
-            ),
-            instructions="Update API docs and OpenAPI",
-            compare_to="base",
-        )
-        changed_files = ["api/users.py", "docs/api/users.md"]
-
-        result = evaluate_rule(rule, changed_files)
-        assert result.should_fire is True
-        assert "openapi/users.yaml" in result.missing_files
-
-    def test_only_trigger_fires_missing_both(self) -> None:
-        """CP-4.2.3: Only trigger - fires (missing both)."""
-        rule = Rule(
-            name="API Full Documentation",
-            filename="api-full-documentation",
-            detection_mode=DetectionMode.PAIR,
-            pair_config=PairConfig(
-                trigger="api/{path}.py",
-                expects=["docs/api/{path}.md", "openapi/{path}.yaml"],
-            ),
-            instructions="Update API docs and OpenAPI",
-            compare_to="base",
-        )
-        changed_files = ["api/users.py"]
-
-        result = evaluate_rule(rule, changed_files)
-        assert result.should_fire is True
-        assert len(result.missing_files) == 2
-        assert "docs/api/users.md" in result.missing_files
-        assert "openapi/users.yaml" in result.missing_files
-
-    def test_both_expects_only_no_fire(self) -> None:
-        """CP-4.2.4: Both expects only - no fire."""
-        rule = Rule(
-            name="API Full Documentation",
-            filename="api-full-documentation",
-            detection_mode=DetectionMode.PAIR,
-            pair_config=PairConfig(
-                trigger="api/{path}.py",
-                expects=["docs/api/{path}.md", "openapi/{path}.yaml"],
-            ),
-            instructions="Update API docs and OpenAPI",
-            compare_to="base",
-        )
-        changed_files = ["docs/api/users.md", "openapi/users.yaml"]
-
-        result = evaluate_rule(rule, changed_files)
-        assert result.should_fire is False
-
-
-class TestCreatedMode:
-    """Tests for created mode evaluation."""
-
-    def test_fires_when_created_file_matches(self) -> None:
-        """Test rule fires when a created file matches the pattern."""
-        rule = Rule(
-            name="New Module Docs",
-            filename="new-module-docs",
-            detection_mode=DetectionMode.CREATED,
-            created_patterns=["src/**/*.py"],
-            instructions="Document the new module",
-            compare_to="base",
-        )
-        created_files = ["src/new_module.py"]
-
-        result = evaluate_rule(rule, [], created_files)
-        assert result.should_fire is True
-        assert "src/new_module.py" in result.trigger_files
-
-    def test_does_not_fire_when_no_match(self) -> None:
-        """Test rule doesn't fire when no created file matches."""
-        rule = Rule(
-            name="New Module Docs",
-            filename="new-module-docs",
-            detection_mode=DetectionMode.CREATED,
-            created_patterns=["src/**/*.py"],
-            instructions="Document the new module",
-            compare_to="base",
-        )
-        created_files = ["tests/test_new.py"]
-
-        result = evaluate_rule(rule, [], created_files)
-        assert result.should_fire is False
-
-    def test_does_not_fire_for_modified_files(self) -> None:
-        """Test rule doesn't fire for modified files (only created)."""
-        rule = Rule(
-            name="New Module Docs",
-            filename="new-module-docs",
-            detection_mode=DetectionMode.CREATED,
-            created_patterns=["src/**/*.py"],
-            instructions="Document the new module",
-            compare_to="base",
-        )
-        # File is in changed_files but NOT in created_files
-        changed_files = ["src/existing_module.py"]
-        created_files: list[str] = []
-
-        result = evaluate_rule(rule, changed_files, created_files)
-        assert result.should_fire is False
-
-    def test_multiple_created_patterns(self) -> None:
-        """Test rule with multiple created patterns."""
-        rule = Rule(
-            name="New Code Standards",
-            filename="new-code-standards",
-            detection_mode=DetectionMode.CREATED,
-            created_patterns=["src/**/*.py", "lib/**/*.py"],
-            instructions="Follow code standards",
-            compare_to="base",
-        )
-
-        # Matches first pattern
-        result1 = evaluate_rule(rule, [], ["src/foo.py"])
-        assert result1.should_fire is True
-
-        # Matches second pattern
-        result2 = evaluate_rule(rule, [], ["lib/bar.py"])
-        assert result2.should_fire is True
-
-        # Matches neither
-        result3 = evaluate_rule(rule, [], ["tests/test_foo.py"])
-        assert result3.should_fire is False
-
-    def test_created_with_nested_path(self) -> None:
-        """Test created mode with nested paths."""
-        rule = Rule(
-            name="New Component",
-            filename="new-component",
-            detection_mode=DetectionMode.CREATED,
-            created_patterns=["src/components/**/*.tsx"],
-            instructions="Document the component",
-            compare_to="base",
-        )
-        created_files = ["src/components/ui/Button.tsx"]
-
-        result = evaluate_rule(rule, [], created_files)
-        assert result.should_fire is True
-        assert "src/components/ui/Button.tsx" in result.trigger_files
-
-    def test_created_mixed_with_changed(self) -> None:
-        """Test that changed_files don't affect created mode rules."""
-        rule = Rule(
-            name="New Module Docs",
-            filename="new-module-docs",
-            detection_mode=DetectionMode.CREATED,
-            created_patterns=["src/**/*.py"],
-            instructions="Document the new module",
-            compare_to="base",
-        )
-        # src/existing.py is modified (in changed_files)
-        # src/new.py is created (in created_files)
-        changed_files = ["src/existing.py", "src/new.py"]
-        created_files = ["src/new.py"]
-
-        result = evaluate_rule(rule, changed_files, created_files)
-        assert result.should_fire is True
-        # Only the created file should be in trigger_files
-        assert result.trigger_files == ["src/new.py"]
-
-    def test_evaluate_rules_with_created_mode(self) -> None:
-        """Test evaluate_rules passes created_files correctly."""
-        rules = [
-            Rule(
-                name="Trigger Rule",
-                filename="trigger-rule",
-                detection_mode=DetectionMode.TRIGGER_SAFETY,
-                triggers=["src/**/*.py"],
-                safety=[],
-                instructions="Check source",
-                compare_to="base",
-            ),
-            Rule(
-                name="Created Rule",
-                filename="created-rule",
-                detection_mode=DetectionMode.CREATED,
-                created_patterns=["src/**/*.py"],
-                instructions="Document new files",
-                compare_to="base",
-            ),
-        ]
-        # src/existing.py is modified, src/new.py is created
-        changed_files = ["src/existing.py", "src/new.py"]
-        created_files = ["src/new.py"]
-
-        results = evaluate_rules(rules, changed_files, None, created_files)
-
-        # Both rules should fire
-        assert len(results) == 2
-        rule_names = {r.rule.name for r in results}
-        assert "Trigger Rule" in rule_names
-        assert "Created Rule" in rule_names
-
-
-class TestLoadCreatedModeRule:
-    """Tests for loading rules with created detection mode."""
-
-    def test_loads_rule_with_created_detection_mode(self, temp_dir: Path) -> None:
-        """Test loading a rule with created detection mode."""
-        rules_dir = temp_dir / "rules"
-        rules_dir.mkdir()
-
-        rule_file = rules_dir / "new-module-docs.md"
-        rule_file.write_text(
-            """---
-name: New Module Documentation
-created: src/**/*.py
-compare_to: base
----
-A new Python module was created. Please add documentation.
-"""
-        )
-
-        rules = load_rules_from_directory(rules_dir)
-
-        assert len(rules) == 1
-        assert rules[0].name == "New Module Documentation"
-        assert rules[0].detection_mode == DetectionMode.CREATED
-        assert rules[0].created_patterns == ["src/**/*.py"]
-
-    def test_loads_rule_with_multiple_created_patterns(self, temp_dir: Path) -> None:
-        """Test loading a rule with multiple created patterns."""
-        rules_dir = temp_dir / "rules"
-        rules_dir.mkdir()
-
-        rule_file = rules_dir / "new-code-standards.md"
-        rule_file.write_text(
-            """---
-name: New Code Standards
-created:
-  - src/**/*.py
-  - lib/**/*.py
-compare_to: base
----
-New code must follow standards.
-"""
-        )
-
-        rules = load_rules_from_directory(rules_dir)
-
-        assert len(rules) == 1
-        assert rules[0].name == "New Code Standards"
-        assert rules[0].detection_mode == DetectionMode.CREATED
-        assert rules[0].created_patterns == ["src/**/*.py", "lib/**/*.py"]
-
-    def test_loads_created_rule_with_command_action(self, temp_dir: Path) -> None:
-        """Test loading a created mode rule with command action."""
-        rules_dir = temp_dir / "rules"
-        rules_dir.mkdir()
-
-        rule_file = rules_dir / "new-file-lint.md"
-        rule_file.write_text(
-            """---
-name: New File Lint
-created: "**/*.py"
-compare_to: base
-action:
-  command: "ruff check {file}"
-  run_for: each_match
----
-"""
-        )
-
-        rules = load_rules_from_directory(rules_dir)
-
-        assert len(rules) == 1
-        assert rules[0].name == "New File Lint"
-        assert rules[0].detection_mode == DetectionMode.CREATED
-        from deepwork.core.rules_parser import ActionType
-
-        assert rules[0].action_type == ActionType.COMMAND
-        assert rules[0].command_action is not None
-        assert rules[0].command_action.command == "ruff check {file}"
diff --git a/tests/unit/test_rules_queue.py b/tests/unit/test_rules_queue.py
deleted file mode 100644
index 8c35d06d..00000000
--- a/tests/unit/test_rules_queue.py
+++ /dev/null
@@ -1,349 +0,0 @@
-"""Tests for rules queue system (QS-6.x from test_scenarios.md)."""
-
-from pathlib import Path
-
-import pytest
-
-from deepwork.core.rules_queue import (
-    ActionResult,
-    QueueEntry,
-    QueueEntryStatus,
-    RulesQueue,
-    compute_trigger_hash,
-)
-
-
-class TestComputeTriggerHash:
-    """Tests for hash calculation (QS-6.2.x)."""
-
-    def test_same_everything_same_hash(self) -> None:
-        """QS-6.2.1: Same rule, files, baseline - same hash."""
-        hash1 = compute_trigger_hash("RuleA", ["a.py"], "commit1")
-        hash2 = compute_trigger_hash("RuleA", ["a.py"], "commit1")
-        assert hash1 == hash2
-
-    def test_different_files_different_hash(self) -> None:
-        """QS-6.2.2: Different files - different hash."""
-        hash1 = compute_trigger_hash("RuleA", ["a.py"], "commit1")
-        hash2 = compute_trigger_hash("RuleA", ["b.py"], "commit1")
-        assert hash1 != hash2
-
-    def test_different_baseline_different_hash(self) -> None:
-        """QS-6.2.3: Different baseline - different hash."""
-        hash1 = compute_trigger_hash("RuleA", ["a.py"], "commit1")
-        hash2 = compute_trigger_hash("RuleA", ["a.py"], "commit2")
-        assert hash1 != hash2
-
-    def test_different_rule_different_hash(self) -> None:
-        """QS-6.2.4: Different rule - different hash."""
-        hash1 = compute_trigger_hash("RuleA", ["a.py"], "commit1")
-        hash2 = compute_trigger_hash("RuleB", ["a.py"], "commit1")
-        assert hash1 != hash2
-
-    def test_file_order_independent(self) -> None:
-        """File order should not affect hash (sorted internally)."""
-        hash1 = compute_trigger_hash("RuleA", ["a.py", "b.py"], "commit1")
-        hash2 = compute_trigger_hash("RuleA", ["b.py", "a.py"], "commit1")
-        assert hash1 == hash2
-
-
-class TestQueueEntry:
-    """Tests for QueueEntry dataclass."""
-
-    def test_to_dict_and_from_dict(self) -> None:
-        """Round-trip serialization."""
-        entry = QueueEntry(
-            rule_name="Test Rule",
-            rule_file="test-rule.md",
-            trigger_hash="abc123",
-            status=QueueEntryStatus.QUEUED,
-            baseline_ref="commit1",
-            trigger_files=["src/main.py"],
-            expected_files=["tests/main_test.py"],
-        )
-
-        data = entry.to_dict()
-        restored = QueueEntry.from_dict(data)
-
-        assert restored.rule_name == entry.rule_name
-        assert restored.rule_file == entry.rule_file
-        assert restored.trigger_hash == entry.trigger_hash
-        assert restored.status == entry.status
-        assert restored.trigger_files == entry.trigger_files
-        assert restored.expected_files == entry.expected_files
-
-    def test_with_action_result(self) -> None:
-        """Serialization with action result."""
-        entry = QueueEntry(
-            rule_name="Test Rule",
-            rule_file="test-rule.md",
-            trigger_hash="abc123",
-            action_result=ActionResult(type="command", output="ok", exit_code=0),
-        )
-
-        data = entry.to_dict()
-        restored = QueueEntry.from_dict(data)
-
-        assert restored.action_result is not None
-        assert restored.action_result.type == "command"
-        assert restored.action_result.exit_code == 0
-
-
-class TestRulesQueue:
-    """Tests for RulesQueue class (QS-6.1.x, QS-6.3.x)."""
-
-    @pytest.fixture
-    def queue(self, tmp_path: Path) -> RulesQueue:
-        """Create a queue with temp directory."""
-        return RulesQueue(tmp_path / "queue")
-
-    def test_create_entry(self, queue: RulesQueue) -> None:
-        """QS-6.1.1: Create new queue entry."""
-        entry = queue.create_entry(
-            rule_name="Test Rule",
-            rule_file="test-rule.md",
-            trigger_files=["src/main.py"],
-            baseline_ref="commit1",
-        )
-
-        assert entry is not None
-        assert entry.status == QueueEntryStatus.QUEUED
-        assert entry.rule_name == "Test Rule"
-
-    def test_create_duplicate_returns_none(self, queue: RulesQueue) -> None:
-        """QS-6.1.6: Re-trigger same files returns None."""
-        entry1 = queue.create_entry(
-            rule_name="Test Rule",
-            rule_file="test-rule.md",
-            trigger_files=["src/main.py"],
-            baseline_ref="commit1",
-        )
-        entry2 = queue.create_entry(
-            rule_name="Test Rule",
-            rule_file="test-rule.md",
-            trigger_files=["src/main.py"],
-            baseline_ref="commit1",
-        )
-
-        assert entry1 is not None
-        assert entry2 is None  # Duplicate
-
-    def test_create_different_files_new_entry(self, queue: RulesQueue) -> None:
-        """QS-6.1.7: Different files create new entry."""
-        entry1 = queue.create_entry(
-            rule_name="Test Rule",
-            rule_file="test-rule.md",
-            trigger_files=["src/a.py"],
-            baseline_ref="commit1",
-        )
-        entry2 = queue.create_entry(
-            rule_name="Test Rule",
-            rule_file="test-rule.md",
-            trigger_files=["src/b.py"],  # Different file
-            baseline_ref="commit1",
-        )
-
-        assert entry1 is not None
-        assert entry2 is not None
-
-    def test_has_entry(self, queue: RulesQueue) -> None:
-        """Check if entry exists."""
-        entry = queue.create_entry(
-            rule_name="Test Rule",
-            rule_file="test-rule.md",
-            trigger_files=["src/main.py"],
-            baseline_ref="commit1",
-        )
-        assert entry is not None
-
-        assert queue.has_entry(entry.trigger_hash) is True
-        assert queue.has_entry("nonexistent") is False
-
-    def test_get_entry(self, queue: RulesQueue) -> None:
-        """Retrieve entry by hash."""
-        entry = queue.create_entry(
-            rule_name="Test Rule",
-            rule_file="test-rule.md",
-            trigger_files=["src/main.py"],
-            baseline_ref="commit1",
-        )
-        assert entry is not None
-
-        retrieved = queue.get_entry(entry.trigger_hash)
-        assert retrieved is not None
-        assert retrieved.rule_name == "Test Rule"
-
-    def test_get_nonexistent_entry(self, queue: RulesQueue) -> None:
-        """Get nonexistent entry returns None."""
-        assert queue.get_entry("nonexistent") is None
-
-    def test_update_status_to_passed(self, queue: RulesQueue) -> None:
-        """QS-6.1.3: Update status to passed."""
-        entry = queue.create_entry(
-            rule_name="Test Rule",
-            rule_file="test-rule.md",
-            trigger_files=["src/main.py"],
-            baseline_ref="commit1",
-        )
-        assert entry is not None
-
-        success = queue.update_status(entry.trigger_hash, QueueEntryStatus.PASSED)
-        assert success is True
-
-        updated = queue.get_entry(entry.trigger_hash)
-        assert updated is not None
-        assert updated.status == QueueEntryStatus.PASSED
-        assert updated.evaluated_at is not None
-
-    def test_update_status_to_failed(self, queue: RulesQueue) -> None:
-        """QS-6.1.5: Update status to failed."""
-        entry = queue.create_entry(
-            rule_name="Test Rule",
-            rule_file="test-rule.md",
-            trigger_files=["src/main.py"],
-            baseline_ref="commit1",
-        )
-        assert entry is not None
-
-        action_result = ActionResult(type="command", output="error", exit_code=1)
-        success = queue.update_status(entry.trigger_hash, QueueEntryStatus.FAILED, action_result)
-        assert success is True
-
-        updated = queue.get_entry(entry.trigger_hash)
-        assert updated is not None
-        assert updated.status == QueueEntryStatus.FAILED
-        assert updated.action_result is not None
-        assert updated.action_result.exit_code == 1
-
-    def test_update_status_to_skipped(self, queue: RulesQueue) -> None:
-        """QS-6.1.2: Update status to skipped (safety suppression)."""
-        entry = queue.create_entry(
-            rule_name="Test Rule",
-            rule_file="test-rule.md",
-            trigger_files=["src/main.py"],
-            baseline_ref="commit1",
-        )
-        assert entry is not None
-
-        success = queue.update_status(entry.trigger_hash, QueueEntryStatus.SKIPPED)
-        assert success is True
-
-        updated = queue.get_entry(entry.trigger_hash)
-        assert updated is not None
-        assert updated.status == QueueEntryStatus.SKIPPED
-
-    def test_update_nonexistent_returns_false(self, queue: RulesQueue) -> None:
-        """Update nonexistent entry returns False."""
-        success = queue.update_status("nonexistent", QueueEntryStatus.PASSED)
-        assert success is False
-
-    def test_get_queued_entries(self, queue: RulesQueue) -> None:
-        """Get only queued entries."""
-        # Create multiple entries with different statuses
-        entry1 = queue.create_entry(
-            rule_name="Rule 1",
-            rule_file="rule1.md",
-            trigger_files=["a.py"],
-            baseline_ref="commit1",
-        )
-        entry2 = queue.create_entry(
-            rule_name="Rule 2",
-            rule_file="rule2.md",
-            trigger_files=["b.py"],
-            baseline_ref="commit1",
-        )
-        assert entry1 is not None
-        assert entry2 is not None
-
-        # Update one to passed
-        queue.update_status(entry1.trigger_hash, QueueEntryStatus.PASSED)
-
-        # Get queued only
-        queued = queue.get_queued_entries()
-        assert len(queued) == 1
-        assert queued[0].rule_name == "Rule 2"
-
-    def test_get_all_entries(self, queue: RulesQueue) -> None:
-        """Get all entries regardless of status."""
-        entry1 = queue.create_entry(
-            rule_name="Rule 1",
-            rule_file="rule1.md",
-            trigger_files=["a.py"],
-            baseline_ref="commit1",
-        )
-        entry2 = queue.create_entry(
-            rule_name="Rule 2",
-            rule_file="rule2.md",
-            trigger_files=["b.py"],
-            baseline_ref="commit1",
-        )
-        assert entry1 is not None
-        assert entry2 is not None
-
-        queue.update_status(entry1.trigger_hash, QueueEntryStatus.PASSED)
-
-        all_entries = queue.get_all_entries()
-        assert len(all_entries) == 2
-
-    def test_remove_entry(self, queue: RulesQueue) -> None:
-        """Remove entry by hash."""
-        entry = queue.create_entry(
-            rule_name="Test Rule",
-            rule_file="test-rule.md",
-            trigger_files=["src/main.py"],
-            baseline_ref="commit1",
-        )
-        assert entry is not None
-
-        removed = queue.remove_entry(entry.trigger_hash)
-        assert removed is True
-        assert queue.has_entry(entry.trigger_hash) is False
-
-    def test_remove_nonexistent_returns_false(self, queue: RulesQueue) -> None:
-        """Remove nonexistent entry returns False."""
-        removed = queue.remove_entry("nonexistent")
-        assert removed is False
-
-    def test_clear(self, queue: RulesQueue) -> None:
-        """Clear all entries."""
-        queue.create_entry(
-            rule_name="Rule 1",
-            rule_file="rule1.md",
-            trigger_files=["a.py"],
-            baseline_ref="commit1",
-        )
-        queue.create_entry(
-            rule_name="Rule 2",
-            rule_file="rule2.md",
-            trigger_files=["b.py"],
-            baseline_ref="commit1",
-        )
-
-        count = queue.clear()
-        assert count == 2
-        assert len(queue.get_all_entries()) == 0
-
-    def test_clear_empty_queue(self, queue: RulesQueue) -> None:
-        """Clear empty queue returns 0."""
-        count = queue.clear()
-        assert count == 0
-
-    def test_file_structure(self, queue: RulesQueue) -> None:
-        """Verify queue files are named correctly."""
-        entry = queue.create_entry(
-            rule_name="Test Rule",
-            rule_file="test-rule.md",
-            trigger_files=["src/main.py"],
-            baseline_ref="commit1",
-        )
-        assert entry is not None
-
-        # Check file exists with correct naming
-        expected_file = queue.queue_dir / f"{entry.trigger_hash}.queued.json"
-        assert expected_file.exists()
-
-        # Update status and check file renamed
-        queue.update_status(entry.trigger_hash, QueueEntryStatus.PASSED)
-        assert not expected_file.exists()
-        passed_file = queue.queue_dir / f"{entry.trigger_hash}.passed.json"
-        assert passed_file.exists()
diff --git a/tests/unit/test_schema_validation.py b/tests/unit/test_schema_validation.py
deleted file mode 100644
index c77fc7a0..00000000
--- a/tests/unit/test_schema_validation.py
+++ /dev/null
@@ -1,360 +0,0 @@
-"""Tests for schema validation (SV-8.x from test_scenarios.md)."""
-
-from pathlib import Path
-
-import pytest
-
-from deepwork.core.rules_parser import RulesParseError, parse_rule_file
-
-
-class TestRequiredFields:
-    """Tests for required field validation (SV-8.1.x)."""
-
-    def test_missing_name(self, tmp_path: Path) -> None:
-        """SV-8.1.1: Missing name field."""
-        rule_file = tmp_path / "test.md"
-        rule_file.write_text(
-            """---
-trigger: "src/**/*"
-compare_to: base
----
-Instructions here.
-"""
-        )
-
-        with pytest.raises(RulesParseError, match="name"):
-            parse_rule_file(rule_file)
-
-    def test_missing_detection_mode(self, tmp_path: Path) -> None:
-        """SV-8.1.2: Missing trigger, set, or pair."""
-        rule_file = tmp_path / "test.md"
-        rule_file.write_text(
-            """---
-name: Test Rule
-compare_to: base
----
-Instructions here.
-"""
-        )
-
-        with pytest.raises(RulesParseError):
-            parse_rule_file(rule_file)
-
-    def test_missing_compare_to(self, tmp_path: Path) -> None:
-        """SV-8.1.5: Missing compare_to field."""
-        rule_file = tmp_path / "test.md"
-        rule_file.write_text(
-            """---
-name: Test Rule
-trigger: "src/**/*"
----
-Instructions here.
-"""
-        )
-
-        with pytest.raises(RulesParseError, match="compare_to"):
-            parse_rule_file(rule_file)
-
-    def test_missing_markdown_body(self, tmp_path: Path) -> None:
-        """SV-8.1.3: Missing markdown body (for prompt action)."""
-        rule_file = tmp_path / "test.md"
-        rule_file.write_text(
-            """---
-name: Test Rule
-trigger: "src/**/*"
-compare_to: base
----
-"""
-        )
-
-        with pytest.raises(RulesParseError, match="markdown body|instructions"):
-            parse_rule_file(rule_file)
-
-    def test_set_requires_two_patterns(self, tmp_path: Path) -> None:
-        """SV-8.1.4: Set requires at least 2 patterns.
-
-        Note: Schema validation catches this before rule parser.
-        """
-        rule_file = tmp_path / "test.md"
-        rule_file.write_text(
-            """---
-name: Test Rule
-set:
-  - src/{path}.py
-compare_to: base
----
-Instructions here.
-"""
-        )
-
-        # Schema validation will fail due to minItems: 2
-        with pytest.raises(RulesParseError):
-            parse_rule_file(rule_file)
-
-
-class TestMutuallyExclusiveFields:
-    """Tests for mutually exclusive field validation (SV-8.2.x)."""
-
-    def test_both_trigger_and_set(self, tmp_path: Path) -> None:
-        """SV-8.2.1: Both trigger and set is invalid."""
-        rule_file = tmp_path / "test.md"
-        rule_file.write_text(
-            """---
-name: Test Rule
-trigger: "src/**/*"
-set:
-  - src/{path}.py
-  - tests/{path}_test.py
-compare_to: base
----
-Instructions here.
-"""
-        )
-
-        with pytest.raises(RulesParseError):
-            parse_rule_file(rule_file)
-
-    def test_both_trigger_and_pair(self, tmp_path: Path) -> None:
-        """SV-8.2.2: Both trigger and pair is invalid."""
-        rule_file = tmp_path / "test.md"
-        rule_file.write_text(
-            """---
-name: Test Rule
-trigger: "src/**/*"
-pair:
-  trigger: api/{path}.py
-  expects: docs/{path}.md
-compare_to: base
----
-Instructions here.
-"""
-        )
-
-        with pytest.raises(RulesParseError):
-            parse_rule_file(rule_file)
-
-    def test_all_detection_modes(self, tmp_path: Path) -> None:
-        """SV-8.2.3: All three detection modes is invalid."""
-        rule_file = tmp_path / "test.md"
-        rule_file.write_text(
-            """---
-name: Test Rule
-trigger: "src/**/*"
-set:
-  - src/{path}.py
-  - tests/{path}_test.py
-pair:
-  trigger: api/{path}.py
-  expects: docs/{path}.md
-compare_to: base
----
-Instructions here.
-"""
-        )
-
-        with pytest.raises(RulesParseError):
-            parse_rule_file(rule_file)
-
-
-class TestValueValidation:
-    """Tests for value validation (SV-8.4.x)."""
-
-    def test_invalid_compare_to(self, tmp_path: Path) -> None:
-        """SV-8.4.1: Invalid compare_to value."""
-        rule_file = tmp_path / "test.md"
-        rule_file.write_text(
-            """---
-name: Test Rule
-trigger: "src/**/*"
-compare_to: yesterday
----
-Instructions here.
-"""
-        )
-
-        with pytest.raises(RulesParseError):
-            parse_rule_file(rule_file)
-
-    def test_invalid_run_for(self, tmp_path: Path) -> None:
-        """SV-8.4.2: Invalid run_for value."""
-        rule_file = tmp_path / "test.md"
-        rule_file.write_text(
-            """---
-name: Test Rule
-trigger: "**/*.py"
-action:
-  command: "ruff format {file}"
-  run_for: first_match
-compare_to: prompt
----
-"""
-        )
-
-        with pytest.raises(RulesParseError):
-            parse_rule_file(rule_file)
-
-
-class TestValidRules:
-    """Tests for valid rule parsing."""
-
-    def test_valid_trigger_safety_rule(self, tmp_path: Path) -> None:
-        """Valid trigger/safety rule parses successfully."""
-        rule_file = tmp_path / "test.md"
-        rule_file.write_text(
-            """---
-name: Test Rule
-trigger: "src/**/*"
-safety: README.md
-compare_to: base
----
-Please check the code.
-"""
-        )
-
-        rule = parse_rule_file(rule_file)
-        assert rule.name == "Test Rule"
-        assert rule.triggers == ["src/**/*"]
-        assert rule.safety == ["README.md"]
-        assert rule.compare_to == "base"
-
-    def test_valid_set_rule(self, tmp_path: Path) -> None:
-        """Valid set rule parses successfully."""
-        rule_file = tmp_path / "test.md"
-        rule_file.write_text(
-            """---
-name: Source/Test Pairing
-set:
-  - src/{path}.py
-  - tests/{path}_test.py
-compare_to: base
----
-Source and test should change together.
-"""
-        )
-
-        rule = parse_rule_file(rule_file)
-        assert rule.name == "Source/Test Pairing"
-        assert len(rule.set_patterns) == 2
-        assert rule.compare_to == "base"
-
-    def test_valid_pair_rule(self, tmp_path: Path) -> None:
-        """Valid pair rule parses successfully."""
-        rule_file = tmp_path / "test.md"
-        rule_file.write_text(
-            """---
-name: API Documentation
-pair:
-  trigger: api/{module}.py
-  expects: docs/api/{module}.md
-compare_to: base
----
-API changes need documentation.
-"""
-        )
-
-        rule = parse_rule_file(rule_file)
-        assert rule.name == "API Documentation"
-        assert rule.pair_config is not None
-        assert rule.pair_config.trigger == "api/{module}.py"
-        assert rule.pair_config.expects == ["docs/api/{module}.md"]
-        assert rule.compare_to == "base"
-
-    def test_valid_command_rule(self, tmp_path: Path) -> None:
-        """Valid command rule parses successfully."""
-        rule_file = tmp_path / "test.md"
-        rule_file.write_text(
-            """---
-name: Format Python
-trigger: "**/*.py"
-action:
-  command: "ruff format {file}"
-  run_for: each_match
-compare_to: prompt
----
-"""
-        )
-
-        rule = parse_rule_file(rule_file)
-        assert rule.name == "Format Python"
-        assert rule.command_action is not None
-        assert rule.command_action.command == "ruff format {file}"
-        assert rule.command_action.run_for == "each_match"
-        assert rule.compare_to == "prompt"
-
-    def test_valid_compare_to_values(self, tmp_path: Path) -> None:
-        """Valid compare_to values parse successfully."""
-        for compare_to in ["base", "default_tip", "prompt"]:
-            rule_file = tmp_path / "test.md"
-            rule_file.write_text(
-                f"""---
-name: Test Rule
-trigger: "src/**/*"
-compare_to: {compare_to}
----
-Instructions here.
-"""
-            )
-
-            rule = parse_rule_file(rule_file)
-            assert rule.compare_to == compare_to
-
-    def test_multiple_triggers(self, tmp_path: Path) -> None:
-        """Multiple triggers as array parses successfully."""
-        rule_file = tmp_path / "test.md"
-        rule_file.write_text(
-            """---
-name: Test Rule
-trigger:
-  - src/**/*.py
-  - lib/**/*.py
-compare_to: base
----
-Instructions here.
-"""
-        )
-
-        rule = parse_rule_file(rule_file)
-        assert rule.triggers == ["src/**/*.py", "lib/**/*.py"]
-        assert rule.compare_to == "base"
-
-    def test_multiple_safety_patterns(self, tmp_path: Path) -> None:
-        """Multiple safety patterns as array parses successfully."""
-        rule_file = tmp_path / "test.md"
-        rule_file.write_text(
-            """---
-name: Test Rule
-trigger: src/**/*
-safety:
-  - README.md
-  - CHANGELOG.md
-compare_to: base
----
-Instructions here.
-"""
-        )
-
-        rule = parse_rule_file(rule_file)
-        assert rule.safety == ["README.md", "CHANGELOG.md"]
-        assert rule.compare_to == "base"
-
-    def test_multiple_expects(self, tmp_path: Path) -> None:
-        """Multiple expects patterns parses successfully."""
-        rule_file = tmp_path / "test.md"
-        rule_file.write_text(
-            """---
-name: Test Rule
-pair:
-  trigger: api/{module}.py
-  expects:
-    - docs/api/{module}.md
-    - openapi/{module}.yaml
-compare_to: base
----
-Instructions here.
-"""
-        )
-
-        rule = parse_rule_file(rule_file)
-        assert rule.pair_config is not None
-        assert rule.pair_config.expects == ["docs/api/{module}.md", "openapi/{module}.yaml"]
-        assert rule.compare_to == "base"
diff --git a/tests/unit/test_stop_hooks.py b/tests/unit/test_stop_hooks.py
deleted file mode 100644
index 96cdeb5b..00000000
--- a/tests/unit/test_stop_hooks.py
+++ /dev/null
@@ -1,860 +0,0 @@
-"""Tests for stop hook functionality."""
-
-from pathlib import Path
-
-import pytest
-
-from deepwork.core.adapters import ClaudeAdapter
-from deepwork.core.generator import GeneratorError, SkillGenerator
-from deepwork.core.parser import HookAction, JobDefinition, OutputSpec, Step, StopHook
-from deepwork.schemas.job_schema import JOB_SCHEMA
-from deepwork.utils.validation import ValidationError, validate_against_schema
-
-
-class TestStopHook:
-    """Tests for StopHook dataclass."""
-
-    def test_is_prompt(self) -> None:
-        """Test is_prompt returns True for prompt hooks."""
-        hook = StopHook(prompt="Check quality")
-        assert hook.is_prompt() is True
-        assert hook.is_prompt_file() is False
-        assert hook.is_script() is False
-
-    def test_is_prompt_file(self) -> None:
-        """Test is_prompt_file returns True for prompt file hooks."""
-        hook = StopHook(prompt_file="hooks/check.md")
-        assert hook.is_prompt() is False
-        assert hook.is_prompt_file() is True
-        assert hook.is_script() is False
-
-    def test_is_script(self) -> None:
-        """Test is_script returns True for script hooks."""
-        hook = StopHook(script="hooks/validate.sh")
-        assert hook.is_prompt() is False
-        assert hook.is_prompt_file() is False
-        assert hook.is_script() is True
-
-    def test_from_dict_prompt(self) -> None:
-        """Test from_dict creates prompt hook."""
-        data = {"prompt": "Verify all criteria are met"}
-        hook = StopHook.from_dict(data)
-        assert hook.prompt == "Verify all criteria are met"
-        assert hook.prompt_file is None
-        assert hook.script is None
-
-    def test_from_dict_prompt_file(self) -> None:
-        """Test from_dict creates prompt file hook."""
-        data = {"prompt_file": "hooks/quality.md"}
-        hook = StopHook.from_dict(data)
-        assert hook.prompt is None
-        assert hook.prompt_file == "hooks/quality.md"
-        assert hook.script is None
-
-    def test_from_dict_script(self) -> None:
-        """Test from_dict creates script hook."""
-        data = {"script": "hooks/validate.sh"}
-        hook = StopHook.from_dict(data)
-        assert hook.prompt is None
-        assert hook.prompt_file is None
-        assert hook.script == "hooks/validate.sh"
-
-
-class TestStepWithStopHooks:
-    """Tests for Step with stop_hooks."""
-
-    def test_step_with_no_stop_hooks(self) -> None:
-        """Test step without stop hooks."""
-        step = Step(
-            id="test",
-            name="Test Step",
-            description="A test step",
-            instructions_file="steps/test.md",
-            outputs=[OutputSpec(file="output.md")],
-        )
-        assert step.stop_hooks == []
-
-    def test_step_with_single_stop_hook(self) -> None:
-        """Test step with single stop hook (using hooks dict)."""
-        step = Step(
-            id="test",
-            name="Test Step",
-            description="A test step",
-            instructions_file="steps/test.md",
-            outputs=[OutputSpec(file="output.md")],
-            hooks={"after_agent": [HookAction(prompt="Check quality")]},
-        )
-        assert len(step.stop_hooks) == 1
-        assert step.stop_hooks[0].is_prompt()
-        assert step.stop_hooks[0].prompt == "Check quality"
-
-    def test_step_with_multiple_stop_hooks(self) -> None:
-        """Test step with multiple stop hooks (using hooks dict)."""
-        step = Step(
-            id="test",
-            name="Test Step",
-            description="A test step",
-            instructions_file="steps/test.md",
-            outputs=[OutputSpec(file="output.md")],
-            hooks={
-                "after_agent": [
-                    HookAction(prompt="Check criteria 1"),
-                    HookAction(script="hooks/validate.sh"),
-                ]
-            },
-        )
-        assert len(step.stop_hooks) == 2
-        assert step.stop_hooks[0].is_prompt()
-        assert step.stop_hooks[1].is_script()
-
-    def test_step_from_dict_with_stop_hooks(self) -> None:
-        """Test Step.from_dict parses stop_hooks array."""
-        data = {
-            "id": "test",
-            "name": "Test Step",
-            "description": "A test step",
-            "instructions_file": "steps/test.md",
-            "outputs": ["output.md"],
-            "stop_hooks": [
-                {"prompt": "Check quality criteria"},
-                {"script": "hooks/run_tests.sh"},
-            ],
-        }
-        step = Step.from_dict(data)
-        assert len(step.stop_hooks) == 2
-        assert step.stop_hooks[0].prompt == "Check quality criteria"
-        assert step.stop_hooks[1].script == "hooks/run_tests.sh"
-
-    def test_step_from_dict_without_stop_hooks(self) -> None:
-        """Test Step.from_dict with no stop_hooks returns empty list."""
-        data = {
-            "id": "test",
-            "name": "Test Step",
-            "description": "A test step",
-            "instructions_file": "steps/test.md",
-            "outputs": ["output.md"],
-        }
-        step = Step.from_dict(data)
-        assert step.stop_hooks == []
-
-    def test_step_from_dict_with_hooks_structure(self) -> None:
-        """Test Step.from_dict parses new hooks structure with lifecycle events."""
-        data = {
-            "id": "test",
-            "name": "Test Step",
-            "description": "A test step",
-            "instructions_file": "steps/test.md",
-            "outputs": ["output.md"],
-            "hooks": {
-                "after_agent": [
-                    {"prompt": "Check quality"},
-                    {"script": "hooks/validate.sh"},
-                ],
-                "before_tool": [
-                    {"prompt": "Pre-tool check"},
-                ],
-            },
-        }
-        step = Step.from_dict(data)
-        # stop_hooks property returns after_agent hooks
-        assert len(step.stop_hooks) == 2
-        assert step.stop_hooks[0].prompt == "Check quality"
-        assert step.stop_hooks[1].script == "hooks/validate.sh"
-        # Check full hooks dict
-        assert "after_agent" in step.hooks
-        assert "before_tool" in step.hooks
-        assert len(step.hooks["after_agent"]) == 2
-        assert len(step.hooks["before_tool"]) == 1
-
-
-class TestSchemaValidation:
-    """Tests for stop_hooks schema validation."""
-
-    def test_valid_prompt_stop_hook(self) -> None:
-        """Test schema accepts valid prompt stop hook."""
-        job_data = {
-            "name": "test_job",
-            "version": "1.0.0",
-            "summary": "Test job",
-            "steps": [
-                {
-                    "id": "step1",
-                    "name": "Step 1",
-                    "description": "A step",
-                    "instructions_file": "steps/step1.md",
-                    "outputs": ["output.md"],
-                    "stop_hooks": [{"prompt": "Check quality"}],
-                }
-            ],
-        }
-        # Should not raise
-        validate_against_schema(job_data, JOB_SCHEMA)
-
-    def test_valid_script_stop_hook(self) -> None:
-        """Test schema accepts valid script stop hook."""
-        job_data = {
-            "name": "test_job",
-            "version": "1.0.0",
-            "summary": "Test job",
-            "steps": [
-                {
-                    "id": "step1",
-                    "name": "Step 1",
-                    "description": "A step",
-                    "instructions_file": "steps/step1.md",
-                    "outputs": ["output.md"],
-                    "stop_hooks": [{"script": "hooks/validate.sh"}],
-                }
-            ],
-        }
-        validate_against_schema(job_data, JOB_SCHEMA)
-
-    def test_valid_prompt_file_stop_hook(self) -> None:
-        """Test schema accepts valid prompt_file stop hook."""
-        job_data = {
-            "name": "test_job",
-            "version": "1.0.0",
-            "summary": "Test job",
-            "steps": [
-                {
-                    "id": "step1",
-                    "name": "Step 1",
-                    "description": "A step",
-                    "instructions_file": "steps/step1.md",
-                    "outputs": ["output.md"],
-                    "stop_hooks": [{"prompt_file": "hooks/quality.md"}],
-                }
-            ],
-        }
-        validate_against_schema(job_data, JOB_SCHEMA)
-
-    def test_valid_multiple_stop_hooks(self) -> None:
-        """Test schema accepts multiple stop hooks."""
-        job_data = {
-            "name": "test_job",
-            "version": "1.0.0",
-            "summary": "Test job",
-            "steps": [
-                {
-                    "id": "step1",
-                    "name": "Step 1",
-                    "description": "A step",
-                    "instructions_file": "steps/step1.md",
-                    "outputs": ["output.md"],
-                    "stop_hooks": [
-                        {"prompt": "Check quality"},
-                        {"script": "hooks/tests.sh"},
-                    ],
-                }
-            ],
-        }
-        validate_against_schema(job_data, JOB_SCHEMA)
-
-    def test_invalid_stop_hook_missing_type(self) -> None:
-        """Test schema rejects stop hook without type."""
-        job_data = {
-            "name": "test_job",
-            "version": "1.0.0",
-            "summary": "Test job",
-            "steps": [
-                {
-                    "id": "step1",
-                    "name": "Step 1",
-                    "description": "A step",
-                    "instructions_file": "steps/step1.md",
-                    "outputs": ["output.md"],
-                    "stop_hooks": [{}],  # Empty object
-                }
-            ],
-        }
-        with pytest.raises(ValidationError):
-            validate_against_schema(job_data, JOB_SCHEMA)
-
-    def test_invalid_stop_hook_extra_fields(self) -> None:
-        """Test schema rejects stop hook with extra fields."""
-        job_data = {
-            "name": "test_job",
-            "version": "1.0.0",
-            "summary": "Test job",
-            "steps": [
-                {
-                    "id": "step1",
-                    "name": "Step 1",
-                    "description": "A step",
-                    "instructions_file": "steps/step1.md",
-                    "outputs": ["output.md"],
-                    "stop_hooks": [{"prompt": "Check", "extra": "field"}],
-                }
-            ],
-        }
-        with pytest.raises(ValidationError):
-            validate_against_schema(job_data, JOB_SCHEMA)
-
-    def test_valid_hooks_with_after_agent(self) -> None:
-        """Test schema accepts new hooks structure with after_agent event."""
-        job_data = {
-            "name": "test_job",
-            "version": "1.0.0",
-            "summary": "Test job",
-            "steps": [
-                {
-                    "id": "step1",
-                    "name": "Step 1",
-                    "description": "A step",
-                    "instructions_file": "steps/step1.md",
-                    "outputs": ["output.md"],
-                    "hooks": {
-                        "after_agent": [{"prompt": "Check quality"}],
-                    },
-                }
-            ],
-        }
-        validate_against_schema(job_data, JOB_SCHEMA)
-
-    def test_valid_hooks_with_multiple_events(self) -> None:
-        """Test schema accepts hooks with multiple lifecycle events."""
-        job_data = {
-            "name": "test_job",
-            "version": "1.0.0",
-            "summary": "Test job",
-            "steps": [
-                {
-                    "id": "step1",
-                    "name": "Step 1",
-                    "description": "A step",
-                    "instructions_file": "steps/step1.md",
-                    "outputs": ["output.md"],
-                    "hooks": {
-                        "after_agent": [{"prompt": "Check quality"}],
-                        "before_tool": [{"script": "hooks/validate.sh"}],
-                        "before_prompt": [{"prompt": "Initialize context"}],
-                    },
-                }
-            ],
-        }
-        validate_against_schema(job_data, JOB_SCHEMA)
-
-    def test_valid_hooks_with_script_action(self) -> None:
-        """Test schema accepts hooks with script action."""
-        job_data = {
-            "name": "test_job",
-            "version": "1.0.0",
-            "summary": "Test job",
-            "steps": [
-                {
-                    "id": "step1",
-                    "name": "Step 1",
-                    "description": "A step",
-                    "instructions_file": "steps/step1.md",
-                    "outputs": ["output.md"],
-                    "hooks": {
-                        "before_tool": [{"script": "hooks/check.sh"}],
-                    },
-                }
-            ],
-        }
-        validate_against_schema(job_data, JOB_SCHEMA)
-
-
-class TestGeneratorStopHooks:
-    """Tests for generator stop hooks context building."""
-
-    @pytest.fixture
-    def generator(self, tmp_path: Path) -> SkillGenerator:
-        """Create generator with temp templates."""
-        templates_dir = tmp_path / "templates"
-        claude_dir = templates_dir / "claude"
-        claude_dir.mkdir(parents=True)
-
-        # Create minimal template
-        template_content = """---
-description: {{ step_description }}
-{% if stop_hooks %}
-hooks:
-  Stop:
-    - hooks:
-{% for hook in stop_hooks %}
-{% if hook.type == "script" %}
-        - type: command
-          command: ".deepwork/jobs/{{ job_name }}/{{ hook.path }}"
-{% else %}
-        - type: prompt
-          prompt: "{{ hook.content }}"
-{% endif %}
-{% endfor %}
-{% endif %}
----
-# {{ job_name }}.{{ step_id }}
-{{ instructions_content }}
-"""
-        (claude_dir / "skill-job-step.md.jinja").write_text(template_content)
-        return SkillGenerator(templates_dir)
-
-    @pytest.fixture
-    def job_with_hooks(self, tmp_path: Path) -> JobDefinition:
-        """Create job with stop hooks."""
-        job_dir = tmp_path / "test_job"
-        job_dir.mkdir()
-        steps_dir = job_dir / "steps"
-        steps_dir.mkdir()
-        (steps_dir / "step1.md").write_text("# Step 1 Instructions")
-
-        return JobDefinition(
-            name="test_job",
-            version="1.0.0",
-            summary="Test job",
-            description="A test job",
-            steps=[
-                Step(
-                    id="step1",
-                    name="Step 1",
-                    description="First step",
-                    instructions_file="steps/step1.md",
-                    outputs=[OutputSpec(file="output.md")],
-                    hooks={
-                        "after_agent": [HookAction(prompt="Verify quality criteria")],
-                    },
-                ),
-            ],
-            job_dir=job_dir,
-        )
-
-    @pytest.fixture
-    def job_with_script_hook(self, tmp_path: Path) -> JobDefinition:
-        """Create job with script stop hook."""
-        job_dir = tmp_path / "test_job"
-        job_dir.mkdir()
-        steps_dir = job_dir / "steps"
-        steps_dir.mkdir()
-        (steps_dir / "step1.md").write_text("# Step 1 Instructions")
-
-        return JobDefinition(
-            name="test_job",
-            version="1.0.0",
-            summary="Test job",
-            description="A test job",
-            steps=[
-                Step(
-                    id="step1",
-                    name="Step 1",
-                    description="First step",
-                    instructions_file="steps/step1.md",
-                    outputs=[OutputSpec(file="output.md")],
-                    hooks={
-                        "after_agent": [HookAction(script="hooks/validate.sh")],
-                    },
-                ),
-            ],
-            job_dir=job_dir,
-        )
-
-    @pytest.fixture
-    def job_with_prompt_file_hook(self, tmp_path: Path) -> JobDefinition:
-        """Create job with prompt file stop hook."""
-        job_dir = tmp_path / "test_job"
-        job_dir.mkdir()
-        steps_dir = job_dir / "steps"
-        steps_dir.mkdir()
-        hooks_dir = job_dir / "hooks"
-        hooks_dir.mkdir()
-        (steps_dir / "step1.md").write_text("# Step 1 Instructions")
-        (hooks_dir / "quality.md").write_text("Check all quality criteria")
-
-        return JobDefinition(
-            name="test_job",
-            version="1.0.0",
-            summary="Test job",
-            description="A test job",
-            steps=[
-                Step(
-                    id="step1",
-                    name="Step 1",
-                    description="First step",
-                    instructions_file="steps/step1.md",
-                    outputs=[OutputSpec(file="output.md")],
-                    hooks={
-                        "after_agent": [HookAction(prompt_file="hooks/quality.md")],
-                    },
-                ),
-            ],
-            job_dir=job_dir,
-        )
-
-    def test_build_context_with_prompt_hook(
-        self, generator: SkillGenerator, job_with_hooks: JobDefinition
-    ) -> None:
-        """Test context building includes prompt stop hook."""
-        adapter = ClaudeAdapter()
-        context = generator._build_step_context(job_with_hooks, job_with_hooks.steps[0], 0, adapter)
-        assert "stop_hooks" in context
-        assert len(context["stop_hooks"]) == 1
-        assert context["stop_hooks"][0]["type"] == "prompt"
-        assert context["stop_hooks"][0]["content"] == "Verify quality criteria"
-
-    def test_build_context_with_script_hook(
-        self, generator: SkillGenerator, job_with_script_hook: JobDefinition
-    ) -> None:
-        """Test context building includes script stop hook."""
-        adapter = ClaudeAdapter()
-        context = generator._build_step_context(
-            job_with_script_hook, job_with_script_hook.steps[0], 0, adapter
-        )
-        assert "stop_hooks" in context
-        assert len(context["stop_hooks"]) == 1
-        assert context["stop_hooks"][0]["type"] == "script"
-        assert context["stop_hooks"][0]["path"] == "hooks/validate.sh"
-
-    def test_build_context_with_prompt_file_hook(
-        self, generator: SkillGenerator, job_with_prompt_file_hook: JobDefinition
-    ) -> None:
-        """Test context building reads prompt file content."""
-        adapter = ClaudeAdapter()
-        context = generator._build_step_context(
-            job_with_prompt_file_hook, job_with_prompt_file_hook.steps[0], 0, adapter
-        )
-        assert "stop_hooks" in context
-        assert len(context["stop_hooks"]) == 1
-        assert context["stop_hooks"][0]["type"] == "prompt_file"
-        assert context["stop_hooks"][0]["content"] == "Check all quality criteria"
-
-    def test_build_context_with_missing_prompt_file(
-        self, generator: SkillGenerator, tmp_path: Path
-    ) -> None:
-        """Test error when prompt file is missing."""
-        job_dir = tmp_path / "test_job"
-        job_dir.mkdir()
-        steps_dir = job_dir / "steps"
-        steps_dir.mkdir()
-        (steps_dir / "step1.md").write_text("# Step 1")
-
-        job = JobDefinition(
-            name="test_job",
-            version="1.0.0",
-            summary="Test",
-            description="Test",
-            steps=[
-                Step(
-                    id="step1",
-                    name="Step 1",
-                    description="Step",
-                    instructions_file="steps/step1.md",
-                    outputs=[OutputSpec(file="out.md")],
-                    hooks={
-                        "after_agent": [HookAction(prompt_file="missing.md")],
-                    },
-                )
-            ],
-            job_dir=job_dir,
-        )
-
-        adapter = ClaudeAdapter()
-        with pytest.raises(GeneratorError, match="prompt file not found"):
-            generator._build_step_context(job, job.steps[0], 0, adapter)
-
-    def test_build_context_no_hooks(self, generator: SkillGenerator, tmp_path: Path) -> None:
-        """Test context with no stop hooks."""
-        job_dir = tmp_path / "test_job"
-        job_dir.mkdir()
-        steps_dir = job_dir / "steps"
-        steps_dir.mkdir()
-        (steps_dir / "step1.md").write_text("# Step 1")
-
-        job = JobDefinition(
-            name="test_job",
-            version="1.0.0",
-            summary="Test",
-            description="Test",
-            steps=[
-                Step(
-                    id="step1",
-                    name="Step 1",
-                    description="Step",
-                    instructions_file="steps/step1.md",
-                    outputs=[OutputSpec(file="out.md")],
-                )
-            ],
-            job_dir=job_dir,
-        )
-
-        adapter = ClaudeAdapter()
-        context = generator._build_step_context(job, job.steps[0], 0, adapter)
-        assert context["stop_hooks"] == []
-
-    def test_build_context_multiple_hooks(self, generator: SkillGenerator, tmp_path: Path) -> None:
-        """Test context with multiple stop hooks."""
-        job_dir = tmp_path / "test_job"
-        job_dir.mkdir()
-        steps_dir = job_dir / "steps"
-        steps_dir.mkdir()
-        (steps_dir / "step1.md").write_text("# Step 1")
-
-        job = JobDefinition(
-            name="test_job",
-            version="1.0.0",
-            summary="Test",
-            description="Test",
-            steps=[
-                Step(
-                    id="step1",
-                    name="Step 1",
-                    description="Step",
-                    instructions_file="steps/step1.md",
-                    outputs=[OutputSpec(file="out.md")],
-                    hooks={
-                        "after_agent": [
-                            HookAction(prompt="Check criteria 1"),
-                            HookAction(script="hooks/test.sh"),
-                            HookAction(prompt="Check criteria 2"),
-                        ],
-                    },
-                )
-            ],
-            job_dir=job_dir,
-        )
-
-        adapter = ClaudeAdapter()
-        context = generator._build_step_context(job, job.steps[0], 0, adapter)
-        assert len(context["stop_hooks"]) == 3
-        assert context["stop_hooks"][0]["type"] == "prompt"
-        assert context["stop_hooks"][1]["type"] == "script"
-        assert context["stop_hooks"][2]["type"] == "prompt"
-
-    def test_build_context_duplicates_stop_to_subagent_stop(
-        self, generator: SkillGenerator, job_with_hooks: JobDefinition
-    ) -> None:
-        """Test that Stop hooks are also registered for SubagentStop event.
-
-        Claude Code has separate Stop and SubagentStop events. When a Stop hook
-        is defined, it should also be registered for SubagentStop so the hook
-        triggers for both the main agent and subagents.
-        """
-        adapter = ClaudeAdapter()
-        context = generator._build_step_context(job_with_hooks, job_with_hooks.steps[0], 0, adapter)
-
-        # Should have both Stop and SubagentStop in hooks dict
-        assert "hooks" in context
-        assert "Stop" in context["hooks"]
-        assert "SubagentStop" in context["hooks"]
-
-        # Both should have the same hooks
-        assert context["hooks"]["Stop"] == context["hooks"]["SubagentStop"]
-        assert len(context["hooks"]["Stop"]) == 1
-        assert context["hooks"]["Stop"][0]["type"] == "prompt"
-
-    def test_build_context_no_subagent_stop_without_stop(
-        self, generator: SkillGenerator, tmp_path: Path
-    ) -> None:
-        """Test that SubagentStop is not created if there are no Stop hooks."""
-        job_dir = tmp_path / "test_job"
-        job_dir.mkdir()
-        steps_dir = job_dir / "steps"
-        steps_dir.mkdir()
-        (steps_dir / "step1.md").write_text("# Step 1")
-
-        job = JobDefinition(
-            name="test_job",
-            version="1.0.0",
-            summary="Test",
-            description="Test",
-            steps=[
-                Step(
-                    id="step1",
-                    name="Step 1",
-                    description="Step",
-                    instructions_file="steps/step1.md",
-                    outputs=[OutputSpec(file="out.md")],
-                )
-            ],
-            job_dir=job_dir,
-        )
-
-        adapter = ClaudeAdapter()
-        context = generator._build_step_context(job, job.steps[0], 0, adapter)
-
-        # Should not have Stop or SubagentStop without any hooks
-        assert "hooks" in context
-        assert "Stop" not in context["hooks"]
-        assert "SubagentStop" not in context["hooks"]
-
-
-class TestGeneratorTemplateOutput:
-    """Tests for generated skill file output."""
-
-    @pytest.fixture
-    def full_generator(self) -> SkillGenerator:
-        """Create generator using actual package templates."""
-        # Use the actual templates directory from the package
-        templates_dir = Path(__file__).parent.parent.parent / "src" / "deepwork" / "templates"
-        return SkillGenerator(templates_dir)
-
-    @pytest.fixture
-    def job_with_quality_criteria(self, tmp_path: Path) -> JobDefinition:
-        """Create job with quality_criteria for testing template output."""
-        job_dir = tmp_path / "test_job"
-        job_dir.mkdir()
-        steps_dir = job_dir / "steps"
-        steps_dir.mkdir()
-        (steps_dir / "step1.md").write_text("# Step 1 Instructions\n\nDo the thing.")
-
-        return JobDefinition(
-            name="test_job",
-            version="1.0.0",
-            summary="Test job",
-            description="A test job",
-            steps=[
-                Step(
-                    id="step1",
-                    name="Step 1",
-                    description="First step",
-                    instructions_file="steps/step1.md",
-                    outputs=[OutputSpec(file="output.md")],
-                    quality_criteria=["Criterion 1 is met", "Criterion 2 is verified"],
-                ),
-            ],
-            job_dir=job_dir,
-        )
-
-    @pytest.fixture
-    def job_with_stop_hooks(self, tmp_path: Path) -> JobDefinition:
-        """Create job with custom stop hooks for testing template output."""
-        job_dir = tmp_path / "test_job"
-        job_dir.mkdir()
-        steps_dir = job_dir / "steps"
-        steps_dir.mkdir()
-        (steps_dir / "step1.md").write_text("# Step 1 Instructions")
-
-        return JobDefinition(
-            name="test_job",
-            version="1.0.0",
-            summary="Test job",
-            description="A test job",
-            steps=[
-                Step(
-                    id="step1",
-                    name="Step 1",
-                    description="First step",
-                    instructions_file="steps/step1.md",
-                    outputs=[OutputSpec(file="output.md")],
-                    hooks={
-                        "after_agent": [HookAction(prompt="Custom validation prompt")],
-                    },
-                ),
-            ],
-            job_dir=job_dir,
-        )
-
-    def test_template_generates_subagent_review_for_quality_criteria(
-        self,
-        full_generator: SkillGenerator,
-        job_with_quality_criteria: JobDefinition,
-        tmp_path: Path,
-    ) -> None:
-        """Test that template generates sub-agent review instructions for quality_criteria.
-
-        NOTE: Prompt-based stop hooks don't work in Claude Code (issue #20221).
-        Instead, quality_criteria generates sub-agent review instructions in content.
-        """
-        adapter = ClaudeAdapter()
-        skill_path = full_generator.generate_step_skill(
-            job_with_quality_criteria,
-            job_with_quality_criteria.steps[0],
-            adapter,
-            tmp_path,
-        )
-
-        content = skill_path.read_text()
-
-        # Should NOT generate Stop/SubagentStop hooks (prompt hooks disabled)
-        assert "Stop:" not in content, "Prompt-based Stop hooks should not be generated"
-        assert "SubagentStop:" not in content, (
-            "Prompt-based SubagentStop hooks should not be generated"
-        )
-
-        # Should generate sub-agent review instructions in content
-        assert "## Quality Validation" in content, "Quality Validation section should be generated"
-        assert "sub-agent" in content.lower(), "Sub-agent review instructions should be present"
-        assert "Criterion 1 is met" in content, "Quality criteria should be in content"
-        assert "Criterion 2 is verified" in content, "Quality criteria should be in content"
-
-    def test_template_does_not_generate_prompt_hooks(
-        self, full_generator: SkillGenerator, job_with_stop_hooks: JobDefinition, tmp_path: Path
-    ) -> None:
-        """Test that template does NOT generate prompt-based stop hooks.
-
-        NOTE: Prompt-based stop hooks don't work in Claude Code (issue #20221).
-        The template should filter out prompt hooks and not generate them.
-        """
-        adapter = ClaudeAdapter()
-        skill_path = full_generator.generate_step_skill(
-            job_with_stop_hooks,
-            job_with_stop_hooks.steps[0],
-            adapter,
-            tmp_path,
-        )
-
-        content = skill_path.read_text()
-
-        # Should NOT generate Stop/SubagentStop hooks for prompt-type hooks
-        assert "Stop:" not in content, "Prompt-based Stop hooks should not be generated"
-        assert "SubagentStop:" not in content, (
-            "Prompt-based SubagentStop hooks should not be generated"
-        )
-
-        # The prompt content should NOT appear in the hooks section
-        assert "Custom validation prompt" not in content, (
-            "Prompt content should not be in generated skill"
-        )
-
-    @pytest.fixture
-    def job_with_script_hooks(self, tmp_path: Path) -> JobDefinition:
-        """Create job with script-type stop hooks for testing template output."""
-        job_dir = tmp_path / "test_job"
-        job_dir.mkdir()
-        steps_dir = job_dir / "steps"
-        steps_dir.mkdir()
-        (steps_dir / "step1.md").write_text("# Step 1 Instructions")
-
-        return JobDefinition(
-            name="test_job",
-            version="1.0.0",
-            summary="Test job",
-            description="A test job",
-            steps=[
-                Step(
-                    id="step1",
-                    name="Step 1",
-                    description="First step",
-                    instructions_file="steps/step1.md",
-                    outputs=[OutputSpec(file="output.md")],
-                    hooks={
-                        "after_agent": [HookAction(script="hooks/validate.sh")],
-                    },
-                ),
-            ],
-            job_dir=job_dir,
-        )
-
-    def test_template_generates_stop_hooks_for_script_type(
-        self, full_generator: SkillGenerator, job_with_script_hooks: JobDefinition, tmp_path: Path
-    ) -> None:
-        """Test that template generates Stop/SubagentStop hooks for script-type hooks.
-
-        Script-type hooks (type: command) still work in Claude Code, so they should be generated.
-        """
-        adapter = ClaudeAdapter()
-        skill_path = full_generator.generate_step_skill(
-            job_with_script_hooks,
-            job_with_script_hooks.steps[0],
-            adapter,
-            tmp_path,
-        )
-
-        content = skill_path.read_text()
-
-        # Should generate Stop and SubagentStop hooks for script-type hooks
-        assert "Stop:" in content, "Script-based Stop hooks should be generated"
-        assert "SubagentStop:" in content, "Script-based SubagentStop hooks should be generated"
-
-        # Should contain the command type and path
-        assert "type: command" in content, "Hook should have type: command"
-        assert "hooks/validate.sh" in content, "Hook path should be in generated skill"
diff --git a/tests/unit/test_validation.py b/tests/unit/test_validation.py
index ccd31637..811f4582 100644
--- a/tests/unit/test_validation.py
+++ b/tests/unit/test_validation.py
@@ -22,8 +22,11 @@ def test_validates_simple_job(self) -> None:
                     "name": "Step 1",
                     "description": "First step",
                     "instructions_file": "steps/step1.md",
-                    "outputs": ["output.md"],
+                    "outputs": {
+                        "output.md": {"type": "file", "description": "Output", "required": True}
+                    },
                     "dependencies": [],
+                    "reviews": [],
                 }
             ],
         }
@@ -48,8 +51,11 @@ def test_validates_job_with_user_inputs(self) -> None:
                         {"name": "param1", "description": "First parameter"},
                         {"name": "param2", "description": "Second parameter"},
                     ],
-                    "outputs": ["output.md"],
+                    "outputs": {
+                        "output.md": {"type": "file", "description": "Output", "required": True}
+                    },
                     "dependencies": [],
+                    "reviews": [],
                 }
             ],
         }
@@ -69,8 +75,11 @@ def test_validates_job_with_file_inputs(self) -> None:
                     "name": "Step 1",
                     "description": "First step",
                     "instructions_file": "steps/step1.md",
-                    "outputs": ["data.md"],
+                    "outputs": {
+                        "data.md": {"type": "file", "description": "Data output", "required": True}
+                    },
                     "dependencies": [],
+                    "reviews": [],
                 },
                 {
                     "id": "step2",
@@ -78,8 +87,15 @@ def test_validates_job_with_file_inputs(self) -> None:
                     "description": "Second step",
                     "instructions_file": "steps/step2.md",
                     "inputs": [{"file": "data.md", "from_step": "step1"}],
-                    "outputs": ["result.md"],
+                    "outputs": {
+                        "result.md": {
+                            "type": "file",
+                            "description": "Result output",
+                            "required": True,
+                        }
+                    },
                     "dependencies": ["step1"],
+                    "reviews": [],
                 },
             ],
         }
@@ -112,7 +128,10 @@ def test_raises_for_invalid_job_name(self) -> None:
                     "name": "Step 1",
                     "description": "Step",
                     "instructions_file": "steps/step1.md",
-                    "outputs": ["output.md"],
+                    "outputs": {
+                        "output.md": {"type": "file", "description": "Output", "required": True}
+                    },
+                    "reviews": [],
                 }
             ],
         }
@@ -133,7 +152,10 @@ def test_raises_for_invalid_version(self) -> None:
                     "name": "Step 1",
                     "description": "Step",
                     "instructions_file": "steps/step1.md",
-                    "outputs": ["output.md"],
+                    "outputs": {
+                        "output.md": {"type": "file", "description": "Output", "required": True}
+                    },
+                    "reviews": [],
                 }
             ],
         }
@@ -194,7 +216,10 @@ def test_raises_for_invalid_input_format(self) -> None:
                             # Missing description for user input
                         }
                     ],
-                    "outputs": ["output.md"],
+                    "outputs": {
+                        "output.md": {"type": "file", "description": "Output", "required": True}
+                    },
+                    "reviews": [],
                 }
             ],
         }
@@ -211,3 +236,122 @@ def test_validates_complex_job(self, fixtures_dir) -> None:
 
         assert job_data is not None
         validate_against_schema(job_data, JOB_SCHEMA)
+
+    def test_raises_for_step_missing_reviews(self) -> None:
+        """Test that validation fails for step without reviews field."""
+        job_data = {
+            "name": "job",
+            "version": "1.0.0",
+            "summary": "Missing reviews test",
+            "description": "Job",
+            "steps": [
+                {
+                    "id": "step1",
+                    "name": "Step 1",
+                    "description": "Step",
+                    "instructions_file": "steps/step1.md",
+                    "outputs": {
+                        "output.md": {"type": "file", "description": "Output", "required": True}
+                    },
+                    # Missing reviews - now required
+                }
+            ],
+        }
+
+        with pytest.raises(ValidationError, match="'reviews' is a required property"):
+            validate_against_schema(job_data, JOB_SCHEMA)
+
+    def test_validates_job_with_reviews(self) -> None:
+        """Test validation of job with reviews."""
+        job_data = {
+            "name": "job_with_reviews",
+            "version": "1.0.0",
+            "summary": "Job with reviews",
+            "description": "Job",
+            "steps": [
+                {
+                    "id": "step1",
+                    "name": "Step 1",
+                    "description": "Step",
+                    "instructions_file": "steps/step1.md",
+                    "outputs": {
+                        "report.md": {"type": "file", "description": "Report", "required": True},
+                    },
+                    "reviews": [
+                        {
+                            "run_each": "step",
+                            "quality_criteria": {
+                                "Complete": "Is it complete?",
+                                "Valid": "Is it valid?",
+                            },
+                        },
+                        {
+                            "run_each": "report.md",
+                            "quality_criteria": {
+                                "Well Written": "Is it well written?",
+                            },
+                        },
+                    ],
+                }
+            ],
+        }
+
+        validate_against_schema(job_data, JOB_SCHEMA)
+
+    def test_raises_for_review_missing_run_each(self) -> None:
+        """Test validation fails for review without run_each."""
+        job_data = {
+            "name": "job",
+            "version": "1.0.0",
+            "summary": "Test",
+            "description": "Job",
+            "steps": [
+                {
+                    "id": "step1",
+                    "name": "Step 1",
+                    "description": "Step",
+                    "instructions_file": "steps/step1.md",
+                    "outputs": {
+                        "output.md": {"type": "file", "description": "Output", "required": True}
+                    },
+                    "reviews": [
+                        {
+                            # Missing run_each
+                            "quality_criteria": {"Test": "Is it tested?"},
+                        }
+                    ],
+                }
+            ],
+        }
+
+        with pytest.raises(ValidationError):
+            validate_against_schema(job_data, JOB_SCHEMA)
+
+    def test_raises_for_review_empty_criteria(self) -> None:
+        """Test validation fails for review with empty quality_criteria."""
+        job_data = {
+            "name": "job",
+            "version": "1.0.0",
+            "summary": "Test",
+            "description": "Job",
+            "steps": [
+                {
+                    "id": "step1",
+                    "name": "Step 1",
+                    "description": "Step",
+                    "instructions_file": "steps/step1.md",
+                    "outputs": {
+                        "output.md": {"type": "file", "description": "Output", "required": True}
+                    },
+                    "reviews": [
+                        {
+                            "run_each": "step",
+                            "quality_criteria": {},  # Empty - minProperties: 1
+                        }
+                    ],
+                }
+            ],
+        }
+
+        with pytest.raises(ValidationError):
+            validate_against_schema(job_data, JOB_SCHEMA)
diff --git a/uv.lock b/uv.lock
index 5c61745e..df46f8f2 100644
--- a/uv.lock
+++ b/uv.lock
@@ -2,6 +2,46 @@ version = 1
 revision = 3
 requires-python = ">=3.11"
 
+[[package]]
+name = "aiofiles"
+version = "25.1.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/41/c3/534eac40372d8ee36ef40df62ec129bee4fdb5ad9706e58a29be53b2c970/aiofiles-25.1.0.tar.gz", hash = "sha256:a8d728f0a29de45dc521f18f07297428d56992a742f0cd2701ba86e44d23d5b2", size = 46354, upload-time = "2025-10-09T20:51:04.358Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/bc/8a/340a1555ae33d7354dbca4faa54948d76d89a27ceef032c8c3bc661d003e/aiofiles-25.1.0-py3-none-any.whl", hash = "sha256:abe311e527c862958650f9438e859c1fa7568a141b22abcd015e120e86a85695", size = 14668, upload-time = "2025-10-09T20:51:03.174Z" },
+]
+
+[[package]]
+name = "annotated-types"
+version = "0.7.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/ee/67/531ea369ba64dcff5ec9c3402f9f51bf748cec26dde048a2f973a4eea7f5/annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89", size = 16081, upload-time = "2024-05-20T21:33:25.928Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53", size = 13643, upload-time = "2024-05-20T21:33:24.1Z" },
+]
+
+[[package]]
+name = "anyio"
+version = "4.12.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "idna" },
+    { name = "typing-extensions", marker = "python_full_version < '3.13'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/96/f0/5eb65b2bb0d09ac6776f2eb54adee6abe8228ea05b20a5ad0e4945de8aac/anyio-4.12.1.tar.gz", hash = "sha256:41cfcc3a4c85d3f05c932da7c26d0201ac36f72abd4435ba90d0464a3ffed703", size = 228685, upload-time = "2026-01-06T11:45:21.246Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/38/0e/27be9fdef66e72d64c0cdc3cc2823101b80585f8119b5c112c2e8f5f7dab/anyio-4.12.1-py3-none-any.whl", hash = "sha256:d405828884fc140aa80a3c667b8beed277f1dfedec42ba031bd6ac3db606ab6c", size = 113592, upload-time = "2026-01-06T11:45:19.497Z" },
+]
+
+[[package]]
+name = "async-timeout"
+version = "5.0.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/a5/ae/136395dfbfe00dfc94da3f3e136d0b13f394cba8f4841120e34226265780/async_timeout-5.0.1.tar.gz", hash = "sha256:d9321a7a3d5a6a5e187e824d2fa0793ce379a202935782d555d6e9d2735677d3", size = 9274, upload-time = "2024-11-06T16:41:39.6Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/fe/ba/e2081de779ca30d473f21f5b30e0e737c438205440784c7dfc81efc2b029/async_timeout-5.0.1-py3-none-any.whl", hash = "sha256:39e3809566ff85354557ec2398b55e096c8364bacac9405a7a1fa429e77fe76c", size = 6233, upload-time = "2024-11-06T16:41:37.9Z" },
+]
+
 [[package]]
 name = "attrs"
 version = "25.4.0"
@@ -11,6 +51,197 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/3a/2a/7cc015f5b9f5db42b7d48157e23356022889fc354a2813c15934b7cb5c0e/attrs-25.4.0-py3-none-any.whl", hash = "sha256:adcf7e2a1fb3b36ac48d97835bb6d8ade15b8dcce26aba8bf1d14847b57a3373", size = 67615, upload-time = "2025-10-06T13:54:43.17Z" },
 ]
 
+[[package]]
+name = "authlib"
+version = "1.6.6"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "cryptography" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/bb/9b/b1661026ff24bc641b76b78c5222d614776b0c085bcfdac9bd15a1cb4b35/authlib-1.6.6.tar.gz", hash = "sha256:45770e8e056d0f283451d9996fbb59b70d45722b45d854d58f32878d0a40c38e", size = 164894, upload-time = "2025-12-12T08:01:41.464Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/54/51/321e821856452f7386c4e9df866f196720b1ad0c5ea1623ea7399969ae3b/authlib-1.6.6-py2.py3-none-any.whl", hash = "sha256:7d9e9bc535c13974313a87f53e8430eb6ea3d1cf6ae4f6efcd793f2e949143fd", size = 244005, upload-time = "2025-12-12T08:01:40.209Z" },
+]
+
+[[package]]
+name = "backports-tarfile"
+version = "1.2.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/86/72/cd9b395f25e290e633655a100af28cb253e4393396264a98bd5f5951d50f/backports_tarfile-1.2.0.tar.gz", hash = "sha256:d75e02c268746e1b8144c278978b6e98e85de6ad16f8e4b0844a154557eca991", size = 86406, upload-time = "2024-05-28T17:01:54.731Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/b9/fa/123043af240e49752f1c4bd24da5053b6bd00cad78c2be53c0d1e8b975bc/backports.tarfile-1.2.0-py3-none-any.whl", hash = "sha256:77e284d754527b01fb1e6fa8a1afe577858ebe4e9dad8919e34c862cb399bc34", size = 30181, upload-time = "2024-05-28T17:01:53.112Z" },
+]
+
+[[package]]
+name = "beartype"
+version = "0.22.9"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/c7/94/1009e248bbfbab11397abca7193bea6626806be9a327d399810d523a07cb/beartype-0.22.9.tar.gz", hash = "sha256:8f82b54aa723a2848a56008d18875f91c1db02c32ef6a62319a002e3e25a975f", size = 1608866, upload-time = "2025-12-13T06:50:30.72Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/71/cc/18245721fa7747065ab478316c7fea7c74777d07f37ae60db2e84f8172e8/beartype-0.22.9-py3-none-any.whl", hash = "sha256:d16c9bbc61ea14637596c5f6fbff2ee99cbe3573e46a716401734ef50c3060c2", size = 1333658, upload-time = "2025-12-13T06:50:28.266Z" },
+]
+
+[[package]]
+name = "cachetools"
+version = "7.0.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/98/af/df70e9b65bc77a1cbe0768c0aa4617147f30f8306ded98c1744bcdc0ae1e/cachetools-7.0.0.tar.gz", hash = "sha256:a9abf18ff3b86c7d05b27ead412e235e16ae045925e531fae38d5fada5ed5b08", size = 35796, upload-time = "2026-02-01T18:59:47.411Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/28/df/2dd32cce20cbcf6f2ec456b58d44368161ad28320729f64e5e1d5d7bd0ae/cachetools-7.0.0-py3-none-any.whl", hash = "sha256:d52fef60e6e964a1969cfb61ccf6242a801b432790fe520d78720d757c81cbd2", size = 13487, upload-time = "2026-02-01T18:59:45.981Z" },
+]
+
+[[package]]
+name = "certifi"
+version = "2026.1.4"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/e0/2d/a891ca51311197f6ad14a7ef42e2399f36cf2f9bd44752b3dc4eab60fdc5/certifi-2026.1.4.tar.gz", hash = "sha256:ac726dd470482006e014ad384921ed6438c457018f4b3d204aea4281258b2120", size = 154268, upload-time = "2026-01-04T02:42:41.825Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/e6/ad/3cc14f097111b4de0040c83a525973216457bbeeb63739ef1ed275c1c021/certifi-2026.1.4-py3-none-any.whl", hash = "sha256:9943707519e4add1115f44c2bc244f782c0249876bf51b6599fee1ffbedd685c", size = 152900, upload-time = "2026-01-04T02:42:40.15Z" },
+]
+
+[[package]]
+name = "cffi"
+version = "2.0.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "pycparser", marker = "implementation_name != 'PyPy'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/eb/56/b1ba7935a17738ae8453301356628e8147c79dbb825bcbc73dc7401f9846/cffi-2.0.0.tar.gz", hash = "sha256:44d1b5909021139fe36001ae048dbdde8214afa20200eda0f64c068cac5d5529", size = 523588, upload-time = "2025-09-08T23:24:04.541Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/12/4a/3dfd5f7850cbf0d06dc84ba9aa00db766b52ca38d8b86e3a38314d52498c/cffi-2.0.0-cp311-cp311-macosx_10_13_x86_64.whl", hash = "sha256:b4c854ef3adc177950a8dfc81a86f5115d2abd545751a304c5bcf2c2c7283cfe", size = 184344, upload-time = "2025-09-08T23:22:26.456Z" },
+    { url = "https://files.pythonhosted.org/packages/4f/8b/f0e4c441227ba756aafbe78f117485b25bb26b1c059d01f137fa6d14896b/cffi-2.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2de9a304e27f7596cd03d16f1b7c72219bd944e99cc52b84d0145aefb07cbd3c", size = 180560, upload-time = "2025-09-08T23:22:28.197Z" },
+    { url = "https://files.pythonhosted.org/packages/b1/b7/1200d354378ef52ec227395d95c2576330fd22a869f7a70e88e1447eb234/cffi-2.0.0-cp311-cp311-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:baf5215e0ab74c16e2dd324e8ec067ef59e41125d3eade2b863d294fd5035c92", size = 209613, upload-time = "2025-09-08T23:22:29.475Z" },
+    { url = "https://files.pythonhosted.org/packages/b8/56/6033f5e86e8cc9bb629f0077ba71679508bdf54a9a5e112a3c0b91870332/cffi-2.0.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:730cacb21e1bdff3ce90babf007d0a0917cc3e6492f336c2f0134101e0944f93", size = 216476, upload-time = "2025-09-08T23:22:31.063Z" },
+    { url = "https://files.pythonhosted.org/packages/dc/7f/55fecd70f7ece178db2f26128ec41430d8720f2d12ca97bf8f0a628207d5/cffi-2.0.0-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:6824f87845e3396029f3820c206e459ccc91760e8fa24422f8b0c3d1731cbec5", size = 203374, upload-time = "2025-09-08T23:22:32.507Z" },
+    { url = "https://files.pythonhosted.org/packages/84/ef/a7b77c8bdc0f77adc3b46888f1ad54be8f3b7821697a7b89126e829e676a/cffi-2.0.0-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:9de40a7b0323d889cf8d23d1ef214f565ab154443c42737dfe52ff82cf857664", size = 202597, upload-time = "2025-09-08T23:22:34.132Z" },
+    { url = "https://files.pythonhosted.org/packages/d7/91/500d892b2bf36529a75b77958edfcd5ad8e2ce4064ce2ecfeab2125d72d1/cffi-2.0.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:8941aaadaf67246224cee8c3803777eed332a19d909b47e29c9842ef1e79ac26", size = 215574, upload-time = "2025-09-08T23:22:35.443Z" },
+    { url = "https://files.pythonhosted.org/packages/44/64/58f6255b62b101093d5df22dcb752596066c7e89dd725e0afaed242a61be/cffi-2.0.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:a05d0c237b3349096d3981b727493e22147f934b20f6f125a3eba8f994bec4a9", size = 218971, upload-time = "2025-09-08T23:22:36.805Z" },
+    { url = "https://files.pythonhosted.org/packages/ab/49/fa72cebe2fd8a55fbe14956f9970fe8eb1ac59e5df042f603ef7c8ba0adc/cffi-2.0.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:94698a9c5f91f9d138526b48fe26a199609544591f859c870d477351dc7b2414", size = 211972, upload-time = "2025-09-08T23:22:38.436Z" },
+    { url = "https://files.pythonhosted.org/packages/0b/28/dd0967a76aab36731b6ebfe64dec4e981aff7e0608f60c2d46b46982607d/cffi-2.0.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:5fed36fccc0612a53f1d4d9a816b50a36702c28a2aa880cb8a122b3466638743", size = 217078, upload-time = "2025-09-08T23:22:39.776Z" },
+    { url = "https://files.pythonhosted.org/packages/2b/c0/015b25184413d7ab0a410775fdb4a50fca20f5589b5dab1dbbfa3baad8ce/cffi-2.0.0-cp311-cp311-win32.whl", hash = "sha256:c649e3a33450ec82378822b3dad03cc228b8f5963c0c12fc3b1e0ab940f768a5", size = 172076, upload-time = "2025-09-08T23:22:40.95Z" },
+    { url = "https://files.pythonhosted.org/packages/ae/8f/dc5531155e7070361eb1b7e4c1a9d896d0cb21c49f807a6c03fd63fc877e/cffi-2.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:66f011380d0e49ed280c789fbd08ff0d40968ee7b665575489afa95c98196ab5", size = 182820, upload-time = "2025-09-08T23:22:42.463Z" },
+    { url = "https://files.pythonhosted.org/packages/95/5c/1b493356429f9aecfd56bc171285a4c4ac8697f76e9bbbbb105e537853a1/cffi-2.0.0-cp311-cp311-win_arm64.whl", hash = "sha256:c6638687455baf640e37344fe26d37c404db8b80d037c3d29f58fe8d1c3b194d", size = 177635, upload-time = "2025-09-08T23:22:43.623Z" },
+    { url = "https://files.pythonhosted.org/packages/ea/47/4f61023ea636104d4f16ab488e268b93008c3d0bb76893b1b31db1f96802/cffi-2.0.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:6d02d6655b0e54f54c4ef0b94eb6be0607b70853c45ce98bd278dc7de718be5d", size = 185271, upload-time = "2025-09-08T23:22:44.795Z" },
+    { url = "https://files.pythonhosted.org/packages/df/a2/781b623f57358e360d62cdd7a8c681f074a71d445418a776eef0aadb4ab4/cffi-2.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8eca2a813c1cb7ad4fb74d368c2ffbbb4789d377ee5bb8df98373c2cc0dee76c", size = 181048, upload-time = "2025-09-08T23:22:45.938Z" },
+    { url = "https://files.pythonhosted.org/packages/ff/df/a4f0fbd47331ceeba3d37c2e51e9dfc9722498becbeec2bd8bc856c9538a/cffi-2.0.0-cp312-cp312-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:21d1152871b019407d8ac3985f6775c079416c282e431a4da6afe7aefd2bccbe", size = 212529, upload-time = "2025-09-08T23:22:47.349Z" },
+    { url = "https://files.pythonhosted.org/packages/d5/72/12b5f8d3865bf0f87cf1404d8c374e7487dcf097a1c91c436e72e6badd83/cffi-2.0.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:b21e08af67b8a103c71a250401c78d5e0893beff75e28c53c98f4de42f774062", size = 220097, upload-time = "2025-09-08T23:22:48.677Z" },
+    { url = "https://files.pythonhosted.org/packages/c2/95/7a135d52a50dfa7c882ab0ac17e8dc11cec9d55d2c18dda414c051c5e69e/cffi-2.0.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:1e3a615586f05fc4065a8b22b8152f0c1b00cdbc60596d187c2a74f9e3036e4e", size = 207983, upload-time = "2025-09-08T23:22:50.06Z" },
+    { url = "https://files.pythonhosted.org/packages/3a/c8/15cb9ada8895957ea171c62dc78ff3e99159ee7adb13c0123c001a2546c1/cffi-2.0.0-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:81afed14892743bbe14dacb9e36d9e0e504cd204e0b165062c488942b9718037", size = 206519, upload-time = "2025-09-08T23:22:51.364Z" },
+    { url = "https://files.pythonhosted.org/packages/78/2d/7fa73dfa841b5ac06c7b8855cfc18622132e365f5b81d02230333ff26e9e/cffi-2.0.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:3e17ed538242334bf70832644a32a7aae3d83b57567f9fd60a26257e992b79ba", size = 219572, upload-time = "2025-09-08T23:22:52.902Z" },
+    { url = "https://files.pythonhosted.org/packages/07/e0/267e57e387b4ca276b90f0434ff88b2c2241ad72b16d31836adddfd6031b/cffi-2.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3925dd22fa2b7699ed2617149842d2e6adde22b262fcbfada50e3d195e4b3a94", size = 222963, upload-time = "2025-09-08T23:22:54.518Z" },
+    { url = "https://files.pythonhosted.org/packages/b6/75/1f2747525e06f53efbd878f4d03bac5b859cbc11c633d0fb81432d98a795/cffi-2.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:2c8f814d84194c9ea681642fd164267891702542f028a15fc97d4674b6206187", size = 221361, upload-time = "2025-09-08T23:22:55.867Z" },
+    { url = "https://files.pythonhosted.org/packages/7b/2b/2b6435f76bfeb6bbf055596976da087377ede68df465419d192acf00c437/cffi-2.0.0-cp312-cp312-win32.whl", hash = "sha256:da902562c3e9c550df360bfa53c035b2f241fed6d9aef119048073680ace4a18", size = 172932, upload-time = "2025-09-08T23:22:57.188Z" },
+    { url = "https://files.pythonhosted.org/packages/f8/ed/13bd4418627013bec4ed6e54283b1959cf6db888048c7cf4b4c3b5b36002/cffi-2.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:da68248800ad6320861f129cd9c1bf96ca849a2771a59e0344e88681905916f5", size = 183557, upload-time = "2025-09-08T23:22:58.351Z" },
+    { url = "https://files.pythonhosted.org/packages/95/31/9f7f93ad2f8eff1dbc1c3656d7ca5bfd8fb52c9d786b4dcf19b2d02217fa/cffi-2.0.0-cp312-cp312-win_arm64.whl", hash = "sha256:4671d9dd5ec934cb9a73e7ee9676f9362aba54f7f34910956b84d727b0d73fb6", size = 177762, upload-time = "2025-09-08T23:22:59.668Z" },
+    { url = "https://files.pythonhosted.org/packages/4b/8d/a0a47a0c9e413a658623d014e91e74a50cdd2c423f7ccfd44086ef767f90/cffi-2.0.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:00bdf7acc5f795150faa6957054fbbca2439db2f775ce831222b66f192f03beb", size = 185230, upload-time = "2025-09-08T23:23:00.879Z" },
+    { url = "https://files.pythonhosted.org/packages/4a/d2/a6c0296814556c68ee32009d9c2ad4f85f2707cdecfd7727951ec228005d/cffi-2.0.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:45d5e886156860dc35862657e1494b9bae8dfa63bf56796f2fb56e1679fc0bca", size = 181043, upload-time = "2025-09-08T23:23:02.231Z" },
+    { url = "https://files.pythonhosted.org/packages/b0/1e/d22cc63332bd59b06481ceaac49d6c507598642e2230f201649058a7e704/cffi-2.0.0-cp313-cp313-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:07b271772c100085dd28b74fa0cd81c8fb1a3ba18b21e03d7c27f3436a10606b", size = 212446, upload-time = "2025-09-08T23:23:03.472Z" },
+    { url = "https://files.pythonhosted.org/packages/a9/f5/a2c23eb03b61a0b8747f211eb716446c826ad66818ddc7810cc2cc19b3f2/cffi-2.0.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d48a880098c96020b02d5a1f7d9251308510ce8858940e6fa99ece33f610838b", size = 220101, upload-time = "2025-09-08T23:23:04.792Z" },
+    { url = "https://files.pythonhosted.org/packages/f2/7f/e6647792fc5850d634695bc0e6ab4111ae88e89981d35ac269956605feba/cffi-2.0.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:f93fd8e5c8c0a4aa1f424d6173f14a892044054871c771f8566e4008eaa359d2", size = 207948, upload-time = "2025-09-08T23:23:06.127Z" },
+    { url = "https://files.pythonhosted.org/packages/cb/1e/a5a1bd6f1fb30f22573f76533de12a00bf274abcdc55c8edab639078abb6/cffi-2.0.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:dd4f05f54a52fb558f1ba9f528228066954fee3ebe629fc1660d874d040ae5a3", size = 206422, upload-time = "2025-09-08T23:23:07.753Z" },
+    { url = "https://files.pythonhosted.org/packages/98/df/0a1755e750013a2081e863e7cd37e0cdd02664372c754e5560099eb7aa44/cffi-2.0.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c8d3b5532fc71b7a77c09192b4a5a200ea992702734a2e9279a37f2478236f26", size = 219499, upload-time = "2025-09-08T23:23:09.648Z" },
+    { url = "https://files.pythonhosted.org/packages/50/e1/a969e687fcf9ea58e6e2a928ad5e2dd88cc12f6f0ab477e9971f2309b57c/cffi-2.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:d9b29c1f0ae438d5ee9acb31cadee00a58c46cc9c0b2f9038c6b0b3470877a8c", size = 222928, upload-time = "2025-09-08T23:23:10.928Z" },
+    { url = "https://files.pythonhosted.org/packages/36/54/0362578dd2c9e557a28ac77698ed67323ed5b9775ca9d3fe73fe191bb5d8/cffi-2.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6d50360be4546678fc1b79ffe7a66265e28667840010348dd69a314145807a1b", size = 221302, upload-time = "2025-09-08T23:23:12.42Z" },
+    { url = "https://files.pythonhosted.org/packages/eb/6d/bf9bda840d5f1dfdbf0feca87fbdb64a918a69bca42cfa0ba7b137c48cb8/cffi-2.0.0-cp313-cp313-win32.whl", hash = "sha256:74a03b9698e198d47562765773b4a8309919089150a0bb17d829ad7b44b60d27", size = 172909, upload-time = "2025-09-08T23:23:14.32Z" },
+    { url = "https://files.pythonhosted.org/packages/37/18/6519e1ee6f5a1e579e04b9ddb6f1676c17368a7aba48299c3759bbc3c8b3/cffi-2.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:19f705ada2530c1167abacb171925dd886168931e0a7b78f5bffcae5c6b5be75", size = 183402, upload-time = "2025-09-08T23:23:15.535Z" },
+    { url = "https://files.pythonhosted.org/packages/cb/0e/02ceeec9a7d6ee63bb596121c2c8e9b3a9e150936f4fbef6ca1943e6137c/cffi-2.0.0-cp313-cp313-win_arm64.whl", hash = "sha256:256f80b80ca3853f90c21b23ee78cd008713787b1b1e93eae9f3d6a7134abd91", size = 177780, upload-time = "2025-09-08T23:23:16.761Z" },
+    { url = "https://files.pythonhosted.org/packages/92/c4/3ce07396253a83250ee98564f8d7e9789fab8e58858f35d07a9a2c78de9f/cffi-2.0.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:fc33c5141b55ed366cfaad382df24fe7dcbc686de5be719b207bb248e3053dc5", size = 185320, upload-time = "2025-09-08T23:23:18.087Z" },
+    { url = "https://files.pythonhosted.org/packages/59/dd/27e9fa567a23931c838c6b02d0764611c62290062a6d4e8ff7863daf9730/cffi-2.0.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:c654de545946e0db659b3400168c9ad31b5d29593291482c43e3564effbcee13", size = 181487, upload-time = "2025-09-08T23:23:19.622Z" },
+    { url = "https://files.pythonhosted.org/packages/d6/43/0e822876f87ea8a4ef95442c3d766a06a51fc5298823f884ef87aaad168c/cffi-2.0.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:24b6f81f1983e6df8db3adc38562c83f7d4a0c36162885ec7f7b77c7dcbec97b", size = 220049, upload-time = "2025-09-08T23:23:20.853Z" },
+    { url = "https://files.pythonhosted.org/packages/b4/89/76799151d9c2d2d1ead63c2429da9ea9d7aac304603de0c6e8764e6e8e70/cffi-2.0.0-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:12873ca6cb9b0f0d3a0da705d6086fe911591737a59f28b7936bdfed27c0d47c", size = 207793, upload-time = "2025-09-08T23:23:22.08Z" },
+    { url = "https://files.pythonhosted.org/packages/bb/dd/3465b14bb9e24ee24cb88c9e3730f6de63111fffe513492bf8c808a3547e/cffi-2.0.0-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:d9b97165e8aed9272a6bb17c01e3cc5871a594a446ebedc996e2397a1c1ea8ef", size = 206300, upload-time = "2025-09-08T23:23:23.314Z" },
+    { url = "https://files.pythonhosted.org/packages/47/d9/d83e293854571c877a92da46fdec39158f8d7e68da75bf73581225d28e90/cffi-2.0.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:afb8db5439b81cf9c9d0c80404b60c3cc9c3add93e114dcae767f1477cb53775", size = 219244, upload-time = "2025-09-08T23:23:24.541Z" },
+    { url = "https://files.pythonhosted.org/packages/2b/0f/1f177e3683aead2bb00f7679a16451d302c436b5cbf2505f0ea8146ef59e/cffi-2.0.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:737fe7d37e1a1bffe70bd5754ea763a62a066dc5913ca57e957824b72a85e205", size = 222828, upload-time = "2025-09-08T23:23:26.143Z" },
+    { url = "https://files.pythonhosted.org/packages/c6/0f/cafacebd4b040e3119dcb32fed8bdef8dfe94da653155f9d0b9dc660166e/cffi-2.0.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:38100abb9d1b1435bc4cc340bb4489635dc2f0da7456590877030c9b3d40b0c1", size = 220926, upload-time = "2025-09-08T23:23:27.873Z" },
+    { url = "https://files.pythonhosted.org/packages/3e/aa/df335faa45b395396fcbc03de2dfcab242cd61a9900e914fe682a59170b1/cffi-2.0.0-cp314-cp314-win32.whl", hash = "sha256:087067fa8953339c723661eda6b54bc98c5625757ea62e95eb4898ad5e776e9f", size = 175328, upload-time = "2025-09-08T23:23:44.61Z" },
+    { url = "https://files.pythonhosted.org/packages/bb/92/882c2d30831744296ce713f0feb4c1cd30f346ef747b530b5318715cc367/cffi-2.0.0-cp314-cp314-win_amd64.whl", hash = "sha256:203a48d1fb583fc7d78a4c6655692963b860a417c0528492a6bc21f1aaefab25", size = 185650, upload-time = "2025-09-08T23:23:45.848Z" },
+    { url = "https://files.pythonhosted.org/packages/9f/2c/98ece204b9d35a7366b5b2c6539c350313ca13932143e79dc133ba757104/cffi-2.0.0-cp314-cp314-win_arm64.whl", hash = "sha256:dbd5c7a25a7cb98f5ca55d258b103a2054f859a46ae11aaf23134f9cc0d356ad", size = 180687, upload-time = "2025-09-08T23:23:47.105Z" },
+    { url = "https://files.pythonhosted.org/packages/3e/61/c768e4d548bfa607abcda77423448df8c471f25dbe64fb2ef6d555eae006/cffi-2.0.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:9a67fc9e8eb39039280526379fb3a70023d77caec1852002b4da7e8b270c4dd9", size = 188773, upload-time = "2025-09-08T23:23:29.347Z" },
+    { url = "https://files.pythonhosted.org/packages/2c/ea/5f76bce7cf6fcd0ab1a1058b5af899bfbef198bea4d5686da88471ea0336/cffi-2.0.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:7a66c7204d8869299919db4d5069a82f1561581af12b11b3c9f48c584eb8743d", size = 185013, upload-time = "2025-09-08T23:23:30.63Z" },
+    { url = "https://files.pythonhosted.org/packages/be/b4/c56878d0d1755cf9caa54ba71e5d049479c52f9e4afc230f06822162ab2f/cffi-2.0.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:7cc09976e8b56f8cebd752f7113ad07752461f48a58cbba644139015ac24954c", size = 221593, upload-time = "2025-09-08T23:23:31.91Z" },
+    { url = "https://files.pythonhosted.org/packages/e0/0d/eb704606dfe8033e7128df5e90fee946bbcb64a04fcdaa97321309004000/cffi-2.0.0-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:92b68146a71df78564e4ef48af17551a5ddd142e5190cdf2c5624d0c3ff5b2e8", size = 209354, upload-time = "2025-09-08T23:23:33.214Z" },
+    { url = "https://files.pythonhosted.org/packages/d8/19/3c435d727b368ca475fb8742ab97c9cb13a0de600ce86f62eab7fa3eea60/cffi-2.0.0-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:b1e74d11748e7e98e2f426ab176d4ed720a64412b6a15054378afdb71e0f37dc", size = 208480, upload-time = "2025-09-08T23:23:34.495Z" },
+    { url = "https://files.pythonhosted.org/packages/d0/44/681604464ed9541673e486521497406fadcc15b5217c3e326b061696899a/cffi-2.0.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:28a3a209b96630bca57cce802da70c266eb08c6e97e5afd61a75611ee6c64592", size = 221584, upload-time = "2025-09-08T23:23:36.096Z" },
+    { url = "https://files.pythonhosted.org/packages/25/8e/342a504ff018a2825d395d44d63a767dd8ebc927ebda557fecdaca3ac33a/cffi-2.0.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:7553fb2090d71822f02c629afe6042c299edf91ba1bf94951165613553984512", size = 224443, upload-time = "2025-09-08T23:23:37.328Z" },
+    { url = "https://files.pythonhosted.org/packages/e1/5e/b666bacbbc60fbf415ba9988324a132c9a7a0448a9a8f125074671c0f2c3/cffi-2.0.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:6c6c373cfc5c83a975506110d17457138c8c63016b563cc9ed6e056a82f13ce4", size = 223437, upload-time = "2025-09-08T23:23:38.945Z" },
+    { url = "https://files.pythonhosted.org/packages/a0/1d/ec1a60bd1a10daa292d3cd6bb0b359a81607154fb8165f3ec95fe003b85c/cffi-2.0.0-cp314-cp314t-win32.whl", hash = "sha256:1fc9ea04857caf665289b7a75923f2c6ed559b8298a1b8c49e59f7dd95c8481e", size = 180487, upload-time = "2025-09-08T23:23:40.423Z" },
+    { url = "https://files.pythonhosted.org/packages/bf/41/4c1168c74fac325c0c8156f04b6749c8b6a8f405bbf91413ba088359f60d/cffi-2.0.0-cp314-cp314t-win_amd64.whl", hash = "sha256:d68b6cef7827e8641e8ef16f4494edda8b36104d79773a334beaa1e3521430f6", size = 191726, upload-time = "2025-09-08T23:23:41.742Z" },
+    { url = "https://files.pythonhosted.org/packages/ae/3a/dbeec9d1ee0844c679f6bb5d6ad4e9f198b1224f4e7a32825f47f6192b0c/cffi-2.0.0-cp314-cp314t-win_arm64.whl", hash = "sha256:0a1527a803f0a659de1af2e1fd700213caba79377e27e4693648c2923da066f9", size = 184195, upload-time = "2025-09-08T23:23:43.004Z" },
+]
+
+[[package]]
+name = "charset-normalizer"
+version = "3.4.4"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/13/69/33ddede1939fdd074bce5434295f38fae7136463422fe4fd3e0e89b98062/charset_normalizer-3.4.4.tar.gz", hash = "sha256:94537985111c35f28720e43603b8e7b43a6ecfb2ce1d3058bbe955b73404e21a", size = 129418, upload-time = "2025-10-14T04:42:32.879Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/ed/27/c6491ff4954e58a10f69ad90aca8a1b6fe9c5d3c6f380907af3c37435b59/charset_normalizer-3.4.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:6e1fcf0720908f200cd21aa4e6750a48ff6ce4afe7ff5a79a90d5ed8a08296f8", size = 206988, upload-time = "2025-10-14T04:40:33.79Z" },
+    { url = "https://files.pythonhosted.org/packages/94/59/2e87300fe67ab820b5428580a53cad894272dbb97f38a7a814a2a1ac1011/charset_normalizer-3.4.4-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5f819d5fe9234f9f82d75bdfa9aef3a3d72c4d24a6e57aeaebba32a704553aa0", size = 147324, upload-time = "2025-10-14T04:40:34.961Z" },
+    { url = "https://files.pythonhosted.org/packages/07/fb/0cf61dc84b2b088391830f6274cb57c82e4da8bbc2efeac8c025edb88772/charset_normalizer-3.4.4-cp311-cp311-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:a59cb51917aa591b1c4e6a43c132f0cdc3c76dbad6155df4e28ee626cc77a0a3", size = 142742, upload-time = "2025-10-14T04:40:36.105Z" },
+    { url = "https://files.pythonhosted.org/packages/62/8b/171935adf2312cd745d290ed93cf16cf0dfe320863ab7cbeeae1dcd6535f/charset_normalizer-3.4.4-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:8ef3c867360f88ac904fd3f5e1f902f13307af9052646963ee08ff4f131adafc", size = 160863, upload-time = "2025-10-14T04:40:37.188Z" },
+    { url = "https://files.pythonhosted.org/packages/09/73/ad875b192bda14f2173bfc1bc9a55e009808484a4b256748d931b6948442/charset_normalizer-3.4.4-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d9e45d7faa48ee908174d8fe84854479ef838fc6a705c9315372eacbc2f02897", size = 157837, upload-time = "2025-10-14T04:40:38.435Z" },
+    { url = "https://files.pythonhosted.org/packages/6d/fc/de9cce525b2c5b94b47c70a4b4fb19f871b24995c728e957ee68ab1671ea/charset_normalizer-3.4.4-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:840c25fb618a231545cbab0564a799f101b63b9901f2569faecd6b222ac72381", size = 151550, upload-time = "2025-10-14T04:40:40.053Z" },
+    { url = "https://files.pythonhosted.org/packages/55/c2/43edd615fdfba8c6f2dfbd459b25a6b3b551f24ea21981e23fb768503ce1/charset_normalizer-3.4.4-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:ca5862d5b3928c4940729dacc329aa9102900382fea192fc5e52eb69d6093815", size = 149162, upload-time = "2025-10-14T04:40:41.163Z" },
+    { url = "https://files.pythonhosted.org/packages/03/86/bde4ad8b4d0e9429a4e82c1e8f5c659993a9a863ad62c7df05cf7b678d75/charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d9c7f57c3d666a53421049053eaacdd14bbd0a528e2186fcb2e672effd053bb0", size = 150019, upload-time = "2025-10-14T04:40:42.276Z" },
+    { url = "https://files.pythonhosted.org/packages/1f/86/a151eb2af293a7e7bac3a739b81072585ce36ccfb4493039f49f1d3cae8c/charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:277e970e750505ed74c832b4bf75dac7476262ee2a013f5574dd49075879e161", size = 143310, upload-time = "2025-10-14T04:40:43.439Z" },
+    { url = "https://files.pythonhosted.org/packages/b5/fe/43dae6144a7e07b87478fdfc4dbe9efd5defb0e7ec29f5f58a55aeef7bf7/charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:31fd66405eaf47bb62e8cd575dc621c56c668f27d46a61d975a249930dd5e2a4", size = 162022, upload-time = "2025-10-14T04:40:44.547Z" },
+    { url = "https://files.pythonhosted.org/packages/80/e6/7aab83774f5d2bca81f42ac58d04caf44f0cc2b65fc6db2b3b2e8a05f3b3/charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:0d3d8f15c07f86e9ff82319b3d9ef6f4bf907608f53fe9d92b28ea9ae3d1fd89", size = 149383, upload-time = "2025-10-14T04:40:46.018Z" },
+    { url = "https://files.pythonhosted.org/packages/4f/e8/b289173b4edae05c0dde07f69f8db476a0b511eac556dfe0d6bda3c43384/charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:9f7fcd74d410a36883701fafa2482a6af2ff5ba96b9a620e9e0721e28ead5569", size = 159098, upload-time = "2025-10-14T04:40:47.081Z" },
+    { url = "https://files.pythonhosted.org/packages/d8/df/fe699727754cae3f8478493c7f45f777b17c3ef0600e28abfec8619eb49c/charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ebf3e58c7ec8a8bed6d66a75d7fb37b55e5015b03ceae72a8e7c74495551e224", size = 152991, upload-time = "2025-10-14T04:40:48.246Z" },
+    { url = "https://files.pythonhosted.org/packages/1a/86/584869fe4ddb6ffa3bd9f491b87a01568797fb9bd8933f557dba9771beaf/charset_normalizer-3.4.4-cp311-cp311-win32.whl", hash = "sha256:eecbc200c7fd5ddb9a7f16c7decb07b566c29fa2161a16cf67b8d068bd21690a", size = 99456, upload-time = "2025-10-14T04:40:49.376Z" },
+    { url = "https://files.pythonhosted.org/packages/65/f6/62fdd5feb60530f50f7e38b4f6a1d5203f4d16ff4f9f0952962c044e919a/charset_normalizer-3.4.4-cp311-cp311-win_amd64.whl", hash = "sha256:5ae497466c7901d54b639cf42d5b8c1b6a4fead55215500d2f486d34db48d016", size = 106978, upload-time = "2025-10-14T04:40:50.844Z" },
+    { url = "https://files.pythonhosted.org/packages/7a/9d/0710916e6c82948b3be62d9d398cb4fcf4e97b56d6a6aeccd66c4b2f2bd5/charset_normalizer-3.4.4-cp311-cp311-win_arm64.whl", hash = "sha256:65e2befcd84bc6f37095f5961e68a6f077bf44946771354a28ad434c2cce0ae1", size = 99969, upload-time = "2025-10-14T04:40:52.272Z" },
+    { url = "https://files.pythonhosted.org/packages/f3/85/1637cd4af66fa687396e757dec650f28025f2a2f5a5531a3208dc0ec43f2/charset_normalizer-3.4.4-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:0a98e6759f854bd25a58a73fa88833fba3b7c491169f86ce1180c948ab3fd394", size = 208425, upload-time = "2025-10-14T04:40:53.353Z" },
+    { url = "https://files.pythonhosted.org/packages/9d/6a/04130023fef2a0d9c62d0bae2649b69f7b7d8d24ea5536feef50551029df/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b5b290ccc2a263e8d185130284f8501e3e36c5e02750fc6b6bdeb2e9e96f1e25", size = 148162, upload-time = "2025-10-14T04:40:54.558Z" },
+    { url = "https://files.pythonhosted.org/packages/78/29/62328d79aa60da22c9e0b9a66539feae06ca0f5a4171ac4f7dc285b83688/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:74bb723680f9f7a6234dcf67aea57e708ec1fbdf5699fb91dfd6f511b0a320ef", size = 144558, upload-time = "2025-10-14T04:40:55.677Z" },
+    { url = "https://files.pythonhosted.org/packages/86/bb/b32194a4bf15b88403537c2e120b817c61cd4ecffa9b6876e941c3ee38fe/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f1e34719c6ed0b92f418c7c780480b26b5d9c50349e9a9af7d76bf757530350d", size = 161497, upload-time = "2025-10-14T04:40:57.217Z" },
+    { url = "https://files.pythonhosted.org/packages/19/89/a54c82b253d5b9b111dc74aca196ba5ccfcca8242d0fb64146d4d3183ff1/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:2437418e20515acec67d86e12bf70056a33abdacb5cb1655042f6538d6b085a8", size = 159240, upload-time = "2025-10-14T04:40:58.358Z" },
+    { url = "https://files.pythonhosted.org/packages/c0/10/d20b513afe03acc89ec33948320a5544d31f21b05368436d580dec4e234d/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:11d694519d7f29d6cd09f6ac70028dba10f92f6cdd059096db198c283794ac86", size = 153471, upload-time = "2025-10-14T04:40:59.468Z" },
+    { url = "https://files.pythonhosted.org/packages/61/fa/fbf177b55bdd727010f9c0a3c49eefa1d10f960e5f09d1d887bf93c2e698/charset_normalizer-3.4.4-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:ac1c4a689edcc530fc9d9aa11f5774b9e2f33f9a0c6a57864e90908f5208d30a", size = 150864, upload-time = "2025-10-14T04:41:00.623Z" },
+    { url = "https://files.pythonhosted.org/packages/05/12/9fbc6a4d39c0198adeebbde20b619790e9236557ca59fc40e0e3cebe6f40/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:21d142cc6c0ec30d2efee5068ca36c128a30b0f2c53c1c07bd78cb6bc1d3be5f", size = 150647, upload-time = "2025-10-14T04:41:01.754Z" },
+    { url = "https://files.pythonhosted.org/packages/ad/1f/6a9a593d52e3e8c5d2b167daf8c6b968808efb57ef4c210acb907c365bc4/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:5dbe56a36425d26d6cfb40ce79c314a2e4dd6211d51d6d2191c00bed34f354cc", size = 145110, upload-time = "2025-10-14T04:41:03.231Z" },
+    { url = "https://files.pythonhosted.org/packages/30/42/9a52c609e72471b0fc54386dc63c3781a387bb4fe61c20231a4ebcd58bdd/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:5bfbb1b9acf3334612667b61bd3002196fe2a1eb4dd74d247e0f2a4d50ec9bbf", size = 162839, upload-time = "2025-10-14T04:41:04.715Z" },
+    { url = "https://files.pythonhosted.org/packages/c4/5b/c0682bbf9f11597073052628ddd38344a3d673fda35a36773f7d19344b23/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:d055ec1e26e441f6187acf818b73564e6e6282709e9bcb5b63f5b23068356a15", size = 150667, upload-time = "2025-10-14T04:41:05.827Z" },
+    { url = "https://files.pythonhosted.org/packages/e4/24/a41afeab6f990cf2daf6cb8c67419b63b48cf518e4f56022230840c9bfb2/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:af2d8c67d8e573d6de5bc30cdb27e9b95e49115cd9baad5ddbd1a6207aaa82a9", size = 160535, upload-time = "2025-10-14T04:41:06.938Z" },
+    { url = "https://files.pythonhosted.org/packages/2a/e5/6a4ce77ed243c4a50a1fecca6aaaab419628c818a49434be428fe24c9957/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:780236ac706e66881f3b7f2f32dfe90507a09e67d1d454c762cf642e6e1586e0", size = 154816, upload-time = "2025-10-14T04:41:08.101Z" },
+    { url = "https://files.pythonhosted.org/packages/a8/ef/89297262b8092b312d29cdb2517cb1237e51db8ecef2e9af5edbe7b683b1/charset_normalizer-3.4.4-cp312-cp312-win32.whl", hash = "sha256:5833d2c39d8896e4e19b689ffc198f08ea58116bee26dea51e362ecc7cd3ed26", size = 99694, upload-time = "2025-10-14T04:41:09.23Z" },
+    { url = "https://files.pythonhosted.org/packages/3d/2d/1e5ed9dd3b3803994c155cd9aacb60c82c331bad84daf75bcb9c91b3295e/charset_normalizer-3.4.4-cp312-cp312-win_amd64.whl", hash = "sha256:a79cfe37875f822425b89a82333404539ae63dbdddf97f84dcbc3d339aae9525", size = 107131, upload-time = "2025-10-14T04:41:10.467Z" },
+    { url = "https://files.pythonhosted.org/packages/d0/d9/0ed4c7098a861482a7b6a95603edce4c0d9db2311af23da1fb2b75ec26fc/charset_normalizer-3.4.4-cp312-cp312-win_arm64.whl", hash = "sha256:376bec83a63b8021bb5c8ea75e21c4ccb86e7e45ca4eb81146091b56599b80c3", size = 100390, upload-time = "2025-10-14T04:41:11.915Z" },
+    { url = "https://files.pythonhosted.org/packages/97/45/4b3a1239bbacd321068ea6e7ac28875b03ab8bc0aa0966452db17cd36714/charset_normalizer-3.4.4-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:e1f185f86a6f3403aa2420e815904c67b2f9ebc443f045edd0de921108345794", size = 208091, upload-time = "2025-10-14T04:41:13.346Z" },
+    { url = "https://files.pythonhosted.org/packages/7d/62/73a6d7450829655a35bb88a88fca7d736f9882a27eacdca2c6d505b57e2e/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6b39f987ae8ccdf0d2642338faf2abb1862340facc796048b604ef14919e55ed", size = 147936, upload-time = "2025-10-14T04:41:14.461Z" },
+    { url = "https://files.pythonhosted.org/packages/89/c5/adb8c8b3d6625bef6d88b251bbb0d95f8205831b987631ab0c8bb5d937c2/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:3162d5d8ce1bb98dd51af660f2121c55d0fa541b46dff7bb9b9f86ea1d87de72", size = 144180, upload-time = "2025-10-14T04:41:15.588Z" },
+    { url = "https://files.pythonhosted.org/packages/91/ed/9706e4070682d1cc219050b6048bfd293ccf67b3d4f5a4f39207453d4b99/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:81d5eb2a312700f4ecaa977a8235b634ce853200e828fbadf3a9c50bab278328", size = 161346, upload-time = "2025-10-14T04:41:16.738Z" },
+    { url = "https://files.pythonhosted.org/packages/d5/0d/031f0d95e4972901a2f6f09ef055751805ff541511dc1252ba3ca1f80cf5/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5bd2293095d766545ec1a8f612559f6b40abc0eb18bb2f5d1171872d34036ede", size = 158874, upload-time = "2025-10-14T04:41:17.923Z" },
+    { url = "https://files.pythonhosted.org/packages/f5/83/6ab5883f57c9c801ce5e5677242328aa45592be8a00644310a008d04f922/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a8a8b89589086a25749f471e6a900d3f662d1d3b6e2e59dcecf787b1cc3a1894", size = 153076, upload-time = "2025-10-14T04:41:19.106Z" },
+    { url = "https://files.pythonhosted.org/packages/75/1e/5ff781ddf5260e387d6419959ee89ef13878229732732ee73cdae01800f2/charset_normalizer-3.4.4-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:bc7637e2f80d8530ee4a78e878bce464f70087ce73cf7c1caf142416923b98f1", size = 150601, upload-time = "2025-10-14T04:41:20.245Z" },
+    { url = "https://files.pythonhosted.org/packages/d7/57/71be810965493d3510a6ca79b90c19e48696fb1ff964da319334b12677f0/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f8bf04158c6b607d747e93949aa60618b61312fe647a6369f88ce2ff16043490", size = 150376, upload-time = "2025-10-14T04:41:21.398Z" },
+    { url = "https://files.pythonhosted.org/packages/e5/d5/c3d057a78c181d007014feb7e9f2e65905a6c4ef182c0ddf0de2924edd65/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:554af85e960429cf30784dd47447d5125aaa3b99a6f0683589dbd27e2f45da44", size = 144825, upload-time = "2025-10-14T04:41:22.583Z" },
+    { url = "https://files.pythonhosted.org/packages/e6/8c/d0406294828d4976f275ffbe66f00266c4b3136b7506941d87c00cab5272/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:74018750915ee7ad843a774364e13a3db91682f26142baddf775342c3f5b1133", size = 162583, upload-time = "2025-10-14T04:41:23.754Z" },
+    { url = "https://files.pythonhosted.org/packages/d7/24/e2aa1f18c8f15c4c0e932d9287b8609dd30ad56dbe41d926bd846e22fb8d/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:c0463276121fdee9c49b98908b3a89c39be45d86d1dbaa22957e38f6321d4ce3", size = 150366, upload-time = "2025-10-14T04:41:25.27Z" },
+    { url = "https://files.pythonhosted.org/packages/e4/5b/1e6160c7739aad1e2df054300cc618b06bf784a7a164b0f238360721ab86/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:362d61fd13843997c1c446760ef36f240cf81d3ebf74ac62652aebaf7838561e", size = 160300, upload-time = "2025-10-14T04:41:26.725Z" },
+    { url = "https://files.pythonhosted.org/packages/7a/10/f882167cd207fbdd743e55534d5d9620e095089d176d55cb22d5322f2afd/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9a26f18905b8dd5d685d6d07b0cdf98a79f3c7a918906af7cc143ea2e164c8bc", size = 154465, upload-time = "2025-10-14T04:41:28.322Z" },
+    { url = "https://files.pythonhosted.org/packages/89/66/c7a9e1b7429be72123441bfdbaf2bc13faab3f90b933f664db506dea5915/charset_normalizer-3.4.4-cp313-cp313-win32.whl", hash = "sha256:9b35f4c90079ff2e2edc5b26c0c77925e5d2d255c42c74fdb70fb49b172726ac", size = 99404, upload-time = "2025-10-14T04:41:29.95Z" },
+    { url = "https://files.pythonhosted.org/packages/c4/26/b9924fa27db384bdcd97ab83b4f0a8058d96ad9626ead570674d5e737d90/charset_normalizer-3.4.4-cp313-cp313-win_amd64.whl", hash = "sha256:b435cba5f4f750aa6c0a0d92c541fb79f69a387c91e61f1795227e4ed9cece14", size = 107092, upload-time = "2025-10-14T04:41:31.188Z" },
+    { url = "https://files.pythonhosted.org/packages/af/8f/3ed4bfa0c0c72a7ca17f0380cd9e4dd842b09f664e780c13cff1dcf2ef1b/charset_normalizer-3.4.4-cp313-cp313-win_arm64.whl", hash = "sha256:542d2cee80be6f80247095cc36c418f7bddd14f4a6de45af91dfad36d817bba2", size = 100408, upload-time = "2025-10-14T04:41:32.624Z" },
+    { url = "https://files.pythonhosted.org/packages/2a/35/7051599bd493e62411d6ede36fd5af83a38f37c4767b92884df7301db25d/charset_normalizer-3.4.4-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:da3326d9e65ef63a817ecbcc0df6e94463713b754fe293eaa03da99befb9a5bd", size = 207746, upload-time = "2025-10-14T04:41:33.773Z" },
+    { url = "https://files.pythonhosted.org/packages/10/9a/97c8d48ef10d6cd4fcead2415523221624bf58bcf68a802721a6bc807c8f/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8af65f14dc14a79b924524b1e7fffe304517b2bff5a58bf64f30b98bbc5079eb", size = 147889, upload-time = "2025-10-14T04:41:34.897Z" },
+    { url = "https://files.pythonhosted.org/packages/10/bf/979224a919a1b606c82bd2c5fa49b5c6d5727aa47b4312bb27b1734f53cd/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:74664978bb272435107de04e36db5a9735e78232b85b77d45cfb38f758efd33e", size = 143641, upload-time = "2025-10-14T04:41:36.116Z" },
+    { url = "https://files.pythonhosted.org/packages/ba/33/0ad65587441fc730dc7bd90e9716b30b4702dc7b617e6ba4997dc8651495/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:752944c7ffbfdd10c074dc58ec2d5a8a4cd9493b314d367c14d24c17684ddd14", size = 160779, upload-time = "2025-10-14T04:41:37.229Z" },
+    { url = "https://files.pythonhosted.org/packages/67/ed/331d6b249259ee71ddea93f6f2f0a56cfebd46938bde6fcc6f7b9a3d0e09/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d1f13550535ad8cff21b8d757a3257963e951d96e20ec82ab44bc64aeb62a191", size = 159035, upload-time = "2025-10-14T04:41:38.368Z" },
+    { url = "https://files.pythonhosted.org/packages/67/ff/f6b948ca32e4f2a4576aa129d8bed61f2e0543bf9f5f2b7fc3758ed005c9/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ecaae4149d99b1c9e7b88bb03e3221956f68fd6d50be2ef061b2381b61d20838", size = 152542, upload-time = "2025-10-14T04:41:39.862Z" },
+    { url = "https://files.pythonhosted.org/packages/16/85/276033dcbcc369eb176594de22728541a925b2632f9716428c851b149e83/charset_normalizer-3.4.4-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:cb6254dc36b47a990e59e1068afacdcd02958bdcce30bb50cc1700a8b9d624a6", size = 149524, upload-time = "2025-10-14T04:41:41.319Z" },
+    { url = "https://files.pythonhosted.org/packages/9e/f2/6a2a1f722b6aba37050e626530a46a68f74e63683947a8acff92569f979a/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c8ae8a0f02f57a6e61203a31428fa1d677cbe50c93622b4149d5c0f319c1d19e", size = 150395, upload-time = "2025-10-14T04:41:42.539Z" },
+    { url = "https://files.pythonhosted.org/packages/60/bb/2186cb2f2bbaea6338cad15ce23a67f9b0672929744381e28b0592676824/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:47cc91b2f4dd2833fddaedd2893006b0106129d4b94fdb6af1f4ce5a9965577c", size = 143680, upload-time = "2025-10-14T04:41:43.661Z" },
+    { url = "https://files.pythonhosted.org/packages/7d/a5/bf6f13b772fbb2a90360eb620d52ed8f796f3c5caee8398c3b2eb7b1c60d/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:82004af6c302b5d3ab2cfc4cc5f29db16123b1a8417f2e25f9066f91d4411090", size = 162045, upload-time = "2025-10-14T04:41:44.821Z" },
+    { url = "https://files.pythonhosted.org/packages/df/c5/d1be898bf0dc3ef9030c3825e5d3b83f2c528d207d246cbabe245966808d/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:2b7d8f6c26245217bd2ad053761201e9f9680f8ce52f0fcd8d0755aeae5b2152", size = 149687, upload-time = "2025-10-14T04:41:46.442Z" },
+    { url = "https://files.pythonhosted.org/packages/a5/42/90c1f7b9341eef50c8a1cb3f098ac43b0508413f33affd762855f67a410e/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:799a7a5e4fb2d5898c60b640fd4981d6a25f1c11790935a44ce38c54e985f828", size = 160014, upload-time = "2025-10-14T04:41:47.631Z" },
+    { url = "https://files.pythonhosted.org/packages/76/be/4d3ee471e8145d12795ab655ece37baed0929462a86e72372fd25859047c/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:99ae2cffebb06e6c22bdc25801d7b30f503cc87dbd283479e7b606f70aff57ec", size = 154044, upload-time = "2025-10-14T04:41:48.81Z" },
+    { url = "https://files.pythonhosted.org/packages/b0/6f/8f7af07237c34a1defe7defc565a9bc1807762f672c0fde711a4b22bf9c0/charset_normalizer-3.4.4-cp314-cp314-win32.whl", hash = "sha256:f9d332f8c2a2fcbffe1378594431458ddbef721c1769d78e2cbc06280d8155f9", size = 99940, upload-time = "2025-10-14T04:41:49.946Z" },
+    { url = "https://files.pythonhosted.org/packages/4b/51/8ade005e5ca5b0d80fb4aff72a3775b325bdc3d27408c8113811a7cbe640/charset_normalizer-3.4.4-cp314-cp314-win_amd64.whl", hash = "sha256:8a6562c3700cce886c5be75ade4a5db4214fda19fede41d9792d100288d8f94c", size = 107104, upload-time = "2025-10-14T04:41:51.051Z" },
+    { url = "https://files.pythonhosted.org/packages/da/5f/6b8f83a55bb8278772c5ae54a577f3099025f9ade59d0136ac24a0df4bde/charset_normalizer-3.4.4-cp314-cp314-win_arm64.whl", hash = "sha256:de00632ca48df9daf77a2c65a484531649261ec9f25489917f09e455cb09ddb2", size = 100743, upload-time = "2025-10-14T04:41:52.122Z" },
+    { url = "https://files.pythonhosted.org/packages/0a/4c/925909008ed5a988ccbb72dcc897407e5d6d3bd72410d69e051fc0c14647/charset_normalizer-3.4.4-py3-none-any.whl", hash = "sha256:7a32c560861a02ff789ad905a2fe94e3f840803362c84fecf1851cb4cf3dc37f", size = 53402, upload-time = "2025-10-14T04:42:31.76Z" },
+]
+
 [[package]]
 name = "click"
 version = "8.3.1"
@@ -23,6 +254,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/98/78/01c019cdb5d6498122777c1a43056ebb3ebfeef2076d9d026bfe15583b2b/click-8.3.1-py3-none-any.whl", hash = "sha256:981153a64e25f12d547d3426c367a4857371575ee7ad18df2a6183ab0545b2a6", size = 108274, upload-time = "2025-11-15T20:45:41.139Z" },
 ]
 
+[[package]]
+name = "cloudpickle"
+version = "3.1.2"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/27/fb/576f067976d320f5f0114a8d9fa1215425441bb35627b1993e5afd8111e5/cloudpickle-3.1.2.tar.gz", hash = "sha256:7fda9eb655c9c230dab534f1983763de5835249750e85fbcef43aaa30a9a2414", size = 22330, upload-time = "2025-11-03T09:25:26.604Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/88/39/799be3f2f0f38cc727ee3b4f1445fe6d5e4133064ec2e4115069418a5bb6/cloudpickle-3.1.2-py3-none-any.whl", hash = "sha256:9acb47f6afd73f60dc1df93bb801b472f05ff42fa6c84167d25cb206be1fbf4a", size = 22228, upload-time = "2025-11-03T09:25:25.534Z" },
+]
+
 [[package]]
 name = "colorama"
 version = "0.4.6"
@@ -124,15 +364,106 @@ toml = [
     { name = "tomli", marker = "python_full_version <= '3.11'" },
 ]
 
+[[package]]
+name = "croniter"
+version = "6.0.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "python-dateutil" },
+    { name = "pytz" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/ad/2f/44d1ae153a0e27be56be43465e5cb39b9650c781e001e7864389deb25090/croniter-6.0.0.tar.gz", hash = "sha256:37c504b313956114a983ece2c2b07790b1f1094fe9d81cc94739214748255577", size = 64481, upload-time = "2024-12-17T17:17:47.32Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/07/4b/290b4c3efd6417a8b0c284896de19b1d5855e6dbdb97d2a35e68fa42de85/croniter-6.0.0-py2.py3-none-any.whl", hash = "sha256:2f878c3856f17896979b2a4379ba1f09c83e374931ea15cc835c5dd2eee9b368", size = 25468, upload-time = "2024-12-17T17:17:45.359Z" },
+]
+
+[[package]]
+name = "cryptography"
+version = "46.0.4"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "cffi", marker = "platform_python_implementation != 'PyPy'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/78/19/f748958276519adf6a0c1e79e7b8860b4830dda55ccdf29f2719b5fc499c/cryptography-46.0.4.tar.gz", hash = "sha256:bfd019f60f8abc2ed1b9be4ddc21cfef059c841d86d710bb69909a688cbb8f59", size = 749301, upload-time = "2026-01-28T00:24:37.379Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/8d/99/157aae7949a5f30d51fcb1a9851e8ebd5c74bf99b5285d8bb4b8b9ee641e/cryptography-46.0.4-cp311-abi3-macosx_10_9_universal2.whl", hash = "sha256:281526e865ed4166009e235afadf3a4c4cba6056f99336a99efba65336fd5485", size = 7173686, upload-time = "2026-01-28T00:23:07.515Z" },
+    { url = "https://files.pythonhosted.org/packages/87/91/874b8910903159043b5c6a123b7e79c4559ddd1896e38967567942635778/cryptography-46.0.4-cp311-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5f14fba5bf6f4390d7ff8f086c566454bff0411f6d8aa7af79c88b6f9267aecc", size = 4275871, upload-time = "2026-01-28T00:23:09.439Z" },
+    { url = "https://files.pythonhosted.org/packages/c0/35/690e809be77896111f5b195ede56e4b4ed0435b428c2f2b6d35046fbb5e8/cryptography-46.0.4-cp311-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:47bcd19517e6389132f76e2d5303ded6cf3f78903da2158a671be8de024f4cd0", size = 4423124, upload-time = "2026-01-28T00:23:11.529Z" },
+    { url = "https://files.pythonhosted.org/packages/1a/5b/a26407d4f79d61ca4bebaa9213feafdd8806dc69d3d290ce24996d3cfe43/cryptography-46.0.4-cp311-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:01df4f50f314fbe7009f54046e908d1754f19d0c6d3070df1e6268c5a4af09fa", size = 4277090, upload-time = "2026-01-28T00:23:13.123Z" },
+    { url = "https://files.pythonhosted.org/packages/0c/d8/4bb7aec442a9049827aa34cee1aa83803e528fa55da9a9d45d01d1bb933e/cryptography-46.0.4-cp311-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:5aa3e463596b0087b3da0dbe2b2487e9fc261d25da85754e30e3b40637d61f81", size = 4947652, upload-time = "2026-01-28T00:23:14.554Z" },
+    { url = "https://files.pythonhosted.org/packages/2b/08/f83e2e0814248b844265802d081f2fac2f1cbe6cd258e72ba14ff006823a/cryptography-46.0.4-cp311-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:0a9ad24359fee86f131836a9ac3bffc9329e956624a2d379b613f8f8abaf5255", size = 4455157, upload-time = "2026-01-28T00:23:16.443Z" },
+    { url = "https://files.pythonhosted.org/packages/0a/05/19d849cf4096448779d2dcc9bb27d097457dac36f7273ffa875a93b5884c/cryptography-46.0.4-cp311-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:dc1272e25ef673efe72f2096e92ae39dea1a1a450dd44918b15351f72c5a168e", size = 3981078, upload-time = "2026-01-28T00:23:17.838Z" },
+    { url = "https://files.pythonhosted.org/packages/e6/89/f7bac81d66ba7cde867a743ea5b37537b32b5c633c473002b26a226f703f/cryptography-46.0.4-cp311-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:de0f5f4ec8711ebc555f54735d4c673fc34b65c44283895f1a08c2b49d2fd99c", size = 4276213, upload-time = "2026-01-28T00:23:19.257Z" },
+    { url = "https://files.pythonhosted.org/packages/da/9f/7133e41f24edd827020ad21b068736e792bc68eecf66d93c924ad4719fb3/cryptography-46.0.4-cp311-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:eeeb2e33d8dbcccc34d64651f00a98cb41b2dc69cef866771a5717e6734dfa32", size = 4912190, upload-time = "2026-01-28T00:23:21.244Z" },
+    { url = "https://files.pythonhosted.org/packages/a6/f7/6d43cbaddf6f65b24816e4af187d211f0bc536a29961f69faedc48501d8e/cryptography-46.0.4-cp311-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:3d425eacbc9aceafd2cb429e42f4e5d5633c6f873f5e567077043ef1b9bbf616", size = 4454641, upload-time = "2026-01-28T00:23:22.866Z" },
+    { url = "https://files.pythonhosted.org/packages/9e/4f/ebd0473ad656a0ac912a16bd07db0f5d85184924e14fc88feecae2492834/cryptography-46.0.4-cp311-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:91627ebf691d1ea3976a031b61fb7bac1ccd745afa03602275dda443e11c8de0", size = 4405159, upload-time = "2026-01-28T00:23:25.278Z" },
+    { url = "https://files.pythonhosted.org/packages/d1/f7/7923886f32dc47e27adeff8246e976d77258fd2aa3efdd1754e4e323bf49/cryptography-46.0.4-cp311-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:2d08bc22efd73e8854b0b7caff402d735b354862f1145d7be3b9c0f740fef6a0", size = 4666059, upload-time = "2026-01-28T00:23:26.766Z" },
+    { url = "https://files.pythonhosted.org/packages/eb/a7/0fca0fd3591dffc297278a61813d7f661a14243dd60f499a7a5b48acb52a/cryptography-46.0.4-cp311-abi3-win32.whl", hash = "sha256:82a62483daf20b8134f6e92898da70d04d0ef9a75829d732ea1018678185f4f5", size = 3026378, upload-time = "2026-01-28T00:23:28.317Z" },
+    { url = "https://files.pythonhosted.org/packages/2d/12/652c84b6f9873f0909374864a57b003686c642ea48c84d6c7e2c515e6da5/cryptography-46.0.4-cp311-abi3-win_amd64.whl", hash = "sha256:6225d3ebe26a55dbc8ead5ad1265c0403552a63336499564675b29eb3184c09b", size = 3478614, upload-time = "2026-01-28T00:23:30.275Z" },
+    { url = "https://files.pythonhosted.org/packages/b9/27/542b029f293a5cce59349d799d4d8484b3b1654a7b9a0585c266e974a488/cryptography-46.0.4-cp314-cp314t-macosx_10_9_universal2.whl", hash = "sha256:485e2b65d25ec0d901bca7bcae0f53b00133bf3173916d8e421f6fddde103908", size = 7116417, upload-time = "2026-01-28T00:23:31.958Z" },
+    { url = "https://files.pythonhosted.org/packages/f8/f5/559c25b77f40b6bf828eabaf988efb8b0e17b573545edb503368ca0a2a03/cryptography-46.0.4-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:078e5f06bd2fa5aea5a324f2a09f914b1484f1d0c2a4d6a8a28c74e72f65f2da", size = 4264508, upload-time = "2026-01-28T00:23:34.264Z" },
+    { url = "https://files.pythonhosted.org/packages/49/a1/551fa162d33074b660dc35c9bc3616fefa21a0e8c1edd27b92559902e408/cryptography-46.0.4-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:dce1e4f068f03008da7fa51cc7abc6ddc5e5de3e3d1550334eaf8393982a5829", size = 4409080, upload-time = "2026-01-28T00:23:35.793Z" },
+    { url = "https://files.pythonhosted.org/packages/b0/6a/4d8d129a755f5d6df1bbee69ea2f35ebfa954fa1847690d1db2e8bca46a5/cryptography-46.0.4-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:2067461c80271f422ee7bdbe79b9b4be54a5162e90345f86a23445a0cf3fd8a2", size = 4270039, upload-time = "2026-01-28T00:23:37.263Z" },
+    { url = "https://files.pythonhosted.org/packages/4c/f5/ed3fcddd0a5e39321e595e144615399e47e7c153a1fb8c4862aec3151ff9/cryptography-46.0.4-cp314-cp314t-manylinux_2_28_ppc64le.whl", hash = "sha256:c92010b58a51196a5f41c3795190203ac52edfd5dc3ff99149b4659eba9d2085", size = 4926748, upload-time = "2026-01-28T00:23:38.884Z" },
+    { url = "https://files.pythonhosted.org/packages/43/ae/9f03d5f0c0c00e85ecb34f06d3b79599f20630e4db91b8a6e56e8f83d410/cryptography-46.0.4-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:829c2b12bbc5428ab02d6b7f7e9bbfd53e33efd6672d21341f2177470171ad8b", size = 4442307, upload-time = "2026-01-28T00:23:40.56Z" },
+    { url = "https://files.pythonhosted.org/packages/8b/22/e0f9f2dae8040695103369cf2283ef9ac8abe4d51f68710bec2afd232609/cryptography-46.0.4-cp314-cp314t-manylinux_2_31_armv7l.whl", hash = "sha256:62217ba44bf81b30abaeda1488686a04a702a261e26f87db51ff61d9d3510abd", size = 3959253, upload-time = "2026-01-28T00:23:42.827Z" },
+    { url = "https://files.pythonhosted.org/packages/01/5b/6a43fcccc51dae4d101ac7d378a8724d1ba3de628a24e11bf2f4f43cba4d/cryptography-46.0.4-cp314-cp314t-manylinux_2_34_aarch64.whl", hash = "sha256:9c2da296c8d3415b93e6053f5a728649a87a48ce084a9aaf51d6e46c87c7f2d2", size = 4269372, upload-time = "2026-01-28T00:23:44.655Z" },
+    { url = "https://files.pythonhosted.org/packages/17/b7/0f6b8c1dd0779df2b526e78978ff00462355e31c0a6f6cff8a3e99889c90/cryptography-46.0.4-cp314-cp314t-manylinux_2_34_ppc64le.whl", hash = "sha256:9b34d8ba84454641a6bf4d6762d15847ecbd85c1316c0a7984e6e4e9f748ec2e", size = 4891908, upload-time = "2026-01-28T00:23:46.48Z" },
+    { url = "https://files.pythonhosted.org/packages/83/17/259409b8349aa10535358807a472c6a695cf84f106022268d31cea2b6c97/cryptography-46.0.4-cp314-cp314t-manylinux_2_34_x86_64.whl", hash = "sha256:df4a817fa7138dd0c96c8c8c20f04b8aaa1fac3bbf610913dcad8ea82e1bfd3f", size = 4441254, upload-time = "2026-01-28T00:23:48.403Z" },
+    { url = "https://files.pythonhosted.org/packages/9c/fe/e4a1b0c989b00cee5ffa0764401767e2d1cf59f45530963b894129fd5dce/cryptography-46.0.4-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:b1de0ebf7587f28f9190b9cb526e901bf448c9e6a99655d2b07fff60e8212a82", size = 4396520, upload-time = "2026-01-28T00:23:50.26Z" },
+    { url = "https://files.pythonhosted.org/packages/b3/81/ba8fd9657d27076eb40d6a2f941b23429a3c3d2f56f5a921d6b936a27bc9/cryptography-46.0.4-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:9b4d17bc7bd7cdd98e3af40b441feaea4c68225e2eb2341026c84511ad246c0c", size = 4651479, upload-time = "2026-01-28T00:23:51.674Z" },
+    { url = "https://files.pythonhosted.org/packages/00/03/0de4ed43c71c31e4fe954edd50b9d28d658fef56555eba7641696370a8e2/cryptography-46.0.4-cp314-cp314t-win32.whl", hash = "sha256:c411f16275b0dea722d76544a61d6421e2cc829ad76eec79280dbdc9ddf50061", size = 3001986, upload-time = "2026-01-28T00:23:53.485Z" },
+    { url = "https://files.pythonhosted.org/packages/5c/70/81830b59df7682917d7a10f833c4dab2a5574cd664e86d18139f2b421329/cryptography-46.0.4-cp314-cp314t-win_amd64.whl", hash = "sha256:728fedc529efc1439eb6107b677f7f7558adab4553ef8669f0d02d42d7b959a7", size = 3468288, upload-time = "2026-01-28T00:23:55.09Z" },
+    { url = "https://files.pythonhosted.org/packages/56/f7/f648fdbb61d0d45902d3f374217451385edc7e7768d1b03ff1d0e5ffc17b/cryptography-46.0.4-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:a9556ba711f7c23f77b151d5798f3ac44a13455cc68db7697a1096e6d0563cab", size = 7169583, upload-time = "2026-01-28T00:23:56.558Z" },
+    { url = "https://files.pythonhosted.org/packages/d8/cc/8f3224cbb2a928de7298d6ed4790f5ebc48114e02bdc9559196bfb12435d/cryptography-46.0.4-cp38-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8bf75b0259e87fa70bddc0b8b4078b76e7fd512fd9afae6c1193bcf440a4dbef", size = 4275419, upload-time = "2026-01-28T00:23:58.364Z" },
+    { url = "https://files.pythonhosted.org/packages/17/43/4a18faa7a872d00e4264855134ba82d23546c850a70ff209e04ee200e76f/cryptography-46.0.4-cp38-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:3c268a3490df22270955966ba236d6bc4a8f9b6e4ffddb78aac535f1a5ea471d", size = 4419058, upload-time = "2026-01-28T00:23:59.867Z" },
+    { url = "https://files.pythonhosted.org/packages/ee/64/6651969409821d791ba12346a124f55e1b76f66a819254ae840a965d4b9c/cryptography-46.0.4-cp38-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:812815182f6a0c1d49a37893a303b44eaac827d7f0d582cecfc81b6427f22973", size = 4278151, upload-time = "2026-01-28T00:24:01.731Z" },
+    { url = "https://files.pythonhosted.org/packages/20/0b/a7fce65ee08c3c02f7a8310cc090a732344066b990ac63a9dfd0a655d321/cryptography-46.0.4-cp38-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:a90e43e3ef65e6dcf969dfe3bb40cbf5aef0d523dff95bfa24256be172a845f4", size = 4939441, upload-time = "2026-01-28T00:24:03.175Z" },
+    { url = "https://files.pythonhosted.org/packages/db/a7/20c5701e2cd3e1dfd7a19d2290c522a5f435dd30957d431dcb531d0f1413/cryptography-46.0.4-cp38-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:a05177ff6296644ef2876fce50518dffb5bcdf903c85250974fc8bc85d54c0af", size = 4451617, upload-time = "2026-01-28T00:24:05.403Z" },
+    { url = "https://files.pythonhosted.org/packages/00/dc/3e16030ea9aa47b63af6524c354933b4fb0e352257c792c4deeb0edae367/cryptography-46.0.4-cp38-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:daa392191f626d50f1b136c9b4cf08af69ca8279d110ea24f5c2700054d2e263", size = 3977774, upload-time = "2026-01-28T00:24:06.851Z" },
+    { url = "https://files.pythonhosted.org/packages/42/c8/ad93f14118252717b465880368721c963975ac4b941b7ef88f3c56bf2897/cryptography-46.0.4-cp38-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:e07ea39c5b048e085f15923511d8121e4a9dc45cee4e3b970ca4f0d338f23095", size = 4277008, upload-time = "2026-01-28T00:24:08.926Z" },
+    { url = "https://files.pythonhosted.org/packages/00/cf/89c99698151c00a4631fbfcfcf459d308213ac29e321b0ff44ceeeac82f1/cryptography-46.0.4-cp38-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:d5a45ddc256f492ce42a4e35879c5e5528c09cd9ad12420828c972951d8e016b", size = 4903339, upload-time = "2026-01-28T00:24:12.009Z" },
+    { url = "https://files.pythonhosted.org/packages/03/c3/c90a2cb358de4ac9309b26acf49b2a100957e1ff5cc1e98e6c4996576710/cryptography-46.0.4-cp38-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:6bb5157bf6a350e5b28aee23beb2d84ae6f5be390b2f8ee7ea179cda077e1019", size = 4451216, upload-time = "2026-01-28T00:24:13.975Z" },
+    { url = "https://files.pythonhosted.org/packages/96/2c/8d7f4171388a10208671e181ca43cdc0e596d8259ebacbbcfbd16de593da/cryptography-46.0.4-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:dd5aba870a2c40f87a3af043e0dee7d9eb02d4aff88a797b48f2b43eff8c3ab4", size = 4404299, upload-time = "2026-01-28T00:24:16.169Z" },
+    { url = "https://files.pythonhosted.org/packages/e9/23/cbb2036e450980f65c6e0a173b73a56ff3bccd8998965dea5cc9ddd424a5/cryptography-46.0.4-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:93d8291da8d71024379ab2cb0b5c57915300155ad42e07f76bea6ad838d7e59b", size = 4664837, upload-time = "2026-01-28T00:24:17.629Z" },
+    { url = "https://files.pythonhosted.org/packages/0a/21/f7433d18fe6d5845329cbdc597e30caf983229c7a245bcf54afecc555938/cryptography-46.0.4-cp38-abi3-win32.whl", hash = "sha256:0563655cb3c6d05fb2afe693340bc050c30f9f34e15763361cf08e94749401fc", size = 3009779, upload-time = "2026-01-28T00:24:20.198Z" },
+    { url = "https://files.pythonhosted.org/packages/3a/6a/bd2e7caa2facffedf172a45c1a02e551e6d7d4828658c9a245516a598d94/cryptography-46.0.4-cp38-abi3-win_amd64.whl", hash = "sha256:fa0900b9ef9c49728887d1576fd8d9e7e3ea872fa9b25ef9b64888adc434e976", size = 3466633, upload-time = "2026-01-28T00:24:21.851Z" },
+    { url = "https://files.pythonhosted.org/packages/59/e0/f9c6c53e1f2a1c2507f00f2faba00f01d2f334b35b0fbfe5286715da2184/cryptography-46.0.4-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:766330cce7416c92b5e90c3bb71b1b79521760cdcfc3a6a1a182d4c9fab23d2b", size = 3476316, upload-time = "2026-01-28T00:24:24.144Z" },
+    { url = "https://files.pythonhosted.org/packages/27/7a/f8d2d13227a9a1a9fe9c7442b057efecffa41f1e3c51d8622f26b9edbe8f/cryptography-46.0.4-pp311-pypy311_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:c236a44acfb610e70f6b3e1c3ca20ff24459659231ef2f8c48e879e2d32b73da", size = 4216693, upload-time = "2026-01-28T00:24:25.758Z" },
+    { url = "https://files.pythonhosted.org/packages/c5/de/3787054e8f7972658370198753835d9d680f6cd4a39df9f877b57f0dd69c/cryptography-46.0.4-pp311-pypy311_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:8a15fb869670efa8f83cbffbc8753c1abf236883225aed74cd179b720ac9ec80", size = 4382765, upload-time = "2026-01-28T00:24:27.577Z" },
+    { url = "https://files.pythonhosted.org/packages/8a/5f/60e0afb019973ba6a0b322e86b3d61edf487a4f5597618a430a2a15f2d22/cryptography-46.0.4-pp311-pypy311_pp73-manylinux_2_34_aarch64.whl", hash = "sha256:fdc3daab53b212472f1524d070735b2f0c214239df131903bae1d598016fa822", size = 4216066, upload-time = "2026-01-28T00:24:29.056Z" },
+    { url = "https://files.pythonhosted.org/packages/81/8e/bf4a0de294f147fee66f879d9bae6f8e8d61515558e3d12785dd90eca0be/cryptography-46.0.4-pp311-pypy311_pp73-manylinux_2_34_x86_64.whl", hash = "sha256:44cc0675b27cadb71bdbb96099cca1fa051cd11d2ade09e5cd3a2edb929ed947", size = 4382025, upload-time = "2026-01-28T00:24:30.681Z" },
+    { url = "https://files.pythonhosted.org/packages/79/f4/9ceb90cfd6a3847069b0b0b353fd3075dc69b49defc70182d8af0c4ca390/cryptography-46.0.4-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:be8c01a7d5a55f9a47d1888162b76c8f49d62b234d88f0ff91a9fbebe32ffbc3", size = 3406043, upload-time = "2026-01-28T00:24:32.236Z" },
+]
+
+[[package]]
+name = "cyclopts"
+version = "4.5.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "attrs" },
+    { name = "docstring-parser" },
+    { name = "rich" },
+    { name = "rich-rst" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/d4/93/6085aa89c3fff78a5180987354538d72e43b0db27e66a959302d0c07821a/cyclopts-4.5.1.tar.gz", hash = "sha256:fadc45304763fd9f5d6033727f176898d17a1778e194436964661a005078a3dd", size = 162075, upload-time = "2026-01-25T15:23:54.07Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/1c/7c/996760c30f1302704af57c66ff2d723f7d656d0d0b93563b5528a51484bb/cyclopts-4.5.1-py3-none-any.whl", hash = "sha256:0642c93601e554ca6b7b9abd81093847ea4448b2616280f2a0952416574e8c7a", size = 199807, upload-time = "2026-01-25T15:23:55.219Z" },
+]
+
 [[package]]
 name = "deepwork"
-version = "0.5.1"
+version = "0.7.0"
 source = { editable = "." }
 dependencies = [
+    { name = "aiofiles" },
     { name = "click" },
+    { name = "fastmcp" },
     { name = "gitpython" },
     { name = "jinja2" },
     { name = "jsonschema" },
+    { name = "mcp" },
+    { name = "pydantic" },
     { name = "pyyaml" },
     { name = "rich" },
 ]
@@ -141,29 +472,235 @@ dependencies = [
 dev = [
     { name = "mypy" },
     { name = "pytest" },
+    { name = "pytest-asyncio" },
     { name = "pytest-cov" },
     { name = "pytest-mock" },
     { name = "ruff" },
+    { name = "types-aiofiles" },
     { name = "types-pyyaml" },
 ]
 
+[package.dev-dependencies]
+dev = [
+    { name = "fpdf2" },
+    { name = "pytest" },
+    { name = "pytest-asyncio" },
+    { name = "pytest-cov" },
+    { name = "pytest-mock" },
+]
+
 [package.metadata]
 requires-dist = [
+    { name = "aiofiles", specifier = ">=24.0.0" },
     { name = "click", specifier = ">=8.1.0" },
+    { name = "fastmcp", specifier = ">=2.0" },
     { name = "gitpython", specifier = ">=3.1.0" },
     { name = "jinja2", specifier = ">=3.1.0" },
     { name = "jsonschema", specifier = ">=4.17.0" },
+    { name = "mcp", specifier = ">=1.0.0" },
     { name = "mypy", marker = "extra == 'dev'", specifier = ">=1.0" },
+    { name = "pydantic", specifier = ">=2.0" },
     { name = "pytest", marker = "extra == 'dev'", specifier = ">=7.0" },
+    { name = "pytest-asyncio", marker = "extra == 'dev'", specifier = ">=0.24.0" },
     { name = "pytest-cov", marker = "extra == 'dev'", specifier = ">=4.0" },
     { name = "pytest-mock", marker = "extra == 'dev'", specifier = ">=3.10" },
     { name = "pyyaml", specifier = ">=6.0" },
     { name = "rich", specifier = ">=13.0.0" },
     { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.1.0" },
+    { name = "types-aiofiles", marker = "extra == 'dev'" },
     { name = "types-pyyaml", marker = "extra == 'dev'" },
 ]
 provides-extras = ["dev"]
 
+[package.metadata.requires-dev]
+dev = [
+    { name = "fpdf2", specifier = ">=2.8.5" },
+    { name = "pytest", specifier = ">=9.0.2" },
+    { name = "pytest-asyncio", specifier = ">=1.3.0" },
+    { name = "pytest-cov", specifier = ">=7.0.0" },
+    { name = "pytest-mock", specifier = ">=3.15.1" },
+]
+
+[[package]]
+name = "defusedxml"
+version = "0.7.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/0f/d5/c66da9b79e5bdb124974bfe172b4daf3c984ebd9c2a06e2b8a4dc7331c72/defusedxml-0.7.1.tar.gz", hash = "sha256:1bb3032db185915b62d7c6209c5a8792be6a32ab2fedacc84e01b52c51aa3e69", size = 75520, upload-time = "2021-03-08T10:59:26.269Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/07/6c/aa3f2f849e01cb6a001cd8554a88d4c77c5c1a31c95bdf1cf9301e6d9ef4/defusedxml-0.7.1-py2.py3-none-any.whl", hash = "sha256:a352e7e428770286cc899e2542b6cdaedb2b4953ff269a210103ec58f6198a61", size = 25604, upload-time = "2021-03-08T10:59:24.45Z" },
+]
+
+[[package]]
+name = "diskcache"
+version = "5.6.3"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/3f/21/1c1ffc1a039ddcc459db43cc108658f32c57d271d7289a2794e401d0fdb6/diskcache-5.6.3.tar.gz", hash = "sha256:2c3a3fa2743d8535d832ec61c2054a1641f41775aa7c556758a109941e33e4fc", size = 67916, upload-time = "2023-08-31T06:12:00.316Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/3f/27/4570e78fc0bf5ea0ca45eb1de3818a23787af9b390c0b0a0033a1b8236f9/diskcache-5.6.3-py3-none-any.whl", hash = "sha256:5e31b2d5fbad117cc363ebaf6b689474db18a1f6438bc82358b024abd4c2ca19", size = 45550, upload-time = "2023-08-31T06:11:58.822Z" },
+]
+
+[[package]]
+name = "dnspython"
+version = "2.8.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/8c/8b/57666417c0f90f08bcafa776861060426765fdb422eb10212086fb811d26/dnspython-2.8.0.tar.gz", hash = "sha256:181d3c6996452cb1189c4046c61599b84a5a86e099562ffde77d26984ff26d0f", size = 368251, upload-time = "2025-09-07T18:58:00.022Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/ba/5a/18ad964b0086c6e62e2e7500f7edc89e3faa45033c71c1893d34eed2b2de/dnspython-2.8.0-py3-none-any.whl", hash = "sha256:01d9bbc4a2d76bf0db7c1f729812ded6d912bd318d3b1cf81d30c0f845dbf3af", size = 331094, upload-time = "2025-09-07T18:57:58.071Z" },
+]
+
+[[package]]
+name = "docstring-parser"
+version = "0.17.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/b2/9d/c3b43da9515bd270df0f80548d9944e389870713cc1fe2b8fb35fe2bcefd/docstring_parser-0.17.0.tar.gz", hash = "sha256:583de4a309722b3315439bb31d64ba3eebada841f2e2cee23b99df001434c912", size = 27442, upload-time = "2025-07-21T07:35:01.868Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/55/e2/2537ebcff11c1ee1ff17d8d0b6f4db75873e3b0fb32c2d4a2ee31ecb310a/docstring_parser-0.17.0-py3-none-any.whl", hash = "sha256:cf2569abd23dce8099b300f9b4fa8191e9582dda731fd533daf54c4551658708", size = 36896, upload-time = "2025-07-21T07:35:00.684Z" },
+]
+
+[[package]]
+name = "docutils"
+version = "0.22.4"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/ae/b6/03bb70946330e88ffec97aefd3ea75ba575cb2e762061e0e62a213befee8/docutils-0.22.4.tar.gz", hash = "sha256:4db53b1fde9abecbb74d91230d32ab626d94f6badfc575d6db9194a49df29968", size = 2291750, upload-time = "2025-12-18T19:00:26.443Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/02/10/5da547df7a391dcde17f59520a231527b8571e6f46fc8efb02ccb370ab12/docutils-0.22.4-py3-none-any.whl", hash = "sha256:d0013f540772d1420576855455d050a2180186c91c15779301ac2ccb3eeb68de", size = 633196, upload-time = "2025-12-18T19:00:18.077Z" },
+]
+
+[[package]]
+name = "email-validator"
+version = "2.3.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "dnspython" },
+    { name = "idna" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/f5/22/900cb125c76b7aaa450ce02fd727f452243f2e91a61af068b40adba60ea9/email_validator-2.3.0.tar.gz", hash = "sha256:9fc05c37f2f6cf439ff414f8fc46d917929974a82244c20eb10231ba60c54426", size = 51238, upload-time = "2025-08-26T13:09:06.831Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/de/15/545e2b6cf2e3be84bc1ed85613edd75b8aea69807a71c26f4ca6a9258e82/email_validator-2.3.0-py3-none-any.whl", hash = "sha256:80f13f623413e6b197ae73bb10bf4eb0908faf509ad8362c5edeb0be7fd450b4", size = 35604, upload-time = "2025-08-26T13:09:05.858Z" },
+]
+
+[[package]]
+name = "exceptiongroup"
+version = "1.3.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "typing-extensions", marker = "python_full_version < '3.13'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/50/79/66800aadf48771f6b62f7eb014e352e5d06856655206165d775e675a02c9/exceptiongroup-1.3.1.tar.gz", hash = "sha256:8b412432c6055b0b7d14c310000ae93352ed6754f70fa8f7c34141f91c4e3219", size = 30371, upload-time = "2025-11-21T23:01:54.787Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/8a/0e/97c33bf5009bdbac74fd2beace167cab3f978feb69cc36f1ef79360d6c4e/exceptiongroup-1.3.1-py3-none-any.whl", hash = "sha256:a7a39a3bd276781e98394987d3a5701d0c4edffb633bb7a5144577f82c773598", size = 16740, upload-time = "2025-11-21T23:01:53.443Z" },
+]
+
+[[package]]
+name = "fakeredis"
+version = "2.33.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "redis" },
+    { name = "sortedcontainers" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/5f/f9/57464119936414d60697fcbd32f38909bb5688b616ae13de6e98384433e0/fakeredis-2.33.0.tar.gz", hash = "sha256:d7bc9a69d21df108a6451bbffee23b3eba432c21a654afc7ff2d295428ec5770", size = 175187, upload-time = "2025-12-16T19:45:52.269Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/6e/78/a850fed8aeef96d4a99043c90b818b2ed5419cd5b24a4049fd7cfb9f1471/fakeredis-2.33.0-py3-none-any.whl", hash = "sha256:de535f3f9ccde1c56672ab2fdd6a8efbc4f2619fc2f1acc87b8737177d71c965", size = 119605, upload-time = "2025-12-16T19:45:51.08Z" },
+]
+
+[package.optional-dependencies]
+lua = [
+    { name = "lupa" },
+]
+
+[[package]]
+name = "fastmcp"
+version = "2.14.5"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "authlib" },
+    { name = "cyclopts" },
+    { name = "exceptiongroup" },
+    { name = "httpx" },
+    { name = "jsonref" },
+    { name = "jsonschema-path" },
+    { name = "mcp" },
+    { name = "openapi-pydantic" },
+    { name = "packaging" },
+    { name = "platformdirs" },
+    { name = "py-key-value-aio", extra = ["disk", "keyring", "memory"] },
+    { name = "pydantic", extra = ["email"] },
+    { name = "pydocket" },
+    { name = "pyperclip" },
+    { name = "python-dotenv" },
+    { name = "rich" },
+    { name = "uvicorn" },
+    { name = "websockets" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/3b/32/982678d44f13849530a74ab101ed80e060c2ee6cf87471f062dcf61705fd/fastmcp-2.14.5.tar.gz", hash = "sha256:38944dc582c541d55357082bda2241cedb42cd3a78faea8a9d6a2662c62a42d7", size = 8296329, upload-time = "2026-02-03T15:35:21.005Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/e5/c1/1a35ec68ff76ea8443aa115b18bcdee748a4ada2124537ee90522899ff9f/fastmcp-2.14.5-py3-none-any.whl", hash = "sha256:d81e8ec813f5089d3624bec93944beaefa86c0c3a4ef1111cbef676a761ebccf", size = 417784, upload-time = "2026-02-03T15:35:18.489Z" },
+]
+
+[[package]]
+name = "fonttools"
+version = "4.61.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/ec/ca/cf17b88a8df95691275a3d77dc0a5ad9907f328ae53acbe6795da1b2f5ed/fonttools-4.61.1.tar.gz", hash = "sha256:6675329885c44657f826ef01d9e4fb33b9158e9d93c537d84ad8399539bc6f69", size = 3565756, upload-time = "2025-12-12T17:31:24.246Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/69/12/bf9f4eaa2fad039356cc627587e30ed008c03f1cebd3034376b5ee8d1d44/fonttools-4.61.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:c6604b735bb12fef8e0efd5578c9fb5d3d8532d5001ea13a19cddf295673ee09", size = 2852213, upload-time = "2025-12-12T17:29:46.675Z" },
+    { url = "https://files.pythonhosted.org/packages/ac/49/4138d1acb6261499bedde1c07f8c2605d1d8f9d77a151e5507fd3ef084b6/fonttools-4.61.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5ce02f38a754f207f2f06557523cd39a06438ba3aafc0639c477ac409fc64e37", size = 2401689, upload-time = "2025-12-12T17:29:48.769Z" },
+    { url = "https://files.pythonhosted.org/packages/e5/fe/e6ce0fe20a40e03aef906af60aa87668696f9e4802fa283627d0b5ed777f/fonttools-4.61.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:77efb033d8d7ff233385f30c62c7c79271c8885d5c9657d967ede124671bbdfb", size = 5058809, upload-time = "2025-12-12T17:29:51.701Z" },
+    { url = "https://files.pythonhosted.org/packages/79/61/1ca198af22f7dd22c17ab86e9024ed3c06299cfdb08170640e9996d501a0/fonttools-4.61.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:75c1a6dfac6abd407634420c93864a1e274ebc1c7531346d9254c0d8f6ca00f9", size = 5036039, upload-time = "2025-12-12T17:29:53.659Z" },
+    { url = "https://files.pythonhosted.org/packages/99/cc/fa1801e408586b5fce4da9f5455af8d770f4fc57391cd5da7256bb364d38/fonttools-4.61.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:0de30bfe7745c0d1ffa2b0b7048fb7123ad0d71107e10ee090fa0b16b9452e87", size = 5034714, upload-time = "2025-12-12T17:29:55.592Z" },
+    { url = "https://files.pythonhosted.org/packages/bf/aa/b7aeafe65adb1b0a925f8f25725e09f078c635bc22754f3fecb7456955b0/fonttools-4.61.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:58b0ee0ab5b1fc9921eccfe11d1435added19d6494dde14e323f25ad2bc30c56", size = 5158648, upload-time = "2025-12-12T17:29:57.861Z" },
+    { url = "https://files.pythonhosted.org/packages/99/f9/08ea7a38663328881384c6e7777bbefc46fd7d282adfd87a7d2b84ec9d50/fonttools-4.61.1-cp311-cp311-win32.whl", hash = "sha256:f79b168428351d11e10c5aeb61a74e1851ec221081299f4cf56036a95431c43a", size = 2280681, upload-time = "2025-12-12T17:29:59.943Z" },
+    { url = "https://files.pythonhosted.org/packages/07/ad/37dd1ae5fa6e01612a1fbb954f0927681f282925a86e86198ccd7b15d515/fonttools-4.61.1-cp311-cp311-win_amd64.whl", hash = "sha256:fe2efccb324948a11dd09d22136fe2ac8a97d6c1347cf0b58a911dcd529f66b7", size = 2331951, upload-time = "2025-12-12T17:30:02.254Z" },
+    { url = "https://files.pythonhosted.org/packages/6f/16/7decaa24a1bd3a70c607b2e29f0adc6159f36a7e40eaba59846414765fd4/fonttools-4.61.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:f3cb4a569029b9f291f88aafc927dd53683757e640081ca8c412781ea144565e", size = 2851593, upload-time = "2025-12-12T17:30:04.225Z" },
+    { url = "https://files.pythonhosted.org/packages/94/98/3c4cb97c64713a8cf499b3245c3bf9a2b8fd16a3e375feff2aed78f96259/fonttools-4.61.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:41a7170d042e8c0024703ed13b71893519a1a6d6e18e933e3ec7507a2c26a4b2", size = 2400231, upload-time = "2025-12-12T17:30:06.47Z" },
+    { url = "https://files.pythonhosted.org/packages/b7/37/82dbef0f6342eb01f54bca073ac1498433d6ce71e50c3c3282b655733b31/fonttools-4.61.1-cp312-cp312-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:10d88e55330e092940584774ee5e8a6971b01fc2f4d3466a1d6c158230880796", size = 4954103, upload-time = "2025-12-12T17:30:08.432Z" },
+    { url = "https://files.pythonhosted.org/packages/6c/44/f3aeac0fa98e7ad527f479e161aca6c3a1e47bb6996b053d45226fe37bf2/fonttools-4.61.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:15acc09befd16a0fb8a8f62bc147e1a82817542d72184acca9ce6e0aeda9fa6d", size = 5004295, upload-time = "2025-12-12T17:30:10.56Z" },
+    { url = "https://files.pythonhosted.org/packages/14/e8/7424ced75473983b964d09f6747fa09f054a6d656f60e9ac9324cf40c743/fonttools-4.61.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e6bcdf33aec38d16508ce61fd81838f24c83c90a1d1b8c68982857038673d6b8", size = 4944109, upload-time = "2025-12-12T17:30:12.874Z" },
+    { url = "https://files.pythonhosted.org/packages/c8/8b/6391b257fa3d0b553d73e778f953a2f0154292a7a7a085e2374b111e5410/fonttools-4.61.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:5fade934607a523614726119164ff621e8c30e8fa1ffffbbd358662056ba69f0", size = 5093598, upload-time = "2025-12-12T17:30:15.79Z" },
+    { url = "https://files.pythonhosted.org/packages/d9/71/fd2ea96cdc512d92da5678a1c98c267ddd4d8c5130b76d0f7a80f9a9fde8/fonttools-4.61.1-cp312-cp312-win32.whl", hash = "sha256:75da8f28eff26defba42c52986de97b22106cb8f26515b7c22443ebc9c2d3261", size = 2269060, upload-time = "2025-12-12T17:30:18.058Z" },
+    { url = "https://files.pythonhosted.org/packages/80/3b/a3e81b71aed5a688e89dfe0e2694b26b78c7d7f39a5ffd8a7d75f54a12a8/fonttools-4.61.1-cp312-cp312-win_amd64.whl", hash = "sha256:497c31ce314219888c0e2fce5ad9178ca83fe5230b01a5006726cdf3ac9f24d9", size = 2319078, upload-time = "2025-12-12T17:30:22.862Z" },
+    { url = "https://files.pythonhosted.org/packages/4b/cf/00ba28b0990982530addb8dc3e9e6f2fa9cb5c20df2abdda7baa755e8fe1/fonttools-4.61.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:8c56c488ab471628ff3bfa80964372fc13504ece601e0d97a78ee74126b2045c", size = 2846454, upload-time = "2025-12-12T17:30:24.938Z" },
+    { url = "https://files.pythonhosted.org/packages/5a/ca/468c9a8446a2103ae645d14fee3f610567b7042aba85031c1c65e3ef7471/fonttools-4.61.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:dc492779501fa723b04d0ab1f5be046797fee17d27700476edc7ee9ae535a61e", size = 2398191, upload-time = "2025-12-12T17:30:27.343Z" },
+    { url = "https://files.pythonhosted.org/packages/a3/4b/d67eedaed19def5967fade3297fed8161b25ba94699efc124b14fb68cdbc/fonttools-4.61.1-cp313-cp313-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:64102ca87e84261419c3747a0d20f396eb024bdbeb04c2bfb37e2891f5fadcb5", size = 4928410, upload-time = "2025-12-12T17:30:29.771Z" },
+    { url = "https://files.pythonhosted.org/packages/b0/8d/6fb3494dfe61a46258cd93d979cf4725ded4eb46c2a4ca35e4490d84daea/fonttools-4.61.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4c1b526c8d3f615a7b1867f38a9410849c8f4aef078535742198e942fba0e9bd", size = 4984460, upload-time = "2025-12-12T17:30:32.073Z" },
+    { url = "https://files.pythonhosted.org/packages/f7/f1/a47f1d30b3dc00d75e7af762652d4cbc3dff5c2697a0dbd5203c81afd9c3/fonttools-4.61.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:41ed4b5ec103bd306bb68f81dc166e77409e5209443e5773cb4ed837bcc9b0d3", size = 4925800, upload-time = "2025-12-12T17:30:34.339Z" },
+    { url = "https://files.pythonhosted.org/packages/a7/01/e6ae64a0981076e8a66906fab01539799546181e32a37a0257b77e4aa88b/fonttools-4.61.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:b501c862d4901792adaec7c25b1ecc749e2662543f68bb194c42ba18d6eec98d", size = 5067859, upload-time = "2025-12-12T17:30:36.593Z" },
+    { url = "https://files.pythonhosted.org/packages/73/aa/28e40b8d6809a9b5075350a86779163f074d2b617c15d22343fce81918db/fonttools-4.61.1-cp313-cp313-win32.whl", hash = "sha256:4d7092bb38c53bbc78e9255a59158b150bcdc115a1e3b3ce0b5f267dc35dd63c", size = 2267821, upload-time = "2025-12-12T17:30:38.478Z" },
+    { url = "https://files.pythonhosted.org/packages/1a/59/453c06d1d83dc0951b69ef692d6b9f1846680342927df54e9a1ca91c6f90/fonttools-4.61.1-cp313-cp313-win_amd64.whl", hash = "sha256:21e7c8d76f62ab13c9472ccf74515ca5b9a761d1bde3265152a6dc58700d895b", size = 2318169, upload-time = "2025-12-12T17:30:40.951Z" },
+    { url = "https://files.pythonhosted.org/packages/32/8f/4e7bf82c0cbb738d3c2206c920ca34ca74ef9dabde779030145d28665104/fonttools-4.61.1-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:fff4f534200a04b4a36e7ae3cb74493afe807b517a09e99cb4faa89a34ed6ecd", size = 2846094, upload-time = "2025-12-12T17:30:43.511Z" },
+    { url = "https://files.pythonhosted.org/packages/71/09/d44e45d0a4f3a651f23a1e9d42de43bc643cce2971b19e784cc67d823676/fonttools-4.61.1-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:d9203500f7c63545b4ce3799319fe4d9feb1a1b89b28d3cb5abd11b9dd64147e", size = 2396589, upload-time = "2025-12-12T17:30:45.681Z" },
+    { url = "https://files.pythonhosted.org/packages/89/18/58c64cafcf8eb677a99ef593121f719e6dcbdb7d1c594ae5a10d4997ca8a/fonttools-4.61.1-cp314-cp314-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:fa646ecec9528bef693415c79a86e733c70a4965dd938e9a226b0fc64c9d2e6c", size = 4877892, upload-time = "2025-12-12T17:30:47.709Z" },
+    { url = "https://files.pythonhosted.org/packages/8a/ec/9e6b38c7ba1e09eb51db849d5450f4c05b7e78481f662c3b79dbde6f3d04/fonttools-4.61.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:11f35ad7805edba3aac1a3710d104592df59f4b957e30108ae0ba6c10b11dd75", size = 4972884, upload-time = "2025-12-12T17:30:49.656Z" },
+    { url = "https://files.pythonhosted.org/packages/5e/87/b5339da8e0256734ba0dbbf5b6cdebb1dd79b01dc8c270989b7bcd465541/fonttools-4.61.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:b931ae8f62db78861b0ff1ac017851764602288575d65b8e8ff1963fed419063", size = 4924405, upload-time = "2025-12-12T17:30:51.735Z" },
+    { url = "https://files.pythonhosted.org/packages/0b/47/e3409f1e1e69c073a3a6fd8cb886eb18c0bae0ee13db2c8d5e7f8495e8b7/fonttools-4.61.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:b148b56f5de675ee16d45e769e69f87623a4944f7443850bf9a9376e628a89d2", size = 5035553, upload-time = "2025-12-12T17:30:54.823Z" },
+    { url = "https://files.pythonhosted.org/packages/bf/b6/1f6600161b1073a984294c6c031e1a56ebf95b6164249eecf30012bb2e38/fonttools-4.61.1-cp314-cp314-win32.whl", hash = "sha256:9b666a475a65f4e839d3d10473fad6d47e0a9db14a2f4a224029c5bfde58ad2c", size = 2271915, upload-time = "2025-12-12T17:30:57.913Z" },
+    { url = "https://files.pythonhosted.org/packages/52/7b/91e7b01e37cc8eb0e1f770d08305b3655e4f002fc160fb82b3390eabacf5/fonttools-4.61.1-cp314-cp314-win_amd64.whl", hash = "sha256:4f5686e1fe5fce75d82d93c47a438a25bf0d1319d2843a926f741140b2b16e0c", size = 2323487, upload-time = "2025-12-12T17:30:59.804Z" },
+    { url = "https://files.pythonhosted.org/packages/39/5c/908ad78e46c61c3e3ed70c3b58ff82ab48437faf84ec84f109592cabbd9f/fonttools-4.61.1-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:e76ce097e3c57c4bcb67c5aa24a0ecdbd9f74ea9219997a707a4061fbe2707aa", size = 2929571, upload-time = "2025-12-12T17:31:02.574Z" },
+    { url = "https://files.pythonhosted.org/packages/bd/41/975804132c6dea64cdbfbaa59f3518a21c137a10cccf962805b301ac6ab2/fonttools-4.61.1-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:9cfef3ab326780c04d6646f68d4b4742aae222e8b8ea1d627c74e38afcbc9d91", size = 2435317, upload-time = "2025-12-12T17:31:04.974Z" },
+    { url = "https://files.pythonhosted.org/packages/b0/5a/aef2a0a8daf1ebaae4cfd83f84186d4a72ee08fd6a8451289fcd03ffa8a4/fonttools-4.61.1-cp314-cp314t-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:a75c301f96db737e1c5ed5fd7d77d9c34466de16095a266509e13da09751bd19", size = 4882124, upload-time = "2025-12-12T17:31:07.456Z" },
+    { url = "https://files.pythonhosted.org/packages/80/33/d6db3485b645b81cea538c9d1c9219d5805f0877fda18777add4671c5240/fonttools-4.61.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:91669ccac46bbc1d09e9273546181919064e8df73488ea087dcac3e2968df9ba", size = 5100391, upload-time = "2025-12-12T17:31:09.732Z" },
+    { url = "https://files.pythonhosted.org/packages/6c/d6/675ba631454043c75fcf76f0ca5463eac8eb0666ea1d7badae5fea001155/fonttools-4.61.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:c33ab3ca9d3ccd581d58e989d67554e42d8d4ded94ab3ade3508455fe70e65f7", size = 4978800, upload-time = "2025-12-12T17:31:11.681Z" },
+    { url = "https://files.pythonhosted.org/packages/7f/33/d3ec753d547a8d2bdaedd390d4a814e8d5b45a093d558f025c6b990b554c/fonttools-4.61.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:664c5a68ec406f6b1547946683008576ef8b38275608e1cee6c061828171c118", size = 5006426, upload-time = "2025-12-12T17:31:13.764Z" },
+    { url = "https://files.pythonhosted.org/packages/b4/40/cc11f378b561a67bea850ab50063366a0d1dd3f6d0a30ce0f874b0ad5664/fonttools-4.61.1-cp314-cp314t-win32.whl", hash = "sha256:aed04cabe26f30c1647ef0e8fbb207516fd40fe9472e9439695f5c6998e60ac5", size = 2335377, upload-time = "2025-12-12T17:31:16.49Z" },
+    { url = "https://files.pythonhosted.org/packages/e4/ff/c9a2b66b39f8628531ea58b320d66d951267c98c6a38684daa8f50fb02f8/fonttools-4.61.1-cp314-cp314t-win_amd64.whl", hash = "sha256:2180f14c141d2f0f3da43f3a81bc8aa4684860f6b0e6f9e165a4831f24e6a23b", size = 2400613, upload-time = "2025-12-12T17:31:18.769Z" },
+    { url = "https://files.pythonhosted.org/packages/c7/4e/ce75a57ff3aebf6fc1f4e9d508b8e5810618a33d900ad6c19eb30b290b97/fonttools-4.61.1-py3-none-any.whl", hash = "sha256:17d2bf5d541add43822bcf0c43d7d847b160c9bb01d15d5007d84e2217aaa371", size = 1148996, upload-time = "2025-12-12T17:31:21.03Z" },
+]
+
+[[package]]
+name = "fpdf2"
+version = "2.8.5"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "defusedxml" },
+    { name = "fonttools" },
+    { name = "pillow" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/e9/c0/784b130a28f4ed612e9aff26d1118e1f91005713dcd0a35e60b54d316b56/fpdf2-2.8.5.tar.gz", hash = "sha256:af4491ef2e0a5fe476f9d61362925658949c995f7e804438c0e81008f1550247", size = 336046, upload-time = "2025-10-29T14:17:59.569Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/35/a7/8532d8fffe6d1c388ad4941d678dd0da4d8da80434f2dbf4f35de0fa8029/fpdf2-2.8.5-py3-none-any.whl", hash = "sha256:2356b94e2a5fcbd1fe53ac5cbb83494e9003308860ab180050255ba50961d913", size = 301627, upload-time = "2025-10-29T14:17:57.685Z" },
+]
+
 [[package]]
 name = "gitdb"
 version = "4.0.12"
@@ -188,6 +725,73 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/6a/09/e21df6aef1e1ffc0c816f0522ddc3f6dcded766c3261813131c78a704470/gitpython-3.1.46-py3-none-any.whl", hash = "sha256:79812ed143d9d25b6d176a10bb511de0f9c67b1fa641d82097b0ab90398a2058", size = 208620, upload-time = "2026-01-01T15:37:30.574Z" },
 ]
 
+[[package]]
+name = "h11"
+version = "0.16.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/01/ee/02a2c011bdab74c6fb3c75474d40b3052059d95df7e73351460c8588d963/h11-0.16.0.tar.gz", hash = "sha256:4e35b956cf45792e4caa5885e69fba00bdbc6ffafbfa020300e549b208ee5ff1", size = 101250, upload-time = "2025-04-24T03:35:25.427Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515, upload-time = "2025-04-24T03:35:24.344Z" },
+]
+
+[[package]]
+name = "httpcore"
+version = "1.0.9"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "certifi" },
+    { name = "h11" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/06/94/82699a10bca87a5556c9c59b5963f2d039dbd239f25bc2a63907a05a14cb/httpcore-1.0.9.tar.gz", hash = "sha256:6e34463af53fd2ab5d807f399a9b45ea31c3dfa2276f15a2c3f00afff6e176e8", size = 85484, upload-time = "2025-04-24T22:06:22.219Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/7e/f5/f66802a942d491edb555dd61e3a9961140fd64c90bce1eafd741609d334d/httpcore-1.0.9-py3-none-any.whl", hash = "sha256:2d400746a40668fc9dec9810239072b40b4484b640a8c38fd654a024c7a1bf55", size = 78784, upload-time = "2025-04-24T22:06:20.566Z" },
+]
+
+[[package]]
+name = "httpx"
+version = "0.28.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "anyio" },
+    { name = "certifi" },
+    { name = "httpcore" },
+    { name = "idna" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/b1/df/48c586a5fe32a0f01324ee087459e112ebb7224f646c0b5023f5e79e9956/httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc", size = 141406, upload-time = "2024-12-06T15:37:23.222Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517, upload-time = "2024-12-06T15:37:21.509Z" },
+]
+
+[[package]]
+name = "httpx-sse"
+version = "0.4.3"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/0f/4c/751061ffa58615a32c31b2d82e8482be8dd4a89154f003147acee90f2be9/httpx_sse-0.4.3.tar.gz", hash = "sha256:9b1ed0127459a66014aec3c56bebd93da3c1bc8bb6618c8082039a44889a755d", size = 15943, upload-time = "2025-10-10T21:48:22.271Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/d2/fd/6668e5aec43ab844de6fc74927e155a3b37bf40d7c3790e49fc0406b6578/httpx_sse-0.4.3-py3-none-any.whl", hash = "sha256:0ac1c9fe3c0afad2e0ebb25a934a59f4c7823b60792691f779fad2c5568830fc", size = 8960, upload-time = "2025-10-10T21:48:21.158Z" },
+]
+
+[[package]]
+name = "idna"
+version = "3.11"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/6f/6d/0703ccc57f3a7233505399edb88de3cbd678da106337b9fcde432b65ed60/idna-3.11.tar.gz", hash = "sha256:795dafcc9c04ed0c1fb032c2aa73654d8e8c5023a7df64a53f39190ada629902", size = 194582, upload-time = "2025-10-12T14:55:20.501Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/0e/61/66938bbb5fc52dbdf84594873d5b51fb1f7c7794e9c0f5bd885f30bc507b/idna-3.11-py3-none-any.whl", hash = "sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea", size = 71008, upload-time = "2025-10-12T14:55:18.883Z" },
+]
+
+[[package]]
+name = "importlib-metadata"
+version = "8.7.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "zipp" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/f3/49/3b30cad09e7771a4982d9975a8cbf64f00d4a1ececb53297f1d9a7be1b10/importlib_metadata-8.7.1.tar.gz", hash = "sha256:49fef1ae6440c182052f407c8d34a68f72efc36db9ca90dc0113398f2fdde8bb", size = 57107, upload-time = "2025-12-21T10:00:19.278Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/fa/5e/f8e9a1d23b9c20a551a8a02ea3637b4642e22c2626e3a13a9a29cdea99eb/importlib_metadata-8.7.1-py3-none-any.whl", hash = "sha256:5a1f80bf1daa489495071efbb095d75a634cf28a8bc299581244063b53176151", size = 27865, upload-time = "2025-12-21T10:00:18.329Z" },
+]
+
 [[package]]
 name = "iniconfig"
 version = "2.3.0"
@@ -197,6 +801,51 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484, upload-time = "2025-10-18T21:55:41.639Z" },
 ]
 
+[[package]]
+name = "jaraco-classes"
+version = "3.4.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "more-itertools" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/06/c0/ed4a27bc5571b99e3cff68f8a9fa5b56ff7df1c2251cc715a652ddd26402/jaraco.classes-3.4.0.tar.gz", hash = "sha256:47a024b51d0239c0dd8c8540c6c7f484be3b8fcf0b2d85c13825780d3b3f3acd", size = 11780, upload-time = "2024-03-31T07:27:36.643Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/7f/66/b15ce62552d84bbfcec9a4873ab79d993a1dd4edb922cbfccae192bd5b5f/jaraco.classes-3.4.0-py3-none-any.whl", hash = "sha256:f662826b6bed8cace05e7ff873ce0f9283b5c924470fe664fff1c2f00f581790", size = 6777, upload-time = "2024-03-31T07:27:34.792Z" },
+]
+
+[[package]]
+name = "jaraco-context"
+version = "6.1.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "backports-tarfile", marker = "python_full_version < '3.12'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/cb/9c/a788f5bb29c61e456b8ee52ce76dbdd32fd72cd73dd67bc95f42c7a8d13c/jaraco_context-6.1.0.tar.gz", hash = "sha256:129a341b0a85a7db7879e22acd66902fda67882db771754574338898b2d5d86f", size = 15850, upload-time = "2026-01-13T02:53:53.847Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/8d/48/aa685dbf1024c7bd82bede569e3a85f82c32fd3d79ba5fea578f0159571a/jaraco_context-6.1.0-py3-none-any.whl", hash = "sha256:a43b5ed85815223d0d3cfdb6d7ca0d2bc8946f28f30b6f3216bda070f68badda", size = 7065, upload-time = "2026-01-13T02:53:53.031Z" },
+]
+
+[[package]]
+name = "jaraco-functools"
+version = "4.4.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "more-itertools" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/0f/27/056e0638a86749374d6f57d0b0db39f29509cce9313cf91bdc0ac4d91084/jaraco_functools-4.4.0.tar.gz", hash = "sha256:da21933b0417b89515562656547a77b4931f98176eb173644c0d35032a33d6bb", size = 19943, upload-time = "2025-12-21T09:29:43.6Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/fd/c4/813bb09f0985cb21e959f21f2464169eca882656849adf727ac7bb7e1767/jaraco_functools-4.4.0-py3-none-any.whl", hash = "sha256:9eec1e36f45c818d9bf307c8948eb03b2b56cd44087b3cdc989abca1f20b9176", size = 10481, upload-time = "2025-12-21T09:29:42.27Z" },
+]
+
+[[package]]
+name = "jeepney"
+version = "0.9.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/7b/6f/357efd7602486741aa73ffc0617fb310a29b588ed0fd69c2399acbb85b0c/jeepney-0.9.0.tar.gz", hash = "sha256:cf0e9e845622b81e4a28df94c40345400256ec608d0e55bb8a3feaa9163f5732", size = 106758, upload-time = "2025-02-27T18:51:01.684Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/b2/a3/e137168c9c44d18eff0376253da9f1e9234d0239e0ee230d2fee6cea8e55/jeepney-0.9.0-py3-none-any.whl", hash = "sha256:97e5714520c16fc0a45695e5365a2e11b81ea79bba796e26f9f1d178cb182683", size = 49010, upload-time = "2025-02-27T18:51:00.104Z" },
+]
+
 [[package]]
 name = "jinja2"
 version = "3.1.6"
@@ -209,6 +858,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/62/a1/3d680cbfd5f4b8f15abc1d571870c5fc3e594bb582bc3b64ea099db13e56/jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67", size = 134899, upload-time = "2025-03-05T20:05:00.369Z" },
 ]
 
+[[package]]
+name = "jsonref"
+version = "1.1.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/aa/0d/c1f3277e90ccdb50d33ed5ba1ec5b3f0a242ed8c1b1a85d3afeb68464dca/jsonref-1.1.0.tar.gz", hash = "sha256:32fe8e1d85af0fdefbebce950af85590b22b60f9e95443176adbde4e1ecea552", size = 8814, upload-time = "2023-01-16T16:10:04.455Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/0c/ec/e1db9922bceb168197a558a2b8c03a7963f1afe93517ddd3cf99f202f996/jsonref-1.1.0-py3-none-any.whl", hash = "sha256:590dc7773df6c21cbf948b5dac07a72a251db28b0238ceecce0a2abfa8ec30a9", size = 9425, upload-time = "2023-01-16T16:10:02.255Z" },
+]
+
 [[package]]
 name = "jsonschema"
 version = "4.26.0"
@@ -224,6 +882,21 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/69/90/f63fb5873511e014207a475e2bb4e8b2e570d655b00ac19a9a0ca0a385ee/jsonschema-4.26.0-py3-none-any.whl", hash = "sha256:d489f15263b8d200f8387e64b4c3a75f06629559fb73deb8fdfb525f2dab50ce", size = 90630, upload-time = "2026-01-07T13:41:05.306Z" },
 ]
 
+[[package]]
+name = "jsonschema-path"
+version = "0.3.4"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "pathable" },
+    { name = "pyyaml" },
+    { name = "referencing" },
+    { name = "requests" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/6e/45/41ebc679c2a4fced6a722f624c18d658dee42612b83ea24c1caf7c0eb3a8/jsonschema_path-0.3.4.tar.gz", hash = "sha256:8365356039f16cc65fddffafda5f58766e34bebab7d6d105616ab52bc4297001", size = 11159, upload-time = "2025-01-24T14:33:16.547Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/cb/58/3485da8cb93d2f393bce453adeef16896751f14ba3e2024bc21dc9597646/jsonschema_path-0.3.4-py3-none-any.whl", hash = "sha256:f502191fdc2b22050f9a81c9237be9d27145b9001c55842bece5e94e382e52f8", size = 14810, upload-time = "2025-01-24T14:33:14.652Z" },
+]
+
 [[package]]
 name = "jsonschema-specifications"
 version = "2025.9.1"
@@ -236,6 +909,24 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/41/45/1a4ed80516f02155c51f51e8cedb3c1902296743db0bbc66608a0db2814f/jsonschema_specifications-2025.9.1-py3-none-any.whl", hash = "sha256:98802fee3a11ee76ecaca44429fda8a41bff98b00a0f2838151b113f210cc6fe", size = 18437, upload-time = "2025-09-08T01:34:57.871Z" },
 ]
 
+[[package]]
+name = "keyring"
+version = "25.7.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "importlib-metadata", marker = "python_full_version < '3.12'" },
+    { name = "jaraco-classes" },
+    { name = "jaraco-context" },
+    { name = "jaraco-functools" },
+    { name = "jeepney", marker = "sys_platform == 'linux'" },
+    { name = "pywin32-ctypes", marker = "sys_platform == 'win32'" },
+    { name = "secretstorage", marker = "sys_platform == 'linux'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/43/4b/674af6ef2f97d56f0ab5153bf0bfa28ccb6c3ed4d1babf4305449668807b/keyring-25.7.0.tar.gz", hash = "sha256:fe01bd85eb3f8fb3dd0405defdeac9a5b4f6f0439edbb3149577f244a2e8245b", size = 63516, upload-time = "2025-11-16T16:26:09.482Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/81/db/e655086b7f3a705df045bf0933bdd9c2f79bb3c97bfef1384598bb79a217/keyring-25.7.0-py3-none-any.whl", hash = "sha256:be4a0b195f149690c166e850609a477c532ddbfbaed96a404d4e43f8d5e2689f", size = 39160, upload-time = "2025-11-16T16:26:08.402Z" },
+]
+
 [[package]]
 name = "librt"
 version = "0.7.7"
@@ -299,6 +990,69 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/51/0e/b756c7708143a63fca65a51ca07990fa647db2cc8fcd65177b9e96680255/librt-0.7.7-cp314-cp314t-win_arm64.whl", hash = "sha256:142c2cd91794b79fd0ce113bd658993b7ede0fe93057668c2f98a45ca00b7e91", size = 39724, upload-time = "2026-01-01T23:52:09.745Z" },
 ]
 
+[[package]]
+name = "lupa"
+version = "2.6"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/b8/1c/191c3e6ec6502e3dbe25a53e27f69a5daeac3e56de1f73c0138224171ead/lupa-2.6.tar.gz", hash = "sha256:9a770a6e89576be3447668d7ced312cd6fd41d3c13c2462c9dc2c2ab570e45d9", size = 7240282, upload-time = "2025-10-24T07:20:29.738Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/ca/29/1f66907c1ebf1881735afa695e646762c674f00738ebf66d795d59fc0665/lupa-2.6-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:6d988c0f9331b9f2a5a55186701a25444ab10a1432a1021ee58011499ecbbdd5", size = 962875, upload-time = "2025-10-24T07:17:39.107Z" },
+    { url = "https://files.pythonhosted.org/packages/e6/67/4a748604be360eb9c1c215f6a0da921cd1a2b44b2c5951aae6fb83019d3a/lupa-2.6-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:ebe1bbf48259382c72a6fe363dea61a0fd6fe19eab95e2ae881e20f3654587bf", size = 1935390, upload-time = "2025-10-24T07:17:41.427Z" },
+    { url = "https://files.pythonhosted.org/packages/ac/0c/8ef9ee933a350428b7bdb8335a37ef170ab0bb008bbf9ca8f4f4310116b6/lupa-2.6-cp311-cp311-macosx_11_0_x86_64.whl", hash = "sha256:a8fcee258487cf77cdd41560046843bb38c2e18989cd19671dd1e2596f798306", size = 992193, upload-time = "2025-10-24T07:17:43.231Z" },
+    { url = "https://files.pythonhosted.org/packages/65/46/e6c7facebdb438db8a65ed247e56908818389c1a5abbf6a36aab14f1057d/lupa-2.6-cp311-cp311-manylinux2010_i686.manylinux_2_12_i686.manylinux_2_28_i686.whl", hash = "sha256:561a8e3be800827884e767a694727ed8482d066e0d6edfcbf423b05e63b05535", size = 1165844, upload-time = "2025-10-24T07:17:45.437Z" },
+    { url = "https://files.pythonhosted.org/packages/1c/26/9f1154c6c95f175ccbf96aa96c8f569c87f64f463b32473e839137601a8b/lupa-2.6-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:af880a62d47991cae78b8e9905c008cbfdc4a3a9723a66310c2634fc7644578c", size = 1048069, upload-time = "2025-10-24T07:17:47.181Z" },
+    { url = "https://files.pythonhosted.org/packages/68/67/2cc52ab73d6af81612b2ea24c870d3fa398443af8e2875e5befe142398b1/lupa-2.6-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:80b22923aa4023c86c0097b235615f89d469a0c4eee0489699c494d3367c4c85", size = 2079079, upload-time = "2025-10-24T07:17:49.755Z" },
+    { url = "https://files.pythonhosted.org/packages/2e/dc/f843f09bbf325f6e5ee61730cf6c3409fc78c010d968c7c78acba3019ca7/lupa-2.6-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:153d2cc6b643f7efb9cfc0c6bb55ec784d5bac1a3660cfc5b958a7b8f38f4a75", size = 1071428, upload-time = "2025-10-24T07:17:51.991Z" },
+    { url = "https://files.pythonhosted.org/packages/2e/60/37533a8d85bf004697449acb97ecdacea851acad28f2ad3803662487dd2a/lupa-2.6-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:3fa8777e16f3ded50b72967dc17e23f5a08e4f1e2c9456aff2ebdb57f5b2869f", size = 1181756, upload-time = "2025-10-24T07:17:53.752Z" },
+    { url = "https://files.pythonhosted.org/packages/e4/f2/cf29b20dbb4927b6a3d27c339ac5d73e74306ecc28c8e2c900b2794142ba/lupa-2.6-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:8dbdcbe818c02a2f56f5ab5ce2de374dab03e84b25266cfbaef237829bc09b3f", size = 2175687, upload-time = "2025-10-24T07:17:56.228Z" },
+    { url = "https://files.pythonhosted.org/packages/94/7c/050e02f80c7131b63db1474bff511e63c545b5a8636a24cbef3fc4da20b6/lupa-2.6-cp311-cp311-win32.whl", hash = "sha256:defaf188fde8f7a1e5ce3a5e6d945e533b8b8d547c11e43b96c9b7fe527f56dc", size = 1412592, upload-time = "2025-10-24T07:17:59.062Z" },
+    { url = "https://files.pythonhosted.org/packages/6f/9a/6f2af98aa5d771cea661f66c8eb8f53772ec1ab1dfbce24126cfcd189436/lupa-2.6-cp311-cp311-win_amd64.whl", hash = "sha256:9505ae600b5c14f3e17e70f87f88d333717f60411faca1ddc6f3e61dce85fa9e", size = 1669194, upload-time = "2025-10-24T07:18:01.647Z" },
+    { url = "https://files.pythonhosted.org/packages/94/86/ce243390535c39d53ea17ccf0240815e6e457e413e40428a658ea4ee4b8d/lupa-2.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:47ce718817ef1cc0c40d87c3d5ae56a800d61af00fbc0fad1ca9be12df2f3b56", size = 951707, upload-time = "2025-10-24T07:18:03.884Z" },
+    { url = "https://files.pythonhosted.org/packages/86/85/cedea5e6cbeb54396fdcc55f6b741696f3f036d23cfaf986d50d680446da/lupa-2.6-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:7aba985b15b101495aa4b07112cdc08baa0c545390d560ad5cfde2e9e34f4d58", size = 1916703, upload-time = "2025-10-24T07:18:05.6Z" },
+    { url = "https://files.pythonhosted.org/packages/24/be/3d6b5f9a8588c01a4d88129284c726017b2089f3a3fd3ba8bd977292fea0/lupa-2.6-cp312-cp312-macosx_11_0_x86_64.whl", hash = "sha256:b766f62f95b2739f2248977d29b0722e589dcf4f0ccfa827ccbd29f0148bd2e5", size = 985152, upload-time = "2025-10-24T07:18:08.561Z" },
+    { url = "https://files.pythonhosted.org/packages/eb/23/9f9a05beee5d5dce9deca4cb07c91c40a90541fc0a8e09db4ee670da550f/lupa-2.6-cp312-cp312-manylinux2010_i686.manylinux_2_12_i686.manylinux_2_28_i686.whl", hash = "sha256:00a934c23331f94cb51760097ebfab14b005d55a6b30a2b480e3c53dd2fa290d", size = 1159599, upload-time = "2025-10-24T07:18:10.346Z" },
+    { url = "https://files.pythonhosted.org/packages/40/4e/e7c0583083db9d7f1fd023800a9767d8e4391e8330d56c2373d890ac971b/lupa-2.6-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:21de9f38bd475303e34a042b7081aabdf50bd9bafd36ce4faea2f90fd9f15c31", size = 1038686, upload-time = "2025-10-24T07:18:12.112Z" },
+    { url = "https://files.pythonhosted.org/packages/1c/9f/5a4f7d959d4feba5e203ff0c31889e74d1ca3153122be4a46dca7d92bf7c/lupa-2.6-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:cf3bda96d3fc41237e964a69c23647d50d4e28421111360274d4799832c560e9", size = 2071956, upload-time = "2025-10-24T07:18:14.572Z" },
+    { url = "https://files.pythonhosted.org/packages/92/34/2f4f13ca65d01169b1720176aedc4af17bc19ee834598c7292db232cb6dc/lupa-2.6-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:5a76ead245da54801a81053794aa3975f213221f6542d14ec4b859ee2e7e0323", size = 1057199, upload-time = "2025-10-24T07:18:16.379Z" },
+    { url = "https://files.pythonhosted.org/packages/35/2a/5f7d2eebec6993b0dcd428e0184ad71afb06a45ba13e717f6501bfed1da3/lupa-2.6-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:8dd0861741caa20886ddbda0a121d8e52fb9b5bb153d82fa9bba796962bf30e8", size = 1173693, upload-time = "2025-10-24T07:18:18.153Z" },
+    { url = "https://files.pythonhosted.org/packages/e4/29/089b4d2f8e34417349af3904bb40bec40b65c8731f45e3fd8d497ca573e5/lupa-2.6-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:239e63948b0b23023f81d9a19a395e768ed3da6a299f84e7963b8f813f6e3f9c", size = 2164394, upload-time = "2025-10-24T07:18:20.403Z" },
+    { url = "https://files.pythonhosted.org/packages/f3/1b/79c17b23c921f81468a111cad843b076a17ef4b684c4a8dff32a7969c3f0/lupa-2.6-cp312-cp312-win32.whl", hash = "sha256:325894e1099499e7a6f9c351147661a2011887603c71086d36fe0f964d52d1ce", size = 1420647, upload-time = "2025-10-24T07:18:23.368Z" },
+    { url = "https://files.pythonhosted.org/packages/b8/15/5121e68aad3584e26e1425a5c9a79cd898f8a152292059e128c206ee817c/lupa-2.6-cp312-cp312-win_amd64.whl", hash = "sha256:c735a1ce8ee60edb0fe71d665f1e6b7c55c6021f1d340eb8c865952c602cd36f", size = 1688529, upload-time = "2025-10-24T07:18:25.523Z" },
+    { url = "https://files.pythonhosted.org/packages/28/1d/21176b682ca5469001199d8b95fa1737e29957a3d185186e7a8b55345f2e/lupa-2.6-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:663a6e58a0f60e7d212017d6678639ac8df0119bc13c2145029dcba084391310", size = 947232, upload-time = "2025-10-24T07:18:27.878Z" },
+    { url = "https://files.pythonhosted.org/packages/ce/4c/d327befb684660ca13cf79cd1f1d604331808f9f1b6fb6bf57832f8edf80/lupa-2.6-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:d1f5afda5c20b1f3217a80e9bc1b77037f8a6eb11612fd3ada19065303c8f380", size = 1908625, upload-time = "2025-10-24T07:18:29.944Z" },
+    { url = "https://files.pythonhosted.org/packages/66/8e/ad22b0a19454dfd08662237a84c792d6d420d36b061f239e084f29d1a4f3/lupa-2.6-cp313-cp313-macosx_11_0_x86_64.whl", hash = "sha256:26f2b3c085fe76e9119e48c1013c1cccdc1f51585d456858290475aa38e7089e", size = 981057, upload-time = "2025-10-24T07:18:31.553Z" },
+    { url = "https://files.pythonhosted.org/packages/5c/48/74859073ab276bd0566c719f9ca0108b0cfc1956ca0d68678d117d47d155/lupa-2.6-cp313-cp313-manylinux2010_i686.manylinux_2_12_i686.manylinux_2_28_i686.whl", hash = "sha256:60d2f902c7b96fb8ab98493dcff315e7bb4d0b44dc9dd76eb37de575025d5685", size = 1156227, upload-time = "2025-10-24T07:18:33.981Z" },
+    { url = "https://files.pythonhosted.org/packages/09/6c/0e9ded061916877253c2266074060eb71ed99fb21d73c8c114a76725bce2/lupa-2.6-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a02d25dee3a3250967c36590128d9220ae02f2eda166a24279da0b481519cbff", size = 1035752, upload-time = "2025-10-24T07:18:36.32Z" },
+    { url = "https://files.pythonhosted.org/packages/dd/ef/f8c32e454ef9f3fe909f6c7d57a39f950996c37a3deb7b391fec7903dab7/lupa-2.6-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6eae1ee16b886b8914ff292dbefbf2f48abfbdee94b33a88d1d5475e02423203", size = 2069009, upload-time = "2025-10-24T07:18:38.072Z" },
+    { url = "https://files.pythonhosted.org/packages/53/dc/15b80c226a5225815a890ee1c11f07968e0aba7a852df41e8ae6fe285063/lupa-2.6-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:b0edd5073a4ee74ab36f74fe61450148e6044f3952b8d21248581f3c5d1a58be", size = 1056301, upload-time = "2025-10-24T07:18:40.165Z" },
+    { url = "https://files.pythonhosted.org/packages/31/14/2086c1425c985acfb30997a67e90c39457122df41324d3c179d6ee2292c6/lupa-2.6-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:0c53ee9f22a8a17e7d4266ad48e86f43771951797042dd51d1494aaa4f5f3f0a", size = 1170673, upload-time = "2025-10-24T07:18:42.426Z" },
+    { url = "https://files.pythonhosted.org/packages/10/e5/b216c054cf86576c0191bf9a9f05de6f7e8e07164897d95eea0078dca9b2/lupa-2.6-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:de7c0f157a9064a400d828789191a96da7f4ce889969a588b87ec80de9b14772", size = 2162227, upload-time = "2025-10-24T07:18:46.112Z" },
+    { url = "https://files.pythonhosted.org/packages/59/2f/33ecb5bedf4f3bc297ceacb7f016ff951331d352f58e7e791589609ea306/lupa-2.6-cp313-cp313-win32.whl", hash = "sha256:ee9523941ae0a87b5b703417720c5d78f72d2f5bc23883a2ea80a949a3ed9e75", size = 1419558, upload-time = "2025-10-24T07:18:48.371Z" },
+    { url = "https://files.pythonhosted.org/packages/f9/b4/55e885834c847ea610e111d87b9ed4768f0afdaeebc00cd46810f25029f6/lupa-2.6-cp313-cp313-win_amd64.whl", hash = "sha256:b1335a5835b0a25ebdbc75cf0bda195e54d133e4d994877ef025e218c2e59db9", size = 1683424, upload-time = "2025-10-24T07:18:50.976Z" },
+    { url = "https://files.pythonhosted.org/packages/66/9d/d9427394e54d22a35d1139ef12e845fd700d4872a67a34db32516170b746/lupa-2.6-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:dcb6d0a3264873e1653bc188499f48c1fb4b41a779e315eba45256cfe7bc33c1", size = 953818, upload-time = "2025-10-24T07:18:53.378Z" },
+    { url = "https://files.pythonhosted.org/packages/10/41/27bbe81953fb2f9ecfced5d9c99f85b37964cfaf6aa8453bb11283983721/lupa-2.6-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:a37e01f2128f8c36106726cb9d360bac087d58c54b4522b033cc5691c584db18", size = 1915850, upload-time = "2025-10-24T07:18:55.259Z" },
+    { url = "https://files.pythonhosted.org/packages/a3/98/f9ff60db84a75ba8725506bbf448fb085bc77868a021998ed2a66d920568/lupa-2.6-cp314-cp314-macosx_11_0_x86_64.whl", hash = "sha256:458bd7e9ff3c150b245b0fcfbb9bd2593d1152ea7f0a7b91c1d185846da033fe", size = 982344, upload-time = "2025-10-24T07:18:57.05Z" },
+    { url = "https://files.pythonhosted.org/packages/41/f7/f39e0f1c055c3b887d86b404aaf0ca197b5edfd235a8b81b45b25bac7fc3/lupa-2.6-cp314-cp314-manylinux2010_i686.manylinux_2_12_i686.manylinux_2_28_i686.whl", hash = "sha256:052ee82cac5206a02df77119c325339acbc09f5ce66967f66a2e12a0f3211cad", size = 1156543, upload-time = "2025-10-24T07:18:59.251Z" },
+    { url = "https://files.pythonhosted.org/packages/9e/9c/59e6cffa0d672d662ae17bd7ac8ecd2c89c9449dee499e3eb13ca9cd10d9/lupa-2.6-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:96594eca3c87dd07938009e95e591e43d554c1dbd0385be03c100367141db5a8", size = 1047974, upload-time = "2025-10-24T07:19:01.449Z" },
+    { url = "https://files.pythonhosted.org/packages/23/c6/a04e9cef7c052717fcb28fb63b3824802488f688391895b618e39be0f684/lupa-2.6-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e8faddd9d198688c8884091173a088a8e920ecc96cda2ffed576a23574c4b3f6", size = 2073458, upload-time = "2025-10-24T07:19:03.369Z" },
+    { url = "https://files.pythonhosted.org/packages/e6/10/824173d10f38b51fc77785228f01411b6ca28826ce27404c7c912e0e442c/lupa-2.6-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:daebb3a6b58095c917e76ba727ab37b27477fb926957c825205fbda431552134", size = 1067683, upload-time = "2025-10-24T07:19:06.2Z" },
+    { url = "https://files.pythonhosted.org/packages/b6/dc/9692fbcf3c924d9c4ece2d8d2f724451ac2e09af0bd2a782db1cef34e799/lupa-2.6-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:f3154e68972befe0f81564e37d8142b5d5d79931a18309226a04ec92487d4ea3", size = 1171892, upload-time = "2025-10-24T07:19:08.544Z" },
+    { url = "https://files.pythonhosted.org/packages/84/ff/e318b628d4643c278c96ab3ddea07fc36b075a57383c837f5b11e537ba9d/lupa-2.6-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:e4dadf77b9fedc0bfa53417cc28dc2278a26d4cbd95c29f8927ad4d8fe0a7ef9", size = 2166641, upload-time = "2025-10-24T07:19:10.485Z" },
+    { url = "https://files.pythonhosted.org/packages/12/f7/a6f9ec2806cf2d50826980cdb4b3cffc7691dc6f95e13cc728846d5cb793/lupa-2.6-cp314-cp314-win32.whl", hash = "sha256:cb34169c6fa3bab3e8ac58ca21b8a7102f6a94b6a5d08d3636312f3f02fafd8f", size = 1456857, upload-time = "2025-10-24T07:19:37.989Z" },
+    { url = "https://files.pythonhosted.org/packages/c5/de/df71896f25bdc18360fdfa3b802cd7d57d7fede41a0e9724a4625b412c85/lupa-2.6-cp314-cp314-win_amd64.whl", hash = "sha256:b74f944fe46c421e25d0f8692aef1e842192f6f7f68034201382ac440ef9ea67", size = 1731191, upload-time = "2025-10-24T07:19:40.281Z" },
+    { url = "https://files.pythonhosted.org/packages/47/3c/a1f23b01c54669465f5f4c4083107d496fbe6fb45998771420e9aadcf145/lupa-2.6-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:0e21b716408a21ab65723f8841cf7f2f37a844b7a965eeabb785e27fca4099cf", size = 999343, upload-time = "2025-10-24T07:19:12.519Z" },
+    { url = "https://files.pythonhosted.org/packages/c5/6d/501994291cb640bfa2ccf7f554be4e6914afa21c4026bd01bff9ca8aac57/lupa-2.6-cp314-cp314t-macosx_11_0_universal2.whl", hash = "sha256:589db872a141bfff828340079bbdf3e9a31f2689f4ca0d88f97d9e8c2eae6142", size = 2000730, upload-time = "2025-10-24T07:19:14.869Z" },
+    { url = "https://files.pythonhosted.org/packages/53/a5/457ffb4f3f20469956c2d4c4842a7675e884efc895b2f23d126d23e126cc/lupa-2.6-cp314-cp314t-macosx_11_0_x86_64.whl", hash = "sha256:cd852a91a4a9d4dcbb9a58100f820a75a425703ec3e3f049055f60b8533b7953", size = 1021553, upload-time = "2025-10-24T07:19:17.123Z" },
+    { url = "https://files.pythonhosted.org/packages/51/6b/36bb5a5d0960f2a5c7c700e0819abb76fd9bf9c1d8a66e5106416d6e9b14/lupa-2.6-cp314-cp314t-manylinux2010_i686.manylinux_2_12_i686.manylinux_2_28_i686.whl", hash = "sha256:0334753be028358922415ca97a64a3048e4ed155413fc4eaf87dd0a7e2752983", size = 1133275, upload-time = "2025-10-24T07:19:20.51Z" },
+    { url = "https://files.pythonhosted.org/packages/19/86/202ff4429f663013f37d2229f6176ca9f83678a50257d70f61a0a97281bf/lupa-2.6-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:661d895cd38c87658a34780fac54a690ec036ead743e41b74c3fb81a9e65a6aa", size = 1038441, upload-time = "2025-10-24T07:19:22.509Z" },
+    { url = "https://files.pythonhosted.org/packages/a7/42/d8125f8e420714e5b52e9c08d88b5329dfb02dcca731b4f21faaee6cc5b5/lupa-2.6-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6aa58454ccc13878cc177c62529a2056be734da16369e451987ff92784994ca7", size = 2058324, upload-time = "2025-10-24T07:19:24.979Z" },
+    { url = "https://files.pythonhosted.org/packages/2b/2c/47bf8b84059876e877a339717ddb595a4a7b0e8740bacae78ba527562e1c/lupa-2.6-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:1425017264e470c98022bba8cff5bd46d054a827f5df6b80274f9cc71dafd24f", size = 1060250, upload-time = "2025-10-24T07:19:27.262Z" },
+    { url = "https://files.pythonhosted.org/packages/c2/06/d88add2b6406ca1bdec99d11a429222837ca6d03bea42ca75afa169a78cb/lupa-2.6-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:224af0532d216e3105f0a127410f12320f7c5f1aa0300bdf9646b8d9afb0048c", size = 1151126, upload-time = "2025-10-24T07:19:29.522Z" },
+    { url = "https://files.pythonhosted.org/packages/b4/a0/89e6a024c3b4485b89ef86881c9d55e097e7cb0bdb74efb746f2fa6a9a76/lupa-2.6-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:9abb98d5a8fd27c8285302e82199f0e56e463066f88f619d6594a450bf269d80", size = 2153693, upload-time = "2025-10-24T07:19:31.379Z" },
+    { url = "https://files.pythonhosted.org/packages/b6/36/a0f007dc58fc1bbf51fb85dcc82fcb1f21b8c4261361de7dab0e3d8521ef/lupa-2.6-cp314-cp314t-win32.whl", hash = "sha256:1849efeba7a8f6fb8aa2c13790bee988fd242ae404bd459509640eeea3d1e291", size = 1590104, upload-time = "2025-10-24T07:19:33.514Z" },
+    { url = "https://files.pythonhosted.org/packages/7d/5e/db903ce9cf82c48d6b91bf6d63ae4c8d0d17958939a4e04ba6b9f38b8643/lupa-2.6-cp314-cp314t-win_amd64.whl", hash = "sha256:fc1498d1a4fc028bc521c26d0fad4ca00ed63b952e32fb95949bda76a04bad52", size = 1913818, upload-time = "2025-10-24T07:19:36.039Z" },
+]
+
 [[package]]
 name = "markdown-it-py"
 version = "4.0.0"
@@ -386,87 +1140,472 @@ wheels = [
 ]
 
 [[package]]
-name = "mdurl"
-version = "0.1.2"
+name = "mcp"
+version = "1.26.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "anyio" },
+    { name = "httpx" },
+    { name = "httpx-sse" },
+    { name = "jsonschema" },
+    { name = "pydantic" },
+    { name = "pydantic-settings" },
+    { name = "pyjwt", extra = ["crypto"] },
+    { name = "python-multipart" },
+    { name = "pywin32", marker = "sys_platform == 'win32'" },
+    { name = "sse-starlette" },
+    { name = "starlette" },
+    { name = "typing-extensions" },
+    { name = "typing-inspection" },
+    { name = "uvicorn", marker = "sys_platform != 'emscripten'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/fc/6d/62e76bbb8144d6ed86e202b5edd8a4cb631e7c8130f3f4893c3f90262b10/mcp-1.26.0.tar.gz", hash = "sha256:db6e2ef491eecc1a0d93711a76f28dec2e05999f93afd48795da1c1137142c66", size = 608005, upload-time = "2026-01-24T19:40:32.468Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/fd/d9/eaa1f80170d2b7c5ba23f3b59f766f3a0bb41155fbc32a69adfa1adaaef9/mcp-1.26.0-py3-none-any.whl", hash = "sha256:904a21c33c25aa98ddbeb47273033c435e595bbacfdb177f4bd87f6dceebe1ca", size = 233615, upload-time = "2026-01-24T19:40:30.652Z" },
+]
+
+[[package]]
+name = "mdurl"
+version = "0.1.2"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/d6/54/cfe61301667036ec958cb99bd3efefba235e65cdeb9c84d24a8293ba1d90/mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba", size = 8729, upload-time = "2022-08-14T12:40:10.846Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979, upload-time = "2022-08-14T12:40:09.779Z" },
+]
+
+[[package]]
+name = "more-itertools"
+version = "10.8.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/ea/5d/38b681d3fce7a266dd9ab73c66959406d565b3e85f21d5e66e1181d93721/more_itertools-10.8.0.tar.gz", hash = "sha256:f638ddf8a1a0d134181275fb5d58b086ead7c6a72429ad725c67503f13ba30bd", size = 137431, upload-time = "2025-09-02T15:23:11.018Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/a4/8e/469e5a4a2f5855992e425f3cb33804cc07bf18d48f2db061aec61ce50270/more_itertools-10.8.0-py3-none-any.whl", hash = "sha256:52d4362373dcf7c52546bc4af9a86ee7c4579df9a8dc268be0a2f949d376cc9b", size = 69667, upload-time = "2025-09-02T15:23:09.635Z" },
+]
+
+[[package]]
+name = "mypy"
+version = "1.19.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "librt", marker = "platform_python_implementation != 'PyPy'" },
+    { name = "mypy-extensions" },
+    { name = "pathspec" },
+    { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/f5/db/4efed9504bc01309ab9c2da7e352cc223569f05478012b5d9ece38fd44d2/mypy-1.19.1.tar.gz", hash = "sha256:19d88bb05303fe63f71dd2c6270daca27cb9401c4ca8255fe50d1d920e0eb9ba", size = 3582404, upload-time = "2025-12-15T05:03:48.42Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/ef/47/6b3ebabd5474d9cdc170d1342fbf9dddc1b0ec13ec90bf9004ee6f391c31/mypy-1.19.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d8dfc6ab58ca7dda47d9237349157500468e404b17213d44fc1cb77bce532288", size = 13028539, upload-time = "2025-12-15T05:03:44.129Z" },
+    { url = "https://files.pythonhosted.org/packages/5c/a6/ac7c7a88a3c9c54334f53a941b765e6ec6c4ebd65d3fe8cdcfbe0d0fd7db/mypy-1.19.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e3f276d8493c3c97930e354b2595a44a21348b320d859fb4a2b9f66da9ed27ab", size = 12083163, upload-time = "2025-12-15T05:03:37.679Z" },
+    { url = "https://files.pythonhosted.org/packages/67/af/3afa9cf880aa4a2c803798ac24f1d11ef72a0c8079689fac5cfd815e2830/mypy-1.19.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2abb24cf3f17864770d18d673c85235ba52456b36a06b6afc1e07c1fdcd3d0e6", size = 12687629, upload-time = "2025-12-15T05:02:31.526Z" },
+    { url = "https://files.pythonhosted.org/packages/2d/46/20f8a7114a56484ab268b0ab372461cb3a8f7deed31ea96b83a4e4cfcfca/mypy-1.19.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a009ffa5a621762d0c926a078c2d639104becab69e79538a494bcccb62cc0331", size = 13436933, upload-time = "2025-12-15T05:03:15.606Z" },
+    { url = "https://files.pythonhosted.org/packages/5b/f8/33b291ea85050a21f15da910002460f1f445f8007adb29230f0adea279cb/mypy-1.19.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:f7cee03c9a2e2ee26ec07479f38ea9c884e301d42c6d43a19d20fb014e3ba925", size = 13661754, upload-time = "2025-12-15T05:02:26.731Z" },
+    { url = "https://files.pythonhosted.org/packages/fd/a3/47cbd4e85bec4335a9cd80cf67dbc02be21b5d4c9c23ad6b95d6c5196bac/mypy-1.19.1-cp311-cp311-win_amd64.whl", hash = "sha256:4b84a7a18f41e167f7995200a1d07a4a6810e89d29859df936f1c3923d263042", size = 10055772, upload-time = "2025-12-15T05:03:26.179Z" },
+    { url = "https://files.pythonhosted.org/packages/06/8a/19bfae96f6615aa8a0604915512e0289b1fad33d5909bf7244f02935d33a/mypy-1.19.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:a8174a03289288c1f6c46d55cef02379b478bfbc8e358e02047487cad44c6ca1", size = 13206053, upload-time = "2025-12-15T05:03:46.622Z" },
+    { url = "https://files.pythonhosted.org/packages/a5/34/3e63879ab041602154ba2a9f99817bb0c85c4df19a23a1443c8986e4d565/mypy-1.19.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ffcebe56eb09ff0c0885e750036a095e23793ba6c2e894e7e63f6d89ad51f22e", size = 12219134, upload-time = "2025-12-15T05:03:24.367Z" },
+    { url = "https://files.pythonhosted.org/packages/89/cc/2db6f0e95366b630364e09845672dbee0cbf0bbe753a204b29a944967cd9/mypy-1.19.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b64d987153888790bcdb03a6473d321820597ab8dd9243b27a92153c4fa50fd2", size = 12731616, upload-time = "2025-12-15T05:02:44.725Z" },
+    { url = "https://files.pythonhosted.org/packages/00/be/dd56c1fd4807bc1eba1cf18b2a850d0de7bacb55e158755eb79f77c41f8e/mypy-1.19.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c35d298c2c4bba75feb2195655dfea8124d855dfd7343bf8b8c055421eaf0cf8", size = 13620847, upload-time = "2025-12-15T05:03:39.633Z" },
+    { url = "https://files.pythonhosted.org/packages/6d/42/332951aae42b79329f743bf1da088cd75d8d4d9acc18fbcbd84f26c1af4e/mypy-1.19.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:34c81968774648ab5ac09c29a375fdede03ba253f8f8287847bd480782f73a6a", size = 13834976, upload-time = "2025-12-15T05:03:08.786Z" },
+    { url = "https://files.pythonhosted.org/packages/6f/63/e7493e5f90e1e085c562bb06e2eb32cae27c5057b9653348d38b47daaecc/mypy-1.19.1-cp312-cp312-win_amd64.whl", hash = "sha256:b10e7c2cd7870ba4ad9b2d8a6102eb5ffc1f16ca35e3de6bfa390c1113029d13", size = 10118104, upload-time = "2025-12-15T05:03:10.834Z" },
+    { url = "https://files.pythonhosted.org/packages/de/9f/a6abae693f7a0c697dbb435aac52e958dc8da44e92e08ba88d2e42326176/mypy-1.19.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e3157c7594ff2ef1634ee058aafc56a82db665c9438fd41b390f3bde1ab12250", size = 13201927, upload-time = "2025-12-15T05:02:29.138Z" },
+    { url = "https://files.pythonhosted.org/packages/9a/a4/45c35ccf6e1c65afc23a069f50e2c66f46bd3798cbe0d680c12d12935caa/mypy-1.19.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:bdb12f69bcc02700c2b47e070238f42cb87f18c0bc1fc4cdb4fb2bc5fd7a3b8b", size = 12206730, upload-time = "2025-12-15T05:03:01.325Z" },
+    { url = "https://files.pythonhosted.org/packages/05/bb/cdcf89678e26b187650512620eec8368fded4cfd99cfcb431e4cdfd19dec/mypy-1.19.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f859fb09d9583a985be9a493d5cfc5515b56b08f7447759a0c5deaf68d80506e", size = 12724581, upload-time = "2025-12-15T05:03:20.087Z" },
+    { url = "https://files.pythonhosted.org/packages/d1/32/dd260d52babf67bad8e6770f8e1102021877ce0edea106e72df5626bb0ec/mypy-1.19.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c9a6538e0415310aad77cb94004ca6482330fece18036b5f360b62c45814c4ef", size = 13616252, upload-time = "2025-12-15T05:02:49.036Z" },
+    { url = "https://files.pythonhosted.org/packages/71/d0/5e60a9d2e3bd48432ae2b454b7ef2b62a960ab51292b1eda2a95edd78198/mypy-1.19.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:da4869fc5e7f62a88f3fe0b5c919d1d9f7ea3cef92d3689de2823fd27e40aa75", size = 13840848, upload-time = "2025-12-15T05:02:55.95Z" },
+    { url = "https://files.pythonhosted.org/packages/98/76/d32051fa65ecf6cc8c6610956473abdc9b4c43301107476ac03559507843/mypy-1.19.1-cp313-cp313-win_amd64.whl", hash = "sha256:016f2246209095e8eda7538944daa1d60e1e8134d98983b9fc1e92c1fc0cb8dd", size = 10135510, upload-time = "2025-12-15T05:02:58.438Z" },
+    { url = "https://files.pythonhosted.org/packages/de/eb/b83e75f4c820c4247a58580ef86fcd35165028f191e7e1ba57128c52782d/mypy-1.19.1-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:06e6170bd5836770e8104c8fdd58e5e725cfeb309f0a6c681a811f557e97eac1", size = 13199744, upload-time = "2025-12-15T05:03:30.823Z" },
+    { url = "https://files.pythonhosted.org/packages/94/28/52785ab7bfa165f87fcbb61547a93f98bb20e7f82f90f165a1f69bce7b3d/mypy-1.19.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:804bd67b8054a85447c8954215a906d6eff9cabeabe493fb6334b24f4bfff718", size = 12215815, upload-time = "2025-12-15T05:02:42.323Z" },
+    { url = "https://files.pythonhosted.org/packages/0a/c6/bdd60774a0dbfb05122e3e925f2e9e846c009e479dcec4821dad881f5b52/mypy-1.19.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:21761006a7f497cb0d4de3d8ef4ca70532256688b0523eee02baf9eec895e27b", size = 12740047, upload-time = "2025-12-15T05:03:33.168Z" },
+    { url = "https://files.pythonhosted.org/packages/32/2a/66ba933fe6c76bd40d1fe916a83f04fed253152f451a877520b3c4a5e41e/mypy-1.19.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:28902ee51f12e0f19e1e16fbe2f8f06b6637f482c459dd393efddd0ec7f82045", size = 13601998, upload-time = "2025-12-15T05:03:13.056Z" },
+    { url = "https://files.pythonhosted.org/packages/e3/da/5055c63e377c5c2418760411fd6a63ee2b96cf95397259038756c042574f/mypy-1.19.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:481daf36a4c443332e2ae9c137dfee878fcea781a2e3f895d54bd3002a900957", size = 13807476, upload-time = "2025-12-15T05:03:17.977Z" },
+    { url = "https://files.pythonhosted.org/packages/cd/09/4ebd873390a063176f06b0dbf1f7783dd87bd120eae7727fa4ae4179b685/mypy-1.19.1-cp314-cp314-win_amd64.whl", hash = "sha256:8bb5c6f6d043655e055be9b542aa5f3bdd30e4f3589163e85f93f3640060509f", size = 10281872, upload-time = "2025-12-15T05:03:05.549Z" },
+    { url = "https://files.pythonhosted.org/packages/8d/f4/4ce9a05ce5ded1de3ec1c1d96cf9f9504a04e54ce0ed55cfa38619a32b8d/mypy-1.19.1-py3-none-any.whl", hash = "sha256:f1235f5ea01b7db5468d53ece6aaddf1ad0b88d9e7462b86ef96fe04995d7247", size = 2471239, upload-time = "2025-12-15T05:03:07.248Z" },
+]
+
+[[package]]
+name = "mypy-extensions"
+version = "1.1.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/a2/6e/371856a3fb9d31ca8dac321cda606860fa4548858c0cc45d9d1d4ca2628b/mypy_extensions-1.1.0.tar.gz", hash = "sha256:52e68efc3284861e772bbcd66823fde5ae21fd2fdb51c62a211403730b916558", size = 6343, upload-time = "2025-04-22T14:54:24.164Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/79/7b/2c79738432f5c924bef5071f933bcc9efd0473bac3b4aa584a6f7c1c8df8/mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505", size = 4963, upload-time = "2025-04-22T14:54:22.983Z" },
+]
+
+[[package]]
+name = "openapi-pydantic"
+version = "0.5.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "pydantic" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/02/2e/58d83848dd1a79cb92ed8e63f6ba901ca282c5f09d04af9423ec26c56fd7/openapi_pydantic-0.5.1.tar.gz", hash = "sha256:ff6835af6bde7a459fb93eb93bb92b8749b754fc6e51b2f1590a19dc3005ee0d", size = 60892, upload-time = "2025-01-08T19:29:27.083Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/12/cf/03675d8bd8ecbf4445504d8071adab19f5f993676795708e36402ab38263/openapi_pydantic-0.5.1-py3-none-any.whl", hash = "sha256:a3a09ef4586f5bd760a8df7f43028b60cafb6d9f61de2acba9574766255ab146", size = 96381, upload-time = "2025-01-08T19:29:25.275Z" },
+]
+
+[[package]]
+name = "opentelemetry-api"
+version = "1.39.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "importlib-metadata" },
+    { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/97/b9/3161be15bb8e3ad01be8be5a968a9237c3027c5be504362ff800fca3e442/opentelemetry_api-1.39.1.tar.gz", hash = "sha256:fbde8c80e1b937a2c61f20347e91c0c18a1940cecf012d62e65a7caf08967c9c", size = 65767, upload-time = "2025-12-11T13:32:39.182Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/cf/df/d3f1ddf4bb4cb50ed9b1139cc7b1c54c34a1e7ce8fd1b9a37c0d1551a6bd/opentelemetry_api-1.39.1-py3-none-any.whl", hash = "sha256:2edd8463432a7f8443edce90972169b195e7d6a05500cd29e6d13898187c9950", size = 66356, upload-time = "2025-12-11T13:32:17.304Z" },
+]
+
+[[package]]
+name = "packaging"
+version = "25.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/a1/d4/1fc4078c65507b51b96ca8f8c3ba19e6a61c8253c72794544580a7b6c24d/packaging-25.0.tar.gz", hash = "sha256:d443872c98d677bf60f6a1f2f8c1cb748e8fe762d2bf9d3148b5599295b0fc4f", size = 165727, upload-time = "2025-04-19T11:48:59.673Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/20/12/38679034af332785aac8774540895e234f4d07f7545804097de4b666afd8/packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484", size = 66469, upload-time = "2025-04-19T11:48:57.875Z" },
+]
+
+[[package]]
+name = "pathable"
+version = "0.4.4"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/67/93/8f2c2075b180c12c1e9f6a09d1a985bc2036906b13dff1d8917e395f2048/pathable-0.4.4.tar.gz", hash = "sha256:6905a3cd17804edfac7875b5f6c9142a218c7caef78693c2dbbbfbac186d88b2", size = 8124, upload-time = "2025-01-10T18:43:13.247Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/7d/eb/b6260b31b1a96386c0a880edebe26f89669098acea8e0318bff6adb378fd/pathable-0.4.4-py3-none-any.whl", hash = "sha256:5ae9e94793b6ef5a4cbe0a7ce9dbbefc1eec38df253763fd0aeeacf2762dbbc2", size = 9592, upload-time = "2025-01-10T18:43:11.88Z" },
+]
+
+[[package]]
+name = "pathspec"
+version = "1.0.3"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/4c/b2/bb8e495d5262bfec41ab5cb18f522f1012933347fb5d9e62452d446baca2/pathspec-1.0.3.tar.gz", hash = "sha256:bac5cf97ae2c2876e2d25ebb15078eb04d76e4b98921ee31c6f85ade8b59444d", size = 130841, upload-time = "2026-01-09T15:46:46.009Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/32/2b/121e912bd60eebd623f873fd090de0e84f322972ab25a7f9044c056804ed/pathspec-1.0.3-py3-none-any.whl", hash = "sha256:e80767021c1cc524aa3fb14bedda9c34406591343cc42797b386ce7b9354fb6c", size = 55021, upload-time = "2026-01-09T15:46:44.652Z" },
+]
+
+[[package]]
+name = "pathvalidate"
+version = "3.3.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/fa/2a/52a8da6fe965dea6192eb716b357558e103aea0a1e9a8352ad575a8406ca/pathvalidate-3.3.1.tar.gz", hash = "sha256:b18c07212bfead624345bb8e1d6141cdcf15a39736994ea0b94035ad2b1ba177", size = 63262, upload-time = "2025-06-15T09:07:20.736Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/9a/70/875f4a23bfc4731703a5835487d0d2fb999031bd415e7d17c0ae615c18b7/pathvalidate-3.3.1-py3-none-any.whl", hash = "sha256:5263baab691f8e1af96092fa5137ee17df5bdfbd6cff1fcac4d6ef4bc2e1735f", size = 24305, upload-time = "2025-06-15T09:07:19.117Z" },
+]
+
+[[package]]
+name = "pillow"
+version = "12.1.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/d0/02/d52c733a2452ef1ffcc123b68e6606d07276b0e358db70eabad7e40042b7/pillow-12.1.0.tar.gz", hash = "sha256:5c5ae0a06e9ea030ab786b0251b32c7e4ce10e58d983c0d5c56029455180b5b9", size = 46977283, upload-time = "2026-01-02T09:13:29.892Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/43/c4/bf8328039de6cc22182c3ef007a2abfbbdab153661c0a9aa78af8d706391/pillow-12.1.0-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:a83e0850cb8f5ac975291ebfc4170ba481f41a28065277f7f735c202cd8e0af3", size = 5304057, upload-time = "2026-01-02T09:10:46.627Z" },
+    { url = "https://files.pythonhosted.org/packages/43/06/7264c0597e676104cc22ca73ee48f752767cd4b1fe084662620b17e10120/pillow-12.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b6e53e82ec2db0717eabb276aa56cf4e500c9a7cec2c2e189b55c24f65a3e8c0", size = 4657811, upload-time = "2026-01-02T09:10:49.548Z" },
+    { url = "https://files.pythonhosted.org/packages/72/64/f9189e44474610daf83da31145fa56710b627b5c4c0b9c235e34058f6b31/pillow-12.1.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:40a8e3b9e8773876d6e30daed22f016509e3987bab61b3b7fe309d7019a87451", size = 6232243, upload-time = "2026-01-02T09:10:51.62Z" },
+    { url = "https://files.pythonhosted.org/packages/ef/30/0df458009be6a4caca4ca2c52975e6275c387d4e5c95544e34138b41dc86/pillow-12.1.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:800429ac32c9b72909c671aaf17ecd13110f823ddb7db4dfef412a5587c2c24e", size = 8037872, upload-time = "2026-01-02T09:10:53.446Z" },
+    { url = "https://files.pythonhosted.org/packages/e4/86/95845d4eda4f4f9557e25381d70876aa213560243ac1a6d619c46caaedd9/pillow-12.1.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0b022eaaf709541b391ee069f0022ee5b36c709df71986e3f7be312e46f42c84", size = 6345398, upload-time = "2026-01-02T09:10:55.426Z" },
+    { url = "https://files.pythonhosted.org/packages/5c/1f/8e66ab9be3aaf1435bc03edd1ebdf58ffcd17f7349c1d970cafe87af27d9/pillow-12.1.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1f345e7bc9d7f368887c712aa5054558bad44d2a301ddf9248599f4161abc7c0", size = 7034667, upload-time = "2026-01-02T09:10:57.11Z" },
+    { url = "https://files.pythonhosted.org/packages/f9/f6/683b83cb9b1db1fb52b87951b1c0b99bdcfceaa75febf11406c19f82cb5e/pillow-12.1.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d70347c8a5b7ccd803ec0c85c8709f036e6348f1e6a5bf048ecd9c64d3550b8b", size = 6458743, upload-time = "2026-01-02T09:10:59.331Z" },
+    { url = "https://files.pythonhosted.org/packages/9a/7d/de833d63622538c1d58ce5395e7c6cb7e7dce80decdd8bde4a484e095d9f/pillow-12.1.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:1fcc52d86ce7a34fd17cb04e87cfdb164648a3662a6f20565910a99653d66c18", size = 7159342, upload-time = "2026-01-02T09:11:01.82Z" },
+    { url = "https://files.pythonhosted.org/packages/8c/40/50d86571c9e5868c42b81fe7da0c76ca26373f3b95a8dd675425f4a92ec1/pillow-12.1.0-cp311-cp311-win32.whl", hash = "sha256:3ffaa2f0659e2f740473bcf03c702c39a8d4b2b7ffc629052028764324842c64", size = 6328655, upload-time = "2026-01-02T09:11:04.556Z" },
+    { url = "https://files.pythonhosted.org/packages/6c/af/b1d7e301c4cd26cd45d4af884d9ee9b6fab893b0ad2450d4746d74a6968c/pillow-12.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:806f3987ffe10e867bab0ddad45df1148a2b98221798457fa097ad85d6e8bc75", size = 7031469, upload-time = "2026-01-02T09:11:06.538Z" },
+    { url = "https://files.pythonhosted.org/packages/48/36/d5716586d887fb2a810a4a61518a327a1e21c8b7134c89283af272efe84b/pillow-12.1.0-cp311-cp311-win_arm64.whl", hash = "sha256:9f5fefaca968e700ad1a4a9de98bf0869a94e397fe3524c4c9450c1445252304", size = 2452515, upload-time = "2026-01-02T09:11:08.226Z" },
+    { url = "https://files.pythonhosted.org/packages/20/31/dc53fe21a2f2996e1b7d92bf671cdb157079385183ef7c1ae08b485db510/pillow-12.1.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:a332ac4ccb84b6dde65dbace8431f3af08874bf9770719d32a635c4ef411b18b", size = 5262642, upload-time = "2026-01-02T09:11:10.138Z" },
+    { url = "https://files.pythonhosted.org/packages/ab/c1/10e45ac9cc79419cedf5121b42dcca5a50ad2b601fa080f58c22fb27626e/pillow-12.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:907bfa8a9cb790748a9aa4513e37c88c59660da3bcfffbd24a7d9e6abf224551", size = 4657464, upload-time = "2026-01-02T09:11:12.319Z" },
+    { url = "https://files.pythonhosted.org/packages/ad/26/7b82c0ab7ef40ebede7a97c72d473bda5950f609f8e0c77b04af574a0ddb/pillow-12.1.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:efdc140e7b63b8f739d09a99033aa430accce485ff78e6d311973a67b6bf3208", size = 6234878, upload-time = "2026-01-02T09:11:14.096Z" },
+    { url = "https://files.pythonhosted.org/packages/76/25/27abc9792615b5e886ca9411ba6637b675f1b77af3104710ac7353fe5605/pillow-12.1.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:bef9768cab184e7ae6e559c032e95ba8d07b3023c289f79a2bd36e8bf85605a5", size = 8044868, upload-time = "2026-01-02T09:11:15.903Z" },
+    { url = "https://files.pythonhosted.org/packages/0a/ea/f200a4c36d836100e7bc738fc48cd963d3ba6372ebc8298a889e0cfc3359/pillow-12.1.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:742aea052cf5ab5034a53c3846165bc3ce88d7c38e954120db0ab867ca242661", size = 6349468, upload-time = "2026-01-02T09:11:17.631Z" },
+    { url = "https://files.pythonhosted.org/packages/11/8f/48d0b77ab2200374c66d344459b8958c86693be99526450e7aee714e03e4/pillow-12.1.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a6dfc2af5b082b635af6e08e0d1f9f1c4e04d17d4e2ca0ef96131e85eda6eb17", size = 7041518, upload-time = "2026-01-02T09:11:19.389Z" },
+    { url = "https://files.pythonhosted.org/packages/1d/23/c281182eb986b5d31f0a76d2a2c8cd41722d6fb8ed07521e802f9bba52de/pillow-12.1.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:609e89d9f90b581c8d16358c9087df76024cf058fa693dd3e1e1620823f39670", size = 6462829, upload-time = "2026-01-02T09:11:21.28Z" },
+    { url = "https://files.pythonhosted.org/packages/25/ef/7018273e0faac099d7b00982abdcc39142ae6f3bd9ceb06de09779c4a9d6/pillow-12.1.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:43b4899cfd091a9693a1278c4982f3e50f7fb7cff5153b05174b4afc9593b616", size = 7166756, upload-time = "2026-01-02T09:11:23.559Z" },
+    { url = "https://files.pythonhosted.org/packages/8f/c8/993d4b7ab2e341fe02ceef9576afcf5830cdec640be2ac5bee1820d693d4/pillow-12.1.0-cp312-cp312-win32.whl", hash = "sha256:aa0c9cc0b82b14766a99fbe6084409972266e82f459821cd26997a488a7261a7", size = 6328770, upload-time = "2026-01-02T09:11:25.661Z" },
+    { url = "https://files.pythonhosted.org/packages/a7/87/90b358775a3f02765d87655237229ba64a997b87efa8ccaca7dd3e36e7a7/pillow-12.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:d70534cea9e7966169ad29a903b99fc507e932069a881d0965a1a84bb57f6c6d", size = 7033406, upload-time = "2026-01-02T09:11:27.474Z" },
+    { url = "https://files.pythonhosted.org/packages/5d/cf/881b457eccacac9e5b2ddd97d5071fb6d668307c57cbf4e3b5278e06e536/pillow-12.1.0-cp312-cp312-win_arm64.whl", hash = "sha256:65b80c1ee7e14a87d6a068dd3b0aea268ffcabfe0498d38661b00c5b4b22e74c", size = 2452612, upload-time = "2026-01-02T09:11:29.309Z" },
+    { url = "https://files.pythonhosted.org/packages/dd/c7/2530a4aa28248623e9d7f27316b42e27c32ec410f695929696f2e0e4a778/pillow-12.1.0-cp313-cp313-ios_13_0_arm64_iphoneos.whl", hash = "sha256:7b5dd7cbae20285cdb597b10eb5a2c13aa9de6cde9bb64a3c1317427b1db1ae1", size = 4062543, upload-time = "2026-01-02T09:11:31.566Z" },
+    { url = "https://files.pythonhosted.org/packages/8f/1f/40b8eae823dc1519b87d53c30ed9ef085506b05281d313031755c1705f73/pillow-12.1.0-cp313-cp313-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:29a4cef9cb672363926f0470afc516dbf7305a14d8c54f7abbb5c199cd8f8179", size = 4138373, upload-time = "2026-01-02T09:11:33.367Z" },
+    { url = "https://files.pythonhosted.org/packages/d4/77/6fa60634cf06e52139fd0e89e5bbf055e8166c691c42fb162818b7fda31d/pillow-12.1.0-cp313-cp313-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:681088909d7e8fa9e31b9799aaa59ba5234c58e5e4f1951b4c4d1082a2e980e0", size = 3601241, upload-time = "2026-01-02T09:11:35.011Z" },
+    { url = "https://files.pythonhosted.org/packages/4f/bf/28ab865de622e14b747f0cd7877510848252d950e43002e224fb1c9ababf/pillow-12.1.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:983976c2ab753166dc66d36af6e8ec15bb511e4a25856e2227e5f7e00a160587", size = 5262410, upload-time = "2026-01-02T09:11:36.682Z" },
+    { url = "https://files.pythonhosted.org/packages/1c/34/583420a1b55e715937a85bd48c5c0991598247a1fd2eb5423188e765ea02/pillow-12.1.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:db44d5c160a90df2d24a24760bbd37607d53da0b34fb546c4c232af7192298ac", size = 4657312, upload-time = "2026-01-02T09:11:38.535Z" },
+    { url = "https://files.pythonhosted.org/packages/1d/fd/f5a0896839762885b3376ff04878f86ab2b097c2f9a9cdccf4eda8ba8dc0/pillow-12.1.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:6b7a9d1db5dad90e2991645874f708e87d9a3c370c243c2d7684d28f7e133e6b", size = 6232605, upload-time = "2026-01-02T09:11:40.602Z" },
+    { url = "https://files.pythonhosted.org/packages/98/aa/938a09d127ac1e70e6ed467bd03834350b33ef646b31edb7452d5de43792/pillow-12.1.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:6258f3260986990ba2fa8a874f8b6e808cf5abb51a94015ca3dc3c68aa4f30ea", size = 8041617, upload-time = "2026-01-02T09:11:42.721Z" },
+    { url = "https://files.pythonhosted.org/packages/17/e8/538b24cb426ac0186e03f80f78bc8dc7246c667f58b540bdd57c71c9f79d/pillow-12.1.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e115c15e3bc727b1ca3e641a909f77f8ca72a64fff150f666fcc85e57701c26c", size = 6346509, upload-time = "2026-01-02T09:11:44.955Z" },
+    { url = "https://files.pythonhosted.org/packages/01/9a/632e58ec89a32738cabfd9ec418f0e9898a2b4719afc581f07c04a05e3c9/pillow-12.1.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6741e6f3074a35e47c77b23a4e4f2d90db3ed905cb1c5e6e0d49bff2045632bc", size = 7038117, upload-time = "2026-01-02T09:11:46.736Z" },
+    { url = "https://files.pythonhosted.org/packages/c7/a2/d40308cf86eada842ca1f3ffa45d0ca0df7e4ab33c83f81e73f5eaed136d/pillow-12.1.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:935b9d1aed48fcfb3f838caac506f38e29621b44ccc4f8a64d575cb1b2a88644", size = 6460151, upload-time = "2026-01-02T09:11:48.625Z" },
+    { url = "https://files.pythonhosted.org/packages/f1/88/f5b058ad6453a085c5266660a1417bdad590199da1b32fb4efcff9d33b05/pillow-12.1.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:5fee4c04aad8932da9f8f710af2c1a15a83582cfb884152a9caa79d4efcdbf9c", size = 7164534, upload-time = "2026-01-02T09:11:50.445Z" },
+    { url = "https://files.pythonhosted.org/packages/19/ce/c17334caea1db789163b5d855a5735e47995b0b5dc8745e9a3605d5f24c0/pillow-12.1.0-cp313-cp313-win32.whl", hash = "sha256:a786bf667724d84aa29b5db1c61b7bfdde380202aaca12c3461afd6b71743171", size = 6332551, upload-time = "2026-01-02T09:11:52.234Z" },
+    { url = "https://files.pythonhosted.org/packages/e5/07/74a9d941fa45c90a0d9465098fe1ec85de3e2afbdc15cc4766622d516056/pillow-12.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:461f9dfdafa394c59cd6d818bdfdbab4028b83b02caadaff0ffd433faf4c9a7a", size = 7040087, upload-time = "2026-01-02T09:11:54.822Z" },
+    { url = "https://files.pythonhosted.org/packages/88/09/c99950c075a0e9053d8e880595926302575bc742b1b47fe1bbcc8d388d50/pillow-12.1.0-cp313-cp313-win_arm64.whl", hash = "sha256:9212d6b86917a2300669511ed094a9406888362e085f2431a7da985a6b124f45", size = 2452470, upload-time = "2026-01-02T09:11:56.522Z" },
+    { url = "https://files.pythonhosted.org/packages/b5/ba/970b7d85ba01f348dee4d65412476321d40ee04dcb51cd3735b9dc94eb58/pillow-12.1.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:00162e9ca6d22b7c3ee8e61faa3c3253cd19b6a37f126cad04f2f88b306f557d", size = 5264816, upload-time = "2026-01-02T09:11:58.227Z" },
+    { url = "https://files.pythonhosted.org/packages/10/60/650f2fb55fdba7a510d836202aa52f0baac633e50ab1cf18415d332188fb/pillow-12.1.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:7d6daa89a00b58c37cb1747ec9fb7ac3bc5ffd5949f5888657dfddde6d1312e0", size = 4660472, upload-time = "2026-01-02T09:12:00.798Z" },
+    { url = "https://files.pythonhosted.org/packages/2b/c0/5273a99478956a099d533c4f46cbaa19fd69d606624f4334b85e50987a08/pillow-12.1.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:e2479c7f02f9d505682dc47df8c0ea1fc5e264c4d1629a5d63fe3e2334b89554", size = 6268974, upload-time = "2026-01-02T09:12:02.572Z" },
+    { url = "https://files.pythonhosted.org/packages/b4/26/0bf714bc2e73d5267887d47931d53c4ceeceea6978148ed2ab2a4e6463c4/pillow-12.1.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f188d580bd870cda1e15183790d1cc2fa78f666e76077d103edf048eed9c356e", size = 8073070, upload-time = "2026-01-02T09:12:04.75Z" },
+    { url = "https://files.pythonhosted.org/packages/43/cf/1ea826200de111a9d65724c54f927f3111dc5ae297f294b370a670c17786/pillow-12.1.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0fde7ec5538ab5095cc02df38ee99b0443ff0e1c847a045554cf5f9af1f4aa82", size = 6380176, upload-time = "2026-01-02T09:12:06.626Z" },
+    { url = "https://files.pythonhosted.org/packages/03/e0/7938dd2b2013373fd85d96e0f38d62b7a5a262af21ac274250c7ca7847c9/pillow-12.1.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0ed07dca4a8464bada6139ab38f5382f83e5f111698caf3191cb8dbf27d908b4", size = 7067061, upload-time = "2026-01-02T09:12:08.624Z" },
+    { url = "https://files.pythonhosted.org/packages/86/ad/a2aa97d37272a929a98437a8c0ac37b3cf012f4f8721e1bd5154699b2518/pillow-12.1.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:f45bd71d1fa5e5749587613037b172e0b3b23159d1c00ef2fc920da6f470e6f0", size = 6491824, upload-time = "2026-01-02T09:12:10.488Z" },
+    { url = "https://files.pythonhosted.org/packages/a4/44/80e46611b288d51b115826f136fb3465653c28f491068a72d3da49b54cd4/pillow-12.1.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:277518bf4fe74aa91489e1b20577473b19ee70fb97c374aa50830b279f25841b", size = 7190911, upload-time = "2026-01-02T09:12:12.772Z" },
+    { url = "https://files.pythonhosted.org/packages/86/77/eacc62356b4cf81abe99ff9dbc7402750044aed02cfd6a503f7c6fc11f3e/pillow-12.1.0-cp313-cp313t-win32.whl", hash = "sha256:7315f9137087c4e0ee73a761b163fc9aa3b19f5f606a7fc08d83fd3e4379af65", size = 6336445, upload-time = "2026-01-02T09:12:14.775Z" },
+    { url = "https://files.pythonhosted.org/packages/e7/3c/57d81d0b74d218706dafccb87a87ea44262c43eef98eb3b164fd000e0491/pillow-12.1.0-cp313-cp313t-win_amd64.whl", hash = "sha256:0ddedfaa8b5f0b4ffbc2fa87b556dc59f6bb4ecb14a53b33f9189713ae8053c0", size = 7045354, upload-time = "2026-01-02T09:12:16.599Z" },
+    { url = "https://files.pythonhosted.org/packages/ac/82/8b9b97bba2e3576a340f93b044a3a3a09841170ab4c1eb0d5c93469fd32f/pillow-12.1.0-cp313-cp313t-win_arm64.whl", hash = "sha256:80941e6d573197a0c28f394753de529bb436b1ca990ed6e765cf42426abc39f8", size = 2454547, upload-time = "2026-01-02T09:12:18.704Z" },
+    { url = "https://files.pythonhosted.org/packages/8c/87/bdf971d8bbcf80a348cc3bacfcb239f5882100fe80534b0ce67a784181d8/pillow-12.1.0-cp314-cp314-ios_13_0_arm64_iphoneos.whl", hash = "sha256:5cb7bc1966d031aec37ddb9dcf15c2da5b2e9f7cc3ca7c54473a20a927e1eb91", size = 4062533, upload-time = "2026-01-02T09:12:20.791Z" },
+    { url = "https://files.pythonhosted.org/packages/ff/4f/5eb37a681c68d605eb7034c004875c81f86ec9ef51f5be4a63eadd58859a/pillow-12.1.0-cp314-cp314-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:97e9993d5ed946aba26baf9c1e8cf18adbab584b99f452ee72f7ee8acb882796", size = 4138546, upload-time = "2026-01-02T09:12:23.664Z" },
+    { url = "https://files.pythonhosted.org/packages/11/6d/19a95acb2edbace40dcd582d077b991646b7083c41b98da4ed7555b59733/pillow-12.1.0-cp314-cp314-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:414b9a78e14ffeb98128863314e62c3f24b8a86081066625700b7985b3f529bd", size = 3601163, upload-time = "2026-01-02T09:12:26.338Z" },
+    { url = "https://files.pythonhosted.org/packages/fc/36/2b8138e51cb42e4cc39c3297713455548be855a50558c3ac2beebdc251dd/pillow-12.1.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:e6bdb408f7c9dd2a5ff2b14a3b0bb6d4deb29fb9961e6eb3ae2031ae9a5cec13", size = 5266086, upload-time = "2026-01-02T09:12:28.782Z" },
+    { url = "https://files.pythonhosted.org/packages/53/4b/649056e4d22e1caa90816bf99cef0884aed607ed38075bd75f091a607a38/pillow-12.1.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:3413c2ae377550f5487991d444428f1a8ae92784aac79caa8b1e3b89b175f77e", size = 4657344, upload-time = "2026-01-02T09:12:31.117Z" },
+    { url = "https://files.pythonhosted.org/packages/6c/6b/c5742cea0f1ade0cd61485dc3d81f05261fc2276f537fbdc00802de56779/pillow-12.1.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:e5dcbe95016e88437ecf33544ba5db21ef1b8dd6e1b434a2cb2a3d605299e643", size = 6232114, upload-time = "2026-01-02T09:12:32.936Z" },
+    { url = "https://files.pythonhosted.org/packages/bf/8f/9f521268ce22d63991601aafd3d48d5ff7280a246a1ef62d626d67b44064/pillow-12.1.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d0a7735df32ccbcc98b98a1ac785cc4b19b580be1bdf0aeb5c03223220ea09d5", size = 8042708, upload-time = "2026-01-02T09:12:34.78Z" },
+    { url = "https://files.pythonhosted.org/packages/1a/eb/257f38542893f021502a1bbe0c2e883c90b5cff26cc33b1584a841a06d30/pillow-12.1.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0c27407a2d1b96774cbc4a7594129cc027339fd800cd081e44497722ea1179de", size = 6347762, upload-time = "2026-01-02T09:12:36.748Z" },
+    { url = "https://files.pythonhosted.org/packages/c4/5a/8ba375025701c09b309e8d5163c5a4ce0102fa86bbf8800eb0d7ac87bc51/pillow-12.1.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:15c794d74303828eaa957ff8070846d0efe8c630901a1c753fdc63850e19ecd9", size = 7039265, upload-time = "2026-01-02T09:12:39.082Z" },
+    { url = "https://files.pythonhosted.org/packages/cf/dc/cf5e4cdb3db533f539e88a7bbf9f190c64ab8a08a9bc7a4ccf55067872e4/pillow-12.1.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c990547452ee2800d8506c4150280757f88532f3de2a58e3022e9b179107862a", size = 6462341, upload-time = "2026-01-02T09:12:40.946Z" },
+    { url = "https://files.pythonhosted.org/packages/d0/47/0291a25ac9550677e22eda48510cfc4fa4b2ef0396448b7fbdc0a6946309/pillow-12.1.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:b63e13dd27da389ed9475b3d28510f0f954bca0041e8e551b2a4eb1eab56a39a", size = 7165395, upload-time = "2026-01-02T09:12:42.706Z" },
+    { url = "https://files.pythonhosted.org/packages/4f/4c/e005a59393ec4d9416be06e6b45820403bb946a778e39ecec62f5b2b991e/pillow-12.1.0-cp314-cp314-win32.whl", hash = "sha256:1a949604f73eb07a8adab38c4fe50791f9919344398bdc8ac6b307f755fc7030", size = 6431413, upload-time = "2026-01-02T09:12:44.944Z" },
+    { url = "https://files.pythonhosted.org/packages/1c/af/f23697f587ac5f9095d67e31b81c95c0249cd461a9798a061ed6709b09b5/pillow-12.1.0-cp314-cp314-win_amd64.whl", hash = "sha256:4f9f6a650743f0ddee5593ac9e954ba1bdbc5e150bc066586d4f26127853ab94", size = 7176779, upload-time = "2026-01-02T09:12:46.727Z" },
+    { url = "https://files.pythonhosted.org/packages/b3/36/6a51abf8599232f3e9afbd16d52829376a68909fe14efe29084445db4b73/pillow-12.1.0-cp314-cp314-win_arm64.whl", hash = "sha256:808b99604f7873c800c4840f55ff389936ef1948e4e87645eaf3fccbc8477ac4", size = 2543105, upload-time = "2026-01-02T09:12:49.243Z" },
+    { url = "https://files.pythonhosted.org/packages/82/54/2e1dd20c8749ff225080d6ba465a0cab4387f5db0d1c5fb1439e2d99923f/pillow-12.1.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:bc11908616c8a283cf7d664f77411a5ed2a02009b0097ff8abbba5e79128ccf2", size = 5268571, upload-time = "2026-01-02T09:12:51.11Z" },
+    { url = "https://files.pythonhosted.org/packages/57/61/571163a5ef86ec0cf30d265ac2a70ae6fc9e28413d1dc94fa37fae6bda89/pillow-12.1.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:896866d2d436563fa2a43a9d72f417874f16b5545955c54a64941e87c1376c61", size = 4660426, upload-time = "2026-01-02T09:12:52.865Z" },
+    { url = "https://files.pythonhosted.org/packages/5e/e1/53ee5163f794aef1bf84243f755ee6897a92c708505350dd1923f4afec48/pillow-12.1.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8e178e3e99d3c0ea8fc64b88447f7cac8ccf058af422a6cedc690d0eadd98c51", size = 6269908, upload-time = "2026-01-02T09:12:54.884Z" },
+    { url = "https://files.pythonhosted.org/packages/bc/0b/b4b4106ff0ee1afa1dc599fde6ab230417f800279745124f6c50bcffed8e/pillow-12.1.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:079af2fb0c599c2ec144ba2c02766d1b55498e373b3ac64687e43849fbbef5bc", size = 8074733, upload-time = "2026-01-02T09:12:56.802Z" },
+    { url = "https://files.pythonhosted.org/packages/19/9f/80b411cbac4a732439e629a26ad3ef11907a8c7fc5377b7602f04f6fe4e7/pillow-12.1.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bdec5e43377761c5dbca620efb69a77f6855c5a379e32ac5b158f54c84212b14", size = 6381431, upload-time = "2026-01-02T09:12:58.823Z" },
+    { url = "https://files.pythonhosted.org/packages/8f/b7/d65c45db463b66ecb6abc17c6ba6917a911202a07662247e1355ce1789e7/pillow-12.1.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:565c986f4b45c020f5421a4cea13ef294dde9509a8577f29b2fc5edc7587fff8", size = 7068529, upload-time = "2026-01-02T09:13:00.885Z" },
+    { url = "https://files.pythonhosted.org/packages/50/96/dfd4cd726b4a45ae6e3c669fc9e49deb2241312605d33aba50499e9d9bd1/pillow-12.1.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:43aca0a55ce1eefc0aefa6253661cb54571857b1a7b2964bd8a1e3ef4b729924", size = 6492981, upload-time = "2026-01-02T09:13:03.314Z" },
+    { url = "https://files.pythonhosted.org/packages/4d/1c/b5dc52cf713ae46033359c5ca920444f18a6359ce1020dd3e9c553ea5bc6/pillow-12.1.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:0deedf2ea233722476b3a81e8cdfbad786f7adbed5d848469fa59fe52396e4ef", size = 7191878, upload-time = "2026-01-02T09:13:05.276Z" },
+    { url = "https://files.pythonhosted.org/packages/53/26/c4188248bd5edaf543864fe4834aebe9c9cb4968b6f573ce014cc42d0720/pillow-12.1.0-cp314-cp314t-win32.whl", hash = "sha256:b17fbdbe01c196e7e159aacb889e091f28e61020a8abeac07b68079b6e626988", size = 6438703, upload-time = "2026-01-02T09:13:07.491Z" },
+    { url = "https://files.pythonhosted.org/packages/b8/0e/69ed296de8ea05cb03ee139cee600f424ca166e632567b2d66727f08c7ed/pillow-12.1.0-cp314-cp314t-win_amd64.whl", hash = "sha256:27b9baecb428899db6c0de572d6d305cfaf38ca1596b5c0542a5182e3e74e8c6", size = 7182927, upload-time = "2026-01-02T09:13:09.841Z" },
+    { url = "https://files.pythonhosted.org/packages/fc/f5/68334c015eed9b5cff77814258717dec591ded209ab5b6fb70e2ae873d1d/pillow-12.1.0-cp314-cp314t-win_arm64.whl", hash = "sha256:f61333d817698bdcdd0f9d7793e365ac3d2a21c1f1eb02b32ad6aefb8d8ea831", size = 2545104, upload-time = "2026-01-02T09:13:12.068Z" },
+    { url = "https://files.pythonhosted.org/packages/8b/bc/224b1d98cffd7164b14707c91aac83c07b047fbd8f58eba4066a3e53746a/pillow-12.1.0-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:ca94b6aac0d7af2a10ba08c0f888b3d5114439b6b3ef39968378723622fed377", size = 5228605, upload-time = "2026-01-02T09:13:14.084Z" },
+    { url = "https://files.pythonhosted.org/packages/0c/ca/49ca7769c4550107de049ed85208240ba0f330b3f2e316f24534795702ce/pillow-12.1.0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:351889afef0f485b84078ea40fe33727a0492b9af3904661b0abbafee0355b72", size = 4622245, upload-time = "2026-01-02T09:13:15.964Z" },
+    { url = "https://files.pythonhosted.org/packages/73/48/fac807ce82e5955bcc2718642b94b1bd22a82a6d452aea31cbb678cddf12/pillow-12.1.0-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:bb0984b30e973f7e2884362b7d23d0a348c7143ee559f38ef3eaab640144204c", size = 5247593, upload-time = "2026-01-02T09:13:17.913Z" },
+    { url = "https://files.pythonhosted.org/packages/d2/95/3e0742fe358c4664aed4fd05d5f5373dcdad0b27af52aa0972568541e3f4/pillow-12.1.0-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:84cabc7095dd535ca934d57e9ce2a72ffd216e435a84acb06b2277b1de2689bd", size = 6989008, upload-time = "2026-01-02T09:13:20.083Z" },
+    { url = "https://files.pythonhosted.org/packages/5a/74/fe2ac378e4e202e56d50540d92e1ef4ff34ed687f3c60f6a121bcf99437e/pillow-12.1.0-pp311-pypy311_pp73-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:53d8b764726d3af1a138dd353116f774e3862ec7e3794e0c8781e30db0f35dfc", size = 5313824, upload-time = "2026-01-02T09:13:22.405Z" },
+    { url = "https://files.pythonhosted.org/packages/f3/77/2a60dee1adee4e2655ac328dd05c02a955c1cd683b9f1b82ec3feb44727c/pillow-12.1.0-pp311-pypy311_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5da841d81b1a05ef940a8567da92decaa15bc4d7dedb540a8c219ad83d91808a", size = 5963278, upload-time = "2026-01-02T09:13:24.706Z" },
+    { url = "https://files.pythonhosted.org/packages/2d/71/64e9b1c7f04ae0027f788a248e6297d7fcc29571371fe7d45495a78172c0/pillow-12.1.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:75af0b4c229ac519b155028fa1be632d812a519abba9b46b20e50c6caa184f19", size = 7029809, upload-time = "2026-01-02T09:13:26.541Z" },
+]
+
+[[package]]
+name = "platformdirs"
+version = "4.5.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/cf/86/0248f086a84f01b37aaec0fa567b397df1a119f73c16f6c7a9aac73ea309/platformdirs-4.5.1.tar.gz", hash = "sha256:61d5cdcc6065745cdd94f0f878977f8de9437be93de97c1c12f853c9c0cdcbda", size = 21715, upload-time = "2025-12-05T13:52:58.638Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/cb/28/3bfe2fa5a7b9c46fe7e13c97bda14c895fb10fa2ebf1d0abb90e0cea7ee1/platformdirs-4.5.1-py3-none-any.whl", hash = "sha256:d03afa3963c806a9bed9d5125c8f4cb2fdaf74a55ab60e5d59b3fde758104d31", size = 18731, upload-time = "2025-12-05T13:52:56.823Z" },
+]
+
+[[package]]
+name = "pluggy"
+version = "1.6.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/f9/e2/3e91f31a7d2b083fe6ef3fa267035b518369d9511ffab804f839851d2779/pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3", size = 69412, upload-time = "2025-05-15T12:30:07.975Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" },
+]
+
+[[package]]
+name = "prometheus-client"
+version = "0.24.1"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/d6/54/cfe61301667036ec958cb99bd3efefba235e65cdeb9c84d24a8293ba1d90/mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba", size = 8729, upload-time = "2022-08-14T12:40:10.846Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/f0/58/a794d23feb6b00fc0c72787d7e87d872a6730dd9ed7c7b3e954637d8f280/prometheus_client-0.24.1.tar.gz", hash = "sha256:7e0ced7fbbd40f7b84962d5d2ab6f17ef88a72504dcf7c0b40737b43b2a461f9", size = 85616, upload-time = "2026-01-14T15:26:26.965Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979, upload-time = "2022-08-14T12:40:09.779Z" },
+    { url = "https://files.pythonhosted.org/packages/74/c3/24a2f845e3917201628ecaba4f18bab4d18a337834c1df2a159ee9d22a42/prometheus_client-0.24.1-py3-none-any.whl", hash = "sha256:150db128af71a5c2482b36e588fc8a6b95e498750da4b17065947c16070f4055", size = 64057, upload-time = "2026-01-14T15:26:24.42Z" },
 ]
 
 [[package]]
-name = "mypy"
-version = "1.19.1"
+name = "py-key-value-aio"
+version = "0.3.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "librt", marker = "platform_python_implementation != 'PyPy'" },
-    { name = "mypy-extensions" },
-    { name = "pathspec" },
+    { name = "beartype" },
+    { name = "py-key-value-shared" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/93/ce/3136b771dddf5ac905cc193b461eb67967cf3979688c6696e1f2cdcde7ea/py_key_value_aio-0.3.0.tar.gz", hash = "sha256:858e852fcf6d696d231266da66042d3355a7f9871650415feef9fca7a6cd4155", size = 50801, upload-time = "2025-11-17T16:50:04.711Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/99/10/72f6f213b8f0bce36eff21fda0a13271834e9eeff7f9609b01afdc253c79/py_key_value_aio-0.3.0-py3-none-any.whl", hash = "sha256:1c781915766078bfd608daa769fefb97e65d1d73746a3dfb640460e322071b64", size = 96342, upload-time = "2025-11-17T16:50:03.801Z" },
+]
+
+[package.optional-dependencies]
+disk = [
+    { name = "diskcache" },
+    { name = "pathvalidate" },
+]
+keyring = [
+    { name = "keyring" },
+]
+memory = [
+    { name = "cachetools" },
+]
+redis = [
+    { name = "redis" },
+]
+
+[[package]]
+name = "py-key-value-shared"
+version = "0.3.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "beartype" },
     { name = "typing-extensions" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/f5/db/4efed9504bc01309ab9c2da7e352cc223569f05478012b5d9ece38fd44d2/mypy-1.19.1.tar.gz", hash = "sha256:19d88bb05303fe63f71dd2c6270daca27cb9401c4ca8255fe50d1d920e0eb9ba", size = 3582404, upload-time = "2025-12-15T05:03:48.42Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/7b/e4/1971dfc4620a3a15b4579fe99e024f5edd6e0967a71154771a059daff4db/py_key_value_shared-0.3.0.tar.gz", hash = "sha256:8fdd786cf96c3e900102945f92aa1473138ebe960ef49da1c833790160c28a4b", size = 11666, upload-time = "2025-11-17T16:50:06.849Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/ef/47/6b3ebabd5474d9cdc170d1342fbf9dddc1b0ec13ec90bf9004ee6f391c31/mypy-1.19.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d8dfc6ab58ca7dda47d9237349157500468e404b17213d44fc1cb77bce532288", size = 13028539, upload-time = "2025-12-15T05:03:44.129Z" },
-    { url = "https://files.pythonhosted.org/packages/5c/a6/ac7c7a88a3c9c54334f53a941b765e6ec6c4ebd65d3fe8cdcfbe0d0fd7db/mypy-1.19.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e3f276d8493c3c97930e354b2595a44a21348b320d859fb4a2b9f66da9ed27ab", size = 12083163, upload-time = "2025-12-15T05:03:37.679Z" },
-    { url = "https://files.pythonhosted.org/packages/67/af/3afa9cf880aa4a2c803798ac24f1d11ef72a0c8079689fac5cfd815e2830/mypy-1.19.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2abb24cf3f17864770d18d673c85235ba52456b36a06b6afc1e07c1fdcd3d0e6", size = 12687629, upload-time = "2025-12-15T05:02:31.526Z" },
-    { url = "https://files.pythonhosted.org/packages/2d/46/20f8a7114a56484ab268b0ab372461cb3a8f7deed31ea96b83a4e4cfcfca/mypy-1.19.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a009ffa5a621762d0c926a078c2d639104becab69e79538a494bcccb62cc0331", size = 13436933, upload-time = "2025-12-15T05:03:15.606Z" },
-    { url = "https://files.pythonhosted.org/packages/5b/f8/33b291ea85050a21f15da910002460f1f445f8007adb29230f0adea279cb/mypy-1.19.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:f7cee03c9a2e2ee26ec07479f38ea9c884e301d42c6d43a19d20fb014e3ba925", size = 13661754, upload-time = "2025-12-15T05:02:26.731Z" },
-    { url = "https://files.pythonhosted.org/packages/fd/a3/47cbd4e85bec4335a9cd80cf67dbc02be21b5d4c9c23ad6b95d6c5196bac/mypy-1.19.1-cp311-cp311-win_amd64.whl", hash = "sha256:4b84a7a18f41e167f7995200a1d07a4a6810e89d29859df936f1c3923d263042", size = 10055772, upload-time = "2025-12-15T05:03:26.179Z" },
-    { url = "https://files.pythonhosted.org/packages/06/8a/19bfae96f6615aa8a0604915512e0289b1fad33d5909bf7244f02935d33a/mypy-1.19.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:a8174a03289288c1f6c46d55cef02379b478bfbc8e358e02047487cad44c6ca1", size = 13206053, upload-time = "2025-12-15T05:03:46.622Z" },
-    { url = "https://files.pythonhosted.org/packages/a5/34/3e63879ab041602154ba2a9f99817bb0c85c4df19a23a1443c8986e4d565/mypy-1.19.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ffcebe56eb09ff0c0885e750036a095e23793ba6c2e894e7e63f6d89ad51f22e", size = 12219134, upload-time = "2025-12-15T05:03:24.367Z" },
-    { url = "https://files.pythonhosted.org/packages/89/cc/2db6f0e95366b630364e09845672dbee0cbf0bbe753a204b29a944967cd9/mypy-1.19.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b64d987153888790bcdb03a6473d321820597ab8dd9243b27a92153c4fa50fd2", size = 12731616, upload-time = "2025-12-15T05:02:44.725Z" },
-    { url = "https://files.pythonhosted.org/packages/00/be/dd56c1fd4807bc1eba1cf18b2a850d0de7bacb55e158755eb79f77c41f8e/mypy-1.19.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c35d298c2c4bba75feb2195655dfea8124d855dfd7343bf8b8c055421eaf0cf8", size = 13620847, upload-time = "2025-12-15T05:03:39.633Z" },
-    { url = "https://files.pythonhosted.org/packages/6d/42/332951aae42b79329f743bf1da088cd75d8d4d9acc18fbcbd84f26c1af4e/mypy-1.19.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:34c81968774648ab5ac09c29a375fdede03ba253f8f8287847bd480782f73a6a", size = 13834976, upload-time = "2025-12-15T05:03:08.786Z" },
-    { url = "https://files.pythonhosted.org/packages/6f/63/e7493e5f90e1e085c562bb06e2eb32cae27c5057b9653348d38b47daaecc/mypy-1.19.1-cp312-cp312-win_amd64.whl", hash = "sha256:b10e7c2cd7870ba4ad9b2d8a6102eb5ffc1f16ca35e3de6bfa390c1113029d13", size = 10118104, upload-time = "2025-12-15T05:03:10.834Z" },
-    { url = "https://files.pythonhosted.org/packages/de/9f/a6abae693f7a0c697dbb435aac52e958dc8da44e92e08ba88d2e42326176/mypy-1.19.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e3157c7594ff2ef1634ee058aafc56a82db665c9438fd41b390f3bde1ab12250", size = 13201927, upload-time = "2025-12-15T05:02:29.138Z" },
-    { url = "https://files.pythonhosted.org/packages/9a/a4/45c35ccf6e1c65afc23a069f50e2c66f46bd3798cbe0d680c12d12935caa/mypy-1.19.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:bdb12f69bcc02700c2b47e070238f42cb87f18c0bc1fc4cdb4fb2bc5fd7a3b8b", size = 12206730, upload-time = "2025-12-15T05:03:01.325Z" },
-    { url = "https://files.pythonhosted.org/packages/05/bb/cdcf89678e26b187650512620eec8368fded4cfd99cfcb431e4cdfd19dec/mypy-1.19.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f859fb09d9583a985be9a493d5cfc5515b56b08f7447759a0c5deaf68d80506e", size = 12724581, upload-time = "2025-12-15T05:03:20.087Z" },
-    { url = "https://files.pythonhosted.org/packages/d1/32/dd260d52babf67bad8e6770f8e1102021877ce0edea106e72df5626bb0ec/mypy-1.19.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c9a6538e0415310aad77cb94004ca6482330fece18036b5f360b62c45814c4ef", size = 13616252, upload-time = "2025-12-15T05:02:49.036Z" },
-    { url = "https://files.pythonhosted.org/packages/71/d0/5e60a9d2e3bd48432ae2b454b7ef2b62a960ab51292b1eda2a95edd78198/mypy-1.19.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:da4869fc5e7f62a88f3fe0b5c919d1d9f7ea3cef92d3689de2823fd27e40aa75", size = 13840848, upload-time = "2025-12-15T05:02:55.95Z" },
-    { url = "https://files.pythonhosted.org/packages/98/76/d32051fa65ecf6cc8c6610956473abdc9b4c43301107476ac03559507843/mypy-1.19.1-cp313-cp313-win_amd64.whl", hash = "sha256:016f2246209095e8eda7538944daa1d60e1e8134d98983b9fc1e92c1fc0cb8dd", size = 10135510, upload-time = "2025-12-15T05:02:58.438Z" },
-    { url = "https://files.pythonhosted.org/packages/de/eb/b83e75f4c820c4247a58580ef86fcd35165028f191e7e1ba57128c52782d/mypy-1.19.1-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:06e6170bd5836770e8104c8fdd58e5e725cfeb309f0a6c681a811f557e97eac1", size = 13199744, upload-time = "2025-12-15T05:03:30.823Z" },
-    { url = "https://files.pythonhosted.org/packages/94/28/52785ab7bfa165f87fcbb61547a93f98bb20e7f82f90f165a1f69bce7b3d/mypy-1.19.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:804bd67b8054a85447c8954215a906d6eff9cabeabe493fb6334b24f4bfff718", size = 12215815, upload-time = "2025-12-15T05:02:42.323Z" },
-    { url = "https://files.pythonhosted.org/packages/0a/c6/bdd60774a0dbfb05122e3e925f2e9e846c009e479dcec4821dad881f5b52/mypy-1.19.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:21761006a7f497cb0d4de3d8ef4ca70532256688b0523eee02baf9eec895e27b", size = 12740047, upload-time = "2025-12-15T05:03:33.168Z" },
-    { url = "https://files.pythonhosted.org/packages/32/2a/66ba933fe6c76bd40d1fe916a83f04fed253152f451a877520b3c4a5e41e/mypy-1.19.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:28902ee51f12e0f19e1e16fbe2f8f06b6637f482c459dd393efddd0ec7f82045", size = 13601998, upload-time = "2025-12-15T05:03:13.056Z" },
-    { url = "https://files.pythonhosted.org/packages/e3/da/5055c63e377c5c2418760411fd6a63ee2b96cf95397259038756c042574f/mypy-1.19.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:481daf36a4c443332e2ae9c137dfee878fcea781a2e3f895d54bd3002a900957", size = 13807476, upload-time = "2025-12-15T05:03:17.977Z" },
-    { url = "https://files.pythonhosted.org/packages/cd/09/4ebd873390a063176f06b0dbf1f7783dd87bd120eae7727fa4ae4179b685/mypy-1.19.1-cp314-cp314-win_amd64.whl", hash = "sha256:8bb5c6f6d043655e055be9b542aa5f3bdd30e4f3589163e85f93f3640060509f", size = 10281872, upload-time = "2025-12-15T05:03:05.549Z" },
-    { url = "https://files.pythonhosted.org/packages/8d/f4/4ce9a05ce5ded1de3ec1c1d96cf9f9504a04e54ce0ed55cfa38619a32b8d/mypy-1.19.1-py3-none-any.whl", hash = "sha256:f1235f5ea01b7db5468d53ece6aaddf1ad0b88d9e7462b86ef96fe04995d7247", size = 2471239, upload-time = "2025-12-15T05:03:07.248Z" },
+    { url = "https://files.pythonhosted.org/packages/51/e4/b8b0a03ece72f47dce2307d36e1c34725b7223d209fc679315ffe6a4e2c3/py_key_value_shared-0.3.0-py3-none-any.whl", hash = "sha256:5b0efba7ebca08bb158b1e93afc2f07d30b8f40c2fc12ce24a4c0d84f42f9298", size = 19560, upload-time = "2025-11-17T16:50:05.954Z" },
 ]
 
 [[package]]
-name = "mypy-extensions"
-version = "1.1.0"
+name = "pycparser"
+version = "3.0"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/a2/6e/371856a3fb9d31ca8dac321cda606860fa4548858c0cc45d9d1d4ca2628b/mypy_extensions-1.1.0.tar.gz", hash = "sha256:52e68efc3284861e772bbcd66823fde5ae21fd2fdb51c62a211403730b916558", size = 6343, upload-time = "2025-04-22T14:54:24.164Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/1b/7d/92392ff7815c21062bea51aa7b87d45576f649f16458d78b7cf94b9ab2e6/pycparser-3.0.tar.gz", hash = "sha256:600f49d217304a5902ac3c37e1281c9fe94e4d0489de643a9504c5cdfdfc6b29", size = 103492, upload-time = "2026-01-21T14:26:51.89Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/79/7b/2c79738432f5c924bef5071f933bcc9efd0473bac3b4aa584a6f7c1c8df8/mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505", size = 4963, upload-time = "2025-04-22T14:54:22.983Z" },
+    { url = "https://files.pythonhosted.org/packages/0c/c3/44f3fbbfa403ea2a7c779186dc20772604442dde72947e7d01069cbe98e3/pycparser-3.0-py3-none-any.whl", hash = "sha256:b727414169a36b7d524c1c3e31839a521725078d7b2ff038656844266160a992", size = 48172, upload-time = "2026-01-21T14:26:50.693Z" },
 ]
 
 [[package]]
-name = "packaging"
-version = "25.0"
+name = "pydantic"
+version = "2.12.5"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/a1/d4/1fc4078c65507b51b96ca8f8c3ba19e6a61c8253c72794544580a7b6c24d/packaging-25.0.tar.gz", hash = "sha256:d443872c98d677bf60f6a1f2f8c1cb748e8fe762d2bf9d3148b5599295b0fc4f", size = 165727, upload-time = "2025-04-19T11:48:59.673Z" }
+dependencies = [
+    { name = "annotated-types" },
+    { name = "pydantic-core" },
+    { name = "typing-extensions" },
+    { name = "typing-inspection" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/69/44/36f1a6e523abc58ae5f928898e4aca2e0ea509b5aa6f6f392a5d882be928/pydantic-2.12.5.tar.gz", hash = "sha256:4d351024c75c0f085a9febbb665ce8c0c6ec5d30e903bdb6394b7ede26aebb49", size = 821591, upload-time = "2025-11-26T15:11:46.471Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/20/12/38679034af332785aac8774540895e234f4d07f7545804097de4b666afd8/packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484", size = 66469, upload-time = "2025-04-19T11:48:57.875Z" },
+    { url = "https://files.pythonhosted.org/packages/5a/87/b70ad306ebb6f9b585f114d0ac2137d792b48be34d732d60e597c2f8465a/pydantic-2.12.5-py3-none-any.whl", hash = "sha256:e561593fccf61e8a20fc46dfc2dfe075b8be7d0188df33f221ad1f0139180f9d", size = 463580, upload-time = "2025-11-26T15:11:44.605Z" },
+]
+
+[package.optional-dependencies]
+email = [
+    { name = "email-validator" },
 ]
 
 [[package]]
-name = "pathspec"
-version = "1.0.3"
+name = "pydantic-core"
+version = "2.41.5"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/4c/b2/bb8e495d5262bfec41ab5cb18f522f1012933347fb5d9e62452d446baca2/pathspec-1.0.3.tar.gz", hash = "sha256:bac5cf97ae2c2876e2d25ebb15078eb04d76e4b98921ee31c6f85ade8b59444d", size = 130841, upload-time = "2026-01-09T15:46:46.009Z" }
+dependencies = [
+    { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/71/70/23b021c950c2addd24ec408e9ab05d59b035b39d97cdc1130e1bce647bb6/pydantic_core-2.41.5.tar.gz", hash = "sha256:08daa51ea16ad373ffd5e7606252cc32f07bc72b28284b6bc9c6df804816476e", size = 460952, upload-time = "2025-11-04T13:43:49.098Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/32/2b/121e912bd60eebd623f873fd090de0e84f322972ab25a7f9044c056804ed/pathspec-1.0.3-py3-none-any.whl", hash = "sha256:e80767021c1cc524aa3fb14bedda9c34406591343cc42797b386ce7b9354fb6c", size = 55021, upload-time = "2026-01-09T15:46:44.652Z" },
+    { url = "https://files.pythonhosted.org/packages/e8/72/74a989dd9f2084b3d9530b0915fdda64ac48831c30dbf7c72a41a5232db8/pydantic_core-2.41.5-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:a3a52f6156e73e7ccb0f8cced536adccb7042be67cb45f9562e12b319c119da6", size = 2105873, upload-time = "2025-11-04T13:39:31.373Z" },
+    { url = "https://files.pythonhosted.org/packages/12/44/37e403fd9455708b3b942949e1d7febc02167662bf1a7da5b78ee1ea2842/pydantic_core-2.41.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7f3bf998340c6d4b0c9a2f02d6a400e51f123b59565d74dc60d252ce888c260b", size = 1899826, upload-time = "2025-11-04T13:39:32.897Z" },
+    { url = "https://files.pythonhosted.org/packages/33/7f/1d5cab3ccf44c1935a359d51a8a2a9e1a654b744b5e7f80d41b88d501eec/pydantic_core-2.41.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:378bec5c66998815d224c9ca994f1e14c0c21cb95d2f52b6021cc0b2a58f2a5a", size = 1917869, upload-time = "2025-11-04T13:39:34.469Z" },
+    { url = "https://files.pythonhosted.org/packages/6e/6a/30d94a9674a7fe4f4744052ed6c5e083424510be1e93da5bc47569d11810/pydantic_core-2.41.5-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e7b576130c69225432866fe2f4a469a85a54ade141d96fd396dffcf607b558f8", size = 2063890, upload-time = "2025-11-04T13:39:36.053Z" },
+    { url = "https://files.pythonhosted.org/packages/50/be/76e5d46203fcb2750e542f32e6c371ffa9b8ad17364cf94bb0818dbfb50c/pydantic_core-2.41.5-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6cb58b9c66f7e4179a2d5e0f849c48eff5c1fca560994d6eb6543abf955a149e", size = 2229740, upload-time = "2025-11-04T13:39:37.753Z" },
+    { url = "https://files.pythonhosted.org/packages/d3/ee/fed784df0144793489f87db310a6bbf8118d7b630ed07aa180d6067e653a/pydantic_core-2.41.5-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:88942d3a3dff3afc8288c21e565e476fc278902ae4d6d134f1eeda118cc830b1", size = 2350021, upload-time = "2025-11-04T13:39:40.94Z" },
+    { url = "https://files.pythonhosted.org/packages/c8/be/8fed28dd0a180dca19e72c233cbf58efa36df055e5b9d90d64fd1740b828/pydantic_core-2.41.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f31d95a179f8d64d90f6831d71fa93290893a33148d890ba15de25642c5d075b", size = 2066378, upload-time = "2025-11-04T13:39:42.523Z" },
+    { url = "https://files.pythonhosted.org/packages/b0/3b/698cf8ae1d536a010e05121b4958b1257f0b5522085e335360e53a6b1c8b/pydantic_core-2.41.5-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c1df3d34aced70add6f867a8cf413e299177e0c22660cc767218373d0779487b", size = 2175761, upload-time = "2025-11-04T13:39:44.553Z" },
+    { url = "https://files.pythonhosted.org/packages/b8/ba/15d537423939553116dea94ce02f9c31be0fa9d0b806d427e0308ec17145/pydantic_core-2.41.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:4009935984bd36bd2c774e13f9a09563ce8de4abaa7226f5108262fa3e637284", size = 2146303, upload-time = "2025-11-04T13:39:46.238Z" },
+    { url = "https://files.pythonhosted.org/packages/58/7f/0de669bf37d206723795f9c90c82966726a2ab06c336deba4735b55af431/pydantic_core-2.41.5-cp311-cp311-musllinux_1_1_armv7l.whl", hash = "sha256:34a64bc3441dc1213096a20fe27e8e128bd3ff89921706e83c0b1ac971276594", size = 2340355, upload-time = "2025-11-04T13:39:48.002Z" },
+    { url = "https://files.pythonhosted.org/packages/e5/de/e7482c435b83d7e3c3ee5ee4451f6e8973cff0eb6007d2872ce6383f6398/pydantic_core-2.41.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:c9e19dd6e28fdcaa5a1de679aec4141f691023916427ef9bae8584f9c2fb3b0e", size = 2319875, upload-time = "2025-11-04T13:39:49.705Z" },
+    { url = "https://files.pythonhosted.org/packages/fe/e6/8c9e81bb6dd7560e33b9053351c29f30c8194b72f2d6932888581f503482/pydantic_core-2.41.5-cp311-cp311-win32.whl", hash = "sha256:2c010c6ded393148374c0f6f0bf89d206bf3217f201faa0635dcd56bd1520f6b", size = 1987549, upload-time = "2025-11-04T13:39:51.842Z" },
+    { url = "https://files.pythonhosted.org/packages/11/66/f14d1d978ea94d1bc21fc98fcf570f9542fe55bfcc40269d4e1a21c19bf7/pydantic_core-2.41.5-cp311-cp311-win_amd64.whl", hash = "sha256:76ee27c6e9c7f16f47db7a94157112a2f3a00e958bc626e2f4ee8bec5c328fbe", size = 2011305, upload-time = "2025-11-04T13:39:53.485Z" },
+    { url = "https://files.pythonhosted.org/packages/56/d8/0e271434e8efd03186c5386671328154ee349ff0354d83c74f5caaf096ed/pydantic_core-2.41.5-cp311-cp311-win_arm64.whl", hash = "sha256:4bc36bbc0b7584de96561184ad7f012478987882ebf9f9c389b23f432ea3d90f", size = 1972902, upload-time = "2025-11-04T13:39:56.488Z" },
+    { url = "https://files.pythonhosted.org/packages/5f/5d/5f6c63eebb5afee93bcaae4ce9a898f3373ca23df3ccaef086d0233a35a7/pydantic_core-2.41.5-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:f41a7489d32336dbf2199c8c0a215390a751c5b014c2c1c5366e817202e9cdf7", size = 2110990, upload-time = "2025-11-04T13:39:58.079Z" },
+    { url = "https://files.pythonhosted.org/packages/aa/32/9c2e8ccb57c01111e0fd091f236c7b371c1bccea0fa85247ac55b1e2b6b6/pydantic_core-2.41.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:070259a8818988b9a84a449a2a7337c7f430a22acc0859c6b110aa7212a6d9c0", size = 1896003, upload-time = "2025-11-04T13:39:59.956Z" },
+    { url = "https://files.pythonhosted.org/packages/68/b8/a01b53cb0e59139fbc9e4fda3e9724ede8de279097179be4ff31f1abb65a/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e96cea19e34778f8d59fe40775a7a574d95816eb150850a85a7a4c8f4b94ac69", size = 1919200, upload-time = "2025-11-04T13:40:02.241Z" },
+    { url = "https://files.pythonhosted.org/packages/38/de/8c36b5198a29bdaade07b5985e80a233a5ac27137846f3bc2d3b40a47360/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ed2e99c456e3fadd05c991f8f437ef902e00eedf34320ba2b0842bd1c3ca3a75", size = 2052578, upload-time = "2025-11-04T13:40:04.401Z" },
+    { url = "https://files.pythonhosted.org/packages/00/b5/0e8e4b5b081eac6cb3dbb7e60a65907549a1ce035a724368c330112adfdd/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:65840751b72fbfd82c3c640cff9284545342a4f1eb1586ad0636955b261b0b05", size = 2208504, upload-time = "2025-11-04T13:40:06.072Z" },
+    { url = "https://files.pythonhosted.org/packages/77/56/87a61aad59c7c5b9dc8caad5a41a5545cba3810c3e828708b3d7404f6cef/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e536c98a7626a98feb2d3eaf75944ef6f3dbee447e1f841eae16f2f0a72d8ddc", size = 2335816, upload-time = "2025-11-04T13:40:07.835Z" },
+    { url = "https://files.pythonhosted.org/packages/0d/76/941cc9f73529988688a665a5c0ecff1112b3d95ab48f81db5f7606f522d3/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eceb81a8d74f9267ef4081e246ffd6d129da5d87e37a77c9bde550cb04870c1c", size = 2075366, upload-time = "2025-11-04T13:40:09.804Z" },
+    { url = "https://files.pythonhosted.org/packages/d3/43/ebef01f69baa07a482844faaa0a591bad1ef129253ffd0cdaa9d8a7f72d3/pydantic_core-2.41.5-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d38548150c39b74aeeb0ce8ee1d8e82696f4a4e16ddc6de7b1d8823f7de4b9b5", size = 2171698, upload-time = "2025-11-04T13:40:12.004Z" },
+    { url = "https://files.pythonhosted.org/packages/b1/87/41f3202e4193e3bacfc2c065fab7706ebe81af46a83d3e27605029c1f5a6/pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:c23e27686783f60290e36827f9c626e63154b82b116d7fe9adba1fda36da706c", size = 2132603, upload-time = "2025-11-04T13:40:13.868Z" },
+    { url = "https://files.pythonhosted.org/packages/49/7d/4c00df99cb12070b6bccdef4a195255e6020a550d572768d92cc54dba91a/pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_armv7l.whl", hash = "sha256:482c982f814460eabe1d3bb0adfdc583387bd4691ef00b90575ca0d2b6fe2294", size = 2329591, upload-time = "2025-11-04T13:40:15.672Z" },
+    { url = "https://files.pythonhosted.org/packages/cc/6a/ebf4b1d65d458f3cda6a7335d141305dfa19bdc61140a884d165a8a1bbc7/pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:bfea2a5f0b4d8d43adf9d7b8bf019fb46fdd10a2e5cde477fbcb9d1fa08c68e1", size = 2319068, upload-time = "2025-11-04T13:40:17.532Z" },
+    { url = "https://files.pythonhosted.org/packages/49/3b/774f2b5cd4192d5ab75870ce4381fd89cf218af999515baf07e7206753f0/pydantic_core-2.41.5-cp312-cp312-win32.whl", hash = "sha256:b74557b16e390ec12dca509bce9264c3bbd128f8a2c376eaa68003d7f327276d", size = 1985908, upload-time = "2025-11-04T13:40:19.309Z" },
+    { url = "https://files.pythonhosted.org/packages/86/45/00173a033c801cacf67c190fef088789394feaf88a98a7035b0e40d53dc9/pydantic_core-2.41.5-cp312-cp312-win_amd64.whl", hash = "sha256:1962293292865bca8e54702b08a4f26da73adc83dd1fcf26fbc875b35d81c815", size = 2020145, upload-time = "2025-11-04T13:40:21.548Z" },
+    { url = "https://files.pythonhosted.org/packages/f9/22/91fbc821fa6d261b376a3f73809f907cec5ca6025642c463d3488aad22fb/pydantic_core-2.41.5-cp312-cp312-win_arm64.whl", hash = "sha256:1746d4a3d9a794cacae06a5eaaccb4b8643a131d45fbc9af23e353dc0a5ba5c3", size = 1976179, upload-time = "2025-11-04T13:40:23.393Z" },
+    { url = "https://files.pythonhosted.org/packages/87/06/8806241ff1f70d9939f9af039c6c35f2360cf16e93c2ca76f184e76b1564/pydantic_core-2.41.5-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:941103c9be18ac8daf7b7adca8228f8ed6bb7a1849020f643b3a14d15b1924d9", size = 2120403, upload-time = "2025-11-04T13:40:25.248Z" },
+    { url = "https://files.pythonhosted.org/packages/94/02/abfa0e0bda67faa65fef1c84971c7e45928e108fe24333c81f3bfe35d5f5/pydantic_core-2.41.5-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:112e305c3314f40c93998e567879e887a3160bb8689ef3d2c04b6cc62c33ac34", size = 1896206, upload-time = "2025-11-04T13:40:27.099Z" },
+    { url = "https://files.pythonhosted.org/packages/15/df/a4c740c0943e93e6500f9eb23f4ca7ec9bf71b19e608ae5b579678c8d02f/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0cbaad15cb0c90aa221d43c00e77bb33c93e8d36e0bf74760cd00e732d10a6a0", size = 1919307, upload-time = "2025-11-04T13:40:29.806Z" },
+    { url = "https://files.pythonhosted.org/packages/9a/e3/6324802931ae1d123528988e0e86587c2072ac2e5394b4bc2bc34b61ff6e/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:03ca43e12fab6023fc79d28ca6b39b05f794ad08ec2feccc59a339b02f2b3d33", size = 2063258, upload-time = "2025-11-04T13:40:33.544Z" },
+    { url = "https://files.pythonhosted.org/packages/c9/d4/2230d7151d4957dd79c3044ea26346c148c98fbf0ee6ebd41056f2d62ab5/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:dc799088c08fa04e43144b164feb0c13f9a0bc40503f8df3e9fde58a3c0c101e", size = 2214917, upload-time = "2025-11-04T13:40:35.479Z" },
+    { url = "https://files.pythonhosted.org/packages/e6/9f/eaac5df17a3672fef0081b6c1bb0b82b33ee89aa5cec0d7b05f52fd4a1fa/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:97aeba56665b4c3235a0e52b2c2f5ae9cd071b8a8310ad27bddb3f7fb30e9aa2", size = 2332186, upload-time = "2025-11-04T13:40:37.436Z" },
+    { url = "https://files.pythonhosted.org/packages/cf/4e/35a80cae583a37cf15604b44240e45c05e04e86f9cfd766623149297e971/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:406bf18d345822d6c21366031003612b9c77b3e29ffdb0f612367352aab7d586", size = 2073164, upload-time = "2025-11-04T13:40:40.289Z" },
+    { url = "https://files.pythonhosted.org/packages/bf/e3/f6e262673c6140dd3305d144d032f7bd5f7497d3871c1428521f19f9efa2/pydantic_core-2.41.5-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b93590ae81f7010dbe380cdeab6f515902ebcbefe0b9327cc4804d74e93ae69d", size = 2179146, upload-time = "2025-11-04T13:40:42.809Z" },
+    { url = "https://files.pythonhosted.org/packages/75/c7/20bd7fc05f0c6ea2056a4565c6f36f8968c0924f19b7d97bbfea55780e73/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:01a3d0ab748ee531f4ea6c3e48ad9dac84ddba4b0d82291f87248f2f9de8d740", size = 2137788, upload-time = "2025-11-04T13:40:44.752Z" },
+    { url = "https://files.pythonhosted.org/packages/3a/8d/34318ef985c45196e004bc46c6eab2eda437e744c124ef0dbe1ff2c9d06b/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_armv7l.whl", hash = "sha256:6561e94ba9dacc9c61bce40e2d6bdc3bfaa0259d3ff36ace3b1e6901936d2e3e", size = 2340133, upload-time = "2025-11-04T13:40:46.66Z" },
+    { url = "https://files.pythonhosted.org/packages/9c/59/013626bf8c78a5a5d9350d12e7697d3d4de951a75565496abd40ccd46bee/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:915c3d10f81bec3a74fbd4faebe8391013ba61e5a1a8d48c4455b923bdda7858", size = 2324852, upload-time = "2025-11-04T13:40:48.575Z" },
+    { url = "https://files.pythonhosted.org/packages/1a/d9/c248c103856f807ef70c18a4f986693a46a8ffe1602e5d361485da502d20/pydantic_core-2.41.5-cp313-cp313-win32.whl", hash = "sha256:650ae77860b45cfa6e2cdafc42618ceafab3a2d9a3811fcfbd3bbf8ac3c40d36", size = 1994679, upload-time = "2025-11-04T13:40:50.619Z" },
+    { url = "https://files.pythonhosted.org/packages/9e/8b/341991b158ddab181cff136acd2552c9f35bd30380422a639c0671e99a91/pydantic_core-2.41.5-cp313-cp313-win_amd64.whl", hash = "sha256:79ec52ec461e99e13791ec6508c722742ad745571f234ea6255bed38c6480f11", size = 2019766, upload-time = "2025-11-04T13:40:52.631Z" },
+    { url = "https://files.pythonhosted.org/packages/73/7d/f2f9db34af103bea3e09735bb40b021788a5e834c81eedb541991badf8f5/pydantic_core-2.41.5-cp313-cp313-win_arm64.whl", hash = "sha256:3f84d5c1b4ab906093bdc1ff10484838aca54ef08de4afa9de0f5f14d69639cd", size = 1981005, upload-time = "2025-11-04T13:40:54.734Z" },
+    { url = "https://files.pythonhosted.org/packages/ea/28/46b7c5c9635ae96ea0fbb779e271a38129df2550f763937659ee6c5dbc65/pydantic_core-2.41.5-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:3f37a19d7ebcdd20b96485056ba9e8b304e27d9904d233d7b1015db320e51f0a", size = 2119622, upload-time = "2025-11-04T13:40:56.68Z" },
+    { url = "https://files.pythonhosted.org/packages/74/1a/145646e5687e8d9a1e8d09acb278c8535ebe9e972e1f162ed338a622f193/pydantic_core-2.41.5-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:1d1d9764366c73f996edd17abb6d9d7649a7eb690006ab6adbda117717099b14", size = 1891725, upload-time = "2025-11-04T13:40:58.807Z" },
+    { url = "https://files.pythonhosted.org/packages/23/04/e89c29e267b8060b40dca97bfc64a19b2a3cf99018167ea1677d96368273/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:25e1c2af0fce638d5f1988b686f3b3ea8cd7de5f244ca147c777769e798a9cd1", size = 1915040, upload-time = "2025-11-04T13:41:00.853Z" },
+    { url = "https://files.pythonhosted.org/packages/84/a3/15a82ac7bd97992a82257f777b3583d3e84bdb06ba6858f745daa2ec8a85/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:506d766a8727beef16b7adaeb8ee6217c64fc813646b424d0804d67c16eddb66", size = 2063691, upload-time = "2025-11-04T13:41:03.504Z" },
+    { url = "https://files.pythonhosted.org/packages/74/9b/0046701313c6ef08c0c1cf0e028c67c770a4e1275ca73131563c5f2a310a/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4819fa52133c9aa3c387b3328f25c1facc356491e6135b459f1de698ff64d869", size = 2213897, upload-time = "2025-11-04T13:41:05.804Z" },
+    { url = "https://files.pythonhosted.org/packages/8a/cd/6bac76ecd1b27e75a95ca3a9a559c643b3afcd2dd62086d4b7a32a18b169/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2b761d210c9ea91feda40d25b4efe82a1707da2ef62901466a42492c028553a2", size = 2333302, upload-time = "2025-11-04T13:41:07.809Z" },
+    { url = "https://files.pythonhosted.org/packages/4c/d2/ef2074dc020dd6e109611a8be4449b98cd25e1b9b8a303c2f0fca2f2bcf7/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:22f0fb8c1c583a3b6f24df2470833b40207e907b90c928cc8d3594b76f874375", size = 2064877, upload-time = "2025-11-04T13:41:09.827Z" },
+    { url = "https://files.pythonhosted.org/packages/18/66/e9db17a9a763d72f03de903883c057b2592c09509ccfe468187f2a2eef29/pydantic_core-2.41.5-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2782c870e99878c634505236d81e5443092fba820f0373997ff75f90f68cd553", size = 2180680, upload-time = "2025-11-04T13:41:12.379Z" },
+    { url = "https://files.pythonhosted.org/packages/d3/9e/3ce66cebb929f3ced22be85d4c2399b8e85b622db77dad36b73c5387f8f8/pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:0177272f88ab8312479336e1d777f6b124537d47f2123f89cb37e0accea97f90", size = 2138960, upload-time = "2025-11-04T13:41:14.627Z" },
+    { url = "https://files.pythonhosted.org/packages/a6/62/205a998f4327d2079326b01abee48e502ea739d174f0a89295c481a2272e/pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_armv7l.whl", hash = "sha256:63510af5e38f8955b8ee5687740d6ebf7c2a0886d15a6d65c32814613681bc07", size = 2339102, upload-time = "2025-11-04T13:41:16.868Z" },
+    { url = "https://files.pythonhosted.org/packages/3c/0d/f05e79471e889d74d3d88f5bd20d0ed189ad94c2423d81ff8d0000aab4ff/pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:e56ba91f47764cc14f1daacd723e3e82d1a89d783f0f5afe9c364b8bb491ccdb", size = 2326039, upload-time = "2025-11-04T13:41:18.934Z" },
+    { url = "https://files.pythonhosted.org/packages/ec/e1/e08a6208bb100da7e0c4b288eed624a703f4d129bde2da475721a80cab32/pydantic_core-2.41.5-cp314-cp314-win32.whl", hash = "sha256:aec5cf2fd867b4ff45b9959f8b20ea3993fc93e63c7363fe6851424c8a7e7c23", size = 1995126, upload-time = "2025-11-04T13:41:21.418Z" },
+    { url = "https://files.pythonhosted.org/packages/48/5d/56ba7b24e9557f99c9237e29f5c09913c81eeb2f3217e40e922353668092/pydantic_core-2.41.5-cp314-cp314-win_amd64.whl", hash = "sha256:8e7c86f27c585ef37c35e56a96363ab8de4e549a95512445b85c96d3e2f7c1bf", size = 2015489, upload-time = "2025-11-04T13:41:24.076Z" },
+    { url = "https://files.pythonhosted.org/packages/4e/bb/f7a190991ec9e3e0ba22e4993d8755bbc4a32925c0b5b42775c03e8148f9/pydantic_core-2.41.5-cp314-cp314-win_arm64.whl", hash = "sha256:e672ba74fbc2dc8eea59fb6d4aed6845e6905fc2a8afe93175d94a83ba2a01a0", size = 1977288, upload-time = "2025-11-04T13:41:26.33Z" },
+    { url = "https://files.pythonhosted.org/packages/92/ed/77542d0c51538e32e15afe7899d79efce4b81eee631d99850edc2f5e9349/pydantic_core-2.41.5-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:8566def80554c3faa0e65ac30ab0932b9e3a5cd7f8323764303d468e5c37595a", size = 2120255, upload-time = "2025-11-04T13:41:28.569Z" },
+    { url = "https://files.pythonhosted.org/packages/bb/3d/6913dde84d5be21e284439676168b28d8bbba5600d838b9dca99de0fad71/pydantic_core-2.41.5-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:b80aa5095cd3109962a298ce14110ae16b8c1aece8b72f9dafe81cf597ad80b3", size = 1863760, upload-time = "2025-11-04T13:41:31.055Z" },
+    { url = "https://files.pythonhosted.org/packages/5a/f0/e5e6b99d4191da102f2b0eb9687aaa7f5bea5d9964071a84effc3e40f997/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3006c3dd9ba34b0c094c544c6006cc79e87d8612999f1a5d43b769b89181f23c", size = 1878092, upload-time = "2025-11-04T13:41:33.21Z" },
+    { url = "https://files.pythonhosted.org/packages/71/48/36fb760642d568925953bcc8116455513d6e34c4beaa37544118c36aba6d/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:72f6c8b11857a856bcfa48c86f5368439f74453563f951e473514579d44aa612", size = 2053385, upload-time = "2025-11-04T13:41:35.508Z" },
+    { url = "https://files.pythonhosted.org/packages/20/25/92dc684dd8eb75a234bc1c764b4210cf2646479d54b47bf46061657292a8/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5cb1b2f9742240e4bb26b652a5aeb840aa4b417c7748b6f8387927bc6e45e40d", size = 2218832, upload-time = "2025-11-04T13:41:37.732Z" },
+    { url = "https://files.pythonhosted.org/packages/e2/09/f53e0b05023d3e30357d82eb35835d0f6340ca344720a4599cd663dca599/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:bd3d54f38609ff308209bd43acea66061494157703364ae40c951f83ba99a1a9", size = 2327585, upload-time = "2025-11-04T13:41:40Z" },
+    { url = "https://files.pythonhosted.org/packages/aa/4e/2ae1aa85d6af35a39b236b1b1641de73f5a6ac4d5a7509f77b814885760c/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2ff4321e56e879ee8d2a879501c8e469414d948f4aba74a2d4593184eb326660", size = 2041078, upload-time = "2025-11-04T13:41:42.323Z" },
+    { url = "https://files.pythonhosted.org/packages/cd/13/2e215f17f0ef326fc72afe94776edb77525142c693767fc347ed6288728d/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d0d2568a8c11bf8225044aa94409e21da0cb09dcdafe9ecd10250b2baad531a9", size = 2173914, upload-time = "2025-11-04T13:41:45.221Z" },
+    { url = "https://files.pythonhosted.org/packages/02/7a/f999a6dcbcd0e5660bc348a3991c8915ce6599f4f2c6ac22f01d7a10816c/pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:a39455728aabd58ceabb03c90e12f71fd30fa69615760a075b9fec596456ccc3", size = 2129560, upload-time = "2025-11-04T13:41:47.474Z" },
+    { url = "https://files.pythonhosted.org/packages/3a/b1/6c990ac65e3b4c079a4fb9f5b05f5b013afa0f4ed6780a3dd236d2cbdc64/pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_armv7l.whl", hash = "sha256:239edca560d05757817c13dc17c50766136d21f7cd0fac50295499ae24f90fdf", size = 2329244, upload-time = "2025-11-04T13:41:49.992Z" },
+    { url = "https://files.pythonhosted.org/packages/d9/02/3c562f3a51afd4d88fff8dffb1771b30cfdfd79befd9883ee094f5b6c0d8/pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:2a5e06546e19f24c6a96a129142a75cee553cc018ffee48a460059b1185f4470", size = 2331955, upload-time = "2025-11-04T13:41:54.079Z" },
+    { url = "https://files.pythonhosted.org/packages/5c/96/5fb7d8c3c17bc8c62fdb031c47d77a1af698f1d7a406b0f79aaa1338f9ad/pydantic_core-2.41.5-cp314-cp314t-win32.whl", hash = "sha256:b4ececa40ac28afa90871c2cc2b9ffd2ff0bf749380fbdf57d165fd23da353aa", size = 1988906, upload-time = "2025-11-04T13:41:56.606Z" },
+    { url = "https://files.pythonhosted.org/packages/22/ed/182129d83032702912c2e2d8bbe33c036f342cc735737064668585dac28f/pydantic_core-2.41.5-cp314-cp314t-win_amd64.whl", hash = "sha256:80aa89cad80b32a912a65332f64a4450ed00966111b6615ca6816153d3585a8c", size = 1981607, upload-time = "2025-11-04T13:41:58.889Z" },
+    { url = "https://files.pythonhosted.org/packages/9f/ed/068e41660b832bb0b1aa5b58011dea2a3fe0ba7861ff38c4d4904c1c1a99/pydantic_core-2.41.5-cp314-cp314t-win_arm64.whl", hash = "sha256:35b44f37a3199f771c3eaa53051bc8a70cd7b54f333531c59e29fd4db5d15008", size = 1974769, upload-time = "2025-11-04T13:42:01.186Z" },
+    { url = "https://files.pythonhosted.org/packages/11/72/90fda5ee3b97e51c494938a4a44c3a35a9c96c19bba12372fb9c634d6f57/pydantic_core-2.41.5-graalpy311-graalpy242_311_native-macosx_10_12_x86_64.whl", hash = "sha256:b96d5f26b05d03cc60f11a7761a5ded1741da411e7fe0909e27a5e6a0cb7b034", size = 2115441, upload-time = "2025-11-04T13:42:39.557Z" },
+    { url = "https://files.pythonhosted.org/packages/1f/53/8942f884fa33f50794f119012dc6a1a02ac43a56407adaac20463df8e98f/pydantic_core-2.41.5-graalpy311-graalpy242_311_native-macosx_11_0_arm64.whl", hash = "sha256:634e8609e89ceecea15e2d61bc9ac3718caaaa71963717bf3c8f38bfde64242c", size = 1930291, upload-time = "2025-11-04T13:42:42.169Z" },
+    { url = "https://files.pythonhosted.org/packages/79/c8/ecb9ed9cd942bce09fc888ee960b52654fbdbede4ba6c2d6e0d3b1d8b49c/pydantic_core-2.41.5-graalpy311-graalpy242_311_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:93e8740d7503eb008aa2df04d3b9735f845d43ae845e6dcd2be0b55a2da43cd2", size = 1948632, upload-time = "2025-11-04T13:42:44.564Z" },
+    { url = "https://files.pythonhosted.org/packages/2e/1b/687711069de7efa6af934e74f601e2a4307365e8fdc404703afc453eab26/pydantic_core-2.41.5-graalpy311-graalpy242_311_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f15489ba13d61f670dcc96772e733aad1a6f9c429cc27574c6cdaed82d0146ad", size = 2138905, upload-time = "2025-11-04T13:42:47.156Z" },
+    { url = "https://files.pythonhosted.org/packages/09/32/59b0c7e63e277fa7911c2fc70ccfb45ce4b98991e7ef37110663437005af/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-macosx_10_12_x86_64.whl", hash = "sha256:7da7087d756b19037bc2c06edc6c170eeef3c3bafcb8f532ff17d64dc427adfd", size = 2110495, upload-time = "2025-11-04T13:42:49.689Z" },
+    { url = "https://files.pythonhosted.org/packages/aa/81/05e400037eaf55ad400bcd318c05bb345b57e708887f07ddb2d20e3f0e98/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-macosx_11_0_arm64.whl", hash = "sha256:aabf5777b5c8ca26f7824cb4a120a740c9588ed58df9b2d196ce92fba42ff8dc", size = 1915388, upload-time = "2025-11-04T13:42:52.215Z" },
+    { url = "https://files.pythonhosted.org/packages/6e/0d/e3549b2399f71d56476b77dbf3cf8937cec5cd70536bdc0e374a421d0599/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c007fe8a43d43b3969e8469004e9845944f1a80e6acd47c150856bb87f230c56", size = 1942879, upload-time = "2025-11-04T13:42:56.483Z" },
+    { url = "https://files.pythonhosted.org/packages/f7/07/34573da085946b6a313d7c42f82f16e8920bfd730665de2d11c0c37a74b5/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:76d0819de158cd855d1cbb8fcafdf6f5cf1eb8e470abe056d5d161106e38062b", size = 2139017, upload-time = "2025-11-04T13:42:59.471Z" },
+    { url = "https://files.pythonhosted.org/packages/5f/9b/1b3f0e9f9305839d7e84912f9e8bfbd191ed1b1ef48083609f0dabde978c/pydantic_core-2.41.5-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:b2379fa7ed44ddecb5bfe4e48577d752db9fc10be00a6b7446e9663ba143de26", size = 2101980, upload-time = "2025-11-04T13:43:25.97Z" },
+    { url = "https://files.pythonhosted.org/packages/a4/ed/d71fefcb4263df0da6a85b5d8a7508360f2f2e9b3bf5814be9c8bccdccc1/pydantic_core-2.41.5-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:266fb4cbf5e3cbd0b53669a6d1b039c45e3ce651fd5442eff4d07c2cc8d66808", size = 1923865, upload-time = "2025-11-04T13:43:28.763Z" },
+    { url = "https://files.pythonhosted.org/packages/ce/3a/626b38db460d675f873e4444b4bb030453bbe7b4ba55df821d026a0493c4/pydantic_core-2.41.5-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:58133647260ea01e4d0500089a8c4f07bd7aa6ce109682b1426394988d8aaacc", size = 2134256, upload-time = "2025-11-04T13:43:31.71Z" },
+    { url = "https://files.pythonhosted.org/packages/83/d9/8412d7f06f616bbc053d30cb4e5f76786af3221462ad5eee1f202021eb4e/pydantic_core-2.41.5-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:287dad91cfb551c363dc62899a80e9e14da1f0e2b6ebde82c806612ca2a13ef1", size = 2174762, upload-time = "2025-11-04T13:43:34.744Z" },
+    { url = "https://files.pythonhosted.org/packages/55/4c/162d906b8e3ba3a99354e20faa1b49a85206c47de97a639510a0e673f5da/pydantic_core-2.41.5-pp311-pypy311_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:03b77d184b9eb40240ae9fd676ca364ce1085f203e1b1256f8ab9984dca80a84", size = 2143141, upload-time = "2025-11-04T13:43:37.701Z" },
+    { url = "https://files.pythonhosted.org/packages/1f/f2/f11dd73284122713f5f89fc940f370d035fa8e1e078d446b3313955157fe/pydantic_core-2.41.5-pp311-pypy311_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:a668ce24de96165bb239160b3d854943128f4334822900534f2fe947930e5770", size = 2330317, upload-time = "2025-11-04T13:43:40.406Z" },
+    { url = "https://files.pythonhosted.org/packages/88/9d/b06ca6acfe4abb296110fb1273a4d848a0bfb2ff65f3ee92127b3244e16b/pydantic_core-2.41.5-pp311-pypy311_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:f14f8f046c14563f8eb3f45f499cc658ab8d10072961e07225e507adb700e93f", size = 2316992, upload-time = "2025-11-04T13:43:43.602Z" },
+    { url = "https://files.pythonhosted.org/packages/36/c7/cfc8e811f061c841d7990b0201912c3556bfeb99cdcb7ed24adc8d6f8704/pydantic_core-2.41.5-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:56121965f7a4dc965bff783d70b907ddf3d57f6eba29b6d2e5dabfaf07799c51", size = 2145302, upload-time = "2025-11-04T13:43:46.64Z" },
 ]
 
 [[package]]
-name = "pluggy"
-version = "1.6.0"
+name = "pydantic-settings"
+version = "2.12.0"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/f9/e2/3e91f31a7d2b083fe6ef3fa267035b518369d9511ffab804f839851d2779/pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3", size = 69412, upload-time = "2025-05-15T12:30:07.975Z" }
+dependencies = [
+    { name = "pydantic" },
+    { name = "python-dotenv" },
+    { name = "typing-inspection" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/43/4b/ac7e0aae12027748076d72a8764ff1c9d82ca75a7a52622e67ed3f765c54/pydantic_settings-2.12.0.tar.gz", hash = "sha256:005538ef951e3c2a68e1c08b292b5f2e71490def8589d4221b95dab00dafcfd0", size = 194184, upload-time = "2025-11-10T14:25:47.013Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" },
+    { url = "https://files.pythonhosted.org/packages/c1/60/5d4751ba3f4a40a6891f24eec885f51afd78d208498268c734e256fb13c4/pydantic_settings-2.12.0-py3-none-any.whl", hash = "sha256:fddb9fd99a5b18da837b29710391e945b1e30c135477f484084ee513adb93809", size = 51880, upload-time = "2025-11-10T14:25:45.546Z" },
+]
+
+[[package]]
+name = "pydocket"
+version = "0.17.5"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "cloudpickle" },
+    { name = "croniter" },
+    { name = "fakeredis", extra = ["lua"] },
+    { name = "opentelemetry-api" },
+    { name = "prometheus-client" },
+    { name = "py-key-value-aio", extra = ["memory", "redis"] },
+    { name = "python-json-logger" },
+    { name = "redis" },
+    { name = "rich" },
+    { name = "typer" },
+    { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/73/26/ac23ead3725475468b50b486939bf5feda27180050a614a7407344a0af0e/pydocket-0.17.5.tar.gz", hash = "sha256:19a6976d8fd11c1acf62feb0291a339e06beaefa100f73dd38c6499760ad3e62", size = 334829, upload-time = "2026-01-30T18:44:39.702Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/14/98/73427d065c067a99de6afbe24df3d90cf20d63152ceb42edff2b6e829d4c/pydocket-0.17.5-py3-none-any.whl", hash = "sha256:544d7c2625a33e52528ac24db25794841427dfc2cf30b9c558ac387c77746241", size = 93355, upload-time = "2026-01-30T18:44:37.972Z" },
 ]
 
 [[package]]
@@ -478,6 +1617,29 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217, upload-time = "2025-06-21T13:39:07.939Z" },
 ]
 
+[[package]]
+name = "pyjwt"
+version = "2.11.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/5c/5a/b46fa56bf322901eee5b0454a34343cdbdae202cd421775a8ee4e42fd519/pyjwt-2.11.0.tar.gz", hash = "sha256:35f95c1f0fbe5d5ba6e43f00271c275f7a1a4db1dab27bf708073b75318ea623", size = 98019, upload-time = "2026-01-30T19:59:55.694Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/6f/01/c26ce75ba460d5cd503da9e13b21a33804d38c2165dec7b716d06b13010c/pyjwt-2.11.0-py3-none-any.whl", hash = "sha256:94a6bde30eb5c8e04fee991062b534071fd1439ef58d2adc9ccb823e7bcd0469", size = 28224, upload-time = "2026-01-30T19:59:54.539Z" },
+]
+
+[package.optional-dependencies]
+crypto = [
+    { name = "cryptography" },
+]
+
+[[package]]
+name = "pyperclip"
+version = "1.11.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/e8/52/d87eba7cb129b81563019d1679026e7a112ef76855d6159d24754dbd2a51/pyperclip-1.11.0.tar.gz", hash = "sha256:244035963e4428530d9e3a6101a1ef97209c6825edab1567beac148ccc1db1b6", size = 12185, upload-time = "2025-09-26T14:40:37.245Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/df/80/fc9d01d5ed37ba4c42ca2b55b4339ae6e200b456be3a1aaddf4a9fa99b8c/pyperclip-1.11.0-py3-none-any.whl", hash = "sha256:299403e9ff44581cb9ba2ffeed69c7aa96a008622ad0c46cb575ca75b5b84273", size = 11063, upload-time = "2025-09-26T14:40:36.069Z" },
+]
+
 [[package]]
 name = "pytest"
 version = "9.0.2"
@@ -494,6 +1656,19 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/3b/ab/b3226f0bd7cdcf710fbede2b3548584366da3b19b5021e74f5bde2a8fa3f/pytest-9.0.2-py3-none-any.whl", hash = "sha256:711ffd45bf766d5264d487b917733b453d917afd2b0ad65223959f59089f875b", size = 374801, upload-time = "2025-12-06T21:30:49.154Z" },
 ]
 
+[[package]]
+name = "pytest-asyncio"
+version = "1.3.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "pytest" },
+    { name = "typing-extensions", marker = "python_full_version < '3.13'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/90/2c/8af215c0f776415f3590cac4f9086ccefd6fd463befeae41cd4d3f193e5a/pytest_asyncio-1.3.0.tar.gz", hash = "sha256:d7f52f36d231b80ee124cd216ffb19369aa168fc10095013c6b014a34d3ee9e5", size = 50087, upload-time = "2025-11-10T16:07:47.256Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/e5/35/f8b19922b6a25bc0880171a2f1a003eaeb93657475193ab516fd87cac9da/pytest_asyncio-1.3.0-py3-none-any.whl", hash = "sha256:611e26147c7f77640e6d0a92a38ed17c3e9848063698d5c93d5aa7aa11cebff5", size = 15075, upload-time = "2025-11-10T16:07:45.537Z" },
+]
+
 [[package]]
 name = "pytest-cov"
 version = "7.0.0"
@@ -520,6 +1695,82 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/5a/cc/06253936f4a7fa2e0f48dfe6d851d9c56df896a9ab09ac019d70b760619c/pytest_mock-3.15.1-py3-none-any.whl", hash = "sha256:0a25e2eb88fe5168d535041d09a4529a188176ae608a6d249ee65abc0949630d", size = 10095, upload-time = "2025-09-16T16:37:25.734Z" },
 ]
 
+[[package]]
+name = "python-dateutil"
+version = "2.9.0.post0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "six" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/66/c0/0c8b6ad9f17a802ee498c46e004a0eb49bc148f2fd230864601a86dcf6db/python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3", size = 342432, upload-time = "2024-03-01T18:36:20.211Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892, upload-time = "2024-03-01T18:36:18.57Z" },
+]
+
+[[package]]
+name = "python-dotenv"
+version = "1.2.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/f0/26/19cadc79a718c5edbec86fd4919a6b6d3f681039a2f6d66d14be94e75fb9/python_dotenv-1.2.1.tar.gz", hash = "sha256:42667e897e16ab0d66954af0e60a9caa94f0fd4ecf3aaf6d2d260eec1aa36ad6", size = 44221, upload-time = "2025-10-26T15:12:10.434Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/14/1b/a298b06749107c305e1fe0f814c6c74aea7b2f1e10989cb30f544a1b3253/python_dotenv-1.2.1-py3-none-any.whl", hash = "sha256:b81ee9561e9ca4004139c6cbba3a238c32b03e4894671e181b671e8cb8425d61", size = 21230, upload-time = "2025-10-26T15:12:09.109Z" },
+]
+
+[[package]]
+name = "python-json-logger"
+version = "4.0.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/29/bf/eca6a3d43db1dae7070f70e160ab20b807627ba953663ba07928cdd3dc58/python_json_logger-4.0.0.tar.gz", hash = "sha256:f58e68eb46e1faed27e0f574a55a0455eecd7b8a5b88b85a784519ba3cff047f", size = 17683, upload-time = "2025-10-06T04:15:18.984Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/51/e5/fecf13f06e5e5f67e8837d777d1bc43fac0ed2b77a676804df5c34744727/python_json_logger-4.0.0-py3-none-any.whl", hash = "sha256:af09c9daf6a813aa4cc7180395f50f2a9e5fa056034c9953aec92e381c5ba1e2", size = 15548, upload-time = "2025-10-06T04:15:17.553Z" },
+]
+
+[[package]]
+name = "python-multipart"
+version = "0.0.22"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/94/01/979e98d542a70714b0cb2b6728ed0b7c46792b695e3eaec3e20711271ca3/python_multipart-0.0.22.tar.gz", hash = "sha256:7340bef99a7e0032613f56dc36027b959fd3b30a787ed62d310e951f7c3a3a58", size = 37612, upload-time = "2026-01-25T10:15:56.219Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/1b/d0/397f9626e711ff749a95d96b7af99b9c566a9bb5129b8e4c10fc4d100304/python_multipart-0.0.22-py3-none-any.whl", hash = "sha256:2b2cd894c83d21bf49d702499531c7bafd057d730c201782048f7945d82de155", size = 24579, upload-time = "2026-01-25T10:15:54.811Z" },
+]
+
+[[package]]
+name = "pytz"
+version = "2025.2"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/f8/bf/abbd3cdfb8fbc7fb3d4d38d320f2441b1e7cbe29be4f23797b4a2b5d8aac/pytz-2025.2.tar.gz", hash = "sha256:360b9e3dbb49a209c21ad61809c7fb453643e048b38924c765813546746e81c3", size = 320884, upload-time = "2025-03-25T02:25:00.538Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/81/c4/34e93fe5f5429d7570ec1fa436f1986fb1f00c3e0f43a589fe2bbcd22c3f/pytz-2025.2-py2.py3-none-any.whl", hash = "sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00", size = 509225, upload-time = "2025-03-25T02:24:58.468Z" },
+]
+
+[[package]]
+name = "pywin32"
+version = "311"
+source = { registry = "https://pypi.org/simple" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/7c/af/449a6a91e5d6db51420875c54f6aff7c97a86a3b13a0b4f1a5c13b988de3/pywin32-311-cp311-cp311-win32.whl", hash = "sha256:184eb5e436dea364dcd3d2316d577d625c0351bf237c4e9a5fabbcfa5a58b151", size = 8697031, upload-time = "2025-07-14T20:13:13.266Z" },
+    { url = "https://files.pythonhosted.org/packages/51/8f/9bb81dd5bb77d22243d33c8397f09377056d5c687aa6d4042bea7fbf8364/pywin32-311-cp311-cp311-win_amd64.whl", hash = "sha256:3ce80b34b22b17ccbd937a6e78e7225d80c52f5ab9940fe0506a1a16f3dab503", size = 9508308, upload-time = "2025-07-14T20:13:15.147Z" },
+    { url = "https://files.pythonhosted.org/packages/44/7b/9c2ab54f74a138c491aba1b1cd0795ba61f144c711daea84a88b63dc0f6c/pywin32-311-cp311-cp311-win_arm64.whl", hash = "sha256:a733f1388e1a842abb67ffa8e7aad0e70ac519e09b0f6a784e65a136ec7cefd2", size = 8703930, upload-time = "2025-07-14T20:13:16.945Z" },
+    { url = "https://files.pythonhosted.org/packages/e7/ab/01ea1943d4eba0f850c3c61e78e8dd59757ff815ff3ccd0a84de5f541f42/pywin32-311-cp312-cp312-win32.whl", hash = "sha256:750ec6e621af2b948540032557b10a2d43b0cee2ae9758c54154d711cc852d31", size = 8706543, upload-time = "2025-07-14T20:13:20.765Z" },
+    { url = "https://files.pythonhosted.org/packages/d1/a8/a0e8d07d4d051ec7502cd58b291ec98dcc0c3fff027caad0470b72cfcc2f/pywin32-311-cp312-cp312-win_amd64.whl", hash = "sha256:b8c095edad5c211ff31c05223658e71bf7116daa0ecf3ad85f3201ea3190d067", size = 9495040, upload-time = "2025-07-14T20:13:22.543Z" },
+    { url = "https://files.pythonhosted.org/packages/ba/3a/2ae996277b4b50f17d61f0603efd8253cb2d79cc7ae159468007b586396d/pywin32-311-cp312-cp312-win_arm64.whl", hash = "sha256:e286f46a9a39c4a18b319c28f59b61de793654af2f395c102b4f819e584b5852", size = 8710102, upload-time = "2025-07-14T20:13:24.682Z" },
+    { url = "https://files.pythonhosted.org/packages/a5/be/3fd5de0979fcb3994bfee0d65ed8ca9506a8a1260651b86174f6a86f52b3/pywin32-311-cp313-cp313-win32.whl", hash = "sha256:f95ba5a847cba10dd8c4d8fefa9f2a6cf283b8b88ed6178fa8a6c1ab16054d0d", size = 8705700, upload-time = "2025-07-14T20:13:26.471Z" },
+    { url = "https://files.pythonhosted.org/packages/e3/28/e0a1909523c6890208295a29e05c2adb2126364e289826c0a8bc7297bd5c/pywin32-311-cp313-cp313-win_amd64.whl", hash = "sha256:718a38f7e5b058e76aee1c56ddd06908116d35147e133427e59a3983f703a20d", size = 9494700, upload-time = "2025-07-14T20:13:28.243Z" },
+    { url = "https://files.pythonhosted.org/packages/04/bf/90339ac0f55726dce7d794e6d79a18a91265bdf3aa70b6b9ca52f35e022a/pywin32-311-cp313-cp313-win_arm64.whl", hash = "sha256:7b4075d959648406202d92a2310cb990fea19b535c7f4a78d3f5e10b926eeb8a", size = 8709318, upload-time = "2025-07-14T20:13:30.348Z" },
+    { url = "https://files.pythonhosted.org/packages/c9/31/097f2e132c4f16d99a22bfb777e0fd88bd8e1c634304e102f313af69ace5/pywin32-311-cp314-cp314-win32.whl", hash = "sha256:b7a2c10b93f8986666d0c803ee19b5990885872a7de910fc460f9b0c2fbf92ee", size = 8840714, upload-time = "2025-07-14T20:13:32.449Z" },
+    { url = "https://files.pythonhosted.org/packages/90/4b/07c77d8ba0e01349358082713400435347df8426208171ce297da32c313d/pywin32-311-cp314-cp314-win_amd64.whl", hash = "sha256:3aca44c046bd2ed8c90de9cb8427f581c479e594e99b5c0bb19b29c10fd6cb87", size = 9656800, upload-time = "2025-07-14T20:13:34.312Z" },
+    { url = "https://files.pythonhosted.org/packages/c0/d2/21af5c535501a7233e734b8af901574572da66fcc254cb35d0609c9080dd/pywin32-311-cp314-cp314-win_arm64.whl", hash = "sha256:a508e2d9025764a8270f93111a970e1d0fbfc33f4153b388bb649b7eec4f9b42", size = 8932540, upload-time = "2025-07-14T20:13:36.379Z" },
+]
+
+[[package]]
+name = "pywin32-ctypes"
+version = "0.2.3"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/85/9f/01a1a99704853cb63f253eea009390c88e7131c67e66a0a02099a8c917cb/pywin32-ctypes-0.2.3.tar.gz", hash = "sha256:d162dc04946d704503b2edc4d55f3dba5c1d539ead017afa00142c38b9885755", size = 29471, upload-time = "2024-08-14T10:15:34.626Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/de/3d/8161f7711c017e01ac9f008dfddd9410dff3674334c233bde66e7ba65bbf/pywin32_ctypes-0.2.3-py3-none-any.whl", hash = "sha256:8a1513379d709975552d202d942d9837758905c8d01eb82b8bcc30918929e7b8", size = 30756, upload-time = "2024-08-14T10:15:33.187Z" },
+]
+
 [[package]]
 name = "pyyaml"
 version = "6.0.3"
@@ -575,18 +1826,45 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/f1/12/de94a39c2ef588c7e6455cfbe7343d3b2dc9d6b6b2f40c4c6565744c873d/pyyaml-6.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:ebc55a14a21cb14062aa4162f906cd962b28e2e9ea38f9b4391244cd8de4ae0b", size = 149341, upload-time = "2025-09-25T21:32:56.828Z" },
 ]
 
+[[package]]
+name = "redis"
+version = "7.1.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "async-timeout", marker = "python_full_version < '3.11.3'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/43/c8/983d5c6579a411d8a99bc5823cc5712768859b5ce2c8afe1a65b37832c81/redis-7.1.0.tar.gz", hash = "sha256:b1cc3cfa5a2cb9c2ab3ba700864fb0ad75617b41f01352ce5779dabf6d5f9c3c", size = 4796669, upload-time = "2025-11-19T15:54:39.961Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/89/f0/8956f8a86b20d7bb9d6ac0187cf4cd54d8065bc9a1a09eb8011d4d326596/redis-7.1.0-py3-none-any.whl", hash = "sha256:23c52b208f92b56103e17c5d06bdc1a6c2c0b3106583985a76a18f83b265de2b", size = 354159, upload-time = "2025-11-19T15:54:38.064Z" },
+]
+
 [[package]]
 name = "referencing"
-version = "0.37.0"
+version = "0.36.2"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "attrs" },
     { name = "rpds-py" },
     { name = "typing-extensions", marker = "python_full_version < '3.13'" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/22/f5/df4e9027acead3ecc63e50fe1e36aca1523e1719559c499951bb4b53188f/referencing-0.37.0.tar.gz", hash = "sha256:44aefc3142c5b842538163acb373e24cce6632bd54bdb01b21ad5863489f50d8", size = 78036, upload-time = "2025-10-13T15:30:48.871Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/2f/db/98b5c277be99dd18bfd91dd04e1b759cad18d1a338188c936e92f921c7e2/referencing-0.36.2.tar.gz", hash = "sha256:df2e89862cd09deabbdba16944cc3f10feb6b3e6f18e902f7cc25609a34775aa", size = 74744, upload-time = "2025-01-25T08:48:16.138Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/c1/b1/3baf80dc6d2b7bc27a95a67752d0208e410351e3feb4eb78de5f77454d8d/referencing-0.36.2-py3-none-any.whl", hash = "sha256:e8699adbbf8b5c7de96d8ffa0eb5c158b3beafce084968e2ea8bb08c6794dcd0", size = 26775, upload-time = "2025-01-25T08:48:14.241Z" },
+]
+
+[[package]]
+name = "requests"
+version = "2.32.5"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "certifi" },
+    { name = "charset-normalizer" },
+    { name = "idna" },
+    { name = "urllib3" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/c9/74/b3ff8e6c8446842c3f5c837e9c3dfcfe2018ea6ecef224c710c85ef728f4/requests-2.32.5.tar.gz", hash = "sha256:dbba0bac56e100853db0ea71b82b4dfd5fe2bf6d3754a8893c3af500cec7d7cf", size = 134517, upload-time = "2025-08-18T20:46:02.573Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/2c/58/ca301544e1fa93ed4f80d724bf5b194f6e4b945841c5bfd555878eea9fcb/referencing-0.37.0-py3-none-any.whl", hash = "sha256:381329a9f99628c9069361716891d34ad94af76e461dcb0335825aecc7692231", size = 26766, upload-time = "2025-10-13T15:30:47.625Z" },
+    { url = "https://files.pythonhosted.org/packages/1e/db/4254e3eabe8020b458f1a747140d32277ec7a271daf1d235b70dc0b4e6e3/requests-2.32.5-py3-none-any.whl", hash = "sha256:2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6", size = 64738, upload-time = "2025-08-18T20:46:00.542Z" },
 ]
 
 [[package]]
@@ -602,6 +1880,19 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/25/7a/b0178788f8dc6cafce37a212c99565fa1fe7872c70c6c9c1e1a372d9d88f/rich-14.2.0-py3-none-any.whl", hash = "sha256:76bc51fe2e57d2b1be1f96c524b890b816e334ab4c1e45888799bfaab0021edd", size = 243393, upload-time = "2025-10-09T14:16:51.245Z" },
 ]
 
+[[package]]
+name = "rich-rst"
+version = "1.3.2"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "docutils" },
+    { name = "rich" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/bc/6d/a506aaa4a9eaa945ed8ab2b7347859f53593864289853c5d6d62b77246e0/rich_rst-1.3.2.tar.gz", hash = "sha256:a1196fdddf1e364b02ec68a05e8ff8f6914fee10fbca2e6b6735f166bb0da8d4", size = 14936, upload-time = "2025-10-14T16:49:45.332Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/13/2f/b4530fbf948867702d0a3f27de4a6aab1d156f406d72852ab902c4d04de9/rich_rst-1.3.2-py3-none-any.whl", hash = "sha256:a99b4907cbe118cf9d18b0b44de272efa61f15117c61e39ebdc431baf5df722a", size = 12567, upload-time = "2025-10-14T16:49:42.953Z" },
+]
+
 [[package]]
 name = "rpds-py"
 version = "0.30.0"
@@ -736,6 +2027,37 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/c4/1c/1dbe51782c0e1e9cfce1d1004752672d2d4629ea46945d19d731ad772b3b/ruff-0.14.11-py3-none-win_arm64.whl", hash = "sha256:649fb6c9edd7f751db276ef42df1f3df41c38d67d199570ae2a7bd6cbc3590f0", size = 12938644, upload-time = "2026-01-08T19:11:50.027Z" },
 ]
 
+[[package]]
+name = "secretstorage"
+version = "3.5.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "cryptography" },
+    { name = "jeepney" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/1c/03/e834bcd866f2f8a49a85eaff47340affa3bfa391ee9912a952a1faa68c7b/secretstorage-3.5.0.tar.gz", hash = "sha256:f04b8e4689cbce351744d5537bf6b1329c6fc68f91fa666f60a380edddcd11be", size = 19884, upload-time = "2025-11-23T19:02:53.191Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/b7/46/f5af3402b579fd5e11573ce652019a67074317e18c1935cc0b4ba9b35552/secretstorage-3.5.0-py3-none-any.whl", hash = "sha256:0ce65888c0725fcb2c5bc0fdb8e5438eece02c523557ea40ce0703c266248137", size = 15554, upload-time = "2025-11-23T19:02:51.545Z" },
+]
+
+[[package]]
+name = "shellingham"
+version = "1.5.4"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/58/15/8b3609fd3830ef7b27b655beb4b4e9c62313a4e8da8c676e142cc210d58e/shellingham-1.5.4.tar.gz", hash = "sha256:8dbca0739d487e5bd35ab3ca4b36e11c4078f3a234bfce294b0a0291363404de", size = 10310, upload-time = "2023-10-24T04:13:40.426Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/e0/f9/0595336914c5619e5f28a1fb793285925a8cd4b432c9da0a987836c7f822/shellingham-1.5.4-py2.py3-none-any.whl", hash = "sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686", size = 9755, upload-time = "2023-10-24T04:13:38.866Z" },
+]
+
+[[package]]
+name = "six"
+version = "1.17.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/94/e7/b2c673351809dca68a0e064b6af791aa332cf192da575fd474ed7d6f16a2/six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81", size = 34031, upload-time = "2024-12-04T17:35:28.174Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050, upload-time = "2024-12-04T17:35:26.475Z" },
+]
+
 [[package]]
 name = "smmap"
 version = "5.0.2"
@@ -745,6 +2067,41 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/04/be/d09147ad1ec7934636ad912901c5fd7667e1c858e19d355237db0d0cd5e4/smmap-5.0.2-py3-none-any.whl", hash = "sha256:b30115f0def7d7531d22a0fb6502488d879e75b260a9db4d0819cfb25403af5e", size = 24303, upload-time = "2025-01-02T07:14:38.724Z" },
 ]
 
+[[package]]
+name = "sortedcontainers"
+version = "2.4.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/e8/c4/ba2f8066cceb6f23394729afe52f3bf7adec04bf9ed2c820b39e19299111/sortedcontainers-2.4.0.tar.gz", hash = "sha256:25caa5a06cc30b6b83d11423433f65d1f9d76c4c6a0c90e3379eaa43b9bfdb88", size = 30594, upload-time = "2021-05-16T22:03:42.897Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/32/46/9cb0e58b2deb7f82b84065f37f3bffeb12413f947f9388e4cac22c4621ce/sortedcontainers-2.4.0-py2.py3-none-any.whl", hash = "sha256:a163dcaede0f1c021485e957a39245190e74249897e2ae4b2aa38595db237ee0", size = 29575, upload-time = "2021-05-16T22:03:41.177Z" },
+]
+
+[[package]]
+name = "sse-starlette"
+version = "3.2.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "anyio" },
+    { name = "starlette" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/8b/8d/00d280c03ffd39aaee0e86ec81e2d3b9253036a0f93f51d10503adef0e65/sse_starlette-3.2.0.tar.gz", hash = "sha256:8127594edfb51abe44eac9c49e59b0b01f1039d0c7461c6fd91d4e03b70da422", size = 27253, upload-time = "2026-01-17T13:11:05.62Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/96/7f/832f015020844a8b8f7a9cbc103dd76ba8e3875004c41e08440ea3a2b41a/sse_starlette-3.2.0-py3-none-any.whl", hash = "sha256:5876954bd51920fc2cd51baee47a080eb88a37b5b784e615abb0b283f801cdbf", size = 12763, upload-time = "2026-01-17T13:11:03.775Z" },
+]
+
+[[package]]
+name = "starlette"
+version = "0.52.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "anyio" },
+    { name = "typing-extensions", marker = "python_full_version < '3.13'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/c4/68/79977123bb7be889ad680d79a40f339082c1978b5cfcf62c2d8d196873ac/starlette-0.52.1.tar.gz", hash = "sha256:834edd1b0a23167694292e94f597773bc3f89f362be6effee198165a35d62933", size = 2653702, upload-time = "2026-01-18T13:34:11.062Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/81/0d/13d1d239a25cbfb19e740db83143e95c772a1fe10202dda4b76792b114dd/starlette-0.52.1-py3-none-any.whl", hash = "sha256:0029d43eb3d273bc4f83a08720b4912ea4b071087a3b48db01b7c839f7954d74", size = 74272, upload-time = "2026-01-18T13:34:09.188Z" },
+]
+
 [[package]]
 name = "tomli"
 version = "2.3.0"
@@ -794,6 +2151,30 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/77/b8/0135fadc89e73be292b473cb820b4f5a08197779206b33191e801feeae40/tomli-2.3.0-py3-none-any.whl", hash = "sha256:e95b1af3c5b07d9e643909b5abbec77cd9f1217e6d0bca72b0234736b9fb1f1b", size = 14408, upload-time = "2025-10-08T22:01:46.04Z" },
 ]
 
+[[package]]
+name = "typer"
+version = "0.21.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "click" },
+    { name = "rich" },
+    { name = "shellingham" },
+    { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/36/bf/8825b5929afd84d0dabd606c67cd57b8388cb3ec385f7ef19c5cc2202069/typer-0.21.1.tar.gz", hash = "sha256:ea835607cd752343b6b2b7ce676893e5a0324082268b48f27aa058bdb7d2145d", size = 110371, upload-time = "2026-01-06T11:21:10.989Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/a0/1d/d9257dd49ff2ca23ea5f132edf1281a0c4f9de8a762b9ae399b670a59235/typer-0.21.1-py3-none-any.whl", hash = "sha256:7985e89081c636b88d172c2ee0cfe33c253160994d47bdfdc302defd7d1f1d01", size = 47381, upload-time = "2026-01-06T11:21:09.824Z" },
+]
+
+[[package]]
+name = "types-aiofiles"
+version = "25.1.0.20251011"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/84/6c/6d23908a8217e36704aa9c79d99a620f2fdd388b66a4b7f72fbc6b6ff6c6/types_aiofiles-25.1.0.20251011.tar.gz", hash = "sha256:1c2b8ab260cb3cd40c15f9d10efdc05a6e1e6b02899304d80dfa0410e028d3ff", size = 14535, upload-time = "2025-10-11T02:44:51.237Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/71/0f/76917bab27e270bb6c32addd5968d69e558e5b6f7fb4ac4cbfa282996a96/types_aiofiles-25.1.0.20251011-py3-none-any.whl", hash = "sha256:8ff8de7f9d42739d8f0dadcceeb781ce27cd8d8c4152d4a7c52f6b20edb8149c", size = 14338, upload-time = "2025-10-11T02:44:50.054Z" },
+]
+
 [[package]]
 name = "types-pyyaml"
 version = "6.0.12.20250915"
@@ -811,3 +2192,105 @@ sdist = { url = "https://files.pythonhosted.org/packages/72/94/1a15dd82efb362ac8
 wheels = [
     { url = "https://files.pythonhosted.org/packages/18/67/36e9267722cc04a6b9f15c7f3441c2363321a3ea07da7ae0c0707beb2a9c/typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548", size = 44614, upload-time = "2025-08-25T13:49:24.86Z" },
 ]
+
+[[package]]
+name = "typing-inspection"
+version = "0.4.2"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/55/e3/70399cb7dd41c10ac53367ae42139cf4b1ca5f36bb3dc6c9d33acdb43655/typing_inspection-0.4.2.tar.gz", hash = "sha256:ba561c48a67c5958007083d386c3295464928b01faa735ab8547c5692e87f464", size = 75949, upload-time = "2025-10-01T02:14:41.687Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/dc/9b/47798a6c91d8bdb567fe2698fe81e0c6b7cb7ef4d13da4114b41d239f65d/typing_inspection-0.4.2-py3-none-any.whl", hash = "sha256:4ed1cacbdc298c220f1bd249ed5287caa16f34d44ef4e9c3d0cbad5b521545e7", size = 14611, upload-time = "2025-10-01T02:14:40.154Z" },
+]
+
+[[package]]
+name = "urllib3"
+version = "2.6.3"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/c7/24/5f1b3bdffd70275f6661c76461e25f024d5a38a46f04aaca912426a2b1d3/urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed", size = 435556, upload-time = "2026-01-07T16:24:43.925Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/39/08/aaaad47bc4e9dc8c725e68f9d04865dbcb2052843ff09c97b08904852d84/urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4", size = 131584, upload-time = "2026-01-07T16:24:42.685Z" },
+]
+
+[[package]]
+name = "uvicorn"
+version = "0.40.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "click" },
+    { name = "h11" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/c3/d1/8f3c683c9561a4e6689dd3b1d345c815f10f86acd044ee1fb9a4dcd0b8c5/uvicorn-0.40.0.tar.gz", hash = "sha256:839676675e87e73694518b5574fd0f24c9d97b46bea16df7b8c05ea1a51071ea", size = 81761, upload-time = "2025-12-21T14:16:22.45Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/3d/d8/2083a1daa7439a66f3a48589a57d576aa117726762618f6bb09fe3798796/uvicorn-0.40.0-py3-none-any.whl", hash = "sha256:c6c8f55bc8bf13eb6fa9ff87ad62308bbbc33d0b67f84293151efe87e0d5f2ee", size = 68502, upload-time = "2025-12-21T14:16:21.041Z" },
+]
+
+[[package]]
+name = "websockets"
+version = "16.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/04/24/4b2031d72e840ce4c1ccb255f693b15c334757fc50023e4db9537080b8c4/websockets-16.0.tar.gz", hash = "sha256:5f6261a5e56e8d5c42a4497b364ea24d94d9563e8fbd44e78ac40879c60179b5", size = 179346, upload-time = "2026-01-10T09:23:47.181Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/f2/db/de907251b4ff46ae804ad0409809504153b3f30984daf82a1d84a9875830/websockets-16.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:31a52addea25187bde0797a97d6fc3d2f92b6f72a9370792d65a6e84615ac8a8", size = 177340, upload-time = "2026-01-10T09:22:34.539Z" },
+    { url = "https://files.pythonhosted.org/packages/f3/fa/abe89019d8d8815c8781e90d697dec52523fb8ebe308bf11664e8de1877e/websockets-16.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:417b28978cdccab24f46400586d128366313e8a96312e4b9362a4af504f3bbad", size = 175022, upload-time = "2026-01-10T09:22:36.332Z" },
+    { url = "https://files.pythonhosted.org/packages/58/5d/88ea17ed1ded2079358b40d31d48abe90a73c9e5819dbcde1606e991e2ad/websockets-16.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:af80d74d4edfa3cb9ed973a0a5ba2b2a549371f8a741e0800cb07becdd20f23d", size = 175319, upload-time = "2026-01-10T09:22:37.602Z" },
+    { url = "https://files.pythonhosted.org/packages/d2/ae/0ee92b33087a33632f37a635e11e1d99d429d3d323329675a6022312aac2/websockets-16.0-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:08d7af67b64d29823fed316505a89b86705f2b7981c07848fb5e3ea3020c1abe", size = 184631, upload-time = "2026-01-10T09:22:38.789Z" },
+    { url = "https://files.pythonhosted.org/packages/c8/c5/27178df583b6c5b31b29f526ba2da5e2f864ecc79c99dae630a85d68c304/websockets-16.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7be95cfb0a4dae143eaed2bcba8ac23f4892d8971311f1b06f3c6b78952ee70b", size = 185870, upload-time = "2026-01-10T09:22:39.893Z" },
+    { url = "https://files.pythonhosted.org/packages/87/05/536652aa84ddc1c018dbb7e2c4cbcd0db884580bf8e95aece7593fde526f/websockets-16.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d6297ce39ce5c2e6feb13c1a996a2ded3b6832155fcfc920265c76f24c7cceb5", size = 185361, upload-time = "2026-01-10T09:22:41.016Z" },
+    { url = "https://files.pythonhosted.org/packages/6d/e2/d5332c90da12b1e01f06fb1b85c50cfc489783076547415bf9f0a659ec19/websockets-16.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:1c1b30e4f497b0b354057f3467f56244c603a79c0d1dafce1d16c283c25f6e64", size = 184615, upload-time = "2026-01-10T09:22:42.442Z" },
+    { url = "https://files.pythonhosted.org/packages/77/fb/d3f9576691cae9253b51555f841bc6600bf0a983a461c79500ace5a5b364/websockets-16.0-cp311-cp311-win32.whl", hash = "sha256:5f451484aeb5cafee1ccf789b1b66f535409d038c56966d6101740c1614b86c6", size = 178246, upload-time = "2026-01-10T09:22:43.654Z" },
+    { url = "https://files.pythonhosted.org/packages/54/67/eaff76b3dbaf18dcddabc3b8c1dba50b483761cccff67793897945b37408/websockets-16.0-cp311-cp311-win_amd64.whl", hash = "sha256:8d7f0659570eefb578dacde98e24fb60af35350193e4f56e11190787bee77dac", size = 178684, upload-time = "2026-01-10T09:22:44.941Z" },
+    { url = "https://files.pythonhosted.org/packages/84/7b/bac442e6b96c9d25092695578dda82403c77936104b5682307bd4deb1ad4/websockets-16.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:71c989cbf3254fbd5e84d3bff31e4da39c43f884e64f2551d14bb3c186230f00", size = 177365, upload-time = "2026-01-10T09:22:46.787Z" },
+    { url = "https://files.pythonhosted.org/packages/b0/fe/136ccece61bd690d9c1f715baaeefd953bb2360134de73519d5df19d29ca/websockets-16.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:8b6e209ffee39ff1b6d0fa7bfef6de950c60dfb91b8fcead17da4ee539121a79", size = 175038, upload-time = "2026-01-10T09:22:47.999Z" },
+    { url = "https://files.pythonhosted.org/packages/40/1e/9771421ac2286eaab95b8575b0cb701ae3663abf8b5e1f64f1fd90d0a673/websockets-16.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:86890e837d61574c92a97496d590968b23c2ef0aeb8a9bc9421d174cd378ae39", size = 175328, upload-time = "2026-01-10T09:22:49.809Z" },
+    { url = "https://files.pythonhosted.org/packages/18/29/71729b4671f21e1eaa5d6573031ab810ad2936c8175f03f97f3ff164c802/websockets-16.0-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:9b5aca38b67492ef518a8ab76851862488a478602229112c4b0d58d63a7a4d5c", size = 184915, upload-time = "2026-01-10T09:22:51.071Z" },
+    { url = "https://files.pythonhosted.org/packages/97/bb/21c36b7dbbafc85d2d480cd65df02a1dc93bf76d97147605a8e27ff9409d/websockets-16.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e0334872c0a37b606418ac52f6ab9cfd17317ac26365f7f65e203e2d0d0d359f", size = 186152, upload-time = "2026-01-10T09:22:52.224Z" },
+    { url = "https://files.pythonhosted.org/packages/4a/34/9bf8df0c0cf88fa7bfe36678dc7b02970c9a7d5e065a3099292db87b1be2/websockets-16.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a0b31e0b424cc6b5a04b8838bbaec1688834b2383256688cf47eb97412531da1", size = 185583, upload-time = "2026-01-10T09:22:53.443Z" },
+    { url = "https://files.pythonhosted.org/packages/47/88/4dd516068e1a3d6ab3c7c183288404cd424a9a02d585efbac226cb61ff2d/websockets-16.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:485c49116d0af10ac698623c513c1cc01c9446c058a4e61e3bf6c19dff7335a2", size = 184880, upload-time = "2026-01-10T09:22:55.033Z" },
+    { url = "https://files.pythonhosted.org/packages/91/d6/7d4553ad4bf1c0421e1ebd4b18de5d9098383b5caa1d937b63df8d04b565/websockets-16.0-cp312-cp312-win32.whl", hash = "sha256:eaded469f5e5b7294e2bdca0ab06becb6756ea86894a47806456089298813c89", size = 178261, upload-time = "2026-01-10T09:22:56.251Z" },
+    { url = "https://files.pythonhosted.org/packages/c3/f0/f3a17365441ed1c27f850a80b2bc680a0fa9505d733fe152fdf5e98c1c0b/websockets-16.0-cp312-cp312-win_amd64.whl", hash = "sha256:5569417dc80977fc8c2d43a86f78e0a5a22fee17565d78621b6bb264a115d4ea", size = 178693, upload-time = "2026-01-10T09:22:57.478Z" },
+    { url = "https://files.pythonhosted.org/packages/cc/9c/baa8456050d1c1b08dd0ec7346026668cbc6f145ab4e314d707bb845bf0d/websockets-16.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:878b336ac47938b474c8f982ac2f7266a540adc3fa4ad74ae96fea9823a02cc9", size = 177364, upload-time = "2026-01-10T09:22:59.333Z" },
+    { url = "https://files.pythonhosted.org/packages/7e/0c/8811fc53e9bcff68fe7de2bcbe75116a8d959ac699a3200f4847a8925210/websockets-16.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:52a0fec0e6c8d9a784c2c78276a48a2bdf099e4ccc2a4cad53b27718dbfd0230", size = 175039, upload-time = "2026-01-10T09:23:01.171Z" },
+    { url = "https://files.pythonhosted.org/packages/aa/82/39a5f910cb99ec0b59e482971238c845af9220d3ab9fa76dd9162cda9d62/websockets-16.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:e6578ed5b6981005df1860a56e3617f14a6c307e6a71b4fff8c48fdc50f3ed2c", size = 175323, upload-time = "2026-01-10T09:23:02.341Z" },
+    { url = "https://files.pythonhosted.org/packages/bd/28/0a25ee5342eb5d5f297d992a77e56892ecb65e7854c7898fb7d35e9b33bd/websockets-16.0-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:95724e638f0f9c350bb1c2b0a7ad0e83d9cc0c9259f3ea94e40d7b02a2179ae5", size = 184975, upload-time = "2026-01-10T09:23:03.756Z" },
+    { url = "https://files.pythonhosted.org/packages/f9/66/27ea52741752f5107c2e41fda05e8395a682a1e11c4e592a809a90c6a506/websockets-16.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c0204dc62a89dc9d50d682412c10b3542d748260d743500a85c13cd1ee4bde82", size = 186203, upload-time = "2026-01-10T09:23:05.01Z" },
+    { url = "https://files.pythonhosted.org/packages/37/e5/8e32857371406a757816a2b471939d51c463509be73fa538216ea52b792a/websockets-16.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:52ac480f44d32970d66763115edea932f1c5b1312de36df06d6b219f6741eed8", size = 185653, upload-time = "2026-01-10T09:23:06.301Z" },
+    { url = "https://files.pythonhosted.org/packages/9b/67/f926bac29882894669368dc73f4da900fcdf47955d0a0185d60103df5737/websockets-16.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6e5a82b677f8f6f59e8dfc34ec06ca6b5b48bc4fcda346acd093694cc2c24d8f", size = 184920, upload-time = "2026-01-10T09:23:07.492Z" },
+    { url = "https://files.pythonhosted.org/packages/3c/a1/3d6ccdcd125b0a42a311bcd15a7f705d688f73b2a22d8cf1c0875d35d34a/websockets-16.0-cp313-cp313-win32.whl", hash = "sha256:abf050a199613f64c886ea10f38b47770a65154dc37181bfaff70c160f45315a", size = 178255, upload-time = "2026-01-10T09:23:09.245Z" },
+    { url = "https://files.pythonhosted.org/packages/6b/ae/90366304d7c2ce80f9b826096a9e9048b4bb760e44d3b873bb272cba696b/websockets-16.0-cp313-cp313-win_amd64.whl", hash = "sha256:3425ac5cf448801335d6fdc7ae1eb22072055417a96cc6b31b3861f455fbc156", size = 178689, upload-time = "2026-01-10T09:23:10.483Z" },
+    { url = "https://files.pythonhosted.org/packages/f3/1d/e88022630271f5bd349ed82417136281931e558d628dd52c4d8621b4a0b2/websockets-16.0-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:8cc451a50f2aee53042ac52d2d053d08bf89bcb31ae799cb4487587661c038a0", size = 177406, upload-time = "2026-01-10T09:23:12.178Z" },
+    { url = "https://files.pythonhosted.org/packages/f2/78/e63be1bf0724eeb4616efb1ae1c9044f7c3953b7957799abb5915bffd38e/websockets-16.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:daa3b6ff70a9241cf6c7fc9e949d41232d9d7d26fd3522b1ad2b4d62487e9904", size = 175085, upload-time = "2026-01-10T09:23:13.511Z" },
+    { url = "https://files.pythonhosted.org/packages/bb/f4/d3c9220d818ee955ae390cf319a7c7a467beceb24f05ee7aaaa2414345ba/websockets-16.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:fd3cb4adb94a2a6e2b7c0d8d05cb94e6f1c81a0cf9dc2694fb65c7e8d94c42e4", size = 175328, upload-time = "2026-01-10T09:23:14.727Z" },
+    { url = "https://files.pythonhosted.org/packages/63/bc/d3e208028de777087e6fb2b122051a6ff7bbcca0d6df9d9c2bf1dd869ae9/websockets-16.0-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:781caf5e8eee67f663126490c2f96f40906594cb86b408a703630f95550a8c3e", size = 185044, upload-time = "2026-01-10T09:23:15.939Z" },
+    { url = "https://files.pythonhosted.org/packages/ad/6e/9a0927ac24bd33a0a9af834d89e0abc7cfd8e13bed17a86407a66773cc0e/websockets-16.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:caab51a72c51973ca21fa8a18bd8165e1a0183f1ac7066a182ff27107b71e1a4", size = 186279, upload-time = "2026-01-10T09:23:17.148Z" },
+    { url = "https://files.pythonhosted.org/packages/b9/ca/bf1c68440d7a868180e11be653c85959502efd3a709323230314fda6e0b3/websockets-16.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:19c4dc84098e523fd63711e563077d39e90ec6702aff4b5d9e344a60cb3c0cb1", size = 185711, upload-time = "2026-01-10T09:23:18.372Z" },
+    { url = "https://files.pythonhosted.org/packages/c4/f8/fdc34643a989561f217bb477cbc47a3a07212cbda91c0e4389c43c296ebf/websockets-16.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:a5e18a238a2b2249c9a9235466b90e96ae4795672598a58772dd806edc7ac6d3", size = 184982, upload-time = "2026-01-10T09:23:19.652Z" },
+    { url = "https://files.pythonhosted.org/packages/dd/d1/574fa27e233764dbac9c52730d63fcf2823b16f0856b3329fc6268d6ae4f/websockets-16.0-cp314-cp314-win32.whl", hash = "sha256:a069d734c4a043182729edd3e9f247c3b2a4035415a9172fd0f1b71658a320a8", size = 177915, upload-time = "2026-01-10T09:23:21.458Z" },
+    { url = "https://files.pythonhosted.org/packages/8a/f1/ae6b937bf3126b5134ce1f482365fde31a357c784ac51852978768b5eff4/websockets-16.0-cp314-cp314-win_amd64.whl", hash = "sha256:c0ee0e63f23914732c6d7e0cce24915c48f3f1512ec1d079ed01fc629dab269d", size = 178381, upload-time = "2026-01-10T09:23:22.715Z" },
+    { url = "https://files.pythonhosted.org/packages/06/9b/f791d1db48403e1f0a27577a6beb37afae94254a8c6f08be4a23e4930bc0/websockets-16.0-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:a35539cacc3febb22b8f4d4a99cc79b104226a756aa7400adc722e83b0d03244", size = 177737, upload-time = "2026-01-10T09:23:24.523Z" },
+    { url = "https://files.pythonhosted.org/packages/bd/40/53ad02341fa33b3ce489023f635367a4ac98b73570102ad2cdd770dacc9a/websockets-16.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:b784ca5de850f4ce93ec85d3269d24d4c82f22b7212023c974c401d4980ebc5e", size = 175268, upload-time = "2026-01-10T09:23:25.781Z" },
+    { url = "https://files.pythonhosted.org/packages/74/9b/6158d4e459b984f949dcbbb0c5d270154c7618e11c01029b9bbd1bb4c4f9/websockets-16.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:569d01a4e7fba956c5ae4fc988f0d4e187900f5497ce46339c996dbf24f17641", size = 175486, upload-time = "2026-01-10T09:23:27.033Z" },
+    { url = "https://files.pythonhosted.org/packages/e5/2d/7583b30208b639c8090206f95073646c2c9ffd66f44df967981a64f849ad/websockets-16.0-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:50f23cdd8343b984957e4077839841146f67a3d31ab0d00e6b824e74c5b2f6e8", size = 185331, upload-time = "2026-01-10T09:23:28.259Z" },
+    { url = "https://files.pythonhosted.org/packages/45/b0/cce3784eb519b7b5ad680d14b9673a31ab8dcb7aad8b64d81709d2430aa8/websockets-16.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:152284a83a00c59b759697b7f9e9cddf4e3c7861dd0d964b472b70f78f89e80e", size = 186501, upload-time = "2026-01-10T09:23:29.449Z" },
+    { url = "https://files.pythonhosted.org/packages/19/60/b8ebe4c7e89fb5f6cdf080623c9d92789a53636950f7abacfc33fe2b3135/websockets-16.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:bc59589ab64b0022385f429b94697348a6a234e8ce22544e3681b2e9331b5944", size = 186062, upload-time = "2026-01-10T09:23:31.368Z" },
+    { url = "https://files.pythonhosted.org/packages/88/a8/a080593f89b0138b6cba1b28f8df5673b5506f72879322288b031337c0b8/websockets-16.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:32da954ffa2814258030e5a57bc73a3635463238e797c7375dc8091327434206", size = 185356, upload-time = "2026-01-10T09:23:32.627Z" },
+    { url = "https://files.pythonhosted.org/packages/c2/b6/b9afed2afadddaf5ebb2afa801abf4b0868f42f8539bfe4b071b5266c9fe/websockets-16.0-cp314-cp314t-win32.whl", hash = "sha256:5a4b4cc550cb665dd8a47f868c8d04c8230f857363ad3c9caf7a0c3bf8c61ca6", size = 178085, upload-time = "2026-01-10T09:23:33.816Z" },
+    { url = "https://files.pythonhosted.org/packages/9f/3e/28135a24e384493fa804216b79a6a6759a38cc4ff59118787b9fb693df93/websockets-16.0-cp314-cp314t-win_amd64.whl", hash = "sha256:b14dc141ed6d2dde437cddb216004bcac6a1df0935d79656387bd41632ba0bbd", size = 178531, upload-time = "2026-01-10T09:23:35.016Z" },
+    { url = "https://files.pythonhosted.org/packages/72/07/c98a68571dcf256e74f1f816b8cc5eae6eb2d3d5cfa44d37f801619d9166/websockets-16.0-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:349f83cd6c9a415428ee1005cadb5c2c56f4389bc06a9af16103c3bc3dcc8b7d", size = 174947, upload-time = "2026-01-10T09:23:36.166Z" },
+    { url = "https://files.pythonhosted.org/packages/7e/52/93e166a81e0305b33fe416338be92ae863563fe7bce446b0f687b9df5aea/websockets-16.0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:4a1aba3340a8dca8db6eb5a7986157f52eb9e436b74813764241981ca4888f03", size = 175260, upload-time = "2026-01-10T09:23:37.409Z" },
+    { url = "https://files.pythonhosted.org/packages/56/0c/2dbf513bafd24889d33de2ff0368190a0e69f37bcfa19009ef819fe4d507/websockets-16.0-pp311-pypy311_pp73-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:f4a32d1bd841d4bcbffdcb3d2ce50c09c3909fbead375ab28d0181af89fd04da", size = 176071, upload-time = "2026-01-10T09:23:39.158Z" },
+    { url = "https://files.pythonhosted.org/packages/a5/8f/aea9c71cc92bf9b6cc0f7f70df8f0b420636b6c96ef4feee1e16f80f75dd/websockets-16.0-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0298d07ee155e2e9fda5be8a9042200dd2e3bb0b8a38482156576f863a9d457c", size = 176968, upload-time = "2026-01-10T09:23:41.031Z" },
+    { url = "https://files.pythonhosted.org/packages/9a/3f/f70e03f40ffc9a30d817eef7da1be72ee4956ba8d7255c399a01b135902a/websockets-16.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:a653aea902e0324b52f1613332ddf50b00c06fdaf7e92624fbf8c77c78fa5767", size = 178735, upload-time = "2026-01-10T09:23:42.259Z" },
+    { url = "https://files.pythonhosted.org/packages/6f/28/258ebab549c2bf3e64d2b0217b973467394a9cea8c42f70418ca2c5d0d2e/websockets-16.0-py3-none-any.whl", hash = "sha256:1637db62fad1dc833276dded54215f2c7fa46912301a24bd94d45d46a011ceec", size = 171598, upload-time = "2026-01-10T09:23:45.395Z" },
+]
+
+[[package]]
+name = "zipp"
+version = "3.23.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/e3/02/0f2892c661036d50ede074e376733dca2ae7c6eb617489437771209d4180/zipp-3.23.0.tar.gz", hash = "sha256:a07157588a12518c9d4034df3fbbee09c814741a33ff63c05fa29d26a2404166", size = 25547, upload-time = "2025-06-08T17:06:39.4Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/2e/54/647ade08bf0db230bfea292f893923872fd20be6ac6f53b2b936ba839d75/zipp-3.23.0-py3-none-any.whl", hash = "sha256:071652d6115ed432f5ce1d34c336c0adfd6a884660d1e9712a256d3d3bd4b14e", size = 10276, upload-time = "2025-06-08T17:06:38.034Z" },
+]