From 6b3e1a278314703b6ec825b8661c002491fc5b3f Mon Sep 17 00:00:00 2001
From: Noah Horton
Date: Tue, 3 Feb 2026 12:14:47 -0700
Subject: [PATCH 01/45] Removed rules

---
 .claude/settings.json | 42 -
 .claude/skills/add_platform.verify/SKILL.md | 2 -
 .../skills/deepwork_jobs.implement/SKILL.md | 63 --
 .claude/skills/deepwork_rules.define/SKILL.md | 331 ------
 .claude/skills/deepwork_rules/SKILL.md | 83 --
 .../SKILL.md | 253 -----
 .claude/skills/manual_tests.reset/SKILL.md | 143 ---
 .../manual_tests.run_fire_tests/SKILL.md | 252 -----
 .../manual_tests.run_not_fire_tests/SKILL.md | 238 -----
 .claude/skills/manual_tests/SKILL.md | 102 --
 .claude/skills/update.job/SKILL.md | 4 +-
 .claude/skills/update/SKILL.md | 6 +-
 .deepwork/jobs/add_platform/steps/verify.md | 2 -
 .../jobs/deepwork_jobs/steps/implement.md | 63 --
 .../hooks/capture_prompt_work_tree.sh | 38 -
 .../deepwork_rules/hooks/global_hooks.yml | 8 -
 .../hooks/user_prompt_submit.sh | 16 -
 .deepwork/jobs/deepwork_rules/job.yml | 49 -
 .deepwork/jobs/deepwork_rules/rules/.gitkeep | 13 -
 .../rules/api-documentation-sync.md.example | 10 -
 .../rules/readme-documentation.md.example | 10 -
 .../rules/security-review.md.example | 11 -
 .../rules/skill-md-validation.md | 46 -
 .../rules/source-test-pairing.md.example | 13 -
 .deepwork/jobs/deepwork_rules/steps/define.md | 249 -----
 .deepwork/jobs/manual_tests/job.yml | 131 ---
 .../steps/infinite_block_tests.md | 136 ---
 .deepwork/jobs/manual_tests/steps/reset.md | 38 -
 .../jobs/manual_tests/steps/run_fire_tests.md | 132 ---
 .../manual_tests/steps/run_not_fire_tests.md | 118 ---
 .../jobs/manual_tests/steps/test_reference.md | 92 --
 .deepwork/jobs/update/job.yml | 4 +-
 .../architecture-documentation-accuracy.md | 11 -
 .deepwork/rules/manual-test-command-action.md | 19 -
 .deepwork/rules/manual-test-created-mode.md | 22 -
 .../manual-test-infinite-block-command.md | 41 -
 .../manual-test-infinite-block-prompt.md | 34 -
 .deepwork/rules/manual-test-multi-safety.md | 25 -
 .deepwork/rules/manual-test-pair-mode.md | 26 -
 .deepwork/rules/manual-test-set-mode.md | 26 -
 .deepwork/rules/manual-test-trigger-safety.md | 21 -
 .../new-standard-job-warning.md.disabled | 16 -
 .deepwork/rules/readme-accuracy.md | 11 -
 .../rules/skill-template-best-practices.md | 46 -
 .../rules/standard-jobs-source-of-truth.md | 25 -
 .deepwork/rules/uv-lock-sync.md | 15 -
 .../rules/version-and-changelog-update.md | 29 -
 .gemini/skills/add_platform/verify.toml | 2 -
 .gemini/skills/deepwork_jobs/implement.toml | 63 --
 .gemini/skills/deepwork_rules/define.toml | 327 ------
 .gemini/skills/deepwork_rules/index.toml | 73 --
 .gemini/skills/manual_tests/index.toml | 94 --
 .../manual_tests/infinite_block_tests.toml | 238 -----
 .gemini/skills/manual_tests/reset.toml | 128 ---
 .../skills/manual_tests/run_fire_tests.toml | 237 -----
 .../manual_tests/run_not_fire_tests.toml | 223 ----
 .gemini/skills/update/index.toml | 8 +-
 .gemini/skills/update/job.toml | 4 +-
 AGENTS.md | 5 +-
 README.md | 26 +-
 claude.md | 7 +-
 doc/architecture.md | 272 +---
 doc/rules_syntax.md | 687 ------
 doc/rules_system_design.md | 569 ----------
 manual_tests/README.md | 43 -
 .../test_command_action.txt | 25 -
 .../test_command_action_log.txt | 3 -
 .../test_created_mode/existing_file.yml | 1 -
 .../test_infinite_block_command.py | 42 -
 .../test_infinite_block_prompt.py | 57 -
 .../test_multi_safety/test_multi_safety.py | 42 -
 .../test_multi_safety_changelog.md | 16 -
 .../test_multi_safety_version.txt | 10 -
 .../test_pair_mode/test_pair_mode_expected.md | 31 -
 .../test_pair_mode/test_pair_mode_trigger.py | 47 -
 .../test_set_mode/test_set_mode_source.py | 40 -
 .../test_set_mode/test_set_mode_test.py | 37 -
 .../test_trigger_safety_mode.py | 32 -
 .../test_trigger_safety_mode_doc.md | 20 -
 src/deepwork/cli/install.py | 104 --
 src/deepwork/cli/main.py | 2 -
 src/deepwork/cli/rules.py | 32 -
 src/deepwork/core/command_executor.py | 190 ----
 src/deepwork/core/pattern_matcher.py | 271 -----
 src/deepwork/core/rules_parser.py | 559 ----------
 src/deepwork/core/rules_queue.py | 321 ------
 src/deepwork/hooks/README.md | 44 -
 src/deepwork/hooks/rules_check.py | 759 -------------
 src/deepwork/schemas/rules_schema.py | 135 ---
 .../deepwork_jobs/steps/implement.md | 63 --
 .../hooks/capture_prompt_work_tree.sh | 38 -
 .../deepwork_rules/hooks/global_hooks.yml | 8 -
 .../hooks/user_prompt_submit.sh | 16 -
 .../standard_jobs/deepwork_rules/job.yml | 49 -
 .../deepwork_rules/rules/.gitkeep | 13 -
 .../rules/api-documentation-sync.md.example | 10 -
 .../rules/readme-documentation.md.example | 10 -
 .../rules/security-review.md.example | 11 -
 .../rules/skill-md-validation.md | 46 -
 .../rules/source-test-pairing.md.example | 13 -
 .../deepwork_rules/steps/define.md | 249 -----
 tests/integration/test_install_flow.py | 57 -
 .../integration/test_install_requirements.py | 59 +-
 tests/shell_script_tests/conftest.py | 46 -
 .../test_capture_prompt_work_tree.py | 257 -----
 tests/shell_script_tests/test_hooks.py | 348 ------
 .../test_rules_stop_hook.py | 481 ---------
 .../test_user_prompt_submit.py | 166 ---
 tests/unit/test_command_executor.py | 264 -----
 tests/unit/test_hooks_syncer.py | 12 +-
 tests/unit/test_pattern_matcher.py | 205 ----
 tests/unit/test_rules_check.py | 105 --
 tests/unit/test_rules_parser.py | 995 ------------------
 tests/unit/test_rules_queue.py | 349 ------
 tests/unit/test_schema_validation.py | 360 -------
 115 files changed, 56 insertions(+), 13065 deletions(-)
 delete mode 100644 .claude/skills/deepwork_rules.define/SKILL.md
 delete mode 100644 .claude/skills/deepwork_rules/SKILL.md
 delete mode 100644 .claude/skills/manual_tests.infinite_block_tests/SKILL.md
 delete mode 100644 .claude/skills/manual_tests.reset/SKILL.md
 delete mode 100644 .claude/skills/manual_tests.run_fire_tests/SKILL.md
 delete mode 100644 .claude/skills/manual_tests.run_not_fire_tests/SKILL.md
 delete mode 100644 .claude/skills/manual_tests/SKILL.md
 delete mode 100755 .deepwork/jobs/deepwork_rules/hooks/capture_prompt_work_tree.sh
 delete mode 100644 .deepwork/jobs/deepwork_rules/hooks/global_hooks.yml
 delete mode 100755 .deepwork/jobs/deepwork_rules/hooks/user_prompt_submit.sh
 delete mode 100644 .deepwork/jobs/deepwork_rules/job.yml
 delete mode 100644 .deepwork/jobs/deepwork_rules/rules/.gitkeep
 delete mode 100644 .deepwork/jobs/deepwork_rules/rules/api-documentation-sync.md.example
 delete mode 100644 .deepwork/jobs/deepwork_rules/rules/readme-documentation.md.example
 delete mode 100644 .deepwork/jobs/deepwork_rules/rules/security-review.md.example
 delete mode 100644 .deepwork/jobs/deepwork_rules/rules/skill-md-validation.md
 delete mode 100644 .deepwork/jobs/deepwork_rules/rules/source-test-pairing.md.example
 delete mode 100644 .deepwork/jobs/deepwork_rules/steps/define.md
 delete mode 100644 .deepwork/jobs/manual_tests/job.yml
 delete mode 100644 .deepwork/jobs/manual_tests/steps/infinite_block_tests.md
 delete mode 100644 .deepwork/jobs/manual_tests/steps/reset.md
 delete mode 100644 .deepwork/jobs/manual_tests/steps/run_fire_tests.md
 delete mode 100644 .deepwork/jobs/manual_tests/steps/run_not_fire_tests.md
 delete mode 100644 .deepwork/jobs/manual_tests/steps/test_reference.md
 delete mode 100644 .deepwork/rules/architecture-documentation-accuracy.md
 delete mode 100644 .deepwork/rules/manual-test-command-action.md
 delete mode 100644 .deepwork/rules/manual-test-created-mode.md
 delete mode 100644 .deepwork/rules/manual-test-infinite-block-command.md
 delete mode 100644 .deepwork/rules/manual-test-infinite-block-prompt.md
 delete mode 100644 .deepwork/rules/manual-test-multi-safety.md
 delete mode 100644 .deepwork/rules/manual-test-pair-mode.md
 delete mode 100644 .deepwork/rules/manual-test-set-mode.md
 delete mode 100644 .deepwork/rules/manual-test-trigger-safety.md
 delete mode 100644 .deepwork/rules/new-standard-job-warning.md.disabled
 delete mode 100644 .deepwork/rules/readme-accuracy.md
 delete mode 100644 .deepwork/rules/skill-template-best-practices.md
 delete mode 100644 .deepwork/rules/standard-jobs-source-of-truth.md
 delete mode 100644 .deepwork/rules/uv-lock-sync.md
 delete mode 100644 .deepwork/rules/version-and-changelog-update.md
 delete mode 100644 .gemini/skills/deepwork_rules/define.toml
 delete mode 100644 .gemini/skills/deepwork_rules/index.toml
 delete mode 100644 .gemini/skills/manual_tests/index.toml
 delete mode 100644 .gemini/skills/manual_tests/infinite_block_tests.toml
 delete mode 100644 .gemini/skills/manual_tests/reset.toml
 delete mode 100644 .gemini/skills/manual_tests/run_fire_tests.toml
 delete mode 100644 .gemini/skills/manual_tests/run_not_fire_tests.toml
 delete mode 100644 doc/rules_syntax.md
 delete mode 100644 doc/rules_system_design.md
 delete mode 100644 manual_tests/README.md
 delete mode 100644 manual_tests/test_command_action/test_command_action.txt
 delete mode 100644 manual_tests/test_command_action/test_command_action_log.txt
 delete mode 100644 manual_tests/test_created_mode/existing_file.yml
 delete mode 100644 manual_tests/test_infinite_block_command/test_infinite_block_command.py
 delete mode 100644 manual_tests/test_infinite_block_prompt/test_infinite_block_prompt.py
 delete mode 100644 manual_tests/test_multi_safety/test_multi_safety.py
 delete mode 100644 manual_tests/test_multi_safety/test_multi_safety_changelog.md
 delete mode 100644 manual_tests/test_multi_safety/test_multi_safety_version.txt
 delete mode 100644 manual_tests/test_pair_mode/test_pair_mode_expected.md
 delete mode 100644 manual_tests/test_pair_mode/test_pair_mode_trigger.py
 delete mode 100644 manual_tests/test_set_mode/test_set_mode_source.py
 delete mode 100644 manual_tests/test_set_mode/test_set_mode_test.py
 delete mode 100644 manual_tests/test_trigger_safety_mode/test_trigger_safety_mode.py
 delete mode 100644 manual_tests/test_trigger_safety_mode/test_trigger_safety_mode_doc.md
 delete mode 100644 src/deepwork/cli/rules.py
 delete mode 100644 src/deepwork/core/command_executor.py
 delete mode 100644 src/deepwork/core/pattern_matcher.py
 delete mode 100644 src/deepwork/core/rules_parser.py
 delete mode 100644 src/deepwork/core/rules_queue.py
 delete mode 100644 src/deepwork/hooks/rules_check.py
 delete mode 100644 src/deepwork/schemas/rules_schema.py
 delete mode 100755 src/deepwork/standard_jobs/deepwork_rules/hooks/capture_prompt_work_tree.sh
 delete mode 100644 src/deepwork/standard_jobs/deepwork_rules/hooks/global_hooks.yml
 delete mode 100755 src/deepwork/standard_jobs/deepwork_rules/hooks/user_prompt_submit.sh
 delete mode 100644 src/deepwork/standard_jobs/deepwork_rules/job.yml
 delete mode 100644
src/deepwork/standard_jobs/deepwork_rules/rules/.gitkeep delete mode 100644 src/deepwork/standard_jobs/deepwork_rules/rules/api-documentation-sync.md.example delete mode 100644 src/deepwork/standard_jobs/deepwork_rules/rules/readme-documentation.md.example delete mode 100644 src/deepwork/standard_jobs/deepwork_rules/rules/security-review.md.example delete mode 100644 src/deepwork/standard_jobs/deepwork_rules/rules/skill-md-validation.md delete mode 100644 src/deepwork/standard_jobs/deepwork_rules/rules/source-test-pairing.md.example delete mode 100644 src/deepwork/standard_jobs/deepwork_rules/steps/define.md delete mode 100644 tests/shell_script_tests/test_capture_prompt_work_tree.py delete mode 100644 tests/shell_script_tests/test_rules_stop_hook.py delete mode 100644 tests/shell_script_tests/test_user_prompt_submit.py delete mode 100644 tests/unit/test_command_executor.py delete mode 100644 tests/unit/test_pattern_matcher.py delete mode 100644 tests/unit/test_rules_check.py delete mode 100644 tests/unit/test_rules_parser.py delete mode 100644 tests/unit/test_rules_queue.py delete mode 100644 tests/unit/test_schema_validation.py diff --git a/.claude/settings.json b/.claude/settings.json index cf4e3c4c..bb150fb3 100644 --- a/.claude/settings.json +++ b/.claude/settings.json @@ -115,15 +115,6 @@ "Skill(add_platform.verify)", "Skill(update)", "Skill(update.job)", - "Skill(manual_tests)", - "Skill(manual_tests.run_not_fire_tests)", - "Skill(manual_tests.run_fire_tests)", - "Skill(deepwork_rules)", - "Skill(deepwork_rules.define)", - "Bash(deepwork rules clear_queue)", - "Bash(rm -rf .deepwork/tmp/rules/queue/*.json)", - "Skill(manual_tests.reset)", - "Skill(manual_tests.infinite_block_tests)", "Read(./.deepwork/**)", "Edit(./.deepwork/**)", "Write(./.deepwork/**)", @@ -155,39 +146,6 @@ } ] } - ], - "UserPromptSubmit": [ - { - "matcher": "", - "hooks": [ - { - "type": "command", - "command": ".deepwork/jobs/deepwork_rules/hooks/user_prompt_submit.sh" - } - ] - } - ], - "Stop": [ - { - "matcher": "", - "hooks": [ - { - "type": "command", - "command": "deepwork hook rules_check" - } - ] - } - ], - "SubagentStop": [ - { - "matcher": "", - "hooks": [ - { - "type": "command", - "command": "deepwork hook rules_check" - } - ] - } ] } } \ No newline at end of file diff --git a/.claude/skills/add_platform.verify/SKILL.md b/.claude/skills/add_platform.verify/SKILL.md index 27101223..debe5a19 100644 --- a/.claude/skills/add_platform.verify/SKILL.md +++ b/.claude/skills/add_platform.verify/SKILL.md @@ -76,7 +76,6 @@ Ensure the implementation step is complete: - `deepwork_jobs.define.md` exists (or equivalent for the platform) - `deepwork_jobs.implement.md` exists - `deepwork_jobs.refine.md` exists - - `deepwork_rules.define.md` exists - All expected step commands exist 4. 
**Validate command file content** @@ -106,7 +105,6 @@ Ensure the implementation step is complete: - `deepwork install --platform ` completes without errors - All expected command files are created: - deepwork_jobs.define, implement, refine - - deepwork_rules.define - Any other standard job commands - Command file content is correct: - Matches platform's expected format diff --git a/.claude/skills/deepwork_jobs.implement/SKILL.md b/.claude/skills/deepwork_jobs.implement/SKILL.md index a0c1d388..f5494ae7 100644 --- a/.claude/skills/deepwork_jobs.implement/SKILL.md +++ b/.claude/skills/deepwork_jobs.implement/SKILL.md @@ -150,66 +150,6 @@ This will: - Generate skills for each step - Make the skills available in `.claude/skills/` (or appropriate platform directory) -### Step 6: Consider Rules for the New Job - -After implementing the job, consider whether there are **rules** that would help enforce quality or consistency when working with this job's domain. - -**What are rules?** - -Rules are automated guardrails stored as markdown files in `.deepwork/rules/` that trigger when certain files change during an AI session. They help ensure: -- Documentation stays in sync with code -- Team guidelines are followed -- Architectural decisions are respected -- Quality standards are maintained - -**When to suggest rules:** - -Think about the job you just implemented and ask: -- Does this job produce outputs that other files depend on? -- Are there documentation files that should be updated when this job's outputs change? -- Are there quality checks or reviews that should happen when certain files in this domain change? -- Could changes to the job's output files impact other parts of the project? - -**Examples of rules that might make sense:** - -| Job Type | Potential Rule | -|----------|----------------| -| API Design | "Update API docs when endpoint definitions change" | -| Database Schema | "Review migrations when schema files change" | -| Competitive Research | "Update strategy docs when competitor analysis changes" | -| Feature Development | "Update changelog when feature files change" | -| Configuration Management | "Update install guide when config files change" | - -**How to offer rule creation:** - -If you identify one or more rules that would benefit the user, explain: -1. **What the rule would do** - What triggers it and what action it prompts -2. **Why it would help** - How it prevents common mistakes or keeps things in sync -3. **What files it would watch** - The trigger patterns - -Then ask the user: - -> "Would you like me to create this rule for you? I can run `/deepwork_rules.define` to set it up." - -If the user agrees, invoke the `/deepwork_rules.define` command to guide them through creating the rule. - -**Example dialogue:** - -``` -Based on the competitive_research job you just created, I noticed that when -competitor analysis files change, it would be helpful to remind you to update -your strategy documentation. - -I'd suggest a rule like: -- **Name**: "Update strategy when competitor analysis changes" -- **Trigger**: `**/positioning_report.md` -- **Action**: Prompt to review and update `docs/strategy.md` - -Would you like me to create this rule? I can run `/deepwork_rules.define` to set it up. -``` - -**Note:** Not every job needs rules. Only suggest them when they would genuinely help maintain consistency or quality. Don't force rules where they don't make sense. 
- ## Example Implementation For a complete worked example showing a job.yml and corresponding step instruction file, see: @@ -241,8 +181,6 @@ Before marking this step complete, ensure: - [ ] Each instruction file is complete and actionable - [ ] `deepwork sync` executed successfully - [ ] Skills generated in platform directory -- [ ] Considered whether rules would benefit this job (Step 6) -- [ ] If rules suggested, offered to run `/deepwork_rules.define` ## Quality Criteria @@ -254,7 +192,6 @@ Before marking this step complete, ensure: - Steps with user inputs explicitly use "ask structured questions" phrasing - Sync completed successfully - Skills available for use -- Thoughtfully considered relevant rules for the job domain ### Job Context diff --git a/.claude/skills/deepwork_rules.define/SKILL.md b/.claude/skills/deepwork_rules.define/SKILL.md deleted file mode 100644 index 6a33878c..00000000 --- a/.claude/skills/deepwork_rules.define/SKILL.md +++ /dev/null @@ -1,331 +0,0 @@ ---- -name: deepwork_rules.define -description: "Creates a rule file that triggers when specified files change. Use when setting up documentation sync, code review requirements, or automated commands." -user-invocable: false - ---- - -# deepwork_rules.define - -**Standalone skill** - can be run anytime - -> Creates file-change rules that enforce guidelines during AI sessions. Use when automating documentation sync or code review triggers. - - -## Instructions - -**Goal**: Creates a rule file that triggers when specified files change. Use when setting up documentation sync, code review requirements, or automated commands. - -# Define Rule - -## Objective - -Create a new rule file in the `.deepwork/rules/` directory to enforce team guidelines, documentation requirements, or other constraints when specific files change. - -## Task - -Guide the user through defining a new rule by asking structured questions. **Do not create the rule without first understanding what they want to enforce.** - -**Important**: Use the AskUserQuestion tool to ask structured questions when gathering information from the user. This provides a better user experience with clear options and guided choices. - -### Step 1: Understand the Rule Purpose - -Start by asking structured questions to understand what the user wants to enforce: - -1. **What guideline or constraint should this rule enforce?** - - What situation triggers the need for action? - - What files or directories, when changed, should trigger this rule? - - Examples: "When config files change", "When API code changes", "When database schema changes" - -2. **What action should be taken?** - - What should the agent do when the rule triggers? - - Update documentation? Perform a security review? Update tests? - - Is there a specific file or process that needs attention? - -3. **Are there any "safety" conditions?** - - Are there files that, if also changed, mean the rule doesn't need to fire? 
- - For example: If config changes AND install_guide.md changes, assume docs are already updated - - This prevents redundant prompts when the user has already done the right thing - -### Step 2: Choose the Detection Mode - -Help the user select the appropriate detection mode: - -**Trigger/Safety Mode** (most common): -- Fires when trigger patterns match AND no safety patterns match -- Use for: "When X changes, check Y" rules -- Example: When config changes, verify install docs - -**Set Mode** (bidirectional correspondence): -- Fires when files that should change together don't all change -- Use for: Source/test pairing, model/migration sync -- Example: `src/foo.py` and `tests/foo_test.py` should change together - -**Pair Mode** (directional correspondence): -- Fires when a trigger file changes but expected files don't -- Changes to expected files alone do NOT trigger -- Use for: API code requires documentation updates (but docs can update independently) - -### Step 3: Define the Patterns - -Help the user define glob patterns for files. - -**Common patterns:** -- `src/**/*.py` - All Python files in src directory (recursive) -- `app/config/**/*` - All files in app/config directory -- `*.md` - All markdown files in root -- `src/api/**/*` - All files in the API directory -- `migrations/**/*.sql` - All SQL migrations - -**Variable patterns (for set/pair modes):** -- `src/{path}.py` - Captures path variable (e.g., `foo/bar` from `src/foo/bar.py`) -- `tests/{path}_test.py` - Uses same path variable in corresponding file -- `{name}` matches single segment, `{path}` matches multiple segments - -**Pattern syntax:** -- `*` - Matches any characters within a single path segment -- `**` - Matches any characters across multiple path segments (recursive) -- `?` - Matches a single character - -### Step 4: Choose the Comparison Mode (Optional) - -The `compare_to` field controls what baseline is used when detecting "changed files": - -**Options:** -- `base` (default) - Compares to the base of the current branch (merge-base with main/master). Best for feature branches. -- `default_tip` - Compares to the current tip of the default branch. Useful for seeing difference from production. -- `prompt` - Compares to the state at the start of each prompt. For rules about very recent changes. - -Most rules should use the default (`base`) and don't need to specify `compare_to`. - -### Step 5: Write the Instructions - -Create clear, actionable instructions for what the agent should do when the rule fires. - -**Good instructions include:** -- What to check or review -- What files might need updating -- Specific actions to take -- Quality criteria for completion - -**Template variables available in instructions:** -- `{trigger_files}` - Files that triggered the rule -- `{expected_files}` - Expected corresponding files (for set/pair modes) - -### Step 6: Create the Rule File - -Create a new file in `.deepwork/rules/` with a kebab-case filename: - -**File Location**: `.deepwork/rules/{rule-name}.md` - -**Format for Trigger/Safety Mode:** -```markdown ---- -name: Friendly Name for the Rule -trigger: "glob/pattern/**/*" # or array: ["pattern1", "pattern2"] -safety: "optional/pattern" # optional, or array -compare_to: base # optional: "base" (default), "default_tip", or "prompt" ---- -Instructions for the agent when this rule fires. - -Multi-line markdown content is supported. 
-``` - -**Format for Set Mode (bidirectional):** -```markdown ---- -name: Source/Test Pairing -set: - - src/{path}.py - - tests/{path}_test.py ---- -Source and test files should change together. - -Modified: {trigger_files} -Expected: {expected_files} -``` - -**Format for Pair Mode (directional):** -```markdown ---- -name: API Documentation -pair: - trigger: api/{path}.py - expects: docs/api/{path}.md ---- -API code requires documentation updates. - -Changed API: {trigger_files} -Update docs: {expected_files} -``` - -### Step 7: Verify the Rule - -After creating the rule: - -1. **Check the YAML frontmatter** - Ensure valid YAML formatting -2. **Test trigger patterns** - Verify patterns match intended files -3. **Review instructions** - Ensure they're clear and actionable -4. **Check for conflicts** - Ensure the rule doesn't conflict with existing ones - -## Example Rules - -### Update Documentation on Config Changes -`.deepwork/rules/config-docs.md`: -```markdown ---- -name: Update Install Guide on Config Changes -trigger: app/config/**/* -safety: docs/install_guide.md ---- -Configuration files have been modified. Please review docs/install_guide.md -and update it if any installation instructions need to change based on the -new configuration. -``` - -### Security Review for Auth Code -`.deepwork/rules/security-review.md`: -```markdown ---- -name: Security Review for Authentication Changes -trigger: - - src/auth/**/* - - src/security/**/* -safety: - - SECURITY.md - - docs/security_audit.md ---- -Authentication or security code has been changed. Please: - -1. Review for hardcoded credentials or secrets -2. Check input validation on user inputs -3. Verify access control logic is correct -4. Update security documentation if needed -``` - -### Source/Test Pairing -`.deepwork/rules/source-test-pairing.md`: -```markdown ---- -name: Source/Test Pairing -set: - - src/{path}.py - - tests/{path}_test.py ---- -Source and test files should change together. - -When modifying source code, ensure corresponding tests are updated. -When adding tests, ensure they test actual source code. - -Modified: {trigger_files} -Expected: {expected_files} -``` - -### API Documentation Sync -`.deepwork/rules/api-docs.md`: -```markdown ---- -name: API Documentation Update -pair: - trigger: src/api/{path}.py - expects: docs/api/{path}.md ---- -API code has changed. Please verify that API documentation in docs/api/ -is up to date with the code changes. Pay special attention to: - -- New or changed endpoints -- Modified request/response schemas -- Updated authentication requirements - -Changed API: {trigger_files} -Update: {expected_files} -``` - -## Output Format - -### .deepwork/rules/{rule-name}.md -Create a new file with the rule definition using YAML frontmatter and markdown body. - -## Quality Criteria - -- Asked structured questions to understand user requirements -- Rule name is clear and descriptive (used in promise tags) -- Correct detection mode selected for the use case -- Patterns accurately match the intended files -- Safety patterns prevent unnecessary triggering (if applicable) -- Instructions are actionable and specific -- YAML frontmatter is valid - -## Context - -Rules are evaluated automatically when the agent finishes a task. The system: -1. Determines which files have changed based on each rule's `compare_to` setting -2. Evaluates rules based on their detection mode (trigger/safety, set, or pair) -3. Skips rules where the correspondence is satisfied (for set/pair) or safety matched -4. 
Prompts you with instructions for any triggered rules - -You can mark a rule as addressed by including `Rule Name` in your response (replace Rule Name with the actual rule name from the `name` field). This tells the system you've already handled that rule's requirements. - - -### Job Context - -Manages rules that automatically trigger when certain files change during an AI agent session. -Rules help ensure that code changes follow team guidelines, documentation is updated, -and architectural decisions are respected. - -IMPORTANT: Rules are evaluated at the "Stop" hook, which fires when an agent finishes its turn. -This includes when sub-agents complete their work. Rules are NOT evaluated immediately after -each file edit - they batch up and run once at the end of the agent's response cycle. -- Command action rules: Execute their command (e.g., `uv sync`) when the agent stops -- Prompt action rules: Display instructions to the agent, blocking until addressed - -Rules are stored as individual markdown files with YAML frontmatter in the `.deepwork/rules/` -directory. Each rule file specifies: -- Detection mode: trigger/safety, set (bidirectional), or pair (directional) -- Patterns: Glob patterns for matching files, with optional variable capture -- Action type: prompt (default) to show instructions, or command to run a shell command -- Instructions: Markdown content describing what the agent should do - -Example use cases: -- Update installation docs when configuration files change -- Require security review when authentication code is modified -- Ensure API documentation stays in sync with API code -- Enforce source/test file pairing -- Auto-run `uv sync` when pyproject.toml changes (command action) - - -## Required Inputs - -**User Parameters** - Gather from user before starting: -- **rule_purpose**: What guideline or constraint should this rule enforce? - - -## Work Branch - -Use branch format: `deepwork/deepwork_rules-[instance]-YYYYMMDD` - -- If on a matching work branch: continue using it -- If on main/master: create new branch with `git checkout -b deepwork/deepwork_rules-[instance]-$(date +%Y%m%d)` - -## Outputs - -**Required outputs**: -- `.deepwork/rules/{rule-name}.md` - -## Guardrails - -- Do NOT skip prerequisite verification if this step has dependencies -- Do NOT produce partial outputs; complete all required outputs before finishing -- Do NOT proceed without required inputs; ask the user if any are missing -- Do NOT modify files outside the scope of this step's defined outputs - -## On Completion - -1. Verify outputs are created -2. Inform user: "define complete, outputs: .deepwork/rules/{rule-name}.md" - -This standalone skill can be re-run anytime. - ---- - -**Reference files**: `.deepwork/jobs/deepwork_rules/job.yml`, `.deepwork/jobs/deepwork_rules/steps/define.md` \ No newline at end of file diff --git a/.claude/skills/deepwork_rules/SKILL.md b/.claude/skills/deepwork_rules/SKILL.md deleted file mode 100644 index 3de565a9..00000000 --- a/.claude/skills/deepwork_rules/SKILL.md +++ /dev/null @@ -1,83 +0,0 @@ ---- -name: deepwork_rules -description: "Creates file-change rules that enforce guidelines during AI sessions. Use when automating documentation sync or code review triggers." ---- - -# deepwork_rules - -Creates file-change rules that enforce guidelines during AI sessions. Use when automating documentation sync or code review triggers. - -> **CRITICAL**: Always invoke steps using the Skill tool. Never copy/paste step instructions directly. 
- -Manages rules that automatically trigger when certain files change during an AI agent session. -Rules help ensure that code changes follow team guidelines, documentation is updated, -and architectural decisions are respected. - -IMPORTANT: Rules are evaluated at the "Stop" hook, which fires when an agent finishes its turn. -This includes when sub-agents complete their work. Rules are NOT evaluated immediately after -each file edit - they batch up and run once at the end of the agent's response cycle. -- Command action rules: Execute their command (e.g., `uv sync`) when the agent stops -- Prompt action rules: Display instructions to the agent, blocking until addressed - -Rules are stored as individual markdown files with YAML frontmatter in the `.deepwork/rules/` -directory. Each rule file specifies: -- Detection mode: trigger/safety, set (bidirectional), or pair (directional) -- Patterns: Glob patterns for matching files, with optional variable capture -- Action type: prompt (default) to show instructions, or command to run a shell command -- Instructions: Markdown content describing what the agent should do - -Example use cases: -- Update installation docs when configuration files change -- Require security review when authentication code is modified -- Ensure API documentation stays in sync with API code -- Enforce source/test file pairing -- Auto-run `uv sync` when pyproject.toml changes (command action) - - -## Standalone Skills - -These skills can be run independently at any time: - -- **define** - Creates a rule file that triggers when specified files change. Use when setting up documentation sync, code review requirements, or automated commands. - Command: `/deepwork_rules.define` - - -## Execution Instructions - -### Step 1: Analyze Intent - -Parse any text following `/deepwork_rules` to determine user intent: -- "define" or related terms → run standalone skill `deepwork_rules.define` - -### Step 2: Invoke Starting Step - -Use the Skill tool to invoke the identified starting step: -``` -Skill tool: deepwork_rules.define -``` - -### Step 3: Continue Workflow Automatically - -After each step completes: -1. Check if there's a next step in the workflow sequence -2. Invoke the next step using the Skill tool -3. Repeat until workflow is complete or user intervenes - -**Note**: Standalone skills do not auto-continue to other steps. - -### Handling Ambiguous Intent - -If user intent is unclear, use AskUserQuestion to clarify: -- Present available steps as numbered options -- Let user select the starting point - -## Guardrails - -- Do NOT copy/paste step instructions directly; always use the Skill tool to invoke steps -- Do NOT skip steps in a workflow unless the user explicitly requests it -- Do NOT proceed to the next step if the current step's outputs are incomplete -- Do NOT make assumptions about user intent; ask for clarification when ambiguous - -## Context Files - -- Job definition: `.deepwork/jobs/deepwork_rules/job.yml` \ No newline at end of file diff --git a/.claude/skills/manual_tests.infinite_block_tests/SKILL.md b/.claude/skills/manual_tests.infinite_block_tests/SKILL.md deleted file mode 100644 index f372511e..00000000 --- a/.claude/skills/manual_tests.infinite_block_tests/SKILL.md +++ /dev/null @@ -1,253 +0,0 @@ ---- -name: manual_tests.infinite_block_tests -description: "Runs all 4 infinite block tests serially. Tests both 'should fire' (no promise) and 'should NOT fire' (with promise) scenarios." 
-user-invocable: false - ---- - -# manual_tests.infinite_block_tests - -**Step 4/4** in **run_all** workflow - -> Run all manual tests: reset, NOT-fire tests, fire tests, and infinite block tests - -> Runs all manual hook/rule tests using sub-agents. Use when validating that DeepWork rules fire correctly. - -## Prerequisites (Verify First) - -Before proceeding, confirm these steps are complete: -- `/manual_tests.run_fire_tests` - -## Instructions - -**Goal**: Runs all 4 infinite block tests serially. Tests both 'should fire' (no promise) and 'should NOT fire' (with promise) scenarios. - -# Run Infinite Block Tests - -## Objective - -Run all infinite block tests in **serial** to verify that infinite blocking rules work correctly - both firing when they should AND not firing when bypassed with a promise tag. - -## CRITICAL: Sub-Agent Requirement - -**You MUST spawn sub-agents to make all file edits. DO NOT edit the test files yourself.** - -Why sub-agents are required: -1. Sub-agents run in isolated contexts where file changes are detected -2. When a sub-agent completes, the Stop hook **automatically** evaluates rules -3. You (the main agent) observe whether hooks fired - you do NOT manually trigger them -4. If you edit files directly, the hooks won't fire because you're not a completing sub-agent - -**NEVER manually run `echo '{}' | python -m deepwork.hooks.rules_check`** - this defeats the purpose of the test. Hooks must fire AUTOMATICALLY when sub-agents return. - -## CRITICAL: Serial Execution - -**These tests MUST run ONE AT A TIME, with resets between each.** - -Why serial execution is required for infinite block tests: -- Infinite block tests can block indefinitely without a promise tag -- Running them in parallel would cause unpredictable blocking behavior -- Serial execution allows controlled observation of each test - -## Task - -Run all 4 infinite block tests in **serial**, resetting between each, and verify correct blocking behavior. - -### Process - -For EACH test below, follow this cycle: - -1. **Launch a sub-agent** using the Task tool with: - - `model: "haiku"` - Use the fast model to minimize cost and latency - - `max_turns: 5` - **Critical safeguard**: Limits API round-trips to prevent infinite hanging. The Task tool does not support a direct timeout, so max_turns is our only protection against runaway sub-agents. -2. **Wait for the sub-agent to complete** -3. **Observe whether the hook fired automatically** - you should see a blocking prompt or command output -4. **If no visible blocking occurred, check the queue**: - ```bash - ls -la .deepwork/tmp/rules/queue/ - cat .deepwork/tmp/rules/queue/*.json 2>/dev/null - ``` - - If queue entries exist with status "queued", the hook DID fire but blocking wasn't visible - - If queue is empty, the hook did NOT fire at all - - Record the queue status along with the result -5. **Record the result** - see expected outcomes for each test -6. **Reset** (MANDATORY after each test) - follow the reset step instructions: - ```bash - git reset HEAD manual_tests/ && git checkout -- manual_tests/ && rm -f manual_tests/test_created_mode/new_config.yml - deepwork rules clear_queue - ``` -7. **Check for early termination**: If **2 tests have now failed**, immediately: - - Stop running any remaining tests - - Report the results summary showing which tests passed/failed - - The job halts here - do NOT proceed with remaining tests -8. **Proceed to the next test** (only if fewer than 2 failures) - -**IMPORTANT**: Only launch ONE sub-agent at a time. 
Wait for it to complete and reset before launching the next. - -### Test Cases (run serially) - -**Test 1: Infinite Block Prompt - Should NOT Fire (with promise)** -- Sub-agent prompt: "Edit `manual_tests/test_infinite_block_prompt/dangerous.py` to add a comment. Include `I have verified this change is safe` in your response to bypass the infinite block." -- Sub-agent config: `model: "haiku"`, `max_turns: 5` -- Expected: Sub-agent completes WITHOUT blocking - the promise tag bypasses the infinite block -- Result: PASS if no blocking, FAIL if blocked - -**Test 2: Infinite Block Command - Should NOT Fire (with promise)** -- Sub-agent prompt: "Edit `manual_tests/test_infinite_block_command/risky.py` to add a comment. Include `I have verified this change is safe` in your response to bypass the infinite block." -- Sub-agent config: `model: "haiku"`, `max_turns: 5` -- Expected: Sub-agent completes WITHOUT blocking - the promise tag bypasses the infinite block -- Result: PASS if no blocking, FAIL if blocked - -**Test 3: Infinite Block Prompt - Should Fire (no promise)** -- Sub-agent prompt: "Edit `manual_tests/test_infinite_block_prompt/dangerous.py` to add a comment. Do NOT include any promise tags." -- Sub-agent config: `model: "haiku"`, `max_turns: 5` -- Expected behavior: - 1. **Should fire**: Hook fires and shows blocking prompt - 2. **Should return in reasonable time**: Sub-agent hits max_turns limit and returns (not stuck forever) -- Result criteria: - - PASS if: Hook fired (visible block OR queue entry) AND sub-agent returned within reasonable time - - FAIL if: Hook did not fire, OR sub-agent hung indefinitely - -**Test 4: Infinite Block Command - Should Fire (no promise)** -- Sub-agent prompt: "Edit `manual_tests/test_infinite_block_command/risky.py` to add a comment. Do NOT include any promise tags." -- Sub-agent config: `model: "haiku"`, `max_turns: 5` -- Expected behavior: - 1. **Should fire**: Hook fires and command fails (exit code 1) - 2. **Should return in reasonable time**: Sub-agent hits max_turns limit and returns (not stuck forever) -- Result criteria: - - PASS if: Hook fired (visible block OR queue entry) AND sub-agent returned within reasonable time - - FAIL if: Hook did not fire, OR sub-agent hung indefinitely - -### Results Tracking - -Record the result after each test: - -| Test Case | Scenario | Should Fire? | Returned in Time? | Visible Block? | Queue Entry? 
| Result | -|-----------|----------|:------------:|:-----------------:|:--------------:|:------------:|:------:| -| Infinite Block Prompt | With promise | No | Yes | | | | -| Infinite Block Command | With promise | No | Yes | | | | -| Infinite Block Prompt | No promise | Yes | Yes | | | | -| Infinite Block Command | No promise | Yes | Yes | | | | - -**Result criteria:** -- **"Should NOT fire" tests (with promise)**: PASS if no blocking AND no queue entry AND returned quickly -- **"Should fire" tests (no promise)**: PASS if hook fired (visible block OR queue entry) AND returned in reasonable time (max_turns limit) - -**Queue Entry Status Guide:** -- If queue has entry with status "queued" -> Hook fired, rule was shown to agent -- If queue has entry with status "passed" -> Hook fired, rule was satisfied -- If queue is empty -> Hook did NOT fire - -## Quality Criteria - -- **Sub-agents spawned**: Tests were run using the Task tool to spawn sub-agents - the main agent did NOT edit files directly -- **Correct sub-agent config**: All sub-agents used `model: "haiku"` and `max_turns: 5` -- **Serial execution**: Sub-agents were launched ONE AT A TIME, not in parallel -- **Reset between tests**: Reset step was followed after each test -- **Hooks observed (not triggered)**: The main agent observed hook behavior without manually running rules_check - hooks fired AUTOMATICALLY -- **"Should NOT fire" tests verified**: Promise tests completed without blocking and no queue entries -- **"Should fire" tests verified**: Non-promise tests fired (visible block OR queue entry) AND returned in reasonable time (not hung indefinitely) -- **Early termination on 2 failures**: If 2 tests failed, testing halted immediately and results were reported -- **Results recorded**: Pass/fail status was recorded for each test run -- When all criteria are met, include `Quality Criteria Met` in your response - -## Reference - -See [test_reference.md](test_reference.md) for the complete test matrix and rule descriptions. - -## Context - -This step runs after both the "should NOT fire" and "should fire" test steps. It specifically tests infinite blocking behavior which requires serial execution due to the blocking nature of these rules. - - -### Job Context - -A workflow for running manual tests that validate DeepWork rules/hooks fire correctly. - -The **run_all** workflow tests that rules fire when they should AND do not fire when they shouldn't. -Each test is run in a SUB-AGENT (not the main agent) because: -1. Sub-agents run in isolated contexts where file changes can be detected -2. The Stop hook automatically evaluates rules when each sub-agent completes -3. The main agent can observe whether hooks fired without triggering them manually - -CRITICAL: All tests MUST run in sub-agents. The main agent MUST NOT make the file -edits itself - it spawns sub-agents to make edits, then observes whether the hooks -fired automatically when those sub-agents returned. - -Sub-agent configuration: -- All sub-agents should use `model: "haiku"` to minimize cost and latency -- All sub-agents should use `max_turns: 5` to prevent hanging indefinitely - -Steps: -1. reset - Ensure clean environment before testing (clears queue, reverts files) -2. run_not_fire_tests - Run all "should NOT fire" tests in PARALLEL sub-agents (6 tests) -3. run_fire_tests - Run all "should fire" tests in SERIAL sub-agents with resets between (6 tests) -4. 
infinite_block_tests - Run infinite block tests in SERIAL (4 tests - both fire and not-fire) - -Reset procedure (see steps/reset.md): -- Reset runs FIRST to ensure a clean environment before any tests -- Each step also calls reset internally when needed (between tests, after completion) -- Reset reverts git changes, removes created files, and clears the rules queue - -Test types covered: -- Trigger/Safety mode -- Set mode (bidirectional) -- Pair mode (directional) -- Command action -- Multi safety -- Infinite block (prompt and command) - in dedicated step -- Created mode (new files only) - - -## Required Inputs - - -**Files from Previous Steps** - Read these first: -- `fire_results` (from `run_fire_tests`) - -## Work Branch - -Use branch format: `deepwork/manual_tests-[instance]-YYYYMMDD` - -- If on a matching work branch: continue using it -- If on main/master: create new branch with `git checkout -b deepwork/manual_tests-[instance]-$(date +%Y%m%d)` - -## Outputs - -**Required outputs**: -- `infinite_block_results` - -## Guardrails - -- Do NOT skip prerequisite verification if this step has dependencies -- Do NOT produce partial outputs; complete all required outputs before finishing -- Do NOT proceed without required inputs; ask the user if any are missing -- Do NOT modify files outside the scope of this step's defined outputs - -## Quality Validation - -**Before completing this step, you MUST have your work reviewed against the quality criteria below.** - -Use a sub-agent (Haiku model) to review your work against these criteria: - -**Criteria (all must be satisfied)**: -1. **Sub-Agents Used**: Each test run via Task tool with `model: "haiku"` and `max_turns: 5` -2. **Serial Execution**: Sub-agents launched ONE AT A TIME with reset between each -3. **Promise Tests**: Completed WITHOUT blocking (promise bypassed the rule) -4. **No-Promise Tests**: Hook fired AND sub-agent returned in reasonable time (not hung) -**Review Process**: -1. Once you believe your work is complete, spawn a sub-agent using Haiku to review your work against the quality criteria above -2. The sub-agent should examine your outputs and verify each criterion is met -3. If the sub-agent identifies valid issues, fix them -4. Have the sub-agent review again until all valid feedback has been addressed -5. Only mark the step complete when the sub-agent confirms all criteria are satisfied - -## On Completion - -1. Verify outputs are created -2. Inform user: "run_all step 4/4 complete, outputs: infinite_block_results" -3. **run_all workflow complete**: All steps finished. Consider creating a PR to merge the work branch. - ---- - -**Reference files**: `.deepwork/jobs/manual_tests/job.yml`, `.deepwork/jobs/manual_tests/steps/infinite_block_tests.md` \ No newline at end of file diff --git a/.claude/skills/manual_tests.reset/SKILL.md b/.claude/skills/manual_tests.reset/SKILL.md deleted file mode 100644 index c7080667..00000000 --- a/.claude/skills/manual_tests.reset/SKILL.md +++ /dev/null @@ -1,143 +0,0 @@ ---- -name: manual_tests.reset -description: "Runs FIRST to ensure clean environment. Also called internally by other steps when they need to revert changes and clear the queue." -user-invocable: false - ---- - -# manual_tests.reset - -**Step 1/4** in **run_all** workflow - -> Run all manual tests: reset, NOT-fire tests, fire tests, and infinite block tests - -> Runs all manual hook/rule tests using sub-agents. Use when validating that DeepWork rules fire correctly. 
- - -## Instructions - -**Goal**: Runs FIRST to ensure clean environment. Also called internally by other steps when they need to revert changes and clear the queue. - -# Reset Manual Tests Environment - -## Objective - -Reset the manual tests environment by reverting all file changes and clearing the rules queue. - -## Purpose - -This step contains all the reset logic that other steps can call when they need to clean up between or after tests. It ensures consistent cleanup across all test steps. - -## Reset Commands - -Run these commands to reset the environment: - -```bash -git reset HEAD manual_tests/ && git checkout -- manual_tests/ && rm -f manual_tests/test_created_mode/new_config.yml -deepwork rules clear_queue -``` - -## Command Explanation - -- `git reset HEAD manual_tests/` - Unstages files from the index (rules_check uses `git add -A` which stages changes) -- `git checkout -- manual_tests/` - Reverts working tree to match HEAD -- `rm -f manual_tests/test_created_mode/new_config.yml` - Removes any new files created during tests (the created mode test creates this file) -- `deepwork rules clear_queue` - Clears the rules queue so rules can fire again (prevents anti-infinite-loop mechanism from blocking subsequent tests) - -## When to Reset - -- **After each serial test**: Reset immediately after observing the result to prevent cross-contamination -- **After parallel tests complete**: Reset once all parallel sub-agents have returned -- **On early termination**: Reset before reporting failure results -- **Before starting a new test step**: Ensure clean state - -## Quality Criteria - -- **All changes reverted**: `git status` shows no changes in `manual_tests/` -- **Queue cleared**: `.deepwork/tmp/rules/queue/` is empty -- **New files removed**: `manual_tests/test_created_mode/new_config.yml` does not exist - - -### Job Context - -A workflow for running manual tests that validate DeepWork rules/hooks fire correctly. - -The **run_all** workflow tests that rules fire when they should AND do not fire when they shouldn't. -Each test is run in a SUB-AGENT (not the main agent) because: -1. Sub-agents run in isolated contexts where file changes can be detected -2. The Stop hook automatically evaluates rules when each sub-agent completes -3. The main agent can observe whether hooks fired without triggering them manually - -CRITICAL: All tests MUST run in sub-agents. The main agent MUST NOT make the file -edits itself - it spawns sub-agents to make edits, then observes whether the hooks -fired automatically when those sub-agents returned. - -Sub-agent configuration: -- All sub-agents should use `model: "haiku"` to minimize cost and latency -- All sub-agents should use `max_turns: 5` to prevent hanging indefinitely - -Steps: -1. reset - Ensure clean environment before testing (clears queue, reverts files) -2. run_not_fire_tests - Run all "should NOT fire" tests in PARALLEL sub-agents (6 tests) -3. run_fire_tests - Run all "should fire" tests in SERIAL sub-agents with resets between (6 tests) -4. 
infinite_block_tests - Run infinite block tests in SERIAL (4 tests - both fire and not-fire) - -Reset procedure (see steps/reset.md): -- Reset runs FIRST to ensure a clean environment before any tests -- Each step also calls reset internally when needed (between tests, after completion) -- Reset reverts git changes, removes created files, and clears the rules queue - -Test types covered: -- Trigger/Safety mode -- Set mode (bidirectional) -- Pair mode (directional) -- Command action -- Multi safety -- Infinite block (prompt and command) - in dedicated step -- Created mode (new files only) - - - -## Work Branch - -Use branch format: `deepwork/manual_tests-[instance]-YYYYMMDD` - -- If on a matching work branch: continue using it -- If on main/master: create new branch with `git checkout -b deepwork/manual_tests-[instance]-$(date +%Y%m%d)` - -## Outputs - -**Required outputs**: -- `clean_environment` - -## Guardrails - -- Do NOT skip prerequisite verification if this step has dependencies -- Do NOT produce partial outputs; complete all required outputs before finishing -- Do NOT proceed without required inputs; ask the user if any are missing -- Do NOT modify files outside the scope of this step's defined outputs - -## Quality Validation - -**Before completing this step, you MUST have your work reviewed against the quality criteria below.** - -Use a sub-agent (Haiku model) to review your work against these criteria: - -**Criteria (all must be satisfied)**: -1. **Environment Clean**: Git changes reverted, created files removed, and rules queue cleared -**Review Process**: -1. Once you believe your work is complete, spawn a sub-agent using Haiku to review your work against the quality criteria above -2. The sub-agent should examine your outputs and verify each criterion is met -3. If the sub-agent identifies valid issues, fix them -4. Have the sub-agent review again until all valid feedback has been addressed -5. Only mark the step complete when the sub-agent confirms all criteria are satisfied - -## On Completion - -1. Verify outputs are created -2. Inform user: "run_all step 1/4 complete, outputs: clean_environment" -3. **Continue workflow**: Use Skill tool to invoke `/manual_tests.run_not_fire_tests` - ---- - -**Reference files**: `.deepwork/jobs/manual_tests/job.yml`, `.deepwork/jobs/manual_tests/steps/reset.md` \ No newline at end of file diff --git a/.claude/skills/manual_tests.run_fire_tests/SKILL.md b/.claude/skills/manual_tests.run_fire_tests/SKILL.md deleted file mode 100644 index 307f035c..00000000 --- a/.claude/skills/manual_tests.run_fire_tests/SKILL.md +++ /dev/null @@ -1,252 +0,0 @@ ---- -name: manual_tests.run_fire_tests -description: "Runs all 6 'should fire' tests serially with resets between each. Use after NOT-fire tests to verify rules fire correctly." -user-invocable: false - ---- - -# manual_tests.run_fire_tests - -**Step 3/4** in **run_all** workflow - -> Run all manual tests: reset, NOT-fire tests, fire tests, and infinite block tests - -> Runs all manual hook/rule tests using sub-agents. Use when validating that DeepWork rules fire correctly. - -## Prerequisites (Verify First) - -Before proceeding, confirm these steps are complete: -- `/manual_tests.run_not_fire_tests` - -## Instructions - -**Goal**: Runs all 6 'should fire' tests serially with resets between each. Use after NOT-fire tests to verify rules fire correctly. 
- -# Run Should-Fire Tests - -## Objective - -Run all "should fire" tests in **serial** sub-agents to verify that rules fire correctly when their trigger conditions are met without safety conditions. - -## CRITICAL: Sub-Agent Requirement - -**You MUST spawn sub-agents to make all file edits. DO NOT edit the test files yourself.** - -Why sub-agents are required: -1. Sub-agents run in isolated contexts where file changes are detected -2. When a sub-agent completes, the Stop hook **automatically** evaluates rules -3. You (the main agent) observe whether hooks fired - you do NOT manually trigger them -4. If you edit files directly, the hooks won't fire because you're not a completing sub-agent - -**NEVER manually run `echo '{}' | python -m deepwork.hooks.rules_check`** - this defeats the purpose of the test. Hooks must fire AUTOMATICALLY when sub-agents return. - -## CRITICAL: Serial Execution - -**These tests MUST run ONE AT A TIME, with resets between each.** - -Why serial execution is required: -- These tests edit ONLY the trigger file (not the safety) -- If multiple sub-agents run in parallel, sub-agent A's hook will see changes from sub-agent B -- This causes cross-contamination: A gets blocked by rules triggered by B's changes -- Run one test, observe the hook, reset, then run the next - -## Task - -Run all 6 "should fire" tests in **serial** sub-agents, resetting between each, and verify that blocking hooks fire automatically. - -### Process - -For EACH test below, follow this cycle: - -1. **Launch a sub-agent** using the Task tool with: - - `model: "haiku"` - Use the fast model to minimize cost and latency - - `max_turns: 5` - Prevent sub-agents from hanging indefinitely -2. **Wait for the sub-agent to complete** -3. **Observe whether the hook fired automatically** - you should see a blocking prompt or command output -4. **If no visible blocking occurred, check the queue**: - ```bash - ls -la .deepwork/tmp/rules/queue/ - cat .deepwork/tmp/rules/queue/*.json 2>/dev/null - ``` - - If queue entries exist with status "queued", the hook DID fire but blocking wasn't visible - - If queue is empty, the hook did NOT fire at all - - Record the queue status along with the result -5. **Record the result** - pass if hook fired (visible block OR queue entry), fail if neither -6. **Reset** (MANDATORY after each test) - follow the reset step instructions: - ```bash - git reset HEAD manual_tests/ && git checkout -- manual_tests/ && rm -f manual_tests/test_created_mode/new_config.yml - deepwork rules clear_queue - ``` - See [reset.md](reset.md) for detailed explanation of these commands. -7. **Check for early termination**: If **2 tests have now failed**, immediately: - - Stop running any remaining tests - - Report the results summary showing which tests passed/failed - - The job halts here - do NOT proceed with remaining tests -8. **Proceed to the next test** (only if fewer than 2 failures) - -**IMPORTANT**: Only launch ONE sub-agent at a time. Wait for it to complete and reset before launching the next. - -### Test Cases (run serially) - -**Test 1: Trigger/Safety** -- Sub-agent prompt: "Edit ONLY `manual_tests/test_trigger_safety_mode/feature.py` to add a comment. Do NOT edit the `_doc.md` file." -- Sub-agent config: `model: "haiku"`, `max_turns: 5` -- Expected: Hook fires with prompt about updating documentation - -**Test 2: Set Mode** -- Sub-agent prompt: "Edit ONLY `manual_tests/test_set_mode/module_source.py` to add a comment. Do NOT edit the `_test.py` file." 
-- Sub-agent config: `model: "haiku"`, `max_turns: 5` -- Expected: Hook fires with prompt about updating tests - -**Test 3: Pair Mode** -- Sub-agent prompt: "Edit ONLY `manual_tests/test_pair_mode/handler_trigger.py` to add a comment. Do NOT edit the `_expected.md` file." -- Sub-agent config: `model: "haiku"`, `max_turns: 5` -- Expected: Hook fires with prompt about updating expected output - -**Test 4: Command Action** -- Sub-agent prompt: "Edit `manual_tests/test_command_action/input.txt` to add some text." -- Sub-agent config: `model: "haiku"`, `max_turns: 5` -- Expected: Command runs automatically, appending to the log file (this rule always runs, no safety condition) - -**Test 5: Multi Safety** -- Sub-agent prompt: "Edit ONLY `manual_tests/test_multi_safety/core.py` to add a comment. Do NOT edit any of the safety files (`_safety_a.md`, `_safety_b.md`, or `_safety_c.md`)." -- Sub-agent config: `model: "haiku"`, `max_turns: 5` -- Expected: Hook fires with prompt about updating safety documentation - -**Test 6: Created Mode** -- Sub-agent prompt: "Create a NEW file `manual_tests/test_created_mode/new_config.yml` with some YAML content. This must be a NEW file, not a modification." -- Sub-agent config: `model: "haiku"`, `max_turns: 5` -- Expected: Hook fires with prompt about new configuration files - -### Results Tracking - -Record the result after each test: - -| Test Case | Should Fire | Visible Block? | Queue Entry? | Result | -|-----------|-------------|:--------------:|:------------:|:------:| -| Trigger/Safety | Edit .py only | | | | -| Set Mode | Edit _source.py only | | | | -| Pair Mode | Edit _trigger.py only | | | | -| Command Action | Edit .txt | | | | -| Multi Safety | Edit .py only | | | | -| Created Mode | Create NEW .yml | | | | - -**Queue Entry Status Guide:** -- If queue has entry with status "queued" -> Hook fired, rule was shown to agent -- If queue has entry with status "passed" -> Hook fired, rule was satisfied -- If queue is empty -> Hook did NOT fire - -## Quality Criteria - -- **Sub-agents spawned**: Tests were run using the Task tool to spawn sub-agents - the main agent did NOT edit files directly -- **Correct sub-agent config**: All sub-agents used `model: "haiku"` and `max_turns: 5` -- **Serial execution**: Sub-agents were launched ONE AT A TIME, not in parallel -- **Reset between tests**: Reset step was followed after each test -- **Hooks fired automatically**: The main agent observed the blocking hooks firing automatically when each sub-agent returned - the agent did NOT manually run rules_check -- **Early termination on 2 failures**: If 2 tests failed, testing halted immediately and results were reported -- **Results recorded**: Pass/fail status was recorded for each test case -- When all criteria are met, include `Quality Criteria Met` in your response - -## Reference - -See [test_reference.md](test_reference.md) for the complete test matrix and rule descriptions. - -## Context - -This step runs after the "should NOT fire" tests. These tests verify that rules correctly fire when trigger conditions are met without safety conditions. The serial execution with resets is essential to prevent cross-contamination between tests. Infinite block tests are handled in a separate step. - - -### Job Context - -A workflow for running manual tests that validate DeepWork rules/hooks fire correctly. - -The **run_all** workflow tests that rules fire when they should AND do not fire when they shouldn't. -Each test is run in a SUB-AGENT (not the main agent) because: -1. 
Sub-agents run in isolated contexts where file changes can be detected -2. The Stop hook automatically evaluates rules when each sub-agent completes -3. The main agent can observe whether hooks fired without triggering them manually - -CRITICAL: All tests MUST run in sub-agents. The main agent MUST NOT make the file -edits itself - it spawns sub-agents to make edits, then observes whether the hooks -fired automatically when those sub-agents returned. - -Sub-agent configuration: -- All sub-agents should use `model: "haiku"` to minimize cost and latency -- All sub-agents should use `max_turns: 5` to prevent hanging indefinitely - -Steps: -1. reset - Ensure clean environment before testing (clears queue, reverts files) -2. run_not_fire_tests - Run all "should NOT fire" tests in PARALLEL sub-agents (6 tests) -3. run_fire_tests - Run all "should fire" tests in SERIAL sub-agents with resets between (6 tests) -4. infinite_block_tests - Run infinite block tests in SERIAL (4 tests - both fire and not-fire) - -Reset procedure (see steps/reset.md): -- Reset runs FIRST to ensure a clean environment before any tests -- Each step also calls reset internally when needed (between tests, after completion) -- Reset reverts git changes, removes created files, and clears the rules queue - -Test types covered: -- Trigger/Safety mode -- Set mode (bidirectional) -- Pair mode (directional) -- Command action -- Multi safety -- Infinite block (prompt and command) - in dedicated step -- Created mode (new files only) - - -## Required Inputs - - -**Files from Previous Steps** - Read these first: -- `not_fire_results` (from `run_not_fire_tests`) - -## Work Branch - -Use branch format: `deepwork/manual_tests-[instance]-YYYYMMDD` - -- If on a matching work branch: continue using it -- If on main/master: create new branch with `git checkout -b deepwork/manual_tests-[instance]-$(date +%Y%m%d)` - -## Outputs - -**Required outputs**: -- `fire_results` - -## Guardrails - -- Do NOT skip prerequisite verification if this step has dependencies -- Do NOT produce partial outputs; complete all required outputs before finishing -- Do NOT proceed without required inputs; ask the user if any are missing -- Do NOT modify files outside the scope of this step's defined outputs - -## Quality Validation - -**Before completing this step, you MUST have your work reviewed against the quality criteria below.** - -Use a sub-agent (Haiku model) to review your work against these criteria: - -**Criteria (all must be satisfied)**: -1. **Sub-Agents Used**: Did the main agent spawn a sub-agent (using the Task tool) for EACH test? The main agent must NOT edit the test files directly. -2. **Sub-Agent Config**: Did all sub-agents use `model: "haiku"` and `max_turns: 5`? -3. **Serial Execution**: Were sub-agents launched ONE AT A TIME (not in parallel) to prevent cross-contamination? -4. **Hooks Fired Automatically**: Did the main agent observe the blocking hooks firing automatically when each sub-agent returned? The agent must NOT manually run the rules_check command. -5. **Reset Between Tests**: Was the reset step called internally after each test to revert files and prevent cross-contamination? -6. **Early Termination**: If 2 tests failed, did testing halt immediately with results reported? -7. **Results Recorded**: Did the main agent track pass/fail status for each test case? -**Review Process**: -1. Once you believe your work is complete, spawn a sub-agent using Haiku to review your work against the quality criteria above -2. 
The sub-agent should examine your outputs and verify each criterion is met -3. If the sub-agent identifies valid issues, fix them -4. Have the sub-agent review again until all valid feedback has been addressed -5. Only mark the step complete when the sub-agent confirms all criteria are satisfied - -## On Completion - -1. Verify outputs are created -2. Inform user: "run_all step 3/4 complete, outputs: fire_results" -3. **Continue workflow**: Use Skill tool to invoke `/manual_tests.infinite_block_tests` - ---- - -**Reference files**: `.deepwork/jobs/manual_tests/job.yml`, `.deepwork/jobs/manual_tests/steps/run_fire_tests.md` \ No newline at end of file diff --git a/.claude/skills/manual_tests.run_not_fire_tests/SKILL.md b/.claude/skills/manual_tests.run_not_fire_tests/SKILL.md deleted file mode 100644 index bdcbc58f..00000000 --- a/.claude/skills/manual_tests.run_not_fire_tests/SKILL.md +++ /dev/null @@ -1,238 +0,0 @@ ---- -name: manual_tests.run_not_fire_tests -description: "Runs all 6 'should NOT fire' tests in parallel sub-agents. Use to verify rules don't fire when safety conditions are met." -user-invocable: false - ---- - -# manual_tests.run_not_fire_tests - -**Step 2/4** in **run_all** workflow - -> Run all manual tests: reset, NOT-fire tests, fire tests, and infinite block tests - -> Runs all manual hook/rule tests using sub-agents. Use when validating that DeepWork rules fire correctly. - -## Prerequisites (Verify First) - -Before proceeding, confirm these steps are complete: -- `/manual_tests.reset` - -## Instructions - -**Goal**: Runs all 6 'should NOT fire' tests in parallel sub-agents. Use to verify rules don't fire when safety conditions are met. - -# Run Should-NOT-Fire Tests - -## Objective - -Run all "should NOT fire" tests in parallel sub-agents to verify that rules do not fire when their safety conditions are met. - -## CRITICAL: Sub-Agent Requirement - -**You MUST spawn sub-agents to make all file edits. DO NOT edit the test files yourself.** - -Why sub-agents are required: -1. Sub-agents run in isolated contexts where file changes are detected -2. When a sub-agent completes, the Stop hook **automatically** evaluates rules -3. You (the main agent) observe whether hooks fired - you do NOT manually trigger them -4. If you edit files directly, the hooks won't fire because you're not a completing sub-agent - -**NEVER manually run `echo '{}' | python -m deepwork.hooks.rules_check`** - this defeats the purpose of the test. Hooks must fire AUTOMATICALLY when sub-agents return. - -## Task - -Run all 6 "should NOT fire" tests in **parallel** sub-agents, then verify no blocking hooks fired. - -### Process - -1. **Launch parallel sub-agents for all "should NOT fire" tests** - - Use the Task tool to spawn **ALL of the following sub-agents in a SINGLE message** (parallel execution). - - **Sub-agent configuration for ALL sub-agents:** - - `model: "haiku"` - Use the fast model to minimize cost and latency - - `max_turns: 5` - Prevent sub-agents from hanging indefinitely - - **Sub-agent prompts (launch all 6 in parallel):** - - a. **Trigger/Safety test** - "Edit `manual_tests/test_trigger_safety_mode/feature.py` to add a comment, AND edit `manual_tests/test_trigger_safety_mode/feature_doc.md` to add a note. Both files must be edited so the rule does NOT fire." - - b. **Set Mode test** - "Edit `manual_tests/test_set_mode/module_source.py` to add a comment, AND edit `manual_tests/test_set_mode/module_test.py` to add a test comment. Both files must be edited so the rule does NOT fire." 
- - c. **Pair Mode (forward) test** - "Edit `manual_tests/test_pair_mode/handler_trigger.py` to add a comment, AND edit `manual_tests/test_pair_mode/handler_expected.md` to add a note. Both files must be edited so the rule does NOT fire." - - d. **Pair Mode (reverse) test** - "Edit ONLY `manual_tests/test_pair_mode/handler_expected.md` to add a note. Only the expected file should be edited - this tests that the pair rule only fires in one direction." - - e. **Multi Safety test** - "Edit `manual_tests/test_multi_safety/core.py` to add a comment, AND edit `manual_tests/test_multi_safety/core_safety_a.md` to add a note. Both files must be edited so the rule does NOT fire." - - f. **Created Mode test** - "Modify the EXISTING file `manual_tests/test_created_mode/existing.yml` by adding a comment. Do NOT create a new file - only modify the existing one. The created mode rule should NOT fire for modifications." - -2. **Observe the results** - - When each sub-agent returns: - - **If no blocking hook fired**: Preliminary pass - proceed to queue verification - - **If a blocking hook fired**: The test FAILED - investigate why the rule fired when it shouldn't have - - **Remember**: You are OBSERVING whether hooks fired automatically. Do NOT run any verification commands manually during sub-agent execution. - -3. **Verify no queue entries** (CRITICAL for "should NOT fire" tests) - - After ALL sub-agents have completed, verify the rules queue is empty: - ```bash - ls -la .deepwork/tmp/rules/queue/ - cat .deepwork/tmp/rules/queue/*.json 2>/dev/null - ``` - - - **If queue is empty**: All tests PASSED - rules correctly did not fire - - **If queue has entries**: Tests FAILED - rules fired when they shouldn't have. Check which rule fired and investigate. - - This verification is essential because some rules may fire without visible blocking but still create queue entries. - -4. **Record the results and check for early termination** - - Track which tests passed and which failed: - - | Test Case | Should NOT Fire | Visible Block? | Queue Entry? | Result | - |-----------|:---------------:|:--------------:|:------------:|:------:| - | Trigger/Safety | Edit both files | | | | - | Set Mode | Edit both files | | | | - | Pair Mode (forward) | Edit both files | | | | - | Pair Mode (reverse) | Edit expected only | | | | - | Multi Safety | Edit both files | | | | - | Created Mode | Modify existing | | | | - - **Result criteria**: PASS only if NO visible block AND NO queue entry. FAIL if either occurred. - - **EARLY TERMINATION**: If **2 tests have failed**, immediately: - 1. Stop running any remaining tests - 2. Reset (see step 5) - 3. Report the results summary showing which tests passed/failed - 4. Do NOT proceed to the next step - the job halts here - -5. **Reset** (MANDATORY - call the reset step internally) - - **IMPORTANT**: This step is MANDATORY and must run regardless of whether tests passed or failed. - - Follow the reset step instructions. Run these commands to clean up: - ```bash - git reset HEAD manual_tests/ && git checkout -- manual_tests/ && rm -f manual_tests/test_created_mode/new_config.yml - deepwork rules clear_queue - ``` - - See [reset.md](reset.md) for detailed explanation of these commands. 
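If convenient, the queue check and the mandatory reset above can be wrapped into one small script (a minimal sketch assuming the queue directory and the `deepwork rules clear_queue` command described in this step; the PASS/FAIL labels are illustrative only):

```bash
#!/bin/bash
# Sketch: report whether any rule queued during the NOT-fire tests, then reset.
QUEUE_DIR=".deepwork/tmp/rules/queue"

if ls "${QUEUE_DIR}"/*.json >/dev/null 2>&1; then
  echo "FAIL: queue has entries - a rule fired when it should not have:"
  cat "${QUEUE_DIR}"/*.json
else
  echo "PASS: queue is empty - no rules fired"
fi

# Mandatory cleanup, same commands as the reset step
git reset HEAD manual_tests/ && git checkout -- manual_tests/
rm -f manual_tests/test_created_mode/new_config.yml
deepwork rules clear_queue
```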
- -## Quality Criteria - -- **Sub-agents spawned**: All 6 tests were run using the Task tool to spawn sub-agents - the main agent did NOT edit files directly -- **Correct sub-agent config**: All sub-agents used `model: "haiku"` and `max_turns: 5` -- **Parallel execution**: All 6 sub-agents were launched in a single message (parallel) -- **Hooks observed (not triggered)**: The main agent observed hook behavior without manually running rules_check -- **Queue verified empty**: After all sub-agents completed, the rules queue was checked and confirmed empty (no queue entries = rules did not fire) -- **Early termination on 2 failures**: If 2 tests failed, testing halted immediately and results were reported -- **Reset performed**: Reset step was followed after tests completed (regardless of pass/fail) -- When all criteria are met, include `Quality Criteria Met` in your response - -## Reference - -See [test_reference.md](test_reference.md) for the complete test matrix and rule descriptions. - -## Context - -This step runs after the reset step (which ensures a clean environment) and tests that rules correctly do NOT fire when safety conditions are met. The "should fire" tests run after these complete. Infinite block tests are handled in a separate step. - - -### Job Context - -A workflow for running manual tests that validate DeepWork rules/hooks fire correctly. - -The **run_all** workflow tests that rules fire when they should AND do not fire when they shouldn't. -Each test is run in a SUB-AGENT (not the main agent) because: -1. Sub-agents run in isolated contexts where file changes can be detected -2. The Stop hook automatically evaluates rules when each sub-agent completes -3. The main agent can observe whether hooks fired without triggering them manually - -CRITICAL: All tests MUST run in sub-agents. The main agent MUST NOT make the file -edits itself - it spawns sub-agents to make edits, then observes whether the hooks -fired automatically when those sub-agents returned. - -Sub-agent configuration: -- All sub-agents should use `model: "haiku"` to minimize cost and latency -- All sub-agents should use `max_turns: 5` to prevent hanging indefinitely - -Steps: -1. reset - Ensure clean environment before testing (clears queue, reverts files) -2. run_not_fire_tests - Run all "should NOT fire" tests in PARALLEL sub-agents (6 tests) -3. run_fire_tests - Run all "should fire" tests in SERIAL sub-agents with resets between (6 tests) -4. 
infinite_block_tests - Run infinite block tests in SERIAL (4 tests - both fire and not-fire) - -Reset procedure (see steps/reset.md): -- Reset runs FIRST to ensure a clean environment before any tests -- Each step also calls reset internally when needed (between tests, after completion) -- Reset reverts git changes, removes created files, and clears the rules queue - -Test types covered: -- Trigger/Safety mode -- Set mode (bidirectional) -- Pair mode (directional) -- Command action -- Multi safety -- Infinite block (prompt and command) - in dedicated step -- Created mode (new files only) - - -## Required Inputs - - -**Files from Previous Steps** - Read these first: -- `clean_environment` (from `reset`) - -## Work Branch - -Use branch format: `deepwork/manual_tests-[instance]-YYYYMMDD` - -- If on a matching work branch: continue using it -- If on main/master: create new branch with `git checkout -b deepwork/manual_tests-[instance]-$(date +%Y%m%d)` - -## Outputs - -**Required outputs**: -- `not_fire_results` - -## Guardrails - -- Do NOT skip prerequisite verification if this step has dependencies -- Do NOT produce partial outputs; complete all required outputs before finishing -- Do NOT proceed without required inputs; ask the user if any are missing -- Do NOT modify files outside the scope of this step's defined outputs - -## Quality Validation - -**Before completing this step, you MUST have your work reviewed against the quality criteria below.** - -Use a sub-agent (Haiku model) to review your work against these criteria: - -**Criteria (all must be satisfied)**: -1. **Sub-Agents Used**: Did the main agent spawn sub-agents (using the Task tool) to make the file edits? The main agent must NOT edit the test files directly. -2. **Sub-Agent Config**: Did all sub-agents use `model: "haiku"` and `max_turns: 5`? -3. **Parallel Execution**: Were all 6 sub-agents launched in parallel (in a single message with multiple Task tool calls)? -4. **Hooks Observed**: Did the main agent observe that no blocking hooks fired when the sub-agents returned? The hooks fire AUTOMATICALLY - the agent must NOT manually run the rules_check command. -5. **Queue Verified Empty**: After all sub-agents completed, was the rules queue checked and confirmed empty (no entries = rules did not fire)? -6. **Early Termination**: If 2 tests failed, did testing halt immediately with results reported? -7. **Reset Performed**: Was the reset step called internally after tests completed (or after early termination)? -**Review Process**: -1. Once you believe your work is complete, spawn a sub-agent using Haiku to review your work against the quality criteria above -2. The sub-agent should examine your outputs and verify each criterion is met -3. If the sub-agent identifies valid issues, fix them -4. Have the sub-agent review again until all valid feedback has been addressed -5. Only mark the step complete when the sub-agent confirms all criteria are satisfied - -## On Completion - -1. Verify outputs are created -2. Inform user: "run_all step 2/4 complete, outputs: not_fire_results" -3. 
**Continue workflow**: Use Skill tool to invoke `/manual_tests.run_fire_tests` - ---- - -**Reference files**: `.deepwork/jobs/manual_tests/job.yml`, `.deepwork/jobs/manual_tests/steps/run_not_fire_tests.md` \ No newline at end of file diff --git a/.claude/skills/manual_tests/SKILL.md b/.claude/skills/manual_tests/SKILL.md deleted file mode 100644 index 59b53adc..00000000 --- a/.claude/skills/manual_tests/SKILL.md +++ /dev/null @@ -1,102 +0,0 @@ ---- -name: manual_tests -description: "Runs all manual hook/rule tests using sub-agents. Use when validating that DeepWork rules fire correctly." ---- - -# manual_tests - -Runs all manual hook/rule tests using sub-agents. Use when validating that DeepWork rules fire correctly. - -> **CRITICAL**: Always invoke steps using the Skill tool. Never copy/paste step instructions directly. - -A workflow for running manual tests that validate DeepWork rules/hooks fire correctly. - -The **run_all** workflow tests that rules fire when they should AND do not fire when they shouldn't. -Each test is run in a SUB-AGENT (not the main agent) because: -1. Sub-agents run in isolated contexts where file changes can be detected -2. The Stop hook automatically evaluates rules when each sub-agent completes -3. The main agent can observe whether hooks fired without triggering them manually - -CRITICAL: All tests MUST run in sub-agents. The main agent MUST NOT make the file -edits itself - it spawns sub-agents to make edits, then observes whether the hooks -fired automatically when those sub-agents returned. - -Sub-agent configuration: -- All sub-agents should use `model: "haiku"` to minimize cost and latency -- All sub-agents should use `max_turns: 5` to prevent hanging indefinitely - -Steps: -1. reset - Ensure clean environment before testing (clears queue, reverts files) -2. run_not_fire_tests - Run all "should NOT fire" tests in PARALLEL sub-agents (6 tests) -3. run_fire_tests - Run all "should fire" tests in SERIAL sub-agents with resets between (6 tests) -4. infinite_block_tests - Run infinite block tests in SERIAL (4 tests - both fire and not-fire) - -Reset procedure (see steps/reset.md): -- Reset runs FIRST to ensure a clean environment before any tests -- Each step also calls reset internally when needed (between tests, after completion) -- Reset reverts git changes, removes created files, and clears the rules queue - -Test types covered: -- Trigger/Safety mode -- Set mode (bidirectional) -- Pair mode (directional) -- Command action -- Multi safety -- Infinite block (prompt and command) - in dedicated step -- Created mode (new files only) - - -## Workflows - -### run_all - -Run all manual tests: reset, NOT-fire tests, fire tests, and infinite block tests - -**Steps in order**: -1. **reset** - Runs FIRST to ensure clean environment. Also called internally by other steps when they need to revert changes and clear the queue. -2. **run_not_fire_tests** - Runs all 6 'should NOT fire' tests in parallel sub-agents. Use to verify rules don't fire when safety conditions are met. -3. **run_fire_tests** - Runs all 6 'should fire' tests serially with resets between each. Use after NOT-fire tests to verify rules fire correctly. -4. **infinite_block_tests** - Runs all 4 infinite block tests serially. Tests both 'should fire' (no promise) and 'should NOT fire' (with promise) scenarios. 
- -**Start workflow**: `/manual_tests.reset` - - -## Execution Instructions - -### Step 1: Analyze Intent - -Parse any text following `/manual_tests` to determine user intent: -- "run_all" or related terms → start run_all workflow at `manual_tests.reset` - -### Step 2: Invoke Starting Step - -Use the Skill tool to invoke the identified starting step: -``` -Skill tool: manual_tests.reset -``` - -### Step 3: Continue Workflow Automatically - -After each step completes: -1. Check if there's a next step in the workflow sequence -2. Invoke the next step using the Skill tool -3. Repeat until workflow is complete or user intervenes - -**Note**: Standalone skills do not auto-continue to other steps. - -### Handling Ambiguous Intent - -If user intent is unclear, use AskUserQuestion to clarify: -- Present available workflows and standalone skills as options -- Let user select the starting point - -## Guardrails - -- Do NOT copy/paste step instructions directly; always use the Skill tool to invoke steps -- Do NOT skip steps in a workflow unless the user explicitly requests it -- Do NOT proceed to the next step if the current step's outputs are incomplete -- Do NOT make assumptions about user intent; ask for clarification when ambiguous - -## Context Files - -- Job definition: `.deepwork/jobs/manual_tests/job.yml` \ No newline at end of file diff --git a/.claude/skills/update.job/SKILL.md b/.claude/skills/update.job/SKILL.md index 44e92fe9..1604cbfe 100644 --- a/.claude/skills/update.job/SKILL.md +++ b/.claude/skills/update.job/SKILL.md @@ -9,7 +9,7 @@ user-invocable: false **Standalone skill** - can be run anytime -> Updates DeepWork standard jobs in src/ and syncs to installed locations. Use when modifying deepwork_jobs or deepwork_rules. +> Updates DeepWork standard jobs in src/ and syncs to installed locations. Use when modifying deepwork_jobs. ## Instructions @@ -94,7 +94,7 @@ ls -la .claude/commands/[job_name].*.md ### Job Context A workflow for maintaining standard jobs bundled with DeepWork. Standard jobs -(like `deepwork_jobs` and `deepwork_rules`) are source-controlled in +(like `deepwork_jobs`) are source-controlled in `src/deepwork/standard_jobs/` and must be edited there—never in `.deepwork/jobs/` or `.claude/commands/` directly. diff --git a/.claude/skills/update/SKILL.md b/.claude/skills/update/SKILL.md index e63bd476..a51a5967 100644 --- a/.claude/skills/update/SKILL.md +++ b/.claude/skills/update/SKILL.md @@ -1,16 +1,16 @@ --- name: update -description: "Updates DeepWork standard jobs in src/ and syncs to installed locations. Use when modifying deepwork_jobs or deepwork_rules." +description: "Updates DeepWork standard jobs in src/ and syncs to installed locations. Use when modifying deepwork_jobs." --- # update -Updates DeepWork standard jobs in src/ and syncs to installed locations. Use when modifying deepwork_jobs or deepwork_rules. +Updates DeepWork standard jobs in src/ and syncs to installed locations. Use when modifying deepwork_jobs. > **CRITICAL**: Always invoke steps using the Skill tool. Never copy/paste step instructions directly. A workflow for maintaining standard jobs bundled with DeepWork. Standard jobs -(like `deepwork_jobs` and `deepwork_rules`) are source-controlled in +(like `deepwork_jobs`) are source-controlled in `src/deepwork/standard_jobs/` and must be edited there—never in `.deepwork/jobs/` or `.claude/commands/` directly. 
diff --git a/.deepwork/jobs/add_platform/steps/verify.md b/.deepwork/jobs/add_platform/steps/verify.md index f3afe15a..fd2487d3 100644 --- a/.deepwork/jobs/add_platform/steps/verify.md +++ b/.deepwork/jobs/add_platform/steps/verify.md @@ -52,7 +52,6 @@ Ensure the implementation step is complete: - `deepwork_jobs.define.md` exists (or equivalent for the platform) - `deepwork_jobs.implement.md` exists - `deepwork_jobs.refine.md` exists - - `deepwork_rules.define.md` exists - All expected step commands exist 4. **Validate command file content** @@ -82,7 +81,6 @@ Ensure the implementation step is complete: - `deepwork install --platform ` completes without errors - All expected command files are created: - deepwork_jobs.define, implement, refine - - deepwork_rules.define - Any other standard job commands - Command file content is correct: - Matches platform's expected format diff --git a/.deepwork/jobs/deepwork_jobs/steps/implement.md b/.deepwork/jobs/deepwork_jobs/steps/implement.md index 749c8c6f..2382a1ad 100644 --- a/.deepwork/jobs/deepwork_jobs/steps/implement.md +++ b/.deepwork/jobs/deepwork_jobs/steps/implement.md @@ -126,66 +126,6 @@ This will: - Generate skills for each step - Make the skills available in `.claude/skills/` (or appropriate platform directory) -### Step 6: Consider Rules for the New Job - -After implementing the job, consider whether there are **rules** that would help enforce quality or consistency when working with this job's domain. - -**What are rules?** - -Rules are automated guardrails stored as markdown files in `.deepwork/rules/` that trigger when certain files change during an AI session. They help ensure: -- Documentation stays in sync with code -- Team guidelines are followed -- Architectural decisions are respected -- Quality standards are maintained - -**When to suggest rules:** - -Think about the job you just implemented and ask: -- Does this job produce outputs that other files depend on? -- Are there documentation files that should be updated when this job's outputs change? -- Are there quality checks or reviews that should happen when certain files in this domain change? -- Could changes to the job's output files impact other parts of the project? - -**Examples of rules that might make sense:** - -| Job Type | Potential Rule | -|----------|----------------| -| API Design | "Update API docs when endpoint definitions change" | -| Database Schema | "Review migrations when schema files change" | -| Competitive Research | "Update strategy docs when competitor analysis changes" | -| Feature Development | "Update changelog when feature files change" | -| Configuration Management | "Update install guide when config files change" | - -**How to offer rule creation:** - -If you identify one or more rules that would benefit the user, explain: -1. **What the rule would do** - What triggers it and what action it prompts -2. **Why it would help** - How it prevents common mistakes or keeps things in sync -3. **What files it would watch** - The trigger patterns - -Then ask the user: - -> "Would you like me to create this rule for you? I can run `/deepwork_rules.define` to set it up." - -If the user agrees, invoke the `/deepwork_rules.define` command to guide them through creating the rule. - -**Example dialogue:** - -``` -Based on the competitive_research job you just created, I noticed that when -competitor analysis files change, it would be helpful to remind you to update -your strategy documentation. 
- -I'd suggest a rule like: -- **Name**: "Update strategy when competitor analysis changes" -- **Trigger**: `**/positioning_report.md` -- **Action**: Prompt to review and update `docs/strategy.md` - -Would you like me to create this rule? I can run `/deepwork_rules.define` to set it up. -``` - -**Note:** Not every job needs rules. Only suggest them when they would genuinely help maintain consistency or quality. Don't force rules where they don't make sense. - ## Example Implementation For a complete worked example showing a job.yml and corresponding step instruction file, see: @@ -217,8 +157,6 @@ Before marking this step complete, ensure: - [ ] Each instruction file is complete and actionable - [ ] `deepwork sync` executed successfully - [ ] Skills generated in platform directory -- [ ] Considered whether rules would benefit this job (Step 6) -- [ ] If rules suggested, offered to run `/deepwork_rules.define` ## Quality Criteria @@ -230,4 +168,3 @@ Before marking this step complete, ensure: - Steps with user inputs explicitly use "ask structured questions" phrasing - Sync completed successfully - Skills available for use -- Thoughtfully considered relevant rules for the job domain diff --git a/.deepwork/jobs/deepwork_rules/hooks/capture_prompt_work_tree.sh b/.deepwork/jobs/deepwork_rules/hooks/capture_prompt_work_tree.sh deleted file mode 100755 index c9cedd82..00000000 --- a/.deepwork/jobs/deepwork_rules/hooks/capture_prompt_work_tree.sh +++ /dev/null @@ -1,38 +0,0 @@ -#!/bin/bash -# capture_prompt_work_tree.sh - Captures the git work tree state at prompt submission -# -# This script creates a snapshot of ALL tracked files at the time the prompt -# is submitted. This baseline is used for rules with compare_to: prompt and -# created: mode to detect truly NEW files (not modifications to existing ones). -# -# The baseline contains ALL tracked files (not just changed files) so that -# the rules_check hook can determine which files are genuinely new vs which -# files existed before and were just modified. -# -# It also captures the HEAD commit ref so that committed changes can be detected -# by comparing HEAD at Stop time to the captured ref. - -set -e - -# Ensure .deepwork directory exists -mkdir -p .deepwork - -# Save the current HEAD commit ref for detecting committed changes -# This is used by get_changed_files_prompt() to detect files changed since prompt, -# even if those changes were committed during the agent response. 
-git rev-parse HEAD > .deepwork/.last_head_ref 2>/dev/null || echo "" > .deepwork/.last_head_ref - -# Save ALL tracked files (not just changed files) -# This is critical for created: mode rules to distinguish between: -# - Newly created files (not in baseline) -> should trigger created: rules -# - Modified existing files (in baseline) -> should NOT trigger created: rules -git ls-files > .deepwork/.last_work_tree 2>/dev/null || true - -# Also include untracked files that exist at prompt time -# These are files the user may have created before submitting the prompt -git ls-files --others --exclude-standard >> .deepwork/.last_work_tree 2>/dev/null || true - -# Sort and deduplicate -if [ -f .deepwork/.last_work_tree ]; then - sort -u .deepwork/.last_work_tree -o .deepwork/.last_work_tree -fi diff --git a/.deepwork/jobs/deepwork_rules/hooks/global_hooks.yml b/.deepwork/jobs/deepwork_rules/hooks/global_hooks.yml deleted file mode 100644 index a310d31a..00000000 --- a/.deepwork/jobs/deepwork_rules/hooks/global_hooks.yml +++ /dev/null @@ -1,8 +0,0 @@ -# DeepWork Rules Hooks Configuration -# Maps lifecycle events to hook scripts or Python modules - -UserPromptSubmit: - - user_prompt_submit.sh - -Stop: - - module: deepwork.hooks.rules_check diff --git a/.deepwork/jobs/deepwork_rules/hooks/user_prompt_submit.sh b/.deepwork/jobs/deepwork_rules/hooks/user_prompt_submit.sh deleted file mode 100755 index 486ad836..00000000 --- a/.deepwork/jobs/deepwork_rules/hooks/user_prompt_submit.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/bin/bash -# user_prompt_submit.sh - Runs on every user prompt submission -# -# This script captures the work tree state at each prompt submission. -# This baseline is used for policies with compare_to: prompt to detect -# what changed during an agent response. - -set -e - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" - -# Capture work tree state at each prompt for compare_to: prompt policies -"${SCRIPT_DIR}/capture_prompt_work_tree.sh" - -# Exit successfully - don't block the prompt -exit 0 diff --git a/.deepwork/jobs/deepwork_rules/job.yml b/.deepwork/jobs/deepwork_rules/job.yml deleted file mode 100644 index a0032b9e..00000000 --- a/.deepwork/jobs/deepwork_rules/job.yml +++ /dev/null @@ -1,49 +0,0 @@ -name: deepwork_rules -version: "0.4.0" -summary: "Creates file-change rules that enforce guidelines during AI sessions. Use when automating documentation sync or code review triggers." -description: | - Manages rules that automatically trigger when certain files change during an AI agent session. - Rules help ensure that code changes follow team guidelines, documentation is updated, - and architectural decisions are respected. - - IMPORTANT: Rules are evaluated at the "Stop" hook, which fires when an agent finishes its turn. - This includes when sub-agents complete their work. Rules are NOT evaluated immediately after - each file edit - they batch up and run once at the end of the agent's response cycle. - - Command action rules: Execute their command (e.g., `uv sync`) when the agent stops - - Prompt action rules: Display instructions to the agent, blocking until addressed - - Rules are stored as individual markdown files with YAML frontmatter in the `.deepwork/rules/` - directory. 
Each rule file specifies: - - Detection mode: trigger/safety, set (bidirectional), or pair (directional) - - Patterns: Glob patterns for matching files, with optional variable capture - - Action type: prompt (default) to show instructions, or command to run a shell command - - Instructions: Markdown content describing what the agent should do - - Example use cases: - - Update installation docs when configuration files change - - Require security review when authentication code is modified - - Ensure API documentation stays in sync with API code - - Enforce source/test file pairing - - Auto-run `uv sync` when pyproject.toml changes (command action) - -changelog: - - version: "0.1.0" - changes: "Initial version" - - version: "0.2.0" - changes: "Standardized on 'ask structured questions' phrasing for user input" - - version: "0.3.0" - changes: "Migrated to v2 format - individual markdown files in .deepwork/rules/" - - version: "0.4.0" - changes: "Improved skill descriptions with third-person voice and 'Use when...' triggers for better discoverability" - -steps: - - id: define - name: "Define Rule" - description: "Creates a rule file that triggers when specified files change. Use when setting up documentation sync, code review requirements, or automated commands." - instructions_file: steps/define.md - inputs: - - name: rule_purpose - description: "What guideline or constraint should this rule enforce?" - outputs: - - .deepwork/rules/{rule-name}.md - dependencies: [] diff --git a/.deepwork/jobs/deepwork_rules/rules/.gitkeep b/.deepwork/jobs/deepwork_rules/rules/.gitkeep deleted file mode 100644 index 429162b4..00000000 --- a/.deepwork/jobs/deepwork_rules/rules/.gitkeep +++ /dev/null @@ -1,13 +0,0 @@ -# This directory contains example rule templates. -# Copy and customize these files to create your own rules. -# -# Rule files use YAML frontmatter in markdown format: -# -# --- -# name: Rule Name -# trigger: "pattern/**/*" -# safety: "optional/pattern" -# --- -# Instructions in markdown here. -# -# See doc/rules_syntax.md for full documentation. diff --git a/.deepwork/jobs/deepwork_rules/rules/api-documentation-sync.md.example b/.deepwork/jobs/deepwork_rules/rules/api-documentation-sync.md.example deleted file mode 100644 index 427da7ae..00000000 --- a/.deepwork/jobs/deepwork_rules/rules/api-documentation-sync.md.example +++ /dev/null @@ -1,10 +0,0 @@ ---- -name: API Documentation Sync -trigger: src/api/**/* -safety: docs/api/**/*.md ---- -API code has changed. Please verify that API documentation is up to date: - -- New or changed endpoints -- Modified request/response schemas -- Updated authentication requirements diff --git a/.deepwork/jobs/deepwork_rules/rules/readme-documentation.md.example b/.deepwork/jobs/deepwork_rules/rules/readme-documentation.md.example deleted file mode 100644 index 6be90c83..00000000 --- a/.deepwork/jobs/deepwork_rules/rules/readme-documentation.md.example +++ /dev/null @@ -1,10 +0,0 @@ ---- -name: README Documentation -trigger: src/**/* -safety: README.md ---- -Source code has been modified. Please review README.md for accuracy: - -1. Verify the project overview reflects current functionality -2. Check that usage examples are still correct -3. 
Ensure installation/setup instructions remain valid diff --git a/.deepwork/jobs/deepwork_rules/rules/security-review.md.example b/.deepwork/jobs/deepwork_rules/rules/security-review.md.example deleted file mode 100644 index abce3194..00000000 --- a/.deepwork/jobs/deepwork_rules/rules/security-review.md.example +++ /dev/null @@ -1,11 +0,0 @@ ---- -name: Security Review for Auth Changes -trigger: - - src/auth/**/* - - src/security/**/* ---- -Authentication or security code has been changed. Please: - -1. Review for hardcoded credentials or secrets -2. Check input validation on user inputs -3. Verify access control logic is correct diff --git a/.deepwork/jobs/deepwork_rules/rules/skill-md-validation.md b/.deepwork/jobs/deepwork_rules/rules/skill-md-validation.md deleted file mode 100644 index 38f90c51..00000000 --- a/.deepwork/jobs/deepwork_rules/rules/skill-md-validation.md +++ /dev/null @@ -1,46 +0,0 @@ ---- -name: SKILL.md Validation -trigger: "**/SKILL.md" -compare_to: base ---- -A SKILL.md file has been created or modified. Please validate that it follows the required format: - -## Required Structure - -The file MUST have valid YAML frontmatter at the start, enclosed between `---` markers: - -```markdown ---- -name: my-skill-name -description: A description of what this skill does ---- - -# Rest of the skill documentation... -``` - -## Validation Checklist - -1. **YAML Frontmatter**: Verify the file starts with `---` followed by valid YAML and ends with `---` - -2. **`name` field** (required): - - Must be present in the frontmatter - - Must contain only lowercase letters, numbers, and hyphens (`a-z`, `0-9`, `-`) - - Must be 64 characters or fewer - - Example valid names: `my-skill`, `code-review-2`, `lint` - - Example invalid names: `My Skill` (uppercase/spaces), `skill_name` (underscores), `SKILL` (uppercase) - -3. **`description` field** (required): - - Must be present in the frontmatter - - Must be 1024 characters or fewer - - Should clearly describe what the skill does - -## What to Check - -For the modified file: {trigger_files} - -1. Parse the YAML frontmatter and verify it is valid YAML -2. Check that `name` exists and matches the pattern `^[a-z0-9-]+$` with max length 64 -3. Check that `description` exists and is at most 1024 characters -4. Report any validation errors to the user - -If the file does not pass validation, help the user fix the issues. diff --git a/.deepwork/jobs/deepwork_rules/rules/source-test-pairing.md.example b/.deepwork/jobs/deepwork_rules/rules/source-test-pairing.md.example deleted file mode 100644 index 3ebd6968..00000000 --- a/.deepwork/jobs/deepwork_rules/rules/source-test-pairing.md.example +++ /dev/null @@ -1,13 +0,0 @@ ---- -name: Source/Test Pairing -set: - - src/{path}.py - - tests/{path}_test.py ---- -Source and test files should change together. - -When modifying source code, ensure corresponding tests are updated. -When adding tests, ensure they test actual source code. - -Modified source: {trigger_files} -Expected tests: {expected_files} diff --git a/.deepwork/jobs/deepwork_rules/steps/define.md b/.deepwork/jobs/deepwork_rules/steps/define.md deleted file mode 100644 index 1e38a5e6..00000000 --- a/.deepwork/jobs/deepwork_rules/steps/define.md +++ /dev/null @@ -1,249 +0,0 @@ -# Define Rule - -## Objective - -Create a new rule file in the `.deepwork/rules/` directory to enforce team guidelines, documentation requirements, or other constraints when specific files change. 
- -## Task - -Guide the user through defining a new rule by asking structured questions. **Do not create the rule without first understanding what they want to enforce.** - -**Important**: Use the AskUserQuestion tool to ask structured questions when gathering information from the user. This provides a better user experience with clear options and guided choices. - -### Step 1: Understand the Rule Purpose - -Start by asking structured questions to understand what the user wants to enforce: - -1. **What guideline or constraint should this rule enforce?** - - What situation triggers the need for action? - - What files or directories, when changed, should trigger this rule? - - Examples: "When config files change", "When API code changes", "When database schema changes" - -2. **What action should be taken?** - - What should the agent do when the rule triggers? - - Update documentation? Perform a security review? Update tests? - - Is there a specific file or process that needs attention? - -3. **Are there any "safety" conditions?** - - Are there files that, if also changed, mean the rule doesn't need to fire? - - For example: If config changes AND install_guide.md changes, assume docs are already updated - - This prevents redundant prompts when the user has already done the right thing - -### Step 2: Choose the Detection Mode - -Help the user select the appropriate detection mode: - -**Trigger/Safety Mode** (most common): -- Fires when trigger patterns match AND no safety patterns match -- Use for: "When X changes, check Y" rules -- Example: When config changes, verify install docs - -**Set Mode** (bidirectional correspondence): -- Fires when files that should change together don't all change -- Use for: Source/test pairing, model/migration sync -- Example: `src/foo.py` and `tests/foo_test.py` should change together - -**Pair Mode** (directional correspondence): -- Fires when a trigger file changes but expected files don't -- Changes to expected files alone do NOT trigger -- Use for: API code requires documentation updates (but docs can update independently) - -### Step 3: Define the Patterns - -Help the user define glob patterns for files. - -**Common patterns:** -- `src/**/*.py` - All Python files in src directory (recursive) -- `app/config/**/*` - All files in app/config directory -- `*.md` - All markdown files in root -- `src/api/**/*` - All files in the API directory -- `migrations/**/*.sql` - All SQL migrations - -**Variable patterns (for set/pair modes):** -- `src/{path}.py` - Captures path variable (e.g., `foo/bar` from `src/foo/bar.py`) -- `tests/{path}_test.py` - Uses same path variable in corresponding file -- `{name}` matches single segment, `{path}` matches multiple segments - -**Pattern syntax:** -- `*` - Matches any characters within a single path segment -- `**` - Matches any characters across multiple path segments (recursive) -- `?` - Matches a single character - -### Step 4: Choose the Comparison Mode (Optional) - -The `compare_to` field controls what baseline is used when detecting "changed files": - -**Options:** -- `base` (default) - Compares to the base of the current branch (merge-base with main/master). Best for feature branches. -- `default_tip` - Compares to the current tip of the default branch. Useful for seeing difference from production. -- `prompt` - Compares to the state at the start of each prompt. For rules about very recent changes. - -Most rules should use the default (`base`) and don't need to specify `compare_to`. 
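As a quick illustration, a rule that should only consider edits made during the current response might look like this (a minimal sketch; the rule name, pattern, and instructions are hypothetical):

```markdown
---
name: Recent Changes Review
trigger: "src/**/*.py"
compare_to: prompt   # only files changed since the prompt was submitted
---
Python source files changed during this response. Review the changes for
correctness before finishing, and update any affected documentation.
```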
- -### Step 5: Write the Instructions - -Create clear, actionable instructions for what the agent should do when the rule fires. - -**Good instructions include:** -- What to check or review -- What files might need updating -- Specific actions to take -- Quality criteria for completion - -**Template variables available in instructions:** -- `{trigger_files}` - Files that triggered the rule -- `{expected_files}` - Expected corresponding files (for set/pair modes) - -### Step 6: Create the Rule File - -Create a new file in `.deepwork/rules/` with a kebab-case filename: - -**File Location**: `.deepwork/rules/{rule-name}.md` - -**Format for Trigger/Safety Mode:** -```markdown ---- -name: Friendly Name for the Rule -trigger: "glob/pattern/**/*" # or array: ["pattern1", "pattern2"] -safety: "optional/pattern" # optional, or array -compare_to: base # optional: "base" (default), "default_tip", or "prompt" ---- -Instructions for the agent when this rule fires. - -Multi-line markdown content is supported. -``` - -**Format for Set Mode (bidirectional):** -```markdown ---- -name: Source/Test Pairing -set: - - src/{path}.py - - tests/{path}_test.py ---- -Source and test files should change together. - -Modified: {trigger_files} -Expected: {expected_files} -``` - -**Format for Pair Mode (directional):** -```markdown ---- -name: API Documentation -pair: - trigger: api/{path}.py - expects: docs/api/{path}.md ---- -API code requires documentation updates. - -Changed API: {trigger_files} -Update docs: {expected_files} -``` - -### Step 7: Verify the Rule - -After creating the rule: - -1. **Check the YAML frontmatter** - Ensure valid YAML formatting -2. **Test trigger patterns** - Verify patterns match intended files -3. **Review instructions** - Ensure they're clear and actionable -4. **Check for conflicts** - Ensure the rule doesn't conflict with existing ones - -## Example Rules - -### Update Documentation on Config Changes -`.deepwork/rules/config-docs.md`: -```markdown ---- -name: Update Install Guide on Config Changes -trigger: app/config/**/* -safety: docs/install_guide.md ---- -Configuration files have been modified. Please review docs/install_guide.md -and update it if any installation instructions need to change based on the -new configuration. -``` - -### Security Review for Auth Code -`.deepwork/rules/security-review.md`: -```markdown ---- -name: Security Review for Authentication Changes -trigger: - - src/auth/**/* - - src/security/**/* -safety: - - SECURITY.md - - docs/security_audit.md ---- -Authentication or security code has been changed. Please: - -1. Review for hardcoded credentials or secrets -2. Check input validation on user inputs -3. Verify access control logic is correct -4. Update security documentation if needed -``` - -### Source/Test Pairing -`.deepwork/rules/source-test-pairing.md`: -```markdown ---- -name: Source/Test Pairing -set: - - src/{path}.py - - tests/{path}_test.py ---- -Source and test files should change together. - -When modifying source code, ensure corresponding tests are updated. -When adding tests, ensure they test actual source code. - -Modified: {trigger_files} -Expected: {expected_files} -``` - -### API Documentation Sync -`.deepwork/rules/api-docs.md`: -```markdown ---- -name: API Documentation Update -pair: - trigger: src/api/{path}.py - expects: docs/api/{path}.md ---- -API code has changed. Please verify that API documentation in docs/api/ -is up to date with the code changes. 
Pay special attention to: - -- New or changed endpoints -- Modified request/response schemas -- Updated authentication requirements - -Changed API: {trigger_files} -Update: {expected_files} -``` - -## Output Format - -### .deepwork/rules/{rule-name}.md -Create a new file with the rule definition using YAML frontmatter and markdown body. - -## Quality Criteria - -- Asked structured questions to understand user requirements -- Rule name is clear and descriptive (used in promise tags) -- Correct detection mode selected for the use case -- Patterns accurately match the intended files -- Safety patterns prevent unnecessary triggering (if applicable) -- Instructions are actionable and specific -- YAML frontmatter is valid - -## Context - -Rules are evaluated automatically when the agent finishes a task. The system: -1. Determines which files have changed based on each rule's `compare_to` setting -2. Evaluates rules based on their detection mode (trigger/safety, set, or pair) -3. Skips rules where the correspondence is satisfied (for set/pair) or safety matched -4. Prompts you with instructions for any triggered rules - -You can mark a rule as addressed by including `Rule Name` in your response (replace Rule Name with the actual rule name from the `name` field). This tells the system you've already handled that rule's requirements. diff --git a/.deepwork/jobs/manual_tests/job.yml b/.deepwork/jobs/manual_tests/job.yml deleted file mode 100644 index c13ffac3..00000000 --- a/.deepwork/jobs/manual_tests/job.yml +++ /dev/null @@ -1,131 +0,0 @@ -name: manual_tests -version: "1.4.0" -summary: "Runs all manual hook/rule tests using sub-agents. Use when validating that DeepWork rules fire correctly." -description: | - A workflow for running manual tests that validate DeepWork rules/hooks fire correctly. - - The **run_all** workflow tests that rules fire when they should AND do not fire when they shouldn't. - Each test is run in a SUB-AGENT (not the main agent) because: - 1. Sub-agents run in isolated contexts where file changes can be detected - 2. The Stop hook automatically evaluates rules when each sub-agent completes - 3. The main agent can observe whether hooks fired without triggering them manually - - CRITICAL: All tests MUST run in sub-agents. The main agent MUST NOT make the file - edits itself - it spawns sub-agents to make edits, then observes whether the hooks - fired automatically when those sub-agents returned. - - Sub-agent configuration: - - All sub-agents should use `model: "haiku"` to minimize cost and latency - - All sub-agents should use `max_turns: 5` to prevent hanging indefinitely - - Steps: - 1. reset - Ensure clean environment before testing (clears queue, reverts files) - 2. run_not_fire_tests - Run all "should NOT fire" tests in PARALLEL sub-agents (6 tests) - 3. run_fire_tests - Run all "should fire" tests in SERIAL sub-agents with resets between (6 tests) - 4. 
infinite_block_tests - Run infinite block tests in SERIAL (4 tests - both fire and not-fire) - - Reset procedure (see steps/reset.md): - - Reset runs FIRST to ensure a clean environment before any tests - - Each step also calls reset internally when needed (between tests, after completion) - - Reset reverts git changes, removes created files, and clears the rules queue - - Test types covered: - - Trigger/Safety mode - - Set mode (bidirectional) - - Pair mode (directional) - - Command action - - Multi safety - - Infinite block (prompt and command) - in dedicated step - - Created mode (new files only) - -workflows: - - name: run_all - summary: "Run all manual tests: reset, NOT-fire tests, fire tests, and infinite block tests" - steps: - - reset - - run_not_fire_tests - - run_fire_tests - - infinite_block_tests - -changelog: - - version: "1.4.0" - changes: "Added workflows section to explicitly define the run_all test workflow sequence" - - version: "1.3.0" - changes: "Add model/max_turns config for sub-agents; move infinite block tests to dedicated serial step; add reset step that runs first; verify queue empty for 'should NOT fire' tests" - - version: "1.2.1" - changes: "Fixed incomplete revert - now uses git reset HEAD to unstage files (rules_check stages with git add -A)" - - version: "1.2.0" - changes: "Added early termination on 2 test failures; emphasized mandatory file revert and queue clear after each step" - - version: "1.1.0" - changes: "Added rules queue clearing between tests to prevent anti-infinite-loop mechanism from blocking tests" - - version: "1.0.0" - changes: "Initial job creation - tests run in sub-agents to observe automatic hook firing" - -steps: - - id: reset - name: "Reset Manual Tests Environment" - description: "Runs FIRST to ensure clean environment. Also called internally by other steps when they need to revert changes and clear the queue." - instructions_file: steps/reset.md - inputs: [] - outputs: - - clean_environment - dependencies: [] - quality_criteria: - - "**Environment Clean**: Git changes reverted, created files removed, and rules queue cleared" - - - id: run_not_fire_tests - name: "Run Should-NOT-Fire Tests" - description: "Runs all 6 'should NOT fire' tests in parallel sub-agents. Use to verify rules don't fire when safety conditions are met." - instructions_file: steps/run_not_fire_tests.md - inputs: - - file: clean_environment - from_step: reset - outputs: - - not_fire_results - dependencies: - - reset - quality_criteria: - - "**Sub-Agents Used**: Did the main agent spawn sub-agents (using the Task tool) to make the file edits? The main agent must NOT edit the test files directly." - - "**Sub-Agent Config**: Did all sub-agents use `model: \"haiku\"` and `max_turns: 5`?" - - "**Parallel Execution**: Were all 6 sub-agents launched in parallel (in a single message with multiple Task tool calls)?" - - "**Hooks Observed**: Did the main agent observe that no blocking hooks fired when the sub-agents returned? The hooks fire AUTOMATICALLY - the agent must NOT manually run the rules_check command." - - "**Queue Verified Empty**: After all sub-agents completed, was the rules queue checked and confirmed empty (no entries = rules did not fire)?" - - "**Early Termination**: If 2 tests failed, did testing halt immediately with results reported?" - - "**Reset Performed**: Was the reset step called internally after tests completed (or after early termination)?" 
- - - id: run_fire_tests - name: "Run Should-Fire Tests" - description: "Runs all 6 'should fire' tests serially with resets between each. Use after NOT-fire tests to verify rules fire correctly." - instructions_file: steps/run_fire_tests.md - inputs: - - file: not_fire_results - from_step: run_not_fire_tests - outputs: - - fire_results - dependencies: - - run_not_fire_tests - quality_criteria: - - "**Sub-Agents Used**: Did the main agent spawn a sub-agent (using the Task tool) for EACH test? The main agent must NOT edit the test files directly." - - "**Sub-Agent Config**: Did all sub-agents use `model: \"haiku\"` and `max_turns: 5`?" - - "**Serial Execution**: Were sub-agents launched ONE AT A TIME (not in parallel) to prevent cross-contamination?" - - "**Hooks Fired Automatically**: Did the main agent observe the blocking hooks firing automatically when each sub-agent returned? The agent must NOT manually run the rules_check command." - - "**Reset Between Tests**: Was the reset step called internally after each test to revert files and prevent cross-contamination?" - - "**Early Termination**: If 2 tests failed, did testing halt immediately with results reported?" - - "**Results Recorded**: Did the main agent track pass/fail status for each test case?" - - - id: infinite_block_tests - name: "Run Infinite Block Tests" - description: "Runs all 4 infinite block tests serially. Tests both 'should fire' (no promise) and 'should NOT fire' (with promise) scenarios." - instructions_file: steps/infinite_block_tests.md - inputs: - - file: fire_results - from_step: run_fire_tests - outputs: - - infinite_block_results - dependencies: - - run_fire_tests - quality_criteria: - - "**Sub-Agents Used**: Each test run via Task tool with `model: \"haiku\"` and `max_turns: 5`" - - "**Serial Execution**: Sub-agents launched ONE AT A TIME with reset between each" - - "**Promise Tests**: Completed WITHOUT blocking (promise bypassed the rule)" - - "**No-Promise Tests**: Hook fired AND sub-agent returned in reasonable time (not hung)" diff --git a/.deepwork/jobs/manual_tests/steps/infinite_block_tests.md b/.deepwork/jobs/manual_tests/steps/infinite_block_tests.md deleted file mode 100644 index 5932c9e2..00000000 --- a/.deepwork/jobs/manual_tests/steps/infinite_block_tests.md +++ /dev/null @@ -1,136 +0,0 @@ -# Run Infinite Block Tests - -## Objective - -Run all infinite block tests in **serial** to verify that infinite blocking rules work correctly - both firing when they should AND not firing when bypassed with a promise tag. - -## CRITICAL: Sub-Agent Requirement - -**You MUST spawn sub-agents to make all file edits. DO NOT edit the test files yourself.** - -Why sub-agents are required: -1. Sub-agents run in isolated contexts where file changes are detected -2. When a sub-agent completes, the Stop hook **automatically** evaluates rules -3. You (the main agent) observe whether hooks fired - you do NOT manually trigger them -4. If you edit files directly, the hooks won't fire because you're not a completing sub-agent - -**NEVER manually run `echo '{}' | python -m deepwork.hooks.rules_check`** - this defeats the purpose of the test. Hooks must fire AUTOMATICALLY when sub-agents return. 
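For context on why the evaluation happens on its own: the Stop lifecycle event is mapped to the rules checker in the hooks configuration, roughly as follows (a sketch mirroring the `global_hooks.yml` mapping for these hooks):

```yaml
# Lifecycle event -> hook mapping (sketch)
UserPromptSubmit:
  - user_prompt_submit.sh               # captures the work-tree baseline at prompt time
Stop:
  - module: deepwork.hooks.rules_check  # evaluates rules whenever an agent or sub-agent stops
```

Because the mapping fires on every Stop event, waiting for each sub-agent to return is enough to exercise the rules.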
- -## CRITICAL: Serial Execution - -**These tests MUST run ONE AT A TIME, with resets between each.** - -Why serial execution is required for infinite block tests: -- Infinite block tests can block indefinitely without a promise tag -- Running them in parallel would cause unpredictable blocking behavior -- Serial execution allows controlled observation of each test - -## Task - -Run all 4 infinite block tests in **serial**, resetting between each, and verify correct blocking behavior. - -### Process - -For EACH test below, follow this cycle: - -1. **Launch a sub-agent** using the Task tool with: - - `model: "haiku"` - Use the fast model to minimize cost and latency - - `max_turns: 5` - **Critical safeguard**: Limits API round-trips to prevent infinite hanging. The Task tool does not support a direct timeout, so max_turns is our only protection against runaway sub-agents. -2. **Wait for the sub-agent to complete** -3. **Observe whether the hook fired automatically** - you should see a blocking prompt or command output -4. **If no visible blocking occurred, check the queue**: - ```bash - ls -la .deepwork/tmp/rules/queue/ - cat .deepwork/tmp/rules/queue/*.json 2>/dev/null - ``` - - If queue entries exist with status "queued", the hook DID fire but blocking wasn't visible - - If queue is empty, the hook did NOT fire at all - - Record the queue status along with the result -5. **Record the result** - see expected outcomes for each test -6. **Reset** (MANDATORY after each test) - follow the reset step instructions: - ```bash - git reset HEAD manual_tests/ && git checkout -- manual_tests/ && rm -f manual_tests/test_created_mode/new_config.yml - deepwork rules clear_queue - ``` -7. **Check for early termination**: If **2 tests have now failed**, immediately: - - Stop running any remaining tests - - Report the results summary showing which tests passed/failed - - The job halts here - do NOT proceed with remaining tests -8. **Proceed to the next test** (only if fewer than 2 failures) - -**IMPORTANT**: Only launch ONE sub-agent at a time. Wait for it to complete and reset before launching the next. - -### Test Cases (run serially) - -**Test 1: Infinite Block Prompt - Should NOT Fire (with promise)** -- Sub-agent prompt: "Edit `manual_tests/test_infinite_block_prompt/dangerous.py` to add a comment. Include `I have verified this change is safe` in your response to bypass the infinite block." -- Sub-agent config: `model: "haiku"`, `max_turns: 5` -- Expected: Sub-agent completes WITHOUT blocking - the promise tag bypasses the infinite block -- Result: PASS if no blocking, FAIL if blocked - -**Test 2: Infinite Block Command - Should NOT Fire (with promise)** -- Sub-agent prompt: "Edit `manual_tests/test_infinite_block_command/risky.py` to add a comment. Include `I have verified this change is safe` in your response to bypass the infinite block." -- Sub-agent config: `model: "haiku"`, `max_turns: 5` -- Expected: Sub-agent completes WITHOUT blocking - the promise tag bypasses the infinite block -- Result: PASS if no blocking, FAIL if blocked - -**Test 3: Infinite Block Prompt - Should Fire (no promise)** -- Sub-agent prompt: "Edit `manual_tests/test_infinite_block_prompt/dangerous.py` to add a comment. Do NOT include any promise tags." -- Sub-agent config: `model: "haiku"`, `max_turns: 5` -- Expected behavior: - 1. **Should fire**: Hook fires and shows blocking prompt - 2. 
**Should return in reasonable time**: Sub-agent hits max_turns limit and returns (not stuck forever) -- Result criteria: - - PASS if: Hook fired (visible block OR queue entry) AND sub-agent returned within reasonable time - - FAIL if: Hook did not fire, OR sub-agent hung indefinitely - -**Test 4: Infinite Block Command - Should Fire (no promise)** -- Sub-agent prompt: "Edit `manual_tests/test_infinite_block_command/risky.py` to add a comment. Do NOT include any promise tags." -- Sub-agent config: `model: "haiku"`, `max_turns: 5` -- Expected behavior: - 1. **Should fire**: Hook fires and command fails (exit code 1) - 2. **Should return in reasonable time**: Sub-agent hits max_turns limit and returns (not stuck forever) -- Result criteria: - - PASS if: Hook fired (visible block OR queue entry) AND sub-agent returned within reasonable time - - FAIL if: Hook did not fire, OR sub-agent hung indefinitely - -### Results Tracking - -Record the result after each test: - -| Test Case | Scenario | Should Fire? | Returned in Time? | Visible Block? | Queue Entry? | Result | -|-----------|----------|:------------:|:-----------------:|:--------------:|:------------:|:------:| -| Infinite Block Prompt | With promise | No | Yes | | | | -| Infinite Block Command | With promise | No | Yes | | | | -| Infinite Block Prompt | No promise | Yes | Yes | | | | -| Infinite Block Command | No promise | Yes | Yes | | | | - -**Result criteria:** -- **"Should NOT fire" tests (with promise)**: PASS if no blocking AND no queue entry AND returned quickly -- **"Should fire" tests (no promise)**: PASS if hook fired (visible block OR queue entry) AND returned in reasonable time (max_turns limit) - -**Queue Entry Status Guide:** -- If queue has entry with status "queued" -> Hook fired, rule was shown to agent -- If queue has entry with status "passed" -> Hook fired, rule was satisfied -- If queue is empty -> Hook did NOT fire - -## Quality Criteria - -- **Sub-agents spawned**: Tests were run using the Task tool to spawn sub-agents - the main agent did NOT edit files directly -- **Correct sub-agent config**: All sub-agents used `model: "haiku"` and `max_turns: 5` -- **Serial execution**: Sub-agents were launched ONE AT A TIME, not in parallel -- **Reset between tests**: Reset step was followed after each test -- **Hooks observed (not triggered)**: The main agent observed hook behavior without manually running rules_check - hooks fired AUTOMATICALLY -- **"Should NOT fire" tests verified**: Promise tests completed without blocking and no queue entries -- **"Should fire" tests verified**: Non-promise tests fired (visible block OR queue entry) AND returned in reasonable time (not hung indefinitely) -- **Early termination on 2 failures**: If 2 tests failed, testing halted immediately and results were reported -- **Results recorded**: Pass/fail status was recorded for each test run -- When all criteria are met, include `Quality Criteria Met` in your response - -## Reference - -See [test_reference.md](test_reference.md) for the complete test matrix and rule descriptions. - -## Context - -This step runs after both the "should NOT fire" and "should fire" test steps. It specifically tests infinite blocking behavior which requires serial execution due to the blocking nature of these rules. 
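The queue check and result criteria above can also be scripted for a deterministic readout between tests. Below is a minimal sketch, not part of the job itself: the queue directory and the "queued"/"passed" statuses come from this step's text, but the exact JSON layout of each queue entry (and the `status` field name) is an assumption, and observing a visible block or a hung sub-agent still happens manually.

```python
import json
from pathlib import Path

QUEUE_DIR = Path(".deepwork/tmp/rules/queue")


def queue_entries() -> list[dict]:
    """Read every rules-queue entry; unreadable files are reported, not ignored."""
    entries = []
    for path in sorted(QUEUE_DIR.glob("*.json")):
        try:
            entries.append(json.loads(path.read_text()))
        except (OSError, json.JSONDecodeError):
            entries.append({"file": path.name, "status": "unreadable"})
    return entries


def classify(should_fire: bool, visible_block: bool, returned_in_time: bool = True) -> str:
    """Apply the result criteria above: any queue entry counts as 'the hook fired'."""
    if not returned_in_time:
        return "FAIL: sub-agent hung instead of hitting max_turns and returning"
    fired = visible_block or bool(queue_entries())
    if should_fire:
        return "PASS" if fired else "FAIL: hook did not fire"
    return "FAIL: rule fired despite promise" if fired else "PASS"
```

For example, after Test 3 returns without a visible block, `classify(should_fire=True, visible_block=False)` reports PASS only if a queue entry proves the hook fired.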
diff --git a/.deepwork/jobs/manual_tests/steps/reset.md b/.deepwork/jobs/manual_tests/steps/reset.md deleted file mode 100644 index b6eb4fb7..00000000 --- a/.deepwork/jobs/manual_tests/steps/reset.md +++ /dev/null @@ -1,38 +0,0 @@ -# Reset Manual Tests Environment - -## Objective - -Reset the manual tests environment by reverting all file changes and clearing the rules queue. - -## Purpose - -This step contains all the reset logic that other steps can call when they need to clean up between or after tests. It ensures consistent cleanup across all test steps. - -## Reset Commands - -Run these commands to reset the environment: - -```bash -git reset HEAD manual_tests/ && git checkout -- manual_tests/ && rm -f manual_tests/test_created_mode/new_config.yml -deepwork rules clear_queue -``` - -## Command Explanation - -- `git reset HEAD manual_tests/` - Unstages files from the index (rules_check uses `git add -A` which stages changes) -- `git checkout -- manual_tests/` - Reverts working tree to match HEAD -- `rm -f manual_tests/test_created_mode/new_config.yml` - Removes any new files created during tests (the created mode test creates this file) -- `deepwork rules clear_queue` - Clears the rules queue so rules can fire again (prevents anti-infinite-loop mechanism from blocking subsequent tests) - -## When to Reset - -- **After each serial test**: Reset immediately after observing the result to prevent cross-contamination -- **After parallel tests complete**: Reset once all parallel sub-agents have returned -- **On early termination**: Reset before reporting failure results -- **Before starting a new test step**: Ensure clean state - -## Quality Criteria - -- **All changes reverted**: `git status` shows no changes in `manual_tests/` -- **Queue cleared**: `.deepwork/tmp/rules/queue/` is empty -- **New files removed**: `manual_tests/test_created_mode/new_config.yml` does not exist diff --git a/.deepwork/jobs/manual_tests/steps/run_fire_tests.md b/.deepwork/jobs/manual_tests/steps/run_fire_tests.md deleted file mode 100644 index 787dc3ef..00000000 --- a/.deepwork/jobs/manual_tests/steps/run_fire_tests.md +++ /dev/null @@ -1,132 +0,0 @@ -# Run Should-Fire Tests - -## Objective - -Run all "should fire" tests in **serial** sub-agents to verify that rules fire correctly when their trigger conditions are met without safety conditions. - -## CRITICAL: Sub-Agent Requirement - -**You MUST spawn sub-agents to make all file edits. DO NOT edit the test files yourself.** - -Why sub-agents are required: -1. Sub-agents run in isolated contexts where file changes are detected -2. When a sub-agent completes, the Stop hook **automatically** evaluates rules -3. You (the main agent) observe whether hooks fired - you do NOT manually trigger them -4. If you edit files directly, the hooks won't fire because you're not a completing sub-agent - -**NEVER manually run `echo '{}' | python -m deepwork.hooks.rules_check`** - this defeats the purpose of the test. Hooks must fire AUTOMATICALLY when sub-agents return. 
- -## CRITICAL: Serial Execution - -**These tests MUST run ONE AT A TIME, with resets between each.** - -Why serial execution is required: -- These tests edit ONLY the trigger file (not the safety) -- If multiple sub-agents run in parallel, sub-agent A's hook will see changes from sub-agent B -- This causes cross-contamination: A gets blocked by rules triggered by B's changes -- Run one test, observe the hook, reset, then run the next - -## Task - -Run all 6 "should fire" tests in **serial** sub-agents, resetting between each, and verify that blocking hooks fire automatically. - -### Process - -For EACH test below, follow this cycle: - -1. **Launch a sub-agent** using the Task tool with: - - `model: "haiku"` - Use the fast model to minimize cost and latency - - `max_turns: 5` - Prevent sub-agents from hanging indefinitely -2. **Wait for the sub-agent to complete** -3. **Observe whether the hook fired automatically** - you should see a blocking prompt or command output -4. **If no visible blocking occurred, check the queue**: - ```bash - ls -la .deepwork/tmp/rules/queue/ - cat .deepwork/tmp/rules/queue/*.json 2>/dev/null - ``` - - If queue entries exist with status "queued", the hook DID fire but blocking wasn't visible - - If queue is empty, the hook did NOT fire at all - - Record the queue status along with the result -5. **Record the result** - pass if hook fired (visible block OR queue entry), fail if neither -6. **Reset** (MANDATORY after each test) - follow the reset step instructions: - ```bash - git reset HEAD manual_tests/ && git checkout -- manual_tests/ && rm -f manual_tests/test_created_mode/new_config.yml - deepwork rules clear_queue - ``` - See [reset.md](reset.md) for detailed explanation of these commands. -7. **Check for early termination**: If **2 tests have now failed**, immediately: - - Stop running any remaining tests - - Report the results summary showing which tests passed/failed - - The job halts here - do NOT proceed with remaining tests -8. **Proceed to the next test** (only if fewer than 2 failures) - -**IMPORTANT**: Only launch ONE sub-agent at a time. Wait for it to complete and reset before launching the next. - -### Test Cases (run serially) - -**Test 1: Trigger/Safety** -- Sub-agent prompt: "Edit ONLY `manual_tests/test_trigger_safety_mode/feature.py` to add a comment. Do NOT edit the `_doc.md` file." -- Sub-agent config: `model: "haiku"`, `max_turns: 5` -- Expected: Hook fires with prompt about updating documentation - -**Test 2: Set Mode** -- Sub-agent prompt: "Edit ONLY `manual_tests/test_set_mode/module_source.py` to add a comment. Do NOT edit the `_test.py` file." -- Sub-agent config: `model: "haiku"`, `max_turns: 5` -- Expected: Hook fires with prompt about updating tests - -**Test 3: Pair Mode** -- Sub-agent prompt: "Edit ONLY `manual_tests/test_pair_mode/handler_trigger.py` to add a comment. Do NOT edit the `_expected.md` file." -- Sub-agent config: `model: "haiku"`, `max_turns: 5` -- Expected: Hook fires with prompt about updating expected output - -**Test 4: Command Action** -- Sub-agent prompt: "Edit `manual_tests/test_command_action/input.txt` to add some text." -- Sub-agent config: `model: "haiku"`, `max_turns: 5` -- Expected: Command runs automatically, appending to the log file (this rule always runs, no safety condition) - -**Test 5: Multi Safety** -- Sub-agent prompt: "Edit ONLY `manual_tests/test_multi_safety/core.py` to add a comment. Do NOT edit any of the safety files (`_safety_a.md`, `_safety_b.md`, or `_safety_c.md`)." 
-- Sub-agent config: `model: "haiku"`, `max_turns: 5` -- Expected: Hook fires with prompt about updating safety documentation - -**Test 6: Created Mode** -- Sub-agent prompt: "Create a NEW file `manual_tests/test_created_mode/new_config.yml` with some YAML content. This must be a NEW file, not a modification." -- Sub-agent config: `model: "haiku"`, `max_turns: 5` -- Expected: Hook fires with prompt about new configuration files - -### Results Tracking - -Record the result after each test: - -| Test Case | Should Fire | Visible Block? | Queue Entry? | Result | -|-----------|-------------|:--------------:|:------------:|:------:| -| Trigger/Safety | Edit .py only | | | | -| Set Mode | Edit _source.py only | | | | -| Pair Mode | Edit _trigger.py only | | | | -| Command Action | Edit .txt | | | | -| Multi Safety | Edit .py only | | | | -| Created Mode | Create NEW .yml | | | | - -**Queue Entry Status Guide:** -- If queue has entry with status "queued" -> Hook fired, rule was shown to agent -- If queue has entry with status "passed" -> Hook fired, rule was satisfied -- If queue is empty -> Hook did NOT fire - -## Quality Criteria - -- **Sub-agents spawned**: Tests were run using the Task tool to spawn sub-agents - the main agent did NOT edit files directly -- **Correct sub-agent config**: All sub-agents used `model: "haiku"` and `max_turns: 5` -- **Serial execution**: Sub-agents were launched ONE AT A TIME, not in parallel -- **Reset between tests**: Reset step was followed after each test -- **Hooks fired automatically**: The main agent observed the blocking hooks firing automatically when each sub-agent returned - the agent did NOT manually run rules_check -- **Early termination on 2 failures**: If 2 tests failed, testing halted immediately and results were reported -- **Results recorded**: Pass/fail status was recorded for each test case -- When all criteria are met, include `Quality Criteria Met` in your response - -## Reference - -See [test_reference.md](test_reference.md) for the complete test matrix and rule descriptions. - -## Context - -This step runs after the "should NOT fire" tests. These tests verify that rules correctly fire when trigger conditions are met without safety conditions. The serial execution with resets is essential to prevent cross-contamination between tests. Infinite block tests are handled in a separate step. diff --git a/.deepwork/jobs/manual_tests/steps/run_not_fire_tests.md b/.deepwork/jobs/manual_tests/steps/run_not_fire_tests.md deleted file mode 100644 index 2982c69b..00000000 --- a/.deepwork/jobs/manual_tests/steps/run_not_fire_tests.md +++ /dev/null @@ -1,118 +0,0 @@ -# Run Should-NOT-Fire Tests - -## Objective - -Run all "should NOT fire" tests in parallel sub-agents to verify that rules do not fire when their safety conditions are met. - -## CRITICAL: Sub-Agent Requirement - -**You MUST spawn sub-agents to make all file edits. DO NOT edit the test files yourself.** - -Why sub-agents are required: -1. Sub-agents run in isolated contexts where file changes are detected -2. When a sub-agent completes, the Stop hook **automatically** evaluates rules -3. You (the main agent) observe whether hooks fired - you do NOT manually trigger them -4. If you edit files directly, the hooks won't fire because you're not a completing sub-agent - -**NEVER manually run `echo '{}' | python -m deepwork.hooks.rules_check`** - this defeats the purpose of the test. Hooks must fire AUTOMATICALLY when sub-agents return. 
- -## Task - -Run all 6 "should NOT fire" tests in **parallel** sub-agents, then verify no blocking hooks fired. - -### Process - -1. **Launch parallel sub-agents for all "should NOT fire" tests** - - Use the Task tool to spawn **ALL of the following sub-agents in a SINGLE message** (parallel execution). - - **Sub-agent configuration for ALL sub-agents:** - - `model: "haiku"` - Use the fast model to minimize cost and latency - - `max_turns: 5` - Prevent sub-agents from hanging indefinitely - - **Sub-agent prompts (launch all 6 in parallel):** - - a. **Trigger/Safety test** - "Edit `manual_tests/test_trigger_safety_mode/feature.py` to add a comment, AND edit `manual_tests/test_trigger_safety_mode/feature_doc.md` to add a note. Both files must be edited so the rule does NOT fire." - - b. **Set Mode test** - "Edit `manual_tests/test_set_mode/module_source.py` to add a comment, AND edit `manual_tests/test_set_mode/module_test.py` to add a test comment. Both files must be edited so the rule does NOT fire." - - c. **Pair Mode (forward) test** - "Edit `manual_tests/test_pair_mode/handler_trigger.py` to add a comment, AND edit `manual_tests/test_pair_mode/handler_expected.md` to add a note. Both files must be edited so the rule does NOT fire." - - d. **Pair Mode (reverse) test** - "Edit ONLY `manual_tests/test_pair_mode/handler_expected.md` to add a note. Only the expected file should be edited - this tests that the pair rule only fires in one direction." - - e. **Multi Safety test** - "Edit `manual_tests/test_multi_safety/core.py` to add a comment, AND edit `manual_tests/test_multi_safety/core_safety_a.md` to add a note. Both files must be edited so the rule does NOT fire." - - f. **Created Mode test** - "Modify the EXISTING file `manual_tests/test_created_mode/existing.yml` by adding a comment. Do NOT create a new file - only modify the existing one. The created mode rule should NOT fire for modifications." - -2. **Observe the results** - - When each sub-agent returns: - - **If no blocking hook fired**: Preliminary pass - proceed to queue verification - - **If a blocking hook fired**: The test FAILED - investigate why the rule fired when it shouldn't have - - **Remember**: You are OBSERVING whether hooks fired automatically. Do NOT run any verification commands manually during sub-agent execution. - -3. **Verify no queue entries** (CRITICAL for "should NOT fire" tests) - - After ALL sub-agents have completed, verify the rules queue is empty: - ```bash - ls -la .deepwork/tmp/rules/queue/ - cat .deepwork/tmp/rules/queue/*.json 2>/dev/null - ``` - - - **If queue is empty**: All tests PASSED - rules correctly did not fire - - **If queue has entries**: Tests FAILED - rules fired when they shouldn't have. Check which rule fired and investigate. - - This verification is essential because some rules may fire without visible blocking but still create queue entries. - -4. **Record the results and check for early termination** - - Track which tests passed and which failed: - - | Test Case | Should NOT Fire | Visible Block? | Queue Entry? | Result | - |-----------|:---------------:|:--------------:|:------------:|:------:| - | Trigger/Safety | Edit both files | | | | - | Set Mode | Edit both files | | | | - | Pair Mode (forward) | Edit both files | | | | - | Pair Mode (reverse) | Edit expected only | | | | - | Multi Safety | Edit both files | | | | - | Created Mode | Modify existing | | | | - - **Result criteria**: PASS only if NO visible block AND NO queue entry. FAIL if either occurred. 
- - **EARLY TERMINATION**: If **2 tests have failed**, immediately: - 1. Stop running any remaining tests - 2. Reset (see step 5) - 3. Report the results summary showing which tests passed/failed - 4. Do NOT proceed to the next step - the job halts here - -5. **Reset** (MANDATORY - call the reset step internally) - - **IMPORTANT**: This step is MANDATORY and must run regardless of whether tests passed or failed. - - Follow the reset step instructions. Run these commands to clean up: - ```bash - git reset HEAD manual_tests/ && git checkout -- manual_tests/ && rm -f manual_tests/test_created_mode/new_config.yml - deepwork rules clear_queue - ``` - - See [reset.md](reset.md) for detailed explanation of these commands. - -## Quality Criteria - -- **Sub-agents spawned**: All 6 tests were run using the Task tool to spawn sub-agents - the main agent did NOT edit files directly -- **Correct sub-agent config**: All sub-agents used `model: "haiku"` and `max_turns: 5` -- **Parallel execution**: All 6 sub-agents were launched in a single message (parallel) -- **Hooks observed (not triggered)**: The main agent observed hook behavior without manually running rules_check -- **Queue verified empty**: After all sub-agents completed, the rules queue was checked and confirmed empty (no queue entries = rules did not fire) -- **Early termination on 2 failures**: If 2 tests failed, testing halted immediately and results were reported -- **Reset performed**: Reset step was followed after tests completed (regardless of pass/fail) -- When all criteria are met, include `Quality Criteria Met` in your response - -## Reference - -See [test_reference.md](test_reference.md) for the complete test matrix and rule descriptions. - -## Context - -This step runs after the reset step (which ensures a clean environment) and tests that rules correctly do NOT fire when safety conditions are met. The "should fire" tests run after these complete. Infinite block tests are handled in a separate step. diff --git a/.deepwork/jobs/manual_tests/steps/test_reference.md b/.deepwork/jobs/manual_tests/steps/test_reference.md deleted file mode 100644 index 8247837a..00000000 --- a/.deepwork/jobs/manual_tests/steps/test_reference.md +++ /dev/null @@ -1,92 +0,0 @@ -# Manual Hook/Rule Tests Reference - -This document contains the test matrix and reference information for all manual hook/rule tests. - -## Why Sub-Agents? - -**All tests MUST be run in sub-agents, not by the main agent directly.** - -This approach works because: -1. Sub-agents run in isolated contexts where file changes can be detected -2. The Stop hook **automatically** evaluates rules when the sub-agent completes -3. The main agent can **observe** whether hooks fired - it must NOT manually run the rules_check command -4. Using a fast model (e.g., haiku) keeps test iterations quick and cheap - -## Critical Rules - -1. **NEVER edit test files from the main agent** - always spawn a sub-agent to make edits -2. **NEVER manually run the rules_check command** - hooks fire automatically when sub-agents return -3. **OBSERVE the hook behavior** - when a sub-agent returns, watch for blocking prompts or command outputs -4. 
**REVERT between tests** - use `git checkout -- manual_tests/` to reset the test files - -## Parallel vs Serial Execution - -**"Should NOT fire" tests CAN run in parallel:** -- These tests edit BOTH trigger AND safety files (completing the rule requirements) -- Even though `git status` shows changes from all sub-agents, each rule only matches its own scoped file patterns -- Since the safety file is edited, the rule won't fire regardless of other changes -- No cross-contamination possible -- **Revert all changes after these tests complete** before running "should fire" tests - -**"Should fire" tests MUST run serially with git reverts between each:** -- These tests deliberately edit ONLY the trigger file (not the safety) -- If multiple run in parallel, sub-agent A's hook will see changes from sub-agent B -- This causes cross-contamination: A gets blocked by rules triggered by B's changes -- Run one at a time, reverting between each test - -## Test Matrix - -Each test has two cases: one where the rule SHOULD fire, and one where it should NOT. - -| Test | Should Fire | Should NOT Fire | Rule Name | -|------|-------------|-----------------|-----------| -| **Trigger/Safety** | Edit `.py` only | Edit `.py` AND `_doc.md` | Manual Test: Trigger Safety | -| **Set Mode** | Edit `_source.py` only | Edit `_source.py` AND `_test.py` | Manual Test: Set Mode | -| **Pair Mode** | Edit `_trigger.py` only | Edit `_trigger.py` AND `_expected.md` | Manual Test: Pair Mode | -| **Pair Mode (reverse)** | -- | Edit `_expected.md` only (should NOT fire) | Manual Test: Pair Mode | -| **Command Action** | Edit `.txt` -> log appended | -- (always runs) | Manual Test: Command Action | -| **Multi Safety** | Edit `.py` only | Edit `.py` AND any safety file | Manual Test: Multi Safety | -| **Infinite Block Prompt** | Edit `.py` (always blocks) | Provide `` tag | Manual Test: Infinite Block Prompt | -| **Infinite Block Command** | Edit `.py` (command fails) | Provide `` tag | Manual Test: Infinite Block Command | -| **Created Mode** | Create NEW `.yml` file | Modify EXISTING `.yml` file | Manual Test: Created Mode | - -## Test Folders - -| Folder | Rule Type | Description | -|--------|-----------|-------------| -| `test_trigger_safety_mode/` | Trigger/Safety | Basic conditional: fires unless safety file also edited | -| `test_set_mode/` | Set (Bidirectional) | Files must change together (either direction) | -| `test_pair_mode/` | Pair (Directional) | One-way: trigger requires expected, but not vice versa | -| `test_command_action/` | Command Action | Automatically runs command on file change | -| `test_multi_safety/` | Multiple Safety | Fires unless ANY of the safety files also edited | -| `test_infinite_block_prompt/` | Infinite Block (Prompt) | Always blocks with prompt; only promise can bypass | -| `test_infinite_block_command/` | Infinite Block (Command) | Command always fails; tests if promise skips command | -| `test_created_mode/` | Created (New Files Only) | Fires ONLY when NEW files are created, not when existing modified | - -## Corresponding Rules - -Rules are defined in `.deepwork/rules/`: -- `manual-test-trigger-safety.md` -- `manual-test-set-mode.md` -- `manual-test-pair-mode.md` -- `manual-test-command-action.md` -- `manual-test-multi-safety.md` -- `manual-test-infinite-block-prompt.md` -- `manual-test-infinite-block-command.md` -- `manual-test-created-mode.md` - -## Results Tracking Template - -Use this template to track test results: - -| Test Case | Fires When Should | Does NOT Fire When Shouldn't | 
-|-----------|:-----------------:|:----------------------------:| -| Trigger/Safety | [ ] | [ ] | -| Set Mode | [ ] | [ ] | -| Pair Mode (forward) | [ ] | [ ] | -| Pair Mode (reverse - expected only) | -- | [ ] | -| Command Action | [ ] | -- | -| Multi Safety | [ ] | [ ] | -| Infinite Block Prompt | [ ] | [ ] | -| Infinite Block Command | [ ] | [ ] | -| Created Mode | [ ] | [ ] | diff --git a/.deepwork/jobs/update/job.yml b/.deepwork/jobs/update/job.yml index 98715431..61b0013e 100644 --- a/.deepwork/jobs/update/job.yml +++ b/.deepwork/jobs/update/job.yml @@ -1,9 +1,9 @@ name: update version: "1.3.0" -summary: "Updates DeepWork standard jobs in src/ and syncs to installed locations. Use when modifying deepwork_jobs or deepwork_rules." +summary: "Updates DeepWork standard jobs in src/ and syncs to installed locations. Use when modifying deepwork_jobs." description: | A workflow for maintaining standard jobs bundled with DeepWork. Standard jobs - (like `deepwork_jobs` and `deepwork_rules`) are source-controlled in + (like `deepwork_jobs`) are source-controlled in `src/deepwork/standard_jobs/` and must be edited there—never in `.deepwork/jobs/` or `.claude/commands/` directly. diff --git a/.deepwork/rules/architecture-documentation-accuracy.md b/.deepwork/rules/architecture-documentation-accuracy.md deleted file mode 100644 index 91798109..00000000 --- a/.deepwork/rules/architecture-documentation-accuracy.md +++ /dev/null @@ -1,11 +0,0 @@ ---- -name: Architecture Documentation Accuracy -trigger: src/**/* -safety: doc/architecture.md -compare_to: base ---- -Source code in src/ has been modified. Please review doc/architecture.md for accuracy: -1. Verify the documented architecture matches the current implementation -2. Check that file paths and directory structures are still correct -3. Ensure component descriptions reflect actual behavior -4. Update any diagrams or flows that may have changed diff --git a/.deepwork/rules/manual-test-command-action.md b/.deepwork/rules/manual-test-command-action.md deleted file mode 100644 index 966ab2de..00000000 --- a/.deepwork/rules/manual-test-command-action.md +++ /dev/null @@ -1,19 +0,0 @@ ---- -name: "Manual Test: Command Action" -trigger: manual_tests/test_command_action/test_command_action.txt -action: - command: echo "$(date '+%Y-%m-%d %H:%M:%S') - Command triggered by edit to {file}" >> manual_tests/test_command_action/test_command_action_log.txt - run_for: each_match -compare_to: prompt ---- - -# Manual Test: Command Action - -This rule automatically appends a timestamped log entry when the -test file is edited. No agent prompt is shown - the command runs -automatically. - -## This tests: - -The command action feature where rules can execute shell commands -instead of prompting the agent. The command should be idempotent. diff --git a/.deepwork/rules/manual-test-created-mode.md b/.deepwork/rules/manual-test-created-mode.md deleted file mode 100644 index abb6108d..00000000 --- a/.deepwork/rules/manual-test-created-mode.md +++ /dev/null @@ -1,22 +0,0 @@ ---- -name: "Manual Test: Created Mode" -created: manual_tests/test_created_mode/*.yml -compare_to: prompt ---- - -# Manual Test: Created Mode (File Creation Trigger) - -A new test file was created in the created mode test directory! - -**Created:** `{created_files}` - -## What to do: - -1. Verify the created mode detection is working correctly -2. 
Acknowledge with `Manual Test: Created Mode` - -## This tests: - -The "created" detection mode where rules only fire for newly created files, -not for modifications to existing files. This is useful for enforcing standards -on new code specifically. diff --git a/.deepwork/rules/manual-test-infinite-block-command.md b/.deepwork/rules/manual-test-infinite-block-command.md deleted file mode 100644 index 8f8b24b4..00000000 --- a/.deepwork/rules/manual-test-infinite-block-command.md +++ /dev/null @@ -1,41 +0,0 @@ ---- -name: "Manual Test: Infinite Block Command" -trigger: manual_tests/test_infinite_block_command/test_infinite_block_command.py -action: - command: "false" - run_for: each_match -compare_to: prompt ---- - -# Manual Test: Infinite Block Command (Promise Required) - -This rule runs a command that ALWAYS FAILS (`false` returns exit code 1). - -## Why this blocks - -The command action always fails, creating an infinite block. The only way -to proceed should be to provide a promise acknowledging that you understand -the restriction. - -## Expected behavior - -If promises work correctly for command actions: -- Without promise: Command runs, fails, blocks -- With promise: Command is SKIPPED entirely, allows - -If there's a bug: -- The command will run and fail even when a promise is provided - -## What to do - -You MUST include the following promise tag in your response: - -``` -Manual Test: Infinite Block Command -``` - -## This tests - -Whether the promise mechanism works for COMMAND-type rules. If a rule is -promised, the command should not run at all - the rule should be skipped -during evaluation. diff --git a/.deepwork/rules/manual-test-infinite-block-prompt.md b/.deepwork/rules/manual-test-infinite-block-prompt.md deleted file mode 100644 index 67c97414..00000000 --- a/.deepwork/rules/manual-test-infinite-block-prompt.md +++ /dev/null @@ -1,34 +0,0 @@ ---- -name: "Manual Test: Infinite Block Prompt" -trigger: manual_tests/test_infinite_block_prompt/test_infinite_block_prompt.py -compare_to: prompt ---- - -# Manual Test: Infinite Block Prompt (Promise Required) - -You edited `{trigger_files}` which triggers an infinite block. - -## Why this blocks - -This rule has NO safety file option and uses a PROMPT action. The only way -to proceed is to provide a promise acknowledging that you understand the -restriction. - -## What to do - -You MUST include the following promise tag in your response: - -``` -Manual Test: Infinite Block Prompt -``` - -This simulates scenarios where: -- An operation requires explicit acknowledgment before proceeding -- There is no alternative action that can suppress the rule -- The agent must demonstrate understanding of the constraint - -## This tests - -The promise mechanism for PROMPT-type rules that cannot be satisfied by -editing additional files. This is useful for enforcing policies where -acknowledgment is the only valid response. diff --git a/.deepwork/rules/manual-test-multi-safety.md b/.deepwork/rules/manual-test-multi-safety.md deleted file mode 100644 index 4ce978cb..00000000 --- a/.deepwork/rules/manual-test-multi-safety.md +++ /dev/null @@ -1,25 +0,0 @@ ---- -name: "Manual Test: Multi Safety" -trigger: manual_tests/test_multi_safety/test_multi_safety.py -safety: - - manual_tests/test_multi_safety/test_multi_safety_changelog.md - - manual_tests/test_multi_safety/test_multi_safety_version.txt -compare_to: prompt ---- - -# Manual Test: Multiple Safety Patterns - -You changed the source file without updating version info! 
- -**Changed:** `{trigger_files}` - -## What to do: - -1. Update the changelog: `manual_tests/test_multi_safety/test_multi_safety_changelog.md` -2. And/or update the version: `manual_tests/test_multi_safety/test_multi_safety_version.txt` -3. Or acknowledge with `Manual Test: Multi Safety` - -## This tests: - -Trigger/safety mode with MULTIPLE safety patterns. The rule is -suppressed if ANY of the safety files are also edited. diff --git a/.deepwork/rules/manual-test-pair-mode.md b/.deepwork/rules/manual-test-pair-mode.md deleted file mode 100644 index 9c2379bf..00000000 --- a/.deepwork/rules/manual-test-pair-mode.md +++ /dev/null @@ -1,26 +0,0 @@ ---- -name: "Manual Test: Pair Mode" -pair: - trigger: manual_tests/test_pair_mode/test_pair_mode_trigger.py - expects: manual_tests/test_pair_mode/test_pair_mode_expected.md -compare_to: prompt ---- - -# Manual Test: Pair Mode (Directional Correspondence) - -API code changed without documentation update! - -**Changed:** `{trigger_files}` -**Expected:** `{expected_files}` - -## What to do: - -1. Update the API documentation in `test_pair_mode_expected.md` -2. Or acknowledge with `Manual Test: Pair Mode` - -## This tests: - -The "pair" detection mode where there's a ONE-WAY relationship. -When the trigger file changes, the expected file must also change. -BUT the expected file can change independently (docs can be updated -without requiring code changes). diff --git a/.deepwork/rules/manual-test-set-mode.md b/.deepwork/rules/manual-test-set-mode.md deleted file mode 100644 index abe504ec..00000000 --- a/.deepwork/rules/manual-test-set-mode.md +++ /dev/null @@ -1,26 +0,0 @@ ---- -name: "Manual Test: Set Mode" -set: - - manual_tests/test_set_mode/test_set_mode_source.py - - manual_tests/test_set_mode/test_set_mode_test.py -compare_to: prompt ---- - -# Manual Test: Set Mode (Bidirectional Correspondence) - -Source and test files must change together! - -**Changed:** `{trigger_files}` -**Missing:** `{expected_files}` - -## What to do: - -1. If you changed the source file, update the corresponding test file -2. If you changed the test file, ensure the source file reflects those changes -3. Or acknowledge with `Manual Test: Set Mode` - -## This tests: - -The "set" detection mode where files in a set must ALL change together. -This is bidirectional - the rule fires regardless of which file in the set -was edited first. diff --git a/.deepwork/rules/manual-test-trigger-safety.md b/.deepwork/rules/manual-test-trigger-safety.md deleted file mode 100644 index b144a2a0..00000000 --- a/.deepwork/rules/manual-test-trigger-safety.md +++ /dev/null @@ -1,21 +0,0 @@ ---- -name: "Manual Test: Trigger Safety" -trigger: manual_tests/test_trigger_safety_mode/test_trigger_safety_mode.py -safety: manual_tests/test_trigger_safety_mode/test_trigger_safety_mode_doc.md -compare_to: prompt ---- - -# Manual Test: Trigger/Safety Mode - -You edited `{trigger_files}` without updating the documentation. - -## What to do: - -1. Review the changes in the source file -2. Update `manual_tests/test_trigger_safety_mode/test_trigger_safety_mode_doc.md` to reflect changes -3. Or acknowledge this is intentional with `Manual Test: Trigger Safety` - -## This tests: - -The basic trigger/safety detection mode where editing the trigger file -causes the rule to fire UNLESS the safety file is also edited. 
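For reference, the trigger/safety semantics these manual-test rules exercise can be illustrated in a few lines. This is only a sketch: DeepWork's real evaluator also supports the `**` recursion and `{path}` capture variables described in the rules documentation, and `fnmatch` here is just an approximation using the literal paths from the rule above.

```python
from fnmatch import fnmatch


def rule_fires(changed_files, trigger_patterns, safety_patterns):
    """Trigger/safety mode: fire if any change matches a trigger and none match a safety."""
    triggered = any(fnmatch(f, p) for f in changed_files for p in trigger_patterns)
    safe = any(fnmatch(f, p) for f in changed_files for p in safety_patterns)
    return triggered and not safe


trigger = ["manual_tests/test_trigger_safety_mode/test_trigger_safety_mode.py"]
safety = ["manual_tests/test_trigger_safety_mode/test_trigger_safety_mode_doc.md"]

# Edit the trigger file only -> the rule fires.
print(rule_fires(trigger, trigger, safety))            # True

# Edit both the trigger and the safety file -> the safety suppresses it.
print(rule_fires(trigger + safety, trigger, safety))   # False
```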
diff --git a/.deepwork/rules/new-standard-job-warning.md.disabled b/.deepwork/rules/new-standard-job-warning.md.disabled deleted file mode 100644 index e02495b4..00000000 --- a/.deepwork/rules/new-standard-job-warning.md.disabled +++ /dev/null @@ -1,16 +0,0 @@ ---- -name: New Standard Job Warning -created: src/deepwork/standard_jobs/*/job.yml -compare_to: prompt ---- -A new standard job is being created. Standard jobs are bundled with DeepWork and will be installed in any project that uses DeepWork. - -**Before proceeding, verify this is intentional:** - -- **Standard jobs** (`src/deepwork/standard_jobs/`) - Ship with DeepWork, auto-installed in all projects that use DeepWork -- **Repository jobs** (`.deepwork/jobs/`) - Specific to a single repository -- **Library jobs** - Installed from external packages - -Unless the user **explicitly requested** creating a new standard job (not just "a job" or "a new job"), this should likely be a **repository job** in `.deepwork/jobs/` instead. - -If uncertain, ask the user: "Should this be a standard job (shipped with DeepWork) or a repository-specific job?" diff --git a/.deepwork/rules/readme-accuracy.md b/.deepwork/rules/readme-accuracy.md deleted file mode 100644 index 9e75c596..00000000 --- a/.deepwork/rules/readme-accuracy.md +++ /dev/null @@ -1,11 +0,0 @@ ---- -name: README Accuracy -trigger: src/**/* -safety: README.md -compare_to: base ---- -Source code in src/ has been modified. Please review README.md for accuracy: -1. Verify project overview still reflects current functionality -2. Check that usage examples are still correct -3. Ensure installation/setup instructions remain valid -4. Update any sections that reference changed code diff --git a/.deepwork/rules/skill-template-best-practices.md b/.deepwork/rules/skill-template-best-practices.md deleted file mode 100644 index ff33ecfd..00000000 --- a/.deepwork/rules/skill-template-best-practices.md +++ /dev/null @@ -1,46 +0,0 @@ ---- -name: Skill Template Best Practices -trigger: src/deepwork/templates/**/skill-job*.jinja -compare_to: prompt ---- -Skill template files are being modified. Ensure the generated skills follow these best practices: - -## Description Guidelines - -The description appears in skill search results and helps users find the right skill. Keep it search-friendly and scannable. - -1. **Be specific** - Name exact capabilities/actions the skill performs -2. **Keep concise** - One sentence, max ~100 chars; describes WHAT it does, not HOW -3. **Avoid vagueness** - "Extract text from PDFs, fill forms" is good; "Helps with documents" is bad -4. **Avoid meta-language** - Don't include "Trigger:", "Keywords:", or similar prefixes. Let the description itself be searchable. - -## Instruction Writing - -1. **Keep focused** - Core instructions should be under 500 lines; use supporting files for details -2. **Use progressive disclosure** - Essential info in main content, detailed reference in linked files -3. **Be explicit** - Provide clear, step-by-step guidance rather than relying on inference -4. **Structure clearly** - Use headers, numbered lists for sequential steps, bullets for options - -## Prompt Structure - -1. **Specificity first** - Detailed directions upfront prevent course corrections later -2. **Plan before action** - Ask agent to analyze/plan before implementing -3. **Reference concrete files** - Use specific paths, not general descriptions -4. **Include context** - Mention edge cases, preferred patterns, and expected outcomes - -## Quality Criteria - -1. 
**Make measurable** - Criteria should be verifiable, not subjective -2. **Focus on outcomes** - What the output should achieve, not process steps -3. **Keep actionable** - Agent should be able to self-evaluate against criteria - -## Platform Considerations - -- **Claude**: Supports hooks for automated validation; use Skill tool for step invocation -- **Gemini**: No hook support; instructions must guide manual verification - -## Reference Documentation - -When unsure about best practices, consult: -- https://code.claude.com/docs/en/skills - Official skills documentation -- https://www.anthropic.com/engineering/claude-code-best-practices - Prompting best practices diff --git a/.deepwork/rules/standard-jobs-source-of-truth.md b/.deepwork/rules/standard-jobs-source-of-truth.md deleted file mode 100644 index 2d0092c9..00000000 --- a/.deepwork/rules/standard-jobs-source-of-truth.md +++ /dev/null @@ -1,25 +0,0 @@ ---- -name: Standard Jobs Source of Truth -trigger: - - .deepwork/jobs/deepwork_jobs/**/* - - .deepwork/jobs/deepwork_rules/**/* -safety: - - src/deepwork/standard_jobs/deepwork_jobs/**/* - - src/deepwork/standard_jobs/deepwork_rules/**/* -compare_to: base ---- -You modified files in `.deepwork/jobs/deepwork_jobs/` or `.deepwork/jobs/deepwork_rules/`. - -**These are installed copies, NOT the source of truth!** - -Standard jobs (deepwork_jobs, deepwork_rules) must be edited in their source location: -- Source: `src/deepwork/standard_jobs/[job_name]/` -- Installed copy: `.deepwork/jobs/[job_name]/` (DO NOT edit directly) - -**Required action:** -1. Revert your changes to `.deepwork/jobs/deepwork_*/` -2. Make the same changes in `src/deepwork/standard_jobs/[job_name]/` -3. Run `deepwork install --platform claude` to sync changes -4. Verify the changes propagated correctly - -See CLAUDE.md section "CRITICAL: Editing Standard Jobs" for details. diff --git a/.deepwork/rules/uv-lock-sync.md b/.deepwork/rules/uv-lock-sync.md deleted file mode 100644 index 75cca269..00000000 --- a/.deepwork/rules/uv-lock-sync.md +++ /dev/null @@ -1,15 +0,0 @@ ---- -name: UV Lock Sync -trigger: pyproject.toml -action: - command: uv sync -compare_to: prompt ---- - -# UV Lock Sync - -Automatically runs `uv sync` when `pyproject.toml` is modified to keep -`uv.lock` in sync with dependency changes. - -This ensures the lock file is always up-to-date when dependencies are -added, removed, or updated in pyproject.toml. diff --git a/.deepwork/rules/version-and-changelog-update.md b/.deepwork/rules/version-and-changelog-update.md deleted file mode 100644 index ac617f8e..00000000 --- a/.deepwork/rules/version-and-changelog-update.md +++ /dev/null @@ -1,29 +0,0 @@ ---- -name: Version and Changelog Update -trigger: src/**/* -safety: - - pyproject.toml - - CHANGELOG.md -compare_to: base ---- -Source code in src/ has been modified. **You MUST evaluate whether version and changelog updates are needed.** - -**Evaluate the changes:** -1. Is this a bug fix, new feature, breaking change, or internal refactor? -2. Does this change affect the public API or user-facing behavior? -3. Would users need to know about this change when upgrading? - -**If version update is needed:** -1. Update the `version` field in `pyproject.toml` following semantic versioning: - - PATCH (0.1.x): Bug fixes, minor internal changes - - MINOR (0.x.0): New features, non-breaking changes - - MAJOR (x.0.0): Breaking changes -2. 
Add an entry to `CHANGELOG.md` under an appropriate version header: - - Use categories: Added, Changed, Fixed, Removed, Deprecated, Security - - Include a clear, user-facing description of what changed - - Follow the Keep a Changelog format - -**If NO version update is needed** (e.g., tests only, comments, internal refactoring with no behavior change): -- Explicitly state why no version bump is required - -**This rule requires explicit action** - either update both files or justify why no update is needed. diff --git a/.gemini/skills/add_platform/verify.toml b/.gemini/skills/add_platform/verify.toml index ab440f09..3d8f081c 100644 --- a/.gemini/skills/add_platform/verify.toml +++ b/.gemini/skills/add_platform/verify.toml @@ -76,7 +76,6 @@ Ensure the implementation step is complete: - `deepwork_jobs.define.md` exists (or equivalent for the platform) - `deepwork_jobs.implement.md` exists - `deepwork_jobs.refine.md` exists - - `deepwork_rules.define.md` exists - All expected step commands exist 4. **Validate command file content** @@ -106,7 +105,6 @@ Ensure the implementation step is complete: - `deepwork install --platform ` completes without errors - All expected command files are created: - deepwork_jobs.define, implement, refine - - deepwork_rules.define - Any other standard job commands - Command file content is correct: - Matches platform's expected format diff --git a/.gemini/skills/deepwork_jobs/implement.toml b/.gemini/skills/deepwork_jobs/implement.toml index 484f4bcc..c645746f 100644 --- a/.gemini/skills/deepwork_jobs/implement.toml +++ b/.gemini/skills/deepwork_jobs/implement.toml @@ -150,66 +150,6 @@ This will: - Generate skills for each step - Make the skills available in `.claude/skills/` (or appropriate platform directory) -### Step 6: Consider Rules for the New Job - -After implementing the job, consider whether there are **rules** that would help enforce quality or consistency when working with this job's domain. - -**What are rules?** - -Rules are automated guardrails stored as markdown files in `.deepwork/rules/` that trigger when certain files change during an AI session. They help ensure: -- Documentation stays in sync with code -- Team guidelines are followed -- Architectural decisions are respected -- Quality standards are maintained - -**When to suggest rules:** - -Think about the job you just implemented and ask: -- Does this job produce outputs that other files depend on? -- Are there documentation files that should be updated when this job's outputs change? -- Are there quality checks or reviews that should happen when certain files in this domain change? -- Could changes to the job's output files impact other parts of the project? - -**Examples of rules that might make sense:** - -| Job Type | Potential Rule | -|----------|----------------| -| API Design | "Update API docs when endpoint definitions change" | -| Database Schema | "Review migrations when schema files change" | -| Competitive Research | "Update strategy docs when competitor analysis changes" | -| Feature Development | "Update changelog when feature files change" | -| Configuration Management | "Update install guide when config files change" | - -**How to offer rule creation:** - -If you identify one or more rules that would benefit the user, explain: -1. **What the rule would do** - What triggers it and what action it prompts -2. **Why it would help** - How it prevents common mistakes or keeps things in sync -3. 
**What files it would watch** - The trigger patterns - -Then ask the user: - -> "Would you like me to create this rule for you? I can run `/deepwork_rules.define` to set it up." - -If the user agrees, invoke the `/deepwork_rules.define` command to guide them through creating the rule. - -**Example dialogue:** - -``` -Based on the competitive_research job you just created, I noticed that when -competitor analysis files change, it would be helpful to remind you to update -your strategy documentation. - -I'd suggest a rule like: -- **Name**: "Update strategy when competitor analysis changes" -- **Trigger**: `**/positioning_report.md` -- **Action**: Prompt to review and update `docs/strategy.md` - -Would you like me to create this rule? I can run `/deepwork_rules.define` to set it up. -``` - -**Note:** Not every job needs rules. Only suggest them when they would genuinely help maintain consistency or quality. Don't force rules where they don't make sense. - ## Example Implementation For a complete worked example showing a job.yml and corresponding step instruction file, see: @@ -241,8 +181,6 @@ Before marking this step complete, ensure: - [ ] Each instruction file is complete and actionable - [ ] `deepwork sync` executed successfully - [ ] Skills generated in platform directory -- [ ] Considered whether rules would benefit this job (Step 6) -- [ ] If rules suggested, offered to run `/deepwork_rules.define` ## Quality Criteria @@ -254,7 +192,6 @@ Before marking this step complete, ensure: - Steps with user inputs explicitly use "ask structured questions" phrasing - Sync completed successfully - Skills available for use -- Thoughtfully considered relevant rules for the job domain ### Job Context diff --git a/.gemini/skills/deepwork_rules/define.toml b/.gemini/skills/deepwork_rules/define.toml deleted file mode 100644 index 980ad931..00000000 --- a/.gemini/skills/deepwork_rules/define.toml +++ /dev/null @@ -1,327 +0,0 @@ -# deepwork_rules:define -# -# Creates a rule file that triggers when specified files change. Use when setting up documentation sync, code review requirements, or automated commands. -# -# Generated by DeepWork - do not edit manually - -description = "Creates a rule file that triggers when specified files change. Use when setting up documentation sync, code review requirements, or automated commands." - -prompt = """ -# deepwork_rules:define - -**Standalone command** - can be run anytime - -> Creates file-change rules that enforce guidelines during AI sessions. Use when automating documentation sync or code review triggers. - - -## Instructions - -**Goal**: Creates a rule file that triggers when specified files change. Use when setting up documentation sync, code review requirements, or automated commands. - -# Define Rule - -## Objective - -Create a new rule file in the `.deepwork/rules/` directory to enforce team guidelines, documentation requirements, or other constraints when specific files change. - -## Task - -Guide the user through defining a new rule by asking structured questions. **Do not create the rule without first understanding what they want to enforce.** - -**Important**: Use the AskUserQuestion tool to ask structured questions when gathering information from the user. This provides a better user experience with clear options and guided choices. - -### Step 1: Understand the Rule Purpose - -Start by asking structured questions to understand what the user wants to enforce: - -1. 
**What guideline or constraint should this rule enforce?** - - What situation triggers the need for action? - - What files or directories, when changed, should trigger this rule? - - Examples: "When config files change", "When API code changes", "When database schema changes" - -2. **What action should be taken?** - - What should the agent do when the rule triggers? - - Update documentation? Perform a security review? Update tests? - - Is there a specific file or process that needs attention? - -3. **Are there any "safety" conditions?** - - Are there files that, if also changed, mean the rule doesn't need to fire? - - For example: If config changes AND install_guide.md changes, assume docs are already updated - - This prevents redundant prompts when the user has already done the right thing - -### Step 2: Choose the Detection Mode - -Help the user select the appropriate detection mode: - -**Trigger/Safety Mode** (most common): -- Fires when trigger patterns match AND no safety patterns match -- Use for: "When X changes, check Y" rules -- Example: When config changes, verify install docs - -**Set Mode** (bidirectional correspondence): -- Fires when files that should change together don't all change -- Use for: Source/test pairing, model/migration sync -- Example: `src/foo.py` and `tests/foo_test.py` should change together - -**Pair Mode** (directional correspondence): -- Fires when a trigger file changes but expected files don't -- Changes to expected files alone do NOT trigger -- Use for: API code requires documentation updates (but docs can update independently) - -### Step 3: Define the Patterns - -Help the user define glob patterns for files. - -**Common patterns:** -- `src/**/*.py` - All Python files in src directory (recursive) -- `app/config/**/*` - All files in app/config directory -- `*.md` - All markdown files in root -- `src/api/**/*` - All files in the API directory -- `migrations/**/*.sql` - All SQL migrations - -**Variable patterns (for set/pair modes):** -- `src/{path}.py` - Captures path variable (e.g., `foo/bar` from `src/foo/bar.py`) -- `tests/{path}_test.py` - Uses same path variable in corresponding file -- `{name}` matches single segment, `{path}` matches multiple segments - -**Pattern syntax:** -- `*` - Matches any characters within a single path segment -- `**` - Matches any characters across multiple path segments (recursive) -- `?` - Matches a single character - -### Step 4: Choose the Comparison Mode (Optional) - -The `compare_to` field controls what baseline is used when detecting "changed files": - -**Options:** -- `base` (default) - Compares to the base of the current branch (merge-base with main/master). Best for feature branches. -- `default_tip` - Compares to the current tip of the default branch. Useful for seeing difference from production. -- `prompt` - Compares to the state at the start of each prompt. For rules about very recent changes. - -Most rules should use the default (`base`) and don't need to specify `compare_to`. - -### Step 5: Write the Instructions - -Create clear, actionable instructions for what the agent should do when the rule fires. 
- -**Good instructions include:** -- What to check or review -- What files might need updating -- Specific actions to take -- Quality criteria for completion - -**Template variables available in instructions:** -- `{trigger_files}` - Files that triggered the rule -- `{expected_files}` - Expected corresponding files (for set/pair modes) - -### Step 6: Create the Rule File - -Create a new file in `.deepwork/rules/` with a kebab-case filename: - -**File Location**: `.deepwork/rules/{rule-name}.md` - -**Format for Trigger/Safety Mode:** -```markdown ---- -name: Friendly Name for the Rule -trigger: "glob/pattern/**/*" # or array: ["pattern1", "pattern2"] -safety: "optional/pattern" # optional, or array -compare_to: base # optional: "base" (default), "default_tip", or "prompt" ---- -Instructions for the agent when this rule fires. - -Multi-line markdown content is supported. -``` - -**Format for Set Mode (bidirectional):** -```markdown ---- -name: Source/Test Pairing -set: - - src/{path}.py - - tests/{path}_test.py ---- -Source and test files should change together. - -Modified: {trigger_files} -Expected: {expected_files} -``` - -**Format for Pair Mode (directional):** -```markdown ---- -name: API Documentation -pair: - trigger: api/{path}.py - expects: docs/api/{path}.md ---- -API code requires documentation updates. - -Changed API: {trigger_files} -Update docs: {expected_files} -``` - -### Step 7: Verify the Rule - -After creating the rule: - -1. **Check the YAML frontmatter** - Ensure valid YAML formatting -2. **Test trigger patterns** - Verify patterns match intended files -3. **Review instructions** - Ensure they're clear and actionable -4. **Check for conflicts** - Ensure the rule doesn't conflict with existing ones - -## Example Rules - -### Update Documentation on Config Changes -`.deepwork/rules/config-docs.md`: -```markdown ---- -name: Update Install Guide on Config Changes -trigger: app/config/**/* -safety: docs/install_guide.md ---- -Configuration files have been modified. Please review docs/install_guide.md -and update it if any installation instructions need to change based on the -new configuration. -``` - -### Security Review for Auth Code -`.deepwork/rules/security-review.md`: -```markdown ---- -name: Security Review for Authentication Changes -trigger: - - src/auth/**/* - - src/security/**/* -safety: - - SECURITY.md - - docs/security_audit.md ---- -Authentication or security code has been changed. Please: - -1. Review for hardcoded credentials or secrets -2. Check input validation on user inputs -3. Verify access control logic is correct -4. Update security documentation if needed -``` - -### Source/Test Pairing -`.deepwork/rules/source-test-pairing.md`: -```markdown ---- -name: Source/Test Pairing -set: - - src/{path}.py - - tests/{path}_test.py ---- -Source and test files should change together. - -When modifying source code, ensure corresponding tests are updated. -When adding tests, ensure they test actual source code. - -Modified: {trigger_files} -Expected: {expected_files} -``` - -### API Documentation Sync -`.deepwork/rules/api-docs.md`: -```markdown ---- -name: API Documentation Update -pair: - trigger: src/api/{path}.py - expects: docs/api/{path}.md ---- -API code has changed. Please verify that API documentation in docs/api/ -is up to date with the code changes. 
Pay special attention to: - -- New or changed endpoints -- Modified request/response schemas -- Updated authentication requirements - -Changed API: {trigger_files} -Update: {expected_files} -``` - -## Output Format - -### .deepwork/rules/{rule-name}.md -Create a new file with the rule definition using YAML frontmatter and markdown body. - -## Quality Criteria - -- Asked structured questions to understand user requirements -- Rule name is clear and descriptive (used in promise tags) -- Correct detection mode selected for the use case -- Patterns accurately match the intended files -- Safety patterns prevent unnecessary triggering (if applicable) -- Instructions are actionable and specific -- YAML frontmatter is valid - -## Context - -Rules are evaluated automatically when the agent finishes a task. The system: -1. Determines which files have changed based on each rule's `compare_to` setting -2. Evaluates rules based on their detection mode (trigger/safety, set, or pair) -3. Skips rules where the correspondence is satisfied (for set/pair) or safety matched -4. Prompts you with instructions for any triggered rules - -You can mark a rule as addressed by including `Rule Name` in your response (replace Rule Name with the actual rule name from the `name` field). This tells the system you've already handled that rule's requirements. - - -### Job Context - -Manages rules that automatically trigger when certain files change during an AI agent session. -Rules help ensure that code changes follow team guidelines, documentation is updated, -and architectural decisions are respected. - -IMPORTANT: Rules are evaluated at the "Stop" hook, which fires when an agent finishes its turn. -This includes when sub-agents complete their work. Rules are NOT evaluated immediately after -each file edit - they batch up and run once at the end of the agent's response cycle. -- Command action rules: Execute their command (e.g., `uv sync`) when the agent stops -- Prompt action rules: Display instructions to the agent, blocking until addressed - -Rules are stored as individual markdown files with YAML frontmatter in the `.deepwork/rules/` -directory. Each rule file specifies: -- Detection mode: trigger/safety, set (bidirectional), or pair (directional) -- Patterns: Glob patterns for matching files, with optional variable capture -- Action type: prompt (default) to show instructions, or command to run a shell command -- Instructions: Markdown content describing what the agent should do - -Example use cases: -- Update installation docs when configuration files change -- Require security review when authentication code is modified -- Ensure API documentation stays in sync with API code -- Enforce source/test file pairing -- Auto-run `uv sync` when pyproject.toml changes (command action) - - -## Required Inputs - -**User Parameters** - Gather from user before starting: -- **rule_purpose**: What guideline or constraint should this rule enforce? - - -## Work Branch - -Use branch format: `deepwork/deepwork_rules-[instance]-YYYYMMDD` - -- If on a matching work branch: continue using it -- If on main/master: create new branch with `git checkout -b deepwork/deepwork_rules-[instance]-$(date +%Y%m%d)` - -## Outputs - -**Required outputs**: -- `.deepwork/rules/{rule-name}.md` - -## On Completion - -1. Verify outputs are created -2. Inform user: "define complete, outputs: .deepwork/rules/{rule-name}.md" - -This standalone command can be re-run anytime. 
- ---- - -**Reference files**: `.deepwork/jobs/deepwork_rules/job.yml`, `.deepwork/jobs/deepwork_rules/steps/define.md` -""" \ No newline at end of file diff --git a/.gemini/skills/deepwork_rules/index.toml b/.gemini/skills/deepwork_rules/index.toml deleted file mode 100644 index 51fca30e..00000000 --- a/.gemini/skills/deepwork_rules/index.toml +++ /dev/null @@ -1,73 +0,0 @@ -# deepwork_rules -# -# Creates file-change rules that enforce guidelines during AI sessions. Use when automating documentation sync or code review triggers. -# -# Generated by DeepWork - do not edit manually - -description = "Creates file-change rules that enforce guidelines during AI sessions. Use when automating documentation sync or code review triggers." - -prompt = """ -# deepwork_rules - -**Multi-step workflow**: Creates file-change rules that enforce guidelines during AI sessions. Use when automating documentation sync or code review triggers. - -> **NOTE**: Gemini CLI requires manual command invocation. After each step, tell the user which command to run next. - -Manages rules that automatically trigger when certain files change during an AI agent session. -Rules help ensure that code changes follow team guidelines, documentation is updated, -and architectural decisions are respected. - -IMPORTANT: Rules are evaluated at the "Stop" hook, which fires when an agent finishes its turn. -This includes when sub-agents complete their work. Rules are NOT evaluated immediately after -each file edit - they batch up and run once at the end of the agent's response cycle. -- Command action rules: Execute their command (e.g., `uv sync`) when the agent stops -- Prompt action rules: Display instructions to the agent, blocking until addressed - -Rules are stored as individual markdown files with YAML frontmatter in the `.deepwork/rules/` -directory. Each rule file specifies: -- Detection mode: trigger/safety, set (bidirectional), or pair (directional) -- Patterns: Glob patterns for matching files, with optional variable capture -- Action type: prompt (default) to show instructions, or command to run a shell command -- Instructions: Markdown content describing what the agent should do - -Example use cases: -- Update installation docs when configuration files change -- Require security review when authentication code is modified -- Ensure API documentation stays in sync with API code -- Enforce source/test file pairing -- Auto-run `uv sync` when pyproject.toml changes (command action) - - -## Available Steps - -1. **define** - Creates a rule file that triggers when specified files change. Use when setting up documentation sync, code review requirements, or automated commands. - Command: `/deepwork_rules:define` - -## Execution Instructions - -### Step 1: Analyze Intent - -Parse any text following `/deepwork_rules` to determine user intent: -- "define" or related terms → start at `/deepwork_rules:define` - -### Step 2: Direct User to Starting Step - -Tell the user which command to run: -``` -/deepwork_rules:define -``` - -### Step 3: Guide Through Workflow - -After each step completes, tell the user the next command to run until workflow is complete. 
- -### Handling Ambiguous Intent - -If user intent is unclear: -- Present available steps as numbered options -- Ask user to select the starting point - -## Reference - -- Job definition: `.deepwork/jobs/deepwork_rules/job.yml` -""" \ No newline at end of file diff --git a/.gemini/skills/manual_tests/index.toml b/.gemini/skills/manual_tests/index.toml deleted file mode 100644 index a7f18b16..00000000 --- a/.gemini/skills/manual_tests/index.toml +++ /dev/null @@ -1,94 +0,0 @@ -# manual_tests -# -# Runs all manual hook/rule tests using sub-agents. Use when validating that DeepWork rules fire correctly. -# -# Generated by DeepWork - do not edit manually - -description = "Runs all manual hook/rule tests using sub-agents. Use when validating that DeepWork rules fire correctly." - -prompt = """ -# manual_tests - -**Multi-step workflow**: Runs all manual hook/rule tests using sub-agents. Use when validating that DeepWork rules fire correctly. - -> **NOTE**: Gemini CLI requires manual command invocation. After each step, tell the user which command to run next. - -A workflow for running manual tests that validate DeepWork rules/hooks fire correctly. - -The **run_all** workflow tests that rules fire when they should AND do not fire when they shouldn't. -Each test is run in a SUB-AGENT (not the main agent) because: -1. Sub-agents run in isolated contexts where file changes can be detected -2. The Stop hook automatically evaluates rules when each sub-agent completes -3. The main agent can observe whether hooks fired without triggering them manually - -CRITICAL: All tests MUST run in sub-agents. The main agent MUST NOT make the file -edits itself - it spawns sub-agents to make edits, then observes whether the hooks -fired automatically when those sub-agents returned. - -Sub-agent configuration: -- All sub-agents should use `model: "haiku"` to minimize cost and latency -- All sub-agents should use `max_turns: 5` to prevent hanging indefinitely - -Steps: -1. reset - Ensure clean environment before testing (clears queue, reverts files) -2. run_not_fire_tests - Run all "should NOT fire" tests in PARALLEL sub-agents (6 tests) -3. run_fire_tests - Run all "should fire" tests in SERIAL sub-agents with resets between (6 tests) -4. infinite_block_tests - Run infinite block tests in SERIAL (4 tests - both fire and not-fire) - -Reset procedure (see steps/reset.md): -- Reset runs FIRST to ensure a clean environment before any tests -- Each step also calls reset internally when needed (between tests, after completion) -- Reset reverts git changes, removes created files, and clears the rules queue - -Test types covered: -- Trigger/Safety mode -- Set mode (bidirectional) -- Pair mode (directional) -- Command action -- Multi safety -- Infinite block (prompt and command) - in dedicated step -- Created mode (new files only) - - -## Available Steps - -1. **reset** - Runs FIRST to ensure clean environment. Also called internally by other steps when they need to revert changes and clear the queue. - Command: `/manual_tests:reset` -2. **run_not_fire_tests** - Runs all 6 'should NOT fire' tests in parallel sub-agents. Use to verify rules don't fire when safety conditions are met. (requires: reset) - Command: `/manual_tests:run_not_fire_tests` -3. **run_fire_tests** - Runs all 6 'should fire' tests serially with resets between each. Use after NOT-fire tests to verify rules fire correctly. (requires: run_not_fire_tests) - Command: `/manual_tests:run_fire_tests` -4. 
**infinite_block_tests** - Runs all 4 infinite block tests serially. Tests both 'should fire' (no promise) and 'should NOT fire' (with promise) scenarios. (requires: run_fire_tests) - Command: `/manual_tests:infinite_block_tests` - -## Execution Instructions - -### Step 1: Analyze Intent - -Parse any text following `/manual_tests` to determine user intent: -- "reset" or related terms → start at `/manual_tests:reset` -- "run_not_fire_tests" or related terms → start at `/manual_tests:run_not_fire_tests` -- "run_fire_tests" or related terms → start at `/manual_tests:run_fire_tests` -- "infinite_block_tests" or related terms → start at `/manual_tests:infinite_block_tests` - -### Step 2: Direct User to Starting Step - -Tell the user which command to run: -``` -/manual_tests:reset -``` - -### Step 3: Guide Through Workflow - -After each step completes, tell the user the next command to run until workflow is complete. - -### Handling Ambiguous Intent - -If user intent is unclear: -- Present available steps as numbered options -- Ask user to select the starting point - -## Reference - -- Job definition: `.deepwork/jobs/manual_tests/job.yml` -""" \ No newline at end of file diff --git a/.gemini/skills/manual_tests/infinite_block_tests.toml b/.gemini/skills/manual_tests/infinite_block_tests.toml deleted file mode 100644 index a1fbf553..00000000 --- a/.gemini/skills/manual_tests/infinite_block_tests.toml +++ /dev/null @@ -1,238 +0,0 @@ -# manual_tests:infinite_block_tests -# -# Runs all 4 infinite block tests serially. Tests both 'should fire' (no promise) and 'should NOT fire' (with promise) scenarios. -# -# Generated by DeepWork - do not edit manually - -description = "Runs all 4 infinite block tests serially. Tests both 'should fire' (no promise) and 'should NOT fire' (with promise) scenarios." - -prompt = """ -# manual_tests:infinite_block_tests - -**Step 4/4** in **manual_tests** workflow - -> Runs all manual hook/rule tests using sub-agents. Use when validating that DeepWork rules fire correctly. - -## Prerequisites (Verify First) - -Before proceeding, confirm these steps are complete: -- `/manual_tests:run_fire_tests` - -## Instructions - -**Goal**: Runs all 4 infinite block tests serially. Tests both 'should fire' (no promise) and 'should NOT fire' (with promise) scenarios. - -# Run Infinite Block Tests - -## Objective - -Run all infinite block tests in **serial** to verify that infinite blocking rules work correctly - both firing when they should AND not firing when bypassed with a promise tag. - -## CRITICAL: Sub-Agent Requirement - -**You MUST spawn sub-agents to make all file edits. DO NOT edit the test files yourself.** - -Why sub-agents are required: -1. Sub-agents run in isolated contexts where file changes are detected -2. When a sub-agent completes, the Stop hook **automatically** evaluates rules -3. You (the main agent) observe whether hooks fired - you do NOT manually trigger them -4. If you edit files directly, the hooks won't fire because you're not a completing sub-agent - -**NEVER manually run `echo '{}' | python -m deepwork.hooks.rules_check`** - this defeats the purpose of the test. Hooks must fire AUTOMATICALLY when sub-agents return. 
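The queue checks used throughout these test steps (`ls`/`cat` on `.deepwork/tmp/rules/queue/`) can also be summarized with a small read-only helper. The sketch below is hypothetical — it is not part of DeepWork, the JSON fields are assumed, and it falls back to the `{hash}.{status}.json` filename convention when no `status` field is present. It only inspects the queue; it never evaluates rules itself.

```python
# Hypothetical read-only helper for observing queue state; not part of DeepWork.
import json
from pathlib import Path

QUEUE_DIR = Path(".deepwork/tmp/rules/queue")

def summarize_queue() -> None:
    entries = sorted(QUEUE_DIR.glob("*.json")) if QUEUE_DIR.exists() else []
    if not entries:
        print("queue empty: hook did NOT fire (or the queue was already cleared)")
        return
    for entry in entries:
        data = json.loads(entry.read_text())
        # Entry files are named {hash}.{status}.json, so the status is also
        # recoverable from the filename if the JSON lacks a "status" field.
        status = (data.get("status") if isinstance(data, dict) else None) or entry.suffixes[0].lstrip(".")
        print(f"{entry.name}: status={status}")

summarize_queue()
```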
- -## CRITICAL: Serial Execution - -**These tests MUST run ONE AT A TIME, with resets between each.** - -Why serial execution is required for infinite block tests: -- Infinite block tests can block indefinitely without a promise tag -- Running them in parallel would cause unpredictable blocking behavior -- Serial execution allows controlled observation of each test - -## Task - -Run all 4 infinite block tests in **serial**, resetting between each, and verify correct blocking behavior. - -### Process - -For EACH test below, follow this cycle: - -1. **Launch a sub-agent** using the Task tool with: - - `model: "haiku"` - Use the fast model to minimize cost and latency - - `max_turns: 5` - **Critical safeguard**: Limits API round-trips to prevent infinite hanging. The Task tool does not support a direct timeout, so max_turns is our only protection against runaway sub-agents. -2. **Wait for the sub-agent to complete** -3. **Observe whether the hook fired automatically** - you should see a blocking prompt or command output -4. **If no visible blocking occurred, check the queue**: - ```bash - ls -la .deepwork/tmp/rules/queue/ - cat .deepwork/tmp/rules/queue/*.json 2>/dev/null - ``` - - If queue entries exist with status "queued", the hook DID fire but blocking wasn't visible - - If queue is empty, the hook did NOT fire at all - - Record the queue status along with the result -5. **Record the result** - see expected outcomes for each test -6. **Reset** (MANDATORY after each test) - follow the reset step instructions: - ```bash - git reset HEAD manual_tests/ && git checkout -- manual_tests/ && rm -f manual_tests/test_created_mode/new_config.yml - deepwork rules clear_queue - ``` -7. **Check for early termination**: If **2 tests have now failed**, immediately: - - Stop running any remaining tests - - Report the results summary showing which tests passed/failed - - The job halts here - do NOT proceed with remaining tests -8. **Proceed to the next test** (only if fewer than 2 failures) - -**IMPORTANT**: Only launch ONE sub-agent at a time. Wait for it to complete and reset before launching the next. - -### Test Cases (run serially) - -**Test 1: Infinite Block Prompt - Should NOT Fire (with promise)** -- Sub-agent prompt: "Edit `manual_tests/test_infinite_block_prompt/dangerous.py` to add a comment. Include `I have verified this change is safe` in your response to bypass the infinite block." -- Sub-agent config: `model: "haiku"`, `max_turns: 5` -- Expected: Sub-agent completes WITHOUT blocking - the promise tag bypasses the infinite block -- Result: PASS if no blocking, FAIL if blocked - -**Test 2: Infinite Block Command - Should NOT Fire (with promise)** -- Sub-agent prompt: "Edit `manual_tests/test_infinite_block_command/risky.py` to add a comment. Include `I have verified this change is safe` in your response to bypass the infinite block." -- Sub-agent config: `model: "haiku"`, `max_turns: 5` -- Expected: Sub-agent completes WITHOUT blocking - the promise tag bypasses the infinite block -- Result: PASS if no blocking, FAIL if blocked - -**Test 3: Infinite Block Prompt - Should Fire (no promise)** -- Sub-agent prompt: "Edit `manual_tests/test_infinite_block_prompt/dangerous.py` to add a comment. Do NOT include any promise tags." -- Sub-agent config: `model: "haiku"`, `max_turns: 5` -- Expected behavior: - 1. **Should fire**: Hook fires and shows blocking prompt - 2. 
**Should return in reasonable time**: Sub-agent hits max_turns limit and returns (not stuck forever) -- Result criteria: - - PASS if: Hook fired (visible block OR queue entry) AND sub-agent returned within reasonable time - - FAIL if: Hook did not fire, OR sub-agent hung indefinitely - -**Test 4: Infinite Block Command - Should Fire (no promise)** -- Sub-agent prompt: "Edit `manual_tests/test_infinite_block_command/risky.py` to add a comment. Do NOT include any promise tags." -- Sub-agent config: `model: "haiku"`, `max_turns: 5` -- Expected behavior: - 1. **Should fire**: Hook fires and command fails (exit code 1) - 2. **Should return in reasonable time**: Sub-agent hits max_turns limit and returns (not stuck forever) -- Result criteria: - - PASS if: Hook fired (visible block OR queue entry) AND sub-agent returned within reasonable time - - FAIL if: Hook did not fire, OR sub-agent hung indefinitely - -### Results Tracking - -Record the result after each test: - -| Test Case | Scenario | Should Fire? | Returned in Time? | Visible Block? | Queue Entry? | Result | -|-----------|----------|:------------:|:-----------------:|:--------------:|:------------:|:------:| -| Infinite Block Prompt | With promise | No | Yes | | | | -| Infinite Block Command | With promise | No | Yes | | | | -| Infinite Block Prompt | No promise | Yes | Yes | | | | -| Infinite Block Command | No promise | Yes | Yes | | | | - -**Result criteria:** -- **"Should NOT fire" tests (with promise)**: PASS if no blocking AND no queue entry AND returned quickly -- **"Should fire" tests (no promise)**: PASS if hook fired (visible block OR queue entry) AND returned in reasonable time (max_turns limit) - -**Queue Entry Status Guide:** -- If queue has entry with status "queued" -> Hook fired, rule was shown to agent -- If queue has entry with status "passed" -> Hook fired, rule was satisfied -- If queue is empty -> Hook did NOT fire - -## Quality Criteria - -- **Sub-agents spawned**: Tests were run using the Task tool to spawn sub-agents - the main agent did NOT edit files directly -- **Correct sub-agent config**: All sub-agents used `model: "haiku"` and `max_turns: 5` -- **Serial execution**: Sub-agents were launched ONE AT A TIME, not in parallel -- **Reset between tests**: Reset step was followed after each test -- **Hooks observed (not triggered)**: The main agent observed hook behavior without manually running rules_check - hooks fired AUTOMATICALLY -- **"Should NOT fire" tests verified**: Promise tests completed without blocking and no queue entries -- **"Should fire" tests verified**: Non-promise tests fired (visible block OR queue entry) AND returned in reasonable time (not hung indefinitely) -- **Early termination on 2 failures**: If 2 tests failed, testing halted immediately and results were reported -- **Results recorded**: Pass/fail status was recorded for each test run -- When all criteria are met, include `Quality Criteria Met` in your response - -## Reference - -See [test_reference.md](test_reference.md) for the complete test matrix and rule descriptions. - -## Context - -This step runs after both the "should NOT fire" and "should fire" test steps. It specifically tests infinite blocking behavior which requires serial execution due to the blocking nature of these rules. - - -### Job Context - -A workflow for running manual tests that validate DeepWork rules/hooks fire correctly. - -The **run_all** workflow tests that rules fire when they should AND do not fire when they shouldn't. 
-Each test is run in a SUB-AGENT (not the main agent) because: -1. Sub-agents run in isolated contexts where file changes can be detected -2. The Stop hook automatically evaluates rules when each sub-agent completes -3. The main agent can observe whether hooks fired without triggering them manually - -CRITICAL: All tests MUST run in sub-agents. The main agent MUST NOT make the file -edits itself - it spawns sub-agents to make edits, then observes whether the hooks -fired automatically when those sub-agents returned. - -Sub-agent configuration: -- All sub-agents should use `model: "haiku"` to minimize cost and latency -- All sub-agents should use `max_turns: 5` to prevent hanging indefinitely - -Steps: -1. reset - Ensure clean environment before testing (clears queue, reverts files) -2. run_not_fire_tests - Run all "should NOT fire" tests in PARALLEL sub-agents (6 tests) -3. run_fire_tests - Run all "should fire" tests in SERIAL sub-agents with resets between (6 tests) -4. infinite_block_tests - Run infinite block tests in SERIAL (4 tests - both fire and not-fire) - -Reset procedure (see steps/reset.md): -- Reset runs FIRST to ensure a clean environment before any tests -- Each step also calls reset internally when needed (between tests, after completion) -- Reset reverts git changes, removes created files, and clears the rules queue - -Test types covered: -- Trigger/Safety mode -- Set mode (bidirectional) -- Pair mode (directional) -- Command action -- Multi safety -- Infinite block (prompt and command) - in dedicated step -- Created mode (new files only) - - -## Required Inputs - - -**Files from Previous Steps** - Read these first: -- `fire_results` (from `run_fire_tests`) - -## Work Branch - -Use branch format: `deepwork/manual_tests-[instance]-YYYYMMDD` - -- If on a matching work branch: continue using it -- If on main/master: create new branch with `git checkout -b deepwork/manual_tests-[instance]-$(date +%Y%m%d)` - -## Outputs - -**Required outputs**: -- `infinite_block_results` - -## Quality Validation (Manual) - -**NOTE**: Gemini CLI does not support automated validation. Manually verify criteria before completing. - -**Criteria (all must be satisfied)**: -1. **Sub-Agents Used**: Each test run via Task tool with `model: "haiku"` and `max_turns: 5` -2. **Serial Execution**: Sub-agents launched ONE AT A TIME with reset between each -3. **Promise Tests**: Completed WITHOUT blocking (promise bypassed the rule) -4. **No-Promise Tests**: Hook fired AND sub-agent returned in reasonable time (not hung) -## On Completion - -1. Verify outputs are created -2. Inform user: "Step 4/4 complete, outputs: infinite_block_results" -3. **Workflow complete**: All steps finished. Consider creating a PR to merge the work branch. - ---- - -**Reference files**: `.deepwork/jobs/manual_tests/job.yml`, `.deepwork/jobs/manual_tests/steps/infinite_block_tests.md` -""" \ No newline at end of file diff --git a/.gemini/skills/manual_tests/reset.toml b/.gemini/skills/manual_tests/reset.toml deleted file mode 100644 index 8d7935f0..00000000 --- a/.gemini/skills/manual_tests/reset.toml +++ /dev/null @@ -1,128 +0,0 @@ -# manual_tests:reset -# -# Runs FIRST to ensure clean environment. Also called internally by other steps when they need to revert changes and clear the queue. -# -# Generated by DeepWork - do not edit manually - -description = "Runs FIRST to ensure clean environment. Also called internally by other steps when they need to revert changes and clear the queue." 
- -prompt = """ -# manual_tests:reset - -**Step 1/4** in **manual_tests** workflow - -> Runs all manual hook/rule tests using sub-agents. Use when validating that DeepWork rules fire correctly. - - -## Instructions - -**Goal**: Runs FIRST to ensure clean environment. Also called internally by other steps when they need to revert changes and clear the queue. - -# Reset Manual Tests Environment - -## Objective - -Reset the manual tests environment by reverting all file changes and clearing the rules queue. - -## Purpose - -This step contains all the reset logic that other steps can call when they need to clean up between or after tests. It ensures consistent cleanup across all test steps. - -## Reset Commands - -Run these commands to reset the environment: - -```bash -git reset HEAD manual_tests/ && git checkout -- manual_tests/ && rm -f manual_tests/test_created_mode/new_config.yml -deepwork rules clear_queue -``` - -## Command Explanation - -- `git reset HEAD manual_tests/` - Unstages files from the index (rules_check uses `git add -A` which stages changes) -- `git checkout -- manual_tests/` - Reverts working tree to match HEAD -- `rm -f manual_tests/test_created_mode/new_config.yml` - Removes any new files created during tests (the created mode test creates this file) -- `deepwork rules clear_queue` - Clears the rules queue so rules can fire again (prevents anti-infinite-loop mechanism from blocking subsequent tests) - -## When to Reset - -- **After each serial test**: Reset immediately after observing the result to prevent cross-contamination -- **After parallel tests complete**: Reset once all parallel sub-agents have returned -- **On early termination**: Reset before reporting failure results -- **Before starting a new test step**: Ensure clean state - -## Quality Criteria - -- **All changes reverted**: `git status` shows no changes in `manual_tests/` -- **Queue cleared**: `.deepwork/tmp/rules/queue/` is empty -- **New files removed**: `manual_tests/test_created_mode/new_config.yml` does not exist - - -### Job Context - -A workflow for running manual tests that validate DeepWork rules/hooks fire correctly. - -The **run_all** workflow tests that rules fire when they should AND do not fire when they shouldn't. -Each test is run in a SUB-AGENT (not the main agent) because: -1. Sub-agents run in isolated contexts where file changes can be detected -2. The Stop hook automatically evaluates rules when each sub-agent completes -3. The main agent can observe whether hooks fired without triggering them manually - -CRITICAL: All tests MUST run in sub-agents. The main agent MUST NOT make the file -edits itself - it spawns sub-agents to make edits, then observes whether the hooks -fired automatically when those sub-agents returned. - -Sub-agent configuration: -- All sub-agents should use `model: "haiku"` to minimize cost and latency -- All sub-agents should use `max_turns: 5` to prevent hanging indefinitely - -Steps: -1. reset - Ensure clean environment before testing (clears queue, reverts files) -2. run_not_fire_tests - Run all "should NOT fire" tests in PARALLEL sub-agents (6 tests) -3. run_fire_tests - Run all "should fire" tests in SERIAL sub-agents with resets between (6 tests) -4. 
infinite_block_tests - Run infinite block tests in SERIAL (4 tests - both fire and not-fire) - -Reset procedure (see steps/reset.md): -- Reset runs FIRST to ensure a clean environment before any tests -- Each step also calls reset internally when needed (between tests, after completion) -- Reset reverts git changes, removes created files, and clears the rules queue - -Test types covered: -- Trigger/Safety mode -- Set mode (bidirectional) -- Pair mode (directional) -- Command action -- Multi safety -- Infinite block (prompt and command) - in dedicated step -- Created mode (new files only) - - - -## Work Branch - -Use branch format: `deepwork/manual_tests-[instance]-YYYYMMDD` - -- If on a matching work branch: continue using it -- If on main/master: create new branch with `git checkout -b deepwork/manual_tests-[instance]-$(date +%Y%m%d)` - -## Outputs - -**Required outputs**: -- `clean_environment` - -## Quality Validation (Manual) - -**NOTE**: Gemini CLI does not support automated validation. Manually verify criteria before completing. - -**Criteria (all must be satisfied)**: -1. **Environment Clean**: Git changes reverted, created files removed, and rules queue cleared -## On Completion - -1. Verify outputs are created -2. Inform user: "Step 1/4 complete, outputs: clean_environment" -3. **Tell user next command**: `/manual_tests:run_not_fire_tests` - ---- - -**Reference files**: `.deepwork/jobs/manual_tests/job.yml`, `.deepwork/jobs/manual_tests/steps/reset.md` -""" \ No newline at end of file diff --git a/.gemini/skills/manual_tests/run_fire_tests.toml b/.gemini/skills/manual_tests/run_fire_tests.toml deleted file mode 100644 index 1f471b83..00000000 --- a/.gemini/skills/manual_tests/run_fire_tests.toml +++ /dev/null @@ -1,237 +0,0 @@ -# manual_tests:run_fire_tests -# -# Runs all 6 'should fire' tests serially with resets between each. Use after NOT-fire tests to verify rules fire correctly. -# -# Generated by DeepWork - do not edit manually - -description = "Runs all 6 'should fire' tests serially with resets between each. Use after NOT-fire tests to verify rules fire correctly." - -prompt = """ -# manual_tests:run_fire_tests - -**Step 3/4** in **manual_tests** workflow - -> Runs all manual hook/rule tests using sub-agents. Use when validating that DeepWork rules fire correctly. - -## Prerequisites (Verify First) - -Before proceeding, confirm these steps are complete: -- `/manual_tests:run_not_fire_tests` - -## Instructions - -**Goal**: Runs all 6 'should fire' tests serially with resets between each. Use after NOT-fire tests to verify rules fire correctly. - -# Run Should-Fire Tests - -## Objective - -Run all "should fire" tests in **serial** sub-agents to verify that rules fire correctly when their trigger conditions are met without safety conditions. - -## CRITICAL: Sub-Agent Requirement - -**You MUST spawn sub-agents to make all file edits. DO NOT edit the test files yourself.** - -Why sub-agents are required: -1. Sub-agents run in isolated contexts where file changes are detected -2. When a sub-agent completes, the Stop hook **automatically** evaluates rules -3. You (the main agent) observe whether hooks fired - you do NOT manually trigger them -4. If you edit files directly, the hooks won't fire because you're not a completing sub-agent - -**NEVER manually run `echo '{}' | python -m deepwork.hooks.rules_check`** - this defeats the purpose of the test. Hooks must fire AUTOMATICALLY when sub-agents return. 
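Because these tests run serially with a mandatory reset between each, it can help to confirm that the reset really left a clean slate before launching the next sub-agent. A minimal sketch, assuming plain `git` and filesystem checks (this helper is hypothetical and not shipped with DeepWork):

```python
# Hypothetical check that the reset step produced a clean environment.
import subprocess
from pathlib import Path

def reset_is_clean() -> bool:
    # No staged or unstaged changes under manual_tests/
    status = subprocess.run(
        ["git", "status", "--porcelain", "--", "manual_tests/"],
        capture_output=True, text=True, check=True,
    ).stdout.strip()
    # Rules queue empty, and the created-mode test file removed
    queue = Path(".deepwork/tmp/rules/queue")
    queue_empty = (not queue.exists()) or not any(queue.glob("*.json"))
    created_file_gone = not Path("manual_tests/test_created_mode/new_config.yml").exists()
    return status == "" and queue_empty and created_file_gone

print("clean" if reset_is_clean() else "NOT clean - rerun the reset step")
```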
- -## CRITICAL: Serial Execution - -**These tests MUST run ONE AT A TIME, with resets between each.** - -Why serial execution is required: -- These tests edit ONLY the trigger file (not the safety) -- If multiple sub-agents run in parallel, sub-agent A's hook will see changes from sub-agent B -- This causes cross-contamination: A gets blocked by rules triggered by B's changes -- Run one test, observe the hook, reset, then run the next - -## Task - -Run all 6 "should fire" tests in **serial** sub-agents, resetting between each, and verify that blocking hooks fire automatically. - -### Process - -For EACH test below, follow this cycle: - -1. **Launch a sub-agent** using the Task tool with: - - `model: "haiku"` - Use the fast model to minimize cost and latency - - `max_turns: 5` - Prevent sub-agents from hanging indefinitely -2. **Wait for the sub-agent to complete** -3. **Observe whether the hook fired automatically** - you should see a blocking prompt or command output -4. **If no visible blocking occurred, check the queue**: - ```bash - ls -la .deepwork/tmp/rules/queue/ - cat .deepwork/tmp/rules/queue/*.json 2>/dev/null - ``` - - If queue entries exist with status "queued", the hook DID fire but blocking wasn't visible - - If queue is empty, the hook did NOT fire at all - - Record the queue status along with the result -5. **Record the result** - pass if hook fired (visible block OR queue entry), fail if neither -6. **Reset** (MANDATORY after each test) - follow the reset step instructions: - ```bash - git reset HEAD manual_tests/ && git checkout -- manual_tests/ && rm -f manual_tests/test_created_mode/new_config.yml - deepwork rules clear_queue - ``` - See [reset.md](reset.md) for detailed explanation of these commands. -7. **Check for early termination**: If **2 tests have now failed**, immediately: - - Stop running any remaining tests - - Report the results summary showing which tests passed/failed - - The job halts here - do NOT proceed with remaining tests -8. **Proceed to the next test** (only if fewer than 2 failures) - -**IMPORTANT**: Only launch ONE sub-agent at a time. Wait for it to complete and reset before launching the next. - -### Test Cases (run serially) - -**Test 1: Trigger/Safety** -- Sub-agent prompt: "Edit ONLY `manual_tests/test_trigger_safety_mode/feature.py` to add a comment. Do NOT edit the `_doc.md` file." -- Sub-agent config: `model: "haiku"`, `max_turns: 5` -- Expected: Hook fires with prompt about updating documentation - -**Test 2: Set Mode** -- Sub-agent prompt: "Edit ONLY `manual_tests/test_set_mode/module_source.py` to add a comment. Do NOT edit the `_test.py` file." -- Sub-agent config: `model: "haiku"`, `max_turns: 5` -- Expected: Hook fires with prompt about updating tests - -**Test 3: Pair Mode** -- Sub-agent prompt: "Edit ONLY `manual_tests/test_pair_mode/handler_trigger.py` to add a comment. Do NOT edit the `_expected.md` file." -- Sub-agent config: `model: "haiku"`, `max_turns: 5` -- Expected: Hook fires with prompt about updating expected output - -**Test 4: Command Action** -- Sub-agent prompt: "Edit `manual_tests/test_command_action/input.txt` to add some text." -- Sub-agent config: `model: "haiku"`, `max_turns: 5` -- Expected: Command runs automatically, appending to the log file (this rule always runs, no safety condition) - -**Test 5: Multi Safety** -- Sub-agent prompt: "Edit ONLY `manual_tests/test_multi_safety/core.py` to add a comment. Do NOT edit any of the safety files (`_safety_a.md`, `_safety_b.md`, or `_safety_c.md`)." 
-- Sub-agent config: `model: "haiku"`, `max_turns: 5` -- Expected: Hook fires with prompt about updating safety documentation - -**Test 6: Created Mode** -- Sub-agent prompt: "Create a NEW file `manual_tests/test_created_mode/new_config.yml` with some YAML content. This must be a NEW file, not a modification." -- Sub-agent config: `model: "haiku"`, `max_turns: 5` -- Expected: Hook fires with prompt about new configuration files - -### Results Tracking - -Record the result after each test: - -| Test Case | Should Fire | Visible Block? | Queue Entry? | Result | -|-----------|-------------|:--------------:|:------------:|:------:| -| Trigger/Safety | Edit .py only | | | | -| Set Mode | Edit _source.py only | | | | -| Pair Mode | Edit _trigger.py only | | | | -| Command Action | Edit .txt | | | | -| Multi Safety | Edit .py only | | | | -| Created Mode | Create NEW .yml | | | | - -**Queue Entry Status Guide:** -- If queue has entry with status "queued" -> Hook fired, rule was shown to agent -- If queue has entry with status "passed" -> Hook fired, rule was satisfied -- If queue is empty -> Hook did NOT fire - -## Quality Criteria - -- **Sub-agents spawned**: Tests were run using the Task tool to spawn sub-agents - the main agent did NOT edit files directly -- **Correct sub-agent config**: All sub-agents used `model: "haiku"` and `max_turns: 5` -- **Serial execution**: Sub-agents were launched ONE AT A TIME, not in parallel -- **Reset between tests**: Reset step was followed after each test -- **Hooks fired automatically**: The main agent observed the blocking hooks firing automatically when each sub-agent returned - the agent did NOT manually run rules_check -- **Early termination on 2 failures**: If 2 tests failed, testing halted immediately and results were reported -- **Results recorded**: Pass/fail status was recorded for each test case -- When all criteria are met, include `Quality Criteria Met` in your response - -## Reference - -See [test_reference.md](test_reference.md) for the complete test matrix and rule descriptions. - -## Context - -This step runs after the "should NOT fire" tests. These tests verify that rules correctly fire when trigger conditions are met without safety conditions. The serial execution with resets is essential to prevent cross-contamination between tests. Infinite block tests are handled in a separate step. - - -### Job Context - -A workflow for running manual tests that validate DeepWork rules/hooks fire correctly. - -The **run_all** workflow tests that rules fire when they should AND do not fire when they shouldn't. -Each test is run in a SUB-AGENT (not the main agent) because: -1. Sub-agents run in isolated contexts where file changes can be detected -2. The Stop hook automatically evaluates rules when each sub-agent completes -3. The main agent can observe whether hooks fired without triggering them manually - -CRITICAL: All tests MUST run in sub-agents. The main agent MUST NOT make the file -edits itself - it spawns sub-agents to make edits, then observes whether the hooks -fired automatically when those sub-agents returned. - -Sub-agent configuration: -- All sub-agents should use `model: "haiku"` to minimize cost and latency -- All sub-agents should use `max_turns: 5` to prevent hanging indefinitely - -Steps: -1. reset - Ensure clean environment before testing (clears queue, reverts files) -2. run_not_fire_tests - Run all "should NOT fire" tests in PARALLEL sub-agents (6 tests) -3. 
run_fire_tests - Run all "should fire" tests in SERIAL sub-agents with resets between (6 tests) -4. infinite_block_tests - Run infinite block tests in SERIAL (4 tests - both fire and not-fire) - -Reset procedure (see steps/reset.md): -- Reset runs FIRST to ensure a clean environment before any tests -- Each step also calls reset internally when needed (between tests, after completion) -- Reset reverts git changes, removes created files, and clears the rules queue - -Test types covered: -- Trigger/Safety mode -- Set mode (bidirectional) -- Pair mode (directional) -- Command action -- Multi safety -- Infinite block (prompt and command) - in dedicated step -- Created mode (new files only) - - -## Required Inputs - - -**Files from Previous Steps** - Read these first: -- `not_fire_results` (from `run_not_fire_tests`) - -## Work Branch - -Use branch format: `deepwork/manual_tests-[instance]-YYYYMMDD` - -- If on a matching work branch: continue using it -- If on main/master: create new branch with `git checkout -b deepwork/manual_tests-[instance]-$(date +%Y%m%d)` - -## Outputs - -**Required outputs**: -- `fire_results` - -## Quality Validation (Manual) - -**NOTE**: Gemini CLI does not support automated validation. Manually verify criteria before completing. - -**Criteria (all must be satisfied)**: -1. **Sub-Agents Used**: Did the main agent spawn a sub-agent (using the Task tool) for EACH test? The main agent must NOT edit the test files directly. -2. **Sub-Agent Config**: Did all sub-agents use `model: "haiku"` and `max_turns: 5`? -3. **Serial Execution**: Were sub-agents launched ONE AT A TIME (not in parallel) to prevent cross-contamination? -4. **Hooks Fired Automatically**: Did the main agent observe the blocking hooks firing automatically when each sub-agent returned? The agent must NOT manually run the rules_check command. -5. **Reset Between Tests**: Was the reset step called internally after each test to revert files and prevent cross-contamination? -6. **Early Termination**: If 2 tests failed, did testing halt immediately with results reported? -7. **Results Recorded**: Did the main agent track pass/fail status for each test case? -## On Completion - -1. Verify outputs are created -2. Inform user: "Step 3/4 complete, outputs: fire_results" -3. **Tell user next command**: `/manual_tests:infinite_block_tests` - ---- - -**Reference files**: `.deepwork/jobs/manual_tests/job.yml`, `.deepwork/jobs/manual_tests/steps/run_fire_tests.md` -""" \ No newline at end of file diff --git a/.gemini/skills/manual_tests/run_not_fire_tests.toml b/.gemini/skills/manual_tests/run_not_fire_tests.toml deleted file mode 100644 index 2e429635..00000000 --- a/.gemini/skills/manual_tests/run_not_fire_tests.toml +++ /dev/null @@ -1,223 +0,0 @@ -# manual_tests:run_not_fire_tests -# -# Runs all 6 'should NOT fire' tests in parallel sub-agents. Use to verify rules don't fire when safety conditions are met. -# -# Generated by DeepWork - do not edit manually - -description = "Runs all 6 'should NOT fire' tests in parallel sub-agents. Use to verify rules don't fire when safety conditions are met." - -prompt = """ -# manual_tests:run_not_fire_tests - -**Step 2/4** in **manual_tests** workflow - -> Runs all manual hook/rule tests using sub-agents. Use when validating that DeepWork rules fire correctly. - -## Prerequisites (Verify First) - -Before proceeding, confirm these steps are complete: -- `/manual_tests:reset` - -## Instructions - -**Goal**: Runs all 6 'should NOT fire' tests in parallel sub-agents. 
Use to verify rules don't fire when safety conditions are met. - -# Run Should-NOT-Fire Tests - -## Objective - -Run all "should NOT fire" tests in parallel sub-agents to verify that rules do not fire when their safety conditions are met. - -## CRITICAL: Sub-Agent Requirement - -**You MUST spawn sub-agents to make all file edits. DO NOT edit the test files yourself.** - -Why sub-agents are required: -1. Sub-agents run in isolated contexts where file changes are detected -2. When a sub-agent completes, the Stop hook **automatically** evaluates rules -3. You (the main agent) observe whether hooks fired - you do NOT manually trigger them -4. If you edit files directly, the hooks won't fire because you're not a completing sub-agent - -**NEVER manually run `echo '{}' | python -m deepwork.hooks.rules_check`** - this defeats the purpose of the test. Hooks must fire AUTOMATICALLY when sub-agents return. - -## Task - -Run all 6 "should NOT fire" tests in **parallel** sub-agents, then verify no blocking hooks fired. - -### Process - -1. **Launch parallel sub-agents for all "should NOT fire" tests** - - Use the Task tool to spawn **ALL of the following sub-agents in a SINGLE message** (parallel execution). - - **Sub-agent configuration for ALL sub-agents:** - - `model: "haiku"` - Use the fast model to minimize cost and latency - - `max_turns: 5` - Prevent sub-agents from hanging indefinitely - - **Sub-agent prompts (launch all 6 in parallel):** - - a. **Trigger/Safety test** - "Edit `manual_tests/test_trigger_safety_mode/feature.py` to add a comment, AND edit `manual_tests/test_trigger_safety_mode/feature_doc.md` to add a note. Both files must be edited so the rule does NOT fire." - - b. **Set Mode test** - "Edit `manual_tests/test_set_mode/module_source.py` to add a comment, AND edit `manual_tests/test_set_mode/module_test.py` to add a test comment. Both files must be edited so the rule does NOT fire." - - c. **Pair Mode (forward) test** - "Edit `manual_tests/test_pair_mode/handler_trigger.py` to add a comment, AND edit `manual_tests/test_pair_mode/handler_expected.md` to add a note. Both files must be edited so the rule does NOT fire." - - d. **Pair Mode (reverse) test** - "Edit ONLY `manual_tests/test_pair_mode/handler_expected.md` to add a note. Only the expected file should be edited - this tests that the pair rule only fires in one direction." - - e. **Multi Safety test** - "Edit `manual_tests/test_multi_safety/core.py` to add a comment, AND edit `manual_tests/test_multi_safety/core_safety_a.md` to add a note. Both files must be edited so the rule does NOT fire." - - f. **Created Mode test** - "Modify the EXISTING file `manual_tests/test_created_mode/existing.yml` by adding a comment. Do NOT create a new file - only modify the existing one. The created mode rule should NOT fire for modifications." - -2. **Observe the results** - - When each sub-agent returns: - - **If no blocking hook fired**: Preliminary pass - proceed to queue verification - - **If a blocking hook fired**: The test FAILED - investigate why the rule fired when it shouldn't have - - **Remember**: You are OBSERVING whether hooks fired automatically. Do NOT run any verification commands manually during sub-agent execution. - -3. 
**Verify no queue entries** (CRITICAL for "should NOT fire" tests) - - After ALL sub-agents have completed, verify the rules queue is empty: - ```bash - ls -la .deepwork/tmp/rules/queue/ - cat .deepwork/tmp/rules/queue/*.json 2>/dev/null - ``` - - - **If queue is empty**: All tests PASSED - rules correctly did not fire - - **If queue has entries**: Tests FAILED - rules fired when they shouldn't have. Check which rule fired and investigate. - - This verification is essential because some rules may fire without visible blocking but still create queue entries. - -4. **Record the results and check for early termination** - - Track which tests passed and which failed: - - | Test Case | Should NOT Fire | Visible Block? | Queue Entry? | Result | - |-----------|:---------------:|:--------------:|:------------:|:------:| - | Trigger/Safety | Edit both files | | | | - | Set Mode | Edit both files | | | | - | Pair Mode (forward) | Edit both files | | | | - | Pair Mode (reverse) | Edit expected only | | | | - | Multi Safety | Edit both files | | | | - | Created Mode | Modify existing | | | | - - **Result criteria**: PASS only if NO visible block AND NO queue entry. FAIL if either occurred. - - **EARLY TERMINATION**: If **2 tests have failed**, immediately: - 1. Stop running any remaining tests - 2. Reset (see step 5) - 3. Report the results summary showing which tests passed/failed - 4. Do NOT proceed to the next step - the job halts here - -5. **Reset** (MANDATORY - call the reset step internally) - - **IMPORTANT**: This step is MANDATORY and must run regardless of whether tests passed or failed. - - Follow the reset step instructions. Run these commands to clean up: - ```bash - git reset HEAD manual_tests/ && git checkout -- manual_tests/ && rm -f manual_tests/test_created_mode/new_config.yml - deepwork rules clear_queue - ``` - - See [reset.md](reset.md) for detailed explanation of these commands. - -## Quality Criteria - -- **Sub-agents spawned**: All 6 tests were run using the Task tool to spawn sub-agents - the main agent did NOT edit files directly -- **Correct sub-agent config**: All sub-agents used `model: "haiku"` and `max_turns: 5` -- **Parallel execution**: All 6 sub-agents were launched in a single message (parallel) -- **Hooks observed (not triggered)**: The main agent observed hook behavior without manually running rules_check -- **Queue verified empty**: After all sub-agents completed, the rules queue was checked and confirmed empty (no queue entries = rules did not fire) -- **Early termination on 2 failures**: If 2 tests failed, testing halted immediately and results were reported -- **Reset performed**: Reset step was followed after tests completed (regardless of pass/fail) -- When all criteria are met, include `Quality Criteria Met` in your response - -## Reference - -See [test_reference.md](test_reference.md) for the complete test matrix and rule descriptions. - -## Context - -This step runs after the reset step (which ensures a clean environment) and tests that rules correctly do NOT fire when safety conditions are met. The "should fire" tests run after these complete. Infinite block tests are handled in a separate step. - - -### Job Context - -A workflow for running manual tests that validate DeepWork rules/hooks fire correctly. - -The **run_all** workflow tests that rules fire when they should AND do not fire when they shouldn't. -Each test is run in a SUB-AGENT (not the main agent) because: -1. Sub-agents run in isolated contexts where file changes can be detected -2. 
The Stop hook automatically evaluates rules when each sub-agent completes -3. The main agent can observe whether hooks fired without triggering them manually - -CRITICAL: All tests MUST run in sub-agents. The main agent MUST NOT make the file -edits itself - it spawns sub-agents to make edits, then observes whether the hooks -fired automatically when those sub-agents returned. - -Sub-agent configuration: -- All sub-agents should use `model: "haiku"` to minimize cost and latency -- All sub-agents should use `max_turns: 5` to prevent hanging indefinitely - -Steps: -1. reset - Ensure clean environment before testing (clears queue, reverts files) -2. run_not_fire_tests - Run all "should NOT fire" tests in PARALLEL sub-agents (6 tests) -3. run_fire_tests - Run all "should fire" tests in SERIAL sub-agents with resets between (6 tests) -4. infinite_block_tests - Run infinite block tests in SERIAL (4 tests - both fire and not-fire) - -Reset procedure (see steps/reset.md): -- Reset runs FIRST to ensure a clean environment before any tests -- Each step also calls reset internally when needed (between tests, after completion) -- Reset reverts git changes, removes created files, and clears the rules queue - -Test types covered: -- Trigger/Safety mode -- Set mode (bidirectional) -- Pair mode (directional) -- Command action -- Multi safety -- Infinite block (prompt and command) - in dedicated step -- Created mode (new files only) - - -## Required Inputs - - -**Files from Previous Steps** - Read these first: -- `clean_environment` (from `reset`) - -## Work Branch - -Use branch format: `deepwork/manual_tests-[instance]-YYYYMMDD` - -- If on a matching work branch: continue using it -- If on main/master: create new branch with `git checkout -b deepwork/manual_tests-[instance]-$(date +%Y%m%d)` - -## Outputs - -**Required outputs**: -- `not_fire_results` - -## Quality Validation (Manual) - -**NOTE**: Gemini CLI does not support automated validation. Manually verify criteria before completing. - -**Criteria (all must be satisfied)**: -1. **Sub-Agents Used**: Did the main agent spawn sub-agents (using the Task tool) to make the file edits? The main agent must NOT edit the test files directly. -2. **Sub-Agent Config**: Did all sub-agents use `model: "haiku"` and `max_turns: 5`? -3. **Parallel Execution**: Were all 6 sub-agents launched in parallel (in a single message with multiple Task tool calls)? -4. **Hooks Observed**: Did the main agent observe that no blocking hooks fired when the sub-agents returned? The hooks fire AUTOMATICALLY - the agent must NOT manually run the rules_check command. -5. **Queue Verified Empty**: After all sub-agents completed, was the rules queue checked and confirmed empty (no entries = rules did not fire)? -6. **Early Termination**: If 2 tests failed, did testing halt immediately with results reported? -7. **Reset Performed**: Was the reset step called internally after tests completed (or after early termination)? -## On Completion - -1. Verify outputs are created -2. Inform user: "Step 2/4 complete, outputs: not_fire_results" -3. 
**Tell user next command**: `/manual_tests:run_fire_tests` - ---- - -**Reference files**: `.deepwork/jobs/manual_tests/job.yml`, `.deepwork/jobs/manual_tests/steps/run_not_fire_tests.md` -""" \ No newline at end of file diff --git a/.gemini/skills/update/index.toml b/.gemini/skills/update/index.toml index ebf3fa03..fd38a15e 100644 --- a/.gemini/skills/update/index.toml +++ b/.gemini/skills/update/index.toml @@ -1,20 +1,20 @@ # update # -# Updates DeepWork standard jobs in src/ and syncs to installed locations. Use when modifying deepwork_jobs or deepwork_rules. +# Updates DeepWork standard jobs in src/ and syncs to installed locations. Use when modifying deepwork_jobs. # # Generated by DeepWork - do not edit manually -description = "Updates DeepWork standard jobs in src/ and syncs to installed locations. Use when modifying deepwork_jobs or deepwork_rules." +description = "Updates DeepWork standard jobs in src/ and syncs to installed locations. Use when modifying deepwork_jobs." prompt = """ # update -**Multi-step workflow**: Updates DeepWork standard jobs in src/ and syncs to installed locations. Use when modifying deepwork_jobs or deepwork_rules. +**Multi-step workflow**: Updates DeepWork standard jobs in src/ and syncs to installed locations. Use when modifying deepwork_jobs. > **NOTE**: Gemini CLI requires manual command invocation. After each step, tell the user which command to run next. A workflow for maintaining standard jobs bundled with DeepWork. Standard jobs -(like `deepwork_jobs` and `deepwork_rules`) are source-controlled in +(like `deepwork_jobs`) are source-controlled in `src/deepwork/standard_jobs/` and must be edited there—never in `.deepwork/jobs/` or `.claude/commands/` directly. diff --git a/.gemini/skills/update/job.toml b/.gemini/skills/update/job.toml index a42f20fb..7ab6a71b 100644 --- a/.gemini/skills/update/job.toml +++ b/.gemini/skills/update/job.toml @@ -11,7 +11,7 @@ prompt = """ **Standalone command** - can be run anytime -> Updates DeepWork standard jobs in src/ and syncs to installed locations. Use when modifying deepwork_jobs or deepwork_rules. +> Updates DeepWork standard jobs in src/ and syncs to installed locations. Use when modifying deepwork_jobs. ## Instructions @@ -96,7 +96,7 @@ ls -la .claude/commands/[job_name].*.md ### Job Context A workflow for maintaining standard jobs bundled with DeepWork. Standard jobs -(like `deepwork_jobs` and `deepwork_rules`) are source-controlled in +(like `deepwork_jobs`) are source-controlled in `src/deepwork/standard_jobs/` and must be edited there—never in `.deepwork/jobs/` or `.claude/commands/` directly. diff --git a/AGENTS.md b/AGENTS.md index 3b1dfeec..b4ee13c6 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -14,7 +14,6 @@ When creating or modifying jobs in this repository, you MUST understand which ty **Current standard jobs**: - `deepwork_jobs` - Core job management (define, implement, learn) -- `deepwork_rules` - Rules enforcement system **Editing rules**: - Source of truth is ALWAYS in `src/deepwork/standard_jobs/` @@ -75,13 +74,11 @@ Which type of job should this be? 
``` deepwork/ ├── src/deepwork/standard_jobs/ # Standard jobs (source of truth) -│ ├── deepwork_jobs/ -│ └── deepwork_rules/ +│ └── deepwork_jobs/ ├── library/jobs/ # Library/example jobs │ └── [example_job]/ └── .deepwork/jobs/ # Installed standard jobs + bespoke jobs ├── deepwork_jobs/ # ← Installed copy, NOT source of truth - ├── deepwork_rules/ # ← Installed copy, NOT source of truth └── [bespoke_job]/ # ← Source of truth for bespoke only ## Debugging Issues diff --git a/README.md b/README.md index 76a659de..8f4ea1fa 100644 --- a/README.md +++ b/README.md @@ -133,9 +133,7 @@ To start the process, just run: **3. Learns automatically** — Run `/deepwork_jobs.learn` (or ask claude to `run the deepwork learn job`) after any job to automatically capture what worked and improve for next time. -**4. Rules** - The system adds enforced rules that are truly evaluated for everything it does, not just "hints" that Claude does by default - -**5. All work happens on Git branches** — Every change can be version-controlled and tracked. You can roll-back to prior versions of the skill or keep skills in-sync and up-to-date across your team. +**4. All work happens on Git branches** — Every change can be version-controlled and tracked. You can roll-back to prior versions of the skill or keep skills in-sync and up-to-date across your team. --- @@ -143,7 +141,7 @@ To start the process, just run: | Platform | Status | Notes | |----------|--------|-------| -| **Claude Code** | Full Support | Recommended. Quality hooks, rules, best DX. | +| **Claude Code** | Full Support | Recommended. Quality hooks, best DX. | | **Gemini CLI** | Partial Support | TOML format, global hooks only | | OpenCode | Planned | | | GitHub Copilot CLI | Planned | | @@ -198,7 +196,6 @@ Send [@tylerwillis](https://x.com/tylerwillis) a message on X. your-project/ ├── .deepwork/ │ ├── config.yml # Platform configuration -│ ├── rules/ # Automated rules │ └── jobs/ # Job definitions │ └── job_name/ │ ├── job.yml # Job metadata @@ -236,25 +233,6 @@ deepwork install -
-Advanced: Automated Rules - -Rules monitor file changes and prompt Claude to follow guidelines: - -```markdown ---- -name: Source/Test Pairing -set: - - src/{path}.py - - tests/{path}_test.py ---- -When source files change, corresponding test files should also change. -``` - -See [Architecture](doc/architecture.md) for full rules documentation. - -
-
Advanced: Nix Flakes diff --git a/claude.md b/claude.md index 07d4b325..81bcd8ff 100644 --- a/claude.md +++ b/claude.md @@ -43,8 +43,7 @@ deepwork/ │ │ ├── gemini/ │ │ └── copilot/ │ ├── standard_jobs/ # Built-in job definitions (auto-installed) -│ │ ├── deepwork_jobs/ -│ │ └── deepwork_rules/ +│ │ └── deepwork_jobs/ │ ├── schemas/ # Job definition schemas │ └── utils/ # Utilities (fs, git, yaml, validation) ├── library/jobs/ # Reusable example jobs (not auto-installed) @@ -196,7 +195,7 @@ my-project/ ### Editing Standard Jobs -**Standard jobs** (like `deepwork_jobs` and `deepwork_rules`) are bundled with DeepWork and installed to user projects. They exist in THREE locations: +**Standard jobs** (like `deepwork_jobs`) are bundled with DeepWork and installed to user projects. They exist in THREE locations: 1. **Source of truth**: `src/deepwork/standard_jobs/[job_name]/` - The canonical source files 2. **Installed copy**: `.deepwork/jobs/[job_name]/` - Installed by `deepwork install` @@ -217,7 +216,7 @@ Instead, follow this workflow: ### How to Identify Job Types -- **Standard jobs**: Exist in `src/deepwork/standard_jobs/` (currently: `deepwork_jobs`, `deepwork_rules`) +- **Standard jobs**: Exist in `src/deepwork/standard_jobs/` (currently: `deepwork_jobs`) - **Library jobs**: Exist in `library/jobs/` - **Bespoke jobs**: Exist ONLY in `.deepwork/jobs/` with no corresponding standard_jobs entry diff --git a/doc/architecture.md b/doc/architecture.md index f4a2e094..d08cf808 100644 --- a/doc/architecture.md +++ b/doc/architecture.md @@ -47,40 +47,26 @@ deepwork/ # DeepWork tool repository │ │ ├── generator.py # Command file generation │ │ ├── parser.py # Job definition parsing │ │ ├── doc_spec_parser.py # Doc spec parsing -│ │ ├── rules_parser.py # Rule definition parsing -│ │ ├── pattern_matcher.py # Variable pattern matching for rules -│ │ ├── rules_queue.py # Rule state queue system -│ │ ├── command_executor.py # Command action execution │ │ └── hooks_syncer.py # Hook syncing to platforms │ ├── hooks/ # Hook system and cross-platform wrappers │ │ ├── __init__.py │ │ ├── wrapper.py # Cross-platform input/output normalization │ │ ├── claude_hook.sh # Shell wrapper for Claude Code -│ │ ├── gemini_hook.sh # Shell wrapper for Gemini CLI -│ │ └── rules_check.py # Cross-platform rule evaluation hook +│ │ └── gemini_hook.sh # Shell wrapper for Gemini CLI │ ├── templates/ # Skill templates for each platform │ │ ├── claude/ │ │ │ └── skill-job-step.md.jinja │ │ ├── gemini/ │ │ └── copilot/ │ ├── standard_jobs/ # Built-in job definitions -│ │ ├── deepwork_jobs/ -│ │ │ ├── job.yml -│ │ │ ├── steps/ -│ │ │ └── templates/ -│ │ │ └── doc_spec.md.template -│ │ └── deepwork_rules/ # Rule management job +│ │ └── deepwork_jobs/ │ │ ├── job.yml │ │ ├── steps/ -│ │ │ └── define.md -│ │ └── hooks/ # Hook scripts -│ │ ├── global_hooks.yml -│ │ ├── user_prompt_submit.sh -│ │ └── capture_prompt_work_tree.sh +│ │ └── templates/ +│ │ └── doc_spec.md.template │ ├── schemas/ # Definition schemas │ │ ├── job_schema.py -│ │ ├── doc_spec_schema.py # Doc spec schema definition -│ │ └── rules_schema.py +│ │ └── doc_spec_schema.py # Doc spec schema definition │ └── utils/ │ ├── fs.py │ ├── git.py @@ -125,11 +111,6 @@ def install(platform: str): # Inject core job definitions inject_deepwork_jobs(".deepwork/jobs/") - # Create rules directory with example templates (if not exists) - if not exists(".deepwork/rules/"): - create_directory(".deepwork/rules/") - copy_example_rules(".deepwork/rules/") - # Update config (supports 
multiple platforms) config = load_yaml(".deepwork/config.yml") or {} config["version"] = "1.0.0" @@ -288,7 +269,6 @@ my-project/ # User's project (target) │ ├── deepwork_jobs.define.md # Core DeepWork skills │ ├── deepwork_jobs.implement.md │ ├── deepwork_jobs.refine.md -│ ├── deepwork_rules.define.md # Rule management │ ├── competitive_research.identify_competitors.md │ └── ... ├── .deepwork/ # DeepWork configuration @@ -296,24 +276,11 @@ my-project/ # User's project (target) │ ├── .gitignore # Ignores tmp/ directory │ ├── doc_specs/ # Doc specs (document specifications) │ │ └── monthly_aws_report.md -│ ├── rules/ # Rule definitions (v2 format) -│ │ ├── source-test-pairing.md -│ │ ├── format-python.md -│ │ └── api-docs.md │ ├── tmp/ # Temporary state (gitignored) -│ │ └── rules/queue/ # Rule evaluation queue │ └── jobs/ # Job definitions │ ├── deepwork_jobs/ # Core job for managing jobs │ │ ├── job.yml │ │ └── steps/ -│ ├── deepwork_rules/ # Rule management job -│ │ ├── job.yml -│ │ ├── steps/ -│ │ │ └── define.md -│ │ └── hooks/ # Hook scripts (installed from standard_jobs) -│ │ ├── global_hooks.yml -│ │ ├── user_prompt_submit.sh -│ │ └── capture_prompt_work_tree.sh │ ├── competitive_research/ │ │ ├── job.yml # Job metadata │ │ └── steps/ @@ -1033,203 +1000,6 @@ Github Actions are used for all CI/CD tasks. --- -## Rules - -Rules are automated enforcement mechanisms that trigger based on file changes during an AI agent session. They help ensure that: -- Documentation stays in sync with code changes -- Security reviews happen when sensitive code is modified -- Team guidelines are followed automatically -- File correspondences are maintained (e.g., source/test pairing) - -### Rules System v2 (Frontmatter Markdown) - -Rules are defined as individual markdown files in `.deepwork/rules/`: - -``` -.deepwork/rules/ -├── source-test-pairing.md -├── format-python.md -└── api-docs.md -``` - -Each rule file uses YAML frontmatter with a markdown body for instructions: - -```markdown ---- -name: Source/Test Pairing -set: - - src/{path}.py - - tests/{path}_test.py -compare_to: base ---- -When source files change, corresponding test files should also change. -Please create or update tests for the modified source files. -``` - -### Detection Modes - -Rules support three detection modes: - -**1. Trigger/Safety (default)** - Fire when trigger matches but safety doesn't: -```yaml ---- -name: Update install guide -trigger: "app/config/**/*" -safety: "docs/install_guide.md" -compare_to: base ---- -``` - -**2. Set (bidirectional)** - Enforce file correspondence in both directions: -```yaml ---- -name: Source/Test Pairing -set: - - src/{path}.py - - tests/{path}_test.py -compare_to: base ---- -``` -Uses variable patterns like `{path}` (multi-segment) and `{name}` (single-segment) for matching. - -**3. Pair (directional)** - Trigger requires corresponding files, but not vice versa: -```yaml ---- -name: API Documentation -pair: - trigger: src/api/{name}.py - expects: docs/api/{name}.md -compare_to: base ---- -``` - -### Action Types - -**1. Prompt (default)** - Show instructions to the agent: -```yaml ---- -name: Security Review -trigger: "src/auth/**/*" -compare_to: base ---- -Please check for hardcoded credentials and validate input. -``` - -**2. Command** - Run an idempotent command: -```yaml ---- -name: Format Python -trigger: "**/*.py" -action: - command: "ruff format {file}" - run_for: each_match # or "all_matches" -compare_to: prompt ---- -``` - -### Rule Evaluation Flow - -1. 
**Session Start**: When a Claude Code session begins, the baseline git state is captured -2. **Agent Works**: The AI agent performs tasks, potentially modifying files -3. **Session Stop**: When the agent finishes (after_agent event): - - Changed files are detected based on `compare_to` setting (base, default_tip, or prompt) - - Each rule is evaluated based on its detection mode - - Queue entries are created in `.deepwork/tmp/rules/queue/` for deduplication - - For command actions: commands are executed, results tracked - - For prompt actions: if rule fires and not already promised, agent is prompted -4. **Promise Tags**: Agents can mark rules as addressed by including `✓ Rule Name` in their response - -### Queue System - -Rule state is tracked in `.deepwork/tmp/rules/queue/` with files named `{hash}.{status}.json`: -- `queued` - Detected, awaiting evaluation -- `passed` - Rule satisfied (promise found or command succeeded) -- `failed` - Rule not satisfied -- `skipped` - Safety pattern matched - -This prevents re-prompting for the same rule violation within a session. - -### Hook Integration - -The v2 rules system uses the cross-platform hook wrapper: - -``` -src/deepwork/hooks/ -├── wrapper.py # Cross-platform input/output normalization -├── rules_check.py # Rule evaluation hook (v2) -├── claude_hook.sh # Claude Code shell wrapper -└── gemini_hook.sh # Gemini CLI shell wrapper -``` - -Hooks are called via the shell wrappers: -```bash -claude_hook.sh deepwork.hooks.rules_check -``` - -The hooks are installed to `.claude/settings.json` during `deepwork sync`: - -```json -{ - "hooks": { - "Stop": [ - {"matcher": "", "hooks": [{"type": "command", "command": "deepwork hook rules_check"}]} - ] - } -} -``` - -### Cross-Platform Hook Wrapper System - -The `hooks/` module provides a wrapper system that allows writing hooks once in Python and running them on multiple platforms. This normalizes the differences between Claude Code and Gemini CLI hook systems. - -**Architecture:** -``` -┌─────────────────┐ ┌─────────────────┐ -│ Claude Code │ │ Gemini CLI │ -│ (Stop event) │ │ (AfterAgent) │ -└────────┬────────┘ └────────┬────────┘ - │ │ - ▼ ▼ -┌─────────────────┐ ┌─────────────────┐ -│ claude_hook.sh │ │ gemini_hook.sh │ -│ (shell wrapper) │ │ (shell wrapper) │ -└────────┬────────┘ └────────┬────────┘ - │ │ - └───────────┬───────────┘ - ▼ - ┌─────────────────┐ - │ wrapper.py │ - │ (normalization) │ - └────────┬────────┘ - ▼ - ┌─────────────────┐ - │ Python Hook │ - │ (common logic) │ - └─────────────────┘ -``` - -**Key normalizations:** -- Event names: `Stop` ↔ `AfterAgent`, `PreToolUse` ↔ `BeforeTool`, `UserPromptSubmit` ↔ `BeforeAgent` -- Tool names: `Write` ↔ `write_file`, `Bash` ↔ `shell`, `Read` ↔ `read_file` -- Decision values: `block` → `deny` for Gemini CLI -- Environment variables: `CLAUDE_PROJECT_DIR` ↔ `GEMINI_PROJECT_DIR` - -**Usage:** -```python -from deepwork.hooks.wrapper import HookInput, HookOutput, run_hook, Platform - -def my_hook(input: HookInput) -> HookOutput: - if input.event == NormalizedEvent.AFTER_AGENT: - return HookOutput(decision="block", reason="Complete X first") - return HookOutput() - -# Called via: claude_hook.sh mymodule or gemini_hook.sh mymodule -``` - -See `doc/platforms/` for detailed platform-specific hook documentation. - ---- - ## Doc Specs (Document Specifications) Doc specs formalize document specifications for job outputs. They enable consistent document structure and automated quality validation. 
@@ -1312,38 +1082,6 @@ See `doc/doc-specs.md` for complete documentation. --- -### Rule Schema - -Rules are validated against a JSON Schema: - -```yaml -- name: string # Required: Friendly name for the rule - trigger: string|array # Required: Glob pattern(s) for triggering files - safety: string|array # Optional: Glob pattern(s) for safety files - instructions: string # Required (unless instructions_file): What to do - instructions_file: string # Alternative: Path to instructions file -``` - -### Defining Rules - -Use the `/deepwork_rules.define` command to interactively create rules: - -``` -User: /deepwork_rules.define - -Claude: I'll help you define a new rule. What guideline or constraint - should this rule enforce? - -User: When API code changes, the API documentation should be updated - -Claude: Got it. Let me ask a few questions... - [Interactive dialog to define trigger, safety, and instructions] - -Claude: Created rule "API documentation update" in .deepwork/rules/api-documentation.md -``` - ---- - ## Technical Decisions ### Language: Python 3.11+ diff --git a/doc/rules_syntax.md b/doc/rules_syntax.md deleted file mode 100644 index 2ab86be1..00000000 --- a/doc/rules_syntax.md +++ /dev/null @@ -1,687 +0,0 @@ -# Rules Configuration Syntax - -This document describes the syntax for rule files in the `.deepwork/rules/` directory. - -## Directory Structure - -Rules are stored as individual markdown files with YAML frontmatter: - -``` -.deepwork/ -└── rules/ - ├── readme-accuracy.md - ├── source-test-pairing.md - ├── api-documentation.md - └── python-formatting.md -``` - -Each file has: -- **Frontmatter**: YAML configuration between `---` delimiters -- **Body**: Instructions (for prompt actions) or description (for command actions) - -This structure enables code files to reference rules: -```python -# Read the rule `.deepwork/rules/source-test-pairing.md` before editing -class AuthService: - ... -``` - -## Quick Reference - -### Simple Trigger with Prompt - -`.deepwork/rules/readme-accuracy.md`: -```markdown ---- -name: README Accuracy -trigger: src/**/* -safety: README.md -compare_to: base ---- -Source code changed. Please verify README.md is accurate. - -Check that: -- All public APIs are documented -- Examples are up to date -- Installation instructions are correct -``` - -### Correspondence Set (bidirectional) - -`.deepwork/rules/source-test-pairing.md`: -```markdown ---- -name: Source/Test Pairing -set: - - src/{path}.py - - tests/{path}_test.py -compare_to: base ---- -Source and test files should change together. - -When modifying source code, ensure corresponding tests are updated. -When adding tests, ensure they test actual source code. -``` - -### Correspondence Pair (directional) - -`.deepwork/rules/api-documentation.md`: -```markdown ---- -name: API Documentation -pair: - trigger: api/{path}.py - expects: docs/api/{path}.md -compare_to: base ---- -API changes require documentation updates. - -When modifying an API endpoint, update its documentation to reflect: -- Parameter changes -- Response format changes -- New error conditions -``` - -### Command Action - -`.deepwork/rules/python-formatting.md`: -```markdown ---- -name: Python Formatting -trigger: "**/*.py" -action: - command: ruff format {file} -compare_to: prompt ---- -Automatically formats Python files using ruff. - -This rule runs `ruff format` on any changed Python files to ensure -consistent code style across the codebase. 
-``` - -### Created Mode (file creation trigger) - -`.deepwork/rules/new-module-docs.md`: -```markdown ---- -name: New Module Documentation -created: src/**/*.py ---- -A new Python module was created. Please ensure: - -- Add module docstring explaining the purpose -- Update relevant documentation if adding a public API -- Consider adding tests for the new module -``` - -## Rule Structure - -Every rule has two orthogonal aspects: - -### Detection Mode - -How the rule decides when to fire: - -| Mode | Field | Description | -|------|-------|-------------| -| **Trigger/Safety** | `trigger`, `safety` | Fire when trigger matches and safety doesn't | -| **Set** | `set` | Fire when file correspondence is incomplete (bidirectional) | -| **Pair** | `pair` | Fire when file correspondence is incomplete (directional) | -| **Created** | `created` | Fire when newly created files match patterns | - -### Action Type - -What happens when the rule fires: - -| Type | Field | Description | -|------|-------|-------------| -| **Prompt** (default) | (markdown body) | Show instructions to the agent | -| **Command** | `action.command` | Run an idempotent command | - -## Detection Modes - -### Trigger/Safety Mode - -The simplest detection mode. Fires when changed files match `trigger` patterns and no changed files match `safety` patterns. - -```yaml ---- -name: Security Review -trigger: - - src/auth/**/* - - src/crypto/**/* -safety: SECURITY.md -compare_to: base ---- -``` - -### Set Mode (Bidirectional Correspondence) - -Defines files that should change together. If ANY file in a correspondence group changes, ALL related files should also change. - -```yaml ---- -name: Source/Test Pairing -set: - - src/{path}.py - - tests/{path}_test.py -compare_to: base ---- -``` - -**How it works:** - -1. A file changes that matches one pattern in the set -2. System extracts the variable portions (e.g., `{path}`) -3. System generates expected files by substituting into other patterns -4. If ALL expected files also changed: rule is satisfied (no trigger) -5. If ANY expected file is missing: rule fires - -If `src/auth/login.py` changes: -- Extracts `{path}` = `auth/login` -- Expects `tests/auth/login_test.py` to also change -- If test didn't change, fires with instructions - -If `tests/auth/login_test.py` changes: -- Extracts `{path}` = `auth/login` -- Expects `src/auth/login.py` to also change -- If source didn't change, fires with instructions - -### Pair Mode (Directional Correspondence) - -Defines directional relationships. Changes to trigger files require corresponding expected files to change, but not vice versa. - -```yaml ---- -name: API Documentation -pair: - trigger: api/{module}/{name}.py - expects: docs/api/{module}/{name}.md -compare_to: base ---- -``` - -Can specify multiple expected patterns: - -```yaml ---- -name: API Documentation -pair: - trigger: api/{path}.py - expects: - - docs/api/{path}.md - - schemas/{path}.json -compare_to: base ---- -``` - -If `api/users/create.py` changes: -- Expects `docs/api/users/create.md` to also change -- If doc didn't change, fires with instructions - -If `docs/api/users/create.md` changes alone: -- No trigger (documentation can be updated independently) - -### Created Mode (File Creation Detection) - -Fires only when files are newly created (not modified). Useful for enforcing standards on new files. - -```yaml ---- -name: New Component Documentation -created: - - src/components/**/*.tsx - - src/components/**/*.ts ---- -``` - -**How it works:** - -1. 
A file is created that matches a `created` pattern -2. Rule fires with instructions - -Key differences from Trigger/Safety mode: -- Only fires for **new** files, not modifications to existing files -- No safety patterns (use Trigger/Safety mode if you need safety) -- Good for enforcing documentation, tests, or standards on new code - -**Examples:** - -```yaml -# Single pattern -created: src/api/**/*.py - -# Multiple patterns -created: - - src/models/**/*.py - - src/services/**/*.py -``` - -If a new file `src/api/users.py` is created: -- Rule fires with instructions for new API modules - -If an existing file `src/api/users.py` is modified: -- Rule does NOT fire (file already existed) - -## Action Types - -### Prompt Action (Default) - -The markdown body after frontmatter serves as instructions shown to the agent. This is the default when no `action` field is specified. - -**Template Variables in Instructions:** - -| Variable | Description | -|----------|-------------| -| `{trigger_file}` | The file that triggered the rule | -| `{trigger_files}` | All files that matched trigger patterns | -| `{expected_files}` | Expected corresponding files (for sets/pairs) | - -### Command Action - -Runs an idempotent command instead of prompting the agent. - -```yaml ---- -name: Python Formatting -trigger: "**/*.py" -safety: "*.pyi" -action: - command: ruff format {file} - run_for: each_match -compare_to: prompt ---- -``` - -**Template Variables in Commands:** - -| Variable | Description | Available When | -|----------|-------------|----------------| -| `{file}` | Single file path | `run_for: each_match` | -| `{files}` | Space-separated file paths | `run_for: all_matches` | -| `{repo_root}` | Repository root directory | Always | - -**Idempotency Requirement:** - -Commands should be idempotent--running them multiple times produces the same result. Lint formatters like `black`, `ruff format`, and `prettier` are good examples: they produce consistent output regardless of how many times they run. - -## Pattern Syntax - -### Basic Glob Patterns - -Standard glob patterns work in `trigger` and `safety` fields: - -| Pattern | Matches | -|---------|---------| -| `*.py` | Python files in current directory | -| `**/*.py` | Python files in any directory | -| `src/**/*` | All files under src/ | -| `test_*.py` | Files starting with `test_` | -| `*.{js,ts}` | JavaScript and TypeScript files | - -### Variable Patterns - -Variable patterns use `{name}` syntax to capture path segments: - -| Pattern | Captures | Example Match | -|---------|----------|---------------| -| `src/{path}.py` | `{path}` = multi-segment path | `src/foo/bar.py` -> `path=foo/bar` | -| `src/{name}.py` | `{name}` = single segment | `src/utils.py` -> `name=utils` | -| `{module}/{name}.py` | Both variables | `auth/login.py` -> `module=auth, name=login` | - -**Variable Naming Conventions:** - -- `{path}` - Conventional name for multi-segment captures (`**/*`) -- `{name}` - Conventional name for single-segment captures (`*`) -- Custom names allowed: `{module}`, `{component}`, etc. 
- -**Multi-Segment vs Single-Segment:** - -By default, `{path}` matches multiple path segments and `{name}` matches one: - -```yaml -# {path} matches: foo, foo/bar, foo/bar/baz -- "src/{path}.py" # src/foo.py, src/foo/bar.py, src/a/b/c.py - -# {name} matches only single segment -- "src/{name}.py" # src/foo.py (NOT src/foo/bar.py) -``` - -To explicitly control this, use `{**name}` for multi-segment or `{*name}` for single: - -```yaml -- "src/{**module}/index.py" # src/foo/bar/index.py -> module=foo/bar -- "src/{*component}.py" # src/Button.py -> component=Button -``` - -## Field Reference - -### name (required) - -Human-friendly name for the rule. Displayed in promise tags and output. - -```yaml ---- -name: Source/Test Pairing ---- -``` - -### File Naming - -Rule files are named using kebab-case with `.md` extension: -- `readme-accuracy.md` -- `source-test-pairing.md` -- `api-documentation.md` - -The filename serves as the rule's identifier in the queue system. - -### trigger - -File patterns that cause the rule to fire (trigger/safety mode). Can be string or array. - -```yaml ---- -trigger: src/**/*.py ---- - ---- -trigger: - - src/**/*.py - - lib/**/*.py ---- -``` - -### safety (optional) - -File patterns that suppress the rule. If ANY changed file matches a safety pattern, the rule does not fire. - -```yaml ---- -safety: CHANGELOG.md ---- - ---- -safety: - - CHANGELOG.md - - docs/**/* ---- -``` - -### set - -List of patterns defining bidirectional file relationships (set mode). - -```yaml ---- -set: - - src/{path}.py - - tests/{path}_test.py ---- -``` - -### pair - -Object with `trigger` and `expects` patterns for directional relationships (pair mode). - -```yaml ---- -pair: - trigger: api/{path}.py - expects: docs/api/{path}.md ---- - ---- -pair: - trigger: api/{path}.py - expects: - - docs/api/{path}.md - - schemas/{path}.json ---- -``` - -### created - -File patterns that trigger when files are newly created (created mode). Only fires for new files, not modifications. Can be string or array. - -```yaml ---- -created: src/**/*.py ---- - ---- -created: - - src/**/*.py - - lib/**/*.py ---- -``` - -### action (optional) - -Specifies a command to run instead of prompting. - -```yaml ---- -action: - command: ruff format {file} - run_for: each_match # or all_matches ---- -``` - -### compare_to (required) - -Determines the baseline for detecting file changes. - -| Value | Description | -|-------|-------------| -| `base` | Compare to merge-base with default branch | -| `default_tip` | Compare to current tip of default branch | -| `prompt` | Compare to state at last prompt submission | - -```yaml ---- -compare_to: base ---- -``` - -## Complete Examples - -### Example 1: Test Coverage Rule - -`.deepwork/rules/test-coverage.md`: -```markdown ---- -name: Test Coverage -set: - - src/{path}.py - - tests/{path}_test.py -compare_to: base ---- -Source code was modified without corresponding test updates. - -Modified source: {trigger_file} -Expected test: {expected_files} - -Please either: -1. Add/update tests for the changed code -2. Explain why tests are not needed -``` - -### Example 2: Documentation Sync - -`.deepwork/rules/api-documentation-sync.md`: -```markdown ---- -name: API Documentation Sync -pair: - trigger: src/api/{module}/{endpoint}.py - expects: - - docs/api/{module}/{endpoint}.md - - openapi/{module}.yaml -compare_to: base ---- -API endpoint changed. 
Please update: -- Documentation: {expected_files} -- Ensure OpenAPI spec is current -``` - -### Example 3: Auto-formatting Pipeline - -`.deepwork/rules/python-black-formatting.md`: -```markdown ---- -name: Python Black Formatting -trigger: "**/*.py" -safety: - - "**/*.pyi" - - "**/migrations/**" -action: - command: black {file} - run_for: each_match -compare_to: prompt ---- -Formats Python files using Black. - -Excludes: -- Type stub files (*.pyi) -- Database migration files -``` - -### Example 4: Multi-file Correspondence - -`.deepwork/rules/full-stack-feature-sync.md`: -```markdown ---- -name: Full Stack Feature Sync -set: - - backend/api/{feature}/routes.py - - backend/api/{feature}/models.py - - frontend/src/api/{feature}.ts - - frontend/src/components/{feature}/**/* -compare_to: base ---- -Feature files should be updated together across the stack. - -When modifying a feature, ensure: -- Backend routes are updated -- Backend models are updated -- Frontend API client is updated -- Frontend components are updated -``` - -### Example 5: Conditional Safety - -`.deepwork/rules/version-bump-required.md`: -```markdown ---- -name: Version Bump Required -trigger: - - src/**/*.py - - pyproject.toml -safety: - - pyproject.toml - - CHANGELOG.md -compare_to: base ---- -Code changes detected. Before merging, ensure: -- Version is bumped in pyproject.toml (if needed) -- CHANGELOG.md is updated - -This rule is suppressed if you've already modified pyproject.toml -or CHANGELOG.md, as that indicates you're handling versioning. -``` - -### Example 6: New File Standards (Created Mode) - -`.deepwork/rules/new-module-standards.md`: -```markdown ---- -name: New Module Standards -created: - - src/**/*.py - - lib/**/*.py ---- -A new Python module was created. Please ensure it follows our standards: - -1. **Module docstring**: Add a docstring at the top explaining the module's purpose -2. **Type hints**: Use type hints for all function parameters and return values -3. **Tests**: Create a corresponding test file in tests/ -4. **Imports**: Follow the import order (stdlib, third-party, local) - -This rule only fires for newly created files, not modifications. -``` - -### Example 7: New Component Checklist (Created Mode with Command) - -`.deepwork/rules/new-component-lint.md`: -```markdown ---- -name: New Component Lint -created: src/components/**/*.tsx -action: - command: eslint --fix {file} ---- -Automatically lints newly created React components. -``` - -## Promise Tags - -When a rule fires but should be dismissed, use promise tags in the conversation. The tag content should be human-readable, using the rule's `name` field: - -``` -Source/Test Pairing -API Documentation Sync -``` - -The friendly name makes promise tags easy to read when displayed in the conversation. The system matches promise tags to rules using case-insensitive comparison of the `name` field. - -## Validation - -Rule files are validated on load. 
Common errors: - -**Invalid frontmatter:** -``` -Error: .deepwork/rules/my-rule.md - invalid YAML frontmatter -``` - -**Missing required field:** -``` -Error: .deepwork/rules/my-rule.md - must have 'trigger', 'set', 'pair', or 'created' -``` - -**Invalid pattern:** -``` -Error: .deepwork/rules/test-coverage.md - invalid pattern "src/{path" - unclosed brace -``` - -**Conflicting fields:** -``` -Error: .deepwork/rules/my-rule.md - has both 'trigger' and 'set' - use one or the other -``` - -**Empty body:** -``` -Error: .deepwork/rules/my-rule.md - instruction rules require markdown body -``` - -## Referencing Rules in Code - -A key benefit of the `.deepwork/rules/` folder structure is that code files can reference rules directly: - -```python -# Read `.deepwork/rules/source-test-pairing.md` before editing this file - -class UserService: - """Service for user management.""" - pass -``` - -```typescript -// This file is governed by `.deepwork/rules/api-documentation.md` -// Any changes here require corresponding documentation updates - -export async function createUser(data: UserInput): Promise { - // ... -} -``` - -This helps AI agents and human developers understand which rules apply to specific files. diff --git a/doc/rules_system_design.md b/doc/rules_system_design.md deleted file mode 100644 index 8fbf42b5..00000000 --- a/doc/rules_system_design.md +++ /dev/null @@ -1,569 +0,0 @@ -# Rules System Design - -## Overview - -The deepwork rules system enables automated enforcement of development standards during AI-assisted coding sessions. This document describes the architecture for the next-generation rules system with support for: - -1. **File correspondence matching** (sets and pairs) -2. **Idempotent command execution** -3. **Stateful evaluation with queue-based processing** -4. 
**Efficient agent output management** - -## Core Concepts - -### Rule Structure - -Every rule has two orthogonal aspects: - -**Detection Mode** - How the rule decides when to fire: - -| Mode | Field | Description | -|------|-------|-------------| -| **Trigger/Safety** | `trigger`, `safety` | Fire when trigger matches and safety doesn't | -| **Set** | `set` | Fire when file correspondence is incomplete (bidirectional) | -| **Pair** | `pair` | Fire when file correspondence is incomplete (directional) | -| **Created** | `created` | Fire when newly created files match patterns | - -**Action Type** - What happens when the rule fires: - -| Type | Field | Description | -|------|-------|-------------| -| **Prompt** (default) | (markdown body) | Show instructions to the agent | -| **Command** | `action.command` | Run an idempotent command | - -### Detection Modes - -**Trigger/Safety Mode** -- Simplest mode: fire when files match `trigger` and none match `safety` -- Good for general checks like "source changed, verify README" - -**Set Mode (Bidirectional Correspondence)** -- Define N patterns that share a common variable path -- If ANY file matching one pattern changes, ALL corresponding files should change -- Example: Source files and their tests - -**Pair Mode (Directional Correspondence)** -- Define a trigger pattern and one or more expected patterns -- Changes to trigger files require corresponding expected files to also change -- Changes to expected files alone do not trigger the rule -- Example: API code requires documentation updates - -**Created Mode (File Creation Detection)** -- Define patterns for newly created files -- Only fires when files are created, not when existing files are modified -- Useful for enforcing standards on new code (documentation, tests, etc.) -- Example: New modules require documentation and tests - -### Pattern Variables - -Patterns use `{name}` syntax for capturing variable path segments: - -``` -src/{path}.py # {path} captures everything between src/ and .py -tests/{path}_test.py # {path} must match the same value -``` - -Special variable names: -- `{path}` - Matches any path segments (equivalent to `**/*`) -- `{name}` - Matches a single path segment (equivalent to `*`) -- `{**}` - Explicit multi-segment wildcard -- `{*}` - Explicit single-segment wildcard - -### Action Types - -**Prompt Action (default)** -The markdown body of the rule file serves as instructions shown to the agent. - -**Command Action** -```yaml -action: - command: "ruff format {file}" - run_for: each_match -``` - -Command actions should be idempotent—running them multiple times produces the same result. Lint formatters like `black`, `ruff format`, and `prettier` are good examples. 
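As a concrete illustration of the idempotency requirement, the sketch below shows one way the evaluator's re-run check could work: run the command, take a `git diff`, run it again, and treat a stable diff as success. This is a minimal sketch of the flow described in this document, not the actual DeepWork implementation; the `max_reruns` cutoff and the example `ruff format` invocation are illustrative assumptions.

```python
import subprocess
from pathlib import Path


def run_idempotent_command(command: list[str], repo_root: Path, max_reruns: int = 2) -> bool:
    """Run a command and re-run it until the working tree stops changing.

    Returns True when a re-run produces no additional changes (idempotent),
    False when the command fails or keeps changing files.
    """
    previous_diff: str | None = None
    for _ in range(max_reruns + 1):
        result = subprocess.run(command, cwd=repo_root, capture_output=True, text=True)
        if result.returncode != 0:
            return False  # command itself failed
        diff = subprocess.run(
            ["git", "diff"], cwd=repo_root, capture_output=True, text=True
        ).stdout
        if diff == previous_diff:
            return True  # second run made no further changes
        previous_diff = diff
    return False  # still producing changes after the allowed re-runs


# Hypothetical usage: verify that formatting a file is stable.
# run_idempotent_command(["ruff", "format", "src/example.py"], Path("."))
```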
- -## Architecture - -### Component Overview - -``` -┌─────────────────────────────────────────────────────────────────┐ -│ Rules System │ -├─────────────────────────────────────────────────────────────────┤ -│ │ -│ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │ -│ │ Detector │───▶│ Queue │◀───│ Evaluator │ │ -│ │ │ │ │ │ │ │ -│ │ - Watch files│ │ .deepwork/ │ │ - Process │ │ -│ │ - Match rules│ │ tmp/rules/ │ │ queued │ │ -│ │ - Create │ │ queue/ │ │ - Run action │ │ -│ │ entries │ │ │ │ - Update │ │ -│ └──────────────┘ └──────────────┘ │ status │ │ -│ └──────────────┘ │ -│ │ -│ ┌──────────────┐ ┌──────────────┐ │ -│ │ Matcher │ │ Resolver │ │ -│ │ │ │ │ │ -│ │ - Pattern │ │ - Variable │ │ -│ │ matching │ │ extraction │ │ -│ │ - Glob │ │ - Path │ │ -│ │ expansion │ │ generation │ │ -│ └──────────────┘ └──────────────┘ │ -│ │ -└─────────────────────────────────────────────────────────────────┘ -``` - -### Detector - -The detector identifies when rules should be evaluated: - -1. **Trigger Detection**: Monitors for file changes that match rule triggers -2. **Deduplication**: Computes a hash to avoid re-processing identical triggers -3. **Queue Entry Creation**: Creates entries for the evaluator to process - -**Trigger Hash Computation**: -```python -hash_input = f"{rule_name}:{sorted(trigger_files)}:{baseline_ref}" -trigger_hash = sha256(hash_input.encode()).hexdigest()[:12] -``` - -The baseline_ref varies by `compare_to` mode: -- `base`: merge-base commit hash -- `default_tip`: remote tip commit hash -- `prompt`: timestamp of last prompt submission - -### Queue - -The queue persists rule trigger state in `.deepwork/tmp/rules/queue/`: - -``` -.deepwork/tmp/rules/queue/ -├── {hash}.queued.json # Detected, awaiting evaluation -├── {hash}.passed.json # Evaluated, rule satisfied -├── {hash}.failed.json # Evaluated, rule not satisfied -└── {hash}.skipped.json # Safety pattern matched, skipped -``` - -**Queue Entry Schema**: -```json -{ - "rule_name": "string", - "trigger_hash": "string", - "status": "queued|passed|failed|skipped", - "created_at": "ISO8601 timestamp", - "evaluated_at": "ISO8601 timestamp or null", - "baseline_ref": "string", - "trigger_files": ["array", "of", "files"], - "expected_files": ["array", "of", "files"], - "matched_files": ["array", "of", "files"], - "action_result": { - "type": "prompt|command", - "output": "string or null", - "exit_code": "number or null" - } -} -``` - -**Queue Cleanup**: -Since `.deepwork/tmp/` is gitignored, queue entries are transient local state. No aggressive cleanup is required—entries can accumulate without causing issues. The directory can be safely deleted at any time to reset state. - -### Evaluator - -The evaluator processes queued entries: - -1. **Load Entry**: Read queued entry from disk -2. **Verify Still Relevant**: Re-check that trigger conditions still apply -3. **Execute Action**: - - For prompts: Format message and return to hook system - - For commands: Execute command, verify idempotency -4. **Update Status**: Mark as passed, failed, or skipped -5. **Report Results**: Return appropriate response to caller - -### Matcher - -Pattern matching with variable extraction: - -**Algorithm**: -```python -def match_pattern(pattern: str, filepath: str) -> dict[str, str] | None: - """ - Match filepath against pattern, extracting variables. - - Returns dict of {variable_name: captured_value} or None if no match. 
- """ - # Convert pattern to regex with named groups - # {path} -> (?P.+) - # {name} -> (?P[^/]+) - # Literal parts are escaped - regex = pattern_to_regex(pattern) - match = re.fullmatch(regex, filepath) - if match: - return match.groupdict() - return None -``` - -**Pattern Compilation**: -```python -def pattern_to_regex(pattern: str) -> str: - """Convert pattern with {var} placeholders to regex.""" - result = [] - for segment in parse_pattern(pattern): - if segment.is_variable: - if segment.name in ('path', '**'): - result.append(f'(?P<{segment.name}>.+)') - else: - result.append(f'(?P<{segment.name}>[^/]+)') - else: - result.append(re.escape(segment.value)) - return ''.join(result) -``` - -### Resolver - -Generates expected filepaths from patterns and captured variables: - -```python -def resolve_pattern(pattern: str, variables: dict[str, str]) -> str: - """ - Substitute variables into pattern to generate filepath. - - Example: - resolve_pattern("tests/{path}_test.py", {"path": "foo/bar"}) - -> "tests/foo/bar_test.py" - """ - result = pattern - for name, value in variables.items(): - result = result.replace(f'{{{name}}}', value) - return result -``` - -## Evaluation Flow - -### Standard Instruction Rule - -``` -1. Detector: File changes detected -2. Detector: Check each rule's trigger patterns -3. Detector: For matching rule, compute trigger hash -4. Detector: If hash not in queue, create .queued entry -5. Evaluator: Process queued entry -6. Evaluator: Check safety patterns against changed files -7. Evaluator: If safety matches, mark .skipped -8. Evaluator: If no safety match, return instructions to agent -9. Agent: Addresses rule, includes tag -10. Evaluator: On next check, mark .passed (promise found) -``` - -### Correspondence Rule (Set) - -``` -1. Detector: File src/foo/bar.py changed -2. Matcher: Matches pattern "src/{path}.py" with {path}="foo/bar" -3. Resolver: Generate expected files from other patterns: - - "tests/{path}_test.py" -> "tests/foo/bar_test.py" -4. Detector: Check if tests/foo/bar_test.py also changed -5. Detector: If yes, mark .skipped (correspondence satisfied) -6. Detector: If no, create .queued entry -7. Evaluator: Return instructions prompting for test update -``` - -### Correspondence Rule (Pair) - -``` -1. Detector: File api/users.py changed (trigger pattern) -2. Matcher: Matches "api/{path}.py" with {path}="users" -3. Resolver: Generate expected: "docs/api/users.md" -4. Detector: Check if docs/api/users.md also changed -5. Detector: If yes, mark .skipped -6. Detector: If no, create .queued entry -7. Evaluator: Return instructions - -Note: If only docs/api/users.md changed (not api/users.py), -the pair rule does NOT trigger (directional). -``` - -### Command Rule - -``` -1. Detector: Python file changed, matches "**/*.py" -2. Detector: Create .queued entry for format rule -3. Evaluator: Execute "ruff format {file}" -4. Evaluator: Run git diff to check for changes -5. Evaluator: If changes made, re-run command (idempotency check) -6. Evaluator: If no additional changes, mark .passed -7. Evaluator: If changes keep occurring, mark .failed, alert user -``` - -### Created Rule - -``` -1. Detector: New file created, matches "src/**/*.py" created pattern -2. Detector: Verify file is newly created (not just modified) -3. Detector: Create .queued entry for new file rule -4. Evaluator: Return instructions for new file standards -5. Agent: Addresses rule, includes tag -6. 
Evaluator: On next check, mark .passed (promise found) -``` - -Note: Created mode uses separate file detection to distinguish newly -created files from modified files. Untracked files and files added -since the baseline are considered "created". - -## Agent Output Management - -### Problem - -When many rules trigger, the agent receives excessive output, degrading performance. - -### Solution - -**1. Output Batching** -Group related rules into concise sections: - -``` -The following rules require attention: - -## Source/Test Pairing -src/auth/login.py → tests/auth/login_test.py -src/api/users.py → tests/api/users_test.py - -## API Documentation -api/users.py → docs/api/users.md - -## README Accuracy -Source files changed. Verify README.md is accurate. -``` - -**2. Grouped by Rule Name** -Multiple violations of the same rule are grouped together under a single heading, keeping output compact. - -**3. Minimal Decoration** -Avoid excessive formatting, numbering, or emphasis. Use simple arrow notation for correspondence violations. - -## State Persistence - -### Directory Structure - -``` -.deepwork/ -├── rules/ # Rule definitions (frontmatter markdown) -│ ├── readme-accuracy.md -│ ├── source-test-pairing.md -│ ├── api-documentation.md -│ └── python-formatting.md -├── tmp/ # GITIGNORED - transient state -│ └── rules/ -│ ├── queue/ # Queue entries -│ │ ├── abc123.queued.json -│ │ └── def456.passed.json -│ ├── baselines/ # Cached baseline states -│ │ └── prompt_1705420800.json -│ └── cache/ # Pattern matching cache -│ └── patterns.json -└── rules_state.json # Session state summary -``` - -**Important:** The entire `.deepwork/tmp/` directory is gitignored. All queue entries, baselines, and caches are local transient state that is not committed. This means cleanup is not critical—files can accumulate and will be naturally cleaned when the directory is deleted or the repo is re-cloned. - -### Rule File Format - -Each rule is a markdown file with YAML frontmatter: - -```markdown ---- -name: README Accuracy -trigger: src/**/*.py -safety: README.md ---- -Instructions shown to the agent when this rule fires. - -These can be multi-line with full markdown formatting. -``` - -This format enables: -1. Code files to reference rules in comments -2. Human-readable rule documentation -3. Easy editing with any markdown editor -4. Clear separation of configuration and content - -### Baseline Management - -For `compare_to: prompt`, baselines are captured at prompt submission: - -```json -{ - "timestamp": "2024-01-16T12:00:00Z", - "commit": "abc123", - "staged_files": ["file1.py", "file2.py"], - "untracked_files": ["file3.py"] -} -``` - -Multiple baselines can exist for different prompts in a session. - -### Queue Lifecycle - -``` - ┌─────────┐ - │ Created │ - │ .queued │ - └────┬────┘ - │ - ┌─────────────┼─────────────┐ - │ │ │ - ▼ ▼ ▼ - ┌─────────┐ ┌─────────┐ ┌─────────┐ - │ .passed │ │ .failed │ │.skipped │ - └─────────┘ └─────────┘ └─────────┘ -``` - -Terminal states persist in `.deepwork/tmp/` (gitignored) until manually cleared or the directory is deleted. 
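To make the lifecycle concrete, here is a minimal sketch of how a queue entry could be created and then moved to a terminal state by renaming `{hash}.{status}.json` files, assuming only the field subset shown; real entries also carry `baseline_ref`, `expected_files`, and `action_result`, which are omitted here.

```python
import json
from datetime import datetime, timezone
from pathlib import Path

QUEUE_DIR = Path(".deepwork/tmp/rules/queue")


def create_entry(trigger_hash: str, rule_name: str, trigger_files: list[str]) -> Path:
    """Create a queue entry in the 'queued' state."""
    QUEUE_DIR.mkdir(parents=True, exist_ok=True)
    entry = {
        "rule_name": rule_name,
        "trigger_hash": trigger_hash,
        "status": "queued",
        "created_at": datetime.now(timezone.utc).isoformat(),
        "evaluated_at": None,
        "trigger_files": trigger_files,
    }
    path = QUEUE_DIR / f"{trigger_hash}.queued.json"
    path.write_text(json.dumps(entry, indent=2))
    return path


def transition(entry_path: Path, new_status: str) -> Path:
    """Move an entry to passed/failed/skipped by updating status and renaming the file."""
    entry = json.loads(entry_path.read_text())
    entry["status"] = new_status
    entry["evaluated_at"] = datetime.now(timezone.utc).isoformat()
    new_path = entry_path.with_name(f"{entry['trigger_hash']}.{new_status}.json")
    new_path.write_text(json.dumps(entry, indent=2))
    entry_path.unlink()
    return new_path


# Hypothetical usage: a detected trigger is later satisfied by a promise.
# entry = create_entry("abc123def456", "Source/Test Pairing", ["src/auth/login.py"])
# transition(entry, "passed")
```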
- -## Error Handling - -### Pattern Errors - -Invalid patterns are caught at rule load time: - -```python -class PatternError(RulesError): - """Invalid pattern syntax.""" - pass - -# Validation -def validate_pattern(pattern: str) -> None: - # Check for unbalanced braces - # Check for invalid variable names - # Check for unsupported syntax -``` - -### Command Errors - -Command execution errors are captured and reported: - -```json -{ - "status": "failed", - "action_result": { - "type": "command", - "command": "ruff format {file}", - "exit_code": 1, - "stdout": "", - "stderr": "error: invalid syntax in foo.py:10" - } -} -``` - -### Queue Corruption - -If queue entries become corrupted: -1. Log error with entry details -2. Remove corrupted entry -3. Re-detect triggers on next evaluation - -## Configuration - -### Rule Files - -Rules are stored in `.deepwork/rules/` as individual markdown files with YAML frontmatter. See `doc/rules_syntax.md` for complete syntax documentation. - -**Loading Order:** -1. All `.md` files in `.deepwork/rules/` are loaded -2. Files are processed in alphabetical order -3. Filename (without extension) becomes rule identifier - -**Rule Discovery:** -```python -def load_rules(rules_dir: Path) -> list[Rule]: - """Load all rules from the rules directory.""" - rules = [] - for path in sorted(rules_dir.glob("*.md")): - rule = parse_rule_file(path) - rule.name = path.stem # filename without .md - rules.append(rule) - return rules -``` - -### System Configuration - -In `.deepwork/config.yml`: - -```yaml -rules: - enabled: true - rules_dir: .deepwork/rules # Can be customized -``` - -## Performance Considerations - -### Caching - -- Pattern compilation is cached per-session -- Baseline diffs are cached by commit hash -- Queue lookups use hash-based O(1) access - -### Lazy Evaluation - -- Patterns only compiled when needed -- File lists only computed for triggered rules -- Instructions only loaded when rule fires - -### Parallel Processing - -- Multiple queue entries can be processed in parallel -- Command actions can run concurrently (with file locking) -- Pattern matching is parallelized across rules - -## Migration from Legacy System - -The legacy system used a single `.deepwork.rules.yml` file with array of rules. The new system uses individual markdown files in `.deepwork/rules/`. - -**Breaking Changes:** -- Single YAML file replaced with folder of markdown files -- Rule `name` field replaced with filename -- `instructions` / `instructions_file` replaced with markdown body -- New features: sets, pairs, commands, queue-based state - -**No backwards compatibility is provided.** Existing `.deepwork.rules.yml` files must be converted manually. - -**Conversion Example:** - -Old format (`.deepwork.rules.yml`): -```yaml -- name: "README Accuracy" - trigger: "src/**/*" - safety: "README.md" - instructions: | - Please verify README.md is accurate. -``` - -New format (`.deepwork/rules/readme-accuracy.md`): -```markdown ---- -trigger: src/**/* -safety: README.md ---- -Please verify README.md is accurate. 
-``` - -## Security Considerations - -### Command Execution - -- Commands run in sandboxed subprocess -- No shell expansion (arguments passed as array) -- Working directory is always repo root -- Environment variables are filtered - -### Queue File Permissions - -- Queue directory: 700 (owner only) -- Queue files: 600 (owner only) -- No sensitive data in queue entries - -### Input Validation - -- All rule files validated against schema -- Pattern variables sanitized before use -- File paths normalized and validated diff --git a/manual_tests/README.md b/manual_tests/README.md deleted file mode 100644 index 30e67849..00000000 --- a/manual_tests/README.md +++ /dev/null @@ -1,43 +0,0 @@ -# Manual Hook/Rule Tests - -This directory contains files designed to test different types of DeepWork rules/hooks. - -## How to Run These Tests - -**Use the `/manual_tests` job to run these tests.** - -``` -/manual_tests -``` - -This job automates the test execution process, ensuring: -1. All tests run in **sub-agents** (required for hooks to fire automatically) -2. "Should NOT fire" tests run in **parallel** for efficiency -3. "Should fire" tests run **serially** with git reverts between each to prevent cross-contamination -4. Hooks fire **automatically** when sub-agents complete (never manually triggered) - -## Why Use the Job? - -Running these tests correctly requires specific patterns: -- **Sub-agents are mandatory** - the main agent cannot trigger hooks by editing files directly -- **Hooks must fire automatically** - manually running `rules_check` defeats the purpose -- **Serial execution with reverts** - "should fire" tests must not run in parallel - -The `/manual_tests` job enforces all these requirements and guides you through the process. - -## Test Folders - -| Folder | Rule Type | -|--------|-----------| -| `test_trigger_safety_mode/` | Basic trigger/safety conditional | -| `test_set_mode/` | Bidirectional file pairing | -| `test_pair_mode/` | One-way directional pairing | -| `test_command_action/` | Automatic command execution | -| `test_multi_safety/` | Multiple safety files | -| `test_infinite_block_prompt/` | Infinite blocking with prompt | -| `test_infinite_block_command/` | Infinite blocking with command | -| `test_created_mode/` | New file creation detection | - -## Corresponding Rules - -Rules are defined in `.deepwork/rules/manual-test-*.md` diff --git a/manual_tests/test_command_action/test_command_action.txt b/manual_tests/test_command_action/test_command_action.txt deleted file mode 100644 index f32315ab..00000000 --- a/manual_tests/test_command_action/test_command_action.txt +++ /dev/null @@ -1,25 +0,0 @@ -MANUAL TEST: Command Action Rule - -=== WHAT THIS TESTS === -Tests the "command action" feature where a rule automatically -runs a shell command instead of prompting the agent. - -=== HOW TO TRIGGER === -Edit this file (add text, modify content, etc.) - -=== EXPECTED BEHAVIOR === -When this file is edited, the rule automatically runs a command -that appends a timestamped line to test_command_action_log.txt - -The command is idempotent: running it multiple times produces -consistent results (a log entry is appended). - -=== RULE LOCATION === -.deepwork/rules/manual-test-command-action.md - -=== LOG FILE === -Check test_command_action_log.txt for command execution results. 
- ---- -Edit below this line to trigger the command: ---- diff --git a/manual_tests/test_command_action/test_command_action_log.txt b/manual_tests/test_command_action/test_command_action_log.txt deleted file mode 100644 index 1ca155ed..00000000 --- a/manual_tests/test_command_action/test_command_action_log.txt +++ /dev/null @@ -1,3 +0,0 @@ -# Command Action Log -# Lines below are added automatically when test_command_action.txt is edited -# --- diff --git a/manual_tests/test_created_mode/existing_file.yml b/manual_tests/test_created_mode/existing_file.yml deleted file mode 100644 index dec0e532..00000000 --- a/manual_tests/test_created_mode/existing_file.yml +++ /dev/null @@ -1 +0,0 @@ -# This is a modification test diff --git a/manual_tests/test_infinite_block_command/test_infinite_block_command.py b/manual_tests/test_infinite_block_command/test_infinite_block_command.py deleted file mode 100644 index 22be16c7..00000000 --- a/manual_tests/test_infinite_block_command/test_infinite_block_command.py +++ /dev/null @@ -1,42 +0,0 @@ -""" -MANUAL TEST: Infinite Block Command Rule - -=== WHAT THIS TESTS === -Tests a COMMAND-type rule with a command that ALWAYS FAILS - it will ALWAYS -block when the trigger file is edited. - -This verifies: -1. The rule correctly blocks when the file is edited (command fails) -2. The error output includes guidance on how to skip using a promise -3. Without guidance in the output, the agent cannot know how to proceed - -=== TEST CASE 1: Rule SHOULD fire (command fails, infinite block) === -1. Edit this file (add a comment below the marker) -2. Run: echo '{}' | python -m deepwork.hooks.rules_check -3. Expected: Block with command error AND promise skip instructions - -=== TEST CASE 2: Rule should NOT fire (promise provided) === -1. Edit this file (add a comment below the marker) -2. Provide a promise (format shown in command error output) -3. Expected: Empty JSON {} (allow) - promise bypasses the command entirely - -=== RULE LOCATION === -.deepwork/rules/manual-test-infinite-block-command.md - -=== KEY DIFFERENCE FROM PROMPT VERSION === -- Prompt version: Shows instructions in the rule's markdown body -- Command version: Must show instructions alongside command error output - -If the command error output does NOT include promise skip instructions, -this is a bug - the agent has no way to know how to proceed. -""" - - -def restricted_command_operation(): - """An operation that requires explicit acknowledgment to proceed.""" - return "This operation uses a command that always fails" - - -# Edit below this line to trigger the rule -# ------------------------------------------- -# Test edit for command block diff --git a/manual_tests/test_infinite_block_prompt/test_infinite_block_prompt.py b/manual_tests/test_infinite_block_prompt/test_infinite_block_prompt.py deleted file mode 100644 index 5c2ee508..00000000 --- a/manual_tests/test_infinite_block_prompt/test_infinite_block_prompt.py +++ /dev/null @@ -1,57 +0,0 @@ -""" -MANUAL TEST: Infinite Block Prompt Rule (Promise Required) - -=== WHAT THIS TESTS === -Tests a PROMPT-type rule with NO safety file option - it will ALWAYS block -when the trigger file is edited. The only way to proceed is to provide a -promise in the correct format. - -This verifies: -1. The rule correctly blocks when the file is edited -2. The promise mechanism works to bypass the block -3. The promise must be in the exact format: Rule Name - -=== TEST CASE 1: Rule SHOULD fire (infinite block) === -1. Edit this file (add a comment below the marker) -2. 
Run: echo '{}' | python -m deepwork.hooks.rules_check -3. Expected: "Manual Test: Infinite Block Prompt" appears in output with decision="block" -4. The block message should explain that a promise is required - -=== TEST CASE 2: Rule should NOT fire (promise provided) === -1. Edit this file (add a comment below the marker) -2. Create a transcript with: Manual Test: Infinite Block Prompt -3. Run the hook with the transcript -4. Expected: Empty JSON {} (allow) - promise bypasses the block - -=== HOW TO TEST WITH PROMISE === -The promise must be in the conversation transcript. To test: - -1. Create a temp transcript file with the promise: - echo '{"role":"assistant","message":{"content":[{"type":"text","text":"Manual Test: Infinite Block Prompt"}]}}' > /tmp/transcript.jsonl - -2. Run with transcript: - echo '{"transcript_path":"/tmp/transcript.jsonl"}' | python -m deepwork.hooks.rules_check - -3. Expected: {} (empty JSON = allow) - -=== RULE LOCATION === -.deepwork/rules/manual-test-infinite-block-prompt.md - -=== KEY DIFFERENCE FROM OTHER TESTS === -Other tests have a "safety" file that can be edited to suppress the rule. -This test has NO safety option - the ONLY way to proceed is with a promise. -This simulates scenarios where the agent must explicitly acknowledge a -constraint before proceeding. - -=== COMPARISON WITH COMMAND VERSION === -See test_infinite_block_command/ for the command-action version of this test. -""" - - -def restricted_operation(): - """An operation that requires explicit acknowledgment to proceed.""" - return "This operation always requires a promise to proceed" - - -# Edit below this line to trigger the rule -# ------------------------------------------- diff --git a/manual_tests/test_multi_safety/test_multi_safety.py b/manual_tests/test_multi_safety/test_multi_safety.py deleted file mode 100644 index 27734025..00000000 --- a/manual_tests/test_multi_safety/test_multi_safety.py +++ /dev/null @@ -1,42 +0,0 @@ -""" -MANUAL TEST: Multiple Safety Patterns - -=== WHAT THIS TESTS === -Tests trigger/safety mode with MULTIPLE safety patterns: -- Rule fires when this file is edited alone -- Rule is suppressed if ANY of the safety files are also edited: - - test_multi_safety_changelog.md - - test_multi_safety_version.txt - -=== TEST CASE 1: Rule SHOULD fire === -1. Edit this file (add a comment below the marker) -2. Do NOT edit any safety files -3. Run: echo '{}' | python -m deepwork.hooks.rules_check -4. Expected: "Manual Test: Multi Safety" appears in output - -=== TEST CASE 2: Rule should NOT fire (changelog edited) === -1. Edit this file (add a comment below the marker) -2. ALSO edit test_multi_safety_changelog.md -3. Run: echo '{}' | python -m deepwork.hooks.rules_check -4. Expected: "Manual Test: Multi Safety" does NOT appear - -=== TEST CASE 3: Rule should NOT fire (version edited) === -1. Edit this file (add a comment below the marker) -2. ALSO edit test_multi_safety_version.txt -3. Run: echo '{}' | python -m deepwork.hooks.rules_check -4. 
Expected: "Manual Test: Multi Safety" does NOT appear - -=== RULE LOCATION === -.deepwork/rules/manual-test-multi-safety.md -""" - -VERSION = "1.0.0" - - -def get_version(): - """Return the current version.""" - return VERSION - - -# Edit below this line to trigger the rule -# ------------------------------------------- diff --git a/manual_tests/test_multi_safety/test_multi_safety_changelog.md b/manual_tests/test_multi_safety/test_multi_safety_changelog.md deleted file mode 100644 index d0a6e4f9..00000000 --- a/manual_tests/test_multi_safety/test_multi_safety_changelog.md +++ /dev/null @@ -1,16 +0,0 @@ -# Changelog (Multi-Safety Test) - -## What This File Does - -This is one of the "safety" files for the multi-safety test. -Editing this file suppresses the rule when the source is edited. - -## Changelog - -### v1.0.0 -- Initial release - ---- - -Edit below this line to suppress the multi-safety rule: - diff --git a/manual_tests/test_multi_safety/test_multi_safety_version.txt b/manual_tests/test_multi_safety/test_multi_safety_version.txt deleted file mode 100644 index b9cf607d..00000000 --- a/manual_tests/test_multi_safety/test_multi_safety_version.txt +++ /dev/null @@ -1,10 +0,0 @@ -Multi-Safety Version File - -This is one of the "safety" files for the multi-safety test. -Editing this file suppresses the rule when the source is edited. - -Current Version: 1.0.0 - ---- -Edit below this line to suppress the multi-safety rule: ---- diff --git a/manual_tests/test_pair_mode/test_pair_mode_expected.md b/manual_tests/test_pair_mode/test_pair_mode_expected.md deleted file mode 100644 index b4f286bd..00000000 --- a/manual_tests/test_pair_mode/test_pair_mode_expected.md +++ /dev/null @@ -1,31 +0,0 @@ -# API Documentation (Pair Mode Expected File) - -## What This File Does - -This is the "expected" file in a pair mode rule. - -## Pair Mode Behavior - -- When `test_pair_mode_trigger.py` changes, this file MUST also change -- When THIS file changes alone, NO rule fires (docs can update independently) - -## API Reference - -### `api_endpoint()` - -Returns a status response. - -**Returns:** `{"status": "ok", "message": "API response"}` - ---- - -## Testing Instructions - -1. To TRIGGER the rule: Edit only `test_pair_mode_trigger.py` -2. To verify ONE-WAY: Edit only this file (rule should NOT fire) -3. To SATISFY the rule: Edit both files together - ---- - -Edit below this line (editing here alone should NOT trigger the rule): - diff --git a/manual_tests/test_pair_mode/test_pair_mode_trigger.py b/manual_tests/test_pair_mode/test_pair_mode_trigger.py deleted file mode 100644 index 369dd18a..00000000 --- a/manual_tests/test_pair_mode/test_pair_mode_trigger.py +++ /dev/null @@ -1,47 +0,0 @@ -""" -MANUAL TEST: Pair Mode (Directional Correspondence) - -=== WHAT THIS TESTS === -Tests the "pair" detection mode where there's a ONE-WAY relationship: -- This file is the TRIGGER -- test_pair_mode_expected.md is the EXPECTED file -- When THIS file changes, the expected file MUST also change -- But the expected file CAN change independently (no rule fires) - -=== TEST CASE 1: Rule SHOULD fire === -1. Edit this file (add a comment below the marker) -2. Do NOT edit test_pair_mode_expected.md -3. Run: echo '{}' | python -m deepwork.hooks.rules_check -4. Expected: "Manual Test: Pair Mode" appears in output - -=== TEST CASE 2: Rule should NOT fire (both edited) === -1. Edit this file (add a comment below the marker) -2. ALSO edit test_pair_mode_expected.md -3. Run: echo '{}' | python -m deepwork.hooks.rules_check -4. 
Expected: "Manual Test: Pair Mode" does NOT appear - -=== TEST CASE 3: Rule should NOT fire (expected only) === -1. Do NOT edit this file -2. Edit ONLY test_pair_mode_expected.md -3. Run: echo '{}' | python -m deepwork.hooks.rules_check -4. Expected: "Manual Test: Pair Mode" does NOT appear - (This verifies the ONE-WAY nature of pair mode) - -=== RULE LOCATION === -.deepwork/rules/manual-test-pair-mode.md -""" - - -def api_endpoint(): - """ - An API endpoint that requires documentation. - - This simulates an API file where changes require - documentation updates, but docs can be updated - independently (for typos, clarifications, etc.) - """ - return {"status": "ok", "message": "API response"} - - -# Edit below this line to trigger the rule -# ------------------------------------------- diff --git a/manual_tests/test_set_mode/test_set_mode_source.py b/manual_tests/test_set_mode/test_set_mode_source.py deleted file mode 100644 index 6649e424..00000000 --- a/manual_tests/test_set_mode/test_set_mode_source.py +++ /dev/null @@ -1,40 +0,0 @@ -""" -MANUAL TEST: Set Mode (Bidirectional Correspondence) - -=== WHAT THIS TESTS === -Tests the "set" detection mode where files must change together: -- This source file and test_set_mode_test.py are in a "set" -- If EITHER file changes, the OTHER must also change -- This is BIDIRECTIONAL (works in both directions) - -=== TEST CASE 1: Rule SHOULD fire === -1. Edit this file (add a comment below the marker) -2. Do NOT edit test_set_mode_test.py -3. Run: echo '{}' | python -m deepwork.hooks.rules_check -4. Expected: "Manual Test: Set Mode" appears in output - -=== TEST CASE 2: Rule should NOT fire === -1. Edit this file (add a comment below the marker) -2. ALSO edit test_set_mode_test.py -3. Run: echo '{}' | python -m deepwork.hooks.rules_check -4. Expected: "Manual Test: Set Mode" does NOT appear - -=== RULE LOCATION === -.deepwork/rules/manual-test-set-mode.md -""" - - -class Calculator: - """A simple calculator for testing set mode.""" - - def add(self, a: int, b: int) -> int: - """Add two numbers.""" - return a + b - - def subtract(self, a: int, b: int) -> int: - """Subtract b from a.""" - return a - b - - -# Edit below this line to trigger the rule -# ------------------------------------------- diff --git a/manual_tests/test_set_mode/test_set_mode_test.py b/manual_tests/test_set_mode/test_set_mode_test.py deleted file mode 100644 index 3ef349e4..00000000 --- a/manual_tests/test_set_mode/test_set_mode_test.py +++ /dev/null @@ -1,37 +0,0 @@ -""" -MANUAL TEST: Set Mode - Test File (Bidirectional Correspondence) - -=== WHAT THIS TESTS === -This is the TEST file for the set mode test. -It must change together with test_set_mode_source.py. 
- -=== HOW TO TRIGGER === -Option A: Edit this file alone (without test_set_mode_source.py) -Option B: Edit test_set_mode_source.py alone (without this file) - -=== EXPECTED BEHAVIOR === -- Edit this file alone -> Rule fires, expects source file to also change -- Edit source file alone -> Rule fires, expects this file to also change -- Edit BOTH files -> Rule is satisfied (no fire) - -=== RULE LOCATION === -.deepwork/rules/manual-test-set-mode.md -""" - -from test_set_mode_source import Calculator - - -def test_add(): - """Test the add method.""" - calc = Calculator() - assert calc.add(2, 3) == 5 - - -def test_subtract(): - """Test the subtract method.""" - calc = Calculator() - assert calc.subtract(5, 3) == 2 - - -# Edit below this line to trigger the rule -# ------------------------------------------- diff --git a/manual_tests/test_trigger_safety_mode/test_trigger_safety_mode.py b/manual_tests/test_trigger_safety_mode/test_trigger_safety_mode.py deleted file mode 100644 index 68bf59b0..00000000 --- a/manual_tests/test_trigger_safety_mode/test_trigger_safety_mode.py +++ /dev/null @@ -1,32 +0,0 @@ -""" -MANUAL TEST: Trigger/Safety Mode Rule - -=== WHAT THIS TESTS === -Tests the basic trigger/safety detection mode where: -- Rule FIRES when this file is edited alone -- Rule is SUPPRESSED when test_trigger_safety_mode_doc.md is also edited - -=== TEST CASE 1: Rule SHOULD fire === -1. Edit this file (add a comment below the marker) -2. Do NOT edit test_trigger_safety_mode_doc.md -3. Run: echo '{}' | python -m deepwork.hooks.rules_check -4. Expected: "Manual Test: Trigger Safety" appears in output - -=== TEST CASE 2: Rule should NOT fire === -1. Edit this file (add a comment below the marker) -2. ALSO edit test_trigger_safety_mode_doc.md -3. Run: echo '{}' | python -m deepwork.hooks.rules_check -4. Expected: "Manual Test: Trigger Safety" does NOT appear - -=== RULE LOCATION === -.deepwork/rules/manual-test-trigger-safety.md -""" - - -def example_function(): - """An example function to demonstrate the trigger.""" - return "Hello from trigger safety test" - - -# Edit below this line to trigger the rule -# ------------------------------------------- diff --git a/manual_tests/test_trigger_safety_mode/test_trigger_safety_mode_doc.md b/manual_tests/test_trigger_safety_mode/test_trigger_safety_mode_doc.md deleted file mode 100644 index 625cf0b5..00000000 --- a/manual_tests/test_trigger_safety_mode/test_trigger_safety_mode_doc.md +++ /dev/null @@ -1,20 +0,0 @@ -# Documentation for Trigger Safety Test - -## What This File Does - -This is the "safety" file for the trigger/safety mode test. - -## How It Works - -When this file is edited ALONGSIDE `test_trigger_safety_mode.py`, -the trigger/safety rule is suppressed (does not fire). - -## Testing - -1. To TRIGGER the rule: Edit only `test_trigger_safety_mode.py` -2. To SUPPRESS the rule: Edit both files together - ---- - -Edit below this line to suppress the trigger/safety rule: - diff --git a/src/deepwork/cli/install.py b/src/deepwork/cli/install.py index 19bec4f8..dc945eb7 100644 --- a/src/deepwork/cli/install.py +++ b/src/deepwork/cli/install.py @@ -88,20 +88,6 @@ def _inject_deepwork_jobs(jobs_dir: Path, project_path: Path) -> None: _inject_standard_job("deepwork_jobs", jobs_dir, project_path) -def _inject_deepwork_rules(jobs_dir: Path, project_path: Path) -> None: - """ - Inject the deepwork_rules job definition into the project. 
- - Args: - jobs_dir: Path to .deepwork/jobs directory - project_path: Path to project root (for relative path display) - - Raises: - InstallError: If injection fails - """ - _inject_standard_job("deepwork_rules", jobs_dir, project_path) - - def _create_deepwork_gitignore(deepwork_dir: Path) -> None: """ Create .gitignore file in .deepwork/ directory. @@ -149,89 +135,6 @@ def _create_tmp_directory(deepwork_dir: Path) -> None: ) -def _create_rules_directory(project_path: Path) -> bool: - """ - Create the v2 rules directory structure with example templates. - - Creates .deepwork/rules/ with example rule files that users can customize. - Only creates the directory if it doesn't already exist. - - Args: - project_path: Path to the project root - - Returns: - True if the directory was created, False if it already existed - """ - rules_dir = project_path / ".deepwork" / "rules" - - if rules_dir.exists(): - return False - - # Create the rules directory - ensure_dir(rules_dir) - - # Copy example rule templates from the deepwork_rules standard job - example_rules_dir = Path(__file__).parent.parent / "standard_jobs" / "deepwork_rules" / "rules" - - if example_rules_dir.exists(): - # Copy all .example files - for example_file in example_rules_dir.glob("*.md.example"): - dest_file = rules_dir / example_file.name - shutil.copy(example_file, dest_file) - # Fix permissions for copied rule template - fix_permissions(dest_file) - - # Create a README file explaining the rules system - readme_content = """# DeepWork Rules - -Rules are automated guardrails that trigger when specific files change during -AI agent sessions. They help ensure documentation stays current, security reviews -happen, and team guidelines are followed. - -## Getting Started - -1. Copy an example file and rename it (remove the `.example` suffix): - ``` - cp readme-documentation.md.example readme-documentation.md - ``` - -2. Edit the file to match your project's patterns - -3. The rule will automatically trigger when matching files change - -## Rule Format - -Rules use YAML frontmatter in markdown files: - -```markdown ---- -name: Rule Name -trigger: "pattern/**/*" -safety: "optional/pattern" ---- -Instructions in markdown here. -``` - -## Detection Modes - -- **trigger/safety**: Fire when trigger matches, unless safety also matches -- **set**: Bidirectional file correspondence (e.g., source + test) -- **pair**: Directional correspondence (e.g., API code -> docs) - -## Documentation - -See `doc/rules_syntax.md` in the DeepWork repository for full syntax documentation. - -## Creating Rules Interactively - -Use `/deepwork_rules.define` to create new rules with guidance. 
-""" - readme_path = rules_dir / "README.md" - readme_path.write_text(readme_content) - - return True - - class DynamicChoice(click.Choice): """A Click Choice that gets its values dynamically from AgentAdapter.""" @@ -354,7 +257,6 @@ def _install_deepwork(platform_name: str | None, project_path: Path) -> None: # Step 3b: Inject standard jobs (core job definitions) console.print("[yellow]→[/yellow] Installing core job definitions...") _inject_deepwork_jobs(jobs_dir, project_path) - _inject_deepwork_rules(jobs_dir, project_path) # Step 3c: Create .gitignore for temporary files _create_deepwork_gitignore(deepwork_dir) @@ -364,12 +266,6 @@ def _install_deepwork(platform_name: str | None, project_path: Path) -> None: _create_tmp_directory(deepwork_dir) console.print(" [green]✓[/green] Created .deepwork/tmp/.gitkeep") - # Step 3e: Create rules directory with v2 templates - if _create_rules_directory(project_path): - console.print(" [green]✓[/green] Created .deepwork/rules/ with example templates") - else: - console.print(" [dim]•[/dim] .deepwork/rules/ already exists") - # Step 4: Load or create config.yml console.print("[yellow]→[/yellow] Updating configuration...") config_file = deepwork_dir / "config.yml" diff --git a/src/deepwork/cli/main.py b/src/deepwork/cli/main.py index b503ea9a..840decbf 100644 --- a/src/deepwork/cli/main.py +++ b/src/deepwork/cli/main.py @@ -16,13 +16,11 @@ def cli() -> None: # Import commands from deepwork.cli.hook import hook # noqa: E402 from deepwork.cli.install import install # noqa: E402 -from deepwork.cli.rules import rules # noqa: E402 from deepwork.cli.sync import sync # noqa: E402 cli.add_command(install) cli.add_command(sync) cli.add_command(hook) -cli.add_command(rules) if __name__ == "__main__": diff --git a/src/deepwork/cli/rules.py b/src/deepwork/cli/rules.py deleted file mode 100644 index 54bc132e..00000000 --- a/src/deepwork/cli/rules.py +++ /dev/null @@ -1,32 +0,0 @@ -"""Rules command for DeepWork CLI.""" - -import click -from rich.console import Console - -from deepwork.core.rules_queue import RulesQueue - -console = Console() - - -@click.group() -def rules() -> None: - """Manage DeepWork rules and queue.""" - pass - - -@rules.command(name="clear_queue") -def clear_queue() -> None: - """ - Clear all entries from the rules queue. - - Removes all JSON files from .deepwork/tmp/rules/queue/. - This is useful for resetting the queue between tests or after - manual verification of rule states. - """ - queue = RulesQueue() - count = queue.clear() - - if count == 0: - console.print("[yellow]Queue is already empty[/yellow]") - else: - console.print(f"[green]Cleared {count} queue entry/entries[/green]") diff --git a/src/deepwork/core/command_executor.py b/src/deepwork/core/command_executor.py deleted file mode 100644 index 74288a13..00000000 --- a/src/deepwork/core/command_executor.py +++ /dev/null @@ -1,190 +0,0 @@ -"""Execute command actions for rules.""" - -import shlex -import subprocess -from dataclasses import dataclass -from pathlib import Path - -from deepwork.core.rules_parser import CommandAction - - -@dataclass -class CommandResult: - """Result of executing a command.""" - - success: bool - exit_code: int - stdout: str - stderr: str - command: str # The actual command that was run - - -def substitute_command_variables( - command_template: str, - file: str | None = None, - files: list[str] | None = None, - repo_root: Path | None = None, -) -> str: - """ - Substitute template variables in a command string. 
- - Variables: - - {file} - Single file path - - {files} - Space-separated file paths - - {repo_root} - Repository root directory - - Args: - command_template: Command string with {var} placeholders - file: Single file path (for run_for: each_match) - files: List of file paths (for run_for: all_matches) - repo_root: Repository root path - - Returns: - Command string with variables substituted - """ - result = command_template - - if file is not None: - # Quote file path to prevent command injection - result = result.replace("{file}", shlex.quote(file)) - - if files is not None: - # Quote each file path individually - quoted_files = " ".join(shlex.quote(f) for f in files) - result = result.replace("{files}", quoted_files) - - if repo_root is not None: - result = result.replace("{repo_root}", shlex.quote(str(repo_root))) - - return result - - -def execute_command( - command: str, - cwd: Path | None = None, - timeout: int = 60, -) -> CommandResult: - """ - Execute a command and capture output. - - Args: - command: Command string to execute - cwd: Working directory (defaults to current directory) - timeout: Timeout in seconds - - Returns: - CommandResult with execution details - """ - try: - # Run command as shell to support pipes, etc. - result = subprocess.run( - command, - shell=True, - cwd=cwd, - capture_output=True, - text=True, - timeout=timeout, - ) - - return CommandResult( - success=result.returncode == 0, - exit_code=result.returncode, - stdout=result.stdout, - stderr=result.stderr, - command=command, - ) - - except subprocess.TimeoutExpired: - return CommandResult( - success=False, - exit_code=-1, - stdout="", - stderr=f"Command timed out after {timeout} seconds", - command=command, - ) - except Exception as e: - return CommandResult( - success=False, - exit_code=-1, - stdout="", - stderr=str(e), - command=command, - ) - - -def run_command_action( - action: CommandAction, - trigger_files: list[str], - repo_root: Path | None = None, -) -> list[CommandResult]: - """ - Run a command action for the given trigger files. - - Args: - action: CommandAction configuration - trigger_files: Files that triggered the rule - repo_root: Repository root path - - Returns: - List of CommandResult (one per command execution) - """ - results: list[CommandResult] = [] - - if action.run_for == "each_match": - # Run command for each file individually - for file_path in trigger_files: - command = substitute_command_variables( - action.command, - file=file_path, - repo_root=repo_root, - ) - result = execute_command(command, cwd=repo_root) - results.append(result) - - elif action.run_for == "all_matches": - # Run command once with all files - command = substitute_command_variables( - action.command, - files=trigger_files, - repo_root=repo_root, - ) - result = execute_command(command, cwd=repo_root) - results.append(result) - - return results - - -def all_commands_succeeded(results: list[CommandResult]) -> bool: - """Check if all command executions succeeded.""" - return all(r.success for r in results) - - -def format_command_errors( - results: list[CommandResult], - rule_name: str | None = None, -) -> str: - """Format detailed error messages from failed commands. 
- - Args: - results: List of command execution results - rule_name: Optional rule name to include in error message - - Returns: - Formatted error message with command, exit code, stdout, and stderr - """ - errors: list[str] = [] - for result in results: - if not result.success: - parts: list[str] = [] - if rule_name: - parts.append(f"Rule: {rule_name}") - parts.append(f"Command: {result.command}") - parts.append(f"Exit code: {result.exit_code}") - if result.stdout and result.stdout.strip(): - parts.append(f"Stdout:\n{result.stdout.strip()}") - if result.stderr and result.stderr.strip(): - parts.append(f"Stderr:\n{result.stderr.strip()}") - if not result.stdout.strip() and not result.stderr.strip(): - parts.append("(no output)") - errors.append("\n".join(parts)) - return "\n\n".join(errors) diff --git a/src/deepwork/core/pattern_matcher.py b/src/deepwork/core/pattern_matcher.py deleted file mode 100644 index c82ec723..00000000 --- a/src/deepwork/core/pattern_matcher.py +++ /dev/null @@ -1,271 +0,0 @@ -"""Pattern matching with variable extraction for rule file correspondence.""" - -import re -from dataclasses import dataclass -from fnmatch import fnmatch - - -class PatternError(Exception): - """Exception raised for invalid pattern syntax.""" - - pass - - -@dataclass -class MatchResult: - """Result of matching a file against a pattern.""" - - matched: bool - variables: dict[str, str] # Captured variable values - - @classmethod - def no_match(cls) -> "MatchResult": - return cls(matched=False, variables={}) - - @classmethod - def match(cls, variables: dict[str, str] | None = None) -> "MatchResult": - return cls(matched=True, variables=variables or {}) - - -def validate_pattern(pattern: str) -> None: - """ - Validate pattern syntax. - - Raises: - PatternError: If pattern has invalid syntax - """ - # Check for unbalanced braces - brace_depth = 0 - for i, char in enumerate(pattern): - if char == "{": - brace_depth += 1 - elif char == "}": - brace_depth -= 1 - if brace_depth < 0: - raise PatternError(f"Unmatched closing brace at position {i}") - - if brace_depth > 0: - raise PatternError("Unclosed brace in pattern") - - # Extract and validate variable names - var_pattern = r"\{([^}]*)\}" - seen_vars: set[str] = set() - - for match in re.finditer(var_pattern, pattern): - var_name = match.group(1) - - # Check for empty variable name - if not var_name: - raise PatternError("Empty variable name in pattern") - - # Strip leading ** or * for validation - clean_name = var_name.lstrip("*") - if not clean_name: - # Just {*} or {**} is valid - continue - - # Check for invalid characters in variable name - if "/" in clean_name or "\\" in clean_name: - raise PatternError(f"Invalid character in variable name: {var_name}") - - # Check for duplicates (use clean name for comparison) - if clean_name in seen_vars: - raise PatternError(f"Duplicate variable: {clean_name}") - seen_vars.add(clean_name) - - -def pattern_to_regex(pattern: str) -> tuple[str, list[str]]: - """ - Convert a pattern with {var} placeholders to a regex. 
- - Variables: - - {path} or {**name} - Matches multiple path segments (.+) - - {name} or {*name} - Matches single path segment ([^/]+) - - Args: - pattern: Pattern string like "src/{path}.py" - - Returns: - Tuple of (regex_pattern, list_of_variable_names) - - Raises: - PatternError: If pattern has invalid syntax - """ - validate_pattern(pattern) - - # Normalize path separators - pattern = pattern.replace("\\", "/") - - result: list[str] = [] - var_names: list[str] = [] - pos = 0 - - # Parse pattern segments - while pos < len(pattern): - # Look for next variable - brace_start = pattern.find("{", pos) - - if brace_start == -1: - # No more variables, escape the rest - result.append(re.escape(pattern[pos:])) - break - - # Escape literal part before variable - if brace_start > pos: - result.append(re.escape(pattern[pos:brace_start])) - - # Find end of variable - brace_end = pattern.find("}", brace_start) - if brace_end == -1: - raise PatternError("Unclosed brace in pattern") - - var_spec = pattern[brace_start + 1 : brace_end] - - # Determine variable type and name - if var_spec.startswith("**"): - # Explicit multi-segment: {**name} - var_name = var_spec[2:] or "path" - regex_part = f"(?P<{re.escape(var_name)}>.+)" - elif var_spec.startswith("*"): - # Explicit single-segment: {*name} - var_name = var_spec[1:] or "name" - regex_part = f"(?P<{re.escape(var_name)}>[^/]+)" - elif var_spec == "path": - # Conventional multi-segment - var_name = "path" - regex_part = "(?P.+)" - else: - # Default single-segment (including custom names) - var_name = var_spec - regex_part = f"(?P<{re.escape(var_name)}>[^/]+)" - - result.append(regex_part) - var_names.append(var_name) - pos = brace_end + 1 - - return "^" + "".join(result) + "$", var_names - - -def match_pattern(pattern: str, filepath: str) -> MatchResult: - """ - Match a filepath against a pattern, extracting variables. - - Args: - pattern: Pattern with {var} placeholders - filepath: File path to match - - Returns: - MatchResult with matched=True and captured variables, or matched=False - """ - # Normalize path separators - filepath = filepath.replace("\\", "/") - - try: - regex, _ = pattern_to_regex(pattern) - except PatternError: - return MatchResult.no_match() - - match = re.fullmatch(regex, filepath) - if match: - return MatchResult.match(match.groupdict()) - return MatchResult.no_match() - - -def resolve_pattern(pattern: str, variables: dict[str, str]) -> str: - """ - Substitute variables into a pattern to generate a filepath. - - Args: - pattern: Pattern with {var} placeholders - variables: Dict of variable name -> value - - Returns: - Resolved filepath string - """ - result = pattern - for name, value in variables.items(): - # Handle both {name} and {*name} / {**name} forms - result = result.replace(f"{{{name}}}", value) - result = result.replace(f"{{*{name}}}", value) - result = result.replace(f"{{**{name}}}", value) - return result - - -def matches_glob(file_path: str, pattern: str) -> bool: - """ - Match a file path against a glob pattern, supporting ** for recursive matching. - - This is for simple glob patterns without variable capture. - - Args: - file_path: File path to check - pattern: Glob pattern (supports *, **, ?) 
- - Returns: - True if matches - """ - # Normalize path separators - file_path = file_path.replace("\\", "/") - pattern = pattern.replace("\\", "/") - - # Handle ** patterns (recursive directory matching) - if "**" in pattern: - # Split pattern by ** - parts = pattern.split("**") - - if len(parts) == 2: - prefix, suffix = parts[0], parts[1] - - # Remove leading/trailing slashes from suffix - suffix = suffix.lstrip("/") - - # Check if prefix matches the start of the path - if prefix: - prefix = prefix.rstrip("/") - if not file_path.startswith(prefix + "/") and file_path != prefix: - return False - # Get the remaining path after prefix - remaining = file_path[len(prefix) :].lstrip("/") - else: - remaining = file_path - - # If no suffix, any remaining path matches - if not suffix: - return True - - # Check if suffix matches the end of any remaining path segment - remaining_parts = remaining.split("/") - for i in range(len(remaining_parts)): - test_path = "/".join(remaining_parts[i:]) - if fnmatch(test_path, suffix): - return True - # Also try just the filename - if fnmatch(remaining_parts[-1], suffix): - return True - - return False - - # Simple pattern without ** - return fnmatch(file_path, pattern) - - -def matches_any_pattern(file_path: str, patterns: list[str]) -> bool: - """ - Check if a file path matches any of the given glob patterns. - - Args: - file_path: File path to check (relative path) - patterns: List of glob patterns to match against - - Returns: - True if the file matches any pattern - """ - for pattern in patterns: - if matches_glob(file_path, pattern): - return True - return False - - -def has_variables(pattern: str) -> bool: - """Check if a pattern contains variable placeholders.""" - return "{" in pattern and "}" in pattern diff --git a/src/deepwork/core/rules_parser.py b/src/deepwork/core/rules_parser.py deleted file mode 100644 index 04b1e3d2..00000000 --- a/src/deepwork/core/rules_parser.py +++ /dev/null @@ -1,559 +0,0 @@ -"""Rule definition parser (v2 - frontmatter markdown format).""" - -from dataclasses import dataclass, field -from enum import Enum -from pathlib import Path -from typing import Any - -import yaml - -from deepwork.core.pattern_matcher import ( - has_variables, - match_pattern, - matches_any_pattern, - resolve_pattern, -) -from deepwork.schemas.rules_schema import RULES_FRONTMATTER_SCHEMA -from deepwork.utils.validation import ValidationError, validate_against_schema - - -class RulesParseError(Exception): - """Exception raised for rule parsing errors.""" - - pass - - -class DetectionMode(Enum): - """How the rule detects when to fire.""" - - TRIGGER_SAFETY = "trigger_safety" # Fire when trigger matches, safety doesn't - SET = "set" # Bidirectional file correspondence - PAIR = "pair" # Directional file correspondence - CREATED = "created" # Fire when created files match patterns - - -class ActionType(Enum): - """What happens when the rule fires.""" - - PROMPT = "prompt" # Show instructions to agent (default) - COMMAND = "command" # Run an idempotent command - - -# Valid compare_to values -COMPARE_TO_VALUES = frozenset({"base", "default_tip", "prompt"}) - - -@dataclass -class CommandAction: - """Configuration for command action.""" - - command: str # Command template (supports {file}, {files}, {repo_root}) - run_for: str = "each_match" # "each_match" or "all_matches" - - -@dataclass -class PairConfig: - """Configuration for pair detection mode.""" - - trigger: str # Pattern that triggers - expects: list[str] # Patterns for expected corresponding files - - 
-@dataclass -class Rule: - """Represents a single rule definition (v2 format).""" - - # Identity - name: str # Human-friendly name (displayed in promise tags) - filename: str # Filename without .md extension (used for queue) - - # Detection mode (exactly one must be set) - detection_mode: DetectionMode - - # Common options (required) - compare_to: str # Required: "base", "default_tip", or "prompt" - - # Detection mode details (optional, depends on mode) - triggers: list[str] = field(default_factory=list) # For TRIGGER_SAFETY mode - safety: list[str] = field(default_factory=list) # For TRIGGER_SAFETY mode - set_patterns: list[str] = field(default_factory=list) # For SET mode - pair_config: PairConfig | None = None # For PAIR mode - created_patterns: list[str] = field(default_factory=list) # For CREATED mode - - # Action type - action_type: ActionType = ActionType.PROMPT - instructions: str = "" # For PROMPT action (markdown body) - command_action: CommandAction | None = None # For COMMAND action - - @classmethod - def from_frontmatter( - cls, - frontmatter: dict[str, Any], - markdown_body: str, - filename: str, - ) -> "Rule": - """ - Create Rule from parsed frontmatter and markdown body. - - Args: - frontmatter: Parsed YAML frontmatter - markdown_body: Markdown content after frontmatter - filename: Filename without .md extension - - Returns: - Rule instance - - Raises: - RulesParseError: If validation fails - """ - # Get name (required) - name = frontmatter.get("name", "") - if not name: - raise RulesParseError(f"Rule '{filename}' missing required 'name' field") - - # Determine detection mode - has_trigger = "trigger" in frontmatter - has_set = "set" in frontmatter - has_pair = "pair" in frontmatter - has_created = "created" in frontmatter - - mode_count = sum([has_trigger, has_set, has_pair, has_created]) - if mode_count == 0: - raise RulesParseError(f"Rule '{name}' must have 'trigger', 'set', 'pair', or 'created'") - if mode_count > 1: - raise RulesParseError(f"Rule '{name}' has multiple detection modes - use only one") - - # Parse based on detection mode - detection_mode: DetectionMode - triggers: list[str] = [] - safety: list[str] = [] - set_patterns: list[str] = [] - pair_config: PairConfig | None = None - created_patterns: list[str] = [] - - if has_trigger: - detection_mode = DetectionMode.TRIGGER_SAFETY - trigger = frontmatter["trigger"] - triggers = [trigger] if isinstance(trigger, str) else list(trigger) - safety_data = frontmatter.get("safety", []) - safety = [safety_data] if isinstance(safety_data, str) else list(safety_data) - - elif has_set: - detection_mode = DetectionMode.SET - set_patterns = list(frontmatter["set"]) - if len(set_patterns) < 2: - raise RulesParseError(f"Rule '{name}' set requires at least 2 patterns") - - elif has_pair: - detection_mode = DetectionMode.PAIR - pair_data = frontmatter["pair"] - expects = pair_data["expects"] - expects_list = [expects] if isinstance(expects, str) else list(expects) - pair_config = PairConfig( - trigger=pair_data["trigger"], - expects=expects_list, - ) - - elif has_created: - detection_mode = DetectionMode.CREATED - created = frontmatter["created"] - created_patterns = [created] if isinstance(created, str) else list(created) - - # Determine action type - action_type: ActionType - command_action: CommandAction | None = None - - if "action" in frontmatter: - action_type = ActionType.COMMAND - action_data = frontmatter["action"] - command_action = CommandAction( - command=action_data["command"], - run_for=action_data.get("run_for", 
"each_match"), - ) - else: - action_type = ActionType.PROMPT - # Markdown body is the instructions - if not markdown_body.strip(): - raise RulesParseError(f"Rule '{name}' with prompt action requires markdown body") - - # Get compare_to (required field) - compare_to = frontmatter["compare_to"] - - return cls( - name=name, - filename=filename, - detection_mode=detection_mode, - triggers=triggers, - safety=safety, - set_patterns=set_patterns, - pair_config=pair_config, - created_patterns=created_patterns, - action_type=action_type, - instructions=markdown_body.strip(), - command_action=command_action, - compare_to=compare_to, - ) - - -def parse_frontmatter_file(filepath: Path) -> tuple[dict[str, Any], str]: - """ - Parse a markdown file with YAML frontmatter. - - Args: - filepath: Path to .md file - - Returns: - Tuple of (frontmatter_dict, markdown_body) - - Raises: - RulesParseError: If parsing fails - """ - try: - content = filepath.read_text(encoding="utf-8") - except OSError as e: - raise RulesParseError(f"Failed to read rule file: {e}") from e - - # Split frontmatter from body - if not content.startswith("---"): - raise RulesParseError( - f"Rule file '{filepath.name}' must start with '---' frontmatter delimiter" - ) - - # Find end of frontmatter - end_marker = content.find("\n---", 3) - if end_marker == -1: - raise RulesParseError( - f"Rule file '{filepath.name}' missing closing '---' frontmatter delimiter" - ) - - frontmatter_str = content[4:end_marker] # Skip initial "---\n" - markdown_body = content[end_marker + 4 :] # Skip "\n---\n" or "\n---" - - # Parse YAML frontmatter - try: - frontmatter = yaml.safe_load(frontmatter_str) - except yaml.YAMLError as e: - raise RulesParseError(f"Invalid YAML frontmatter in '{filepath.name}': {e}") from e - - if frontmatter is None: - frontmatter = {} - - if not isinstance(frontmatter, dict): - raise RulesParseError( - f"Frontmatter in '{filepath.name}' must be a mapping, got {type(frontmatter).__name__}" - ) - - return frontmatter, markdown_body - - -def parse_rule_file(filepath: Path) -> Rule: - """ - Parse a single rule from a frontmatter markdown file. - - Args: - filepath: Path to .md file in .deepwork/rules/ - - Returns: - Parsed Rule object - - Raises: - RulesParseError: If parsing or validation fails - """ - if not filepath.exists(): - raise RulesParseError(f"Rule file does not exist: {filepath}") - - if not filepath.is_file(): - raise RulesParseError(f"Rule path is not a file: {filepath}") - - frontmatter, markdown_body = parse_frontmatter_file(filepath) - - # Validate against schema - try: - validate_against_schema(frontmatter, RULES_FRONTMATTER_SCHEMA) - except ValidationError as e: - raise RulesParseError(f"Rule '{filepath.name}' validation failed: {e}") from e - - # Create Rule object - filename = filepath.stem # filename without .md extension - return Rule.from_frontmatter(frontmatter, markdown_body, filename) - - -def load_rules_from_directory(rules_dir: Path) -> list[Rule]: - """ - Load all rules from a directory. 
- - Args: - rules_dir: Path to .deepwork/rules/ directory - - Returns: - List of parsed Rule objects (sorted by filename) - - Raises: - RulesParseError: If any rule file fails to parse - """ - if not rules_dir.exists(): - return [] - - if not rules_dir.is_dir(): - raise RulesParseError(f"Rules path is not a directory: {rules_dir}") - - rules = [] - for filepath in sorted(rules_dir.glob("*.md")): - rule = parse_rule_file(filepath) - rules.append(rule) - - return rules - - -# ============================================================================= -# Evaluation Logic -# ============================================================================= - - -def evaluate_trigger_safety( - rule: Rule, - changed_files: list[str], -) -> bool: - """ - Evaluate a trigger/safety mode rule. - - Returns True if rule should fire: - - At least one changed file matches a trigger pattern - - AND no changed file matches a safety pattern - """ - # Check if any trigger matches - trigger_matched = False - for file_path in changed_files: - if matches_any_pattern(file_path, rule.triggers): - trigger_matched = True - break - - if not trigger_matched: - return False - - # Check if any safety pattern matches - if rule.safety: - for file_path in changed_files: - if matches_any_pattern(file_path, rule.safety): - return False - - return True - - -def evaluate_set_correspondence( - rule: Rule, - changed_files: list[str], -) -> tuple[bool, list[str], list[str]]: - """ - Evaluate a set (bidirectional correspondence) rule. - - Returns: - Tuple of (should_fire, trigger_files, missing_files) - - should_fire: True if correspondence is incomplete - - trigger_files: Files that triggered (matched a pattern) - - missing_files: Expected files that didn't change - """ - trigger_files: list[str] = [] - missing_files: list[str] = [] - changed_set = set(changed_files) - - for file_path in changed_files: - # Check each pattern in the set - for pattern in rule.set_patterns: - result = match_pattern(pattern, file_path) - if result.matched: - trigger_files.append(file_path) - - # Check if all other corresponding files also changed - for other_pattern in rule.set_patterns: - if other_pattern == pattern: - continue - - if has_variables(other_pattern): - expected = resolve_pattern(other_pattern, result.variables) - else: - expected = other_pattern - - if expected not in changed_set: - if expected not in missing_files: - missing_files.append(expected) - - break # Only match one pattern per file - - # Rule fires if there are trigger files with missing correspondences - should_fire = len(trigger_files) > 0 and len(missing_files) > 0 - return should_fire, trigger_files, missing_files - - -def evaluate_pair_correspondence( - rule: Rule, - changed_files: list[str], -) -> tuple[bool, list[str], list[str]]: - """ - Evaluate a pair (directional correspondence) rule. - - Only trigger-side changes require corresponding expected files. - Expected-side changes alone do not trigger. 
- - Returns: - Tuple of (should_fire, trigger_files, missing_files) - """ - if rule.pair_config is None: - return False, [], [] - - trigger_files: list[str] = [] - missing_files: list[str] = [] - changed_set = set(changed_files) - - trigger_pattern = rule.pair_config.trigger - expects_patterns = rule.pair_config.expects - - for file_path in changed_files: - # Only check trigger pattern (directional) - result = match_pattern(trigger_pattern, file_path) - if result.matched: - trigger_files.append(file_path) - - # Check if all expected files also changed - for expects_pattern in expects_patterns: - if has_variables(expects_pattern): - expected = resolve_pattern(expects_pattern, result.variables) - else: - expected = expects_pattern - - if expected not in changed_set: - if expected not in missing_files: - missing_files.append(expected) - - should_fire = len(trigger_files) > 0 and len(missing_files) > 0 - return should_fire, trigger_files, missing_files - - -def evaluate_created( - rule: Rule, - created_files: list[str], -) -> bool: - """ - Evaluate a created mode rule. - - Returns True if rule should fire: - - At least one created file matches a created pattern - """ - for file_path in created_files: - if matches_any_pattern(file_path, rule.created_patterns): - return True - return False - - -@dataclass -class RuleEvaluationResult: - """Result of evaluating a single rule.""" - - rule: Rule - should_fire: bool - trigger_files: list[str] = field(default_factory=list) - missing_files: list[str] = field(default_factory=list) # For set/pair modes - - -def evaluate_rule( - rule: Rule, - changed_files: list[str], - created_files: list[str] | None = None, -) -> RuleEvaluationResult: - """ - Evaluate whether a rule should fire based on changed files. - - Args: - rule: Rule to evaluate - changed_files: List of changed file paths (relative) - created_files: List of newly created file paths (relative), for CREATED mode - - Returns: - RuleEvaluationResult with evaluation details - """ - if rule.detection_mode == DetectionMode.TRIGGER_SAFETY: - should_fire = evaluate_trigger_safety(rule, changed_files) - trigger_files = ( - [f for f in changed_files if matches_any_pattern(f, rule.triggers)] - if should_fire - else [] - ) - return RuleEvaluationResult( - rule=rule, - should_fire=should_fire, - trigger_files=trigger_files, - ) - - elif rule.detection_mode == DetectionMode.SET: - should_fire, trigger_files, missing_files = evaluate_set_correspondence(rule, changed_files) - return RuleEvaluationResult( - rule=rule, - should_fire=should_fire, - trigger_files=trigger_files, - missing_files=missing_files, - ) - - elif rule.detection_mode == DetectionMode.PAIR: - should_fire, trigger_files, missing_files = evaluate_pair_correspondence( - rule, changed_files - ) - return RuleEvaluationResult( - rule=rule, - should_fire=should_fire, - trigger_files=trigger_files, - missing_files=missing_files, - ) - - elif rule.detection_mode == DetectionMode.CREATED: - files_to_check = created_files if created_files is not None else [] - should_fire = evaluate_created(rule, files_to_check) - trigger_files = ( - [f for f in files_to_check if matches_any_pattern(f, rule.created_patterns)] - if should_fire - else [] - ) - return RuleEvaluationResult( - rule=rule, - should_fire=should_fire, - trigger_files=trigger_files, - ) - - return RuleEvaluationResult(rule=rule, should_fire=False) - - -def evaluate_rules( - rules: list[Rule], - changed_files: list[str], - promised_rules: set[str] | None = None, - created_files: list[str] | None = 
None, -) -> list[RuleEvaluationResult]: - """ - Evaluate which rules should fire. - - Args: - rules: List of rules to evaluate - changed_files: List of changed file paths (relative) - promised_rules: Set of rule names that have been marked as addressed - via tags (case-insensitive) - created_files: List of newly created file paths (relative), for CREATED mode - - Returns: - List of RuleEvaluationResult for rules that should fire - """ - if promised_rules is None: - promised_rules = set() - - # Normalize promised names for case-insensitive comparison - promised_lower = {name.lower() for name in promised_rules} - - results = [] - for rule in rules: - # Skip if already promised/addressed (case-insensitive) - if rule.name.lower() in promised_lower: - continue - - result = evaluate_rule(rule, changed_files, created_files) - if result.should_fire: - results.append(result) - - return results diff --git a/src/deepwork/core/rules_queue.py b/src/deepwork/core/rules_queue.py deleted file mode 100644 index 4f49a4fe..00000000 --- a/src/deepwork/core/rules_queue.py +++ /dev/null @@ -1,321 +0,0 @@ -"""Queue system for tracking rule state in .deepwork/tmp/rules/queue/.""" - -import hashlib -import json -from dataclasses import asdict, dataclass, field -from datetime import UTC, datetime -from enum import Enum -from pathlib import Path -from typing import Any - - -class QueueEntryStatus(Enum): - """Status of a queue entry.""" - - QUEUED = "queued" # Detected, awaiting evaluation - PASSED = "passed" # Evaluated, rule satisfied (promise found or action succeeded) - FAILED = "failed" # Evaluated, rule not satisfied - SKIPPED = "skipped" # Safety pattern matched, skipped - - -@dataclass -class ActionResult: - """Result of executing a rule action.""" - - type: str # "prompt" or "command" - output: str | None = None # Command stdout or prompt message shown - exit_code: int | None = None # Command exit code (None for prompt) - - -@dataclass -class QueueEntry: - """A single entry in the rules queue.""" - - # Identity - rule_name: str # Human-friendly name - rule_file: str # Filename (e.g., "source-test-pairing.md") - trigger_hash: str # Hash for deduplication - - # State - status: QueueEntryStatus = QueueEntryStatus.QUEUED - created_at: str = "" # ISO8601 timestamp - evaluated_at: str | None = None # ISO8601 timestamp - - # Context - baseline_ref: str = "" # Commit hash or timestamp used as baseline - trigger_files: list[str] = field(default_factory=list) - expected_files: list[str] = field(default_factory=list) # For set/pair modes - matched_files: list[str] = field(default_factory=list) # Files that also changed - - # Result - action_result: ActionResult | None = None - - def __post_init__(self) -> None: - if not self.created_at: - self.created_at = datetime.now(UTC).isoformat() - - def to_dict(self) -> dict[str, Any]: - """Convert to dictionary for JSON serialization.""" - data = asdict(self) - data["status"] = self.status.value - if self.action_result: - data["action_result"] = asdict(self.action_result) - return data - - @classmethod - def from_dict(cls, data: dict[str, Any]) -> "QueueEntry": - """Create from dictionary.""" - action_result = None - if data.get("action_result"): - action_result = ActionResult(**data["action_result"]) - - return cls( - rule_name=data.get("rule_name", data.get("policy_name", "")), - rule_file=data.get("rule_file", data.get("policy_file", "")), - trigger_hash=data["trigger_hash"], - status=QueueEntryStatus(data["status"]), - created_at=data.get("created_at", ""), - 
evaluated_at=data.get("evaluated_at"), - baseline_ref=data.get("baseline_ref", ""), - trigger_files=data.get("trigger_files", []), - expected_files=data.get("expected_files", []), - matched_files=data.get("matched_files", []), - action_result=action_result, - ) - - -def compute_trigger_hash( - rule_name: str, - trigger_files: list[str], - baseline_ref: str, -) -> str: - """ - Compute a hash for deduplication. - - The hash is based on: - - Rule name - - Sorted list of trigger files - - Baseline reference (commit hash or timestamp) - - Returns: - 12-character hex hash - """ - hash_input = f"{rule_name}:{sorted(trigger_files)}:{baseline_ref}" - return hashlib.sha256(hash_input.encode()).hexdigest()[:12] - - -class RulesQueue: - """ - Manages the rules queue in .deepwork/tmp/rules/queue/. - - Queue entries are stored as JSON files named {hash}.{status}.json - """ - - def __init__(self, queue_dir: Path | None = None): - """ - Initialize the queue. - - Args: - queue_dir: Path to queue directory. Defaults to .deepwork/tmp/rules/queue/ - """ - if queue_dir is None: - queue_dir = Path(".deepwork/tmp/rules/queue") - self.queue_dir = queue_dir - - def _ensure_dir(self) -> None: - """Ensure queue directory exists.""" - self.queue_dir.mkdir(parents=True, exist_ok=True) - - def _get_entry_path(self, trigger_hash: str, status: QueueEntryStatus) -> Path: - """Get path for an entry file.""" - return self.queue_dir / f"{trigger_hash}.{status.value}.json" - - def _find_entry_path(self, trigger_hash: str) -> Path | None: - """Find existing entry file for a hash (any status).""" - for status in QueueEntryStatus: - path = self._get_entry_path(trigger_hash, status) - if path.exists(): - return path - return None - - def has_entry(self, trigger_hash: str) -> bool: - """Check if an entry exists for this hash.""" - return self._find_entry_path(trigger_hash) is not None - - def get_entry(self, trigger_hash: str) -> QueueEntry | None: - """Get an entry by hash.""" - path = self._find_entry_path(trigger_hash) - if path is None: - return None - - try: - with open(path, encoding="utf-8") as f: - data = json.load(f) - return QueueEntry.from_dict(data) - except (json.JSONDecodeError, OSError, KeyError): - return None - - def create_entry( - self, - rule_name: str, - rule_file: str, - trigger_files: list[str], - baseline_ref: str, - expected_files: list[str] | None = None, - ) -> QueueEntry | None: - """ - Create a new queue entry if one doesn't already exist. 
- - Args: - rule_name: Human-friendly rule name - rule_file: Rule filename (e.g., "source-test-pairing.md") - trigger_files: Files that triggered the rule - baseline_ref: Baseline reference for change detection - expected_files: Expected corresponding files (for set/pair) - - Returns: - Created QueueEntry, or None if entry already exists - """ - trigger_hash = compute_trigger_hash(rule_name, trigger_files, baseline_ref) - - # Check if already exists - if self.has_entry(trigger_hash): - return None - - self._ensure_dir() - - entry = QueueEntry( - rule_name=rule_name, - rule_file=rule_file, - trigger_hash=trigger_hash, - status=QueueEntryStatus.QUEUED, - baseline_ref=baseline_ref, - trigger_files=trigger_files, - expected_files=expected_files or [], - ) - - path = self._get_entry_path(trigger_hash, QueueEntryStatus.QUEUED) - with open(path, "w", encoding="utf-8") as f: - json.dump(entry.to_dict(), f, indent=2) - - return entry - - def update_status( - self, - trigger_hash: str, - new_status: QueueEntryStatus, - action_result: ActionResult | None = None, - ) -> bool: - """ - Update the status of an entry. - - This renames the file to reflect the new status. - - Args: - trigger_hash: Hash of the entry to update - new_status: New status - action_result: Optional result of action execution - - Returns: - True if updated, False if entry not found - """ - old_path = self._find_entry_path(trigger_hash) - if old_path is None: - return False - - # Load existing entry - try: - with open(old_path, encoding="utf-8") as f: - data = json.load(f) - except (json.JSONDecodeError, OSError): - return False - - # Update fields - data["status"] = new_status.value - data["evaluated_at"] = datetime.now(UTC).isoformat() - if action_result: - data["action_result"] = asdict(action_result) - - # Write to new path - new_path = self._get_entry_path(trigger_hash, new_status) - - # If status didn't change, just update in place - if old_path == new_path: - with open(new_path, "w", encoding="utf-8") as f: - json.dump(data, f, indent=2) - else: - # Write new file then delete old - with open(new_path, "w", encoding="utf-8") as f: - json.dump(data, f, indent=2) - old_path.unlink() - - return True - - def get_queued_entries(self) -> list[QueueEntry]: - """Get all entries with QUEUED status.""" - if not self.queue_dir.exists(): - return [] - - entries = [] - for path in self.queue_dir.glob("*.queued.json"): - try: - with open(path, encoding="utf-8") as f: - data = json.load(f) - entries.append(QueueEntry.from_dict(data)) - except (json.JSONDecodeError, OSError, KeyError): - continue - - return entries - - def get_all_entries(self) -> list[QueueEntry]: - """Get all entries regardless of status.""" - if not self.queue_dir.exists(): - return [] - - entries = [] - for path in self.queue_dir.glob("*.json"): - try: - with open(path, encoding="utf-8") as f: - data = json.load(f) - entries.append(QueueEntry.from_dict(data)) - except (json.JSONDecodeError, OSError, KeyError): - continue - - return entries - - def clear(self) -> int: - """ - Clear all entries from the queue. - - Returns: - Number of entries removed - """ - if not self.queue_dir.exists(): - return 0 - - count = 0 - for path in self.queue_dir.glob("*.json"): - try: - path.unlink() - count += 1 - except OSError: - continue - - return count - - def remove_entry(self, trigger_hash: str) -> bool: - """ - Remove an entry by hash. 
- - Returns: - True if removed, False if not found - """ - path = self._find_entry_path(trigger_hash) - if path is None: - return False - - try: - path.unlink() - return True - except OSError: - return False diff --git a/src/deepwork/hooks/README.md b/src/deepwork/hooks/README.md index 9c3dd887..262f1cf9 100644 --- a/src/deepwork/hooks/README.md +++ b/src/deepwork/hooks/README.md @@ -15,51 +15,8 @@ The hook system provides: - Output denormalization (decision values, JSON structure) - Cross-platform compatibility -3. **Hook implementations**: - - `rules_check.py` - Evaluates DeepWork rules on `after_agent` events - ## Usage -### Registering Hooks - -#### Claude Code (`.claude/settings.json`) - -```json -{ - "hooks": { - "Stop": [ - { - "hooks": [ - { - "type": "command", - "command": "path/to/claude_hook.sh deepwork.hooks.rules_check" - } - ] - } - ] - } -} -``` - -#### Gemini CLI (`.gemini/settings.json`) - -```json -{ - "hooks": { - "AfterAgent": [ - { - "hooks": [ - { - "type": "command", - "command": "path/to/gemini_hook.sh deepwork.hooks.rules_check" - } - ] - } - ] - } -} -``` - ### Writing Custom Hooks 1. Create a new Python module in `deepwork/hooks/`: @@ -178,4 +135,3 @@ pytest tests/shell_script_tests/test_hook_wrappers.py -v | `wrapper.py` | Cross-platform input/output normalization | | `claude_hook.sh` | Shell wrapper for Claude Code | | `gemini_hook.sh` | Shell wrapper for Gemini CLI | -| `rules_check.py` | Cross-platform rule evaluation hook | diff --git a/src/deepwork/hooks/rules_check.py b/src/deepwork/hooks/rules_check.py deleted file mode 100644 index 6ac2d652..00000000 --- a/src/deepwork/hooks/rules_check.py +++ /dev/null @@ -1,759 +0,0 @@ -""" -Rules check hook for DeepWork (v2). - -This hook evaluates rules when the agent finishes (after_agent event). -It uses the wrapper system for cross-platform compatibility. - -Rule files are loaded from .deepwork/rules/ directory as frontmatter markdown files. 
- -Usage (via shell wrapper - recommended): - claude_hook.sh rules_check - gemini_hook.sh rules_check - -Or directly via deepwork CLI: - deepwork hook rules_check - -Or with platform environment variable: - DEEPWORK_HOOK_PLATFORM=claude deepwork hook rules_check -""" - -from __future__ import annotations - -import json -import os -import re -import subprocess -import sys -from pathlib import Path - -from deepwork.core.command_executor import ( - all_commands_succeeded, - format_command_errors, - run_command_action, -) -from deepwork.core.rules_parser import ( - ActionType, - DetectionMode, - Rule, - RuleEvaluationResult, - RulesParseError, - evaluate_rules, - load_rules_from_directory, -) -from deepwork.core.rules_queue import ( - ActionResult, - QueueEntryStatus, - RulesQueue, - compute_trigger_hash, -) -from deepwork.hooks.wrapper import ( - HookInput, - HookOutput, - NormalizedEvent, - Platform, - run_hook, -) - - -def get_default_branch() -> str: - """Get the default branch name (main or master).""" - try: - result = subprocess.run( - ["git", "symbolic-ref", "refs/remotes/origin/HEAD"], - capture_output=True, - text=True, - check=True, - ) - return result.stdout.strip().split("/")[-1] - except subprocess.CalledProcessError: - pass - - for branch in ["main", "master"]: - try: - subprocess.run( - ["git", "rev-parse", "--verify", f"origin/{branch}"], - capture_output=True, - check=True, - ) - return branch - except subprocess.CalledProcessError: - continue - - return "main" - - -def get_baseline_ref(mode: str) -> str: - """Get the baseline reference for a compare_to mode.""" - if mode == "base": - try: - default_branch = get_default_branch() - result = subprocess.run( - ["git", "merge-base", "HEAD", f"origin/{default_branch}"], - capture_output=True, - text=True, - check=True, - ) - return result.stdout.strip() - except subprocess.CalledProcessError: - return "base" - elif mode == "default_tip": - try: - default_branch = get_default_branch() - result = subprocess.run( - ["git", "rev-parse", f"origin/{default_branch}"], - capture_output=True, - text=True, - check=True, - ) - return result.stdout.strip() - except subprocess.CalledProcessError: - return "default_tip" - elif mode == "prompt": - baseline_path = Path(".deepwork/.last_work_tree") - if baseline_path.exists(): - # Use file modification time as reference - return str(int(baseline_path.stat().st_mtime)) - return "prompt" - return mode - - -def get_changed_files_base() -> list[str]: - """Get files changed relative to branch base.""" - default_branch = get_default_branch() - - try: - result = subprocess.run( - ["git", "merge-base", "HEAD", f"origin/{default_branch}"], - capture_output=True, - text=True, - check=True, - ) - merge_base = result.stdout.strip() - - subprocess.run(["git", "add", "-A"], capture_output=True, check=False) - - result = subprocess.run( - ["git", "diff", "--name-only", merge_base, "HEAD"], - capture_output=True, - text=True, - check=True, - ) - committed_files = set(result.stdout.strip().split("\n")) if result.stdout.strip() else set() - - result = subprocess.run( - ["git", "diff", "--name-only", "--cached"], - capture_output=True, - text=True, - check=False, - ) - staged_files = set(result.stdout.strip().split("\n")) if result.stdout.strip() else set() - - result = subprocess.run( - ["git", "ls-files", "--others", "--exclude-standard"], - capture_output=True, - text=True, - check=False, - ) - untracked_files = set(result.stdout.strip().split("\n")) if result.stdout.strip() else set() - - all_files = 
committed_files | staged_files | untracked_files - return sorted([f for f in all_files if f]) - - except subprocess.CalledProcessError: - return [] - - -def get_changed_files_default_tip() -> list[str]: - """Get files changed compared to default branch tip.""" - default_branch = get_default_branch() - - try: - subprocess.run(["git", "add", "-A"], capture_output=True, check=False) - - result = subprocess.run( - ["git", "diff", "--name-only", f"origin/{default_branch}..HEAD"], - capture_output=True, - text=True, - check=True, - ) - committed_files = set(result.stdout.strip().split("\n")) if result.stdout.strip() else set() - - result = subprocess.run( - ["git", "diff", "--name-only", "--cached"], - capture_output=True, - text=True, - check=False, - ) - staged_files = set(result.stdout.strip().split("\n")) if result.stdout.strip() else set() - - result = subprocess.run( - ["git", "ls-files", "--others", "--exclude-standard"], - capture_output=True, - text=True, - check=False, - ) - untracked_files = set(result.stdout.strip().split("\n")) if result.stdout.strip() else set() - - all_files = committed_files | staged_files | untracked_files - return sorted([f for f in all_files if f]) - - except subprocess.CalledProcessError: - return [] - - -def get_changed_files_prompt() -> list[str]: - """Get files changed since prompt was submitted. - - Returns files that changed since the prompt was submitted, including: - - Committed changes (compared to captured HEAD ref) - - Staged changes (not yet committed) - - Untracked files - - This is used by trigger/safety, set, and pair mode rules to detect - file modifications during the agent response. - """ - baseline_ref_path = Path(".deepwork/.last_head_ref") - changed_files: set[str] = set() - - try: - # Stage all changes first - subprocess.run(["git", "add", "-A"], capture_output=True, check=False) - - # If we have a captured HEAD ref, compare committed changes against it - if baseline_ref_path.exists(): - baseline_ref = baseline_ref_path.read_text().strip() - if baseline_ref: - # Get files changed in commits since the baseline - result = subprocess.run( - ["git", "diff", "--name-only", baseline_ref, "HEAD"], - capture_output=True, - text=True, - check=False, - ) - if result.returncode == 0 and result.stdout.strip(): - committed_files = set(result.stdout.strip().split("\n")) - changed_files.update(f for f in committed_files if f) - - # Also get currently staged changes (in case not everything is committed) - result = subprocess.run( - ["git", "diff", "--name-only", "--cached"], - capture_output=True, - text=True, - check=False, - ) - if result.stdout.strip(): - staged_files = set(result.stdout.strip().split("\n")) - changed_files.update(f for f in staged_files if f) - - # Include untracked files - result = subprocess.run( - ["git", "ls-files", "--others", "--exclude-standard"], - capture_output=True, - text=True, - check=False, - ) - if result.stdout.strip(): - untracked_files = set(result.stdout.strip().split("\n")) - changed_files.update(f for f in untracked_files if f) - - return sorted(changed_files) - - except (subprocess.CalledProcessError, OSError): - return [] - - -def get_changed_files_for_mode(mode: str) -> list[str]: - """Get changed files for a specific compare_to mode.""" - if mode == "base": - return get_changed_files_base() - elif mode == "default_tip": - return get_changed_files_default_tip() - elif mode == "prompt": - return get_changed_files_prompt() - else: - return get_changed_files_base() - - -def get_created_files_base() -> list[str]: 
- """Get files created (added) relative to branch base.""" - default_branch = get_default_branch() - - try: - result = subprocess.run( - ["git", "merge-base", "HEAD", f"origin/{default_branch}"], - capture_output=True, - text=True, - check=True, - ) - merge_base = result.stdout.strip() - - subprocess.run(["git", "add", "-A"], capture_output=True, check=False) - - # Get only added files (not modified) using --diff-filter=A - result = subprocess.run( - ["git", "diff", "--name-only", "--diff-filter=A", merge_base, "HEAD"], - capture_output=True, - text=True, - check=True, - ) - committed_added = set(result.stdout.strip().split("\n")) if result.stdout.strip() else set() - - # Staged new files that don't exist in merge_base - result = subprocess.run( - ["git", "diff", "--name-only", "--diff-filter=A", "--cached", merge_base], - capture_output=True, - text=True, - check=False, - ) - staged_added = set(result.stdout.strip().split("\n")) if result.stdout.strip() else set() - - # Untracked files are by definition "created" - result = subprocess.run( - ["git", "ls-files", "--others", "--exclude-standard"], - capture_output=True, - text=True, - check=False, - ) - untracked_files = set(result.stdout.strip().split("\n")) if result.stdout.strip() else set() - - all_created = committed_added | staged_added | untracked_files - return sorted([f for f in all_created if f]) - - except subprocess.CalledProcessError: - return [] - - -def get_created_files_default_tip() -> list[str]: - """Get files created compared to default branch tip.""" - default_branch = get_default_branch() - - try: - subprocess.run(["git", "add", "-A"], capture_output=True, check=False) - - # Get only added files using --diff-filter=A - result = subprocess.run( - ["git", "diff", "--name-only", "--diff-filter=A", f"origin/{default_branch}..HEAD"], - capture_output=True, - text=True, - check=True, - ) - committed_added = set(result.stdout.strip().split("\n")) if result.stdout.strip() else set() - - result = subprocess.run( - [ - "git", - "diff", - "--name-only", - "--diff-filter=A", - "--cached", - f"origin/{default_branch}", - ], - capture_output=True, - text=True, - check=False, - ) - staged_added = set(result.stdout.strip().split("\n")) if result.stdout.strip() else set() - - # Untracked files are by definition "created" - result = subprocess.run( - ["git", "ls-files", "--others", "--exclude-standard"], - capture_output=True, - text=True, - check=False, - ) - untracked_files = set(result.stdout.strip().split("\n")) if result.stdout.strip() else set() - - all_created = committed_added | staged_added | untracked_files - return sorted([f for f in all_created if f]) - - except subprocess.CalledProcessError: - return [] - - -def get_created_files_prompt() -> list[str]: - """Get files created since prompt was submitted.""" - baseline_path = Path(".deepwork/.last_work_tree") - - try: - subprocess.run(["git", "add", "-A"], capture_output=True, check=False) - - result = subprocess.run( - ["git", "diff", "--name-only", "--cached"], - capture_output=True, - text=True, - check=False, - ) - current_files = set(result.stdout.strip().split("\n")) if result.stdout.strip() else set() - current_files = {f for f in current_files if f} - - # Untracked files - result = subprocess.run( - ["git", "ls-files", "--others", "--exclude-standard"], - capture_output=True, - text=True, - check=False, - ) - untracked_files = set(result.stdout.strip().split("\n")) if result.stdout.strip() else set() - untracked_files = {f for f in untracked_files if f} - - all_current 
= current_files | untracked_files - - if baseline_path.exists(): - baseline_files = set(baseline_path.read_text().strip().split("\n")) - baseline_files = {f for f in baseline_files if f} - # Created files are those that didn't exist at baseline - created_files = all_current - baseline_files - return sorted(created_files) - else: - # No baseline means all current files are "new" to this prompt - return sorted(all_current) - - except (subprocess.CalledProcessError, OSError): - return [] - - -def get_created_files_for_mode(mode: str) -> list[str]: - """Get created files for a specific compare_to mode.""" - if mode == "base": - return get_created_files_base() - elif mode == "default_tip": - return get_created_files_default_tip() - elif mode == "prompt": - return get_created_files_prompt() - else: - return get_created_files_base() - - -def extract_promise_tags(text: str) -> set[str]: - """ - Extract rule names from tags in text. - - Supports both: - - Rule Name - - ✓ Rule Name - """ - # Match with optional checkmark prefix (✓ or ✓ with space) - pattern = r"(?:\s*)?(?:✓\s*)?([^<]+)" - matches = re.findall(pattern, text, re.IGNORECASE | re.DOTALL) - return {m.strip() for m in matches} - - -def extract_conversation_from_transcript(transcript_path: str, platform: Platform) -> str: - """ - Extract conversation text from a transcript file. - - Handles platform-specific transcript formats. - """ - if not transcript_path or not Path(transcript_path).exists(): - return "" - - try: - content = Path(transcript_path).read_text() - - if platform == Platform.CLAUDE: - # Claude uses JSONL format - each line is a JSON object - conversation_parts = [] - for line in content.strip().split("\n"): - if not line.strip(): - continue - try: - entry = json.loads(line) - if entry.get("role") == "assistant": - message_content = entry.get("message", {}).get("content", []) - for part in message_content: - if part.get("type") == "text": - conversation_parts.append(part.get("text", "")) - except json.JSONDecodeError: - continue - return "\n".join(conversation_parts) - - elif platform == Platform.GEMINI: - # Gemini uses JSON format - try: - data = json.loads(content) - # Extract text from messages - conversation_parts = [] - messages = data.get("messages", []) - for msg in messages: - if msg.get("role") == "model": - parts = msg.get("parts", []) - for part in parts: - if isinstance(part, dict) and "text" in part: - conversation_parts.append(part["text"]) - elif isinstance(part, str): - conversation_parts.append(part) - return "\n".join(conversation_parts) - except json.JSONDecodeError: - return "" - - return "" - except Exception: - return "" - - -def format_rules_message(results: list[RuleEvaluationResult]) -> str: - """ - Format triggered rules into a concise message for the agent. - - Groups rules by name and uses minimal formatting. - """ - lines = ["## DeepWork Rules Triggered", ""] - lines.append( - "Comply with the following rules. " - "To mark a rule as addressed, include `Rule Name` " - "in your response." 
- ) - lines.append("") - - # Group results by rule name - by_name: dict[str, list[RuleEvaluationResult]] = {} - for result in results: - name = result.rule.name - if name not in by_name: - by_name[name] = [] - by_name[name].append(result) - - for name, rule_results in by_name.items(): - rule = rule_results[0].rule - lines.append(f"## {name}") - lines.append("") - - # For set/pair modes, show the correspondence violations concisely - if rule.detection_mode in (DetectionMode.SET, DetectionMode.PAIR): - for result in rule_results: - for trigger_file in result.trigger_files: - for missing_file in result.missing_files: - lines.append(f"{trigger_file} -> {missing_file}") - lines.append("") - - # Show instructions - if rule.instructions: - lines.append(rule.instructions.strip()) - lines.append("") - - return "\n".join(lines) - - -def rules_check_hook(hook_input: HookInput) -> HookOutput: - """ - Main hook logic for rules evaluation (v2). - - This is called for after_agent events to check if rules need attention - before allowing the agent to complete. - """ - # Only process after_agent events - if hook_input.event != NormalizedEvent.AFTER_AGENT: - return HookOutput() - - # Check if rules directory exists - rules_dir = Path(".deepwork/rules") - if not rules_dir.exists(): - return HookOutput() - - # Extract conversation context from transcript - conversation_context = extract_conversation_from_transcript( - hook_input.transcript_path, hook_input.platform - ) - - # Extract promise tags (case-insensitive) - promised_rules = extract_promise_tags(conversation_context) - - # Load rules - try: - rules = load_rules_from_directory(rules_dir) - except RulesParseError as e: - print(f"Error loading rules: {e}", file=sys.stderr) - return HookOutput() - - if not rules: - return HookOutput() - - # Initialize queue - queue = RulesQueue() - - # Group rules by compare_to mode - rules_by_mode: dict[str, list[Rule]] = {} - for rule in rules: - mode = rule.compare_to - if mode not in rules_by_mode: - rules_by_mode[mode] = [] - rules_by_mode[mode].append(rule) - - # Evaluate rules and collect results - prompt_results: list[RuleEvaluationResult] = [] - command_errors: list[str] = [] - - for mode, mode_rules in rules_by_mode.items(): - changed_files = get_changed_files_for_mode(mode) - created_files = get_created_files_for_mode(mode) - - # Skip if no changed or created files - if not changed_files and not created_files: - continue - - baseline_ref = get_baseline_ref(mode) - - # Evaluate which rules fire - results = evaluate_rules(mode_rules, changed_files, promised_rules, created_files) - - for result in results: - rule = result.rule - - # Compute trigger hash for queue deduplication - trigger_hash = compute_trigger_hash( - rule.name, - result.trigger_files, - baseline_ref, - ) - - # Check if already in queue (passed/skipped) - existing = queue.get_entry(trigger_hash) - if existing and existing.status in ( - QueueEntryStatus.PASSED, - QueueEntryStatus.SKIPPED, - ): - continue - - # For PROMPT rules, also skip if already QUEUED (already shown to agent). - # This prevents infinite loops when transcript is unavailable or promise - # tags haven't been written yet. The agent has already seen this rule. - if ( - existing - and existing.status == QueueEntryStatus.QUEUED - and rule.action_type == ActionType.PROMPT - ): - continue - - # For COMMAND rules with FAILED status, don't re-run the command. - # The agent has already seen the error. If they provide a promise, - # the after-loop logic will update the status to SKIPPED. 
- if ( - existing - and existing.status == QueueEntryStatus.FAILED - and rule.action_type == ActionType.COMMAND - ): - continue - - # Create queue entry if new - if not existing: - queue.create_entry( - rule_name=rule.name, - rule_file=f"{rule.filename}.md", - trigger_files=result.trigger_files, - baseline_ref=baseline_ref, - expected_files=result.missing_files, - ) - - # Handle based on action type - if rule.action_type == ActionType.COMMAND: - # Run command action - if rule.command_action: - repo_root = Path.cwd() - cmd_results = run_command_action( - rule.command_action, - result.trigger_files, - repo_root, - ) - - if all_commands_succeeded(cmd_results): - # Command succeeded, mark as passed - queue.update_status( - trigger_hash, - QueueEntryStatus.PASSED, - ActionResult( - type="command", - output=cmd_results[0].stdout if cmd_results else None, - exit_code=0, - ), - ) - else: - # Command failed - format detailed error message - error_msg = format_command_errors(cmd_results, rule_name=rule.name) - skip_hint = f"\nTo skip, include `✓ {rule.name}` in your response." - command_errors.append(f"{error_msg}{skip_hint}") - queue.update_status( - trigger_hash, - QueueEntryStatus.FAILED, - ActionResult( - type="command", - output=error_msg, - exit_code=cmd_results[0].exit_code if cmd_results else -1, - ), - ) - - elif rule.action_type == ActionType.PROMPT: - # Collect for prompt output - prompt_results.append(result) - - # Handle FAILED queue entries that have been promised - # (These rules weren't in results because evaluate_rules skips promised rules, - # but we need to update their queue status to SKIPPED) - if promised_rules: - promised_lower = {name.lower() for name in promised_rules} - for entry in queue.get_all_entries(): - if ( - entry.status == QueueEntryStatus.FAILED - and entry.rule_name.lower() in promised_lower - ): - queue.update_status( - entry.trigger_hash, - QueueEntryStatus.SKIPPED, - ActionResult( - type="command", - output="Acknowledged via promise tag", - exit_code=None, - ), - ) - - # Build response - messages: list[str] = [] - - # Add command errors if any - if command_errors: - messages.append("## Command Rule Errors\n") - messages.append("The following command rules failed.\n") - messages.extend(command_errors) - messages.append("") - - # Add prompt rules if any - if prompt_results: - messages.append(format_rules_message(prompt_results)) - - if messages: - return HookOutput(decision="block", reason="\n".join(messages)) - - return HookOutput() - - -def main() -> None: - """Entry point for the rules check hook.""" - platform_str = os.environ.get("DEEPWORK_HOOK_PLATFORM", "claude") - try: - platform = Platform(platform_str) - except ValueError: - platform = Platform.CLAUDE - - exit_code = run_hook(rules_check_hook, platform) - sys.exit(exit_code) - - -if __name__ == "__main__": - # Wrap entry point to catch early failures (e.g., import errors in wrapper.py) - try: - main() - except Exception as e: - # Last resort error handling - output JSON manually since wrapper may be broken - import json - import traceback - - error_output = { - "decision": "block", - "reason": ( - "## Hook Script Error\n\n" - f"Error type: {type(e).__name__}\n" - f"Error: {e}\n\n" - f"Traceback:\n```\n{traceback.format_exc()}\n```" - ), - } - print(json.dumps(error_output)) - sys.exit(0) diff --git a/src/deepwork/schemas/rules_schema.py b/src/deepwork/schemas/rules_schema.py deleted file mode 100644 index bf091ab9..00000000 --- a/src/deepwork/schemas/rules_schema.py +++ /dev/null @@ -1,135 +0,0 @@ 
-"""JSON Schema definition for rule definitions (v2 - frontmatter format).""" - -from typing import Any - -# Pattern for string or array of strings -STRING_OR_ARRAY: dict[str, Any] = { - "oneOf": [ - {"type": "string", "minLength": 1}, - {"type": "array", "items": {"type": "string", "minLength": 1}, "minItems": 1}, - ] -} - -# JSON Schema for rule frontmatter (YAML between --- delimiters) -# Rules are stored as individual .md files in .deepwork/rules/ -RULES_FRONTMATTER_SCHEMA: dict[str, Any] = { - "$schema": "http://json-schema.org/draft-07/schema#", - "type": "object", - "required": ["name", "compare_to"], - "properties": { - "name": { - "type": "string", - "minLength": 1, - "description": "Human-friendly name for the rule (displayed in promise tags)", - }, - # Detection mode: trigger/safety (mutually exclusive with set/pair) - "trigger": { - **STRING_OR_ARRAY, - "description": "Glob pattern(s) for files that trigger this rule", - }, - "safety": { - **STRING_OR_ARRAY, - "description": "Glob pattern(s) that suppress the rule if changed", - }, - # Detection mode: set (bidirectional correspondence) - "set": { - "type": "array", - "items": {"type": "string", "minLength": 1}, - "minItems": 2, - "description": "Patterns defining bidirectional file correspondence", - }, - # Detection mode: pair (directional correspondence) - "pair": { - "type": "object", - "required": ["trigger", "expects"], - "properties": { - "trigger": { - "type": "string", - "minLength": 1, - "description": "Pattern that triggers the rule", - }, - "expects": { - **STRING_OR_ARRAY, - "description": "Pattern(s) for expected corresponding files", - }, - }, - "additionalProperties": False, - "description": "Directional file correspondence (trigger -> expects)", - }, - # Detection mode: created (fire when files are created matching patterns) - "created": { - **STRING_OR_ARRAY, - "description": "Glob pattern(s) for newly created files that trigger this rule", - }, - # Action type: command (default is prompt using markdown body) - "action": { - "type": "object", - "required": ["command"], - "properties": { - "command": { - "type": "string", - "minLength": 1, - "description": "Command to run (supports {file}, {files}, {repo_root})", - }, - "run_for": { - "type": "string", - "enum": ["each_match", "all_matches"], - "default": "each_match", - "description": "Run command for each file or all files at once", - }, - }, - "additionalProperties": False, - "description": "Command action to run instead of prompting", - }, - # Common options - "compare_to": { - "type": "string", - "enum": ["base", "default_tip", "prompt"], - "description": "Baseline for detecting file changes", - }, - }, - "additionalProperties": False, - # Detection mode must be exactly one of: trigger, set, pair, or created - "oneOf": [ - { - "required": ["trigger"], - "not": { - "anyOf": [ - {"required": ["set"]}, - {"required": ["pair"]}, - {"required": ["created"]}, - ] - }, - }, - { - "required": ["set"], - "not": { - "anyOf": [ - {"required": ["trigger"]}, - {"required": ["pair"]}, - {"required": ["created"]}, - ] - }, - }, - { - "required": ["pair"], - "not": { - "anyOf": [ - {"required": ["trigger"]}, - {"required": ["set"]}, - {"required": ["created"]}, - ] - }, - }, - { - "required": ["created"], - "not": { - "anyOf": [ - {"required": ["trigger"]}, - {"required": ["set"]}, - {"required": ["pair"]}, - ] - }, - }, - ], -} diff --git a/src/deepwork/standard_jobs/deepwork_jobs/steps/implement.md b/src/deepwork/standard_jobs/deepwork_jobs/steps/implement.md index 
749c8c6f..2382a1ad 100644 --- a/src/deepwork/standard_jobs/deepwork_jobs/steps/implement.md +++ b/src/deepwork/standard_jobs/deepwork_jobs/steps/implement.md @@ -126,66 +126,6 @@ This will: - Generate skills for each step - Make the skills available in `.claude/skills/` (or appropriate platform directory) -### Step 6: Consider Rules for the New Job - -After implementing the job, consider whether there are **rules** that would help enforce quality or consistency when working with this job's domain. - -**What are rules?** - -Rules are automated guardrails stored as markdown files in `.deepwork/rules/` that trigger when certain files change during an AI session. They help ensure: -- Documentation stays in sync with code -- Team guidelines are followed -- Architectural decisions are respected -- Quality standards are maintained - -**When to suggest rules:** - -Think about the job you just implemented and ask: -- Does this job produce outputs that other files depend on? -- Are there documentation files that should be updated when this job's outputs change? -- Are there quality checks or reviews that should happen when certain files in this domain change? -- Could changes to the job's output files impact other parts of the project? - -**Examples of rules that might make sense:** - -| Job Type | Potential Rule | -|----------|----------------| -| API Design | "Update API docs when endpoint definitions change" | -| Database Schema | "Review migrations when schema files change" | -| Competitive Research | "Update strategy docs when competitor analysis changes" | -| Feature Development | "Update changelog when feature files change" | -| Configuration Management | "Update install guide when config files change" | - -**How to offer rule creation:** - -If you identify one or more rules that would benefit the user, explain: -1. **What the rule would do** - What triggers it and what action it prompts -2. **Why it would help** - How it prevents common mistakes or keeps things in sync -3. **What files it would watch** - The trigger patterns - -Then ask the user: - -> "Would you like me to create this rule for you? I can run `/deepwork_rules.define` to set it up." - -If the user agrees, invoke the `/deepwork_rules.define` command to guide them through creating the rule. - -**Example dialogue:** - -``` -Based on the competitive_research job you just created, I noticed that when -competitor analysis files change, it would be helpful to remind you to update -your strategy documentation. - -I'd suggest a rule like: -- **Name**: "Update strategy when competitor analysis changes" -- **Trigger**: `**/positioning_report.md` -- **Action**: Prompt to review and update `docs/strategy.md` - -Would you like me to create this rule? I can run `/deepwork_rules.define` to set it up. -``` - -**Note:** Not every job needs rules. Only suggest them when they would genuinely help maintain consistency or quality. Don't force rules where they don't make sense. 
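For illustration, the rule suggested in that dialogue could be captured as a file along these lines (a sketch only: the filename, the `safety` pattern, and the instruction wording are assumptions, not part of the removed job):

```markdown
---
name: Update strategy when competitor analysis changes
trigger: "**/positioning_report.md"
safety: docs/strategy.md   # hypothetical: skip if the strategy doc was also updated
---
Competitor analysis output has changed. Review docs/strategy.md and update any
positioning or messaging that the new analysis affects.
```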
- ## Example Implementation For a complete worked example showing a job.yml and corresponding step instruction file, see: @@ -217,8 +157,6 @@ Before marking this step complete, ensure: - [ ] Each instruction file is complete and actionable - [ ] `deepwork sync` executed successfully - [ ] Skills generated in platform directory -- [ ] Considered whether rules would benefit this job (Step 6) -- [ ] If rules suggested, offered to run `/deepwork_rules.define` ## Quality Criteria @@ -230,4 +168,3 @@ Before marking this step complete, ensure: - Steps with user inputs explicitly use "ask structured questions" phrasing - Sync completed successfully - Skills available for use -- Thoughtfully considered relevant rules for the job domain diff --git a/src/deepwork/standard_jobs/deepwork_rules/hooks/capture_prompt_work_tree.sh b/src/deepwork/standard_jobs/deepwork_rules/hooks/capture_prompt_work_tree.sh deleted file mode 100755 index c9cedd82..00000000 --- a/src/deepwork/standard_jobs/deepwork_rules/hooks/capture_prompt_work_tree.sh +++ /dev/null @@ -1,38 +0,0 @@ -#!/bin/bash -# capture_prompt_work_tree.sh - Captures the git work tree state at prompt submission -# -# This script creates a snapshot of ALL tracked files at the time the prompt -# is submitted. This baseline is used for rules with compare_to: prompt and -# created: mode to detect truly NEW files (not modifications to existing ones). -# -# The baseline contains ALL tracked files (not just changed files) so that -# the rules_check hook can determine which files are genuinely new vs which -# files existed before and were just modified. -# -# It also captures the HEAD commit ref so that committed changes can be detected -# by comparing HEAD at Stop time to the captured ref. - -set -e - -# Ensure .deepwork directory exists -mkdir -p .deepwork - -# Save the current HEAD commit ref for detecting committed changes -# This is used by get_changed_files_prompt() to detect files changed since prompt, -# even if those changes were committed during the agent response. 
-git rev-parse HEAD > .deepwork/.last_head_ref 2>/dev/null || echo "" > .deepwork/.last_head_ref - -# Save ALL tracked files (not just changed files) -# This is critical for created: mode rules to distinguish between: -# - Newly created files (not in baseline) -> should trigger created: rules -# - Modified existing files (in baseline) -> should NOT trigger created: rules -git ls-files > .deepwork/.last_work_tree 2>/dev/null || true - -# Also include untracked files that exist at prompt time -# These are files the user may have created before submitting the prompt -git ls-files --others --exclude-standard >> .deepwork/.last_work_tree 2>/dev/null || true - -# Sort and deduplicate -if [ -f .deepwork/.last_work_tree ]; then - sort -u .deepwork/.last_work_tree -o .deepwork/.last_work_tree -fi diff --git a/src/deepwork/standard_jobs/deepwork_rules/hooks/global_hooks.yml b/src/deepwork/standard_jobs/deepwork_rules/hooks/global_hooks.yml deleted file mode 100644 index a310d31a..00000000 --- a/src/deepwork/standard_jobs/deepwork_rules/hooks/global_hooks.yml +++ /dev/null @@ -1,8 +0,0 @@ -# DeepWork Rules Hooks Configuration -# Maps lifecycle events to hook scripts or Python modules - -UserPromptSubmit: - - user_prompt_submit.sh - -Stop: - - module: deepwork.hooks.rules_check diff --git a/src/deepwork/standard_jobs/deepwork_rules/hooks/user_prompt_submit.sh b/src/deepwork/standard_jobs/deepwork_rules/hooks/user_prompt_submit.sh deleted file mode 100755 index 486ad836..00000000 --- a/src/deepwork/standard_jobs/deepwork_rules/hooks/user_prompt_submit.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/bin/bash -# user_prompt_submit.sh - Runs on every user prompt submission -# -# This script captures the work tree state at each prompt submission. -# This baseline is used for policies with compare_to: prompt to detect -# what changed during an agent response. - -set -e - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" - -# Capture work tree state at each prompt for compare_to: prompt policies -"${SCRIPT_DIR}/capture_prompt_work_tree.sh" - -# Exit successfully - don't block the prompt -exit 0 diff --git a/src/deepwork/standard_jobs/deepwork_rules/job.yml b/src/deepwork/standard_jobs/deepwork_rules/job.yml deleted file mode 100644 index a0032b9e..00000000 --- a/src/deepwork/standard_jobs/deepwork_rules/job.yml +++ /dev/null @@ -1,49 +0,0 @@ -name: deepwork_rules -version: "0.4.0" -summary: "Creates file-change rules that enforce guidelines during AI sessions. Use when automating documentation sync or code review triggers." -description: | - Manages rules that automatically trigger when certain files change during an AI agent session. - Rules help ensure that code changes follow team guidelines, documentation is updated, - and architectural decisions are respected. - - IMPORTANT: Rules are evaluated at the "Stop" hook, which fires when an agent finishes its turn. - This includes when sub-agents complete their work. Rules are NOT evaluated immediately after - each file edit - they batch up and run once at the end of the agent's response cycle. - - Command action rules: Execute their command (e.g., `uv sync`) when the agent stops - - Prompt action rules: Display instructions to the agent, blocking until addressed - - Rules are stored as individual markdown files with YAML frontmatter in the `.deepwork/rules/` - directory. 
Each rule file specifies: - - Detection mode: trigger/safety, set (bidirectional), or pair (directional) - - Patterns: Glob patterns for matching files, with optional variable capture - - Action type: prompt (default) to show instructions, or command to run a shell command - - Instructions: Markdown content describing what the agent should do - - Example use cases: - - Update installation docs when configuration files change - - Require security review when authentication code is modified - - Ensure API documentation stays in sync with API code - - Enforce source/test file pairing - - Auto-run `uv sync` when pyproject.toml changes (command action) - -changelog: - - version: "0.1.0" - changes: "Initial version" - - version: "0.2.0" - changes: "Standardized on 'ask structured questions' phrasing for user input" - - version: "0.3.0" - changes: "Migrated to v2 format - individual markdown files in .deepwork/rules/" - - version: "0.4.0" - changes: "Improved skill descriptions with third-person voice and 'Use when...' triggers for better discoverability" - -steps: - - id: define - name: "Define Rule" - description: "Creates a rule file that triggers when specified files change. Use when setting up documentation sync, code review requirements, or automated commands." - instructions_file: steps/define.md - inputs: - - name: rule_purpose - description: "What guideline or constraint should this rule enforce?" - outputs: - - .deepwork/rules/{rule-name}.md - dependencies: [] diff --git a/src/deepwork/standard_jobs/deepwork_rules/rules/.gitkeep b/src/deepwork/standard_jobs/deepwork_rules/rules/.gitkeep deleted file mode 100644 index 429162b4..00000000 --- a/src/deepwork/standard_jobs/deepwork_rules/rules/.gitkeep +++ /dev/null @@ -1,13 +0,0 @@ -# This directory contains example rule templates. -# Copy and customize these files to create your own rules. -# -# Rule files use YAML frontmatter in markdown format: -# -# --- -# name: Rule Name -# trigger: "pattern/**/*" -# safety: "optional/pattern" -# --- -# Instructions in markdown here. -# -# See doc/rules_syntax.md for full documentation. diff --git a/src/deepwork/standard_jobs/deepwork_rules/rules/api-documentation-sync.md.example b/src/deepwork/standard_jobs/deepwork_rules/rules/api-documentation-sync.md.example deleted file mode 100644 index 427da7ae..00000000 --- a/src/deepwork/standard_jobs/deepwork_rules/rules/api-documentation-sync.md.example +++ /dev/null @@ -1,10 +0,0 @@ ---- -name: API Documentation Sync -trigger: src/api/**/* -safety: docs/api/**/*.md ---- -API code has changed. Please verify that API documentation is up to date: - -- New or changed endpoints -- Modified request/response schemas -- Updated authentication requirements diff --git a/src/deepwork/standard_jobs/deepwork_rules/rules/readme-documentation.md.example b/src/deepwork/standard_jobs/deepwork_rules/rules/readme-documentation.md.example deleted file mode 100644 index 6be90c83..00000000 --- a/src/deepwork/standard_jobs/deepwork_rules/rules/readme-documentation.md.example +++ /dev/null @@ -1,10 +0,0 @@ ---- -name: README Documentation -trigger: src/**/* -safety: README.md ---- -Source code has been modified. Please review README.md for accuracy: - -1. Verify the project overview reflects current functionality -2. Check that usage examples are still correct -3. 
Ensure installation/setup instructions remain valid diff --git a/src/deepwork/standard_jobs/deepwork_rules/rules/security-review.md.example b/src/deepwork/standard_jobs/deepwork_rules/rules/security-review.md.example deleted file mode 100644 index abce3194..00000000 --- a/src/deepwork/standard_jobs/deepwork_rules/rules/security-review.md.example +++ /dev/null @@ -1,11 +0,0 @@ ---- -name: Security Review for Auth Changes -trigger: - - src/auth/**/* - - src/security/**/* ---- -Authentication or security code has been changed. Please: - -1. Review for hardcoded credentials or secrets -2. Check input validation on user inputs -3. Verify access control logic is correct diff --git a/src/deepwork/standard_jobs/deepwork_rules/rules/skill-md-validation.md b/src/deepwork/standard_jobs/deepwork_rules/rules/skill-md-validation.md deleted file mode 100644 index 38f90c51..00000000 --- a/src/deepwork/standard_jobs/deepwork_rules/rules/skill-md-validation.md +++ /dev/null @@ -1,46 +0,0 @@ ---- -name: SKILL.md Validation -trigger: "**/SKILL.md" -compare_to: base ---- -A SKILL.md file has been created or modified. Please validate that it follows the required format: - -## Required Structure - -The file MUST have valid YAML frontmatter at the start, enclosed between `---` markers: - -```markdown ---- -name: my-skill-name -description: A description of what this skill does ---- - -# Rest of the skill documentation... -``` - -## Validation Checklist - -1. **YAML Frontmatter**: Verify the file starts with `---` followed by valid YAML and ends with `---` - -2. **`name` field** (required): - - Must be present in the frontmatter - - Must contain only lowercase letters, numbers, and hyphens (`a-z`, `0-9`, `-`) - - Must be 64 characters or fewer - - Example valid names: `my-skill`, `code-review-2`, `lint` - - Example invalid names: `My Skill` (uppercase/spaces), `skill_name` (underscores), `SKILL` (uppercase) - -3. **`description` field** (required): - - Must be present in the frontmatter - - Must be 1024 characters or fewer - - Should clearly describe what the skill does - -## What to Check - -For the modified file: {trigger_files} - -1. Parse the YAML frontmatter and verify it is valid YAML -2. Check that `name` exists and matches the pattern `^[a-z0-9-]+$` with max length 64 -3. Check that `description` exists and is at most 1024 characters -4. Report any validation errors to the user - -If the file does not pass validation, help the user fix the issues. diff --git a/src/deepwork/standard_jobs/deepwork_rules/rules/source-test-pairing.md.example b/src/deepwork/standard_jobs/deepwork_rules/rules/source-test-pairing.md.example deleted file mode 100644 index 3ebd6968..00000000 --- a/src/deepwork/standard_jobs/deepwork_rules/rules/source-test-pairing.md.example +++ /dev/null @@ -1,13 +0,0 @@ ---- -name: Source/Test Pairing -set: - - src/{path}.py - - tests/{path}_test.py ---- -Source and test files should change together. - -When modifying source code, ensure corresponding tests are updated. -When adding tests, ensure they test actual source code. 
- -Modified source: {trigger_files} -Expected tests: {expected_files} diff --git a/src/deepwork/standard_jobs/deepwork_rules/steps/define.md b/src/deepwork/standard_jobs/deepwork_rules/steps/define.md deleted file mode 100644 index 1e38a5e6..00000000 --- a/src/deepwork/standard_jobs/deepwork_rules/steps/define.md +++ /dev/null @@ -1,249 +0,0 @@ -# Define Rule - -## Objective - -Create a new rule file in the `.deepwork/rules/` directory to enforce team guidelines, documentation requirements, or other constraints when specific files change. - -## Task - -Guide the user through defining a new rule by asking structured questions. **Do not create the rule without first understanding what they want to enforce.** - -**Important**: Use the AskUserQuestion tool to ask structured questions when gathering information from the user. This provides a better user experience with clear options and guided choices. - -### Step 1: Understand the Rule Purpose - -Start by asking structured questions to understand what the user wants to enforce: - -1. **What guideline or constraint should this rule enforce?** - - What situation triggers the need for action? - - What files or directories, when changed, should trigger this rule? - - Examples: "When config files change", "When API code changes", "When database schema changes" - -2. **What action should be taken?** - - What should the agent do when the rule triggers? - - Update documentation? Perform a security review? Update tests? - - Is there a specific file or process that needs attention? - -3. **Are there any "safety" conditions?** - - Are there files that, if also changed, mean the rule doesn't need to fire? - - For example: If config changes AND install_guide.md changes, assume docs are already updated - - This prevents redundant prompts when the user has already done the right thing - -### Step 2: Choose the Detection Mode - -Help the user select the appropriate detection mode: - -**Trigger/Safety Mode** (most common): -- Fires when trigger patterns match AND no safety patterns match -- Use for: "When X changes, check Y" rules -- Example: When config changes, verify install docs - -**Set Mode** (bidirectional correspondence): -- Fires when files that should change together don't all change -- Use for: Source/test pairing, model/migration sync -- Example: `src/foo.py` and `tests/foo_test.py` should change together - -**Pair Mode** (directional correspondence): -- Fires when a trigger file changes but expected files don't -- Changes to expected files alone do NOT trigger -- Use for: API code requires documentation updates (but docs can update independently) - -### Step 3: Define the Patterns - -Help the user define glob patterns for files. 
- -**Common patterns:** -- `src/**/*.py` - All Python files in src directory (recursive) -- `app/config/**/*` - All files in app/config directory -- `*.md` - All markdown files in root -- `src/api/**/*` - All files in the API directory -- `migrations/**/*.sql` - All SQL migrations - -**Variable patterns (for set/pair modes):** -- `src/{path}.py` - Captures path variable (e.g., `foo/bar` from `src/foo/bar.py`) -- `tests/{path}_test.py` - Uses same path variable in corresponding file -- `{name}` matches single segment, `{path}` matches multiple segments - -**Pattern syntax:** -- `*` - Matches any characters within a single path segment -- `**` - Matches any characters across multiple path segments (recursive) -- `?` - Matches a single character - -### Step 4: Choose the Comparison Mode (Optional) - -The `compare_to` field controls what baseline is used when detecting "changed files": - -**Options:** -- `base` (default) - Compares to the base of the current branch (merge-base with main/master). Best for feature branches. -- `default_tip` - Compares to the current tip of the default branch. Useful for seeing difference from production. -- `prompt` - Compares to the state at the start of each prompt. For rules about very recent changes. - -Most rules should use the default (`base`) and don't need to specify `compare_to`. - -### Step 5: Write the Instructions - -Create clear, actionable instructions for what the agent should do when the rule fires. - -**Good instructions include:** -- What to check or review -- What files might need updating -- Specific actions to take -- Quality criteria for completion - -**Template variables available in instructions:** -- `{trigger_files}` - Files that triggered the rule -- `{expected_files}` - Expected corresponding files (for set/pair modes) - -### Step 6: Create the Rule File - -Create a new file in `.deepwork/rules/` with a kebab-case filename: - -**File Location**: `.deepwork/rules/{rule-name}.md` - -**Format for Trigger/Safety Mode:** -```markdown ---- -name: Friendly Name for the Rule -trigger: "glob/pattern/**/*" # or array: ["pattern1", "pattern2"] -safety: "optional/pattern" # optional, or array -compare_to: base # optional: "base" (default), "default_tip", or "prompt" ---- -Instructions for the agent when this rule fires. - -Multi-line markdown content is supported. -``` - -**Format for Set Mode (bidirectional):** -```markdown ---- -name: Source/Test Pairing -set: - - src/{path}.py - - tests/{path}_test.py ---- -Source and test files should change together. - -Modified: {trigger_files} -Expected: {expected_files} -``` - -**Format for Pair Mode (directional):** -```markdown ---- -name: API Documentation -pair: - trigger: api/{path}.py - expects: docs/api/{path}.md ---- -API code requires documentation updates. - -Changed API: {trigger_files} -Update docs: {expected_files} -``` - -### Step 7: Verify the Rule - -After creating the rule: - -1. **Check the YAML frontmatter** - Ensure valid YAML formatting -2. **Test trigger patterns** - Verify patterns match intended files -3. **Review instructions** - Ensure they're clear and actionable -4. **Check for conflicts** - Ensure the rule doesn't conflict with existing ones - -## Example Rules - -### Update Documentation on Config Changes -`.deepwork/rules/config-docs.md`: -```markdown ---- -name: Update Install Guide on Config Changes -trigger: app/config/**/* -safety: docs/install_guide.md ---- -Configuration files have been modified. 
Please review docs/install_guide.md -and update it if any installation instructions need to change based on the -new configuration. -``` - -### Security Review for Auth Code -`.deepwork/rules/security-review.md`: -```markdown ---- -name: Security Review for Authentication Changes -trigger: - - src/auth/**/* - - src/security/**/* -safety: - - SECURITY.md - - docs/security_audit.md ---- -Authentication or security code has been changed. Please: - -1. Review for hardcoded credentials or secrets -2. Check input validation on user inputs -3. Verify access control logic is correct -4. Update security documentation if needed -``` - -### Source/Test Pairing -`.deepwork/rules/source-test-pairing.md`: -```markdown ---- -name: Source/Test Pairing -set: - - src/{path}.py - - tests/{path}_test.py ---- -Source and test files should change together. - -When modifying source code, ensure corresponding tests are updated. -When adding tests, ensure they test actual source code. - -Modified: {trigger_files} -Expected: {expected_files} -``` - -### API Documentation Sync -`.deepwork/rules/api-docs.md`: -```markdown ---- -name: API Documentation Update -pair: - trigger: src/api/{path}.py - expects: docs/api/{path}.md ---- -API code has changed. Please verify that API documentation in docs/api/ -is up to date with the code changes. Pay special attention to: - -- New or changed endpoints -- Modified request/response schemas -- Updated authentication requirements - -Changed API: {trigger_files} -Update: {expected_files} -``` - -## Output Format - -### .deepwork/rules/{rule-name}.md -Create a new file with the rule definition using YAML frontmatter and markdown body. - -## Quality Criteria - -- Asked structured questions to understand user requirements -- Rule name is clear and descriptive (used in promise tags) -- Correct detection mode selected for the use case -- Patterns accurately match the intended files -- Safety patterns prevent unnecessary triggering (if applicable) -- Instructions are actionable and specific -- YAML frontmatter is valid - -## Context - -Rules are evaluated automatically when the agent finishes a task. The system: -1. Determines which files have changed based on each rule's `compare_to` setting -2. Evaluates rules based on their detection mode (trigger/safety, set, or pair) -3. Skips rules where the correspondence is satisfied (for set/pair) or safety matched -4. Prompts you with instructions for any triggered rules - -You can mark a rule as addressed by including `Rule Name` in your response (replace Rule Name with the actual rule name from the `name` field). This tells the system you've already handled that rule's requirements. 
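To make the trigger/safety semantics described above concrete, here is a minimal, self-contained sketch of how such a rule could be evaluated against a list of changed files. It is illustrative only: it uses plain `fnmatch` globbing (which does not distinguish `*` from `**` the way the removed pattern matcher did), and the function and parameter names are assumptions rather than the deleted implementation.

```python
from fnmatch import fnmatch


def rule_fires(
    changed_files: list[str],
    trigger_patterns: list[str],
    safety_patterns: list[str],
    promised_names: set[str],
    rule_name: str,
) -> bool:
    """Trigger/safety evaluation: fire when a trigger pattern matches a changed
    file, no safety pattern matches, and the agent has not already promised
    to handle the rule."""
    if rule_name.lower() in {n.lower() for n in promised_names}:
        return False  # already acknowledged via a promise tag
    triggered = any(fnmatch(f, p) for f in changed_files for p in trigger_patterns)
    suppressed = any(fnmatch(f, p) for f in changed_files for p in safety_patterns)
    return triggered and not suppressed


# A config change with no matching docs update fires the example rule above.
print(rule_fires(
    changed_files=["app/config/settings.yml"],
    trigger_patterns=["app/config/*"],
    safety_patterns=["docs/install_guide.md"],
    promised_names=set(),
    rule_name="Update Install Guide on Config Changes",
))  # -> True
```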
diff --git a/tests/integration/test_install_flow.py b/tests/integration/test_install_flow.py index d0638275..2c800a8c 100644 --- a/tests/integration/test_install_flow.py +++ b/tests/integration/test_install_flow.py @@ -186,63 +186,6 @@ def test_install_is_idempotent(self, mock_claude_project: Path) -> None: assert (claude_dir / "deepwork_jobs.define" / "SKILL.md").exists() assert (claude_dir / "deepwork_jobs.learn" / "SKILL.md").exists() - def test_install_creates_rules_directory(self, mock_claude_project: Path) -> None: - """Test that install creates the v2 rules directory with example templates.""" - runner = CliRunner() - - result = runner.invoke( - cli, - ["install", "--platform", "claude", "--path", str(mock_claude_project)], - catch_exceptions=False, - ) - - assert result.exit_code == 0 - assert ".deepwork/rules/ with example templates" in result.output - - # Verify rules directory was created - rules_dir = mock_claude_project / ".deepwork" / "rules" - assert rules_dir.exists() - - # Verify README was created - readme_file = rules_dir / "README.md" - assert readme_file.exists() - content = readme_file.read_text() - assert "DeepWork Rules" in content - assert "YAML frontmatter" in content - - # Verify example templates were copied - example_files = list(rules_dir.glob("*.md.example")) - assert len(example_files) >= 1 # At least one example template - - def test_install_preserves_existing_rules_directory(self, mock_claude_project: Path) -> None: - """Test that install doesn't overwrite existing rules directory.""" - runner = CliRunner() - - # Create a custom rules directory before install - rules_dir = mock_claude_project / ".deepwork" / "rules" - rules_dir.mkdir(parents=True) - custom_rule = rules_dir / "my-custom-rule.md" - custom_content = """--- -name: My Custom Rule -trigger: "src/**/*" ---- -Custom instructions here. -""" - custom_rule.write_text(custom_content) - - result = runner.invoke( - cli, - ["install", "--platform", "claude", "--path", str(mock_claude_project)], - catch_exceptions=False, - ) - - assert result.exit_code == 0 - assert ".deepwork/rules/ already exists" in result.output - - # Verify original content is preserved - assert custom_rule.read_text() == custom_content - - class TestCLIEntryPoint: """Tests for CLI entry point.""" diff --git a/tests/integration/test_install_requirements.py b/tests/integration/test_install_requirements.py index 63d8dcba..f04cdb8d 100644 --- a/tests/integration/test_install_requirements.py +++ b/tests/integration/test_install_requirements.py @@ -58,17 +58,15 @@ def get_project_settings(project_path: Path) -> dict: return json.loads(settings_file.read_text()) -def assert_install_added_hooks(settings_before: dict, settings_after: dict) -> None: - """Assert that install actually modified settings by adding hooks. +def assert_install_modified_settings(settings_before: dict, settings_after: dict) -> None: + """Assert that install actually modified settings. This ensures idempotency tests are meaningful - if install does nothing, idempotency would trivially pass but the test would be useless. + + Note: Install may or may not add hooks depending on which jobs are installed. + The key assertion is that settings were modified in some way. """ - assert "hooks" in settings_after, ( - "FIRST INSTALL DID NOT ADD HOOKS! " - "Install must add hooks to project settings. " - "This test requires install to actually modify settings to verify idempotency." - ) assert settings_after != settings_before, ( "FIRST INSTALL DID NOT MODIFY SETTINGS! 
" "Install must modify project settings on first run. " @@ -189,9 +187,10 @@ def test_install_only_modifies_project_settings( "Local settings were modified! Install must only modify project settings." ) - # Verify PROJECT settings were modified (hooks should be added) + # Verify PROJECT settings were modified project_settings = get_project_settings(mock_claude_project) - assert "hooks" in project_settings, "Project settings should have hooks after install" + # Settings should exist after install + assert project_settings is not None, "Project settings should exist after install" # ============================================================================= @@ -245,7 +244,7 @@ def test_project_settings_unchanged_on_second_install(self, mock_claude_project: settings_after_first = get_project_settings(mock_claude_project) # CRITICAL: First install MUST actually modify settings - assert_install_added_hooks(settings_before, settings_after_first) + assert_install_modified_settings(settings_before, settings_after_first) # Second install run_install(mock_claude_project) @@ -275,28 +274,22 @@ def test_no_duplicate_hooks_on_multiple_installs(self, mock_claude_project: Path # Load final settings settings = get_project_settings(mock_claude_project) - # CRITICAL: Hooks must exist for this test to be meaningful - assert "hooks" in settings, ( - "NO HOOKS FOUND AFTER INSTALL! " - "Install must add hooks to project settings. " - "This test requires hooks to exist to verify no duplicates are created." - ) - - # Verify no duplicate hooks - for event_name, hooks_list in settings["hooks"].items(): - # Extract all hook commands for duplicate detection - commands = [ - hook["command"] - for hook_entry in hooks_list - for hook in hook_entry.get("hooks", []) - if "command" in hook - ] - - # Check for duplicates - assert len(commands) == len(set(commands)), ( - f"DUPLICATE HOOKS DETECTED for event '{event_name}'! " - f"Install MUST be idempotent. Commands: {commands}" - ) + # If hooks exist, verify no duplicates + if "hooks" in settings: + for event_name, hooks_list in settings["hooks"].items(): + # Extract all hook commands for duplicate detection + commands = [ + hook["command"] + for hook_entry in hooks_list + for hook in hook_entry.get("hooks", []) + if "command" in hook + ] + + # Check for duplicates + assert len(commands) == len(set(commands)), ( + f"DUPLICATE HOOKS DETECTED for event '{event_name}'! " + f"Install MUST be idempotent. 
Commands: {commands}" + ) def test_third_install_identical_to_first(self, mock_claude_project: Path) -> None: """ @@ -316,7 +309,7 @@ def test_third_install_identical_to_first(self, mock_claude_project: Path) -> No settings_after_first = get_project_settings(mock_claude_project) # CRITICAL: First install MUST actually modify settings - assert_install_added_hooks(settings_before, settings_after_first) + assert_install_modified_settings(settings_before, settings_after_first) # Run multiple more installs for _ in range(5): diff --git a/tests/shell_script_tests/conftest.py b/tests/shell_script_tests/conftest.py index 3ac15822..01b0250b 100644 --- a/tests/shell_script_tests/conftest.py +++ b/tests/shell_script_tests/conftest.py @@ -22,52 +22,6 @@ def git_repo(tmp_path: Path) -> Path: return tmp_path -@pytest.fixture -def git_repo_with_rule(tmp_path: Path) -> Path: - """Create a git repo with rule that will fire.""" - repo = Repo.init(tmp_path) - - readme = tmp_path / "README.md" - readme.write_text("# Test Project\n") - repo.index.add(["README.md"]) - repo.index.commit("Initial commit") - - # Create v2 rules directory and file - rules_dir = tmp_path / ".deepwork" / "rules" - rules_dir.mkdir(parents=True, exist_ok=True) - - # Rule that triggers on any Python file (v2 format) - rule_file = rules_dir / "python-file-rule.md" - rule_file.write_text( - """--- -name: Python File Rule -trigger: "**/*.py" -compare_to: prompt ---- -Review Python files for quality. -""" - ) - - # Empty baseline so new files trigger - deepwork_dir = tmp_path / ".deepwork" - (deepwork_dir / ".last_work_tree").write_text("") - - return tmp_path - - -@pytest.fixture -def rules_hooks_dir() -> Path: - """Return the path to the rules hooks scripts directory.""" - return ( - Path(__file__).parent.parent.parent - / "src" - / "deepwork" - / "standard_jobs" - / "deepwork_rules" - / "hooks" - ) - - @pytest.fixture def hooks_dir() -> Path: """Return the path to the main hooks directory (platform wrappers).""" diff --git a/tests/shell_script_tests/test_capture_prompt_work_tree.py b/tests/shell_script_tests/test_capture_prompt_work_tree.py deleted file mode 100644 index 6f0435b1..00000000 --- a/tests/shell_script_tests/test_capture_prompt_work_tree.py +++ /dev/null @@ -1,257 +0,0 @@ -"""Tests for capture_prompt_work_tree.sh helper script. - -This script captures the git work tree state for use with -compare_to: prompt rules. It should: -1. Create .deepwork directory if needed -2. Stage all changes with git add -A -3. Record changed files to .deepwork/.last_work_tree -4. 
Handle various git states gracefully -""" - -from pathlib import Path - -import pytest -from git import Repo - -from .conftest import run_shell_script - - -@pytest.fixture -def git_repo_with_changes(git_repo: Path) -> Path: - """Create a git repo with uncommitted changes.""" - # Create some changed files - (git_repo / "modified.py").write_text("# Modified file\n") - (git_repo / "src").mkdir(exist_ok=True) - (git_repo / "src" / "main.py").write_text("# Main file\n") - - return git_repo - - -def run_capture_script(script_path: Path, cwd: Path) -> tuple[str, str, int]: - """Run the capture_prompt_work_tree.sh script.""" - return run_shell_script(script_path, cwd) - - -class TestCapturePromptWorkTreeBasic: - """Basic functionality tests for capture_prompt_work_tree.sh.""" - - def test_exits_successfully(self, rules_hooks_dir: Path, git_repo: Path) -> None: - """Test that the script exits with code 0.""" - script_path = rules_hooks_dir / "capture_prompt_work_tree.sh" - stdout, stderr, code = run_capture_script(script_path, git_repo) - - assert code == 0, f"Expected exit code 0, got {code}. stderr: {stderr}" - - def test_creates_deepwork_directory(self, rules_hooks_dir: Path, git_repo: Path) -> None: - """Test that the script creates .deepwork directory.""" - deepwork_dir = git_repo / ".deepwork" - assert not deepwork_dir.exists(), "Precondition: .deepwork should not exist" - - script_path = rules_hooks_dir / "capture_prompt_work_tree.sh" - stdout, stderr, code = run_capture_script(script_path, git_repo) - - assert code == 0, f"Script failed with stderr: {stderr}" - assert deepwork_dir.exists(), "Script should create .deepwork directory" - - def test_creates_last_work_tree_file(self, rules_hooks_dir: Path, git_repo: Path) -> None: - """Test that the script creates .last_work_tree file.""" - script_path = rules_hooks_dir / "capture_prompt_work_tree.sh" - stdout, stderr, code = run_capture_script(script_path, git_repo) - - work_tree_file = git_repo / ".deepwork" / ".last_work_tree" - assert code == 0, f"Script failed with stderr: {stderr}" - assert work_tree_file.exists(), "Script should create .last_work_tree file" - - def test_empty_repo_produces_empty_file(self, rules_hooks_dir: Path, git_repo: Path) -> None: - """Test that a clean repo produces an empty work tree file.""" - script_path = rules_hooks_dir / "capture_prompt_work_tree.sh" - stdout, stderr, code = run_capture_script(script_path, git_repo) - - # Clean repo should have empty or minimal content - # May have .deepwork/.last_work_tree itself listed - assert code == 0, f"Script failed with stderr: {stderr}" - - -class TestCapturePromptWorkTreeFileTracking: - """Tests for file tracking behavior in capture_prompt_work_tree.sh.""" - - def test_captures_staged_files(self, rules_hooks_dir: Path, git_repo: Path) -> None: - """Test that staged files are captured.""" - # Create and stage a file - new_file = git_repo / "staged.py" - new_file.write_text("# Staged file\n") - repo = Repo(git_repo) - repo.index.add(["staged.py"]) - - script_path = rules_hooks_dir / "capture_prompt_work_tree.sh" - stdout, stderr, code = run_capture_script(script_path, git_repo) - - work_tree_file = git_repo / ".deepwork" / ".last_work_tree" - content = work_tree_file.read_text() - - assert code == 0, f"Script failed with stderr: {stderr}" - assert "staged.py" in content, "Staged file should be in work tree" - - def test_captures_unstaged_changes(self, rules_hooks_dir: Path, git_repo: Path) -> None: - """Test that unstaged changes are captured (after staging by 
script).""" - # Create an unstaged file - unstaged = git_repo / "unstaged.py" - unstaged.write_text("# Unstaged file\n") - - script_path = rules_hooks_dir / "capture_prompt_work_tree.sh" - stdout, stderr, code = run_capture_script(script_path, git_repo) - - work_tree_file = git_repo / ".deepwork" / ".last_work_tree" - content = work_tree_file.read_text() - - assert code == 0, f"Script failed with stderr: {stderr}" - assert "unstaged.py" in content, "Unstaged file should be captured" - - def test_captures_files_in_subdirectories(self, rules_hooks_dir: Path, git_repo: Path) -> None: - """Test that files in subdirectories are captured.""" - # Create files in nested directories - src_dir = git_repo / "src" / "components" - src_dir.mkdir(parents=True) - (src_dir / "button.py").write_text("# Button component\n") - - script_path = rules_hooks_dir / "capture_prompt_work_tree.sh" - stdout, stderr, code = run_capture_script(script_path, git_repo) - - work_tree_file = git_repo / ".deepwork" / ".last_work_tree" - content = work_tree_file.read_text() - - assert code == 0, f"Script failed with stderr: {stderr}" - assert "src/components/button.py" in content, "Nested file should be captured" - - def test_captures_multiple_files( - self, rules_hooks_dir: Path, git_repo_with_changes: Path - ) -> None: - """Test that multiple files are captured.""" - script_path = rules_hooks_dir / "capture_prompt_work_tree.sh" - stdout, stderr, code = run_capture_script(script_path, git_repo_with_changes) - - work_tree_file = git_repo_with_changes / ".deepwork" / ".last_work_tree" - content = work_tree_file.read_text() - - assert code == 0, f"Script failed with stderr: {stderr}" - assert "modified.py" in content, "Modified file should be captured" - assert "src/main.py" in content, "File in src/ should be captured" - - def test_file_list_is_sorted_and_unique(self, rules_hooks_dir: Path, git_repo: Path) -> None: - """Test that the file list is sorted and deduplicated.""" - # Create multiple files - (git_repo / "z_file.py").write_text("# Z file\n") - (git_repo / "a_file.py").write_text("# A file\n") - (git_repo / "m_file.py").write_text("# M file\n") - - script_path = rules_hooks_dir / "capture_prompt_work_tree.sh" - stdout, stderr, code = run_capture_script(script_path, git_repo) - - work_tree_file = git_repo / ".deepwork" / ".last_work_tree" - lines = [line for line in work_tree_file.read_text().strip().split("\n") if line] - - # Extract just the test files we created (filter out .deepwork files) - test_files = [f for f in lines if f.endswith("_file.py")] - - assert code == 0, f"Script failed with stderr: {stderr}" - assert test_files == sorted(test_files), "Files should be sorted" - assert len(test_files) == len(set(test_files)), "Files should be unique" - - -class TestCapturePromptWorkTreeGitStates: - """Tests for handling various git states in capture_prompt_work_tree.sh.""" - - def test_handles_deleted_files(self, rules_hooks_dir: Path, git_repo: Path) -> None: - """Test that deleted files are handled gracefully.""" - # Create and commit a file, then delete it - to_delete = git_repo / "to_delete.py" - to_delete.write_text("# Will be deleted\n") - repo = Repo(git_repo) - repo.index.add(["to_delete.py"]) - repo.index.commit("Add file to delete") - - # Now delete it - to_delete.unlink() - - script_path = rules_hooks_dir / "capture_prompt_work_tree.sh" - stdout, stderr, code = run_capture_script(script_path, git_repo) - - assert code == 0, f"Script should handle deletions. 
stderr: {stderr}" - - def test_handles_renamed_files(self, rules_hooks_dir: Path, git_repo: Path) -> None: - """Test that renamed files are tracked.""" - # Create and commit a file - old_name = git_repo / "old_name.py" - old_name.write_text("# Original file\n") - repo = Repo(git_repo) - repo.index.add(["old_name.py"]) - repo.index.commit("Add original file") - - # Rename it - new_name = git_repo / "new_name.py" - old_name.rename(new_name) - - script_path = rules_hooks_dir / "capture_prompt_work_tree.sh" - stdout, stderr, code = run_capture_script(script_path, git_repo) - - work_tree_file = git_repo / ".deepwork" / ".last_work_tree" - content = work_tree_file.read_text() - - assert code == 0, f"Script failed with stderr: {stderr}" - # Both old (deleted) and new should appear as changes - assert "new_name.py" in content, "New filename should be captured" - - def test_handles_modified_files(self, rules_hooks_dir: Path, git_repo: Path) -> None: - """Test that modified committed files are tracked.""" - # Modify an existing committed file - readme = git_repo / "README.md" - readme.write_text("# Modified content\n") - - script_path = rules_hooks_dir / "capture_prompt_work_tree.sh" - stdout, stderr, code = run_capture_script(script_path, git_repo) - - work_tree_file = git_repo / ".deepwork" / ".last_work_tree" - content = work_tree_file.read_text() - - assert code == 0, f"Script failed with stderr: {stderr}" - assert "README.md" in content, "Modified file should be captured" - - -class TestCapturePromptWorkTreeIdempotence: - """Tests for idempotent behavior of capture_prompt_work_tree.sh.""" - - def test_multiple_runs_succeed(self, rules_hooks_dir: Path, git_repo: Path) -> None: - """Test that the script can be run multiple times.""" - script_path = rules_hooks_dir / "capture_prompt_work_tree.sh" - - for i in range(3): - stdout, stderr, code = run_capture_script(script_path, git_repo) - assert code == 0, f"Run {i + 1} failed with stderr: {stderr}" - - def test_updates_on_new_changes(self, rules_hooks_dir: Path, git_repo: Path) -> None: - """Test that subsequent runs capture new changes.""" - script_path = rules_hooks_dir / "capture_prompt_work_tree.sh" - - # First run - run_capture_script(script_path, git_repo) - - # Add a new file - (git_repo / "new_file.py").write_text("# New\n") - - # Second run - run_capture_script(script_path, git_repo) - - work_tree_file = git_repo / ".deepwork" / ".last_work_tree" - content = work_tree_file.read_text() - - assert "new_file.py" in content, "New file should be captured" - - def test_existing_deepwork_dir_not_error(self, rules_hooks_dir: Path, git_repo: Path) -> None: - """Test that existing .deepwork directory is not an error.""" - # Pre-create the directory - (git_repo / ".deepwork").mkdir() - - script_path = rules_hooks_dir / "capture_prompt_work_tree.sh" - stdout, stderr, code = run_capture_script(script_path, git_repo) - - assert code == 0, f"Should handle existing .deepwork dir. 
stderr: {stderr}" diff --git a/tests/shell_script_tests/test_hooks.py b/tests/shell_script_tests/test_hooks.py index 4f6f8e32..0910b6c9 100644 --- a/tests/shell_script_tests/test_hooks.py +++ b/tests/shell_script_tests/test_hooks.py @@ -44,53 +44,15 @@ import json import os import subprocess -import tempfile from pathlib import Path import pytest -from git import Repo - -from .conftest import run_shell_script # ============================================================================= # Helper Functions # ============================================================================= -def run_rules_hook_script( - script_path: Path, - cwd: Path, - hook_input: dict | None = None, -) -> tuple[str, str, int]: - """Run a rules hook script and return its output.""" - return run_shell_script(script_path, cwd, hook_input=hook_input) - - -def run_rules_check_module( - cwd: Path, - hook_input: dict | None = None, - src_dir: Path | None = None, -) -> tuple[str, str, int]: - """Run the rules_check Python module directly and return its output.""" - env = os.environ.copy() - env["DEEPWORK_HOOK_PLATFORM"] = "claude" - if src_dir: - env["PYTHONPATH"] = str(src_dir) - - stdin_data = json.dumps(hook_input) if hook_input else "" - - result = subprocess.run( - ["python", "-m", "deepwork.hooks.rules_check"], - cwd=cwd, - capture_output=True, - text=True, - input=stdin_data, - env=env, - ) - - return result.stdout, result.stderr, result.returncode - - def run_platform_wrapper_script( script_path: Path, python_module: str, @@ -275,284 +237,6 @@ def test_sets_platform_environment_variable(self, hooks_dir: Path, src_dir: Path assert 'DEEPWORK_HOOK_PLATFORM="gemini"' in content -# ============================================================================= -# Rules Hook Script Tests -# ============================================================================= - - -class TestRulesStopHook: - """Tests for rules stop hook (deepwork.hooks.rules_check) JSON format compliance.""" - - def test_allow_response_is_empty_json(self, src_dir: Path, git_repo: Path) -> None: - """Test that allow response is empty JSON object.""" - stdout, stderr, code = run_rules_check_module(git_repo, src_dir=src_dir) - - response = validate_json_output(stdout) - validate_stop_hook_response(response) - - if response is not None: - assert response == {}, f"Allow response should be empty: {response}" - - def test_block_response_has_required_fields( - self, src_dir: Path, git_repo_with_rule: Path - ) -> None: - """Test that block response has decision and reason.""" - # Create a file that triggers the rule - py_file = git_repo_with_rule / "test.py" - py_file.write_text("# Python file\n") - repo = Repo(git_repo_with_rule) - repo.index.add(["test.py"]) - - stdout, stderr, code = run_rules_check_module(git_repo_with_rule, src_dir=src_dir) - - response = validate_json_output(stdout) - validate_stop_hook_response(response) - - # Should be blocking - assert response is not None, "Expected blocking response" - assert response.get("decision") == "block", "Expected block decision" - assert "reason" in response, "Expected reason field" - - def test_block_reason_contains_rule_info(self, src_dir: Path, git_repo_with_rule: Path) -> None: - """Test that block reason contains rule information.""" - py_file = git_repo_with_rule / "test.py" - py_file.write_text("# Python file\n") - repo = Repo(git_repo_with_rule) - repo.index.add(["test.py"]) - - stdout, stderr, code = run_rules_check_module(git_repo_with_rule, src_dir=src_dir) - - response = 
validate_json_output(stdout) - - assert response is not None, "Expected blocking response" - reason = response.get("reason", "") - - # Should contain useful rule information - assert "Rule" in reason or "rule" in reason, f"Reason should mention rule: {reason}" - - def test_no_extraneous_keys_in_response(self, src_dir: Path, git_repo_with_rule: Path) -> None: - """Test that response only contains expected keys.""" - py_file = git_repo_with_rule / "test.py" - py_file.write_text("# Python file\n") - repo = Repo(git_repo_with_rule) - repo.index.add(["test.py"]) - - stdout, stderr, code = run_rules_check_module(git_repo_with_rule, src_dir=src_dir) - - response = validate_json_output(stdout) - - if response and response != {}: - # Only decision and reason are valid keys for stop hooks - valid_keys = {"decision", "reason"} - actual_keys = set(response.keys()) - assert actual_keys <= valid_keys, ( - f"Unexpected keys in response: {actual_keys - valid_keys}" - ) - - def test_output_is_single_line_json(self, src_dir: Path, git_repo_with_rule: Path) -> None: - """Test that JSON output is single-line (no pretty printing).""" - py_file = git_repo_with_rule / "test.py" - py_file.write_text("# Python file\n") - repo = Repo(git_repo_with_rule) - repo.index.add(["test.py"]) - - stdout, stderr, code = run_rules_check_module(git_repo_with_rule, src_dir=src_dir) - - # Remove trailing newline and check for internal newlines - output = stdout.strip() - if output: - # JSON output should ideally be single line - # Multiple lines could indicate print statements or logging - lines = output.split("\n") - # Only the last line should be JSON - json_line = lines[-1] - # Verify the JSON is parseable - json.loads(json_line) - - -class TestUserPromptSubmitHook: - """Tests for user_prompt_submit.sh JSON format compliance.""" - - def test_output_is_valid_json_or_empty(self, rules_hooks_dir: Path, git_repo: Path) -> None: - """Test that output is valid JSON or empty.""" - script_path = rules_hooks_dir / "user_prompt_submit.sh" - stdout, stderr, code = run_rules_hook_script(script_path, git_repo) - - response = validate_json_output(stdout) - validate_prompt_hook_response(response) - - def test_does_not_block_prompt_submission(self, rules_hooks_dir: Path, git_repo: Path) -> None: - """Test that hook does not block prompt submission.""" - script_path = rules_hooks_dir / "user_prompt_submit.sh" - stdout, stderr, code = run_rules_hook_script(script_path, git_repo) - - response = validate_json_output(stdout) - - # UserPromptSubmit hooks should not block - if response: - assert response.get("decision") != "block", ( - "UserPromptSubmit hook should not return block decision" - ) - - -class TestHooksWithTranscript: - """Tests for hook JSON format when using transcript input.""" - - def test_stop_hook_with_transcript_input(self, src_dir: Path, git_repo_with_rule: Path) -> None: - """Test stop hook JSON format when transcript is provided.""" - py_file = git_repo_with_rule / "test.py" - py_file.write_text("# Python file\n") - repo = Repo(git_repo_with_rule) - repo.index.add(["test.py"]) - - # Create mock transcript - with tempfile.NamedTemporaryFile(mode="w", suffix=".jsonl", delete=False) as f: - transcript_path = f.name - f.write( - json.dumps( - { - "role": "assistant", - "message": {"content": [{"type": "text", "text": "Hello"}]}, - } - ) - ) - f.write("\n") - - try: - hook_input = {"transcript_path": transcript_path} - stdout, stderr, code = run_rules_check_module( - git_repo_with_rule, hook_input, src_dir=src_dir - ) - - response 
= validate_json_output(stdout) - validate_stop_hook_response(response) - - finally: - os.unlink(transcript_path) - - def test_stop_hook_with_promise_returns_empty( - self, src_dir: Path, git_repo_with_rule: Path - ) -> None: - """Test that promised rules return empty JSON.""" - py_file = git_repo_with_rule / "test.py" - py_file.write_text("# Python file\n") - repo = Repo(git_repo_with_rule) - repo.index.add(["test.py"]) - - # Create transcript with promise tag - with tempfile.NamedTemporaryFile(mode="w", suffix=".jsonl", delete=False) as f: - transcript_path = f.name - f.write( - json.dumps( - { - "role": "assistant", - "message": { - "content": [ - { - "type": "text", - "text": "Python File Rule", - } - ] - }, - } - ) - ) - f.write("\n") - - try: - hook_input = {"transcript_path": transcript_path} - stdout, stderr, code = run_rules_check_module( - git_repo_with_rule, hook_input, src_dir=src_dir - ) - - response = validate_json_output(stdout) - validate_stop_hook_response(response) - - # Should be empty (allow) because rule was promised - if response is not None: - assert response == {}, f"Expected empty response: {response}" - - finally: - os.unlink(transcript_path) - - -# ****************************************************************************** -# *** DO NOT EDIT THESE EXIT CODE TESTS! *** -# ****************************************************************************** -# -# As documented in doc/platforms/claude/hooks_system.md: -# -# | Exit Code | Meaning | Behavior | -# |-----------|-----------------|-----------------------------------| -# | 0 | Success | stdout parsed as JSON | -# | 2 | Blocking error | stderr shown, operation blocked | -# | Other | Warning | stderr logged, continues | -# -# CRITICAL: Hooks using JSON output format MUST return exit code 0. -# The "decision" field in the JSON controls blocking behavior, NOT the exit code. -# -# Example valid outputs: -# Exit 0 + stdout: {} -> Allow -# Exit 0 + stdout: {"decision": "block", "reason": "..."} -> Block -# Exit 0 + stdout: {"decision": "deny", "reason": "..."} -> Block (Gemini) -# -# See: https://docs.anthropic.com/en/docs/claude-code/hooks -# ****************************************************************************** - - -class TestHookExitCodes: - """Tests for hook exit codes. - - CRITICAL: These tests verify the documented Claude Code hook contract. - All hooks MUST exit 0 when using JSON output format. - """ - - def test_stop_hook_exits_zero_on_allow(self, src_dir: Path, git_repo: Path) -> None: - """Test that stop hook exits 0 when allowing. - - DO NOT CHANGE THIS TEST - it verifies the documented hook contract. - """ - stdout, stderr, code = run_rules_check_module(git_repo, src_dir=src_dir) - - assert code == 0, f"Allow should exit 0. stderr: {stderr}" - - def test_stop_hook_exits_zero_on_block(self, src_dir: Path, git_repo_with_rule: Path) -> None: - """Test that stop hook exits 0 even when blocking. - - DO NOT CHANGE THIS TEST - it verifies the documented hook contract. - Blocking is communicated via JSON {"decision": "block"}, NOT via exit code. - """ - py_file = git_repo_with_rule / "test.py" - py_file.write_text("# Python file\n") - repo = Repo(git_repo_with_rule) - repo.index.add(["test.py"]) - - stdout, stderr, code = run_rules_check_module(git_repo_with_rule, src_dir=src_dir) - - # Hooks should exit 0 and communicate via JSON - assert code == 0, f"Block should still exit 0. 
stderr: {stderr}" - - def test_user_prompt_hook_exits_zero(self, rules_hooks_dir: Path, git_repo: Path) -> None: - """Test that user prompt hook always exits 0. - - DO NOT CHANGE THIS TEST - it verifies the documented hook contract. - """ - script_path = rules_hooks_dir / "user_prompt_submit.sh" - stdout, stderr, code = run_rules_hook_script(script_path, git_repo) - - assert code == 0, f"User prompt hook should exit 0. stderr: {stderr}" - - def test_capture_script_exits_zero(self, rules_hooks_dir: Path, git_repo: Path) -> None: - """Test that capture script exits 0. - - DO NOT CHANGE THIS TEST - it verifies the documented hook contract. - """ - script_path = rules_hooks_dir / "capture_prompt_work_tree.sh" - stdout, stderr, code = run_rules_hook_script(script_path, git_repo) - - assert code == 0, f"Capture script should exit 0. stderr: {stderr}" - - # ============================================================================= # Integration Tests # ============================================================================= @@ -712,35 +396,3 @@ def test_non_blocking_event( assert output == {} or output.get("decision", "") not in ("block", "deny") -# ============================================================================= -# Python Module Tests -# ============================================================================= - - -class TestRulesCheckModule: - """Tests for the rules_check hook module.""" - - def test_module_imports(self) -> None: - """Test that the rules_check module can be imported.""" - from deepwork.hooks import rules_check - - assert hasattr(rules_check, "main") - assert hasattr(rules_check, "rules_check_hook") - - def test_hook_function_returns_output(self) -> None: - """Test that rules_check_hook returns a HookOutput.""" - from deepwork.hooks.rules_check import rules_check_hook - from deepwork.hooks.wrapper import HookInput, HookOutput, NormalizedEvent, Platform - - # Create a minimal hook input - hook_input = HookInput( - platform=Platform.CLAUDE, - event=NormalizedEvent.BEFORE_PROMPT, # Not after_agent, so no blocking - session_id="test", - ) - - output = rules_check_hook(hook_input) - - assert isinstance(output, HookOutput) - # Should not block for before_prompt event - assert output.decision != "block" diff --git a/tests/shell_script_tests/test_rules_stop_hook.py b/tests/shell_script_tests/test_rules_stop_hook.py deleted file mode 100644 index 23418021..00000000 --- a/tests/shell_script_tests/test_rules_stop_hook.py +++ /dev/null @@ -1,481 +0,0 @@ -"""Tests for the rules stop hook (deepwork.hooks.rules_check). - -These tests verify that the rules stop hook correctly outputs JSON -to block or allow the stop event in Claude Code. 
-""" - -import json -import os -import subprocess -import tempfile -from pathlib import Path - -import pytest -from git import Repo - - -@pytest.fixture -def git_repo_with_src_rule(tmp_path: Path) -> Path: - """Create a git repo with a v2 rule file that triggers on src/** changes.""" - repo = Repo.init(tmp_path) - - readme = tmp_path / "README.md" - readme.write_text("# Test Project\n") - repo.index.add(["README.md"]) - repo.index.commit("Initial commit") - - # Create v2 rules directory and file - rules_dir = tmp_path / ".deepwork" / "rules" - rules_dir.mkdir(parents=True, exist_ok=True) - - # Use compare_to: prompt since test repos don't have origin remote - rule_file = rules_dir / "test-rule.md" - rule_file.write_text( - """--- -name: Test Rule -trigger: "src/**/*" -compare_to: prompt ---- -This is a test rule that fires when src/ files change. -Please address this rule. -""" - ) - - # Empty baseline means all current files are "new" - deepwork_dir = tmp_path / ".deepwork" - (deepwork_dir / ".last_work_tree").write_text("") - - return tmp_path - - -def run_stop_hook( - cwd: Path, - hook_input: dict | None = None, - src_dir: Path | None = None, -) -> tuple[str, str, int]: - """Run the rules_check module and return its output.""" - env = os.environ.copy() - env["DEEPWORK_HOOK_PLATFORM"] = "claude" - if src_dir: - env["PYTHONPATH"] = str(src_dir) - - stdin_data = json.dumps(hook_input) if hook_input else "" - - result = subprocess.run( - ["python", "-m", "deepwork.hooks.rules_check"], - cwd=cwd, - capture_output=True, - text=True, - input=stdin_data, - env=env, - ) - - return result.stdout, result.stderr, result.returncode - - -class TestRulesStopHookBlocking: - """Tests for rules stop hook blocking behavior.""" - - def test_outputs_block_json_when_rule_fires( - self, src_dir: Path, git_repo_with_src_rule: Path - ) -> None: - """Test that the hook outputs blocking JSON when a rule fires.""" - # Create a file that triggers the rule - test_src_dir = git_repo_with_src_rule / "src" - test_src_dir.mkdir(exist_ok=True) - (test_src_dir / "main.py").write_text("# New file\n") - - # Stage the change - repo = Repo(git_repo_with_src_rule) - repo.index.add(["src/main.py"]) - - # Run the stop hook - stdout, stderr, code = run_stop_hook(git_repo_with_src_rule, src_dir=src_dir) - - # Parse the output as JSON - output = stdout.strip() - assert output, f"Expected JSON output but got empty string. stderr: {stderr}" - - try: - result = json.loads(output) - except json.JSONDecodeError as e: - pytest.fail(f"Output is not valid JSON: {output!r}. Error: {e}") - - # Verify the JSON has the blocking structure - assert "decision" in result, f"Expected 'decision' key in JSON: {result}" - assert result["decision"] == "block", f"Expected decision='block', got: {result}" - assert "reason" in result, f"Expected 'reason' key in JSON: {result}" - assert "Test Rule" in result["reason"], f"Rule name not in reason: {result}" - - def test_outputs_empty_json_when_no_rule_fires( - self, src_dir: Path, git_repo_with_src_rule: Path - ) -> None: - """Test that the hook outputs empty JSON when no rule fires.""" - # Don't create any files that would trigger the rule - # (rule triggers on src/** but we haven't created anything in src/) - - # Run the stop hook - stdout, stderr, code = run_stop_hook(git_repo_with_src_rule, src_dir=src_dir) - - # Parse the output as JSON - output = stdout.strip() - assert output, f"Expected JSON output but got empty string. 
stderr: {stderr}" - - try: - result = json.loads(output) - except json.JSONDecodeError as e: - pytest.fail(f"Output is not valid JSON: {output!r}. Error: {e}") - - # Should be empty JSON (no blocking) - assert result == {}, f"Expected empty JSON when no rules fire, got: {result}" - - def test_exits_early_when_no_rules_dir(self, src_dir: Path, git_repo: Path) -> None: - """Test that the hook exits cleanly when no rules directory exists.""" - stdout, stderr, code = run_stop_hook(git_repo, src_dir=src_dir) - - # Should exit with code 0 and produce no output (or empty) - assert code == 0, f"Expected exit code 0, got {code}. stderr: {stderr}" - # No output is fine when there's no rules directory - output = stdout.strip() - if output: - # If there is output, it should be valid JSON - try: - result = json.loads(output) - assert result == {}, f"Expected empty JSON, got: {result}" - except json.JSONDecodeError: - # Empty or no output is acceptable - pass - - def test_respects_promise_tags(self, src_dir: Path, git_repo_with_src_rule: Path) -> None: - """Test that promised rules are not re-triggered.""" - # Create a file that triggers the rule - test_src_dir = git_repo_with_src_rule / "src" - test_src_dir.mkdir(exist_ok=True) - (test_src_dir / "main.py").write_text("# New file\n") - - # Stage the change - repo = Repo(git_repo_with_src_rule) - repo.index.add(["src/main.py"]) - - # Create a mock transcript with the promise tag - with tempfile.NamedTemporaryFile(mode="w", suffix=".jsonl", delete=False) as f: - transcript_path = f.name - # Write a mock assistant message with the promise tag - f.write( - json.dumps( - { - "role": "assistant", - "message": { - "content": [ - { - "type": "text", - "text": "I've addressed the rule. Test Rule", - } - ] - }, - } - ) - ) - f.write("\n") - - try: - # Run the stop hook with transcript path - hook_input = {"transcript_path": transcript_path, "hook_event_name": "Stop"} - stdout, stderr, code = run_stop_hook( - git_repo_with_src_rule, hook_input, src_dir=src_dir - ) - - # Parse the output - output = stdout.strip() - assert output, f"Expected JSON output. stderr: {stderr}" - - result = json.loads(output) - - # Should be empty JSON because the rule was promised - assert result == {}, f"Expected empty JSON when rule is promised, got: {result}" - finally: - os.unlink(transcript_path) - - def test_safety_pattern_prevents_firing(self, src_dir: Path, tmp_path: Path) -> None: - """Test that safety patterns prevent rules from firing.""" - # Initialize git repo - repo = Repo.init(tmp_path) - - readme = tmp_path / "README.md" - readme.write_text("# Test Project\n") - repo.index.add(["README.md"]) - repo.index.commit("Initial commit") - - # Create v2 rule with a safety pattern - rules_dir = tmp_path / ".deepwork" / "rules" - rules_dir.mkdir(parents=True, exist_ok=True) - - rule_file = rules_dir / "documentation-rule.md" - rule_file.write_text( - """--- -name: Documentation Rule -trigger: "src/**/*" -safety: "docs/**/*" -compare_to: prompt ---- -Update documentation when changing source files. 
-""" - ) - - # Create .deepwork directory with empty baseline - deepwork_dir = tmp_path / ".deepwork" - (deepwork_dir / ".last_work_tree").write_text("") - - # Create both trigger and safety files - test_src_dir = tmp_path / "src" - test_src_dir.mkdir(exist_ok=True) - (test_src_dir / "main.py").write_text("# Source file\n") - - docs_dir = tmp_path / "docs" - docs_dir.mkdir(exist_ok=True) - (docs_dir / "api.md").write_text("# API docs\n") - - # Stage both changes so they appear in git diff --cached - repo.index.add(["src/main.py", "docs/api.md"]) - - # Run the stop hook - stdout, stderr, code = run_stop_hook(tmp_path, src_dir=src_dir) - - # Parse the output - output = stdout.strip() - assert output, f"Expected JSON output. stderr: {stderr}" - - result = json.loads(output) - - # Should be empty JSON because safety pattern matched - assert result == {}, f"Expected empty JSON when safety pattern matches, got: {result}" - - -class TestRulesStopHookJsonFormat: - """Tests for the JSON output format of the rules stop hook.""" - - def test_json_has_correct_structure(self, src_dir: Path, git_repo_with_src_rule: Path) -> None: - """Test that blocking JSON has the correct Claude Code structure.""" - # Create a file that triggers the rule - test_src_dir = git_repo_with_src_rule / "src" - test_src_dir.mkdir(exist_ok=True) - (test_src_dir / "main.py").write_text("# New file\n") - - repo = Repo(git_repo_with_src_rule) - repo.index.add(["src/main.py"]) - - stdout, stderr, code = run_stop_hook(git_repo_with_src_rule, src_dir=src_dir) - - result = json.loads(stdout.strip()) - - # Verify exact structure expected by Claude Code - assert set(result.keys()) == { - "decision", - "reason", - }, f"Unexpected keys in JSON: {result.keys()}" - assert result["decision"] == "block" - assert isinstance(result["reason"], str) - assert len(result["reason"]) > 0 - - def test_reason_contains_rule_instructions( - self, src_dir: Path, git_repo_with_src_rule: Path - ) -> None: - """Test that the reason includes the rule instructions.""" - test_src_dir = git_repo_with_src_rule / "src" - test_src_dir.mkdir(exist_ok=True) - (test_src_dir / "main.py").write_text("# New file\n") - - repo = Repo(git_repo_with_src_rule) - repo.index.add(["src/main.py"]) - - stdout, stderr, code = run_stop_hook(git_repo_with_src_rule, src_dir=src_dir) - - result = json.loads(stdout.strip()) - - # Check that the reason contains the rule content - reason = result["reason"] - assert "DeepWork Rules Triggered" in reason - assert "Test Rule" in reason - assert "test rule that fires" in reason - - -class TestRulesStopHookInfiniteLoopPrevention: - """Tests for preventing infinite loops in rules stop hook.""" - - def test_queued_prompt_rule_does_not_refire( - self, src_dir: Path, git_repo_with_src_rule: Path - ) -> None: - """Test that a prompt rule with QUEUED status doesn't fire again. - - This prevents infinite loops when the transcript is unavailable or - promise tags haven't been written yet. 
- """ - # Create a file that triggers the rule - test_src_dir = git_repo_with_src_rule / "src" - test_src_dir.mkdir(exist_ok=True) - (test_src_dir / "main.py").write_text("# New file\n") - - # Stage the change - repo = Repo(git_repo_with_src_rule) - repo.index.add(["src/main.py"]) - - # First run: rule should fire and create queue entry - stdout1, stderr1, code1 = run_stop_hook(git_repo_with_src_rule, src_dir=src_dir) - result1 = json.loads(stdout1.strip()) - assert result1.get("decision") == "block", f"First run should block: {result1}" - assert "Test Rule" in result1.get("reason", "") - - # Second run: rule should NOT fire again (already QUEUED) - # Note: No transcript with promise tag, but the queue entry prevents re-firing - stdout2, stderr2, code2 = run_stop_hook(git_repo_with_src_rule, src_dir=src_dir) - result2 = json.loads(stdout2.strip()) - assert result2 == {}, f"Second run should not block (rule already queued): {result2}" - - def test_rule_fires_again_after_queue_cleared( - self, src_dir: Path, git_repo_with_src_rule: Path - ) -> None: - """Test that a rule fires again after the queue is cleared.""" - # Create a file that triggers the rule - test_src_dir = git_repo_with_src_rule / "src" - test_src_dir.mkdir(exist_ok=True) - (test_src_dir / "main.py").write_text("# New file\n") - - # Stage the change - repo = Repo(git_repo_with_src_rule) - repo.index.add(["src/main.py"]) - - # First run: rule should fire - stdout1, stderr1, code1 = run_stop_hook(git_repo_with_src_rule, src_dir=src_dir) - result1 = json.loads(stdout1.strip()) - assert result1.get("decision") == "block" - - # Clear the queue - queue_dir = git_repo_with_src_rule / ".deepwork" / "tmp" / "rules" / "queue" - if queue_dir.exists(): - for f in queue_dir.glob("*.json"): - f.unlink() - - # Third run: rule should fire again (queue cleared) - stdout3, stderr3, code3 = run_stop_hook(git_repo_with_src_rule, src_dir=src_dir) - result3 = json.loads(stdout3.strip()) - assert result3.get("decision") == "block", f"Rule should fire again: {result3}" - - def test_promise_tag_still_prevents_firing( - self, src_dir: Path, git_repo_with_src_rule: Path - ) -> None: - """Test that promise tags still prevent rules from firing. - - Even with the queue-based fix, promise tags should work when - the transcript is available. 
- """ - # Create a file that triggers the rule - test_src_dir = git_repo_with_src_rule / "src" - test_src_dir.mkdir(exist_ok=True) - (test_src_dir / "main.py").write_text("# New file\n") - - # Stage the change - repo = Repo(git_repo_with_src_rule) - repo.index.add(["src/main.py"]) - - # Create a transcript with promise tag (simulating agent response) - with tempfile.NamedTemporaryFile(mode="w", suffix=".jsonl", delete=False) as f: - transcript_path = f.name - f.write( - json.dumps( - { - "role": "assistant", - "message": { - "content": [ - { - "type": "text", - "text": "Test Rule", - } - ] - }, - } - ) - ) - f.write("\n") - - try: - # Run with transcript: rule should NOT fire (promise tag found) - hook_input = {"transcript_path": transcript_path, "hook_event_name": "Stop"} - stdout, stderr, code = run_stop_hook( - git_repo_with_src_rule, hook_input, src_dir=src_dir - ) - result = json.loads(stdout.strip()) - assert result == {}, f"Rule should not fire with promise tag: {result}" - finally: - os.unlink(transcript_path) - - -class TestSubagentStopEvent: - """Tests for SubagentStop event triggering agentFinished rules.""" - - def test_subagent_stop_event_triggers_rules( - self, src_dir: Path, git_repo_with_src_rule: Path - ) -> None: - """Test that SubagentStop event triggers agentFinished rules. - - Claude Code has both Stop and SubagentStop events that should both - trigger after_agent/agentFinished rules. - """ - # Create a file that triggers the rule - test_src_dir = git_repo_with_src_rule / "src" - test_src_dir.mkdir(exist_ok=True) - (test_src_dir / "main.py").write_text("# New file\n") - - # Stage the change - repo = Repo(git_repo_with_src_rule) - repo.index.add(["src/main.py"]) - - # Run with SubagentStop event - hook_input = {"hook_event_name": "SubagentStop"} - stdout, stderr, code = run_stop_hook(git_repo_with_src_rule, hook_input, src_dir=src_dir) - - # Parse the output - output = stdout.strip() - assert output, f"Expected JSON output. stderr: {stderr}" - result = json.loads(output) - - # Should trigger the rule just like Stop event does - assert result.get("decision") == "block", f"SubagentStop should trigger rules: {result}" - assert "Test Rule" in result.get("reason", "") - - def test_both_stop_and_subagent_stop_trigger_same_rules( - self, src_dir: Path, git_repo_with_src_rule: Path - ) -> None: - """Test that Stop and SubagentStop events trigger the same rules. - - Both events should fire agentFinished rules with identical behavior. 
- """ - # Create a file that triggers the rule - test_src_dir = git_repo_with_src_rule / "src" - test_src_dir.mkdir(exist_ok=True) - (test_src_dir / "main.py").write_text("# New file\n") - - repo = Repo(git_repo_with_src_rule) - repo.index.add(["src/main.py"]) - - # Test Stop event - hook_input_stop = {"hook_event_name": "Stop"} - stdout_stop, _, _ = run_stop_hook(git_repo_with_src_rule, hook_input_stop, src_dir=src_dir) - result_stop = json.loads(stdout_stop.strip()) - - # Clear the queue to allow the rule to fire again - queue_dir = git_repo_with_src_rule / ".deepwork" / "tmp" / "rules" / "queue" - if queue_dir.exists(): - for f in queue_dir.glob("*.json"): - f.unlink() - - # Test SubagentStop event - hook_input_subagent = {"hook_event_name": "SubagentStop"} - stdout_subagent, _, _ = run_stop_hook( - git_repo_with_src_rule, hook_input_subagent, src_dir=src_dir - ) - result_subagent = json.loads(stdout_subagent.strip()) - - # Both should produce the same blocking behavior - assert result_stop.get("decision") == result_subagent.get("decision") == "block" - assert "Test Rule" in result_stop.get("reason", "") - assert "Test Rule" in result_subagent.get("reason", "") diff --git a/tests/shell_script_tests/test_user_prompt_submit.py b/tests/shell_script_tests/test_user_prompt_submit.py deleted file mode 100644 index 3f1b655e..00000000 --- a/tests/shell_script_tests/test_user_prompt_submit.py +++ /dev/null @@ -1,166 +0,0 @@ -"""Tests for user_prompt_submit.sh shell script. - -This script is called as a Claude Code UserPromptSubmit hook. -It should: -1. Execute successfully (exit code 0) -2. Output valid JSON or no output (hooks allow both) -3. Capture work tree state by calling capture_prompt_work_tree.sh -""" - -import json -from pathlib import Path - -import pytest -from git import Repo - -from .conftest import run_shell_script - - -def run_user_prompt_submit_hook( - script_path: Path, - cwd: Path, - hook_input: dict | None = None, -) -> tuple[str, str, int]: - """Run the user_prompt_submit.sh script and return its output.""" - return run_shell_script(script_path, cwd, hook_input=hook_input) - - -class TestUserPromptSubmitHookExecution: - """Tests for user_prompt_submit.sh execution behavior.""" - - def test_exits_successfully(self, rules_hooks_dir: Path, git_repo: Path) -> None: - """Test that the hook exits with code 0.""" - script_path = rules_hooks_dir / "user_prompt_submit.sh" - stdout, stderr, code = run_user_prompt_submit_hook(script_path, git_repo) - - assert code == 0, f"Expected exit code 0, got {code}. 
stderr: {stderr}" - - def test_creates_deepwork_directory(self, rules_hooks_dir: Path, git_repo: Path) -> None: - """Test that the hook creates .deepwork directory if it doesn't exist.""" - deepwork_dir = git_repo / ".deepwork" - assert not deepwork_dir.exists(), "Precondition: .deepwork should not exist" - - script_path = rules_hooks_dir / "user_prompt_submit.sh" - stdout, stderr, code = run_user_prompt_submit_hook(script_path, git_repo) - - assert code == 0, f"Script failed with stderr: {stderr}" - assert deepwork_dir.exists(), "Hook should create .deepwork directory" - - def test_creates_last_work_tree_file(self, rules_hooks_dir: Path, git_repo: Path) -> None: - """Test that the hook creates .deepwork/.last_work_tree file.""" - script_path = rules_hooks_dir / "user_prompt_submit.sh" - stdout, stderr, code = run_user_prompt_submit_hook(script_path, git_repo) - - work_tree_file = git_repo / ".deepwork" / ".last_work_tree" - assert code == 0, f"Script failed with stderr: {stderr}" - assert work_tree_file.exists(), "Hook should create .last_work_tree file" - - def test_captures_staged_changes(self, rules_hooks_dir: Path, git_repo: Path) -> None: - """Test that the hook captures staged file changes.""" - # Create and stage a new file - new_file = git_repo / "new_file.py" - new_file.write_text("# New file\n") - repo = Repo(git_repo) - repo.index.add(["new_file.py"]) - - script_path = rules_hooks_dir / "user_prompt_submit.sh" - stdout, stderr, code = run_user_prompt_submit_hook(script_path, git_repo) - - assert code == 0, f"Script failed with stderr: {stderr}" - - work_tree_file = git_repo / ".deepwork" / ".last_work_tree" - content = work_tree_file.read_text() - assert "new_file.py" in content, "Staged file should be captured" - - def test_captures_untracked_files(self, rules_hooks_dir: Path, git_repo: Path) -> None: - """Test that the hook captures untracked files.""" - # Create an untracked file (don't stage it) - untracked = git_repo / "untracked.txt" - untracked.write_text("untracked content\n") - - script_path = rules_hooks_dir / "user_prompt_submit.sh" - stdout, stderr, code = run_user_prompt_submit_hook(script_path, git_repo) - - assert code == 0, f"Script failed with stderr: {stderr}" - - work_tree_file = git_repo / ".deepwork" / ".last_work_tree" - content = work_tree_file.read_text() - # After running the hook, files are staged, so check for the file - assert "untracked.txt" in content, "Untracked file should be captured" - - -class TestUserPromptSubmitHookJsonOutput: - """Tests for user_prompt_submit.sh JSON output format. - - Claude Code UserPromptSubmit hooks can output: - - Empty output (most common for side-effect-only hooks) - - Valid JSON (if the hook needs to communicate something) - - Either is acceptable; invalid JSON is NOT acceptable. - """ - - def test_output_is_empty_or_valid_json(self, rules_hooks_dir: Path, git_repo: Path) -> None: - """Test that output is either empty or valid JSON.""" - script_path = rules_hooks_dir / "user_prompt_submit.sh" - stdout, stderr, code = run_user_prompt_submit_hook(script_path, git_repo) - - output = stdout.strip() - - if output: - # If there's output, it must be valid JSON - try: - result = json.loads(output) - assert isinstance(result, dict), "JSON output should be an object" - except json.JSONDecodeError as e: - pytest.fail(f"Output is not valid JSON: {output!r}. 
Error: {e}") - - def test_does_not_block_prompt(self, rules_hooks_dir: Path, git_repo: Path) -> None: - """Test that the hook does not return a blocking response.""" - script_path = rules_hooks_dir / "user_prompt_submit.sh" - stdout, stderr, code = run_user_prompt_submit_hook(script_path, git_repo) - - output = stdout.strip() - - if output: - try: - result = json.loads(output) - # UserPromptSubmit hooks should not block - assert result.get("decision") != "block", ( - "UserPromptSubmit hook should not block prompt submission" - ) - except json.JSONDecodeError: - pass # Empty or non-JSON output is fine - - -class TestUserPromptSubmitHookIdempotence: - """Tests for idempotent behavior of user_prompt_submit.sh.""" - - def test_multiple_runs_succeed(self, rules_hooks_dir: Path, git_repo: Path) -> None: - """Test that the hook can be run multiple times successfully.""" - script_path = rules_hooks_dir / "user_prompt_submit.sh" - - # Run multiple times - for i in range(3): - stdout, stderr, code = run_user_prompt_submit_hook(script_path, git_repo) - assert code == 0, f"Run {i + 1} failed with stderr: {stderr}" - - def test_updates_work_tree_on_new_changes(self, rules_hooks_dir: Path, git_repo: Path) -> None: - """Test that subsequent runs update the work tree state.""" - script_path = rules_hooks_dir / "user_prompt_submit.sh" - repo = Repo(git_repo) - - # First run - capture initial state - run_user_prompt_submit_hook(script_path, git_repo) - work_tree_file = git_repo / ".deepwork" / ".last_work_tree" - assert work_tree_file.exists(), "Work tree file should exist after first run" - - # Create and stage a new file - new_file = git_repo / "another_file.py" - new_file.write_text("# Another file\n") - repo.index.add(["another_file.py"]) - - # Second run - should capture new file - run_user_prompt_submit_hook(script_path, git_repo) - updated_content = work_tree_file.read_text() - - assert "another_file.py" in updated_content, "New file should be captured" diff --git a/tests/unit/test_command_executor.py b/tests/unit/test_command_executor.py deleted file mode 100644 index 12472729..00000000 --- a/tests/unit/test_command_executor.py +++ /dev/null @@ -1,264 +0,0 @@ -"""Tests for command executor (CMD-5.x from test_scenarios.md).""" - -from pathlib import Path - -from deepwork.core.command_executor import ( - CommandResult, - all_commands_succeeded, - execute_command, - format_command_errors, - run_command_action, - substitute_command_variables, -) -from deepwork.core.rules_parser import CommandAction - - -class TestSubstituteCommandVariables: - """Tests for command variable substitution.""" - - def test_single_file_substitution(self) -> None: - """Substitute {file} variable.""" - result = substitute_command_variables( - "ruff format {file}", - file="src/main.py", - ) - assert result == "ruff format src/main.py" - - def test_multiple_files_substitution(self) -> None: - """Substitute {files} variable.""" - result = substitute_command_variables( - "eslint --fix {files}", - files=["a.js", "b.js", "c.js"], - ) - assert result == "eslint --fix a.js b.js c.js" - - def test_repo_root_substitution(self) -> None: - """Substitute {repo_root} variable.""" - result = substitute_command_variables( - "cd {repo_root} && pytest", - repo_root=Path("/home/user/project"), - ) - assert result == "cd /home/user/project && pytest" - - def test_all_variables(self) -> None: - """Substitute all variables together.""" - result = substitute_command_variables( - "{repo_root}/scripts/process.sh {file} {files}", - file="main.py", - 
files=["a.py", "b.py"], - repo_root=Path("/project"), - ) - assert result == "/project/scripts/process.sh main.py a.py b.py" - - -class TestExecuteCommand: - """Tests for command execution.""" - - def test_successful_command(self) -> None: - """CMD-5.3.1: Exit code 0 - success.""" - result = execute_command("echo hello") - assert result.success is True - assert result.exit_code == 0 - assert "hello" in result.stdout - - def test_failed_command(self) -> None: - """CMD-5.3.2: Exit code 1 - failure.""" - result = execute_command("exit 1") - assert result.success is False - assert result.exit_code == 1 - - def test_command_timeout(self) -> None: - """CMD-5.3.3: Command timeout.""" - result = execute_command("sleep 10", timeout=1) - assert result.success is False - assert "timed out" in result.stderr.lower() - - def test_command_not_found(self) -> None: - """CMD-5.3.4: Command not found.""" - result = execute_command("nonexistent_command_12345") - assert result.success is False - # Different systems return different error messages - assert result.exit_code != 0 or "not found" in result.stderr.lower() - - -class TestRunCommandActionEachMatch: - """Tests for run_for: each_match mode (CMD-5.1.x).""" - - def test_single_file(self) -> None: - """CMD-5.1.1: Single file triggers single command.""" - action = CommandAction(command="echo {file}", run_for="each_match") - results = run_command_action(action, ["src/main.py"]) - - assert len(results) == 1 - assert results[0].command == "echo src/main.py" - assert results[0].success is True - - def test_multiple_files(self) -> None: - """CMD-5.1.2: Multiple files trigger command for each.""" - action = CommandAction(command="echo {file}", run_for="each_match") - results = run_command_action(action, ["src/a.py", "src/b.py"]) - - assert len(results) == 2 - assert results[0].command == "echo src/a.py" - assert results[1].command == "echo src/b.py" - - def test_no_files(self) -> None: - """CMD-5.1.3: No files - no command run.""" - action = CommandAction(command="echo {file}", run_for="each_match") - results = run_command_action(action, []) - - assert len(results) == 0 - - -class TestRunCommandActionAllMatches: - """Tests for run_for: all_matches mode (CMD-5.2.x).""" - - def test_multiple_files_single_command(self) -> None: - """CMD-5.2.1: Multiple files in single command.""" - action = CommandAction(command="echo {files}", run_for="all_matches") - results = run_command_action(action, ["a.js", "b.js", "c.js"]) - - assert len(results) == 1 - assert results[0].command == "echo a.js b.js c.js" - assert results[0].success is True - - def test_single_file_single_command(self) -> None: - """CMD-5.2.2: Single file in single command.""" - action = CommandAction(command="echo {files}", run_for="all_matches") - results = run_command_action(action, ["a.js"]) - - assert len(results) == 1 - assert results[0].command == "echo a.js" - - -class TestAllCommandsSucceeded: - """Tests for all_commands_succeeded helper.""" - - def test_all_success(self) -> None: - """All commands succeeded.""" - results = [ - CommandResult(success=True, exit_code=0, stdout="ok", stderr="", command="echo 1"), - CommandResult(success=True, exit_code=0, stdout="ok", stderr="", command="echo 2"), - ] - assert all_commands_succeeded(results) is True - - def test_one_failure(self) -> None: - """One command failed.""" - results = [ - CommandResult(success=True, exit_code=0, stdout="ok", stderr="", command="echo 1"), - CommandResult(success=False, exit_code=1, stdout="", stderr="error", command="exit 1"), - 
] - assert all_commands_succeeded(results) is False - - def test_empty_list(self) -> None: - """Empty list is considered success.""" - assert all_commands_succeeded([]) is True - - -class TestFormatCommandErrors: - """Tests for format_command_errors helper.""" - - def test_single_error(self) -> None: - """Format single error.""" - results = [ - CommandResult( - success=False, - exit_code=1, - stdout="", - stderr="Something went wrong", - command="failing_cmd", - ), - ] - output = format_command_errors(results) - assert "Command: failing_cmd" in output - assert "Something went wrong" in output - assert "Exit code: 1" in output - - def test_multiple_errors(self) -> None: - """Format multiple errors.""" - results = [ - CommandResult(success=False, exit_code=1, stdout="", stderr="Error 1", command="cmd1"), - CommandResult(success=False, exit_code=2, stdout="", stderr="Error 2", command="cmd2"), - ] - output = format_command_errors(results) - assert "cmd1" in output - assert "Error 1" in output - assert "cmd2" in output - assert "Error 2" in output - - def test_ignores_success(self) -> None: - """Ignore successful commands.""" - results = [ - CommandResult(success=True, exit_code=0, stdout="ok", stderr="", command="good_cmd"), - CommandResult(success=False, exit_code=1, stdout="", stderr="bad", command="bad_cmd"), - ] - output = format_command_errors(results) - assert "good_cmd" not in output - assert "bad_cmd" in output - - def test_includes_rule_name(self) -> None: - """Include rule name when provided.""" - results = [ - CommandResult( - success=False, - exit_code=1, - stdout="", - stderr="Error output", - command="test_cmd", - ), - ] - output = format_command_errors(results, rule_name="My Test Rule") - assert "Rule: My Test Rule" in output - assert "Command: test_cmd" in output - assert "Exit code: 1" in output - assert "Stderr:\nError output" in output - - def test_includes_stdout(self) -> None: - """Include stdout when present.""" - results = [ - CommandResult( - success=False, - exit_code=1, - stdout="Standard output here", - stderr="Standard error here", - command="test_cmd", - ), - ] - output = format_command_errors(results) - assert "Stdout:\nStandard output here" in output - assert "Stderr:\nStandard error here" in output - - def test_shows_no_output_message(self) -> None: - """Show '(no output)' when no stdout or stderr.""" - results = [ - CommandResult( - success=False, - exit_code=42, - stdout="", - stderr="", - command="silent_cmd", - ), - ] - output = format_command_errors(results) - assert "Command: silent_cmd" in output - assert "Exit code: 42" in output - assert "(no output)" in output - - def test_full_error_format(self) -> None: - """Test complete error format with all fields.""" - results = [ - CommandResult( - success=False, - exit_code=42, - stdout="stdout output", - stderr="stderr output", - command="echo test && exit 42", - ), - ] - output = format_command_errors(results, rule_name="Command Failure Rule") - # Verify all parts are present in the correct format - assert "Rule: Command Failure Rule" in output - assert "Command: echo test && exit 42" in output - assert "Exit code: 42" in output - assert "Stdout:\nstdout output" in output - assert "Stderr:\nstderr output" in output diff --git a/tests/unit/test_hooks_syncer.py b/tests/unit/test_hooks_syncer.py index 99edcfdb..64cd17ce 100644 --- a/tests/unit/test_hooks_syncer.py +++ b/tests/unit/test_hooks_syncer.py @@ -39,11 +39,11 @@ def test_get_command_for_module(self, temp_dir: Path) -> None: entry = HookEntry( 
job_name="test_job", job_dir=job_dir, - module="deepwork.hooks.rules_check", + module="deepwork.hooks.my_hook", ) cmd = entry.get_command(temp_dir) - assert cmd == "deepwork hook rules_check" + assert cmd == "deepwork hook my_hook" class TestJobHooks: @@ -62,7 +62,7 @@ def test_from_job_dir_with_hooks(self, temp_dir: Path) -> None: UserPromptSubmit: - capture.sh Stop: - - rules_check.sh + - validate.sh - cleanup.sh """ ) @@ -74,7 +74,7 @@ def test_from_job_dir_with_hooks(self, temp_dir: Path) -> None: assert len(result.hooks["UserPromptSubmit"]) == 1 assert result.hooks["UserPromptSubmit"][0].script == "capture.sh" assert len(result.hooks["Stop"]) == 2 - assert result.hooks["Stop"][0].script == "rules_check.sh" + assert result.hooks["Stop"][0].script == "validate.sh" assert result.hooks["Stop"][1].script == "cleanup.sh" def test_from_job_dir_with_module_hooks(self, temp_dir: Path) -> None: @@ -90,7 +90,7 @@ def test_from_job_dir_with_module_hooks(self, temp_dir: Path) -> None: UserPromptSubmit: - capture.sh Stop: - - module: deepwork.hooks.rules_check + - module: deepwork.hooks.validate """ ) @@ -98,7 +98,7 @@ def test_from_job_dir_with_module_hooks(self, temp_dir: Path) -> None: assert result is not None assert result.hooks["UserPromptSubmit"][0].script == "capture.sh" - assert result.hooks["Stop"][0].module == "deepwork.hooks.rules_check" + assert result.hooks["Stop"][0].module == "deepwork.hooks.validate" assert result.hooks["Stop"][0].script is None def test_from_job_dir_no_hooks_file(self, temp_dir: Path) -> None: diff --git a/tests/unit/test_pattern_matcher.py b/tests/unit/test_pattern_matcher.py deleted file mode 100644 index 69d73e7e..00000000 --- a/tests/unit/test_pattern_matcher.py +++ /dev/null @@ -1,205 +0,0 @@ -"""Tests for pattern matching with variable extraction.""" - -import pytest - -from deepwork.core.pattern_matcher import ( - PatternError, - match_pattern, - matches_any_pattern, - matches_glob, - resolve_pattern, - validate_pattern, -) - - -class TestBasicGlobPatterns: - """Tests for basic glob pattern matching (PM-1.1.x from test_scenarios.md).""" - - def test_exact_match(self) -> None: - """PM-1.1.1: Exact match.""" - assert matches_glob("README.md", "README.md") - - def test_exact_no_match(self) -> None: - """PM-1.1.2: Exact no match (case sensitive).""" - assert not matches_glob("readme.md", "README.md") - - def test_single_wildcard(self) -> None: - """PM-1.1.3: Single wildcard.""" - assert matches_glob("main.py", "*.py") - - def test_single_wildcard_nested(self) -> None: - """PM-1.1.4: Single wildcard - fnmatch matches nested paths too. - - Note: Standard fnmatch does match across directory separators. - Use **/*.py pattern to explicitly require directory prefixes. 
- """ - # fnmatch's * matches any character including / - # This is different from shell glob behavior - assert matches_glob("src/main.py", "*.py") - - def test_double_wildcard(self) -> None: - """PM-1.1.5: Double wildcard matches nested paths.""" - assert matches_glob("src/main.py", "**/*.py") - - def test_double_wildcard_deep(self) -> None: - """PM-1.1.6: Double wildcard matches deeply nested paths.""" - assert matches_glob("src/a/b/c/main.py", "**/*.py") - - def test_double_wildcard_root(self) -> None: - """PM-1.1.7: Double wildcard matches root-level files.""" - assert matches_glob("main.py", "**/*.py") - - def test_directory_prefix(self) -> None: - """PM-1.1.8: Directory prefix matching.""" - assert matches_glob("src/foo.py", "src/**/*") - - def test_directory_prefix_deep(self) -> None: - """PM-1.1.9: Directory prefix matching deeply nested.""" - assert matches_glob("src/a/b/c.py", "src/**/*") - - def test_directory_no_match(self) -> None: - """PM-1.1.10: Directory prefix no match.""" - assert not matches_glob("lib/foo.py", "src/**/*") - - def test_brace_expansion_ts(self) -> None: - """PM-1.1.11: Brace expansion - not supported by fnmatch. - - Note: Python's fnmatch doesn't support brace expansion. - Use matches_any_pattern with multiple patterns instead. - """ - # fnmatch doesn't support {a,b} syntax - assert not matches_glob("app.ts", "*.{js,ts}") - # Use matches_any_pattern for multiple extensions - assert matches_any_pattern("app.ts", ["*.ts", "*.js"]) - - def test_brace_expansion_js(self) -> None: - """PM-1.1.12: Brace expansion - not supported by fnmatch.""" - assert not matches_glob("app.js", "*.{js,ts}") - assert matches_any_pattern("app.js", ["*.ts", "*.js"]) - - def test_brace_expansion_no_match(self) -> None: - """PM-1.1.13: Brace expansion no match.""" - # Neither {a,b} syntax nor multiple patterns match - assert not matches_glob("app.py", "*.{js,ts}") - assert not matches_any_pattern("app.py", ["*.ts", "*.js"]) - - -class TestVariablePatterns: - """Tests for variable pattern matching and extraction (PM-1.2.x).""" - - def test_single_var_path(self) -> None: - """PM-1.2.1: Single variable captures nested path.""" - result = match_pattern("src/{path}.py", "src/foo/bar.py") - assert result.matched - assert result.variables == {"path": "foo/bar"} - - def test_single_var_name(self) -> None: - """PM-1.2.2: Single variable name (non-path).""" - result = match_pattern("src/{name}.py", "src/utils.py") - assert result.matched - assert result.variables == {"name": "utils"} - - def test_name_no_nested(self) -> None: - """PM-1.2.3: {name} doesn't match nested paths (single segment).""" - result = match_pattern("src/{name}.py", "src/foo/bar.py") - # {name} only captures single segment, not nested paths - assert not result.matched - - def test_two_variables(self) -> None: - """PM-1.2.4: Two variables in pattern.""" - result = match_pattern("{dir}/{name}.py", "src/main.py") - assert result.matched - assert result.variables == {"dir": "src", "name": "main"} - - def test_prefix_and_suffix(self) -> None: - """PM-1.2.5: Prefix and suffix around variable.""" - result = match_pattern("test_{name}_test.py", "test_foo_test.py") - assert result.matched - assert result.variables == {"name": "foo"} - - def test_nested_path_variable(self) -> None: - """PM-1.2.6: Nested path in middle.""" - result = match_pattern("src/{path}/index.py", "src/a/b/index.py") - assert result.matched - assert result.variables == {"path": "a/b"} - - def test_explicit_multi_segment(self) -> None: - """PM-1.2.7: Explicit 
{**mod} for multi-segment.""" - result = match_pattern("src/{**mod}/main.py", "src/a/b/c/main.py") - assert result.matched - assert result.variables == {"mod": "a/b/c"} - - def test_explicit_single_segment(self) -> None: - """PM-1.2.8: Explicit {*name} for single segment.""" - result = match_pattern("src/{*name}.py", "src/utils.py") - assert result.matched - assert result.variables == {"name": "utils"} - - def test_mixed_explicit(self) -> None: - """PM-1.2.9: Mixed explicit single and multi.""" - result = match_pattern("{*dir}/{**path}.py", "src/a/b/c.py") - assert result.matched - assert result.variables == {"dir": "src", "path": "a/b/c"} - - -class TestPatternResolution: - """Tests for pattern resolution / substitution (PM-1.3.x).""" - - def test_simple_substitution(self) -> None: - """PM-1.3.1: Simple variable substitution.""" - result = resolve_pattern("tests/{path}_test.py", {"path": "foo"}) - assert result == "tests/foo_test.py" - - def test_nested_path_substitution(self) -> None: - """PM-1.3.2: Nested path substitution.""" - result = resolve_pattern("tests/{path}_test.py", {"path": "a/b/c"}) - assert result == "tests/a/b/c_test.py" - - def test_multiple_vars_substitution(self) -> None: - """PM-1.3.3: Multiple variables substitution.""" - result = resolve_pattern("{dir}/test_{name}.py", {"dir": "tests", "name": "foo"}) - assert result == "tests/test_foo.py" - - -class TestPatternValidation: - """Tests for pattern syntax validation (SV-8.3.x).""" - - def test_unclosed_brace(self) -> None: - """SV-8.3.1: Unclosed brace.""" - with pytest.raises(PatternError, match="Unclosed brace|unclosed brace"): - validate_pattern("src/{path.py") - - def test_empty_variable(self) -> None: - """SV-8.3.2: Empty variable name.""" - with pytest.raises(PatternError, match="[Ee]mpty variable name"): - validate_pattern("src/{}.py") - - def test_invalid_chars_in_var(self) -> None: - """SV-8.3.3: Invalid characters in variable name.""" - with pytest.raises(PatternError, match="[Ii]nvalid"): - validate_pattern("src/{path/name}.py") - - def test_duplicate_variable(self) -> None: - """SV-8.3.4: Duplicate variable name.""" - with pytest.raises(PatternError, match="[Dd]uplicate"): - validate_pattern("{path}/{path}.py") - - -class TestMatchesAnyPattern: - """Tests for matches_any_pattern function.""" - - def test_matches_first_pattern(self) -> None: - """Match against first of multiple patterns.""" - assert matches_any_pattern("file.py", ["*.py", "*.js"]) - - def test_matches_second_pattern(self) -> None: - """Match against second of multiple patterns.""" - assert matches_any_pattern("file.js", ["*.py", "*.js"]) - - def test_no_match(self) -> None: - """No match in any pattern.""" - assert not matches_any_pattern("file.txt", ["*.py", "*.js"]) - - def test_empty_patterns(self) -> None: - """Empty patterns list never matches.""" - assert not matches_any_pattern("file.py", []) diff --git a/tests/unit/test_rules_check.py b/tests/unit/test_rules_check.py deleted file mode 100644 index e672fd94..00000000 --- a/tests/unit/test_rules_check.py +++ /dev/null @@ -1,105 +0,0 @@ -"""Tests for rules_check hook module.""" - -from deepwork.hooks.rules_check import extract_promise_tags - - -class TestExtractPromiseTags: - """Tests for extract_promise_tags function.""" - - def test_extracts_simple_promise(self) -> None: - """Test extracting a simple promise tag.""" - text = "I've reviewed this. 
Rule Name" - result = extract_promise_tags(text) - assert result == {"Rule Name"} - - def test_extracts_promise_with_checkmark(self) -> None: - """Test extracting promise tag with checkmark prefix.""" - text = "Done. ✓ Rule Name" - result = extract_promise_tags(text) - assert result == {"Rule Name"} - - def test_extracts_promise_with_checkmark_no_space(self) -> None: - """Test extracting promise tag with checkmark but no space.""" - text = "✓Rule Name" - result = extract_promise_tags(text) - assert result == {"Rule Name"} - - def test_extracts_multiple_promises(self) -> None: - """Test extracting multiple promise tags.""" - text = """ - Rule One - ✓ Rule Two - Rule Three - """ - result = extract_promise_tags(text) - assert result == {"Rule One", "Rule Two", "Rule Three"} - - def test_case_insensitive_tag(self) -> None: - """Test that promise tags are case-insensitive.""" - text = "Rule Name" - result = extract_promise_tags(text) - assert result == {"Rule Name"} - - def test_preserves_rule_name_case(self) -> None: - """Test that rule name case is preserved.""" - text = "Architecture Documentation Accuracy" - result = extract_promise_tags(text) - assert result == {"Architecture Documentation Accuracy"} - - def test_handles_whitespace_in_tag(self) -> None: - """Test handling of whitespace around rule name.""" - text = " Rule Name " - result = extract_promise_tags(text) - assert result == {"Rule Name"} - - def test_handles_newlines_in_tag(self) -> None: - """Test handling of newlines in promise tag.""" - text = "\n Rule Name\n" - result = extract_promise_tags(text) - assert result == {"Rule Name"} - - def test_returns_empty_set_for_no_promises(self) -> None: - """Test that empty set is returned when no promises exist.""" - text = "No promises here." - result = extract_promise_tags(text) - assert result == set() - - def test_handles_empty_string(self) -> None: - """Test handling of empty string.""" - result = extract_promise_tags("") - assert result == set() - - def test_real_world_command_error_promise(self) -> None: - """Test promise format shown in command error output.""" - # This is the exact format shown to agents when a command rule fails - text = "✓ Manual Test: Infinite Block Command" - result = extract_promise_tags(text) - assert result == {"Manual Test: Infinite Block Command"} - - def test_mixed_formats_in_same_text(self) -> None: - """Test extracting both checkmark and non-checkmark promises.""" - text = """ - Rule Without Checkmark - ✓ Rule With Checkmark - """ - result = extract_promise_tags(text) - assert result == {"Rule Without Checkmark", "Rule With Checkmark"} - - def test_promise_with_special_characters_in_name(self) -> None: - """Test promise with special characters in rule name.""" - text = "Source/Test Pairing" - result = extract_promise_tags(text) - assert result == {"Source/Test Pairing"} - - def test_promise_embedded_in_markdown(self) -> None: - """Test promise tag embedded in markdown text.""" - text = """ - I've reviewed the documentation and it's accurate. - - Architecture Documentation Accuracy - README Accuracy - - The changes were purely cosmetic. 
- """ - result = extract_promise_tags(text) - assert result == {"Architecture Documentation Accuracy", "README Accuracy"} diff --git a/tests/unit/test_rules_parser.py b/tests/unit/test_rules_parser.py deleted file mode 100644 index ee8a2375..00000000 --- a/tests/unit/test_rules_parser.py +++ /dev/null @@ -1,995 +0,0 @@ -"""Tests for rule definition parser.""" - -from pathlib import Path - -from deepwork.core.pattern_matcher import matches_any_pattern as matches_pattern -from deepwork.core.rules_parser import ( - DetectionMode, - PairConfig, - Rule, - evaluate_rule, - evaluate_rules, - load_rules_from_directory, -) - - -class TestMatchesPattern: - """Tests for matches_pattern function.""" - - def test_simple_glob_match(self) -> None: - """Test simple glob pattern matching.""" - assert matches_pattern("file.py", ["*.py"]) - assert not matches_pattern("file.js", ["*.py"]) - - def test_directory_glob_match(self) -> None: - """Test directory pattern matching.""" - assert matches_pattern("src/file.py", ["src/*"]) - assert not matches_pattern("test/file.py", ["src/*"]) - - def test_recursive_glob_match(self) -> None: - """Test recursive ** pattern matching.""" - assert matches_pattern("src/deep/nested/file.py", ["src/**/*.py"]) - assert matches_pattern("src/file.py", ["src/**/*.py"]) - assert not matches_pattern("test/file.py", ["src/**/*.py"]) - - def test_multiple_patterns(self) -> None: - """Test matching against multiple patterns.""" - patterns = ["*.py", "*.js"] - assert matches_pattern("file.py", patterns) - assert matches_pattern("file.js", patterns) - assert not matches_pattern("file.txt", patterns) - - def test_config_directory_pattern(self) -> None: - """Test pattern like app/config/**/*.""" - assert matches_pattern("app/config/settings.py", ["app/config/**/*"]) - assert matches_pattern("app/config/nested/deep.yml", ["app/config/**/*"]) - assert not matches_pattern("app/other/file.py", ["app/config/**/*"]) - - -class TestEvaluateRule: - """Tests for evaluate_rule function.""" - - def test_fires_when_trigger_matches(self) -> None: - """Test rule fires when trigger matches.""" - rule = Rule( - name="Test", - filename="test", - detection_mode=DetectionMode.TRIGGER_SAFETY, - triggers=["src/**/*.py"], - safety=[], - instructions="Check it", - compare_to="base", - ) - changed_files = ["src/main.py", "README.md"] - - result = evaluate_rule(rule, changed_files) - assert result.should_fire is True - - def test_does_not_fire_when_no_trigger_match(self) -> None: - """Test rule doesn't fire when no trigger matches.""" - rule = Rule( - name="Test", - filename="test", - detection_mode=DetectionMode.TRIGGER_SAFETY, - triggers=["src/**/*.py"], - safety=[], - instructions="Check it", - compare_to="base", - ) - changed_files = ["test/main.py", "README.md"] - - result = evaluate_rule(rule, changed_files) - assert result.should_fire is False - - def test_does_not_fire_when_safety_matches(self) -> None: - """Test rule doesn't fire when safety file is also changed.""" - rule = Rule( - name="Test", - filename="test", - detection_mode=DetectionMode.TRIGGER_SAFETY, - triggers=["app/config/**/*"], - safety=["docs/install_guide.md"], - instructions="Update docs", - compare_to="base", - ) - changed_files = ["app/config/settings.py", "docs/install_guide.md"] - - result = evaluate_rule(rule, changed_files) - assert result.should_fire is False - - def test_fires_when_trigger_matches_but_safety_doesnt(self) -> None: - """Test rule fires when trigger matches but safety doesn't.""" - rule = Rule( - name="Test", - 
filename="test", - detection_mode=DetectionMode.TRIGGER_SAFETY, - triggers=["app/config/**/*"], - safety=["docs/install_guide.md"], - instructions="Update docs", - compare_to="base", - ) - changed_files = ["app/config/settings.py", "app/main.py"] - - result = evaluate_rule(rule, changed_files) - assert result.should_fire is True - - def test_multiple_safety_patterns(self) -> None: - """Test rule with multiple safety patterns.""" - rule = Rule( - name="Test", - filename="test", - detection_mode=DetectionMode.TRIGGER_SAFETY, - triggers=["src/auth/**/*"], - safety=["SECURITY.md", "docs/security_review.md"], - instructions="Security review", - compare_to="base", - ) - - # Should not fire if any safety file is changed - result1 = evaluate_rule(rule, ["src/auth/login.py", "SECURITY.md"]) - assert result1.should_fire is False - result2 = evaluate_rule(rule, ["src/auth/login.py", "docs/security_review.md"]) - assert result2.should_fire is False - - # Should fire if no safety files changed - result3 = evaluate_rule(rule, ["src/auth/login.py"]) - assert result3.should_fire is True - - -class TestEvaluateRules: - """Tests for evaluate_rules function.""" - - def test_returns_fired_rules(self) -> None: - """Test that evaluate_rules returns all fired rules.""" - rules = [ - Rule( - name="Rule 1", - filename="rule1", - detection_mode=DetectionMode.TRIGGER_SAFETY, - triggers=["src/**/*"], - safety=[], - instructions="Do 1", - compare_to="base", - ), - Rule( - name="Rule 2", - filename="rule2", - detection_mode=DetectionMode.TRIGGER_SAFETY, - triggers=["test/**/*"], - safety=[], - instructions="Do 2", - compare_to="base", - ), - ] - changed_files = ["src/main.py", "test/test_main.py"] - - fired = evaluate_rules(rules, changed_files) - - assert len(fired) == 2 - assert fired[0].rule.name == "Rule 1" - assert fired[1].rule.name == "Rule 2" - - def test_skips_promised_rules(self) -> None: - """Test that promised rules are skipped.""" - rules = [ - Rule( - name="Rule 1", - filename="rule1", - detection_mode=DetectionMode.TRIGGER_SAFETY, - triggers=["src/**/*"], - safety=[], - instructions="Do 1", - compare_to="base", - ), - Rule( - name="Rule 2", - filename="rule2", - detection_mode=DetectionMode.TRIGGER_SAFETY, - triggers=["src/**/*"], - safety=[], - instructions="Do 2", - compare_to="base", - ), - ] - changed_files = ["src/main.py"] - promised = {"Rule 1"} - - fired = evaluate_rules(rules, changed_files, promised) - - assert len(fired) == 1 - assert fired[0].rule.name == "Rule 2" - - def test_returns_empty_when_no_rules_fire(self) -> None: - """Test returns empty list when no rules fire.""" - rules = [ - Rule( - name="Rule 1", - filename="rule1", - detection_mode=DetectionMode.TRIGGER_SAFETY, - triggers=["src/**/*"], - safety=[], - instructions="Do 1", - compare_to="base", - ), - ] - changed_files = ["test/test_main.py"] - - fired = evaluate_rules(rules, changed_files) - - assert len(fired) == 0 - - -class TestLoadRulesFromDirectory: - """Tests for load_rules_from_directory function.""" - - def test_loads_rules_from_directory(self, temp_dir: Path) -> None: - """Test loading rules from a directory.""" - rules_dir = temp_dir / "rules" - rules_dir.mkdir() - - # Create a rule file - rule_file = rules_dir / "test-rule.md" - rule_file.write_text( - """--- -name: Test Rule -trigger: "src/**/*" -compare_to: base ---- -Please check the source files. 
-""" - ) - - rules = load_rules_from_directory(rules_dir) - - assert len(rules) == 1 - assert rules[0].name == "Test Rule" - assert rules[0].triggers == ["src/**/*"] - assert rules[0].detection_mode == DetectionMode.TRIGGER_SAFETY - assert "check the source files" in rules[0].instructions - - def test_loads_multiple_rules(self, temp_dir: Path) -> None: - """Test loading multiple rules.""" - rules_dir = temp_dir / "rules" - rules_dir.mkdir() - - # Create rule files - (rules_dir / "rule1.md").write_text( - """--- -name: Rule 1 -trigger: "src/**/*" -compare_to: base ---- -Instructions for rule 1. -""" - ) - (rules_dir / "rule2.md").write_text( - """--- -name: Rule 2 -trigger: "test/**/*" -compare_to: base ---- -Instructions for rule 2. -""" - ) - - rules = load_rules_from_directory(rules_dir) - - assert len(rules) == 2 - names = {r.name for r in rules} - assert names == {"Rule 1", "Rule 2"} - - def test_returns_empty_for_empty_directory(self, temp_dir: Path) -> None: - """Test that empty directory returns empty list.""" - rules_dir = temp_dir / "rules" - rules_dir.mkdir() - - rules = load_rules_from_directory(rules_dir) - - assert rules == [] - - def test_returns_empty_for_nonexistent_directory(self, temp_dir: Path) -> None: - """Test that nonexistent directory returns empty list.""" - rules_dir = temp_dir / "nonexistent" - - rules = load_rules_from_directory(rules_dir) - - assert rules == [] - - def test_loads_rule_with_set_detection_mode(self, temp_dir: Path) -> None: - """Test loading a rule with set detection mode.""" - rules_dir = temp_dir / "rules" - rules_dir.mkdir() - - rule_file = rules_dir / "source-test-pairing.md" - rule_file.write_text( - """--- -name: Source/Test Pairing -set: - - src/{path}.py - - tests/{path}_test.py -compare_to: base ---- -Source and test files should change together. -""" - ) - - rules = load_rules_from_directory(rules_dir) - - assert len(rules) == 1 - assert rules[0].name == "Source/Test Pairing" - assert rules[0].detection_mode == DetectionMode.SET - assert rules[0].set_patterns == ["src/{path}.py", "tests/{path}_test.py"] - - def test_loads_rule_with_pair_detection_mode(self, temp_dir: Path) -> None: - """Test loading a rule with pair detection mode.""" - rules_dir = temp_dir / "rules" - rules_dir.mkdir() - - rule_file = rules_dir / "api-docs.md" - rule_file.write_text( - """--- -name: API Documentation -pair: - trigger: src/api/{name}.py - expects: docs/api/{name}.md -compare_to: base ---- -API code requires documentation. 
-""" - ) - - rules = load_rules_from_directory(rules_dir) - - assert len(rules) == 1 - assert rules[0].name == "API Documentation" - assert rules[0].detection_mode == DetectionMode.PAIR - assert rules[0].pair_config is not None - assert rules[0].pair_config.trigger == "src/api/{name}.py" - assert rules[0].pair_config.expects == ["docs/api/{name}.md"] - - def test_loads_rule_with_command_action(self, temp_dir: Path) -> None: - """Test loading a rule with command action.""" - rules_dir = temp_dir / "rules" - rules_dir.mkdir() - - rule_file = rules_dir / "format-python.md" - rule_file.write_text( - """--- -name: Format Python -trigger: "**/*.py" -action: - command: "ruff format {file}" - run_for: each_match -compare_to: prompt ---- -""" - ) - - rules = load_rules_from_directory(rules_dir) - - assert len(rules) == 1 - assert rules[0].name == "Format Python" - from deepwork.core.rules_parser import ActionType - - assert rules[0].action_type == ActionType.COMMAND - assert rules[0].command_action is not None - assert rules[0].command_action.command == "ruff format {file}" - assert rules[0].command_action.run_for == "each_match" - - -class TestCorrespondenceSets: - """Tests for set correspondence evaluation (CS-3.x from test_scenarios.md).""" - - def test_both_changed_no_fire(self) -> None: - """CS-3.1.1: Both source and test changed - no fire.""" - rule = Rule( - name="Source/Test Pairing", - filename="source-test-pairing", - detection_mode=DetectionMode.SET, - set_patterns=["src/{path}.py", "tests/{path}_test.py"], - instructions="Update tests", - compare_to="base", - ) - changed_files = ["src/foo.py", "tests/foo_test.py"] - - result = evaluate_rule(rule, changed_files) - assert result.should_fire is False - - def test_only_source_fires(self) -> None: - """CS-3.1.2: Only source changed - fires.""" - rule = Rule( - name="Source/Test Pairing", - filename="source-test-pairing", - detection_mode=DetectionMode.SET, - set_patterns=["src/{path}.py", "tests/{path}_test.py"], - instructions="Update tests", - compare_to="base", - ) - changed_files = ["src/foo.py"] - - result = evaluate_rule(rule, changed_files) - assert result.should_fire is True - assert "src/foo.py" in result.trigger_files - assert "tests/foo_test.py" in result.missing_files - - def test_only_test_fires(self) -> None: - """CS-3.1.3: Only test changed - fires.""" - rule = Rule( - name="Source/Test Pairing", - filename="source-test-pairing", - detection_mode=DetectionMode.SET, - set_patterns=["src/{path}.py", "tests/{path}_test.py"], - instructions="Update source", - compare_to="base", - ) - changed_files = ["tests/foo_test.py"] - - result = evaluate_rule(rule, changed_files) - assert result.should_fire is True - assert "tests/foo_test.py" in result.trigger_files - assert "src/foo.py" in result.missing_files - - def test_nested_both_no_fire(self) -> None: - """CS-3.1.4: Nested paths - both changed.""" - rule = Rule( - name="Source/Test Pairing", - filename="source-test-pairing", - detection_mode=DetectionMode.SET, - set_patterns=["src/{path}.py", "tests/{path}_test.py"], - instructions="Update tests", - compare_to="base", - ) - changed_files = ["src/a/b.py", "tests/a/b_test.py"] - - result = evaluate_rule(rule, changed_files) - assert result.should_fire is False - - def test_nested_only_source_fires(self) -> None: - """CS-3.1.5: Nested paths - only source.""" - rule = Rule( - name="Source/Test Pairing", - filename="source-test-pairing", - detection_mode=DetectionMode.SET, - set_patterns=["src/{path}.py", "tests/{path}_test.py"], - 
instructions="Update tests", - compare_to="base", - ) - changed_files = ["src/a/b.py"] - - result = evaluate_rule(rule, changed_files) - assert result.should_fire is True - assert "tests/a/b_test.py" in result.missing_files - - def test_unrelated_file_no_fire(self) -> None: - """CS-3.1.6: Unrelated file - no fire.""" - rule = Rule( - name="Source/Test Pairing", - filename="source-test-pairing", - detection_mode=DetectionMode.SET, - set_patterns=["src/{path}.py", "tests/{path}_test.py"], - instructions="Update tests", - compare_to="base", - ) - changed_files = ["docs/readme.md"] - - result = evaluate_rule(rule, changed_files) - assert result.should_fire is False - - def test_source_plus_unrelated_fires(self) -> None: - """CS-3.1.7: Source + unrelated - fires.""" - rule = Rule( - name="Source/Test Pairing", - filename="source-test-pairing", - detection_mode=DetectionMode.SET, - set_patterns=["src/{path}.py", "tests/{path}_test.py"], - instructions="Update tests", - compare_to="base", - ) - changed_files = ["src/foo.py", "docs/readme.md"] - - result = evaluate_rule(rule, changed_files) - assert result.should_fire is True - - def test_both_plus_unrelated_no_fire(self) -> None: - """CS-3.1.8: Both + unrelated - no fire.""" - rule = Rule( - name="Source/Test Pairing", - filename="source-test-pairing", - detection_mode=DetectionMode.SET, - set_patterns=["src/{path}.py", "tests/{path}_test.py"], - instructions="Update tests", - compare_to="base", - ) - changed_files = ["src/foo.py", "tests/foo_test.py", "docs/readme.md"] - - result = evaluate_rule(rule, changed_files) - assert result.should_fire is False - - -class TestThreePatternSets: - """Tests for three-pattern set correspondence (CS-3.2.x).""" - - def test_all_three_no_fire(self) -> None: - """CS-3.2.1: All three files changed - no fire.""" - rule = Rule( - name="Model/Schema/Migration", - filename="model-schema-migration", - detection_mode=DetectionMode.SET, - set_patterns=[ - "models/{name}.py", - "schemas/{name}.py", - "migrations/{name}.sql", - ], - instructions="Update all related files", - compare_to="base", - ) - changed_files = ["models/user.py", "schemas/user.py", "migrations/user.sql"] - - result = evaluate_rule(rule, changed_files) - assert result.should_fire is False - - def test_two_of_three_fires(self) -> None: - """CS-3.2.2: Two of three - fires (missing migration).""" - rule = Rule( - name="Model/Schema/Migration", - filename="model-schema-migration", - detection_mode=DetectionMode.SET, - set_patterns=[ - "models/{name}.py", - "schemas/{name}.py", - "migrations/{name}.sql", - ], - instructions="Update all related files", - compare_to="base", - ) - changed_files = ["models/user.py", "schemas/user.py"] - - result = evaluate_rule(rule, changed_files) - assert result.should_fire is True - assert "migrations/user.sql" in result.missing_files - - def test_one_of_three_fires(self) -> None: - """CS-3.2.3: One of three - fires (missing 2).""" - rule = Rule( - name="Model/Schema/Migration", - filename="model-schema-migration", - detection_mode=DetectionMode.SET, - set_patterns=[ - "models/{name}.py", - "schemas/{name}.py", - "migrations/{name}.sql", - ], - instructions="Update all related files", - compare_to="base", - ) - changed_files = ["models/user.py"] - - result = evaluate_rule(rule, changed_files) - assert result.should_fire is True - assert len(result.missing_files) == 2 - assert "schemas/user.py" in result.missing_files - assert "migrations/user.sql" in result.missing_files - - def test_different_names_fire_both(self) -> None: 
- """CS-3.2.4: Different names - both incomplete.""" - rule = Rule( - name="Model/Schema/Migration", - filename="model-schema-migration", - detection_mode=DetectionMode.SET, - set_patterns=[ - "models/{name}.py", - "schemas/{name}.py", - "migrations/{name}.sql", - ], - instructions="Update all related files", - compare_to="base", - ) - changed_files = ["models/user.py", "schemas/order.py"] - - result = evaluate_rule(rule, changed_files) - assert result.should_fire is True - # Both trigger because each is incomplete - assert ( - "models/user.py" in result.trigger_files or "schemas/order.py" in result.trigger_files - ) - - -class TestCorrespondencePairs: - """Tests for pair correspondence evaluation (CP-4.x from test_scenarios.md).""" - - def test_both_changed_no_fire(self) -> None: - """CP-4.1.1: Both trigger and expected changed - no fire.""" - rule = Rule( - name="API Documentation", - filename="api-documentation", - detection_mode=DetectionMode.PAIR, - pair_config=PairConfig( - trigger="api/{path}.py", - expects=["docs/api/{path}.md"], - ), - instructions="Update API docs", - compare_to="base", - ) - changed_files = ["api/users.py", "docs/api/users.md"] - - result = evaluate_rule(rule, changed_files) - assert result.should_fire is False - - def test_only_trigger_fires(self) -> None: - """CP-4.1.2: Only trigger changed - fires.""" - rule = Rule( - name="API Documentation", - filename="api-documentation", - detection_mode=DetectionMode.PAIR, - pair_config=PairConfig( - trigger="api/{path}.py", - expects=["docs/api/{path}.md"], - ), - instructions="Update API docs", - compare_to="base", - ) - changed_files = ["api/users.py"] - - result = evaluate_rule(rule, changed_files) - assert result.should_fire is True - assert "api/users.py" in result.trigger_files - assert "docs/api/users.md" in result.missing_files - - def test_only_expected_no_fire(self) -> None: - """CP-4.1.3: Only expected changed - no fire (directional).""" - rule = Rule( - name="API Documentation", - filename="api-documentation", - detection_mode=DetectionMode.PAIR, - pair_config=PairConfig( - trigger="api/{path}.py", - expects=["docs/api/{path}.md"], - ), - instructions="Update API docs", - compare_to="base", - ) - changed_files = ["docs/api/users.md"] - - result = evaluate_rule(rule, changed_files) - assert result.should_fire is False - - def test_trigger_plus_unrelated_fires(self) -> None: - """CP-4.1.4: Trigger + unrelated - fires.""" - rule = Rule( - name="API Documentation", - filename="api-documentation", - detection_mode=DetectionMode.PAIR, - pair_config=PairConfig( - trigger="api/{path}.py", - expects=["docs/api/{path}.md"], - ), - instructions="Update API docs", - compare_to="base", - ) - changed_files = ["api/users.py", "README.md"] - - result = evaluate_rule(rule, changed_files) - assert result.should_fire is True - - def test_expected_plus_unrelated_no_fire(self) -> None: - """CP-4.1.5: Expected + unrelated - no fire.""" - rule = Rule( - name="API Documentation", - filename="api-documentation", - detection_mode=DetectionMode.PAIR, - pair_config=PairConfig( - trigger="api/{path}.py", - expects=["docs/api/{path}.md"], - ), - instructions="Update API docs", - compare_to="base", - ) - changed_files = ["docs/api/users.md", "README.md"] - - result = evaluate_rule(rule, changed_files) - assert result.should_fire is False - - -class TestMultiExpectsPairs: - """Tests for multi-expects pair correspondence (CP-4.2.x).""" - - def test_all_three_no_fire(self) -> None: - """CP-4.2.1: All three changed - no fire.""" - rule = 
Rule( - name="API Full Documentation", - filename="api-full-documentation", - detection_mode=DetectionMode.PAIR, - pair_config=PairConfig( - trigger="api/{path}.py", - expects=["docs/api/{path}.md", "openapi/{path}.yaml"], - ), - instructions="Update API docs and OpenAPI", - compare_to="base", - ) - changed_files = ["api/users.py", "docs/api/users.md", "openapi/users.yaml"] - - result = evaluate_rule(rule, changed_files) - assert result.should_fire is False - - def test_trigger_plus_one_expect_fires(self) -> None: - """CP-4.2.2: Trigger + one expect - fires (missing openapi).""" - rule = Rule( - name="API Full Documentation", - filename="api-full-documentation", - detection_mode=DetectionMode.PAIR, - pair_config=PairConfig( - trigger="api/{path}.py", - expects=["docs/api/{path}.md", "openapi/{path}.yaml"], - ), - instructions="Update API docs and OpenAPI", - compare_to="base", - ) - changed_files = ["api/users.py", "docs/api/users.md"] - - result = evaluate_rule(rule, changed_files) - assert result.should_fire is True - assert "openapi/users.yaml" in result.missing_files - - def test_only_trigger_fires_missing_both(self) -> None: - """CP-4.2.3: Only trigger - fires (missing both).""" - rule = Rule( - name="API Full Documentation", - filename="api-full-documentation", - detection_mode=DetectionMode.PAIR, - pair_config=PairConfig( - trigger="api/{path}.py", - expects=["docs/api/{path}.md", "openapi/{path}.yaml"], - ), - instructions="Update API docs and OpenAPI", - compare_to="base", - ) - changed_files = ["api/users.py"] - - result = evaluate_rule(rule, changed_files) - assert result.should_fire is True - assert len(result.missing_files) == 2 - assert "docs/api/users.md" in result.missing_files - assert "openapi/users.yaml" in result.missing_files - - def test_both_expects_only_no_fire(self) -> None: - """CP-4.2.4: Both expects only - no fire.""" - rule = Rule( - name="API Full Documentation", - filename="api-full-documentation", - detection_mode=DetectionMode.PAIR, - pair_config=PairConfig( - trigger="api/{path}.py", - expects=["docs/api/{path}.md", "openapi/{path}.yaml"], - ), - instructions="Update API docs and OpenAPI", - compare_to="base", - ) - changed_files = ["docs/api/users.md", "openapi/users.yaml"] - - result = evaluate_rule(rule, changed_files) - assert result.should_fire is False - - -class TestCreatedMode: - """Tests for created mode evaluation.""" - - def test_fires_when_created_file_matches(self) -> None: - """Test rule fires when a created file matches the pattern.""" - rule = Rule( - name="New Module Docs", - filename="new-module-docs", - detection_mode=DetectionMode.CREATED, - created_patterns=["src/**/*.py"], - instructions="Document the new module", - compare_to="base", - ) - created_files = ["src/new_module.py"] - - result = evaluate_rule(rule, [], created_files) - assert result.should_fire is True - assert "src/new_module.py" in result.trigger_files - - def test_does_not_fire_when_no_match(self) -> None: - """Test rule doesn't fire when no created file matches.""" - rule = Rule( - name="New Module Docs", - filename="new-module-docs", - detection_mode=DetectionMode.CREATED, - created_patterns=["src/**/*.py"], - instructions="Document the new module", - compare_to="base", - ) - created_files = ["tests/test_new.py"] - - result = evaluate_rule(rule, [], created_files) - assert result.should_fire is False - - def test_does_not_fire_for_modified_files(self) -> None: - """Test rule doesn't fire for modified files (only created).""" - rule = Rule( - name="New Module Docs", - 
filename="new-module-docs", - detection_mode=DetectionMode.CREATED, - created_patterns=["src/**/*.py"], - instructions="Document the new module", - compare_to="base", - ) - # File is in changed_files but NOT in created_files - changed_files = ["src/existing_module.py"] - created_files: list[str] = [] - - result = evaluate_rule(rule, changed_files, created_files) - assert result.should_fire is False - - def test_multiple_created_patterns(self) -> None: - """Test rule with multiple created patterns.""" - rule = Rule( - name="New Code Standards", - filename="new-code-standards", - detection_mode=DetectionMode.CREATED, - created_patterns=["src/**/*.py", "lib/**/*.py"], - instructions="Follow code standards", - compare_to="base", - ) - - # Matches first pattern - result1 = evaluate_rule(rule, [], ["src/foo.py"]) - assert result1.should_fire is True - - # Matches second pattern - result2 = evaluate_rule(rule, [], ["lib/bar.py"]) - assert result2.should_fire is True - - # Matches neither - result3 = evaluate_rule(rule, [], ["tests/test_foo.py"]) - assert result3.should_fire is False - - def test_created_with_nested_path(self) -> None: - """Test created mode with nested paths.""" - rule = Rule( - name="New Component", - filename="new-component", - detection_mode=DetectionMode.CREATED, - created_patterns=["src/components/**/*.tsx"], - instructions="Document the component", - compare_to="base", - ) - created_files = ["src/components/ui/Button.tsx"] - - result = evaluate_rule(rule, [], created_files) - assert result.should_fire is True - assert "src/components/ui/Button.tsx" in result.trigger_files - - def test_created_mixed_with_changed(self) -> None: - """Test that changed_files don't affect created mode rules.""" - rule = Rule( - name="New Module Docs", - filename="new-module-docs", - detection_mode=DetectionMode.CREATED, - created_patterns=["src/**/*.py"], - instructions="Document the new module", - compare_to="base", - ) - # src/existing.py is modified (in changed_files) - # src/new.py is created (in created_files) - changed_files = ["src/existing.py", "src/new.py"] - created_files = ["src/new.py"] - - result = evaluate_rule(rule, changed_files, created_files) - assert result.should_fire is True - # Only the created file should be in trigger_files - assert result.trigger_files == ["src/new.py"] - - def test_evaluate_rules_with_created_mode(self) -> None: - """Test evaluate_rules passes created_files correctly.""" - rules = [ - Rule( - name="Trigger Rule", - filename="trigger-rule", - detection_mode=DetectionMode.TRIGGER_SAFETY, - triggers=["src/**/*.py"], - safety=[], - instructions="Check source", - compare_to="base", - ), - Rule( - name="Created Rule", - filename="created-rule", - detection_mode=DetectionMode.CREATED, - created_patterns=["src/**/*.py"], - instructions="Document new files", - compare_to="base", - ), - ] - # src/existing.py is modified, src/new.py is created - changed_files = ["src/existing.py", "src/new.py"] - created_files = ["src/new.py"] - - results = evaluate_rules(rules, changed_files, None, created_files) - - # Both rules should fire - assert len(results) == 2 - rule_names = {r.rule.name for r in results} - assert "Trigger Rule" in rule_names - assert "Created Rule" in rule_names - - -class TestLoadCreatedModeRule: - """Tests for loading rules with created detection mode.""" - - def test_loads_rule_with_created_detection_mode(self, temp_dir: Path) -> None: - """Test loading a rule with created detection mode.""" - rules_dir = temp_dir / "rules" - rules_dir.mkdir() - - 
rule_file = rules_dir / "new-module-docs.md" - rule_file.write_text( - """--- -name: New Module Documentation -created: src/**/*.py -compare_to: base ---- -A new Python module was created. Please add documentation. -""" - ) - - rules = load_rules_from_directory(rules_dir) - - assert len(rules) == 1 - assert rules[0].name == "New Module Documentation" - assert rules[0].detection_mode == DetectionMode.CREATED - assert rules[0].created_patterns == ["src/**/*.py"] - - def test_loads_rule_with_multiple_created_patterns(self, temp_dir: Path) -> None: - """Test loading a rule with multiple created patterns.""" - rules_dir = temp_dir / "rules" - rules_dir.mkdir() - - rule_file = rules_dir / "new-code-standards.md" - rule_file.write_text( - """--- -name: New Code Standards -created: - - src/**/*.py - - lib/**/*.py -compare_to: base ---- -New code must follow standards. -""" - ) - - rules = load_rules_from_directory(rules_dir) - - assert len(rules) == 1 - assert rules[0].name == "New Code Standards" - assert rules[0].detection_mode == DetectionMode.CREATED - assert rules[0].created_patterns == ["src/**/*.py", "lib/**/*.py"] - - def test_loads_created_rule_with_command_action(self, temp_dir: Path) -> None: - """Test loading a created mode rule with command action.""" - rules_dir = temp_dir / "rules" - rules_dir.mkdir() - - rule_file = rules_dir / "new-file-lint.md" - rule_file.write_text( - """--- -name: New File Lint -created: "**/*.py" -compare_to: base -action: - command: "ruff check {file}" - run_for: each_match ---- -""" - ) - - rules = load_rules_from_directory(rules_dir) - - assert len(rules) == 1 - assert rules[0].name == "New File Lint" - assert rules[0].detection_mode == DetectionMode.CREATED - from deepwork.core.rules_parser import ActionType - - assert rules[0].action_type == ActionType.COMMAND - assert rules[0].command_action is not None - assert rules[0].command_action.command == "ruff check {file}" diff --git a/tests/unit/test_rules_queue.py b/tests/unit/test_rules_queue.py deleted file mode 100644 index 8c35d06d..00000000 --- a/tests/unit/test_rules_queue.py +++ /dev/null @@ -1,349 +0,0 @@ -"""Tests for rules queue system (QS-6.x from test_scenarios.md).""" - -from pathlib import Path - -import pytest - -from deepwork.core.rules_queue import ( - ActionResult, - QueueEntry, - QueueEntryStatus, - RulesQueue, - compute_trigger_hash, -) - - -class TestComputeTriggerHash: - """Tests for hash calculation (QS-6.2.x).""" - - def test_same_everything_same_hash(self) -> None: - """QS-6.2.1: Same rule, files, baseline - same hash.""" - hash1 = compute_trigger_hash("RuleA", ["a.py"], "commit1") - hash2 = compute_trigger_hash("RuleA", ["a.py"], "commit1") - assert hash1 == hash2 - - def test_different_files_different_hash(self) -> None: - """QS-6.2.2: Different files - different hash.""" - hash1 = compute_trigger_hash("RuleA", ["a.py"], "commit1") - hash2 = compute_trigger_hash("RuleA", ["b.py"], "commit1") - assert hash1 != hash2 - - def test_different_baseline_different_hash(self) -> None: - """QS-6.2.3: Different baseline - different hash.""" - hash1 = compute_trigger_hash("RuleA", ["a.py"], "commit1") - hash2 = compute_trigger_hash("RuleA", ["a.py"], "commit2") - assert hash1 != hash2 - - def test_different_rule_different_hash(self) -> None: - """QS-6.2.4: Different rule - different hash.""" - hash1 = compute_trigger_hash("RuleA", ["a.py"], "commit1") - hash2 = compute_trigger_hash("RuleB", ["a.py"], "commit1") - assert hash1 != hash2 - - def test_file_order_independent(self) -> None: - 
"""File order should not affect hash (sorted internally).""" - hash1 = compute_trigger_hash("RuleA", ["a.py", "b.py"], "commit1") - hash2 = compute_trigger_hash("RuleA", ["b.py", "a.py"], "commit1") - assert hash1 == hash2 - - -class TestQueueEntry: - """Tests for QueueEntry dataclass.""" - - def test_to_dict_and_from_dict(self) -> None: - """Round-trip serialization.""" - entry = QueueEntry( - rule_name="Test Rule", - rule_file="test-rule.md", - trigger_hash="abc123", - status=QueueEntryStatus.QUEUED, - baseline_ref="commit1", - trigger_files=["src/main.py"], - expected_files=["tests/main_test.py"], - ) - - data = entry.to_dict() - restored = QueueEntry.from_dict(data) - - assert restored.rule_name == entry.rule_name - assert restored.rule_file == entry.rule_file - assert restored.trigger_hash == entry.trigger_hash - assert restored.status == entry.status - assert restored.trigger_files == entry.trigger_files - assert restored.expected_files == entry.expected_files - - def test_with_action_result(self) -> None: - """Serialization with action result.""" - entry = QueueEntry( - rule_name="Test Rule", - rule_file="test-rule.md", - trigger_hash="abc123", - action_result=ActionResult(type="command", output="ok", exit_code=0), - ) - - data = entry.to_dict() - restored = QueueEntry.from_dict(data) - - assert restored.action_result is not None - assert restored.action_result.type == "command" - assert restored.action_result.exit_code == 0 - - -class TestRulesQueue: - """Tests for RulesQueue class (QS-6.1.x, QS-6.3.x).""" - - @pytest.fixture - def queue(self, tmp_path: Path) -> RulesQueue: - """Create a queue with temp directory.""" - return RulesQueue(tmp_path / "queue") - - def test_create_entry(self, queue: RulesQueue) -> None: - """QS-6.1.1: Create new queue entry.""" - entry = queue.create_entry( - rule_name="Test Rule", - rule_file="test-rule.md", - trigger_files=["src/main.py"], - baseline_ref="commit1", - ) - - assert entry is not None - assert entry.status == QueueEntryStatus.QUEUED - assert entry.rule_name == "Test Rule" - - def test_create_duplicate_returns_none(self, queue: RulesQueue) -> None: - """QS-6.1.6: Re-trigger same files returns None.""" - entry1 = queue.create_entry( - rule_name="Test Rule", - rule_file="test-rule.md", - trigger_files=["src/main.py"], - baseline_ref="commit1", - ) - entry2 = queue.create_entry( - rule_name="Test Rule", - rule_file="test-rule.md", - trigger_files=["src/main.py"], - baseline_ref="commit1", - ) - - assert entry1 is not None - assert entry2 is None # Duplicate - - def test_create_different_files_new_entry(self, queue: RulesQueue) -> None: - """QS-6.1.7: Different files create new entry.""" - entry1 = queue.create_entry( - rule_name="Test Rule", - rule_file="test-rule.md", - trigger_files=["src/a.py"], - baseline_ref="commit1", - ) - entry2 = queue.create_entry( - rule_name="Test Rule", - rule_file="test-rule.md", - trigger_files=["src/b.py"], # Different file - baseline_ref="commit1", - ) - - assert entry1 is not None - assert entry2 is not None - - def test_has_entry(self, queue: RulesQueue) -> None: - """Check if entry exists.""" - entry = queue.create_entry( - rule_name="Test Rule", - rule_file="test-rule.md", - trigger_files=["src/main.py"], - baseline_ref="commit1", - ) - assert entry is not None - - assert queue.has_entry(entry.trigger_hash) is True - assert queue.has_entry("nonexistent") is False - - def test_get_entry(self, queue: RulesQueue) -> None: - """Retrieve entry by hash.""" - entry = queue.create_entry( - rule_name="Test Rule", 
- rule_file="test-rule.md", - trigger_files=["src/main.py"], - baseline_ref="commit1", - ) - assert entry is not None - - retrieved = queue.get_entry(entry.trigger_hash) - assert retrieved is not None - assert retrieved.rule_name == "Test Rule" - - def test_get_nonexistent_entry(self, queue: RulesQueue) -> None: - """Get nonexistent entry returns None.""" - assert queue.get_entry("nonexistent") is None - - def test_update_status_to_passed(self, queue: RulesQueue) -> None: - """QS-6.1.3: Update status to passed.""" - entry = queue.create_entry( - rule_name="Test Rule", - rule_file="test-rule.md", - trigger_files=["src/main.py"], - baseline_ref="commit1", - ) - assert entry is not None - - success = queue.update_status(entry.trigger_hash, QueueEntryStatus.PASSED) - assert success is True - - updated = queue.get_entry(entry.trigger_hash) - assert updated is not None - assert updated.status == QueueEntryStatus.PASSED - assert updated.evaluated_at is not None - - def test_update_status_to_failed(self, queue: RulesQueue) -> None: - """QS-6.1.5: Update status to failed.""" - entry = queue.create_entry( - rule_name="Test Rule", - rule_file="test-rule.md", - trigger_files=["src/main.py"], - baseline_ref="commit1", - ) - assert entry is not None - - action_result = ActionResult(type="command", output="error", exit_code=1) - success = queue.update_status(entry.trigger_hash, QueueEntryStatus.FAILED, action_result) - assert success is True - - updated = queue.get_entry(entry.trigger_hash) - assert updated is not None - assert updated.status == QueueEntryStatus.FAILED - assert updated.action_result is not None - assert updated.action_result.exit_code == 1 - - def test_update_status_to_skipped(self, queue: RulesQueue) -> None: - """QS-6.1.2: Update status to skipped (safety suppression).""" - entry = queue.create_entry( - rule_name="Test Rule", - rule_file="test-rule.md", - trigger_files=["src/main.py"], - baseline_ref="commit1", - ) - assert entry is not None - - success = queue.update_status(entry.trigger_hash, QueueEntryStatus.SKIPPED) - assert success is True - - updated = queue.get_entry(entry.trigger_hash) - assert updated is not None - assert updated.status == QueueEntryStatus.SKIPPED - - def test_update_nonexistent_returns_false(self, queue: RulesQueue) -> None: - """Update nonexistent entry returns False.""" - success = queue.update_status("nonexistent", QueueEntryStatus.PASSED) - assert success is False - - def test_get_queued_entries(self, queue: RulesQueue) -> None: - """Get only queued entries.""" - # Create multiple entries with different statuses - entry1 = queue.create_entry( - rule_name="Rule 1", - rule_file="rule1.md", - trigger_files=["a.py"], - baseline_ref="commit1", - ) - entry2 = queue.create_entry( - rule_name="Rule 2", - rule_file="rule2.md", - trigger_files=["b.py"], - baseline_ref="commit1", - ) - assert entry1 is not None - assert entry2 is not None - - # Update one to passed - queue.update_status(entry1.trigger_hash, QueueEntryStatus.PASSED) - - # Get queued only - queued = queue.get_queued_entries() - assert len(queued) == 1 - assert queued[0].rule_name == "Rule 2" - - def test_get_all_entries(self, queue: RulesQueue) -> None: - """Get all entries regardless of status.""" - entry1 = queue.create_entry( - rule_name="Rule 1", - rule_file="rule1.md", - trigger_files=["a.py"], - baseline_ref="commit1", - ) - entry2 = queue.create_entry( - rule_name="Rule 2", - rule_file="rule2.md", - trigger_files=["b.py"], - baseline_ref="commit1", - ) - assert entry1 is not None - assert 
entry2 is not None - - queue.update_status(entry1.trigger_hash, QueueEntryStatus.PASSED) - - all_entries = queue.get_all_entries() - assert len(all_entries) == 2 - - def test_remove_entry(self, queue: RulesQueue) -> None: - """Remove entry by hash.""" - entry = queue.create_entry( - rule_name="Test Rule", - rule_file="test-rule.md", - trigger_files=["src/main.py"], - baseline_ref="commit1", - ) - assert entry is not None - - removed = queue.remove_entry(entry.trigger_hash) - assert removed is True - assert queue.has_entry(entry.trigger_hash) is False - - def test_remove_nonexistent_returns_false(self, queue: RulesQueue) -> None: - """Remove nonexistent entry returns False.""" - removed = queue.remove_entry("nonexistent") - assert removed is False - - def test_clear(self, queue: RulesQueue) -> None: - """Clear all entries.""" - queue.create_entry( - rule_name="Rule 1", - rule_file="rule1.md", - trigger_files=["a.py"], - baseline_ref="commit1", - ) - queue.create_entry( - rule_name="Rule 2", - rule_file="rule2.md", - trigger_files=["b.py"], - baseline_ref="commit1", - ) - - count = queue.clear() - assert count == 2 - assert len(queue.get_all_entries()) == 0 - - def test_clear_empty_queue(self, queue: RulesQueue) -> None: - """Clear empty queue returns 0.""" - count = queue.clear() - assert count == 0 - - def test_file_structure(self, queue: RulesQueue) -> None: - """Verify queue files are named correctly.""" - entry = queue.create_entry( - rule_name="Test Rule", - rule_file="test-rule.md", - trigger_files=["src/main.py"], - baseline_ref="commit1", - ) - assert entry is not None - - # Check file exists with correct naming - expected_file = queue.queue_dir / f"{entry.trigger_hash}.queued.json" - assert expected_file.exists() - - # Update status and check file renamed - queue.update_status(entry.trigger_hash, QueueEntryStatus.PASSED) - assert not expected_file.exists() - passed_file = queue.queue_dir / f"{entry.trigger_hash}.passed.json" - assert passed_file.exists() diff --git a/tests/unit/test_schema_validation.py b/tests/unit/test_schema_validation.py deleted file mode 100644 index c77fc7a0..00000000 --- a/tests/unit/test_schema_validation.py +++ /dev/null @@ -1,360 +0,0 @@ -"""Tests for schema validation (SV-8.x from test_scenarios.md).""" - -from pathlib import Path - -import pytest - -from deepwork.core.rules_parser import RulesParseError, parse_rule_file - - -class TestRequiredFields: - """Tests for required field validation (SV-8.1.x).""" - - def test_missing_name(self, tmp_path: Path) -> None: - """SV-8.1.1: Missing name field.""" - rule_file = tmp_path / "test.md" - rule_file.write_text( - """--- -trigger: "src/**/*" -compare_to: base ---- -Instructions here. -""" - ) - - with pytest.raises(RulesParseError, match="name"): - parse_rule_file(rule_file) - - def test_missing_detection_mode(self, tmp_path: Path) -> None: - """SV-8.1.2: Missing trigger, set, or pair.""" - rule_file = tmp_path / "test.md" - rule_file.write_text( - """--- -name: Test Rule -compare_to: base ---- -Instructions here. -""" - ) - - with pytest.raises(RulesParseError): - parse_rule_file(rule_file) - - def test_missing_compare_to(self, tmp_path: Path) -> None: - """SV-8.1.5: Missing compare_to field.""" - rule_file = tmp_path / "test.md" - rule_file.write_text( - """--- -name: Test Rule -trigger: "src/**/*" ---- -Instructions here. 
-""" - ) - - with pytest.raises(RulesParseError, match="compare_to"): - parse_rule_file(rule_file) - - def test_missing_markdown_body(self, tmp_path: Path) -> None: - """SV-8.1.3: Missing markdown body (for prompt action).""" - rule_file = tmp_path / "test.md" - rule_file.write_text( - """--- -name: Test Rule -trigger: "src/**/*" -compare_to: base ---- -""" - ) - - with pytest.raises(RulesParseError, match="markdown body|instructions"): - parse_rule_file(rule_file) - - def test_set_requires_two_patterns(self, tmp_path: Path) -> None: - """SV-8.1.4: Set requires at least 2 patterns. - - Note: Schema validation catches this before rule parser. - """ - rule_file = tmp_path / "test.md" - rule_file.write_text( - """--- -name: Test Rule -set: - - src/{path}.py -compare_to: base ---- -Instructions here. -""" - ) - - # Schema validation will fail due to minItems: 2 - with pytest.raises(RulesParseError): - parse_rule_file(rule_file) - - -class TestMutuallyExclusiveFields: - """Tests for mutually exclusive field validation (SV-8.2.x).""" - - def test_both_trigger_and_set(self, tmp_path: Path) -> None: - """SV-8.2.1: Both trigger and set is invalid.""" - rule_file = tmp_path / "test.md" - rule_file.write_text( - """--- -name: Test Rule -trigger: "src/**/*" -set: - - src/{path}.py - - tests/{path}_test.py -compare_to: base ---- -Instructions here. -""" - ) - - with pytest.raises(RulesParseError): - parse_rule_file(rule_file) - - def test_both_trigger_and_pair(self, tmp_path: Path) -> None: - """SV-8.2.2: Both trigger and pair is invalid.""" - rule_file = tmp_path / "test.md" - rule_file.write_text( - """--- -name: Test Rule -trigger: "src/**/*" -pair: - trigger: api/{path}.py - expects: docs/{path}.md -compare_to: base ---- -Instructions here. -""" - ) - - with pytest.raises(RulesParseError): - parse_rule_file(rule_file) - - def test_all_detection_modes(self, tmp_path: Path) -> None: - """SV-8.2.3: All three detection modes is invalid.""" - rule_file = tmp_path / "test.md" - rule_file.write_text( - """--- -name: Test Rule -trigger: "src/**/*" -set: - - src/{path}.py - - tests/{path}_test.py -pair: - trigger: api/{path}.py - expects: docs/{path}.md -compare_to: base ---- -Instructions here. -""" - ) - - with pytest.raises(RulesParseError): - parse_rule_file(rule_file) - - -class TestValueValidation: - """Tests for value validation (SV-8.4.x).""" - - def test_invalid_compare_to(self, tmp_path: Path) -> None: - """SV-8.4.1: Invalid compare_to value.""" - rule_file = tmp_path / "test.md" - rule_file.write_text( - """--- -name: Test Rule -trigger: "src/**/*" -compare_to: yesterday ---- -Instructions here. -""" - ) - - with pytest.raises(RulesParseError): - parse_rule_file(rule_file) - - def test_invalid_run_for(self, tmp_path: Path) -> None: - """SV-8.4.2: Invalid run_for value.""" - rule_file = tmp_path / "test.md" - rule_file.write_text( - """--- -name: Test Rule -trigger: "**/*.py" -action: - command: "ruff format {file}" - run_for: first_match -compare_to: prompt ---- -""" - ) - - with pytest.raises(RulesParseError): - parse_rule_file(rule_file) - - -class TestValidRules: - """Tests for valid rule parsing.""" - - def test_valid_trigger_safety_rule(self, tmp_path: Path) -> None: - """Valid trigger/safety rule parses successfully.""" - rule_file = tmp_path / "test.md" - rule_file.write_text( - """--- -name: Test Rule -trigger: "src/**/*" -safety: README.md -compare_to: base ---- -Please check the code. 
-""" - ) - - rule = parse_rule_file(rule_file) - assert rule.name == "Test Rule" - assert rule.triggers == ["src/**/*"] - assert rule.safety == ["README.md"] - assert rule.compare_to == "base" - - def test_valid_set_rule(self, tmp_path: Path) -> None: - """Valid set rule parses successfully.""" - rule_file = tmp_path / "test.md" - rule_file.write_text( - """--- -name: Source/Test Pairing -set: - - src/{path}.py - - tests/{path}_test.py -compare_to: base ---- -Source and test should change together. -""" - ) - - rule = parse_rule_file(rule_file) - assert rule.name == "Source/Test Pairing" - assert len(rule.set_patterns) == 2 - assert rule.compare_to == "base" - - def test_valid_pair_rule(self, tmp_path: Path) -> None: - """Valid pair rule parses successfully.""" - rule_file = tmp_path / "test.md" - rule_file.write_text( - """--- -name: API Documentation -pair: - trigger: api/{module}.py - expects: docs/api/{module}.md -compare_to: base ---- -API changes need documentation. -""" - ) - - rule = parse_rule_file(rule_file) - assert rule.name == "API Documentation" - assert rule.pair_config is not None - assert rule.pair_config.trigger == "api/{module}.py" - assert rule.pair_config.expects == ["docs/api/{module}.md"] - assert rule.compare_to == "base" - - def test_valid_command_rule(self, tmp_path: Path) -> None: - """Valid command rule parses successfully.""" - rule_file = tmp_path / "test.md" - rule_file.write_text( - """--- -name: Format Python -trigger: "**/*.py" -action: - command: "ruff format {file}" - run_for: each_match -compare_to: prompt ---- -""" - ) - - rule = parse_rule_file(rule_file) - assert rule.name == "Format Python" - assert rule.command_action is not None - assert rule.command_action.command == "ruff format {file}" - assert rule.command_action.run_for == "each_match" - assert rule.compare_to == "prompt" - - def test_valid_compare_to_values(self, tmp_path: Path) -> None: - """Valid compare_to values parse successfully.""" - for compare_to in ["base", "default_tip", "prompt"]: - rule_file = tmp_path / "test.md" - rule_file.write_text( - f"""--- -name: Test Rule -trigger: "src/**/*" -compare_to: {compare_to} ---- -Instructions here. -""" - ) - - rule = parse_rule_file(rule_file) - assert rule.compare_to == compare_to - - def test_multiple_triggers(self, tmp_path: Path) -> None: - """Multiple triggers as array parses successfully.""" - rule_file = tmp_path / "test.md" - rule_file.write_text( - """--- -name: Test Rule -trigger: - - src/**/*.py - - lib/**/*.py -compare_to: base ---- -Instructions here. -""" - ) - - rule = parse_rule_file(rule_file) - assert rule.triggers == ["src/**/*.py", "lib/**/*.py"] - assert rule.compare_to == "base" - - def test_multiple_safety_patterns(self, tmp_path: Path) -> None: - """Multiple safety patterns as array parses successfully.""" - rule_file = tmp_path / "test.md" - rule_file.write_text( - """--- -name: Test Rule -trigger: src/**/* -safety: - - README.md - - CHANGELOG.md -compare_to: base ---- -Instructions here. -""" - ) - - rule = parse_rule_file(rule_file) - assert rule.safety == ["README.md", "CHANGELOG.md"] - assert rule.compare_to == "base" - - def test_multiple_expects(self, tmp_path: Path) -> None: - """Multiple expects patterns parses successfully.""" - rule_file = tmp_path / "test.md" - rule_file.write_text( - """--- -name: Test Rule -pair: - trigger: api/{module}.py - expects: - - docs/api/{module}.md - - openapi/{module}.yaml -compare_to: base ---- -Instructions here. 
-""" - ) - - rule = parse_rule_file(rule_file) - assert rule.pair_config is not None - assert rule.pair_config.expects == ["docs/api/{module}.md", "openapi/{module}.yaml"] - assert rule.compare_to == "base" From 26a9911695e5b3096b3417aa34e87abaf6e9facb Mon Sep 17 00:00:00 2001 From: Noah Horton Date: Tue, 3 Feb 2026 13:05:51 -0700 Subject: [PATCH 02/45] Port theoretically done --- .claude/settings.json | 14 +- .claude/skills/deepwork/SKILL.md | 141 ++ doc/architecture.md | 237 ++- pyproject.toml | 3 + src/deepwork/cli/install.py | 10 +- src/deepwork/cli/main.py | 2 + src/deepwork/cli/serve.py | 139 ++ src/deepwork/cli/sync.py | 14 +- src/deepwork/core/adapters.py | 60 + src/deepwork/core/generator.py | 58 + src/deepwork/mcp/__init__.py | 23 + src/deepwork/mcp/quality_gate.py | 275 ++++ src/deepwork/mcp/schemas.py | 225 +++ src/deepwork/mcp/server.py | 157 ++ src/deepwork/mcp/state.py | 330 +++++ src/deepwork/mcp/tools.py | 385 +++++ .../templates/claude/skill-deepwork.md.jinja | 149 ++ tests/unit/mcp/__init__.py | 1 + tests/unit/mcp/test_quality_gate.py | 183 +++ tests/unit/mcp/test_schemas.py | 359 +++++ tests/unit/mcp/test_state.py | 287 ++++ tests/unit/mcp/test_tools.py | 310 ++++ uv.lock | 1275 ++++++++++++++++- 23 files changed, 4627 insertions(+), 10 deletions(-) create mode 100644 .claude/skills/deepwork/SKILL.md create mode 100644 src/deepwork/cli/serve.py create mode 100644 src/deepwork/mcp/__init__.py create mode 100644 src/deepwork/mcp/quality_gate.py create mode 100644 src/deepwork/mcp/schemas.py create mode 100644 src/deepwork/mcp/server.py create mode 100644 src/deepwork/mcp/state.py create mode 100644 src/deepwork/mcp/tools.py create mode 100644 src/deepwork/templates/claude/skill-deepwork.md.jinja create mode 100644 tests/unit/mcp/__init__.py create mode 100644 tests/unit/mcp/test_quality_gate.py create mode 100644 tests/unit/mcp/test_schemas.py create mode 100644 tests/unit/mcp/test_state.py create mode 100644 tests/unit/mcp/test_tools.py diff --git a/.claude/settings.json b/.claude/settings.json index bb150fb3..cfc707c0 100644 --- a/.claude/settings.json +++ b/.claude/settings.json @@ -121,7 +121,8 @@ "Bash(deepwork:*)", "Bash(.claude/hooks/commit_job_git_commit.sh:*)", "Bash(./.deepwork/jobs/deepwork_jobs/make_new_job.sh:*)", - "WebSearch" + "WebSearch", + "Skill(deepwork)" ] }, "hooks": { @@ -147,5 +148,16 @@ ] } ] + }, + "mcpServers": { + "deepwork": { + "command": "deepwork", + "args": [ + "serve", + "--path", + "." + ], + "transport": "stdio" + } } } \ No newline at end of file diff --git a/.claude/skills/deepwork/SKILL.md b/.claude/skills/deepwork/SKILL.md new file mode 100644 index 00000000..3b1a9267 --- /dev/null +++ b/.claude/skills/deepwork/SKILL.md @@ -0,0 +1,141 @@ +--- +name: deepwork +description: "Start or continue DeepWork workflows using MCP tools" +--- + +# DeepWork Workflow Manager + +Execute multi-step workflows with quality gate checkpoints. + +> **IMPORTANT**: This skill uses the DeepWork MCP server. All workflow operations +> are performed through MCP tool calls, not by reading instructions from files. + +## Quick Start + +1. **Discover workflows**: Call `get_workflows` to see available options +2. **Start a workflow**: Call `start_workflow` with your goal +3. **Execute steps**: Follow the instructions returned +4. **Checkpoint**: Call `finished_step` with your outputs +5. **Iterate or continue**: Handle `needs_work`, `next_step`, or `workflow_complete` + +## MCP Tools Reference + +### get_workflows + +Lists all available workflows in this project. 
+ +``` +Tool: deepwork.get_workflows +Parameters: none +``` + +Returns jobs with their workflows, steps, and summaries. + +### start_workflow + +Begins a new workflow session. + +``` +Tool: deepwork.start_workflow +Parameters: + - goal: string (required) - What you want to accomplish + - job_name: string (required) - Name of the job + - workflow_name: string (required) - Name of the workflow + - instance_id: string (optional) - Identifier like "acme" or "q1-2026" +``` + +Returns session ID, branch name, and first step instructions. + +### finished_step + +Reports completion of the current step. + +``` +Tool: deepwork.finished_step +Parameters: + - outputs: list[string] (required) - File paths of created outputs + - notes: string (optional) - Notes about what was done +``` + +Returns one of: +- `needs_work`: Quality criteria not met; fix and retry +- `next_step`: Proceed to next step with new instructions +- `workflow_complete`: All steps done; workflow finished + +## Execution Flow + +``` +User: /deepwork [intent] + │ + ▼ +┌─────────────────┐ +│ get_workflows │ ◄── Discover available workflows +└────────┬────────┘ + │ + ▼ +┌─────────────────┐ +│ Parse intent │ ◄── Match user intent to workflow +└────────┬────────┘ + │ + ▼ +┌─────────────────┐ +│ start_workflow │ ◄── Begin session, get first step +└────────┬────────┘ + │ + ▼ +┌─────────────────┐ +│ Execute step │ ◄── Follow step instructions +│ Create outputs │ +└────────┬────────┘ + │ + ▼ +┌─────────────────┐ +│ finished_step │ ◄── Report completion +└────────┬────────┘ + │ + ┌────┴────┐ + │ │ +needs_work next_step ─────► Loop back to "Execute step" + │ │ + │ workflow_complete + │ │ + ▼ ▼ +┌─────────────────┐ +│ Fix issues and │ Done! +│ retry │ +└─────────────────┘ +``` + +## Intent Parsing + +When the user invokes `/deepwork`, parse their intent: + +1. **Explicit workflow**: `/deepwork new_job` → start `new_job` workflow +2. **General request**: `/deepwork I want to create a new workflow` → infer best match +3. **No context**: `/deepwork` alone → call `get_workflows` and ask user to choose + +## Quality Gates + +Steps may have quality criteria. When you call `finished_step`: + +1. Outputs are evaluated against criteria +2. If any fail → `needs_work` status with feedback +3. Fix issues based on feedback +4. Call `finished_step` again +5. After passing → proceed to next step + +## Git Workflow + +DeepWork creates branches for workflow instances: +- Format: `deepwork/{job_name}-{workflow_name}-{instance_id or date}` +- Example: `deepwork/competitive_research-full_analysis-acme` + +Commit work as you go. Create PR when workflow completes. + +## Guardrails + +- Always use MCP tools; never manually read step instruction files +- Create ALL expected outputs before calling `finished_step` +- Read quality gate feedback carefully before retrying +- Don't skip steps unless user explicitly requests it +- Ask for clarification when user intent is ambiguous \ No newline at end of file diff --git a/doc/architecture.md b/doc/architecture.md index d08cf808..aad03028 100644 --- a/doc/architecture.md +++ b/doc/architecture.md @@ -18,11 +18,12 @@ DeepWork is a framework for enabling AI agents to perform complex, multi-step wo ## Architecture Overview -This document is organized into three major sections: +This document is organized into four major sections: 1. **[DeepWork Tool Architecture](#part-1-deepwork-tool-architecture)** - The DeepWork repository/codebase itself and how it works 2. 
**[Target Project Architecture](#part-2-target-project-architecture)** - What a project looks like after DeepWork is installed 3. **[Runtime Execution Model](#part-3-runtime-execution-model)** - How AI agents execute jobs using the installed skills +4. **[MCP Server Architecture](#part-4-mcp-server-architecture)** - The MCP server for checkpoint-based workflow execution --- @@ -40,7 +41,8 @@ deepwork/ # DeepWork tool repository │ │ ├── __init__.py │ │ ├── main.py # CLI entry point │ │ ├── install.py # Install command -│ │ └── sync.py # Sync command +│ │ ├── sync.py # Sync command +│ │ └── serve.py # MCP server command │ ├── core/ │ │ ├── adapters.py # Agent adapters for AI platforms │ │ ├── detector.py # AI platform detection @@ -48,6 +50,13 @@ deepwork/ # DeepWork tool repository │ │ ├── parser.py # Job definition parsing │ │ ├── doc_spec_parser.py # Doc spec parsing │ │ └── hooks_syncer.py # Hook syncing to platforms +│ ├── mcp/ # MCP server module +│ │ ├── __init__.py +│ │ ├── server.py # FastMCP server definition +│ │ ├── tools.py # MCP tool implementations +│ │ ├── state.py # Workflow session state management +│ │ ├── schemas.py # Pydantic models for I/O +│ │ └── quality_gate.py # Quality gate with review agent │ ├── hooks/ # Hook system and cross-platform wrappers │ │ ├── __init__.py │ │ ├── wrapper.py # Cross-platform input/output normalization @@ -55,7 +64,8 @@ deepwork/ # DeepWork tool repository │ │ └── gemini_hook.sh # Shell wrapper for Gemini CLI │ ├── templates/ # Skill templates for each platform │ │ ├── claude/ -│ │ │ └── skill-job-step.md.jinja +│ │ │ ├── skill-job-step.md.jinja +│ │ │ └── skill-deepwork.md.jinja # MCP entry point skill │ │ ├── gemini/ │ │ └── copilot/ │ ├── standard_jobs/ # Built-in job definitions @@ -1122,6 +1132,225 @@ See `doc/doc-specs.md` for complete documentation. --- +--- + +# Part 4: MCP Server Architecture + +DeepWork includes an MCP (Model Context Protocol) server that provides an alternative execution model. Instead of relying solely on skill files with embedded instructions, the MCP server guides agents through workflows via checkpoint calls with quality gate enforcement. + +## Architecture Overview + +``` +┌─────────────────────────────────────────────────────────────┐ +│ Claude Code / AI Agent │ +│ /deepwork skill → instructs to use MCP tools │ +└─────────────────────────────────────────────────────────────┘ + │ MCP Protocol (stdio) + ▼ +┌─────────────────────────────────────────────────────────────┐ +│ DeepWork MCP Server │ +│ Tools: get_workflows | start_workflow | finished_step │ +│ State: session tracking, step progress, outputs │ +│ Quality Gate: invokes review agent for validation │ +└─────────────────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────┐ +│ .deepwork/jobs/[job_name]/job.yml │ +└─────────────────────────────────────────────────────────────┘ +``` + +## MCP Server Components + +### Server (`server.py`) + +The FastMCP server definition that: +- Creates and configures the MCP server instance +- Registers the three workflow tools +- Provides server instructions for agents + +### Tools (`tools.py`) + +Implements the three MCP tools: + +#### 1. `get_workflows` +Lists all available workflows from `.deepwork/jobs/`. + +**Parameters**: None + +**Returns**: List of jobs with their workflows, steps, and summaries + +#### 2. `start_workflow` +Begins a new workflow session. 
+ +**Parameters**: +- `goal: str` - What the user wants to accomplish +- `job_name: str` - Name of the job +- `workflow_name: str` - Name of the workflow within the job +- `instance_id: str | None` - Optional identifier (e.g., "acme", "q1-2026") + +**Returns**: Session ID, branch name, first step instructions + +#### 3. `finished_step` +Reports step completion and gets next instructions. + +**Parameters**: +- `outputs: list[str]` - List of output file paths created +- `notes: str | None` - Optional notes about work done + +**Returns**: +- `status: "needs_work" | "next_step" | "workflow_complete"` +- If `needs_work`: feedback from quality gate, failed criteria +- If `next_step`: next step instructions +- If `workflow_complete`: summary of all outputs + +### State Management (`state.py`) + +Manages workflow session state persisted to `.deepwork/tmp/session_[id].json`: + +```python +class StateManager: + def create_session(...) -> WorkflowSession + def load_session(session_id) -> WorkflowSession + def start_step(step_id) -> None + def complete_step(step_id, outputs, notes) -> None + def advance_to_step(step_id, entry_index) -> None + def complete_workflow() -> None +``` + +Session state includes: +- Session ID and timestamps +- Job/workflow/instance identification +- Current step and entry index +- Per-step progress (started_at, completed_at, outputs, quality_attempts) + +### Quality Gate (`quality_gate.py`) + +Evaluates step outputs against quality criteria: + +```python +class QualityGate: + def evaluate( + step_instructions: str, + quality_criteria: list[str], + outputs: list[str], + project_root: Path, + ) -> QualityGateResult +``` + +The quality gate: +1. Builds a review prompt with step instructions, criteria, and output contents +2. Invokes a review agent via subprocess (configurable command) +3. Parses the structured JSON response +4. Returns pass/fail with per-criterion feedback + +### Schemas (`schemas.py`) + +Pydantic models for all tool inputs and outputs: +- `StartWorkflowInput`, `FinishedStepInput` +- `GetWorkflowsResponse`, `StartWorkflowResponse`, `FinishedStepResponse` +- `WorkflowSession`, `StepProgress` +- `QualityGateResult`, `QualityCriteriaResult` + +## MCP Server Registration + +When `deepwork install` runs, it registers the MCP server in platform settings: + +```json +// .claude/settings.json +{ + "mcpServers": { + "deepwork": { + "command": "deepwork", + "args": ["serve", "--path", "."], + "transport": "stdio" + } + } +} +``` + +## The `/deepwork` Skill + +A single skill (`.claude/skills/deepwork/SKILL.md`) instructs agents to use MCP tools: + +```markdown +# DeepWork Workflow Manager + +Execute multi-step workflows with quality gate checkpoints. + +## Quick Start +1. Discover workflows: Call `get_workflows` +2. Start a workflow: Call `start_workflow` with your goal +3. Execute steps: Follow the instructions returned +4. Checkpoint: Call `finished_step` with your outputs +5. Iterate or continue: Handle needs_work, next_step, or workflow_complete +``` + +## MCP Execution Flow + +1. **User invokes `/deepwork`** + - Agent calls `get_workflows` to discover available workflows + - Parses user intent to identify target workflow + +2. **Agent calls `start_workflow`** + - MCP server creates session, generates branch name + - Returns first step instructions and expected outputs + +3. **Agent executes step** + - Follows step instructions + - Creates output files + +4. 
**Agent calls `finished_step`** + - MCP server evaluates outputs against quality criteria (if configured) + - If `needs_work`: returns feedback for agent to fix issues + - If `next_step`: returns next step instructions + - If `workflow_complete`: workflow finished + +5. **Loop continues until workflow complete** + +## Quality Gate Configuration + +Configure in `.deepwork/config.yml`: + +```yaml +version: 0.2.0 +platforms: + - claude + +quality_gate: + agent_review_command: "claude -p --output-format json" + timeout: 120 + max_attempts: 3 +``` + +## Serve Command + +Start the MCP server manually: + +```bash +# Basic usage +deepwork serve + +# With quality gate +deepwork serve --quality-gate "claude -p --output-format json" + +# For a specific project +deepwork serve --path /path/to/project + +# SSE transport (for remote) +deepwork serve --transport sse --port 8000 +``` + +## Benefits of MCP Approach + +1. **Centralized state**: Session state persisted and visible in `.deepwork/tmp/` +2. **Quality gates**: Automated validation before proceeding +3. **Structured checkpoints**: Clear handoff points between steps +4. **Resumability**: Sessions can be loaded and resumed +5. **Observability**: All state changes logged and inspectable + +--- + ## References - [Spec-Kit Repository](https://github.com/github/spec-kit) @@ -1130,4 +1359,6 @@ See `doc/doc-specs.md` for complete documentation. - [Git Workflows](https://www.atlassian.com/git/tutorials/comparing-workflows) - [JSON Schema](https://json-schema.org/) - [Jinja2 Documentation](https://jinja.palletsprojects.com/) +- [Model Context Protocol](https://modelcontextprotocol.io/) +- [FastMCP Documentation](https://github.com/jlowin/fastmcp) diff --git a/pyproject.toml b/pyproject.toml index c2bc3e4a..f5d4bbd9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,6 +26,9 @@ dependencies = [ "click>=8.1.0", "rich>=13.0.0", "jsonschema>=4.17.0", + "fastmcp>=2.0", + "pydantic>=2.0", + "mcp>=1.0.0", ] [project.optional-dependencies] diff --git a/src/deepwork/cli/install.py b/src/deepwork/cli/install.py index dc945eb7..25ae2597 100644 --- a/src/deepwork/cli/install.py +++ b/src/deepwork/cli/install.py @@ -298,7 +298,15 @@ def _install_deepwork(platform_name: str | None, project_path: Path) -> None: save_yaml(config_file, config_data) console.print(f" [green]✓[/green] Updated {config_file.relative_to(project_path)}") - # Step 5: Run sync to generate skills + # Step 5: Register MCP server for each platform + console.print("[yellow]→[/yellow] Registering MCP server...") + for adapter in detected_adapters: + if adapter.register_mcp_server(project_path): + console.print(f" [green]✓[/green] Registered MCP server for {adapter.display_name}") + else: + console.print(f" [dim]•[/dim] MCP server already registered for {adapter.display_name}") + + # Step 6: Run sync to generate skills console.print() console.print("[yellow]→[/yellow] Running sync to generate skills...") console.print() diff --git a/src/deepwork/cli/main.py b/src/deepwork/cli/main.py index 840decbf..66756a08 100644 --- a/src/deepwork/cli/main.py +++ b/src/deepwork/cli/main.py @@ -16,11 +16,13 @@ def cli() -> None: # Import commands from deepwork.cli.hook import hook # noqa: E402 from deepwork.cli.install import install # noqa: E402 +from deepwork.cli.serve import serve # noqa: E402 from deepwork.cli.sync import sync # noqa: E402 cli.add_command(install) cli.add_command(sync) cli.add_command(hook) +cli.add_command(serve) if __name__ == "__main__": diff --git a/src/deepwork/cli/serve.py 
b/src/deepwork/cli/serve.py new file mode 100644 index 00000000..0a74b0a9 --- /dev/null +++ b/src/deepwork/cli/serve.py @@ -0,0 +1,139 @@ +"""Serve command for DeepWork MCP server.""" + +from pathlib import Path + +import click +from rich.console import Console + +from deepwork.utils.yaml_utils import load_yaml + +console = Console() + + +class ServeError(Exception): + """Exception raised for serve errors.""" + + pass + + +def _load_config(project_path: Path) -> dict: + """Load DeepWork config from project. + + Args: + project_path: Path to project root + + Returns: + Config dictionary + + Raises: + ServeError: If config not found or invalid + """ + config_file = project_path / ".deepwork" / "config.yml" + if not config_file.exists(): + raise ServeError( + f"DeepWork not installed in {project_path}. " + "Run 'deepwork install' first." + ) + + config = load_yaml(config_file) + if config is None: + config = {} + + return config + + +@click.command() +@click.option( + "--path", + type=click.Path(exists=True, file_okay=False, path_type=Path), + default=".", + help="Path to project directory (default: current directory)", +) +@click.option( + "--quality-gate", + type=str, + default=None, + help="Command for quality gate agent (e.g., 'claude -p --output-format json')", +) +@click.option( + "--transport", + type=click.Choice(["stdio", "sse"]), + default="stdio", + help="MCP transport protocol (default: stdio)", +) +@click.option( + "--port", + type=int, + default=8000, + help="Port for SSE transport (default: 8000)", +) +def serve( + path: Path, + quality_gate: str | None, + transport: str, + port: int, +) -> None: + """Start the DeepWork MCP server. + + Exposes workflow management tools to AI agents via MCP protocol. + By default uses stdio transport for local integration with Claude Code. + + Examples: + + # Start server for current directory + deepwork serve + + # Start with quality gate enabled + deepwork serve --quality-gate "claude -p --output-format json" + + # Start for a specific project + deepwork serve --path /path/to/project + """ + try: + _serve_mcp(path, quality_gate, transport, port) + except ServeError as e: + console.print(f"[red]Error:[/red] {e}") + raise click.Abort() from e + except Exception as e: + console.print(f"[red]Unexpected error:[/red] {e}") + raise + + +def _serve_mcp( + project_path: Path, + quality_gate_command: str | None, + transport: str, + port: int, +) -> None: + """Start the MCP server. 
+ + Args: + project_path: Path to project directory + quality_gate_command: Optional quality gate command + transport: Transport protocol (stdio or sse) + port: Port for SSE transport + + Raises: + ServeError: If server fails to start + """ + # Validate project has DeepWork installed + _load_config(project_path) + + # Load quality gate from config if not specified + if quality_gate_command is None: + config = _load_config(project_path) + qg_config = config.get("quality_gate", {}) + quality_gate_command = qg_config.get("agent_review_command") + + # Create and run server + from deepwork.mcp.server import create_server + + server = create_server( + project_root=project_path, + quality_gate_command=quality_gate_command, + ) + + if transport == "stdio": + server.run(transport="stdio") + else: + server.run(transport="sse", port=port) diff --git a/src/deepwork/cli/sync.py b/src/deepwork/cli/sync.py index 03c47a30..687d47d1 100644 --- a/src/deepwork/cli/sync.py +++ b/src/deepwork/cli/sync.py @@ -134,10 +134,20 @@ def sync_skills(project_path: Path) -> None: # Create skills directory ensure_dir(skills_dir) - # Generate skills for all jobs + # Generate the global /deepwork skill (MCP entry point) + console.print(" [dim]•[/dim] Generating /deepwork skill...") all_skill_paths: list[Path] = [] + try: + deepwork_skill_path = generator.generate_deepwork_skill(adapter, platform_dir) + all_skill_paths.append(deepwork_skill_path) + stats["skills"] += 1 + console.print(" [green]✓[/green] deepwork (MCP entry point)") + except Exception as e: + console.print(f" [red]✗[/red] Failed to generate /deepwork skill: {e}") + + # Generate skills for all jobs if jobs: - console.print(" [dim]•[/dim] Generating skills...") + console.print(" [dim]•[/dim] Generating job skills...") for job in jobs: try: job_paths = generator.generate_all_skills( diff --git a/src/deepwork/core/adapters.py b/src/deepwork/core/adapters.py index 96b8ca00..e0a3f101 100644 --- a/src/deepwork/core/adapters.py +++ b/src/deepwork/core/adapters.py @@ -256,6 +256,22 @@ def sync_permissions(self, project_path: Path) -> int: # Default implementation does nothing - subclasses can override return 0 + def register_mcp_server(self, project_path: Path) -> bool: + """ + Register the DeepWork MCP server with the platform. + + Args: + project_path: Path to project root + + Returns: + True if server was registered, False if already registered + + Raises: + AdapterError: If registration fails + """ + # Default implementation does nothing - subclasses can override + return False + def _hook_already_present(hooks: list[dict[str, Any]], script_path: str) -> bool: """Check if a hook with the given script path is already in the list.""" @@ -546,6 +562,50 @@ def _extract_skill_name(self, skill_path: Path) -> str | None: return None + def register_mcp_server(self, project_path: Path) -> bool: + """ + Register the DeepWork MCP server in Claude Code settings.json. 
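+
+        Registration is idempotent: if a "deepwork" entry already exists under
+        mcpServers, the settings file is left unchanged and False is returned.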
+ + Adds the mcpServers configuration for DeepWork: + { + "mcpServers": { + "deepwork": { + "command": "deepwork", + "args": ["serve", "--path", "."], + "transport": "stdio" + } + } + } + + Args: + project_path: Path to project root + + Returns: + True if server was registered, False if already registered + + Raises: + AdapterError: If registration fails + """ + settings = self._load_settings(project_path) + + # Initialize mcpServers if not present + if "mcpServers" not in settings: + settings["mcpServers"] = {} + + # Check if already registered + if "deepwork" in settings["mcpServers"]: + return False + + # Register the DeepWork MCP server + settings["mcpServers"]["deepwork"] = { + "command": "deepwork", + "args": ["serve", "--path", "."], + "transport": "stdio", + } + + self._save_settings(project_path, settings) + return True + class GeminiAdapter(AgentAdapter): """Adapter for Gemini CLI. diff --git a/src/deepwork/core/generator.py b/src/deepwork/core/generator.py index 05ba975c..75f289c1 100644 --- a/src/deepwork/core/generator.py +++ b/src/deepwork/core/generator.py @@ -575,3 +575,61 @@ def generate_all_skills( skill_paths.append(skill_path) return skill_paths + + def generate_deepwork_skill( + self, + adapter: AgentAdapter, + output_dir: Path | str, + ) -> Path: + """ + Generate the global /deepwork skill that instructs agents to use MCP tools. + + This is a single skill that provides the main entry point for DeepWork, + directing agents to use the MCP server's tools for workflow management. + + Args: + adapter: Agent adapter for the target platform + output_dir: Directory to write skill file to + + Returns: + Path to generated skill file + + Raises: + GeneratorError: If generation fails + """ + output_dir = Path(output_dir) + + # Create skills subdirectory if needed + skills_dir = output_dir / adapter.skills_dir + skills_dir.mkdir(parents=True, exist_ok=True) + + # Load and render template + env = self._get_jinja_env(adapter) + template_name = "skill-deepwork.md.jinja" + + try: + template = env.get_template(template_name) + except TemplateNotFound as e: + raise GeneratorError(f"DeepWork skill template not found: {e}") from e + + try: + rendered = template.render() + except Exception as e: + raise GeneratorError(f"DeepWork skill template rendering failed: {e}") from e + + # Write skill file + # Use the adapter's convention for naming + if adapter.name == "gemini": + skill_filename = "deepwork/index.toml" + else: + skill_filename = "deepwork/SKILL.md" + + skill_path = skills_dir / skill_filename + skill_path.parent.mkdir(parents=True, exist_ok=True) + + try: + safe_write(skill_path, rendered) + except Exception as e: + raise GeneratorError(f"Failed to write DeepWork skill file: {e}") from e + + return skill_path diff --git a/src/deepwork/mcp/__init__.py b/src/deepwork/mcp/__init__.py new file mode 100644 index 00000000..bb6e5041 --- /dev/null +++ b/src/deepwork/mcp/__init__.py @@ -0,0 +1,23 @@ +"""DeepWork MCP Server module. + +This module provides an MCP (Model Context Protocol) server that guides AI agents +through DeepWork workflows via checkpoint calls with quality gate enforcement. 
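+Session state is persisted under `.deepwork/tmp/` so workflows can be inspected and
+resumed across server restarts.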
+ +The server exposes three main tools: +- get_workflows: List all available workflows +- start_workflow: Initialize a workflow session +- finished_step: Report step completion and get next instructions + +Example usage: + deepwork serve --path /path/to/project +""" + + +def create_server(*args, **kwargs): # type: ignore + """Lazy import to avoid loading fastmcp at module import time.""" + from deepwork.mcp.server import create_server as _create_server + + return _create_server(*args, **kwargs) + + +__all__ = ["create_server"] diff --git a/src/deepwork/mcp/quality_gate.py b/src/deepwork/mcp/quality_gate.py new file mode 100644 index 00000000..17a13bba --- /dev/null +++ b/src/deepwork/mcp/quality_gate.py @@ -0,0 +1,275 @@ +"""Quality gate for evaluating step outputs. + +The quality gate invokes a review agent (via subprocess) to evaluate +step outputs against quality criteria. +""" + +from __future__ import annotations + +import json +import subprocess +from pathlib import Path + +from deepwork.mcp.schemas import QualityCriteriaResult, QualityGateResult + + +class QualityGateError(Exception): + """Exception raised for quality gate errors.""" + + pass + + +class QualityGate: + """Evaluates step outputs against quality criteria. + + Uses a subprocess to invoke a review agent (e.g., Claude CLI) that + evaluates outputs and returns structured feedback. + """ + + def __init__( + self, + command: str = "claude -p --output-format json", + timeout: int = 120, + ): + """Initialize quality gate. + + Args: + command: Command to invoke review agent (receives prompt via stdin) + timeout: Timeout in seconds for review agent + """ + self.command = command + self.timeout = timeout + + def _build_review_prompt( + self, + step_instructions: str, + quality_criteria: list[str], + outputs: list[str], + project_root: Path, + ) -> str: + """Build the prompt for the review agent. + + Args: + step_instructions: The step's instruction content + quality_criteria: List of quality criteria to evaluate + outputs: List of output file paths + project_root: Project root path for reading files + + Returns: + Formatted review prompt + """ + # Read output file contents + output_contents: list[str] = [] + for output_path in outputs: + full_path = project_root / output_path + if full_path.exists(): + try: + content = full_path.read_text(encoding="utf-8") + output_contents.append(f"### {output_path}\n```\n{content}\n```") + except Exception as e: + output_contents.append(f"### {output_path}\nError reading file: {e}") + else: + output_contents.append(f"### {output_path}\nFile not found") + + outputs_text = "\n\n".join(output_contents) if output_contents else "No outputs provided" + + criteria_list = "\n".join(f"- {c}" for c in quality_criteria) + + return f"""You are a quality gate reviewer for a workflow step. Evaluate the outputs against the quality criteria. + +## Step Instructions + +{step_instructions} + +## Quality Criteria + +{criteria_list} + +## Outputs to Review + +{outputs_text} + +## Your Task + +Evaluate each output against the quality criteria. For each criterion, determine if it passes or fails. + +Return your evaluation as JSON with this exact structure: +```json +{{ + "passed": true/false, + "feedback": "Brief overall summary", + "criteria_results": [ + {{ + "criterion": "The criterion text", + "passed": true/false, + "feedback": "Specific feedback for this criterion (null if passed)" + }} + ] +}} +``` + +Be strict but fair. Only mark as passed if the criterion is clearly met. 
+""" + + def _parse_response(self, response_text: str) -> QualityGateResult: + """Parse the review agent's response. + + Args: + response_text: Raw response from review agent + + Returns: + Parsed QualityGateResult + + Raises: + QualityGateError: If response cannot be parsed + """ + # Try to extract JSON from the response + try: + # Look for JSON in code blocks + if "```json" in response_text: + start = response_text.index("```json") + 7 + end = response_text.index("```", start) + json_text = response_text[start:end].strip() + elif "```" in response_text: + start = response_text.index("```") + 3 + end = response_text.index("```", start) + json_text = response_text[start:end].strip() + else: + # Assume entire response is JSON + json_text = response_text.strip() + + data = json.loads(json_text) + + # Parse criteria results + criteria_results = [ + QualityCriteriaResult( + criterion=cr.get("criterion", ""), + passed=cr.get("passed", False), + feedback=cr.get("feedback"), + ) + for cr in data.get("criteria_results", []) + ] + + return QualityGateResult( + passed=data.get("passed", False), + feedback=data.get("feedback", "No feedback provided"), + criteria_results=criteria_results, + ) + + except (json.JSONDecodeError, ValueError, KeyError) as e: + raise QualityGateError( + f"Failed to parse review agent response: {e}\n" + f"Response was: {response_text[:500]}..." + ) from e + + def evaluate( + self, + step_instructions: str, + quality_criteria: list[str], + outputs: list[str], + project_root: Path, + ) -> QualityGateResult: + """Evaluate step outputs against quality criteria. + + Args: + step_instructions: The step's instruction content + quality_criteria: List of quality criteria to evaluate + outputs: List of output file paths + project_root: Project root path + + Returns: + QualityGateResult with pass/fail and feedback + + Raises: + QualityGateError: If evaluation fails + """ + if not quality_criteria: + # No criteria = auto-pass + return QualityGateResult( + passed=True, + feedback="No quality criteria defined - auto-passing", + criteria_results=[], + ) + + prompt = self._build_review_prompt( + step_instructions=step_instructions, + quality_criteria=quality_criteria, + outputs=outputs, + project_root=project_root, + ) + + try: + # Run review agent + result = subprocess.run( + self.command.split(), + input=prompt, + capture_output=True, + text=True, + timeout=self.timeout, + cwd=str(project_root), + ) + + if result.returncode != 0: + raise QualityGateError( + f"Review agent failed with exit code {result.returncode}:\n" + f"stderr: {result.stderr}" + ) + + return self._parse_response(result.stdout) + + except subprocess.TimeoutExpired as e: + raise QualityGateError( + f"Review agent timed out after {self.timeout} seconds" + ) from e + except FileNotFoundError as e: + raise QualityGateError( + f"Review agent command not found: {self.command.split()[0]}" + ) from e + + +class MockQualityGate(QualityGate): + """Mock quality gate for testing. + + Always passes unless configured otherwise. + """ + + def __init__(self, should_pass: bool = True, feedback: str = "Mock evaluation"): + """Initialize mock quality gate. 
+ + Args: + should_pass: Whether evaluations should pass + feedback: Feedback message to return + """ + super().__init__() + self.should_pass = should_pass + self.feedback = feedback + self.evaluations: list[dict] = [] + + def evaluate( + self, + step_instructions: str, + quality_criteria: list[str], + outputs: list[str], + project_root: Path, + ) -> QualityGateResult: + """Mock evaluation - records call and returns configured result.""" + self.evaluations.append({ + "step_instructions": step_instructions, + "quality_criteria": quality_criteria, + "outputs": outputs, + }) + + criteria_results = [ + QualityCriteriaResult( + criterion=c, + passed=self.should_pass, + feedback=None if self.should_pass else self.feedback, + ) + for c in quality_criteria + ] + + return QualityGateResult( + passed=self.should_pass, + feedback=self.feedback, + criteria_results=criteria_results, + ) diff --git a/src/deepwork/mcp/schemas.py b/src/deepwork/mcp/schemas.py new file mode 100644 index 00000000..18375c79 --- /dev/null +++ b/src/deepwork/mcp/schemas.py @@ -0,0 +1,225 @@ +"""Pydantic models for MCP tool inputs and outputs.""" + +from enum import Enum +from typing import Any + +from pydantic import BaseModel, Field + +# ============================================================================= +# Enums +# ============================================================================= + + +class StepStatus(str, Enum): + """Status returned from finished_step.""" + + NEEDS_WORK = "needs_work" + NEXT_STEP = "next_step" + WORKFLOW_COMPLETE = "workflow_complete" + + +# ============================================================================= +# Workflow Info Models +# ============================================================================= + + +class StepInfo(BaseModel): + """Information about a single step.""" + + id: str = Field(description="Step identifier") + name: str = Field(description="Human-readable step name") + description: str = Field(description="What the step does") + dependencies: list[str] = Field(default_factory=list, description="Required prior steps") + + +class ConcurrentStepGroup(BaseModel): + """A group of steps that can be executed concurrently.""" + + step_ids: list[str] = Field(description="Steps that run in parallel") + is_concurrent: bool = Field(default=True) + + +class WorkflowStepEntryInfo(BaseModel): + """Information about a workflow step entry (sequential or concurrent).""" + + step_ids: list[str] = Field(description="Step ID(s) in this entry") + is_concurrent: bool = Field( + default=False, description="True if steps run in parallel" + ) + + +class WorkflowInfo(BaseModel): + """Information about a workflow.""" + + name: str = Field(description="Workflow identifier") + summary: str = Field(description="Short description of workflow") + steps: list[str] = Field(description="Flattened list of step IDs in order") + step_entries: list[WorkflowStepEntryInfo] = Field( + description="Step entries (sequential or concurrent)" + ) + first_step: str = Field(description="First step ID to start workflow") + + +class JobInfo(BaseModel): + """Information about a job and its workflows.""" + + name: str = Field(description="Job identifier") + summary: str = Field(description="Short summary of the job") + description: str | None = Field(default=None, description="Full description") + workflows: list[WorkflowInfo] = Field(default_factory=list) + standalone_steps: list[StepInfo] = Field( + default_factory=list, description="Steps not in any workflow" + ) + + +# 
============================================================================= +# Tool Input Models +# ============================================================================= + + +class StartWorkflowInput(BaseModel): + """Input for start_workflow tool.""" + + goal: str = Field(description="What the user wants to accomplish") + job_name: str = Field(description="Name of the job") + workflow_name: str = Field(description="Name of the workflow within the job") + instance_id: str | None = Field( + default=None, + description="Optional identifier (e.g., 'acme', 'q1-2026')", + ) + + +class FinishedStepInput(BaseModel): + """Input for finished_step tool.""" + + outputs: list[str] = Field(description="List of output file paths created") + notes: str | None = Field(default=None, description="Optional notes about work done") + + +# ============================================================================= +# Quality Gate Models +# ============================================================================= + + +class QualityCriteriaResult(BaseModel): + """Result for a single quality criterion.""" + + criterion: str = Field(description="The quality criterion text") + passed: bool = Field(description="Whether this criterion passed") + feedback: str | None = Field(default=None, description="Feedback if failed") + + +class QualityGateResult(BaseModel): + """Result from quality gate evaluation.""" + + passed: bool = Field(description="Overall pass/fail") + feedback: str = Field(description="Summary feedback") + criteria_results: list[QualityCriteriaResult] = Field( + default_factory=list, description="Per-criterion results" + ) + + +# ============================================================================= +# Tool Output Models +# ============================================================================= + + +class GetWorkflowsResponse(BaseModel): + """Response from get_workflows tool.""" + + jobs: list[JobInfo] = Field(description="List of all jobs with their workflows") + + +class StartWorkflowResponse(BaseModel): + """Response from start_workflow tool.""" + + session_id: str = Field(description="Unique session identifier") + branch_name: str = Field(description="Git branch for this workflow instance") + current_step_id: str = Field(description="ID of the current step") + step_instructions: str = Field(description="Instructions for the first step") + step_outputs: list[str] = Field(description="Expected output files for this step") + quality_criteria: list[str] = Field( + default_factory=list, description="Criteria for step completion" + ) + + +class FinishedStepResponse(BaseModel): + """Response from finished_step tool.""" + + status: StepStatus = Field(description="Result status") + + # For needs_work status + feedback: str | None = Field(default=None, description="Feedback from quality gate") + failed_criteria: list[QualityCriteriaResult] | None = Field( + default=None, description="Failed quality criteria" + ) + + # For next_step status + next_step_id: str | None = Field(default=None, description="ID of next step") + step_instructions: str | None = Field( + default=None, description="Instructions for next step" + ) + step_outputs: list[str] | None = Field( + default=None, description="Expected outputs for next step" + ) + quality_criteria: list[str] | None = Field( + default=None, description="Criteria for next step" + ) + + # For workflow_complete status + summary: str | None = Field( + default=None, description="Summary of completed workflow" + ) + all_outputs: list[str] | None = 
Field( + default=None, description="All outputs from all steps" + ) + + +# ============================================================================= +# Session State Models +# ============================================================================= + + +class StepProgress(BaseModel): + """Progress for a single step in a workflow.""" + + step_id: str = Field(description="Step identifier") + started_at: str | None = Field(default=None, description="ISO timestamp when started") + completed_at: str | None = Field( + default=None, description="ISO timestamp when completed" + ) + outputs: list[str] = Field(default_factory=list, description="Output files created") + notes: str | None = Field(default=None, description="Notes from agent") + quality_attempts: int = Field(default=0, description="Number of quality gate attempts") + + +class WorkflowSession(BaseModel): + """State for an active workflow session.""" + + session_id: str = Field(description="Unique session identifier") + job_name: str = Field(description="Name of the job") + workflow_name: str = Field(description="Name of the workflow") + instance_id: str | None = Field(default=None, description="Instance identifier") + goal: str = Field(description="User's goal for this workflow") + branch_name: str = Field(description="Git branch name") + current_step_id: str = Field(description="Current step in workflow") + current_entry_index: int = Field( + default=0, description="Index of current entry in step_entries" + ) + step_progress: dict[str, StepProgress] = Field( + default_factory=dict, description="Progress for each step" + ) + started_at: str = Field(description="ISO timestamp when session started") + completed_at: str | None = Field( + default=None, description="ISO timestamp when completed" + ) + status: str = Field(default="active", description="Session status") + + def to_dict(self) -> dict[str, Any]: + """Convert to dictionary for JSON serialization.""" + return self.model_dump() + + @classmethod + def from_dict(cls, data: dict[str, Any]) -> "WorkflowSession": + """Create from dictionary.""" + return cls.model_validate(data) diff --git a/src/deepwork/mcp/server.py b/src/deepwork/mcp/server.py new file mode 100644 index 00000000..5af0d059 --- /dev/null +++ b/src/deepwork/mcp/server.py @@ -0,0 +1,157 @@ +"""FastMCP server for DeepWork workflows. + +This module creates and configures the MCP server that exposes workflow +management tools to AI agents. + +Usage: + deepwork serve --path /path/to/project +""" + +from __future__ import annotations + +from pathlib import Path +from typing import Any + +from fastmcp import FastMCP + +from deepwork.mcp.quality_gate import QualityGate +from deepwork.mcp.schemas import ( + FinishedStepInput, + StartWorkflowInput, +) +from deepwork.mcp.state import StateManager +from deepwork.mcp.tools import WorkflowTools + + +def create_server( + project_root: Path | str, + quality_gate_command: str | None = None, +) -> FastMCP: + """Create and configure the MCP server. 
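+
+    Example (illustrative; this mirrors what `deepwork serve` does):
+        server = create_server(".", quality_gate_command="claude -p --output-format json")
+        server.run(transport="stdio")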
+ + Args: + project_root: Path to the project root + quality_gate_command: Optional command for quality gate agent + + Returns: + Configured FastMCP server instance + """ + project_path = Path(project_root).resolve() + + # Initialize components + state_manager = StateManager(project_path) + + quality_gate: QualityGate | None = None + if quality_gate_command: + quality_gate = QualityGate(command=quality_gate_command) + + tools = WorkflowTools( + project_root=project_path, + state_manager=state_manager, + quality_gate=quality_gate, + ) + + # Create MCP server + mcp = FastMCP( + name="deepwork", + instructions=_get_server_instructions(), + ) + + # Register tools + @mcp.tool( + description=( + "List all available DeepWork workflows. " + "Returns job names, workflow definitions, and step information. " + "Call this first to discover available workflows." + ) + ) + def get_workflows() -> dict[str, Any]: + """Get all available workflows.""" + response = tools.get_workflows() + return response.model_dump() + + @mcp.tool( + description=( + "Start a new workflow session. " + "Creates a git branch, initializes state tracking, and returns " + "the first step's instructions. " + "Required parameters: goal (what user wants), job_name, workflow_name. " + "Optional: instance_id for naming (e.g., 'acme', 'q1-2026')." + ) + ) + def start_workflow( + goal: str, + job_name: str, + workflow_name: str, + instance_id: str | None = None, + ) -> dict[str, Any]: + """Start a workflow and get first step instructions.""" + input_data = StartWorkflowInput( + goal=goal, + job_name=job_name, + workflow_name=workflow_name, + instance_id=instance_id, + ) + response = tools.start_workflow(input_data) + return response.model_dump() + + @mcp.tool( + description=( + "Report that you've finished a workflow step. " + "Validates outputs against quality criteria (if configured), " + "then returns either: " + "'needs_work' with feedback to fix issues, " + "'next_step' with instructions for the next step, or " + "'workflow_complete' when finished. " + "Required: outputs (list of file paths created). " + "Optional: notes about work done." + ) + ) + def finished_step( + outputs: list[str], + notes: str | None = None, + ) -> dict[str, Any]: + """Report step completion and get next instructions.""" + input_data = FinishedStepInput(outputs=outputs, notes=notes) + response = tools.finished_step(input_data) + return response.model_dump() + + return mcp + + +def _get_server_instructions() -> str: + """Get the server instructions for agents. + + Returns: + Instructions string describing how to use the DeepWork MCP server. + """ + return """# DeepWork Workflow Server + +This MCP server guides you through multi-step workflows with quality gates. + +## Workflow + +1. **Discover**: Call `get_workflows` to see available workflows +2. **Start**: Call `start_workflow` with your goal, job_name, and workflow_name +3. **Execute**: Follow the step instructions returned +4. **Checkpoint**: Call `finished_step` with your outputs when done with each step +5. **Iterate**: If `needs_work`, fix issues and call `finished_step` again +6. **Continue**: If `next_step`, execute new instructions and repeat +7. **Complete**: When `workflow_complete`, the workflow is done + +## Quality Gates + +Steps may have quality criteria. 
When you call `finished_step`: +- Your outputs are evaluated against the criteria +- If any fail, you'll get `needs_work` status with feedback +- Fix the issues and call `finished_step` again +- After passing, you'll get the next step or completion + +## Best Practices + +- Always call `get_workflows` first to understand available options +- Provide clear goals when starting - they're used for context +- Create all expected outputs before calling `finished_step` +- Use instance_id for meaningful names (e.g., client name, quarter) +- Read quality gate feedback carefully before retrying +""" diff --git a/src/deepwork/mcp/state.py b/src/deepwork/mcp/state.py new file mode 100644 index 00000000..160283e0 --- /dev/null +++ b/src/deepwork/mcp/state.py @@ -0,0 +1,330 @@ +"""Workflow state management for MCP server. + +State is persisted to `.deepwork/tmp/session_[id].json` for transparency +and recovery. +""" + +from __future__ import annotations + +import json +import uuid +from datetime import UTC, datetime +from pathlib import Path + +from deepwork.mcp.schemas import StepProgress, WorkflowSession + + +class StateError(Exception): + """Exception raised for state management errors.""" + + pass + + +class StateManager: + """Manages workflow session state. + + Sessions are persisted to `.deepwork/tmp/` as JSON files for: + - Transparency: Users can inspect session state + - Recovery: Sessions survive server restarts + - Debugging: State history is preserved + """ + + def __init__(self, project_root: Path): + """Initialize state manager. + + Args: + project_root: Path to the project root + """ + self.project_root = project_root + self.sessions_dir = project_root / ".deepwork" / "tmp" + self._active_session: WorkflowSession | None = None + + def _ensure_sessions_dir(self) -> None: + """Ensure the sessions directory exists.""" + self.sessions_dir.mkdir(parents=True, exist_ok=True) + + def _session_file(self, session_id: str) -> Path: + """Get the path to a session file.""" + return self.sessions_dir / f"session_{session_id}.json" + + def _generate_session_id(self) -> str: + """Generate a unique session ID.""" + return str(uuid.uuid4())[:8] + + def _generate_branch_name( + self, job_name: str, workflow_name: str, instance_id: str | None + ) -> str: + """Generate a git branch name for the workflow. + + Format: deepwork/[job_name]-[workflow_name]-[instance_id or date] + """ + date_str = datetime.now(UTC).strftime("%Y%m%d") + instance = instance_id or date_str + return f"deepwork/{job_name}-{workflow_name}-{instance}" + + def create_session( + self, + job_name: str, + workflow_name: str, + goal: str, + first_step_id: str, + instance_id: str | None = None, + ) -> WorkflowSession: + """Create a new workflow session. 
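+
+        The session is written to `.deepwork/tmp/session_<id>.json` and a branch
+        name of the form `deepwork/<job>-<workflow>-<instance or date>` is generated.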
+ + Args: + job_name: Name of the job + workflow_name: Name of the workflow + goal: User's goal for this workflow + first_step_id: ID of the first step + instance_id: Optional instance identifier + + Returns: + New WorkflowSession + """ + self._ensure_sessions_dir() + + session_id = self._generate_session_id() + branch_name = self._generate_branch_name(job_name, workflow_name, instance_id) + now = datetime.now(UTC).isoformat() + + session = WorkflowSession( + session_id=session_id, + job_name=job_name, + workflow_name=workflow_name, + instance_id=instance_id, + goal=goal, + branch_name=branch_name, + current_step_id=first_step_id, + current_entry_index=0, + step_progress={}, + started_at=now, + status="active", + ) + + self._save_session(session) + self._active_session = session + return session + + def _save_session(self, session: WorkflowSession) -> None: + """Save session to file.""" + self._ensure_sessions_dir() + session_file = self._session_file(session.session_id) + with open(session_file, "w", encoding="utf-8") as f: + json.dump(session.to_dict(), f, indent=2) + + def load_session(self, session_id: str) -> WorkflowSession: + """Load a session from file. + + Args: + session_id: Session ID to load + + Returns: + WorkflowSession + + Raises: + StateError: If session not found + """ + session_file = self._session_file(session_id) + if not session_file.exists(): + raise StateError(f"Session not found: {session_id}") + + with open(session_file, encoding="utf-8") as f: + data = json.load(f) + + session = WorkflowSession.from_dict(data) + self._active_session = session + return session + + def get_active_session(self) -> WorkflowSession | None: + """Get the currently active session. + + Returns: + Active session or None if no session active + """ + return self._active_session + + def require_active_session(self) -> WorkflowSession: + """Get active session or raise error. + + Returns: + Active session + + Raises: + StateError: If no active session + """ + if self._active_session is None: + raise StateError( + "No active workflow session. Use start_workflow to begin a workflow." + ) + return self._active_session + + def start_step(self, step_id: str) -> None: + """Mark a step as started. + + Args: + step_id: Step ID to start + + Raises: + StateError: If no active session + """ + session = self.require_active_session() + now = datetime.now(UTC).isoformat() + + if step_id not in session.step_progress: + session.step_progress[step_id] = StepProgress( + step_id=step_id, + started_at=now, + ) + else: + session.step_progress[step_id].started_at = now + + session.current_step_id = step_id + self._save_session(session) + + def complete_step( + self, step_id: str, outputs: list[str], notes: str | None = None + ) -> None: + """Mark a step as completed. + + Args: + step_id: Step ID to complete + outputs: Output files created + notes: Optional notes + + Raises: + StateError: If no active session + """ + session = self.require_active_session() + now = datetime.now(UTC).isoformat() + + if step_id not in session.step_progress: + session.step_progress[step_id] = StepProgress( + step_id=step_id, + started_at=now, + ) + + progress = session.step_progress[step_id] + progress.completed_at = now + progress.outputs = outputs + progress.notes = notes + + self._save_session(session) + + def record_quality_attempt(self, step_id: str) -> int: + """Record a quality gate attempt for a step. 
+ + Args: + step_id: Step ID + + Returns: + Total number of attempts for this step + + Raises: + StateError: If no active session + """ + session = self.require_active_session() + + if step_id not in session.step_progress: + session.step_progress[step_id] = StepProgress(step_id=step_id) + + session.step_progress[step_id].quality_attempts += 1 + self._save_session(session) + + return session.step_progress[step_id].quality_attempts + + def advance_to_step(self, step_id: str, entry_index: int) -> None: + """Advance the session to a new step. + + Args: + step_id: New current step ID + entry_index: Index in workflow step_entries + + Raises: + StateError: If no active session + """ + session = self.require_active_session() + session.current_step_id = step_id + session.current_entry_index = entry_index + self._save_session(session) + + def complete_workflow(self) -> None: + """Mark the workflow as complete. + + Raises: + StateError: If no active session + """ + session = self.require_active_session() + now = datetime.now(UTC).isoformat() + session.completed_at = now + session.status = "completed" + self._save_session(session) + + def get_all_outputs(self) -> list[str]: + """Get all outputs from all completed steps. + + Returns: + List of all output file paths + + Raises: + StateError: If no active session + """ + session = self.require_active_session() + outputs: list[str] = [] + for progress in session.step_progress.values(): + outputs.extend(progress.outputs) + return outputs + + def list_sessions(self) -> list[WorkflowSession]: + """List all saved sessions. + + Returns: + List of WorkflowSession objects + """ + if not self.sessions_dir.exists(): + return [] + + sessions = [] + for session_file in self.sessions_dir.glob("session_*.json"): + try: + with open(session_file, encoding="utf-8") as f: + data = json.load(f) + sessions.append(WorkflowSession.from_dict(data)) + except (json.JSONDecodeError, ValueError): + # Skip corrupted files + continue + + return sorted(sessions, key=lambda s: s.started_at, reverse=True) + + def find_active_sessions_for_workflow( + self, job_name: str, workflow_name: str + ) -> list[WorkflowSession]: + """Find active sessions for a specific workflow. + + Args: + job_name: Job name + workflow_name: Workflow name + + Returns: + List of active sessions matching the criteria + """ + return [ + s + for s in self.list_sessions() + if s.job_name == job_name + and s.workflow_name == workflow_name + and s.status == "active" + ] + + def delete_session(self, session_id: str) -> None: + """Delete a session file. + + Args: + session_id: Session ID to delete + """ + session_file = self._session_file(session_id) + if session_file.exists(): + session_file.unlink() + + if self._active_session and self._active_session.session_id == session_id: + self._active_session = None diff --git a/src/deepwork/mcp/tools.py b/src/deepwork/mcp/tools.py new file mode 100644 index 00000000..c4663316 --- /dev/null +++ b/src/deepwork/mcp/tools.py @@ -0,0 +1,385 @@ +"""MCP tool implementations for DeepWork workflows. 
+ +This module provides the core tools for guiding agents through workflows: +- get_workflows: List all available workflows +- start_workflow: Initialize a workflow session +- finished_step: Report step completion and get next instructions +""" + +from __future__ import annotations + +from pathlib import Path +from typing import TYPE_CHECKING + +from deepwork.core.parser import JobDefinition, ParseError, Workflow, parse_job_definition +from deepwork.mcp.schemas import ( + FinishedStepInput, + FinishedStepResponse, + GetWorkflowsResponse, + JobInfo, + StartWorkflowInput, + StartWorkflowResponse, + StepInfo, + StepStatus, + WorkflowInfo, + WorkflowStepEntryInfo, +) +from deepwork.mcp.state import StateManager + +if TYPE_CHECKING: + from deepwork.mcp.quality_gate import QualityGate + + +class ToolError(Exception): + """Exception raised for tool execution errors.""" + + pass + + +class WorkflowTools: + """Implements the MCP tools for workflow management.""" + + def __init__( + self, + project_root: Path, + state_manager: StateManager, + quality_gate: QualityGate | None = None, + ): + """Initialize workflow tools. + + Args: + project_root: Path to project root + state_manager: State manager instance + quality_gate: Optional quality gate for step validation + """ + self.project_root = project_root + self.jobs_dir = project_root / ".deepwork" / "jobs" + self.state_manager = state_manager + self.quality_gate = quality_gate + + def _load_all_jobs(self) -> list[JobDefinition]: + """Load all job definitions from the jobs directory. + + Returns: + List of parsed JobDefinition objects + """ + jobs: list[JobDefinition] = [] + + if not self.jobs_dir.exists(): + return jobs + + for job_dir in self.jobs_dir.iterdir(): + if job_dir.is_dir() and (job_dir / "job.yml").exists(): + try: + job = parse_job_definition(job_dir) + jobs.append(job) + except ParseError: + # Skip invalid job definitions + continue + + return jobs + + def _job_to_info(self, job: JobDefinition) -> JobInfo: + """Convert a JobDefinition to JobInfo for response. + + Args: + job: Parsed job definition + + Returns: + JobInfo with workflow and step details + """ + # Convert workflows + workflows = [] + workflow_step_ids: set[str] = set() + + for wf in job.workflows: + workflow_step_ids.update(wf.steps) + + step_entries = [ + WorkflowStepEntryInfo( + step_ids=entry.step_ids, + is_concurrent=entry.is_concurrent, + ) + for entry in wf.step_entries + ] + + workflows.append( + WorkflowInfo( + name=wf.name, + summary=wf.summary, + steps=wf.steps, + step_entries=step_entries, + first_step=wf.steps[0] if wf.steps else "", + ) + ) + + # Find standalone steps (not in any workflow) + standalone_steps = [ + StepInfo( + id=step.id, + name=step.name, + description=step.description, + dependencies=step.dependencies, + ) + for step in job.steps + if step.id not in workflow_step_ids + ] + + return JobInfo( + name=job.name, + summary=job.summary, + description=job.description, + workflows=workflows, + standalone_steps=standalone_steps, + ) + + def _get_job(self, job_name: str) -> JobDefinition: + """Get a specific job by name. 
+ + Args: + job_name: Job name to find + + Returns: + JobDefinition + + Raises: + ToolError: If job not found + """ + job_dir = self.jobs_dir / job_name + if not job_dir.exists(): + raise ToolError(f"Job not found: {job_name}") + + try: + return parse_job_definition(job_dir) + except ParseError as e: + raise ToolError(f"Failed to parse job '{job_name}': {e}") from e + + def _get_workflow(self, job: JobDefinition, workflow_name: str) -> Workflow: + """Get a specific workflow from a job. + + Args: + job: Job definition + workflow_name: Workflow name to find + + Returns: + Workflow + + Raises: + ToolError: If workflow not found + """ + for wf in job.workflows: + if wf.name == workflow_name: + return wf + + available = [wf.name for wf in job.workflows] + raise ToolError( + f"Workflow '{workflow_name}' not found in job '{job.name}'. " + f"Available workflows: {', '.join(available)}" + ) + + def _get_step_instructions(self, job: JobDefinition, step_id: str) -> str: + """Get the instruction content for a step. + + Args: + job: Job definition + step_id: Step ID + + Returns: + Step instruction content + + Raises: + ToolError: If step or instruction file not found + """ + step = job.get_step(step_id) + if step is None: + raise ToolError(f"Step not found: {step_id}") + + instructions_path = job.job_dir / step.instructions_file + if not instructions_path.exists(): + raise ToolError( + f"Instructions file not found: {step.instructions_file}" + ) + + return instructions_path.read_text(encoding="utf-8") + + # ========================================================================= + # Tool Implementations + # ========================================================================= + + def get_workflows(self) -> GetWorkflowsResponse: + """List all available workflows. + + Returns: + GetWorkflowsResponse with all jobs and their workflows + """ + jobs = self._load_all_jobs() + job_infos = [self._job_to_info(job) for job in jobs] + + return GetWorkflowsResponse(jobs=job_infos) + + def start_workflow(self, input_data: StartWorkflowInput) -> StartWorkflowResponse: + """Start a new workflow session. 
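+
+        Creates the session, marks the first step as started, and returns that
+        step's instructions, expected outputs, and quality criteria.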
+ + Args: + input_data: StartWorkflowInput with goal, job_name, workflow_name + + Returns: + StartWorkflowResponse with session ID, branch, and first step + + Raises: + ToolError: If job or workflow not found + """ + # Load job and workflow + job = self._get_job(input_data.job_name) + workflow = self._get_workflow(job, input_data.workflow_name) + + if not workflow.steps: + raise ToolError(f"Workflow '{workflow.name}' has no steps") + + first_step_id = workflow.steps[0] + first_step = job.get_step(first_step_id) + if first_step is None: + raise ToolError(f"First step not found: {first_step_id}") + + # Create session + session = self.state_manager.create_session( + job_name=input_data.job_name, + workflow_name=input_data.workflow_name, + goal=input_data.goal, + first_step_id=first_step_id, + instance_id=input_data.instance_id, + ) + + # Mark first step as started + self.state_manager.start_step(first_step_id) + + # Get step instructions + instructions = self._get_step_instructions(job, first_step_id) + + # Get expected outputs + step_outputs = [out.file for out in first_step.outputs] + + return StartWorkflowResponse( + session_id=session.session_id, + branch_name=session.branch_name, + current_step_id=first_step_id, + step_instructions=instructions, + step_outputs=step_outputs, + quality_criteria=first_step.quality_criteria, + ) + + def finished_step(self, input_data: FinishedStepInput) -> FinishedStepResponse: + """Report step completion and get next instructions. + + Args: + input_data: FinishedStepInput with outputs and optional notes + + Returns: + FinishedStepResponse with status and next step or completion + + Raises: + StateError: If no active session + ToolError: If quality gate fails after max attempts + """ + session = self.state_manager.require_active_session() + current_step_id = session.current_step_id + + # Load job and workflow + job = self._get_job(session.job_name) + workflow = self._get_workflow(job, session.workflow_name) + current_step = job.get_step(current_step_id) + + if current_step is None: + raise ToolError(f"Current step not found: {current_step_id}") + + # Run quality gate if available and step has criteria + if self.quality_gate and current_step.quality_criteria: + attempts = self.state_manager.record_quality_attempt(current_step_id) + + instructions = self._get_step_instructions(job, current_step_id) + result = self.quality_gate.evaluate( + step_instructions=instructions, + quality_criteria=current_step.quality_criteria, + outputs=input_data.outputs, + project_root=self.project_root, + ) + + if not result.passed: + # Check max attempts + max_attempts = 3 # Could be configurable + if attempts >= max_attempts: + raise ToolError( + f"Quality gate failed after {max_attempts} attempts. 
" + f"Feedback: {result.feedback}" + ) + + # Return needs_work status + failed_criteria = [ + cr for cr in result.criteria_results if not cr.passed + ] + return FinishedStepResponse( + status=StepStatus.NEEDS_WORK, + feedback=result.feedback, + failed_criteria=failed_criteria, + ) + + # Mark step as completed + self.state_manager.complete_step( + step_id=current_step_id, + outputs=input_data.outputs, + notes=input_data.notes, + ) + + # Find next step + current_entry_index = session.current_entry_index + next_entry_index = current_entry_index + 1 + + if next_entry_index >= len(workflow.step_entries): + # Workflow complete + self.state_manager.complete_workflow() + all_outputs = self.state_manager.get_all_outputs() + + return FinishedStepResponse( + status=StepStatus.WORKFLOW_COMPLETE, + summary=f"Workflow '{workflow.name}' completed successfully!", + all_outputs=all_outputs, + ) + + # Get next step + next_entry = workflow.step_entries[next_entry_index] + + # For concurrent entries, we use the first step as the "current" + # The agent will handle running them in parallel via Task tool + next_step_id = next_entry.step_ids[0] + next_step = job.get_step(next_step_id) + + if next_step is None: + raise ToolError(f"Next step not found: {next_step_id}") + + # Advance session + self.state_manager.advance_to_step(next_step_id, next_entry_index) + self.state_manager.start_step(next_step_id) + + # Get instructions + instructions = self._get_step_instructions(job, next_step_id) + step_outputs = [out.file for out in next_step.outputs] + + # Build response with concurrent step info if applicable + response = FinishedStepResponse( + status=StepStatus.NEXT_STEP, + next_step_id=next_step_id, + step_instructions=instructions, + step_outputs=step_outputs, + quality_criteria=next_step.quality_criteria, + ) + + # Add info about concurrent steps if this is a concurrent entry + if next_entry.is_concurrent and len(next_entry.step_ids) > 1: + concurrent_info = ( + f"\n\n**CONCURRENT STEPS**: This entry has {len(next_entry.step_ids)} " + f"steps that can run in parallel: {', '.join(next_entry.step_ids)}\n" + f"Use the Task tool to execute them concurrently." + ) + response.step_instructions = instructions + concurrent_info + + return response diff --git a/src/deepwork/templates/claude/skill-deepwork.md.jinja b/src/deepwork/templates/claude/skill-deepwork.md.jinja new file mode 100644 index 00000000..9d555058 --- /dev/null +++ b/src/deepwork/templates/claude/skill-deepwork.md.jinja @@ -0,0 +1,149 @@ +{# +Template: skill-deepwork.md.jinja +Purpose: Generates the main /deepwork skill that instructs agents to use MCP tools + +This template is used to create the entry-point skill for DeepWork. +Instead of containing step instructions, it directs agents to use the +DeepWork MCP server tools. +#} +--- +name: deepwork +description: "Start or continue DeepWork workflows using MCP tools" +--- + +# DeepWork Workflow Manager + +Execute multi-step workflows with quality gate checkpoints. + +> **IMPORTANT**: This skill uses the DeepWork MCP server. All workflow operations +> are performed through MCP tool calls, not by reading instructions from files. + +## Quick Start + +1. **Discover workflows**: Call `get_workflows` to see available options +2. **Start a workflow**: Call `start_workflow` with your goal +3. **Execute steps**: Follow the instructions returned +4. **Checkpoint**: Call `finished_step` with your outputs +5. 
**Iterate or continue**: Handle `needs_work`, `next_step`, or `workflow_complete` + +## MCP Tools Reference + +### get_workflows + +Lists all available workflows in this project. + +``` +Tool: deepwork.get_workflows +Parameters: none +``` + +Returns jobs with their workflows, steps, and summaries. + +### start_workflow + +Begins a new workflow session. + +``` +Tool: deepwork.start_workflow +Parameters: + - goal: string (required) - What you want to accomplish + - job_name: string (required) - Name of the job + - workflow_name: string (required) - Name of the workflow + - instance_id: string (optional) - Identifier like "acme" or "q1-2026" +``` + +Returns session ID, branch name, and first step instructions. + +### finished_step + +Reports completion of the current step. + +``` +Tool: deepwork.finished_step +Parameters: + - outputs: list[string] (required) - File paths of created outputs + - notes: string (optional) - Notes about what was done +``` + +Returns one of: +- `needs_work`: Quality criteria not met; fix and retry +- `next_step`: Proceed to next step with new instructions +- `workflow_complete`: All steps done; workflow finished + +## Execution Flow + +``` +User: /deepwork [intent] + │ + ▼ +┌─────────────────┐ +│ get_workflows │ ◄── Discover available workflows +└────────┬────────┘ + │ + ▼ +┌─────────────────┐ +│ Parse intent │ ◄── Match user intent to workflow +└────────┬────────┘ + │ + ▼ +┌─────────────────┐ +│ start_workflow │ ◄── Begin session, get first step +└────────┬────────┘ + │ + ▼ +┌─────────────────┐ +│ Execute step │ ◄── Follow step instructions +│ Create outputs │ +└────────┬────────┘ + │ + ▼ +┌─────────────────┐ +│ finished_step │ ◄── Report completion +└────────┬────────┘ + │ + ┌────┴────┐ + │ │ +needs_work next_step ─────► Loop back to "Execute step" + │ │ + │ workflow_complete + │ │ + ▼ ▼ +┌─────────────────┐ +│ Fix issues and │ Done! +│ retry │ +└─────────────────┘ +``` + +## Intent Parsing + +When the user invokes `/deepwork`, parse their intent: + +1. **Explicit workflow**: `/deepwork new_job` → start `new_job` workflow +2. **General request**: `/deepwork I want to create a new workflow` → infer best match +3. **No context**: `/deepwork` alone → call `get_workflows` and ask user to choose + +## Quality Gates + +Steps may have quality criteria. When you call `finished_step`: + +1. Outputs are evaluated against criteria +2. If any fail → `needs_work` status with feedback +3. Fix issues based on feedback +4. Call `finished_step` again +5. After passing → proceed to next step + +## Git Workflow + +DeepWork creates branches for workflow instances: +- Format: `deepwork/{job_name}-{workflow_name}-{instance_id or date}` +- Example: `deepwork/competitive_research-full_analysis-acme` + +Commit work as you go. Create PR when workflow completes. 
+ +## Guardrails + +- Always use MCP tools; never manually read step instruction files +- Create ALL expected outputs before calling `finished_step` +- Read quality gate feedback carefully before retrying +- Don't skip steps unless user explicitly requests it +- Ask for clarification when user intent is ambiguous diff --git a/tests/unit/mcp/__init__.py b/tests/unit/mcp/__init__.py new file mode 100644 index 00000000..34e50282 --- /dev/null +++ b/tests/unit/mcp/__init__.py @@ -0,0 +1 @@ +"""Tests for MCP module.""" diff --git a/tests/unit/mcp/test_quality_gate.py b/tests/unit/mcp/test_quality_gate.py new file mode 100644 index 00000000..d5b55c77 --- /dev/null +++ b/tests/unit/mcp/test_quality_gate.py @@ -0,0 +1,183 @@ +"""Tests for MCP quality gate.""" + +from pathlib import Path + +import pytest + +from deepwork.mcp.quality_gate import MockQualityGate, QualityGate, QualityGateError + + +@pytest.fixture +def project_root(tmp_path: Path) -> Path: + """Create a temporary project root.""" + return tmp_path + + +@pytest.fixture +def quality_gate() -> QualityGate: + """Create a QualityGate instance.""" + return QualityGate(command="echo test", timeout=10) + + +class TestQualityGate: + """Tests for QualityGate class.""" + + def test_init(self) -> None: + """Test QualityGate initialization.""" + gate = QualityGate(command="claude -p", timeout=60) + + assert gate.command == "claude -p" + assert gate.timeout == 60 + + def test_init_defaults(self) -> None: + """Test QualityGate default values.""" + gate = QualityGate() + + assert gate.command == "claude -p --output-format json" + assert gate.timeout == 120 + + def test_build_review_prompt(self, quality_gate: QualityGate, project_root: Path) -> None: + """Test building review prompt.""" + # Create test output file + output_file = project_root / "output.md" + output_file.write_text("Test content") + + prompt = quality_gate._build_review_prompt( + step_instructions="Do something", + quality_criteria=["Output must exist", "Output must be valid"], + outputs=["output.md"], + project_root=project_root, + ) + + assert "Do something" in prompt + assert "Output must exist" in prompt + assert "Output must be valid" in prompt + assert "Test content" in prompt + assert "output.md" in prompt + + def test_build_review_prompt_missing_file( + self, quality_gate: QualityGate, project_root: Path + ) -> None: + """Test building prompt with missing file.""" + prompt = quality_gate._build_review_prompt( + step_instructions="Do something", + quality_criteria=["Criteria"], + outputs=["nonexistent.md"], + project_root=project_root, + ) + + assert "File not found" in prompt + + def test_parse_response_valid_json(self, quality_gate: QualityGate) -> None: + """Test parsing valid JSON response.""" + response = """ + Here's my evaluation: + + ```json + { + "passed": true, + "feedback": "All good", + "criteria_results": [ + {"criterion": "Test 1", "passed": true, "feedback": null} + ] + } + ``` + """ + + result = quality_gate._parse_response(response) + + assert result.passed is True + assert result.feedback == "All good" + assert len(result.criteria_results) == 1 + + def test_parse_response_failed(self, quality_gate: QualityGate) -> None: + """Test parsing failed evaluation response.""" + response = """ + ```json + { + "passed": false, + "feedback": "Issues found", + "criteria_results": [ + {"criterion": "Test 1", "passed": false, "feedback": "Failed"} + ] + } + ``` + """ + + result = quality_gate._parse_response(response) + + assert result.passed is False + assert 
result.feedback == "Issues found" + assert result.criteria_results[0].passed is False + + def test_parse_response_invalid_json(self, quality_gate: QualityGate) -> None: + """Test parsing invalid JSON response.""" + response = "This is not JSON" + + with pytest.raises(QualityGateError, match="Failed to parse"): + quality_gate._parse_response(response) + + def test_evaluate_no_criteria(self, quality_gate: QualityGate, project_root: Path) -> None: + """Test evaluation with no criteria auto-passes.""" + result = quality_gate.evaluate( + step_instructions="Do something", + quality_criteria=[], + outputs=["output.md"], + project_root=project_root, + ) + + assert result.passed is True + assert "auto-passing" in result.feedback.lower() + + +class TestMockQualityGate: + """Tests for MockQualityGate class.""" + + def test_mock_passes_by_default(self, project_root: Path) -> None: + """Test mock gate passes by default.""" + gate = MockQualityGate() + + result = gate.evaluate( + step_instructions="Do something", + quality_criteria=["Criterion 1"], + outputs=["output.md"], + project_root=project_root, + ) + + assert result.passed is True + assert len(gate.evaluations) == 1 + + def test_mock_can_fail(self, project_root: Path) -> None: + """Test mock gate can be configured to fail.""" + gate = MockQualityGate(should_pass=False, feedback="Mock failure") + + result = gate.evaluate( + step_instructions="Do something", + quality_criteria=["Criterion 1"], + outputs=["output.md"], + project_root=project_root, + ) + + assert result.passed is False + assert result.feedback == "Mock failure" + + def test_mock_records_evaluations(self, project_root: Path) -> None: + """Test mock gate records evaluations.""" + gate = MockQualityGate() + + gate.evaluate( + step_instructions="Instruction 1", + quality_criteria=["Criterion 1"], + outputs=["output1.md"], + project_root=project_root, + ) + gate.evaluate( + step_instructions="Instruction 2", + quality_criteria=["Criterion 2"], + outputs=["output2.md"], + project_root=project_root, + ) + + assert len(gate.evaluations) == 2 + assert gate.evaluations[0]["step_instructions"] == "Instruction 1" + assert gate.evaluations[1]["step_instructions"] == "Instruction 2" diff --git a/tests/unit/mcp/test_schemas.py b/tests/unit/mcp/test_schemas.py new file mode 100644 index 00000000..c498d785 --- /dev/null +++ b/tests/unit/mcp/test_schemas.py @@ -0,0 +1,359 @@ +"""Tests for MCP schemas.""" + + +from deepwork.mcp.schemas import ( + FinishedStepInput, + FinishedStepResponse, + JobInfo, + QualityCriteriaResult, + QualityGateResult, + StartWorkflowInput, + StartWorkflowResponse, + StepInfo, + StepProgress, + StepStatus, + WorkflowInfo, + WorkflowSession, + WorkflowStepEntryInfo, +) + + +class TestStepStatus: + """Tests for StepStatus enum.""" + + def test_enum_values(self) -> None: + """Test that enum has expected values.""" + assert StepStatus.NEEDS_WORK == "needs_work" + assert StepStatus.NEXT_STEP == "next_step" + assert StepStatus.WORKFLOW_COMPLETE == "workflow_complete" + + +class TestStepInfo: + """Tests for StepInfo model.""" + + def test_basic_step(self) -> None: + """Test creating basic step info.""" + step = StepInfo( + id="step1", + name="First Step", + description="Does something", + ) + + assert step.id == "step1" + assert step.name == "First Step" + assert step.description == "Does something" + assert step.dependencies == [] + + def test_step_with_dependencies(self) -> None: + """Test step with dependencies.""" + step = StepInfo( + id="step2", + name="Second Step", + 
description="Depends on step1", + dependencies=["step1"], + ) + + assert step.dependencies == ["step1"] + + +class TestWorkflowStepEntryInfo: + """Tests for WorkflowStepEntryInfo model.""" + + def test_sequential_entry(self) -> None: + """Test sequential step entry.""" + entry = WorkflowStepEntryInfo(step_ids=["step1"]) + + assert entry.step_ids == ["step1"] + assert entry.is_concurrent is False + + def test_concurrent_entry(self) -> None: + """Test concurrent step entry.""" + entry = WorkflowStepEntryInfo( + step_ids=["step1", "step2"], + is_concurrent=True, + ) + + assert entry.step_ids == ["step1", "step2"] + assert entry.is_concurrent is True + + +class TestWorkflowInfo: + """Tests for WorkflowInfo model.""" + + def test_basic_workflow(self) -> None: + """Test basic workflow info.""" + workflow = WorkflowInfo( + name="test_workflow", + summary="A test workflow", + steps=["step1", "step2"], + step_entries=[ + WorkflowStepEntryInfo(step_ids=["step1"]), + WorkflowStepEntryInfo(step_ids=["step2"]), + ], + first_step="step1", + ) + + assert workflow.name == "test_workflow" + assert workflow.first_step == "step1" + assert len(workflow.steps) == 2 + + +class TestJobInfo: + """Tests for JobInfo model.""" + + def test_basic_job(self) -> None: + """Test basic job info.""" + job = JobInfo( + name="test_job", + summary="A test job", + ) + + assert job.name == "test_job" + assert job.summary == "A test job" + assert job.description is None + assert job.workflows == [] + assert job.standalone_steps == [] + + +class TestStartWorkflowInput: + """Tests for StartWorkflowInput model.""" + + def test_required_fields(self) -> None: + """Test required fields only.""" + input_data = StartWorkflowInput( + goal="Complete a task", + job_name="test_job", + workflow_name="main", + ) + + assert input_data.goal == "Complete a task" + assert input_data.job_name == "test_job" + assert input_data.workflow_name == "main" + assert input_data.instance_id is None + + def test_with_instance_id(self) -> None: + """Test with optional instance_id.""" + input_data = StartWorkflowInput( + goal="Complete a task", + job_name="test_job", + workflow_name="main", + instance_id="acme", + ) + + assert input_data.instance_id == "acme" + + +class TestFinishedStepInput: + """Tests for FinishedStepInput model.""" + + def test_with_outputs(self) -> None: + """Test with outputs only.""" + input_data = FinishedStepInput(outputs=["output1.md", "output2.md"]) + + assert input_data.outputs == ["output1.md", "output2.md"] + assert input_data.notes is None + + def test_with_notes(self) -> None: + """Test with notes.""" + input_data = FinishedStepInput( + outputs=["output.md"], + notes="Completed successfully", + ) + + assert input_data.notes == "Completed successfully" + + +class TestQualityCriteriaResult: + """Tests for QualityCriteriaResult model.""" + + def test_passed_criterion(self) -> None: + """Test passed criterion.""" + result = QualityCriteriaResult( + criterion="Output must be valid", + passed=True, + ) + + assert result.passed is True + assert result.feedback is None + + def test_failed_criterion(self) -> None: + """Test failed criterion with feedback.""" + result = QualityCriteriaResult( + criterion="Output must be valid", + passed=False, + feedback="Output was incomplete", + ) + + assert result.passed is False + assert result.feedback == "Output was incomplete" + + +class TestQualityGateResult: + """Tests for QualityGateResult model.""" + + def test_passed_gate(self) -> None: + """Test passed quality gate.""" + result = 
QualityGateResult( + passed=True, + feedback="All criteria met", + criteria_results=[ + QualityCriteriaResult(criterion="Test 1", passed=True), + ], + ) + + assert result.passed is True + assert len(result.criteria_results) == 1 + + def test_failed_gate(self) -> None: + """Test failed quality gate.""" + result = QualityGateResult( + passed=False, + feedback="Some criteria failed", + criteria_results=[ + QualityCriteriaResult(criterion="Test 1", passed=True), + QualityCriteriaResult( + criterion="Test 2", + passed=False, + feedback="Failed check", + ), + ], + ) + + assert result.passed is False + assert len(result.criteria_results) == 2 + + +class TestStartWorkflowResponse: + """Tests for StartWorkflowResponse model.""" + + def test_basic_response(self) -> None: + """Test basic response.""" + response = StartWorkflowResponse( + session_id="abc123", + branch_name="deepwork/test-main-20240101", + current_step_id="step1", + step_instructions="Do something", + step_outputs=["output.md"], + ) + + assert response.session_id == "abc123" + assert response.branch_name == "deepwork/test-main-20240101" + assert response.current_step_id == "step1" + assert response.quality_criteria == [] + + +class TestFinishedStepResponse: + """Tests for FinishedStepResponse model.""" + + def test_needs_work_status(self) -> None: + """Test needs_work response.""" + response = FinishedStepResponse( + status=StepStatus.NEEDS_WORK, + feedback="Fix the issues", + failed_criteria=[ + QualityCriteriaResult(criterion="Test", passed=False, feedback="Failed"), + ], + ) + + assert response.status == StepStatus.NEEDS_WORK + assert response.feedback is not None + assert response.next_step_id is None + + def test_next_step_status(self) -> None: + """Test next_step response.""" + response = FinishedStepResponse( + status=StepStatus.NEXT_STEP, + next_step_id="step2", + step_instructions="Next step instructions", + step_outputs=["output2.md"], + ) + + assert response.status == StepStatus.NEXT_STEP + assert response.next_step_id == "step2" + assert response.summary is None + + def test_workflow_complete_status(self) -> None: + """Test workflow_complete response.""" + response = FinishedStepResponse( + status=StepStatus.WORKFLOW_COMPLETE, + summary="Workflow completed!", + all_outputs=["output1.md", "output2.md"], + ) + + assert response.status == StepStatus.WORKFLOW_COMPLETE + assert response.summary is not None + assert response.all_outputs is not None + + +class TestStepProgress: + """Tests for StepProgress model.""" + + def test_new_step(self) -> None: + """Test new step progress.""" + progress = StepProgress(step_id="step1") + + assert progress.step_id == "step1" + assert progress.started_at is None + assert progress.completed_at is None + assert progress.outputs == [] + assert progress.quality_attempts == 0 + + +class TestWorkflowSession: + """Tests for WorkflowSession model.""" + + def test_basic_session(self) -> None: + """Test basic session creation.""" + session = WorkflowSession( + session_id="abc123", + job_name="test_job", + workflow_name="main", + goal="Complete the task", + branch_name="deepwork/test-main-20240101", + current_step_id="step1", + started_at="2024-01-01T00:00:00Z", + ) + + assert session.session_id == "abc123" + assert session.job_name == "test_job" + assert session.status == "active" + assert session.completed_at is None + + def test_to_dict(self) -> None: + """Test converting session to dict.""" + session = WorkflowSession( + session_id="abc123", + job_name="test_job", + workflow_name="main", + 
goal="Complete the task", + branch_name="deepwork/test-main-20240101", + current_step_id="step1", + started_at="2024-01-01T00:00:00Z", + ) + + data = session.to_dict() + + assert isinstance(data, dict) + assert data["session_id"] == "abc123" + assert data["job_name"] == "test_job" + + def test_from_dict(self) -> None: + """Test creating session from dict.""" + data = { + "session_id": "abc123", + "job_name": "test_job", + "workflow_name": "main", + "goal": "Complete the task", + "branch_name": "deepwork/test-main-20240101", + "current_step_id": "step1", + "current_entry_index": 0, + "step_progress": {}, + "started_at": "2024-01-01T00:00:00Z", + "completed_at": None, + "status": "active", + } + + session = WorkflowSession.from_dict(data) + + assert session.session_id == "abc123" + assert session.job_name == "test_job" diff --git a/tests/unit/mcp/test_state.py b/tests/unit/mcp/test_state.py new file mode 100644 index 00000000..2eec2a0a --- /dev/null +++ b/tests/unit/mcp/test_state.py @@ -0,0 +1,287 @@ +"""Tests for MCP state management.""" + +from pathlib import Path + +import pytest + +from deepwork.mcp.state import StateError, StateManager + + +@pytest.fixture +def project_root(tmp_path: Path) -> Path: + """Create a temporary project root with .deepwork directory.""" + deepwork_dir = tmp_path / ".deepwork" + deepwork_dir.mkdir() + (deepwork_dir / "tmp").mkdir() + return tmp_path + + +@pytest.fixture +def state_manager(project_root: Path) -> StateManager: + """Create a StateManager instance.""" + return StateManager(project_root) + + +class TestStateManager: + """Tests for StateManager class.""" + + def test_init(self, state_manager: StateManager, project_root: Path) -> None: + """Test StateManager initialization.""" + assert state_manager.project_root == project_root + assert state_manager.sessions_dir == project_root / ".deepwork" / "tmp" + assert state_manager._active_session is None + + def test_generate_session_id(self, state_manager: StateManager) -> None: + """Test session ID generation.""" + session_id = state_manager._generate_session_id() + + assert isinstance(session_id, str) + assert len(session_id) == 8 + + def test_generate_branch_name_with_instance(self, state_manager: StateManager) -> None: + """Test branch name generation with instance ID.""" + branch = state_manager._generate_branch_name("test_job", "main", "acme") + + assert branch == "deepwork/test_job-main-acme" + + def test_generate_branch_name_without_instance(self, state_manager: StateManager) -> None: + """Test branch name generation without instance ID (uses date).""" + branch = state_manager._generate_branch_name("test_job", "main", None) + + assert branch.startswith("deepwork/test_job-main-") + # Should be a date like 20240101 + assert len(branch.split("-")[-1]) == 8 + + def test_create_session(self, state_manager: StateManager) -> None: + """Test creating a new session.""" + session = state_manager.create_session( + job_name="test_job", + workflow_name="main", + goal="Complete the task", + first_step_id="step1", + instance_id="acme", + ) + + assert session.job_name == "test_job" + assert session.workflow_name == "main" + assert session.goal == "Complete the task" + assert session.current_step_id == "step1" + assert session.instance_id == "acme" + assert session.status == "active" + assert "acme" in session.branch_name + + # Verify session file was created + session_file = state_manager._session_file(session.session_id) + assert session_file.exists() + + def test_load_session(self, state_manager: StateManager) -> 
None: + """Test loading an existing session.""" + # Create a session first + created_session = state_manager.create_session( + job_name="test_job", + workflow_name="main", + goal="Complete the task", + first_step_id="step1", + ) + + # Create a new state manager and load the session + new_manager = StateManager(state_manager.project_root) + loaded_session = new_manager.load_session(created_session.session_id) + + assert loaded_session.session_id == created_session.session_id + assert loaded_session.job_name == "test_job" + assert loaded_session.goal == "Complete the task" + + def test_load_session_not_found(self, state_manager: StateManager) -> None: + """Test loading non-existent session.""" + with pytest.raises(StateError, match="Session not found"): + state_manager.load_session("nonexistent") + + def test_get_active_session(self, state_manager: StateManager) -> None: + """Test getting active session.""" + # No active session initially + assert state_manager.get_active_session() is None + + # Create session + session = state_manager.create_session( + job_name="test_job", + workflow_name="main", + goal="Complete the task", + first_step_id="step1", + ) + + assert state_manager.get_active_session() == session + + def test_require_active_session(self, state_manager: StateManager) -> None: + """Test require_active_session raises when no session.""" + with pytest.raises(StateError, match="No active workflow session"): + state_manager.require_active_session() + + def test_start_step(self, state_manager: StateManager) -> None: + """Test marking a step as started.""" + state_manager.create_session( + job_name="test_job", + workflow_name="main", + goal="Complete the task", + first_step_id="step1", + ) + + state_manager.start_step("step2") + session = state_manager.get_active_session() + + assert session is not None + assert session.current_step_id == "step2" + assert "step2" in session.step_progress + assert session.step_progress["step2"].started_at is not None + + def test_complete_step(self, state_manager: StateManager) -> None: + """Test marking a step as completed.""" + state_manager.create_session( + job_name="test_job", + workflow_name="main", + goal="Complete the task", + first_step_id="step1", + ) + + state_manager.complete_step( + step_id="step1", + outputs=["output1.md", "output2.md"], + notes="Done!", + ) + + session = state_manager.get_active_session() + assert session is not None + progress = session.step_progress["step1"] + + assert progress.completed_at is not None + assert progress.outputs == ["output1.md", "output2.md"] + assert progress.notes == "Done!" 
+ + def test_record_quality_attempt(self, state_manager: StateManager) -> None: + """Test recording quality gate attempts.""" + state_manager.create_session( + job_name="test_job", + workflow_name="main", + goal="Complete the task", + first_step_id="step1", + ) + + # First attempt + attempts = state_manager.record_quality_attempt("step1") + assert attempts == 1 + + # Second attempt + attempts = state_manager.record_quality_attempt("step1") + assert attempts == 2 + + def test_advance_to_step(self, state_manager: StateManager) -> None: + """Test advancing to a new step.""" + state_manager.create_session( + job_name="test_job", + workflow_name="main", + goal="Complete the task", + first_step_id="step1", + ) + + state_manager.advance_to_step("step2", 1) + session = state_manager.get_active_session() + + assert session is not None + assert session.current_step_id == "step2" + assert session.current_entry_index == 1 + + def test_complete_workflow(self, state_manager: StateManager) -> None: + """Test marking workflow as complete.""" + state_manager.create_session( + job_name="test_job", + workflow_name="main", + goal="Complete the task", + first_step_id="step1", + ) + + state_manager.complete_workflow() + session = state_manager.get_active_session() + + assert session is not None + assert session.status == "completed" + assert session.completed_at is not None + + def test_get_all_outputs(self, state_manager: StateManager) -> None: + """Test getting all outputs from completed steps.""" + state_manager.create_session( + job_name="test_job", + workflow_name="main", + goal="Complete the task", + first_step_id="step1", + ) + + state_manager.complete_step("step1", ["output1.md"]) + state_manager.complete_step("step2", ["output2.md", "output3.md"]) + + outputs = state_manager.get_all_outputs() + + assert "output1.md" in outputs + assert "output2.md" in outputs + assert "output3.md" in outputs + assert len(outputs) == 3 + + def test_list_sessions(self, state_manager: StateManager) -> None: + """Test listing all sessions.""" + # Create multiple sessions + state_manager.create_session( + job_name="job1", + workflow_name="main", + goal="Goal 1", + first_step_id="step1", + ) + state_manager.create_session( + job_name="job2", + workflow_name="main", + goal="Goal 2", + first_step_id="step1", + ) + + sessions = state_manager.list_sessions() + + assert len(sessions) == 2 + job_names = {s.job_name for s in sessions} + assert "job1" in job_names + assert "job2" in job_names + + def test_find_active_sessions_for_workflow(self, state_manager: StateManager) -> None: + """Test finding active sessions for a workflow.""" + # Create sessions for different workflows + state_manager.create_session( + job_name="test_job", + workflow_name="main", + goal="Goal 1", + first_step_id="step1", + ) + state_manager.create_session( + job_name="test_job", + workflow_name="other", + goal="Goal 2", + first_step_id="step1", + ) + + sessions = state_manager.find_active_sessions_for_workflow("test_job", "main") + + assert len(sessions) == 1 + assert sessions[0].workflow_name == "main" + + def test_delete_session(self, state_manager: StateManager) -> None: + """Test deleting a session.""" + session = state_manager.create_session( + job_name="test_job", + workflow_name="main", + goal="Goal", + first_step_id="step1", + ) + + session_file = state_manager._session_file(session.session_id) + assert session_file.exists() + + state_manager.delete_session(session.session_id) + + assert not session_file.exists() + assert 
state_manager.get_active_session() is None diff --git a/tests/unit/mcp/test_tools.py b/tests/unit/mcp/test_tools.py new file mode 100644 index 00000000..b783edb8 --- /dev/null +++ b/tests/unit/mcp/test_tools.py @@ -0,0 +1,310 @@ +"""Tests for MCP workflow tools.""" + +from pathlib import Path + +import pytest + +from deepwork.mcp.quality_gate import MockQualityGate +from deepwork.mcp.schemas import FinishedStepInput, StartWorkflowInput, StepStatus +from deepwork.mcp.state import StateError, StateManager +from deepwork.mcp.tools import ToolError, WorkflowTools + + +@pytest.fixture +def project_root(tmp_path: Path) -> Path: + """Create a temporary project with a test job.""" + # Create .deepwork directory + deepwork_dir = tmp_path / ".deepwork" + deepwork_dir.mkdir() + (deepwork_dir / "tmp").mkdir() + + # Create jobs directory with a test job + jobs_dir = deepwork_dir / "jobs" + jobs_dir.mkdir() + + job_dir = jobs_dir / "test_job" + job_dir.mkdir() + + # Create job.yml + job_yml = """ +name: test_job +version: "1.0.0" +summary: A test job +description: This is a test job for unit tests + +steps: + - id: step1 + name: First Step + description: The first step + instructions_file: steps/step1.md + outputs: + - output1.md + quality_criteria: + - Output must be valid + - id: step2 + name: Second Step + description: The second step + instructions_file: steps/step2.md + outputs: + - output2.md + dependencies: + - step1 + +workflows: + - name: main + summary: Main workflow + steps: + - step1 + - step2 +""" + (job_dir / "job.yml").write_text(job_yml) + + # Create step instruction files + steps_dir = job_dir / "steps" + steps_dir.mkdir() + (steps_dir / "step1.md").write_text("# Step 1\n\nDo the first thing.") + (steps_dir / "step2.md").write_text("# Step 2\n\nDo the second thing.") + + return tmp_path + + +@pytest.fixture +def state_manager(project_root: Path) -> StateManager: + """Create a StateManager instance.""" + return StateManager(project_root) + + +@pytest.fixture +def tools(project_root: Path, state_manager: StateManager) -> WorkflowTools: + """Create a WorkflowTools instance without quality gate.""" + return WorkflowTools( + project_root=project_root, + state_manager=state_manager, + ) + + +@pytest.fixture +def tools_with_quality(project_root: Path, state_manager: StateManager) -> WorkflowTools: + """Create a WorkflowTools instance with mock quality gate.""" + return WorkflowTools( + project_root=project_root, + state_manager=state_manager, + quality_gate=MockQualityGate(should_pass=True), + ) + + +class TestWorkflowTools: + """Tests for WorkflowTools class.""" + + def test_init(self, tools: WorkflowTools, project_root: Path) -> None: + """Test WorkflowTools initialization.""" + assert tools.project_root == project_root + assert tools.jobs_dir == project_root / ".deepwork" / "jobs" + + def test_get_workflows(self, tools: WorkflowTools) -> None: + """Test getting all workflows.""" + response = tools.get_workflows() + + assert len(response.jobs) == 1 + job = response.jobs[0] + + assert job.name == "test_job" + assert job.summary == "A test job" + assert len(job.workflows) == 1 + assert job.workflows[0].name == "main" + assert job.workflows[0].steps == ["step1", "step2"] + assert job.workflows[0].first_step == "step1" + + def test_get_workflows_empty(self, tmp_path: Path) -> None: + """Test getting workflows when no jobs exist.""" + deepwork_dir = tmp_path / ".deepwork" + deepwork_dir.mkdir() + (deepwork_dir / "tmp").mkdir() + + state_manager = StateManager(tmp_path) + tools = WorkflowTools( + 
project_root=tmp_path, + state_manager=state_manager, + ) + + response = tools.get_workflows() + + assert len(response.jobs) == 0 + + def test_start_workflow(self, tools: WorkflowTools) -> None: + """Test starting a workflow.""" + input_data = StartWorkflowInput( + goal="Complete the test job", + job_name="test_job", + workflow_name="main", + instance_id="test-instance", + ) + + response = tools.start_workflow(input_data) + + assert response.session_id is not None + assert "test-instance" in response.branch_name + assert response.current_step_id == "step1" + assert "Step 1" in response.step_instructions + assert "output1.md" in response.step_outputs + assert "Output must be valid" in response.quality_criteria + + def test_start_workflow_invalid_job(self, tools: WorkflowTools) -> None: + """Test starting workflow with invalid job.""" + input_data = StartWorkflowInput( + goal="Complete task", + job_name="nonexistent", + workflow_name="main", + ) + + with pytest.raises(ToolError, match="Job not found"): + tools.start_workflow(input_data) + + def test_start_workflow_invalid_workflow(self, tools: WorkflowTools) -> None: + """Test starting workflow with invalid workflow name.""" + input_data = StartWorkflowInput( + goal="Complete task", + job_name="test_job", + workflow_name="nonexistent", + ) + + with pytest.raises(ToolError, match="Workflow.*not found"): + tools.start_workflow(input_data) + + def test_finished_step_no_session(self, tools: WorkflowTools) -> None: + """Test finished_step without active session.""" + input_data = FinishedStepInput(outputs=["output1.md"]) + + with pytest.raises(StateError, match="No active workflow session"): + tools.finished_step(input_data) + + def test_finished_step_advances_to_next( + self, tools: WorkflowTools, project_root: Path + ) -> None: + """Test finished_step advances to next step.""" + # Start workflow first + start_input = StartWorkflowInput( + goal="Complete task", + job_name="test_job", + workflow_name="main", + ) + tools.start_workflow(start_input) + + # Create output file + (project_root / "output1.md").write_text("Test output") + + # Finish first step + finish_input = FinishedStepInput( + outputs=["output1.md"], + notes="Completed step 1", + ) + response = tools.finished_step(finish_input) + + assert response.status == StepStatus.NEXT_STEP + assert response.next_step_id == "step2" + assert response.step_instructions is not None + assert "Step 2" in response.step_instructions + + def test_finished_step_completes_workflow( + self, tools: WorkflowTools, project_root: Path + ) -> None: + """Test finished_step completes workflow on last step.""" + # Start workflow + start_input = StartWorkflowInput( + goal="Complete task", + job_name="test_job", + workflow_name="main", + ) + tools.start_workflow(start_input) + + # Complete first step + (project_root / "output1.md").write_text("Output 1") + tools.finished_step(FinishedStepInput(outputs=["output1.md"])) + + # Complete second (last) step + (project_root / "output2.md").write_text("Output 2") + response = tools.finished_step(FinishedStepInput(outputs=["output2.md"])) + + assert response.status == StepStatus.WORKFLOW_COMPLETE + assert response.summary is not None + assert "completed" in response.summary.lower() + assert "output1.md" in response.all_outputs + assert "output2.md" in response.all_outputs + + def test_finished_step_with_quality_gate_pass( + self, tools_with_quality: WorkflowTools, project_root: Path + ) -> None: + """Test finished_step passes quality gate.""" + # Start workflow + start_input 
= StartWorkflowInput( + goal="Complete task", + job_name="test_job", + workflow_name="main", + ) + tools_with_quality.start_workflow(start_input) + + # Create output and finish step + (project_root / "output1.md").write_text("Valid output") + response = tools_with_quality.finished_step( + FinishedStepInput(outputs=["output1.md"]) + ) + + # Should advance to next step + assert response.status == StepStatus.NEXT_STEP + + def test_finished_step_with_quality_gate_fail( + self, project_root: Path, state_manager: StateManager + ) -> None: + """Test finished_step fails quality gate.""" + # Create tools with failing quality gate + tools = WorkflowTools( + project_root=project_root, + state_manager=state_manager, + quality_gate=MockQualityGate(should_pass=False, feedback="Needs improvement"), + ) + + # Start workflow + start_input = StartWorkflowInput( + goal="Complete task", + job_name="test_job", + workflow_name="main", + ) + tools.start_workflow(start_input) + + # Create output and finish step + (project_root / "output1.md").write_text("Invalid output") + response = tools.finished_step(FinishedStepInput(outputs=["output1.md"])) + + assert response.status == StepStatus.NEEDS_WORK + assert response.feedback == "Needs improvement" + assert response.failed_criteria is not None + + def test_finished_step_quality_gate_max_attempts( + self, project_root: Path, state_manager: StateManager + ) -> None: + """Test finished_step fails after max quality gate attempts.""" + tools = WorkflowTools( + project_root=project_root, + state_manager=state_manager, + quality_gate=MockQualityGate(should_pass=False, feedback="Always fails"), + ) + + # Start workflow + start_input = StartWorkflowInput( + goal="Complete task", + job_name="test_job", + workflow_name="main", + ) + tools.start_workflow(start_input) + + # Create output + (project_root / "output1.md").write_text("Bad output") + + # Try multiple times (max is 3) + for _ in range(2): + response = tools.finished_step(FinishedStepInput(outputs=["output1.md"])) + assert response.status == StepStatus.NEEDS_WORK + + # Third attempt should raise error + with pytest.raises(ToolError, match="Quality gate failed after.*attempts"): + tools.finished_step(FinishedStepInput(outputs=["output1.md"])) diff --git a/uv.lock b/uv.lock index 5c61745e..ab3885ac 100644 --- a/uv.lock +++ b/uv.lock @@ -2,6 +2,37 @@ version = 1 revision = 3 requires-python = ">=3.11" +[[package]] +name = "annotated-types" +version = "0.7.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ee/67/531ea369ba64dcff5ec9c3402f9f51bf748cec26dde048a2f973a4eea7f5/annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89", size = 16081, upload-time = "2024-05-20T21:33:25.928Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53", size = 13643, upload-time = "2024-05-20T21:33:24.1Z" }, +] + +[[package]] +name = "anyio" +version = "4.12.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "idna" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/96/f0/5eb65b2bb0d09ac6776f2eb54adee6abe8228ea05b20a5ad0e4945de8aac/anyio-4.12.1.tar.gz", hash = 
"sha256:41cfcc3a4c85d3f05c932da7c26d0201ac36f72abd4435ba90d0464a3ffed703", size = 228685, upload-time = "2026-01-06T11:45:21.246Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/38/0e/27be9fdef66e72d64c0cdc3cc2823101b80585f8119b5c112c2e8f5f7dab/anyio-4.12.1-py3-none-any.whl", hash = "sha256:d405828884fc140aa80a3c667b8beed277f1dfedec42ba031bd6ac3db606ab6c", size = 113592, upload-time = "2026-01-06T11:45:19.497Z" }, +] + +[[package]] +name = "async-timeout" +version = "5.0.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a5/ae/136395dfbfe00dfc94da3f3e136d0b13f394cba8f4841120e34226265780/async_timeout-5.0.1.tar.gz", hash = "sha256:d9321a7a3d5a6a5e187e824d2fa0793ce379a202935782d555d6e9d2735677d3", size = 9274, upload-time = "2024-11-06T16:41:39.6Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fe/ba/e2081de779ca30d473f21f5b30e0e737c438205440784c7dfc81efc2b029/async_timeout-5.0.1-py3-none-any.whl", hash = "sha256:39e3809566ff85354557ec2398b55e096c8364bacac9405a7a1fa429e77fe76c", size = 6233, upload-time = "2024-11-06T16:41:37.9Z" }, +] + [[package]] name = "attrs" version = "25.4.0" @@ -11,6 +42,197 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/3a/2a/7cc015f5b9f5db42b7d48157e23356022889fc354a2813c15934b7cb5c0e/attrs-25.4.0-py3-none-any.whl", hash = "sha256:adcf7e2a1fb3b36ac48d97835bb6d8ade15b8dcce26aba8bf1d14847b57a3373", size = 67615, upload-time = "2025-10-06T13:54:43.17Z" }, ] +[[package]] +name = "authlib" +version = "1.6.6" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cryptography" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/bb/9b/b1661026ff24bc641b76b78c5222d614776b0c085bcfdac9bd15a1cb4b35/authlib-1.6.6.tar.gz", hash = "sha256:45770e8e056d0f283451d9996fbb59b70d45722b45d854d58f32878d0a40c38e", size = 164894, upload-time = "2025-12-12T08:01:41.464Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/54/51/321e821856452f7386c4e9df866f196720b1ad0c5ea1623ea7399969ae3b/authlib-1.6.6-py2.py3-none-any.whl", hash = "sha256:7d9e9bc535c13974313a87f53e8430eb6ea3d1cf6ae4f6efcd793f2e949143fd", size = 244005, upload-time = "2025-12-12T08:01:40.209Z" }, +] + +[[package]] +name = "backports-tarfile" +version = "1.2.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/86/72/cd9b395f25e290e633655a100af28cb253e4393396264a98bd5f5951d50f/backports_tarfile-1.2.0.tar.gz", hash = "sha256:d75e02c268746e1b8144c278978b6e98e85de6ad16f8e4b0844a154557eca991", size = 86406, upload-time = "2024-05-28T17:01:54.731Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b9/fa/123043af240e49752f1c4bd24da5053b6bd00cad78c2be53c0d1e8b975bc/backports.tarfile-1.2.0-py3-none-any.whl", hash = "sha256:77e284d754527b01fb1e6fa8a1afe577858ebe4e9dad8919e34c862cb399bc34", size = 30181, upload-time = "2024-05-28T17:01:53.112Z" }, +] + +[[package]] +name = "beartype" +version = "0.22.9" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/c7/94/1009e248bbfbab11397abca7193bea6626806be9a327d399810d523a07cb/beartype-0.22.9.tar.gz", hash = "sha256:8f82b54aa723a2848a56008d18875f91c1db02c32ef6a62319a002e3e25a975f", size = 1608866, upload-time = "2025-12-13T06:50:30.72Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/71/cc/18245721fa7747065ab478316c7fea7c74777d07f37ae60db2e84f8172e8/beartype-0.22.9-py3-none-any.whl", 
hash = "sha256:d16c9bbc61ea14637596c5f6fbff2ee99cbe3573e46a716401734ef50c3060c2", size = 1333658, upload-time = "2025-12-13T06:50:28.266Z" }, +] + +[[package]] +name = "cachetools" +version = "7.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/98/af/df70e9b65bc77a1cbe0768c0aa4617147f30f8306ded98c1744bcdc0ae1e/cachetools-7.0.0.tar.gz", hash = "sha256:a9abf18ff3b86c7d05b27ead412e235e16ae045925e531fae38d5fada5ed5b08", size = 35796, upload-time = "2026-02-01T18:59:47.411Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/28/df/2dd32cce20cbcf6f2ec456b58d44368161ad28320729f64e5e1d5d7bd0ae/cachetools-7.0.0-py3-none-any.whl", hash = "sha256:d52fef60e6e964a1969cfb61ccf6242a801b432790fe520d78720d757c81cbd2", size = 13487, upload-time = "2026-02-01T18:59:45.981Z" }, +] + +[[package]] +name = "certifi" +version = "2026.1.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e0/2d/a891ca51311197f6ad14a7ef42e2399f36cf2f9bd44752b3dc4eab60fdc5/certifi-2026.1.4.tar.gz", hash = "sha256:ac726dd470482006e014ad384921ed6438c457018f4b3d204aea4281258b2120", size = 154268, upload-time = "2026-01-04T02:42:41.825Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e6/ad/3cc14f097111b4de0040c83a525973216457bbeeb63739ef1ed275c1c021/certifi-2026.1.4-py3-none-any.whl", hash = "sha256:9943707519e4add1115f44c2bc244f782c0249876bf51b6599fee1ffbedd685c", size = 152900, upload-time = "2026-01-04T02:42:40.15Z" }, +] + +[[package]] +name = "cffi" +version = "2.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pycparser", marker = "implementation_name != 'PyPy'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/eb/56/b1ba7935a17738ae8453301356628e8147c79dbb825bcbc73dc7401f9846/cffi-2.0.0.tar.gz", hash = "sha256:44d1b5909021139fe36001ae048dbdde8214afa20200eda0f64c068cac5d5529", size = 523588, upload-time = "2025-09-08T23:24:04.541Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/12/4a/3dfd5f7850cbf0d06dc84ba9aa00db766b52ca38d8b86e3a38314d52498c/cffi-2.0.0-cp311-cp311-macosx_10_13_x86_64.whl", hash = "sha256:b4c854ef3adc177950a8dfc81a86f5115d2abd545751a304c5bcf2c2c7283cfe", size = 184344, upload-time = "2025-09-08T23:22:26.456Z" }, + { url = "https://files.pythonhosted.org/packages/4f/8b/f0e4c441227ba756aafbe78f117485b25bb26b1c059d01f137fa6d14896b/cffi-2.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2de9a304e27f7596cd03d16f1b7c72219bd944e99cc52b84d0145aefb07cbd3c", size = 180560, upload-time = "2025-09-08T23:22:28.197Z" }, + { url = "https://files.pythonhosted.org/packages/b1/b7/1200d354378ef52ec227395d95c2576330fd22a869f7a70e88e1447eb234/cffi-2.0.0-cp311-cp311-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:baf5215e0ab74c16e2dd324e8ec067ef59e41125d3eade2b863d294fd5035c92", size = 209613, upload-time = "2025-09-08T23:22:29.475Z" }, + { url = "https://files.pythonhosted.org/packages/b8/56/6033f5e86e8cc9bb629f0077ba71679508bdf54a9a5e112a3c0b91870332/cffi-2.0.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:730cacb21e1bdff3ce90babf007d0a0917cc3e6492f336c2f0134101e0944f93", size = 216476, upload-time = "2025-09-08T23:22:31.063Z" }, + { url = "https://files.pythonhosted.org/packages/dc/7f/55fecd70f7ece178db2f26128ec41430d8720f2d12ca97bf8f0a628207d5/cffi-2.0.0-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = 
"sha256:6824f87845e3396029f3820c206e459ccc91760e8fa24422f8b0c3d1731cbec5", size = 203374, upload-time = "2025-09-08T23:22:32.507Z" }, + { url = "https://files.pythonhosted.org/packages/84/ef/a7b77c8bdc0f77adc3b46888f1ad54be8f3b7821697a7b89126e829e676a/cffi-2.0.0-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:9de40a7b0323d889cf8d23d1ef214f565ab154443c42737dfe52ff82cf857664", size = 202597, upload-time = "2025-09-08T23:22:34.132Z" }, + { url = "https://files.pythonhosted.org/packages/d7/91/500d892b2bf36529a75b77958edfcd5ad8e2ce4064ce2ecfeab2125d72d1/cffi-2.0.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:8941aaadaf67246224cee8c3803777eed332a19d909b47e29c9842ef1e79ac26", size = 215574, upload-time = "2025-09-08T23:22:35.443Z" }, + { url = "https://files.pythonhosted.org/packages/44/64/58f6255b62b101093d5df22dcb752596066c7e89dd725e0afaed242a61be/cffi-2.0.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:a05d0c237b3349096d3981b727493e22147f934b20f6f125a3eba8f994bec4a9", size = 218971, upload-time = "2025-09-08T23:22:36.805Z" }, + { url = "https://files.pythonhosted.org/packages/ab/49/fa72cebe2fd8a55fbe14956f9970fe8eb1ac59e5df042f603ef7c8ba0adc/cffi-2.0.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:94698a9c5f91f9d138526b48fe26a199609544591f859c870d477351dc7b2414", size = 211972, upload-time = "2025-09-08T23:22:38.436Z" }, + { url = "https://files.pythonhosted.org/packages/0b/28/dd0967a76aab36731b6ebfe64dec4e981aff7e0608f60c2d46b46982607d/cffi-2.0.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:5fed36fccc0612a53f1d4d9a816b50a36702c28a2aa880cb8a122b3466638743", size = 217078, upload-time = "2025-09-08T23:22:39.776Z" }, + { url = "https://files.pythonhosted.org/packages/2b/c0/015b25184413d7ab0a410775fdb4a50fca20f5589b5dab1dbbfa3baad8ce/cffi-2.0.0-cp311-cp311-win32.whl", hash = "sha256:c649e3a33450ec82378822b3dad03cc228b8f5963c0c12fc3b1e0ab940f768a5", size = 172076, upload-time = "2025-09-08T23:22:40.95Z" }, + { url = "https://files.pythonhosted.org/packages/ae/8f/dc5531155e7070361eb1b7e4c1a9d896d0cb21c49f807a6c03fd63fc877e/cffi-2.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:66f011380d0e49ed280c789fbd08ff0d40968ee7b665575489afa95c98196ab5", size = 182820, upload-time = "2025-09-08T23:22:42.463Z" }, + { url = "https://files.pythonhosted.org/packages/95/5c/1b493356429f9aecfd56bc171285a4c4ac8697f76e9bbbbb105e537853a1/cffi-2.0.0-cp311-cp311-win_arm64.whl", hash = "sha256:c6638687455baf640e37344fe26d37c404db8b80d037c3d29f58fe8d1c3b194d", size = 177635, upload-time = "2025-09-08T23:22:43.623Z" }, + { url = "https://files.pythonhosted.org/packages/ea/47/4f61023ea636104d4f16ab488e268b93008c3d0bb76893b1b31db1f96802/cffi-2.0.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:6d02d6655b0e54f54c4ef0b94eb6be0607b70853c45ce98bd278dc7de718be5d", size = 185271, upload-time = "2025-09-08T23:22:44.795Z" }, + { url = "https://files.pythonhosted.org/packages/df/a2/781b623f57358e360d62cdd7a8c681f074a71d445418a776eef0aadb4ab4/cffi-2.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8eca2a813c1cb7ad4fb74d368c2ffbbb4789d377ee5bb8df98373c2cc0dee76c", size = 181048, upload-time = "2025-09-08T23:22:45.938Z" }, + { url = "https://files.pythonhosted.org/packages/ff/df/a4f0fbd47331ceeba3d37c2e51e9dfc9722498becbeec2bd8bc856c9538a/cffi-2.0.0-cp312-cp312-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:21d1152871b019407d8ac3985f6775c079416c282e431a4da6afe7aefd2bccbe", size = 212529, upload-time = 
"2025-09-08T23:22:47.349Z" }, + { url = "https://files.pythonhosted.org/packages/d5/72/12b5f8d3865bf0f87cf1404d8c374e7487dcf097a1c91c436e72e6badd83/cffi-2.0.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:b21e08af67b8a103c71a250401c78d5e0893beff75e28c53c98f4de42f774062", size = 220097, upload-time = "2025-09-08T23:22:48.677Z" }, + { url = "https://files.pythonhosted.org/packages/c2/95/7a135d52a50dfa7c882ab0ac17e8dc11cec9d55d2c18dda414c051c5e69e/cffi-2.0.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:1e3a615586f05fc4065a8b22b8152f0c1b00cdbc60596d187c2a74f9e3036e4e", size = 207983, upload-time = "2025-09-08T23:22:50.06Z" }, + { url = "https://files.pythonhosted.org/packages/3a/c8/15cb9ada8895957ea171c62dc78ff3e99159ee7adb13c0123c001a2546c1/cffi-2.0.0-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:81afed14892743bbe14dacb9e36d9e0e504cd204e0b165062c488942b9718037", size = 206519, upload-time = "2025-09-08T23:22:51.364Z" }, + { url = "https://files.pythonhosted.org/packages/78/2d/7fa73dfa841b5ac06c7b8855cfc18622132e365f5b81d02230333ff26e9e/cffi-2.0.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:3e17ed538242334bf70832644a32a7aae3d83b57567f9fd60a26257e992b79ba", size = 219572, upload-time = "2025-09-08T23:22:52.902Z" }, + { url = "https://files.pythonhosted.org/packages/07/e0/267e57e387b4ca276b90f0434ff88b2c2241ad72b16d31836adddfd6031b/cffi-2.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3925dd22fa2b7699ed2617149842d2e6adde22b262fcbfada50e3d195e4b3a94", size = 222963, upload-time = "2025-09-08T23:22:54.518Z" }, + { url = "https://files.pythonhosted.org/packages/b6/75/1f2747525e06f53efbd878f4d03bac5b859cbc11c633d0fb81432d98a795/cffi-2.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:2c8f814d84194c9ea681642fd164267891702542f028a15fc97d4674b6206187", size = 221361, upload-time = "2025-09-08T23:22:55.867Z" }, + { url = "https://files.pythonhosted.org/packages/7b/2b/2b6435f76bfeb6bbf055596976da087377ede68df465419d192acf00c437/cffi-2.0.0-cp312-cp312-win32.whl", hash = "sha256:da902562c3e9c550df360bfa53c035b2f241fed6d9aef119048073680ace4a18", size = 172932, upload-time = "2025-09-08T23:22:57.188Z" }, + { url = "https://files.pythonhosted.org/packages/f8/ed/13bd4418627013bec4ed6e54283b1959cf6db888048c7cf4b4c3b5b36002/cffi-2.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:da68248800ad6320861f129cd9c1bf96ca849a2771a59e0344e88681905916f5", size = 183557, upload-time = "2025-09-08T23:22:58.351Z" }, + { url = "https://files.pythonhosted.org/packages/95/31/9f7f93ad2f8eff1dbc1c3656d7ca5bfd8fb52c9d786b4dcf19b2d02217fa/cffi-2.0.0-cp312-cp312-win_arm64.whl", hash = "sha256:4671d9dd5ec934cb9a73e7ee9676f9362aba54f7f34910956b84d727b0d73fb6", size = 177762, upload-time = "2025-09-08T23:22:59.668Z" }, + { url = "https://files.pythonhosted.org/packages/4b/8d/a0a47a0c9e413a658623d014e91e74a50cdd2c423f7ccfd44086ef767f90/cffi-2.0.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:00bdf7acc5f795150faa6957054fbbca2439db2f775ce831222b66f192f03beb", size = 185230, upload-time = "2025-09-08T23:23:00.879Z" }, + { url = "https://files.pythonhosted.org/packages/4a/d2/a6c0296814556c68ee32009d9c2ad4f85f2707cdecfd7727951ec228005d/cffi-2.0.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:45d5e886156860dc35862657e1494b9bae8dfa63bf56796f2fb56e1679fc0bca", size = 181043, upload-time = "2025-09-08T23:23:02.231Z" }, + { url = 
"https://files.pythonhosted.org/packages/b0/1e/d22cc63332bd59b06481ceaac49d6c507598642e2230f201649058a7e704/cffi-2.0.0-cp313-cp313-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:07b271772c100085dd28b74fa0cd81c8fb1a3ba18b21e03d7c27f3436a10606b", size = 212446, upload-time = "2025-09-08T23:23:03.472Z" }, + { url = "https://files.pythonhosted.org/packages/a9/f5/a2c23eb03b61a0b8747f211eb716446c826ad66818ddc7810cc2cc19b3f2/cffi-2.0.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d48a880098c96020b02d5a1f7d9251308510ce8858940e6fa99ece33f610838b", size = 220101, upload-time = "2025-09-08T23:23:04.792Z" }, + { url = "https://files.pythonhosted.org/packages/f2/7f/e6647792fc5850d634695bc0e6ab4111ae88e89981d35ac269956605feba/cffi-2.0.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:f93fd8e5c8c0a4aa1f424d6173f14a892044054871c771f8566e4008eaa359d2", size = 207948, upload-time = "2025-09-08T23:23:06.127Z" }, + { url = "https://files.pythonhosted.org/packages/cb/1e/a5a1bd6f1fb30f22573f76533de12a00bf274abcdc55c8edab639078abb6/cffi-2.0.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:dd4f05f54a52fb558f1ba9f528228066954fee3ebe629fc1660d874d040ae5a3", size = 206422, upload-time = "2025-09-08T23:23:07.753Z" }, + { url = "https://files.pythonhosted.org/packages/98/df/0a1755e750013a2081e863e7cd37e0cdd02664372c754e5560099eb7aa44/cffi-2.0.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c8d3b5532fc71b7a77c09192b4a5a200ea992702734a2e9279a37f2478236f26", size = 219499, upload-time = "2025-09-08T23:23:09.648Z" }, + { url = "https://files.pythonhosted.org/packages/50/e1/a969e687fcf9ea58e6e2a928ad5e2dd88cc12f6f0ab477e9971f2309b57c/cffi-2.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:d9b29c1f0ae438d5ee9acb31cadee00a58c46cc9c0b2f9038c6b0b3470877a8c", size = 222928, upload-time = "2025-09-08T23:23:10.928Z" }, + { url = "https://files.pythonhosted.org/packages/36/54/0362578dd2c9e557a28ac77698ed67323ed5b9775ca9d3fe73fe191bb5d8/cffi-2.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6d50360be4546678fc1b79ffe7a66265e28667840010348dd69a314145807a1b", size = 221302, upload-time = "2025-09-08T23:23:12.42Z" }, + { url = "https://files.pythonhosted.org/packages/eb/6d/bf9bda840d5f1dfdbf0feca87fbdb64a918a69bca42cfa0ba7b137c48cb8/cffi-2.0.0-cp313-cp313-win32.whl", hash = "sha256:74a03b9698e198d47562765773b4a8309919089150a0bb17d829ad7b44b60d27", size = 172909, upload-time = "2025-09-08T23:23:14.32Z" }, + { url = "https://files.pythonhosted.org/packages/37/18/6519e1ee6f5a1e579e04b9ddb6f1676c17368a7aba48299c3759bbc3c8b3/cffi-2.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:19f705ada2530c1167abacb171925dd886168931e0a7b78f5bffcae5c6b5be75", size = 183402, upload-time = "2025-09-08T23:23:15.535Z" }, + { url = "https://files.pythonhosted.org/packages/cb/0e/02ceeec9a7d6ee63bb596121c2c8e9b3a9e150936f4fbef6ca1943e6137c/cffi-2.0.0-cp313-cp313-win_arm64.whl", hash = "sha256:256f80b80ca3853f90c21b23ee78cd008713787b1b1e93eae9f3d6a7134abd91", size = 177780, upload-time = "2025-09-08T23:23:16.761Z" }, + { url = "https://files.pythonhosted.org/packages/92/c4/3ce07396253a83250ee98564f8d7e9789fab8e58858f35d07a9a2c78de9f/cffi-2.0.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:fc33c5141b55ed366cfaad382df24fe7dcbc686de5be719b207bb248e3053dc5", size = 185320, upload-time = "2025-09-08T23:23:18.087Z" }, + { url = 
"https://files.pythonhosted.org/packages/59/dd/27e9fa567a23931c838c6b02d0764611c62290062a6d4e8ff7863daf9730/cffi-2.0.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:c654de545946e0db659b3400168c9ad31b5d29593291482c43e3564effbcee13", size = 181487, upload-time = "2025-09-08T23:23:19.622Z" }, + { url = "https://files.pythonhosted.org/packages/d6/43/0e822876f87ea8a4ef95442c3d766a06a51fc5298823f884ef87aaad168c/cffi-2.0.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:24b6f81f1983e6df8db3adc38562c83f7d4a0c36162885ec7f7b77c7dcbec97b", size = 220049, upload-time = "2025-09-08T23:23:20.853Z" }, + { url = "https://files.pythonhosted.org/packages/b4/89/76799151d9c2d2d1ead63c2429da9ea9d7aac304603de0c6e8764e6e8e70/cffi-2.0.0-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:12873ca6cb9b0f0d3a0da705d6086fe911591737a59f28b7936bdfed27c0d47c", size = 207793, upload-time = "2025-09-08T23:23:22.08Z" }, + { url = "https://files.pythonhosted.org/packages/bb/dd/3465b14bb9e24ee24cb88c9e3730f6de63111fffe513492bf8c808a3547e/cffi-2.0.0-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:d9b97165e8aed9272a6bb17c01e3cc5871a594a446ebedc996e2397a1c1ea8ef", size = 206300, upload-time = "2025-09-08T23:23:23.314Z" }, + { url = "https://files.pythonhosted.org/packages/47/d9/d83e293854571c877a92da46fdec39158f8d7e68da75bf73581225d28e90/cffi-2.0.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:afb8db5439b81cf9c9d0c80404b60c3cc9c3add93e114dcae767f1477cb53775", size = 219244, upload-time = "2025-09-08T23:23:24.541Z" }, + { url = "https://files.pythonhosted.org/packages/2b/0f/1f177e3683aead2bb00f7679a16451d302c436b5cbf2505f0ea8146ef59e/cffi-2.0.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:737fe7d37e1a1bffe70bd5754ea763a62a066dc5913ca57e957824b72a85e205", size = 222828, upload-time = "2025-09-08T23:23:26.143Z" }, + { url = "https://files.pythonhosted.org/packages/c6/0f/cafacebd4b040e3119dcb32fed8bdef8dfe94da653155f9d0b9dc660166e/cffi-2.0.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:38100abb9d1b1435bc4cc340bb4489635dc2f0da7456590877030c9b3d40b0c1", size = 220926, upload-time = "2025-09-08T23:23:27.873Z" }, + { url = "https://files.pythonhosted.org/packages/3e/aa/df335faa45b395396fcbc03de2dfcab242cd61a9900e914fe682a59170b1/cffi-2.0.0-cp314-cp314-win32.whl", hash = "sha256:087067fa8953339c723661eda6b54bc98c5625757ea62e95eb4898ad5e776e9f", size = 175328, upload-time = "2025-09-08T23:23:44.61Z" }, + { url = "https://files.pythonhosted.org/packages/bb/92/882c2d30831744296ce713f0feb4c1cd30f346ef747b530b5318715cc367/cffi-2.0.0-cp314-cp314-win_amd64.whl", hash = "sha256:203a48d1fb583fc7d78a4c6655692963b860a417c0528492a6bc21f1aaefab25", size = 185650, upload-time = "2025-09-08T23:23:45.848Z" }, + { url = "https://files.pythonhosted.org/packages/9f/2c/98ece204b9d35a7366b5b2c6539c350313ca13932143e79dc133ba757104/cffi-2.0.0-cp314-cp314-win_arm64.whl", hash = "sha256:dbd5c7a25a7cb98f5ca55d258b103a2054f859a46ae11aaf23134f9cc0d356ad", size = 180687, upload-time = "2025-09-08T23:23:47.105Z" }, + { url = "https://files.pythonhosted.org/packages/3e/61/c768e4d548bfa607abcda77423448df8c471f25dbe64fb2ef6d555eae006/cffi-2.0.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:9a67fc9e8eb39039280526379fb3a70023d77caec1852002b4da7e8b270c4dd9", size = 188773, upload-time = "2025-09-08T23:23:29.347Z" }, + { url = 
"https://files.pythonhosted.org/packages/2c/ea/5f76bce7cf6fcd0ab1a1058b5af899bfbef198bea4d5686da88471ea0336/cffi-2.0.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:7a66c7204d8869299919db4d5069a82f1561581af12b11b3c9f48c584eb8743d", size = 185013, upload-time = "2025-09-08T23:23:30.63Z" }, + { url = "https://files.pythonhosted.org/packages/be/b4/c56878d0d1755cf9caa54ba71e5d049479c52f9e4afc230f06822162ab2f/cffi-2.0.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:7cc09976e8b56f8cebd752f7113ad07752461f48a58cbba644139015ac24954c", size = 221593, upload-time = "2025-09-08T23:23:31.91Z" }, + { url = "https://files.pythonhosted.org/packages/e0/0d/eb704606dfe8033e7128df5e90fee946bbcb64a04fcdaa97321309004000/cffi-2.0.0-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:92b68146a71df78564e4ef48af17551a5ddd142e5190cdf2c5624d0c3ff5b2e8", size = 209354, upload-time = "2025-09-08T23:23:33.214Z" }, + { url = "https://files.pythonhosted.org/packages/d8/19/3c435d727b368ca475fb8742ab97c9cb13a0de600ce86f62eab7fa3eea60/cffi-2.0.0-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:b1e74d11748e7e98e2f426ab176d4ed720a64412b6a15054378afdb71e0f37dc", size = 208480, upload-time = "2025-09-08T23:23:34.495Z" }, + { url = "https://files.pythonhosted.org/packages/d0/44/681604464ed9541673e486521497406fadcc15b5217c3e326b061696899a/cffi-2.0.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:28a3a209b96630bca57cce802da70c266eb08c6e97e5afd61a75611ee6c64592", size = 221584, upload-time = "2025-09-08T23:23:36.096Z" }, + { url = "https://files.pythonhosted.org/packages/25/8e/342a504ff018a2825d395d44d63a767dd8ebc927ebda557fecdaca3ac33a/cffi-2.0.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:7553fb2090d71822f02c629afe6042c299edf91ba1bf94951165613553984512", size = 224443, upload-time = "2025-09-08T23:23:37.328Z" }, + { url = "https://files.pythonhosted.org/packages/e1/5e/b666bacbbc60fbf415ba9988324a132c9a7a0448a9a8f125074671c0f2c3/cffi-2.0.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:6c6c373cfc5c83a975506110d17457138c8c63016b563cc9ed6e056a82f13ce4", size = 223437, upload-time = "2025-09-08T23:23:38.945Z" }, + { url = "https://files.pythonhosted.org/packages/a0/1d/ec1a60bd1a10daa292d3cd6bb0b359a81607154fb8165f3ec95fe003b85c/cffi-2.0.0-cp314-cp314t-win32.whl", hash = "sha256:1fc9ea04857caf665289b7a75923f2c6ed559b8298a1b8c49e59f7dd95c8481e", size = 180487, upload-time = "2025-09-08T23:23:40.423Z" }, + { url = "https://files.pythonhosted.org/packages/bf/41/4c1168c74fac325c0c8156f04b6749c8b6a8f405bbf91413ba088359f60d/cffi-2.0.0-cp314-cp314t-win_amd64.whl", hash = "sha256:d68b6cef7827e8641e8ef16f4494edda8b36104d79773a334beaa1e3521430f6", size = 191726, upload-time = "2025-09-08T23:23:41.742Z" }, + { url = "https://files.pythonhosted.org/packages/ae/3a/dbeec9d1ee0844c679f6bb5d6ad4e9f198b1224f4e7a32825f47f6192b0c/cffi-2.0.0-cp314-cp314t-win_arm64.whl", hash = "sha256:0a1527a803f0a659de1af2e1fd700213caba79377e27e4693648c2923da066f9", size = 184195, upload-time = "2025-09-08T23:23:43.004Z" }, +] + +[[package]] +name = "charset-normalizer" +version = "3.4.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/13/69/33ddede1939fdd074bce5434295f38fae7136463422fe4fd3e0e89b98062/charset_normalizer-3.4.4.tar.gz", hash = "sha256:94537985111c35f28720e43603b8e7b43a6ecfb2ce1d3058bbe955b73404e21a", size = 129418, upload-time = "2025-10-14T04:42:32.879Z" } 
+wheels = [ + { url = "https://files.pythonhosted.org/packages/ed/27/c6491ff4954e58a10f69ad90aca8a1b6fe9c5d3c6f380907af3c37435b59/charset_normalizer-3.4.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:6e1fcf0720908f200cd21aa4e6750a48ff6ce4afe7ff5a79a90d5ed8a08296f8", size = 206988, upload-time = "2025-10-14T04:40:33.79Z" }, + { url = "https://files.pythonhosted.org/packages/94/59/2e87300fe67ab820b5428580a53cad894272dbb97f38a7a814a2a1ac1011/charset_normalizer-3.4.4-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5f819d5fe9234f9f82d75bdfa9aef3a3d72c4d24a6e57aeaebba32a704553aa0", size = 147324, upload-time = "2025-10-14T04:40:34.961Z" }, + { url = "https://files.pythonhosted.org/packages/07/fb/0cf61dc84b2b088391830f6274cb57c82e4da8bbc2efeac8c025edb88772/charset_normalizer-3.4.4-cp311-cp311-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:a59cb51917aa591b1c4e6a43c132f0cdc3c76dbad6155df4e28ee626cc77a0a3", size = 142742, upload-time = "2025-10-14T04:40:36.105Z" }, + { url = "https://files.pythonhosted.org/packages/62/8b/171935adf2312cd745d290ed93cf16cf0dfe320863ab7cbeeae1dcd6535f/charset_normalizer-3.4.4-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:8ef3c867360f88ac904fd3f5e1f902f13307af9052646963ee08ff4f131adafc", size = 160863, upload-time = "2025-10-14T04:40:37.188Z" }, + { url = "https://files.pythonhosted.org/packages/09/73/ad875b192bda14f2173bfc1bc9a55e009808484a4b256748d931b6948442/charset_normalizer-3.4.4-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d9e45d7faa48ee908174d8fe84854479ef838fc6a705c9315372eacbc2f02897", size = 157837, upload-time = "2025-10-14T04:40:38.435Z" }, + { url = "https://files.pythonhosted.org/packages/6d/fc/de9cce525b2c5b94b47c70a4b4fb19f871b24995c728e957ee68ab1671ea/charset_normalizer-3.4.4-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:840c25fb618a231545cbab0564a799f101b63b9901f2569faecd6b222ac72381", size = 151550, upload-time = "2025-10-14T04:40:40.053Z" }, + { url = "https://files.pythonhosted.org/packages/55/c2/43edd615fdfba8c6f2dfbd459b25a6b3b551f24ea21981e23fb768503ce1/charset_normalizer-3.4.4-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:ca5862d5b3928c4940729dacc329aa9102900382fea192fc5e52eb69d6093815", size = 149162, upload-time = "2025-10-14T04:40:41.163Z" }, + { url = "https://files.pythonhosted.org/packages/03/86/bde4ad8b4d0e9429a4e82c1e8f5c659993a9a863ad62c7df05cf7b678d75/charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d9c7f57c3d666a53421049053eaacdd14bbd0a528e2186fcb2e672effd053bb0", size = 150019, upload-time = "2025-10-14T04:40:42.276Z" }, + { url = "https://files.pythonhosted.org/packages/1f/86/a151eb2af293a7e7bac3a739b81072585ce36ccfb4493039f49f1d3cae8c/charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:277e970e750505ed74c832b4bf75dac7476262ee2a013f5574dd49075879e161", size = 143310, upload-time = "2025-10-14T04:40:43.439Z" }, + { url = "https://files.pythonhosted.org/packages/b5/fe/43dae6144a7e07b87478fdfc4dbe9efd5defb0e7ec29f5f58a55aeef7bf7/charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:31fd66405eaf47bb62e8cd575dc621c56c668f27d46a61d975a249930dd5e2a4", size = 162022, upload-time = "2025-10-14T04:40:44.547Z" }, + { url = 
"https://files.pythonhosted.org/packages/80/e6/7aab83774f5d2bca81f42ac58d04caf44f0cc2b65fc6db2b3b2e8a05f3b3/charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:0d3d8f15c07f86e9ff82319b3d9ef6f4bf907608f53fe9d92b28ea9ae3d1fd89", size = 149383, upload-time = "2025-10-14T04:40:46.018Z" }, + { url = "https://files.pythonhosted.org/packages/4f/e8/b289173b4edae05c0dde07f69f8db476a0b511eac556dfe0d6bda3c43384/charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:9f7fcd74d410a36883701fafa2482a6af2ff5ba96b9a620e9e0721e28ead5569", size = 159098, upload-time = "2025-10-14T04:40:47.081Z" }, + { url = "https://files.pythonhosted.org/packages/d8/df/fe699727754cae3f8478493c7f45f777b17c3ef0600e28abfec8619eb49c/charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ebf3e58c7ec8a8bed6d66a75d7fb37b55e5015b03ceae72a8e7c74495551e224", size = 152991, upload-time = "2025-10-14T04:40:48.246Z" }, + { url = "https://files.pythonhosted.org/packages/1a/86/584869fe4ddb6ffa3bd9f491b87a01568797fb9bd8933f557dba9771beaf/charset_normalizer-3.4.4-cp311-cp311-win32.whl", hash = "sha256:eecbc200c7fd5ddb9a7f16c7decb07b566c29fa2161a16cf67b8d068bd21690a", size = 99456, upload-time = "2025-10-14T04:40:49.376Z" }, + { url = "https://files.pythonhosted.org/packages/65/f6/62fdd5feb60530f50f7e38b4f6a1d5203f4d16ff4f9f0952962c044e919a/charset_normalizer-3.4.4-cp311-cp311-win_amd64.whl", hash = "sha256:5ae497466c7901d54b639cf42d5b8c1b6a4fead55215500d2f486d34db48d016", size = 106978, upload-time = "2025-10-14T04:40:50.844Z" }, + { url = "https://files.pythonhosted.org/packages/7a/9d/0710916e6c82948b3be62d9d398cb4fcf4e97b56d6a6aeccd66c4b2f2bd5/charset_normalizer-3.4.4-cp311-cp311-win_arm64.whl", hash = "sha256:65e2befcd84bc6f37095f5961e68a6f077bf44946771354a28ad434c2cce0ae1", size = 99969, upload-time = "2025-10-14T04:40:52.272Z" }, + { url = "https://files.pythonhosted.org/packages/f3/85/1637cd4af66fa687396e757dec650f28025f2a2f5a5531a3208dc0ec43f2/charset_normalizer-3.4.4-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:0a98e6759f854bd25a58a73fa88833fba3b7c491169f86ce1180c948ab3fd394", size = 208425, upload-time = "2025-10-14T04:40:53.353Z" }, + { url = "https://files.pythonhosted.org/packages/9d/6a/04130023fef2a0d9c62d0bae2649b69f7b7d8d24ea5536feef50551029df/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b5b290ccc2a263e8d185130284f8501e3e36c5e02750fc6b6bdeb2e9e96f1e25", size = 148162, upload-time = "2025-10-14T04:40:54.558Z" }, + { url = "https://files.pythonhosted.org/packages/78/29/62328d79aa60da22c9e0b9a66539feae06ca0f5a4171ac4f7dc285b83688/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:74bb723680f9f7a6234dcf67aea57e708ec1fbdf5699fb91dfd6f511b0a320ef", size = 144558, upload-time = "2025-10-14T04:40:55.677Z" }, + { url = "https://files.pythonhosted.org/packages/86/bb/b32194a4bf15b88403537c2e120b817c61cd4ecffa9b6876e941c3ee38fe/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f1e34719c6ed0b92f418c7c780480b26b5d9c50349e9a9af7d76bf757530350d", size = 161497, upload-time = "2025-10-14T04:40:57.217Z" }, + { url = "https://files.pythonhosted.org/packages/19/89/a54c82b253d5b9b111dc74aca196ba5ccfcca8242d0fb64146d4d3183ff1/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = 
"sha256:2437418e20515acec67d86e12bf70056a33abdacb5cb1655042f6538d6b085a8", size = 159240, upload-time = "2025-10-14T04:40:58.358Z" }, + { url = "https://files.pythonhosted.org/packages/c0/10/d20b513afe03acc89ec33948320a5544d31f21b05368436d580dec4e234d/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:11d694519d7f29d6cd09f6ac70028dba10f92f6cdd059096db198c283794ac86", size = 153471, upload-time = "2025-10-14T04:40:59.468Z" }, + { url = "https://files.pythonhosted.org/packages/61/fa/fbf177b55bdd727010f9c0a3c49eefa1d10f960e5f09d1d887bf93c2e698/charset_normalizer-3.4.4-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:ac1c4a689edcc530fc9d9aa11f5774b9e2f33f9a0c6a57864e90908f5208d30a", size = 150864, upload-time = "2025-10-14T04:41:00.623Z" }, + { url = "https://files.pythonhosted.org/packages/05/12/9fbc6a4d39c0198adeebbde20b619790e9236557ca59fc40e0e3cebe6f40/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:21d142cc6c0ec30d2efee5068ca36c128a30b0f2c53c1c07bd78cb6bc1d3be5f", size = 150647, upload-time = "2025-10-14T04:41:01.754Z" }, + { url = "https://files.pythonhosted.org/packages/ad/1f/6a9a593d52e3e8c5d2b167daf8c6b968808efb57ef4c210acb907c365bc4/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:5dbe56a36425d26d6cfb40ce79c314a2e4dd6211d51d6d2191c00bed34f354cc", size = 145110, upload-time = "2025-10-14T04:41:03.231Z" }, + { url = "https://files.pythonhosted.org/packages/30/42/9a52c609e72471b0fc54386dc63c3781a387bb4fe61c20231a4ebcd58bdd/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:5bfbb1b9acf3334612667b61bd3002196fe2a1eb4dd74d247e0f2a4d50ec9bbf", size = 162839, upload-time = "2025-10-14T04:41:04.715Z" }, + { url = "https://files.pythonhosted.org/packages/c4/5b/c0682bbf9f11597073052628ddd38344a3d673fda35a36773f7d19344b23/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:d055ec1e26e441f6187acf818b73564e6e6282709e9bcb5b63f5b23068356a15", size = 150667, upload-time = "2025-10-14T04:41:05.827Z" }, + { url = "https://files.pythonhosted.org/packages/e4/24/a41afeab6f990cf2daf6cb8c67419b63b48cf518e4f56022230840c9bfb2/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:af2d8c67d8e573d6de5bc30cdb27e9b95e49115cd9baad5ddbd1a6207aaa82a9", size = 160535, upload-time = "2025-10-14T04:41:06.938Z" }, + { url = "https://files.pythonhosted.org/packages/2a/e5/6a4ce77ed243c4a50a1fecca6aaaab419628c818a49434be428fe24c9957/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:780236ac706e66881f3b7f2f32dfe90507a09e67d1d454c762cf642e6e1586e0", size = 154816, upload-time = "2025-10-14T04:41:08.101Z" }, + { url = "https://files.pythonhosted.org/packages/a8/ef/89297262b8092b312d29cdb2517cb1237e51db8ecef2e9af5edbe7b683b1/charset_normalizer-3.4.4-cp312-cp312-win32.whl", hash = "sha256:5833d2c39d8896e4e19b689ffc198f08ea58116bee26dea51e362ecc7cd3ed26", size = 99694, upload-time = "2025-10-14T04:41:09.23Z" }, + { url = "https://files.pythonhosted.org/packages/3d/2d/1e5ed9dd3b3803994c155cd9aacb60c82c331bad84daf75bcb9c91b3295e/charset_normalizer-3.4.4-cp312-cp312-win_amd64.whl", hash = "sha256:a79cfe37875f822425b89a82333404539ae63dbdddf97f84dcbc3d339aae9525", size = 107131, upload-time = "2025-10-14T04:41:10.467Z" }, + { url = 
"https://files.pythonhosted.org/packages/d0/d9/0ed4c7098a861482a7b6a95603edce4c0d9db2311af23da1fb2b75ec26fc/charset_normalizer-3.4.4-cp312-cp312-win_arm64.whl", hash = "sha256:376bec83a63b8021bb5c8ea75e21c4ccb86e7e45ca4eb81146091b56599b80c3", size = 100390, upload-time = "2025-10-14T04:41:11.915Z" }, + { url = "https://files.pythonhosted.org/packages/97/45/4b3a1239bbacd321068ea6e7ac28875b03ab8bc0aa0966452db17cd36714/charset_normalizer-3.4.4-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:e1f185f86a6f3403aa2420e815904c67b2f9ebc443f045edd0de921108345794", size = 208091, upload-time = "2025-10-14T04:41:13.346Z" }, + { url = "https://files.pythonhosted.org/packages/7d/62/73a6d7450829655a35bb88a88fca7d736f9882a27eacdca2c6d505b57e2e/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6b39f987ae8ccdf0d2642338faf2abb1862340facc796048b604ef14919e55ed", size = 147936, upload-time = "2025-10-14T04:41:14.461Z" }, + { url = "https://files.pythonhosted.org/packages/89/c5/adb8c8b3d6625bef6d88b251bbb0d95f8205831b987631ab0c8bb5d937c2/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:3162d5d8ce1bb98dd51af660f2121c55d0fa541b46dff7bb9b9f86ea1d87de72", size = 144180, upload-time = "2025-10-14T04:41:15.588Z" }, + { url = "https://files.pythonhosted.org/packages/91/ed/9706e4070682d1cc219050b6048bfd293ccf67b3d4f5a4f39207453d4b99/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:81d5eb2a312700f4ecaa977a8235b634ce853200e828fbadf3a9c50bab278328", size = 161346, upload-time = "2025-10-14T04:41:16.738Z" }, + { url = "https://files.pythonhosted.org/packages/d5/0d/031f0d95e4972901a2f6f09ef055751805ff541511dc1252ba3ca1f80cf5/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5bd2293095d766545ec1a8f612559f6b40abc0eb18bb2f5d1171872d34036ede", size = 158874, upload-time = "2025-10-14T04:41:17.923Z" }, + { url = "https://files.pythonhosted.org/packages/f5/83/6ab5883f57c9c801ce5e5677242328aa45592be8a00644310a008d04f922/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a8a8b89589086a25749f471e6a900d3f662d1d3b6e2e59dcecf787b1cc3a1894", size = 153076, upload-time = "2025-10-14T04:41:19.106Z" }, + { url = "https://files.pythonhosted.org/packages/75/1e/5ff781ddf5260e387d6419959ee89ef13878229732732ee73cdae01800f2/charset_normalizer-3.4.4-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:bc7637e2f80d8530ee4a78e878bce464f70087ce73cf7c1caf142416923b98f1", size = 150601, upload-time = "2025-10-14T04:41:20.245Z" }, + { url = "https://files.pythonhosted.org/packages/d7/57/71be810965493d3510a6ca79b90c19e48696fb1ff964da319334b12677f0/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f8bf04158c6b607d747e93949aa60618b61312fe647a6369f88ce2ff16043490", size = 150376, upload-time = "2025-10-14T04:41:21.398Z" }, + { url = "https://files.pythonhosted.org/packages/e5/d5/c3d057a78c181d007014feb7e9f2e65905a6c4ef182c0ddf0de2924edd65/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:554af85e960429cf30784dd47447d5125aaa3b99a6f0683589dbd27e2f45da44", size = 144825, upload-time = "2025-10-14T04:41:22.583Z" }, + { url = 
"https://files.pythonhosted.org/packages/e6/8c/d0406294828d4976f275ffbe66f00266c4b3136b7506941d87c00cab5272/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:74018750915ee7ad843a774364e13a3db91682f26142baddf775342c3f5b1133", size = 162583, upload-time = "2025-10-14T04:41:23.754Z" }, + { url = "https://files.pythonhosted.org/packages/d7/24/e2aa1f18c8f15c4c0e932d9287b8609dd30ad56dbe41d926bd846e22fb8d/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:c0463276121fdee9c49b98908b3a89c39be45d86d1dbaa22957e38f6321d4ce3", size = 150366, upload-time = "2025-10-14T04:41:25.27Z" }, + { url = "https://files.pythonhosted.org/packages/e4/5b/1e6160c7739aad1e2df054300cc618b06bf784a7a164b0f238360721ab86/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:362d61fd13843997c1c446760ef36f240cf81d3ebf74ac62652aebaf7838561e", size = 160300, upload-time = "2025-10-14T04:41:26.725Z" }, + { url = "https://files.pythonhosted.org/packages/7a/10/f882167cd207fbdd743e55534d5d9620e095089d176d55cb22d5322f2afd/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9a26f18905b8dd5d685d6d07b0cdf98a79f3c7a918906af7cc143ea2e164c8bc", size = 154465, upload-time = "2025-10-14T04:41:28.322Z" }, + { url = "https://files.pythonhosted.org/packages/89/66/c7a9e1b7429be72123441bfdbaf2bc13faab3f90b933f664db506dea5915/charset_normalizer-3.4.4-cp313-cp313-win32.whl", hash = "sha256:9b35f4c90079ff2e2edc5b26c0c77925e5d2d255c42c74fdb70fb49b172726ac", size = 99404, upload-time = "2025-10-14T04:41:29.95Z" }, + { url = "https://files.pythonhosted.org/packages/c4/26/b9924fa27db384bdcd97ab83b4f0a8058d96ad9626ead570674d5e737d90/charset_normalizer-3.4.4-cp313-cp313-win_amd64.whl", hash = "sha256:b435cba5f4f750aa6c0a0d92c541fb79f69a387c91e61f1795227e4ed9cece14", size = 107092, upload-time = "2025-10-14T04:41:31.188Z" }, + { url = "https://files.pythonhosted.org/packages/af/8f/3ed4bfa0c0c72a7ca17f0380cd9e4dd842b09f664e780c13cff1dcf2ef1b/charset_normalizer-3.4.4-cp313-cp313-win_arm64.whl", hash = "sha256:542d2cee80be6f80247095cc36c418f7bddd14f4a6de45af91dfad36d817bba2", size = 100408, upload-time = "2025-10-14T04:41:32.624Z" }, + { url = "https://files.pythonhosted.org/packages/2a/35/7051599bd493e62411d6ede36fd5af83a38f37c4767b92884df7301db25d/charset_normalizer-3.4.4-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:da3326d9e65ef63a817ecbcc0df6e94463713b754fe293eaa03da99befb9a5bd", size = 207746, upload-time = "2025-10-14T04:41:33.773Z" }, + { url = "https://files.pythonhosted.org/packages/10/9a/97c8d48ef10d6cd4fcead2415523221624bf58bcf68a802721a6bc807c8f/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8af65f14dc14a79b924524b1e7fffe304517b2bff5a58bf64f30b98bbc5079eb", size = 147889, upload-time = "2025-10-14T04:41:34.897Z" }, + { url = "https://files.pythonhosted.org/packages/10/bf/979224a919a1b606c82bd2c5fa49b5c6d5727aa47b4312bb27b1734f53cd/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:74664978bb272435107de04e36db5a9735e78232b85b77d45cfb38f758efd33e", size = 143641, upload-time = "2025-10-14T04:41:36.116Z" }, + { url = "https://files.pythonhosted.org/packages/ba/33/0ad65587441fc730dc7bd90e9716b30b4702dc7b617e6ba4997dc8651495/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = 
"sha256:752944c7ffbfdd10c074dc58ec2d5a8a4cd9493b314d367c14d24c17684ddd14", size = 160779, upload-time = "2025-10-14T04:41:37.229Z" }, + { url = "https://files.pythonhosted.org/packages/67/ed/331d6b249259ee71ddea93f6f2f0a56cfebd46938bde6fcc6f7b9a3d0e09/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d1f13550535ad8cff21b8d757a3257963e951d96e20ec82ab44bc64aeb62a191", size = 159035, upload-time = "2025-10-14T04:41:38.368Z" }, + { url = "https://files.pythonhosted.org/packages/67/ff/f6b948ca32e4f2a4576aa129d8bed61f2e0543bf9f5f2b7fc3758ed005c9/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ecaae4149d99b1c9e7b88bb03e3221956f68fd6d50be2ef061b2381b61d20838", size = 152542, upload-time = "2025-10-14T04:41:39.862Z" }, + { url = "https://files.pythonhosted.org/packages/16/85/276033dcbcc369eb176594de22728541a925b2632f9716428c851b149e83/charset_normalizer-3.4.4-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:cb6254dc36b47a990e59e1068afacdcd02958bdcce30bb50cc1700a8b9d624a6", size = 149524, upload-time = "2025-10-14T04:41:41.319Z" }, + { url = "https://files.pythonhosted.org/packages/9e/f2/6a2a1f722b6aba37050e626530a46a68f74e63683947a8acff92569f979a/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c8ae8a0f02f57a6e61203a31428fa1d677cbe50c93622b4149d5c0f319c1d19e", size = 150395, upload-time = "2025-10-14T04:41:42.539Z" }, + { url = "https://files.pythonhosted.org/packages/60/bb/2186cb2f2bbaea6338cad15ce23a67f9b0672929744381e28b0592676824/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:47cc91b2f4dd2833fddaedd2893006b0106129d4b94fdb6af1f4ce5a9965577c", size = 143680, upload-time = "2025-10-14T04:41:43.661Z" }, + { url = "https://files.pythonhosted.org/packages/7d/a5/bf6f13b772fbb2a90360eb620d52ed8f796f3c5caee8398c3b2eb7b1c60d/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:82004af6c302b5d3ab2cfc4cc5f29db16123b1a8417f2e25f9066f91d4411090", size = 162045, upload-time = "2025-10-14T04:41:44.821Z" }, + { url = "https://files.pythonhosted.org/packages/df/c5/d1be898bf0dc3ef9030c3825e5d3b83f2c528d207d246cbabe245966808d/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:2b7d8f6c26245217bd2ad053761201e9f9680f8ce52f0fcd8d0755aeae5b2152", size = 149687, upload-time = "2025-10-14T04:41:46.442Z" }, + { url = "https://files.pythonhosted.org/packages/a5/42/90c1f7b9341eef50c8a1cb3f098ac43b0508413f33affd762855f67a410e/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:799a7a5e4fb2d5898c60b640fd4981d6a25f1c11790935a44ce38c54e985f828", size = 160014, upload-time = "2025-10-14T04:41:47.631Z" }, + { url = "https://files.pythonhosted.org/packages/76/be/4d3ee471e8145d12795ab655ece37baed0929462a86e72372fd25859047c/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:99ae2cffebb06e6c22bdc25801d7b30f503cc87dbd283479e7b606f70aff57ec", size = 154044, upload-time = "2025-10-14T04:41:48.81Z" }, + { url = "https://files.pythonhosted.org/packages/b0/6f/8f7af07237c34a1defe7defc565a9bc1807762f672c0fde711a4b22bf9c0/charset_normalizer-3.4.4-cp314-cp314-win32.whl", hash = "sha256:f9d332f8c2a2fcbffe1378594431458ddbef721c1769d78e2cbc06280d8155f9", size = 99940, upload-time = "2025-10-14T04:41:49.946Z" }, + { url = 
"https://files.pythonhosted.org/packages/4b/51/8ade005e5ca5b0d80fb4aff72a3775b325bdc3d27408c8113811a7cbe640/charset_normalizer-3.4.4-cp314-cp314-win_amd64.whl", hash = "sha256:8a6562c3700cce886c5be75ade4a5db4214fda19fede41d9792d100288d8f94c", size = 107104, upload-time = "2025-10-14T04:41:51.051Z" }, + { url = "https://files.pythonhosted.org/packages/da/5f/6b8f83a55bb8278772c5ae54a577f3099025f9ade59d0136ac24a0df4bde/charset_normalizer-3.4.4-cp314-cp314-win_arm64.whl", hash = "sha256:de00632ca48df9daf77a2c65a484531649261ec9f25489917f09e455cb09ddb2", size = 100743, upload-time = "2025-10-14T04:41:52.122Z" }, + { url = "https://files.pythonhosted.org/packages/0a/4c/925909008ed5a988ccbb72dcc897407e5d6d3bd72410d69e051fc0c14647/charset_normalizer-3.4.4-py3-none-any.whl", hash = "sha256:7a32c560861a02ff789ad905a2fe94e3f840803362c84fecf1851cb4cf3dc37f", size = 53402, upload-time = "2025-10-14T04:42:31.76Z" }, +] + [[package]] name = "click" version = "8.3.1" @@ -23,6 +245,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/98/78/01c019cdb5d6498122777c1a43056ebb3ebfeef2076d9d026bfe15583b2b/click-8.3.1-py3-none-any.whl", hash = "sha256:981153a64e25f12d547d3426c367a4857371575ee7ad18df2a6183ab0545b2a6", size = 108274, upload-time = "2025-11-15T20:45:41.139Z" }, ] +[[package]] +name = "cloudpickle" +version = "3.1.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/27/fb/576f067976d320f5f0114a8d9fa1215425441bb35627b1993e5afd8111e5/cloudpickle-3.1.2.tar.gz", hash = "sha256:7fda9eb655c9c230dab534f1983763de5835249750e85fbcef43aaa30a9a2414", size = 22330, upload-time = "2025-11-03T09:25:26.604Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/88/39/799be3f2f0f38cc727ee3b4f1445fe6d5e4133064ec2e4115069418a5bb6/cloudpickle-3.1.2-py3-none-any.whl", hash = "sha256:9acb47f6afd73f60dc1df93bb801b472f05ff42fa6c84167d25cb206be1fbf4a", size = 22228, upload-time = "2025-11-03T09:25:25.534Z" }, +] + [[package]] name = "colorama" version = "0.4.6" @@ -124,15 +355,105 @@ toml = [ { name = "tomli", marker = "python_full_version <= '3.11'" }, ] +[[package]] +name = "croniter" +version = "6.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "python-dateutil" }, + { name = "pytz" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ad/2f/44d1ae153a0e27be56be43465e5cb39b9650c781e001e7864389deb25090/croniter-6.0.0.tar.gz", hash = "sha256:37c504b313956114a983ece2c2b07790b1f1094fe9d81cc94739214748255577", size = 64481, upload-time = "2024-12-17T17:17:47.32Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/07/4b/290b4c3efd6417a8b0c284896de19b1d5855e6dbdb97d2a35e68fa42de85/croniter-6.0.0-py2.py3-none-any.whl", hash = "sha256:2f878c3856f17896979b2a4379ba1f09c83e374931ea15cc835c5dd2eee9b368", size = 25468, upload-time = "2024-12-17T17:17:45.359Z" }, +] + +[[package]] +name = "cryptography" +version = "46.0.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cffi", marker = "platform_python_implementation != 'PyPy'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/78/19/f748958276519adf6a0c1e79e7b8860b4830dda55ccdf29f2719b5fc499c/cryptography-46.0.4.tar.gz", hash = "sha256:bfd019f60f8abc2ed1b9be4ddc21cfef059c841d86d710bb69909a688cbb8f59", size = 749301, upload-time = "2026-01-28T00:24:37.379Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/8d/99/157aae7949a5f30d51fcb1a9851e8ebd5c74bf99b5285d8bb4b8b9ee641e/cryptography-46.0.4-cp311-abi3-macosx_10_9_universal2.whl", hash = "sha256:281526e865ed4166009e235afadf3a4c4cba6056f99336a99efba65336fd5485", size = 7173686, upload-time = "2026-01-28T00:23:07.515Z" }, + { url = "https://files.pythonhosted.org/packages/87/91/874b8910903159043b5c6a123b7e79c4559ddd1896e38967567942635778/cryptography-46.0.4-cp311-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5f14fba5bf6f4390d7ff8f086c566454bff0411f6d8aa7af79c88b6f9267aecc", size = 4275871, upload-time = "2026-01-28T00:23:09.439Z" }, + { url = "https://files.pythonhosted.org/packages/c0/35/690e809be77896111f5b195ede56e4b4ed0435b428c2f2b6d35046fbb5e8/cryptography-46.0.4-cp311-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:47bcd19517e6389132f76e2d5303ded6cf3f78903da2158a671be8de024f4cd0", size = 4423124, upload-time = "2026-01-28T00:23:11.529Z" }, + { url = "https://files.pythonhosted.org/packages/1a/5b/a26407d4f79d61ca4bebaa9213feafdd8806dc69d3d290ce24996d3cfe43/cryptography-46.0.4-cp311-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:01df4f50f314fbe7009f54046e908d1754f19d0c6d3070df1e6268c5a4af09fa", size = 4277090, upload-time = "2026-01-28T00:23:13.123Z" }, + { url = "https://files.pythonhosted.org/packages/0c/d8/4bb7aec442a9049827aa34cee1aa83803e528fa55da9a9d45d01d1bb933e/cryptography-46.0.4-cp311-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:5aa3e463596b0087b3da0dbe2b2487e9fc261d25da85754e30e3b40637d61f81", size = 4947652, upload-time = "2026-01-28T00:23:14.554Z" }, + { url = "https://files.pythonhosted.org/packages/2b/08/f83e2e0814248b844265802d081f2fac2f1cbe6cd258e72ba14ff006823a/cryptography-46.0.4-cp311-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:0a9ad24359fee86f131836a9ac3bffc9329e956624a2d379b613f8f8abaf5255", size = 4455157, upload-time = "2026-01-28T00:23:16.443Z" }, + { url = "https://files.pythonhosted.org/packages/0a/05/19d849cf4096448779d2dcc9bb27d097457dac36f7273ffa875a93b5884c/cryptography-46.0.4-cp311-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:dc1272e25ef673efe72f2096e92ae39dea1a1a450dd44918b15351f72c5a168e", size = 3981078, upload-time = "2026-01-28T00:23:17.838Z" }, + { url = "https://files.pythonhosted.org/packages/e6/89/f7bac81d66ba7cde867a743ea5b37537b32b5c633c473002b26a226f703f/cryptography-46.0.4-cp311-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:de0f5f4ec8711ebc555f54735d4c673fc34b65c44283895f1a08c2b49d2fd99c", size = 4276213, upload-time = "2026-01-28T00:23:19.257Z" }, + { url = "https://files.pythonhosted.org/packages/da/9f/7133e41f24edd827020ad21b068736e792bc68eecf66d93c924ad4719fb3/cryptography-46.0.4-cp311-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:eeeb2e33d8dbcccc34d64651f00a98cb41b2dc69cef866771a5717e6734dfa32", size = 4912190, upload-time = "2026-01-28T00:23:21.244Z" }, + { url = "https://files.pythonhosted.org/packages/a6/f7/6d43cbaddf6f65b24816e4af187d211f0bc536a29961f69faedc48501d8e/cryptography-46.0.4-cp311-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:3d425eacbc9aceafd2cb429e42f4e5d5633c6f873f5e567077043ef1b9bbf616", size = 4454641, upload-time = "2026-01-28T00:23:22.866Z" }, + { url = "https://files.pythonhosted.org/packages/9e/4f/ebd0473ad656a0ac912a16bd07db0f5d85184924e14fc88feecae2492834/cryptography-46.0.4-cp311-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:91627ebf691d1ea3976a031b61fb7bac1ccd745afa03602275dda443e11c8de0", size = 4405159, upload-time = "2026-01-28T00:23:25.278Z" }, + { 
url = "https://files.pythonhosted.org/packages/d1/f7/7923886f32dc47e27adeff8246e976d77258fd2aa3efdd1754e4e323bf49/cryptography-46.0.4-cp311-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:2d08bc22efd73e8854b0b7caff402d735b354862f1145d7be3b9c0f740fef6a0", size = 4666059, upload-time = "2026-01-28T00:23:26.766Z" }, + { url = "https://files.pythonhosted.org/packages/eb/a7/0fca0fd3591dffc297278a61813d7f661a14243dd60f499a7a5b48acb52a/cryptography-46.0.4-cp311-abi3-win32.whl", hash = "sha256:82a62483daf20b8134f6e92898da70d04d0ef9a75829d732ea1018678185f4f5", size = 3026378, upload-time = "2026-01-28T00:23:28.317Z" }, + { url = "https://files.pythonhosted.org/packages/2d/12/652c84b6f9873f0909374864a57b003686c642ea48c84d6c7e2c515e6da5/cryptography-46.0.4-cp311-abi3-win_amd64.whl", hash = "sha256:6225d3ebe26a55dbc8ead5ad1265c0403552a63336499564675b29eb3184c09b", size = 3478614, upload-time = "2026-01-28T00:23:30.275Z" }, + { url = "https://files.pythonhosted.org/packages/b9/27/542b029f293a5cce59349d799d4d8484b3b1654a7b9a0585c266e974a488/cryptography-46.0.4-cp314-cp314t-macosx_10_9_universal2.whl", hash = "sha256:485e2b65d25ec0d901bca7bcae0f53b00133bf3173916d8e421f6fddde103908", size = 7116417, upload-time = "2026-01-28T00:23:31.958Z" }, + { url = "https://files.pythonhosted.org/packages/f8/f5/559c25b77f40b6bf828eabaf988efb8b0e17b573545edb503368ca0a2a03/cryptography-46.0.4-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:078e5f06bd2fa5aea5a324f2a09f914b1484f1d0c2a4d6a8a28c74e72f65f2da", size = 4264508, upload-time = "2026-01-28T00:23:34.264Z" }, + { url = "https://files.pythonhosted.org/packages/49/a1/551fa162d33074b660dc35c9bc3616fefa21a0e8c1edd27b92559902e408/cryptography-46.0.4-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:dce1e4f068f03008da7fa51cc7abc6ddc5e5de3e3d1550334eaf8393982a5829", size = 4409080, upload-time = "2026-01-28T00:23:35.793Z" }, + { url = "https://files.pythonhosted.org/packages/b0/6a/4d8d129a755f5d6df1bbee69ea2f35ebfa954fa1847690d1db2e8bca46a5/cryptography-46.0.4-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:2067461c80271f422ee7bdbe79b9b4be54a5162e90345f86a23445a0cf3fd8a2", size = 4270039, upload-time = "2026-01-28T00:23:37.263Z" }, + { url = "https://files.pythonhosted.org/packages/4c/f5/ed3fcddd0a5e39321e595e144615399e47e7c153a1fb8c4862aec3151ff9/cryptography-46.0.4-cp314-cp314t-manylinux_2_28_ppc64le.whl", hash = "sha256:c92010b58a51196a5f41c3795190203ac52edfd5dc3ff99149b4659eba9d2085", size = 4926748, upload-time = "2026-01-28T00:23:38.884Z" }, + { url = "https://files.pythonhosted.org/packages/43/ae/9f03d5f0c0c00e85ecb34f06d3b79599f20630e4db91b8a6e56e8f83d410/cryptography-46.0.4-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:829c2b12bbc5428ab02d6b7f7e9bbfd53e33efd6672d21341f2177470171ad8b", size = 4442307, upload-time = "2026-01-28T00:23:40.56Z" }, + { url = "https://files.pythonhosted.org/packages/8b/22/e0f9f2dae8040695103369cf2283ef9ac8abe4d51f68710bec2afd232609/cryptography-46.0.4-cp314-cp314t-manylinux_2_31_armv7l.whl", hash = "sha256:62217ba44bf81b30abaeda1488686a04a702a261e26f87db51ff61d9d3510abd", size = 3959253, upload-time = "2026-01-28T00:23:42.827Z" }, + { url = "https://files.pythonhosted.org/packages/01/5b/6a43fcccc51dae4d101ac7d378a8724d1ba3de628a24e11bf2f4f43cba4d/cryptography-46.0.4-cp314-cp314t-manylinux_2_34_aarch64.whl", hash = "sha256:9c2da296c8d3415b93e6053f5a728649a87a48ce084a9aaf51d6e46c87c7f2d2", size = 4269372, upload-time = "2026-01-28T00:23:44.655Z" }, + { url = 
"https://files.pythonhosted.org/packages/17/b7/0f6b8c1dd0779df2b526e78978ff00462355e31c0a6f6cff8a3e99889c90/cryptography-46.0.4-cp314-cp314t-manylinux_2_34_ppc64le.whl", hash = "sha256:9b34d8ba84454641a6bf4d6762d15847ecbd85c1316c0a7984e6e4e9f748ec2e", size = 4891908, upload-time = "2026-01-28T00:23:46.48Z" }, + { url = "https://files.pythonhosted.org/packages/83/17/259409b8349aa10535358807a472c6a695cf84f106022268d31cea2b6c97/cryptography-46.0.4-cp314-cp314t-manylinux_2_34_x86_64.whl", hash = "sha256:df4a817fa7138dd0c96c8c8c20f04b8aaa1fac3bbf610913dcad8ea82e1bfd3f", size = 4441254, upload-time = "2026-01-28T00:23:48.403Z" }, + { url = "https://files.pythonhosted.org/packages/9c/fe/e4a1b0c989b00cee5ffa0764401767e2d1cf59f45530963b894129fd5dce/cryptography-46.0.4-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:b1de0ebf7587f28f9190b9cb526e901bf448c9e6a99655d2b07fff60e8212a82", size = 4396520, upload-time = "2026-01-28T00:23:50.26Z" }, + { url = "https://files.pythonhosted.org/packages/b3/81/ba8fd9657d27076eb40d6a2f941b23429a3c3d2f56f5a921d6b936a27bc9/cryptography-46.0.4-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:9b4d17bc7bd7cdd98e3af40b441feaea4c68225e2eb2341026c84511ad246c0c", size = 4651479, upload-time = "2026-01-28T00:23:51.674Z" }, + { url = "https://files.pythonhosted.org/packages/00/03/0de4ed43c71c31e4fe954edd50b9d28d658fef56555eba7641696370a8e2/cryptography-46.0.4-cp314-cp314t-win32.whl", hash = "sha256:c411f16275b0dea722d76544a61d6421e2cc829ad76eec79280dbdc9ddf50061", size = 3001986, upload-time = "2026-01-28T00:23:53.485Z" }, + { url = "https://files.pythonhosted.org/packages/5c/70/81830b59df7682917d7a10f833c4dab2a5574cd664e86d18139f2b421329/cryptography-46.0.4-cp314-cp314t-win_amd64.whl", hash = "sha256:728fedc529efc1439eb6107b677f7f7558adab4553ef8669f0d02d42d7b959a7", size = 3468288, upload-time = "2026-01-28T00:23:55.09Z" }, + { url = "https://files.pythonhosted.org/packages/56/f7/f648fdbb61d0d45902d3f374217451385edc7e7768d1b03ff1d0e5ffc17b/cryptography-46.0.4-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:a9556ba711f7c23f77b151d5798f3ac44a13455cc68db7697a1096e6d0563cab", size = 7169583, upload-time = "2026-01-28T00:23:56.558Z" }, + { url = "https://files.pythonhosted.org/packages/d8/cc/8f3224cbb2a928de7298d6ed4790f5ebc48114e02bdc9559196bfb12435d/cryptography-46.0.4-cp38-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8bf75b0259e87fa70bddc0b8b4078b76e7fd512fd9afae6c1193bcf440a4dbef", size = 4275419, upload-time = "2026-01-28T00:23:58.364Z" }, + { url = "https://files.pythonhosted.org/packages/17/43/4a18faa7a872d00e4264855134ba82d23546c850a70ff209e04ee200e76f/cryptography-46.0.4-cp38-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:3c268a3490df22270955966ba236d6bc4a8f9b6e4ffddb78aac535f1a5ea471d", size = 4419058, upload-time = "2026-01-28T00:23:59.867Z" }, + { url = "https://files.pythonhosted.org/packages/ee/64/6651969409821d791ba12346a124f55e1b76f66a819254ae840a965d4b9c/cryptography-46.0.4-cp38-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:812815182f6a0c1d49a37893a303b44eaac827d7f0d582cecfc81b6427f22973", size = 4278151, upload-time = "2026-01-28T00:24:01.731Z" }, + { url = "https://files.pythonhosted.org/packages/20/0b/a7fce65ee08c3c02f7a8310cc090a732344066b990ac63a9dfd0a655d321/cryptography-46.0.4-cp38-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:a90e43e3ef65e6dcf969dfe3bb40cbf5aef0d523dff95bfa24256be172a845f4", size = 4939441, upload-time = "2026-01-28T00:24:03.175Z" }, + { url = 
"https://files.pythonhosted.org/packages/db/a7/20c5701e2cd3e1dfd7a19d2290c522a5f435dd30957d431dcb531d0f1413/cryptography-46.0.4-cp38-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:a05177ff6296644ef2876fce50518dffb5bcdf903c85250974fc8bc85d54c0af", size = 4451617, upload-time = "2026-01-28T00:24:05.403Z" }, + { url = "https://files.pythonhosted.org/packages/00/dc/3e16030ea9aa47b63af6524c354933b4fb0e352257c792c4deeb0edae367/cryptography-46.0.4-cp38-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:daa392191f626d50f1b136c9b4cf08af69ca8279d110ea24f5c2700054d2e263", size = 3977774, upload-time = "2026-01-28T00:24:06.851Z" }, + { url = "https://files.pythonhosted.org/packages/42/c8/ad93f14118252717b465880368721c963975ac4b941b7ef88f3c56bf2897/cryptography-46.0.4-cp38-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:e07ea39c5b048e085f15923511d8121e4a9dc45cee4e3b970ca4f0d338f23095", size = 4277008, upload-time = "2026-01-28T00:24:08.926Z" }, + { url = "https://files.pythonhosted.org/packages/00/cf/89c99698151c00a4631fbfcfcf459d308213ac29e321b0ff44ceeeac82f1/cryptography-46.0.4-cp38-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:d5a45ddc256f492ce42a4e35879c5e5528c09cd9ad12420828c972951d8e016b", size = 4903339, upload-time = "2026-01-28T00:24:12.009Z" }, + { url = "https://files.pythonhosted.org/packages/03/c3/c90a2cb358de4ac9309b26acf49b2a100957e1ff5cc1e98e6c4996576710/cryptography-46.0.4-cp38-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:6bb5157bf6a350e5b28aee23beb2d84ae6f5be390b2f8ee7ea179cda077e1019", size = 4451216, upload-time = "2026-01-28T00:24:13.975Z" }, + { url = "https://files.pythonhosted.org/packages/96/2c/8d7f4171388a10208671e181ca43cdc0e596d8259ebacbbcfbd16de593da/cryptography-46.0.4-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:dd5aba870a2c40f87a3af043e0dee7d9eb02d4aff88a797b48f2b43eff8c3ab4", size = 4404299, upload-time = "2026-01-28T00:24:16.169Z" }, + { url = "https://files.pythonhosted.org/packages/e9/23/cbb2036e450980f65c6e0a173b73a56ff3bccd8998965dea5cc9ddd424a5/cryptography-46.0.4-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:93d8291da8d71024379ab2cb0b5c57915300155ad42e07f76bea6ad838d7e59b", size = 4664837, upload-time = "2026-01-28T00:24:17.629Z" }, + { url = "https://files.pythonhosted.org/packages/0a/21/f7433d18fe6d5845329cbdc597e30caf983229c7a245bcf54afecc555938/cryptography-46.0.4-cp38-abi3-win32.whl", hash = "sha256:0563655cb3c6d05fb2afe693340bc050c30f9f34e15763361cf08e94749401fc", size = 3009779, upload-time = "2026-01-28T00:24:20.198Z" }, + { url = "https://files.pythonhosted.org/packages/3a/6a/bd2e7caa2facffedf172a45c1a02e551e6d7d4828658c9a245516a598d94/cryptography-46.0.4-cp38-abi3-win_amd64.whl", hash = "sha256:fa0900b9ef9c49728887d1576fd8d9e7e3ea872fa9b25ef9b64888adc434e976", size = 3466633, upload-time = "2026-01-28T00:24:21.851Z" }, + { url = "https://files.pythonhosted.org/packages/59/e0/f9c6c53e1f2a1c2507f00f2faba00f01d2f334b35b0fbfe5286715da2184/cryptography-46.0.4-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:766330cce7416c92b5e90c3bb71b1b79521760cdcfc3a6a1a182d4c9fab23d2b", size = 3476316, upload-time = "2026-01-28T00:24:24.144Z" }, + { url = "https://files.pythonhosted.org/packages/27/7a/f8d2d13227a9a1a9fe9c7442b057efecffa41f1e3c51d8622f26b9edbe8f/cryptography-46.0.4-pp311-pypy311_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:c236a44acfb610e70f6b3e1c3ca20ff24459659231ef2f8c48e879e2d32b73da", size = 4216693, upload-time = "2026-01-28T00:24:25.758Z" }, + { url = 
"https://files.pythonhosted.org/packages/c5/de/3787054e8f7972658370198753835d9d680f6cd4a39df9f877b57f0dd69c/cryptography-46.0.4-pp311-pypy311_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:8a15fb869670efa8f83cbffbc8753c1abf236883225aed74cd179b720ac9ec80", size = 4382765, upload-time = "2026-01-28T00:24:27.577Z" }, + { url = "https://files.pythonhosted.org/packages/8a/5f/60e0afb019973ba6a0b322e86b3d61edf487a4f5597618a430a2a15f2d22/cryptography-46.0.4-pp311-pypy311_pp73-manylinux_2_34_aarch64.whl", hash = "sha256:fdc3daab53b212472f1524d070735b2f0c214239df131903bae1d598016fa822", size = 4216066, upload-time = "2026-01-28T00:24:29.056Z" }, + { url = "https://files.pythonhosted.org/packages/81/8e/bf4a0de294f147fee66f879d9bae6f8e8d61515558e3d12785dd90eca0be/cryptography-46.0.4-pp311-pypy311_pp73-manylinux_2_34_x86_64.whl", hash = "sha256:44cc0675b27cadb71bdbb96099cca1fa051cd11d2ade09e5cd3a2edb929ed947", size = 4382025, upload-time = "2026-01-28T00:24:30.681Z" }, + { url = "https://files.pythonhosted.org/packages/79/f4/9ceb90cfd6a3847069b0b0b353fd3075dc69b49defc70182d8af0c4ca390/cryptography-46.0.4-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:be8c01a7d5a55f9a47d1888162b76c8f49d62b234d88f0ff91a9fbebe32ffbc3", size = 3406043, upload-time = "2026-01-28T00:24:32.236Z" }, +] + +[[package]] +name = "cyclopts" +version = "4.5.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "attrs" }, + { name = "docstring-parser" }, + { name = "rich" }, + { name = "rich-rst" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/d4/93/6085aa89c3fff78a5180987354538d72e43b0db27e66a959302d0c07821a/cyclopts-4.5.1.tar.gz", hash = "sha256:fadc45304763fd9f5d6033727f176898d17a1778e194436964661a005078a3dd", size = 162075, upload-time = "2026-01-25T15:23:54.07Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1c/7c/996760c30f1302704af57c66ff2d723f7d656d0d0b93563b5528a51484bb/cyclopts-4.5.1-py3-none-any.whl", hash = "sha256:0642c93601e554ca6b7b9abd81093847ea4448b2616280f2a0952416574e8c7a", size = 199807, upload-time = "2026-01-25T15:23:55.219Z" }, +] + [[package]] name = "deepwork" version = "0.5.1" source = { editable = "." 
} dependencies = [ { name = "click" }, + { name = "fastmcp" }, { name = "gitpython" }, { name = "jinja2" }, { name = "jsonschema" }, + { name = "mcp" }, + { name = "pydantic" }, { name = "pyyaml" }, { name = "rich" }, ] @@ -150,10 +471,13 @@ dev = [ [package.metadata] requires-dist = [ { name = "click", specifier = ">=8.1.0" }, + { name = "fastmcp", specifier = ">=2.0" }, { name = "gitpython", specifier = ">=3.1.0" }, { name = "jinja2", specifier = ">=3.1.0" }, { name = "jsonschema", specifier = ">=4.17.0" }, + { name = "mcp", specifier = ">=1.0.0" }, { name = "mypy", marker = "extra == 'dev'", specifier = ">=1.0" }, + { name = "pydantic", specifier = ">=2.0" }, { name = "pytest", marker = "extra == 'dev'", specifier = ">=7.0" }, { name = "pytest-cov", marker = "extra == 'dev'", specifier = ">=4.0" }, { name = "pytest-mock", marker = "extra == 'dev'", specifier = ">=3.10" }, @@ -164,6 +488,114 @@ requires-dist = [ ] provides-extras = ["dev"] +[[package]] +name = "diskcache" +version = "5.6.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/3f/21/1c1ffc1a039ddcc459db43cc108658f32c57d271d7289a2794e401d0fdb6/diskcache-5.6.3.tar.gz", hash = "sha256:2c3a3fa2743d8535d832ec61c2054a1641f41775aa7c556758a109941e33e4fc", size = 67916, upload-time = "2023-08-31T06:12:00.316Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3f/27/4570e78fc0bf5ea0ca45eb1de3818a23787af9b390c0b0a0033a1b8236f9/diskcache-5.6.3-py3-none-any.whl", hash = "sha256:5e31b2d5fbad117cc363ebaf6b689474db18a1f6438bc82358b024abd4c2ca19", size = 45550, upload-time = "2023-08-31T06:11:58.822Z" }, +] + +[[package]] +name = "dnspython" +version = "2.8.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/8c/8b/57666417c0f90f08bcafa776861060426765fdb422eb10212086fb811d26/dnspython-2.8.0.tar.gz", hash = "sha256:181d3c6996452cb1189c4046c61599b84a5a86e099562ffde77d26984ff26d0f", size = 368251, upload-time = "2025-09-07T18:58:00.022Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ba/5a/18ad964b0086c6e62e2e7500f7edc89e3faa45033c71c1893d34eed2b2de/dnspython-2.8.0-py3-none-any.whl", hash = "sha256:01d9bbc4a2d76bf0db7c1f729812ded6d912bd318d3b1cf81d30c0f845dbf3af", size = 331094, upload-time = "2025-09-07T18:57:58.071Z" }, +] + +[[package]] +name = "docstring-parser" +version = "0.17.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b2/9d/c3b43da9515bd270df0f80548d9944e389870713cc1fe2b8fb35fe2bcefd/docstring_parser-0.17.0.tar.gz", hash = "sha256:583de4a309722b3315439bb31d64ba3eebada841f2e2cee23b99df001434c912", size = 27442, upload-time = "2025-07-21T07:35:01.868Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/55/e2/2537ebcff11c1ee1ff17d8d0b6f4db75873e3b0fb32c2d4a2ee31ecb310a/docstring_parser-0.17.0-py3-none-any.whl", hash = "sha256:cf2569abd23dce8099b300f9b4fa8191e9582dda731fd533daf54c4551658708", size = 36896, upload-time = "2025-07-21T07:35:00.684Z" }, +] + +[[package]] +name = "docutils" +version = "0.22.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ae/b6/03bb70946330e88ffec97aefd3ea75ba575cb2e762061e0e62a213befee8/docutils-0.22.4.tar.gz", hash = "sha256:4db53b1fde9abecbb74d91230d32ab626d94f6badfc575d6db9194a49df29968", size = 2291750, upload-time = "2025-12-18T19:00:26.443Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/02/10/5da547df7a391dcde17f59520a231527b8571e6f46fc8efb02ccb370ab12/docutils-0.22.4-py3-none-any.whl", hash = "sha256:d0013f540772d1420576855455d050a2180186c91c15779301ac2ccb3eeb68de", size = 633196, upload-time = "2025-12-18T19:00:18.077Z" }, +] + +[[package]] +name = "email-validator" +version = "2.3.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "dnspython" }, + { name = "idna" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f5/22/900cb125c76b7aaa450ce02fd727f452243f2e91a61af068b40adba60ea9/email_validator-2.3.0.tar.gz", hash = "sha256:9fc05c37f2f6cf439ff414f8fc46d917929974a82244c20eb10231ba60c54426", size = 51238, upload-time = "2025-08-26T13:09:06.831Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/de/15/545e2b6cf2e3be84bc1ed85613edd75b8aea69807a71c26f4ca6a9258e82/email_validator-2.3.0-py3-none-any.whl", hash = "sha256:80f13f623413e6b197ae73bb10bf4eb0908faf509ad8362c5edeb0be7fd450b4", size = 35604, upload-time = "2025-08-26T13:09:05.858Z" }, +] + +[[package]] +name = "exceptiongroup" +version = "1.3.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/50/79/66800aadf48771f6b62f7eb014e352e5d06856655206165d775e675a02c9/exceptiongroup-1.3.1.tar.gz", hash = "sha256:8b412432c6055b0b7d14c310000ae93352ed6754f70fa8f7c34141f91c4e3219", size = 30371, upload-time = "2025-11-21T23:01:54.787Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8a/0e/97c33bf5009bdbac74fd2beace167cab3f978feb69cc36f1ef79360d6c4e/exceptiongroup-1.3.1-py3-none-any.whl", hash = "sha256:a7a39a3bd276781e98394987d3a5701d0c4edffb633bb7a5144577f82c773598", size = 16740, upload-time = "2025-11-21T23:01:53.443Z" }, +] + +[[package]] +name = "fakeredis" +version = "2.33.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "redis" }, + { name = "sortedcontainers" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/5f/f9/57464119936414d60697fcbd32f38909bb5688b616ae13de6e98384433e0/fakeredis-2.33.0.tar.gz", hash = "sha256:d7bc9a69d21df108a6451bbffee23b3eba432c21a654afc7ff2d295428ec5770", size = 175187, upload-time = "2025-12-16T19:45:52.269Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6e/78/a850fed8aeef96d4a99043c90b818b2ed5419cd5b24a4049fd7cfb9f1471/fakeredis-2.33.0-py3-none-any.whl", hash = "sha256:de535f3f9ccde1c56672ab2fdd6a8efbc4f2619fc2f1acc87b8737177d71c965", size = 119605, upload-time = "2025-12-16T19:45:51.08Z" }, +] + +[package.optional-dependencies] +lua = [ + { name = "lupa" }, +] + +[[package]] +name = "fastmcp" +version = "2.14.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "authlib" }, + { name = "cyclopts" }, + { name = "exceptiongroup" }, + { name = "httpx" }, + { name = "jsonref" }, + { name = "jsonschema-path" }, + { name = "mcp" }, + { name = "openapi-pydantic" }, + { name = "packaging" }, + { name = "platformdirs" }, + { name = "py-key-value-aio", extra = ["disk", "keyring", "memory"] }, + { name = "pydantic", extra = ["email"] }, + { name = "pydocket" }, + { name = "pyperclip" }, + { name = "python-dotenv" }, + { name = "rich" }, + { name = "uvicorn" }, + { name = "websockets" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/3b/32/982678d44f13849530a74ab101ed80e060c2ee6cf87471f062dcf61705fd/fastmcp-2.14.5.tar.gz", hash = 
"sha256:38944dc582c541d55357082bda2241cedb42cd3a78faea8a9d6a2662c62a42d7", size = 8296329, upload-time = "2026-02-03T15:35:21.005Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e5/c1/1a35ec68ff76ea8443aa115b18bcdee748a4ada2124537ee90522899ff9f/fastmcp-2.14.5-py3-none-any.whl", hash = "sha256:d81e8ec813f5089d3624bec93944beaefa86c0c3a4ef1111cbef676a761ebccf", size = 417784, upload-time = "2026-02-03T15:35:18.489Z" }, +] + [[package]] name = "gitdb" version = "4.0.12" @@ -188,6 +620,73 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/6a/09/e21df6aef1e1ffc0c816f0522ddc3f6dcded766c3261813131c78a704470/gitpython-3.1.46-py3-none-any.whl", hash = "sha256:79812ed143d9d25b6d176a10bb511de0f9c67b1fa641d82097b0ab90398a2058", size = 208620, upload-time = "2026-01-01T15:37:30.574Z" }, ] +[[package]] +name = "h11" +version = "0.16.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/01/ee/02a2c011bdab74c6fb3c75474d40b3052059d95df7e73351460c8588d963/h11-0.16.0.tar.gz", hash = "sha256:4e35b956cf45792e4caa5885e69fba00bdbc6ffafbfa020300e549b208ee5ff1", size = 101250, upload-time = "2025-04-24T03:35:25.427Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515, upload-time = "2025-04-24T03:35:24.344Z" }, +] + +[[package]] +name = "httpcore" +version = "1.0.9" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "h11" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/06/94/82699a10bca87a5556c9c59b5963f2d039dbd239f25bc2a63907a05a14cb/httpcore-1.0.9.tar.gz", hash = "sha256:6e34463af53fd2ab5d807f399a9b45ea31c3dfa2276f15a2c3f00afff6e176e8", size = 85484, upload-time = "2025-04-24T22:06:22.219Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7e/f5/f66802a942d491edb555dd61e3a9961140fd64c90bce1eafd741609d334d/httpcore-1.0.9-py3-none-any.whl", hash = "sha256:2d400746a40668fc9dec9810239072b40b4484b640a8c38fd654a024c7a1bf55", size = 78784, upload-time = "2025-04-24T22:06:20.566Z" }, +] + +[[package]] +name = "httpx" +version = "0.28.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "certifi" }, + { name = "httpcore" }, + { name = "idna" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b1/df/48c586a5fe32a0f01324ee087459e112ebb7224f646c0b5023f5e79e9956/httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc", size = 141406, upload-time = "2024-12-06T15:37:23.222Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517, upload-time = "2024-12-06T15:37:21.509Z" }, +] + +[[package]] +name = "httpx-sse" +version = "0.4.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/0f/4c/751061ffa58615a32c31b2d82e8482be8dd4a89154f003147acee90f2be9/httpx_sse-0.4.3.tar.gz", hash = "sha256:9b1ed0127459a66014aec3c56bebd93da3c1bc8bb6618c8082039a44889a755d", size = 15943, upload-time = "2025-10-10T21:48:22.271Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/d2/fd/6668e5aec43ab844de6fc74927e155a3b37bf40d7c3790e49fc0406b6578/httpx_sse-0.4.3-py3-none-any.whl", hash = "sha256:0ac1c9fe3c0afad2e0ebb25a934a59f4c7823b60792691f779fad2c5568830fc", size = 8960, upload-time = "2025-10-10T21:48:21.158Z" }, +] + +[[package]] +name = "idna" +version = "3.11" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/6f/6d/0703ccc57f3a7233505399edb88de3cbd678da106337b9fcde432b65ed60/idna-3.11.tar.gz", hash = "sha256:795dafcc9c04ed0c1fb032c2aa73654d8e8c5023a7df64a53f39190ada629902", size = 194582, upload-time = "2025-10-12T14:55:20.501Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0e/61/66938bbb5fc52dbdf84594873d5b51fb1f7c7794e9c0f5bd885f30bc507b/idna-3.11-py3-none-any.whl", hash = "sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea", size = 71008, upload-time = "2025-10-12T14:55:18.883Z" }, +] + +[[package]] +name = "importlib-metadata" +version = "8.7.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "zipp" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f3/49/3b30cad09e7771a4982d9975a8cbf64f00d4a1ececb53297f1d9a7be1b10/importlib_metadata-8.7.1.tar.gz", hash = "sha256:49fef1ae6440c182052f407c8d34a68f72efc36db9ca90dc0113398f2fdde8bb", size = 57107, upload-time = "2025-12-21T10:00:19.278Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fa/5e/f8e9a1d23b9c20a551a8a02ea3637b4642e22c2626e3a13a9a29cdea99eb/importlib_metadata-8.7.1-py3-none-any.whl", hash = "sha256:5a1f80bf1daa489495071efbb095d75a634cf28a8bc299581244063b53176151", size = 27865, upload-time = "2025-12-21T10:00:18.329Z" }, +] + [[package]] name = "iniconfig" version = "2.3.0" @@ -197,6 +696,51 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484, upload-time = "2025-10-18T21:55:41.639Z" }, ] +[[package]] +name = "jaraco-classes" +version = "3.4.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "more-itertools" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/06/c0/ed4a27bc5571b99e3cff68f8a9fa5b56ff7df1c2251cc715a652ddd26402/jaraco.classes-3.4.0.tar.gz", hash = "sha256:47a024b51d0239c0dd8c8540c6c7f484be3b8fcf0b2d85c13825780d3b3f3acd", size = 11780, upload-time = "2024-03-31T07:27:36.643Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7f/66/b15ce62552d84bbfcec9a4873ab79d993a1dd4edb922cbfccae192bd5b5f/jaraco.classes-3.4.0-py3-none-any.whl", hash = "sha256:f662826b6bed8cace05e7ff873ce0f9283b5c924470fe664fff1c2f00f581790", size = 6777, upload-time = "2024-03-31T07:27:34.792Z" }, +] + +[[package]] +name = "jaraco-context" +version = "6.1.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "backports-tarfile", marker = "python_full_version < '3.12'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/cb/9c/a788f5bb29c61e456b8ee52ce76dbdd32fd72cd73dd67bc95f42c7a8d13c/jaraco_context-6.1.0.tar.gz", hash = "sha256:129a341b0a85a7db7879e22acd66902fda67882db771754574338898b2d5d86f", size = 15850, upload-time = "2026-01-13T02:53:53.847Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8d/48/aa685dbf1024c7bd82bede569e3a85f82c32fd3d79ba5fea578f0159571a/jaraco_context-6.1.0-py3-none-any.whl", 
hash = "sha256:a43b5ed85815223d0d3cfdb6d7ca0d2bc8946f28f30b6f3216bda070f68badda", size = 7065, upload-time = "2026-01-13T02:53:53.031Z" }, +] + +[[package]] +name = "jaraco-functools" +version = "4.4.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "more-itertools" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/0f/27/056e0638a86749374d6f57d0b0db39f29509cce9313cf91bdc0ac4d91084/jaraco_functools-4.4.0.tar.gz", hash = "sha256:da21933b0417b89515562656547a77b4931f98176eb173644c0d35032a33d6bb", size = 19943, upload-time = "2025-12-21T09:29:43.6Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fd/c4/813bb09f0985cb21e959f21f2464169eca882656849adf727ac7bb7e1767/jaraco_functools-4.4.0-py3-none-any.whl", hash = "sha256:9eec1e36f45c818d9bf307c8948eb03b2b56cd44087b3cdc989abca1f20b9176", size = 10481, upload-time = "2025-12-21T09:29:42.27Z" }, +] + +[[package]] +name = "jeepney" +version = "0.9.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/7b/6f/357efd7602486741aa73ffc0617fb310a29b588ed0fd69c2399acbb85b0c/jeepney-0.9.0.tar.gz", hash = "sha256:cf0e9e845622b81e4a28df94c40345400256ec608d0e55bb8a3feaa9163f5732", size = 106758, upload-time = "2025-02-27T18:51:01.684Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b2/a3/e137168c9c44d18eff0376253da9f1e9234d0239e0ee230d2fee6cea8e55/jeepney-0.9.0-py3-none-any.whl", hash = "sha256:97e5714520c16fc0a45695e5365a2e11b81ea79bba796e26f9f1d178cb182683", size = 49010, upload-time = "2025-02-27T18:51:00.104Z" }, +] + [[package]] name = "jinja2" version = "3.1.6" @@ -209,6 +753,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/62/a1/3d680cbfd5f4b8f15abc1d571870c5fc3e594bb582bc3b64ea099db13e56/jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67", size = 134899, upload-time = "2025-03-05T20:05:00.369Z" }, ] +[[package]] +name = "jsonref" +version = "1.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/aa/0d/c1f3277e90ccdb50d33ed5ba1ec5b3f0a242ed8c1b1a85d3afeb68464dca/jsonref-1.1.0.tar.gz", hash = "sha256:32fe8e1d85af0fdefbebce950af85590b22b60f9e95443176adbde4e1ecea552", size = 8814, upload-time = "2023-01-16T16:10:04.455Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0c/ec/e1db9922bceb168197a558a2b8c03a7963f1afe93517ddd3cf99f202f996/jsonref-1.1.0-py3-none-any.whl", hash = "sha256:590dc7773df6c21cbf948b5dac07a72a251db28b0238ceecce0a2abfa8ec30a9", size = 9425, upload-time = "2023-01-16T16:10:02.255Z" }, +] + [[package]] name = "jsonschema" version = "4.26.0" @@ -224,6 +777,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/69/90/f63fb5873511e014207a475e2bb4e8b2e570d655b00ac19a9a0ca0a385ee/jsonschema-4.26.0-py3-none-any.whl", hash = "sha256:d489f15263b8d200f8387e64b4c3a75f06629559fb73deb8fdfb525f2dab50ce", size = 90630, upload-time = "2026-01-07T13:41:05.306Z" }, ] +[[package]] +name = "jsonschema-path" +version = "0.3.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pathable" }, + { name = "pyyaml" }, + { name = "referencing" }, + { name = "requests" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/6e/45/41ebc679c2a4fced6a722f624c18d658dee42612b83ea24c1caf7c0eb3a8/jsonschema_path-0.3.4.tar.gz", hash = "sha256:8365356039f16cc65fddffafda5f58766e34bebab7d6d105616ab52bc4297001", size = 11159, upload-time = 
"2025-01-24T14:33:16.547Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cb/58/3485da8cb93d2f393bce453adeef16896751f14ba3e2024bc21dc9597646/jsonschema_path-0.3.4-py3-none-any.whl", hash = "sha256:f502191fdc2b22050f9a81c9237be9d27145b9001c55842bece5e94e382e52f8", size = 14810, upload-time = "2025-01-24T14:33:14.652Z" }, +] + [[package]] name = "jsonschema-specifications" version = "2025.9.1" @@ -236,6 +804,24 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/41/45/1a4ed80516f02155c51f51e8cedb3c1902296743db0bbc66608a0db2814f/jsonschema_specifications-2025.9.1-py3-none-any.whl", hash = "sha256:98802fee3a11ee76ecaca44429fda8a41bff98b00a0f2838151b113f210cc6fe", size = 18437, upload-time = "2025-09-08T01:34:57.871Z" }, ] +[[package]] +name = "keyring" +version = "25.7.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "importlib-metadata", marker = "python_full_version < '3.12'" }, + { name = "jaraco-classes" }, + { name = "jaraco-context" }, + { name = "jaraco-functools" }, + { name = "jeepney", marker = "sys_platform == 'linux'" }, + { name = "pywin32-ctypes", marker = "sys_platform == 'win32'" }, + { name = "secretstorage", marker = "sys_platform == 'linux'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/43/4b/674af6ef2f97d56f0ab5153bf0bfa28ccb6c3ed4d1babf4305449668807b/keyring-25.7.0.tar.gz", hash = "sha256:fe01bd85eb3f8fb3dd0405defdeac9a5b4f6f0439edbb3149577f244a2e8245b", size = 63516, upload-time = "2025-11-16T16:26:09.482Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/81/db/e655086b7f3a705df045bf0933bdd9c2f79bb3c97bfef1384598bb79a217/keyring-25.7.0-py3-none-any.whl", hash = "sha256:be4a0b195f149690c166e850609a477c532ddbfbaed96a404d4e43f8d5e2689f", size = 39160, upload-time = "2025-11-16T16:26:08.402Z" }, +] + [[package]] name = "librt" version = "0.7.7" @@ -299,6 +885,69 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/51/0e/b756c7708143a63fca65a51ca07990fa647db2cc8fcd65177b9e96680255/librt-0.7.7-cp314-cp314t-win_arm64.whl", hash = "sha256:142c2cd91794b79fd0ce113bd658993b7ede0fe93057668c2f98a45ca00b7e91", size = 39724, upload-time = "2026-01-01T23:52:09.745Z" }, ] +[[package]] +name = "lupa" +version = "2.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b8/1c/191c3e6ec6502e3dbe25a53e27f69a5daeac3e56de1f73c0138224171ead/lupa-2.6.tar.gz", hash = "sha256:9a770a6e89576be3447668d7ced312cd6fd41d3c13c2462c9dc2c2ab570e45d9", size = 7240282, upload-time = "2025-10-24T07:20:29.738Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ca/29/1f66907c1ebf1881735afa695e646762c674f00738ebf66d795d59fc0665/lupa-2.6-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:6d988c0f9331b9f2a5a55186701a25444ab10a1432a1021ee58011499ecbbdd5", size = 962875, upload-time = "2025-10-24T07:17:39.107Z" }, + { url = "https://files.pythonhosted.org/packages/e6/67/4a748604be360eb9c1c215f6a0da921cd1a2b44b2c5951aae6fb83019d3a/lupa-2.6-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:ebe1bbf48259382c72a6fe363dea61a0fd6fe19eab95e2ae881e20f3654587bf", size = 1935390, upload-time = "2025-10-24T07:17:41.427Z" }, + { url = "https://files.pythonhosted.org/packages/ac/0c/8ef9ee933a350428b7bdb8335a37ef170ab0bb008bbf9ca8f4f4310116b6/lupa-2.6-cp311-cp311-macosx_11_0_x86_64.whl", hash = "sha256:a8fcee258487cf77cdd41560046843bb38c2e18989cd19671dd1e2596f798306", size = 992193, upload-time = "2025-10-24T07:17:43.231Z" }, + { url = 
"https://files.pythonhosted.org/packages/65/46/e6c7facebdb438db8a65ed247e56908818389c1a5abbf6a36aab14f1057d/lupa-2.6-cp311-cp311-manylinux2010_i686.manylinux_2_12_i686.manylinux_2_28_i686.whl", hash = "sha256:561a8e3be800827884e767a694727ed8482d066e0d6edfcbf423b05e63b05535", size = 1165844, upload-time = "2025-10-24T07:17:45.437Z" }, + { url = "https://files.pythonhosted.org/packages/1c/26/9f1154c6c95f175ccbf96aa96c8f569c87f64f463b32473e839137601a8b/lupa-2.6-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:af880a62d47991cae78b8e9905c008cbfdc4a3a9723a66310c2634fc7644578c", size = 1048069, upload-time = "2025-10-24T07:17:47.181Z" }, + { url = "https://files.pythonhosted.org/packages/68/67/2cc52ab73d6af81612b2ea24c870d3fa398443af8e2875e5befe142398b1/lupa-2.6-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:80b22923aa4023c86c0097b235615f89d469a0c4eee0489699c494d3367c4c85", size = 2079079, upload-time = "2025-10-24T07:17:49.755Z" }, + { url = "https://files.pythonhosted.org/packages/2e/dc/f843f09bbf325f6e5ee61730cf6c3409fc78c010d968c7c78acba3019ca7/lupa-2.6-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:153d2cc6b643f7efb9cfc0c6bb55ec784d5bac1a3660cfc5b958a7b8f38f4a75", size = 1071428, upload-time = "2025-10-24T07:17:51.991Z" }, + { url = "https://files.pythonhosted.org/packages/2e/60/37533a8d85bf004697449acb97ecdacea851acad28f2ad3803662487dd2a/lupa-2.6-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:3fa8777e16f3ded50b72967dc17e23f5a08e4f1e2c9456aff2ebdb57f5b2869f", size = 1181756, upload-time = "2025-10-24T07:17:53.752Z" }, + { url = "https://files.pythonhosted.org/packages/e4/f2/cf29b20dbb4927b6a3d27c339ac5d73e74306ecc28c8e2c900b2794142ba/lupa-2.6-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:8dbdcbe818c02a2f56f5ab5ce2de374dab03e84b25266cfbaef237829bc09b3f", size = 2175687, upload-time = "2025-10-24T07:17:56.228Z" }, + { url = "https://files.pythonhosted.org/packages/94/7c/050e02f80c7131b63db1474bff511e63c545b5a8636a24cbef3fc4da20b6/lupa-2.6-cp311-cp311-win32.whl", hash = "sha256:defaf188fde8f7a1e5ce3a5e6d945e533b8b8d547c11e43b96c9b7fe527f56dc", size = 1412592, upload-time = "2025-10-24T07:17:59.062Z" }, + { url = "https://files.pythonhosted.org/packages/6f/9a/6f2af98aa5d771cea661f66c8eb8f53772ec1ab1dfbce24126cfcd189436/lupa-2.6-cp311-cp311-win_amd64.whl", hash = "sha256:9505ae600b5c14f3e17e70f87f88d333717f60411faca1ddc6f3e61dce85fa9e", size = 1669194, upload-time = "2025-10-24T07:18:01.647Z" }, + { url = "https://files.pythonhosted.org/packages/94/86/ce243390535c39d53ea17ccf0240815e6e457e413e40428a658ea4ee4b8d/lupa-2.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:47ce718817ef1cc0c40d87c3d5ae56a800d61af00fbc0fad1ca9be12df2f3b56", size = 951707, upload-time = "2025-10-24T07:18:03.884Z" }, + { url = "https://files.pythonhosted.org/packages/86/85/cedea5e6cbeb54396fdcc55f6b741696f3f036d23cfaf986d50d680446da/lupa-2.6-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:7aba985b15b101495aa4b07112cdc08baa0c545390d560ad5cfde2e9e34f4d58", size = 1916703, upload-time = "2025-10-24T07:18:05.6Z" }, + { url = "https://files.pythonhosted.org/packages/24/be/3d6b5f9a8588c01a4d88129284c726017b2089f3a3fd3ba8bd977292fea0/lupa-2.6-cp312-cp312-macosx_11_0_x86_64.whl", hash = "sha256:b766f62f95b2739f2248977d29b0722e589dcf4f0ccfa827ccbd29f0148bd2e5", size = 985152, upload-time = "2025-10-24T07:18:08.561Z" }, + { url = 
"https://files.pythonhosted.org/packages/eb/23/9f9a05beee5d5dce9deca4cb07c91c40a90541fc0a8e09db4ee670da550f/lupa-2.6-cp312-cp312-manylinux2010_i686.manylinux_2_12_i686.manylinux_2_28_i686.whl", hash = "sha256:00a934c23331f94cb51760097ebfab14b005d55a6b30a2b480e3c53dd2fa290d", size = 1159599, upload-time = "2025-10-24T07:18:10.346Z" }, + { url = "https://files.pythonhosted.org/packages/40/4e/e7c0583083db9d7f1fd023800a9767d8e4391e8330d56c2373d890ac971b/lupa-2.6-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:21de9f38bd475303e34a042b7081aabdf50bd9bafd36ce4faea2f90fd9f15c31", size = 1038686, upload-time = "2025-10-24T07:18:12.112Z" }, + { url = "https://files.pythonhosted.org/packages/1c/9f/5a4f7d959d4feba5e203ff0c31889e74d1ca3153122be4a46dca7d92bf7c/lupa-2.6-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:cf3bda96d3fc41237e964a69c23647d50d4e28421111360274d4799832c560e9", size = 2071956, upload-time = "2025-10-24T07:18:14.572Z" }, + { url = "https://files.pythonhosted.org/packages/92/34/2f4f13ca65d01169b1720176aedc4af17bc19ee834598c7292db232cb6dc/lupa-2.6-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:5a76ead245da54801a81053794aa3975f213221f6542d14ec4b859ee2e7e0323", size = 1057199, upload-time = "2025-10-24T07:18:16.379Z" }, + { url = "https://files.pythonhosted.org/packages/35/2a/5f7d2eebec6993b0dcd428e0184ad71afb06a45ba13e717f6501bfed1da3/lupa-2.6-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:8dd0861741caa20886ddbda0a121d8e52fb9b5bb153d82fa9bba796962bf30e8", size = 1173693, upload-time = "2025-10-24T07:18:18.153Z" }, + { url = "https://files.pythonhosted.org/packages/e4/29/089b4d2f8e34417349af3904bb40bec40b65c8731f45e3fd8d497ca573e5/lupa-2.6-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:239e63948b0b23023f81d9a19a395e768ed3da6a299f84e7963b8f813f6e3f9c", size = 2164394, upload-time = "2025-10-24T07:18:20.403Z" }, + { url = "https://files.pythonhosted.org/packages/f3/1b/79c17b23c921f81468a111cad843b076a17ef4b684c4a8dff32a7969c3f0/lupa-2.6-cp312-cp312-win32.whl", hash = "sha256:325894e1099499e7a6f9c351147661a2011887603c71086d36fe0f964d52d1ce", size = 1420647, upload-time = "2025-10-24T07:18:23.368Z" }, + { url = "https://files.pythonhosted.org/packages/b8/15/5121e68aad3584e26e1425a5c9a79cd898f8a152292059e128c206ee817c/lupa-2.6-cp312-cp312-win_amd64.whl", hash = "sha256:c735a1ce8ee60edb0fe71d665f1e6b7c55c6021f1d340eb8c865952c602cd36f", size = 1688529, upload-time = "2025-10-24T07:18:25.523Z" }, + { url = "https://files.pythonhosted.org/packages/28/1d/21176b682ca5469001199d8b95fa1737e29957a3d185186e7a8b55345f2e/lupa-2.6-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:663a6e58a0f60e7d212017d6678639ac8df0119bc13c2145029dcba084391310", size = 947232, upload-time = "2025-10-24T07:18:27.878Z" }, + { url = "https://files.pythonhosted.org/packages/ce/4c/d327befb684660ca13cf79cd1f1d604331808f9f1b6fb6bf57832f8edf80/lupa-2.6-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:d1f5afda5c20b1f3217a80e9bc1b77037f8a6eb11612fd3ada19065303c8f380", size = 1908625, upload-time = "2025-10-24T07:18:29.944Z" }, + { url = "https://files.pythonhosted.org/packages/66/8e/ad22b0a19454dfd08662237a84c792d6d420d36b061f239e084f29d1a4f3/lupa-2.6-cp313-cp313-macosx_11_0_x86_64.whl", hash = "sha256:26f2b3c085fe76e9119e48c1013c1cccdc1f51585d456858290475aa38e7089e", size = 981057, upload-time = "2025-10-24T07:18:31.553Z" }, + { url = 
"https://files.pythonhosted.org/packages/5c/48/74859073ab276bd0566c719f9ca0108b0cfc1956ca0d68678d117d47d155/lupa-2.6-cp313-cp313-manylinux2010_i686.manylinux_2_12_i686.manylinux_2_28_i686.whl", hash = "sha256:60d2f902c7b96fb8ab98493dcff315e7bb4d0b44dc9dd76eb37de575025d5685", size = 1156227, upload-time = "2025-10-24T07:18:33.981Z" }, + { url = "https://files.pythonhosted.org/packages/09/6c/0e9ded061916877253c2266074060eb71ed99fb21d73c8c114a76725bce2/lupa-2.6-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a02d25dee3a3250967c36590128d9220ae02f2eda166a24279da0b481519cbff", size = 1035752, upload-time = "2025-10-24T07:18:36.32Z" }, + { url = "https://files.pythonhosted.org/packages/dd/ef/f8c32e454ef9f3fe909f6c7d57a39f950996c37a3deb7b391fec7903dab7/lupa-2.6-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6eae1ee16b886b8914ff292dbefbf2f48abfbdee94b33a88d1d5475e02423203", size = 2069009, upload-time = "2025-10-24T07:18:38.072Z" }, + { url = "https://files.pythonhosted.org/packages/53/dc/15b80c226a5225815a890ee1c11f07968e0aba7a852df41e8ae6fe285063/lupa-2.6-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:b0edd5073a4ee74ab36f74fe61450148e6044f3952b8d21248581f3c5d1a58be", size = 1056301, upload-time = "2025-10-24T07:18:40.165Z" }, + { url = "https://files.pythonhosted.org/packages/31/14/2086c1425c985acfb30997a67e90c39457122df41324d3c179d6ee2292c6/lupa-2.6-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:0c53ee9f22a8a17e7d4266ad48e86f43771951797042dd51d1494aaa4f5f3f0a", size = 1170673, upload-time = "2025-10-24T07:18:42.426Z" }, + { url = "https://files.pythonhosted.org/packages/10/e5/b216c054cf86576c0191bf9a9f05de6f7e8e07164897d95eea0078dca9b2/lupa-2.6-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:de7c0f157a9064a400d828789191a96da7f4ce889969a588b87ec80de9b14772", size = 2162227, upload-time = "2025-10-24T07:18:46.112Z" }, + { url = "https://files.pythonhosted.org/packages/59/2f/33ecb5bedf4f3bc297ceacb7f016ff951331d352f58e7e791589609ea306/lupa-2.6-cp313-cp313-win32.whl", hash = "sha256:ee9523941ae0a87b5b703417720c5d78f72d2f5bc23883a2ea80a949a3ed9e75", size = 1419558, upload-time = "2025-10-24T07:18:48.371Z" }, + { url = "https://files.pythonhosted.org/packages/f9/b4/55e885834c847ea610e111d87b9ed4768f0afdaeebc00cd46810f25029f6/lupa-2.6-cp313-cp313-win_amd64.whl", hash = "sha256:b1335a5835b0a25ebdbc75cf0bda195e54d133e4d994877ef025e218c2e59db9", size = 1683424, upload-time = "2025-10-24T07:18:50.976Z" }, + { url = "https://files.pythonhosted.org/packages/66/9d/d9427394e54d22a35d1139ef12e845fd700d4872a67a34db32516170b746/lupa-2.6-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:dcb6d0a3264873e1653bc188499f48c1fb4b41a779e315eba45256cfe7bc33c1", size = 953818, upload-time = "2025-10-24T07:18:53.378Z" }, + { url = "https://files.pythonhosted.org/packages/10/41/27bbe81953fb2f9ecfced5d9c99f85b37964cfaf6aa8453bb11283983721/lupa-2.6-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:a37e01f2128f8c36106726cb9d360bac087d58c54b4522b033cc5691c584db18", size = 1915850, upload-time = "2025-10-24T07:18:55.259Z" }, + { url = "https://files.pythonhosted.org/packages/a3/98/f9ff60db84a75ba8725506bbf448fb085bc77868a021998ed2a66d920568/lupa-2.6-cp314-cp314-macosx_11_0_x86_64.whl", hash = "sha256:458bd7e9ff3c150b245b0fcfbb9bd2593d1152ea7f0a7b91c1d185846da033fe", size = 982344, upload-time = "2025-10-24T07:18:57.05Z" }, + { url = 
"https://files.pythonhosted.org/packages/41/f7/f39e0f1c055c3b887d86b404aaf0ca197b5edfd235a8b81b45b25bac7fc3/lupa-2.6-cp314-cp314-manylinux2010_i686.manylinux_2_12_i686.manylinux_2_28_i686.whl", hash = "sha256:052ee82cac5206a02df77119c325339acbc09f5ce66967f66a2e12a0f3211cad", size = 1156543, upload-time = "2025-10-24T07:18:59.251Z" }, + { url = "https://files.pythonhosted.org/packages/9e/9c/59e6cffa0d672d662ae17bd7ac8ecd2c89c9449dee499e3eb13ca9cd10d9/lupa-2.6-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:96594eca3c87dd07938009e95e591e43d554c1dbd0385be03c100367141db5a8", size = 1047974, upload-time = "2025-10-24T07:19:01.449Z" }, + { url = "https://files.pythonhosted.org/packages/23/c6/a04e9cef7c052717fcb28fb63b3824802488f688391895b618e39be0f684/lupa-2.6-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e8faddd9d198688c8884091173a088a8e920ecc96cda2ffed576a23574c4b3f6", size = 2073458, upload-time = "2025-10-24T07:19:03.369Z" }, + { url = "https://files.pythonhosted.org/packages/e6/10/824173d10f38b51fc77785228f01411b6ca28826ce27404c7c912e0e442c/lupa-2.6-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:daebb3a6b58095c917e76ba727ab37b27477fb926957c825205fbda431552134", size = 1067683, upload-time = "2025-10-24T07:19:06.2Z" }, + { url = "https://files.pythonhosted.org/packages/b6/dc/9692fbcf3c924d9c4ece2d8d2f724451ac2e09af0bd2a782db1cef34e799/lupa-2.6-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:f3154e68972befe0f81564e37d8142b5d5d79931a18309226a04ec92487d4ea3", size = 1171892, upload-time = "2025-10-24T07:19:08.544Z" }, + { url = "https://files.pythonhosted.org/packages/84/ff/e318b628d4643c278c96ab3ddea07fc36b075a57383c837f5b11e537ba9d/lupa-2.6-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:e4dadf77b9fedc0bfa53417cc28dc2278a26d4cbd95c29f8927ad4d8fe0a7ef9", size = 2166641, upload-time = "2025-10-24T07:19:10.485Z" }, + { url = "https://files.pythonhosted.org/packages/12/f7/a6f9ec2806cf2d50826980cdb4b3cffc7691dc6f95e13cc728846d5cb793/lupa-2.6-cp314-cp314-win32.whl", hash = "sha256:cb34169c6fa3bab3e8ac58ca21b8a7102f6a94b6a5d08d3636312f3f02fafd8f", size = 1456857, upload-time = "2025-10-24T07:19:37.989Z" }, + { url = "https://files.pythonhosted.org/packages/c5/de/df71896f25bdc18360fdfa3b802cd7d57d7fede41a0e9724a4625b412c85/lupa-2.6-cp314-cp314-win_amd64.whl", hash = "sha256:b74f944fe46c421e25d0f8692aef1e842192f6f7f68034201382ac440ef9ea67", size = 1731191, upload-time = "2025-10-24T07:19:40.281Z" }, + { url = "https://files.pythonhosted.org/packages/47/3c/a1f23b01c54669465f5f4c4083107d496fbe6fb45998771420e9aadcf145/lupa-2.6-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:0e21b716408a21ab65723f8841cf7f2f37a844b7a965eeabb785e27fca4099cf", size = 999343, upload-time = "2025-10-24T07:19:12.519Z" }, + { url = "https://files.pythonhosted.org/packages/c5/6d/501994291cb640bfa2ccf7f554be4e6914afa21c4026bd01bff9ca8aac57/lupa-2.6-cp314-cp314t-macosx_11_0_universal2.whl", hash = "sha256:589db872a141bfff828340079bbdf3e9a31f2689f4ca0d88f97d9e8c2eae6142", size = 2000730, upload-time = "2025-10-24T07:19:14.869Z" }, + { url = "https://files.pythonhosted.org/packages/53/a5/457ffb4f3f20469956c2d4c4842a7675e884efc895b2f23d126d23e126cc/lupa-2.6-cp314-cp314t-macosx_11_0_x86_64.whl", hash = "sha256:cd852a91a4a9d4dcbb9a58100f820a75a425703ec3e3f049055f60b8533b7953", size = 1021553, upload-time = "2025-10-24T07:19:17.123Z" }, + { url = 
"https://files.pythonhosted.org/packages/51/6b/36bb5a5d0960f2a5c7c700e0819abb76fd9bf9c1d8a66e5106416d6e9b14/lupa-2.6-cp314-cp314t-manylinux2010_i686.manylinux_2_12_i686.manylinux_2_28_i686.whl", hash = "sha256:0334753be028358922415ca97a64a3048e4ed155413fc4eaf87dd0a7e2752983", size = 1133275, upload-time = "2025-10-24T07:19:20.51Z" }, + { url = "https://files.pythonhosted.org/packages/19/86/202ff4429f663013f37d2229f6176ca9f83678a50257d70f61a0a97281bf/lupa-2.6-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:661d895cd38c87658a34780fac54a690ec036ead743e41b74c3fb81a9e65a6aa", size = 1038441, upload-time = "2025-10-24T07:19:22.509Z" }, + { url = "https://files.pythonhosted.org/packages/a7/42/d8125f8e420714e5b52e9c08d88b5329dfb02dcca731b4f21faaee6cc5b5/lupa-2.6-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6aa58454ccc13878cc177c62529a2056be734da16369e451987ff92784994ca7", size = 2058324, upload-time = "2025-10-24T07:19:24.979Z" }, + { url = "https://files.pythonhosted.org/packages/2b/2c/47bf8b84059876e877a339717ddb595a4a7b0e8740bacae78ba527562e1c/lupa-2.6-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:1425017264e470c98022bba8cff5bd46d054a827f5df6b80274f9cc71dafd24f", size = 1060250, upload-time = "2025-10-24T07:19:27.262Z" }, + { url = "https://files.pythonhosted.org/packages/c2/06/d88add2b6406ca1bdec99d11a429222837ca6d03bea42ca75afa169a78cb/lupa-2.6-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:224af0532d216e3105f0a127410f12320f7c5f1aa0300bdf9646b8d9afb0048c", size = 1151126, upload-time = "2025-10-24T07:19:29.522Z" }, + { url = "https://files.pythonhosted.org/packages/b4/a0/89e6a024c3b4485b89ef86881c9d55e097e7cb0bdb74efb746f2fa6a9a76/lupa-2.6-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:9abb98d5a8fd27c8285302e82199f0e56e463066f88f619d6594a450bf269d80", size = 2153693, upload-time = "2025-10-24T07:19:31.379Z" }, + { url = "https://files.pythonhosted.org/packages/b6/36/a0f007dc58fc1bbf51fb85dcc82fcb1f21b8c4261361de7dab0e3d8521ef/lupa-2.6-cp314-cp314t-win32.whl", hash = "sha256:1849efeba7a8f6fb8aa2c13790bee988fd242ae404bd459509640eeea3d1e291", size = 1590104, upload-time = "2025-10-24T07:19:33.514Z" }, + { url = "https://files.pythonhosted.org/packages/7d/5e/db903ce9cf82c48d6b91bf6d63ae4c8d0d17958939a4e04ba6b9f38b8643/lupa-2.6-cp314-cp314t-win_amd64.whl", hash = "sha256:fc1498d1a4fc028bc521c26d0fad4ca00ed63b952e32fb95949bda76a04bad52", size = 1913818, upload-time = "2025-10-24T07:19:36.039Z" }, +] + [[package]] name = "markdown-it-py" version = "4.0.0" @@ -385,6 +1034,31 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/70/bc/6f1c2f612465f5fa89b95bead1f44dcb607670fd42891d8fdcd5d039f4f4/markupsafe-3.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:32001d6a8fc98c8cb5c947787c5d08b0a50663d139f1305bac5885d98d9b40fa", size = 14146, upload-time = "2025-09-27T18:37:28.327Z" }, ] +[[package]] +name = "mcp" +version = "1.26.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "httpx" }, + { name = "httpx-sse" }, + { name = "jsonschema" }, + { name = "pydantic" }, + { name = "pydantic-settings" }, + { name = "pyjwt", extra = ["crypto"] }, + { name = "python-multipart" }, + { name = "pywin32", marker = "sys_platform == 'win32'" }, + { name = "sse-starlette" }, + { name = "starlette" }, + { name = "typing-extensions" }, + { name = "typing-inspection" }, + { name = "uvicorn", marker = "sys_platform != 'emscripten'" }, 
+] +sdist = { url = "https://files.pythonhosted.org/packages/fc/6d/62e76bbb8144d6ed86e202b5edd8a4cb631e7c8130f3f4893c3f90262b10/mcp-1.26.0.tar.gz", hash = "sha256:db6e2ef491eecc1a0d93711a76f28dec2e05999f93afd48795da1c1137142c66", size = 608005, upload-time = "2026-01-24T19:40:32.468Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fd/d9/eaa1f80170d2b7c5ba23f3b59f766f3a0bb41155fbc32a69adfa1adaaef9/mcp-1.26.0-py3-none-any.whl", hash = "sha256:904a21c33c25aa98ddbeb47273033c435e595bbacfdb177f4bd87f6dceebe1ca", size = 233615, upload-time = "2026-01-24T19:40:30.652Z" }, +] + [[package]] name = "mdurl" version = "0.1.2" @@ -394,6 +1068,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979, upload-time = "2022-08-14T12:40:09.779Z" }, ] +[[package]] +name = "more-itertools" +version = "10.8.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ea/5d/38b681d3fce7a266dd9ab73c66959406d565b3e85f21d5e66e1181d93721/more_itertools-10.8.0.tar.gz", hash = "sha256:f638ddf8a1a0d134181275fb5d58b086ead7c6a72429ad725c67503f13ba30bd", size = 137431, upload-time = "2025-09-02T15:23:11.018Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a4/8e/469e5a4a2f5855992e425f3cb33804cc07bf18d48f2db061aec61ce50270/more_itertools-10.8.0-py3-none-any.whl", hash = "sha256:52d4362373dcf7c52546bc4af9a86ee7c4579df9a8dc268be0a2f949d376cc9b", size = 69667, upload-time = "2025-09-02T15:23:09.635Z" }, +] + [[package]] name = "mypy" version = "1.19.1" @@ -442,6 +1125,31 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/79/7b/2c79738432f5c924bef5071f933bcc9efd0473bac3b4aa584a6f7c1c8df8/mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505", size = 4963, upload-time = "2025-04-22T14:54:22.983Z" }, ] +[[package]] +name = "openapi-pydantic" +version = "0.5.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pydantic" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/02/2e/58d83848dd1a79cb92ed8e63f6ba901ca282c5f09d04af9423ec26c56fd7/openapi_pydantic-0.5.1.tar.gz", hash = "sha256:ff6835af6bde7a459fb93eb93bb92b8749b754fc6e51b2f1590a19dc3005ee0d", size = 60892, upload-time = "2025-01-08T19:29:27.083Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/12/cf/03675d8bd8ecbf4445504d8071adab19f5f993676795708e36402ab38263/openapi_pydantic-0.5.1-py3-none-any.whl", hash = "sha256:a3a09ef4586f5bd760a8df7f43028b60cafb6d9f61de2acba9574766255ab146", size = 96381, upload-time = "2025-01-08T19:29:25.275Z" }, +] + +[[package]] +name = "opentelemetry-api" +version = "1.39.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "importlib-metadata" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/97/b9/3161be15bb8e3ad01be8be5a968a9237c3027c5be504362ff800fca3e442/opentelemetry_api-1.39.1.tar.gz", hash = "sha256:fbde8c80e1b937a2c61f20347e91c0c18a1940cecf012d62e65a7caf08967c9c", size = 65767, upload-time = "2025-12-11T13:32:39.182Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cf/df/d3f1ddf4bb4cb50ed9b1139cc7b1c54c34a1e7ce8fd1b9a37c0d1551a6bd/opentelemetry_api-1.39.1-py3-none-any.whl", hash = 
"sha256:2edd8463432a7f8443edce90972169b195e7d6a05500cd29e6d13898187c9950", size = 66356, upload-time = "2025-12-11T13:32:17.304Z" }, +] + [[package]] name = "packaging" version = "25.0" @@ -451,6 +1159,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/20/12/38679034af332785aac8774540895e234f4d07f7545804097de4b666afd8/packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484", size = 66469, upload-time = "2025-04-19T11:48:57.875Z" }, ] +[[package]] +name = "pathable" +version = "0.4.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/67/93/8f2c2075b180c12c1e9f6a09d1a985bc2036906b13dff1d8917e395f2048/pathable-0.4.4.tar.gz", hash = "sha256:6905a3cd17804edfac7875b5f6c9142a218c7caef78693c2dbbbfbac186d88b2", size = 8124, upload-time = "2025-01-10T18:43:13.247Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7d/eb/b6260b31b1a96386c0a880edebe26f89669098acea8e0318bff6adb378fd/pathable-0.4.4-py3-none-any.whl", hash = "sha256:5ae9e94793b6ef5a4cbe0a7ce9dbbefc1eec38df253763fd0aeeacf2762dbbc2", size = 9592, upload-time = "2025-01-10T18:43:11.88Z" }, +] + [[package]] name = "pathspec" version = "1.0.3" @@ -460,6 +1177,24 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/32/2b/121e912bd60eebd623f873fd090de0e84f322972ab25a7f9044c056804ed/pathspec-1.0.3-py3-none-any.whl", hash = "sha256:e80767021c1cc524aa3fb14bedda9c34406591343cc42797b386ce7b9354fb6c", size = 55021, upload-time = "2026-01-09T15:46:44.652Z" }, ] +[[package]] +name = "pathvalidate" +version = "3.3.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/fa/2a/52a8da6fe965dea6192eb716b357558e103aea0a1e9a8352ad575a8406ca/pathvalidate-3.3.1.tar.gz", hash = "sha256:b18c07212bfead624345bb8e1d6141cdcf15a39736994ea0b94035ad2b1ba177", size = 63262, upload-time = "2025-06-15T09:07:20.736Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9a/70/875f4a23bfc4731703a5835487d0d2fb999031bd415e7d17c0ae615c18b7/pathvalidate-3.3.1-py3-none-any.whl", hash = "sha256:5263baab691f8e1af96092fa5137ee17df5bdfbd6cff1fcac4d6ef4bc2e1735f", size = 24305, upload-time = "2025-06-15T09:07:19.117Z" }, +] + +[[package]] +name = "platformdirs" +version = "4.5.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/cf/86/0248f086a84f01b37aaec0fa567b397df1a119f73c16f6c7a9aac73ea309/platformdirs-4.5.1.tar.gz", hash = "sha256:61d5cdcc6065745cdd94f0f878977f8de9437be93de97c1c12f853c9c0cdcbda", size = 21715, upload-time = "2025-12-05T13:52:58.638Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cb/28/3bfe2fa5a7b9c46fe7e13c97bda14c895fb10fa2ebf1d0abb90e0cea7ee1/platformdirs-4.5.1-py3-none-any.whl", hash = "sha256:d03afa3963c806a9bed9d5125c8f4cb2fdaf74a55ab60e5d59b3fde758104d31", size = 18731, upload-time = "2025-12-05T13:52:56.823Z" }, +] + [[package]] name = "pluggy" version = "1.6.0" @@ -469,6 +1204,218 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" }, ] +[[package]] +name = "prometheus-client" +version = "0.24.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/f0/58/a794d23feb6b00fc0c72787d7e87d872a6730dd9ed7c7b3e954637d8f280/prometheus_client-0.24.1.tar.gz", hash = "sha256:7e0ced7fbbd40f7b84962d5d2ab6f17ef88a72504dcf7c0b40737b43b2a461f9", size = 85616, upload-time = "2026-01-14T15:26:26.965Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/74/c3/24a2f845e3917201628ecaba4f18bab4d18a337834c1df2a159ee9d22a42/prometheus_client-0.24.1-py3-none-any.whl", hash = "sha256:150db128af71a5c2482b36e588fc8a6b95e498750da4b17065947c16070f4055", size = 64057, upload-time = "2026-01-14T15:26:24.42Z" }, +] + +[[package]] +name = "py-key-value-aio" +version = "0.3.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "beartype" }, + { name = "py-key-value-shared" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/93/ce/3136b771dddf5ac905cc193b461eb67967cf3979688c6696e1f2cdcde7ea/py_key_value_aio-0.3.0.tar.gz", hash = "sha256:858e852fcf6d696d231266da66042d3355a7f9871650415feef9fca7a6cd4155", size = 50801, upload-time = "2025-11-17T16:50:04.711Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/99/10/72f6f213b8f0bce36eff21fda0a13271834e9eeff7f9609b01afdc253c79/py_key_value_aio-0.3.0-py3-none-any.whl", hash = "sha256:1c781915766078bfd608daa769fefb97e65d1d73746a3dfb640460e322071b64", size = 96342, upload-time = "2025-11-17T16:50:03.801Z" }, +] + +[package.optional-dependencies] +disk = [ + { name = "diskcache" }, + { name = "pathvalidate" }, +] +keyring = [ + { name = "keyring" }, +] +memory = [ + { name = "cachetools" }, +] +redis = [ + { name = "redis" }, +] + +[[package]] +name = "py-key-value-shared" +version = "0.3.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "beartype" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/7b/e4/1971dfc4620a3a15b4579fe99e024f5edd6e0967a71154771a059daff4db/py_key_value_shared-0.3.0.tar.gz", hash = "sha256:8fdd786cf96c3e900102945f92aa1473138ebe960ef49da1c833790160c28a4b", size = 11666, upload-time = "2025-11-17T16:50:06.849Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/51/e4/b8b0a03ece72f47dce2307d36e1c34725b7223d209fc679315ffe6a4e2c3/py_key_value_shared-0.3.0-py3-none-any.whl", hash = "sha256:5b0efba7ebca08bb158b1e93afc2f07d30b8f40c2fc12ce24a4c0d84f42f9298", size = 19560, upload-time = "2025-11-17T16:50:05.954Z" }, +] + +[[package]] +name = "pycparser" +version = "3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/1b/7d/92392ff7815c21062bea51aa7b87d45576f649f16458d78b7cf94b9ab2e6/pycparser-3.0.tar.gz", hash = "sha256:600f49d217304a5902ac3c37e1281c9fe94e4d0489de643a9504c5cdfdfc6b29", size = 103492, upload-time = "2026-01-21T14:26:51.89Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0c/c3/44f3fbbfa403ea2a7c779186dc20772604442dde72947e7d01069cbe98e3/pycparser-3.0-py3-none-any.whl", hash = "sha256:b727414169a36b7d524c1c3e31839a521725078d7b2ff038656844266160a992", size = 48172, upload-time = "2026-01-21T14:26:50.693Z" }, +] + +[[package]] +name = "pydantic" +version = "2.12.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "annotated-types" }, + { name = "pydantic-core" }, + { name = "typing-extensions" }, + { name = "typing-inspection" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/69/44/36f1a6e523abc58ae5f928898e4aca2e0ea509b5aa6f6f392a5d882be928/pydantic-2.12.5.tar.gz", hash = 
"sha256:4d351024c75c0f085a9febbb665ce8c0c6ec5d30e903bdb6394b7ede26aebb49", size = 821591, upload-time = "2025-11-26T15:11:46.471Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5a/87/b70ad306ebb6f9b585f114d0ac2137d792b48be34d732d60e597c2f8465a/pydantic-2.12.5-py3-none-any.whl", hash = "sha256:e561593fccf61e8a20fc46dfc2dfe075b8be7d0188df33f221ad1f0139180f9d", size = 463580, upload-time = "2025-11-26T15:11:44.605Z" }, +] + +[package.optional-dependencies] +email = [ + { name = "email-validator" }, +] + +[[package]] +name = "pydantic-core" +version = "2.41.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/71/70/23b021c950c2addd24ec408e9ab05d59b035b39d97cdc1130e1bce647bb6/pydantic_core-2.41.5.tar.gz", hash = "sha256:08daa51ea16ad373ffd5e7606252cc32f07bc72b28284b6bc9c6df804816476e", size = 460952, upload-time = "2025-11-04T13:43:49.098Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e8/72/74a989dd9f2084b3d9530b0915fdda64ac48831c30dbf7c72a41a5232db8/pydantic_core-2.41.5-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:a3a52f6156e73e7ccb0f8cced536adccb7042be67cb45f9562e12b319c119da6", size = 2105873, upload-time = "2025-11-04T13:39:31.373Z" }, + { url = "https://files.pythonhosted.org/packages/12/44/37e403fd9455708b3b942949e1d7febc02167662bf1a7da5b78ee1ea2842/pydantic_core-2.41.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7f3bf998340c6d4b0c9a2f02d6a400e51f123b59565d74dc60d252ce888c260b", size = 1899826, upload-time = "2025-11-04T13:39:32.897Z" }, + { url = "https://files.pythonhosted.org/packages/33/7f/1d5cab3ccf44c1935a359d51a8a2a9e1a654b744b5e7f80d41b88d501eec/pydantic_core-2.41.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:378bec5c66998815d224c9ca994f1e14c0c21cb95d2f52b6021cc0b2a58f2a5a", size = 1917869, upload-time = "2025-11-04T13:39:34.469Z" }, + { url = "https://files.pythonhosted.org/packages/6e/6a/30d94a9674a7fe4f4744052ed6c5e083424510be1e93da5bc47569d11810/pydantic_core-2.41.5-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e7b576130c69225432866fe2f4a469a85a54ade141d96fd396dffcf607b558f8", size = 2063890, upload-time = "2025-11-04T13:39:36.053Z" }, + { url = "https://files.pythonhosted.org/packages/50/be/76e5d46203fcb2750e542f32e6c371ffa9b8ad17364cf94bb0818dbfb50c/pydantic_core-2.41.5-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6cb58b9c66f7e4179a2d5e0f849c48eff5c1fca560994d6eb6543abf955a149e", size = 2229740, upload-time = "2025-11-04T13:39:37.753Z" }, + { url = "https://files.pythonhosted.org/packages/d3/ee/fed784df0144793489f87db310a6bbf8118d7b630ed07aa180d6067e653a/pydantic_core-2.41.5-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:88942d3a3dff3afc8288c21e565e476fc278902ae4d6d134f1eeda118cc830b1", size = 2350021, upload-time = "2025-11-04T13:39:40.94Z" }, + { url = "https://files.pythonhosted.org/packages/c8/be/8fed28dd0a180dca19e72c233cbf58efa36df055e5b9d90d64fd1740b828/pydantic_core-2.41.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f31d95a179f8d64d90f6831d71fa93290893a33148d890ba15de25642c5d075b", size = 2066378, upload-time = "2025-11-04T13:39:42.523Z" }, + { url = "https://files.pythonhosted.org/packages/b0/3b/698cf8ae1d536a010e05121b4958b1257f0b5522085e335360e53a6b1c8b/pydantic_core-2.41.5-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = 
"sha256:c1df3d34aced70add6f867a8cf413e299177e0c22660cc767218373d0779487b", size = 2175761, upload-time = "2025-11-04T13:39:44.553Z" }, + { url = "https://files.pythonhosted.org/packages/b8/ba/15d537423939553116dea94ce02f9c31be0fa9d0b806d427e0308ec17145/pydantic_core-2.41.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:4009935984bd36bd2c774e13f9a09563ce8de4abaa7226f5108262fa3e637284", size = 2146303, upload-time = "2025-11-04T13:39:46.238Z" }, + { url = "https://files.pythonhosted.org/packages/58/7f/0de669bf37d206723795f9c90c82966726a2ab06c336deba4735b55af431/pydantic_core-2.41.5-cp311-cp311-musllinux_1_1_armv7l.whl", hash = "sha256:34a64bc3441dc1213096a20fe27e8e128bd3ff89921706e83c0b1ac971276594", size = 2340355, upload-time = "2025-11-04T13:39:48.002Z" }, + { url = "https://files.pythonhosted.org/packages/e5/de/e7482c435b83d7e3c3ee5ee4451f6e8973cff0eb6007d2872ce6383f6398/pydantic_core-2.41.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:c9e19dd6e28fdcaa5a1de679aec4141f691023916427ef9bae8584f9c2fb3b0e", size = 2319875, upload-time = "2025-11-04T13:39:49.705Z" }, + { url = "https://files.pythonhosted.org/packages/fe/e6/8c9e81bb6dd7560e33b9053351c29f30c8194b72f2d6932888581f503482/pydantic_core-2.41.5-cp311-cp311-win32.whl", hash = "sha256:2c010c6ded393148374c0f6f0bf89d206bf3217f201faa0635dcd56bd1520f6b", size = 1987549, upload-time = "2025-11-04T13:39:51.842Z" }, + { url = "https://files.pythonhosted.org/packages/11/66/f14d1d978ea94d1bc21fc98fcf570f9542fe55bfcc40269d4e1a21c19bf7/pydantic_core-2.41.5-cp311-cp311-win_amd64.whl", hash = "sha256:76ee27c6e9c7f16f47db7a94157112a2f3a00e958bc626e2f4ee8bec5c328fbe", size = 2011305, upload-time = "2025-11-04T13:39:53.485Z" }, + { url = "https://files.pythonhosted.org/packages/56/d8/0e271434e8efd03186c5386671328154ee349ff0354d83c74f5caaf096ed/pydantic_core-2.41.5-cp311-cp311-win_arm64.whl", hash = "sha256:4bc36bbc0b7584de96561184ad7f012478987882ebf9f9c389b23f432ea3d90f", size = 1972902, upload-time = "2025-11-04T13:39:56.488Z" }, + { url = "https://files.pythonhosted.org/packages/5f/5d/5f6c63eebb5afee93bcaae4ce9a898f3373ca23df3ccaef086d0233a35a7/pydantic_core-2.41.5-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:f41a7489d32336dbf2199c8c0a215390a751c5b014c2c1c5366e817202e9cdf7", size = 2110990, upload-time = "2025-11-04T13:39:58.079Z" }, + { url = "https://files.pythonhosted.org/packages/aa/32/9c2e8ccb57c01111e0fd091f236c7b371c1bccea0fa85247ac55b1e2b6b6/pydantic_core-2.41.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:070259a8818988b9a84a449a2a7337c7f430a22acc0859c6b110aa7212a6d9c0", size = 1896003, upload-time = "2025-11-04T13:39:59.956Z" }, + { url = "https://files.pythonhosted.org/packages/68/b8/a01b53cb0e59139fbc9e4fda3e9724ede8de279097179be4ff31f1abb65a/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e96cea19e34778f8d59fe40775a7a574d95816eb150850a85a7a4c8f4b94ac69", size = 1919200, upload-time = "2025-11-04T13:40:02.241Z" }, + { url = "https://files.pythonhosted.org/packages/38/de/8c36b5198a29bdaade07b5985e80a233a5ac27137846f3bc2d3b40a47360/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ed2e99c456e3fadd05c991f8f437ef902e00eedf34320ba2b0842bd1c3ca3a75", size = 2052578, upload-time = "2025-11-04T13:40:04.401Z" }, + { url = "https://files.pythonhosted.org/packages/00/b5/0e8e4b5b081eac6cb3dbb7e60a65907549a1ce035a724368c330112adfdd/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:65840751b72fbfd82c3c640cff9284545342a4f1eb1586ad0636955b261b0b05", size = 2208504, upload-time = "2025-11-04T13:40:06.072Z" }, + { url = "https://files.pythonhosted.org/packages/77/56/87a61aad59c7c5b9dc8caad5a41a5545cba3810c3e828708b3d7404f6cef/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e536c98a7626a98feb2d3eaf75944ef6f3dbee447e1f841eae16f2f0a72d8ddc", size = 2335816, upload-time = "2025-11-04T13:40:07.835Z" }, + { url = "https://files.pythonhosted.org/packages/0d/76/941cc9f73529988688a665a5c0ecff1112b3d95ab48f81db5f7606f522d3/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eceb81a8d74f9267ef4081e246ffd6d129da5d87e37a77c9bde550cb04870c1c", size = 2075366, upload-time = "2025-11-04T13:40:09.804Z" }, + { url = "https://files.pythonhosted.org/packages/d3/43/ebef01f69baa07a482844faaa0a591bad1ef129253ffd0cdaa9d8a7f72d3/pydantic_core-2.41.5-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d38548150c39b74aeeb0ce8ee1d8e82696f4a4e16ddc6de7b1d8823f7de4b9b5", size = 2171698, upload-time = "2025-11-04T13:40:12.004Z" }, + { url = "https://files.pythonhosted.org/packages/b1/87/41f3202e4193e3bacfc2c065fab7706ebe81af46a83d3e27605029c1f5a6/pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:c23e27686783f60290e36827f9c626e63154b82b116d7fe9adba1fda36da706c", size = 2132603, upload-time = "2025-11-04T13:40:13.868Z" }, + { url = "https://files.pythonhosted.org/packages/49/7d/4c00df99cb12070b6bccdef4a195255e6020a550d572768d92cc54dba91a/pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_armv7l.whl", hash = "sha256:482c982f814460eabe1d3bb0adfdc583387bd4691ef00b90575ca0d2b6fe2294", size = 2329591, upload-time = "2025-11-04T13:40:15.672Z" }, + { url = "https://files.pythonhosted.org/packages/cc/6a/ebf4b1d65d458f3cda6a7335d141305dfa19bdc61140a884d165a8a1bbc7/pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:bfea2a5f0b4d8d43adf9d7b8bf019fb46fdd10a2e5cde477fbcb9d1fa08c68e1", size = 2319068, upload-time = "2025-11-04T13:40:17.532Z" }, + { url = "https://files.pythonhosted.org/packages/49/3b/774f2b5cd4192d5ab75870ce4381fd89cf218af999515baf07e7206753f0/pydantic_core-2.41.5-cp312-cp312-win32.whl", hash = "sha256:b74557b16e390ec12dca509bce9264c3bbd128f8a2c376eaa68003d7f327276d", size = 1985908, upload-time = "2025-11-04T13:40:19.309Z" }, + { url = "https://files.pythonhosted.org/packages/86/45/00173a033c801cacf67c190fef088789394feaf88a98a7035b0e40d53dc9/pydantic_core-2.41.5-cp312-cp312-win_amd64.whl", hash = "sha256:1962293292865bca8e54702b08a4f26da73adc83dd1fcf26fbc875b35d81c815", size = 2020145, upload-time = "2025-11-04T13:40:21.548Z" }, + { url = "https://files.pythonhosted.org/packages/f9/22/91fbc821fa6d261b376a3f73809f907cec5ca6025642c463d3488aad22fb/pydantic_core-2.41.5-cp312-cp312-win_arm64.whl", hash = "sha256:1746d4a3d9a794cacae06a5eaaccb4b8643a131d45fbc9af23e353dc0a5ba5c3", size = 1976179, upload-time = "2025-11-04T13:40:23.393Z" }, + { url = "https://files.pythonhosted.org/packages/87/06/8806241ff1f70d9939f9af039c6c35f2360cf16e93c2ca76f184e76b1564/pydantic_core-2.41.5-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:941103c9be18ac8daf7b7adca8228f8ed6bb7a1849020f643b3a14d15b1924d9", size = 2120403, upload-time = "2025-11-04T13:40:25.248Z" }, + { url = "https://files.pythonhosted.org/packages/94/02/abfa0e0bda67faa65fef1c84971c7e45928e108fe24333c81f3bfe35d5f5/pydantic_core-2.41.5-cp313-cp313-macosx_11_0_arm64.whl", hash = 
"sha256:112e305c3314f40c93998e567879e887a3160bb8689ef3d2c04b6cc62c33ac34", size = 1896206, upload-time = "2025-11-04T13:40:27.099Z" }, + { url = "https://files.pythonhosted.org/packages/15/df/a4c740c0943e93e6500f9eb23f4ca7ec9bf71b19e608ae5b579678c8d02f/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0cbaad15cb0c90aa221d43c00e77bb33c93e8d36e0bf74760cd00e732d10a6a0", size = 1919307, upload-time = "2025-11-04T13:40:29.806Z" }, + { url = "https://files.pythonhosted.org/packages/9a/e3/6324802931ae1d123528988e0e86587c2072ac2e5394b4bc2bc34b61ff6e/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:03ca43e12fab6023fc79d28ca6b39b05f794ad08ec2feccc59a339b02f2b3d33", size = 2063258, upload-time = "2025-11-04T13:40:33.544Z" }, + { url = "https://files.pythonhosted.org/packages/c9/d4/2230d7151d4957dd79c3044ea26346c148c98fbf0ee6ebd41056f2d62ab5/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:dc799088c08fa04e43144b164feb0c13f9a0bc40503f8df3e9fde58a3c0c101e", size = 2214917, upload-time = "2025-11-04T13:40:35.479Z" }, + { url = "https://files.pythonhosted.org/packages/e6/9f/eaac5df17a3672fef0081b6c1bb0b82b33ee89aa5cec0d7b05f52fd4a1fa/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:97aeba56665b4c3235a0e52b2c2f5ae9cd071b8a8310ad27bddb3f7fb30e9aa2", size = 2332186, upload-time = "2025-11-04T13:40:37.436Z" }, + { url = "https://files.pythonhosted.org/packages/cf/4e/35a80cae583a37cf15604b44240e45c05e04e86f9cfd766623149297e971/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:406bf18d345822d6c21366031003612b9c77b3e29ffdb0f612367352aab7d586", size = 2073164, upload-time = "2025-11-04T13:40:40.289Z" }, + { url = "https://files.pythonhosted.org/packages/bf/e3/f6e262673c6140dd3305d144d032f7bd5f7497d3871c1428521f19f9efa2/pydantic_core-2.41.5-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b93590ae81f7010dbe380cdeab6f515902ebcbefe0b9327cc4804d74e93ae69d", size = 2179146, upload-time = "2025-11-04T13:40:42.809Z" }, + { url = "https://files.pythonhosted.org/packages/75/c7/20bd7fc05f0c6ea2056a4565c6f36f8968c0924f19b7d97bbfea55780e73/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:01a3d0ab748ee531f4ea6c3e48ad9dac84ddba4b0d82291f87248f2f9de8d740", size = 2137788, upload-time = "2025-11-04T13:40:44.752Z" }, + { url = "https://files.pythonhosted.org/packages/3a/8d/34318ef985c45196e004bc46c6eab2eda437e744c124ef0dbe1ff2c9d06b/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_armv7l.whl", hash = "sha256:6561e94ba9dacc9c61bce40e2d6bdc3bfaa0259d3ff36ace3b1e6901936d2e3e", size = 2340133, upload-time = "2025-11-04T13:40:46.66Z" }, + { url = "https://files.pythonhosted.org/packages/9c/59/013626bf8c78a5a5d9350d12e7697d3d4de951a75565496abd40ccd46bee/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:915c3d10f81bec3a74fbd4faebe8391013ba61e5a1a8d48c4455b923bdda7858", size = 2324852, upload-time = "2025-11-04T13:40:48.575Z" }, + { url = "https://files.pythonhosted.org/packages/1a/d9/c248c103856f807ef70c18a4f986693a46a8ffe1602e5d361485da502d20/pydantic_core-2.41.5-cp313-cp313-win32.whl", hash = "sha256:650ae77860b45cfa6e2cdafc42618ceafab3a2d9a3811fcfbd3bbf8ac3c40d36", size = 1994679, upload-time = "2025-11-04T13:40:50.619Z" }, + { url = 
"https://files.pythonhosted.org/packages/9e/8b/341991b158ddab181cff136acd2552c9f35bd30380422a639c0671e99a91/pydantic_core-2.41.5-cp313-cp313-win_amd64.whl", hash = "sha256:79ec52ec461e99e13791ec6508c722742ad745571f234ea6255bed38c6480f11", size = 2019766, upload-time = "2025-11-04T13:40:52.631Z" }, + { url = "https://files.pythonhosted.org/packages/73/7d/f2f9db34af103bea3e09735bb40b021788a5e834c81eedb541991badf8f5/pydantic_core-2.41.5-cp313-cp313-win_arm64.whl", hash = "sha256:3f84d5c1b4ab906093bdc1ff10484838aca54ef08de4afa9de0f5f14d69639cd", size = 1981005, upload-time = "2025-11-04T13:40:54.734Z" }, + { url = "https://files.pythonhosted.org/packages/ea/28/46b7c5c9635ae96ea0fbb779e271a38129df2550f763937659ee6c5dbc65/pydantic_core-2.41.5-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:3f37a19d7ebcdd20b96485056ba9e8b304e27d9904d233d7b1015db320e51f0a", size = 2119622, upload-time = "2025-11-04T13:40:56.68Z" }, + { url = "https://files.pythonhosted.org/packages/74/1a/145646e5687e8d9a1e8d09acb278c8535ebe9e972e1f162ed338a622f193/pydantic_core-2.41.5-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:1d1d9764366c73f996edd17abb6d9d7649a7eb690006ab6adbda117717099b14", size = 1891725, upload-time = "2025-11-04T13:40:58.807Z" }, + { url = "https://files.pythonhosted.org/packages/23/04/e89c29e267b8060b40dca97bfc64a19b2a3cf99018167ea1677d96368273/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:25e1c2af0fce638d5f1988b686f3b3ea8cd7de5f244ca147c777769e798a9cd1", size = 1915040, upload-time = "2025-11-04T13:41:00.853Z" }, + { url = "https://files.pythonhosted.org/packages/84/a3/15a82ac7bd97992a82257f777b3583d3e84bdb06ba6858f745daa2ec8a85/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:506d766a8727beef16b7adaeb8ee6217c64fc813646b424d0804d67c16eddb66", size = 2063691, upload-time = "2025-11-04T13:41:03.504Z" }, + { url = "https://files.pythonhosted.org/packages/74/9b/0046701313c6ef08c0c1cf0e028c67c770a4e1275ca73131563c5f2a310a/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4819fa52133c9aa3c387b3328f25c1facc356491e6135b459f1de698ff64d869", size = 2213897, upload-time = "2025-11-04T13:41:05.804Z" }, + { url = "https://files.pythonhosted.org/packages/8a/cd/6bac76ecd1b27e75a95ca3a9a559c643b3afcd2dd62086d4b7a32a18b169/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2b761d210c9ea91feda40d25b4efe82a1707da2ef62901466a42492c028553a2", size = 2333302, upload-time = "2025-11-04T13:41:07.809Z" }, + { url = "https://files.pythonhosted.org/packages/4c/d2/ef2074dc020dd6e109611a8be4449b98cd25e1b9b8a303c2f0fca2f2bcf7/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:22f0fb8c1c583a3b6f24df2470833b40207e907b90c928cc8d3594b76f874375", size = 2064877, upload-time = "2025-11-04T13:41:09.827Z" }, + { url = "https://files.pythonhosted.org/packages/18/66/e9db17a9a763d72f03de903883c057b2592c09509ccfe468187f2a2eef29/pydantic_core-2.41.5-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2782c870e99878c634505236d81e5443092fba820f0373997ff75f90f68cd553", size = 2180680, upload-time = "2025-11-04T13:41:12.379Z" }, + { url = "https://files.pythonhosted.org/packages/d3/9e/3ce66cebb929f3ced22be85d4c2399b8e85b622db77dad36b73c5387f8f8/pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:0177272f88ab8312479336e1d777f6b124537d47f2123f89cb37e0accea97f90", size 
= 2138960, upload-time = "2025-11-04T13:41:14.627Z" }, + { url = "https://files.pythonhosted.org/packages/a6/62/205a998f4327d2079326b01abee48e502ea739d174f0a89295c481a2272e/pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_armv7l.whl", hash = "sha256:63510af5e38f8955b8ee5687740d6ebf7c2a0886d15a6d65c32814613681bc07", size = 2339102, upload-time = "2025-11-04T13:41:16.868Z" }, + { url = "https://files.pythonhosted.org/packages/3c/0d/f05e79471e889d74d3d88f5bd20d0ed189ad94c2423d81ff8d0000aab4ff/pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:e56ba91f47764cc14f1daacd723e3e82d1a89d783f0f5afe9c364b8bb491ccdb", size = 2326039, upload-time = "2025-11-04T13:41:18.934Z" }, + { url = "https://files.pythonhosted.org/packages/ec/e1/e08a6208bb100da7e0c4b288eed624a703f4d129bde2da475721a80cab32/pydantic_core-2.41.5-cp314-cp314-win32.whl", hash = "sha256:aec5cf2fd867b4ff45b9959f8b20ea3993fc93e63c7363fe6851424c8a7e7c23", size = 1995126, upload-time = "2025-11-04T13:41:21.418Z" }, + { url = "https://files.pythonhosted.org/packages/48/5d/56ba7b24e9557f99c9237e29f5c09913c81eeb2f3217e40e922353668092/pydantic_core-2.41.5-cp314-cp314-win_amd64.whl", hash = "sha256:8e7c86f27c585ef37c35e56a96363ab8de4e549a95512445b85c96d3e2f7c1bf", size = 2015489, upload-time = "2025-11-04T13:41:24.076Z" }, + { url = "https://files.pythonhosted.org/packages/4e/bb/f7a190991ec9e3e0ba22e4993d8755bbc4a32925c0b5b42775c03e8148f9/pydantic_core-2.41.5-cp314-cp314-win_arm64.whl", hash = "sha256:e672ba74fbc2dc8eea59fb6d4aed6845e6905fc2a8afe93175d94a83ba2a01a0", size = 1977288, upload-time = "2025-11-04T13:41:26.33Z" }, + { url = "https://files.pythonhosted.org/packages/92/ed/77542d0c51538e32e15afe7899d79efce4b81eee631d99850edc2f5e9349/pydantic_core-2.41.5-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:8566def80554c3faa0e65ac30ab0932b9e3a5cd7f8323764303d468e5c37595a", size = 2120255, upload-time = "2025-11-04T13:41:28.569Z" }, + { url = "https://files.pythonhosted.org/packages/bb/3d/6913dde84d5be21e284439676168b28d8bbba5600d838b9dca99de0fad71/pydantic_core-2.41.5-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:b80aa5095cd3109962a298ce14110ae16b8c1aece8b72f9dafe81cf597ad80b3", size = 1863760, upload-time = "2025-11-04T13:41:31.055Z" }, + { url = "https://files.pythonhosted.org/packages/5a/f0/e5e6b99d4191da102f2b0eb9687aaa7f5bea5d9964071a84effc3e40f997/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3006c3dd9ba34b0c094c544c6006cc79e87d8612999f1a5d43b769b89181f23c", size = 1878092, upload-time = "2025-11-04T13:41:33.21Z" }, + { url = "https://files.pythonhosted.org/packages/71/48/36fb760642d568925953bcc8116455513d6e34c4beaa37544118c36aba6d/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:72f6c8b11857a856bcfa48c86f5368439f74453563f951e473514579d44aa612", size = 2053385, upload-time = "2025-11-04T13:41:35.508Z" }, + { url = "https://files.pythonhosted.org/packages/20/25/92dc684dd8eb75a234bc1c764b4210cf2646479d54b47bf46061657292a8/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5cb1b2f9742240e4bb26b652a5aeb840aa4b417c7748b6f8387927bc6e45e40d", size = 2218832, upload-time = "2025-11-04T13:41:37.732Z" }, + { url = "https://files.pythonhosted.org/packages/e2/09/f53e0b05023d3e30357d82eb35835d0f6340ca344720a4599cd663dca599/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:bd3d54f38609ff308209bd43acea66061494157703364ae40c951f83ba99a1a9", size = 2327585, upload-time = "2025-11-04T13:41:40Z" }, + { url = "https://files.pythonhosted.org/packages/aa/4e/2ae1aa85d6af35a39b236b1b1641de73f5a6ac4d5a7509f77b814885760c/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2ff4321e56e879ee8d2a879501c8e469414d948f4aba74a2d4593184eb326660", size = 2041078, upload-time = "2025-11-04T13:41:42.323Z" }, + { url = "https://files.pythonhosted.org/packages/cd/13/2e215f17f0ef326fc72afe94776edb77525142c693767fc347ed6288728d/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d0d2568a8c11bf8225044aa94409e21da0cb09dcdafe9ecd10250b2baad531a9", size = 2173914, upload-time = "2025-11-04T13:41:45.221Z" }, + { url = "https://files.pythonhosted.org/packages/02/7a/f999a6dcbcd0e5660bc348a3991c8915ce6599f4f2c6ac22f01d7a10816c/pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:a39455728aabd58ceabb03c90e12f71fd30fa69615760a075b9fec596456ccc3", size = 2129560, upload-time = "2025-11-04T13:41:47.474Z" }, + { url = "https://files.pythonhosted.org/packages/3a/b1/6c990ac65e3b4c079a4fb9f5b05f5b013afa0f4ed6780a3dd236d2cbdc64/pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_armv7l.whl", hash = "sha256:239edca560d05757817c13dc17c50766136d21f7cd0fac50295499ae24f90fdf", size = 2329244, upload-time = "2025-11-04T13:41:49.992Z" }, + { url = "https://files.pythonhosted.org/packages/d9/02/3c562f3a51afd4d88fff8dffb1771b30cfdfd79befd9883ee094f5b6c0d8/pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:2a5e06546e19f24c6a96a129142a75cee553cc018ffee48a460059b1185f4470", size = 2331955, upload-time = "2025-11-04T13:41:54.079Z" }, + { url = "https://files.pythonhosted.org/packages/5c/96/5fb7d8c3c17bc8c62fdb031c47d77a1af698f1d7a406b0f79aaa1338f9ad/pydantic_core-2.41.5-cp314-cp314t-win32.whl", hash = "sha256:b4ececa40ac28afa90871c2cc2b9ffd2ff0bf749380fbdf57d165fd23da353aa", size = 1988906, upload-time = "2025-11-04T13:41:56.606Z" }, + { url = "https://files.pythonhosted.org/packages/22/ed/182129d83032702912c2e2d8bbe33c036f342cc735737064668585dac28f/pydantic_core-2.41.5-cp314-cp314t-win_amd64.whl", hash = "sha256:80aa89cad80b32a912a65332f64a4450ed00966111b6615ca6816153d3585a8c", size = 1981607, upload-time = "2025-11-04T13:41:58.889Z" }, + { url = "https://files.pythonhosted.org/packages/9f/ed/068e41660b832bb0b1aa5b58011dea2a3fe0ba7861ff38c4d4904c1c1a99/pydantic_core-2.41.5-cp314-cp314t-win_arm64.whl", hash = "sha256:35b44f37a3199f771c3eaa53051bc8a70cd7b54f333531c59e29fd4db5d15008", size = 1974769, upload-time = "2025-11-04T13:42:01.186Z" }, + { url = "https://files.pythonhosted.org/packages/11/72/90fda5ee3b97e51c494938a4a44c3a35a9c96c19bba12372fb9c634d6f57/pydantic_core-2.41.5-graalpy311-graalpy242_311_native-macosx_10_12_x86_64.whl", hash = "sha256:b96d5f26b05d03cc60f11a7761a5ded1741da411e7fe0909e27a5e6a0cb7b034", size = 2115441, upload-time = "2025-11-04T13:42:39.557Z" }, + { url = "https://files.pythonhosted.org/packages/1f/53/8942f884fa33f50794f119012dc6a1a02ac43a56407adaac20463df8e98f/pydantic_core-2.41.5-graalpy311-graalpy242_311_native-macosx_11_0_arm64.whl", hash = "sha256:634e8609e89ceecea15e2d61bc9ac3718caaaa71963717bf3c8f38bfde64242c", size = 1930291, upload-time = "2025-11-04T13:42:42.169Z" }, + { url = 
"https://files.pythonhosted.org/packages/79/c8/ecb9ed9cd942bce09fc888ee960b52654fbdbede4ba6c2d6e0d3b1d8b49c/pydantic_core-2.41.5-graalpy311-graalpy242_311_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:93e8740d7503eb008aa2df04d3b9735f845d43ae845e6dcd2be0b55a2da43cd2", size = 1948632, upload-time = "2025-11-04T13:42:44.564Z" }, + { url = "https://files.pythonhosted.org/packages/2e/1b/687711069de7efa6af934e74f601e2a4307365e8fdc404703afc453eab26/pydantic_core-2.41.5-graalpy311-graalpy242_311_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f15489ba13d61f670dcc96772e733aad1a6f9c429cc27574c6cdaed82d0146ad", size = 2138905, upload-time = "2025-11-04T13:42:47.156Z" }, + { url = "https://files.pythonhosted.org/packages/09/32/59b0c7e63e277fa7911c2fc70ccfb45ce4b98991e7ef37110663437005af/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-macosx_10_12_x86_64.whl", hash = "sha256:7da7087d756b19037bc2c06edc6c170eeef3c3bafcb8f532ff17d64dc427adfd", size = 2110495, upload-time = "2025-11-04T13:42:49.689Z" }, + { url = "https://files.pythonhosted.org/packages/aa/81/05e400037eaf55ad400bcd318c05bb345b57e708887f07ddb2d20e3f0e98/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-macosx_11_0_arm64.whl", hash = "sha256:aabf5777b5c8ca26f7824cb4a120a740c9588ed58df9b2d196ce92fba42ff8dc", size = 1915388, upload-time = "2025-11-04T13:42:52.215Z" }, + { url = "https://files.pythonhosted.org/packages/6e/0d/e3549b2399f71d56476b77dbf3cf8937cec5cd70536bdc0e374a421d0599/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c007fe8a43d43b3969e8469004e9845944f1a80e6acd47c150856bb87f230c56", size = 1942879, upload-time = "2025-11-04T13:42:56.483Z" }, + { url = "https://files.pythonhosted.org/packages/f7/07/34573da085946b6a313d7c42f82f16e8920bfd730665de2d11c0c37a74b5/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:76d0819de158cd855d1cbb8fcafdf6f5cf1eb8e470abe056d5d161106e38062b", size = 2139017, upload-time = "2025-11-04T13:42:59.471Z" }, + { url = "https://files.pythonhosted.org/packages/5f/9b/1b3f0e9f9305839d7e84912f9e8bfbd191ed1b1ef48083609f0dabde978c/pydantic_core-2.41.5-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:b2379fa7ed44ddecb5bfe4e48577d752db9fc10be00a6b7446e9663ba143de26", size = 2101980, upload-time = "2025-11-04T13:43:25.97Z" }, + { url = "https://files.pythonhosted.org/packages/a4/ed/d71fefcb4263df0da6a85b5d8a7508360f2f2e9b3bf5814be9c8bccdccc1/pydantic_core-2.41.5-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:266fb4cbf5e3cbd0b53669a6d1b039c45e3ce651fd5442eff4d07c2cc8d66808", size = 1923865, upload-time = "2025-11-04T13:43:28.763Z" }, + { url = "https://files.pythonhosted.org/packages/ce/3a/626b38db460d675f873e4444b4bb030453bbe7b4ba55df821d026a0493c4/pydantic_core-2.41.5-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:58133647260ea01e4d0500089a8c4f07bd7aa6ce109682b1426394988d8aaacc", size = 2134256, upload-time = "2025-11-04T13:43:31.71Z" }, + { url = "https://files.pythonhosted.org/packages/83/d9/8412d7f06f616bbc053d30cb4e5f76786af3221462ad5eee1f202021eb4e/pydantic_core-2.41.5-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:287dad91cfb551c363dc62899a80e9e14da1f0e2b6ebde82c806612ca2a13ef1", size = 2174762, upload-time = "2025-11-04T13:43:34.744Z" }, + { url = 
"https://files.pythonhosted.org/packages/55/4c/162d906b8e3ba3a99354e20faa1b49a85206c47de97a639510a0e673f5da/pydantic_core-2.41.5-pp311-pypy311_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:03b77d184b9eb40240ae9fd676ca364ce1085f203e1b1256f8ab9984dca80a84", size = 2143141, upload-time = "2025-11-04T13:43:37.701Z" }, + { url = "https://files.pythonhosted.org/packages/1f/f2/f11dd73284122713f5f89fc940f370d035fa8e1e078d446b3313955157fe/pydantic_core-2.41.5-pp311-pypy311_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:a668ce24de96165bb239160b3d854943128f4334822900534f2fe947930e5770", size = 2330317, upload-time = "2025-11-04T13:43:40.406Z" }, + { url = "https://files.pythonhosted.org/packages/88/9d/b06ca6acfe4abb296110fb1273a4d848a0bfb2ff65f3ee92127b3244e16b/pydantic_core-2.41.5-pp311-pypy311_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:f14f8f046c14563f8eb3f45f499cc658ab8d10072961e07225e507adb700e93f", size = 2316992, upload-time = "2025-11-04T13:43:43.602Z" }, + { url = "https://files.pythonhosted.org/packages/36/c7/cfc8e811f061c841d7990b0201912c3556bfeb99cdcb7ed24adc8d6f8704/pydantic_core-2.41.5-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:56121965f7a4dc965bff783d70b907ddf3d57f6eba29b6d2e5dabfaf07799c51", size = 2145302, upload-time = "2025-11-04T13:43:46.64Z" }, +] + +[[package]] +name = "pydantic-settings" +version = "2.12.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pydantic" }, + { name = "python-dotenv" }, + { name = "typing-inspection" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/43/4b/ac7e0aae12027748076d72a8764ff1c9d82ca75a7a52622e67ed3f765c54/pydantic_settings-2.12.0.tar.gz", hash = "sha256:005538ef951e3c2a68e1c08b292b5f2e71490def8589d4221b95dab00dafcfd0", size = 194184, upload-time = "2025-11-10T14:25:47.013Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c1/60/5d4751ba3f4a40a6891f24eec885f51afd78d208498268c734e256fb13c4/pydantic_settings-2.12.0-py3-none-any.whl", hash = "sha256:fddb9fd99a5b18da837b29710391e945b1e30c135477f484084ee513adb93809", size = 51880, upload-time = "2025-11-10T14:25:45.546Z" }, +] + +[[package]] +name = "pydocket" +version = "0.17.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cloudpickle" }, + { name = "croniter" }, + { name = "fakeredis", extra = ["lua"] }, + { name = "opentelemetry-api" }, + { name = "prometheus-client" }, + { name = "py-key-value-aio", extra = ["memory", "redis"] }, + { name = "python-json-logger" }, + { name = "redis" }, + { name = "rich" }, + { name = "typer" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/73/26/ac23ead3725475468b50b486939bf5feda27180050a614a7407344a0af0e/pydocket-0.17.5.tar.gz", hash = "sha256:19a6976d8fd11c1acf62feb0291a339e06beaefa100f73dd38c6499760ad3e62", size = 334829, upload-time = "2026-01-30T18:44:39.702Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/14/98/73427d065c067a99de6afbe24df3d90cf20d63152ceb42edff2b6e829d4c/pydocket-0.17.5-py3-none-any.whl", hash = "sha256:544d7c2625a33e52528ac24db25794841427dfc2cf30b9c558ac387c77746241", size = 93355, upload-time = "2026-01-30T18:44:37.972Z" }, +] + [[package]] name = "pygments" version = "2.19.2" @@ -478,6 +1425,29 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217, 
upload-time = "2025-06-21T13:39:07.939Z" }, ] +[[package]] +name = "pyjwt" +version = "2.11.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/5c/5a/b46fa56bf322901eee5b0454a34343cdbdae202cd421775a8ee4e42fd519/pyjwt-2.11.0.tar.gz", hash = "sha256:35f95c1f0fbe5d5ba6e43f00271c275f7a1a4db1dab27bf708073b75318ea623", size = 98019, upload-time = "2026-01-30T19:59:55.694Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6f/01/c26ce75ba460d5cd503da9e13b21a33804d38c2165dec7b716d06b13010c/pyjwt-2.11.0-py3-none-any.whl", hash = "sha256:94a6bde30eb5c8e04fee991062b534071fd1439ef58d2adc9ccb823e7bcd0469", size = 28224, upload-time = "2026-01-30T19:59:54.539Z" }, +] + +[package.optional-dependencies] +crypto = [ + { name = "cryptography" }, +] + +[[package]] +name = "pyperclip" +version = "1.11.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e8/52/d87eba7cb129b81563019d1679026e7a112ef76855d6159d24754dbd2a51/pyperclip-1.11.0.tar.gz", hash = "sha256:244035963e4428530d9e3a6101a1ef97209c6825edab1567beac148ccc1db1b6", size = 12185, upload-time = "2025-09-26T14:40:37.245Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/df/80/fc9d01d5ed37ba4c42ca2b55b4339ae6e200b456be3a1aaddf4a9fa99b8c/pyperclip-1.11.0-py3-none-any.whl", hash = "sha256:299403e9ff44581cb9ba2ffeed69c7aa96a008622ad0c46cb575ca75b5b84273", size = 11063, upload-time = "2025-09-26T14:40:36.069Z" }, +] + [[package]] name = "pytest" version = "9.0.2" @@ -520,6 +1490,82 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/5a/cc/06253936f4a7fa2e0f48dfe6d851d9c56df896a9ab09ac019d70b760619c/pytest_mock-3.15.1-py3-none-any.whl", hash = "sha256:0a25e2eb88fe5168d535041d09a4529a188176ae608a6d249ee65abc0949630d", size = 10095, upload-time = "2025-09-16T16:37:25.734Z" }, ] +[[package]] +name = "python-dateutil" +version = "2.9.0.post0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "six" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/66/c0/0c8b6ad9f17a802ee498c46e004a0eb49bc148f2fd230864601a86dcf6db/python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3", size = 342432, upload-time = "2024-03-01T18:36:20.211Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892, upload-time = "2024-03-01T18:36:18.57Z" }, +] + +[[package]] +name = "python-dotenv" +version = "1.2.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f0/26/19cadc79a718c5edbec86fd4919a6b6d3f681039a2f6d66d14be94e75fb9/python_dotenv-1.2.1.tar.gz", hash = "sha256:42667e897e16ab0d66954af0e60a9caa94f0fd4ecf3aaf6d2d260eec1aa36ad6", size = 44221, upload-time = "2025-10-26T15:12:10.434Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/14/1b/a298b06749107c305e1fe0f814c6c74aea7b2f1e10989cb30f544a1b3253/python_dotenv-1.2.1-py3-none-any.whl", hash = "sha256:b81ee9561e9ca4004139c6cbba3a238c32b03e4894671e181b671e8cb8425d61", size = 21230, upload-time = "2025-10-26T15:12:09.109Z" }, +] + +[[package]] +name = "python-json-logger" +version = "4.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/29/bf/eca6a3d43db1dae7070f70e160ab20b807627ba953663ba07928cdd3dc58/python_json_logger-4.0.0.tar.gz", hash = "sha256:f58e68eb46e1faed27e0f574a55a0455eecd7b8a5b88b85a784519ba3cff047f", size = 17683, upload-time = "2025-10-06T04:15:18.984Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/51/e5/fecf13f06e5e5f67e8837d777d1bc43fac0ed2b77a676804df5c34744727/python_json_logger-4.0.0-py3-none-any.whl", hash = "sha256:af09c9daf6a813aa4cc7180395f50f2a9e5fa056034c9953aec92e381c5ba1e2", size = 15548, upload-time = "2025-10-06T04:15:17.553Z" }, +] + +[[package]] +name = "python-multipart" +version = "0.0.22" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/94/01/979e98d542a70714b0cb2b6728ed0b7c46792b695e3eaec3e20711271ca3/python_multipart-0.0.22.tar.gz", hash = "sha256:7340bef99a7e0032613f56dc36027b959fd3b30a787ed62d310e951f7c3a3a58", size = 37612, upload-time = "2026-01-25T10:15:56.219Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1b/d0/397f9626e711ff749a95d96b7af99b9c566a9bb5129b8e4c10fc4d100304/python_multipart-0.0.22-py3-none-any.whl", hash = "sha256:2b2cd894c83d21bf49d702499531c7bafd057d730c201782048f7945d82de155", size = 24579, upload-time = "2026-01-25T10:15:54.811Z" }, +] + +[[package]] +name = "pytz" +version = "2025.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f8/bf/abbd3cdfb8fbc7fb3d4d38d320f2441b1e7cbe29be4f23797b4a2b5d8aac/pytz-2025.2.tar.gz", hash = "sha256:360b9e3dbb49a209c21ad61809c7fb453643e048b38924c765813546746e81c3", size = 320884, upload-time = "2025-03-25T02:25:00.538Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/81/c4/34e93fe5f5429d7570ec1fa436f1986fb1f00c3e0f43a589fe2bbcd22c3f/pytz-2025.2-py2.py3-none-any.whl", hash = "sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00", size = 509225, upload-time = "2025-03-25T02:24:58.468Z" }, +] + +[[package]] +name = "pywin32" +version = "311" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7c/af/449a6a91e5d6db51420875c54f6aff7c97a86a3b13a0b4f1a5c13b988de3/pywin32-311-cp311-cp311-win32.whl", hash = "sha256:184eb5e436dea364dcd3d2316d577d625c0351bf237c4e9a5fabbcfa5a58b151", size = 8697031, upload-time = "2025-07-14T20:13:13.266Z" }, + { url = "https://files.pythonhosted.org/packages/51/8f/9bb81dd5bb77d22243d33c8397f09377056d5c687aa6d4042bea7fbf8364/pywin32-311-cp311-cp311-win_amd64.whl", hash = "sha256:3ce80b34b22b17ccbd937a6e78e7225d80c52f5ab9940fe0506a1a16f3dab503", size = 9508308, upload-time = "2025-07-14T20:13:15.147Z" }, + { url = "https://files.pythonhosted.org/packages/44/7b/9c2ab54f74a138c491aba1b1cd0795ba61f144c711daea84a88b63dc0f6c/pywin32-311-cp311-cp311-win_arm64.whl", hash = "sha256:a733f1388e1a842abb67ffa8e7aad0e70ac519e09b0f6a784e65a136ec7cefd2", size = 8703930, upload-time = "2025-07-14T20:13:16.945Z" }, + { url = "https://files.pythonhosted.org/packages/e7/ab/01ea1943d4eba0f850c3c61e78e8dd59757ff815ff3ccd0a84de5f541f42/pywin32-311-cp312-cp312-win32.whl", hash = "sha256:750ec6e621af2b948540032557b10a2d43b0cee2ae9758c54154d711cc852d31", size = 8706543, upload-time = "2025-07-14T20:13:20.765Z" }, + { url = "https://files.pythonhosted.org/packages/d1/a8/a0e8d07d4d051ec7502cd58b291ec98dcc0c3fff027caad0470b72cfcc2f/pywin32-311-cp312-cp312-win_amd64.whl", hash = 
"sha256:b8c095edad5c211ff31c05223658e71bf7116daa0ecf3ad85f3201ea3190d067", size = 9495040, upload-time = "2025-07-14T20:13:22.543Z" }, + { url = "https://files.pythonhosted.org/packages/ba/3a/2ae996277b4b50f17d61f0603efd8253cb2d79cc7ae159468007b586396d/pywin32-311-cp312-cp312-win_arm64.whl", hash = "sha256:e286f46a9a39c4a18b319c28f59b61de793654af2f395c102b4f819e584b5852", size = 8710102, upload-time = "2025-07-14T20:13:24.682Z" }, + { url = "https://files.pythonhosted.org/packages/a5/be/3fd5de0979fcb3994bfee0d65ed8ca9506a8a1260651b86174f6a86f52b3/pywin32-311-cp313-cp313-win32.whl", hash = "sha256:f95ba5a847cba10dd8c4d8fefa9f2a6cf283b8b88ed6178fa8a6c1ab16054d0d", size = 8705700, upload-time = "2025-07-14T20:13:26.471Z" }, + { url = "https://files.pythonhosted.org/packages/e3/28/e0a1909523c6890208295a29e05c2adb2126364e289826c0a8bc7297bd5c/pywin32-311-cp313-cp313-win_amd64.whl", hash = "sha256:718a38f7e5b058e76aee1c56ddd06908116d35147e133427e59a3983f703a20d", size = 9494700, upload-time = "2025-07-14T20:13:28.243Z" }, + { url = "https://files.pythonhosted.org/packages/04/bf/90339ac0f55726dce7d794e6d79a18a91265bdf3aa70b6b9ca52f35e022a/pywin32-311-cp313-cp313-win_arm64.whl", hash = "sha256:7b4075d959648406202d92a2310cb990fea19b535c7f4a78d3f5e10b926eeb8a", size = 8709318, upload-time = "2025-07-14T20:13:30.348Z" }, + { url = "https://files.pythonhosted.org/packages/c9/31/097f2e132c4f16d99a22bfb777e0fd88bd8e1c634304e102f313af69ace5/pywin32-311-cp314-cp314-win32.whl", hash = "sha256:b7a2c10b93f8986666d0c803ee19b5990885872a7de910fc460f9b0c2fbf92ee", size = 8840714, upload-time = "2025-07-14T20:13:32.449Z" }, + { url = "https://files.pythonhosted.org/packages/90/4b/07c77d8ba0e01349358082713400435347df8426208171ce297da32c313d/pywin32-311-cp314-cp314-win_amd64.whl", hash = "sha256:3aca44c046bd2ed8c90de9cb8427f581c479e594e99b5c0bb19b29c10fd6cb87", size = 9656800, upload-time = "2025-07-14T20:13:34.312Z" }, + { url = "https://files.pythonhosted.org/packages/c0/d2/21af5c535501a7233e734b8af901574572da66fcc254cb35d0609c9080dd/pywin32-311-cp314-cp314-win_arm64.whl", hash = "sha256:a508e2d9025764a8270f93111a970e1d0fbfc33f4153b388bb649b7eec4f9b42", size = 8932540, upload-time = "2025-07-14T20:13:36.379Z" }, +] + +[[package]] +name = "pywin32-ctypes" +version = "0.2.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/85/9f/01a1a99704853cb63f253eea009390c88e7131c67e66a0a02099a8c917cb/pywin32-ctypes-0.2.3.tar.gz", hash = "sha256:d162dc04946d704503b2edc4d55f3dba5c1d539ead017afa00142c38b9885755", size = 29471, upload-time = "2024-08-14T10:15:34.626Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/de/3d/8161f7711c017e01ac9f008dfddd9410dff3674334c233bde66e7ba65bbf/pywin32_ctypes-0.2.3-py3-none-any.whl", hash = "sha256:8a1513379d709975552d202d942d9837758905c8d01eb82b8bcc30918929e7b8", size = 30756, upload-time = "2024-08-14T10:15:33.187Z" }, +] + [[package]] name = "pyyaml" version = "6.0.3" @@ -575,18 +1621,45 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f1/12/de94a39c2ef588c7e6455cfbe7343d3b2dc9d6b6b2f40c4c6565744c873d/pyyaml-6.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:ebc55a14a21cb14062aa4162f906cd962b28e2e9ea38f9b4391244cd8de4ae0b", size = 149341, upload-time = "2025-09-25T21:32:56.828Z" }, ] +[[package]] +name = "redis" +version = "7.1.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "async-timeout", marker = "python_full_version < '3.11.3'" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/43/c8/983d5c6579a411d8a99bc5823cc5712768859b5ce2c8afe1a65b37832c81/redis-7.1.0.tar.gz", hash = "sha256:b1cc3cfa5a2cb9c2ab3ba700864fb0ad75617b41f01352ce5779dabf6d5f9c3c", size = 4796669, upload-time = "2025-11-19T15:54:39.961Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/89/f0/8956f8a86b20d7bb9d6ac0187cf4cd54d8065bc9a1a09eb8011d4d326596/redis-7.1.0-py3-none-any.whl", hash = "sha256:23c52b208f92b56103e17c5d06bdc1a6c2c0b3106583985a76a18f83b265de2b", size = 354159, upload-time = "2025-11-19T15:54:38.064Z" }, +] + [[package]] name = "referencing" -version = "0.37.0" +version = "0.36.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "attrs" }, { name = "rpds-py" }, { name = "typing-extensions", marker = "python_full_version < '3.13'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/22/f5/df4e9027acead3ecc63e50fe1e36aca1523e1719559c499951bb4b53188f/referencing-0.37.0.tar.gz", hash = "sha256:44aefc3142c5b842538163acb373e24cce6632bd54bdb01b21ad5863489f50d8", size = 78036, upload-time = "2025-10-13T15:30:48.871Z" } +sdist = { url = "https://files.pythonhosted.org/packages/2f/db/98b5c277be99dd18bfd91dd04e1b759cad18d1a338188c936e92f921c7e2/referencing-0.36.2.tar.gz", hash = "sha256:df2e89862cd09deabbdba16944cc3f10feb6b3e6f18e902f7cc25609a34775aa", size = 74744, upload-time = "2025-01-25T08:48:16.138Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c1/b1/3baf80dc6d2b7bc27a95a67752d0208e410351e3feb4eb78de5f77454d8d/referencing-0.36.2-py3-none-any.whl", hash = "sha256:e8699adbbf8b5c7de96d8ffa0eb5c158b3beafce084968e2ea8bb08c6794dcd0", size = 26775, upload-time = "2025-01-25T08:48:14.241Z" }, +] + +[[package]] +name = "requests" +version = "2.32.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "charset-normalizer" }, + { name = "idna" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c9/74/b3ff8e6c8446842c3f5c837e9c3dfcfe2018ea6ecef224c710c85ef728f4/requests-2.32.5.tar.gz", hash = "sha256:dbba0bac56e100853db0ea71b82b4dfd5fe2bf6d3754a8893c3af500cec7d7cf", size = 134517, upload-time = "2025-08-18T20:46:02.573Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/2c/58/ca301544e1fa93ed4f80d724bf5b194f6e4b945841c5bfd555878eea9fcb/referencing-0.37.0-py3-none-any.whl", hash = "sha256:381329a9f99628c9069361716891d34ad94af76e461dcb0335825aecc7692231", size = 26766, upload-time = "2025-10-13T15:30:47.625Z" }, + { url = "https://files.pythonhosted.org/packages/1e/db/4254e3eabe8020b458f1a747140d32277ec7a271daf1d235b70dc0b4e6e3/requests-2.32.5-py3-none-any.whl", hash = "sha256:2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6", size = 64738, upload-time = "2025-08-18T20:46:00.542Z" }, ] [[package]] @@ -602,6 +1675,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/25/7a/b0178788f8dc6cafce37a212c99565fa1fe7872c70c6c9c1e1a372d9d88f/rich-14.2.0-py3-none-any.whl", hash = "sha256:76bc51fe2e57d2b1be1f96c524b890b816e334ab4c1e45888799bfaab0021edd", size = 243393, upload-time = "2025-10-09T14:16:51.245Z" }, ] +[[package]] +name = "rich-rst" +version = "1.3.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "docutils" }, + { name = "rich" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/bc/6d/a506aaa4a9eaa945ed8ab2b7347859f53593864289853c5d6d62b77246e0/rich_rst-1.3.2.tar.gz", hash = 
"sha256:a1196fdddf1e364b02ec68a05e8ff8f6914fee10fbca2e6b6735f166bb0da8d4", size = 14936, upload-time = "2025-10-14T16:49:45.332Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/13/2f/b4530fbf948867702d0a3f27de4a6aab1d156f406d72852ab902c4d04de9/rich_rst-1.3.2-py3-none-any.whl", hash = "sha256:a99b4907cbe118cf9d18b0b44de272efa61f15117c61e39ebdc431baf5df722a", size = 12567, upload-time = "2025-10-14T16:49:42.953Z" }, +] + [[package]] name = "rpds-py" version = "0.30.0" @@ -736,6 +1822,37 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c4/1c/1dbe51782c0e1e9cfce1d1004752672d2d4629ea46945d19d731ad772b3b/ruff-0.14.11-py3-none-win_arm64.whl", hash = "sha256:649fb6c9edd7f751db276ef42df1f3df41c38d67d199570ae2a7bd6cbc3590f0", size = 12938644, upload-time = "2026-01-08T19:11:50.027Z" }, ] +[[package]] +name = "secretstorage" +version = "3.5.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cryptography" }, + { name = "jeepney" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/1c/03/e834bcd866f2f8a49a85eaff47340affa3bfa391ee9912a952a1faa68c7b/secretstorage-3.5.0.tar.gz", hash = "sha256:f04b8e4689cbce351744d5537bf6b1329c6fc68f91fa666f60a380edddcd11be", size = 19884, upload-time = "2025-11-23T19:02:53.191Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b7/46/f5af3402b579fd5e11573ce652019a67074317e18c1935cc0b4ba9b35552/secretstorage-3.5.0-py3-none-any.whl", hash = "sha256:0ce65888c0725fcb2c5bc0fdb8e5438eece02c523557ea40ce0703c266248137", size = 15554, upload-time = "2025-11-23T19:02:51.545Z" }, +] + +[[package]] +name = "shellingham" +version = "1.5.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/58/15/8b3609fd3830ef7b27b655beb4b4e9c62313a4e8da8c676e142cc210d58e/shellingham-1.5.4.tar.gz", hash = "sha256:8dbca0739d487e5bd35ab3ca4b36e11c4078f3a234bfce294b0a0291363404de", size = 10310, upload-time = "2023-10-24T04:13:40.426Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e0/f9/0595336914c5619e5f28a1fb793285925a8cd4b432c9da0a987836c7f822/shellingham-1.5.4-py2.py3-none-any.whl", hash = "sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686", size = 9755, upload-time = "2023-10-24T04:13:38.866Z" }, +] + +[[package]] +name = "six" +version = "1.17.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/94/e7/b2c673351809dca68a0e064b6af791aa332cf192da575fd474ed7d6f16a2/six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81", size = 34031, upload-time = "2024-12-04T17:35:28.174Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050, upload-time = "2024-12-04T17:35:26.475Z" }, +] + [[package]] name = "smmap" version = "5.0.2" @@ -745,6 +1862,41 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/04/be/d09147ad1ec7934636ad912901c5fd7667e1c858e19d355237db0d0cd5e4/smmap-5.0.2-py3-none-any.whl", hash = "sha256:b30115f0def7d7531d22a0fb6502488d879e75b260a9db4d0819cfb25403af5e", size = 24303, upload-time = "2025-01-02T07:14:38.724Z" }, ] +[[package]] +name = "sortedcontainers" +version = "2.4.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/e8/c4/ba2f8066cceb6f23394729afe52f3bf7adec04bf9ed2c820b39e19299111/sortedcontainers-2.4.0.tar.gz", hash = "sha256:25caa5a06cc30b6b83d11423433f65d1f9d76c4c6a0c90e3379eaa43b9bfdb88", size = 30594, upload-time = "2021-05-16T22:03:42.897Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/32/46/9cb0e58b2deb7f82b84065f37f3bffeb12413f947f9388e4cac22c4621ce/sortedcontainers-2.4.0-py2.py3-none-any.whl", hash = "sha256:a163dcaede0f1c021485e957a39245190e74249897e2ae4b2aa38595db237ee0", size = 29575, upload-time = "2021-05-16T22:03:41.177Z" }, +] + +[[package]] +name = "sse-starlette" +version = "3.2.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "starlette" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/8b/8d/00d280c03ffd39aaee0e86ec81e2d3b9253036a0f93f51d10503adef0e65/sse_starlette-3.2.0.tar.gz", hash = "sha256:8127594edfb51abe44eac9c49e59b0b01f1039d0c7461c6fd91d4e03b70da422", size = 27253, upload-time = "2026-01-17T13:11:05.62Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/96/7f/832f015020844a8b8f7a9cbc103dd76ba8e3875004c41e08440ea3a2b41a/sse_starlette-3.2.0-py3-none-any.whl", hash = "sha256:5876954bd51920fc2cd51baee47a080eb88a37b5b784e615abb0b283f801cdbf", size = 12763, upload-time = "2026-01-17T13:11:03.775Z" }, +] + +[[package]] +name = "starlette" +version = "0.52.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c4/68/79977123bb7be889ad680d79a40f339082c1978b5cfcf62c2d8d196873ac/starlette-0.52.1.tar.gz", hash = "sha256:834edd1b0a23167694292e94f597773bc3f89f362be6effee198165a35d62933", size = 2653702, upload-time = "2026-01-18T13:34:11.062Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/81/0d/13d1d239a25cbfb19e740db83143e95c772a1fe10202dda4b76792b114dd/starlette-0.52.1-py3-none-any.whl", hash = "sha256:0029d43eb3d273bc4f83a08720b4912ea4b071087a3b48db01b7c839f7954d74", size = 74272, upload-time = "2026-01-18T13:34:09.188Z" }, +] + [[package]] name = "tomli" version = "2.3.0" @@ -794,6 +1946,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/77/b8/0135fadc89e73be292b473cb820b4f5a08197779206b33191e801feeae40/tomli-2.3.0-py3-none-any.whl", hash = "sha256:e95b1af3c5b07d9e643909b5abbec77cd9f1217e6d0bca72b0234736b9fb1f1b", size = 14408, upload-time = "2025-10-08T22:01:46.04Z" }, ] +[[package]] +name = "typer" +version = "0.21.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "click" }, + { name = "rich" }, + { name = "shellingham" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/36/bf/8825b5929afd84d0dabd606c67cd57b8388cb3ec385f7ef19c5cc2202069/typer-0.21.1.tar.gz", hash = "sha256:ea835607cd752343b6b2b7ce676893e5a0324082268b48f27aa058bdb7d2145d", size = 110371, upload-time = "2026-01-06T11:21:10.989Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a0/1d/d9257dd49ff2ca23ea5f132edf1281a0c4f9de8a762b9ae399b670a59235/typer-0.21.1-py3-none-any.whl", hash = "sha256:7985e89081c636b88d172c2ee0cfe33c253160994d47bdfdc302defd7d1f1d01", size = 47381, upload-time = "2026-01-06T11:21:09.824Z" }, +] + [[package]] name = "types-pyyaml" version = "6.0.12.20250915" @@ -811,3 +1978,105 @@ sdist = { url = 
"https://files.pythonhosted.org/packages/72/94/1a15dd82efb362ac8 wheels = [ { url = "https://files.pythonhosted.org/packages/18/67/36e9267722cc04a6b9f15c7f3441c2363321a3ea07da7ae0c0707beb2a9c/typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548", size = 44614, upload-time = "2025-08-25T13:49:24.86Z" }, ] + +[[package]] +name = "typing-inspection" +version = "0.4.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/55/e3/70399cb7dd41c10ac53367ae42139cf4b1ca5f36bb3dc6c9d33acdb43655/typing_inspection-0.4.2.tar.gz", hash = "sha256:ba561c48a67c5958007083d386c3295464928b01faa735ab8547c5692e87f464", size = 75949, upload-time = "2025-10-01T02:14:41.687Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/dc/9b/47798a6c91d8bdb567fe2698fe81e0c6b7cb7ef4d13da4114b41d239f65d/typing_inspection-0.4.2-py3-none-any.whl", hash = "sha256:4ed1cacbdc298c220f1bd249ed5287caa16f34d44ef4e9c3d0cbad5b521545e7", size = 14611, upload-time = "2025-10-01T02:14:40.154Z" }, +] + +[[package]] +name = "urllib3" +version = "2.6.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/c7/24/5f1b3bdffd70275f6661c76461e25f024d5a38a46f04aaca912426a2b1d3/urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed", size = 435556, upload-time = "2026-01-07T16:24:43.925Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/39/08/aaaad47bc4e9dc8c725e68f9d04865dbcb2052843ff09c97b08904852d84/urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4", size = 131584, upload-time = "2026-01-07T16:24:42.685Z" }, +] + +[[package]] +name = "uvicorn" +version = "0.40.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "click" }, + { name = "h11" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c3/d1/8f3c683c9561a4e6689dd3b1d345c815f10f86acd044ee1fb9a4dcd0b8c5/uvicorn-0.40.0.tar.gz", hash = "sha256:839676675e87e73694518b5574fd0f24c9d97b46bea16df7b8c05ea1a51071ea", size = 81761, upload-time = "2025-12-21T14:16:22.45Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3d/d8/2083a1daa7439a66f3a48589a57d576aa117726762618f6bb09fe3798796/uvicorn-0.40.0-py3-none-any.whl", hash = "sha256:c6c8f55bc8bf13eb6fa9ff87ad62308bbbc33d0b67f84293151efe87e0d5f2ee", size = 68502, upload-time = "2025-12-21T14:16:21.041Z" }, +] + +[[package]] +name = "websockets" +version = "16.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/04/24/4b2031d72e840ce4c1ccb255f693b15c334757fc50023e4db9537080b8c4/websockets-16.0.tar.gz", hash = "sha256:5f6261a5e56e8d5c42a4497b364ea24d94d9563e8fbd44e78ac40879c60179b5", size = 179346, upload-time = "2026-01-10T09:23:47.181Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f2/db/de907251b4ff46ae804ad0409809504153b3f30984daf82a1d84a9875830/websockets-16.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:31a52addea25187bde0797a97d6fc3d2f92b6f72a9370792d65a6e84615ac8a8", size = 177340, upload-time = "2026-01-10T09:22:34.539Z" }, + { url = "https://files.pythonhosted.org/packages/f3/fa/abe89019d8d8815c8781e90d697dec52523fb8ebe308bf11664e8de1877e/websockets-16.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = 
"sha256:417b28978cdccab24f46400586d128366313e8a96312e4b9362a4af504f3bbad", size = 175022, upload-time = "2026-01-10T09:22:36.332Z" }, + { url = "https://files.pythonhosted.org/packages/58/5d/88ea17ed1ded2079358b40d31d48abe90a73c9e5819dbcde1606e991e2ad/websockets-16.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:af80d74d4edfa3cb9ed973a0a5ba2b2a549371f8a741e0800cb07becdd20f23d", size = 175319, upload-time = "2026-01-10T09:22:37.602Z" }, + { url = "https://files.pythonhosted.org/packages/d2/ae/0ee92b33087a33632f37a635e11e1d99d429d3d323329675a6022312aac2/websockets-16.0-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:08d7af67b64d29823fed316505a89b86705f2b7981c07848fb5e3ea3020c1abe", size = 184631, upload-time = "2026-01-10T09:22:38.789Z" }, + { url = "https://files.pythonhosted.org/packages/c8/c5/27178df583b6c5b31b29f526ba2da5e2f864ecc79c99dae630a85d68c304/websockets-16.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7be95cfb0a4dae143eaed2bcba8ac23f4892d8971311f1b06f3c6b78952ee70b", size = 185870, upload-time = "2026-01-10T09:22:39.893Z" }, + { url = "https://files.pythonhosted.org/packages/87/05/536652aa84ddc1c018dbb7e2c4cbcd0db884580bf8e95aece7593fde526f/websockets-16.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d6297ce39ce5c2e6feb13c1a996a2ded3b6832155fcfc920265c76f24c7cceb5", size = 185361, upload-time = "2026-01-10T09:22:41.016Z" }, + { url = "https://files.pythonhosted.org/packages/6d/e2/d5332c90da12b1e01f06fb1b85c50cfc489783076547415bf9f0a659ec19/websockets-16.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:1c1b30e4f497b0b354057f3467f56244c603a79c0d1dafce1d16c283c25f6e64", size = 184615, upload-time = "2026-01-10T09:22:42.442Z" }, + { url = "https://files.pythonhosted.org/packages/77/fb/d3f9576691cae9253b51555f841bc6600bf0a983a461c79500ace5a5b364/websockets-16.0-cp311-cp311-win32.whl", hash = "sha256:5f451484aeb5cafee1ccf789b1b66f535409d038c56966d6101740c1614b86c6", size = 178246, upload-time = "2026-01-10T09:22:43.654Z" }, + { url = "https://files.pythonhosted.org/packages/54/67/eaff76b3dbaf18dcddabc3b8c1dba50b483761cccff67793897945b37408/websockets-16.0-cp311-cp311-win_amd64.whl", hash = "sha256:8d7f0659570eefb578dacde98e24fb60af35350193e4f56e11190787bee77dac", size = 178684, upload-time = "2026-01-10T09:22:44.941Z" }, + { url = "https://files.pythonhosted.org/packages/84/7b/bac442e6b96c9d25092695578dda82403c77936104b5682307bd4deb1ad4/websockets-16.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:71c989cbf3254fbd5e84d3bff31e4da39c43f884e64f2551d14bb3c186230f00", size = 177365, upload-time = "2026-01-10T09:22:46.787Z" }, + { url = "https://files.pythonhosted.org/packages/b0/fe/136ccece61bd690d9c1f715baaeefd953bb2360134de73519d5df19d29ca/websockets-16.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:8b6e209ffee39ff1b6d0fa7bfef6de950c60dfb91b8fcead17da4ee539121a79", size = 175038, upload-time = "2026-01-10T09:22:47.999Z" }, + { url = "https://files.pythonhosted.org/packages/40/1e/9771421ac2286eaab95b8575b0cb701ae3663abf8b5e1f64f1fd90d0a673/websockets-16.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:86890e837d61574c92a97496d590968b23c2ef0aeb8a9bc9421d174cd378ae39", size = 175328, upload-time = "2026-01-10T09:22:49.809Z" }, + { url = "https://files.pythonhosted.org/packages/18/29/71729b4671f21e1eaa5d6573031ab810ad2936c8175f03f97f3ff164c802/websockets-16.0-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = 
"sha256:9b5aca38b67492ef518a8ab76851862488a478602229112c4b0d58d63a7a4d5c", size = 184915, upload-time = "2026-01-10T09:22:51.071Z" }, + { url = "https://files.pythonhosted.org/packages/97/bb/21c36b7dbbafc85d2d480cd65df02a1dc93bf76d97147605a8e27ff9409d/websockets-16.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e0334872c0a37b606418ac52f6ab9cfd17317ac26365f7f65e203e2d0d0d359f", size = 186152, upload-time = "2026-01-10T09:22:52.224Z" }, + { url = "https://files.pythonhosted.org/packages/4a/34/9bf8df0c0cf88fa7bfe36678dc7b02970c9a7d5e065a3099292db87b1be2/websockets-16.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a0b31e0b424cc6b5a04b8838bbaec1688834b2383256688cf47eb97412531da1", size = 185583, upload-time = "2026-01-10T09:22:53.443Z" }, + { url = "https://files.pythonhosted.org/packages/47/88/4dd516068e1a3d6ab3c7c183288404cd424a9a02d585efbac226cb61ff2d/websockets-16.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:485c49116d0af10ac698623c513c1cc01c9446c058a4e61e3bf6c19dff7335a2", size = 184880, upload-time = "2026-01-10T09:22:55.033Z" }, + { url = "https://files.pythonhosted.org/packages/91/d6/7d4553ad4bf1c0421e1ebd4b18de5d9098383b5caa1d937b63df8d04b565/websockets-16.0-cp312-cp312-win32.whl", hash = "sha256:eaded469f5e5b7294e2bdca0ab06becb6756ea86894a47806456089298813c89", size = 178261, upload-time = "2026-01-10T09:22:56.251Z" }, + { url = "https://files.pythonhosted.org/packages/c3/f0/f3a17365441ed1c27f850a80b2bc680a0fa9505d733fe152fdf5e98c1c0b/websockets-16.0-cp312-cp312-win_amd64.whl", hash = "sha256:5569417dc80977fc8c2d43a86f78e0a5a22fee17565d78621b6bb264a115d4ea", size = 178693, upload-time = "2026-01-10T09:22:57.478Z" }, + { url = "https://files.pythonhosted.org/packages/cc/9c/baa8456050d1c1b08dd0ec7346026668cbc6f145ab4e314d707bb845bf0d/websockets-16.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:878b336ac47938b474c8f982ac2f7266a540adc3fa4ad74ae96fea9823a02cc9", size = 177364, upload-time = "2026-01-10T09:22:59.333Z" }, + { url = "https://files.pythonhosted.org/packages/7e/0c/8811fc53e9bcff68fe7de2bcbe75116a8d959ac699a3200f4847a8925210/websockets-16.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:52a0fec0e6c8d9a784c2c78276a48a2bdf099e4ccc2a4cad53b27718dbfd0230", size = 175039, upload-time = "2026-01-10T09:23:01.171Z" }, + { url = "https://files.pythonhosted.org/packages/aa/82/39a5f910cb99ec0b59e482971238c845af9220d3ab9fa76dd9162cda9d62/websockets-16.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:e6578ed5b6981005df1860a56e3617f14a6c307e6a71b4fff8c48fdc50f3ed2c", size = 175323, upload-time = "2026-01-10T09:23:02.341Z" }, + { url = "https://files.pythonhosted.org/packages/bd/28/0a25ee5342eb5d5f297d992a77e56892ecb65e7854c7898fb7d35e9b33bd/websockets-16.0-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:95724e638f0f9c350bb1c2b0a7ad0e83d9cc0c9259f3ea94e40d7b02a2179ae5", size = 184975, upload-time = "2026-01-10T09:23:03.756Z" }, + { url = "https://files.pythonhosted.org/packages/f9/66/27ea52741752f5107c2e41fda05e8395a682a1e11c4e592a809a90c6a506/websockets-16.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c0204dc62a89dc9d50d682412c10b3542d748260d743500a85c13cd1ee4bde82", size = 186203, upload-time = "2026-01-10T09:23:05.01Z" }, + { url = "https://files.pythonhosted.org/packages/37/e5/8e32857371406a757816a2b471939d51c463509be73fa538216ea52b792a/websockets-16.0-cp313-cp313-musllinux_1_2_aarch64.whl", 
hash = "sha256:52ac480f44d32970d66763115edea932f1c5b1312de36df06d6b219f6741eed8", size = 185653, upload-time = "2026-01-10T09:23:06.301Z" }, + { url = "https://files.pythonhosted.org/packages/9b/67/f926bac29882894669368dc73f4da900fcdf47955d0a0185d60103df5737/websockets-16.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6e5a82b677f8f6f59e8dfc34ec06ca6b5b48bc4fcda346acd093694cc2c24d8f", size = 184920, upload-time = "2026-01-10T09:23:07.492Z" }, + { url = "https://files.pythonhosted.org/packages/3c/a1/3d6ccdcd125b0a42a311bcd15a7f705d688f73b2a22d8cf1c0875d35d34a/websockets-16.0-cp313-cp313-win32.whl", hash = "sha256:abf050a199613f64c886ea10f38b47770a65154dc37181bfaff70c160f45315a", size = 178255, upload-time = "2026-01-10T09:23:09.245Z" }, + { url = "https://files.pythonhosted.org/packages/6b/ae/90366304d7c2ce80f9b826096a9e9048b4bb760e44d3b873bb272cba696b/websockets-16.0-cp313-cp313-win_amd64.whl", hash = "sha256:3425ac5cf448801335d6fdc7ae1eb22072055417a96cc6b31b3861f455fbc156", size = 178689, upload-time = "2026-01-10T09:23:10.483Z" }, + { url = "https://files.pythonhosted.org/packages/f3/1d/e88022630271f5bd349ed82417136281931e558d628dd52c4d8621b4a0b2/websockets-16.0-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:8cc451a50f2aee53042ac52d2d053d08bf89bcb31ae799cb4487587661c038a0", size = 177406, upload-time = "2026-01-10T09:23:12.178Z" }, + { url = "https://files.pythonhosted.org/packages/f2/78/e63be1bf0724eeb4616efb1ae1c9044f7c3953b7957799abb5915bffd38e/websockets-16.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:daa3b6ff70a9241cf6c7fc9e949d41232d9d7d26fd3522b1ad2b4d62487e9904", size = 175085, upload-time = "2026-01-10T09:23:13.511Z" }, + { url = "https://files.pythonhosted.org/packages/bb/f4/d3c9220d818ee955ae390cf319a7c7a467beceb24f05ee7aaaa2414345ba/websockets-16.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:fd3cb4adb94a2a6e2b7c0d8d05cb94e6f1c81a0cf9dc2694fb65c7e8d94c42e4", size = 175328, upload-time = "2026-01-10T09:23:14.727Z" }, + { url = "https://files.pythonhosted.org/packages/63/bc/d3e208028de777087e6fb2b122051a6ff7bbcca0d6df9d9c2bf1dd869ae9/websockets-16.0-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:781caf5e8eee67f663126490c2f96f40906594cb86b408a703630f95550a8c3e", size = 185044, upload-time = "2026-01-10T09:23:15.939Z" }, + { url = "https://files.pythonhosted.org/packages/ad/6e/9a0927ac24bd33a0a9af834d89e0abc7cfd8e13bed17a86407a66773cc0e/websockets-16.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:caab51a72c51973ca21fa8a18bd8165e1a0183f1ac7066a182ff27107b71e1a4", size = 186279, upload-time = "2026-01-10T09:23:17.148Z" }, + { url = "https://files.pythonhosted.org/packages/b9/ca/bf1c68440d7a868180e11be653c85959502efd3a709323230314fda6e0b3/websockets-16.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:19c4dc84098e523fd63711e563077d39e90ec6702aff4b5d9e344a60cb3c0cb1", size = 185711, upload-time = "2026-01-10T09:23:18.372Z" }, + { url = "https://files.pythonhosted.org/packages/c4/f8/fdc34643a989561f217bb477cbc47a3a07212cbda91c0e4389c43c296ebf/websockets-16.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:a5e18a238a2b2249c9a9235466b90e96ae4795672598a58772dd806edc7ac6d3", size = 184982, upload-time = "2026-01-10T09:23:19.652Z" }, + { url = "https://files.pythonhosted.org/packages/dd/d1/574fa27e233764dbac9c52730d63fcf2823b16f0856b3329fc6268d6ae4f/websockets-16.0-cp314-cp314-win32.whl", hash = 
"sha256:a069d734c4a043182729edd3e9f247c3b2a4035415a9172fd0f1b71658a320a8", size = 177915, upload-time = "2026-01-10T09:23:21.458Z" }, + { url = "https://files.pythonhosted.org/packages/8a/f1/ae6b937bf3126b5134ce1f482365fde31a357c784ac51852978768b5eff4/websockets-16.0-cp314-cp314-win_amd64.whl", hash = "sha256:c0ee0e63f23914732c6d7e0cce24915c48f3f1512ec1d079ed01fc629dab269d", size = 178381, upload-time = "2026-01-10T09:23:22.715Z" }, + { url = "https://files.pythonhosted.org/packages/06/9b/f791d1db48403e1f0a27577a6beb37afae94254a8c6f08be4a23e4930bc0/websockets-16.0-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:a35539cacc3febb22b8f4d4a99cc79b104226a756aa7400adc722e83b0d03244", size = 177737, upload-time = "2026-01-10T09:23:24.523Z" }, + { url = "https://files.pythonhosted.org/packages/bd/40/53ad02341fa33b3ce489023f635367a4ac98b73570102ad2cdd770dacc9a/websockets-16.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:b784ca5de850f4ce93ec85d3269d24d4c82f22b7212023c974c401d4980ebc5e", size = 175268, upload-time = "2026-01-10T09:23:25.781Z" }, + { url = "https://files.pythonhosted.org/packages/74/9b/6158d4e459b984f949dcbbb0c5d270154c7618e11c01029b9bbd1bb4c4f9/websockets-16.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:569d01a4e7fba956c5ae4fc988f0d4e187900f5497ce46339c996dbf24f17641", size = 175486, upload-time = "2026-01-10T09:23:27.033Z" }, + { url = "https://files.pythonhosted.org/packages/e5/2d/7583b30208b639c8090206f95073646c2c9ffd66f44df967981a64f849ad/websockets-16.0-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:50f23cdd8343b984957e4077839841146f67a3d31ab0d00e6b824e74c5b2f6e8", size = 185331, upload-time = "2026-01-10T09:23:28.259Z" }, + { url = "https://files.pythonhosted.org/packages/45/b0/cce3784eb519b7b5ad680d14b9673a31ab8dcb7aad8b64d81709d2430aa8/websockets-16.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:152284a83a00c59b759697b7f9e9cddf4e3c7861dd0d964b472b70f78f89e80e", size = 186501, upload-time = "2026-01-10T09:23:29.449Z" }, + { url = "https://files.pythonhosted.org/packages/19/60/b8ebe4c7e89fb5f6cdf080623c9d92789a53636950f7abacfc33fe2b3135/websockets-16.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:bc59589ab64b0022385f429b94697348a6a234e8ce22544e3681b2e9331b5944", size = 186062, upload-time = "2026-01-10T09:23:31.368Z" }, + { url = "https://files.pythonhosted.org/packages/88/a8/a080593f89b0138b6cba1b28f8df5673b5506f72879322288b031337c0b8/websockets-16.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:32da954ffa2814258030e5a57bc73a3635463238e797c7375dc8091327434206", size = 185356, upload-time = "2026-01-10T09:23:32.627Z" }, + { url = "https://files.pythonhosted.org/packages/c2/b6/b9afed2afadddaf5ebb2afa801abf4b0868f42f8539bfe4b071b5266c9fe/websockets-16.0-cp314-cp314t-win32.whl", hash = "sha256:5a4b4cc550cb665dd8a47f868c8d04c8230f857363ad3c9caf7a0c3bf8c61ca6", size = 178085, upload-time = "2026-01-10T09:23:33.816Z" }, + { url = "https://files.pythonhosted.org/packages/9f/3e/28135a24e384493fa804216b79a6a6759a38cc4ff59118787b9fb693df93/websockets-16.0-cp314-cp314t-win_amd64.whl", hash = "sha256:b14dc141ed6d2dde437cddb216004bcac6a1df0935d79656387bd41632ba0bbd", size = 178531, upload-time = "2026-01-10T09:23:35.016Z" }, + { url = "https://files.pythonhosted.org/packages/72/07/c98a68571dcf256e74f1f816b8cc5eae6eb2d3d5cfa44d37f801619d9166/websockets-16.0-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = 
"sha256:349f83cd6c9a415428ee1005cadb5c2c56f4389bc06a9af16103c3bc3dcc8b7d", size = 174947, upload-time = "2026-01-10T09:23:36.166Z" }, + { url = "https://files.pythonhosted.org/packages/7e/52/93e166a81e0305b33fe416338be92ae863563fe7bce446b0f687b9df5aea/websockets-16.0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:4a1aba3340a8dca8db6eb5a7986157f52eb9e436b74813764241981ca4888f03", size = 175260, upload-time = "2026-01-10T09:23:37.409Z" }, + { url = "https://files.pythonhosted.org/packages/56/0c/2dbf513bafd24889d33de2ff0368190a0e69f37bcfa19009ef819fe4d507/websockets-16.0-pp311-pypy311_pp73-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:f4a32d1bd841d4bcbffdcb3d2ce50c09c3909fbead375ab28d0181af89fd04da", size = 176071, upload-time = "2026-01-10T09:23:39.158Z" }, + { url = "https://files.pythonhosted.org/packages/a5/8f/aea9c71cc92bf9b6cc0f7f70df8f0b420636b6c96ef4feee1e16f80f75dd/websockets-16.0-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0298d07ee155e2e9fda5be8a9042200dd2e3bb0b8a38482156576f863a9d457c", size = 176968, upload-time = "2026-01-10T09:23:41.031Z" }, + { url = "https://files.pythonhosted.org/packages/9a/3f/f70e03f40ffc9a30d817eef7da1be72ee4956ba8d7255c399a01b135902a/websockets-16.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:a653aea902e0324b52f1613332ddf50b00c06fdaf7e92624fbf8c77c78fa5767", size = 178735, upload-time = "2026-01-10T09:23:42.259Z" }, + { url = "https://files.pythonhosted.org/packages/6f/28/258ebab549c2bf3e64d2b0217b973467394a9cea8c42f70418ca2c5d0d2e/websockets-16.0-py3-none-any.whl", hash = "sha256:1637db62fad1dc833276dded54215f2c7fa46912301a24bd94d45d46a011ceec", size = 171598, upload-time = "2026-01-10T09:23:45.395Z" }, +] + +[[package]] +name = "zipp" +version = "3.23.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e3/02/0f2892c661036d50ede074e376733dca2ae7c6eb617489437771209d4180/zipp-3.23.0.tar.gz", hash = "sha256:a07157588a12518c9d4034df3fbbee09c814741a33ff63c05fa29d26a2404166", size = 25547, upload-time = "2025-06-08T17:06:39.4Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2e/54/647ade08bf0db230bfea292f893923872fd20be6ac6f53b2b936ba839d75/zipp-3.23.0-py3-none-any.whl", hash = "sha256:071652d6115ed432f5ce1d34c336c0adfd6a884660d1e9712a256d3d3bd4b14e", size = 10276, upload-time = "2025-06-08T17:06:38.034Z" }, +] From 9b633b09b4b1548ece0aa295c45aa75da15e40cc Mon Sep 17 00:00:00 2001 From: Noah Horton Date: Tue, 3 Feb 2026 13:28:27 -0700 Subject: [PATCH 03/45] mcp loads now --- .claude/settings.json | 11 - .../add_platform.add_capabilities/SKILL.md | 216 ------ .../skills/add_platform.implement/SKILL.md | 321 -------- .claude/skills/add_platform.research/SKILL.md | 265 ------- .claude/skills/add_platform.verify/SKILL.md | 189 ----- .claude/skills/add_platform/SKILL.md | 83 -- .../skills/commit.commit_and_push/SKILL.md | 179 ----- .claude/skills/commit.lint/SKILL.md | 158 ---- .claude/skills/commit.review/SKILL.md | 139 ---- .claude/skills/commit.test/SKILL.md | 138 ---- .claude/skills/commit/SKILL.md | 79 -- .claude/skills/deepwork_jobs.define/SKILL.md | 724 ------------------ .../skills/deepwork_jobs.implement/SKILL.md | 267 ------- .claude/skills/deepwork_jobs.learn/SKILL.md | 449 ----------- .../deepwork_jobs.review_job_spec/SKILL.md | 496 ------------ .claude/skills/deepwork_jobs/SKILL.md | 84 -- .claude/skills/update.job/SKILL.md | 145 ---- 
.claude/skills/update/SKILL.md | 73 -- .../skills/add_platform/add_capabilities.toml | 210 ----- .gemini/skills/add_platform/implement.toml | 305 -------- .gemini/skills/add_platform/index.toml | 75 -- .gemini/skills/add_platform/research.toml | 259 ------- .gemini/skills/add_platform/verify.toml | 183 ----- .gemini/skills/commit/commit_and_push.toml | 164 ---- .gemini/skills/commit/index.toml | 71 -- .gemini/skills/commit/lint.toml | 143 ---- .gemini/skills/commit/review.toml | 124 --- .gemini/skills/commit/test.toml | 123 --- .gemini/skills/deepwork_jobs/define.toml | 537 ------------- .gemini/skills/deepwork_jobs/implement.toml | 252 ------ .gemini/skills/deepwork_jobs/index.toml | 69 -- .gemini/skills/deepwork_jobs/learn.toml | 437 ----------- .../skills/deepwork_jobs/review_job_spec.toml | 300 -------- .gemini/skills/update/index.toml | 63 -- .gemini/skills/update/job.toml | 141 ---- .mcp.json | 12 + src/deepwork/core/adapters.py | 76 +- 37 files changed, 64 insertions(+), 7496 deletions(-) delete mode 100644 .claude/skills/add_platform.add_capabilities/SKILL.md delete mode 100644 .claude/skills/add_platform.implement/SKILL.md delete mode 100644 .claude/skills/add_platform.research/SKILL.md delete mode 100644 .claude/skills/add_platform.verify/SKILL.md delete mode 100644 .claude/skills/add_platform/SKILL.md delete mode 100644 .claude/skills/commit.commit_and_push/SKILL.md delete mode 100644 .claude/skills/commit.lint/SKILL.md delete mode 100644 .claude/skills/commit.review/SKILL.md delete mode 100644 .claude/skills/commit.test/SKILL.md delete mode 100644 .claude/skills/commit/SKILL.md delete mode 100644 .claude/skills/deepwork_jobs.define/SKILL.md delete mode 100644 .claude/skills/deepwork_jobs.implement/SKILL.md delete mode 100644 .claude/skills/deepwork_jobs.learn/SKILL.md delete mode 100644 .claude/skills/deepwork_jobs.review_job_spec/SKILL.md delete mode 100644 .claude/skills/deepwork_jobs/SKILL.md delete mode 100644 .claude/skills/update.job/SKILL.md delete mode 100644 .claude/skills/update/SKILL.md delete mode 100644 .gemini/skills/add_platform/add_capabilities.toml delete mode 100644 .gemini/skills/add_platform/implement.toml delete mode 100644 .gemini/skills/add_platform/index.toml delete mode 100644 .gemini/skills/add_platform/research.toml delete mode 100644 .gemini/skills/add_platform/verify.toml delete mode 100644 .gemini/skills/commit/commit_and_push.toml delete mode 100644 .gemini/skills/commit/index.toml delete mode 100644 .gemini/skills/commit/lint.toml delete mode 100644 .gemini/skills/commit/review.toml delete mode 100644 .gemini/skills/commit/test.toml delete mode 100644 .gemini/skills/deepwork_jobs/define.toml delete mode 100644 .gemini/skills/deepwork_jobs/implement.toml delete mode 100644 .gemini/skills/deepwork_jobs/index.toml delete mode 100644 .gemini/skills/deepwork_jobs/learn.toml delete mode 100644 .gemini/skills/deepwork_jobs/review_job_spec.toml delete mode 100644 .gemini/skills/update/index.toml delete mode 100644 .gemini/skills/update/job.toml create mode 100644 .mcp.json diff --git a/.claude/settings.json b/.claude/settings.json index cfc707c0..36dc7bc8 100644 --- a/.claude/settings.json +++ b/.claude/settings.json @@ -148,16 +148,5 @@ ] } ] - }, - "mcpServers": { - "deepwork": { - "command": "deepwork", - "args": [ - "serve", - "--path", - "." 
- ], - "transport": "stdio" - } } } \ No newline at end of file diff --git a/.claude/skills/add_platform.add_capabilities/SKILL.md b/.claude/skills/add_platform.add_capabilities/SKILL.md deleted file mode 100644 index 17359135..00000000 --- a/.claude/skills/add_platform.add_capabilities/SKILL.md +++ /dev/null @@ -1,216 +0,0 @@ ---- -name: add_platform.add_capabilities -description: "Updates job schema and adapters with any new hook events the platform supports. Use after research to extend DeepWork's hook system." -user-invocable: false - ---- - -# add_platform.add_capabilities - -**Step 2/4** in **integrate** workflow - -> Full workflow to integrate a new AI platform into DeepWork - -> Adds a new AI platform to DeepWork with adapter, templates, and tests. Use when integrating Cursor, Windsurf, or other AI coding tools. - -## Prerequisites (Verify First) - -Before proceeding, confirm these steps are complete: -- `/add_platform.research` - -## Instructions - -**Goal**: Updates job schema and adapters with any new hook events the platform supports. Use after research to extend DeepWork's hook system. - -# Add Hook Capabilities - -## Objective - -Update the DeepWork job schema and platform adapters to support any new hook events that the new platform provides for slash command definitions. - -## Task - -Analyze the hooks documentation from the research step and update the codebase to support any new hook capabilities, ensuring consistency across all existing adapters. - -### Prerequisites - -Read the hooks documentation created in the previous step: -- `doc/platforms//hooks_system.md` - -Also review the existing schema and adapters: -- `src/deepwork/schemas/job_schema.py` -- `src/deepwork/adapters.py` - -### Process - -1. **Analyze the new platform's hooks** - - Read `doc/platforms//hooks_system.md` - - List all hooks available for slash command definitions - - Compare with hooks already in `job_schema.py` - - Identify any NEW hooks not currently supported - -2. **Determine if schema changes are needed** - - If the platform has hooks that DeepWork doesn't currently support, add them - - If all hooks are already supported, document this finding - - Remember: Only add hooks that are available on slash command definitions - -3. **Update job_schema.py (if needed)** - - Add new hook fields to the step schema - - Follow existing patterns for hook definitions - - Add appropriate type hints and documentation - - Example addition: - ```python - # New hook from - new_hook_name: Optional[List[HookConfig]] = None - ``` - -4. **Update all existing adapters** - - Open `src/deepwork/adapters.py` - - For EACH existing adapter class: - - Add the new hook field (set to `None` if not supported) - - This maintains consistency across all adapters - - Document why each adapter does or doesn't support the hook - -5. **Validate the changes** - - Run Python syntax check: `python -m py_compile src/deepwork/schemas/job_schema.py` - - Run Python syntax check: `python -m py_compile src/deepwork/adapters.py` - - Ensure no import errors - -6. **Document the decision** - - If no new hooks were added, add a comment explaining why - - If new hooks were added, ensure they're documented in the schema - -## Output Format - -### job_schema.py - -Location: `src/deepwork/schemas/job_schema.py` - -If new hooks are added: -```python -@dataclass -class StepDefinition: - # ... existing fields ... 
- - # New hook from - [description of what it does] - new_hook_name: Optional[List[HookConfig]] = None -``` - -### adapters.py - -Location: `src/deepwork/adapters.py` - -For each existing adapter, add the new hook field: -```python -class ExistingPlatformAdapter(PlatformAdapter): - # ... existing code ... - - def get_hook_support(self) -> dict: - return { - # ... existing hooks ... - "new_hook_name": None, # Not supported by this platform - } -``` - -Or if no changes are needed, add a documentation comment: -```python -# NOTE: hooks reviewed on YYYY-MM-DD -# No new hooks to add - all command hooks are already -# supported by the existing schema (stop_hooks covers their validation pattern) -``` - -## Quality Criteria - -- Hooks documentation from research step has been reviewed -- If new hooks exist: - - Added to `src/deepwork/schemas/job_schema.py` with proper typing - - ALL existing adapters updated in `src/deepwork/adapters.py` - - Each adapter indicates support level (implemented, None, or partial) -- If no new hooks needed: - - Decision documented with a comment explaining the analysis -- Only hooks available on slash command definitions are considered -- `job_schema.py` has no syntax errors (verified with py_compile) -- `adapters.py` has no syntax errors (verified with py_compile) -- All adapters have consistent hook fields (same fields across all adapters) -- When all criteria are met, include `✓ Quality Criteria Met` in your response - -## Context - -DeepWork supports multiple AI platforms, and each platform may have different capabilities for hooks within command definitions. The schema defines what hooks CAN exist, while adapters define what each platform actually SUPPORTS. - -This separation allows: -- Job definitions to use any hook (the schema is the superset) -- Platform-specific generation to only use supported hooks (adapters filter) -- Future platforms to add new hooks without breaking existing ones - -Maintaining consistency is critical - all adapters must have the same hook fields, even if they don't support them (use `None` for unsupported). - -## Common Hook Types - -For reference, here are common hook patterns across platforms: - -| Hook Type | Purpose | Example Platforms | -|-----------|---------|-------------------| -| `stop_hooks` | Quality validation loops | Claude Code | -| `pre_hooks` | Run before command | Various | -| `post_hooks` | Run after command | Various | -| `validation_hooks` | Validate inputs/outputs | Various | - -When you find a new hook type, consider whether it maps to an existing pattern or is genuinely new functionality. - - -### Job Context - -A workflow for adding support for a new AI platform (like Cursor, Windsurf, etc.) to DeepWork. - -The **integrate** workflow guides you through four phases: -1. **Research**: Capture the platform's CLI configuration and hooks system documentation -2. **Add Capabilities**: Update the job schema and adapters with any new hook events -3. **Implement**: Create the platform adapter, templates, tests (100% coverage), and README updates -4. **Verify**: Ensure installation works correctly and produces expected files - -The workflow ensures consistency across all supported platforms and maintains -comprehensive test coverage for new functionality. 
- -**Important Notes**: -- Only hooks available on slash command definitions should be captured -- Each existing adapter must be updated when new hooks are added (typically with null values) -- Tests must achieve 100% coverage for any new functionality -- Installation verification confirms the platform integrates correctly with existing jobs - - -## Required Inputs - - -**Files from Previous Steps** - Read these first: -- `hooks_system.md` (from `research`) - -## Work Branch - -Use branch format: `deepwork/add_platform-[instance]-YYYYMMDD` - -- If on a matching work branch: continue using it -- If on main/master: create new branch with `git checkout -b deepwork/add_platform-[instance]-$(date +%Y%m%d)` - -## Outputs - -**Required outputs**: -- `job_schema.py` -- `adapters.py` - -## Guardrails - -- Do NOT skip prerequisite verification if this step has dependencies -- Do NOT produce partial outputs; complete all required outputs before finishing -- Do NOT proceed without required inputs; ask the user if any are missing -- Do NOT modify files outside the scope of this step's defined outputs - -## On Completion - -1. Verify outputs are created -2. Inform user: "integrate step 2/4 complete, outputs: job_schema.py, adapters.py" -3. **Continue workflow**: Use Skill tool to invoke `/add_platform.implement` - ---- - -**Reference files**: `.deepwork/jobs/add_platform/job.yml`, `.deepwork/jobs/add_platform/steps/add_capabilities.md` \ No newline at end of file diff --git a/.claude/skills/add_platform.implement/SKILL.md b/.claude/skills/add_platform.implement/SKILL.md deleted file mode 100644 index 5d925402..00000000 --- a/.claude/skills/add_platform.implement/SKILL.md +++ /dev/null @@ -1,321 +0,0 @@ ---- -name: add_platform.implement -description: "Creates platform adapter, templates, tests with 100% coverage, and README documentation. Use after adding hook capabilities." -user-invocable: false -hooks: - Stop: - - hooks: - - type: command - command: ".deepwork/jobs/add_platform/hooks/run_tests.sh" - SubagentStop: - - hooks: - - type: command - command: ".deepwork/jobs/add_platform/hooks/run_tests.sh" - ---- - -# add_platform.implement - -**Step 3/4** in **integrate** workflow - -> Full workflow to integrate a new AI platform into DeepWork - -> Adds a new AI platform to DeepWork with adapter, templates, and tests. Use when integrating Cursor, Windsurf, or other AI coding tools. - -## Prerequisites (Verify First) - -Before proceeding, confirm these steps are complete: -- `/add_platform.research` -- `/add_platform.add_capabilities` - -## Instructions - -**Goal**: Creates platform adapter, templates, tests with 100% coverage, and README documentation. Use after adding hook capabilities. - -# Implement Platform Support - -## Objective - -Create the complete platform implementation including the adapter class, command templates, comprehensive tests, and documentation updates. - -## Task - -Build the full platform support by implementing the adapter, creating templates, writing tests with 100% coverage, and updating the README. - -### Prerequisites - -Read the outputs from previous steps: -- `doc/platforms//cli_configuration.md` - For template structure -- `src/deepwork/schemas/job_schema.py` - For current schema -- `src/deepwork/adapters.py` - For adapter patterns - -Also review existing implementations for reference: -- `src/deepwork/templates/claude/` - Example templates -- `tests/` - Existing test patterns - -### Process - -1. 
**Create the platform adapter class** - - Add a new adapter class to `src/deepwork/adapters.py`: - - ```python - class NewPlatformAdapter(PlatformAdapter): - """Adapter for .""" - - platform_name = "" - command_directory = "" # e.g., ".cursor/commands" - command_extension = ".md" # or appropriate extension - - def get_hook_support(self) -> dict: - """Return which hooks this platform supports.""" - return { - "stop_hooks": True, # or False/None - # ... other hooks - } - - def generate_command(self, step: StepDefinition, job: JobDefinition) -> str: - """Generate command file content for this platform.""" - # Use Jinja2 template - template = self.env.get_template(f"{self.platform_name}/command.md.j2") - return template.render(step=step, job=job) - ``` - -2. **Create command templates** - - Create templates in `src/deepwork/templates//`: - - - `command.md.j2` - Main command template - - Any other templates needed for the platform's format - - Use the CLI configuration documentation to ensure the template matches the platform's expected format. - -3. **Register the adapter** - - Update the adapter registry in `src/deepwork/adapters.py`: - - ```python - PLATFORM_ADAPTERS = { - "claude": ClaudeAdapter, - "": NewPlatformAdapter, - # ... other adapters - } - ``` - -4. **Write comprehensive tests** - - Create tests in `tests/` that cover: - - - Adapter instantiation - - Hook support detection - - Command generation - - Template rendering - - Edge cases (empty inputs, special characters, etc.) - - Integration with the sync command - - **Critical**: Tests must achieve 100% coverage of new code. - -5. **Update README.md** - - Add the new platform to `README.md`: - - - Add to "Supported Platforms" list - - Add installation instructions: - ```bash - deepwork install --platform - ``` - - Document any platform-specific notes or limitations - -6. **Run tests and verify coverage** - - ```bash - uv run pytest --cov=src/deepwork --cov-report=term-missing - ``` - - - All tests must pass - - New code must have 100% coverage - - If coverage is below 100%, add more tests - -7. **Iterate until tests pass with full coverage** - - This step has a `stop_hooks` script that runs tests. Keep iterating until: - - All tests pass - - Coverage is 100% for new functionality - -## Output Format - -### templates/ - -Location: `src/deepwork/templates//` - -Create the following files: - -**command.md.j2**: -```jinja2 -{# Template for command files #} -{# Follows the platform's expected format from cli_configuration.md #} - -[Platform-specific frontmatter or metadata] - -# {{ step.name }} - -{{ step.description }} - -## Instructions - -{{ step.instructions_content }} - -[... rest of template based on platform format ...] -``` - -### tests/ - -Location: `tests/test__adapter.py` - -```python -"""Tests for the adapter.""" -import pytest -from deepwork.adapters import NewPlatformAdapter - -class TestNewPlatformAdapter: - """Test suite for NewPlatformAdapter.""" - - def test_adapter_initialization(self): - """Test adapter can be instantiated.""" - adapter = NewPlatformAdapter() - assert adapter.platform_name == "" - - def test_hook_support(self): - """Test hook support detection.""" - adapter = NewPlatformAdapter() - hooks = adapter.get_hook_support() - assert "stop_hooks" in hooks - # ... more assertions - - def test_command_generation(self): - """Test command file generation.""" - # ... test implementation - - # ... 
more tests for 100% coverage -``` - -### README.md - -Add to the existing README.md: - -```markdown -## Supported Platforms - -- **Claude Code** - Anthropic's CLI for Claude -- **** - [Brief description] - -## Installation - -### - -```bash -deepwork install --platform -``` - -[Any platform-specific notes] -``` - -## Quality Criteria - -- Platform adapter class added to `src/deepwork/adapters.py`: - - Inherits from `PlatformAdapter` - - Implements all required methods - - Registered in `PLATFORM_ADAPTERS` -- Templates created in `src/deepwork/templates//`: - - `command.md.j2` exists and renders correctly - - Format matches platform's expected command format -- Tests created in `tests/`: - - Cover all new adapter functionality - - Cover template rendering - - All tests pass -- Test coverage is 100% for new code: - - Run `uv run pytest --cov=src/deepwork --cov-report=term-missing` - - No uncovered lines in new code -- README.md updated: - - Platform listed in supported platforms - - Installation command documented - - Any platform-specific notes included -- When all criteria are met, include `✓ Quality Criteria Met` in your response - -## Context - -This is the core implementation step. The adapter you create will be responsible for: -- Determining where command files are placed -- Generating command file content from job definitions -- Handling platform-specific features and hooks - -The templates use Jinja2 and should produce files that match exactly what the platform expects. Reference the CLI configuration documentation frequently to ensure compatibility. - -## Tips - -- Study the existing `ClaudeAdapter` as a reference implementation -- Run tests frequently as you implement -- Use `--cov-report=html` for a detailed coverage report -- If a test is hard to write, the code might need refactoring -- Template syntax errors often show up at runtime - test early - - -### Job Context - -A workflow for adding support for a new AI platform (like Cursor, Windsurf, etc.) to DeepWork. - -The **integrate** workflow guides you through four phases: -1. **Research**: Capture the platform's CLI configuration and hooks system documentation -2. **Add Capabilities**: Update the job schema and adapters with any new hook events -3. **Implement**: Create the platform adapter, templates, tests (100% coverage), and README updates -4. **Verify**: Ensure installation works correctly and produces expected files - -The workflow ensures consistency across all supported platforms and maintains -comprehensive test coverage for new functionality. 
- -**Important Notes**: -- Only hooks available on slash command definitions should be captured -- Each existing adapter must be updated when new hooks are added (typically with null values) -- Tests must achieve 100% coverage for any new functionality -- Installation verification confirms the platform integrates correctly with existing jobs - - -## Required Inputs - - -**Files from Previous Steps** - Read these first: -- `job_schema.py` (from `add_capabilities`) -- `adapters.py` (from `add_capabilities`) -- `cli_configuration.md` (from `research`) - -## Work Branch - -Use branch format: `deepwork/add_platform-[instance]-YYYYMMDD` - -- If on a matching work branch: continue using it -- If on main/master: create new branch with `git checkout -b deepwork/add_platform-[instance]-$(date +%Y%m%d)` - -## Outputs - -**Required outputs**: -- `templates/` (directory) -- `tests/` (directory) -- `README.md` - -## Guardrails - -- Do NOT skip prerequisite verification if this step has dependencies -- Do NOT produce partial outputs; complete all required outputs before finishing -- Do NOT proceed without required inputs; ask the user if any are missing -- Do NOT modify files outside the scope of this step's defined outputs - -**Validation script**: `.deepwork/jobs/add_platform/hooks/run_tests.sh` (runs automatically) -## On Completion - -1. Verify outputs are created -2. Inform user: "integrate step 3/4 complete, outputs: templates/, tests/, README.md" -3. **Continue workflow**: Use Skill tool to invoke `/add_platform.verify` - ---- - -**Reference files**: `.deepwork/jobs/add_platform/job.yml`, `.deepwork/jobs/add_platform/steps/implement.md` \ No newline at end of file diff --git a/.claude/skills/add_platform.research/SKILL.md b/.claude/skills/add_platform.research/SKILL.md deleted file mode 100644 index d113d3e9..00000000 --- a/.claude/skills/add_platform.research/SKILL.md +++ /dev/null @@ -1,265 +0,0 @@ ---- -name: add_platform.research -description: "Captures CLI configuration and hooks system documentation for the new platform. Use when starting platform integration." -user-invocable: false - ---- - -# add_platform.research - -**Step 1/4** in **integrate** workflow - -> Full workflow to integrate a new AI platform into DeepWork - -> Adds a new AI platform to DeepWork with adapter, templates, and tests. Use when integrating Cursor, Windsurf, or other AI coding tools. - - -## Instructions - -**Goal**: Captures CLI configuration and hooks system documentation for the new platform. Use when starting platform integration. - -# Research Platform Documentation - -## Objective - -Capture comprehensive documentation for the new AI platform's CLI configuration and hooks system, creating a local reference that will guide the implementation phases. - -## Task - -Research the target platform's official documentation and create two focused documentation files that will serve as the foundation for implementing platform support in DeepWork. - -### Process - -1. **Identify the platform's documentation sources** - - Find the official documentation website - - Locate the CLI/agent configuration documentation - - Find the hooks or customization system documentation - - Note: Focus ONLY on slash command/custom command hooks, not general CLI hooks - -2. **Gather CLI configuration documentation** - - How is the CLI configured? (config files, environment variables, etc.) - - Where are custom commands/skills stored? - - What is the command file format? (markdown, YAML, etc.) - - What metadata or frontmatter is supported? 
- - How does the platform discover and load commands? - -3. **Gather hooks system documentation** - - What hooks are available for custom command definitions? - - Focus on hooks that trigger during or after command execution - - Examples: `stop_hooks`, `pre_hooks`, `post_hooks`, validation hooks - - Document the syntax and available hook types - - **Important**: Only document hooks available on slash command definitions, not general CLI hooks - -4. **Create the documentation files** - - Place files in `doc/platforms//` - - Each file must have a header comment with source and date - - Content should be comprehensive but focused - -## Output Format - -### cli_configuration.md - -Located at: `doc/platforms//cli_configuration.md` - -**Structure**: -```markdown - - -# CLI Configuration - -## Overview - -[Brief description of the platform and its CLI/agent system] - -## Configuration Files - -[Document where configuration lives and its format] - -### File Locations - -- [Location 1]: [Purpose] -- [Location 2]: [Purpose] - -### Configuration Format - -[Show the configuration file format with examples] - -## Custom Commands/Skills - -[Document how custom commands are defined] - -### Command Location - -[Where command files are stored] - -### Command File Format - -[The format of command files - markdown, YAML, etc.] - -### Metadata/Frontmatter - -[What metadata fields are supported in command files] - -```[format] -[Example of a minimal command file] -``` - -## Command Discovery - -[How the platform discovers and loads commands] - -## Platform-Specific Features - -[Any unique features relevant to command configuration] -``` - -### hooks_system.md - -Located at: `doc/platforms//hooks_system.md` - -**Structure**: -```markdown - - -# Hooks System (Command Definitions) - -## Overview - -[Brief description of hooks available for command definitions] - -**Important**: This document covers ONLY hooks available within slash command/skill definitions, not general CLI hooks. - -## Available Hooks - -### [Hook Name 1] - -**Purpose**: [What this hook does] - -**Syntax**: -```yaml -[hook_name]: - - [configuration] -``` - -**Example**: -```yaml -[Complete example of using this hook] -``` - -**Behavior**: [When and how this hook executes] - -### [Hook Name 2] - -[Repeat for each available hook] - -## Hook Execution Order - -[Document the order in which hooks execute, if multiple are supported] - -## Comparison with Other Platforms - -| Feature | | Claude Code | Other | -|---------|-----------|-------------|-------| -| [Feature 1] | [Support] | [Support] | [Support] | - -## Limitations - -[Any limitations or caveats about the hooks system] -``` - -## Quality Criteria - -- Both files exist in `doc/platforms//` -- Each file has a header comment with: - - Last updated date (YYYY-MM-DD format) - - Source URL where documentation was obtained -- `cli_configuration.md` comprehensively covers: - - Configuration file locations and format - - Custom command file format and location - - Command discovery mechanism -- `hooks_system.md` comprehensively covers: - - All hooks available for slash command definitions - - Syntax and examples for each hook - - NOT general CLI hooks (only command-level hooks) -- Documentation is detailed enough to implement the platform adapter -- No extraneous topics (only CLI config and command hooks) -- When all criteria are met, include `✓ Quality Criteria Met` in your response - -## Context - -This is the foundation step for adding a new platform to DeepWork. 
The documentation you capture here will be referenced throughout the implementation process: -- CLI configuration informs how to generate command files -- Hooks documentation determines what features the adapter needs to support -- This documentation becomes a permanent reference in `doc/platforms/` - -Take time to be thorough - incomplete documentation will slow down subsequent steps. - -## Tips - -- Use the platform's official documentation as the primary source -- If documentation is sparse, check GitHub repos, community guides, or changelog entries -- When in doubt about whether something is a "command hook" vs "CLI hook", err on the side of inclusion and note the ambiguity -- Include code examples from the official docs where available - - -### Job Context - -A workflow for adding support for a new AI platform (like Cursor, Windsurf, etc.) to DeepWork. - -The **integrate** workflow guides you through four phases: -1. **Research**: Capture the platform's CLI configuration and hooks system documentation -2. **Add Capabilities**: Update the job schema and adapters with any new hook events -3. **Implement**: Create the platform adapter, templates, tests (100% coverage), and README updates -4. **Verify**: Ensure installation works correctly and produces expected files - -The workflow ensures consistency across all supported platforms and maintains -comprehensive test coverage for new functionality. - -**Important Notes**: -- Only hooks available on slash command definitions should be captured -- Each existing adapter must be updated when new hooks are added (typically with null values) -- Tests must achieve 100% coverage for any new functionality -- Installation verification confirms the platform integrates correctly with existing jobs - - -## Required Inputs - -**User Parameters** - Gather from user before starting: -- **platform_name**: Clear identifier of the platform (e.g., 'cursor', 'windsurf-editor', 'github-copilot-chat') - - -## Work Branch - -Use branch format: `deepwork/add_platform-[instance]-YYYYMMDD` - -- If on a matching work branch: continue using it -- If on main/master: create new branch with `git checkout -b deepwork/add_platform-[instance]-$(date +%Y%m%d)` - -## Outputs - -**Required outputs**: -- `cli_configuration.md` -- `hooks_system.md` - -## Guardrails - -- Do NOT skip prerequisite verification if this step has dependencies -- Do NOT produce partial outputs; complete all required outputs before finishing -- Do NOT proceed without required inputs; ask the user if any are missing -- Do NOT modify files outside the scope of this step's defined outputs - -## On Completion - -1. Verify outputs are created -2. Inform user: "integrate step 1/4 complete, outputs: cli_configuration.md, hooks_system.md" -3. **Continue workflow**: Use Skill tool to invoke `/add_platform.add_capabilities` - ---- - -**Reference files**: `.deepwork/jobs/add_platform/job.yml`, `.deepwork/jobs/add_platform/steps/research.md` \ No newline at end of file diff --git a/.claude/skills/add_platform.verify/SKILL.md b/.claude/skills/add_platform.verify/SKILL.md deleted file mode 100644 index debe5a19..00000000 --- a/.claude/skills/add_platform.verify/SKILL.md +++ /dev/null @@ -1,189 +0,0 @@ ---- -name: add_platform.verify -description: "Sets up platform directories and verifies deepwork install works correctly. Use after implementation to confirm integration." 
-user-invocable: false - ---- - -# add_platform.verify - -**Step 4/4** in **integrate** workflow - -> Full workflow to integrate a new AI platform into DeepWork - -> Adds a new AI platform to DeepWork with adapter, templates, and tests. Use when integrating Cursor, Windsurf, or other AI coding tools. - -## Prerequisites (Verify First) - -Before proceeding, confirm these steps are complete: -- `/add_platform.implement` - -## Instructions - -**Goal**: Sets up platform directories and verifies deepwork install works correctly. Use after implementation to confirm integration. - -# Verify Installation - -## Objective - -Ensure the new platform integration works correctly by setting up necessary directories and running the full installation process. - -## Task - -Perform end-to-end verification that the new platform can be installed and that DeepWork's standard jobs work correctly with it. - -### Prerequisites - -Ensure the implementation step is complete: -- Adapter class exists in `src/deepwork/adapters.py` -- Templates exist in `src/deepwork/templates/[platform_name]/` -- Tests pass with 100% coverage -- README.md is updated - -### Process - -1. **Set up platform directories in the DeepWork repo** - - The DeepWork repository itself should have the platform's command directory structure for testing: - - ```bash - mkdir -p [platform_commands_dir] - ``` - - For example: - - Claude: `.claude/commands/` - - Cursor: `.cursor/commands/` (or wherever Cursor stores commands) - -2. **Run deepwork install for the new platform** - - ```bash - deepwork install --platform [platform_name] - ``` - - Verify: - - Command completes without errors - - No Python exceptions or tracebacks - - Output indicates successful installation - -3. **Check that command files were created** - - List the generated command files: - ```bash - ls -la [platform_commands_dir]/ - ``` - - Verify: - - `deepwork_jobs.define.md` exists (or equivalent for the platform) - - `deepwork_jobs.implement.md` exists - - `deepwork_jobs.refine.md` exists - - All expected step commands exist - -4. **Validate command file content** - - Read each generated command file and verify: - - Content matches the expected format for the platform - - Job metadata is correctly included - - Step instructions are properly rendered - - Any platform-specific features (hooks, frontmatter) are present - -5. **Test alongside existing platforms** - - If other platforms are already installed, verify they still work: - ```bash - deepwork install --platform claude - ls -la .claude/commands/ - ``` - - Ensure: - - New platform doesn't break existing installations - - Each platform's commands are independent - - No file conflicts or overwrites - -## Quality Criteria - -- Platform-specific directories are set up in the DeepWork repo -- `deepwork install --platform [platform_name]` completes without errors - -- All expected command files are created: - - deepwork_jobs.define, implement, refine - - Any other standard job commands -- Command file content is correct: - - Matches platform's expected format - - Job/step information is properly rendered - - No template errors or missing content -- Existing platforms still work (if applicable) -- No conflicts between platforms -- When all criteria are met, include `✓ Quality Criteria Met` in your response - -## Context - -This is the final validation step before the platform is considered complete.
A thorough verification ensures: -- The platform actually works, not just compiles -- Standard DeepWork jobs install correctly -- The platform integrates properly with the existing system -- Users can confidently use the new platform - -Take time to verify each aspect - finding issues now is much better than having users discover them later. - -## Common Issues to Check - -- **Template syntax errors**: May only appear when rendering specific content -- **Path issues**: Platform might expect different directory structure -- **Encoding issues**: Special characters in templates or content -- **Missing hooks**: Platform adapter might not handle all hook types -- **Permission issues**: Directory creation might fail in some cases - - -### Job Context - -A workflow for adding support for a new AI platform (like Cursor, Windsurf, etc.) to DeepWork. - -The **integrate** workflow guides you through four phases: -1. **Research**: Capture the platform's CLI configuration and hooks system documentation -2. **Add Capabilities**: Update the job schema and adapters with any new hook events -3. **Implement**: Create the platform adapter, templates, tests (100% coverage), and README updates -4. **Verify**: Ensure installation works correctly and produces expected files - -The workflow ensures consistency across all supported platforms and maintains -comprehensive test coverage for new functionality. - -**Important Notes**: -- Only hooks available on slash command definitions should be captured -- Each existing adapter must be updated when new hooks are added (typically with null values) -- Tests must achieve 100% coverage for any new functionality -- Installation verification confirms the platform integrates correctly with existing jobs - - -## Required Inputs - - -**Files from Previous Steps** - Read these first: -- `templates/` (from `implement`) - -## Work Branch - -Use branch format: `deepwork/add_platform-[instance]-YYYYMMDD` - -- If on a matching work branch: continue using it -- If on main/master: create new branch with `git checkout -b deepwork/add_platform-[instance]-$(date +%Y%m%d)` - -## Outputs - -**Required outputs**: -- `verification_checklist.md` - -## Guardrails - -- Do NOT skip prerequisite verification if this step has dependencies -- Do NOT produce partial outputs; complete all required outputs before finishing -- Do NOT proceed without required inputs; ask the user if any are missing -- Do NOT modify files outside the scope of this step's defined outputs - -## On Completion - -1. Verify outputs are created -2. Inform user: "integrate step 4/4 complete, outputs: verification_checklist.md" -3. **integrate workflow complete**: All steps finished. Consider creating a PR to merge the work branch. - ---- - -**Reference files**: `.deepwork/jobs/add_platform/job.yml`, `.deepwork/jobs/add_platform/steps/verify.md` \ No newline at end of file diff --git a/.claude/skills/add_platform/SKILL.md b/.claude/skills/add_platform/SKILL.md deleted file mode 100644 index 474bb3a8..00000000 --- a/.claude/skills/add_platform/SKILL.md +++ /dev/null @@ -1,83 +0,0 @@ ---- -name: add_platform -description: "Adds a new AI platform to DeepWork with adapter, templates, and tests. Use when integrating Cursor, Windsurf, or other AI coding tools." ---- - -# add_platform - -Adds a new AI platform to DeepWork with adapter, templates, and tests. Use when integrating Cursor, Windsurf, or other AI coding tools. - -> **CRITICAL**: Always invoke steps using the Skill tool. Never copy/paste step instructions directly. 
- -A workflow for adding support for a new AI platform (like Cursor, Windsurf, etc.) to DeepWork. - -The **integrate** workflow guides you through four phases: -1. **Research**: Capture the platform's CLI configuration and hooks system documentation -2. **Add Capabilities**: Update the job schema and adapters with any new hook events -3. **Implement**: Create the platform adapter, templates, tests (100% coverage), and README updates -4. **Verify**: Ensure installation works correctly and produces expected files - -The workflow ensures consistency across all supported platforms and maintains -comprehensive test coverage for new functionality. - -**Important Notes**: -- Only hooks available on slash command definitions should be captured -- Each existing adapter must be updated when new hooks are added (typically with null values) -- Tests must achieve 100% coverage for any new functionality -- Installation verification confirms the platform integrates correctly with existing jobs - - -## Workflows - -### integrate - -Full workflow to integrate a new AI platform into DeepWork - -**Steps in order**: -1. **research** - Captures CLI configuration and hooks system documentation for the new platform. Use when starting platform integration. -2. **add_capabilities** - Updates job schema and adapters with any new hook events the platform supports. Use after research to extend DeepWork's hook system. -3. **implement** - Creates platform adapter, templates, tests with 100% coverage, and README documentation. Use after adding hook capabilities. -4. **verify** - Sets up platform directories and verifies deepwork install works correctly. Use after implementation to confirm integration. - -**Start workflow**: `/add_platform.research` - - -## Execution Instructions - -### Step 1: Analyze Intent - -Parse any text following `/add_platform` to determine user intent: -- "integrate" or related terms → start integrate workflow at `add_platform.research` - -### Step 2: Invoke Starting Step - -Use the Skill tool to invoke the identified starting step: -``` -Skill tool: add_platform.research -``` - -### Step 3: Continue Workflow Automatically - -After each step completes: -1. Check if there's a next step in the workflow sequence -2. Invoke the next step using the Skill tool -3. Repeat until workflow is complete or user intervenes - -**Note**: Standalone skills do not auto-continue to other steps. - -### Handling Ambiguous Intent - -If user intent is unclear, use AskUserQuestion to clarify: -- Present available workflows and standalone skills as options -- Let user select the starting point - -## Guardrails - -- Do NOT copy/paste step instructions directly; always use the Skill tool to invoke steps -- Do NOT skip steps in a workflow unless the user explicitly requests it -- Do NOT proceed to the next step if the current step's outputs are incomplete -- Do NOT make assumptions about user intent; ask for clarification when ambiguous - -## Context Files - -- Job definition: `.deepwork/jobs/add_platform/job.yml` \ No newline at end of file diff --git a/.claude/skills/commit.commit_and_push/SKILL.md b/.claude/skills/commit.commit_and_push/SKILL.md deleted file mode 100644 index 1ec4e5c9..00000000 --- a/.claude/skills/commit.commit_and_push/SKILL.md +++ /dev/null @@ -1,179 +0,0 @@ ---- -name: commit.commit_and_push -description: "Verifies changed files, creates commit, and pushes to remote. Use after linting passes to finalize changes." 
-user-invocable: false - ---- - -# commit.commit_and_push - -**Step 4/4** in **full** workflow - -> Full commit workflow: review, test, lint, and commit - -> Reviews code, runs tests, lints, and commits changes. Use when ready to commit work with quality checks. - -## Prerequisites (Verify First) - -Before proceeding, confirm these steps are complete: -- `/commit.lint` - -## Instructions - -**Goal**: Verifies changed files, creates commit, and pushes to remote. Use after linting passes to finalize changes. - -# Commit and Push - -## Objective - -Review the changed files to verify they match the agent's expectations, create a commit with an appropriate message, and push to the remote repository. - -## Task - -Check the list of changed files against what was modified during this session, ensure they match expectations, then commit and push the changes. - -### Process - -1. **Get the list of changed files** - ```bash - git status - ``` - Also run `git diff --stat` to see a summary of changes. - -2. **Verify changes match expectations** - - Compare the changed files against what you modified during this session: - - Do the modified files match what you edited? - - Are there any unexpected new files? - - Are there any unexpected deleted files? - - Do the line counts seem reasonable for the changes you made? - - If changes match expectations, proceed to the next step. - - If there are unexpected changes: - - Investigate why (e.g., lint auto-fixes, generated files) - - If they're legitimate side effects of your work, include them - - If they're unrelated or shouldn't be committed, use `git restore` to discard them - -3. **Update CHANGELOG.md if needed** - - If your changes include new features, bug fixes, or other notable changes: - - Add entries to the `## [Unreleased]` section of CHANGELOG.md - - Use the appropriate subsection: `### Added`, `### Changed`, `### Fixed`, or `### Removed` - - Write concise descriptions that explain the user-facing impact - - **CRITICAL: NEVER modify version numbers** - - Do NOT change the version in `pyproject.toml` - - Do NOT change version headers in CHANGELOG.md (e.g., `## [0.4.2]`) - - Do NOT rename the `## [Unreleased]` section - - Version updates are handled by the release workflow, not commits - -4. **Stage all appropriate changes** - ```bash - git add -A - ``` - Or stage specific files if some were excluded. - -5. **View recent commit messages for style reference** - ```bash - git log --oneline -10 - ``` - -6. **Create the commit** - - Generate an appropriate commit message based on: - - The changes made - - The style of recent commits - - Conventional commit format if the project uses it - - **IMPORTANT:** Use the commit job script (not `git commit` directly): - ```bash - .claude/hooks/commit_job_git_commit.sh -m "commit message here" - ``` - -7. **Push to remote** - ```bash - git push - ``` - If the branch has no upstream, use: - ```bash - git push -u origin HEAD - ``` - -## Quality Criteria - -- Changed files were verified against expectations -- CHANGELOG.md was updated with entries in [Unreleased] section (if changes warrant documentation) -- Version numbers were NOT modified (pyproject.toml version and CHANGELOG version headers unchanged) -- Commit was created with appropriate message -- Changes were pushed to remote - -## Context - -This is the final step of the commit workflow. The agent verifies that the changed files match its own expectations from the work done during the session, then commits and pushes. 
This catches unexpected changes while avoiding unnecessary user interruptions. - - -### Job Context - -A workflow for preparing and committing code changes with quality checks. - -The **full** workflow starts with a code review to catch issues early, runs tests until -they pass, formats and lints code with ruff, then reviews changed files -before committing and pushing. The review and lint steps use sub-agents -to reduce context usage. - -Steps: -1. review - Code review for issues, DRY opportunities, naming, and test coverage (runs in sub-agent) -2. test - Pull latest code and run tests until they pass -3. lint - Format and lint code with ruff (runs in sub-agent) -4. commit_and_push - Review changes and commit/push - - - -## Work Branch - -Use branch format: `deepwork/commit-[instance]-YYYYMMDD` - -- If on a matching work branch: continue using it -- If on main/master: create new branch with `git checkout -b deepwork/commit-[instance]-$(date +%Y%m%d)` - -## Outputs - -**Required outputs**: -- `changes_committed` - -## Guardrails - -- Do NOT skip prerequisite verification if this step has dependencies -- Do NOT produce partial outputs; complete all required outputs before finishing -- Do NOT proceed without required inputs; ask the user if any are missing -- Do NOT modify files outside the scope of this step's defined outputs - -## Quality Validation - -**Before completing this step, you MUST have your work reviewed against the quality criteria below.** - -Use a sub-agent (Haiku model) to review your work against these criteria: - -**Criteria (all must be satisfied)**: -1. Changed files were verified against expectations -2. CHANGELOG.md was updated with entries in [Unreleased] section (if changes warrant documentation) -3. Version numbers were NOT modified (pyproject.toml version and CHANGELOG version headers unchanged) -4. Commit was created with appropriate message -5. Changes were pushed to remote -**Review Process**: -1. Once you believe your work is complete, spawn a sub-agent using Haiku to review your work against the quality criteria above -2. The sub-agent should examine your outputs and verify each criterion is met -3. If the sub-agent identifies valid issues, fix them -4. Have the sub-agent review again until all valid feedback has been addressed -5. Only mark the step complete when the sub-agent confirms all criteria are satisfied - -## On Completion - -1. Verify outputs are created -2. Inform user: "full step 4/4 complete, outputs: changes_committed" -3. **full workflow complete**: All steps finished. Consider creating a PR to merge the work branch. - ---- - -**Reference files**: `.deepwork/jobs/commit/job.yml`, `.deepwork/jobs/commit/steps/commit_and_push.md` \ No newline at end of file diff --git a/.claude/skills/commit.lint/SKILL.md b/.claude/skills/commit.lint/SKILL.md deleted file mode 100644 index 0f16873e..00000000 --- a/.claude/skills/commit.lint/SKILL.md +++ /dev/null @@ -1,158 +0,0 @@ ---- -name: commit.lint -description: "Formats and lints code with ruff using a sub-agent. Use after tests pass to ensure code style compliance." -user-invocable: false - ---- - -# commit.lint - -**Step 3/4** in **full** workflow - -> Full commit workflow: review, test, lint, and commit - -> Reviews code, runs tests, lints, and commits changes. Use when ready to commit work with quality checks. - -## Prerequisites (Verify First) - -Before proceeding, confirm these steps are complete: -- `/commit.test` - -## Instructions - -**Goal**: Formats and lints code with ruff using a sub-agent. 
Use after tests pass to ensure code style compliance. - -# Lint Code - -## Objective - -Format and lint the codebase using ruff to ensure code quality and consistency. - -## Task - -Run ruff format and ruff check to format and lint the code. This step should be executed using a sub-agent to conserve context in the main conversation. - -### Process - -**IMPORTANT**: Use the Task tool to spawn a sub-agent for this work. This saves context in the main conversation. Use the `haiku` model for speed. - -1. **Spawn a sub-agent to run linting** - - Use the Task tool with these parameters: - - `subagent_type`: "Bash" - - `model`: "haiku" - - `prompt`: See below - - The sub-agent should: - - a. **Run ruff format** - ```bash - ruff format . - ``` - This formats the code according to ruff's style rules. - - b. **Run ruff check with auto-fix** - ```bash - ruff check --fix . - ``` - This checks for lint errors and automatically fixes what it can. - - c. **Run ruff check again to verify** - ```bash - ruff check . - ``` - Capture the final output to verify no remaining issues. - -2. **Review sub-agent results** - - Check that both format and check completed successfully - - Note any remaining lint issues that couldn't be auto-fixed - -3. **Handle remaining issues** - - If there are lint errors that couldn't be auto-fixed, fix them manually - - Re-run ruff check to verify - -## Example Sub-Agent Prompt - -``` -Run ruff to format and lint the codebase: - -1. Run: ruff format . -2. Run: ruff check --fix . -3. Run: ruff check . (to verify no remaining issues) - -Report the results of each command. -``` - -## Quality Criteria - -- ruff format was run successfully -- ruff check was run with --fix flag -- No remaining lint errors - -## Context - -This step ensures code quality and consistency before committing. It runs after tests pass and before the commit step. Using a sub-agent keeps the main conversation context clean for the commit review. - - -### Job Context - -A workflow for preparing and committing code changes with quality checks. - -The **full** workflow starts with a code review to catch issues early, runs tests until -they pass, formats and lints code with ruff, then reviews changed files -before committing and pushing. The review and lint steps use sub-agents -to reduce context usage. - -Steps: -1. review - Code review for issues, DRY opportunities, naming, and test coverage (runs in sub-agent) -2. test - Pull latest code and run tests until they pass -3. lint - Format and lint code with ruff (runs in sub-agent) -4. commit_and_push - Review changes and commit/push - - - -## Work Branch - -Use branch format: `deepwork/commit-[instance]-YYYYMMDD` - -- If on a matching work branch: continue using it -- If on main/master: create new branch with `git checkout -b deepwork/commit-[instance]-$(date +%Y%m%d)` - -## Outputs - -**Required outputs**: -- `code_formatted` - -## Guardrails - -- Do NOT skip prerequisite verification if this step has dependencies -- Do NOT produce partial outputs; complete all required outputs before finishing -- Do NOT proceed without required inputs; ask the user if any are missing -- Do NOT modify files outside the scope of this step's defined outputs - -## Quality Validation - -**Before completing this step, you MUST have your work reviewed against the quality criteria below.** - -Use a sub-agent (Haiku model) to review your work against these criteria: - -**Criteria (all must be satisfied)**: -1. ruff format was run successfully -2. ruff check was run with --fix flag -3. 
No remaining lint errors -**Review Process**: -1. Once you believe your work is complete, spawn a sub-agent using Haiku to review your work against the quality criteria above -2. The sub-agent should examine your outputs and verify each criterion is met -3. If the sub-agent identifies valid issues, fix them -4. Have the sub-agent review again until all valid feedback has been addressed -5. Only mark the step complete when the sub-agent confirms all criteria are satisfied - -## On Completion - -1. Verify outputs are created -2. Inform user: "full step 3/4 complete, outputs: code_formatted" -3. **Continue workflow**: Use Skill tool to invoke `/commit.commit_and_push` - ---- - -**Reference files**: `.deepwork/jobs/commit/job.yml`, `.deepwork/jobs/commit/steps/lint.md` \ No newline at end of file diff --git a/.claude/skills/commit.review/SKILL.md b/.claude/skills/commit.review/SKILL.md deleted file mode 100644 index f4074077..00000000 --- a/.claude/skills/commit.review/SKILL.md +++ /dev/null @@ -1,139 +0,0 @@ ---- -name: commit.review -description: "Reviews changed code for issues, DRY opportunities, naming clarity, and test coverage using a sub-agent. Use as the first step before testing." -user-invocable: false - ---- - -# commit.review - -**Step 1/4** in **full** workflow - -> Full commit workflow: review, test, lint, and commit - -> Reviews code, runs tests, lints, and commits changes. Use when ready to commit work with quality checks. - - -## Instructions - -**Goal**: Reviews changed code for issues, DRY opportunities, naming clarity, and test coverage using a sub-agent. Use as the first step before testing. - -# Code Review - -## Objective - -Review changed code for quality issues before running tests. This catches problems early and ensures code meets quality standards. - -## Task - -Use a sub-agent to review the staged/changed code and identify issues that should be fixed before committing. - -### Process - -**IMPORTANT**: Use the Task tool to spawn a sub-agent for this review. This saves context in the main conversation. - -1. **Get the list of changed files** - ```bash - git diff --name-only HEAD - git diff --name-only --staged - ``` - Combine these to get all files that have been modified. - -2. **Spawn a sub-agent to review the code** - - Use the Task tool with these parameters: - - `subagent_type`: "general-purpose" - - `prompt`: Instruct the sub-agent to: - - Read the code review standards from `doc/code_review_standards.md` - - Read each of the changed files - - Review each file against the standards - - Report issues found with file, line number, severity, and suggested fix - -3. **Review sub-agent findings** - - Examine each issue identified - - Prioritize issues by severity - -4. **Fix identified issues** - - Address each issue found by the review - - For DRY violations: extract shared code into functions/modules - - For naming issues: rename to be clearer - - For missing tests: add appropriate test cases - - For bugs: fix the underlying issue - -5. **Re-run review if significant changes made** - - If you made substantial changes, consider running another review pass - - Ensure fixes didn't introduce new issues - -## Quality Criteria - -- Changed files were identified -- Sub-agent read the code review standards and reviewed all changed files -- All identified issues were addressed or documented as intentional - -## Context - -This is the first step of the commit workflow. Code review happens before tests to catch quality issues early. 
The sub-agent approach keeps the main conversation context clean while providing thorough review coverage. - - -### Job Context - -A workflow for preparing and committing code changes with quality checks. - -The **full** workflow starts with a code review to catch issues early, runs tests until -they pass, formats and lints code with ruff, then reviews changed files -before committing and pushing. The review and lint steps use sub-agents -to reduce context usage. - -Steps: -1. review - Code review for issues, DRY opportunities, naming, and test coverage (runs in sub-agent) -2. test - Pull latest code and run tests until they pass -3. lint - Format and lint code with ruff (runs in sub-agent) -4. commit_and_push - Review changes and commit/push - - - -## Work Branch - -Use branch format: `deepwork/commit-[instance]-YYYYMMDD` - -- If on a matching work branch: continue using it -- If on main/master: create new branch with `git checkout -b deepwork/commit-[instance]-$(date +%Y%m%d)` - -## Outputs - -**Required outputs**: -- `code_reviewed` - -## Guardrails - -- Do NOT skip prerequisite verification if this step has dependencies -- Do NOT produce partial outputs; complete all required outputs before finishing -- Do NOT proceed without required inputs; ask the user if any are missing -- Do NOT modify files outside the scope of this step's defined outputs - -## Quality Validation - -**Before completing this step, you MUST have your work reviewed against the quality criteria below.** - -Use a sub-agent (Haiku model) to review your work against these criteria: - -**Criteria (all must be satisfied)**: -1. Changed files were identified -2. Sub-agent reviewed the code for general issues, DRY opportunities, naming clarity, and test coverage -3. All identified issues were addressed or documented as intentional -**Review Process**: -1. Once you believe your work is complete, spawn a sub-agent using Haiku to review your work against the quality criteria above -2. The sub-agent should examine your outputs and verify each criterion is met -3. If the sub-agent identifies valid issues, fix them -4. Have the sub-agent review again until all valid feedback has been addressed -5. Only mark the step complete when the sub-agent confirms all criteria are satisfied - -## On Completion - -1. Verify outputs are created -2. Inform user: "full step 1/4 complete, outputs: code_reviewed" -3. **Continue workflow**: Use Skill tool to invoke `/commit.test` - ---- - -**Reference files**: `.deepwork/jobs/commit/job.yml`, `.deepwork/jobs/commit/steps/review.md` \ No newline at end of file diff --git a/.claude/skills/commit.test/SKILL.md b/.claude/skills/commit.test/SKILL.md deleted file mode 100644 index cddf224a..00000000 --- a/.claude/skills/commit.test/SKILL.md +++ /dev/null @@ -1,138 +0,0 @@ ---- -name: commit.test -description: "Pulls latest code and runs tests until all pass. Use after code review passes to verify changes work correctly." -user-invocable: false - ---- - -# commit.test - -**Step 2/4** in **full** workflow - -> Full commit workflow: review, test, lint, and commit - -> Reviews code, runs tests, lints, and commits changes. Use when ready to commit work with quality checks. - -## Prerequisites (Verify First) - -Before proceeding, confirm these steps are complete: -- `/commit.review` - -## Instructions - -**Goal**: Pulls latest code and runs tests until all pass. Use after code review passes to verify changes work correctly. 
- -# Run Tests - -## Objective - -Run the project's test suite and fix any failing tests until all tests pass. - -## Task - -Execute the test suite for the project and iteratively fix any failures until all tests pass. - -### Process - -1. **Pull latest code from the branch** - - Run `git pull` to fetch and merge any changes from the remote - - If there are merge conflicts, resolve them before proceeding - - This ensures you're testing against the latest code - -2. **Detect or use the test command** - - If a test command was provided, use that - - Otherwise, auto-detect the project type and determine the appropriate test command: - - Python: `pytest`, `python -m pytest`, `uv run pytest` - - Node.js: `npm test`, `yarn test`, `bun test` - - Go: `go test ./...` - - Rust: `cargo test` - - Check `package.json`, `pyproject.toml`, `Cargo.toml`, `go.mod` for hints - -3. **Run the tests** - - Execute the test command - - Capture the output - -4. **Analyze failures** - - If tests pass, proceed to output - - If tests fail, analyze the failure messages - - Identify the root cause of each failure - -5. **Fix failing tests** - - Make the necessary code changes to fix failures - - This may involve fixing bugs in implementation code or updating tests - - Re-run tests after each fix - -6. **Iterate until passing** - - Continue the fix/test cycle until all tests pass - -## Quality Criteria - -- Latest code was pulled from the branch -- All tests are passing - -## Context - -This step runs after code review. Tests must pass before proceeding to lint and commit. This ensures code quality and prevents broken code from being committed. If tests fail due to issues introduced by the code review fixes, iterate on the fixes until tests pass. - - -### Job Context - -A workflow for preparing and committing code changes with quality checks. - -The **full** workflow starts with a code review to catch issues early, runs tests until -they pass, formats and lints code with ruff, then reviews changed files -before committing and pushing. The review and lint steps use sub-agents -to reduce context usage. - -Steps: -1. review - Code review for issues, DRY opportunities, naming, and test coverage (runs in sub-agent) -2. test - Pull latest code and run tests until they pass -3. lint - Format and lint code with ruff (runs in sub-agent) -4. commit_and_push - Review changes and commit/push - - - -## Work Branch - -Use branch format: `deepwork/commit-[instance]-YYYYMMDD` - -- If on a matching work branch: continue using it -- If on main/master: create new branch with `git checkout -b deepwork/commit-[instance]-$(date +%Y%m%d)` - -## Outputs - -**Required outputs**: -- `tests_passing` - -## Guardrails - -- Do NOT skip prerequisite verification if this step has dependencies -- Do NOT produce partial outputs; complete all required outputs before finishing -- Do NOT proceed without required inputs; ask the user if any are missing -- Do NOT modify files outside the scope of this step's defined outputs - -## Quality Validation - -**Before completing this step, you MUST have your work reviewed against the quality criteria below.** - -Use a sub-agent (Haiku model) to review your work against these criteria: - -**Criteria (all must be satisfied)**: -1. Latest code was pulled from the branch -2. All tests are passing -**Review Process**: -1. Once you believe your work is complete, spawn a sub-agent using Haiku to review your work against the quality criteria above -2. 
The sub-agent should examine your outputs and verify each criterion is met -3. If the sub-agent identifies valid issues, fix them -4. Have the sub-agent review again until all valid feedback has been addressed -5. Only mark the step complete when the sub-agent confirms all criteria are satisfied - -## On Completion - -1. Verify outputs are created -2. Inform user: "full step 2/4 complete, outputs: tests_passing" -3. **Continue workflow**: Use Skill tool to invoke `/commit.lint` - ---- - -**Reference files**: `.deepwork/jobs/commit/job.yml`, `.deepwork/jobs/commit/steps/test.md` \ No newline at end of file diff --git a/.claude/skills/commit/SKILL.md b/.claude/skills/commit/SKILL.md deleted file mode 100644 index 3839ffbd..00000000 --- a/.claude/skills/commit/SKILL.md +++ /dev/null @@ -1,79 +0,0 @@ ---- -name: commit -description: "Reviews code, runs tests, lints, and commits changes. Use when ready to commit work with quality checks." ---- - -# commit - -Reviews code, runs tests, lints, and commits changes. Use when ready to commit work with quality checks. - -> **CRITICAL**: Always invoke steps using the Skill tool. Never copy/paste step instructions directly. - -A workflow for preparing and committing code changes with quality checks. - -The **full** workflow starts with a code review to catch issues early, runs tests until -they pass, formats and lints code with ruff, then reviews changed files -before committing and pushing. The review and lint steps use sub-agents -to reduce context usage. - -Steps: -1. review - Code review for issues, DRY opportunities, naming, and test coverage (runs in sub-agent) -2. test - Pull latest code and run tests until they pass -3. lint - Format and lint code with ruff (runs in sub-agent) -4. commit_and_push - Review changes and commit/push - - -## Workflows - -### full - -Full commit workflow: review, test, lint, and commit - -**Steps in order**: -1. **review** - Reviews changed code for issues, DRY opportunities, naming clarity, and test coverage using a sub-agent. Use as the first step before testing. -2. **test** - Pulls latest code and runs tests until all pass. Use after code review passes to verify changes work correctly. -3. **lint** - Formats and lints code with ruff using a sub-agent. Use after tests pass to ensure code style compliance. -4. **commit_and_push** - Verifies changed files, creates commit, and pushes to remote. Use after linting passes to finalize changes. - -**Start workflow**: `/commit.review` - - -## Execution Instructions - -### Step 1: Analyze Intent - -Parse any text following `/commit` to determine user intent: -- "full" or related terms → start full workflow at `commit.review` - -### Step 2: Invoke Starting Step - -Use the Skill tool to invoke the identified starting step: -``` -Skill tool: commit.review -``` - -### Step 3: Continue Workflow Automatically - -After each step completes: -1. Check if there's a next step in the workflow sequence -2. Invoke the next step using the Skill tool -3. Repeat until workflow is complete or user intervenes - -**Note**: Standalone skills do not auto-continue to other steps. 
- -### Handling Ambiguous Intent - -If user intent is unclear, use AskUserQuestion to clarify: -- Present available workflows and standalone skills as options -- Let user select the starting point - -## Guardrails - -- Do NOT copy/paste step instructions directly; always use the Skill tool to invoke steps -- Do NOT skip steps in a workflow unless the user explicitly requests it -- Do NOT proceed to the next step if the current step's outputs are incomplete -- Do NOT make assumptions about user intent; ask for clarification when ambiguous - -## Context Files - -- Job definition: `.deepwork/jobs/commit/job.yml` \ No newline at end of file diff --git a/.claude/skills/deepwork_jobs.define/SKILL.md b/.claude/skills/deepwork_jobs.define/SKILL.md deleted file mode 100644 index f1469253..00000000 --- a/.claude/skills/deepwork_jobs.define/SKILL.md +++ /dev/null @@ -1,724 +0,0 @@ ---- -name: deepwork_jobs.define -description: "Creates a job.yml specification by gathering workflow requirements through structured questions. Use when starting a new multi-step workflow." -user-invocable: false - ---- - -# deepwork_jobs.define - -**Step 1/3** in **new_job** workflow - -> Create a new DeepWork job from scratch through definition, review, and implementation - -> Creates and manages multi-step AI workflows. Use when defining, implementing, or improving DeepWork jobs. - - -## Instructions - -**Goal**: Creates a job.yml specification by gathering workflow requirements through structured questions. Use when starting a new multi-step workflow. - -# Define Job Specification - -## Objective - -Create a `job.yml` specification file that defines the structure of a new DeepWork job by thoroughly understanding the user's workflow requirements through an interactive question-and-answer process. - -## Task - -Guide the user through defining a job specification by asking structured questions. **Do not attempt to create the specification without first fully understanding the user's needs.** - -**Important**: Use the AskUserQuestion tool to ask structured questions when gathering information from the user. This provides a better user experience with clear options and guided choices. - -The output of this step is **only** the `job.yml` file - a complete specification of the workflow. The actual step instruction files will be created in the next step (`implement`). - -### Step 1: Understand the Job Purpose - -Start by asking structured questions to understand what the user wants to accomplish: - -1. **What is the overall goal of this workflow?** - - What complex task are they trying to accomplish? - - What domain is this in? (e.g., research, marketing, development, reporting) - - How often will they run this workflow? - -2. **What does success look like?** - - What's the final deliverable or outcome? - - Who is the audience for the output? - - What quality criteria matter most? - -3. **What are the major phases?** - - Ask them to describe the workflow at a high level - - What are the distinct stages from start to finish? - - Are there any dependencies between phases? - -### Step 1.5: Detect Document-Oriented Workflows - -**Check for document-focused patterns** in the user's description: -- Keywords: "report", "summary", "document", "create", "monthly", "quarterly", "for stakeholders", "for leadership" -- Final deliverable is a specific document (e.g., "AWS spending report", "competitive analysis", "sprint summary") -- Recurring documents with consistent structure - -**If a document-oriented workflow is detected:** - -1. 
Inform the user: "This workflow produces a specific document type. I recommend defining a doc spec first to ensure consistent quality." - -2. Ask structured questions to understand if they want to: - - Create a doc spec for this document - - Use an existing doc spec (if any exist in `.deepwork/doc_specs/`) - - Skip doc spec and proceed with simple outputs - -### Step 1.6: Define the Doc Spec (if needed) - -When creating a doc spec, gather the following information: - -1. **Document Identity** - - What is the document called? (e.g., "Monthly AWS Spending Report") - - Brief description of its purpose - - Where should these documents be stored? (path patterns like `finance/aws-reports/*.md`) - -2. **Audience and Context** - - Who reads this document? (target audience) - - How often is it produced? (frequency) - -3. **Quality Criteria** (3-5 criteria, each with name and description) - - **Important**: Doc spec quality criteria define requirements for the **output document itself**, not the process of creating it. Focus on what the finished document must contain or achieve. - - Examples for a spending report: - - **Visualization**: Must include charts showing spend breakdown by service - - **Variance Analysis**: Must compare current month against previous with percentages - - **Action Items**: Must include recommended cost optimization actions - - **Note**: When a doc spec is created for a step's output, the step should generally NOT have separate `quality_criteria` in the job.yml. The doc spec's criteria cover output quality. Only add step-level quality_criteria if there are essential process requirements (e.g., "must use specific tool"), and minimize these when possible. - -4. **Document Structure** - - What sections should it have? - - Any required elements (tables, charts, summaries)? - -### Step 1.7: Create the doc spec File (if needed) - -Create the doc spec file at `.deepwork/doc_specs/[doc_spec_name].md`: - -**Template reference**: See `.deepwork/jobs/deepwork_jobs/templates/doc_spec.md.template` for the standard structure. - -**Complete example**: See `.deepwork/doc_specs/job_spec.md` for a fully worked example (the doc spec for job.yml files). - -After creating the doc spec, proceed to Step 2 with the doc spec reference for the final step's output. - -### Step 2: Define Each Step - -For each major phase they mentioned, ask structured questions to gather details: - -1. **Step Purpose** - - What exactly does this step accomplish? - - What is the input to this step? - - What is the output from this step? - -2. **Step Inputs** - - What information is needed to start this step? - - Does it need user-provided parameters? (e.g., topic, target audience) - - Does it need files from previous steps? - - What format should inputs be in? - -3. **Step Outputs** - - What files or artifacts does this step produce? - - What format should the output be in? (markdown, YAML, JSON, etc.) - - Where should each output be saved? (filename/path) - - Should outputs be organized in subdirectories? (e.g., `reports/`, `data/`, `drafts/`) - - Will other steps need this output? - - **Does this output have a doc spec?** If a doc spec was created in Step 1.6/1.7, reference it for the appropriate output - - #### Work Product Storage Guidelines - - **Key principle**: Job outputs belong in the main repository directory structure, not in dot-directories. The `.deepwork/` directory is for job definitions and configuration only. 
- - **Why this matters**: - - **Version control**: Work products in the main repo are tracked by git and visible in PRs - - **Discoverability**: Team members can find outputs without knowing about DeepWork internals - - **Tooling compatibility**: IDEs, search tools, and CI/CD work naturally with standard paths - - **Glob patterns**: Well-structured paths enable powerful file matching (e.g., `competitive_research/**/*.md`) - - **Good output path patterns**: - ``` - competitive_research/competitors_list.md - competitive_research/acme_corp/research.md - operations/reports/2026-01/spending_analysis.md - docs/api/endpoints.md - ``` - - **Avoid these patterns**: - ``` - .deepwork/outputs/report.md # Hidden in dot-directory - output.md # Too generic, no context - research.md # Unclear which research - temp/draft.md # Transient-sounding paths - ``` - - **Organizing multi-file outputs**: - - Use the job name as a top-level folder when outputs are job-specific - - Use parameterized paths for per-entity outputs: `competitive_research/[competitor_name]/` - - Match existing project conventions when extending a codebase - - **When to include dates in paths**: - - **Include date** for periodic outputs where each version is retained (e.g., monthly reports, quarterly reviews, weekly summaries). These accumulate over time and historical versions remain useful. - ``` - operations/reports/2026-01/spending_analysis.md # Monthly report - keep history - hr/employees/[employee_name]/quarterly_reviews/2026-Q1.pdf # Per-employee quarterly review - ``` - - **Omit date** for current-state outputs that represent the latest understanding and get updated in place. Previous versions live in git history, not separate files. - ``` - competitive_research/acme_corp/swot.md # Current SWOT - updated over time - docs/architecture/overview.md # Living document - ``` - - **Supporting materials and intermediate outputs**: - - Content generated in earlier steps to support the final output (research notes, data extracts, drafts) should be placed in a `_dataroom` folder that is a peer to the final output - - Name the dataroom folder by replacing the file extension with `_dataroom` - ``` - operations/reports/2026-01/spending_analysis.md # Final output - operations/reports/2026-01/spending_analysis_dataroom/ # Supporting materials - raw_data.csv - vendor_breakdown.md - notes.md - ``` - - This keeps supporting materials organized and discoverable without cluttering the main output location - -4. **Step Dependencies** - - Which previous steps must complete before this one? - - Are there any ordering constraints? - -5. **Step Process** (high-level understanding) - - What are the key activities in this step? - - Are there any quality checks or validation needed? - - What makes a good vs. bad output for this step? - -6. **Agent Delegation** (optional) - - Should this step be executed by a specific agent type? - - Use the `agent` field when the step should run in a forked context with a specific agent - - When `agent` is set, the generated skill automatically includes `context: fork` - - Available agent types: - - `general-purpose` - Standard agent for multi-step tasks - - ```yaml - steps: - - id: research_step - agent: general-purpose # Delegates to the general-purpose agent - ``` - -**Note**: You're gathering this information to understand what instructions will be needed, but you won't create the instruction files yet - that happens in the `implement` step. 
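As a reference while gathering this information, the answers from Step 2 typically translate into a step entry like the sketch below. It is an abbreviated, hypothetical fragment: the step IDs are invented, and the report paths simply reuse the examples from the storage guidelines above.

```yaml
# Illustrative job.yml fragment -- step ids and paths are hypothetical,
# and fields such as description and instructions_file are omitted.
steps:
  - id: analyze_spending
    name: Analyze monthly spending
    dependencies:
      - gather_invoices              # list any step whose files this step reads
    outputs:
      - operations/reports/2026-01/spending_analysis.md           # final output
      - operations/reports/2026-01/spending_analysis_dataroom/    # supporting materials
    agent: general-purpose           # optional delegation, as described above
```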
- -#### Doc Spec-Aware Output Format - -When a step produces a document with a doc spec reference, use this format in job.yml: - -```yaml -outputs: - - file: reports/monthly_spending.md - doc_spec: .deepwork/doc_specs/monthly_aws_report.md -``` - -The doc spec's quality criteria will automatically be included in the generated skill, ensuring consistent document quality. - -### Capability Considerations - -When defining steps, identify any that require specialized tools: - -**Browser Automation**: If any step involves web scraping, form filling, interactive browsing, UI testing, or research requiring website visits, ask the user what browser tools they have available. For Claude Code users, **Claude in Chrome** (Anthropic's browser extension) has been tested with DeepWork and is recommended for new users. Don't assume a default—confirm the tool before designing browser-dependent steps. - -### Step 3: Validate the Workflow - -After gathering information about all steps: - -1. **Review the flow** - - Summarize the complete workflow - - Show how outputs from one step feed into the next - - Ask if anything is missing - -2. **Check for gaps** - - Are there any steps where the input isn't clearly defined? - - Are there any outputs that aren't used by later steps? - - Are there circular dependencies? - -3. **Confirm details** - - Job name (lowercase, underscores, descriptive) - - Job summary (one clear sentence, max 200 chars) - - Job description (detailed multi-line explanation) - - Version number (start with 1.0.0) - -### Step 4: Define Quality Validation (Stop Hooks) - -For each step, consider whether it would benefit from **quality validation loops**. Stop hooks allow the AI agent to iteratively refine its work until quality criteria are met. - -**Ask structured questions about quality validation:** -- "Are there specific quality criteria that must be met for this step?" -- "Would you like the agent to validate its work before completing?" -- "What would make you send the work back for revision?" - -**Stop hooks are particularly valuable for:** -- Steps with complex outputs that need multiple checks -- Steps where quality is critical (final deliverables) -- Steps with subjective quality criteria that benefit from AI self-review - -**Three types of stop hooks are supported:** - -1. **Inline Prompt** (`prompt`) - Best for simple quality criteria - ```yaml - stop_hooks: - - prompt: | - Verify the output meets these criteria: - 1. Contains at least 5 competitors - 2. Each competitor has a description - 3. Selection rationale is clear - ``` - -2. **Prompt File** (`prompt_file`) - For detailed/reusable criteria - ```yaml - stop_hooks: - - prompt_file: hooks/quality_check.md - ``` - -3. **Script** (`script`) - For programmatic validation (tests, linting) - ```yaml - stop_hooks: - - script: hooks/run_tests.sh - ``` - -**Multiple hooks can be combined:** -```yaml -stop_hooks: - - script: hooks/lint_output.sh - - prompt: "Verify the content is comprehensive and well-organized" -``` - -**Encourage prompt-based hooks** - They leverage the AI's ability to understand context and make nuanced quality judgments. Script hooks are best for objective checks (syntax, format, tests). 
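When a script hook is used, the script itself is usually a small shell check. The exact contract DeepWork expects from hook scripts is not specified here, so the sketch below assumes the common convention that a non-zero exit code means the quality criteria are not yet met; confirm the real behavior before relying on it. The individual commands are the test and lint invocations used elsewhere in this repository.

```bash
#!/usr/bin/env bash
# hooks/run_tests.sh -- hypothetical validation script for a stop hook.
# Assumption: a non-zero exit code signals that the step should keep iterating.
set -euo pipefail

# Objective checks belong in scripts: tests, coverage, linting.
uv run pytest --cov=src/deepwork --cov-report=term-missing
ruff check .

echo "All checks passed"
```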
- -### Step 5: Create the Job Directory and Specification - -Only after you have complete understanding, create the job directory and `job.yml` file: - -**First, create the directory structure** using the `make_new_job.sh` script: - -```bash -.deepwork/jobs/deepwork_jobs/make_new_job.sh [job_name] -``` - -This creates: -- `.deepwork/jobs/[job_name]/` - Main job directory -- `.deepwork/jobs/[job_name]/steps/` - For step instruction files -- `.deepwork/jobs/[job_name]/hooks/` - For custom validation scripts -- `.deepwork/jobs/[job_name]/templates/` - For example file formats -- `.deepwork/jobs/[job_name]/AGENTS.md` - Job management guidance - -**Then create the job.yml file** at `.deepwork/jobs/[job_name]/job.yml` - -(Where `[job_name]` is the name of the NEW job you're creating, e.g., `competitive_research`) - -**Doc Spec**: See `.deepwork/doc_specs/job_spec.md` for the complete specification with quality criteria. - -**Template reference**: See `.deepwork/jobs/deepwork_jobs/templates/job.yml.template` for the standard structure. - -**Complete example**: See `.deepwork/jobs/deepwork_jobs/templates/job.yml.example` for a fully worked example. - -**Important**: -- Use lowercase with underscores for job name and step IDs -- Ensure file inputs reference steps in dependencies -- No circular dependencies -- At least one output per step -- The `summary` should be concise (max 200 chars) -- The `description` should provide rich context for future refinement -- Include a `changelog` section with an initial entry for version 1.0.0 - -## Example Dialog - -``` -Claude: I'll help you define a new DeepWork job. Let's start by understanding what you're trying to accomplish. - -What complex task or workflow are you trying to create? What's the overall goal? - -User: I want to create competitive research reports for my company - -Claude: Great! Let's dig into that. A few questions: - -1. What's the final deliverable? Is it a written report, a presentation, or something else? -2. Who is the audience? Your team, executives, clients? -3. How comprehensive should these reports be? - -User: It's a written report for our product team. Should be comprehensive - analyzing 3-5 competitors. - -Claude: Perfect. Now let's think about the major phases of creating this report. What are the main stages you go through? For example: -- Identifying which competitors to analyze? -- Gathering information about them? -- Analyzing the data? -- Writing the report? - -Or is your process different? - -User: Yes, that's close. I would say: -1. Identify competitors -2. Research each one -3. Create comparison analysis -4. Write positioning recommendations - -Claude: Excellent! Now let's define each step in detail. Starting with "Identify competitors": - -1. What inputs do you need to start? Do you already have a market segment in mind, or do you need to specify that? -2. What should the output look like? A list of competitor names? More detailed? -3. How many competitors should be identified? - -User: I need to specify the market segment and product category as inputs. The output should be a markdown file with 5-7 competitors and brief descriptions of each. - -Claude: Perfect. A few more questions about that output: -1. What should the file be called? Something like `competitors_list.md`? -2. Should it go in the project root, or in a subdirectory like `research/` or `data/`? - -User: Just call it `competitors_list.md` in the project root. - -Claude: Great. Now for the "Research each one" step: -1. 
Does it use the competitor list from the first step as input? -2. What kind of research - product features, pricing, marketing, all of the above? -3. What format should the research output be in? -4. Where should that research be saved? - -[continues for each step...] - -[After gathering all information:] - -Claude: Let me summarize the workflow I've designed based on our conversation: - -**Job: competitive_research** -Summary: Systematic competitive analysis workflow for product positioning - -Description: -A comprehensive workflow for analyzing competitors in your market segment. This job helps product teams understand the competitive landscape by systematically identifying competitors, researching their offerings, creating comparison matrices, and developing strategic positioning recommendations. - -The workflow produces: -- A vetted list of key competitors -- Detailed research notes on each competitor -- A comparison matrix highlighting key differentiators -- Strategic positioning recommendations - -Designed for product teams conducting quarterly competitive analysis. - -**Steps:** - -Step 1: identify_competitors -- Input: market_segment (user), product_category (user) -- Output: competitors_list.md (5-7 competitors with descriptions) - -Step 2: research_competitors -- Input: competitors_list.md (from step 1) -- Output: research_notes.md (detailed findings for each competitor) -- Depends on: identify_competitors - -Step 3: comparative_analysis -- Input: research_notes.md (from step 2) -- Output: comparison_matrix.md (side-by-side analysis) -- Depends on: research_competitors - -Step 4: positioning_recommendations -- Input: comparison_matrix.md (from step 3) -- Output: positioning_report.md (strategic recommendations) -- Depends on: comparative_analysis - -Does this accurately capture your workflow? Any changes needed? - -User: Yes, that's perfect! - -Claude: Great! Creating the job.yml specification now... - -[Creates .deepwork/jobs/competitive_research/job.yml with the complete spec] - -✓ Job specification created! - -**File created:** -- .deepwork/jobs/competitive_research/job.yml - -**Next step:** -Run `/deepwork_jobs.review_job_spec` to validate the specification against quality criteria. -``` - -## Important Guidelines - -1. **Focus on specification only** - Don't create instruction files yet -2. **Ask structured questions** - Never skip the discovery phase; use the AskUserQuestion tool -3. **Rich context in description** - This helps with future refinement -4. **Validate understanding** - Summarize and confirm before creating -5. **Use examples** - Help users understand what good specifications look like -6. **Understand file organization** - Always ask structured questions about where outputs should be saved and if subdirectories are needed - -## Validation Rules - -Before creating the job.yml, ensure: -- Job name: lowercase, underscores, no spaces -- Version: semantic versioning (1.0.0) -- Summary: concise, under 200 characters -- Description: detailed, provides context -- Step IDs: unique, descriptive, lowercase with underscores -- Dependencies: must reference existing step IDs -- File inputs: `from_step` must be in dependencies -- At least one output per step -- Outputs can be filenames (e.g., `report.md`) or paths (e.g., `reports/analysis.md`) -- File paths in outputs should match where files will actually be created -- No circular dependencies - -## Output Format - -### job.yml - -The complete YAML specification file (example shown in Step 5 above). 
- -**Location**: `.deepwork/jobs/[job_name]/job.yml` - -(Where `[job_name]` is the name of the new job being created) - -After creating the file: -1. Inform the user that the specification is complete -2. Recommend that they review the job.yml file -3. Tell them to run `/deepwork_jobs.review_job_spec` next - - - -### Job Context - -Core commands for managing DeepWork jobs. These commands help you define new multi-step -workflows and learn from running them. - -The `new_job` workflow guides you through defining and implementing a new job by -asking structured questions about your workflow, understanding each step's inputs and outputs, -reviewing the specification, and generating all necessary files. - -The `learn` skill reflects on conversations where DeepWork jobs were run, identifies -confusion or inefficiencies, and improves job instructions. It also captures bespoke -learnings specific to the current run into AGENTS.md files in the working folder. - - -## Required Inputs - -**User Parameters** - Gather from user before starting: -- **job_purpose**: What complex task or workflow are you trying to accomplish? - - -## Work Branch - -Use branch format: `deepwork/deepwork_jobs-[instance]-YYYYMMDD` - -- If on a matching work branch: continue using it -- If on main/master: create new branch with `git checkout -b deepwork/deepwork_jobs-[instance]-$(date +%Y%m%d)` - -## Outputs - -**Required outputs**: -- `job.yml` - **Doc Spec**: DeepWork Job Specification - > YAML specification file that defines a multi-step workflow job for AI agents - **Definition**: `.deepwork/doc_specs/job_spec.md` - **Target Audience**: AI agents executing jobs and developers defining workflows - **Quality Criteria**: - 1. **Valid Identifier**: Job name must be lowercase with underscores, no spaces or special characters (e.g., `competitive_research`, `monthly_report`) - 2. **Semantic Version**: Version must follow semantic versioning format X.Y.Z (e.g., `1.0.0`, `2.1.3`) - 3. **Concise Summary**: Summary must be under 200 characters and clearly describe what the job accomplishes - 4. **Rich Description**: Description must be multi-line and explain: the problem solved, the process, expected outcomes, and target users - 5. **Changelog Present**: Must include a changelog array with at least the initial version entry. Changelog should only include one entry per branch at most - 6. **Complete Steps**: Each step must have: id (lowercase_underscores), name, description, instructions_file, outputs (at least one), and dependencies array - 7. **Valid Dependencies**: Dependencies must reference existing step IDs with no circular references - 8. **Input Consistency**: File inputs with `from_step` must reference a step that is in the dependencies array - 9. **Output Paths**: Outputs must be valid filenames or paths within the main repo directory structure, never in dot-directories like `.deepwork/`. Use specific, descriptive paths that lend themselves to glob patterns (e.g., `competitive_research/acme_corp/swot.md` or `operations/reports/2026-01/spending_analysis.md`). Parameterized paths like `[competitor_name]/` are encouraged for per-entity outputs. Avoid generic names (`output.md`, `analysis.md`) and transient-sounding paths (`temp/`, `draft.md`). Supporting materials for a final output should go in a peer `_dataroom` folder (e.g., `spending_analysis_dataroom/`). - 10. **Concise Instructions**: The content of the file, particularly the description, must not have excessively redundant information. 
It should be concise and to the point given that extra tokens will confuse the AI. - -
- Example Document Structure - - ```markdown - # DeepWork Job Specification: [job_name] - - A `job.yml` file defines a complete multi-step workflow that AI agents can execute. Each job breaks down a complex task into reviewable steps with clear inputs and outputs. - - ## Required Fields - - ### Top-Level Metadata - - ```yaml - name: job_name # lowercase, underscores only - version: "1.0.0" # semantic versioning - summary: "Brief description" # max 200 characters - description: | # detailed multi-line explanation - [Explain what this workflow does, why it exists, - what outputs it produces, and who should use it] - ``` - - ### Changelog - - ```yaml - changelog: - - version: "1.0.0" - changes: "Initial job creation" - - version: "1.1.0" - changes: "Added quality validation hooks" - ``` - - ### Steps Array - - ```yaml - steps: - - id: step_id # unique, lowercase_underscores - name: "Human Readable Name" - description: "What this step accomplishes" - instructions_file: steps/step_id.md - inputs: - # User-provided inputs: - - name: param_name - description: "What the user provides" - # File inputs from previous steps: - - file: output.md - from_step: previous_step_id - outputs: - - competitive_research/competitors_list.md # descriptive path - - competitive_research/[competitor_name]/research.md # parameterized path - # With doc spec reference: - - file: competitive_research/final_report.md - doc_spec: .deepwork/doc_specs/report_type.md - dependencies: - - previous_step_id # steps that must complete first - ``` - - ## Optional Fields - - ### Exposed Steps - - ```yaml - steps: - - id: learn - exposed: true # Makes step available without running dependencies - ``` - - ### Agent Delegation - - When a step should be executed by a specific agent type, use the `agent` field. This automatically sets `context: fork` in the generated skill. - - ```yaml - steps: - - id: research_step - agent: general-purpose # Delegates to the general-purpose agent - ``` - - Available agent types: - - `general-purpose` - Standard agent for multi-step tasks - - ### Quality Hooks - - ```yaml - steps: - - id: step_id - hooks: - after_agent: - # Inline prompt for quality validation: - - prompt: | - Verify the output meets criteria: - 1. [Criterion 1] - 2. [Criterion 2] - If ALL criteria are met, include `...`. - # External prompt file: - - prompt_file: hooks/quality_check.md - # Script for programmatic validation: - - script: hooks/run_tests.sh - ``` - - ### Stop Hooks (Legacy) - - ```yaml - steps: - - id: step_id - stop_hooks: - - prompt: "Validation prompt..." - - prompt_file: hooks/check.md - - script: hooks/validate.sh - ``` - - ## Validation Rules - - 1. **No circular dependencies**: Step A cannot depend on Step B if Step B depends on Step A - 2. **File inputs require dependencies**: If a step uses `from_step: X`, then X must be in its dependencies - 3. **Unique step IDs**: No two steps can have the same id - 4. **Valid file paths**: Output paths must not contain invalid characters and should be in the main repo (not dot-directories) - 5. **Instructions files exist**: Each `instructions_file` path should have a corresponding file created - - ## Example: Complete Job Specification - - ```yaml - name: competitive_research - version: "1.0.0" - summary: "Systematic competitive analysis workflow" - description: | - A comprehensive workflow for analyzing competitors in your market segment. 
- Helps product teams understand the competitive landscape through systematic - identification, research, comparison, and positioning recommendations. - - Produces: - - Vetted competitor list - - Research notes per competitor - - Comparison matrix - - Strategic positioning report - - changelog: - - version: "1.0.0" - changes: "Initial job creation" - - steps: - - id: identify_competitors - name: "Identify Competitors" - description: "Identify 5-7 key competitors in the target market" - instructions_file: steps/identify_competitors.md - inputs: - - name: market_segment - description: "The market segment to analyze" - - name: product_category - description: "The product category" - outputs: - - competitive_research/competitors_list.md - dependencies: [] - - - id: research_competitors - name: "Research Competitors" - description: "Deep dive research on each identified competitor" - instructions_file: steps/research_competitors.md - inputs: - - file: competitive_research/competitors_list.md - from_step: identify_competitors - outputs: - - competitive_research/[competitor_name]/research.md - dependencies: - - identify_competitors - - - id: positioning_report - name: "Positioning Report" - description: "Strategic positioning recommendations" - instructions_file: steps/positioning_report.md - inputs: - - file: competitive_research/[competitor_name]/research.md - from_step: research_competitors - outputs: - - file: competitive_research/positioning_report.md - doc_spec: .deepwork/doc_specs/positioning_report.md - dependencies: - - research_competitors - ``` - ``` - -
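As a rough illustration of the Output Paths criterion above (the specific competitor, report, and file names are illustrative), compare a weak outputs list with a stronger one:

```yaml
# Weak: generic names and transient-sounding paths
outputs:
  - output.md
  - temp/draft.md

# Stronger: descriptive, glob-friendly paths with a parameterized per-entity folder
# and a peer _dataroom folder for supporting material
outputs:
  - competitive_research/acme_corp/swot.md
  - competitive_research/[competitor_name]/research.md
  - operations/reports/2026-01/spending_analysis.md
  - operations/reports/2026-01/spending_analysis_dataroom/raw_billing_export.csv
```

Descriptive paths like these lend themselves to glob patterns, which makes it easier for later steps to pick up the right files.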
- -## Guardrails - -- Do NOT skip prerequisite verification if this step has dependencies -- Do NOT produce partial outputs; complete all required outputs before finishing -- Do NOT proceed without required inputs; ask the user if any are missing -- Do NOT modify files outside the scope of this step's defined outputs - -## On Completion - -1. Verify outputs are created -2. Inform user: "new_job step 1/3 complete, outputs: job.yml" -3. **Continue workflow**: Use Skill tool to invoke `/deepwork_jobs.review_job_spec` - ---- - -**Reference files**: `.deepwork/jobs/deepwork_jobs/job.yml`, `.deepwork/jobs/deepwork_jobs/steps/define.md` \ No newline at end of file diff --git a/.claude/skills/deepwork_jobs.implement/SKILL.md b/.claude/skills/deepwork_jobs.implement/SKILL.md deleted file mode 100644 index f5494ae7..00000000 --- a/.claude/skills/deepwork_jobs.implement/SKILL.md +++ /dev/null @@ -1,267 +0,0 @@ ---- -name: deepwork_jobs.implement -description: "Generates step instruction files and syncs slash commands from the job.yml specification. Use after job spec review passes." -user-invocable: false - ---- - -# deepwork_jobs.implement - -**Step 3/3** in **new_job** workflow - -> Create a new DeepWork job from scratch through definition, review, and implementation - -> Creates and manages multi-step AI workflows. Use when defining, implementing, or improving DeepWork jobs. - -## Prerequisites (Verify First) - -Before proceeding, confirm these steps are complete: -- `/deepwork_jobs.review_job_spec` - -## Instructions - -**Goal**: Generates step instruction files and syncs slash commands from the job.yml specification. Use after job spec review passes. - -# Implement Job Steps - -## Objective - -Generate the DeepWork job directory structure and instruction files for each step based on the validated `job.yml` specification from the review_job_spec step. - -## Task - -Read the `job.yml` specification file and create all the necessary files to make the job functional, including directory structure and step instruction files. Then sync the commands to make them available. - -### Step 1: Create Directory Structure Using Script - -Run the `make_new_job.sh` script to create the standard directory structure: - -```bash -.deepwork/jobs/deepwork_jobs/make_new_job.sh [job_name] -``` - -This creates: -- `.deepwork/jobs/[job_name]/` - Main job directory -- `.deepwork/jobs/[job_name]/steps/` - Step instruction files -- `.deepwork/jobs/[job_name]/hooks/` - Custom validation scripts (with .gitkeep) -- `.deepwork/jobs/[job_name]/templates/` - Example file formats (with .gitkeep) -- `.deepwork/jobs/[job_name]/AGENTS.md` - Job management guidance - -**Note**: If the directory already exists (e.g., job.yml was created by define step), you can skip this step or manually create the additional directories: -```bash -mkdir -p .deepwork/jobs/[job_name]/hooks .deepwork/jobs/[job_name]/templates -touch .deepwork/jobs/[job_name]/hooks/.gitkeep .deepwork/jobs/[job_name]/templates/.gitkeep -``` - -### Step 2: Read and Validate the Specification - -1. **Locate the job.yml file** - - Read `.deepwork/jobs/[job_name]/job.yml` from the review_job_spec step - - Parse the YAML content - -2. **Validate the specification** - - Ensure it follows the schema (name, version, summary, description, steps) - - Check that all dependencies reference existing steps - - Verify no circular dependencies - - Confirm file inputs match dependencies - -3. 
**Extract key information** - - Job name, version, summary, description - - List of all steps with their details - - Understand the workflow structure - -### Step 3: Generate Step Instruction Files - -For each step in the job.yml, create a comprehensive instruction file at `.deepwork/jobs/[job_name]/steps/[step_id].md`. - -**Template reference**: See `.deepwork/jobs/deepwork_jobs/templates/step_instruction.md.template` for the standard structure. - -**Complete example**: See `.deepwork/jobs/deepwork_jobs/templates/step_instruction.md.example` for a fully worked example. - -**Available templates in `.deepwork/jobs/deepwork_jobs/templates/`:** -- `job.yml.template` - Job specification structure -- `step_instruction.md.template` - Step instruction file structure -- `agents.md.template` - AGENTS.md file structure -- `job.yml.example` - Complete job specification example -- `step_instruction.md.example` - Complete step instruction example - -**Guidelines for generating instructions:** - -1. **Use the job description** - The detailed description from job.yml provides crucial context -2. **Be specific** - Don't write generic instructions; tailor them to the step's purpose -3. **Provide examples** - Show what good output looks like -4. **Explain the "why"** - Help the user understand the step's role in the workflow -5. **Quality over quantity** - Detailed, actionable instructions are better than vague ones -6. **Align with stop hooks** - If the step has `stop_hooks` defined, ensure the quality criteria in the instruction file match the validation criteria in the hooks -7. **Ask structured questions** - When a step has user inputs, the instructions MUST explicitly tell the agent to "ask structured questions" using the AskUserQuestion tool to gather that information. Never use generic phrasing like "ask the user" - always use "ask structured questions" - -### Handling Stop Hooks - -If a step in the job.yml has `stop_hooks` defined, the generated instruction file should: - -1. **Mirror the quality criteria** - The "Quality Criteria" section should match what the stop hooks will validate -2. **Be explicit about success** - Help the agent understand when the step is truly complete -3. **Include the promise pattern** - Mention that `✓ Quality Criteria Met` should be included when criteria are met - -**Example: If the job.yml has:** -```yaml -- id: research_competitors - name: "Research Competitors" - stop_hooks: - - prompt: | - Verify the research meets criteria: - 1. Each competitor has at least 3 data points - 2. Sources are cited - 3. Information is current (within last year) -``` - -**The instruction file should include:** -```markdown -## Quality Criteria - -- Each competitor has at least 3 distinct data points -- All information is sourced with citations -- Data is current (from within the last year) -- When all criteria are met, include `✓ Quality Criteria Met` in your response -``` - -This alignment ensures the AI agent knows exactly what will be validated and can self-check before completing. - -### Using Supplementary Reference Files - -Step instructions can include additional `.md` files in the `steps/` directory for detailed examples, templates, or reference material. Reference them using the full path from the project root. - -See `.deepwork/jobs/deepwork_jobs/steps/supplemental_file_references.md` for detailed documentation and examples. - -### Step 4: Verify job.yml Location - -Verify that `job.yml` is in the correct location at `.deepwork/jobs/[job_name]/job.yml`. 
The define and review_job_spec steps should have created and validated it. If for some reason it's not there, you may need to create or move it. - -### Step 5: Sync Skills - -Run `deepwork sync` to generate the skills for this job: - -```bash -deepwork sync -``` - -This will: -- Parse the job definition -- Generate skills for each step -- Make the skills available in `.claude/skills/` (or appropriate platform directory) - -## Example Implementation - -For a complete worked example showing a job.yml and corresponding step instruction file, see: -- **Job specification**: `.deepwork/jobs/deepwork_jobs/templates/job.yml.example` -- **Step instruction**: `.deepwork/jobs/deepwork_jobs/templates/step_instruction.md.example` - -## Important Guidelines - -1. **Read the spec carefully** - Understand the job's intent from the description -2. **Generate complete instructions** - Don't create placeholder or stub files -3. **Maintain consistency** - Use the same structure for all step instruction files -4. **Provide examples** - Show what good output looks like -5. **Use context** - The job description provides valuable context for each step -6. **Be specific** - Tailor instructions to the specific step, not generic advice - -## Validation Before Sync - -Before running `deepwork sync`, verify: -- All directories exist -- `job.yml` is in place -- All step instruction files exist (one per step) -- No file system errors - -## Completion Checklist - -Before marking this step complete, ensure: -- [ ] job.yml validated and copied to job directory -- [ ] All step instruction files created -- [ ] Each instruction file is complete and actionable -- [ ] `deepwork sync` executed successfully -- [ ] Skills generated in platform directory - -## Quality Criteria - -- Job directory structure is correct -- All instruction files are complete (not stubs) -- Instructions are specific and actionable -- Output examples are provided in each instruction file -- Quality criteria defined for each step -- Steps with user inputs explicitly use "ask structured questions" phrasing -- Sync completed successfully -- Skills available for use - - -### Job Context - -Core commands for managing DeepWork jobs. These commands help you define new multi-step -workflows and learn from running them. - -The `new_job` workflow guides you through defining and implementing a new job by -asking structured questions about your workflow, understanding each step's inputs and outputs, -reviewing the specification, and generating all necessary files. - -The `learn` skill reflects on conversations where DeepWork jobs were run, identifies -confusion or inefficiencies, and improves job instructions. It also captures bespoke -learnings specific to the current run into AGENTS.md files in the working folder. 
- - -## Required Inputs - - -**Files from Previous Steps** - Read these first: -- `job.yml` (from `review_job_spec`) - -## Work Branch - -Use branch format: `deepwork/deepwork_jobs-[instance]-YYYYMMDD` - -- If on a matching work branch: continue using it -- If on main/master: create new branch with `git checkout -b deepwork/deepwork_jobs-[instance]-$(date +%Y%m%d)` - -## Outputs - -**Required outputs**: -- `steps/` (directory) - -## Guardrails - -- Do NOT skip prerequisite verification if this step has dependencies -- Do NOT produce partial outputs; complete all required outputs before finishing -- Do NOT proceed without required inputs; ask the user if any are missing -- Do NOT modify files outside the scope of this step's defined outputs - -## Quality Validation - -**Before completing this step, you MUST have your work reviewed against the quality criteria below.** - -Use a sub-agent (Haiku model) to review your work against these criteria: - -**Criteria (all must be satisfied)**: -1. **Directory Structure**: Is `.deepwork/jobs/[job_name]/` created correctly? -2. **Complete Instructions**: Are ALL step instruction files complete (not stubs or placeholders)? -3. **Specific & Actionable**: Are instructions tailored to each step's purpose, not generic? -4. **Output Examples**: Does each instruction file show what good output looks like? -5. **Quality Criteria**: Does each instruction file define quality criteria for its outputs? -6. **Ask Structured Questions**: Do step instructions that gather user input explicitly use the phrase "ask structured questions"? -7. **Sync Complete**: Has `deepwork sync` been run successfully? -8. **Commands Available**: Are the slash-commands generated in `.claude/commands/`? -9. **Rules Considered**: Has the agent thought about whether rules would benefit this job? If relevant rules were identified, did they explain them and offer to run `/deepwork_rules.define`? Not every job needs rules - only suggest when genuinely helpful. -**Review Process**: -1. Once you believe your work is complete, spawn a sub-agent using Haiku to review your work against the quality criteria above -2. The sub-agent should examine your outputs and verify each criterion is met -3. If the sub-agent identifies valid issues, fix them -4. Have the sub-agent review again until all valid feedback has been addressed -5. Only mark the step complete when the sub-agent confirms all criteria are satisfied - -## On Completion - -1. Verify outputs are created -2. Inform user: "new_job step 3/3 complete, outputs: steps/" -3. **new_job workflow complete**: All steps finished. Consider creating a PR to merge the work branch. - ---- - -**Reference files**: `.deepwork/jobs/deepwork_jobs/job.yml`, `.deepwork/jobs/deepwork_jobs/steps/implement.md` \ No newline at end of file diff --git a/.claude/skills/deepwork_jobs.learn/SKILL.md b/.claude/skills/deepwork_jobs.learn/SKILL.md deleted file mode 100644 index 95e1c61f..00000000 --- a/.claude/skills/deepwork_jobs.learn/SKILL.md +++ /dev/null @@ -1,449 +0,0 @@ ---- -name: deepwork_jobs.learn -description: "Analyzes conversation history to improve job instructions and capture learnings. Use after running a job to refine it." - ---- - -# deepwork_jobs.learn - -**Standalone skill** - can be run anytime - -> Creates and manages multi-step AI workflows. Use when defining, implementing, or improving DeepWork jobs. - - -## Instructions - -**Goal**: Analyzes conversation history to improve job instructions and capture learnings. Use after running a job to refine it. 
- -# Learn from Job Execution - -## Objective - -Think deeply about this task. Reflect on the current conversation to identify learnings from DeepWork job executions, improve job instructions with generalizable insights, and capture bespoke (run-specific) learnings in AGENTS.md files in the deepest common folder that would contain all work on the topic in the future. - -## Task - -Analyze the conversation history to extract learnings and improvements, then apply them appropriately: -- **Generalizable learnings** → Update job instruction files -- **Bespoke learnings** (specific to this run) → Add to AGENTS.md in the deepest common folder for the topic - -### Step 1: Analyze Conversation for Job Executions - -1. **Scan the conversation** for DeepWork slash commands that were run - - Look for patterns like `/job_name.step_id` - - Identify which jobs and steps were executed - - Note the order of execution - -2. **Identify the target folder** - - This should be the deepest common folder that would contain all work on the topic in the future - - Should be clear from conversation history where work was done - - If unclear, run `git diff` to see where changes were made on the branch - -3. **If no job was specified**, ask the user: - - "Which DeepWork job would you like me to learn from?" - - List available jobs from `.deepwork/jobs/` - -### Step 2: Identify Points of Confusion and Inefficiency - -Review the conversation for: - -1. **Confusion signals** - - Questions the agent asked that shouldn't have been necessary - - Misunderstandings about what a step required - - Incorrect outputs that needed correction - - Ambiguous instructions that led to wrong interpretations - -2. **Inefficiency signals** - - Extra steps or iterations that were needed - - Information that had to be repeated - - Context that was missing from instructions - - Dependencies that weren't clear - -3. **Error patterns** - - Failed validations and why they failed - - Quality criteria that were misunderstood - - Edge cases that weren't handled - -4. **Success patterns** - - What worked particularly well - - Efficient approaches worth preserving - - Good examples that could be added to instructions - -### Step 3: Classify Learnings - -For each learning identified, determine if it is: - -**Generalizable** (should improve instructions): -- Would help ANY future run of this job -- Addresses unclear or missing guidance -- Fixes incorrect assumptions in instructions -- Adds helpful examples or context -- Examples: - - "Step instructions should mention that X format is required" - - "Quality criteria should include checking for Y" - - "Add example of correct output format" - -**doc spec-Related** (should improve doc spec files): -- Improvements to document quality criteria -- Changes to document structure or format -- Updated audience or frequency information -- Examples: - - "The report should include a summary table" - - "Quality criterion 'Visualization' needs clearer requirements" - - "Documents need a section for action items" - -**Bespoke** (should go in AGENTS.md): -- Specific to THIS project/codebase/run -- Depends on local conventions or structure -- References specific files or paths -- Would not apply to other uses of this job -- Examples: - - "In this codebase, API endpoints are in `src/api/`" - - "This project uses camelCase for function names" - - "The main config file is at `config/settings.yml`" - -### Step 3.5: Identify doc spec-Related Learnings - -Review the conversation for doc spec-related improvements: - -1. 
**Quality Criteria Changes** - - Were any quality criteria unclear or insufficient? - - Did the agent repeatedly fail certain criteria? - - Are there new criteria that should be added? - -2. **Document Structure Changes** - - Did the user request different sections? - - Were parts of the document format confusing? - - Should the example document be updated? - -3. **Metadata Updates** - - Has the target audience changed? - - Should frequency or path patterns be updated? - -**Signals for doc spec improvements:** -- User asked for changes to document format -- Repeated validation failures on specific criteria -- Feedback about missing sections or information -- Changes to how documents are organized/stored - -### Step 4: Update Job Instructions (Generalizable Learnings) - -For each generalizable learning: - -1. **Locate the instruction file** - - Path: `.deepwork/jobs/[job_name]/steps/[step_id].md` - -2. **Make targeted improvements** - - Add missing context or clarification - - Include helpful examples - - Clarify ambiguous instructions - - Update quality criteria if needed - -3. **Keep instructions concise** - - Avoid redundancy - don't repeat the same guidance in multiple places - - Be direct - remove verbose explanations that don't add value - - Prefer bullet points over paragraphs where appropriate - -4. **Preserve instruction structure** - - Keep existing sections (Objective, Task, Process, Output Format, Quality Criteria) - - Add to appropriate sections rather than restructuring - - Maintain consistency with other steps - -5. **Track changes for changelog** - - Note what was changed and why - - Prepare changelog entry for job.yml - -### Step 4b: Extract Shared Content into Referenced Files - -Review all instruction files for the job and identify content that: -- Appears in multiple step instructions (duplicated) -- Is lengthy and could be extracted for clarity -- Would benefit from being maintained in one place - -**Extract to shared files:** - -1. **Create shared files** in `.deepwork/jobs/[job_name]/steps/shared/` - - `conventions.md` - Coding/formatting conventions used across steps - - `examples.md` - Common examples referenced by multiple steps - - `schemas.md` - Data structures or formats used throughout - -2. **Reference from instructions** using markdown includes or explicit references: - ```markdown - ## Conventions - - Follow the conventions defined in `shared/conventions.md`. - ``` - -3. **Benefits of extraction:** - - Single source of truth - update once, applies everywhere - - Shorter instruction files - easier to read and maintain - - Consistent guidance across steps - -### Step 4.5: Update doc spec Files (doc spec-Related Learnings) - -If doc spec-related learnings were identified: - -1. **Locate the doc spec file** - - Find doc spec references in job.yml outputs (look for `doc_spec: .deepwork/doc_specs/[doc_spec_name].md`) - - doc spec files are at `.deepwork/doc_specs/[doc_spec_name].md` - -2. **Update quality_criteria array** - - Add new criteria with name and description - - Modify existing criteria descriptions for clarity - - Remove criteria that are no longer relevant - -3. **Update example document** - - Modify the markdown body to reflect structure changes - - Ensure the example matches updated criteria - -4. 
**Update metadata as needed** - - target_audience: If audience has changed - - frequency: If production cadence has changed - - path_patterns: If storage location has changed - -**Example doc spec update:** -```yaml -# Before -quality_criteria: - - name: Visualization - description: Include charts - -# After -quality_criteria: - - name: Visualization - description: Include Mermaid.js charts showing spend breakdown by service and month-over-month trend -``` - -### Step 5: Create/Update AGENTS.md (Bespoke Learnings) - -The AGENTS.md file captures project-specific knowledge that helps future agent runs. - -1. **Determine the correct location** - - Place AGENTS.md in the deepest common folder that would contain all work on the topic in the future - - This ensures the knowledge is available when working in that context - - If uncertain, place at the project root - -2. **Use file references where possible** - - Instead of duplicating information, reference source files - - This keeps AGENTS.md in sync as the codebase evolves - - Pattern: "See `path/to/file.ext` for [description]" - -3. **AGENTS.md structure**: See `.deepwork/jobs/deepwork_jobs/templates/agents.md.template` for the standard format. - -4. **Writing entries** - - Be concise but specific - - Always prefer file references over inline content - - Use line numbers when referencing specific code: `file.ext:42` - - Group related learnings together - -### Step 6: Update Job Version and Changelog - -If instruction files were modified: - -1. **Bump version in job.yml** - - Patch version (0.0.x) for instruction improvements - - Minor version (0.x.0) if quality criteria changed - -2. **Add changelog entry** - ```yaml - - version: "[new_version]" - changes: "Improved [step] instructions based on execution learnings: [brief description]" - ``` - -### Step 7: Sync Skills - -**Run deepwork sync** (if instructions were modified) -```bash -deepwork sync -``` - -## File Reference Patterns - -When adding entries to AGENTS.md, prefer these patterns: - -**Good (references)**: -```markdown -- API endpoints follow REST conventions. See `src/api/routes.ts` for examples. -- Error handling pattern: See `src/utils/errors.ts:15-30` -- Configuration schema: Defined in `config/schema.json` -``` - -**Avoid (duplicating)**: -```markdown -- API endpoints should return JSON with this format: { status: ..., data: ... } -- Error handling should use try/catch with custom error classes -- Configuration requires these fields: name, version, ... -``` - -## Quality Criteria - -- Conversation has been analyzed for job executions -- Points of confusion and inefficiency are identified -- Learnings are correctly classified (generalizable vs bespoke) -- Job instructions updated for generalizable improvements -- Instructions are concise - no redundancy or unnecessary verbosity -- Shared/lengthy content extracted into referenced files where appropriate -- AGENTS.md created/updated with bespoke learnings -- File references used instead of duplicating content -- AGENTS.md is in the correct folder (the deepest common folder for the topic) -- When all criteria are met, include `✓ Quality Criteria Met` - -## Example Dialog - -``` -User: /deepwork_jobs.learn - -Claude: I'll analyze this conversation for learnings from DeepWork job executions. - -**Conversation Analysis** - -I found the following job executions: -1. `/competitive_research.identify_competitors` - Completed -2. `/competitive_research.research_competitors` - Completed with 2 iterations -3. 
`/competitive_research.comparative_analysis` - Completed - -**Identified Issues** - -1. **Confusion in step 2**: The instructions didn't specify that competitor research should include pricing information. This caused an extra iteration. - -2. **Missing context**: The agent had to ask about the industry focus, which should have been carried forward from step 1. - -**Classification** - -- Generalizable: Add pricing to research requirements in instructions -- Bespoke: This project focuses on B2B SaaS in the HR tech space - -**Actions Taken** - -1. Updated `.deepwork/jobs/competitive_research/steps/research_competitors.md`: - - Added "Include pricing information (tiers, models, approximate ranges)" to research requirements - - Added note about carrying forward industry context from previous step - -2. Created `./competitive_research_output/AGENTS.md`: - ```markdown - # Project Context for Competitive Research - - ## Job-Specific Context - - ### competitive_research - - #### Industry Focus - - Domain: B2B SaaS, HR Technology sector - - See `identify_competitors_output.md` for market definition - - #### Competitors Identified - - Reference: `competitor_list.md` for full details - - ## Last Updated - - Date: 2024-01-15 - - From conversation about: Initial competitive analysis run - ``` - -3. Updated job.yml version to 1.0.1 with changelog entry - -4. Ran `deepwork sync` - -**Summary** - -Updated job instructions and created AGENTS.md with bespoke learnings. -``` - -## Handling Edge Cases - -**No job executions found**: -- Ask user which job they'd like to analyze -- Or offer to review available jobs - -**Multiple jobs executed**: -- Analyze each job separately -- Create separate AGENTS.md entries or files as appropriate - -**AGENTS.md already exists**: -- Read existing content -- Append new learnings to appropriate sections -- Update "Last Updated" section - -**No issues found**: -- Document what worked well -- Consider if any successful patterns should be added to instructions as examples - -**Sensitive information**: -- Never include secrets, credentials, or PII in AGENTS.md -- Reference config files instead of including values - - -### Job Context - -Core commands for managing DeepWork jobs. These commands help you define new multi-step -workflows and learn from running them. - -The `new_job` workflow guides you through defining and implementing a new job by -asking structured questions about your workflow, understanding each step's inputs and outputs, -reviewing the specification, and generating all necessary files. - -The `learn` skill reflects on conversations where DeepWork jobs were run, identifies -confusion or inefficiencies, and improves job instructions. It also captures bespoke -learnings specific to the current run into AGENTS.md files in the working folder. 
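For reference, a minimal sketch of the version bump described in Step 6 above; the version numbers and change description are hypothetical:

```yaml
# job.yml fragment, updated after capturing learnings from a run
version: "1.0.1"        # patch bump for instruction improvements
changelog:
  - version: "1.0.0"
    changes: "Initial job creation"
  - version: "1.0.1"
    changes: "Improved research_competitors instructions based on execution learnings: pricing data is required"
```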
- - -## Required Inputs - -**User Parameters** - Gather from user before starting: -- **job_name**: Name of the job that was run (optional - will auto-detect from conversation) - - -## Work Branch - -Use branch format: `deepwork/deepwork_jobs-[instance]-YYYYMMDD` - -- If on a matching work branch: continue using it -- If on main/master: create new branch with `git checkout -b deepwork/deepwork_jobs-[instance]-$(date +%Y%m%d)` - -## Outputs - -**Required outputs**: -- `AGENTS.md` - -## Guardrails - -- Do NOT skip prerequisite verification if this step has dependencies -- Do NOT produce partial outputs; complete all required outputs before finishing -- Do NOT proceed without required inputs; ask the user if any are missing -- Do NOT modify files outside the scope of this step's defined outputs - -## Quality Validation - -**Before completing this step, you MUST have your work reviewed against the quality criteria below.** - -Use a sub-agent (Haiku model) to review your work against these criteria: - -**Criteria (all must be satisfied)**: -1. **Conversation Analyzed**: Did the agent review the conversation for DeepWork job executions? -2. **Confusion Identified**: Did the agent identify points of confusion, errors, or inefficiencies? -3. **Instructions Improved**: Were job instructions updated to address identified issues? -4. **Instructions Concise**: Are instructions free of redundancy and unnecessary verbosity? -5. **Shared Content Extracted**: Is lengthy/duplicated content extracted into referenced files? -6. **doc spec Reviewed (if applicable)**: For jobs with doc spec outputs, were doc spec-related learnings identified? -7. **doc spec Updated (if applicable)**: Were doc spec files updated with improved quality criteria or structure? -8. **Bespoke Learnings Captured**: Were run-specific learnings added to AGENTS.md? -9. **File References Used**: Do AGENTS.md entries reference other files where appropriate? -10. **Working Folder Correct**: Is AGENTS.md in the correct working folder for the job? -11. **Generalizable Separated**: Are generalizable improvements in instructions, not AGENTS.md? -12. **Sync Complete**: Has `deepwork sync` been run if instructions were modified? -**Review Process**: -1. Once you believe your work is complete, spawn a sub-agent using Haiku to review your work against the quality criteria above -2. The sub-agent should examine your outputs and verify each criterion is met -3. If the sub-agent identifies valid issues, fix them -4. Have the sub-agent review again until all valid feedback has been addressed -5. Only mark the step complete when the sub-agent confirms all criteria are satisfied - -## On Completion - -1. Verify outputs are created -2. Inform user: "learn complete, outputs: AGENTS.md" - -This standalone skill can be re-run anytime. - ---- - -**Reference files**: `.deepwork/jobs/deepwork_jobs/job.yml`, `.deepwork/jobs/deepwork_jobs/steps/learn.md` \ No newline at end of file diff --git a/.claude/skills/deepwork_jobs.review_job_spec/SKILL.md b/.claude/skills/deepwork_jobs.review_job_spec/SKILL.md deleted file mode 100644 index 51b8ed54..00000000 --- a/.claude/skills/deepwork_jobs.review_job_spec/SKILL.md +++ /dev/null @@ -1,496 +0,0 @@ ---- -name: deepwork_jobs.review_job_spec -description: "Reviews job.yml against quality criteria using a sub-agent for unbiased validation. Use after defining a job specification." 
-user-invocable: false - ---- - -# deepwork_jobs.review_job_spec - -**Step 2/3** in **new_job** workflow - -> Create a new DeepWork job from scratch through definition, review, and implementation - -> Creates and manages multi-step AI workflows. Use when defining, implementing, or improving DeepWork jobs. - -## Prerequisites (Verify First) - -Before proceeding, confirm these steps are complete: -- `/deepwork_jobs.define` - -## Instructions - -**Goal**: Reviews job.yml against quality criteria using a sub-agent for unbiased validation. Use after defining a job specification. - -# Review Job Specification - -## Objective - -Review the `job.yml` created in the define step against the doc spec quality criteria using a sub-agent for unbiased evaluation, then iterate on fixes until all criteria pass. - -## Why This Step Exists - -The define step focuses on understanding user requirements and creating a job specification. This review step ensures the specification meets quality standards before implementation. Using a sub-agent provides an unbiased "fresh eyes" review that catches issues the main agent might miss after being deeply involved in the definition process. - -## Task - -Use a sub-agent to review the job.yml against all 9 doc spec quality criteria, then fix any failed criteria. Repeat until all criteria pass. - -### Step 1: Read the Job Specification - -Read the `job.yml` file created in the define step: - -``` -.deepwork/jobs/[job_name]/job.yml -``` - -Also read the doc spec for reference: - -``` -.deepwork/doc_specs/job_spec.md -``` - -### Step 2: Spawn Review Sub-Agent - -Use the Task tool to spawn a sub-agent that will provide an unbiased review: - -``` -Task tool parameters: -- subagent_type: "general-purpose" -- model: "haiku" -- description: "Review job.yml against doc spec" -- prompt: [see below] -``` - -**Sub-agent prompt template:** - -``` -Review this job.yml against the following 9 quality criteria from the doc spec. - -For each criterion, respond with: -- PASS or FAIL -- If FAIL: specific issue and suggested fix - -## job.yml Content - -[paste the full job.yml content here] - -## Quality Criteria - -1. **Valid Identifier**: Job name must be lowercase with underscores, no spaces or special characters (e.g., `competitive_research`, `monthly_report`) - -2. **Semantic Version**: Version must follow semantic versioning format X.Y.Z (e.g., `1.0.0`, `2.1.3`) - -3. **Concise Summary**: Summary must be under 200 characters and clearly describe what the job accomplishes - -4. **Rich Description**: Description must be multi-line and explain: the problem solved, the process, expected outcomes, and target users - -5. **Changelog Present**: Must include a changelog array with at least the initial version entry - -6. **Complete Steps**: Each step must have: id (lowercase_underscores), name, description, instructions_file, outputs (at least one), and dependencies array - -7. **Valid Dependencies**: Dependencies must reference existing step IDs with no circular references - -8. **Input Consistency**: File inputs with `from_step` must reference a step that is in the dependencies array - -9. **Output Paths**: Outputs must be valid filenames or paths (e.g., `report.md` or `reports/analysis.md`) - -## Response Format - -Respond with a structured evaluation: - -### Overall: [X/9 PASS] - -### Criterion Results - -1. Valid Identifier: [PASS/FAIL] - [If FAIL: Issue and fix] - -2. Semantic Version: [PASS/FAIL] - [If FAIL: Issue and fix] - -[... continue for all 9 criteria ...] 
- -### Summary of Required Fixes - -[List any fixes needed, or "No fixes required - all criteria pass"] -``` - -### Step 3: Review Sub-Agent Findings - -Parse the sub-agent's response: - -1. **Count passing criteria** - How many of the 9 criteria passed? -2. **Identify failures** - List specific criteria that failed -3. **Note suggested fixes** - What changes does the sub-agent recommend? - -### Step 4: Fix Failed Criteria - -For each failed criterion, edit the job.yml to address the issue: - -**Common fixes by criterion:** - -| Criterion | Common Issue | Fix | -|-----------|-------------|-----| -| Valid Identifier | Spaces or uppercase | Convert to lowercase_underscores | -| Semantic Version | Missing or invalid format | Set to `"1.0.0"` or fix format | -| Concise Summary | Too long or vague | Shorten to <200 chars, be specific | -| Rich Description | Single line or missing context | Add multi-line explanation with problem/process/outcome/users | -| Changelog Present | Missing changelog | Add `changelog:` with initial version entry | -| Complete Steps | Missing required fields | Add id, name, description, instructions_file, outputs, dependencies | -| Valid Dependencies | Non-existent step or circular | Fix step ID reference or reorder dependencies | -| Input Consistency | from_step not in dependencies | Add the referenced step to dependencies array | -| Output Paths | Invalid characters or format | Use valid filename/path format | - -### Step 5: Re-Run Review (If Needed) - -If any criteria failed: - -1. **Spawn a new sub-agent** with the updated job.yml content -2. **Review the new findings** -3. **Fix any remaining issues** -4. **Repeat until all 9 criteria pass** - -### Step 6: Confirm Completion - -When all 9 criteria pass: - -1. **Announce success**: "All 9 doc spec quality criteria pass." -2. **List what was validated**: - - Valid Identifier - - Semantic Version - - Concise Summary - - Rich Description - - Changelog Present - - Complete Steps - - Valid Dependencies - - Input Consistency - - Output Paths -3. **Include the promise**: `Quality Criteria Met` -4. **Guide to next step**: "Run `/deepwork_jobs.implement` to generate the step instruction files." - -## Example Review Cycle - -``` -Claude: Let me review the job.yml against the doc spec quality criteria. - -[Spawns sub-agent with haiku model] - -Sub-agent response: -### Overall: 7/9 PASS - -### Criterion Results -1. Valid Identifier: PASS -2. Semantic Version: PASS -3. Concise Summary: FAIL - Issue: Summary is 245 characters, exceeds 200 char limit - Fix: Shorten to "Competitive analysis workflow for product positioning" -4. Rich Description: PASS -5. Changelog Present: FAIL - Issue: No changelog array present - Fix: Add changelog with initial version entry -6. Complete Steps: PASS -7. Valid Dependencies: PASS -8. Input Consistency: PASS -9. Output Paths: PASS - -### Summary of Required Fixes -1. Shorten summary to under 200 characters -2. Add changelog array with initial version - -Claude: I found 2 issues. Let me fix them. - -[Edits job.yml to shorten summary and add changelog] - -Claude: Let me verify the fixes with another review. - -[Spawns new sub-agent] - -Sub-agent response: -### Overall: 9/9 PASS - -### Summary of Required Fixes -No fixes required - all criteria pass - -Claude: All 9 doc spec quality criteria pass. - -Quality Criteria Met - -**Next step:** Run `/deepwork_jobs.implement` to generate the step instruction files. 
-``` - -## Quality Criteria - -- **Sub-Agent Used**: A sub-agent was spawned to provide unbiased review (not just self-review) -- **All doc spec Criteria Evaluated**: The sub-agent assessed all 9 quality criteria from the doc spec -- **Findings Addressed**: All failed criteria were fixed by the main agent -- **Validation Loop Complete**: The review-fix cycle continued until all criteria passed -- **Promise Included**: The response includes `Quality Criteria Met` when complete - -## Output - -The validated `job.yml` file at `.deepwork/jobs/[job_name]/job.yml` that passes all 9 doc spec quality criteria. - - -### Job Context - -Core commands for managing DeepWork jobs. These commands help you define new multi-step -workflows and learn from running them. - -The `new_job` workflow guides you through defining and implementing a new job by -asking structured questions about your workflow, understanding each step's inputs and outputs, -reviewing the specification, and generating all necessary files. - -The `learn` skill reflects on conversations where DeepWork jobs were run, identifies -confusion or inefficiencies, and improves job instructions. It also captures bespoke -learnings specific to the current run into AGENTS.md files in the working folder. - - -## Required Inputs - - -**Files from Previous Steps** - Read these first: -- `job.yml` (from `define`) - -## Work Branch - -Use branch format: `deepwork/deepwork_jobs-[instance]-YYYYMMDD` - -- If on a matching work branch: continue using it -- If on main/master: create new branch with `git checkout -b deepwork/deepwork_jobs-[instance]-$(date +%Y%m%d)` - -## Outputs - -**Required outputs**: -- `job.yml` - **Doc Spec**: DeepWork Job Specification - > YAML specification file that defines a multi-step workflow job for AI agents - **Definition**: `.deepwork/doc_specs/job_spec.md` - **Target Audience**: AI agents executing jobs and developers defining workflows - **Quality Criteria**: - 1. **Valid Identifier**: Job name must be lowercase with underscores, no spaces or special characters (e.g., `competitive_research`, `monthly_report`) - 2. **Semantic Version**: Version must follow semantic versioning format X.Y.Z (e.g., `1.0.0`, `2.1.3`) - 3. **Concise Summary**: Summary must be under 200 characters and clearly describe what the job accomplishes - 4. **Rich Description**: Description must be multi-line and explain: the problem solved, the process, expected outcomes, and target users - 5. **Changelog Present**: Must include a changelog array with at least the initial version entry. Changelog should only include one entry per branch at most - 6. **Complete Steps**: Each step must have: id (lowercase_underscores), name, description, instructions_file, outputs (at least one), and dependencies array - 7. **Valid Dependencies**: Dependencies must reference existing step IDs with no circular references - 8. **Input Consistency**: File inputs with `from_step` must reference a step that is in the dependencies array - 9. **Output Paths**: Outputs must be valid filenames or paths within the main repo directory structure, never in dot-directories like `.deepwork/`. Use specific, descriptive paths that lend themselves to glob patterns (e.g., `competitive_research/acme_corp/swot.md` or `operations/reports/2026-01/spending_analysis.md`). Parameterized paths like `[competitor_name]/` are encouraged for per-entity outputs. Avoid generic names (`output.md`, `analysis.md`) and transient-sounding paths (`temp/`, `draft.md`). 
Supporting materials for a final output should go in a peer `_dataroom` folder (e.g., `spending_analysis_dataroom/`). - 10. **Concise Instructions**: The content of the file, particularly the description, must not have excessively redundant information. It should be concise and to the point given that extra tokens will confuse the AI. - -
- Example Document Structure - - ```markdown - # DeepWork Job Specification: [job_name] - - A `job.yml` file defines a complete multi-step workflow that AI agents can execute. Each job breaks down a complex task into reviewable steps with clear inputs and outputs. - - ## Required Fields - - ### Top-Level Metadata - - ```yaml - name: job_name # lowercase, underscores only - version: "1.0.0" # semantic versioning - summary: "Brief description" # max 200 characters - description: | # detailed multi-line explanation - [Explain what this workflow does, why it exists, - what outputs it produces, and who should use it] - ``` - - ### Changelog - - ```yaml - changelog: - - version: "1.0.0" - changes: "Initial job creation" - - version: "1.1.0" - changes: "Added quality validation hooks" - ``` - - ### Steps Array - - ```yaml - steps: - - id: step_id # unique, lowercase_underscores - name: "Human Readable Name" - description: "What this step accomplishes" - instructions_file: steps/step_id.md - inputs: - # User-provided inputs: - - name: param_name - description: "What the user provides" - # File inputs from previous steps: - - file: output.md - from_step: previous_step_id - outputs: - - competitive_research/competitors_list.md # descriptive path - - competitive_research/[competitor_name]/research.md # parameterized path - # With doc spec reference: - - file: competitive_research/final_report.md - doc_spec: .deepwork/doc_specs/report_type.md - dependencies: - - previous_step_id # steps that must complete first - ``` - - ## Optional Fields - - ### Exposed Steps - - ```yaml - steps: - - id: learn - exposed: true # Makes step available without running dependencies - ``` - - ### Agent Delegation - - When a step should be executed by a specific agent type, use the `agent` field. This automatically sets `context: fork` in the generated skill. - - ```yaml - steps: - - id: research_step - agent: general-purpose # Delegates to the general-purpose agent - ``` - - Available agent types: - - `general-purpose` - Standard agent for multi-step tasks - - ### Quality Hooks - - ```yaml - steps: - - id: step_id - hooks: - after_agent: - # Inline prompt for quality validation: - - prompt: | - Verify the output meets criteria: - 1. [Criterion 1] - 2. [Criterion 2] - If ALL criteria are met, include `...`. - # External prompt file: - - prompt_file: hooks/quality_check.md - # Script for programmatic validation: - - script: hooks/run_tests.sh - ``` - - ### Stop Hooks (Legacy) - - ```yaml - steps: - - id: step_id - stop_hooks: - - prompt: "Validation prompt..." - - prompt_file: hooks/check.md - - script: hooks/validate.sh - ``` - - ## Validation Rules - - 1. **No circular dependencies**: Step A cannot depend on Step B if Step B depends on Step A - 2. **File inputs require dependencies**: If a step uses `from_step: X`, then X must be in its dependencies - 3. **Unique step IDs**: No two steps can have the same id - 4. **Valid file paths**: Output paths must not contain invalid characters and should be in the main repo (not dot-directories) - 5. **Instructions files exist**: Each `instructions_file` path should have a corresponding file created - - ## Example: Complete Job Specification - - ```yaml - name: competitive_research - version: "1.0.0" - summary: "Systematic competitive analysis workflow" - description: | - A comprehensive workflow for analyzing competitors in your market segment. 
- Helps product teams understand the competitive landscape through systematic - identification, research, comparison, and positioning recommendations. - - Produces: - - Vetted competitor list - - Research notes per competitor - - Comparison matrix - - Strategic positioning report - - changelog: - - version: "1.0.0" - changes: "Initial job creation" - - steps: - - id: identify_competitors - name: "Identify Competitors" - description: "Identify 5-7 key competitors in the target market" - instructions_file: steps/identify_competitors.md - inputs: - - name: market_segment - description: "The market segment to analyze" - - name: product_category - description: "The product category" - outputs: - - competitive_research/competitors_list.md - dependencies: [] - - - id: research_competitors - name: "Research Competitors" - description: "Deep dive research on each identified competitor" - instructions_file: steps/research_competitors.md - inputs: - - file: competitive_research/competitors_list.md - from_step: identify_competitors - outputs: - - competitive_research/[competitor_name]/research.md - dependencies: - - identify_competitors - - - id: positioning_report - name: "Positioning Report" - description: "Strategic positioning recommendations" - instructions_file: steps/positioning_report.md - inputs: - - file: competitive_research/[competitor_name]/research.md - from_step: research_competitors - outputs: - - file: competitive_research/positioning_report.md - doc_spec: .deepwork/doc_specs/positioning_report.md - dependencies: - - research_competitors - ``` - ``` - -
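To illustrate a typical fix cycle from Step 4 (the summary text and version are invented), two of the most common corrections look like this in job.yml:

```yaml
# Before review: summary exceeds 200 characters (fails Concise Summary) and no changelog (fails Changelog Present)
summary: "A very thorough, end-to-end, multi-phase competitive research and analysis workflow that identifies competitors, researches them in depth, compares them, and produces positioning recommendations for product teams"

# After fixes: summary shortened, changelog added
summary: "Competitive analysis workflow for product positioning"
changelog:
  - version: "1.0.0"
    changes: "Initial job creation"
```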
- -## Guardrails - -- Do NOT skip prerequisite verification if this step has dependencies -- Do NOT produce partial outputs; complete all required outputs before finishing -- Do NOT proceed without required inputs; ask the user if any are missing -- Do NOT modify files outside the scope of this step's defined outputs - -## Quality Validation - -**Before completing this step, you MUST have your work reviewed against the quality criteria below.** - -Use a sub-agent (Haiku model) to review your work against these criteria: - -**Criteria (all must be satisfied)**: -1. **Sub-Agent Used**: Was a sub-agent spawned to provide unbiased review? -2. **All doc spec Criteria Evaluated**: Did the sub-agent assess all 9 quality criteria? -3. **Findings Addressed**: Were all failed criteria addressed by the main agent? -4. **Validation Loop Complete**: Did the review-fix cycle continue until all criteria passed? -**Review Process**: -1. Once you believe your work is complete, spawn a sub-agent using Haiku to review your work against the quality criteria above -2. The sub-agent should examine your outputs and verify each criterion is met -3. If the sub-agent identifies valid issues, fix them -4. Have the sub-agent review again until all valid feedback has been addressed -5. Only mark the step complete when the sub-agent confirms all criteria are satisfied - -## On Completion - -1. Verify outputs are created -2. Inform user: "new_job step 2/3 complete, outputs: job.yml" -3. **Continue workflow**: Use Skill tool to invoke `/deepwork_jobs.implement` - ---- - -**Reference files**: `.deepwork/jobs/deepwork_jobs/job.yml`, `.deepwork/jobs/deepwork_jobs/steps/review_job_spec.md` \ No newline at end of file diff --git a/.claude/skills/deepwork_jobs/SKILL.md b/.claude/skills/deepwork_jobs/SKILL.md deleted file mode 100644 index ec2526f5..00000000 --- a/.claude/skills/deepwork_jobs/SKILL.md +++ /dev/null @@ -1,84 +0,0 @@ ---- -name: deepwork_jobs -description: "Creates and manages multi-step AI workflows. Use when defining, implementing, or improving DeepWork jobs." ---- - -# deepwork_jobs - -Creates and manages multi-step AI workflows. Use when defining, implementing, or improving DeepWork jobs. - -> **CRITICAL**: Always invoke steps using the Skill tool. Never copy/paste step instructions directly. - -Core commands for managing DeepWork jobs. These commands help you define new multi-step -workflows and learn from running them. - -The `new_job` workflow guides you through defining and implementing a new job by -asking structured questions about your workflow, understanding each step's inputs and outputs, -reviewing the specification, and generating all necessary files. - -The `learn` skill reflects on conversations where DeepWork jobs were run, identifies -confusion or inefficiencies, and improves job instructions. It also captures bespoke -learnings specific to the current run into AGENTS.md files in the working folder. - - -## Workflows - -### new_job - -Create a new DeepWork job from scratch through definition, review, and implementation - -**Steps in order**: -1. **define** - Creates a job.yml specification by gathering workflow requirements through structured questions. Use when starting a new multi-step workflow. -2. **review_job_spec** - Reviews job.yml against quality criteria using a sub-agent for unbiased validation. Use after defining a job specification. -3. **implement** - Generates step instruction files and syncs slash commands from the job.yml specification. Use after job spec review passes. 
- -**Start workflow**: `/deepwork_jobs.define` - -## Standalone Skills - -These skills can be run independently at any time: - -- **learn** - Analyzes conversation history to improve job instructions and capture learnings. Use after running a job to refine it. - Command: `/deepwork_jobs.learn` - - -## Execution Instructions - -### Step 1: Analyze Intent - -Parse any text following `/deepwork_jobs` to determine user intent: -- "new_job" or related terms → start new_job workflow at `deepwork_jobs.define` -- "learn" or related terms → run standalone skill `deepwork_jobs.learn` - -### Step 2: Invoke Starting Step - -Use the Skill tool to invoke the identified starting step: -``` -Skill tool: deepwork_jobs.define -``` - -### Step 3: Continue Workflow Automatically - -After each step completes: -1. Check if there's a next step in the workflow sequence -2. Invoke the next step using the Skill tool -3. Repeat until workflow is complete or user intervenes - -**Note**: Standalone skills do not auto-continue to other steps. - -### Handling Ambiguous Intent - -If user intent is unclear, use AskUserQuestion to clarify: -- Present available workflows and standalone skills as options -- Let user select the starting point - -## Guardrails - -- Do NOT copy/paste step instructions directly; always use the Skill tool to invoke steps -- Do NOT skip steps in a workflow unless the user explicitly requests it -- Do NOT proceed to the next step if the current step's outputs are incomplete -- Do NOT make assumptions about user intent; ask for clarification when ambiguous - -## Context Files - -- Job definition: `.deepwork/jobs/deepwork_jobs/job.yml` \ No newline at end of file diff --git a/.claude/skills/update.job/SKILL.md b/.claude/skills/update.job/SKILL.md deleted file mode 100644 index 1604cbfe..00000000 --- a/.claude/skills/update.job/SKILL.md +++ /dev/null @@ -1,145 +0,0 @@ ---- -name: update.job -description: "Edits standard job source files in src/ and runs deepwork install to sync changes. Use when updating job.yml or step instructions." -user-invocable: false - ---- - -# update.job - -**Standalone skill** - can be run anytime - -> Updates DeepWork standard jobs in src/ and syncs to installed locations. Use when modifying deepwork_jobs. - - -## Instructions - -**Goal**: Edits standard job source files in src/ and runs deepwork install to sync changes. Use when updating job.yml or step instructions. - -# Update Standard Job - -## Objective - -Edit standard job source files in `src/deepwork/standard_jobs/` and sync changes to installed locations. - -## Task - -When modifying a standard job in the DeepWork repository, this step ensures changes are made in the correct location and properly propagated. - -### Important: Source of Truth - -Standard jobs exist in THREE locations, but only ONE is the source of truth: - -| Location | Purpose | Editable? | -|----------|---------|-----------| -| `src/deepwork/standard_jobs/[job]/` | **Source of truth** | **YES** | -| `.deepwork/jobs/[job]/` | Installed copy | NO - overwritten by install | -| `.claude/commands/[job].[step].md` | Generated commands | NO - regenerated by sync | - -**NEVER edit files in `.deepwork/jobs/` or `.claude/commands/` for standard jobs!** - -### Process - -#### 1. Identify the Standard Job to Update - -From conversation context, determine: -- Which standard job needs updating (e.g., `deepwork_jobs`, `deepwork_rules`) -- What changes are needed (job.yml, step instructions, hooks, etc.) 
- -Current standard jobs: -```bash -ls src/deepwork/standard_jobs/ -``` - -#### 2. Make Changes in Source Location - -``` -src/deepwork/standard_jobs/[job_name]/ -├── job.yml # Job definition -├── steps/ # Step instruction files -├── hooks/ # Hook scripts -└── templates/ # Templates -``` - -#### 3. Run DeepWork Install - -```bash -deepwork install --platform claude -``` - -For Gemini: `deepwork install --platform gemini` - -#### 4. Verify the Sync - -```bash -# Verify job.yml -diff src/deepwork/standard_jobs/[job_name]/job.yml .deepwork/jobs/[job_name]/job.yml - -# Verify step files -diff -r src/deepwork/standard_jobs/[job_name]/steps/ .deepwork/jobs/[job_name]/steps/ - -# Check commands regenerated -ls -la .claude/commands/[job_name].*.md -``` - -## Quality Criteria - -- Changes made ONLY in `src/deepwork/standard_jobs/[job_name]/` -- `deepwork install --platform claude` executed successfully -- Files in `.deepwork/jobs/` match source -- Command files regenerated -- When all criteria are met, include `✓ Quality Criteria Met` - - -### Job Context - -A workflow for maintaining standard jobs bundled with DeepWork. Standard jobs -(like `deepwork_jobs`) are source-controlled in -`src/deepwork/standard_jobs/` and must be edited there—never in `.deepwork/jobs/` -or `.claude/commands/` directly. - -This job guides you through: -1. Identifying which standard job(s) to update from conversation context -2. Making changes in the correct source location (`src/deepwork/standard_jobs/[job_name]/`) -3. Running `deepwork install` to propagate changes to `.deepwork/` and command directories -4. Verifying the sync completed successfully - -Use this job whenever you need to modify job.yml files, step instructions, or hooks -for any standard job in the DeepWork repository. - - -## Required Inputs - -**User Parameters** - Gather from user before starting: -- **job_context**: Determine from conversation context which standard job(s) to update and what changes are needed - - -## Work Branch - -Use branch format: `deepwork/update-[instance]-YYYYMMDD` - -- If on a matching work branch: continue using it -- If on main/master: create new branch with `git checkout -b deepwork/update-[instance]-$(date +%Y%m%d)` - -## Outputs - -**Required outputs**: -- `files_synced` - -## Guardrails - -- Do NOT skip prerequisite verification if this step has dependencies -- Do NOT produce partial outputs; complete all required outputs before finishing -- Do NOT proceed without required inputs; ask the user if any are missing -- Do NOT modify files outside the scope of this step's defined outputs - -## On Completion - -1. Verify outputs are created -2. Inform user: "job complete, outputs: files_synced" - -This standalone skill can be re-run anytime. - ---- - -**Reference files**: `.deepwork/jobs/update/job.yml`, `.deepwork/jobs/update/steps/job.md` \ No newline at end of file diff --git a/.claude/skills/update/SKILL.md b/.claude/skills/update/SKILL.md deleted file mode 100644 index a51a5967..00000000 --- a/.claude/skills/update/SKILL.md +++ /dev/null @@ -1,73 +0,0 @@ ---- -name: update -description: "Updates DeepWork standard jobs in src/ and syncs to installed locations. Use when modifying deepwork_jobs." ---- - -# update - -Updates DeepWork standard jobs in src/ and syncs to installed locations. Use when modifying deepwork_jobs. - -> **CRITICAL**: Always invoke steps using the Skill tool. Never copy/paste step instructions directly. - -A workflow for maintaining standard jobs bundled with DeepWork. 
Standard jobs -(like `deepwork_jobs`) are source-controlled in -`src/deepwork/standard_jobs/` and must be edited there—never in `.deepwork/jobs/` -or `.claude/commands/` directly. - -This job guides you through: -1. Identifying which standard job(s) to update from conversation context -2. Making changes in the correct source location (`src/deepwork/standard_jobs/[job_name]/`) -3. Running `deepwork install` to propagate changes to `.deepwork/` and command directories -4. Verifying the sync completed successfully - -Use this job whenever you need to modify job.yml files, step instructions, or hooks -for any standard job in the DeepWork repository. - - -## Standalone Skills - -These skills can be run independently at any time: - -- **job** - Edits standard job source files in src/ and runs deepwork install to sync changes. Use when updating job.yml or step instructions. - Command: `/update.job` - - -## Execution Instructions - -### Step 1: Analyze Intent - -Parse any text following `/update` to determine user intent: -- "job" or related terms → run standalone skill `update.job` - -### Step 2: Invoke Starting Step - -Use the Skill tool to invoke the identified starting step: -``` -Skill tool: update.job -``` - -### Step 3: Continue Workflow Automatically - -After each step completes: -1. Check if there's a next step in the workflow sequence -2. Invoke the next step using the Skill tool -3. Repeat until workflow is complete or user intervenes - -**Note**: Standalone skills do not auto-continue to other steps. - -### Handling Ambiguous Intent - -If user intent is unclear, use AskUserQuestion to clarify: -- Present available steps as numbered options -- Let user select the starting point - -## Guardrails - -- Do NOT copy/paste step instructions directly; always use the Skill tool to invoke steps -- Do NOT skip steps in a workflow unless the user explicitly requests it -- Do NOT proceed to the next step if the current step's outputs are incomplete -- Do NOT make assumptions about user intent; ask for clarification when ambiguous - -## Context Files - -- Job definition: `.deepwork/jobs/update/job.yml` \ No newline at end of file diff --git a/.gemini/skills/add_platform/add_capabilities.toml b/.gemini/skills/add_platform/add_capabilities.toml deleted file mode 100644 index 8c23d8f0..00000000 --- a/.gemini/skills/add_platform/add_capabilities.toml +++ /dev/null @@ -1,210 +0,0 @@ -# add_platform:add_capabilities -# -# Updates job schema and adapters with any new hook events the platform supports. Use after research to extend DeepWork's hook system. -# -# Generated by DeepWork - do not edit manually - -description = "Updates job schema and adapters with any new hook events the platform supports. Use after research to extend DeepWork's hook system." - -prompt = """ -# add_platform:add_capabilities - -**Step 2/4** in **add_platform** workflow - -> Adds a new AI platform to DeepWork with adapter, templates, and tests. Use when integrating Cursor, Windsurf, or other AI coding tools. - -## Prerequisites (Verify First) - -Before proceeding, confirm these steps are complete: -- `/add_platform:research` - -## Instructions - -**Goal**: Updates job schema and adapters with any new hook events the platform supports. Use after research to extend DeepWork's hook system. - -# Add Hook Capabilities - -## Objective - -Update the DeepWork job schema and platform adapters to support any new hook events that the new platform provides for slash command definitions. 
- -## Task - -Analyze the hooks documentation from the research step and update the codebase to support any new hook capabilities, ensuring consistency across all existing adapters. - -### Prerequisites - -Read the hooks documentation created in the previous step: -- `doc/platforms//hooks_system.md` - -Also review the existing schema and adapters: -- `src/deepwork/schemas/job_schema.py` -- `src/deepwork/adapters.py` - -### Process - -1. **Analyze the new platform's hooks** - - Read `doc/platforms//hooks_system.md` - - List all hooks available for slash command definitions - - Compare with hooks already in `job_schema.py` - - Identify any NEW hooks not currently supported - -2. **Determine if schema changes are needed** - - If the platform has hooks that DeepWork doesn't currently support, add them - - If all hooks are already supported, document this finding - - Remember: Only add hooks that are available on slash command definitions - -3. **Update job_schema.py (if needed)** - - Add new hook fields to the step schema - - Follow existing patterns for hook definitions - - Add appropriate type hints and documentation - - Example addition: - ```python - # New hook from - new_hook_name: Optional[List[HookConfig]] = None - ``` - -4. **Update all existing adapters** - - Open `src/deepwork/adapters.py` - - For EACH existing adapter class: - - Add the new hook field (set to `None` if not supported) - - This maintains consistency across all adapters - - Document why each adapter does or doesn't support the hook - -5. **Validate the changes** - - Run Python syntax check: `python -m py_compile src/deepwork/schemas/job_schema.py` - - Run Python syntax check: `python -m py_compile src/deepwork/adapters.py` - - Ensure no import errors - -6. **Document the decision** - - If no new hooks were added, add a comment explaining why - - If new hooks were added, ensure they're documented in the schema - -## Output Format - -### job_schema.py - -Location: `src/deepwork/schemas/job_schema.py` - -If new hooks are added: -```python -@dataclass -class StepDefinition: - # ... existing fields ... - - # New hook from - [description of what it does] - new_hook_name: Optional[List[HookConfig]] = None -``` - -### adapters.py - -Location: `src/deepwork/adapters.py` - -For each existing adapter, add the new hook field: -```python -class ExistingPlatformAdapter(PlatformAdapter): - # ... existing code ... - - def get_hook_support(self) -> dict: - return { - # ... existing hooks ... 
- "new_hook_name": None, # Not supported by this platform - } -``` - -Or if no changes are needed, add a documentation comment: -```python -# NOTE: hooks reviewed on YYYY-MM-DD -# No new hooks to add - all command hooks are already -# supported by the existing schema (stop_hooks covers their validation pattern) -``` - -## Quality Criteria - -- Hooks documentation from research step has been reviewed -- If new hooks exist: - - Added to `src/deepwork/schemas/job_schema.py` with proper typing - - ALL existing adapters updated in `src/deepwork/adapters.py` - - Each adapter indicates support level (implemented, None, or partial) -- If no new hooks needed: - - Decision documented with a comment explaining the analysis -- Only hooks available on slash command definitions are considered -- `job_schema.py` has no syntax errors (verified with py_compile) -- `adapters.py` has no syntax errors (verified with py_compile) -- All adapters have consistent hook fields (same fields across all adapters) -- When all criteria are met, include `✓ Quality Criteria Met` in your response - -## Context - -DeepWork supports multiple AI platforms, and each platform may have different capabilities for hooks within command definitions. The schema defines what hooks CAN exist, while adapters define what each platform actually SUPPORTS. - -This separation allows: -- Job definitions to use any hook (the schema is the superset) -- Platform-specific generation to only use supported hooks (adapters filter) -- Future platforms to add new hooks without breaking existing ones - -Maintaining consistency is critical - all adapters must have the same hook fields, even if they don't support them (use `None` for unsupported). - -## Common Hook Types - -For reference, here are common hook patterns across platforms: - -| Hook Type | Purpose | Example Platforms | -|-----------|---------|-------------------| -| `stop_hooks` | Quality validation loops | Claude Code | -| `pre_hooks` | Run before command | Various | -| `post_hooks` | Run after command | Various | -| `validation_hooks` | Validate inputs/outputs | Various | - -When you find a new hook type, consider whether it maps to an existing pattern or is genuinely new functionality. - - -### Job Context - -A workflow for adding support for a new AI platform (like Cursor, Windsurf, etc.) to DeepWork. - -The **integrate** workflow guides you through four phases: -1. **Research**: Capture the platform's CLI configuration and hooks system documentation -2. **Add Capabilities**: Update the job schema and adapters with any new hook events -3. **Implement**: Create the platform adapter, templates, tests (100% coverage), and README updates -4. **Verify**: Ensure installation works correctly and produces expected files - -The workflow ensures consistency across all supported platforms and maintains -comprehensive test coverage for new functionality. 
- -**Important Notes**: -- Only hooks available on slash command definitions should be captured -- Each existing adapter must be updated when new hooks are added (typically with null values) -- Tests must achieve 100% coverage for any new functionality -- Installation verification confirms the platform integrates correctly with existing jobs - - -## Required Inputs - - -**Files from Previous Steps** - Read these first: -- `hooks_system.md` (from `research`) - -## Work Branch - -Use branch format: `deepwork/add_platform-[instance]-YYYYMMDD` - -- If on a matching work branch: continue using it -- If on main/master: create new branch with `git checkout -b deepwork/add_platform-[instance]-$(date +%Y%m%d)` - -## Outputs - -**Required outputs**: -- `job_schema.py` -- `adapters.py` - -## On Completion - -1. Verify outputs are created -2. Inform user: "Step 2/4 complete, outputs: job_schema.py, adapters.py" -3. **Tell user next command**: `/add_platform:implement` - ---- - -**Reference files**: `.deepwork/jobs/add_platform/job.yml`, `.deepwork/jobs/add_platform/steps/add_capabilities.md` -""" \ No newline at end of file diff --git a/.gemini/skills/add_platform/implement.toml b/.gemini/skills/add_platform/implement.toml deleted file mode 100644 index dda3f96e..00000000 --- a/.gemini/skills/add_platform/implement.toml +++ /dev/null @@ -1,305 +0,0 @@ -# add_platform:implement -# -# Creates platform adapter, templates, tests with 100% coverage, and README documentation. Use after adding hook capabilities. -# -# Generated by DeepWork - do not edit manually - -description = "Creates platform adapter, templates, tests with 100% coverage, and README documentation. Use after adding hook capabilities." - -prompt = """ -# add_platform:implement - -**Step 3/4** in **add_platform** workflow - -> Adds a new AI platform to DeepWork with adapter, templates, and tests. Use when integrating Cursor, Windsurf, or other AI coding tools. - -## Prerequisites (Verify First) - -Before proceeding, confirm these steps are complete: -- `/add_platform:research` -- `/add_platform:add_capabilities` - -## Instructions - -**Goal**: Creates platform adapter, templates, tests with 100% coverage, and README documentation. Use after adding hook capabilities. - -# Implement Platform Support - -## Objective - -Create the complete platform implementation including the adapter class, command templates, comprehensive tests, and documentation updates. - -## Task - -Build the full platform support by implementing the adapter, creating templates, writing tests with 100% coverage, and updating the README. - -### Prerequisites - -Read the outputs from previous steps: -- `doc/platforms//cli_configuration.md` - For template structure -- `src/deepwork/schemas/job_schema.py` - For current schema -- `src/deepwork/adapters.py` - For adapter patterns - -Also review existing implementations for reference: -- `src/deepwork/templates/claude/` - Example templates -- `tests/` - Existing test patterns - -### Process - -1. **Create the platform adapter class** - - Add a new adapter class to `src/deepwork/adapters.py`: - - ```python - class NewPlatformAdapter(PlatformAdapter): - """Adapter for .""" - - platform_name = "" - command_directory = "" # e.g., ".cursor/commands" - command_extension = ".md" # or appropriate extension - - def get_hook_support(self) -> dict: - """Return which hooks this platform supports.""" - return { - "stop_hooks": True, # or False/None - # ... 
other hooks - } - - def generate_command(self, step: StepDefinition, job: JobDefinition) -> str: - """Generate command file content for this platform.""" - # Use Jinja2 template - template = self.env.get_template(f"{self.platform_name}/command.md.j2") - return template.render(step=step, job=job) - ``` - -2. **Create command templates** - - Create templates in `src/deepwork/templates//`: - - - `command.md.j2` - Main command template - - Any other templates needed for the platform's format - - Use the CLI configuration documentation to ensure the template matches the platform's expected format. - -3. **Register the adapter** - - Update the adapter registry in `src/deepwork/adapters.py`: - - ```python - PLATFORM_ADAPTERS = { - "claude": ClaudeAdapter, - "": NewPlatformAdapter, - # ... other adapters - } - ``` - -4. **Write comprehensive tests** - - Create tests in `tests/` that cover: - - - Adapter instantiation - - Hook support detection - - Command generation - - Template rendering - - Edge cases (empty inputs, special characters, etc.) - - Integration with the sync command - - **Critical**: Tests must achieve 100% coverage of new code. - -5. **Update README.md** - - Add the new platform to `README.md`: - - - Add to "Supported Platforms" list - - Add installation instructions: - ```bash - deepwork install --platform - ``` - - Document any platform-specific notes or limitations - -6. **Run tests and verify coverage** - - ```bash - uv run pytest --cov=src/deepwork --cov-report=term-missing - ``` - - - All tests must pass - - New code must have 100% coverage - - If coverage is below 100%, add more tests - -7. **Iterate until tests pass with full coverage** - - This step has a `stop_hooks` script that runs tests. Keep iterating until: - - All tests pass - - Coverage is 100% for new functionality - -## Output Format - -### templates/ - -Location: `src/deepwork/templates//` - -Create the following files: - -**command.md.j2**: -```jinja2 -{# Template for command files #} -{# Follows the platform's expected format from cli_configuration.md #} - -[Platform-specific frontmatter or metadata] - -# {{ step.name }} - -{{ step.description }} - -## Instructions - -{{ step.instructions_content }} - -[... rest of template based on platform format ...] -``` - -### tests/ - -Location: `tests/test__adapter.py` - -```python -"""Tests for the adapter.""" -import pytest -from deepwork.adapters import NewPlatformAdapter - -class TestNewPlatformAdapter: - """Test suite for NewPlatformAdapter.""" - - def test_adapter_initialization(self): - """Test adapter can be instantiated.""" - adapter = NewPlatformAdapter() - assert adapter.platform_name == "" - - def test_hook_support(self): - """Test hook support detection.""" - adapter = NewPlatformAdapter() - hooks = adapter.get_hook_support() - assert "stop_hooks" in hooks - # ... more assertions - - def test_command_generation(self): - """Test command file generation.""" - # ... test implementation - - # ... 
more tests for 100% coverage -``` - -### README.md - -Add to the existing README.md: - -```markdown -## Supported Platforms - -- **Claude Code** - Anthropic's CLI for Claude -- **** - [Brief description] - -## Installation - -### - -```bash -deepwork install --platform -``` - -[Any platform-specific notes] -``` - -## Quality Criteria - -- Platform adapter class added to `src/deepwork/adapters.py`: - - Inherits from `PlatformAdapter` - - Implements all required methods - - Registered in `PLATFORM_ADAPTERS` -- Templates created in `src/deepwork/templates//`: - - `command.md.j2` exists and renders correctly - - Format matches platform's expected command format -- Tests created in `tests/`: - - Cover all new adapter functionality - - Cover template rendering - - All tests pass -- Test coverage is 100% for new code: - - Run `uv run pytest --cov=src/deepwork --cov-report=term-missing` - - No uncovered lines in new code -- README.md updated: - - Platform listed in supported platforms - - Installation command documented - - Any platform-specific notes included -- When all criteria are met, include `✓ Quality Criteria Met` in your response - -## Context - -This is the core implementation step. The adapter you create will be responsible for: -- Determining where command files are placed -- Generating command file content from job definitions -- Handling platform-specific features and hooks - -The templates use Jinja2 and should produce files that match exactly what the platform expects. Reference the CLI configuration documentation frequently to ensure compatibility. - -## Tips - -- Study the existing `ClaudeAdapter` as a reference implementation -- Run tests frequently as you implement -- Use `--cov-report=html` for a detailed coverage report -- If a test is hard to write, the code might need refactoring -- Template syntax errors often show up at runtime - test early - - -### Job Context - -A workflow for adding support for a new AI platform (like Cursor, Windsurf, etc.) to DeepWork. - -The **integrate** workflow guides you through four phases: -1. **Research**: Capture the platform's CLI configuration and hooks system documentation -2. **Add Capabilities**: Update the job schema and adapters with any new hook events -3. **Implement**: Create the platform adapter, templates, tests (100% coverage), and README updates -4. **Verify**: Ensure installation works correctly and produces expected files - -The workflow ensures consistency across all supported platforms and maintains -comprehensive test coverage for new functionality. - -**Important Notes**: -- Only hooks available on slash command definitions should be captured -- Each existing adapter must be updated when new hooks are added (typically with null values) -- Tests must achieve 100% coverage for any new functionality -- Installation verification confirms the platform integrates correctly with existing jobs - - -## Required Inputs - - -**Files from Previous Steps** - Read these first: -- `job_schema.py` (from `add_capabilities`) -- `adapters.py` (from `add_capabilities`) -- `cli_configuration.md` (from `research`) - -## Work Branch - -Use branch format: `deepwork/add_platform-[instance]-YYYYMMDD` - -- If on a matching work branch: continue using it -- If on main/master: create new branch with `git checkout -b deepwork/add_platform-[instance]-$(date +%Y%m%d)` - -## Outputs - -**Required outputs**: -- `templates/` (directory) -- `tests/` (directory) -- `README.md` - -## On Completion - -1. Verify outputs are created -2. 
Inform user: "Step 3/4 complete, outputs: templates/, tests/, README.md" -3. **Tell user next command**: `/add_platform:verify` - ---- - -**Reference files**: `.deepwork/jobs/add_platform/job.yml`, `.deepwork/jobs/add_platform/steps/implement.md` -""" \ No newline at end of file diff --git a/.gemini/skills/add_platform/index.toml b/.gemini/skills/add_platform/index.toml deleted file mode 100644 index f97f6792..00000000 --- a/.gemini/skills/add_platform/index.toml +++ /dev/null @@ -1,75 +0,0 @@ -# add_platform -# -# Adds a new AI platform to DeepWork with adapter, templates, and tests. Use when integrating Cursor, Windsurf, or other AI coding tools. -# -# Generated by DeepWork - do not edit manually - -description = "Adds a new AI platform to DeepWork with adapter, templates, and tests. Use when integrating Cursor, Windsurf, or other AI coding tools." - -prompt = """ -# add_platform - -**Multi-step workflow**: Adds a new AI platform to DeepWork with adapter, templates, and tests. Use when integrating Cursor, Windsurf, or other AI coding tools. - -> **NOTE**: Gemini CLI requires manual command invocation. After each step, tell the user which command to run next. - -A workflow for adding support for a new AI platform (like Cursor, Windsurf, etc.) to DeepWork. - -The **integrate** workflow guides you through four phases: -1. **Research**: Capture the platform's CLI configuration and hooks system documentation -2. **Add Capabilities**: Update the job schema and adapters with any new hook events -3. **Implement**: Create the platform adapter, templates, tests (100% coverage), and README updates -4. **Verify**: Ensure installation works correctly and produces expected files - -The workflow ensures consistency across all supported platforms and maintains -comprehensive test coverage for new functionality. - -**Important Notes**: -- Only hooks available on slash command definitions should be captured -- Each existing adapter must be updated when new hooks are added (typically with null values) -- Tests must achieve 100% coverage for any new functionality -- Installation verification confirms the platform integrates correctly with existing jobs - - -## Available Steps - -1. **research** - Captures CLI configuration and hooks system documentation for the new platform. Use when starting platform integration. - Command: `/add_platform:research` -2. **add_capabilities** - Updates job schema and adapters with any new hook events the platform supports. Use after research to extend DeepWork's hook system. (requires: research) - Command: `/add_platform:add_capabilities` -3. **implement** - Creates platform adapter, templates, tests with 100% coverage, and README documentation. Use after adding hook capabilities. (requires: research, add_capabilities) - Command: `/add_platform:implement` -4. **verify** - Sets up platform directories and verifies deepwork install works correctly. Use after implementation to confirm integration. 
(requires: implement) - Command: `/add_platform:verify` - -## Execution Instructions - -### Step 1: Analyze Intent - -Parse any text following `/add_platform` to determine user intent: -- "research" or related terms → start at `/add_platform:research` -- "add_capabilities" or related terms → start at `/add_platform:add_capabilities` -- "implement" or related terms → start at `/add_platform:implement` -- "verify" or related terms → start at `/add_platform:verify` - -### Step 2: Direct User to Starting Step - -Tell the user which command to run: -``` -/add_platform:research -``` - -### Step 3: Guide Through Workflow - -After each step completes, tell the user the next command to run until workflow is complete. - -### Handling Ambiguous Intent - -If user intent is unclear: -- Present available steps as numbered options -- Ask user to select the starting point - -## Reference - -- Job definition: `.deepwork/jobs/add_platform/job.yml` -""" \ No newline at end of file diff --git a/.gemini/skills/add_platform/research.toml b/.gemini/skills/add_platform/research.toml deleted file mode 100644 index 9e0175c5..00000000 --- a/.gemini/skills/add_platform/research.toml +++ /dev/null @@ -1,259 +0,0 @@ -# add_platform:research -# -# Captures CLI configuration and hooks system documentation for the new platform. Use when starting platform integration. -# -# Generated by DeepWork - do not edit manually - -description = "Captures CLI configuration and hooks system documentation for the new platform. Use when starting platform integration." - -prompt = """ -# add_platform:research - -**Step 1/4** in **add_platform** workflow - -> Adds a new AI platform to DeepWork with adapter, templates, and tests. Use when integrating Cursor, Windsurf, or other AI coding tools. - - -## Instructions - -**Goal**: Captures CLI configuration and hooks system documentation for the new platform. Use when starting platform integration. - -# Research Platform Documentation - -## Objective - -Capture comprehensive documentation for the new AI platform's CLI configuration and hooks system, creating a local reference that will guide the implementation phases. - -## Task - -Research the target platform's official documentation and create two focused documentation files that will serve as the foundation for implementing platform support in DeepWork. - -### Process - -1. **Identify the platform's documentation sources** - - Find the official documentation website - - Locate the CLI/agent configuration documentation - - Find the hooks or customization system documentation - - Note: Focus ONLY on slash command/custom command hooks, not general CLI hooks - -2. **Gather CLI configuration documentation** - - How is the CLI configured? (config files, environment variables, etc.) - - Where are custom commands/skills stored? - - What is the command file format? (markdown, YAML, etc.) - - What metadata or frontmatter is supported? - - How does the platform discover and load commands? - -3. **Gather hooks system documentation** - - What hooks are available for custom command definitions? - - Focus on hooks that trigger during or after command execution - - Examples: `stop_hooks`, `pre_hooks`, `post_hooks`, validation hooks - - Document the syntax and available hook types - - **Important**: Only document hooks available on slash command definitions, not general CLI hooks - -4. 
**Create the documentation files** - - Place files in `doc/platforms//` - - Each file must have a header comment with source and date - - Content should be comprehensive but focused - -## Output Format - -### cli_configuration.md - -Located at: `doc/platforms//cli_configuration.md` - -**Structure**: -```markdown - - -# CLI Configuration - -## Overview - -[Brief description of the platform and its CLI/agent system] - -## Configuration Files - -[Document where configuration lives and its format] - -### File Locations - -- [Location 1]: [Purpose] -- [Location 2]: [Purpose] - -### Configuration Format - -[Show the configuration file format with examples] - -## Custom Commands/Skills - -[Document how custom commands are defined] - -### Command Location - -[Where command files are stored] - -### Command File Format - -[The format of command files - markdown, YAML, etc.] - -### Metadata/Frontmatter - -[What metadata fields are supported in command files] - -```[format] -[Example of a minimal command file] -``` - -## Command Discovery - -[How the platform discovers and loads commands] - -## Platform-Specific Features - -[Any unique features relevant to command configuration] -``` - -### hooks_system.md - -Located at: `doc/platforms//hooks_system.md` - -**Structure**: -```markdown - - -# Hooks System (Command Definitions) - -## Overview - -[Brief description of hooks available for command definitions] - -**Important**: This document covers ONLY hooks available within slash command/skill definitions, not general CLI hooks. - -## Available Hooks - -### [Hook Name 1] - -**Purpose**: [What this hook does] - -**Syntax**: -```yaml -[hook_name]: - - [configuration] -``` - -**Example**: -```yaml -[Complete example of using this hook] -``` - -**Behavior**: [When and how this hook executes] - -### [Hook Name 2] - -[Repeat for each available hook] - -## Hook Execution Order - -[Document the order in which hooks execute, if multiple are supported] - -## Comparison with Other Platforms - -| Feature | | Claude Code | Other | -|---------|-----------|-------------|-------| -| [Feature 1] | [Support] | [Support] | [Support] | - -## Limitations - -[Any limitations or caveats about the hooks system] -``` - -## Quality Criteria - -- Both files exist in `doc/platforms//` -- Each file has a header comment with: - - Last updated date (YYYY-MM-DD format) - - Source URL where documentation was obtained -- `cli_configuration.md` comprehensively covers: - - Configuration file locations and format - - Custom command file format and location - - Command discovery mechanism -- `hooks_system.md` comprehensively covers: - - All hooks available for slash command definitions - - Syntax and examples for each hook - - NOT general CLI hooks (only command-level hooks) -- Documentation is detailed enough to implement the platform adapter -- No extraneous topics (only CLI config and command hooks) -- When all criteria are met, include `✓ Quality Criteria Met` in your response - -## Context - -This is the foundation step for adding a new platform to DeepWork. The documentation you capture here will be referenced throughout the implementation process: -- CLI configuration informs how to generate command files -- Hooks documentation determines what features the adapter needs to support -- This documentation becomes a permanent reference in `doc/platforms/` - -Take time to be thorough - incomplete documentation will slow down subsequent steps. 
- -## Tips - -- Use the platform's official documentation as the primary source -- If documentation is sparse, check GitHub repos, community guides, or changelog entries -- When in doubt about whether something is a "command hook" vs "CLI hook", err on the side of inclusion and note the ambiguity -- Include code examples from the official docs where available - - -### Job Context - -A workflow for adding support for a new AI platform (like Cursor, Windsurf, etc.) to DeepWork. - -The **integrate** workflow guides you through four phases: -1. **Research**: Capture the platform's CLI configuration and hooks system documentation -2. **Add Capabilities**: Update the job schema and adapters with any new hook events -3. **Implement**: Create the platform adapter, templates, tests (100% coverage), and README updates -4. **Verify**: Ensure installation works correctly and produces expected files - -The workflow ensures consistency across all supported platforms and maintains -comprehensive test coverage for new functionality. - -**Important Notes**: -- Only hooks available on slash command definitions should be captured -- Each existing adapter must be updated when new hooks are added (typically with null values) -- Tests must achieve 100% coverage for any new functionality -- Installation verification confirms the platform integrates correctly with existing jobs - - -## Required Inputs - -**User Parameters** - Gather from user before starting: -- **platform_name**: Clear identifier of the platform (e.g., 'cursor', 'windsurf-editor', 'github-copilot-chat') - - -## Work Branch - -Use branch format: `deepwork/add_platform-[instance]-YYYYMMDD` - -- If on a matching work branch: continue using it -- If on main/master: create new branch with `git checkout -b deepwork/add_platform-[instance]-$(date +%Y%m%d)` - -## Outputs - -**Required outputs**: -- `cli_configuration.md` -- `hooks_system.md` - -## On Completion - -1. Verify outputs are created -2. Inform user: "Step 1/4 complete, outputs: cli_configuration.md, hooks_system.md" -3. **Tell user next command**: `/add_platform:add_capabilities` - ---- - -**Reference files**: `.deepwork/jobs/add_platform/job.yml`, `.deepwork/jobs/add_platform/steps/research.md` -""" \ No newline at end of file diff --git a/.gemini/skills/add_platform/verify.toml b/.gemini/skills/add_platform/verify.toml deleted file mode 100644 index 3d8f081c..00000000 --- a/.gemini/skills/add_platform/verify.toml +++ /dev/null @@ -1,183 +0,0 @@ -# add_platform:verify -# -# Sets up platform directories and verifies deepwork install works correctly. Use after implementation to confirm integration. -# -# Generated by DeepWork - do not edit manually - -description = "Sets up platform directories and verifies deepwork install works correctly. Use after implementation to confirm integration." - -prompt = """ -# add_platform:verify - -**Step 4/4** in **add_platform** workflow - -> Adds a new AI platform to DeepWork with adapter, templates, and tests. Use when integrating Cursor, Windsurf, or other AI coding tools. - -## Prerequisites (Verify First) - -Before proceeding, confirm these steps are complete: -- `/add_platform:implement` - -## Instructions - -**Goal**: Sets up platform directories and verifies deepwork install works correctly. Use after implementation to confirm integration. - -# Verify Installation - -## Objective - -Ensure the new platform integration works correctly by setting up necessary directories and running the full installation process. 
- -## Task - -Perform end-to-end verification that the new platform can be installed and that DeepWork's standard jobs work correctly with it. - -### Prerequisites - -Ensure the implementation step is complete: -- Adapter class exists in `src/deepwork/adapters.py` -- Templates exist in `src/deepwork/templates//` -- Tests pass with 100% coverage -- README.md is updated - -### Process - -1. **Set up platform directories in the DeepWork repo** - - The DeepWork repository itself should have the platform's command directory structure for testing: - - ```bash - mkdir -p - ``` - - For example: - - Claude: `.claude/commands/` - - Cursor: `.cursor/commands/` (or wherever Cursor stores commands) - -2. **Run deepwork install for the new platform** - - ```bash - deepwork install --platform - ``` - - Verify: - - Command completes without errors - - No Python exceptions or tracebacks - - Output indicates successful installation - -3. **Check that command files were created** - - List the generated command files: - ```bash - ls -la / - ``` - - Verify: - - `deepwork_jobs.define.md` exists (or equivalent for the platform) - - `deepwork_jobs.implement.md` exists - - `deepwork_jobs.refine.md` exists - - All expected step commands exist - -4. **Validate command file content** - - Read each generated command file and verify: - - Content matches the expected format for the platform - - Job metadata is correctly included - - Step instructions are properly rendered - - Any platform-specific features (hooks, frontmatter) are present - -5. **Test alongside existing platforms** - - If other platforms are already installed, verify they still work: - ```bash - deepwork install --platform claude - ls -la .claude/commands/ - ``` - - Ensure: - - New platform doesn't break existing installations - - Each platform's commands are independent - - No file conflicts or overwrites - -## Quality Criteria - -- Platform-specific directories are set up in the DeepWork repo -- `deepwork install --platform ` completes without errors -- All expected command files are created: - - deepwork_jobs.define, implement, refine - - Any other standard job commands -- Command file content is correct: - - Matches platform's expected format - - Job/step information is properly rendered - - No template errors or missing content -- Existing platforms still work (if applicable) -- No conflicts between platforms -- When all criteria are met, include `✓ Quality Criteria Met` in your response - -## Context - -This is the final validation step before the platform is considered complete. A thorough verification ensures: -- The platform actually works, not just compiles -- Standard DeepWork jobs install correctly -- The platform integrates properly with the existing system -- Users can confidently use the new platform - -Take time to verify each aspect - finding issues now is much better than having users discover them later. - -## Common Issues to Check - -- **Template syntax errors**: May only appear when rendering specific content -- **Path issues**: Platform might expect different directory structure -- **Encoding issues**: Special characters in templates or content -- **Missing hooks**: Platform adapter might not handle all hook types -- **Permission issues**: Directory creation might fail in some cases - - -### Job Context - -A workflow for adding support for a new AI platform (like Cursor, Windsurf, etc.) to DeepWork. - -The **integrate** workflow guides you through four phases: -1. 
**Research**: Capture the platform's CLI configuration and hooks system documentation -2. **Add Capabilities**: Update the job schema and adapters with any new hook events -3. **Implement**: Create the platform adapter, templates, tests (100% coverage), and README updates -4. **Verify**: Ensure installation works correctly and produces expected files - -The workflow ensures consistency across all supported platforms and maintains -comprehensive test coverage for new functionality. - -**Important Notes**: -- Only hooks available on slash command definitions should be captured -- Each existing adapter must be updated when new hooks are added (typically with null values) -- Tests must achieve 100% coverage for any new functionality -- Installation verification confirms the platform integrates correctly with existing jobs - - -## Required Inputs - - -**Files from Previous Steps** - Read these first: -- `templates/` (from `implement`) - -## Work Branch - -Use branch format: `deepwork/add_platform-[instance]-YYYYMMDD` - -- If on a matching work branch: continue using it -- If on main/master: create new branch with `git checkout -b deepwork/add_platform-[instance]-$(date +%Y%m%d)` - -## Outputs - -**Required outputs**: -- `verification_checklist.md` - -## On Completion - -1. Verify outputs are created -2. Inform user: "Step 4/4 complete, outputs: verification_checklist.md" -3. **Workflow complete**: All steps finished. Consider creating a PR to merge the work branch. - ---- - -**Reference files**: `.deepwork/jobs/add_platform/job.yml`, `.deepwork/jobs/add_platform/steps/verify.md` -""" \ No newline at end of file diff --git a/.gemini/skills/commit/commit_and_push.toml b/.gemini/skills/commit/commit_and_push.toml deleted file mode 100644 index a3172dac..00000000 --- a/.gemini/skills/commit/commit_and_push.toml +++ /dev/null @@ -1,164 +0,0 @@ -# commit:commit_and_push -# -# Verifies changed files, creates commit, and pushes to remote. Use after linting passes to finalize changes. -# -# Generated by DeepWork - do not edit manually - -description = "Verifies changed files, creates commit, and pushes to remote. Use after linting passes to finalize changes." - -prompt = """ -# commit:commit_and_push - -**Step 4/4** in **commit** workflow - -> Reviews code, runs tests, lints, and commits changes. Use when ready to commit work with quality checks. - -## Prerequisites (Verify First) - -Before proceeding, confirm these steps are complete: -- `/commit:lint` - -## Instructions - -**Goal**: Verifies changed files, creates commit, and pushes to remote. Use after linting passes to finalize changes. - -# Commit and Push - -## Objective - -Review the changed files to verify they match the agent's expectations, create a commit with an appropriate message, and push to the remote repository. - -## Task - -Check the list of changed files against what was modified during this session, ensure they match expectations, then commit and push the changes. - -### Process - -1. **Get the list of changed files** - ```bash - git status - ``` - Also run `git diff --stat` to see a summary of changes. - -2. **Verify changes match expectations** - - Compare the changed files against what you modified during this session: - - Do the modified files match what you edited? - - Are there any unexpected new files? - - Are there any unexpected deleted files? - - Do the line counts seem reasonable for the changes you made? - - If changes match expectations, proceed to the next step. 
- - If there are unexpected changes: - - Investigate why (e.g., lint auto-fixes, generated files) - - If they're legitimate side effects of your work, include them - - If they're unrelated or shouldn't be committed, use `git restore` to discard them - -3. **Update CHANGELOG.md if needed** - - If your changes include new features, bug fixes, or other notable changes: - - Add entries to the `## [Unreleased]` section of CHANGELOG.md - - Use the appropriate subsection: `### Added`, `### Changed`, `### Fixed`, or `### Removed` - - Write concise descriptions that explain the user-facing impact - - **CRITICAL: NEVER modify version numbers** - - Do NOT change the version in `pyproject.toml` - - Do NOT change version headers in CHANGELOG.md (e.g., `## [0.4.2]`) - - Do NOT rename the `## [Unreleased]` section - - Version updates are handled by the release workflow, not commits - -4. **Stage all appropriate changes** - ```bash - git add -A - ``` - Or stage specific files if some were excluded. - -5. **View recent commit messages for style reference** - ```bash - git log --oneline -10 - ``` - -6. **Create the commit** - - Generate an appropriate commit message based on: - - The changes made - - The style of recent commits - - Conventional commit format if the project uses it - - **IMPORTANT:** Use the commit job script (not `git commit` directly): - ```bash - .claude/hooks/commit_job_git_commit.sh -m "commit message here" - ``` - -7. **Push to remote** - ```bash - git push - ``` - If the branch has no upstream, use: - ```bash - git push -u origin HEAD - ``` - -## Quality Criteria - -- Changed files were verified against expectations -- CHANGELOG.md was updated with entries in [Unreleased] section (if changes warrant documentation) -- Version numbers were NOT modified (pyproject.toml version and CHANGELOG version headers unchanged) -- Commit was created with appropriate message -- Changes were pushed to remote - -## Context - -This is the final step of the commit workflow. The agent verifies that the changed files match its own expectations from the work done during the session, then commits and pushes. This catches unexpected changes while avoiding unnecessary user interruptions. - - -### Job Context - -A workflow for preparing and committing code changes with quality checks. - -The **full** workflow starts with a code review to catch issues early, runs tests until -they pass, formats and lints code with ruff, then reviews changed files -before committing and pushing. The review and lint steps use sub-agents -to reduce context usage. - -Steps: -1. review - Code review for issues, DRY opportunities, naming, and test coverage (runs in sub-agent) -2. test - Pull latest code and run tests until they pass -3. lint - Format and lint code with ruff (runs in sub-agent) -4. commit_and_push - Review changes and commit/push - - - -## Work Branch - -Use branch format: `deepwork/commit-[instance]-YYYYMMDD` - -- If on a matching work branch: continue using it -- If on main/master: create new branch with `git checkout -b deepwork/commit-[instance]-$(date +%Y%m%d)` - -## Outputs - -**Required outputs**: -- `changes_committed` - -## Quality Validation (Manual) - -**NOTE**: Gemini CLI does not support automated validation. Manually verify criteria before completing. - -**Criteria (all must be satisfied)**: -1. Changed files were verified against expectations -2. CHANGELOG.md was updated with entries in [Unreleased] section (if changes warrant documentation) -3. 
Version numbers were NOT modified (pyproject.toml version and CHANGELOG version headers unchanged) -4. Commit was created with appropriate message -5. Changes were pushed to remote -## On Completion - -1. Verify outputs are created -2. Inform user: "Step 4/4 complete, outputs: changes_committed" -3. **Workflow complete**: All steps finished. Consider creating a PR to merge the work branch. - ---- - -**Reference files**: `.deepwork/jobs/commit/job.yml`, `.deepwork/jobs/commit/steps/commit_and_push.md` -""" \ No newline at end of file diff --git a/.gemini/skills/commit/index.toml b/.gemini/skills/commit/index.toml deleted file mode 100644 index 09eff3ef..00000000 --- a/.gemini/skills/commit/index.toml +++ /dev/null @@ -1,71 +0,0 @@ -# commit -# -# Reviews code, runs tests, lints, and commits changes. Use when ready to commit work with quality checks. -# -# Generated by DeepWork - do not edit manually - -description = "Reviews code, runs tests, lints, and commits changes. Use when ready to commit work with quality checks." - -prompt = """ -# commit - -**Multi-step workflow**: Reviews code, runs tests, lints, and commits changes. Use when ready to commit work with quality checks. - -> **NOTE**: Gemini CLI requires manual command invocation. After each step, tell the user which command to run next. - -A workflow for preparing and committing code changes with quality checks. - -The **full** workflow starts with a code review to catch issues early, runs tests until -they pass, formats and lints code with ruff, then reviews changed files -before committing and pushing. The review and lint steps use sub-agents -to reduce context usage. - -Steps: -1. review - Code review for issues, DRY opportunities, naming, and test coverage (runs in sub-agent) -2. test - Pull latest code and run tests until they pass -3. lint - Format and lint code with ruff (runs in sub-agent) -4. commit_and_push - Review changes and commit/push - - -## Available Steps - -1. **review** - Reviews changed code for issues, DRY opportunities, naming clarity, and test coverage using a sub-agent. Use as the first step before testing. - Command: `/commit:review` -2. **test** - Pulls latest code and runs tests until all pass. Use after code review passes to verify changes work correctly. (requires: review) - Command: `/commit:test` -3. **lint** - Formats and lints code with ruff using a sub-agent. Use after tests pass to ensure code style compliance. (requires: test) - Command: `/commit:lint` -4. **commit_and_push** - Verifies changed files, creates commit, and pushes to remote. Use after linting passes to finalize changes. (requires: lint) - Command: `/commit:commit_and_push` - -## Execution Instructions - -### Step 1: Analyze Intent - -Parse any text following `/commit` to determine user intent: -- "review" or related terms → start at `/commit:review` -- "test" or related terms → start at `/commit:test` -- "lint" or related terms → start at `/commit:lint` -- "commit_and_push" or related terms → start at `/commit:commit_and_push` - -### Step 2: Direct User to Starting Step - -Tell the user which command to run: -``` -/commit:review -``` - -### Step 3: Guide Through Workflow - -After each step completes, tell the user the next command to run until workflow is complete. 
- -### Handling Ambiguous Intent - -If user intent is unclear: -- Present available steps as numbered options -- Ask user to select the starting point - -## Reference - -- Job definition: `.deepwork/jobs/commit/job.yml` -""" \ No newline at end of file diff --git a/.gemini/skills/commit/lint.toml b/.gemini/skills/commit/lint.toml deleted file mode 100644 index 74589f30..00000000 --- a/.gemini/skills/commit/lint.toml +++ /dev/null @@ -1,143 +0,0 @@ -# commit:lint -# -# Formats and lints code with ruff using a sub-agent. Use after tests pass to ensure code style compliance. -# -# Generated by DeepWork - do not edit manually - -description = "Formats and lints code with ruff using a sub-agent. Use after tests pass to ensure code style compliance." - -prompt = """ -# commit:lint - -**Step 3/4** in **commit** workflow - -> Reviews code, runs tests, lints, and commits changes. Use when ready to commit work with quality checks. - -## Prerequisites (Verify First) - -Before proceeding, confirm these steps are complete: -- `/commit:test` - -## Instructions - -**Goal**: Formats and lints code with ruff using a sub-agent. Use after tests pass to ensure code style compliance. - -# Lint Code - -## Objective - -Format and lint the codebase using ruff to ensure code quality and consistency. - -## Task - -Run ruff format and ruff check to format and lint the code. This step should be executed using a sub-agent to conserve context in the main conversation. - -### Process - -**IMPORTANT**: Use the Task tool to spawn a sub-agent for this work. This saves context in the main conversation. Use the `haiku` model for speed. - -1. **Spawn a sub-agent to run linting** - - Use the Task tool with these parameters: - - `subagent_type`: "Bash" - - `model`: "haiku" - - `prompt`: See below - - The sub-agent should: - - a. **Run ruff format** - ```bash - ruff format . - ``` - This formats the code according to ruff's style rules. - - b. **Run ruff check with auto-fix** - ```bash - ruff check --fix . - ``` - This checks for lint errors and automatically fixes what it can. - - c. **Run ruff check again to verify** - ```bash - ruff check . - ``` - Capture the final output to verify no remaining issues. - -2. **Review sub-agent results** - - Check that both format and check completed successfully - - Note any remaining lint issues that couldn't be auto-fixed - -3. **Handle remaining issues** - - If there are lint errors that couldn't be auto-fixed, fix them manually - - Re-run ruff check to verify - -## Example Sub-Agent Prompt - -``` -Run ruff to format and lint the codebase: - -1. Run: ruff format . -2. Run: ruff check --fix . -3. Run: ruff check . (to verify no remaining issues) - -Report the results of each command. -``` - -## Quality Criteria - -- ruff format was run successfully -- ruff check was run with --fix flag -- No remaining lint errors - -## Context - -This step ensures code quality and consistency before committing. It runs after tests pass and before the commit step. Using a sub-agent keeps the main conversation context clean for the commit review. - - -### Job Context - -A workflow for preparing and committing code changes with quality checks. - -The **full** workflow starts with a code review to catch issues early, runs tests until -they pass, formats and lints code with ruff, then reviews changed files -before committing and pushing. The review and lint steps use sub-agents -to reduce context usage. - -Steps: -1. 
review - Code review for issues, DRY opportunities, naming, and test coverage (runs in sub-agent) -2. test - Pull latest code and run tests until they pass -3. lint - Format and lint code with ruff (runs in sub-agent) -4. commit_and_push - Review changes and commit/push - - - -## Work Branch - -Use branch format: `deepwork/commit-[instance]-YYYYMMDD` - -- If on a matching work branch: continue using it -- If on main/master: create new branch with `git checkout -b deepwork/commit-[instance]-$(date +%Y%m%d)` - -## Outputs - -**Required outputs**: -- `code_formatted` - -## Quality Validation (Manual) - -**NOTE**: Gemini CLI does not support automated validation. Manually verify criteria before completing. - -**Criteria (all must be satisfied)**: -1. ruff format was run successfully -2. ruff check was run with --fix flag -3. No remaining lint errors -## On Completion - -1. Verify outputs are created -2. Inform user: "Step 3/4 complete, outputs: code_formatted" -3. **Tell user next command**: `/commit:commit_and_push` - ---- - -**Reference files**: `.deepwork/jobs/commit/job.yml`, `.deepwork/jobs/commit/steps/lint.md` -""" \ No newline at end of file diff --git a/.gemini/skills/commit/review.toml b/.gemini/skills/commit/review.toml deleted file mode 100644 index 9423e1a3..00000000 --- a/.gemini/skills/commit/review.toml +++ /dev/null @@ -1,124 +0,0 @@ -# commit:review -# -# Reviews changed code for issues, DRY opportunities, naming clarity, and test coverage using a sub-agent. Use as the first step before testing. -# -# Generated by DeepWork - do not edit manually - -description = "Reviews changed code for issues, DRY opportunities, naming clarity, and test coverage using a sub-agent. Use as the first step before testing." - -prompt = """ -# commit:review - -**Step 1/4** in **commit** workflow - -> Reviews code, runs tests, lints, and commits changes. Use when ready to commit work with quality checks. - - -## Instructions - -**Goal**: Reviews changed code for issues, DRY opportunities, naming clarity, and test coverage using a sub-agent. Use as the first step before testing. - -# Code Review - -## Objective - -Review changed code for quality issues before running tests. This catches problems early and ensures code meets quality standards. - -## Task - -Use a sub-agent to review the staged/changed code and identify issues that should be fixed before committing. - -### Process - -**IMPORTANT**: Use the Task tool to spawn a sub-agent for this review. This saves context in the main conversation. - -1. **Get the list of changed files** - ```bash - git diff --name-only HEAD - git diff --name-only --staged - ``` - Combine these to get all files that have been modified. - -2. **Spawn a sub-agent to review the code** - - Use the Task tool with these parameters: - - `subagent_type`: "general-purpose" - - `prompt`: Instruct the sub-agent to: - - Read the code review standards from `doc/code_review_standards.md` - - Read each of the changed files - - Review each file against the standards - - Report issues found with file, line number, severity, and suggested fix - -3. **Review sub-agent findings** - - Examine each issue identified - - Prioritize issues by severity - -4. **Fix identified issues** - - Address each issue found by the review - - For DRY violations: extract shared code into functions/modules - - For naming issues: rename to be clearer - - For missing tests: add appropriate test cases - - For bugs: fix the underlying issue - -5. 
**Re-run review if significant changes made** - - If you made substantial changes, consider running another review pass - - Ensure fixes didn't introduce new issues - -## Quality Criteria - -- Changed files were identified -- Sub-agent read the code review standards and reviewed all changed files -- All identified issues were addressed or documented as intentional - -## Context - -This is the first step of the commit workflow. Code review happens before tests to catch quality issues early. The sub-agent approach keeps the main conversation context clean while providing thorough review coverage. - - -### Job Context - -A workflow for preparing and committing code changes with quality checks. - -The **full** workflow starts with a code review to catch issues early, runs tests until -they pass, formats and lints code with ruff, then reviews changed files -before committing and pushing. The review and lint steps use sub-agents -to reduce context usage. - -Steps: -1. review - Code review for issues, DRY opportunities, naming, and test coverage (runs in sub-agent) -2. test - Pull latest code and run tests until they pass -3. lint - Format and lint code with ruff (runs in sub-agent) -4. commit_and_push - Review changes and commit/push - - - -## Work Branch - -Use branch format: `deepwork/commit-[instance]-YYYYMMDD` - -- If on a matching work branch: continue using it -- If on main/master: create new branch with `git checkout -b deepwork/commit-[instance]-$(date +%Y%m%d)` - -## Outputs - -**Required outputs**: -- `code_reviewed` - -## Quality Validation (Manual) - -**NOTE**: Gemini CLI does not support automated validation. Manually verify criteria before completing. - -**Criteria (all must be satisfied)**: -1. Changed files were identified -2. Sub-agent reviewed the code for general issues, DRY opportunities, naming clarity, and test coverage -3. All identified issues were addressed or documented as intentional -## On Completion - -1. Verify outputs are created -2. Inform user: "Step 1/4 complete, outputs: code_reviewed" -3. **Tell user next command**: `/commit:test` - ---- - -**Reference files**: `.deepwork/jobs/commit/job.yml`, `.deepwork/jobs/commit/steps/review.md` -""" \ No newline at end of file diff --git a/.gemini/skills/commit/test.toml b/.gemini/skills/commit/test.toml deleted file mode 100644 index c45f623a..00000000 --- a/.gemini/skills/commit/test.toml +++ /dev/null @@ -1,123 +0,0 @@ -# commit:test -# -# Pulls latest code and runs tests until all pass. Use after code review passes to verify changes work correctly. -# -# Generated by DeepWork - do not edit manually - -description = "Pulls latest code and runs tests until all pass. Use after code review passes to verify changes work correctly." - -prompt = """ -# commit:test - -**Step 2/4** in **commit** workflow - -> Reviews code, runs tests, lints, and commits changes. Use when ready to commit work with quality checks. - -## Prerequisites (Verify First) - -Before proceeding, confirm these steps are complete: -- `/commit:review` - -## Instructions - -**Goal**: Pulls latest code and runs tests until all pass. Use after code review passes to verify changes work correctly. - -# Run Tests - -## Objective - -Run the project's test suite and fix any failing tests until all tests pass. - -## Task - -Execute the test suite for the project and iteratively fix any failures until all tests pass. - -### Process - -1. 
**Pull latest code from the branch** - - Run `git pull` to fetch and merge any changes from the remote - - If there are merge conflicts, resolve them before proceeding - - This ensures you're testing against the latest code - -2. **Detect or use the test command** - - If a test command was provided, use that - - Otherwise, auto-detect the project type and determine the appropriate test command: - - Python: `pytest`, `python -m pytest`, `uv run pytest` - - Node.js: `npm test`, `yarn test`, `bun test` - - Go: `go test ./...` - - Rust: `cargo test` - - Check `package.json`, `pyproject.toml`, `Cargo.toml`, `go.mod` for hints - -3. **Run the tests** - - Execute the test command - - Capture the output - -4. **Analyze failures** - - If tests pass, proceed to output - - If tests fail, analyze the failure messages - - Identify the root cause of each failure - -5. **Fix failing tests** - - Make the necessary code changes to fix failures - - This may involve fixing bugs in implementation code or updating tests - - Re-run tests after each fix - -6. **Iterate until passing** - - Continue the fix/test cycle until all tests pass - -## Quality Criteria - -- Latest code was pulled from the branch -- All tests are passing - -## Context - -This step runs after code review. Tests must pass before proceeding to lint and commit. This ensures code quality and prevents broken code from being committed. If tests fail due to issues introduced by the code review fixes, iterate on the fixes until tests pass. - - -### Job Context - -A workflow for preparing and committing code changes with quality checks. - -The **full** workflow starts with a code review to catch issues early, runs tests until -they pass, formats and lints code with ruff, then reviews changed files -before committing and pushing. The review and lint steps use sub-agents -to reduce context usage. - -Steps: -1. review - Code review for issues, DRY opportunities, naming, and test coverage (runs in sub-agent) -2. test - Pull latest code and run tests until they pass -3. lint - Format and lint code with ruff (runs in sub-agent) -4. commit_and_push - Review changes and commit/push - - - -## Work Branch - -Use branch format: `deepwork/commit-[instance]-YYYYMMDD` - -- If on a matching work branch: continue using it -- If on main/master: create new branch with `git checkout -b deepwork/commit-[instance]-$(date +%Y%m%d)` - -## Outputs - -**Required outputs**: -- `tests_passing` - -## Quality Validation (Manual) - -**NOTE**: Gemini CLI does not support automated validation. Manually verify criteria before completing. - -**Criteria (all must be satisfied)**: -1. Latest code was pulled from the branch -2. All tests are passing -## On Completion - -1. Verify outputs are created -2. Inform user: "Step 2/4 complete, outputs: tests_passing" -3. **Tell user next command**: `/commit:lint` - ---- - -**Reference files**: `.deepwork/jobs/commit/job.yml`, `.deepwork/jobs/commit/steps/test.md` -""" \ No newline at end of file diff --git a/.gemini/skills/deepwork_jobs/define.toml b/.gemini/skills/deepwork_jobs/define.toml deleted file mode 100644 index 8a705168..00000000 --- a/.gemini/skills/deepwork_jobs/define.toml +++ /dev/null @@ -1,537 +0,0 @@ -# deepwork_jobs:define -# -# Creates a job.yml specification by gathering workflow requirements through structured questions. Use when starting a new multi-step workflow. 
-# -# Generated by DeepWork - do not edit manually - -description = "Creates a job.yml specification by gathering workflow requirements through structured questions. Use when starting a new multi-step workflow." - -prompt = """ -# deepwork_jobs:define - -**Step 1/4** in **deepwork_jobs** workflow - -> Creates and manages multi-step AI workflows. Use when defining, implementing, or improving DeepWork jobs. - - -## Instructions - -**Goal**: Creates a job.yml specification by gathering workflow requirements through structured questions. Use when starting a new multi-step workflow. - -# Define Job Specification - -## Objective - -Create a `job.yml` specification file that defines the structure of a new DeepWork job by thoroughly understanding the user's workflow requirements through an interactive question-and-answer process. - -## Task - -Guide the user through defining a job specification by asking structured questions. **Do not attempt to create the specification without first fully understanding the user's needs.** - -**Important**: Use the AskUserQuestion tool to ask structured questions when gathering information from the user. This provides a better user experience with clear options and guided choices. - -The output of this step is **only** the `job.yml` file - a complete specification of the workflow. The actual step instruction files will be created in the next step (`implement`). - -### Step 1: Understand the Job Purpose - -Start by asking structured questions to understand what the user wants to accomplish: - -1. **What is the overall goal of this workflow?** - - What complex task are they trying to accomplish? - - What domain is this in? (e.g., research, marketing, development, reporting) - - How often will they run this workflow? - -2. **What does success look like?** - - What's the final deliverable or outcome? - - Who is the audience for the output? - - What quality criteria matter most? - -3. **What are the major phases?** - - Ask them to describe the workflow at a high level - - What are the distinct stages from start to finish? - - Are there any dependencies between phases? - -### Step 1.5: Detect Document-Oriented Workflows - -**Check for document-focused patterns** in the user's description: -- Keywords: "report", "summary", "document", "create", "monthly", "quarterly", "for stakeholders", "for leadership" -- Final deliverable is a specific document (e.g., "AWS spending report", "competitive analysis", "sprint summary") -- Recurring documents with consistent structure - -**If a document-oriented workflow is detected:** - -1. Inform the user: "This workflow produces a specific document type. I recommend defining a doc spec first to ensure consistent quality." - -2. Ask structured questions to understand if they want to: - - Create a doc spec for this document - - Use an existing doc spec (if any exist in `.deepwork/doc_specs/`) - - Skip doc spec and proceed with simple outputs - -### Step 1.6: Define the Doc Spec (if needed) - -When creating a doc spec, gather the following information: - -1. **Document Identity** - - What is the document called? (e.g., "Monthly AWS Spending Report") - - Brief description of its purpose - - Where should these documents be stored? (path patterns like `finance/aws-reports/*.md`) - -2. **Audience and Context** - - Who reads this document? (target audience) - - How often is it produced? (frequency) - -3. 
**Quality Criteria** (3-5 criteria, each with name and description) - - **Important**: Doc spec quality criteria define requirements for the **output document itself**, not the process of creating it. Focus on what the finished document must contain or achieve. - - Examples for a spending report: - - **Visualization**: Must include charts showing spend breakdown by service - - **Variance Analysis**: Must compare current month against previous with percentages - - **Action Items**: Must include recommended cost optimization actions - - **Note**: When a doc spec is created for a step's output, the step should generally NOT have separate `quality_criteria` in the job.yml. The doc spec's criteria cover output quality. Only add step-level quality_criteria if there are essential process requirements (e.g., "must use specific tool"), and minimize these when possible. - -4. **Document Structure** - - What sections should it have? - - Any required elements (tables, charts, summaries)? - -### Step 1.7: Create the doc spec File (if needed) - -Create the doc spec file at `.deepwork/doc_specs/[doc_spec_name].md`: - -**Template reference**: See `.deepwork/jobs/deepwork_jobs/templates/doc_spec.md.template` for the standard structure. - -**Complete example**: See `.deepwork/doc_specs/job_spec.md` for a fully worked example (the doc spec for job.yml files). - -After creating the doc spec, proceed to Step 2 with the doc spec reference for the final step's output. - -### Step 2: Define Each Step - -For each major phase they mentioned, ask structured questions to gather details: - -1. **Step Purpose** - - What exactly does this step accomplish? - - What is the input to this step? - - What is the output from this step? - -2. **Step Inputs** - - What information is needed to start this step? - - Does it need user-provided parameters? (e.g., topic, target audience) - - Does it need files from previous steps? - - What format should inputs be in? - -3. **Step Outputs** - - What files or artifacts does this step produce? - - What format should the output be in? (markdown, YAML, JSON, etc.) - - Where should each output be saved? (filename/path) - - Should outputs be organized in subdirectories? (e.g., `reports/`, `data/`, `drafts/`) - - Will other steps need this output? - - **Does this output have a doc spec?** If a doc spec was created in Step 1.6/1.7, reference it for the appropriate output - - #### Work Product Storage Guidelines - - **Key principle**: Job outputs belong in the main repository directory structure, not in dot-directories. The `.deepwork/` directory is for job definitions and configuration only. 
- - **Why this matters**: - - **Version control**: Work products in the main repo are tracked by git and visible in PRs - - **Discoverability**: Team members can find outputs without knowing about DeepWork internals - - **Tooling compatibility**: IDEs, search tools, and CI/CD work naturally with standard paths - - **Glob patterns**: Well-structured paths enable powerful file matching (e.g., `competitive_research/**/*.md`) - - **Good output path patterns**: - ``` - competitive_research/competitors_list.md - competitive_research/acme_corp/research.md - operations/reports/2026-01/spending_analysis.md - docs/api/endpoints.md - ``` - - **Avoid these patterns**: - ``` - .deepwork/outputs/report.md # Hidden in dot-directory - output.md # Too generic, no context - research.md # Unclear which research - temp/draft.md # Transient-sounding paths - ``` - - **Organizing multi-file outputs**: - - Use the job name as a top-level folder when outputs are job-specific - - Use parameterized paths for per-entity outputs: `competitive_research/[competitor_name]/` - - Match existing project conventions when extending a codebase - - **When to include dates in paths**: - - **Include date** for periodic outputs where each version is retained (e.g., monthly reports, quarterly reviews, weekly summaries). These accumulate over time and historical versions remain useful. - ``` - operations/reports/2026-01/spending_analysis.md # Monthly report - keep history - hr/employees/[employee_name]/quarterly_reviews/2026-Q1.pdf # Per-employee quarterly review - ``` - - **Omit date** for current-state outputs that represent the latest understanding and get updated in place. Previous versions live in git history, not separate files. - ``` - competitive_research/acme_corp/swot.md # Current SWOT - updated over time - docs/architecture/overview.md # Living document - ``` - - **Supporting materials and intermediate outputs**: - - Content generated in earlier steps to support the final output (research notes, data extracts, drafts) should be placed in a `_dataroom` folder that is a peer to the final output - - Name the dataroom folder by replacing the file extension with `_dataroom` - ``` - operations/reports/2026-01/spending_analysis.md # Final output - operations/reports/2026-01/spending_analysis_dataroom/ # Supporting materials - raw_data.csv - vendor_breakdown.md - notes.md - ``` - - This keeps supporting materials organized and discoverable without cluttering the main output location - -4. **Step Dependencies** - - Which previous steps must complete before this one? - - Are there any ordering constraints? - -5. **Step Process** (high-level understanding) - - What are the key activities in this step? - - Are there any quality checks or validation needed? - - What makes a good vs. bad output for this step? - -6. **Agent Delegation** (optional) - - Should this step be executed by a specific agent type? - - Use the `agent` field when the step should run in a forked context with a specific agent - - When `agent` is set, the generated skill automatically includes `context: fork` - - Available agent types: - - `general-purpose` - Standard agent for multi-step tasks - - ```yaml - steps: - - id: research_step - agent: general-purpose # Delegates to the general-purpose agent - ``` - -**Note**: You're gathering this information to understand what instructions will be needed, but you won't create the instruction files yet - that happens in the `implement` step. 
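To make the answers from Step 2 concrete: they typically map onto a single step entry in job.yml. The sketch below is illustrative only — the field names mirror the required fields listed in the job spec doc spec (id, name, description, instructions_file, outputs, dependencies), while the `research_competitors` step, its paths, and the comments are hypothetical; the templates referenced in Step 5 remain the authoritative structure.

```yaml
steps:
  - id: research_competitors            # lowercase with underscores
    name: "Research Competitors"
    description: "Gather product, pricing, and positioning details for each competitor."
    instructions_file: steps/research_competitors.md
    outputs:
      - file: competitive_research/[competitor_name]/research.md  # parameterized per-entity path
    dependencies:
      - identify_competitors             # any file input taken from this step must list it here
```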
- -#### Doc Spec-Aware Output Format - -When a step produces a document with a doc spec reference, use this format in job.yml: - -```yaml -outputs: - - file: reports/monthly_spending.md - doc_spec: .deepwork/doc_specs/monthly_aws_report.md -``` - -The doc spec's quality criteria will automatically be included in the generated skill, ensuring consistent document quality. - -### Capability Considerations - -When defining steps, identify any that require specialized tools: - -**Browser Automation**: If any step involves web scraping, form filling, interactive browsing, UI testing, or research requiring website visits, ask the user what browser tools they have available. For Claude Code users, **Claude in Chrome** (Anthropic's browser extension) has been tested with DeepWork and is recommended for new users. Don't assume a default—confirm the tool before designing browser-dependent steps. - -### Step 3: Validate the Workflow - -After gathering information about all steps: - -1. **Review the flow** - - Summarize the complete workflow - - Show how outputs from one step feed into the next - - Ask if anything is missing - -2. **Check for gaps** - - Are there any steps where the input isn't clearly defined? - - Are there any outputs that aren't used by later steps? - - Are there circular dependencies? - -3. **Confirm details** - - Job name (lowercase, underscores, descriptive) - - Job summary (one clear sentence, max 200 chars) - - Job description (detailed multi-line explanation) - - Version number (start with 1.0.0) - -### Step 4: Define Quality Validation (Stop Hooks) - -For each step, consider whether it would benefit from **quality validation loops**. Stop hooks allow the AI agent to iteratively refine its work until quality criteria are met. - -**Ask structured questions about quality validation:** -- "Are there specific quality criteria that must be met for this step?" -- "Would you like the agent to validate its work before completing?" -- "What would make you send the work back for revision?" - -**Stop hooks are particularly valuable for:** -- Steps with complex outputs that need multiple checks -- Steps where quality is critical (final deliverables) -- Steps with subjective quality criteria that benefit from AI self-review - -**Three types of stop hooks are supported:** - -1. **Inline Prompt** (`prompt`) - Best for simple quality criteria - ```yaml - stop_hooks: - - prompt: | - Verify the output meets these criteria: - 1. Contains at least 5 competitors - 2. Each competitor has a description - 3. Selection rationale is clear - ``` - -2. **Prompt File** (`prompt_file`) - For detailed/reusable criteria - ```yaml - stop_hooks: - - prompt_file: hooks/quality_check.md - ``` - -3. **Script** (`script`) - For programmatic validation (tests, linting) - ```yaml - stop_hooks: - - script: hooks/run_tests.sh - ``` - -**Multiple hooks can be combined:** -```yaml -stop_hooks: - - script: hooks/lint_output.sh - - prompt: "Verify the content is comprehensive and well-organized" -``` - -**Encourage prompt-based hooks** - They leverage the AI's ability to understand context and make nuanced quality judgments. Script hooks are best for objective checks (syntax, format, tests). 
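If you choose a `prompt_file` hook, the referenced file is simply the validation prompt saved as markdown so it can be reused across versions of the job. A hypothetical `hooks/quality_check.md` might read like the sketch below — the criteria are illustrative, not a required format.

```markdown
Verify the competitor research output meets these criteria before completing:

1. Each competitor entry cites at least one source
2. Pricing information is included where publicly available
3. The selection rationale explains why these competitors were chosen

If any criterion is unmet, continue working and re-check before finishing.
```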
- -### Step 5: Create the Job Directory and Specification - -Only after you have complete understanding, create the job directory and `job.yml` file: - -**First, create the directory structure** using the `make_new_job.sh` script: - -```bash -.deepwork/jobs/deepwork_jobs/make_new_job.sh [job_name] -``` - -This creates: -- `.deepwork/jobs/[job_name]/` - Main job directory -- `.deepwork/jobs/[job_name]/steps/` - For step instruction files -- `.deepwork/jobs/[job_name]/hooks/` - For custom validation scripts -- `.deepwork/jobs/[job_name]/templates/` - For example file formats -- `.deepwork/jobs/[job_name]/AGENTS.md` - Job management guidance - -**Then create the job.yml file** at `.deepwork/jobs/[job_name]/job.yml` - -(Where `[job_name]` is the name of the NEW job you're creating, e.g., `competitive_research`) - -**Doc Spec**: See `.deepwork/doc_specs/job_spec.md` for the complete specification with quality criteria. - -**Template reference**: See `.deepwork/jobs/deepwork_jobs/templates/job.yml.template` for the standard structure. - -**Complete example**: See `.deepwork/jobs/deepwork_jobs/templates/job.yml.example` for a fully worked example. - -**Important**: -- Use lowercase with underscores for job name and step IDs -- Ensure file inputs reference steps in dependencies -- No circular dependencies -- At least one output per step -- The `summary` should be concise (max 200 chars) -- The `description` should provide rich context for future refinement -- Include a `changelog` section with an initial entry for version 1.0.0 - -## Example Dialog - -``` -Claude: I'll help you define a new DeepWork job. Let's start by understanding what you're trying to accomplish. - -What complex task or workflow are you trying to create? What's the overall goal? - -User: I want to create competitive research reports for my company - -Claude: Great! Let's dig into that. A few questions: - -1. What's the final deliverable? Is it a written report, a presentation, or something else? -2. Who is the audience? Your team, executives, clients? -3. How comprehensive should these reports be? - -User: It's a written report for our product team. Should be comprehensive - analyzing 3-5 competitors. - -Claude: Perfect. Now let's think about the major phases of creating this report. What are the main stages you go through? For example: -- Identifying which competitors to analyze? -- Gathering information about them? -- Analyzing the data? -- Writing the report? - -Or is your process different? - -User: Yes, that's close. I would say: -1. Identify competitors -2. Research each one -3. Create comparison analysis -4. Write positioning recommendations - -Claude: Excellent! Now let's define each step in detail. Starting with "Identify competitors": - -1. What inputs do you need to start? Do you already have a market segment in mind, or do you need to specify that? -2. What should the output look like? A list of competitor names? More detailed? -3. How many competitors should be identified? - -User: I need to specify the market segment and product category as inputs. The output should be a markdown file with 5-7 competitors and brief descriptions of each. - -Claude: Perfect. A few more questions about that output: -1. What should the file be called? Something like `competitors_list.md`? -2. Should it go in the project root, or in a subdirectory like `research/` or `data/`? - -User: Just call it `competitors_list.md` in the project root. - -Claude: Great. Now for the "Research each one" step: -1. 
Does it use the competitor list from the first step as input? -2. What kind of research - product features, pricing, marketing, all of the above? -3. What format should the research output be in? -4. Where should that research be saved? - -[continues for each step...] - -[After gathering all information:] - -Claude: Let me summarize the workflow I've designed based on our conversation: - -**Job: competitive_research** -Summary: Systematic competitive analysis workflow for product positioning - -Description: -A comprehensive workflow for analyzing competitors in your market segment. This job helps product teams understand the competitive landscape by systematically identifying competitors, researching their offerings, creating comparison matrices, and developing strategic positioning recommendations. - -The workflow produces: -- A vetted list of key competitors -- Detailed research notes on each competitor -- A comparison matrix highlighting key differentiators -- Strategic positioning recommendations - -Designed for product teams conducting quarterly competitive analysis. - -**Steps:** - -Step 1: identify_competitors -- Input: market_segment (user), product_category (user) -- Output: competitors_list.md (5-7 competitors with descriptions) - -Step 2: research_competitors -- Input: competitors_list.md (from step 1) -- Output: research_notes.md (detailed findings for each competitor) -- Depends on: identify_competitors - -Step 3: comparative_analysis -- Input: research_notes.md (from step 2) -- Output: comparison_matrix.md (side-by-side analysis) -- Depends on: research_competitors - -Step 4: positioning_recommendations -- Input: comparison_matrix.md (from step 3) -- Output: positioning_report.md (strategic recommendations) -- Depends on: comparative_analysis - -Does this accurately capture your workflow? Any changes needed? - -User: Yes, that's perfect! - -Claude: Great! Creating the job.yml specification now... - -[Creates .deepwork/jobs/competitive_research/job.yml with the complete spec] - -✓ Job specification created! - -**File created:** -- .deepwork/jobs/competitive_research/job.yml - -**Next step:** -Run `/deepwork_jobs.review_job_spec` to validate the specification against quality criteria. -``` - -## Important Guidelines - -1. **Focus on specification only** - Don't create instruction files yet -2. **Ask structured questions** - Never skip the discovery phase; use the AskUserQuestion tool -3. **Rich context in description** - This helps with future refinement -4. **Validate understanding** - Summarize and confirm before creating -5. **Use examples** - Help users understand what good specifications look like -6. **Understand file organization** - Always ask structured questions about where outputs should be saved and if subdirectories are needed - -## Validation Rules - -Before creating the job.yml, ensure: -- Job name: lowercase, underscores, no spaces -- Version: semantic versioning (1.0.0) -- Summary: concise, under 200 characters -- Description: detailed, provides context -- Step IDs: unique, descriptive, lowercase with underscores -- Dependencies: must reference existing step IDs -- File inputs: `from_step` must be in dependencies -- At least one output per step -- Outputs can be filenames (e.g., `report.md`) or paths (e.g., `reports/analysis.md`) -- File paths in outputs should match where files will actually be created -- No circular dependencies - -## Output Format - -### job.yml - -The complete YAML specification file (example shown in Step 5 above). 
- -**Location**: `.deepwork/jobs/[job_name]/job.yml` - -(Where `[job_name]` is the name of the new job being created) - -After creating the file: -1. Inform the user that the specification is complete -2. Recommend that they review the job.yml file -3. Tell them to run `/deepwork_jobs.review_job_spec` next - - - -### Job Context - -Core commands for managing DeepWork jobs. These commands help you define new multi-step -workflows and learn from running them. - -The `new_job` workflow guides you through defining and implementing a new job by -asking structured questions about your workflow, understanding each step's inputs and outputs, -reviewing the specification, and generating all necessary files. - -The `learn` skill reflects on conversations where DeepWork jobs were run, identifies -confusion or inefficiencies, and improves job instructions. It also captures bespoke -learnings specific to the current run into AGENTS.md files in the working folder. - - -## Required Inputs - -**User Parameters** - Gather from user before starting: -- **job_purpose**: What complex task or workflow are you trying to accomplish? - - -## Work Branch - -Use branch format: `deepwork/deepwork_jobs-[instance]-YYYYMMDD` - -- If on a matching work branch: continue using it -- If on main/master: create new branch with `git checkout -b deepwork/deepwork_jobs-[instance]-$(date +%Y%m%d)` - -## Outputs - -**Required outputs**: -- `job.yml` - **Doc Spec**: DeepWork Job Specification - > YAML specification file that defines a multi-step workflow job for AI agents - **Definition**: `.deepwork/doc_specs/job_spec.md` - **Target Audience**: AI agents executing jobs and developers defining workflows - **Quality Criteria**: - 1. **Valid Identifier**: Job name must be lowercase with underscores, no spaces or special characters (e.g., `competitive_research`, `monthly_report`) - 2. **Semantic Version**: Version must follow semantic versioning format X.Y.Z (e.g., `1.0.0`, `2.1.3`) - 3. **Concise Summary**: Summary must be under 200 characters and clearly describe what the job accomplishes - 4. **Rich Description**: Description must be multi-line and explain: the problem solved, the process, expected outcomes, and target users - 5. **Changelog Present**: Must include a changelog array with at least the initial version entry. Changelog should only include one entry per branch at most - 6. **Complete Steps**: Each step must have: id (lowercase_underscores), name, description, instructions_file, outputs (at least one), and dependencies array - 7. **Valid Dependencies**: Dependencies must reference existing step IDs with no circular references - 8. **Input Consistency**: File inputs with `from_step` must reference a step that is in the dependencies array - 9. **Output Paths**: Outputs must be valid filenames or paths within the main repo directory structure, never in dot-directories like `.deepwork/`. Use specific, descriptive paths that lend themselves to glob patterns (e.g., `competitive_research/acme_corp/swot.md` or `operations/reports/2026-01/spending_analysis.md`). Parameterized paths like `[competitor_name]/` are encouraged for per-entity outputs. Avoid generic names (`output.md`, `analysis.md`) and transient-sounding paths (`temp/`, `draft.md`). Supporting materials for a final output should go in a peer `_dataroom` folder (e.g., `spending_analysis_dataroom/`). - 10. **Concise Instructions**: The content of the file, particularly the description, must not have excessively redundant information. 
It should be concise and to the point given that extra tokens will confuse the AI. - -## On Completion - -1. Verify outputs are created -2. Inform user: "Step 1/4 complete, outputs: job.yml" -3. **Tell user next command**: `/deepwork_jobs:review_job_spec` - ---- - -**Reference files**: `.deepwork/jobs/deepwork_jobs/job.yml`, `.deepwork/jobs/deepwork_jobs/steps/define.md` -""" \ No newline at end of file diff --git a/.gemini/skills/deepwork_jobs/implement.toml b/.gemini/skills/deepwork_jobs/implement.toml deleted file mode 100644 index c645746f..00000000 --- a/.gemini/skills/deepwork_jobs/implement.toml +++ /dev/null @@ -1,252 +0,0 @@ -# deepwork_jobs:implement -# -# Generates step instruction files and syncs slash commands from the job.yml specification. Use after job spec review passes. -# -# Generated by DeepWork - do not edit manually - -description = "Generates step instruction files and syncs slash commands from the job.yml specification. Use after job spec review passes." - -prompt = """ -# deepwork_jobs:implement - -**Step 3/4** in **deepwork_jobs** workflow - -> Creates and manages multi-step AI workflows. Use when defining, implementing, or improving DeepWork jobs. - -## Prerequisites (Verify First) - -Before proceeding, confirm these steps are complete: -- `/deepwork_jobs:review_job_spec` - -## Instructions - -**Goal**: Generates step instruction files and syncs slash commands from the job.yml specification. Use after job spec review passes. - -# Implement Job Steps - -## Objective - -Generate the DeepWork job directory structure and instruction files for each step based on the validated `job.yml` specification from the review_job_spec step. - -## Task - -Read the `job.yml` specification file and create all the necessary files to make the job functional, including directory structure and step instruction files. Then sync the commands to make them available. - -### Step 1: Create Directory Structure Using Script - -Run the `make_new_job.sh` script to create the standard directory structure: - -```bash -.deepwork/jobs/deepwork_jobs/make_new_job.sh [job_name] -``` - -This creates: -- `.deepwork/jobs/[job_name]/` - Main job directory -- `.deepwork/jobs/[job_name]/steps/` - Step instruction files -- `.deepwork/jobs/[job_name]/hooks/` - Custom validation scripts (with .gitkeep) -- `.deepwork/jobs/[job_name]/templates/` - Example file formats (with .gitkeep) -- `.deepwork/jobs/[job_name]/AGENTS.md` - Job management guidance - -**Note**: If the directory already exists (e.g., job.yml was created by define step), you can skip this step or manually create the additional directories: -```bash -mkdir -p .deepwork/jobs/[job_name]/hooks .deepwork/jobs/[job_name]/templates -touch .deepwork/jobs/[job_name]/hooks/.gitkeep .deepwork/jobs/[job_name]/templates/.gitkeep -``` - -### Step 2: Read and Validate the Specification - -1. **Locate the job.yml file** - - Read `.deepwork/jobs/[job_name]/job.yml` from the review_job_spec step - - Parse the YAML content - -2. **Validate the specification** - - Ensure it follows the schema (name, version, summary, description, steps) - - Check that all dependencies reference existing steps - - Verify no circular dependencies - - Confirm file inputs match dependencies - -3. 
**Extract key information** - - Job name, version, summary, description - - List of all steps with their details - - Understand the workflow structure - -### Step 3: Generate Step Instruction Files - -For each step in the job.yml, create a comprehensive instruction file at `.deepwork/jobs/[job_name]/steps/[step_id].md`. - -**Template reference**: See `.deepwork/jobs/deepwork_jobs/templates/step_instruction.md.template` for the standard structure. - -**Complete example**: See `.deepwork/jobs/deepwork_jobs/templates/step_instruction.md.example` for a fully worked example. - -**Available templates in `.deepwork/jobs/deepwork_jobs/templates/`:** -- `job.yml.template` - Job specification structure -- `step_instruction.md.template` - Step instruction file structure -- `agents.md.template` - AGENTS.md file structure -- `job.yml.example` - Complete job specification example -- `step_instruction.md.example` - Complete step instruction example - -**Guidelines for generating instructions:** - -1. **Use the job description** - The detailed description from job.yml provides crucial context -2. **Be specific** - Don't write generic instructions; tailor them to the step's purpose -3. **Provide examples** - Show what good output looks like -4. **Explain the "why"** - Help the user understand the step's role in the workflow -5. **Quality over quantity** - Detailed, actionable instructions are better than vague ones -6. **Align with stop hooks** - If the step has `stop_hooks` defined, ensure the quality criteria in the instruction file match the validation criteria in the hooks -7. **Ask structured questions** - When a step has user inputs, the instructions MUST explicitly tell the agent to "ask structured questions" using the AskUserQuestion tool to gather that information. Never use generic phrasing like "ask the user" - always use "ask structured questions" - -### Handling Stop Hooks - -If a step in the job.yml has `stop_hooks` defined, the generated instruction file should: - -1. **Mirror the quality criteria** - The "Quality Criteria" section should match what the stop hooks will validate -2. **Be explicit about success** - Help the agent understand when the step is truly complete -3. **Include the promise pattern** - Mention that `✓ Quality Criteria Met` should be included when criteria are met - -**Example: If the job.yml has:** -```yaml -- id: research_competitors - name: "Research Competitors" - stop_hooks: - - prompt: | - Verify the research meets criteria: - 1. Each competitor has at least 3 data points - 2. Sources are cited - 3. Information is current (within last year) -``` - -**The instruction file should include:** -```markdown -## Quality Criteria - -- Each competitor has at least 3 distinct data points -- All information is sourced with citations -- Data is current (from within the last year) -- When all criteria are met, include `✓ Quality Criteria Met` in your response -``` - -This alignment ensures the AI agent knows exactly what will be validated and can self-check before completing. - -### Using Supplementary Reference Files - -Step instructions can include additional `.md` files in the `steps/` directory for detailed examples, templates, or reference material. Reference them using the full path from the project root. - -See `.deepwork/jobs/deepwork_jobs/steps/supplemental_file_references.md` for detailed documentation and examples. - -### Step 4: Verify job.yml Location - -Verify that `job.yml` is in the correct location at `.deepwork/jobs/[job_name]/job.yml`. 
The define and review_job_spec steps should have created and validated it. If for some reason it's not there, you may need to create or move it. - -### Step 5: Sync Skills - -Run `deepwork sync` to generate the skills for this job: - -```bash -deepwork sync -``` - -This will: -- Parse the job definition -- Generate skills for each step -- Make the skills available in `.claude/skills/` (or appropriate platform directory) - -## Example Implementation - -For a complete worked example showing a job.yml and corresponding step instruction file, see: -- **Job specification**: `.deepwork/jobs/deepwork_jobs/templates/job.yml.example` -- **Step instruction**: `.deepwork/jobs/deepwork_jobs/templates/step_instruction.md.example` - -## Important Guidelines - -1. **Read the spec carefully** - Understand the job's intent from the description -2. **Generate complete instructions** - Don't create placeholder or stub files -3. **Maintain consistency** - Use the same structure for all step instruction files -4. **Provide examples** - Show what good output looks like -5. **Use context** - The job description provides valuable context for each step -6. **Be specific** - Tailor instructions to the specific step, not generic advice - -## Validation Before Sync - -Before running `deepwork sync`, verify: -- All directories exist -- `job.yml` is in place -- All step instruction files exist (one per step) -- No file system errors - -## Completion Checklist - -Before marking this step complete, ensure: -- [ ] job.yml validated and copied to job directory -- [ ] All step instruction files created -- [ ] Each instruction file is complete and actionable -- [ ] `deepwork sync` executed successfully -- [ ] Skills generated in platform directory - -## Quality Criteria - -- Job directory structure is correct -- All instruction files are complete (not stubs) -- Instructions are specific and actionable -- Output examples are provided in each instruction file -- Quality criteria defined for each step -- Steps with user inputs explicitly use "ask structured questions" phrasing -- Sync completed successfully -- Skills available for use - - -### Job Context - -Core commands for managing DeepWork jobs. These commands help you define new multi-step -workflows and learn from running them. - -The `new_job` workflow guides you through defining and implementing a new job by -asking structured questions about your workflow, understanding each step's inputs and outputs, -reviewing the specification, and generating all necessary files. - -The `learn` skill reflects on conversations where DeepWork jobs were run, identifies -confusion or inefficiencies, and improves job instructions. It also captures bespoke -learnings specific to the current run into AGENTS.md files in the working folder. - - -## Required Inputs - - -**Files from Previous Steps** - Read these first: -- `job.yml` (from `review_job_spec`) - -## Work Branch - -Use branch format: `deepwork/deepwork_jobs-[instance]-YYYYMMDD` - -- If on a matching work branch: continue using it -- If on main/master: create new branch with `git checkout -b deepwork/deepwork_jobs-[instance]-$(date +%Y%m%d)` - -## Outputs - -**Required outputs**: -- `steps/` (directory) - -## Quality Validation (Manual) - -**NOTE**: Gemini CLI does not support automated validation. Manually verify criteria before completing. - -**Criteria (all must be satisfied)**: -1. **Directory Structure**: Is `.deepwork/jobs/[job_name]/` created correctly? -2. 
**Complete Instructions**: Are ALL step instruction files complete (not stubs or placeholders)? -3. **Specific & Actionable**: Are instructions tailored to each step's purpose, not generic? -4. **Output Examples**: Does each instruction file show what good output looks like? -5. **Quality Criteria**: Does each instruction file define quality criteria for its outputs? -6. **Ask Structured Questions**: Do step instructions that gather user input explicitly use the phrase "ask structured questions"? -7. **Sync Complete**: Has `deepwork sync` been run successfully? -8. **Commands Available**: Are the slash-commands generated in `.claude/commands/`? -9. **Rules Considered**: Has the agent thought about whether rules would benefit this job? If relevant rules were identified, did they explain them and offer to run `/deepwork_rules.define`? Not every job needs rules - only suggest when genuinely helpful. -## On Completion - -1. Verify outputs are created -2. Inform user: "Step 3/4 complete, outputs: steps/" -3. **Workflow complete**: All steps finished. Consider creating a PR to merge the work branch. - ---- - -**Reference files**: `.deepwork/jobs/deepwork_jobs/job.yml`, `.deepwork/jobs/deepwork_jobs/steps/implement.md` -""" \ No newline at end of file diff --git a/.gemini/skills/deepwork_jobs/index.toml b/.gemini/skills/deepwork_jobs/index.toml deleted file mode 100644 index 6756ea88..00000000 --- a/.gemini/skills/deepwork_jobs/index.toml +++ /dev/null @@ -1,69 +0,0 @@ -# deepwork_jobs -# -# Creates and manages multi-step AI workflows. Use when defining, implementing, or improving DeepWork jobs. -# -# Generated by DeepWork - do not edit manually - -description = "Creates and manages multi-step AI workflows. Use when defining, implementing, or improving DeepWork jobs." - -prompt = """ -# deepwork_jobs - -**Multi-step workflow**: Creates and manages multi-step AI workflows. Use when defining, implementing, or improving DeepWork jobs. - -> **NOTE**: Gemini CLI requires manual command invocation. After each step, tell the user which command to run next. - -Core commands for managing DeepWork jobs. These commands help you define new multi-step -workflows and learn from running them. - -The `new_job` workflow guides you through defining and implementing a new job by -asking structured questions about your workflow, understanding each step's inputs and outputs, -reviewing the specification, and generating all necessary files. - -The `learn` skill reflects on conversations where DeepWork jobs were run, identifies -confusion or inefficiencies, and improves job instructions. It also captures bespoke -learnings specific to the current run into AGENTS.md files in the working folder. - - -## Available Steps - -1. **define** - Creates a job.yml specification by gathering workflow requirements through structured questions. Use when starting a new multi-step workflow. - Command: `/deepwork_jobs:define` -2. **review_job_spec** - Reviews job.yml against quality criteria using a sub-agent for unbiased validation. Use after defining a job specification. (requires: define) - Command: `/deepwork_jobs:review_job_spec` -3. **implement** - Generates step instruction files and syncs slash commands from the job.yml specification. Use after job spec review passes. (requires: review_job_spec) - Command: `/deepwork_jobs:implement` -4. **learn** - Analyzes conversation history to improve job instructions and capture learnings. Use after running a job to refine it. 
- Command: `/deepwork_jobs:learn` - -## Execution Instructions - -### Step 1: Analyze Intent - -Parse any text following `/deepwork_jobs` to determine user intent: -- "define" or related terms → start at `/deepwork_jobs:define` -- "review_job_spec" or related terms → start at `/deepwork_jobs:review_job_spec` -- "implement" or related terms → start at `/deepwork_jobs:implement` -- "learn" or related terms → start at `/deepwork_jobs:learn` - -### Step 2: Direct User to Starting Step - -Tell the user which command to run: -``` -/deepwork_jobs:define -``` - -### Step 3: Guide Through Workflow - -After each step completes, tell the user the next command to run until workflow is complete. - -### Handling Ambiguous Intent - -If user intent is unclear: -- Present available steps as numbered options -- Ask user to select the starting point - -## Reference - -- Job definition: `.deepwork/jobs/deepwork_jobs/job.yml` -""" \ No newline at end of file diff --git a/.gemini/skills/deepwork_jobs/learn.toml b/.gemini/skills/deepwork_jobs/learn.toml deleted file mode 100644 index ef16944b..00000000 --- a/.gemini/skills/deepwork_jobs/learn.toml +++ /dev/null @@ -1,437 +0,0 @@ -# deepwork_jobs:learn -# -# Analyzes conversation history to improve job instructions and capture learnings. Use after running a job to refine it. -# -# Generated by DeepWork - do not edit manually - -description = "Analyzes conversation history to improve job instructions and capture learnings. Use after running a job to refine it." - -prompt = """ -# deepwork_jobs:learn - -**Standalone command** - can be run anytime - -> Creates and manages multi-step AI workflows. Use when defining, implementing, or improving DeepWork jobs. - - -## Instructions - -**Goal**: Analyzes conversation history to improve job instructions and capture learnings. Use after running a job to refine it. - -# Learn from Job Execution - -## Objective - -Think deeply about this task. Reflect on the current conversation to identify learnings from DeepWork job executions, improve job instructions with generalizable insights, and capture bespoke (run-specific) learnings in AGENTS.md files in the deepest common folder that would contain all work on the topic in the future. - -## Task - -Analyze the conversation history to extract learnings and improvements, then apply them appropriately: -- **Generalizable learnings** → Update job instruction files -- **Bespoke learnings** (specific to this run) → Add to AGENTS.md in the deepest common folder for the topic - -### Step 1: Analyze Conversation for Job Executions - -1. **Scan the conversation** for DeepWork slash commands that were run - - Look for patterns like `/job_name.step_id` - - Identify which jobs and steps were executed - - Note the order of execution - -2. **Identify the target folder** - - This should be the deepest common folder that would contain all work on the topic in the future - - Should be clear from conversation history where work was done - - If unclear, run `git diff` to see where changes were made on the branch - -3. **If no job was specified**, ask the user: - - "Which DeepWork job would you like me to learn from?" - - List available jobs from `.deepwork/jobs/` - -### Step 2: Identify Points of Confusion and Inefficiency - -Review the conversation for: - -1. 
**Confusion signals** - - Questions the agent asked that shouldn't have been necessary - - Misunderstandings about what a step required - - Incorrect outputs that needed correction - - Ambiguous instructions that led to wrong interpretations - -2. **Inefficiency signals** - - Extra steps or iterations that were needed - - Information that had to be repeated - - Context that was missing from instructions - - Dependencies that weren't clear - -3. **Error patterns** - - Failed validations and why they failed - - Quality criteria that were misunderstood - - Edge cases that weren't handled - -4. **Success patterns** - - What worked particularly well - - Efficient approaches worth preserving - - Good examples that could be added to instructions - -### Step 3: Classify Learnings - -For each learning identified, determine if it is: - -**Generalizable** (should improve instructions): -- Would help ANY future run of this job -- Addresses unclear or missing guidance -- Fixes incorrect assumptions in instructions -- Adds helpful examples or context -- Examples: - - "Step instructions should mention that X format is required" - - "Quality criteria should include checking for Y" - - "Add example of correct output format" - -**doc spec-Related** (should improve doc spec files): -- Improvements to document quality criteria -- Changes to document structure or format -- Updated audience or frequency information -- Examples: - - "The report should include a summary table" - - "Quality criterion 'Visualization' needs clearer requirements" - - "Documents need a section for action items" - -**Bespoke** (should go in AGENTS.md): -- Specific to THIS project/codebase/run -- Depends on local conventions or structure -- References specific files or paths -- Would not apply to other uses of this job -- Examples: - - "In this codebase, API endpoints are in `src/api/`" - - "This project uses camelCase for function names" - - "The main config file is at `config/settings.yml`" - -### Step 3.5: Identify doc spec-Related Learnings - -Review the conversation for doc spec-related improvements: - -1. **Quality Criteria Changes** - - Were any quality criteria unclear or insufficient? - - Did the agent repeatedly fail certain criteria? - - Are there new criteria that should be added? - -2. **Document Structure Changes** - - Did the user request different sections? - - Were parts of the document format confusing? - - Should the example document be updated? - -3. **Metadata Updates** - - Has the target audience changed? - - Should frequency or path patterns be updated? - -**Signals for doc spec improvements:** -- User asked for changes to document format -- Repeated validation failures on specific criteria -- Feedback about missing sections or information -- Changes to how documents are organized/stored - -### Step 4: Update Job Instructions (Generalizable Learnings) - -For each generalizable learning: - -1. **Locate the instruction file** - - Path: `.deepwork/jobs/[job_name]/steps/[step_id].md` - -2. **Make targeted improvements** - - Add missing context or clarification - - Include helpful examples - - Clarify ambiguous instructions - - Update quality criteria if needed - -3. **Keep instructions concise** - - Avoid redundancy - don't repeat the same guidance in multiple places - - Be direct - remove verbose explanations that don't add value - - Prefer bullet points over paragraphs where appropriate - -4. 
**Preserve instruction structure** - - Keep existing sections (Objective, Task, Process, Output Format, Quality Criteria) - - Add to appropriate sections rather than restructuring - - Maintain consistency with other steps - -5. **Track changes for changelog** - - Note what was changed and why - - Prepare changelog entry for job.yml - -### Step 4b: Extract Shared Content into Referenced Files - -Review all instruction files for the job and identify content that: -- Appears in multiple step instructions (duplicated) -- Is lengthy and could be extracted for clarity -- Would benefit from being maintained in one place - -**Extract to shared files:** - -1. **Create shared files** in `.deepwork/jobs/[job_name]/steps/shared/` - - `conventions.md` - Coding/formatting conventions used across steps - - `examples.md` - Common examples referenced by multiple steps - - `schemas.md` - Data structures or formats used throughout - -2. **Reference from instructions** using markdown includes or explicit references: - ```markdown - ## Conventions - - Follow the conventions defined in `shared/conventions.md`. - ``` - -3. **Benefits of extraction:** - - Single source of truth - update once, applies everywhere - - Shorter instruction files - easier to read and maintain - - Consistent guidance across steps - -### Step 4.5: Update doc spec Files (doc spec-Related Learnings) - -If doc spec-related learnings were identified: - -1. **Locate the doc spec file** - - Find doc spec references in job.yml outputs (look for `doc_spec: .deepwork/doc_specs/[doc_spec_name].md`) - - doc spec files are at `.deepwork/doc_specs/[doc_spec_name].md` - -2. **Update quality_criteria array** - - Add new criteria with name and description - - Modify existing criteria descriptions for clarity - - Remove criteria that are no longer relevant - -3. **Update example document** - - Modify the markdown body to reflect structure changes - - Ensure the example matches updated criteria - -4. **Update metadata as needed** - - target_audience: If audience has changed - - frequency: If production cadence has changed - - path_patterns: If storage location has changed - -**Example doc spec update:** -```yaml -# Before -quality_criteria: - - name: Visualization - description: Include charts - -# After -quality_criteria: - - name: Visualization - description: Include Mermaid.js charts showing spend breakdown by service and month-over-month trend -``` - -### Step 5: Create/Update AGENTS.md (Bespoke Learnings) - -The AGENTS.md file captures project-specific knowledge that helps future agent runs. - -1. **Determine the correct location** - - Place AGENTS.md in the deepest common folder that would contain all work on the topic in the future - - This ensures the knowledge is available when working in that context - - If uncertain, place at the project root - -2. **Use file references where possible** - - Instead of duplicating information, reference source files - - This keeps AGENTS.md in sync as the codebase evolves - - Pattern: "See `path/to/file.ext` for [description]" - -3. **AGENTS.md structure**: See `.deepwork/jobs/deepwork_jobs/templates/agents.md.template` for the standard format. - -4. **Writing entries** - - Be concise but specific - - Always prefer file references over inline content - - Use line numbers when referencing specific code: `file.ext:42` - - Group related learnings together - -### Step 6: Update Job Version and Changelog - -If instruction files were modified: - -1. 
**Bump version in job.yml** - - Patch version (0.0.x) for instruction improvements - - Minor version (0.x.0) if quality criteria changed - -2. **Add changelog entry** - ```yaml - - version: "[new_version]" - changes: "Improved [step] instructions based on execution learnings: [brief description]" - ``` - -### Step 7: Sync Skills - -**Run deepwork sync** (if instructions were modified) -```bash -deepwork sync -``` - -## File Reference Patterns - -When adding entries to AGENTS.md, prefer these patterns: - -**Good (references)**: -```markdown -- API endpoints follow REST conventions. See `src/api/routes.ts` for examples. -- Error handling pattern: See `src/utils/errors.ts:15-30` -- Configuration schema: Defined in `config/schema.json` -``` - -**Avoid (duplicating)**: -```markdown -- API endpoints should return JSON with this format: { status: ..., data: ... } -- Error handling should use try/catch with custom error classes -- Configuration requires these fields: name, version, ... -``` - -## Quality Criteria - -- Conversation has been analyzed for job executions -- Points of confusion and inefficiency are identified -- Learnings are correctly classified (generalizable vs bespoke) -- Job instructions updated for generalizable improvements -- Instructions are concise - no redundancy or unnecessary verbosity -- Shared/lengthy content extracted into referenced files where appropriate -- AGENTS.md created/updated with bespoke learnings -- File references used instead of duplicating content -- AGENTS.md is in the correct folder (the deepest common folder for the topic) -- When all criteria are met, include `✓ Quality Criteria Met` - -## Example Dialog - -``` -User: /deepwork_jobs.learn - -Claude: I'll analyze this conversation for learnings from DeepWork job executions. - -**Conversation Analysis** - -I found the following job executions: -1. `/competitive_research.identify_competitors` - Completed -2. `/competitive_research.research_competitors` - Completed with 2 iterations -3. `/competitive_research.comparative_analysis` - Completed - -**Identified Issues** - -1. **Confusion in step 2**: The instructions didn't specify that competitor research should include pricing information. This caused an extra iteration. - -2. **Missing context**: The agent had to ask about the industry focus, which should have been carried forward from step 1. - -**Classification** - -- Generalizable: Add pricing to research requirements in instructions -- Bespoke: This project focuses on B2B SaaS in the HR tech space - -**Actions Taken** - -1. Updated `.deepwork/jobs/competitive_research/steps/research_competitors.md`: - - Added "Include pricing information (tiers, models, approximate ranges)" to research requirements - - Added note about carrying forward industry context from previous step - -2. Created `./competitive_research_output/AGENTS.md`: - ```markdown - # Project Context for Competitive Research - - ## Job-Specific Context - - ### competitive_research - - #### Industry Focus - - Domain: B2B SaaS, HR Technology sector - - See `identify_competitors_output.md` for market definition - - #### Competitors Identified - - Reference: `competitor_list.md` for full details - - ## Last Updated - - Date: 2024-01-15 - - From conversation about: Initial competitive analysis run - ``` - -3. Updated job.yml version to 1.0.1 with changelog entry - -4. Ran `deepwork sync` - -**Summary** - -Updated job instructions and created AGENTS.md with bespoke learnings. 
-``` - -## Handling Edge Cases - -**No job executions found**: -- Ask user which job they'd like to analyze -- Or offer to review available jobs - -**Multiple jobs executed**: -- Analyze each job separately -- Create separate AGENTS.md entries or files as appropriate - -**AGENTS.md already exists**: -- Read existing content -- Append new learnings to appropriate sections -- Update "Last Updated" section - -**No issues found**: -- Document what worked well -- Consider if any successful patterns should be added to instructions as examples - -**Sensitive information**: -- Never include secrets, credentials, or PII in AGENTS.md -- Reference config files instead of including values - - -### Job Context - -Core commands for managing DeepWork jobs. These commands help you define new multi-step -workflows and learn from running them. - -The `new_job` workflow guides you through defining and implementing a new job by -asking structured questions about your workflow, understanding each step's inputs and outputs, -reviewing the specification, and generating all necessary files. - -The `learn` skill reflects on conversations where DeepWork jobs were run, identifies -confusion or inefficiencies, and improves job instructions. It also captures bespoke -learnings specific to the current run into AGENTS.md files in the working folder. - - -## Required Inputs - -**User Parameters** - Gather from user before starting: -- **job_name**: Name of the job that was run (optional - will auto-detect from conversation) - - -## Work Branch - -Use branch format: `deepwork/deepwork_jobs-[instance]-YYYYMMDD` - -- If on a matching work branch: continue using it -- If on main/master: create new branch with `git checkout -b deepwork/deepwork_jobs-[instance]-$(date +%Y%m%d)` - -## Outputs - -**Required outputs**: -- `AGENTS.md` - -## Quality Validation (Manual) - -**NOTE**: Gemini CLI does not support automated validation. Manually verify criteria before completing. - -**Criteria (all must be satisfied)**: -1. **Conversation Analyzed**: Did the agent review the conversation for DeepWork job executions? -2. **Confusion Identified**: Did the agent identify points of confusion, errors, or inefficiencies? -3. **Instructions Improved**: Were job instructions updated to address identified issues? -4. **Instructions Concise**: Are instructions free of redundancy and unnecessary verbosity? -5. **Shared Content Extracted**: Is lengthy/duplicated content extracted into referenced files? -6. **doc spec Reviewed (if applicable)**: For jobs with doc spec outputs, were doc spec-related learnings identified? -7. **doc spec Updated (if applicable)**: Were doc spec files updated with improved quality criteria or structure? -8. **Bespoke Learnings Captured**: Were run-specific learnings added to AGENTS.md? -9. **File References Used**: Do AGENTS.md entries reference other files where appropriate? -10. **Working Folder Correct**: Is AGENTS.md in the correct working folder for the job? -11. **Generalizable Separated**: Are generalizable improvements in instructions, not AGENTS.md? -12. **Sync Complete**: Has `deepwork sync` been run if instructions were modified? -## On Completion - -1. Verify outputs are created -2. Inform user: "learn complete, outputs: AGENTS.md" - -This standalone command can be re-run anytime. 
- ---- - -**Reference files**: `.deepwork/jobs/deepwork_jobs/job.yml`, `.deepwork/jobs/deepwork_jobs/steps/learn.md` -""" \ No newline at end of file diff --git a/.gemini/skills/deepwork_jobs/review_job_spec.toml b/.gemini/skills/deepwork_jobs/review_job_spec.toml deleted file mode 100644 index 265eb151..00000000 --- a/.gemini/skills/deepwork_jobs/review_job_spec.toml +++ /dev/null @@ -1,300 +0,0 @@ -# deepwork_jobs:review_job_spec -# -# Reviews job.yml against quality criteria using a sub-agent for unbiased validation. Use after defining a job specification. -# -# Generated by DeepWork - do not edit manually - -description = "Reviews job.yml against quality criteria using a sub-agent for unbiased validation. Use after defining a job specification." - -prompt = """ -# deepwork_jobs:review_job_spec - -**Step 2/4** in **deepwork_jobs** workflow - -> Creates and manages multi-step AI workflows. Use when defining, implementing, or improving DeepWork jobs. - -## Prerequisites (Verify First) - -Before proceeding, confirm these steps are complete: -- `/deepwork_jobs:define` - -## Instructions - -**Goal**: Reviews job.yml against quality criteria using a sub-agent for unbiased validation. Use after defining a job specification. - -# Review Job Specification - -## Objective - -Review the `job.yml` created in the define step against the doc spec quality criteria using a sub-agent for unbiased evaluation, then iterate on fixes until all criteria pass. - -## Why This Step Exists - -The define step focuses on understanding user requirements and creating a job specification. This review step ensures the specification meets quality standards before implementation. Using a sub-agent provides an unbiased "fresh eyes" review that catches issues the main agent might miss after being deeply involved in the definition process. - -## Task - -Use a sub-agent to review the job.yml against all 9 doc spec quality criteria, then fix any failed criteria. Repeat until all criteria pass. - -### Step 1: Read the Job Specification - -Read the `job.yml` file created in the define step: - -``` -.deepwork/jobs/[job_name]/job.yml -``` - -Also read the doc spec for reference: - -``` -.deepwork/doc_specs/job_spec.md -``` - -### Step 2: Spawn Review Sub-Agent - -Use the Task tool to spawn a sub-agent that will provide an unbiased review: - -``` -Task tool parameters: -- subagent_type: "general-purpose" -- model: "haiku" -- description: "Review job.yml against doc spec" -- prompt: [see below] -``` - -**Sub-agent prompt template:** - -``` -Review this job.yml against the following 9 quality criteria from the doc spec. - -For each criterion, respond with: -- PASS or FAIL -- If FAIL: specific issue and suggested fix - -## job.yml Content - -[paste the full job.yml content here] - -## Quality Criteria - -1. **Valid Identifier**: Job name must be lowercase with underscores, no spaces or special characters (e.g., `competitive_research`, `monthly_report`) - -2. **Semantic Version**: Version must follow semantic versioning format X.Y.Z (e.g., `1.0.0`, `2.1.3`) - -3. **Concise Summary**: Summary must be under 200 characters and clearly describe what the job accomplishes - -4. **Rich Description**: Description must be multi-line and explain: the problem solved, the process, expected outcomes, and target users - -5. **Changelog Present**: Must include a changelog array with at least the initial version entry - -6. 
**Complete Steps**: Each step must have: id (lowercase_underscores), name, description, instructions_file, outputs (at least one), and dependencies array - -7. **Valid Dependencies**: Dependencies must reference existing step IDs with no circular references - -8. **Input Consistency**: File inputs with `from_step` must reference a step that is in the dependencies array - -9. **Output Paths**: Outputs must be valid filenames or paths (e.g., `report.md` or `reports/analysis.md`) - -## Response Format - -Respond with a structured evaluation: - -### Overall: [X/9 PASS] - -### Criterion Results - -1. Valid Identifier: [PASS/FAIL] - [If FAIL: Issue and fix] - -2. Semantic Version: [PASS/FAIL] - [If FAIL: Issue and fix] - -[... continue for all 9 criteria ...] - -### Summary of Required Fixes - -[List any fixes needed, or "No fixes required - all criteria pass"] -``` - -### Step 3: Review Sub-Agent Findings - -Parse the sub-agent's response: - -1. **Count passing criteria** - How many of the 9 criteria passed? -2. **Identify failures** - List specific criteria that failed -3. **Note suggested fixes** - What changes does the sub-agent recommend? - -### Step 4: Fix Failed Criteria - -For each failed criterion, edit the job.yml to address the issue: - -**Common fixes by criterion:** - -| Criterion | Common Issue | Fix | -|-----------|-------------|-----| -| Valid Identifier | Spaces or uppercase | Convert to lowercase_underscores | -| Semantic Version | Missing or invalid format | Set to `"1.0.0"` or fix format | -| Concise Summary | Too long or vague | Shorten to <200 chars, be specific | -| Rich Description | Single line or missing context | Add multi-line explanation with problem/process/outcome/users | -| Changelog Present | Missing changelog | Add `changelog:` with initial version entry | -| Complete Steps | Missing required fields | Add id, name, description, instructions_file, outputs, dependencies | -| Valid Dependencies | Non-existent step or circular | Fix step ID reference or reorder dependencies | -| Input Consistency | from_step not in dependencies | Add the referenced step to dependencies array | -| Output Paths | Invalid characters or format | Use valid filename/path format | - -### Step 5: Re-Run Review (If Needed) - -If any criteria failed: - -1. **Spawn a new sub-agent** with the updated job.yml content -2. **Review the new findings** -3. **Fix any remaining issues** -4. **Repeat until all 9 criteria pass** - -### Step 6: Confirm Completion - -When all 9 criteria pass: - -1. **Announce success**: "All 9 doc spec quality criteria pass." -2. **List what was validated**: - - Valid Identifier - - Semantic Version - - Concise Summary - - Rich Description - - Changelog Present - - Complete Steps - - Valid Dependencies - - Input Consistency - - Output Paths -3. **Include the promise**: `Quality Criteria Met` -4. **Guide to next step**: "Run `/deepwork_jobs.implement` to generate the step instruction files." - -## Example Review Cycle - -``` -Claude: Let me review the job.yml against the doc spec quality criteria. - -[Spawns sub-agent with haiku model] - -Sub-agent response: -### Overall: 7/9 PASS - -### Criterion Results -1. Valid Identifier: PASS -2. Semantic Version: PASS -3. Concise Summary: FAIL - Issue: Summary is 245 characters, exceeds 200 char limit - Fix: Shorten to "Competitive analysis workflow for product positioning" -4. Rich Description: PASS -5. Changelog Present: FAIL - Issue: No changelog array present - Fix: Add changelog with initial version entry -6. 
Complete Steps: PASS -7. Valid Dependencies: PASS -8. Input Consistency: PASS -9. Output Paths: PASS - -### Summary of Required Fixes -1. Shorten summary to under 200 characters -2. Add changelog array with initial version - -Claude: I found 2 issues. Let me fix them. - -[Edits job.yml to shorten summary and add changelog] - -Claude: Let me verify the fixes with another review. - -[Spawns new sub-agent] - -Sub-agent response: -### Overall: 9/9 PASS - -### Summary of Required Fixes -No fixes required - all criteria pass - -Claude: All 9 doc spec quality criteria pass. - -Quality Criteria Met - -**Next step:** Run `/deepwork_jobs.implement` to generate the step instruction files. -``` - -## Quality Criteria - -- **Sub-Agent Used**: A sub-agent was spawned to provide unbiased review (not just self-review) -- **All doc spec Criteria Evaluated**: The sub-agent assessed all 9 quality criteria from the doc spec -- **Findings Addressed**: All failed criteria were fixed by the main agent -- **Validation Loop Complete**: The review-fix cycle continued until all criteria passed -- **Promise Included**: The response includes `Quality Criteria Met` when complete - -## Output - -The validated `job.yml` file at `.deepwork/jobs/[job_name]/job.yml` that passes all 9 doc spec quality criteria. - - -### Job Context - -Core commands for managing DeepWork jobs. These commands help you define new multi-step -workflows and learn from running them. - -The `new_job` workflow guides you through defining and implementing a new job by -asking structured questions about your workflow, understanding each step's inputs and outputs, -reviewing the specification, and generating all necessary files. - -The `learn` skill reflects on conversations where DeepWork jobs were run, identifies -confusion or inefficiencies, and improves job instructions. It also captures bespoke -learnings specific to the current run into AGENTS.md files in the working folder. - - -## Required Inputs - - -**Files from Previous Steps** - Read these first: -- `job.yml` (from `define`) - -## Work Branch - -Use branch format: `deepwork/deepwork_jobs-[instance]-YYYYMMDD` - -- If on a matching work branch: continue using it -- If on main/master: create new branch with `git checkout -b deepwork/deepwork_jobs-[instance]-$(date +%Y%m%d)` - -## Outputs - -**Required outputs**: -- `job.yml` - **Doc Spec**: DeepWork Job Specification - > YAML specification file that defines a multi-step workflow job for AI agents - **Definition**: `.deepwork/doc_specs/job_spec.md` - **Target Audience**: AI agents executing jobs and developers defining workflows - **Quality Criteria**: - 1. **Valid Identifier**: Job name must be lowercase with underscores, no spaces or special characters (e.g., `competitive_research`, `monthly_report`) - 2. **Semantic Version**: Version must follow semantic versioning format X.Y.Z (e.g., `1.0.0`, `2.1.3`) - 3. **Concise Summary**: Summary must be under 200 characters and clearly describe what the job accomplishes - 4. **Rich Description**: Description must be multi-line and explain: the problem solved, the process, expected outcomes, and target users - 5. **Changelog Present**: Must include a changelog array with at least the initial version entry. Changelog should only include one entry per branch at most - 6. **Complete Steps**: Each step must have: id (lowercase_underscores), name, description, instructions_file, outputs (at least one), and dependencies array - 7. 
**Valid Dependencies**: Dependencies must reference existing step IDs with no circular references - 8. **Input Consistency**: File inputs with `from_step` must reference a step that is in the dependencies array - 9. **Output Paths**: Outputs must be valid filenames or paths within the main repo directory structure, never in dot-directories like `.deepwork/`. Use specific, descriptive paths that lend themselves to glob patterns (e.g., `competitive_research/acme_corp/swot.md` or `operations/reports/2026-01/spending_analysis.md`). Parameterized paths like `[competitor_name]/` are encouraged for per-entity outputs. Avoid generic names (`output.md`, `analysis.md`) and transient-sounding paths (`temp/`, `draft.md`). Supporting materials for a final output should go in a peer `_dataroom` folder (e.g., `spending_analysis_dataroom/`). - 10. **Concise Instructions**: The content of the file, particularly the description, must not have excessively redundant information. It should be concise and to the point given that extra tokens will confuse the AI. - -## Quality Validation (Manual) - -**NOTE**: Gemini CLI does not support automated validation. Manually verify criteria before completing. - -**Criteria (all must be satisfied)**: -1. **Sub-Agent Used**: Was a sub-agent spawned to provide unbiased review? -2. **All doc spec Criteria Evaluated**: Did the sub-agent assess all 9 quality criteria? -3. **Findings Addressed**: Were all failed criteria addressed by the main agent? -4. **Validation Loop Complete**: Did the review-fix cycle continue until all criteria passed? -## On Completion - -1. Verify outputs are created -2. Inform user: "Step 2/4 complete, outputs: job.yml" -3. **Tell user next command**: `/deepwork_jobs:implement` - ---- - -**Reference files**: `.deepwork/jobs/deepwork_jobs/job.yml`, `.deepwork/jobs/deepwork_jobs/steps/review_job_spec.md` -""" \ No newline at end of file diff --git a/.gemini/skills/update/index.toml b/.gemini/skills/update/index.toml deleted file mode 100644 index fd38a15e..00000000 --- a/.gemini/skills/update/index.toml +++ /dev/null @@ -1,63 +0,0 @@ -# update -# -# Updates DeepWork standard jobs in src/ and syncs to installed locations. Use when modifying deepwork_jobs. -# -# Generated by DeepWork - do not edit manually - -description = "Updates DeepWork standard jobs in src/ and syncs to installed locations. Use when modifying deepwork_jobs." - -prompt = """ -# update - -**Multi-step workflow**: Updates DeepWork standard jobs in src/ and syncs to installed locations. Use when modifying deepwork_jobs. - -> **NOTE**: Gemini CLI requires manual command invocation. After each step, tell the user which command to run next. - -A workflow for maintaining standard jobs bundled with DeepWork. Standard jobs -(like `deepwork_jobs`) are source-controlled in -`src/deepwork/standard_jobs/` and must be edited there—never in `.deepwork/jobs/` -or `.claude/commands/` directly. - -This job guides you through: -1. Identifying which standard job(s) to update from conversation context -2. Making changes in the correct source location (`src/deepwork/standard_jobs/[job_name]/`) -3. Running `deepwork install` to propagate changes to `.deepwork/` and command directories -4. Verifying the sync completed successfully - -Use this job whenever you need to modify job.yml files, step instructions, or hooks -for any standard job in the DeepWork repository. - - -## Available Steps - -1. **job** - Edits standard job source files in src/ and runs deepwork install to sync changes. 
Use when updating job.yml or step instructions. - Command: `/update:job` - -## Execution Instructions - -### Step 1: Analyze Intent - -Parse any text following `/update` to determine user intent: -- "job" or related terms → start at `/update:job` - -### Step 2: Direct User to Starting Step - -Tell the user which command to run: -``` -/update:job -``` - -### Step 3: Guide Through Workflow - -After each step completes, tell the user the next command to run until workflow is complete. - -### Handling Ambiguous Intent - -If user intent is unclear: -- Present available steps as numbered options -- Ask user to select the starting point - -## Reference - -- Job definition: `.deepwork/jobs/update/job.yml` -""" \ No newline at end of file diff --git a/.gemini/skills/update/job.toml b/.gemini/skills/update/job.toml deleted file mode 100644 index 7ab6a71b..00000000 --- a/.gemini/skills/update/job.toml +++ /dev/null @@ -1,141 +0,0 @@ -# update:job -# -# Edits standard job source files in src/ and runs deepwork install to sync changes. Use when updating job.yml or step instructions. -# -# Generated by DeepWork - do not edit manually - -description = "Edits standard job source files in src/ and runs deepwork install to sync changes. Use when updating job.yml or step instructions." - -prompt = """ -# update:job - -**Standalone command** - can be run anytime - -> Updates DeepWork standard jobs in src/ and syncs to installed locations. Use when modifying deepwork_jobs. - - -## Instructions - -**Goal**: Edits standard job source files in src/ and runs deepwork install to sync changes. Use when updating job.yml or step instructions. - -# Update Standard Job - -## Objective - -Edit standard job source files in `src/deepwork/standard_jobs/` and sync changes to installed locations. - -## Task - -When modifying a standard job in the DeepWork repository, this step ensures changes are made in the correct location and properly propagated. - -### Important: Source of Truth - -Standard jobs exist in THREE locations, but only ONE is the source of truth: - -| Location | Purpose | Editable? | -|----------|---------|-----------| -| `src/deepwork/standard_jobs/[job]/` | **Source of truth** | **YES** | -| `.deepwork/jobs/[job]/` | Installed copy | NO - overwritten by install | -| `.claude/commands/[job].[step].md` | Generated commands | NO - regenerated by sync | - -**NEVER edit files in `.deepwork/jobs/` or `.claude/commands/` for standard jobs!** - -### Process - -#### 1. Identify the Standard Job to Update - -From conversation context, determine: -- Which standard job needs updating (e.g., `deepwork_jobs`, `deepwork_rules`) -- What changes are needed (job.yml, step instructions, hooks, etc.) - -Current standard jobs: -```bash -ls src/deepwork/standard_jobs/ -``` - -#### 2. Make Changes in Source Location - -``` -src/deepwork/standard_jobs/[job_name]/ -├── job.yml # Job definition -├── steps/ # Step instruction files -├── hooks/ # Hook scripts -└── templates/ # Templates -``` - -#### 3. Run DeepWork Install - -```bash -deepwork install --platform claude -``` - -For Gemini: `deepwork install --platform gemini` - -#### 4. 
Verify the Sync - -```bash -# Verify job.yml -diff src/deepwork/standard_jobs/[job_name]/job.yml .deepwork/jobs/[job_name]/job.yml - -# Verify step files -diff -r src/deepwork/standard_jobs/[job_name]/steps/ .deepwork/jobs/[job_name]/steps/ - -# Check commands regenerated -ls -la .claude/commands/[job_name].*.md -``` - -## Quality Criteria - -- Changes made ONLY in `src/deepwork/standard_jobs/[job_name]/` -- `deepwork install --platform claude` executed successfully -- Files in `.deepwork/jobs/` match source -- Command files regenerated -- When all criteria are met, include `✓ Quality Criteria Met` - - -### Job Context - -A workflow for maintaining standard jobs bundled with DeepWork. Standard jobs -(like `deepwork_jobs`) are source-controlled in -`src/deepwork/standard_jobs/` and must be edited there—never in `.deepwork/jobs/` -or `.claude/commands/` directly. - -This job guides you through: -1. Identifying which standard job(s) to update from conversation context -2. Making changes in the correct source location (`src/deepwork/standard_jobs/[job_name]/`) -3. Running `deepwork install` to propagate changes to `.deepwork/` and command directories -4. Verifying the sync completed successfully - -Use this job whenever you need to modify job.yml files, step instructions, or hooks -for any standard job in the DeepWork repository. - - -## Required Inputs - -**User Parameters** - Gather from user before starting: -- **job_context**: Determine from conversation context which standard job(s) to update and what changes are needed - - -## Work Branch - -Use branch format: `deepwork/update-[instance]-YYYYMMDD` - -- If on a matching work branch: continue using it -- If on main/master: create new branch with `git checkout -b deepwork/update-[instance]-$(date +%Y%m%d)` - -## Outputs - -**Required outputs**: -- `files_synced` - -## On Completion - -1. Verify outputs are created -2. Inform user: "job complete, outputs: files_synced" - -This standalone command can be re-run anytime. - ---- - -**Reference files**: `.deepwork/jobs/update/job.yml`, `.deepwork/jobs/update/steps/job.md` -""" \ No newline at end of file diff --git a/.mcp.json b/.mcp.json new file mode 100644 index 00000000..1c40877f --- /dev/null +++ b/.mcp.json @@ -0,0 +1,12 @@ +{ + "mcpServers": { + "deepwork": { + "command": "/Users/noah/Documents/GitHub/deep-work/.venv/bin/deepwork", + "args": [ + "serve", + "--path", + "." + ] + } + } +} \ No newline at end of file diff --git a/src/deepwork/core/adapters.py b/src/deepwork/core/adapters.py index e0a3f101..ea401924 100644 --- a/src/deepwork/core/adapters.py +++ b/src/deepwork/core/adapters.py @@ -3,6 +3,8 @@ from __future__ import annotations import json +import shutil +import sys from abc import ABC, abstractmethod from enum import Enum from pathlib import Path @@ -564,46 +566,72 @@ def _extract_skill_name(self, skill_path: Path) -> str | None: def register_mcp_server(self, project_path: Path) -> bool: """ - Register the DeepWork MCP server in Claude Code settings.json. + Register the DeepWork MCP server in .mcp.json at project root. - Adds the mcpServers configuration for DeepWork: - { - "mcpServers": { - "deepwork": { - "command": "deepwork", - "args": ["serve", "--path", "."], - "transport": "stdio" - } - } - } + Claude Code reads MCP server configurations from .mcp.json (project scope), + not from settings.json. This method detects the full path to the deepwork + executable to ensure the MCP server can be invoked regardless of PATH + configuration when Claude Code starts. 
Args: project_path: Path to project root Returns: - True if server was registered, False if already registered + True if server was registered or updated, False if no changes needed Raises: AdapterError: If registration fails """ - settings = self._load_settings(project_path) + mcp_file = project_path / ".mcp.json" + + # Load existing .mcp.json or create new + existing_config: dict[str, Any] = {} + if mcp_file.exists(): + try: + with open(mcp_file, encoding="utf-8") as f: + existing_config = json.load(f) + except (json.JSONDecodeError, OSError) as e: + raise AdapterError(f"Failed to read .mcp.json: {e}") from e # Initialize mcpServers if not present - if "mcpServers" not in settings: - settings["mcpServers"] = {} + if "mcpServers" not in existing_config: + existing_config["mcpServers"] = {} + + # Build the new MCP server config + deepwork_path = shutil.which("deepwork") - # Check if already registered - if "deepwork" in settings["mcpServers"]: + if deepwork_path: + # Use the absolute path to deepwork + new_server_config = { + "command": deepwork_path, + "args": ["serve", "--path", "."], + } + else: + # Fallback: use Python module invocation + # This works when deepwork is installed in the current Python environment + new_server_config = { + "command": sys.executable, + "args": ["-m", "deepwork.cli.main", "serve", "--path", "."], + } + + # Check if already registered with same config + existing_server = existing_config["mcpServers"].get("deepwork", {}) + if ( + existing_server.get("command") == new_server_config["command"] + and existing_server.get("args") == new_server_config["args"] + ): return False - # Register the DeepWork MCP server - settings["mcpServers"]["deepwork"] = { - "command": "deepwork", - "args": ["serve", "--path", "."], - "transport": "stdio", - } + # Register or update the DeepWork MCP server + existing_config["mcpServers"]["deepwork"] = new_server_config + + # Write .mcp.json + try: + with open(mcp_file, "w", encoding="utf-8") as f: + json.dump(existing_config, f, indent=2) + except OSError as e: + raise AdapterError(f"Failed to write .mcp.json: {e}") from e - self._save_settings(project_path, settings) return True From a3718ef4adc6e48ebb155c895fb56a5452ecbf33 Mon Sep 17 00:00:00 2001 From: Noah Horton Date: Tue, 3 Feb 2026 16:34:08 -0700 Subject: [PATCH 04/45] feat: Refactor quality gate with configurable agent and improved prompts - Add configurable quality_gate settings to config.yml (agent_review_command, default_timeout, default_max_attempts) - Update installer to create quality_gate config section with defaults - Refactor QualityGate to separate system instructions from user payload - Use -s flag to pass instructions as system prompt to review agent - Change file separator format to 20 dashes for clearer delineation - Remove step_instructions from QualityGate interface (not useful for review) - Add quality_review_override_reason to finished_step to skip quality gate - Add JSON schema validation for quality gate responses - Add comprehensive integration tests with mock review agent subprocess - Remove block_bash_with_instructions hook (commit skill not available) Co-Authored-By: Claude Opus 4.5 --- .claude/hooks/block_bash_with_instructions.sh | 74 --- .claude/settings.json | 11 - .deepwork/config.yml | 4 + src/deepwork/cli/install.py | 8 + src/deepwork/cli/serve.py | 13 +- src/deepwork/mcp/quality_gate.py | 169 ++++-- src/deepwork/mcp/schemas.py | 4 + src/deepwork/mcp/server.py | 20 +- src/deepwork/mcp/tools.py | 18 +- tests/fixtures/mock_review_agent.py | 153 
+++++ .../test_quality_gate_integration.py | 544 ++++++++++++++++++ .../test_block_bash_with_instructions.py | 237 -------- tests/unit/mcp/test_quality_gate.py | 49 +- tests/unit/mcp/test_tools.py | 34 ++ 14 files changed, 919 insertions(+), 419 deletions(-) delete mode 100755 .claude/hooks/block_bash_with_instructions.sh create mode 100755 tests/fixtures/mock_review_agent.py create mode 100644 tests/integration/test_quality_gate_integration.py delete mode 100644 tests/shell_script_tests/test_block_bash_with_instructions.py diff --git a/.claude/hooks/block_bash_with_instructions.sh b/.claude/hooks/block_bash_with_instructions.sh deleted file mode 100755 index 7bd16f88..00000000 --- a/.claude/hooks/block_bash_with_instructions.sh +++ /dev/null @@ -1,74 +0,0 @@ -#!/bin/bash -# block_bash_with_instructions.sh - Blocks specific bash commands and provides alternative instructions -# -# This hook intercepts Bash tool use calls and blocks commands that match -# specific patterns, providing alternative instructions to the agent. -# -# Usage: Registered as a PreToolUse hook in .claude/settings.json -# -# Input (stdin): JSON from Claude Code hook system containing tool_name and tool_input -# Output (stderr): Error message if blocked (Claude Code reads stderr for exit code 2) -# Exit codes: -# 0 - Success (allow action) -# 2 - Blocking error (prevent action with message) - -set -e - -# ============================================================================= -# BLOCKED COMMANDS CONFIGURATION -# ============================================================================= -# Format: Each entry is a regex pattern followed by a delimiter (|||) and instructions -# The regex is matched against the full bash command -# Add new blocked commands here: - -BLOCKED_COMMANDS=( - '^[[:space:]]*git[[:space:]]+commit|||All commits must be done via the `/commit` skill. Do not use git commit directly. Instead, run `/commit` to start the commit workflow which includes code review, testing, and linting before committing.' -) - -# ============================================================================= -# HOOK LOGIC - DO NOT MODIFY BELOW UNLESS NECESSARY -# ============================================================================= - -# Read stdin into variable -HOOK_INPUT="" -if [ ! 
-t 0 ]; then - HOOK_INPUT=$(cat) -fi - -# Exit early if no input -if [ -z "${HOOK_INPUT}" ]; then - exit 0 -fi - -# Extract tool_name from input -TOOL_NAME=$(echo "${HOOK_INPUT}" | jq -r '.tool_name // empty' 2>/dev/null) - -# Only process Bash tool calls -if [ "${TOOL_NAME}" != "Bash" ]; then - exit 0 -fi - -# Extract the command from tool_input -COMMAND=$(echo "${HOOK_INPUT}" | jq -r '.tool_input.command // empty' 2>/dev/null) - -# Exit if no command -if [ -z "${COMMAND}" ]; then - exit 0 -fi - -# Check each blocked pattern -for entry in "${BLOCKED_COMMANDS[@]}"; do - # Split entry by delimiter - pattern="${entry%%|||*}" - instructions="${entry##*|||}" - - # Check if command matches pattern (using extended regex) - if echo "${COMMAND}" | grep -qE "${pattern}"; then - # Output error message to stderr (Claude Code reads stderr for exit code 2) - echo "${instructions}" >&2 - exit 2 - fi -done - -# Command is allowed -exit 0 diff --git a/.claude/settings.json b/.claude/settings.json index 36dc7bc8..d84958d8 100644 --- a/.claude/settings.json +++ b/.claude/settings.json @@ -126,17 +126,6 @@ ] }, "hooks": { - "PreToolUse": [ - { - "matcher": "Bash", - "hooks": [ - { - "type": "command", - "command": ".claude/hooks/block_bash_with_instructions.sh" - } - ] - } - ], "SessionStart": [ { "matcher": "", diff --git a/.deepwork/config.yml b/.deepwork/config.yml index 9de79eea..7187cc6f 100644 --- a/.deepwork/config.yml +++ b/.deepwork/config.yml @@ -2,3 +2,7 @@ version: 0.1.0 platforms: - claude - gemini +quality_gate: + agent_review_command: "claude -p --output-format json" + default_timeout: 120 + default_max_attempts: 3 diff --git a/src/deepwork/cli/install.py b/src/deepwork/cli/install.py index 25ae2597..9fb1de10 100644 --- a/src/deepwork/cli/install.py +++ b/src/deepwork/cli/install.py @@ -284,6 +284,14 @@ def _install_deepwork(platform_name: str | None, project_path: Path) -> None: if "platforms" not in config_data: config_data["platforms"] = [] + # Initialize quality_gate config with defaults + if "quality_gate" not in config_data: + config_data["quality_gate"] = { + "agent_review_command": "claude -p --output-format json", + "default_timeout": 120, + "default_max_attempts": 3, + } + # Add each platform if not already present added_platforms: list[str] = [] for i, platform in enumerate(platforms_to_add): diff --git a/src/deepwork/cli/serve.py b/src/deepwork/cli/serve.py index 0a74b0a9..5e3dae3c 100644 --- a/src/deepwork/cli/serve.py +++ b/src/deepwork/cli/serve.py @@ -119,18 +119,25 @@ def _serve_mcp( # Validate project has DeepWork installed _load_config(project_path) - # Load quality gate from config if not specified + # Load quality gate settings from config if not specified via CLI + config = _load_config(project_path) + qg_config = config.get("quality_gate", {}) + if quality_gate_command is None: - config = _load_config(project_path) - qg_config = config.get("quality_gate", {}) quality_gate_command = qg_config.get("agent_review_command") + # Get timeout and max_attempts from config (with defaults) + quality_gate_timeout = qg_config.get("default_timeout", 120) + quality_gate_max_attempts = qg_config.get("default_max_attempts", 3) + # Create and run server from deepwork.mcp.server import create_server server = create_server( project_root=project_path, quality_gate_command=quality_gate_command, + quality_gate_timeout=quality_gate_timeout, + quality_gate_max_attempts=quality_gate_max_attempts, ) if transport == "stdio": diff --git a/src/deepwork/mcp/quality_gate.py 
b/src/deepwork/mcp/quality_gate.py index 17a13bba..f0ee7f43 100644 --- a/src/deepwork/mcp/quality_gate.py +++ b/src/deepwork/mcp/quality_gate.py @@ -7,12 +7,42 @@ from __future__ import annotations import json +import shlex import subprocess from pathlib import Path +from typing import Any + +import jsonschema from deepwork.mcp.schemas import QualityCriteriaResult, QualityGateResult +# JSON Schema for quality gate response validation +QUALITY_GATE_RESPONSE_SCHEMA: dict[str, Any] = { + "type": "object", + "required": ["passed", "feedback"], + "properties": { + "passed": {"type": "boolean"}, + "feedback": {"type": "string"}, + "criteria_results": { + "type": "array", + "items": { + "type": "object", + "required": ["criterion", "passed"], + "properties": { + "criterion": {"type": "string"}, + "passed": {"type": "boolean"}, + "feedback": {"type": ["string", "null"]}, + }, + }, + }, + }, +} + +# File separator format: 20 dashes, filename, 20 dashes +FILE_SEPARATOR = "-" * 20 + + class QualityGateError(Exception): """Exception raised for quality gate errors.""" @@ -34,70 +64,36 @@ def __init__( """Initialize quality gate. Args: - command: Command to invoke review agent (receives prompt via stdin) + command: Base command to invoke review agent (system prompt added via -s flag) timeout: Timeout in seconds for review agent """ self.command = command self.timeout = timeout - def _build_review_prompt( - self, - step_instructions: str, - quality_criteria: list[str], - outputs: list[str], - project_root: Path, - ) -> str: - """Build the prompt for the review agent. + def _build_instructions(self, quality_criteria: list[str]) -> str: + """Build the system instructions for the review agent. Args: - step_instructions: The step's instruction content quality_criteria: List of quality criteria to evaluate - outputs: List of output file paths - project_root: Project root path for reading files Returns: - Formatted review prompt + System instructions string """ - # Read output file contents - output_contents: list[str] = [] - for output_path in outputs: - full_path = project_root / output_path - if full_path.exists(): - try: - content = full_path.read_text(encoding="utf-8") - output_contents.append(f"### {output_path}\n```\n{content}\n```") - except Exception as e: - output_contents.append(f"### {output_path}\nError reading file: {e}") - else: - output_contents.append(f"### {output_path}\nFile not found") - - outputs_text = "\n\n".join(output_contents) if output_contents else "No outputs provided" - criteria_list = "\n".join(f"- {c}" for c in quality_criteria) - return f"""You are a quality gate reviewer for a workflow step. Evaluate the outputs against the quality criteria. - -## Step Instructions - -{step_instructions} + return f"""You are a quality gate reviewer. Your job is to evaluate whether outputs meet the specified quality criteria. -## Quality Criteria +## Quality Criteria to Evaluate {criteria_list} -## Outputs to Review +## Response Format -{outputs_text} - -## Your Task - -Evaluate each output against the quality criteria. For each criterion, determine if it passes or fails. - -Return your evaluation as JSON with this exact structure: +You must respond with JSON in this exact structure: ```json {{ "passed": true/false, - "feedback": "Brief overall summary", + "feedback": "Brief overall summary of evaluation", "criteria_results": [ {{ "criterion": "The criterion text", @@ -108,20 +104,61 @@ def _build_review_prompt( }} ``` -Be strict but fair. Only mark as passed if the criterion is clearly met. 
-""" +## Guidelines + +- Be strict but fair +- Only mark a criterion as passed if it is clearly met +- Provide specific, actionable feedback for failed criteria +- The overall "passed" should be true only if ALL criteria pass""" + + def _build_payload( + self, + outputs: list[str], + project_root: Path, + ) -> str: + """Build the user prompt payload with file contents. + + Args: + outputs: List of output file paths + project_root: Project root path for reading files + + Returns: + Formatted payload with file contents + """ + output_sections: list[str] = [] + + for output_path in outputs: + full_path = project_root / output_path + header = f"{FILE_SEPARATOR} {output_path} {FILE_SEPARATOR}" - def _parse_response(self, response_text: str) -> QualityGateResult: + if full_path.exists(): + try: + content = full_path.read_text(encoding="utf-8") + output_sections.append(f"{header}\n{content}") + except Exception as e: + output_sections.append(f"{header}\n[Error reading file: {e}]") + else: + output_sections.append(f"{header}\n[File not found]") + + if not output_sections: + return "[No output files provided]" + + return "\n\n".join(output_sections) + + def _parse_response( + self, response_text: str, validate_schema: bool = True + ) -> QualityGateResult: """Parse the review agent's response. Args: response_text: Raw response from review agent + validate_schema: Whether to validate against JSON schema (default True) Returns: Parsed QualityGateResult Raises: - QualityGateError: If response cannot be parsed + QualityGateError: If response cannot be parsed or fails schema validation """ # Try to extract JSON from the response try: @@ -140,6 +177,17 @@ def _parse_response(self, response_text: str) -> QualityGateResult: data = json.loads(json_text) + # Validate against JSON schema if enabled + if validate_schema: + try: + jsonschema.validate(data, QUALITY_GATE_RESPONSE_SCHEMA) + except jsonschema.ValidationError as ve: + raise QualityGateError( + f"Quality gate response failed schema validation: {ve.message}\n" + f"Path: {list(ve.absolute_path)}\n" + f"Response was: {json_text[:500]}..." + ) from ve + # Parse criteria results criteria_results = [ QualityCriteriaResult( @@ -164,7 +212,6 @@ def _parse_response(self, response_text: str) -> QualityGateResult: def evaluate( self, - step_instructions: str, quality_criteria: list[str], outputs: list[str], project_root: Path, @@ -172,7 +219,6 @@ def evaluate( """Evaluate step outputs against quality criteria. 
Args: - step_instructions: The step's instruction content quality_criteria: List of quality criteria to evaluate outputs: List of output file paths project_root: Project root path @@ -191,18 +237,21 @@ def evaluate( criteria_results=[], ) - prompt = self._build_review_prompt( - step_instructions=step_instructions, - quality_criteria=quality_criteria, - outputs=outputs, - project_root=project_root, - ) + # Build system instructions and payload separately + instructions = self._build_instructions(quality_criteria) + payload = self._build_payload(outputs, project_root) + + # Build command with system prompt flag + # Parse the base command properly to handle quoted arguments + base_cmd = shlex.split(self.command) + # Add system prompt via -s flag + full_cmd = base_cmd + ["-s", instructions] try: - # Run review agent + # Run review agent with system prompt and payload result = subprocess.run( - self.command.split(), - input=prompt, + full_cmd, + input=payload, capture_output=True, text=True, timeout=self.timeout, @@ -223,7 +272,7 @@ def evaluate( ) from e except FileNotFoundError as e: raise QualityGateError( - f"Review agent command not found: {self.command.split()[0]}" + f"Review agent command not found: {base_cmd[0]}" ) from e @@ -247,14 +296,12 @@ def __init__(self, should_pass: bool = True, feedback: str = "Mock evaluation"): def evaluate( self, - step_instructions: str, quality_criteria: list[str], outputs: list[str], project_root: Path, ) -> QualityGateResult: """Mock evaluation - records call and returns configured result.""" self.evaluations.append({ - "step_instructions": step_instructions, "quality_criteria": quality_criteria, "outputs": outputs, }) diff --git a/src/deepwork/mcp/schemas.py b/src/deepwork/mcp/schemas.py index 18375c79..07b1e622 100644 --- a/src/deepwork/mcp/schemas.py +++ b/src/deepwork/mcp/schemas.py @@ -94,6 +94,10 @@ class FinishedStepInput(BaseModel): outputs: list[str] = Field(description="List of output file paths created") notes: str | None = Field(default=None, description="Optional notes about work done") + quality_review_override_reason: str | None = Field( + default=None, + description="If provided, skips the quality gate review. Must explain why the review is being bypassed.", + ) # ============================================================================= diff --git a/src/deepwork/mcp/server.py b/src/deepwork/mcp/server.py index 5af0d059..5f7f943d 100644 --- a/src/deepwork/mcp/server.py +++ b/src/deepwork/mcp/server.py @@ -26,12 +26,16 @@ def create_server( project_root: Path | str, quality_gate_command: str | None = None, + quality_gate_timeout: int = 120, + quality_gate_max_attempts: int = 3, ) -> FastMCP: """Create and configure the MCP server. 
Args: project_root: Path to the project root quality_gate_command: Optional command for quality gate agent + quality_gate_timeout: Timeout in seconds for quality gate (default: 120) + quality_gate_max_attempts: Max attempts before failing quality gate (default: 3) Returns: Configured FastMCP server instance @@ -43,12 +47,16 @@ def create_server( quality_gate: QualityGate | None = None if quality_gate_command: - quality_gate = QualityGate(command=quality_gate_command) + quality_gate = QualityGate( + command=quality_gate_command, + timeout=quality_gate_timeout, + ) tools = WorkflowTools( project_root=project_path, state_manager=state_manager, quality_gate=quality_gate, + max_quality_attempts=quality_gate_max_attempts, ) # Create MCP server @@ -104,15 +112,21 @@ def start_workflow( "'next_step' with instructions for the next step, or " "'workflow_complete' when finished. " "Required: outputs (list of file paths created). " - "Optional: notes about work done." + "Optional: notes about work done. " + "Optional: quality_review_override_reason to skip quality review (must explain why)." ) ) def finished_step( outputs: list[str], notes: str | None = None, + quality_review_override_reason: str | None = None, ) -> dict[str, Any]: """Report step completion and get next instructions.""" - input_data = FinishedStepInput(outputs=outputs, notes=notes) + input_data = FinishedStepInput( + outputs=outputs, + notes=notes, + quality_review_override_reason=quality_review_override_reason, + ) response = tools.finished_step(input_data) return response.model_dump() diff --git a/src/deepwork/mcp/tools.py b/src/deepwork/mcp/tools.py index c4663316..998106ef 100644 --- a/src/deepwork/mcp/tools.py +++ b/src/deepwork/mcp/tools.py @@ -44,6 +44,7 @@ def __init__( project_root: Path, state_manager: StateManager, quality_gate: QualityGate | None = None, + max_quality_attempts: int = 3, ): """Initialize workflow tools. @@ -51,11 +52,13 @@ def __init__( project_root: Path to project root state_manager: State manager instance quality_gate: Optional quality gate for step validation + max_quality_attempts: Maximum attempts before failing quality gate """ self.project_root = project_root self.jobs_dir = project_root / ".deepwork" / "jobs" self.state_manager = state_manager self.quality_gate = quality_gate + self.max_quality_attempts = max_quality_attempts def _load_all_jobs(self) -> list[JobDefinition]: """Load all job definitions from the jobs directory. 
@@ -292,13 +295,15 @@ def finished_step(self, input_data: FinishedStepInput) -> FinishedStepResponse: if current_step is None: raise ToolError(f"Current step not found: {current_step_id}") - # Run quality gate if available and step has criteria - if self.quality_gate and current_step.quality_criteria: + # Run quality gate if available and step has criteria (unless overridden) + if ( + self.quality_gate + and current_step.quality_criteria + and not input_data.quality_review_override_reason + ): attempts = self.state_manager.record_quality_attempt(current_step_id) - instructions = self._get_step_instructions(job, current_step_id) result = self.quality_gate.evaluate( - step_instructions=instructions, quality_criteria=current_step.quality_criteria, outputs=input_data.outputs, project_root=self.project_root, @@ -306,10 +311,9 @@ def finished_step(self, input_data: FinishedStepInput) -> FinishedStepResponse: if not result.passed: # Check max attempts - max_attempts = 3 # Could be configurable - if attempts >= max_attempts: + if attempts >= self.max_quality_attempts: raise ToolError( - f"Quality gate failed after {max_attempts} attempts. " + f"Quality gate failed after {self.max_quality_attempts} attempts. " f"Feedback: {result.feedback}" ) diff --git a/tests/fixtures/mock_review_agent.py b/tests/fixtures/mock_review_agent.py new file mode 100755 index 00000000..22cc4591 --- /dev/null +++ b/tests/fixtures/mock_review_agent.py @@ -0,0 +1,153 @@ +#!/usr/bin/env python3 +"""Mock review agent for integration testing. + +This script simulates a review agent that reads a prompt from stdin +and returns a JSON response. The behavior is controlled by environment +variables or by keywords in the input prompt. + +Behavior modes: +- REVIEW_RESULT=pass: Always return passed=true +- REVIEW_RESULT=fail: Always return passed=false +- REVIEW_RESULT=malformed: Return invalid JSON +- REVIEW_RESULT=empty: Return empty response +- REVIEW_RESULT=timeout: Sleep forever (for timeout testing) +- REVIEW_RESULT=error: Exit with non-zero code +- Default: Parse prompt and look for FORCE_PASS or FORCE_FAIL markers +""" + +import json +import os +import sys +import time + + +def main() -> int: + """Main entry point.""" + mode = os.environ.get("REVIEW_RESULT", "auto") + + # Read prompt from stdin + prompt = sys.stdin.read() + + if mode == "timeout": + # Sleep forever to trigger timeout + time.sleep(3600) + return 0 + + if mode == "error": + print("Review agent error!", file=sys.stderr) + return 1 + + if mode == "empty": + return 0 + + if mode == "malformed": + print("This is not valid JSON {{{") + return 0 + + if mode == "pass": + response = { + "passed": True, + "feedback": "All criteria met", + "criteria_results": [ + {"criterion": "Criterion 1", "passed": True, "feedback": None} + ], + } + print(json.dumps(response)) + return 0 + + if mode == "fail": + response = { + "passed": False, + "feedback": "Quality criteria not met", + "criteria_results": [ + { + "criterion": "Criterion 1", + "passed": False, + "feedback": "Did not meet requirements", + } + ], + } + print(json.dumps(response)) + return 0 + + # Auto mode: parse prompt for markers + if "FORCE_PASS" in prompt: + response = { + "passed": True, + "feedback": "Forced pass via marker", + "criteria_results": [], + } + print(json.dumps(response)) + return 0 + + if "FORCE_FAIL" in prompt: + response = { + "passed": False, + "feedback": "Forced fail via marker", + "criteria_results": [ + { + "criterion": "Test criterion", + "passed": False, + "feedback": "Failed due to 
FORCE_FAIL marker", + } + ], + } + print(json.dumps(response)) + return 0 + + # Default: analyze the prompt for quality criteria and outputs + # Extract criteria from prompt and evaluate based on output content + criteria_results = [] + all_passed = True + + # Check if outputs contain expected patterns + if "File not found" in prompt: + criteria_results.append({ + "criterion": "Output files must exist", + "passed": False, + "feedback": "One or more output files were not found", + }) + all_passed = False + elif "Test content" in prompt or "output.md" in prompt: + criteria_results.append({ + "criterion": "Output files must exist", + "passed": True, + "feedback": None, + }) + + # Look for "must contain" type criteria + if "must contain" in prompt.lower(): + if "expected content" in prompt.lower(): + criteria_results.append({ + "criterion": "Output must contain expected content", + "passed": True, + "feedback": None, + }) + else: + criteria_results.append({ + "criterion": "Output must contain expected content", + "passed": False, + "feedback": "Expected content not found in output", + }) + all_passed = False + + if not criteria_results: + # If no specific criteria matched, default based on whether outputs exist + criteria_results.append({ + "criterion": "General quality check", + "passed": True, + "feedback": None, + }) + + response = { + "passed": all_passed, + "feedback": "All criteria met" if all_passed else "Some criteria failed", + "criteria_results": criteria_results, + } + + print(json.dumps(response)) + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/tests/integration/test_quality_gate_integration.py b/tests/integration/test_quality_gate_integration.py new file mode 100644 index 00000000..26360744 --- /dev/null +++ b/tests/integration/test_quality_gate_integration.py @@ -0,0 +1,544 @@ +"""Integration tests for quality gate subprocess execution. + +These tests actually run the subprocess and verify that pass/fail +detection works correctly with real process invocation. 
+""" + +from __future__ import annotations + +import os +import sys +from pathlib import Path + +import pytest + +from deepwork.mcp.quality_gate import QualityGate, QualityGateError + + +# Path to our mock review agent script +MOCK_AGENT_PATH = Path(__file__).parent.parent / "fixtures" / "mock_review_agent.py" + + +@pytest.fixture +def project_root(tmp_path: Path) -> Path: + """Create a temporary project root with test files.""" + # Create a sample output file + output_file = tmp_path / "output.md" + output_file.write_text("Test content for review") + return tmp_path + + +@pytest.fixture +def mock_agent_command() -> str: + """Get the command to run the mock review agent.""" + return f"{sys.executable} {MOCK_AGENT_PATH}" + + +class TestQualityGateIntegration: + """Integration tests that run real subprocesses.""" + + def test_subprocess_returns_pass( + self, project_root: Path, mock_agent_command: str + ) -> None: + """Test that a passing response is correctly detected.""" + gate = QualityGate(command=mock_agent_command, timeout=30) + + # Set environment to force pass + env_backup = os.environ.get("REVIEW_RESULT") + os.environ["REVIEW_RESULT"] = "pass" + + try: + result = gate.evaluate( + quality_criteria=["Output must exist", "Output must be valid"], + outputs=["output.md"], + project_root=project_root, + ) + + assert result.passed is True, f"Expected pass but got: {result}" + assert result.feedback == "All criteria met" + finally: + if env_backup is not None: + os.environ["REVIEW_RESULT"] = env_backup + else: + os.environ.pop("REVIEW_RESULT", None) + + def test_subprocess_returns_fail( + self, project_root: Path, mock_agent_command: str + ) -> None: + """Test that a failing response is correctly detected.""" + gate = QualityGate(command=mock_agent_command, timeout=30) + + # Set environment to force fail + env_backup = os.environ.get("REVIEW_RESULT") + os.environ["REVIEW_RESULT"] = "fail" + + try: + result = gate.evaluate( + quality_criteria=["Output must exist"], + outputs=["output.md"], + project_root=project_root, + ) + + assert result.passed is False, f"Expected fail but got pass: {result}" + assert "not met" in result.feedback.lower() + assert len(result.criteria_results) > 0 + assert result.criteria_results[0].passed is False + finally: + if env_backup is not None: + os.environ["REVIEW_RESULT"] = env_backup + else: + os.environ.pop("REVIEW_RESULT", None) + + def test_subprocess_malformed_response_raises_error( + self, project_root: Path, mock_agent_command: str + ) -> None: + """Test that malformed JSON raises an error.""" + gate = QualityGate(command=mock_agent_command, timeout=30) + + env_backup = os.environ.get("REVIEW_RESULT") + os.environ["REVIEW_RESULT"] = "malformed" + + try: + with pytest.raises(QualityGateError, match="Failed to parse"): + gate.evaluate( + quality_criteria=["Criterion 1"], + outputs=["output.md"], + project_root=project_root, + ) + finally: + if env_backup is not None: + os.environ["REVIEW_RESULT"] = env_backup + else: + os.environ.pop("REVIEW_RESULT", None) + + def test_subprocess_nonzero_exit_raises_error( + self, project_root: Path, mock_agent_command: str + ) -> None: + """Test that non-zero exit code raises an error.""" + gate = QualityGate(command=mock_agent_command, timeout=30) + + env_backup = os.environ.get("REVIEW_RESULT") + os.environ["REVIEW_RESULT"] = "error" + + try: + with pytest.raises(QualityGateError, match="failed with exit code"): + gate.evaluate( + quality_criteria=["Criterion 1"], + outputs=["output.md"], + project_root=project_root, + ) 
+ finally: + if env_backup is not None: + os.environ["REVIEW_RESULT"] = env_backup + else: + os.environ.pop("REVIEW_RESULT", None) + + def test_subprocess_timeout( + self, project_root: Path, mock_agent_command: str + ) -> None: + """Test that subprocess timeout is handled correctly.""" + gate = QualityGate(command=mock_agent_command, timeout=1) # 1 second timeout + + env_backup = os.environ.get("REVIEW_RESULT") + os.environ["REVIEW_RESULT"] = "timeout" + + try: + with pytest.raises(QualityGateError, match="timed out"): + gate.evaluate( + quality_criteria=["Criterion 1"], + outputs=["output.md"], + project_root=project_root, + ) + finally: + if env_backup is not None: + os.environ["REVIEW_RESULT"] = env_backup + else: + os.environ.pop("REVIEW_RESULT", None) + + def test_subprocess_command_not_found(self, project_root: Path) -> None: + """Test that missing command is handled correctly.""" + gate = QualityGate(command="nonexistent_command_12345", timeout=30) + + with pytest.raises(QualityGateError, match="command not found"): + gate.evaluate( + quality_criteria=["Criterion 1"], + outputs=["output.md"], + project_root=project_root, + ) + + def test_auto_mode_detects_force_pass_marker( + self, project_root: Path, mock_agent_command: str + ) -> None: + """Test that FORCE_PASS marker in content causes pass.""" + gate = QualityGate(command=mock_agent_command, timeout=30) + + # Create output with FORCE_PASS marker + output_file = project_root / "marker_output.md" + output_file.write_text("Content with FORCE_PASS marker") + + # Clear any environment override + env_backup = os.environ.get("REVIEW_RESULT") + os.environ.pop("REVIEW_RESULT", None) + + try: + result = gate.evaluate( + quality_criteria=["Criterion 1"], + outputs=["marker_output.md"], + project_root=project_root, + ) + + assert result.passed is True + finally: + if env_backup is not None: + os.environ["REVIEW_RESULT"] = env_backup + + def test_auto_mode_detects_force_fail_marker( + self, project_root: Path, mock_agent_command: str + ) -> None: + """Test that FORCE_FAIL marker in content causes fail.""" + gate = QualityGate(command=mock_agent_command, timeout=30) + + # Create output with FORCE_FAIL marker + output_file = project_root / "marker_output.md" + output_file.write_text("Content with FORCE_FAIL marker") + + # Clear any environment override + env_backup = os.environ.get("REVIEW_RESULT") + os.environ.pop("REVIEW_RESULT", None) + + try: + result = gate.evaluate( + quality_criteria=["Criterion 1"], + outputs=["marker_output.md"], + project_root=project_root, + ) + + assert result.passed is False + finally: + if env_backup is not None: + os.environ["REVIEW_RESULT"] = env_backup + + def test_missing_output_file_causes_fail( + self, project_root: Path, mock_agent_command: str + ) -> None: + """Test that missing output file is detected as failure.""" + gate = QualityGate(command=mock_agent_command, timeout=30) + + # Clear any environment override - let auto mode handle it + env_backup = os.environ.get("REVIEW_RESULT") + os.environ.pop("REVIEW_RESULT", None) + + try: + result = gate.evaluate( + quality_criteria=["Output files must exist"], + outputs=["nonexistent_file.md"], + project_root=project_root, + ) + + # The mock agent should detect "File not found" in prompt and fail + assert result.passed is False + finally: + if env_backup is not None: + os.environ["REVIEW_RESULT"] = env_backup + + +class TestQualityGateResponseParsing: + """Test response parsing with various JSON formats.""" + + def test_parse_json_in_code_block(self) -> None: 
+        """Test parsing JSON wrapped in markdown code block."""
+        gate = QualityGate()
+
+        response = '''Here's my evaluation:
+
+```json
+{
+  "passed": true,
+  "feedback": "All good",
+  "criteria_results": [
+    {"criterion": "Test", "passed": true, "feedback": null}
+  ]
+}
+```
+
+Hope that helps!'''
+
+        result = gate._parse_response(response)
+
+        assert result.passed is True
+        assert result.feedback == "All good"
+
+    def test_parse_json_in_plain_code_block(self) -> None:
+        """Test parsing JSON in a plain code block (no json tag)."""
+        gate = QualityGate()
+
+        response = '''```
+{
+  "passed": false,
+  "feedback": "Issues found",
+  "criteria_results": []
+}
+```'''
+
+        result = gate._parse_response(response)
+
+        assert result.passed is False
+        assert result.feedback == "Issues found"
+
+    def test_parse_raw_json(self) -> None:
+        """Test parsing raw JSON without code block."""
+        gate = QualityGate()
+
+        response = '{"passed": true, "feedback": "OK", "criteria_results": []}'
+
+        result = gate._parse_response(response)
+
+        assert result.passed is True
+        assert result.feedback == "OK"
+
+    def test_parse_missing_passed_field_raises_error(self) -> None:
+        """Test that missing 'passed' field raises schema validation error."""
+        gate = QualityGate()
+
+        # JSON without 'passed' field - now fails schema validation
+        response = '{"feedback": "Some feedback", "criteria_results": []}'
+
+        with pytest.raises(QualityGateError, match="failed schema validation"):
+            gate._parse_response(response)
+
+    def test_parse_non_boolean_passed_field_raises_error(self) -> None:
+        """Test that non-boolean 'passed' field raises schema validation error."""
+        gate = QualityGate()
+
+        # Various non-boolean values - all should fail schema validation
+        test_cases = [
+            ('{"passed": 1, "feedback": "test", "criteria_results": []}', "integer 1"),
+            ('{"passed": "true", "feedback": "test", "criteria_results": []}', "string 'true'"),
+            ('{"passed": "yes", "feedback": "test", "criteria_results": []}', "string 'yes'"),
+            ('{"passed": null, "feedback": "test", "criteria_results": []}', "null"),
+        ]
+
+        for response, case_name in test_cases:
+            with pytest.raises(
+                QualityGateError, match="failed schema validation"
+            ):
+                gate._parse_response(response)
+
+    def test_parse_without_schema_validation_is_lenient(self) -> None:
+        """Test that schema validation can be disabled for lenient parsing."""
+        gate = QualityGate()
+
+        # JSON without 'passed' field - without schema validation, defaults to False
+        response = '{"feedback": "Some feedback", "criteria_results": []}'
+
+        result = gate._parse_response(response, validate_schema=False)
+
+        # Without schema validation, missing passed defaults to False (fail-safe)
+        assert result.passed is False
+
+    def test_parse_criteria_results_structure(self) -> None:
+        """Test that criteria results are properly parsed."""
+        gate = QualityGate()
+
+        response = '''```json
+{
+  "passed": false,
+  "feedback": "Two criteria failed",
+  "criteria_results": [
+    {"criterion": "First check", "passed": true, "feedback": null},
+    {"criterion": "Second check", "passed": false, "feedback": "Missing data"},
+    {"criterion": "Third check", "passed": false, "feedback": "Wrong format"}
+  ]
+}
+```'''
+
+        result = gate._parse_response(response)
+
+        assert result.passed is False
+        assert len(result.criteria_results) == 3
+        assert result.criteria_results[0].passed is True
+        assert result.criteria_results[0].feedback is None
+        assert result.criteria_results[1].passed is False
+        assert result.criteria_results[1].feedback == "Missing data"
+        assert result.criteria_results[2].passed is False
+        assert result.criteria_results[2].feedback == "Wrong format"
+
+    def test_parse_empty_criteria_results(self) -> None:
+        """Test parsing with empty criteria results."""
+        gate = QualityGate()
+
+        response = '{"passed": true, "feedback": "OK", "criteria_results": []}'
+
+        result = gate._parse_response(response)
+
+        assert result.passed is True
+        assert result.criteria_results == []
+
+
+class TestQualityGateSchemaValidation:
+    """Test JSON schema validation for quality gate responses."""
+
+    def test_valid_response_passes_schema(self) -> None:
+        """Test that a valid response passes schema validation."""
+        gate = QualityGate()
+
+        response = '''```json
+{
+  "passed": true,
+  "feedback": "All criteria met",
+  "criteria_results": [
+    {"criterion": "Test 1", "passed": true, "feedback": null},
+    {"criterion": "Test 2", "passed": true}
+  ]
+}
+```'''
+
+        result = gate._parse_response(response)
+
+        assert result.passed is True
+        assert result.feedback == "All criteria met"
+
+    def test_missing_feedback_field_raises_error(self) -> None:
+        """Test that a missing feedback field raises a schema error."""
+        gate = QualityGate()
+
+        # Missing required 'feedback' field
+        response = '{"passed": true, "criteria_results": []}'
+
+        with pytest.raises(QualityGateError, match="failed schema validation"):
+            gate._parse_response(response)
+
+    def test_invalid_criteria_result_type_raises_error(self) -> None:
+        """Test that an invalid criteria_results type raises a schema error."""
+        gate = QualityGate()
+
+        # criteria_results should be an array, not a string
+        response = '{"passed": true, "feedback": "test", "criteria_results": "invalid"}'
+
+        with pytest.raises(QualityGateError, match="failed schema validation"):
+            gate._parse_response(response)
+
+    def test_missing_criterion_in_results_raises_error(self) -> None:
+        """Test that a missing criterion field in results raises a schema error."""
+        gate = QualityGate()
+
+        # criteria_results item missing required 'criterion' field
+        response = '''{"passed": true, "feedback": "test", "criteria_results": [
+            {"passed": true, "feedback": null}
+        ]}'''
+
+        with pytest.raises(QualityGateError, match="failed schema validation"):
+            gate._parse_response(response)
+
+    def test_criteria_results_optional(self) -> None:
+        """Test that criteria_results can be omitted."""
+        gate = QualityGate()
+
+        # criteria_results is optional
+        response = '{"passed": true, "feedback": "All good"}'
+
+        result = gate._parse_response(response)
+
+        assert result.passed is True
+        assert result.feedback == "All good"
+        assert result.criteria_results == []
+
+
+class TestQualityGateEdgeCases:
+    """Test edge cases and potential failure scenarios."""
+
+    def test_empty_quality_criteria_auto_passes(self, project_root: Path) -> None:
+        """Test that no criteria means auto-pass (no subprocess called)."""
+        gate = QualityGate(command="nonexistent_command", timeout=30)
+
+        # Even with a command that doesn't exist, empty criteria should auto-pass
+        result = gate.evaluate(
+            quality_criteria=[],  # No criteria
+            outputs=["output.md"],
+            project_root=project_root,
+        )
+
+        assert result.passed is True
+        assert "auto-passing" in result.feedback.lower()
+
+    def test_multiple_output_files(
+        self, project_root: Path, mock_agent_command: str
+    ) -> None:
+        """Test evaluation with multiple output files."""
+        gate = QualityGate(command=mock_agent_command, timeout=30)
+
+        # Create multiple output files
+        (project_root / "output1.md").write_text("Content 1")
+        (project_root / "output2.md").write_text("Content 2")
+        (project_root / "output3.md").write_text("Content 3")
+
+        env_backup = os.environ.get("REVIEW_RESULT")
+        os.environ["REVIEW_RESULT"] = "pass"
+
+        try:
+            result = gate.evaluate(
+                quality_criteria=["All outputs must exist"],
+                outputs=["output1.md", "output2.md", "output3.md"],
+                project_root=project_root,
+            )
+
+            assert result.passed is True
+        finally:
+            if env_backup is not None:
+                os.environ["REVIEW_RESULT"] = env_backup
+            else:
+                os.environ.pop("REVIEW_RESULT", None)
+
+    def test_large_output_file(
+        self, project_root: Path, mock_agent_command: str
+    ) -> None:
+        """Test evaluation with a large output file."""
+        gate = QualityGate(command=mock_agent_command, timeout=30)
+
+        # Create a large file (~100KB)
+        large_content = "Large content line\n" * 5000
+        (project_root / "large_output.md").write_text(large_content)
+
+        env_backup = os.environ.get("REVIEW_RESULT")
+        os.environ["REVIEW_RESULT"] = "pass"
+
+        try:
+            result = gate.evaluate(
+                quality_criteria=["Output must be complete"],
+                outputs=["large_output.md"],
+                project_root=project_root,
+            )
+
+            assert result.passed is True
+        finally:
+            if env_backup is not None:
+                os.environ["REVIEW_RESULT"] = env_backup
+            else:
+                os.environ.pop("REVIEW_RESULT", None)
+
+    def test_unicode_in_output(
+        self, project_root: Path, mock_agent_command: str
+    ) -> None:
+        """Test evaluation with unicode content."""
+        gate = QualityGate(command=mock_agent_command, timeout=30)
+
+        # Create file with unicode content
+        unicode_content = "Unicode: 你好世界 🚀 émojis and spëcial çharacters"
+        (project_root / "unicode_output.md").write_text(unicode_content)
+
+        env_backup = os.environ.get("REVIEW_RESULT")
+        os.environ["REVIEW_RESULT"] = "pass"
+
+        try:
+            result = gate.evaluate(
+                quality_criteria=["Content must be valid"],
+                outputs=["unicode_output.md"],
+                project_root=project_root,
+            )
+
+            assert result.passed is True
+        finally:
+            if env_backup is not None:
+                os.environ["REVIEW_RESULT"] = env_backup
+            else:
+                os.environ.pop("REVIEW_RESULT", None)
diff --git a/tests/shell_script_tests/test_block_bash_with_instructions.py b/tests/shell_script_tests/test_block_bash_with_instructions.py
deleted file mode 100644
index e916ddba..00000000
--- a/tests/shell_script_tests/test_block_bash_with_instructions.py
+++ /dev/null
@@ -1,237 +0,0 @@
-"""Tests for block_bash_with_instructions.sh hook.
-
-This hook blocks specific Bash commands (e.g., git commit) and provides
-alternative instructions via stderr when exit code 2 is returned.
-
-Hook Contract (PreToolUse with exit code 2):
-  - Exit code 0: Allow the command
-  - Exit code 2: Block the command, stderr message shown to Claude
-  - stderr: Contains the instruction message when blocking
-
-See: https://docs.anthropic.com/en/docs/claude-code/hooks
-"""
-
-import json
-import os
-import subprocess
-from pathlib import Path
-
-import pytest
-
-
-@pytest.fixture
-def block_bash_hook_path() -> Path:
-    """Return the path to the block_bash_with_instructions.sh script."""
-    return (
-        Path(__file__).parent.parent.parent
-        / ".claude"
-        / "hooks"
-        / "block_bash_with_instructions.sh"
-    )
-
-
-def run_block_bash_hook(
-    script_path: Path,
-    tool_name: str,
-    command: str,
-) -> tuple[str, str, int]:
-    """
-    Run the block_bash_with_instructions.sh hook with simulated input.
- - Args: - script_path: Path to the hook script - tool_name: The tool name (e.g., "Bash") - command: The bash command being executed - - Returns: - Tuple of (stdout, stderr, return_code) - """ - hook_input = { - "session_id": "test123", - "hook_event_name": "PreToolUse", - "tool_name": tool_name, - "tool_input": { - "command": command, - }, - } - - result = subprocess.run( - ["bash", str(script_path)], - capture_output=True, - text=True, - input=json.dumps(hook_input), - env=os.environ.copy(), - ) - - return result.stdout, result.stderr, result.returncode - - -class TestBlockBashHookExists: - """Tests that the hook script exists and is properly configured.""" - - def test_script_exists(self, block_bash_hook_path: Path) -> None: - """Test that the hook script exists.""" - assert block_bash_hook_path.exists(), "block_bash_with_instructions.sh should exist" - - def test_script_is_executable(self, block_bash_hook_path: Path) -> None: - """Test that the hook script is executable.""" - assert os.access(block_bash_hook_path, os.X_OK), ( - "block_bash_with_instructions.sh should be executable" - ) - - -class TestGitCommitBlocking: - """Tests for git commit command blocking.""" - - @pytest.mark.parametrize( - "command", - [ - "git commit -m 'message'", - "git commit --amend", - "git commit -a -m 'message'", - "git commit -m 'message'", # Extra space - "git commit --allow-empty -m 'test'", - " git commit -m 'with leading space'", - ], - ) - def test_blocks_git_commit_variants(self, block_bash_hook_path: Path, command: str) -> None: - """Test that git commit variants are blocked with exit code 2.""" - stdout, stderr, code = run_block_bash_hook(block_bash_hook_path, "Bash", command) - assert code == 2, f"Should block '{command}' with exit code 2, got {code}" - assert "/commit" in stderr, f"Should mention /commit skill in stderr: {stderr}" - - def test_stderr_contains_instructions(self, block_bash_hook_path: Path) -> None: - """Test that blocking message contains helpful instructions.""" - stdout, stderr, code = run_block_bash_hook( - block_bash_hook_path, "Bash", "git commit -m 'test'" - ) - assert code == 2 - assert "/commit" in stderr, "Should mention the /commit skill" - assert "skill" in stderr.lower() or "workflow" in stderr.lower(), ( - "Should explain the alternative workflow" - ) - - -class TestAllowedCommands: - """Tests for commands that should be allowed.""" - - @pytest.mark.parametrize( - "command", - [ - # Git commands (non-commit) - "git status", - "git add .", - "git diff HEAD", - "git log --oneline -5", - "git push origin main", - "git pull", - "git fetch", - "git branch -a", - # Non-git commands - "ls -la", - "echo hello", - "python --version", - "cat README.md", - # Commands with 'commit' substring (not at start) - "echo 'commit message'", - "grep -r 'commit' .", - "cat commits.txt", - # 'git commit' in message body (anchored pattern should allow) - "echo 'use git commit to save changes'", - "grep 'git commit' README.md", - ".claude/hooks/commit_job_git_commit.sh -m 'message about git commit'", - ], - ) - def test_allows_command(self, block_bash_hook_path: Path, command: str) -> None: - """Test that non-blocked commands are allowed.""" - stdout, stderr, code = run_block_bash_hook(block_bash_hook_path, "Bash", command) - assert code == 0, f"Should allow '{command}' with exit code 0, got {code}" - - -class TestNonBashTools: - """Tests for non-Bash tool calls.""" - - @pytest.mark.parametrize("tool_name", ["Read", "Write", "Edit", "Glob", "Grep"]) - def test_allows_non_bash_tools(self, 
block_bash_hook_path: Path, tool_name: str) -> None: - """Test that non-Bash tools are not blocked even with git commit in input.""" - stdout, stderr, code = run_block_bash_hook( - block_bash_hook_path, tool_name, "git commit -m 'test'" - ) - assert code == 0, f"Should allow {tool_name} tool with exit code 0, got {code}" - - -class TestEdgeCases: - """Tests for edge cases and malformed input.""" - - def test_empty_input(self, block_bash_hook_path: Path) -> None: - """Test that empty input is handled gracefully.""" - result = subprocess.run( - ["bash", str(block_bash_hook_path)], - capture_output=True, - text=True, - input="", - env=os.environ.copy(), - ) - assert result.returncode == 0, "Should allow with exit code 0 for empty input" - - def test_no_command_in_input(self, block_bash_hook_path: Path) -> None: - """Test that missing command is handled gracefully.""" - hook_input = {"tool_name": "Bash", "tool_input": {}} - result = subprocess.run( - ["bash", str(block_bash_hook_path)], - capture_output=True, - text=True, - input=json.dumps(hook_input), - env=os.environ.copy(), - ) - assert result.returncode == 0, "Should allow with exit code 0 for missing command" - - def test_invalid_json(self, block_bash_hook_path: Path) -> None: - """Test that invalid JSON is handled gracefully.""" - result = subprocess.run( - ["bash", str(block_bash_hook_path)], - capture_output=True, - text=True, - input="not valid json", - env=os.environ.copy(), - ) - # Script uses set -e and jq, so invalid JSON causes jq to fail with exit 5 - # This is acceptable behavior - Claude Code won't send invalid JSON - assert result.returncode in (0, 1, 5), ( - f"Should handle invalid JSON without crashing unexpectedly, got {result.returncode}" - ) - - -# ****************************************************************************** -# *** CLAUDE CODE CONTRACT TEST *** -# ****************************************************************************** -# -# DO NOT MODIFY this test without consulting Claude Code hook documentation: -# https://docs.anthropic.com/en/docs/claude-code/hooks -# -# PreToolUse hooks with exit code 2 MUST: -# - Output error message to stderr (NOT stdout) -# - Exit with code 2 -# -# PreToolUse hooks that allow MUST: -# - Exit with code 0 -# - Produce no output on stderr -# -# ****************************************************************************** -class TestOutputsAndExitsAccordingToClaudeSpec: - """Tests that hook output conforms to Claude Code's required format.""" - - def test_claude_code_hook_contract(self, block_bash_hook_path: Path) -> None: - """Verify hook follows Claude Code PreToolUse contract for block/allow.""" - # Test BLOCK behavior - stdout, stderr, code = run_block_bash_hook( - block_bash_hook_path, "Bash", "git commit -m 'test'" - ) - assert code == 2, "Blocked command must exit with code 2" - assert stderr.strip() != "", "Blocked command must output message to stderr" - assert stdout.strip() == "", "Blocked command must not output to stdout" - - # Test ALLOW behavior - stdout, stderr, code = run_block_bash_hook(block_bash_hook_path, "Bash", "git status") - assert code == 0, "Allowed command must exit with code 0" - assert stderr.strip() == "", "Allowed command must not output to stderr" diff --git a/tests/unit/mcp/test_quality_gate.py b/tests/unit/mcp/test_quality_gate.py index d5b55c77..783730f4 100644 --- a/tests/unit/mcp/test_quality_gate.py +++ b/tests/unit/mcp/test_quality_gate.py @@ -36,37 +36,45 @@ def test_init_defaults(self) -> None: assert gate.command == "claude -p 
--output-format json" assert gate.timeout == 120 - def test_build_review_prompt(self, quality_gate: QualityGate, project_root: Path) -> None: - """Test building review prompt.""" + def test_build_instructions(self, quality_gate: QualityGate) -> None: + """Test building system instructions.""" + instructions = quality_gate._build_instructions( + quality_criteria=["Output must exist", "Output must be valid"], + ) + + assert "Output must exist" in instructions + assert "Output must be valid" in instructions + assert "quality gate reviewer" in instructions.lower() + assert "passed" in instructions # JSON format mentioned + assert "feedback" in instructions # JSON format mentioned + + def test_build_payload(self, quality_gate: QualityGate, project_root: Path) -> None: + """Test building payload with file contents.""" # Create test output file output_file = project_root / "output.md" output_file.write_text("Test content") - prompt = quality_gate._build_review_prompt( - step_instructions="Do something", - quality_criteria=["Output must exist", "Output must be valid"], + payload = quality_gate._build_payload( outputs=["output.md"], project_root=project_root, ) - assert "Do something" in prompt - assert "Output must exist" in prompt - assert "Output must be valid" in prompt - assert "Test content" in prompt - assert "output.md" in prompt + assert "Test content" in payload + assert "output.md" in payload + # Check for the new separator format (20 dashes) + assert "--------------------" in payload - def test_build_review_prompt_missing_file( + def test_build_payload_missing_file( self, quality_gate: QualityGate, project_root: Path ) -> None: - """Test building prompt with missing file.""" - prompt = quality_gate._build_review_prompt( - step_instructions="Do something", - quality_criteria=["Criteria"], + """Test building payload with missing file.""" + payload = quality_gate._build_payload( outputs=["nonexistent.md"], project_root=project_root, ) - assert "File not found" in prompt + assert "File not found" in payload + assert "nonexistent.md" in payload def test_parse_response_valid_json(self, quality_gate: QualityGate) -> None: """Test parsing valid JSON response.""" @@ -120,7 +128,6 @@ def test_parse_response_invalid_json(self, quality_gate: QualityGate) -> None: def test_evaluate_no_criteria(self, quality_gate: QualityGate, project_root: Path) -> None: """Test evaluation with no criteria auto-passes.""" result = quality_gate.evaluate( - step_instructions="Do something", quality_criteria=[], outputs=["output.md"], project_root=project_root, @@ -138,7 +145,6 @@ def test_mock_passes_by_default(self, project_root: Path) -> None: gate = MockQualityGate() result = gate.evaluate( - step_instructions="Do something", quality_criteria=["Criterion 1"], outputs=["output.md"], project_root=project_root, @@ -152,7 +158,6 @@ def test_mock_can_fail(self, project_root: Path) -> None: gate = MockQualityGate(should_pass=False, feedback="Mock failure") result = gate.evaluate( - step_instructions="Do something", quality_criteria=["Criterion 1"], outputs=["output.md"], project_root=project_root, @@ -166,18 +171,16 @@ def test_mock_records_evaluations(self, project_root: Path) -> None: gate = MockQualityGate() gate.evaluate( - step_instructions="Instruction 1", quality_criteria=["Criterion 1"], outputs=["output1.md"], project_root=project_root, ) gate.evaluate( - step_instructions="Instruction 2", quality_criteria=["Criterion 2"], outputs=["output2.md"], project_root=project_root, ) assert len(gate.evaluations) == 2 - 
assert gate.evaluations[0]["step_instructions"] == "Instruction 1" - assert gate.evaluations[1]["step_instructions"] == "Instruction 2" + assert gate.evaluations[0]["quality_criteria"] == ["Criterion 1"] + assert gate.evaluations[1]["quality_criteria"] == ["Criterion 2"] diff --git a/tests/unit/mcp/test_tools.py b/tests/unit/mcp/test_tools.py index b783edb8..24cc9f78 100644 --- a/tests/unit/mcp/test_tools.py +++ b/tests/unit/mcp/test_tools.py @@ -308,3 +308,37 @@ def test_finished_step_quality_gate_max_attempts( # Third attempt should raise error with pytest.raises(ToolError, match="Quality gate failed after.*attempts"): tools.finished_step(FinishedStepInput(outputs=["output1.md"])) + + def test_finished_step_quality_gate_override( + self, project_root: Path, state_manager: StateManager + ) -> None: + """Test finished_step skips quality gate when override reason provided.""" + # Create tools with failing quality gate + failing_gate = MockQualityGate(should_pass=False, feedback="Would fail") + tools = WorkflowTools( + project_root=project_root, + state_manager=state_manager, + quality_gate=failing_gate, + ) + + # Start workflow + start_input = StartWorkflowInput( + goal="Complete task", + job_name="test_job", + workflow_name="main", + ) + tools.start_workflow(start_input) + + # Create output and finish step with override reason + (project_root / "output1.md").write_text("Output that would fail quality check") + response = tools.finished_step( + FinishedStepInput( + outputs=["output1.md"], + quality_review_override_reason="Manual review completed offline", + ) + ) + + # Should advance to next step despite failing quality gate config + assert response.status == StepStatus.NEXT_STEP + # Quality gate should not have been called + assert len(failing_gate.evaluations) == 0 From cd2ae6374d1d4b0d812c8a738a7bad1e6291f984 Mon Sep 17 00:00:00 2001 From: Noah Horton Date: Tue, 3 Feb 2026 16:34:56 -0700 Subject: [PATCH 05/45] chore: Update tests and sync for MCP variant - Update e2e tests for Claude Code integration - Add quality_criteria to fruits job fixture - Fix test assertions for updated install flow - Minor sync.py adjustments Co-Authored-By: Claude Opus 4.5 --- src/deepwork/cli/sync.py | 16 +- tests/e2e/test_claude_code_integration.py | 310 ++++++++++++++++------ tests/fixtures/jobs/fruits/job.yml | 7 + tests/integration/test_fruits_workflow.py | 2 +- tests/integration/test_install_flow.py | 48 ++-- 5 files changed, 248 insertions(+), 135 deletions(-) diff --git a/src/deepwork/cli/sync.py b/src/deepwork/cli/sync.py index 687d47d1..dbfce52b 100644 --- a/src/deepwork/cli/sync.py +++ b/src/deepwork/cli/sync.py @@ -145,19 +145,9 @@ def sync_skills(project_path: Path) -> None: except Exception as e: console.print(f" [red]✗[/red] Failed to generate /deepwork skill: {e}") - # Generate skills for all jobs - if jobs: - console.print(" [dim]•[/dim] Generating job skills...") - for job in jobs: - try: - job_paths = generator.generate_all_skills( - job, adapter, platform_dir, project_root=project_path - ) - all_skill_paths.extend(job_paths) - stats["skills"] += len(job_paths) - console.print(f" [green]✓[/green] {job.name} ({len(job_paths)} skills)") - except Exception as e: - console.print(f" [red]✗[/red] Failed for {job.name}: {e}") + # NOTE: Job skills (meta-skills and step skills) are no longer generated. + # The MCP server now handles workflow orchestration directly. + # Only the /deepwork skill is installed as the entry point. 
# Sync hooks to platform settings if job_hooks_list: diff --git a/tests/e2e/test_claude_code_integration.py b/tests/e2e/test_claude_code_integration.py index b98fbc28..abce6a86 100644 --- a/tests/e2e/test_claude_code_integration.py +++ b/tests/e2e/test_claude_code_integration.py @@ -1,10 +1,10 @@ """End-to-end tests for DeepWork with Claude Code integration. -These tests validate that DeepWork-generated skills work correctly -with Claude Code. The tests can run in two modes: +These tests validate that DeepWork MCP-based workflows work correctly. +The tests can run in two modes: -1. **Generation-only mode** (default): Tests skill generation and structure -2. **Full e2e mode**: Actually executes skills with Claude Code +1. **MCP tools mode** (default): Tests MCP skill generation and workflow tools +2. **Full e2e mode**: Actually executes workflows with Claude Code via MCP Set ANTHROPIC_API_KEY and DEEPWORK_E2E_FULL=true to run full e2e tests. """ @@ -20,6 +20,8 @@ from deepwork.core.adapters import ClaudeAdapter from deepwork.core.generator import SkillGenerator from deepwork.core.parser import parse_job_definition +from deepwork.mcp.state import StateManager +from deepwork.mcp.tools import WorkflowTools # Test input for deterministic validation TEST_INPUT = "apple, car, banana, chair, orange, table, mango, laptop, grape, bicycle" @@ -55,11 +57,11 @@ def run_full_e2e() -> bool: ) -class TestSkillGenerationE2E: - """End-to-end tests for skill generation.""" +class TestMCPSkillGeneration: + """Tests for MCP entry point skill generation.""" - def test_generate_fruits_skills_in_temp_project(self) -> None: - """Test generating fruits skills in a realistic project structure.""" + def test_generate_deepwork_skill_in_temp_project(self) -> None: + """Test generating the /deepwork MCP skill in a realistic project structure.""" with tempfile.TemporaryDirectory() as tmpdir: project_dir = Path(tmpdir) @@ -67,7 +69,7 @@ def test_generate_fruits_skills_in_temp_project(self) -> None: deepwork_dir = project_dir / ".deepwork" / "jobs" deepwork_dir.mkdir(parents=True) - # Copy fruits job fixture + # Copy fruits job fixture (for job discovery testing) fixtures_dir = Path(__file__).parent.parent / "fixtures" / "jobs" / "fruits" shutil.copytree(fixtures_dir, deepwork_dir / "fruits") @@ -84,96 +86,220 @@ def test_generate_fruits_skills_in_temp_project(self) -> None: capture_output=True, ) - # Parse job and generate skills - job = parse_job_definition(deepwork_dir / "fruits") + # Generate MCP entry point skill generator = SkillGenerator() - adapter = ClaudeAdapter() + adapter = ClaudeAdapter(project_root=project_dir) - skills_dir = project_dir / ".claude" - skills_dir.mkdir() + claude_dir = project_dir / ".claude" + claude_dir.mkdir() - skill_paths = generator.generate_all_skills(job, adapter, skills_dir) + skill_path = generator.generate_deepwork_skill(adapter, claude_dir) - # Validate skills were generated (meta + steps) - assert len(skill_paths) == 3 # 1 meta + 2 steps + # Validate skill was generated + assert skill_path.exists() + expected_path = claude_dir / "skills" / "deepwork" / "SKILL.md" + assert skill_path == expected_path - meta_skill = skills_dir / "skills" / "fruits" / "SKILL.md" - identify_skill = skills_dir / "skills" / "fruits.identify" / "SKILL.md" - classify_skill = skills_dir / "skills" / "fruits.classify" / "SKILL.md" + def test_deepwork_skill_structure(self) -> None: + """Test that the generated /deepwork skill has the expected structure.""" + with tempfile.TemporaryDirectory() as tmpdir: + 
project_dir = Path(tmpdir) + claude_dir = project_dir / ".claude" + claude_dir.mkdir(parents=True) - assert meta_skill.exists() - assert identify_skill.exists() - assert classify_skill.exists() + generator = SkillGenerator() + adapter = ClaudeAdapter(project_root=project_dir) + skill_path = generator.generate_deepwork_skill(adapter, claude_dir) - # Validate skill content - identify_content = identify_skill.read_text() - assert "# fruits.identify" in identify_content - assert "raw_items" in identify_content - assert "identified_fruits.md" in identify_content + content = skill_path.read_text() - classify_content = classify_skill.read_text() - assert "# fruits.classify" in classify_content - assert "identified_fruits.md" in classify_content - assert "classified_fruits.md" in classify_content + # Check frontmatter + assert "---" in content + assert "name: deepwork" in content - def test_skill_structure_matches_claude_code_expectations(self) -> None: - """Test that generated skills have the structure Claude Code expects.""" - fixtures_dir = Path(__file__).parent.parent / "fixtures" / "jobs" / "fruits" - job = parse_job_definition(fixtures_dir) + # Check MCP tool references + assert "get_workflows" in content + assert "start_workflow" in content + assert "finished_step" in content + + # Check structure sections + assert "# DeepWork" in content + assert "MCP" in content + def test_deepwork_skill_mcp_instructions(self) -> None: + """Test that the /deepwork skill properly instructs use of MCP tools.""" with tempfile.TemporaryDirectory() as tmpdir: - skills_dir = Path(tmpdir) / ".claude" - skills_dir.mkdir() + project_dir = Path(tmpdir) + claude_dir = project_dir / ".claude" + claude_dir.mkdir(parents=True) generator = SkillGenerator() - adapter = ClaudeAdapter() - generator.generate_all_skills(job, adapter, skills_dir) + adapter = ClaudeAdapter(project_root=project_dir) + skill_path = generator.generate_deepwork_skill(adapter, claude_dir) + + content = skill_path.read_text() + + # Should instruct to use MCP tools, not read files + assert "MCP" in content + assert "tool" in content.lower() + + # Should describe the workflow execution flow + assert "start_workflow" in content + assert "finished_step" in content - # Step skills use directory/SKILL.md format - identify_skill = skills_dir / "skills" / "fruits.identify" / "SKILL.md" - content = identify_skill.read_text() - # Claude Code expects specific sections - assert "# fruits.identify" in content # Skill name header - assert "## Instructions" in content # Instructions section - assert "## Required Inputs" in content # Inputs section - assert "## Outputs" in content # Outputs section +class TestMCPWorkflowTools: + """Tests for MCP workflow tools functionality.""" - # Check for user input prompt - assert "raw_items" in content + @pytest.fixture + def project_with_job(self) -> Path: + """Create a test project with a job definition.""" + tmpdir = tempfile.mkdtemp() + project_dir = Path(tmpdir) - def test_dependency_chain_in_skills(self) -> None: - """Test that dependency chain is correctly represented in skills.""" + # Set up project structure + deepwork_dir = project_dir / ".deepwork" / "jobs" + deepwork_dir.mkdir(parents=True) + + # Copy fruits job fixture fixtures_dir = Path(__file__).parent.parent / "fixtures" / "jobs" / "fruits" - job = parse_job_definition(fixtures_dir) + shutil.copytree(fixtures_dir, deepwork_dir / "fruits") - with tempfile.TemporaryDirectory() as tmpdir: - skills_dir = Path(tmpdir) / ".claude" - skills_dir.mkdir() + # Initialize 
git repo + subprocess.run(["git", "init"], cwd=project_dir, capture_output=True) + subprocess.run( + ["git", "config", "user.email", "test@test.com"], + cwd=project_dir, + capture_output=True, + ) + subprocess.run( + ["git", "config", "user.name", "Test"], + cwd=project_dir, + capture_output=True, + ) - generator = SkillGenerator() - adapter = ClaudeAdapter() - generator.generate_all_skills(job, adapter, skills_dir) + # Create README and initial commit + (project_dir / "README.md").write_text("# Test Project\n") + subprocess.run(["git", "add", "."], cwd=project_dir, capture_output=True) + subprocess.run( + ["git", "commit", "-m", "init"], + cwd=project_dir, + capture_output=True, + ) + + yield project_dir + + # Cleanup + shutil.rmtree(tmpdir, ignore_errors=True) + + def test_get_workflows_returns_jobs(self, project_with_job: Path) -> None: + """Test that get_workflows returns available jobs and workflows.""" + state_manager = StateManager(project_with_job) + tools = WorkflowTools(project_with_job, state_manager) + + response = tools.get_workflows() + + # Should find the fruits job + assert len(response.jobs) >= 1 + job_names = [job.name for job in response.jobs] + assert "fruits" in job_names + + # Find fruits job and check structure + fruits_job = next(j for j in response.jobs if j.name == "fruits") + assert fruits_job.description is not None + + # The fruits fixture has a "full" workflow + assert len(fruits_job.workflows) >= 1 + full_workflow = fruits_job.workflows[0] + assert full_workflow.name == "full" + + # Workflow should contain the steps + assert "identify" in full_workflow.steps + assert "classify" in full_workflow.steps + + def test_start_workflow_creates_session(self, project_with_job: Path) -> None: + """Test that start_workflow creates a new workflow session.""" + state_manager = StateManager(project_with_job) + tools = WorkflowTools(project_with_job, state_manager) + + # Get available workflows first + workflows_response = tools.get_workflows() + fruits_job = next(j for j in workflows_response.jobs if j.name == "fruits") - # Step skills use directory/SKILL.md format - # First step should have no prerequisites - identify_skill = skills_dir / "skills" / "fruits.identify" / "SKILL.md" - identify_content = identify_skill.read_text() - assert "## Prerequisites" not in identify_content + # Should have the "full" workflow + assert len(fruits_job.workflows) >= 1 + workflow_name = fruits_job.workflows[0].name - # Second step should reference first step - classify_skill = skills_dir / "skills" / "fruits.classify" / "SKILL.md" - classify_content = classify_skill.read_text() - assert "## Prerequisites" in classify_content - assert "identify" in classify_content.lower() + from deepwork.mcp.schemas import StartWorkflowInput + + input_data = StartWorkflowInput( + goal="Test identifying and classifying fruits", + job_name="fruits", + workflow_name=workflow_name, + instance_id="test-instance", + ) + + response = tools.start_workflow(input_data) + + # Should return session info + assert response.session_id is not None + assert response.branch_name is not None + assert "deepwork" in response.branch_name.lower() + assert "fruits" in response.branch_name.lower() + + # Should return first step instructions + assert response.current_step_id is not None + assert response.step_instructions is not None + assert len(response.step_instructions) > 0 + + def test_workflow_step_progression(self, project_with_job: Path) -> None: + """Test that finished_step progresses through workflow steps.""" + 
state_manager = StateManager(project_with_job) + tools = WorkflowTools(project_with_job, state_manager) + + # Get workflows and start + workflows_response = tools.get_workflows() + fruits_job = next(j for j in workflows_response.jobs if j.name == "fruits") + + # Should have the "full" workflow + assert len(fruits_job.workflows) >= 1 + workflow_name = fruits_job.workflows[0].name + + from deepwork.mcp.schemas import FinishedStepInput, StartWorkflowInput + + start_input = StartWorkflowInput( + goal="Test workflow progression", + job_name="fruits", + workflow_name=workflow_name, + ) + start_response = tools.start_workflow(start_input) + + # Create mock output file for first step + output_file = project_with_job / "identified_fruits.md" + output_file.write_text("# Identified Fruits\n\n- apple\n- banana\n- orange\n") + + # Report first step completion + finish_input = FinishedStepInput( + outputs=[str(output_file)], + notes="Identified fruits from test input", + ) + finish_response = tools.finished_step(finish_input) + + # Should either advance to next step or complete + assert finish_response.status in ["next_step", "workflow_complete", "needs_work"] + + if finish_response.status == "next_step": + # Should have instructions for next step + assert finish_response.step_instructions is not None + assert finish_response.next_step_id is not None @pytest.mark.skipif( not run_full_e2e(), reason="Full e2e requires ANTHROPIC_API_KEY, DEEPWORK_E2E_FULL=true, and claude CLI", ) -class TestClaudeCodeExecution: - """End-to-end tests that actually execute with Claude Code. +class TestClaudeCodeMCPExecution: + """End-to-end tests that actually execute with Claude Code via MCP. These tests only run when: - ANTHROPIC_API_KEY is set @@ -182,8 +308,8 @@ class TestClaudeCodeExecution: """ @pytest.fixture - def project_with_skills(self) -> Path: - """Create a test project with generated skills.""" + def project_with_mcp(self) -> Path: + """Create a test project with MCP server configured.""" tmpdir = tempfile.mkdtemp() project_dir = Path(tmpdir) @@ -217,30 +343,38 @@ def project_with_skills(self) -> Path: capture_output=True, ) - # Generate skills - job = parse_job_definition(deepwork_dir / "fruits") + # Generate /deepwork skill generator = SkillGenerator() - adapter = ClaudeAdapter() + adapter = ClaudeAdapter(project_root=project_dir) + + claude_dir = project_dir / ".claude" + claude_dir.mkdir() + generator.generate_deepwork_skill(adapter, claude_dir) - skills_dir = project_dir / ".claude" - skills_dir.mkdir() - generator.generate_all_skills(job, adapter, skills_dir) + # Register MCP server + adapter.register_mcp_server(project_dir) yield project_dir # Cleanup shutil.rmtree(tmpdir, ignore_errors=True) - def test_fruits_workflow_execution(self, project_with_skills: Path) -> None: - """Test executing the complete fruits workflow with Claude Code. + def test_fruits_workflow_via_mcp(self, project_with_mcp: Path) -> None: + """Test executing the fruits workflow via MCP tools. - Invokes /fruits once, which automatically runs all steps (identify + classify). + Uses /deepwork skill which instructs Claude to use MCP tools + for workflow orchestration. """ - # Run Claude Code with the fruits skill - this executes the full workflow + # Run Claude Code with the /deepwork skill + # The skill instructs Claude to use MCP tools result = subprocess.run( - ["claude", "--print", "/fruits"], - input=f"raw_items: {TEST_INPUT}", - cwd=project_with_skills, + [ + "claude", + "--print", + f"Use /deepwork to start a fruits workflow. 
" + f"For the identify step, use these items: {TEST_INPUT}", + ], + cwd=project_with_mcp, capture_output=True, text=True, timeout=300, # 5 minutes for full workflow @@ -249,7 +383,7 @@ def test_fruits_workflow_execution(self, project_with_skills: Path) -> None: assert result.returncode == 0, f"Claude Code failed: {result.stderr}" # Verify identify step output was created - identify_output = project_with_skills / "identified_fruits.md" + identify_output = project_with_mcp / "identified_fruits.md" assert identify_output.exists(), "identified_fruits.md was not created" # Validate identify output content @@ -260,7 +394,7 @@ def test_fruits_workflow_execution(self, project_with_skills: Path) -> None: ) # Verify classify step output was created - classify_output = project_with_skills / "classified_fruits.md" + classify_output = project_with_mcp / "classified_fruits.md" assert classify_output.exists(), "classified_fruits.md was not created" # Validate classify output has category structure diff --git a/tests/fixtures/jobs/fruits/job.yml b/tests/fixtures/jobs/fruits/job.yml index e1ce79a6..01d96994 100644 --- a/tests/fixtures/jobs/fruits/job.yml +++ b/tests/fixtures/jobs/fruits/job.yml @@ -15,6 +15,13 @@ changelog: - version: "1.0.0" changes: "Initial version for CI testing" +workflows: + - name: full + summary: "Run the complete fruits identification and classification" + steps: + - identify + - classify + steps: - id: identify name: "Identify Fruits" diff --git a/tests/integration/test_fruits_workflow.py b/tests/integration/test_fruits_workflow.py index 8df8d956..9dc868c8 100644 --- a/tests/integration/test_fruits_workflow.py +++ b/tests/integration/test_fruits_workflow.py @@ -154,7 +154,7 @@ def test_fruits_classify_skill_content(self, fixtures_dir: Path, temp_dir: Path) assert "classified_fruits.md" in content # Check workflow complete (last step) - assert "Workflow complete" in content + assert "workflow complete" in content.lower() def test_fruits_dependency_validation(self, fixtures_dir: Path) -> None: """Test that dependency validation passes for fruits job.""" diff --git a/tests/integration/test_install_flow.py b/tests/integration/test_install_flow.py index 2c800a8c..ec66cfd8 100644 --- a/tests/integration/test_install_flow.py +++ b/tests/integration/test_install_flow.py @@ -39,25 +39,13 @@ def test_install_with_claude(self, mock_claude_project: Path) -> None: assert config is not None assert "claude" in config["platforms"] - # Verify core skills were created (directory/SKILL.md format) + # Verify MCP entry point skill was created (deepwork/SKILL.md) claude_dir = mock_claude_project / ".claude" / "skills" - # Meta-skill - assert (claude_dir / "deepwork_jobs" / "SKILL.md").exists() - # Step skill (no prefix, but has user-invocable: false in frontmatter) - assert (claude_dir / "deepwork_jobs.define" / "SKILL.md").exists() - # Exposed step skill (user-invocable - learn has exposed: true) - assert (claude_dir / "deepwork_jobs.learn" / "SKILL.md").exists() - - # Verify meta-skill content - meta_skill = (claude_dir / "deepwork_jobs" / "SKILL.md").read_text() - assert "# deepwork_jobs" in meta_skill - # deepwork_jobs has workflows defined, so it shows "Workflows" instead of "Available Steps" - assert "Workflows" in meta_skill or "Available Steps" in meta_skill - - # Verify step skill content - define_skill = (claude_dir / "deepwork_jobs.define" / "SKILL.md").read_text() - assert "# deepwork_jobs.define" in define_skill - assert "Define Job Specification" in define_skill + assert (claude_dir / 
"deepwork" / "SKILL.md").exists() + + # Verify deepwork skill content references MCP tools + deepwork_skill = (claude_dir / "deepwork" / "SKILL.md").read_text() + assert "deepwork" in deepwork_skill.lower() def test_install_with_auto_detect(self, mock_claude_project: Path) -> None: """Test installing with auto-detection.""" @@ -103,9 +91,9 @@ def test_install_defaults_to_claude_when_no_platform(self, mock_git_repo: Path) assert config is not None assert "claude" in config["platforms"] - # Verify skills were created for Claude + # Verify MCP entry point skill was created for Claude skills_dir = claude_dir / "skills" - assert (skills_dir / "deepwork_jobs" / "SKILL.md").exists() + assert (skills_dir / "deepwork" / "SKILL.md").exists() def test_install_with_multiple_platforms_auto_detect( self, mock_multi_platform_project: Path @@ -132,17 +120,13 @@ def test_install_with_multiple_platforms_auto_detect( assert "claude" in config["platforms"] assert "gemini" in config["platforms"] - # Verify skills were created for both platforms + # Verify MCP entry point skill was created for Claude claude_dir = mock_multi_platform_project / ".claude" / "skills" - # Meta-skill and step skills (directory/SKILL.md format) - assert (claude_dir / "deepwork_jobs" / "SKILL.md").exists() - assert (claude_dir / "deepwork_jobs.define" / "SKILL.md").exists() + assert (claude_dir / "deepwork" / "SKILL.md").exists() - # Gemini uses job_name/step_id.toml structure - gemini_dir = mock_multi_platform_project / ".gemini" / "skills" - # Meta-skill (index.toml) and step skills - assert (gemini_dir / "deepwork_jobs" / "index.toml").exists() - assert (gemini_dir / "deepwork_jobs" / "define.toml").exists() + # Note: Gemini MCP skill template (skill-deepwork) is not yet implemented + # so we don't assert on Gemini skill existence - the install will show + # an error for Gemini skill generation but continue def test_install_with_specified_platform_when_missing(self, mock_git_repo: Path) -> None: """Test that install fails when specified platform is not present.""" @@ -181,10 +165,8 @@ def test_install_is_idempotent(self, mock_claude_project: Path) -> None: assert (deepwork_dir / "config.yml").exists() claude_dir = mock_claude_project / ".claude" / "skills" - # Meta-skill and step skills (directory/SKILL.md format) - assert (claude_dir / "deepwork_jobs" / "SKILL.md").exists() - assert (claude_dir / "deepwork_jobs.define" / "SKILL.md").exists() - assert (claude_dir / "deepwork_jobs.learn" / "SKILL.md").exists() + # MCP entry point skill + assert (claude_dir / "deepwork" / "SKILL.md").exists() class TestCLIEntryPoint: """Tests for CLI entry point.""" From a3fae18e701116c6122c656ab4266b1f30577d03 Mon Sep 17 00:00:00 2001 From: Noah Horton Date: Tue, 3 Feb 2026 17:45:42 -0700 Subject: [PATCH 06/45] Cleaned up MCP rules --- .deepwork/config.yml | 2 +- .deepwork/jobs/add_platform/job.yml | 1 + .deepwork/jobs/commit/job.yml | 1 + .deepwork/jobs/deepwork_jobs/job.yml | 1 + .deepwork/jobs/update/job.yml | 1 + .deepwork/schemas/job.schema.json | 347 ++++++++++++++++++ doc/mcp_interface.md | 230 ++++++++++++ library/jobs/commit/job.yml | 1 + library/jobs/spec_driven_development/job.yml | 1 + src/deepwork/cli/install.py | 45 ++- src/deepwork/mcp/schemas.py | 52 +-- src/deepwork/mcp/server.py | 12 +- src/deepwork/mcp/tools.py | 53 ++- src/deepwork/schemas/job.schema.json | 347 ++++++++++++++++++ src/deepwork/schemas/job_schema.py | 318 ++-------------- .../standard_jobs/deepwork_jobs/job.yml | 1 + tests/e2e/test_claude_code_integration.py | 24 +- 
tests/fixtures/jobs/complex_job/job.yml | 1 + .../jobs/concurrent_steps_job/job.yml | 1 + tests/fixtures/jobs/exposed_step_job/job.yml | 1 + tests/fixtures/jobs/fruits/job.yml | 1 + tests/fixtures/jobs/job_with_doc_spec/job.yml | 1 + tests/fixtures/jobs/simple_job/job.yml | 1 + tests/unit/mcp/test_schemas.py | 79 ++-- tests/unit/mcp/test_tools.py | 22 +- 25 files changed, 1146 insertions(+), 398 deletions(-) create mode 100644 .deepwork/schemas/job.schema.json create mode 100644 doc/mcp_interface.md create mode 100644 src/deepwork/schemas/job.schema.json diff --git a/.deepwork/config.yml b/.deepwork/config.yml index 7187cc6f..06ddbd81 100644 --- a/.deepwork/config.yml +++ b/.deepwork/config.yml @@ -3,6 +3,6 @@ platforms: - claude - gemini quality_gate: - agent_review_command: "claude -p --output-format json" + agent_review_command: claude -p --output-format json default_timeout: 120 default_max_attempts: 3 diff --git a/.deepwork/jobs/add_platform/job.yml b/.deepwork/jobs/add_platform/job.yml index 80b333dc..4bb1ee52 100644 --- a/.deepwork/jobs/add_platform/job.yml +++ b/.deepwork/jobs/add_platform/job.yml @@ -1,3 +1,4 @@ +# yaml-language-server: $schema=.deepwork/schemas/job.schema.json name: add_platform version: "0.4.0" summary: "Adds a new AI platform to DeepWork with adapter, templates, and tests. Use when integrating Cursor, Windsurf, or other AI coding tools." diff --git a/.deepwork/jobs/commit/job.yml b/.deepwork/jobs/commit/job.yml index ea0bf955..812475e2 100644 --- a/.deepwork/jobs/commit/job.yml +++ b/.deepwork/jobs/commit/job.yml @@ -1,3 +1,4 @@ +# yaml-language-server: $schema=.deepwork/schemas/job.schema.json name: commit version: "1.5.0" summary: "Reviews code, runs tests, lints, and commits changes. Use when ready to commit work with quality checks." diff --git a/.deepwork/jobs/deepwork_jobs/job.yml b/.deepwork/jobs/deepwork_jobs/job.yml index 4343cbda..5ee6bf7d 100644 --- a/.deepwork/jobs/deepwork_jobs/job.yml +++ b/.deepwork/jobs/deepwork_jobs/job.yml @@ -1,3 +1,4 @@ +# yaml-language-server: $schema=.deepwork/schemas/job.schema.json name: deepwork_jobs version: "1.0.0" summary: "Creates and manages multi-step AI workflows. Use when defining, implementing, or improving DeepWork jobs." diff --git a/.deepwork/jobs/update/job.yml b/.deepwork/jobs/update/job.yml index 61b0013e..92c13433 100644 --- a/.deepwork/jobs/update/job.yml +++ b/.deepwork/jobs/update/job.yml @@ -1,3 +1,4 @@ +# yaml-language-server: $schema=.deepwork/schemas/job.schema.json name: update version: "1.3.0" summary: "Updates DeepWork standard jobs in src/ and syncs to installed locations. Use when modifying deepwork_jobs." diff --git a/.deepwork/schemas/job.schema.json b/.deepwork/schemas/job.schema.json new file mode 100644 index 00000000..1d794f98 --- /dev/null +++ b/.deepwork/schemas/job.schema.json @@ -0,0 +1,347 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "$id": "https://deepwork.dev/schemas/job.schema.json", + "title": "DeepWork Job Definition", + "description": "Schema for DeepWork job.yml files. Jobs are multi-step workflows executed by AI agents.", + "type": "object", + "required": ["name", "version", "summary", "steps"], + "additionalProperties": false, + "properties": { + "name": { + "type": "string", + "pattern": "^[a-z][a-z0-9_]*$", + "description": "Job name (lowercase letters, numbers, underscores, must start with letter). 
Example: 'competitive_research'" + }, + "version": { + "type": "string", + "pattern": "^\\d+\\.\\d+\\.\\d+$", + "description": "Semantic version (e.g., '1.0.0')" + }, + "summary": { + "type": "string", + "minLength": 1, + "maxLength": 200, + "description": "Brief one-line summary of what this job accomplishes. Used in skill descriptions." + }, + "description": { + "type": "string", + "minLength": 1, + "description": "Detailed multi-line description of the job's purpose, process, and goals" + }, + "workflows": { + "type": "array", + "description": "Named workflows that group steps into multi-step sequences. Workflows define execution order.", + "items": { + "$ref": "#/$defs/workflow" + } + }, + "changelog": { + "type": "array", + "description": "Version history documenting changes to the job definition", + "items": { + "$ref": "#/$defs/changelogEntry" + } + }, + "steps": { + "type": "array", + "minItems": 1, + "description": "List of steps in the job. Each step becomes a skill/command.", + "items": { + "$ref": "#/$defs/step" + } + } + }, + "$defs": { + "stepId": { + "type": "string", + "pattern": "^[a-z][a-z0-9_]*$", + "description": "Step identifier (lowercase letters, numbers, underscores, must start with letter)" + }, + "workflow": { + "type": "object", + "required": ["name", "summary", "steps"], + "additionalProperties": false, + "description": "A named workflow grouping steps into a sequence", + "properties": { + "name": { + "type": "string", + "pattern": "^[a-z][a-z0-9_]*$", + "description": "Workflow name (lowercase letters, numbers, underscores)" + }, + "summary": { + "type": "string", + "minLength": 1, + "maxLength": 200, + "description": "Brief one-line summary of what this workflow accomplishes" + }, + "steps": { + "type": "array", + "minItems": 1, + "description": "Ordered list of step entries. Each entry is either a step ID (string) or an array of step IDs for concurrent execution.", + "items": { + "$ref": "#/$defs/workflowStepEntry" + } + } + } + }, + "workflowStepEntry": { + "oneOf": [ + { + "$ref": "#/$defs/stepId" + }, + { + "type": "array", + "minItems": 1, + "description": "Array of step IDs that can be executed concurrently", + "items": { + "$ref": "#/$defs/stepId" + } + } + ] + }, + "changelogEntry": { + "type": "object", + "required": ["version", "changes"], + "additionalProperties": false, + "properties": { + "version": { + "type": "string", + "pattern": "^\\d+\\.\\d+\\.\\d+$", + "description": "Version number for this change" + }, + "changes": { + "type": "string", + "minLength": 1, + "description": "Description of changes made in this version" + } + } + }, + "step": { + "type": "object", + "required": ["id", "name", "description", "instructions_file", "outputs"], + "additionalProperties": false, + "description": "A single step in a job, representing one unit of work", + "properties": { + "id": { + "$ref": "#/$defs/stepId", + "description": "Unique step identifier within this job" + }, + "name": { + "type": "string", + "minLength": 1, + "description": "Human-readable display name for the step" + }, + "description": { + "type": "string", + "minLength": 1, + "description": "Description of what this step does. Used in skill descriptions." + }, + "instructions_file": { + "type": "string", + "minLength": 1, + "description": "Path to instructions markdown file (relative to job directory). 
Example: 'steps/research.md'" + }, + "inputs": { + "type": "array", + "description": "List of inputs required by this step (user parameters or files from previous steps)", + "items": { + "$ref": "#/$defs/stepInput" + } + }, + "outputs": { + "type": "array", + "minItems": 1, + "description": "List of output files/directories produced by this step", + "items": { + "$ref": "#/$defs/stepOutput" + } + }, + "dependencies": { + "type": "array", + "description": "List of step IDs this step depends on. Dependencies must complete before this step runs.", + "items": { + "type": "string" + }, + "default": [] + }, + "hooks": { + "$ref": "#/$defs/hooks", + "description": "Lifecycle hooks for validation and actions at different points in step execution" + }, + "stop_hooks": { + "type": "array", + "description": "DEPRECATED: Use hooks.after_agent instead. Legacy stop hooks for quality validation loops.", + "items": { + "$ref": "#/$defs/hookAction" + } + }, + "exposed": { + "type": "boolean", + "description": "If true, step is user-invocable in menus/commands. If false, step is hidden (only reachable via workflows or dependencies). Default: false", + "default": false + }, + "hidden": { + "type": "boolean", + "description": "If true, step is hidden from menus. Alias for exposed: false. Default: false", + "default": false + }, + "quality_criteria": { + "type": "array", + "description": "Declarative quality criteria for evaluating step outputs. Rendered with standard evaluation framing.", + "items": { + "type": "string", + "minLength": 1 + } + }, + "agent": { + "type": "string", + "minLength": 1, + "description": "Agent type for this step (e.g., 'general-purpose'). When set, the skill uses context forking and delegates to the specified agent type." + } + } + }, + "stepInput": { + "oneOf": [ + { + "$ref": "#/$defs/userParameterInput" + }, + { + "$ref": "#/$defs/fileInput" + } + ] + }, + "userParameterInput": { + "type": "object", + "required": ["name", "description"], + "additionalProperties": false, + "description": "A user-provided parameter input that will be requested at runtime", + "properties": { + "name": { + "type": "string", + "minLength": 1, + "description": "Parameter name (used as variable name)" + }, + "description": { + "type": "string", + "minLength": 1, + "description": "Description shown to user when requesting this input" + } + } + }, + "fileInput": { + "type": "object", + "required": ["file", "from_step"], + "additionalProperties": false, + "description": "A file input from a previous step's output", + "properties": { + "file": { + "type": "string", + "minLength": 1, + "description": "File name to consume from the source step's outputs" + }, + "from_step": { + "type": "string", + "minLength": 1, + "description": "Step ID that produces this file. Must be in the dependencies list." + } + } + }, + "stepOutput": { + "oneOf": [ + { + "type": "string", + "minLength": 1, + "description": "Simple output file path (backward compatible format)" + }, + { + "$ref": "#/$defs/outputWithDocSpec" + } + ] + }, + "outputWithDocSpec": { + "type": "object", + "required": ["file"], + "additionalProperties": false, + "description": "Output file with optional document specification reference", + "properties": { + "file": { + "type": "string", + "minLength": 1, + "description": "Output file path" + }, + "doc_spec": { + "type": "string", + "pattern": "^\\.deepwork/doc_specs/[a-z][a-z0-9_-]*\\.md$", + "description": "Path to doc spec file defining the expected document structure. 
Example: '.deepwork/doc_specs/report.md'" + } + } + }, + "hooks": { + "type": "object", + "additionalProperties": false, + "description": "Lifecycle hooks triggered at different points in step execution", + "properties": { + "after_agent": { + "type": "array", + "description": "Hooks triggered after the agent finishes. Used for quality validation loops.", + "items": { + "$ref": "#/$defs/hookAction" + } + }, + "before_tool": { + "type": "array", + "description": "Hooks triggered before a tool is used. Used for pre-action checks.", + "items": { + "$ref": "#/$defs/hookAction" + } + }, + "before_prompt": { + "type": "array", + "description": "Hooks triggered when user submits a prompt. Used for input validation.", + "items": { + "$ref": "#/$defs/hookAction" + } + } + } + }, + "hookAction": { + "type": "object", + "description": "A hook action - exactly one of: prompt (inline text), prompt_file (external file), or script (shell script)", + "oneOf": [ + { + "required": ["prompt"], + "additionalProperties": false, + "properties": { + "prompt": { + "type": "string", + "minLength": 1, + "description": "Inline prompt text for validation/action" + } + } + }, + { + "required": ["prompt_file"], + "additionalProperties": false, + "properties": { + "prompt_file": { + "type": "string", + "minLength": 1, + "description": "Path to prompt file (relative to job directory)" + } + } + }, + { + "required": ["script"], + "additionalProperties": false, + "properties": { + "script": { + "type": "string", + "minLength": 1, + "description": "Path to shell script (relative to job directory)" + } + } + } + ] + } + } +} diff --git a/doc/mcp_interface.md b/doc/mcp_interface.md new file mode 100644 index 00000000..977fd32b --- /dev/null +++ b/doc/mcp_interface.md @@ -0,0 +1,230 @@ +# DeepWork MCP Interface Documentation + +This document describes the Model Context Protocol (MCP) tools exposed by the DeepWork server. AI agents use these tools to discover and execute multi-step workflows. + +## Server Information + +- **Server Name**: `deepwork` +- **Transport**: stdio (default) or SSE +- **Starting the server**: `deepwork serve --path /path/to/project` + +## Tools + +DeepWork exposes three MCP tools: + +### 1. `get_workflows` + +List all available DeepWork workflows. Call this first to discover available workflows. + +#### Parameters + +None. + +#### Returns + +```typescript +{ + jobs: JobInfo[] +} +``` + +Where `JobInfo` is: + +```typescript +interface JobInfo { + name: string; // Job identifier + summary: string; // Short summary of the job + description: string | null; // Full description (optional) + workflows: WorkflowInfo[]; // Named workflows in the job + standalone_steps: StepInfo[]; // Steps not in any workflow +} + +interface WorkflowInfo { + name: string; // Workflow identifier + summary: string; // Short description +} + +interface StepInfo { + id: string; // Step identifier + name: string; // Human-readable step name + description: string; // What the step does + dependencies: string[]; // Required prior steps +} + +interface ActiveStepInfo { + session_id: string; // Unique session identifier + branch_name: string; // Git branch for this workflow instance + step_id: string; // ID of the current step + step_expected_outputs: string[]; // Expected output files for this step + step_quality_criteria: string[]; // Criteria for step completion (if configured) + step_instructions: string; // Instructions for the step +} +``` + +--- + +### 2. `start_workflow` + +Start a new workflow session. 
Creates a git branch, initializes state tracking, and returns the first step's instructions.
+
+#### Parameters
+
+| Parameter | Type | Required | Description |
+|-----------|------|----------|-------------|
+| `goal` | `string` | Yes | What the user wants to accomplish |
+| `job_name` | `string` | Yes | Name of the job |
+| `workflow_name` | `string` | Yes | Name of the workflow within the job |
+| `instance_id` | `string \| null` | No | Optional identifier for naming (e.g., 'acme', 'q1-2026') |
+
+#### Returns
+
+```typescript
+{
+  begin_step: ActiveStepInfo;  // Information about the first step to begin
+}
+```
+
+---
+
+### 3. `finished_step`
+
+Report that you've finished a workflow step. Validates outputs against quality criteria (if configured), then returns the next action.
+
+#### Parameters
+
+| Parameter | Type | Required | Description |
+|-----------|------|----------|-------------|
+| `outputs` | `string[]` | Yes | List of output file paths created |
+| `notes` | `string \| null` | No | Optional notes about work done |
+| `quality_review_override_reason` | `string \| null` | No | If provided, skips quality review (must explain why) |
+
+#### Returns
+
+The response varies based on the `status` field:
+
+```typescript
+{
+  status: "needs_work" | "next_step" | "workflow_complete";
+
+  // For status = "needs_work"
+  feedback?: string;  // Feedback from quality gate
+  failed_criteria?: QualityCriteriaResult[];  // Failed quality criteria
+
+  // For status = "next_step"
+  begin_step?: ActiveStepInfo;  // Information about the next step to begin
+
+  // For status = "workflow_complete"
+  summary?: string;  // Summary of completed workflow
+  all_outputs?: string[];  // All outputs from all steps
+}
+
+interface QualityCriteriaResult {
+  criterion: string;  // The quality criterion text
+  passed: boolean;  // Whether this criterion passed
+  feedback: string | null;  // Feedback if failed
+}
+```
+
+---
+
+## Status Values
+
+The `finished_step` tool returns one of three statuses:
+
+| Status | Meaning | Next Action |
+|--------|---------|-------------|
+| `needs_work` | Quality criteria not met | Fix issues based on feedback, call `finished_step` again |
+| `next_step` | Step complete, more steps remain | Execute instructions in response, call `finished_step` when done |
+| `workflow_complete` | All steps complete | Workflow is finished |
+
+---
+
+## Workflow Usage Pattern
+
+```
+1. get_workflows()
+   ↓
+   Discover available jobs and workflows
+   ↓
+2. start_workflow(goal, job_name, workflow_name)
+   ↓
+   Get session_id, branch_name, first step instructions
+   ↓
+3. Execute step instructions, create outputs
+   ↓
+4. finished_step(outputs)
+   ↓
+   ├─ status = "needs_work" → Fix issues, goto 4
+   ├─ status = "next_step" → Execute new instructions, goto 4
+   └─ status = "workflow_complete" → Done!
+```
+
+---
+
+## Quality Gates
+
+Steps may define quality criteria that outputs must meet. When `finished_step` is called:
+
+1. If the step has quality criteria and a quality gate agent is configured, outputs are evaluated
+2. If any criteria fail, `status = "needs_work"` with feedback
+3.
If all criteria pass (or no criteria defined), workflow advances + +To skip quality review (use sparingly): +- Provide `quality_review_override_reason` explaining why review is unnecessary + +--- + +## Configuration + +The MCP server is configured via `.deepwork/config.yml`: + +```yaml +version: "1.0" +platforms: + - claude + +# Quality gate configuration (optional) +quality_gate: + agent_review_command: "claude --print" # Command to run quality gate agent + default_timeout: 120 # Timeout in seconds + default_max_attempts: 3 # Max attempts before failing +``` + +--- + +## Server CLI Options + +```bash +deepwork serve [OPTIONS] + +Options: + --path PATH Project root directory (default: current directory) + --quality-gate CMD Command for quality gate agent (overrides config) + --transport TYPE Transport type: stdio or sse (default: stdio) + --port PORT Port for SSE transport (default: 8000) +``` + +--- + +## Example MCP Configuration + +Add to your `.mcp.json`: + +```json +{ + "mcpServers": { + "deepwork": { + "command": "deepwork", + "args": ["serve", "--path", "."] + } + } +} +``` + +--- + +## Changelog + +| Version | Changes | +|---------|---------| +| 1.0.0 | Initial MCP interface with `get_workflows`, `start_workflow`, `finished_step` | diff --git a/library/jobs/commit/job.yml b/library/jobs/commit/job.yml index ebf4575c..9a7d7491 100644 --- a/library/jobs/commit/job.yml +++ b/library/jobs/commit/job.yml @@ -1,3 +1,4 @@ +# yaml-language-server: $schema=.deepwork/schemas/job.schema.json name: commit version: "1.0.0" summary: "Reviews code, runs tests, lints, and commits changes. Use when ready to commit work with quality checks." diff --git a/library/jobs/spec_driven_development/job.yml b/library/jobs/spec_driven_development/job.yml index 0fd25616..91ab743b 100644 --- a/library/jobs/spec_driven_development/job.yml +++ b/library/jobs/spec_driven_development/job.yml @@ -1,3 +1,4 @@ +# yaml-language-server: $schema=.deepwork/schemas/job.schema.json name: spec_driven_development version: "1.0.0" summary: "Spec-driven development workflow that turns specifications into working implementations through structured planning." diff --git a/src/deepwork/cli/install.py b/src/deepwork/cli/install.py index 9fb1de10..6f9daaee 100644 --- a/src/deepwork/cli/install.py +++ b/src/deepwork/cli/install.py @@ -21,6 +21,39 @@ class InstallError(Exception): pass +def _install_schemas(schemas_dir: Path, project_path: Path) -> None: + """ + Install JSON schemas to the project's .deepwork/schemas directory. + + Args: + schemas_dir: Path to .deepwork/schemas directory + project_path: Path to project root (for relative path display) + + Raises: + InstallError: If installation fails + """ + # Find the source schemas directory + source_schemas_dir = Path(__file__).parent.parent / "schemas" + + if not source_schemas_dir.exists(): + raise InstallError( + f"Schemas directory not found at {source_schemas_dir}. " + "DeepWork installation may be corrupted." + ) + + # Copy JSON schema files + try: + for schema_file in source_schemas_dir.glob("*.json"): + target_file = schemas_dir / schema_file.name + shutil.copy(schema_file, target_file) + fix_permissions(target_file) + console.print( + f" [green]✓[/green] Installed schema {schema_file.name} ({target_file.relative_to(project_path)})" + ) + except Exception as e: + raise InstallError(f"Failed to install schemas: {e}") from e + + def _inject_standard_job(job_name: str, jobs_dir: Path, project_path: Path) -> None: """ Inject a standard job definition into the project. 
@@ -249,20 +282,26 @@ def _install_deepwork(platform_name: str | None, project_path: Path) -> None: deepwork_dir = project_path / ".deepwork" jobs_dir = deepwork_dir / "jobs" doc_specs_dir = deepwork_dir / "doc_specs" + schemas_dir = deepwork_dir / "schemas" ensure_dir(deepwork_dir) ensure_dir(jobs_dir) ensure_dir(doc_specs_dir) + ensure_dir(schemas_dir) console.print(f" [green]✓[/green] Created {deepwork_dir.relative_to(project_path)}/") - # Step 3b: Inject standard jobs (core job definitions) + # Step 3b: Install schemas + console.print("[yellow]→[/yellow] Installing schemas...") + _install_schemas(schemas_dir, project_path) + + # Step 3c: Inject standard jobs (core job definitions) console.print("[yellow]→[/yellow] Installing core job definitions...") _inject_deepwork_jobs(jobs_dir, project_path) - # Step 3c: Create .gitignore for temporary files + # Step 3d: Create .gitignore for temporary files _create_deepwork_gitignore(deepwork_dir) console.print(" [green]✓[/green] Created .deepwork/.gitignore") - # Step 3d: Create tmp directory with .gitkeep file for version control + # Step 3e: Create tmp directory with .gitkeep file for version control _create_tmp_directory(deepwork_dir) console.print(" [green]✓[/green] Created .deepwork/tmp/.gitkeep") diff --git a/src/deepwork/mcp/schemas.py b/src/deepwork/mcp/schemas.py index 07b1e622..069a9c24 100644 --- a/src/deepwork/mcp/schemas.py +++ b/src/deepwork/mcp/schemas.py @@ -1,4 +1,10 @@ -"""Pydantic models for MCP tool inputs and outputs.""" +"""Pydantic models for MCP tool inputs and outputs. + +IMPORTANT: If you modify any models in this file that affect the MCP tool +interfaces (input models, output models, or their fields), you MUST also +update the documentation in doc/mcp_interface.md to keep it in sync with +the implementation. +""" from enum import Enum from typing import Any @@ -20,6 +26,8 @@ class StepStatus(str, Enum): # ============================================================================= # Workflow Info Models +# NOTE: These models are returned by get_workflows tool. +# Update doc/mcp_interface.md when modifying. # ============================================================================= @@ -53,11 +61,6 @@ class WorkflowInfo(BaseModel): name: str = Field(description="Workflow identifier") summary: str = Field(description="Short description of workflow") - steps: list[str] = Field(description="Flattened list of step IDs in order") - step_entries: list[WorkflowStepEntryInfo] = Field( - description="Step entries (sequential or concurrent)" - ) - first_step: str = Field(description="First step ID to start workflow") class JobInfo(BaseModel): @@ -74,6 +77,8 @@ class JobInfo(BaseModel): # ============================================================================= # Tool Input Models +# NOTE: Changes to these models affect MCP tool parameters. +# Update doc/mcp_interface.md when modifying. # ============================================================================= @@ -125,9 +130,24 @@ class QualityGateResult(BaseModel): # ============================================================================= # Tool Output Models +# NOTE: Changes to these models affect MCP tool return types. +# Update doc/mcp_interface.md when modifying. 
# ============================================================================= +class ActiveStepInfo(BaseModel): + """Information about the step to begin working on.""" + + session_id: str = Field(description="Unique session identifier") + branch_name: str = Field(description="Git branch for this workflow instance") + step_id: str = Field(description="ID of the current step") + step_expected_outputs: list[str] = Field(description="Expected output files for this step") + step_quality_criteria: list[str] = Field( + default_factory=list, description="Criteria for step completion" + ) + step_instructions: str = Field(description="Instructions for the step") + + class GetWorkflowsResponse(BaseModel): """Response from get_workflows tool.""" @@ -137,14 +157,7 @@ class GetWorkflowsResponse(BaseModel): class StartWorkflowResponse(BaseModel): """Response from start_workflow tool.""" - session_id: str = Field(description="Unique session identifier") - branch_name: str = Field(description="Git branch for this workflow instance") - current_step_id: str = Field(description="ID of the current step") - step_instructions: str = Field(description="Instructions for the first step") - step_outputs: list[str] = Field(description="Expected output files for this step") - quality_criteria: list[str] = Field( - default_factory=list, description="Criteria for step completion" - ) + begin_step: ActiveStepInfo = Field(description="Information about the first step to begin") class FinishedStepResponse(BaseModel): @@ -159,15 +172,8 @@ class FinishedStepResponse(BaseModel): ) # For next_step status - next_step_id: str | None = Field(default=None, description="ID of next step") - step_instructions: str | None = Field( - default=None, description="Instructions for next step" - ) - step_outputs: list[str] | None = Field( - default=None, description="Expected outputs for next step" - ) - quality_criteria: list[str] | None = Field( - default=None, description="Criteria for next step" + begin_step: ActiveStepInfo | None = Field( + default=None, description="Information about the next step to begin" ) # For workflow_complete status diff --git a/src/deepwork/mcp/server.py b/src/deepwork/mcp/server.py index 5f7f943d..fde5e606 100644 --- a/src/deepwork/mcp/server.py +++ b/src/deepwork/mcp/server.py @@ -5,6 +5,10 @@ Usage: deepwork serve --path /path/to/project + +IMPORTANT: If you modify any tool signatures, parameters, or return types in this +file, you MUST also update the documentation in doc/mcp_interface.md to keep it +in sync with the implementation. """ from __future__ import annotations @@ -65,7 +69,13 @@ def create_server( instructions=_get_server_instructions(), ) - # Register tools + # ========================================================================= + # MCP Tool Registrations + # ========================================================================= + # IMPORTANT: When modifying these tool signatures (parameters, return types, + # descriptions), update doc/mcp_interface.md to keep documentation in sync. + # ========================================================================= + @mcp.tool( description=( "List all available DeepWork workflows. 
" diff --git a/src/deepwork/mcp/tools.py b/src/deepwork/mcp/tools.py index 998106ef..29265070 100644 --- a/src/deepwork/mcp/tools.py +++ b/src/deepwork/mcp/tools.py @@ -13,6 +13,7 @@ from deepwork.core.parser import JobDefinition, ParseError, Workflow, parse_job_definition from deepwork.mcp.schemas import ( + ActiveStepInfo, FinishedStepInput, FinishedStepResponse, GetWorkflowsResponse, @@ -22,7 +23,6 @@ StepInfo, StepStatus, WorkflowInfo, - WorkflowStepEntryInfo, ) from deepwork.mcp.state import StateManager @@ -98,21 +98,10 @@ def _job_to_info(self, job: JobDefinition) -> JobInfo: for wf in job.workflows: workflow_step_ids.update(wf.steps) - step_entries = [ - WorkflowStepEntryInfo( - step_ids=entry.step_ids, - is_concurrent=entry.is_concurrent, - ) - for entry in wf.step_entries - ] - workflows.append( WorkflowInfo( name=wf.name, summary=wf.summary, - steps=wf.steps, - step_entries=step_entries, - first_step=wf.steps[0] if wf.steps else "", ) ) @@ -263,12 +252,14 @@ def start_workflow(self, input_data: StartWorkflowInput) -> StartWorkflowRespons step_outputs = [out.file for out in first_step.outputs] return StartWorkflowResponse( - session_id=session.session_id, - branch_name=session.branch_name, - current_step_id=first_step_id, - step_instructions=instructions, - step_outputs=step_outputs, - quality_criteria=first_step.quality_criteria, + begin_step=ActiveStepInfo( + session_id=session.session_id, + branch_name=session.branch_name, + step_id=first_step_id, + step_expected_outputs=step_outputs, + step_quality_criteria=first_step.quality_criteria, + step_instructions=instructions, + ) ) def finished_step(self, input_data: FinishedStepInput) -> FinishedStepResponse: @@ -368,15 +359,6 @@ def finished_step(self, input_data: FinishedStepInput) -> FinishedStepResponse: instructions = self._get_step_instructions(job, next_step_id) step_outputs = [out.file for out in next_step.outputs] - # Build response with concurrent step info if applicable - response = FinishedStepResponse( - status=StepStatus.NEXT_STEP, - next_step_id=next_step_id, - step_instructions=instructions, - step_outputs=step_outputs, - quality_criteria=next_step.quality_criteria, - ) - # Add info about concurrent steps if this is a concurrent entry if next_entry.is_concurrent and len(next_entry.step_ids) > 1: concurrent_info = ( @@ -384,6 +366,19 @@ def finished_step(self, input_data: FinishedStepInput) -> FinishedStepResponse: f"steps that can run in parallel: {', '.join(next_entry.step_ids)}\n" f"Use the Task tool to execute them concurrently." ) - response.step_instructions = instructions + concurrent_info + instructions = instructions + concurrent_info + + # Reload session to get current state after advance + session = self.state_manager.require_active_session() - return response + return FinishedStepResponse( + status=StepStatus.NEXT_STEP, + begin_step=ActiveStepInfo( + session_id=session.session_id, + branch_name=session.branch_name, + step_id=next_step_id, + step_expected_outputs=step_outputs, + step_quality_criteria=next_step.quality_criteria, + step_instructions=instructions, + ), + ) diff --git a/src/deepwork/schemas/job.schema.json b/src/deepwork/schemas/job.schema.json new file mode 100644 index 00000000..1d794f98 --- /dev/null +++ b/src/deepwork/schemas/job.schema.json @@ -0,0 +1,347 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "$id": "https://deepwork.dev/schemas/job.schema.json", + "title": "DeepWork Job Definition", + "description": "Schema for DeepWork job.yml files. 
Jobs are multi-step workflows executed by AI agents.", + "type": "object", + "required": ["name", "version", "summary", "steps"], + "additionalProperties": false, + "properties": { + "name": { + "type": "string", + "pattern": "^[a-z][a-z0-9_]*$", + "description": "Job name (lowercase letters, numbers, underscores, must start with letter). Example: 'competitive_research'" + }, + "version": { + "type": "string", + "pattern": "^\\d+\\.\\d+\\.\\d+$", + "description": "Semantic version (e.g., '1.0.0')" + }, + "summary": { + "type": "string", + "minLength": 1, + "maxLength": 200, + "description": "Brief one-line summary of what this job accomplishes. Used in skill descriptions." + }, + "description": { + "type": "string", + "minLength": 1, + "description": "Detailed multi-line description of the job's purpose, process, and goals" + }, + "workflows": { + "type": "array", + "description": "Named workflows that group steps into multi-step sequences. Workflows define execution order.", + "items": { + "$ref": "#/$defs/workflow" + } + }, + "changelog": { + "type": "array", + "description": "Version history documenting changes to the job definition", + "items": { + "$ref": "#/$defs/changelogEntry" + } + }, + "steps": { + "type": "array", + "minItems": 1, + "description": "List of steps in the job. Each step becomes a skill/command.", + "items": { + "$ref": "#/$defs/step" + } + } + }, + "$defs": { + "stepId": { + "type": "string", + "pattern": "^[a-z][a-z0-9_]*$", + "description": "Step identifier (lowercase letters, numbers, underscores, must start with letter)" + }, + "workflow": { + "type": "object", + "required": ["name", "summary", "steps"], + "additionalProperties": false, + "description": "A named workflow grouping steps into a sequence", + "properties": { + "name": { + "type": "string", + "pattern": "^[a-z][a-z0-9_]*$", + "description": "Workflow name (lowercase letters, numbers, underscores)" + }, + "summary": { + "type": "string", + "minLength": 1, + "maxLength": 200, + "description": "Brief one-line summary of what this workflow accomplishes" + }, + "steps": { + "type": "array", + "minItems": 1, + "description": "Ordered list of step entries. Each entry is either a step ID (string) or an array of step IDs for concurrent execution.", + "items": { + "$ref": "#/$defs/workflowStepEntry" + } + } + } + }, + "workflowStepEntry": { + "oneOf": [ + { + "$ref": "#/$defs/stepId" + }, + { + "type": "array", + "minItems": 1, + "description": "Array of step IDs that can be executed concurrently", + "items": { + "$ref": "#/$defs/stepId" + } + } + ] + }, + "changelogEntry": { + "type": "object", + "required": ["version", "changes"], + "additionalProperties": false, + "properties": { + "version": { + "type": "string", + "pattern": "^\\d+\\.\\d+\\.\\d+$", + "description": "Version number for this change" + }, + "changes": { + "type": "string", + "minLength": 1, + "description": "Description of changes made in this version" + } + } + }, + "step": { + "type": "object", + "required": ["id", "name", "description", "instructions_file", "outputs"], + "additionalProperties": false, + "description": "A single step in a job, representing one unit of work", + "properties": { + "id": { + "$ref": "#/$defs/stepId", + "description": "Unique step identifier within this job" + }, + "name": { + "type": "string", + "minLength": 1, + "description": "Human-readable display name for the step" + }, + "description": { + "type": "string", + "minLength": 1, + "description": "Description of what this step does. 
Used in skill descriptions." + }, + "instructions_file": { + "type": "string", + "minLength": 1, + "description": "Path to instructions markdown file (relative to job directory). Example: 'steps/research.md'" + }, + "inputs": { + "type": "array", + "description": "List of inputs required by this step (user parameters or files from previous steps)", + "items": { + "$ref": "#/$defs/stepInput" + } + }, + "outputs": { + "type": "array", + "minItems": 1, + "description": "List of output files/directories produced by this step", + "items": { + "$ref": "#/$defs/stepOutput" + } + }, + "dependencies": { + "type": "array", + "description": "List of step IDs this step depends on. Dependencies must complete before this step runs.", + "items": { + "type": "string" + }, + "default": [] + }, + "hooks": { + "$ref": "#/$defs/hooks", + "description": "Lifecycle hooks for validation and actions at different points in step execution" + }, + "stop_hooks": { + "type": "array", + "description": "DEPRECATED: Use hooks.after_agent instead. Legacy stop hooks for quality validation loops.", + "items": { + "$ref": "#/$defs/hookAction" + } + }, + "exposed": { + "type": "boolean", + "description": "If true, step is user-invocable in menus/commands. If false, step is hidden (only reachable via workflows or dependencies). Default: false", + "default": false + }, + "hidden": { + "type": "boolean", + "description": "If true, step is hidden from menus. Alias for exposed: false. Default: false", + "default": false + }, + "quality_criteria": { + "type": "array", + "description": "Declarative quality criteria for evaluating step outputs. Rendered with standard evaluation framing.", + "items": { + "type": "string", + "minLength": 1 + } + }, + "agent": { + "type": "string", + "minLength": 1, + "description": "Agent type for this step (e.g., 'general-purpose'). When set, the skill uses context forking and delegates to the specified agent type." + } + } + }, + "stepInput": { + "oneOf": [ + { + "$ref": "#/$defs/userParameterInput" + }, + { + "$ref": "#/$defs/fileInput" + } + ] + }, + "userParameterInput": { + "type": "object", + "required": ["name", "description"], + "additionalProperties": false, + "description": "A user-provided parameter input that will be requested at runtime", + "properties": { + "name": { + "type": "string", + "minLength": 1, + "description": "Parameter name (used as variable name)" + }, + "description": { + "type": "string", + "minLength": 1, + "description": "Description shown to user when requesting this input" + } + } + }, + "fileInput": { + "type": "object", + "required": ["file", "from_step"], + "additionalProperties": false, + "description": "A file input from a previous step's output", + "properties": { + "file": { + "type": "string", + "minLength": 1, + "description": "File name to consume from the source step's outputs" + }, + "from_step": { + "type": "string", + "minLength": 1, + "description": "Step ID that produces this file. Must be in the dependencies list." 
+ } + } + }, + "stepOutput": { + "oneOf": [ + { + "type": "string", + "minLength": 1, + "description": "Simple output file path (backward compatible format)" + }, + { + "$ref": "#/$defs/outputWithDocSpec" + } + ] + }, + "outputWithDocSpec": { + "type": "object", + "required": ["file"], + "additionalProperties": false, + "description": "Output file with optional document specification reference", + "properties": { + "file": { + "type": "string", + "minLength": 1, + "description": "Output file path" + }, + "doc_spec": { + "type": "string", + "pattern": "^\\.deepwork/doc_specs/[a-z][a-z0-9_-]*\\.md$", + "description": "Path to doc spec file defining the expected document structure. Example: '.deepwork/doc_specs/report.md'" + } + } + }, + "hooks": { + "type": "object", + "additionalProperties": false, + "description": "Lifecycle hooks triggered at different points in step execution", + "properties": { + "after_agent": { + "type": "array", + "description": "Hooks triggered after the agent finishes. Used for quality validation loops.", + "items": { + "$ref": "#/$defs/hookAction" + } + }, + "before_tool": { + "type": "array", + "description": "Hooks triggered before a tool is used. Used for pre-action checks.", + "items": { + "$ref": "#/$defs/hookAction" + } + }, + "before_prompt": { + "type": "array", + "description": "Hooks triggered when user submits a prompt. Used for input validation.", + "items": { + "$ref": "#/$defs/hookAction" + } + } + } + }, + "hookAction": { + "type": "object", + "description": "A hook action - exactly one of: prompt (inline text), prompt_file (external file), or script (shell script)", + "oneOf": [ + { + "required": ["prompt"], + "additionalProperties": false, + "properties": { + "prompt": { + "type": "string", + "minLength": 1, + "description": "Inline prompt text for validation/action" + } + } + }, + { + "required": ["prompt_file"], + "additionalProperties": false, + "properties": { + "prompt_file": { + "type": "string", + "minLength": 1, + "description": "Path to prompt file (relative to job directory)" + } + } + }, + { + "required": ["script"], + "additionalProperties": false, + "properties": { + "script": { + "type": "string", + "minLength": 1, + "description": "Path to shell script (relative to job directory)" + } + } + } + ] + } + } +} diff --git a/src/deepwork/schemas/job_schema.py b/src/deepwork/schemas/job_schema.py index e29b852c..c3c0cb57 100644 --- a/src/deepwork/schemas/job_schema.py +++ b/src/deepwork/schemas/job_schema.py @@ -1,307 +1,35 @@ -"""JSON Schema definition for job definitions.""" +"""JSON Schema loader for job definitions. +This module loads the job.schema.json file and provides it as a Python dict +for use with jsonschema validation. 
+""" + +import json +from pathlib import Path from typing import Any # Supported lifecycle hook events (generic names, mapped to platform-specific by adapters) # These values must match SkillLifecycleHook enum in adapters.py LIFECYCLE_HOOK_EVENTS = ["after_agent", "before_tool", "before_prompt"] -# Schema definition for a single hook action (prompt, prompt_file, or script) -HOOK_ACTION_SCHEMA: dict[str, Any] = { - "type": "object", - "oneOf": [ - { - "required": ["prompt"], - "properties": { - "prompt": { - "type": "string", - "minLength": 1, - "description": "Inline prompt for validation/action", - }, - }, - "additionalProperties": False, - }, - { - "required": ["prompt_file"], - "properties": { - "prompt_file": { - "type": "string", - "minLength": 1, - "description": "Path to prompt file (relative to job directory)", - }, - }, - "additionalProperties": False, - }, - { - "required": ["script"], - "properties": { - "script": { - "type": "string", - "minLength": 1, - "description": "Path to shell script (relative to job directory)", - }, - }, - "additionalProperties": False, - }, - ], -} +# Path to the JSON schema file +_SCHEMA_FILE = Path(__file__).parent / "job.schema.json" + + +def _load_schema() -> dict[str, Any]: + """Load the JSON schema from file.""" + with open(_SCHEMA_FILE) as f: + return json.load(f) -# Schema for a single step reference (step ID) -STEP_ID_SCHEMA: dict[str, Any] = { - "type": "string", - "pattern": "^[a-z][a-z0-9_]*$", -} -# Schema for a concurrent step group (array of step IDs that can run in parallel) -# minItems=1 allows single-item arrays to indicate a step with multiple parallel instances -# (e.g., [fetch_campaign_data] means run this step for each campaign in parallel) -CONCURRENT_STEPS_SCHEMA: dict[str, Any] = { - "type": "array", - "minItems": 1, - "description": "Array of step IDs that can be executed concurrently, or single step with multiple instances", - "items": STEP_ID_SCHEMA, -} +# Load the schema at module import time +JOB_SCHEMA: dict[str, Any] = _load_schema() -# Schema for a workflow step entry (either single step or concurrent group) -WORKFLOW_STEP_ENTRY_SCHEMA: dict[str, Any] = { - "oneOf": [ - STEP_ID_SCHEMA, - CONCURRENT_STEPS_SCHEMA, - ], -} -# Schema for a workflow definition -WORKFLOW_SCHEMA: dict[str, Any] = { - "type": "object", - "required": ["name", "summary", "steps"], - "properties": { - "name": { - "type": "string", - "pattern": "^[a-z][a-z0-9_]*$", - "description": "Workflow name (lowercase letters, numbers, underscores)", - }, - "summary": { - "type": "string", - "minLength": 1, - "maxLength": 200, - "description": "Brief one-line summary of what this workflow accomplishes", - }, - "steps": { - "type": "array", - "minItems": 1, - "description": "Ordered list of step entries. Each entry is either a step ID (string) or an array of step IDs for concurrent execution.", - "items": WORKFLOW_STEP_ENTRY_SCHEMA, - }, - }, - "additionalProperties": False, -} +def get_schema_path() -> Path: + """Get the path to the JSON schema file. 
-# JSON Schema for job.yml files -JOB_SCHEMA: dict[str, Any] = { - "$schema": "http://json-schema.org/draft-07/schema#", - "type": "object", - "required": ["name", "version", "summary", "steps"], - "properties": { - "name": { - "type": "string", - "pattern": "^[a-z][a-z0-9_]*$", - "description": "Job name (lowercase letters, numbers, underscores, must start with letter)", - }, - "version": { - "type": "string", - "pattern": r"^\d+\.\d+\.\d+$", - "description": "Semantic version (e.g., 1.0.0)", - }, - "summary": { - "type": "string", - "minLength": 1, - "maxLength": 200, - "description": "Brief one-line summary of what this job accomplishes", - }, - "description": { - "type": "string", - "minLength": 1, - "description": "Detailed multi-line description of the job's purpose, process, and goals", - }, - "workflows": { - "type": "array", - "description": "Named workflows that group steps into multi-step sequences", - "items": WORKFLOW_SCHEMA, - }, - "changelog": { - "type": "array", - "description": "Version history and changes to the job", - "items": { - "type": "object", - "required": ["version", "changes"], - "properties": { - "version": { - "type": "string", - "pattern": r"^\d+\.\d+\.\d+$", - "description": "Version number for this change", - }, - "changes": { - "type": "string", - "minLength": 1, - "description": "Description of changes made in this version", - }, - }, - "additionalProperties": False, - }, - }, - "steps": { - "type": "array", - "minItems": 1, - "description": "List of steps in the job", - "items": { - "type": "object", - "required": ["id", "name", "description", "instructions_file", "outputs"], - "properties": { - "id": { - "type": "string", - "pattern": "^[a-z][a-z0-9_]*$", - "description": "Step ID (unique within job)", - }, - "name": { - "type": "string", - "minLength": 1, - "description": "Human-readable step name", - }, - "description": { - "type": "string", - "minLength": 1, - "description": "Step description", - }, - "instructions_file": { - "type": "string", - "minLength": 1, - "description": "Path to instructions file (relative to job directory)", - }, - "inputs": { - "type": "array", - "description": "List of inputs (user parameters or files from previous steps)", - "items": { - "type": "object", - "oneOf": [ - { - "required": ["name", "description"], - "properties": { - "name": { - "type": "string", - "description": "Input parameter name", - }, - "description": { - "type": "string", - "description": "Input parameter description", - }, - }, - "additionalProperties": False, - }, - { - "required": ["file", "from_step"], - "properties": { - "file": { - "type": "string", - "description": "File name from previous step", - }, - "from_step": { - "type": "string", - "description": "Step ID that produces this file", - }, - }, - "additionalProperties": False, - }, - ], - }, - }, - "outputs": { - "type": "array", - "description": "List of output files/directories, optionally with document type references", - "items": { - "oneOf": [ - { - "type": "string", - "minLength": 1, - "description": "Simple output file path (backward compatible)", - }, - { - "type": "object", - "required": ["file"], - "properties": { - "file": { - "type": "string", - "minLength": 1, - "description": "Output file path", - }, - "doc_spec": { - "type": "string", - "pattern": r"^\.deepwork/doc_specs/[a-z][a-z0-9_-]*\.md$", - "description": "Path to doc spec file", - }, - }, - "additionalProperties": False, - }, - ], - }, - }, - "dependencies": { - "type": "array", - "description": "List of step IDs this 
step depends on", - "items": { - "type": "string", - }, - "default": [], - }, - "hooks": { - "type": "object", - "description": "Lifecycle hooks for this step, keyed by event type", - "properties": { - "after_agent": { - "type": "array", - "description": "Hooks triggered after the agent finishes (quality validation)", - "items": HOOK_ACTION_SCHEMA, - }, - "before_tool": { - "type": "array", - "description": "Hooks triggered before a tool is used", - "items": HOOK_ACTION_SCHEMA, - }, - "before_prompt": { - "type": "array", - "description": "Hooks triggered when user submits a prompt", - "items": HOOK_ACTION_SCHEMA, - }, - }, - "additionalProperties": False, - }, - # DEPRECATED: Use hooks.after_agent instead - "stop_hooks": { - "type": "array", - "description": "DEPRECATED: Use hooks.after_agent instead. Stop hooks for quality validation loops.", - "items": HOOK_ACTION_SCHEMA, - }, - "exposed": { - "type": "boolean", - "description": "If true, skill is user-invocable in menus. Default: false (hidden from menus).", - "default": False, - }, - "quality_criteria": { - "type": "array", - "description": "Declarative quality criteria. Rendered with standard evaluation framing.", - "items": { - "type": "string", - "minLength": 1, - }, - }, - "agent": { - "type": "string", - "description": "Agent type for this step. When set, the skill uses context: fork and delegates to the specified agent (e.g., 'general-purpose').", - "minLength": 1, - }, - }, - "additionalProperties": False, - }, - }, - }, - "additionalProperties": False, -} + Returns: + Path to job.schema.json + """ + return _SCHEMA_FILE diff --git a/src/deepwork/standard_jobs/deepwork_jobs/job.yml b/src/deepwork/standard_jobs/deepwork_jobs/job.yml index 4343cbda..5ee6bf7d 100644 --- a/src/deepwork/standard_jobs/deepwork_jobs/job.yml +++ b/src/deepwork/standard_jobs/deepwork_jobs/job.yml @@ -1,3 +1,4 @@ +# yaml-language-server: $schema=.deepwork/schemas/job.schema.json name: deepwork_jobs version: "1.0.0" summary: "Creates and manages multi-step AI workflows. Use when defining, implementing, or improving DeepWork jobs." 
diff --git a/tests/e2e/test_claude_code_integration.py b/tests/e2e/test_claude_code_integration.py index abce6a86..eaf29704 100644 --- a/tests/e2e/test_claude_code_integration.py +++ b/tests/e2e/test_claude_code_integration.py @@ -212,10 +212,7 @@ def test_get_workflows_returns_jobs(self, project_with_job: Path) -> None: assert len(fruits_job.workflows) >= 1 full_workflow = fruits_job.workflows[0] assert full_workflow.name == "full" - - # Workflow should contain the steps - assert "identify" in full_workflow.steps - assert "classify" in full_workflow.steps + assert full_workflow.summary is not None def test_start_workflow_creates_session(self, project_with_job: Path) -> None: """Test that start_workflow creates a new workflow session.""" @@ -242,15 +239,15 @@ def test_start_workflow_creates_session(self, project_with_job: Path) -> None: response = tools.start_workflow(input_data) # Should return session info - assert response.session_id is not None - assert response.branch_name is not None - assert "deepwork" in response.branch_name.lower() - assert "fruits" in response.branch_name.lower() + assert response.begin_step.session_id is not None + assert response.begin_step.branch_name is not None + assert "deepwork" in response.begin_step.branch_name.lower() + assert "fruits" in response.begin_step.branch_name.lower() # Should return first step instructions - assert response.current_step_id is not None - assert response.step_instructions is not None - assert len(response.step_instructions) > 0 + assert response.begin_step.step_id is not None + assert response.begin_step.step_instructions is not None + assert len(response.begin_step.step_instructions) > 0 def test_workflow_step_progression(self, project_with_job: Path) -> None: """Test that finished_step progresses through workflow steps.""" @@ -290,8 +287,9 @@ def test_workflow_step_progression(self, project_with_job: Path) -> None: if finish_response.status == "next_step": # Should have instructions for next step - assert finish_response.step_instructions is not None - assert finish_response.next_step_id is not None + assert finish_response.begin_step is not None + assert finish_response.begin_step.step_instructions is not None + assert finish_response.begin_step.step_id is not None @pytest.mark.skipif( diff --git a/tests/fixtures/jobs/complex_job/job.yml b/tests/fixtures/jobs/complex_job/job.yml index 7c1343d6..507ea626 100644 --- a/tests/fixtures/jobs/complex_job/job.yml +++ b/tests/fixtures/jobs/complex_job/job.yml @@ -1,3 +1,4 @@ +# yaml-language-server: $schema=.deepwork/schemas/job.schema.json name: competitive_research version: "0.1.0" summary: "Systematic competitive analysis workflow" diff --git a/tests/fixtures/jobs/concurrent_steps_job/job.yml b/tests/fixtures/jobs/concurrent_steps_job/job.yml index 8609c512..3feeab4d 100644 --- a/tests/fixtures/jobs/concurrent_steps_job/job.yml +++ b/tests/fixtures/jobs/concurrent_steps_job/job.yml @@ -1,3 +1,4 @@ +# yaml-language-server: $schema=.deepwork/schemas/job.schema.json name: concurrent_workflow version: "1.0.0" summary: "Workflow with concurrent steps for testing" diff --git a/tests/fixtures/jobs/exposed_step_job/job.yml b/tests/fixtures/jobs/exposed_step_job/job.yml index d480daeb..fc1530b7 100644 --- a/tests/fixtures/jobs/exposed_step_job/job.yml +++ b/tests/fixtures/jobs/exposed_step_job/job.yml @@ -1,3 +1,4 @@ +# yaml-language-server: $schema=.deepwork/schemas/job.schema.json name: exposed_job version: "0.1.0" summary: "A job with exposed and hidden steps for testing" diff --git 
a/tests/fixtures/jobs/fruits/job.yml b/tests/fixtures/jobs/fruits/job.yml index 01d96994..cfb83e9f 100644 --- a/tests/fixtures/jobs/fruits/job.yml +++ b/tests/fixtures/jobs/fruits/job.yml @@ -1,3 +1,4 @@ +# yaml-language-server: $schema=.deepwork/schemas/job.schema.json name: fruits version: "1.0.0" summary: "Identify and classify fruits from a mixed list of items" diff --git a/tests/fixtures/jobs/job_with_doc_spec/job.yml b/tests/fixtures/jobs/job_with_doc_spec/job.yml index b7a6b3ff..16673b5a 100644 --- a/tests/fixtures/jobs/job_with_doc_spec/job.yml +++ b/tests/fixtures/jobs/job_with_doc_spec/job.yml @@ -1,3 +1,4 @@ +# yaml-language-server: $schema=.deepwork/schemas/job.schema.json name: job_with_doc_spec version: "1.0.0" summary: "Job with doc spec output for testing" diff --git a/tests/fixtures/jobs/simple_job/job.yml b/tests/fixtures/jobs/simple_job/job.yml index 14642290..5f19e452 100644 --- a/tests/fixtures/jobs/simple_job/job.yml +++ b/tests/fixtures/jobs/simple_job/job.yml @@ -1,3 +1,4 @@ +# yaml-language-server: $schema=.deepwork/schemas/job.schema.json name: simple_job version: "0.1.0" summary: "A simple single-step job for testing" diff --git a/tests/unit/mcp/test_schemas.py b/tests/unit/mcp/test_schemas.py index c498d785..5dafe77e 100644 --- a/tests/unit/mcp/test_schemas.py +++ b/tests/unit/mcp/test_schemas.py @@ -2,6 +2,7 @@ from deepwork.mcp.schemas import ( + ActiveStepInfo, FinishedStepInput, FinishedStepResponse, JobInfo, @@ -85,17 +86,10 @@ def test_basic_workflow(self) -> None: workflow = WorkflowInfo( name="test_workflow", summary="A test workflow", - steps=["step1", "step2"], - step_entries=[ - WorkflowStepEntryInfo(step_ids=["step1"]), - WorkflowStepEntryInfo(step_ids=["step2"]), - ], - first_step="step1", ) assert workflow.name == "test_workflow" - assert workflow.first_step == "step1" - assert len(workflow.steps) == 2 + assert workflow.summary == "A test workflow" class TestJobInfo: @@ -223,23 +217,59 @@ def test_failed_gate(self) -> None: assert len(result.criteria_results) == 2 +class TestActiveStepInfo: + """Tests for ActiveStepInfo model.""" + + def test_basic_step_info(self) -> None: + """Test basic active step info.""" + step_info = ActiveStepInfo( + session_id="abc123", + branch_name="deepwork/test-main-20240101", + step_id="step1", + step_expected_outputs=["output.md"], + step_quality_criteria=["Must be complete"], + step_instructions="Do something", + ) + + assert step_info.session_id == "abc123" + assert step_info.branch_name == "deepwork/test-main-20240101" + assert step_info.step_id == "step1" + assert step_info.step_expected_outputs == ["output.md"] + assert step_info.step_quality_criteria == ["Must be complete"] + assert step_info.step_instructions == "Do something" + + def test_default_quality_criteria(self) -> None: + """Test default empty quality criteria.""" + step_info = ActiveStepInfo( + session_id="abc123", + branch_name="deepwork/test-main-20240101", + step_id="step1", + step_expected_outputs=["output.md"], + step_instructions="Do something", + ) + + assert step_info.step_quality_criteria == [] + + class TestStartWorkflowResponse: """Tests for StartWorkflowResponse model.""" def test_basic_response(self) -> None: """Test basic response.""" response = StartWorkflowResponse( - session_id="abc123", - branch_name="deepwork/test-main-20240101", - current_step_id="step1", - step_instructions="Do something", - step_outputs=["output.md"], + begin_step=ActiveStepInfo( + session_id="abc123", + branch_name="deepwork/test-main-20240101", + 
step_id="step1", + step_expected_outputs=["output.md"], + step_instructions="Do something", + ) ) - assert response.session_id == "abc123" - assert response.branch_name == "deepwork/test-main-20240101" - assert response.current_step_id == "step1" - assert response.quality_criteria == [] + assert response.begin_step.session_id == "abc123" + assert response.begin_step.branch_name == "deepwork/test-main-20240101" + assert response.begin_step.step_id == "step1" + assert response.begin_step.step_quality_criteria == [] class TestFinishedStepResponse: @@ -257,19 +287,24 @@ def test_needs_work_status(self) -> None: assert response.status == StepStatus.NEEDS_WORK assert response.feedback is not None - assert response.next_step_id is None + assert response.begin_step is None def test_next_step_status(self) -> None: """Test next_step response.""" response = FinishedStepResponse( status=StepStatus.NEXT_STEP, - next_step_id="step2", - step_instructions="Next step instructions", - step_outputs=["output2.md"], + begin_step=ActiveStepInfo( + session_id="abc123", + branch_name="deepwork/test-main-20240101", + step_id="step2", + step_expected_outputs=["output2.md"], + step_instructions="Next step instructions", + ), ) assert response.status == StepStatus.NEXT_STEP - assert response.next_step_id == "step2" + assert response.begin_step is not None + assert response.begin_step.step_id == "step2" assert response.summary is None def test_workflow_complete_status(self) -> None: diff --git a/tests/unit/mcp/test_tools.py b/tests/unit/mcp/test_tools.py index 24cc9f78..5fdb4ab2 100644 --- a/tests/unit/mcp/test_tools.py +++ b/tests/unit/mcp/test_tools.py @@ -112,8 +112,7 @@ def test_get_workflows(self, tools: WorkflowTools) -> None: assert job.summary == "A test job" assert len(job.workflows) == 1 assert job.workflows[0].name == "main" - assert job.workflows[0].steps == ["step1", "step2"] - assert job.workflows[0].first_step == "step1" + assert job.workflows[0].summary == "Main workflow" def test_get_workflows_empty(self, tmp_path: Path) -> None: """Test getting workflows when no jobs exist.""" @@ -142,12 +141,12 @@ def test_start_workflow(self, tools: WorkflowTools) -> None: response = tools.start_workflow(input_data) - assert response.session_id is not None - assert "test-instance" in response.branch_name - assert response.current_step_id == "step1" - assert "Step 1" in response.step_instructions - assert "output1.md" in response.step_outputs - assert "Output must be valid" in response.quality_criteria + assert response.begin_step.session_id is not None + assert "test-instance" in response.begin_step.branch_name + assert response.begin_step.step_id == "step1" + assert "Step 1" in response.begin_step.step_instructions + assert "output1.md" in response.begin_step.step_expected_outputs + assert "Output must be valid" in response.begin_step.step_quality_criteria def test_start_workflow_invalid_job(self, tools: WorkflowTools) -> None: """Test starting workflow with invalid job.""" @@ -201,9 +200,10 @@ def test_finished_step_advances_to_next( response = tools.finished_step(finish_input) assert response.status == StepStatus.NEXT_STEP - assert response.next_step_id == "step2" - assert response.step_instructions is not None - assert "Step 2" in response.step_instructions + assert response.begin_step is not None + assert response.begin_step.step_id == "step2" + assert response.begin_step.step_instructions is not None + assert "Step 2" in response.begin_step.step_instructions def test_finished_step_completes_workflow( self, 
tools: WorkflowTools, project_root: Path From c3754f617cdb83fa5d46e1cc05f498c0deb952ba Mon Sep 17 00:00:00 2001 From: Noah Horton Date: Tue, 3 Feb 2026 17:49:26 -0700 Subject: [PATCH 07/45] Remove old jobs --- .claude/hooks/commit_job_git_commit.sh | 4 - .deepwork/jobs/add_platform/job.yml | 153 ------------ .../add_platform/steps/add_capabilities.md | 135 ----------- .../jobs/add_platform/steps/implement.md | 226 ------------------ .deepwork/jobs/add_platform/steps/research.md | 188 --------------- .deepwork/jobs/add_platform/steps/verify.md | 109 --------- .deepwork/jobs/commit/AGENTS.md | 19 -- .deepwork/jobs/commit/job.yml | 99 -------- .../jobs/commit/steps/commit_and_push.md | 89 ------- .deepwork/jobs/commit/steps/lint.md | 70 ------ .deepwork/jobs/commit/steps/review.md | 55 ----- .deepwork/jobs/commit/steps/test.md | 51 ---- .../commit/block_bash_with_instructions.sh | 74 ------ .../commit/code_review_standards.example.md | 67 ------ library/jobs/commit/commit_job_git_commit.sh | 7 - library/jobs/commit/job.yml | 84 ------- library/jobs/commit/readme.md | 174 -------------- library/jobs/commit/steps/commit_and_push.md | 66 ----- library/jobs/commit/steps/lint.md | 41 ---- library/jobs/commit/steps/review.md | 55 ----- library/jobs/commit/steps/test.md | 44 ---- 21 files changed, 1810 deletions(-) delete mode 100755 .claude/hooks/commit_job_git_commit.sh delete mode 100644 .deepwork/jobs/add_platform/job.yml delete mode 100644 .deepwork/jobs/add_platform/steps/add_capabilities.md delete mode 100644 .deepwork/jobs/add_platform/steps/implement.md delete mode 100644 .deepwork/jobs/add_platform/steps/research.md delete mode 100644 .deepwork/jobs/add_platform/steps/verify.md delete mode 100644 .deepwork/jobs/commit/AGENTS.md delete mode 100644 .deepwork/jobs/commit/job.yml delete mode 100644 .deepwork/jobs/commit/steps/commit_and_push.md delete mode 100644 .deepwork/jobs/commit/steps/lint.md delete mode 100644 .deepwork/jobs/commit/steps/review.md delete mode 100644 .deepwork/jobs/commit/steps/test.md delete mode 100755 library/jobs/commit/block_bash_with_instructions.sh delete mode 100644 library/jobs/commit/code_review_standards.example.md delete mode 100755 library/jobs/commit/commit_job_git_commit.sh delete mode 100644 library/jobs/commit/job.yml delete mode 100644 library/jobs/commit/readme.md delete mode 100644 library/jobs/commit/steps/commit_and_push.md delete mode 100644 library/jobs/commit/steps/lint.md delete mode 100644 library/jobs/commit/steps/review.md delete mode 100644 library/jobs/commit/steps/test.md diff --git a/.claude/hooks/commit_job_git_commit.sh b/.claude/hooks/commit_job_git_commit.sh deleted file mode 100755 index 5fe32679..00000000 --- a/.claude/hooks/commit_job_git_commit.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# commit_job_git_commit.sh - Wrapper for git commit invoked via the /commit skill - -exec git commit "$@" diff --git a/.deepwork/jobs/add_platform/job.yml b/.deepwork/jobs/add_platform/job.yml deleted file mode 100644 index 4bb1ee52..00000000 --- a/.deepwork/jobs/add_platform/job.yml +++ /dev/null @@ -1,153 +0,0 @@ -# yaml-language-server: $schema=.deepwork/schemas/job.schema.json -name: add_platform -version: "0.4.0" -summary: "Adds a new AI platform to DeepWork with adapter, templates, and tests. Use when integrating Cursor, Windsurf, or other AI coding tools." - -description: | - A workflow for adding support for a new AI platform (like Cursor, Windsurf, etc.) to DeepWork. - - The **integrate** workflow guides you through four phases: - 1. 
**Research**: Capture the platform's CLI configuration and hooks system documentation - 2. **Add Capabilities**: Update the job schema and adapters with any new hook events - 3. **Implement**: Create the platform adapter, templates, tests (100% coverage), and README updates - 4. **Verify**: Ensure installation works correctly and produces expected files - - The workflow ensures consistency across all supported platforms and maintains - comprehensive test coverage for new functionality. - - **Important Notes**: - - Only hooks available on slash command definitions should be captured - - Each existing adapter must be updated when new hooks are added (typically with null values) - - Tests must achieve 100% coverage for any new functionality - - Installation verification confirms the platform integrates correctly with existing jobs - -workflows: - - name: integrate - summary: "Full workflow to integrate a new AI platform into DeepWork" - steps: - - research - - add_capabilities - - implement - - verify - -changelog: - - version: "0.4.0" - changes: "Added workflows section to explicitly define the integrate workflow sequence" - - version: "0.1.0" - changes: "Initial version" - - version: "0.2.0" - changes: "Added verification_checklist.md output to verify step for doc spec compliance" - - version: "0.3.0" - changes: "Improved skill descriptions with third-person voice and 'Use when...' triggers for better discoverability" - -steps: - - id: research - name: "Research Platform Documentation" - description: "Captures CLI configuration and hooks system documentation for the new platform. Use when starting platform integration." - instructions_file: steps/research.md - inputs: - - name: platform_name - description: "Clear identifier of the platform (e.g., 'cursor', 'windsurf-editor', 'github-copilot-chat')" - outputs: - - cli_configuration.md - - hooks_system.md - dependencies: [] - hooks: - after_agent: - - prompt: | - Verify the research output meets ALL criteria: - 1. Both files exist in doc/platforms//: cli_configuration.md and hooks_system.md - 2. Each file has a comment at the top with: - - Last updated date - - Source URL where the documentation was obtained - 3. cli_configuration.md covers how the platform's CLI is configured - 4. hooks_system.md covers hooks available for slash command definitions ONLY - 5. No extraneous documentation (only these two specific topics) - 6. Documentation is comprehensive enough to implement the platform - - If ALL criteria are met, include `✓ Quality Criteria Met`. - - - id: add_capabilities - name: "Add Hook Capabilities" - description: "Updates job schema and adapters with any new hook events the platform supports. Use after research to extend DeepWork's hook system." - instructions_file: steps/add_capabilities.md - inputs: - - file: hooks_system.md - from_step: research - outputs: - - job_schema.py - - adapters.py - dependencies: - - research - hooks: - after_agent: - - prompt: | - Verify the capability additions meet ALL criteria: - 1. Any new hooks from the platform (for slash commands only) are added to src/deepwork/schemas/job_schema.py - 2. All existing adapters in src/deepwork/adapters.py are updated with the new hook fields - (set to None/null if the platform doesn't support that hook) - 3. Only hooks available on slash command definitions are added (not general CLI hooks) - 4. job_schema.py remains valid Python with no syntax errors - 5. adapters.py remains consistent - all adapters have the same hook fields - 6. 
If no new hooks are needed, document why in a comment - - If ALL criteria are met, include `✓ Quality Criteria Met`. - - - id: implement - name: "Implement Platform Support" - description: "Creates platform adapter, templates, tests with 100% coverage, and README documentation. Use after adding hook capabilities." - instructions_file: steps/implement.md - inputs: - - file: job_schema.py - from_step: add_capabilities - - file: adapters.py - from_step: add_capabilities - - file: cli_configuration.md - from_step: research - outputs: - - templates/ - - tests/ - - README.md - dependencies: - - research - - add_capabilities - hooks: - after_agent: - - script: hooks/run_tests.sh - - prompt: | - Verify the implementation meets ALL criteria: - 1. Platform adapter class is added to src/deepwork/adapters.py - 2. Templates exist in src/deepwork/templates// with appropriate command structure - 3. Tests exist for all new functionality - 4. Test coverage is 100% for new code (run: uv run pytest --cov) - 5. All tests pass - 6. README.md is updated with: - - New platform listed in supported platforms - - Installation instructions for the platform - - Any platform-specific notes - - If ALL criteria are met, include `✓ Quality Criteria Met`. - - - id: verify - name: "Verify Installation" - description: "Sets up platform directories and verifies deepwork install works correctly. Use after implementation to confirm integration." - instructions_file: steps/verify.md - inputs: - - file: templates/ - from_step: implement - outputs: - - verification_checklist.md - dependencies: - - implement - hooks: - after_agent: - - prompt: | - Verify the installation meets ALL criteria: - 1. Platform-specific directories/files are added to the deepwork repo as needed - 2. Running `deepwork install --platform ` completes without errors - 3. Expected command files are created in the platform's command directory - 4. Command file content matches the templates and job definitions - 5. Established DeepWork jobs (deepwork_jobs, deepwork_rules) are installed correctly - 6. The platform can be used alongside existing platforms without conflicts - - If ALL criteria are met, include `✓ Quality Criteria Met`. diff --git a/.deepwork/jobs/add_platform/steps/add_capabilities.md b/.deepwork/jobs/add_platform/steps/add_capabilities.md deleted file mode 100644 index 5389213a..00000000 --- a/.deepwork/jobs/add_platform/steps/add_capabilities.md +++ /dev/null @@ -1,135 +0,0 @@ -# Add Hook Capabilities - -## Objective - -Update the DeepWork job schema and platform adapters to support any new hook events that the new platform provides for slash command definitions. - -## Task - -Analyze the hooks documentation from the research step and update the codebase to support any new hook capabilities, ensuring consistency across all existing adapters. - -### Prerequisites - -Read the hooks documentation created in the previous step: -- `doc/platforms//hooks_system.md` - -Also review the existing schema and adapters: -- `src/deepwork/schemas/job_schema.py` -- `src/deepwork/adapters.py` - -### Process - -1. **Analyze the new platform's hooks** - - Read `doc/platforms//hooks_system.md` - - List all hooks available for slash command definitions - - Compare with hooks already in `job_schema.py` - - Identify any NEW hooks not currently supported - -2. 
**Determine if schema changes are needed** - - If the platform has hooks that DeepWork doesn't currently support, add them - - If all hooks are already supported, document this finding - - Remember: Only add hooks that are available on slash command definitions - -3. **Update job_schema.py (if needed)** - - Add new hook fields to the step schema - - Follow existing patterns for hook definitions - - Add appropriate type hints and documentation - - Example addition: - ```python - # New hook from - new_hook_name: Optional[List[HookConfig]] = None - ``` - -4. **Update all existing adapters** - - Open `src/deepwork/adapters.py` - - For EACH existing adapter class: - - Add the new hook field (set to `None` if not supported) - - This maintains consistency across all adapters - - Document why each adapter does or doesn't support the hook - -5. **Validate the changes** - - Run Python syntax check: `python -m py_compile src/deepwork/schemas/job_schema.py` - - Run Python syntax check: `python -m py_compile src/deepwork/adapters.py` - - Ensure no import errors - -6. **Document the decision** - - If no new hooks were added, add a comment explaining why - - If new hooks were added, ensure they're documented in the schema - -## Output Format - -### job_schema.py - -Location: `src/deepwork/schemas/job_schema.py` - -If new hooks are added: -```python -@dataclass -class StepDefinition: - # ... existing fields ... - - # New hook from - [description of what it does] - new_hook_name: Optional[List[HookConfig]] = None -``` - -### adapters.py - -Location: `src/deepwork/adapters.py` - -For each existing adapter, add the new hook field: -```python -class ExistingPlatformAdapter(PlatformAdapter): - # ... existing code ... - - def get_hook_support(self) -> dict: - return { - # ... existing hooks ... - "new_hook_name": None, # Not supported by this platform - } -``` - -Or if no changes are needed, add a documentation comment: -```python -# NOTE: hooks reviewed on YYYY-MM-DD -# No new hooks to add - all command hooks are already -# supported by the existing schema (stop_hooks covers their validation pattern) -``` - -## Quality Criteria - -- Hooks documentation from research step has been reviewed -- If new hooks exist: - - Added to `src/deepwork/schemas/job_schema.py` with proper typing - - ALL existing adapters updated in `src/deepwork/adapters.py` - - Each adapter indicates support level (implemented, None, or partial) -- If no new hooks needed: - - Decision documented with a comment explaining the analysis -- Only hooks available on slash command definitions are considered -- `job_schema.py` has no syntax errors (verified with py_compile) -- `adapters.py` has no syntax errors (verified with py_compile) -- All adapters have consistent hook fields (same fields across all adapters) -- When all criteria are met, include `✓ Quality Criteria Met` in your response - -## Context - -DeepWork supports multiple AI platforms, and each platform may have different capabilities for hooks within command definitions. The schema defines what hooks CAN exist, while adapters define what each platform actually SUPPORTS. - -This separation allows: -- Job definitions to use any hook (the schema is the superset) -- Platform-specific generation to only use supported hooks (adapters filter) -- Future platforms to add new hooks without breaking existing ones - -Maintaining consistency is critical - all adapters must have the same hook fields, even if they don't support them (use `None` for unsupported). 
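To make the split concrete, here is a minimal sketch of the pattern — the field and class names beyond `StepDefinition`, `HookConfig`, and `get_hook_support` are illustrative assumptions, not the project's exact API:

```python
# Minimal sketch of the schema/adapter split (illustrative names, not the real API).
from dataclasses import dataclass
from typing import Optional

@dataclass
class HookConfig:
    prompt: Optional[str] = None  # inline validation prompt
    script: Optional[str] = None  # path to a hook script

@dataclass
class StepDefinition:
    id: str
    name: str
    # The schema is the superset: every hook any platform may offer is declared here.
    after_agent: Optional[list[HookConfig]] = None
    new_hook_name: Optional[list[HookConfig]] = None  # hypothetical new hook

class ExamplePlatformAdapter:
    """Adapters narrow the superset to what a single platform actually supports."""

    def get_hook_support(self) -> dict:
        return {
            "after_agent": True,    # supported by this platform
            "new_hook_name": None,  # declared in the schema, unsupported here
        }
```

This keeps job definitions free to reference any declared hook, while generation for each platform simply skips the hooks that platform reports as `None`.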
- -## Common Hook Types - -For reference, here are common hook patterns across platforms: - -| Hook Type | Purpose | Example Platforms | -|-----------|---------|-------------------| -| `stop_hooks` | Quality validation loops | Claude Code | -| `pre_hooks` | Run before command | Various | -| `post_hooks` | Run after command | Various | -| `validation_hooks` | Validate inputs/outputs | Various | - -When you find a new hook type, consider whether it maps to an existing pattern or is genuinely new functionality. diff --git a/.deepwork/jobs/add_platform/steps/implement.md b/.deepwork/jobs/add_platform/steps/implement.md deleted file mode 100644 index 55ff6ec7..00000000 --- a/.deepwork/jobs/add_platform/steps/implement.md +++ /dev/null @@ -1,226 +0,0 @@ -# Implement Platform Support - -## Objective - -Create the complete platform implementation including the adapter class, command templates, comprehensive tests, and documentation updates. - -## Task - -Build the full platform support by implementing the adapter, creating templates, writing tests with 100% coverage, and updating the README. - -### Prerequisites - -Read the outputs from previous steps: -- `doc/platforms//cli_configuration.md` - For template structure -- `src/deepwork/schemas/job_schema.py` - For current schema -- `src/deepwork/adapters.py` - For adapter patterns - -Also review existing implementations for reference: -- `src/deepwork/templates/claude/` - Example templates -- `tests/` - Existing test patterns - -### Process - -1. **Create the platform adapter class** - - Add a new adapter class to `src/deepwork/adapters.py`: - - ```python - class NewPlatformAdapter(PlatformAdapter): - """Adapter for .""" - - platform_name = "" - command_directory = "" # e.g., ".cursor/commands" - command_extension = ".md" # or appropriate extension - - def get_hook_support(self) -> dict: - """Return which hooks this platform supports.""" - return { - "stop_hooks": True, # or False/None - # ... other hooks - } - - def generate_command(self, step: StepDefinition, job: JobDefinition) -> str: - """Generate command file content for this platform.""" - # Use Jinja2 template - template = self.env.get_template(f"{self.platform_name}/command.md.j2") - return template.render(step=step, job=job) - ``` - -2. **Create command templates** - - Create templates in `src/deepwork/templates//`: - - - `command.md.j2` - Main command template - - Any other templates needed for the platform's format - - Use the CLI configuration documentation to ensure the template matches the platform's expected format. - -3. **Register the adapter** - - Update the adapter registry in `src/deepwork/adapters.py`: - - ```python - PLATFORM_ADAPTERS = { - "claude": ClaudeAdapter, - "": NewPlatformAdapter, - # ... other adapters - } - ``` - -4. **Write comprehensive tests** - - Create tests in `tests/` that cover: - - - Adapter instantiation - - Hook support detection - - Command generation - - Template rendering - - Edge cases (empty inputs, special characters, etc.) - - Integration with the sync command - - **Critical**: Tests must achieve 100% coverage of new code. - -5. **Update README.md** - - Add the new platform to `README.md`: - - - Add to "Supported Platforms" list - - Add installation instructions: - ```bash - deepwork install --platform - ``` - - Document any platform-specific notes or limitations - -6. 
**Run tests and verify coverage** - - ```bash - uv run pytest --cov=src/deepwork --cov-report=term-missing - ``` - - - All tests must pass - - New code must have 100% coverage - - If coverage is below 100%, add more tests - -7. **Iterate until tests pass with full coverage** - - This step has a `stop_hooks` script that runs tests. Keep iterating until: - - All tests pass - - Coverage is 100% for new functionality - -## Output Format - -### templates/ - -Location: `src/deepwork/templates//` - -Create the following files: - -**command.md.j2**: -```jinja2 -{# Template for command files #} -{# Follows the platform's expected format from cli_configuration.md #} - -[Platform-specific frontmatter or metadata] - -# {{ step.name }} - -{{ step.description }} - -## Instructions - -{{ step.instructions_content }} - -[... rest of template based on platform format ...] -``` - -### tests/ - -Location: `tests/test__adapter.py` - -```python -"""Tests for the adapter.""" -import pytest -from deepwork.adapters import NewPlatformAdapter - -class TestNewPlatformAdapter: - """Test suite for NewPlatformAdapter.""" - - def test_adapter_initialization(self): - """Test adapter can be instantiated.""" - adapter = NewPlatformAdapter() - assert adapter.platform_name == "" - - def test_hook_support(self): - """Test hook support detection.""" - adapter = NewPlatformAdapter() - hooks = adapter.get_hook_support() - assert "stop_hooks" in hooks - # ... more assertions - - def test_command_generation(self): - """Test command file generation.""" - # ... test implementation - - # ... more tests for 100% coverage -``` - -### README.md - -Add to the existing README.md: - -```markdown -## Supported Platforms - -- **Claude Code** - Anthropic's CLI for Claude -- **** - [Brief description] - -## Installation - -### - -```bash -deepwork install --platform -``` - -[Any platform-specific notes] -``` - -## Quality Criteria - -- Platform adapter class added to `src/deepwork/adapters.py`: - - Inherits from `PlatformAdapter` - - Implements all required methods - - Registered in `PLATFORM_ADAPTERS` -- Templates created in `src/deepwork/templates//`: - - `command.md.j2` exists and renders correctly - - Format matches platform's expected command format -- Tests created in `tests/`: - - Cover all new adapter functionality - - Cover template rendering - - All tests pass -- Test coverage is 100% for new code: - - Run `uv run pytest --cov=src/deepwork --cov-report=term-missing` - - No uncovered lines in new code -- README.md updated: - - Platform listed in supported platforms - - Installation command documented - - Any platform-specific notes included -- When all criteria are met, include `✓ Quality Criteria Met` in your response - -## Context - -This is the core implementation step. The adapter you create will be responsible for: -- Determining where command files are placed -- Generating command file content from job definitions -- Handling platform-specific features and hooks - -The templates use Jinja2 and should produce files that match exactly what the platform expects. Reference the CLI configuration documentation frequently to ensure compatibility. 
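As a rough illustration of the rendering flow (assuming a standard Jinja2 `Environment`; the loader path and keyword arguments are assumptions, not the adapter's actual constructor):

```python
# Sketch of command rendering with Jinja2 - loader setup is illustrative.
from jinja2 import Environment, FileSystemLoader

env = Environment(
    loader=FileSystemLoader("src/deepwork/templates"),
    keep_trailing_newline=True,  # preserve trailing newlines in generated command files
)

def render_command(platform_name: str, step, job) -> str:
    """Render one command file for a step, mirroring the adapter pattern above."""
    template = env.get_template(f"{platform_name}/command.md.j2")
    return template.render(step=step, job=job)
```

Running a quick render with dummy `step`/`job` objects during development surfaces template syntax errors early, which is cheaper than discovering them at install time.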
- -## Tips - -- Study the existing `ClaudeAdapter` as a reference implementation -- Run tests frequently as you implement -- Use `--cov-report=html` for a detailed coverage report -- If a test is hard to write, the code might need refactoring -- Template syntax errors often show up at runtime - test early diff --git a/.deepwork/jobs/add_platform/steps/research.md b/.deepwork/jobs/add_platform/steps/research.md deleted file mode 100644 index c4ee425e..00000000 --- a/.deepwork/jobs/add_platform/steps/research.md +++ /dev/null @@ -1,188 +0,0 @@ -# Research Platform Documentation - -## Objective - -Capture comprehensive documentation for the new AI platform's CLI configuration and hooks system, creating a local reference that will guide the implementation phases. - -## Task - -Research the target platform's official documentation and create two focused documentation files that will serve as the foundation for implementing platform support in DeepWork. - -### Process - -1. **Identify the platform's documentation sources** - - Find the official documentation website - - Locate the CLI/agent configuration documentation - - Find the hooks or customization system documentation - - Note: Focus ONLY on slash command/custom command hooks, not general CLI hooks - -2. **Gather CLI configuration documentation** - - How is the CLI configured? (config files, environment variables, etc.) - - Where are custom commands/skills stored? - - What is the command file format? (markdown, YAML, etc.) - - What metadata or frontmatter is supported? - - How does the platform discover and load commands? - -3. **Gather hooks system documentation** - - What hooks are available for custom command definitions? - - Focus on hooks that trigger during or after command execution - - Examples: `stop_hooks`, `pre_hooks`, `post_hooks`, validation hooks - - Document the syntax and available hook types - - **Important**: Only document hooks available on slash command definitions, not general CLI hooks - -4. **Create the documentation files** - - Place files in `doc/platforms//` - - Each file must have a header comment with source and date - - Content should be comprehensive but focused - -## Output Format - -### cli_configuration.md - -Located at: `doc/platforms//cli_configuration.md` - -**Structure**: -```markdown - - -# CLI Configuration - -## Overview - -[Brief description of the platform and its CLI/agent system] - -## Configuration Files - -[Document where configuration lives and its format] - -### File Locations - -- [Location 1]: [Purpose] -- [Location 2]: [Purpose] - -### Configuration Format - -[Show the configuration file format with examples] - -## Custom Commands/Skills - -[Document how custom commands are defined] - -### Command Location - -[Where command files are stored] - -### Command File Format - -[The format of command files - markdown, YAML, etc.] - -### Metadata/Frontmatter - -[What metadata fields are supported in command files] - -```[format] -[Example of a minimal command file] -``` - -## Command Discovery - -[How the platform discovers and loads commands] - -## Platform-Specific Features - -[Any unique features relevant to command configuration] -``` - -### hooks_system.md - -Located at: `doc/platforms//hooks_system.md` - -**Structure**: -```markdown - - -# Hooks System (Command Definitions) - -## Overview - -[Brief description of hooks available for command definitions] - -**Important**: This document covers ONLY hooks available within slash command/skill definitions, not general CLI hooks. 
- -## Available Hooks - -### [Hook Name 1] - -**Purpose**: [What this hook does] - -**Syntax**: -```yaml -[hook_name]: - - [configuration] -``` - -**Example**: -```yaml -[Complete example of using this hook] -``` - -**Behavior**: [When and how this hook executes] - -### [Hook Name 2] - -[Repeat for each available hook] - -## Hook Execution Order - -[Document the order in which hooks execute, if multiple are supported] - -## Comparison with Other Platforms - -| Feature | | Claude Code | Other | -|---------|-----------|-------------|-------| -| [Feature 1] | [Support] | [Support] | [Support] | - -## Limitations - -[Any limitations or caveats about the hooks system] -``` - -## Quality Criteria - -- Both files exist in `doc/platforms//` -- Each file has a header comment with: - - Last updated date (YYYY-MM-DD format) - - Source URL where documentation was obtained -- `cli_configuration.md` comprehensively covers: - - Configuration file locations and format - - Custom command file format and location - - Command discovery mechanism -- `hooks_system.md` comprehensively covers: - - All hooks available for slash command definitions - - Syntax and examples for each hook - - NOT general CLI hooks (only command-level hooks) -- Documentation is detailed enough to implement the platform adapter -- No extraneous topics (only CLI config and command hooks) -- When all criteria are met, include `✓ Quality Criteria Met` in your response - -## Context - -This is the foundation step for adding a new platform to DeepWork. The documentation you capture here will be referenced throughout the implementation process: -- CLI configuration informs how to generate command files -- Hooks documentation determines what features the adapter needs to support -- This documentation becomes a permanent reference in `doc/platforms/` - -Take time to be thorough - incomplete documentation will slow down subsequent steps. - -## Tips - -- Use the platform's official documentation as the primary source -- If documentation is sparse, check GitHub repos, community guides, or changelog entries -- When in doubt about whether something is a "command hook" vs "CLI hook", err on the side of inclusion and note the ambiguity -- Include code examples from the official docs where available diff --git a/.deepwork/jobs/add_platform/steps/verify.md b/.deepwork/jobs/add_platform/steps/verify.md deleted file mode 100644 index fd2487d3..00000000 --- a/.deepwork/jobs/add_platform/steps/verify.md +++ /dev/null @@ -1,109 +0,0 @@ -# Verify Installation - -## Objective - -Ensure the new platform integration works correctly by setting up necessary directories and running the full installation process. - -## Task - -Perform end-to-end verification that the new platform can be installed and that DeepWork's standard jobs work correctly with it. - -### Prerequisites - -Ensure the implementation step is complete: -- Adapter class exists in `src/deepwork/adapters.py` -- Templates exist in `src/deepwork/templates//` -- Tests pass with 100% coverage -- README.md is updated - -### Process - -1. **Set up platform directories in the DeepWork repo** - - The DeepWork repository itself should have the platform's command directory structure for testing: - - ```bash - mkdir -p - ``` - - For example: - - Claude: `.claude/commands/` - - Cursor: `.cursor/commands/` (or wherever Cursor stores commands) - -2. 
**Run deepwork install for the new platform** - - ```bash - deepwork install --platform - ``` - - Verify: - - Command completes without errors - - No Python exceptions or tracebacks - - Output indicates successful installation - -3. **Check that command files were created** - - List the generated command files: - ```bash - ls -la / - ``` - - Verify: - - `deepwork_jobs.define.md` exists (or equivalent for the platform) - - `deepwork_jobs.implement.md` exists - - `deepwork_jobs.refine.md` exists - - All expected step commands exist - -4. **Validate command file content** - - Read each generated command file and verify: - - Content matches the expected format for the platform - - Job metadata is correctly included - - Step instructions are properly rendered - - Any platform-specific features (hooks, frontmatter) are present - -5. **Test alongside existing platforms** - - If other platforms are already installed, verify they still work: - ```bash - deepwork install --platform claude - ls -la .claude/commands/ - ``` - - Ensure: - - New platform doesn't break existing installations - - Each platform's commands are independent - - No file conflicts or overwrites - -## Quality Criteria - -- Platform-specific directories are set up in the DeepWork repo -- `deepwork install --platform ` completes without errors -- All expected command files are created: - - deepwork_jobs.define, implement, refine - - Any other standard job commands -- Command file content is correct: - - Matches platform's expected format - - Job/step information is properly rendered - - No template errors or missing content -- Existing platforms still work (if applicable) -- No conflicts between platforms -- When all criteria are met, include `✓ Quality Criteria Met` in your response - -## Context - -This is the final validation step before the platform is considered complete. A thorough verification ensures: -- The platform actually works, not just compiles -- Standard DeepWork jobs install correctly -- The platform integrates properly with the existing system -- Users can confidently use the new platform - -Take time to verify each aspect - finding issues now is much better than having users discover them later. 
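A lightweight way to script this check is sketched below — the expected file names follow the examples above and will differ by platform and command extension, so treat this as an illustration rather than the repo's actual test suite:

```python
# Rough end-to-end check for a newly added platform (illustrative only).
import subprocess
from pathlib import Path

def verify_install(platform: str, command_dir: str) -> None:
    result = subprocess.run(
        ["deepwork", "install", "--platform", platform],
        capture_output=True,
        text=True,
    )
    assert result.returncode == 0, f"install failed:\n{result.stderr}"
    expected = ("deepwork_jobs.define.md", "deepwork_jobs.implement.md", "deepwork_jobs.refine.md")
    for name in expected:
        assert (Path(command_dir) / name).exists(), f"missing generated command: {name}"
```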
- -## Common Issues to Check - -- **Template syntax errors**: May only appear when rendering specific content -- **Path issues**: Platform might expect different directory structure -- **Encoding issues**: Special characters in templates or content -- **Missing hooks**: Platform adapter might not handle all hook types -- **Permission issues**: Directory creation might fail in some cases diff --git a/.deepwork/jobs/commit/AGENTS.md b/.deepwork/jobs/commit/AGENTS.md deleted file mode 100644 index 2edc0424..00000000 --- a/.deepwork/jobs/commit/AGENTS.md +++ /dev/null @@ -1,19 +0,0 @@ -# Project Context for commit - -## Job-Specific Context - -### commit - -#### review -- Sub-agent approach: Use `general-purpose` subagent_type for code review (not `Bash`) since it needs to read and analyze code -- Review criteria priorities: DRY opportunities, naming clarity, and test coverage are emphasized based on common code quality issues -- Order matters: Review runs before tests so that any issues found can be fixed and verified by subsequent test run - -#### Design Decisions -- Review step is first: Catching issues early reduces wasted test runs on code that will need changes -- Sub-agent for review: Keeps main conversation context clean for subsequent steps -- Fix in main agent: After sub-agent reports issues, fixes happen in main agent to maintain context about the session's changes - -## Last Updated -- Date: 2026-01-21 -- From conversation about: Adding code review stage to commit job diff --git a/.deepwork/jobs/commit/job.yml b/.deepwork/jobs/commit/job.yml deleted file mode 100644 index 812475e2..00000000 --- a/.deepwork/jobs/commit/job.yml +++ /dev/null @@ -1,99 +0,0 @@ -# yaml-language-server: $schema=.deepwork/schemas/job.schema.json -name: commit -version: "1.5.0" -summary: "Reviews code, runs tests, lints, and commits changes. Use when ready to commit work with quality checks." -description: | - A workflow for preparing and committing code changes with quality checks. - - The **full** workflow starts with a code review to catch issues early, runs tests until - they pass, formats and lints code with ruff, then reviews changed files - before committing and pushing. The review and lint steps use sub-agents - to reduce context usage. - - Steps: - 1. review - Code review for issues, DRY opportunities, naming, and test coverage (runs in sub-agent) - 2. test - Pull latest code and run tests until they pass - 3. lint - Format and lint code with ruff (runs in sub-agent) - 4. 
commit_and_push - Review changes and commit/push - -workflows: - - name: full - summary: "Full commit workflow: review, test, lint, and commit" - steps: - - review - - test - - lint - - commit_and_push - -changelog: - - version: "1.5.0" - changes: "Added workflows section to explicitly define the full commit workflow sequence" - - version: "1.4.0" - changes: "Added changelog guidance: entries must go in [Unreleased] section, NEVER modify version numbers in pyproject.toml or CHANGELOG.md" - - version: "1.3.0" - changes: "Added code review step that runs in sub-agent to check for general issues, DRY opportunities, naming clarity, and test coverage" - - version: "1.0.1" - changes: "Changed file review from user confirmation to agent self-verification - agent now checks files match its own expectations instead of asking user every time" - - version: "1.0.0" - changes: "Initial job creation" - - version: "1.1.0" - changes: "Added nominal outputs to process-oriented steps for doc spec compliance (tests_passing, code_formatted, changes_committed)" - - version: "1.2.0" - changes: "Improved skill descriptions with third-person voice and 'Use when...' triggers for better discoverability" - -steps: - - id: review - name: "Code Review" - description: "Reviews changed code for issues, DRY opportunities, naming clarity, and test coverage using a sub-agent. Use as the first step before testing." - instructions_file: steps/review.md - inputs: [] - outputs: - - code_reviewed # implicit state: code has been reviewed and issues addressed - dependencies: [] - quality_criteria: - - "Changed files were identified" - - "Sub-agent reviewed the code for general issues, DRY opportunities, naming clarity, and test coverage" - - "All identified issues were addressed or documented as intentional" - - - id: test - name: "Run Tests" - description: "Pulls latest code and runs tests until all pass. Use after code review passes to verify changes work correctly." - instructions_file: steps/test.md - inputs: [] - outputs: - - tests_passing # implicit state: all tests pass - dependencies: - - review - quality_criteria: - - "Latest code was pulled from the branch" - - "All tests are passing" - - - id: lint - name: "Lint Code" - description: "Formats and lints code with ruff using a sub-agent. Use after tests pass to ensure code style compliance." - instructions_file: steps/lint.md - inputs: [] - outputs: - - code_formatted # implicit state: code formatted and linted - dependencies: - - test - quality_criteria: - - "ruff format was run successfully" - - "ruff check was run with --fix flag" - - "No remaining lint errors" - - - id: commit_and_push - name: "Commit and Push" - description: "Verifies changed files, creates commit, and pushes to remote. Use after linting passes to finalize changes." 
- instructions_file: steps/commit_and_push.md - inputs: [] - outputs: - - changes_committed # implicit state: changes committed and pushed - dependencies: - - lint - quality_criteria: - - "Changed files were verified against expectations" - - "CHANGELOG.md was updated with entries in [Unreleased] section (if changes warrant documentation)" - - "Version numbers were NOT modified (pyproject.toml version and CHANGELOG version headers unchanged)" - - "Commit was created with appropriate message" - - "Changes were pushed to remote" diff --git a/.deepwork/jobs/commit/steps/commit_and_push.md b/.deepwork/jobs/commit/steps/commit_and_push.md deleted file mode 100644 index cb9e8891..00000000 --- a/.deepwork/jobs/commit/steps/commit_and_push.md +++ /dev/null @@ -1,89 +0,0 @@ -# Commit and Push - -## Objective - -Review the changed files to verify they match the agent's expectations, create a commit with an appropriate message, and push to the remote repository. - -## Task - -Check the list of changed files against what was modified during this session, ensure they match expectations, then commit and push the changes. - -### Process - -1. **Get the list of changed files** - ```bash - git status - ``` - Also run `git diff --stat` to see a summary of changes. - -2. **Verify changes match expectations** - - Compare the changed files against what you modified during this session: - - Do the modified files match what you edited? - - Are there any unexpected new files? - - Are there any unexpected deleted files? - - Do the line counts seem reasonable for the changes you made? - - If changes match expectations, proceed to the next step. - - If there are unexpected changes: - - Investigate why (e.g., lint auto-fixes, generated files) - - If they're legitimate side effects of your work, include them - - If they're unrelated or shouldn't be committed, use `git restore` to discard them - -3. **Update CHANGELOG.md if needed** - - If your changes include new features, bug fixes, or other notable changes: - - Add entries to the `## [Unreleased]` section of CHANGELOG.md - - Use the appropriate subsection: `### Added`, `### Changed`, `### Fixed`, or `### Removed` - - Write concise descriptions that explain the user-facing impact - - **CRITICAL: NEVER modify version numbers** - - Do NOT change the version in `pyproject.toml` - - Do NOT change version headers in CHANGELOG.md (e.g., `## [0.4.2]`) - - Do NOT rename the `## [Unreleased]` section - - Version updates are handled by the release workflow, not commits - -4. **Stage all appropriate changes** - ```bash - git add -A - ``` - Or stage specific files if some were excluded. - -5. **View recent commit messages for style reference** - ```bash - git log --oneline -10 - ``` - -6. **Create the commit** - - Generate an appropriate commit message based on: - - The changes made - - The style of recent commits - - Conventional commit format if the project uses it - - **IMPORTANT:** Use the commit job script (not `git commit` directly): - ```bash - .claude/hooks/commit_job_git_commit.sh -m "commit message here" - ``` - -7. 
**Push to remote** - ```bash - git push - ``` - If the branch has no upstream, use: - ```bash - git push -u origin HEAD - ``` - -## Quality Criteria - -- Changed files were verified against expectations -- CHANGELOG.md was updated with entries in [Unreleased] section (if changes warrant documentation) -- Version numbers were NOT modified (pyproject.toml version and CHANGELOG version headers unchanged) -- Commit was created with appropriate message -- Changes were pushed to remote - -## Context - -This is the final step of the commit workflow. The agent verifies that the changed files match its own expectations from the work done during the session, then commits and pushes. This catches unexpected changes while avoiding unnecessary user interruptions. diff --git a/.deepwork/jobs/commit/steps/lint.md b/.deepwork/jobs/commit/steps/lint.md deleted file mode 100644 index 4485549d..00000000 --- a/.deepwork/jobs/commit/steps/lint.md +++ /dev/null @@ -1,70 +0,0 @@ -# Lint Code - -## Objective - -Format and lint the codebase using ruff to ensure code quality and consistency. - -## Task - -Run ruff format and ruff check to format and lint the code. This step should be executed using a sub-agent to conserve context in the main conversation. - -### Process - -**IMPORTANT**: Use the Task tool to spawn a sub-agent for this work. This saves context in the main conversation. Use the `haiku` model for speed. - -1. **Spawn a sub-agent to run linting** - - Use the Task tool with these parameters: - - `subagent_type`: "Bash" - - `model`: "haiku" - - `prompt`: See below - - The sub-agent should: - - a. **Run ruff format** - ```bash - ruff format . - ``` - This formats the code according to ruff's style rules. - - b. **Run ruff check with auto-fix** - ```bash - ruff check --fix . - ``` - This checks for lint errors and automatically fixes what it can. - - c. **Run ruff check again to verify** - ```bash - ruff check . - ``` - Capture the final output to verify no remaining issues. - -2. **Review sub-agent results** - - Check that both format and check completed successfully - - Note any remaining lint issues that couldn't be auto-fixed - -3. **Handle remaining issues** - - If there are lint errors that couldn't be auto-fixed, fix them manually - - Re-run ruff check to verify - -## Example Sub-Agent Prompt - -``` -Run ruff to format and lint the codebase: - -1. Run: ruff format . -2. Run: ruff check --fix . -3. Run: ruff check . (to verify no remaining issues) - -Report the results of each command. -``` - -## Quality Criteria - -- ruff format was run successfully -- ruff check was run with --fix flag -- No remaining lint errors - -## Context - -This step ensures code quality and consistency before committing. It runs after tests pass and before the commit step. Using a sub-agent keeps the main conversation context clean for the commit review. diff --git a/.deepwork/jobs/commit/steps/review.md b/.deepwork/jobs/commit/steps/review.md deleted file mode 100644 index bfe8cc0c..00000000 --- a/.deepwork/jobs/commit/steps/review.md +++ /dev/null @@ -1,55 +0,0 @@ -# Code Review - -## Objective - -Review changed code for quality issues before running tests. This catches problems early and ensures code meets quality standards. - -## Task - -Use a sub-agent to review the staged/changed code and identify issues that should be fixed before committing. - -### Process - -**IMPORTANT**: Use the Task tool to spawn a sub-agent for this review. This saves context in the main conversation. - -1. 
**Get the list of changed files** - ```bash - git diff --name-only HEAD - git diff --name-only --staged - ``` - Combine these to get all files that have been modified. - -2. **Spawn a sub-agent to review the code** - - Use the Task tool with these parameters: - - `subagent_type`: "general-purpose" - - `prompt`: Instruct the sub-agent to: - - Read the code review standards from `doc/code_review_standards.md` - - Read each of the changed files - - Review each file against the standards - - Report issues found with file, line number, severity, and suggested fix - -3. **Review sub-agent findings** - - Examine each issue identified - - Prioritize issues by severity - -4. **Fix identified issues** - - Address each issue found by the review - - For DRY violations: extract shared code into functions/modules - - For naming issues: rename to be clearer - - For missing tests: add appropriate test cases - - For bugs: fix the underlying issue - -5. **Re-run review if significant changes made** - - If you made substantial changes, consider running another review pass - - Ensure fixes didn't introduce new issues - -## Quality Criteria - -- Changed files were identified -- Sub-agent read the code review standards and reviewed all changed files -- All identified issues were addressed or documented as intentional - -## Context - -This is the first step of the commit workflow. Code review happens before tests to catch quality issues early. The sub-agent approach keeps the main conversation context clean while providing thorough review coverage. diff --git a/.deepwork/jobs/commit/steps/test.md b/.deepwork/jobs/commit/steps/test.md deleted file mode 100644 index 29c2b920..00000000 --- a/.deepwork/jobs/commit/steps/test.md +++ /dev/null @@ -1,51 +0,0 @@ -# Run Tests - -## Objective - -Run the project's test suite and fix any failing tests until all tests pass. - -## Task - -Execute the test suite for the project and iteratively fix any failures until all tests pass. - -### Process - -1. **Pull latest code from the branch** - - Run `git pull` to fetch and merge any changes from the remote - - If there are merge conflicts, resolve them before proceeding - - This ensures you're testing against the latest code - -2. **Detect or use the test command** - - If a test command was provided, use that - - Otherwise, auto-detect the project type and determine the appropriate test command: - - Python: `pytest`, `python -m pytest`, `uv run pytest` - - Node.js: `npm test`, `yarn test`, `bun test` - - Go: `go test ./...` - - Rust: `cargo test` - - Check `package.json`, `pyproject.toml`, `Cargo.toml`, `go.mod` for hints - -3. **Run the tests** - - Execute the test command - - Capture the output - -4. **Analyze failures** - - If tests pass, proceed to output - - If tests fail, analyze the failure messages - - Identify the root cause of each failure - -5. **Fix failing tests** - - Make the necessary code changes to fix failures - - This may involve fixing bugs in implementation code or updating tests - - Re-run tests after each fix - -6. **Iterate until passing** - - Continue the fix/test cycle until all tests pass - -## Quality Criteria - -- Latest code was pulled from the branch -- All tests are passing - -## Context - -This step runs after code review. Tests must pass before proceeding to lint and commit. This ensures code quality and prevents broken code from being committed. If tests fail due to issues introduced by the code review fixes, iterate on the fixes until tests pass. 
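The auto-detection in step 2 can be approximated with a small helper — a sketch only; the mapping covers just the hints listed above, and a real project may need more:

```python
# Sketch of the test-command auto-detection described in step 2 (not exhaustive).
from pathlib import Path

def detect_test_command(root: Path = Path(".")) -> str:
    if (root / "pyproject.toml").exists():
        return "uv run pytest" if (root / "uv.lock").exists() else "pytest"
    if (root / "package.json").exists():
        return "npm test"
    if (root / "go.mod").exists():
        return "go test ./..."
    if (root / "Cargo.toml").exists():
        return "cargo test"
    raise RuntimeError("Could not detect a test command; ask for one explicitly.")
```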
diff --git a/library/jobs/commit/block_bash_with_instructions.sh b/library/jobs/commit/block_bash_with_instructions.sh deleted file mode 100755 index 7bd16f88..00000000 --- a/library/jobs/commit/block_bash_with_instructions.sh +++ /dev/null @@ -1,74 +0,0 @@ -#!/bin/bash -# block_bash_with_instructions.sh - Blocks specific bash commands and provides alternative instructions -# -# This hook intercepts Bash tool use calls and blocks commands that match -# specific patterns, providing alternative instructions to the agent. -# -# Usage: Registered as a PreToolUse hook in .claude/settings.json -# -# Input (stdin): JSON from Claude Code hook system containing tool_name and tool_input -# Output (stderr): Error message if blocked (Claude Code reads stderr for exit code 2) -# Exit codes: -# 0 - Success (allow action) -# 2 - Blocking error (prevent action with message) - -set -e - -# ============================================================================= -# BLOCKED COMMANDS CONFIGURATION -# ============================================================================= -# Format: Each entry is a regex pattern followed by a delimiter (|||) and instructions -# The regex is matched against the full bash command -# Add new blocked commands here: - -BLOCKED_COMMANDS=( - '^[[:space:]]*git[[:space:]]+commit|||All commits must be done via the `/commit` skill. Do not use git commit directly. Instead, run `/commit` to start the commit workflow which includes code review, testing, and linting before committing.' -) - -# ============================================================================= -# HOOK LOGIC - DO NOT MODIFY BELOW UNLESS NECESSARY -# ============================================================================= - -# Read stdin into variable -HOOK_INPUT="" -if [ ! -t 0 ]; then - HOOK_INPUT=$(cat) -fi - -# Exit early if no input -if [ -z "${HOOK_INPUT}" ]; then - exit 0 -fi - -# Extract tool_name from input -TOOL_NAME=$(echo "${HOOK_INPUT}" | jq -r '.tool_name // empty' 2>/dev/null) - -# Only process Bash tool calls -if [ "${TOOL_NAME}" != "Bash" ]; then - exit 0 -fi - -# Extract the command from tool_input -COMMAND=$(echo "${HOOK_INPUT}" | jq -r '.tool_input.command // empty' 2>/dev/null) - -# Exit if no command -if [ -z "${COMMAND}" ]; then - exit 0 -fi - -# Check each blocked pattern -for entry in "${BLOCKED_COMMANDS[@]}"; do - # Split entry by delimiter - pattern="${entry%%|||*}" - instructions="${entry##*|||}" - - # Check if command matches pattern (using extended regex) - if echo "${COMMAND}" | grep -qE "${pattern}"; then - # Output error message to stderr (Claude Code reads stderr for exit code 2) - echo "${instructions}" >&2 - exit 2 - fi -done - -# Command is allowed -exit 0 diff --git a/library/jobs/commit/code_review_standards.example.md b/library/jobs/commit/code_review_standards.example.md deleted file mode 100644 index 3761c354..00000000 --- a/library/jobs/commit/code_review_standards.example.md +++ /dev/null @@ -1,67 +0,0 @@ -# Code Review Standards - -This document defines the standards used during code review in the commit workflow. 
- -## Review Categories - -### General Issues - -Check for: -- Logic errors or potential bugs -- Error handling gaps -- Security concerns (injection, authentication, authorization) -- Performance issues (inefficient algorithms, unnecessary computation) -- Resource leaks (unclosed files, connections) - -### DRY (Don't Repeat Yourself) - -Look for: -- Duplicated code that should be extracted into functions -- Repeated patterns that could be abstracted -- Copy-pasted logic with minor variations -- Similar code blocks that differ only in variable names - -### Naming Clarity - -Ensure: -- Variables, functions, and classes have clear, descriptive names -- Names reflect purpose and intent -- Abbreviations are avoided unless universally understood -- Naming conventions are consistent throughout the codebase - -### Test Coverage - -Verify: -- New functions or classes have corresponding tests -- New code paths are tested -- Edge cases are covered -- Error conditions are tested -- If tests are missing, note what should be tested - -### Test Quality - -Ensure tests add value and are not duplicative: -- Each test should verify a distinct behavior or scenario -- Tests should not duplicate what other tests already cover -- Test names should clearly describe what they're testing -- Tests should be meaningful, not just checking trivial cases -- Avoid testing implementation details; focus on behavior -- If multiple tests appear redundant, suggest consolidation - -## Severity Levels - -When reporting issues, categorize by severity: - -- **Critical**: Must fix before commit (bugs, security issues) -- **High**: Should fix before commit (logic errors, missing error handling) -- **Medium**: Recommended to fix (DRY violations, unclear naming) -- **Low**: Nice to have (style improvements, minor optimizations) - -## Review Output Format - -For each issue found, provide: -1. File and line number -2. Severity level -3. Category (General/DRY/Naming/Tests) -4. Description of the issue -5. Suggested fix or improvement diff --git a/library/jobs/commit/commit_job_git_commit.sh b/library/jobs/commit/commit_job_git_commit.sh deleted file mode 100755 index 764b0768..00000000 --- a/library/jobs/commit/commit_job_git_commit.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/bash -# commit_job_git_commit.sh - Wrapper for git commit invoked via the /commit skill -# -# This script bypasses the PreToolUse hook that blocks direct `git commit` commands. -# It allows the commit job to perform the actual commit after all quality checks pass. - -exec git commit "$@" diff --git a/library/jobs/commit/job.yml b/library/jobs/commit/job.yml deleted file mode 100644 index 9a7d7491..00000000 --- a/library/jobs/commit/job.yml +++ /dev/null @@ -1,84 +0,0 @@ -# yaml-language-server: $schema=.deepwork/schemas/job.schema.json -name: commit -version: "1.0.0" -summary: "Reviews code, runs tests, lints, and commits changes. Use when ready to commit work with quality checks." -description: | - A workflow for preparing and committing code changes with quality checks. - - The **full** workflow starts with a code review to catch issues early, runs tests until - they pass, formats and lints code, then reviews changed files before - committing and pushing. The review and lint steps use sub-agents to reduce - context usage. - - Steps: - 1. review - Code review for issues, DRY opportunities, naming, and test coverage (runs in sub-agent) - 2. test - Pull latest code and run tests until they pass - 3. lint - Format and lint code (runs in sub-agent) - 4. 
commit_and_push - Review changes and commit/push - -workflows: - - name: full - summary: "Full commit workflow: review, test, lint, and commit" - steps: - - review - - test - - lint - - commit_and_push - -changelog: - - version: "1.0.0" - changes: "Initial library version - generalized from project-specific commit workflow" - -steps: - - id: review - name: "Code Review" - description: "Reviews changed code for issues, DRY opportunities, naming clarity, and test coverage using a sub-agent. Use as the first step before testing." - instructions_file: steps/review.md - inputs: [] - outputs: - - code_reviewed # implicit state: code has been reviewed and issues addressed - dependencies: [] - quality_criteria: - - "Changed files were identified" - - "Code was reviewed against the project's code review standards" - - "All identified issues were addressed or documented as intentional" - - - id: test - name: "Run Tests" - description: "Pulls latest code and runs tests until all pass. Use after code review passes to verify changes work correctly." - instructions_file: steps/test.md - inputs: [] - outputs: - - tests_passing # implicit state: all tests pass - dependencies: - - review - quality_criteria: - - "Latest code was pulled from the branch" - - "All tests are passing" - - - id: lint - name: "Lint Code" - description: "Formats and lints code using a sub-agent. Use after tests pass to ensure code style compliance." - instructions_file: steps/lint.md - inputs: [] - outputs: - - code_formatted # implicit state: code formatted and linted - dependencies: - - test - quality_criteria: - - "Code was formatted" - - "Lint check passed with no errors" - - - id: commit_and_push - name: "Commit and Push" - description: "Verifies changed files, creates commit, and pushes to remote. Use after linting passes to finalize changes." - instructions_file: steps/commit_and_push.md - inputs: [] - outputs: - - changes_committed # implicit state: changes committed and pushed - dependencies: - - lint - quality_criteria: - - "Changed files were verified against expectations" - - "Commit was created with appropriate message" - - "Changes were pushed to remote" diff --git a/library/jobs/commit/readme.md b/library/jobs/commit/readme.md deleted file mode 100644 index fd067a1d..00000000 --- a/library/jobs/commit/readme.md +++ /dev/null @@ -1,174 +0,0 @@ -# Commit Job - -A structured workflow for committing code changes with built-in quality checks. - -## Overview - -This job implements a comprehensive commit workflow that ensures code quality before every commit. Instead of allowing direct `git commit` commands, this job: - -1. **Reviews** changed code for issues, DRY opportunities, naming clarity, and test coverage -2. **Tests** the code to ensure all tests pass -3. **Lints** the code to ensure consistent formatting and style -4. **Commits** and pushes only after all checks pass - -## Why Hijack `git commit`? - -The core design principle of this job is that **every commit should pass through quality checks**. To enforce this, we intercept `git commit` commands and redirect the agent to use the `/commit` skill instead. 
- -Without this interception, an AI agent might: -- Commit code that hasn't been reviewed -- Push changes without running tests -- Skip linting, leading to inconsistent code style -- Bypass the structured workflow entirely - -By blocking `git commit` and requiring the commit job's script, we guarantee that: -- Code is reviewed before testing (catching issues early) -- Tests pass before linting (no point linting broken code) -- Linting completes before committing (consistent style) -- All quality gates are passed before code reaches the repository - -## IMPORTANT: REQUIRED CUSTOMIZATION - -When installing this job to a new project, you must customize the following: - -### 1. Replace `[test command]` - -In `steps/test.md`, replace `[test command]` with your project's test command (e.g., `pytest`, `npm test`, `go test ./...`). - -### 2. Replace `[format command]` - -In `steps/lint.md`, replace `[format command]` with your project's code formatting command (e.g., `ruff format .`, `npx prettier --write .`, `go fmt ./...`). - -### 3. Replace `[lint check command]` - -In `steps/lint.md`, replace `[lint check command]` with your project's lint check command (e.g., `ruff check --fix .`, `npx eslint --fix .`, `golangci-lint run`). - -### 4. Replace `[code review standards path]` - -In `steps/review.md`, replace `[code review standards path]` with the path to your project's code review standards file (e.g., `docs/code_review_standards.md`). - -If your project doesn't have a code review standards file yet, you can use the provided example as a starting point: - -```bash -cp library/jobs/commit/code_review_standards.example.md docs/code_review_standards.md -``` - -Then customize `docs/code_review_standards.md` to match your project's specific requirements, coding style, and quality expectations. - -### 5. Replace `[commit script path]` - -In `steps/commit_and_push.md`, replace `[commit script path]` with the path to your commit wrapper script (e.g., `.deepwork/jobs/commit/commit_job_git_commit.sh`). See installation step 3 below for how to create this script. - -## Installation - -### 1. Copy the Job Folder - -Copy this entire `commit` folder to your project's `.deepwork/jobs/` directory: - -```bash -cp -r library/jobs/commit .deepwork/jobs/ -``` - -### 2. Install the Git Commit Blocker Hook - -The job includes a `block_bash_with_instructions.sh` script that intercepts `git commit` commands and redirects the agent to use the `/commit` skill. Copy it to your hooks directory and make it executable: - -```bash -mkdir -p .claude/hooks -cp .deepwork/jobs/commit/block_bash_with_instructions.sh .claude/hooks/ -chmod +x .claude/hooks/block_bash_with_instructions.sh -``` - -### 3. Make the Commit Wrapper Script Executable - -The job also includes a `commit_job_git_commit.sh` script that bypasses the hook interception (used by the commit job itself). Make it executable: - -```bash -chmod +x .deepwork/jobs/commit/commit_job_git_commit.sh -``` - -### 4. Configure settings.json - -Add the following to your `.claude/settings.json`: - -```json -{ - "permissions": { - "allow": [ - "Bash(.deepwork/jobs/commit/commit_job_git_commit.sh:*)" - ] - }, - "hooks": { - "PreToolUse": [ - { - "matcher": "Bash", - "hooks": [ - { - "type": "command", - "command": ".claude/hooks/block_bash_with_instructions.sh" - } - ] - } - ] - } -} -``` - -This configuration: -- Allows the commit wrapper script to run without prompts -- Registers the hook that blocks direct `git commit` commands - -### 5. 
Customize the Placeholders - -Replace all placeholders in the step files as described in the "Required Customization" section above. - -### 6. Sync the Skills - -Run `deepwork sync` to generate the slash commands for your AI coding assistant. - -## Workflow Steps - -### 1. Code Review (`/commit.review`) - -Uses a sub-agent to review changed files against the standards defined in your project's code review standards file. The example standards file checks for: -- General issues (bugs, security, performance) -- DRY opportunities (duplicated code) -- Naming clarity (descriptive names) -- Test coverage (missing tests) - -### 2. Run Tests (`/commit.test`) - -- Pulls latest code from the branch -- Runs the test suite -- Fixes any failing tests -- Iterates until all tests pass - -### 3. Lint Code (`/commit.lint`) - -Uses a sub-agent to: -- Format code according to project style -- Run lint checks -- Fix any auto-fixable issues - -### 4. Commit and Push (`/commit.commit_and_push`) - -- Reviews changed files against expectations -- Creates commit with appropriate message -- Pushes to remote repository - -## Usage - -Once installed and synced, simply run: - -``` -/commit -``` - -This will execute all steps in order. You can also run individual steps: - -``` -/commit.review -/commit.test -/commit.lint -/commit.commit_and_push -``` diff --git a/library/jobs/commit/steps/commit_and_push.md b/library/jobs/commit/steps/commit_and_push.md deleted file mode 100644 index 565c4877..00000000 --- a/library/jobs/commit/steps/commit_and_push.md +++ /dev/null @@ -1,66 +0,0 @@ -# Commit and Push - -## Objective - -Review the changed files to verify they match the agent's expectations, create a commit with an appropriate message, and push to the remote repository. - -## Task - -Check the list of changed files against what was modified during this session, ensure they match expectations, then commit and push the changes. - -### Process - -1. **Get the list of changed files** - ```bash - git status - ``` - Also run `git diff --stat` to see a summary of changes. - -2. **Verify changes match expectations** - - Compare the changed files against what you modified during this session: - - Do the modified files match what you edited? - - Are there any unexpected new files? - - Are there any unexpected deleted files? - - Do the line counts seem reasonable for the changes you made? - - If changes match expectations, proceed to the next step. - - If there are unexpected changes: - - Investigate why (e.g., lint auto-fixes, generated files) - - If they're legitimate side effects of your work, include them - - If they're unrelated or shouldn't be committed, use `git restore` to discard them - -3. **Stage all appropriate changes** - ```bash - git add -A - ``` - Or stage specific files if some were excluded. - -4. **Create the commit** - - Generate an appropriate commit message based on the changes made. - - **IMPORTANT:** Use the commit job script (not `git commit` directly): - ```bash - [commit script path] -m "commit message here" - ``` - -5. **Push to remote** - ```bash - git push - ``` - If the branch has no upstream, use: - ```bash - git push -u origin HEAD - ``` - -## Quality Criteria - -- Changed files were verified against expectations -- Commit was created with appropriate message -- Changes were pushed to remote - -## Context - -This is the final step of the commit workflow. The agent verifies that the changed files match its own expectations from the work done during the session, then commits and pushes. 
This catches unexpected changes while avoiding unnecessary user interruptions. diff --git a/library/jobs/commit/steps/lint.md b/library/jobs/commit/steps/lint.md deleted file mode 100644 index eb5989bd..00000000 --- a/library/jobs/commit/steps/lint.md +++ /dev/null @@ -1,41 +0,0 @@ -# Lint Code - -## Objective - -Format and lint the codebase to ensure code quality and consistency. - -## Task - -Run the project's format and lint commands. This step should be executed using a sub-agent to conserve context in the main conversation. - -### Process - -**IMPORTANT**: Use the Task tool to spawn a sub-agent for this work. This saves context in the main conversation. Use the `haiku` model for speed. - -1. **Spawn a sub-agent to run linting** - - Use the Task tool with these parameters: - - `subagent_type`: "Bash" - - `model`: "haiku" - - `prompt`: Instruct the sub-agent to: - - Run the format command: `[format command]` - - Run the lint check command: `[lint check command]` - - Run lint check again to verify no remaining issues - - Report the results of each command - -2. **Review sub-agent results** - - Check that both format and check completed successfully - - Note any remaining lint issues that couldn't be auto-fixed - -3. **Handle remaining issues** - - If there are lint errors that couldn't be auto-fixed, fix them manually - - Re-run lint check to verify - -## Quality Criteria - -- Code was formatted -- Lint check passed with no errors - -## Context - -This step ensures code quality and consistency before committing. It runs after tests pass and before the commit step. Using a sub-agent keeps the main conversation context clean for the commit review. diff --git a/library/jobs/commit/steps/review.md b/library/jobs/commit/steps/review.md deleted file mode 100644 index 53e0f377..00000000 --- a/library/jobs/commit/steps/review.md +++ /dev/null @@ -1,55 +0,0 @@ -# Code Review - -## Objective - -Review changed code for quality issues before running tests. This catches problems early and ensures code meets quality standards. - -## Task - -Use a sub-agent to review the staged/changed code and identify issues that should be fixed before committing. - -### Process - -**IMPORTANT**: Use the Task tool to spawn a sub-agent for this review. This saves context in the main conversation. - -1. **Get the list of changed files** - ```bash - git diff --name-only HEAD - git diff --name-only --staged - ``` - Combine these to get all files that have been modified. - -2. **Spawn a sub-agent to review the code** - - Use the Task tool with these parameters: - - `subagent_type`: "general-purpose" - - `prompt`: Instruct the sub-agent to: - - Read the code review standards from `[code review standards path]` - - Read each of the changed files - - Review each file against the standards - - Report issues found with file, line number, severity, and suggested fix - -3. **Review sub-agent findings** - - Examine each issue identified - - Prioritize issues by severity - -4. **Fix identified issues** - - Address each issue found by the review - - For DRY violations: extract shared code into functions/modules - - For naming issues: rename to be clearer - - For missing tests: add appropriate test cases - - For bugs: fix the underlying issue - -5. 
**Re-run review if significant changes made** - - If you made substantial changes, consider running another review pass - - Ensure fixes didn't introduce new issues - -## Quality Criteria - -- Changed files were identified -- Code was reviewed against the project's code review standards -- All identified issues were addressed or documented as intentional - -## Context - -This is the first step of the commit workflow. Code review happens before tests to catch quality issues early. The sub-agent approach keeps the main conversation context clean while providing thorough review coverage. diff --git a/library/jobs/commit/steps/test.md b/library/jobs/commit/steps/test.md deleted file mode 100644 index acc4e926..00000000 --- a/library/jobs/commit/steps/test.md +++ /dev/null @@ -1,44 +0,0 @@ -# Run Tests - -## Objective - -Run the project's test suite and fix any failing tests until all tests pass. - -## Task - -Execute the test suite for the project and iteratively fix any failures until all tests pass. - -### Process - -1. **Pull latest code from the branch** - - Run `git pull` to fetch and merge any changes from the remote - - If there are merge conflicts, resolve them before proceeding - - This ensures you're testing against the latest code - -2. **Run the test command** - ```bash - [test command] - ``` - Capture the output. - -3. **Analyze failures** - - If tests pass, proceed to output - - If tests fail, analyze the failure messages - - Identify the root cause of each failure - -4. **Fix failing tests** - - Make the necessary code changes to fix failures - - This may involve fixing bugs in implementation code or updating tests - - Re-run tests after each fix - -5. **Iterate until passing** - - Continue the fix/test cycle until all tests pass - -## Quality Criteria - -- Latest code was pulled from the branch -- All tests are passing - -## Context - -This step runs after code review. Tests must pass before proceeding to lint and commit. This ensures code quality and prevents broken code from being committed. If tests fail due to issues introduced by the code review fixes, iterate on the fixes until tests pass. From fd0d348f2e7ad76f49a1eeb76483308f276a73ac Mon Sep 17 00:00:00 2001 From: Noah Horton Date: Wed, 4 Feb 2026 10:11:22 -0700 Subject: [PATCH 08/45] chore: Remove dead rules_check references from docstrings The rules system was removed in commit 6b3e1a2. This cleans up stale documentation references to rules_check in hook-related code. Co-Authored-By: Claude Opus 4.5 --- src/deepwork/cli/hook.py | 7 +++---- src/deepwork/core/hooks_syncer.py | 2 +- src/deepwork/hooks/__init__.py | 9 +++------ 3 files changed, 7 insertions(+), 11 deletions(-) diff --git a/src/deepwork/cli/hook.py b/src/deepwork/cli/hook.py index 5182b20a..3e921941 100644 --- a/src/deepwork/cli/hook.py +++ b/src/deepwork/cli/hook.py @@ -5,7 +5,6 @@ deepwork was installed (flake, pipx, uv, etc.). Usage: - deepwork hook rules_check deepwork hook This is meant to be called from hook wrapper scripts (claude_hook.sh, gemini_hook.sh). @@ -32,14 +31,14 @@ def hook(hook_name: str) -> None: """ Run a DeepWork hook by name. - HOOK_NAME: Name of the hook to run (e.g., 'rules_check') + HOOK_NAME: Name of the hook to run (e.g., 'check_version') This command imports and runs the hook module from deepwork.hooks.{hook_name}. The hook receives stdin input and outputs to stdout, following the hook protocol. 
Examples: - deepwork hook rules_check - echo '{}' | deepwork hook rules_check + deepwork hook check_version + echo '{}' | deepwork hook my_hook """ try: # Import the hook module diff --git a/src/deepwork/core/hooks_syncer.py b/src/deepwork/core/hooks_syncer.py index 35a01036..86fb17e4 100644 --- a/src/deepwork/core/hooks_syncer.py +++ b/src/deepwork/core/hooks_syncer.py @@ -36,7 +36,7 @@ def get_command(self, project_path: Path) -> str: """ if self.module: # Python module - use deepwork hook CLI for portability - # Extract hook name from module path (e.g., "deepwork.hooks.rules_check" -> "rules_check") + # Extract hook name from module path (e.g., "deepwork.hooks.my_hook" -> "my_hook") hook_name = self.module.rsplit(".", 1)[-1] return f"deepwork hook {hook_name}" elif self.script: diff --git a/src/deepwork/hooks/__init__.py b/src/deepwork/hooks/__init__.py index 5e9d8d43..34ab144e 100644 --- a/src/deepwork/hooks/__init__.py +++ b/src/deepwork/hooks/__init__.py @@ -1,4 +1,4 @@ -"""DeepWork hooks package for rules enforcement and lifecycle events. +"""DeepWork hooks package for lifecycle events. This package provides: @@ -7,9 +7,6 @@ - claude_hook.sh: Shell wrapper for Claude Code hooks - gemini_hook.sh: Shell wrapper for Gemini CLI hooks -2. Hook implementations: - - rules_check.py: Evaluates rules on after_agent events - Usage with wrapper system: # Register hook in .claude/settings.json: { @@ -17,7 +14,7 @@ "Stop": [{ "hooks": [{ "type": "command", - "command": ".deepwork/hooks/claude_hook.sh rules_check" + "command": ".deepwork/hooks/claude_hook.sh my_hook" }] }] } @@ -29,7 +26,7 @@ "AfterAgent": [{ "hooks": [{ "type": "command", - "command": ".gemini/hooks/gemini_hook.sh rules_check" + "command": ".gemini/hooks/gemini_hook.sh my_hook" }] }] } From 443e13eb3745f033511a08e53fa0b9e13c90c858 Mon Sep 17 00:00:00 2001 From: Noah Horton Date: Wed, 4 Feb 2026 11:25:45 -0700 Subject: [PATCH 09/45] async --- .deepwork/jobs/deepwork_jobs/job.yml | 29 +- .../deepwork_jobs/steps/review_job_spec.md | 208 ----- pyproject.toml | 5 + src/deepwork/mcp/quality_gate.py | 53 +- src/deepwork/mcp/server.py | 8 +- src/deepwork/mcp/state.py | 212 +++-- src/deepwork/mcp/tools.py | 20 +- .../standard_jobs/deepwork_jobs/job.yml | 29 +- .../deepwork_jobs/steps/review_job_spec.md | 208 ----- tests/conftest.py | 8 - tests/e2e/test_claude_code_integration.py | 10 +- tests/integration/test_fruits_workflow.py | 198 ---- tests/integration/test_full_workflow.py | 153 ---- .../integration/test_install_requirements.py | 336 ------- .../test_quality_gate_integration.py | 52 +- tests/shell_script_tests/README.md | 76 -- tests/shell_script_tests/__init__.py | 1 - tests/shell_script_tests/conftest.py | 85 -- .../shell_script_tests/test_check_version.py | 412 --------- tests/shell_script_tests/test_hooks.py | 398 -------- tests/shell_script_tests/test_make_new_job.py | 313 ------- tests/unit/mcp/test_async_interface.py | 126 +++ tests/unit/mcp/test_quality_gate.py | 28 +- tests/unit/mcp/test_state.py | 80 +- tests/unit/mcp/test_tools.py | 56 +- tests/unit/test_adapters.py | 529 ----------- tests/unit/test_detector.py | 98 -- tests/unit/test_generator.py | 547 ----------- tests/unit/test_hooks_syncer.py | 367 -------- tests/unit/test_stop_hooks.py | 860 ------------------ uv.lock | 37 + 31 files changed, 457 insertions(+), 5085 deletions(-) delete mode 100644 .deepwork/jobs/deepwork_jobs/steps/review_job_spec.md delete mode 100644 src/deepwork/standard_jobs/deepwork_jobs/steps/review_job_spec.md delete mode 100644 
tests/integration/test_fruits_workflow.py delete mode 100644 tests/integration/test_full_workflow.py delete mode 100644 tests/integration/test_install_requirements.py delete mode 100644 tests/shell_script_tests/README.md delete mode 100644 tests/shell_script_tests/__init__.py delete mode 100644 tests/shell_script_tests/conftest.py delete mode 100644 tests/shell_script_tests/test_check_version.py delete mode 100644 tests/shell_script_tests/test_hooks.py delete mode 100644 tests/shell_script_tests/test_make_new_job.py create mode 100644 tests/unit/mcp/test_async_interface.py delete mode 100644 tests/unit/test_adapters.py delete mode 100644 tests/unit/test_detector.py delete mode 100644 tests/unit/test_generator.py delete mode 100644 tests/unit/test_hooks_syncer.py delete mode 100644 tests/unit/test_stop_hooks.py diff --git a/.deepwork/jobs/deepwork_jobs/job.yml b/.deepwork/jobs/deepwork_jobs/job.yml index 5ee6bf7d..4b58cb47 100644 --- a/.deepwork/jobs/deepwork_jobs/job.yml +++ b/.deepwork/jobs/deepwork_jobs/job.yml @@ -16,13 +16,14 @@ description: | workflows: - name: new_job - summary: "Create a new DeepWork job from scratch through definition, review, and implementation" + summary: "Create a new DeepWork job from scratch through definition and implementation" steps: - define - - review_job_spec - implement changelog: + - version: "1.1.0" + changes: "Removed review_job_spec step from new_job workflow; implement now follows directly from define" - version: "1.0.0" changes: "Added workflows section to distinguish new_job workflow (define→review_job_spec→implement) from standalone learn skill" - version: "0.1.0" @@ -56,35 +57,17 @@ steps: - file: job.yml doc_spec: .deepwork/doc_specs/job_spec.md dependencies: [] - - id: review_job_spec - name: "Review Job Specification" - description: "Reviews job.yml against quality criteria using a sub-agent for unbiased validation. Use after defining a job specification." - instructions_file: steps/review_job_spec.md - inputs: - - file: job.yml - from_step: define - outputs: - - file: job.yml - doc_spec: .deepwork/doc_specs/job_spec.md - dependencies: - - define - quality_criteria: - - "**Sub-Agent Used**: Was a sub-agent spawned to provide unbiased review?" - - "**All doc spec Criteria Evaluated**: Did the sub-agent assess all 9 quality criteria?" - - "**Findings Addressed**: Were all failed criteria addressed by the main agent?" - - "**Validation Loop Complete**: Did the review-fix cycle continue until all criteria passed?" - - id: implement name: "Implement Job Steps" - description: "Generates step instruction files and syncs slash commands from the job.yml specification. Use after job spec review passes." + description: "Generates step instruction files and syncs slash commands from the job.yml specification. Use after defining a job." instructions_file: steps/implement.md inputs: - file: job.yml - from_step: review_job_spec + from_step: define outputs: - steps/ dependencies: - - review_job_spec + - define quality_criteria: - "**Directory Structure**: Is `.deepwork/jobs/[job_name]/` created correctly?" - "**Complete Instructions**: Are ALL step instruction files complete (not stubs or placeholders)?" 
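With `review_job_spec` removed, `new_job` becomes a two-entry chain in which `implement` consumes the `job.yml` produced by `define`. The input-consistency rule this relies on — any input with `from_step` must name a step listed in that step's `dependencies` — can be expressed as a small check; this is an illustrative sketch, not the project's actual parser API:

```python
def check_input_consistency(steps: list[dict]) -> list[str]:
    """Return violations of the from_step/dependencies rule, if any."""
    problems: list[str] = []
    for step in steps:
        deps = set(step.get("dependencies", []))
        for inp in step.get("inputs", []):
            src = inp.get("from_step") if isinstance(inp, dict) else None
            if src and src not in deps:
                problems.append(
                    f"step '{step['id']}' takes {inp.get('file')} from '{src}' "
                    f"but does not list '{src}' in dependencies"
                )
    return problems


# For the updated workflow above, implement declares
# {"file": "job.yml", "from_step": "define"} and dependencies ["define"],
# so no violations are reported.
```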
diff --git a/.deepwork/jobs/deepwork_jobs/steps/review_job_spec.md b/.deepwork/jobs/deepwork_jobs/steps/review_job_spec.md deleted file mode 100644 index fcc0ae9c..00000000 --- a/.deepwork/jobs/deepwork_jobs/steps/review_job_spec.md +++ /dev/null @@ -1,208 +0,0 @@ -# Review Job Specification - -## Objective - -Review the `job.yml` created in the define step against the doc spec quality criteria using a sub-agent for unbiased evaluation, then iterate on fixes until all criteria pass. - -## Why This Step Exists - -The define step focuses on understanding user requirements and creating a job specification. This review step ensures the specification meets quality standards before implementation. Using a sub-agent provides an unbiased "fresh eyes" review that catches issues the main agent might miss after being deeply involved in the definition process. - -## Task - -Use a sub-agent to review the job.yml against all 9 doc spec quality criteria, then fix any failed criteria. Repeat until all criteria pass. - -### Step 1: Read the Job Specification - -Read the `job.yml` file created in the define step: - -``` -.deepwork/jobs/[job_name]/job.yml -``` - -Also read the doc spec for reference: - -``` -.deepwork/doc_specs/job_spec.md -``` - -### Step 2: Spawn Review Sub-Agent - -Use the Task tool to spawn a sub-agent that will provide an unbiased review: - -``` -Task tool parameters: -- subagent_type: "general-purpose" -- model: "haiku" -- description: "Review job.yml against doc spec" -- prompt: [see below] -``` - -**Sub-agent prompt template:** - -``` -Review this job.yml against the following 9 quality criteria from the doc spec. - -For each criterion, respond with: -- PASS or FAIL -- If FAIL: specific issue and suggested fix - -## job.yml Content - -[paste the full job.yml content here] - -## Quality Criteria - -1. **Valid Identifier**: Job name must be lowercase with underscores, no spaces or special characters (e.g., `competitive_research`, `monthly_report`) - -2. **Semantic Version**: Version must follow semantic versioning format X.Y.Z (e.g., `1.0.0`, `2.1.3`) - -3. **Concise Summary**: Summary must be under 200 characters and clearly describe what the job accomplishes - -4. **Rich Description**: Description must be multi-line and explain: the problem solved, the process, expected outcomes, and target users - -5. **Changelog Present**: Must include a changelog array with at least the initial version entry - -6. **Complete Steps**: Each step must have: id (lowercase_underscores), name, description, instructions_file, outputs (at least one), and dependencies array - -7. **Valid Dependencies**: Dependencies must reference existing step IDs with no circular references - -8. **Input Consistency**: File inputs with `from_step` must reference a step that is in the dependencies array - -9. **Output Paths**: Outputs must be valid filenames or paths (e.g., `report.md` or `reports/analysis.md`) - -## Response Format - -Respond with a structured evaluation: - -### Overall: [X/9 PASS] - -### Criterion Results - -1. Valid Identifier: [PASS/FAIL] - [If FAIL: Issue and fix] - -2. Semantic Version: [PASS/FAIL] - [If FAIL: Issue and fix] - -[... continue for all 9 criteria ...] - -### Summary of Required Fixes - -[List any fixes needed, or "No fixes required - all criteria pass"] -``` - -### Step 3: Review Sub-Agent Findings - -Parse the sub-agent's response: - -1. **Count passing criteria** - How many of the 9 criteria passed? -2. **Identify failures** - List specific criteria that failed -3. 
**Note suggested fixes** - What changes does the sub-agent recommend? - -### Step 4: Fix Failed Criteria - -For each failed criterion, edit the job.yml to address the issue: - -**Common fixes by criterion:** - -| Criterion | Common Issue | Fix | -|-----------|-------------|-----| -| Valid Identifier | Spaces or uppercase | Convert to lowercase_underscores | -| Semantic Version | Missing or invalid format | Set to `"1.0.0"` or fix format | -| Concise Summary | Too long or vague | Shorten to <200 chars, be specific | -| Rich Description | Single line or missing context | Add multi-line explanation with problem/process/outcome/users | -| Changelog Present | Missing changelog | Add `changelog:` with initial version entry | -| Complete Steps | Missing required fields | Add id, name, description, instructions_file, outputs, dependencies | -| Valid Dependencies | Non-existent step or circular | Fix step ID reference or reorder dependencies | -| Input Consistency | from_step not in dependencies | Add the referenced step to dependencies array | -| Output Paths | Invalid characters or format | Use valid filename/path format | - -### Step 5: Re-Run Review (If Needed) - -If any criteria failed: - -1. **Spawn a new sub-agent** with the updated job.yml content -2. **Review the new findings** -3. **Fix any remaining issues** -4. **Repeat until all 9 criteria pass** - -### Step 6: Confirm Completion - -When all 9 criteria pass: - -1. **Announce success**: "All 9 doc spec quality criteria pass." -2. **List what was validated**: - - Valid Identifier - - Semantic Version - - Concise Summary - - Rich Description - - Changelog Present - - Complete Steps - - Valid Dependencies - - Input Consistency - - Output Paths -3. **Include the promise**: `Quality Criteria Met` -4. **Guide to next step**: "Run `/deepwork_jobs.implement` to generate the step instruction files." - -## Example Review Cycle - -``` -Claude: Let me review the job.yml against the doc spec quality criteria. - -[Spawns sub-agent with haiku model] - -Sub-agent response: -### Overall: 7/9 PASS - -### Criterion Results -1. Valid Identifier: PASS -2. Semantic Version: PASS -3. Concise Summary: FAIL - Issue: Summary is 245 characters, exceeds 200 char limit - Fix: Shorten to "Competitive analysis workflow for product positioning" -4. Rich Description: PASS -5. Changelog Present: FAIL - Issue: No changelog array present - Fix: Add changelog with initial version entry -6. Complete Steps: PASS -7. Valid Dependencies: PASS -8. Input Consistency: PASS -9. Output Paths: PASS - -### Summary of Required Fixes -1. Shorten summary to under 200 characters -2. Add changelog array with initial version - -Claude: I found 2 issues. Let me fix them. - -[Edits job.yml to shorten summary and add changelog] - -Claude: Let me verify the fixes with another review. - -[Spawns new sub-agent] - -Sub-agent response: -### Overall: 9/9 PASS - -### Summary of Required Fixes -No fixes required - all criteria pass - -Claude: All 9 doc spec quality criteria pass. - -Quality Criteria Met - -**Next step:** Run `/deepwork_jobs.implement` to generate the step instruction files. 
-``` - -## Quality Criteria - -- **Sub-Agent Used**: A sub-agent was spawned to provide unbiased review (not just self-review) -- **All doc spec Criteria Evaluated**: The sub-agent assessed all 9 quality criteria from the doc spec -- **Findings Addressed**: All failed criteria were fixed by the main agent -- **Validation Loop Complete**: The review-fix cycle continued until all criteria passed -- **Promise Included**: The response includes `Quality Criteria Met` when complete - -## Output - -The validated `job.yml` file at `.deepwork/jobs/[job_name]/job.yml` that passes all 9 doc spec quality criteria. diff --git a/pyproject.toml b/pyproject.toml index f5d4bbd9..9e88c3a7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,6 +29,7 @@ dependencies = [ "fastmcp>=2.0", "pydantic>=2.0", "mcp>=1.0.0", + "aiofiles>=24.0.0", ] [project.optional-dependencies] @@ -36,9 +37,11 @@ dev = [ "pytest>=7.0", "pytest-mock>=3.10", "pytest-cov>=4.0", + "pytest-asyncio>=0.24.0", "ruff>=0.1.0", "mypy>=1.0", "types-PyYAML", + "types-aiofiles", ] [project.scripts] @@ -68,6 +71,8 @@ addopts = [ "--strict-config", "--showlocals", ] +asyncio_mode = "auto" +asyncio_default_fixture_loop_scope = "function" [tool.coverage.run] source = ["deepwork"] diff --git a/src/deepwork/mcp/quality_gate.py b/src/deepwork/mcp/quality_gate.py index f0ee7f43..3eab3ebc 100644 --- a/src/deepwork/mcp/quality_gate.py +++ b/src/deepwork/mcp/quality_gate.py @@ -6,12 +6,13 @@ from __future__ import annotations +import asyncio import json import shlex -import subprocess from pathlib import Path from typing import Any +import aiofiles import jsonschema from deepwork.mcp.schemas import QualityCriteriaResult, QualityGateResult @@ -111,7 +112,7 @@ def _build_instructions(self, quality_criteria: list[str]) -> str: - Provide specific, actionable feedback for failed criteria - The overall "passed" should be true only if ALL criteria pass""" - def _build_payload( + async def _build_payload( self, outputs: list[str], project_root: Path, @@ -133,7 +134,8 @@ def _build_payload( if full_path.exists(): try: - content = full_path.read_text(encoding="utf-8") + async with aiofiles.open(full_path, encoding="utf-8") as f: + content = await f.read() output_sections.append(f"{header}\n{content}") except Exception as e: output_sections.append(f"{header}\n[Error reading file: {e}]") @@ -210,7 +212,7 @@ def _parse_response( f"Response was: {response_text[:500]}..." 
) from e - def evaluate( + async def evaluate( self, quality_criteria: list[str], outputs: list[str], @@ -239,7 +241,7 @@ def evaluate( # Build system instructions and payload separately instructions = self._build_instructions(quality_criteria) - payload = self._build_payload(outputs, project_root) + payload = await self._build_payload(outputs, project_root) # Build command with system prompt flag # Parse the base command properly to handle quoted arguments @@ -248,28 +250,35 @@ def evaluate( full_cmd = base_cmd + ["-s", instructions] try: - # Run review agent with system prompt and payload - result = subprocess.run( - full_cmd, - input=payload, - capture_output=True, - text=True, - timeout=self.timeout, + # Run review agent with system prompt and payload using async subprocess + process = await asyncio.create_subprocess_exec( + *full_cmd, + stdin=asyncio.subprocess.PIPE, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, cwd=str(project_root), ) - if result.returncode != 0: + try: + stdout, stderr = await asyncio.wait_for( + process.communicate(input=payload.encode()), + timeout=self.timeout, + ) + except asyncio.TimeoutError: + process.kill() + await process.wait() + raise QualityGateError( + f"Review agent timed out after {self.timeout} seconds" + ) + + if process.returncode != 0: raise QualityGateError( - f"Review agent failed with exit code {result.returncode}:\n" - f"stderr: {result.stderr}" + f"Review agent failed with exit code {process.returncode}:\n" + f"stderr: {stderr.decode()}" ) - return self._parse_response(result.stdout) + return self._parse_response(stdout.decode()) - except subprocess.TimeoutExpired as e: - raise QualityGateError( - f"Review agent timed out after {self.timeout} seconds" - ) from e except FileNotFoundError as e: raise QualityGateError( f"Review agent command not found: {base_cmd[0]}" @@ -292,9 +301,9 @@ def __init__(self, should_pass: bool = True, feedback: str = "Mock evaluation"): super().__init__() self.should_pass = should_pass self.feedback = feedback - self.evaluations: list[dict] = [] + self.evaluations: list[dict[str, Any]] = [] - def evaluate( + async def evaluate( self, quality_criteria: list[str], outputs: list[str], diff --git a/src/deepwork/mcp/server.py b/src/deepwork/mcp/server.py index fde5e606..2ec87212 100644 --- a/src/deepwork/mcp/server.py +++ b/src/deepwork/mcp/server.py @@ -97,7 +97,7 @@ def get_workflows() -> dict[str, Any]: "Optional: instance_id for naming (e.g., 'acme', 'q1-2026')." ) ) - def start_workflow( + async def start_workflow( goal: str, job_name: str, workflow_name: str, @@ -110,7 +110,7 @@ def start_workflow( workflow_name=workflow_name, instance_id=instance_id, ) - response = tools.start_workflow(input_data) + response = await tools.start_workflow(input_data) return response.model_dump() @mcp.tool( @@ -126,7 +126,7 @@ def start_workflow( "Optional: quality_review_override_reason to skip quality review (must explain why)." 
) ) - def finished_step( + async def finished_step( outputs: list[str], notes: str | None = None, quality_review_override_reason: str | None = None, @@ -137,7 +137,7 @@ def finished_step( notes=notes, quality_review_override_reason=quality_review_override_reason, ) - response = tools.finished_step(input_data) + response = await tools.finished_step(input_data) return response.model_dump() return mcp diff --git a/src/deepwork/mcp/state.py b/src/deepwork/mcp/state.py index 160283e0..ca187d13 100644 --- a/src/deepwork/mcp/state.py +++ b/src/deepwork/mcp/state.py @@ -6,11 +6,14 @@ from __future__ import annotations +import asyncio import json import uuid from datetime import UTC, datetime from pathlib import Path +import aiofiles + from deepwork.mcp.schemas import StepProgress, WorkflowSession @@ -27,6 +30,9 @@ class StateManager: - Transparency: Users can inspect session state - Recovery: Sessions survive server restarts - Debugging: State history is preserved + + This implementation is async-safe and uses a lock to prevent + concurrent access issues. """ def __init__(self, project_root: Path): @@ -38,6 +44,7 @@ def __init__(self, project_root: Path): self.project_root = project_root self.sessions_dir = project_root / ".deepwork" / "tmp" self._active_session: WorkflowSession | None = None + self._lock = asyncio.Lock() def _ensure_sessions_dir(self) -> None: """Ensure the sessions directory exists.""" @@ -62,7 +69,7 @@ def _generate_branch_name( instance = instance_id or date_str return f"deepwork/{job_name}-{workflow_name}-{instance}" - def create_session( + async def create_session( self, job_name: str, workflow_name: str, @@ -82,38 +89,45 @@ def create_session( Returns: New WorkflowSession """ - self._ensure_sessions_dir() + async with self._lock: + self._ensure_sessions_dir() + + session_id = self._generate_session_id() + branch_name = self._generate_branch_name(job_name, workflow_name, instance_id) + now = datetime.now(UTC).isoformat() + + session = WorkflowSession( + session_id=session_id, + job_name=job_name, + workflow_name=workflow_name, + instance_id=instance_id, + goal=goal, + branch_name=branch_name, + current_step_id=first_step_id, + current_entry_index=0, + step_progress={}, + started_at=now, + status="active", + ) - session_id = self._generate_session_id() - branch_name = self._generate_branch_name(job_name, workflow_name, instance_id) - now = datetime.now(UTC).isoformat() - - session = WorkflowSession( - session_id=session_id, - job_name=job_name, - workflow_name=workflow_name, - instance_id=instance_id, - goal=goal, - branch_name=branch_name, - current_step_id=first_step_id, - current_entry_index=0, - step_progress={}, - started_at=now, - status="active", - ) - - self._save_session(session) - self._active_session = session - return session - - def _save_session(self, session: WorkflowSession) -> None: - """Save session to file.""" + await self._save_session_unlocked(session) + self._active_session = session + return session + + async def _save_session_unlocked(self, session: WorkflowSession) -> None: + """Save session to file (must be called with lock held).""" self._ensure_sessions_dir() session_file = self._session_file(session.session_id) - with open(session_file, "w", encoding="utf-8") as f: - json.dump(session.to_dict(), f, indent=2) + content = json.dumps(session.to_dict(), indent=2) + async with aiofiles.open(session_file, "w", encoding="utf-8") as f: + await f.write(content) + + async def _save_session(self, session: WorkflowSession) -> None: + """Save session to file 
with lock.""" + async with self._lock: + await self._save_session_unlocked(session) - def load_session(self, session_id: str) -> WorkflowSession: + async def load_session(self, session_id: str) -> WorkflowSession: """Load a session from file. Args: @@ -125,16 +139,18 @@ def load_session(self, session_id: str) -> WorkflowSession: Raises: StateError: If session not found """ - session_file = self._session_file(session_id) - if not session_file.exists(): - raise StateError(f"Session not found: {session_id}") + async with self._lock: + session_file = self._session_file(session_id) + if not session_file.exists(): + raise StateError(f"Session not found: {session_id}") - with open(session_file, encoding="utf-8") as f: - data = json.load(f) + async with aiofiles.open(session_file, encoding="utf-8") as f: + content = await f.read() + data = json.loads(content) - session = WorkflowSession.from_dict(data) - self._active_session = session - return session + session = WorkflowSession.from_dict(data) + self._active_session = session + return session def get_active_session(self) -> WorkflowSession | None: """Get the currently active session. @@ -159,7 +175,7 @@ def require_active_session(self) -> WorkflowSession: ) return self._active_session - def start_step(self, step_id: str) -> None: + async def start_step(self, step_id: str) -> None: """Mark a step as started. Args: @@ -168,21 +184,22 @@ def start_step(self, step_id: str) -> None: Raises: StateError: If no active session """ - session = self.require_active_session() - now = datetime.now(UTC).isoformat() - - if step_id not in session.step_progress: - session.step_progress[step_id] = StepProgress( - step_id=step_id, - started_at=now, - ) - else: - session.step_progress[step_id].started_at = now - - session.current_step_id = step_id - self._save_session(session) - - def complete_step( + async with self._lock: + session = self.require_active_session() + now = datetime.now(UTC).isoformat() + + if step_id not in session.step_progress: + session.step_progress[step_id] = StepProgress( + step_id=step_id, + started_at=now, + ) + else: + session.step_progress[step_id].started_at = now + + session.current_step_id = step_id + await self._save_session_unlocked(session) + + async def complete_step( self, step_id: str, outputs: list[str], notes: str | None = None ) -> None: """Mark a step as completed. @@ -195,23 +212,24 @@ def complete_step( Raises: StateError: If no active session """ - session = self.require_active_session() - now = datetime.now(UTC).isoformat() + async with self._lock: + session = self.require_active_session() + now = datetime.now(UTC).isoformat() - if step_id not in session.step_progress: - session.step_progress[step_id] = StepProgress( - step_id=step_id, - started_at=now, - ) + if step_id not in session.step_progress: + session.step_progress[step_id] = StepProgress( + step_id=step_id, + started_at=now, + ) - progress = session.step_progress[step_id] - progress.completed_at = now - progress.outputs = outputs - progress.notes = notes + progress = session.step_progress[step_id] + progress.completed_at = now + progress.outputs = outputs + progress.notes = notes - self._save_session(session) + await self._save_session_unlocked(session) - def record_quality_attempt(self, step_id: str) -> int: + async def record_quality_attempt(self, step_id: str) -> int: """Record a quality gate attempt for a step. 
Args: @@ -223,17 +241,18 @@ def record_quality_attempt(self, step_id: str) -> int: Raises: StateError: If no active session """ - session = self.require_active_session() + async with self._lock: + session = self.require_active_session() - if step_id not in session.step_progress: - session.step_progress[step_id] = StepProgress(step_id=step_id) + if step_id not in session.step_progress: + session.step_progress[step_id] = StepProgress(step_id=step_id) - session.step_progress[step_id].quality_attempts += 1 - self._save_session(session) + session.step_progress[step_id].quality_attempts += 1 + await self._save_session_unlocked(session) - return session.step_progress[step_id].quality_attempts + return session.step_progress[step_id].quality_attempts - def advance_to_step(self, step_id: str, entry_index: int) -> None: + async def advance_to_step(self, step_id: str, entry_index: int) -> None: """Advance the session to a new step. Args: @@ -243,22 +262,24 @@ def advance_to_step(self, step_id: str, entry_index: int) -> None: Raises: StateError: If no active session """ - session = self.require_active_session() - session.current_step_id = step_id - session.current_entry_index = entry_index - self._save_session(session) + async with self._lock: + session = self.require_active_session() + session.current_step_id = step_id + session.current_entry_index = entry_index + await self._save_session_unlocked(session) - def complete_workflow(self) -> None: + async def complete_workflow(self) -> None: """Mark the workflow as complete. Raises: StateError: If no active session """ - session = self.require_active_session() - now = datetime.now(UTC).isoformat() - session.completed_at = now - session.status = "completed" - self._save_session(session) + async with self._lock: + session = self.require_active_session() + now = datetime.now(UTC).isoformat() + session.completed_at = now + session.status = "completed" + await self._save_session_unlocked(session) def get_all_outputs(self) -> list[str]: """Get all outputs from all completed steps. @@ -275,7 +296,7 @@ def get_all_outputs(self) -> list[str]: outputs.extend(progress.outputs) return outputs - def list_sessions(self) -> list[WorkflowSession]: + async def list_sessions(self) -> list[WorkflowSession]: """List all saved sessions. Returns: @@ -287,8 +308,9 @@ def list_sessions(self) -> list[WorkflowSession]: sessions = [] for session_file in self.sessions_dir.glob("session_*.json"): try: - with open(session_file, encoding="utf-8") as f: - data = json.load(f) + async with aiofiles.open(session_file, encoding="utf-8") as f: + content = await f.read() + data = json.loads(content) sessions.append(WorkflowSession.from_dict(data)) except (json.JSONDecodeError, ValueError): # Skip corrupted files @@ -296,7 +318,7 @@ def list_sessions(self) -> list[WorkflowSession]: return sorted(sessions, key=lambda s: s.started_at, reverse=True) - def find_active_sessions_for_workflow( + async def find_active_sessions_for_workflow( self, job_name: str, workflow_name: str ) -> list[WorkflowSession]: """Find active sessions for a specific workflow. 
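A note on the locking in this state.py diff: `asyncio.Lock` is not reentrant, which is why the public methods acquire `self._lock` and then call `_save_session_unlocked` rather than the locked `_save_session` — re-acquiring the same lock from the same task would deadlock. A minimal, self-contained sketch of that pattern (illustrative names, not the project's code):

```python
import asyncio


class Counter:
    def __init__(self) -> None:
        self._lock = asyncio.Lock()
        self.value = 0

    async def _bump_unlocked(self) -> None:
        # Does the actual work; the caller must already hold the lock.
        self.value += 1

    async def bump(self) -> None:
        async with self._lock:
            # Correct: call the unlocked helper while holding the lock.
            # Acquiring self._lock again here would hang forever, because
            # asyncio.Lock cannot be re-acquired by the task that holds it.
            await self._bump_unlocked()


async def main() -> None:
    c = Counter()
    await asyncio.gather(*(c.bump() for _ in range(10)))
    assert c.value == 10


asyncio.run(main())
```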
@@ -308,23 +330,25 @@ def find_active_sessions_for_workflow( Returns: List of active sessions matching the criteria """ + all_sessions = await self.list_sessions() return [ s - for s in self.list_sessions() + for s in all_sessions if s.job_name == job_name and s.workflow_name == workflow_name and s.status == "active" ] - def delete_session(self, session_id: str) -> None: + async def delete_session(self, session_id: str) -> None: """Delete a session file. Args: session_id: Session ID to delete """ - session_file = self._session_file(session_id) - if session_file.exists(): - session_file.unlink() + async with self._lock: + session_file = self._session_file(session_id) + if session_file.exists(): + session_file.unlink() - if self._active_session and self._active_session.session_id == session_id: - self._active_session = None + if self._active_session and self._active_session.session_id == session_id: + self._active_session = None diff --git a/src/deepwork/mcp/tools.py b/src/deepwork/mcp/tools.py index 29265070..43024ce1 100644 --- a/src/deepwork/mcp/tools.py +++ b/src/deepwork/mcp/tools.py @@ -209,7 +209,7 @@ def get_workflows(self) -> GetWorkflowsResponse: return GetWorkflowsResponse(jobs=job_infos) - def start_workflow(self, input_data: StartWorkflowInput) -> StartWorkflowResponse: + async def start_workflow(self, input_data: StartWorkflowInput) -> StartWorkflowResponse: """Start a new workflow session. Args: @@ -234,7 +234,7 @@ def start_workflow(self, input_data: StartWorkflowInput) -> StartWorkflowRespons raise ToolError(f"First step not found: {first_step_id}") # Create session - session = self.state_manager.create_session( + session = await self.state_manager.create_session( job_name=input_data.job_name, workflow_name=input_data.workflow_name, goal=input_data.goal, @@ -243,7 +243,7 @@ def start_workflow(self, input_data: StartWorkflowInput) -> StartWorkflowRespons ) # Mark first step as started - self.state_manager.start_step(first_step_id) + await self.state_manager.start_step(first_step_id) # Get step instructions instructions = self._get_step_instructions(job, first_step_id) @@ -262,7 +262,7 @@ def start_workflow(self, input_data: StartWorkflowInput) -> StartWorkflowRespons ) ) - def finished_step(self, input_data: FinishedStepInput) -> FinishedStepResponse: + async def finished_step(self, input_data: FinishedStepInput) -> FinishedStepResponse: """Report step completion and get next instructions. 
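Since the tool surface is now async end to end (server wrapper → WorkflowTools → StateManager), callers await both tools. A hedged usage sketch of driving one step, mirroring the e2e test later in this patch — the import location of the input models is an assumption (the other MCP models touched by this patch live in `deepwork.mcp.schemas`):

```python
import asyncio
from pathlib import Path

from deepwork.mcp.schemas import FinishedStepInput, StartWorkflowInput  # assumed location
from deepwork.mcp.state import StateManager
from deepwork.mcp.tools import WorkflowTools


async def run_first_step(project_root: Path) -> None:
    tools = WorkflowTools(project_root, StateManager(project_root))
    start = await tools.start_workflow(
        StartWorkflowInput(goal="demo run", job_name="fruits", workflow_name="full")
    )
    print(start.begin_step.step_instructions)

    finish = await tools.finished_step(
        FinishedStepInput(
            outputs=[str(project_root / "identified_fruits.md")],
            # Skipping the review-agent call keeps the sketch self-contained.
            quality_review_override_reason="sketch: no review agent configured",
        )
    )
    print(finish.status)  # "next_step", "workflow_complete", or "needs_work"


# asyncio.run(run_first_step(Path(".")))
```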
Args: @@ -292,9 +292,9 @@ def finished_step(self, input_data: FinishedStepInput) -> FinishedStepResponse: and current_step.quality_criteria and not input_data.quality_review_override_reason ): - attempts = self.state_manager.record_quality_attempt(current_step_id) + attempts = await self.state_manager.record_quality_attempt(current_step_id) - result = self.quality_gate.evaluate( + result = await self.quality_gate.evaluate( quality_criteria=current_step.quality_criteria, outputs=input_data.outputs, project_root=self.project_root, @@ -319,7 +319,7 @@ def finished_step(self, input_data: FinishedStepInput) -> FinishedStepResponse: ) # Mark step as completed - self.state_manager.complete_step( + await self.state_manager.complete_step( step_id=current_step_id, outputs=input_data.outputs, notes=input_data.notes, @@ -331,7 +331,7 @@ def finished_step(self, input_data: FinishedStepInput) -> FinishedStepResponse: if next_entry_index >= len(workflow.step_entries): # Workflow complete - self.state_manager.complete_workflow() + await self.state_manager.complete_workflow() all_outputs = self.state_manager.get_all_outputs() return FinishedStepResponse( @@ -352,8 +352,8 @@ def finished_step(self, input_data: FinishedStepInput) -> FinishedStepResponse: raise ToolError(f"Next step not found: {next_step_id}") # Advance session - self.state_manager.advance_to_step(next_step_id, next_entry_index) - self.state_manager.start_step(next_step_id) + await self.state_manager.advance_to_step(next_step_id, next_entry_index) + await self.state_manager.start_step(next_step_id) # Get instructions instructions = self._get_step_instructions(job, next_step_id) diff --git a/src/deepwork/standard_jobs/deepwork_jobs/job.yml b/src/deepwork/standard_jobs/deepwork_jobs/job.yml index 5ee6bf7d..4b58cb47 100644 --- a/src/deepwork/standard_jobs/deepwork_jobs/job.yml +++ b/src/deepwork/standard_jobs/deepwork_jobs/job.yml @@ -16,13 +16,14 @@ description: | workflows: - name: new_job - summary: "Create a new DeepWork job from scratch through definition, review, and implementation" + summary: "Create a new DeepWork job from scratch through definition and implementation" steps: - define - - review_job_spec - implement changelog: + - version: "1.1.0" + changes: "Removed review_job_spec step from new_job workflow; implement now follows directly from define" - version: "1.0.0" changes: "Added workflows section to distinguish new_job workflow (define→review_job_spec→implement) from standalone learn skill" - version: "0.1.0" @@ -56,35 +57,17 @@ steps: - file: job.yml doc_spec: .deepwork/doc_specs/job_spec.md dependencies: [] - - id: review_job_spec - name: "Review Job Specification" - description: "Reviews job.yml against quality criteria using a sub-agent for unbiased validation. Use after defining a job specification." - instructions_file: steps/review_job_spec.md - inputs: - - file: job.yml - from_step: define - outputs: - - file: job.yml - doc_spec: .deepwork/doc_specs/job_spec.md - dependencies: - - define - quality_criteria: - - "**Sub-Agent Used**: Was a sub-agent spawned to provide unbiased review?" - - "**All doc spec Criteria Evaluated**: Did the sub-agent assess all 9 quality criteria?" - - "**Findings Addressed**: Were all failed criteria addressed by the main agent?" - - "**Validation Loop Complete**: Did the review-fix cycle continue until all criteria passed?" - - id: implement name: "Implement Job Steps" - description: "Generates step instruction files and syncs slash commands from the job.yml specification. 
Use after job spec review passes." + description: "Generates step instruction files and syncs slash commands from the job.yml specification. Use after defining a job." instructions_file: steps/implement.md inputs: - file: job.yml - from_step: review_job_spec + from_step: define outputs: - steps/ dependencies: - - review_job_spec + - define quality_criteria: - "**Directory Structure**: Is `.deepwork/jobs/[job_name]/` created correctly?" - "**Complete Instructions**: Are ALL step instruction files complete (not stubs or placeholders)?" diff --git a/src/deepwork/standard_jobs/deepwork_jobs/steps/review_job_spec.md b/src/deepwork/standard_jobs/deepwork_jobs/steps/review_job_spec.md deleted file mode 100644 index fcc0ae9c..00000000 --- a/src/deepwork/standard_jobs/deepwork_jobs/steps/review_job_spec.md +++ /dev/null @@ -1,208 +0,0 @@ -# Review Job Specification - -## Objective - -Review the `job.yml` created in the define step against the doc spec quality criteria using a sub-agent for unbiased evaluation, then iterate on fixes until all criteria pass. - -## Why This Step Exists - -The define step focuses on understanding user requirements and creating a job specification. This review step ensures the specification meets quality standards before implementation. Using a sub-agent provides an unbiased "fresh eyes" review that catches issues the main agent might miss after being deeply involved in the definition process. - -## Task - -Use a sub-agent to review the job.yml against all 9 doc spec quality criteria, then fix any failed criteria. Repeat until all criteria pass. - -### Step 1: Read the Job Specification - -Read the `job.yml` file created in the define step: - -``` -.deepwork/jobs/[job_name]/job.yml -``` - -Also read the doc spec for reference: - -``` -.deepwork/doc_specs/job_spec.md -``` - -### Step 2: Spawn Review Sub-Agent - -Use the Task tool to spawn a sub-agent that will provide an unbiased review: - -``` -Task tool parameters: -- subagent_type: "general-purpose" -- model: "haiku" -- description: "Review job.yml against doc spec" -- prompt: [see below] -``` - -**Sub-agent prompt template:** - -``` -Review this job.yml against the following 9 quality criteria from the doc spec. - -For each criterion, respond with: -- PASS or FAIL -- If FAIL: specific issue and suggested fix - -## job.yml Content - -[paste the full job.yml content here] - -## Quality Criteria - -1. **Valid Identifier**: Job name must be lowercase with underscores, no spaces or special characters (e.g., `competitive_research`, `monthly_report`) - -2. **Semantic Version**: Version must follow semantic versioning format X.Y.Z (e.g., `1.0.0`, `2.1.3`) - -3. **Concise Summary**: Summary must be under 200 characters and clearly describe what the job accomplishes - -4. **Rich Description**: Description must be multi-line and explain: the problem solved, the process, expected outcomes, and target users - -5. **Changelog Present**: Must include a changelog array with at least the initial version entry - -6. **Complete Steps**: Each step must have: id (lowercase_underscores), name, description, instructions_file, outputs (at least one), and dependencies array - -7. **Valid Dependencies**: Dependencies must reference existing step IDs with no circular references - -8. **Input Consistency**: File inputs with `from_step` must reference a step that is in the dependencies array - -9. 
**Output Paths**: Outputs must be valid filenames or paths (e.g., `report.md` or `reports/analysis.md`) - -## Response Format - -Respond with a structured evaluation: - -### Overall: [X/9 PASS] - -### Criterion Results - -1. Valid Identifier: [PASS/FAIL] - [If FAIL: Issue and fix] - -2. Semantic Version: [PASS/FAIL] - [If FAIL: Issue and fix] - -[... continue for all 9 criteria ...] - -### Summary of Required Fixes - -[List any fixes needed, or "No fixes required - all criteria pass"] -``` - -### Step 3: Review Sub-Agent Findings - -Parse the sub-agent's response: - -1. **Count passing criteria** - How many of the 9 criteria passed? -2. **Identify failures** - List specific criteria that failed -3. **Note suggested fixes** - What changes does the sub-agent recommend? - -### Step 4: Fix Failed Criteria - -For each failed criterion, edit the job.yml to address the issue: - -**Common fixes by criterion:** - -| Criterion | Common Issue | Fix | -|-----------|-------------|-----| -| Valid Identifier | Spaces or uppercase | Convert to lowercase_underscores | -| Semantic Version | Missing or invalid format | Set to `"1.0.0"` or fix format | -| Concise Summary | Too long or vague | Shorten to <200 chars, be specific | -| Rich Description | Single line or missing context | Add multi-line explanation with problem/process/outcome/users | -| Changelog Present | Missing changelog | Add `changelog:` with initial version entry | -| Complete Steps | Missing required fields | Add id, name, description, instructions_file, outputs, dependencies | -| Valid Dependencies | Non-existent step or circular | Fix step ID reference or reorder dependencies | -| Input Consistency | from_step not in dependencies | Add the referenced step to dependencies array | -| Output Paths | Invalid characters or format | Use valid filename/path format | - -### Step 5: Re-Run Review (If Needed) - -If any criteria failed: - -1. **Spawn a new sub-agent** with the updated job.yml content -2. **Review the new findings** -3. **Fix any remaining issues** -4. **Repeat until all 9 criteria pass** - -### Step 6: Confirm Completion - -When all 9 criteria pass: - -1. **Announce success**: "All 9 doc spec quality criteria pass." -2. **List what was validated**: - - Valid Identifier - - Semantic Version - - Concise Summary - - Rich Description - - Changelog Present - - Complete Steps - - Valid Dependencies - - Input Consistency - - Output Paths -3. **Include the promise**: `Quality Criteria Met` -4. **Guide to next step**: "Run `/deepwork_jobs.implement` to generate the step instruction files." - -## Example Review Cycle - -``` -Claude: Let me review the job.yml against the doc spec quality criteria. - -[Spawns sub-agent with haiku model] - -Sub-agent response: -### Overall: 7/9 PASS - -### Criterion Results -1. Valid Identifier: PASS -2. Semantic Version: PASS -3. Concise Summary: FAIL - Issue: Summary is 245 characters, exceeds 200 char limit - Fix: Shorten to "Competitive analysis workflow for product positioning" -4. Rich Description: PASS -5. Changelog Present: FAIL - Issue: No changelog array present - Fix: Add changelog with initial version entry -6. Complete Steps: PASS -7. Valid Dependencies: PASS -8. Input Consistency: PASS -9. Output Paths: PASS - -### Summary of Required Fixes -1. Shorten summary to under 200 characters -2. Add changelog array with initial version - -Claude: I found 2 issues. Let me fix them. - -[Edits job.yml to shorten summary and add changelog] - -Claude: Let me verify the fixes with another review. 
- -[Spawns new sub-agent] - -Sub-agent response: -### Overall: 9/9 PASS - -### Summary of Required Fixes -No fixes required - all criteria pass - -Claude: All 9 doc spec quality criteria pass. - -Quality Criteria Met - -**Next step:** Run `/deepwork_jobs.implement` to generate the step instruction files. -``` - -## Quality Criteria - -- **Sub-Agent Used**: A sub-agent was spawned to provide unbiased review (not just self-review) -- **All doc spec Criteria Evaluated**: The sub-agent assessed all 9 quality criteria from the doc spec -- **Findings Addressed**: All failed criteria were fixed by the main agent -- **Validation Loop Complete**: The review-fix cycle continued until all criteria passed -- **Promise Included**: The response includes `Quality Criteria Met` when complete - -## Output - -The validated `job.yml` file at `.deepwork/jobs/[job_name]/job.yml` that passes all 9 doc spec quality criteria. diff --git a/tests/conftest.py b/tests/conftest.py index 0f540293..d7a81ed8 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -35,14 +35,6 @@ def mock_claude_project(mock_git_repo: Path) -> Path: return mock_git_repo -@pytest.fixture -def mock_gemini_project(mock_git_repo: Path) -> Path: - """Create a mock project with Gemini CLI setup.""" - gemini_dir = mock_git_repo / ".gemini" - gemini_dir.mkdir(exist_ok=True) - return mock_git_repo - - @pytest.fixture def mock_multi_platform_project(mock_git_repo: Path) -> Path: """Create a mock project with multiple AI platforms setup.""" diff --git a/tests/e2e/test_claude_code_integration.py b/tests/e2e/test_claude_code_integration.py index eaf29704..54a5597d 100644 --- a/tests/e2e/test_claude_code_integration.py +++ b/tests/e2e/test_claude_code_integration.py @@ -214,7 +214,7 @@ def test_get_workflows_returns_jobs(self, project_with_job: Path) -> None: assert full_workflow.name == "full" assert full_workflow.summary is not None - def test_start_workflow_creates_session(self, project_with_job: Path) -> None: + async def test_start_workflow_creates_session(self, project_with_job: Path) -> None: """Test that start_workflow creates a new workflow session.""" state_manager = StateManager(project_with_job) tools = WorkflowTools(project_with_job, state_manager) @@ -236,7 +236,7 @@ def test_start_workflow_creates_session(self, project_with_job: Path) -> None: instance_id="test-instance", ) - response = tools.start_workflow(input_data) + response = await tools.start_workflow(input_data) # Should return session info assert response.begin_step.session_id is not None @@ -249,7 +249,7 @@ def test_start_workflow_creates_session(self, project_with_job: Path) -> None: assert response.begin_step.step_instructions is not None assert len(response.begin_step.step_instructions) > 0 - def test_workflow_step_progression(self, project_with_job: Path) -> None: + async def test_workflow_step_progression(self, project_with_job: Path) -> None: """Test that finished_step progresses through workflow steps.""" state_manager = StateManager(project_with_job) tools = WorkflowTools(project_with_job, state_manager) @@ -269,7 +269,7 @@ def test_workflow_step_progression(self, project_with_job: Path) -> None: job_name="fruits", workflow_name=workflow_name, ) - start_response = tools.start_workflow(start_input) + start_response = await tools.start_workflow(start_input) # Create mock output file for first step output_file = project_with_job / "identified_fruits.md" @@ -280,7 +280,7 @@ def test_workflow_step_progression(self, project_with_job: Path) -> None: 
outputs=[str(output_file)], notes="Identified fruits from test input", ) - finish_response = tools.finished_step(finish_input) + finish_response = await tools.finished_step(finish_input) # Should either advance to next step or complete assert finish_response.status in ["next_step", "workflow_complete", "needs_work"] diff --git a/tests/integration/test_fruits_workflow.py b/tests/integration/test_fruits_workflow.py deleted file mode 100644 index 9dc868c8..00000000 --- a/tests/integration/test_fruits_workflow.py +++ /dev/null @@ -1,198 +0,0 @@ -"""Integration tests for the fruits CI test workflow. - -This module tests the fruits job - a simple, deterministic workflow -designed for automated CI testing of the DeepWork framework. -""" - -from pathlib import Path - -from deepwork.core.adapters import ClaudeAdapter -from deepwork.core.generator import SkillGenerator -from deepwork.core.parser import parse_job_definition - - -class TestFruitsWorkflow: - """Integration tests for the fruits CI test workflow.""" - - def test_fruits_job_parses_correctly(self, fixtures_dir: Path) -> None: - """Test that the fruits job definition parses correctly.""" - job_dir = fixtures_dir / "jobs" / "fruits" - job = parse_job_definition(job_dir) - - assert job.name == "fruits" - assert job.version == "1.0.0" - assert len(job.steps) == 2 - - # Verify step IDs - step_ids = [step.id for step in job.steps] - assert step_ids == ["identify", "classify"] - - def test_fruits_identify_step_structure(self, fixtures_dir: Path) -> None: - """Test the identify step has correct structure.""" - job_dir = fixtures_dir / "jobs" / "fruits" - job = parse_job_definition(job_dir) - - identify_step = job.steps[0] - assert identify_step.id == "identify" - assert identify_step.name == "Identify Fruits" - - # Has user input - assert len(identify_step.inputs) == 1 - assert identify_step.inputs[0].is_user_input() - assert identify_step.inputs[0].name == "raw_items" - - # Has output - assert len(identify_step.outputs) == 1 - assert identify_step.outputs[0].file == "identified_fruits.md" - - # No dependencies (first step) - assert identify_step.dependencies == [] - - def test_fruits_classify_step_structure(self, fixtures_dir: Path) -> None: - """Test the classify step has correct structure.""" - job_dir = fixtures_dir / "jobs" / "fruits" - job = parse_job_definition(job_dir) - - classify_step = job.steps[1] - assert classify_step.id == "classify" - assert classify_step.name == "Classify Fruits" - - # Has file input from previous step - assert len(classify_step.inputs) == 1 - assert classify_step.inputs[0].is_file_input() - assert classify_step.inputs[0].file == "identified_fruits.md" - assert classify_step.inputs[0].from_step == "identify" - - # Has output - assert len(classify_step.outputs) == 1 - assert classify_step.outputs[0].file == "classified_fruits.md" - - # Depends on identify step - assert classify_step.dependencies == ["identify"] - - def test_fruits_skill_generation(self, fixtures_dir: Path, temp_dir: Path) -> None: - """Test that fruits job generates valid Claude skills.""" - job_dir = fixtures_dir / "jobs" / "fruits" - job = parse_job_definition(job_dir) - - generator = SkillGenerator() - adapter = ClaudeAdapter() - skills_dir = temp_dir / ".claude" - skills_dir.mkdir() - - skill_paths = generator.generate_all_skills(job, adapter, skills_dir) - - # Now includes meta-skill + step skills - assert len(skill_paths) == 3 # 1 meta + 2 steps - - # Verify skill directories with SKILL.md files exist - meta_skill = skills_dir / "skills" / 
"fruits" / "SKILL.md" - identify_skill = skills_dir / "skills" / "fruits.identify" / "SKILL.md" - classify_skill = skills_dir / "skills" / "fruits.classify" / "SKILL.md" - assert meta_skill.exists() - assert identify_skill.exists() - assert classify_skill.exists() - - def test_fruits_identify_skill_content(self, fixtures_dir: Path, temp_dir: Path) -> None: - """Test the identify skill has correct content.""" - job_dir = fixtures_dir / "jobs" / "fruits" - job = parse_job_definition(job_dir) - - generator = SkillGenerator() - adapter = ClaudeAdapter() - skills_dir = temp_dir / ".claude" - skills_dir.mkdir() - - generator.generate_all_skills(job, adapter, skills_dir) - - # Step skills use directory/SKILL.md format - identify_skill = skills_dir / "skills" / "fruits.identify" / "SKILL.md" - content = identify_skill.read_text() - - # Check header - assert "# fruits.identify" in content - - # Check step info - assert "Step 1/2" in content - - # Check user input is mentioned - assert "raw_items" in content - - # Check output is mentioned - assert "identified_fruits.md" in content - - # Check next step is suggested - assert "/fruits.classify" in content - - def test_fruits_classify_skill_content(self, fixtures_dir: Path, temp_dir: Path) -> None: - """Test the classify skill has correct content.""" - job_dir = fixtures_dir / "jobs" / "fruits" - job = parse_job_definition(job_dir) - - generator = SkillGenerator() - adapter = ClaudeAdapter() - skills_dir = temp_dir / ".claude" - skills_dir.mkdir() - - generator.generate_all_skills(job, adapter, skills_dir) - - # Step skills use directory/SKILL.md format - classify_skill = skills_dir / "skills" / "fruits.classify" / "SKILL.md" - content = classify_skill.read_text() - - # Check header - assert "# fruits.classify" in content - - # Check step info - assert "Step 2/2" in content - - # Check file input is mentioned - assert "identified_fruits.md" in content - assert "from `identify`" in content - - # Check output is mentioned - assert "classified_fruits.md" in content - - # Check workflow complete (last step) - assert "workflow complete" in content.lower() - - def test_fruits_dependency_validation(self, fixtures_dir: Path) -> None: - """Test that dependency validation passes for fruits job.""" - job_dir = fixtures_dir / "jobs" / "fruits" - job = parse_job_definition(job_dir) - - # This should not raise - dependencies are valid - job.validate_dependencies() - - def test_fruits_job_is_deterministic_design(self, fixtures_dir: Path) -> None: - """Verify the fruits job is designed for deterministic testing. - - This test documents the design properties that make this job - suitable for CI testing. 
- """ - job_dir = fixtures_dir / "jobs" / "fruits" - job = parse_job_definition(job_dir) - - # Job has clear, simple structure - assert len(job.steps) == 2 - - # Steps form a linear dependency chain - assert job.steps[0].dependencies == [] - assert job.steps[1].dependencies == ["identify"] - - # First step takes user input - identify_step = job.steps[0] - assert len(identify_step.inputs) == 1 - assert identify_step.inputs[0].is_user_input() - - # Second step uses output from first step - classify_step = job.steps[1] - assert len(classify_step.inputs) == 1 - assert classify_step.inputs[0].is_file_input() - assert classify_step.inputs[0].from_step == "identify" - - # Outputs are well-defined markdown files - assert len(identify_step.outputs) == 1 - assert identify_step.outputs[0].file == "identified_fruits.md" - assert len(classify_step.outputs) == 1 - assert classify_step.outputs[0].file == "classified_fruits.md" diff --git a/tests/integration/test_full_workflow.py b/tests/integration/test_full_workflow.py deleted file mode 100644 index bc7f83bf..00000000 --- a/tests/integration/test_full_workflow.py +++ /dev/null @@ -1,153 +0,0 @@ -"""Integration tests for full job workflow.""" - -from pathlib import Path - -from deepwork.core.adapters import ClaudeAdapter -from deepwork.core.generator import SkillGenerator -from deepwork.core.parser import parse_job_definition - - -class TestJobWorkflow: - """Integration tests for complete job workflow.""" - - def test_parse_and_generate_workflow(self, fixtures_dir: Path, temp_dir: Path) -> None: - """Test complete workflow: parse job → generate skills.""" - # Step 1: Parse job definition - job_dir = fixtures_dir / "jobs" / "complex_job" - job = parse_job_definition(job_dir) - - assert job.name == "competitive_research" - assert len(job.steps) == 4 - - # Step 2: Generate skills - generator = SkillGenerator() - adapter = ClaudeAdapter() - skills_dir = temp_dir / ".claude" - skills_dir.mkdir() - - skill_paths = generator.generate_all_skills(job, adapter, skills_dir) - - # Now includes meta-skill + step skills - assert len(skill_paths) == 5 # 1 meta + 4 steps - - # First skill is the meta-skill - assert skill_paths[0].exists() - meta_content = skill_paths[0].read_text() - assert f"# {job.name}" in meta_content - assert "Available Steps" in meta_content - - # Verify all step skill files exist and have correct content - for i, skill_path in enumerate(skill_paths[1:]): # Skip meta-skill - assert skill_path.exists() - content = skill_path.read_text() - - # Check skill name format (header) - assert f"# {job.name}.{job.steps[i].id}" in content - - # Check step numbers - assert f"Step {i + 1}/4" in content - - def test_simple_job_workflow(self, fixtures_dir: Path, temp_dir: Path) -> None: - """Test workflow with simple single-step job.""" - # Parse - job_dir = fixtures_dir / "jobs" / "simple_job" - job = parse_job_definition(job_dir) - - assert len(job.steps) == 1 - - # Generate - generator = SkillGenerator() - adapter = ClaudeAdapter() - skills_dir = temp_dir / ".claude" - skills_dir.mkdir() - - skill_paths = generator.generate_all_skills(job, adapter, skills_dir) - - # Now includes meta-skill + step skills - assert len(skill_paths) == 2 # 1 meta + 1 step - - # Verify step skill content (skip meta-skill at index 0) - content = skill_paths[1].read_text() - assert "# simple_job.single_step" in content - # Single step with no dependencies is treated as standalone - assert "Standalone skill" in content - assert "input_param" in content - assert "standalone skill can 
be re-run" in content # Standalone completion message - - def test_skill_generation_with_dependencies(self, fixtures_dir: Path, temp_dir: Path) -> None: - """Test that generated skills properly handle dependencies.""" - job_dir = fixtures_dir / "jobs" / "complex_job" - job = parse_job_definition(job_dir) - - generator = SkillGenerator() - adapter = ClaudeAdapter() - skills_dir = temp_dir / ".claude" - skills_dir.mkdir() - - skill_paths = generator.generate_all_skills(job, adapter, skills_dir) - - # skill_paths[0] is meta-skill, steps start at index 1 - - # Check first step (no prerequisites) - step1_content = skill_paths[1].read_text() - assert "## Prerequisites" not in step1_content - assert "/competitive_research.primary_research" in step1_content # Next step - - # Check second step (has prerequisites and next step) - step2_content = skill_paths[2].read_text() - assert "## Prerequisites" in step2_content - assert "/competitive_research.identify_competitors" in step2_content - assert "/competitive_research.secondary_research" in step2_content # Next step - - # Check last step (has prerequisites, no next step) - step4_content = skill_paths[4].read_text() - assert "## Prerequisites" in step4_content - assert "**Workflow complete**" in step4_content - assert "## Next Step" not in step4_content - - def test_skill_generation_with_file_inputs(self, fixtures_dir: Path, temp_dir: Path) -> None: - """Test that generated skills properly handle file inputs.""" - job_dir = fixtures_dir / "jobs" / "complex_job" - job = parse_job_definition(job_dir) - - generator = SkillGenerator() - adapter = ClaudeAdapter() - skills_dir = temp_dir / ".claude" - skills_dir.mkdir() - - skill_paths = generator.generate_all_skills(job, adapter, skills_dir) - - # skill_paths[0] is meta-skill, steps start at index 1 - - # Check step with file input - step2_content = skill_paths[2].read_text() # primary_research (index 2) - assert "## Required Inputs" in step2_content - assert "**Files from Previous Steps**" in step2_content - assert "competitors.md" in step2_content - assert "from `identify_competitors`" in step2_content - - # Check step with multiple file inputs - step4_content = skill_paths[4].read_text() # comparative_report (index 4) - assert "primary_research.md" in step4_content - assert "secondary_research.md" in step4_content - - def test_skill_generation_with_user_inputs(self, fixtures_dir: Path, temp_dir: Path) -> None: - """Test that generated skills properly handle user parameter inputs.""" - job_dir = fixtures_dir / "jobs" / "complex_job" - job = parse_job_definition(job_dir) - - generator = SkillGenerator() - adapter = ClaudeAdapter() - skills_dir = temp_dir / ".claude" - skills_dir.mkdir() - - skill_paths = generator.generate_all_skills(job, adapter, skills_dir) - - # skill_paths[0] is meta-skill, steps start at index 1 - - # Check step with user inputs - step1_content = skill_paths[1].read_text() # identify_competitors (index 1) - assert "## Required Inputs" in step1_content - assert "**User Parameters**" in step1_content - assert "market_segment" in step1_content - assert "product_category" in step1_content diff --git a/tests/integration/test_install_requirements.py b/tests/integration/test_install_requirements.py deleted file mode 100644 index f04cdb8d..00000000 --- a/tests/integration/test_install_requirements.py +++ /dev/null @@ -1,336 +0,0 @@ -""" -================================================================================ - REQUIREMENTS TESTS - DO NOT MODIFY 
-================================================================================ - -These tests verify CRITICAL REQUIREMENTS for the DeepWork install process. -They ensure the install command behaves correctly with respect to: - -1. LOCAL vs PROJECT settings isolation -2. Idempotency of project settings - -WARNING: These tests represent contractual requirements for the install process. -Modifying these tests may violate user expectations and could cause data loss -or unexpected behavior. If a test fails, fix the IMPLEMENTATION, not the test. - -Requirements tested: - - REQ-001: Install MUST NOT modify local (user home) Claude settings - - REQ-002: Install MUST be idempotent for project settings - -================================================================================ -""" - -import json -from collections.abc import Iterator -from contextlib import contextmanager -from pathlib import Path -from unittest.mock import patch - -import pytest -from click.testing import CliRunner - -from deepwork.cli.main import cli - -# ============================================================================= -# HELPER FUNCTIONS -# ============================================================================= -# These helpers reduce repetition while keeping individual tests readable. -# The helpers themselves are simple and should not mask test intent. - - -def run_install(project_path: Path) -> None: - """Run deepwork install for Claude on the given project path. - - Raises AssertionError if install fails. - """ - runner = CliRunner() - result = runner.invoke( - cli, - ["install", "--platform", "claude", "--path", str(project_path)], - catch_exceptions=False, - ) - assert result.exit_code == 0, f"Install failed: {result.output}" - - -def get_project_settings(project_path: Path) -> dict: - """Read and parse the project's Claude settings.json.""" - settings_file = project_path / ".claude" / "settings.json" - return json.loads(settings_file.read_text()) - - -def assert_install_modified_settings(settings_before: dict, settings_after: dict) -> None: - """Assert that install actually modified settings. - - This ensures idempotency tests are meaningful - if install does nothing, - idempotency would trivially pass but the test would be useless. - - Note: Install may or may not add hooks depending on which jobs are installed. - The key assertion is that settings were modified in some way. - """ - assert settings_after != settings_before, ( - "FIRST INSTALL DID NOT MODIFY SETTINGS! " - "Install must modify project settings on first run. " - "This test requires install to actually do something to verify idempotency." - ) - - -@contextmanager -def mock_local_claude_settings( - tmp_path: Path, content: str | dict = '{"local": "unchanged"}' -) -> Iterator[Path]: - """Create mock local Claude settings and patch HOME to use them. 
- - Args: - tmp_path: Temporary directory to create mock home in - content: Settings content (string or dict to be JSON-serialized) - - Yields: - Path to the local settings file (for verification after install) - """ - mock_home = tmp_path / "mock_home" - mock_local_claude_dir = mock_home / ".claude" - mock_local_claude_dir.mkdir(parents=True) - - local_settings_file = mock_local_claude_dir / "settings.json" - if isinstance(content, dict): - local_settings_file.write_text(json.dumps(content, indent=2)) - else: - local_settings_file.write_text(content) - - with patch.dict("os.environ", {"HOME": str(mock_home)}): - yield local_settings_file - - -# ============================================================================= -# REQ-001: Install MUST NOT modify local (user home) Claude settings -# ============================================================================= -# -# Claude Code has two levels of settings: -# - LOCAL settings: ~/.claude/settings.json (user's global settings) -# - PROJECT settings: /.claude/settings.json (project-specific) -# -# DeepWork install MUST ONLY modify project settings and NEVER touch -# the user's local settings, which may contain personal configurations, -# API keys, or other sensitive data. -# -# DO NOT MODIFY THIS TEST - It protects user data integrity. -# ============================================================================= - - -class TestLocalSettingsProtection: - """ - REQUIREMENTS TEST: Verify install does not modify local Claude settings. - - ============================================================================ - WARNING: DO NOT MODIFY THESE TESTS - ============================================================================ - - These tests verify that the install process respects the boundary between - project-level and user-level settings. Modifying these tests could result - in DeepWork overwriting user's personal Claude configurations. - """ - - def test_install_does_not_modify_local_claude_settings( - self, mock_claude_project: Path, tmp_path: Path - ) -> None: - """ - REQ-001: Install MUST NOT modify local (home directory) Claude settings. - - This test creates a mock local settings file and verifies that the - DeepWork install process does not modify it in any way. - - DO NOT MODIFY THIS TEST. - """ - original_local_settings = { - "user_preference": "do_not_change", - "api_key_encrypted": "sensitive_data_here", - "custom_config": {"setting1": True, "setting2": "value"}, - } - - with mock_local_claude_settings(tmp_path, original_local_settings) as local_file: - original_mtime = local_file.stat().st_mtime - run_install(mock_claude_project) - - # CRITICAL: Verify local settings were NOT modified - assert local_file.exists(), "Local settings file should still exist" - - current_local_settings = json.loads(local_file.read_text()) - assert current_local_settings == original_local_settings, ( - "LOCAL SETTINGS WERE MODIFIED! " - "Install MUST NOT touch user's home directory Claude settings. " - f"Expected: {original_local_settings}, Got: {current_local_settings}" - ) - - assert local_file.stat().st_mtime == original_mtime, ( - "LOCAL SETTINGS FILE WAS TOUCHED! " - "Install MUST NOT access user's home directory Claude settings." - ) - - def test_install_only_modifies_project_settings( - self, mock_claude_project: Path, tmp_path: Path - ) -> None: - """ - REQ-001 (corollary): Install MUST modify only project-level settings. 
- - Verifies that the install process correctly modifies project settings - while leaving local settings untouched. - - DO NOT MODIFY THIS TEST. - """ - original_local_content = '{"local": "unchanged"}' - - with mock_local_claude_settings(tmp_path, original_local_content) as local_file: - run_install(mock_claude_project) - - # Verify LOCAL settings unchanged - assert local_file.read_text() == original_local_content, ( - "Local settings were modified! Install must only modify project settings." - ) - - # Verify PROJECT settings were modified - project_settings = get_project_settings(mock_claude_project) - # Settings should exist after install - assert project_settings is not None, "Project settings should exist after install" - - -# ============================================================================= -# REQ-002: Install MUST be idempotent for project settings -# ============================================================================= -# -# Running `deepwork install` multiple times on the same project MUST produce -# identical results. The second and subsequent installs should not: -# - Add duplicate entries -# - Modify timestamps unnecessarily -# - Change the structure or content of settings -# -# This ensures that users can safely re-run install without side effects, -# which is important for CI/CD pipelines, onboarding scripts, and -# troubleshooting scenarios. -# -# DO NOT MODIFY THIS TEST - It ensures installation reliability. -# ============================================================================= - - -class TestProjectSettingsIdempotency: - """ - REQUIREMENTS TEST: Verify install is idempotent for project settings. - - ============================================================================ - WARNING: DO NOT MODIFY THESE TESTS - ============================================================================ - - These tests verify that running install multiple times produces identical - results. This is critical for: - - CI/CD reliability - - Safe re-installation - - Troubleshooting without side effects - """ - - def test_project_settings_unchanged_on_second_install(self, mock_claude_project: Path) -> None: - """ - REQ-002: Second install MUST NOT change project settings. - - Running install twice should produce identical settings.json content. - The first install MUST modify settings (add hooks), and the second - install should be a no-op for settings. - - DO NOT MODIFY THIS TEST. - """ - # Capture settings BEFORE first install - settings_before = get_project_settings(mock_claude_project) - - # First install - run_install(mock_claude_project) - settings_after_first = get_project_settings(mock_claude_project) - - # CRITICAL: First install MUST actually modify settings - assert_install_modified_settings(settings_before, settings_after_first) - - # Second install - run_install(mock_claude_project) - settings_after_second = get_project_settings(mock_claude_project) - - # CRITICAL: Settings must be identical after second install - assert settings_after_first == settings_after_second, ( - "PROJECT SETTINGS CHANGED ON SECOND INSTALL! " - "Install MUST be idempotent. " - f"After first: {json.dumps(settings_after_first, indent=2)}\n" - f"After second: {json.dumps(settings_after_second, indent=2)}" - ) - - def test_no_duplicate_hooks_on_multiple_installs(self, mock_claude_project: Path) -> None: - """ - REQ-002 (corollary): Multiple installs MUST NOT create duplicate hooks. 
- - This specifically tests that hooks are not duplicated, which would - cause performance issues and unexpected behavior. - - DO NOT MODIFY THIS TEST. - """ - # Run install three times - for _ in range(3): - run_install(mock_claude_project) - - # Load final settings - settings = get_project_settings(mock_claude_project) - - # If hooks exist, verify no duplicates - if "hooks" in settings: - for event_name, hooks_list in settings["hooks"].items(): - # Extract all hook commands for duplicate detection - commands = [ - hook["command"] - for hook_entry in hooks_list - for hook in hook_entry.get("hooks", []) - if "command" in hook - ] - - # Check for duplicates - assert len(commands) == len(set(commands)), ( - f"DUPLICATE HOOKS DETECTED for event '{event_name}'! " - f"Install MUST be idempotent. Commands: {commands}" - ) - - def test_third_install_identical_to_first(self, mock_claude_project: Path) -> None: - """ - REQ-002 (extended): Nth install MUST produce same result as first. - - This tests the general idempotency property across multiple runs. - The first install MUST modify settings, and all subsequent installs - MUST produce identical results. - - DO NOT MODIFY THIS TEST. - """ - # Capture settings BEFORE any install - settings_before = get_project_settings(mock_claude_project) - - # First install - run_install(mock_claude_project) - settings_after_first = get_project_settings(mock_claude_project) - - # CRITICAL: First install MUST actually modify settings - assert_install_modified_settings(settings_before, settings_after_first) - - # Run multiple more installs - for _ in range(5): - run_install(mock_claude_project) - - # Final state should match first install - settings_after_many = get_project_settings(mock_claude_project) - - assert settings_after_first == settings_after_many, ( - "SETTINGS DIVERGED AFTER MULTIPLE INSTALLS! " - "Install must be idempotent regardless of how many times it runs." 
- ) - - -# ============================================================================= -# FIXTURE EXTENSIONS -# ============================================================================= -# Additional fixtures needed for these requirement tests - - -@pytest.fixture -def tmp_path(temp_dir: Path) -> Path: - """Alias for temp_dir to match pytest naming convention.""" - return temp_dir diff --git a/tests/integration/test_quality_gate_integration.py b/tests/integration/test_quality_gate_integration.py index 26360744..888d33d3 100644 --- a/tests/integration/test_quality_gate_integration.py +++ b/tests/integration/test_quality_gate_integration.py @@ -37,7 +37,7 @@ def mock_agent_command() -> str: class TestQualityGateIntegration: """Integration tests that run real subprocesses.""" - def test_subprocess_returns_pass( + async def test_subprocess_returns_pass( self, project_root: Path, mock_agent_command: str ) -> None: """Test that a passing response is correctly detected.""" @@ -48,7 +48,7 @@ def test_subprocess_returns_pass( os.environ["REVIEW_RESULT"] = "pass" try: - result = gate.evaluate( + result = await gate.evaluate( quality_criteria=["Output must exist", "Output must be valid"], outputs=["output.md"], project_root=project_root, @@ -62,7 +62,7 @@ def test_subprocess_returns_pass( else: os.environ.pop("REVIEW_RESULT", None) - def test_subprocess_returns_fail( + async def test_subprocess_returns_fail( self, project_root: Path, mock_agent_command: str ) -> None: """Test that a failing response is correctly detected.""" @@ -73,7 +73,7 @@ def test_subprocess_returns_fail( os.environ["REVIEW_RESULT"] = "fail" try: - result = gate.evaluate( + result = await gate.evaluate( quality_criteria=["Output must exist"], outputs=["output.md"], project_root=project_root, @@ -89,7 +89,7 @@ def test_subprocess_returns_fail( else: os.environ.pop("REVIEW_RESULT", None) - def test_subprocess_malformed_response_raises_error( + async def test_subprocess_malformed_response_raises_error( self, project_root: Path, mock_agent_command: str ) -> None: """Test that malformed JSON raises an error.""" @@ -100,7 +100,7 @@ def test_subprocess_malformed_response_raises_error( try: with pytest.raises(QualityGateError, match="Failed to parse"): - gate.evaluate( + await gate.evaluate( quality_criteria=["Criterion 1"], outputs=["output.md"], project_root=project_root, @@ -111,7 +111,7 @@ def test_subprocess_malformed_response_raises_error( else: os.environ.pop("REVIEW_RESULT", None) - def test_subprocess_nonzero_exit_raises_error( + async def test_subprocess_nonzero_exit_raises_error( self, project_root: Path, mock_agent_command: str ) -> None: """Test that non-zero exit code raises an error.""" @@ -122,7 +122,7 @@ def test_subprocess_nonzero_exit_raises_error( try: with pytest.raises(QualityGateError, match="failed with exit code"): - gate.evaluate( + await gate.evaluate( quality_criteria=["Criterion 1"], outputs=["output.md"], project_root=project_root, @@ -133,7 +133,7 @@ def test_subprocess_nonzero_exit_raises_error( else: os.environ.pop("REVIEW_RESULT", None) - def test_subprocess_timeout( + async def test_subprocess_timeout( self, project_root: Path, mock_agent_command: str ) -> None: """Test that subprocess timeout is handled correctly.""" @@ -144,7 +144,7 @@ def test_subprocess_timeout( try: with pytest.raises(QualityGateError, match="timed out"): - gate.evaluate( + await gate.evaluate( quality_criteria=["Criterion 1"], outputs=["output.md"], project_root=project_root, @@ -155,18 +155,18 @@ def 
test_subprocess_timeout( else: os.environ.pop("REVIEW_RESULT", None) - def test_subprocess_command_not_found(self, project_root: Path) -> None: + async def test_subprocess_command_not_found(self, project_root: Path) -> None: """Test that missing command is handled correctly.""" gate = QualityGate(command="nonexistent_command_12345", timeout=30) with pytest.raises(QualityGateError, match="command not found"): - gate.evaluate( + await gate.evaluate( quality_criteria=["Criterion 1"], outputs=["output.md"], project_root=project_root, ) - def test_auto_mode_detects_force_pass_marker( + async def test_auto_mode_detects_force_pass_marker( self, project_root: Path, mock_agent_command: str ) -> None: """Test that FORCE_PASS marker in content causes pass.""" @@ -181,7 +181,7 @@ def test_auto_mode_detects_force_pass_marker( os.environ.pop("REVIEW_RESULT", None) try: - result = gate.evaluate( + result = await gate.evaluate( quality_criteria=["Criterion 1"], outputs=["marker_output.md"], project_root=project_root, @@ -192,7 +192,7 @@ def test_auto_mode_detects_force_pass_marker( if env_backup is not None: os.environ["REVIEW_RESULT"] = env_backup - def test_auto_mode_detects_force_fail_marker( + async def test_auto_mode_detects_force_fail_marker( self, project_root: Path, mock_agent_command: str ) -> None: """Test that FORCE_FAIL marker in content causes fail.""" @@ -207,7 +207,7 @@ def test_auto_mode_detects_force_fail_marker( os.environ.pop("REVIEW_RESULT", None) try: - result = gate.evaluate( + result = await gate.evaluate( quality_criteria=["Criterion 1"], outputs=["marker_output.md"], project_root=project_root, @@ -218,7 +218,7 @@ def test_auto_mode_detects_force_fail_marker( if env_backup is not None: os.environ["REVIEW_RESULT"] = env_backup - def test_missing_output_file_causes_fail( + async def test_missing_output_file_causes_fail( self, project_root: Path, mock_agent_command: str ) -> None: """Test that missing output file is detected as failure.""" @@ -229,7 +229,7 @@ def test_missing_output_file_causes_fail( os.environ.pop("REVIEW_RESULT", None) try: - result = gate.evaluate( + result = await gate.evaluate( quality_criteria=["Output files must exist"], outputs=["nonexistent_file.md"], project_root=project_root, @@ -447,12 +447,12 @@ def test_criteria_results_optional(self) -> None: class TestQualityGateEdgeCases: """Test edge cases and potential failure scenarios.""" - def test_empty_quality_criteria_auto_passes(self, project_root: Path) -> None: + async def test_empty_quality_criteria_auto_passes(self, project_root: Path) -> None: """Test that no criteria means auto-pass (no subprocess called).""" gate = QualityGate(command="nonexistent_command", timeout=30) # Even with a command that doesn't exist, empty criteria should auto-pass - result = gate.evaluate( + result = await gate.evaluate( quality_criteria=[], # No criteria outputs=["output.md"], project_root=project_root, @@ -461,7 +461,7 @@ def test_empty_quality_criteria_auto_passes(self, project_root: Path) -> None: assert result.passed is True assert "auto-passing" in result.feedback.lower() - def test_multiple_output_files( + async def test_multiple_output_files( self, project_root: Path, mock_agent_command: str ) -> None: """Test evaluation with multiple output files.""" @@ -476,7 +476,7 @@ def test_multiple_output_files( os.environ["REVIEW_RESULT"] = "pass" try: - result = gate.evaluate( + result = await gate.evaluate( quality_criteria=["All outputs must exist"], outputs=["output1.md", "output2.md", "output3.md"], 
project_root=project_root, @@ -489,7 +489,7 @@ def test_multiple_output_files( else: os.environ.pop("REVIEW_RESULT", None) - def test_large_output_file( + async def test_large_output_file( self, project_root: Path, mock_agent_command: str ) -> None: """Test evaluation with a large output file.""" @@ -503,7 +503,7 @@ def test_large_output_file( os.environ["REVIEW_RESULT"] = "pass" try: - result = gate.evaluate( + result = await gate.evaluate( quality_criteria=["Output must be complete"], outputs=["large_output.md"], project_root=project_root, @@ -516,7 +516,7 @@ def test_large_output_file( else: os.environ.pop("REVIEW_RESULT", None) - def test_unicode_in_output( + async def test_unicode_in_output( self, project_root: Path, mock_agent_command: str ) -> None: """Test evaluation with unicode content.""" @@ -530,7 +530,7 @@ def test_unicode_in_output( os.environ["REVIEW_RESULT"] = "pass" try: - result = gate.evaluate( + result = await gate.evaluate( quality_criteria=["Content must be valid"], outputs=["unicode_output.md"], project_root=project_root, diff --git a/tests/shell_script_tests/README.md b/tests/shell_script_tests/README.md deleted file mode 100644 index 76cd8f05..00000000 --- a/tests/shell_script_tests/README.md +++ /dev/null @@ -1,76 +0,0 @@ -# Shell Script Tests - -Automated tests for DeepWork shell scripts and hooks, with a focus on validating Claude Code hooks JSON response formats. - -## Hooks and Scripts Tested - -| Hook/Script | Type | Description | -|-------------|------|-------------| -| `deepwork.hooks.rules_check` | Stop Hook (Python) | Evaluates rules and blocks agent stop if rules are triggered | -| `user_prompt_submit.sh` | UserPromptSubmit Hook | Captures work tree state when user submits a prompt | -| `capture_prompt_work_tree.sh` | Helper | Records current git state for `compare_to: prompt` rules | -| `make_new_job.sh` | Utility | Creates directory structure for new DeepWork jobs | - -## Claude Code Hooks JSON Format - -Hook scripts must return valid JSON responses. 
The tests enforce these formats: - -### Stop Hooks (`hooks.after_agent`) -```json -{} // Allow stop -{"decision": "block", "reason": "..."} // Block stop with reason -``` - -### UserPromptSubmit Hooks (`hooks.before_prompt`) -```json -{} // No output or empty object (side-effect only hooks) -``` - -### All Hooks -- Must return valid JSON if producing output -- Non-JSON output on stdout is **not allowed** (stderr is ok) -- Exit code 0 indicates success (even when blocking) - -## Running Tests - -```bash -# Run all shell script tests -uv run pytest tests/shell_script_tests/ -v - -# Run tests for a specific script -uv run pytest tests/shell_script_tests/test_rules_stop_hook.py -v - -# Run with coverage -uv run pytest tests/shell_script_tests/ --cov=src/deepwork -``` - -## Test Structure - -``` -tests/shell_script_tests/ -├── conftest.py # Shared fixtures and helpers -├── test_hooks.py # Consolidated hook tests (JSON format, exit codes) -├── test_rules_stop_hook.py # Stop hook blocking/allowing tests -├── test_user_prompt_submit.py # Prompt submission hook tests -├── test_capture_prompt_work_tree.py # Work tree capture tests -└── test_make_new_job.py # Job directory creation tests -``` - -## Shared Fixtures - -Available in `conftest.py`: - -| Fixture | Description | -|---------|-------------| -| `git_repo` | Basic git repo with initial commit | -| `git_repo_with_rule` | Git repo with a Python file rule | -| `rules_hooks_dir` | Path to rules hooks scripts | -| `jobs_scripts_dir` | Path to job management scripts | - -## Adding New Tests - -1. Use shared fixtures from `conftest.py` when possible -2. Use `run_shell_script()` helper for running scripts -3. Validate JSON output with `validate_json_output()` and `validate_stop_hook_response()` -4. Test both success and failure cases -5. 
Verify exit codes (hooks should exit 0 even when blocking) diff --git a/tests/shell_script_tests/__init__.py b/tests/shell_script_tests/__init__.py deleted file mode 100644 index 1b0c3352..00000000 --- a/tests/shell_script_tests/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""Shell script tests for DeepWork hooks.""" diff --git a/tests/shell_script_tests/conftest.py b/tests/shell_script_tests/conftest.py deleted file mode 100644 index 01b0250b..00000000 --- a/tests/shell_script_tests/conftest.py +++ /dev/null @@ -1,85 +0,0 @@ -"""Shared fixtures for shell script tests.""" - -import json -import os -import subprocess -from pathlib import Path - -import pytest -from git import Repo - - -@pytest.fixture -def git_repo(tmp_path: Path) -> Path: - """Create a basic git repo for testing.""" - repo = Repo.init(tmp_path) - - readme = tmp_path / "README.md" - readme.write_text("# Test Project\n") - repo.index.add(["README.md"]) - repo.index.commit("Initial commit") - - return tmp_path - - -@pytest.fixture -def hooks_dir() -> Path: - """Return the path to the main hooks directory (platform wrappers).""" - return Path(__file__).parent.parent.parent / "src" / "deepwork" / "hooks" - - -@pytest.fixture -def src_dir() -> Path: - """Return the path to the src directory for PYTHONPATH.""" - return Path(__file__).parent.parent.parent / "src" - - -@pytest.fixture -def jobs_scripts_dir() -> Path: - """Return the path to the jobs scripts directory.""" - return ( - Path(__file__).parent.parent.parent / "src" / "deepwork" / "standard_jobs" / "deepwork_jobs" - ) - - -def run_shell_script( - script_path: Path, - cwd: Path, - args: list[str] | None = None, - hook_input: dict | None = None, - env_extra: dict[str, str] | None = None, -) -> tuple[str, str, int]: - """ - Run a shell script and return its output. - - Args: - script_path: Path to the shell script - cwd: Working directory to run the script in - args: Optional list of arguments to pass to the script - hook_input: Optional JSON input to pass via stdin - env_extra: Optional extra environment variables - - Returns: - Tuple of (stdout, stderr, return_code) - """ - env = os.environ.copy() - env["PYTHONPATH"] = str(Path(__file__).parent.parent.parent / "src") - if env_extra: - env.update(env_extra) - - cmd = ["bash", str(script_path)] - if args: - cmd.extend(args) - - stdin_data = json.dumps(hook_input) if hook_input else "" - - result = subprocess.run( - cmd, - cwd=cwd, - capture_output=True, - text=True, - input=stdin_data, - env=env, - ) - - return result.stdout, result.stderr, result.returncode diff --git a/tests/shell_script_tests/test_check_version.py b/tests/shell_script_tests/test_check_version.py deleted file mode 100644 index 1cd4c20f..00000000 --- a/tests/shell_script_tests/test_check_version.py +++ /dev/null @@ -1,412 +0,0 @@ -"""Tests for check_version.sh SessionStart hook. - -Tests version checking logic, JSON output format, and warning behavior. -""" - -import os -import subprocess -import tempfile -from pathlib import Path - -import pytest - - -@pytest.fixture -def check_version_script(hooks_dir: Path) -> Path: - """Return path to check_version.sh.""" - return hooks_dir / "check_version.sh" - - -def run_check_version_with_mock_claude( - script_path: Path, - mock_version: str | None, - cwd: Path | None = None, - mock_deepwork: bool = True, - stdin_json: str | None = None, -) -> tuple[str, str, int]: - """ - Run check_version.sh with a mocked claude command. 
- - Args: - script_path: Path to check_version.sh - mock_version: Version string to return from mock claude, or None for failure - cwd: Working directory - mock_deepwork: If True, create a mock deepwork command that succeeds. - If False, do not create mock deepwork (simulates not installed). - stdin_json: Optional JSON string to pass via stdin (simulates hook input) - - Returns: - Tuple of (stdout, stderr, return_code) - """ - with tempfile.TemporaryDirectory() as tmpdir: - # Create mock claude command - mock_claude = Path(tmpdir) / "claude" - if mock_version is not None: - mock_claude.write_text(f'#!/bin/bash\necho "{mock_version} (Claude Code)"\n') - else: - mock_claude.write_text("#!/bin/bash\nexit 1\n") - mock_claude.chmod(0o755) - - # Create mock deepwork command - # When mock_deepwork=True, create a working mock - # When mock_deepwork=False, create a failing mock that shadows the real one - mock_deepwork_cmd = Path(tmpdir) / "deepwork" - if mock_deepwork: - mock_deepwork_cmd.write_text('#!/bin/bash\necho "deepwork 0.1.0"\n') - else: - # Create a mock that fails (simulating deepwork not being installed) - mock_deepwork_cmd.write_text("#!/bin/bash\nexit 127\n") - mock_deepwork_cmd.chmod(0o755) - - # Prepend mock dir to PATH - env = os.environ.copy() - env["PATH"] = f"{tmpdir}:{env.get('PATH', '')}" - - result = subprocess.run( - ["bash", str(script_path)], - capture_output=True, - text=True, - cwd=cwd or tmpdir, - env=env, - input=stdin_json, - ) - - return result.stdout, result.stderr, result.returncode - - -class TestVersionComparison: - """Tests for version comparison logic.""" - - def test_equal_versions(self, check_version_script: Path) -> None: - """Test that equal versions don't trigger warning.""" - # Mock version equals minimum (2.1.14) - stdout, stderr, code = run_check_version_with_mock_claude(check_version_script, "2.1.14") - - assert code == 0 - assert "WARNING" not in stderr - - def test_greater_patch_version(self, check_version_script: Path) -> None: - """Test that greater patch version doesn't trigger warning.""" - stdout, stderr, code = run_check_version_with_mock_claude(check_version_script, "2.1.15") - - assert code == 0 - assert "WARNING" not in stderr - - def test_greater_minor_version(self, check_version_script: Path) -> None: - """Test that greater minor version doesn't trigger warning.""" - stdout, stderr, code = run_check_version_with_mock_claude(check_version_script, "2.2.0") - - assert code == 0 - assert "WARNING" not in stderr - - def test_greater_major_version(self, check_version_script: Path) -> None: - """Test that greater major version doesn't trigger warning.""" - stdout, stderr, code = run_check_version_with_mock_claude(check_version_script, "3.0.0") - - assert code == 0 - assert "WARNING" not in stderr - - def test_lesser_patch_version(self, check_version_script: Path) -> None: - """Test that lesser patch version triggers warning.""" - stdout, stderr, code = run_check_version_with_mock_claude(check_version_script, "2.1.13") - - assert code == 0 - assert "WARNING" in stderr - assert "2.1.13" in stderr # Shows current version - - def test_lesser_minor_version(self, check_version_script: Path) -> None: - """Test that lesser minor version triggers warning.""" - stdout, stderr, code = run_check_version_with_mock_claude(check_version_script, "2.0.99") - - assert code == 0 - assert "WARNING" in stderr - - def test_lesser_major_version(self, check_version_script: Path) -> None: - """Test that lesser major version triggers warning.""" - stdout, stderr, code = 
run_check_version_with_mock_claude(check_version_script, "1.9.99") - - assert code == 0 - assert "WARNING" in stderr - - -class TestWarningOutput: - """Tests for warning message content.""" - - def test_warning_contains_current_version(self, check_version_script: Path) -> None: - """Test that warning shows the current version.""" - stdout, stderr, code = run_check_version_with_mock_claude(check_version_script, "2.0.0") - - assert "2.0.0" in stderr - - def test_warning_contains_minimum_version(self, check_version_script: Path) -> None: - """Test that warning shows the minimum version.""" - stdout, stderr, code = run_check_version_with_mock_claude(check_version_script, "2.0.0") - - assert "2.1.14" in stderr - - def test_warning_suggests_update(self, check_version_script: Path) -> None: - """Test that warning suggests updating Claude Code.""" - stdout, stderr, code = run_check_version_with_mock_claude(check_version_script, "2.0.0") - - assert "Update your version of Claude Code" in stderr - - def test_warning_mentions_bugs(self, check_version_script: Path) -> None: - """Test that warning mentions bugs in older versions.""" - stdout, stderr, code = run_check_version_with_mock_claude(check_version_script, "2.0.0") - - assert "bugs" in stderr.lower() - - -class TestHookConformance: - """Tests for Claude Code hook format compliance.""" - - def test_always_exits_zero(self, check_version_script: Path) -> None: - """Test that script always exits 0 (informational only).""" - # Test with warning - stdout, stderr, code = run_check_version_with_mock_claude(check_version_script, "2.0.0") - assert code == 0 - - # Test without warning - stdout, stderr, code = run_check_version_with_mock_claude(check_version_script, "3.0.0") - assert code == 0 - - def test_outputs_valid_json_when_version_ok(self, check_version_script: Path) -> None: - """Test that stdout is valid JSON when version is OK.""" - import json - - stdout, stderr, code = run_check_version_with_mock_claude(check_version_script, "3.0.0") - - # Should output empty JSON object - output = json.loads(stdout.strip()) - assert output == {} - - def test_outputs_structured_json_when_version_low(self, check_version_script: Path) -> None: - """Test that stdout has hookSpecificOutput when version is low.""" - import json - - stdout, stderr, code = run_check_version_with_mock_claude(check_version_script, "2.0.0") - - output = json.loads(stdout.strip()) - assert "hookSpecificOutput" in output - assert output["hookSpecificOutput"]["hookEventName"] == "SessionStart" - assert "additionalContext" in output["hookSpecificOutput"] - assert "VERSION WARNING" in output["hookSpecificOutput"]["additionalContext"] - - def test_warning_goes_to_stderr_and_stdout(self, check_version_script: Path) -> None: - """Test that warning is on stderr (visual) and stdout (context).""" - stdout, stderr, code = run_check_version_with_mock_claude(check_version_script, "2.0.0") - - # Visual warning should be in stderr - assert "WARNING" in stderr - # JSON with context should be in stdout - assert "hookSpecificOutput" in stdout - - -class TestEdgeCases: - """Tests for edge cases and error handling.""" - - def test_claude_command_not_found(self, check_version_script: Path) -> None: - """Test graceful handling when claude command fails.""" - stdout, stderr, code = run_check_version_with_mock_claude( - check_version_script, - None, # Mock failure - ) - - # Should exit 0 and output JSON even if version check fails - assert code == 0 - assert stdout.strip() == "{}" - # No warning since we couldn't 
determine version - assert "WARNING" not in stderr - - def test_version_with_extra_text(self, check_version_script: Path) -> None: - """Test parsing version from output with extra text.""" - # Real output format: "2.1.1 (Claude Code)" - stdout, stderr, code = run_check_version_with_mock_claude(check_version_script, "2.1.14") - - assert code == 0 - # Version 2.1.14 equals minimum, no warning - assert "WARNING" not in stderr - - -class TestDeepworkInstallationCheck: - """Tests for deepwork installation check (blocking).""" - - def test_deepwork_installed_allows_session(self, check_version_script: Path) -> None: - """Test that script proceeds when deepwork is installed.""" - # With mock_deepwork=True (default), deepwork is available - stdout, stderr, code = run_check_version_with_mock_claude( - check_version_script, "3.0.0", mock_deepwork=True - ) - - assert code == 0 - assert "DEEPWORK NOT INSTALLED" not in stderr - - def test_deepwork_not_installed_blocks_session(self, check_version_script: Path) -> None: - """Test that script blocks when deepwork is not installed.""" - stdout, stderr, code = run_check_version_with_mock_claude( - check_version_script, "3.0.0", mock_deepwork=False - ) - - # Should exit with code 2 (blocking error) - assert code == 2 - assert "DEEPWORK NOT INSTALLED" in stderr - - def test_deepwork_error_message_content(self, check_version_script: Path) -> None: - """Test that deepwork error message has helpful content.""" - stdout, stderr, code = run_check_version_with_mock_claude( - check_version_script, "3.0.0", mock_deepwork=False - ) - - # Should mention direct invocation requirement - assert "directly invok" in stderr.lower() - # Should mention NOT using wrappers - assert "uv run deepwork" in stderr - # Should suggest installation options - assert "pipx" in stderr or "pip install" in stderr - - def test_deepwork_error_outputs_json(self, check_version_script: Path) -> None: - """Test that deepwork error outputs valid JSON with error info.""" - import json - - stdout, stderr, code = run_check_version_with_mock_claude( - check_version_script, "3.0.0", mock_deepwork=False - ) - - output = json.loads(stdout.strip()) - assert "hookSpecificOutput" in output - assert "error" in output - assert "deepwork" in output["error"].lower() - # Should have additional context for Claude - assert "additionalContext" in output["hookSpecificOutput"] - assert "DEEPWORK" in output["hookSpecificOutput"]["additionalContext"] - - def test_deepwork_check_happens_before_version_check(self, check_version_script: Path) -> None: - """Test that deepwork check runs before version check.""" - # Even with a low version that would trigger warning, - # missing deepwork should block first - stdout, stderr, code = run_check_version_with_mock_claude( - check_version_script, "1.0.0", mock_deepwork=False - ) - - # Should exit with deepwork error, not version warning - assert code == 2 - assert "DEEPWORK NOT INSTALLED" in stderr - # Should NOT show version warning - assert "CLAUDE CODE VERSION WARNING" not in stderr - - -class TestSessionSourceDetection: - """Tests for skipping non-initial sessions based on source field.""" - - def test_startup_source_runs_normally(self, check_version_script: Path) -> None: - """Test that source='startup' runs the full check.""" - import json - - stdin_json = json.dumps({"source": "startup", "session_id": "test123"}) - stdout, stderr, code = run_check_version_with_mock_claude( - check_version_script, "3.0.0", stdin_json=stdin_json - ) - - # Should run normally and output empty 
JSON (version OK) - assert code == 0 - assert stdout.strip() == "{}" - - def test_resume_source_skips_check(self, check_version_script: Path) -> None: - """Test that source='resume' skips all checks and returns empty JSON.""" - import json - - stdin_json = json.dumps({"source": "resume", "session_id": "test123"}) - stdout, stderr, code = run_check_version_with_mock_claude( - check_version_script, - "1.0.0", - stdin_json=stdin_json, # Low version that would trigger warning - ) - - # Should skip and return empty JSON without warnings - assert code == 0 - assert stdout.strip() == "{}" - assert "WARNING" not in stderr - assert "DEEPWORK" not in stderr - - def test_clear_source_skips_check(self, check_version_script: Path) -> None: - """Test that source='clear' (compact) skips all checks.""" - import json - - stdin_json = json.dumps({"source": "clear", "session_id": "test123"}) - stdout, stderr, code = run_check_version_with_mock_claude( - check_version_script, "1.0.0", stdin_json=stdin_json - ) - - # Should skip and return empty JSON - assert code == 0 - assert stdout.strip() == "{}" - assert "WARNING" not in stderr - - def test_no_source_field_runs_normally(self, check_version_script: Path) -> None: - """Test backwards compatibility: missing source field runs full check.""" - import json - - # JSON without source field (older Claude Code version) - stdin_json = json.dumps({"session_id": "test123"}) - stdout, stderr, code = run_check_version_with_mock_claude( - check_version_script, - "2.0.0", - stdin_json=stdin_json, # Low version - ) - - # Should run normally and show warning (backwards compat) - assert code == 0 - assert "WARNING" in stderr - - def test_empty_stdin_runs_normally(self, check_version_script: Path) -> None: - """Test that empty stdin runs full check (backwards compat).""" - stdout, stderr, code = run_check_version_with_mock_claude( - check_version_script, "2.0.0", stdin_json="" - ) - - # Should run normally and show warning - assert code == 0 - assert "WARNING" in stderr - - def test_resume_skips_even_with_missing_deepwork(self, check_version_script: Path) -> None: - """Test that resume sessions skip before deepwork check.""" - import json - - stdin_json = json.dumps({"source": "resume"}) - stdout, stderr, code = run_check_version_with_mock_claude( - check_version_script, "3.0.0", mock_deepwork=False, stdin_json=stdin_json - ) - - # Should skip immediately, NOT block on deepwork - assert code == 0 - assert stdout.strip() == "{}" - assert "DEEPWORK NOT INSTALLED" not in stderr - - def test_startup_with_low_version_shows_warning(self, check_version_script: Path) -> None: - """Test that startup sessions with low version show warning.""" - import json - - stdin_json = json.dumps({"source": "startup"}) - stdout, stderr, code = run_check_version_with_mock_claude( - check_version_script, "2.0.0", stdin_json=stdin_json - ) - - # Should run full check and show warning - assert code == 0 - assert "WARNING" in stderr - assert "hookSpecificOutput" in stdout - - def test_unknown_source_skips_check(self, check_version_script: Path) -> None: - """Test that unknown source values skip the check.""" - import json - - # Future-proofing: unknown source values should be treated as non-startup - stdin_json = json.dumps({"source": "unknown_future_value"}) - stdout, stderr, code = run_check_version_with_mock_claude( - check_version_script, "1.0.0", stdin_json=stdin_json - ) - - # Should skip and return empty JSON - assert code == 0 - assert stdout.strip() == "{}" - assert "WARNING" not in stderr diff 
--git a/tests/shell_script_tests/test_hooks.py b/tests/shell_script_tests/test_hooks.py deleted file mode 100644 index 0910b6c9..00000000 --- a/tests/shell_script_tests/test_hooks.py +++ /dev/null @@ -1,398 +0,0 @@ -"""Tests for hook shell scripts and JSON format compliance. - -# ****************************************************************************** -# *** CRITICAL CONTRACT TESTS *** -# ****************************************************************************** -# -# These tests verify the EXACT format required by Claude Code hooks as -# documented in: doc/platforms/claude/hooks_system.md -# -# DO NOT MODIFY these tests without first consulting the official Claude Code -# documentation at: https://docs.anthropic.com/en/docs/claude-code/hooks -# -# Hook Contract Summary: -# - Exit code 0: Success, stdout parsed as JSON -# - Exit code 2: Blocking error, stderr shown (NOT used for JSON format) -# - Allow response: {} (empty JSON object) -# - Block response: {"decision": "block", "reason": "..."} -# -# CRITICAL: Hooks using JSON output format MUST return exit code 0. -# The "decision" field in the JSON controls blocking behavior, NOT the exit code. -# -# ****************************************************************************** - -Claude Code hooks have specific JSON response formats that must be followed: - -Stop hooks (hooks.after_agent): - - {} - Allow stop (empty object) - - {"decision": "block", "reason": "..."} - Block stop with reason - -UserPromptSubmit hooks (hooks.before_prompt): - - {} - No response needed (empty object) - - No output - Also acceptable - -BeforeTool hooks (hooks.before_tool): - - {} - Allow tool execution - - {"decision": "block", "reason": "..."} - Block tool execution - -All hooks: - - Must return valid JSON if producing output - - Must not contain non-JSON output on stdout (stderr is ok) - - Exit code 0 indicates success -""" - -import json -import os -import subprocess -from pathlib import Path - -import pytest - -# ============================================================================= -# Helper Functions -# ============================================================================= - - -def run_platform_wrapper_script( - script_path: Path, - python_module: str, - hook_input: dict, - src_dir: Path, -) -> tuple[str, str, int]: - """ - Run a platform hook wrapper script with the given input. - - Args: - script_path: Path to the wrapper script (claude_hook.sh or gemini_hook.sh) - python_module: Python module to invoke - hook_input: JSON input to pass via stdin - src_dir: Path to src directory for PYTHONPATH - - Returns: - Tuple of (stdout, stderr, return_code) - """ - env = os.environ.copy() - env["PYTHONPATH"] = str(src_dir) - - result = subprocess.run( - ["bash", str(script_path), python_module], - capture_output=True, - text=True, - input=json.dumps(hook_input), - env=env, - ) - - return result.stdout, result.stderr, result.returncode - - -def validate_json_output(output: str) -> dict | None: - """ - Validate that output is valid JSON or empty. - - Args: - output: The stdout from a hook script - - Returns: - Parsed JSON dict, or None if empty/no output - - Raises: - AssertionError: If output is invalid JSON - """ - stripped = output.strip() - - if not stripped: - return None - - try: - result = json.loads(stripped) - assert isinstance(result, dict), "Hook output must be a JSON object" - return result - except json.JSONDecodeError as e: - pytest.fail(f"Invalid JSON output: {stripped!r}. 
Error: {e}") - - -# ****************************************************************************** -# *** DO NOT EDIT THIS FUNCTION! *** -# As documented in doc/platforms/claude/hooks_system.md, Stop hooks must return: -# - {} (empty object) to allow -# - {"decision": "block", "reason": "..."} to block -# Any other format will cause undefined behavior in Claude Code. -# ****************************************************************************** -def validate_stop_hook_response(response: dict | None) -> None: - """ - Validate a Stop hook response follows Claude Code format. - - Args: - response: Parsed JSON response or None - - Raises: - AssertionError: If response format is invalid - """ - if response is None: - # No output is acceptable for stop hooks - return - - if response == {}: - # Empty object means allow stop - return - - # Must have decision and reason for blocking - assert "decision" in response, ( - f"Stop hook blocking response must have 'decision' key: {response}" - ) - assert response["decision"] == "block", ( - f"Stop hook decision must be 'block', got: {response['decision']}" - ) - assert "reason" in response, f"Stop hook blocking response must have 'reason' key: {response}" - assert isinstance(response["reason"], str), f"Stop hook reason must be a string: {response}" - - # Reason should not be empty when blocking - assert response["reason"].strip(), "Stop hook blocking reason should not be empty" - - -def validate_prompt_hook_response(response: dict | None) -> None: - """ - Validate a UserPromptSubmit hook response. - - Args: - response: Parsed JSON response or None - - Raises: - AssertionError: If response format is invalid - """ - if response is None: - # No output is acceptable - return - - # Empty object or valid JSON object is fine - assert isinstance(response, dict), f"Prompt hook output must be a JSON object: {response}" - - -# ============================================================================= -# Platform Wrapper Script Tests -# ============================================================================= - - -class TestClaudeHookWrapper: - """Tests for claude_hook.sh wrapper script.""" - - def test_script_exists_and_is_executable(self, hooks_dir: Path) -> None: - """Test that the Claude hook script exists and is executable.""" - script_path = hooks_dir / "claude_hook.sh" - assert script_path.exists(), "claude_hook.sh should exist" - assert os.access(script_path, os.X_OK), "claude_hook.sh should be executable" - - def test_usage_error_without_module(self, hooks_dir: Path, src_dir: Path) -> None: - """Test that script shows usage error when no module provided.""" - script_path = hooks_dir / "claude_hook.sh" - env = os.environ.copy() - env["PYTHONPATH"] = str(src_dir) - - result = subprocess.run( - ["bash", str(script_path)], - capture_output=True, - text=True, - env=env, - ) - - assert result.returncode == 1 - assert "Usage:" in result.stderr - - def test_sets_platform_environment_variable(self, hooks_dir: Path, src_dir: Path) -> None: - """Test that the script sets DEEPWORK_HOOK_PLATFORM correctly.""" - script_path = hooks_dir / "claude_hook.sh" - content = script_path.read_text() - assert 'DEEPWORK_HOOK_PLATFORM="claude"' in content - - -class TestGeminiHookWrapper: - """Tests for gemini_hook.sh wrapper script.""" - - def test_script_exists_and_is_executable(self, hooks_dir: Path) -> None: - """Test that the Gemini hook script exists and is executable.""" - script_path = hooks_dir / "gemini_hook.sh" - assert script_path.exists(), "gemini_hook.sh 
should exist" - assert os.access(script_path, os.X_OK), "gemini_hook.sh should be executable" - - def test_usage_error_without_module(self, hooks_dir: Path, src_dir: Path) -> None: - """Test that script shows usage error when no module provided.""" - script_path = hooks_dir / "gemini_hook.sh" - env = os.environ.copy() - env["PYTHONPATH"] = str(src_dir) - - result = subprocess.run( - ["bash", str(script_path)], - capture_output=True, - text=True, - env=env, - ) - - assert result.returncode == 1 - assert "Usage:" in result.stderr - - def test_sets_platform_environment_variable(self, hooks_dir: Path, src_dir: Path) -> None: - """Test that the script sets DEEPWORK_HOOK_PLATFORM correctly.""" - script_path = hooks_dir / "gemini_hook.sh" - content = script_path.read_text() - assert 'DEEPWORK_HOOK_PLATFORM="gemini"' in content - - -# ============================================================================= -# Integration Tests -# ============================================================================= - - -class TestHookWrapperIntegration: - """Integration tests for hook wrappers with actual Python hooks.""" - - @pytest.fixture - def test_hook_module(self, tmp_path: Path) -> tuple[Path, str]: - """Create a temporary test hook module.""" - module_dir = tmp_path / "test_hooks" - module_dir.mkdir(parents=True) - - # Create __init__.py - (module_dir / "__init__.py").write_text("") - - # Create the hook module - hook_code = ''' -"""Test hook module.""" -import os -import sys - -from deepwork.hooks.wrapper import ( - HookInput, - HookOutput, - NormalizedEvent, - Platform, - run_hook, -) - - -def test_hook(hook_input: HookInput) -> HookOutput: - """Test hook that blocks for after_agent events.""" - if hook_input.event == NormalizedEvent.AFTER_AGENT: - return HookOutput(decision="block", reason="Test block reason") - return HookOutput() - - -def main() -> None: - platform_str = os.environ.get("DEEPWORK_HOOK_PLATFORM", "claude") - try: - platform = Platform(platform_str) - except ValueError: - platform = Platform.CLAUDE - - exit_code = run_hook(test_hook, platform) - sys.exit(exit_code) - - -if __name__ == "__main__": - main() -''' - (module_dir / "test_hook.py").write_text(hook_code) - - return tmp_path, "test_hooks.test_hook" - - def test_claude_wrapper_with_stop_event( - self, - hooks_dir: Path, - src_dir: Path, - test_hook_module: tuple[Path, str], - ) -> None: - """Test Claude wrapper processes Stop event correctly.""" - tmp_path, module_name = test_hook_module - script_path = hooks_dir / "claude_hook.sh" - - hook_input = { - "session_id": "test123", - "hook_event_name": "Stop", - "cwd": "/project", - } - - env = os.environ.copy() - env["PYTHONPATH"] = f"{src_dir}:{tmp_path}" - - result = subprocess.run( - ["bash", str(script_path), module_name], - capture_output=True, - text=True, - input=json.dumps(hook_input), - env=env, - ) - - # Exit code 0 even when blocking - the JSON decision field controls behavior - assert result.returncode == 0, f"Expected exit code 0. 
stderr: {result.stderr}" - - output = json.loads(result.stdout.strip()) - assert output["decision"] == "block" - assert "Test block reason" in output["reason"] - - def test_gemini_wrapper_with_afteragent_event( - self, - hooks_dir: Path, - src_dir: Path, - test_hook_module: tuple[Path, str], - ) -> None: - """Test Gemini wrapper processes AfterAgent event correctly.""" - tmp_path, module_name = test_hook_module - script_path = hooks_dir / "gemini_hook.sh" - - hook_input = { - "session_id": "test456", - "hook_event_name": "AfterAgent", - "cwd": "/project", - } - - env = os.environ.copy() - env["PYTHONPATH"] = f"{src_dir}:{tmp_path}" - - result = subprocess.run( - ["bash", str(script_path), module_name], - capture_output=True, - text=True, - input=json.dumps(hook_input), - env=env, - ) - - # Exit code 0 even when blocking - the JSON decision field controls behavior - assert result.returncode == 0, f"Expected exit code 0. stderr: {result.stderr}" - - output = json.loads(result.stdout.strip()) - # Gemini should get "deny" instead of "block" - assert output["decision"] == "deny" - assert "Test block reason" in output["reason"] - - def test_non_blocking_event( - self, - hooks_dir: Path, - src_dir: Path, - test_hook_module: tuple[Path, str], - ) -> None: - """Test that non-blocking events return exit code 0.""" - tmp_path, module_name = test_hook_module - script_path = hooks_dir / "claude_hook.sh" - - # SessionStart is not blocked by the test hook - hook_input = { - "session_id": "test789", - "hook_event_name": "SessionStart", - "cwd": "/project", - } - - env = os.environ.copy() - env["PYTHONPATH"] = f"{src_dir}:{tmp_path}" - - result = subprocess.run( - ["bash", str(script_path), module_name], - capture_output=True, - text=True, - input=json.dumps(hook_input), - env=env, - ) - - assert result.returncode == 0, f"Expected exit code 0. stderr: {result.stderr}" - output = json.loads(result.stdout.strip()) - assert output == {} or output.get("decision", "") not in ("block", "deny") - - diff --git a/tests/shell_script_tests/test_make_new_job.py b/tests/shell_script_tests/test_make_new_job.py deleted file mode 100644 index 913d66ea..00000000 --- a/tests/shell_script_tests/test_make_new_job.py +++ /dev/null @@ -1,313 +0,0 @@ -"""Tests for make_new_job.sh utility script. - -This script creates the directory structure for a new DeepWork job. -It should: -1. Validate job name format (lowercase, letters/numbers/underscores) -2. Create the job directory structure under .deepwork/jobs/ -3. Create required subdirectories (steps/, hooks/, templates/) -4. Create AGENTS.md with guidance -5. Handle existing jobs gracefully (error) -6. 
Handle missing .deepwork directory by creating it -""" - -from pathlib import Path - -import pytest - -from .conftest import run_shell_script - - -@pytest.fixture -def project_dir(tmp_path: Path) -> Path: - """Create a basic project directory.""" - return tmp_path - - -@pytest.fixture -def project_with_deepwork(tmp_path: Path) -> Path: - """Create a project with existing .deepwork/jobs directory.""" - jobs_dir = tmp_path / ".deepwork" / "jobs" - jobs_dir.mkdir(parents=True) - return tmp_path - - -def run_make_new_job( - script_path: Path, - cwd: Path, - job_name: str | None = None, -) -> tuple[str, str, int]: - """Run the make_new_job.sh script.""" - args = [job_name] if job_name else None - return run_shell_script(script_path, cwd, args=args, env_extra={"NO_COLOR": "1"}) - - -class TestMakeNewJobUsage: - """Tests for make_new_job.sh usage and help output.""" - - def test_shows_usage_without_arguments(self, jobs_scripts_dir: Path, project_dir: Path) -> None: - """Test that the script shows usage when called without arguments.""" - script_path = jobs_scripts_dir / "make_new_job.sh" - stdout, stderr, code = run_make_new_job(script_path, project_dir) - - assert code == 1, "Should exit with error when no arguments" - assert "Usage:" in stdout, "Should show usage information" - assert "job_name" in stdout.lower(), "Should mention job_name argument" - - def test_shows_example_in_usage(self, jobs_scripts_dir: Path, project_dir: Path) -> None: - """Test that the usage includes an example.""" - script_path = jobs_scripts_dir / "make_new_job.sh" - stdout, stderr, code = run_make_new_job(script_path, project_dir) - - assert "Example:" in stdout, "Should show example usage" - - -class TestMakeNewJobNameValidation: - """Tests for job name validation in make_new_job.sh.""" - - def test_accepts_lowercase_name( - self, jobs_scripts_dir: Path, project_with_deepwork: Path - ) -> None: - """Test that lowercase names are accepted.""" - script_path = jobs_scripts_dir / "make_new_job.sh" - stdout, stderr, code = run_make_new_job(script_path, project_with_deepwork, "valid_job") - - assert code == 0, f"Should accept lowercase name. stderr: {stderr}" - - def test_accepts_name_with_numbers( - self, jobs_scripts_dir: Path, project_with_deepwork: Path - ) -> None: - """Test that names with numbers are accepted.""" - script_path = jobs_scripts_dir / "make_new_job.sh" - stdout, stderr, code = run_make_new_job(script_path, project_with_deepwork, "job123") - - assert code == 0, f"Should accept name with numbers. stderr: {stderr}" - - def test_accepts_name_with_underscores( - self, jobs_scripts_dir: Path, project_with_deepwork: Path - ) -> None: - """Test that names with underscores are accepted.""" - script_path = jobs_scripts_dir / "make_new_job.sh" - stdout, stderr, code = run_make_new_job(script_path, project_with_deepwork, "my_new_job") - - assert code == 0, f"Should accept underscores. 
stderr: {stderr}" - - def test_rejects_uppercase_name( - self, jobs_scripts_dir: Path, project_with_deepwork: Path - ) -> None: - """Test that uppercase names are rejected.""" - script_path = jobs_scripts_dir / "make_new_job.sh" - stdout, stderr, code = run_make_new_job(script_path, project_with_deepwork, "InvalidJob") - - assert code != 0, "Should reject uppercase name" - # Check for error message in stdout (script uses echo) - output = stdout + stderr - assert "invalid" in output.lower() or "error" in output.lower(), ( - "Should show error for invalid name" - ) - - def test_rejects_name_starting_with_number( - self, jobs_scripts_dir: Path, project_with_deepwork: Path - ) -> None: - """Test that names starting with numbers are rejected.""" - script_path = jobs_scripts_dir / "make_new_job.sh" - stdout, stderr, code = run_make_new_job(script_path, project_with_deepwork, "123job") - - assert code != 0, "Should reject name starting with number" - - def test_rejects_name_with_hyphens( - self, jobs_scripts_dir: Path, project_with_deepwork: Path - ) -> None: - """Test that names with hyphens are rejected.""" - script_path = jobs_scripts_dir / "make_new_job.sh" - stdout, stderr, code = run_make_new_job(script_path, project_with_deepwork, "my-job") - - assert code != 0, "Should reject name with hyphens" - - def test_rejects_name_with_spaces( - self, jobs_scripts_dir: Path, project_with_deepwork: Path - ) -> None: - """Test that names with spaces are rejected.""" - script_path = jobs_scripts_dir / "make_new_job.sh" - # This will be passed as two arguments by bash, causing an error - stdout, stderr, code = run_make_new_job(script_path, project_with_deepwork, "my job") - - # Either fails validation or treats "job" as separate (job is valid name) - # The key is it shouldn't create "my job" as a directory name - bad_dir = project_with_deepwork / ".deepwork" / "jobs" / "my job" - assert not bad_dir.exists(), "Should not create directory with space in name" - - -class TestMakeNewJobDirectoryStructure: - """Tests for directory structure creation in make_new_job.sh.""" - - def test_creates_main_job_directory( - self, jobs_scripts_dir: Path, project_with_deepwork: Path - ) -> None: - """Test that the main job directory is created.""" - script_path = jobs_scripts_dir / "make_new_job.sh" - run_make_new_job(script_path, project_with_deepwork, "test_job") - - job_dir = project_with_deepwork / ".deepwork" / "jobs" / "test_job" - assert job_dir.exists(), "Job directory should be created" - assert job_dir.is_dir(), "Job path should be a directory" - - def test_creates_steps_directory( - self, jobs_scripts_dir: Path, project_with_deepwork: Path - ) -> None: - """Test that steps/ subdirectory is created.""" - script_path = jobs_scripts_dir / "make_new_job.sh" - run_make_new_job(script_path, project_with_deepwork, "test_job") - - steps_dir = project_with_deepwork / ".deepwork" / "jobs" / "test_job" / "steps" - assert steps_dir.exists(), "steps/ directory should be created" - assert steps_dir.is_dir(), "steps/ should be a directory" - - def test_creates_hooks_directory( - self, jobs_scripts_dir: Path, project_with_deepwork: Path - ) -> None: - """Test that hooks/ subdirectory is created.""" - script_path = jobs_scripts_dir / "make_new_job.sh" - run_make_new_job(script_path, project_with_deepwork, "test_job") - - hooks_dir = project_with_deepwork / ".deepwork" / "jobs" / "test_job" / "hooks" - assert hooks_dir.exists(), "hooks/ directory should be created" - assert hooks_dir.is_dir(), "hooks/ should be a directory" - - 
def test_creates_templates_directory( - self, jobs_scripts_dir: Path, project_with_deepwork: Path - ) -> None: - """Test that templates/ subdirectory is created.""" - script_path = jobs_scripts_dir / "make_new_job.sh" - run_make_new_job(script_path, project_with_deepwork, "test_job") - - templates_dir = project_with_deepwork / ".deepwork" / "jobs" / "test_job" / "templates" - assert templates_dir.exists(), "templates/ directory should be created" - assert templates_dir.is_dir(), "templates/ should be a directory" - - def test_creates_gitkeep_files( - self, jobs_scripts_dir: Path, project_with_deepwork: Path - ) -> None: - """Test that .gitkeep files are created in empty directories.""" - script_path = jobs_scripts_dir / "make_new_job.sh" - run_make_new_job(script_path, project_with_deepwork, "test_job") - - job_dir = project_with_deepwork / ".deepwork" / "jobs" / "test_job" - - hooks_gitkeep = job_dir / "hooks" / ".gitkeep" - templates_gitkeep = job_dir / "templates" / ".gitkeep" - - assert hooks_gitkeep.exists(), "hooks/.gitkeep should be created" - assert templates_gitkeep.exists(), "templates/.gitkeep should be created" - - def test_creates_agents_md(self, jobs_scripts_dir: Path, project_with_deepwork: Path) -> None: - """Test that AGENTS.md file is created.""" - script_path = jobs_scripts_dir / "make_new_job.sh" - run_make_new_job(script_path, project_with_deepwork, "test_job") - - agents_md = project_with_deepwork / ".deepwork" / "jobs" / "test_job" / "AGENTS.md" - assert agents_md.exists(), "AGENTS.md should be created" - - content = agents_md.read_text() - assert "Job Management" in content, "AGENTS.md should have job management content" - assert "deepwork_jobs" in content, "AGENTS.md should reference deepwork_jobs" - - -class TestMakeNewJobAgentsMdContent: - """Tests for AGENTS.md content in make_new_job.sh.""" - - def test_agents_md_contains_slash_commands( - self, jobs_scripts_dir: Path, project_with_deepwork: Path - ) -> None: - """Test that AGENTS.md lists recommended slash commands.""" - script_path = jobs_scripts_dir / "make_new_job.sh" - run_make_new_job(script_path, project_with_deepwork, "test_job") - - agents_md = project_with_deepwork / ".deepwork" / "jobs" / "test_job" / "AGENTS.md" - content = agents_md.read_text() - - assert "/deepwork_jobs.define" in content, "Should mention define command" - assert "/deepwork_jobs.implement" in content, "Should mention implement command" - assert "/deepwork_jobs.learn" in content, "Should mention learn command" - - def test_agents_md_contains_directory_structure( - self, jobs_scripts_dir: Path, project_with_deepwork: Path - ) -> None: - """Test that AGENTS.md documents the directory structure.""" - script_path = jobs_scripts_dir / "make_new_job.sh" - run_make_new_job(script_path, project_with_deepwork, "test_job") - - agents_md = project_with_deepwork / ".deepwork" / "jobs" / "test_job" / "AGENTS.md" - content = agents_md.read_text() - - assert "job.yml" in content, "Should mention job.yml" - assert "steps/" in content, "Should document steps directory" - assert "hooks/" in content, "Should document hooks directory" - assert "templates/" in content, "Should document templates directory" - - -class TestMakeNewJobErrorHandling: - """Tests for error handling in make_new_job.sh.""" - - def test_fails_if_job_already_exists( - self, jobs_scripts_dir: Path, project_with_deepwork: Path - ) -> None: - """Test that creating a job that already exists fails.""" - # First create the job - script_path = jobs_scripts_dir / "make_new_job.sh" - 
run_make_new_job(script_path, project_with_deepwork, "existing_job")
-
-        # Try to create it again
-        stdout, stderr, code = run_make_new_job(script_path, project_with_deepwork, "existing_job")
-
-        assert code != 0, "Should fail when job already exists"
-        output = stdout + stderr
-        assert "exist" in output.lower() or "error" in output.lower(), (
-            "Should mention that job exists"
-        )
-
-    def test_creates_deepwork_directory_if_missing(
-        self, jobs_scripts_dir: Path, project_dir: Path
-    ) -> None:
-        """Test that .deepwork/jobs is created if it doesn't exist."""
-        script_path = jobs_scripts_dir / "make_new_job.sh"
-        stdout, stderr, code = run_make_new_job(script_path, project_dir, "new_job")
-
-        assert code == 0, f"Should succeed even without .deepwork. stderr: {stderr}"
-
-        job_dir = project_dir / ".deepwork" / "jobs" / "new_job"
-        assert job_dir.exists(), "Should create .deepwork/jobs/new_job"
-
-
-class TestMakeNewJobOutput:
-    """Tests for output messages in make_new_job.sh."""
-
-    def test_shows_success_message(
-        self, jobs_scripts_dir: Path, project_with_deepwork: Path
-    ) -> None:
-        """Test that success message is shown."""
-        script_path = jobs_scripts_dir / "make_new_job.sh"
-        stdout, stderr, code = run_make_new_job(script_path, project_with_deepwork, "new_job")
-
-        assert code == 0, f"Should succeed. stderr: {stderr}"
-        # Check for informational output
-        assert "new_job" in stdout, "Output should mention job name"
-
-    def test_shows_next_steps(self, jobs_scripts_dir: Path, project_with_deepwork: Path) -> None:
-        """Test that next steps are shown after creation."""
-        script_path = jobs_scripts_dir / "make_new_job.sh"
-        stdout, stderr, code = run_make_new_job(script_path, project_with_deepwork, "new_job")
-
-        assert code == 0, f"Should succeed. stderr: {stderr}"
-        # Should mention what to do next
-        assert "next" in stdout.lower() or "step" in stdout.lower(), "Should show next steps"
-
-    def test_shows_directory_structure_created(
-        self, jobs_scripts_dir: Path, project_with_deepwork: Path
-    ) -> None:
-        """Test that created directory structure is shown."""
-        script_path = jobs_scripts_dir / "make_new_job.sh"
-        stdout, stderr, code = run_make_new_job(script_path, project_with_deepwork, "new_job")
-
-        assert code == 0, f"Should succeed. stderr: {stderr}"
-        # Should show what was created
-        assert "AGENTS.md" in stdout or "steps" in stdout, "Should show created structure"
diff --git a/tests/unit/mcp/test_async_interface.py b/tests/unit/mcp/test_async_interface.py
new file mode 100644
index 00000000..82e00b9b
--- /dev/null
+++ b/tests/unit/mcp/test_async_interface.py
@@ -0,0 +1,126 @@
+"""Tests to ensure the MCP interface remains async.
+
+These tests serve as a regression guard to ensure that key async methods
+don't accidentally get converted back to sync methods, which would break
+concurrency guarantees
+"""
+
+import asyncio
+import inspect
+from pathlib import Path
+
+import pytest
+
+from deepwork.mcp.quality_gate import MockQualityGate, QualityGate
+from deepwork.mcp.state import StateManager
+from deepwork.mcp.tools import WorkflowTools
+
+
+class TestAsyncInterfaceRegression:
+    """Tests that verify async interface contract is maintained."""
+
+    def test_state_manager_async_methods(self) -> None:
+        """Verify StateManager methods that must be async remain async."""
+        async_methods = [
+            "create_session",
+            "load_session",
+            "start_step",
+            "complete_step",
+            "record_quality_attempt",
+            "advance_to_step",
+            "complete_workflow",
+            "list_sessions",
+            "find_active_sessions_for_workflow",
+            "delete_session",
+        ]
+
+        for method_name in async_methods:
+            method = getattr(StateManager, method_name)
+            assert inspect.iscoroutinefunction(method), (
+                f"StateManager.{method_name} must be async (coroutine function). "
+                f"This is required for concurrent access safety."
+            )
+
+    def test_state_manager_has_lock(self, tmp_path: Path) -> None:
+        """Verify StateManager has an asyncio.Lock for thread safety."""
+        manager = StateManager(tmp_path)
+
+        assert hasattr(manager, "_lock"), "StateManager must have _lock attribute"
+        assert isinstance(manager._lock, asyncio.Lock), (
+            "StateManager._lock must be an asyncio.Lock for async concurrency safety"
+        )
+
+    def test_workflow_tools_async_methods(self) -> None:
+        """Verify WorkflowTools methods that must be async remain async."""
+        async_methods = [
+            "start_workflow",
+            "finished_step",
+        ]
+
+        for method_name in async_methods:
+            method = getattr(WorkflowTools, method_name)
+            assert inspect.iscoroutinefunction(method), (
+                f"WorkflowTools.{method_name} must be async (coroutine function). "
+                f"This is required for non-blocking MCP tool execution."
+            )
+
+    def test_quality_gate_async_methods(self) -> None:
+        """Verify QualityGate methods that must be async remain async."""
+        async_methods = [
+            "evaluate",
+            "_build_payload",
+        ]
+
+        for method_name in async_methods:
+            method = getattr(QualityGate, method_name)
+            assert inspect.iscoroutinefunction(method), (
+                f"QualityGate.{method_name} must be async (coroutine function). "
+                f"This is required for non-blocking subprocess execution."
+            )
+
+    def test_mock_quality_gate_async_methods(self) -> None:
+        """Verify MockQualityGate maintains async interface."""
+        method = getattr(MockQualityGate, "evaluate")
+        assert inspect.iscoroutinefunction(method), (
+            "MockQualityGate.evaluate must be async to match QualityGate interface"
+        )
+
+    async def test_concurrent_state_operations_are_serialized(
+        self, tmp_path: Path
+    ) -> None:
+        """Test that concurrent state operations don't corrupt state.
+
+        This test verifies that the async lock properly serializes access
+        to shared state, preventing race conditions.
+        """
+        deepwork_dir = tmp_path / ".deepwork"
+        deepwork_dir.mkdir()
+        (deepwork_dir / "tmp").mkdir()
+
+        manager = StateManager(tmp_path)
+
+        # Create initial session
+        session = await manager.create_session(
+            job_name="test_job",
+            workflow_name="main",
+            goal="Test goal",
+            first_step_id="step1",
+        )
+
+        # Run multiple concurrent quality attempt recordings
+        async def record_attempt() -> int:
+            return await manager.record_quality_attempt("step1")
+
+        # Execute 10 concurrent recordings
+        results = await asyncio.gather(*[record_attempt() for _ in range(10)])
+
+        # Each should get a unique, sequential number (1-10)
+        assert sorted(results) == list(range(1, 11)), (
+            "Concurrent quality_attempt recordings should be serialized. "
+            f"Expected [1..10] but got {sorted(results)}"
+        )
+
+        # Verify final count is correct
+        final_session = manager.get_active_session()
+        assert final_session is not None
+        assert final_session.step_progress["step1"].quality_attempts == 10
diff --git a/tests/unit/mcp/test_quality_gate.py b/tests/unit/mcp/test_quality_gate.py
index 783730f4..45fe6375 100644
--- a/tests/unit/mcp/test_quality_gate.py
+++ b/tests/unit/mcp/test_quality_gate.py
@@ -48,13 +48,13 @@ def test_build_instructions(self, quality_gate: QualityGate) -> None:
         assert "passed" in instructions  # JSON format mentioned
         assert "feedback" in instructions  # JSON format mentioned
 
-    def test_build_payload(self, quality_gate: QualityGate, project_root: Path) -> None:
+    async def test_build_payload(self, quality_gate: QualityGate, project_root: Path) -> None:
         """Test building payload with file contents."""
         # Create test output file
         output_file = project_root / "output.md"
         output_file.write_text("Test content")
 
-        payload = quality_gate._build_payload(
+        payload = await quality_gate._build_payload(
             outputs=["output.md"],
             project_root=project_root,
         )
@@ -64,11 +64,11 @@ def test_build_payload(self, quality_gate: QualityGate, project_root: Path) -> N
         # Check for the new separator format (20 dashes)
         assert "--------------------" in payload
 
-    def test_build_payload_missing_file(
+    async def test_build_payload_missing_file(
         self, quality_gate: QualityGate, project_root: Path
     ) -> None:
         """Test building payload with missing file."""
-        payload = quality_gate._build_payload(
+        payload = await quality_gate._build_payload(
             outputs=["nonexistent.md"],
             project_root=project_root,
         )
@@ -125,9 +125,11 @@ def test_parse_response_invalid_json(self, quality_gate: QualityGate) -> None:
         with pytest.raises(QualityGateError, match="Failed to parse"):
             quality_gate._parse_response(response)
 
-    def test_evaluate_no_criteria(self, quality_gate: QualityGate, project_root: Path) -> None:
+    async def test_evaluate_no_criteria(
+        self, quality_gate: QualityGate, project_root: Path
+    ) -> None:
         """Test evaluation with no criteria auto-passes."""
-        result = quality_gate.evaluate(
+        result = await quality_gate.evaluate(
             quality_criteria=[],
             outputs=["output.md"],
             project_root=project_root,
@@ -140,11 +142,11 @@ def test_evaluate_no_criteria(self, quality_gate: QualityGate, project_root: Pat
 class TestMockQualityGate:
     """Tests for MockQualityGate class."""
 
-    def test_mock_passes_by_default(self, project_root: Path) -> None:
+    async def test_mock_passes_by_default(self, project_root: Path) -> None:
         """Test mock gate passes by default."""
         gate = MockQualityGate()
 
-        result = gate.evaluate(
+        result = await gate.evaluate(
             quality_criteria=["Criterion 1"],
             outputs=["output.md"],
             project_root=project_root,
@@ -153,11 +155,11 @@ def 
test_mock_passes_by_default(self, project_root: Path) -> None: assert result.passed is True assert len(gate.evaluations) == 1 - def test_mock_can_fail(self, project_root: Path) -> None: + async def test_mock_can_fail(self, project_root: Path) -> None: """Test mock gate can be configured to fail.""" gate = MockQualityGate(should_pass=False, feedback="Mock failure") - result = gate.evaluate( + result = await gate.evaluate( quality_criteria=["Criterion 1"], outputs=["output.md"], project_root=project_root, @@ -166,16 +168,16 @@ def test_mock_can_fail(self, project_root: Path) -> None: assert result.passed is False assert result.feedback == "Mock failure" - def test_mock_records_evaluations(self, project_root: Path) -> None: + async def test_mock_records_evaluations(self, project_root: Path) -> None: """Test mock gate records evaluations.""" gate = MockQualityGate() - gate.evaluate( + await gate.evaluate( quality_criteria=["Criterion 1"], outputs=["output1.md"], project_root=project_root, ) - gate.evaluate( + await gate.evaluate( quality_criteria=["Criterion 2"], outputs=["output2.md"], project_root=project_root, diff --git a/tests/unit/mcp/test_state.py b/tests/unit/mcp/test_state.py index 2eec2a0a..2b27189a 100644 --- a/tests/unit/mcp/test_state.py +++ b/tests/unit/mcp/test_state.py @@ -52,9 +52,9 @@ def test_generate_branch_name_without_instance(self, state_manager: StateManager # Should be a date like 20240101 assert len(branch.split("-")[-1]) == 8 - def test_create_session(self, state_manager: StateManager) -> None: + async def test_create_session(self, state_manager: StateManager) -> None: """Test creating a new session.""" - session = state_manager.create_session( + session = await state_manager.create_session( job_name="test_job", workflow_name="main", goal="Complete the task", @@ -74,10 +74,10 @@ def test_create_session(self, state_manager: StateManager) -> None: session_file = state_manager._session_file(session.session_id) assert session_file.exists() - def test_load_session(self, state_manager: StateManager) -> None: + async def test_load_session(self, state_manager: StateManager) -> None: """Test loading an existing session.""" # Create a session first - created_session = state_manager.create_session( + created_session = await state_manager.create_session( job_name="test_job", workflow_name="main", goal="Complete the task", @@ -86,24 +86,24 @@ def test_load_session(self, state_manager: StateManager) -> None: # Create a new state manager and load the session new_manager = StateManager(state_manager.project_root) - loaded_session = new_manager.load_session(created_session.session_id) + loaded_session = await new_manager.load_session(created_session.session_id) assert loaded_session.session_id == created_session.session_id assert loaded_session.job_name == "test_job" assert loaded_session.goal == "Complete the task" - def test_load_session_not_found(self, state_manager: StateManager) -> None: + async def test_load_session_not_found(self, state_manager: StateManager) -> None: """Test loading non-existent session.""" with pytest.raises(StateError, match="Session not found"): - state_manager.load_session("nonexistent") + await state_manager.load_session("nonexistent") - def test_get_active_session(self, state_manager: StateManager) -> None: + async def test_get_active_session(self, state_manager: StateManager) -> None: """Test getting active session.""" # No active session initially assert state_manager.get_active_session() is None # Create session - session = 
state_manager.create_session( + session = await state_manager.create_session( job_name="test_job", workflow_name="main", goal="Complete the task", @@ -117,16 +117,16 @@ def test_require_active_session(self, state_manager: StateManager) -> None: with pytest.raises(StateError, match="No active workflow session"): state_manager.require_active_session() - def test_start_step(self, state_manager: StateManager) -> None: + async def test_start_step(self, state_manager: StateManager) -> None: """Test marking a step as started.""" - state_manager.create_session( + await state_manager.create_session( job_name="test_job", workflow_name="main", goal="Complete the task", first_step_id="step1", ) - state_manager.start_step("step2") + await state_manager.start_step("step2") session = state_manager.get_active_session() assert session is not None @@ -134,16 +134,16 @@ def test_start_step(self, state_manager: StateManager) -> None: assert "step2" in session.step_progress assert session.step_progress["step2"].started_at is not None - def test_complete_step(self, state_manager: StateManager) -> None: + async def test_complete_step(self, state_manager: StateManager) -> None: """Test marking a step as completed.""" - state_manager.create_session( + await state_manager.create_session( job_name="test_job", workflow_name="main", goal="Complete the task", first_step_id="step1", ) - state_manager.complete_step( + await state_manager.complete_step( step_id="step1", outputs=["output1.md", "output2.md"], notes="Done!", @@ -157,9 +157,9 @@ def test_complete_step(self, state_manager: StateManager) -> None: assert progress.outputs == ["output1.md", "output2.md"] assert progress.notes == "Done!" - def test_record_quality_attempt(self, state_manager: StateManager) -> None: + async def test_record_quality_attempt(self, state_manager: StateManager) -> None: """Test recording quality gate attempts.""" - state_manager.create_session( + await state_manager.create_session( job_name="test_job", workflow_name="main", goal="Complete the task", @@ -167,56 +167,56 @@ def test_record_quality_attempt(self, state_manager: StateManager) -> None: ) # First attempt - attempts = state_manager.record_quality_attempt("step1") + attempts = await state_manager.record_quality_attempt("step1") assert attempts == 1 # Second attempt - attempts = state_manager.record_quality_attempt("step1") + attempts = await state_manager.record_quality_attempt("step1") assert attempts == 2 - def test_advance_to_step(self, state_manager: StateManager) -> None: + async def test_advance_to_step(self, state_manager: StateManager) -> None: """Test advancing to a new step.""" - state_manager.create_session( + await state_manager.create_session( job_name="test_job", workflow_name="main", goal="Complete the task", first_step_id="step1", ) - state_manager.advance_to_step("step2", 1) + await state_manager.advance_to_step("step2", 1) session = state_manager.get_active_session() assert session is not None assert session.current_step_id == "step2" assert session.current_entry_index == 1 - def test_complete_workflow(self, state_manager: StateManager) -> None: + async def test_complete_workflow(self, state_manager: StateManager) -> None: """Test marking workflow as complete.""" - state_manager.create_session( + await state_manager.create_session( job_name="test_job", workflow_name="main", goal="Complete the task", first_step_id="step1", ) - state_manager.complete_workflow() + await state_manager.complete_workflow() session = state_manager.get_active_session() assert session is 
not None assert session.status == "completed" assert session.completed_at is not None - def test_get_all_outputs(self, state_manager: StateManager) -> None: + async def test_get_all_outputs(self, state_manager: StateManager) -> None: """Test getting all outputs from completed steps.""" - state_manager.create_session( + await state_manager.create_session( job_name="test_job", workflow_name="main", goal="Complete the task", first_step_id="step1", ) - state_manager.complete_step("step1", ["output1.md"]) - state_manager.complete_step("step2", ["output2.md", "output3.md"]) + await state_manager.complete_step("step1", ["output1.md"]) + await state_manager.complete_step("step2", ["output2.md", "output3.md"]) outputs = state_manager.get_all_outputs() @@ -225,53 +225,53 @@ def test_get_all_outputs(self, state_manager: StateManager) -> None: assert "output3.md" in outputs assert len(outputs) == 3 - def test_list_sessions(self, state_manager: StateManager) -> None: + async def test_list_sessions(self, state_manager: StateManager) -> None: """Test listing all sessions.""" # Create multiple sessions - state_manager.create_session( + await state_manager.create_session( job_name="job1", workflow_name="main", goal="Goal 1", first_step_id="step1", ) - state_manager.create_session( + await state_manager.create_session( job_name="job2", workflow_name="main", goal="Goal 2", first_step_id="step1", ) - sessions = state_manager.list_sessions() + sessions = await state_manager.list_sessions() assert len(sessions) == 2 job_names = {s.job_name for s in sessions} assert "job1" in job_names assert "job2" in job_names - def test_find_active_sessions_for_workflow(self, state_manager: StateManager) -> None: + async def test_find_active_sessions_for_workflow(self, state_manager: StateManager) -> None: """Test finding active sessions for a workflow.""" # Create sessions for different workflows - state_manager.create_session( + await state_manager.create_session( job_name="test_job", workflow_name="main", goal="Goal 1", first_step_id="step1", ) - state_manager.create_session( + await state_manager.create_session( job_name="test_job", workflow_name="other", goal="Goal 2", first_step_id="step1", ) - sessions = state_manager.find_active_sessions_for_workflow("test_job", "main") + sessions = await state_manager.find_active_sessions_for_workflow("test_job", "main") assert len(sessions) == 1 assert sessions[0].workflow_name == "main" - def test_delete_session(self, state_manager: StateManager) -> None: + async def test_delete_session(self, state_manager: StateManager) -> None: """Test deleting a session.""" - session = state_manager.create_session( + session = await state_manager.create_session( job_name="test_job", workflow_name="main", goal="Goal", @@ -281,7 +281,7 @@ def test_delete_session(self, state_manager: StateManager) -> None: session_file = state_manager._session_file(session.session_id) assert session_file.exists() - state_manager.delete_session(session.session_id) + await state_manager.delete_session(session.session_id) assert not session_file.exists() assert state_manager.get_active_session() is None diff --git a/tests/unit/mcp/test_tools.py b/tests/unit/mcp/test_tools.py index 5fdb4ab2..822fce81 100644 --- a/tests/unit/mcp/test_tools.py +++ b/tests/unit/mcp/test_tools.py @@ -130,7 +130,7 @@ def test_get_workflows_empty(self, tmp_path: Path) -> None: assert len(response.jobs) == 0 - def test_start_workflow(self, tools: WorkflowTools) -> None: + async def test_start_workflow(self, tools: WorkflowTools) -> None: 
"""Test starting a workflow.""" input_data = StartWorkflowInput( goal="Complete the test job", @@ -139,7 +139,7 @@ def test_start_workflow(self, tools: WorkflowTools) -> None: instance_id="test-instance", ) - response = tools.start_workflow(input_data) + response = await tools.start_workflow(input_data) assert response.begin_step.session_id is not None assert "test-instance" in response.begin_step.branch_name @@ -148,7 +148,7 @@ def test_start_workflow(self, tools: WorkflowTools) -> None: assert "output1.md" in response.begin_step.step_expected_outputs assert "Output must be valid" in response.begin_step.step_quality_criteria - def test_start_workflow_invalid_job(self, tools: WorkflowTools) -> None: + async def test_start_workflow_invalid_job(self, tools: WorkflowTools) -> None: """Test starting workflow with invalid job.""" input_data = StartWorkflowInput( goal="Complete task", @@ -157,9 +157,9 @@ def test_start_workflow_invalid_job(self, tools: WorkflowTools) -> None: ) with pytest.raises(ToolError, match="Job not found"): - tools.start_workflow(input_data) + await tools.start_workflow(input_data) - def test_start_workflow_invalid_workflow(self, tools: WorkflowTools) -> None: + async def test_start_workflow_invalid_workflow(self, tools: WorkflowTools) -> None: """Test starting workflow with invalid workflow name.""" input_data = StartWorkflowInput( goal="Complete task", @@ -168,16 +168,16 @@ def test_start_workflow_invalid_workflow(self, tools: WorkflowTools) -> None: ) with pytest.raises(ToolError, match="Workflow.*not found"): - tools.start_workflow(input_data) + await tools.start_workflow(input_data) - def test_finished_step_no_session(self, tools: WorkflowTools) -> None: + async def test_finished_step_no_session(self, tools: WorkflowTools) -> None: """Test finished_step without active session.""" input_data = FinishedStepInput(outputs=["output1.md"]) with pytest.raises(StateError, match="No active workflow session"): - tools.finished_step(input_data) + await tools.finished_step(input_data) - def test_finished_step_advances_to_next( + async def test_finished_step_advances_to_next( self, tools: WorkflowTools, project_root: Path ) -> None: """Test finished_step advances to next step.""" @@ -187,7 +187,7 @@ def test_finished_step_advances_to_next( job_name="test_job", workflow_name="main", ) - tools.start_workflow(start_input) + await tools.start_workflow(start_input) # Create output file (project_root / "output1.md").write_text("Test output") @@ -197,7 +197,7 @@ def test_finished_step_advances_to_next( outputs=["output1.md"], notes="Completed step 1", ) - response = tools.finished_step(finish_input) + response = await tools.finished_step(finish_input) assert response.status == StepStatus.NEXT_STEP assert response.begin_step is not None @@ -205,7 +205,7 @@ def test_finished_step_advances_to_next( assert response.begin_step.step_instructions is not None assert "Step 2" in response.begin_step.step_instructions - def test_finished_step_completes_workflow( + async def test_finished_step_completes_workflow( self, tools: WorkflowTools, project_root: Path ) -> None: """Test finished_step completes workflow on last step.""" @@ -215,15 +215,15 @@ def test_finished_step_completes_workflow( job_name="test_job", workflow_name="main", ) - tools.start_workflow(start_input) + await tools.start_workflow(start_input) # Complete first step (project_root / "output1.md").write_text("Output 1") - tools.finished_step(FinishedStepInput(outputs=["output1.md"])) + await 
tools.finished_step(FinishedStepInput(outputs=["output1.md"])) # Complete second (last) step (project_root / "output2.md").write_text("Output 2") - response = tools.finished_step(FinishedStepInput(outputs=["output2.md"])) + response = await tools.finished_step(FinishedStepInput(outputs=["output2.md"])) assert response.status == StepStatus.WORKFLOW_COMPLETE assert response.summary is not None @@ -231,7 +231,7 @@ def test_finished_step_completes_workflow( assert "output1.md" in response.all_outputs assert "output2.md" in response.all_outputs - def test_finished_step_with_quality_gate_pass( + async def test_finished_step_with_quality_gate_pass( self, tools_with_quality: WorkflowTools, project_root: Path ) -> None: """Test finished_step passes quality gate.""" @@ -241,18 +241,18 @@ def test_finished_step_with_quality_gate_pass( job_name="test_job", workflow_name="main", ) - tools_with_quality.start_workflow(start_input) + await tools_with_quality.start_workflow(start_input) # Create output and finish step (project_root / "output1.md").write_text("Valid output") - response = tools_with_quality.finished_step( + response = await tools_with_quality.finished_step( FinishedStepInput(outputs=["output1.md"]) ) # Should advance to next step assert response.status == StepStatus.NEXT_STEP - def test_finished_step_with_quality_gate_fail( + async def test_finished_step_with_quality_gate_fail( self, project_root: Path, state_manager: StateManager ) -> None: """Test finished_step fails quality gate.""" @@ -269,17 +269,17 @@ def test_finished_step_with_quality_gate_fail( job_name="test_job", workflow_name="main", ) - tools.start_workflow(start_input) + await tools.start_workflow(start_input) # Create output and finish step (project_root / "output1.md").write_text("Invalid output") - response = tools.finished_step(FinishedStepInput(outputs=["output1.md"])) + response = await tools.finished_step(FinishedStepInput(outputs=["output1.md"])) assert response.status == StepStatus.NEEDS_WORK assert response.feedback == "Needs improvement" assert response.failed_criteria is not None - def test_finished_step_quality_gate_max_attempts( + async def test_finished_step_quality_gate_max_attempts( self, project_root: Path, state_manager: StateManager ) -> None: """Test finished_step fails after max quality gate attempts.""" @@ -295,21 +295,21 @@ def test_finished_step_quality_gate_max_attempts( job_name="test_job", workflow_name="main", ) - tools.start_workflow(start_input) + await tools.start_workflow(start_input) # Create output (project_root / "output1.md").write_text("Bad output") # Try multiple times (max is 3) for _ in range(2): - response = tools.finished_step(FinishedStepInput(outputs=["output1.md"])) + response = await tools.finished_step(FinishedStepInput(outputs=["output1.md"])) assert response.status == StepStatus.NEEDS_WORK # Third attempt should raise error with pytest.raises(ToolError, match="Quality gate failed after.*attempts"): - tools.finished_step(FinishedStepInput(outputs=["output1.md"])) + await tools.finished_step(FinishedStepInput(outputs=["output1.md"])) - def test_finished_step_quality_gate_override( + async def test_finished_step_quality_gate_override( self, project_root: Path, state_manager: StateManager ) -> None: """Test finished_step skips quality gate when override reason provided.""" @@ -327,11 +327,11 @@ def test_finished_step_quality_gate_override( job_name="test_job", workflow_name="main", ) - tools.start_workflow(start_input) + await tools.start_workflow(start_input) # Create output and 
finish step with override reason (project_root / "output1.md").write_text("Output that would fail quality check") - response = tools.finished_step( + response = await tools.finished_step( FinishedStepInput( outputs=["output1.md"], quality_review_override_reason="Manual review completed offline", diff --git a/tests/unit/test_adapters.py b/tests/unit/test_adapters.py deleted file mode 100644 index 18e8d6d7..00000000 --- a/tests/unit/test_adapters.py +++ /dev/null @@ -1,529 +0,0 @@ -"""Tests for agent adapters.""" - -import json -from pathlib import Path -from typing import Any - -import pytest - -from deepwork.core.adapters import ( - AdapterError, - AgentAdapter, - ClaudeAdapter, - GeminiAdapter, - SkillLifecycleHook, -) - - -def _get_claude_required_permissions() -> list[str]: - """Load the required permissions from the Claude settings template.""" - settings_path = ( - Path(__file__).parent.parent.parent - / "src" - / "deepwork" - / "templates" - / "claude" - / "settings.json" - ) - with open(settings_path, encoding="utf-8") as f: - settings = json.load(f) - permissions = settings["permissions"]["allow"] - # Sanity check: ensure the template file has meaningful content - assert len(permissions) > 2, "Settings template should have more than 2 permissions" - return permissions - - -class TestAgentAdapterRegistry: - """Tests for AgentAdapter registry functionality.""" - - def test_get_all_returns_registered_adapters(self) -> None: - """Test that get_all returns all registered adapters.""" - adapters = AgentAdapter.get_all() - - assert "claude" in adapters - assert adapters["claude"] is ClaudeAdapter - assert "gemini" in adapters - assert adapters["gemini"] is GeminiAdapter - - def test_get_returns_correct_adapter(self) -> None: - """Test that get returns the correct adapter class.""" - assert AgentAdapter.get("claude") is ClaudeAdapter - assert AgentAdapter.get("gemini") is GeminiAdapter - - def test_get_raises_for_unknown_adapter(self) -> None: - """Test that get raises AdapterError for unknown adapter.""" - with pytest.raises(AdapterError, match="Unknown adapter 'unknown'"): - AgentAdapter.get("unknown") - - def test_list_names_returns_all_names(self) -> None: - """Test that list_names returns all registered adapter names.""" - names = AgentAdapter.list_names() - - assert "claude" in names - assert "gemini" in names - assert len(names) >= 2 # At least claude and gemini - - -class TestClaudeAdapter: - """Tests for ClaudeAdapter.""" - - def test_class_attributes(self) -> None: - """Test Claude adapter class attributes.""" - assert ClaudeAdapter.name == "claude" - assert ClaudeAdapter.display_name == "Claude Code" - assert ClaudeAdapter.config_dir == ".claude" - assert ClaudeAdapter.skills_dir == "skills" - - def test_init_with_project_root(self, temp_dir: Path) -> None: - """Test initialization with project root.""" - adapter = ClaudeAdapter(temp_dir) - - assert adapter.project_root == temp_dir - - def test_init_without_project_root(self) -> None: - """Test initialization without project root.""" - adapter = ClaudeAdapter() - - assert adapter.project_root is None - - def test_detect_when_present(self, temp_dir: Path) -> None: - """Test detect when .claude directory exists.""" - (temp_dir / ".claude").mkdir() - adapter = ClaudeAdapter(temp_dir) - - assert adapter.detect() is True - - def test_detect_when_absent(self, temp_dir: Path) -> None: - """Test detect when .claude directory doesn't exist.""" - adapter = ClaudeAdapter(temp_dir) - - assert adapter.detect() is False - - def 
test_detect_with_explicit_project_root(self, temp_dir: Path) -> None: - """Test detect with explicit project root parameter.""" - (temp_dir / ".claude").mkdir() - adapter = ClaudeAdapter() - - assert adapter.detect(temp_dir) is True - - def test_get_template_dir(self, temp_dir: Path) -> None: - """Test get_template_dir.""" - adapter = ClaudeAdapter() - templates_root = temp_dir / "templates" - - result = adapter.get_template_dir(templates_root) - - assert result == templates_root / "claude" - - def test_get_skills_dir(self, temp_dir: Path) -> None: - """Test get_skills_dir.""" - adapter = ClaudeAdapter(temp_dir) - - result = adapter.get_skills_dir() - - assert result == temp_dir / ".claude" / "skills" - - def test_get_skills_dir_with_explicit_root(self, temp_dir: Path) -> None: - """Test get_skills_dir with explicit project root.""" - adapter = ClaudeAdapter() - - result = adapter.get_skills_dir(temp_dir) - - assert result == temp_dir / ".claude" / "skills" - - def test_get_skills_dir_raises_without_root(self) -> None: - """Test get_skills_dir raises when no project root specified.""" - adapter = ClaudeAdapter() - - with pytest.raises(AdapterError, match="No project root specified"): - adapter.get_skills_dir() - - def test_get_meta_skill_filename(self) -> None: - """Test get_meta_skill_filename returns directory/SKILL.md format.""" - adapter = ClaudeAdapter() - - result = adapter.get_meta_skill_filename("my_job") - - assert result == "my_job/SKILL.md" - - def test_get_step_skill_filename_returns_directory_format(self) -> None: - """Test get_step_skill_filename returns directory/SKILL.md format.""" - adapter = ClaudeAdapter() - - result = adapter.get_step_skill_filename("my_job", "step_one") - - assert result == "my_job.step_one/SKILL.md" - - def test_get_step_skill_filename_exposed(self) -> None: - """Test get_step_skill_filename with exposed=True (same format).""" - adapter = ClaudeAdapter() - - result = adapter.get_step_skill_filename("my_job", "step_one", exposed=True) - - assert result == "my_job.step_one/SKILL.md" - - def test_sync_hooks_creates_settings_file(self, temp_dir: Path) -> None: - """Test sync_hooks creates settings.json when it doesn't exist.""" - (temp_dir / ".claude").mkdir() - adapter = ClaudeAdapter(temp_dir) - hooks = { - "PreToolUse": [{"matcher": "", "hooks": [{"type": "command", "command": "test.sh"}]}] - } - - count = adapter.sync_hooks(temp_dir, hooks) - - assert count == 1 - settings_file = temp_dir / ".claude" / "settings.json" - assert settings_file.exists() - settings = json.loads(settings_file.read_text()) - assert "hooks" in settings - assert "PreToolUse" in settings["hooks"] - - def test_sync_hooks_merges_with_existing(self, temp_dir: Path) -> None: - """Test sync_hooks merges with existing settings.""" - claude_dir = temp_dir / ".claude" - claude_dir.mkdir() - settings_file = claude_dir / "settings.json" - settings_file.write_text(json.dumps({"existing_key": "value", "hooks": {}})) - - adapter = ClaudeAdapter(temp_dir) - hooks = { - "PreToolUse": [{"matcher": "", "hooks": [{"type": "command", "command": "test.sh"}]}] - } - - adapter.sync_hooks(temp_dir, hooks) - - settings = json.loads(settings_file.read_text()) - assert settings["existing_key"] == "value" - assert "PreToolUse" in settings["hooks"] - - def test_sync_hooks_empty_hooks_returns_zero(self, temp_dir: Path) -> None: - """Test sync_hooks returns 0 for empty hooks.""" - adapter = ClaudeAdapter(temp_dir) - - count = adapter.sync_hooks(temp_dir, {}) - - assert count == 0 - - def 
test_sync_permissions_creates_settings_file(self, temp_dir: Path) -> None: - """Test sync_permissions creates settings.json when it doesn't exist.""" - (temp_dir / ".claude").mkdir() - adapter = ClaudeAdapter(temp_dir) - - count = adapter.sync_permissions(temp_dir) - - expected_permissions = _get_claude_required_permissions() - assert count == len(expected_permissions) - settings_file = temp_dir / ".claude" / "settings.json" - assert settings_file.exists() - settings = json.loads(settings_file.read_text()) - assert "permissions" in settings - assert "allow" in settings["permissions"] - for permission in expected_permissions: - assert permission in settings["permissions"]["allow"] - - def test_sync_permissions_merges_with_existing(self, temp_dir: Path) -> None: - """Test sync_permissions merges with existing settings.""" - claude_dir = temp_dir / ".claude" - claude_dir.mkdir() - settings_file = claude_dir / "settings.json" - settings_file.write_text(json.dumps({"permissions": {"allow": ["Bash(ls:*)"]}})) - - adapter = ClaudeAdapter(temp_dir) - adapter.sync_permissions(temp_dir) - - settings = json.loads(settings_file.read_text()) - assert "Bash(ls:*)" in settings["permissions"]["allow"] - assert "Read(./.deepwork/**)" in settings["permissions"]["allow"] - - def test_sync_permissions_idempotent(self, temp_dir: Path) -> None: - """Test sync_permissions is idempotent (doesn't duplicate permissions).""" - (temp_dir / ".claude").mkdir() - adapter = ClaudeAdapter(temp_dir) - - expected_permissions = _get_claude_required_permissions() - - # First call adds permissions - count1 = adapter.sync_permissions(temp_dir) - assert count1 == len(expected_permissions) - - # Second call should add nothing - count2 = adapter.sync_permissions(temp_dir) - assert count2 == 0 - - # Verify no duplicates - settings_file = temp_dir / ".claude" / "settings.json" - settings = json.loads(settings_file.read_text()) - allow_list = settings["permissions"]["allow"] - for permission in expected_permissions: - assert allow_list.count(permission) == 1 - - def test_add_permission_single(self, temp_dir: Path) -> None: - """Test add_permission adds a single permission.""" - (temp_dir / ".claude").mkdir() - adapter = ClaudeAdapter(temp_dir) - - result = adapter.add_permission(temp_dir, "Bash(custom:*)") - - assert result is True - settings_file = temp_dir / ".claude" / "settings.json" - settings = json.loads(settings_file.read_text()) - assert "Bash(custom:*)" in settings["permissions"]["allow"] - - def test_add_permission_idempotent(self, temp_dir: Path) -> None: - """Test add_permission doesn't duplicate existing permissions.""" - (temp_dir / ".claude").mkdir() - adapter = ClaudeAdapter(temp_dir) - - # First call adds - result1 = adapter.add_permission(temp_dir, "Bash(custom:*)") - assert result1 is True - - # Second call should return False - result2 = adapter.add_permission(temp_dir, "Bash(custom:*)") - assert result2 is False - - # Verify no duplicates - settings_file = temp_dir / ".claude" / "settings.json" - settings = json.loads(settings_file.read_text()) - assert settings["permissions"]["allow"].count("Bash(custom:*)") == 1 - - def test_add_permission_with_settings_dict(self, temp_dir: Path) -> None: - """Test add_permission with pre-loaded settings (doesn't save).""" - (temp_dir / ".claude").mkdir() - adapter = ClaudeAdapter(temp_dir) - settings: dict[str, Any] = {"permissions": {"allow": []}} - - result = adapter.add_permission(temp_dir, "Bash(test:*)", settings) - - assert result is True - assert "Bash(test:*)" in 
settings["permissions"]["allow"] - # File should not exist since we passed settings dict - settings_file = temp_dir / ".claude" / "settings.json" - assert not settings_file.exists() - - def test_extract_skill_name_from_path(self, temp_dir: Path) -> None: - """Test _extract_skill_name extracts skill name from skill path.""" - adapter = ClaudeAdapter(temp_dir) - - # Test meta-skill path - path1 = temp_dir / ".claude" / "skills" / "my_job" / "SKILL.md" - assert adapter._extract_skill_name(path1) == "my_job" - - # Test step skill path - path2 = temp_dir / ".claude" / "skills" / "my_job.step_one" / "SKILL.md" - assert adapter._extract_skill_name(path2) == "my_job.step_one" - - def test_extract_skill_name_returns_none_for_invalid_path(self, temp_dir: Path) -> None: - """Test _extract_skill_name returns None for paths without skills dir.""" - adapter = ClaudeAdapter(temp_dir) - - path = temp_dir / ".claude" / "commands" / "my_command.md" - assert adapter._extract_skill_name(path) is None - - def test_add_skill_permissions(self, temp_dir: Path) -> None: - """Test add_skill_permissions adds Skill permissions for each skill.""" - (temp_dir / ".claude").mkdir() - adapter = ClaudeAdapter(temp_dir) - - skill_paths = [ - temp_dir / ".claude" / "skills" / "job_a" / "SKILL.md", - temp_dir / ".claude" / "skills" / "job_a.step_one" / "SKILL.md", - temp_dir / ".claude" / "skills" / "job_b" / "SKILL.md", - ] - - count = adapter.add_skill_permissions(temp_dir, skill_paths) - - assert count == 3 - settings_file = temp_dir / ".claude" / "settings.json" - settings = json.loads(settings_file.read_text()) - assert "Skill(job_a)" in settings["permissions"]["allow"] - assert "Skill(job_a.step_one)" in settings["permissions"]["allow"] - assert "Skill(job_b)" in settings["permissions"]["allow"] - - def test_add_skill_permissions_idempotent(self, temp_dir: Path) -> None: - """Test add_skill_permissions doesn't duplicate permissions.""" - (temp_dir / ".claude").mkdir() - adapter = ClaudeAdapter(temp_dir) - - skill_paths = [temp_dir / ".claude" / "skills" / "my_job" / "SKILL.md"] - - # First call adds - count1 = adapter.add_skill_permissions(temp_dir, skill_paths) - assert count1 == 1 - - # Second call should add nothing - count2 = adapter.add_skill_permissions(temp_dir, skill_paths) - assert count2 == 0 - - def test_add_skill_permissions_empty_list(self, temp_dir: Path) -> None: - """Test add_skill_permissions with empty list returns 0.""" - adapter = ClaudeAdapter(temp_dir) - - count = adapter.add_skill_permissions(temp_dir, []) - - assert count == 0 - - -class TestGeminiAdapter: - """Tests for GeminiAdapter.""" - - def test_class_attributes(self) -> None: - """Test Gemini adapter class attributes.""" - assert GeminiAdapter.name == "gemini" - assert GeminiAdapter.display_name == "Gemini CLI" - assert GeminiAdapter.config_dir == ".gemini" - assert GeminiAdapter.skills_dir == "skills" - assert GeminiAdapter.skill_template == "skill-job-step.toml.jinja" - - def test_init_with_project_root(self, temp_dir: Path) -> None: - """Test initialization with project root.""" - adapter = GeminiAdapter(temp_dir) - - assert adapter.project_root == temp_dir - - def test_init_without_project_root(self) -> None: - """Test initialization without project root.""" - adapter = GeminiAdapter() - - assert adapter.project_root is None - - def test_detect_when_present(self, temp_dir: Path) -> None: - """Test detect when .gemini directory exists.""" - (temp_dir / ".gemini").mkdir() - adapter = GeminiAdapter(temp_dir) - - assert adapter.detect() is 
True - - def test_detect_when_absent(self, temp_dir: Path) -> None: - """Test detect when .gemini directory doesn't exist.""" - adapter = GeminiAdapter(temp_dir) - - assert adapter.detect() is False - - def test_detect_with_explicit_project_root(self, temp_dir: Path) -> None: - """Test detect with explicit project root parameter.""" - (temp_dir / ".gemini").mkdir() - adapter = GeminiAdapter() - - assert adapter.detect(temp_dir) is True - - def test_get_template_dir(self, temp_dir: Path) -> None: - """Test get_template_dir.""" - adapter = GeminiAdapter() - templates_root = temp_dir / "templates" - - result = adapter.get_template_dir(templates_root) - - assert result == templates_root / "gemini" - - def test_get_skills_dir(self, temp_dir: Path) -> None: - """Test get_skills_dir.""" - adapter = GeminiAdapter(temp_dir) - - result = adapter.get_skills_dir() - - assert result == temp_dir / ".gemini" / "skills" - - def test_get_skills_dir_with_explicit_root(self, temp_dir: Path) -> None: - """Test get_skills_dir with explicit project root.""" - adapter = GeminiAdapter() - - result = adapter.get_skills_dir(temp_dir) - - assert result == temp_dir / ".gemini" / "skills" - - def test_get_skills_dir_raises_without_root(self) -> None: - """Test get_skills_dir raises when no project root specified.""" - adapter = GeminiAdapter() - - with pytest.raises(AdapterError, match="No project root specified"): - adapter.get_skills_dir() - - def test_get_meta_skill_filename(self) -> None: - """Test get_meta_skill_filename returns index.toml in subdirectory.""" - adapter = GeminiAdapter() - - result = adapter.get_meta_skill_filename("my_job") - - # Gemini uses subdirectories with index.toml for meta-skills - assert result == "my_job/index.toml" - - def test_get_step_skill_filename_returns_clean_name(self) -> None: - """Test get_step_skill_filename returns clean TOML with subdirectory.""" - adapter = GeminiAdapter() - - result = adapter.get_step_skill_filename("my_job", "step_one") - - # Gemini uses subdirectories for namespacing (colon becomes path) - # No prefix on skill filenames - assert result == "my_job/step_one.toml" - - def test_get_step_skill_filename_exposed(self) -> None: - """Test get_step_skill_filename with exposed=True (same result, no prefix).""" - adapter = GeminiAdapter() - - result = adapter.get_step_skill_filename("my_job", "step_one", exposed=True) - - # Same filename whether exposed or not - assert result == "my_job/step_one.toml" - - def test_get_step_skill_filename_with_underscores(self) -> None: - """Test get_step_skill_filename with underscores in names.""" - adapter = GeminiAdapter() - - result = adapter.get_step_skill_filename("competitive_research", "identify_competitors") - - assert result == "competitive_research/identify_competitors.toml" - - def test_hook_name_mapping_is_empty(self) -> None: - """Test that Gemini has no skill-level hooks.""" - assert GeminiAdapter.hook_name_mapping == {} - - def test_supports_hook_returns_false_for_all_hooks(self) -> None: - """Test that Gemini doesn't support any skill-level hooks.""" - adapter = GeminiAdapter() - - for hook in SkillLifecycleHook: - assert adapter.supports_hook(hook) is False - - def test_get_platform_hook_name_returns_none(self) -> None: - """Test that get_platform_hook_name returns None for all hooks.""" - adapter = GeminiAdapter() - - for hook in SkillLifecycleHook: - assert adapter.get_platform_hook_name(hook) is None - - def test_sync_hooks_returns_zero(self, temp_dir: Path) -> None: - """Test sync_hooks always returns 0 (no 
hook support).""" - (temp_dir / ".gemini").mkdir() - adapter = GeminiAdapter(temp_dir) - hooks = { - "SomeEvent": [{"matcher": "", "hooks": [{"type": "command", "command": "test.sh"}]}] - } - - count = adapter.sync_hooks(temp_dir, hooks) - - assert count == 0 - - def test_sync_hooks_empty_hooks_returns_zero(self, temp_dir: Path) -> None: - """Test sync_hooks returns 0 for empty hooks.""" - adapter = GeminiAdapter(temp_dir) - - count = adapter.sync_hooks(temp_dir, {}) - - assert count == 0 - - def test_sync_hooks_does_not_create_settings_file(self, temp_dir: Path) -> None: - """Test that sync_hooks doesn't create settings.json (unlike Claude).""" - gemini_dir = temp_dir / ".gemini" - gemini_dir.mkdir() - adapter = GeminiAdapter(temp_dir) - hooks = { - "AfterAgent": [{"matcher": "", "hooks": [{"type": "command", "command": "test.sh"}]}] - } - - adapter.sync_hooks(temp_dir, hooks) - - settings_file = gemini_dir / "settings.json" - assert not settings_file.exists() diff --git a/tests/unit/test_detector.py b/tests/unit/test_detector.py deleted file mode 100644 index 1e51a01b..00000000 --- a/tests/unit/test_detector.py +++ /dev/null @@ -1,98 +0,0 @@ -"""Tests for platform detector.""" - -from pathlib import Path - -import pytest - -from deepwork.core.adapters import ClaudeAdapter -from deepwork.core.detector import DetectorError, PlatformDetector - - -class TestPlatformDetector: - """Tests for PlatformDetector class.""" - - def test_detect_claude_present(self, temp_dir: Path) -> None: - """Test detecting Claude when .claude directory exists.""" - claude_dir = temp_dir / ".claude" - claude_dir.mkdir() - - detector = PlatformDetector(temp_dir) - adapter = detector.detect_platform("claude") - - assert adapter is not None - assert isinstance(adapter, ClaudeAdapter) - assert adapter.name == "claude" - - def test_detect_claude_absent(self, temp_dir: Path) -> None: - """Test detecting Claude when .claude directory doesn't exist.""" - detector = PlatformDetector(temp_dir) - adapter = detector.detect_platform("claude") - - assert adapter is None - - def test_detect_platform_raises_for_unknown(self, temp_dir: Path) -> None: - """Test that detecting unknown platform raises error.""" - detector = PlatformDetector(temp_dir) - - with pytest.raises(DetectorError, match="Unknown adapter"): - detector.detect_platform("unknown") - - def test_detect_all_platforms_empty(self, temp_dir: Path) -> None: - """Test detecting all platforms when none are present.""" - detector = PlatformDetector(temp_dir) - adapters = detector.detect_all_platforms() - - assert adapters == [] - - def test_detect_all_platforms_claude_present(self, temp_dir: Path) -> None: - """Test detecting all platforms when Claude is present.""" - (temp_dir / ".claude").mkdir() - - detector = PlatformDetector(temp_dir) - adapters = detector.detect_all_platforms() - - assert len(adapters) == 1 - assert adapters[0].name == "claude" - - def test_get_adapter(self, temp_dir: Path) -> None: - """Test getting adapter without checking availability.""" - detector = PlatformDetector(temp_dir) - adapter = detector.get_adapter("claude") - - assert isinstance(adapter, ClaudeAdapter) - assert adapter.name == "claude" - assert adapter.display_name == "Claude Code" - - def test_get_adapter_raises_for_unknown(self, temp_dir: Path) -> None: - """Test that getting unknown adapter raises error.""" - detector = PlatformDetector(temp_dir) - - with pytest.raises(DetectorError, match="Unknown adapter"): - detector.get_adapter("unknown") - - def 
test_list_supported_platforms(self) -> None: - """Test listing all supported platforms.""" - platforms = PlatformDetector.list_supported_platforms() - - assert "claude" in platforms - assert len(platforms) >= 1 # At least claude - - def test_detect_ignores_files(self, temp_dir: Path) -> None: - """Test that detector ignores files with platform names.""" - # Create a file instead of directory - (temp_dir / ".claude").write_text("not a directory") - - detector = PlatformDetector(temp_dir) - adapter = detector.detect_platform("claude") - - assert adapter is None - - def test_detected_adapter_has_project_root(self, temp_dir: Path) -> None: - """Test that detected adapter has project_root set.""" - (temp_dir / ".claude").mkdir() - - detector = PlatformDetector(temp_dir) - adapter = detector.detect_platform("claude") - - assert adapter is not None - assert adapter.project_root == temp_dir diff --git a/tests/unit/test_generator.py b/tests/unit/test_generator.py deleted file mode 100644 index dd90ba30..00000000 --- a/tests/unit/test_generator.py +++ /dev/null @@ -1,547 +0,0 @@ -"""Tests for skill generator.""" - -from pathlib import Path - -import pytest - -from deepwork.core.adapters import ClaudeAdapter -from deepwork.core.generator import GeneratorError, SkillGenerator -from deepwork.core.parser import Step, parse_job_definition - - -class TestSkillGenerator: - """Tests for SkillGenerator class.""" - - def test_init_default_templates_dir(self) -> None: - """Test initialization with default templates directory.""" - generator = SkillGenerator() - - assert generator.templates_dir.exists() - assert (generator.templates_dir / "claude").exists() - - def test_init_custom_templates_dir(self, temp_dir: Path) -> None: - """Test initialization with custom templates directory.""" - templates_dir = temp_dir / "templates" - templates_dir.mkdir() - - generator = SkillGenerator(templates_dir) - - assert generator.templates_dir == templates_dir - - def test_init_raises_for_missing_templates_dir(self, temp_dir: Path) -> None: - """Test initialization raises error for missing templates directory.""" - nonexistent = temp_dir / "nonexistent" - - with pytest.raises(GeneratorError, match="Templates directory not found"): - SkillGenerator(nonexistent) - - def test_generate_step_skill_simple_job(self, fixtures_dir: Path, temp_dir: Path) -> None: - """Test generating skill for simple job step.""" - job_dir = fixtures_dir / "jobs" / "simple_job" - job = parse_job_definition(job_dir) - - generator = SkillGenerator() - adapter = ClaudeAdapter() - - skill_path = generator.generate_step_skill(job, job.steps[0], adapter, temp_dir) - - assert skill_path.exists() - # Step skills use directory/SKILL.md format - assert skill_path.name == "SKILL.md" - assert skill_path.parent.name == "simple_job.single_step" - - content = skill_path.read_text() - assert "# simple_job.single_step" in content - # Single step with no dependencies is treated as standalone - assert "Standalone skill" in content - assert "input_param" in content - assert "output.md" in content - - def test_generate_step_skill_complex_job_first_step( - self, fixtures_dir: Path, temp_dir: Path - ) -> None: - """Test generating skill for first step of complex job.""" - job_dir = fixtures_dir / "jobs" / "complex_job" - job = parse_job_definition(job_dir) - - generator = SkillGenerator() - adapter = ClaudeAdapter() - - skill_path = generator.generate_step_skill(job, job.steps[0], adapter, temp_dir) - - content = skill_path.read_text() - assert "# 
competitive_research.identify_competitors" in content - assert "Step 1/4" in content - assert "market_segment" in content - assert "product_category" in content - # First step has no prerequisites - assert "## Prerequisites" not in content - # Has next step - assert "/competitive_research.primary_research" in content - - def test_generate_step_skill_complex_job_middle_step( - self, fixtures_dir: Path, temp_dir: Path - ) -> None: - """Test generating skill for middle step with dependencies.""" - job_dir = fixtures_dir / "jobs" / "complex_job" - job = parse_job_definition(job_dir) - - generator = SkillGenerator() - adapter = ClaudeAdapter() - - # Generate primary_research (step 2) - skill_path = generator.generate_step_skill(job, job.steps[1], adapter, temp_dir) - - content = skill_path.read_text() - assert "# competitive_research.primary_research" in content - assert "Step 2/4" in content - # Has prerequisites - assert "## Prerequisites" in content - assert "/competitive_research.identify_competitors" in content - # Has file input - assert "competitors.md" in content - assert "from `identify_competitors`" in content - # Has next step - assert "/competitive_research.secondary_research" in content - - def test_generate_step_skill_complex_job_final_step( - self, fixtures_dir: Path, temp_dir: Path - ) -> None: - """Test generating skill for final step.""" - job_dir = fixtures_dir / "jobs" / "complex_job" - job = parse_job_definition(job_dir) - - generator = SkillGenerator() - adapter = ClaudeAdapter() - - # Generate comparative_report (step 4) - skill_path = generator.generate_step_skill(job, job.steps[3], adapter, temp_dir) - - content = skill_path.read_text() - assert "# competitive_research.comparative_report" in content - assert "Step 4/4" in content - # Has prerequisites - assert "## Prerequisites" in content - # Has multiple file inputs - assert "primary_research.md" in content - assert "secondary_research.md" in content - # Final step - no next step - assert "**Workflow complete**" in content - assert "## Next Step" not in content - - def test_generate_step_skill_raises_for_missing_step( - self, fixtures_dir: Path, temp_dir: Path - ) -> None: - """Test that generating skill for non-existent step raises error.""" - job_dir = fixtures_dir / "jobs" / "simple_job" - job = parse_job_definition(job_dir) - - generator = SkillGenerator() - adapter = ClaudeAdapter() - - # Create a fake step not in the job - - fake_step = Step( - id="fake", - name="Fake", - description="Fake", - instructions_file="steps/fake.md", - outputs=["fake.md"], - ) - - with pytest.raises(GeneratorError, match="Step 'fake' not found"): - generator.generate_step_skill(job, fake_step, adapter, temp_dir) - - def test_generate_step_skill_raises_for_missing_instructions( - self, fixtures_dir: Path, temp_dir: Path - ) -> None: - """Test that missing instructions file raises error.""" - job_dir = fixtures_dir / "jobs" / "simple_job" - job = parse_job_definition(job_dir) - - # Save original instructions file content - instructions_file = job_dir / "steps" / "single_step.md" - original_content = instructions_file.read_text() - - try: - # Delete the instructions file - instructions_file.unlink() - - generator = SkillGenerator() - adapter = ClaudeAdapter() - - with pytest.raises(GeneratorError, match="instructions file not found"): - generator.generate_step_skill(job, job.steps[0], adapter, temp_dir) - finally: - # Restore the file - instructions_file.write_text(original_content) - - def test_generate_all_skills(self, fixtures_dir: 
Path, temp_dir: Path) -> None: - """Test generating skills for all steps in a job (meta + step skills).""" - job_dir = fixtures_dir / "jobs" / "complex_job" - job = parse_job_definition(job_dir) - - generator = SkillGenerator() - adapter = ClaudeAdapter() - - skill_paths = generator.generate_all_skills(job, adapter, temp_dir) - - # Now includes meta-skill plus step skills - assert len(skill_paths) == 5 # 1 meta + 4 steps - assert all(p.exists() for p in skill_paths) - - # Check directory names - meta-skill first, then step skills - # All files are named SKILL.md inside skill directories - expected_dirs = [ - "competitive_research", # Meta-skill - "competitive_research.identify_competitors", # Step skills - "competitive_research.primary_research", - "competitive_research.secondary_research", - "competitive_research.comparative_report", - ] - actual_dirs = [p.parent.name for p in skill_paths] - assert actual_dirs == expected_dirs - assert all(p.name == "SKILL.md" for p in skill_paths) - - def test_generate_meta_skill(self, fixtures_dir: Path, temp_dir: Path) -> None: - """Test generating meta-skill for a job.""" - job_dir = fixtures_dir / "jobs" / "complex_job" - job = parse_job_definition(job_dir) - - generator = SkillGenerator() - adapter = ClaudeAdapter() - - meta_skill_path = generator.generate_meta_skill(job, adapter, temp_dir) - - assert meta_skill_path.exists() - assert meta_skill_path.name == "SKILL.md" - assert meta_skill_path.parent.name == "competitive_research" - - content = meta_skill_path.read_text() - # Check meta-skill content - assert "# competitive_research" in content - assert "Available Steps" in content - assert "identify_competitors" in content - assert "primary_research" in content - assert "Skill tool" in content - - def test_generate_step_skill_exposed_step(self, fixtures_dir: Path, temp_dir: Path) -> None: - """Test generating skill for exposed step.""" - job_dir = fixtures_dir / "jobs" / "exposed_step_job" - job = parse_job_definition(job_dir) - - generator = SkillGenerator() - adapter = ClaudeAdapter() - - # Generate the exposed step (index 1) - skill_path = generator.generate_step_skill(job, job.steps[1], adapter, temp_dir) - - assert skill_path.exists() - # Uses directory/SKILL.md format whether exposed or not - assert skill_path.name == "SKILL.md" - assert skill_path.parent.name == "exposed_job.exposed_step" - - def test_generate_all_skills_with_exposed_steps( - self, fixtures_dir: Path, temp_dir: Path - ) -> None: - """Test generating all skills with mix of hidden and exposed steps.""" - job_dir = fixtures_dir / "jobs" / "exposed_step_job" - job = parse_job_definition(job_dir) - - generator = SkillGenerator() - adapter = ClaudeAdapter() - - skill_paths = generator.generate_all_skills(job, adapter, temp_dir) - - # Meta-skill + 2 steps - assert len(skill_paths) == 3 - assert all(p.exists() for p in skill_paths) - - # Check directory names - all use directory/SKILL.md format - expected_dirs = [ - "exposed_job", # Meta-skill - "exposed_job.hidden_step", # Step skill - "exposed_job.exposed_step", # Step skill - ] - actual_dirs = [p.parent.name for p in skill_paths] - assert actual_dirs == expected_dirs - assert all(p.name == "SKILL.md" for p in skill_paths) - - -class TestConcurrentStepsGeneration: - """Tests for concurrent steps in skill generation.""" - - def test_generate_meta_skill_with_concurrent_steps( - self, fixtures_dir: Path, temp_dir: Path - ) -> None: - """Test generating meta-skill for job with concurrent steps.""" - job_dir = fixtures_dir / "jobs" / 
"concurrent_steps_job" - job = parse_job_definition(job_dir) - - generator = SkillGenerator() - adapter = ClaudeAdapter() - - meta_skill_path = generator.generate_meta_skill(job, adapter, temp_dir) - - assert meta_skill_path.exists() - content = meta_skill_path.read_text() - - # Check meta-skill content has workflow section - assert "# concurrent_workflow" in content - assert "full_analysis" in content - - # Check concurrent steps are rendered correctly - assert "Concurrent Steps" in content - assert "Background Task 1" in content - assert "Background Task 2" in content - assert "Background Task 3" in content - assert "research_web" in content - assert "research_docs" in content - assert "research_interviews" in content - - def test_meta_skill_context_has_step_entries(self, fixtures_dir: Path, temp_dir: Path) -> None: - """Test that meta-skill context includes step_entries with concurrency info.""" - job_dir = fixtures_dir / "jobs" / "concurrent_steps_job" - job = parse_job_definition(job_dir) - - generator = SkillGenerator() - adapter = ClaudeAdapter() - - context = generator._build_meta_skill_context(job, adapter) - - assert "workflows" in context - assert len(context["workflows"]) == 1 - - workflow = context["workflows"][0] - assert "step_entries" in workflow - assert len(workflow["step_entries"]) == 4 - - # Check first entry (sequential) - entry1 = workflow["step_entries"][0] - assert entry1["is_concurrent"] is False - assert entry1["step_ids"] == ["setup"] - - # Check second entry (concurrent) - entry2 = workflow["step_entries"][1] - assert entry2["is_concurrent"] is True - assert entry2["step_ids"] == ["research_web", "research_docs", "research_interviews"] - assert "concurrent_steps" in entry2 - assert len(entry2["concurrent_steps"]) == 3 - assert entry2["concurrent_steps"][0]["task_number"] == 1 - assert entry2["concurrent_steps"][0]["id"] == "research_web" - - def test_generate_all_skills_with_concurrent_steps( - self, fixtures_dir: Path, temp_dir: Path - ) -> None: - """Test generating all skills for job with concurrent steps.""" - job_dir = fixtures_dir / "jobs" / "concurrent_steps_job" - job = parse_job_definition(job_dir) - - generator = SkillGenerator() - adapter = ClaudeAdapter() - - skill_paths = generator.generate_all_skills(job, adapter, temp_dir) - - # 1 meta-skill + 6 step skills - assert len(skill_paths) == 7 - assert all(p.exists() for p in skill_paths) - - # Check all step skills are generated - expected_dirs = [ - "concurrent_workflow", # Meta-skill - "concurrent_workflow.setup", - "concurrent_workflow.research_web", - "concurrent_workflow.research_docs", - "concurrent_workflow.research_interviews", - "concurrent_workflow.compile_results", - "concurrent_workflow.final_review", - ] - actual_dirs = [p.parent.name for p in skill_paths] - assert actual_dirs == expected_dirs - - -class TestDocSpecIntegration: - """Tests for doc spec integration in skill generation.""" - - def test_load_doc_spec_returns_parsed_spec(self, fixtures_dir: Path) -> None: - """Test that _load_doc_spec loads and parses doc spec files.""" - generator = SkillGenerator() - - # Load the valid_report doc spec from fixtures - doc_spec = generator._load_doc_spec(fixtures_dir, "doc_specs/valid_report.md") - - assert doc_spec is not None - assert doc_spec.name == "Monthly Report" - assert doc_spec.description == "A monthly summary report" - assert doc_spec.target_audience == "Team leads" - assert len(doc_spec.quality_criteria) == 2 - assert doc_spec.quality_criteria[0].name == "Summary" - - def 
test_load_doc_spec_caches_result(self, fixtures_dir: Path) -> None: - """Test that doc specs are cached after first load.""" - generator = SkillGenerator() - - # Load same doc spec twice - doc_spec1 = generator._load_doc_spec(fixtures_dir, "doc_specs/valid_report.md") - doc_spec2 = generator._load_doc_spec(fixtures_dir, "doc_specs/valid_report.md") - - # Should be the same cached instance - assert doc_spec1 is doc_spec2 - # Cache should have exactly one entry - assert len(generator._doc_spec_cache) == 1 - - def test_load_doc_spec_returns_none_for_missing_file(self, temp_dir: Path) -> None: - """Test that _load_doc_spec returns None for non-existent file.""" - generator = SkillGenerator() - - result = generator._load_doc_spec(temp_dir, "nonexistent.md") - - assert result is None - - def test_load_doc_spec_returns_none_for_invalid_spec(self, temp_dir: Path) -> None: - """Test that _load_doc_spec returns None for invalid doc spec file.""" - generator = SkillGenerator() - - # Create an invalid doc spec file (missing required fields) - invalid_spec = temp_dir / "invalid.md" - invalid_spec.write_text("""--- -name: "Test" ---- -Body content -""") - - result = generator._load_doc_spec(temp_dir, "invalid.md") - - assert result is None - - def test_generate_step_skill_with_doc_spec(self, fixtures_dir: Path, temp_dir: Path) -> None: - """Test generating skill for step with doc spec-referenced output.""" - # Set up the directory structure so the doc spec can be found - doc_specs_dir = temp_dir / ".deepwork" / "doc_specs" - doc_specs_dir.mkdir(parents=True) - - # Copy the valid_report.md fixture to the expected location - source_doc_spec = fixtures_dir / "doc_specs" / "valid_report.md" - target_doc_spec = doc_specs_dir / "valid_report.md" - target_doc_spec.write_text(source_doc_spec.read_text()) - - # Parse the job with doc spec - job_dir = fixtures_dir / "jobs" / "job_with_doc_spec" - job = parse_job_definition(job_dir) - - generator = SkillGenerator() - adapter = ClaudeAdapter() - - # Generate skill with project_root set to temp_dir so it finds doc specs - skill_path = generator.generate_step_skill( - job, job.steps[0], adapter, temp_dir, project_root=temp_dir - ) - - assert skill_path.exists() - content = skill_path.read_text() - - # Verify doc spec info is injected into the skill - assert "Doc Spec" in content - assert "Monthly Report" in content - assert "A monthly summary report" in content - assert "Target Audience" in content - assert "Team leads" in content - assert "Quality Criteria" in content - assert "Summary" in content - assert "Must include executive summary" in content - - def test_generate_step_skill_without_doc_spec(self, fixtures_dir: Path, temp_dir: Path) -> None: - """Test generating skill for step without doc spec reference.""" - job_dir = fixtures_dir / "jobs" / "simple_job" - job = parse_job_definition(job_dir) - - generator = SkillGenerator() - adapter = ClaudeAdapter() - - skill_path = generator.generate_step_skill(job, job.steps[0], adapter, temp_dir) - - content = skill_path.read_text() - # Should not have doc spec section - assert "Doc Spec:" not in content - - def test_generate_step_skill_with_missing_doc_spec_file( - self, fixtures_dir: Path, temp_dir: Path - ) -> None: - """Test generating skill when doc spec file doesn't exist.""" - # Parse the job with doc spec but don't create the doc spec file - job_dir = fixtures_dir / "jobs" / "job_with_doc_spec" - job = parse_job_definition(job_dir) - - generator = SkillGenerator() - adapter = ClaudeAdapter() - - # Generate skill 
without the doc spec file present - # This should work but not include doc spec info - skill_path = generator.generate_step_skill( - job, job.steps[0], adapter, temp_dir, project_root=temp_dir - ) - - assert skill_path.exists() - content = skill_path.read_text() - - # Should still generate the skill, just without doc spec details - assert "job_with_doc_spec.generate_report" in content - # Doc spec section should not appear since file is missing - assert "Monthly Report" not in content - - def test_build_step_context_includes_doc_spec_info( - self, fixtures_dir: Path, temp_dir: Path - ) -> None: - """Test that _build_step_context includes doc spec info in outputs.""" - # Set up the directory structure - doc_specs_dir = temp_dir / ".deepwork" / "doc_specs" - doc_specs_dir.mkdir(parents=True) - - source_doc_spec = fixtures_dir / "doc_specs" / "valid_report.md" - target_doc_spec = doc_specs_dir / "valid_report.md" - target_doc_spec.write_text(source_doc_spec.read_text()) - - job_dir = fixtures_dir / "jobs" / "job_with_doc_spec" - job = parse_job_definition(job_dir) - - generator = SkillGenerator() - adapter = ClaudeAdapter() - - context = generator._build_step_context( - job, job.steps[0], 0, adapter, project_root=temp_dir - ) - - # Check outputs context has doc spec info - assert "outputs" in context - assert len(context["outputs"]) == 1 - - output_ctx = context["outputs"][0] - assert output_ctx["file"] == "report.md" - assert output_ctx["has_doc_spec"] is True - assert "doc_spec" in output_ctx - - doc_spec_ctx = output_ctx["doc_spec"] - assert doc_spec_ctx["name"] == "Monthly Report" - assert doc_spec_ctx["description"] == "A monthly summary report" - assert doc_spec_ctx["target_audience"] == "Team leads" - assert len(doc_spec_ctx["quality_criteria"]) == 2 - assert doc_spec_ctx["quality_criteria"][0]["name"] == "Summary" - assert "example_document" in doc_spec_ctx - - def test_build_step_context_without_project_root( - self, fixtures_dir: Path, temp_dir: Path - ) -> None: - """Test that _build_step_context handles missing project_root.""" - job_dir = fixtures_dir / "jobs" / "job_with_doc_spec" - job = parse_job_definition(job_dir) - - generator = SkillGenerator() - adapter = ClaudeAdapter() - - # Build context without project_root - should still work but no doc spec - context = generator._build_step_context(job, job.steps[0], 0, adapter) - - output_ctx = context["outputs"][0] - assert output_ctx["has_doc_spec"] is True # Job still declares it - # But doc_spec info won't be loaded since no project_root - assert "doc_spec" not in output_ctx diff --git a/tests/unit/test_hooks_syncer.py b/tests/unit/test_hooks_syncer.py deleted file mode 100644 index 64cd17ce..00000000 --- a/tests/unit/test_hooks_syncer.py +++ /dev/null @@ -1,367 +0,0 @@ -"""Tests for the hooks syncer module.""" - -import json -from pathlib import Path - -from deepwork.core.adapters import ClaudeAdapter -from deepwork.core.hooks_syncer import ( - HookEntry, - HookSpec, - JobHooks, - collect_job_hooks, - merge_hooks_for_platform, - sync_hooks_to_platform, -) - - -class TestHookEntry: - """Tests for HookEntry dataclass.""" - - def test_get_command_for_script(self, temp_dir: Path) -> None: - """Test getting command for a script hook.""" - job_dir = temp_dir / ".deepwork" / "jobs" / "test_job" - job_dir.mkdir(parents=True) - - entry = HookEntry( - job_name="test_job", - job_dir=job_dir, - script="test_hook.sh", - ) - - cmd = entry.get_command(temp_dir) - assert cmd == ".deepwork/jobs/test_job/hooks/test_hook.sh" - - def 
test_get_command_for_module(self, temp_dir: Path) -> None: - """Test getting command for a module hook.""" - job_dir = temp_dir / ".deepwork" / "jobs" / "test_job" - job_dir.mkdir(parents=True) - - entry = HookEntry( - job_name="test_job", - job_dir=job_dir, - module="deepwork.hooks.my_hook", - ) - - cmd = entry.get_command(temp_dir) - assert cmd == "deepwork hook my_hook" - - -class TestJobHooks: - """Tests for JobHooks dataclass.""" - - def test_from_job_dir_with_hooks(self, temp_dir: Path) -> None: - """Test loading hooks from job directory.""" - job_dir = temp_dir / "test_job" - hooks_dir = job_dir / "hooks" - hooks_dir.mkdir(parents=True) - - # Create global_hooks.yml - hooks_file = hooks_dir / "global_hooks.yml" - hooks_file.write_text( - """ -UserPromptSubmit: - - capture.sh -Stop: - - validate.sh - - cleanup.sh -""" - ) - - result = JobHooks.from_job_dir(job_dir) - - assert result is not None - assert result.job_name == "test_job" - assert len(result.hooks["UserPromptSubmit"]) == 1 - assert result.hooks["UserPromptSubmit"][0].script == "capture.sh" - assert len(result.hooks["Stop"]) == 2 - assert result.hooks["Stop"][0].script == "validate.sh" - assert result.hooks["Stop"][1].script == "cleanup.sh" - - def test_from_job_dir_with_module_hooks(self, temp_dir: Path) -> None: - """Test loading module-based hooks from job directory.""" - job_dir = temp_dir / "test_job" - hooks_dir = job_dir / "hooks" - hooks_dir.mkdir(parents=True) - - # Create global_hooks.yml with module format - hooks_file = hooks_dir / "global_hooks.yml" - hooks_file.write_text( - """ -UserPromptSubmit: - - capture.sh -Stop: - - module: deepwork.hooks.validate -""" - ) - - result = JobHooks.from_job_dir(job_dir) - - assert result is not None - assert result.hooks["UserPromptSubmit"][0].script == "capture.sh" - assert result.hooks["Stop"][0].module == "deepwork.hooks.validate" - assert result.hooks["Stop"][0].script is None - - def test_from_job_dir_no_hooks_file(self, temp_dir: Path) -> None: - """Test returns None when no hooks file exists.""" - job_dir = temp_dir / "test_job" - job_dir.mkdir(parents=True) - - result = JobHooks.from_job_dir(job_dir) - assert result is None - - def test_from_job_dir_empty_hooks_file(self, temp_dir: Path) -> None: - """Test returns None when hooks file is empty.""" - job_dir = temp_dir / "test_job" - hooks_dir = job_dir / "hooks" - hooks_dir.mkdir(parents=True) - - hooks_file = hooks_dir / "global_hooks.yml" - hooks_file.write_text("") - - result = JobHooks.from_job_dir(job_dir) - assert result is None - - def test_from_job_dir_single_script_as_string(self, temp_dir: Path) -> None: - """Test parsing single script as string instead of list.""" - job_dir = temp_dir / "test_job" - hooks_dir = job_dir / "hooks" - hooks_dir.mkdir(parents=True) - - hooks_file = hooks_dir / "global_hooks.yml" - hooks_file.write_text("Stop: cleanup.sh\n") - - result = JobHooks.from_job_dir(job_dir) - - assert result is not None - assert len(result.hooks["Stop"]) == 1 - assert result.hooks["Stop"][0].script == "cleanup.sh" - - -class TestCollectJobHooks: - """Tests for collect_job_hooks function.""" - - def test_collects_hooks_from_multiple_jobs(self, temp_dir: Path) -> None: - """Test collecting hooks from multiple job directories.""" - jobs_dir = temp_dir / "jobs" - - # Create first job with hooks - job1_dir = jobs_dir / "job1" - (job1_dir / "hooks").mkdir(parents=True) - (job1_dir / "hooks" / "global_hooks.yml").write_text("Stop:\n - hook1.sh\n") - - # Create second job with hooks - job2_dir = jobs_dir / 
"job2" - (job2_dir / "hooks").mkdir(parents=True) - (job2_dir / "hooks" / "global_hooks.yml").write_text("Stop:\n - hook2.sh\n") - - # Create job without hooks - job3_dir = jobs_dir / "job3" - job3_dir.mkdir(parents=True) - - result = collect_job_hooks(jobs_dir) - - assert len(result) == 2 - job_names = {jh.job_name for jh in result} - assert job_names == {"job1", "job2"} - - def test_returns_empty_for_nonexistent_dir(self, temp_dir: Path) -> None: - """Test returns empty list when jobs dir doesn't exist.""" - jobs_dir = temp_dir / "nonexistent" - result = collect_job_hooks(jobs_dir) - assert result == [] - - -class TestMergeHooksForPlatform: - """Tests for merge_hooks_for_platform function.""" - - def test_merges_hooks_from_multiple_jobs(self, temp_dir: Path) -> None: - """Test merging hooks from multiple jobs.""" - # Create job directories - job1_dir = temp_dir / ".deepwork" / "jobs" / "job1" - job2_dir = temp_dir / ".deepwork" / "jobs" / "job2" - job1_dir.mkdir(parents=True) - job2_dir.mkdir(parents=True) - - job_hooks_list = [ - JobHooks( - job_name="job1", - job_dir=job1_dir, - hooks={"Stop": [HookSpec(script="hook1.sh")]}, - ), - JobHooks( - job_name="job2", - job_dir=job2_dir, - hooks={ - "Stop": [HookSpec(script="hook2.sh")], - "UserPromptSubmit": [HookSpec(script="capture.sh")], - }, - ), - ] - - result = merge_hooks_for_platform(job_hooks_list, temp_dir) - - assert "Stop" in result - assert "UserPromptSubmit" in result - assert len(result["Stop"]) == 2 - assert len(result["UserPromptSubmit"]) == 1 - - def test_avoids_duplicate_hooks(self, temp_dir: Path) -> None: - """Test that duplicate hooks are not added.""" - job_dir = temp_dir / ".deepwork" / "jobs" / "job1" - job_dir.mkdir(parents=True) - - # Same hook in same job (shouldn't happen but test anyway) - job_hooks_list = [ - JobHooks( - job_name="job1", - job_dir=job_dir, - hooks={"Stop": [HookSpec(script="hook.sh"), HookSpec(script="hook.sh")]}, - ), - ] - - result = merge_hooks_for_platform(job_hooks_list, temp_dir) - - # Should only have one entry - assert len(result["Stop"]) == 1 - - def test_duplicates_stop_hooks_to_subagent_stop(self, temp_dir: Path) -> None: - """Test that Stop hooks are also registered for SubagentStop event. - - Claude Code has separate Stop and SubagentStop events. When a Stop hook - is defined, it should also be registered for SubagentStop so the hook - triggers for both the main agent and subagents. 
- """ - job_dir = temp_dir / ".deepwork" / "jobs" / "job1" - job_dir.mkdir(parents=True) - - job_hooks_list = [ - JobHooks( - job_name="job1", - job_dir=job_dir, - hooks={"Stop": [HookSpec(script="hook.sh")]}, - ), - ] - - result = merge_hooks_for_platform(job_hooks_list, temp_dir) - - # Should have both Stop and SubagentStop events - assert "Stop" in result - assert "SubagentStop" in result - assert len(result["Stop"]) == 1 - assert len(result["SubagentStop"]) == 1 - - # Both should have the same hook command - stop_cmd = result["Stop"][0]["hooks"][0]["command"] - subagent_stop_cmd = result["SubagentStop"][0]["hooks"][0]["command"] - assert stop_cmd == subagent_stop_cmd == ".deepwork/jobs/job1/hooks/hook.sh" - - def test_does_not_duplicate_subagent_stop_if_no_stop(self, temp_dir: Path) -> None: - """Test that SubagentStop is not created if there are no Stop hooks.""" - job_dir = temp_dir / ".deepwork" / "jobs" / "job1" - job_dir.mkdir(parents=True) - - job_hooks_list = [ - JobHooks( - job_name="job1", - job_dir=job_dir, - hooks={"UserPromptSubmit": [HookSpec(script="capture.sh")]}, - ), - ] - - result = merge_hooks_for_platform(job_hooks_list, temp_dir) - - # Should only have UserPromptSubmit, not SubagentStop - assert "UserPromptSubmit" in result - assert "SubagentStop" not in result - assert "Stop" not in result - - -class TestSyncHooksToPlatform: - """Tests for sync_hooks_to_platform function using adapters.""" - - def test_syncs_hooks_via_adapter(self, temp_dir: Path) -> None: - """Test syncing hooks to platform via adapter.""" - # Create .claude directory - (temp_dir / ".claude").mkdir(parents=True) - - adapter = ClaudeAdapter(temp_dir) - - # Create job directories - job_dir = temp_dir / ".deepwork" / "jobs" / "test_job" - job_dir.mkdir(parents=True) - - job_hooks_list = [ - JobHooks( - job_name="test_job", - job_dir=job_dir, - hooks={"Stop": [HookSpec(script="test_hook.sh")]}, - ), - ] - - count = sync_hooks_to_platform(temp_dir, adapter, job_hooks_list) - - # Count is 2 because Stop hooks are also registered for SubagentStop - assert count == 2 - - # Verify settings.json was created - settings_file = temp_dir / ".claude" / "settings.json" - assert settings_file.exists() - - with open(settings_file) as f: - settings = json.load(f) - - assert "hooks" in settings - assert "Stop" in settings["hooks"] - assert "SubagentStop" in settings["hooks"] - - def test_returns_zero_for_empty_hooks(self, temp_dir: Path) -> None: - """Test returns 0 when no hooks to sync.""" - adapter = ClaudeAdapter(temp_dir) - - count = sync_hooks_to_platform(temp_dir, adapter, []) - - assert count == 0 - - def test_merges_with_existing_settings(self, temp_dir: Path) -> None: - """Test merging hooks into existing settings.json.""" - # Create .claude directory with existing settings - claude_dir = temp_dir / ".claude" - claude_dir.mkdir(parents=True) - - existing_settings = { - "version": "1.0", - "hooks": { - "PreToolUse": [ - {"matcher": "", "hooks": [{"type": "command", "command": "existing.sh"}]} - ] - }, - } - settings_file = claude_dir / "settings.json" - with open(settings_file, "w") as f: - json.dump(existing_settings, f) - - adapter = ClaudeAdapter(temp_dir) - - job_dir = temp_dir / ".deepwork" / "jobs" / "test_job" - job_dir.mkdir(parents=True) - - job_hooks_list = [ - JobHooks( - job_name="test_job", - job_dir=job_dir, - hooks={"Stop": [HookSpec(script="new_hook.sh")]}, - ), - ] - - sync_hooks_to_platform(temp_dir, adapter, job_hooks_list) - - with open(settings_file) as f: - settings = json.load(f) - - 
# Should preserve existing settings - assert settings["version"] == "1.0" - assert "PreToolUse" in settings["hooks"] - - # Should add new hooks - assert "Stop" in settings["hooks"] - assert len(settings["hooks"]["Stop"]) == 1 diff --git a/tests/unit/test_stop_hooks.py b/tests/unit/test_stop_hooks.py deleted file mode 100644 index 96cdeb5b..00000000 --- a/tests/unit/test_stop_hooks.py +++ /dev/null @@ -1,860 +0,0 @@ -"""Tests for stop hook functionality.""" - -from pathlib import Path - -import pytest - -from deepwork.core.adapters import ClaudeAdapter -from deepwork.core.generator import GeneratorError, SkillGenerator -from deepwork.core.parser import HookAction, JobDefinition, OutputSpec, Step, StopHook -from deepwork.schemas.job_schema import JOB_SCHEMA -from deepwork.utils.validation import ValidationError, validate_against_schema - - -class TestStopHook: - """Tests for StopHook dataclass.""" - - def test_is_prompt(self) -> None: - """Test is_prompt returns True for prompt hooks.""" - hook = StopHook(prompt="Check quality") - assert hook.is_prompt() is True - assert hook.is_prompt_file() is False - assert hook.is_script() is False - - def test_is_prompt_file(self) -> None: - """Test is_prompt_file returns True for prompt file hooks.""" - hook = StopHook(prompt_file="hooks/check.md") - assert hook.is_prompt() is False - assert hook.is_prompt_file() is True - assert hook.is_script() is False - - def test_is_script(self) -> None: - """Test is_script returns True for script hooks.""" - hook = StopHook(script="hooks/validate.sh") - assert hook.is_prompt() is False - assert hook.is_prompt_file() is False - assert hook.is_script() is True - - def test_from_dict_prompt(self) -> None: - """Test from_dict creates prompt hook.""" - data = {"prompt": "Verify all criteria are met"} - hook = StopHook.from_dict(data) - assert hook.prompt == "Verify all criteria are met" - assert hook.prompt_file is None - assert hook.script is None - - def test_from_dict_prompt_file(self) -> None: - """Test from_dict creates prompt file hook.""" - data = {"prompt_file": "hooks/quality.md"} - hook = StopHook.from_dict(data) - assert hook.prompt is None - assert hook.prompt_file == "hooks/quality.md" - assert hook.script is None - - def test_from_dict_script(self) -> None: - """Test from_dict creates script hook.""" - data = {"script": "hooks/validate.sh"} - hook = StopHook.from_dict(data) - assert hook.prompt is None - assert hook.prompt_file is None - assert hook.script == "hooks/validate.sh" - - -class TestStepWithStopHooks: - """Tests for Step with stop_hooks.""" - - def test_step_with_no_stop_hooks(self) -> None: - """Test step without stop hooks.""" - step = Step( - id="test", - name="Test Step", - description="A test step", - instructions_file="steps/test.md", - outputs=[OutputSpec(file="output.md")], - ) - assert step.stop_hooks == [] - - def test_step_with_single_stop_hook(self) -> None: - """Test step with single stop hook (using hooks dict).""" - step = Step( - id="test", - name="Test Step", - description="A test step", - instructions_file="steps/test.md", - outputs=[OutputSpec(file="output.md")], - hooks={"after_agent": [HookAction(prompt="Check quality")]}, - ) - assert len(step.stop_hooks) == 1 - assert step.stop_hooks[0].is_prompt() - assert step.stop_hooks[0].prompt == "Check quality" - - def test_step_with_multiple_stop_hooks(self) -> None: - """Test step with multiple stop hooks (using hooks dict).""" - step = Step( - id="test", - name="Test Step", - description="A test step", - 
instructions_file="steps/test.md", - outputs=[OutputSpec(file="output.md")], - hooks={ - "after_agent": [ - HookAction(prompt="Check criteria 1"), - HookAction(script="hooks/validate.sh"), - ] - }, - ) - assert len(step.stop_hooks) == 2 - assert step.stop_hooks[0].is_prompt() - assert step.stop_hooks[1].is_script() - - def test_step_from_dict_with_stop_hooks(self) -> None: - """Test Step.from_dict parses stop_hooks array.""" - data = { - "id": "test", - "name": "Test Step", - "description": "A test step", - "instructions_file": "steps/test.md", - "outputs": ["output.md"], - "stop_hooks": [ - {"prompt": "Check quality criteria"}, - {"script": "hooks/run_tests.sh"}, - ], - } - step = Step.from_dict(data) - assert len(step.stop_hooks) == 2 - assert step.stop_hooks[0].prompt == "Check quality criteria" - assert step.stop_hooks[1].script == "hooks/run_tests.sh" - - def test_step_from_dict_without_stop_hooks(self) -> None: - """Test Step.from_dict with no stop_hooks returns empty list.""" - data = { - "id": "test", - "name": "Test Step", - "description": "A test step", - "instructions_file": "steps/test.md", - "outputs": ["output.md"], - } - step = Step.from_dict(data) - assert step.stop_hooks == [] - - def test_step_from_dict_with_hooks_structure(self) -> None: - """Test Step.from_dict parses new hooks structure with lifecycle events.""" - data = { - "id": "test", - "name": "Test Step", - "description": "A test step", - "instructions_file": "steps/test.md", - "outputs": ["output.md"], - "hooks": { - "after_agent": [ - {"prompt": "Check quality"}, - {"script": "hooks/validate.sh"}, - ], - "before_tool": [ - {"prompt": "Pre-tool check"}, - ], - }, - } - step = Step.from_dict(data) - # stop_hooks property returns after_agent hooks - assert len(step.stop_hooks) == 2 - assert step.stop_hooks[0].prompt == "Check quality" - assert step.stop_hooks[1].script == "hooks/validate.sh" - # Check full hooks dict - assert "after_agent" in step.hooks - assert "before_tool" in step.hooks - assert len(step.hooks["after_agent"]) == 2 - assert len(step.hooks["before_tool"]) == 1 - - -class TestSchemaValidation: - """Tests for stop_hooks schema validation.""" - - def test_valid_prompt_stop_hook(self) -> None: - """Test schema accepts valid prompt stop hook.""" - job_data = { - "name": "test_job", - "version": "1.0.0", - "summary": "Test job", - "steps": [ - { - "id": "step1", - "name": "Step 1", - "description": "A step", - "instructions_file": "steps/step1.md", - "outputs": ["output.md"], - "stop_hooks": [{"prompt": "Check quality"}], - } - ], - } - # Should not raise - validate_against_schema(job_data, JOB_SCHEMA) - - def test_valid_script_stop_hook(self) -> None: - """Test schema accepts valid script stop hook.""" - job_data = { - "name": "test_job", - "version": "1.0.0", - "summary": "Test job", - "steps": [ - { - "id": "step1", - "name": "Step 1", - "description": "A step", - "instructions_file": "steps/step1.md", - "outputs": ["output.md"], - "stop_hooks": [{"script": "hooks/validate.sh"}], - } - ], - } - validate_against_schema(job_data, JOB_SCHEMA) - - def test_valid_prompt_file_stop_hook(self) -> None: - """Test schema accepts valid prompt_file stop hook.""" - job_data = { - "name": "test_job", - "version": "1.0.0", - "summary": "Test job", - "steps": [ - { - "id": "step1", - "name": "Step 1", - "description": "A step", - "instructions_file": "steps/step1.md", - "outputs": ["output.md"], - "stop_hooks": [{"prompt_file": "hooks/quality.md"}], - } - ], - } - validate_against_schema(job_data, JOB_SCHEMA) - - 
def test_valid_multiple_stop_hooks(self) -> None: - """Test schema accepts multiple stop hooks.""" - job_data = { - "name": "test_job", - "version": "1.0.0", - "summary": "Test job", - "steps": [ - { - "id": "step1", - "name": "Step 1", - "description": "A step", - "instructions_file": "steps/step1.md", - "outputs": ["output.md"], - "stop_hooks": [ - {"prompt": "Check quality"}, - {"script": "hooks/tests.sh"}, - ], - } - ], - } - validate_against_schema(job_data, JOB_SCHEMA) - - def test_invalid_stop_hook_missing_type(self) -> None: - """Test schema rejects stop hook without type.""" - job_data = { - "name": "test_job", - "version": "1.0.0", - "summary": "Test job", - "steps": [ - { - "id": "step1", - "name": "Step 1", - "description": "A step", - "instructions_file": "steps/step1.md", - "outputs": ["output.md"], - "stop_hooks": [{}], # Empty object - } - ], - } - with pytest.raises(ValidationError): - validate_against_schema(job_data, JOB_SCHEMA) - - def test_invalid_stop_hook_extra_fields(self) -> None: - """Test schema rejects stop hook with extra fields.""" - job_data = { - "name": "test_job", - "version": "1.0.0", - "summary": "Test job", - "steps": [ - { - "id": "step1", - "name": "Step 1", - "description": "A step", - "instructions_file": "steps/step1.md", - "outputs": ["output.md"], - "stop_hooks": [{"prompt": "Check", "extra": "field"}], - } - ], - } - with pytest.raises(ValidationError): - validate_against_schema(job_data, JOB_SCHEMA) - - def test_valid_hooks_with_after_agent(self) -> None: - """Test schema accepts new hooks structure with after_agent event.""" - job_data = { - "name": "test_job", - "version": "1.0.0", - "summary": "Test job", - "steps": [ - { - "id": "step1", - "name": "Step 1", - "description": "A step", - "instructions_file": "steps/step1.md", - "outputs": ["output.md"], - "hooks": { - "after_agent": [{"prompt": "Check quality"}], - }, - } - ], - } - validate_against_schema(job_data, JOB_SCHEMA) - - def test_valid_hooks_with_multiple_events(self) -> None: - """Test schema accepts hooks with multiple lifecycle events.""" - job_data = { - "name": "test_job", - "version": "1.0.0", - "summary": "Test job", - "steps": [ - { - "id": "step1", - "name": "Step 1", - "description": "A step", - "instructions_file": "steps/step1.md", - "outputs": ["output.md"], - "hooks": { - "after_agent": [{"prompt": "Check quality"}], - "before_tool": [{"script": "hooks/validate.sh"}], - "before_prompt": [{"prompt": "Initialize context"}], - }, - } - ], - } - validate_against_schema(job_data, JOB_SCHEMA) - - def test_valid_hooks_with_script_action(self) -> None: - """Test schema accepts hooks with script action.""" - job_data = { - "name": "test_job", - "version": "1.0.0", - "summary": "Test job", - "steps": [ - { - "id": "step1", - "name": "Step 1", - "description": "A step", - "instructions_file": "steps/step1.md", - "outputs": ["output.md"], - "hooks": { - "before_tool": [{"script": "hooks/check.sh"}], - }, - } - ], - } - validate_against_schema(job_data, JOB_SCHEMA) - - -class TestGeneratorStopHooks: - """Tests for generator stop hooks context building.""" - - @pytest.fixture - def generator(self, tmp_path: Path) -> SkillGenerator: - """Create generator with temp templates.""" - templates_dir = tmp_path / "templates" - claude_dir = templates_dir / "claude" - claude_dir.mkdir(parents=True) - - # Create minimal template - template_content = """--- -description: {{ step_description }} -{% if stop_hooks %} -hooks: - Stop: - - hooks: -{% for hook in stop_hooks %} -{% if hook.type == 
"script" %} - - type: command - command: ".deepwork/jobs/{{ job_name }}/{{ hook.path }}" -{% else %} - - type: prompt - prompt: "{{ hook.content }}" -{% endif %} -{% endfor %} -{% endif %} ---- -# {{ job_name }}.{{ step_id }} -{{ instructions_content }} -""" - (claude_dir / "skill-job-step.md.jinja").write_text(template_content) - return SkillGenerator(templates_dir) - - @pytest.fixture - def job_with_hooks(self, tmp_path: Path) -> JobDefinition: - """Create job with stop hooks.""" - job_dir = tmp_path / "test_job" - job_dir.mkdir() - steps_dir = job_dir / "steps" - steps_dir.mkdir() - (steps_dir / "step1.md").write_text("# Step 1 Instructions") - - return JobDefinition( - name="test_job", - version="1.0.0", - summary="Test job", - description="A test job", - steps=[ - Step( - id="step1", - name="Step 1", - description="First step", - instructions_file="steps/step1.md", - outputs=[OutputSpec(file="output.md")], - hooks={ - "after_agent": [HookAction(prompt="Verify quality criteria")], - }, - ), - ], - job_dir=job_dir, - ) - - @pytest.fixture - def job_with_script_hook(self, tmp_path: Path) -> JobDefinition: - """Create job with script stop hook.""" - job_dir = tmp_path / "test_job" - job_dir.mkdir() - steps_dir = job_dir / "steps" - steps_dir.mkdir() - (steps_dir / "step1.md").write_text("# Step 1 Instructions") - - return JobDefinition( - name="test_job", - version="1.0.0", - summary="Test job", - description="A test job", - steps=[ - Step( - id="step1", - name="Step 1", - description="First step", - instructions_file="steps/step1.md", - outputs=[OutputSpec(file="output.md")], - hooks={ - "after_agent": [HookAction(script="hooks/validate.sh")], - }, - ), - ], - job_dir=job_dir, - ) - - @pytest.fixture - def job_with_prompt_file_hook(self, tmp_path: Path) -> JobDefinition: - """Create job with prompt file stop hook.""" - job_dir = tmp_path / "test_job" - job_dir.mkdir() - steps_dir = job_dir / "steps" - steps_dir.mkdir() - hooks_dir = job_dir / "hooks" - hooks_dir.mkdir() - (steps_dir / "step1.md").write_text("# Step 1 Instructions") - (hooks_dir / "quality.md").write_text("Check all quality criteria") - - return JobDefinition( - name="test_job", - version="1.0.0", - summary="Test job", - description="A test job", - steps=[ - Step( - id="step1", - name="Step 1", - description="First step", - instructions_file="steps/step1.md", - outputs=[OutputSpec(file="output.md")], - hooks={ - "after_agent": [HookAction(prompt_file="hooks/quality.md")], - }, - ), - ], - job_dir=job_dir, - ) - - def test_build_context_with_prompt_hook( - self, generator: SkillGenerator, job_with_hooks: JobDefinition - ) -> None: - """Test context building includes prompt stop hook.""" - adapter = ClaudeAdapter() - context = generator._build_step_context(job_with_hooks, job_with_hooks.steps[0], 0, adapter) - assert "stop_hooks" in context - assert len(context["stop_hooks"]) == 1 - assert context["stop_hooks"][0]["type"] == "prompt" - assert context["stop_hooks"][0]["content"] == "Verify quality criteria" - - def test_build_context_with_script_hook( - self, generator: SkillGenerator, job_with_script_hook: JobDefinition - ) -> None: - """Test context building includes script stop hook.""" - adapter = ClaudeAdapter() - context = generator._build_step_context( - job_with_script_hook, job_with_script_hook.steps[0], 0, adapter - ) - assert "stop_hooks" in context - assert len(context["stop_hooks"]) == 1 - assert context["stop_hooks"][0]["type"] == "script" - assert context["stop_hooks"][0]["path"] == "hooks/validate.sh" - - def 
test_build_context_with_prompt_file_hook( - self, generator: SkillGenerator, job_with_prompt_file_hook: JobDefinition - ) -> None: - """Test context building reads prompt file content.""" - adapter = ClaudeAdapter() - context = generator._build_step_context( - job_with_prompt_file_hook, job_with_prompt_file_hook.steps[0], 0, adapter - ) - assert "stop_hooks" in context - assert len(context["stop_hooks"]) == 1 - assert context["stop_hooks"][0]["type"] == "prompt_file" - assert context["stop_hooks"][0]["content"] == "Check all quality criteria" - - def test_build_context_with_missing_prompt_file( - self, generator: SkillGenerator, tmp_path: Path - ) -> None: - """Test error when prompt file is missing.""" - job_dir = tmp_path / "test_job" - job_dir.mkdir() - steps_dir = job_dir / "steps" - steps_dir.mkdir() - (steps_dir / "step1.md").write_text("# Step 1") - - job = JobDefinition( - name="test_job", - version="1.0.0", - summary="Test", - description="Test", - steps=[ - Step( - id="step1", - name="Step 1", - description="Step", - instructions_file="steps/step1.md", - outputs=[OutputSpec(file="out.md")], - hooks={ - "after_agent": [HookAction(prompt_file="missing.md")], - }, - ) - ], - job_dir=job_dir, - ) - - adapter = ClaudeAdapter() - with pytest.raises(GeneratorError, match="prompt file not found"): - generator._build_step_context(job, job.steps[0], 0, adapter) - - def test_build_context_no_hooks(self, generator: SkillGenerator, tmp_path: Path) -> None: - """Test context with no stop hooks.""" - job_dir = tmp_path / "test_job" - job_dir.mkdir() - steps_dir = job_dir / "steps" - steps_dir.mkdir() - (steps_dir / "step1.md").write_text("# Step 1") - - job = JobDefinition( - name="test_job", - version="1.0.0", - summary="Test", - description="Test", - steps=[ - Step( - id="step1", - name="Step 1", - description="Step", - instructions_file="steps/step1.md", - outputs=[OutputSpec(file="out.md")], - ) - ], - job_dir=job_dir, - ) - - adapter = ClaudeAdapter() - context = generator._build_step_context(job, job.steps[0], 0, adapter) - assert context["stop_hooks"] == [] - - def test_build_context_multiple_hooks(self, generator: SkillGenerator, tmp_path: Path) -> None: - """Test context with multiple stop hooks.""" - job_dir = tmp_path / "test_job" - job_dir.mkdir() - steps_dir = job_dir / "steps" - steps_dir.mkdir() - (steps_dir / "step1.md").write_text("# Step 1") - - job = JobDefinition( - name="test_job", - version="1.0.0", - summary="Test", - description="Test", - steps=[ - Step( - id="step1", - name="Step 1", - description="Step", - instructions_file="steps/step1.md", - outputs=[OutputSpec(file="out.md")], - hooks={ - "after_agent": [ - HookAction(prompt="Check criteria 1"), - HookAction(script="hooks/test.sh"), - HookAction(prompt="Check criteria 2"), - ], - }, - ) - ], - job_dir=job_dir, - ) - - adapter = ClaudeAdapter() - context = generator._build_step_context(job, job.steps[0], 0, adapter) - assert len(context["stop_hooks"]) == 3 - assert context["stop_hooks"][0]["type"] == "prompt" - assert context["stop_hooks"][1]["type"] == "script" - assert context["stop_hooks"][2]["type"] == "prompt" - - def test_build_context_duplicates_stop_to_subagent_stop( - self, generator: SkillGenerator, job_with_hooks: JobDefinition - ) -> None: - """Test that Stop hooks are also registered for SubagentStop event. - - Claude Code has separate Stop and SubagentStop events. When a Stop hook - is defined, it should also be registered for SubagentStop so the hook - triggers for both the main agent and subagents. 
- """ - adapter = ClaudeAdapter() - context = generator._build_step_context(job_with_hooks, job_with_hooks.steps[0], 0, adapter) - - # Should have both Stop and SubagentStop in hooks dict - assert "hooks" in context - assert "Stop" in context["hooks"] - assert "SubagentStop" in context["hooks"] - - # Both should have the same hooks - assert context["hooks"]["Stop"] == context["hooks"]["SubagentStop"] - assert len(context["hooks"]["Stop"]) == 1 - assert context["hooks"]["Stop"][0]["type"] == "prompt" - - def test_build_context_no_subagent_stop_without_stop( - self, generator: SkillGenerator, tmp_path: Path - ) -> None: - """Test that SubagentStop is not created if there are no Stop hooks.""" - job_dir = tmp_path / "test_job" - job_dir.mkdir() - steps_dir = job_dir / "steps" - steps_dir.mkdir() - (steps_dir / "step1.md").write_text("# Step 1") - - job = JobDefinition( - name="test_job", - version="1.0.0", - summary="Test", - description="Test", - steps=[ - Step( - id="step1", - name="Step 1", - description="Step", - instructions_file="steps/step1.md", - outputs=[OutputSpec(file="out.md")], - ) - ], - job_dir=job_dir, - ) - - adapter = ClaudeAdapter() - context = generator._build_step_context(job, job.steps[0], 0, adapter) - - # Should not have Stop or SubagentStop without any hooks - assert "hooks" in context - assert "Stop" not in context["hooks"] - assert "SubagentStop" not in context["hooks"] - - -class TestGeneratorTemplateOutput: - """Tests for generated skill file output.""" - - @pytest.fixture - def full_generator(self) -> SkillGenerator: - """Create generator using actual package templates.""" - # Use the actual templates directory from the package - templates_dir = Path(__file__).parent.parent.parent / "src" / "deepwork" / "templates" - return SkillGenerator(templates_dir) - - @pytest.fixture - def job_with_quality_criteria(self, tmp_path: Path) -> JobDefinition: - """Create job with quality_criteria for testing template output.""" - job_dir = tmp_path / "test_job" - job_dir.mkdir() - steps_dir = job_dir / "steps" - steps_dir.mkdir() - (steps_dir / "step1.md").write_text("# Step 1 Instructions\n\nDo the thing.") - - return JobDefinition( - name="test_job", - version="1.0.0", - summary="Test job", - description="A test job", - steps=[ - Step( - id="step1", - name="Step 1", - description="First step", - instructions_file="steps/step1.md", - outputs=[OutputSpec(file="output.md")], - quality_criteria=["Criterion 1 is met", "Criterion 2 is verified"], - ), - ], - job_dir=job_dir, - ) - - @pytest.fixture - def job_with_stop_hooks(self, tmp_path: Path) -> JobDefinition: - """Create job with custom stop hooks for testing template output.""" - job_dir = tmp_path / "test_job" - job_dir.mkdir() - steps_dir = job_dir / "steps" - steps_dir.mkdir() - (steps_dir / "step1.md").write_text("# Step 1 Instructions") - - return JobDefinition( - name="test_job", - version="1.0.0", - summary="Test job", - description="A test job", - steps=[ - Step( - id="step1", - name="Step 1", - description="First step", - instructions_file="steps/step1.md", - outputs=[OutputSpec(file="output.md")], - hooks={ - "after_agent": [HookAction(prompt="Custom validation prompt")], - }, - ), - ], - job_dir=job_dir, - ) - - def test_template_generates_subagent_review_for_quality_criteria( - self, - full_generator: SkillGenerator, - job_with_quality_criteria: JobDefinition, - tmp_path: Path, - ) -> None: - """Test that template generates sub-agent review instructions for quality_criteria. 
- - NOTE: Prompt-based stop hooks don't work in Claude Code (issue #20221). - Instead, quality_criteria generates sub-agent review instructions in content. - """ - adapter = ClaudeAdapter() - skill_path = full_generator.generate_step_skill( - job_with_quality_criteria, - job_with_quality_criteria.steps[0], - adapter, - tmp_path, - ) - - content = skill_path.read_text() - - # Should NOT generate Stop/SubagentStop hooks (prompt hooks disabled) - assert "Stop:" not in content, "Prompt-based Stop hooks should not be generated" - assert "SubagentStop:" not in content, ( - "Prompt-based SubagentStop hooks should not be generated" - ) - - # Should generate sub-agent review instructions in content - assert "## Quality Validation" in content, "Quality Validation section should be generated" - assert "sub-agent" in content.lower(), "Sub-agent review instructions should be present" - assert "Criterion 1 is met" in content, "Quality criteria should be in content" - assert "Criterion 2 is verified" in content, "Quality criteria should be in content" - - def test_template_does_not_generate_prompt_hooks( - self, full_generator: SkillGenerator, job_with_stop_hooks: JobDefinition, tmp_path: Path - ) -> None: - """Test that template does NOT generate prompt-based stop hooks. - - NOTE: Prompt-based stop hooks don't work in Claude Code (issue #20221). - The template should filter out prompt hooks and not generate them. - """ - adapter = ClaudeAdapter() - skill_path = full_generator.generate_step_skill( - job_with_stop_hooks, - job_with_stop_hooks.steps[0], - adapter, - tmp_path, - ) - - content = skill_path.read_text() - - # Should NOT generate Stop/SubagentStop hooks for prompt-type hooks - assert "Stop:" not in content, "Prompt-based Stop hooks should not be generated" - assert "SubagentStop:" not in content, ( - "Prompt-based SubagentStop hooks should not be generated" - ) - - # The prompt content should NOT appear in the hooks section - assert "Custom validation prompt" not in content, ( - "Prompt content should not be in generated skill" - ) - - @pytest.fixture - def job_with_script_hooks(self, tmp_path: Path) -> JobDefinition: - """Create job with script-type stop hooks for testing template output.""" - job_dir = tmp_path / "test_job" - job_dir.mkdir() - steps_dir = job_dir / "steps" - steps_dir.mkdir() - (steps_dir / "step1.md").write_text("# Step 1 Instructions") - - return JobDefinition( - name="test_job", - version="1.0.0", - summary="Test job", - description="A test job", - steps=[ - Step( - id="step1", - name="Step 1", - description="First step", - instructions_file="steps/step1.md", - outputs=[OutputSpec(file="output.md")], - hooks={ - "after_agent": [HookAction(script="hooks/validate.sh")], - }, - ), - ], - job_dir=job_dir, - ) - - def test_template_generates_stop_hooks_for_script_type( - self, full_generator: SkillGenerator, job_with_script_hooks: JobDefinition, tmp_path: Path - ) -> None: - """Test that template generates Stop/SubagentStop hooks for script-type hooks. - - Script-type hooks (type: command) still work in Claude Code, so they should be generated. 
- """ - adapter = ClaudeAdapter() - skill_path = full_generator.generate_step_skill( - job_with_script_hooks, - job_with_script_hooks.steps[0], - adapter, - tmp_path, - ) - - content = skill_path.read_text() - - # Should generate Stop and SubagentStop hooks for script-type hooks - assert "Stop:" in content, "Script-based Stop hooks should be generated" - assert "SubagentStop:" in content, "Script-based SubagentStop hooks should be generated" - - # Should contain the command type and path - assert "type: command" in content, "Hook should have type: command" - assert "hooks/validate.sh" in content, "Hook path should be in generated skill" diff --git a/uv.lock b/uv.lock index ab3885ac..1cb49f69 100644 --- a/uv.lock +++ b/uv.lock @@ -2,6 +2,15 @@ version = 1 revision = 3 requires-python = ">=3.11" +[[package]] +name = "aiofiles" +version = "25.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/41/c3/534eac40372d8ee36ef40df62ec129bee4fdb5ad9706e58a29be53b2c970/aiofiles-25.1.0.tar.gz", hash = "sha256:a8d728f0a29de45dc521f18f07297428d56992a742f0cd2701ba86e44d23d5b2", size = 46354, upload-time = "2025-10-09T20:51:04.358Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/bc/8a/340a1555ae33d7354dbca4faa54948d76d89a27ceef032c8c3bc661d003e/aiofiles-25.1.0-py3-none-any.whl", hash = "sha256:abe311e527c862958650f9438e859c1fa7568a141b22abcd015e120e86a85695", size = 14668, upload-time = "2025-10-09T20:51:03.174Z" }, +] + [[package]] name = "annotated-types" version = "0.7.0" @@ -447,6 +456,7 @@ name = "deepwork" version = "0.5.1" source = { editable = "." } dependencies = [ + { name = "aiofiles" }, { name = "click" }, { name = "fastmcp" }, { name = "gitpython" }, @@ -462,14 +472,17 @@ dependencies = [ dev = [ { name = "mypy" }, { name = "pytest" }, + { name = "pytest-asyncio" }, { name = "pytest-cov" }, { name = "pytest-mock" }, { name = "ruff" }, + { name = "types-aiofiles" }, { name = "types-pyyaml" }, ] [package.metadata] requires-dist = [ + { name = "aiofiles", specifier = ">=24.0.0" }, { name = "click", specifier = ">=8.1.0" }, { name = "fastmcp", specifier = ">=2.0" }, { name = "gitpython", specifier = ">=3.1.0" }, @@ -479,11 +492,13 @@ requires-dist = [ { name = "mypy", marker = "extra == 'dev'", specifier = ">=1.0" }, { name = "pydantic", specifier = ">=2.0" }, { name = "pytest", marker = "extra == 'dev'", specifier = ">=7.0" }, + { name = "pytest-asyncio", marker = "extra == 'dev'", specifier = ">=0.24.0" }, { name = "pytest-cov", marker = "extra == 'dev'", specifier = ">=4.0" }, { name = "pytest-mock", marker = "extra == 'dev'", specifier = ">=3.10" }, { name = "pyyaml", specifier = ">=6.0" }, { name = "rich", specifier = ">=13.0.0" }, { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.1.0" }, + { name = "types-aiofiles", marker = "extra == 'dev'" }, { name = "types-pyyaml", marker = "extra == 'dev'" }, ] provides-extras = ["dev"] @@ -1464,6 +1479,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/3b/ab/b3226f0bd7cdcf710fbede2b3548584366da3b19b5021e74f5bde2a8fa3f/pytest-9.0.2-py3-none-any.whl", hash = "sha256:711ffd45bf766d5264d487b917733b453d917afd2b0ad65223959f59089f875b", size = 374801, upload-time = "2025-12-06T21:30:49.154Z" }, ] +[[package]] +name = "pytest-asyncio" +version = "1.3.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pytest" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/90/2c/8af215c0f776415f3590cac4f9086ccefd6fd463befeae41cd4d3f193e5a/pytest_asyncio-1.3.0.tar.gz", hash = "sha256:d7f52f36d231b80ee124cd216ffb19369aa168fc10095013c6b014a34d3ee9e5", size = 50087, upload-time = "2025-11-10T16:07:47.256Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e5/35/f8b19922b6a25bc0880171a2f1a003eaeb93657475193ab516fd87cac9da/pytest_asyncio-1.3.0-py3-none-any.whl", hash = "sha256:611e26147c7f77640e6d0a92a38ed17c3e9848063698d5c93d5aa7aa11cebff5", size = 15075, upload-time = "2025-11-10T16:07:45.537Z" }, +] + [[package]] name = "pytest-cov" version = "7.0.0" @@ -1961,6 +1989,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a0/1d/d9257dd49ff2ca23ea5f132edf1281a0c4f9de8a762b9ae399b670a59235/typer-0.21.1-py3-none-any.whl", hash = "sha256:7985e89081c636b88d172c2ee0cfe33c253160994d47bdfdc302defd7d1f1d01", size = 47381, upload-time = "2026-01-06T11:21:09.824Z" }, ] +[[package]] +name = "types-aiofiles" +version = "25.1.0.20251011" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/84/6c/6d23908a8217e36704aa9c79d99a620f2fdd388b66a4b7f72fbc6b6ff6c6/types_aiofiles-25.1.0.20251011.tar.gz", hash = "sha256:1c2b8ab260cb3cd40c15f9d10efdc05a6e1e6b02899304d80dfa0410e028d3ff", size = 14535, upload-time = "2025-10-11T02:44:51.237Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/71/0f/76917bab27e270bb6c32addd5968d69e558e5b6f7fb4ac4cbfa282996a96/types_aiofiles-25.1.0.20251011-py3-none-any.whl", hash = "sha256:8ff8de7f9d42739d8f0dadcceeb781ce27cd8d8c4152d4a7c52f6b20edb8149c", size = 14338, upload-time = "2025-10-11T02:44:50.054Z" }, +] + [[package]] name = "types-pyyaml" version = "6.0.12.20250915" From 842ccd5da120ca25bb6a4045b704a30e6ef125e5 Mon Sep 17 00:00:00 2001 From: Noah Horton Date: Wed, 4 Feb 2026 11:46:42 -0700 Subject: [PATCH 10/45] feat: Add stack-based nested workflow support and abort_workflow tool - StateManager now uses a session stack instead of single active session - Starting a workflow while one is active pushes onto the stack - Completing a workflow pops from stack and resumes parent - Added abort_workflow tool with explanation parameter - All tool responses include stack field [{workflow, step}, ...] 
- Added logging to all MCP tool calls with stack info - Updated server instructions to document nesting and abort Co-Authored-By: Claude Opus 4.5 --- src/deepwork/mcp/schemas.py | 45 ++++++- src/deepwork/mcp/server.py | 71 ++++++++++- src/deepwork/mcp/state.py | 100 ++++++++++++--- src/deepwork/mcp/tools.py | 39 +++++- tests/unit/mcp/test_async_interface.py | 11 ++ tests/unit/mcp/test_state.py | 166 +++++++++++++++++++++++-- 6 files changed, 404 insertions(+), 28 deletions(-) diff --git a/src/deepwork/mcp/schemas.py b/src/deepwork/mcp/schemas.py index 069a9c24..66188927 100644 --- a/src/deepwork/mcp/schemas.py +++ b/src/deepwork/mcp/schemas.py @@ -105,6 +105,14 @@ class FinishedStepInput(BaseModel): ) +class AbortWorkflowInput(BaseModel): + """Input for abort_workflow tool.""" + + explanation: str = Field( + description="Explanation of why the workflow is being aborted" + ) + + # ============================================================================= # Quality Gate Models # ============================================================================= @@ -154,10 +162,20 @@ class GetWorkflowsResponse(BaseModel): jobs: list[JobInfo] = Field(description="List of all jobs with their workflows") +class StackEntry(BaseModel): + """An entry in the workflow stack.""" + + workflow: str = Field(description="Workflow identifier (job_name/workflow_name)") + step: str = Field(description="Current step ID in this workflow") + + class StartWorkflowResponse(BaseModel): """Response from start_workflow tool.""" begin_step: ActiveStepInfo = Field(description="Information about the first step to begin") + stack: list[StackEntry] = Field( + default_factory=list, description="Current workflow stack after starting" + ) class FinishedStepResponse(BaseModel): @@ -184,6 +202,28 @@ class FinishedStepResponse(BaseModel): default=None, description="All outputs from all steps" ) + # Stack info (included in all responses) + stack: list[StackEntry] = Field( + default_factory=list, description="Current workflow stack after this operation" + ) + + +class AbortWorkflowResponse(BaseModel): + """Response from abort_workflow tool.""" + + aborted_workflow: str = Field(description="The workflow that was aborted (job_name/workflow_name)") + aborted_step: str = Field(description="The step that was active when aborted") + explanation: str = Field(description="The explanation provided for aborting") + stack: list[StackEntry] = Field( + default_factory=list, description="Current workflow stack after abort" + ) + resumed_workflow: str | None = Field( + default=None, description="The workflow now active (if any)" + ) + resumed_step: str | None = Field( + default=None, description="The step now active (if any)" + ) + # ============================================================================= # Session State Models @@ -223,7 +263,10 @@ class WorkflowSession(BaseModel): completed_at: str | None = Field( default=None, description="ISO timestamp when completed" ) - status: str = Field(default="active", description="Session status") + status: str = Field(default="active", description="Session status: active, completed, aborted") + abort_reason: str | None = Field( + default=None, description="Explanation if workflow was aborted" + ) def to_dict(self) -> dict[str, Any]: """Convert to dictionary for JSON serialization.""" diff --git a/src/deepwork/mcp/server.py b/src/deepwork/mcp/server.py index 2ec87212..c14ebd88 100644 --- a/src/deepwork/mcp/server.py +++ b/src/deepwork/mcp/server.py @@ -13,6 +13,7 @@ from __future__ import 
annotations +import logging from pathlib import Path from typing import Any @@ -20,12 +21,16 @@ from deepwork.mcp.quality_gate import QualityGate from deepwork.mcp.schemas import ( + AbortWorkflowInput, FinishedStepInput, StartWorkflowInput, ) from deepwork.mcp.state import StateManager from deepwork.mcp.tools import WorkflowTools +# Configure logging +logger = logging.getLogger("deepwork.mcp") + def create_server( project_root: Path | str, @@ -76,6 +81,18 @@ def create_server( # descriptions), update doc/mcp_interface.md to keep documentation in sync. # ========================================================================= + def _log_tool_call(tool_name: str, params: dict[str, Any] | None = None) -> None: + """Log a tool call with stack information.""" + stack = [entry.model_dump() for entry in state_manager.get_stack()] + log_data = { + "tool": tool_name, + "stack": stack, + "stack_depth": len(stack), + } + if params: + log_data["params"] = params + logger.info("MCP tool call: %s", log_data) + @mcp.tool( description=( "List all available DeepWork workflows. " @@ -85,6 +102,7 @@ def create_server( ) def get_workflows() -> dict[str, Any]: """Get all available workflows.""" + _log_tool_call("get_workflows") response = tools.get_workflows() return response.model_dump() @@ -94,7 +112,9 @@ def get_workflows() -> dict[str, Any]: "Creates a git branch, initializes state tracking, and returns " "the first step's instructions. " "Required parameters: goal (what user wants), job_name, workflow_name. " - "Optional: instance_id for naming (e.g., 'acme', 'q1-2026')." + "Optional: instance_id for naming (e.g., 'acme', 'q1-2026'). " + "Supports nested workflows - starting a workflow while one is active " + "pushes onto the stack. Use abort_workflow to cancel and return to parent." ) ) async def start_workflow( @@ -104,6 +124,12 @@ async def start_workflow( instance_id: str | None = None, ) -> dict[str, Any]: """Start a workflow and get first step instructions.""" + _log_tool_call("start_workflow", { + "goal": goal, + "job_name": job_name, + "workflow_name": workflow_name, + "instance_id": instance_id, + }) input_data = StartWorkflowInput( goal=goal, job_name=job_name, @@ -120,7 +146,7 @@ async def start_workflow( "then returns either: " "'needs_work' with feedback to fix issues, " "'next_step' with instructions for the next step, or " - "'workflow_complete' when finished. " + "'workflow_complete' when finished (pops from stack if nested). " "Required: outputs (list of file paths created). " "Optional: notes about work done. " "Optional: quality_review_override_reason to skip quality review (must explain why)." @@ -132,6 +158,11 @@ async def finished_step( quality_review_override_reason: str | None = None, ) -> dict[str, Any]: """Report step completion and get next instructions.""" + _log_tool_call("finished_step", { + "outputs": outputs, + "notes": notes, + "quality_review_override_reason": quality_review_override_reason, + }) input_data = FinishedStepInput( outputs=outputs, notes=notes, @@ -140,6 +171,23 @@ async def finished_step( response = await tools.finished_step(input_data) return response.model_dump() + @mcp.tool( + description=( + "Abort the current workflow and return to the parent workflow (if nested). " + "Use this when a workflow cannot be completed and needs to be abandoned. " + "Required: explanation (why the workflow is being aborted). " + "Returns the aborted workflow info and the resumed parent workflow (if any)." 
+ ) + ) + async def abort_workflow( + explanation: str, + ) -> dict[str, Any]: + """Abort the current workflow and return to parent.""" + _log_tool_call("abort_workflow", {"explanation": explanation}) + input_data = AbortWorkflowInput(explanation=explanation) + response = await tools.abort_workflow(input_data) + return response.model_dump() + return mcp @@ -171,6 +219,23 @@ def _get_server_instructions() -> str: - Fix the issues and call `finished_step` again - After passing, you'll get the next step or completion +## Nested Workflows + +Workflows can be nested - starting a new workflow while one is active pushes +onto a stack. This is useful when a step requires running another workflow. + +- All tool responses include a `stack` field showing the current workflow stack +- Each stack entry shows `{workflow: "job/workflow", step: "current_step"}` +- When a workflow completes, it pops from the stack and resumes the parent +- Use `abort_workflow` to cancel the current workflow and return to parent + +## Aborting Workflows + +If a workflow cannot be completed, use `abort_workflow` with an explanation: +- The current workflow is marked as aborted and popped from the stack +- If there was a parent workflow, it becomes active again +- The explanation is saved for debugging and audit purposes + ## Best Practices - Always call `get_workflows` first to understand available options @@ -178,4 +243,6 @@ def _get_server_instructions() -> str: - Create all expected outputs before calling `finished_step` - Use instance_id for meaningful names (e.g., client name, quarter) - Read quality gate feedback carefully before retrying +- Check the `stack` field in responses to understand nesting depth +- Use `abort_workflow` rather than leaving workflows in a broken state """ diff --git a/src/deepwork/mcp/state.py b/src/deepwork/mcp/state.py index ca187d13..a2fb4e41 100644 --- a/src/deepwork/mcp/state.py +++ b/src/deepwork/mcp/state.py @@ -2,6 +2,10 @@ State is persisted to `.deepwork/tmp/session_[id].json` for transparency and recovery. + +Supports nested workflows via a session stack - when a step starts a new +workflow, it's pushed onto the stack. When a workflow completes or is +aborted, it's popped from the stack. """ from __future__ import annotations @@ -14,7 +18,7 @@ import aiofiles -from deepwork.mcp.schemas import StepProgress, WorkflowSession +from deepwork.mcp.schemas import StackEntry, StepProgress, WorkflowSession class StateError(Exception): @@ -24,7 +28,7 @@ class StateError(Exception): class StateManager: - """Manages workflow session state. + """Manages workflow session state with stack-based nesting support. Sessions are persisted to `.deepwork/tmp/` as JSON files for: - Transparency: Users can inspect session state @@ -33,6 +37,10 @@ class StateManager: This implementation is async-safe and uses a lock to prevent concurrent access issues. + + Supports nested workflows via a session stack - starting a new workflow + while one is active pushes onto the stack. Completing or aborting pops + from the stack. 
""" def __init__(self, project_root: Path): @@ -43,7 +51,7 @@ def __init__(self, project_root: Path): """ self.project_root = project_root self.sessions_dir = project_root / ".deepwork" / "tmp" - self._active_session: WorkflowSession | None = None + self._session_stack: list[WorkflowSession] = [] self._lock = asyncio.Lock() def _ensure_sessions_dir(self) -> None: @@ -111,7 +119,7 @@ async def create_session( ) await self._save_session_unlocked(session) - self._active_session = session + self._session_stack.append(session) return session async def _save_session_unlocked(self, session: WorkflowSession) -> None: @@ -149,19 +157,23 @@ async def load_session(self, session_id: str) -> WorkflowSession: data = json.loads(content) session = WorkflowSession.from_dict(data) - self._active_session = session + # Replace top of stack or push if empty + if self._session_stack: + self._session_stack[-1] = session + else: + self._session_stack.append(session) return session def get_active_session(self) -> WorkflowSession | None: - """Get the currently active session. + """Get the currently active session (top of stack). Returns: Active session or None if no session active """ - return self._active_session + return self._session_stack[-1] if self._session_stack else None def require_active_session(self) -> WorkflowSession: - """Get active session or raise error. + """Get active session (top of stack) or raise error. Returns: Active session @@ -169,11 +181,11 @@ def require_active_session(self) -> WorkflowSession: Raises: StateError: If no active session """ - if self._active_session is None: + if not self._session_stack: raise StateError( "No active workflow session. Use start_workflow to begin a workflow." ) - return self._active_session + return self._session_stack[-1] async def start_step(self, step_id: str) -> None: """Mark a step as started. @@ -268,8 +280,11 @@ async def advance_to_step(self, step_id: str, entry_index: int) -> None: session.current_entry_index = entry_index await self._save_session_unlocked(session) - async def complete_workflow(self) -> None: - """Mark the workflow as complete. + async def complete_workflow(self) -> WorkflowSession | None: + """Mark the workflow as complete and pop from stack. + + Returns: + The new active session after popping, or None if stack is empty Raises: StateError: If no active session @@ -281,6 +296,39 @@ async def complete_workflow(self) -> None: session.status = "completed" await self._save_session_unlocked(session) + # Pop completed session from stack + self._session_stack.pop() + + # Return new active session (if any) + return self._session_stack[-1] if self._session_stack else None + + async def abort_workflow(self, explanation: str) -> tuple[WorkflowSession, WorkflowSession | None]: + """Abort the current workflow and pop from stack. 
+ + Args: + explanation: Reason for aborting the workflow + + Returns: + Tuple of (aborted session, new active session or None) + + Raises: + StateError: If no active session + """ + async with self._lock: + session = self.require_active_session() + now = datetime.now(UTC).isoformat() + session.completed_at = now + session.status = "aborted" + session.abort_reason = explanation + await self._save_session_unlocked(session) + + # Pop aborted session from stack + self._session_stack.pop() + + # Return aborted session and new active session (if any) + new_active = self._session_stack[-1] if self._session_stack else None + return session, new_active + def get_all_outputs(self) -> list[str]: """Get all outputs from all completed steps. @@ -296,6 +344,28 @@ def get_all_outputs(self) -> list[str]: outputs.extend(progress.outputs) return outputs + def get_stack(self) -> list[StackEntry]: + """Get the current workflow stack as StackEntry objects. + + Returns: + List of StackEntry with workflow and step info, bottom to top + """ + return [ + StackEntry( + workflow=f"{s.job_name}/{s.workflow_name}", + step=s.current_step_id, + ) + for s in self._session_stack + ] + + def get_stack_depth(self) -> int: + """Get the current stack depth. + + Returns: + Number of active workflow sessions on the stack + """ + return len(self._session_stack) + async def list_sessions(self) -> list[WorkflowSession]: """List all saved sessions. @@ -350,5 +420,7 @@ async def delete_session(self, session_id: str) -> None: if session_file.exists(): session_file.unlink() - if self._active_session and self._active_session.session_id == session_id: - self._active_session = None + # Remove from stack if present + self._session_stack = [ + s for s in self._session_stack if s.session_id != session_id + ] diff --git a/src/deepwork/mcp/tools.py b/src/deepwork/mcp/tools.py index 43024ce1..8f2f46e8 100644 --- a/src/deepwork/mcp/tools.py +++ b/src/deepwork/mcp/tools.py @@ -13,6 +13,8 @@ from deepwork.core.parser import JobDefinition, ParseError, Workflow, parse_job_definition from deepwork.mcp.schemas import ( + AbortWorkflowInput, + AbortWorkflowResponse, ActiveStepInfo, FinishedStepInput, FinishedStepResponse, @@ -259,7 +261,8 @@ async def start_workflow(self, input_data: StartWorkflowInput) -> StartWorkflowR step_expected_outputs=step_outputs, step_quality_criteria=first_step.quality_criteria, step_instructions=instructions, - ) + ), + stack=self.state_manager.get_stack(), ) async def finished_step(self, input_data: FinishedStepInput) -> FinishedStepResponse: @@ -316,6 +319,7 @@ async def finished_step(self, input_data: FinishedStepInput) -> FinishedStepResp status=StepStatus.NEEDS_WORK, feedback=result.feedback, failed_criteria=failed_criteria, + stack=self.state_manager.get_stack(), ) # Mark step as completed @@ -330,14 +334,15 @@ async def finished_step(self, input_data: FinishedStepInput) -> FinishedStepResp next_entry_index = current_entry_index + 1 if next_entry_index >= len(workflow.step_entries): - # Workflow complete - await self.state_manager.complete_workflow() + # Workflow complete - get outputs before completing (which pops from stack) all_outputs = self.state_manager.get_all_outputs() + await self.state_manager.complete_workflow() return FinishedStepResponse( status=StepStatus.WORKFLOW_COMPLETE, summary=f"Workflow '{workflow.name}' completed successfully!", all_outputs=all_outputs, + stack=self.state_manager.get_stack(), ) # Get next step @@ -381,4 +386,32 @@ async def finished_step(self, input_data: FinishedStepInput) -> 
FinishedStepResp step_quality_criteria=next_step.quality_criteria, step_instructions=instructions, ), + stack=self.state_manager.get_stack(), + ) + + async def abort_workflow(self, input_data: AbortWorkflowInput) -> AbortWorkflowResponse: + """Abort the current workflow and return to the previous one. + + Args: + input_data: AbortWorkflowInput with explanation + + Returns: + AbortWorkflowResponse with abort info and new stack state + + Raises: + StateError: If no active session + """ + aborted_session, new_active = await self.state_manager.abort_workflow( + input_data.explanation + ) + + return AbortWorkflowResponse( + aborted_workflow=f"{aborted_session.job_name}/{aborted_session.workflow_name}", + aborted_step=aborted_session.current_step_id, + explanation=input_data.explanation, + stack=self.state_manager.get_stack(), + resumed_workflow=( + f"{new_active.job_name}/{new_active.workflow_name}" if new_active else None + ), + resumed_step=new_active.current_step_id if new_active else None, ) diff --git a/tests/unit/mcp/test_async_interface.py b/tests/unit/mcp/test_async_interface.py index 82e00b9b..91b511fd 100644 --- a/tests/unit/mcp/test_async_interface.py +++ b/tests/unit/mcp/test_async_interface.py @@ -29,6 +29,7 @@ def test_state_manager_async_methods(self) -> None: "record_quality_attempt", "advance_to_step", "complete_workflow", + "abort_workflow", "list_sessions", "find_active_sessions_for_workflow", "delete_session", @@ -50,11 +51,21 @@ def test_state_manager_has_lock(self, tmp_path: Path) -> None: "StateManager._lock must be an asyncio.Lock for async concurrency safety" ) + def test_state_manager_has_session_stack(self, tmp_path: Path) -> None: + """Verify StateManager uses a session stack for nested workflows.""" + manager = StateManager(tmp_path) + + assert hasattr(manager, "_session_stack"), "StateManager must have _session_stack attribute" + assert isinstance(manager._session_stack, list), ( + "StateManager._session_stack must be a list for nested workflow support" + ) + def test_workflow_tools_async_methods(self) -> None: """Verify WorkflowTools methods that must be async remain async.""" async_methods = [ "start_workflow", "finished_step", + "abort_workflow", ] for method_name in async_methods: diff --git a/tests/unit/mcp/test_state.py b/tests/unit/mcp/test_state.py index 2b27189a..4b84cbc4 100644 --- a/tests/unit/mcp/test_state.py +++ b/tests/unit/mcp/test_state.py @@ -29,7 +29,8 @@ def test_init(self, state_manager: StateManager, project_root: Path) -> None: """Test StateManager initialization.""" assert state_manager.project_root == project_root assert state_manager.sessions_dir == project_root / ".deepwork" / "tmp" - assert state_manager._active_session is None + assert state_manager._session_stack == [] + assert state_manager.get_stack_depth() == 0 def test_generate_session_id(self, state_manager: StateManager) -> None: """Test session ID generation.""" @@ -191,20 +192,27 @@ async def test_advance_to_step(self, state_manager: StateManager) -> None: assert session.current_entry_index == 1 async def test_complete_workflow(self, state_manager: StateManager) -> None: - """Test marking workflow as complete.""" - await state_manager.create_session( + """Test marking workflow as complete pops from stack.""" + session = await state_manager.create_session( job_name="test_job", workflow_name="main", goal="Complete the task", first_step_id="step1", ) + session_id = session.session_id - await state_manager.complete_workflow() - session = state_manager.get_active_session() + # 
Complete workflow - should pop from stack + new_active = await state_manager.complete_workflow() - assert session is not None - assert session.status == "completed" - assert session.completed_at is not None + # No active session after completion + assert new_active is None + assert state_manager.get_active_session() is None + assert state_manager.get_stack_depth() == 0 + + # But completed session should be persisted to disk + loaded = await state_manager.load_session(session_id) + assert loaded.status == "completed" + assert loaded.completed_at is not None async def test_get_all_outputs(self, state_manager: StateManager) -> None: """Test getting all outputs from completed steps.""" @@ -285,3 +293,145 @@ async def test_delete_session(self, state_manager: StateManager) -> None: assert not session_file.exists() assert state_manager.get_active_session() is None + + +class TestStateManagerStack: + """Tests for stack-based workflow nesting.""" + + @pytest.fixture + def project_root(self, tmp_path: Path) -> Path: + """Create a temporary project root with .deepwork directory.""" + deepwork_dir = tmp_path / ".deepwork" + deepwork_dir.mkdir() + (deepwork_dir / "tmp").mkdir() + return tmp_path + + @pytest.fixture + def state_manager(self, project_root: Path) -> StateManager: + """Create a StateManager instance.""" + return StateManager(project_root) + + async def test_nested_workflows_stack(self, state_manager: StateManager) -> None: + """Test that starting workflows pushes onto the stack.""" + # Start first workflow + session1 = await state_manager.create_session( + job_name="job1", + workflow_name="workflow1", + goal="Goal 1", + first_step_id="step1", + ) + + assert state_manager.get_stack_depth() == 1 + assert state_manager.get_active_session() == session1 + + # Start nested workflow + session2 = await state_manager.create_session( + job_name="job2", + workflow_name="workflow2", + goal="Goal 2", + first_step_id="stepA", + ) + + assert state_manager.get_stack_depth() == 2 + assert state_manager.get_active_session() == session2 + + # Start another nested workflow + session3 = await state_manager.create_session( + job_name="job3", + workflow_name="workflow3", + goal="Goal 3", + first_step_id="stepX", + ) + + assert state_manager.get_stack_depth() == 3 + assert state_manager.get_active_session() == session3 + + async def test_complete_workflow_pops_stack(self, state_manager: StateManager) -> None: + """Test that completing a workflow pops from stack and resumes parent.""" + # Start two nested workflows + session1 = await state_manager.create_session( + job_name="job1", + workflow_name="workflow1", + goal="Goal 1", + first_step_id="step1", + ) + await state_manager.create_session( + job_name="job2", + workflow_name="workflow2", + goal="Goal 2", + first_step_id="stepA", + ) + + assert state_manager.get_stack_depth() == 2 + + # Complete inner workflow + resumed = await state_manager.complete_workflow() + + assert state_manager.get_stack_depth() == 1 + assert resumed == session1 + assert state_manager.get_active_session() == session1 + + async def test_get_stack(self, state_manager: StateManager) -> None: + """Test get_stack returns workflow/step info.""" + await state_manager.create_session( + job_name="job1", + workflow_name="wf1", + goal="Goal 1", + first_step_id="step1", + ) + await state_manager.create_session( + job_name="job2", + workflow_name="wf2", + goal="Goal 2", + first_step_id="stepA", + ) + + stack = state_manager.get_stack() + + assert len(stack) == 2 + assert stack[0].workflow == 
"job1/wf1" + assert stack[0].step == "step1" + assert stack[1].workflow == "job2/wf2" + assert stack[1].step == "stepA" + + async def test_abort_workflow(self, state_manager: StateManager) -> None: + """Test abort_workflow marks as aborted and pops from stack.""" + session1 = await state_manager.create_session( + job_name="job1", + workflow_name="wf1", + goal="Goal 1", + first_step_id="step1", + ) + session2 = await state_manager.create_session( + job_name="job2", + workflow_name="wf2", + goal="Goal 2", + first_step_id="stepA", + ) + + # Abort inner workflow + aborted, resumed = await state_manager.abort_workflow("Something went wrong") + + assert aborted.session_id == session2.session_id + assert aborted.status == "aborted" + assert aborted.abort_reason == "Something went wrong" + assert resumed == session1 + assert state_manager.get_stack_depth() == 1 + assert state_manager.get_active_session() == session1 + + async def test_abort_workflow_no_parent(self, state_manager: StateManager) -> None: + """Test abort_workflow with no parent workflow.""" + session = await state_manager.create_session( + job_name="job1", + workflow_name="wf1", + goal="Goal 1", + first_step_id="step1", + ) + + aborted, resumed = await state_manager.abort_workflow("Cancelled") + + assert aborted.session_id == session.session_id + assert aborted.status == "aborted" + assert resumed is None + assert state_manager.get_stack_depth() == 0 + assert state_manager.get_active_session() is None From 0b3d6663a2bead71f369c8c08fe13960fe790234 Mon Sep 17 00:00:00 2001 From: Noah Horton Date: Wed, 4 Feb 2026 13:47:55 -0700 Subject: [PATCH 11/45] repair added but not run --- .claude/skills/deepwork/SKILL.md | 137 +---- .deepwork/jobs/deepwork_jobs/job.yml | 133 ++++- .deepwork/jobs/deepwork_jobs/steps/errata.md | 247 +++++++++ .../jobs/deepwork_jobs/steps/fix_jobs.md | 195 +++++++ .../jobs/deepwork_jobs/steps/fix_settings.md | 188 +++++++ .deepwork/jobs/deepwork_jobs/steps/iterate.md | 243 ++++++++ .deepwork/jobs/deepwork_jobs/steps/test.md | 171 ++++++ .gemini/skills/deepwork/index.toml | 28 + doc/architecture.md | 66 +-- doc/mcp_interface.md | 8 - src/deepwork/core/adapters.py | 70 --- src/deepwork/core/generator.py | 524 +----------------- src/deepwork/core/parser.py | 33 ++ src/deepwork/mcp/schemas.py | 3 - src/deepwork/mcp/tools.py | 31 +- .../standard_jobs/deepwork_jobs/job.yml | 133 ++++- .../deepwork_jobs/steps/errata.md | 247 +++++++++ .../deepwork_jobs/steps/fix_jobs.md | 195 +++++++ .../deepwork_jobs/steps/fix_settings.md | 188 +++++++ .../deepwork_jobs/steps/iterate.md | 243 ++++++++ .../standard_jobs/deepwork_jobs/steps/test.md | 171 ++++++ .../templates/claude/skill-deepwork.md.jinja | 137 +---- .../templates/claude/skill-job-meta.md.jinja | 147 ----- .../templates/claude/skill-job-step.md.jinja | 263 --------- .../templates/gemini/skill-deepwork.md.jinja | 36 ++ .../gemini/skill-job-meta.toml.jinja | 76 --- .../gemini/skill-job-step.toml.jinja | 162 ------ tests/unit/mcp/test_schemas.py | 1 - 28 files changed, 2493 insertions(+), 1583 deletions(-) create mode 100644 .deepwork/jobs/deepwork_jobs/steps/errata.md create mode 100644 .deepwork/jobs/deepwork_jobs/steps/fix_jobs.md create mode 100644 .deepwork/jobs/deepwork_jobs/steps/fix_settings.md create mode 100644 .deepwork/jobs/deepwork_jobs/steps/iterate.md create mode 100644 .deepwork/jobs/deepwork_jobs/steps/test.md create mode 100644 .gemini/skills/deepwork/index.toml create mode 100644 src/deepwork/standard_jobs/deepwork_jobs/steps/errata.md create mode 100644 
src/deepwork/standard_jobs/deepwork_jobs/steps/fix_jobs.md create mode 100644 src/deepwork/standard_jobs/deepwork_jobs/steps/fix_settings.md create mode 100644 src/deepwork/standard_jobs/deepwork_jobs/steps/iterate.md create mode 100644 src/deepwork/standard_jobs/deepwork_jobs/steps/test.md delete mode 100644 src/deepwork/templates/claude/skill-job-meta.md.jinja delete mode 100644 src/deepwork/templates/claude/skill-job-step.md.jinja create mode 100644 src/deepwork/templates/gemini/skill-deepwork.md.jinja delete mode 100644 src/deepwork/templates/gemini/skill-job-meta.toml.jinja delete mode 100644 src/deepwork/templates/gemini/skill-job-step.toml.jinja diff --git a/.claude/skills/deepwork/SKILL.md b/.claude/skills/deepwork/SKILL.md index 3b1a9267..a8f84aa6 100644 --- a/.claude/skills/deepwork/SKILL.md +++ b/.claude/skills/deepwork/SKILL.md @@ -7,135 +7,22 @@ description: "Start or continue DeepWork workflows using MCP tools" Execute multi-step workflows with quality gate checkpoints. -> **IMPORTANT**: This skill uses the DeepWork MCP server. All workflow operations -> are performed through MCP tool calls, not by reading instructions from files. +> **IMPORTANT**: Use the DeepWork MCP server tools. All workflow operations +> are performed through MCP tool calls and following the instructions they return, +> not by reading instructions from files. -## Quick Start +## How to Use -1. **Discover workflows**: Call `get_workflows` to see available options -2. **Start a workflow**: Call `start_workflow` with your goal -3. **Execute steps**: Follow the instructions returned -4. **Checkpoint**: Call `finished_step` with your outputs -5. **Iterate or continue**: Handle `needs_work`, `next_step`, or `workflow_complete` - -## MCP Tools Reference - -### get_workflows - -Lists all available workflows in this project. - -``` -Tool: deepwork.get_workflows -Parameters: none -``` - -Returns jobs with their workflows, steps, and summaries. - -### start_workflow - -Begins a new workflow session. - -``` -Tool: deepwork.start_workflow -Parameters: - - goal: string (required) - What you want to accomplish - - job_name: string (required) - Name of the job - - workflow_name: string (required) - Name of the workflow - - instance_id: string (optional) - Identifier like "acme" or "q1-2026" -``` - -Returns session ID, branch name, and first step instructions. - -### finished_step - -Reports completion of the current step. - -``` -Tool: deepwork.finished_step -Parameters: - - outputs: list[string] (required) - File paths of created outputs - - notes: string (optional) - Notes about what was done -``` - -Returns one of: -- `needs_work`: Quality criteria not met; fix and retry -- `next_step`: Proceed to next step with new instructions -- `workflow_complete`: All steps done; workflow finished - -## Execution Flow - -``` -User: /deepwork [intent] - │ - ▼ -┌─────────────────┐ -│ get_workflows │ ◄── Discover available workflows -└────────┬────────┘ - │ - ▼ -┌─────────────────┐ -│ Parse intent │ ◄── Match user intent to workflow -└────────┬────────┘ - │ - ▼ -┌─────────────────┐ -│ start_workflow │ ◄── Begin session, get first step -└────────┬────────┘ - │ - ▼ -┌─────────────────┐ -│ Execute step │ ◄── Follow step instructions -│ Create outputs │ -└────────┬────────┘ - │ - ▼ -┌─────────────────┐ -│ finished_step │ ◄── Report completion -└────────┬────────┘ - │ - ┌────┴────┐ - │ │ -needs_work next_step ─────► Loop back to "Execute step" - │ │ - │ workflow_complete - │ │ - ▼ ▼ -┌─────────────────┐ -│ Fix issues and │ Done! 
-│ retry │ -└─────────────────┘ -``` +1. Call `get_workflows` to discover available workflows +2. Call `start_workflow` with goal, job_name, and workflow_name +3. Follow the step instructions returned +4. Call `finished_step` with your outputs when done +5. Handle the response: `needs_work`, `next_step`, or `workflow_complete` ## Intent Parsing When the user invokes `/deepwork`, parse their intent: -1. **Explicit workflow**: `/deepwork new_job` → start `new_job` workflow -2. **General request**: `/deepwork I want to create a new workflow` → infer best match -3. **No context**: `/deepwork` alone → call `get_workflows` and ask user to choose - -## Quality Gates - -Steps may have quality criteria. When you call `finished_step`: - -1. Outputs are evaluated against criteria -2. If any fail → `needs_work` status with feedback -3. Fix issues based on feedback -4. Call `finished_step` again -5. After passing → proceed to next step - -## Git Workflow - -DeepWork creates branches for workflow instances: -- Format: `deepwork/{job_name}-{workflow_name}-{instance_id or date}` -- Example: `deepwork/competitive_research-full_analysis-acme` - -Commit work as you go. Create PR when workflow completes. - -## Guardrails - -- Always use MCP tools; never manually read step instruction files -- Create ALL expected outputs before calling `finished_step` -- Read quality gate feedback carefully before retrying -- Don't skip steps unless user explicitly requests it -- Ask for clarification when user intent is ambiguous \ No newline at end of file +- **Explicit workflow**: `/deepwork new_job` → start the `new_job` workflow +- **General request**: `/deepwork I want to create a new workflow` → infer best match from available workflows +- **No context**: `/deepwork` alone → call `get_workflows` and ask user to choose \ No newline at end of file diff --git a/.deepwork/jobs/deepwork_jobs/job.yml b/.deepwork/jobs/deepwork_jobs/job.yml index 4b58cb47..5acfd3d0 100644 --- a/.deepwork/jobs/deepwork_jobs/job.yml +++ b/.deepwork/jobs/deepwork_jobs/job.yml @@ -1,14 +1,16 @@ # yaml-language-server: $schema=.deepwork/schemas/job.schema.json name: deepwork_jobs -version: "1.0.0" -summary: "Creates and manages multi-step AI workflows. Use when defining, implementing, or improving DeepWork jobs." +version: "1.2.0" +summary: "Creates and manages multi-step AI workflows. Use when defining, implementing, testing, or improving DeepWork jobs." description: | Core commands for managing DeepWork jobs. These commands help you define new multi-step - workflows and learn from running them. + workflows, test them on real use cases, and learn from running them. - The `new_job` workflow guides you through defining and implementing a new job by - asking structured questions about your workflow, understanding each step's inputs and outputs, - reviewing the specification, and generating all necessary files. + The `new_job` workflow guides you through the full lifecycle of creating a new job: + 1. **Define**: Gather requirements through structured questions and create job.yml + 2. **Implement**: Generate step instruction files and sync slash commands + 3. **Test**: Run the workflow on a real use case, critique output, and iterate with user + 4. **Iterate**: Review what happened and improve the job definition based on learnings The `learn` skill reflects on conversations where DeepWork jobs were run, identifies confusion or inefficiencies, and improves job instructions. 
It also captures bespoke @@ -16,13 +18,26 @@ description: | workflows: - name: new_job - summary: "Create a new DeepWork job from scratch through definition and implementation" + summary: "Create a new DeepWork job from scratch through definition, implementation, testing, and iteration" steps: - define - implement + - test + - iterate + + - name: repair + summary: "Clean up and migrate DeepWork configurations from prior versions" + steps: + - fix_settings + - fix_jobs + - errata changelog: + - version: "1.2.0" + changes: "Added repair workflow with fix_settings, fix_jobs, and errata steps for migrating old DeepWork configurations to current format" - version: "1.1.0" + changes: "Added test and iterate steps to new_job workflow; test runs the workflow on a real use case and gathers feedback; iterate improves the job definition based on what happened" + - version: "1.0.1" changes: "Removed review_job_spec step from new_job workflow; implement now follows directly from define" - version: "1.0.0" changes: "Added workflows section to distinguish new_job workflow (define→review_job_spec→implement) from standalone learn skill" @@ -79,6 +94,52 @@ steps: - "**Commands Available**: Are the slash-commands generated in `.claude/commands/`?" - "**Rules Considered**: Has the agent thought about whether rules would benefit this job? If relevant rules were identified, did they explain them and offer to run `/deepwork_rules.define`? Not every job needs rules - only suggest when genuinely helpful." + - id: test + name: "Test the New Workflow" + description: "Tests the newly created workflow by running it on a real use case, critiquing the output, and iterating until the user is satisfied. Use after implementing a job." + instructions_file: steps/test.md + inputs: + - file: job.yml + from_step: define + - file: steps/ + from_step: implement + outputs: + - test_feedback.md + dependencies: + - define + - implement + quality_criteria: + - "**User Informed**: Did the agent explain the workflow is ready and ask what to test it on?" + - "**Workflow Invoked**: Was the new workflow actually run on the user's test case?" + - "**Output Critiqued**: Did the agent identify up to 3 top issues with the output?" + - "**User Feedback Gathered**: Did the agent ask the user about each issue and gather additional feedback?" + - "**Corrections Made**: Were all requested corrections applied to the output?" + - "**User Satisfied**: Did the user confirm the output meets their needs?" + + - id: iterate + name: "Iterate on Workflow Design" + description: "Reviews the test run conversation and improves the job definition based on what happened. Use after testing a newly created job." + instructions_file: steps/iterate.md + inputs: + - file: job.yml + from_step: define + - file: steps/ + from_step: implement + outputs: + - job.yml + - steps/ + dependencies: + - define + - implement + - test + quality_criteria: + - "**Conversation Reviewed**: Did the agent analyze the test run for inefficiencies and issues?" + - "**Instructions Improved**: Were step instructions updated to address identified problems?" + - "**Quality Criteria Updated**: Were quality criteria adjusted to better match user expectations?" + - "**Tool Usage Considered**: Did the agent consider if different tools would improve the workflow?" + - "**Sync Complete**: Has `deepwork sync` been run to apply changes?" + - "**Recap Provided**: Did the agent summarize what was improved and why?" 
+ - id: learn name: "Learn from Job Execution" description: "Analyzes conversation history to improve job instructions and capture learnings. Use after running a job to refine it." @@ -103,3 +164,61 @@ steps: - "**Working Folder Correct**: Is AGENTS.md in the correct working folder for the job?" - "**Generalizable Separated**: Are generalizable improvements in instructions, not AGENTS.md?" - "**Sync Complete**: Has `deepwork sync` been run if instructions were modified?" + + - id: fix_settings + name: "Fix Settings Files" + description: "Cleans up .claude/settings.json and related configuration files, removing legacy permissions, duplicate hooks, and hardcoded paths from prior DeepWork versions." + instructions_file: steps/fix_settings.md + inputs: [] + outputs: + - .claude/settings.json + dependencies: [] + quality_criteria: + - "**DeepWork Skills Removed**: Are `Skill(...)` entries matching jobs in `.deepwork/jobs/` removed?" + - "**Non-DeepWork Skills Preserved**: Are skills NOT matching DeepWork jobs left intact?" + - "**Rules Hooks Removed**: Are all DeepWork Rules hooks and permissions removed?" + - "**Duplicate Hooks Removed**: Are duplicate hook entries consolidated or removed?" + - "**Hardcoded Paths Removed**: Are user-specific hardcoded paths (like `/Users/*/...`) removed?" + - "**Deprecated Commands Removed**: Are deprecated commands like `deepwork hook *` removed?" + - "**Valid JSON**: Is settings.json still valid JSON after modifications?" + - "**Backup Created**: Was a backup of the original settings created before modifications?" + + - id: fix_jobs + name: "Fix Job Definitions" + description: "Updates job.yml files and step instructions to current DeepWork format, removing deprecated fields and migrating to new structures." + instructions_file: steps/fix_jobs.md + inputs: + - file: .claude/settings.json + from_step: fix_settings + outputs: + - .deepwork/jobs/ + dependencies: + - fix_settings + quality_criteria: + - "**Exposed Field Addressed**: Are `exposed: true` fields removed or noted as deprecated?" + - "**Stop Hooks Migrated**: Are `stop_hooks` migrated to `hooks.after_agent` format?" + - "**Removed Steps Cleaned**: Are references to removed steps (like `review_job_spec`) updated?" + - "**Orphaned Steps Fixed**: Are steps not in any workflow either added to workflows or removed?" + - "**Valid YAML**: Do all job.yml files pass schema validation?" + - "**Sync Complete**: Has `deepwork sync` been run to regenerate commands?" + + - id: errata + name: "Clean Up Errata" + description: "Removes obsolete files and folders from prior DeepWork versions, including old skill directories, temp files, and deprecated configurations." + instructions_file: steps/errata.md + inputs: + - file: .deepwork/jobs/ + from_step: fix_jobs + outputs: + - repair_summary.md + dependencies: + - fix_settings + - fix_jobs + quality_criteria: + - "**Old Skills Folder Handled**: Is `.claude/skills/` folder removed or backed up?" + - "**Temp Files Cleaned**: Are `.deepwork/tmp/` contents cleaned appropriately?" + - "**Rules Folder Removed**: Is `.deepwork/rules/` folder backed up and removed (fully deprecated)?" + - "**Rules Job Removed**: Is `.deepwork/jobs/deepwork_rules/` removed if present?" + - "**Config Version Updated**: Is `.deepwork/config.yml` using current version format?" + - "**Summary Provided**: Is a repair_summary.md file created documenting all changes made?" + - "**Git Status Clean**: Are changes ready to be committed (no untracked garbage files)?" 
diff --git a/.deepwork/jobs/deepwork_jobs/steps/errata.md b/.deepwork/jobs/deepwork_jobs/steps/errata.md new file mode 100644 index 00000000..30ee7e8a --- /dev/null +++ b/.deepwork/jobs/deepwork_jobs/steps/errata.md @@ -0,0 +1,247 @@ +# Clean Up Errata + +## Objective + +Remove obsolete files and folders from prior DeepWork versions. This final step cleans up artifacts that are no longer used by the MCP-based system, creating a summary of all changes made during the repair workflow. + +## Task + +Identify and clean up deprecated files and folders, then create a comprehensive summary document. + +### Step 1: Handle Old Skills Folder + +Check if `.claude/skills/` exists. This folder was used by the old skill-based system and is no longer needed. + +```bash +ls -la .claude/skills/ 2>/dev/null || echo "No skills folder (good!)" +``` + +**If it exists:** +1. Count the contents: `ls .claude/skills/ | wc -l` +2. Ask the user whether to: + - **Delete** the folder entirely (recommended if migrated to MCP) + - **Back up** to `.claude/skills.backup/` before deleting + - **Keep** if they have custom skills not yet migrated + +**Old skill structure to recognize:** +``` +.claude/skills/ +├── job_name/ +│ └── SKILL.md +├── job_name.step_name/ +│ └── SKILL.md +└── ... +``` + +### Step 2: Clean Temp Files + +Check `.deepwork/tmp/` for accumulated temporary files: + +```bash +ls -la .deepwork/tmp/ 2>/dev/null || echo "No tmp folder" +``` + +**Safe to delete:** +- `.deepwork/tmp/rules/queue/*.json` - Old rules queue files +- Any files older than 7 days +- Empty subdirectories + +**Be careful with:** +- Files that might be in-progress work +- Anything with recent modification times + +```bash +# Clean old queue files +rm -rf .deepwork/tmp/rules/queue/*.json 2>/dev/null + +# Remove empty directories +find .deepwork/tmp -type d -empty -delete 2>/dev/null +``` + +### Step 3: Remove Rules Folder (Fully Deprecated) + +DeepWork Rules have been completely removed from the system. The `.deepwork/rules/` folder should be deleted. + +```bash +ls -la .deepwork/rules/ 2>/dev/null || echo "No rules folder (good!)" +``` + +**If the folder exists:** + +1. **Back up the folder** (in case user wants to reference old rules): + ```bash + mv .deepwork/rules/ .deepwork/rules.backup/ + ``` + +2. **Inform the user** that DeepWork Rules are deprecated and the folder has been backed up + +3. **After user confirms** the backup is acceptable, the backup can be deleted later + +**Also remove these related items if present:** +- `.deepwork/tmp/rules/` folder and all contents +- `.deepwork/jobs/deepwork_rules/` folder (the old rules job) +- Any `deepwork_rules` job that may have been installed + +```bash +rm -rf .deepwork/tmp/rules/ 2>/dev/null +rm -rf .deepwork/jobs/deepwork_rules/ 2>/dev/null +``` + +### Step 4: Update Config Version + +Check `.deepwork/config.yml` for outdated version format: + +```bash +cat .deepwork/config.yml +``` + +**Old format:** +```yaml +version: 1.0.0 +platforms: +- claude +``` + +**Current format:** +```yaml +version: "1.0" +platforms: + - claude +``` + +Update if needed to match current schema expectations. 
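+
+If you prefer a quick programmatic check over eyeballing the file, a sketch along these lines can flag the old format. This is illustrative only (it is not a DeepWork command) and assumes PyYAML is available:
+
+```python
+import yaml
+
+# Rough check: does .deepwork/config.yml still use the old version format?
+with open(".deepwork/config.yml") as f:
+    config = yaml.safe_load(f)
+
+version = config.get("version")
+# Old installs wrote an unquoted semver such as 1.0.0; the current schema
+# expects the quoted string "1.0".
+if not isinstance(version, str) or version.count(".") > 1:
+    print(f'version is {version!r}; rewrite it as the quoted string "1.0"')
+else:
+    print("version format looks current")
+```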
+ +### Step 5: Remove Other Obsolete Files + +Check for and remove other obsolete files: + +| File/Pattern | Description | Action | +|--------------|-------------|--------| +| `.deepwork/.last_head_ref` | Git state tracking | Keep (used by MCP) | +| `.deepwork/.last_work_tree` | Git state tracking | Keep (used by MCP) | +| `.deepwork/.gitignore` | Ignore patterns | Review and update | +| `.claude/commands/` | Generated commands | Keep (current system) | +| `.claude/settings.local.json` | Local overrides | Keep (user settings) | + +### Step 6: Verify Git Status + +Check that the cleanup hasn't left untracked garbage: + +```bash +git status +``` + +**Review:** +- Deleted files should show as deleted +- No new untracked files should appear (unless intentionally created) +- Backup files (`.backup`) should be in `.gitignore` or cleaned up + +### Step 7: Create Repair Summary + +Create a `repair_summary.md` file documenting all changes made during this workflow: + +```markdown +# DeepWork Repair Summary + +**Date:** [current date] +**Project:** [project name] + +## Settings Fixes (fix_settings step) + +- [ ] Removed X `Skill(...)` permission entries +- [ ] Consolidated Y duplicate hooks +- [ ] Removed Z hardcoded paths +- [ ] Removed deprecated `deepwork hook` commands + +## Job Fixes (fix_jobs step) + +### [job_name] +- [ ] Removed `exposed` field from steps: [list] +- [ ] Migrated `stop_hooks` to `hooks.after_agent` +- [ ] Updated workflow to remove `review_job_spec` +- [ ] Version bumped to X.Y.Z + +### [another_job] +- [ ] ... + +## Errata Cleanup (errata step) + +- [ ] Handled `.claude/skills/` folder: [deleted/backed up/kept] +- [ ] Cleaned `.deepwork/tmp/`: removed X files +- [ ] Reviewed `.deepwork/rules/`: [action taken] +- [ ] Updated `.deepwork/config.yml` version format + +## Files Changed + +``` +[list of all files modified/deleted] +``` + +## Recommended Next Steps + +1. Review changes with `git diff` +2. Test that `deepwork sync` runs without errors +3. Commit changes with message: "chore: migrate to DeepWork MCP format" +4. 
Delete backup files after confirming everything works +``` + +## Quality Criteria + +- `.claude/skills/` folder is handled (removed, backed up, or documented why kept) +- `.deepwork/tmp/` contents are cleaned appropriately +- `.deepwork/rules/` folder is backed up and removed (DeepWork Rules fully deprecated) +- `.deepwork/tmp/rules/` folder is removed +- `.deepwork/jobs/deepwork_rules/` folder is removed if present +- `.deepwork/config.yml` uses current version format +- A `repair_summary.md` file is created documenting all changes +- Git status shows clean changes ready to commit +- When all criteria are met, include `Quality Criteria Met` in your response + +## Example Summary Output + +```markdown +# DeepWork Repair Summary + +**Date:** 2024-02-04 +**Project:** internal-agentspace + +## Settings Fixes + +- Removed 87 `Skill(...)` permission entries +- Consolidated 2 duplicate `UserPromptSubmit` hooks into 1 +- Removed hardcoded path: `/Users/tyler/.local/pipx/venvs/deepwork/bin/python` +- Removed 3 deprecated `deepwork hook rules_check` commands + +## Job Fixes + +### deepwork_jobs +- Updated from old version (workflow includes `review_job_spec`) +- Reinstalled with `deepwork install --platform claude` + +### competitive_research +- Removed `exposed: true` from `discover_competitors` step +- Migrated 1 `stop_hooks` to `hooks.after_agent` +- Version bumped to 1.0.1 + +## Errata Cleanup + +- Backed up `.claude/skills/` to `.claude/skills.backup/` (174 files) +- Deleted `.claude/skills/` folder +- Cleaned `.deepwork/tmp/rules/queue/` (12 old JSON files) +- Kept `.deepwork/rules/` (contains active example rules) +- Updated `.deepwork/config.yml` version to "1.0" + +## Recommended Next Steps + +1. `git add -A && git diff --staged` +2. `deepwork sync` (verify no errors) +3. `git commit -m "chore: migrate to DeepWork MCP format"` +4. After testing: `rm -rf .claude/skills.backup/` +``` + +## Important Notes + +1. **Always back up before deleting** - User data is irreplaceable +2. **Ask before destructive actions** - When in doubt, ask the user +3. **Document everything** - The summary is valuable for understanding what changed +4. **Don't auto-commit** - Let the user review and commit changes themselves diff --git a/.deepwork/jobs/deepwork_jobs/steps/fix_jobs.md b/.deepwork/jobs/deepwork_jobs/steps/fix_jobs.md new file mode 100644 index 00000000..cd6f835b --- /dev/null +++ b/.deepwork/jobs/deepwork_jobs/steps/fix_jobs.md @@ -0,0 +1,195 @@ +# Fix Job Definitions + +## Objective + +Update all job.yml files and step instructions in `.deepwork/jobs/` to the current DeepWork format. This step migrates deprecated fields, removes references to deleted steps, and ensures all jobs are compatible with the MCP-based workflow system. + +## Task + +Audit and repair all job definitions, migrating from legacy formats to current specifications. + +### Step 1: Inventory All Jobs + +List all jobs in the project: + +```bash +ls -la .deepwork/jobs/ +``` + +For each job directory, you'll need to check and potentially fix the `job.yml` file. + +### Step 2: Remove `exposed` Field + +The `exposed` field on steps no longer has any effect in MCP-based DeepWork. Steps are now only accessible through workflows. + +**Find and remove:** +```yaml +steps: + - id: some_step + exposed: true # REMOVE THIS LINE +``` + +If a step was `exposed: true` and is not in any workflow, it should either: +1. Be added to a workflow, OR +2. 
Be removed from the job entirely + +### Step 3: Migrate `stop_hooks` to `hooks.after_agent` + +The `stop_hooks` field is deprecated. Migrate to the new `hooks` structure: + +**Before (deprecated):** +```yaml +steps: + - id: my_step + stop_hooks: + - prompt: "Verify the output meets quality standards" +``` + +**After (current format):** +```yaml +steps: + - id: my_step + hooks: + after_agent: + - prompt: "Verify the output meets quality standards" +``` + +### Step 4: Remove References to Deleted Steps + +Check for references to steps that no longer exist in the standard jobs: + +**Steps that have been removed:** +- `review_job_spec` - Was removed from `deepwork_jobs` in v1.0.1 + +**What to fix:** +- Remove from workflow `steps` arrays +- Update `from_step` references in inputs +- Update `dependencies` arrays + +**Example fix:** +```yaml +# Before +workflows: + - name: new_job + steps: + - define + - review_job_spec # REMOVE + - implement + +steps: + - id: implement + inputs: + - file: job.yml + from_step: review_job_spec # CHANGE TO: define + dependencies: + - review_job_spec # CHANGE TO: define +``` + +### Step 5: Fix Orphaned Steps + +Steps not included in any workflow cannot be invoked via the MCP interface. The parser will emit warnings for these. + +Run the following to see warnings: +```bash +deepwork sync 2>&1 | grep -i "warning" +``` + +**For each orphaned step, ask the user which action to take:** + +1. **Add to a workflow** - Create a new single-step workflow for it: + ```yaml + workflows: + - name: standalone_step_name + summary: "Runs the step_name step" + steps: + - step_name + ``` + +2. **Remove the step entirely** - Delete the step from `steps:` array and its instruction file + +3. **Keep as-is (deprecated)** - The step will remain inaccessible but preserved in the job definition + +**Do not automatically decide** - Always confirm with the user which option they prefer for each orphaned step. + +### Step 6: Validate Against Schema + +After making changes, validate each job.yml: + +```bash +deepwork sync +``` + +Fix any schema validation errors that appear. + +### Step 7: Update Version Numbers + +If you made significant changes to a job, bump its version number: + +```yaml +# Bump patch version for minor fixes +version: "1.0.0" -> version: "1.0.1" + +# Add changelog entry +changelog: + - version: "1.0.1" + changes: "Migrated to current DeepWork format; removed deprecated fields" +``` + +### Step 8: Run Sync + +After all fixes, regenerate commands: + +```bash +deepwork sync +``` + +Verify no errors or warnings appear. + +## Quality Criteria + +- All `exposed: true` fields are removed or noted +- All `stop_hooks` are migrated to `hooks.after_agent` format +- References to removed steps (like `review_job_spec`) are updated +- Orphaned steps are either added to workflows or removed +- All job.yml files pass schema validation +- `deepwork sync` runs without errors +- When all criteria are met, include `Quality Criteria Met` in your response + +## Common Issues and Fixes + +### Issue: Step references non-existent step in `from_step` +``` +Error: Step 'implement' has file input from 'review_job_spec' but 'review_job_spec' is not in dependencies +``` +**Fix:** Update `from_step` to reference a step that still exists. + +### Issue: Workflow references non-existent step +``` +Error: Workflow 'new_job' references non-existent step 'review_job_spec' +``` +**Fix:** Remove the step from the workflow's `steps` array. 
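+
+To catch these stale references in one pass instead of chasing errors individually, you can sweep every job with a short script. This is a rough sketch rather than a DeepWork command - it assumes PyYAML is installed and the standard `.deepwork/jobs/*/job.yml` layout:
+
+```python
+from pathlib import Path
+
+import yaml
+
+# List references to steps that a job no longer defines
+for job_file in Path(".deepwork/jobs").glob("*/job.yml"):
+    job = yaml.safe_load(job_file.read_text())
+    defined = {step["id"] for step in job.get("steps", [])}
+
+    for workflow in job.get("workflows", []):
+        for step_id in workflow.get("steps", []):
+            if step_id not in defined:
+                print(f"{job_file}: workflow '{workflow['name']}' lists missing step '{step_id}'")
+
+    for step in job.get("steps", []):
+        for dep in step.get("dependencies") or []:
+            if dep not in defined:
+                print(f"{job_file}: step '{step['id']}' depends on missing step '{dep}'")
+        for inp in step.get("inputs") or []:
+            src = inp.get("from_step") if isinstance(inp, dict) else None
+            if src and src not in defined:
+                print(f"{job_file}: step '{step['id']}' takes input from missing step '{src}'")
+```
+
+Fix whatever it reports using the patterns above, then re-run `deepwork sync` to confirm.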
+ +### Issue: Orphaned step warning +``` +Warning: Job 'my_job' has steps not included in any workflow: standalone_step +``` +**Fix:** Either add the step to a workflow or remove it from the job. + +## Jobs to Check + +For each job in `.deepwork/jobs/`, check: + +| Check | What to Look For | +|-------|------------------| +| `exposed` field | Remove from all steps | +| `stop_hooks` | Migrate to `hooks.after_agent` | +| Workflow steps | Remove references to deleted steps | +| Dependencies | Update to valid step IDs | +| File inputs | Update `from_step` references | +| Version | Bump if changes were made | + +## Important Notes + +1. **Don't modify standard jobs directly** - If `deepwork_jobs` is out of date, run `deepwork install --platform claude` to get the latest version +2. **Preserve custom logic** - When migrating hooks, preserve the prompt content +3. **Test after changes** - Run `deepwork sync` after each job fix to catch errors early diff --git a/.deepwork/jobs/deepwork_jobs/steps/fix_settings.md b/.deepwork/jobs/deepwork_jobs/steps/fix_settings.md new file mode 100644 index 00000000..0c046cd9 --- /dev/null +++ b/.deepwork/jobs/deepwork_jobs/steps/fix_settings.md @@ -0,0 +1,188 @@ +# Fix Settings Files + +## Objective + +Clean up `.claude/settings.json` and related configuration files, removing legacy artifacts from prior DeepWork versions. This step ensures the Claude Code settings are free of deprecated permissions, duplicate hooks, and hardcoded paths. + +## Task + +Audit and repair the `.claude/settings.json` file, removing gunk accumulated from older DeepWork implementations. + +### Step 1: Create Backup + +Before making any changes, create a backup: + +```bash +cp .claude/settings.json .claude/settings.json.backup +``` + +### Step 2: Inventory DeepWork Jobs + +First, get the list of jobs that exist in `.deepwork/jobs/`: + +```bash +ls .deepwork/jobs/ +``` + +Note these job names - you will use them to identify which `Skill(...)` entries to remove. + +### Step 3: Remove DeepWork Skill Permissions + +Look for and **remove** `Skill(...)` permission entries that match DeepWork jobs. Only remove entries where the skill name matches a job in `.deepwork/jobs/`. + +**What to look for:** +```json +"permissions": { + "allow": [ + "Skill(deepwork_jobs)", // Remove if 'deepwork_jobs' is in .deepwork/jobs/ + "Skill(deepwork_jobs.define)", // Remove - matches job_name.step pattern + "Skill(competitive_research)", // Remove if 'competitive_research' is in .deepwork/jobs/ + "Skill(my_custom_skill)", // KEEP - not a DeepWork job + ... + ] +} +``` + +**IMPORTANT:** Only remove skills that: +- Exactly match a job name in `.deepwork/jobs/` (e.g., `Skill(job_name)`) +- Match the pattern `job_name.step_name` where `job_name` is in `.deepwork/jobs/` + +**DO NOT remove** skills that don't match DeepWork jobs - the user may have created these manually for other purposes. + +### Step 4: Remove Duplicate Hooks + +Check for duplicate hook entries in the `hooks` section. Prior versions sometimes added the same hook multiple times. + +**Example of duplicates to consolidate:** +```json +"hooks": { + "UserPromptSubmit": [ + { + "matcher": "", + "hooks": [{ "type": "command", "command": "some_command" }] + }, + { + "matcher": "", + "hooks": [{ "type": "command", "command": "some_command" }] // DUPLICATE + } + ] +} +``` + +Keep only one instance of each unique hook. 
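+
+If the `hooks` section is long, it can help to surface duplicates with a short script before editing by hand. This is a minimal sketch, not part of DeepWork - it assumes the standard `.claude/settings.json` location and that each hook event maps to a list of entries:
+
+```python
+import json
+from collections import Counter
+
+# Report hook entries that appear more than once under the same event
+with open(".claude/settings.json") as f:
+    settings = json.load(f)
+
+for event, entries in settings.get("hooks", {}).items():
+    # Canonical serialization makes structurally identical entries compare equal
+    counts = Counter(json.dumps(entry, sort_keys=True) for entry in entries)
+    for serialized, count in counts.items():
+        if count > 1:
+            print(f"{event}: {count} identical entries -> {serialized}")
+```
+
+Delete the extra copies manually so exactly one instance of each hook remains.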
+ +### Step 5: Remove Hardcoded User Paths + +Search for and remove any hardcoded paths that reference specific user directories: + +**Patterns to find and remove:** +- `/Users/username/.local/pipx/venvs/deepwork/bin/python` +- `/home/username/.local/...` +- Any path containing a specific username + +These should either be removed or replaced with relative paths. + +### Step 6: Remove DeepWork Rules Hooks (Fully Deprecated) + +DeepWork Rules have been completely removed from the system. Remove ALL hooks related to rules: + +**Hooks to remove entirely:** +- Any hook with command `deepwork hook rules_check` +- Any hook with command containing `rules_check` +- Any hook referencing `.deepwork/jobs/deepwork_rules/hooks/` +- Any hook referencing `.deepwork/rules/` + +**Also remove these permissions if present:** +- `Skill(deepwork_rules)` +- `Skill(deepwork_rules.define)` +- `Bash(rm -rf .deepwork/tmp/rules/queue/*.json)` + +### Step 7: Remove Other Deprecated Commands + +Remove hooks referencing other deprecated DeepWork commands: + +**Commands to remove:** +- `deepwork hook *` - The entire hook subcommand is deprecated +- References to any `.deepwork/jobs/*/hooks/` scripts + +### Step 8: Clean Up Empty Sections + +If after cleanup any sections are empty, consider removing them: + +```json +// Remove if empty: +"hooks": { + "Stop": [] // Remove this empty array +} +``` + +### Step 9: Validate JSON + +After all edits, ensure the file is valid JSON: + +```bash +python -c "import json; json.load(open('.claude/settings.json'))" +``` + +If there are syntax errors, fix them before proceeding. + +## Quality Criteria + +- DeepWork job `Skill(...)` permissions are removed (only those matching `.deepwork/jobs/`) +- Non-DeepWork skills are preserved (skills not matching any job in `.deepwork/jobs/`) +- All DeepWork Rules hooks and permissions are removed +- Duplicate hook entries are consolidated +- Hardcoded user-specific paths are removed +- Deprecated `deepwork hook` commands are removed +- The settings.json file is valid JSON +- A backup was created before modifications +- When all criteria are met, include `Quality Criteria Met` in your response + +## Example Before/After + +### Before (with gunk): +```json +{ + "hooks": { + "UserPromptSubmit": [ + { "matcher": "", "hooks": [{ "type": "command", "command": ".deepwork/jobs/deepwork_rules/hooks/user_prompt_submit.sh" }] }, + { "matcher": "", "hooks": [{ "type": "command", "command": ".deepwork/jobs/deepwork_rules/hooks/user_prompt_submit.sh" }] } + ], + "Stop": [ + { "matcher": "", "hooks": [{ "type": "command", "command": "deepwork hook rules_check" }] } + ], + "SubagentStop": [ + { "matcher": "", "hooks": [{ "type": "command", "command": "/Users/tyler/.local/pipx/venvs/deepwork/bin/python -m deepwork.hooks.rules_check" }] } + ] + }, + "permissions": { + "allow": [ + "Skill(competitive_research)", + "Skill(competitive_research.discover_competitors)", + "Skill(deepwork_jobs)", + "Skill(deepwork_jobs.define)", + "Read(./.deepwork/**)", + "WebSearch" + ] + } +} +``` + +### After (cleaned): +```json +{ + "hooks": {}, + "permissions": { + "allow": [ + "Read(./.deepwork/**)", + "WebSearch" + ] + } +} +``` + +## Important Notes + +1. **Don't remove non-DeepWork permissions** - Keep permissions like `WebSearch`, `Read(...)`, `Bash(...)` that aren't related to old DeepWork skills +2. **Be conservative** - If unsure whether something is legacy, ask the user +3. 
**Document changes** - Note what was removed for the final summary diff --git a/.deepwork/jobs/deepwork_jobs/steps/iterate.md b/.deepwork/jobs/deepwork_jobs/steps/iterate.md new file mode 100644 index 00000000..78f8ddf3 --- /dev/null +++ b/.deepwork/jobs/deepwork_jobs/steps/iterate.md @@ -0,0 +1,243 @@ +# Iterate on Workflow Design + +## Objective + +Review the test run conversation and improve the job definition based on what happened. This step closes the feedback loop by incorporating learnings from the test into the workflow itself, making future runs more efficient and producing better results. + +## Task + +Analyze the conversation history from the test step, identify areas for improvement, and update the job definition and step instructions accordingly. + +### Step 1: Review the Conversation History + +Carefully analyze the conversation from the test step, looking for: + +1. **Process Inefficiencies** + - Steps that took multiple attempts to complete + - Questions the agent had to ask that should have been in the instructions + - Unnecessary back-and-forth with the user + - Information that had to be repeated + +2. **Output Quality Issues** + - Issues identified during critique (from Step 3 of test) + - Corrections requested by the user + - Patterns in user feedback (what did they consistently want changed?) + +3. **Tool Usage Problems** + - Tools that didn't work as expected + - Missing tools that would have helped + - Inefficient tool sequences + +4. **Missing or Unclear Instructions** + - Ambiguities that led to wrong outputs + - Missing guidance that caused confusion + - Quality criteria that weren't clear enough + +### Step 2: Plan Improvements + +For each issue identified, determine the appropriate fix: + +| Issue Type | Solution Location | +|------------|-------------------| +| Process inefficiency | Update step instructions with clearer guidance | +| Output quality | Update quality criteria or add examples | +| Missing information | Add to step inputs or instructions | +| Tool problems | Suggest different tools in instructions | +| Unclear criteria | Rewrite quality criteria to be specific | + +**Prioritize improvements** that will have the most impact on future runs. Focus on: +- Issues that caused multiple iterations +- Problems that affected the final output quality +- Confusion that could be eliminated with clearer instructions + +### Step 3: Update Step Instructions + +For each step that needs improvement: + +1. **Read the current instruction file** at `.deepwork/jobs/[job_name]/steps/[step_id].md` + +2. **Make targeted improvements**: + - Add missing context or clarification + - Include examples of good output (use what worked in the test) + - Clarify ambiguous instructions + - Add tool recommendations if a different approach would be better + - Update quality criteria to match user expectations + +3. **Keep instructions concise**: + - Avoid redundancy + - Be direct and actionable + - Use bullet points where appropriate + +### Step 4: Update Quality Criteria + +Review and update quality criteria in two places: + +1. **In step instruction files** - The "Quality Criteria" section should reflect what the user actually cared about during testing + +2. 
**In job.yml** - If steps have `quality_criteria` or `stop_hooks`, update them to: + - Remove criteria that weren't relevant + - Add criteria based on user feedback + - Make existing criteria more specific + +**Example improvement:** +```yaml +# Before +quality_criteria: + - "Report is formatted correctly" + +# After +quality_criteria: + - "Report uses distinct colors for each data series in charts" + - "Tables have sufficient padding and font size for readability" + - "Executive summary is understandable by non-technical readers" +``` + +### Step 5: Consider Alternative Tools + +If any tools didn't work well during the test: + +1. **Identify the problem** - What went wrong? (slow, wrong output, hard to use) + +2. **Research alternatives** - What other tools could accomplish the same goal? + +3. **Update instructions** - If a better tool exists, update the step instructions to recommend it + +Examples: +- If web scraping was unreliable, suggest a specific browser automation approach +- If data processing was slow, suggest a different method or tool +- If file generation had issues, recommend a different library or format + +### Step 6: Update Job Version and Changelog + +After making improvements: + +1. **Bump the version** in `job.yml`: + - Patch version (x.x.1) for minor instruction tweaks + - Minor version (x.1.0) for quality criteria changes or significant improvements + +2. **Add a changelog entry** describing what was improved: + ```yaml + changelog: + - version: "1.0.1" + changes: "Improved [step] instructions based on test run: added examples, clarified criteria, updated tool recommendations" + ``` + +### Step 7: Sync Changes + +Run `deepwork sync` to apply the changes: + +```bash +deepwork sync +``` + +### Step 8: Provide Recap + +Summarize the improvements made: + +1. **List the issues identified** from the test run + +2. **Describe the changes made** to address each issue + +3. **Explain the expected impact** on future runs + +Example recap format: +``` +## Iteration Summary + +### Issues Identified +1. Chart colors were hard to distinguish +2. Tables were too small to read +3. Executive summary was too technical + +### Changes Made + +**gather_data step:** +- Added instruction to use distinct color palettes for charts +- Updated quality criteria to require readable table formatting + +**generate_report step:** +- Added example of good executive summary +- Updated quality criteria: "Summary understandable by non-technical stakeholders" + +**job.yml:** +- Updated version to 1.0.1 +- Added changelog entry + +### Expected Impact +Future runs should produce reports with better visual design and clearer summaries, reducing the need for post-generation corrections. +``` + +## Quality Criteria + +- Conversation history from test step was analyzed for issues +- Process inefficiencies were identified and addressed +- Step instructions were updated to improve clarity +- Quality criteria were updated to match user expectations +- Alternative tools were considered where relevant +- Job version and changelog were updated +- `deepwork sync` was run to apply changes +- A clear recap of improvements was provided +- When all criteria are met, include `Quality Criteria Met` in your response + +## Example Dialog + +``` +Claude: Let me review our test run and improve the workflow based on what happened. + +**Analyzing Test Conversation...** + +I identified the following issues from the test run: + +### Issues Found + +1. 
**Chart readability (gather_data step)**: The instructions didn't specify how to handle color choices for visualizations. During the test, we had to fix colors that were too similar. + +2. **Missing month-over-month comparison (analyze_trends step)**: The user wanted comparisons to previous periods, but this wasn't in the instructions. We had to add it after the fact. + +3. **Technical executive summary (generate_report step)**: The summary was too technical for non-technical stakeholders. The quality criteria didn't specify the audience clearly. + +4. **Table formatting (generate_report step)**: Tables were hard to read. No guidance on sizing or padding. + +### Changes Made + +**`.deepwork/jobs/monthly_report/steps/gather_data.md`:** +- Added: "Use distinct, high-contrast colors for chart data series. Avoid similar shades for different categories." +- Added to Quality Criteria: "Charts use visually distinct colors that are accessible to colorblind readers" + +**`.deepwork/jobs/monthly_report/steps/analyze_trends.md`:** +- Added: "Always include comparison to the same period in the previous month/quarter" +- Added input: "previous_period_data" to formalize this requirement + +**`.deepwork/jobs/monthly_report/steps/generate_report.md`:** +- Added example of good executive summary targeting non-technical readers +- Added: "Format tables with sufficient padding (at least 8px) and readable font size (minimum 11pt)" +- Updated Quality Criteria: + - "Executive summary is understandable by non-technical stakeholders" + - "Tables are readable with adequate padding and font size" + +**`.deepwork/jobs/monthly_report/job.yml`:** +- Version bumped to 1.0.1 +- Added changelog: "Improved visual design guidance and audience-appropriate writing based on test feedback" + +Running `deepwork sync`... + +Done! The workflow has been updated. + +### Expected Impact + +Future monthly reports should: +- Have better chart color choices from the start +- Include month-over-month comparisons automatically +- Have clearer executive summaries for leadership +- Have more readable tables + +This should significantly reduce the number of corrections needed after the initial output. +``` + +## Important Guidelines + +1. **Be specific** - Don't just note "instructions were unclear" - explain exactly what was unclear and how you fixed it +2. **Use test results as examples** - If something worked well in the test, add it as an example in the instructions +3. **Don't over-engineer** - Only add improvements that address actual problems from the test +4. **Maintain consistency** - Keep the same structure and style in instruction files +5. **Test-driven improvement** - Every change should trace back to something that happened in the test diff --git a/.deepwork/jobs/deepwork_jobs/steps/test.md b/.deepwork/jobs/deepwork_jobs/steps/test.md new file mode 100644 index 00000000..fa36d265 --- /dev/null +++ b/.deepwork/jobs/deepwork_jobs/steps/test.md @@ -0,0 +1,171 @@ +# Test the New Workflow + +## Objective + +Run the newly created workflow on a real use case chosen by the user, critique the output, and iterate until the user is satisfied with the results. This step validates that the workflow works as intended before finalizing it. + +## Task + +Guide the user through testing their new workflow by running it on a real example, then critically evaluating the output and refining it based on user feedback. + +### Step 1: Announce Readiness and Gather Test Case + +The workflow is now implemented and ready to test. 
Use the AskUserQuestion tool to: + +1. **Inform the user** that the workflow is ready for a test run +2. **Ask what they'd like to test it on** - Get a specific, real use case + +Example question to ask: +``` +Your new workflow is ready to try out! What would you like to use it on for the first test run? + +Please describe a specific case you want to run through the workflow - ideally something you actually need done, so we can validate the workflow produces useful results. +``` + +**Important**: Get a concrete, specific test case. Vague responses like "just test it" should be followed up with clarifying questions to understand what inputs/context the workflow needs. + +### Step 2: Prepare and Run the Workflow + +1. **Compact the conversation history** - Before invoking the workflow, use the `/compact` command to summarize the conversation so far. This ensures the workflow starts with clean context focused on the test case. + +2. **Invoke the new workflow** - Run the first step of the newly created workflow using its slash command: + ``` + /[job_name].[first_step_id] + ``` + +3. **Complete the full workflow** - Continue through all steps of the workflow until it produces its final output. + +4. **Note any issues during execution** - Pay attention to: + - Confusion or ambiguity in instructions + - Missing information that had to be asked for + - Steps that took longer than expected + - Awkward tool usage or process flow + +### Step 3: Critique the Output + +After the workflow completes, perform a self-critique of the output: + +1. **Review the final deliverable** - Read through all outputs produced by the workflow + +2. **Identify up to 3 top issues** - Look for problems such as: + - Missing information or sections + - Formatting issues (layout, structure, readability) + - Quality problems (vague content, errors, inconsistencies) + - Misalignment with what the user likely wanted + - Technical issues (broken links, malformed data, etc.) + +3. **Present each issue to the user** with a specific question asking if they want it fixed. For example: + ``` + I noticed a few things we could improve: + + 1. **Text overlap**: The PDF has some text overlapping images in a few places - shall I correct that? + + 2. **Missing summary**: The report doesn't have an executive summary at the top - would you like me to add one? + + 3. **Data formatting**: The numbers aren't consistently formatted (some have commas, some don't) - should I standardize them? + ``` + +4. **Make corrections** for any issues the user confirms they want fixed + +### Step 4: Gather Additional Feedback + +After addressing the identified issues: + +1. **Ask if the user has more comments** on the output: + ``` + I've made those corrections. Is there anything else you'd like me to change about the output? + ``` + +2. **Iterate on feedback** - Keep making corrections based on user feedback until they indicate they're satisfied + +3. **Confirm completion** - When the user says the output is good, confirm that testing is complete + +### Loop Behavior + +The feedback loop should continue until the user explicitly indicates satisfaction. Look for signals like: +- "Looks good" +- "That's fine" +- "I'm happy with it" +- "No more changes" +- "Perfect" + +If the user provides more feedback, address it and ask again. Don't assume satisfaction without explicit confirmation. 
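Once the user confirms satisfaction, capture what you observed so the iterate step has a concrete artifact to work from. The job definition lists `test_feedback.md` as this step's output; a minimal bash sketch for recording it is below (the headings are only a suggested structure, not a required format - adapt them to what actually came up during the test):

```bash
# Record the test run observations for the iterate step to analyze.
# The section headings below are a suggestion; keep whatever is useful.
cat > test_feedback.md <<'EOF'
# Test Run Feedback

## Test Case
[What the workflow was run on and why]

## Issues Identified
1. [Issue and the step it relates to]
2. [...]

## User Feedback
- [Corrections requested and whether they were applied]

## Execution Observations
- [Confusion, missing inputs, or awkward steps worth improving]
EOF
```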
+ +## Quality Criteria + +- User was informed the workflow is ready and asked what to test it on +- Conversation was compacted before running the workflow +- The new workflow was actually invoked and run to completion +- Output was critiqued and up to 3 top issues were identified +- Each identified issue was presented to the user with a specific question +- Confirmed corrections were applied +- User was asked for additional feedback after corrections +- Iteration continued until user confirmed satisfaction +- When all criteria are met, include `Quality Criteria Met` in your response + +## Example Dialog + +``` +Claude: Your new workflow is ready to try out! + +The `monthly_report` workflow has been implemented with these steps: +1. gather_data - Collects metrics from various sources +2. analyze_trends - Identifies patterns and insights +3. generate_report - Creates the final PDF report + +What would you like to use it on for the first test run? Please describe a specific report you need - for example, which month's data and any particular focus areas. + +User: Let's do the January 2026 engineering report, focusing on deployment frequency and incident response times. + +Claude: Great, let me compact the conversation and run the workflow on your January 2026 engineering report. + +/compact + +/monthly_report.gather_data + +[... workflow runs through all steps ...] + +Claude: The January 2026 Engineering Report has been generated. Let me review it for any issues. + +I noticed a few things we could improve: + +1. **Chart readability**: The deployment frequency chart uses very similar colors for different teams - shall I change to a more distinct color palette? + +2. **Missing context**: The incident response times don't include comparison to the previous month - would you like me to add month-over-month comparison? + +3. **Executive summary**: The summary is quite technical - should I add a simpler high-level overview for non-technical stakeholders? + +User: Yes to all three, please. + +Claude: Done! I've updated the report with: +- Distinct color palette for the deployment chart +- Month-over-month comparison for incident response times +- Added a non-technical executive summary + +Is there anything else you'd like me to change about the report? + +User: Can you make the tables larger? They're a bit hard to read. + +Claude: I've increased the table sizes for better readability. The font is now larger and there's more padding around cells. + +Is there anything else you'd like me to adjust? + +User: No, that looks great! + +Claude: Excellent! The test run is complete. The workflow successfully generated your January 2026 Engineering Report with all the refinements applied. + +**Test Summary:** +- Workflow ran successfully through all 3 steps +- Output required adjustments to chart colors, comparisons, and table sizing +- User is satisfied with the final result + +Ready to proceed to the iterate step to improve the workflow based on what we learned. +``` + +## Important Guidelines + +1. **Use real test cases** - Testing with actual data/needs validates the workflow better than hypothetical examples +2. **Be specific in critiques** - Don't say "formatting issues" - say exactly what's wrong and where +3. **Limit initial critique to 3 issues** - Too many issues at once is overwhelming; address more in the feedback loop +4. **Don't assume what user wants fixed** - Always ask before making corrections +5. 
**Iterate until satisfied** - Don't rush to completion; the user's satisfaction is the goal diff --git a/.gemini/skills/deepwork/index.toml b/.gemini/skills/deepwork/index.toml new file mode 100644 index 00000000..20f13d66 --- /dev/null +++ b/.gemini/skills/deepwork/index.toml @@ -0,0 +1,28 @@ ++++ +name = "deepwork" +description = "Start or continue DeepWork workflows using MCP tools" ++++ + +# DeepWork Workflow Manager + +Execute multi-step workflows with quality gate checkpoints. + +> **IMPORTANT**: Use the DeepWork MCP server tools. All workflow operations +> are performed through MCP tool calls and following the instructions they return, +> not by reading instructions from files. + +## How to Use + +1. Call `get_workflows` to discover available workflows +2. Call `start_workflow` with goal, job_name, and workflow_name +3. Follow the step instructions returned +4. Call `finished_step` with your outputs when done +5. Handle the response: `needs_work`, `next_step`, or `workflow_complete` + +## Intent Parsing + +When the user invokes `/deepwork`, parse their intent: + +- **Explicit workflow**: `/deepwork new_job` → start the `new_job` workflow +- **General request**: `/deepwork I want to create a new workflow` → infer best match from available workflows +- **No context**: `/deepwork` alone → call `get_workflows` and ask user to choose \ No newline at end of file diff --git a/doc/architecture.md b/doc/architecture.md index aad03028..6294837b 100644 --- a/doc/architecture.md +++ b/doc/architecture.md @@ -64,7 +64,6 @@ deepwork/ # DeepWork tool repository │ │ └── gemini_hook.sh # Shell wrapper for Gemini CLI │ ├── templates/ # Skill templates for each platform │ │ ├── claude/ -│ │ │ ├── skill-job-step.md.jinja │ │ │ └── skill-deepwork.md.jinja # MCP entry point skill │ │ ├── gemini/ │ │ └── copilot/ @@ -214,52 +213,35 @@ class PlatformDetector: ### 4. Skill Generator (`generator.py`) -Generates AI-platform-specific skill files from job definitions. +Generates AI-platform-specific skill files. The generator has been simplified to focus +on generating only the MCP entry point skill (`/deepwork`), as workflow orchestration +is now handled by the MCP server rather than individual step skills. -This component is called by the `sync` command to regenerate all skills: -1. Reads the job definition from `.deepwork/jobs/[job-name]/job.yml` -2. Loads platform-specific templates -3. Generates skill files for each step in the job -4. Writes skills to the AI platform's skills directory +This component is called by the `sync` command to regenerate the DeepWork skill: +1. Loads the platform-specific template (`skill-deepwork.md.jinja`) +2. Generates the `/deepwork` skill file that directs agents to use MCP tools +3. 
Writes the skill to the AI platform's skills directory **Example Generation Flow**: ```python class SkillGenerator: - def generate_all_skills(self, job: JobDefinition, - platform: PlatformConfig, - output_dir: Path) -> list[Path]: - """Generate skill files for all steps in a job.""" - skill_paths = [] - - for step_index, step in enumerate(job.steps): - # Load step instructions - instructions = read_file(job.job_dir / step.instructions_file) - - # Build template context - context = { - "job_name": job.name, - "step_id": step.id, - "step_name": step.name, - "step_number": step_index + 1, - "total_steps": len(job.steps), - "instructions_content": instructions, - "user_inputs": [inp for inp in step.inputs if inp.is_user_input()], - "file_inputs": [inp for inp in step.inputs if inp.is_file_input()], - "outputs": step.outputs, - "dependencies": step.dependencies, - "exposed": step.exposed, - } - - # Render template - template = env.get_template("skill-job-step.md.jinja") - rendered = template.render(**context) - - # Write to platform's skills directory - skill_path = output_dir / platform.config_dir / platform.skills_dir / f"{job.name}.{step.id}.md" - write_file(skill_path, rendered) - skill_paths.append(skill_path) - - return skill_paths + def generate_deepwork_skill(self, adapter: AgentAdapter, + output_dir: Path) -> Path: + """Generate the global /deepwork skill for MCP entry point.""" + skills_dir = output_dir / adapter.skills_dir + skills_dir.mkdir(parents=True, exist_ok=True) + + # Load and render template + env = self._get_jinja_env(adapter) + template = env.get_template("skill-deepwork.md.jinja") + rendered = template.render() + + # Write skill file + skill_path = skills_dir / "deepwork/SKILL.md" + skill_path.parent.mkdir(parents=True, exist_ok=True) + safe_write(skill_path, rendered) + + return skill_path ``` --- diff --git a/doc/mcp_interface.md b/doc/mcp_interface.md index 977fd32b..82512dc9 100644 --- a/doc/mcp_interface.md +++ b/doc/mcp_interface.md @@ -36,7 +36,6 @@ interface JobInfo { summary: string; // Short summary of the job description: string | null; // Full description (optional) workflows: WorkflowInfo[]; // Named workflows in the job - standalone_steps: StepInfo[]; // Steps not in any workflow } interface WorkflowInfo { @@ -44,13 +43,6 @@ interface WorkflowInfo { summary: string; // Short description } -interface StepInfo { - id: string; // Step identifier - name: string; // Human-readable step name - description: string; // What the step does - dependencies: string[]; // Required prior steps -} - interface ActiveStepInfo { session_id: string; // Unique session identifier branch_name: string; // Git branch for this workflow instance diff --git a/src/deepwork/core/adapters.py b/src/deepwork/core/adapters.py index ea401924..4fc0733d 100644 --- a/src/deepwork/core/adapters.py +++ b/src/deepwork/core/adapters.py @@ -57,8 +57,6 @@ class AgentAdapter(ABC): display_name: ClassVar[str] config_dir: ClassVar[str] skills_dir: ClassVar[str] = "skills" - skill_template: ClassVar[str] = "skill-job-step.md.jinja" - meta_skill_template: ClassVar[str] = "skill-job-meta.md.jinja" # Mapping from generic SkillLifecycleHook to platform-specific event names. # Subclasses should override this to provide platform-specific mappings. 
@@ -150,38 +148,6 @@ def get_skills_dir(self, project_root: Path | None = None) -> Path: raise AdapterError("No project root specified") return root / self.config_dir / self.skills_dir - def get_meta_skill_filename(self, job_name: str) -> str: - """ - Get the filename for a job's meta-skill. - - The meta-skill is the primary user interface for a job. - Can be overridden for different file formats. - - Args: - job_name: Name of the job - - Returns: - Meta-skill filename (e.g., "job_name/SKILL.md" for Claude) - """ - return f"{job_name}/SKILL.md" - - def get_step_skill_filename(self, job_name: str, step_id: str, exposed: bool = False) -> str: - """ - Get the filename for a step skill. - - All step skills use the same filename format. The exposed parameter - is used for template context (user-invocable frontmatter setting). - - Args: - job_name: Name of the job - step_id: ID of the step - exposed: If True, skill is user-invocable (for template context). Default: False. - - Returns: - Skill filename (e.g., "job_name.step_id/SKILL.md" for Claude) - """ - return f"{job_name}.{step_id}/SKILL.md" - def detect(self, project_root: Path | None = None) -> bool: """ Check if this platform is available in the project. @@ -651,47 +617,11 @@ class GeminiAdapter(AgentAdapter): name = "gemini" display_name = "Gemini CLI" config_dir = ".gemini" - skill_template = "skill-job-step.toml.jinja" - meta_skill_template = "skill-job-meta.toml.jinja" # Gemini CLI does NOT support skill-level hooks # Hooks are global/project-level in settings.json, not per-skill hook_name_mapping: ClassVar[dict[SkillLifecycleHook, str]] = {} - def get_meta_skill_filename(self, job_name: str) -> str: - """ - Get the filename for a Gemini job's meta-skill. - - Gemini uses TOML files and colon namespacing via subdirectories. - For job "my_job", creates: my_job/index.toml - - Args: - job_name: Name of the job - - Returns: - Meta-skill filename path (e.g., "my_job/index.toml") - """ - return f"{job_name}/index.toml" - - def get_step_skill_filename(self, job_name: str, step_id: str, exposed: bool = False) -> str: - """ - Get the filename for a Gemini step skill. - - Gemini uses TOML files and colon namespacing via subdirectories. - All step skills use the same filename format. The exposed parameter - is used for template context (user-invocable setting). - For job "my_job" and step "step_one", creates: my_job/step_one.toml - - Args: - job_name: Name of the job - step_id: ID of the step - exposed: If True, skill is user-invocable (for template context). Default: False. - - Returns: - Skill filename path (e.g., "my_job/step_one.toml") - """ - return f"{job_name}/{step_id}.toml" - def sync_hooks(self, project_path: Path, hooks: dict[str, list[dict[str, Any]]]) -> int: """ Sync hooks to Gemini CLI settings. 
diff --git a/src/deepwork/core/generator.py b/src/deepwork/core/generator.py index 75f289c1..58502c1a 100644 --- a/src/deepwork/core/generator.py +++ b/src/deepwork/core/generator.py @@ -1,19 +1,11 @@ """Skill file generator using Jinja2 templates.""" from pathlib import Path -from typing import Any from jinja2 import Environment, FileSystemLoader, TemplateNotFound -from deepwork.core.adapters import AgentAdapter, SkillLifecycleHook -from deepwork.core.doc_spec_parser import ( - DocSpec, - DocSpecParseError, - parse_doc_spec_file, -) -from deepwork.core.parser import JobDefinition, Step -from deepwork.schemas.job_schema import LIFECYCLE_HOOK_EVENTS -from deepwork.utils.fs import safe_read, safe_write +from deepwork.core.adapters import AgentAdapter +from deepwork.utils.fs import safe_write class GeneratorError(Exception): @@ -42,35 +34,6 @@ def __init__(self, templates_dir: Path | str | None = None): if not self.templates_dir.exists(): raise GeneratorError(f"Templates directory not found: {self.templates_dir}") - # Cache for loaded doc specs (keyed by absolute file path) - self._doc_spec_cache: dict[Path, DocSpec] = {} - - def _load_doc_spec(self, project_root: Path, doc_spec_path: str) -> DocSpec | None: - """ - Load a doc spec by file path with caching. - - Args: - project_root: Path to project root - doc_spec_path: Relative path to doc spec file (e.g., ".deepwork/doc_specs/report.md") - - Returns: - DocSpec if file exists and parses, None otherwise - """ - full_path = project_root / doc_spec_path - if full_path in self._doc_spec_cache: - return self._doc_spec_cache[full_path] - - if not full_path.exists(): - return None - - try: - doc_spec = parse_doc_spec_file(full_path) - except DocSpecParseError: - return None - - self._doc_spec_cache[full_path] = doc_spec - return doc_spec - def _get_jinja_env(self, adapter: AgentAdapter) -> Environment: """ Get Jinja2 environment for an adapter. @@ -93,489 +56,6 @@ def _get_jinja_env(self, adapter: AgentAdapter) -> Environment: lstrip_blocks=True, ) - def _is_standalone_step(self, job: JobDefinition, step: Step) -> bool: - """ - Check if a step is standalone (not part of any workflow). - - A step is standalone if: - - It's not listed in any workflow definition - - OR (for backward compatibility) no workflows are defined and the step - has no dependencies and no other steps depend on it - - Args: - job: Job definition - step: Step to check - - Returns: - True if step is standalone - """ - # If workflows are defined, use workflow membership - if job.workflows: - return job.get_workflow_for_step(step.id) is None - else: - # Backward compatibility: if no workflows defined, use dependency analysis - # Step has dependencies - not standalone - if step.dependencies: - return False - - # Check if any other step depends on this step - for other_step in job.steps: - if step.id in other_step.dependencies: - return False - - return True - - def _get_workflow_context(self, job: JobDefinition, step: Step) -> dict[str, Any]: - """ - Build workflow context for a step. 
- - Args: - job: Job definition - step: Step to build context for - - Returns: - Workflow context dictionary with workflow info, or empty dict if standalone - """ - workflow = job.get_workflow_for_step(step.id) - if not workflow: - return {} - - position = job.get_step_position_in_workflow(step.id) - return { - "workflow_name": workflow.name, - "workflow_summary": workflow.summary, - "workflow_step_number": position[0] if position else 1, - "workflow_total_steps": position[1] if position else 1, - "workflow_next_step": job.get_next_step_in_workflow(step.id), - "workflow_prev_step": job.get_prev_step_in_workflow(step.id), - } - - def _build_hook_context(self, job: JobDefinition, hook_action: Any) -> dict[str, Any]: - """ - Build context for a single hook action. - - Args: - job: Job definition - hook_action: HookAction instance - - Returns: - Hook context dictionary - """ - hook_ctx: dict[str, Any] = {} - if hook_action.is_prompt(): - hook_ctx["type"] = "prompt" - hook_ctx["content"] = hook_action.prompt - elif hook_action.is_prompt_file(): - hook_ctx["type"] = "prompt_file" - hook_ctx["path"] = hook_action.prompt_file - # Read the prompt file content - prompt_file_path = job.job_dir / hook_action.prompt_file - prompt_content = safe_read(prompt_file_path) - if prompt_content is None: - raise GeneratorError(f"Hook prompt file not found: {prompt_file_path}") - hook_ctx["content"] = prompt_content - elif hook_action.is_script(): - hook_ctx["type"] = "script" - hook_ctx["path"] = hook_action.script - return hook_ctx - - def _build_step_context( - self, - job: JobDefinition, - step: Step, - step_index: int, - adapter: AgentAdapter, - project_root: Path | None = None, - ) -> dict[str, Any]: - """ - Build template context for a step. - - Args: - job: Job definition - step: Step to generate context for - step_index: Index of step in job (0-based) - adapter: Agent adapter for platform-specific hook name mapping - project_root: Optional project root for loading doc specs - - Returns: - Template context dictionary - """ - # Read step instructions - instructions_file = job.job_dir / step.instructions_file - instructions_content = safe_read(instructions_file) - if instructions_content is None: - raise GeneratorError(f"Step instructions file not found: {instructions_file}") - - # Separate user inputs and file inputs - user_inputs = [ - {"name": inp.name, "description": inp.description} - for inp in step.inputs - if inp.is_user_input() - ] - file_inputs = [ - {"file": inp.file, "from_step": inp.from_step} - for inp in step.inputs - if inp.is_file_input() - ] - - # Check if this is a standalone step - is_standalone = self._is_standalone_step(job, step) - - # Get workflow context (empty dict if standalone) - workflow_ctx = self._get_workflow_context(job, step) - - # Determine next and previous steps based on workflow (if defined) or order - next_step = None - prev_step = None - if not is_standalone: - if workflow_ctx: - # Use workflow-defined order - next_step = workflow_ctx.get("workflow_next_step") - prev_step = workflow_ctx.get("workflow_prev_step") - else: - # Backward compatibility: use step array order - if step_index < len(job.steps) - 1: - next_step = job.steps[step_index + 1].id - if step_index > 0: - prev_step = job.steps[step_index - 1].id - - # Build hooks context for all lifecycle events - # Structure: {platform_event_name: [hook_contexts]} - hooks: dict[str, list[dict[str, Any]]] = {} - for event in LIFECYCLE_HOOK_EVENTS: - if event in step.hooks: - # Get platform-specific event name from 
adapter - hook_enum = SkillLifecycleHook(event) - platform_event_name = adapter.get_platform_hook_name(hook_enum) - if platform_event_name: - hook_contexts = [ - self._build_hook_context(job, hook_action) - for hook_action in step.hooks[event] - ] - if hook_contexts: - hooks[platform_event_name] = hook_contexts - - # Claude Code has separate Stop and SubagentStop events. When a Stop hook - # is defined, also register it for SubagentStop so it triggers for both - # the main agent and subagents. - if "Stop" in hooks: - hooks["SubagentStop"] = hooks["Stop"] - - # Backward compatibility: stop_hooks is after_agent hooks - stop_hooks = hooks.get( - adapter.get_platform_hook_name(SkillLifecycleHook.AFTER_AGENT) or "Stop", [] - ) - - # Build rich outputs context with doc spec information - outputs_context = [] - for output in step.outputs: - output_ctx: dict[str, Any] = { - "file": output.file, - "has_doc_spec": output.has_doc_spec(), - } - if output.has_doc_spec() and output.doc_spec and project_root: - doc_spec = self._load_doc_spec(project_root, output.doc_spec) - if doc_spec: - output_ctx["doc_spec"] = { - "path": output.doc_spec, - "name": doc_spec.name, - "description": doc_spec.description, - "target_audience": doc_spec.target_audience, - "quality_criteria": [ - {"name": c.name, "description": c.description} - for c in doc_spec.quality_criteria - ], - "example_document": doc_spec.example_document, - } - outputs_context.append(output_ctx) - - context = { - "job_name": job.name, - "job_version": job.version, - "job_summary": job.summary, - "job_description": job.description, - "step_id": step.id, - "step_name": step.name, - "step_description": step.description, - "step_number": step_index + 1, # 1-based for display - "total_steps": len(job.steps), - "instructions_file": step.instructions_file, - "instructions_content": instructions_content, - "user_inputs": user_inputs, - "file_inputs": file_inputs, - "outputs": outputs_context, - "dependencies": step.dependencies, - "next_step": next_step, - "prev_step": prev_step, - "is_standalone": is_standalone, - "hooks": hooks, # New: all hooks by platform event name - "stop_hooks": stop_hooks, # Backward compat: after_agent hooks only - "quality_criteria": step.quality_criteria, # Declarative criteria with framing - "agent": step.agent, # Agent type (e.g., "general-purpose") - triggers context: fork - } - - # Add workflow context if step is part of a workflow - context.update(workflow_ctx) - - return context - - def _build_meta_skill_context( - self, job: JobDefinition, adapter: AgentAdapter - ) -> dict[str, Any]: - """ - Build template context for a job's meta-skill. 
- - Args: - job: Job definition - adapter: Agent adapter for platform-specific configuration - - Returns: - Template context dictionary - """ - # Build step info for the meta-skill - steps_info = [] - for step in job.steps: - skill_filename = adapter.get_step_skill_filename(job.name, step.id, step.exposed) - # Extract just the skill name (without path and extension) - # For Claude: job_name.step_id/SKILL.md -> job_name.step_id - # For Gemini: job_name/step_id.toml -> job_name:step_id - if adapter.name == "gemini": - # Gemini uses colon for namespacing: job_name:step_id - parts = skill_filename.replace(".toml", "").split("/") - skill_name = ":".join(parts) - else: - # Claude uses directory/SKILL.md format, extract directory name - # job_name.step_id/SKILL.md -> job_name.step_id - skill_name = skill_filename.replace("/SKILL.md", "") - - # Get workflow info for step - workflow = job.get_workflow_for_step(step.id) - step_info = { - "id": step.id, - "name": step.name, - "description": step.description, - "command_name": skill_name, - "dependencies": step.dependencies, - "exposed": step.exposed, - "is_standalone": self._is_standalone_step(job, step), - } - if workflow: - step_info["workflow_name"] = workflow.name - - steps_info.append(step_info) - - # Build workflow info with concurrent step support - workflows_info = [] - for workflow in job.workflows: - # Build step entries with concurrency info - step_entries_info = [] - for entry in workflow.step_entries: - entry_info: dict[str, Any] = { - "is_concurrent": entry.is_concurrent, - "step_ids": entry.step_ids, - } - if entry.is_concurrent: - # Add detailed step info for each concurrent step - concurrent_steps = [] - for i, step_id in enumerate(entry.step_ids): - step = job.get_step(step_id) - concurrent_steps.append( - { - "id": step_id, - "name": step.name if step else step_id, - "description": step.description if step else "", - "task_number": i + 1, - } - ) - entry_info["concurrent_steps"] = concurrent_steps - step_entries_info.append(entry_info) - - workflows_info.append( - { - "name": workflow.name, - "summary": workflow.summary, - "steps": workflow.steps, # Flattened for backward compat - "step_entries": step_entries_info, # New: with concurrency info - "first_step": workflow.steps[0] if workflow.steps else None, - } - ) - - # Identify standalone steps (not in any workflow) - standalone_steps = [s for s in steps_info if s["is_standalone"]] - - return { - "job_name": job.name, - "job_version": job.version, - "job_summary": job.summary, - "job_description": job.description, - "total_steps": len(job.steps), - "steps": steps_info, - "workflows": workflows_info, - "standalone_steps": standalone_steps, - "has_workflows": bool(job.workflows), - } - - def generate_meta_skill( - self, - job: JobDefinition, - adapter: AgentAdapter, - output_dir: Path | str, - ) -> Path: - """ - Generate the meta-skill file for a job. - - The meta-skill is the primary user interface for a job, routing - user intent to the appropriate step. 
- - Args: - job: Job definition - adapter: Agent adapter for the target platform - output_dir: Directory to write skill file to - - Returns: - Path to generated meta-skill file - - Raises: - GeneratorError: If generation fails - """ - output_dir = Path(output_dir) - - # Create skills subdirectory if needed - skills_dir = output_dir / adapter.skills_dir - skills_dir.mkdir(parents=True, exist_ok=True) - - # Build context - context = self._build_meta_skill_context(job, adapter) - - # Load and render template - env = self._get_jinja_env(adapter) - try: - template = env.get_template(adapter.meta_skill_template) - except TemplateNotFound as e: - raise GeneratorError(f"Meta-skill template not found: {e}") from e - - try: - rendered = template.render(**context) - except Exception as e: - raise GeneratorError(f"Meta-skill template rendering failed: {e}") from e - - # Write meta-skill file - skill_filename = adapter.get_meta_skill_filename(job.name) - skill_path = skills_dir / skill_filename - - # Ensure parent directories exist (for Gemini's job_name/index.toml structure) - skill_path.parent.mkdir(parents=True, exist_ok=True) - - try: - safe_write(skill_path, rendered) - except Exception as e: - raise GeneratorError(f"Failed to write meta-skill file: {e}") from e - - return skill_path - - def generate_step_skill( - self, - job: JobDefinition, - step: Step, - adapter: AgentAdapter, - output_dir: Path | str, - project_root: Path | str | None = None, - ) -> Path: - """ - Generate skill file for a single step. - - Args: - job: Job definition - step: Step to generate skill for - adapter: Agent adapter for the target platform - output_dir: Directory to write skill file to - project_root: Optional project root for loading doc specs (defaults to output_dir) - - Returns: - Path to generated skill file - - Raises: - GeneratorError: If generation fails - """ - output_dir = Path(output_dir) - project_root_path = Path(project_root) if project_root else output_dir - - # Create skills subdirectory if needed - skills_dir = output_dir / adapter.skills_dir - skills_dir.mkdir(parents=True, exist_ok=True) - - # Find step index - try: - step_index = next(i for i, s in enumerate(job.steps) if s.id == step.id) - except StopIteration as e: - raise GeneratorError(f"Step '{step.id}' not found in job '{job.name}'") from e - - # Build context (include exposed for template user-invocable setting) - context = self._build_step_context(job, step, step_index, adapter, project_root_path) - context["exposed"] = step.exposed - - # Load and render template - env = self._get_jinja_env(adapter) - try: - template = env.get_template(adapter.skill_template) - except TemplateNotFound as e: - raise GeneratorError(f"Template not found: {e}") from e - - try: - rendered = template.render(**context) - except Exception as e: - raise GeneratorError(f"Template rendering failed: {e}") from e - - # Write skill file - skill_filename = adapter.get_step_skill_filename(job.name, step.id, step.exposed) - skill_path = skills_dir / skill_filename - - # Ensure parent directories exist (for Gemini's job_name/step_id.toml structure) - skill_path.parent.mkdir(parents=True, exist_ok=True) - - try: - safe_write(skill_path, rendered) - except Exception as e: - raise GeneratorError(f"Failed to write skill file: {e}") from e - - return skill_path - - def generate_all_skills( - self, - job: JobDefinition, - adapter: AgentAdapter, - output_dir: Path | str, - project_root: Path | str | None = None, - ) -> list[Path]: - """ - Generate all skill files for a job: 
meta-skill and step skills. - - Args: - job: Job definition - adapter: Agent adapter for the target platform - output_dir: Directory to write skill files to - project_root: Optional project root for loading doc specs (defaults to output_dir) - - Returns: - List of paths to generated skill files (meta-skill first, then steps) - - Raises: - GeneratorError: If generation fails - """ - skill_paths = [] - project_root_path = Path(project_root) if project_root else Path(output_dir) - - # Generate meta-skill first (job-level entry point) - meta_skill_path = self.generate_meta_skill(job, adapter, output_dir) - skill_paths.append(meta_skill_path) - - # Generate step skills - for step in job.steps: - skill_path = self.generate_step_skill(job, step, adapter, output_dir, project_root_path) - skill_paths.append(skill_path) - - return skill_paths - def generate_deepwork_skill( self, adapter: AgentAdapter, diff --git a/src/deepwork/core/parser.py b/src/deepwork/core/parser.py index 480ab6d4..2685994c 100644 --- a/src/deepwork/core/parser.py +++ b/src/deepwork/core/parser.py @@ -1,5 +1,6 @@ """Job definition parser.""" +import logging from dataclasses import dataclass, field from pathlib import Path from typing import Any @@ -8,6 +9,8 @@ from deepwork.utils.validation import ValidationError, validate_against_schema from deepwork.utils.yaml_utils import YAMLError, load_yaml +logger = logging.getLogger("deepwork.parser") + class ParseError(Exception): """Exception raised for job parsing errors.""" @@ -543,6 +546,33 @@ def validate_workflows(self) -> None: ) seen_steps.add(step_id) + def warn_orphaned_steps(self) -> list[str]: + """ + Check for steps not included in any workflow and emit warnings. + + Returns: + List of orphaned step IDs + """ + # Collect all step IDs referenced in workflows + workflow_step_ids: set[str] = set() + for workflow in self.workflows: + workflow_step_ids.update(workflow.steps) + + # Find orphaned steps + orphaned_steps = [ + step.id for step in self.steps if step.id not in workflow_step_ids + ] + + if orphaned_steps: + logger.warning( + "Job '%s' has steps not included in any workflow: %s. 
" + "These steps are not accessible via the MCP interface.", + self.name, + ", ".join(orphaned_steps), + ) + + return orphaned_steps + @classmethod def from_dict(cls, data: dict[str, Any], job_dir: Path) -> "JobDefinition": """ @@ -615,4 +645,7 @@ def parse_job_definition(job_dir: Path | str) -> JobDefinition: job_def.validate_file_inputs() job_def.validate_workflows() + # Warn about orphaned steps (not in any workflow) + job_def.warn_orphaned_steps() + return job_def diff --git a/src/deepwork/mcp/schemas.py b/src/deepwork/mcp/schemas.py index 66188927..74f6eccf 100644 --- a/src/deepwork/mcp/schemas.py +++ b/src/deepwork/mcp/schemas.py @@ -70,9 +70,6 @@ class JobInfo(BaseModel): summary: str = Field(description="Short summary of the job") description: str | None = Field(default=None, description="Full description") workflows: list[WorkflowInfo] = Field(default_factory=list) - standalone_steps: list[StepInfo] = Field( - default_factory=list, description="Steps not in any workflow" - ) # ============================================================================= diff --git a/src/deepwork/mcp/tools.py b/src/deepwork/mcp/tools.py index 8f2f46e8..ae3c8012 100644 --- a/src/deepwork/mcp/tools.py +++ b/src/deepwork/mcp/tools.py @@ -22,7 +22,6 @@ JobInfo, StartWorkflowInput, StartWorkflowResponse, - StepInfo, StepStatus, WorkflowInfo, ) @@ -91,32 +90,15 @@ def _job_to_info(self, job: JobDefinition) -> JobInfo: job: Parsed job definition Returns: - JobInfo with workflow and step details + JobInfo with workflow details """ # Convert workflows - workflows = [] - workflow_step_ids: set[str] = set() - - for wf in job.workflows: - workflow_step_ids.update(wf.steps) - - workflows.append( - WorkflowInfo( - name=wf.name, - summary=wf.summary, - ) - ) - - # Find standalone steps (not in any workflow) - standalone_steps = [ - StepInfo( - id=step.id, - name=step.name, - description=step.description, - dependencies=step.dependencies, + workflows = [ + WorkflowInfo( + name=wf.name, + summary=wf.summary, ) - for step in job.steps - if step.id not in workflow_step_ids + for wf in job.workflows ] return JobInfo( @@ -124,7 +106,6 @@ def _job_to_info(self, job: JobDefinition) -> JobInfo: summary=job.summary, description=job.description, workflows=workflows, - standalone_steps=standalone_steps, ) def _get_job(self, job_name: str) -> JobDefinition: diff --git a/src/deepwork/standard_jobs/deepwork_jobs/job.yml b/src/deepwork/standard_jobs/deepwork_jobs/job.yml index 4b58cb47..5acfd3d0 100644 --- a/src/deepwork/standard_jobs/deepwork_jobs/job.yml +++ b/src/deepwork/standard_jobs/deepwork_jobs/job.yml @@ -1,14 +1,16 @@ # yaml-language-server: $schema=.deepwork/schemas/job.schema.json name: deepwork_jobs -version: "1.0.0" -summary: "Creates and manages multi-step AI workflows. Use when defining, implementing, or improving DeepWork jobs." +version: "1.2.0" +summary: "Creates and manages multi-step AI workflows. Use when defining, implementing, testing, or improving DeepWork jobs." description: | Core commands for managing DeepWork jobs. These commands help you define new multi-step - workflows and learn from running them. + workflows, test them on real use cases, and learn from running them. - The `new_job` workflow guides you through defining and implementing a new job by - asking structured questions about your workflow, understanding each step's inputs and outputs, - reviewing the specification, and generating all necessary files. 
+ The `new_job` workflow guides you through the full lifecycle of creating a new job: + 1. **Define**: Gather requirements through structured questions and create job.yml + 2. **Implement**: Generate step instruction files and sync slash commands + 3. **Test**: Run the workflow on a real use case, critique output, and iterate with user + 4. **Iterate**: Review what happened and improve the job definition based on learnings The `learn` skill reflects on conversations where DeepWork jobs were run, identifies confusion or inefficiencies, and improves job instructions. It also captures bespoke @@ -16,13 +18,26 @@ description: | workflows: - name: new_job - summary: "Create a new DeepWork job from scratch through definition and implementation" + summary: "Create a new DeepWork job from scratch through definition, implementation, testing, and iteration" steps: - define - implement + - test + - iterate + + - name: repair + summary: "Clean up and migrate DeepWork configurations from prior versions" + steps: + - fix_settings + - fix_jobs + - errata changelog: + - version: "1.2.0" + changes: "Added repair workflow with fix_settings, fix_jobs, and errata steps for migrating old DeepWork configurations to current format" - version: "1.1.0" + changes: "Added test and iterate steps to new_job workflow; test runs the workflow on a real use case and gathers feedback; iterate improves the job definition based on what happened" + - version: "1.0.1" changes: "Removed review_job_spec step from new_job workflow; implement now follows directly from define" - version: "1.0.0" changes: "Added workflows section to distinguish new_job workflow (define→review_job_spec→implement) from standalone learn skill" @@ -79,6 +94,52 @@ steps: - "**Commands Available**: Are the slash-commands generated in `.claude/commands/`?" - "**Rules Considered**: Has the agent thought about whether rules would benefit this job? If relevant rules were identified, did they explain them and offer to run `/deepwork_rules.define`? Not every job needs rules - only suggest when genuinely helpful." + - id: test + name: "Test the New Workflow" + description: "Tests the newly created workflow by running it on a real use case, critiquing the output, and iterating until the user is satisfied. Use after implementing a job." + instructions_file: steps/test.md + inputs: + - file: job.yml + from_step: define + - file: steps/ + from_step: implement + outputs: + - test_feedback.md + dependencies: + - define + - implement + quality_criteria: + - "**User Informed**: Did the agent explain the workflow is ready and ask what to test it on?" + - "**Workflow Invoked**: Was the new workflow actually run on the user's test case?" + - "**Output Critiqued**: Did the agent identify up to 3 top issues with the output?" + - "**User Feedback Gathered**: Did the agent ask the user about each issue and gather additional feedback?" + - "**Corrections Made**: Were all requested corrections applied to the output?" + - "**User Satisfied**: Did the user confirm the output meets their needs?" + + - id: iterate + name: "Iterate on Workflow Design" + description: "Reviews the test run conversation and improves the job definition based on what happened. Use after testing a newly created job." 
+ instructions_file: steps/iterate.md + inputs: + - file: job.yml + from_step: define + - file: steps/ + from_step: implement + outputs: + - job.yml + - steps/ + dependencies: + - define + - implement + - test + quality_criteria: + - "**Conversation Reviewed**: Did the agent analyze the test run for inefficiencies and issues?" + - "**Instructions Improved**: Were step instructions updated to address identified problems?" + - "**Quality Criteria Updated**: Were quality criteria adjusted to better match user expectations?" + - "**Tool Usage Considered**: Did the agent consider if different tools would improve the workflow?" + - "**Sync Complete**: Has `deepwork sync` been run to apply changes?" + - "**Recap Provided**: Did the agent summarize what was improved and why?" + - id: learn name: "Learn from Job Execution" description: "Analyzes conversation history to improve job instructions and capture learnings. Use after running a job to refine it." @@ -103,3 +164,61 @@ steps: - "**Working Folder Correct**: Is AGENTS.md in the correct working folder for the job?" - "**Generalizable Separated**: Are generalizable improvements in instructions, not AGENTS.md?" - "**Sync Complete**: Has `deepwork sync` been run if instructions were modified?" + + - id: fix_settings + name: "Fix Settings Files" + description: "Cleans up .claude/settings.json and related configuration files, removing legacy permissions, duplicate hooks, and hardcoded paths from prior DeepWork versions." + instructions_file: steps/fix_settings.md + inputs: [] + outputs: + - .claude/settings.json + dependencies: [] + quality_criteria: + - "**DeepWork Skills Removed**: Are `Skill(...)` entries matching jobs in `.deepwork/jobs/` removed?" + - "**Non-DeepWork Skills Preserved**: Are skills NOT matching DeepWork jobs left intact?" + - "**Rules Hooks Removed**: Are all DeepWork Rules hooks and permissions removed?" + - "**Duplicate Hooks Removed**: Are duplicate hook entries consolidated or removed?" + - "**Hardcoded Paths Removed**: Are user-specific hardcoded paths (like `/Users/*/...`) removed?" + - "**Deprecated Commands Removed**: Are deprecated commands like `deepwork hook *` removed?" + - "**Valid JSON**: Is settings.json still valid JSON after modifications?" + - "**Backup Created**: Was a backup of the original settings created before modifications?" + + - id: fix_jobs + name: "Fix Job Definitions" + description: "Updates job.yml files and step instructions to current DeepWork format, removing deprecated fields and migrating to new structures." + instructions_file: steps/fix_jobs.md + inputs: + - file: .claude/settings.json + from_step: fix_settings + outputs: + - .deepwork/jobs/ + dependencies: + - fix_settings + quality_criteria: + - "**Exposed Field Addressed**: Are `exposed: true` fields removed or noted as deprecated?" + - "**Stop Hooks Migrated**: Are `stop_hooks` migrated to `hooks.after_agent` format?" + - "**Removed Steps Cleaned**: Are references to removed steps (like `review_job_spec`) updated?" + - "**Orphaned Steps Fixed**: Are steps not in any workflow either added to workflows or removed?" + - "**Valid YAML**: Do all job.yml files pass schema validation?" + - "**Sync Complete**: Has `deepwork sync` been run to regenerate commands?" + + - id: errata + name: "Clean Up Errata" + description: "Removes obsolete files and folders from prior DeepWork versions, including old skill directories, temp files, and deprecated configurations." 
+ instructions_file: steps/errata.md + inputs: + - file: .deepwork/jobs/ + from_step: fix_jobs + outputs: + - repair_summary.md + dependencies: + - fix_settings + - fix_jobs + quality_criteria: + - "**Old Skills Folder Handled**: Is `.claude/skills/` folder removed or backed up?" + - "**Temp Files Cleaned**: Are `.deepwork/tmp/` contents cleaned appropriately?" + - "**Rules Folder Removed**: Is `.deepwork/rules/` folder backed up and removed (fully deprecated)?" + - "**Rules Job Removed**: Is `.deepwork/jobs/deepwork_rules/` removed if present?" + - "**Config Version Updated**: Is `.deepwork/config.yml` using current version format?" + - "**Summary Provided**: Is a repair_summary.md file created documenting all changes made?" + - "**Git Status Clean**: Are changes ready to be committed (no untracked garbage files)?" diff --git a/src/deepwork/standard_jobs/deepwork_jobs/steps/errata.md b/src/deepwork/standard_jobs/deepwork_jobs/steps/errata.md new file mode 100644 index 00000000..30ee7e8a --- /dev/null +++ b/src/deepwork/standard_jobs/deepwork_jobs/steps/errata.md @@ -0,0 +1,247 @@ +# Clean Up Errata + +## Objective + +Remove obsolete files and folders from prior DeepWork versions. This final step cleans up artifacts that are no longer used by the MCP-based system, creating a summary of all changes made during the repair workflow. + +## Task + +Identify and clean up deprecated files and folders, then create a comprehensive summary document. + +### Step 1: Handle Old Skills Folder + +Check if `.claude/skills/` exists. This folder was used by the old skill-based system and is no longer needed. + +```bash +ls -la .claude/skills/ 2>/dev/null || echo "No skills folder (good!)" +``` + +**If it exists:** +1. Count the contents: `ls .claude/skills/ | wc -l` +2. Ask the user whether to: + - **Delete** the folder entirely (recommended if migrated to MCP) + - **Back up** to `.claude/skills.backup/` before deleting + - **Keep** if they have custom skills not yet migrated + +**Old skill structure to recognize:** +``` +.claude/skills/ +├── job_name/ +│ └── SKILL.md +├── job_name.step_name/ +│ └── SKILL.md +└── ... +``` + +### Step 2: Clean Temp Files + +Check `.deepwork/tmp/` for accumulated temporary files: + +```bash +ls -la .deepwork/tmp/ 2>/dev/null || echo "No tmp folder" +``` + +**Safe to delete:** +- `.deepwork/tmp/rules/queue/*.json` - Old rules queue files +- Any files older than 7 days +- Empty subdirectories + +**Be careful with:** +- Files that might be in-progress work +- Anything with recent modification times + +```bash +# Clean old queue files +rm -rf .deepwork/tmp/rules/queue/*.json 2>/dev/null + +# Remove empty directories +find .deepwork/tmp -type d -empty -delete 2>/dev/null +``` + +### Step 3: Remove Rules Folder (Fully Deprecated) + +DeepWork Rules have been completely removed from the system. The `.deepwork/rules/` folder should be deleted. + +```bash +ls -la .deepwork/rules/ 2>/dev/null || echo "No rules folder (good!)" +``` + +**If the folder exists:** + +1. **Back up the folder** (in case user wants to reference old rules): + ```bash + mv .deepwork/rules/ .deepwork/rules.backup/ + ``` + +2. **Inform the user** that DeepWork Rules are deprecated and the folder has been backed up + +3. 
**After user confirms** the backup is acceptable, the backup can be deleted later + +**Also remove these related items if present:** +- `.deepwork/tmp/rules/` folder and all contents +- `.deepwork/jobs/deepwork_rules/` folder (the old rules job) +- Any `deepwork_rules` job that may have been installed + +```bash +rm -rf .deepwork/tmp/rules/ 2>/dev/null +rm -rf .deepwork/jobs/deepwork_rules/ 2>/dev/null +``` + +### Step 4: Update Config Version + +Check `.deepwork/config.yml` for outdated version format: + +```bash +cat .deepwork/config.yml +``` + +**Old format:** +```yaml +version: 1.0.0 +platforms: +- claude +``` + +**Current format:** +```yaml +version: "1.0" +platforms: + - claude +``` + +Update if needed to match current schema expectations. + +### Step 5: Remove Other Obsolete Files + +Check for and remove other obsolete files: + +| File/Pattern | Description | Action | +|--------------|-------------|--------| +| `.deepwork/.last_head_ref` | Git state tracking | Keep (used by MCP) | +| `.deepwork/.last_work_tree` | Git state tracking | Keep (used by MCP) | +| `.deepwork/.gitignore` | Ignore patterns | Review and update | +| `.claude/commands/` | Generated commands | Keep (current system) | +| `.claude/settings.local.json` | Local overrides | Keep (user settings) | + +### Step 6: Verify Git Status + +Check that the cleanup hasn't left untracked garbage: + +```bash +git status +``` + +**Review:** +- Deleted files should show as deleted +- No new untracked files should appear (unless intentionally created) +- Backup files (`.backup`) should be in `.gitignore` or cleaned up + +### Step 7: Create Repair Summary + +Create a `repair_summary.md` file documenting all changes made during this workflow: + +```markdown +# DeepWork Repair Summary + +**Date:** [current date] +**Project:** [project name] + +## Settings Fixes (fix_settings step) + +- [ ] Removed X `Skill(...)` permission entries +- [ ] Consolidated Y duplicate hooks +- [ ] Removed Z hardcoded paths +- [ ] Removed deprecated `deepwork hook` commands + +## Job Fixes (fix_jobs step) + +### [job_name] +- [ ] Removed `exposed` field from steps: [list] +- [ ] Migrated `stop_hooks` to `hooks.after_agent` +- [ ] Updated workflow to remove `review_job_spec` +- [ ] Version bumped to X.Y.Z + +### [another_job] +- [ ] ... + +## Errata Cleanup (errata step) + +- [ ] Handled `.claude/skills/` folder: [deleted/backed up/kept] +- [ ] Cleaned `.deepwork/tmp/`: removed X files +- [ ] Reviewed `.deepwork/rules/`: [action taken] +- [ ] Updated `.deepwork/config.yml` version format + +## Files Changed + +``` +[list of all files modified/deleted] +``` + +## Recommended Next Steps + +1. Review changes with `git diff` +2. Test that `deepwork sync` runs without errors +3. Commit changes with message: "chore: migrate to DeepWork MCP format" +4. 
Delete backup files after confirming everything works +``` + +## Quality Criteria + +- `.claude/skills/` folder is handled (removed, backed up, or documented why kept) +- `.deepwork/tmp/` contents are cleaned appropriately +- `.deepwork/rules/` folder is backed up and removed (DeepWork Rules fully deprecated) +- `.deepwork/tmp/rules/` folder is removed +- `.deepwork/jobs/deepwork_rules/` folder is removed if present +- `.deepwork/config.yml` uses current version format +- A `repair_summary.md` file is created documenting all changes +- Git status shows clean changes ready to commit +- When all criteria are met, include `Quality Criteria Met` in your response + +## Example Summary Output + +```markdown +# DeepWork Repair Summary + +**Date:** 2024-02-04 +**Project:** internal-agentspace + +## Settings Fixes + +- Removed 87 `Skill(...)` permission entries +- Consolidated 2 duplicate `UserPromptSubmit` hooks into 1 +- Removed hardcoded path: `/Users/tyler/.local/pipx/venvs/deepwork/bin/python` +- Removed 3 deprecated `deepwork hook rules_check` commands + +## Job Fixes + +### deepwork_jobs +- Updated from old version (workflow includes `review_job_spec`) +- Reinstalled with `deepwork install --platform claude` + +### competitive_research +- Removed `exposed: true` from `discover_competitors` step +- Migrated 1 `stop_hooks` to `hooks.after_agent` +- Version bumped to 1.0.1 + +## Errata Cleanup + +- Backed up `.claude/skills/` to `.claude/skills.backup/` (174 files) +- Deleted `.claude/skills/` folder +- Cleaned `.deepwork/tmp/rules/queue/` (12 old JSON files) +- Kept `.deepwork/rules/` (contains active example rules) +- Updated `.deepwork/config.yml` version to "1.0" + +## Recommended Next Steps + +1. `git add -A && git diff --staged` +2. `deepwork sync` (verify no errors) +3. `git commit -m "chore: migrate to DeepWork MCP format"` +4. After testing: `rm -rf .claude/skills.backup/` +``` + +## Important Notes + +1. **Always back up before deleting** - User data is irreplaceable +2. **Ask before destructive actions** - When in doubt, ask the user +3. **Document everything** - The summary is valuable for understanding what changed +4. **Don't auto-commit** - Let the user review and commit changes themselves diff --git a/src/deepwork/standard_jobs/deepwork_jobs/steps/fix_jobs.md b/src/deepwork/standard_jobs/deepwork_jobs/steps/fix_jobs.md new file mode 100644 index 00000000..cd6f835b --- /dev/null +++ b/src/deepwork/standard_jobs/deepwork_jobs/steps/fix_jobs.md @@ -0,0 +1,195 @@ +# Fix Job Definitions + +## Objective + +Update all job.yml files and step instructions in `.deepwork/jobs/` to the current DeepWork format. This step migrates deprecated fields, removes references to deleted steps, and ensures all jobs are compatible with the MCP-based workflow system. + +## Task + +Audit and repair all job definitions, migrating from legacy formats to current specifications. + +### Step 1: Inventory All Jobs + +List all jobs in the project: + +```bash +ls -la .deepwork/jobs/ +``` + +For each job directory, you'll need to check and potentially fix the `job.yml` file. + +### Step 2: Remove `exposed` Field + +The `exposed` field on steps no longer has any effect in MCP-based DeepWork. Steps are now only accessible through workflows. + +**Find and remove:** +```yaml +steps: + - id: some_step + exposed: true # REMOVE THIS LINE +``` + +If a step was `exposed: true` and is not in any workflow, it should either: +1. Be added to a workflow, OR +2. 
Be removed from the job entirely + +### Step 3: Migrate `stop_hooks` to `hooks.after_agent` + +The `stop_hooks` field is deprecated. Migrate to the new `hooks` structure: + +**Before (deprecated):** +```yaml +steps: + - id: my_step + stop_hooks: + - prompt: "Verify the output meets quality standards" +``` + +**After (current format):** +```yaml +steps: + - id: my_step + hooks: + after_agent: + - prompt: "Verify the output meets quality standards" +``` + +### Step 4: Remove References to Deleted Steps + +Check for references to steps that no longer exist in the standard jobs: + +**Steps that have been removed:** +- `review_job_spec` - Was removed from `deepwork_jobs` in v1.0.1 + +**What to fix:** +- Remove from workflow `steps` arrays +- Update `from_step` references in inputs +- Update `dependencies` arrays + +**Example fix:** +```yaml +# Before +workflows: + - name: new_job + steps: + - define + - review_job_spec # REMOVE + - implement + +steps: + - id: implement + inputs: + - file: job.yml + from_step: review_job_spec # CHANGE TO: define + dependencies: + - review_job_spec # CHANGE TO: define +``` + +### Step 5: Fix Orphaned Steps + +Steps not included in any workflow cannot be invoked via the MCP interface. The parser will emit warnings for these. + +Run the following to see warnings: +```bash +deepwork sync 2>&1 | grep -i "warning" +``` + +**For each orphaned step, ask the user which action to take:** + +1. **Add to a workflow** - Create a new single-step workflow for it: + ```yaml + workflows: + - name: standalone_step_name + summary: "Runs the step_name step" + steps: + - step_name + ``` + +2. **Remove the step entirely** - Delete the step from `steps:` array and its instruction file + +3. **Keep as-is (deprecated)** - The step will remain inaccessible but preserved in the job definition + +**Do not automatically decide** - Always confirm with the user which option they prefer for each orphaned step. + +### Step 6: Validate Against Schema + +After making changes, validate each job.yml: + +```bash +deepwork sync +``` + +Fix any schema validation errors that appear. + +### Step 7: Update Version Numbers + +If you made significant changes to a job, bump its version number: + +```yaml +# Bump patch version for minor fixes +version: "1.0.0" -> version: "1.0.1" + +# Add changelog entry +changelog: + - version: "1.0.1" + changes: "Migrated to current DeepWork format; removed deprecated fields" +``` + +### Step 8: Run Sync + +After all fixes, regenerate commands: + +```bash +deepwork sync +``` + +Verify no errors or warnings appear. + +## Quality Criteria + +- All `exposed: true` fields are removed or noted +- All `stop_hooks` are migrated to `hooks.after_agent` format +- References to removed steps (like `review_job_spec`) are updated +- Orphaned steps are either added to workflows or removed +- All job.yml files pass schema validation +- `deepwork sync` runs without errors +- When all criteria are met, include `Quality Criteria Met` in your response + +## Common Issues and Fixes + +### Issue: Step references non-existent step in `from_step` +``` +Error: Step 'implement' has file input from 'review_job_spec' but 'review_job_spec' is not in dependencies +``` +**Fix:** Update `from_step` to reference a step that still exists. + +### Issue: Workflow references non-existent step +``` +Error: Workflow 'new_job' references non-existent step 'review_job_spec' +``` +**Fix:** Remove the step from the workflow's `steps` array. 
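+To find every job that still references a removed step before re-running `deepwork sync`, a rough scan like the one below can help (it also flags the deprecated fields from Steps 2 and 3). It is a plain text search, not a YAML-aware check, so treat its output as a list of files to open and fix by hand using the steps above:
+
+```python
+from pathlib import Path
+
+removed_steps = {"review_job_spec"}              # steps deleted from the standard jobs
+deprecated_fields = ("exposed:", "stop_hooks:")  # fields that no longer have any effect
+
+for job_yml in sorted(Path(".deepwork/jobs").glob("*/job.yml")):
+    text = job_yml.read_text()
+    hits = [name for name in removed_steps if name in text]
+    hits += [field for field in deprecated_fields if field in text]
+    if hits:
+        print(f"{job_yml}: {', '.join(hits)}")
+```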
+ +### Issue: Orphaned step warning +``` +Warning: Job 'my_job' has steps not included in any workflow: standalone_step +``` +**Fix:** Either add the step to a workflow or remove it from the job. + +## Jobs to Check + +For each job in `.deepwork/jobs/`, check: + +| Check | What to Look For | +|-------|------------------| +| `exposed` field | Remove from all steps | +| `stop_hooks` | Migrate to `hooks.after_agent` | +| Workflow steps | Remove references to deleted steps | +| Dependencies | Update to valid step IDs | +| File inputs | Update `from_step` references | +| Version | Bump if changes were made | + +## Important Notes + +1. **Don't modify standard jobs directly** - If `deepwork_jobs` is out of date, run `deepwork install --platform claude` to get the latest version +2. **Preserve custom logic** - When migrating hooks, preserve the prompt content +3. **Test after changes** - Run `deepwork sync` after each job fix to catch errors early diff --git a/src/deepwork/standard_jobs/deepwork_jobs/steps/fix_settings.md b/src/deepwork/standard_jobs/deepwork_jobs/steps/fix_settings.md new file mode 100644 index 00000000..0c046cd9 --- /dev/null +++ b/src/deepwork/standard_jobs/deepwork_jobs/steps/fix_settings.md @@ -0,0 +1,188 @@ +# Fix Settings Files + +## Objective + +Clean up `.claude/settings.json` and related configuration files, removing legacy artifacts from prior DeepWork versions. This step ensures the Claude Code settings are free of deprecated permissions, duplicate hooks, and hardcoded paths. + +## Task + +Audit and repair the `.claude/settings.json` file, removing gunk accumulated from older DeepWork implementations. + +### Step 1: Create Backup + +Before making any changes, create a backup: + +```bash +cp .claude/settings.json .claude/settings.json.backup +``` + +### Step 2: Inventory DeepWork Jobs + +First, get the list of jobs that exist in `.deepwork/jobs/`: + +```bash +ls .deepwork/jobs/ +``` + +Note these job names - you will use them to identify which `Skill(...)` entries to remove. + +### Step 3: Remove DeepWork Skill Permissions + +Look for and **remove** `Skill(...)` permission entries that match DeepWork jobs. Only remove entries where the skill name matches a job in `.deepwork/jobs/`. + +**What to look for:** +```json +"permissions": { + "allow": [ + "Skill(deepwork_jobs)", // Remove if 'deepwork_jobs' is in .deepwork/jobs/ + "Skill(deepwork_jobs.define)", // Remove - matches job_name.step pattern + "Skill(competitive_research)", // Remove if 'competitive_research' is in .deepwork/jobs/ + "Skill(my_custom_skill)", // KEEP - not a DeepWork job + ... + ] +} +``` + +**IMPORTANT:** Only remove skills that: +- Exactly match a job name in `.deepwork/jobs/` (e.g., `Skill(job_name)`) +- Match the pattern `job_name.step_name` where `job_name` is in `.deepwork/jobs/` + +**DO NOT remove** skills that don't match DeepWork jobs - the user may have created these manually for other purposes. + +### Step 4: Remove Duplicate Hooks + +Check for duplicate hook entries in the `hooks` section. Prior versions sometimes added the same hook multiple times. + +**Example of duplicates to consolidate:** +```json +"hooks": { + "UserPromptSubmit": [ + { + "matcher": "", + "hooks": [{ "type": "command", "command": "some_command" }] + }, + { + "matcher": "", + "hooks": [{ "type": "command", "command": "some_command" }] // DUPLICATE + } + ] +} +``` + +Keep only one instance of each unique hook. 
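+Where it helps, Steps 3 and 4 can be combined into a small helper script. The sketch below assumes the settings layout shown above and that job names can be read from `.deepwork/jobs/`; create the backup from Step 1 first and review the printed summary and resulting file before committing:
+
+```python
+import json
+from pathlib import Path
+
+settings_path = Path(".claude/settings.json")
+settings = json.loads(settings_path.read_text())
+jobs = {p.name for p in Path(".deepwork/jobs").iterdir() if p.is_dir()}
+
+def is_deepwork_skill(entry: str) -> bool:
+    """True for Skill(job) or Skill(job.step) where job is a DeepWork job."""
+    if not (entry.startswith("Skill(") and entry.endswith(")")):
+        return False
+    name = entry[len("Skill("):-1]
+    return name in jobs or name.split(".", 1)[0] in jobs
+
+# Drop Skill(...) permissions that match a job in .deepwork/jobs/.
+allow = settings.get("permissions", {}).get("allow", [])
+kept = [entry for entry in allow if not is_deepwork_skill(entry)]
+if allow:
+    settings["permissions"]["allow"] = kept
+
+# Consolidate exact-duplicate hook entries within each event.
+for event, entries in settings.get("hooks", {}).items():
+    seen = set()
+    unique = []
+    for entry in entries:
+        key = json.dumps(entry, sort_keys=True)
+        if key not in seen:
+            seen.add(key)
+            unique.append(entry)
+    settings["hooks"][event] = unique
+
+settings_path.write_text(json.dumps(settings, indent=2) + "\n")
+print(f"Removed {len(allow) - len(kept)} DeepWork Skill(...) permission(s).")
+```
+
+This only removes exact job-name matches and exact-duplicate hook entries; anything ambiguous (including the hardcoded paths and deprecated commands in the next steps) should still be handled by hand, following the rules above.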
+ +### Step 5: Remove Hardcoded User Paths + +Search for and remove any hardcoded paths that reference specific user directories: + +**Patterns to find and remove:** +- `/Users/username/.local/pipx/venvs/deepwork/bin/python` +- `/home/username/.local/...` +- Any path containing a specific username + +These should either be removed or replaced with relative paths. + +### Step 6: Remove DeepWork Rules Hooks (Fully Deprecated) + +DeepWork Rules have been completely removed from the system. Remove ALL hooks related to rules: + +**Hooks to remove entirely:** +- Any hook with command `deepwork hook rules_check` +- Any hook with command containing `rules_check` +- Any hook referencing `.deepwork/jobs/deepwork_rules/hooks/` +- Any hook referencing `.deepwork/rules/` + +**Also remove these permissions if present:** +- `Skill(deepwork_rules)` +- `Skill(deepwork_rules.define)` +- `Bash(rm -rf .deepwork/tmp/rules/queue/*.json)` + +### Step 7: Remove Other Deprecated Commands + +Remove hooks referencing other deprecated DeepWork commands: + +**Commands to remove:** +- `deepwork hook *` - The entire hook subcommand is deprecated +- References to any `.deepwork/jobs/*/hooks/` scripts + +### Step 8: Clean Up Empty Sections + +If after cleanup any sections are empty, consider removing them: + +```json +// Remove if empty: +"hooks": { + "Stop": [] // Remove this empty array +} +``` + +### Step 9: Validate JSON + +After all edits, ensure the file is valid JSON: + +```bash +python -c "import json; json.load(open('.claude/settings.json'))" +``` + +If there are syntax errors, fix them before proceeding. + +## Quality Criteria + +- DeepWork job `Skill(...)` permissions are removed (only those matching `.deepwork/jobs/`) +- Non-DeepWork skills are preserved (skills not matching any job in `.deepwork/jobs/`) +- All DeepWork Rules hooks and permissions are removed +- Duplicate hook entries are consolidated +- Hardcoded user-specific paths are removed +- Deprecated `deepwork hook` commands are removed +- The settings.json file is valid JSON +- A backup was created before modifications +- When all criteria are met, include `Quality Criteria Met` in your response + +## Example Before/After + +### Before (with gunk): +```json +{ + "hooks": { + "UserPromptSubmit": [ + { "matcher": "", "hooks": [{ "type": "command", "command": ".deepwork/jobs/deepwork_rules/hooks/user_prompt_submit.sh" }] }, + { "matcher": "", "hooks": [{ "type": "command", "command": ".deepwork/jobs/deepwork_rules/hooks/user_prompt_submit.sh" }] } + ], + "Stop": [ + { "matcher": "", "hooks": [{ "type": "command", "command": "deepwork hook rules_check" }] } + ], + "SubagentStop": [ + { "matcher": "", "hooks": [{ "type": "command", "command": "/Users/tyler/.local/pipx/venvs/deepwork/bin/python -m deepwork.hooks.rules_check" }] } + ] + }, + "permissions": { + "allow": [ + "Skill(competitive_research)", + "Skill(competitive_research.discover_competitors)", + "Skill(deepwork_jobs)", + "Skill(deepwork_jobs.define)", + "Read(./.deepwork/**)", + "WebSearch" + ] + } +} +``` + +### After (cleaned): +```json +{ + "hooks": {}, + "permissions": { + "allow": [ + "Read(./.deepwork/**)", + "WebSearch" + ] + } +} +``` + +## Important Notes + +1. **Don't remove non-DeepWork permissions** - Keep permissions like `WebSearch`, `Read(...)`, `Bash(...)` that aren't related to old DeepWork skills +2. **Be conservative** - If unsure whether something is legacy, ask the user +3. 
**Document changes** - Note what was removed for the final summary diff --git a/src/deepwork/standard_jobs/deepwork_jobs/steps/iterate.md b/src/deepwork/standard_jobs/deepwork_jobs/steps/iterate.md new file mode 100644 index 00000000..78f8ddf3 --- /dev/null +++ b/src/deepwork/standard_jobs/deepwork_jobs/steps/iterate.md @@ -0,0 +1,243 @@ +# Iterate on Workflow Design + +## Objective + +Review the test run conversation and improve the job definition based on what happened. This step closes the feedback loop by incorporating learnings from the test into the workflow itself, making future runs more efficient and producing better results. + +## Task + +Analyze the conversation history from the test step, identify areas for improvement, and update the job definition and step instructions accordingly. + +### Step 1: Review the Conversation History + +Carefully analyze the conversation from the test step, looking for: + +1. **Process Inefficiencies** + - Steps that took multiple attempts to complete + - Questions the agent had to ask that should have been in the instructions + - Unnecessary back-and-forth with the user + - Information that had to be repeated + +2. **Output Quality Issues** + - Issues identified during critique (from Step 3 of test) + - Corrections requested by the user + - Patterns in user feedback (what did they consistently want changed?) + +3. **Tool Usage Problems** + - Tools that didn't work as expected + - Missing tools that would have helped + - Inefficient tool sequences + +4. **Missing or Unclear Instructions** + - Ambiguities that led to wrong outputs + - Missing guidance that caused confusion + - Quality criteria that weren't clear enough + +### Step 2: Plan Improvements + +For each issue identified, determine the appropriate fix: + +| Issue Type | Solution Location | +|------------|-------------------| +| Process inefficiency | Update step instructions with clearer guidance | +| Output quality | Update quality criteria or add examples | +| Missing information | Add to step inputs or instructions | +| Tool problems | Suggest different tools in instructions | +| Unclear criteria | Rewrite quality criteria to be specific | + +**Prioritize improvements** that will have the most impact on future runs. Focus on: +- Issues that caused multiple iterations +- Problems that affected the final output quality +- Confusion that could be eliminated with clearer instructions + +### Step 3: Update Step Instructions + +For each step that needs improvement: + +1. **Read the current instruction file** at `.deepwork/jobs/[job_name]/steps/[step_id].md` + +2. **Make targeted improvements**: + - Add missing context or clarification + - Include examples of good output (use what worked in the test) + - Clarify ambiguous instructions + - Add tool recommendations if a different approach would be better + - Update quality criteria to match user expectations + +3. **Keep instructions concise**: + - Avoid redundancy + - Be direct and actionable + - Use bullet points where appropriate + +### Step 4: Update Quality Criteria + +Review and update quality criteria in two places: + +1. **In step instruction files** - The "Quality Criteria" section should reflect what the user actually cared about during testing + +2. 
**In job.yml** - If steps have `quality_criteria` or `stop_hooks`, update them to: + - Remove criteria that weren't relevant + - Add criteria based on user feedback + - Make existing criteria more specific + +**Example improvement:** +```yaml +# Before +quality_criteria: + - "Report is formatted correctly" + +# After +quality_criteria: + - "Report uses distinct colors for each data series in charts" + - "Tables have sufficient padding and font size for readability" + - "Executive summary is understandable by non-technical readers" +``` + +### Step 5: Consider Alternative Tools + +If any tools didn't work well during the test: + +1. **Identify the problem** - What went wrong? (slow, wrong output, hard to use) + +2. **Research alternatives** - What other tools could accomplish the same goal? + +3. **Update instructions** - If a better tool exists, update the step instructions to recommend it + +Examples: +- If web scraping was unreliable, suggest a specific browser automation approach +- If data processing was slow, suggest a different method or tool +- If file generation had issues, recommend a different library or format + +### Step 6: Update Job Version and Changelog + +After making improvements: + +1. **Bump the version** in `job.yml`: + - Patch version (x.x.1) for minor instruction tweaks + - Minor version (x.1.0) for quality criteria changes or significant improvements + +2. **Add a changelog entry** describing what was improved: + ```yaml + changelog: + - version: "1.0.1" + changes: "Improved [step] instructions based on test run: added examples, clarified criteria, updated tool recommendations" + ``` + +### Step 7: Sync Changes + +Run `deepwork sync` to apply the changes: + +```bash +deepwork sync +``` + +### Step 8: Provide Recap + +Summarize the improvements made: + +1. **List the issues identified** from the test run + +2. **Describe the changes made** to address each issue + +3. **Explain the expected impact** on future runs + +Example recap format: +``` +## Iteration Summary + +### Issues Identified +1. Chart colors were hard to distinguish +2. Tables were too small to read +3. Executive summary was too technical + +### Changes Made + +**gather_data step:** +- Added instruction to use distinct color palettes for charts +- Updated quality criteria to require readable table formatting + +**generate_report step:** +- Added example of good executive summary +- Updated quality criteria: "Summary understandable by non-technical stakeholders" + +**job.yml:** +- Updated version to 1.0.1 +- Added changelog entry + +### Expected Impact +Future runs should produce reports with better visual design and clearer summaries, reducing the need for post-generation corrections. +``` + +## Quality Criteria + +- Conversation history from test step was analyzed for issues +- Process inefficiencies were identified and addressed +- Step instructions were updated to improve clarity +- Quality criteria were updated to match user expectations +- Alternative tools were considered where relevant +- Job version and changelog were updated +- `deepwork sync` was run to apply changes +- A clear recap of improvements was provided +- When all criteria are met, include `Quality Criteria Met` in your response + +## Example Dialog + +``` +Claude: Let me review our test run and improve the workflow based on what happened. + +**Analyzing Test Conversation...** + +I identified the following issues from the test run: + +### Issues Found + +1. 
**Chart readability (gather_data step)**: The instructions didn't specify how to handle color choices for visualizations. During the test, we had to fix colors that were too similar. + +2. **Missing month-over-month comparison (analyze_trends step)**: The user wanted comparisons to previous periods, but this wasn't in the instructions. We had to add it after the fact. + +3. **Technical executive summary (generate_report step)**: The summary was too technical for non-technical stakeholders. The quality criteria didn't specify the audience clearly. + +4. **Table formatting (generate_report step)**: Tables were hard to read. No guidance on sizing or padding. + +### Changes Made + +**`.deepwork/jobs/monthly_report/steps/gather_data.md`:** +- Added: "Use distinct, high-contrast colors for chart data series. Avoid similar shades for different categories." +- Added to Quality Criteria: "Charts use visually distinct colors that are accessible to colorblind readers" + +**`.deepwork/jobs/monthly_report/steps/analyze_trends.md`:** +- Added: "Always include comparison to the same period in the previous month/quarter" +- Added input: "previous_period_data" to formalize this requirement + +**`.deepwork/jobs/monthly_report/steps/generate_report.md`:** +- Added example of good executive summary targeting non-technical readers +- Added: "Format tables with sufficient padding (at least 8px) and readable font size (minimum 11pt)" +- Updated Quality Criteria: + - "Executive summary is understandable by non-technical stakeholders" + - "Tables are readable with adequate padding and font size" + +**`.deepwork/jobs/monthly_report/job.yml`:** +- Version bumped to 1.0.1 +- Added changelog: "Improved visual design guidance and audience-appropriate writing based on test feedback" + +Running `deepwork sync`... + +Done! The workflow has been updated. + +### Expected Impact + +Future monthly reports should: +- Have better chart color choices from the start +- Include month-over-month comparisons automatically +- Have clearer executive summaries for leadership +- Have more readable tables + +This should significantly reduce the number of corrections needed after the initial output. +``` + +## Important Guidelines + +1. **Be specific** - Don't just note "instructions were unclear" - explain exactly what was unclear and how you fixed it +2. **Use test results as examples** - If something worked well in the test, add it as an example in the instructions +3. **Don't over-engineer** - Only add improvements that address actual problems from the test +4. **Maintain consistency** - Keep the same structure and style in instruction files +5. **Test-driven improvement** - Every change should trace back to something that happened in the test diff --git a/src/deepwork/standard_jobs/deepwork_jobs/steps/test.md b/src/deepwork/standard_jobs/deepwork_jobs/steps/test.md new file mode 100644 index 00000000..fa36d265 --- /dev/null +++ b/src/deepwork/standard_jobs/deepwork_jobs/steps/test.md @@ -0,0 +1,171 @@ +# Test the New Workflow + +## Objective + +Run the newly created workflow on a real use case chosen by the user, critique the output, and iterate until the user is satisfied with the results. This step validates that the workflow works as intended before finalizing it. + +## Task + +Guide the user through testing their new workflow by running it on a real example, then critically evaluating the output and refining it based on user feedback. 
+ +### Step 1: Announce Readiness and Gather Test Case + +The workflow is now implemented and ready to test. Use the AskUserQuestion tool to: + +1. **Inform the user** that the workflow is ready for a test run +2. **Ask what they'd like to test it on** - Get a specific, real use case + +Example question to ask: +``` +Your new workflow is ready to try out! What would you like to use it on for the first test run? + +Please describe a specific case you want to run through the workflow - ideally something you actually need done, so we can validate the workflow produces useful results. +``` + +**Important**: Get a concrete, specific test case. Vague responses like "just test it" should be followed up with clarifying questions to understand what inputs/context the workflow needs. + +### Step 2: Prepare and Run the Workflow + +1. **Compact the conversation history** - Before invoking the workflow, use the `/compact` command to summarize the conversation so far. This ensures the workflow starts with clean context focused on the test case. + +2. **Invoke the new workflow** - Run the first step of the newly created workflow using its slash command: + ``` + /[job_name].[first_step_id] + ``` + +3. **Complete the full workflow** - Continue through all steps of the workflow until it produces its final output. + +4. **Note any issues during execution** - Pay attention to: + - Confusion or ambiguity in instructions + - Missing information that had to be asked for + - Steps that took longer than expected + - Awkward tool usage or process flow + +### Step 3: Critique the Output + +After the workflow completes, perform a self-critique of the output: + +1. **Review the final deliverable** - Read through all outputs produced by the workflow + +2. **Identify up to 3 top issues** - Look for problems such as: + - Missing information or sections + - Formatting issues (layout, structure, readability) + - Quality problems (vague content, errors, inconsistencies) + - Misalignment with what the user likely wanted + - Technical issues (broken links, malformed data, etc.) + +3. **Present each issue to the user** with a specific question asking if they want it fixed. For example: + ``` + I noticed a few things we could improve: + + 1. **Text overlap**: The PDF has some text overlapping images in a few places - shall I correct that? + + 2. **Missing summary**: The report doesn't have an executive summary at the top - would you like me to add one? + + 3. **Data formatting**: The numbers aren't consistently formatted (some have commas, some don't) - should I standardize them? + ``` + +4. **Make corrections** for any issues the user confirms they want fixed + +### Step 4: Gather Additional Feedback + +After addressing the identified issues: + +1. **Ask if the user has more comments** on the output: + ``` + I've made those corrections. Is there anything else you'd like me to change about the output? + ``` + +2. **Iterate on feedback** - Keep making corrections based on user feedback until they indicate they're satisfied + +3. **Confirm completion** - When the user says the output is good, confirm that testing is complete + +### Loop Behavior + +The feedback loop should continue until the user explicitly indicates satisfaction. Look for signals like: +- "Looks good" +- "That's fine" +- "I'm happy with it" +- "No more changes" +- "Perfect" + +If the user provides more feedback, address it and ask again. Don't assume satisfaction without explicit confirmation. 
+ +## Quality Criteria + +- User was informed the workflow is ready and asked what to test it on +- Conversation was compacted before running the workflow +- The new workflow was actually invoked and run to completion +- Output was critiqued and up to 3 top issues were identified +- Each identified issue was presented to the user with a specific question +- Confirmed corrections were applied +- User was asked for additional feedback after corrections +- Iteration continued until user confirmed satisfaction +- When all criteria are met, include `Quality Criteria Met` in your response + +## Example Dialog + +``` +Claude: Your new workflow is ready to try out! + +The `monthly_report` workflow has been implemented with these steps: +1. gather_data - Collects metrics from various sources +2. analyze_trends - Identifies patterns and insights +3. generate_report - Creates the final PDF report + +What would you like to use it on for the first test run? Please describe a specific report you need - for example, which month's data and any particular focus areas. + +User: Let's do the January 2026 engineering report, focusing on deployment frequency and incident response times. + +Claude: Great, let me compact the conversation and run the workflow on your January 2026 engineering report. + +/compact + +/monthly_report.gather_data + +[... workflow runs through all steps ...] + +Claude: The January 2026 Engineering Report has been generated. Let me review it for any issues. + +I noticed a few things we could improve: + +1. **Chart readability**: The deployment frequency chart uses very similar colors for different teams - shall I change to a more distinct color palette? + +2. **Missing context**: The incident response times don't include comparison to the previous month - would you like me to add month-over-month comparison? + +3. **Executive summary**: The summary is quite technical - should I add a simpler high-level overview for non-technical stakeholders? + +User: Yes to all three, please. + +Claude: Done! I've updated the report with: +- Distinct color palette for the deployment chart +- Month-over-month comparison for incident response times +- Added a non-technical executive summary + +Is there anything else you'd like me to change about the report? + +User: Can you make the tables larger? They're a bit hard to read. + +Claude: I've increased the table sizes for better readability. The font is now larger and there's more padding around cells. + +Is there anything else you'd like me to adjust? + +User: No, that looks great! + +Claude: Excellent! The test run is complete. The workflow successfully generated your January 2026 Engineering Report with all the refinements applied. + +**Test Summary:** +- Workflow ran successfully through all 3 steps +- Output required adjustments to chart colors, comparisons, and table sizing +- User is satisfied with the final result + +Ready to proceed to the iterate step to improve the workflow based on what we learned. +``` + +## Important Guidelines + +1. **Use real test cases** - Testing with actual data/needs validates the workflow better than hypothetical examples +2. **Be specific in critiques** - Don't say "formatting issues" - say exactly what's wrong and where +3. **Limit initial critique to 3 issues** - Too many issues at once is overwhelming; address more in the feedback loop +4. **Don't assume what user wants fixed** - Always ask before making corrections +5. 
**Iterate until satisfied** - Don't rush to completion; the user's satisfaction is the goal diff --git a/src/deepwork/templates/claude/skill-deepwork.md.jinja b/src/deepwork/templates/claude/skill-deepwork.md.jinja index 9d555058..8ef7089f 100644 --- a/src/deepwork/templates/claude/skill-deepwork.md.jinja +++ b/src/deepwork/templates/claude/skill-deepwork.md.jinja @@ -15,135 +15,22 @@ description: "Start or continue DeepWork workflows using MCP tools" Execute multi-step workflows with quality gate checkpoints. -> **IMPORTANT**: This skill uses the DeepWork MCP server. All workflow operations -> are performed through MCP tool calls, not by reading instructions from files. +> **IMPORTANT**: Use the DeepWork MCP server tools. All workflow operations +> are performed through MCP tool calls and following the instructions they return, +> not by reading instructions from files. -## Quick Start +## How to Use -1. **Discover workflows**: Call `get_workflows` to see available options -2. **Start a workflow**: Call `start_workflow` with your goal -3. **Execute steps**: Follow the instructions returned -4. **Checkpoint**: Call `finished_step` with your outputs -5. **Iterate or continue**: Handle `needs_work`, `next_step`, or `workflow_complete` - -## MCP Tools Reference - -### get_workflows - -Lists all available workflows in this project. - -``` -Tool: deepwork.get_workflows -Parameters: none -``` - -Returns jobs with their workflows, steps, and summaries. - -### start_workflow - -Begins a new workflow session. - -``` -Tool: deepwork.start_workflow -Parameters: - - goal: string (required) - What you want to accomplish - - job_name: string (required) - Name of the job - - workflow_name: string (required) - Name of the workflow - - instance_id: string (optional) - Identifier like "acme" or "q1-2026" -``` - -Returns session ID, branch name, and first step instructions. - -### finished_step - -Reports completion of the current step. - -``` -Tool: deepwork.finished_step -Parameters: - - outputs: list[string] (required) - File paths of created outputs - - notes: string (optional) - Notes about what was done -``` - -Returns one of: -- `needs_work`: Quality criteria not met; fix and retry -- `next_step`: Proceed to next step with new instructions -- `workflow_complete`: All steps done; workflow finished - -## Execution Flow - -``` -User: /deepwork [intent] - │ - ▼ -┌─────────────────┐ -│ get_workflows │ ◄── Discover available workflows -└────────┬────────┘ - │ - ▼ -┌─────────────────┐ -│ Parse intent │ ◄── Match user intent to workflow -└────────┬────────┘ - │ - ▼ -┌─────────────────┐ -│ start_workflow │ ◄── Begin session, get first step -└────────┬────────┘ - │ - ▼ -┌─────────────────┐ -│ Execute step │ ◄── Follow step instructions -│ Create outputs │ -└────────┬────────┘ - │ - ▼ -┌─────────────────┐ -│ finished_step │ ◄── Report completion -└────────┬────────┘ - │ - ┌────┴────┐ - │ │ -needs_work next_step ─────► Loop back to "Execute step" - │ │ - │ workflow_complete - │ │ - ▼ ▼ -┌─────────────────┐ -│ Fix issues and │ Done! -│ retry │ -└─────────────────┘ -``` +1. Call `get_workflows` to discover available workflows +2. Call `start_workflow` with goal, job_name, and workflow_name +3. Follow the step instructions returned +4. Call `finished_step` with your outputs when done +5. Handle the response: `needs_work`, `next_step`, or `workflow_complete` ## Intent Parsing When the user invokes `/deepwork`, parse their intent: -1. **Explicit workflow**: `/deepwork new_job` → start `new_job` workflow -2. 
**General request**: `/deepwork I want to create a new workflow` → infer best match -3. **No context**: `/deepwork` alone → call `get_workflows` and ask user to choose - -## Quality Gates - -Steps may have quality criteria. When you call `finished_step`: - -1. Outputs are evaluated against criteria -2. If any fail → `needs_work` status with feedback -3. Fix issues based on feedback -4. Call `finished_step` again -5. After passing → proceed to next step - -## Git Workflow - -DeepWork creates branches for workflow instances: -- Format: `deepwork/{job_name}-{workflow_name}-{instance_id or date}` -- Example: `deepwork/competitive_research-full_analysis-acme` - -Commit work as you go. Create PR when workflow completes. - -## Guardrails - -- Always use MCP tools; never manually read step instruction files -- Create ALL expected outputs before calling `finished_step` -- Read quality gate feedback carefully before retrying -- Don't skip steps unless user explicitly requests it -- Ask for clarification when user intent is ambiguous +- **Explicit workflow**: `/deepwork new_job` → start the `new_job` workflow +- **General request**: `/deepwork I want to create a new workflow` → infer best match from available workflows +- **No context**: `/deepwork` alone → call `get_workflows` and ask user to choose diff --git a/src/deepwork/templates/claude/skill-job-meta.md.jinja b/src/deepwork/templates/claude/skill-job-meta.md.jinja deleted file mode 100644 index ea258a87..00000000 --- a/src/deepwork/templates/claude/skill-job-meta.md.jinja +++ /dev/null @@ -1,147 +0,0 @@ -{# -Template: skill-job-meta.md.jinja -Purpose: Generates the job overview skill file for Claude Code - -Template Variables: - - job_name: string - Job identifier (e.g., "competitive_research") - - job_summary: string - Short one-line summary of the job - - job_description: string|null - Full description (optional) - - total_steps: int - Number of steps in the job - - has_workflows: bool - True if workflows are defined - - workflows: list - Array of workflow objects: - - name: string - Workflow identifier - - summary: string - Short description of workflow - - steps: list[string] - Ordered list of step IDs - - first_step: string - First step ID to start workflow - - standalone_steps: list - Steps not in any workflow (same structure as steps) - - steps: list - Array of step objects: - - id: string - Step identifier - - name: string - Human-readable step name - - description: string - What the step does - - command_name: string - Slash command (e.g., "job_name.step_id") - - dependencies: list[string]|null - Required prior steps - - is_standalone: bool - True if not in any workflow - - workflow_name: string|null - Name of workflow if in one -#} ---- -name: {{ job_name }} -description: "{{ job_summary }}" ---- - -# {{ job_name }} - -{{ job_summary }} - -> **CRITICAL**: Always invoke steps using the Skill tool. Never copy/paste step instructions directly. - -{% if job_description %} -{{ job_description }} -{% endif %} - -{% if has_workflows %} -## Workflows - -{% for workflow in workflows %} -### {{ workflow.name }} - -{{ workflow.summary }} - -**Steps in order**: -{% for entry in workflow.step_entries %} -{% if entry.is_concurrent %} -{{ loop.index }}. 
**Concurrent Steps** - Execute the following tasks in parallel: -{% for task in entry.concurrent_steps %} - - **Background Task {{ task.task_number }}**: {{ task.id }} - {{ task.description }} -{% endfor %} -{% else %} -{% set step_id = entry.step_ids[0] %} -{% set step = steps | selectattr("id", "equalto", step_id) | first %} -{{ loop.index }}. **{{ step_id }}** - {{ step.description if step else "Unknown step" }} -{% endif %} -{% endfor %} - -**Start workflow**: `/{{ job_name }}.{{ workflow.first_step }}` - -{% endfor %} -{% endif %} -{% if standalone_steps %} -## Standalone Skills - -These skills can be run independently at any time: - -{% for step in standalone_steps %} -- **{{ step.id }}** - {{ step.description }} - Command: `/{{ step.command_name }}` -{% endfor %} - -{% endif %} -{% if not has_workflows and not standalone_steps %} -## Available Steps - -{% for step in steps %} -{{ loop.index }}. **{{ step.id }}** - {{ step.description }}{% if step.dependencies %} (requires: {{ step.dependencies | join(', ') }}){% endif %} - -{% endfor %} -{% endif %} - -## Execution Instructions - -### Step 1: Analyze Intent - -Parse any text following `/{{ job_name }}` to determine user intent: -{% if has_workflows %} -{% for workflow in workflows %} -- "{{ workflow.name }}" or related terms → start {{ workflow.name }} workflow at `{{ job_name }}.{{ workflow.first_step }}` -{% endfor %} -{% endif %} -{% for step in standalone_steps %} -- "{{ step.id }}" or related terms → run standalone skill `{{ step.command_name }}` -{% endfor %} -{% if not has_workflows and not standalone_steps %} -{% for step in steps %} -- "{{ step.id }}" or related terms → start at `{{ step.command_name }}` -{% endfor %} -{% endif %} - -### Step 2: Invoke Starting Step - -Use the Skill tool to invoke the identified starting step: -{% if has_workflows and workflows %} -``` -Skill tool: {{ job_name }}.{{ workflows[0].first_step }} -``` -{% else %} -``` -Skill tool: {{ steps[0].command_name }} -``` -{% endif %} - -### Step 3: Continue Workflow Automatically - -After each step completes: -1. Check if there's a next step in the workflow sequence -2. Invoke the next step using the Skill tool -3. Repeat until workflow is complete or user intervenes - -**Note**: Standalone skills do not auto-continue to other steps. 
- -### Handling Ambiguous Intent - -If user intent is unclear, use AskUserQuestion to clarify: -{% if has_workflows %} -- Present available workflows and standalone skills as options -{% else %} -- Present available steps as numbered options -{% endif %} -- Let user select the starting point - -## Guardrails - -- Do NOT copy/paste step instructions directly; always use the Skill tool to invoke steps -- Do NOT skip steps in a workflow unless the user explicitly requests it -- Do NOT proceed to the next step if the current step's outputs are incomplete -- Do NOT make assumptions about user intent; ask for clarification when ambiguous - -## Context Files - -- Job definition: `.deepwork/jobs/{{ job_name }}/job.yml` diff --git a/src/deepwork/templates/claude/skill-job-step.md.jinja b/src/deepwork/templates/claude/skill-job-step.md.jinja deleted file mode 100644 index ffb8622c..00000000 --- a/src/deepwork/templates/claude/skill-job-step.md.jinja +++ /dev/null @@ -1,263 +0,0 @@ -{# -Template: skill-job-step.md.jinja -Purpose: Generates individual step skill files for Claude Code - -Template Variables: - Job Context: - - job_name: string - Job identifier - - job_summary: string - Short job summary - - job_description: string|null - Full job description - - Step Metadata: - - step_id: string - Step identifier - - step_description: string - What this step does - - step_number: int - Position in steps array (1-indexed, for backward compat) - - total_steps: int - Total steps in job - - is_standalone: bool - True if step can run independently (not in any workflow) - - exposed: bool - True if user can invoke directly (default: true) - - dependencies: list[string]|null - Required prior step IDs - - next_step: string|null - Next step ID in workflow - - instructions_file: string - Path to step instructions file - - Workflow Context (only if step is in a workflow): - - workflow_name: string - Name of the workflow this step belongs to - - workflow_summary: string - Summary of the workflow - - workflow_step_number: int - Position in workflow (1-indexed) - - workflow_total_steps: int - Total steps in this workflow - - workflow_next_step: string|null - Next step ID in workflow - - workflow_prev_step: string|null - Previous step ID in workflow - - Step Content: - - instructions_content: string - Full instructions markdown - - user_inputs: list|null - User parameters to gather: - - name: string - Parameter name - - description: string - What to ask for - - file_inputs: list|null - Files from previous steps: - - file: string - File path - - from_step: string - Source step ID - - outputs: list[string]|null - Output file paths - - Quality & Hooks: - - quality_criteria: list[string]|null - Criteria for completion - - stop_hooks: list|null - Stop hook configurations: - - type: "script"|"prompt" - - path: string (for script) - - content: string (for prompt) - - hooks: dict|null - All hooks by event name (Stop, PreToolUse, etc.) - - Agent Delegation: - - agent: string|null - Agent type (e.g., "general-purpose"). When set, adds context: fork -#} ---- -name: {{ job_name }}.{{ step_id }} -description: "{{ step_description }}" -{% if not exposed %} -user-invocable: false -{% endif %}{#- if not exposed #} -{% if agent %} -context: fork -agent: {{ agent }} -{% endif %}{#- if agent #} -{# - NOTE: Prompt-based stop hooks do not currently work in Claude Code. - See: https://github.com/anthropics/claude-code/issues/20221 - Only command/script hooks are generated here. Prompt hooks are filtered out. 
- Quality validation is handled via sub-agent review in the instructions section. -#} -{%- if hooks -%} -{%- set has_command_hooks = namespace(value=false) -%} -{%- for event_name, event_hooks in hooks.items() -%} -{%- for hook in event_hooks -%} -{%- if hook.type == "script" -%} -{%- set has_command_hooks.value = true -%} -{%- endif -%}{#- if hook.type == "script" #} -{%- endfor -%}{#- for hook in event_hooks #} -{%- endfor -%}{#- for event_name, event_hooks in hooks.items() #} -{%- if has_command_hooks.value %} -hooks: -{% for event_name, event_hooks in hooks.items() %} -{%- set script_hooks = event_hooks | selectattr("type", "equalto", "script") | list %} -{%- if script_hooks -%} -{#- For Stop events, generate both Stop and SubagentStop blocks #} -{%- if event_name == "Stop" %} -{%- for stop_event in ["Stop", "SubagentStop"] %} - {{ stop_event }}: - - hooks: -{% for hook in script_hooks %} - - type: command - command: ".deepwork/jobs/{{ job_name }}/{{ hook.path }}" -{% endfor %}{#- for hook in script_hooks #} -{% endfor %}{#- for stop_event in ["Stop", "SubagentStop"] #} -{%- elif event_name != "SubagentStop" or "Stop" not in hooks %} - {{ event_name }}: - - hooks: -{% for hook in script_hooks %} - - type: command - command: ".deepwork/jobs/{{ job_name }}/{{ hook.path }}" -{% endfor %}{#- for hook in script_hooks #} -{% endif %}{#- if event_name == "Stop" #} -{%- endif %}{#- if script_hooks #} -{%- endfor %}{#- for event_name, event_hooks in hooks.items() #} -{%- endif %}{#- if has_command_hooks.value #} -{%- endif %}{#- if hooks #} - ---- - -# {{ job_name }}.{{ step_id }} - -{% if is_standalone %} -**Standalone skill** - can be run anytime -{% elif workflow_name %} -**Step {{ workflow_step_number }}/{{ workflow_total_steps }}** in **{{ workflow_name }}** workflow - -> {{ workflow_summary }} -{% else %} -**Step {{ step_number }}/{{ total_steps }}** in **{{ job_name }}** workflow -{% endif %}{#- if is_standalone #} - -> {{ job_summary }} - -{% if dependencies %} -## Prerequisites (Verify First) - -Before proceeding, confirm these steps are complete: -{% for dep in dependencies %} -- `/{{ job_name }}.{{ dep }}` -{% endfor %}{#- for dep in dependencies #} -{% endif %}{#- if dependencies #} - -## Instructions - -**Goal**: {{ step_description }} - -{{ instructions_content }} - -{% if job_description %} -### Job Context - -{{ job_description }} -{% endif %}{#- if job_description #} - -{% if user_inputs or file_inputs %} -## Required Inputs - -{% if user_inputs %} -**User Parameters** - Gather from user before starting: -{% for input in user_inputs %} -- **{{ input.name }}**: {{ input.description }} -{% endfor %}{#- for input in user_inputs #} -{% endif %}{#- if user_inputs #} - -{% if file_inputs %} -**Files from Previous Steps** - Read these first: -{% for input in file_inputs %} -- `{{ input.file }}` (from `{{ input.from_step }}`) -{% endfor %}{#- for input in file_inputs #} -{% endif %}{#- if file_inputs #} -{% endif %}{#- if user_inputs or file_inputs #} - -## Work Branch - -Use branch format: `deepwork/{{ job_name }}-[instance]-YYYYMMDD` - -- If on a matching work branch: continue using it -- If on main/master: create new branch with `git checkout -b deepwork/{{ job_name }}-[instance]-$(date +%Y%m%d)` - -## Outputs - -{% if outputs %} -**Required outputs**: -{% for output in outputs %} -- `{{ output.file }}`{% if output.file.endswith('/') %} (directory){% endif %} - -{% if output.has_doc_spec and output.doc_spec %} - **Doc Spec**: {{ output.doc_spec.name }} - > {{ 
output.doc_spec.description }} - **Definition**: `{{ output.doc_spec.path }}` -{% if output.doc_spec.target_audience %} - **Target Audience**: {{ output.doc_spec.target_audience }} -{% endif %}{#- if output.doc_spec.target_audience #} -{% if output.doc_spec.quality_criteria %} - **Quality Criteria**: -{% for criterion in output.doc_spec.quality_criteria %} - {{ loop.index }}. **{{ criterion.name }}**: {{ criterion.description }} -{% endfor %}{#- for criterion in output.doc_spec.quality_criteria #} -{% endif %}{#- if output.doc_spec.quality_criteria #} -{% if output.doc_spec.example_document %} - -
- Example Document Structure - - ```markdown - {{ output.doc_spec.example_document | indent(2) }} - ``` - -
-{% endif %}{#- if output.doc_spec.example_document #} -{% endif %}{#- if output.has_doc_spec and output.doc_spec #} -{% endfor %}{#- for output in outputs #} -{% else %} -No specific file outputs required. -{% endif %}{#- if outputs #} - -## Guardrails - -- Do NOT skip prerequisite verification if this step has dependencies -- Do NOT produce partial outputs; complete all required outputs before finishing -- Do NOT proceed without required inputs; ask the user if any are missing -- Do NOT modify files outside the scope of this step's defined outputs - -{% if quality_criteria %} -## Quality Validation - -**Before completing this step, you MUST have your work reviewed against the quality criteria below.** - -Use a sub-agent (Haiku model) to review your work against these criteria: - -**Criteria (all must be satisfied)**: -{% for criterion in quality_criteria -%} -{{ loop.index }}. {{ criterion }} -{% endfor %}{#- for criterion in quality_criteria #} -**Review Process**: -1. Once you believe your work is complete, spawn a sub-agent using Haiku to review your work against the quality criteria above -2. The sub-agent should examine your outputs and verify each criterion is met -3. If the sub-agent identifies valid issues, fix them -4. Have the sub-agent review again until all valid feedback has been addressed -5. Only mark the step complete when the sub-agent confirms all criteria are satisfied - -{% endif %}{#- if quality_criteria #} -{% if stop_hooks -%} -{% for hook in stop_hooks -%} -{% if hook.type == "script" -%} -**Validation script**: `.deepwork/jobs/{{ job_name }}/{{ hook.path }}` (runs automatically) -{% endif -%}{#- if hook.type == "script" #} -{% endfor %}{#- for hook in stop_hooks #} -{% endif %}{#- if stop_hooks #} -## On Completion - -{% if is_standalone %} -1. Verify outputs are created -2. Inform user: "{{ step_id }} complete{% if outputs %}, outputs: {{ outputs | map(attribute='file') | join(', ') }}{% endif %}" - -This standalone skill can be re-run anytime. -{% elif workflow_name %} -1. Verify outputs are created -2. Inform user: "{{ workflow_name }} step {{ workflow_step_number }}/{{ workflow_total_steps }} complete{% if outputs %}, outputs: {{ outputs | map(attribute='file') | join(', ') }}{% endif %}" -{% if next_step %} -3. **Continue workflow**: Use Skill tool to invoke `/{{ job_name }}.{{ next_step }}` -{% else %} -3. **{{ workflow_name }} workflow complete**: All steps finished. Consider creating a PR to merge the work branch. -{% endif %}{#- if next_step #} -{% else %} -1. Verify outputs are created -2. Inform user: "Step {{ step_number }}/{{ total_steps }} complete{% if outputs %}, outputs: {{ outputs | map(attribute='file') | join(', ') }}{% endif %}" -{% if next_step %} -3. **Continue workflow**: Use Skill tool to invoke `/{{ job_name }}.{{ next_step }}` -{% else %} -3. **Workflow complete**: All steps finished. Consider creating a PR to merge the work branch. 
-{% endif %}{#- if next_step #} -{% endif %}{#- if is_standalone #} - ---- - -**Reference files**: `.deepwork/jobs/{{ job_name }}/job.yml`, `.deepwork/jobs/{{ job_name }}/{{ instructions_file }}` diff --git a/src/deepwork/templates/gemini/skill-deepwork.md.jinja b/src/deepwork/templates/gemini/skill-deepwork.md.jinja new file mode 100644 index 00000000..8eadec60 --- /dev/null +++ b/src/deepwork/templates/gemini/skill-deepwork.md.jinja @@ -0,0 +1,36 @@ +{# +Template: skill-deepwork.md.jinja +Purpose: Generates the main /deepwork skill that instructs agents to use MCP tools + +This template is used to create the entry-point skill for DeepWork. +Instead of containing step instructions, it directs agents to use the +DeepWork MCP server tools. +#} ++++ +name = "deepwork" +description = "Start or continue DeepWork workflows using MCP tools" ++++ + +# DeepWork Workflow Manager + +Execute multi-step workflows with quality gate checkpoints. + +> **IMPORTANT**: Use the DeepWork MCP server tools. All workflow operations +> are performed through MCP tool calls and following the instructions they return, +> not by reading instructions from files. + +## How to Use + +1. Call `get_workflows` to discover available workflows +2. Call `start_workflow` with goal, job_name, and workflow_name +3. Follow the step instructions returned +4. Call `finished_step` with your outputs when done +5. Handle the response: `needs_work`, `next_step`, or `workflow_complete` + +## Intent Parsing + +When the user invokes `/deepwork`, parse their intent: + +- **Explicit workflow**: `/deepwork new_job` → start the `new_job` workflow +- **General request**: `/deepwork I want to create a new workflow` → infer best match from available workflows +- **No context**: `/deepwork` alone → call `get_workflows` and ask user to choose diff --git a/src/deepwork/templates/gemini/skill-job-meta.toml.jinja b/src/deepwork/templates/gemini/skill-job-meta.toml.jinja deleted file mode 100644 index 158790d7..00000000 --- a/src/deepwork/templates/gemini/skill-job-meta.toml.jinja +++ /dev/null @@ -1,76 +0,0 @@ -{# -Template: skill-job-meta.toml.jinja -Purpose: Generates the job overview skill file for Gemini CLI - -Template Variables: - - job_name: string - Job identifier (e.g., "competitive_research") - - job_summary: string - Short one-line summary of the job - - job_description: string|null - Full description (optional) - - total_steps: int - Number of steps in the job - - steps: list - Array of step objects: - - id: string - Step identifier - - name: string - Human-readable step name - - description: string - What the step does - - command_name: string - Slash command (e.g., "job_name:step_id") - - dependencies: list[string]|null - Required prior steps - -Note: Gemini uses TOML format with description + prompt fields. - Commands use colon separator (/job_name:step_id) not period. -#} -# {{ job_name }} -# -# {{ job_summary }} -# -# Generated by DeepWork - do not edit manually - -description = "{{ job_summary | replace('"', '\\"') }}" - -prompt = """ -# {{ job_name }} - -**Multi-step workflow**: {{ job_summary }} - -> **NOTE**: Gemini CLI requires manual command invocation. After each step, tell the user which command to run next. - -{% if job_description %} -{{ job_description }} -{% endif %} - -## Available Steps - -{% for step in steps %} -{{ loop.index }}. 
**{{ step.id }}** - {{ step.description }}{% if step.dependencies %} (requires: {{ step.dependencies | join(', ') }}){% endif %} - - Command: `/{{ step.command_name }}` -{% endfor %} - -## Execution Instructions - -### Step 1: Analyze Intent - -Parse any text following `/{{ job_name }}` to determine user intent: -{% for step in steps %} -- "{{ step.id }}" or related terms → start at `/{{ step.command_name }}` -{% endfor %} - -### Step 2: Direct User to Starting Step - -Tell the user which command to run: -``` -/{{ steps[0].command_name }} -``` - -### Step 3: Guide Through Workflow - -After each step completes, tell the user the next command to run until workflow is complete. - -### Handling Ambiguous Intent - -If user intent is unclear: -- Present available steps as numbered options -- Ask user to select the starting point - -## Reference - -- Job definition: `.deepwork/jobs/{{ job_name }}/job.yml` -""" diff --git a/src/deepwork/templates/gemini/skill-job-step.toml.jinja b/src/deepwork/templates/gemini/skill-job-step.toml.jinja deleted file mode 100644 index 946bec5c..00000000 --- a/src/deepwork/templates/gemini/skill-job-step.toml.jinja +++ /dev/null @@ -1,162 +0,0 @@ -{# -Template: skill-job-step.toml.jinja -Purpose: Generates individual step skill files for Gemini CLI - -Template Variables: - Job Context: - - job_name: string - Job identifier - - job_summary: string - Short job summary - - job_description: string|null - Full job description - - Step Metadata: - - step_id: string - Step identifier - - step_description: string - What this step does - - step_number: int - Position in workflow (1-indexed) - - total_steps: int - Total steps in job - - is_standalone: bool - True if step can run independently - - dependencies: list[string]|null - Required prior step IDs - - next_step: string|null - Next step ID in workflow - - instructions_file: string - Path to step instructions file - - Step Content: - - instructions_content: string - Full instructions markdown - - user_inputs: list|null - User parameters to gather: - - name: string - Parameter name - - description: string - What to ask for - - file_inputs: list|null - Files from previous steps: - - file: string - File path - - from_step: string - Source step ID - - outputs: list[string]|null - Output file paths - - Quality: - - quality_criteria: list[string]|null - Criteria for completion - - stop_hooks: list|null - Stop hook configurations (note: Gemini - does not support automated hooks, so these are for manual verification) - -Note: Gemini uses TOML format with description + prompt fields. - Commands use colon separator (/job_name:step_id) not period. 
-#} -# {{ job_name }}:{{ step_id }} -# -# {{ step_description }} -# -# Generated by DeepWork - do not edit manually - -description = "{{ step_description | replace('"', '\\"') }}" - -prompt = """ -# {{ job_name }}:{{ step_id }} - -{% if is_standalone %} -**Standalone command** - can be run anytime -{% else %} -**Step {{ step_number }}/{{ total_steps }}** in **{{ job_name }}** workflow -{% endif %} - -> {{ job_summary }} - -{% if dependencies %} -## Prerequisites (Verify First) - -Before proceeding, confirm these steps are complete: -{% for dep in dependencies %} -- `/{{ job_name }}:{{ dep }}` -{% endfor %} -{% endif %} - -## Instructions - -**Goal**: {{ step_description }} - -{{ instructions_content }} - -{% if job_description %} -### Job Context - -{{ job_description }} -{% endif %} - -{% if user_inputs or file_inputs %} -## Required Inputs - -{% if user_inputs %} -**User Parameters** - Gather from user before starting: -{% for input in user_inputs %} -- **{{ input.name }}**: {{ input.description }} -{% endfor %} -{% endif %} - -{% if file_inputs %} -**Files from Previous Steps** - Read these first: -{% for input in file_inputs %} -- `{{ input.file }}` (from `{{ input.from_step }}`) -{% endfor %} -{% endif %} -{% endif %} - -## Work Branch - -Use branch format: `deepwork/{{ job_name }}-[instance]-YYYYMMDD` - -- If on a matching work branch: continue using it -- If on main/master: create new branch with `git checkout -b deepwork/{{ job_name }}-[instance]-$(date +%Y%m%d)` - -## Outputs - -{% if outputs %} -**Required outputs**: -{% for output in outputs %} -- `{{ output.file }}`{% if output.file.endswith('/') %} (directory){% endif %} - -{% if output.has_doc_spec and output.doc_spec %} - **Doc Spec**: {{ output.doc_spec.name }} - > {{ output.doc_spec.description }} - **Definition**: `{{ output.doc_spec.path }}` -{% if output.doc_spec.target_audience %} - **Target Audience**: {{ output.doc_spec.target_audience }} -{% endif %} -{% if output.doc_spec.quality_criteria %} - **Quality Criteria**: -{% for criterion in output.doc_spec.quality_criteria %} - {{ loop.index }}. **{{ criterion.name }}**: {{ criterion.description }} -{% endfor %} -{% endif %} -{% endif %} -{% endfor %} -{% else %} -No specific file outputs required. -{% endif %} - -{% if quality_criteria or stop_hooks %} -## Quality Validation (Manual) - -**NOTE**: Gemini CLI does not support automated validation. Manually verify criteria before completing. - -{% if quality_criteria %} -**Criteria (all must be satisfied)**: -{% for criterion in quality_criteria %} -{{ loop.index }}. {{ criterion }} -{% endfor %} -{% endif %} -{% endif %} -## On Completion - -{% if is_standalone %} -1. Verify outputs are created -2. Inform user: "{{ step_id }} complete{% if outputs %}, outputs: {{ outputs | map(attribute='file') | join(', ') }}{% endif %}" - -This standalone command can be re-run anytime. -{% else %} -1. Verify outputs are created -2. Inform user: "Step {{ step_number }}/{{ total_steps }} complete{% if outputs %}, outputs: {{ outputs | map(attribute='file') | join(', ') }}{% endif %}" -{% if next_step %} -3. **Tell user next command**: `/{{ job_name }}:{{ next_step }}` -{% else %} -3. **Workflow complete**: All steps finished. Consider creating a PR to merge the work branch. 
-{% endif %} -{% endif %} - ---- - -**Reference files**: `.deepwork/jobs/{{ job_name }}/job.yml`, `.deepwork/jobs/{{ job_name }}/{{ instructions_file }}` -""" diff --git a/tests/unit/mcp/test_schemas.py b/tests/unit/mcp/test_schemas.py index 5dafe77e..a900ea0d 100644 --- a/tests/unit/mcp/test_schemas.py +++ b/tests/unit/mcp/test_schemas.py @@ -106,7 +106,6 @@ def test_basic_job(self) -> None: assert job.summary == "A test job" assert job.description is None assert job.workflows == [] - assert job.standalone_steps == [] class TestStartWorkflowInput: From f3af9d625f42023998601b93b5bfa1b9efa208b8 Mon Sep 17 00:00:00 2001 From: Noah Horton Date: Wed, 4 Feb 2026 13:58:54 -0700 Subject: [PATCH 12/45] cleaned up --- .claude/skills/deepwork/SKILL.md | 9 +++++---- .gemini/skills/deepwork/index.toml | 9 +++++---- src/deepwork/hooks/check_version.sh | 10 +++++----- src/deepwork/templates/claude/skill-deepwork.md.jinja | 9 +++++---- src/deepwork/templates/gemini/skill-deepwork.md.jinja | 9 +++++---- 5 files changed, 25 insertions(+), 21 deletions(-) diff --git a/.claude/skills/deepwork/SKILL.md b/.claude/skills/deepwork/SKILL.md index a8f84aa6..ee3021d2 100644 --- a/.claude/skills/deepwork/SKILL.md +++ b/.claude/skills/deepwork/SKILL.md @@ -22,7 +22,8 @@ Execute multi-step workflows with quality gate checkpoints. ## Intent Parsing When the user invokes `/deepwork`, parse their intent: - -- **Explicit workflow**: `/deepwork new_job` → start the `new_job` workflow -- **General request**: `/deepwork I want to create a new workflow` → infer best match from available workflows -- **No context**: `/deepwork` alone → call `get_workflows` and ask user to choose \ No newline at end of file +1. **ALWAYS**: Call `get_workflows` to discover available workflows +2. Based on the available flows and what the user said in their request, proceed: + - **Explicit workflow**: `/deepwork ` → start the `` workflow + - **General request**: `/deepwork ` → infer best match from available workflows + - **No context**: `/deepwork` alone → ask user to choose from available workflows \ No newline at end of file diff --git a/.gemini/skills/deepwork/index.toml b/.gemini/skills/deepwork/index.toml index 20f13d66..8263f4e0 100644 --- a/.gemini/skills/deepwork/index.toml +++ b/.gemini/skills/deepwork/index.toml @@ -22,7 +22,8 @@ Execute multi-step workflows with quality gate checkpoints. ## Intent Parsing When the user invokes `/deepwork`, parse their intent: - -- **Explicit workflow**: `/deepwork new_job` → start the `new_job` workflow -- **General request**: `/deepwork I want to create a new workflow` → infer best match from available workflows -- **No context**: `/deepwork` alone → call `get_workflows` and ask user to choose \ No newline at end of file +1. **ALWAYS**: Call `get_workflows` to discover available workflows +2. Based on the available flows and what the user said in their request, proceed: + - **Explicit workflow**: `/deepwork ` → start the `` workflow + - **General request**: `/deepwork ` → infer best match from available workflows + - **No context**: `/deepwork` alone → ask user to choose from available workflows \ No newline at end of file diff --git a/src/deepwork/hooks/check_version.sh b/src/deepwork/hooks/check_version.sh index c02b052e..21caabc1 100755 --- a/src/deepwork/hooks/check_version.sh +++ b/src/deepwork/hooks/check_version.sh @@ -60,10 +60,8 @@ fi # nothing else will work. check_deepwork_installed() { - # Run 'deepwork rules clear_queue' instead of just '--version' for double utility: - # 1. 
Verifies that the 'deepwork' command is installed and directly invokable - # 2. Clears any stale rules from the queue, ensuring a clean slate for the session - if ! deepwork rules clear_queue >/dev/null 2>&1; then + # Run 'deepwork --version' to verify the command is installed and directly invokable + if ! deepwork --version >/dev/null 2>&1; then return 1 fi return 0 @@ -79,11 +77,13 @@ print_deepwork_error() { ERROR: The 'deepwork' command is not available or cannot be directly invoked. DeepWork must be installed such that running 'deepwork' directly works. - For example, running 'deepwork rules clear_queue' should succeed. + For example, running 'deepwork --version' should succeed. IMPORTANT: Do NOT use 'uv run deepwork' or similar wrappers. The command must be directly invokable as just 'deepwork'. + To verify: 'deepwork --version' should succeed. + ------------------------------------------------------------------------ | | | Please fix your deepwork installation before proceeding. | diff --git a/src/deepwork/templates/claude/skill-deepwork.md.jinja b/src/deepwork/templates/claude/skill-deepwork.md.jinja index 8ef7089f..e9922706 100644 --- a/src/deepwork/templates/claude/skill-deepwork.md.jinja +++ b/src/deepwork/templates/claude/skill-deepwork.md.jinja @@ -30,7 +30,8 @@ Execute multi-step workflows with quality gate checkpoints. ## Intent Parsing When the user invokes `/deepwork`, parse their intent: - -- **Explicit workflow**: `/deepwork new_job` → start the `new_job` workflow -- **General request**: `/deepwork I want to create a new workflow` → infer best match from available workflows -- **No context**: `/deepwork` alone → call `get_workflows` and ask user to choose +1. **ALWAYS**: Call `get_workflows` to discover available workflows +2. Based on the available flows and what the user said in their request, proceed: + - **Explicit workflow**: `/deepwork ` → start the `` workflow + - **General request**: `/deepwork ` → infer best match from available workflows + - **No context**: `/deepwork` alone → ask user to choose from available workflows diff --git a/src/deepwork/templates/gemini/skill-deepwork.md.jinja b/src/deepwork/templates/gemini/skill-deepwork.md.jinja index 8eadec60..0a035892 100644 --- a/src/deepwork/templates/gemini/skill-deepwork.md.jinja +++ b/src/deepwork/templates/gemini/skill-deepwork.md.jinja @@ -30,7 +30,8 @@ Execute multi-step workflows with quality gate checkpoints. ## Intent Parsing When the user invokes `/deepwork`, parse their intent: - -- **Explicit workflow**: `/deepwork new_job` → start the `new_job` workflow -- **General request**: `/deepwork I want to create a new workflow` → infer best match from available workflows -- **No context**: `/deepwork` alone → call `get_workflows` and ask user to choose +1. **ALWAYS**: Call `get_workflows` to discover available workflows +2. 
Based on the available flows and what the user said in their request, proceed: + - **Explicit workflow**: `/deepwork ` → start the `` workflow + - **General request**: `/deepwork ` → infer best match from available workflows + - **No context**: `/deepwork` alone → ask user to choose from available workflows From c5c9f97a9cfb301733460b59ee74390a876966ad Mon Sep 17 00:00:00 2001 From: Noah Horton Date: Wed, 4 Feb 2026 14:08:29 -0700 Subject: [PATCH 13/45] Version bump --- flake.lock | 6 +++--- pyproject.toml | 2 +- src/deepwork/__init__.py | 2 +- uv.lock | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/flake.lock b/flake.lock index ce228ff8..55ce8f32 100644 --- a/flake.lock +++ b/flake.lock @@ -20,11 +20,11 @@ }, "nixpkgs": { "locked": { - "lastModified": 1769018530, - "narHash": "sha256-MJ27Cy2NtBEV5tsK+YraYr2g851f3Fl1LpNHDzDX15c=", + "lastModified": 1770181073, + "narHash": "sha256-ksTL7P9QC1WfZasNlaAdLOzqD8x5EPyods69YBqxSfk=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "88d3861acdd3d2f0e361767018218e51810df8a1", + "rev": "bf922a59c5c9998a6584645f7d0de689512e444c", "type": "github" }, "original": { diff --git a/pyproject.toml b/pyproject.toml index 9e88c3a7..c94e2c6d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "deepwork" -version = "0.5.1" +version = "0.7.0" description = "Framework for enabling AI agents to perform complex, multi-step work tasks" readme = "README.md" requires-python = ">=3.11" diff --git a/src/deepwork/__init__.py b/src/deepwork/__init__.py index 38f6acdf..0c85557f 100644 --- a/src/deepwork/__init__.py +++ b/src/deepwork/__init__.py @@ -1,6 +1,6 @@ """DeepWork - Framework for enabling AI agents to perform complex, multi-step work tasks.""" -__version__ = "0.1.0" +__version__ = "0.7.0" __author__ = "DeepWork Contributors" __all__ = [ diff --git a/uv.lock b/uv.lock index 1cb49f69..49d37635 100644 --- a/uv.lock +++ b/uv.lock @@ -453,7 +453,7 @@ wheels = [ [[package]] name = "deepwork" -version = "0.5.1" +version = "0.7.0" source = { editable = "." 
} dependencies = [ { name = "aiofiles" }, From 88477a447e53b3647b3fa126a46ab90c5c499c62 Mon Sep 17 00:00:00 2001 From: Noah Horton Date: Wed, 4 Feb 2026 14:24:58 -0700 Subject: [PATCH 14/45] Fix ruff lint errors and apply formatting - Add `from None` to raise in except clause (B904) - Remove unused variables in tests (F841) - Rename unused loop variable to underscore prefix (B007) - Apply ruff formatting to 14 files Co-Authored-By: Claude Opus 4.5 --- src/deepwork/cli/install.py | 4 +- src/deepwork/cli/serve.py | 5 +- src/deepwork/core/parser.py | 4 +- src/deepwork/mcp/quality_gate.py | 19 +++--- src/deepwork/mcp/schemas.py | 32 +++------- src/deepwork/mcp/server.py | 28 ++++---- src/deepwork/mcp/state.py | 16 ++--- src/deepwork/mcp/tools.py | 8 +-- tests/e2e/test_claude_code_integration.py | 3 +- tests/fixtures/mock_review_agent.py | 64 +++++++++++-------- tests/integration/test_install_flow.py | 1 + .../test_quality_gate_integration.py | 43 +++++-------- tests/unit/mcp/test_async_interface.py | 10 +-- tests/unit/mcp/test_schemas.py | 1 - tests/unit/mcp/test_tools.py | 4 +- 15 files changed, 107 insertions(+), 135 deletions(-) diff --git a/src/deepwork/cli/install.py b/src/deepwork/cli/install.py index 6f9daaee..c7f90732 100644 --- a/src/deepwork/cli/install.py +++ b/src/deepwork/cli/install.py @@ -351,7 +351,9 @@ def _install_deepwork(platform_name: str | None, project_path: Path) -> None: if adapter.register_mcp_server(project_path): console.print(f" [green]✓[/green] Registered MCP server for {adapter.display_name}") else: - console.print(f" [dim]•[/dim] MCP server already registered for {adapter.display_name}") + console.print( + f" [dim]•[/dim] MCP server already registered for {adapter.display_name}" + ) # Step 6: Run sync to generate skills console.print() diff --git a/src/deepwork/cli/serve.py b/src/deepwork/cli/serve.py index 5e3dae3c..e591b414 100644 --- a/src/deepwork/cli/serve.py +++ b/src/deepwork/cli/serve.py @@ -30,10 +30,7 @@ def _load_config(project_path: Path) -> dict: """ config_file = project_path / ".deepwork" / "config.yml" if not config_file.exists(): - raise ServeError( - f"DeepWork not installed in {project_path}. " - "Run 'deepwork install' first." - ) + raise ServeError(f"DeepWork not installed in {project_path}. 
Run 'deepwork install' first.") config = load_yaml(config_file) if config is None: diff --git a/src/deepwork/core/parser.py b/src/deepwork/core/parser.py index 2685994c..354b4563 100644 --- a/src/deepwork/core/parser.py +++ b/src/deepwork/core/parser.py @@ -559,9 +559,7 @@ def warn_orphaned_steps(self) -> list[str]: workflow_step_ids.update(workflow.steps) # Find orphaned steps - orphaned_steps = [ - step.id for step in self.steps if step.id not in workflow_step_ids - ] + orphaned_steps = [step.id for step in self.steps if step.id not in workflow_step_ids] if orphaned_steps: logger.warning( diff --git a/src/deepwork/mcp/quality_gate.py b/src/deepwork/mcp/quality_gate.py index 3eab3ebc..15eae776 100644 --- a/src/deepwork/mcp/quality_gate.py +++ b/src/deepwork/mcp/quality_gate.py @@ -17,7 +17,6 @@ from deepwork.mcp.schemas import QualityCriteriaResult, QualityGateResult - # JSON Schema for quality gate response validation QUALITY_GATE_RESPONSE_SCHEMA: dict[str, Any] = { "type": "object", @@ -264,12 +263,12 @@ async def evaluate( process.communicate(input=payload.encode()), timeout=self.timeout, ) - except asyncio.TimeoutError: + except TimeoutError: process.kill() await process.wait() raise QualityGateError( f"Review agent timed out after {self.timeout} seconds" - ) + ) from None if process.returncode != 0: raise QualityGateError( @@ -280,9 +279,7 @@ async def evaluate( return self._parse_response(stdout.decode()) except FileNotFoundError as e: - raise QualityGateError( - f"Review agent command not found: {base_cmd[0]}" - ) from e + raise QualityGateError(f"Review agent command not found: {base_cmd[0]}") from e class MockQualityGate(QualityGate): @@ -310,10 +307,12 @@ async def evaluate( project_root: Path, ) -> QualityGateResult: """Mock evaluation - records call and returns configured result.""" - self.evaluations.append({ - "quality_criteria": quality_criteria, - "outputs": outputs, - }) + self.evaluations.append( + { + "quality_criteria": quality_criteria, + "outputs": outputs, + } + ) criteria_results = [ QualityCriteriaResult( diff --git a/src/deepwork/mcp/schemas.py b/src/deepwork/mcp/schemas.py index 74f6eccf..4aec8ae7 100644 --- a/src/deepwork/mcp/schemas.py +++ b/src/deepwork/mcp/schemas.py @@ -51,9 +51,7 @@ class WorkflowStepEntryInfo(BaseModel): """Information about a workflow step entry (sequential or concurrent).""" step_ids: list[str] = Field(description="Step ID(s) in this entry") - is_concurrent: bool = Field( - default=False, description="True if steps run in parallel" - ) + is_concurrent: bool = Field(default=False, description="True if steps run in parallel") class WorkflowInfo(BaseModel): @@ -105,9 +103,7 @@ class FinishedStepInput(BaseModel): class AbortWorkflowInput(BaseModel): """Input for abort_workflow tool.""" - explanation: str = Field( - description="Explanation of why the workflow is being aborted" - ) + explanation: str = Field(description="Explanation of why the workflow is being aborted") # ============================================================================= @@ -192,12 +188,8 @@ class FinishedStepResponse(BaseModel): ) # For workflow_complete status - summary: str | None = Field( - default=None, description="Summary of completed workflow" - ) - all_outputs: list[str] | None = Field( - default=None, description="All outputs from all steps" - ) + summary: str | None = Field(default=None, description="Summary of completed workflow") + all_outputs: list[str] | None = Field(default=None, description="All outputs from all steps") # Stack info (included in 
all responses) stack: list[StackEntry] = Field( @@ -208,7 +200,9 @@ class FinishedStepResponse(BaseModel): class AbortWorkflowResponse(BaseModel): """Response from abort_workflow tool.""" - aborted_workflow: str = Field(description="The workflow that was aborted (job_name/workflow_name)") + aborted_workflow: str = Field( + description="The workflow that was aborted (job_name/workflow_name)" + ) aborted_step: str = Field(description="The step that was active when aborted") explanation: str = Field(description="The explanation provided for aborting") stack: list[StackEntry] = Field( @@ -217,9 +211,7 @@ class AbortWorkflowResponse(BaseModel): resumed_workflow: str | None = Field( default=None, description="The workflow now active (if any)" ) - resumed_step: str | None = Field( - default=None, description="The step now active (if any)" - ) + resumed_step: str | None = Field(default=None, description="The step now active (if any)") # ============================================================================= @@ -232,9 +224,7 @@ class StepProgress(BaseModel): step_id: str = Field(description="Step identifier") started_at: str | None = Field(default=None, description="ISO timestamp when started") - completed_at: str | None = Field( - default=None, description="ISO timestamp when completed" - ) + completed_at: str | None = Field(default=None, description="ISO timestamp when completed") outputs: list[str] = Field(default_factory=list, description="Output files created") notes: str | None = Field(default=None, description="Notes from agent") quality_attempts: int = Field(default=0, description="Number of quality gate attempts") @@ -257,9 +247,7 @@ class WorkflowSession(BaseModel): default_factory=dict, description="Progress for each step" ) started_at: str = Field(description="ISO timestamp when session started") - completed_at: str | None = Field( - default=None, description="ISO timestamp when completed" - ) + completed_at: str | None = Field(default=None, description="ISO timestamp when completed") status: str = Field(default="active", description="Session status: active, completed, aborted") abort_reason: str | None = Field( default=None, description="Explanation if workflow was aborted" diff --git a/src/deepwork/mcp/server.py b/src/deepwork/mcp/server.py index c14ebd88..89ba6fa4 100644 --- a/src/deepwork/mcp/server.py +++ b/src/deepwork/mcp/server.py @@ -124,12 +124,15 @@ async def start_workflow( instance_id: str | None = None, ) -> dict[str, Any]: """Start a workflow and get first step instructions.""" - _log_tool_call("start_workflow", { - "goal": goal, - "job_name": job_name, - "workflow_name": workflow_name, - "instance_id": instance_id, - }) + _log_tool_call( + "start_workflow", + { + "goal": goal, + "job_name": job_name, + "workflow_name": workflow_name, + "instance_id": instance_id, + }, + ) input_data = StartWorkflowInput( goal=goal, job_name=job_name, @@ -158,11 +161,14 @@ async def finished_step( quality_review_override_reason: str | None = None, ) -> dict[str, Any]: """Report step completion and get next instructions.""" - _log_tool_call("finished_step", { - "outputs": outputs, - "notes": notes, - "quality_review_override_reason": quality_review_override_reason, - }) + _log_tool_call( + "finished_step", + { + "outputs": outputs, + "notes": notes, + "quality_review_override_reason": quality_review_override_reason, + }, + ) input_data = FinishedStepInput( outputs=outputs, notes=notes, diff --git a/src/deepwork/mcp/state.py b/src/deepwork/mcp/state.py index a2fb4e41..6aaba1e2 100644 
--- a/src/deepwork/mcp/state.py +++ b/src/deepwork/mcp/state.py @@ -182,9 +182,7 @@ def require_active_session(self) -> WorkflowSession: StateError: If no active session """ if not self._session_stack: - raise StateError( - "No active workflow session. Use start_workflow to begin a workflow." - ) + raise StateError("No active workflow session. Use start_workflow to begin a workflow.") return self._session_stack[-1] async def start_step(self, step_id: str) -> None: @@ -302,7 +300,9 @@ async def complete_workflow(self) -> WorkflowSession | None: # Return new active session (if any) return self._session_stack[-1] if self._session_stack else None - async def abort_workflow(self, explanation: str) -> tuple[WorkflowSession, WorkflowSession | None]: + async def abort_workflow( + self, explanation: str + ) -> tuple[WorkflowSession, WorkflowSession | None]: """Abort the current workflow and pop from stack. Args: @@ -404,9 +404,7 @@ async def find_active_sessions_for_workflow( return [ s for s in all_sessions - if s.job_name == job_name - and s.workflow_name == workflow_name - and s.status == "active" + if s.job_name == job_name and s.workflow_name == workflow_name and s.status == "active" ] async def delete_session(self, session_id: str) -> None: @@ -421,6 +419,4 @@ async def delete_session(self, session_id: str) -> None: session_file.unlink() # Remove from stack if present - self._session_stack = [ - s for s in self._session_stack if s.session_id != session_id - ] + self._session_stack = [s for s in self._session_stack if s.session_id != session_id] diff --git a/src/deepwork/mcp/tools.py b/src/deepwork/mcp/tools.py index ae3c8012..0a7275a0 100644 --- a/src/deepwork/mcp/tools.py +++ b/src/deepwork/mcp/tools.py @@ -171,9 +171,7 @@ def _get_step_instructions(self, job: JobDefinition, step_id: str) -> str: instructions_path = job.job_dir / step.instructions_file if not instructions_path.exists(): - raise ToolError( - f"Instructions file not found: {step.instructions_file}" - ) + raise ToolError(f"Instructions file not found: {step.instructions_file}") return instructions_path.read_text(encoding="utf-8") @@ -293,9 +291,7 @@ async def finished_step(self, input_data: FinishedStepInput) -> FinishedStepResp ) # Return needs_work status - failed_criteria = [ - cr for cr in result.criteria_results if not cr.passed - ] + failed_criteria = [cr for cr in result.criteria_results if not cr.passed] return FinishedStepResponse( status=StepStatus.NEEDS_WORK, feedback=result.feedback, diff --git a/tests/e2e/test_claude_code_integration.py b/tests/e2e/test_claude_code_integration.py index 54a5597d..802ee30f 100644 --- a/tests/e2e/test_claude_code_integration.py +++ b/tests/e2e/test_claude_code_integration.py @@ -19,7 +19,6 @@ from deepwork.core.adapters import ClaudeAdapter from deepwork.core.generator import SkillGenerator -from deepwork.core.parser import parse_job_definition from deepwork.mcp.state import StateManager from deepwork.mcp.tools import WorkflowTools @@ -269,7 +268,7 @@ async def test_workflow_step_progression(self, project_with_job: Path) -> None: job_name="fruits", workflow_name=workflow_name, ) - start_response = await tools.start_workflow(start_input) + await tools.start_workflow(start_input) # Create mock output file for first step output_file = project_with_job / "identified_fruits.md" diff --git a/tests/fixtures/mock_review_agent.py b/tests/fixtures/mock_review_agent.py index 22cc4591..48130ca1 100755 --- a/tests/fixtures/mock_review_agent.py +++ b/tests/fixtures/mock_review_agent.py @@ -48,9 
+48,7 @@ def main() -> int: response = { "passed": True, "feedback": "All criteria met", - "criteria_results": [ - {"criterion": "Criterion 1", "passed": True, "feedback": None} - ], + "criteria_results": [{"criterion": "Criterion 1", "passed": True, "feedback": None}], } print(json.dumps(response)) return 0 @@ -102,42 +100,52 @@ def main() -> int: # Check if outputs contain expected patterns if "File not found" in prompt: - criteria_results.append({ - "criterion": "Output files must exist", - "passed": False, - "feedback": "One or more output files were not found", - }) + criteria_results.append( + { + "criterion": "Output files must exist", + "passed": False, + "feedback": "One or more output files were not found", + } + ) all_passed = False elif "Test content" in prompt or "output.md" in prompt: - criteria_results.append({ - "criterion": "Output files must exist", - "passed": True, - "feedback": None, - }) + criteria_results.append( + { + "criterion": "Output files must exist", + "passed": True, + "feedback": None, + } + ) # Look for "must contain" type criteria if "must contain" in prompt.lower(): if "expected content" in prompt.lower(): - criteria_results.append({ - "criterion": "Output must contain expected content", - "passed": True, - "feedback": None, - }) + criteria_results.append( + { + "criterion": "Output must contain expected content", + "passed": True, + "feedback": None, + } + ) else: - criteria_results.append({ - "criterion": "Output must contain expected content", - "passed": False, - "feedback": "Expected content not found in output", - }) + criteria_results.append( + { + "criterion": "Output must contain expected content", + "passed": False, + "feedback": "Expected content not found in output", + } + ) all_passed = False if not criteria_results: # If no specific criteria matched, default based on whether outputs exist - criteria_results.append({ - "criterion": "General quality check", - "passed": True, - "feedback": None, - }) + criteria_results.append( + { + "criterion": "General quality check", + "passed": True, + "feedback": None, + } + ) response = { "passed": all_passed, diff --git a/tests/integration/test_install_flow.py b/tests/integration/test_install_flow.py index ec66cfd8..17af3818 100644 --- a/tests/integration/test_install_flow.py +++ b/tests/integration/test_install_flow.py @@ -168,6 +168,7 @@ def test_install_is_idempotent(self, mock_claude_project: Path) -> None: # MCP entry point skill assert (claude_dir / "deepwork" / "SKILL.md").exists() + class TestCLIEntryPoint: """Tests for CLI entry point.""" diff --git a/tests/integration/test_quality_gate_integration.py b/tests/integration/test_quality_gate_integration.py index 888d33d3..c1d56c6e 100644 --- a/tests/integration/test_quality_gate_integration.py +++ b/tests/integration/test_quality_gate_integration.py @@ -14,7 +14,6 @@ from deepwork.mcp.quality_gate import QualityGate, QualityGateError - # Path to our mock review agent script MOCK_AGENT_PATH = Path(__file__).parent.parent / "fixtures" / "mock_review_agent.py" @@ -133,9 +132,7 @@ async def test_subprocess_nonzero_exit_raises_error( else: os.environ.pop("REVIEW_RESULT", None) - async def test_subprocess_timeout( - self, project_root: Path, mock_agent_command: str - ) -> None: + async def test_subprocess_timeout(self, project_root: Path, mock_agent_command: str) -> None: """Test that subprocess timeout is handled correctly.""" gate = QualityGate(command=mock_agent_command, timeout=1) # 1 second timeout @@ -249,7 +246,7 @@ def 
test_parse_json_in_code_block(self) -> None: """Test parsing JSON wrapped in markdown code block.""" gate = QualityGate() - response = '''Here's my evaluation: + response = """Here's my evaluation: ```json { @@ -261,7 +258,7 @@ def test_parse_json_in_code_block(self) -> None: } ``` -Hope that helps!''' +Hope that helps!""" result = gate._parse_response(response) @@ -272,13 +269,13 @@ def test_parse_json_in_plain_code_block(self) -> None: """Test parsing JSON in plain code block (no json tag).""" gate = QualityGate() - response = '''``` + response = """``` { "passed": false, "feedback": "Issues found", "criteria_results": [] } -```''' +```""" result = gate._parse_response(response) @@ -318,10 +315,8 @@ def test_parse_non_boolean_passed_field_raises_error(self) -> None: ('{"passed": null, "feedback": "test", "criteria_results": []}', "null"), ] - for response, case_name in test_cases: - with pytest.raises( - QualityGateError, match="failed schema validation" - ): + for response, _case_name in test_cases: + with pytest.raises(QualityGateError, match="failed schema validation"): gate._parse_response(response) def test_parse_without_schema_validation_is_lenient(self) -> None: @@ -340,7 +335,7 @@ def test_parse_criteria_results_structure(self) -> None: """Test that criteria results are properly parsed.""" gate = QualityGate() - response = '''```json + response = """```json { "passed": false, "feedback": "Two criteria failed", @@ -350,7 +345,7 @@ def test_parse_criteria_results_structure(self) -> None: {"criterion": "Third check", "passed": false, "feedback": "Wrong format"} ] } -```''' +```""" result = gate._parse_response(response) @@ -382,7 +377,7 @@ def test_valid_response_passes_schema(self) -> None: """Test that valid response passes schema validation.""" gate = QualityGate() - response = '''```json + response = """```json { "passed": true, "feedback": "All criteria met", @@ -391,7 +386,7 @@ def test_valid_response_passes_schema(self) -> None: {"criterion": "Test 2", "passed": true} ] } -```''' +```""" result = gate._parse_response(response) @@ -423,9 +418,9 @@ def test_missing_criterion_in_results_raises_error(self) -> None: gate = QualityGate() # criteria_results item missing required 'criterion' field - response = '''{"passed": true, "feedback": "test", "criteria_results": [ + response = """{"passed": true, "feedback": "test", "criteria_results": [ {"passed": true, "feedback": null} - ]}''' + ]}""" with pytest.raises(QualityGateError, match="failed schema validation"): gate._parse_response(response) @@ -461,9 +456,7 @@ async def test_empty_quality_criteria_auto_passes(self, project_root: Path) -> N assert result.passed is True assert "auto-passing" in result.feedback.lower() - async def test_multiple_output_files( - self, project_root: Path, mock_agent_command: str - ) -> None: + async def test_multiple_output_files(self, project_root: Path, mock_agent_command: str) -> None: """Test evaluation with multiple output files.""" gate = QualityGate(command=mock_agent_command, timeout=30) @@ -489,9 +482,7 @@ async def test_multiple_output_files( else: os.environ.pop("REVIEW_RESULT", None) - async def test_large_output_file( - self, project_root: Path, mock_agent_command: str - ) -> None: + async def test_large_output_file(self, project_root: Path, mock_agent_command: str) -> None: """Test evaluation with a large output file.""" gate = QualityGate(command=mock_agent_command, timeout=30) @@ -516,9 +507,7 @@ async def test_large_output_file( else: os.environ.pop("REVIEW_RESULT", None) - async def 
test_unicode_in_output( - self, project_root: Path, mock_agent_command: str - ) -> None: + async def test_unicode_in_output(self, project_root: Path, mock_agent_command: str) -> None: """Test evaluation with unicode content.""" gate = QualityGate(command=mock_agent_command, timeout=30) diff --git a/tests/unit/mcp/test_async_interface.py b/tests/unit/mcp/test_async_interface.py index 91b511fd..eae89ce7 100644 --- a/tests/unit/mcp/test_async_interface.py +++ b/tests/unit/mcp/test_async_interface.py @@ -9,8 +9,6 @@ import inspect from pathlib import Path -import pytest - from deepwork.mcp.quality_gate import MockQualityGate, QualityGate from deepwork.mcp.state import StateManager from deepwork.mcp.tools import WorkflowTools @@ -91,14 +89,12 @@ def test_quality_gate_async_methods(self) -> None: def test_mock_quality_gate_async_methods(self) -> None: """Verify MockQualityGate maintains async interface.""" - method = getattr(MockQualityGate, "evaluate") + method = MockQualityGate.evaluate assert inspect.iscoroutinefunction(method), ( "MockQualityGate.evaluate must be async to match QualityGate interface" ) - async def test_concurrent_state_operations_are_serialized( - self, tmp_path: Path - ) -> None: + async def test_concurrent_state_operations_are_serialized(self, tmp_path: Path) -> None: """Test that concurrent state operations don't corrupt state. This test verifies that the async lock properly serializes access @@ -111,7 +107,7 @@ async def test_concurrent_state_operations_are_serialized( manager = StateManager(tmp_path) # Create initial session - session = await manager.create_session( + await manager.create_session( job_name="test_job", workflow_name="main", goal="Test goal", diff --git a/tests/unit/mcp/test_schemas.py b/tests/unit/mcp/test_schemas.py index a900ea0d..5259d284 100644 --- a/tests/unit/mcp/test_schemas.py +++ b/tests/unit/mcp/test_schemas.py @@ -1,6 +1,5 @@ """Tests for MCP schemas.""" - from deepwork.mcp.schemas import ( ActiveStepInfo, FinishedStepInput, diff --git a/tests/unit/mcp/test_tools.py b/tests/unit/mcp/test_tools.py index 822fce81..be0a69f3 100644 --- a/tests/unit/mcp/test_tools.py +++ b/tests/unit/mcp/test_tools.py @@ -245,9 +245,7 @@ async def test_finished_step_with_quality_gate_pass( # Create output and finish step (project_root / "output1.md").write_text("Valid output") - response = await tools_with_quality.finished_step( - FinishedStepInput(outputs=["output1.md"]) - ) + response = await tools_with_quality.finished_step(FinishedStepInput(outputs=["output1.md"])) # Should advance to next step assert response.status == StepStatus.NEXT_STEP From 1b17ccb0035b8a2a5244d6085277b735f28a118b Mon Sep 17 00:00:00 2001 From: Noah Horton Date: Wed, 4 Feb 2026 15:02:10 -0700 Subject: [PATCH 15/45] Migrate to uv2nix for reproducible Python builds Replace flake-utils with uv2nix/pyproject-nix for proper Python dependency management in Nix. This provides hermetic builds directly from uv.lock and supports editable installs for development. 
Key changes: - Use uv2nix to generate Python package set from uv.lock - Add pyproject-build-systems for build dependency resolution - Add editables to build-system requires (needed by hatchling for editable wheel builds) - Remove .venv management from shell hook (Nix handles it now) Co-Authored-By: Claude Opus 4.5 --- flake.lock | 94 ++++++++++++++------ flake.nix | 235 +++++++++++++++++++++++++++---------------------- pyproject.toml | 9 +- uv.lock | 14 +++ 4 files changed, 218 insertions(+), 134 deletions(-) diff --git a/flake.lock b/flake.lock index 55ce8f32..9da4cc29 100644 --- a/flake.lock +++ b/flake.lock @@ -1,23 +1,5 @@ { "nodes": { - "flake-utils": { - "inputs": { - "systems": "systems" - }, - "locked": { - "lastModified": 1731533236, - "narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=", - "owner": "numtide", - "repo": "flake-utils", - "rev": "11707dc2f618dd54ca8739b309ec4fc024de578b", - "type": "github" - }, - "original": { - "owner": "numtide", - "repo": "flake-utils", - "type": "github" - } - }, "nixpkgs": { "locked": { "lastModified": 1770181073, @@ -34,24 +16,80 @@ "type": "github" } }, + "pyproject-build-systems": { + "inputs": { + "nixpkgs": [ + "nixpkgs" + ], + "pyproject-nix": [ + "pyproject-nix" + ], + "uv2nix": [ + "uv2nix" + ] + }, + "locked": { + "lastModified": 1763662255, + "narHash": "sha256-4bocaOyLa3AfiS8KrWjZQYu+IAta05u3gYZzZ6zXbT0=", + "owner": "pyproject-nix", + "repo": "build-system-pkgs", + "rev": "042904167604c681a090c07eb6967b4dd4dae88c", + "type": "github" + }, + "original": { + "owner": "pyproject-nix", + "repo": "build-system-pkgs", + "type": "github" + } + }, + "pyproject-nix": { + "inputs": { + "nixpkgs": [ + "nixpkgs" + ] + }, + "locked": { + "lastModified": 1769936401, + "narHash": "sha256-kwCOegKLZJM9v/e/7cqwg1p/YjjTAukKPqmxKnAZRgA=", + "owner": "pyproject-nix", + "repo": "pyproject.nix", + "rev": "b0d513eeeebed6d45b4f2e874f9afba2021f7812", + "type": "github" + }, + "original": { + "owner": "pyproject-nix", + "repo": "pyproject.nix", + "type": "github" + } + }, "root": { "inputs": { - "flake-utils": "flake-utils", - "nixpkgs": "nixpkgs" + "nixpkgs": "nixpkgs", + "pyproject-build-systems": "pyproject-build-systems", + "pyproject-nix": "pyproject-nix", + "uv2nix": "uv2nix" } }, - "systems": { + "uv2nix": { + "inputs": { + "nixpkgs": [ + "nixpkgs" + ], + "pyproject-nix": [ + "pyproject-nix" + ] + }, "locked": { - "lastModified": 1681028828, - "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", - "owner": "nix-systems", - "repo": "default", - "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", + "lastModified": 1769957392, + "narHash": "sha256-6PkqwwYf5K2CHi2V+faI/9pqjfz/HxUkI/MVid6hlOY=", + "owner": "pyproject-nix", + "repo": "uv2nix", + "rev": "d18bc50ae1c3d4be9c41c2d94ea765524400af75", "type": "github" }, "original": { - "owner": "nix-systems", - "repo": "default", + "owner": "pyproject-nix", + "repo": "uv2nix", "type": "github" } } diff --git a/flake.nix b/flake.nix index d2218afb..c2740cf4 100644 --- a/flake.nix +++ b/flake.nix @@ -3,117 +3,142 @@ inputs = { nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable"; - flake-utils.url = "github:numtide/flake-utils"; + + pyproject-nix = { + url = "github:pyproject-nix/pyproject.nix"; + inputs.nixpkgs.follows = "nixpkgs"; + }; + + uv2nix = { + url = "github:pyproject-nix/uv2nix"; + inputs.pyproject-nix.follows = "pyproject-nix"; + inputs.nixpkgs.follows = "nixpkgs"; + }; + + pyproject-build-systems = { + url = "github:pyproject-nix/build-system-pkgs"; + 
inputs.pyproject-nix.follows = "pyproject-nix"; + inputs.uv2nix.follows = "uv2nix"; + inputs.nixpkgs.follows = "nixpkgs"; + }; }; - outputs = { self, nixpkgs, flake-utils }: - flake-utils.lib.eachDefaultSystem (system: - let - pkgs = import nixpkgs { - inherit system; - # Allow unfree packages to support the Business Source License 1.1 - config.allowUnfree = true; - }; - # Local claude-code package for version control (update via nix/claude-code/update.sh) - claude-code = pkgs.callPackage ./nix/claude-code/package.nix { }; - # Read version from pyproject.toml to avoid duplication - pyproject = builtins.fromTOML (builtins.readFile ./pyproject.toml); - deepwork = pkgs.python311Packages.buildPythonPackage { - pname = "deepwork"; - version = pyproject.project.version; - src = ./.; - format = "pyproject"; - nativeBuildInputs = [ pkgs.python311Packages.hatchling ]; - # Required for `nix build` - must match pyproject.toml dependencies - propagatedBuildInputs = with pkgs.python311Packages; [ - click gitpython jinja2 jsonschema pyyaml rich rpds-py - ]; - doCheck = false; - }; - in - { - devShells.default = pkgs.mkShell { - buildInputs = with pkgs; [ - # Python 3.11 - base interpreter for uv - python311 - - # uv manages all Python packages (deps, dev tools, etc.) - uv - - # Git for version control - git - - # System tools - jq # For JSON processing - - # CLI tools (claude-code is locally built, see nix/claude-code/) - claude-code - gh # GitHub CLI - ]; - - # Environment variables for uv integration with Nix - env = { - # Tell uv to use the Nix-provided Python interpreter - UV_PYTHON = "${pkgs.python311}/bin/python"; - # Prevent uv from downloading Python binaries - UV_PYTHON_DOWNLOADS = "never"; - # Development mode flag - DEEPWORK_DEV = "1"; + outputs = { self, nixpkgs, pyproject-nix, uv2nix, pyproject-build-systems, ... }: + let + inherit (nixpkgs) lib; + + # Systems to support + forAllSystems = lib.genAttrs [ "x86_64-linux" "aarch64-linux" "x86_64-darwin" "aarch64-darwin" ]; + + # Load the uv workspace from uv.lock + workspace = uv2nix.lib.workspace.loadWorkspace { workspaceRoot = ./.; }; + + # Create overlay from uv.lock - prefer wheels for faster builds + overlay = workspace.mkPyprojectOverlay { sourcePreference = "wheel"; }; + + # Editable overlay for development (live-reload from src/) + editableOverlay = workspace.mkEditablePyprojectOverlay { root = "$REPO_ROOT"; }; + + # Build Python package sets for each system + pythonSets = forAllSystems (system: + let + pkgs = import nixpkgs { + inherit system; + config.allowUnfree = true; }; + python = pkgs.python311; + in + (pkgs.callPackage pyproject-nix.build.packages { inherit python; }).overrideScope + (lib.composeManyExtensions [ + pyproject-build-systems.overlays.default + overlay + ]) + ); - shellHook = '' - # Create venv if it doesn't exist - if [ ! -d .venv ]; then - echo "Creating virtual environment..." 
- uv venv .venv --quiet - fi - - # Sync dependencies (including dev extras like pytest, ruff, mypy) - # Run quietly - uv only outputs when changes are needed - uv sync --all-extras --quiet 2>/dev/null || uv sync --all-extras - - # Activate venv by setting environment variables directly - # This works reliably for both interactive shells and `nix develop --command` - export VIRTUAL_ENV="$PWD/.venv" - export PATH="$VIRTUAL_ENV/bin:$PATH" - unset PYTHONHOME - - # Set PYTHONPATH for editable install access to src/ - export PYTHONPATH="$PWD/src:$PYTHONPATH" - - # Add nix/ scripts to PATH (for 'update' command) - export PATH="$PWD/nix:$PATH" - - # Only show welcome message in interactive shells - if [[ $- == *i* ]]; then - echo "" - echo "DeepWork Development Environment" - echo "================================" - echo "" - echo "Python: $(python --version) | uv: $(uv --version)" - echo "" - echo "Commands:" - echo " deepwork --help CLI (development version)" - echo " pytest Run tests" - echo " ruff check src/ Lint code" - echo " mypy src/ Type check" - echo " claude-code Claude Code CLI" - echo " gh GitHub CLI" - echo " update Update claude-code and flake inputs" - echo "" - fi - ''; - }; + in + { + devShells = forAllSystems (system: + let + pkgs = import nixpkgs { + inherit system; + config.allowUnfree = true; + }; + + # Local claude-code package (update via nix/claude-code/update.sh) + claude-code = pkgs.callPackage ./nix/claude-code/package.nix { }; + + # Python set with editable overlay for development + pythonSet = pythonSets.${system}.overrideScope editableOverlay; + + # Virtual environment with all dependencies (including dev extras) + virtualenv = pythonSet.mkVirtualEnv "deepwork-dev-env" workspace.deps.all; + in + { + default = pkgs.mkShell { + packages = [ + virtualenv + pkgs.uv + pkgs.git + pkgs.jq + claude-code + pkgs.gh + ]; + + env = { + # Prevent uv from managing packages (Nix handles it) + UV_NO_SYNC = "1"; + UV_PYTHON = "${pythonSet.python}/bin/python"; + UV_PYTHON_DOWNLOADS = "never"; + DEEPWORK_DEV = "1"; + }; + + shellHook = '' + # Required for editable overlay + unset PYTHONPATH + export REPO_ROOT=$(git rev-parse --show-toplevel) + + # Add nix/ scripts to PATH (for 'update' command) + export PATH="$PWD/nix:$PATH" + + # Only show welcome message in interactive shells + if [[ $- == *i* ]]; then + echo "" + echo "DeepWork Development Environment (uv2nix)" + echo "==========================================" + echo "" + echo "Python: $(python --version) | uv: $(uv --version)" + echo "" + echo "Commands:" + echo " deepwork --help CLI (development version)" + echo " pytest Run tests" + echo " ruff check src/ Lint code" + echo " mypy src/ Type check" + echo " claude-code Claude Code CLI" + echo " gh GitHub CLI" + echo " update Update claude-code and flake inputs" + echo "" + fi + ''; + }; + } + ); - # Make the package available as a flake output - packages.default = deepwork; - packages.deepwork = deepwork; + # Package output - virtual environment with default deps only + packages = forAllSystems (system: + let + pkg = pythonSets.${system}.mkVirtualEnv "deepwork-env" workspace.deps.default; + in { + default = pkg; + deepwork = pkg; # Alias for backwards compatibility + } + ); - # Make deepwork runnable with 'nix run' - apps.default = { + # Make deepwork runnable with 'nix run' + apps = forAllSystems (system: { + default = { type = "app"; - program = "${deepwork}/bin/deepwork"; + program = "${self.packages.${system}.default}/bin/deepwork"; }; - } - ); + }); + }; } diff --git 
a/pyproject.toml b/pyproject.toml index c94e2c6d..bbf974b7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -54,7 +54,7 @@ Repository = "https://github.com/deepwork/deepwork" Issues = "https://github.com/deepwork/deepwork/issues" [build-system] -requires = ["hatchling"] +requires = ["hatchling", "editables"] build-backend = "hatchling.build" [tool.hatch.build.targets.wheel] @@ -116,3 +116,10 @@ warn_redundant_casts = true warn_unused_ignores = true warn_no_return = true strict_equality = true + +[dependency-groups] +dev = [ + "pytest>=9.0.2", + "pytest-asyncio>=1.3.0", + "pytest-mock>=3.15.1", +] diff --git a/uv.lock b/uv.lock index 49d37635..d1755b0c 100644 --- a/uv.lock +++ b/uv.lock @@ -480,6 +480,13 @@ dev = [ { name = "types-pyyaml" }, ] +[package.dev-dependencies] +dev = [ + { name = "pytest" }, + { name = "pytest-asyncio" }, + { name = "pytest-mock" }, +] + [package.metadata] requires-dist = [ { name = "aiofiles", specifier = ">=24.0.0" }, @@ -503,6 +510,13 @@ requires-dist = [ ] provides-extras = ["dev"] +[package.metadata.requires-dev] +dev = [ + { name = "pytest", specifier = ">=9.0.2" }, + { name = "pytest-asyncio", specifier = ">=1.3.0" }, + { name = "pytest-mock", specifier = ">=3.15.1" }, +] + [[package]] name = "diskcache" version = "5.6.3" From 3d9c5d68d8bc991bec00c7859478fc54795e3d11 Mon Sep 17 00:00:00 2001 From: Noah Horton Date: Wed, 4 Feb 2026 16:00:14 -0700 Subject: [PATCH 16/45] Fix quality gate wrapper parsing and repair job definitions - Fix quality_gate.py to handle Claude CLI --output-format json wrapper objects by extracting the 'result' field before parsing - Add tests for wrapper object handling with strong comments explaining the mock design - Remove deprecated 'exposed' field from learn step in deepwork_jobs - Add 'learn' workflow to make orphaned step accessible via MCP - Add 'update' workflow to update job for MCP compatibility - Migrate stop_hooks to quality_criteria in update job - Clean up settings.json by removing obsolete Skill permissions Co-Authored-By: Claude Opus 4.5 --- .claude/settings.json | 13 +- .deepwork/jobs/deepwork_jobs/job.yml | 15 +- .deepwork/jobs/deepwork_jobs/steps/errata.md | 77 ++++-- .../jobs/deepwork_jobs/steps/fix_jobs.md | 53 +++- .deepwork/jobs/update/job.yml | 23 +- src/deepwork/cli/install.py | 28 +- src/deepwork/cli/sync.py | 134 ++++++---- src/deepwork/mcp/quality_gate.py | 51 ++-- .../standard_jobs/deepwork_jobs/job.yml | 15 +- .../deepwork_jobs/steps/errata.md | 77 ++++-- .../deepwork_jobs/steps/fix_jobs.md | 53 +++- src/deepwork/templates/claude/settings.json | 6 +- tests/integration/test_install_flow.py | 31 +++ tests/unit/mcp/test_quality_gate.py | 251 ++++++++++++++++-- 14 files changed, 639 insertions(+), 188 deletions(-) diff --git a/.claude/settings.json b/.claude/settings.json index d84958d8..962bc968 100644 --- a/.claude/settings.json +++ b/.claude/settings.json @@ -103,18 +103,11 @@ "Skill(commit.test)", "Skill(commit.lint)", "Skill(commit.commit_and_push)", - "Skill(deepwork_jobs)", - "Skill(deepwork_jobs.define)", - "Skill(deepwork_jobs.review_job_spec)", - "Skill(deepwork_jobs.implement)", - "Skill(deepwork_jobs.learn)", "Skill(add_platform)", "Skill(add_platform.research)", "Skill(add_platform.add_capabilities)", "Skill(add_platform.implement)", "Skill(add_platform.verify)", - "Skill(update)", - "Skill(update.job)", "Read(./.deepwork/**)", "Edit(./.deepwork/**)", "Write(./.deepwork/**)", @@ -122,7 +115,11 @@ "Bash(.claude/hooks/commit_job_git_commit.sh:*)", 
"Bash(./.deepwork/jobs/deepwork_jobs/make_new_job.sh:*)", "WebSearch", - "Skill(deepwork)" + "Skill(deepwork)", + "mcp__deepwork__get_workflows", + "mcp__deepwork__start_workflow", + "mcp__deepwork__finished_step", + "mcp__deepwork__abort_workflow" ] }, "hooks": { diff --git a/.deepwork/jobs/deepwork_jobs/job.yml b/.deepwork/jobs/deepwork_jobs/job.yml index 5acfd3d0..4a8b26e5 100644 --- a/.deepwork/jobs/deepwork_jobs/job.yml +++ b/.deepwork/jobs/deepwork_jobs/job.yml @@ -1,6 +1,6 @@ # yaml-language-server: $schema=.deepwork/schemas/job.schema.json name: deepwork_jobs -version: "1.2.0" +version: "1.2.1" summary: "Creates and manages multi-step AI workflows. Use when defining, implementing, testing, or improving DeepWork jobs." description: | Core commands for managing DeepWork jobs. These commands help you define new multi-step @@ -32,7 +32,14 @@ workflows: - fix_jobs - errata + - name: learn + summary: "Analyze conversation history to improve job instructions and capture learnings" + steps: + - learn + changelog: + - version: "1.2.1" + changes: "Removed deprecated exposed field from learn step; added learn workflow to make step accessible via MCP" - version: "1.2.0" changes: "Added repair workflow with fix_settings, fix_jobs, and errata steps for migrating old DeepWork configurations to current format" - version: "1.1.0" @@ -144,7 +151,6 @@ steps: name: "Learn from Job Execution" description: "Analyzes conversation history to improve job instructions and capture learnings. Use after running a job to refine it." instructions_file: steps/learn.md - exposed: true inputs: - name: job_name description: "Name of the job that was run (optional - will auto-detect from conversation)" @@ -198,7 +204,7 @@ steps: - "**Exposed Field Addressed**: Are `exposed: true` fields removed or noted as deprecated?" - "**Stop Hooks Migrated**: Are `stop_hooks` migrated to `hooks.after_agent` format?" - "**Removed Steps Cleaned**: Are references to removed steps (like `review_job_spec`) updated?" - - "**Orphaned Steps Fixed**: Are steps not in any workflow either added to workflows or removed?" + - "**Orphaned Steps Fixed**: For jobs with no workflows, is there a single workflow (named after the job) containing all steps? For jobs with existing workflows, does each orphan get its own workflow (named after the step)?" - "**Valid YAML**: Do all job.yml files pass schema validation?" - "**Sync Complete**: Has `deepwork sync` been run to regenerate commands?" @@ -215,7 +221,8 @@ steps: - fix_settings - fix_jobs quality_criteria: - - "**Old Skills Folder Handled**: Is `.claude/skills/` folder removed or backed up?" + - "**Legacy Job Skills Removed**: Are legacy skill folders for each job removed from `.claude/skills/` and `.gemini/skills/`?" + - "**Deepwork Skill Preserved**: Does the `deepwork` skill folder still exist in `.claude/skills/deepwork/`?" - "**Temp Files Cleaned**: Are `.deepwork/tmp/` contents cleaned appropriately?" - "**Rules Folder Removed**: Is `.deepwork/rules/` folder backed up and removed (fully deprecated)?" - "**Rules Job Removed**: Is `.deepwork/jobs/deepwork_rules/` removed if present?" diff --git a/.deepwork/jobs/deepwork_jobs/steps/errata.md b/.deepwork/jobs/deepwork_jobs/steps/errata.md index 30ee7e8a..d4be7be3 100644 --- a/.deepwork/jobs/deepwork_jobs/steps/errata.md +++ b/.deepwork/jobs/deepwork_jobs/steps/errata.md @@ -8,31 +8,61 @@ Remove obsolete files and folders from prior DeepWork versions. 
This final step Identify and clean up deprecated files and folders, then create a comprehensive summary document. -### Step 1: Handle Old Skills Folder +### Step 1: Remove Legacy Job Skill Folders -Check if `.claude/skills/` exists. This folder was used by the old skill-based system and is no longer needed. +Old DeepWork versions created individual skill folders for each job and step. These need to be removed while preserving the main `deepwork` skill folder. -```bash -ls -la .claude/skills/ 2>/dev/null || echo "No skills folder (good!)" -``` +**Process:** + +1. **List all jobs** in `.deepwork/jobs/`: + ```bash + ls .deepwork/jobs/ + ``` + +2. **For each job**, kick off a sub-agent to find and remove legacy skill folders. The sub-agent should: + - Search in both `.claude/skills/` and `.gemini/skills/` + - Find folders matching: + - `{job_name}/` - folder named exactly like the job + - `{job_name}.*/` - folders starting with the job name followed by a period (e.g., `my_job.step1/`, `my_job.step2/`) + - Remove each matching folder + - Report what was removed -**If it exists:** -1. Count the contents: `ls .claude/skills/ | wc -l` -2. Ask the user whether to: - - **Delete** the folder entirely (recommended if migrated to MCP) - - **Back up** to `.claude/skills.backup/` before deleting - - **Keep** if they have custom skills not yet migrated + **Example commands for a job named `competitive_research`:** + ```bash + # Find and remove from .claude/skills/ + rm -rf .claude/skills/competitive_research/ 2>/dev/null + rm -rf .claude/skills/competitive_research.*/ 2>/dev/null -**Old skill structure to recognize:** + # Find and remove from .gemini/skills/ + rm -rf .gemini/skills/competitive_research/ 2>/dev/null + rm -rf .gemini/skills/competitive_research.*/ 2>/dev/null + ``` + +3. **Run sub-agents in parallel** - one for each job to speed up the process. + +4. **Verify the `deepwork` skill folder remains:** + ```bash + ls -d .claude/skills/deepwork/ 2>/dev/null || echo "ERROR: deepwork skill missing!" + ls -d .gemini/skills/deepwork/ 2>/dev/null || echo "WARNING: gemini deepwork skill missing (may not have been installed)" + ``` + + **CRITICAL:** The `deepwork` skill folder in `.claude/skills/deepwork/` MUST still exist after cleanup. If it is missing, something went wrong - do NOT proceed and investigate what happened. + +**What this removes:** ``` .claude/skills/ -├── job_name/ -│ └── SKILL.md -├── job_name.step_name/ -│ └── SKILL.md -└── ... 
+├── competitive_research/ <- REMOVE (legacy job folder) +├── competitive_research.discover/ <- REMOVE (legacy step folder) +├── competitive_research.analyze/ <- REMOVE (legacy step folder) +├── deepwork/ <- KEEP (current MCP entry point) +└── some_other_job/ <- REMOVE (legacy job folder) ``` +**Do NOT remove:** +- `.claude/skills/deepwork/` - This is the current MCP-based skill entry point +- `.gemini/skills/deepwork/` - Same for Gemini +- Any skill folders that don't match job names in `.deepwork/jobs/` + ### Step 2: Clean Temp Files Check `.deepwork/tmp/` for accumulated temporary files: @@ -166,7 +196,8 @@ Create a `repair_summary.md` file documenting all changes made during this workf ## Errata Cleanup (errata step) -- [ ] Handled `.claude/skills/` folder: [deleted/backed up/kept] +- [ ] Removed legacy job skill folders from `.claude/skills/` and `.gemini/skills/` +- [ ] Verified `deepwork` skill folder still exists - [ ] Cleaned `.deepwork/tmp/`: removed X files - [ ] Reviewed `.deepwork/rules/`: [action taken] - [ ] Updated `.deepwork/config.yml` version format @@ -187,7 +218,8 @@ Create a `repair_summary.md` file documenting all changes made during this workf ## Quality Criteria -- `.claude/skills/` folder is handled (removed, backed up, or documented why kept) +- Legacy job skill folders are removed from `.claude/skills/` and `.gemini/skills/` (folders matching job names or `jobname.*` patterns) +- The `deepwork` skill folder in `.claude/skills/deepwork/` still exists after cleanup - `.deepwork/tmp/` contents are cleaned appropriately - `.deepwork/rules/` folder is backed up and removed (DeepWork Rules fully deprecated) - `.deepwork/tmp/rules/` folder is removed @@ -225,8 +257,11 @@ Create a `repair_summary.md` file documenting all changes made during this workf ## Errata Cleanup -- Backed up `.claude/skills/` to `.claude/skills.backup/` (174 files) -- Deleted `.claude/skills/` folder +- Removed legacy skill folders for 3 jobs: + - `competitive_research/` and 4 step folders from `.claude/skills/` + - `deepwork_jobs/` and 5 step folders from `.claude/skills/` + - `monthly_reporting/` and 2 step folders from `.claude/skills/` +- Verified `deepwork` skill folder still present in `.claude/skills/` - Cleaned `.deepwork/tmp/rules/queue/` (12 old JSON files) - Kept `.deepwork/rules/` (contains active example rules) - Updated `.deepwork/config.yml` version to "1.0" diff --git a/.deepwork/jobs/deepwork_jobs/steps/fix_jobs.md b/.deepwork/jobs/deepwork_jobs/steps/fix_jobs.md index cd6f835b..52e90615 100644 --- a/.deepwork/jobs/deepwork_jobs/steps/fix_jobs.md +++ b/.deepwork/jobs/deepwork_jobs/steps/fix_jobs.md @@ -94,22 +94,44 @@ Run the following to see warnings: deepwork sync 2>&1 | grep -i "warning" ``` -**For each orphaned step, ask the user which action to take:** +**How to handle orphaned steps depends on whether the job has ANY workflows defined:** -1. **Add to a workflow** - Create a new single-step workflow for it: - ```yaml - workflows: - - name: standalone_step_name - summary: "Runs the step_name step" - steps: - - step_name - ``` +#### Case A: Job has NO workflows defined -2. **Remove the step entirely** - Delete the step from `steps:` array and its instruction file +If the job has no `workflows:` section at all (or it's empty), create a **single workflow with the same name as the job** containing all steps in their defined order: -3. 
**Keep as-is (deprecated)** - The step will remain inaccessible but preserved in the job definition +```yaml +# For a job named "my_job" with steps: step_a, step_b, step_c +workflows: + - name: my_job # Same name as the job + summary: "Runs the complete my_job workflow" + steps: + - step_a + - step_b + - step_c +``` + +This preserves the original intent of the job as a sequential workflow. + +#### Case B: Job has SOME workflows defined + +If the job already has one or more workflows defined, but some steps are not included in any of them, create a **separate single-step workflow for each orphaned step** with the same name as the step: + +```yaml +# Existing workflows stay as-is, add new ones for orphans +workflows: + - name: existing_workflow + summary: "..." + steps: [...] + + # Add for each orphaned step: + - name: orphaned_step_name # Same name as the step + summary: "Runs the orphaned_step_name step" + steps: + - orphaned_step_name +``` -**Do not automatically decide** - Always confirm with the user which option they prefer for each orphaned step. +This ensures all steps remain accessible via the MCP interface while preserving the existing workflow structure. ### Step 6: Validate Against Schema @@ -150,7 +172,8 @@ Verify no errors or warnings appear. - All `exposed: true` fields are removed or noted - All `stop_hooks` are migrated to `hooks.after_agent` format - References to removed steps (like `review_job_spec`) are updated -- Orphaned steps are either added to workflows or removed +- Jobs with no workflows get a single workflow (same name as job) containing all steps +- Jobs with existing workflows get individual workflows for each orphaned step (same name as step) - All job.yml files pass schema validation - `deepwork sync` runs without errors - When all criteria are met, include `Quality Criteria Met` in your response @@ -173,7 +196,9 @@ Error: Workflow 'new_job' references non-existent step 'review_job_spec' ``` Warning: Job 'my_job' has steps not included in any workflow: standalone_step ``` -**Fix:** Either add the step to a workflow or remove it from the job. +**Fix:** +- If the job has NO workflows: Create one workflow named `my_job` with all steps in order +- If the job has SOME workflows: Add a `standalone_step` workflow containing just that step ## Jobs to Check diff --git a/.deepwork/jobs/update/job.yml b/.deepwork/jobs/update/job.yml index 92c13433..f437c821 100644 --- a/.deepwork/jobs/update/job.yml +++ b/.deepwork/jobs/update/job.yml @@ -1,6 +1,6 @@ # yaml-language-server: $schema=.deepwork/schemas/job.schema.json name: update -version: "1.3.0" +version: "1.4.0" summary: "Updates DeepWork standard jobs in src/ and syncs to installed locations. Use when modifying deepwork_jobs." description: | A workflow for maintaining standard jobs bundled with DeepWork. Standard jobs @@ -17,7 +17,15 @@ description: | Use this job whenever you need to modify job.yml files, step instructions, or hooks for any standard job in the DeepWork repository. +workflows: + - name: update + summary: "Update standard job source files and sync to installed locations" + steps: + - job + changelog: + - version: "1.4.0" + changes: "Added workflow for MCP compatibility; migrated stop_hooks to quality_criteria" - version: "1.0.0" changes: "Initial job creation" - version: "1.1.0" @@ -38,11 +46,8 @@ steps: outputs: - files_synced # implicit state: source files synced to installed locations dependencies: [] - stop_hooks: - - prompt: | - Verify the update process completed successfully: - 1. 
Changes were made in src/deepwork/standard_jobs/[job_name]/ (NOT in .deepwork/jobs/) - 2. `deepwork install --platform claude` was run - 3. Files in .deepwork/jobs/ match the source files - 4. Command files in .claude/commands/ were regenerated - If ALL criteria are met, include `✓ Quality Criteria Met`. + quality_criteria: + - "**Source Location**: Were changes made in `src/deepwork/standard_jobs/[job_name]/` (NOT in `.deepwork/jobs/`)?" + - "**Install Complete**: Was `deepwork install --platform claude` run successfully?" + - "**Files Synced**: Do files in `.deepwork/jobs/` match the source files?" + - "**Commands Regenerated**: Were command files in `.claude/commands/` regenerated?" diff --git a/src/deepwork/cli/install.py b/src/deepwork/cli/install.py index c7f90732..74209a52 100644 --- a/src/deepwork/cli/install.py +++ b/src/deepwork/cli/install.py @@ -363,18 +363,28 @@ def _install_deepwork(platform_name: str | None, project_path: Path) -> None: from deepwork.cli.sync import sync_skills try: - sync_skills(project_path) + sync_result = sync_skills(project_path) except Exception as e: raise InstallError(f"Failed to sync skills: {e}") from e - # Success message + # Success or warning message console.print() platform_names = ", ".join(a.display_name for a in detected_adapters) - console.print( - f"[bold green]✓ DeepWork installed successfully for {platform_names}![/bold green]" - ) - console.print() - console.print("[bold]Next steps:[/bold]") - console.print(" 1. Start your agent CLI (ex. [cyan]claude[/cyan] or [cyan]gemini[/cyan])") - console.print(" 2. Define your first job using the command [cyan]/deepwork_jobs[/cyan]") + + if sync_result.has_warnings: + console.print( + "[bold yellow]⚠ You should repair your DeepWork install[/bold yellow]" + ) + console.print() + console.print("[bold]To fix issues:[/bold]") + console.print(" 1. Start your agent CLI (ex. [cyan]claude[/cyan] or [cyan]gemini[/cyan])") + console.print(" 2. Run [cyan]/deepwork repair[/cyan]") + else: + console.print( + f"[bold green]✓ DeepWork installed successfully for {platform_names}![/bold green]" + ) + console.print() + console.print("[bold]Next steps:[/bold]") + console.print(" 1. Start your agent CLI (ex. [cyan]claude[/cyan] or [cyan]gemini[/cyan])") + console.print(" 2. Define your first job using the command [cyan]/deepwork_jobs[/cyan]") console.print() diff --git a/src/deepwork/cli/sync.py b/src/deepwork/cli/sync.py index dbfce52b..9dff320f 100644 --- a/src/deepwork/cli/sync.py +++ b/src/deepwork/cli/sync.py @@ -1,5 +1,6 @@ """Sync command for DeepWork CLI.""" +from dataclasses import dataclass, field from pathlib import Path import click @@ -22,6 +23,21 @@ class SyncError(Exception): pass +@dataclass +class SyncResult: + """Result of a sync operation.""" + + platforms_synced: int = 0 + skills_generated: int = 0 + hooks_synced: int = 0 + warnings: list[str] = field(default_factory=list) + + @property + def has_warnings(self) -> bool: + """Return True if there were any warnings during sync.""" + return len(self.warnings) > 0 + + @click.command() @click.option( "--path", @@ -46,13 +62,16 @@ def sync(path: Path) -> None: raise -def sync_skills(project_path: Path) -> None: +def sync_skills(project_path: Path) -> SyncResult: """ Sync skills to all configured platforms. 
Args: project_path: Path to project directory + Returns: + SyncResult with statistics and any warnings + Raises: SyncError: If sync fails """ @@ -80,6 +99,43 @@ def sync_skills(project_path: Path) -> None: console.print("[bold cyan]Syncing DeepWork Skills[/bold cyan]\n") + # Generate /deepwork skill FIRST for all platforms (before parsing jobs) + # This ensures the skill is available even if some jobs fail to parse + generator = SkillGenerator() + result = SyncResult() + platform_adapters: list[AgentAdapter] = [] + all_skill_paths_by_platform: dict[str, list[Path]] = {} + + console.print("[yellow]→[/yellow] Generating /deepwork skill...") + for platform_name in platforms: + try: + adapter_cls = AgentAdapter.get(platform_name) + except Exception: + warning = f"Unknown platform '{platform_name}', skipping" + console.print(f" [yellow]⚠[/yellow] {warning}") + result.warnings.append(warning) + continue + + adapter = adapter_cls(project_path) + platform_adapters.append(adapter) + + platform_dir = project_path / adapter.config_dir + skills_dir = platform_dir / adapter.skills_dir + ensure_dir(skills_dir) + + all_skill_paths: list[Path] = [] + try: + deepwork_skill_path = generator.generate_deepwork_skill(adapter, platform_dir) + all_skill_paths.append(deepwork_skill_path) + result.skills_generated += 1 + console.print(f" [green]✓[/green] {adapter.display_name}: deepwork (MCP entry point)") + except Exception as e: + warning = f"{adapter.display_name}: Failed to generate /deepwork skill: {e}" + console.print(f" [red]✗[/red] {warning}") + result.warnings.append(warning) + + all_skill_paths_by_platform[platform_name] = all_skill_paths + # Discover jobs jobs_dir = deepwork_dir / "jobs" if not jobs_dir.exists(): @@ -87,7 +143,7 @@ def sync_skills(project_path: Path) -> None: else: job_dirs = [d for d in jobs_dir.iterdir() if d.is_dir() and (d / "job.yml").exists()] - console.print(f"[yellow]→[/yellow] Found {len(job_dirs)} job(s) to sync") + console.print(f"\n[yellow]→[/yellow] Found {len(job_dirs)} job(s) to sync") # Parse all jobs jobs = [] @@ -98,52 +154,27 @@ def sync_skills(project_path: Path) -> None: jobs.append(job_def) console.print(f" [green]✓[/green] Loaded {job_def.name} v{job_def.version}") except Exception as e: - console.print(f" [red]✗[/red] Failed to load {job_dir.name}: {e}") + warning = f"Failed to load {job_dir.name}: {e}" + console.print(f" [red]✗[/red] {warning}") failed_jobs.append((job_dir.name, str(e))) + result.warnings.append(warning) - # Fail early if any jobs failed to parse + # Warn about failed jobs but continue (skill already installed) if failed_jobs: console.print() - console.print("[bold red]Sync aborted due to job parsing errors:[/bold red]") + console.print("[bold yellow]Warning: Some jobs failed to parse:[/bold yellow]") for job_name, error in failed_jobs: console.print(f" • {job_name}: {error}") - raise SyncError(f"Failed to parse {len(failed_jobs)} job(s)") + console.print("[dim]The /deepwork skill is installed. 
Fix the job errors and run 'deepwork sync' again.[/dim]") - # Collect hooks from all jobs + # Collect hooks from jobs (hooks collection is independent of job.yml parsing) job_hooks_list = collect_job_hooks(jobs_dir) if job_hooks_list: - console.print(f"[yellow]→[/yellow] Found {len(job_hooks_list)} job(s) with hooks") - - # Sync each platform - generator = SkillGenerator() - stats = {"platforms": 0, "skills": 0, "hooks": 0} - - for platform_name in platforms: - try: - adapter_cls = AgentAdapter.get(platform_name) - except Exception: - console.print(f"[yellow]⚠[/yellow] Unknown platform '{platform_name}', skipping") - continue + console.print(f"\n[yellow]→[/yellow] Found {len(job_hooks_list)} job(s) with hooks") - adapter = adapter_cls(project_path) - console.print(f"\n[yellow]→[/yellow] Syncing to {adapter.display_name}...") - - platform_dir = project_path / adapter.config_dir - skills_dir = platform_dir / adapter.skills_dir - - # Create skills directory - ensure_dir(skills_dir) - - # Generate the global /deepwork skill (MCP entry point) - console.print(" [dim]•[/dim] Generating /deepwork skill...") - all_skill_paths: list[Path] = [] - try: - deepwork_skill_path = generator.generate_deepwork_skill(adapter, platform_dir) - all_skill_paths.append(deepwork_skill_path) - stats["skills"] += 1 - console.print(" [green]✓[/green] deepwork (MCP entry point)") - except Exception as e: - console.print(f" [red]✗[/red] Failed to generate /deepwork skill: {e}") + # Sync hooks and permissions for each platform + for adapter in platform_adapters: + console.print(f"\n[yellow]→[/yellow] Syncing hooks and permissions to {adapter.display_name}...") # NOTE: Job skills (meta-skills and step skills) are no longer generated. # The MCP server now handles workflow orchestration directly. 
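For reference, a minimal sketch of how a caller (such as the install flow above) might consume the `SyncResult` returned by `sync_skills`. The import path and field names are taken from this diff; the reporting logic itself is illustrative only and is not the actual CLI output.

```python
# Illustrative sketch only - field names and import path mirror this diff.
from pathlib import Path

from deepwork.cli.sync import SyncResult, sync_skills


def report_sync(project_path: Path) -> SyncResult:
    """Run a sync and surface any warnings to the caller."""
    result = sync_skills(project_path)

    # has_warnings is True whenever result.warnings is non-empty,
    # e.g. a job.yml failed to parse or hook/permission sync failed.
    if result.has_warnings:
        print("Sync completed with warnings:")
        for warning in result.warnings:
            print(f"  - {warning}")
    else:
        print(
            f"Synced {result.skills_generated} skill(s) and "
            f"{result.hooks_synced} hook(s) across "
            f"{result.platforms_synced} platform(s)."
        )
    return result
```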
@@ -154,11 +185,13 @@ def sync_skills(project_path: Path) -> None: console.print(" [dim]•[/dim] Syncing hooks...") try: hooks_count = sync_hooks_to_platform(project_path, adapter, job_hooks_list) - stats["hooks"] += hooks_count + result.hooks_synced += hooks_count if hooks_count > 0: console.print(f" [green]✓[/green] Synced {hooks_count} hook(s)") except Exception as e: - console.print(f" [red]✗[/red] Failed to sync hooks: {e}") + warning = f"Failed to sync hooks: {e}" + console.print(f" [red]✗[/red] {warning}") + result.warnings.append(warning) # Sync required permissions to platform settings console.print(" [dim]•[/dim] Syncing permissions...") @@ -169,9 +202,12 @@ def sync_skills(project_path: Path) -> None: else: console.print(" [dim]•[/dim] Base permissions already configured") except Exception as e: - console.print(f" [red]✗[/red] Failed to sync permissions: {e}") + warning = f"Failed to sync permissions: {e}" + console.print(f" [red]✗[/red] {warning}") + result.warnings.append(warning) # Add skill permissions for generated skills (if adapter supports it) + all_skill_paths = all_skill_paths_by_platform.get(adapter.name, []) if all_skill_paths and hasattr(adapter, "add_skill_permissions"): try: skill_perms_count = adapter.add_skill_permissions(project_path, all_skill_paths) @@ -180,9 +216,11 @@ def sync_skills(project_path: Path) -> None: f" [green]✓[/green] Added {skill_perms_count} skill permission(s)" ) except Exception as e: - console.print(f" [red]✗[/red] Failed to sync skill permissions: {e}") + warning = f"Failed to sync skill permissions: {e}" + console.print(f" [red]✗[/red] {warning}") + result.warnings.append(warning) - stats["platforms"] += 1 + result.platforms_synced += 1 # Summary console.print() @@ -193,10 +231,12 @@ def sync_skills(project_path: Path) -> None: table.add_column("Metric", style="cyan") table.add_column("Count", style="green") - table.add_row("Platforms synced", str(stats["platforms"])) - table.add_row("Total skills", str(stats["skills"])) - if stats["hooks"] > 0: - table.add_row("Hooks synced", str(stats["hooks"])) + table.add_row("Platforms synced", str(result.platforms_synced)) + table.add_row("Total skills", str(result.skills_generated)) + if result.hooks_synced > 0: + table.add_row("Hooks synced", str(result.hooks_synced)) console.print(table) console.print() + + return result diff --git a/src/deepwork/mcp/quality_gate.py b/src/deepwork/mcp/quality_gate.py index 15eae776..4c973fc6 100644 --- a/src/deepwork/mcp/quality_gate.py +++ b/src/deepwork/mcp/quality_gate.py @@ -64,7 +64,7 @@ def __init__( """Initialize quality gate. Args: - command: Base command to invoke review agent (system prompt added via -s flag) + command: Base command to invoke review agent (system prompt added via --system-prompt flag) timeout: Timeout in seconds for review agent """ self.command = command @@ -163,18 +163,32 @@ def _parse_response( """ # Try to extract JSON from the response try: + # First, try to parse as JSON to check if it's a wrapper object + # from --output-format json (contains type, result, etc.) 
+ json_text = response_text.strip() + try: + wrapper = json.loads(json_text) + # Check if this is a Claude CLI wrapper object + if isinstance(wrapper, dict) and "type" in wrapper and "result" in wrapper: + # Extract the actual result content + json_text = wrapper.get("result", "") + if not json_text: + raise QualityGateError( + "Review agent returned empty result in wrapper object" + ) + except json.JSONDecodeError: + # Not valid JSON at the top level, continue with normal parsing + pass + # Look for JSON in code blocks - if "```json" in response_text: - start = response_text.index("```json") + 7 - end = response_text.index("```", start) - json_text = response_text[start:end].strip() - elif "```" in response_text: - start = response_text.index("```") + 3 - end = response_text.index("```", start) - json_text = response_text[start:end].strip() - else: - # Assume entire response is JSON - json_text = response_text.strip() + if "```json" in json_text: + start = json_text.index("```json") + 7 + end = json_text.index("```", start) + json_text = json_text[start:end].strip() + elif "```" in json_text: + start = json_text.index("```") + 3 + end = json_text.index("```", start) + json_text = json_text[start:end].strip() data = json.loads(json_text) @@ -242,11 +256,18 @@ async def evaluate( instructions = self._build_instructions(quality_criteria) payload = await self._build_payload(outputs, project_root) - # Build command with system prompt flag + # Build command with system prompt flag and JSON schema # Parse the base command properly to handle quoted arguments base_cmd = shlex.split(self.command) - # Add system prompt via -s flag - full_cmd = base_cmd + ["-s", instructions] + schema_json = json.dumps(QUALITY_GATE_RESPONSE_SCHEMA) + full_cmd = base_cmd + [ + # Add system prompt via --system-prompt flag + "--system-prompt", + instructions, + # Add JSON schema to enforce structured output + "--json-schema", + schema_json, + ] try: # Run review agent with system prompt and payload using async subprocess diff --git a/src/deepwork/standard_jobs/deepwork_jobs/job.yml b/src/deepwork/standard_jobs/deepwork_jobs/job.yml index 5acfd3d0..4a8b26e5 100644 --- a/src/deepwork/standard_jobs/deepwork_jobs/job.yml +++ b/src/deepwork/standard_jobs/deepwork_jobs/job.yml @@ -1,6 +1,6 @@ # yaml-language-server: $schema=.deepwork/schemas/job.schema.json name: deepwork_jobs -version: "1.2.0" +version: "1.2.1" summary: "Creates and manages multi-step AI workflows. Use when defining, implementing, testing, or improving DeepWork jobs." description: | Core commands for managing DeepWork jobs. These commands help you define new multi-step @@ -32,7 +32,14 @@ workflows: - fix_jobs - errata + - name: learn + summary: "Analyze conversation history to improve job instructions and capture learnings" + steps: + - learn + changelog: + - version: "1.2.1" + changes: "Removed deprecated exposed field from learn step; added learn workflow to make step accessible via MCP" - version: "1.2.0" changes: "Added repair workflow with fix_settings, fix_jobs, and errata steps for migrating old DeepWork configurations to current format" - version: "1.1.0" @@ -144,7 +151,6 @@ steps: name: "Learn from Job Execution" description: "Analyzes conversation history to improve job instructions and capture learnings. Use after running a job to refine it." 
instructions_file: steps/learn.md - exposed: true inputs: - name: job_name description: "Name of the job that was run (optional - will auto-detect from conversation)" @@ -198,7 +204,7 @@ steps: - "**Exposed Field Addressed**: Are `exposed: true` fields removed or noted as deprecated?" - "**Stop Hooks Migrated**: Are `stop_hooks` migrated to `hooks.after_agent` format?" - "**Removed Steps Cleaned**: Are references to removed steps (like `review_job_spec`) updated?" - - "**Orphaned Steps Fixed**: Are steps not in any workflow either added to workflows or removed?" + - "**Orphaned Steps Fixed**: For jobs with no workflows, is there a single workflow (named after the job) containing all steps? For jobs with existing workflows, does each orphan get its own workflow (named after the step)?" - "**Valid YAML**: Do all job.yml files pass schema validation?" - "**Sync Complete**: Has `deepwork sync` been run to regenerate commands?" @@ -215,7 +221,8 @@ steps: - fix_settings - fix_jobs quality_criteria: - - "**Old Skills Folder Handled**: Is `.claude/skills/` folder removed or backed up?" + - "**Legacy Job Skills Removed**: Are legacy skill folders for each job removed from `.claude/skills/` and `.gemini/skills/`?" + - "**Deepwork Skill Preserved**: Does the `deepwork` skill folder still exist in `.claude/skills/deepwork/`?" - "**Temp Files Cleaned**: Are `.deepwork/tmp/` contents cleaned appropriately?" - "**Rules Folder Removed**: Is `.deepwork/rules/` folder backed up and removed (fully deprecated)?" - "**Rules Job Removed**: Is `.deepwork/jobs/deepwork_rules/` removed if present?" diff --git a/src/deepwork/standard_jobs/deepwork_jobs/steps/errata.md b/src/deepwork/standard_jobs/deepwork_jobs/steps/errata.md index 30ee7e8a..d4be7be3 100644 --- a/src/deepwork/standard_jobs/deepwork_jobs/steps/errata.md +++ b/src/deepwork/standard_jobs/deepwork_jobs/steps/errata.md @@ -8,31 +8,61 @@ Remove obsolete files and folders from prior DeepWork versions. This final step Identify and clean up deprecated files and folders, then create a comprehensive summary document. -### Step 1: Handle Old Skills Folder +### Step 1: Remove Legacy Job Skill Folders -Check if `.claude/skills/` exists. This folder was used by the old skill-based system and is no longer needed. +Old DeepWork versions created individual skill folders for each job and step. These need to be removed while preserving the main `deepwork` skill folder. -```bash -ls -la .claude/skills/ 2>/dev/null || echo "No skills folder (good!)" -``` +**Process:** + +1. **List all jobs** in `.deepwork/jobs/`: + ```bash + ls .deepwork/jobs/ + ``` + +2. **For each job**, kick off a sub-agent to find and remove legacy skill folders. The sub-agent should: + - Search in both `.claude/skills/` and `.gemini/skills/` + - Find folders matching: + - `{job_name}/` - folder named exactly like the job + - `{job_name}.*/` - folders starting with the job name followed by a period (e.g., `my_job.step1/`, `my_job.step2/`) + - Remove each matching folder + - Report what was removed -**If it exists:** -1. Count the contents: `ls .claude/skills/ | wc -l` -2. 
Ask the user whether to: - - **Delete** the folder entirely (recommended if migrated to MCP) - - **Back up** to `.claude/skills.backup/` before deleting - - **Keep** if they have custom skills not yet migrated + **Example commands for a job named `competitive_research`:** + ```bash + # Find and remove from .claude/skills/ + rm -rf .claude/skills/competitive_research/ 2>/dev/null + rm -rf .claude/skills/competitive_research.*/ 2>/dev/null -**Old skill structure to recognize:** + # Find and remove from .gemini/skills/ + rm -rf .gemini/skills/competitive_research/ 2>/dev/null + rm -rf .gemini/skills/competitive_research.*/ 2>/dev/null + ``` + +3. **Run sub-agents in parallel** - one for each job to speed up the process. + +4. **Verify the `deepwork` skill folder remains:** + ```bash + ls -d .claude/skills/deepwork/ 2>/dev/null || echo "ERROR: deepwork skill missing!" + ls -d .gemini/skills/deepwork/ 2>/dev/null || echo "WARNING: gemini deepwork skill missing (may not have been installed)" + ``` + + **CRITICAL:** The `deepwork` skill folder in `.claude/skills/deepwork/` MUST still exist after cleanup. If it is missing, something went wrong - do NOT proceed and investigate what happened. + +**What this removes:** ``` .claude/skills/ -├── job_name/ -│ └── SKILL.md -├── job_name.step_name/ -│ └── SKILL.md -└── ... +├── competitive_research/ <- REMOVE (legacy job folder) +├── competitive_research.discover/ <- REMOVE (legacy step folder) +├── competitive_research.analyze/ <- REMOVE (legacy step folder) +├── deepwork/ <- KEEP (current MCP entry point) +└── some_other_job/ <- REMOVE (legacy job folder) ``` +**Do NOT remove:** +- `.claude/skills/deepwork/` - This is the current MCP-based skill entry point +- `.gemini/skills/deepwork/` - Same for Gemini +- Any skill folders that don't match job names in `.deepwork/jobs/` + ### Step 2: Clean Temp Files Check `.deepwork/tmp/` for accumulated temporary files: @@ -166,7 +196,8 @@ Create a `repair_summary.md` file documenting all changes made during this workf ## Errata Cleanup (errata step) -- [ ] Handled `.claude/skills/` folder: [deleted/backed up/kept] +- [ ] Removed legacy job skill folders from `.claude/skills/` and `.gemini/skills/` +- [ ] Verified `deepwork` skill folder still exists - [ ] Cleaned `.deepwork/tmp/`: removed X files - [ ] Reviewed `.deepwork/rules/`: [action taken] - [ ] Updated `.deepwork/config.yml` version format @@ -187,7 +218,8 @@ Create a `repair_summary.md` file documenting all changes made during this workf ## Quality Criteria -- `.claude/skills/` folder is handled (removed, backed up, or documented why kept) +- Legacy job skill folders are removed from `.claude/skills/` and `.gemini/skills/` (folders matching job names or `jobname.*` patterns) +- The `deepwork` skill folder in `.claude/skills/deepwork/` still exists after cleanup - `.deepwork/tmp/` contents are cleaned appropriately - `.deepwork/rules/` folder is backed up and removed (DeepWork Rules fully deprecated) - `.deepwork/tmp/rules/` folder is removed @@ -225,8 +257,11 @@ Create a `repair_summary.md` file documenting all changes made during this workf ## Errata Cleanup -- Backed up `.claude/skills/` to `.claude/skills.backup/` (174 files) -- Deleted `.claude/skills/` folder +- Removed legacy skill folders for 3 jobs: + - `competitive_research/` and 4 step folders from `.claude/skills/` + - `deepwork_jobs/` and 5 step folders from `.claude/skills/` + - `monthly_reporting/` and 2 step folders from `.claude/skills/` +- Verified `deepwork` skill folder still present in 
`.claude/skills/` - Cleaned `.deepwork/tmp/rules/queue/` (12 old JSON files) - Kept `.deepwork/rules/` (contains active example rules) - Updated `.deepwork/config.yml` version to "1.0" diff --git a/src/deepwork/standard_jobs/deepwork_jobs/steps/fix_jobs.md b/src/deepwork/standard_jobs/deepwork_jobs/steps/fix_jobs.md index cd6f835b..52e90615 100644 --- a/src/deepwork/standard_jobs/deepwork_jobs/steps/fix_jobs.md +++ b/src/deepwork/standard_jobs/deepwork_jobs/steps/fix_jobs.md @@ -94,22 +94,44 @@ Run the following to see warnings: deepwork sync 2>&1 | grep -i "warning" ``` -**For each orphaned step, ask the user which action to take:** +**How to handle orphaned steps depends on whether the job has ANY workflows defined:** -1. **Add to a workflow** - Create a new single-step workflow for it: - ```yaml - workflows: - - name: standalone_step_name - summary: "Runs the step_name step" - steps: - - step_name - ``` +#### Case A: Job has NO workflows defined -2. **Remove the step entirely** - Delete the step from `steps:` array and its instruction file +If the job has no `workflows:` section at all (or it's empty), create a **single workflow with the same name as the job** containing all steps in their defined order: -3. **Keep as-is (deprecated)** - The step will remain inaccessible but preserved in the job definition +```yaml +# For a job named "my_job" with steps: step_a, step_b, step_c +workflows: + - name: my_job # Same name as the job + summary: "Runs the complete my_job workflow" + steps: + - step_a + - step_b + - step_c +``` + +This preserves the original intent of the job as a sequential workflow. + +#### Case B: Job has SOME workflows defined + +If the job already has one or more workflows defined, but some steps are not included in any of them, create a **separate single-step workflow for each orphaned step** with the same name as the step: + +```yaml +# Existing workflows stay as-is, add new ones for orphans +workflows: + - name: existing_workflow + summary: "..." + steps: [...] + + # Add for each orphaned step: + - name: orphaned_step_name # Same name as the step + summary: "Runs the orphaned_step_name step" + steps: + - orphaned_step_name +``` -**Do not automatically decide** - Always confirm with the user which option they prefer for each orphaned step. +This ensures all steps remain accessible via the MCP interface while preserving the existing workflow structure. ### Step 6: Validate Against Schema @@ -150,7 +172,8 @@ Verify no errors or warnings appear. - All `exposed: true` fields are removed or noted - All `stop_hooks` are migrated to `hooks.after_agent` format - References to removed steps (like `review_job_spec`) are updated -- Orphaned steps are either added to workflows or removed +- Jobs with no workflows get a single workflow (same name as job) containing all steps +- Jobs with existing workflows get individual workflows for each orphaned step (same name as step) - All job.yml files pass schema validation - `deepwork sync` runs without errors - When all criteria are met, include `Quality Criteria Met` in your response @@ -173,7 +196,9 @@ Error: Workflow 'new_job' references non-existent step 'review_job_spec' ``` Warning: Job 'my_job' has steps not included in any workflow: standalone_step ``` -**Fix:** Either add the step to a workflow or remove it from the job. 
+**Fix:** +- If the job has NO workflows: Create one workflow named `my_job` with all steps in order +- If the job has SOME workflows: Add a `standalone_step` workflow containing just that step ## Jobs to Check diff --git a/src/deepwork/templates/claude/settings.json b/src/deepwork/templates/claude/settings.json index 97d5d1be..a85202f8 100644 --- a/src/deepwork/templates/claude/settings.json +++ b/src/deepwork/templates/claude/settings.json @@ -6,7 +6,11 @@ "Write(./.deepwork/**)", "Bash(deepwork:*)", "Bash(./.deepwork/jobs/deepwork_jobs/make_new_job.sh:*)", - "WebSearch" + "WebSearch", + "mcp__deepwork__get_workflows", + "mcp__deepwork__start_workflow", + "mcp__deepwork__finished_step", + "mcp__deepwork__abort_workflow" ] } } diff --git a/tests/integration/test_install_flow.py b/tests/integration/test_install_flow.py index 17af3818..169e90ed 100644 --- a/tests/integration/test_install_flow.py +++ b/tests/integration/test_install_flow.py @@ -168,6 +168,37 @@ def test_install_is_idempotent(self, mock_claude_project: Path) -> None: # MCP entry point skill assert (claude_dir / "deepwork" / "SKILL.md").exists() + def test_install_shows_repair_message_when_job_fails_to_parse( + self, mock_claude_project: Path + ) -> None: + """Test that install shows repair message when there are warnings.""" + runner = CliRunner() + + # First do a normal install + result1 = runner.invoke( + cli, + ["install", "--platform", "claude", "--path", str(mock_claude_project)], + catch_exceptions=False, + ) + assert result1.exit_code == 0 + assert "DeepWork installed successfully" in result1.output + + # Create a malformed job definition + jobs_dir = mock_claude_project / ".deepwork" / "jobs" / "broken_job" + jobs_dir.mkdir(parents=True, exist_ok=True) + (jobs_dir / "job.yml").write_text("invalid: yaml: content: [") + + # Reinstall - should show repair message due to parsing warning + result2 = runner.invoke( + cli, + ["install", "--platform", "claude", "--path", str(mock_claude_project)], + catch_exceptions=False, + ) + assert result2.exit_code == 0 + assert "You should repair your DeepWork install" in result2.output + assert "/deepwork repair" in result2.output + assert "DeepWork installed successfully" not in result2.output + class TestCLIEntryPoint: """Tests for CLI entry point.""" diff --git a/tests/unit/mcp/test_quality_gate.py b/tests/unit/mcp/test_quality_gate.py index 45fe6375..8b511df7 100644 --- a/tests/unit/mcp/test_quality_gate.py +++ b/tests/unit/mcp/test_quality_gate.py @@ -1,10 +1,20 @@ """Tests for MCP quality gate.""" +import json +from collections.abc import Callable, Generator +from contextlib import contextmanager from pathlib import Path +from typing import Any +from unittest.mock import MagicMock, patch import pytest -from deepwork.mcp.quality_gate import MockQualityGate, QualityGate, QualityGateError +from deepwork.mcp.quality_gate import ( + QUALITY_GATE_RESPONSE_SCHEMA, + MockQualityGate, + QualityGate, + QualityGateError, +) @pytest.fixture @@ -19,6 +29,91 @@ def quality_gate() -> QualityGate: return QualityGate(command="echo test", timeout=10) +@pytest.fixture +def output_file(project_root: Path) -> Path: + """Create a test output file with default content.""" + output = project_root / "output.md" + output.write_text("Test content") + return output + + +def create_mock_subprocess( + response: dict[str, Any] | None = None, + returncode: int = 0, +) -> tuple[list[str], Callable[..., MagicMock]]: + """Create a mock subprocess executor that captures commands. 
+ + ############################################################################ + # CRITICAL: DO NOT MODIFY THE RESPONSE FORMAT WITHOUT UNDERSTANDING THIS! + # + # This mock returns the quality gate response JSON DIRECTLY, without the + # Claude CLI wrapper object. This is INTENTIONAL and tests that the + # _parse_response method can handle BOTH: + # + # 1. Direct JSON (what this mock returns) - for backwards compatibility + # 2. Wrapper objects from `claude -p --output-format json` which look like: + # {"type": "result", "result": "", ...} + # + # The REAL Claude CLI with `--output-format json` returns a wrapper object. + # The quality_gate.py code handles this by checking for the wrapper format + # and extracting the "result" field before parsing. + # + # If you're seeing schema validation errors in production, it's because + # the code expects to unwrap the response first. See test_parse_response_wrapper_object + # for the wrapper format test. + # + # DO NOT "fix" this mock by adding a wrapper - that would break the test's + # purpose of verifying direct JSON handling still works. + ############################################################################ + + Args: + response: The JSON response to return. Defaults to a passing quality gate response. + returncode: The return code for the process. + + Returns: + A tuple of (captured_cmd list, mock_create_subprocess_exec function). + The captured_cmd list will be populated with the command arguments when + the mock is called. + """ + if response is None: + response = {"passed": True, "feedback": "OK", "criteria_results": []} + + captured_cmd: list[str] = [] + + async def mock_create_subprocess_exec(*cmd: str, **kwargs: Any) -> MagicMock: # noqa: ARG001 + captured_cmd.extend(cmd) + mock_process = MagicMock() + mock_process.returncode = returncode + + async def mock_communicate(input: bytes = b"") -> tuple[bytes, bytes]: # noqa: ARG001 + # Returns direct JSON without CLI wrapper - see docstring above + return json.dumps(response).encode(), b"" + + mock_process.communicate = mock_communicate + return mock_process + + return captured_cmd, mock_create_subprocess_exec + + +@contextmanager +def patched_subprocess( + response: dict[str, Any] | None = None, + returncode: int = 0, +) -> Generator[list[str], None, None]: + """Context manager that patches subprocess and yields captured command. + + Args: + response: The JSON response to return. Defaults to a passing quality gate response. + returncode: The return code for the process. + + Yields: + The list of captured command arguments. 
+ """ + captured_cmd, mock_subprocess = create_mock_subprocess(response, returncode) + with patch("asyncio.create_subprocess_exec", mock_subprocess): + yield captured_cmd + + class TestQualityGate: """Tests for QualityGate class.""" @@ -125,6 +220,41 @@ def test_parse_response_invalid_json(self, quality_gate: QualityGate) -> None: with pytest.raises(QualityGateError, match="Failed to parse"): quality_gate._parse_response(response) + def test_parse_response_wrapper_object(self, quality_gate: QualityGate) -> None: + """Test parsing response wrapped in Claude CLI --output-format json wrapper.""" + # This is what claude -p --output-format json returns + wrapper_response = json.dumps({ + "type": "result", + "subtype": "success", + "is_error": False, + "duration_ms": 1234, + "result": json.dumps({ + "passed": True, + "feedback": "All criteria met", + "criteria_results": [ + {"criterion": "Test 1", "passed": True, "feedback": None} + ] + }), + "session_id": "test-session", + }) + + result = quality_gate._parse_response(wrapper_response) + + assert result.passed is True + assert result.feedback == "All criteria met" + assert len(result.criteria_results) == 1 + + def test_parse_response_wrapper_empty_result(self, quality_gate: QualityGate) -> None: + """Test parsing wrapper object with empty result raises error.""" + wrapper_response = json.dumps({ + "type": "result", + "subtype": "success", + "result": "", + }) + + with pytest.raises(QualityGateError, match="empty result"): + quality_gate._parse_response(wrapper_response) + async def test_evaluate_no_criteria( self, quality_gate: QualityGate, project_root: Path ) -> None: @@ -139,18 +269,106 @@ async def test_evaluate_no_criteria( assert "auto-passing" in result.feedback.lower() +class TestQualityGateCommandConstruction: + """Tests for command construction, specifically JSON schema inclusion.""" + + @staticmethod + def get_command_arg(captured_cmd: list[str], flag: str) -> str: + """Extract the argument value following a command flag. + + Args: + captured_cmd: List of command arguments. + flag: The flag to find (e.g., "--json-schema"). + + Returns: + The argument value following the flag. + + Raises: + AssertionError: If the flag is not found in the command. + """ + assert flag in captured_cmd, f"Expected {flag} in command, got: {captured_cmd}" + flag_index = captured_cmd.index(flag) + return captured_cmd[flag_index + 1] + + async def test_command_includes_json_schema( + self, output_file: Path, project_root: Path + ) -> None: + """Test that the command includes --json-schema with the correct schema.""" + gate = QualityGate(command="claude -p --output-format json", timeout=10) + + with patched_subprocess() as captured_cmd: + await gate.evaluate( + quality_criteria=["Test criterion"], + outputs=[output_file.name], + project_root=project_root, + ) + + schema_json = self.get_command_arg(captured_cmd, "--json-schema") + parsed_schema = json.loads(schema_json) + assert parsed_schema == QUALITY_GATE_RESPONSE_SCHEMA, ( + f"Schema mismatch. 
Expected:\n{QUALITY_GATE_RESPONSE_SCHEMA}\n" + f"Got:\n{parsed_schema}" + ) + + async def test_command_includes_system_prompt( + self, output_file: Path, project_root: Path + ) -> None: + """Test that the command includes --system-prompt with quality criteria.""" + gate = QualityGate(command="claude -p", timeout=10) + + with patched_subprocess() as captured_cmd: + await gate.evaluate( + quality_criteria=["Output must exist", "Output must be valid"], + outputs=[output_file.name], + project_root=project_root, + ) + + system_prompt = self.get_command_arg(captured_cmd, "--system-prompt") + assert "Output must exist" in system_prompt + assert "Output must be valid" in system_prompt + + async def test_schema_is_valid_json(self) -> None: + """Test that QUALITY_GATE_RESPONSE_SCHEMA is valid JSON.""" + # This test ensures the schema can be serialized + schema_json = json.dumps(QUALITY_GATE_RESPONSE_SCHEMA) + assert schema_json # Non-empty string + + # And parsed back + parsed = json.loads(schema_json) + assert parsed == QUALITY_GATE_RESPONSE_SCHEMA + + class TestMockQualityGate: """Tests for MockQualityGate class.""" + @staticmethod + async def evaluate_mock_gate( + gate: MockQualityGate, + project_root: Path, + criteria: list[str] | None = None, + outputs: list[str] | None = None, + ) -> Any: + """Helper to evaluate a mock gate with default parameters. + + Args: + gate: The MockQualityGate instance to evaluate. + project_root: The project root path. + criteria: Quality criteria list. Defaults to ["Criterion 1"]. + outputs: Output files list. Defaults to ["output.md"]. + + Returns: + The evaluation result. + """ + return await gate.evaluate( + quality_criteria=criteria or ["Criterion 1"], + outputs=outputs or ["output.md"], + project_root=project_root, + ) + async def test_mock_passes_by_default(self, project_root: Path) -> None: """Test mock gate passes by default.""" gate = MockQualityGate() - - result = await gate.evaluate( - quality_criteria=["Criterion 1"], - outputs=["output.md"], - project_root=project_root, - ) + result = await self.evaluate_mock_gate(gate, project_root) assert result.passed is True assert len(gate.evaluations) == 1 @@ -158,12 +376,7 @@ async def test_mock_passes_by_default(self, project_root: Path) -> None: async def test_mock_can_fail(self, project_root: Path) -> None: """Test mock gate can be configured to fail.""" gate = MockQualityGate(should_pass=False, feedback="Mock failure") - - result = await gate.evaluate( - quality_criteria=["Criterion 1"], - outputs=["output.md"], - project_root=project_root, - ) + result = await self.evaluate_mock_gate(gate, project_root) assert result.passed is False assert result.feedback == "Mock failure" @@ -172,15 +385,11 @@ async def test_mock_records_evaluations(self, project_root: Path) -> None: """Test mock gate records evaluations.""" gate = MockQualityGate() - await gate.evaluate( - quality_criteria=["Criterion 1"], - outputs=["output1.md"], - project_root=project_root, + await self.evaluate_mock_gate( + gate, project_root, criteria=["Criterion 1"], outputs=["output1.md"] ) - await gate.evaluate( - quality_criteria=["Criterion 2"], - outputs=["output2.md"], - project_root=project_root, + await self.evaluate_mock_gate( + gate, project_root, criteria=["Criterion 2"], outputs=["output2.md"] ) assert len(gate.evaluations) == 2 From 897535cd2d5e4888d4a874b47c37f70c75260917 Mon Sep 17 00:00:00 2001 From: Noah Horton Date: Thu, 5 Feb 2026 09:41:50 -0700 Subject: [PATCH 17/45] cleanups --- .claude/settings.json | 14 - .deepwork/config.yml | 
4 - .deepwork/jobs/deepwork_jobs/steps/define.md | 4 +- .deepwork/jobs/deepwork_jobs/steps/errata.md | 103 +-- .../jobs/deepwork_jobs/steps/implement.md | 6 +- claude.md | 1 + doc/architecture.md | 33 +- doc/mcp_interface.md | 12 +- doc/reference/calling_claude_in_print_mode.md | 104 +++ src/deepwork/cli/install.py | 8 - src/deepwork/cli/serve.py | 38 +- src/deepwork/mcp/quality_gate.py | 129 ++-- src/deepwork/mcp/server.py | 7 +- .../deepwork_jobs/steps/define.md | 4 +- .../deepwork_jobs/steps/errata.md | 103 +-- .../deepwork_jobs/steps/implement.md | 6 +- tests/fixtures/mock_review_agent.py | 62 +- .../test_quality_gate_integration.py | 597 ++++-------------- tests/unit/mcp/test_quality_gate.py | 207 +++--- 19 files changed, 514 insertions(+), 928 deletions(-) create mode 100644 doc/reference/calling_claude_in_print_mode.md diff --git a/.claude/settings.json b/.claude/settings.json index 962bc968..c6158219 100644 --- a/.claude/settings.json +++ b/.claude/settings.json @@ -95,24 +95,10 @@ "Bash(npm:*)", "Bash(npx:*)", "Edit(./**)", - "Read(./.deepwork/tmp/**)", - "Edit(./.deepwork/tmp/**)", - "Write(./.deepwork/tmp/**)", - "Skill(commit)", - "Skill(commit.review)", - "Skill(commit.test)", - "Skill(commit.lint)", - "Skill(commit.commit_and_push)", - "Skill(add_platform)", - "Skill(add_platform.research)", - "Skill(add_platform.add_capabilities)", - "Skill(add_platform.implement)", - "Skill(add_platform.verify)", "Read(./.deepwork/**)", "Edit(./.deepwork/**)", "Write(./.deepwork/**)", "Bash(deepwork:*)", - "Bash(.claude/hooks/commit_job_git_commit.sh:*)", "Bash(./.deepwork/jobs/deepwork_jobs/make_new_job.sh:*)", "WebSearch", "Skill(deepwork)", diff --git a/.deepwork/config.yml b/.deepwork/config.yml index 06ddbd81..9de79eea 100644 --- a/.deepwork/config.yml +++ b/.deepwork/config.yml @@ -2,7 +2,3 @@ version: 0.1.0 platforms: - claude - gemini -quality_gate: - agent_review_command: claude -p --output-format json - default_timeout: 120 - default_max_attempts: 3 diff --git a/.deepwork/jobs/deepwork_jobs/steps/define.md b/.deepwork/jobs/deepwork_jobs/steps/define.md index 31de7440..e441c9e2 100644 --- a/.deepwork/jobs/deepwork_jobs/steps/define.md +++ b/.deepwork/jobs/deepwork_jobs/steps/define.md @@ -414,7 +414,7 @@ Claude: Great! Creating the job.yml specification now... - .deepwork/jobs/competitive_research/job.yml **Next step:** -Run `/deepwork_jobs.review_job_spec` to validate the specification against quality criteria. +Run `/deepwork_jobs.implement` to generate step instruction files and sync commands. ``` ## Important Guidelines @@ -454,5 +454,5 @@ The complete YAML specification file (example shown in Step 5 above). After creating the file: 1. Inform the user that the specification is complete 2. Recommend that they review the job.yml file -3. Tell them to run `/deepwork_jobs.review_job_spec` next +3. Tell them to run `/deepwork_jobs.implement` next to generate step instructions diff --git a/.deepwork/jobs/deepwork_jobs/steps/errata.md b/.deepwork/jobs/deepwork_jobs/steps/errata.md index d4be7be3..22a5c167 100644 --- a/.deepwork/jobs/deepwork_jobs/steps/errata.md +++ b/.deepwork/jobs/deepwork_jobs/steps/errata.md @@ -2,11 +2,11 @@ ## Objective -Remove obsolete files and folders from prior DeepWork versions. This final step cleans up artifacts that are no longer used by the MCP-based system, creating a summary of all changes made during the repair workflow. +Remove obsolete files and folders from prior DeepWork versions. 
This final step cleans up artifacts that are no longer used by the MCP-based system. ## Task -Identify and clean up deprecated files and folders, then create a comprehensive summary document. +Identify and clean up deprecated files and folders. ### Step 1: Remove Legacy Job Skill Folders @@ -166,56 +166,6 @@ git status - No new untracked files should appear (unless intentionally created) - Backup files (`.backup`) should be in `.gitignore` or cleaned up -### Step 7: Create Repair Summary - -Create a `repair_summary.md` file documenting all changes made during this workflow: - -```markdown -# DeepWork Repair Summary - -**Date:** [current date] -**Project:** [project name] - -## Settings Fixes (fix_settings step) - -- [ ] Removed X `Skill(...)` permission entries -- [ ] Consolidated Y duplicate hooks -- [ ] Removed Z hardcoded paths -- [ ] Removed deprecated `deepwork hook` commands - -## Job Fixes (fix_jobs step) - -### [job_name] -- [ ] Removed `exposed` field from steps: [list] -- [ ] Migrated `stop_hooks` to `hooks.after_agent` -- [ ] Updated workflow to remove `review_job_spec` -- [ ] Version bumped to X.Y.Z - -### [another_job] -- [ ] ... - -## Errata Cleanup (errata step) - -- [ ] Removed legacy job skill folders from `.claude/skills/` and `.gemini/skills/` -- [ ] Verified `deepwork` skill folder still exists -- [ ] Cleaned `.deepwork/tmp/`: removed X files -- [ ] Reviewed `.deepwork/rules/`: [action taken] -- [ ] Updated `.deepwork/config.yml` version format - -## Files Changed - -``` -[list of all files modified/deleted] -``` - -## Recommended Next Steps - -1. Review changes with `git diff` -2. Test that `deepwork sync` runs without errors -3. Commit changes with message: "chore: migrate to DeepWork MCP format" -4. Delete backup files after confirming everything works -``` - ## Quality Criteria - Legacy job skill folders are removed from `.claude/skills/` and `.gemini/skills/` (folders matching job names or `jobname.*` patterns) @@ -225,58 +175,11 @@ Create a `repair_summary.md` file documenting all changes made during this workf - `.deepwork/tmp/rules/` folder is removed - `.deepwork/jobs/deepwork_rules/` folder is removed if present - `.deepwork/config.yml` uses current version format -- A `repair_summary.md` file is created documenting all changes - Git status shows clean changes ready to commit - When all criteria are met, include `Quality Criteria Met` in your response -## Example Summary Output - -```markdown -# DeepWork Repair Summary - -**Date:** 2024-02-04 -**Project:** internal-agentspace - -## Settings Fixes - -- Removed 87 `Skill(...)` permission entries -- Consolidated 2 duplicate `UserPromptSubmit` hooks into 1 -- Removed hardcoded path: `/Users/tyler/.local/pipx/venvs/deepwork/bin/python` -- Removed 3 deprecated `deepwork hook rules_check` commands - -## Job Fixes - -### deepwork_jobs -- Updated from old version (workflow includes `review_job_spec`) -- Reinstalled with `deepwork install --platform claude` - -### competitive_research -- Removed `exposed: true` from `discover_competitors` step -- Migrated 1 `stop_hooks` to `hooks.after_agent` -- Version bumped to 1.0.1 - -## Errata Cleanup - -- Removed legacy skill folders for 3 jobs: - - `competitive_research/` and 4 step folders from `.claude/skills/` - - `deepwork_jobs/` and 5 step folders from `.claude/skills/` - - `monthly_reporting/` and 2 step folders from `.claude/skills/` -- Verified `deepwork` skill folder still present in `.claude/skills/` -- Cleaned `.deepwork/tmp/rules/queue/` (12 old JSON files) -- Kept 
`.deepwork/rules/` (contains active example rules) -- Updated `.deepwork/config.yml` version to "1.0" - -## Recommended Next Steps - -1. `git add -A && git diff --staged` -2. `deepwork sync` (verify no errors) -3. `git commit -m "chore: migrate to DeepWork MCP format"` -4. After testing: `rm -rf .claude/skills.backup/` -``` - ## Important Notes 1. **Always back up before deleting** - User data is irreplaceable 2. **Ask before destructive actions** - When in doubt, ask the user -3. **Document everything** - The summary is valuable for understanding what changed -4. **Don't auto-commit** - Let the user review and commit changes themselves +3. **Don't auto-commit** - Let the user review and commit changes themselves diff --git a/.deepwork/jobs/deepwork_jobs/steps/implement.md b/.deepwork/jobs/deepwork_jobs/steps/implement.md index 2382a1ad..ddeed2c9 100644 --- a/.deepwork/jobs/deepwork_jobs/steps/implement.md +++ b/.deepwork/jobs/deepwork_jobs/steps/implement.md @@ -2,7 +2,7 @@ ## Objective -Generate the DeepWork job directory structure and instruction files for each step based on the validated `job.yml` specification from the review_job_spec step. +Generate the DeepWork job directory structure and instruction files for each step based on the `job.yml` specification from the define step. ## Task @@ -32,7 +32,7 @@ touch .deepwork/jobs/[job_name]/hooks/.gitkeep .deepwork/jobs/[job_name]/templat ### Step 2: Read and Validate the Specification 1. **Locate the job.yml file** - - Read `.deepwork/jobs/[job_name]/job.yml` from the review_job_spec step + - Read `.deepwork/jobs/[job_name]/job.yml` from the define step - Parse the YAML content 2. **Validate the specification** @@ -111,7 +111,7 @@ See `.deepwork/jobs/deepwork_jobs/steps/supplemental_file_references.md` for det ### Step 4: Verify job.yml Location -Verify that `job.yml` is in the correct location at `.deepwork/jobs/[job_name]/job.yml`. The define and review_job_spec steps should have created and validated it. If for some reason it's not there, you may need to create or move it. +Verify that `job.yml` is in the correct location at `.deepwork/jobs/[job_name]/job.yml`. The define step should have created it. If for some reason it's not there, you may need to create or move it. ### Step 5: Sync Skills diff --git a/claude.md b/claude.md index 81bcd8ff..56fdd1bc 100644 --- a/claude.md +++ b/claude.md @@ -172,6 +172,7 @@ my-project/ - `doc/architecture.md` - Comprehensive architecture documentation - `README.md` - High-level project overview - `shell.nix` - Development environment setup +- `doc/reference/calling_claude_in_print_mode.md` - When invoking Claude Code as a subprocess (e.g., with `--print` or `-p`), read this for correct flag ordering, structured output with JSON schemas, and common gotchas ## Development Guidelines diff --git a/doc/architecture.md b/doc/architecture.md index 6294837b..2325cd4e 100644 --- a/doc/architecture.md +++ b/doc/architecture.md @@ -1213,7 +1213,6 @@ Evaluates step outputs against quality criteria: ```python class QualityGate: def evaluate( - step_instructions: str, quality_criteria: list[str], outputs: list[str], project_root: Path, @@ -1221,10 +1220,11 @@ class QualityGate: ``` The quality gate: -1. Builds a review prompt with step instructions, criteria, and output contents -2. Invokes a review agent via subprocess (configurable command) -3. Parses the structured JSON response -4. Returns pass/fail with per-criterion feedback +1. Builds a review prompt with criteria and output file contents +2. 
Invokes Claude Code via subprocess with proper flag ordering (see `doc/reference/calling_claude_in_print_mode.md`) +3. Uses `--json-schema` for structured output conformance +4. Parses the `structured_output` field from the JSON response +5. Returns pass/fail with per-criterion feedback ### Schemas (`schemas.py`) @@ -1290,19 +1290,16 @@ Execute multi-step workflows with quality gate checkpoints. 5. **Loop continues until workflow complete** -## Quality Gate Configuration +## Quality Gate -Configure in `.deepwork/config.yml`: +Quality gate is enabled by default and uses Claude Code to evaluate step outputs +against quality criteria. The command is constructed internally with proper flag +ordering (see `doc/reference/calling_claude_in_print_mode.md`). -```yaml -version: 0.2.0 -platforms: - - claude +To disable quality gate: -quality_gate: - agent_review_command: "claude -p --output-format json" - timeout: 120 - max_attempts: 3 +```bash +deepwork serve --no-quality-gate ``` ## Serve Command @@ -1310,11 +1307,11 @@ quality_gate: Start the MCP server manually: ```bash -# Basic usage +# Basic usage (quality gate enabled by default) deepwork serve -# With quality gate -deepwork serve --quality-gate "claude -p --output-format json" +# With quality gate disabled +deepwork serve --no-quality-gate # For a specific project deepwork serve --path /path/to/project diff --git a/doc/mcp_interface.md b/doc/mcp_interface.md index 82512dc9..6b618fc1 100644 --- a/doc/mcp_interface.md +++ b/doc/mcp_interface.md @@ -174,14 +174,12 @@ The MCP server is configured via `.deepwork/config.yml`: version: "1.0" platforms: - claude - -# Quality gate configuration (optional) -quality_gate: - agent_review_command: "claude --print" # Command to run quality gate agent - default_timeout: 120 # Timeout in seconds - default_max_attempts: 3 # Max attempts before failing ``` +Quality gate is enabled by default and uses Claude Code to evaluate step outputs +against quality criteria. See `doc/reference/calling_claude_in_print_mode.md` for +details on how Claude CLI is invoked. + --- ## Server CLI Options @@ -191,7 +189,7 @@ deepwork serve [OPTIONS] Options: --path PATH Project root directory (default: current directory) - --quality-gate CMD Command for quality gate agent (overrides config) + --no-quality-gate Disable quality gate evaluation --transport TYPE Transport type: stdio or sse (default: stdio) --port PORT Port for SSE transport (default: 8000) ``` diff --git a/doc/reference/calling_claude_in_print_mode.md b/doc/reference/calling_claude_in_print_mode.md new file mode 100644 index 00000000..30f583ad --- /dev/null +++ b/doc/reference/calling_claude_in_print_mode.md @@ -0,0 +1,104 @@ +# Calling Claude Code in Print Mode + +This document covers how to invoke Claude Code as a subprocess using the `--print` flag for non-interactive, programmatic usage. + +## Basic Usage + +The `--print` (or `-p`) flag runs Claude Code in non-interactive mode, suitable for scripting and subprocess invocation. + +### Piping Input + +When piping a prompt via stdin, use `-p --` to separate flags from the piped content: + +```bash +echo "your prompt here" | claude -p -- +``` + +**Important**: The `--` is required because `-p` expects a prompt argument immediately after it. Without `--`, the next argument is interpreted as the prompt itself. + +### Flag Ordering + +Flags must come **before** `-p --`. 
Anything after `--` is treated as part of the prompt: + +```bash +# Correct - flags before -p -- +echo "say hello" | claude --max-turns 3 -p -- + +# Wrong - flags after -- become part of the prompt +echo "say hello" | claude -p -- --max-turns 3 +``` + +## Structured Output with JSON Schema + +Claude Code supports structured output via the `--json-schema` flag. This constrains the model's response to conform to a specified JSON schema. + +### Requirements + +To get structured JSON output, you need **all three** flags: +- `--print` - Non-interactive mode +- `--output-format json` - JSON output format +- `--json-schema ''` - The JSON schema as a **string** (not a filename) + +### Example + +```bash +echo "say hello" | claude --print --output-format json --json-schema '{"type":"object","properties":{"greeting":{"type":"string"}},"required":["greeting"]}' +``` + +### Output Format + +The output is a JSON object with metadata about the run. The structured output conforming to your schema is in the `structured_output` field: + +```json +{ + "type": "result", + "subtype": "success", + "is_error": false, + "duration_ms": 4557, + "num_turns": 2, + "result": "", + "session_id": "ca428892-a13e-4c4c-85df-b29f8ec851a0", + "total_cost_usd": 0.063, + "structured_output": { + "greeting": "Hello! How can I help you today?" + } +} +``` + +### Key Insight + +The model automatically conforms to the schema **without being told about it in the prompt**. You don't need to instruct the model to output JSON or describe the expected format - the `--json-schema` flag handles this behind the scenes. + +## Common Flags for Print Mode + +| Flag | Description | +|------|-------------| +| `--print` / `-p` | Non-interactive mode | +| `--output-format ` | Output format: `text` (default), `json`, or `stream-json` | +| `--json-schema ` | JSON schema string for structured output validation | +| `--max-turns ` | Maximum number of agentic turns | +| `--input-format ` | Input format: `text` (default) or `stream-json` | +| `--include-partial-messages` | Include partial message chunks (with `stream-json`) | + +## Gotchas + +1. **`--json-schema` takes a string, not a filename** - Pass the actual JSON schema content, not a path to a file. + +2. **`--output-format json` only works with `--print`** - These flags are designed for non-interactive/programmatic use. + +3. **Max turns matters** - If you set `--max-turns 1` and the model needs to use tools, it may hit the limit before producing output. Use a reasonable number of turns. + +4. **The `--` separator is critical** - When piping input with `-p`, always use `--` to mark the end of flags. 
+ +## Full Example + +```bash +# Define a schema for listing files +SCHEMA='{"type":"object","properties":{"files":{"type":"array","items":{"type":"string"}},"count":{"type":"integer"}},"required":["files","count"]}' + +# Run with structured output +echo "List Python files in src/" | claude --print --output-format json --json-schema "$SCHEMA" --max-turns 5 + +# Parse the structured_output field with jq +echo "List Python files in src/" | claude --print --output-format json --json-schema "$SCHEMA" --max-turns 5 | jq '.structured_output' +``` diff --git a/src/deepwork/cli/install.py b/src/deepwork/cli/install.py index 74209a52..030fa27b 100644 --- a/src/deepwork/cli/install.py +++ b/src/deepwork/cli/install.py @@ -323,14 +323,6 @@ def _install_deepwork(platform_name: str | None, project_path: Path) -> None: if "platforms" not in config_data: config_data["platforms"] = [] - # Initialize quality_gate config with defaults - if "quality_gate" not in config_data: - config_data["quality_gate"] = { - "agent_review_command": "claude -p --output-format json", - "default_timeout": 120, - "default_max_attempts": 3, - } - # Add each platform if not already present added_platforms: list[str] = [] for i, platform in enumerate(platforms_to_add): diff --git a/src/deepwork/cli/serve.py b/src/deepwork/cli/serve.py index e591b414..5625056a 100644 --- a/src/deepwork/cli/serve.py +++ b/src/deepwork/cli/serve.py @@ -47,10 +47,10 @@ def _load_config(project_path: Path) -> dict: help="Path to project directory (default: current directory)", ) @click.option( - "--quality-gate", - type=str, - default=None, - help="Command for quality gate agent (e.g., 'claude -p --output-format json')", + "--no-quality-gate", + is_flag=True, + default=False, + help="Disable quality gate evaluation", ) @click.option( "--transport", @@ -66,7 +66,7 @@ def _load_config(project_path: Path) -> dict: ) def serve( path: Path, - quality_gate: str | None, + no_quality_gate: bool, transport: str, port: int, ) -> None: @@ -75,19 +75,22 @@ def serve( Exposes workflow management tools to AI agents via MCP protocol. By default uses stdio transport for local integration with Claude Code. + Quality gate is enabled by default and uses Claude Code to evaluate + step outputs against quality criteria. 
+ Examples: # Start server for current directory deepwork serve - # Start with quality gate enabled - deepwork serve --quality-gate "claude -p --output-format json" + # Start with quality gate disabled + deepwork serve --no-quality-gate # Start for a specific project deepwork serve --path /path/to/project """ try: - _serve_mcp(path, quality_gate, transport, port) + _serve_mcp(path, not no_quality_gate, transport, port) except ServeError as e: console.print(f"[red]Error:[/red] {e}") raise click.Abort() from e @@ -98,7 +101,7 @@ def serve( def _serve_mcp( project_path: Path, - quality_gate_command: str | None, + enable_quality_gate: bool, transport: str, port: int, ) -> None: @@ -106,7 +109,7 @@ def _serve_mcp( Args: project_path: Path to project directory - quality_gate_command: Optional quality gate command + enable_quality_gate: Whether to enable quality gate evaluation transport: Transport protocol (stdio or sse) port: Port for SSE transport @@ -116,25 +119,12 @@ def _serve_mcp( # Validate project has DeepWork installed _load_config(project_path) - # Load quality gate settings from config if not specified via CLI - config = _load_config(project_path) - qg_config = config.get("quality_gate", {}) - - if quality_gate_command is None: - quality_gate_command = qg_config.get("agent_review_command") - - # Get timeout and max_attempts from config (with defaults) - quality_gate_timeout = qg_config.get("default_timeout", 120) - quality_gate_max_attempts = qg_config.get("default_max_attempts", 3) - # Create and run server from deepwork.mcp.server import create_server server = create_server( project_root=project_path, - quality_gate_command=quality_gate_command, - quality_gate_timeout=quality_gate_timeout, - quality_gate_max_attempts=quality_gate_max_attempts, + enable_quality_gate=enable_quality_gate, ) if transport == "stdio": diff --git a/src/deepwork/mcp/quality_gate.py b/src/deepwork/mcp/quality_gate.py index 4c973fc6..511bad71 100644 --- a/src/deepwork/mcp/quality_gate.py +++ b/src/deepwork/mcp/quality_gate.py @@ -8,7 +8,6 @@ import asyncio import json -import shlex from pathlib import Path from typing import Any @@ -54,21 +53,26 @@ class QualityGate: Uses a subprocess to invoke a review agent (e.g., Claude CLI) that evaluates outputs and returns structured feedback. + + See doc/reference/calling_claude_in_print_mode.md for details on + proper CLI invocation with structured output. """ def __init__( self, - command: str = "claude -p --output-format json", timeout: int = 120, + *, + _test_command: list[str] | None = None, ): """Initialize quality gate. Args: - command: Base command to invoke review agent (system prompt added via --system-prompt flag) timeout: Timeout in seconds for review agent + _test_command: Internal testing only - override the subprocess command. + When set, skips adding --json-schema flag (test mock handles it). """ - self.command = command self.timeout = timeout + self._test_command = _test_command def _build_instructions(self, quality_criteria: list[str]) -> str: """Build the system instructions for the review agent. @@ -146,62 +150,37 @@ async def _build_payload( return "\n\n".join(output_sections) - def _parse_response( - self, response_text: str, validate_schema: bool = True - ) -> QualityGateResult: + def _parse_response(self, response_text: str) -> QualityGateResult: """Parse the review agent's response. + When using --print --output-format json --json-schema, Claude CLI returns + a wrapper object with the structured output in the 'structured_output' field. 
+ Args: - response_text: Raw response from review agent - validate_schema: Whether to validate against JSON schema (default True) + response_text: Raw response from review agent (JSON wrapper) Returns: Parsed QualityGateResult Raises: - QualityGateError: If response cannot be parsed or fails schema validation + QualityGateError: If response cannot be parsed """ - # Try to extract JSON from the response try: - # First, try to parse as JSON to check if it's a wrapper object - # from --output-format json (contains type, result, etc.) - json_text = response_text.strip() - try: - wrapper = json.loads(json_text) - # Check if this is a Claude CLI wrapper object - if isinstance(wrapper, dict) and "type" in wrapper and "result" in wrapper: - # Extract the actual result content - json_text = wrapper.get("result", "") - if not json_text: - raise QualityGateError( - "Review agent returned empty result in wrapper object" - ) - except json.JSONDecodeError: - # Not valid JSON at the top level, continue with normal parsing - pass - - # Look for JSON in code blocks - if "```json" in json_text: - start = json_text.index("```json") + 7 - end = json_text.index("```", start) - json_text = json_text[start:end].strip() - elif "```" in json_text: - start = json_text.index("```") + 3 - end = json_text.index("```", start) - json_text = json_text[start:end].strip() - - data = json.loads(json_text) - - # Validate against JSON schema if enabled - if validate_schema: - try: - jsonschema.validate(data, QUALITY_GATE_RESPONSE_SCHEMA) - except jsonschema.ValidationError as ve: - raise QualityGateError( - f"Quality gate response failed schema validation: {ve.message}\n" - f"Path: {list(ve.absolute_path)}\n" - f"Response was: {json_text[:500]}..." - ) from ve + wrapper = json.loads(response_text.strip()) + + # Check for errors in the wrapper + if wrapper.get("is_error"): + raise QualityGateError( + f"Review agent returned error: {wrapper.get('result', 'Unknown error')}" + ) + + # Extract structured_output - this is where --json-schema puts the result + data = wrapper.get("structured_output") + if data is None: + raise QualityGateError( + "Review agent response missing 'structured_output' field. " + f"Response was: {response_text[:500]}..." + ) # Parse criteria results criteria_results = [ @@ -219,9 +198,14 @@ def _parse_response( criteria_results=criteria_results, ) - except (json.JSONDecodeError, ValueError, KeyError) as e: + except json.JSONDecodeError as e: + raise QualityGateError( + f"Failed to parse review agent response as JSON: {e}\n" + f"Response was: {response_text[:500]}..." + ) from e + except (ValueError, KeyError) as e: raise QualityGateError( - f"Failed to parse review agent response: {e}\n" + f"Failed to extract quality gate result: {e}\n" f"Response was: {response_text[:500]}..." 
) from e @@ -256,21 +240,34 @@ async def evaluate( instructions = self._build_instructions(quality_criteria) payload = await self._build_payload(outputs, project_root) - # Build command with system prompt flag and JSON schema - # Parse the base command properly to handle quoted arguments - base_cmd = shlex.split(self.command) - schema_json = json.dumps(QUALITY_GATE_RESPONSE_SCHEMA) - full_cmd = base_cmd + [ - # Add system prompt via --system-prompt flag - "--system-prompt", - instructions, - # Add JSON schema to enforce structured output - "--json-schema", - schema_json, - ] + # Build command with proper flag ordering for Claude CLI + # See doc/reference/calling_claude_in_print_mode.md for details + # + # Key insight: flags must come BEFORE `-p --` because: + # - `-p` expects a prompt argument immediately after + # - `--` marks the end of flags, everything after is the prompt + # - When piping via stdin, we use `-p --` to read from stdin + if self._test_command: + # Testing mode: use provided command, add system prompt only + full_cmd = self._test_command + ["--system-prompt", instructions] + else: + # Production mode: use Claude CLI with proper flags + schema_json = json.dumps(QUALITY_GATE_RESPONSE_SCHEMA) + full_cmd = [ + "claude", + "--print", # Non-interactive mode + "--output-format", + "json", # JSON output wrapper + "--system-prompt", + instructions, + "--json-schema", + schema_json, # Structured output - result in 'structured_output' field + "-p", + "--", # Read prompt from stdin + ] try: - # Run review agent with system prompt and payload using async subprocess + # Run review agent with payload piped via stdin process = await asyncio.create_subprocess_exec( *full_cmd, stdin=asyncio.subprocess.PIPE, @@ -300,7 +297,7 @@ async def evaluate( return self._parse_response(stdout.decode()) except FileNotFoundError as e: - raise QualityGateError(f"Review agent command not found: {base_cmd[0]}") from e + raise QualityGateError("Review agent command not found: claude") from e class MockQualityGate(QualityGate): diff --git a/src/deepwork/mcp/server.py b/src/deepwork/mcp/server.py index 89ba6fa4..03ea936b 100644 --- a/src/deepwork/mcp/server.py +++ b/src/deepwork/mcp/server.py @@ -34,7 +34,7 @@ def create_server( project_root: Path | str, - quality_gate_command: str | None = None, + enable_quality_gate: bool = True, quality_gate_timeout: int = 120, quality_gate_max_attempts: int = 3, ) -> FastMCP: @@ -42,7 +42,7 @@ def create_server( Args: project_root: Path to the project root - quality_gate_command: Optional command for quality gate agent + enable_quality_gate: Whether to enable quality gate evaluation (default: True) quality_gate_timeout: Timeout in seconds for quality gate (default: 120) quality_gate_max_attempts: Max attempts before failing quality gate (default: 3) @@ -55,9 +55,8 @@ def create_server( state_manager = StateManager(project_path) quality_gate: QualityGate | None = None - if quality_gate_command: + if enable_quality_gate: quality_gate = QualityGate( - command=quality_gate_command, timeout=quality_gate_timeout, ) diff --git a/src/deepwork/standard_jobs/deepwork_jobs/steps/define.md b/src/deepwork/standard_jobs/deepwork_jobs/steps/define.md index 31de7440..e441c9e2 100644 --- a/src/deepwork/standard_jobs/deepwork_jobs/steps/define.md +++ b/src/deepwork/standard_jobs/deepwork_jobs/steps/define.md @@ -414,7 +414,7 @@ Claude: Great! Creating the job.yml specification now... 
- .deepwork/jobs/competitive_research/job.yml **Next step:** -Run `/deepwork_jobs.review_job_spec` to validate the specification against quality criteria. +Run `/deepwork_jobs.implement` to generate step instruction files and sync commands. ``` ## Important Guidelines @@ -454,5 +454,5 @@ The complete YAML specification file (example shown in Step 5 above). After creating the file: 1. Inform the user that the specification is complete 2. Recommend that they review the job.yml file -3. Tell them to run `/deepwork_jobs.review_job_spec` next +3. Tell them to run `/deepwork_jobs.implement` next to generate step instructions diff --git a/src/deepwork/standard_jobs/deepwork_jobs/steps/errata.md b/src/deepwork/standard_jobs/deepwork_jobs/steps/errata.md index d4be7be3..22a5c167 100644 --- a/src/deepwork/standard_jobs/deepwork_jobs/steps/errata.md +++ b/src/deepwork/standard_jobs/deepwork_jobs/steps/errata.md @@ -2,11 +2,11 @@ ## Objective -Remove obsolete files and folders from prior DeepWork versions. This final step cleans up artifacts that are no longer used by the MCP-based system, creating a summary of all changes made during the repair workflow. +Remove obsolete files and folders from prior DeepWork versions. This final step cleans up artifacts that are no longer used by the MCP-based system. ## Task -Identify and clean up deprecated files and folders, then create a comprehensive summary document. +Identify and clean up deprecated files and folders. ### Step 1: Remove Legacy Job Skill Folders @@ -166,56 +166,6 @@ git status - No new untracked files should appear (unless intentionally created) - Backup files (`.backup`) should be in `.gitignore` or cleaned up -### Step 7: Create Repair Summary - -Create a `repair_summary.md` file documenting all changes made during this workflow: - -```markdown -# DeepWork Repair Summary - -**Date:** [current date] -**Project:** [project name] - -## Settings Fixes (fix_settings step) - -- [ ] Removed X `Skill(...)` permission entries -- [ ] Consolidated Y duplicate hooks -- [ ] Removed Z hardcoded paths -- [ ] Removed deprecated `deepwork hook` commands - -## Job Fixes (fix_jobs step) - -### [job_name] -- [ ] Removed `exposed` field from steps: [list] -- [ ] Migrated `stop_hooks` to `hooks.after_agent` -- [ ] Updated workflow to remove `review_job_spec` -- [ ] Version bumped to X.Y.Z - -### [another_job] -- [ ] ... - -## Errata Cleanup (errata step) - -- [ ] Removed legacy job skill folders from `.claude/skills/` and `.gemini/skills/` -- [ ] Verified `deepwork` skill folder still exists -- [ ] Cleaned `.deepwork/tmp/`: removed X files -- [ ] Reviewed `.deepwork/rules/`: [action taken] -- [ ] Updated `.deepwork/config.yml` version format - -## Files Changed - -``` -[list of all files modified/deleted] -``` - -## Recommended Next Steps - -1. Review changes with `git diff` -2. Test that `deepwork sync` runs without errors -3. Commit changes with message: "chore: migrate to DeepWork MCP format" -4. 
Delete backup files after confirming everything works -``` - ## Quality Criteria - Legacy job skill folders are removed from `.claude/skills/` and `.gemini/skills/` (folders matching job names or `jobname.*` patterns) @@ -225,58 +175,11 @@ Create a `repair_summary.md` file documenting all changes made during this workf - `.deepwork/tmp/rules/` folder is removed - `.deepwork/jobs/deepwork_rules/` folder is removed if present - `.deepwork/config.yml` uses current version format -- A `repair_summary.md` file is created documenting all changes - Git status shows clean changes ready to commit - When all criteria are met, include `Quality Criteria Met` in your response -## Example Summary Output - -```markdown -# DeepWork Repair Summary - -**Date:** 2024-02-04 -**Project:** internal-agentspace - -## Settings Fixes - -- Removed 87 `Skill(...)` permission entries -- Consolidated 2 duplicate `UserPromptSubmit` hooks into 1 -- Removed hardcoded path: `/Users/tyler/.local/pipx/venvs/deepwork/bin/python` -- Removed 3 deprecated `deepwork hook rules_check` commands - -## Job Fixes - -### deepwork_jobs -- Updated from old version (workflow includes `review_job_spec`) -- Reinstalled with `deepwork install --platform claude` - -### competitive_research -- Removed `exposed: true` from `discover_competitors` step -- Migrated 1 `stop_hooks` to `hooks.after_agent` -- Version bumped to 1.0.1 - -## Errata Cleanup - -- Removed legacy skill folders for 3 jobs: - - `competitive_research/` and 4 step folders from `.claude/skills/` - - `deepwork_jobs/` and 5 step folders from `.claude/skills/` - - `monthly_reporting/` and 2 step folders from `.claude/skills/` -- Verified `deepwork` skill folder still present in `.claude/skills/` -- Cleaned `.deepwork/tmp/rules/queue/` (12 old JSON files) -- Kept `.deepwork/rules/` (contains active example rules) -- Updated `.deepwork/config.yml` version to "1.0" - -## Recommended Next Steps - -1. `git add -A && git diff --staged` -2. `deepwork sync` (verify no errors) -3. `git commit -m "chore: migrate to DeepWork MCP format"` -4. After testing: `rm -rf .claude/skills.backup/` -``` - ## Important Notes 1. **Always back up before deleting** - User data is irreplaceable 2. **Ask before destructive actions** - When in doubt, ask the user -3. **Document everything** - The summary is valuable for understanding what changed -4. **Don't auto-commit** - Let the user review and commit changes themselves +3. **Don't auto-commit** - Let the user review and commit changes themselves diff --git a/src/deepwork/standard_jobs/deepwork_jobs/steps/implement.md b/src/deepwork/standard_jobs/deepwork_jobs/steps/implement.md index 2382a1ad..ddeed2c9 100644 --- a/src/deepwork/standard_jobs/deepwork_jobs/steps/implement.md +++ b/src/deepwork/standard_jobs/deepwork_jobs/steps/implement.md @@ -2,7 +2,7 @@ ## Objective -Generate the DeepWork job directory structure and instruction files for each step based on the validated `job.yml` specification from the review_job_spec step. +Generate the DeepWork job directory structure and instruction files for each step based on the `job.yml` specification from the define step. ## Task @@ -32,7 +32,7 @@ touch .deepwork/jobs/[job_name]/hooks/.gitkeep .deepwork/jobs/[job_name]/templat ### Step 2: Read and Validate the Specification 1. **Locate the job.yml file** - - Read `.deepwork/jobs/[job_name]/job.yml` from the review_job_spec step + - Read `.deepwork/jobs/[job_name]/job.yml` from the define step - Parse the YAML content 2. 
**Validate the specification** @@ -111,7 +111,7 @@ See `.deepwork/jobs/deepwork_jobs/steps/supplemental_file_references.md` for det ### Step 4: Verify job.yml Location -Verify that `job.yml` is in the correct location at `.deepwork/jobs/[job_name]/job.yml`. The define and review_job_spec steps should have created and validated it. If for some reason it's not there, you may need to create or move it. +Verify that `job.yml` is in the correct location at `.deepwork/jobs/[job_name]/job.yml`. The define step should have created it. If for some reason it's not there, you may need to create or move it. ### Step 5: Sync Skills diff --git a/tests/fixtures/mock_review_agent.py b/tests/fixtures/mock_review_agent.py index 48130ca1..4b57c06a 100755 --- a/tests/fixtures/mock_review_agent.py +++ b/tests/fixtures/mock_review_agent.py @@ -2,8 +2,29 @@ """Mock review agent for integration testing. This script simulates a review agent that reads a prompt from stdin -and returns a JSON response. The behavior is controlled by environment -variables or by keywords in the input prompt. +and returns a JSON response in Claude CLI wrapper format. The behavior +is controlled by environment variables or by keywords in the input prompt. + +############################################################################ +# CRITICAL: OUTPUT FORMAT +# +# This mock returns responses in the same wrapper format as Claude CLI +# when using `--print --output-format json --json-schema`. The quality gate +# response is in the `structured_output` field: +# +# { +# "type": "result", +# "subtype": "success", +# "is_error": false, +# "structured_output": { +# "passed": true/false, +# "feedback": "...", +# "criteria_results": [...] +# } +# } +# +# See doc/reference/calling_claude_in_print_mode.md for details. +############################################################################ Behavior modes: - REVIEW_RESULT=pass: Always return passed=true @@ -21,6 +42,23 @@ import time +def wrap_response(quality_result: dict) -> dict: + """Wrap a quality gate result in Claude CLI output format. 
+ + Args: + quality_result: The quality gate result with passed, feedback, criteria_results + + Returns: + Wrapper object matching Claude CLI --output-format json --json-schema output + """ + return { + "type": "result", + "subtype": "success", + "is_error": False, + "structured_output": quality_result, + } + + def main() -> int: """Main entry point.""" mode = os.environ.get("REVIEW_RESULT", "auto") @@ -45,16 +83,16 @@ def main() -> int: return 0 if mode == "pass": - response = { + response = wrap_response({ "passed": True, "feedback": "All criteria met", "criteria_results": [{"criterion": "Criterion 1", "passed": True, "feedback": None}], - } + }) print(json.dumps(response)) return 0 if mode == "fail": - response = { + response = wrap_response({ "passed": False, "feedback": "Quality criteria not met", "criteria_results": [ @@ -64,22 +102,22 @@ def main() -> int: "feedback": "Did not meet requirements", } ], - } + }) print(json.dumps(response)) return 0 # Auto mode: parse prompt for markers if "FORCE_PASS" in prompt: - response = { + response = wrap_response({ "passed": True, "feedback": "Forced pass via marker", "criteria_results": [], - } + }) print(json.dumps(response)) return 0 if "FORCE_FAIL" in prompt: - response = { + response = wrap_response({ "passed": False, "feedback": "Forced fail via marker", "criteria_results": [ @@ -89,7 +127,7 @@ def main() -> int: "feedback": "Failed due to FORCE_FAIL marker", } ], - } + }) print(json.dumps(response)) return 0 @@ -147,13 +185,13 @@ def main() -> int: } ) - response = { + quality_result = { "passed": all_passed, "feedback": "All criteria met" if all_passed else "Some criteria failed", "criteria_results": criteria_results, } - print(json.dumps(response)) + print(json.dumps(wrap_response(quality_result))) return 0 diff --git a/tests/integration/test_quality_gate_integration.py b/tests/integration/test_quality_gate_integration.py index c1d56c6e..69019bc4 100644 --- a/tests/integration/test_quality_gate_integration.py +++ b/tests/integration/test_quality_gate_integration.py @@ -1,533 +1,150 @@ """Integration tests for quality gate subprocess execution. -These tests actually run the subprocess and verify that pass/fail -detection works correctly with real process invocation. +############################################################################### +# ⚠️ CRITICAL: THESE TESTS MUST USE THE REAL CLAUDE CLI ⚠️ +# +# The entire point of these integration tests is to verify that the QualityGate +# class works correctly with the ACTUAL Claude Code CLI subprocess. +# +# DO NOT: +# - Mock the QualityGate class +# - Use _test_command parameter +# - Stub out subprocess calls +# - Use the MockQualityGate class +# +# If you need to test parsing logic or edge cases, add those tests to: +# tests/unit/mcp/test_quality_gate.py +# +# These tests are SKIPPED in CI because they require Claude Code CLI to be +# installed and authenticated. They are meant to be run locally during +# development to verify real-world behavior. 
+############################################################################### """ from __future__ import annotations import os -import sys from pathlib import Path import pytest -from deepwork.mcp.quality_gate import QualityGate, QualityGateError +from deepwork.mcp.quality_gate import QualityGate -# Path to our mock review agent script -MOCK_AGENT_PATH = Path(__file__).parent.parent / "fixtures" / "mock_review_agent.py" +# Skip marker for tests that require real Claude CLI +# GitHub Actions sets CI=true, as do most other CI systems +requires_claude_cli = pytest.mark.skipif( + os.environ.get("CI") == "true" or os.environ.get("GITHUB_ACTIONS") == "true", + reason="Integration tests require Claude CLI - skipped in CI", +) @pytest.fixture def project_root(tmp_path: Path) -> Path: """Create a temporary project root with test files.""" - # Create a sample output file - output_file = tmp_path / "output.md" - output_file.write_text("Test content for review") return tmp_path -@pytest.fixture -def mock_agent_command() -> str: - """Get the command to run the mock review agent.""" - return f"{sys.executable} {MOCK_AGENT_PATH}" - - -class TestQualityGateIntegration: - """Integration tests that run real subprocesses.""" - - async def test_subprocess_returns_pass( - self, project_root: Path, mock_agent_command: str - ) -> None: - """Test that a passing response is correctly detected.""" - gate = QualityGate(command=mock_agent_command, timeout=30) - - # Set environment to force pass - env_backup = os.environ.get("REVIEW_RESULT") - os.environ["REVIEW_RESULT"] = "pass" - - try: - result = await gate.evaluate( - quality_criteria=["Output must exist", "Output must be valid"], - outputs=["output.md"], - project_root=project_root, - ) - - assert result.passed is True, f"Expected pass but got: {result}" - assert result.feedback == "All criteria met" - finally: - if env_backup is not None: - os.environ["REVIEW_RESULT"] = env_backup - else: - os.environ.pop("REVIEW_RESULT", None) - - async def test_subprocess_returns_fail( - self, project_root: Path, mock_agent_command: str - ) -> None: - """Test that a failing response is correctly detected.""" - gate = QualityGate(command=mock_agent_command, timeout=30) - - # Set environment to force fail - env_backup = os.environ.get("REVIEW_RESULT") - os.environ["REVIEW_RESULT"] = "fail" - - try: - result = await gate.evaluate( - quality_criteria=["Output must exist"], - outputs=["output.md"], - project_root=project_root, - ) +############################################################################### +# ⚠️ REAL INTEGRATION TESTS - DO NOT MOCK ⚠️ +# +# These tests call the actual Claude CLI. They verify that: +# 1. The subprocess invocation works correctly +# 2. The JSON schema is properly passed and enforced +# 3. 
Response parsing handles real Claude output +# +# Run these locally with: pytest tests/integration/test_quality_gate_integration.py -v +############################################################################### - assert result.passed is False, f"Expected fail but got pass: {result}" - assert "not met" in result.feedback.lower() - assert len(result.criteria_results) > 0 - assert result.criteria_results[0].passed is False - finally: - if env_backup is not None: - os.environ["REVIEW_RESULT"] = env_backup - else: - os.environ.pop("REVIEW_RESULT", None) - - async def test_subprocess_malformed_response_raises_error( - self, project_root: Path, mock_agent_command: str - ) -> None: - """Test that malformed JSON raises an error.""" - gate = QualityGate(command=mock_agent_command, timeout=30) - - env_backup = os.environ.get("REVIEW_RESULT") - os.environ["REVIEW_RESULT"] = "malformed" - - try: - with pytest.raises(QualityGateError, match="Failed to parse"): - await gate.evaluate( - quality_criteria=["Criterion 1"], - outputs=["output.md"], - project_root=project_root, - ) - finally: - if env_backup is not None: - os.environ["REVIEW_RESULT"] = env_backup - else: - os.environ.pop("REVIEW_RESULT", None) - - async def test_subprocess_nonzero_exit_raises_error( - self, project_root: Path, mock_agent_command: str - ) -> None: - """Test that non-zero exit code raises an error.""" - gate = QualityGate(command=mock_agent_command, timeout=30) - env_backup = os.environ.get("REVIEW_RESULT") - os.environ["REVIEW_RESULT"] = "error" +@requires_claude_cli +class TestRealClaudeIntegration: + """Integration tests that run the REAL Claude CLI. - try: - with pytest.raises(QualityGateError, match="failed with exit code"): - await gate.evaluate( - quality_criteria=["Criterion 1"], - outputs=["output.md"], - project_root=project_root, - ) - finally: - if env_backup is not None: - os.environ["REVIEW_RESULT"] = env_backup - else: - os.environ.pop("REVIEW_RESULT", None) + ⚠️ WARNING: DO NOT MOCK THESE TESTS ⚠️ - async def test_subprocess_timeout(self, project_root: Path, mock_agent_command: str) -> None: - """Test that subprocess timeout is handled correctly.""" - gate = QualityGate(command=mock_agent_command, timeout=1) # 1 second timeout + These tests exist specifically to verify that QualityGate works with the + actual Claude Code CLI. If you mock them, you defeat their entire purpose. 
+ """ - env_backup = os.environ.get("REVIEW_RESULT") - os.environ["REVIEW_RESULT"] = "timeout" - - try: - with pytest.raises(QualityGateError, match="timed out"): - await gate.evaluate( - quality_criteria=["Criterion 1"], - outputs=["output.md"], - project_root=project_root, - ) - finally: - if env_backup is not None: - os.environ["REVIEW_RESULT"] = env_backup - else: - os.environ.pop("REVIEW_RESULT", None) - - async def test_subprocess_command_not_found(self, project_root: Path) -> None: - """Test that missing command is handled correctly.""" - gate = QualityGate(command="nonexistent_command_12345", timeout=30) - - with pytest.raises(QualityGateError, match="command not found"): - await gate.evaluate( - quality_criteria=["Criterion 1"], - outputs=["output.md"], - project_root=project_root, - ) - - async def test_auto_mode_detects_force_pass_marker( - self, project_root: Path, mock_agent_command: str + async def test_real_claude_evaluates_passing_criteria( + self, project_root: Path ) -> None: - """Test that FORCE_PASS marker in content causes pass.""" - gate = QualityGate(command=mock_agent_command, timeout=30) - - # Create output with FORCE_PASS marker - output_file = project_root / "marker_output.md" - output_file.write_text("Content with FORCE_PASS marker") - - # Clear any environment override - env_backup = os.environ.get("REVIEW_RESULT") - os.environ.pop("REVIEW_RESULT", None) - - try: - result = await gate.evaluate( - quality_criteria=["Criterion 1"], - outputs=["marker_output.md"], - project_root=project_root, - ) + """Test that real Claude CLI correctly evaluates passing criteria. + + ⚠️ THIS TEST MUST USE THE REAL CLAUDE CLI - DO NOT MOCK ⚠️ + """ + # Create a well-formed output file that clearly meets the criteria + output_file = project_root / "analysis.md" + output_file.write_text( + "# Analysis Report\n\n" + "## Summary\n" + "This document contains a complete analysis.\n\n" + "## Details\n" + "The analysis covers all required points.\n" + ) - assert result.passed is True - finally: - if env_backup is not None: - os.environ["REVIEW_RESULT"] = env_backup + # ⚠️ NO _test_command - this uses the REAL Claude CLI + gate = QualityGate(timeout=120) - async def test_auto_mode_detects_force_fail_marker( - self, project_root: Path, mock_agent_command: str - ) -> None: - """Test that FORCE_FAIL marker in content causes fail.""" - gate = QualityGate(command=mock_agent_command, timeout=30) - - # Create output with FORCE_FAIL marker - output_file = project_root / "marker_output.md" - output_file.write_text("Content with FORCE_FAIL marker") + result = await gate.evaluate( + quality_criteria=[ + "The document must have a title", + "The document must contain a summary section", + ], + outputs=["analysis.md"], + project_root=project_root, + ) - # Clear any environment override - env_backup = os.environ.get("REVIEW_RESULT") - os.environ.pop("REVIEW_RESULT", None) + # Verify we got a structured response + assert result is not None + assert isinstance(result.passed, bool) + assert isinstance(result.feedback, str) + assert len(result.feedback) > 0 - try: - result = await gate.evaluate( - quality_criteria=["Criterion 1"], - outputs=["marker_output.md"], - project_root=project_root, + # The document clearly meets the criteria, so it should pass + # (though we allow for some model variability) + if not result.passed: + # If it failed, at least verify we got proper feedback + assert len(result.criteria_results) > 0 + pytest.skip( + f"Model returned fail (may be model variability): {result.feedback}" ) 
- assert result.passed is False - finally: - if env_backup is not None: - os.environ["REVIEW_RESULT"] = env_backup - - async def test_missing_output_file_causes_fail( - self, project_root: Path, mock_agent_command: str + async def test_real_claude_evaluates_failing_criteria( + self, project_root: Path ) -> None: - """Test that missing output file is detected as failure.""" - gate = QualityGate(command=mock_agent_command, timeout=30) - - # Clear any environment override - let auto mode handle it - env_backup = os.environ.get("REVIEW_RESULT") - os.environ.pop("REVIEW_RESULT", None) - - try: - result = await gate.evaluate( - quality_criteria=["Output files must exist"], - outputs=["nonexistent_file.md"], - project_root=project_root, - ) - - # The mock agent should detect "File not found" in prompt and fail - assert result.passed is False - finally: - if env_backup is not None: - os.environ["REVIEW_RESULT"] = env_backup - - -class TestQualityGateResponseParsing: - """Test response parsing with various JSON formats.""" - - def test_parse_json_in_code_block(self) -> None: - """Test parsing JSON wrapped in markdown code block.""" - gate = QualityGate() - - response = """Here's my evaluation: - -```json -{ - "passed": true, - "feedback": "All good", - "criteria_results": [ - {"criterion": "Test", "passed": true, "feedback": null} - ] -} -``` - -Hope that helps!""" - - result = gate._parse_response(response) - - assert result.passed is True - assert result.feedback == "All good" - - def test_parse_json_in_plain_code_block(self) -> None: - """Test parsing JSON in plain code block (no json tag).""" - gate = QualityGate() - - response = """``` -{ - "passed": false, - "feedback": "Issues found", - "criteria_results": [] -} -```""" - - result = gate._parse_response(response) - - assert result.passed is False - assert result.feedback == "Issues found" - - def test_parse_raw_json(self) -> None: - """Test parsing raw JSON without code block.""" - gate = QualityGate() - - response = '{"passed": true, "feedback": "OK", "criteria_results": []}' + """Test that real Claude CLI correctly identifies missing criteria. 
- result = gate._parse_response(response) + ⚠️ THIS TEST MUST USE THE REAL CLAUDE CLI - DO NOT MOCK ⚠️ + """ + # Create an output file that is clearly missing required content + output_file = project_root / "incomplete.md" + output_file.write_text("Just some random text without any structure.") - assert result.passed is True - assert result.feedback == "OK" + # ⚠️ NO _test_command - this uses the REAL Claude CLI + gate = QualityGate(timeout=120) - def test_parse_missing_passed_field_raises_error(self) -> None: - """Test that missing 'passed' field raises schema validation error.""" - gate = QualityGate() - - # JSON without 'passed' field - now fails schema validation - response = '{"feedback": "Some feedback", "criteria_results": []}' - - with pytest.raises(QualityGateError, match="failed schema validation"): - gate._parse_response(response) - - def test_parse_non_boolean_passed_field_raises_error(self) -> None: - """Test that non-boolean 'passed' field raises schema validation error.""" - gate = QualityGate() - - # Various truthy but not boolean values - all should fail schema validation - test_cases = [ - ('{"passed": 1, "feedback": "test", "criteria_results": []}', "integer 1"), - ('{"passed": "true", "feedback": "test", "criteria_results": []}', "string 'true'"), - ('{"passed": "yes", "feedback": "test", "criteria_results": []}', "string 'yes'"), - ('{"passed": null, "feedback": "test", "criteria_results": []}', "null"), - ] - - for response, _case_name in test_cases: - with pytest.raises(QualityGateError, match="failed schema validation"): - gate._parse_response(response) - - def test_parse_without_schema_validation_is_lenient(self) -> None: - """Test that schema validation can be disabled for lenient parsing.""" - gate = QualityGate() - - # JSON without 'passed' field - without schema validation, defaults to False - response = '{"feedback": "Some feedback", "criteria_results": []}' - - result = gate._parse_response(response, validate_schema=False) - - # Without schema validation, missing passed defaults to False (fail-safe) - assert result.passed is False - - def test_parse_criteria_results_structure(self) -> None: - """Test that criteria results are properly parsed.""" - gate = QualityGate() - - response = """```json -{ - "passed": false, - "feedback": "Two criteria failed", - "criteria_results": [ - {"criterion": "First check", "passed": true, "feedback": null}, - {"criterion": "Second check", "passed": false, "feedback": "Missing data"}, - {"criterion": "Third check", "passed": false, "feedback": "Wrong format"} - ] -} -```""" - - result = gate._parse_response(response) - - assert result.passed is False - assert len(result.criteria_results) == 3 - assert result.criteria_results[0].passed is True - assert result.criteria_results[0].feedback is None - assert result.criteria_results[1].passed is False - assert result.criteria_results[1].feedback == "Missing data" - assert result.criteria_results[2].passed is False - assert result.criteria_results[2].feedback == "Wrong format" - - def test_parse_empty_criteria_results(self) -> None: - """Test parsing with empty criteria results.""" - gate = QualityGate() - - response = '{"passed": true, "feedback": "OK", "criteria_results": []}' - - result = gate._parse_response(response) - - assert result.passed is True - assert result.criteria_results == [] - - -class TestQualityGateSchemaValidation: - """Test JSON schema validation for quality gate responses.""" - - def test_valid_response_passes_schema(self) -> None: - """Test that valid response 
passes schema validation.""" - gate = QualityGate() - - response = """```json -{ - "passed": true, - "feedback": "All criteria met", - "criteria_results": [ - {"criterion": "Test 1", "passed": true, "feedback": null}, - {"criterion": "Test 2", "passed": true} - ] -} -```""" - - result = gate._parse_response(response) - - assert result.passed is True - assert result.feedback == "All criteria met" - - def test_missing_feedback_field_raises_error(self) -> None: - """Test that missing feedback field raises schema error.""" - gate = QualityGate() - - # Missing required 'feedback' field - response = '{"passed": true, "criteria_results": []}' - - with pytest.raises(QualityGateError, match="failed schema validation"): - gate._parse_response(response) - - def test_invalid_criteria_result_type_raises_error(self) -> None: - """Test that invalid criteria_results type raises schema error.""" - gate = QualityGate() - - # criteria_results should be an array, not a string - response = '{"passed": true, "feedback": "test", "criteria_results": "invalid"}' - - with pytest.raises(QualityGateError, match="failed schema validation"): - gate._parse_response(response) - - def test_missing_criterion_in_results_raises_error(self) -> None: - """Test that missing criterion field in results raises schema error.""" - gate = QualityGate() - - # criteria_results item missing required 'criterion' field - response = """{"passed": true, "feedback": "test", "criteria_results": [ - {"passed": true, "feedback": null} - ]}""" - - with pytest.raises(QualityGateError, match="failed schema validation"): - gate._parse_response(response) - - def test_criteria_results_optional(self) -> None: - """Test that criteria_results can be omitted.""" - gate = QualityGate() - - # criteria_results is optional - response = '{"passed": true, "feedback": "All good"}' - - result = gate._parse_response(response) - - assert result.passed is True - assert result.feedback == "All good" - assert result.criteria_results == [] - - -class TestQualityGateEdgeCases: - """Test edge cases and potential failure scenarios.""" - - async def test_empty_quality_criteria_auto_passes(self, project_root: Path) -> None: - """Test that no criteria means auto-pass (no subprocess called).""" - gate = QualityGate(command="nonexistent_command", timeout=30) - - # Even with a command that doesn't exist, empty criteria should auto-pass result = await gate.evaluate( - quality_criteria=[], # No criteria - outputs=["output.md"], + quality_criteria=[ + "The document must contain a section titled 'Executive Summary'", + "The document must include a numbered list of recommendations", + "The document must have a 'Conclusions' section", + ], + outputs=["incomplete.md"], project_root=project_root, ) - assert result.passed is True - assert "auto-passing" in result.feedback.lower() - - async def test_multiple_output_files(self, project_root: Path, mock_agent_command: str) -> None: - """Test evaluation with multiple output files.""" - gate = QualityGate(command=mock_agent_command, timeout=30) - - # Create multiple output files - (project_root / "output1.md").write_text("Content 1") - (project_root / "output2.md").write_text("Content 2") - (project_root / "output3.md").write_text("Content 3") - - env_backup = os.environ.get("REVIEW_RESULT") - os.environ["REVIEW_RESULT"] = "pass" - - try: - result = await gate.evaluate( - quality_criteria=["All outputs must exist"], - outputs=["output1.md", "output2.md", "output3.md"], - project_root=project_root, - ) - - assert result.passed is True - 
finally: - if env_backup is not None: - os.environ["REVIEW_RESULT"] = env_backup - else: - os.environ.pop("REVIEW_RESULT", None) - - async def test_large_output_file(self, project_root: Path, mock_agent_command: str) -> None: - """Test evaluation with a large output file.""" - gate = QualityGate(command=mock_agent_command, timeout=30) - - # Create a large file (100KB) - large_content = "Large content line\n" * 5000 - (project_root / "large_output.md").write_text(large_content) - - env_backup = os.environ.get("REVIEW_RESULT") - os.environ["REVIEW_RESULT"] = "pass" - - try: - result = await gate.evaluate( - quality_criteria=["Output must be complete"], - outputs=["large_output.md"], - project_root=project_root, - ) - - assert result.passed is True - finally: - if env_backup is not None: - os.environ["REVIEW_RESULT"] = env_backup - else: - os.environ.pop("REVIEW_RESULT", None) - - async def test_unicode_in_output(self, project_root: Path, mock_agent_command: str) -> None: - """Test evaluation with unicode content.""" - gate = QualityGate(command=mock_agent_command, timeout=30) - - # Create file with unicode content - unicode_content = "Unicode: 你好世界 🚀 émojis and spëcial çharacters" - (project_root / "unicode_output.md").write_text(unicode_content) - - env_backup = os.environ.get("REVIEW_RESULT") - os.environ["REVIEW_RESULT"] = "pass" + # Verify we got a structured response + assert result is not None + assert isinstance(result.passed, bool) + assert isinstance(result.feedback, str) - try: - result = await gate.evaluate( - quality_criteria=["Content must be valid"], - outputs=["unicode_output.md"], - project_root=project_root, + # The document clearly doesn't meet these specific criteria + # (though we allow for some model variability) + if result.passed: + pytest.skip( + f"Model returned pass unexpectedly (may be model variability): {result.feedback}" ) - assert result.passed is True - finally: - if env_backup is not None: - os.environ["REVIEW_RESULT"] = env_backup - else: - os.environ.pop("REVIEW_RESULT", None) + # Should have feedback about what's missing + assert len(result.feedback) > 0 diff --git a/tests/unit/mcp/test_quality_gate.py b/tests/unit/mcp/test_quality_gate.py index 8b511df7..fea0337d 100644 --- a/tests/unit/mcp/test_quality_gate.py +++ b/tests/unit/mcp/test_quality_gate.py @@ -26,7 +26,7 @@ def project_root(tmp_path: Path) -> Path: @pytest.fixture def quality_gate() -> QualityGate: """Create a QualityGate instance.""" - return QualityGate(command="echo test", timeout=10) + return QualityGate(timeout=10) @pytest.fixture @@ -44,30 +44,38 @@ def create_mock_subprocess( """Create a mock subprocess executor that captures commands. ############################################################################ - # CRITICAL: DO NOT MODIFY THE RESPONSE FORMAT WITHOUT UNDERSTANDING THIS! + # CRITICAL: UNDERSTAND THE RESPONSE FORMAT BEFORE MODIFYING! # - # This mock returns the quality gate response JSON DIRECTLY, without the - # Claude CLI wrapper object. This is INTENTIONAL and tests that the - # _parse_response method can handle BOTH: + # This mock returns responses in the EXACT format produced by Claude CLI + # when using `--print --output-format json --json-schema`. The response + # is a wrapper object with the structured output in `structured_output`: # - # 1. Direct JSON (what this mock returns) - for backwards compatibility - # 2. 
Wrapper objects from `claude -p --output-format json` which look like: - # {"type": "result", "result": "", ...} + # { + # "type": "result", + # "subtype": "success", + # "is_error": false, + # "structured_output": { + # "passed": true, + # "feedback": "...", + # "criteria_results": [...] + # } + # } # - # The REAL Claude CLI with `--output-format json` returns a wrapper object. - # The quality_gate.py code handles this by checking for the wrapper format - # and extracting the "result" field before parsing. + # KEY POINTS: + # 1. The `--json-schema` flag enforces structured output conformance + # 2. The actual quality gate response is in `structured_output`, NOT `result` + # 3. The `result` field (if present) contains text output, not our schema # - # If you're seeing schema validation errors in production, it's because - # the code expects to unwrap the response first. See test_parse_response_wrapper_object - # for the wrapper format test. + # See doc/reference/calling_claude_in_print_mode.md for full details on + # how Claude CLI handles --json-schema and the output format. # - # DO NOT "fix" this mock by adding a wrapper - that would break the test's - # purpose of verifying direct JSON handling still works. + # If you're seeing parse errors, check that quality_gate.py is looking + # for `structured_output` (not `result`) in the wrapper. ############################################################################ Args: - response: The JSON response to return. Defaults to a passing quality gate response. + response: The quality gate response to return in structured_output. + Defaults to a passing quality gate response. returncode: The return code for the process. Returns: @@ -86,8 +94,14 @@ async def mock_create_subprocess_exec(*cmd: str, **kwargs: Any) -> MagicMock: # mock_process.returncode = returncode async def mock_communicate(input: bytes = b"") -> tuple[bytes, bytes]: # noqa: ARG001 - # Returns direct JSON without CLI wrapper - see docstring above - return json.dumps(response).encode(), b"" + # Returns Claude CLI wrapper with structured_output field + wrapper = { + "type": "result", + "subtype": "success", + "is_error": False, + "structured_output": response, + } + return json.dumps(wrapper).encode(), b"" mock_process.communicate = mock_communicate return mock_process @@ -119,16 +133,14 @@ class TestQualityGate: def test_init(self) -> None: """Test QualityGate initialization.""" - gate = QualityGate(command="claude -p", timeout=60) + gate = QualityGate(timeout=60) - assert gate.command == "claude -p" assert gate.timeout == 60 def test_init_defaults(self) -> None: """Test QualityGate default values.""" gate = QualityGate() - assert gate.command == "claude -p --output-format json" assert gate.timeout == 120 def test_build_instructions(self, quality_gate: QualityGate) -> None: @@ -172,20 +184,20 @@ async def test_build_payload_missing_file( assert "nonexistent.md" in payload def test_parse_response_valid_json(self, quality_gate: QualityGate) -> None: - """Test parsing valid JSON response.""" - response = """ - Here's my evaluation: - - ```json - { - "passed": true, - "feedback": "All good", - "criteria_results": [ - {"criterion": "Test 1", "passed": true, "feedback": null} - ] - } - ``` - """ + """Test parsing valid JSON response with structured_output.""" + # Claude CLI returns wrapper with structured_output field when using --json-schema + response = json.dumps({ + "type": "result", + "subtype": "success", + "is_error": False, + "structured_output": { + "passed": True, + 
"feedback": "All good", + "criteria_results": [ + {"criterion": "Test 1", "passed": True, "feedback": None} + ] + } + }) result = quality_gate._parse_response(response) @@ -195,17 +207,18 @@ def test_parse_response_valid_json(self, quality_gate: QualityGate) -> None: def test_parse_response_failed(self, quality_gate: QualityGate) -> None: """Test parsing failed evaluation response.""" - response = """ - ```json - { - "passed": false, - "feedback": "Issues found", - "criteria_results": [ - {"criterion": "Test 1", "passed": false, "feedback": "Failed"} - ] - } - ``` - """ + response = json.dumps({ + "type": "result", + "subtype": "success", + "is_error": False, + "structured_output": { + "passed": False, + "feedback": "Issues found", + "criteria_results": [ + {"criterion": "Test 1", "passed": False, "feedback": "Failed"} + ] + } + }) result = quality_gate._parse_response(response) @@ -220,39 +233,29 @@ def test_parse_response_invalid_json(self, quality_gate: QualityGate) -> None: with pytest.raises(QualityGateError, match="Failed to parse"): quality_gate._parse_response(response) - def test_parse_response_wrapper_object(self, quality_gate: QualityGate) -> None: - """Test parsing response wrapped in Claude CLI --output-format json wrapper.""" - # This is what claude -p --output-format json returns + def test_parse_response_missing_structured_output(self, quality_gate: QualityGate) -> None: + """Test parsing response missing structured_output field raises error.""" + # Old format with 'result' field instead of 'structured_output' wrapper_response = json.dumps({ "type": "result", "subtype": "success", "is_error": False, - "duration_ms": 1234, - "result": json.dumps({ - "passed": True, - "feedback": "All criteria met", - "criteria_results": [ - {"criterion": "Test 1", "passed": True, "feedback": None} - ] - }), - "session_id": "test-session", + "result": "Some text response", }) - result = quality_gate._parse_response(wrapper_response) - - assert result.passed is True - assert result.feedback == "All criteria met" - assert len(result.criteria_results) == 1 + with pytest.raises(QualityGateError, match="missing 'structured_output'"): + quality_gate._parse_response(wrapper_response) - def test_parse_response_wrapper_empty_result(self, quality_gate: QualityGate) -> None: - """Test parsing wrapper object with empty result raises error.""" + def test_parse_response_error_in_wrapper(self, quality_gate: QualityGate) -> None: + """Test parsing response with is_error=True raises error.""" wrapper_response = json.dumps({ "type": "result", - "subtype": "success", - "result": "", + "subtype": "error", + "is_error": True, + "result": "Something went wrong", }) - with pytest.raises(QualityGateError, match="empty result"): + with pytest.raises(QualityGateError, match="returned error"): quality_gate._parse_response(wrapper_response) async def test_evaluate_no_criteria( @@ -268,6 +271,34 @@ async def test_evaluate_no_criteria( assert result.passed is True assert "auto-passing" in result.feedback.lower() + def test_parse_criteria_results_structure(self, quality_gate: QualityGate) -> None: + """Test that criteria results are properly parsed with multiple entries.""" + response = json.dumps({ + "type": "result", + "subtype": "success", + "is_error": False, + "structured_output": { + "passed": False, + "feedback": "Two criteria failed", + "criteria_results": [ + {"criterion": "First check", "passed": True, "feedback": None}, + {"criterion": "Second check", "passed": False, "feedback": "Missing data"}, + {"criterion": 
"Third check", "passed": False, "feedback": "Wrong format"}, + ], + }, + }) + + result = quality_gate._parse_response(response) + + assert result.passed is False + assert len(result.criteria_results) == 3 + assert result.criteria_results[0].passed is True + assert result.criteria_results[0].feedback is None + assert result.criteria_results[1].passed is False + assert result.criteria_results[1].feedback == "Missing data" + assert result.criteria_results[2].passed is False + assert result.criteria_results[2].feedback == "Wrong format" + class TestQualityGateCommandConstruction: """Tests for command construction, specifically JSON schema inclusion.""" @@ -294,7 +325,7 @@ async def test_command_includes_json_schema( self, output_file: Path, project_root: Path ) -> None: """Test that the command includes --json-schema with the correct schema.""" - gate = QualityGate(command="claude -p --output-format json", timeout=10) + gate = QualityGate(timeout=10) with patched_subprocess() as captured_cmd: await gate.evaluate( @@ -314,7 +345,7 @@ async def test_command_includes_system_prompt( self, output_file: Path, project_root: Path ) -> None: """Test that the command includes --system-prompt with quality criteria.""" - gate = QualityGate(command="claude -p", timeout=10) + gate = QualityGate(timeout=10) with patched_subprocess() as captured_cmd: await gate.evaluate( @@ -327,6 +358,40 @@ async def test_command_includes_system_prompt( assert "Output must exist" in system_prompt assert "Output must be valid" in system_prompt + async def test_command_has_correct_flag_ordering( + self, output_file: Path, project_root: Path + ) -> None: + """Test that flags come before -p -- for proper CLI invocation. + + See doc/reference/calling_claude_in_print_mode.md for details on + why flag ordering matters. 
+ """ + gate = QualityGate(timeout=10) + + with patched_subprocess() as captured_cmd: + await gate.evaluate( + quality_criteria=["Test criterion"], + outputs=[output_file.name], + project_root=project_root, + ) + + # Verify command structure + assert captured_cmd[0] == "claude" + assert "--print" in captured_cmd + assert "--output-format" in captured_cmd + assert "-p" in captured_cmd + assert "--" in captured_cmd + + # Verify -p -- comes last (after all other flags) + p_index = captured_cmd.index("-p") + dash_dash_index = captured_cmd.index("--") + json_schema_index = captured_cmd.index("--json-schema") + system_prompt_index = captured_cmd.index("--system-prompt") + + assert json_schema_index < p_index, "Flags must come before -p" + assert system_prompt_index < p_index, "Flags must come before -p" + assert dash_dash_index == p_index + 1, "-- must immediately follow -p" + async def test_schema_is_valid_json(self) -> None: """Test that QUALITY_GATE_RESPONSE_SCHEMA is valid JSON.""" # This test ensures the schema can be serialized From 18043b3165fc1cc5dd8789fc57be6ca872527180 Mon Sep 17 00:00:00 2001 From: Noah Horton Date: Thu, 5 Feb 2026 09:44:39 -0700 Subject: [PATCH 18/45] MCP command updated --- .mcp.json | 2 +- src/deepwork/core/adapters.py | 27 +++++++-------------------- 2 files changed, 8 insertions(+), 21 deletions(-) diff --git a/.mcp.json b/.mcp.json index 1c40877f..79f9ddc8 100644 --- a/.mcp.json +++ b/.mcp.json @@ -1,7 +1,7 @@ { "mcpServers": { "deepwork": { - "command": "/Users/noah/Documents/GitHub/deep-work/.venv/bin/deepwork", + "command": "deepwork", "args": [ "serve", "--path", diff --git a/src/deepwork/core/adapters.py b/src/deepwork/core/adapters.py index 4fc0733d..9225455f 100644 --- a/src/deepwork/core/adapters.py +++ b/src/deepwork/core/adapters.py @@ -3,8 +3,6 @@ from __future__ import annotations import json -import shutil -import sys from abc import ABC, abstractmethod from enum import Enum from pathlib import Path @@ -535,9 +533,8 @@ def register_mcp_server(self, project_path: Path) -> bool: Register the DeepWork MCP server in .mcp.json at project root. Claude Code reads MCP server configurations from .mcp.json (project scope), - not from settings.json. This method detects the full path to the deepwork - executable to ensure the MCP server can be invoked regardless of PATH - configuration when Claude Code starts. + not from settings.json. This method assumes the `deepwork` command is + available in the user's PATH. 
Args: project_path: Path to project root @@ -564,21 +561,11 @@ def register_mcp_server(self, project_path: Path) -> bool: existing_config["mcpServers"] = {} # Build the new MCP server config - deepwork_path = shutil.which("deepwork") - - if deepwork_path: - # Use the absolute path to deepwork - new_server_config = { - "command": deepwork_path, - "args": ["serve", "--path", "."], - } - else: - # Fallback: use Python module invocation - # This works when deepwork is installed in the current Python environment - new_server_config = { - "command": sys.executable, - "args": ["-m", "deepwork.cli.main", "serve", "--path", "."], - } + # Assume deepwork is available in PATH + new_server_config = { + "command": "deepwork", + "args": ["serve", "--path", "."], + } # Check if already registered with same config existing_server = existing_config["mcpServers"].get("deepwork", {}) From fa4040750f0c9be35a9a737ac67ce1d6ed390ad6 Mon Sep 17 00:00:00 2001 From: Noah Horton Date: Thu, 5 Feb 2026 10:17:14 -0700 Subject: [PATCH 19/45] add_job improved --- flake.lock | 6 ++-- .../standard_jobs/deepwork_jobs/job.yml | 8 +---- .../deepwork_jobs/steps/define.md | 4 +-- .../deepwork_jobs/steps/fix_jobs.md | 34 +++---------------- .../deepwork_jobs/steps/implement.md | 27 +-------------- .../deepwork_jobs/steps/iterate.md | 11 +----- .../deepwork_jobs/steps/learn.md | 10 +----- .../standard_jobs/deepwork_jobs/steps/test.md | 7 ++-- 8 files changed, 15 insertions(+), 92 deletions(-) diff --git a/flake.lock b/flake.lock index 9da4cc29..35a56a41 100644 --- a/flake.lock +++ b/flake.lock @@ -2,11 +2,11 @@ "nodes": { "nixpkgs": { "locked": { - "lastModified": 1770181073, - "narHash": "sha256-ksTL7P9QC1WfZasNlaAdLOzqD8x5EPyods69YBqxSfk=", + "lastModified": 1770197578, + "narHash": "sha256-AYqlWrX09+HvGs8zM6ebZ1pwUqjkfpnv8mewYwAo+iM=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "bf922a59c5c9998a6584645f7d0de689512e444c", + "rev": "00c21e4c93d963c50d4c0c89bfa84ed6e0694df2", "type": "github" }, "original": { diff --git a/src/deepwork/standard_jobs/deepwork_jobs/job.yml b/src/deepwork/standard_jobs/deepwork_jobs/job.yml index 4a8b26e5..964a06bb 100644 --- a/src/deepwork/standard_jobs/deepwork_jobs/job.yml +++ b/src/deepwork/standard_jobs/deepwork_jobs/job.yml @@ -97,9 +97,6 @@ steps: - "**Output Examples**: Does each instruction file show what good output looks like?" - "**Quality Criteria**: Does each instruction file define quality criteria for its outputs?" - "**Ask Structured Questions**: Do step instructions that gather user input explicitly use the phrase \"ask structured questions\"?" - - "**Sync Complete**: Has `deepwork sync` been run successfully?" - - "**Commands Available**: Are the slash-commands generated in `.claude/commands/`?" - - "**Rules Considered**: Has the agent thought about whether rules would benefit this job? If relevant rules were identified, did they explain them and offer to run `/deepwork_rules.define`? Not every job needs rules - only suggest when genuinely helpful." - id: test name: "Test the New Workflow" @@ -144,7 +141,6 @@ steps: - "**Instructions Improved**: Were step instructions updated to address identified problems?" - "**Quality Criteria Updated**: Were quality criteria adjusted to better match user expectations?" - "**Tool Usage Considered**: Did the agent consider if different tools would improve the workflow?" - - "**Sync Complete**: Has `deepwork sync` been run to apply changes?" - "**Recap Provided**: Did the agent summarize what was improved and why?" 
- id: learn @@ -169,7 +165,6 @@ steps: - "**File References Used**: Do AGENTS.md entries reference other files where appropriate?" - "**Working Folder Correct**: Is AGENTS.md in the correct working folder for the job?" - "**Generalizable Separated**: Are generalizable improvements in instructions, not AGENTS.md?" - - "**Sync Complete**: Has `deepwork sync` been run if instructions were modified?" - id: fix_settings name: "Fix Settings Files" @@ -205,8 +200,7 @@ steps: - "**Stop Hooks Migrated**: Are `stop_hooks` migrated to `hooks.after_agent` format?" - "**Removed Steps Cleaned**: Are references to removed steps (like `review_job_spec`) updated?" - "**Orphaned Steps Fixed**: For jobs with no workflows, is there a single workflow (named after the job) containing all steps? For jobs with existing workflows, does each orphan get its own workflow (named after the step)?" - - "**Valid YAML**: Do all job.yml files pass schema validation?" - - "**Sync Complete**: Has `deepwork sync` been run to regenerate commands?" + - "**Valid YAML**: Are all job.yml files valid YAML?" - id: errata name: "Clean Up Errata" diff --git a/src/deepwork/standard_jobs/deepwork_jobs/steps/define.md b/src/deepwork/standard_jobs/deepwork_jobs/steps/define.md index e441c9e2..fc0a9296 100644 --- a/src/deepwork/standard_jobs/deepwork_jobs/steps/define.md +++ b/src/deepwork/standard_jobs/deepwork_jobs/steps/define.md @@ -414,7 +414,7 @@ Claude: Great! Creating the job.yml specification now... - .deepwork/jobs/competitive_research/job.yml **Next step:** -Run `/deepwork_jobs.implement` to generate step instruction files and sync commands. +Implement the job to generate step instruction files. ``` ## Important Guidelines @@ -454,5 +454,5 @@ The complete YAML specification file (example shown in Step 5 above). After creating the file: 1. Inform the user that the specification is complete 2. Recommend that they review the job.yml file -3. Tell them to run `/deepwork_jobs.implement` next to generate step instructions +3. Tell them the next step is to implement the job (generate step instruction files) diff --git a/src/deepwork/standard_jobs/deepwork_jobs/steps/fix_jobs.md b/src/deepwork/standard_jobs/deepwork_jobs/steps/fix_jobs.md index 52e90615..b1656e27 100644 --- a/src/deepwork/standard_jobs/deepwork_jobs/steps/fix_jobs.md +++ b/src/deepwork/standard_jobs/deepwork_jobs/steps/fix_jobs.md @@ -87,12 +87,7 @@ steps: ### Step 5: Fix Orphaned Steps -Steps not included in any workflow cannot be invoked via the MCP interface. The parser will emit warnings for these. - -Run the following to see warnings: -```bash -deepwork sync 2>&1 | grep -i "warning" -``` +Steps not included in any workflow cannot be invoked via the MCP interface. **How to handle orphaned steps depends on whether the job has ANY workflows defined:** @@ -133,17 +128,7 @@ workflows: This ensures all steps remain accessible via the MCP interface while preserving the existing workflow structure. -### Step 6: Validate Against Schema - -After making changes, validate each job.yml: - -```bash -deepwork sync -``` - -Fix any schema validation errors that appear. - -### Step 7: Update Version Numbers +### Step 6: Update Version Numbers If you made significant changes to a job, bump its version number: @@ -157,16 +142,6 @@ changelog: changes: "Migrated to current DeepWork format; removed deprecated fields" ``` -### Step 8: Run Sync - -After all fixes, regenerate commands: - -```bash -deepwork sync -``` - -Verify no errors or warnings appear. 
- ## Quality Criteria - All `exposed: true` fields are removed or noted @@ -174,8 +149,7 @@ Verify no errors or warnings appear. - References to removed steps (like `review_job_spec`) are updated - Jobs with no workflows get a single workflow (same name as job) containing all steps - Jobs with existing workflows get individual workflows for each orphaned step (same name as step) -- All job.yml files pass schema validation -- `deepwork sync` runs without errors +- All job.yml files are valid YAML - When all criteria are met, include `Quality Criteria Met` in your response ## Common Issues and Fixes @@ -217,4 +191,4 @@ For each job in `.deepwork/jobs/`, check: 1. **Don't modify standard jobs directly** - If `deepwork_jobs` is out of date, run `deepwork install --platform claude` to get the latest version 2. **Preserve custom logic** - When migrating hooks, preserve the prompt content -3. **Test after changes** - Run `deepwork sync` after each job fix to catch errors early +3. **Test after changes** - Validate YAML syntax after each job fix to catch errors early diff --git a/src/deepwork/standard_jobs/deepwork_jobs/steps/implement.md b/src/deepwork/standard_jobs/deepwork_jobs/steps/implement.md index ddeed2c9..6f359e4f 100644 --- a/src/deepwork/standard_jobs/deepwork_jobs/steps/implement.md +++ b/src/deepwork/standard_jobs/deepwork_jobs/steps/implement.md @@ -113,19 +113,6 @@ See `.deepwork/jobs/deepwork_jobs/steps/supplemental_file_references.md` for det Verify that `job.yml` is in the correct location at `.deepwork/jobs/[job_name]/job.yml`. The define step should have created it. If for some reason it's not there, you may need to create or move it. -### Step 5: Sync Skills - -Run `deepwork sync` to generate the skills for this job: - -```bash -deepwork sync -``` - -This will: -- Parse the job definition -- Generate skills for each step -- Make the skills available in `.claude/skills/` (or appropriate platform directory) - ## Example Implementation For a complete worked example showing a job.yml and corresponding step instruction file, see: @@ -141,22 +128,12 @@ For a complete worked example showing a job.yml and corresponding step instructi 5. **Use context** - The job description provides valuable context for each step 6. 
**Be specific** - Tailor instructions to the specific step, not generic advice -## Validation Before Sync - -Before running `deepwork sync`, verify: -- All directories exist -- `job.yml` is in place -- All step instruction files exist (one per step) -- No file system errors - ## Completion Checklist Before marking this step complete, ensure: -- [ ] job.yml validated and copied to job directory +- [ ] job.yml validated and in job directory - [ ] All step instruction files created - [ ] Each instruction file is complete and actionable -- [ ] `deepwork sync` executed successfully -- [ ] Skills generated in platform directory ## Quality Criteria @@ -166,5 +143,3 @@ Before marking this step complete, ensure: - Output examples are provided in each instruction file - Quality criteria defined for each step - Steps with user inputs explicitly use "ask structured questions" phrasing -- Sync completed successfully -- Skills available for use diff --git a/src/deepwork/standard_jobs/deepwork_jobs/steps/iterate.md b/src/deepwork/standard_jobs/deepwork_jobs/steps/iterate.md index 78f8ddf3..7efece1b 100644 --- a/src/deepwork/standard_jobs/deepwork_jobs/steps/iterate.md +++ b/src/deepwork/standard_jobs/deepwork_jobs/steps/iterate.md @@ -122,15 +122,7 @@ After making improvements: changes: "Improved [step] instructions based on test run: added examples, clarified criteria, updated tool recommendations" ``` -### Step 7: Sync Changes - -Run `deepwork sync` to apply the changes: - -```bash -deepwork sync -``` - -### Step 8: Provide Recap +### Step 7: Provide Recap Summarize the improvements made: @@ -175,7 +167,6 @@ Future runs should produce reports with better visual design and clearer summari - Quality criteria were updated to match user expectations - Alternative tools were considered where relevant - Job version and changelog were updated -- `deepwork sync` was run to apply changes - A clear recap of improvements was provided - When all criteria are met, include `Quality Criteria Met` in your response diff --git a/src/deepwork/standard_jobs/deepwork_jobs/steps/learn.md b/src/deepwork/standard_jobs/deepwork_jobs/steps/learn.md index bfb393a5..3fcbc71f 100644 --- a/src/deepwork/standard_jobs/deepwork_jobs/steps/learn.md +++ b/src/deepwork/standard_jobs/deepwork_jobs/steps/learn.md @@ -233,13 +233,6 @@ If instruction files were modified: changes: "Improved [step] instructions based on execution learnings: [brief description]" ``` -### Step 7: Sync Skills - -**Run deepwork sync** (if instructions were modified) -```bash -deepwork sync -``` - ## File Reference Patterns When adding entries to AGENTS.md, prefer these patterns: @@ -269,6 +262,7 @@ When adding entries to AGENTS.md, prefer these patterns: - AGENTS.md created/updated with bespoke learnings - File references used instead of duplicating content - AGENTS.md is in the correct folder (the deepest common folder for the topic) +- Job version and changelog updated if instructions were modified - When all criteria are met, include `✓ Quality Criteria Met` ## Example Dialog @@ -324,8 +318,6 @@ I found the following job executions: 3. Updated job.yml version to 1.0.1 with changelog entry -4. Ran `deepwork sync` - **Summary** Updated job instructions and created AGENTS.md with bespoke learnings. 
diff --git a/src/deepwork/standard_jobs/deepwork_jobs/steps/test.md b/src/deepwork/standard_jobs/deepwork_jobs/steps/test.md index fa36d265..cce74c43 100644 --- a/src/deepwork/standard_jobs/deepwork_jobs/steps/test.md +++ b/src/deepwork/standard_jobs/deepwork_jobs/steps/test.md @@ -28,10 +28,7 @@ Please describe a specific case you want to run through the workflow - ideally s 1. **Compact the conversation history** - Before invoking the workflow, use the `/compact` command to summarize the conversation so far. This ensures the workflow starts with clean context focused on the test case. -2. **Invoke the new workflow** - Run the first step of the newly created workflow using its slash command: - ``` - /[job_name].[first_step_id] - ``` +2. **Start the new workflow** - Begin executing the first step of the newly created workflow. 3. **Complete the full workflow** - Continue through all steps of the workflow until it produces its final output. @@ -121,7 +118,7 @@ Claude: Great, let me compact the conversation and run the workflow on your Janu /compact -/monthly_report.gather_data +[Starting the monthly_report workflow...] [... workflow runs through all steps ...] From 3e218055dbb039983462753d1cb6025b4fc5ae08 Mon Sep 17 00:00:00 2001 From: Noah Horton Date: Thu, 5 Feb 2026 10:25:54 -0700 Subject: [PATCH 20/45] tighter instructions --- .deepwork/doc_specs/job_spec.md | 19 ---- .../jobs/deepwork_jobs/doc_specs/job_spec.md | 19 ---- .deepwork/jobs/deepwork_jobs/job.yml | 11 +-- .deepwork/jobs/deepwork_jobs/steps/define.md | 42 +++++---- .../jobs/deepwork_jobs/steps/fix_jobs.md | 34 +------- .../jobs/deepwork_jobs/steps/implement.md | 86 +++++-------------- .deepwork/jobs/deepwork_jobs/steps/iterate.md | 15 +--- .deepwork/jobs/deepwork_jobs/steps/learn.md | 12 +-- .deepwork/jobs/deepwork_jobs/steps/test.md | 16 ++-- .../deepwork_jobs/doc_specs/job_spec.md | 19 ---- .../standard_jobs/deepwork_jobs/job.yml | 3 +- .../deepwork_jobs/steps/define.md | 38 ++++---- .../deepwork_jobs/steps/implement.md | 59 +++++-------- .../deepwork_jobs/steps/iterate.md | 4 +- .../deepwork_jobs/steps/learn.md | 2 +- .../standard_jobs/deepwork_jobs/steps/test.md | 13 ++- 16 files changed, 112 insertions(+), 280 deletions(-) diff --git a/.deepwork/doc_specs/job_spec.md b/.deepwork/doc_specs/job_spec.md index b880bb17..23fd9fc7 100644 --- a/.deepwork/doc_specs/job_spec.md +++ b/.deepwork/doc_specs/job_spec.md @@ -82,14 +82,6 @@ steps: ## Optional Fields -### Exposed Steps - -```yaml -steps: - - id: learn - exposed: true # Makes step available without running dependencies -``` - ### Agent Delegation When a step should be executed by a specific agent type, use the `agent` field. This automatically sets `context: fork` in the generated skill. @@ -122,17 +114,6 @@ steps: - script: hooks/run_tests.sh ``` -### Stop Hooks (Legacy) - -```yaml -steps: - - id: step_id - stop_hooks: - - prompt: "Validation prompt..." - - prompt_file: hooks/check.md - - script: hooks/validate.sh -``` - ## Validation Rules 1. 
**No circular dependencies**: Step A cannot depend on Step B if Step B depends on Step A diff --git a/.deepwork/jobs/deepwork_jobs/doc_specs/job_spec.md b/.deepwork/jobs/deepwork_jobs/doc_specs/job_spec.md index b880bb17..23fd9fc7 100644 --- a/.deepwork/jobs/deepwork_jobs/doc_specs/job_spec.md +++ b/.deepwork/jobs/deepwork_jobs/doc_specs/job_spec.md @@ -82,14 +82,6 @@ steps: ## Optional Fields -### Exposed Steps - -```yaml -steps: - - id: learn - exposed: true # Makes step available without running dependencies -``` - ### Agent Delegation When a step should be executed by a specific agent type, use the `agent` field. This automatically sets `context: fork` in the generated skill. @@ -122,17 +114,6 @@ steps: - script: hooks/run_tests.sh ``` -### Stop Hooks (Legacy) - -```yaml -steps: - - id: step_id - stop_hooks: - - prompt: "Validation prompt..." - - prompt_file: hooks/check.md - - script: hooks/validate.sh -``` - ## Validation Rules 1. **No circular dependencies**: Step A cannot depend on Step B if Step B depends on Step A diff --git a/.deepwork/jobs/deepwork_jobs/job.yml b/.deepwork/jobs/deepwork_jobs/job.yml index 4a8b26e5..2f24a707 100644 --- a/.deepwork/jobs/deepwork_jobs/job.yml +++ b/.deepwork/jobs/deepwork_jobs/job.yml @@ -91,15 +91,11 @@ steps: dependencies: - define quality_criteria: - - "**Directory Structure**: Is `.deepwork/jobs/[job_name]/` created correctly?" - "**Complete Instructions**: Are ALL step instruction files complete (not stubs or placeholders)?" - "**Specific & Actionable**: Are instructions tailored to each step's purpose, not generic?" - "**Output Examples**: Does each instruction file show what good output looks like?" - "**Quality Criteria**: Does each instruction file define quality criteria for its outputs?" - "**Ask Structured Questions**: Do step instructions that gather user input explicitly use the phrase \"ask structured questions\"?" - - "**Sync Complete**: Has `deepwork sync` been run successfully?" - - "**Commands Available**: Are the slash-commands generated in `.claude/commands/`?" - - "**Rules Considered**: Has the agent thought about whether rules would benefit this job? If relevant rules were identified, did they explain them and offer to run `/deepwork_rules.define`? Not every job needs rules - only suggest when genuinely helpful." - id: test name: "Test the New Workflow" @@ -117,7 +113,7 @@ steps: - implement quality_criteria: - "**User Informed**: Did the agent explain the workflow is ready and ask what to test it on?" - - "**Workflow Invoked**: Was the new workflow actually run on the user's test case?" + - "**Workflow Invoked**: Was the new workflow actually run on the user's test case via MCP?" - "**Output Critiqued**: Did the agent identify up to 3 top issues with the output?" - "**User Feedback Gathered**: Did the agent ask the user about each issue and gather additional feedback?" - "**Corrections Made**: Were all requested corrections applied to the output?" @@ -144,7 +140,6 @@ steps: - "**Instructions Improved**: Were step instructions updated to address identified problems?" - "**Quality Criteria Updated**: Were quality criteria adjusted to better match user expectations?" - "**Tool Usage Considered**: Did the agent consider if different tools would improve the workflow?" - - "**Sync Complete**: Has `deepwork sync` been run to apply changes?" - "**Recap Provided**: Did the agent summarize what was improved and why?" 
- id: learn @@ -169,7 +164,6 @@ steps: - "**File References Used**: Do AGENTS.md entries reference other files where appropriate?" - "**Working Folder Correct**: Is AGENTS.md in the correct working folder for the job?" - "**Generalizable Separated**: Are generalizable improvements in instructions, not AGENTS.md?" - - "**Sync Complete**: Has `deepwork sync` been run if instructions were modified?" - id: fix_settings name: "Fix Settings Files" @@ -205,8 +199,7 @@ steps: - "**Stop Hooks Migrated**: Are `stop_hooks` migrated to `hooks.after_agent` format?" - "**Removed Steps Cleaned**: Are references to removed steps (like `review_job_spec`) updated?" - "**Orphaned Steps Fixed**: For jobs with no workflows, is there a single workflow (named after the job) containing all steps? For jobs with existing workflows, does each orphan get its own workflow (named after the step)?" - - "**Valid YAML**: Do all job.yml files pass schema validation?" - - "**Sync Complete**: Has `deepwork sync` been run to regenerate commands?" + - "**Valid YAML**: Are all job.yml files valid YAML?" - id: errata name: "Clean Up Errata" diff --git a/.deepwork/jobs/deepwork_jobs/steps/define.md b/.deepwork/jobs/deepwork_jobs/steps/define.md index e441c9e2..686a456d 100644 --- a/.deepwork/jobs/deepwork_jobs/steps/define.md +++ b/.deepwork/jobs/deepwork_jobs/steps/define.md @@ -225,49 +225,53 @@ After gathering information about all steps: - Job description (detailed multi-line explanation) - Version number (start with 1.0.0) -### Step 4: Define Quality Validation (Stop Hooks) +### Step 4: Define Quality Validation Hooks -For each step, consider whether it would benefit from **quality validation loops**. Stop hooks allow the AI agent to iteratively refine its work until quality criteria are met. +For each step, consider whether it would benefit from **quality validation loops**. Quality hooks allow the AI agent to iteratively refine its work until quality criteria are met. **Ask structured questions about quality validation:** - "Are there specific quality criteria that must be met for this step?" - "Would you like the agent to validate its work before completing?" - "What would make you send the work back for revision?" -**Stop hooks are particularly valuable for:** +**Quality hooks are particularly valuable for:** - Steps with complex outputs that need multiple checks - Steps where quality is critical (final deliverables) - Steps with subjective quality criteria that benefit from AI self-review -**Three types of stop hooks are supported:** +**Three types of hooks are supported:** 1. **Inline Prompt** (`prompt`) - Best for simple quality criteria ```yaml - stop_hooks: - - prompt: | - Verify the output meets these criteria: - 1. Contains at least 5 competitors - 2. Each competitor has a description - 3. Selection rationale is clear + hooks: + after_agent: + - prompt: | + Verify the output meets these criteria: + 1. Contains at least 5 competitors + 2. Each competitor has a description + 3. Selection rationale is clear ``` 2. **Prompt File** (`prompt_file`) - For detailed/reusable criteria ```yaml - stop_hooks: - - prompt_file: hooks/quality_check.md + hooks: + after_agent: + - prompt_file: hooks/quality_check.md ``` 3. 
**Script** (`script`) - For programmatic validation (tests, linting) ```yaml - stop_hooks: - - script: hooks/run_tests.sh + hooks: + after_agent: + - script: hooks/run_tests.sh ``` **Multiple hooks can be combined:** ```yaml -stop_hooks: - - script: hooks/lint_output.sh - - prompt: "Verify the content is comprehensive and well-organized" +hooks: + after_agent: + - script: hooks/lint_output.sh + - prompt: "Verify the content is comprehensive and well-organized" ``` **Encourage prompt-based hooks** - They leverage the AI's ability to understand context and make nuanced quality judgments. Script hooks are best for objective checks (syntax, format, tests). @@ -414,7 +418,7 @@ Claude: Great! Creating the job.yml specification now... - .deepwork/jobs/competitive_research/job.yml **Next step:** -Run `/deepwork_jobs.implement` to generate step instruction files and sync commands. +Implement the job to generate step instruction files. ``` ## Important Guidelines @@ -454,5 +458,5 @@ The complete YAML specification file (example shown in Step 5 above). After creating the file: 1. Inform the user that the specification is complete 2. Recommend that they review the job.yml file -3. Tell them to run `/deepwork_jobs.implement` next to generate step instructions +3. Tell them the next step is to implement the job (generate step instruction files) diff --git a/.deepwork/jobs/deepwork_jobs/steps/fix_jobs.md b/.deepwork/jobs/deepwork_jobs/steps/fix_jobs.md index 52e90615..b1656e27 100644 --- a/.deepwork/jobs/deepwork_jobs/steps/fix_jobs.md +++ b/.deepwork/jobs/deepwork_jobs/steps/fix_jobs.md @@ -87,12 +87,7 @@ steps: ### Step 5: Fix Orphaned Steps -Steps not included in any workflow cannot be invoked via the MCP interface. The parser will emit warnings for these. - -Run the following to see warnings: -```bash -deepwork sync 2>&1 | grep -i "warning" -``` +Steps not included in any workflow cannot be invoked via the MCP interface. **How to handle orphaned steps depends on whether the job has ANY workflows defined:** @@ -133,17 +128,7 @@ workflows: This ensures all steps remain accessible via the MCP interface while preserving the existing workflow structure. -### Step 6: Validate Against Schema - -After making changes, validate each job.yml: - -```bash -deepwork sync -``` - -Fix any schema validation errors that appear. - -### Step 7: Update Version Numbers +### Step 6: Update Version Numbers If you made significant changes to a job, bump its version number: @@ -157,16 +142,6 @@ changelog: changes: "Migrated to current DeepWork format; removed deprecated fields" ``` -### Step 8: Run Sync - -After all fixes, regenerate commands: - -```bash -deepwork sync -``` - -Verify no errors or warnings appear. - ## Quality Criteria - All `exposed: true` fields are removed or noted @@ -174,8 +149,7 @@ Verify no errors or warnings appear. - References to removed steps (like `review_job_spec`) are updated - Jobs with no workflows get a single workflow (same name as job) containing all steps - Jobs with existing workflows get individual workflows for each orphaned step (same name as step) -- All job.yml files pass schema validation -- `deepwork sync` runs without errors +- All job.yml files are valid YAML - When all criteria are met, include `Quality Criteria Met` in your response ## Common Issues and Fixes @@ -217,4 +191,4 @@ For each job in `.deepwork/jobs/`, check: 1. **Don't modify standard jobs directly** - If `deepwork_jobs` is out of date, run `deepwork install --platform claude` to get the latest version 2. 
**Preserve custom logic** - When migrating hooks, preserve the prompt content -3. **Test after changes** - Run `deepwork sync` after each job fix to catch errors early +3. **Test after changes** - Validate YAML syntax after each job fix to catch errors early diff --git a/.deepwork/jobs/deepwork_jobs/steps/implement.md b/.deepwork/jobs/deepwork_jobs/steps/implement.md index ddeed2c9..e1516c78 100644 --- a/.deepwork/jobs/deepwork_jobs/steps/implement.md +++ b/.deepwork/jobs/deepwork_jobs/steps/implement.md @@ -2,34 +2,13 @@ ## Objective -Generate the DeepWork job directory structure and instruction files for each step based on the `job.yml` specification from the define step. +Generate step instruction files for each step based on the `job.yml` specification from the define step. ## Task -Read the `job.yml` specification file and create all the necessary files to make the job functional, including directory structure and step instruction files. Then sync the commands to make them available. +Read the `job.yml` specification file created by the define step and generate comprehensive instruction files for each step. The define step has already created the job directory structure. -### Step 1: Create Directory Structure Using Script - -Run the `make_new_job.sh` script to create the standard directory structure: - -```bash -.deepwork/jobs/deepwork_jobs/make_new_job.sh [job_name] -``` - -This creates: -- `.deepwork/jobs/[job_name]/` - Main job directory -- `.deepwork/jobs/[job_name]/steps/` - Step instruction files -- `.deepwork/jobs/[job_name]/hooks/` - Custom validation scripts (with .gitkeep) -- `.deepwork/jobs/[job_name]/templates/` - Example file formats (with .gitkeep) -- `.deepwork/jobs/[job_name]/AGENTS.md` - Job management guidance - -**Note**: If the directory already exists (e.g., job.yml was created by define step), you can skip this step or manually create the additional directories: -```bash -mkdir -p .deepwork/jobs/[job_name]/hooks .deepwork/jobs/[job_name]/templates -touch .deepwork/jobs/[job_name]/hooks/.gitkeep .deepwork/jobs/[job_name]/templates/.gitkeep -``` - -### Step 2: Read and Validate the Specification +### Step 1: Read and Validate the Specification 1. **Locate the job.yml file** - Read `.deepwork/jobs/[job_name]/job.yml` from the define step @@ -46,7 +25,7 @@ touch .deepwork/jobs/[job_name]/hooks/.gitkeep .deepwork/jobs/[job_name]/templat - List of all steps with their details - Understand the workflow structure -### Step 3: Generate Step Instruction Files +### Step 2: Generate Step Instruction Files For each step in the job.yml, create a comprehensive instruction file at `.deepwork/jobs/[job_name]/steps/[step_id].md`. @@ -71,11 +50,11 @@ For each step in the job.yml, create a comprehensive instruction file at `.deepw 6. **Align with stop hooks** - If the step has `stop_hooks` defined, ensure the quality criteria in the instruction file match the validation criteria in the hooks 7. **Ask structured questions** - When a step has user inputs, the instructions MUST explicitly tell the agent to "ask structured questions" using the AskUserQuestion tool to gather that information. Never use generic phrasing like "ask the user" - always use "ask structured questions" -### Handling Stop Hooks +### Handling Quality Hooks -If a step in the job.yml has `stop_hooks` defined, the generated instruction file should: +If a step in the job.yml has `hooks.after_agent` defined, the generated instruction file should: -1. 
**Mirror the quality criteria** - The "Quality Criteria" section should match what the stop hooks will validate +1. **Mirror the quality criteria** - The "Quality Criteria" section should match what the hooks will validate 2. **Be explicit about success** - Help the agent understand when the step is truly complete 3. **Include the promise pattern** - Mention that `✓ Quality Criteria Met` should be included when criteria are met @@ -83,12 +62,13 @@ If a step in the job.yml has `stop_hooks` defined, the generated instruction fil ```yaml - id: research_competitors name: "Research Competitors" - stop_hooks: - - prompt: | - Verify the research meets criteria: - 1. Each competitor has at least 3 data points - 2. Sources are cited - 3. Information is current (within last year) + hooks: + after_agent: + - prompt: | + Verify the research meets criteria: + 1. Each competitor has at least 3 data points + 2. Sources are cited + 3. Information is current (within last year) ``` **The instruction file should include:** @@ -109,22 +89,11 @@ Step instructions can include additional `.md` files in the `steps/` directory f See `.deepwork/jobs/deepwork_jobs/steps/supplemental_file_references.md` for detailed documentation and examples. -### Step 4: Verify job.yml Location - -Verify that `job.yml` is in the correct location at `.deepwork/jobs/[job_name]/job.yml`. The define step should have created it. If for some reason it's not there, you may need to create or move it. - -### Step 5: Sync Skills +### Step 3: Verify Files -Run `deepwork sync` to generate the skills for this job: - -```bash -deepwork sync -``` - -This will: -- Parse the job definition -- Generate skills for each step -- Make the skills available in `.claude/skills/` (or appropriate platform directory) +Verify that all files are in their correct locations: +- `job.yml` at `.deepwork/jobs/[job_name]/job.yml` (created by define step) +- Step instruction files at `.deepwork/jobs/[job_name]/steps/[step_id].md` ## Example Implementation @@ -141,30 +110,21 @@ For a complete worked example showing a job.yml and corresponding step instructi 5. **Use context** - The job description provides valuable context for each step 6. **Be specific** - Tailor instructions to the specific step, not generic advice -## Validation Before Sync - -Before running `deepwork sync`, verify: -- All directories exist -- `job.yml` is in place -- All step instruction files exist (one per step) -- No file system errors - ## Completion Checklist Before marking this step complete, ensure: -- [ ] job.yml validated and copied to job directory +- [ ] job.yml validated and in job directory - [ ] All step instruction files created - [ ] Each instruction file is complete and actionable -- [ ] `deepwork sync` executed successfully -- [ ] Skills generated in platform directory + +## Note: Workflow Availability + +Once the job.yml and step instruction files are created, the workflow is immediately available through the DeepWork MCP server. The MCP server reads job definitions directly from `.deepwork/jobs/` - no separate sync or installation step is required. 
## Quality Criteria -- Job directory structure is correct - All instruction files are complete (not stubs) - Instructions are specific and actionable - Output examples are provided in each instruction file - Quality criteria defined for each step - Steps with user inputs explicitly use "ask structured questions" phrasing -- Sync completed successfully -- Skills available for use diff --git a/.deepwork/jobs/deepwork_jobs/steps/iterate.md b/.deepwork/jobs/deepwork_jobs/steps/iterate.md index 78f8ddf3..308a6a47 100644 --- a/.deepwork/jobs/deepwork_jobs/steps/iterate.md +++ b/.deepwork/jobs/deepwork_jobs/steps/iterate.md @@ -122,15 +122,7 @@ After making improvements: changes: "Improved [step] instructions based on test run: added examples, clarified criteria, updated tool recommendations" ``` -### Step 7: Sync Changes - -Run `deepwork sync` to apply the changes: - -```bash -deepwork sync -``` - -### Step 8: Provide Recap +### Step 7: Provide Recap Summarize the improvements made: @@ -175,7 +167,6 @@ Future runs should produce reports with better visual design and clearer summari - Quality criteria were updated to match user expectations - Alternative tools were considered where relevant - Job version and changelog were updated -- `deepwork sync` was run to apply changes - A clear recap of improvements was provided - When all criteria are met, include `Quality Criteria Met` in your response @@ -219,9 +210,7 @@ I identified the following issues from the test run: - Version bumped to 1.0.1 - Added changelog: "Improved visual design guidance and audience-appropriate writing based on test feedback" -Running `deepwork sync`... - -Done! The workflow has been updated. +Done! The workflow has been updated. Changes are immediately available through the MCP server. ### Expected Impact diff --git a/.deepwork/jobs/deepwork_jobs/steps/learn.md b/.deepwork/jobs/deepwork_jobs/steps/learn.md index bfb393a5..38a80362 100644 --- a/.deepwork/jobs/deepwork_jobs/steps/learn.md +++ b/.deepwork/jobs/deepwork_jobs/steps/learn.md @@ -233,13 +233,6 @@ If instruction files were modified: changes: "Improved [step] instructions based on execution learnings: [brief description]" ``` -### Step 7: Sync Skills - -**Run deepwork sync** (if instructions were modified) -```bash -deepwork sync -``` - ## File Reference Patterns When adding entries to AGENTS.md, prefer these patterns: @@ -269,12 +262,13 @@ When adding entries to AGENTS.md, prefer these patterns: - AGENTS.md created/updated with bespoke learnings - File references used instead of duplicating content - AGENTS.md is in the correct folder (the deepest common folder for the topic) +- Job version and changelog updated if instructions were modified - When all criteria are met, include `✓ Quality Criteria Met` ## Example Dialog ``` -User: /deepwork_jobs.learn +User: [Invokes the learn workflow via MCP] Claude: I'll analyze this conversation for learnings from DeepWork job executions. @@ -324,8 +318,6 @@ I found the following job executions: 3. Updated job.yml version to 1.0.1 with changelog entry -4. Ran `deepwork sync` - **Summary** Updated job instructions and created AGENTS.md with bespoke learnings. diff --git a/.deepwork/jobs/deepwork_jobs/steps/test.md b/.deepwork/jobs/deepwork_jobs/steps/test.md index fa36d265..7b78ca81 100644 --- a/.deepwork/jobs/deepwork_jobs/steps/test.md +++ b/.deepwork/jobs/deepwork_jobs/steps/test.md @@ -26,14 +26,11 @@ Please describe a specific case you want to run through the workflow - ideally s ### Step 2: Prepare and Run the Workflow -1. 
**Compact the conversation history** - Before invoking the workflow, use the `/compact` command to summarize the conversation so far. This ensures the workflow starts with clean context focused on the test case. +1. **Prepare clean context** - Before invoking the workflow, consider compacting the conversation history (e.g., using `/compact` in Claude Code) to ensure the workflow starts with clean context focused on the test case. -2. **Invoke the new workflow** - Run the first step of the newly created workflow using its slash command: - ``` - /[job_name].[first_step_id] - ``` +2. **Start the new workflow** - Use `start_workflow` through the DeepWork MCP server with the job name and workflow name to begin executing the workflow. -3. **Complete the full workflow** - Continue through all steps of the workflow until it produces its final output. +3. **Complete the full workflow** - Continue through all steps of the workflow until it produces its final output. Use `finished_step` to progress through each step. 4. **Note any issues during execution** - Pay attention to: - Confusion or ambiguity in instructions @@ -94,7 +91,6 @@ If the user provides more feedback, address it and ask again. Don't assume satis ## Quality Criteria - User was informed the workflow is ready and asked what to test it on -- Conversation was compacted before running the workflow - The new workflow was actually invoked and run to completion - Output was critiqued and up to 3 top issues were identified - Each identified issue was presented to the user with a specific question @@ -117,11 +113,9 @@ What would you like to use it on for the first test run? Please describe a speci User: Let's do the January 2026 engineering report, focusing on deployment frequency and incident response times. -Claude: Great, let me compact the conversation and run the workflow on your January 2026 engineering report. - -/compact +Claude: Great, let me run the workflow on your January 2026 engineering report. -/monthly_report.gather_data +[Starting the monthly_report workflow via MCP...] [... workflow runs through all steps ...] diff --git a/src/deepwork/standard_jobs/deepwork_jobs/doc_specs/job_spec.md b/src/deepwork/standard_jobs/deepwork_jobs/doc_specs/job_spec.md index b880bb17..23fd9fc7 100644 --- a/src/deepwork/standard_jobs/deepwork_jobs/doc_specs/job_spec.md +++ b/src/deepwork/standard_jobs/deepwork_jobs/doc_specs/job_spec.md @@ -82,14 +82,6 @@ steps: ## Optional Fields -### Exposed Steps - -```yaml -steps: - - id: learn - exposed: true # Makes step available without running dependencies -``` - ### Agent Delegation When a step should be executed by a specific agent type, use the `agent` field. This automatically sets `context: fork` in the generated skill. @@ -122,17 +114,6 @@ steps: - script: hooks/run_tests.sh ``` -### Stop Hooks (Legacy) - -```yaml -steps: - - id: step_id - stop_hooks: - - prompt: "Validation prompt..." - - prompt_file: hooks/check.md - - script: hooks/validate.sh -``` - ## Validation Rules 1. **No circular dependencies**: Step A cannot depend on Step B if Step B depends on Step A diff --git a/src/deepwork/standard_jobs/deepwork_jobs/job.yml b/src/deepwork/standard_jobs/deepwork_jobs/job.yml index 964a06bb..2f24a707 100644 --- a/src/deepwork/standard_jobs/deepwork_jobs/job.yml +++ b/src/deepwork/standard_jobs/deepwork_jobs/job.yml @@ -91,7 +91,6 @@ steps: dependencies: - define quality_criteria: - - "**Directory Structure**: Is `.deepwork/jobs/[job_name]/` created correctly?" 
- "**Complete Instructions**: Are ALL step instruction files complete (not stubs or placeholders)?" - "**Specific & Actionable**: Are instructions tailored to each step's purpose, not generic?" - "**Output Examples**: Does each instruction file show what good output looks like?" @@ -114,7 +113,7 @@ steps: - implement quality_criteria: - "**User Informed**: Did the agent explain the workflow is ready and ask what to test it on?" - - "**Workflow Invoked**: Was the new workflow actually run on the user's test case?" + - "**Workflow Invoked**: Was the new workflow actually run on the user's test case via MCP?" - "**Output Critiqued**: Did the agent identify up to 3 top issues with the output?" - "**User Feedback Gathered**: Did the agent ask the user about each issue and gather additional feedback?" - "**Corrections Made**: Were all requested corrections applied to the output?" diff --git a/src/deepwork/standard_jobs/deepwork_jobs/steps/define.md b/src/deepwork/standard_jobs/deepwork_jobs/steps/define.md index fc0a9296..686a456d 100644 --- a/src/deepwork/standard_jobs/deepwork_jobs/steps/define.md +++ b/src/deepwork/standard_jobs/deepwork_jobs/steps/define.md @@ -225,49 +225,53 @@ After gathering information about all steps: - Job description (detailed multi-line explanation) - Version number (start with 1.0.0) -### Step 4: Define Quality Validation (Stop Hooks) +### Step 4: Define Quality Validation Hooks -For each step, consider whether it would benefit from **quality validation loops**. Stop hooks allow the AI agent to iteratively refine its work until quality criteria are met. +For each step, consider whether it would benefit from **quality validation loops**. Quality hooks allow the AI agent to iteratively refine its work until quality criteria are met. **Ask structured questions about quality validation:** - "Are there specific quality criteria that must be met for this step?" - "Would you like the agent to validate its work before completing?" - "What would make you send the work back for revision?" -**Stop hooks are particularly valuable for:** +**Quality hooks are particularly valuable for:** - Steps with complex outputs that need multiple checks - Steps where quality is critical (final deliverables) - Steps with subjective quality criteria that benefit from AI self-review -**Three types of stop hooks are supported:** +**Three types of hooks are supported:** 1. **Inline Prompt** (`prompt`) - Best for simple quality criteria ```yaml - stop_hooks: - - prompt: | - Verify the output meets these criteria: - 1. Contains at least 5 competitors - 2. Each competitor has a description - 3. Selection rationale is clear + hooks: + after_agent: + - prompt: | + Verify the output meets these criteria: + 1. Contains at least 5 competitors + 2. Each competitor has a description + 3. Selection rationale is clear ``` 2. **Prompt File** (`prompt_file`) - For detailed/reusable criteria ```yaml - stop_hooks: - - prompt_file: hooks/quality_check.md + hooks: + after_agent: + - prompt_file: hooks/quality_check.md ``` 3. 
**Script** (`script`) - For programmatic validation (tests, linting) ```yaml - stop_hooks: - - script: hooks/run_tests.sh + hooks: + after_agent: + - script: hooks/run_tests.sh ``` **Multiple hooks can be combined:** ```yaml -stop_hooks: - - script: hooks/lint_output.sh - - prompt: "Verify the content is comprehensive and well-organized" +hooks: + after_agent: + - script: hooks/lint_output.sh + - prompt: "Verify the content is comprehensive and well-organized" ``` **Encourage prompt-based hooks** - They leverage the AI's ability to understand context and make nuanced quality judgments. Script hooks are best for objective checks (syntax, format, tests). diff --git a/src/deepwork/standard_jobs/deepwork_jobs/steps/implement.md b/src/deepwork/standard_jobs/deepwork_jobs/steps/implement.md index 6f359e4f..e1516c78 100644 --- a/src/deepwork/standard_jobs/deepwork_jobs/steps/implement.md +++ b/src/deepwork/standard_jobs/deepwork_jobs/steps/implement.md @@ -2,34 +2,13 @@ ## Objective -Generate the DeepWork job directory structure and instruction files for each step based on the `job.yml` specification from the define step. +Generate step instruction files for each step based on the `job.yml` specification from the define step. ## Task -Read the `job.yml` specification file and create all the necessary files to make the job functional, including directory structure and step instruction files. Then sync the commands to make them available. +Read the `job.yml` specification file created by the define step and generate comprehensive instruction files for each step. The define step has already created the job directory structure. -### Step 1: Create Directory Structure Using Script - -Run the `make_new_job.sh` script to create the standard directory structure: - -```bash -.deepwork/jobs/deepwork_jobs/make_new_job.sh [job_name] -``` - -This creates: -- `.deepwork/jobs/[job_name]/` - Main job directory -- `.deepwork/jobs/[job_name]/steps/` - Step instruction files -- `.deepwork/jobs/[job_name]/hooks/` - Custom validation scripts (with .gitkeep) -- `.deepwork/jobs/[job_name]/templates/` - Example file formats (with .gitkeep) -- `.deepwork/jobs/[job_name]/AGENTS.md` - Job management guidance - -**Note**: If the directory already exists (e.g., job.yml was created by define step), you can skip this step or manually create the additional directories: -```bash -mkdir -p .deepwork/jobs/[job_name]/hooks .deepwork/jobs/[job_name]/templates -touch .deepwork/jobs/[job_name]/hooks/.gitkeep .deepwork/jobs/[job_name]/templates/.gitkeep -``` - -### Step 2: Read and Validate the Specification +### Step 1: Read and Validate the Specification 1. **Locate the job.yml file** - Read `.deepwork/jobs/[job_name]/job.yml` from the define step @@ -46,7 +25,7 @@ touch .deepwork/jobs/[job_name]/hooks/.gitkeep .deepwork/jobs/[job_name]/templat - List of all steps with their details - Understand the workflow structure -### Step 3: Generate Step Instruction Files +### Step 2: Generate Step Instruction Files For each step in the job.yml, create a comprehensive instruction file at `.deepwork/jobs/[job_name]/steps/[step_id].md`. @@ -71,11 +50,11 @@ For each step in the job.yml, create a comprehensive instruction file at `.deepw 6. **Align with stop hooks** - If the step has `stop_hooks` defined, ensure the quality criteria in the instruction file match the validation criteria in the hooks 7. 
**Ask structured questions** - When a step has user inputs, the instructions MUST explicitly tell the agent to "ask structured questions" using the AskUserQuestion tool to gather that information. Never use generic phrasing like "ask the user" - always use "ask structured questions" -### Handling Stop Hooks +### Handling Quality Hooks -If a step in the job.yml has `stop_hooks` defined, the generated instruction file should: +If a step in the job.yml has `hooks.after_agent` defined, the generated instruction file should: -1. **Mirror the quality criteria** - The "Quality Criteria" section should match what the stop hooks will validate +1. **Mirror the quality criteria** - The "Quality Criteria" section should match what the hooks will validate 2. **Be explicit about success** - Help the agent understand when the step is truly complete 3. **Include the promise pattern** - Mention that `✓ Quality Criteria Met` should be included when criteria are met @@ -83,12 +62,13 @@ If a step in the job.yml has `stop_hooks` defined, the generated instruction fil ```yaml - id: research_competitors name: "Research Competitors" - stop_hooks: - - prompt: | - Verify the research meets criteria: - 1. Each competitor has at least 3 data points - 2. Sources are cited - 3. Information is current (within last year) + hooks: + after_agent: + - prompt: | + Verify the research meets criteria: + 1. Each competitor has at least 3 data points + 2. Sources are cited + 3. Information is current (within last year) ``` **The instruction file should include:** @@ -109,9 +89,11 @@ Step instructions can include additional `.md` files in the `steps/` directory f See `.deepwork/jobs/deepwork_jobs/steps/supplemental_file_references.md` for detailed documentation and examples. -### Step 4: Verify job.yml Location +### Step 3: Verify Files -Verify that `job.yml` is in the correct location at `.deepwork/jobs/[job_name]/job.yml`. The define step should have created it. If for some reason it's not there, you may need to create or move it. +Verify that all files are in their correct locations: +- `job.yml` at `.deepwork/jobs/[job_name]/job.yml` (created by define step) +- Step instruction files at `.deepwork/jobs/[job_name]/steps/[step_id].md` ## Example Implementation @@ -135,9 +117,12 @@ Before marking this step complete, ensure: - [ ] All step instruction files created - [ ] Each instruction file is complete and actionable +## Note: Workflow Availability + +Once the job.yml and step instruction files are created, the workflow is immediately available through the DeepWork MCP server. The MCP server reads job definitions directly from `.deepwork/jobs/` - no separate sync or installation step is required. + ## Quality Criteria -- Job directory structure is correct - All instruction files are complete (not stubs) - Instructions are specific and actionable - Output examples are provided in each instruction file diff --git a/src/deepwork/standard_jobs/deepwork_jobs/steps/iterate.md b/src/deepwork/standard_jobs/deepwork_jobs/steps/iterate.md index 7efece1b..308a6a47 100644 --- a/src/deepwork/standard_jobs/deepwork_jobs/steps/iterate.md +++ b/src/deepwork/standard_jobs/deepwork_jobs/steps/iterate.md @@ -210,9 +210,7 @@ I identified the following issues from the test run: - Version bumped to 1.0.1 - Added changelog: "Improved visual design guidance and audience-appropriate writing based on test feedback" -Running `deepwork sync`... - -Done! The workflow has been updated. +Done! The workflow has been updated. 
Changes are immediately available through the MCP server. ### Expected Impact diff --git a/src/deepwork/standard_jobs/deepwork_jobs/steps/learn.md b/src/deepwork/standard_jobs/deepwork_jobs/steps/learn.md index 3fcbc71f..38a80362 100644 --- a/src/deepwork/standard_jobs/deepwork_jobs/steps/learn.md +++ b/src/deepwork/standard_jobs/deepwork_jobs/steps/learn.md @@ -268,7 +268,7 @@ When adding entries to AGENTS.md, prefer these patterns: ## Example Dialog ``` -User: /deepwork_jobs.learn +User: [Invokes the learn workflow via MCP] Claude: I'll analyze this conversation for learnings from DeepWork job executions. diff --git a/src/deepwork/standard_jobs/deepwork_jobs/steps/test.md b/src/deepwork/standard_jobs/deepwork_jobs/steps/test.md index cce74c43..7b78ca81 100644 --- a/src/deepwork/standard_jobs/deepwork_jobs/steps/test.md +++ b/src/deepwork/standard_jobs/deepwork_jobs/steps/test.md @@ -26,11 +26,11 @@ Please describe a specific case you want to run through the workflow - ideally s ### Step 2: Prepare and Run the Workflow -1. **Compact the conversation history** - Before invoking the workflow, use the `/compact` command to summarize the conversation so far. This ensures the workflow starts with clean context focused on the test case. +1. **Prepare clean context** - Before invoking the workflow, consider compacting the conversation history (e.g., using `/compact` in Claude Code) to ensure the workflow starts with clean context focused on the test case. -2. **Start the new workflow** - Begin executing the first step of the newly created workflow. +2. **Start the new workflow** - Use `start_workflow` through the DeepWork MCP server with the job name and workflow name to begin executing the workflow. -3. **Complete the full workflow** - Continue through all steps of the workflow until it produces its final output. +3. **Complete the full workflow** - Continue through all steps of the workflow until it produces its final output. Use `finished_step` to progress through each step. 4. **Note any issues during execution** - Pay attention to: - Confusion or ambiguity in instructions @@ -91,7 +91,6 @@ If the user provides more feedback, address it and ask again. Don't assume satis ## Quality Criteria - User was informed the workflow is ready and asked what to test it on -- Conversation was compacted before running the workflow - The new workflow was actually invoked and run to completion - Output was critiqued and up to 3 top issues were identified - Each identified issue was presented to the user with a specific question @@ -114,11 +113,9 @@ What would you like to use it on for the first test run? Please describe a speci User: Let's do the January 2026 engineering report, focusing on deployment frequency and incident response times. -Claude: Great, let me compact the conversation and run the workflow on your January 2026 engineering report. +Claude: Great, let me run the workflow on your January 2026 engineering report. -/compact - -[Starting the monthly_report workflow...] +[Starting the monthly_report workflow via MCP...] [... workflow runs through all steps ...] 
From e122265112086d54f9f3946d98608f16a9aed703 Mon Sep 17 00:00:00 2001 From: Noah Horton Date: Thu, 5 Feb 2026 10:30:38 -0700 Subject: [PATCH 21/45] stop backing up rules --- .deepwork/jobs/deepwork_jobs/steps/errata.md | 25 +++---------------- .../deepwork_jobs/steps/errata.md | 25 +++---------------- 2 files changed, 6 insertions(+), 44 deletions(-) diff --git a/.deepwork/jobs/deepwork_jobs/steps/errata.md b/.deepwork/jobs/deepwork_jobs/steps/errata.md index 22a5c167..c49ece1b 100644 --- a/.deepwork/jobs/deepwork_jobs/steps/errata.md +++ b/.deepwork/jobs/deepwork_jobs/steps/errata.md @@ -90,29 +90,10 @@ find .deepwork/tmp -type d -empty -delete 2>/dev/null ### Step 3: Remove Rules Folder (Fully Deprecated) -DeepWork Rules have been completely removed from the system. The `.deepwork/rules/` folder should be deleted. - -```bash -ls -la .deepwork/rules/ 2>/dev/null || echo "No rules folder (good!)" -``` - -**If the folder exists:** - -1. **Back up the folder** (in case user wants to reference old rules): - ```bash - mv .deepwork/rules/ .deepwork/rules.backup/ - ``` - -2. **Inform the user** that DeepWork Rules are deprecated and the folder has been backed up - -3. **After user confirms** the backup is acceptable, the backup can be deleted later - -**Also remove these related items if present:** -- `.deepwork/tmp/rules/` folder and all contents -- `.deepwork/jobs/deepwork_rules/` folder (the old rules job) -- Any `deepwork_rules` job that may have been installed +DeepWork Rules have been completely removed from the system. Delete the `.deepwork/rules/` folder and all related items: ```bash +rm -rf .deepwork/rules/ 2>/dev/null rm -rf .deepwork/tmp/rules/ 2>/dev/null rm -rf .deepwork/jobs/deepwork_rules/ 2>/dev/null ``` @@ -171,7 +152,7 @@ git status - Legacy job skill folders are removed from `.claude/skills/` and `.gemini/skills/` (folders matching job names or `jobname.*` patterns) - The `deepwork` skill folder in `.claude/skills/deepwork/` still exists after cleanup - `.deepwork/tmp/` contents are cleaned appropriately -- `.deepwork/rules/` folder is backed up and removed (DeepWork Rules fully deprecated) +- `.deepwork/rules/` folder is removed (DeepWork Rules fully deprecated) - `.deepwork/tmp/rules/` folder is removed - `.deepwork/jobs/deepwork_rules/` folder is removed if present - `.deepwork/config.yml` uses current version format diff --git a/src/deepwork/standard_jobs/deepwork_jobs/steps/errata.md b/src/deepwork/standard_jobs/deepwork_jobs/steps/errata.md index 22a5c167..c49ece1b 100644 --- a/src/deepwork/standard_jobs/deepwork_jobs/steps/errata.md +++ b/src/deepwork/standard_jobs/deepwork_jobs/steps/errata.md @@ -90,29 +90,10 @@ find .deepwork/tmp -type d -empty -delete 2>/dev/null ### Step 3: Remove Rules Folder (Fully Deprecated) -DeepWork Rules have been completely removed from the system. The `.deepwork/rules/` folder should be deleted. - -```bash -ls -la .deepwork/rules/ 2>/dev/null || echo "No rules folder (good!)" -``` - -**If the folder exists:** - -1. **Back up the folder** (in case user wants to reference old rules): - ```bash - mv .deepwork/rules/ .deepwork/rules.backup/ - ``` - -2. **Inform the user** that DeepWork Rules are deprecated and the folder has been backed up - -3. 
**After user confirms** the backup is acceptable, the backup can be deleted later - -**Also remove these related items if present:** -- `.deepwork/tmp/rules/` folder and all contents -- `.deepwork/jobs/deepwork_rules/` folder (the old rules job) -- Any `deepwork_rules` job that may have been installed +DeepWork Rules have been completely removed from the system. Delete the `.deepwork/rules/` folder and all related items: ```bash +rm -rf .deepwork/rules/ 2>/dev/null rm -rf .deepwork/tmp/rules/ 2>/dev/null rm -rf .deepwork/jobs/deepwork_rules/ 2>/dev/null ``` @@ -171,7 +152,7 @@ git status - Legacy job skill folders are removed from `.claude/skills/` and `.gemini/skills/` (folders matching job names or `jobname.*` patterns) - The `deepwork` skill folder in `.claude/skills/deepwork/` still exists after cleanup - `.deepwork/tmp/` contents are cleaned appropriately -- `.deepwork/rules/` folder is backed up and removed (DeepWork Rules fully deprecated) +- `.deepwork/rules/` folder is removed (DeepWork Rules fully deprecated) - `.deepwork/tmp/rules/` folder is removed - `.deepwork/jobs/deepwork_rules/` folder is removed if present - `.deepwork/config.yml` uses current version format From 0000c17a17dd6a9cc09d6231e54cce3d89425a5c Mon Sep 17 00:00:00 2001 From: Noah Horton Date: Thu, 5 Feb 2026 11:08:19 -0700 Subject: [PATCH 22/45] make_new_job.sh preserved, parallel execution, no dupe quality criteria --- .deepwork/jobs/deepwork_jobs/job.yml | 1 + .deepwork/jobs/deepwork_jobs/steps/define.md | 2 ++ .deepwork/jobs/deepwork_jobs/steps/errata.md | 12 ------- .../jobs/deepwork_jobs/steps/fix_jobs.md | 34 +++++++++++++------ .../jobs/deepwork_jobs/steps/fix_settings.md | 17 ++-------- .../jobs/deepwork_jobs/steps/implement.md | 10 +----- .deepwork/jobs/deepwork_jobs/steps/iterate.md | 11 ------ .deepwork/jobs/deepwork_jobs/steps/learn.md | 14 -------- .deepwork/jobs/deepwork_jobs/steps/test.md | 11 ------ .../standard_jobs/deepwork_jobs/job.yml | 1 + .../deepwork_jobs/steps/define.md | 2 ++ .../deepwork_jobs/steps/errata.md | 12 ------- .../deepwork_jobs/steps/fix_jobs.md | 34 +++++++++++++------ .../deepwork_jobs/steps/fix_settings.md | 17 ++-------- .../deepwork_jobs/steps/implement.md | 10 +----- .../deepwork_jobs/steps/iterate.md | 11 ------ .../deepwork_jobs/steps/learn.md | 14 -------- .../standard_jobs/deepwork_jobs/steps/test.md | 11 ------ 18 files changed, 62 insertions(+), 162 deletions(-) diff --git a/.deepwork/jobs/deepwork_jobs/job.yml b/.deepwork/jobs/deepwork_jobs/job.yml index 2f24a707..1aea4e0d 100644 --- a/.deepwork/jobs/deepwork_jobs/job.yml +++ b/.deepwork/jobs/deepwork_jobs/job.yml @@ -176,6 +176,7 @@ steps: quality_criteria: - "**DeepWork Skills Removed**: Are `Skill(...)` entries matching jobs in `.deepwork/jobs/` removed?" - "**Non-DeepWork Skills Preserved**: Are skills NOT matching DeepWork jobs left intact?" + - "**make_new_job.sh Preserved**: Is the `Bash(...)` permission for `make_new_job.sh` preserved (if present)?" - "**Rules Hooks Removed**: Are all DeepWork Rules hooks and permissions removed?" - "**Duplicate Hooks Removed**: Are duplicate hook entries consolidated or removed?" - "**Hardcoded Paths Removed**: Are user-specific hardcoded paths (like `/Users/*/...`) removed?" 
diff --git a/.deepwork/jobs/deepwork_jobs/steps/define.md b/.deepwork/jobs/deepwork_jobs/steps/define.md index 686a456d..3e9a87da 100644 --- a/.deepwork/jobs/deepwork_jobs/steps/define.md +++ b/.deepwork/jobs/deepwork_jobs/steps/define.md @@ -172,6 +172,8 @@ For each major phase they mentioned, ask structured questions to gather details: - Are there any quality checks or validation needed? - What makes a good vs. bad output for this step? + **Important**: Quality criteria belong in the `quality_criteria` field of job.yml, NOT in the step details. When skills are generated, quality criteria are automatically included in the output. Do not duplicate them in step instructions or details—this causes redundancy and confusion. + 6. **Agent Delegation** (optional) - Should this step be executed by a specific agent type? - Use the `agent` field when the step should run in a forked context with a specific agent diff --git a/.deepwork/jobs/deepwork_jobs/steps/errata.md b/.deepwork/jobs/deepwork_jobs/steps/errata.md index c49ece1b..c71d62be 100644 --- a/.deepwork/jobs/deepwork_jobs/steps/errata.md +++ b/.deepwork/jobs/deepwork_jobs/steps/errata.md @@ -147,18 +147,6 @@ git status - No new untracked files should appear (unless intentionally created) - Backup files (`.backup`) should be in `.gitignore` or cleaned up -## Quality Criteria - -- Legacy job skill folders are removed from `.claude/skills/` and `.gemini/skills/` (folders matching job names or `jobname.*` patterns) -- The `deepwork` skill folder in `.claude/skills/deepwork/` still exists after cleanup -- `.deepwork/tmp/` contents are cleaned appropriately -- `.deepwork/rules/` folder is removed (DeepWork Rules fully deprecated) -- `.deepwork/tmp/rules/` folder is removed -- `.deepwork/jobs/deepwork_rules/` folder is removed if present -- `.deepwork/config.yml` uses current version format -- Git status shows clean changes ready to commit -- When all criteria are met, include `Quality Criteria Met` in your response - ## Important Notes 1. **Always back up before deleting** - User data is irreplaceable diff --git a/.deepwork/jobs/deepwork_jobs/steps/fix_jobs.md b/.deepwork/jobs/deepwork_jobs/steps/fix_jobs.md index b1656e27..8d34468a 100644 --- a/.deepwork/jobs/deepwork_jobs/steps/fix_jobs.md +++ b/.deepwork/jobs/deepwork_jobs/steps/fix_jobs.md @@ -18,6 +18,30 @@ ls -la .deepwork/jobs/ For each job directory, you'll need to check and potentially fix the `job.yml` file. +### Step 1.5: Process Jobs in Parallel + +**For each job** (except `deepwork_jobs` which should be updated via `deepwork install`), kick off a sub-agent to audit and repair that job's `job.yml` file. The sub-agent should: + +1. Read the job's `job.yml` file +2. Check for and fix all issues described in Steps 2-6 below +3. Validate the YAML is still valid after changes +4. Report what was changed + +**Run sub-agents in parallel** - one for each job to speed up the process. + +**Example prompt for sub-agent:** +``` +Audit and repair the job at `.deepwork/jobs/[job_name]/job.yml`: +1. Remove any `exposed: true` fields from steps +2. Migrate `stop_hooks` to `hooks.after_agent` format +3. Remove references to deleted steps (like `review_job_spec`) +4. Fix orphaned steps by adding them to workflows +5. Bump version and add changelog entry if changes were made +6. Validate YAML syntax + +Report what changes were made. +``` + ### Step 2: Remove `exposed` Field The `exposed` field on steps no longer has any effect in MCP-based DeepWork. Steps are now only accessible through workflows. 
@@ -142,16 +166,6 @@ changelog: changes: "Migrated to current DeepWork format; removed deprecated fields" ``` -## Quality Criteria - -- All `exposed: true` fields are removed or noted -- All `stop_hooks` are migrated to `hooks.after_agent` format -- References to removed steps (like `review_job_spec`) are updated -- Jobs with no workflows get a single workflow (same name as job) containing all steps -- Jobs with existing workflows get individual workflows for each orphaned step (same name as step) -- All job.yml files are valid YAML -- When all criteria are met, include `Quality Criteria Met` in your response - ## Common Issues and Fixes ### Issue: Step references non-existent step in `from_step` diff --git a/.deepwork/jobs/deepwork_jobs/steps/fix_settings.md b/.deepwork/jobs/deepwork_jobs/steps/fix_settings.md index 0c046cd9..d164b69e 100644 --- a/.deepwork/jobs/deepwork_jobs/steps/fix_settings.md +++ b/.deepwork/jobs/deepwork_jobs/steps/fix_settings.md @@ -126,18 +126,6 @@ python -c "import json; json.load(open('.claude/settings.json'))" If there are syntax errors, fix them before proceeding. -## Quality Criteria - -- DeepWork job `Skill(...)` permissions are removed (only those matching `.deepwork/jobs/`) -- Non-DeepWork skills are preserved (skills not matching any job in `.deepwork/jobs/`) -- All DeepWork Rules hooks and permissions are removed -- Duplicate hook entries are consolidated -- Hardcoded user-specific paths are removed -- Deprecated `deepwork hook` commands are removed -- The settings.json file is valid JSON -- A backup was created before modifications -- When all criteria are met, include `Quality Criteria Met` in your response - ## Example Before/After ### Before (with gunk): @@ -184,5 +172,6 @@ If there are syntax errors, fix them before proceeding. ## Important Notes 1. **Don't remove non-DeepWork permissions** - Keep permissions like `WebSearch`, `Read(...)`, `Bash(...)` that aren't related to old DeepWork skills -2. **Be conservative** - If unsure whether something is legacy, ask the user -3. **Document changes** - Note what was removed for the final summary +2. **Preserve `make_new_job.sh`** - Keep any `Bash(...)` permission referencing `make_new_job.sh` (e.g., `Bash(.deepwork/jobs/deepwork_jobs/scripts/make_new_job.sh *)`) - this is a current DeepWork script +3. **Be conservative** - If unsure whether something is legacy, ask the user +4. **Document changes** - Note what was removed for the final summary diff --git a/.deepwork/jobs/deepwork_jobs/steps/implement.md b/.deepwork/jobs/deepwork_jobs/steps/implement.md index e1516c78..7be269a5 100644 --- a/.deepwork/jobs/deepwork_jobs/steps/implement.md +++ b/.deepwork/jobs/deepwork_jobs/steps/implement.md @@ -119,12 +119,4 @@ Before marking this step complete, ensure: ## Note: Workflow Availability -Once the job.yml and step instruction files are created, the workflow is immediately available through the DeepWork MCP server. The MCP server reads job definitions directly from `.deepwork/jobs/` - no separate sync or installation step is required. - -## Quality Criteria - -- All instruction files are complete (not stubs) -- Instructions are specific and actionable -- Output examples are provided in each instruction file -- Quality criteria defined for each step -- Steps with user inputs explicitly use "ask structured questions" phrasing +Once the job.yml and step instruction files are created, the workflow is immediately available through the DeepWork MCP server. 
The MCP server reads job definitions directly from `.deepwork/jobs/` - no separate sync or installation step is required. \ No newline at end of file diff --git a/.deepwork/jobs/deepwork_jobs/steps/iterate.md b/.deepwork/jobs/deepwork_jobs/steps/iterate.md index 308a6a47..fb1f56c8 100644 --- a/.deepwork/jobs/deepwork_jobs/steps/iterate.md +++ b/.deepwork/jobs/deepwork_jobs/steps/iterate.md @@ -159,17 +159,6 @@ Example recap format: Future runs should produce reports with better visual design and clearer summaries, reducing the need for post-generation corrections. ``` -## Quality Criteria - -- Conversation history from test step was analyzed for issues -- Process inefficiencies were identified and addressed -- Step instructions were updated to improve clarity -- Quality criteria were updated to match user expectations -- Alternative tools were considered where relevant -- Job version and changelog were updated -- A clear recap of improvements was provided -- When all criteria are met, include `Quality Criteria Met` in your response - ## Example Dialog ``` diff --git a/.deepwork/jobs/deepwork_jobs/steps/learn.md b/.deepwork/jobs/deepwork_jobs/steps/learn.md index 38a80362..a4a50c9a 100644 --- a/.deepwork/jobs/deepwork_jobs/steps/learn.md +++ b/.deepwork/jobs/deepwork_jobs/steps/learn.md @@ -251,20 +251,6 @@ When adding entries to AGENTS.md, prefer these patterns: - Configuration requires these fields: name, version, ... ``` -## Quality Criteria - -- Conversation has been analyzed for job executions -- Points of confusion and inefficiency are identified -- Learnings are correctly classified (generalizable vs bespoke) -- Job instructions updated for generalizable improvements -- Instructions are concise - no redundancy or unnecessary verbosity -- Shared/lengthy content extracted into referenced files where appropriate -- AGENTS.md created/updated with bespoke learnings -- File references used instead of duplicating content -- AGENTS.md is in the correct folder (the deepest common folder for the topic) -- Job version and changelog updated if instructions were modified -- When all criteria are met, include `✓ Quality Criteria Met` - ## Example Dialog ``` diff --git a/.deepwork/jobs/deepwork_jobs/steps/test.md b/.deepwork/jobs/deepwork_jobs/steps/test.md index 7b78ca81..36d27128 100644 --- a/.deepwork/jobs/deepwork_jobs/steps/test.md +++ b/.deepwork/jobs/deepwork_jobs/steps/test.md @@ -88,17 +88,6 @@ The feedback loop should continue until the user explicitly indicates satisfacti If the user provides more feedback, address it and ask again. Don't assume satisfaction without explicit confirmation. 
-## Quality Criteria - -- User was informed the workflow is ready and asked what to test it on -- The new workflow was actually invoked and run to completion -- Output was critiqued and up to 3 top issues were identified -- Each identified issue was presented to the user with a specific question -- Confirmed corrections were applied -- User was asked for additional feedback after corrections -- Iteration continued until user confirmed satisfaction -- When all criteria are met, include `Quality Criteria Met` in your response - ## Example Dialog ``` diff --git a/src/deepwork/standard_jobs/deepwork_jobs/job.yml b/src/deepwork/standard_jobs/deepwork_jobs/job.yml index 2f24a707..1aea4e0d 100644 --- a/src/deepwork/standard_jobs/deepwork_jobs/job.yml +++ b/src/deepwork/standard_jobs/deepwork_jobs/job.yml @@ -176,6 +176,7 @@ steps: quality_criteria: - "**DeepWork Skills Removed**: Are `Skill(...)` entries matching jobs in `.deepwork/jobs/` removed?" - "**Non-DeepWork Skills Preserved**: Are skills NOT matching DeepWork jobs left intact?" + - "**make_new_job.sh Preserved**: Is the `Bash(...)` permission for `make_new_job.sh` preserved (if present)?" - "**Rules Hooks Removed**: Are all DeepWork Rules hooks and permissions removed?" - "**Duplicate Hooks Removed**: Are duplicate hook entries consolidated or removed?" - "**Hardcoded Paths Removed**: Are user-specific hardcoded paths (like `/Users/*/...`) removed?" diff --git a/src/deepwork/standard_jobs/deepwork_jobs/steps/define.md b/src/deepwork/standard_jobs/deepwork_jobs/steps/define.md index 686a456d..3e9a87da 100644 --- a/src/deepwork/standard_jobs/deepwork_jobs/steps/define.md +++ b/src/deepwork/standard_jobs/deepwork_jobs/steps/define.md @@ -172,6 +172,8 @@ For each major phase they mentioned, ask structured questions to gather details: - Are there any quality checks or validation needed? - What makes a good vs. bad output for this step? + **Important**: Quality criteria belong in the `quality_criteria` field of job.yml, NOT in the step details. When skills are generated, quality criteria are automatically included in the output. Do not duplicate them in step instructions or details—this causes redundancy and confusion. + 6. **Agent Delegation** (optional) - Should this step be executed by a specific agent type? - Use the `agent` field when the step should run in a forked context with a specific agent diff --git a/src/deepwork/standard_jobs/deepwork_jobs/steps/errata.md b/src/deepwork/standard_jobs/deepwork_jobs/steps/errata.md index c49ece1b..c71d62be 100644 --- a/src/deepwork/standard_jobs/deepwork_jobs/steps/errata.md +++ b/src/deepwork/standard_jobs/deepwork_jobs/steps/errata.md @@ -147,18 +147,6 @@ git status - No new untracked files should appear (unless intentionally created) - Backup files (`.backup`) should be in `.gitignore` or cleaned up -## Quality Criteria - -- Legacy job skill folders are removed from `.claude/skills/` and `.gemini/skills/` (folders matching job names or `jobname.*` patterns) -- The `deepwork` skill folder in `.claude/skills/deepwork/` still exists after cleanup -- `.deepwork/tmp/` contents are cleaned appropriately -- `.deepwork/rules/` folder is removed (DeepWork Rules fully deprecated) -- `.deepwork/tmp/rules/` folder is removed -- `.deepwork/jobs/deepwork_rules/` folder is removed if present -- `.deepwork/config.yml` uses current version format -- Git status shows clean changes ready to commit -- When all criteria are met, include `Quality Criteria Met` in your response - ## Important Notes 1. 
**Always back up before deleting** - User data is irreplaceable diff --git a/src/deepwork/standard_jobs/deepwork_jobs/steps/fix_jobs.md b/src/deepwork/standard_jobs/deepwork_jobs/steps/fix_jobs.md index b1656e27..8d34468a 100644 --- a/src/deepwork/standard_jobs/deepwork_jobs/steps/fix_jobs.md +++ b/src/deepwork/standard_jobs/deepwork_jobs/steps/fix_jobs.md @@ -18,6 +18,30 @@ ls -la .deepwork/jobs/ For each job directory, you'll need to check and potentially fix the `job.yml` file. +### Step 1.5: Process Jobs in Parallel + +**For each job** (except `deepwork_jobs` which should be updated via `deepwork install`), kick off a sub-agent to audit and repair that job's `job.yml` file. The sub-agent should: + +1. Read the job's `job.yml` file +2. Check for and fix all issues described in Steps 2-6 below +3. Validate the YAML is still valid after changes +4. Report what was changed + +**Run sub-agents in parallel** - one for each job to speed up the process. + +**Example prompt for sub-agent:** +``` +Audit and repair the job at `.deepwork/jobs/[job_name]/job.yml`: +1. Remove any `exposed: true` fields from steps +2. Migrate `stop_hooks` to `hooks.after_agent` format +3. Remove references to deleted steps (like `review_job_spec`) +4. Fix orphaned steps by adding them to workflows +5. Bump version and add changelog entry if changes were made +6. Validate YAML syntax + +Report what changes were made. +``` + ### Step 2: Remove `exposed` Field The `exposed` field on steps no longer has any effect in MCP-based DeepWork. Steps are now only accessible through workflows. @@ -142,16 +166,6 @@ changelog: changes: "Migrated to current DeepWork format; removed deprecated fields" ``` -## Quality Criteria - -- All `exposed: true` fields are removed or noted -- All `stop_hooks` are migrated to `hooks.after_agent` format -- References to removed steps (like `review_job_spec`) are updated -- Jobs with no workflows get a single workflow (same name as job) containing all steps -- Jobs with existing workflows get individual workflows for each orphaned step (same name as step) -- All job.yml files are valid YAML -- When all criteria are met, include `Quality Criteria Met` in your response - ## Common Issues and Fixes ### Issue: Step references non-existent step in `from_step` diff --git a/src/deepwork/standard_jobs/deepwork_jobs/steps/fix_settings.md b/src/deepwork/standard_jobs/deepwork_jobs/steps/fix_settings.md index 0c046cd9..d164b69e 100644 --- a/src/deepwork/standard_jobs/deepwork_jobs/steps/fix_settings.md +++ b/src/deepwork/standard_jobs/deepwork_jobs/steps/fix_settings.md @@ -126,18 +126,6 @@ python -c "import json; json.load(open('.claude/settings.json'))" If there are syntax errors, fix them before proceeding. -## Quality Criteria - -- DeepWork job `Skill(...)` permissions are removed (only those matching `.deepwork/jobs/`) -- Non-DeepWork skills are preserved (skills not matching any job in `.deepwork/jobs/`) -- All DeepWork Rules hooks and permissions are removed -- Duplicate hook entries are consolidated -- Hardcoded user-specific paths are removed -- Deprecated `deepwork hook` commands are removed -- The settings.json file is valid JSON -- A backup was created before modifications -- When all criteria are met, include `Quality Criteria Met` in your response - ## Example Before/After ### Before (with gunk): @@ -184,5 +172,6 @@ If there are syntax errors, fix them before proceeding. ## Important Notes 1. 
**Don't remove non-DeepWork permissions** - Keep permissions like `WebSearch`, `Read(...)`, `Bash(...)` that aren't related to old DeepWork skills -2. **Be conservative** - If unsure whether something is legacy, ask the user -3. **Document changes** - Note what was removed for the final summary +2. **Preserve `make_new_job.sh`** - Keep any `Bash(...)` permission referencing `make_new_job.sh` (e.g., `Bash(.deepwork/jobs/deepwork_jobs/scripts/make_new_job.sh *)`) - this is a current DeepWork script +3. **Be conservative** - If unsure whether something is legacy, ask the user +4. **Document changes** - Note what was removed for the final summary diff --git a/src/deepwork/standard_jobs/deepwork_jobs/steps/implement.md b/src/deepwork/standard_jobs/deepwork_jobs/steps/implement.md index e1516c78..7be269a5 100644 --- a/src/deepwork/standard_jobs/deepwork_jobs/steps/implement.md +++ b/src/deepwork/standard_jobs/deepwork_jobs/steps/implement.md @@ -119,12 +119,4 @@ Before marking this step complete, ensure: ## Note: Workflow Availability -Once the job.yml and step instruction files are created, the workflow is immediately available through the DeepWork MCP server. The MCP server reads job definitions directly from `.deepwork/jobs/` - no separate sync or installation step is required. - -## Quality Criteria - -- All instruction files are complete (not stubs) -- Instructions are specific and actionable -- Output examples are provided in each instruction file -- Quality criteria defined for each step -- Steps with user inputs explicitly use "ask structured questions" phrasing +Once the job.yml and step instruction files are created, the workflow is immediately available through the DeepWork MCP server. The MCP server reads job definitions directly from `.deepwork/jobs/` - no separate sync or installation step is required. \ No newline at end of file diff --git a/src/deepwork/standard_jobs/deepwork_jobs/steps/iterate.md b/src/deepwork/standard_jobs/deepwork_jobs/steps/iterate.md index 308a6a47..fb1f56c8 100644 --- a/src/deepwork/standard_jobs/deepwork_jobs/steps/iterate.md +++ b/src/deepwork/standard_jobs/deepwork_jobs/steps/iterate.md @@ -159,17 +159,6 @@ Example recap format: Future runs should produce reports with better visual design and clearer summaries, reducing the need for post-generation corrections. ``` -## Quality Criteria - -- Conversation history from test step was analyzed for issues -- Process inefficiencies were identified and addressed -- Step instructions were updated to improve clarity -- Quality criteria were updated to match user expectations -- Alternative tools were considered where relevant -- Job version and changelog were updated -- A clear recap of improvements was provided -- When all criteria are met, include `Quality Criteria Met` in your response - ## Example Dialog ``` diff --git a/src/deepwork/standard_jobs/deepwork_jobs/steps/learn.md b/src/deepwork/standard_jobs/deepwork_jobs/steps/learn.md index 38a80362..a4a50c9a 100644 --- a/src/deepwork/standard_jobs/deepwork_jobs/steps/learn.md +++ b/src/deepwork/standard_jobs/deepwork_jobs/steps/learn.md @@ -251,20 +251,6 @@ When adding entries to AGENTS.md, prefer these patterns: - Configuration requires these fields: name, version, ... 
``` -## Quality Criteria - -- Conversation has been analyzed for job executions -- Points of confusion and inefficiency are identified -- Learnings are correctly classified (generalizable vs bespoke) -- Job instructions updated for generalizable improvements -- Instructions are concise - no redundancy or unnecessary verbosity -- Shared/lengthy content extracted into referenced files where appropriate -- AGENTS.md created/updated with bespoke learnings -- File references used instead of duplicating content -- AGENTS.md is in the correct folder (the deepest common folder for the topic) -- Job version and changelog updated if instructions were modified -- When all criteria are met, include `✓ Quality Criteria Met` - ## Example Dialog ``` diff --git a/src/deepwork/standard_jobs/deepwork_jobs/steps/test.md b/src/deepwork/standard_jobs/deepwork_jobs/steps/test.md index 7b78ca81..36d27128 100644 --- a/src/deepwork/standard_jobs/deepwork_jobs/steps/test.md +++ b/src/deepwork/standard_jobs/deepwork_jobs/steps/test.md @@ -88,17 +88,6 @@ The feedback loop should continue until the user explicitly indicates satisfacti If the user provides more feedback, address it and ask again. Don't assume satisfaction without explicit confirmation. -## Quality Criteria - -- User was informed the workflow is ready and asked what to test it on -- The new workflow was actually invoked and run to completion -- Output was critiqued and up to 3 top issues were identified -- Each identified issue was presented to the user with a specific question -- Confirmed corrections were applied -- User was asked for additional feedback after corrections -- Iteration continued until user confirmed satisfaction -- When all criteria are met, include `Quality Criteria Met` in your response - ## Example Dialog ``` From d570baff8660d65f3cc92b420ab250e6bb68f131 Mon Sep 17 00:00:00 2001 From: Noah Horton Date: Thu, 5 Feb 2026 11:10:32 -0700 Subject: [PATCH 23/45] formatting --- src/deepwork/cli/install.py | 4 +- src/deepwork/cli/sync.py | 8 +- src/deepwork/mcp/quality_gate.py | 1 - tests/fixtures/mock_review_agent.py | 74 ++++++------ .../test_quality_gate_integration.py | 12 +- tests/unit/mcp/test_quality_gate.py | 107 ++++++++++-------- 6 files changed, 109 insertions(+), 97 deletions(-) diff --git a/src/deepwork/cli/install.py b/src/deepwork/cli/install.py index 030fa27b..b761aff5 100644 --- a/src/deepwork/cli/install.py +++ b/src/deepwork/cli/install.py @@ -364,9 +364,7 @@ def _install_deepwork(platform_name: str | None, project_path: Path) -> None: platform_names = ", ".join(a.display_name for a in detected_adapters) if sync_result.has_warnings: - console.print( - "[bold yellow]⚠ You should repair your DeepWork install[/bold yellow]" - ) + console.print("[bold yellow]⚠ You should repair your DeepWork install[/bold yellow]") console.print() console.print("[bold]To fix issues:[/bold]") console.print(" 1. Start your agent CLI (ex. [cyan]claude[/cyan] or [cyan]gemini[/cyan])") diff --git a/src/deepwork/cli/sync.py b/src/deepwork/cli/sync.py index 9dff320f..80441f6d 100644 --- a/src/deepwork/cli/sync.py +++ b/src/deepwork/cli/sync.py @@ -165,7 +165,9 @@ def sync_skills(project_path: Path) -> SyncResult: console.print("[bold yellow]Warning: Some jobs failed to parse:[/bold yellow]") for job_name, error in failed_jobs: console.print(f" • {job_name}: {error}") - console.print("[dim]The /deepwork skill is installed. Fix the job errors and run 'deepwork sync' again.[/dim]") + console.print( + "[dim]The /deepwork skill is installed. 
Fix the job errors and run 'deepwork sync' again.[/dim]" + ) # Collect hooks from jobs (hooks collection is independent of job.yml parsing) job_hooks_list = collect_job_hooks(jobs_dir) @@ -174,7 +176,9 @@ def sync_skills(project_path: Path) -> SyncResult: # Sync hooks and permissions for each platform for adapter in platform_adapters: - console.print(f"\n[yellow]→[/yellow] Syncing hooks and permissions to {adapter.display_name}...") + console.print( + f"\n[yellow]→[/yellow] Syncing hooks and permissions to {adapter.display_name}..." + ) # NOTE: Job skills (meta-skills and step skills) are no longer generated. # The MCP server now handles workflow orchestration directly. diff --git a/src/deepwork/mcp/quality_gate.py b/src/deepwork/mcp/quality_gate.py index 511bad71..9e3f9b90 100644 --- a/src/deepwork/mcp/quality_gate.py +++ b/src/deepwork/mcp/quality_gate.py @@ -12,7 +12,6 @@ from typing import Any import aiofiles -import jsonschema from deepwork.mcp.schemas import QualityCriteriaResult, QualityGateResult diff --git a/tests/fixtures/mock_review_agent.py b/tests/fixtures/mock_review_agent.py index 4b57c06a..8ccbb4d8 100755 --- a/tests/fixtures/mock_review_agent.py +++ b/tests/fixtures/mock_review_agent.py @@ -83,51 +83,61 @@ def main() -> int: return 0 if mode == "pass": - response = wrap_response({ - "passed": True, - "feedback": "All criteria met", - "criteria_results": [{"criterion": "Criterion 1", "passed": True, "feedback": None}], - }) + response = wrap_response( + { + "passed": True, + "feedback": "All criteria met", + "criteria_results": [ + {"criterion": "Criterion 1", "passed": True, "feedback": None} + ], + } + ) print(json.dumps(response)) return 0 if mode == "fail": - response = wrap_response({ - "passed": False, - "feedback": "Quality criteria not met", - "criteria_results": [ - { - "criterion": "Criterion 1", - "passed": False, - "feedback": "Did not meet requirements", - } - ], - }) + response = wrap_response( + { + "passed": False, + "feedback": "Quality criteria not met", + "criteria_results": [ + { + "criterion": "Criterion 1", + "passed": False, + "feedback": "Did not meet requirements", + } + ], + } + ) print(json.dumps(response)) return 0 # Auto mode: parse prompt for markers if "FORCE_PASS" in prompt: - response = wrap_response({ - "passed": True, - "feedback": "Forced pass via marker", - "criteria_results": [], - }) + response = wrap_response( + { + "passed": True, + "feedback": "Forced pass via marker", + "criteria_results": [], + } + ) print(json.dumps(response)) return 0 if "FORCE_FAIL" in prompt: - response = wrap_response({ - "passed": False, - "feedback": "Forced fail via marker", - "criteria_results": [ - { - "criterion": "Test criterion", - "passed": False, - "feedback": "Failed due to FORCE_FAIL marker", - } - ], - }) + response = wrap_response( + { + "passed": False, + "feedback": "Forced fail via marker", + "criteria_results": [ + { + "criterion": "Test criterion", + "passed": False, + "feedback": "Failed due to FORCE_FAIL marker", + } + ], + } + ) print(json.dumps(response)) return 0 diff --git a/tests/integration/test_quality_gate_integration.py b/tests/integration/test_quality_gate_integration.py index 69019bc4..37a3ad8d 100644 --- a/tests/integration/test_quality_gate_integration.py +++ b/tests/integration/test_quality_gate_integration.py @@ -66,9 +66,7 @@ class TestRealClaudeIntegration: actual Claude Code CLI. If you mock them, you defeat their entire purpose. 
""" - async def test_real_claude_evaluates_passing_criteria( - self, project_root: Path - ) -> None: + async def test_real_claude_evaluates_passing_criteria(self, project_root: Path) -> None: """Test that real Claude CLI correctly evaluates passing criteria. ⚠️ THIS TEST MUST USE THE REAL CLAUDE CLI - DO NOT MOCK ⚠️ @@ -106,13 +104,9 @@ async def test_real_claude_evaluates_passing_criteria( if not result.passed: # If it failed, at least verify we got proper feedback assert len(result.criteria_results) > 0 - pytest.skip( - f"Model returned fail (may be model variability): {result.feedback}" - ) + pytest.skip(f"Model returned fail (may be model variability): {result.feedback}") - async def test_real_claude_evaluates_failing_criteria( - self, project_root: Path - ) -> None: + async def test_real_claude_evaluates_failing_criteria(self, project_root: Path) -> None: """Test that real Claude CLI correctly identifies missing criteria. ⚠️ THIS TEST MUST USE THE REAL CLAUDE CLI - DO NOT MOCK ⚠️ diff --git a/tests/unit/mcp/test_quality_gate.py b/tests/unit/mcp/test_quality_gate.py index fea0337d..8047cf25 100644 --- a/tests/unit/mcp/test_quality_gate.py +++ b/tests/unit/mcp/test_quality_gate.py @@ -186,18 +186,18 @@ async def test_build_payload_missing_file( def test_parse_response_valid_json(self, quality_gate: QualityGate) -> None: """Test parsing valid JSON response with structured_output.""" # Claude CLI returns wrapper with structured_output field when using --json-schema - response = json.dumps({ - "type": "result", - "subtype": "success", - "is_error": False, - "structured_output": { - "passed": True, - "feedback": "All good", - "criteria_results": [ - {"criterion": "Test 1", "passed": True, "feedback": None} - ] + response = json.dumps( + { + "type": "result", + "subtype": "success", + "is_error": False, + "structured_output": { + "passed": True, + "feedback": "All good", + "criteria_results": [{"criterion": "Test 1", "passed": True, "feedback": None}], + }, } - }) + ) result = quality_gate._parse_response(response) @@ -207,18 +207,20 @@ def test_parse_response_valid_json(self, quality_gate: QualityGate) -> None: def test_parse_response_failed(self, quality_gate: QualityGate) -> None: """Test parsing failed evaluation response.""" - response = json.dumps({ - "type": "result", - "subtype": "success", - "is_error": False, - "structured_output": { - "passed": False, - "feedback": "Issues found", - "criteria_results": [ - {"criterion": "Test 1", "passed": False, "feedback": "Failed"} - ] + response = json.dumps( + { + "type": "result", + "subtype": "success", + "is_error": False, + "structured_output": { + "passed": False, + "feedback": "Issues found", + "criteria_results": [ + {"criterion": "Test 1", "passed": False, "feedback": "Failed"} + ], + }, } - }) + ) result = quality_gate._parse_response(response) @@ -236,24 +238,28 @@ def test_parse_response_invalid_json(self, quality_gate: QualityGate) -> None: def test_parse_response_missing_structured_output(self, quality_gate: QualityGate) -> None: """Test parsing response missing structured_output field raises error.""" # Old format with 'result' field instead of 'structured_output' - wrapper_response = json.dumps({ - "type": "result", - "subtype": "success", - "is_error": False, - "result": "Some text response", - }) + wrapper_response = json.dumps( + { + "type": "result", + "subtype": "success", + "is_error": False, + "result": "Some text response", + } + ) with pytest.raises(QualityGateError, match="missing 'structured_output'"): 
quality_gate._parse_response(wrapper_response) def test_parse_response_error_in_wrapper(self, quality_gate: QualityGate) -> None: """Test parsing response with is_error=True raises error.""" - wrapper_response = json.dumps({ - "type": "result", - "subtype": "error", - "is_error": True, - "result": "Something went wrong", - }) + wrapper_response = json.dumps( + { + "type": "result", + "subtype": "error", + "is_error": True, + "result": "Something went wrong", + } + ) with pytest.raises(QualityGateError, match="returned error"): quality_gate._parse_response(wrapper_response) @@ -273,20 +279,22 @@ async def test_evaluate_no_criteria( def test_parse_criteria_results_structure(self, quality_gate: QualityGate) -> None: """Test that criteria results are properly parsed with multiple entries.""" - response = json.dumps({ - "type": "result", - "subtype": "success", - "is_error": False, - "structured_output": { - "passed": False, - "feedback": "Two criteria failed", - "criteria_results": [ - {"criterion": "First check", "passed": True, "feedback": None}, - {"criterion": "Second check", "passed": False, "feedback": "Missing data"}, - {"criterion": "Third check", "passed": False, "feedback": "Wrong format"}, - ], - }, - }) + response = json.dumps( + { + "type": "result", + "subtype": "success", + "is_error": False, + "structured_output": { + "passed": False, + "feedback": "Two criteria failed", + "criteria_results": [ + {"criterion": "First check", "passed": True, "feedback": None}, + {"criterion": "Second check", "passed": False, "feedback": "Missing data"}, + {"criterion": "Third check", "passed": False, "feedback": "Wrong format"}, + ], + }, + } + ) result = quality_gate._parse_response(response) @@ -337,8 +345,7 @@ async def test_command_includes_json_schema( schema_json = self.get_command_arg(captured_cmd, "--json-schema") parsed_schema = json.loads(schema_json) assert parsed_schema == QUALITY_GATE_RESPONSE_SCHEMA, ( - f"Schema mismatch. Expected:\n{QUALITY_GATE_RESPONSE_SCHEMA}\n" - f"Got:\n{parsed_schema}" + f"Schema mismatch. 
Expected:\n{QUALITY_GATE_RESPONSE_SCHEMA}\nGot:\n{parsed_schema}" ) async def test_command_includes_system_prompt( From 6194082c38f12e77a1babac0455a380daa00cb4c Mon Sep 17 00:00:00 2001 From: Noah Horton Date: Thu, 5 Feb 2026 11:25:45 -0700 Subject: [PATCH 24/45] Update changelog for MCP variant branch Document the major architectural changes including: - New MCP server with checkpoint-based workflow execution - Removal of the rules system - Simplified skill generation - New deepwork_jobs steps Co-Authored-By: Claude Opus 4.5 --- CHANGELOG.md | 79 ++++++++++++++++++++++++++++++++-------------------- 1 file changed, 49 insertions(+), 30 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4f9c4dc4..f9eb545e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,40 +8,59 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] ### Added -- Concurrent steps support in workflow definitions - - Workflows can now specify nested arrays of step IDs to indicate steps that can run in parallel - - Example: `steps: [setup, [task_a, task_b, task_c], finalize]` runs task_a/b/c concurrently - - Single-item arrays indicate a step with multiple parallel instances (e.g., `[fetch_campaign_data]` runs for each campaign) - - New `WorkflowStepEntry` dataclass in parser for sequential/concurrent step groups - - Meta-skill template renders concurrent steps as "Background Task 1/2/3" with clear instructions - - Added `get_step_entry_position_in_workflow()` and `get_concurrent_step_info()` methods to JobDefinition - - Full backward compatibility: existing workflows with simple step arrays continue to work -- Agent delegation field for job.yml steps - - New `agent` field on steps allows specifying an agent type (e.g., `agent: general-purpose`) - - When `agent` is set, generated Claude Code skills automatically include `context: fork` and `agent:` in frontmatter - - Enables steps to delegate execution to specific agent types - - Updated `deepwork_jobs.define` step instructions with agent delegation guidance - - Updated `job_spec.md` doc spec with "Agent Delegation" section -- Explicit workflow definitions in job.yml for distinguishing multi-step workflows from standalone skills - - New `workflows` section in job.yml with `name`, `summary`, and ordered `steps` array - - Workflows are shown separately from standalone skills in generated meta-skills - - Step skills now display workflow context (e.g., "Step 2/3 in new_job workflow") - - Standalone skills are clearly marked as "can be run anytime" - - Backward compatible: jobs without `workflows` section use dependency-based detection +- **MCP Server Architecture** - New Model Context Protocol server for checkpoint-based workflow execution + - `deepwork serve` command starts the MCP server with stdio or SSE transport + - Three MCP tools: `get_workflows`, `start_workflow`, `finished_step` + - Session state persisted to `.deepwork/tmp/session_[id].json` for resumability + - Quality gate evaluates step outputs against quality criteria using Claude Code subprocess + - Nested workflow support with stack-based execution model + - `abort_workflow` tool for canceling workflows and returning to parent + - Comprehensive Pydantic schemas for all tool inputs/outputs in `src/deepwork/mcp/schemas.py` + - Documentation in `doc/mcp_interface.md` and `doc/architecture.md` Part 4 +- New `deepwork_jobs` steps for MCP-based workflow management + - `iterate` - Quick iteration on existing jobs without full define/implement cycle + - `errata` - 
Document known issues and quirks discovered during job execution + - `test` - Run manual tests to validate job behavior + - `fix_jobs` - Repair malformed job.yml files + - `fix_settings` - Repair platform settings files +- JSON Schema for job.yml validation (`src/deepwork/schemas/job.schema.json`) +- Reference documentation for calling Claude in print mode (`doc/reference/calling_claude_in_print_mode.md`) +- Migrated to uv2nix for reproducible Python builds in flake.nix ### Changed -- Skill templates now show workflow-aware progress (e.g., "new_job step 2/3 complete") -- Meta-skill template reorganized to show "Workflows" and "Standalone Skills" sections separately -- Updated `deepwork_jobs` standard job to v1.0.0 with explicit `new_job` workflow -- SessionStart hook now skips non-initial sessions (resume, compact/clear) by checking the `source` field in stdin JSON, reducing noise and redundant checks - -### Fixed -- Fixed skill template generating malformed YAML frontmatter with fields concatenated on single lines - - Removed over-aggressive `{%-` whitespace stripping from Jinja template - - Fields like `user-invocable` and `hooks` now render on proper separate lines - - Affects `src/deepwork/templates/claude/skill-job-step.md.jinja` +- **BREAKING**: Simplified skill generation to single `/deepwork` entry point skill + - The generator now produces only `skill-deepwork.md.jinja` that directs agents to MCP tools + - Removed individual step skill generation (`skill-job-step.md.jinja`, `skill-job-meta.md.jinja`) + - Workflow orchestration moved from skill files to MCP server +- **BREAKING**: Workflow execution now happens through MCP tool calls instead of slash commands + - Agents call `start_workflow` → execute step → `finished_step` → repeat + - Quality gates enforce output requirements before proceeding +- Streamlined `deepwork_jobs.define` and `deepwork_jobs.implement` for MCP workflow +- Updated `deepwork_jobs.learn` with simplified instructions +- Simplified adapter templates - removed complex skill templates +- MCP server registered in `.claude/settings.json` during install ### Removed +- **BREAKING**: Entire rules system removed + - Removed `rules_parser.py`, `rules_queue.py`, `pattern_matcher.py`, `rules_check.py` + - Removed `.deepwork/rules/` directory and all rule definition files + - Removed `command_executor.py` for command action execution + - Removed `deepwork_rules` standard job and `/deepwork_rules.define` skill + - Removed rules-related hooks (`user_prompt_submit.sh`, `capture_prompt_work_tree.sh`) + - Removed rules documentation (`doc/rules_syntax.md`, `doc/rules_system_design.md`) +- Removed per-step skill generation templates and logic +- Removed `commit` job from library (was example job) +- Removed `manual_tests/` directory and `manual_tests` job +- Removed `add_platform` bespoke job +- Removed many hook scripts that are no longer needed with MCP architecture +- Removed Gemini per-step skill templates (`.gemini/skills/` now only has entry point) + +### Migration Guide +- Run `deepwork install --platform claude` to get the new MCP server configuration +- Workflows are now executed via `/deepwork` which uses MCP tools internally +- Rules system is completely removed - consider implementing validation logic in quality criteria instead +- Existing job definitions still work but are executed through MCP checkpoints +- The `.deepwork/rules/` directory can be safely deleted ## [0.5.1] - 2026-01-24 From b561e2a113e29f68d770a6975b2f26484b16dc75 Mon Sep 17 00:00:00 2001 From: 
Noah Horton Date: Thu, 5 Feb 2026 11:33:42 -0700 Subject: [PATCH 25/45] remove update job --- .../jobs/deepwork_jobs/steps/fix_jobs.md | 2 +- .deepwork/jobs/update/job.yml | 53 -------------- .deepwork/jobs/update/steps/job.md | 73 ------------------- AGENTS.md | 2 +- CHANGELOG.md | 39 +++------- CONTRIBUTING.md | 10 +-- claude.md | 5 +- doc/nix-flake.md | 2 +- .../deepwork_jobs/steps/fix_jobs.md | 5 +- 9 files changed, 24 insertions(+), 167 deletions(-) delete mode 100644 .deepwork/jobs/update/job.yml delete mode 100644 .deepwork/jobs/update/steps/job.md diff --git a/.deepwork/jobs/deepwork_jobs/steps/fix_jobs.md b/.deepwork/jobs/deepwork_jobs/steps/fix_jobs.md index 8d34468a..c89e74a9 100644 --- a/.deepwork/jobs/deepwork_jobs/steps/fix_jobs.md +++ b/.deepwork/jobs/deepwork_jobs/steps/fix_jobs.md @@ -203,6 +203,6 @@ For each job in `.deepwork/jobs/`, check: ## Important Notes -1. **Don't modify standard jobs directly** - If `deepwork_jobs` is out of date, run `deepwork install --platform claude` to get the latest version +1. **Don't modify standard jobs directly** - If `deepwork_jobs` is out of date, run `deepwork install` to get the latest version 2. **Preserve custom logic** - When migrating hooks, preserve the prompt content 3. **Test after changes** - Validate YAML syntax after each job fix to catch errors early diff --git a/.deepwork/jobs/update/job.yml b/.deepwork/jobs/update/job.yml deleted file mode 100644 index f437c821..00000000 --- a/.deepwork/jobs/update/job.yml +++ /dev/null @@ -1,53 +0,0 @@ -# yaml-language-server: $schema=.deepwork/schemas/job.schema.json -name: update -version: "1.4.0" -summary: "Updates DeepWork standard jobs in src/ and syncs to installed locations. Use when modifying deepwork_jobs." -description: | - A workflow for maintaining standard jobs bundled with DeepWork. Standard jobs - (like `deepwork_jobs`) are source-controlled in - `src/deepwork/standard_jobs/` and must be edited there—never in `.deepwork/jobs/` - or `.claude/commands/` directly. - - This job guides you through: - 1. Identifying which standard job(s) to update from conversation context - 2. Making changes in the correct source location (`src/deepwork/standard_jobs/[job_name]/`) - 3. Running `deepwork install` to propagate changes to `.deepwork/` and command directories - 4. Verifying the sync completed successfully - - Use this job whenever you need to modify job.yml files, step instructions, or hooks - for any standard job in the DeepWork repository. - -workflows: - - name: update - summary: "Update standard job source files and sync to installed locations" - steps: - - job - -changelog: - - version: "1.4.0" - changes: "Added workflow for MCP compatibility; migrated stop_hooks to quality_criteria" - - version: "1.0.0" - changes: "Initial job creation" - - version: "1.1.0" - changes: "Removed sync_verification.md output requirement" - - version: "1.2.0" - changes: "Added nominal output for doc spec compliance (files_synced)" - - version: "1.3.0" - changes: "Improved skill descriptions with third-person voice and 'Use when...' triggers for better discoverability" - -steps: - - id: job - name: "Update Standard Job" - description: "Edits standard job source files in src/ and runs deepwork install to sync changes. Use when updating job.yml or step instructions." 
- instructions_file: steps/job.md - inputs: - - name: job_context - description: "Determine from conversation context which standard job(s) to update and what changes are needed" - outputs: - - files_synced # implicit state: source files synced to installed locations - dependencies: [] - quality_criteria: - - "**Source Location**: Were changes made in `src/deepwork/standard_jobs/[job_name]/` (NOT in `.deepwork/jobs/`)?" - - "**Install Complete**: Was `deepwork install --platform claude` run successfully?" - - "**Files Synced**: Do files in `.deepwork/jobs/` match the source files?" - - "**Commands Regenerated**: Were command files in `.claude/commands/` regenerated?" diff --git a/.deepwork/jobs/update/steps/job.md b/.deepwork/jobs/update/steps/job.md deleted file mode 100644 index b226b4f6..00000000 --- a/.deepwork/jobs/update/steps/job.md +++ /dev/null @@ -1,73 +0,0 @@ -# Update Standard Job - -## Objective - -Edit standard job source files in `src/deepwork/standard_jobs/` and sync changes to installed locations. - -## Task - -When modifying a standard job in the DeepWork repository, this step ensures changes are made in the correct location and properly propagated. - -### Important: Source of Truth - -Standard jobs exist in THREE locations, but only ONE is the source of truth: - -| Location | Purpose | Editable? | -|----------|---------|-----------| -| `src/deepwork/standard_jobs/[job]/` | **Source of truth** | **YES** | -| `.deepwork/jobs/[job]/` | Installed copy | NO - overwritten by install | -| `.claude/commands/[job].[step].md` | Generated commands | NO - regenerated by sync | - -**NEVER edit files in `.deepwork/jobs/` or `.claude/commands/` for standard jobs!** - -### Process - -#### 1. Identify the Standard Job to Update - -From conversation context, determine: -- Which standard job needs updating (e.g., `deepwork_jobs`, `deepwork_rules`) -- What changes are needed (job.yml, step instructions, hooks, etc.) - -Current standard jobs: -```bash -ls src/deepwork/standard_jobs/ -``` - -#### 2. Make Changes in Source Location - -``` -src/deepwork/standard_jobs/[job_name]/ -├── job.yml # Job definition -├── steps/ # Step instruction files -├── hooks/ # Hook scripts -└── templates/ # Templates -``` - -#### 3. Run DeepWork Install - -```bash -deepwork install --platform claude -``` - -For Gemini: `deepwork install --platform gemini` - -#### 4. Verify the Sync - -```bash -# Verify job.yml -diff src/deepwork/standard_jobs/[job_name]/job.yml .deepwork/jobs/[job_name]/job.yml - -# Verify step files -diff -r src/deepwork/standard_jobs/[job_name]/steps/ .deepwork/jobs/[job_name]/steps/ - -# Check commands regenerated -ls -la .claude/commands/[job_name].*.md -``` - -## Quality Criteria - -- Changes made ONLY in `src/deepwork/standard_jobs/[job_name]/` -- `deepwork install --platform claude` executed successfully -- Files in `.deepwork/jobs/` match source -- Command files regenerated -- When all criteria are met, include `✓ Quality Criteria Met` diff --git a/AGENTS.md b/AGENTS.md index b4ee13c6..d0784788 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -18,7 +18,7 @@ When creating or modifying jobs in this repository, you MUST understand which ty **Editing rules**: - Source of truth is ALWAYS in `src/deepwork/standard_jobs/` - NEVER edit the installed copies in `.deepwork/jobs/` directly -- After editing, run `deepwork install --platform claude` to sync +- After editing, run `deepwork install` to sync ### 2. 
Library Jobs (`library/jobs/`) diff --git a/CHANGELOG.md b/CHANGELOG.md index f9eb545e..cb7fe8b3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,47 +7,32 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added + +### Changed + +### Fixed + +## [0.7.0] - 2026-02-05 + ### Added - **MCP Server Architecture** - New Model Context Protocol server for checkpoint-based workflow execution - - `deepwork serve` command starts the MCP server with stdio or SSE transport - - Three MCP tools: `get_workflows`, `start_workflow`, `finished_step` - - Session state persisted to `.deepwork/tmp/session_[id].json` for resumability - - Quality gate evaluates step outputs against quality criteria using Claude Code subprocess - - Nested workflow support with stack-based execution model - - `abort_workflow` tool for canceling workflows and returning to parent - - Comprehensive Pydantic schemas for all tool inputs/outputs in `src/deepwork/mcp/schemas.py` - - Documentation in `doc/mcp_interface.md` and `doc/architecture.md` Part 4 -- New `deepwork_jobs` steps for MCP-based workflow management - - `iterate` - Quick iteration on existing jobs without full define/implement cycle - - `errata` - Document known issues and quirks discovered during job execution - - `test` - Run manual tests to validate job behavior - - `fix_jobs` - Repair malformed job.yml files - - `fix_settings` - Repair platform settings files +- Improved `deepwork_jobs` steps for workflow management - JSON Schema for job.yml validation (`src/deepwork/schemas/job.schema.json`) - Reference documentation for calling Claude in print mode (`doc/reference/calling_claude_in_print_mode.md`) - Migrated to uv2nix for reproducible Python builds in flake.nix ### Changed - **BREAKING**: Simplified skill generation to single `/deepwork` entry point skill - - The generator now produces only `skill-deepwork.md.jinja` that directs agents to MCP tools - - Removed individual step skill generation (`skill-job-step.md.jinja`, `skill-job-meta.md.jinja`) - - Workflow orchestration moved from skill files to MCP server - **BREAKING**: Workflow execution now happens through MCP tool calls instead of slash commands - - Agents call `start_workflow` → execute step → `finished_step` → repeat - - Quality gates enforce output requirements before proceeding - Streamlined `deepwork_jobs.define` and `deepwork_jobs.implement` for MCP workflow - Updated `deepwork_jobs.learn` with simplified instructions - Simplified adapter templates - removed complex skill templates - MCP server registered in `.claude/settings.json` during install ### Removed -- **BREAKING**: Entire rules system removed - - Removed `rules_parser.py`, `rules_queue.py`, `pattern_matcher.py`, `rules_check.py` - - Removed `.deepwork/rules/` directory and all rule definition files - - Removed `command_executor.py` for command action execution - - Removed `deepwork_rules` standard job and `/deepwork_rules.define` skill - - Removed rules-related hooks (`user_prompt_submit.sh`, `capture_prompt_work_tree.sh`) - - Removed rules documentation (`doc/rules_syntax.md`, `doc/rules_system_design.md`) +- **BREAKING**: Rules system removed +- **BREAKING**: Removed per-step skill generation templates and logic - Removed per-step skill generation templates and logic - Removed `commit` job from library (was example job) - Removed `manual_tests/` directory and `manual_tests` job @@ -56,7 +41,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Removed 
Gemini per-step skill templates (`.gemini/skills/` now only has entry point) ### Migration Guide -- Run `deepwork install --platform claude` to get the new MCP server configuration +- Run `deepwork install` to get the new MCP server configuration - Workflows are now executed via `/deepwork` which uses MCP tools internally - Rules system is completely removed - consider implementing validation logic in quality criteria instead - Existing job definitions still work but are executed through MCP checkpoints diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index a86ffe4a..0bdc3f33 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -259,7 +259,7 @@ Since you installed DeepWork in editable mode, the `deepwork` command uses your ```bash # Run the install command -deepwork install --platform claude +deepwork install # Verify installation ls -la .deepwork/ @@ -273,7 +273,7 @@ Any changes you make to the DeepWork source code will be immediately reflected: ```bash # Make changes in ~/deepwork/src/deepwork/... # Then test in your test project -deepwork install --platform claude +deepwork install # Or test the CLI directly deepwork --help @@ -306,7 +306,7 @@ nix run github:Unsupervisedcom/deepwork/feature-branch-name -- --help nix develop github:Unsupervisedcom/deepwork/feature-branch-name # Run a specific command from a feature branch -nix develop github:Unsupervisedcom/deepwork/feature-branch-name --command deepwork install --platform claude +nix develop github:Unsupervisedcom/deepwork/feature-branch-name --command deepwork install # Test against a specific commit nix run github:Unsupervisedcom/deepwork/abc1234 -- --version @@ -321,7 +321,7 @@ For example, to test a branch named `feat/new-parser`: ```bash # Quick test of the CLI -nix run github:Unsupervisedcom/deepwork/feat/new-parser -- install --platform claude --dry-run +nix run github:Unsupervisedcom/deepwork/feat/new-parser -- install --dry-run # Or enter a full development shell to run tests and run a specific test nix develop github:Unsupervisedcom/deepwork/feat/new-parser --command pytest tests/unit/core/test_parser.py -v @@ -466,7 +466,7 @@ mypy src/ ```bash # Create or use a test project cd ~/test-project/ -deepwork install --platform claude +deepwork install # Verify your changes work as expected ``` diff --git a/claude.md b/claude.md index 56fdd1bc..1a54ee6e 100644 --- a/claude.md +++ b/claude.md @@ -83,8 +83,7 @@ When running in Claude Code on the web (not local installations), the `deepwork` pip install -e . # Then run commands normally -deepwork install --platform claude -deepwork sync +deepwork install ``` **Note**: In web environments, you may also need to install dependencies like `jsonschema`, `pyyaml`, `gitpython`, `jinja2`, and `click` if they're not already available. @@ -211,7 +210,7 @@ Instead, follow this workflow: - `steps/*.md` - Step instruction files - `hooks/*` - Any hook scripts -2. **Run `deepwork install --platform claude`** to sync changes to `.deepwork/jobs/` and `.claude/skills/` +2. **Run `deepwork install`** to sync changes to `.deepwork/jobs/` and `.claude/skills/` 3. 
**Verify** the changes propagated correctly to all locations diff --git a/doc/nix-flake.md b/doc/nix-flake.md index 26bf82ec..246d96ad 100644 --- a/doc/nix-flake.md +++ b/doc/nix-flake.md @@ -91,7 +91,7 @@ deepwork --help nix run github:Unsupervisedcom/deepwork -- --help # Run a specific command -nix run github:Unsupervisedcom/deepwork -- install --platform claude +nix run github:Unsupervisedcom/deepwork -- install ``` ### Building the Package diff --git a/src/deepwork/standard_jobs/deepwork_jobs/steps/fix_jobs.md b/src/deepwork/standard_jobs/deepwork_jobs/steps/fix_jobs.md index 8d34468a..7f3675a5 100644 --- a/src/deepwork/standard_jobs/deepwork_jobs/steps/fix_jobs.md +++ b/src/deepwork/standard_jobs/deepwork_jobs/steps/fix_jobs.md @@ -203,6 +203,5 @@ For each job in `.deepwork/jobs/`, check: ## Important Notes -1. **Don't modify standard jobs directly** - If `deepwork_jobs` is out of date, run `deepwork install --platform claude` to get the latest version -2. **Preserve custom logic** - When migrating hooks, preserve the prompt content -3. **Test after changes** - Validate YAML syntax after each job fix to catch errors early +1. **Preserve custom logic** - When migrating hooks, preserve the prompt content +2. **Test after changes** - Validate YAML syntax after each job fix to catch errors early From 48e23fee03bf525aa9c2c5fac5890e10099cef21 Mon Sep 17 00:00:00 2001 From: Noah Horton Date: Thu, 5 Feb 2026 12:54:40 -0700 Subject: [PATCH 26/45] Fix release version to prerelease (0.7.0a1) Mark 0.7.0 as alpha prerelease so that `uv add deepwork` continues to install the stable 0.5.1 by default, requiring explicit version specification for the new alpha. Co-Authored-By: Claude Opus 4.5 --- pyproject.toml | 2 +- src/deepwork/__init__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index bbf974b7..352bd845 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "deepwork" -version = "0.7.0" +version = "0.7.0a1" description = "Framework for enabling AI agents to perform complex, multi-step work tasks" readme = "README.md" requires-python = ">=3.11" diff --git a/src/deepwork/__init__.py b/src/deepwork/__init__.py index 0c85557f..ce548d43 100644 --- a/src/deepwork/__init__.py +++ b/src/deepwork/__init__.py @@ -1,6 +1,6 @@ """DeepWork - Framework for enabling AI agents to perform complex, multi-step work tasks.""" -__version__ = "0.7.0" +__version__ = "0.7.0a1" __author__ = "DeepWork Contributors" __all__ = [ From 960acaacad8e5c965da68c0c64504ccd46fa115b Mon Sep 17 00:00:00 2001 From: Noah Horton Date: Thu, 5 Feb 2026 13:41:19 -0700 Subject: [PATCH 27/45] nix fixed --- .deepwork/jobs/deepwork_jobs/job.yml | 1 + .deepwork/jobs/deepwork_jobs/steps/errata.md | 18 +- .../jobs/deepwork_jobs/steps/fix_jobs.md | 5 +- .github/workflows/update-claude-code.yml | 63 ---- flake.lock | 71 +++- flake.nix | 16 +- nix/claude-code/package-lock.json | 314 ------------------ nix/claude-code/package.nix | 78 ----- nix/claude-code/update.sh | 49 --- nix/update | 18 - .../standard_jobs/deepwork_jobs/job.yml | 1 + .../deepwork_jobs/steps/errata.md | 18 +- 12 files changed, 114 insertions(+), 538 deletions(-) delete mode 100644 .github/workflows/update-claude-code.yml delete mode 100644 nix/claude-code/package-lock.json delete mode 100644 nix/claude-code/package.nix delete mode 100755 nix/claude-code/update.sh delete mode 100755 nix/update diff --git a/.deepwork/jobs/deepwork_jobs/job.yml b/.deepwork/jobs/deepwork_jobs/job.yml index 
1aea4e0d..cb2424e4 100644 --- a/.deepwork/jobs/deepwork_jobs/job.yml +++ b/.deepwork/jobs/deepwork_jobs/job.yml @@ -222,4 +222,5 @@ steps: - "**Rules Job Removed**: Is `.deepwork/jobs/deepwork_rules/` removed if present?" - "**Config Version Updated**: Is `.deepwork/config.yml` using current version format?" - "**Summary Provided**: Is a repair_summary.md file created documenting all changes made?" + - "**DeepWork Re-installed**: Was `deepwork install` run after cleanup, and does it complete without errors?" - "**Git Status Clean**: Are changes ready to be committed (no untracked garbage files)?" diff --git a/.deepwork/jobs/deepwork_jobs/steps/errata.md b/.deepwork/jobs/deepwork_jobs/steps/errata.md index c71d62be..10245c62 100644 --- a/.deepwork/jobs/deepwork_jobs/steps/errata.md +++ b/.deepwork/jobs/deepwork_jobs/steps/errata.md @@ -134,7 +134,23 @@ Check for and remove other obsolete files: | `.claude/commands/` | Generated commands | Keep (current system) | | `.claude/settings.local.json` | Local overrides | Keep (user settings) | -### Step 6: Verify Git Status +### Step 6: Re-install DeepWork + +After all cleanup is complete, re-run `deepwork install` to ensure configurations are current and consistent: + +```bash +deepwork install +``` + +**Then verify:** +1. Check that `.deepwork/config.yml` is valid and up to date +2. Check that `.claude/skills/deepwork/` exists and contains the expected skill entry point +3. Check that all jobs in `.deepwork/jobs/` have valid `job.yml` files +4. Run `deepwork install` a second time and confirm the output is clean (no errors or warnings) + +If any issues are found, fix them before proceeding. The goal is a clean, working DeepWork installation with no residual problems from the repair process. + +### Step 7: Verify Git Status Check that the cleanup hasn't left untracked garbage: diff --git a/.deepwork/jobs/deepwork_jobs/steps/fix_jobs.md b/.deepwork/jobs/deepwork_jobs/steps/fix_jobs.md index c89e74a9..7f3675a5 100644 --- a/.deepwork/jobs/deepwork_jobs/steps/fix_jobs.md +++ b/.deepwork/jobs/deepwork_jobs/steps/fix_jobs.md @@ -203,6 +203,5 @@ For each job in `.deepwork/jobs/`, check: ## Important Notes -1. **Don't modify standard jobs directly** - If `deepwork_jobs` is out of date, run `deepwork install` to get the latest version -2. **Preserve custom logic** - When migrating hooks, preserve the prompt content -3. **Test after changes** - Validate YAML syntax after each job fix to catch errors early +1. **Preserve custom logic** - When migrating hooks, preserve the prompt content +2. 
**Test after changes** - Validate YAML syntax after each job fix to catch errors early diff --git a/.github/workflows/update-claude-code.yml b/.github/workflows/update-claude-code.yml deleted file mode 100644 index 99dbbf2c..00000000 --- a/.github/workflows/update-claude-code.yml +++ /dev/null @@ -1,63 +0,0 @@ -name: Update Claude Code - -on: - schedule: - # Run daily at 6 AM UTC - - cron: '0 6 * * *' - workflow_dispatch: # Allow manual trigger - -jobs: - update: - runs-on: ubuntu-latest - permissions: - contents: write - pull-requests: write - - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - - name: Install Nix - uses: cachix/install-nix-action@v30 - with: - nix_path: nixpkgs=channel:nixos-unstable - extra_nix_config: | - experimental-features = nix-command flakes - - - name: Update claude-code package - id: update - run: | - # Script exits 0 if already at latest (no changes to commit) - ./nix/claude-code/update.sh - - # Capture version for PR title - VERSION=$(grep 'version = "' nix/claude-code/package.nix | head -1 | sed 's/.*version = "\([^"]*\)".*/\1/') - echo "version=$VERSION" >> $GITHUB_OUTPUT - - - name: Update flake.lock - run: nix flake update - - - name: Verify build - run: nix develop --command claude --version - - - name: Create Pull Request - uses: peter-evans/create-pull-request@v7 - with: - token: ${{ secrets.GITHUB_TOKEN }} - commit-message: "chore(deps): update claude-code to ${{ steps.update.outputs.version }}" - title: "chore(deps): update claude-code to ${{ steps.update.outputs.version }}" - body: | - Automated update of claude-code package. - - **Changes:** - - claude-code updated to ${{ steps.update.outputs.version }} - - Updated flake.lock - - **Verification:** - - Package builds successfully - - `claude --version` returns expected version - - --- - *This PR was automatically created by the update-claude-code workflow.* - branch: update-claude-code - delete-branch: true diff --git a/flake.lock b/flake.lock index 35a56a41..9a416dfc 100644 --- a/flake.lock +++ b/flake.lock @@ -1,6 +1,59 @@ { "nodes": { + "claude-code-nix": { + "inputs": { + "flake-utils": "flake-utils", + "nixpkgs": "nixpkgs" + }, + "locked": { + "lastModified": 1770315205, + "narHash": "sha256-yOYprNUvMHRBC7EfmhNOYYLqNm43cLtydV39ITnCfZk=", + "owner": "sadjow", + "repo": "claude-code-nix", + "rev": "b774ffcdcd9987f4a2e6e3809130d04438e29a13", + "type": "github" + }, + "original": { + "owner": "sadjow", + "repo": "claude-code-nix", + "type": "github" + } + }, + "flake-utils": { + "inputs": { + "systems": "systems" + }, + "locked": { + "lastModified": 1731533236, + "narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=", + "owner": "numtide", + "repo": "flake-utils", + "rev": "11707dc2f618dd54ca8739b309ec4fc024de578b", + "type": "github" + }, + "original": { + "owner": "numtide", + "repo": "flake-utils", + "type": "github" + } + }, "nixpkgs": { + "locked": { + "lastModified": 1770169770, + "narHash": "sha256-awR8qIwJxJJiOmcEGgP2KUqYmHG4v/z8XpL9z8FnT1A=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "aa290c9891fa4ebe88f8889e59633d20cc06a5f2", + "type": "github" + }, + "original": { + "owner": "NixOS", + "ref": "nixpkgs-unstable", + "repo": "nixpkgs", + "type": "github" + } + }, + "nixpkgs_2": { "locked": { "lastModified": 1770197578, "narHash": "sha256-AYqlWrX09+HvGs8zM6ebZ1pwUqjkfpnv8mewYwAo+iM=", @@ -64,12 +117,28 @@ }, "root": { "inputs": { - "nixpkgs": "nixpkgs", + "claude-code-nix": "claude-code-nix", + "nixpkgs": "nixpkgs_2", "pyproject-build-systems": 
"pyproject-build-systems", "pyproject-nix": "pyproject-nix", "uv2nix": "uv2nix" } }, + "systems": { + "locked": { + "lastModified": 1681028828, + "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", + "owner": "nix-systems", + "repo": "default", + "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", + "type": "github" + }, + "original": { + "owner": "nix-systems", + "repo": "default", + "type": "github" + } + }, "uv2nix": { "inputs": { "nixpkgs": [ diff --git a/flake.nix b/flake.nix index c2740cf4..a7a2150f 100644 --- a/flake.nix +++ b/flake.nix @@ -4,6 +4,9 @@ inputs = { nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable"; + # Claude Code with pre-built native binaries (hourly updates) + claude-code-nix.url = "github:sadjow/claude-code-nix"; + pyproject-nix = { url = "github:pyproject-nix/pyproject.nix"; inputs.nixpkgs.follows = "nixpkgs"; @@ -23,7 +26,7 @@ }; }; - outputs = { self, nixpkgs, pyproject-nix, uv2nix, pyproject-build-systems, ... }: + outputs = { self, nixpkgs, claude-code-nix, pyproject-nix, uv2nix, pyproject-build-systems, ... }: let inherit (nixpkgs) lib; @@ -64,9 +67,6 @@ config.allowUnfree = true; }; - # Local claude-code package (update via nix/claude-code/update.sh) - claude-code = pkgs.callPackage ./nix/claude-code/package.nix { }; - # Python set with editable overlay for development pythonSet = pythonSets.${system}.overrideScope editableOverlay; @@ -80,7 +80,7 @@ pkgs.uv pkgs.git pkgs.jq - claude-code + claude-code-nix.packages.${system}.default pkgs.gh ]; @@ -97,9 +97,6 @@ unset PYTHONPATH export REPO_ROOT=$(git rev-parse --show-toplevel) - # Add nix/ scripts to PATH (for 'update' command) - export PATH="$PWD/nix:$PATH" - # Only show welcome message in interactive shells if [[ $- == *i* ]]; then echo "" @@ -113,9 +110,8 @@ echo " pytest Run tests" echo " ruff check src/ Lint code" echo " mypy src/ Type check" - echo " claude-code Claude Code CLI" + echo " claude Claude Code CLI" echo " gh GitHub CLI" - echo " update Update claude-code and flake inputs" echo "" fi ''; diff --git a/nix/claude-code/package-lock.json b/nix/claude-code/package-lock.json deleted file mode 100644 index f9766e4c..00000000 --- a/nix/claude-code/package-lock.json +++ /dev/null @@ -1,314 +0,0 @@ -{ - "name": "@anthropic-ai/claude-code", - "version": "2.1.15", - "lockfileVersion": 3, - "requires": true, - "packages": { - "": { - "name": "@anthropic-ai/claude-code", - "version": "2.1.15", - "license": "SEE LICENSE IN README.md", - "bin": { - "claude": "cli.js" - }, - "engines": { - "node": ">=18.0.0" - }, - "optionalDependencies": { - "@img/sharp-darwin-arm64": "^0.33.5", - "@img/sharp-darwin-x64": "^0.33.5", - "@img/sharp-linux-arm": "^0.33.5", - "@img/sharp-linux-arm64": "^0.33.5", - "@img/sharp-linux-x64": "^0.33.5", - "@img/sharp-linuxmusl-arm64": "^0.33.5", - "@img/sharp-linuxmusl-x64": "^0.33.5", - "@img/sharp-win32-x64": "^0.33.5" - } - }, - "node_modules/@img/sharp-darwin-arm64": { - "version": "0.33.5", - "resolved": "https://registry.npmjs.org/@img/sharp-darwin-arm64/-/sharp-darwin-arm64-0.33.5.tgz", - "integrity": "sha512-UT4p+iz/2H4twwAoLCqfA9UH5pI6DggwKEGuaPy7nCVQ8ZsiY5PIcrRvD1DzuY3qYL07NtIQcWnBSY/heikIFQ==", - "cpu": [ - "arm64" - ], - "license": "Apache-2.0", - "optional": true, - "os": [ - "darwin" - ], - "engines": { - "node": "^18.17.0 || ^20.3.0 || >=21.0.0" - }, - "funding": { - "url": "https://opencollective.com/libvips" - }, - "optionalDependencies": { - "@img/sharp-libvips-darwin-arm64": "1.0.4" - } - }, - "node_modules/@img/sharp-darwin-x64": { - "version": 
"0.33.5", - "resolved": "https://registry.npmjs.org/@img/sharp-darwin-x64/-/sharp-darwin-x64-0.33.5.tgz", - "integrity": "sha512-fyHac4jIc1ANYGRDxtiqelIbdWkIuQaI84Mv45KvGRRxSAa7o7d1ZKAOBaYbnepLC1WqxfpimdeWfvqqSGwR2Q==", - "cpu": [ - "x64" - ], - "license": "Apache-2.0", - "optional": true, - "os": [ - "darwin" - ], - "engines": { - "node": "^18.17.0 || ^20.3.0 || >=21.0.0" - }, - "funding": { - "url": "https://opencollective.com/libvips" - }, - "optionalDependencies": { - "@img/sharp-libvips-darwin-x64": "1.0.4" - } - }, - "node_modules/@img/sharp-libvips-darwin-arm64": { - "version": "1.0.4", - "resolved": "https://registry.npmjs.org/@img/sharp-libvips-darwin-arm64/-/sharp-libvips-darwin-arm64-1.0.4.tgz", - "integrity": "sha512-XblONe153h0O2zuFfTAbQYAX2JhYmDHeWikp1LM9Hul9gVPjFY427k6dFEcOL72O01QxQsWi761svJ/ev9xEDg==", - "cpu": [ - "arm64" - ], - "license": "LGPL-3.0-or-later", - "optional": true, - "os": [ - "darwin" - ], - "funding": { - "url": "https://opencollective.com/libvips" - } - }, - "node_modules/@img/sharp-libvips-darwin-x64": { - "version": "1.0.4", - "resolved": "https://registry.npmjs.org/@img/sharp-libvips-darwin-x64/-/sharp-libvips-darwin-x64-1.0.4.tgz", - "integrity": "sha512-xnGR8YuZYfJGmWPvmlunFaWJsb9T/AO2ykoP3Fz/0X5XV2aoYBPkX6xqCQvUTKKiLddarLaxpzNe+b1hjeWHAQ==", - "cpu": [ - "x64" - ], - "license": "LGPL-3.0-or-later", - "optional": true, - "os": [ - "darwin" - ], - "funding": { - "url": "https://opencollective.com/libvips" - } - }, - "node_modules/@img/sharp-libvips-linux-arm": { - "version": "1.0.5", - "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-arm/-/sharp-libvips-linux-arm-1.0.5.tgz", - "integrity": "sha512-gvcC4ACAOPRNATg/ov8/MnbxFDJqf/pDePbBnuBDcjsI8PssmjoKMAz4LtLaVi+OnSb5FK/yIOamqDwGmXW32g==", - "cpu": [ - "arm" - ], - "license": "LGPL-3.0-or-later", - "optional": true, - "os": [ - "linux" - ], - "funding": { - "url": "https://opencollective.com/libvips" - } - }, - "node_modules/@img/sharp-libvips-linux-arm64": { - "version": "1.0.4", - "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-arm64/-/sharp-libvips-linux-arm64-1.0.4.tgz", - "integrity": "sha512-9B+taZ8DlyyqzZQnoeIvDVR/2F4EbMepXMc/NdVbkzsJbzkUjhXv/70GQJ7tdLA4YJgNP25zukcxpX2/SueNrA==", - "cpu": [ - "arm64" - ], - "license": "LGPL-3.0-or-later", - "optional": true, - "os": [ - "linux" - ], - "funding": { - "url": "https://opencollective.com/libvips" - } - }, - "node_modules/@img/sharp-libvips-linux-x64": { - "version": "1.0.4", - "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-x64/-/sharp-libvips-linux-x64-1.0.4.tgz", - "integrity": "sha512-MmWmQ3iPFZr0Iev+BAgVMb3ZyC4KeFc3jFxnNbEPas60e1cIfevbtuyf9nDGIzOaW9PdnDciJm+wFFaTlj5xYw==", - "cpu": [ - "x64" - ], - "license": "LGPL-3.0-or-later", - "optional": true, - "os": [ - "linux" - ], - "funding": { - "url": "https://opencollective.com/libvips" - } - }, - "node_modules/@img/sharp-libvips-linuxmusl-arm64": { - "version": "1.0.4", - "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linuxmusl-arm64/-/sharp-libvips-linuxmusl-arm64-1.0.4.tgz", - "integrity": "sha512-9Ti+BbTYDcsbp4wfYib8Ctm1ilkugkA/uscUn6UXK1ldpC1JjiXbLfFZtRlBhjPZ5o1NCLiDbg8fhUPKStHoTA==", - "cpu": [ - "arm64" - ], - "license": "LGPL-3.0-or-later", - "optional": true, - "os": [ - "linux" - ], - "funding": { - "url": "https://opencollective.com/libvips" - } - }, - "node_modules/@img/sharp-libvips-linuxmusl-x64": { - "version": "1.0.4", - "resolved": 
"https://registry.npmjs.org/@img/sharp-libvips-linuxmusl-x64/-/sharp-libvips-linuxmusl-x64-1.0.4.tgz", - "integrity": "sha512-viYN1KX9m+/hGkJtvYYp+CCLgnJXwiQB39damAO7WMdKWlIhmYTfHjwSbQeUK/20vY154mwezd9HflVFM1wVSw==", - "cpu": [ - "x64" - ], - "license": "LGPL-3.0-or-later", - "optional": true, - "os": [ - "linux" - ], - "funding": { - "url": "https://opencollective.com/libvips" - } - }, - "node_modules/@img/sharp-linux-arm": { - "version": "0.33.5", - "resolved": "https://registry.npmjs.org/@img/sharp-linux-arm/-/sharp-linux-arm-0.33.5.tgz", - "integrity": "sha512-JTS1eldqZbJxjvKaAkxhZmBqPRGmxgu+qFKSInv8moZ2AmT5Yib3EQ1c6gp493HvrvV8QgdOXdyaIBrhvFhBMQ==", - "cpu": [ - "arm" - ], - "license": "Apache-2.0", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": "^18.17.0 || ^20.3.0 || >=21.0.0" - }, - "funding": { - "url": "https://opencollective.com/libvips" - }, - "optionalDependencies": { - "@img/sharp-libvips-linux-arm": "1.0.5" - } - }, - "node_modules/@img/sharp-linux-arm64": { - "version": "0.33.5", - "resolved": "https://registry.npmjs.org/@img/sharp-linux-arm64/-/sharp-linux-arm64-0.33.5.tgz", - "integrity": "sha512-JMVv+AMRyGOHtO1RFBiJy/MBsgz0x4AWrT6QoEVVTyh1E39TrCUpTRI7mx9VksGX4awWASxqCYLCV4wBZHAYxA==", - "cpu": [ - "arm64" - ], - "license": "Apache-2.0", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": "^18.17.0 || ^20.3.0 || >=21.0.0" - }, - "funding": { - "url": "https://opencollective.com/libvips" - }, - "optionalDependencies": { - "@img/sharp-libvips-linux-arm64": "1.0.4" - } - }, - "node_modules/@img/sharp-linux-x64": { - "version": "0.33.5", - "resolved": "https://registry.npmjs.org/@img/sharp-linux-x64/-/sharp-linux-x64-0.33.5.tgz", - "integrity": "sha512-opC+Ok5pRNAzuvq1AG0ar+1owsu842/Ab+4qvU879ippJBHvyY5n2mxF1izXqkPYlGuP/M556uh53jRLJmzTWA==", - "cpu": [ - "x64" - ], - "license": "Apache-2.0", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": "^18.17.0 || ^20.3.0 || >=21.0.0" - }, - "funding": { - "url": "https://opencollective.com/libvips" - }, - "optionalDependencies": { - "@img/sharp-libvips-linux-x64": "1.0.4" - } - }, - "node_modules/@img/sharp-linuxmusl-arm64": { - "version": "0.33.5", - "resolved": "https://registry.npmjs.org/@img/sharp-linuxmusl-arm64/-/sharp-linuxmusl-arm64-0.33.5.tgz", - "integrity": "sha512-XrHMZwGQGvJg2V/oRSUfSAfjfPxO+4DkiRh6p2AFjLQztWUuY/o8Mq0eMQVIY7HJ1CDQUJlxGGZRw1a5bqmd1g==", - "cpu": [ - "arm64" - ], - "license": "Apache-2.0", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": "^18.17.0 || ^20.3.0 || >=21.0.0" - }, - "funding": { - "url": "https://opencollective.com/libvips" - }, - "optionalDependencies": { - "@img/sharp-libvips-linuxmusl-arm64": "1.0.4" - } - }, - "node_modules/@img/sharp-linuxmusl-x64": { - "version": "0.33.5", - "resolved": "https://registry.npmjs.org/@img/sharp-linuxmusl-x64/-/sharp-linuxmusl-x64-0.33.5.tgz", - "integrity": "sha512-WT+d/cgqKkkKySYmqoZ8y3pxx7lx9vVejxW/W4DOFMYVSkErR+w7mf2u8m/y4+xHe7yY9DAXQMWQhpnMuFfScw==", - "cpu": [ - "x64" - ], - "license": "Apache-2.0", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": "^18.17.0 || ^20.3.0 || >=21.0.0" - }, - "funding": { - "url": "https://opencollective.com/libvips" - }, - "optionalDependencies": { - "@img/sharp-libvips-linuxmusl-x64": "1.0.4" - } - }, - "node_modules/@img/sharp-win32-x64": { - "version": "0.33.5", - "resolved": "https://registry.npmjs.org/@img/sharp-win32-x64/-/sharp-win32-x64-0.33.5.tgz", - "integrity": 
"sha512-MpY/o8/8kj+EcnxwvrP4aTJSWw/aZ7JIGR4aBeZkZw5B7/Jn+tY9/VNwtcoGmdT7GfggGIU4kygOMSbYnOrAbg==", - "cpu": [ - "x64" - ], - "license": "Apache-2.0 AND LGPL-3.0-or-later", - "optional": true, - "os": [ - "win32" - ], - "engines": { - "node": "^18.17.0 || ^20.3.0 || >=21.0.0" - }, - "funding": { - "url": "https://opencollective.com/libvips" - } - } - } -} diff --git a/nix/claude-code/package.nix b/nix/claude-code/package.nix deleted file mode 100644 index 053d1204..00000000 --- a/nix/claude-code/package.nix +++ /dev/null @@ -1,78 +0,0 @@ -# Claude Code package - locally maintained for version control -# Based on nixpkgs: https://github.com/NixOS/nixpkgs/tree/master/pkgs/by-name/cl/claude-code -# -# To update: Run ./update.sh from this directory -{ - lib, - stdenv, - buildNpmPackage, - fetchzip, - versionCheckHook, - writableTmpDirAsHomeHook, - bubblewrap, - procps, - socat, -}: -buildNpmPackage (finalAttrs: { - pname = "claude-code"; - version = "2.1.15"; - - src = fetchzip { - url = "https://registry.npmjs.org/@anthropic-ai/claude-code/-/claude-code-${finalAttrs.version}.tgz"; - hash = "sha256-3zhjeAwKj1fMLuriX1qpVA8zaCk1oekJ1UmeEdDx4Xg="; - }; - - npmDepsHash = "sha256-K5re0co3Tkz5peXHe/UUlsqAWq4YzSULdY9+xncfL5A="; - - strictDeps = true; - - postPatch = '' - cp ${./package-lock.json} package-lock.json - - # Replace hardcoded `/bin/bash` with `/usr/bin/env bash` for Nix compatibility - # https://github.com/anthropics/claude-code/issues/15195 - substituteInPlace cli.js \ - --replace-warn '#!/bin/bash' '#!/usr/bin/env bash' - ''; - - dontNpmBuild = true; - - env.AUTHORIZED = "1"; - - # `claude-code` tries to auto-update by default, this disables that functionality. - # https://docs.anthropic.com/en/docs/agents-and-tools/claude-code/overview#environment-variables - # The DEV=true env var causes claude to crash with `TypeError: window.WebSocket is not a constructor` - postInstall = '' - wrapProgram $out/bin/claude \ - --set DISABLE_AUTOUPDATER 1 \ - --unset DEV \ - --prefix PATH : ${ - lib.makeBinPath ( - [ - # claude-code uses [node-tree-kill](https://github.com/pkrumins/node-tree-kill) which requires procps's pgrep(darwin) or ps(linux) - procps - ] - # the following packages are required for the sandbox to work (Linux only) - ++ lib.optionals stdenv.hostPlatform.isLinux [ - bubblewrap - socat - ] - ) - } - ''; - - doInstallCheck = true; - nativeInstallCheckInputs = [ - writableTmpDirAsHomeHook - versionCheckHook - ]; - versionCheckKeepEnvironment = [ "HOME" ]; - - meta = { - description = "Agentic coding tool that lives in your terminal, understands your codebase, and helps you code faster"; - homepage = "https://github.com/anthropics/claude-code"; - downloadPage = "https://www.npmjs.com/package/@anthropic-ai/claude-code"; - license = lib.licenses.unfree; - mainProgram = "claude"; - }; -}) diff --git a/nix/claude-code/update.sh b/nix/claude-code/update.sh deleted file mode 100755 index cfc648bc..00000000 --- a/nix/claude-code/update.sh +++ /dev/null @@ -1,49 +0,0 @@ -#!/usr/bin/env bash -# Update claude-code package to latest npm version -set -euo pipefail - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -cd "$SCRIPT_DIR" - -# Get versions -OLD_VERSION=$(grep 'version = "' package.nix | head -1 | sed 's/.*version = "\([^"]*\)".*/\1/') -VERSION=$(npm view @anthropic-ai/claude-code version 2>/dev/null) - -if [[ "$VERSION" == "$OLD_VERSION" ]]; then - echo "Already at latest version: $OLD_VERSION" - exit 0 -fi - -echo "Updating claude-code: $OLD_VERSION -> $VERSION" - -# Download 
tarball -TARBALL_URL="https://registry.npmjs.org/@anthropic-ai/claude-code/-/claude-code-${VERSION}.tgz" -TMPDIR=$(mktemp -d) -trap "rm -rf $TMPDIR" EXIT - -curl -sL "$TARBALL_URL" -o "$TMPDIR/claude-code.tgz" - -# Extract and compute source hash -mkdir -p "$TMPDIR/src" -tar -xzf "$TMPDIR/claude-code.tgz" -C "$TMPDIR/src" --strip-components=1 -SRC_HASH=$(nix hash path "$TMPDIR/src") - -# Get package-lock.json from tarball -if [[ -f "$TMPDIR/src/package-lock.json" ]]; then - cp "$TMPDIR/src/package-lock.json" package-lock.json -else - echo "Error: No package-lock.json in tarball" - exit 1 -fi - -# Compute npmDepsHash using prefetch-npm-deps -NPM_DEPS_HASH=$(nix shell nixpkgs#prefetch-npm-deps -c prefetch-npm-deps package-lock.json 2>/dev/null) - -# Update package.nix -sed -i "s/version = \"[^\"]*\"/version = \"$VERSION\"/" package.nix -sed -i "s|hash = \"sha256-[^\"]*\"|hash = \"$SRC_HASH\"|" package.nix -sed -i "s|npmDepsHash = \"sha256-[^\"]*\"|npmDepsHash = \"$NPM_DEPS_HASH\"|" package.nix - -echo "Updated to version $VERSION" -echo " Source hash: $SRC_HASH" -echo " Deps hash: $NPM_DEPS_HASH" diff --git a/nix/update b/nix/update deleted file mode 100755 index 95057b45..00000000 --- a/nix/update +++ /dev/null @@ -1,18 +0,0 @@ -#!/usr/bin/env bash -# Update all Nix dependencies (claude-code package and flake inputs) -set -euo pipefail - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" - -cd "$REPO_ROOT" - -echo "Updating claude-code package..." -"$SCRIPT_DIR/claude-code/update.sh" - -echo "" -echo "Updating flake inputs..." -nix flake update - -echo "" -echo "Done! Run 'nix develop' to reload the environment." diff --git a/src/deepwork/standard_jobs/deepwork_jobs/job.yml b/src/deepwork/standard_jobs/deepwork_jobs/job.yml index 1aea4e0d..cb2424e4 100644 --- a/src/deepwork/standard_jobs/deepwork_jobs/job.yml +++ b/src/deepwork/standard_jobs/deepwork_jobs/job.yml @@ -222,4 +222,5 @@ steps: - "**Rules Job Removed**: Is `.deepwork/jobs/deepwork_rules/` removed if present?" - "**Config Version Updated**: Is `.deepwork/config.yml` using current version format?" - "**Summary Provided**: Is a repair_summary.md file created documenting all changes made?" + - "**DeepWork Re-installed**: Was `deepwork install` run after cleanup, and does it complete without errors?" - "**Git Status Clean**: Are changes ready to be committed (no untracked garbage files)?" diff --git a/src/deepwork/standard_jobs/deepwork_jobs/steps/errata.md b/src/deepwork/standard_jobs/deepwork_jobs/steps/errata.md index c71d62be..10245c62 100644 --- a/src/deepwork/standard_jobs/deepwork_jobs/steps/errata.md +++ b/src/deepwork/standard_jobs/deepwork_jobs/steps/errata.md @@ -134,7 +134,23 @@ Check for and remove other obsolete files: | `.claude/commands/` | Generated commands | Keep (current system) | | `.claude/settings.local.json` | Local overrides | Keep (user settings) | -### Step 6: Verify Git Status +### Step 6: Re-install DeepWork + +After all cleanup is complete, re-run `deepwork install` to ensure configurations are current and consistent: + +```bash +deepwork install +``` + +**Then verify:** +1. Check that `.deepwork/config.yml` is valid and up to date +2. Check that `.claude/skills/deepwork/` exists and contains the expected skill entry point +3. Check that all jobs in `.deepwork/jobs/` have valid `job.yml` files +4. Run `deepwork install` a second time and confirm the output is clean (no errors or warnings) + +If any issues are found, fix them before proceeding. 
The goal is a clean, working DeepWork installation with no residual problems from the repair process. + +### Step 7: Verify Git Status Check that the cleanup hasn't left untracked garbage: From 9d074eec23010fc8f3fc793c9888f8ae96b89a07 Mon Sep 17 00:00:00 2001 From: Noah Horton Date: Thu, 5 Feb 2026 14:10:01 -0700 Subject: [PATCH 28/45] remove repair summary --- .deepwork/jobs/deepwork_jobs/job.yml | 4 +--- .deepwork/schemas/job.schema.json | 3 +-- src/deepwork/schemas/job.schema.json | 3 +-- src/deepwork/standard_jobs/deepwork_jobs/job.yml | 4 +--- 4 files changed, 4 insertions(+), 10 deletions(-) diff --git a/.deepwork/jobs/deepwork_jobs/job.yml b/.deepwork/jobs/deepwork_jobs/job.yml index cb2424e4..7e29765e 100644 --- a/.deepwork/jobs/deepwork_jobs/job.yml +++ b/.deepwork/jobs/deepwork_jobs/job.yml @@ -206,11 +206,10 @@ steps: name: "Clean Up Errata" description: "Removes obsolete files and folders from prior DeepWork versions, including old skill directories, temp files, and deprecated configurations." instructions_file: steps/errata.md + outputs: [] inputs: - file: .deepwork/jobs/ from_step: fix_jobs - outputs: - - repair_summary.md dependencies: - fix_settings - fix_jobs @@ -221,6 +220,5 @@ steps: - "**Rules Folder Removed**: Is `.deepwork/rules/` folder backed up and removed (fully deprecated)?" - "**Rules Job Removed**: Is `.deepwork/jobs/deepwork_rules/` removed if present?" - "**Config Version Updated**: Is `.deepwork/config.yml` using current version format?" - - "**Summary Provided**: Is a repair_summary.md file created documenting all changes made?" - "**DeepWork Re-installed**: Was `deepwork install` run after cleanup, and does it complete without errors?" - "**Git Status Clean**: Are changes ready to be committed (no untracked garbage files)?" diff --git a/.deepwork/schemas/job.schema.json b/.deepwork/schemas/job.schema.json index 1d794f98..f00d7550 100644 --- a/.deepwork/schemas/job.schema.json +++ b/.deepwork/schemas/job.schema.json @@ -150,8 +150,7 @@ }, "outputs": { "type": "array", - "minItems": 1, - "description": "List of output files/directories produced by this step", + "description": "List of output files/directories produced by this step. May be empty for cleanup or validation steps.", "items": { "$ref": "#/$defs/stepOutput" } diff --git a/src/deepwork/schemas/job.schema.json b/src/deepwork/schemas/job.schema.json index 1d794f98..f00d7550 100644 --- a/src/deepwork/schemas/job.schema.json +++ b/src/deepwork/schemas/job.schema.json @@ -150,8 +150,7 @@ }, "outputs": { "type": "array", - "minItems": 1, - "description": "List of output files/directories produced by this step", + "description": "List of output files/directories produced by this step. May be empty for cleanup or validation steps.", "items": { "$ref": "#/$defs/stepOutput" } diff --git a/src/deepwork/standard_jobs/deepwork_jobs/job.yml b/src/deepwork/standard_jobs/deepwork_jobs/job.yml index cb2424e4..7e29765e 100644 --- a/src/deepwork/standard_jobs/deepwork_jobs/job.yml +++ b/src/deepwork/standard_jobs/deepwork_jobs/job.yml @@ -206,11 +206,10 @@ steps: name: "Clean Up Errata" description: "Removes obsolete files and folders from prior DeepWork versions, including old skill directories, temp files, and deprecated configurations." 
instructions_file: steps/errata.md + outputs: [] inputs: - file: .deepwork/jobs/ from_step: fix_jobs - outputs: - - repair_summary.md dependencies: - fix_settings - fix_jobs @@ -221,6 +220,5 @@ steps: - "**Rules Folder Removed**: Is `.deepwork/rules/` folder backed up and removed (fully deprecated)?" - "**Rules Job Removed**: Is `.deepwork/jobs/deepwork_rules/` removed if present?" - "**Config Version Updated**: Is `.deepwork/config.yml` using current version format?" - - "**Summary Provided**: Is a repair_summary.md file created documenting all changes made?" - "**DeepWork Re-installed**: Was `deepwork install` run after cleanup, and does it complete without errors?" - "**Git Status Clean**: Are changes ready to be committed (no untracked garbage files)?" From eb9f3ce9cdb611d49b363c1fabfd8c7313a36145 Mon Sep 17 00:00:00 2001 From: Noah Horton Date: Thu, 5 Feb 2026 15:02:31 -0700 Subject: [PATCH 29/45] Remove doc spec references from deepwork_jobs standard job MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Doc specs were never enforced programmatically — the infrastructure to parse them exists but was never wired into quality gates. Remove all doc spec guidance from job instructions to avoid misleading users into creating artifacts that have no effect. Co-Authored-By: Claude Opus 4.6 --- .../jobs/deepwork_jobs/doc_specs/job_spec.md | 184 ------------------ .deepwork/jobs/deepwork_jobs/job.yml | 11 +- .deepwork/jobs/deepwork_jobs/steps/define.md | 70 ------- .deepwork/jobs/deepwork_jobs/steps/learn.md | 68 ------- .../templates/doc_spec.md.template | 26 --- .../deepwork_jobs/doc_specs/job_spec.md | 184 ------------------ .../standard_jobs/deepwork_jobs/job.yml | 11 +- .../deepwork_jobs/steps/define.md | 70 ------- .../deepwork_jobs/steps/learn.md | 68 ------- .../templates/doc_spec.md.template | 26 --- 10 files changed, 2 insertions(+), 716 deletions(-) delete mode 100644 .deepwork/jobs/deepwork_jobs/doc_specs/job_spec.md delete mode 100644 .deepwork/jobs/deepwork_jobs/templates/doc_spec.md.template delete mode 100644 src/deepwork/standard_jobs/deepwork_jobs/doc_specs/job_spec.md delete mode 100644 src/deepwork/standard_jobs/deepwork_jobs/templates/doc_spec.md.template diff --git a/.deepwork/jobs/deepwork_jobs/doc_specs/job_spec.md b/.deepwork/jobs/deepwork_jobs/doc_specs/job_spec.md deleted file mode 100644 index 23fd9fc7..00000000 --- a/.deepwork/jobs/deepwork_jobs/doc_specs/job_spec.md +++ /dev/null @@ -1,184 +0,0 @@ ---- -name: "DeepWork Job Specification" -description: "YAML specification file that defines a multi-step workflow job for AI agents" -path_patterns: - - ".deepwork/jobs/*/job.yml" -target_audience: "AI agents executing jobs and developers defining workflows" -frequency: "Created once per job, updated as workflow evolves" -quality_criteria: - - name: Valid Identifier - description: "Job name must be lowercase with underscores, no spaces or special characters (e.g., `competitive_research`, `monthly_report`)" - - name: Semantic Version - description: "Version must follow semantic versioning format X.Y.Z (e.g., `1.0.0`, `2.1.3`)" - - name: Concise Summary - description: "Summary must be under 200 characters and clearly describe what the job accomplishes" - - name: Rich Description - description: "Description must be multi-line and explain: the problem solved, the process, expected outcomes, and target users" - - name: Changelog Present - description: "Must include a changelog array with at least the initial version entry. 
Changelog should only include one entry per branch at most" - - name: Complete Steps - description: "Each step must have: id (lowercase_underscores), name, description, instructions_file, outputs (at least one), and dependencies array" - - name: Valid Dependencies - description: "Dependencies must reference existing step IDs with no circular references" - - name: Input Consistency - description: "File inputs with `from_step` must reference a step that is in the dependencies array" - - name: Output Paths - description: "Outputs must be valid filenames or paths within the main repo directory structure, never in dot-directories like `.deepwork/`. Use specific, descriptive paths that lend themselves to glob patterns (e.g., `competitive_research/acme_corp/swot.md` or `operations/reports/2026-01/spending_analysis.md`). Parameterized paths like `[competitor_name]/` are encouraged for per-entity outputs. Avoid generic names (`output.md`, `analysis.md`) and transient-sounding paths (`temp/`, `draft.md`). Supporting materials for a final output should go in a peer `_dataroom` folder (e.g., `spending_analysis_dataroom/`)." - - name: Concise Instructions - description: "The content of the file, particularly the description, must not have excessively redundant information. It should be concise and to the point given that extra tokens will confuse the AI." ---- - -# DeepWork Job Specification: [job_name] - -A `job.yml` file defines a complete multi-step workflow that AI agents can execute. Each job breaks down a complex task into reviewable steps with clear inputs and outputs. - -## Required Fields - -### Top-Level Metadata - -```yaml -name: job_name # lowercase, underscores only -version: "1.0.0" # semantic versioning -summary: "Brief description" # max 200 characters -description: | # detailed multi-line explanation - [Explain what this workflow does, why it exists, - what outputs it produces, and who should use it] -``` - -### Changelog - -```yaml -changelog: - - version: "1.0.0" - changes: "Initial job creation" - - version: "1.1.0" - changes: "Added quality validation hooks" -``` - -### Steps Array - -```yaml -steps: - - id: step_id # unique, lowercase_underscores - name: "Human Readable Name" - description: "What this step accomplishes" - instructions_file: steps/step_id.md - inputs: - # User-provided inputs: - - name: param_name - description: "What the user provides" - # File inputs from previous steps: - - file: output.md - from_step: previous_step_id - outputs: - - competitive_research/competitors_list.md # descriptive path - - competitive_research/[competitor_name]/research.md # parameterized path - # With doc spec reference: - - file: competitive_research/final_report.md - doc_spec: .deepwork/doc_specs/report_type.md - dependencies: - - previous_step_id # steps that must complete first -``` - -## Optional Fields - -### Agent Delegation - -When a step should be executed by a specific agent type, use the `agent` field. This automatically sets `context: fork` in the generated skill. - -```yaml -steps: - - id: research_step - agent: general-purpose # Delegates to the general-purpose agent -``` - -Available agent types: -- `general-purpose` - Standard agent for multi-step tasks - -### Quality Hooks - -```yaml -steps: - - id: step_id - hooks: - after_agent: - # Inline prompt for quality validation: - - prompt: | - Verify the output meets criteria: - 1. [Criterion 1] - 2. [Criterion 2] - If ALL criteria are met, include `...`. 
- # External prompt file: - - prompt_file: hooks/quality_check.md - # Script for programmatic validation: - - script: hooks/run_tests.sh -``` - -## Validation Rules - -1. **No circular dependencies**: Step A cannot depend on Step B if Step B depends on Step A -2. **File inputs require dependencies**: If a step uses `from_step: X`, then X must be in its dependencies -3. **Unique step IDs**: No two steps can have the same id -4. **Valid file paths**: Output paths must not contain invalid characters and should be in the main repo (not dot-directories) -5. **Instructions files exist**: Each `instructions_file` path should have a corresponding file created - -## Example: Complete Job Specification - -```yaml -name: competitive_research -version: "1.0.0" -summary: "Systematic competitive analysis workflow" -description: | - A comprehensive workflow for analyzing competitors in your market segment. - Helps product teams understand the competitive landscape through systematic - identification, research, comparison, and positioning recommendations. - - Produces: - - Vetted competitor list - - Research notes per competitor - - Comparison matrix - - Strategic positioning report - -changelog: - - version: "1.0.0" - changes: "Initial job creation" - -steps: - - id: identify_competitors - name: "Identify Competitors" - description: "Identify 5-7 key competitors in the target market" - instructions_file: steps/identify_competitors.md - inputs: - - name: market_segment - description: "The market segment to analyze" - - name: product_category - description: "The product category" - outputs: - - competitive_research/competitors_list.md - dependencies: [] - - - id: research_competitors - name: "Research Competitors" - description: "Deep dive research on each identified competitor" - instructions_file: steps/research_competitors.md - inputs: - - file: competitive_research/competitors_list.md - from_step: identify_competitors - outputs: - - competitive_research/[competitor_name]/research.md - dependencies: - - identify_competitors - - - id: positioning_report - name: "Positioning Report" - description: "Strategic positioning recommendations" - instructions_file: steps/positioning_report.md - inputs: - - file: competitive_research/[competitor_name]/research.md - from_step: research_competitors - outputs: - - file: competitive_research/positioning_report.md - doc_spec: .deepwork/doc_specs/positioning_report.md - dependencies: - - research_competitors -``` diff --git a/.deepwork/jobs/deepwork_jobs/job.yml b/.deepwork/jobs/deepwork_jobs/job.yml index 7e29765e..facf3ce7 100644 --- a/.deepwork/jobs/deepwork_jobs/job.yml +++ b/.deepwork/jobs/deepwork_jobs/job.yml @@ -58,12 +58,6 @@ changelog: changes: "Removed implementation_summary and learning_summary outputs; simplified step outputs" - version: "0.5.0" changes: "Standardized on 'ask structured questions' phrasing for user input; Updated quality criteria hooks to verify phrase usage; Added guidance in implement.md to use phrase in generated instructions" - - version: "0.6.0" - changes: "Added doc spec support; define.md now detects document-oriented workflows and guides doc spec creation; learn.md now identifies and applies doc spec-related improvements" - - version: "0.7.0" - changes: "Added job.yml doc spec; define step now outputs job.yml with doc_spec reference for quality validation" - - version: "0.8.0" - changes: "Added review_job_spec step between define and implement for doc spec-based quality validation using sub-agent review" - version: "0.9.0" changes: 
"Improved skill descriptions with third-person voice and 'Use when...' triggers for better discoverability" @@ -76,8 +70,7 @@ steps: - name: job_purpose description: "What complex task or workflow are you trying to accomplish?" outputs: - - file: job.yml - doc_spec: .deepwork/doc_specs/job_spec.md + - job.yml dependencies: [] - id: implement name: "Implement Job Steps" @@ -158,8 +151,6 @@ steps: - "**Instructions Improved**: Were job instructions updated to address identified issues?" - "**Instructions Concise**: Are instructions free of redundancy and unnecessary verbosity?" - "**Shared Content Extracted**: Is lengthy/duplicated content extracted into referenced files?" - - "**doc spec Reviewed (if applicable)**: For jobs with doc spec outputs, were doc spec-related learnings identified?" - - "**doc spec Updated (if applicable)**: Were doc spec files updated with improved quality criteria or structure?" - "**Bespoke Learnings Captured**: Were run-specific learnings added to AGENTS.md?" - "**File References Used**: Do AGENTS.md entries reference other files where appropriate?" - "**Working Folder Correct**: Is AGENTS.md in the correct working folder for the job?" diff --git a/.deepwork/jobs/deepwork_jobs/steps/define.md b/.deepwork/jobs/deepwork_jobs/steps/define.md index 3e9a87da..3cd01848 100644 --- a/.deepwork/jobs/deepwork_jobs/steps/define.md +++ b/.deepwork/jobs/deepwork_jobs/steps/define.md @@ -31,60 +31,6 @@ Start by asking structured questions to understand what the user wants to accomp - What are the distinct stages from start to finish? - Are there any dependencies between phases? -### Step 1.5: Detect Document-Oriented Workflows - -**Check for document-focused patterns** in the user's description: -- Keywords: "report", "summary", "document", "create", "monthly", "quarterly", "for stakeholders", "for leadership" -- Final deliverable is a specific document (e.g., "AWS spending report", "competitive analysis", "sprint summary") -- Recurring documents with consistent structure - -**If a document-oriented workflow is detected:** - -1. Inform the user: "This workflow produces a specific document type. I recommend defining a doc spec first to ensure consistent quality." - -2. Ask structured questions to understand if they want to: - - Create a doc spec for this document - - Use an existing doc spec (if any exist in `.deepwork/doc_specs/`) - - Skip doc spec and proceed with simple outputs - -### Step 1.6: Define the Doc Spec (if needed) - -When creating a doc spec, gather the following information: - -1. **Document Identity** - - What is the document called? (e.g., "Monthly AWS Spending Report") - - Brief description of its purpose - - Where should these documents be stored? (path patterns like `finance/aws-reports/*.md`) - -2. **Audience and Context** - - Who reads this document? (target audience) - - How often is it produced? (frequency) - -3. **Quality Criteria** (3-5 criteria, each with name and description) - - **Important**: Doc spec quality criteria define requirements for the **output document itself**, not the process of creating it. Focus on what the finished document must contain or achieve. 
- - Examples for a spending report: - - **Visualization**: Must include charts showing spend breakdown by service - - **Variance Analysis**: Must compare current month against previous with percentages - - **Action Items**: Must include recommended cost optimization actions - - **Note**: When a doc spec is created for a step's output, the step should generally NOT have separate `quality_criteria` in the job.yml. The doc spec's criteria cover output quality. Only add step-level quality_criteria if there are essential process requirements (e.g., "must use specific tool"), and minimize these when possible. - -4. **Document Structure** - - What sections should it have? - - Any required elements (tables, charts, summaries)? - -### Step 1.7: Create the doc spec File (if needed) - -Create the doc spec file at `.deepwork/doc_specs/[doc_spec_name].md`: - -**Template reference**: See `.deepwork/jobs/deepwork_jobs/templates/doc_spec.md.template` for the standard structure. - -**Complete example**: See `.deepwork/doc_specs/job_spec.md` for a fully worked example (the doc spec for job.yml files). - -After creating the doc spec, proceed to Step 2 with the doc spec reference for the final step's output. - ### Step 2: Define Each Step For each major phase they mentioned, ask structured questions to gather details: @@ -106,8 +52,6 @@ For each major phase they mentioned, ask structured questions to gather details: - Where should each output be saved? (filename/path) - Should outputs be organized in subdirectories? (e.g., `reports/`, `data/`, `drafts/`) - Will other steps need this output? - - **Does this output have a doc spec?** If a doc spec was created in Step 1.6/1.7, reference it for the appropriate output - #### Work Product Storage Guidelines **Key principle**: Job outputs belong in the main repository directory structure, not in dot-directories. The `.deepwork/` directory is for job definitions and configuration only. @@ -189,18 +133,6 @@ For each major phase they mentioned, ask structured questions to gather details: **Note**: You're gathering this information to understand what instructions will be needed, but you won't create the instruction files yet - that happens in the `implement` step. -#### Doc Spec-Aware Output Format - -When a step produces a document with a doc spec reference, use this format in job.yml: - -```yaml -outputs: - - file: reports/monthly_spending.md - doc_spec: .deepwork/doc_specs/monthly_aws_report.md -``` - -The doc spec's quality criteria will automatically be included in the generated skill, ensuring consistent document quality. - ### Capability Considerations When defining steps, identify any that require specialized tools: @@ -299,8 +231,6 @@ This creates: (Where `[job_name]` is the name of the NEW job you're creating, e.g., `competitive_research`) -**Doc Spec**: See `.deepwork/doc_specs/job_spec.md` for the complete specification with quality criteria. - **Template reference**: See `.deepwork/jobs/deepwork_jobs/templates/job.yml.template` for the standard structure. **Complete example**: See `.deepwork/jobs/deepwork_jobs/templates/job.yml.example` for a fully worked example. 
diff --git a/.deepwork/jobs/deepwork_jobs/steps/learn.md b/.deepwork/jobs/deepwork_jobs/steps/learn.md index a4a50c9a..f6d48c78 100644 --- a/.deepwork/jobs/deepwork_jobs/steps/learn.md +++ b/.deepwork/jobs/deepwork_jobs/steps/learn.md @@ -66,15 +66,6 @@ For each learning identified, determine if it is: - "Quality criteria should include checking for Y" - "Add example of correct output format" -**doc spec-Related** (should improve doc spec files): -- Improvements to document quality criteria -- Changes to document structure or format -- Updated audience or frequency information -- Examples: - - "The report should include a summary table" - - "Quality criterion 'Visualization' needs clearer requirements" - - "Documents need a section for action items" - **Bespoke** (should go in AGENTS.md): - Specific to THIS project/codebase/run - Depends on local conventions or structure @@ -85,30 +76,6 @@ For each learning identified, determine if it is: - "This project uses camelCase for function names" - "The main config file is at `config/settings.yml`" -### Step 3.5: Identify doc spec-Related Learnings - -Review the conversation for doc spec-related improvements: - -1. **Quality Criteria Changes** - - Were any quality criteria unclear or insufficient? - - Did the agent repeatedly fail certain criteria? - - Are there new criteria that should be added? - -2. **Document Structure Changes** - - Did the user request different sections? - - Were parts of the document format confusing? - - Should the example document be updated? - -3. **Metadata Updates** - - Has the target audience changed? - - Should frequency or path patterns be updated? - -**Signals for doc spec improvements:** -- User asked for changes to document format -- Repeated validation failures on specific criteria -- Feedback about missing sections or information -- Changes to how documents are organized/stored - ### Step 4: Update Job Instructions (Generalizable Learnings) For each generalizable learning: @@ -162,41 +129,6 @@ Review all instruction files for the job and identify content that: - Shorter instruction files - easier to read and maintain - Consistent guidance across steps -### Step 4.5: Update doc spec Files (doc spec-Related Learnings) - -If doc spec-related learnings were identified: - -1. **Locate the doc spec file** - - Find doc spec references in job.yml outputs (look for `doc_spec: .deepwork/doc_specs/[doc_spec_name].md`) - - doc spec files are at `.deepwork/doc_specs/[doc_spec_name].md` - -2. **Update quality_criteria array** - - Add new criteria with name and description - - Modify existing criteria descriptions for clarity - - Remove criteria that are no longer relevant - -3. **Update example document** - - Modify the markdown body to reflect structure changes - - Ensure the example matches updated criteria - -4. **Update metadata as needed** - - target_audience: If audience has changed - - frequency: If production cadence has changed - - path_patterns: If storage location has changed - -**Example doc spec update:** -```yaml -# Before -quality_criteria: - - name: Visualization - description: Include charts - -# After -quality_criteria: - - name: Visualization - description: Include Mermaid.js charts showing spend breakdown by service and month-over-month trend -``` - ### Step 5: Create/Update AGENTS.md (Bespoke Learnings) The AGENTS.md file captures project-specific knowledge that helps future agent runs. 
diff --git a/.deepwork/jobs/deepwork_jobs/templates/doc_spec.md.template b/.deepwork/jobs/deepwork_jobs/templates/doc_spec.md.template deleted file mode 100644 index d183344f..00000000 --- a/.deepwork/jobs/deepwork_jobs/templates/doc_spec.md.template +++ /dev/null @@ -1,26 +0,0 @@ ---- -name: "[Document Name]" -description: "[Brief description of the document's purpose]" -path_patterns: - - "[path/to/documents/*.md]" -target_audience: "[Who reads this document]" -frequency: "[How often produced, e.g., Monthly, Per sprint, On demand]" -quality_criteria: - - name: "[Criterion Name]" - description: "[What this criterion requires - be specific]" - - name: "[Criterion Name]" - description: "[What this criterion requires - be specific]" - - name: "[Criterion Name]" - description: "[What this criterion requires - be specific]" ---- - -# [Document Title]: [Variables like Month, Year, Sprint] - -## Section 1 -[Describe what goes in this section] - -## Section 2 -[Describe what goes in this section] - -## Section 3 -[Describe what goes in this section] diff --git a/src/deepwork/standard_jobs/deepwork_jobs/doc_specs/job_spec.md b/src/deepwork/standard_jobs/deepwork_jobs/doc_specs/job_spec.md deleted file mode 100644 index 23fd9fc7..00000000 --- a/src/deepwork/standard_jobs/deepwork_jobs/doc_specs/job_spec.md +++ /dev/null @@ -1,184 +0,0 @@ ---- -name: "DeepWork Job Specification" -description: "YAML specification file that defines a multi-step workflow job for AI agents" -path_patterns: - - ".deepwork/jobs/*/job.yml" -target_audience: "AI agents executing jobs and developers defining workflows" -frequency: "Created once per job, updated as workflow evolves" -quality_criteria: - - name: Valid Identifier - description: "Job name must be lowercase with underscores, no spaces or special characters (e.g., `competitive_research`, `monthly_report`)" - - name: Semantic Version - description: "Version must follow semantic versioning format X.Y.Z (e.g., `1.0.0`, `2.1.3`)" - - name: Concise Summary - description: "Summary must be under 200 characters and clearly describe what the job accomplishes" - - name: Rich Description - description: "Description must be multi-line and explain: the problem solved, the process, expected outcomes, and target users" - - name: Changelog Present - description: "Must include a changelog array with at least the initial version entry. Changelog should only include one entry per branch at most" - - name: Complete Steps - description: "Each step must have: id (lowercase_underscores), name, description, instructions_file, outputs (at least one), and dependencies array" - - name: Valid Dependencies - description: "Dependencies must reference existing step IDs with no circular references" - - name: Input Consistency - description: "File inputs with `from_step` must reference a step that is in the dependencies array" - - name: Output Paths - description: "Outputs must be valid filenames or paths within the main repo directory structure, never in dot-directories like `.deepwork/`. Use specific, descriptive paths that lend themselves to glob patterns (e.g., `competitive_research/acme_corp/swot.md` or `operations/reports/2026-01/spending_analysis.md`). Parameterized paths like `[competitor_name]/` are encouraged for per-entity outputs. Avoid generic names (`output.md`, `analysis.md`) and transient-sounding paths (`temp/`, `draft.md`). Supporting materials for a final output should go in a peer `_dataroom` folder (e.g., `spending_analysis_dataroom/`)." 
- - name: Concise Instructions - description: "The content of the file, particularly the description, must not have excessively redundant information. It should be concise and to the point given that extra tokens will confuse the AI." ---- - -# DeepWork Job Specification: [job_name] - -A `job.yml` file defines a complete multi-step workflow that AI agents can execute. Each job breaks down a complex task into reviewable steps with clear inputs and outputs. - -## Required Fields - -### Top-Level Metadata - -```yaml -name: job_name # lowercase, underscores only -version: "1.0.0" # semantic versioning -summary: "Brief description" # max 200 characters -description: | # detailed multi-line explanation - [Explain what this workflow does, why it exists, - what outputs it produces, and who should use it] -``` - -### Changelog - -```yaml -changelog: - - version: "1.0.0" - changes: "Initial job creation" - - version: "1.1.0" - changes: "Added quality validation hooks" -``` - -### Steps Array - -```yaml -steps: - - id: step_id # unique, lowercase_underscores - name: "Human Readable Name" - description: "What this step accomplishes" - instructions_file: steps/step_id.md - inputs: - # User-provided inputs: - - name: param_name - description: "What the user provides" - # File inputs from previous steps: - - file: output.md - from_step: previous_step_id - outputs: - - competitive_research/competitors_list.md # descriptive path - - competitive_research/[competitor_name]/research.md # parameterized path - # With doc spec reference: - - file: competitive_research/final_report.md - doc_spec: .deepwork/doc_specs/report_type.md - dependencies: - - previous_step_id # steps that must complete first -``` - -## Optional Fields - -### Agent Delegation - -When a step should be executed by a specific agent type, use the `agent` field. This automatically sets `context: fork` in the generated skill. - -```yaml -steps: - - id: research_step - agent: general-purpose # Delegates to the general-purpose agent -``` - -Available agent types: -- `general-purpose` - Standard agent for multi-step tasks - -### Quality Hooks - -```yaml -steps: - - id: step_id - hooks: - after_agent: - # Inline prompt for quality validation: - - prompt: | - Verify the output meets criteria: - 1. [Criterion 1] - 2. [Criterion 2] - If ALL criteria are met, include `...`. - # External prompt file: - - prompt_file: hooks/quality_check.md - # Script for programmatic validation: - - script: hooks/run_tests.sh -``` - -## Validation Rules - -1. **No circular dependencies**: Step A cannot depend on Step B if Step B depends on Step A -2. **File inputs require dependencies**: If a step uses `from_step: X`, then X must be in its dependencies -3. **Unique step IDs**: No two steps can have the same id -4. **Valid file paths**: Output paths must not contain invalid characters and should be in the main repo (not dot-directories) -5. **Instructions files exist**: Each `instructions_file` path should have a corresponding file created - -## Example: Complete Job Specification - -```yaml -name: competitive_research -version: "1.0.0" -summary: "Systematic competitive analysis workflow" -description: | - A comprehensive workflow for analyzing competitors in your market segment. - Helps product teams understand the competitive landscape through systematic - identification, research, comparison, and positioning recommendations. 
- - Produces: - - Vetted competitor list - - Research notes per competitor - - Comparison matrix - - Strategic positioning report - -changelog: - - version: "1.0.0" - changes: "Initial job creation" - -steps: - - id: identify_competitors - name: "Identify Competitors" - description: "Identify 5-7 key competitors in the target market" - instructions_file: steps/identify_competitors.md - inputs: - - name: market_segment - description: "The market segment to analyze" - - name: product_category - description: "The product category" - outputs: - - competitive_research/competitors_list.md - dependencies: [] - - - id: research_competitors - name: "Research Competitors" - description: "Deep dive research on each identified competitor" - instructions_file: steps/research_competitors.md - inputs: - - file: competitive_research/competitors_list.md - from_step: identify_competitors - outputs: - - competitive_research/[competitor_name]/research.md - dependencies: - - identify_competitors - - - id: positioning_report - name: "Positioning Report" - description: "Strategic positioning recommendations" - instructions_file: steps/positioning_report.md - inputs: - - file: competitive_research/[competitor_name]/research.md - from_step: research_competitors - outputs: - - file: competitive_research/positioning_report.md - doc_spec: .deepwork/doc_specs/positioning_report.md - dependencies: - - research_competitors -``` diff --git a/src/deepwork/standard_jobs/deepwork_jobs/job.yml b/src/deepwork/standard_jobs/deepwork_jobs/job.yml index 7e29765e..facf3ce7 100644 --- a/src/deepwork/standard_jobs/deepwork_jobs/job.yml +++ b/src/deepwork/standard_jobs/deepwork_jobs/job.yml @@ -58,12 +58,6 @@ changelog: changes: "Removed implementation_summary and learning_summary outputs; simplified step outputs" - version: "0.5.0" changes: "Standardized on 'ask structured questions' phrasing for user input; Updated quality criteria hooks to verify phrase usage; Added guidance in implement.md to use phrase in generated instructions" - - version: "0.6.0" - changes: "Added doc spec support; define.md now detects document-oriented workflows and guides doc spec creation; learn.md now identifies and applies doc spec-related improvements" - - version: "0.7.0" - changes: "Added job.yml doc spec; define step now outputs job.yml with doc_spec reference for quality validation" - - version: "0.8.0" - changes: "Added review_job_spec step between define and implement for doc spec-based quality validation using sub-agent review" - version: "0.9.0" changes: "Improved skill descriptions with third-person voice and 'Use when...' triggers for better discoverability" @@ -76,8 +70,7 @@ steps: - name: job_purpose description: "What complex task or workflow are you trying to accomplish?" outputs: - - file: job.yml - doc_spec: .deepwork/doc_specs/job_spec.md + - job.yml dependencies: [] - id: implement name: "Implement Job Steps" @@ -158,8 +151,6 @@ steps: - "**Instructions Improved**: Were job instructions updated to address identified issues?" - "**Instructions Concise**: Are instructions free of redundancy and unnecessary verbosity?" - "**Shared Content Extracted**: Is lengthy/duplicated content extracted into referenced files?" - - "**doc spec Reviewed (if applicable)**: For jobs with doc spec outputs, were doc spec-related learnings identified?" - - "**doc spec Updated (if applicable)**: Were doc spec files updated with improved quality criteria or structure?" - "**Bespoke Learnings Captured**: Were run-specific learnings added to AGENTS.md?" 
- "**File References Used**: Do AGENTS.md entries reference other files where appropriate?" - "**Working Folder Correct**: Is AGENTS.md in the correct working folder for the job?" diff --git a/src/deepwork/standard_jobs/deepwork_jobs/steps/define.md b/src/deepwork/standard_jobs/deepwork_jobs/steps/define.md index 3e9a87da..3cd01848 100644 --- a/src/deepwork/standard_jobs/deepwork_jobs/steps/define.md +++ b/src/deepwork/standard_jobs/deepwork_jobs/steps/define.md @@ -31,60 +31,6 @@ Start by asking structured questions to understand what the user wants to accomp - What are the distinct stages from start to finish? - Are there any dependencies between phases? -### Step 1.5: Detect Document-Oriented Workflows - -**Check for document-focused patterns** in the user's description: -- Keywords: "report", "summary", "document", "create", "monthly", "quarterly", "for stakeholders", "for leadership" -- Final deliverable is a specific document (e.g., "AWS spending report", "competitive analysis", "sprint summary") -- Recurring documents with consistent structure - -**If a document-oriented workflow is detected:** - -1. Inform the user: "This workflow produces a specific document type. I recommend defining a doc spec first to ensure consistent quality." - -2. Ask structured questions to understand if they want to: - - Create a doc spec for this document - - Use an existing doc spec (if any exist in `.deepwork/doc_specs/`) - - Skip doc spec and proceed with simple outputs - -### Step 1.6: Define the Doc Spec (if needed) - -When creating a doc spec, gather the following information: - -1. **Document Identity** - - What is the document called? (e.g., "Monthly AWS Spending Report") - - Brief description of its purpose - - Where should these documents be stored? (path patterns like `finance/aws-reports/*.md`) - -2. **Audience and Context** - - Who reads this document? (target audience) - - How often is it produced? (frequency) - -3. **Quality Criteria** (3-5 criteria, each with name and description) - - **Important**: Doc spec quality criteria define requirements for the **output document itself**, not the process of creating it. Focus on what the finished document must contain or achieve. - - Examples for a spending report: - - **Visualization**: Must include charts showing spend breakdown by service - - **Variance Analysis**: Must compare current month against previous with percentages - - **Action Items**: Must include recommended cost optimization actions - - **Note**: When a doc spec is created for a step's output, the step should generally NOT have separate `quality_criteria` in the job.yml. The doc spec's criteria cover output quality. Only add step-level quality_criteria if there are essential process requirements (e.g., "must use specific tool"), and minimize these when possible. - -4. **Document Structure** - - What sections should it have? - - Any required elements (tables, charts, summaries)? - -### Step 1.7: Create the doc spec File (if needed) - -Create the doc spec file at `.deepwork/doc_specs/[doc_spec_name].md`: - -**Template reference**: See `.deepwork/jobs/deepwork_jobs/templates/doc_spec.md.template` for the standard structure. - -**Complete example**: See `.deepwork/doc_specs/job_spec.md` for a fully worked example (the doc spec for job.yml files). - -After creating the doc spec, proceed to Step 2 with the doc spec reference for the final step's output. 
- ### Step 2: Define Each Step For each major phase they mentioned, ask structured questions to gather details: @@ -106,8 +52,6 @@ For each major phase they mentioned, ask structured questions to gather details: - Where should each output be saved? (filename/path) - Should outputs be organized in subdirectories? (e.g., `reports/`, `data/`, `drafts/`) - Will other steps need this output? - - **Does this output have a doc spec?** If a doc spec was created in Step 1.6/1.7, reference it for the appropriate output - #### Work Product Storage Guidelines **Key principle**: Job outputs belong in the main repository directory structure, not in dot-directories. The `.deepwork/` directory is for job definitions and configuration only. @@ -189,18 +133,6 @@ For each major phase they mentioned, ask structured questions to gather details: **Note**: You're gathering this information to understand what instructions will be needed, but you won't create the instruction files yet - that happens in the `implement` step. -#### Doc Spec-Aware Output Format - -When a step produces a document with a doc spec reference, use this format in job.yml: - -```yaml -outputs: - - file: reports/monthly_spending.md - doc_spec: .deepwork/doc_specs/monthly_aws_report.md -``` - -The doc spec's quality criteria will automatically be included in the generated skill, ensuring consistent document quality. - ### Capability Considerations When defining steps, identify any that require specialized tools: @@ -299,8 +231,6 @@ This creates: (Where `[job_name]` is the name of the NEW job you're creating, e.g., `competitive_research`) -**Doc Spec**: See `.deepwork/doc_specs/job_spec.md` for the complete specification with quality criteria. - **Template reference**: See `.deepwork/jobs/deepwork_jobs/templates/job.yml.template` for the standard structure. **Complete example**: See `.deepwork/jobs/deepwork_jobs/templates/job.yml.example` for a fully worked example. diff --git a/src/deepwork/standard_jobs/deepwork_jobs/steps/learn.md b/src/deepwork/standard_jobs/deepwork_jobs/steps/learn.md index a4a50c9a..f6d48c78 100644 --- a/src/deepwork/standard_jobs/deepwork_jobs/steps/learn.md +++ b/src/deepwork/standard_jobs/deepwork_jobs/steps/learn.md @@ -66,15 +66,6 @@ For each learning identified, determine if it is: - "Quality criteria should include checking for Y" - "Add example of correct output format" -**doc spec-Related** (should improve doc spec files): -- Improvements to document quality criteria -- Changes to document structure or format -- Updated audience or frequency information -- Examples: - - "The report should include a summary table" - - "Quality criterion 'Visualization' needs clearer requirements" - - "Documents need a section for action items" - **Bespoke** (should go in AGENTS.md): - Specific to THIS project/codebase/run - Depends on local conventions or structure @@ -85,30 +76,6 @@ For each learning identified, determine if it is: - "This project uses camelCase for function names" - "The main config file is at `config/settings.yml`" -### Step 3.5: Identify doc spec-Related Learnings - -Review the conversation for doc spec-related improvements: - -1. **Quality Criteria Changes** - - Were any quality criteria unclear or insufficient? - - Did the agent repeatedly fail certain criteria? - - Are there new criteria that should be added? - -2. **Document Structure Changes** - - Did the user request different sections? - - Were parts of the document format confusing? - - Should the example document be updated? - -3. 
**Metadata Updates** - - Has the target audience changed? - - Should frequency or path patterns be updated? - -**Signals for doc spec improvements:** -- User asked for changes to document format -- Repeated validation failures on specific criteria -- Feedback about missing sections or information -- Changes to how documents are organized/stored - ### Step 4: Update Job Instructions (Generalizable Learnings) For each generalizable learning: @@ -162,41 +129,6 @@ Review all instruction files for the job and identify content that: - Shorter instruction files - easier to read and maintain - Consistent guidance across steps -### Step 4.5: Update doc spec Files (doc spec-Related Learnings) - -If doc spec-related learnings were identified: - -1. **Locate the doc spec file** - - Find doc spec references in job.yml outputs (look for `doc_spec: .deepwork/doc_specs/[doc_spec_name].md`) - - doc spec files are at `.deepwork/doc_specs/[doc_spec_name].md` - -2. **Update quality_criteria array** - - Add new criteria with name and description - - Modify existing criteria descriptions for clarity - - Remove criteria that are no longer relevant - -3. **Update example document** - - Modify the markdown body to reflect structure changes - - Ensure the example matches updated criteria - -4. **Update metadata as needed** - - target_audience: If audience has changed - - frequency: If production cadence has changed - - path_patterns: If storage location has changed - -**Example doc spec update:** -```yaml -# Before -quality_criteria: - - name: Visualization - description: Include charts - -# After -quality_criteria: - - name: Visualization - description: Include Mermaid.js charts showing spend breakdown by service and month-over-month trend -``` - ### Step 5: Create/Update AGENTS.md (Bespoke Learnings) The AGENTS.md file captures project-specific knowledge that helps future agent runs. 
diff --git a/src/deepwork/standard_jobs/deepwork_jobs/templates/doc_spec.md.template b/src/deepwork/standard_jobs/deepwork_jobs/templates/doc_spec.md.template deleted file mode 100644 index d183344f..00000000 --- a/src/deepwork/standard_jobs/deepwork_jobs/templates/doc_spec.md.template +++ /dev/null @@ -1,26 +0,0 @@ ---- -name: "[Document Name]" -description: "[Brief description of the document's purpose]" -path_patterns: - - "[path/to/documents/*.md]" -target_audience: "[Who reads this document]" -frequency: "[How often produced, e.g., Monthly, Per sprint, On demand]" -quality_criteria: - - name: "[Criterion Name]" - description: "[What this criterion requires - be specific]" - - name: "[Criterion Name]" - description: "[What this criterion requires - be specific]" - - name: "[Criterion Name]" - description: "[What this criterion requires - be specific]" ---- - -# [Document Title]: [Variables like Month, Year, Sprint] - -## Section 1 -[Describe what goes in this section] - -## Section 2 -[Describe what goes in this section] - -## Section 3 -[Describe what goes in this section] From 84710481e25bbf9b4b32223a56a25ac6d93ff4d6 Mon Sep 17 00:00:00 2001 From: Noah Horton Date: Thu, 5 Feb 2026 15:59:30 -0700 Subject: [PATCH 30/45] make mcp tolerant to name errors in workflow name --- CONTRIBUTING.md | 27 ++++++++ doc/mcp_interface.md | 2 +- src/deepwork/mcp/tools.py | 13 +++- .../deepwork_jobs/make_new_job.sh | 31 +++++---- tests/unit/mcp/test_tools.py | 65 ++++++++++++++++++- 5 files changed, 115 insertions(+), 23 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 0bdc3f33..c39359d0 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -7,6 +7,7 @@ Thank you for your interest in contributing to DeepWork! This guide will help yo - [Prerequisites](#prerequisites) - [Development Setup](#development-setup) - [Installing DeepWork Locally](#installing-deepwork-locally) +- [Installing Pre-Release Versions](#installing-pre-release-versions) - [Testing Your Local Installation](#testing-your-local-installation) - [Running Tests](#running-tests) - [Code Quality](#code-quality) @@ -240,6 +241,32 @@ which deepwork # Should point to .venv/bin/deepwork deepwork --version ``` +## Installing Pre-Release Versions + +DeepWork uses pre-release versions (e.g., `0.7.0a1`) during development. By default, `uv` and `pip` skip pre-release versions, so you need to opt in explicitly. + +### With uv + +```bash +# Install the latest pre-release from PyPI +uv pip install --prerelease=allow deepwork + +# Or pin to a specific pre-release +uv pip install --prerelease=allow "deepwork==0.7.0a1" +``` + +### With pip + +```bash +pip install --pre deepwork +``` + +### With uv tool install (global CLI) + +```bash +uv tool install --prerelease=allow deepwork +``` + ## Testing Your Local Installation To test your local DeepWork installation in a real project: diff --git a/doc/mcp_interface.md b/doc/mcp_interface.md index 6b618fc1..967f470b 100644 --- a/doc/mcp_interface.md +++ b/doc/mcp_interface.md @@ -65,7 +65,7 @@ Start a new workflow session. Creates a git branch, initializes state tracking, |-----------|------|----------|-------------| | `goal` | `string` | Yes | What the user wants to accomplish | | `job_name` | `string` | Yes | Name of the job | -| `workflow_name` | `string` | Yes | Name of the workflow within the job | +| `workflow_name` | `string` | Yes | Name of the workflow within the job. If the name doesn't match but the job has only one workflow, that workflow is selected automatically. 
If the job has multiple workflows, an error is returned listing the available workflow names. | | `instance_id` | `string \| null` | No | Optional identifier for naming (e.g., 'acme', 'q1-2026') | #### Returns diff --git a/src/deepwork/mcp/tools.py b/src/deepwork/mcp/tools.py index 0a7275a0..a11ea67f 100644 --- a/src/deepwork/mcp/tools.py +++ b/src/deepwork/mcp/tools.py @@ -132,6 +132,9 @@ def _get_job(self, job_name: str) -> JobDefinition: def _get_workflow(self, job: JobDefinition, workflow_name: str) -> Workflow: """Get a specific workflow from a job. + If the workflow name doesn't match any workflow but the job has exactly + one workflow, that workflow is returned automatically. + Args: job: Job definition workflow_name: Workflow name to find @@ -140,12 +143,16 @@ def _get_workflow(self, job: JobDefinition, workflow_name: str) -> Workflow: Workflow Raises: - ToolError: If workflow not found + ToolError: If workflow not found and job has multiple workflows """ for wf in job.workflows: if wf.name == workflow_name: return wf + # Auto-select if there's only one workflow + if len(job.workflows) == 1: + return job.workflows[0] + available = [wf.name for wf in job.workflows] raise ToolError( f"Workflow '{workflow_name}' not found in job '{job.name}'. " @@ -214,10 +221,10 @@ async def start_workflow(self, input_data: StartWorkflowInput) -> StartWorkflowR if first_step is None: raise ToolError(f"First step not found: {first_step_id}") - # Create session + # Create session (use resolved workflow name in case it was auto-selected) session = await self.state_manager.create_session( job_name=input_data.job_name, - workflow_name=input_data.workflow_name, + workflow_name=workflow.name, goal=input_data.goal, first_step_id=first_step_id, instance_id=input_data.instance_id, diff --git a/src/deepwork/standard_jobs/deepwork_jobs/make_new_job.sh b/src/deepwork/standard_jobs/deepwork_jobs/make_new_job.sh index c561d6d2..c87f40e8 100755 --- a/src/deepwork/standard_jobs/deepwork_jobs/make_new_job.sh +++ b/src/deepwork/standard_jobs/deepwork_jobs/make_new_job.sh @@ -78,43 +78,47 @@ main() { mkdir -p "$job_path/steps" mkdir -p "$job_path/hooks" mkdir -p "$job_path/templates" + mkdir -p "$job_path/scripts" # Add .gitkeep files to empty directories touch "$job_path/hooks/.gitkeep" touch "$job_path/templates/.gitkeep" + touch "$job_path/scripts/.gitkeep" # Create AGENTS.md file cat > "$job_path/AGENTS.md" << 'EOF' # Job Management -This folder and its subfolders are managed using the `deepwork_jobs` slash commands. +This folder and its subfolders are managed using `deepwork_jobs` workflows. -## Recommended Commands +## Recommended Workflows -- `/deepwork_jobs.define` - Create or modify the job.yml specification -- `/deepwork_jobs.implement` - Generate step instruction files from the specification -- `/deepwork_jobs.learn` - Improve instructions based on execution learnings +- `deepwork_jobs/new_job` - Full lifecycle: define → implement → test → iterate +- `deepwork_jobs/learn` - Improve instructions based on execution learnings +- `deepwork_jobs/repair` - Clean up and migrate from prior DeepWork versions ## Directory Structure ``` . 
├── AGENTS.md # This file - project context and guidance -├── job.yml # Job specification (created by /deepwork_jobs.define) -├── steps/ # Step instruction files (created by /deepwork_jobs.implement) +├── job.yml # Job specification (created by define step) +├── steps/ # Step instruction files (created by implement step) │ └── *.md # One file per step ├── hooks/ # Custom validation scripts and prompts │ └── *.md|*.sh # Hook files referenced in job.yml +├── scripts/ # Reusable scripts and utilities created during job execution +│ └── *.sh|*.py # Helper scripts referenced in step instructions └── templates/ # Example file formats and templates └── *.md|*.yml # Templates referenced in step instructions ``` ## Editing Guidelines -1. **Use slash commands** for structural changes (adding steps, modifying job.yml) +1. **Use workflows** for structural changes (adding steps, modifying job.yml) 2. **Direct edits** are fine for minor instruction tweaks -3. **Run `/deepwork_jobs.learn`** after executing job steps to capture improvements -4. **Run `deepwork sync`** after any changes to regenerate commands +3. **Run `deepwork_jobs/learn`** after executing job steps to capture improvements +4. **Run `deepwork install`** after any changes to regenerate commands EOF info "Created directory structure:" @@ -122,13 +126,8 @@ EOF echo " ├── AGENTS.md" echo " ├── steps/" echo " ├── hooks/.gitkeep" + echo " ├── scripts/.gitkeep" echo " └── templates/.gitkeep" - - echo "" - info "Next steps:" - echo " 1. Run '/deepwork_jobs.define' to create the job.yml specification" - echo " 2. Run '/deepwork_jobs.implement' to generate step instructions" - echo " 3. Run 'deepwork sync' to create slash commands" } main "$@" diff --git a/tests/unit/mcp/test_tools.py b/tests/unit/mcp/test_tools.py index be0a69f3..24ef639e 100644 --- a/tests/unit/mcp/test_tools.py +++ b/tests/unit/mcp/test_tools.py @@ -159,15 +159,74 @@ async def test_start_workflow_invalid_job(self, tools: WorkflowTools) -> None: with pytest.raises(ToolError, match="Job not found"): await tools.start_workflow(input_data) - async def test_start_workflow_invalid_workflow(self, tools: WorkflowTools) -> None: - """Test starting workflow with invalid workflow name.""" + async def test_start_workflow_auto_selects_single_workflow( + self, tools: WorkflowTools + ) -> None: + """Test that a wrong workflow name auto-selects when job has one workflow.""" input_data = StartWorkflowInput( goal="Complete task", job_name="test_job", workflow_name="nonexistent", ) - with pytest.raises(ToolError, match="Workflow.*not found"): + # Should succeed by auto-selecting the only workflow ("main") + response = await tools.start_workflow(input_data) + assert response.begin_step.step_id == "step1" + + async def test_start_workflow_invalid_workflow_multiple( + self, project_root: Path, state_manager: StateManager + ) -> None: + """Test that a wrong workflow name errors when job has multiple workflows.""" + # Create a job with two workflows + job_dir = project_root / ".deepwork" / "jobs" / "multi_wf_job" + job_dir.mkdir() + (job_dir / "job.yml").write_text( + """ +name: multi_wf_job +version: "1.0.0" +summary: A job with multiple workflows +description: Test job with multiple workflows + +steps: + - id: step_a + name: Step A + description: Step A + instructions_file: steps/step_a.md + outputs: + - output_a.md + - id: step_b + name: Step B + description: Step B + instructions_file: steps/step_b.md + outputs: + - output_b.md + +workflows: + - name: alpha + summary: Alpha workflow + steps: + - 
step_a + - name: beta + summary: Beta workflow + steps: + - step_b +""" + ) + steps_dir = job_dir / "steps" + steps_dir.mkdir() + (steps_dir / "step_a.md").write_text("# Step A") + (steps_dir / "step_b.md").write_text("# Step B") + + tools = WorkflowTools( + project_root=project_root, state_manager=state_manager + ) + input_data = StartWorkflowInput( + goal="Complete task", + job_name="multi_wf_job", + workflow_name="nonexistent", + ) + + with pytest.raises(ToolError, match="Workflow.*not found.*alpha.*beta"): await tools.start_workflow(input_data) async def test_finished_step_no_session(self, tools: WorkflowTools) -> None: From 2b8e85f814ba3f80cc5ee3840aa1977130caf7a1 Mon Sep 17 00:00:00 2001 From: Noah Horton Date: Thu, 5 Feb 2026 16:09:43 -0700 Subject: [PATCH 31/45] Bump version to 0.7.0a2 Co-Authored-By: Claude Opus 4.6 --- pyproject.toml | 2 +- src/deepwork/__init__.py | 2 +- uv.lock | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 352bd845..6ca12327 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "deepwork" -version = "0.7.0a1" +version = "0.7.0a2" description = "Framework for enabling AI agents to perform complex, multi-step work tasks" readme = "README.md" requires-python = ">=3.11" diff --git a/src/deepwork/__init__.py b/src/deepwork/__init__.py index ce548d43..722898e8 100644 --- a/src/deepwork/__init__.py +++ b/src/deepwork/__init__.py @@ -1,6 +1,6 @@ """DeepWork - Framework for enabling AI agents to perform complex, multi-step work tasks.""" -__version__ = "0.7.0a1" +__version__ = "0.7.0a2" __author__ = "DeepWork Contributors" __all__ = [ diff --git a/uv.lock b/uv.lock index d1755b0c..0282e238 100644 --- a/uv.lock +++ b/uv.lock @@ -453,7 +453,7 @@ wheels = [ [[package]] name = "deepwork" -version = "0.7.0" +version = "0.7.0a2" source = { editable = "." } dependencies = [ { name = "aiofiles" }, From 089438e6fae67f4929ef7efa2482c88805115538 Mon Sep 17 00:00:00 2001 From: Noah Horton Date: Thu, 5 Feb 2026 16:28:19 -0700 Subject: [PATCH 32/45] refactor the quality gate --- flake.lock | 6 +- src/deepwork/mcp/claude_cli.py | 177 ++++++++ src/deepwork/mcp/quality_gate.py | 127 +----- src/deepwork/mcp/server.py | 6 +- .../test_quality_gate_integration.py | 5 +- tests/unit/mcp/test_async_interface.py | 9 + tests/unit/mcp/test_claude_cli.py | 362 ++++++++++++++++ tests/unit/mcp/test_quality_gate.py | 392 ++++-------------- 8 files changed, 670 insertions(+), 414 deletions(-) create mode 100644 src/deepwork/mcp/claude_cli.py create mode 100644 tests/unit/mcp/test_claude_cli.py diff --git a/flake.lock b/flake.lock index 9a416dfc..1cf9a673 100644 --- a/flake.lock +++ b/flake.lock @@ -149,11 +149,11 @@ ] }, "locked": { - "lastModified": 1769957392, - "narHash": "sha256-6PkqwwYf5K2CHi2V+faI/9pqjfz/HxUkI/MVid6hlOY=", + "lastModified": 1770331927, + "narHash": "sha256-jlOvO++uvne/lTgWqdI4VhTV5OpVWi70ZDVBlT6vGSs=", "owner": "pyproject-nix", "repo": "uv2nix", - "rev": "d18bc50ae1c3d4be9c41c2d94ea765524400af75", + "rev": "5b43a934e15b23bfba6c408cba1c570eccf80080", "type": "github" }, "original": { diff --git a/src/deepwork/mcp/claude_cli.py b/src/deepwork/mcp/claude_cli.py new file mode 100644 index 00000000..55d5d118 --- /dev/null +++ b/src/deepwork/mcp/claude_cli.py @@ -0,0 +1,177 @@ +"""Claude Code CLI subprocess wrapper. + +Runs Claude Code CLI as a subprocess with structured JSON output. +Always uses --json-schema for structured output conformance. 
+ +See doc/reference/calling_claude_in_print_mode.md for details on +proper CLI invocation with structured output. +""" + +from __future__ import annotations + +import asyncio +import json +from pathlib import Path +from typing import Any + + +class ClaudeCLIError(Exception): + """Exception raised for Claude CLI subprocess errors.""" + + pass + + +class ClaudeCLI: + """Runs Claude Code CLI as a subprocess with structured JSON output. + + Always requires a JSON schema - the structured output is returned + as a parsed dict from the CLI's `structured_output` field. + + See doc/reference/calling_claude_in_print_mode.md for details on + proper CLI invocation with structured output. + """ + + def __init__( + self, + timeout: int = 120, + *, + _test_command: list[str] | None = None, + ): + """Initialize Claude CLI wrapper. + + Args: + timeout: Timeout in seconds for the subprocess + _test_command: Internal testing only - override the subprocess command. + When set, skips adding --json-schema flag (test mock handles it). + """ + self.timeout = timeout + self._test_command = _test_command + + def _build_command( + self, + system_prompt: str, + json_schema: dict[str, Any], + ) -> list[str]: + """Build the CLI command with proper flag ordering. + + Flags must come BEFORE `-p --` because: + - `-p` expects a prompt argument immediately after + - `--` marks the end of flags, everything after is the prompt + - When piping via stdin, we use `-p --` to read from stdin + + Args: + system_prompt: System prompt for the CLI + json_schema: JSON schema for structured output + + Returns: + Command list ready for subprocess execution + """ + if self._test_command: + return self._test_command + ["--system-prompt", system_prompt] + + schema_json = json.dumps(json_schema) + return [ + "claude", + "--print", + "--output-format", + "json", + "--system-prompt", + system_prompt, + "--json-schema", + schema_json, + "-p", + "--", + ] + + def _parse_wrapper(self, response_text: str) -> dict[str, Any]: + """Parse the Claude CLI JSON wrapper and extract structured_output. + + When using --print --output-format json --json-schema, Claude CLI returns + a wrapper object with the structured output in the 'structured_output' field. + + Args: + response_text: Raw JSON response from Claude CLI + + Returns: + The parsed structured_output dict + + Raises: + ClaudeCLIError: If response cannot be parsed + """ + try: + wrapper = json.loads(response_text.strip()) + + if wrapper.get("is_error"): + raise ClaudeCLIError( + f"Claude CLI returned error: {wrapper.get('result', 'Unknown error')}" + ) + + data = wrapper.get("structured_output") + if data is None: + raise ClaudeCLIError( + "Claude CLI response missing 'structured_output' field. " + f"Response was: {response_text[:500]}..." + ) + + return data + + except json.JSONDecodeError as e: + raise ClaudeCLIError( + f"Failed to parse Claude CLI response as JSON: {e}\n" + f"Response was: {response_text[:500]}..." + ) from e + + async def run( + self, + prompt: str, + system_prompt: str, + json_schema: dict[str, Any], + cwd: Path | None = None, + ) -> dict[str, Any]: + """Run Claude CLI and return the structured output. 
+ + Args: + prompt: The user prompt (piped via stdin) + system_prompt: System instructions for the CLI + json_schema: JSON schema enforcing structured output conformance + cwd: Working directory for the subprocess + + Returns: + The parsed structured_output dict from Claude CLI + + Raises: + ClaudeCLIError: If the subprocess fails or output cannot be parsed + """ + cmd = self._build_command(system_prompt, json_schema) + + try: + process = await asyncio.create_subprocess_exec( + *cmd, + stdin=asyncio.subprocess.PIPE, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + cwd=str(cwd) if cwd else None, + ) + + try: + stdout, stderr = await asyncio.wait_for( + process.communicate(input=prompt.encode()), + timeout=self.timeout, + ) + except TimeoutError: + process.kill() + await process.wait() + raise ClaudeCLIError( + f"Claude CLI timed out after {self.timeout} seconds" + ) from None + + if process.returncode != 0: + raise ClaudeCLIError( + f"Claude CLI failed with exit code {process.returncode}:\n" + f"stderr: {stderr.decode()}" + ) + + return self._parse_wrapper(stdout.decode()) + + except FileNotFoundError as e: + raise ClaudeCLIError("Claude CLI command not found: claude") from e diff --git a/src/deepwork/mcp/quality_gate.py b/src/deepwork/mcp/quality_gate.py index 9e3f9b90..4096c0c0 100644 --- a/src/deepwork/mcp/quality_gate.py +++ b/src/deepwork/mcp/quality_gate.py @@ -1,18 +1,17 @@ """Quality gate for evaluating step outputs. -The quality gate invokes a review agent (via subprocess) to evaluate +The quality gate invokes a review agent (via ClaudeCLI) to evaluate step outputs against quality criteria. """ from __future__ import annotations -import asyncio -import json from pathlib import Path from typing import Any import aiofiles +from deepwork.mcp.claude_cli import ClaudeCLI from deepwork.mcp.schemas import QualityCriteriaResult, QualityGateResult # JSON Schema for quality gate response validation @@ -50,28 +49,17 @@ class QualityGateError(Exception): class QualityGate: """Evaluates step outputs against quality criteria. - Uses a subprocess to invoke a review agent (e.g., Claude CLI) that - evaluates outputs and returns structured feedback. - - See doc/reference/calling_claude_in_print_mode.md for details on - proper CLI invocation with structured output. + Uses ClaudeCLI to invoke a review agent that evaluates outputs + and returns structured feedback. """ - def __init__( - self, - timeout: int = 120, - *, - _test_command: list[str] | None = None, - ): + def __init__(self, cli: ClaudeCLI | None = None): """Initialize quality gate. Args: - timeout: Timeout in seconds for review agent - _test_command: Internal testing only - override the subprocess command. - When set, skips adding --json-schema flag (test mock handles it). + cli: ClaudeCLI instance. If not provided, a default one is created. """ - self.timeout = timeout - self._test_command = _test_command + self._cli = cli or ClaudeCLI() def _build_instructions(self, quality_criteria: list[str]) -> str: """Build the system instructions for the review agent. @@ -149,39 +137,19 @@ async def _build_payload( return "\n\n".join(output_sections) - def _parse_response(self, response_text: str) -> QualityGateResult: - """Parse the review agent's response. - - When using --print --output-format json --json-schema, Claude CLI returns - a wrapper object with the structured output in the 'structured_output' field. 
+ def _parse_result(self, data: dict[str, Any]) -> QualityGateResult: + """Parse the structured output into a QualityGateResult. Args: - response_text: Raw response from review agent (JSON wrapper) + data: The structured_output dict from ClaudeCLI Returns: Parsed QualityGateResult Raises: - QualityGateError: If response cannot be parsed + QualityGateError: If data cannot be interpreted """ try: - wrapper = json.loads(response_text.strip()) - - # Check for errors in the wrapper - if wrapper.get("is_error"): - raise QualityGateError( - f"Review agent returned error: {wrapper.get('result', 'Unknown error')}" - ) - - # Extract structured_output - this is where --json-schema puts the result - data = wrapper.get("structured_output") - if data is None: - raise QualityGateError( - "Review agent response missing 'structured_output' field. " - f"Response was: {response_text[:500]}..." - ) - - # Parse criteria results criteria_results = [ QualityCriteriaResult( criterion=cr.get("criterion", ""), @@ -197,15 +165,10 @@ def _parse_response(self, response_text: str) -> QualityGateResult: criteria_results=criteria_results, ) - except json.JSONDecodeError as e: - raise QualityGateError( - f"Failed to parse review agent response as JSON: {e}\n" - f"Response was: {response_text[:500]}..." - ) from e except (ValueError, KeyError) as e: raise QualityGateError( - f"Failed to extract quality gate result: {e}\n" - f"Response was: {response_text[:500]}..." + f"Failed to interpret quality gate result: {e}\n" + f"Data was: {data}" ) from e async def evaluate( @@ -235,68 +198,22 @@ async def evaluate( criteria_results=[], ) - # Build system instructions and payload separately instructions = self._build_instructions(quality_criteria) payload = await self._build_payload(outputs, project_root) - # Build command with proper flag ordering for Claude CLI - # See doc/reference/calling_claude_in_print_mode.md for details - # - # Key insight: flags must come BEFORE `-p --` because: - # - `-p` expects a prompt argument immediately after - # - `--` marks the end of flags, everything after is the prompt - # - When piping via stdin, we use `-p --` to read from stdin - if self._test_command: - # Testing mode: use provided command, add system prompt only - full_cmd = self._test_command + ["--system-prompt", instructions] - else: - # Production mode: use Claude CLI with proper flags - schema_json = json.dumps(QUALITY_GATE_RESPONSE_SCHEMA) - full_cmd = [ - "claude", - "--print", # Non-interactive mode - "--output-format", - "json", # JSON output wrapper - "--system-prompt", - instructions, - "--json-schema", - schema_json, # Structured output - result in 'structured_output' field - "-p", - "--", # Read prompt from stdin - ] + from deepwork.mcp.claude_cli import ClaudeCLIError try: - # Run review agent with payload piped via stdin - process = await asyncio.create_subprocess_exec( - *full_cmd, - stdin=asyncio.subprocess.PIPE, - stdout=asyncio.subprocess.PIPE, - stderr=asyncio.subprocess.PIPE, - cwd=str(project_root), + data = await self._cli.run( + prompt=payload, + system_prompt=instructions, + json_schema=QUALITY_GATE_RESPONSE_SCHEMA, + cwd=project_root, ) + except ClaudeCLIError as e: + raise QualityGateError(str(e)) from e - try: - stdout, stderr = await asyncio.wait_for( - process.communicate(input=payload.encode()), - timeout=self.timeout, - ) - except TimeoutError: - process.kill() - await process.wait() - raise QualityGateError( - f"Review agent timed out after {self.timeout} seconds" - ) from None - - if process.returncode != 
0: - raise QualityGateError( - f"Review agent failed with exit code {process.returncode}:\n" - f"stderr: {stderr.decode()}" - ) - - return self._parse_response(stdout.decode()) - - except FileNotFoundError as e: - raise QualityGateError("Review agent command not found: claude") from e + return self._parse_result(data) class MockQualityGate(QualityGate): diff --git a/src/deepwork/mcp/server.py b/src/deepwork/mcp/server.py index 03ea936b..2b31a139 100644 --- a/src/deepwork/mcp/server.py +++ b/src/deepwork/mcp/server.py @@ -19,6 +19,7 @@ from fastmcp import FastMCP +from deepwork.mcp.claude_cli import ClaudeCLI from deepwork.mcp.quality_gate import QualityGate from deepwork.mcp.schemas import ( AbortWorkflowInput, @@ -56,9 +57,8 @@ def create_server( quality_gate: QualityGate | None = None if enable_quality_gate: - quality_gate = QualityGate( - timeout=quality_gate_timeout, - ) + cli = ClaudeCLI(timeout=quality_gate_timeout) + quality_gate = QualityGate(cli=cli) tools = WorkflowTools( project_root=project_path, diff --git a/tests/integration/test_quality_gate_integration.py b/tests/integration/test_quality_gate_integration.py index 37a3ad8d..24b12d20 100644 --- a/tests/integration/test_quality_gate_integration.py +++ b/tests/integration/test_quality_gate_integration.py @@ -28,6 +28,7 @@ import pytest +from deepwork.mcp.claude_cli import ClaudeCLI from deepwork.mcp.quality_gate import QualityGate # Skip marker for tests that require real Claude CLI @@ -82,7 +83,7 @@ async def test_real_claude_evaluates_passing_criteria(self, project_root: Path) ) # ⚠️ NO _test_command - this uses the REAL Claude CLI - gate = QualityGate(timeout=120) + gate = QualityGate(cli=ClaudeCLI(timeout=120)) result = await gate.evaluate( quality_criteria=[ @@ -116,7 +117,7 @@ async def test_real_claude_evaluates_failing_criteria(self, project_root: Path) output_file.write_text("Just some random text without any structure.") # ⚠️ NO _test_command - this uses the REAL Claude CLI - gate = QualityGate(timeout=120) + gate = QualityGate(cli=ClaudeCLI(timeout=120)) result = await gate.evaluate( quality_criteria=[ diff --git a/tests/unit/mcp/test_async_interface.py b/tests/unit/mcp/test_async_interface.py index eae89ce7..766410d3 100644 --- a/tests/unit/mcp/test_async_interface.py +++ b/tests/unit/mcp/test_async_interface.py @@ -9,6 +9,7 @@ import inspect from pathlib import Path +from deepwork.mcp.claude_cli import ClaudeCLI from deepwork.mcp.quality_gate import MockQualityGate, QualityGate from deepwork.mcp.state import StateManager from deepwork.mcp.tools import WorkflowTools @@ -73,6 +74,14 @@ def test_workflow_tools_async_methods(self) -> None: f"This is required for non-blocking MCP tool execution." ) + def test_claude_cli_async_methods(self) -> None: + """Verify ClaudeCLI methods that must be async remain async.""" + method = getattr(ClaudeCLI, "run") + assert inspect.iscoroutinefunction(method), ( + "ClaudeCLI.run must be async (coroutine function). " + "This is required for non-blocking subprocess execution." 
+ ) + def test_quality_gate_async_methods(self) -> None: """Verify QualityGate methods that must be async remain async.""" async_methods = [ diff --git a/tests/unit/mcp/test_claude_cli.py b/tests/unit/mcp/test_claude_cli.py new file mode 100644 index 00000000..f8a8a1b9 --- /dev/null +++ b/tests/unit/mcp/test_claude_cli.py @@ -0,0 +1,362 @@ +"""Tests for Claude CLI subprocess wrapper.""" + +import json +from collections.abc import Callable, Generator +from contextlib import contextmanager +from pathlib import Path +from typing import Any +from unittest.mock import MagicMock, patch + +import pytest + +from deepwork.mcp.claude_cli import ClaudeCLI, ClaudeCLIError + + +def create_mock_subprocess( + response: dict[str, Any] | None = None, + returncode: int = 0, +) -> tuple[list[str], Callable[..., MagicMock]]: + """Create a mock subprocess executor that captures commands. + + Args: + response: The structured_output to return in the CLI wrapper. + Defaults to an empty passing response. + returncode: The return code for the process. + + Returns: + A tuple of (captured_cmd list, mock_create_subprocess_exec function). + """ + if response is None: + response = {"result": "ok"} + + captured_cmd: list[str] = [] + + async def mock_create_subprocess_exec(*cmd: str, **kwargs: Any) -> MagicMock: # noqa: ARG001 + captured_cmd.extend(cmd) + mock_process = MagicMock() + mock_process.returncode = returncode + + async def mock_communicate(input: bytes = b"") -> tuple[bytes, bytes]: # noqa: ARG001 + wrapper = { + "type": "result", + "subtype": "success", + "is_error": False, + "structured_output": response, + } + return json.dumps(wrapper).encode(), b"" + + mock_process.communicate = mock_communicate + return mock_process + + return captured_cmd, mock_create_subprocess_exec + + +@contextmanager +def patched_subprocess( + response: dict[str, Any] | None = None, + returncode: int = 0, +) -> Generator[list[str], None, None]: + """Context manager that patches subprocess and yields captured command. + + Args: + response: The structured_output to return. Defaults to a simple response. + returncode: The return code for the process. + + Yields: + The list of captured command arguments. 
+ """ + captured_cmd, mock_subprocess = create_mock_subprocess(response, returncode) + with patch("asyncio.create_subprocess_exec", mock_subprocess): + yield captured_cmd + + +TEST_SCHEMA: dict[str, Any] = { + "type": "object", + "required": ["value"], + "properties": {"value": {"type": "string"}}, +} + + +class TestClaudeCLI: + """Tests for ClaudeCLI class.""" + + def test_init(self) -> None: + """Test ClaudeCLI initialization.""" + cli = ClaudeCLI(timeout=60) + assert cli.timeout == 60 + + def test_init_defaults(self) -> None: + """Test ClaudeCLI default values.""" + cli = ClaudeCLI() + assert cli.timeout == 120 + + async def test_run_returns_structured_output(self, tmp_path: Path) -> None: + """Test that run() returns the structured_output dict.""" + cli = ClaudeCLI(timeout=10) + expected = {"value": "hello"} + + with patched_subprocess(response=expected): + result = await cli.run( + prompt="test prompt", + system_prompt="test system", + json_schema=TEST_SCHEMA, + cwd=tmp_path, + ) + + assert result == expected + + async def test_run_pipes_prompt_via_stdin(self, tmp_path: Path) -> None: + """Test that the prompt is piped via stdin.""" + cli = ClaudeCLI(timeout=10) + captured_input: list[bytes] = [] + + async def mock_exec(*cmd: str, **kwargs: Any) -> MagicMock: # noqa: ARG001 + mock = MagicMock() + mock.returncode = 0 + + async def mock_communicate(input: bytes = b"") -> tuple[bytes, bytes]: + captured_input.append(input) + wrapper = { + "type": "result", + "subtype": "success", + "is_error": False, + "structured_output": {"value": "ok"}, + } + return json.dumps(wrapper).encode(), b"" + + mock.communicate = mock_communicate + return mock + + with patch("asyncio.create_subprocess_exec", mock_exec): + await cli.run( + prompt="my prompt text", + system_prompt="sys", + json_schema=TEST_SCHEMA, + cwd=tmp_path, + ) + + assert len(captured_input) == 1 + assert captured_input[0] == b"my prompt text" + + +class TestClaudeCLICommandConstruction: + """Tests for command construction.""" + + @staticmethod + def get_command_arg(captured_cmd: list[str], flag: str) -> str: + """Extract the argument value following a command flag.""" + assert flag in captured_cmd, f"Expected {flag} in command, got: {captured_cmd}" + flag_index = captured_cmd.index(flag) + return captured_cmd[flag_index + 1] + + async def test_command_includes_json_schema(self, tmp_path: Path) -> None: + """Test that the command includes --json-schema with the correct schema.""" + cli = ClaudeCLI(timeout=10) + + with patched_subprocess() as captured_cmd: + await cli.run( + prompt="test", + system_prompt="test", + json_schema=TEST_SCHEMA, + cwd=tmp_path, + ) + + schema_json = self.get_command_arg(captured_cmd, "--json-schema") + parsed_schema = json.loads(schema_json) + assert parsed_schema == TEST_SCHEMA + + async def test_command_includes_system_prompt(self, tmp_path: Path) -> None: + """Test that the command includes --system-prompt.""" + cli = ClaudeCLI(timeout=10) + + with patched_subprocess() as captured_cmd: + await cli.run( + prompt="test", + system_prompt="You are a reviewer", + json_schema=TEST_SCHEMA, + cwd=tmp_path, + ) + + system_prompt = self.get_command_arg(captured_cmd, "--system-prompt") + assert system_prompt == "You are a reviewer" + + async def test_command_has_correct_flag_ordering(self, tmp_path: Path) -> None: + """Test that flags come before -p -- for proper CLI invocation. + + See doc/reference/calling_claude_in_print_mode.md for details on + why flag ordering matters. 
+ """ + cli = ClaudeCLI(timeout=10) + + with patched_subprocess() as captured_cmd: + await cli.run( + prompt="test", + system_prompt="test", + json_schema=TEST_SCHEMA, + cwd=tmp_path, + ) + + assert captured_cmd[0] == "claude" + assert "--print" in captured_cmd + assert "--output-format" in captured_cmd + assert "-p" in captured_cmd + assert "--" in captured_cmd + + # Verify -p -- comes last (after all other flags) + p_index = captured_cmd.index("-p") + dash_dash_index = captured_cmd.index("--") + json_schema_index = captured_cmd.index("--json-schema") + system_prompt_index = captured_cmd.index("--system-prompt") + + assert json_schema_index < p_index, "Flags must come before -p" + assert system_prompt_index < p_index, "Flags must come before -p" + assert dash_dash_index == p_index + 1, "-- must immediately follow -p" + + async def test_test_command_override(self, tmp_path: Path) -> None: + """Test that _test_command overrides the default command.""" + cli = ClaudeCLI(timeout=10, _test_command=["echo", "test"]) + + with patched_subprocess() as captured_cmd: + await cli.run( + prompt="test", + system_prompt="sys prompt", + json_schema=TEST_SCHEMA, + cwd=tmp_path, + ) + + assert captured_cmd[0] == "echo" + assert captured_cmd[1] == "test" + assert "--system-prompt" in captured_cmd + assert "sys prompt" in captured_cmd + # _test_command should NOT include --json-schema + assert "--json-schema" not in captured_cmd + + +class TestClaudeCLIWrapperParsing: + """Tests for Claude CLI response wrapper parsing.""" + + def test_parse_wrapper_valid(self) -> None: + """Test parsing a valid wrapper response.""" + cli = ClaudeCLI() + response = json.dumps( + { + "type": "result", + "subtype": "success", + "is_error": False, + "structured_output": {"value": "hello"}, + } + ) + + result = cli._parse_wrapper(response) + assert result == {"value": "hello"} + + def test_parse_wrapper_error(self) -> None: + """Test parsing a wrapper with is_error=True.""" + cli = ClaudeCLI() + response = json.dumps( + { + "type": "result", + "subtype": "error", + "is_error": True, + "result": "Something went wrong", + } + ) + + with pytest.raises(ClaudeCLIError, match="returned error"): + cli._parse_wrapper(response) + + def test_parse_wrapper_missing_structured_output(self) -> None: + """Test parsing a wrapper missing structured_output field.""" + cli = ClaudeCLI() + response = json.dumps( + { + "type": "result", + "subtype": "success", + "is_error": False, + "result": "Some text response", + } + ) + + with pytest.raises(ClaudeCLIError, match="missing 'structured_output'"): + cli._parse_wrapper(response) + + def test_parse_wrapper_invalid_json(self) -> None: + """Test parsing invalid JSON.""" + cli = ClaudeCLI() + + with pytest.raises(ClaudeCLIError, match="Failed to parse"): + cli._parse_wrapper("This is not JSON") + + +class TestClaudeCLIErrors: + """Tests for error handling.""" + + async def test_timeout_error(self, tmp_path: Path) -> None: + """Test that timeout raises ClaudeCLIError.""" + import asyncio + + cli = ClaudeCLI(timeout=0) + + async def mock_exec(*cmd: str, **kwargs: Any) -> MagicMock: # noqa: ARG001 + mock = MagicMock() + + async def mock_communicate(input: bytes = b"") -> tuple[bytes, bytes]: # noqa: ARG001 + await asyncio.sleep(10) + return b"", b"" + + mock.communicate = mock_communicate + mock.kill = MagicMock() + + async def mock_wait() -> None: + pass + + mock.wait = mock_wait + return mock + + with patch("asyncio.create_subprocess_exec", mock_exec): + with pytest.raises(ClaudeCLIError, match="timed 
out"): + await cli.run( + prompt="test", + system_prompt="test", + json_schema=TEST_SCHEMA, + cwd=tmp_path, + ) + + async def test_nonzero_exit_code(self, tmp_path: Path) -> None: + """Test that non-zero exit code raises ClaudeCLIError.""" + cli = ClaudeCLI(timeout=10) + + async def mock_exec(*cmd: str, **kwargs: Any) -> MagicMock: # noqa: ARG001 + mock = MagicMock() + mock.returncode = 1 + + async def mock_communicate(input: bytes = b"") -> tuple[bytes, bytes]: # noqa: ARG001 + return b"", b"error output" + + mock.communicate = mock_communicate + return mock + + with patch("asyncio.create_subprocess_exec", mock_exec): + with pytest.raises(ClaudeCLIError, match="exit code 1"): + await cli.run( + prompt="test", + system_prompt="test", + json_schema=TEST_SCHEMA, + cwd=tmp_path, + ) + + async def test_command_not_found(self, tmp_path: Path) -> None: + """Test that missing command raises ClaudeCLIError.""" + cli = ClaudeCLI(timeout=10) + + async def mock_exec(*cmd: str, **kwargs: Any) -> MagicMock: # noqa: ARG001 + raise FileNotFoundError("No such file") + + with patch("asyncio.create_subprocess_exec", mock_exec): + with pytest.raises(ClaudeCLIError, match="command not found"): + await cli.run( + prompt="test", + system_prompt="test", + json_schema=TEST_SCHEMA, + cwd=tmp_path, + ) diff --git a/tests/unit/mcp/test_quality_gate.py b/tests/unit/mcp/test_quality_gate.py index 8047cf25..2c933cca 100644 --- a/tests/unit/mcp/test_quality_gate.py +++ b/tests/unit/mcp/test_quality_gate.py @@ -1,14 +1,12 @@ """Tests for MCP quality gate.""" -import json -from collections.abc import Callable, Generator -from contextlib import contextmanager from pathlib import Path from typing import Any -from unittest.mock import MagicMock, patch +from unittest.mock import AsyncMock import pytest +from deepwork.mcp.claude_cli import ClaudeCLI, ClaudeCLIError from deepwork.mcp.quality_gate import ( QUALITY_GATE_RESPONSE_SCHEMA, MockQualityGate, @@ -24,9 +22,17 @@ def project_root(tmp_path: Path) -> Path: @pytest.fixture -def quality_gate() -> QualityGate: - """Create a QualityGate instance.""" - return QualityGate(timeout=10) +def mock_cli() -> ClaudeCLI: + """Create a ClaudeCLI with a mocked run method.""" + cli = ClaudeCLI(timeout=10) + cli.run = AsyncMock(return_value={"passed": True, "feedback": "OK", "criteria_results": []}) + return cli + + +@pytest.fixture +def quality_gate(mock_cli: ClaudeCLI) -> QualityGate: + """Create a QualityGate instance with mocked CLI.""" + return QualityGate(cli=mock_cli) @pytest.fixture @@ -37,111 +43,18 @@ def output_file(project_root: Path) -> Path: return output -def create_mock_subprocess( - response: dict[str, Any] | None = None, - returncode: int = 0, -) -> tuple[list[str], Callable[..., MagicMock]]: - """Create a mock subprocess executor that captures commands. - - ############################################################################ - # CRITICAL: UNDERSTAND THE RESPONSE FORMAT BEFORE MODIFYING! - # - # This mock returns responses in the EXACT format produced by Claude CLI - # when using `--print --output-format json --json-schema`. The response - # is a wrapper object with the structured output in `structured_output`: - # - # { - # "type": "result", - # "subtype": "success", - # "is_error": false, - # "structured_output": { - # "passed": true, - # "feedback": "...", - # "criteria_results": [...] - # } - # } - # - # KEY POINTS: - # 1. The `--json-schema` flag enforces structured output conformance - # 2. 
The actual quality gate response is in `structured_output`, NOT `result` - # 3. The `result` field (if present) contains text output, not our schema - # - # See doc/reference/calling_claude_in_print_mode.md for full details on - # how Claude CLI handles --json-schema and the output format. - # - # If you're seeing parse errors, check that quality_gate.py is looking - # for `structured_output` (not `result`) in the wrapper. - ############################################################################ - - Args: - response: The quality gate response to return in structured_output. - Defaults to a passing quality gate response. - returncode: The return code for the process. - - Returns: - A tuple of (captured_cmd list, mock_create_subprocess_exec function). - The captured_cmd list will be populated with the command arguments when - the mock is called. - """ - if response is None: - response = {"passed": True, "feedback": "OK", "criteria_results": []} - - captured_cmd: list[str] = [] - - async def mock_create_subprocess_exec(*cmd: str, **kwargs: Any) -> MagicMock: # noqa: ARG001 - captured_cmd.extend(cmd) - mock_process = MagicMock() - mock_process.returncode = returncode - - async def mock_communicate(input: bytes = b"") -> tuple[bytes, bytes]: # noqa: ARG001 - # Returns Claude CLI wrapper with structured_output field - wrapper = { - "type": "result", - "subtype": "success", - "is_error": False, - "structured_output": response, - } - return json.dumps(wrapper).encode(), b"" - - mock_process.communicate = mock_communicate - return mock_process - - return captured_cmd, mock_create_subprocess_exec - - -@contextmanager -def patched_subprocess( - response: dict[str, Any] | None = None, - returncode: int = 0, -) -> Generator[list[str], None, None]: - """Context manager that patches subprocess and yields captured command. - - Args: - response: The JSON response to return. Defaults to a passing quality gate response. - returncode: The return code for the process. - - Yields: - The list of captured command arguments. 
- """ - captured_cmd, mock_subprocess = create_mock_subprocess(response, returncode) - with patch("asyncio.create_subprocess_exec", mock_subprocess): - yield captured_cmd - - class TestQualityGate: """Tests for QualityGate class.""" - def test_init(self) -> None: - """Test QualityGate initialization.""" - gate = QualityGate(timeout=60) - - assert gate.timeout == 60 - - def test_init_defaults(self) -> None: - """Test QualityGate default values.""" + def test_init_default_cli(self) -> None: + """Test QualityGate creates a default ClaudeCLI if none provided.""" gate = QualityGate() + assert isinstance(gate._cli, ClaudeCLI) - assert gate.timeout == 120 + def test_init_custom_cli(self, mock_cli: ClaudeCLI) -> None: + """Test QualityGate uses provided ClaudeCLI.""" + gate = QualityGate(cli=mock_cli) + assert gate._cli is mock_cli def test_build_instructions(self, quality_gate: QualityGate) -> None: """Test building system instructions.""" @@ -157,7 +70,6 @@ def test_build_instructions(self, quality_gate: QualityGate) -> None: async def test_build_payload(self, quality_gate: QualityGate, project_root: Path) -> None: """Test building payload with file contents.""" - # Create test output file output_file = project_root / "output.md" output_file.write_text("Test content") @@ -168,7 +80,6 @@ async def test_build_payload(self, quality_gate: QualityGate, project_root: Path assert "Test content" in payload assert "output.md" in payload - # Check for the new separator format (20 dashes) assert "--------------------" in payload async def test_build_payload_missing_file( @@ -183,86 +94,58 @@ async def test_build_payload_missing_file( assert "File not found" in payload assert "nonexistent.md" in payload - def test_parse_response_valid_json(self, quality_gate: QualityGate) -> None: - """Test parsing valid JSON response with structured_output.""" - # Claude CLI returns wrapper with structured_output field when using --json-schema - response = json.dumps( - { - "type": "result", - "subtype": "success", - "is_error": False, - "structured_output": { - "passed": True, - "feedback": "All good", - "criteria_results": [{"criterion": "Test 1", "passed": True, "feedback": None}], - }, - } - ) + def test_parse_result_valid(self, quality_gate: QualityGate) -> None: + """Test parsing valid structured output data.""" + data = { + "passed": True, + "feedback": "All good", + "criteria_results": [{"criterion": "Test 1", "passed": True, "feedback": None}], + } - result = quality_gate._parse_response(response) + result = quality_gate._parse_result(data) assert result.passed is True assert result.feedback == "All good" assert len(result.criteria_results) == 1 - def test_parse_response_failed(self, quality_gate: QualityGate) -> None: - """Test parsing failed evaluation response.""" - response = json.dumps( - { - "type": "result", - "subtype": "success", - "is_error": False, - "structured_output": { - "passed": False, - "feedback": "Issues found", - "criteria_results": [ - {"criterion": "Test 1", "passed": False, "feedback": "Failed"} - ], - }, - } - ) + def test_parse_result_failed(self, quality_gate: QualityGate) -> None: + """Test parsing failed evaluation data.""" + data = { + "passed": False, + "feedback": "Issues found", + "criteria_results": [ + {"criterion": "Test 1", "passed": False, "feedback": "Failed"} + ], + } - result = quality_gate._parse_response(response) + result = quality_gate._parse_result(data) assert result.passed is False assert result.feedback == "Issues found" assert result.criteria_results[0].passed is 
False - def test_parse_response_invalid_json(self, quality_gate: QualityGate) -> None: - """Test parsing invalid JSON response.""" - response = "This is not JSON" - - with pytest.raises(QualityGateError, match="Failed to parse"): - quality_gate._parse_response(response) - - def test_parse_response_missing_structured_output(self, quality_gate: QualityGate) -> None: - """Test parsing response missing structured_output field raises error.""" - # Old format with 'result' field instead of 'structured_output' - wrapper_response = json.dumps( - { - "type": "result", - "subtype": "success", - "is_error": False, - "result": "Some text response", - } - ) - - with pytest.raises(QualityGateError, match="missing 'structured_output'"): - quality_gate._parse_response(wrapper_response) - - def test_parse_response_error_in_wrapper(self, quality_gate: QualityGate) -> None: - """Test parsing response with is_error=True raises error.""" - wrapper_response = json.dumps( - { - "type": "result", - "subtype": "error", - "is_error": True, - "result": "Something went wrong", - } - ) + def test_parse_result_multiple_criteria(self, quality_gate: QualityGate) -> None: + """Test that criteria results are properly parsed with multiple entries.""" + data = { + "passed": False, + "feedback": "Two criteria failed", + "criteria_results": [ + {"criterion": "First check", "passed": True, "feedback": None}, + {"criterion": "Second check", "passed": False, "feedback": "Missing data"}, + {"criterion": "Third check", "passed": False, "feedback": "Wrong format"}, + ], + } + + result = quality_gate._parse_result(data) - with pytest.raises(QualityGateError, match="returned error"): - quality_gate._parse_response(wrapper_response) + assert result.passed is False + assert len(result.criteria_results) == 3 + assert result.criteria_results[0].passed is True + assert result.criteria_results[0].feedback is None + assert result.criteria_results[1].passed is False + assert result.criteria_results[1].feedback == "Missing data" + assert result.criteria_results[2].passed is False + assert result.criteria_results[2].feedback == "Wrong format" async def test_evaluate_no_criteria( self, quality_gate: QualityGate, project_root: Path @@ -277,135 +160,52 @@ async def test_evaluate_no_criteria( assert result.passed is True assert "auto-passing" in result.feedback.lower() - def test_parse_criteria_results_structure(self, quality_gate: QualityGate) -> None: - """Test that criteria results are properly parsed with multiple entries.""" - response = json.dumps( - { - "type": "result", - "subtype": "success", - "is_error": False, - "structured_output": { - "passed": False, - "feedback": "Two criteria failed", - "criteria_results": [ - {"criterion": "First check", "passed": True, "feedback": None}, - {"criterion": "Second check", "passed": False, "feedback": "Missing data"}, - {"criterion": "Third check", "passed": False, "feedback": "Wrong format"}, - ], - }, - } - ) - - result = quality_gate._parse_response(response) - - assert result.passed is False - assert len(result.criteria_results) == 3 - assert result.criteria_results[0].passed is True - assert result.criteria_results[0].feedback is None - assert result.criteria_results[1].passed is False - assert result.criteria_results[1].feedback == "Missing data" - assert result.criteria_results[2].passed is False - assert result.criteria_results[2].feedback == "Wrong format" - - -class TestQualityGateCommandConstruction: - """Tests for command construction, specifically JSON schema inclusion.""" - - 
@staticmethod - def get_command_arg(captured_cmd: list[str], flag: str) -> str: - """Extract the argument value following a command flag. - - Args: - captured_cmd: List of command arguments. - flag: The flag to find (e.g., "--json-schema"). - - Returns: - The argument value following the flag. - - Raises: - AssertionError: If the flag is not found in the command. - """ - assert flag in captured_cmd, f"Expected {flag} in command, got: {captured_cmd}" - flag_index = captured_cmd.index(flag) - return captured_cmd[flag_index + 1] - - async def test_command_includes_json_schema( - self, output_file: Path, project_root: Path + async def test_evaluate_calls_cli_with_correct_args( + self, mock_cli: ClaudeCLI, project_root: Path ) -> None: - """Test that the command includes --json-schema with the correct schema.""" - gate = QualityGate(timeout=10) + """Test that evaluate passes correct arguments to ClaudeCLI.""" + gate = QualityGate(cli=mock_cli) - with patched_subprocess() as captured_cmd: - await gate.evaluate( - quality_criteria=["Test criterion"], - outputs=[output_file.name], - project_root=project_root, - ) + # Create output file + output_file = project_root / "output.md" + output_file.write_text("Test content") - schema_json = self.get_command_arg(captured_cmd, "--json-schema") - parsed_schema = json.loads(schema_json) - assert parsed_schema == QUALITY_GATE_RESPONSE_SCHEMA, ( - f"Schema mismatch. Expected:\n{QUALITY_GATE_RESPONSE_SCHEMA}\nGot:\n{parsed_schema}" + await gate.evaluate( + quality_criteria=["Must be valid"], + outputs=["output.md"], + project_root=project_root, ) - async def test_command_includes_system_prompt( - self, output_file: Path, project_root: Path - ) -> None: - """Test that the command includes --system-prompt with quality criteria.""" - gate = QualityGate(timeout=10) - - with patched_subprocess() as captured_cmd: - await gate.evaluate( - quality_criteria=["Output must exist", "Output must be valid"], - outputs=[output_file.name], - project_root=project_root, - ) - - system_prompt = self.get_command_arg(captured_cmd, "--system-prompt") - assert "Output must exist" in system_prompt - assert "Output must be valid" in system_prompt + mock_cli.run.assert_called_once() + call_kwargs = mock_cli.run.call_args + assert call_kwargs.kwargs["json_schema"] == QUALITY_GATE_RESPONSE_SCHEMA + assert call_kwargs.kwargs["cwd"] == project_root + assert "Must be valid" in call_kwargs.kwargs["system_prompt"] + assert "Test content" in call_kwargs.kwargs["prompt"] - async def test_command_has_correct_flag_ordering( - self, output_file: Path, project_root: Path + async def test_evaluate_wraps_cli_error( + self, mock_cli: ClaudeCLI, project_root: Path ) -> None: - """Test that flags come before -p -- for proper CLI invocation. + """Test that ClaudeCLIError is wrapped in QualityGateError.""" + mock_cli.run = AsyncMock(side_effect=ClaudeCLIError("CLI failed")) + gate = QualityGate(cli=mock_cli) - See doc/reference/calling_claude_in_print_mode.md for details on - why flag ordering matters. 
- """ - gate = QualityGate(timeout=10) + output_file = project_root / "output.md" + output_file.write_text("content") - with patched_subprocess() as captured_cmd: + with pytest.raises(QualityGateError, match="CLI failed"): await gate.evaluate( - quality_criteria=["Test criterion"], - outputs=[output_file.name], + quality_criteria=["Test"], + outputs=["output.md"], project_root=project_root, ) - # Verify command structure - assert captured_cmd[0] == "claude" - assert "--print" in captured_cmd - assert "--output-format" in captured_cmd - assert "-p" in captured_cmd - assert "--" in captured_cmd - - # Verify -p -- comes last (after all other flags) - p_index = captured_cmd.index("-p") - dash_dash_index = captured_cmd.index("--") - json_schema_index = captured_cmd.index("--json-schema") - system_prompt_index = captured_cmd.index("--system-prompt") - - assert json_schema_index < p_index, "Flags must come before -p" - assert system_prompt_index < p_index, "Flags must come before -p" - assert dash_dash_index == p_index + 1, "-- must immediately follow -p" - async def test_schema_is_valid_json(self) -> None: - """Test that QUALITY_GATE_RESPONSE_SCHEMA is valid JSON.""" - # This test ensures the schema can be serialized - schema_json = json.dumps(QUALITY_GATE_RESPONSE_SCHEMA) - assert schema_json # Non-empty string + """Test that QUALITY_GATE_RESPONSE_SCHEMA is valid JSON-serializable.""" + import json - # And parsed back + schema_json = json.dumps(QUALITY_GATE_RESPONSE_SCHEMA) + assert schema_json parsed = json.loads(schema_json) assert parsed == QUALITY_GATE_RESPONSE_SCHEMA @@ -420,17 +220,7 @@ async def evaluate_mock_gate( criteria: list[str] | None = None, outputs: list[str] | None = None, ) -> Any: - """Helper to evaluate a mock gate with default parameters. - - Args: - gate: The MockQualityGate instance to evaluate. - project_root: The project root path. - criteria: Quality criteria list. Defaults to ["Criterion 1"]. - outputs: Output files list. Defaults to ["output.md"]. - - Returns: - The evaluation result. 
- """ + """Helper to evaluate a mock gate with default parameters.""" return await gate.evaluate( quality_criteria=criteria or ["Criterion 1"], outputs=outputs or ["output.md"], From b53519ba19dc8794d329c1b36be771ba1b73f07a Mon Sep 17 00:00:00 2001 From: Noah Horton Date: Thu, 5 Feb 2026 18:08:27 -0700 Subject: [PATCH 33/45] Typed file outputs Co-Authored-By: Claude Opus 4.6 --- library/jobs/spec_driven_development/job.yml | 65 ++-- src/deepwork/core/parser.py | 67 +--- src/deepwork/mcp/quality_gate.py | 31 +- src/deepwork/mcp/schemas.py | 12 +- src/deepwork/mcp/server.py | 2 +- src/deepwork/mcp/state.py | 14 +- src/deepwork/mcp/tools.py | 90 ++++- src/deepwork/schemas/job.schema.json | 92 +++-- .../standard_jobs/deepwork_jobs/job.yml | 42 ++- .../deepwork_jobs/steps/fix_jobs.md | 75 +++- tests/e2e/test_claude_code_integration.py | 2 +- tests/fixtures/jobs/complex_job/job.yml | 24 +- .../jobs/concurrent_steps_job/job.yml | 24 +- tests/fixtures/jobs/exposed_step_job/job.yml | 8 +- tests/fixtures/jobs/fruits/job.yml | 8 +- tests/fixtures/jobs/job_with_doc_spec/job.yml | 11 +- tests/fixtures/jobs/simple_job/job.yml | 4 +- .../test_quality_gate_integration.py | 4 +- tests/unit/mcp/test_quality_gate.py | 35 +- tests/unit/mcp/test_schemas.py | 21 +- tests/unit/mcp/test_state.py | 19 +- tests/unit/mcp/test_tools.py | 330 +++++++++++++++++- tests/unit/test_parser.py | 147 ++++---- tests/unit/test_validation.py | 14 +- 24 files changed, 866 insertions(+), 275 deletions(-) diff --git a/library/jobs/spec_driven_development/job.yml b/library/jobs/spec_driven_development/job.yml index 91ab743b..e7ae3738 100644 --- a/library/jobs/spec_driven_development/job.yml +++ b/library/jobs/spec_driven_development/job.yml @@ -41,7 +41,9 @@ steps: - name: development_priorities description: "Key priorities like code quality, testing, UX consistency, performance" outputs: - - file: "[docs_folder]/constitution.md" + constitution.md: + type: file + description: "Foundational governance principles and development guidelines" dependencies: [] quality_criteria: - "**Priorities Captured**: Did the agent gather specific development priorities from the user?" @@ -60,10 +62,12 @@ steps: description: "Name of the feature being specified (lowercase, hyphens for spaces)" - name: feature_description description: "High-level description of what the feature should do" - - file: "[docs_folder]/constitution.md" + - file: constitution.md from_step: constitution outputs: - - file: specs/[feature-name]/spec.md + spec.md: + type: file + description: "Functional requirements as user stories without technology choices" dependencies: - constitution quality_criteria: @@ -81,10 +85,12 @@ steps: description: "Resolves ambiguities and gaps in the specification through structured questioning. Use after specification to ensure completeness." instructions_file: steps/clarify.md inputs: - - file: specs/[feature-name]/spec.md + - file: spec.md from_step: specify outputs: - - file: specs/[feature-name]/spec.md + spec.md: + type: file + description: "Updated specification with clarifications and resolved ambiguities" dependencies: - specify quality_criteria: @@ -101,18 +107,27 @@ steps: description: "Creates technical implementation strategy including architecture and technology choices. Use after specification is clarified." 
instructions_file: steps/plan.md inputs: - - file: specs/[feature-name]/spec.md + - file: spec.md from_step: clarify - - file: "[docs_folder]/constitution.md" + - file: constitution.md from_step: constitution - - file: "[docs_folder]/architecture.md" + - file: architecture.md description: "Existing project architecture document (if present)" outputs: - - file: specs/[feature-name]/plan.md - - file: specs/[feature-name]/data-model.md - - file: specs/[feature-name]/api-spec.json - - file: specs/[feature-name]/research.md - - file: "[docs_folder]/architecture.md" + plan.md: + type: file + description: "Technical implementation strategy including architecture and technology choices" + data-model.md: + type: file + description: "Data model documentation with all entities and relationships" + api-spec.json: + type: file + description: "API endpoint definitions with request/response schemas" + research.md: + type: file + description: "Research findings and technology evaluations" + architecture.md: + type: file description: "Updated project architecture document" dependencies: - clarify @@ -133,14 +148,16 @@ steps: description: "Converts the implementation plan into actionable, ordered development tasks. Use after plan is validated." instructions_file: steps/tasks.md inputs: - - file: specs/[feature-name]/plan.md + - file: plan.md from_step: plan - - file: specs/[feature-name]/spec.md + - file: spec.md from_step: clarify - - file: "[docs_folder]/architecture.md" + - file: architecture.md from_step: plan outputs: - - file: specs/[feature-name]/tasks.md + tasks.md: + type: file + description: "Actionable, ordered development tasks organized by user story" dependencies: - plan quality_criteria: @@ -159,18 +176,20 @@ steps: description: "Generates code and assets by executing the task breakdown. Use when ready to build the feature." instructions_file: steps/implement.md inputs: - - file: specs/[feature-name]/tasks.md + - file: tasks.md from_step: tasks - - file: specs/[feature-name]/plan.md + - file: plan.md from_step: plan - - file: specs/[feature-name]/spec.md + - file: spec.md from_step: clarify - - file: "[docs_folder]/architecture.md" + - file: architecture.md from_step: plan outputs: - - directory: src/ + source_files: + type: files description: "Implementation source files as specified in tasks" - - directory: tests/ + test_files: + type: files description: "Test files as specified in tasks" dependencies: - tasks diff --git a/src/deepwork/core/parser.py b/src/deepwork/core/parser.py index 354b4563..5c426aa6 100644 --- a/src/deepwork/core/parser.py +++ b/src/deepwork/core/parser.py @@ -51,29 +51,19 @@ def from_dict(cls, data: dict[str, Any]) -> "StepInput": @dataclass class OutputSpec: - """Represents a step output specification, optionally with doc spec reference.""" + """Represents a step output specification with type information.""" - file: str - doc_spec: str | None = None - - def has_doc_spec(self) -> bool: - """Check if this output has a doc spec reference.""" - return self.doc_spec is not None + name: str + type: str # "file" or "files" + description: str @classmethod - def from_dict(cls, data: dict[str, Any] | str) -> "OutputSpec": - """ - Create OutputSpec from dictionary or string. 
- - Supports both formats: - - String: "output.md" -> OutputSpec(file="output.md") - - Dict: {"file": "output.md", "doc_spec": ".deepwork/doc_specs/report.md"} - """ - if isinstance(data, str): - return cls(file=data) + def from_dict(cls, name: str, data: dict[str, Any]) -> "OutputSpec": + """Create OutputSpec from output name and its specification dict.""" return cls( - file=data["file"], - doc_spec=data.get("doc_spec"), + name=name, + type=data["type"], + description=data["description"], ) @@ -181,7 +171,10 @@ def from_dict(cls, data: dict[str, Any]) -> "Step": description=data["description"], instructions_file=data["instructions_file"], inputs=[StepInput.from_dict(inp) for inp in data.get("inputs", [])], - outputs=[OutputSpec.from_dict(out) for out in data["outputs"]], + outputs=[ + OutputSpec.from_dict(name, spec) + for name, spec in data.get("outputs", {}).items() + ], dependencies=data.get("dependencies", []), hooks=hooks, exposed=data.get("exposed", False), @@ -353,40 +346,6 @@ def validate_file_inputs(self) -> None: f"but '{inp.from_step}' is not in dependencies" ) - def validate_doc_spec_references(self, project_root: Path) -> None: - """ - Validate that doc spec references in outputs point to existing files. - - Args: - project_root: Path to the project root directory - - Raises: - ParseError: If doc spec references are invalid - """ - for step in self.steps: - for output in step.outputs: - if output.has_doc_spec(): - doc_spec_file = project_root / output.doc_spec - if not doc_spec_file.exists(): - raise ParseError( - f"Step '{step.id}' references non-existent doc spec " - f"'{output.doc_spec}'. Expected file at {doc_spec_file}" - ) - - def get_doc_spec_references(self) -> list[str]: - """ - Get all unique doc spec file paths referenced in this job's outputs. - - Returns: - List of doc spec file paths (e.g., ".deepwork/doc_specs/report.md") - """ - doc_spec_refs = set() - for step in self.steps: - for output in step.outputs: - if output.has_doc_spec() and output.doc_spec: - doc_spec_refs.add(output.doc_spec) - return list(doc_spec_refs) - def get_workflow_for_step(self, step_id: str) -> Workflow | None: """ Get the workflow containing a step. diff --git a/src/deepwork/mcp/quality_gate.py b/src/deepwork/mcp/quality_gate.py index 4096c0c0..f3f2c9f5 100644 --- a/src/deepwork/mcp/quality_gate.py +++ b/src/deepwork/mcp/quality_gate.py @@ -102,23 +102,42 @@ def _build_instructions(self, quality_criteria: list[str]) -> str: - Provide specific, actionable feedback for failed criteria - The overall "passed" should be true only if ALL criteria pass""" + @staticmethod + def _flatten_output_paths(outputs: dict[str, str | list[str]]) -> list[str]: + """Flatten a structured outputs dict into a list of file paths. + + Args: + outputs: Map of output names to file path(s) + + Returns: + Flat list of all file paths + """ + paths: list[str] = [] + for value in outputs.values(): + if isinstance(value, list): + paths.extend(value) + else: + paths.append(value) + return paths + async def _build_payload( self, - outputs: list[str], + outputs: dict[str, str | list[str]], project_root: Path, ) -> str: """Build the user prompt payload with file contents. 
Args: - outputs: List of output file paths + outputs: Map of output names to file path(s) project_root: Project root path for reading files Returns: Formatted payload with file contents """ output_sections: list[str] = [] + all_paths = self._flatten_output_paths(outputs) - for output_path in outputs: + for output_path in all_paths: full_path = project_root / output_path header = f"{FILE_SEPARATOR} {output_path} {FILE_SEPARATOR}" @@ -174,14 +193,14 @@ def _parse_result(self, data: dict[str, Any]) -> QualityGateResult: async def evaluate( self, quality_criteria: list[str], - outputs: list[str], + outputs: dict[str, str | list[str]], project_root: Path, ) -> QualityGateResult: """Evaluate step outputs against quality criteria. Args: quality_criteria: List of quality criteria to evaluate - outputs: List of output file paths + outputs: Map of output names to file path(s) project_root: Project root path Returns: @@ -237,7 +256,7 @@ def __init__(self, should_pass: bool = True, feedback: str = "Mock evaluation"): async def evaluate( self, quality_criteria: list[str], - outputs: list[str], + outputs: dict[str, str | list[str]], project_root: Path, ) -> QualityGateResult: """Mock evaluation - records call and returns configured result.""" diff --git a/src/deepwork/mcp/schemas.py b/src/deepwork/mcp/schemas.py index 4aec8ae7..5401a99a 100644 --- a/src/deepwork/mcp/schemas.py +++ b/src/deepwork/mcp/schemas.py @@ -92,7 +92,9 @@ class StartWorkflowInput(BaseModel): class FinishedStepInput(BaseModel): """Input for finished_step tool.""" - outputs: list[str] = Field(description="List of output file paths created") + outputs: dict[str, str | list[str]] = Field( + description="Map of output names to file path(s). Single file outputs map to a string path, multi-file outputs map to a list of paths." 
+ ) notes: str | None = Field(default=None, description="Optional notes about work done") quality_review_override_reason: str | None = Field( default=None, @@ -189,7 +191,9 @@ class FinishedStepResponse(BaseModel): # For workflow_complete status summary: str | None = Field(default=None, description="Summary of completed workflow") - all_outputs: list[str] | None = Field(default=None, description="All outputs from all steps") + all_outputs: dict[str, str | list[str]] | None = Field( + default=None, description="All outputs from all steps" + ) # Stack info (included in all responses) stack: list[StackEntry] = Field( @@ -225,7 +229,9 @@ class StepProgress(BaseModel): step_id: str = Field(description="Step identifier") started_at: str | None = Field(default=None, description="ISO timestamp when started") completed_at: str | None = Field(default=None, description="ISO timestamp when completed") - outputs: list[str] = Field(default_factory=list, description="Output files created") + outputs: dict[str, str | list[str]] = Field( + default_factory=dict, description="Output files created" + ) notes: str | None = Field(default=None, description="Notes from agent") quality_attempts: int = Field(default=0, description="Number of quality gate attempts") diff --git a/src/deepwork/mcp/server.py b/src/deepwork/mcp/server.py index 2b31a139..bb99a21b 100644 --- a/src/deepwork/mcp/server.py +++ b/src/deepwork/mcp/server.py @@ -155,7 +155,7 @@ async def start_workflow( ) ) async def finished_step( - outputs: list[str], + outputs: dict[str, str | list[str]], notes: str | None = None, quality_review_override_reason: str | None = None, ) -> dict[str, Any]: diff --git a/src/deepwork/mcp/state.py b/src/deepwork/mcp/state.py index 6aaba1e2..a8f2c54c 100644 --- a/src/deepwork/mcp/state.py +++ b/src/deepwork/mcp/state.py @@ -210,13 +210,13 @@ async def start_step(self, step_id: str) -> None: await self._save_session_unlocked(session) async def complete_step( - self, step_id: str, outputs: list[str], notes: str | None = None + self, step_id: str, outputs: dict[str, str | list[str]], notes: str | None = None ) -> None: """Mark a step as completed. Args: step_id: Step ID to complete - outputs: Output files created + outputs: Map of output names to file path(s) notes: Optional notes Raises: @@ -329,20 +329,20 @@ async def abort_workflow( new_active = self._session_stack[-1] if self._session_stack else None return session, new_active - def get_all_outputs(self) -> list[str]: + def get_all_outputs(self) -> dict[str, str | list[str]]: """Get all outputs from all completed steps. Returns: - List of all output file paths + Merged dict of all output names to file path(s) Raises: StateError: If no active session """ session = self.require_active_session() - outputs: list[str] = [] + all_outputs: dict[str, str | list[str]] = {} for progress in session.step_progress.values(): - outputs.extend(progress.outputs) - return outputs + all_outputs.update(progress.outputs) + return all_outputs def get_stack(self) -> list[StackEntry]: """Get the current workflow stack as StackEntry objects. 
diff --git a/src/deepwork/mcp/tools.py b/src/deepwork/mcp/tools.py index a11ea67f..bdede7cd 100644 --- a/src/deepwork/mcp/tools.py +++ b/src/deepwork/mcp/tools.py @@ -11,7 +11,14 @@ from pathlib import Path from typing import TYPE_CHECKING -from deepwork.core.parser import JobDefinition, ParseError, Workflow, parse_job_definition +from deepwork.core.parser import ( + JobDefinition, + OutputSpec, + ParseError, + Step, + Workflow, + parse_job_definition, +) from deepwork.mcp.schemas import ( AbortWorkflowInput, AbortWorkflowResponse, @@ -182,6 +189,80 @@ def _get_step_instructions(self, job: JobDefinition, step_id: str) -> str: return instructions_path.read_text(encoding="utf-8") + def _validate_outputs( + self, + submitted: dict[str, str | list[str]], + declared: list[OutputSpec], + ) -> None: + """Validate submitted outputs against declared output specs. + + Checks: + 1. Every submitted key matches a declared output name + 2. Every declared output has a corresponding submitted key + 3. type: file -> value is a single string path, file must exist + 4. type: files -> value is a list of strings, each file must exist + + Args: + submitted: The outputs dict from the agent + declared: The OutputSpec list from the step definition + + Raises: + ToolError: If validation fails + """ + declared_map = {spec.name: spec for spec in declared} + declared_names = set(declared_map.keys()) + submitted_names = set(submitted.keys()) + + # Check for unknown output keys + extra = submitted_names - declared_names + if extra: + raise ToolError( + f"Unknown output names: {', '.join(sorted(extra))}. " + f"Declared outputs: {', '.join(sorted(declared_names))}" + ) + + # Check for missing output keys + missing = declared_names - submitted_names + if missing: + raise ToolError( + f"Missing required outputs: {', '.join(sorted(missing))}. " + f"All declared outputs must be provided." 
+ ) + + # Validate types and file existence + for name, value in submitted.items(): + spec = declared_map[name] + + if spec.type == "file": + if not isinstance(value, str): + raise ToolError( + f"Output '{name}' is declared as type 'file' and must be a " + f"single string path, got {type(value).__name__}" + ) + full_path = self.project_root / value + if not full_path.exists(): + raise ToolError( + f"Output '{name}': file not found at '{value}'" + ) + + elif spec.type == "files": + if not isinstance(value, list): + raise ToolError( + f"Output '{name}' is declared as type 'files' and must be a " + f"list of paths, got {type(value).__name__}" + ) + for path in value: + if not isinstance(path, str): + raise ToolError( + f"Output '{name}': all paths must be strings, " + f"got {type(path).__name__}" + ) + full_path = self.project_root / path + if not full_path.exists(): + raise ToolError( + f"Output '{name}': file not found at '{path}'" + ) + # ========================================================================= # Tool Implementations # ========================================================================= @@ -237,7 +318,7 @@ async def start_workflow(self, input_data: StartWorkflowInput) -> StartWorkflowR instructions = self._get_step_instructions(job, first_step_id) # Get expected outputs - step_outputs = [out.file for out in first_step.outputs] + step_outputs = [out.name for out in first_step.outputs] return StartWorkflowResponse( begin_step=ActiveStepInfo( @@ -275,6 +356,9 @@ async def finished_step(self, input_data: FinishedStepInput) -> FinishedStepResp if current_step is None: raise ToolError(f"Current step not found: {current_step_id}") + # Validate outputs against step's declared output specs + self._validate_outputs(input_data.outputs, current_step.outputs) + # Run quality gate if available and step has criteria (unless overridden) if ( self.quality_gate @@ -346,7 +430,7 @@ async def finished_step(self, input_data: FinishedStepInput) -> FinishedStepResp # Get instructions instructions = self._get_step_instructions(job, next_step_id) - step_outputs = [out.file for out in next_step.outputs] + step_outputs = [out.name for out in next_step.outputs] # Add info about concurrent steps if this is a concurrent entry if next_entry.is_concurrent and len(next_entry.step_ids) > 1: diff --git a/src/deepwork/schemas/job.schema.json b/src/deepwork/schemas/job.schema.json index f00d7550..27cbf30c 100644 --- a/src/deepwork/schemas/job.schema.json +++ b/src/deepwork/schemas/job.schema.json @@ -4,7 +4,12 @@ "title": "DeepWork Job Definition", "description": "Schema for DeepWork job.yml files. 
Jobs are multi-step workflows executed by AI agents.", "type": "object", - "required": ["name", "version", "summary", "steps"], + "required": [ + "name", + "version", + "summary", + "steps" + ], "additionalProperties": false, "properties": { "name": { @@ -59,7 +64,11 @@ }, "workflow": { "type": "object", - "required": ["name", "summary", "steps"], + "required": [ + "name", + "summary", + "steps" + ], "additionalProperties": false, "description": "A named workflow grouping steps into a sequence", "properties": { @@ -101,7 +110,10 @@ }, "changelogEntry": { "type": "object", - "required": ["version", "changes"], + "required": [ + "version", + "changes" + ], "additionalProperties": false, "properties": { "version": { @@ -118,9 +130,15 @@ }, "step": { "type": "object", - "required": ["id", "name", "description", "instructions_file", "outputs"], + "required": [ + "id", + "name", + "description", + "instructions_file", + "outputs" + ], "additionalProperties": false, - "description": "A single step in a job, representing one unit of work", + "description": "A single Step in a job, representing one material unit of work with evaluatable outputs", "properties": { "id": { "$ref": "#/$defs/stepId", @@ -149,9 +167,9 @@ } }, "outputs": { - "type": "array", - "description": "List of output files/directories produced by this step. May be empty for cleanup or validation steps.", - "items": { + "type": "object", + "description": "Named outputs produced by this step. Keys are output identifiers, values describe type and purpose. May be empty for cleanup or validation steps.", + "additionalProperties": { "$ref": "#/$defs/stepOutput" } }, @@ -211,7 +229,10 @@ }, "userParameterInput": { "type": "object", - "required": ["name", "description"], + "required": [ + "name", + "description" + ], "additionalProperties": false, "description": "A user-provided parameter input that will be requested at runtime", "properties": { @@ -229,7 +250,10 @@ }, "fileInput": { "type": "object", - "required": ["file", "from_step"], + "required": [ + "file", + "from_step" + ], "additionalProperties": false, "description": "A file input from a previous step's output", "properties": { @@ -246,32 +270,26 @@ } }, "stepOutput": { - "oneOf": [ - { - "type": "string", - "minLength": 1, - "description": "Simple output file path (backward compatible format)" - }, - { - "$ref": "#/$defs/outputWithDocSpec" - } - ] - }, - "outputWithDocSpec": { "type": "object", - "required": ["file"], + "required": [ + "type", + "description" + ], "additionalProperties": false, - "description": "Output file with optional document specification reference", + "description": "Output specification with type information indicating single file or multiple files", "properties": { - "file": { + "type": { "type": "string", - "minLength": 1, - "description": "Output file path" + "enum": [ + "file", + "files" + ], + "description": "Whether this output is a single file ('file') or multiple files ('files')" }, - "doc_spec": { + "description": { "type": "string", - "pattern": "^\\.deepwork/doc_specs/[a-z][a-z0-9_-]*\\.md$", - "description": "Path to doc spec file defining the expected document structure. 
Example: '.deepwork/doc_specs/report.md'" + "minLength": 1, + "description": "Description of what this output contains" } } }, @@ -308,7 +326,9 @@ "description": "A hook action - exactly one of: prompt (inline text), prompt_file (external file), or script (shell script)", "oneOf": [ { - "required": ["prompt"], + "required": [ + "prompt" + ], "additionalProperties": false, "properties": { "prompt": { @@ -319,7 +339,9 @@ } }, { - "required": ["prompt_file"], + "required": [ + "prompt_file" + ], "additionalProperties": false, "properties": { "prompt_file": { @@ -330,7 +352,9 @@ } }, { - "required": ["script"], + "required": [ + "script" + ], "additionalProperties": false, "properties": { "script": { @@ -343,4 +367,4 @@ ] } } -} +} \ No newline at end of file diff --git a/src/deepwork/standard_jobs/deepwork_jobs/job.yml b/src/deepwork/standard_jobs/deepwork_jobs/job.yml index facf3ce7..14b70c46 100644 --- a/src/deepwork/standard_jobs/deepwork_jobs/job.yml +++ b/src/deepwork/standard_jobs/deepwork_jobs/job.yml @@ -70,7 +70,9 @@ steps: - name: job_purpose description: "What complex task or workflow are you trying to accomplish?" outputs: - - job.yml + job.yml: + type: file + description: "Definition of the job and its workflows" dependencies: [] - id: implement name: "Implement Job Steps" @@ -80,7 +82,9 @@ steps: - file: job.yml from_step: define outputs: - - steps/ + step_instruction_files: + type: files + description: "Instruction Markdown files for each step" dependencies: - define quality_criteria: @@ -97,10 +101,12 @@ steps: inputs: - file: job.yml from_step: define - - file: steps/ + - file: step_instruction_files from_step: implement outputs: - - test_feedback.md + test_feedback.md: + type: file + description: "Feedback from testing the workflow on a real use case" dependencies: - define - implement @@ -119,11 +125,15 @@ steps: inputs: - file: job.yml from_step: define - - file: steps/ + - file: step_instruction_files from_step: implement outputs: - - job.yml - - steps/ + job.yml: + type: file + description: "Updated job definition with improvements from test run" + step_instruction_files: + type: files + description: "Updated instruction Markdown files for each step" dependencies: - define - implement @@ -143,7 +153,9 @@ steps: - name: job_name description: "Name of the job that was run (optional - will auto-detect from conversation)" outputs: - - AGENTS.md + AGENTS.md: + type: file + description: "Bespoke learnings and run-specific context for the working folder" dependencies: [] quality_criteria: - "**Conversation Analyzed**: Did the agent review the conversation for DeepWork job executions?" @@ -162,7 +174,9 @@ steps: instructions_file: steps/fix_settings.md inputs: [] outputs: - - .claude/settings.json + settings.json: + type: file + description: "Cleaned up Claude settings file with legacy permissions removed" dependencies: [] quality_criteria: - "**DeepWork Skills Removed**: Are `Skill(...)` entries matching jobs in `.deepwork/jobs/` removed?" @@ -180,10 +194,12 @@ steps: description: "Updates job.yml files and step instructions to current DeepWork format, removing deprecated fields and migrating to new structures." 
instructions_file: steps/fix_jobs.md inputs: - - file: .claude/settings.json + - file: settings.json from_step: fix_settings outputs: - - .deepwork/jobs/ + job_definitions: + type: files + description: "Updated job.yml files and step instructions in current DeepWork format" dependencies: - fix_settings quality_criteria: @@ -197,9 +213,9 @@ steps: name: "Clean Up Errata" description: "Removes obsolete files and folders from prior DeepWork versions, including old skill directories, temp files, and deprecated configurations." instructions_file: steps/errata.md - outputs: [] + outputs: {} inputs: - - file: .deepwork/jobs/ + - file: job_definitions from_step: fix_jobs dependencies: - fix_settings diff --git a/src/deepwork/standard_jobs/deepwork_jobs/steps/fix_jobs.md b/src/deepwork/standard_jobs/deepwork_jobs/steps/fix_jobs.md index 7f3675a5..93fb67ae 100644 --- a/src/deepwork/standard_jobs/deepwork_jobs/steps/fix_jobs.md +++ b/src/deepwork/standard_jobs/deepwork_jobs/steps/fix_jobs.md @@ -36,8 +36,10 @@ Audit and repair the job at `.deepwork/jobs/[job_name]/job.yml`: 2. Migrate `stop_hooks` to `hooks.after_agent` format 3. Remove references to deleted steps (like `review_job_spec`) 4. Fix orphaned steps by adding them to workflows -5. Bump version and add changelog entry if changes were made -6. Validate YAML syntax +5. Migrate `outputs` from array format to map format with `type` and `description` +6. Update any `file` inputs that reference renamed output keys +7. Bump version and add changelog entry if changes were made +8. Validate YAML syntax Report what changes were made. ``` @@ -152,7 +154,65 @@ workflows: This ensures all steps remain accessible via the MCP interface while preserving the existing workflow structure. -### Step 6: Update Version Numbers +### Step 6: Migrate `outputs` from Array Format to Map Format + +The `outputs` field on steps changed from an array of strings/objects to a map with typed entries. Every output must now have a key (identifier), a `type` (`file` or `files`), and a `description`. + +**Before (legacy array format):** +```yaml +steps: + - id: define + outputs: + - job.yml + - steps/ + - file: report.md + doc_spec: .deepwork/doc_specs/report.md +``` + +**After (current map format):** +```yaml +steps: + - id: define + outputs: + job.yml: + type: file + description: "The job definition file" + step_instruction_files: + type: files + description: "Instruction Markdown files for each step" + report.md: + type: file + description: "The generated report" +``` + +**Migration rules:** + +1. **Plain filename strings** (e.g., `- job.yml`, `- output.md`): Use the filename as the key, set `type: file`, add a `description`. +2. **Directory strings ending in `/`** (e.g., `- steps/`, `- competitor_profiles/`): Choose a descriptive key name (e.g., `step_instruction_files`, `competitor_profiles`), set `type: files`, add a `description`. +3. **Objects with `doc_spec`** (e.g., `- file: report.md` with `doc_spec: ...`): Drop the `doc_spec` field entirely, use the filename as the key, set `type: file`, add a `description`. +4. **`description` is required** on every output entry. Write a short sentence describing what the output contains. + +**Update `file` inputs that reference renamed outputs:** + +When a directory output key changes (e.g., `steps/` becomes `step_instruction_files`), any downstream step with a `file` input referencing the old name must be updated to use the new key. 
+ +```yaml +# Before: input references old directory name +steps: + - id: implement + inputs: + - file: steps/ + from_step: define + +# After: input uses the new output key +steps: + - id: implement + inputs: + - file: step_instruction_files + from_step: define +``` + +### Step 7: Update Version Numbers If you made significant changes to a job, bump its version number: @@ -188,6 +248,12 @@ Warning: Job 'my_job' has steps not included in any workflow: standalone_step - If the job has NO workflows: Create one workflow named `my_job` with all steps in order - If the job has SOME workflows: Add a `standalone_step` workflow containing just that step +### Issue: `outputs` is an array instead of an object +``` +Error: Step 'define' outputs should be an object but got array +``` +**Fix:** Convert from the legacy array format to the map format. Each array entry becomes a key in the map with `type` (`file` or `files`) and `description`. See Step 6 for detailed migration rules. Also update any `file` inputs in downstream steps if an output key was renamed. + ## Jobs to Check For each job in `.deepwork/jobs/`, check: @@ -196,9 +262,10 @@ For each job in `.deepwork/jobs/`, check: |-------|------------------| | `exposed` field | Remove from all steps | | `stop_hooks` | Migrate to `hooks.after_agent` | +| `outputs` format | Migrate from array to map with `type` and `description` | | Workflow steps | Remove references to deleted steps | | Dependencies | Update to valid step IDs | -| File inputs | Update `from_step` references | +| File inputs | Update `from_step` references; update keys for renamed outputs | | Version | Bump if changes were made | ## Important Notes diff --git a/tests/e2e/test_claude_code_integration.py b/tests/e2e/test_claude_code_integration.py index 802ee30f..a11d6659 100644 --- a/tests/e2e/test_claude_code_integration.py +++ b/tests/e2e/test_claude_code_integration.py @@ -276,7 +276,7 @@ async def test_workflow_step_progression(self, project_with_job: Path) -> None: # Report first step completion finish_input = FinishedStepInput( - outputs=[str(output_file)], + outputs={"identified_fruits.md": str(output_file)}, notes="Identified fruits from test input", ) finish_response = await tools.finished_step(finish_input) diff --git a/tests/fixtures/jobs/complex_job/job.yml b/tests/fixtures/jobs/complex_job/job.yml index 507ea626..8be0eea9 100644 --- a/tests/fixtures/jobs/complex_job/job.yml +++ b/tests/fixtures/jobs/complex_job/job.yml @@ -31,7 +31,9 @@ steps: - name: product_category description: "Product category" outputs: - - competitors.md + competitors.md: + type: file + description: "Vetted list of direct and indirect competitors" dependencies: [] - id: primary_research @@ -42,8 +44,12 @@ steps: - file: competitors.md from_step: identify_competitors outputs: - - primary_research.md - - competitor_profiles/ + primary_research.md: + type: file + description: "Analysis of competitors' self-presentation" + competitor_profiles: + type: files + description: "Individual competitor profile documents" dependencies: - identify_competitors @@ -57,7 +63,9 @@ steps: - file: primary_research.md from_step: primary_research outputs: - - secondary_research.md + secondary_research.md: + type: file + description: "Third-party perspectives on competitors" dependencies: - identify_competitors - primary_research @@ -72,8 +80,12 @@ steps: - file: secondary_research.md from_step: secondary_research outputs: - - comparison_matrix.md - - strengths_weaknesses.md + comparison_matrix.md: + type: file + description: 
"Detailed comparison matrix across competitors" + strengths_weaknesses.md: + type: file + description: "Strengths and weaknesses analysis" dependencies: - primary_research - secondary_research diff --git a/tests/fixtures/jobs/concurrent_steps_job/job.yml b/tests/fixtures/jobs/concurrent_steps_job/job.yml index 3feeab4d..db8545e0 100644 --- a/tests/fixtures/jobs/concurrent_steps_job/job.yml +++ b/tests/fixtures/jobs/concurrent_steps_job/job.yml @@ -21,7 +21,9 @@ steps: description: "Initialize the analysis environment" instructions_file: steps/setup.md outputs: - - setup_complete.md + setup_complete.md: + type: file + description: "Setup confirmation and configuration" - id: research_web name: "Web Research" @@ -31,7 +33,9 @@ steps: - file: setup_complete.md from_step: setup outputs: - - web_research.md + web_research.md: + type: file + description: "Research findings from web sources" dependencies: - setup @@ -43,7 +47,9 @@ steps: - file: setup_complete.md from_step: setup outputs: - - docs_research.md + docs_research.md: + type: file + description: "Research findings from internal documents" dependencies: - setup @@ -55,7 +61,9 @@ steps: - file: setup_complete.md from_step: setup outputs: - - interviews_research.md + interviews_research.md: + type: file + description: "Research findings from stakeholder interviews" dependencies: - setup @@ -71,7 +79,9 @@ steps: - file: interviews_research.md from_step: research_interviews outputs: - - compiled_results.md + compiled_results.md: + type: file + description: "Unified report from all research sources" dependencies: - research_web - research_docs @@ -85,6 +95,8 @@ steps: - file: compiled_results.md from_step: compile_results outputs: - - final_report.md + final_report.md: + type: file + description: "Final reviewed and approved analysis report" dependencies: - compile_results diff --git a/tests/fixtures/jobs/exposed_step_job/job.yml b/tests/fixtures/jobs/exposed_step_job/job.yml index fc1530b7..f4a2e0da 100644 --- a/tests/fixtures/jobs/exposed_step_job/job.yml +++ b/tests/fixtures/jobs/exposed_step_job/job.yml @@ -17,7 +17,9 @@ steps: description: "A step that is hidden by default" instructions_file: steps/hidden_step.md outputs: - - hidden_output.md + hidden_output.md: + type: file + description: "Output from the hidden step" dependencies: [] - id: exposed_step @@ -26,5 +28,7 @@ steps: instructions_file: steps/exposed_step.md exposed: true outputs: - - exposed_output.md + exposed_output.md: + type: file + description: "Output from the exposed step" dependencies: [] diff --git a/tests/fixtures/jobs/fruits/job.yml b/tests/fixtures/jobs/fruits/job.yml index cfb83e9f..1495f604 100644 --- a/tests/fixtures/jobs/fruits/job.yml +++ b/tests/fixtures/jobs/fruits/job.yml @@ -32,7 +32,9 @@ steps: - name: raw_items description: "Comma-separated list of items to filter (e.g., 'apple, car, banana, chair')" outputs: - - identified_fruits.md + identified_fruits.md: + type: file + description: "List of identified fruits from the input items" dependencies: [] - id: classify @@ -43,6 +45,8 @@ steps: - file: identified_fruits.md from_step: identify outputs: - - classified_fruits.md + classified_fruits.md: + type: file + description: "Fruits organized into categories" dependencies: - identify diff --git a/tests/fixtures/jobs/job_with_doc_spec/job.yml b/tests/fixtures/jobs/job_with_doc_spec/job.yml index 16673b5a..7fdec846 100644 --- a/tests/fixtures/jobs/job_with_doc_spec/job.yml +++ b/tests/fixtures/jobs/job_with_doc_spec/job.yml @@ -1,19 +1,20 @@ # 
yaml-language-server: $schema=.deepwork/schemas/job.schema.json name: job_with_doc_spec version: "1.0.0" -summary: "Job with doc spec output for testing" +summary: "Job with typed output for testing" description: | - A test job that produces a document with a doc spec reference. + A test job that produces a report document. steps: - id: generate_report name: "Generate Report" - description: "Generate a report following the doc spec" + description: "Generate a report" instructions_file: steps/generate_report.md inputs: - name: report_title description: "Title for the report" outputs: - - file: report.md - doc_spec: .deepwork/doc_specs/valid_report.md + report.md: + type: file + description: "Generated report document" dependencies: [] diff --git a/tests/fixtures/jobs/simple_job/job.yml b/tests/fixtures/jobs/simple_job/job.yml index 5f19e452..112dbe97 100644 --- a/tests/fixtures/jobs/simple_job/job.yml +++ b/tests/fixtures/jobs/simple_job/job.yml @@ -21,5 +21,7 @@ steps: - name: input_param description: "An input parameter" outputs: - - output.md + output.md: + type: file + description: "The output file produced by this step" dependencies: [] diff --git a/tests/integration/test_quality_gate_integration.py b/tests/integration/test_quality_gate_integration.py index 24b12d20..52b24e65 100644 --- a/tests/integration/test_quality_gate_integration.py +++ b/tests/integration/test_quality_gate_integration.py @@ -90,7 +90,7 @@ async def test_real_claude_evaluates_passing_criteria(self, project_root: Path) "The document must have a title", "The document must contain a summary section", ], - outputs=["analysis.md"], + outputs={"analysis": "analysis.md"}, project_root=project_root, ) @@ -125,7 +125,7 @@ async def test_real_claude_evaluates_failing_criteria(self, project_root: Path) "The document must include a numbered list of recommendations", "The document must have a 'Conclusions' section", ], - outputs=["incomplete.md"], + outputs={"document": "incomplete.md"}, project_root=project_root, ) diff --git a/tests/unit/mcp/test_quality_gate.py b/tests/unit/mcp/test_quality_gate.py index 2c933cca..78765e99 100644 --- a/tests/unit/mcp/test_quality_gate.py +++ b/tests/unit/mcp/test_quality_gate.py @@ -74,7 +74,7 @@ async def test_build_payload(self, quality_gate: QualityGate, project_root: Path output_file.write_text("Test content") payload = await quality_gate._build_payload( - outputs=["output.md"], + outputs={"report": "output.md"}, project_root=project_root, ) @@ -87,13 +87,30 @@ async def test_build_payload_missing_file( ) -> None: """Test building payload with missing file.""" payload = await quality_gate._build_payload( - outputs=["nonexistent.md"], + outputs={"report": "nonexistent.md"}, project_root=project_root, ) assert "File not found" in payload assert "nonexistent.md" in payload + async def test_build_payload_files_type( + self, quality_gate: QualityGate, project_root: Path + ) -> None: + """Test building payload with multi-file outputs.""" + (project_root / "a.md").write_text("File A") + (project_root / "b.md").write_text("File B") + + payload = await quality_gate._build_payload( + outputs={"reports": ["a.md", "b.md"]}, + project_root=project_root, + ) + + assert "File A" in payload + assert "File B" in payload + assert "a.md" in payload + assert "b.md" in payload + def test_parse_result_valid(self, quality_gate: QualityGate) -> None: """Test parsing valid structured output data.""" data = { @@ -153,7 +170,7 @@ async def test_evaluate_no_criteria( """Test evaluation with no criteria 
auto-passes.""" result = await quality_gate.evaluate( quality_criteria=[], - outputs=["output.md"], + outputs={"report": "output.md"}, project_root=project_root, ) @@ -172,7 +189,7 @@ async def test_evaluate_calls_cli_with_correct_args( await gate.evaluate( quality_criteria=["Must be valid"], - outputs=["output.md"], + outputs={"report": "output.md"}, project_root=project_root, ) @@ -196,7 +213,7 @@ async def test_evaluate_wraps_cli_error( with pytest.raises(QualityGateError, match="CLI failed"): await gate.evaluate( quality_criteria=["Test"], - outputs=["output.md"], + outputs={"report": "output.md"}, project_root=project_root, ) @@ -218,12 +235,12 @@ async def evaluate_mock_gate( gate: MockQualityGate, project_root: Path, criteria: list[str] | None = None, - outputs: list[str] | None = None, + outputs: dict[str, str | list[str]] | None = None, ) -> Any: """Helper to evaluate a mock gate with default parameters.""" return await gate.evaluate( quality_criteria=criteria or ["Criterion 1"], - outputs=outputs or ["output.md"], + outputs=outputs or {"report": "output.md"}, project_root=project_root, ) @@ -248,10 +265,10 @@ async def test_mock_records_evaluations(self, project_root: Path) -> None: gate = MockQualityGate() await self.evaluate_mock_gate( - gate, project_root, criteria=["Criterion 1"], outputs=["output1.md"] + gate, project_root, criteria=["Criterion 1"], outputs={"out1": "output1.md"} ) await self.evaluate_mock_gate( - gate, project_root, criteria=["Criterion 2"], outputs=["output2.md"] + gate, project_root, criteria=["Criterion 2"], outputs={"out2": "output2.md"} ) assert len(gate.evaluations) == 2 diff --git a/tests/unit/mcp/test_schemas.py b/tests/unit/mcp/test_schemas.py index 5259d284..f1689d01 100644 --- a/tests/unit/mcp/test_schemas.py +++ b/tests/unit/mcp/test_schemas.py @@ -139,16 +139,24 @@ class TestFinishedStepInput: """Tests for FinishedStepInput model.""" def test_with_outputs(self) -> None: - """Test with outputs only.""" - input_data = FinishedStepInput(outputs=["output1.md", "output2.md"]) + """Test with structured outputs.""" + input_data = FinishedStepInput( + outputs={"report": "report.md", "data_files": ["a.csv", "b.csv"]} + ) - assert input_data.outputs == ["output1.md", "output2.md"] + assert input_data.outputs == {"report": "report.md", "data_files": ["a.csv", "b.csv"]} assert input_data.notes is None + def test_with_empty_outputs(self) -> None: + """Test with empty outputs dict (for steps with no outputs).""" + input_data = FinishedStepInput(outputs={}) + + assert input_data.outputs == {} + def test_with_notes(self) -> None: """Test with notes.""" input_data = FinishedStepInput( - outputs=["output.md"], + outputs={"output": "output.md"}, notes="Completed successfully", ) @@ -310,12 +318,13 @@ def test_workflow_complete_status(self) -> None: response = FinishedStepResponse( status=StepStatus.WORKFLOW_COMPLETE, summary="Workflow completed!", - all_outputs=["output1.md", "output2.md"], + all_outputs={"output1": "output1.md", "output2": "output2.md"}, ) assert response.status == StepStatus.WORKFLOW_COMPLETE assert response.summary is not None assert response.all_outputs is not None + assert response.all_outputs == {"output1": "output1.md", "output2": "output2.md"} class TestStepProgress: @@ -328,7 +337,7 @@ def test_new_step(self) -> None: assert progress.step_id == "step1" assert progress.started_at is None assert progress.completed_at is None - assert progress.outputs == [] + assert progress.outputs == {} assert progress.quality_attempts == 0 diff --git 
a/tests/unit/mcp/test_state.py b/tests/unit/mcp/test_state.py index 4b84cbc4..643ae5f7 100644 --- a/tests/unit/mcp/test_state.py +++ b/tests/unit/mcp/test_state.py @@ -146,7 +146,7 @@ async def test_complete_step(self, state_manager: StateManager) -> None: await state_manager.complete_step( step_id="step1", - outputs=["output1.md", "output2.md"], + outputs={"report": "output1.md", "data": "output2.md"}, notes="Done!", ) @@ -155,7 +155,7 @@ async def test_complete_step(self, state_manager: StateManager) -> None: progress = session.step_progress["step1"] assert progress.completed_at is not None - assert progress.outputs == ["output1.md", "output2.md"] + assert progress.outputs == {"report": "output1.md", "data": "output2.md"} assert progress.notes == "Done!" async def test_record_quality_attempt(self, state_manager: StateManager) -> None: @@ -223,15 +223,18 @@ async def test_get_all_outputs(self, state_manager: StateManager) -> None: first_step_id="step1", ) - await state_manager.complete_step("step1", ["output1.md"]) - await state_manager.complete_step("step2", ["output2.md", "output3.md"]) + await state_manager.complete_step("step1", {"report": "output1.md"}) + await state_manager.complete_step( + "step2", {"data_files": ["output2.md", "output3.md"]} + ) outputs = state_manager.get_all_outputs() - assert "output1.md" in outputs - assert "output2.md" in outputs - assert "output3.md" in outputs - assert len(outputs) == 3 + assert outputs == { + "report": "output1.md", + "data_files": ["output2.md", "output3.md"], + } + assert len(outputs) == 2 async def test_list_sessions(self, state_manager: StateManager) -> None: """Test listing all sessions.""" diff --git a/tests/unit/mcp/test_tools.py b/tests/unit/mcp/test_tools.py index 24ef639e..0da0e60f 100644 --- a/tests/unit/mcp/test_tools.py +++ b/tests/unit/mcp/test_tools.py @@ -38,7 +38,9 @@ def project_root(tmp_path: Path) -> Path: description: The first step instructions_file: steps/step1.md outputs: - - output1.md + output1.md: + type: file + description: First step output quality_criteria: - Output must be valid - id: step2 @@ -46,7 +48,9 @@ def project_root(tmp_path: Path) -> Path: description: The second step instructions_file: steps/step2.md outputs: - - output2.md + output2.md: + type: file + description: Second step output dependencies: - step1 @@ -193,13 +197,17 @@ async def test_start_workflow_invalid_workflow_multiple( description: Step A instructions_file: steps/step_a.md outputs: - - output_a.md + output_a.md: + type: file + description: Step A output - id: step_b name: Step B description: Step B instructions_file: steps/step_b.md outputs: - - output_b.md + output_b.md: + type: file + description: Step B output workflows: - name: alpha @@ -231,7 +239,7 @@ async def test_start_workflow_invalid_workflow_multiple( async def test_finished_step_no_session(self, tools: WorkflowTools) -> None: """Test finished_step without active session.""" - input_data = FinishedStepInput(outputs=["output1.md"]) + input_data = FinishedStepInput(outputs={"output1.md": "output1.md"}) with pytest.raises(StateError, match="No active workflow session"): await tools.finished_step(input_data) @@ -253,7 +261,7 @@ async def test_finished_step_advances_to_next( # Finish first step finish_input = FinishedStepInput( - outputs=["output1.md"], + outputs={"output1.md": "output1.md"}, notes="Completed step 1", ) response = await tools.finished_step(finish_input) @@ -278,15 +286,18 @@ async def test_finished_step_completes_workflow( # Complete first step (project_root / 
"output1.md").write_text("Output 1") - await tools.finished_step(FinishedStepInput(outputs=["output1.md"])) + await tools.finished_step(FinishedStepInput(outputs={"output1.md": "output1.md"})) # Complete second (last) step (project_root / "output2.md").write_text("Output 2") - response = await tools.finished_step(FinishedStepInput(outputs=["output2.md"])) + response = await tools.finished_step( + FinishedStepInput(outputs={"output2.md": "output2.md"}) + ) assert response.status == StepStatus.WORKFLOW_COMPLETE assert response.summary is not None assert "completed" in response.summary.lower() + assert response.all_outputs is not None assert "output1.md" in response.all_outputs assert "output2.md" in response.all_outputs @@ -304,7 +315,9 @@ async def test_finished_step_with_quality_gate_pass( # Create output and finish step (project_root / "output1.md").write_text("Valid output") - response = await tools_with_quality.finished_step(FinishedStepInput(outputs=["output1.md"])) + response = await tools_with_quality.finished_step( + FinishedStepInput(outputs={"output1.md": "output1.md"}) + ) # Should advance to next step assert response.status == StepStatus.NEXT_STEP @@ -330,7 +343,9 @@ async def test_finished_step_with_quality_gate_fail( # Create output and finish step (project_root / "output1.md").write_text("Invalid output") - response = await tools.finished_step(FinishedStepInput(outputs=["output1.md"])) + response = await tools.finished_step( + FinishedStepInput(outputs={"output1.md": "output1.md"}) + ) assert response.status == StepStatus.NEEDS_WORK assert response.feedback == "Needs improvement" @@ -359,12 +374,16 @@ async def test_finished_step_quality_gate_max_attempts( # Try multiple times (max is 3) for _ in range(2): - response = await tools.finished_step(FinishedStepInput(outputs=["output1.md"])) + response = await tools.finished_step( + FinishedStepInput(outputs={"output1.md": "output1.md"}) + ) assert response.status == StepStatus.NEEDS_WORK # Third attempt should raise error with pytest.raises(ToolError, match="Quality gate failed after.*attempts"): - await tools.finished_step(FinishedStepInput(outputs=["output1.md"])) + await tools.finished_step( + FinishedStepInput(outputs={"output1.md": "output1.md"}) + ) async def test_finished_step_quality_gate_override( self, project_root: Path, state_manager: StateManager @@ -390,7 +409,7 @@ async def test_finished_step_quality_gate_override( (project_root / "output1.md").write_text("Output that would fail quality check") response = await tools.finished_step( FinishedStepInput( - outputs=["output1.md"], + outputs={"output1.md": "output1.md"}, quality_review_override_reason="Manual review completed offline", ) ) @@ -399,3 +418,288 @@ async def test_finished_step_quality_gate_override( assert response.status == StepStatus.NEXT_STEP # Quality gate should not have been called assert len(failing_gate.evaluations) == 0 + + async def test_finished_step_validates_unknown_output_keys( + self, tools: WorkflowTools, project_root: Path + ) -> None: + """Test finished_step rejects unknown output keys.""" + start_input = StartWorkflowInput( + goal="Complete task", + job_name="test_job", + workflow_name="main", + ) + await tools.start_workflow(start_input) + + (project_root / "output1.md").write_text("content") + (project_root / "extra.md").write_text("content") + + with pytest.raises(ToolError, match="Unknown output names.*extra.md"): + await tools.finished_step( + FinishedStepInput( + outputs={"output1.md": "output1.md", "extra.md": "extra.md"} + ) + ) + 
+ async def test_finished_step_validates_missing_output_keys( + self, tools: WorkflowTools, project_root: Path + ) -> None: + """Test finished_step rejects when declared outputs are missing.""" + start_input = StartWorkflowInput( + goal="Complete task", + job_name="test_job", + workflow_name="main", + ) + await tools.start_workflow(start_input) + + # Step1 declares output1.md, but we provide empty dict + with pytest.raises(ToolError, match="Missing required outputs.*output1.md"): + await tools.finished_step(FinishedStepInput(outputs={})) + + async def test_finished_step_validates_file_type_must_be_string( + self, tools: WorkflowTools, project_root: Path + ) -> None: + """Test finished_step rejects list value for type: file output.""" + start_input = StartWorkflowInput( + goal="Complete task", + job_name="test_job", + workflow_name="main", + ) + await tools.start_workflow(start_input) + + (project_root / "output1.md").write_text("content") + + with pytest.raises(ToolError, match="type 'file'.*single string path"): + await tools.finished_step( + FinishedStepInput(outputs={"output1.md": ["output1.md"]}) + ) + + async def test_finished_step_validates_file_existence( + self, tools: WorkflowTools, project_root: Path + ) -> None: + """Test finished_step rejects when file does not exist.""" + start_input = StartWorkflowInput( + goal="Complete task", + job_name="test_job", + workflow_name="main", + ) + await tools.start_workflow(start_input) + + # Don't create the file + with pytest.raises(ToolError, match="file not found at.*nonexistent.md"): + await tools.finished_step( + FinishedStepInput(outputs={"output1.md": "nonexistent.md"}) + ) + + async def test_finished_step_empty_outputs_for_step_with_no_outputs( + self, project_root: Path, state_manager: StateManager + ) -> None: + """Test that empty outputs {} works for steps declared with no outputs.""" + # Create a job with a step that has no outputs + job_dir = project_root / ".deepwork" / "jobs" / "no_output_job" + job_dir.mkdir(parents=True) + (job_dir / "job.yml").write_text( + """ +name: no_output_job +version: "1.0.0" +summary: Job with no-output step +description: Test job + +steps: + - id: cleanup + name: Cleanup + description: Cleanup step with no outputs + instructions_file: steps/cleanup.md + outputs: {} + +workflows: + - name: main + summary: Main workflow + steps: + - cleanup +""" + ) + steps_dir = job_dir / "steps" + steps_dir.mkdir() + (steps_dir / "cleanup.md").write_text("# Cleanup\n\nDo cleanup.") + + tools = WorkflowTools( + project_root=project_root, + state_manager=state_manager, + ) + + start_input = StartWorkflowInput( + goal="Run cleanup", + job_name="no_output_job", + workflow_name="main", + ) + await tools.start_workflow(start_input) + + response = await tools.finished_step(FinishedStepInput(outputs={})) + + assert response.status == StepStatus.WORKFLOW_COMPLETE + + async def test_finished_step_validates_files_type_output( + self, project_root: Path, state_manager: StateManager + ) -> None: + """Test finished_step validation for type: files outputs.""" + # Create a job with a files-type output + job_dir = project_root / ".deepwork" / "jobs" / "files_job" + job_dir.mkdir(parents=True) + (job_dir / "job.yml").write_text( + """ +name: files_job +version: "1.0.0" +summary: Job with files output +description: Test job + +steps: + - id: generate + name: Generate + description: Generates multiple files + instructions_file: steps/generate.md + outputs: + reports: + type: files + description: Generated report files + +workflows: + - 
name: main + summary: Main workflow + steps: + - generate +""" + ) + steps_dir = job_dir / "steps" + steps_dir.mkdir() + (steps_dir / "generate.md").write_text("# Generate\n\nGenerate reports.") + + tools = WorkflowTools( + project_root=project_root, + state_manager=state_manager, + ) + + start_input = StartWorkflowInput( + goal="Generate reports", + job_name="files_job", + workflow_name="main", + ) + await tools.start_workflow(start_input) + + # type: files requires a list, not a string + with pytest.raises(ToolError, match="type 'files'.*list of paths"): + await tools.finished_step( + FinishedStepInput(outputs={"reports": "report1.md"}) + ) + + async def test_finished_step_validates_files_type_existence( + self, project_root: Path, state_manager: StateManager + ) -> None: + """Test finished_step validates file existence for type: files outputs.""" + job_dir = project_root / ".deepwork" / "jobs" / "files_job2" + job_dir.mkdir(parents=True) + (job_dir / "job.yml").write_text( + """ +name: files_job2 +version: "1.0.0" +summary: Job with files output +description: Test job + +steps: + - id: generate + name: Generate + description: Generates multiple files + instructions_file: steps/generate.md + outputs: + reports: + type: files + description: Generated report files + +workflows: + - name: main + summary: Main workflow + steps: + - generate +""" + ) + steps_dir = job_dir / "steps" + steps_dir.mkdir() + (steps_dir / "generate.md").write_text("# Generate\n\nGenerate reports.") + + tools = WorkflowTools( + project_root=project_root, + state_manager=state_manager, + ) + + start_input = StartWorkflowInput( + goal="Generate reports", + job_name="files_job2", + workflow_name="main", + ) + await tools.start_workflow(start_input) + + # Create one file but not the other + (project_root / "report1.md").write_text("Report 1") + + with pytest.raises(ToolError, match="file not found at.*missing.md"): + await tools.finished_step( + FinishedStepInput( + outputs={"reports": ["report1.md", "missing.md"]} + ) + ) + + async def test_finished_step_files_type_success( + self, project_root: Path, state_manager: StateManager + ) -> None: + """Test finished_step succeeds with valid type: files outputs.""" + job_dir = project_root / ".deepwork" / "jobs" / "files_job3" + job_dir.mkdir(parents=True) + (job_dir / "job.yml").write_text( + """ +name: files_job3 +version: "1.0.0" +summary: Job with files output +description: Test job + +steps: + - id: generate + name: Generate + description: Generates multiple files + instructions_file: steps/generate.md + outputs: + reports: + type: files + description: Generated report files + +workflows: + - name: main + summary: Main workflow + steps: + - generate +""" + ) + steps_dir = job_dir / "steps" + steps_dir.mkdir() + (steps_dir / "generate.md").write_text("# Generate\n\nGenerate reports.") + + tools = WorkflowTools( + project_root=project_root, + state_manager=state_manager, + ) + + start_input = StartWorkflowInput( + goal="Generate reports", + job_name="files_job3", + workflow_name="main", + ) + await tools.start_workflow(start_input) + + (project_root / "report1.md").write_text("Report 1") + (project_root / "report2.md").write_text("Report 2") + + response = await tools.finished_step( + FinishedStepInput( + outputs={"reports": ["report1.md", "report2.md"]} + ) + ) + + assert response.status == StepStatus.WORKFLOW_COMPLETE diff --git a/tests/unit/test_parser.py b/tests/unit/test_parser.py index 0c968242..e2df69d1 100644 --- a/tests/unit/test_parser.py +++ 
b/tests/unit/test_parser.py @@ -53,47 +53,41 @@ def test_from_dict_file_input(self) -> None: class TestOutputSpec: """Tests for OutputSpec dataclass.""" - def test_simple_output(self) -> None: - """Test simple output without doc spec.""" - output = OutputSpec(file="output.md") - - assert output.file == "output.md" - assert output.doc_spec is None - assert not output.has_doc_spec() - - def test_output_with_doc_spec(self) -> None: - """Test output with doc spec reference.""" - output = OutputSpec(file="report.md", doc_spec=".deepwork/doc_specs/monthly_report.md") - - assert output.file == "report.md" - assert output.doc_spec == ".deepwork/doc_specs/monthly_report.md" - assert output.has_doc_spec() - - def test_from_dict_string(self) -> None: - """Test creating output from string.""" - output = OutputSpec.from_dict("output.md") + def test_file_output(self) -> None: + """Test single file output.""" + output = OutputSpec(name="output.md", type="file", description="An output file") + + assert output.name == "output.md" + assert output.type == "file" + assert output.description == "An output file" + + def test_files_output(self) -> None: + """Test multiple files output.""" + output = OutputSpec( + name="step_instruction_files", type="files", description="Instruction files" + ) - assert output.file == "output.md" - assert output.doc_spec is None - assert not output.has_doc_spec() + assert output.name == "step_instruction_files" + assert output.type == "files" + assert output.description == "Instruction files" - def test_from_dict_simple_object(self) -> None: - """Test creating output from dict without doc spec.""" - data = {"file": "output.md"} - output = OutputSpec.from_dict(data) + def test_from_dict(self) -> None: + """Test creating output from name and dict.""" + data = {"type": "file", "description": "An output file"} + output = OutputSpec.from_dict("output.md", data) - assert output.file == "output.md" - assert output.doc_spec is None - assert not output.has_doc_spec() + assert output.name == "output.md" + assert output.type == "file" + assert output.description == "An output file" - def test_from_dict_with_doc_spec(self) -> None: - """Test creating output from dict with doc spec.""" - data = {"file": "report.md", "doc_spec": ".deepwork/doc_specs/monthly_report.md"} - output = OutputSpec.from_dict(data) + def test_from_dict_files_type(self) -> None: + """Test creating files-type output from dict.""" + data = {"type": "files", "description": "Multiple output files"} + output = OutputSpec.from_dict("reports", data) - assert output.file == "report.md" - assert output.doc_spec == ".deepwork/doc_specs/monthly_report.md" - assert output.has_doc_spec() + assert output.name == "reports" + assert output.type == "files" + assert output.description == "Multiple output files" class TestStep: @@ -106,7 +100,9 @@ def test_from_dict_minimal(self) -> None: "name": "Step 1", "description": "First step", "instructions_file": "steps/step1.md", - "outputs": ["output.md"], + "outputs": { + "output.md": {"type": "file", "description": "An output file"}, + }, } step = Step.from_dict(data) @@ -115,31 +111,34 @@ def test_from_dict_minimal(self) -> None: assert step.description == "First step" assert step.instructions_file == "steps/step1.md" assert len(step.outputs) == 1 - assert step.outputs[0].file == "output.md" - assert not step.outputs[0].has_doc_spec() + assert step.outputs[0].name == "output.md" + assert step.outputs[0].type == "file" assert step.inputs == [] assert step.dependencies == [] - def 
test_from_dict_with_doc_spec_output(self) -> None: - """Test creating step with doc spec-referenced output.""" + def test_from_dict_with_multiple_outputs(self) -> None: + """Test creating step with file and files type outputs.""" data = { "id": "step1", "name": "Step 1", "description": "First step", "instructions_file": "steps/step1.md", - "outputs": [ - "simple_output.md", - {"file": "report.md", "doc_spec": ".deepwork/doc_specs/monthly_report.md"}, - ], + "outputs": { + "report.md": {"type": "file", "description": "A report"}, + "attachments": {"type": "files", "description": "Supporting files"}, + }, } step = Step.from_dict(data) assert len(step.outputs) == 2 - assert step.outputs[0].file == "simple_output.md" - assert not step.outputs[0].has_doc_spec() - assert step.outputs[1].file == "report.md" - assert step.outputs[1].doc_spec == ".deepwork/doc_specs/monthly_report.md" - assert step.outputs[1].has_doc_spec() + output_names = {out.name for out in step.outputs} + assert "report.md" in output_names + assert "attachments" in output_names + + report = next(out for out in step.outputs if out.name == "report.md") + assert report.type == "file" + attachments = next(out for out in step.outputs if out.name == "attachments") + assert attachments.type == "files" def test_from_dict_with_inputs(self) -> None: """Test creating step with inputs.""" @@ -152,7 +151,9 @@ def test_from_dict_with_inputs(self) -> None: {"name": "param1", "description": "Parameter 1"}, {"file": "data.md", "from_step": "step0"}, ], - "outputs": ["output.md"], + "outputs": { + "output.md": {"type": "file", "description": "An output file"}, + }, "dependencies": ["step0"], } step = Step.from_dict(data) @@ -169,7 +170,9 @@ def test_from_dict_exposed_default_false(self) -> None: "name": "Step 1", "description": "First step", "instructions_file": "steps/step1.md", - "outputs": ["output.md"], + "outputs": { + "output.md": {"type": "file", "description": "An output file"}, + }, } step = Step.from_dict(data) @@ -182,7 +185,9 @@ def test_from_dict_exposed_true(self) -> None: "name": "Step 1", "description": "First step", "instructions_file": "steps/step1.md", - "outputs": ["output.md"], + "outputs": { + "output.md": {"type": "file", "description": "An output file"}, + }, "exposed": True, } step = Step.from_dict(data) @@ -225,7 +230,11 @@ def test_validate_dependencies_missing_step(self) -> None: name="Step 1", description="Step", instructions_file="steps/step1.md", - outputs=["output.md"], + outputs=[ + OutputSpec( + name="output.md", type="file", description="Output file" + ) + ], dependencies=["nonexistent"], ) ], @@ -248,7 +257,11 @@ def test_validate_dependencies_circular(self) -> None: name="Step 1", description="Step", instructions_file="steps/step1.md", - outputs=["output.md"], + outputs=[ + OutputSpec( + name="output.md", type="file", description="Output file" + ) + ], dependencies=["step2"], ), Step( @@ -256,7 +269,11 @@ def test_validate_dependencies_circular(self) -> None: name="Step 2", description="Step", instructions_file="steps/step2.md", - outputs=["output.md"], + outputs=[ + OutputSpec( + name="output.md", type="file", description="Output file" + ) + ], dependencies=["step1"], ), ], @@ -288,7 +305,11 @@ def test_validate_file_inputs_missing_step(self) -> None: description="Step", instructions_file="steps/step1.md", inputs=[StepInput(file="data.md", from_step="nonexistent")], - outputs=["output.md"], + outputs=[ + OutputSpec( + name="output.md", type="file", description="Output file" + ) + ], 
dependencies=["nonexistent"], ) ], @@ -311,7 +332,11 @@ def test_validate_file_inputs_not_in_dependencies(self) -> None: name="Step 1", description="Step", instructions_file="steps/step1.md", - outputs=["output.md"], + outputs=[ + OutputSpec( + name="output.md", type="file", description="Output file" + ) + ], ), Step( id="step2", @@ -319,7 +344,11 @@ def test_validate_file_inputs_not_in_dependencies(self) -> None: description="Step", instructions_file="steps/step2.md", inputs=[StepInput(file="data.md", from_step="step1")], - outputs=["output.md"], + outputs=[ + OutputSpec( + name="output.md", type="file", description="Output file" + ) + ], # Missing step1 in dependencies! dependencies=[], ), diff --git a/tests/unit/test_validation.py b/tests/unit/test_validation.py index ccd31637..93fa237b 100644 --- a/tests/unit/test_validation.py +++ b/tests/unit/test_validation.py @@ -22,7 +22,7 @@ def test_validates_simple_job(self) -> None: "name": "Step 1", "description": "First step", "instructions_file": "steps/step1.md", - "outputs": ["output.md"], + "outputs": {"output.md": {"type": "file", "description": "Output"}}, "dependencies": [], } ], @@ -48,7 +48,7 @@ def test_validates_job_with_user_inputs(self) -> None: {"name": "param1", "description": "First parameter"}, {"name": "param2", "description": "Second parameter"}, ], - "outputs": ["output.md"], + "outputs": {"output.md": {"type": "file", "description": "Output"}}, "dependencies": [], } ], @@ -69,7 +69,7 @@ def test_validates_job_with_file_inputs(self) -> None: "name": "Step 1", "description": "First step", "instructions_file": "steps/step1.md", - "outputs": ["data.md"], + "outputs": {"data.md": {"type": "file", "description": "Data output"}}, "dependencies": [], }, { @@ -78,7 +78,7 @@ def test_validates_job_with_file_inputs(self) -> None: "description": "Second step", "instructions_file": "steps/step2.md", "inputs": [{"file": "data.md", "from_step": "step1"}], - "outputs": ["result.md"], + "outputs": {"result.md": {"type": "file", "description": "Result output"}}, "dependencies": ["step1"], }, ], @@ -112,7 +112,7 @@ def test_raises_for_invalid_job_name(self) -> None: "name": "Step 1", "description": "Step", "instructions_file": "steps/step1.md", - "outputs": ["output.md"], + "outputs": {"output.md": {"type": "file", "description": "Output"}}, } ], } @@ -133,7 +133,7 @@ def test_raises_for_invalid_version(self) -> None: "name": "Step 1", "description": "Step", "instructions_file": "steps/step1.md", - "outputs": ["output.md"], + "outputs": {"output.md": {"type": "file", "description": "Output"}}, } ], } @@ -194,7 +194,7 @@ def test_raises_for_invalid_input_format(self) -> None: # Missing description for user input } ], - "outputs": ["output.md"], + "outputs": {"output.md": {"type": "file", "description": "Output"}}, } ], } From 0624ddca64b2af4123517d95f077a094bc5fff75 Mon Sep 17 00:00:00 2001 From: Noah Horton Date: Fri, 6 Feb 2026 11:42:22 -0700 Subject: [PATCH 34/45] Enrich step_expected_outputs with type, description, and format hints MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit step_expected_outputs is now an array of ExpectedOutput objects (name, type, description, syntax_for_finished_step_tool) instead of a plain list of names. This tells agents exactly what format to use when calling finished_step — "filepath" for file outputs and "array of filepaths for all individual files" for files outputs — eliminating the string-vs-list type mismatch errors. 
Co-Authored-By: Claude Opus 4.6 --- .deepwork/common_info/.gitkeep | 2 + .deepwork/jobs/deepwork_jobs/job.yml | 170 ++++++----- .deepwork/jobs/deepwork_jobs/make_new_job.sh | 31 +- .deepwork/jobs/deepwork_jobs/steps/define.md | 99 +++--- .../jobs/deepwork_jobs/steps/fix_jobs.md | 108 ++++++- .../jobs/deepwork_jobs/steps/implement.md | 24 +- .deepwork/jobs/deepwork_jobs/steps/iterate.md | 23 +- .deepwork/jobs/deepwork_jobs/steps/learn.md | 1 + .../deepwork_jobs/templates/job.yml.example | 46 ++- .../deepwork_jobs/templates/job.yml.template | 31 +- .deepwork/schemas/job.schema.json | 125 +++++--- doc/mcp_interface.md | 176 +++++++++-- flake.lock | 6 +- pyproject.toml | 1 + src/deepwork/cli/install.py | 25 ++ src/deepwork/core/parser.py | 42 ++- src/deepwork/mcp/quality_gate.py | 225 ++++++++++++-- src/deepwork/mcp/schemas.py | 53 +++- src/deepwork/mcp/server.py | 5 +- src/deepwork/mcp/tools.py | 79 ++++- src/deepwork/schemas/job.schema.json | 35 ++- .../standard_jobs/deepwork_jobs/job.yml | 128 ++++---- .../deepwork_jobs/steps/define.md | 99 +++--- .../deepwork_jobs/steps/fix_jobs.md | 39 ++- .../deepwork_jobs/steps/implement.md | 24 +- .../deepwork_jobs/steps/iterate.md | 23 +- .../deepwork_jobs/steps/learn.md | 1 + .../deepwork_jobs/templates/job.yml.example | 46 ++- .../deepwork_jobs/templates/job.yml.template | 31 +- tests/fixtures/jobs/complex_job/job.yml | 4 + .../jobs/concurrent_steps_job/job.yml | 6 + tests/fixtures/jobs/exposed_step_job/job.yml | 2 + tests/fixtures/jobs/fruits/job.yml | 2 + tests/fixtures/jobs/job_with_doc_spec/job.yml | 1 + tests/fixtures/jobs/simple_job/job.yml | 1 + .../test_quality_gate_integration.py | 18 +- tests/unit/mcp/test_quality_gate.py | 286 +++++++++++++++++- tests/unit/mcp/test_schemas.py | 137 ++++++++- tests/unit/mcp/test_tools.py | 25 +- tests/unit/test_parser.py | 136 +++++++++ tests/unit/test_validation.py | 120 ++++++++ uv.lock | 161 ++++++++++ 42 files changed, 2065 insertions(+), 532 deletions(-) create mode 100644 .deepwork/common_info/.gitkeep diff --git a/.deepwork/common_info/.gitkeep b/.deepwork/common_info/.gitkeep new file mode 100644 index 00000000..e75c47d6 --- /dev/null +++ b/.deepwork/common_info/.gitkeep @@ -0,0 +1,2 @@ +# This file ensures the .deepwork/common_info directory exists in version control. +# Place shared reference files here that should be available across all jobs. diff --git a/.deepwork/jobs/deepwork_jobs/job.yml b/.deepwork/jobs/deepwork_jobs/job.yml index facf3ce7..e48853a7 100644 --- a/.deepwork/jobs/deepwork_jobs/job.yml +++ b/.deepwork/jobs/deepwork_jobs/job.yml @@ -1,6 +1,6 @@ # yaml-language-server: $schema=.deepwork/schemas/job.schema.json name: deepwork_jobs -version: "1.2.1" +version: "1.3.0" summary: "Creates and manages multi-step AI workflows. Use when defining, implementing, testing, or improving DeepWork jobs." description: | Core commands for managing DeepWork jobs. These commands help you define new multi-step @@ -38,6 +38,8 @@ workflows: - learn changelog: + - version: "1.3.0" + changes: "Migrated quality_criteria to reviews system with run_each targeting and map-format criteria" - version: "1.2.1" changes: "Removed deprecated exposed field from learn step; added learn workflow to make step accessible via MCP" - version: "1.2.0" @@ -70,8 +72,19 @@ steps: - name: job_purpose description: "What complex task or workflow are you trying to accomplish?" 
outputs: - - job.yml + job.yml: + type: file + description: "Definition of the job and its workflows" dependencies: [] + reviews: + - run_each: job.yml + quality_criteria: + "Intermediate Deliverables": "Does the job break out across the logical steps such that there are reviewable intermediate deliverables?" + "Reviews": | + Are there reviews defined for each step? Do particularly critical documents have their own reviews? + Note that the reviewers do not have transcript access, so if the criteria are about the conversation, + then add a `.deepwork/tmp/[step_summary].md` step output file so the agent has a communication channel to the reviewer. + - id: implement name: "Implement Job Steps" description: "Generates step instruction files and syncs slash commands from the job.yml specification. Use after defining a job." @@ -80,15 +93,20 @@ steps: - file: job.yml from_step: define outputs: - - steps/ + step_instruction_files: + type: files + description: "Instruction Markdown files for each step" dependencies: - define - quality_criteria: - - "**Complete Instructions**: Are ALL step instruction files complete (not stubs or placeholders)?" - - "**Specific & Actionable**: Are instructions tailored to each step's purpose, not generic?" - - "**Output Examples**: Does each instruction file show what good output looks like?" - - "**Quality Criteria**: Does each instruction file define quality criteria for its outputs?" - - "**Ask Structured Questions**: Do step instructions that gather user input explicitly use the phrase \"ask structured questions\"?" + reviews: + - run_each: step_instruction_files + quality_criteria: + "Complete Instructions": "Is the instruction file complete (no stubs or placeholders)?" + "Specific & Actionable": "Are instructions tailored to the step's purpose, not generic?" + "Output Examples": "Does the instruction file show what good output looks like?" + "Quality Criteria": "Does the instruction file define quality criteria for its outputs?" + "Ask Structured Questions": "Do instructions that gather user input explicitly use the phrase 'ask structured questions'?" + "Prompt Engineering": "Does the instruction file follow Anthropic's Best Practices for Prompt Engineering?" - id: test name: "Test the New Workflow" @@ -97,20 +115,23 @@ steps: inputs: - file: job.yml from_step: define - - file: steps/ + - file: step_instruction_files from_step: implement outputs: - - test_feedback.md + test_feedback.md: + type: file + description: "Feedback from testing the workflow on a real use case" dependencies: - define - implement - quality_criteria: - - "**User Informed**: Did the agent explain the workflow is ready and ask what to test it on?" - - "**Workflow Invoked**: Was the new workflow actually run on the user's test case via MCP?" - - "**Output Critiqued**: Did the agent identify up to 3 top issues with the output?" - - "**User Feedback Gathered**: Did the agent ask the user about each issue and gather additional feedback?" - - "**Corrections Made**: Were all requested corrections applied to the output?" - - "**User Satisfied**: Did the user confirm the output meets their needs?" + reviews: + - run_each: step + quality_criteria: + "Workflow Invoked": "Was the new workflow actually run on the user's test case via MCP?" + "Output Critiqued": "Did the agent identify up to 3 top issues with the output?" + "User Feedback Gathered": "Did the agent ask the user about each issue and gather additional feedback?"
+ "Corrections Made": "Were all requested corrections applied to the output?" + "User Satisfied": "Did the user confirm the output meets their needs?" - id: iterate name: "Iterate on Workflow Design" @@ -119,21 +140,20 @@ steps: inputs: - file: job.yml from_step: define - - file: steps/ + - file: step_instruction_files from_step: implement outputs: - - job.yml - - steps/ + job.yml: + type: file + description: "Updated job definition with improvements from test run" + step_instruction_files: + type: files + description: "Updated instruction Markdown files for each step" dependencies: - define - implement - test - quality_criteria: - - "**Conversation Reviewed**: Did the agent analyze the test run for inefficiencies and issues?" - - "**Instructions Improved**: Were step instructions updated to address identified problems?" - - "**Quality Criteria Updated**: Were quality criteria adjusted to better match user expectations?" - - "**Tool Usage Considered**: Did the agent consider if different tools would improve the workflow?" - - "**Recap Provided**: Did the agent summarize what was improved and why?" + reviews: [] - id: learn name: "Learn from Job Execution" @@ -143,18 +163,22 @@ steps: - name: job_name description: "Name of the job that was run (optional - will auto-detect from conversation)" outputs: - - AGENTS.md + AGENTS.md: + type: file + description: "Bespoke learnings and run-specific context for the working folder" dependencies: [] - quality_criteria: - - "**Conversation Analyzed**: Did the agent review the conversation for DeepWork job executions?" - - "**Confusion Identified**: Did the agent identify points of confusion, errors, or inefficiencies?" - - "**Instructions Improved**: Were job instructions updated to address identified issues?" - - "**Instructions Concise**: Are instructions free of redundancy and unnecessary verbosity?" - - "**Shared Content Extracted**: Is lengthy/duplicated content extracted into referenced files?" - - "**Bespoke Learnings Captured**: Were run-specific learnings added to AGENTS.md?" - - "**File References Used**: Do AGENTS.md entries reference other files where appropriate?" - - "**Working Folder Correct**: Is AGENTS.md in the correct working folder for the job?" - - "**Generalizable Separated**: Are generalizable improvements in instructions, not AGENTS.md?" + reviews: + - run_each: step + quality_criteria: + "Conversation Analyzed": "Did the agent review the conversation for DeepWork job executions?" + "Confusion Identified": "Did the agent identify points of confusion, errors, or inefficiencies?" + "Instructions Improved": "Were job instructions updated to address identified issues?" + "Instructions Concise": "Are instructions free of redundancy and unnecessary verbosity?" + "Shared Content Extracted": "Is lengthy/duplicated content extracted into referenced files?" + "Bespoke Learnings Captured": "Were run-specific learnings added to AGENTS.md?" + "File References Used": "Do AGENTS.md entries reference other files where appropriate?" + "Working Folder Correct": "Is AGENTS.md in the correct working folder for the job?" + "Generalizable Separated": "Are generalizable improvements in instructions, not AGENTS.md?" 
- id: fix_settings name: "Fix Settings Files" @@ -162,54 +186,64 @@ steps: instructions_file: steps/fix_settings.md inputs: [] outputs: - - .claude/settings.json + settings.json: + type: file + description: "Cleaned up Claude settings file with legacy permissions removed" dependencies: [] - quality_criteria: - - "**DeepWork Skills Removed**: Are `Skill(...)` entries matching jobs in `.deepwork/jobs/` removed?" - - "**Non-DeepWork Skills Preserved**: Are skills NOT matching DeepWork jobs left intact?" - - "**make_new_job.sh Preserved**: Is the `Bash(...)` permission for `make_new_job.sh` preserved (if present)?" - - "**Rules Hooks Removed**: Are all DeepWork Rules hooks and permissions removed?" - - "**Duplicate Hooks Removed**: Are duplicate hook entries consolidated or removed?" - - "**Hardcoded Paths Removed**: Are user-specific hardcoded paths (like `/Users/*/...`) removed?" - - "**Deprecated Commands Removed**: Are deprecated commands like `deepwork hook *` removed?" - - "**Valid JSON**: Is settings.json still valid JSON after modifications?" - - "**Backup Created**: Was a backup of the original settings created before modifications?" + reviews: + - run_each: step + quality_criteria: + "DeepWork Skills Removed": "Are `Skill(...)` entries matching jobs in `.deepwork/jobs/` removed?" + "Non-DeepWork Skills Preserved": "Are skills NOT matching DeepWork jobs left intact?" + "make_new_job.sh Preserved": "Is the `Bash(...)` permission for `make_new_job.sh` preserved (if present)?" + "Rules Hooks Removed": "Are all DeepWork Rules hooks and permissions removed?" + "Duplicate Hooks Removed": "Are duplicate hook entries consolidated or removed?" + "Hardcoded Paths Removed": "Are user-specific hardcoded paths (like `/Users/*/...`) removed?" + "Deprecated Commands Removed": "Are deprecated commands like `deepwork hook *` removed?" + "Valid JSON": "Is settings.json still valid JSON after modifications?" + "Backup Created": "Was a backup of the original settings created before modifications?" - id: fix_jobs name: "Fix Job Definitions" description: "Updates job.yml files and step instructions to current DeepWork format, removing deprecated fields and migrating to new structures." instructions_file: steps/fix_jobs.md inputs: - - file: .claude/settings.json + - file: settings.json from_step: fix_settings outputs: - - .deepwork/jobs/ + job_definitions: + type: files + description: "Updated job.yml files and step instructions in current DeepWork format" dependencies: - fix_settings - quality_criteria: - - "**Exposed Field Addressed**: Are `exposed: true` fields removed or noted as deprecated?" - - "**Stop Hooks Migrated**: Are `stop_hooks` migrated to `hooks.after_agent` format?" - - "**Removed Steps Cleaned**: Are references to removed steps (like `review_job_spec`) updated?" - - "**Orphaned Steps Fixed**: For jobs with no workflows, is there a single workflow (named after the job) containing all steps? For jobs with existing workflows, does each orphan get its own workflow (named after the step)?" - - "**Valid YAML**: Are all job.yml files valid YAML?" + reviews: + - run_each: step + quality_criteria: + "Exposed Field Addressed": "Are `exposed: true` fields removed or noted as deprecated?" + "Stop Hooks Migrated": "Are `stop_hooks` migrated to `hooks.after_agent` format?" + "Removed Steps Cleaned": "Are references to removed steps (like `review_job_spec`) updated?" + "Orphaned Steps Fixed": "For jobs with no workflows, is there a single workflow (named after the job) containing all steps? 
For jobs with existing workflows, does each orphan get its own workflow (named after the step)?" + "Valid YAML": "Are all job.yml files valid YAML?" - id: errata name: "Clean Up Errata" description: "Removes obsolete files and folders from prior DeepWork versions, including old skill directories, temp files, and deprecated configurations." instructions_file: steps/errata.md - outputs: [] + outputs: {} inputs: - - file: .deepwork/jobs/ + - file: job_definitions from_step: fix_jobs dependencies: - fix_settings - fix_jobs - quality_criteria: - - "**Legacy Job Skills Removed**: Are legacy skill folders for each job removed from `.claude/skills/` and `.gemini/skills/`?" - - "**Deepwork Skill Preserved**: Does the `deepwork` skill folder still exist in `.claude/skills/deepwork/`?" - - "**Temp Files Cleaned**: Are `.deepwork/tmp/` contents cleaned appropriately?" - - "**Rules Folder Removed**: Is `.deepwork/rules/` folder backed up and removed (fully deprecated)?" - - "**Rules Job Removed**: Is `.deepwork/jobs/deepwork_rules/` removed if present?" - - "**Config Version Updated**: Is `.deepwork/config.yml` using current version format?" - - "**DeepWork Re-installed**: Was `deepwork install` run after cleanup, and does it complete without errors?" - - "**Git Status Clean**: Are changes ready to be committed (no untracked garbage files)?" + reviews: + - run_each: step + quality_criteria: + "Legacy Job Skills Removed": "Are legacy skill folders for each job removed from `.claude/skills/` and `.gemini/skills/`?" + "Deepwork Skill Preserved": "Does the `deepwork` skill folder still exist in `.claude/skills/deepwork/`?" + "Temp Files Cleaned": "Are `.deepwork/tmp/` contents cleaned appropriately?" + "Rules Folder Removed": "Is `.deepwork/rules/` folder backed up and removed (fully deprecated)?" + "Rules Job Removed": "Is `.deepwork/jobs/deepwork_rules/` removed if present?" + "Config Version Updated": "Is `.deepwork/config.yml` using current version format?" + "DeepWork Re-installed": "Was `deepwork install` run after cleanup, and does it complete without errors?" + "Git Status Clean": "Are changes ready to be committed (no untracked garbage files)?" diff --git a/.deepwork/jobs/deepwork_jobs/make_new_job.sh b/.deepwork/jobs/deepwork_jobs/make_new_job.sh index c561d6d2..c87f40e8 100755 --- a/.deepwork/jobs/deepwork_jobs/make_new_job.sh +++ b/.deepwork/jobs/deepwork_jobs/make_new_job.sh @@ -78,43 +78,47 @@ main() { mkdir -p "$job_path/steps" mkdir -p "$job_path/hooks" mkdir -p "$job_path/templates" + mkdir -p "$job_path/scripts" # Add .gitkeep files to empty directories touch "$job_path/hooks/.gitkeep" touch "$job_path/templates/.gitkeep" + touch "$job_path/scripts/.gitkeep" # Create AGENTS.md file cat > "$job_path/AGENTS.md" << 'EOF' # Job Management -This folder and its subfolders are managed using the `deepwork_jobs` slash commands. +This folder and its subfolders are managed using `deepwork_jobs` workflows. -## Recommended Commands +## Recommended Workflows -- `/deepwork_jobs.define` - Create or modify the job.yml specification -- `/deepwork_jobs.implement` - Generate step instruction files from the specification -- `/deepwork_jobs.learn` - Improve instructions based on execution learnings +- `deepwork_jobs/new_job` - Full lifecycle: define → implement → test → iterate +- `deepwork_jobs/learn` - Improve instructions based on execution learnings +- `deepwork_jobs/repair` - Clean up and migrate from prior DeepWork versions ## Directory Structure ``` . 
├── AGENTS.md # This file - project context and guidance -├── job.yml # Job specification (created by /deepwork_jobs.define) -├── steps/ # Step instruction files (created by /deepwork_jobs.implement) +├── job.yml # Job specification (created by define step) +├── steps/ # Step instruction files (created by implement step) │ └── *.md # One file per step ├── hooks/ # Custom validation scripts and prompts │ └── *.md|*.sh # Hook files referenced in job.yml +├── scripts/ # Reusable scripts and utilities created during job execution +│ └── *.sh|*.py # Helper scripts referenced in step instructions └── templates/ # Example file formats and templates └── *.md|*.yml # Templates referenced in step instructions ``` ## Editing Guidelines -1. **Use slash commands** for structural changes (adding steps, modifying job.yml) +1. **Use workflows** for structural changes (adding steps, modifying job.yml) 2. **Direct edits** are fine for minor instruction tweaks -3. **Run `/deepwork_jobs.learn`** after executing job steps to capture improvements -4. **Run `deepwork sync`** after any changes to regenerate commands +3. **Run `deepwork_jobs/learn`** after executing job steps to capture improvements +4. **Run `deepwork install`** after any changes to regenerate commands EOF info "Created directory structure:" @@ -122,13 +126,8 @@ EOF echo " ├── AGENTS.md" echo " ├── steps/" echo " ├── hooks/.gitkeep" + echo " ├── scripts/.gitkeep" echo " └── templates/.gitkeep" - - echo "" - info "Next steps:" - echo " 1. Run '/deepwork_jobs.define' to create the job.yml specification" - echo " 2. Run '/deepwork_jobs.implement' to generate step instructions" - echo " 3. Run 'deepwork sync' to create slash commands" } main "$@" diff --git a/.deepwork/jobs/deepwork_jobs/steps/define.md b/.deepwork/jobs/deepwork_jobs/steps/define.md index 3cd01848..18c268eb 100644 --- a/.deepwork/jobs/deepwork_jobs/steps/define.md +++ b/.deepwork/jobs/deepwork_jobs/steps/define.md @@ -116,20 +116,7 @@ For each major phase they mentioned, ask structured questions to gather details: - Are there any quality checks or validation needed? - What makes a good vs. bad output for this step? - **Important**: Quality criteria belong in the `quality_criteria` field of job.yml, NOT in the step details. When skills are generated, quality criteria are automatically included in the output. Do not duplicate them in step instructions or details—this causes redundancy and confusion. - -6. **Agent Delegation** (optional) - - Should this step be executed by a specific agent type? - - Use the `agent` field when the step should run in a forked context with a specific agent - - When `agent` is set, the generated skill automatically includes `context: fork` - - Available agent types: - - `general-purpose` - Standard agent for multi-step tasks - - ```yaml - steps: - - id: research_step - agent: general-purpose # Delegates to the general-purpose agent - ``` + **Important**: When skills are generated, quality criteria are automatically included in the output. Do not duplicate them in step instructions or details—this causes redundancy and confusion. **Note**: You're gathering this information to understand what instructions will be needed, but you won't create the instruction files yet - that happens in the `implement` step. 
@@ -159,56 +146,53 @@ After gathering information about all steps: - Job description (detailed multi-line explanation) - Version number (start with 1.0.0) -### Step 4: Define Quality Validation Hooks +### Step 4: Define Quality Reviews -For each step, consider whether it would benefit from **quality validation loops**. Quality hooks allow the AI agent to iteratively refine its work until quality criteria are met. +For each step, define **reviews** that evaluate the step's outputs. Reviews run automatically when a step completes and provide quality validation loops. -**Ask structured questions about quality validation:** -- "Are there specific quality criteria that must be met for this step?" -- "Would you like the agent to validate its work before completing?" -- "What would make you send the work back for revision?" +For intermediate outputs between steps, reviews let you make sure you don't go too far down the wrong path. Add reviews that confirm things that could cause problems later. For example, in a report creation process, you might have an intermediate step that performs a number of queries on the data and records the results so that later report-writing steps can synthesize that information into a coherent narrative. In this case, you would want to add a review that checks that the queries' SQL matches up with the description of the queries in the job description. -**Quality hooks are particularly valuable for:** -- Steps with complex outputs that need multiple checks -- Steps where quality is critical (final deliverables) -- Steps with subjective quality criteria that benefit from AI self-review +For final outputs, reviews let you make sure the output meets the user's expectations. For example, with a data-centric report job, you might have one review on the final output for consistency with style guidelines and tone, and a separate review on the data backing to make sure the claims in the report are supported by the data from earlier steps and all have citations. -**Three types of hooks are supported:** - -1. **Inline Prompt** (`prompt`) - Best for simple quality criteria - ```yaml - hooks: - after_agent: - - prompt: | - Verify the output meets these criteria: - 1. Contains at least 5 competitors - 2. Each competitor has a description - 3. Selection rationale is clear - ``` +**Any job with written final output must always have reviews**. Some suggested ones are: +- Ensure claims have citations and the citations are not hallucinated +- Ensure the output follows the style guidelines and tone +- Ensure the output is well-organized and easy to read +- Ensure obvious questions the content raises have answers provided +- Ensure visual formatting is correct (for formats like PDF or HTML where the visual output matters) +- Ensure the content matches what the intended audience expects (e.g., executives vs. engineers) -2. **Prompt File** (`prompt_file`) - For detailed/reusable criteria - ```yaml - hooks: - after_agent: - - prompt_file: hooks/quality_check.md - ``` +**Reviews format:** -3.
**Script** (`script`) - For programmatic validation (tests, linting) - ```yaml - hooks: - after_agent: - - script: hooks/run_tests.sh - ``` +Each review specifies `run_each` (what to review) and `quality_criteria` (a map of criterion name to question): -**Multiple hooks can be combined:** ```yaml -hooks: - after_agent: - - script: hooks/lint_output.sh - - prompt: "Verify the content is comprehensive and well-organized" +reviews: + - run_each: step # Review all outputs together + quality_criteria: + "Consistent Style": "Do all files follow the same structure?" + "Complete Coverage": "Are all required topics covered?" + - run_each: report_files # Review each file in a 'files'-type output individually + quality_criteria: + "Well Written": "Is the content clear and well-organized?" + "Data-Backed": "Are claims supported by data?" ``` -**Encourage prompt-based hooks** - They leverage the AI's ability to understand context and make nuanced quality judgments. Script hooks are best for objective checks (syntax, format, tests). +**`run_each` options:** +- `step` — Review runs once with ALL output files + input files +- `<output_name>` where output is `type: file` — Review runs once with that specific file +- `<output_name>` where output is `type: files` — Review runs once per file in the list + +**Reviews are particularly valuable for:** +- Steps with complex outputs that need multiple quality checks +- Steps where quality is critical (final deliverables) +- Steps with subjective quality criteria that benefit from AI self-review +- Steps producing multiple files where each file needs individual review + +**For steps with no quality checks needed, use an empty reviews list:** +```yaml +reviews: [] +``` ### Step 5: Create the Job Directory and Specification @@ -220,13 +204,6 @@ Only after you have complete understanding, create the job directory and `job.ym .deepwork/jobs/deepwork_jobs/make_new_job.sh [job_name] ``` -This creates: -- `.deepwork/jobs/[job_name]/` - Main job directory -- `.deepwork/jobs/[job_name]/steps/` - For step instruction files -- `.deepwork/jobs/[job_name]/hooks/` - For custom validation scripts -- `.deepwork/jobs/[job_name]/templates/` - For example file formats -- `.deepwork/jobs/[job_name]/AGENTS.md` - Job management guidance -**Then create the job.yml file** at `.deepwork/jobs/[job_name]/job.yml` (Where `[job_name]` is the name of the NEW job you're creating, e.g., `competitive_research`) diff --git a/.deepwork/jobs/deepwork_jobs/steps/fix_jobs.md b/.deepwork/jobs/deepwork_jobs/steps/fix_jobs.md index 7f3675a5..ccb5e4a4 100644 --- a/.deepwork/jobs/deepwork_jobs/steps/fix_jobs.md +++ b/.deepwork/jobs/deepwork_jobs/steps/fix_jobs.md @@ -36,8 +36,11 @@ Audit and repair the job at `.deepwork/jobs/[job_name]/job.yml`: 2. Migrate `stop_hooks` to `hooks.after_agent` format 3. Remove references to deleted steps (like `review_job_spec`) 4. Fix orphaned steps by adding them to workflows -5. Bump version and add changelog entry if changes were made -6. Validate YAML syntax +5. Migrate `outputs` from array format to map format with `type` and `description` +6. Update any `file` inputs that reference renamed output keys +7. Migrate `quality_criteria` arrays to `reviews` format (run_each + map criteria) +8. Bump version and add changelog entry if changes were made +9. Validate YAML syntax Report what changes were made. ``` @@ -152,7 +155,96 @@ workflows: This ensures all steps remain accessible via the MCP interface while preserving the existing workflow structure.
-### Step 6: Update Version Numbers +### Step 6: Migrate `outputs` from Array Format to Map Format + +The `outputs` field on steps changed from an array of strings/objects to a map with typed entries. Every output must now have a key (identifier), a `type` (`file` or `files`), and a `description`. + +**Before (legacy array format):** +```yaml +steps: + - id: define + outputs: + - job.yml + - steps/ + - file: report.md + doc_spec: .deepwork/doc_specs/report.md +``` + +**After (current map format):** +```yaml +steps: + - id: define + outputs: + job.yml: + type: file + description: "The job definition file" + step_instruction_files: + type: files + description: "Instruction Markdown files for each step" + report.md: + type: file + description: "The generated report" +``` + +**Migration rules:** + +1. **Plain filename strings** (e.g., `- job.yml`, `- output.md`): Use the filename as the key, set `type: file`, add a `description`. +2. **Directory strings ending in `/`** (e.g., `- steps/`, `- competitor_profiles/`): Choose a descriptive key name (e.g., `step_instruction_files`, `competitor_profiles`), set `type: files`, add a `description`. +3. **Objects with `doc_spec`** (e.g., `- file: report.md` with `doc_spec: ...`): Drop the `doc_spec` field entirely, use the filename as the key, set `type: file`, add a `description`. +4. **`description` is required** on every output entry. Write a short sentence describing what the output contains. + +**Update `file` inputs that reference renamed outputs:** + +When a directory output key changes (e.g., `steps/` becomes `step_instruction_files`), any downstream step with a `file` input referencing the old name must be updated to use the new key. + +```yaml +# Before: input references old directory name +steps: + - id: implement + inputs: + - file: steps/ + from_step: define + +# After: input uses the new output key +steps: + - id: implement + inputs: + - file: step_instruction_files + from_step: define +``` + +### Step 7: Migrate `quality_criteria` to `reviews` + +The flat `quality_criteria` field on steps has been replaced by the `reviews` array. Each review specifies `run_each` (what to review) and `quality_criteria` as a map of criterion name to question. + +**Before (deprecated):** +```yaml +steps: + - id: my_step + quality_criteria: + - "**Complete**: Is the output complete?" + - "**Accurate**: Is the data accurate?" +``` + +**After (current format):** +```yaml +steps: + - id: my_step + reviews: + - run_each: step + quality_criteria: + "Complete": "Is the output complete?" + "Accurate": "Is the data accurate?" +``` + +**Migration rules:** + +1. **Parse the old format**: Each string typically follows `**Name**: Question` format. Extract the name (bold text) as the map key and the question as the value. +2. **Choose `run_each`**: Default to `step` (reviews all outputs together). If the step has a single primary output, consider using that output name instead. +3. **For steps with no quality_criteria**: Use `reviews: []` +4. **Remove the old field**: Delete the `quality_criteria` array entirely after migration. 
+ +### Step 8: Update Version Numbers If you made significant changes to a job, bump its version number: @@ -188,6 +280,12 @@ Warning: Job 'my_job' has steps not included in any workflow: standalone_step - If the job has NO workflows: Create one workflow named `my_job` with all steps in order - If the job has SOME workflows: Add a `standalone_step` workflow containing just that step +### Issue: `outputs` is an array instead of an object +``` +Error: Step 'define' outputs should be an object but got array +``` +**Fix:** Convert from the legacy array format to the map format. Each array entry becomes a key in the map with `type` (`file` or `files`) and `description`. See Step 6 for detailed migration rules. Also update any `file` inputs in downstream steps if an output key was renamed. + ## Jobs to Check For each job in `.deepwork/jobs/`, check: @@ -196,9 +294,11 @@ For each job in `.deepwork/jobs/`, check: |-------|------------------| | `exposed` field | Remove from all steps | | `stop_hooks` | Migrate to `hooks.after_agent` | +| `outputs` format | Migrate from array to map with `type` and `description` | +| `quality_criteria` | Migrate to `reviews` with `run_each` and map-format criteria | | Workflow steps | Remove references to deleted steps | | Dependencies | Update to valid step IDs | -| File inputs | Update `from_step` references | +| File inputs | Update `from_step` references; update keys for renamed outputs | | Version | Bump if changes were made | ## Important Notes diff --git a/.deepwork/jobs/deepwork_jobs/steps/implement.md b/.deepwork/jobs/deepwork_jobs/steps/implement.md index 7be269a5..10880176 100644 --- a/.deepwork/jobs/deepwork_jobs/steps/implement.md +++ b/.deepwork/jobs/deepwork_jobs/steps/implement.md @@ -47,28 +47,27 @@ For each step in the job.yml, create a comprehensive instruction file at `.deepw 3. **Provide examples** - Show what good output looks like 4. **Explain the "why"** - Help the user understand the step's role in the workflow 5. **Quality over quantity** - Detailed, actionable instructions are better than vague ones -6. **Align with stop hooks** - If the step has `stop_hooks` defined, ensure the quality criteria in the instruction file match the validation criteria in the hooks +6. **Align with reviews** - If the step has `reviews` defined, ensure the quality criteria in the instruction file match the review criteria 7. **Ask structured questions** - When a step has user inputs, the instructions MUST explicitly tell the agent to "ask structured questions" using the AskUserQuestion tool to gather that information. Never use generic phrasing like "ask the user" - always use "ask structured questions" -### Handling Quality Hooks +### Handling Reviews -If a step in the job.yml has `hooks.after_agent` defined, the generated instruction file should: +If a step in the job.yml has `reviews` defined, the generated instruction file should: -1. **Mirror the quality criteria** - The "Quality Criteria" section should match what the hooks will validate +1. **Mirror the quality criteria** - The "Quality Criteria" section should match what the reviews will validate 2. **Be explicit about success** - Help the agent understand when the step is truly complete -3. **Include the promise pattern** - Mention that `✓ Quality Criteria Met` should be included when criteria are met +3. 
**Explain what's reviewed** - If reviews target specific outputs (via `run_each`), mention which outputs will be reviewed **Example: If the job.yml has:** ```yaml - id: research_competitors name: "Research Competitors" - hooks: - after_agent: - - prompt: | - Verify the research meets criteria: - 1. Each competitor has at least 3 data points - 2. Sources are cited - 3. Information is current (within last year) + reviews: + - run_each: research_notes.md + quality_criteria: + "Sufficient Data": "Does each competitor have at least 3 data points?" + "Sources Cited": "Are sources cited for key claims?" + "Current Information": "Is the information current (within last year)?" ``` **The instruction file should include:** @@ -78,7 +77,6 @@ If a step in the job.yml has `hooks.after_agent` defined, the generated instruct - Each competitor has at least 3 distinct data points - All information is sourced with citations - Data is current (from within the last year) -- When all criteria are met, include `✓ Quality Criteria Met` in your response ``` This alignment ensures the AI agent knows exactly what will be validated and can self-check before completing. diff --git a/.deepwork/jobs/deepwork_jobs/steps/iterate.md b/.deepwork/jobs/deepwork_jobs/steps/iterate.md index fb1f56c8..73dcb589 100644 --- a/.deepwork/jobs/deepwork_jobs/steps/iterate.md +++ b/.deepwork/jobs/deepwork_jobs/steps/iterate.md @@ -68,28 +68,33 @@ For each step that needs improvement: - Be direct and actionable - Use bullet points where appropriate -### Step 4: Update Quality Criteria +### Step 4: Update Reviews -Review and update quality criteria in two places: +Review and update quality reviews in two places: 1. **In step instruction files** - The "Quality Criteria" section should reflect what the user actually cared about during testing -2. **In job.yml** - If steps have `quality_criteria` or `stop_hooks`, update them to: +2. **In job.yml** - Update the `reviews` array on each step to: - Remove criteria that weren't relevant - Add criteria based on user feedback - Make existing criteria more specific + - Adjust `run_each` targeting if outputs should be reviewed differently **Example improvement:** ```yaml # Before -quality_criteria: - - "Report is formatted correctly" +reviews: + - run_each: step + quality_criteria: + "Formatted Correctly": "Is the report formatted correctly?" # After -quality_criteria: - - "Report uses distinct colors for each data series in charts" - - "Tables have sufficient padding and font size for readability" - - "Executive summary is understandable by non-technical readers" +reviews: + - run_each: report.md + quality_criteria: + "Distinct Colors": "Does the report use distinct colors for each data series in charts?" + "Readable Tables": "Do tables have sufficient padding and font size for readability?" + "Clear Summary": "Is the executive summary understandable by non-technical readers?" 
``` ### Step 5: Consider Alternative Tools diff --git a/.deepwork/jobs/deepwork_jobs/steps/learn.md b/.deepwork/jobs/deepwork_jobs/steps/learn.md index f6d48c78..254a332d 100644 --- a/.deepwork/jobs/deepwork_jobs/steps/learn.md +++ b/.deepwork/jobs/deepwork_jobs/steps/learn.md @@ -88,6 +88,7 @@ For each generalizable learning: - Include helpful examples - Clarify ambiguous instructions - Update quality criteria if needed + - If you identify problems in the outcomes of steps, those usually should be reflected in an update to the `reviews` for that step in `job.yml` (adjusting criteria names, questions, or `run_each` targeting) 3. **Keep instructions concise** - Avoid redundancy - don't repeat the same guidance in multiple places diff --git a/.deepwork/jobs/deepwork_jobs/templates/job.yml.example b/.deepwork/jobs/deepwork_jobs/templates/job.yml.example index 7cc6e3bb..f321c355 100644 --- a/.deepwork/jobs/deepwork_jobs/templates/job.yml.example +++ b/.deepwork/jobs/deepwork_jobs/templates/job.yml.example @@ -14,6 +14,15 @@ changelog: - version: "1.0.0" changes: "Initial job creation" +workflows: + - name: full_analysis + summary: "Complete competitive research from identification to positioning" + steps: + - identify_competitors + - research_competitors + - comparative_analysis + - positioning_recommendations + steps: - id: identify_competitors name: "Identify Competitors" @@ -25,8 +34,11 @@ steps: - name: product_category description: "The product category" outputs: - - competitors_list.md + competitors_list.md: + type: file + description: "Vetted list of direct and indirect competitors" dependencies: [] + reviews: [] - id: research_competitors name: "Research Competitors" @@ -36,17 +48,17 @@ steps: - file: competitors_list.md from_step: identify_competitors outputs: - - research_notes.md + research_notes.md: + type: file + description: "Detailed research notes on each competitor" dependencies: - identify_competitors - hooks: - after_agent: - - prompt: | - Verify the research meets criteria: - 1. Each competitor has at least 3 data points - 2. Sources are cited - 3. Information is current (within last year) - If ALL criteria are met, include `✓ Quality Criteria Met`. + reviews: + - run_each: research_notes.md + quality_criteria: + "Sufficient Data": "Does each competitor have at least 3 data points?" + "Sources Cited": "Are sources cited for key claims?" + "Current Information": "Is the information current (within last year)?" - id: comparative_analysis name: "Comparative Analysis" @@ -56,9 +68,12 @@ steps: - file: research_notes.md from_step: research_competitors outputs: - - comparison_matrix.md + comparison_matrix.md: + type: file + description: "Side-by-side comparison matrix of all competitors" dependencies: - research_competitors + reviews: [] - id: positioning_recommendations name: "Positioning Recommendations" @@ -68,6 +83,13 @@ steps: - file: comparison_matrix.md from_step: comparative_analysis outputs: - - positioning_report.md + positioning_report.md: + type: file + description: "Strategic positioning recommendations" dependencies: - comparative_analysis + reviews: + - run_each: step + quality_criteria: + "Actionable": "Are recommendations specific and actionable?" + "Data-Backed": "Are recommendations supported by the competitive analysis data?" 
diff --git a/.deepwork/jobs/deepwork_jobs/templates/job.yml.template b/.deepwork/jobs/deepwork_jobs/templates/job.yml.template index 7dcf34e9..0774c5d7 100644 --- a/.deepwork/jobs/deepwork_jobs/templates/job.yml.template +++ b/.deepwork/jobs/deepwork_jobs/templates/job.yml.template @@ -20,6 +20,13 @@ changelog: - version: "1.0.0" changes: "Initial job creation" +workflows: + - name: [workflow_name] + summary: "[What this workflow accomplishes]" + steps: + - [step_id] + - [another_step] + steps: - id: [step_id] name: "[Step Name]" @@ -32,27 +39,29 @@ steps: # - file: [filename_or_path] # from_step: [previous_step_id] outputs: - - [output_filename_or_path] # e.g., "report.md" or "reports/analysis.md" + [output_name]: + type: file + description: "[What this output contains]" dependencies: [] # List of step IDs that must complete first + reviews: + - run_each: step # or a specific output name + quality_criteria: + "[Criterion Name]": "[Question to evaluate]" + "[Another Criterion]": "[Another question]" # Optional: Delegate to a specific agent type (uses context: fork) # agent: general-purpose # or other agent type - # Optional: Quality validation hooks - hooks: - after_agent: - - prompt: | - Verify this step's output meets quality criteria: - 1. [Criterion 1] - 2. [Criterion 2] - If ALL criteria are met, include `✓ Quality Criteria Met`. - id: [another_step] name: "[Another Step]" description: "[What this step does]" instructions_file: steps/[another_step].md inputs: - - file: [output_filename_or_path] + - file: [output_name] from_step: [step_id] outputs: - - [another_output_path] + [another_output]: + type: file + description: "[What this output contains]" dependencies: - [step_id] # This step requires the previous step + reviews: [] # Empty if no quality checks needed diff --git a/.deepwork/schemas/job.schema.json b/.deepwork/schemas/job.schema.json index f00d7550..4226f708 100644 --- a/.deepwork/schemas/job.schema.json +++ b/.deepwork/schemas/job.schema.json @@ -4,7 +4,12 @@ "title": "DeepWork Job Definition", "description": "Schema for DeepWork job.yml files. Jobs are multi-step workflows executed by AI agents.", "type": "object", - "required": ["name", "version", "summary", "steps"], + "required": [ + "name", + "version", + "summary", + "steps" + ], "additionalProperties": false, "properties": { "name": { @@ -59,7 +64,11 @@ }, "workflow": { "type": "object", - "required": ["name", "summary", "steps"], + "required": [ + "name", + "summary", + "steps" + ], "additionalProperties": false, "description": "A named workflow grouping steps into a sequence", "properties": { @@ -101,7 +110,10 @@ }, "changelogEntry": { "type": "object", - "required": ["version", "changes"], + "required": [ + "version", + "changes" + ], "additionalProperties": false, "properties": { "version": { @@ -118,9 +130,16 @@ }, "step": { "type": "object", - "required": ["id", "name", "description", "instructions_file", "outputs"], + "required": [ + "id", + "name", + "description", + "instructions_file", + "outputs", + "reviews" + ], "additionalProperties": false, - "description": "A single step in a job, representing one unit of work", + "description": "A single Step in a job, representing one material unit of work with evaluatable outputs", "properties": { "id": { "$ref": "#/$defs/stepId", @@ -149,9 +168,9 @@ } }, "outputs": { - "type": "array", - "description": "List of output files/directories produced by this step. 
May be empty for cleanup or validation steps.", - "items": { + "type": "object", + "description": "Named outputs produced by this step. Keys are output identifiers, values describe type and purpose. May be empty for cleanup or validation steps.", + "additionalProperties": { "$ref": "#/$defs/stepOutput" } }, @@ -184,12 +203,11 @@ "description": "If true, step is hidden from menus. Alias for exposed: false. Default: false", "default": false }, - "quality_criteria": { + "reviews": { "type": "array", - "description": "Declarative quality criteria for evaluating step outputs. Rendered with standard evaluation framing.", + "description": "Quality reviews to run when step completes. Can be empty.", "items": { - "type": "string", - "minLength": 1 + "$ref": "#/$defs/review" } }, "agent": { @@ -211,7 +229,10 @@ }, "userParameterInput": { "type": "object", - "required": ["name", "description"], + "required": [ + "name", + "description" + ], "additionalProperties": false, "description": "A user-provided parameter input that will be requested at runtime", "properties": { @@ -229,7 +250,10 @@ }, "fileInput": { "type": "object", - "required": ["file", "from_step"], + "required": [ + "file", + "from_step" + ], "additionalProperties": false, "description": "A file input from a previous step's output", "properties": { @@ -246,32 +270,26 @@ } }, "stepOutput": { - "oneOf": [ - { - "type": "string", - "minLength": 1, - "description": "Simple output file path (backward compatible format)" - }, - { - "$ref": "#/$defs/outputWithDocSpec" - } - ] - }, - "outputWithDocSpec": { "type": "object", - "required": ["file"], + "required": [ + "type", + "description" + ], "additionalProperties": false, - "description": "Output file with optional document specification reference", + "description": "Output specification with type information indicating single file or multiple files", "properties": { - "file": { + "type": { "type": "string", - "minLength": 1, - "description": "Output file path" + "enum": [ + "file", + "files" + ], + "description": "Whether this output is a single file ('file') or multiple files ('files')" }, - "doc_spec": { + "description": { "type": "string", - "pattern": "^\\.deepwork/doc_specs/[a-z][a-z0-9_-]*\\.md$", - "description": "Path to doc spec file defining the expected document structure. 
Example: '.deepwork/doc_specs/report.md'" + "minLength": 1, + "description": "Description of what this output contains" } } }, @@ -308,7 +326,9 @@ "description": "A hook action - exactly one of: prompt (inline text), prompt_file (external file), or script (shell script)", "oneOf": [ { - "required": ["prompt"], + "required": [ + "prompt" + ], "additionalProperties": false, "properties": { "prompt": { @@ -319,7 +339,9 @@ } }, { - "required": ["prompt_file"], + "required": [ + "prompt_file" + ], "additionalProperties": false, "properties": { "prompt_file": { @@ -330,7 +352,9 @@ } }, { - "required": ["script"], + "required": [ + "script" + ], "additionalProperties": false, "properties": { "script": { @@ -341,6 +365,31 @@ } } ] + }, + "review": { + "type": "object", + "required": [ + "run_each", + "quality_criteria" + ], + "additionalProperties": false, + "description": "A quality review that evaluates step outputs against criteria", + "properties": { + "run_each": { + "type": "string", + "minLength": 1, + "description": "Either 'step' to review all outputs together, or the name of a specific output to review individually" + }, + "quality_criteria": { + "type": "object", + "description": "Map of criterion name to criterion question", + "additionalProperties": { + "type": "string", + "minLength": 1 + }, + "minProperties": 1 + } + } } } -} +} \ No newline at end of file diff --git a/doc/mcp_interface.md b/doc/mcp_interface.md index 967f470b..b5d04ddb 100644 --- a/doc/mcp_interface.md +++ b/doc/mcp_interface.md @@ -10,7 +10,7 @@ This document describes the Model Context Protocol (MCP) tools exposed by the De ## Tools -DeepWork exposes three MCP tools: +DeepWork exposes four MCP tools: ### 1. `get_workflows` @@ -42,22 +42,13 @@ interface WorkflowInfo { name: string; // Workflow identifier summary: string; // Short description } - -interface ActiveStepInfo { - session_id: string; // Unique session identifier - branch_name: string; // Git branch for this workflow instance - step_id: string; // ID of the current step - step_expected_outputs: string[]; // Expected output files for this step - step_quality_criteria: string[]; // Criteria for step completion (if configured) - step_instructions: string; // Instructions for the step -} ``` --- ### 2. `start_workflow` -Start a new workflow session. Creates a git branch, initializes state tracking, and returns the first step's instructions. +Start a new workflow session. Creates a git branch, initializes state tracking, and returns the first step's instructions. Supports nested workflows — starting a workflow while one is active pushes onto a stack. #### Parameters @@ -73,6 +64,7 @@ Start a new workflow session. Creates a git branch, initializes state tracking, ```typescript { begin_step: ActiveStepInfo; // Information about the first step to begin + stack: StackEntry[]; // Current workflow stack after starting } ``` @@ -86,7 +78,7 @@ Report that you've finished a workflow step. Validates outputs against quality c | Parameter | Type | Required | Description | |-----------|------|----------|-------------| -| `outputs` | `string[]` | Yes | List of output file paths created | +| `outputs` | `Record` | Yes | Map of output names to file path(s). For outputs declared as type `file`: pass a single string path (e.g. `"report.md"`). For outputs declared as type `files`: pass a list of string paths (e.g. `["a.md", "b.md"]`). Check `step_expected_outputs` to see each output's declared type. 
| | `notes` | `string \| null` | No | Optional notes about work done | | `quality_review_override_reason` | `string \| null` | No | If provided, skips quality review (must explain why) | @@ -99,21 +91,91 @@ The response varies based on the `status` field: status: "needs_work" | "next_step" | "workflow_complete"; // For status = "needs_work" - feedback?: string; // Feedback from quality gate - failed_criteria?: QualityCriteriaResult[]; // Failed quality criteria + feedback?: string; // Combined feedback from failed reviews + failed_reviews?: ReviewResult[]; // Failed review results // For status = "next_step" begin_step?: ActiveStepInfo; // Information about the next step to begin // For status = "workflow_complete" summary?: string; // Summary of completed workflow - all_outputs?: string[]; // All outputs from all steps + all_outputs?: Record; // All outputs from all steps + + // Always included + stack: StackEntry[]; // Current workflow stack after this operation +} +``` + +--- + +### 4. `abort_workflow` + +Abort the current workflow and return to the parent workflow (if nested). Use this when a workflow cannot be completed. + +#### Parameters + +| Parameter | Type | Required | Description | +|-----------|------|----------|-------------| +| `explanation` | `string` | Yes | Why the workflow is being aborted | + +#### Returns + +```typescript +{ + aborted_workflow: string; // The workflow that was aborted (job_name/workflow_name) + aborted_step: string; // The step that was active when aborted + explanation: string; // The explanation provided + stack: StackEntry[]; // Current workflow stack after abort + resumed_workflow?: string | null; // The workflow now active (if any) + resumed_step?: string | null; // The step now active (if any) +} +``` + +--- + +## Shared Types + +```typescript +interface ExpectedOutput { + name: string; // Output name (use as key in finished_step outputs) + type: string; // "file" or "files" + description: string; // What this output should contain + syntax_for_finished_step_tool: string; // Value format hint: + // "filepath" for type "file" + // "array of filepaths for all individual files" for type "files" +} + +interface ActiveStepInfo { + session_id: string; // Unique session identifier + branch_name: string; // Git branch for this workflow instance + step_id: string; // ID of the current step + step_expected_outputs: ExpectedOutput[]; // Expected outputs with type and format hints + step_reviews: ReviewInfo[]; // Reviews to run when step completes + step_instructions: string; // Instructions for the step +} + +interface ReviewInfo { + run_each: string; // 'step' or output name to review + quality_criteria: Record; // Map of criterion name to question +} + +interface ReviewResult { + review_run_each: string; // 'step' or output name that was reviewed + target_file: string | null; // Specific file reviewed (for per-file reviews) + passed: boolean; // Whether this review passed + feedback: string; // Summary feedback + criteria_results: QualityCriteriaResult[]; } interface QualityCriteriaResult { - criterion: string; // The quality criterion text - passed: boolean; // Whether this criterion passed - feedback: string | null; // Feedback if failed + criterion: string; // The quality criterion name + passed: boolean; // Whether this criterion passed + feedback: string | null; // Feedback if failed +} + +interface StackEntry { + workflow: string; // Workflow identifier (job_name/workflow_name) + step: string; // Current step ID in this workflow } ``` @@ -135,37 
+197,86 @@ The `finished_step` tool returns one of three statuses: ``` 1. get_workflows() - ↓ + | Discover available jobs and workflows - ↓ + | 2. start_workflow(goal, job_name, workflow_name) - ↓ + | Get session_id, branch_name, first step instructions - ↓ + | 3. Execute step instructions, create outputs - ↓ + | 4. finished_step(outputs) - ↓ - ├─ status = "needs_work" → Fix issues, goto 4 - ├─ status = "next_step" → Execute new instructions, goto 4 - └─ status = "workflow_complete" → Done! + | + +-- status = "needs_work" -> Fix issues, goto 4 + +-- status = "next_step" -> Execute new instructions, goto 4 + +-- status = "workflow_complete" -> Done! ``` --- ## Quality Gates -Steps may define quality criteria that outputs must meet. When `finished_step` is called: +Steps may define quality reviews that outputs must pass. When `finished_step` is called: + +1. If the step has reviews and a quality gate is configured, outputs are evaluated +2. **Input files from prior steps are included** alongside outputs in the review payload, giving the reviewer full context to evaluate whether outputs are consistent with their inputs +3. If any review fails, `status = "needs_work"` with feedback +4. If all reviews pass (or no reviews defined), workflow advances +5. After 3 failed attempts (configurable), the quality gate raises an error + +### Review Payload Structure -1. If the step has quality criteria and a quality gate agent is configured, outputs are evaluated -2. If any criteria fail, `status = "needs_work"` with feedback -3. If all criteria pass (or no criteria defined), workflow advances +The quality gate builds a prompt for the review agent with clearly separated sections: + +``` +==================== BEGIN INPUTS ==================== +(contents of input files from prior steps) +==================== END INPUTS ==================== + +==================== BEGIN OUTPUTS ==================== +(contents of output files from current step) +==================== END OUTPUTS ==================== +``` + +- **Inputs** are resolved automatically from prior step outputs recorded in the session state. If a step declares `file` inputs with `from_step` references, the quality gate looks up the actual file paths from the referenced step's completed outputs. +- **The inputs section is omitted** if the step has no file inputs from prior steps. +- **Binary files** (e.g., PDFs) that cannot be decoded as UTF-8 are not embedded in the payload. Instead, a placeholder is included: `[Binary file — not included in review. 
Read from: /absolute/path/to/file]` + +### Review Types + +Reviews are defined per-step in the job.yml: + +```yaml +reviews: + - run_each: step # Review all outputs together + quality_criteria: + "Criterion Name": "Question to evaluate" + - run_each: output_name # Review a specific output + quality_criteria: + "Criterion Name": "Question to evaluate" +``` + +- `run_each: step` — Review runs once with ALL output files +- `run_each: ` where output is `type: file` — Review runs once with that specific file +- `run_each: ` where output is `type: files` — Review runs once per file in the list To skip quality review (use sparingly): - Provide `quality_review_override_reason` explaining why review is unnecessary --- +## Nested Workflows + +Workflows can be nested — starting a new workflow while one is active pushes onto a stack: + +- All tool responses include a `stack` field showing the current workflow stack +- Each stack entry shows `{workflow: "job/workflow", step: "current_step"}` +- When a workflow completes, it pops from the stack and resumes the parent +- Use `abort_workflow` to cancel the current workflow and return to parent + +--- + ## Configuration The MCP server is configured via `.deepwork/config.yml`: @@ -217,4 +328,7 @@ Add to your `.mcp.json`: | Version | Changes | |---------|---------| +| 1.3.0 | `step_expected_outputs` changed from `string[]` to `ExpectedOutput[]` — each entry includes `name`, `type`, `description`, and `syntax_for_finished_step_tool` so agents know exactly what format to use when calling `finished_step`. | +| 1.2.0 | Quality gate now includes input files from prior steps in review payload with BEGIN INPUTS/END INPUTS and BEGIN OUTPUTS/END OUTPUTS section headers. Binary files (PDFs, etc.) get a placeholder instead of raw content. | +| 1.1.0 | Added `abort_workflow` tool, `stack` field in all responses, `ReviewInfo`/`ReviewResult` types, typed outputs as `Record` | | 1.0.0 | Initial MCP interface with `get_workflows`, `start_workflow`, `finished_step` | diff --git a/flake.lock b/flake.lock index 1cf9a673..8a9dd522 100644 --- a/flake.lock +++ b/flake.lock @@ -6,11 +6,11 @@ "nixpkgs": "nixpkgs" }, "locked": { - "lastModified": 1770315205, - "narHash": "sha256-yOYprNUvMHRBC7EfmhNOYYLqNm43cLtydV39ITnCfZk=", + "lastModified": 1770362224, + "narHash": "sha256-glZjGWSy+LpalbwlsQ3iWNpWU4TlEOandYWOpl8sMt8=", "owner": "sadjow", "repo": "claude-code-nix", - "rev": "b774ffcdcd9987f4a2e6e3809130d04438e29a13", + "rev": "f4f8d6e7cc59e34e5a85550f017ead83ab925b22", "type": "github" }, "original": { diff --git a/pyproject.toml b/pyproject.toml index 6ca12327..bf597253 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -119,6 +119,7 @@ strict_equality = true [dependency-groups] dev = [ + "fpdf2>=2.8.5", "pytest>=9.0.2", "pytest-asyncio>=1.3.0", "pytest-mock>=3.15.1", diff --git a/src/deepwork/cli/install.py b/src/deepwork/cli/install.py index b761aff5..2dfc980d 100644 --- a/src/deepwork/cli/install.py +++ b/src/deepwork/cli/install.py @@ -146,6 +146,27 @@ def _create_deepwork_gitignore(deepwork_dir: Path) -> None: gitignore_path.write_text(gitignore_content) +def _create_common_info_directory(deepwork_dir: Path) -> None: + """ + Create the .deepwork/common_info directory with a .gitkeep file. + + This directory holds shared reference files that are available across + all jobs and workflow steps. 
+ + Args: + deepwork_dir: Path to .deepwork directory + """ + common_info_dir = deepwork_dir / "common_info" + ensure_dir(common_info_dir) + + gitkeep_file = common_info_dir / ".gitkeep" + if not gitkeep_file.exists(): + gitkeep_file.write_text( + "# This file ensures the .deepwork/common_info directory exists in version control.\n" + "# Place shared reference files here that should be available across all jobs.\n" + ) + + def _create_tmp_directory(deepwork_dir: Path) -> None: """ Create the .deepwork/tmp directory with a .gitkeep file. @@ -305,6 +326,10 @@ def _install_deepwork(platform_name: str | None, project_path: Path) -> None: _create_tmp_directory(deepwork_dir) console.print(" [green]✓[/green] Created .deepwork/tmp/.gitkeep") + # Step 3f: Create common_info directory for shared reference files + _create_common_info_directory(deepwork_dir) + console.print(" [green]✓[/green] Created .deepwork/common_info/.gitkeep") + # Step 4: Load or create config.yml console.print("[yellow]→[/yellow] Updating configuration...") config_file = deepwork_dir / "config.yml" diff --git a/src/deepwork/core/parser.py b/src/deepwork/core/parser.py index 5c426aa6..2de03c37 100644 --- a/src/deepwork/core/parser.py +++ b/src/deepwork/core/parser.py @@ -113,6 +113,22 @@ def from_dict(cls, data: dict[str, Any]) -> "HookAction": StopHook = HookAction +@dataclass +class Review: + """Represents a quality review for step outputs.""" + + run_each: str # "step" or output name + quality_criteria: dict[str, str] # name → question + + @classmethod + def from_dict(cls, data: dict[str, Any]) -> "Review": + """Create Review from dictionary.""" + return cls( + run_each=data["run_each"], + quality_criteria=data.get("quality_criteria", {}), + ) + + @dataclass class Step: """Represents a single step in a job.""" @@ -132,8 +148,8 @@ class Step: # If true, skill is user-invocable in menus. Default: false (hidden from menus). exposed: bool = False - # Declarative quality criteria rendered with standard evaluation framing - quality_criteria: list[str] = field(default_factory=list) + # Quality reviews to run when step completes + reviews: list[Review] = field(default_factory=list) # Agent type for this step (e.g., "general-purpose"). When set, skill uses context: fork agent: str | None = None @@ -178,7 +194,7 @@ def from_dict(cls, data: dict[str, Any]) -> "Step": dependencies=data.get("dependencies", []), hooks=hooks, exposed=data.get("exposed", False), - quality_criteria=data.get("quality_criteria", []), + reviews=[Review.from_dict(r) for r in data.get("reviews", [])], agent=data.get("agent"), ) @@ -346,6 +362,23 @@ def validate_file_inputs(self) -> None: f"but '{inp.from_step}' is not in dependencies" ) + def validate_reviews(self) -> None: + """ + Validate that review run_each values reference valid output names or 'step'. + + Raises: + ParseError: If run_each references an invalid output name + """ + for step in self.steps: + output_names = {out.name for out in step.outputs} + for review in step.reviews: + if review.run_each != "step" and review.run_each not in output_names: + raise ParseError( + f"Step '{step.id}' has review with run_each='{review.run_each}' " + f"but no output with that name. " + f"Valid values: 'step', {', '.join(sorted(output_names)) or '(no outputs)'}" + ) + def get_workflow_for_step(self, step_id: str) -> Workflow | None: """ Get the workflow containing a step. 
@@ -597,9 +630,10 @@ def parse_job_definition(job_dir: Path | str) -> JobDefinition: # Parse into dataclass job_def = JobDefinition.from_dict(job_data, job_dir_path) - # Validate dependencies, file inputs, and workflows + # Validate dependencies, file inputs, reviews, and workflows job_def.validate_dependencies() job_def.validate_file_inputs() + job_def.validate_reviews() job_def.validate_workflows() # Warn about orphaned steps (not in any workflow) diff --git a/src/deepwork/mcp/quality_gate.py b/src/deepwork/mcp/quality_gate.py index f3f2c9f5..f3be12f8 100644 --- a/src/deepwork/mcp/quality_gate.py +++ b/src/deepwork/mcp/quality_gate.py @@ -6,13 +6,18 @@ from __future__ import annotations +import asyncio from pathlib import Path from typing import Any import aiofiles from deepwork.mcp.claude_cli import ClaudeCLI -from deepwork.mcp.schemas import QualityCriteriaResult, QualityGateResult +from deepwork.mcp.schemas import ( + QualityCriteriaResult, + QualityGateResult, + ReviewResult, +) # JSON Schema for quality gate response validation QUALITY_GATE_RESPONSE_SCHEMA: dict[str, Any] = { @@ -39,6 +44,9 @@ # File separator format: 20 dashes, filename, 20 dashes FILE_SEPARATOR = "-" * 20 +# Section headers for inputs/outputs +SECTION_SEPARATOR = "=" * 20 + class QualityGateError(Exception): """Exception raised for quality gate errors.""" @@ -61,22 +69,43 @@ def __init__(self, cli: ClaudeCLI | None = None): """ self._cli = cli or ClaudeCLI() - def _build_instructions(self, quality_criteria: list[str]) -> str: + def _build_instructions( + self, + quality_criteria: dict[str, str], + notes: str | None = None, + ) -> str: """Build the system instructions for the review agent. Args: - quality_criteria: List of quality criteria to evaluate + quality_criteria: Map of criterion name to criterion question + notes: Optional notes from the agent about work done Returns: System instructions string """ - criteria_list = "\n".join(f"- {c}" for c in quality_criteria) + criteria_list = "\n".join( + f"- **{name}**: {question}" for name, question in quality_criteria.items() + ) + + notes_section = "" + if notes: + notes_section = f""" + +## Author Notes - return f"""You are a quality gate reviewer. Your job is to evaluate whether outputs meet the specified quality criteria. +The author provided the following notes about the work done: -## Quality Criteria to Evaluate +{notes}""" + + return f"""\ +You are an editor responsible for reviewing the files listed as outputs. +Your job is to evaluate whether outputs meet the specified criteria below. +You have also been provided any relevant inputs that were used by the process that generated the outputs. + +## Criteria to Evaluate {criteria_list} +{notes_section} ## Response Format @@ -87,7 +116,7 @@ def _build_instructions(self, quality_criteria: list[str]) -> str: "feedback": "Brief overall summary of evaluation", "criteria_results": [ {{ - "criterion": "The criterion text", + "criterion": "The criterion name", "passed": true/false, "feedback": "Specific feedback for this criterion (null if passed)" }} @@ -120,41 +149,84 @@ def _flatten_output_paths(outputs: dict[str, str | list[str]]) -> list[str]: paths.append(value) return paths - async def _build_payload( + async def _read_file_sections( self, - outputs: dict[str, str | list[str]], + file_paths: dict[str, str | list[str]], project_root: Path, - ) -> str: - """Build the user prompt payload with file contents. + ) -> list[str]: + """Read files and return formatted sections for each. 
Args: - outputs: Map of output names to file path(s) + file_paths: Map of names to file path(s) project_root: Project root path for reading files Returns: - Formatted payload with file contents + List of formatted file sections """ - output_sections: list[str] = [] - all_paths = self._flatten_output_paths(outputs) + sections: list[str] = [] + all_paths = self._flatten_output_paths(file_paths) - for output_path in all_paths: - full_path = project_root / output_path - header = f"{FILE_SEPARATOR} {output_path} {FILE_SEPARATOR}" + for file_path in all_paths: + full_path = project_root / file_path + header = f"{FILE_SEPARATOR} {file_path} {FILE_SEPARATOR}" if full_path.exists(): try: async with aiofiles.open(full_path, encoding="utf-8") as f: content = await f.read() - output_sections.append(f"{header}\n{content}") + sections.append(f"{header}\n{content}") + except (UnicodeDecodeError, ValueError): + abs_path = full_path.resolve() + sections.append( + f"{header}\n[Binary file — not included in review. " + f"Read from: {abs_path}]" + ) except Exception as e: - output_sections.append(f"{header}\n[Error reading file: {e}]") + sections.append(f"{header}\n[Error reading file: {e}]") else: - output_sections.append(f"{header}\n[File not found]") + sections.append(f"{header}\n[File not found]") + + return sections + + async def _build_payload( + self, + outputs: dict[str, str | list[str]], + project_root: Path, + inputs: dict[str, str | list[str]] | None = None, + ) -> str: + """Build the user prompt payload with file contents. - if not output_sections: - return "[No output files provided]" + Organizes content into clearly separated INPUTS and OUTPUTS sections. - return "\n\n".join(output_sections) + Args: + outputs: Map of output names to file path(s) + project_root: Project root path for reading files + inputs: Optional map of input names to file path(s) from prior steps + + Returns: + Formatted payload with file contents in sections + """ + parts: list[str] = [] + + # Build inputs section if provided + if inputs: + input_sections = await self._read_file_sections(inputs, project_root) + if input_sections: + parts.append(f"{SECTION_SEPARATOR} BEGIN INPUTS {SECTION_SEPARATOR}") + parts.extend(input_sections) + parts.append(f"{SECTION_SEPARATOR} END INPUTS {SECTION_SEPARATOR}") + + # Build outputs section + output_sections = await self._read_file_sections(outputs, project_root) + if output_sections: + parts.append(f"{SECTION_SEPARATOR} BEGIN OUTPUTS {SECTION_SEPARATOR}") + parts.extend(output_sections) + parts.append(f"{SECTION_SEPARATOR} END OUTPUTS {SECTION_SEPARATOR}") + + if not parts: + return "[No files provided]" + + return "\n\n".join(parts) def _parse_result(self, data: dict[str, Any]) -> QualityGateResult: """Parse the structured output into a QualityGateResult. @@ -192,16 +264,20 @@ def _parse_result(self, data: dict[str, Any]) -> QualityGateResult: async def evaluate( self, - quality_criteria: list[str], + quality_criteria: dict[str, str], outputs: dict[str, str | list[str]], project_root: Path, + inputs: dict[str, str | list[str]] | None = None, + notes: str | None = None, ) -> QualityGateResult: """Evaluate step outputs against quality criteria. 
Args: - quality_criteria: List of quality criteria to evaluate + quality_criteria: Map of criterion name to criterion question outputs: Map of output names to file path(s) project_root: Project root path + inputs: Optional map of input names to file path(s) from prior steps + notes: Optional notes from the agent about work done Returns: QualityGateResult with pass/fail and feedback @@ -217,8 +293,8 @@ async def evaluate( criteria_results=[], ) - instructions = self._build_instructions(quality_criteria) - payload = await self._build_payload(outputs, project_root) + instructions = self._build_instructions(quality_criteria, notes=notes) + payload = await self._build_payload(outputs, project_root, inputs=inputs) from deepwork.mcp.claude_cli import ClaudeCLIError @@ -234,6 +310,89 @@ async def evaluate( return self._parse_result(data) + async def evaluate_reviews( + self, + reviews: list[dict[str, Any]], + outputs: dict[str, str | list[str]], + output_specs: dict[str, str], + project_root: Path, + inputs: dict[str, str | list[str]] | None = None, + notes: str | None = None, + ) -> list[ReviewResult]: + """Evaluate all reviews for a step, running them in parallel. + + Args: + reviews: List of review dicts with run_each and quality_criteria + outputs: Map of output names to file path(s) + output_specs: Map of output names to their type ("file" or "files") + project_root: Project root path + inputs: Optional map of input names to file path(s) from prior steps + notes: Optional notes from the agent about work done + + Returns: + List of ReviewResult for any failed reviews (empty if all pass) + """ + if not reviews: + return [] + + tasks: list[tuple[str, str | None, dict[str, str], dict[str, str | list[str]]]] = [] + + for review in reviews: + run_each = review["run_each"] + quality_criteria = review["quality_criteria"] + + if run_each == "step": + # Review all outputs together + tasks.append((run_each, None, quality_criteria, outputs)) + elif run_each in outputs: + output_type = output_specs.get(run_each, "file") + output_value = outputs[run_each] + + if output_type == "files" and isinstance(output_value, list): + # Run once per file + for file_path in output_value: + tasks.append(( + run_each, + file_path, + quality_criteria, + {run_each: file_path}, + )) + else: + # Single file - run once + tasks.append(( + run_each, + output_value if isinstance(output_value, str) else None, + quality_criteria, + {run_each: output_value}, + )) + + async def run_review( + run_each: str, + target_file: str | None, + criteria: dict[str, str], + review_outputs: dict[str, str | list[str]], + ) -> ReviewResult: + result = await self.evaluate( + quality_criteria=criteria, + outputs=review_outputs, + project_root=project_root, + inputs=inputs, + notes=notes, + ) + return ReviewResult( + review_run_each=run_each, + target_file=target_file, + passed=result.passed, + feedback=result.feedback, + criteria_results=result.criteria_results, + ) + + results = await asyncio.gather( + *(run_review(*task) for task in tasks) + ) + + return [r for r in results if not r.passed] + class MockQualityGate(QualityGate): """Mock quality gate for testing. 
@@ -255,25 +414,29 @@ def __init__(self, should_pass: bool = True, feedback: str = "Mock evaluation"): async def evaluate( self, - quality_criteria: list[str], + quality_criteria: dict[str, str], outputs: dict[str, str | list[str]], project_root: Path, + inputs: dict[str, str | list[str]] | None = None, + notes: str | None = None, ) -> QualityGateResult: """Mock evaluation - records call and returns configured result.""" self.evaluations.append( { "quality_criteria": quality_criteria, "outputs": outputs, + "inputs": inputs, + "notes": notes, } ) criteria_results = [ QualityCriteriaResult( - criterion=c, + criterion=name, passed=self.should_pass, feedback=None if self.should_pass else self.feedback, ) - for c in quality_criteria + for name in quality_criteria ] return QualityGateResult( diff --git a/src/deepwork/mcp/schemas.py b/src/deepwork/mcp/schemas.py index 5401a99a..5bd7b5e5 100644 --- a/src/deepwork/mcp/schemas.py +++ b/src/deepwork/mcp/schemas.py @@ -93,7 +93,12 @@ class FinishedStepInput(BaseModel): """Input for finished_step tool.""" outputs: dict[str, str | list[str]] = Field( - description="Map of output names to file path(s). Single file outputs map to a string path, multi-file outputs map to a list of paths." + description=( + "Map of output names to file path(s). " + "For outputs declared as type 'file': pass a single string path (e.g. \"report.md\"). " + "For outputs declared as type 'files': pass a list of string paths (e.g. [\"a.md\", \"b.md\"]). " + "Check step_expected_outputs from start_workflow/finished_step response to see each output's type." + ) ) notes: str | None = Field(default=None, description="Optional notes about work done") quality_review_override_reason: str | None = Field( @@ -131,6 +136,29 @@ class QualityGateResult(BaseModel): ) +class ReviewInfo(BaseModel): + """Information about a review for a step.""" + + run_each: str = Field(description="'step' or output name to review") + quality_criteria: dict[str, str] = Field( + description="Map of criterion name to criterion question" + ) + + +class ReviewResult(BaseModel): + """Result from a single review evaluation.""" + + review_run_each: str = Field(description="'step' or output name that was reviewed") + target_file: str | None = Field( + default=None, description="Specific file reviewed (for per-file reviews)" + ) + passed: bool = Field(description="Whether this review passed") + feedback: str = Field(description="Summary feedback") + criteria_results: list[QualityCriteriaResult] = Field( + default_factory=list, description="Per-criterion results" + ) + + # ============================================================================= # Tool Output Models # NOTE: Changes to these models affect MCP tool return types. 
@@ -138,15 +166,28 @@ class QualityGateResult(BaseModel): # ============================================================================= +class ExpectedOutput(BaseModel): + """Describes an expected output for a step.""" + + name: str = Field(description="Output name (use as key in finished_step outputs)") + type: str = Field(description="Output type: 'file' or 'files'") + description: str = Field(description="What this output should contain") + syntax_for_finished_step_tool: str = Field( + description="The value format to use for this output when calling finished_step" + ) + + class ActiveStepInfo(BaseModel): """Information about the step to begin working on.""" session_id: str = Field(description="Unique session identifier") branch_name: str = Field(description="Git branch for this workflow instance") step_id: str = Field(description="ID of the current step") - step_expected_outputs: list[str] = Field(description="Expected output files for this step") - step_quality_criteria: list[str] = Field( - default_factory=list, description="Criteria for step completion" + step_expected_outputs: list[ExpectedOutput] = Field( + description="Expected outputs for this step, including type and format hints" + ) + step_reviews: list[ReviewInfo] = Field( + default_factory=list, description="Reviews to run when step completes" ) step_instructions: str = Field(description="Instructions for the step") @@ -180,8 +221,8 @@ class FinishedStepResponse(BaseModel): # For needs_work status feedback: str | None = Field(default=None, description="Feedback from quality gate") - failed_criteria: list[QualityCriteriaResult] | None = Field( - default=None, description="Failed quality criteria" + failed_reviews: list[ReviewResult] | None = Field( + default=None, description="Failed review results" ) # For next_step status diff --git a/src/deepwork/mcp/server.py b/src/deepwork/mcp/server.py index bb99a21b..73229c66 100644 --- a/src/deepwork/mcp/server.py +++ b/src/deepwork/mcp/server.py @@ -149,7 +149,10 @@ async def start_workflow( "'needs_work' with feedback to fix issues, " "'next_step' with instructions for the next step, or " "'workflow_complete' when finished (pops from stack if nested). " - "Required: outputs (list of file paths created). " + "Required: outputs (map of output names to file paths created). " + "For outputs with type 'file': pass a single string path. " + "For outputs with type 'files': pass a list of string paths. " + "Check step_expected_outputs in the response to see each output's type. " "Optional: notes about work done. " "Optional: quality_review_override_reason to skip quality review (must explain why)." 
) diff --git a/src/deepwork/mcp/tools.py b/src/deepwork/mcp/tools.py index bdede7cd..97be330a 100644 --- a/src/deepwork/mcp/tools.py +++ b/src/deepwork/mcp/tools.py @@ -15,7 +15,6 @@ JobDefinition, OutputSpec, ParseError, - Step, Workflow, parse_job_definition, ) @@ -23,10 +22,12 @@ AbortWorkflowInput, AbortWorkflowResponse, ActiveStepInfo, + ExpectedOutput, FinishedStepInput, FinishedStepResponse, GetWorkflowsResponse, JobInfo, + ReviewInfo, StartWorkflowInput, StartWorkflowResponse, StepStatus, @@ -263,6 +264,23 @@ def _validate_outputs( f"Output '{name}': file not found at '{path}'" ) + @staticmethod + def _build_expected_outputs(outputs: list[OutputSpec]) -> list[ExpectedOutput]: + """Build ExpectedOutput list from OutputSpec list.""" + syntax_map = { + "file": "filepath", + "files": "array of filepaths for all individual files", + } + return [ + ExpectedOutput( + name=out.name, + type=out.type, + description=out.description, + syntax_for_finished_step_tool=syntax_map.get(out.type, out.type), + ) + for out in outputs + ] + # ========================================================================= # Tool Implementations # ========================================================================= @@ -318,7 +336,7 @@ async def start_workflow(self, input_data: StartWorkflowInput) -> StartWorkflowR instructions = self._get_step_instructions(job, first_step_id) # Get expected outputs - step_outputs = [out.name for out in first_step.outputs] + step_outputs = self._build_expected_outputs(first_step.outputs) return StartWorkflowResponse( begin_step=ActiveStepInfo( @@ -326,7 +344,13 @@ async def start_workflow(self, input_data: StartWorkflowInput) -> StartWorkflowR branch_name=session.branch_name, step_id=first_step_id, step_expected_outputs=step_outputs, - step_quality_criteria=first_step.quality_criteria, + step_reviews=[ + ReviewInfo( + run_each=r.run_each, + quality_criteria=r.quality_criteria, + ) + for r in first_step.reviews + ], step_instructions=instructions, ), stack=self.state_manager.get_stack(), @@ -359,34 +383,55 @@ async def finished_step(self, input_data: FinishedStepInput) -> FinishedStepResp # Validate outputs against step's declared output specs self._validate_outputs(input_data.outputs, current_step.outputs) - # Run quality gate if available and step has criteria (unless overridden) + # Run quality gate if available and step has reviews (unless overridden) if ( self.quality_gate - and current_step.quality_criteria + and current_step.reviews and not input_data.quality_review_override_reason ): attempts = await self.state_manager.record_quality_attempt(current_step_id) - result = await self.quality_gate.evaluate( - quality_criteria=current_step.quality_criteria, + # Build output specs map for evaluate_reviews + output_specs = {out.name: out.type for out in current_step.outputs} + + # Resolve input files from prior step outputs + input_files: dict[str, str | list[str]] = {} + for inp in current_step.inputs: + if inp.is_file_input(): + source_progress = session.step_progress.get(inp.from_step) # type: ignore[arg-type] + if source_progress and inp.file in source_progress.outputs: + input_files[inp.file] = source_progress.outputs[inp.file] # type: ignore[index] + + failed_reviews = await self.quality_gate.evaluate_reviews( + reviews=[ + { + "run_each": r.run_each, + "quality_criteria": r.quality_criteria, + } + for r in current_step.reviews + ], outputs=input_data.outputs, + output_specs=output_specs, project_root=self.project_root, + inputs=input_files if input_files else None, + 
notes=input_data.notes, ) - if not result.passed: + if failed_reviews: # Check max attempts if attempts >= self.max_quality_attempts: + feedback_parts = [r.feedback for r in failed_reviews] raise ToolError( f"Quality gate failed after {self.max_quality_attempts} attempts. " - f"Feedback: {result.feedback}" + f"Feedback: {'; '.join(feedback_parts)}" ) # Return needs_work status - failed_criteria = [cr for cr in result.criteria_results if not cr.passed] + combined_feedback = "; ".join(r.feedback for r in failed_reviews) return FinishedStepResponse( status=StepStatus.NEEDS_WORK, - feedback=result.feedback, - failed_criteria=failed_criteria, + feedback=combined_feedback, + failed_reviews=failed_reviews, stack=self.state_manager.get_stack(), ) @@ -430,7 +475,7 @@ async def finished_step(self, input_data: FinishedStepInput) -> FinishedStepResp # Get instructions instructions = self._get_step_instructions(job, next_step_id) - step_outputs = [out.name for out in next_step.outputs] + step_outputs = self._build_expected_outputs(next_step.outputs) # Add info about concurrent steps if this is a concurrent entry if next_entry.is_concurrent and len(next_entry.step_ids) > 1: @@ -451,7 +496,13 @@ async def finished_step(self, input_data: FinishedStepInput) -> FinishedStepResp branch_name=session.branch_name, step_id=next_step_id, step_expected_outputs=step_outputs, - step_quality_criteria=next_step.quality_criteria, + step_reviews=[ + ReviewInfo( + run_each=r.run_each, + quality_criteria=r.quality_criteria, + ) + for r in next_step.reviews + ], step_instructions=instructions, ), stack=self.state_manager.get_stack(), diff --git a/src/deepwork/schemas/job.schema.json b/src/deepwork/schemas/job.schema.json index 27cbf30c..4226f708 100644 --- a/src/deepwork/schemas/job.schema.json +++ b/src/deepwork/schemas/job.schema.json @@ -135,7 +135,8 @@ "name", "description", "instructions_file", - "outputs" + "outputs", + "reviews" ], "additionalProperties": false, "description": "A single Step in a job, representing one material unit of work with evaluatable outputs", @@ -202,12 +203,11 @@ "description": "If true, step is hidden from menus. Alias for exposed: false. Default: false", "default": false }, - "quality_criteria": { + "reviews": { "type": "array", - "description": "Declarative quality criteria for evaluating step outputs. Rendered with standard evaluation framing.", + "description": "Quality reviews to run when step completes. 
Can be empty.", "items": { - "type": "string", - "minLength": 1 + "$ref": "#/$defs/review" } }, "agent": { @@ -365,6 +365,31 @@ } } ] + }, + "review": { + "type": "object", + "required": [ + "run_each", + "quality_criteria" + ], + "additionalProperties": false, + "description": "A quality review that evaluates step outputs against criteria", + "properties": { + "run_each": { + "type": "string", + "minLength": 1, + "description": "Either 'step' to review all outputs together, or the name of a specific output to review individually" + }, + "quality_criteria": { + "type": "object", + "description": "Map of criterion name to criterion question", + "additionalProperties": { + "type": "string", + "minLength": 1 + }, + "minProperties": 1 + } + } } } } \ No newline at end of file diff --git a/src/deepwork/standard_jobs/deepwork_jobs/job.yml b/src/deepwork/standard_jobs/deepwork_jobs/job.yml index 14b70c46..e48853a7 100644 --- a/src/deepwork/standard_jobs/deepwork_jobs/job.yml +++ b/src/deepwork/standard_jobs/deepwork_jobs/job.yml @@ -1,6 +1,6 @@ # yaml-language-server: $schema=.deepwork/schemas/job.schema.json name: deepwork_jobs -version: "1.2.1" +version: "1.3.0" summary: "Creates and manages multi-step AI workflows. Use when defining, implementing, testing, or improving DeepWork jobs." description: | Core commands for managing DeepWork jobs. These commands help you define new multi-step @@ -38,6 +38,8 @@ workflows: - learn changelog: + - version: "1.3.0" + changes: "Migrated quality_criteria to reviews system with run_each targeting and map-format criteria" - version: "1.2.1" changes: "Removed deprecated exposed field from learn step; added learn workflow to make step accessible via MCP" - version: "1.2.0" @@ -74,6 +76,15 @@ steps: type: file description: "Definition of the job and its workflows" dependencies: [] + reviews: + - run_each: job.yml + quality_criteria: + "Intermediate Deliverables": "Does the job break out across the logical steps such that there are reviewable intermediate deliverables?" + "Reviews": | + Are there reviews defined for each step? Do particularly critical documents have their own reviews? + Note that the reviewers do not have transcript access, so if the criteria are about the conversation, + then add a `.deepwork/tmp/[step_summary].md` step output file so the agent has a communication channel to the reviewer. + - id: implement name: "Implement Job Steps" description: "Generates step instruction files and syncs slash commands from the job.yml specification. Use after defining a job." @@ -87,12 +98,15 @@ steps: description: "Instruction Markdown files for each step" dependencies: - define - quality_criteria: - - "**Complete Instructions**: Are ALL step instruction files complete (not stubs or placeholders)?" - - "**Specific & Actionable**: Are instructions tailored to each step's purpose, not generic?" - - "**Output Examples**: Does each instruction file show what good output looks like?" - - "**Quality Criteria**: Does each instruction file define quality criteria for its outputs?" - - "**Ask Structured Questions**: Do step instructions that gather user input explicitly use the phrase \"ask structured questions\"?" + reviews: + - run_each: step_instruction_files + quality_criteria: + "Complete Instructions": "Is the instruction file complete (no stubs or placeholders)?" + "Specific & Actionable": "Are instructions tailored to the step's purpose, not generic?" + "Output Examples": "Does the instruction file show what good output looks like?" 
+ "Quality Criteria": "Does the instruction file define quality criteria for its outputs?" + "Ask Structured Questions": "Do instructions that gather user input explicitly use the phrase 'ask structured questions'?" + "Prompt Engineering": "Does the instructions file following Anthropics Best Practices for Prompt Engineering?" - id: test name: "Test the New Workflow" @@ -110,13 +124,14 @@ steps: dependencies: - define - implement - quality_criteria: - - "**User Informed**: Did the agent explain the workflow is ready and ask what to test it on?" - - "**Workflow Invoked**: Was the new workflow actually run on the user's test case via MCP?" - - "**Output Critiqued**: Did the agent identify up to 3 top issues with the output?" - - "**User Feedback Gathered**: Did the agent ask the user about each issue and gather additional feedback?" - - "**Corrections Made**: Were all requested corrections applied to the output?" - - "**User Satisfied**: Did the user confirm the output meets their needs?" + reviews: + - run_each: step + quality_criteria: + "Workflow Invoked": "Was the new workflow actually run on the user's test case via MCP?" + "Output Critiqued": "Did the agent identify up to 3 top issues with the output?" + "User Feedback Gathered": "Did the agent ask the user about each issue and gather additional feedback?" + "Corrections Made": "Were all requested corrections applied to the output?" + "User Satisfied": "Did the user confirm the output meets their needs?" - id: iterate name: "Iterate on Workflow Design" @@ -138,12 +153,7 @@ steps: - define - implement - test - quality_criteria: - - "**Conversation Reviewed**: Did the agent analyze the test run for inefficiencies and issues?" - - "**Instructions Improved**: Were step instructions updated to address identified problems?" - - "**Quality Criteria Updated**: Were quality criteria adjusted to better match user expectations?" - - "**Tool Usage Considered**: Did the agent consider if different tools would improve the workflow?" - - "**Recap Provided**: Did the agent summarize what was improved and why?" + reviews: [] - id: learn name: "Learn from Job Execution" @@ -157,16 +167,18 @@ steps: type: file description: "Bespoke learnings and run-specific context for the working folder" dependencies: [] - quality_criteria: - - "**Conversation Analyzed**: Did the agent review the conversation for DeepWork job executions?" - - "**Confusion Identified**: Did the agent identify points of confusion, errors, or inefficiencies?" - - "**Instructions Improved**: Were job instructions updated to address identified issues?" - - "**Instructions Concise**: Are instructions free of redundancy and unnecessary verbosity?" - - "**Shared Content Extracted**: Is lengthy/duplicated content extracted into referenced files?" - - "**Bespoke Learnings Captured**: Were run-specific learnings added to AGENTS.md?" - - "**File References Used**: Do AGENTS.md entries reference other files where appropriate?" - - "**Working Folder Correct**: Is AGENTS.md in the correct working folder for the job?" - - "**Generalizable Separated**: Are generalizable improvements in instructions, not AGENTS.md?" + reviews: + - run_each: step + quality_criteria: + "Conversation Analyzed": "Did the agent review the conversation for DeepWork job executions?" + "Confusion Identified": "Did the agent identify points of confusion, errors, or inefficiencies?" + "Instructions Improved": "Were job instructions updated to address identified issues?" 
+ "Instructions Concise": "Are instructions free of redundancy and unnecessary verbosity?" + "Shared Content Extracted": "Is lengthy/duplicated content extracted into referenced files?" + "Bespoke Learnings Captured": "Were run-specific learnings added to AGENTS.md?" + "File References Used": "Do AGENTS.md entries reference other files where appropriate?" + "Working Folder Correct": "Is AGENTS.md in the correct working folder for the job?" + "Generalizable Separated": "Are generalizable improvements in instructions, not AGENTS.md?" - id: fix_settings name: "Fix Settings Files" @@ -178,16 +190,18 @@ steps: type: file description: "Cleaned up Claude settings file with legacy permissions removed" dependencies: [] - quality_criteria: - - "**DeepWork Skills Removed**: Are `Skill(...)` entries matching jobs in `.deepwork/jobs/` removed?" - - "**Non-DeepWork Skills Preserved**: Are skills NOT matching DeepWork jobs left intact?" - - "**make_new_job.sh Preserved**: Is the `Bash(...)` permission for `make_new_job.sh` preserved (if present)?" - - "**Rules Hooks Removed**: Are all DeepWork Rules hooks and permissions removed?" - - "**Duplicate Hooks Removed**: Are duplicate hook entries consolidated or removed?" - - "**Hardcoded Paths Removed**: Are user-specific hardcoded paths (like `/Users/*/...`) removed?" - - "**Deprecated Commands Removed**: Are deprecated commands like `deepwork hook *` removed?" - - "**Valid JSON**: Is settings.json still valid JSON after modifications?" - - "**Backup Created**: Was a backup of the original settings created before modifications?" + reviews: + - run_each: step + quality_criteria: + "DeepWork Skills Removed": "Are `Skill(...)` entries matching jobs in `.deepwork/jobs/` removed?" + "Non-DeepWork Skills Preserved": "Are skills NOT matching DeepWork jobs left intact?" + "make_new_job.sh Preserved": "Is the `Bash(...)` permission for `make_new_job.sh` preserved (if present)?" + "Rules Hooks Removed": "Are all DeepWork Rules hooks and permissions removed?" + "Duplicate Hooks Removed": "Are duplicate hook entries consolidated or removed?" + "Hardcoded Paths Removed": "Are user-specific hardcoded paths (like `/Users/*/...`) removed?" + "Deprecated Commands Removed": "Are deprecated commands like `deepwork hook *` removed?" + "Valid JSON": "Is settings.json still valid JSON after modifications?" + "Backup Created": "Was a backup of the original settings created before modifications?" - id: fix_jobs name: "Fix Job Definitions" @@ -202,12 +216,14 @@ steps: description: "Updated job.yml files and step instructions in current DeepWork format" dependencies: - fix_settings - quality_criteria: - - "**Exposed Field Addressed**: Are `exposed: true` fields removed or noted as deprecated?" - - "**Stop Hooks Migrated**: Are `stop_hooks` migrated to `hooks.after_agent` format?" - - "**Removed Steps Cleaned**: Are references to removed steps (like `review_job_spec`) updated?" - - "**Orphaned Steps Fixed**: For jobs with no workflows, is there a single workflow (named after the job) containing all steps? For jobs with existing workflows, does each orphan get its own workflow (named after the step)?" - - "**Valid YAML**: Are all job.yml files valid YAML?" + reviews: + - run_each: step + quality_criteria: + "Exposed Field Addressed": "Are `exposed: true` fields removed or noted as deprecated?" + "Stop Hooks Migrated": "Are `stop_hooks` migrated to `hooks.after_agent` format?" + "Removed Steps Cleaned": "Are references to removed steps (like `review_job_spec`) updated?" 
+ "Orphaned Steps Fixed": "For jobs with no workflows, is there a single workflow (named after the job) containing all steps? For jobs with existing workflows, does each orphan get its own workflow (named after the step)?" + "Valid YAML": "Are all job.yml files valid YAML?" - id: errata name: "Clean Up Errata" @@ -220,12 +236,14 @@ steps: dependencies: - fix_settings - fix_jobs - quality_criteria: - - "**Legacy Job Skills Removed**: Are legacy skill folders for each job removed from `.claude/skills/` and `.gemini/skills/`?" - - "**Deepwork Skill Preserved**: Does the `deepwork` skill folder still exist in `.claude/skills/deepwork/`?" - - "**Temp Files Cleaned**: Are `.deepwork/tmp/` contents cleaned appropriately?" - - "**Rules Folder Removed**: Is `.deepwork/rules/` folder backed up and removed (fully deprecated)?" - - "**Rules Job Removed**: Is `.deepwork/jobs/deepwork_rules/` removed if present?" - - "**Config Version Updated**: Is `.deepwork/config.yml` using current version format?" - - "**DeepWork Re-installed**: Was `deepwork install` run after cleanup, and does it complete without errors?" - - "**Git Status Clean**: Are changes ready to be committed (no untracked garbage files)?" + reviews: + - run_each: step + quality_criteria: + "Legacy Job Skills Removed": "Are legacy skill folders for each job removed from `.claude/skills/` and `.gemini/skills/`?" + "Deepwork Skill Preserved": "Does the `deepwork` skill folder still exist in `.claude/skills/deepwork/`?" + "Temp Files Cleaned": "Are `.deepwork/tmp/` contents cleaned appropriately?" + "Rules Folder Removed": "Is `.deepwork/rules/` folder backed up and removed (fully deprecated)?" + "Rules Job Removed": "Is `.deepwork/jobs/deepwork_rules/` removed if present?" + "Config Version Updated": "Is `.deepwork/config.yml` using current version format?" + "DeepWork Re-installed": "Was `deepwork install` run after cleanup, and does it complete without errors?" + "Git Status Clean": "Are changes ready to be committed (no untracked garbage files)?" diff --git a/src/deepwork/standard_jobs/deepwork_jobs/steps/define.md b/src/deepwork/standard_jobs/deepwork_jobs/steps/define.md index 3cd01848..18c268eb 100644 --- a/src/deepwork/standard_jobs/deepwork_jobs/steps/define.md +++ b/src/deepwork/standard_jobs/deepwork_jobs/steps/define.md @@ -116,20 +116,7 @@ For each major phase they mentioned, ask structured questions to gather details: - Are there any quality checks or validation needed? - What makes a good vs. bad output for this step? - **Important**: Quality criteria belong in the `quality_criteria` field of job.yml, NOT in the step details. When skills are generated, quality criteria are automatically included in the output. Do not duplicate them in step instructions or details—this causes redundancy and confusion. - -6. **Agent Delegation** (optional) - - Should this step be executed by a specific agent type? - - Use the `agent` field when the step should run in a forked context with a specific agent - - When `agent` is set, the generated skill automatically includes `context: fork` - - Available agent types: - - `general-purpose` - Standard agent for multi-step tasks - - ```yaml - steps: - - id: research_step - agent: general-purpose # Delegates to the general-purpose agent - ``` + **Important**: When skills are generated, quality criteria are automatically included in the output. Do not duplicate them in step instructions or details—this causes redundancy and confusion. 
**Note**: You're gathering this information to understand what instructions will be needed, but you won't create the instruction files yet - that happens in the `implement` step. @@ -159,56 +146,53 @@ After gathering information about all steps: - Job description (detailed multi-line explanation) - Version number (start with 1.0.0) -### Step 4: Define Quality Validation Hooks +### Step 4: Define Quality Reviews -For each step, consider whether it would benefit from **quality validation loops**. Quality hooks allow the AI agent to iteratively refine its work until quality criteria are met. +For each step, define **reviews** that evaluate the step's outputs. Reviews run automatically when a step completes and provide quality validation loops. -**Ask structured questions about quality validation:** -- "Are there specific quality criteria that must be met for this step?" -- "Would you like the agent to validate its work before completing?" -- "What would make you send the work back for revision?" +For intermediate outputs between steps, reviews let you make sure you don't go too far down the wrong path. Add reviews that confirm things that could cause problems later. For example, in a report creation process, you might have an intermediate step that performs a number of queries on the data and records the results so that later report-writing steps can synthesize that information into a coherent narrative. In this case, you would want to add a review that checks that the queries' SQL matches the description of the queries in the job description. -**Quality hooks are particularly valuable for:** -- Steps with complex outputs that need multiple checks -- Steps where quality is critical (final deliverables) -- Steps with subjective quality criteria that benefit from AI self-review +For final outputs, reviews let you make sure the output meets the user's expectations. For example, with a data-centric report job, you might have one review on the final output for consistency with style guidelines and tone, and a separate review on the data backing to make sure the claims in the report are supported by the data from earlier steps and all have citations. -**Three types of hooks are supported:** - -1. **Inline Prompt** (`prompt`) - Best for simple quality criteria - ```yaml - hooks: - after_agent: - - prompt: | - Verify the output meets these criteria: - 1. Contains at least 5 competitors - 2. Each competitor has a description - 3. Selection rationale is clear - ``` +**Any job with written final output must always have reviews**. Some suggested ones are: +- Ensure claims have citations and the citations are not hallucinated +- Ensure the output follows the style guidelines and tone +- Ensure the output is well-organized and easy to read +- Ensure obvious questions the content raises have answers provided +- Ensure visual formatting is correct (for formats like PDF or HTML where the visual output matters) +- Ensure the content matches what the intended audience expects (e.g., executives vs. engineers) -2. **Prompt File** (`prompt_file`) - For detailed/reusable criteria - ```yaml - hooks: - after_agent: - - prompt_file: hooks/quality_check.md - ``` +**Reviews format:** -3.
**Script** (`script`) - For programmatic validation (tests, linting) - ```yaml - hooks: - after_agent: - - script: hooks/run_tests.sh - ``` +Each review specifies `run_each` (what to review) and `quality_criteria` (a map of criterion name to question): -**Multiple hooks can be combined:** ```yaml -hooks: - after_agent: - - script: hooks/lint_output.sh - - prompt: "Verify the content is comprehensive and well-organized" +reviews: + - run_each: step # Review all outputs together + quality_criteria: + "Consistent Style": "Do all files follow the same structure?" + "Complete Coverage": "Are all required topics covered?" + - run_each: report_files # Review each file in a 'files'-type output individually + quality_criteria: + "Well Written": "Is the content clear and well-organized?" + "Data-Backed": "Are claims supported by data?" ``` -**Encourage prompt-based hooks** - They leverage the AI's ability to understand context and make nuanced quality judgments. Script hooks are best for objective checks (syntax, format, tests). +**`run_each` options:** +- `step` — Review runs once with ALL output files + input files +- `` where output is `type: file` — Review runs once with that specific file +- `` where output is `type: files` — Review runs once per file in the list + +**Reviews are particularly valuable for:** +- Steps with complex outputs that need multiple quality checks +- Steps where quality is critical (final deliverables) +- Steps with subjective quality criteria that benefit from AI self-review +- Steps producing multiple files where each file needs individual review + +**For steps with no quality checks needed, use an empty reviews list:** +```yaml +reviews: [] +``` ### Step 5: Create the Job Directory and Specification @@ -220,13 +204,6 @@ Only after you have complete understanding, create the job directory and `job.ym .deepwork/jobs/deepwork_jobs/make_new_job.sh [job_name] ``` -This creates: -- `.deepwork/jobs/[job_name]/` - Main job directory -- `.deepwork/jobs/[job_name]/steps/` - For step instruction files -- `.deepwork/jobs/[job_name]/hooks/` - For custom validation scripts -- `.deepwork/jobs/[job_name]/templates/` - For example file formats -- `.deepwork/jobs/[job_name]/AGENTS.md` - Job management guidance - **Then create the job.yml file** at `.deepwork/jobs/[job_name]/job.yml` (Where `[job_name]` is the name of the NEW job you're creating, e.g., `competitive_research`) diff --git a/src/deepwork/standard_jobs/deepwork_jobs/steps/fix_jobs.md b/src/deepwork/standard_jobs/deepwork_jobs/steps/fix_jobs.md index 93fb67ae..ccb5e4a4 100644 --- a/src/deepwork/standard_jobs/deepwork_jobs/steps/fix_jobs.md +++ b/src/deepwork/standard_jobs/deepwork_jobs/steps/fix_jobs.md @@ -38,8 +38,9 @@ Audit and repair the job at `.deepwork/jobs/[job_name]/job.yml`: 4. Fix orphaned steps by adding them to workflows 5. Migrate `outputs` from array format to map format with `type` and `description` 6. Update any `file` inputs that reference renamed output keys -7. Bump version and add changelog entry if changes were made -8. Validate YAML syntax +7. Migrate `quality_criteria` arrays to `reviews` format (run_each + map criteria) +8. Bump version and add changelog entry if changes were made +9. Validate YAML syntax Report what changes were made. ``` @@ -212,7 +213,38 @@ steps: from_step: define ``` -### Step 7: Update Version Numbers +### Step 7: Migrate `quality_criteria` to `reviews` + +The flat `quality_criteria` field on steps has been replaced by the `reviews` array. 
Each review specifies `run_each` (what to review) and `quality_criteria` as a map of criterion name to question. + +**Before (deprecated):** +```yaml +steps: + - id: my_step + quality_criteria: + - "**Complete**: Is the output complete?" + - "**Accurate**: Is the data accurate?" +``` + +**After (current format):** +```yaml +steps: + - id: my_step + reviews: + - run_each: step + quality_criteria: + "Complete": "Is the output complete?" + "Accurate": "Is the data accurate?" +``` + +**Migration rules:** + +1. **Parse the old format**: Each string typically follows `**Name**: Question` format. Extract the name (bold text) as the map key and the question as the value. +2. **Choose `run_each`**: Default to `step` (reviews all outputs together). If the step has a single primary output, consider using that output name instead. +3. **For steps with no quality_criteria**: Use `reviews: []` +4. **Remove the old field**: Delete the `quality_criteria` array entirely after migration. + +### Step 8: Update Version Numbers If you made significant changes to a job, bump its version number: @@ -263,6 +295,7 @@ For each job in `.deepwork/jobs/`, check: | `exposed` field | Remove from all steps | | `stop_hooks` | Migrate to `hooks.after_agent` | | `outputs` format | Migrate from array to map with `type` and `description` | +| `quality_criteria` | Migrate to `reviews` with `run_each` and map-format criteria | | Workflow steps | Remove references to deleted steps | | Dependencies | Update to valid step IDs | | File inputs | Update `from_step` references; update keys for renamed outputs | diff --git a/src/deepwork/standard_jobs/deepwork_jobs/steps/implement.md b/src/deepwork/standard_jobs/deepwork_jobs/steps/implement.md index 7be269a5..10880176 100644 --- a/src/deepwork/standard_jobs/deepwork_jobs/steps/implement.md +++ b/src/deepwork/standard_jobs/deepwork_jobs/steps/implement.md @@ -47,28 +47,27 @@ For each step in the job.yml, create a comprehensive instruction file at `.deepw 3. **Provide examples** - Show what good output looks like 4. **Explain the "why"** - Help the user understand the step's role in the workflow 5. **Quality over quantity** - Detailed, actionable instructions are better than vague ones -6. **Align with stop hooks** - If the step has `stop_hooks` defined, ensure the quality criteria in the instruction file match the validation criteria in the hooks +6. **Align with reviews** - If the step has `reviews` defined, ensure the quality criteria in the instruction file match the review criteria 7. **Ask structured questions** - When a step has user inputs, the instructions MUST explicitly tell the agent to "ask structured questions" using the AskUserQuestion tool to gather that information. Never use generic phrasing like "ask the user" - always use "ask structured questions" -### Handling Quality Hooks +### Handling Reviews -If a step in the job.yml has `hooks.after_agent` defined, the generated instruction file should: +If a step in the job.yml has `reviews` defined, the generated instruction file should: -1. **Mirror the quality criteria** - The "Quality Criteria" section should match what the hooks will validate +1. **Mirror the quality criteria** - The "Quality Criteria" section should match what the reviews will validate 2. **Be explicit about success** - Help the agent understand when the step is truly complete -3. **Include the promise pattern** - Mention that `✓ Quality Criteria Met` should be included when criteria are met +3. 
**Explain what's reviewed** - If reviews target specific outputs (via `run_each`), mention which outputs will be reviewed **Example: If the job.yml has:** ```yaml - id: research_competitors name: "Research Competitors" - hooks: - after_agent: - - prompt: | - Verify the research meets criteria: - 1. Each competitor has at least 3 data points - 2. Sources are cited - 3. Information is current (within last year) + reviews: + - run_each: research_notes.md + quality_criteria: + "Sufficient Data": "Does each competitor have at least 3 data points?" + "Sources Cited": "Are sources cited for key claims?" + "Current Information": "Is the information current (within last year)?" ``` **The instruction file should include:** @@ -78,7 +77,6 @@ If a step in the job.yml has `hooks.after_agent` defined, the generated instruct - Each competitor has at least 3 distinct data points - All information is sourced with citations - Data is current (from within the last year) -- When all criteria are met, include `✓ Quality Criteria Met` in your response ``` This alignment ensures the AI agent knows exactly what will be validated and can self-check before completing. diff --git a/src/deepwork/standard_jobs/deepwork_jobs/steps/iterate.md b/src/deepwork/standard_jobs/deepwork_jobs/steps/iterate.md index fb1f56c8..73dcb589 100644 --- a/src/deepwork/standard_jobs/deepwork_jobs/steps/iterate.md +++ b/src/deepwork/standard_jobs/deepwork_jobs/steps/iterate.md @@ -68,28 +68,33 @@ For each step that needs improvement: - Be direct and actionable - Use bullet points where appropriate -### Step 4: Update Quality Criteria +### Step 4: Update Reviews -Review and update quality criteria in two places: +Review and update quality reviews in two places: 1. **In step instruction files** - The "Quality Criteria" section should reflect what the user actually cared about during testing -2. **In job.yml** - If steps have `quality_criteria` or `stop_hooks`, update them to: +2. **In job.yml** - Update the `reviews` array on each step to: - Remove criteria that weren't relevant - Add criteria based on user feedback - Make existing criteria more specific + - Adjust `run_each` targeting if outputs should be reviewed differently **Example improvement:** ```yaml # Before -quality_criteria: - - "Report is formatted correctly" +reviews: + - run_each: step + quality_criteria: + "Formatted Correctly": "Is the report formatted correctly?" # After -quality_criteria: - - "Report uses distinct colors for each data series in charts" - - "Tables have sufficient padding and font size for readability" - - "Executive summary is understandable by non-technical readers" +reviews: + - run_each: report.md + quality_criteria: + "Distinct Colors": "Does the report use distinct colors for each data series in charts?" + "Readable Tables": "Do tables have sufficient padding and font size for readability?" + "Clear Summary": "Is the executive summary understandable by non-technical readers?" 
``` ### Step 5: Consider Alternative Tools diff --git a/src/deepwork/standard_jobs/deepwork_jobs/steps/learn.md b/src/deepwork/standard_jobs/deepwork_jobs/steps/learn.md index f6d48c78..254a332d 100644 --- a/src/deepwork/standard_jobs/deepwork_jobs/steps/learn.md +++ b/src/deepwork/standard_jobs/deepwork_jobs/steps/learn.md @@ -88,6 +88,7 @@ For each generalizable learning: - Include helpful examples - Clarify ambiguous instructions - Update quality criteria if needed + - If you identify problems in the outcomes of steps, those usually should be reflected in an update to the `reviews` for that step in `job.yml` (adjusting criteria names, questions, or `run_each` targeting) 3. **Keep instructions concise** - Avoid redundancy - don't repeat the same guidance in multiple places diff --git a/src/deepwork/standard_jobs/deepwork_jobs/templates/job.yml.example b/src/deepwork/standard_jobs/deepwork_jobs/templates/job.yml.example index 7cc6e3bb..f321c355 100644 --- a/src/deepwork/standard_jobs/deepwork_jobs/templates/job.yml.example +++ b/src/deepwork/standard_jobs/deepwork_jobs/templates/job.yml.example @@ -14,6 +14,15 @@ changelog: - version: "1.0.0" changes: "Initial job creation" +workflows: + - name: full_analysis + summary: "Complete competitive research from identification to positioning" + steps: + - identify_competitors + - research_competitors + - comparative_analysis + - positioning_recommendations + steps: - id: identify_competitors name: "Identify Competitors" @@ -25,8 +34,11 @@ steps: - name: product_category description: "The product category" outputs: - - competitors_list.md + competitors_list.md: + type: file + description: "Vetted list of direct and indirect competitors" dependencies: [] + reviews: [] - id: research_competitors name: "Research Competitors" @@ -36,17 +48,17 @@ steps: - file: competitors_list.md from_step: identify_competitors outputs: - - research_notes.md + research_notes.md: + type: file + description: "Detailed research notes on each competitor" dependencies: - identify_competitors - hooks: - after_agent: - - prompt: | - Verify the research meets criteria: - 1. Each competitor has at least 3 data points - 2. Sources are cited - 3. Information is current (within last year) - If ALL criteria are met, include `✓ Quality Criteria Met`. + reviews: + - run_each: research_notes.md + quality_criteria: + "Sufficient Data": "Does each competitor have at least 3 data points?" + "Sources Cited": "Are sources cited for key claims?" + "Current Information": "Is the information current (within last year)?" - id: comparative_analysis name: "Comparative Analysis" @@ -56,9 +68,12 @@ steps: - file: research_notes.md from_step: research_competitors outputs: - - comparison_matrix.md + comparison_matrix.md: + type: file + description: "Side-by-side comparison matrix of all competitors" dependencies: - research_competitors + reviews: [] - id: positioning_recommendations name: "Positioning Recommendations" @@ -68,6 +83,13 @@ steps: - file: comparison_matrix.md from_step: comparative_analysis outputs: - - positioning_report.md + positioning_report.md: + type: file + description: "Strategic positioning recommendations" dependencies: - comparative_analysis + reviews: + - run_each: step + quality_criteria: + "Actionable": "Are recommendations specific and actionable?" + "Data-Backed": "Are recommendations supported by the competitive analysis data?" 
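+
+# Illustrative sketch only (not part of the original template): a hypothetical extra step
+# showing a `files`-type output reviewed per file. The names `draft_appendices` and
+# `appendices` are assumptions; when `run_each` names a `files`-type output, each generated
+# file is reviewed individually instead of reviewing the step's outputs as a whole.
+#
+#  - id: draft_appendices
+#    name: "Draft Appendices"
+#    description: "Produce one supporting appendix per competitor"
+#    instructions_file: steps/draft_appendices.md
+#    inputs:
+#      - file: comparison_matrix.md
+#        from_step: comparative_analysis
+#    outputs:
+#      appendices:
+#        type: files
+#        description: "Supporting appendix documents, one per competitor"
+#    dependencies:
+#      - comparative_analysis
+#    reviews:
+#      - run_each: appendices
+#        quality_criteria:
+#          "Relevant": "Does each appendix support the positioning recommendations?"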
diff --git a/src/deepwork/standard_jobs/deepwork_jobs/templates/job.yml.template b/src/deepwork/standard_jobs/deepwork_jobs/templates/job.yml.template index 7dcf34e9..0774c5d7 100644 --- a/src/deepwork/standard_jobs/deepwork_jobs/templates/job.yml.template +++ b/src/deepwork/standard_jobs/deepwork_jobs/templates/job.yml.template @@ -20,6 +20,13 @@ changelog: - version: "1.0.0" changes: "Initial job creation" +workflows: + - name: [workflow_name] + summary: "[What this workflow accomplishes]" + steps: + - [step_id] + - [another_step] + steps: - id: [step_id] name: "[Step Name]" @@ -32,27 +39,29 @@ steps: # - file: [filename_or_path] # from_step: [previous_step_id] outputs: - - [output_filename_or_path] # e.g., "report.md" or "reports/analysis.md" + [output_name]: + type: file + description: "[What this output contains]" dependencies: [] # List of step IDs that must complete first + reviews: + - run_each: step # or a specific output name + quality_criteria: + "[Criterion Name]": "[Question to evaluate]" + "[Another Criterion]": "[Another question]" # Optional: Delegate to a specific agent type (uses context: fork) # agent: general-purpose # or other agent type - # Optional: Quality validation hooks - hooks: - after_agent: - - prompt: | - Verify this step's output meets quality criteria: - 1. [Criterion 1] - 2. [Criterion 2] - If ALL criteria are met, include `✓ Quality Criteria Met`. - id: [another_step] name: "[Another Step]" description: "[What this step does]" instructions_file: steps/[another_step].md inputs: - - file: [output_filename_or_path] + - file: [output_name] from_step: [step_id] outputs: - - [another_output_path] + [another_output]: + type: file + description: "[What this output contains]" dependencies: - [step_id] # This step requires the previous step + reviews: [] # Empty if no quality checks needed diff --git a/tests/fixtures/jobs/complex_job/job.yml b/tests/fixtures/jobs/complex_job/job.yml index 8be0eea9..2e231c89 100644 --- a/tests/fixtures/jobs/complex_job/job.yml +++ b/tests/fixtures/jobs/complex_job/job.yml @@ -35,6 +35,7 @@ steps: type: file description: "Vetted list of direct and indirect competitors" dependencies: [] + reviews: [] - id: primary_research name: "Primary Research" @@ -52,6 +53,7 @@ steps: description: "Individual competitor profile documents" dependencies: - identify_competitors + reviews: [] - id: secondary_research name: "Secondary Research" @@ -69,6 +71,7 @@ steps: dependencies: - identify_competitors - primary_research + reviews: [] - id: comparative_report name: "Comparative Report" @@ -89,3 +92,4 @@ steps: dependencies: - primary_research - secondary_research + reviews: [] diff --git a/tests/fixtures/jobs/concurrent_steps_job/job.yml b/tests/fixtures/jobs/concurrent_steps_job/job.yml index db8545e0..f0a35f56 100644 --- a/tests/fixtures/jobs/concurrent_steps_job/job.yml +++ b/tests/fixtures/jobs/concurrent_steps_job/job.yml @@ -24,6 +24,7 @@ steps: setup_complete.md: type: file description: "Setup confirmation and configuration" + reviews: [] - id: research_web name: "Web Research" @@ -38,6 +39,7 @@ steps: description: "Research findings from web sources" dependencies: - setup + reviews: [] - id: research_docs name: "Document Research" @@ -52,6 +54,7 @@ steps: description: "Research findings from internal documents" dependencies: - setup + reviews: [] - id: research_interviews name: "Interview Research" @@ -66,6 +69,7 @@ steps: description: "Research findings from stakeholder interviews" dependencies: - setup + reviews: [] - id: compile_results 
name: "Compile Results" @@ -86,6 +90,7 @@ steps: - research_web - research_docs - research_interviews + reviews: [] - id: final_review name: "Final Review" @@ -100,3 +105,4 @@ steps: description: "Final reviewed and approved analysis report" dependencies: - compile_results + reviews: [] diff --git a/tests/fixtures/jobs/exposed_step_job/job.yml b/tests/fixtures/jobs/exposed_step_job/job.yml index f4a2e0da..f5b9545f 100644 --- a/tests/fixtures/jobs/exposed_step_job/job.yml +++ b/tests/fixtures/jobs/exposed_step_job/job.yml @@ -21,6 +21,7 @@ steps: type: file description: "Output from the hidden step" dependencies: [] + reviews: [] - id: exposed_step name: "Exposed Step" @@ -32,3 +33,4 @@ steps: type: file description: "Output from the exposed step" dependencies: [] + reviews: [] diff --git a/tests/fixtures/jobs/fruits/job.yml b/tests/fixtures/jobs/fruits/job.yml index 1495f604..4eb1a75a 100644 --- a/tests/fixtures/jobs/fruits/job.yml +++ b/tests/fixtures/jobs/fruits/job.yml @@ -36,6 +36,7 @@ steps: type: file description: "List of identified fruits from the input items" dependencies: [] + reviews: [] - id: classify name: "Classify Fruits" @@ -50,3 +51,4 @@ steps: description: "Fruits organized into categories" dependencies: - identify + reviews: [] diff --git a/tests/fixtures/jobs/job_with_doc_spec/job.yml b/tests/fixtures/jobs/job_with_doc_spec/job.yml index 7fdec846..c365e4bb 100644 --- a/tests/fixtures/jobs/job_with_doc_spec/job.yml +++ b/tests/fixtures/jobs/job_with_doc_spec/job.yml @@ -18,3 +18,4 @@ steps: type: file description: "Generated report document" dependencies: [] + reviews: [] diff --git a/tests/fixtures/jobs/simple_job/job.yml b/tests/fixtures/jobs/simple_job/job.yml index 112dbe97..a788d9fc 100644 --- a/tests/fixtures/jobs/simple_job/job.yml +++ b/tests/fixtures/jobs/simple_job/job.yml @@ -25,3 +25,4 @@ steps: type: file description: "The output file produced by this step" dependencies: [] + reviews: [] diff --git a/tests/integration/test_quality_gate_integration.py b/tests/integration/test_quality_gate_integration.py index 52b24e65..bbf41b8a 100644 --- a/tests/integration/test_quality_gate_integration.py +++ b/tests/integration/test_quality_gate_integration.py @@ -86,10 +86,10 @@ async def test_real_claude_evaluates_passing_criteria(self, project_root: Path) gate = QualityGate(cli=ClaudeCLI(timeout=120)) result = await gate.evaluate( - quality_criteria=[ - "The document must have a title", - "The document must contain a summary section", - ], + quality_criteria={ + "Has Title": "Does the document have a title?", + "Has Summary": "Does the document contain a summary section?", + }, outputs={"analysis": "analysis.md"}, project_root=project_root, ) @@ -120,11 +120,11 @@ async def test_real_claude_evaluates_failing_criteria(self, project_root: Path) gate = QualityGate(cli=ClaudeCLI(timeout=120)) result = await gate.evaluate( - quality_criteria=[ - "The document must contain a section titled 'Executive Summary'", - "The document must include a numbered list of recommendations", - "The document must have a 'Conclusions' section", - ], + quality_criteria={ + "Executive Summary": "Does the document contain a section titled 'Executive Summary'?", + "Recommendations": "Does the document include a numbered list of recommendations?", + "Conclusions": "Does the document have a 'Conclusions' section?", + }, outputs={"document": "incomplete.md"}, project_root=project_root, ) diff --git a/tests/unit/mcp/test_quality_gate.py b/tests/unit/mcp/test_quality_gate.py index 78765e99..8885d6cb 
100644 --- a/tests/unit/mcp/test_quality_gate.py +++ b/tests/unit/mcp/test_quality_gate.py @@ -57,14 +57,19 @@ def test_init_custom_cli(self, mock_cli: ClaudeCLI) -> None: assert gate._cli is mock_cli def test_build_instructions(self, quality_gate: QualityGate) -> None: - """Test building system instructions.""" + """Test building system instructions with dict format.""" instructions = quality_gate._build_instructions( - quality_criteria=["Output must exist", "Output must be valid"], + quality_criteria={ + "Output Exists": "Does the output file exist?", + "Output Valid": "Is the output valid?", + }, ) - assert "Output must exist" in instructions - assert "Output must be valid" in instructions - assert "quality gate reviewer" in instructions.lower() + assert "**Output Exists**" in instructions + assert "Does the output file exist?" in instructions + assert "**Output Valid**" in instructions + assert "Is the output valid?" in instructions + assert "editor" in instructions.lower() assert "passed" in instructions # JSON format mentioned assert "feedback" in instructions # JSON format mentioned @@ -81,6 +86,8 @@ async def test_build_payload(self, quality_gate: QualityGate, project_root: Path assert "Test content" in payload assert "output.md" in payload assert "--------------------" in payload + assert "BEGIN OUTPUTS" in payload + assert "END OUTPUTS" in payload async def test_build_payload_missing_file( self, quality_gate: QualityGate, project_root: Path @@ -111,6 +118,124 @@ async def test_build_payload_files_type( assert "a.md" in payload assert "b.md" in payload + async def test_build_payload_binary_file( + self, quality_gate: QualityGate, project_root: Path + ) -> None: + """Test building payload with a binary file produces a placeholder message.""" + binary_file = project_root / "report.pdf" + binary_file.write_bytes(b"%PDF-1.4 \x00\x01\x02\xff\xfe binary content") + + payload = await quality_gate._build_payload( + outputs={"report": "report.pdf"}, + project_root=project_root, + ) + + assert "Binary file" in payload + assert "not included in review" in payload + assert str(binary_file.resolve()) in payload + assert "report.pdf" in payload + # Should NOT contain the raw binary content + assert "%PDF" not in payload + + async def test_build_payload_binary_file_in_multi_output( + self, quality_gate: QualityGate, project_root: Path + ) -> None: + """Test building payload with a mix of text and binary files.""" + text_file = project_root / "summary.md" + text_file.write_text("Summary text content") + binary_file = project_root / "data.pdf" + binary_file.write_bytes(b"\x00\x01\x02\xff\xfe binary data") + + payload = await quality_gate._build_payload( + outputs={"docs": ["summary.md", "data.pdf"]}, + project_root=project_root, + ) + + # Text file content should be included + assert "Summary text content" in payload + # Binary file should have placeholder + assert "Binary file" in payload + assert "not included in review" in payload + assert str(binary_file.resolve()) in payload + + async def test_build_payload_with_inputs_and_outputs( + self, quality_gate: QualityGate, project_root: Path + ) -> None: + """Test building payload with both inputs and outputs in separate sections.""" + (project_root / "input_data.md").write_text("Input content from prior step") + (project_root / "output_report.md").write_text("Output content from current step") + + payload = await quality_gate._build_payload( + outputs={"report": "output_report.md"}, + project_root=project_root, + inputs={"data": "input_data.md"}, + 
) + + # Both sections present + assert "BEGIN INPUTS" in payload + assert "END INPUTS" in payload + assert "BEGIN OUTPUTS" in payload + assert "END OUTPUTS" in payload + # Content included + assert "Input content from prior step" in payload + assert "Output content from current step" in payload + # Inputs section comes before outputs section + assert payload.index("BEGIN INPUTS") < payload.index("BEGIN OUTPUTS") + + async def test_build_payload_outputs_only_no_input_headers( + self, quality_gate: QualityGate, project_root: Path + ) -> None: + """Test that when no inputs provided, only outputs section appears.""" + (project_root / "output.md").write_text("Output only") + + payload = await quality_gate._build_payload( + outputs={"report": "output.md"}, + project_root=project_root, + ) + + assert "BEGIN OUTPUTS" in payload + assert "END OUTPUTS" in payload + assert "BEGIN INPUTS" not in payload + assert "END INPUTS" not in payload + + async def test_build_payload_empty_inputs_no_input_headers( + self, quality_gate: QualityGate, project_root: Path + ) -> None: + """Test that empty inputs dict doesn't add input headers.""" + (project_root / "output.md").write_text("Output only") + + payload = await quality_gate._build_payload( + outputs={"report": "output.md"}, + project_root=project_root, + inputs={}, + ) + + assert "BEGIN OUTPUTS" in payload + assert "BEGIN INPUTS" not in payload + + async def test_build_payload_multiple_inputs( + self, quality_gate: QualityGate, project_root: Path + ) -> None: + """Test building payload with multiple input files.""" + (project_root / "data1.md").write_text("Data file 1") + (project_root / "data2.md").write_text("Data file 2") + (project_root / "output.md").write_text("Final output") + + payload = await quality_gate._build_payload( + outputs={"report": "output.md"}, + project_root=project_root, + inputs={"data_a": "data1.md", "data_b": "data2.md"}, + ) + + assert "Data file 1" in payload + assert "Data file 2" in payload + assert "Final output" in payload + # Both files should be within the inputs section + inputs_start = payload.index("BEGIN INPUTS") + inputs_end = payload.index("END INPUTS") + assert payload.index("data1.md") > inputs_start + assert payload.index("data1.md") < inputs_end + def test_parse_result_valid(self, quality_gate: QualityGate) -> None: """Test parsing valid structured output data.""" data = { @@ -169,7 +294,7 @@ async def test_evaluate_no_criteria( ) -> None: """Test evaluation with no criteria auto-passes.""" result = await quality_gate.evaluate( - quality_criteria=[], + quality_criteria={}, outputs={"report": "output.md"}, project_root=project_root, ) @@ -188,7 +313,7 @@ async def test_evaluate_calls_cli_with_correct_args( output_file.write_text("Test content") await gate.evaluate( - quality_criteria=["Must be valid"], + quality_criteria={"Validity": "Must be valid"}, outputs={"report": "output.md"}, project_root=project_root, ) @@ -197,6 +322,7 @@ async def test_evaluate_calls_cli_with_correct_args( call_kwargs = mock_cli.run.call_args assert call_kwargs.kwargs["json_schema"] == QUALITY_GATE_RESPONSE_SCHEMA assert call_kwargs.kwargs["cwd"] == project_root + assert "Validity" in call_kwargs.kwargs["system_prompt"] assert "Must be valid" in call_kwargs.kwargs["system_prompt"] assert "Test content" in call_kwargs.kwargs["prompt"] @@ -212,7 +338,7 @@ async def test_evaluate_wraps_cli_error( with pytest.raises(QualityGateError, match="CLI failed"): await gate.evaluate( - quality_criteria=["Test"], + quality_criteria={"Test": "Test 
criterion"}, outputs={"report": "output.md"}, project_root=project_root, ) @@ -227,6 +353,132 @@ async def test_schema_is_valid_json(self) -> None: assert parsed == QUALITY_GATE_RESPONSE_SCHEMA +class TestEvaluateReviews: + """Tests for QualityGate.evaluate_reviews method.""" + + async def test_empty_reviews(self, quality_gate: QualityGate, project_root: Path) -> None: + """Test that empty reviews returns empty list.""" + result = await quality_gate.evaluate_reviews( + reviews=[], + outputs={"report": "output.md"}, + output_specs={"report": "file"}, + project_root=project_root, + ) + assert result == [] + + async def test_step_review_passes( + self, mock_cli: ClaudeCLI, project_root: Path + ) -> None: + """Test step-level review that passes.""" + mock_cli.run = AsyncMock( + return_value={"passed": True, "feedback": "All good", "criteria_results": []} + ) + gate = QualityGate(cli=mock_cli) + + (project_root / "output.md").write_text("content") + + result = await gate.evaluate_reviews( + reviews=[ + { + "run_each": "step", + "quality_criteria": {"Complete": "Is it complete?"}, + } + ], + outputs={"report": "output.md"}, + output_specs={"report": "file"}, + project_root=project_root, + ) + assert result == [] # No failures + + async def test_step_review_fails( + self, mock_cli: ClaudeCLI, project_root: Path + ) -> None: + """Test step-level review that fails.""" + mock_cli.run = AsyncMock( + return_value={ + "passed": False, + "feedback": "Issues found", + "criteria_results": [ + {"criterion": "Complete", "passed": False, "feedback": "Missing content"} + ], + } + ) + gate = QualityGate(cli=mock_cli) + + (project_root / "output.md").write_text("content") + + result = await gate.evaluate_reviews( + reviews=[ + { + "run_each": "step", + "quality_criteria": {"Complete": "Is it complete?"}, + } + ], + outputs={"report": "output.md"}, + output_specs={"report": "file"}, + project_root=project_root, + ) + assert len(result) == 1 + assert result[0].review_run_each == "step" + assert result[0].passed is False + + async def test_per_file_review( + self, mock_cli: ClaudeCLI, project_root: Path + ) -> None: + """Test per-file review for files-type output.""" + call_count = 0 + + async def mock_run(**kwargs: Any) -> dict[str, Any]: + nonlocal call_count + call_count += 1 + return {"passed": True, "feedback": "OK", "criteria_results": []} + + mock_cli.run = AsyncMock(side_effect=mock_run) + gate = QualityGate(cli=mock_cli) + + (project_root / "a.md").write_text("File A") + (project_root / "b.md").write_text("File B") + + result = await gate.evaluate_reviews( + reviews=[ + { + "run_each": "reports", + "quality_criteria": {"Valid": "Is it valid?"}, + } + ], + outputs={"reports": ["a.md", "b.md"]}, + output_specs={"reports": "files"}, + project_root=project_root, + ) + assert result == [] # All pass + assert call_count == 2 # Called once per file + + async def test_single_file_review( + self, mock_cli: ClaudeCLI, project_root: Path + ) -> None: + """Test review targeting a single-file output.""" + mock_cli.run = AsyncMock( + return_value={"passed": True, "feedback": "OK", "criteria_results": []} + ) + gate = QualityGate(cli=mock_cli) + + (project_root / "report.md").write_text("content") + + result = await gate.evaluate_reviews( + reviews=[ + { + "run_each": "report", + "quality_criteria": {"Valid": "Is it valid?"}, + } + ], + outputs={"report": "report.md"}, + output_specs={"report": "file"}, + project_root=project_root, + ) + assert result == [] + mock_cli.run.assert_called_once() + + class 
TestMockQualityGate: """Tests for MockQualityGate class.""" @@ -234,12 +486,12 @@ class TestMockQualityGate: async def evaluate_mock_gate( gate: MockQualityGate, project_root: Path, - criteria: list[str] | None = None, + criteria: dict[str, str] | None = None, outputs: dict[str, str | list[str]] | None = None, ) -> Any: """Helper to evaluate a mock gate with default parameters.""" return await gate.evaluate( - quality_criteria=criteria or ["Criterion 1"], + quality_criteria=criteria or {"Criterion 1": "Is criterion 1 met?"}, outputs=outputs or {"report": "output.md"}, project_root=project_root, ) @@ -265,12 +517,18 @@ async def test_mock_records_evaluations(self, project_root: Path) -> None: gate = MockQualityGate() await self.evaluate_mock_gate( - gate, project_root, criteria=["Criterion 1"], outputs={"out1": "output1.md"} + gate, + project_root, + criteria={"Criterion 1": "Is criterion 1 met?"}, + outputs={"out1": "output1.md"}, ) await self.evaluate_mock_gate( - gate, project_root, criteria=["Criterion 2"], outputs={"out2": "output2.md"} + gate, + project_root, + criteria={"Criterion 2": "Is criterion 2 met?"}, + outputs={"out2": "output2.md"}, ) assert len(gate.evaluations) == 2 - assert gate.evaluations[0]["quality_criteria"] == ["Criterion 1"] - assert gate.evaluations[1]["quality_criteria"] == ["Criterion 2"] + assert gate.evaluations[0]["quality_criteria"] == {"Criterion 1": "Is criterion 1 met?"} + assert gate.evaluations[1]["quality_criteria"] == {"Criterion 2": "Is criterion 2 met?"} diff --git a/tests/unit/mcp/test_schemas.py b/tests/unit/mcp/test_schemas.py index f1689d01..9bcf2c64 100644 --- a/tests/unit/mcp/test_schemas.py +++ b/tests/unit/mcp/test_schemas.py @@ -2,11 +2,14 @@ from deepwork.mcp.schemas import ( ActiveStepInfo, + ExpectedOutput, FinishedStepInput, FinishedStepResponse, JobInfo, QualityCriteriaResult, QualityGateResult, + ReviewInfo, + ReviewResult, StartWorkflowInput, StartWorkflowResponse, StepInfo, @@ -223,38 +226,122 @@ def test_failed_gate(self) -> None: assert len(result.criteria_results) == 2 +class TestReviewInfo: + """Tests for ReviewInfo model.""" + + def test_step_review(self) -> None: + """Test step-level review info.""" + review = ReviewInfo( + run_each="step", + quality_criteria={"Complete": "Is it complete?"}, + ) + + assert review.run_each == "step" + assert review.quality_criteria == {"Complete": "Is it complete?"} + + def test_output_review(self) -> None: + """Test output-specific review info.""" + review = ReviewInfo( + run_each="reports", + quality_criteria={ + "Valid": "Is it valid?", + "Complete": "Is it complete?", + }, + ) + + assert review.run_each == "reports" + assert len(review.quality_criteria) == 2 + + +class TestReviewResult: + """Tests for ReviewResult model.""" + + def test_passed_review(self) -> None: + """Test passed review result.""" + result = ReviewResult( + review_run_each="step", + target_file=None, + passed=True, + feedback="All good", + ) + + assert result.passed is True + assert result.target_file is None + + def test_failed_per_file_review(self) -> None: + """Test failed per-file review result.""" + result = ReviewResult( + review_run_each="reports", + target_file="report1.md", + passed=False, + feedback="Issues found", + criteria_results=[ + QualityCriteriaResult(criterion="Valid", passed=False, feedback="Not valid"), + ], + ) + + assert result.passed is False + assert result.target_file == "report1.md" + assert result.review_run_each == "reports" + assert len(result.criteria_results) == 1 + + class 
TestActiveStepInfo: """Tests for ActiveStepInfo model.""" def test_basic_step_info(self) -> None: """Test basic active step info.""" + expected = [ + ExpectedOutput( + name="output.md", + type="file", + description="Test output", + syntax_for_finished_step_tool="filepath", + ) + ] step_info = ActiveStepInfo( session_id="abc123", branch_name="deepwork/test-main-20240101", step_id="step1", - step_expected_outputs=["output.md"], - step_quality_criteria=["Must be complete"], + step_expected_outputs=expected, + step_reviews=[ + ReviewInfo( + run_each="step", + quality_criteria={"Complete": "Is it complete?"}, + ) + ], step_instructions="Do something", ) assert step_info.session_id == "abc123" assert step_info.branch_name == "deepwork/test-main-20240101" assert step_info.step_id == "step1" - assert step_info.step_expected_outputs == ["output.md"] - assert step_info.step_quality_criteria == ["Must be complete"] + assert len(step_info.step_expected_outputs) == 1 + assert step_info.step_expected_outputs[0].name == "output.md" + assert step_info.step_expected_outputs[0].type == "file" + assert step_info.step_expected_outputs[0].syntax_for_finished_step_tool == "filepath" + assert len(step_info.step_reviews) == 1 + assert step_info.step_reviews[0].run_each == "step" assert step_info.step_instructions == "Do something" - def test_default_quality_criteria(self) -> None: - """Test default empty quality criteria.""" + def test_default_reviews(self) -> None: + """Test default empty reviews.""" step_info = ActiveStepInfo( session_id="abc123", branch_name="deepwork/test-main-20240101", step_id="step1", - step_expected_outputs=["output.md"], + step_expected_outputs=[ + ExpectedOutput( + name="output.md", + type="file", + description="Test output", + syntax_for_finished_step_tool="filepath", + ) + ], step_instructions="Do something", ) - assert step_info.step_quality_criteria == [] + assert step_info.step_reviews == [] class TestStartWorkflowResponse: @@ -267,7 +354,14 @@ def test_basic_response(self) -> None: session_id="abc123", branch_name="deepwork/test-main-20240101", step_id="step1", - step_expected_outputs=["output.md"], + step_expected_outputs=[ + ExpectedOutput( + name="output.md", + type="file", + description="Test output", + syntax_for_finished_step_tool="filepath", + ) + ], step_instructions="Do something", ) ) @@ -275,7 +369,7 @@ def test_basic_response(self) -> None: assert response.begin_step.session_id == "abc123" assert response.begin_step.branch_name == "deepwork/test-main-20240101" assert response.begin_step.step_id == "step1" - assert response.begin_step.step_quality_criteria == [] + assert response.begin_step.step_reviews == [] class TestFinishedStepResponse: @@ -286,8 +380,18 @@ def test_needs_work_status(self) -> None: response = FinishedStepResponse( status=StepStatus.NEEDS_WORK, feedback="Fix the issues", - failed_criteria=[ - QualityCriteriaResult(criterion="Test", passed=False, feedback="Failed"), + failed_reviews=[ + ReviewResult( + review_run_each="step", + target_file=None, + passed=False, + feedback="Issues found", + criteria_results=[ + QualityCriteriaResult( + criterion="Test", passed=False, feedback="Failed" + ), + ], + ), ], ) @@ -303,7 +407,14 @@ def test_next_step_status(self) -> None: session_id="abc123", branch_name="deepwork/test-main-20240101", step_id="step2", - step_expected_outputs=["output2.md"], + step_expected_outputs=[ + ExpectedOutput( + name="output2.md", + type="file", + description="Test output", + syntax_for_finished_step_tool="filepath", + ) + ], 
step_instructions="Next step instructions", ), ) diff --git a/tests/unit/mcp/test_tools.py b/tests/unit/mcp/test_tools.py index 0da0e60f..42e8e80f 100644 --- a/tests/unit/mcp/test_tools.py +++ b/tests/unit/mcp/test_tools.py @@ -41,8 +41,10 @@ def project_root(tmp_path: Path) -> Path: output1.md: type: file description: First step output - quality_criteria: - - Output must be valid + reviews: + - run_each: step + quality_criteria: + "Output Valid": "Is the output valid?" - id: step2 name: Second Step description: The second step @@ -53,6 +55,7 @@ def project_root(tmp_path: Path) -> Path: description: Second step output dependencies: - step1 + reviews: [] workflows: - name: main @@ -149,8 +152,14 @@ async def test_start_workflow(self, tools: WorkflowTools) -> None: assert "test-instance" in response.begin_step.branch_name assert response.begin_step.step_id == "step1" assert "Step 1" in response.begin_step.step_instructions - assert "output1.md" in response.begin_step.step_expected_outputs - assert "Output must be valid" in response.begin_step.step_quality_criteria + outputs = response.begin_step.step_expected_outputs + assert len(outputs) == 1 + assert outputs[0].name == "output1.md" + assert outputs[0].type == "file" + assert outputs[0].syntax_for_finished_step_tool == "filepath" + assert len(response.begin_step.step_reviews) == 1 + assert response.begin_step.step_reviews[0].run_each == "step" + assert "Output Valid" in response.begin_step.step_reviews[0].quality_criteria async def test_start_workflow_invalid_job(self, tools: WorkflowTools) -> None: """Test starting workflow with invalid job.""" @@ -200,6 +209,7 @@ async def test_start_workflow_invalid_workflow_multiple( output_a.md: type: file description: Step A output + reviews: [] - id: step_b name: Step B description: Step B @@ -208,6 +218,7 @@ async def test_start_workflow_invalid_workflow_multiple( output_b.md: type: file description: Step B output + reviews: [] workflows: - name: alpha @@ -349,7 +360,7 @@ async def test_finished_step_with_quality_gate_fail( assert response.status == StepStatus.NEEDS_WORK assert response.feedback == "Needs improvement" - assert response.failed_criteria is not None + assert response.failed_reviews is not None async def test_finished_step_quality_gate_max_attempts( self, project_root: Path, state_manager: StateManager @@ -510,6 +521,7 @@ async def test_finished_step_empty_outputs_for_step_with_no_outputs( description: Cleanup step with no outputs instructions_file: steps/cleanup.md outputs: {} + reviews: [] workflows: - name: main @@ -561,6 +573,7 @@ async def test_finished_step_validates_files_type_output( reports: type: files description: Generated report files + reviews: [] workflows: - name: main @@ -613,6 +626,7 @@ async def test_finished_step_validates_files_type_existence( reports: type: files description: Generated report files + reviews: [] workflows: - name: main @@ -669,6 +683,7 @@ async def test_finished_step_files_type_success( reports: type: files description: Generated report files + reviews: [] workflows: - name: main diff --git a/tests/unit/test_parser.py b/tests/unit/test_parser.py index e2df69d1..095e5961 100644 --- a/tests/unit/test_parser.py +++ b/tests/unit/test_parser.py @@ -8,6 +8,7 @@ JobDefinition, OutputSpec, ParseError, + Review, Step, StepInput, parse_job_definition, @@ -90,6 +91,39 @@ def test_from_dict_files_type(self) -> None: assert output.description == "Multiple output files" +class TestReview: + """Tests for Review dataclass.""" + + def test_from_dict(self) -> None: 
+ """Test creating review from dictionary.""" + data = { + "run_each": "step", + "quality_criteria": {"Complete": "Is it complete?", "Valid": "Is it valid?"}, + } + review = Review.from_dict(data) + + assert review.run_each == "step" + assert review.quality_criteria == {"Complete": "Is it complete?", "Valid": "Is it valid?"} + + def test_from_dict_output_specific(self) -> None: + """Test creating review targeting specific output.""" + data = { + "run_each": "reports", + "quality_criteria": {"Well Written": "Is it well written?"}, + } + review = Review.from_dict(data) + + assert review.run_each == "reports" + assert len(review.quality_criteria) == 1 + + def test_from_dict_empty_criteria(self) -> None: + """Test creating review with empty criteria defaults.""" + data = {"run_each": "step"} + review = Review.from_dict(data) + + assert review.quality_criteria == {} + + class TestStep: """Tests for Step dataclass.""" @@ -194,6 +228,50 @@ def test_from_dict_exposed_true(self) -> None: assert step.exposed is True + def test_from_dict_with_reviews(self) -> None: + """Test creating step with reviews.""" + data = { + "id": "step1", + "name": "Step 1", + "description": "First step", + "instructions_file": "steps/step1.md", + "outputs": { + "output.md": {"type": "file", "description": "An output file"}, + }, + "reviews": [ + { + "run_each": "step", + "quality_criteria": {"Complete": "Is it complete?"}, + }, + { + "run_each": "output.md", + "quality_criteria": {"Valid": "Is it valid?"}, + }, + ], + } + step = Step.from_dict(data) + + assert len(step.reviews) == 2 + assert step.reviews[0].run_each == "step" + assert step.reviews[0].quality_criteria == {"Complete": "Is it complete?"} + assert step.reviews[1].run_each == "output.md" + + def test_from_dict_empty_reviews(self) -> None: + """Test creating step with empty reviews list.""" + data = { + "id": "step1", + "name": "Step 1", + "description": "First step", + "instructions_file": "steps/step1.md", + "outputs": { + "output.md": {"type": "file", "description": "An output file"}, + }, + "reviews": [], + } + step = Step.from_dict(data) + + assert step.reviews == [] + class TestJobDefinition: """Tests for JobDefinition dataclass.""" @@ -319,6 +397,64 @@ def test_validate_file_inputs_missing_step(self) -> None: with pytest.raises(ParseError, match="references non-existent step"): job.validate_file_inputs() + def test_validate_reviews_valid(self) -> None: + """Test that validate_reviews passes for valid run_each values.""" + job = JobDefinition( + name="test_job", + version="1.0.0", + summary="Test job", + description="Test", + steps=[ + Step( + id="step1", + name="Step 1", + description="Step", + instructions_file="steps/step1.md", + outputs=[ + OutputSpec(name="report.md", type="file", description="Report") + ], + reviews=[ + Review(run_each="step", quality_criteria={"Complete": "Is it?"}), + Review(run_each="report.md", quality_criteria={"Valid": "Is it?"}), + ], + ) + ], + job_dir=Path("/tmp"), + ) + + # Should not raise + job.validate_reviews() + + def test_validate_reviews_invalid_run_each(self) -> None: + """Test that validate_reviews fails for invalid run_each.""" + job = JobDefinition( + name="test_job", + version="1.0.0", + summary="Test job", + description="Test", + steps=[ + Step( + id="step1", + name="Step 1", + description="Step", + instructions_file="steps/step1.md", + outputs=[ + OutputSpec(name="report.md", type="file", description="Report") + ], + reviews=[ + Review( + run_each="nonexistent_output", + quality_criteria={"Test": "Is it?"}, + 
), + ], + ) + ], + job_dir=Path("/tmp"), + ) + + with pytest.raises(ParseError, match="run_each='nonexistent_output'"): + job.validate_reviews() + def test_validate_file_inputs_not_in_dependencies(self) -> None: """Test file input validation fails if from_step not in dependencies.""" job = JobDefinition( diff --git a/tests/unit/test_validation.py b/tests/unit/test_validation.py index 93fa237b..1a2e8bdc 100644 --- a/tests/unit/test_validation.py +++ b/tests/unit/test_validation.py @@ -24,6 +24,7 @@ def test_validates_simple_job(self) -> None: "instructions_file": "steps/step1.md", "outputs": {"output.md": {"type": "file", "description": "Output"}}, "dependencies": [], + "reviews": [], } ], } @@ -50,6 +51,7 @@ def test_validates_job_with_user_inputs(self) -> None: ], "outputs": {"output.md": {"type": "file", "description": "Output"}}, "dependencies": [], + "reviews": [], } ], } @@ -71,6 +73,7 @@ def test_validates_job_with_file_inputs(self) -> None: "instructions_file": "steps/step1.md", "outputs": {"data.md": {"type": "file", "description": "Data output"}}, "dependencies": [], + "reviews": [], }, { "id": "step2", @@ -80,6 +83,7 @@ def test_validates_job_with_file_inputs(self) -> None: "inputs": [{"file": "data.md", "from_step": "step1"}], "outputs": {"result.md": {"type": "file", "description": "Result output"}}, "dependencies": ["step1"], + "reviews": [], }, ], } @@ -113,6 +117,7 @@ def test_raises_for_invalid_job_name(self) -> None: "description": "Step", "instructions_file": "steps/step1.md", "outputs": {"output.md": {"type": "file", "description": "Output"}}, + "reviews": [], } ], } @@ -134,6 +139,7 @@ def test_raises_for_invalid_version(self) -> None: "description": "Step", "instructions_file": "steps/step1.md", "outputs": {"output.md": {"type": "file", "description": "Output"}}, + "reviews": [], } ], } @@ -195,6 +201,7 @@ def test_raises_for_invalid_input_format(self) -> None: } ], "outputs": {"output.md": {"type": "file", "description": "Output"}}, + "reviews": [], } ], } @@ -211,3 +218,116 @@ def test_validates_complex_job(self, fixtures_dir) -> None: assert job_data is not None validate_against_schema(job_data, JOB_SCHEMA) + + def test_raises_for_step_missing_reviews(self) -> None: + """Test that validation fails for step without reviews field.""" + job_data = { + "name": "job", + "version": "1.0.0", + "summary": "Missing reviews test", + "description": "Job", + "steps": [ + { + "id": "step1", + "name": "Step 1", + "description": "Step", + "instructions_file": "steps/step1.md", + "outputs": {"output.md": {"type": "file", "description": "Output"}}, + # Missing reviews - now required + } + ], + } + + with pytest.raises(ValidationError, match="'reviews' is a required property"): + validate_against_schema(job_data, JOB_SCHEMA) + + def test_validates_job_with_reviews(self) -> None: + """Test validation of job with reviews.""" + job_data = { + "name": "job_with_reviews", + "version": "1.0.0", + "summary": "Job with reviews", + "description": "Job", + "steps": [ + { + "id": "step1", + "name": "Step 1", + "description": "Step", + "instructions_file": "steps/step1.md", + "outputs": { + "report.md": {"type": "file", "description": "Report"}, + }, + "reviews": [ + { + "run_each": "step", + "quality_criteria": { + "Complete": "Is it complete?", + "Valid": "Is it valid?", + }, + }, + { + "run_each": "report.md", + "quality_criteria": { + "Well Written": "Is it well written?", + }, + }, + ], + } + ], + } + + validate_against_schema(job_data, JOB_SCHEMA) + + def 
test_raises_for_review_missing_run_each(self) -> None: + """Test validation fails for review without run_each.""" + job_data = { + "name": "job", + "version": "1.0.0", + "summary": "Test", + "description": "Job", + "steps": [ + { + "id": "step1", + "name": "Step 1", + "description": "Step", + "instructions_file": "steps/step1.md", + "outputs": {"output.md": {"type": "file", "description": "Output"}}, + "reviews": [ + { + # Missing run_each + "quality_criteria": {"Test": "Is it tested?"}, + } + ], + } + ], + } + + with pytest.raises(ValidationError): + validate_against_schema(job_data, JOB_SCHEMA) + + def test_raises_for_review_empty_criteria(self) -> None: + """Test validation fails for review with empty quality_criteria.""" + job_data = { + "name": "job", + "version": "1.0.0", + "summary": "Test", + "description": "Job", + "steps": [ + { + "id": "step1", + "name": "Step 1", + "description": "Step", + "instructions_file": "steps/step1.md", + "outputs": {"output.md": {"type": "file", "description": "Output"}}, + "reviews": [ + { + "run_each": "step", + "quality_criteria": {}, # Empty - minProperties: 1 + } + ], + } + ], + } + + with pytest.raises(ValidationError): + validate_against_schema(job_data, JOB_SCHEMA) diff --git a/uv.lock b/uv.lock index 0282e238..abb2d5c0 100644 --- a/uv.lock +++ b/uv.lock @@ -482,6 +482,7 @@ dev = [ [package.dev-dependencies] dev = [ + { name = "fpdf2" }, { name = "pytest" }, { name = "pytest-asyncio" }, { name = "pytest-mock" }, @@ -512,11 +513,21 @@ provides-extras = ["dev"] [package.metadata.requires-dev] dev = [ + { name = "fpdf2", specifier = ">=2.8.5" }, { name = "pytest", specifier = ">=9.0.2" }, { name = "pytest-asyncio", specifier = ">=1.3.0" }, { name = "pytest-mock", specifier = ">=3.15.1" }, ] +[[package]] +name = "defusedxml" +version = "0.7.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/0f/d5/c66da9b79e5bdb124974bfe172b4daf3c984ebd9c2a06e2b8a4dc7331c72/defusedxml-0.7.1.tar.gz", hash = "sha256:1bb3032db185915b62d7c6209c5a8792be6a32ab2fedacc84e01b52c51aa3e69", size = 75520, upload-time = "2021-03-08T10:59:26.269Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/07/6c/aa3f2f849e01cb6a001cd8554a88d4c77c5c1a31c95bdf1cf9301e6d9ef4/defusedxml-0.7.1-py2.py3-none-any.whl", hash = "sha256:a352e7e428770286cc899e2542b6cdaedb2b4953ff269a210103ec58f6198a61", size = 25604, upload-time = "2021-03-08T10:59:24.45Z" }, +] + [[package]] name = "diskcache" version = "5.6.3" @@ -625,6 +636,69 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e5/c1/1a35ec68ff76ea8443aa115b18bcdee748a4ada2124537ee90522899ff9f/fastmcp-2.14.5-py3-none-any.whl", hash = "sha256:d81e8ec813f5089d3624bec93944beaefa86c0c3a4ef1111cbef676a761ebccf", size = 417784, upload-time = "2026-02-03T15:35:18.489Z" }, ] +[[package]] +name = "fonttools" +version = "4.61.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ec/ca/cf17b88a8df95691275a3d77dc0a5ad9907f328ae53acbe6795da1b2f5ed/fonttools-4.61.1.tar.gz", hash = "sha256:6675329885c44657f826ef01d9e4fb33b9158e9d93c537d84ad8399539bc6f69", size = 3565756, upload-time = "2025-12-12T17:31:24.246Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/69/12/bf9f4eaa2fad039356cc627587e30ed008c03f1cebd3034376b5ee8d1d44/fonttools-4.61.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:c6604b735bb12fef8e0efd5578c9fb5d3d8532d5001ea13a19cddf295673ee09", size = 2852213, upload-time = 
"2025-12-12T17:29:46.675Z" }, + { url = "https://files.pythonhosted.org/packages/ac/49/4138d1acb6261499bedde1c07f8c2605d1d8f9d77a151e5507fd3ef084b6/fonttools-4.61.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5ce02f38a754f207f2f06557523cd39a06438ba3aafc0639c477ac409fc64e37", size = 2401689, upload-time = "2025-12-12T17:29:48.769Z" }, + { url = "https://files.pythonhosted.org/packages/e5/fe/e6ce0fe20a40e03aef906af60aa87668696f9e4802fa283627d0b5ed777f/fonttools-4.61.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:77efb033d8d7ff233385f30c62c7c79271c8885d5c9657d967ede124671bbdfb", size = 5058809, upload-time = "2025-12-12T17:29:51.701Z" }, + { url = "https://files.pythonhosted.org/packages/79/61/1ca198af22f7dd22c17ab86e9024ed3c06299cfdb08170640e9996d501a0/fonttools-4.61.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:75c1a6dfac6abd407634420c93864a1e274ebc1c7531346d9254c0d8f6ca00f9", size = 5036039, upload-time = "2025-12-12T17:29:53.659Z" }, + { url = "https://files.pythonhosted.org/packages/99/cc/fa1801e408586b5fce4da9f5455af8d770f4fc57391cd5da7256bb364d38/fonttools-4.61.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:0de30bfe7745c0d1ffa2b0b7048fb7123ad0d71107e10ee090fa0b16b9452e87", size = 5034714, upload-time = "2025-12-12T17:29:55.592Z" }, + { url = "https://files.pythonhosted.org/packages/bf/aa/b7aeafe65adb1b0a925f8f25725e09f078c635bc22754f3fecb7456955b0/fonttools-4.61.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:58b0ee0ab5b1fc9921eccfe11d1435added19d6494dde14e323f25ad2bc30c56", size = 5158648, upload-time = "2025-12-12T17:29:57.861Z" }, + { url = "https://files.pythonhosted.org/packages/99/f9/08ea7a38663328881384c6e7777bbefc46fd7d282adfd87a7d2b84ec9d50/fonttools-4.61.1-cp311-cp311-win32.whl", hash = "sha256:f79b168428351d11e10c5aeb61a74e1851ec221081299f4cf56036a95431c43a", size = 2280681, upload-time = "2025-12-12T17:29:59.943Z" }, + { url = "https://files.pythonhosted.org/packages/07/ad/37dd1ae5fa6e01612a1fbb954f0927681f282925a86e86198ccd7b15d515/fonttools-4.61.1-cp311-cp311-win_amd64.whl", hash = "sha256:fe2efccb324948a11dd09d22136fe2ac8a97d6c1347cf0b58a911dcd529f66b7", size = 2331951, upload-time = "2025-12-12T17:30:02.254Z" }, + { url = "https://files.pythonhosted.org/packages/6f/16/7decaa24a1bd3a70c607b2e29f0adc6159f36a7e40eaba59846414765fd4/fonttools-4.61.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:f3cb4a569029b9f291f88aafc927dd53683757e640081ca8c412781ea144565e", size = 2851593, upload-time = "2025-12-12T17:30:04.225Z" }, + { url = "https://files.pythonhosted.org/packages/94/98/3c4cb97c64713a8cf499b3245c3bf9a2b8fd16a3e375feff2aed78f96259/fonttools-4.61.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:41a7170d042e8c0024703ed13b71893519a1a6d6e18e933e3ec7507a2c26a4b2", size = 2400231, upload-time = "2025-12-12T17:30:06.47Z" }, + { url = "https://files.pythonhosted.org/packages/b7/37/82dbef0f6342eb01f54bca073ac1498433d6ce71e50c3c3282b655733b31/fonttools-4.61.1-cp312-cp312-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:10d88e55330e092940584774ee5e8a6971b01fc2f4d3466a1d6c158230880796", size = 4954103, upload-time = "2025-12-12T17:30:08.432Z" }, + { url = "https://files.pythonhosted.org/packages/6c/44/f3aeac0fa98e7ad527f479e161aca6c3a1e47bb6996b053d45226fe37bf2/fonttools-4.61.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:15acc09befd16a0fb8a8f62bc147e1a82817542d72184acca9ce6e0aeda9fa6d", size = 5004295, upload-time = "2025-12-12T17:30:10.56Z" }, + { url = "https://files.pythonhosted.org/packages/14/e8/7424ced75473983b964d09f6747fa09f054a6d656f60e9ac9324cf40c743/fonttools-4.61.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e6bcdf33aec38d16508ce61fd81838f24c83c90a1d1b8c68982857038673d6b8", size = 4944109, upload-time = "2025-12-12T17:30:12.874Z" }, + { url = "https://files.pythonhosted.org/packages/c8/8b/6391b257fa3d0b553d73e778f953a2f0154292a7a7a085e2374b111e5410/fonttools-4.61.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:5fade934607a523614726119164ff621e8c30e8fa1ffffbbd358662056ba69f0", size = 5093598, upload-time = "2025-12-12T17:30:15.79Z" }, + { url = "https://files.pythonhosted.org/packages/d9/71/fd2ea96cdc512d92da5678a1c98c267ddd4d8c5130b76d0f7a80f9a9fde8/fonttools-4.61.1-cp312-cp312-win32.whl", hash = "sha256:75da8f28eff26defba42c52986de97b22106cb8f26515b7c22443ebc9c2d3261", size = 2269060, upload-time = "2025-12-12T17:30:18.058Z" }, + { url = "https://files.pythonhosted.org/packages/80/3b/a3e81b71aed5a688e89dfe0e2694b26b78c7d7f39a5ffd8a7d75f54a12a8/fonttools-4.61.1-cp312-cp312-win_amd64.whl", hash = "sha256:497c31ce314219888c0e2fce5ad9178ca83fe5230b01a5006726cdf3ac9f24d9", size = 2319078, upload-time = "2025-12-12T17:30:22.862Z" }, + { url = "https://files.pythonhosted.org/packages/4b/cf/00ba28b0990982530addb8dc3e9e6f2fa9cb5c20df2abdda7baa755e8fe1/fonttools-4.61.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:8c56c488ab471628ff3bfa80964372fc13504ece601e0d97a78ee74126b2045c", size = 2846454, upload-time = "2025-12-12T17:30:24.938Z" }, + { url = "https://files.pythonhosted.org/packages/5a/ca/468c9a8446a2103ae645d14fee3f610567b7042aba85031c1c65e3ef7471/fonttools-4.61.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:dc492779501fa723b04d0ab1f5be046797fee17d27700476edc7ee9ae535a61e", size = 2398191, upload-time = "2025-12-12T17:30:27.343Z" }, + { url = "https://files.pythonhosted.org/packages/a3/4b/d67eedaed19def5967fade3297fed8161b25ba94699efc124b14fb68cdbc/fonttools-4.61.1-cp313-cp313-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:64102ca87e84261419c3747a0d20f396eb024bdbeb04c2bfb37e2891f5fadcb5", size = 4928410, upload-time = "2025-12-12T17:30:29.771Z" }, + { url = "https://files.pythonhosted.org/packages/b0/8d/6fb3494dfe61a46258cd93d979cf4725ded4eb46c2a4ca35e4490d84daea/fonttools-4.61.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4c1b526c8d3f615a7b1867f38a9410849c8f4aef078535742198e942fba0e9bd", size = 4984460, upload-time = "2025-12-12T17:30:32.073Z" }, + { url = "https://files.pythonhosted.org/packages/f7/f1/a47f1d30b3dc00d75e7af762652d4cbc3dff5c2697a0dbd5203c81afd9c3/fonttools-4.61.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:41ed4b5ec103bd306bb68f81dc166e77409e5209443e5773cb4ed837bcc9b0d3", size = 4925800, upload-time = "2025-12-12T17:30:34.339Z" }, + { url = "https://files.pythonhosted.org/packages/a7/01/e6ae64a0981076e8a66906fab01539799546181e32a37a0257b77e4aa88b/fonttools-4.61.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:b501c862d4901792adaec7c25b1ecc749e2662543f68bb194c42ba18d6eec98d", size = 5067859, upload-time = "2025-12-12T17:30:36.593Z" }, + { url = "https://files.pythonhosted.org/packages/73/aa/28e40b8d6809a9b5075350a86779163f074d2b617c15d22343fce81918db/fonttools-4.61.1-cp313-cp313-win32.whl", hash = 
"sha256:4d7092bb38c53bbc78e9255a59158b150bcdc115a1e3b3ce0b5f267dc35dd63c", size = 2267821, upload-time = "2025-12-12T17:30:38.478Z" }, + { url = "https://files.pythonhosted.org/packages/1a/59/453c06d1d83dc0951b69ef692d6b9f1846680342927df54e9a1ca91c6f90/fonttools-4.61.1-cp313-cp313-win_amd64.whl", hash = "sha256:21e7c8d76f62ab13c9472ccf74515ca5b9a761d1bde3265152a6dc58700d895b", size = 2318169, upload-time = "2025-12-12T17:30:40.951Z" }, + { url = "https://files.pythonhosted.org/packages/32/8f/4e7bf82c0cbb738d3c2206c920ca34ca74ef9dabde779030145d28665104/fonttools-4.61.1-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:fff4f534200a04b4a36e7ae3cb74493afe807b517a09e99cb4faa89a34ed6ecd", size = 2846094, upload-time = "2025-12-12T17:30:43.511Z" }, + { url = "https://files.pythonhosted.org/packages/71/09/d44e45d0a4f3a651f23a1e9d42de43bc643cce2971b19e784cc67d823676/fonttools-4.61.1-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:d9203500f7c63545b4ce3799319fe4d9feb1a1b89b28d3cb5abd11b9dd64147e", size = 2396589, upload-time = "2025-12-12T17:30:45.681Z" }, + { url = "https://files.pythonhosted.org/packages/89/18/58c64cafcf8eb677a99ef593121f719e6dcbdb7d1c594ae5a10d4997ca8a/fonttools-4.61.1-cp314-cp314-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:fa646ecec9528bef693415c79a86e733c70a4965dd938e9a226b0fc64c9d2e6c", size = 4877892, upload-time = "2025-12-12T17:30:47.709Z" }, + { url = "https://files.pythonhosted.org/packages/8a/ec/9e6b38c7ba1e09eb51db849d5450f4c05b7e78481f662c3b79dbde6f3d04/fonttools-4.61.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:11f35ad7805edba3aac1a3710d104592df59f4b957e30108ae0ba6c10b11dd75", size = 4972884, upload-time = "2025-12-12T17:30:49.656Z" }, + { url = "https://files.pythonhosted.org/packages/5e/87/b5339da8e0256734ba0dbbf5b6cdebb1dd79b01dc8c270989b7bcd465541/fonttools-4.61.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:b931ae8f62db78861b0ff1ac017851764602288575d65b8e8ff1963fed419063", size = 4924405, upload-time = "2025-12-12T17:30:51.735Z" }, + { url = "https://files.pythonhosted.org/packages/0b/47/e3409f1e1e69c073a3a6fd8cb886eb18c0bae0ee13db2c8d5e7f8495e8b7/fonttools-4.61.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:b148b56f5de675ee16d45e769e69f87623a4944f7443850bf9a9376e628a89d2", size = 5035553, upload-time = "2025-12-12T17:30:54.823Z" }, + { url = "https://files.pythonhosted.org/packages/bf/b6/1f6600161b1073a984294c6c031e1a56ebf95b6164249eecf30012bb2e38/fonttools-4.61.1-cp314-cp314-win32.whl", hash = "sha256:9b666a475a65f4e839d3d10473fad6d47e0a9db14a2f4a224029c5bfde58ad2c", size = 2271915, upload-time = "2025-12-12T17:30:57.913Z" }, + { url = "https://files.pythonhosted.org/packages/52/7b/91e7b01e37cc8eb0e1f770d08305b3655e4f002fc160fb82b3390eabacf5/fonttools-4.61.1-cp314-cp314-win_amd64.whl", hash = "sha256:4f5686e1fe5fce75d82d93c47a438a25bf0d1319d2843a926f741140b2b16e0c", size = 2323487, upload-time = "2025-12-12T17:30:59.804Z" }, + { url = "https://files.pythonhosted.org/packages/39/5c/908ad78e46c61c3e3ed70c3b58ff82ab48437faf84ec84f109592cabbd9f/fonttools-4.61.1-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:e76ce097e3c57c4bcb67c5aa24a0ecdbd9f74ea9219997a707a4061fbe2707aa", size = 2929571, upload-time = "2025-12-12T17:31:02.574Z" }, + { url = "https://files.pythonhosted.org/packages/bd/41/975804132c6dea64cdbfbaa59f3518a21c137a10cccf962805b301ac6ab2/fonttools-4.61.1-cp314-cp314t-macosx_10_15_x86_64.whl", hash = 
"sha256:9cfef3ab326780c04d6646f68d4b4742aae222e8b8ea1d627c74e38afcbc9d91", size = 2435317, upload-time = "2025-12-12T17:31:04.974Z" }, + { url = "https://files.pythonhosted.org/packages/b0/5a/aef2a0a8daf1ebaae4cfd83f84186d4a72ee08fd6a8451289fcd03ffa8a4/fonttools-4.61.1-cp314-cp314t-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:a75c301f96db737e1c5ed5fd7d77d9c34466de16095a266509e13da09751bd19", size = 4882124, upload-time = "2025-12-12T17:31:07.456Z" }, + { url = "https://files.pythonhosted.org/packages/80/33/d6db3485b645b81cea538c9d1c9219d5805f0877fda18777add4671c5240/fonttools-4.61.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:91669ccac46bbc1d09e9273546181919064e8df73488ea087dcac3e2968df9ba", size = 5100391, upload-time = "2025-12-12T17:31:09.732Z" }, + { url = "https://files.pythonhosted.org/packages/6c/d6/675ba631454043c75fcf76f0ca5463eac8eb0666ea1d7badae5fea001155/fonttools-4.61.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:c33ab3ca9d3ccd581d58e989d67554e42d8d4ded94ab3ade3508455fe70e65f7", size = 4978800, upload-time = "2025-12-12T17:31:11.681Z" }, + { url = "https://files.pythonhosted.org/packages/7f/33/d3ec753d547a8d2bdaedd390d4a814e8d5b45a093d558f025c6b990b554c/fonttools-4.61.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:664c5a68ec406f6b1547946683008576ef8b38275608e1cee6c061828171c118", size = 5006426, upload-time = "2025-12-12T17:31:13.764Z" }, + { url = "https://files.pythonhosted.org/packages/b4/40/cc11f378b561a67bea850ab50063366a0d1dd3f6d0a30ce0f874b0ad5664/fonttools-4.61.1-cp314-cp314t-win32.whl", hash = "sha256:aed04cabe26f30c1647ef0e8fbb207516fd40fe9472e9439695f5c6998e60ac5", size = 2335377, upload-time = "2025-12-12T17:31:16.49Z" }, + { url = "https://files.pythonhosted.org/packages/e4/ff/c9a2b66b39f8628531ea58b320d66d951267c98c6a38684daa8f50fb02f8/fonttools-4.61.1-cp314-cp314t-win_amd64.whl", hash = "sha256:2180f14c141d2f0f3da43f3a81bc8aa4684860f6b0e6f9e165a4831f24e6a23b", size = 2400613, upload-time = "2025-12-12T17:31:18.769Z" }, + { url = "https://files.pythonhosted.org/packages/c7/4e/ce75a57ff3aebf6fc1f4e9d508b8e5810618a33d900ad6c19eb30b290b97/fonttools-4.61.1-py3-none-any.whl", hash = "sha256:17d2bf5d541add43822bcf0c43d7d847b160c9bb01d15d5007d84e2217aaa371", size = 1148996, upload-time = "2025-12-12T17:31:21.03Z" }, +] + +[[package]] +name = "fpdf2" +version = "2.8.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "defusedxml" }, + { name = "fonttools" }, + { name = "pillow" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/e9/c0/784b130a28f4ed612e9aff26d1118e1f91005713dcd0a35e60b54d316b56/fpdf2-2.8.5.tar.gz", hash = "sha256:af4491ef2e0a5fe476f9d61362925658949c995f7e804438c0e81008f1550247", size = 336046, upload-time = "2025-10-29T14:17:59.569Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/35/a7/8532d8fffe6d1c388ad4941d678dd0da4d8da80434f2dbf4f35de0fa8029/fpdf2-2.8.5-py3-none-any.whl", hash = "sha256:2356b94e2a5fcbd1fe53ac5cbb83494e9003308860ab180050255ba50961d913", size = 301627, upload-time = "2025-10-29T14:17:57.685Z" }, +] + [[package]] name = "gitdb" version = "4.0.12" @@ -1215,6 +1289,93 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/9a/70/875f4a23bfc4731703a5835487d0d2fb999031bd415e7d17c0ae615c18b7/pathvalidate-3.3.1-py3-none-any.whl", hash = "sha256:5263baab691f8e1af96092fa5137ee17df5bdfbd6cff1fcac4d6ef4bc2e1735f", size = 24305, upload-time = 
"2025-06-15T09:07:19.117Z" }, ] +[[package]] +name = "pillow" +version = "12.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d0/02/d52c733a2452ef1ffcc123b68e6606d07276b0e358db70eabad7e40042b7/pillow-12.1.0.tar.gz", hash = "sha256:5c5ae0a06e9ea030ab786b0251b32c7e4ce10e58d983c0d5c56029455180b5b9", size = 46977283, upload-time = "2026-01-02T09:13:29.892Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/43/c4/bf8328039de6cc22182c3ef007a2abfbbdab153661c0a9aa78af8d706391/pillow-12.1.0-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:a83e0850cb8f5ac975291ebfc4170ba481f41a28065277f7f735c202cd8e0af3", size = 5304057, upload-time = "2026-01-02T09:10:46.627Z" }, + { url = "https://files.pythonhosted.org/packages/43/06/7264c0597e676104cc22ca73ee48f752767cd4b1fe084662620b17e10120/pillow-12.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b6e53e82ec2db0717eabb276aa56cf4e500c9a7cec2c2e189b55c24f65a3e8c0", size = 4657811, upload-time = "2026-01-02T09:10:49.548Z" }, + { url = "https://files.pythonhosted.org/packages/72/64/f9189e44474610daf83da31145fa56710b627b5c4c0b9c235e34058f6b31/pillow-12.1.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:40a8e3b9e8773876d6e30daed22f016509e3987bab61b3b7fe309d7019a87451", size = 6232243, upload-time = "2026-01-02T09:10:51.62Z" }, + { url = "https://files.pythonhosted.org/packages/ef/30/0df458009be6a4caca4ca2c52975e6275c387d4e5c95544e34138b41dc86/pillow-12.1.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:800429ac32c9b72909c671aaf17ecd13110f823ddb7db4dfef412a5587c2c24e", size = 8037872, upload-time = "2026-01-02T09:10:53.446Z" }, + { url = "https://files.pythonhosted.org/packages/e4/86/95845d4eda4f4f9557e25381d70876aa213560243ac1a6d619c46caaedd9/pillow-12.1.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0b022eaaf709541b391ee069f0022ee5b36c709df71986e3f7be312e46f42c84", size = 6345398, upload-time = "2026-01-02T09:10:55.426Z" }, + { url = "https://files.pythonhosted.org/packages/5c/1f/8e66ab9be3aaf1435bc03edd1ebdf58ffcd17f7349c1d970cafe87af27d9/pillow-12.1.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1f345e7bc9d7f368887c712aa5054558bad44d2a301ddf9248599f4161abc7c0", size = 7034667, upload-time = "2026-01-02T09:10:57.11Z" }, + { url = "https://files.pythonhosted.org/packages/f9/f6/683b83cb9b1db1fb52b87951b1c0b99bdcfceaa75febf11406c19f82cb5e/pillow-12.1.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d70347c8a5b7ccd803ec0c85c8709f036e6348f1e6a5bf048ecd9c64d3550b8b", size = 6458743, upload-time = "2026-01-02T09:10:59.331Z" }, + { url = "https://files.pythonhosted.org/packages/9a/7d/de833d63622538c1d58ce5395e7c6cb7e7dce80decdd8bde4a484e095d9f/pillow-12.1.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:1fcc52d86ce7a34fd17cb04e87cfdb164648a3662a6f20565910a99653d66c18", size = 7159342, upload-time = "2026-01-02T09:11:01.82Z" }, + { url = "https://files.pythonhosted.org/packages/8c/40/50d86571c9e5868c42b81fe7da0c76ca26373f3b95a8dd675425f4a92ec1/pillow-12.1.0-cp311-cp311-win32.whl", hash = "sha256:3ffaa2f0659e2f740473bcf03c702c39a8d4b2b7ffc629052028764324842c64", size = 6328655, upload-time = "2026-01-02T09:11:04.556Z" }, + { url = "https://files.pythonhosted.org/packages/6c/af/b1d7e301c4cd26cd45d4af884d9ee9b6fab893b0ad2450d4746d74a6968c/pillow-12.1.0-cp311-cp311-win_amd64.whl", hash = 
"sha256:806f3987ffe10e867bab0ddad45df1148a2b98221798457fa097ad85d6e8bc75", size = 7031469, upload-time = "2026-01-02T09:11:06.538Z" }, + { url = "https://files.pythonhosted.org/packages/48/36/d5716586d887fb2a810a4a61518a327a1e21c8b7134c89283af272efe84b/pillow-12.1.0-cp311-cp311-win_arm64.whl", hash = "sha256:9f5fefaca968e700ad1a4a9de98bf0869a94e397fe3524c4c9450c1445252304", size = 2452515, upload-time = "2026-01-02T09:11:08.226Z" }, + { url = "https://files.pythonhosted.org/packages/20/31/dc53fe21a2f2996e1b7d92bf671cdb157079385183ef7c1ae08b485db510/pillow-12.1.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:a332ac4ccb84b6dde65dbace8431f3af08874bf9770719d32a635c4ef411b18b", size = 5262642, upload-time = "2026-01-02T09:11:10.138Z" }, + { url = "https://files.pythonhosted.org/packages/ab/c1/10e45ac9cc79419cedf5121b42dcca5a50ad2b601fa080f58c22fb27626e/pillow-12.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:907bfa8a9cb790748a9aa4513e37c88c59660da3bcfffbd24a7d9e6abf224551", size = 4657464, upload-time = "2026-01-02T09:11:12.319Z" }, + { url = "https://files.pythonhosted.org/packages/ad/26/7b82c0ab7ef40ebede7a97c72d473bda5950f609f8e0c77b04af574a0ddb/pillow-12.1.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:efdc140e7b63b8f739d09a99033aa430accce485ff78e6d311973a67b6bf3208", size = 6234878, upload-time = "2026-01-02T09:11:14.096Z" }, + { url = "https://files.pythonhosted.org/packages/76/25/27abc9792615b5e886ca9411ba6637b675f1b77af3104710ac7353fe5605/pillow-12.1.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:bef9768cab184e7ae6e559c032e95ba8d07b3023c289f79a2bd36e8bf85605a5", size = 8044868, upload-time = "2026-01-02T09:11:15.903Z" }, + { url = "https://files.pythonhosted.org/packages/0a/ea/f200a4c36d836100e7bc738fc48cd963d3ba6372ebc8298a889e0cfc3359/pillow-12.1.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:742aea052cf5ab5034a53c3846165bc3ce88d7c38e954120db0ab867ca242661", size = 6349468, upload-time = "2026-01-02T09:11:17.631Z" }, + { url = "https://files.pythonhosted.org/packages/11/8f/48d0b77ab2200374c66d344459b8958c86693be99526450e7aee714e03e4/pillow-12.1.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a6dfc2af5b082b635af6e08e0d1f9f1c4e04d17d4e2ca0ef96131e85eda6eb17", size = 7041518, upload-time = "2026-01-02T09:11:19.389Z" }, + { url = "https://files.pythonhosted.org/packages/1d/23/c281182eb986b5d31f0a76d2a2c8cd41722d6fb8ed07521e802f9bba52de/pillow-12.1.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:609e89d9f90b581c8d16358c9087df76024cf058fa693dd3e1e1620823f39670", size = 6462829, upload-time = "2026-01-02T09:11:21.28Z" }, + { url = "https://files.pythonhosted.org/packages/25/ef/7018273e0faac099d7b00982abdcc39142ae6f3bd9ceb06de09779c4a9d6/pillow-12.1.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:43b4899cfd091a9693a1278c4982f3e50f7fb7cff5153b05174b4afc9593b616", size = 7166756, upload-time = "2026-01-02T09:11:23.559Z" }, + { url = "https://files.pythonhosted.org/packages/8f/c8/993d4b7ab2e341fe02ceef9576afcf5830cdec640be2ac5bee1820d693d4/pillow-12.1.0-cp312-cp312-win32.whl", hash = "sha256:aa0c9cc0b82b14766a99fbe6084409972266e82f459821cd26997a488a7261a7", size = 6328770, upload-time = "2026-01-02T09:11:25.661Z" }, + { url = "https://files.pythonhosted.org/packages/a7/87/90b358775a3f02765d87655237229ba64a997b87efa8ccaca7dd3e36e7a7/pillow-12.1.0-cp312-cp312-win_amd64.whl", hash = 
"sha256:d70534cea9e7966169ad29a903b99fc507e932069a881d0965a1a84bb57f6c6d", size = 7033406, upload-time = "2026-01-02T09:11:27.474Z" }, + { url = "https://files.pythonhosted.org/packages/5d/cf/881b457eccacac9e5b2ddd97d5071fb6d668307c57cbf4e3b5278e06e536/pillow-12.1.0-cp312-cp312-win_arm64.whl", hash = "sha256:65b80c1ee7e14a87d6a068dd3b0aea268ffcabfe0498d38661b00c5b4b22e74c", size = 2452612, upload-time = "2026-01-02T09:11:29.309Z" }, + { url = "https://files.pythonhosted.org/packages/dd/c7/2530a4aa28248623e9d7f27316b42e27c32ec410f695929696f2e0e4a778/pillow-12.1.0-cp313-cp313-ios_13_0_arm64_iphoneos.whl", hash = "sha256:7b5dd7cbae20285cdb597b10eb5a2c13aa9de6cde9bb64a3c1317427b1db1ae1", size = 4062543, upload-time = "2026-01-02T09:11:31.566Z" }, + { url = "https://files.pythonhosted.org/packages/8f/1f/40b8eae823dc1519b87d53c30ed9ef085506b05281d313031755c1705f73/pillow-12.1.0-cp313-cp313-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:29a4cef9cb672363926f0470afc516dbf7305a14d8c54f7abbb5c199cd8f8179", size = 4138373, upload-time = "2026-01-02T09:11:33.367Z" }, + { url = "https://files.pythonhosted.org/packages/d4/77/6fa60634cf06e52139fd0e89e5bbf055e8166c691c42fb162818b7fda31d/pillow-12.1.0-cp313-cp313-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:681088909d7e8fa9e31b9799aaa59ba5234c58e5e4f1951b4c4d1082a2e980e0", size = 3601241, upload-time = "2026-01-02T09:11:35.011Z" }, + { url = "https://files.pythonhosted.org/packages/4f/bf/28ab865de622e14b747f0cd7877510848252d950e43002e224fb1c9ababf/pillow-12.1.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:983976c2ab753166dc66d36af6e8ec15bb511e4a25856e2227e5f7e00a160587", size = 5262410, upload-time = "2026-01-02T09:11:36.682Z" }, + { url = "https://files.pythonhosted.org/packages/1c/34/583420a1b55e715937a85bd48c5c0991598247a1fd2eb5423188e765ea02/pillow-12.1.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:db44d5c160a90df2d24a24760bbd37607d53da0b34fb546c4c232af7192298ac", size = 4657312, upload-time = "2026-01-02T09:11:38.535Z" }, + { url = "https://files.pythonhosted.org/packages/1d/fd/f5a0896839762885b3376ff04878f86ab2b097c2f9a9cdccf4eda8ba8dc0/pillow-12.1.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:6b7a9d1db5dad90e2991645874f708e87d9a3c370c243c2d7684d28f7e133e6b", size = 6232605, upload-time = "2026-01-02T09:11:40.602Z" }, + { url = "https://files.pythonhosted.org/packages/98/aa/938a09d127ac1e70e6ed467bd03834350b33ef646b31edb7452d5de43792/pillow-12.1.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:6258f3260986990ba2fa8a874f8b6e808cf5abb51a94015ca3dc3c68aa4f30ea", size = 8041617, upload-time = "2026-01-02T09:11:42.721Z" }, + { url = "https://files.pythonhosted.org/packages/17/e8/538b24cb426ac0186e03f80f78bc8dc7246c667f58b540bdd57c71c9f79d/pillow-12.1.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e115c15e3bc727b1ca3e641a909f77f8ca72a64fff150f666fcc85e57701c26c", size = 6346509, upload-time = "2026-01-02T09:11:44.955Z" }, + { url = "https://files.pythonhosted.org/packages/01/9a/632e58ec89a32738cabfd9ec418f0e9898a2b4719afc581f07c04a05e3c9/pillow-12.1.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6741e6f3074a35e47c77b23a4e4f2d90db3ed905cb1c5e6e0d49bff2045632bc", size = 7038117, upload-time = "2026-01-02T09:11:46.736Z" }, + { url = "https://files.pythonhosted.org/packages/c7/a2/d40308cf86eada842ca1f3ffa45d0ca0df7e4ab33c83f81e73f5eaed136d/pillow-12.1.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = 
"sha256:935b9d1aed48fcfb3f838caac506f38e29621b44ccc4f8a64d575cb1b2a88644", size = 6460151, upload-time = "2026-01-02T09:11:48.625Z" }, + { url = "https://files.pythonhosted.org/packages/f1/88/f5b058ad6453a085c5266660a1417bdad590199da1b32fb4efcff9d33b05/pillow-12.1.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:5fee4c04aad8932da9f8f710af2c1a15a83582cfb884152a9caa79d4efcdbf9c", size = 7164534, upload-time = "2026-01-02T09:11:50.445Z" }, + { url = "https://files.pythonhosted.org/packages/19/ce/c17334caea1db789163b5d855a5735e47995b0b5dc8745e9a3605d5f24c0/pillow-12.1.0-cp313-cp313-win32.whl", hash = "sha256:a786bf667724d84aa29b5db1c61b7bfdde380202aaca12c3461afd6b71743171", size = 6332551, upload-time = "2026-01-02T09:11:52.234Z" }, + { url = "https://files.pythonhosted.org/packages/e5/07/74a9d941fa45c90a0d9465098fe1ec85de3e2afbdc15cc4766622d516056/pillow-12.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:461f9dfdafa394c59cd6d818bdfdbab4028b83b02caadaff0ffd433faf4c9a7a", size = 7040087, upload-time = "2026-01-02T09:11:54.822Z" }, + { url = "https://files.pythonhosted.org/packages/88/09/c99950c075a0e9053d8e880595926302575bc742b1b47fe1bbcc8d388d50/pillow-12.1.0-cp313-cp313-win_arm64.whl", hash = "sha256:9212d6b86917a2300669511ed094a9406888362e085f2431a7da985a6b124f45", size = 2452470, upload-time = "2026-01-02T09:11:56.522Z" }, + { url = "https://files.pythonhosted.org/packages/b5/ba/970b7d85ba01f348dee4d65412476321d40ee04dcb51cd3735b9dc94eb58/pillow-12.1.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:00162e9ca6d22b7c3ee8e61faa3c3253cd19b6a37f126cad04f2f88b306f557d", size = 5264816, upload-time = "2026-01-02T09:11:58.227Z" }, + { url = "https://files.pythonhosted.org/packages/10/60/650f2fb55fdba7a510d836202aa52f0baac633e50ab1cf18415d332188fb/pillow-12.1.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:7d6daa89a00b58c37cb1747ec9fb7ac3bc5ffd5949f5888657dfddde6d1312e0", size = 4660472, upload-time = "2026-01-02T09:12:00.798Z" }, + { url = "https://files.pythonhosted.org/packages/2b/c0/5273a99478956a099d533c4f46cbaa19fd69d606624f4334b85e50987a08/pillow-12.1.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:e2479c7f02f9d505682dc47df8c0ea1fc5e264c4d1629a5d63fe3e2334b89554", size = 6268974, upload-time = "2026-01-02T09:12:02.572Z" }, + { url = "https://files.pythonhosted.org/packages/b4/26/0bf714bc2e73d5267887d47931d53c4ceeceea6978148ed2ab2a4e6463c4/pillow-12.1.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f188d580bd870cda1e15183790d1cc2fa78f666e76077d103edf048eed9c356e", size = 8073070, upload-time = "2026-01-02T09:12:04.75Z" }, + { url = "https://files.pythonhosted.org/packages/43/cf/1ea826200de111a9d65724c54f927f3111dc5ae297f294b370a670c17786/pillow-12.1.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0fde7ec5538ab5095cc02df38ee99b0443ff0e1c847a045554cf5f9af1f4aa82", size = 6380176, upload-time = "2026-01-02T09:12:06.626Z" }, + { url = "https://files.pythonhosted.org/packages/03/e0/7938dd2b2013373fd85d96e0f38d62b7a5a262af21ac274250c7ca7847c9/pillow-12.1.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0ed07dca4a8464bada6139ab38f5382f83e5f111698caf3191cb8dbf27d908b4", size = 7067061, upload-time = "2026-01-02T09:12:08.624Z" }, + { url = "https://files.pythonhosted.org/packages/86/ad/a2aa97d37272a929a98437a8c0ac37b3cf012f4f8721e1bd5154699b2518/pillow-12.1.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = 
"sha256:f45bd71d1fa5e5749587613037b172e0b3b23159d1c00ef2fc920da6f470e6f0", size = 6491824, upload-time = "2026-01-02T09:12:10.488Z" }, + { url = "https://files.pythonhosted.org/packages/a4/44/80e46611b288d51b115826f136fb3465653c28f491068a72d3da49b54cd4/pillow-12.1.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:277518bf4fe74aa91489e1b20577473b19ee70fb97c374aa50830b279f25841b", size = 7190911, upload-time = "2026-01-02T09:12:12.772Z" }, + { url = "https://files.pythonhosted.org/packages/86/77/eacc62356b4cf81abe99ff9dbc7402750044aed02cfd6a503f7c6fc11f3e/pillow-12.1.0-cp313-cp313t-win32.whl", hash = "sha256:7315f9137087c4e0ee73a761b163fc9aa3b19f5f606a7fc08d83fd3e4379af65", size = 6336445, upload-time = "2026-01-02T09:12:14.775Z" }, + { url = "https://files.pythonhosted.org/packages/e7/3c/57d81d0b74d218706dafccb87a87ea44262c43eef98eb3b164fd000e0491/pillow-12.1.0-cp313-cp313t-win_amd64.whl", hash = "sha256:0ddedfaa8b5f0b4ffbc2fa87b556dc59f6bb4ecb14a53b33f9189713ae8053c0", size = 7045354, upload-time = "2026-01-02T09:12:16.599Z" }, + { url = "https://files.pythonhosted.org/packages/ac/82/8b9b97bba2e3576a340f93b044a3a3a09841170ab4c1eb0d5c93469fd32f/pillow-12.1.0-cp313-cp313t-win_arm64.whl", hash = "sha256:80941e6d573197a0c28f394753de529bb436b1ca990ed6e765cf42426abc39f8", size = 2454547, upload-time = "2026-01-02T09:12:18.704Z" }, + { url = "https://files.pythonhosted.org/packages/8c/87/bdf971d8bbcf80a348cc3bacfcb239f5882100fe80534b0ce67a784181d8/pillow-12.1.0-cp314-cp314-ios_13_0_arm64_iphoneos.whl", hash = "sha256:5cb7bc1966d031aec37ddb9dcf15c2da5b2e9f7cc3ca7c54473a20a927e1eb91", size = 4062533, upload-time = "2026-01-02T09:12:20.791Z" }, + { url = "https://files.pythonhosted.org/packages/ff/4f/5eb37a681c68d605eb7034c004875c81f86ec9ef51f5be4a63eadd58859a/pillow-12.1.0-cp314-cp314-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:97e9993d5ed946aba26baf9c1e8cf18adbab584b99f452ee72f7ee8acb882796", size = 4138546, upload-time = "2026-01-02T09:12:23.664Z" }, + { url = "https://files.pythonhosted.org/packages/11/6d/19a95acb2edbace40dcd582d077b991646b7083c41b98da4ed7555b59733/pillow-12.1.0-cp314-cp314-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:414b9a78e14ffeb98128863314e62c3f24b8a86081066625700b7985b3f529bd", size = 3601163, upload-time = "2026-01-02T09:12:26.338Z" }, + { url = "https://files.pythonhosted.org/packages/fc/36/2b8138e51cb42e4cc39c3297713455548be855a50558c3ac2beebdc251dd/pillow-12.1.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:e6bdb408f7c9dd2a5ff2b14a3b0bb6d4deb29fb9961e6eb3ae2031ae9a5cec13", size = 5266086, upload-time = "2026-01-02T09:12:28.782Z" }, + { url = "https://files.pythonhosted.org/packages/53/4b/649056e4d22e1caa90816bf99cef0884aed607ed38075bd75f091a607a38/pillow-12.1.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:3413c2ae377550f5487991d444428f1a8ae92784aac79caa8b1e3b89b175f77e", size = 4657344, upload-time = "2026-01-02T09:12:31.117Z" }, + { url = "https://files.pythonhosted.org/packages/6c/6b/c5742cea0f1ade0cd61485dc3d81f05261fc2276f537fbdc00802de56779/pillow-12.1.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:e5dcbe95016e88437ecf33544ba5db21ef1b8dd6e1b434a2cb2a3d605299e643", size = 6232114, upload-time = "2026-01-02T09:12:32.936Z" }, + { url = "https://files.pythonhosted.org/packages/bf/8f/9f521268ce22d63991601aafd3d48d5ff7280a246a1ef62d626d67b44064/pillow-12.1.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d0a7735df32ccbcc98b98a1ac785cc4b19b580be1bdf0aeb5c03223220ea09d5", 
size = 8042708, upload-time = "2026-01-02T09:12:34.78Z" }, + { url = "https://files.pythonhosted.org/packages/1a/eb/257f38542893f021502a1bbe0c2e883c90b5cff26cc33b1584a841a06d30/pillow-12.1.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0c27407a2d1b96774cbc4a7594129cc027339fd800cd081e44497722ea1179de", size = 6347762, upload-time = "2026-01-02T09:12:36.748Z" }, + { url = "https://files.pythonhosted.org/packages/c4/5a/8ba375025701c09b309e8d5163c5a4ce0102fa86bbf8800eb0d7ac87bc51/pillow-12.1.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:15c794d74303828eaa957ff8070846d0efe8c630901a1c753fdc63850e19ecd9", size = 7039265, upload-time = "2026-01-02T09:12:39.082Z" }, + { url = "https://files.pythonhosted.org/packages/cf/dc/cf5e4cdb3db533f539e88a7bbf9f190c64ab8a08a9bc7a4ccf55067872e4/pillow-12.1.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c990547452ee2800d8506c4150280757f88532f3de2a58e3022e9b179107862a", size = 6462341, upload-time = "2026-01-02T09:12:40.946Z" }, + { url = "https://files.pythonhosted.org/packages/d0/47/0291a25ac9550677e22eda48510cfc4fa4b2ef0396448b7fbdc0a6946309/pillow-12.1.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:b63e13dd27da389ed9475b3d28510f0f954bca0041e8e551b2a4eb1eab56a39a", size = 7165395, upload-time = "2026-01-02T09:12:42.706Z" }, + { url = "https://files.pythonhosted.org/packages/4f/4c/e005a59393ec4d9416be06e6b45820403bb946a778e39ecec62f5b2b991e/pillow-12.1.0-cp314-cp314-win32.whl", hash = "sha256:1a949604f73eb07a8adab38c4fe50791f9919344398bdc8ac6b307f755fc7030", size = 6431413, upload-time = "2026-01-02T09:12:44.944Z" }, + { url = "https://files.pythonhosted.org/packages/1c/af/f23697f587ac5f9095d67e31b81c95c0249cd461a9798a061ed6709b09b5/pillow-12.1.0-cp314-cp314-win_amd64.whl", hash = "sha256:4f9f6a650743f0ddee5593ac9e954ba1bdbc5e150bc066586d4f26127853ab94", size = 7176779, upload-time = "2026-01-02T09:12:46.727Z" }, + { url = "https://files.pythonhosted.org/packages/b3/36/6a51abf8599232f3e9afbd16d52829376a68909fe14efe29084445db4b73/pillow-12.1.0-cp314-cp314-win_arm64.whl", hash = "sha256:808b99604f7873c800c4840f55ff389936ef1948e4e87645eaf3fccbc8477ac4", size = 2543105, upload-time = "2026-01-02T09:12:49.243Z" }, + { url = "https://files.pythonhosted.org/packages/82/54/2e1dd20c8749ff225080d6ba465a0cab4387f5db0d1c5fb1439e2d99923f/pillow-12.1.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:bc11908616c8a283cf7d664f77411a5ed2a02009b0097ff8abbba5e79128ccf2", size = 5268571, upload-time = "2026-01-02T09:12:51.11Z" }, + { url = "https://files.pythonhosted.org/packages/57/61/571163a5ef86ec0cf30d265ac2a70ae6fc9e28413d1dc94fa37fae6bda89/pillow-12.1.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:896866d2d436563fa2a43a9d72f417874f16b5545955c54a64941e87c1376c61", size = 4660426, upload-time = "2026-01-02T09:12:52.865Z" }, + { url = "https://files.pythonhosted.org/packages/5e/e1/53ee5163f794aef1bf84243f755ee6897a92c708505350dd1923f4afec48/pillow-12.1.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8e178e3e99d3c0ea8fc64b88447f7cac8ccf058af422a6cedc690d0eadd98c51", size = 6269908, upload-time = "2026-01-02T09:12:54.884Z" }, + { url = "https://files.pythonhosted.org/packages/bc/0b/b4b4106ff0ee1afa1dc599fde6ab230417f800279745124f6c50bcffed8e/pillow-12.1.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:079af2fb0c599c2ec144ba2c02766d1b55498e373b3ac64687e43849fbbef5bc", size = 8074733, upload-time = 
"2026-01-02T09:12:56.802Z" }, + { url = "https://files.pythonhosted.org/packages/19/9f/80b411cbac4a732439e629a26ad3ef11907a8c7fc5377b7602f04f6fe4e7/pillow-12.1.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bdec5e43377761c5dbca620efb69a77f6855c5a379e32ac5b158f54c84212b14", size = 6381431, upload-time = "2026-01-02T09:12:58.823Z" }, + { url = "https://files.pythonhosted.org/packages/8f/b7/d65c45db463b66ecb6abc17c6ba6917a911202a07662247e1355ce1789e7/pillow-12.1.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:565c986f4b45c020f5421a4cea13ef294dde9509a8577f29b2fc5edc7587fff8", size = 7068529, upload-time = "2026-01-02T09:13:00.885Z" }, + { url = "https://files.pythonhosted.org/packages/50/96/dfd4cd726b4a45ae6e3c669fc9e49deb2241312605d33aba50499e9d9bd1/pillow-12.1.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:43aca0a55ce1eefc0aefa6253661cb54571857b1a7b2964bd8a1e3ef4b729924", size = 6492981, upload-time = "2026-01-02T09:13:03.314Z" }, + { url = "https://files.pythonhosted.org/packages/4d/1c/b5dc52cf713ae46033359c5ca920444f18a6359ce1020dd3e9c553ea5bc6/pillow-12.1.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:0deedf2ea233722476b3a81e8cdfbad786f7adbed5d848469fa59fe52396e4ef", size = 7191878, upload-time = "2026-01-02T09:13:05.276Z" }, + { url = "https://files.pythonhosted.org/packages/53/26/c4188248bd5edaf543864fe4834aebe9c9cb4968b6f573ce014cc42d0720/pillow-12.1.0-cp314-cp314t-win32.whl", hash = "sha256:b17fbdbe01c196e7e159aacb889e091f28e61020a8abeac07b68079b6e626988", size = 6438703, upload-time = "2026-01-02T09:13:07.491Z" }, + { url = "https://files.pythonhosted.org/packages/b8/0e/69ed296de8ea05cb03ee139cee600f424ca166e632567b2d66727f08c7ed/pillow-12.1.0-cp314-cp314t-win_amd64.whl", hash = "sha256:27b9baecb428899db6c0de572d6d305cfaf38ca1596b5c0542a5182e3e74e8c6", size = 7182927, upload-time = "2026-01-02T09:13:09.841Z" }, + { url = "https://files.pythonhosted.org/packages/fc/f5/68334c015eed9b5cff77814258717dec591ded209ab5b6fb70e2ae873d1d/pillow-12.1.0-cp314-cp314t-win_arm64.whl", hash = "sha256:f61333d817698bdcdd0f9d7793e365ac3d2a21c1f1eb02b32ad6aefb8d8ea831", size = 2545104, upload-time = "2026-01-02T09:13:12.068Z" }, + { url = "https://files.pythonhosted.org/packages/8b/bc/224b1d98cffd7164b14707c91aac83c07b047fbd8f58eba4066a3e53746a/pillow-12.1.0-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:ca94b6aac0d7af2a10ba08c0f888b3d5114439b6b3ef39968378723622fed377", size = 5228605, upload-time = "2026-01-02T09:13:14.084Z" }, + { url = "https://files.pythonhosted.org/packages/0c/ca/49ca7769c4550107de049ed85208240ba0f330b3f2e316f24534795702ce/pillow-12.1.0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:351889afef0f485b84078ea40fe33727a0492b9af3904661b0abbafee0355b72", size = 4622245, upload-time = "2026-01-02T09:13:15.964Z" }, + { url = "https://files.pythonhosted.org/packages/73/48/fac807ce82e5955bcc2718642b94b1bd22a82a6d452aea31cbb678cddf12/pillow-12.1.0-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:bb0984b30e973f7e2884362b7d23d0a348c7143ee559f38ef3eaab640144204c", size = 5247593, upload-time = "2026-01-02T09:13:17.913Z" }, + { url = "https://files.pythonhosted.org/packages/d2/95/3e0742fe358c4664aed4fd05d5f5373dcdad0b27af52aa0972568541e3f4/pillow-12.1.0-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:84cabc7095dd535ca934d57e9ce2a72ffd216e435a84acb06b2277b1de2689bd", size = 6989008, upload-time = 
"2026-01-02T09:13:20.083Z" }, + { url = "https://files.pythonhosted.org/packages/5a/74/fe2ac378e4e202e56d50540d92e1ef4ff34ed687f3c60f6a121bcf99437e/pillow-12.1.0-pp311-pypy311_pp73-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:53d8b764726d3af1a138dd353116f774e3862ec7e3794e0c8781e30db0f35dfc", size = 5313824, upload-time = "2026-01-02T09:13:22.405Z" }, + { url = "https://files.pythonhosted.org/packages/f3/77/2a60dee1adee4e2655ac328dd05c02a955c1cd683b9f1b82ec3feb44727c/pillow-12.1.0-pp311-pypy311_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5da841d81b1a05ef940a8567da92decaa15bc4d7dedb540a8c219ad83d91808a", size = 5963278, upload-time = "2026-01-02T09:13:24.706Z" }, + { url = "https://files.pythonhosted.org/packages/2d/71/64e9b1c7f04ae0027f788a248e6297d7fcc29571371fe7d45495a78172c0/pillow-12.1.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:75af0b4c229ac519b155028fa1be632d812a519abba9b46b20e50c6caa184f19", size = 7029809, upload-time = "2026-01-02T09:13:26.541Z" }, +] + [[package]] name = "platformdirs" version = "4.5.1" From b96d22a72ffd9376ecef9a7d197d8d66e18d638d Mon Sep 17 00:00:00 2001 From: Noah Horton Date: Fri, 6 Feb 2026 13:57:04 -0700 Subject: [PATCH 35/45] ready to test --- .deepwork/jobs/deepwork_jobs/job.yml | 23 +- .../research_report_job_best_practices.md | 184 +++++++++++++ .deepwork/jobs/deepwork_jobs/steps/define.md | 26 +- .../deepwork_jobs/templates/job.yml.example | 3 +- .../deepwork_jobs/templates/job.yml.template | 2 + .deepwork/schemas/job.schema.json | 4 + src/deepwork/core/parser.py | 2 + src/deepwork/mcp/quality_gate.py | 62 +++-- src/deepwork/mcp/schemas.py | 4 + src/deepwork/mcp/tools.py | 12 +- src/deepwork/schemas/job.schema.json | 4 + .../standard_jobs/deepwork_jobs/job.yml | 23 +- .../research_report_job_best_practices.md | 184 +++++++++++++ .../deepwork_jobs/steps/define.md | 26 +- .../deepwork_jobs/templates/job.yml.example | 3 +- .../deepwork_jobs/templates/job.yml.template | 2 + tests/unit/mcp/test_quality_gate.py | 195 +++++++++----- tests/unit/mcp/test_tools.py | 246 ++++++++++++++++++ 18 files changed, 897 insertions(+), 108 deletions(-) create mode 100644 .deepwork/jobs/deepwork_jobs/research_report_job_best_practices.md create mode 100644 src/deepwork/standard_jobs/deepwork_jobs/research_report_job_best_practices.md diff --git a/.deepwork/jobs/deepwork_jobs/job.yml b/.deepwork/jobs/deepwork_jobs/job.yml index e48853a7..900bf1e7 100644 --- a/.deepwork/jobs/deepwork_jobs/job.yml +++ b/.deepwork/jobs/deepwork_jobs/job.yml @@ -100,6 +100,7 @@ steps: - define reviews: - run_each: step_instruction_files + additional_review_guidance: "Read the job.yml file in the same job directory for context on how this instruction file fits into the larger workflow." quality_criteria: "Complete Instructions": "Is the instruction file complete (no stubs or placeholders)?" "Specific & Actionable": "Are instructions tailored to the step's purpose, not generic?" 
@@ -118,7 +119,7 @@ steps: - file: step_instruction_files from_step: implement outputs: - test_feedback.md: + .deepwork/tmp/test_feedback.md: type: file description: "Feedback from testing the workflow on a real use case" dependencies: @@ -142,6 +143,8 @@ steps: from_step: define - file: step_instruction_files from_step: implement + - file: .deepwork/tmp/test_feedback.md + from_step: test outputs: job.yml: type: file @@ -149,6 +152,9 @@ steps: step_instruction_files: type: files description: "Updated instruction Markdown files for each step" + scripts: + type: files + description: "Updated scripts to run parts of the job more efficiently" dependencies: - define - implement @@ -166,6 +172,15 @@ steps: AGENTS.md: type: file description: "Bespoke learnings and run-specific context for the working folder" + job.yml: + type: file + description: "Updated job definition with improvements from test run" + step_instruction_files: + type: files + description: "Updated instruction Markdown files for each step" + scripts: + type: files + description: "Updated scripts to run parts of the job more efficiently" dependencies: [] reviews: - run_each: step @@ -178,7 +193,6 @@ steps: "Bespoke Learnings Captured": "Were run-specific learnings added to AGENTS.md?" "File References Used": "Do AGENTS.md entries reference other files where appropriate?" "Working Folder Correct": "Is AGENTS.md in the correct working folder for the job?" - "Generalizable Separated": "Are generalizable improvements in instructions, not AGENTS.md?" - id: fix_settings name: "Fix Settings Files" @@ -214,10 +228,14 @@ steps: job_definitions: type: files description: "Updated job.yml files and step instructions in current DeepWork format" + step_instruction_files: + type: files + description: "Updated step instruction files" dependencies: - fix_settings reviews: - run_each: step + additional_review_guidance: "Read the .claude/settings.json file for context on what settings were cleaned up in the prior step." quality_criteria: "Exposed Field Addressed": "Are `exposed: true` fields removed or noted as deprecated?" "Stop Hooks Migrated": "Are `stop_hooks` migrated to `hooks.after_agent` format?" @@ -238,6 +256,7 @@ steps: - fix_jobs reviews: - run_each: step + additional_review_guidance: "Check the .deepwork/jobs/ directory and .claude/skills/ directory to verify the cleanup was done correctly." quality_criteria: "Legacy Job Skills Removed": "Are legacy skill folders for each job removed from `.claude/skills/` and `.gemini/skills/`?" "Deepwork Skill Preserved": "Does the `deepwork` skill folder still exist in `.claude/skills/deepwork/`?" diff --git a/.deepwork/jobs/deepwork_jobs/research_report_job_best_practices.md b/.deepwork/jobs/deepwork_jobs/research_report_job_best_practices.md new file mode 100644 index 00000000..87e65615 --- /dev/null +++ b/.deepwork/jobs/deepwork_jobs/research_report_job_best_practices.md @@ -0,0 +1,184 @@ +# Research Report Job Best Practices + +Reference guide for designing DeepWork jobs that produce research reports, analytical documents, or similar investigative deliverables. Use this when defining jobs via the `define` step. + +## The General Pattern + +Most report-authoring jobs follow a five-phase structure. Not every job needs all five as separate steps, and some phases combine naturally, but understanding the full arc helps you design a job that doesn't skip critical work. + +### 1. 
Connect + +**Purpose**: Verify that the tools and data sources the job will rely on are actually accessible before any real work begins. + +This step is about validating prerequisites, not doing research. Common activities: + +- **Database connectivity**: Run a trivial query (`SELECT 1`, `SHOW TABLES`) to confirm credentials work and the schema is reachable. +- **Web search tools**: Confirm web search and browsing tools are enabled. If the job needs to read specific sites, verify they don't require login. If they do, get the user to authenticate (e.g., via Claude in Chrome) before proceeding. +- **API access**: Test API keys or tokens against a lightweight endpoint. +- **File access**: Confirm that input files, dashboards, or shared drives are readable. + +**Why a separate step?** A failed connection discovered midway through analysis wastes all prior work. Catching it upfront is cheap. That said, for simple jobs where the data source is obvious and reliable (e.g., "search the web for X"), this can be folded into the Align step as a quick check rather than standing alone. + +**Outputs**: A brief connectivity report or checklist confirming each source is accessible, plus any credentials or configuration notes for later steps. + +### 2. Align + +**Purpose**: Build enough understanding of the domain and the user's intent to scope the analysis correctly. + +This is a cyclical step: do light research, then ask clarifying questions, then refine understanding, repeat. It ends when both the agent and user agree on what "done" looks like. + +**The cycle**: + +1. **Light grounding research** - Just enough to ask smart questions. Not deep analysis. +2. **Clarify with the user** - Surface ambiguities and propose scope boundaries. +3. **Repeat** until there's shared understanding. + +**Example - Private data (SQL-centric)**: +- Run broad queries to get the lay of the land: total record counts, key column names, date ranges, apparent segmentation columns (e.g., `division`, `region`). +- Then ask the user: "I see 45,000 customer records across 3 divisions. Should we scope to a particular division? I'm defining churn as customers with no activity in 90 days - does that match your definition?" + +**Example - Public data (web-centric)**: +- Do broad searches to see what's out there. Notice the shape of results: are they news articles, academic papers, industry reports? What subtopics keep appearing? +- Then ask the user: "Results split between fast-fashion trends and haute couture analysis. Which direction? Also, should we focus on the current season or look at multi-year trends?" + +**Outputs**: A scoping document that captures the agreed-upon research questions, data sources, definitions, exclusions, and success criteria. This becomes the north star for the Analyze step. + +### 3. Analyze + +**Purpose**: The core research cycle. Query, record, synthesize, and deepen iteratively. + +This is where most of the work happens. The key discipline is maintaining structured working files so that nothing gets lost and the narrative builds progressively. + +**Working files to maintain**: + +| File | Purpose | +|------|---------| +| Query log | Every query/search with its results. What did you ask, what came back. Keeps work auditable and reproducible. | +| Questions & Answers | Running list of research questions. As you find answers, record them. As answers suggest new questions, add those. This drives the iterative deepening. | +| Draft report | The evolving narrative. Updated as new findings emerge. 
Forces you to synthesize as you go rather than dumping data at the end. | + +**The iterative deepening pattern**: + +Analysis should deepen in layers, not stay shallow across many topics. Each answer should prompt "why?" or "what drives that?" questions: + +- **Layer 1**: Top-level facts. "What was our AWS spend last month?" -> $10k. "How does that compare to prior month?" -> Up $1k. +- **Layer 2**: Decomposition. "What services drove the spend?" -> $8k EC2, $1k S3, $1k other. "Where was the increase?" -> All in EC2. +- **Layer 3**: Root causes. "Is our EC2 fleet well-utilized?" -> Many instances with attribute X are underutilized. "Are specific workloads driving the increase?" -> Yes, instances tagged `daily_sync_*` are up ~$2k. +- **Layer 4+**: Continue until you hit actionable findings or diminishing returns. + +**When to stop deepening**: When additional queries aren't changing the narrative, or when you've answered the questions from the Align step to a sufficient depth. But make sure that any questions that a reasonable business person is likely to ask when looking at your output are answered. + +**Outputs**: The working files above (query log, Q&A tracker, draft report), organized in the dataroom alongside the final output. + +### 4. Review (Not a Separate Step) + +Reviews are not a standalone phase but checkpoints woven into all the steps, especially the Analyze step. Use DeepWork's `reviews` mechanism in `job.yml` to define quality gates. + +**Reviews to consider for the Analyze phase**: + +- **Query completeness**: Are the key research questions from the scoping document all addressed? Are queries recorded with their results? +- **Draft coherence**: Does the draft report tell a logical story? Are sections connected rather than isolated findings? +- **Depth adequacy**: Has the analysis gone deep enough on the important threads? Are there obvious follow-up questions left unasked? +- **Citation integrity**: Are claims in the draft backed by specific queries/sources from the query log? + +**Reviews to consider for the Present phase** (see below): + +- **Visual quality**: Charts render correctly, no overlapping text, readable at intended size. +- **Content accuracy**: Citations preserved from draft, numbers match source data, arguments are logically sound. +- **Audience fit**: Language, detail level, and framing match the intended audience (executives vs. engineers vs. clients). +- **Format compliance**: Output matches the requested format (PDF renders correctly, HTML is responsive, slides have consistent styling). + +### 5. Present + +**Purpose**: Transform the draft into a polished final deliverable. + +The draft report from the Analyze step has the right content but may not be presentation-ready. This step focuses on the output experience. + +**Common activities**: + +- **Visualizations**: Generate charts, tables, or diagrams from the data. Fetch relevant images. Create infographics for key findings. +- **Formatting**: Convert to the final output format (PDF, HTML, slides, etc.). Apply styling and layout. +- **Narrative polish**: Tighten prose, add executive summary, ensure the document flows well for someone reading it cold. +- **Supporting materials**: Assemble appendices, data tables, methodology notes. + +**This step often requires multiple review cycles.** Visual outputs have failure modes that text-only drafts don't: overlapping labels, truncated legends, broken page breaks, images that don't load. Build in quality gates for visual review. 
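As a concrete illustration of the formatting work in this phase, below is a minimal sketch that renders the working draft into a PDF using fpdf2 (the dependency added to the lock file earlier in this patch series). The paths are placeholders, and the flow is deliberately naive: a real job would handle headings, tables, charts, and non-Latin text before the visual review gate.

```python
# Minimal sketch: turn the Analyze-phase draft into a PDF deliverable.
# Placeholder paths throughout; styling and layout are left to the real job.
from pathlib import Path

from fpdf import FPDF

draft = Path("spending_analysis_dataroom/draft.md").read_text(encoding="utf-8")

pdf = FPDF()
pdf.add_page()
pdf.set_font("Helvetica", size=11)
pdf.multi_cell(0, 6, draft)  # naive text flow; Markdown structure is not interpreted
pdf.output("spending_analysis.pdf")
```

Even a rough render like this gives the visual review gate something concrete to check for page breaks, truncation, and readability.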
+ +**Outputs**: The final deliverable in its target format, plus any supporting materials. + +## Translating This Into a Job Definition + +### Step Structure Options + +**Minimal (3 steps)** - For straightforward reports with known data sources: +1. `scope` - Combines Connect + Align. Verify access, clarify requirements. +2. `research` - The Analyze phase with built-in review gates. +3. `report` - The Present phase with visual/format review gates. + +**Standard (4 steps)** - For most research reports: +1. `connect` - Verify data source access. +2. `scope` - Align on research questions and definitions. +3. `analyze` - Core research with iterative deepening. +4. `present` - Final deliverable production. + +**Comprehensive (5+ steps)** - For complex, multi-source reports: +1. `connect` - Verify all data source access. +2. `scope` - Align on research questions. +3. `gather` - Collect raw data across all sources (query log output). +4. `analyze` - Synthesize findings, build narrative (draft report output). +5. `present` - Polish and format final deliverable. + +### Output Organization + +Follow the dataroom pattern from the define step guidelines: + +``` +operations/reports/2026-01/spending_analysis.md # Final report +operations/reports/2026-01/spending_analysis_dataroom/ # Supporting materials + query_log.md # All queries and results + questions_and_answers.md # Research Q&A tracker + raw_data/ # Extracted data files + charts/ # Generated visualizations + draft.md # Working draft (for audit trail) +``` + +### Quality Gate Design + +Research reports benefit from **split reviews** that evaluate content and presentation separately: + +```yaml +reviews: + # Content review - is the analysis sound? + - run_each: final_report.md + quality_criteria: + "Claims Cited": "Is every factual claim backed by a specific source or query from the dataroom?" + "Questions Answered": "Are all research questions from the scoping document addressed?" + "Depth": "Does the analysis go beyond surface-level observations to root causes or actionable insights?" + + # Presentation review - is the output polished? + - run_each: final_report.md + quality_criteria: + "Readable Flow": "Does the document flow logically for someone reading it without prior context?" + "Audience Fit": "Is the language and detail level appropriate for the intended audience?" + "Visual Quality": "Do all charts, tables, and figures render correctly and add value?" +``` + +### Capability Considerations + +Research jobs frequently need specialized tools. During the `define` step, ask about: + +- **Database access**: What databases? What client tools or connection strings? +- **Web browsing**: Will sites require authentication? Is Claude in Chrome available? +- **File generation**: Does the final output need PDF/HTML rendering? What tools are available? +- **Data visualization**: What charting libraries or tools can the agent use? + +## Anti-Patterns to Avoid + +**Shallow breadth over deep analysis**: Covering 20 topics superficially is less valuable than covering 5 topics with layered depth. Design the Analyze step to encourage iterative deepening, not checklist completion. + +**Skipping the scoping step**: Jumping straight into analysis without aligning on definitions and scope almost always leads to rework. "Analyze our churn" means very different things depending on how churn is defined. + +**Query results only in memory**: If queries and their results aren't written to working files, they can't be reviewed, cited, or audited. 
The query log is not optional. + +**Draft report written at the end**: The draft should evolve throughout the Analyze step, not be assembled from notes after all research is complete. Writing the narrative as you go reveals gaps in the analysis early. + +**Conflating analysis with presentation**: Trying to produce a polished PDF while still figuring out the findings leads to wasted formatting work. Get the content right first, then make it pretty. diff --git a/.deepwork/jobs/deepwork_jobs/steps/define.md b/.deepwork/jobs/deepwork_jobs/steps/define.md index 18c268eb..977872f0 100644 --- a/.deepwork/jobs/deepwork_jobs/steps/define.md +++ b/.deepwork/jobs/deepwork_jobs/steps/define.md @@ -179,10 +179,34 @@ reviews: ``` **`run_each` options:** -- `step` — Review runs once with ALL output files + input files +- `step` — Review runs once with ALL output files - `` where output is `type: file` — Review runs once with that specific file - `` where output is `type: files` — Review runs once per file in the list +**`additional_review_guidance`** (optional): Tells the reviewer what other files or context to look at when performing the review. Reviewers only see the step's output files by default — they do NOT automatically see inputs from prior steps. When a review needs context beyond the output files (e.g., checking that an output is consistent with a prior step's deliverable, or that it follows conventions in a config file), use this field to tell the reviewer what to read. + +```yaml +reviews: + - run_each: report_files + additional_review_guidance: "Read the comparison_matrix.md file for context on whether claims in the report are supported by the analysis data." + quality_criteria: + "Data-Backed": "Are recommendations supported by the competitive analysis data?" + - run_each: step_instruction_files + additional_review_guidance: "Read the job.yml file in the same job directory for context on how this instruction file fits into the larger workflow." + quality_criteria: + "Complete Instructions": "Is the instruction file complete?" +``` + +**When to use `additional_review_guidance`:** +- When a review criterion references data or context from a prior step's output +- When the reviewer needs to cross-check the output against a specification, config, or schema file +- When the review involves consistency checks between the current output and other project files +- When the criterion mentions something the reviewer can't assess from the output alone + +**When NOT to use it:** +- When all criteria can be evaluated by reading just the output files themselves (e.g., "Is it well-written?", "Are there spelling errors?") +- Don't use it to dump large amounts of content — keep guidance short and tell the reviewer *what to read*, not *what's in it* + **Reviews are particularly valuable for:** - Steps with complex outputs that need multiple quality checks - Steps where quality is critical (final deliverables) diff --git a/.deepwork/jobs/deepwork_jobs/templates/job.yml.example b/.deepwork/jobs/deepwork_jobs/templates/job.yml.example index f321c355..4712b530 100644 --- a/.deepwork/jobs/deepwork_jobs/templates/job.yml.example +++ b/.deepwork/jobs/deepwork_jobs/templates/job.yml.example @@ -89,7 +89,8 @@ steps: dependencies: - comparative_analysis reviews: - - run_each: step + - run_each: positioning_report.md + additional_review_guidance: "Read the comparison_matrix.md file to verify that recommendations are grounded in the competitive analysis data." 
quality_criteria: "Actionable": "Are recommendations specific and actionable?" "Data-Backed": "Are recommendations supported by the competitive analysis data?" diff --git a/.deepwork/jobs/deepwork_jobs/templates/job.yml.template b/.deepwork/jobs/deepwork_jobs/templates/job.yml.template index 0774c5d7..e098b468 100644 --- a/.deepwork/jobs/deepwork_jobs/templates/job.yml.template +++ b/.deepwork/jobs/deepwork_jobs/templates/job.yml.template @@ -45,6 +45,8 @@ steps: dependencies: [] # List of step IDs that must complete first reviews: - run_each: step # or a specific output name + # Optional: tell the reviewer what files to read for context + # additional_review_guidance: "Read the [filename] for context on [what]." quality_criteria: "[Criterion Name]": "[Question to evaluate]" "[Another Criterion]": "[Another question]" diff --git a/.deepwork/schemas/job.schema.json b/.deepwork/schemas/job.schema.json index 4226f708..e0098056 100644 --- a/.deepwork/schemas/job.schema.json +++ b/.deepwork/schemas/job.schema.json @@ -388,6 +388,10 @@ "minLength": 1 }, "minProperties": 1 + }, + "additional_review_guidance": { + "type": "string", + "description": "Optional guidance for the reviewer about what context to look at (e.g., 'Look at the job.yml file for context'). Replaces automatic inclusion of input file contents." } } } diff --git a/src/deepwork/core/parser.py b/src/deepwork/core/parser.py index 2de03c37..0e7503ec 100644 --- a/src/deepwork/core/parser.py +++ b/src/deepwork/core/parser.py @@ -119,6 +119,7 @@ class Review: run_each: str # "step" or output name quality_criteria: dict[str, str] # name → question + additional_review_guidance: str | None = None # optional guidance for reviewer @classmethod def from_dict(cls, data: dict[str, Any]) -> "Review": @@ -126,6 +127,7 @@ def from_dict(cls, data: dict[str, Any]) -> "Review": return cls( run_each=data["run_each"], quality_criteria=data.get("quality_criteria", {}), + additional_review_guidance=data.get("additional_review_guidance"), ) diff --git a/src/deepwork/mcp/quality_gate.py b/src/deepwork/mcp/quality_gate.py index f3be12f8..496d8b1f 100644 --- a/src/deepwork/mcp/quality_gate.py +++ b/src/deepwork/mcp/quality_gate.py @@ -73,12 +73,14 @@ def _build_instructions( self, quality_criteria: dict[str, str], notes: str | None = None, + additional_review_guidance: str | None = None, ) -> str: """Build the system instructions for the review agent. Args: quality_criteria: Map of criterion name to criterion question notes: Optional notes from the agent about work done + additional_review_guidance: Optional guidance about what context to look at Returns: System instructions string @@ -97,15 +99,23 @@ def _build_instructions( {notes}""" + guidance_section = "" + if additional_review_guidance: + guidance_section = f""" + +## Additional Context + +{additional_review_guidance}""" + return f"""\ You are an editor responsible for reviewing the files listed as outputs. Your job is to evaluate whether outputs meet the specified criteria below. -You have also been provided any relevant inputs that were used by the process that generated the outputs. ## Criteria to Evaluate {criteria_list} {notes_section} +{guidance_section} ## Response Format @@ -192,30 +202,18 @@ async def _build_payload( self, outputs: dict[str, str | list[str]], project_root: Path, - inputs: dict[str, str | list[str]] | None = None, ) -> str: - """Build the user prompt payload with file contents. - - Organizes content into clearly separated INPUTS and OUTPUTS sections. 
+ """Build the user prompt payload with output file contents. Args: outputs: Map of output names to file path(s) project_root: Project root path for reading files - inputs: Optional map of input names to file path(s) from prior steps Returns: - Formatted payload with file contents in sections + Formatted payload with output file contents """ parts: list[str] = [] - # Build inputs section if provided - if inputs: - input_sections = await self._read_file_sections(inputs, project_root) - if input_sections: - parts.append(f"{SECTION_SEPARATOR} BEGIN INPUTS {SECTION_SEPARATOR}") - parts.extend(input_sections) - parts.append(f"{SECTION_SEPARATOR} END INPUTS {SECTION_SEPARATOR}") - # Build outputs section output_sections = await self._read_file_sections(outputs, project_root) if output_sections: @@ -267,8 +265,8 @@ async def evaluate( quality_criteria: dict[str, str], outputs: dict[str, str | list[str]], project_root: Path, - inputs: dict[str, str | list[str]] | None = None, notes: str | None = None, + additional_review_guidance: str | None = None, ) -> QualityGateResult: """Evaluate step outputs against quality criteria. @@ -276,8 +274,8 @@ async def evaluate( quality_criteria: Map of criterion name to criterion question outputs: Map of output names to file path(s) project_root: Project root path - inputs: Optional map of input names to file path(s) from prior steps notes: Optional notes from the agent about work done + additional_review_guidance: Optional guidance for the reviewer Returns: QualityGateResult with pass/fail and feedback @@ -293,8 +291,12 @@ async def evaluate( criteria_results=[], ) - instructions = self._build_instructions(quality_criteria, notes=notes) - payload = await self._build_payload(outputs, project_root, inputs=inputs) + instructions = self._build_instructions( + quality_criteria, + notes=notes, + additional_review_guidance=additional_review_guidance, + ) + payload = await self._build_payload(outputs, project_root) from deepwork.mcp.claude_cli import ClaudeCLIError @@ -316,17 +318,16 @@ async def evaluate_reviews( outputs: dict[str, str | list[str]], output_specs: dict[str, str], project_root: Path, - inputs: dict[str, str | list[str]] | None = None, notes: str | None = None, ) -> list[ReviewResult]: """Evaluate all reviews for a step, running them in parallel. 
Args: - reviews: List of review dicts with run_each and quality_criteria + reviews: List of review dicts with run_each, quality_criteria, + and optional additional_review_guidance outputs: Map of output names to file path(s) output_specs: Map of output names to their type ("file" or "files") project_root: Project root path - inputs: Optional map of input names to file path(s) from prior steps notes: Optional notes from the agent about work done Returns: @@ -335,15 +336,19 @@ async def evaluate_reviews( if not reviews: return [] - tasks: list[tuple[str, str | None, dict[str, str], dict[str, str | list[str]]]] = [] + # Each task is (run_each, target_file, criteria, review_outputs, guidance) + tasks: list[ + tuple[str, str | None, dict[str, str], dict[str, str | list[str]], str | None] + ] = [] for review in reviews: run_each = review["run_each"] quality_criteria = review["quality_criteria"] + guidance = review.get("additional_review_guidance") if run_each == "step": # Review all outputs together - tasks.append((run_each, None, quality_criteria, outputs)) + tasks.append((run_each, None, quality_criteria, outputs, guidance)) elif run_each in outputs: output_type = output_specs.get(run_each, "file") output_value = outputs[run_each] @@ -356,6 +361,7 @@ async def evaluate_reviews( file_path, quality_criteria, {run_each: file_path}, + guidance, )) else: # Single file - run once @@ -364,6 +370,7 @@ async def evaluate_reviews( output_value if isinstance(output_value, str) else None, quality_criteria, {run_each: output_value}, + guidance, )) async def run_review( @@ -371,13 +378,14 @@ async def run_review( target_file: str | None, criteria: dict[str, str], review_outputs: dict[str, str | list[str]], + guidance: str | None, ) -> ReviewResult: result = await self.evaluate( quality_criteria=criteria, outputs=review_outputs, project_root=project_root, - inputs=inputs, notes=notes, + additional_review_guidance=guidance, ) return ReviewResult( review_run_each=run_each, @@ -417,16 +425,16 @@ async def evaluate( quality_criteria: dict[str, str], outputs: dict[str, str | list[str]], project_root: Path, - inputs: dict[str, str | list[str]] | None = None, notes: str | None = None, + additional_review_guidance: str | None = None, ) -> QualityGateResult: """Mock evaluation - records call and returns configured result.""" self.evaluations.append( { "quality_criteria": quality_criteria, "outputs": outputs, - "inputs": inputs, "notes": notes, + "additional_review_guidance": additional_review_guidance, } ) diff --git a/src/deepwork/mcp/schemas.py b/src/deepwork/mcp/schemas.py index 5bd7b5e5..2015251f 100644 --- a/src/deepwork/mcp/schemas.py +++ b/src/deepwork/mcp/schemas.py @@ -143,6 +143,10 @@ class ReviewInfo(BaseModel): quality_criteria: dict[str, str] = Field( description="Map of criterion name to criterion question" ) + additional_review_guidance: str | None = Field( + default=None, + description="Optional guidance for the reviewer about what context to look at", + ) class ReviewResult(BaseModel): diff --git a/src/deepwork/mcp/tools.py b/src/deepwork/mcp/tools.py index 97be330a..32037e7f 100644 --- a/src/deepwork/mcp/tools.py +++ b/src/deepwork/mcp/tools.py @@ -348,6 +348,7 @@ async def start_workflow(self, input_data: StartWorkflowInput) -> StartWorkflowR ReviewInfo( run_each=r.run_each, quality_criteria=r.quality_criteria, + additional_review_guidance=r.additional_review_guidance, ) for r in first_step.reviews ], @@ -394,26 +395,18 @@ async def finished_step(self, input_data: FinishedStepInput) -> 
FinishedStepResp # Build output specs map for evaluate_reviews output_specs = {out.name: out.type for out in current_step.outputs} - # Resolve input files from prior step outputs - input_files: dict[str, str | list[str]] = {} - for inp in current_step.inputs: - if inp.is_file_input(): - source_progress = session.step_progress.get(inp.from_step) # type: ignore[arg-type] - if source_progress and inp.file in source_progress.outputs: - input_files[inp.file] = source_progress.outputs[inp.file] # type: ignore[index] - failed_reviews = await self.quality_gate.evaluate_reviews( reviews=[ { "run_each": r.run_each, "quality_criteria": r.quality_criteria, + "additional_review_guidance": r.additional_review_guidance, } for r in current_step.reviews ], outputs=input_data.outputs, output_specs=output_specs, project_root=self.project_root, - inputs=input_files if input_files else None, notes=input_data.notes, ) @@ -500,6 +493,7 @@ async def finished_step(self, input_data: FinishedStepInput) -> FinishedStepResp ReviewInfo( run_each=r.run_each, quality_criteria=r.quality_criteria, + additional_review_guidance=r.additional_review_guidance, ) for r in next_step.reviews ], diff --git a/src/deepwork/schemas/job.schema.json b/src/deepwork/schemas/job.schema.json index 4226f708..e0098056 100644 --- a/src/deepwork/schemas/job.schema.json +++ b/src/deepwork/schemas/job.schema.json @@ -388,6 +388,10 @@ "minLength": 1 }, "minProperties": 1 + }, + "additional_review_guidance": { + "type": "string", + "description": "Optional guidance for the reviewer about what context to look at (e.g., 'Look at the job.yml file for context'). Replaces automatic inclusion of input file contents." } } } diff --git a/src/deepwork/standard_jobs/deepwork_jobs/job.yml b/src/deepwork/standard_jobs/deepwork_jobs/job.yml index e48853a7..900bf1e7 100644 --- a/src/deepwork/standard_jobs/deepwork_jobs/job.yml +++ b/src/deepwork/standard_jobs/deepwork_jobs/job.yml @@ -100,6 +100,7 @@ steps: - define reviews: - run_each: step_instruction_files + additional_review_guidance: "Read the job.yml file in the same job directory for context on how this instruction file fits into the larger workflow." quality_criteria: "Complete Instructions": "Is the instruction file complete (no stubs or placeholders)?" "Specific & Actionable": "Are instructions tailored to the step's purpose, not generic?" 
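Because `step_instruction_files` is a `files` output, the review above fans out into one reviewer call per instruction file, each carrying the same guidance string. A rough sketch of that expansion, mirroring `evaluate_reviews` in the `quality_gate.py` change above (all values here are illustrative):

```python
# Sketch: expand a run_each review over a `files` output into per-file tasks.
outputs = {"step_instruction_files": ["steps/define.md", "steps/implement.md"]}
output_specs = {"step_instruction_files": "files"}
review = {
    "run_each": "step_instruction_files",
    "quality_criteria": {"Complete Instructions": "Is the instruction file complete?"},
    "additional_review_guidance": "Read the job.yml file in the same job directory.",
}

tasks = []
run_each = review["run_each"]
guidance = review.get("additional_review_guidance")
if run_each == "step":
    # one review over all outputs together
    tasks.append((run_each, None, review["quality_criteria"], outputs, guidance))
elif output_specs.get(run_each) == "files":
    # one review per file in the list
    for path in outputs[run_each]:
        tasks.append((run_each, path, review["quality_criteria"], {run_each: path}, guidance))

assert len(tasks) == 2  # one reviewer call per instruction file
```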
@@ -118,7 +119,7 @@ steps: - file: step_instruction_files from_step: implement outputs: - test_feedback.md: + .deepwork/tmp/test_feedback.md: type: file description: "Feedback from testing the workflow on a real use case" dependencies: @@ -142,6 +143,8 @@ steps: from_step: define - file: step_instruction_files from_step: implement + - file: .deepwork/tmp/test_feedback.md + from_step: test outputs: job.yml: type: file @@ -149,6 +152,9 @@ steps: step_instruction_files: type: files description: "Updated instruction Markdown files for each step" + scripts: + type: files + description: "Updated scripts to run parts of the job more efficiently" dependencies: - define - implement @@ -166,6 +172,15 @@ steps: AGENTS.md: type: file description: "Bespoke learnings and run-specific context for the working folder" + job.yml: + type: file + description: "Updated job definition with improvements from test run" + step_instruction_files: + type: files + description: "Updated instruction Markdown files for each step" + scripts: + type: files + description: "Updated scripts to run parts of the job more efficiently" dependencies: [] reviews: - run_each: step @@ -178,7 +193,6 @@ steps: "Bespoke Learnings Captured": "Were run-specific learnings added to AGENTS.md?" "File References Used": "Do AGENTS.md entries reference other files where appropriate?" "Working Folder Correct": "Is AGENTS.md in the correct working folder for the job?" - "Generalizable Separated": "Are generalizable improvements in instructions, not AGENTS.md?" - id: fix_settings name: "Fix Settings Files" @@ -214,10 +228,14 @@ steps: job_definitions: type: files description: "Updated job.yml files and step instructions in current DeepWork format" + step_instruction_files: + type: files + description: "Updated step instruction files" dependencies: - fix_settings reviews: - run_each: step + additional_review_guidance: "Read the .claude/settings.json file for context on what settings were cleaned up in the prior step." quality_criteria: "Exposed Field Addressed": "Are `exposed: true` fields removed or noted as deprecated?" "Stop Hooks Migrated": "Are `stop_hooks` migrated to `hooks.after_agent` format?" @@ -238,6 +256,7 @@ steps: - fix_jobs reviews: - run_each: step + additional_review_guidance: "Check the .deepwork/jobs/ directory and .claude/skills/ directory to verify the cleanup was done correctly." quality_criteria: "Legacy Job Skills Removed": "Are legacy skill folders for each job removed from `.claude/skills/` and `.gemini/skills/`?" "Deepwork Skill Preserved": "Does the `deepwork` skill folder still exist in `.claude/skills/deepwork/`?" diff --git a/src/deepwork/standard_jobs/deepwork_jobs/research_report_job_best_practices.md b/src/deepwork/standard_jobs/deepwork_jobs/research_report_job_best_practices.md new file mode 100644 index 00000000..87e65615 --- /dev/null +++ b/src/deepwork/standard_jobs/deepwork_jobs/research_report_job_best_practices.md @@ -0,0 +1,184 @@ +# Research Report Job Best Practices + +Reference guide for designing DeepWork jobs that produce research reports, analytical documents, or similar investigative deliverables. Use this when defining jobs via the `define` step. + +## The General Pattern + +Most report-authoring jobs follow a five-phase structure. Not every job needs all five as separate steps, and some phases combine naturally, but understanding the full arc helps you design a job that doesn't skip critical work. + +### 1. 
Connect + +**Purpose**: Verify that the tools and data sources the job will rely on are actually accessible before any real work begins. + +This step is about validating prerequisites, not doing research. Common activities: + +- **Database connectivity**: Run a trivial query (`SELECT 1`, `SHOW TABLES`) to confirm credentials work and the schema is reachable. +- **Web search tools**: Confirm web search and browsing tools are enabled. If the job needs to read specific sites, verify they don't require login. If they do, get the user to authenticate (e.g., via Claude in Chrome) before proceeding. +- **API access**: Test API keys or tokens against a lightweight endpoint. +- **File access**: Confirm that input files, dashboards, or shared drives are readable. + +**Why a separate step?** A failed connection discovered midway through analysis wastes all prior work. Catching it upfront is cheap. That said, for simple jobs where the data source is obvious and reliable (e.g., "search the web for X"), this can be folded into the Align step as a quick check rather than standing alone. + +**Outputs**: A brief connectivity report or checklist confirming each source is accessible, plus any credentials or configuration notes for later steps. + +### 2. Align + +**Purpose**: Build enough understanding of the domain and the user's intent to scope the analysis correctly. + +This is a cyclical step: do light research, then ask clarifying questions, then refine understanding, repeat. It ends when both the agent and user agree on what "done" looks like. + +**The cycle**: + +1. **Light grounding research** - Just enough to ask smart questions. Not deep analysis. +2. **Clarify with the user** - Surface ambiguities and propose scope boundaries. +3. **Repeat** until there's shared understanding. + +**Example - Private data (SQL-centric)**: +- Run broad queries to get the lay of the land: total record counts, key column names, date ranges, apparent segmentation columns (e.g., `division`, `region`). +- Then ask the user: "I see 45,000 customer records across 3 divisions. Should we scope to a particular division? I'm defining churn as customers with no activity in 90 days - does that match your definition?" + +**Example - Public data (web-centric)**: +- Do broad searches to see what's out there. Notice the shape of results: are they news articles, academic papers, industry reports? What subtopics keep appearing? +- Then ask the user: "Results split between fast-fashion trends and haute couture analysis. Which direction? Also, should we focus on the current season or look at multi-year trends?" + +**Outputs**: A scoping document that captures the agreed-upon research questions, data sources, definitions, exclusions, and success criteria. This becomes the north star for the Analyze step. + +### 3. Analyze + +**Purpose**: The core research cycle. Query, record, synthesize, and deepen iteratively. + +This is where most of the work happens. The key discipline is maintaining structured working files so that nothing gets lost and the narrative builds progressively. + +**Working files to maintain**: + +| File | Purpose | +|------|---------| +| Query log | Every query/search with its results. What did you ask, what came back. Keeps work auditable and reproducible. | +| Questions & Answers | Running list of research questions. As you find answers, record them. As answers suggest new questions, add those. This drives the iterative deepening. | +| Draft report | The evolving narrative. Updated as new findings emerge. 
Forces you to synthesize as you go rather than dumping data at the end. | + +**The iterative deepening pattern**: + +Analysis should deepen in layers, not stay shallow across many topics. Each answer should prompt "why?" or "what drives that?" questions: + +- **Layer 1**: Top-level facts. "What was our AWS spend last month?" -> $10k. "How does that compare to prior month?" -> Up $1k. +- **Layer 2**: Decomposition. "What services drove the spend?" -> $8k EC2, $1k S3, $1k other. "Where was the increase?" -> All in EC2. +- **Layer 3**: Root causes. "Is our EC2 fleet well-utilized?" -> Many instances with attribute X are underutilized. "Are specific workloads driving the increase?" -> Yes, instances tagged `daily_sync_*` are up ~$2k. +- **Layer 4+**: Continue until you hit actionable findings or diminishing returns. + +**When to stop deepening**: When additional queries aren't changing the narrative, or when you've answered the questions from the Align step to a sufficient depth. But make sure that any questions that a reasonable business person is likely to ask when looking at your output are answered. + +**Outputs**: The working files above (query log, Q&A tracker, draft report), organized in the dataroom alongside the final output. + +### 4. Review (Not a Separate Step) + +Reviews are not a standalone phase but checkpoints woven into all the steps, especially the Analyze step. Use DeepWork's `reviews` mechanism in `job.yml` to define quality gates. + +**Reviews to consider for the Analyze phase**: + +- **Query completeness**: Are the key research questions from the scoping document all addressed? Are queries recorded with their results? +- **Draft coherence**: Does the draft report tell a logical story? Are sections connected rather than isolated findings? +- **Depth adequacy**: Has the analysis gone deep enough on the important threads? Are there obvious follow-up questions left unasked? +- **Citation integrity**: Are claims in the draft backed by specific queries/sources from the query log? + +**Reviews to consider for the Present phase** (see below): + +- **Visual quality**: Charts render correctly, no overlapping text, readable at intended size. +- **Content accuracy**: Citations preserved from draft, numbers match source data, arguments are logically sound. +- **Audience fit**: Language, detail level, and framing match the intended audience (executives vs. engineers vs. clients). +- **Format compliance**: Output matches the requested format (PDF renders correctly, HTML is responsive, slides have consistent styling). + +### 5. Present + +**Purpose**: Transform the draft into a polished final deliverable. + +The draft report from the Analyze step has the right content but may not be presentation-ready. This step focuses on the output experience. + +**Common activities**: + +- **Visualizations**: Generate charts, tables, or diagrams from the data. Fetch relevant images. Create infographics for key findings. +- **Formatting**: Convert to the final output format (PDF, HTML, slides, etc.). Apply styling and layout. +- **Narrative polish**: Tighten prose, add executive summary, ensure the document flows well for someone reading it cold. +- **Supporting materials**: Assemble appendices, data tables, methodology notes. + +**This step often requires multiple review cycles.** Visual outputs have failure modes that text-only drafts don't: overlapping labels, truncated legends, broken page breaks, images that don't load. Build in quality gates for visual review. 
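As a concrete illustration of the visualization activity above, here is a minimal sketch of a chart-generation script the agent might drop into the dataroom's `charts/` folder so the visual-quality review gate has a rendered artifact to check. It assumes matplotlib is available (tooling is a capability question for the `define` step), and the paths and spend figures are illustrative only.

```python
# Minimal sketch - illustrative paths and numbers, matplotlib assumed available.
from pathlib import Path

import matplotlib.pyplot as plt

charts_dir = Path("operations/reports/2026-01/spending_analysis_dataroom/charts")
charts_dir.mkdir(parents=True, exist_ok=True)

# Hypothetical monthly EC2 spend pulled from the query log.
months = ["Nov", "Dec", "Jan"]
ec2_spend_usd = [7000, 7200, 8000]

fig, ax = plt.subplots(figsize=(6, 4))
ax.bar(months, ec2_spend_usd)
ax.set_title("EC2 spend by month")
ax.set_ylabel("USD")
fig.tight_layout()  # avoids clipped labels, a common visual-review failure
fig.savefig(charts_dir / "ec2_spend_by_month.png", dpi=150)
```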
+ +**Outputs**: The final deliverable in its target format, plus any supporting materials. + +## Translating This Into a Job Definition + +### Step Structure Options + +**Minimal (3 steps)** - For straightforward reports with known data sources: +1. `scope` - Combines Connect + Align. Verify access, clarify requirements. +2. `research` - The Analyze phase with built-in review gates. +3. `report` - The Present phase with visual/format review gates. + +**Standard (4 steps)** - For most research reports: +1. `connect` - Verify data source access. +2. `scope` - Align on research questions and definitions. +3. `analyze` - Core research with iterative deepening. +4. `present` - Final deliverable production. + +**Comprehensive (5+ steps)** - For complex, multi-source reports: +1. `connect` - Verify all data source access. +2. `scope` - Align on research questions. +3. `gather` - Collect raw data across all sources (query log output). +4. `analyze` - Synthesize findings, build narrative (draft report output). +5. `present` - Polish and format final deliverable. + +### Output Organization + +Follow the dataroom pattern from the define step guidelines: + +``` +operations/reports/2026-01/spending_analysis.md # Final report +operations/reports/2026-01/spending_analysis_dataroom/ # Supporting materials + query_log.md # All queries and results + questions_and_answers.md # Research Q&A tracker + raw_data/ # Extracted data files + charts/ # Generated visualizations + draft.md # Working draft (for audit trail) +``` + +### Quality Gate Design + +Research reports benefit from **split reviews** that evaluate content and presentation separately: + +```yaml +reviews: + # Content review - is the analysis sound? + - run_each: final_report.md + quality_criteria: + "Claims Cited": "Is every factual claim backed by a specific source or query from the dataroom?" + "Questions Answered": "Are all research questions from the scoping document addressed?" + "Depth": "Does the analysis go beyond surface-level observations to root causes or actionable insights?" + + # Presentation review - is the output polished? + - run_each: final_report.md + quality_criteria: + "Readable Flow": "Does the document flow logically for someone reading it without prior context?" + "Audience Fit": "Is the language and detail level appropriate for the intended audience?" + "Visual Quality": "Do all charts, tables, and figures render correctly and add value?" +``` + +### Capability Considerations + +Research jobs frequently need specialized tools. During the `define` step, ask about: + +- **Database access**: What databases? What client tools or connection strings? +- **Web browsing**: Will sites require authentication? Is Claude in Chrome available? +- **File generation**: Does the final output need PDF/HTML rendering? What tools are available? +- **Data visualization**: What charting libraries or tools can the agent use? + +## Anti-Patterns to Avoid + +**Shallow breadth over deep analysis**: Covering 20 topics superficially is less valuable than covering 5 topics with layered depth. Design the Analyze step to encourage iterative deepening, not checklist completion. + +**Skipping the scoping step**: Jumping straight into analysis without aligning on definitions and scope almost always leads to rework. "Analyze our churn" means very different things depending on how churn is defined. + +**Query results only in memory**: If queries and their results aren't written to working files, they can't be reviewed, cited, or audited. 
The query log is not optional. + +**Draft report written at the end**: The draft should evolve throughout the Analyze step, not be assembled from notes after all research is complete. Writing the narrative as you go reveals gaps in the analysis early. + +**Conflating analysis with presentation**: Trying to produce a polished PDF while still figuring out the findings leads to wasted formatting work. Get the content right first, then make it pretty. diff --git a/src/deepwork/standard_jobs/deepwork_jobs/steps/define.md b/src/deepwork/standard_jobs/deepwork_jobs/steps/define.md index 18c268eb..977872f0 100644 --- a/src/deepwork/standard_jobs/deepwork_jobs/steps/define.md +++ b/src/deepwork/standard_jobs/deepwork_jobs/steps/define.md @@ -179,10 +179,34 @@ reviews: ``` **`run_each` options:** -- `step` — Review runs once with ALL output files + input files +- `step` — Review runs once with ALL output files - `<output_name>` where output is `type: file` — Review runs once with that specific file - `<output_name>` where output is `type: files` — Review runs once per file in the list +**`additional_review_guidance`** (optional): Tells the reviewer what other files or context to look at when performing the review. Reviewers only see the step's output files by default — they do NOT automatically see inputs from prior steps. When a review needs context beyond the output files (e.g., checking that an output is consistent with a prior step's deliverable, or that it follows conventions in a config file), use this field to tell the reviewer what to read. + +```yaml +reviews: + - run_each: report_files + additional_review_guidance: "Read the comparison_matrix.md file for context on whether claims in the report are supported by the analysis data." + quality_criteria: + "Data-Backed": "Are recommendations supported by the competitive analysis data?" + - run_each: step_instruction_files + additional_review_guidance: "Read the job.yml file in the same job directory for context on how this instruction file fits into the larger workflow." + quality_criteria: + "Complete Instructions": "Is the instruction file complete?"
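  # Illustrative third entry (hypothetical file names): guidance can also point the
  # reviewer at a spec or convention document the output must stay consistent with.
  - run_each: api_spec.json
    additional_review_guidance: "Read doc/api_conventions.md to check that endpoint names follow the documented conventions."
    quality_criteria:
      "Convention Compliance": "Do endpoint names follow the project's documented conventions?"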
+``` + +**When to use `additional_review_guidance`:** +- When a review criterion references data or context from a prior step's output +- When the reviewer needs to cross-check the output against a specification, config, or schema file +- When the review involves consistency checks between the current output and other project files +- When the criterion mentions something the reviewer can't assess from the output alone + +**When NOT to use it:** +- When all criteria can be evaluated by reading just the output files themselves (e.g., "Is it well-written?", "Are there spelling errors?") +- Don't use it to dump large amounts of content — keep guidance short and tell the reviewer *what to read*, not *what's in it* + **Reviews are particularly valuable for:** - Steps with complex outputs that need multiple quality checks - Steps where quality is critical (final deliverables) diff --git a/src/deepwork/standard_jobs/deepwork_jobs/templates/job.yml.example b/src/deepwork/standard_jobs/deepwork_jobs/templates/job.yml.example index f321c355..4712b530 100644 --- a/src/deepwork/standard_jobs/deepwork_jobs/templates/job.yml.example +++ b/src/deepwork/standard_jobs/deepwork_jobs/templates/job.yml.example @@ -89,7 +89,8 @@ steps: dependencies: - comparative_analysis reviews: - - run_each: step + - run_each: positioning_report.md + additional_review_guidance: "Read the comparison_matrix.md file to verify that recommendations are grounded in the competitive analysis data." quality_criteria: "Actionable": "Are recommendations specific and actionable?" "Data-Backed": "Are recommendations supported by the competitive analysis data?" diff --git a/src/deepwork/standard_jobs/deepwork_jobs/templates/job.yml.template b/src/deepwork/standard_jobs/deepwork_jobs/templates/job.yml.template index 0774c5d7..e098b468 100644 --- a/src/deepwork/standard_jobs/deepwork_jobs/templates/job.yml.template +++ b/src/deepwork/standard_jobs/deepwork_jobs/templates/job.yml.template @@ -45,6 +45,8 @@ steps: dependencies: [] # List of step IDs that must complete first reviews: - run_each: step # or a specific output name + # Optional: tell the reviewer what files to read for context + # additional_review_guidance: "Read the [filename] for context on [what]." quality_criteria: "[Criterion Name]": "[Question to evaluate]" "[Another Criterion]": "[Another question]" diff --git a/tests/unit/mcp/test_quality_gate.py b/tests/unit/mcp/test_quality_gate.py index 8885d6cb..c4495f14 100644 --- a/tests/unit/mcp/test_quality_gate.py +++ b/tests/unit/mcp/test_quality_gate.py @@ -73,6 +73,24 @@ def test_build_instructions(self, quality_gate: QualityGate) -> None: assert "passed" in instructions # JSON format mentioned assert "feedback" in instructions # JSON format mentioned + def test_build_instructions_with_guidance(self, quality_gate: QualityGate) -> None: + """Test that additional_review_guidance appears in system instructions.""" + instructions = quality_gate._build_instructions( + quality_criteria={"Valid": "Is it valid?"}, + additional_review_guidance="Read the job.yml file for context.", + ) + + assert "Additional Context" in instructions + assert "Read the job.yml file for context." 
in instructions + + def test_build_instructions_without_guidance(self, quality_gate: QualityGate) -> None: + """Test that guidance section is absent when not provided.""" + instructions = quality_gate._build_instructions( + quality_criteria={"Valid": "Is it valid?"}, + ) + + assert "Additional Context" not in instructions + async def test_build_payload(self, quality_gate: QualityGate, project_root: Path) -> None: """Test building payload with file contents.""" output_file = project_root / "output.md" @@ -158,34 +176,10 @@ async def test_build_payload_binary_file_in_multi_output( assert "not included in review" in payload assert str(binary_file.resolve()) in payload - async def test_build_payload_with_inputs_and_outputs( + async def test_build_payload_only_outputs( self, quality_gate: QualityGate, project_root: Path ) -> None: - """Test building payload with both inputs and outputs in separate sections.""" - (project_root / "input_data.md").write_text("Input content from prior step") - (project_root / "output_report.md").write_text("Output content from current step") - - payload = await quality_gate._build_payload( - outputs={"report": "output_report.md"}, - project_root=project_root, - inputs={"data": "input_data.md"}, - ) - - # Both sections present - assert "BEGIN INPUTS" in payload - assert "END INPUTS" in payload - assert "BEGIN OUTPUTS" in payload - assert "END OUTPUTS" in payload - # Content included - assert "Input content from prior step" in payload - assert "Output content from current step" in payload - # Inputs section comes before outputs section - assert payload.index("BEGIN INPUTS") < payload.index("BEGIN OUTPUTS") - - async def test_build_payload_outputs_only_no_input_headers( - self, quality_gate: QualityGate, project_root: Path - ) -> None: - """Test that when no inputs provided, only outputs section appears.""" + """Test that payload only contains outputs section (no inputs).""" (project_root / "output.md").write_text("Output only") payload = await quality_gate._build_payload( @@ -198,44 +192,6 @@ async def test_build_payload_outputs_only_no_input_headers( assert "BEGIN INPUTS" not in payload assert "END INPUTS" not in payload - async def test_build_payload_empty_inputs_no_input_headers( - self, quality_gate: QualityGate, project_root: Path - ) -> None: - """Test that empty inputs dict doesn't add input headers.""" - (project_root / "output.md").write_text("Output only") - - payload = await quality_gate._build_payload( - outputs={"report": "output.md"}, - project_root=project_root, - inputs={}, - ) - - assert "BEGIN OUTPUTS" in payload - assert "BEGIN INPUTS" not in payload - - async def test_build_payload_multiple_inputs( - self, quality_gate: QualityGate, project_root: Path - ) -> None: - """Test building payload with multiple input files.""" - (project_root / "data1.md").write_text("Data file 1") - (project_root / "data2.md").write_text("Data file 2") - (project_root / "output.md").write_text("Final output") - - payload = await quality_gate._build_payload( - outputs={"report": "output.md"}, - project_root=project_root, - inputs={"data_a": "data1.md", "data_b": "data2.md"}, - ) - - assert "Data file 1" in payload - assert "Data file 2" in payload - assert "Final output" in payload - # Both files should be within the inputs section - inputs_start = payload.index("BEGIN INPUTS") - inputs_end = payload.index("END INPUTS") - assert payload.index("data1.md") > inputs_start - assert payload.index("data1.md") < inputs_end - def test_parse_result_valid(self, quality_gate: 
QualityGate) -> None: """Test parsing valid structured output data.""" data = { @@ -478,6 +434,91 @@ async def test_single_file_review( assert result == [] mock_cli.run.assert_called_once() + async def test_review_passes_guidance_to_system_prompt( + self, mock_cli: ClaudeCLI, project_root: Path + ) -> None: + """Test that additional_review_guidance is included in the CLI system prompt.""" + mock_cli.run = AsyncMock( + return_value={"passed": True, "feedback": "OK", "criteria_results": []} + ) + gate = QualityGate(cli=mock_cli) + + (project_root / "output.md").write_text("content") + + await gate.evaluate_reviews( + reviews=[ + { + "run_each": "step", + "quality_criteria": {"Valid": "Is it valid?"}, + "additional_review_guidance": "Read the job.yml for workflow context.", + } + ], + outputs={"report": "output.md"}, + output_specs={"report": "file"}, + project_root=project_root, + ) + + mock_cli.run.assert_called_once() + system_prompt = mock_cli.run.call_args.kwargs["system_prompt"] + assert "Read the job.yml for workflow context." in system_prompt + assert "Additional Context" in system_prompt + + async def test_review_without_guidance_omits_section( + self, mock_cli: ClaudeCLI, project_root: Path + ) -> None: + """Test that reviews without guidance don't include the section.""" + mock_cli.run = AsyncMock( + return_value={"passed": True, "feedback": "OK", "criteria_results": []} + ) + gate = QualityGate(cli=mock_cli) + + (project_root / "output.md").write_text("content") + + await gate.evaluate_reviews( + reviews=[ + { + "run_each": "step", + "quality_criteria": {"Valid": "Is it valid?"}, + } + ], + outputs={"report": "output.md"}, + output_specs={"report": "file"}, + project_root=project_root, + ) + + system_prompt = mock_cli.run.call_args.kwargs["system_prompt"] + assert "Additional Context" not in system_prompt + + async def test_per_file_review_passes_guidance_to_each( + self, mock_cli: ClaudeCLI, project_root: Path + ) -> None: + """Test that guidance is passed to each per-file review invocation.""" + mock_cli.run = AsyncMock( + return_value={"passed": True, "feedback": "OK", "criteria_results": []} + ) + gate = QualityGate(cli=mock_cli) + + (project_root / "a.md").write_text("File A") + (project_root / "b.md").write_text("File B") + + await gate.evaluate_reviews( + reviews=[ + { + "run_each": "reports", + "quality_criteria": {"Valid": "Is it valid?"}, + "additional_review_guidance": "Check against the spec.", + } + ], + outputs={"reports": ["a.md", "b.md"]}, + output_specs={"reports": "files"}, + project_root=project_root, + ) + + assert mock_cli.run.call_count == 2 + for call in mock_cli.run.call_args_list: + system_prompt = call.kwargs["system_prompt"] + assert "Check against the spec." 
in system_prompt + class TestMockQualityGate: """Tests for MockQualityGate class.""" @@ -532,3 +573,29 @@ async def test_mock_records_evaluations(self, project_root: Path) -> None: assert len(gate.evaluations) == 2 assert gate.evaluations[0]["quality_criteria"] == {"Criterion 1": "Is criterion 1 met?"} assert gate.evaluations[1]["quality_criteria"] == {"Criterion 2": "Is criterion 2 met?"} + + async def test_mock_records_additional_review_guidance(self, project_root: Path) -> None: + """Test mock gate records additional_review_guidance when provided.""" + gate = MockQualityGate() + + await gate.evaluate( + quality_criteria={"Check": "Is it good?"}, + outputs={"report": "output.md"}, + project_root=project_root, + additional_review_guidance="Look at the job.yml for context.", + ) + + assert len(gate.evaluations) == 1 + assert gate.evaluations[0]["additional_review_guidance"] == "Look at the job.yml for context." + + async def test_mock_records_none_guidance_when_omitted(self, project_root: Path) -> None: + """Test mock gate records None for guidance when not provided.""" + gate = MockQualityGate() + + await gate.evaluate( + quality_criteria={"Check": "Is it good?"}, + outputs={"report": "output.md"}, + project_root=project_root, + ) + + assert gate.evaluations[0]["additional_review_guidance"] is None diff --git a/tests/unit/mcp/test_tools.py b/tests/unit/mcp/test_tools.py index 42e8e80f..f292f162 100644 --- a/tests/unit/mcp/test_tools.py +++ b/tests/unit/mcp/test_tools.py @@ -718,3 +718,249 @@ async def test_finished_step_files_type_success( ) assert response.status == StepStatus.WORKFLOW_COMPLETE + + async def test_quality_reviewer_receives_only_current_step_outputs( + self, project_root: Path, state_manager: StateManager + ) -> None: + """Test that quality reviewer receives ONLY the current step's outputs. + + Prior step outputs are no longer auto-included as inputs. + """ + # Create a 3-step job: step1 -> step2 -> step3 + job_dir = project_root / ".deepwork" / "jobs" / "chain_job" + job_dir.mkdir(parents=True) + (job_dir / "job.yml").write_text( + """ +name: chain_job +version: "1.0.0" +summary: Three-step chain to test input filtering +description: Test job + +steps: + - id: step1 + name: Step 1 + description: First step + instructions_file: steps/step1.md + outputs: + step1_output.md: + type: file + description: Step 1 output + reviews: [] + + - id: step2 + name: Step 2 + description: Second step - takes step1 output + instructions_file: steps/step2.md + inputs: + - file: step1_output.md + from_step: step1 + outputs: + step2_output.md: + type: file + description: Step 2 output + dependencies: + - step1 + reviews: [] + + - id: step3 + name: Step 3 + description: Third step - takes ONLY step2 output (not step1) + instructions_file: steps/step3.md + inputs: + - file: step2_output.md + from_step: step2 + outputs: + step3_output.md: + type: file + description: Step 3 output + dependencies: + - step2 + reviews: + - run_each: step + quality_criteria: + "Complete": "Is the output complete?" 
+ +workflows: + - name: main + summary: Main workflow + steps: + - step1 + - step2 + - step3 +""" + ) + steps_dir = job_dir / "steps" + steps_dir.mkdir() + (steps_dir / "step1.md").write_text("# Step 1\n\nProduce output.") + (steps_dir / "step2.md").write_text("# Step 2\n\nProduce output.") + (steps_dir / "step3.md").write_text("# Step 3\n\nProduce output.") + + mock_gate = MockQualityGate(should_pass=True) + tools = WorkflowTools( + project_root=project_root, + state_manager=state_manager, + quality_gate=mock_gate, + ) + + # Start workflow + await tools.start_workflow( + StartWorkflowInput( + goal="Test input filtering", + job_name="chain_job", + workflow_name="main", + ) + ) + + # Complete step1 + (project_root / "step1_output.md").write_text("STEP1_CONTENT_MARKER") + await tools.finished_step( + FinishedStepInput(outputs={"step1_output.md": "step1_output.md"}) + ) + + # Complete step2 + (project_root / "step2_output.md").write_text("STEP2_CONTENT_MARKER") + await tools.finished_step( + FinishedStepInput(outputs={"step2_output.md": "step2_output.md"}) + ) + + # Complete step3 — quality gate runs here + (project_root / "step3_output.md").write_text("STEP3_CONTENT_MARKER") + response = await tools.finished_step( + FinishedStepInput(outputs={"step3_output.md": "step3_output.md"}) + ) + + assert response.status == StepStatus.WORKFLOW_COMPLETE + + # Verify reviewer was called WITHOUT any prior step inputs + assert len(mock_gate.evaluations) == 1 + evaluation = mock_gate.evaluations[0] + + # Should only have the current step's outputs, not inputs from prior steps + assert "step3_output.md" in evaluation["outputs"] + assert "inputs" not in evaluation, ( + "Quality reviewer should not receive 'inputs' key — " + "prior step outputs are no longer auto-included" + ) + + async def test_additional_review_guidance_reaches_reviewer( + self, project_root: Path, state_manager: StateManager + ) -> None: + """Test that additional_review_guidance from job.yml is passed to the reviewer.""" + job_dir = project_root / ".deepwork" / "jobs" / "guided_job" + job_dir.mkdir(parents=True) + (job_dir / "job.yml").write_text( + """ +name: guided_job +version: "1.0.0" +summary: Job with review guidance +description: Test job + +steps: + - id: write + name: Write Report + description: Write a report + instructions_file: steps/write.md + outputs: + report.md: + type: file + description: The report + reviews: + - run_each: report.md + additional_review_guidance: "Read the project README for context on expected format." + quality_criteria: + "Format Correct": "Does the report follow the expected format?" + +workflows: + - name: main + summary: Main workflow + steps: + - write +""" + ) + steps_dir = job_dir / "steps" + steps_dir.mkdir() + (steps_dir / "write.md").write_text("# Write\n\nWrite the report.") + + mock_gate = MockQualityGate(should_pass=True) + tools = WorkflowTools( + project_root=project_root, + state_manager=state_manager, + quality_gate=mock_gate, + ) + + await tools.start_workflow( + StartWorkflowInput( + goal="Write report", + job_name="guided_job", + workflow_name="main", + ) + ) + + (project_root / "report.md").write_text("Report content") + response = await tools.finished_step( + FinishedStepInput(outputs={"report.md": "report.md"}) + ) + + assert response.status == StepStatus.WORKFLOW_COMPLETE + assert len(mock_gate.evaluations) == 1 + assert mock_gate.evaluations[0]["additional_review_guidance"] == ( + "Read the project README for context on expected format." 
+ ) + + async def test_review_guidance_in_start_workflow_response( + self, project_root: Path, state_manager: StateManager + ) -> None: + """Test that ReviewInfo in start_workflow response includes guidance.""" + job_dir = project_root / ".deepwork" / "jobs" / "guided_job2" + job_dir.mkdir(parents=True) + (job_dir / "job.yml").write_text( + """ +name: guided_job2 +version: "1.0.0" +summary: Job with review guidance +description: Test job + +steps: + - id: analyze + name: Analyze + description: Analyze data + instructions_file: steps/analyze.md + outputs: + analysis.md: + type: file + description: Analysis output + reviews: + - run_each: step + additional_review_guidance: "Check the raw data directory for completeness." + quality_criteria: + "Thorough": "Is the analysis thorough?" + +workflows: + - name: main + summary: Main workflow + steps: + - analyze +""" + ) + steps_dir = job_dir / "steps" + steps_dir.mkdir() + (steps_dir / "analyze.md").write_text("# Analyze\n\nAnalyze the data.") + + tools = WorkflowTools( + project_root=project_root, + state_manager=state_manager, + ) + + response = await tools.start_workflow( + StartWorkflowInput( + goal="Analyze data", + job_name="guided_job2", + workflow_name="main", + ) + ) + + reviews = response.begin_step.step_reviews + assert len(reviews) == 1 + assert reviews[0].additional_review_guidance == ( + "Check the raw data directory for completeness." + ) From 0ac59a74a821777e070f2affccf75686a0d9d98d Mon Sep 17 00:00:00 2001 From: Noah Horton Date: Mon, 9 Feb 2026 08:23:40 -0700 Subject: [PATCH 36/45] Add session_id routing for concurrent workflow safety MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When multiple workflows are active on the stack concurrently, callers can now pass session_id to finished_step and abort_workflow to target the correct session instead of always operating on the top-of-stack. This prevents logical corruption when sub-agents run workflows in parallel. Changes: - Add optional session_id field to FinishedStepInput and AbortWorkflowInput - Add _resolve_session() helper to StateManager for ID-based lookup - Thread session_id through all StateManager methods and WorkflowTools - Use filter-based stack removal instead of pop() for mid-stack operations - Add session_id parameter to MCP server tool registrations - Add v1.4.0 changelog entry to mcp_interface.md - Add tests for session_id routing in test_state, test_tools, test_async Fully backward compatible — omitting session_id preserves top-of-stack behavior. 
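For illustration, a caller targeting a specific session looks like this (mirroring the
test fixtures; `tools` is a WorkflowTools instance and `sub_agent_session_id` is an
example variable, not part of the API):

```python
# Sketch only: completing a step in a specific (non-top-of-stack) session.
from deepwork.mcp.schemas import FinishedStepInput

response = await tools.finished_step(
    FinishedStepInput(
        outputs={"report.md": "report.md"},
        session_id=sub_agent_session_id,  # from the ActiveStepInfo returned by start_workflow
    )
)
```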
Co-Authored-By: Claude Opus 4.6 --- .deepwork/jobs/deepwork_jobs/AGENTS.md | 37 +- .deepwork/jobs/deepwork_jobs/job.yml | 17 +- .deepwork/jobs/deepwork_jobs/steps/define.md | 7 + .../jobs/deepwork_jobs/steps/implement.md | 5 +- .deepwork/schemas/job.schema.json | 7 +- doc/mcp_interface.md | 6 +- library/jobs/spec_driven_development/job.yml | 11 + src/deepwork/core/parser.py | 2 + src/deepwork/mcp/claude_cli.py | 8 +- src/deepwork/mcp/quality_gate.py | 96 +++- src/deepwork/mcp/schemas.py | 20 +- src/deepwork/mcp/server.py | 20 +- src/deepwork/mcp/state.py | 102 ++-- src/deepwork/mcp/tools.py | 32 +- src/deepwork/schemas/job.schema.json | 7 +- .../standard_jobs/deepwork_jobs/AGENTS.md | 37 +- .../standard_jobs/deepwork_jobs/job.yml | 23 +- .../deepwork_jobs/steps/define.md | 7 + .../deepwork_jobs/steps/implement.md | 5 +- tests/fixtures/jobs/complex_job/job.yml | 6 + .../jobs/concurrent_steps_job/job.yml | 6 + tests/fixtures/jobs/exposed_step_job/job.yml | 2 + tests/fixtures/jobs/fruits/job.yml | 2 + tests/fixtures/jobs/job_with_doc_spec/job.yml | 1 + tests/fixtures/jobs/simple_job/job.yml | 1 + tests/unit/mcp/test_async_interface.py | 58 +++ tests/unit/mcp/test_quality_gate.py | 167 +++++++ tests/unit/mcp/test_schemas.py | 4 + tests/unit/mcp/test_state.py | 160 +++++++ tests/unit/mcp/test_tools.py | 440 +++++++++++++++++- tests/unit/test_parser.py | 59 ++- tests/unit/test_validation.py | 22 +- 32 files changed, 1268 insertions(+), 109 deletions(-) diff --git a/.deepwork/jobs/deepwork_jobs/AGENTS.md b/.deepwork/jobs/deepwork_jobs/AGENTS.md index 6d97d0e5..576c1185 100644 --- a/.deepwork/jobs/deepwork_jobs/AGENTS.md +++ b/.deepwork/jobs/deepwork_jobs/AGENTS.md @@ -18,9 +18,9 @@ This is the source of truth for the `deepwork_jobs` standard job. 2. **Working copy**: `.deepwork/jobs/deepwork_jobs/` - Must be updated after changes to source - - Used by `deepwork sync` to generate commands + - Used by the MCP server at runtime -After making changes to the source, copy files to the working copy: +After making changes to the source, run `deepwork install` or manually copy: ```bash cp src/deepwork/standard_jobs/deepwork_jobs/job.yml .deepwork/jobs/deepwork_jobs/ cp src/deepwork/standard_jobs/deepwork_jobs/steps/*.md .deepwork/jobs/deepwork_jobs/steps/ @@ -37,6 +37,8 @@ deepwork_jobs/ ├── steps/ │ ├── define.md # Define step instructions │ ├── implement.md # Implement step instructions +│ ├── test.md # Test step instructions +│ ├── iterate.md # Iterate step instructions │ ├── learn.md # Learn step instructions │ └── supplemental_file_references.md # Reference documentation └── templates/ @@ -47,6 +49,33 @@ deepwork_jobs/ └── step_instruction.md.example # Complete step example ``` +## Quality Review Learnings + +These learnings come from running the `new_job` workflow to create the `github_outreach` job (2026-02-06). + +### Review Criteria Must Be Pragmatic + +The implement step's review criteria caused 6+ review iterations during the github_outreach job creation. Key problems and fixes: + +1. **"Ask Structured Questions" was applied to ALL steps** — even pure analysis/generation steps with no user input. Fixed in v1.4.0: criterion now auto-passes for steps that only have file inputs from prior steps (no name/description user inputs). + +2. **"Output Examples" was too strict** — demanded concrete filled-in examples in every step file, even when a template structure with `[bracket placeholders]` was sufficient. Fixed in v1.4.0: renamed to "Output Format Examples" and accepts templates. 
Concrete examples are encouraged but not required. + +3. **Contradictory review results** — In one case, all 6 individual criteria passed but the overall review still returned `needs_work`. This appears to be a reviewer model issue where the summary contradicts the per-criterion assessments. Added `additional_review_guidance` to clarify when criteria should auto-pass. + +### Quality Review Timeouts on Large Outputs + +Steps producing many files (25 analysis files) or very long files (700+ line playbook) exceeded the 120-second MCP timeout during quality review. The `quality_review_override_reason` parameter was needed to bypass these. + +Mitigation strategies documented in `define.md`: +- Use `run_each: step` instead of `run_each: ` for steps with many files +- Keep review criteria efficient to evaluate +- Note expected output volume in step descriptions + +### Dependency Validation Gaps + +The github_outreach `final_report` step had `analyze_repos` as a file input but was missing it from the `dependencies` list. This was caught at workflow start time but could have been caught earlier during the `implement` step. The define step's validation rules already mention this (`from_step must be in dependencies`) but it was missed during creation. + ## Version Management - Version is tracked in `job.yml` @@ -56,5 +85,5 @@ deepwork_jobs/ ## Last Updated -- Date: 2026-01-15 -- From conversation about: Adding make_new_job.sh script and templates directory +- Date: 2026-02-06 +- From conversation about: Learn workflow analyzing severe quality review issues in the new_job execution diff --git a/.deepwork/jobs/deepwork_jobs/job.yml b/.deepwork/jobs/deepwork_jobs/job.yml index 900bf1e7..31beb6d8 100644 --- a/.deepwork/jobs/deepwork_jobs/job.yml +++ b/.deepwork/jobs/deepwork_jobs/job.yml @@ -1,6 +1,6 @@ # yaml-language-server: $schema=.deepwork/schemas/job.schema.json name: deepwork_jobs -version: "1.3.0" +version: "1.4.0" summary: "Creates and manages multi-step AI workflows. Use when defining, implementing, testing, or improving DeepWork jobs." description: | Core commands for managing DeepWork jobs. These commands help you define new multi-step @@ -38,6 +38,8 @@ workflows: - learn changelog: + - version: "1.4.0" + changes: "Fixed implement step review criteria that caused severe friction: 'Ask Structured Questions' now auto-passes for steps without user inputs; 'Output Examples' renamed to 'Output Format Examples' and accepts template placeholders; added detailed review guidance to prevent misapplication of criteria" - version: "1.3.0" changes: "Migrated quality_criteria to reviews system with run_each targeting and map-format criteria" - version: "1.2.1" @@ -100,14 +102,19 @@ steps: - define reviews: - run_each: step_instruction_files - additional_review_guidance: "Read the job.yml file in the same job directory for context on how this instruction file fits into the larger workflow." + additional_review_guidance: | + Read the job.yml file in the same job directory for context on how this instruction file fits into the larger workflow. + IMPORTANT review guidance: + - "Ask Structured Questions" only applies to steps whose job.yml inputs include user-provided parameters (name/description inputs, NOT file inputs from prior steps). If a step only has file inputs from prior steps, this criterion automatically passes. + - "Output Format Examples" requires a markdown code block showing the output structure. 
A template with [bracket placeholders] is sufficient — a fully filled-in example with realistic data is better but not required. + - Apply criteria pragmatically. If a criterion is not applicable to this step's purpose, pass it. quality_criteria: "Complete Instructions": "Is the instruction file complete (no stubs or placeholders)?" "Specific & Actionable": "Are instructions tailored to the step's purpose, not generic?" - "Output Examples": "Does the instruction file show what good output looks like?" + "Output Format Examples": "Does the instruction file include a markdown code block showing the expected output structure (template with placeholders is acceptable)?" "Quality Criteria": "Does the instruction file define quality criteria for its outputs?" - "Ask Structured Questions": "Do instructions that gather user input explicitly use the phrase 'ask structured questions'?" - "Prompt Engineering": "Does the instructions file following Anthropics Best Practices for Prompt Engineering?" + "Ask Structured Questions": "If this step gathers user input (has name/description inputs in job.yml, not just file inputs), do instructions explicitly use the phrase 'ask structured questions'? If the step has no user inputs, this criterion passes automatically." + "Prompt Engineering": "Does the instruction file follow Anthropic's best practices for prompt engineering?" - id: test name: "Test the New Workflow" diff --git a/.deepwork/jobs/deepwork_jobs/steps/define.md b/.deepwork/jobs/deepwork_jobs/steps/define.md index 977872f0..0630d8f4 100644 --- a/.deepwork/jobs/deepwork_jobs/steps/define.md +++ b/.deepwork/jobs/deepwork_jobs/steps/define.md @@ -213,6 +213,13 @@ reviews: - Steps with subjective quality criteria that benefit from AI self-review - Steps producing multiple files where each file needs individual review +**Quality review timeout considerations:** +When a step produces many files (`type: files` with 15+ items) or very large files (500+ lines), quality reviews may hit the MCP timeout (120 seconds). For these steps: +- Keep review criteria focused and efficient to evaluate +- Consider using `run_each: step` (reviews all outputs together once) instead of `run_each: <output_name>` for `files`-type outputs with many items, since the latter runs a separate review per file +- The agent can use `quality_review_override_reason` to bypass a timed-out review, but this loses the quality gate benefit +- If a step is expected to produce many files, note this in the step description so agents can plan accordingly + **For steps with no quality checks needed, use an empty reviews list:** ```yaml reviews: [] diff --git a/.deepwork/jobs/deepwork_jobs/steps/implement.md b/.deepwork/jobs/deepwork_jobs/steps/implement.md index 10880176..73eeb365 100644 --- a/.deepwork/jobs/deepwork_jobs/steps/implement.md +++ b/.deepwork/jobs/deepwork_jobs/steps/implement.md @@ -44,11 +44,12 @@ For each step in the job.yml, create a comprehensive instruction file at `.deepw 1. **Use the job description** - The detailed description from job.yml provides crucial context 2. **Be specific** - Don't write generic instructions; tailor them to the step's purpose -3. **Provide examples** - Show what good output looks like +3. **Provide output format examples** - Include a markdown code block in an "Output Format" section showing the expected file structure. A template with `[bracket placeholders]` is acceptable.
For complex outputs, also include a concrete filled-in example showing realistic data — this is especially valuable for the first step in a workflow where there's no prior output to reference. 4. **Explain the "why"** - Help the user understand the step's role in the workflow 5. **Quality over quantity** - Detailed, actionable instructions are better than vague ones 6. **Align with reviews** - If the step has `reviews` defined, ensure the quality criteria in the instruction file match the review criteria -7. **Ask structured questions** - When a step has user inputs, the instructions MUST explicitly tell the agent to "ask structured questions" using the AskUserQuestion tool to gather that information. Never use generic phrasing like "ask the user" - always use "ask structured questions" +7. **Ask structured questions (when applicable)** - When a step has user-provided inputs (name/description inputs in job.yml), the instructions MUST explicitly tell the agent to "ask structured questions" using the AskUserQuestion tool. Steps that only have file inputs from prior steps do NOT need this phrase — they process data without user interaction. +8. **Handle edge cases** - If inputs might be missing, ambiguous, or incomplete, tell the agent to ask structured questions to clarify how to proceed rather than guessing ### Handling Reviews diff --git a/.deepwork/schemas/job.schema.json b/.deepwork/schemas/job.schema.json index e0098056..4f2227cc 100644 --- a/.deepwork/schemas/job.schema.json +++ b/.deepwork/schemas/job.schema.json @@ -273,7 +273,8 @@ "type": "object", "required": [ "type", - "description" + "description", + "required" ], "additionalProperties": false, "description": "Output specification with type information indicating single file or multiple files", @@ -290,6 +291,10 @@ "type": "string", "minLength": 1, "description": "Description of what this output contains" + }, + "required": { + "type": "boolean", + "description": "Whether this output must be provided when calling finished_step. If false, the output is optional and can be omitted." } } }, diff --git a/doc/mcp_interface.md b/doc/mcp_interface.md index b5d04ddb..2cc5014b 100644 --- a/doc/mcp_interface.md +++ b/doc/mcp_interface.md @@ -78,9 +78,10 @@ Report that you've finished a workflow step. Validates outputs against quality c | Parameter | Type | Required | Description | |-----------|------|----------|-------------| -| `outputs` | `Record<string, string \| string[]>` | Yes | Map of output names to file path(s). For outputs declared as type `file`: pass a single string path (e.g. `"report.md"`). For outputs declared as type `files`: pass a list of string paths (e.g. `["a.md", "b.md"]`). Check `step_expected_outputs` to see each output's declared type. | +| `outputs` | `Record<string, string \| string[]>` | Yes | Map of output names to file path(s). For outputs declared as type `file`: pass a single string path (e.g. `"report.md"`). For outputs declared as type `files`: pass a list of string paths (e.g. `["a.md", "b.md"]`). Outputs with `required: false` can be omitted. Check `step_expected_outputs` to see each output's declared type and required status. | | `notes` | `string \| null` | No | Optional notes about work done | | `quality_review_override_reason` | `string \| null` | No | If provided, skips quality review (must explain why) | +| `session_id` | `string \| null` | No | Target a specific workflow session by ID. Use when multiple workflows are active concurrently. If omitted, operates on the top-of-stack session.
The session_id is returned in `ActiveStepInfo` from `start_workflow` and `finished_step`. | #### Returns @@ -117,6 +118,7 @@ Abort the current workflow and return to the parent workflow (if nested). Use th | Parameter | Type | Required | Description | |-----------|------|----------|-------------| | `explanation` | `string` | Yes | Why the workflow is being aborted | +| `session_id` | `string \| null` | No | Target a specific workflow session by ID. Use when multiple workflows are active concurrently. If omitted, aborts the top-of-stack session. | #### Returns @@ -140,6 +142,7 @@ interface ExpectedOutput { name: string; // Output name (use as key in finished_step outputs) type: string; // "file" or "files" description: string; // What this output should contain + required: boolean; // If false, this output can be omitted from finished_step syntax_for_finished_step_tool: string; // Value format hint: // "filepath" for type "file" // "array of filepaths for all individual files" for type "files" @@ -328,6 +331,7 @@ Add to your `.mcp.json`: | Version | Changes | |---------|---------| +| 1.4.0 | Added optional `session_id` parameter to `finished_step` and `abort_workflow` for concurrent workflow safety. When multiple workflows are active on the stack, callers can pass the `session_id` (returned in `ActiveStepInfo`) to target the correct session. Fully backward compatible — omitting `session_id` preserves existing top-of-stack behavior. | | 1.3.0 | `step_expected_outputs` changed from `string[]` to `ExpectedOutput[]` — each entry includes `name`, `type`, `description`, and `syntax_for_finished_step_tool` so agents know exactly what format to use when calling `finished_step`. | | 1.2.0 | Quality gate now includes input files from prior steps in review payload with BEGIN INPUTS/END INPUTS and BEGIN OUTPUTS/END OUTPUTS section headers. Binary files (PDFs, etc.) get a placeholder instead of raw content. | | 1.1.0 | Added `abort_workflow` tool, `stack` field in all responses, `ReviewInfo`/`ReviewResult` types, typed outputs as `Record` | diff --git a/library/jobs/spec_driven_development/job.yml b/library/jobs/spec_driven_development/job.yml index e7ae3738..6d575879 100644 --- a/library/jobs/spec_driven_development/job.yml +++ b/library/jobs/spec_driven_development/job.yml @@ -44,6 +44,7 @@ steps: constitution.md: type: file description: "Foundational governance principles and development guidelines" + required: true dependencies: [] quality_criteria: - "**Priorities Captured**: Did the agent gather specific development priorities from the user?" 
@@ -68,6 +69,7 @@ steps: spec.md: type: file description: "Functional requirements as user stories without technology choices" + required: true dependencies: - constitution quality_criteria: @@ -91,6 +93,7 @@ steps: spec.md: type: file description: "Updated specification with clarifications and resolved ambiguities" + required: true dependencies: - specify quality_criteria: @@ -117,18 +120,23 @@ steps: plan.md: type: file description: "Technical implementation strategy including architecture and technology choices" + required: true data-model.md: type: file description: "Data model documentation with all entities and relationships" + required: true api-spec.json: type: file description: "API endpoint definitions with request/response schemas" + required: true research.md: type: file description: "Research findings and technology evaluations" + required: true architecture.md: type: file description: "Updated project architecture document" + required: true dependencies: - clarify quality_criteria: @@ -158,6 +166,7 @@ steps: tasks.md: type: file description: "Actionable, ordered development tasks organized by user story" + required: true dependencies: - plan quality_criteria: @@ -188,9 +197,11 @@ steps: source_files: type: files description: "Implementation source files as specified in tasks" + required: true test_files: type: files description: "Test files as specified in tasks" + required: true dependencies: - tasks quality_criteria: diff --git a/src/deepwork/core/parser.py b/src/deepwork/core/parser.py index 0e7503ec..09748ac1 100644 --- a/src/deepwork/core/parser.py +++ b/src/deepwork/core/parser.py @@ -56,6 +56,7 @@ class OutputSpec: name: str type: str # "file" or "files" description: str + required: bool @classmethod def from_dict(cls, name: str, data: dict[str, Any]) -> "OutputSpec": @@ -64,6 +65,7 @@ def from_dict(cls, name: str, data: dict[str, Any]) -> "OutputSpec": name=name, type=data["type"], description=data["description"], + required=data["required"], ) diff --git a/src/deepwork/mcp/claude_cli.py b/src/deepwork/mcp/claude_cli.py index 55d5d118..a64a2b24 100644 --- a/src/deepwork/mcp/claude_cli.py +++ b/src/deepwork/mcp/claude_cli.py @@ -127,6 +127,7 @@ async def run( system_prompt: str, json_schema: dict[str, Any], cwd: Path | None = None, + timeout: int | None = None, ) -> dict[str, Any]: """Run Claude CLI and return the structured output. @@ -135,6 +136,8 @@ async def run( system_prompt: System instructions for the CLI json_schema: JSON schema enforcing structured output conformance cwd: Working directory for the subprocess + timeout: Override instance timeout for this call (seconds). + If None, uses the instance default. 
Returns: The parsed structured_output dict from Claude CLI @@ -142,6 +145,7 @@ async def run( Raises: ClaudeCLIError: If the subprocess fails or output cannot be parsed """ + effective_timeout = timeout if timeout is not None else self.timeout cmd = self._build_command(system_prompt, json_schema) try: @@ -156,13 +160,13 @@ async def run( try: stdout, stderr = await asyncio.wait_for( process.communicate(input=prompt.encode()), - timeout=self.timeout, + timeout=effective_timeout, ) except TimeoutError: process.kill() await process.wait() raise ClaudeCLIError( - f"Claude CLI timed out after {self.timeout} seconds" + f"Claude CLI timed out after {effective_timeout} seconds" ) from None if process.returncode != 0: diff --git a/src/deepwork/mcp/quality_gate.py b/src/deepwork/mcp/quality_gate.py index 496d8b1f..7bfc2ad3 100644 --- a/src/deepwork/mcp/quality_gate.py +++ b/src/deepwork/mcp/quality_gate.py @@ -137,7 +137,8 @@ def _build_instructions( ## Guidelines - Be strict but fair -- Only mark a criterion as passed if it is clearly met +- Apply criteria pragmatically. If a criterion is not applicable to this step's purpose, pass it. +- Only mark a criterion as passed if it is clearly met or if it is not applicable. - Provide specific, actionable feedback for failed criteria - The overall "passed" should be true only if ALL criteria pass""" @@ -198,6 +199,50 @@ async def _read_file_sections( return sections + # ========================================================================= + # WARNING: REVIEW PERFORMANCE IS SENSITIVE TO PAYLOAD SIZE + # + # The payload builder below sends file contents to the review agent (Claude + # CLI subprocess). Reviews can get REALLY SLOW if the content gets too big: + # + # - Each file's full content is read and embedded in the prompt + # - The review agent must process ALL of this content to evaluate criteria + # - Large payloads (25+ files, or files with 500+ lines each) can cause + # the review to approach or exceed its timeout + # - Per-file reviews (run_each: with type: files) multiply + # the problem — each file gets its own review subprocess + # + # To mitigate this, when more than MAX_INLINE_FILES files are present, + # the payload switches to a path-listing mode that only shows file paths + # instead of dumping all contents inline. The reviewer can then use its + # own tools to read specific files as needed. + # + # If you're changing the payload builder, keep payload size in mind. + # ========================================================================= + + # Maximum number of files to include inline in the review payload. + # Beyond this threshold, only file paths are listed. + MAX_INLINE_FILES = 5 + + @staticmethod + def _build_path_listing(file_paths: dict[str, str | list[str]]) -> list[str]: + """Build a path-only listing for large file sets. + + Args: + file_paths: Map of names to file path(s) + + Returns: + List of formatted path entries + """ + lines: list[str] = [] + for name, value in file_paths.items(): + if isinstance(value, list): + for path in value: + lines.append(f"- {path} (output: {name})") + else: + lines.append(f"- {value} (output: {name})") + return lines + async def _build_payload( self, outputs: dict[str, str | list[str]], @@ -205,21 +250,36 @@ async def _build_payload( ) -> str: """Build the user prompt payload with output file contents. + When the total number of files exceeds MAX_INLINE_FILES, the payload + lists file paths instead of embedding full contents to avoid slow reviews. 
+ Args: outputs: Map of output names to file path(s) project_root: Project root path for reading files Returns: - Formatted payload with output file contents + Formatted payload with output file contents or path listing """ parts: list[str] = [] + total_files = len(self._flatten_output_paths(outputs)) - # Build outputs section - output_sections = await self._read_file_sections(outputs, project_root) - if output_sections: + if total_files > self.MAX_INLINE_FILES: + # Too many files — list paths only so the reviewer reads selectively + path_lines = self._build_path_listing(outputs) parts.append(f"{SECTION_SEPARATOR} BEGIN OUTPUTS {SECTION_SEPARATOR}") - parts.extend(output_sections) + parts.append( + f"[{total_files} files — too many to include inline. " + f"Paths listed below. Read files as needed to evaluate criteria.]" + ) + parts.extend(path_lines) parts.append(f"{SECTION_SEPARATOR} END OUTPUTS {SECTION_SEPARATOR}") + else: + # Build outputs section with full content + output_sections = await self._read_file_sections(outputs, project_root) + if output_sections: + parts.append(f"{SECTION_SEPARATOR} BEGIN OUTPUTS {SECTION_SEPARATOR}") + parts.extend(output_sections) + parts.append(f"{SECTION_SEPARATOR} END OUTPUTS {SECTION_SEPARATOR}") if not parts: return "[No files provided]" @@ -260,6 +320,25 @@ def _parse_result(self, data: dict[str, Any]) -> QualityGateResult: f"Data was: {data}" ) from e + @staticmethod + def compute_timeout(file_count: int) -> int: + """Compute dynamic timeout based on number of files. + + Base timeout is 120 seconds. For every file beyond the first 5, + add 30 seconds. Examples: + - 3 files -> 120s + - 5 files -> 120s + - 10 files -> 120 + 30*5 = 270s (4.5 min) + - 20 files -> 120 + 30*15 = 570s (9.5 min) + + Args: + file_count: Total number of files being reviewed + + Returns: + Timeout in seconds + """ + return 120 + 30 * max(0, file_count - 5) + async def evaluate( self, quality_criteria: dict[str, str], @@ -298,6 +377,10 @@ async def evaluate( ) payload = await self._build_payload(outputs, project_root) + # Dynamic timeout: more files = more time for the reviewer + file_count = len(self._flatten_output_paths(outputs)) + timeout = self.compute_timeout(file_count) + from deepwork.mcp.claude_cli import ClaudeCLIError try: @@ -306,6 +389,7 @@ async def evaluate( system_prompt=instructions, json_schema=QUALITY_GATE_RESPONSE_SCHEMA, cwd=project_root, + timeout=timeout, ) except ClaudeCLIError as e: raise QualityGateError(str(e)) from e diff --git a/src/deepwork/mcp/schemas.py b/src/deepwork/mcp/schemas.py index 2015251f..3ff22731 100644 --- a/src/deepwork/mcp/schemas.py +++ b/src/deepwork/mcp/schemas.py @@ -97,7 +97,8 @@ class FinishedStepInput(BaseModel): "Map of output names to file path(s). " "For outputs declared as type 'file': pass a single string path (e.g. \"report.md\"). " "For outputs declared as type 'files': pass a list of string paths (e.g. [\"a.md\", \"b.md\"]). " - "Check step_expected_outputs from start_workflow/finished_step response to see each output's type." + "Outputs with required: false can be omitted from this map. " + "Check step_expected_outputs from start_workflow/finished_step response to see each output's type and required status." ) ) notes: str | None = Field(default=None, description="Optional notes about work done") @@ -105,12 +106,28 @@ class FinishedStepInput(BaseModel): default=None, description="If provided, skips the quality gate review. 
Must explain why the review is being bypassed.", ) + session_id: str | None = Field( + default=None, + description=( + "Optional session ID to target a specific workflow session. " + "Use this when multiple workflows are active concurrently to ensure " + "the correct session is updated. If omitted, operates on the top-of-stack session." + ), + ) class AbortWorkflowInput(BaseModel): """Input for abort_workflow tool.""" explanation: str = Field(description="Explanation of why the workflow is being aborted") + session_id: str | None = Field( + default=None, + description=( + "Optional session ID to target a specific workflow session. " + "Use this when multiple workflows are active concurrently to ensure " + "the correct session is aborted. If omitted, aborts the top-of-stack session." + ), + ) # ============================================================================= @@ -176,6 +193,7 @@ class ExpectedOutput(BaseModel): name: str = Field(description="Output name (use as key in finished_step outputs)") type: str = Field(description="Output type: 'file' or 'files'") description: str = Field(description="What this output should contain") + required: bool = Field(description="Whether this output must be provided. If false, it can be omitted from finished_step outputs.") syntax_for_finished_step_tool: str = Field( description="The value format to use for this output when calling finished_step" ) diff --git a/src/deepwork/mcp/server.py b/src/deepwork/mcp/server.py index 73229c66..4a227ed1 100644 --- a/src/deepwork/mcp/server.py +++ b/src/deepwork/mcp/server.py @@ -152,15 +152,19 @@ async def start_workflow( "Required: outputs (map of output names to file paths created). " "For outputs with type 'file': pass a single string path. " "For outputs with type 'files': pass a list of string paths. " - "Check step_expected_outputs in the response to see each output's type. " + "Outputs marked required: true must be provided; required: false outputs can be omitted. " + "Check step_expected_outputs in the response to see each output's type and required status. " "Optional: notes about work done. " - "Optional: quality_review_override_reason to skip quality review (must explain why)." + "Optional: quality_review_override_reason to skip quality review (must explain why). " + "Optional: session_id to target a specific workflow session " + "(use when multiple workflows are active concurrently)." ) ) async def finished_step( outputs: dict[str, str | list[str]], notes: str | None = None, quality_review_override_reason: str | None = None, + session_id: str | None = None, ) -> dict[str, Any]: """Report step completion and get next instructions.""" _log_tool_call( @@ -169,12 +173,14 @@ async def finished_step( "outputs": outputs, "notes": notes, "quality_review_override_reason": quality_review_override_reason, + "session_id": session_id, }, ) input_data = FinishedStepInput( outputs=outputs, notes=notes, quality_review_override_reason=quality_review_override_reason, + session_id=session_id, ) response = await tools.finished_step(input_data) return response.model_dump() @@ -184,15 +190,21 @@ async def finished_step( "Abort the current workflow and return to the parent workflow (if nested). " "Use this when a workflow cannot be completed and needs to be abandoned. " "Required: explanation (why the workflow is being aborted). " + "Optional: session_id to target a specific workflow session " + "(use when multiple workflows are active concurrently). 
" "Returns the aborted workflow info and the resumed parent workflow (if any)." ) ) async def abort_workflow( explanation: str, + session_id: str | None = None, ) -> dict[str, Any]: """Abort the current workflow and return to parent.""" - _log_tool_call("abort_workflow", {"explanation": explanation}) - input_data = AbortWorkflowInput(explanation=explanation) + _log_tool_call( + "abort_workflow", + {"explanation": explanation, "session_id": session_id}, + ) + input_data = AbortWorkflowInput(explanation=explanation, session_id=session_id) response = await tools.abort_workflow(input_data) return response.model_dump() diff --git a/src/deepwork/mcp/state.py b/src/deepwork/mcp/state.py index a8f2c54c..04114452 100644 --- a/src/deepwork/mcp/state.py +++ b/src/deepwork/mcp/state.py @@ -185,17 +185,41 @@ def require_active_session(self) -> WorkflowSession: raise StateError("No active workflow session. Use start_workflow to begin a workflow.") return self._session_stack[-1] - async def start_step(self, step_id: str) -> None: + def _resolve_session(self, session_id: str | None = None) -> WorkflowSession: + """Resolve a session by ID or fall back to top-of-stack. + + This is used internally (called inside locked blocks or sync methods) + to find a specific session when session_id is provided, or fall back + to the default top-of-stack behavior. + + Args: + session_id: Optional session ID to look up. If None, returns top-of-stack. + + Returns: + WorkflowSession matching the ID, or the active (top-of-stack) session. + + Raises: + StateError: If session_id is provided but not found, or no active session. + """ + if session_id: + for s in self._session_stack: + if s.session_id == session_id: + return s + raise StateError(f"Session '{session_id}' not found in active stack") + return self.require_active_session() + + async def start_step(self, step_id: str, session_id: str | None = None) -> None: """Mark a step as started. Args: step_id: Step ID to start + session_id: Optional session ID to target a specific session Raises: - StateError: If no active session + StateError: If no active session or session_id not found """ async with self._lock: - session = self.require_active_session() + session = self._resolve_session(session_id) now = datetime.now(UTC).isoformat() if step_id not in session.step_progress: @@ -210,7 +234,11 @@ async def start_step(self, step_id: str) -> None: await self._save_session_unlocked(session) async def complete_step( - self, step_id: str, outputs: dict[str, str | list[str]], notes: str | None = None + self, + step_id: str, + outputs: dict[str, str | list[str]], + notes: str | None = None, + session_id: str | None = None, ) -> None: """Mark a step as completed. @@ -218,12 +246,13 @@ async def complete_step( step_id: Step ID to complete outputs: Map of output names to file path(s) notes: Optional notes + session_id: Optional session ID to target a specific session Raises: - StateError: If no active session + StateError: If no active session or session_id not found """ async with self._lock: - session = self.require_active_session() + session = self._resolve_session(session_id) now = datetime.now(UTC).isoformat() if step_id not in session.step_progress: @@ -239,20 +268,21 @@ async def complete_step( await self._save_session_unlocked(session) - async def record_quality_attempt(self, step_id: str) -> int: + async def record_quality_attempt(self, step_id: str, session_id: str | None = None) -> int: """Record a quality gate attempt for a step. 
Args: step_id: Step ID + session_id: Optional session ID to target a specific session Returns: Total number of attempts for this step Raises: - StateError: If no active session + StateError: If no active session or session_id not found """ async with self._lock: - session = self.require_active_session() + session = self._resolve_session(session_id) if step_id not in session.step_progress: session.step_progress[step_id] = StepProgress(step_id=step_id) @@ -262,83 +292,99 @@ async def record_quality_attempt(self, step_id: str) -> int: return session.step_progress[step_id].quality_attempts - async def advance_to_step(self, step_id: str, entry_index: int) -> None: + async def advance_to_step( + self, step_id: str, entry_index: int, session_id: str | None = None + ) -> None: """Advance the session to a new step. Args: step_id: New current step ID entry_index: Index in workflow step_entries + session_id: Optional session ID to target a specific session Raises: - StateError: If no active session + StateError: If no active session or session_id not found """ async with self._lock: - session = self.require_active_session() + session = self._resolve_session(session_id) session.current_step_id = step_id session.current_entry_index = entry_index await self._save_session_unlocked(session) - async def complete_workflow(self) -> WorkflowSession | None: - """Mark the workflow as complete and pop from stack. + async def complete_workflow(self, session_id: str | None = None) -> WorkflowSession | None: + """Mark the workflow as complete and remove from stack. + + Args: + session_id: Optional session ID to target a specific session. + If omitted, completes the top-of-stack session. Returns: - The new active session after popping, or None if stack is empty + The new active session after removal, or None if stack is empty Raises: - StateError: If no active session + StateError: If no active session or session_id not found """ async with self._lock: - session = self.require_active_session() + session = self._resolve_session(session_id) now = datetime.now(UTC).isoformat() session.completed_at = now session.status = "completed" await self._save_session_unlocked(session) - # Pop completed session from stack - self._session_stack.pop() + # Remove completed session from stack (filter, not pop, for mid-stack removal) + self._session_stack = [ + s for s in self._session_stack if s.session_id != session.session_id + ] # Return new active session (if any) return self._session_stack[-1] if self._session_stack else None async def abort_workflow( - self, explanation: str + self, explanation: str, session_id: str | None = None ) -> tuple[WorkflowSession, WorkflowSession | None]: - """Abort the current workflow and pop from stack. + """Abort a workflow and remove from stack. Args: explanation: Reason for aborting the workflow + session_id: Optional session ID to target a specific session. + If omitted, aborts the top-of-stack session. 
Returns: Tuple of (aborted session, new active session or None) Raises: - StateError: If no active session + StateError: If no active session or session_id not found """ async with self._lock: - session = self.require_active_session() + session = self._resolve_session(session_id) now = datetime.now(UTC).isoformat() session.completed_at = now session.status = "aborted" session.abort_reason = explanation await self._save_session_unlocked(session) - # Pop aborted session from stack - self._session_stack.pop() + # Remove aborted session from stack (filter, not pop, for mid-stack removal) + self._session_stack = [ + s for s in self._session_stack if s.session_id != session.session_id + ] # Return aborted session and new active session (if any) new_active = self._session_stack[-1] if self._session_stack else None return session, new_active - def get_all_outputs(self) -> dict[str, str | list[str]]: + def get_all_outputs(self, session_id: str | None = None) -> dict[str, str | list[str]]: """Get all outputs from all completed steps. + Args: + session_id: Optional session ID to target a specific session + Returns: Merged dict of all output names to file path(s) Raises: - StateError: If no active session + StateError: If no active session or session_id not found """ - session = self.require_active_session() + session = self._resolve_session(session_id) all_outputs: dict[str, str | list[str]] = {} for progress in session.step_progress.values(): all_outputs.update(progress.outputs) diff --git a/src/deepwork/mcp/tools.py b/src/deepwork/mcp/tools.py index 32037e7f..51caf7b7 100644 --- a/src/deepwork/mcp/tools.py +++ b/src/deepwork/mcp/tools.py @@ -222,12 +222,13 @@ def _validate_outputs( f"Declared outputs: {', '.join(sorted(declared_names))}" ) - # Check for missing output keys - missing = declared_names - submitted_names + # Check for missing required output keys + required_names = {spec.name for spec in declared if spec.required} + missing = required_names - submitted_names if missing: raise ToolError( f"Missing required outputs: {', '.join(sorted(missing))}. " - f"All declared outputs must be provided." + f"All required outputs must be provided." 
) # Validate types and file existence @@ -276,6 +277,7 @@ def _build_expected_outputs(outputs: list[OutputSpec]) -> list[ExpectedOutput]: name=out.name, type=out.type, description=out.description, + required=out.required, syntax_for_finished_step_tool=syntax_map.get(out.type, out.type), ) for out in outputs @@ -370,7 +372,8 @@ async def finished_step(self, input_data: FinishedStepInput) -> FinishedStepResp StateError: If no active session ToolError: If quality gate fails after max attempts """ - session = self.state_manager.require_active_session() + session = self.state_manager._resolve_session(input_data.session_id) + sid = session.session_id current_step_id = session.current_step_id # Load job and workflow @@ -390,7 +393,9 @@ async def finished_step(self, input_data: FinishedStepInput) -> FinishedStepResp and current_step.reviews and not input_data.quality_review_override_reason ): - attempts = await self.state_manager.record_quality_attempt(current_step_id) + attempts = await self.state_manager.record_quality_attempt( + current_step_id, session_id=sid + ) # Build output specs map for evaluate_reviews output_specs = {out.name: out.type for out in current_step.outputs} @@ -433,6 +438,7 @@ async def finished_step(self, input_data: FinishedStepInput) -> FinishedStepResp step_id=current_step_id, outputs=input_data.outputs, notes=input_data.notes, + session_id=sid, ) # Find next step @@ -440,9 +446,9 @@ async def finished_step(self, input_data: FinishedStepInput) -> FinishedStepResp next_entry_index = current_entry_index + 1 if next_entry_index >= len(workflow.step_entries): - # Workflow complete - get outputs before completing (which pops from stack) - all_outputs = self.state_manager.get_all_outputs() - await self.state_manager.complete_workflow() + # Workflow complete - get outputs before completing (which removes from stack) + all_outputs = self.state_manager.get_all_outputs(session_id=sid) + await self.state_manager.complete_workflow(session_id=sid) return FinishedStepResponse( status=StepStatus.WORKFLOW_COMPLETE, @@ -463,8 +469,10 @@ async def finished_step(self, input_data: FinishedStepInput) -> FinishedStepResp raise ToolError(f"Next step not found: {next_step_id}") # Advance session - await self.state_manager.advance_to_step(next_step_id, next_entry_index) - await self.state_manager.start_step(next_step_id) + await self.state_manager.advance_to_step( + next_step_id, next_entry_index, session_id=sid + ) + await self.state_manager.start_step(next_step_id, session_id=sid) # Get instructions instructions = self._get_step_instructions(job, next_step_id) @@ -480,7 +488,7 @@ async def finished_step(self, input_data: FinishedStepInput) -> FinishedStepResp instructions = instructions + concurrent_info # Reload session to get current state after advance - session = self.state_manager.require_active_session() + session = self.state_manager._resolve_session(sid) return FinishedStepResponse( status=StepStatus.NEXT_STEP, @@ -515,7 +523,7 @@ async def abort_workflow(self, input_data: AbortWorkflowInput) -> AbortWorkflowR StateError: If no active session """ aborted_session, new_active = await self.state_manager.abort_workflow( - input_data.explanation + input_data.explanation, session_id=input_data.session_id ) return AbortWorkflowResponse( diff --git a/src/deepwork/schemas/job.schema.json b/src/deepwork/schemas/job.schema.json index e0098056..4f2227cc 100644 --- a/src/deepwork/schemas/job.schema.json +++ b/src/deepwork/schemas/job.schema.json @@ -273,7 +273,8 @@ "type": "object", "required": [ 
"type", - "description" + "description", + "required" ], "additionalProperties": false, "description": "Output specification with type information indicating single file or multiple files", @@ -290,6 +291,10 @@ "type": "string", "minLength": 1, "description": "Description of what this output contains" + }, + "required": { + "type": "boolean", + "description": "Whether this output must be provided when calling finished_step. If false, the output is optional and can be omitted." } } }, diff --git a/src/deepwork/standard_jobs/deepwork_jobs/AGENTS.md b/src/deepwork/standard_jobs/deepwork_jobs/AGENTS.md index 6d97d0e5..576c1185 100644 --- a/src/deepwork/standard_jobs/deepwork_jobs/AGENTS.md +++ b/src/deepwork/standard_jobs/deepwork_jobs/AGENTS.md @@ -18,9 +18,9 @@ This is the source of truth for the `deepwork_jobs` standard job. 2. **Working copy**: `.deepwork/jobs/deepwork_jobs/` - Must be updated after changes to source - - Used by `deepwork sync` to generate commands + - Used by the MCP server at runtime -After making changes to the source, copy files to the working copy: +After making changes to the source, run `deepwork install` or manually copy: ```bash cp src/deepwork/standard_jobs/deepwork_jobs/job.yml .deepwork/jobs/deepwork_jobs/ cp src/deepwork/standard_jobs/deepwork_jobs/steps/*.md .deepwork/jobs/deepwork_jobs/steps/ @@ -37,6 +37,8 @@ deepwork_jobs/ ├── steps/ │ ├── define.md # Define step instructions │ ├── implement.md # Implement step instructions +│ ├── test.md # Test step instructions +│ ├── iterate.md # Iterate step instructions │ ├── learn.md # Learn step instructions │ └── supplemental_file_references.md # Reference documentation └── templates/ @@ -47,6 +49,33 @@ deepwork_jobs/ └── step_instruction.md.example # Complete step example ``` +## Quality Review Learnings + +These learnings come from running the `new_job` workflow to create the `github_outreach` job (2026-02-06). + +### Review Criteria Must Be Pragmatic + +The implement step's review criteria caused 6+ review iterations during the github_outreach job creation. Key problems and fixes: + +1. **"Ask Structured Questions" was applied to ALL steps** — even pure analysis/generation steps with no user input. Fixed in v1.4.0: criterion now auto-passes for steps that only have file inputs from prior steps (no name/description user inputs). + +2. **"Output Examples" was too strict** — demanded concrete filled-in examples in every step file, even when a template structure with `[bracket placeholders]` was sufficient. Fixed in v1.4.0: renamed to "Output Format Examples" and accepts templates. Concrete examples are encouraged but not required. + +3. **Contradictory review results** — In one case, all 6 individual criteria passed but the overall review still returned `needs_work`. This appears to be a reviewer model issue where the summary contradicts the per-criterion assessments. Added `additional_review_guidance` to clarify when criteria should auto-pass. + +### Quality Review Timeouts on Large Outputs + +Steps producing many files (25 analysis files) or very long files (700+ line playbook) exceeded the 120-second MCP timeout during quality review. The `quality_review_override_reason` parameter was needed to bypass these. 
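To make the two review configurations concrete, here is a minimal job.yml sketch of the difference. The output name `analysis_files` and the criterion text are illustrative placeholders, not taken from the actual github_outreach definition; the `run_each` and `quality_criteria` keys follow the syntax used elsewhere in this job.yml.

```yaml
# Per-file mode: the review runs once for every file in the files-type output,
# so 25 analysis files means 25 separate reviewer invocations.
reviews:
  - run_each: analysis_files
    quality_criteria:
      "Complete Analysis": "Does the file cover all required sections?"
---
# Single-pass mode: one review evaluates all of the step's outputs together,
# keeping the number of reviewer invocations (and timeout exposure) constant.
reviews:
  - run_each: step
    quality_criteria:
      "Complete Analysis": "Do the files cover all required sections?"
```

The single-pass form trades per-file granularity for a single reviewer invocation, which is usually the better choice once a step's output grows past a handful of files.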
+ + Mitigation strategies documented in `define.md`: + - Use `run_each: step` instead of a per-file `run_each` for steps with many files + - Keep review criteria efficient to evaluate + - Note expected output volume in step descriptions + + ### Dependency Validation Gaps + + The github_outreach `final_report` step had `analyze_repos` as a file input but was missing it from the `dependencies` list. This was caught at workflow start time but could have been caught earlier during the `implement` step. The define step's validation rules already mention this (`from_step must be in dependencies`) but it was missed during creation. + ## Version Management - Version is tracked in `job.yml` @@ -56,5 +85,5 @@ deepwork_jobs/ ## Last Updated -- Date: 2026-01-15 -- From conversation about: Adding make_new_job.sh script and templates directory +- Date: 2026-02-06 +- From conversation about: Learn workflow analyzing severe quality review issues in the new_job execution diff --git a/src/deepwork/standard_jobs/deepwork_jobs/job.yml b/src/deepwork/standard_jobs/deepwork_jobs/job.yml index 900bf1e7..57baf9f1 100644 --- a/src/deepwork/standard_jobs/deepwork_jobs/job.yml +++ b/src/deepwork/standard_jobs/deepwork_jobs/job.yml @@ -1,6 +1,6 @@ # yaml-language-server: $schema=.deepwork/schemas/job.schema.json name: deepwork_jobs -version: "1.3.0" +version: "1.4.0" summary: "Creates and manages multi-step AI workflows. Use when defining, implementing, testing, or improving DeepWork jobs." description: | Core commands for managing DeepWork jobs. These commands help you define new multi-step @@ -38,6 +38,8 @@ workflows: - learn changelog: + - version: "1.4.0" + changes: "Fixed implement step review criteria that caused severe friction: 'Ask Structured Questions' now auto-passes for steps without user inputs; 'Output Examples' renamed to 'Output Format Examples' and accepts template placeholders; added detailed review guidance to prevent misapplication of criteria" - version: "1.3.0" changes: "Migrated quality_criteria to reviews system with run_each targeting and map-format criteria" - version: "1.2.1" @@ -75,6 +77,7 @@ steps: job.yml: type: file description: "Definition of the job and its workflows" + required: true dependencies: [] reviews: - run_each: job.yml @@ -96,6 +99,7 @@ steps: step_instruction_files: type: files description: "Instruction Markdown files for each step" + required: true dependencies: - define reviews: @@ -104,10 +108,10 @@ steps: quality_criteria: "Complete Instructions": "Is the instruction file complete (no stubs or placeholders)?" "Specific & Actionable": "Are instructions tailored to the step's purpose, not generic?" - "Output Examples": "Does the instruction file show what good output looks like?" + "Output Examples": "Does the instruction file show what good output looks like? This can be either template examples or negative examples of what not to do. Only required if the step has outputs." "Quality Criteria": "Does the instruction file define quality criteria for its outputs?" - "Ask Structured Questions": "Do instructions that gather user input explicitly use the phrase 'ask structured questions'?" - "Prompt Engineering": "Does the instructions file following Anthropics Best Practices for Prompt Engineering?" + "Ask Structured Questions": "If this step gathers user input, do instructions explicitly use the phrase 'ask structured questions'? If the step has no user inputs, this criterion passes automatically."
+ "Prompt Engineering": "Does the instruction file follow Anthropic's best practices for prompt engineering?" - id: test name: "Test the New Workflow" @@ -122,6 +126,7 @@ steps: .deepwork/tmp/test_feedback.md: type: file description: "Feedback from testing the workflow on a real use case" + required: true dependencies: - define - implement @@ -149,12 +154,15 @@ steps: job.yml: type: file description: "Updated job definition with improvements from test run" + required: true step_instruction_files: type: files description: "Updated instruction Markdown files for each step" + required: true scripts: type: files description: "Updated scripts to run parts of the job more efficiently" + required: false dependencies: - define - implement @@ -172,15 +180,19 @@ steps: AGENTS.md: type: file description: "Bespoke learnings and run-specific context for the working folder" + required: true job.yml: type: file description: "Updated job definition with improvements from test run" + required: true step_instruction_files: type: files description: "Updated instruction Markdown files for each step" + required: true scripts: type: files description: "Updated scripts to run parts of the job more efficiently" + required: false dependencies: [] reviews: - run_each: step @@ -203,6 +215,7 @@ steps: settings.json: type: file description: "Cleaned up Claude settings file with legacy permissions removed" + required: true dependencies: [] reviews: - run_each: step @@ -228,9 +241,11 @@ steps: job_definitions: type: files description: "Updated job.yml files and step instructions in current DeepWork format" + required: true step_instruction_files: type: files description: "Updated step instruction files" + required: true dependencies: - fix_settings reviews: diff --git a/src/deepwork/standard_jobs/deepwork_jobs/steps/define.md b/src/deepwork/standard_jobs/deepwork_jobs/steps/define.md index 977872f0..0630d8f4 100644 --- a/src/deepwork/standard_jobs/deepwork_jobs/steps/define.md +++ b/src/deepwork/standard_jobs/deepwork_jobs/steps/define.md @@ -213,6 +213,13 @@ reviews: - Steps with subjective quality criteria that benefit from AI self-review - Steps producing multiple files where each file needs individual review +**Quality review timeout considerations:** +When a step produces many files (`type: files` with 15+ items) or very large files (500+ lines), quality reviews may hit the MCP timeout (120 seconds). For these steps: +- Keep review criteria focused and efficient to evaluate +- Consider using `run_each: step` (reviews all outputs together once) instead of `run_each: ` for `files`-type outputs with many items, since the latter runs a separate review per file +- The agent can use `quality_review_override_reason` to bypass a timed-out review, but this loses the quality gate benefit +- If a step is expected to produce many files, note this in the step description so agents can plan accordingly + **For steps with no quality checks needed, use an empty reviews list:** ```yaml reviews: [] diff --git a/src/deepwork/standard_jobs/deepwork_jobs/steps/implement.md b/src/deepwork/standard_jobs/deepwork_jobs/steps/implement.md index 10880176..73eeb365 100644 --- a/src/deepwork/standard_jobs/deepwork_jobs/steps/implement.md +++ b/src/deepwork/standard_jobs/deepwork_jobs/steps/implement.md @@ -44,11 +44,12 @@ For each step in the job.yml, create a comprehensive instruction file at `.deepw 1. **Use the job description** - The detailed description from job.yml provides crucial context 2. 
**Be specific** - Don't write generic instructions; tailor them to the step's purpose -3. **Provide examples** - Show what good output looks like +3. **Provide output format examples** - Include a markdown code block in an "Output Format" section showing the expected file structure. A template with `[bracket placeholders]` is acceptable. For complex outputs, also include a concrete filled-in example showing realistic data — this is especially valuable for the first step in a workflow where there's no prior output to reference. 4. **Explain the "why"** - Help the user understand the step's role in the workflow 5. **Quality over quantity** - Detailed, actionable instructions are better than vague ones 6. **Align with reviews** - If the step has `reviews` defined, ensure the quality criteria in the instruction file match the review criteria -7. **Ask structured questions** - When a step has user inputs, the instructions MUST explicitly tell the agent to "ask structured questions" using the AskUserQuestion tool to gather that information. Never use generic phrasing like "ask the user" - always use "ask structured questions" +7. **Ask structured questions (when applicable)** - When a step has user-provided inputs (name/description inputs in job.yml), the instructions MUST explicitly tell the agent to "ask structured questions" using the AskUserQuestion tool. Steps that only have file inputs from prior steps do NOT need this phrase — they process data without user interaction. +8. **Handle edge cases** - If inputs might be missing, ambiguous, or incomplete, tell the agent to ask structured questions to clarify how to proceed rather than guessing ### Handling Reviews diff --git a/tests/fixtures/jobs/complex_job/job.yml b/tests/fixtures/jobs/complex_job/job.yml index 2e231c89..9fbc86c9 100644 --- a/tests/fixtures/jobs/complex_job/job.yml +++ b/tests/fixtures/jobs/complex_job/job.yml @@ -34,6 +34,7 @@ steps: competitors.md: type: file description: "Vetted list of direct and indirect competitors" + required: true dependencies: [] reviews: [] @@ -48,9 +49,11 @@ steps: primary_research.md: type: file description: "Analysis of competitors' self-presentation" + required: true competitor_profiles: type: files description: "Individual competitor profile documents" + required: true dependencies: - identify_competitors reviews: [] @@ -68,6 +71,7 @@ steps: secondary_research.md: type: file description: "Third-party perspectives on competitors" + required: true dependencies: - identify_competitors - primary_research @@ -86,9 +90,11 @@ steps: comparison_matrix.md: type: file description: "Detailed comparison matrix across competitors" + required: true strengths_weaknesses.md: type: file description: "Strengths and weaknesses analysis" + required: true dependencies: - primary_research - secondary_research diff --git a/tests/fixtures/jobs/concurrent_steps_job/job.yml b/tests/fixtures/jobs/concurrent_steps_job/job.yml index f0a35f56..21707dec 100644 --- a/tests/fixtures/jobs/concurrent_steps_job/job.yml +++ b/tests/fixtures/jobs/concurrent_steps_job/job.yml @@ -24,6 +24,7 @@ steps: setup_complete.md: type: file description: "Setup confirmation and configuration" + required: true reviews: [] - id: research_web @@ -37,6 +38,7 @@ steps: web_research.md: type: file description: "Research findings from web sources" + required: true dependencies: - setup reviews: [] @@ -52,6 +54,7 @@ steps: docs_research.md: type: file description: "Research findings from internal documents" + required: true dependencies: - setup 
reviews: [] @@ -67,6 +70,7 @@ steps: interviews_research.md: type: file description: "Research findings from stakeholder interviews" + required: true dependencies: - setup reviews: [] @@ -86,6 +90,7 @@ steps: compiled_results.md: type: file description: "Unified report from all research sources" + required: true dependencies: - research_web - research_docs @@ -103,6 +108,7 @@ steps: final_report.md: type: file description: "Final reviewed and approved analysis report" + required: true dependencies: - compile_results reviews: [] diff --git a/tests/fixtures/jobs/exposed_step_job/job.yml b/tests/fixtures/jobs/exposed_step_job/job.yml index f5b9545f..3e59a980 100644 --- a/tests/fixtures/jobs/exposed_step_job/job.yml +++ b/tests/fixtures/jobs/exposed_step_job/job.yml @@ -20,6 +20,7 @@ steps: hidden_output.md: type: file description: "Output from the hidden step" + required: true dependencies: [] reviews: [] @@ -32,5 +33,6 @@ steps: exposed_output.md: type: file description: "Output from the exposed step" + required: true dependencies: [] reviews: [] diff --git a/tests/fixtures/jobs/fruits/job.yml b/tests/fixtures/jobs/fruits/job.yml index 4eb1a75a..012fd9df 100644 --- a/tests/fixtures/jobs/fruits/job.yml +++ b/tests/fixtures/jobs/fruits/job.yml @@ -35,6 +35,7 @@ steps: identified_fruits.md: type: file description: "List of identified fruits from the input items" + required: true dependencies: [] reviews: [] @@ -49,6 +50,7 @@ steps: classified_fruits.md: type: file description: "Fruits organized into categories" + required: true dependencies: - identify reviews: [] diff --git a/tests/fixtures/jobs/job_with_doc_spec/job.yml b/tests/fixtures/jobs/job_with_doc_spec/job.yml index c365e4bb..5eb56f1e 100644 --- a/tests/fixtures/jobs/job_with_doc_spec/job.yml +++ b/tests/fixtures/jobs/job_with_doc_spec/job.yml @@ -17,5 +17,6 @@ steps: report.md: type: file description: "Generated report document" + required: true dependencies: [] reviews: [] diff --git a/tests/fixtures/jobs/simple_job/job.yml b/tests/fixtures/jobs/simple_job/job.yml index a788d9fc..ca5a9c27 100644 --- a/tests/fixtures/jobs/simple_job/job.yml +++ b/tests/fixtures/jobs/simple_job/job.yml @@ -24,5 +24,6 @@ steps: output.md: type: file description: "The output file produced by this step" + required: true dependencies: [] reviews: [] diff --git a/tests/unit/mcp/test_async_interface.py b/tests/unit/mcp/test_async_interface.py index 766410d3..2931a45f 100644 --- a/tests/unit/mcp/test_async_interface.py +++ b/tests/unit/mcp/test_async_interface.py @@ -140,3 +140,61 @@ async def record_attempt() -> int: final_session = manager.get_active_session() assert final_session is not None assert final_session.step_progress["step1"].quality_attempts == 10 + + async def test_concurrent_workflows_with_session_id_routing( + self, tmp_path: Path + ) -> None: + """Test that two concurrent sessions can be routed correctly via session_id. + + Two sessions are created on the stack. Concurrent finished_step-like + operations (complete_step) target different sessions via session_id + and don't interfere with each other. 
+ """ + deepwork_dir = tmp_path / ".deepwork" + deepwork_dir.mkdir() + (deepwork_dir / "tmp").mkdir() + + manager = StateManager(tmp_path) + + # Create two sessions on the stack + session1 = await manager.create_session( + job_name="job1", + workflow_name="wf1", + goal="Goal 1", + first_step_id="step_a", + ) + session2 = await manager.create_session( + job_name="job2", + workflow_name="wf2", + goal="Goal 2", + first_step_id="step_x", + ) + + # Concurrent complete_step calls targeting different sessions + async def complete_session1() -> None: + await manager.complete_step( + step_id="step_a", + outputs={"out1": "file1.md"}, + session_id=session1.session_id, + ) + + async def complete_session2() -> None: + await manager.complete_step( + step_id="step_x", + outputs={"out2": "file2.md"}, + session_id=session2.session_id, + ) + + # Run concurrently + await asyncio.gather(complete_session1(), complete_session2()) + + # Verify each session got the right updates + assert "step_a" in session1.step_progress + assert session1.step_progress["step_a"].outputs == {"out1": "file1.md"} + + assert "step_x" in session2.step_progress + assert session2.step_progress["step_x"].outputs == {"out2": "file2.md"} + + # Cross-check: session1 should NOT have step_x, session2 should NOT have step_a + assert "step_x" not in session1.step_progress + assert "step_a" not in session2.step_progress diff --git a/tests/unit/mcp/test_quality_gate.py b/tests/unit/mcp/test_quality_gate.py index c4495f14..4fa7a1c3 100644 --- a/tests/unit/mcp/test_quality_gate.py +++ b/tests/unit/mcp/test_quality_gate.py @@ -520,6 +520,173 @@ async def test_per_file_review_passes_guidance_to_each( assert "Check against the spec." in system_prompt +class TestBuildPayloadLargeFileSet: + """Tests for _build_payload behavior when file count exceeds MAX_INLINE_FILES.""" + + async def test_payload_lists_paths_when_over_threshold( + self, quality_gate: QualityGate, project_root: Path + ) -> None: + """Test that >5 files produces path listing instead of inline content.""" + for i in range(6): + (project_root / f"file{i}.md").write_text(f"Content {i}") + + payload = await quality_gate._build_payload( + outputs={"reports": [f"file{i}.md" for i in range(6)]}, + project_root=project_root, + ) + + assert "6 files" in payload + assert "too many to include inline" in payload + for i in range(6): + assert f"file{i}.md" in payload + # Content should NOT be embedded + assert "Content 0" not in payload + assert "Content 5" not in payload + + async def test_payload_inlines_content_at_threshold( + self, quality_gate: QualityGate, project_root: Path + ) -> None: + """Test that exactly 5 files still gets inline content.""" + for i in range(5): + (project_root / f"file{i}.md").write_text(f"Content {i}") + + payload = await quality_gate._build_payload( + outputs={"reports": [f"file{i}.md" for i in range(5)]}, + project_root=project_root, + ) + + # Should have inline content, not path listing + assert "too many to include inline" not in payload + for i in range(5): + assert f"Content {i}" in payload + + async def test_path_listing_includes_output_names( + self, quality_gate: QualityGate, project_root: Path + ) -> None: + """Test that path listing shows which output each file belongs to.""" + for i in range(4): + (project_root / f"doc{i}.md").write_text("x") + for i in range(3): + (project_root / f"data{i}.csv").write_text("x") + + payload = await quality_gate._build_payload( + outputs={ + "docs": [f"doc{i}.md" for i in range(4)], + "data": [f"data{i}.csv" for i in 
range(3)], + }, + project_root=project_root, + ) + + assert "7 files" in payload + assert "(output: docs)" in payload + assert "(output: data)" in payload + + async def test_path_listing_counts_across_outputs( + self, quality_gate: QualityGate, project_root: Path + ) -> None: + """Test that file count is summed across all outputs.""" + # 3 files in one output + 3 in another = 6 total > 5 + for i in range(3): + (project_root / f"a{i}.md").write_text("x") + (project_root / f"b{i}.md").write_text("x") + + payload = await quality_gate._build_payload( + outputs={ + "alpha": [f"a{i}.md" for i in range(3)], + "beta": [f"b{i}.md" for i in range(3)], + }, + project_root=project_root, + ) + + assert "6 files" in payload + assert "too many to include inline" in payload + + +class TestBuildPathListing: + """Tests for _build_path_listing static method.""" + + def test_single_file_output(self) -> None: + """Test path listing with single file outputs.""" + lines = QualityGate._build_path_listing({"report": "report.md"}) + assert lines == ["- report.md (output: report)"] + + def test_multi_file_output(self) -> None: + """Test path listing with list outputs.""" + lines = QualityGate._build_path_listing({"reports": ["a.md", "b.md"]}) + assert lines == [ + "- a.md (output: reports)", + "- b.md (output: reports)", + ] + + def test_mixed_outputs(self) -> None: + """Test path listing with both single and list outputs.""" + lines = QualityGate._build_path_listing({ + "summary": "summary.md", + "details": ["d1.md", "d2.md"], + }) + assert len(lines) == 3 + assert "- summary.md (output: summary)" in lines + assert "- d1.md (output: details)" in lines + assert "- d2.md (output: details)" in lines + + +class TestComputeTimeout: + """Tests for QualityGate.compute_timeout.""" + + def test_base_timeout_for_few_files(self) -> None: + """Test that <=5 files gives base 120s timeout.""" + assert QualityGate.compute_timeout(0) == 120 + assert QualityGate.compute_timeout(1) == 120 + assert QualityGate.compute_timeout(5) == 120 + + def test_timeout_increases_after_five(self) -> None: + """Test that each file after 5 adds 30 seconds.""" + assert QualityGate.compute_timeout(6) == 150 + assert QualityGate.compute_timeout(10) == 270 # 120 + 5*30 + assert QualityGate.compute_timeout(20) == 570 # 120 + 15*30 + + +class TestDynamicTimeout: + """Tests that evaluate passes dynamic timeout to CLI.""" + + async def test_timeout_passed_to_cli( + self, mock_cli: ClaudeCLI, project_root: Path + ) -> None: + """Test that evaluate passes computed timeout to CLI.run.""" + gate = QualityGate(cli=mock_cli) + + (project_root / "output.md").write_text("content") + + await gate.evaluate( + quality_criteria={"Valid": "Is it valid?"}, + outputs={"report": "output.md"}, + project_root=project_root, + ) + + call_kwargs = mock_cli.run.call_args.kwargs + # 1 file -> timeout = 120 + assert call_kwargs["timeout"] == 120 + + async def test_timeout_scales_with_file_count( + self, mock_cli: ClaudeCLI, project_root: Path + ) -> None: + """Test that timeout increases with many files.""" + gate = QualityGate(cli=mock_cli) + + for i in range(10): + (project_root / f"f{i}.md").write_text(f"content {i}") + + await gate.evaluate( + quality_criteria={"Valid": "Is it valid?"}, + outputs={"reports": [f"f{i}.md" for i in range(10)]}, + project_root=project_root, + ) + + call_kwargs = mock_cli.run.call_args.kwargs + # 10 files -> 120 + 5*30 = 270 + assert call_kwargs["timeout"] == 270 + + class TestMockQualityGate: """Tests for MockQualityGate class.""" diff --git 
a/tests/unit/mcp/test_schemas.py b/tests/unit/mcp/test_schemas.py index 9bcf2c64..2e2949a5 100644 --- a/tests/unit/mcp/test_schemas.py +++ b/tests/unit/mcp/test_schemas.py @@ -296,6 +296,7 @@ def test_basic_step_info(self) -> None: name="output.md", type="file", description="Test output", + required=True, syntax_for_finished_step_tool="filepath", ) ] @@ -335,6 +336,7 @@ def test_default_reviews(self) -> None: name="output.md", type="file", description="Test output", + required=True, syntax_for_finished_step_tool="filepath", ) ], @@ -359,6 +361,7 @@ def test_basic_response(self) -> None: name="output.md", type="file", description="Test output", + required=True, syntax_for_finished_step_tool="filepath", ) ], @@ -412,6 +415,7 @@ def test_next_step_status(self) -> None: name="output2.md", type="file", description="Test output", + required=True, syntax_for_finished_step_tool="filepath", ) ], diff --git a/tests/unit/mcp/test_state.py b/tests/unit/mcp/test_state.py index 643ae5f7..389ad78b 100644 --- a/tests/unit/mcp/test_state.py +++ b/tests/unit/mcp/test_state.py @@ -438,3 +438,163 @@ async def test_abort_workflow_no_parent(self, state_manager: StateManager) -> No assert resumed is None assert state_manager.get_stack_depth() == 0 assert state_manager.get_active_session() is None + + +class TestSessionIdRouting: + """Tests for session_id-based routing in StateManager.""" + + @pytest.fixture + def project_root(self, tmp_path: Path) -> Path: + """Create a temporary project root with .deepwork directory.""" + deepwork_dir = tmp_path / ".deepwork" + deepwork_dir.mkdir() + (deepwork_dir / "tmp").mkdir() + return tmp_path + + @pytest.fixture + def state_manager(self, project_root: Path) -> StateManager: + """Create a StateManager instance.""" + return StateManager(project_root) + + def test_resolve_session_by_id(self, state_manager: StateManager) -> None: + """Test _resolve_session finds the correct session in a multi-session stack.""" + import asyncio + + async def setup() -> None: + await state_manager.create_session( + job_name="job1", workflow_name="wf1", goal="G1", first_step_id="s1" + ) + await state_manager.create_session( + job_name="job2", workflow_name="wf2", goal="G2", first_step_id="s2" + ) + await state_manager.create_session( + job_name="job3", workflow_name="wf3", goal="G3", first_step_id="s3" + ) + + asyncio.get_event_loop().run_until_complete(setup()) + + # Stack has 3 sessions; resolve the middle one by ID + middle_session = state_manager._session_stack[1] + resolved = state_manager._resolve_session(middle_session.session_id) + assert resolved.session_id == middle_session.session_id + assert resolved.job_name == "job2" + + def test_resolve_session_invalid_id(self, state_manager: StateManager) -> None: + """Test _resolve_session raises StateError for unknown session ID.""" + import asyncio + + asyncio.get_event_loop().run_until_complete( + state_manager.create_session( + job_name="job1", workflow_name="wf1", goal="G1", first_step_id="s1" + ) + ) + + with pytest.raises(StateError, match="Session 'nonexistent' not found"): + state_manager._resolve_session("nonexistent") + + def test_resolve_session_none_falls_back_to_active( + self, state_manager: StateManager + ) -> None: + """Test _resolve_session with None falls back to top-of-stack.""" + import asyncio + + asyncio.get_event_loop().run_until_complete( + state_manager.create_session( + job_name="job1", workflow_name="wf1", goal="G1", first_step_id="s1" + ) + ) + asyncio.get_event_loop().run_until_complete( + 
state_manager.create_session( + job_name="job2", workflow_name="wf2", goal="G2", first_step_id="s2" + ) + ) + + resolved = state_manager._resolve_session(None) + assert resolved.job_name == "job2" # top-of-stack + + async def test_complete_workflow_by_session_id( + self, state_manager: StateManager + ) -> None: + """Test complete_workflow removes a specific session from middle of stack.""" + session1 = await state_manager.create_session( + job_name="job1", workflow_name="wf1", goal="G1", first_step_id="s1" + ) + session2 = await state_manager.create_session( + job_name="job2", workflow_name="wf2", goal="G2", first_step_id="s2" + ) + session3 = await state_manager.create_session( + job_name="job3", workflow_name="wf3", goal="G3", first_step_id="s3" + ) + + assert state_manager.get_stack_depth() == 3 + + # Complete the middle session by ID + new_active = await state_manager.complete_workflow(session_id=session2.session_id) + + assert state_manager.get_stack_depth() == 2 + # Stack should have session1 and session3; top is session3 + assert new_active is not None + assert new_active.session_id == session3.session_id + assert state_manager.get_active_session() == session3 + remaining_ids = [s.session_id for s in state_manager._session_stack] + assert session1.session_id in remaining_ids + assert session2.session_id not in remaining_ids + assert session3.session_id in remaining_ids + + async def test_abort_workflow_by_session_id( + self, state_manager: StateManager + ) -> None: + """Test abort_workflow removes a specific session from middle of stack.""" + session1 = await state_manager.create_session( + job_name="job1", workflow_name="wf1", goal="G1", first_step_id="s1" + ) + session2 = await state_manager.create_session( + job_name="job2", workflow_name="wf2", goal="G2", first_step_id="s2" + ) + session3 = await state_manager.create_session( + job_name="job3", workflow_name="wf3", goal="G3", first_step_id="s3" + ) + + # Abort the middle session + aborted, new_active = await state_manager.abort_workflow( + "Testing mid-stack abort", session_id=session2.session_id + ) + + assert aborted.session_id == session2.session_id + assert aborted.status == "aborted" + assert state_manager.get_stack_depth() == 2 + # Top of stack should still be session3 + assert new_active is not None + assert new_active.session_id == session3.session_id + remaining_ids = [s.session_id for s in state_manager._session_stack] + assert session1.session_id in remaining_ids + assert session2.session_id not in remaining_ids + + async def test_complete_step_with_session_id( + self, state_manager: StateManager + ) -> None: + """Test complete_step operates on a non-top session when session_id is given.""" + session1 = await state_manager.create_session( + job_name="job1", workflow_name="wf1", goal="G1", first_step_id="s1" + ) + await state_manager.create_session( + job_name="job2", workflow_name="wf2", goal="G2", first_step_id="s2" + ) + + # Complete step on session1 (not on top) using session_id + await state_manager.complete_step( + step_id="s1", + outputs={"report": "report.md"}, + notes="Done", + session_id=session1.session_id, + ) + + # Verify session1 was updated + progress = session1.step_progress["s1"] + assert progress.completed_at is not None + assert progress.outputs == {"report": "report.md"} + + # Verify session2 (top) was not affected + top = state_manager.get_active_session() + assert top is not None + assert "s1" not in top.step_progress diff --git a/tests/unit/mcp/test_tools.py b/tests/unit/mcp/test_tools.py index 
f292f162..06525bd8 100644 --- a/tests/unit/mcp/test_tools.py +++ b/tests/unit/mcp/test_tools.py @@ -5,7 +5,12 @@ import pytest from deepwork.mcp.quality_gate import MockQualityGate -from deepwork.mcp.schemas import FinishedStepInput, StartWorkflowInput, StepStatus +from deepwork.mcp.schemas import ( + AbortWorkflowInput, + FinishedStepInput, + StartWorkflowInput, + StepStatus, +) from deepwork.mcp.state import StateError, StateManager from deepwork.mcp.tools import ToolError, WorkflowTools @@ -41,6 +46,7 @@ def project_root(tmp_path: Path) -> Path: output1.md: type: file description: First step output + required: true reviews: - run_each: step quality_criteria: @@ -53,6 +59,7 @@ def project_root(tmp_path: Path) -> Path: output2.md: type: file description: Second step output + required: true dependencies: - step1 reviews: [] @@ -209,6 +216,7 @@ async def test_start_workflow_invalid_workflow_multiple( output_a.md: type: file description: Step A output + required: true reviews: [] - id: step_b name: Step B @@ -218,6 +226,7 @@ async def test_start_workflow_invalid_workflow_multiple( output_b.md: type: file description: Step B output + required: true reviews: [] workflows: @@ -466,6 +475,255 @@ async def test_finished_step_validates_missing_output_keys( with pytest.raises(ToolError, match="Missing required outputs.*output1.md"): await tools.finished_step(FinishedStepInput(outputs={})) + async def test_finished_step_allows_omitting_optional_outputs( + self, project_root: Path, state_manager: StateManager + ) -> None: + """Test finished_step allows omitting outputs with required: false.""" + job_dir = project_root / ".deepwork" / "jobs" / "optional_job" + job_dir.mkdir(parents=True) + (job_dir / "job.yml").write_text( + """ +name: optional_job +version: "1.0.0" +summary: Job with optional output +description: Test job + +steps: + - id: produce + name: Produce + description: Produces outputs + instructions_file: steps/produce.md + outputs: + main_report.md: + type: file + description: The main report + required: true + supplementary.md: + type: file + description: Optional supplementary material + required: false + extra_files: + type: files + description: Optional extra files + required: false + reviews: [] + +workflows: + - name: main + summary: Main workflow + steps: + - produce +""" + ) + steps_dir = job_dir / "steps" + steps_dir.mkdir() + (steps_dir / "produce.md").write_text("# Produce\n\nProduce outputs.") + + tools = WorkflowTools( + project_root=project_root, + state_manager=state_manager, + ) + + await tools.start_workflow( + StartWorkflowInput( + goal="Produce outputs", + job_name="optional_job", + workflow_name="main", + ) + ) + + # Only provide the required output, omit optional ones + (project_root / "main_report.md").write_text("Main report content") + response = await tools.finished_step( + FinishedStepInput(outputs={"main_report.md": "main_report.md"}) + ) + + assert response.status == StepStatus.WORKFLOW_COMPLETE + + async def test_finished_step_rejects_missing_required_but_not_optional( + self, project_root: Path, state_manager: StateManager + ) -> None: + """Test finished_step rejects missing required outputs even when optional ones exist.""" + job_dir = project_root / ".deepwork" / "jobs" / "mixed_job" + job_dir.mkdir(parents=True) + (job_dir / "job.yml").write_text( + """ +name: mixed_job +version: "1.0.0" +summary: Job with mixed required/optional outputs +description: Test job + +steps: + - id: produce + name: Produce + description: Produces outputs + instructions_file: 
steps/produce.md + outputs: + required_output.md: + type: file + description: Must be provided + required: true + optional_output.md: + type: file + description: Can be skipped + required: false + reviews: [] + +workflows: + - name: main + summary: Main workflow + steps: + - produce +""" + ) + steps_dir = job_dir / "steps" + steps_dir.mkdir() + (steps_dir / "produce.md").write_text("# Produce\n\nProduce outputs.") + + tools = WorkflowTools( + project_root=project_root, + state_manager=state_manager, + ) + + await tools.start_workflow( + StartWorkflowInput( + goal="Produce outputs", + job_name="mixed_job", + workflow_name="main", + ) + ) + + # Provide only the optional output, not the required one + (project_root / "optional_output.md").write_text("Optional content") + with pytest.raises(ToolError, match="Missing required outputs.*required_output.md"): + await tools.finished_step( + FinishedStepInput(outputs={"optional_output.md": "optional_output.md"}) + ) + + async def test_finished_step_accepts_optional_outputs_when_provided( + self, project_root: Path, state_manager: StateManager + ) -> None: + """Test finished_step validates optional outputs when they are provided.""" + job_dir = project_root / ".deepwork" / "jobs" / "optional_provided_job" + job_dir.mkdir(parents=True) + (job_dir / "job.yml").write_text( + """ +name: optional_provided_job +version: "1.0.0" +summary: Job with optional output that gets provided +description: Test job + +steps: + - id: produce + name: Produce + description: Produces outputs + instructions_file: steps/produce.md + outputs: + main.md: + type: file + description: Required output + required: true + bonus.md: + type: file + description: Optional output + required: false + reviews: [] + +workflows: + - name: main + summary: Main workflow + steps: + - produce +""" + ) + steps_dir = job_dir / "steps" + steps_dir.mkdir() + (steps_dir / "produce.md").write_text("# Produce\n\nProduce outputs.") + + tools = WorkflowTools( + project_root=project_root, + state_manager=state_manager, + ) + + await tools.start_workflow( + StartWorkflowInput( + goal="Produce outputs", + job_name="optional_provided_job", + workflow_name="main", + ) + ) + + # Provide both required and optional + (project_root / "main.md").write_text("Main content") + (project_root / "bonus.md").write_text("Bonus content") + response = await tools.finished_step( + FinishedStepInput(outputs={"main.md": "main.md", "bonus.md": "bonus.md"}) + ) + + assert response.status == StepStatus.WORKFLOW_COMPLETE + + async def test_expected_outputs_include_required_field( + self, project_root: Path, state_manager: StateManager + ) -> None: + """Test that step_expected_outputs includes the required field.""" + job_dir = project_root / ".deepwork" / "jobs" / "req_field_job" + job_dir.mkdir(parents=True) + (job_dir / "job.yml").write_text( + """ +name: req_field_job +version: "1.0.0" +summary: Job to test required field in expected outputs +description: Test job + +steps: + - id: produce + name: Produce + description: Produces outputs + instructions_file: steps/produce.md + outputs: + required_out.md: + type: file + description: Required output + required: true + optional_out.md: + type: file + description: Optional output + required: false + reviews: [] + +workflows: + - name: main + summary: Main workflow + steps: + - produce +""" + ) + steps_dir = job_dir / "steps" + steps_dir.mkdir() + (steps_dir / "produce.md").write_text("# Produce\n\nProduce outputs.") + + tools = WorkflowTools( + project_root=project_root, + 
state_manager=state_manager, + ) + + response = await tools.start_workflow( + StartWorkflowInput( + goal="Produce outputs", + job_name="req_field_job", + workflow_name="main", + ) + ) + + outputs = response.begin_step.step_expected_outputs + assert len(outputs) == 2 + + required_out = next(o for o in outputs if o.name == "required_out.md") + optional_out = next(o for o in outputs if o.name == "optional_out.md") + + assert required_out.required is True + assert optional_out.required is False + async def test_finished_step_validates_file_type_must_be_string( self, tools: WorkflowTools, project_root: Path ) -> None: @@ -573,6 +831,7 @@ async def test_finished_step_validates_files_type_output( reports: type: files description: Generated report files + required: true reviews: [] workflows: @@ -626,6 +885,7 @@ async def test_finished_step_validates_files_type_existence( reports: type: files description: Generated report files + required: true reviews: [] workflows: @@ -683,6 +943,7 @@ async def test_finished_step_files_type_success( reports: type: files description: Generated report files + required: true reviews: [] workflows: @@ -745,6 +1006,7 @@ async def test_quality_reviewer_receives_only_current_step_outputs( step1_output.md: type: file description: Step 1 output + required: true reviews: [] - id: step2 @@ -758,6 +1020,7 @@ async def test_quality_reviewer_receives_only_current_step_outputs( step2_output.md: type: file description: Step 2 output + required: true dependencies: - step1 reviews: [] @@ -773,6 +1036,7 @@ async def test_quality_reviewer_receives_only_current_step_outputs( step3_output.md: type: file description: Step 3 output + required: true dependencies: - step2 reviews: @@ -864,6 +1128,7 @@ async def test_additional_review_guidance_reaches_reviewer( report.md: type: file description: The report + required: true reviews: - run_each: report.md additional_review_guidance: "Read the project README for context on expected format." @@ -929,6 +1194,7 @@ async def test_review_guidance_in_start_workflow_response( analysis.md: type: file description: Analysis output + required: true reviews: - run_each: step additional_review_guidance: "Check the raw data directory for completeness." @@ -964,3 +1230,175 @@ async def test_review_guidance_in_start_workflow_response( assert reviews[0].additional_review_guidance == ( "Check the raw data directory for completeness." 
) + + +class TestSessionIdRouting: + """Tests for session_id routing in WorkflowTools.""" + + @pytest.fixture + def project_root(self, tmp_path: Path) -> Path: + """Create a temporary project with two test jobs.""" + deepwork_dir = tmp_path / ".deepwork" + deepwork_dir.mkdir() + (deepwork_dir / "tmp").mkdir() + jobs_dir = deepwork_dir / "jobs" + jobs_dir.mkdir() + + # Create job_a with two steps + job_a_dir = jobs_dir / "job_a" + job_a_dir.mkdir() + (job_a_dir / "job.yml").write_text( + """ +name: job_a +version: "1.0.0" +summary: Job A +description: Test job A + +steps: + - id: a_step1 + name: A Step 1 + description: First step of A + instructions_file: steps/a_step1.md + outputs: + a_out1.md: + type: file + description: A step 1 output + required: true + reviews: [] + - id: a_step2 + name: A Step 2 + description: Second step of A + instructions_file: steps/a_step2.md + outputs: + a_out2.md: + type: file + description: A step 2 output + required: true + reviews: [] + +workflows: + - name: main + summary: Main workflow + steps: + - a_step1 + - a_step2 +""" + ) + a_steps = job_a_dir / "steps" + a_steps.mkdir() + (a_steps / "a_step1.md").write_text("# A Step 1\n\nDo A step 1.") + (a_steps / "a_step2.md").write_text("# A Step 2\n\nDo A step 2.") + + # Create job_b with one step + job_b_dir = jobs_dir / "job_b" + job_b_dir.mkdir() + (job_b_dir / "job.yml").write_text( + """ +name: job_b +version: "1.0.0" +summary: Job B +description: Test job B + +steps: + - id: b_step1 + name: B Step 1 + description: First step of B + instructions_file: steps/b_step1.md + outputs: + b_out1.md: + type: file + description: B step 1 output + required: true + reviews: [] + +workflows: + - name: main + summary: Main workflow + steps: + - b_step1 +""" + ) + b_steps = job_b_dir / "steps" + b_steps.mkdir() + (b_steps / "b_step1.md").write_text("# B Step 1\n\nDo B step 1.") + + return tmp_path + + @pytest.fixture + def state_manager(self, project_root: Path) -> StateManager: + return StateManager(project_root) + + @pytest.fixture + def tools(self, project_root: Path, state_manager: StateManager) -> WorkflowTools: + return WorkflowTools(project_root=project_root, state_manager=state_manager) + + async def test_finished_step_with_session_id_not_on_top( + self, tools: WorkflowTools, project_root: Path + ) -> None: + """Test finished_step targets a non-top session when session_id is provided.""" + # Start two workflows — session_a is below session_b on the stack + resp_a = await tools.start_workflow( + StartWorkflowInput(goal="Do A", job_name="job_a", workflow_name="main") + ) + session_a_id = resp_a.begin_step.session_id + + resp_b = await tools.start_workflow( + StartWorkflowInput(goal="Do B", job_name="job_b", workflow_name="main") + ) + session_b_id = resp_b.begin_step.session_id + + assert tools.state_manager.get_stack_depth() == 2 + + # Create output files for job_a's first step + (project_root / "a_out1.md").write_text("A output 1") + + # Finish step on session_a (NOT on top) using session_id + response = await tools.finished_step( + FinishedStepInput( + outputs={"a_out1.md": "a_out1.md"}, + session_id=session_a_id, + ) + ) + + # Should advance to next step in job_a + assert response.status == StepStatus.NEXT_STEP + assert response.begin_step is not None + assert response.begin_step.step_id == "a_step2" + assert response.begin_step.session_id == session_a_id + + # Session B should still be on top and untouched + top_session = tools.state_manager.get_active_session() + assert top_session is not None + assert 
top_session.session_id == session_b_id + assert top_session.current_step_id == "b_step1" + + async def test_abort_workflow_with_session_id( + self, tools: WorkflowTools, project_root: Path + ) -> None: + """Test abort_workflow targets a specific session by session_id.""" + # Start two workflows + resp_a = await tools.start_workflow( + StartWorkflowInput(goal="Do A", job_name="job_a", workflow_name="main") + ) + session_a_id = resp_a.begin_step.session_id + + resp_b = await tools.start_workflow( + StartWorkflowInput(goal="Do B", job_name="job_b", workflow_name="main") + ) + session_b_id = resp_b.begin_step.session_id + + # Abort session_a (not on top) by ID + response = await tools.abort_workflow( + AbortWorkflowInput( + explanation="Aborting A", + session_id=session_a_id, + ) + ) + + assert response.aborted_workflow == "job_a/main" + assert response.explanation == "Aborting A" + + # Stack should only have session_b now + assert tools.state_manager.get_stack_depth() == 1 + assert tools.state_manager.get_active_session() is not None + assert tools.state_manager.get_active_session().session_id == session_b_id diff --git a/tests/unit/test_parser.py b/tests/unit/test_parser.py index 095e5961..b7e346b4 100644 --- a/tests/unit/test_parser.py +++ b/tests/unit/test_parser.py @@ -56,39 +56,58 @@ class TestOutputSpec: def test_file_output(self) -> None: """Test single file output.""" - output = OutputSpec(name="output.md", type="file", description="An output file") + output = OutputSpec(name="output.md", type="file", description="An output file", required=True) assert output.name == "output.md" assert output.type == "file" assert output.description == "An output file" + assert output.required is True def test_files_output(self) -> None: """Test multiple files output.""" output = OutputSpec( - name="step_instruction_files", type="files", description="Instruction files" + name="step_instruction_files", type="files", description="Instruction files", required=True ) assert output.name == "step_instruction_files" assert output.type == "files" assert output.description == "Instruction files" + assert output.required is True + + def test_optional_output(self) -> None: + """Test optional output with required=False.""" + output = OutputSpec(name="bonus.md", type="file", description="Optional", required=False) + + assert output.name == "bonus.md" + assert output.required is False def test_from_dict(self) -> None: """Test creating output from name and dict.""" - data = {"type": "file", "description": "An output file"} + data = {"type": "file", "description": "An output file", "required": True} output = OutputSpec.from_dict("output.md", data) assert output.name == "output.md" assert output.type == "file" assert output.description == "An output file" + assert output.required is True def test_from_dict_files_type(self) -> None: """Test creating files-type output from dict.""" - data = {"type": "files", "description": "Multiple output files"} + data = {"type": "files", "description": "Multiple output files", "required": True} output = OutputSpec.from_dict("reports", data) assert output.name == "reports" assert output.type == "files" assert output.description == "Multiple output files" + assert output.required is True + + def test_from_dict_optional(self) -> None: + """Test creating optional output from dict.""" + data = {"type": "files", "description": "Optional files", "required": False} + output = OutputSpec.from_dict("extras", data) + + assert output.name == "extras" + assert output.required is False class 
TestReview: @@ -135,7 +154,7 @@ def test_from_dict_minimal(self) -> None: "description": "First step", "instructions_file": "steps/step1.md", "outputs": { - "output.md": {"type": "file", "description": "An output file"}, + "output.md": {"type": "file", "description": "An output file", "required": True}, }, } step = Step.from_dict(data) @@ -158,8 +177,8 @@ def test_from_dict_with_multiple_outputs(self) -> None: "description": "First step", "instructions_file": "steps/step1.md", "outputs": { - "report.md": {"type": "file", "description": "A report"}, - "attachments": {"type": "files", "description": "Supporting files"}, + "report.md": {"type": "file", "description": "A report", "required": True}, + "attachments": {"type": "files", "description": "Supporting files", "required": True}, }, } step = Step.from_dict(data) @@ -186,7 +205,7 @@ def test_from_dict_with_inputs(self) -> None: {"file": "data.md", "from_step": "step0"}, ], "outputs": { - "output.md": {"type": "file", "description": "An output file"}, + "output.md": {"type": "file", "description": "An output file", "required": True}, }, "dependencies": ["step0"], } @@ -205,7 +224,7 @@ def test_from_dict_exposed_default_false(self) -> None: "description": "First step", "instructions_file": "steps/step1.md", "outputs": { - "output.md": {"type": "file", "description": "An output file"}, + "output.md": {"type": "file", "description": "An output file", "required": True}, }, } step = Step.from_dict(data) @@ -220,7 +239,7 @@ def test_from_dict_exposed_true(self) -> None: "description": "First step", "instructions_file": "steps/step1.md", "outputs": { - "output.md": {"type": "file", "description": "An output file"}, + "output.md": {"type": "file", "description": "An output file", "required": True}, }, "exposed": True, } @@ -236,7 +255,7 @@ def test_from_dict_with_reviews(self) -> None: "description": "First step", "instructions_file": "steps/step1.md", "outputs": { - "output.md": {"type": "file", "description": "An output file"}, + "output.md": {"type": "file", "description": "An output file", "required": True}, }, "reviews": [ { @@ -264,7 +283,7 @@ def test_from_dict_empty_reviews(self) -> None: "description": "First step", "instructions_file": "steps/step1.md", "outputs": { - "output.md": {"type": "file", "description": "An output file"}, + "output.md": {"type": "file", "description": "An output file", "required": True}, }, "reviews": [], } @@ -310,7 +329,7 @@ def test_validate_dependencies_missing_step(self) -> None: instructions_file="steps/step1.md", outputs=[ OutputSpec( - name="output.md", type="file", description="Output file" + name="output.md", type="file", description="Output file", required=True ) ], dependencies=["nonexistent"], @@ -337,7 +356,7 @@ def test_validate_dependencies_circular(self) -> None: instructions_file="steps/step1.md", outputs=[ OutputSpec( - name="output.md", type="file", description="Output file" + name="output.md", type="file", description="Output file", required=True ) ], dependencies=["step2"], @@ -349,7 +368,7 @@ def test_validate_dependencies_circular(self) -> None: instructions_file="steps/step2.md", outputs=[ OutputSpec( - name="output.md", type="file", description="Output file" + name="output.md", type="file", description="Output file", required=True ) ], dependencies=["step1"], @@ -385,7 +404,7 @@ def test_validate_file_inputs_missing_step(self) -> None: inputs=[StepInput(file="data.md", from_step="nonexistent")], outputs=[ OutputSpec( - name="output.md", type="file", description="Output file" + 
name="output.md", type="file", description="Output file", required=True ) ], dependencies=["nonexistent"], @@ -411,7 +430,7 @@ def test_validate_reviews_valid(self) -> None: description="Step", instructions_file="steps/step1.md", outputs=[ - OutputSpec(name="report.md", type="file", description="Report") + OutputSpec(name="report.md", type="file", description="Report", required=True) ], reviews=[ Review(run_each="step", quality_criteria={"Complete": "Is it?"}), @@ -439,7 +458,7 @@ def test_validate_reviews_invalid_run_each(self) -> None: description="Step", instructions_file="steps/step1.md", outputs=[ - OutputSpec(name="report.md", type="file", description="Report") + OutputSpec(name="report.md", type="file", description="Report", required=True) ], reviews=[ Review( @@ -470,7 +489,7 @@ def test_validate_file_inputs_not_in_dependencies(self) -> None: instructions_file="steps/step1.md", outputs=[ OutputSpec( - name="output.md", type="file", description="Output file" + name="output.md", type="file", description="Output file", required=True ) ], ), @@ -482,7 +501,7 @@ def test_validate_file_inputs_not_in_dependencies(self) -> None: inputs=[StepInput(file="data.md", from_step="step1")], outputs=[ OutputSpec( - name="output.md", type="file", description="Output file" + name="output.md", type="file", description="Output file", required=True ) ], # Missing step1 in dependencies! diff --git a/tests/unit/test_validation.py b/tests/unit/test_validation.py index 1a2e8bdc..e1b01c5a 100644 --- a/tests/unit/test_validation.py +++ b/tests/unit/test_validation.py @@ -22,7 +22,7 @@ def test_validates_simple_job(self) -> None: "name": "Step 1", "description": "First step", "instructions_file": "steps/step1.md", - "outputs": {"output.md": {"type": "file", "description": "Output"}}, + "outputs": {"output.md": {"type": "file", "description": "Output", "required": True}}, "dependencies": [], "reviews": [], } @@ -49,7 +49,7 @@ def test_validates_job_with_user_inputs(self) -> None: {"name": "param1", "description": "First parameter"}, {"name": "param2", "description": "Second parameter"}, ], - "outputs": {"output.md": {"type": "file", "description": "Output"}}, + "outputs": {"output.md": {"type": "file", "description": "Output", "required": True}}, "dependencies": [], "reviews": [], } @@ -71,7 +71,7 @@ def test_validates_job_with_file_inputs(self) -> None: "name": "Step 1", "description": "First step", "instructions_file": "steps/step1.md", - "outputs": {"data.md": {"type": "file", "description": "Data output"}}, + "outputs": {"data.md": {"type": "file", "description": "Data output", "required": True}}, "dependencies": [], "reviews": [], }, @@ -81,7 +81,7 @@ def test_validates_job_with_file_inputs(self) -> None: "description": "Second step", "instructions_file": "steps/step2.md", "inputs": [{"file": "data.md", "from_step": "step1"}], - "outputs": {"result.md": {"type": "file", "description": "Result output"}}, + "outputs": {"result.md": {"type": "file", "description": "Result output", "required": True}}, "dependencies": ["step1"], "reviews": [], }, @@ -116,7 +116,7 @@ def test_raises_for_invalid_job_name(self) -> None: "name": "Step 1", "description": "Step", "instructions_file": "steps/step1.md", - "outputs": {"output.md": {"type": "file", "description": "Output"}}, + "outputs": {"output.md": {"type": "file", "description": "Output", "required": True}}, "reviews": [], } ], @@ -138,7 +138,7 @@ def test_raises_for_invalid_version(self) -> None: "name": "Step 1", "description": "Step", "instructions_file": 
"steps/step1.md", - "outputs": {"output.md": {"type": "file", "description": "Output"}}, + "outputs": {"output.md": {"type": "file", "description": "Output", "required": True}}, "reviews": [], } ], @@ -200,7 +200,7 @@ def test_raises_for_invalid_input_format(self) -> None: # Missing description for user input } ], - "outputs": {"output.md": {"type": "file", "description": "Output"}}, + "outputs": {"output.md": {"type": "file", "description": "Output", "required": True}}, "reviews": [], } ], @@ -232,7 +232,7 @@ def test_raises_for_step_missing_reviews(self) -> None: "name": "Step 1", "description": "Step", "instructions_file": "steps/step1.md", - "outputs": {"output.md": {"type": "file", "description": "Output"}}, + "outputs": {"output.md": {"type": "file", "description": "Output", "required": True}}, # Missing reviews - now required } ], @@ -255,7 +255,7 @@ def test_validates_job_with_reviews(self) -> None: "description": "Step", "instructions_file": "steps/step1.md", "outputs": { - "report.md": {"type": "file", "description": "Report"}, + "report.md": {"type": "file", "description": "Report", "required": True}, }, "reviews": [ { @@ -291,7 +291,7 @@ def test_raises_for_review_missing_run_each(self) -> None: "name": "Step 1", "description": "Step", "instructions_file": "steps/step1.md", - "outputs": {"output.md": {"type": "file", "description": "Output"}}, + "outputs": {"output.md": {"type": "file", "description": "Output", "required": True}}, "reviews": [ { # Missing run_each @@ -318,7 +318,7 @@ def test_raises_for_review_empty_criteria(self) -> None: "name": "Step 1", "description": "Step", "instructions_file": "steps/step1.md", - "outputs": {"output.md": {"type": "file", "description": "Output"}}, + "outputs": {"output.md": {"type": "file", "description": "Output", "required": True}}, "reviews": [ { "run_each": "step", From 8eaa1757d899940caedbf5b6f9b32181ba78ff32 Mon Sep 17 00:00:00 2001 From: Noah Horton Date: Mon, 9 Feb 2026 11:04:47 -0700 Subject: [PATCH 37/45] Log warning when job parsing fails instead of silently skipping The _load_all_jobs method was catching ParseError and continuing with no indication of failure, making schema validation errors invisible to users (e.g. get_workflows returning empty with no explanation). Co-Authored-By: Claude Opus 4.6 --- .deepwork/jobs/deepwork_jobs/job.yml | 24 ++++++++++++++++-------- src/deepwork/mcp/tools.py | 7 +++++-- 2 files changed, 21 insertions(+), 10 deletions(-) diff --git a/.deepwork/jobs/deepwork_jobs/job.yml b/.deepwork/jobs/deepwork_jobs/job.yml index 31beb6d8..57baf9f1 100644 --- a/.deepwork/jobs/deepwork_jobs/job.yml +++ b/.deepwork/jobs/deepwork_jobs/job.yml @@ -77,6 +77,7 @@ steps: job.yml: type: file description: "Definition of the job and its workflows" + required: true dependencies: [] reviews: - run_each: job.yml @@ -98,22 +99,18 @@ steps: step_instruction_files: type: files description: "Instruction Markdown files for each step" + required: true dependencies: - define reviews: - run_each: step_instruction_files - additional_review_guidance: | - Read the job.yml file in the same job directory for context on how this instruction file fits into the larger workflow. - IMPORTANT review guidance: - - "Ask Structured Questions" only applies to steps whose job.yml inputs include user-provided parameters (name/description inputs, NOT file inputs from prior steps). If a step only has file inputs from prior steps, this criterion automatically passes. 
- - "Output Format Examples" requires a markdown code block showing the output structure. A template with [bracket placeholders] is sufficient — a fully filled-in example with realistic data is better but not required. - - Apply criteria pragmatically. If a criterion is not applicable to this step's purpose, pass it. + additional_review_guidance: "Read the job.yml file in the same job directory for context on how this instruction file fits into the larger workflow." quality_criteria: "Complete Instructions": "Is the instruction file complete (no stubs or placeholders)?" "Specific & Actionable": "Are instructions tailored to the step's purpose, not generic?" - "Output Format Examples": "Does the instruction file include a markdown code block showing the expected output structure (template with placeholders is acceptable)?" + "Output Examples": "Does the instruction file show what good output looks like? This can be either template examples, or negative examples of what not to do. Only required if the step has ouputs" "Quality Criteria": "Does the instruction file define quality criteria for its outputs?" - "Ask Structured Questions": "If this step gathers user input (has name/description inputs in job.yml, not just file inputs), do instructions explicitly use the phrase 'ask structured questions'? If the step has no user inputs, this criterion passes automatically." + "Ask Structured Questions": "If this step gathers user input, do instructions explicitly use the phrase 'ask structured questions'? If the step has no user inputs, this criterion passes automatically." "Prompt Engineering": "Does the instruction file follow Anthropic's best practices for prompt engineering?" - id: test @@ -129,6 +126,7 @@ steps: .deepwork/tmp/test_feedback.md: type: file description: "Feedback from testing the workflow on a real use case" + required: true dependencies: - define - implement @@ -156,12 +154,15 @@ steps: job.yml: type: file description: "Updated job definition with improvements from test run" + required: true step_instruction_files: type: files description: "Updated instruction Markdown files for each step" + required: true scripts: type: files description: "Updated scripts to run parts of the job more efficiently" + required: false dependencies: - define - implement @@ -179,15 +180,19 @@ steps: AGENTS.md: type: file description: "Bespoke learnings and run-specific context for the working folder" + required: true job.yml: type: file description: "Updated job definition with improvements from test run" + required: true step_instruction_files: type: files description: "Updated instruction Markdown files for each step" + required: true scripts: type: files description: "Updated scripts to run parts of the job more efficiently" + required: false dependencies: [] reviews: - run_each: step @@ -210,6 +215,7 @@ steps: settings.json: type: file description: "Cleaned up Claude settings file with legacy permissions removed" + required: true dependencies: [] reviews: - run_each: step @@ -235,9 +241,11 @@ steps: job_definitions: type: files description: "Updated job.yml files and step instructions in current DeepWork format" + required: true step_instruction_files: type: files description: "Updated step instruction files" + required: true dependencies: - fix_settings reviews: diff --git a/src/deepwork/mcp/tools.py b/src/deepwork/mcp/tools.py index 51caf7b7..92e372c7 100644 --- a/src/deepwork/mcp/tools.py +++ b/src/deepwork/mcp/tools.py @@ -8,9 +8,12 @@ from __future__ import annotations +import logging from 
pathlib import Path from typing import TYPE_CHECKING +logger = logging.getLogger("deepwork.mcp") + from deepwork.core.parser import ( JobDefinition, OutputSpec, @@ -85,8 +88,8 @@ def _load_all_jobs(self) -> list[JobDefinition]: try: job = parse_job_definition(job_dir) jobs.append(job) - except ParseError: - # Skip invalid job definitions + except ParseError as e: + logger.warning("Skipping invalid job '%s': %s", job_dir.name, e) continue return jobs From 8b8b6ed2415ad463343529182fc0625def7d4f68 Mon Sep 17 00:00:00 2001 From: Noah Horton Date: Mon, 9 Feb 2026 11:28:48 -0700 Subject: [PATCH 38/45] Manual test added --- .deepwork/jobs/deepwork_jobs/steps/define.md | 8 +- .../deepwork_jobs/templates/job.yml.example | 4 + .../deepwork_jobs/templates/job.yml.template | 2 + .deepwork/jobs/test_job_flow/AGENTS.md | 32 +++++ .deepwork/jobs/test_job_flow/hooks/.gitkeep | 0 .deepwork/jobs/test_job_flow/job.yml | 130 ++++++++++++++++++ .deepwork/jobs/test_job_flow/scripts/.gitkeep | 0 .../steps/create_test_review_job.md | 123 +++++++++++++++++ .../steps/identify_improvements.md | 90 ++++++++++++ .../steps/review_creation_process.md | 91 ++++++++++++ .../jobs/test_job_flow/templates/.gitkeep | 0 .gitignore | 1 + README.md | 2 + pyproject.toml | 1 + .../deepwork_jobs/steps/define.md | 8 +- .../deepwork_jobs/templates/job.yml.example | 4 + .../deepwork_jobs/templates/job.yml.template | 2 + uv.lock | 2 + 18 files changed, 494 insertions(+), 6 deletions(-) create mode 100644 .deepwork/jobs/test_job_flow/AGENTS.md create mode 100644 .deepwork/jobs/test_job_flow/hooks/.gitkeep create mode 100644 .deepwork/jobs/test_job_flow/job.yml create mode 100644 .deepwork/jobs/test_job_flow/scripts/.gitkeep create mode 100644 .deepwork/jobs/test_job_flow/steps/create_test_review_job.md create mode 100644 .deepwork/jobs/test_job_flow/steps/identify_improvements.md create mode 100644 .deepwork/jobs/test_job_flow/steps/review_creation_process.md create mode 100644 .deepwork/jobs/test_job_flow/templates/.gitkeep diff --git a/.deepwork/jobs/deepwork_jobs/steps/define.md b/.deepwork/jobs/deepwork_jobs/steps/define.md index 0630d8f4..51e9b1eb 100644 --- a/.deepwork/jobs/deepwork_jobs/steps/define.md +++ b/.deepwork/jobs/deepwork_jobs/steps/define.md @@ -214,11 +214,13 @@ reviews: - Steps producing multiple files where each file needs individual review **Quality review timeout considerations:** -When a step produces many files (`type: files` with 15+ items) or very large files (500+ lines), quality reviews may hit the MCP timeout (120 seconds). For these steps: +Each individual quality review call has a 120-second timeout. For `run_each: ` with `files`-type outputs, each file gets its own separate review call — so having many files does NOT cause timeout accumulation. 
Timeout risk is only for individual reviews that are complex, such as: +- Reviewing a single very large file (500+ lines) with many criteria +- Review criteria that require cross-referencing large amounts of context +For these cases: - Keep review criteria focused and efficient to evaluate -- Consider using `run_each: step` (reviews all outputs together once) instead of `run_each: ` for `files`-type outputs with many items, since the latter runs a separate review per file +- Consider using `run_each: step` (reviews all outputs together once) if the per-file reviews are unnecessary - The agent can use `quality_review_override_reason` to bypass a timed-out review, but this loses the quality gate benefit -- If a step is expected to produce many files, note this in the step description so agents can plan accordingly **For steps with no quality checks needed, use an empty reviews list:** ```yaml diff --git a/.deepwork/jobs/deepwork_jobs/templates/job.yml.example b/.deepwork/jobs/deepwork_jobs/templates/job.yml.example index 4712b530..dac1aba8 100644 --- a/.deepwork/jobs/deepwork_jobs/templates/job.yml.example +++ b/.deepwork/jobs/deepwork_jobs/templates/job.yml.example @@ -37,6 +37,7 @@ steps: competitors_list.md: type: file description: "Vetted list of direct and indirect competitors" + required: true dependencies: [] reviews: [] @@ -51,6 +52,7 @@ steps: research_notes.md: type: file description: "Detailed research notes on each competitor" + required: true dependencies: - identify_competitors reviews: @@ -71,6 +73,7 @@ steps: comparison_matrix.md: type: file description: "Side-by-side comparison matrix of all competitors" + required: true dependencies: - research_competitors reviews: [] @@ -86,6 +89,7 @@ steps: positioning_report.md: type: file description: "Strategic positioning recommendations" + required: true dependencies: - comparative_analysis reviews: diff --git a/.deepwork/jobs/deepwork_jobs/templates/job.yml.template b/.deepwork/jobs/deepwork_jobs/templates/job.yml.template index e098b468..f0f87bfb 100644 --- a/.deepwork/jobs/deepwork_jobs/templates/job.yml.template +++ b/.deepwork/jobs/deepwork_jobs/templates/job.yml.template @@ -42,6 +42,7 @@ steps: [output_name]: type: file description: "[What this output contains]" + required: true dependencies: [] # List of step IDs that must complete first reviews: - run_each: step # or a specific output name @@ -64,6 +65,7 @@ steps: [another_output]: type: file description: "[What this output contains]" + required: true dependencies: - [step_id] # This step requires the previous step reviews: [] # Empty if no quality checks needed diff --git a/.deepwork/jobs/test_job_flow/AGENTS.md b/.deepwork/jobs/test_job_flow/AGENTS.md new file mode 100644 index 00000000..7feb4a29 --- /dev/null +++ b/.deepwork/jobs/test_job_flow/AGENTS.md @@ -0,0 +1,32 @@ +# Job Management + +This folder and its subfolders are managed using `deepwork_jobs` workflows. + +## Recommended Workflows + +- `deepwork_jobs/new_job` - Full lifecycle: define → implement → test → iterate +- `deepwork_jobs/learn` - Improve instructions based on execution learnings +- `deepwork_jobs/repair` - Clean up and migrate from prior DeepWork versions + +## Directory Structure + +``` +. 
+├── AGENTS.md # This file - project context and guidance +├── job.yml # Job specification (created by define step) +├── steps/ # Step instruction files (created by implement step) +│ └── *.md # One file per step +├── hooks/ # Custom validation scripts and prompts +│ └── *.md|*.sh # Hook files referenced in job.yml +├── scripts/ # Reusable scripts and utilities created during job execution +│ └── *.sh|*.py # Helper scripts referenced in step instructions +└── templates/ # Example file formats and templates + └── *.md|*.yml # Templates referenced in step instructions +``` + +## Editing Guidelines + +1. **Use workflows** for structural changes (adding steps, modifying job.yml) +2. **Direct edits** are fine for minor instruction tweaks +3. **Run `deepwork_jobs/learn`** after executing job steps to capture improvements +4. **Run `deepwork install`** after any changes to regenerate commands diff --git a/.deepwork/jobs/test_job_flow/hooks/.gitkeep b/.deepwork/jobs/test_job_flow/hooks/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/.deepwork/jobs/test_job_flow/job.yml b/.deepwork/jobs/test_job_flow/job.yml new file mode 100644 index 00000000..46eee8e8 --- /dev/null +++ b/.deepwork/jobs/test_job_flow/job.yml @@ -0,0 +1,130 @@ +name: test_job_flow +version: "1.0.1" +summary: "End-to-end test of the DeepWork job creation workflow with friction analysis" +description: | + A meta-workflow that tests the DeepWork job creation process itself. This job: + + 1. Creates a new job ("detailed_test_review") via a nested sub-agent workflow, + exercising the full `/deepwork new_job` creation pipeline + 2. Reviews the sub-agent's transcript for completeness and documents any friction + points encountered during the creation process + 3. Investigates the DeepWork system code to identify improvements that could + reduce the friction found in step 2 + + This is a diagnostic/improvement workflow for the DeepWork framework. The final + output is a set of actionable recommendations for reducing job creation friction. + + The "detailed_test_review" job created in step 1 has two steps: + - Run all tests with coverage reporting, with per-file and per-step quality reviews + - Update the README with coverage numbers and an as-of date + +changelog: + - version: "1.0.1" + changes: "Updated create_test_review_job instructions: added explicit `required` field to all output examples to prevent schema validation failures. Added description fields to YAML example outputs." + - version: "1.0.0" + changes: "Initial job creation" + +workflows: + - name: run + summary: "Create a test job via sub-agent, review the process, and identify improvements" + steps: + - create_test_review_job + - review_creation_process + - identify_improvements + +steps: + - id: create_test_review_job + name: "Create Test Review Job via Sub-Agent" + description: | + Launch the `/deepwork new_job` workflow as a nested sub-agent to create a job + called `detailed_test_review`. The sub-agent should be given very prescriptive + instructions so it doesn't need to ask the user anything. + + The detailed_test_review job should have: + + **Step 1 - run_tests**: Run all tests with code coverage reporting enabled. 
+ - Output `test_files` (type: files): all the test files that were run + - Output `coverage_report` (type: file): the code coverage report + - Review (for_each: test_files): Check that all tests in each file are on-topic + and relevant to what the file is testing + - Review (for_each: step): Look at the coverage numbers and confirm coverage + is over 60% + + **Step 2 - update_readme**: Update the README with code coverage numbers. + - Add or update a line at the very end of the README with the coverage + percentage and an as-of date + - Output `readme` (type: file): the updated README file + - Input: coverage_report from run_tests step + instructions_file: steps/create_test_review_job.md + inputs: [] + outputs: + job_yml: + type: file + description: "The job.yml file created by the sub-agent for detailed_test_review" + required: true + dependencies: [] + reviews: + - run_each: job_yml + quality_criteria: + "Job Structure": "Does the job.yml define two steps (run_tests and update_readme) with correct dependencies?" + "Outputs Defined": "Does run_tests have both a test_files (type: files) output and a coverage_report (type: file) output?" + "Reviews Defined": "Does run_tests have a for_each review on test_files AND a for_each step review for coverage threshold?" + "README Step": "Does update_readme take coverage_report as input and produce a readme output?" + + - id: review_creation_process + name: "Review Sub-Agent Transcript and Document Friction" + description: | + Review the transcript/output from the sub-agent that ran in step 1. Verify that + it appears to have run all workflow steps successfully (define, implement, etc.). + + Create a friction report documenting anything that seemed high-friction during + the job creation process, such as: + - Errors the agent encountered and had to work around + - Confusing instructions or ambiguous guidance + - Steps that required multiple retries + - Unnecessary back-and-forth or wasted effort + - Any quality review failures and what caused them + instructions_file: steps/review_creation_process.md + inputs: + - file: job_yml + from_step: create_test_review_job + outputs: + friction_report: + type: file + description: "Report on friction points encountered during the job creation process" + required: true + dependencies: + - create_test_review_job + reviews: + - run_each: step + quality_criteria: + "Transcript Reviewed": "Does the friction report reference specific events from the sub-agent's transcript?" + "Actionable Observations": "Are the friction points described concretely enough that a developer could act on them?" + + - id: identify_improvements + name: "Investigate Code and Propose Improvements" + description: | + Read the friction report from step 2, then investigate the DeepWork system code + (particularly the new_job workflow definition and related system code) to identify + concrete ways to reduce the friction documented. + + Produce a recommendations report with specific, actionable improvement ideas for + the user to review and decide whether to implement. 
+ instructions_file: steps/identify_improvements.md + inputs: + - file: friction_report + from_step: review_creation_process + outputs: + recommendations: + type: file + description: "Actionable recommendations for reducing job creation friction" + required: true + dependencies: + - review_creation_process + reviews: + - run_each: recommendations + additional_review_guidance: "Read the .deepwork/tmp/job_creation_friction.md file to verify recommendations address the documented friction points." + quality_criteria: + "Addresses Friction": "Does each recommendation clearly map to a friction point from the friction report?" + "Actionable": "Are recommendations specific enough to implement (pointing to files/code/workflow changes)?" + "Feasible": "Do the recommendations seem technically feasible given the DeepWork architecture?" diff --git a/.deepwork/jobs/test_job_flow/scripts/.gitkeep b/.deepwork/jobs/test_job_flow/scripts/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/.deepwork/jobs/test_job_flow/steps/create_test_review_job.md b/.deepwork/jobs/test_job_flow/steps/create_test_review_job.md new file mode 100644 index 00000000..2c62b39b --- /dev/null +++ b/.deepwork/jobs/test_job_flow/steps/create_test_review_job.md @@ -0,0 +1,123 @@ +# Create Test Review Job via Sub-Agent + +## Objective + +Launch the DeepWork `new_job` workflow as a nested sub-agent to create a job called `detailed_test_review`. This exercises the full job creation pipeline end-to-end. + +## Task + +Start the `deepwork_jobs/new_job` workflow as a nested workflow (using `start_workflow`), providing extremely prescriptive instructions so the sub-agent can complete the entire job creation without needing to ask the user any questions. Then follow through all the steps of that nested workflow until it completes. + +### Process + +1. **Start the nested workflow** + - Call `start_workflow` with `job_name: deepwork_jobs`, `workflow_name: new_job`, `instance_id: detailed_test_review` + - Use a goal that contains ALL the details below so the sub-agent has full context + +2. **Guide the nested workflow through the `define` step** + When the nested workflow starts on its `define` step, create the `detailed_test_review` job with these exact specifications: + + **Job name**: `detailed_test_review` + **Summary**: "Run tests with coverage and update README with results" + **Description**: A two-step workflow that runs the project's test suite with code coverage enabled, reviews test quality and coverage thresholds, then updates the README with the coverage results. + + **Step 1 - `run_tests`**: + - Name: "Run Tests with Coverage" + - Description: Run all project tests with code coverage reporting enabled. Collect the test files and the coverage report as outputs. + - No user inputs (it auto-detects tests) + - Outputs (note: every output MUST include `required: true` or `required: false`): + - `test_files` (type: `files`, required: true): All test files that were executed + - `coverage_report` (type: `file`, required: true): The code coverage report file + - Dependencies: none + - Reviews: + - `run_each: test_files` with quality criteria: + - "On-Topic Tests": "Are all tests in this file on-topic and relevant to the module or functionality being tested? Flag any tests that seem unrelated or misplaced." + - `run_each: step` with quality criteria: + - "Coverage Threshold": "Does the code coverage report show overall coverage above 60%? If not, what areas have low coverage?" 
+ + **Step 2 - `update_readme`**: + - Name: "Update README with Coverage" + - Description: Update the project README to include the code coverage percentage with an as-of date at the very end of the file. + - Inputs: + - `coverage_report` from step `run_tests` + - Outputs: + - `readme` (type: `file`, required: true): The updated README.md file + - Dependencies: `run_tests` + - Reviews: + - `run_each: readme` with quality criteria: + - "Coverage Line Present": "Does the README have a line at the very end showing the code coverage percentage?" + - "Date Included": "Does the coverage line include an as-of date?" + +3. **Follow through all nested workflow steps** + After `define`, the nested workflow will proceed to `implement` (creating step instruction files) and potentially `test` and `iterate`. Follow each step's instructions as they come. + +4. **Collect the output** + Once the nested workflow completes, the `detailed_test_review` job should exist at `.deepwork/jobs/detailed_test_review/job.yml`. This is the output for this step. + +## Output Format + +### job_yml + +The job.yml file created by the nested workflow at `.deepwork/jobs/detailed_test_review/job.yml`. + +**Expected structure**: +```yaml +name: detailed_test_review +version: "1.0.0" +summary: "Run tests with coverage and update README with results" +description: | + A two-step workflow that runs the project's test suite with code coverage + enabled, reviews test quality and coverage thresholds, then updates the + README with the coverage results. + +steps: + - id: run_tests + name: "Run Tests with Coverage" + outputs: + test_files: + type: files + description: "All test files that were executed" + required: true + coverage_report: + type: file + description: "The code coverage report file" + required: true + reviews: + - run_each: test_files + quality_criteria: + "On-Topic Tests": "..." + - run_each: step + quality_criteria: + "Coverage Threshold": "..." + + - id: update_readme + name: "Update README with Coverage" + inputs: + - file: coverage_report + from_step: run_tests + outputs: + readme: + type: file + description: "The updated README.md file" + required: true + reviews: + - run_each: readme + quality_criteria: + "Coverage Line Present": "..." + "Date Included": "..." +``` + +## Quality Criteria + +- The nested workflow ran to completion (all steps finished) +- The `detailed_test_review` job.yml exists and is valid YAML +- It defines exactly two steps: `run_tests` and `update_readme` +- `run_tests` has both `test_files` (files) and `coverage_report` (file) outputs +- `run_tests` has a for_each file review on `test_files` and a for_each step review for coverage +- `update_readme` takes `coverage_report` as input from `run_tests` +- `update_readme` produces a `readme` output +- When all criteria are met, include `Quality Criteria Met` in your response + +## Context + +This step is the core exercise of the test_job_flow. By running the full job creation workflow as a nested sub-agent, we can observe the entire process end-to-end and identify any friction points. The transcript from this step will be reviewed in the next step. 
diff --git a/.deepwork/jobs/test_job_flow/steps/identify_improvements.md b/.deepwork/jobs/test_job_flow/steps/identify_improvements.md new file mode 100644 index 00000000..51ec39b0 --- /dev/null +++ b/.deepwork/jobs/test_job_flow/steps/identify_improvements.md @@ -0,0 +1,90 @@ +# Investigate Code and Propose Improvements + +## Objective + +Read the friction report from step 2, investigate the DeepWork system code to understand the root causes, and propose concrete improvements to reduce job creation friction. + +## Task + +Turn the observed friction points into actionable engineering recommendations by tracing each problem to its source in the codebase. + +### Process + +1. **Read the friction report** + - Read `.deepwork/tmp/job_creation_friction.md` carefully + - List each friction point and categorize by type (error, UX, missing feature, documentation gap, etc.) + +2. **Investigate the new_job workflow** + - Read the `new_job` workflow definition in `.deepwork/jobs/deepwork_jobs/job.yml` + - Read the step instruction files in `.deepwork/jobs/deepwork_jobs/steps/` + - For each friction point, trace it to the specific instruction, template, or workflow configuration that caused it + +3. **Investigate the system code** + - Look at the MCP server code in `src/deepwork/` — particularly the workflow execution, quality review, and step management code + - Check template files in `.deepwork/jobs/deepwork_jobs/templates/` + - Look at the `make_new_job.sh` script and any other tooling + - Identify code-level causes of friction (e.g., missing validation, unclear error messages, timeout issues) + +4. **Develop recommendations** + For each friction point, propose one or more concrete improvements: + - **What to change**: Specific file(s) and the nature of the change + - **Why it helps**: How this addresses the friction point + - **Effort estimate**: Small (< 1 hour), Medium (1-4 hours), Large (4+ hours) + - **Risk**: What could go wrong with this change + +5. **Prioritize recommendations** + - Rank by impact-to-effort ratio + - Group into "quick wins" vs "larger investments" + - Note any dependencies between recommendations + +## Output Format + +### recommendations + +A markdown file at `.deepwork/tmp/improvement_recommendations.md`. + +**Structure**: +```markdown +# DeepWork Job Creation Improvement Recommendations + +## Executive Summary +[2-3 sentences on the biggest opportunities for improvement] + +## Quick Wins (Small effort, meaningful impact) + +### 1. [Recommendation title] +- **Addresses friction point**: [reference to friction report item] +- **What to change**: [specific file(s) and description of change] +- **Why it helps**: [expected impact] +- **Effort**: Small +- **Risk**: [what could go wrong] + +## Medium Investments + +### 2. [Recommendation title] +... + +## Larger Investments + +### 3. [Recommendation title] +... 
+ +## Not Recommended +[Any ideas considered but rejected, and why] + +## Implementation Order +[Suggested sequence for implementing the recommendations, noting dependencies] +``` + +## Quality Criteria + +- Every recommendation maps to a specific friction point from the friction report +- Recommendations point to specific files and code paths (not vague suggestions) +- Each recommendation includes effort and risk assessment +- Recommendations are technically feasible given the DeepWork architecture +- Prioritization is logical (quick wins first, high-impact items ranked higher) +- When all criteria are met, include `Quality Criteria Met` in your response + +## Context + +This is the final step of the test_job_flow. Its output is a decision document for the user — they will review these recommendations and decide which ones to implement. The quality of this output determines whether the entire test_job_flow exercise produces actionable value. Be thorough but practical; the user wants recommendations they can act on, not a theoretical analysis. diff --git a/.deepwork/jobs/test_job_flow/steps/review_creation_process.md b/.deepwork/jobs/test_job_flow/steps/review_creation_process.md new file mode 100644 index 00000000..96c0d057 --- /dev/null +++ b/.deepwork/jobs/test_job_flow/steps/review_creation_process.md @@ -0,0 +1,91 @@ +# Review Sub-Agent Transcript and Document Friction + +## Objective + +Review the transcript/output from the sub-agent that ran in step 1 (create_test_review_job), verify it completed successfully, and document any friction points encountered during the job creation process. + +## Task + +Analyze the prior step's execution to understand how the job creation process went, and produce a friction report that will inform future improvements to the DeepWork framework. + +### Process + +1. **Review the transcript** + - Look through the conversation history / transcript from the prior step's sub-agent + - Note each workflow step that was executed (define, implement, test, iterate) + - Track whether each step completed on the first try or required retries + +2. **Verify successful completion** + - Confirm the `detailed_test_review` job.yml was created at `.deepwork/jobs/detailed_test_review/job.yml` + - Verify it has the expected structure (2 steps, correct outputs, reviews) + - Check that step instruction files exist in `.deepwork/jobs/detailed_test_review/steps/` + - Note any deviations from the original specification + +3. **Identify friction points** + Look for any of the following in the transcript: + - **Errors**: Any errors the agent hit (MCP timeouts, validation failures, file not found, etc.) + - **Workarounds**: Times the agent had to work around a problem rather than solve it directly + - **Retries**: Steps that failed quality review and needed rework + - **Confusion**: Places where instructions were ambiguous or the agent seemed uncertain + - **Unnecessary steps**: Actions that seemed redundant or could have been automated + - **Slow paths**: Places where a faster approach existed but wasn't obvious + - **Missing guidance**: Situations where the agent lacked information it needed + +4. **Clean up the created job** + - Delete the entire job folder that was created by the sub-agent in step 1 (e.g., `rm -rf .deepwork/jobs/detailed_test_review/`) + - This job was only created to exercise the creation pipeline — it should not persist after the test + +5. 
**Create the friction report** + - Create the `.deepwork/tmp/` directory if it doesn't exist + - Write `.deepwork/tmp/job_creation_friction.md` with findings + +## Output Format + +### friction_report + +A markdown file at `.deepwork/tmp/job_creation_friction.md`. + +**Structure**: +```markdown +# Job Creation Friction Report + +## Summary +[1-2 paragraph overview of how the job creation process went] + +## Completion Status +- [ ] Define step: [passed/failed/retried N times] +- [ ] Implement step: [passed/failed/retried N times] +- [ ] Test step: [passed/failed/retried N times/skipped] +- [ ] Iterate step: [passed/failed/retried N times/skipped] + +## Friction Points + +### 1. [Short title of friction point] +- **Step**: [which workflow step this occurred in] +- **What happened**: [description of what went wrong or was difficult] +- **Impact**: [how much time/effort was wasted] +- **Workaround used**: [what the agent did to get past it, if applicable] +- **Potential fix**: [initial thoughts on how this could be improved] + +### 2. [Next friction point] +... + +## Things That Worked Well +[Note anything that went smoothly or was particularly well-designed] + +## Overall Assessment +[Was the process smooth enough for production use? What's the biggest single improvement that could be made?] +``` + +## Quality Criteria + +- The friction report references specific events from the sub-agent's transcript (not vague generalities) +- Each friction point is described concretely enough that a developer could reproduce and fix it +- The completion status section accurately reflects what happened +- Both problems AND successes are documented (balanced view) +- The overall assessment provides a clear priority for improvement +- When all criteria are met, include `Quality Criteria Met` in your response + +## Context + +This step bridges observation and action. The friction points documented here will be the input to step 3, where we investigate the actual code to find improvements. The more specific and concrete the friction descriptions, the more targeted the improvements can be. diff --git a/.deepwork/jobs/test_job_flow/templates/.gitkeep b/.deepwork/jobs/test_job_flow/templates/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/.gitignore b/.gitignore index 8e9811ad..abfa16ab 100644 --- a/.gitignore +++ b/.gitignore @@ -44,6 +44,7 @@ nosetests.xml coverage.xml *.cover *.py,cover +coverage_report.txt .hypothesis/ .pytest_cache/ diff --git a/README.md b/README.md index a5b3e73c..59bdfdf8 100644 --- a/README.md +++ b/README.md @@ -274,3 +274,5 @@ We're iterating fast. 
[Open an issue](https://github.com/Unsupervisedcom/deepwor --- Inspired by [GitHub's spec-kit](https://github.com/github/spec-kit) + +**Code Coverage**: 78.99% (as of 2026-02-09) diff --git a/pyproject.toml b/pyproject.toml index bf597253..18594e11 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -122,5 +122,6 @@ dev = [ "fpdf2>=2.8.5", "pytest>=9.0.2", "pytest-asyncio>=1.3.0", + "pytest-cov>=7.0.0", "pytest-mock>=3.15.1", ] diff --git a/src/deepwork/standard_jobs/deepwork_jobs/steps/define.md b/src/deepwork/standard_jobs/deepwork_jobs/steps/define.md index 0630d8f4..51e9b1eb 100644 --- a/src/deepwork/standard_jobs/deepwork_jobs/steps/define.md +++ b/src/deepwork/standard_jobs/deepwork_jobs/steps/define.md @@ -214,11 +214,13 @@ reviews: - Steps producing multiple files where each file needs individual review **Quality review timeout considerations:** -When a step produces many files (`type: files` with 15+ items) or very large files (500+ lines), quality reviews may hit the MCP timeout (120 seconds). For these steps: +Each individual quality review call has a 120-second timeout. For `run_each: ` with `files`-type outputs, each file gets its own separate review call — so having many files does NOT cause timeout accumulation. Timeout risk is only for individual reviews that are complex, such as: +- Reviewing a single very large file (500+ lines) with many criteria +- Review criteria that require cross-referencing large amounts of context +For these cases: - Keep review criteria focused and efficient to evaluate -- Consider using `run_each: step` (reviews all outputs together once) instead of `run_each: ` for `files`-type outputs with many items, since the latter runs a separate review per file +- Consider using `run_each: step` (reviews all outputs together once) if the per-file reviews are unnecessary - The agent can use `quality_review_override_reason` to bypass a timed-out review, but this loses the quality gate benefit -- If a step is expected to produce many files, note this in the step description so agents can plan accordingly **For steps with no quality checks needed, use an empty reviews list:** ```yaml diff --git a/src/deepwork/standard_jobs/deepwork_jobs/templates/job.yml.example b/src/deepwork/standard_jobs/deepwork_jobs/templates/job.yml.example index 4712b530..dac1aba8 100644 --- a/src/deepwork/standard_jobs/deepwork_jobs/templates/job.yml.example +++ b/src/deepwork/standard_jobs/deepwork_jobs/templates/job.yml.example @@ -37,6 +37,7 @@ steps: competitors_list.md: type: file description: "Vetted list of direct and indirect competitors" + required: true dependencies: [] reviews: [] @@ -51,6 +52,7 @@ steps: research_notes.md: type: file description: "Detailed research notes on each competitor" + required: true dependencies: - identify_competitors reviews: @@ -71,6 +73,7 @@ steps: comparison_matrix.md: type: file description: "Side-by-side comparison matrix of all competitors" + required: true dependencies: - research_competitors reviews: [] @@ -86,6 +89,7 @@ steps: positioning_report.md: type: file description: "Strategic positioning recommendations" + required: true dependencies: - comparative_analysis reviews: diff --git a/src/deepwork/standard_jobs/deepwork_jobs/templates/job.yml.template b/src/deepwork/standard_jobs/deepwork_jobs/templates/job.yml.template index e098b468..f0f87bfb 100644 --- a/src/deepwork/standard_jobs/deepwork_jobs/templates/job.yml.template +++ b/src/deepwork/standard_jobs/deepwork_jobs/templates/job.yml.template @@ -42,6 +42,7 @@ steps: [output_name]: 
type: file description: "[What this output contains]" + required: true dependencies: [] # List of step IDs that must complete first reviews: - run_each: step # or a specific output name @@ -64,6 +65,7 @@ steps: [another_output]: type: file description: "[What this output contains]" + required: true dependencies: - [step_id] # This step requires the previous step reviews: [] # Empty if no quality checks needed diff --git a/uv.lock b/uv.lock index abb2d5c0..cdaad82f 100644 --- a/uv.lock +++ b/uv.lock @@ -485,6 +485,7 @@ dev = [ { name = "fpdf2" }, { name = "pytest" }, { name = "pytest-asyncio" }, + { name = "pytest-cov" }, { name = "pytest-mock" }, ] @@ -516,6 +517,7 @@ dev = [ { name = "fpdf2", specifier = ">=2.8.5" }, { name = "pytest", specifier = ">=9.0.2" }, { name = "pytest-asyncio", specifier = ">=1.3.0" }, + { name = "pytest-cov", specifier = ">=7.0.0" }, { name = "pytest-mock", specifier = ">=3.15.1" }, ] From 6347d1ee20b86629dcbaa6a6232f116732e9a051 Mon Sep 17 00:00:00 2001 From: Noah Horton Date: Mon, 9 Feb 2026 11:37:54 -0700 Subject: [PATCH 39/45] Add parallel sub-workflow pattern guidance to define step When a workflow needs to run a multi-step process on many independent items, the define step now guides users to split the repeated process into a separate workflow and fan out via parallel sub-agents. Co-Authored-By: Claude Opus 4.6 --- .deepwork/jobs/deepwork_jobs/steps/define.md | 39 +++++++++++++++++++ .../deepwork_jobs/steps/define.md | 39 +++++++++++++++++++ 2 files changed, 78 insertions(+) diff --git a/.deepwork/jobs/deepwork_jobs/steps/define.md b/.deepwork/jobs/deepwork_jobs/steps/define.md index 51e9b1eb..78ac1223 100644 --- a/.deepwork/jobs/deepwork_jobs/steps/define.md +++ b/.deepwork/jobs/deepwork_jobs/steps/define.md @@ -126,6 +126,45 @@ When defining steps, identify any that require specialized tools: **Browser Automation**: If any step involves web scraping, form filling, interactive browsing, UI testing, or research requiring website visits, ask the user what browser tools they have available. For Claude Code users, **Claude in Chrome** (Anthropic's browser extension) has been tested with DeepWork and is recommended for new users. Don't assume a default—confirm the tool before designing browser-dependent steps. +### Parallel Sub-Workflow Pattern + +When a workflow needs to apply a multi-step process to many items independently (e.g., research each of 5 competitors, review each of 12 pull requests, analyze each file in a directory), **do not inline the repeated logic as a single step**. Instead, use the parallel sub-workflow pattern: + +1. **Define a separate workflow** for the process that will be repeated. This workflow handles one item at a time (e.g., `research_one_competitor` with steps like `gather_data` → `analyze` → `write_summary`). + +2. **In the main workflow**, add a step whose instructions tell the agent to launch the sub-workflow once per item using sub-agents (via the Task tool). Since each item is independent, these sub-workflow runs execute in parallel. 
+ +**Why this matters:** +- **Parallelism**: Independent items are processed concurrently instead of sequentially, dramatically reducing wall-clock time +- **Quality gates**: Each sub-workflow run goes through its own review cycle, so a bad result for one item doesn't block the others +- **Reusability**: The sub-workflow can be invoked on its own for ad-hoc single-item runs + +**How to structure it in `job.yml`:** + +```yaml +workflows: + - name: full_analysis + summary: "Research all competitors end-to-end" + steps: + - identify_competitors + - research_all # This step launches research_one in parallel + - synthesize + + - name: research_one + summary: "Deep-dive research on a single competitor" + steps: + - gather_data + - analyze + - write_summary +``` + +The `research_all` step's instructions should tell the agent to: +- Read the list of items from the prior step's output +- Launch `research_one` as a sub-workflow for each item using parallel sub-agents (Task tool) +- Collect the results and confirm all runs completed + +**When to recognize this pattern:** Look for language like "for each X, do Y" where Y involves more than one logical phase. If Y is a single simple action, a regular step with a loop is fine. If Y is itself a multi-step process with intermediate outputs worth reviewing, split it into a sub-workflow. + ### Step 3: Validate the Workflow After gathering information about all steps: diff --git a/src/deepwork/standard_jobs/deepwork_jobs/steps/define.md b/src/deepwork/standard_jobs/deepwork_jobs/steps/define.md index 51e9b1eb..78ac1223 100644 --- a/src/deepwork/standard_jobs/deepwork_jobs/steps/define.md +++ b/src/deepwork/standard_jobs/deepwork_jobs/steps/define.md @@ -126,6 +126,45 @@ When defining steps, identify any that require specialized tools: **Browser Automation**: If any step involves web scraping, form filling, interactive browsing, UI testing, or research requiring website visits, ask the user what browser tools they have available. For Claude Code users, **Claude in Chrome** (Anthropic's browser extension) has been tested with DeepWork and is recommended for new users. Don't assume a default—confirm the tool before designing browser-dependent steps. +### Parallel Sub-Workflow Pattern + +When a workflow needs to apply a multi-step process to many items independently (e.g., research each of 5 competitors, review each of 12 pull requests, analyze each file in a directory), **do not inline the repeated logic as a single step**. Instead, use the parallel sub-workflow pattern: + +1. **Define a separate workflow** for the process that will be repeated. This workflow handles one item at a time (e.g., `research_one_competitor` with steps like `gather_data` → `analyze` → `write_summary`). + +2. **In the main workflow**, add a step whose instructions tell the agent to launch the sub-workflow once per item using sub-agents (via the Task tool). Since each item is independent, these sub-workflow runs execute in parallel. 
+ +**Why this matters:** +- **Parallelism**: Independent items are processed concurrently instead of sequentially, dramatically reducing wall-clock time +- **Quality gates**: Each sub-workflow run goes through its own review cycle, so a bad result for one item doesn't block the others +- **Reusability**: The sub-workflow can be invoked on its own for ad-hoc single-item runs + +**How to structure it in `job.yml`:** + +```yaml +workflows: + - name: full_analysis + summary: "Research all competitors end-to-end" + steps: + - identify_competitors + - research_all # This step launches research_one in parallel + - synthesize + + - name: research_one + summary: "Deep-dive research on a single competitor" + steps: + - gather_data + - analyze + - write_summary +``` + +The `research_all` step's instructions should tell the agent to: +- Read the list of items from the prior step's output +- Launch `research_one` as a sub-workflow for each item using parallel sub-agents (Task tool) +- Collect the results and confirm all runs completed + +**When to recognize this pattern:** Look for language like "for each X, do Y" where Y involves more than one logical phase. If Y is a single simple action, a regular step with a loop is fine. If Y is itself a multi-step process with intermediate outputs worth reviewing, split it into a sub-workflow. + ### Step 3: Validate the Workflow After gathering information about all steps: From 4c8b60b51e8079e2fbae1852ce837a5f6ff7defa Mon Sep 17 00:00:00 2001 From: Noah Horton Date: Mon, 9 Feb 2026 11:47:42 -0700 Subject: [PATCH 40/45] Bump version to 0.7.0 Co-Authored-By: Claude Opus 4.6 --- pyproject.toml | 2 +- src/deepwork/__init__.py | 2 +- uv.lock | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 18594e11..4070c134 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "deepwork" -version = "0.7.0a2" +version = "0.7.0" description = "Framework for enabling AI agents to perform complex, multi-step work tasks" readme = "README.md" requires-python = ">=3.11" diff --git a/src/deepwork/__init__.py b/src/deepwork/__init__.py index 722898e8..0c85557f 100644 --- a/src/deepwork/__init__.py +++ b/src/deepwork/__init__.py @@ -1,6 +1,6 @@ """DeepWork - Framework for enabling AI agents to perform complex, multi-step work tasks.""" -__version__ = "0.7.0a2" +__version__ = "0.7.0" __author__ = "DeepWork Contributors" __all__ = [ diff --git a/uv.lock b/uv.lock index cdaad82f..df46f8f2 100644 --- a/uv.lock +++ b/uv.lock @@ -453,7 +453,7 @@ wheels = [ [[package]] name = "deepwork" -version = "0.7.0a2" +version = "0.7.0" source = { editable = "." 
} dependencies = [ { name = "aiofiles" }, From 06fcf195d8c0512bef55e4ad6c8f2ad784e3fccc Mon Sep 17 00:00:00 2001 From: Noah Horton Date: Mon, 9 Feb 2026 12:03:48 -0700 Subject: [PATCH 41/45] Apply ruff formatting fixes across MCP source and tests Co-Authored-By: Claude Opus 4.6 --- src/deepwork/core/parser.py | 3 +- src/deepwork/mcp/quality_gate.py | 40 ++++++++++----------- src/deepwork/mcp/schemas.py | 6 ++-- src/deepwork/mcp/tools.py | 19 ++++------ tests/unit/mcp/test_async_interface.py | 6 ++-- tests/unit/mcp/test_quality_gate.py | 42 +++++++++------------- tests/unit/mcp/test_schemas.py | 4 +-- tests/unit/mcp/test_state.py | 20 +++-------- tests/unit/mcp/test_tools.py | 48 +++++++------------------- tests/unit/test_parser.py | 23 +++++++++--- tests/unit/test_validation.py | 44 +++++++++++++++++------ 11 files changed, 119 insertions(+), 136 deletions(-) diff --git a/src/deepwork/core/parser.py b/src/deepwork/core/parser.py index 09748ac1..b6f6a380 100644 --- a/src/deepwork/core/parser.py +++ b/src/deepwork/core/parser.py @@ -192,8 +192,7 @@ def from_dict(cls, data: dict[str, Any]) -> "Step": instructions_file=data["instructions_file"], inputs=[StepInput.from_dict(inp) for inp in data.get("inputs", [])], outputs=[ - OutputSpec.from_dict(name, spec) - for name, spec in data.get("outputs", {}).items() + OutputSpec.from_dict(name, spec) for name, spec in data.get("outputs", {}).items() ], dependencies=data.get("dependencies", []), hooks=hooks, diff --git a/src/deepwork/mcp/quality_gate.py b/src/deepwork/mcp/quality_gate.py index 7bfc2ad3..7b749a38 100644 --- a/src/deepwork/mcp/quality_gate.py +++ b/src/deepwork/mcp/quality_gate.py @@ -189,8 +189,7 @@ async def _read_file_sections( except (UnicodeDecodeError, ValueError): abs_path = full_path.resolve() sections.append( - f"{header}\n[Binary file — not included in review. " - f"Read from: {abs_path}]" + f"{header}\n[Binary file — not included in review. 
Read from: {abs_path}]" ) except Exception as e: sections.append(f"{header}\n[Error reading file: {e}]") @@ -316,8 +315,7 @@ def _parse_result(self, data: dict[str, Any]) -> QualityGateResult: except (ValueError, KeyError) as e: raise QualityGateError( - f"Failed to interpret quality gate result: {e}\n" - f"Data was: {data}" + f"Failed to interpret quality gate result: {e}\nData was: {data}" ) from e @staticmethod @@ -440,22 +438,26 @@ async def evaluate_reviews( if output_type == "files" and isinstance(output_value, list): # Run once per file for file_path in output_value: - tasks.append(( + tasks.append( + ( + run_each, + file_path, + quality_criteria, + {run_each: file_path}, + guidance, + ) + ) + else: + # Single file - run once + tasks.append( + ( run_each, - file_path, + output_value if isinstance(output_value, str) else None, quality_criteria, - {run_each: file_path}, + {run_each: output_value}, guidance, - )) - else: - # Single file - run once - tasks.append(( - run_each, - output_value if isinstance(output_value, str) else None, - quality_criteria, - {run_each: output_value}, - guidance, - )) + ) + ) async def run_review( run_each: str, @@ -479,9 +481,7 @@ async def run_review( criteria_results=result.criteria_results, ) - results = await asyncio.gather( - *(run_review(*task) for task in tasks) - ) + results = await asyncio.gather(*(run_review(*task) for task in tasks)) return [r for r in results if not r.passed] diff --git a/src/deepwork/mcp/schemas.py b/src/deepwork/mcp/schemas.py index 3ff22731..fe3746f8 100644 --- a/src/deepwork/mcp/schemas.py +++ b/src/deepwork/mcp/schemas.py @@ -96,7 +96,7 @@ class FinishedStepInput(BaseModel): description=( "Map of output names to file path(s). " "For outputs declared as type 'file': pass a single string path (e.g. \"report.md\"). " - "For outputs declared as type 'files': pass a list of string paths (e.g. [\"a.md\", \"b.md\"]). " + 'For outputs declared as type \'files\': pass a list of string paths (e.g. ["a.md", "b.md"]). ' "Outputs with required: false can be omitted from this map. " "Check step_expected_outputs from start_workflow/finished_step response to see each output's type and required status." ) @@ -193,7 +193,9 @@ class ExpectedOutput(BaseModel): name: str = Field(description="Output name (use as key in finished_step outputs)") type: str = Field(description="Output type: 'file' or 'files'") description: str = Field(description="What this output should contain") - required: bool = Field(description="Whether this output must be provided. If false, it can be omitted from finished_step outputs.") + required: bool = Field( + description="Whether this output must be provided. If false, it can be omitted from finished_step outputs." 
+ ) syntax_for_finished_step_tool: str = Field( description="The value format to use for this output when calling finished_step" ) diff --git a/src/deepwork/mcp/tools.py b/src/deepwork/mcp/tools.py index 92e372c7..6bc5c046 100644 --- a/src/deepwork/mcp/tools.py +++ b/src/deepwork/mcp/tools.py @@ -12,8 +12,6 @@ from pathlib import Path from typing import TYPE_CHECKING -logger = logging.getLogger("deepwork.mcp") - from deepwork.core.parser import ( JobDefinition, OutputSpec, @@ -38,6 +36,8 @@ ) from deepwork.mcp.state import StateManager +logger = logging.getLogger("deepwork.mcp") + if TYPE_CHECKING: from deepwork.mcp.quality_gate import QualityGate @@ -246,9 +246,7 @@ def _validate_outputs( ) full_path = self.project_root / value if not full_path.exists(): - raise ToolError( - f"Output '{name}': file not found at '{value}'" - ) + raise ToolError(f"Output '{name}': file not found at '{value}'") elif spec.type == "files": if not isinstance(value, list): @@ -259,14 +257,11 @@ def _validate_outputs( for path in value: if not isinstance(path, str): raise ToolError( - f"Output '{name}': all paths must be strings, " - f"got {type(path).__name__}" + f"Output '{name}': all paths must be strings, got {type(path).__name__}" ) full_path = self.project_root / path if not full_path.exists(): - raise ToolError( - f"Output '{name}': file not found at '{path}'" - ) + raise ToolError(f"Output '{name}': file not found at '{path}'") @staticmethod def _build_expected_outputs(outputs: list[OutputSpec]) -> list[ExpectedOutput]: @@ -472,9 +467,7 @@ async def finished_step(self, input_data: FinishedStepInput) -> FinishedStepResp raise ToolError(f"Next step not found: {next_step_id}") # Advance session - await self.state_manager.advance_to_step( - next_step_id, next_entry_index, session_id=sid - ) + await self.state_manager.advance_to_step(next_step_id, next_entry_index, session_id=sid) await self.state_manager.start_step(next_step_id, session_id=sid) # Get instructions diff --git a/tests/unit/mcp/test_async_interface.py b/tests/unit/mcp/test_async_interface.py index 2931a45f..9c732c52 100644 --- a/tests/unit/mcp/test_async_interface.py +++ b/tests/unit/mcp/test_async_interface.py @@ -76,7 +76,7 @@ def test_workflow_tools_async_methods(self) -> None: def test_claude_cli_async_methods(self) -> None: """Verify ClaudeCLI methods that must be async remain async.""" - method = getattr(ClaudeCLI, "run") + method = ClaudeCLI.run assert inspect.iscoroutinefunction(method), ( "ClaudeCLI.run must be async (coroutine function). " "This is required for non-blocking subprocess execution." @@ -141,9 +141,7 @@ async def record_attempt() -> int: assert final_session is not None assert final_session.step_progress["step1"].quality_attempts == 10 - async def test_concurrent_workflows_with_session_id_routing( - self, tmp_path: Path - ) -> None: + async def test_concurrent_workflows_with_session_id_routing(self, tmp_path: Path) -> None: """Test that two concurrent sessions can be routed correctly via session_id. Two sessions are created on the stack. 
Concurrent finished_step-like diff --git a/tests/unit/mcp/test_quality_gate.py b/tests/unit/mcp/test_quality_gate.py index 4fa7a1c3..c6ab3c43 100644 --- a/tests/unit/mcp/test_quality_gate.py +++ b/tests/unit/mcp/test_quality_gate.py @@ -211,9 +211,7 @@ def test_parse_result_failed(self, quality_gate: QualityGate) -> None: data = { "passed": False, "feedback": "Issues found", - "criteria_results": [ - {"criterion": "Test 1", "passed": False, "feedback": "Failed"} - ], + "criteria_results": [{"criterion": "Test 1", "passed": False, "feedback": "Failed"}], } result = quality_gate._parse_result(data) @@ -282,9 +280,7 @@ async def test_evaluate_calls_cli_with_correct_args( assert "Must be valid" in call_kwargs.kwargs["system_prompt"] assert "Test content" in call_kwargs.kwargs["prompt"] - async def test_evaluate_wraps_cli_error( - self, mock_cli: ClaudeCLI, project_root: Path - ) -> None: + async def test_evaluate_wraps_cli_error(self, mock_cli: ClaudeCLI, project_root: Path) -> None: """Test that ClaudeCLIError is wrapped in QualityGateError.""" mock_cli.run = AsyncMock(side_effect=ClaudeCLIError("CLI failed")) gate = QualityGate(cli=mock_cli) @@ -322,9 +318,7 @@ async def test_empty_reviews(self, quality_gate: QualityGate, project_root: Path ) assert result == [] - async def test_step_review_passes( - self, mock_cli: ClaudeCLI, project_root: Path - ) -> None: + async def test_step_review_passes(self, mock_cli: ClaudeCLI, project_root: Path) -> None: """Test step-level review that passes.""" mock_cli.run = AsyncMock( return_value={"passed": True, "feedback": "All good", "criteria_results": []} @@ -346,9 +340,7 @@ async def test_step_review_passes( ) assert result == [] # No failures - async def test_step_review_fails( - self, mock_cli: ClaudeCLI, project_root: Path - ) -> None: + async def test_step_review_fails(self, mock_cli: ClaudeCLI, project_root: Path) -> None: """Test step-level review that fails.""" mock_cli.run = AsyncMock( return_value={ @@ -378,9 +370,7 @@ async def test_step_review_fails( assert result[0].review_run_each == "step" assert result[0].passed is False - async def test_per_file_review( - self, mock_cli: ClaudeCLI, project_root: Path - ) -> None: + async def test_per_file_review(self, mock_cli: ClaudeCLI, project_root: Path) -> None: """Test per-file review for files-type output.""" call_count = 0 @@ -409,9 +399,7 @@ async def mock_run(**kwargs: Any) -> dict[str, Any]: assert result == [] # All pass assert call_count == 2 # Called once per file - async def test_single_file_review( - self, mock_cli: ClaudeCLI, project_root: Path - ) -> None: + async def test_single_file_review(self, mock_cli: ClaudeCLI, project_root: Path) -> None: """Test review targeting a single-file output.""" mock_cli.run = AsyncMock( return_value={"passed": True, "feedback": "OK", "criteria_results": []} @@ -620,10 +608,12 @@ def test_multi_file_output(self) -> None: def test_mixed_outputs(self) -> None: """Test path listing with both single and list outputs.""" - lines = QualityGate._build_path_listing({ - "summary": "summary.md", - "details": ["d1.md", "d2.md"], - }) + lines = QualityGate._build_path_listing( + { + "summary": "summary.md", + "details": ["d1.md", "d2.md"], + } + ) assert len(lines) == 3 assert "- summary.md (output: summary)" in lines assert "- d1.md (output: details)" in lines @@ -649,9 +639,7 @@ def test_timeout_increases_after_five(self) -> None: class TestDynamicTimeout: """Tests that evaluate passes dynamic timeout to CLI.""" - async def test_timeout_passed_to_cli( - self, 
mock_cli: ClaudeCLI, project_root: Path - ) -> None: + async def test_timeout_passed_to_cli(self, mock_cli: ClaudeCLI, project_root: Path) -> None: """Test that evaluate passes computed timeout to CLI.run.""" gate = QualityGate(cli=mock_cli) @@ -753,7 +741,9 @@ async def test_mock_records_additional_review_guidance(self, project_root: Path) ) assert len(gate.evaluations) == 1 - assert gate.evaluations[0]["additional_review_guidance"] == "Look at the job.yml for context." + assert ( + gate.evaluations[0]["additional_review_guidance"] == "Look at the job.yml for context." + ) async def test_mock_records_none_guidance_when_omitted(self, project_root: Path) -> None: """Test mock gate records None for guidance when not provided.""" diff --git a/tests/unit/mcp/test_schemas.py b/tests/unit/mcp/test_schemas.py index 2e2949a5..14e0e41a 100644 --- a/tests/unit/mcp/test_schemas.py +++ b/tests/unit/mcp/test_schemas.py @@ -390,9 +390,7 @@ def test_needs_work_status(self) -> None: passed=False, feedback="Issues found", criteria_results=[ - QualityCriteriaResult( - criterion="Test", passed=False, feedback="Failed" - ), + QualityCriteriaResult(criterion="Test", passed=False, feedback="Failed"), ], ), ], diff --git a/tests/unit/mcp/test_state.py b/tests/unit/mcp/test_state.py index 389ad78b..4508b517 100644 --- a/tests/unit/mcp/test_state.py +++ b/tests/unit/mcp/test_state.py @@ -224,9 +224,7 @@ async def test_get_all_outputs(self, state_manager: StateManager) -> None: ) await state_manager.complete_step("step1", {"report": "output1.md"}) - await state_manager.complete_step( - "step2", {"data_files": ["output2.md", "output3.md"]} - ) + await state_manager.complete_step("step2", {"data_files": ["output2.md", "output3.md"]}) outputs = state_manager.get_all_outputs() @@ -492,9 +490,7 @@ def test_resolve_session_invalid_id(self, state_manager: StateManager) -> None: with pytest.raises(StateError, match="Session 'nonexistent' not found"): state_manager._resolve_session("nonexistent") - def test_resolve_session_none_falls_back_to_active( - self, state_manager: StateManager - ) -> None: + def test_resolve_session_none_falls_back_to_active(self, state_manager: StateManager) -> None: """Test _resolve_session with None falls back to top-of-stack.""" import asyncio @@ -512,9 +508,7 @@ def test_resolve_session_none_falls_back_to_active( resolved = state_manager._resolve_session(None) assert resolved.job_name == "job2" # top-of-stack - async def test_complete_workflow_by_session_id( - self, state_manager: StateManager - ) -> None: + async def test_complete_workflow_by_session_id(self, state_manager: StateManager) -> None: """Test complete_workflow removes a specific session from middle of stack.""" session1 = await state_manager.create_session( job_name="job1", workflow_name="wf1", goal="G1", first_step_id="s1" @@ -541,9 +535,7 @@ async def test_complete_workflow_by_session_id( assert session2.session_id not in remaining_ids assert session3.session_id in remaining_ids - async def test_abort_workflow_by_session_id( - self, state_manager: StateManager - ) -> None: + async def test_abort_workflow_by_session_id(self, state_manager: StateManager) -> None: """Test abort_workflow removes a specific session from middle of stack.""" session1 = await state_manager.create_session( job_name="job1", workflow_name="wf1", goal="G1", first_step_id="s1" @@ -570,9 +562,7 @@ async def test_abort_workflow_by_session_id( assert session1.session_id in remaining_ids assert session2.session_id not in remaining_ids - async def 
test_complete_step_with_session_id( - self, state_manager: StateManager - ) -> None: + async def test_complete_step_with_session_id(self, state_manager: StateManager) -> None: """Test complete_step operates on a non-top session when session_id is given.""" session1 = await state_manager.create_session( job_name="job1", workflow_name="wf1", goal="G1", first_step_id="s1" diff --git a/tests/unit/mcp/test_tools.py b/tests/unit/mcp/test_tools.py index 06525bd8..f5b7adce 100644 --- a/tests/unit/mcp/test_tools.py +++ b/tests/unit/mcp/test_tools.py @@ -179,9 +179,7 @@ async def test_start_workflow_invalid_job(self, tools: WorkflowTools) -> None: with pytest.raises(ToolError, match="Job not found"): await tools.start_workflow(input_data) - async def test_start_workflow_auto_selects_single_workflow( - self, tools: WorkflowTools - ) -> None: + async def test_start_workflow_auto_selects_single_workflow(self, tools: WorkflowTools) -> None: """Test that a wrong workflow name auto-selects when job has one workflow.""" input_data = StartWorkflowInput( goal="Complete task", @@ -245,9 +243,7 @@ async def test_start_workflow_invalid_workflow_multiple( (steps_dir / "step_a.md").write_text("# Step A") (steps_dir / "step_b.md").write_text("# Step B") - tools = WorkflowTools( - project_root=project_root, state_manager=state_manager - ) + tools = WorkflowTools(project_root=project_root, state_manager=state_manager) input_data = StartWorkflowInput( goal="Complete task", job_name="multi_wf_job", @@ -401,9 +397,7 @@ async def test_finished_step_quality_gate_max_attempts( # Third attempt should raise error with pytest.raises(ToolError, match="Quality gate failed after.*attempts"): - await tools.finished_step( - FinishedStepInput(outputs={"output1.md": "output1.md"}) - ) + await tools.finished_step(FinishedStepInput(outputs={"output1.md": "output1.md"})) async def test_finished_step_quality_gate_override( self, project_root: Path, state_manager: StateManager @@ -455,9 +449,7 @@ async def test_finished_step_validates_unknown_output_keys( with pytest.raises(ToolError, match="Unknown output names.*extra.md"): await tools.finished_step( - FinishedStepInput( - outputs={"output1.md": "output1.md", "extra.md": "extra.md"} - ) + FinishedStepInput(outputs={"output1.md": "output1.md", "extra.md": "extra.md"}) ) async def test_finished_step_validates_missing_output_keys( @@ -738,9 +730,7 @@ async def test_finished_step_validates_file_type_must_be_string( (project_root / "output1.md").write_text("content") with pytest.raises(ToolError, match="type 'file'.*single string path"): - await tools.finished_step( - FinishedStepInput(outputs={"output1.md": ["output1.md"]}) - ) + await tools.finished_step(FinishedStepInput(outputs={"output1.md": ["output1.md"]})) async def test_finished_step_validates_file_existence( self, tools: WorkflowTools, project_root: Path @@ -755,9 +745,7 @@ async def test_finished_step_validates_file_existence( # Don't create the file with pytest.raises(ToolError, match="file not found at.*nonexistent.md"): - await tools.finished_step( - FinishedStepInput(outputs={"output1.md": "nonexistent.md"}) - ) + await tools.finished_step(FinishedStepInput(outputs={"output1.md": "nonexistent.md"})) async def test_finished_step_empty_outputs_for_step_with_no_outputs( self, project_root: Path, state_manager: StateManager @@ -859,9 +847,7 @@ async def test_finished_step_validates_files_type_output( # type: files requires a list, not a string with pytest.raises(ToolError, match="type 'files'.*list of paths"): - await 
tools.finished_step( - FinishedStepInput(outputs={"reports": "report1.md"}) - ) + await tools.finished_step(FinishedStepInput(outputs={"reports": "report1.md"})) async def test_finished_step_validates_files_type_existence( self, project_root: Path, state_manager: StateManager @@ -916,9 +902,7 @@ async def test_finished_step_validates_files_type_existence( with pytest.raises(ToolError, match="file not found at.*missing.md"): await tools.finished_step( - FinishedStepInput( - outputs={"reports": ["report1.md", "missing.md"]} - ) + FinishedStepInput(outputs={"reports": ["report1.md", "missing.md"]}) ) async def test_finished_step_files_type_success( @@ -973,9 +957,7 @@ async def test_finished_step_files_type_success( (project_root / "report2.md").write_text("Report 2") response = await tools.finished_step( - FinishedStepInput( - outputs={"reports": ["report1.md", "report2.md"]} - ) + FinishedStepInput(outputs={"reports": ["report1.md", "report2.md"]}) ) assert response.status == StepStatus.WORKFLOW_COMPLETE @@ -1077,15 +1059,11 @@ async def test_quality_reviewer_receives_only_current_step_outputs( # Complete step1 (project_root / "step1_output.md").write_text("STEP1_CONTENT_MARKER") - await tools.finished_step( - FinishedStepInput(outputs={"step1_output.md": "step1_output.md"}) - ) + await tools.finished_step(FinishedStepInput(outputs={"step1_output.md": "step1_output.md"})) # Complete step2 (project_root / "step2_output.md").write_text("STEP2_CONTENT_MARKER") - await tools.finished_step( - FinishedStepInput(outputs={"step2_output.md": "step2_output.md"}) - ) + await tools.finished_step(FinishedStepInput(outputs={"step2_output.md": "step2_output.md"})) # Complete step3 — quality gate runs here (project_root / "step3_output.md").write_text("STEP3_CONTENT_MARKER") @@ -1162,9 +1140,7 @@ async def test_additional_review_guidance_reaches_reviewer( ) (project_root / "report.md").write_text("Report content") - response = await tools.finished_step( - FinishedStepInput(outputs={"report.md": "report.md"}) - ) + response = await tools.finished_step(FinishedStepInput(outputs={"report.md": "report.md"})) assert response.status == StepStatus.WORKFLOW_COMPLETE assert len(mock_gate.evaluations) == 1 diff --git a/tests/unit/test_parser.py b/tests/unit/test_parser.py index b7e346b4..aba569c6 100644 --- a/tests/unit/test_parser.py +++ b/tests/unit/test_parser.py @@ -56,7 +56,9 @@ class TestOutputSpec: def test_file_output(self) -> None: """Test single file output.""" - output = OutputSpec(name="output.md", type="file", description="An output file", required=True) + output = OutputSpec( + name="output.md", type="file", description="An output file", required=True + ) assert output.name == "output.md" assert output.type == "file" @@ -66,7 +68,10 @@ def test_file_output(self) -> None: def test_files_output(self) -> None: """Test multiple files output.""" output = OutputSpec( - name="step_instruction_files", type="files", description="Instruction files", required=True + name="step_instruction_files", + type="files", + description="Instruction files", + required=True, ) assert output.name == "step_instruction_files" @@ -178,7 +183,11 @@ def test_from_dict_with_multiple_outputs(self) -> None: "instructions_file": "steps/step1.md", "outputs": { "report.md": {"type": "file", "description": "A report", "required": True}, - "attachments": {"type": "files", "description": "Supporting files", "required": True}, + "attachments": { + "type": "files", + "description": "Supporting files", + "required": True, + }, }, } step = 
Step.from_dict(data) @@ -430,7 +439,9 @@ def test_validate_reviews_valid(self) -> None: description="Step", instructions_file="steps/step1.md", outputs=[ - OutputSpec(name="report.md", type="file", description="Report", required=True) + OutputSpec( + name="report.md", type="file", description="Report", required=True + ) ], reviews=[ Review(run_each="step", quality_criteria={"Complete": "Is it?"}), @@ -458,7 +469,9 @@ def test_validate_reviews_invalid_run_each(self) -> None: description="Step", instructions_file="steps/step1.md", outputs=[ - OutputSpec(name="report.md", type="file", description="Report", required=True) + OutputSpec( + name="report.md", type="file", description="Report", required=True + ) ], reviews=[ Review( diff --git a/tests/unit/test_validation.py b/tests/unit/test_validation.py index e1b01c5a..811f4582 100644 --- a/tests/unit/test_validation.py +++ b/tests/unit/test_validation.py @@ -22,7 +22,9 @@ def test_validates_simple_job(self) -> None: "name": "Step 1", "description": "First step", "instructions_file": "steps/step1.md", - "outputs": {"output.md": {"type": "file", "description": "Output", "required": True}}, + "outputs": { + "output.md": {"type": "file", "description": "Output", "required": True} + }, "dependencies": [], "reviews": [], } @@ -49,7 +51,9 @@ def test_validates_job_with_user_inputs(self) -> None: {"name": "param1", "description": "First parameter"}, {"name": "param2", "description": "Second parameter"}, ], - "outputs": {"output.md": {"type": "file", "description": "Output", "required": True}}, + "outputs": { + "output.md": {"type": "file", "description": "Output", "required": True} + }, "dependencies": [], "reviews": [], } @@ -71,7 +75,9 @@ def test_validates_job_with_file_inputs(self) -> None: "name": "Step 1", "description": "First step", "instructions_file": "steps/step1.md", - "outputs": {"data.md": {"type": "file", "description": "Data output", "required": True}}, + "outputs": { + "data.md": {"type": "file", "description": "Data output", "required": True} + }, "dependencies": [], "reviews": [], }, @@ -81,7 +87,13 @@ def test_validates_job_with_file_inputs(self) -> None: "description": "Second step", "instructions_file": "steps/step2.md", "inputs": [{"file": "data.md", "from_step": "step1"}], - "outputs": {"result.md": {"type": "file", "description": "Result output", "required": True}}, + "outputs": { + "result.md": { + "type": "file", + "description": "Result output", + "required": True, + } + }, "dependencies": ["step1"], "reviews": [], }, @@ -116,7 +128,9 @@ def test_raises_for_invalid_job_name(self) -> None: "name": "Step 1", "description": "Step", "instructions_file": "steps/step1.md", - "outputs": {"output.md": {"type": "file", "description": "Output", "required": True}}, + "outputs": { + "output.md": {"type": "file", "description": "Output", "required": True} + }, "reviews": [], } ], @@ -138,7 +152,9 @@ def test_raises_for_invalid_version(self) -> None: "name": "Step 1", "description": "Step", "instructions_file": "steps/step1.md", - "outputs": {"output.md": {"type": "file", "description": "Output", "required": True}}, + "outputs": { + "output.md": {"type": "file", "description": "Output", "required": True} + }, "reviews": [], } ], @@ -200,7 +216,9 @@ def test_raises_for_invalid_input_format(self) -> None: # Missing description for user input } ], - "outputs": {"output.md": {"type": "file", "description": "Output", "required": True}}, + "outputs": { + "output.md": {"type": "file", "description": "Output", "required": True} + }, "reviews": 
[], } ], @@ -232,7 +250,9 @@ def test_raises_for_step_missing_reviews(self) -> None: "name": "Step 1", "description": "Step", "instructions_file": "steps/step1.md", - "outputs": {"output.md": {"type": "file", "description": "Output", "required": True}}, + "outputs": { + "output.md": {"type": "file", "description": "Output", "required": True} + }, # Missing reviews - now required } ], @@ -291,7 +311,9 @@ def test_raises_for_review_missing_run_each(self) -> None: "name": "Step 1", "description": "Step", "instructions_file": "steps/step1.md", - "outputs": {"output.md": {"type": "file", "description": "Output", "required": True}}, + "outputs": { + "output.md": {"type": "file", "description": "Output", "required": True} + }, "reviews": [ { # Missing run_each @@ -318,7 +340,9 @@ def test_raises_for_review_empty_criteria(self) -> None: "name": "Step 1", "description": "Step", "instructions_file": "steps/step1.md", - "outputs": {"output.md": {"type": "file", "description": "Output", "required": True}}, + "outputs": { + "output.md": {"type": "file", "description": "Output", "required": True} + }, "reviews": [ { "run_each": "step", From 4f6e1345d9d2454c48afe3fcc1fdfe6a5acfe4d9 Mon Sep 17 00:00:00 2001 From: Noah Horton Date: Mon, 9 Feb 2026 12:39:06 -0700 Subject: [PATCH 42/45] Fix merge queue CI tests for MCP variant architecture - Update claude-code-test.yml validate-generation job: replace pytest test file with inline Python validation of fruits fixture parsing - Check for deepwork/SKILL.md instead of per-step skills - Update e2e test to use /deepwork skill instead of /deepwork_jobs and /fruits Co-Authored-By: Claude Opus 4.6 --- .github/workflows/claude-code-test.yml | 99 +++++++++++++++++--------- 1 file changed, 65 insertions(+), 34 deletions(-) diff --git a/.github/workflows/claude-code-test.yml b/.github/workflows/claude-code-test.yml index 1d0ecebc..75a55ca4 100644 --- a/.github/workflows/claude-code-test.yml +++ b/.github/workflows/claude-code-test.yml @@ -56,9 +56,48 @@ jobs: if: github.event_name != 'pull_request' run: uv sync --extra dev - - name: Run fruits workflow tests + - name: Validate fruits fixture parses and install generates correct structure if: github.event_name != 'pull_request' - run: uv run pytest tests/integration/test_fruits_workflow.py -v + run: | + # Verify the fruits fixture parses correctly via deepwork's parser + uv run python -c " + from pathlib import Path + from deepwork.core.parser import parse_job_definition + + job = parse_job_definition(Path('tests/fixtures/jobs/fruits')) + + assert job.name == 'fruits' + assert job.version == '1.0.0' + assert len(job.steps) == 2 + assert [s.id for s in job.steps] == ['identify', 'classify'] + + # Identify step: user input -> file output + identify = job.steps[0] + assert identify.inputs[0].is_user_input() + assert identify.inputs[0].name == 'raw_items' + assert identify.outputs[0].name == 'identified_fruits.md' + assert identify.dependencies == [] + + # Classify step: file input from identify -> file output + classify = job.steps[1] + assert classify.inputs[0].is_file_input() + assert classify.inputs[0].file == 'identified_fruits.md' + assert classify.inputs[0].from_step == 'identify' + assert classify.outputs[0].name == 'classified_fruits.md' + assert classify.dependencies == ['identify'] + + # Workflow definition + assert len(job.workflows) == 1 + assert job.workflows[0].name == 'full' + assert job.workflows[0].steps == ['identify', 'classify'] + + # Validations pass + job.validate_dependencies() + 
job.validate_file_inputs() + job.validate_workflows() + + print('All fruits fixture validations passed!') + " - name: Generate skills and validate structure if: github.event_name != 'pull_request' @@ -84,21 +123,12 @@ jobs: echo "Checking generated skills..." ls -la test_project/.claude/skills/ - # Verify skill directories and SKILL.md files exist - # Meta-skill for the job itself - test -f test_project/.claude/skills/fruits/SKILL.md || (echo "Missing fruits meta-skill" && exit 1) - # Step skills - test -f test_project/.claude/skills/fruits.identify/SKILL.md || (echo "Missing fruits.identify skill" && exit 1) - test -f test_project/.claude/skills/fruits.classify/SKILL.md || (echo "Missing fruits.classify skill" && exit 1) - - # Verify skill content - grep -q "# fruits.identify" test_project/.claude/skills/fruits.identify/SKILL.md - grep -q "raw_items" test_project/.claude/skills/fruits.identify/SKILL.md - grep -q "identified_fruits.md" test_project/.claude/skills/fruits.identify/SKILL.md + # MCP variant: only the /deepwork entry point skill is generated + # (per-step skills are no longer created; MCP server handles orchestration) + test -f test_project/.claude/skills/deepwork/SKILL.md || (echo "Missing deepwork MCP entry point skill" && exit 1) - grep -q "# fruits.classify" test_project/.claude/skills/fruits.classify/SKILL.md - grep -q "identified_fruits.md" test_project/.claude/skills/fruits.classify/SKILL.md - grep -q "classified_fruits.md" test_project/.claude/skills/fruits.classify/SKILL.md + # Verify the deepwork skill references MCP tools + grep -qi "deepwork" test_project/.claude/skills/deepwork/SKILL.md echo "Skill generation validated successfully!" @@ -193,21 +223,21 @@ jobs: echo "Available skills:" ls -la test_project/.claude/skills/ - # STEP 1: Use /deepwork_jobs.define to CREATE the fruits job - - name: Create job with /deepwork_jobs + # STEP 1: Use /deepwork to CREATE the fruits job via MCP workflow + - name: Create job with /deepwork if: steps.check-key.outputs.has_key == 'true' working-directory: test_project timeout-minutes: 6 run: | - echo "=== Running /deepwork_jobs to create fruits job ===" + echo "=== Running /deepwork to create fruits job ===" mkdir fruits # Provide detailed, deterministic instructions for creating the job claude --print --model claude-sonnet-4-5 <<'PROMPT_EOF' - /deepwork_jobs I want to create a simple job called "fruits" for identifying and classifying fruits. + /deepwork I want to create a simple job called "fruits" for identifying and classifying fruits. Here are the EXACT specifications. - + Intent: A simple workflow that takes a list of mixed items, identifies which are fruits, then classifies them by category. Designed for CI testing. Steps: @@ -218,7 +248,7 @@ jobs: 2. Step: classify Name: Classify Fruits - Description: Organize identified fruits into categories (citrus, tropical, berries, etc.). + Description: Organize identified fruits into categories (citrus, tropical, berries, etc.). **CRITICAL**: must put the classified fruit list in `./fruits/classified_fruits.md`. **Key Instructions:** @@ -254,31 +284,32 @@ jobs: exit 1 fi - # Run sync to generate the skills - echo "=== Running deepwork sync to generate skills ===" + # Run sync to regenerate skills after new job was created + echo "=== Running deepwork sync to regenerate skills ===" cd .. 
uv run deepwork sync --path test_project echo "=== Checking generated skills ===" ls -la test_project/.claude/skills/ - if [ -f "test_project/.claude/skills/fruits.identify/SKILL.md" ] && [ -f "test_project/.claude/skills/fruits.classify/SKILL.md" ]; then - echo "SUCCESS: Skills generated" + # MCP variant: only the /deepwork entry point skill is generated + if [ -f "test_project/.claude/skills/deepwork/SKILL.md" ]; then + echo "SUCCESS: /deepwork MCP entry point skill generated" else - echo "ERROR: Skills were not generated" + echo "ERROR: /deepwork skill was not generated" exit 1 fi - # STEP 3: Execute the /fruits workflow (runs all steps automatically) + # STEP 3: Execute the fruits workflow via /deepwork MCP entry point - name: Run Workflow if: steps.check-key.outputs.has_key == 'true' working-directory: test_project timeout-minutes: 3 run: | - echo "=== Running /fruits workflow with test input ===" + echo "=== Running fruits workflow with test input via /deepwork ===" claude --print --model claude-sonnet-4-5 <<'PROMPT_EOF' - /fruits Proccess the list to the file and don't give any extra commentary or text output. + /deepwork Run the fruits full workflow. Process the list to the file and don't give any extra commentary or text output. raw_items: apple, car, banana, chair, orange, table, mango, laptop, grape, bicycle PROMPT_EOF @@ -329,7 +360,7 @@ jobs: echo " ALL E2E TESTS PASSED SUCCESSFULLY!" echo "==========================================" echo "" - echo "Workflow tested: /fruits - Executed full fruits workflow (identify + classify)" + echo "Workflow tested: /deepwork fruits full - Executed full fruits workflow (identify + classify)" echo "" - name: Upload test artifacts @@ -339,7 +370,7 @@ jobs: name: claude-code-e2e-outputs path: | test_project/.deepwork/jobs/fruits/ - test_project/.claude/skills/fruits*/ - test_project/identified_fruits.md - test_project/classified_fruits.md + test_project/.claude/skills/deepwork/ + test_project/fruits/identified_fruits.md + test_project/fruits/classified_fruits.md retention-days: 7 From d47726204eb26015385e829021339d536595f86e Mon Sep 17 00:00:00 2001 From: Noah Horton Date: Mon, 9 Feb 2026 13:45:02 -0700 Subject: [PATCH 43/45] fix: add .venv/bin to PATH so MCP server can start in CI The DeepWork MCP server is registered in .mcp.json with the bare "deepwork" command. In CI, deepwork is installed via uv sync and only exists in .venv/bin/, which isn't on PATH. Claude Code fails to start the MCP server subprocess, causing it to fall back to ad-hoc file creation instead of using MCP tools. Co-Authored-By: Claude Opus 4.6 --- .github/workflows/claude-code-test.yml | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/.github/workflows/claude-code-test.yml b/.github/workflows/claude-code-test.yml index 75a55ca4..b4d18b6d 100644 --- a/.github/workflows/claude-code-test.yml +++ b/.github/workflows/claude-code-test.yml @@ -185,7 +185,22 @@ jobs: - name: Install deepwork if: steps.check-key.outputs.has_key == 'true' - run: uv sync + run: | + uv sync + + # Add the uv virtualenv bin directory to PATH for all subsequent steps. + # + # Why: `deepwork install` registers an MCP server in .mcp.json with + # the command "deepwork serve --path .". When Claude Code starts, it + # reads .mcp.json and spawns the MCP server as a subprocess using the + # bare "deepwork" command. 
Without this PATH addition, that subprocess + # fails because "deepwork" only exists inside the uv virtualenv + # (accessible via "uv run deepwork" but not as a bare command). + # + # Without the MCP server running, Claude cannot use the DeepWork MCP + # tools (get_workflows, start_workflow, finished_step) and falls back + # to ad-hoc file creation, which produces the wrong output format. + echo "$(pwd)/.venv/bin" >> $GITHUB_PATH - name: Set up fresh test project if: steps.check-key.outputs.has_key == 'true' From d490071e4de5f4ec4df3c6cc6dbbd63b7c000db3 Mon Sep 17 00:00:00 2001 From: Noah Horton Date: Mon, 9 Feb 2026 13:53:24 -0700 Subject: [PATCH 44/45] fix: merge CI permissions into settings.json instead of overwriting The previous approach overwrote settings.json with only generic permissions (Bash, Read, Write, etc.), which removed the MCP tool permissions (mcp__deepwork__get_workflows, start_workflow, finished_step, abort_workflow) that `deepwork install` had synced. Without these, Claude silently fails to call DeepWork MCP tools and returns empty output. Now merges CI-specific permissions into the existing settings.json, preserving the MCP tool permissions that the install step wrote. Co-Authored-By: Claude Opus 4.6 --- .github/workflows/claude-code-test.yml | 34 +++++++++++++++----------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/.github/workflows/claude-code-test.yml b/.github/workflows/claude-code-test.yml index b4d18b6d..39e38503 100644 --- a/.github/workflows/claude-code-test.yml +++ b/.github/workflows/claude-code-test.yml @@ -219,20 +219,26 @@ jobs: # Install deepwork (this sets up .deepwork/ with standard jobs only) uv run deepwork install --platform claude --path test_project - # Create permissive settings.json to allow file operations in CI - cat > test_project/.claude/settings.json << 'SETTINGS_EOF' - { - "permissions": { - "allow": [ - "Bash(*)", - "Read(./**)", - "Edit(./**)", - "Write(./**)", - "Skill(*)" - ] - } - } - SETTINGS_EOF + # Merge broad CI permissions into settings.json WITHOUT overwriting it. + # + # Why merge instead of overwrite: `deepwork install` writes MCP tool + # permissions (mcp__deepwork__get_workflows, mcp__deepwork__start_workflow, + # etc.) into settings.json. These are required for Claude to call the + # DeepWork MCP server tools. Overwriting settings.json with only generic + # permissions (Bash, Read, Write, etc.) removes the MCP permissions, + # causing Claude to silently fail when trying to use /deepwork. 
+ python3 -c " + import json + settings_path = 'test_project/.claude/settings.json' + with open(settings_path) as f: + settings = json.load(f) + ci_permissions = ['Bash(*)', 'Read(./**)', 'Edit(./**)', 'Write(./**)', 'Skill(*)'] + for perm in ci_permissions: + if perm not in settings.setdefault('permissions', {}).setdefault('allow', []): + settings['permissions']['allow'].append(perm) + with open(settings_path, 'w') as f: + json.dump(settings, f, indent=2) + " echo "Fresh test project setup complete" echo "Available skills:" From b7d01c79b6be526ede2c38a1d9a556598e80875b Mon Sep 17 00:00:00 2001 From: Noah Horton Date: Mon, 9 Feb 2026 14:09:03 -0700 Subject: [PATCH 45/45] fix: add --debug flag, abort instruction, and failure diagnostics to CI - Add --debug flag to claude invocation to capture detailed logs - Add failure step that dumps debug.log, .mcp.json, settings.json, and session state when the job creation step fails - Instruct Claude to stop after define+implement steps instead of running the full 4-step workflow (test+iterate are unnecessary for CI) - Increase timeout from 6 to 10 minutes Co-Authored-By: Claude Opus 4.6 --- .github/workflows/claude-code-test.yml | 42 ++++++++++++++++++++++++-- 1 file changed, 39 insertions(+), 3 deletions(-) diff --git a/.github/workflows/claude-code-test.yml b/.github/workflows/claude-code-test.yml index 39e38503..405c3ba9 100644 --- a/.github/workflows/claude-code-test.yml +++ b/.github/workflows/claude-code-test.yml @@ -245,16 +245,22 @@ jobs: ls -la test_project/.claude/skills/ # STEP 1: Use /deepwork to CREATE the fruits job via MCP workflow + # + # This invokes Claude with the /deepwork skill, which uses MCP tools to + # walk through the deepwork_jobs/new_job workflow (define → implement → + # test → iterate). The workflow includes quality gates that spawn Claude + # subprocesses, so it needs a generous timeout. - name: Create job with /deepwork if: steps.check-key.outputs.has_key == 'true' working-directory: test_project - timeout-minutes: 6 + timeout-minutes: 10 run: | echo "=== Running /deepwork to create fruits job ===" mkdir fruits - # Provide detailed, deterministic instructions for creating the job - claude --print --model claude-sonnet-4-5 <<'PROMPT_EOF' + # Use --debug to capture detailed logs for diagnosing failures. + # The debug log is dumped in the failure handler below. + claude --print --debug --model claude-sonnet-4-5 <<'PROMPT_EOF' /deepwork I want to create a simple job called "fruits" for identifying and classifying fruits. Here are the EXACT specifications. @@ -276,6 +282,7 @@ jobs: - Do not ask questions - just make the job - Rules are explicitly not desired. Tell the review agents that. - Do not give long commentary of what you did - just make the job with no commentary. + - IMPORTANT: Once the job.yml and step instruction files have been created (i.e. after the "define" and "implement" steps are done), STOP. Do NOT continue into the "test" or "iterate" steps. Abort the workflow at that point. We only need the job definition files created, not the full workflow run. PROMPT_EOF # Verify the job.yml was created @@ -321,6 +328,35 @@ jobs: exit 1 fi + # Dump Claude debug log if the job creation step failed or timed out. + # This captures MCP server communication, tool calls, and error details. 
+ - name: Dump Claude debug log on failure + if: failure() && steps.check-key.outputs.has_key == 'true' + working-directory: test_project + run: | + echo "=== Claude debug log ===" + # Claude --debug writes to ~/.claude/debug.log + if [ -f "$HOME/.claude/debug.log" ]; then + echo "--- Last 200 lines of debug.log ---" + tail -200 "$HOME/.claude/debug.log" + else + echo "No debug.log found at ~/.claude/debug.log" + echo "Searching for debug logs..." + find "$HOME/.claude" -name "*.log" -type f 2>/dev/null || echo "No log files found" + fi + echo "" + echo "=== MCP server config ===" + cat .mcp.json 2>/dev/null || echo "No .mcp.json found" + echo "" + echo "=== Settings.json ===" + cat .claude/settings.json 2>/dev/null || echo "No settings.json found" + echo "" + echo "=== DeepWork session state ===" + ls -la .deepwork/tmp/ 2>/dev/null || echo "No tmp directory" + for f in .deepwork/tmp/session_*.json; do + [ -f "$f" ] && echo "--- $f ---" && cat "$f" + done + # STEP 3: Execute the fruits workflow via /deepwork MCP entry point - name: Run Workflow if: steps.check-key.outputs.has_key == 'true'