Skip to content

Commit c43fbdd

Browse files
jahooma and claude authored
AGENTS.md: Add retrieval-led reasoning note, simplify doc links (#489)
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 143fdff commit c43fbdd

File tree

2 files changed

+77
-10
lines changed

2 files changed

+77
-10
lines changed

AGENTS.md

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -34,13 +34,15 @@ Make an efficient learning agent that can do anything.
3434

3535
## Docs
3636

37-
- [`docs/architecture.md`](docs/architecture.md) — Package dependency graph, per-package details, architectural patterns
38-
- [`docs/request-flow.md`](docs/request-flow.md) — Full request lifecycle from CLI through server and back
39-
- [`docs/error-schema.md`](docs/error-schema.md) — Server error response formats and client-side handling
40-
- [`docs/development.md`](docs/development.md) — Dev setup, worktrees, logs, package management, DB migrations
41-
- [`docs/testing.md`](docs/testing.md) — DI over mocking, tmux CLI testing
42-
- [`docs/environment-variables.md`](docs/environment-variables.md) — Env var rules, DI helpers, loading order
43-
- [`docs/agents-and-tools.md`](docs/agents-and-tools.md) — Agent system, shell shims, tool definitions
44-
- [`docs/patterns/handle-steps-generators.md`](docs/patterns/handle-steps-generators.md) — handleSteps generator patterns and spawn_agents tool calls
45-
- [docs/evalbuff/interpreting-task-prompts.md](docs/evalbuff/interpreting-task-prompts.md)
46-
- [docs/patterns/discover-before-implement.md](docs/patterns/discover-before-implement.md)
37+
IMPORTANT: Prefer retrieval-led reasoning over pre-training-led reasoning. Always read the relevant docs below before implementing changes.
38+
39+
- `docs/architecture.md` — Package dependency graph, per-package details, architectural patterns
40+
- `docs/request-flow.md` — Full request lifecycle from CLI through server and back
41+
- `docs/error-schema.md` — Server error response formats and client-side handling
42+
- `docs/development.md` — Dev setup, worktrees, logs, package management, DB migrations
43+
- `docs/testing.md` — DI over mocking, tmux CLI testing
44+
- `docs/environment-variables.md` — Env var rules, DI helpers, loading order
45+
- `docs/agents-and-tools.md` — Agent system, shell shims, tool definitions
46+
- `docs/patterns/handle-steps-generators.md` — handleSteps generator patterns and spawn_agents tool calls
47+
- `docs/evalbuff/interpreting-task-prompts.md`
48+
- `docs/patterns/discover-before-implement.md`

evalbuff/src/run-carve-eval.ts

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,40 @@ import type { CarvedFeature, CarveResult, FileOperation } from './carve-features
2424
import type { JudgingResult, ReviewerAgentType } from './judge'
2525
import type { RunnerResult } from './runners/runner'
2626

27+
// --- Doc read stats ---
28+
29+
/**
 * Matches a path that ends in a file under a `docs/` directory, or in
 * `AGENTS.md` / `CLAUDE.md`, where that segment sits at the start of the
 * string or directly after a `/`. Hoisted so it is built once, not per line.
 */
const DOC_READ_PATH_PATTERN = /(?:^|\/)(?:docs\/.*|AGENTS\.md|CLAUDE\.md)$/

/**
 * Extract doc file reads from an agent trace (JSONL of PrintModeEvents).
 *
 * Only events with `type === 'tool_call'` and `toolName === 'Read'` whose
 * `input.file_path` is a string matching {@link DOC_READ_PATH_PATTERN} are
 * counted. Blank lines, lines that are not valid JSON, and events with any
 * other shape are skipped.
 *
 * Paths are normalized by keeping the text from the first `docs/` segment
 * (or the `AGENTS.md` / `CLAUDE.md` file name) onward, with the leading `/`
 * removed. NOTE(review): an absolute path with an ancestor directory that is
 * itself named `docs` is therefore keyed from that ancestor, not from the
 * repo root — the repo root is not known here.
 *
 * @param agentTrace - Newline-delimited JSON, one event per line.
 * @returns Map from normalized doc path to the number of times it was read.
 */
function extractDocReads(agentTrace: string): Record<string, number> {
  const counts: Record<string, number> = {}

  for (const line of agentTrace.split('\n')) {
    if (!line.trim()) continue

    // Only the parse itself is allowed to fail; shape problems are handled
    // by explicit narrowing below rather than by swallowing TypeErrors.
    let event: unknown
    try {
      event = JSON.parse(line)
    } catch {
      continue // not JSON
    }
    if (typeof event !== 'object' || event === null) continue

    const { type, toolName, input } = event as {
      type?: unknown
      toolName?: unknown
      input?: unknown
    }
    if (type !== 'tool_call' || toolName !== 'Read') continue
    if (typeof input !== 'object' || input === null) continue

    const filePath = (input as { file_path?: unknown }).file_path
    if (typeof filePath !== 'string') continue

    // Normalize to repo-relative path
    const match = filePath.match(DOC_READ_PATH_PATTERN)
    if (!match) continue
    const matched = match[0]
    const relPath = matched.startsWith('/') ? matched.slice(1) : matched

    counts[relPath] = (counts[relPath] ?? 0) + 1
  }

  return counts
}
49+
50+
/**
 * Merge multiple doc-read count maps into one (summing counts).
 *
 * Totals are accumulated in a `Map` rather than a plain object, so a key that
 * happens to match an `Object.prototype` member (e.g. `constructor`) starts
 * from 0 instead of picking up the inherited value.
 *
 * @param maps - Per-run maps from doc path to read count; may be empty.
 * @returns A new plain object with the summed count for every path seen.
 *   The input maps are not modified.
 */
function mergeDocReads(maps: Record<string, number>[]): Record<string, number> {
  const totals = new Map<string, number>()

  for (const counts of maps) {
    for (const [docPath, count] of Object.entries(counts)) {
      totals.set(docPath, (totals.get(docPath) ?? 0) + count)
    }
  }

  // Object.fromEntries defines own data properties, preserving insertion order.
  return Object.fromEntries(totals)
}
60+
2761
// --- Apply carve operations to a repo directory ---
2862

2963
function applyCarveOperations(repoDir: string, operations: FileOperation[]): void {
@@ -274,6 +308,8 @@ interface CarveEvalResult {
274308
docsKept: Array<{ path: string; reasoning: string; scoreBefore: number; scoreAfter: number }>
275309
docsRejected: Array<{ path: string; reasoning: string; scoreBefore: number; scoreAfter: number }>
276310
totalCost: number
311+
/** Which doc files agents read and how many times (summed across all parallel runs). */
312+
docsRead: Record<string, number>
277313
}
278314

279315
async function runCarveEval(options: CarveEvalOptions): Promise<void> {
@@ -357,6 +393,7 @@ async function runCarveEval(options: CarveEvalOptions): Promise<void> {
357393
docsKept: [],
358394
docsRejected: [],
359395
totalCost,
396+
docsRead: {},
360397
})
361398
continue
362399
}
@@ -368,6 +405,15 @@ async function runCarveEval(options: CarveEvalOptions): Promise<void> {
368405
` Baseline: ${currentScore.toFixed(1)}/10 (${baselineScores.map((s) => s.toFixed(1)).join(', ')})`,
369406
)
370407

408+
// Track which docs agents read across all runs for this feature
409+
const baselineDocReads = mergeDocReads(validBaseline.map((r) => extractDocReads(r.agentTrace)))
410+
const docReadEntries = Object.entries(baselineDocReads).sort((a, b) => b[1] - a[1])
411+
if (docReadEntries.length > 0) {
412+
console.log(` Docs read (baseline): ${docReadEntries.map(([p, n]) => `${p} (${n}x)`).join(', ')}`)
413+
} else {
414+
console.log(` Docs read (baseline): none`)
415+
}
416+
371417
const docsKept: Array<{ path: string; reasoning: string; scoreBefore: number; scoreAfter: number }> = []
372418
const docsRejected: Array<{ path: string; reasoning: string; scoreBefore: number; scoreAfter: number }> = []
373419

@@ -510,6 +556,7 @@ async function runCarveEval(options: CarveEvalOptions): Promise<void> {
510556
docsKept,
511557
docsRejected,
512558
totalCost,
559+
docsRead: baselineDocReads,
513560
})
514561
}
515562

@@ -525,6 +572,12 @@ async function runCarveEval(options: CarveEvalOptions): Promise<void> {
525572
console.log(` Baseline: ${r.baselineScore.toFixed(1)}/10`)
526573
console.log(` Final: ${r.finalScore.toFixed(1)}/10`)
527574
console.log(` Docs kept: ${r.docsKept.length}, rejected: ${r.docsRejected.length}`)
575+
const readEntries = Object.entries(r.docsRead).sort((a, b) => b[1] - a[1])
576+
if (readEntries.length > 0) {
577+
console.log(` Docs read: ${readEntries.map(([p, n]) => `${p} (${n}x)`).join(', ')}`)
578+
} else {
579+
console.log(` Docs read: none`)
580+
}
528581
console.log(` Cost: $${r.totalCost.toFixed(2)}`)
529582
totalCostAll += r.totalCost
530583
}
@@ -538,6 +591,18 @@ async function runCarveEval(options: CarveEvalOptions): Promise<void> {
538591
console.log(` Average final: ${avgFinal.toFixed(1)}/10`)
539592
console.log(` Total cost: $${totalCostAll.toFixed(2)}`)
540593

594+
// Aggregate doc read stats across all features
595+
const allDocReads = mergeDocReads(results.map((r) => r.docsRead))
596+
const allReadEntries = Object.entries(allDocReads).sort((a, b) => b[1] - a[1])
597+
if (allReadEntries.length > 0) {
598+
console.log(`\n Doc read stats (all features):`)
599+
for (const [docPath, count] of allReadEntries) {
600+
console.log(` ${docPath}: ${count} reads`)
601+
}
602+
} else {
603+
console.log(`\n No docs were read by any agent.`)
604+
}
605+
541606
// Save results
542607
const outputPath = path.join(
543608
repoPath,

0 commit comments

Comments
 (0)