diff --git a/CHANGELOG.md b/CHANGELOG.md index 6d3ac79f3..1610332a0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [Unreleased] ### Fixed +- **Watch sync no longer aborts with `FOREIGN KEY constraint failed`.** PR #62 plugged this FK violation at the extraction-layer `insertEdges` site (empty-named nodes whose containment edges had no target), but the same violation kept reappearing on v0.9.5 during the daemon's *watch sync* — not on initial index — once an agent's daemon had been running long enough to accumulate edits. The resolution-layer `insertEdges` (and the callback-synthesizer pass) wasn't guarded the same way: a per-resolver name cache or a framework resolver's WeakMap-keyed lookup map could hand back a `Node` whose row had been removed by a recent file rewrite, and the FK check then aborted the entire resolution batch, leaving the user's daemon log filling with `Watch sync failed { error: 'FOREIGN KEY constraint failed' }`. The resolution layer now applies the same defense-in-depth filter the extraction layer already had — one cache-aware `getNodesByIds` per pass drops any edge whose source or target is no longer in the `nodes` table, so the rest of the resolved batch still lands. Surfaces as a fresh `codegraph init`/`index` cycle now surviving its first watch-sync cycle without the FK error, and the daemon recovering naturally instead of compounding into further failures. Closes #455. - **Hermes Agent: `codegraph install --target hermes` no longer corrupts `~/.hermes/config.yaml`.** Hermes serializes its config with PyYAML's default block style, which writes list items at the *same* indent as the parent mapping key (`cli:` and `- hermes-cli` both at column 2). The previous line-based YAML patcher mistook that first ` - hermes-cli` for the next sibling key, truncated the `cli:` block, and then spliced `- mcp-codegraph` at indent 4 *before* the existing items — leaving subsequent entries (`- browser`, `- clarify`, …) and even other platforms (`telegram:`, `discord:`) appearing at the `platform_toolsets:` level, which is no longer parseable YAML. The installer now recognizes the same-indent list style, finds the real end of the block at the next sibling key, and appends `- mcp-codegraph` at whatever indent the existing items already use. Re-installing on an already-corrupted file (or a 4-space-nested config that worked before) still produces a clean, parseable result. Closes #456. - **NestJS: `RouterModule.register([...])` route prefixes now propagate to controller routes.** Previously a controller declared inside a module wired through NestJS's `RouterModule` (a common pattern for modular apps with nested route prefixes) was indexed with its raw `@Controller(...) + @Get(...)` path — so `UsersController` under `RouterModule.register([{ path: 'admin', module: AdminModule, children: [{ path: 'users', module: UsersModule }] }])` showed up as `GET /` instead of `GET /admin/users`. The new cross-file pass walks every `*.module.{ts,js}` for `RouterModule.register/forRoot/forChild([...])` (recursive `children`) and `@Module({ controllers: [...] })`, then prepends the correct prefix to each affected route — including non-empty `@Controller` paths and method-level params (`/admin/users/:id`). The route node's `id` is preserved across the update so existing route→handler edges stay intact, and the pass is idempotent so incremental sync recovers when `app.module.ts` itself is edited. Closes #459. diff --git a/__tests__/sync-fk-regression.test.ts b/__tests__/sync-fk-regression.test.ts new file mode 100644 index 000000000..2d3425a62 --- /dev/null +++ b/__tests__/sync-fk-regression.test.ts @@ -0,0 +1,232 @@ +/** + * Sync FK regression test (issue #455). + * + * #62 plugged FK violations at the extraction layer (empty-named nodes whose + * containment edges had no target). #455 reports the same `FOREIGN KEY constraint + * failed` reappearing on v0.9.5, but during *watch sync* on a Python-only project — + * a different trigger than the C/C++ header empty-name issue #62 covered. + * + * The reproducer below drives the same path the daemon takes: extract → resolve → + * insert edges. The resolution pass's `insertEdges` was not guarded the way the + * extraction-layer insert was after #62, so any edge with a stale source/target + * (e.g. a synthesized framework target whose node was deleted by a concurrent + * file rewrite) throws and aborts the sync, leaving the FK error the user sees. + * + * The test asserts: a sequence of file rewrites + sync()s never throws, and the + * graph stays internally consistent (every edge's source + target are real nodes). + */ +import { describe, it, expect, beforeAll, afterEach } from 'vitest'; +import * as fs from 'fs'; +import * as path from 'path'; +import * as os from 'os'; +import CodeGraph from '../src/index'; +import { initGrammars, loadAllGrammars } from '../src/extraction/grammars'; + +beforeAll(async () => { + await initGrammars(); + await loadAllGrammars(); +}); + +describe('watch sync FK regression (#455)', () => { + let tmpDir: string | undefined; + let cg: CodeGraph | undefined; + + afterEach(() => { + if (cg) { + cg.close(); + cg = undefined; + } + if (tmpDir) { + fs.rmSync(tmpDir, { recursive: true, force: true }); + tmpDir = undefined; + } + }); + + function assertGraphIntegrity(cg: CodeGraph): void { + // Every edge must reference real nodes. If FK was disabled or violated, + // dangling refs would show up here. + const db = (cg as unknown as { db: { getDb(): { prepare(sql: string): { get(): unknown } } } }).db; + const sqlite = db.getDb(); + const dangling = sqlite + .prepare( + `SELECT count(*) as c FROM edges e + WHERE NOT EXISTS (SELECT 1 FROM nodes n WHERE n.id = e.source) + OR NOT EXISTS (SELECT 1 FROM nodes n WHERE n.id = e.target)` + ) + .get() as { c: number }; + expect(dangling.c).toBe(0); + } + + it('survives repeated sync() cycles on a Django-style Python project without FK errors', async () => { + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'cg-fk455-')); + + // Mimic a small Django app: requirements + manage.py marker, models/views/urls + // in two app packages that cross-reference each other. + fs.writeFileSync(path.join(tmpDir, 'manage.py'), '# django marker\n'); + fs.writeFileSync(path.join(tmpDir, 'requirements.txt'), 'django==4.2\n'); + + fs.mkdirSync(path.join(tmpDir, 'users')); + fs.writeFileSync(path.join(tmpDir, 'users/__init__.py'), ''); + fs.writeFileSync( + path.join(tmpDir, 'users/models.py'), + 'class User:\n' + + ' def __init__(self, name):\n' + + ' self.name = name\n' + ); + fs.writeFileSync( + path.join(tmpDir, 'users/views.py'), + 'from users.models import User\n' + + 'class UserListView:\n' + + ' def get(self, request):\n' + + ' return User("a")\n' + ); + fs.writeFileSync( + path.join(tmpDir, 'users/urls.py'), + 'from django.urls import path\n' + + 'from users.views import UserListView\n' + + 'urlpatterns = [path("users/", UserListView.as_view(), name="user-list")]\n' + ); + + fs.mkdirSync(path.join(tmpDir, 'posts')); + fs.writeFileSync(path.join(tmpDir, 'posts/__init__.py'), ''); + fs.writeFileSync( + path.join(tmpDir, 'posts/models.py'), + 'from users.models import User\n' + + 'class Post:\n' + + ' def __init__(self, author):\n' + + ' self.author = author\n' + ); + fs.writeFileSync( + path.join(tmpDir, 'posts/views.py'), + 'from posts.models import Post\n' + + 'class PostListView:\n' + + ' def get(self, request):\n' + + ' return Post(None)\n' + ); + fs.writeFileSync( + path.join(tmpDir, 'posts/urls.py'), + 'from django.urls import path\n' + + 'from posts.views import PostListView\n' + + 'urlpatterns = [path("posts/", PostListView.as_view(), name="post-list")]\n' + ); + + cg = CodeGraph.initSync(tmpDir); + await cg.indexAll(); + assertGraphIntegrity(cg); + + // Drive the same path the daemon's file watcher drives: a series of file + // rewrites + sync()s. We shuffle line counts on each rewrite so node IDs + // (file:kind:name:line) shift around, forcing real INSERT OR REPLACE + + // CASCADE behavior across files that cross-reference each other. + const targets = [ + 'users/views.py', + 'posts/views.py', + 'users/urls.py', + 'posts/urls.py', + 'users/models.py', + ]; + + for (let iter = 0; iter < 8; iter++) { + const file = targets[iter % targets.length]!; + const full = path.join(tmpDir, file); + const content = fs.readFileSync(full, 'utf8'); + // Insert N blank lines at the top to shift every node's line number. + const padded = '\n'.repeat(iter + 1) + content; + // Use a future mtime so the size+mtime pre-filter in + // ExtractionOrchestrator.sync can't skip the file. + fs.writeFileSync(full, padded); + const now = Date.now() + (iter + 1) * 1_000; + fs.utimesSync(full, now / 1000, now / 1000); + + // The fix should make this never throw; before the fix, FK errors fire + // during the resolution-layer insertEdges call inside sync(). + await expect(cg.sync()).resolves.toBeDefined(); + assertGraphIntegrity(cg); + } + }); + + it("drops resolution edges whose target node is no longer in the graph (the pathology #455 reports)", async () => { + // This narrower test reproduces the exact failure mode the user sees in + // their daemon log: the resolver hands `insertEdges` an edge whose target + // doesn't exist in `nodes`, and the FK constraint aborts the whole sync. + // + // We force the bug by populating the resolver's per-name cache with a + // stale node (whose id is *not* in the DB) and then asking it to resolve + // a reference to that name. Without the fix this throws + // `FOREIGN KEY constraint failed`; with it, the bad edge is filtered out + // and resolution returns normally. + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'cg-fk455-stale-')); + fs.writeFileSync(path.join(tmpDir, 'a.py'), 'def caller():\n Target()\n'); + fs.writeFileSync(path.join(tmpDir, 'b.py'), 'class Target:\n pass\n'); + + cg = CodeGraph.initSync(tmpDir); + await cg.indexAll(); + + // Reach in to the internals — the simplest way to forge the "stale node + // ID in the resolver's lookup path" condition the production bug arises + // from. The fix is what the test is verifying; touching internals here + // is a means to that end, not a contract we're asserting. + type Internals = { + queries: { + getNodesByName(name: string): Array<{ id: string; name: string }>; + getAllNodeNames(): string[]; + }; + resolver: { + warmCaches(): void; + resolveAndPersist( + refs: Array<{ + fromNodeId: string; + referenceName: string; + referenceKind: string; + line: number; + column: number; + filePath: string; + language: string; + }> + ): { resolved: unknown[] }; + // eslint-disable-next-line @typescript-eslint/no-explicit-any + nameCache: { set(key: string, value: any): void }; + }; + }; + const internals = cg as unknown as Internals; + const queries = internals.queries; + const resolver = internals.resolver; + + const caller = queries.getNodesByName('caller')[0]; + const target = queries.getNodesByName('Target')[0]; + expect(caller).toBeDefined(); + expect(target).toBeDefined(); + + // Warm caches so warmCaches no-ops on the resolveAndPersist call below + // and our seeded nameCache entry isn't overwritten. + resolver.warmCaches(); + + // Forge a stale lookup result: a Node whose `id` doesn't exist in the + // `nodes` table. This is structurally what happens when a framework + // resolver's WeakMap cache hands back a Node that was deleted by a + // concurrent file rewrite — the user's #455 scenario. + const staleNode = { ...target!, id: 'class:stale.py:Target:1' }; + resolver.nameCache.set('Target', [staleNode]); + + // Ask the resolver to persist an edge that will resolve via the seeded + // (stale) cache entry. Without the FK filter this would throw + // `FOREIGN KEY constraint failed` and abort the whole batch. + expect(() => + resolver.resolveAndPersist([ + { + fromNodeId: caller!.id, + referenceName: 'Target', + referenceKind: 'calls', + line: 2, + column: 4, + filePath: 'a.py', + language: 'python', + }, + ]) + ).not.toThrow(); + + // The bad edge must not have been persisted either — FK enforcement is + // still on, and post-fix the dangling-edge count remains zero. + assertGraphIntegrity(cg); + }); +}); diff --git a/src/resolution/callback-synthesizer.ts b/src/resolution/callback-synthesizer.ts index 2d6cf8d65..1762ea0f0 100644 --- a/src/resolution/callback-synthesizer.ts +++ b/src/resolution/callback-synthesizer.ts @@ -812,6 +812,19 @@ export function synthesizeCallbackEdges(queries: QueryBuilder, ctx: ResolutionCo seen.add(key); merged.push(e); } - if (merged.length > 0) queries.insertEdges(merged); - return merged.length; + if (merged.length > 0) { + // Defense-in-depth (issues #42, #455): drop edges whose source/target + // no longer resolves to a real node. Some channel maps cache native + // Node refs across a resolver lifetime (WeakMap-keyed by context), so + // a file rewrite between map build and synthesis can leave stale IDs + // here. One FK violation aborts the whole batch — better to skip the + // dead edges and emit the rest than lose every synthesized edge. + const allIds = new Set(); + for (const e of merged) { allIds.add(e.source); allIds.add(e.target); } + const existing = queries.getNodesByIds([...allIds]); + const validEdges = merged.filter((e) => existing.has(e.source) && existing.has(e.target)); + if (validEdges.length > 0) queries.insertEdges(validEdges); + return validEdges.length; + } + return 0; } diff --git a/src/resolution/index.ts b/src/resolution/index.ts index f118e6095..67da62ded 100644 --- a/src/resolution/index.ts +++ b/src/resolution/index.ts @@ -607,6 +607,28 @@ export class ReferenceResolver { }); } + /** + * Defense-in-depth: drop edges whose source or target is no longer in + * the nodes table. PR #62 (issue #42) applied this filter at the + * extraction-layer `insertEdges` site; #455 reports the same + * `FOREIGN KEY constraint failed` reappearing here at the + * resolution-layer site during watch sync, where a resolver lookup that + * crosses a framework-specific cache can hand us a target whose node + * was removed by a concurrent file rewrite. One batched, cache-aware + * `getNodesByIds` query is enough to skip those edges quietly instead + * of aborting the whole sync. + */ + private filterEdgesByExistingNodes(edges: Edge[]): Edge[] { + if (edges.length === 0) return edges; + const allIds = new Set(); + for (const e of edges) { + allIds.add(e.source); + allIds.add(e.target); + } + const existing = this.queries.getNodesByIds([...allIds]); + return edges.filter((e) => existing.has(e.source) && existing.has(e.target)); + } + /** * Resolve and persist edges to database */ @@ -620,8 +642,9 @@ export class ReferenceResolver { const edges = this.createEdges(result.resolved); // Insert edges into database - if (edges.length > 0) { - this.queries.insertEdges(edges); + const validEdges = this.filterEdgesByExistingNodes(edges); + if (validEdges.length > 0) { + this.queries.insertEdges(validEdges); } // Clean up resolved refs from unresolved_refs table so metrics are accurate @@ -668,8 +691,9 @@ export class ReferenceResolver { // Persist edges immediately const edges = this.createEdges(result.resolved); - if (edges.length > 0) { - this.queries.insertEdges(edges); + const validEdges = this.filterEdgesByExistingNodes(edges); + if (validEdges.length > 0) { + this.queries.insertEdges(validEdges); } // Clean up resolved refs so they don't appear in the next batch