From c58b44e3136462d4354bf96ddec1a6846abbe146 Mon Sep 17 00:00:00 2001 From: Colby McHenry Date: Tue, 19 May 2026 11:01:54 -0500 Subject: [PATCH] fix(mcp): resolve module-qualified symbol lookups (#173) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `codegraph_callees stage_apply::run` (and `_node`, `_impact`, ...) returned "not found" against a repo with 7-9 sibling Rust modules, each exporting `pub async fn run`. Two underlying issues: 1. The FTS5 query builder stripped `:` as a special char without splitting on `::`, so `stage_apply::run` collapsed to the literal `stage_applyrun` which matches nothing. Treat `::` as whitespace before the strip step so both halves become FTS tokens. 2. `matchesSymbol` only understood `Parent.child` qualifiers and relied on `qualifiedName` carrying the module path. Rust file- level functions don't have their module name in `qualifiedName` (it's encoded in the file path instead), so even dot-style lookups failed. Accept `::`, `.`, `/` as separators; multi-level forms compose; Rust `crate::`/`super::`/`self::` prefixes get stripped before path matching. Fall back to file-path containment when the qualified-name suffix doesn't match — `stage_apply::run` matches a `run` in any file whose path has a `stage_apply` segment. Also tightens the no-match branch: qualified lookups no longer fall through to a fuzzy text match. `stage_apply::nonexistent_fn` returns `null` instead of silently resolving to an unrelated `rollback` in the same file. Co-Authored-By: Claude Opus 4.7 (1M context) --- CHANGELOG.md | 23 ++++ __tests__/symbol-lookup.test.ts | 194 ++++++++++++++++++++++++++++++++ src/db/queries.ts | 8 +- src/mcp/tools.ts | 105 ++++++++++++++--- 4 files changed, 312 insertions(+), 18 deletions(-) create mode 100644 __tests__/symbol-lookup.test.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index 50cb1a5aa..30937cd6d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -37,6 +37,29 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). [#168](https://github.com/colbymchenry/codegraph/issues/168). Thanks to [@starkleek](https://github.com/starkleek) for the report and to [@Bortlesboat](https://github.com/Bortlesboat) for the initial PR. +- **MCP / search**: module-qualified symbol lookups now resolve. The + MCP tools (`codegraph_node`, `codegraph_callees`, `codegraph_impact`, + …) accept `module::symbol` (Rust / C++ / Ruby), `Module.symbol` + (TS / JS / Python), and `module/symbol` (path-style) — multi-level + forms (`crate::configurator::stage_apply::run`) and Rust path + prefixes (`crate`, `super`, `self`) are handled. Two underlying + fixes: + - The FTS5 query builder now treats `::` as a token separator + instead of stripping it to nothing, so `stage_apply::run` no + longer collapses to the unsearchable `stage_applyrun`. + - `matchesSymbol` falls back to a file-path containment check when + `qualifiedName` doesn't carry the module hierarchy (Rust file- + level functions, Python free functions in a package): a `run` + in `src/configurator/stage_apply.rs` now matches + `stage_apply::run` because `stage_apply` appears as a path + segment. + - Qualified lookups that don't match the qualifier no longer fall + through to fuzzy text matches — `stage_apply::nonexistent_fn` + returns `null` instead of resolving to an unrelated `rollback` + in the same file. + Closes [#173](https://github.com/colbymchenry/codegraph/issues/173). + Thanks to [@joselhurtado](https://github.com/joselhurtado) for the + detailed reproduction. [0.7.10]: https://github.com/colbymchenry/codegraph/releases/tag/v0.7.10 diff --git a/__tests__/symbol-lookup.test.ts b/__tests__/symbol-lookup.test.ts new file mode 100644 index 000000000..d27e157b7 --- /dev/null +++ b/__tests__/symbol-lookup.test.ts @@ -0,0 +1,194 @@ +/** + * Module-qualified symbol lookup (`stage_apply::run`, `Session.request`, + * `configurator/stage_apply`). + * + * Pinned because the lookup vocabulary is what makes codegraph useful + * in workspaces with same-named symbols across modules — Rust + * sub-pipelines, Python `__init__.py` packages, Java packages, etc. + * See #173 for the original report: a `run` function in + * `src/configurator/stage_apply.rs` was indexed but `stage_apply::run` + * returned "not found" because (a) FTS strips colons to nothing, + * leaving a useless query, and (b) `matchesSymbol` only understood + * `.`-style qualifiers. + */ + +import { describe, it, expect, beforeAll, beforeEach, afterEach } from 'vitest'; +import * as fs from 'fs'; +import * as path from 'path'; +import * as os from 'os'; +import { initGrammars, loadAllGrammars } from '../src/extraction/grammars'; + +beforeAll(async () => { + await initGrammars(); + await loadAllGrammars(); +}); + +function hasSqliteBindings(): boolean { + try { + const Database = require('better-sqlite3'); + const db = new Database(':memory:'); + db.close(); + return true; + } catch { + return false; + } +} +const HAS_SQLITE = hasSqliteBindings(); + +function tmpRoot(): string { + return fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-symbol-lookup-')); +} + +function rmTree(dir: string): void { + if (fs.existsSync(dir)) fs.rmSync(dir, { recursive: true, force: true }); +} + +async function buildRustWorkspace(): Promise { + const root = tmpRoot(); + const cfgDir = path.join(root, 'src', 'configurator'); + fs.mkdirSync(cfgDir, { recursive: true }); + fs.writeFileSync( + path.join(root, 'Cargo.toml'), + `[package]\nname = "fixture"\nversion = "0.1.0"\nedition = "2021"\n[lib]\npath = "src/lib.rs"\n` + ); + fs.writeFileSync(path.join(root, 'src', 'lib.rs'), `pub mod configurator;\npub mod scheduler;\n`); + fs.writeFileSync( + path.join(cfgDir, 'mod.rs'), + `pub mod stage_apply;\npub mod stage_detect;\n` + ); + fs.writeFileSync( + path.join(cfgDir, 'stage_apply.rs'), + `pub async fn run() -> Result<(), ()> {\n render_and_write();\n Ok(())\n}\n\nfn render_and_write() {}\n` + ); + fs.writeFileSync( + path.join(cfgDir, 'stage_detect.rs'), + `pub async fn run() -> Result<(), ()> { Ok(()) }\n` + ); + fs.writeFileSync( + path.join(root, 'src', 'scheduler.rs'), + `pub fn run_due_tasks() -> Result<(), ()> { Ok(()) }\n` + ); + return root; +} + +describe.skipIf(!HAS_SQLITE)('matchesSymbol — module-qualified lookups (#173)', () => { + let projectRoot: string; + let cg: any; + let handler: any; + let findSymbol: (cg: any, s: string) => { node: any; note: string } | null; + let findAllSymbols: (cg: any, s: string) => { nodes: any[]; note: string }; + + beforeEach(async () => { + projectRoot = await buildRustWorkspace(); + const CodeGraph = (await import('../src/index')).default; + const { ToolHandler } = await import('../src/mcp/tools'); + cg = CodeGraph.initSync(projectRoot, { + config: { include: ['**/*.rs'], exclude: [] }, + }); + await cg.indexAll(); + handler = new ToolHandler(cg); + findSymbol = (handler as any).findSymbol.bind(handler); + findAllSymbols = (handler as any).findAllSymbols.bind(handler); + }); + + afterEach(() => { + handler?.closeAll(); + cg?.destroy(); + rmTree(projectRoot); + }); + + it('resolves `stage_apply::run` to the run in stage_apply.rs (not stage_detect.rs)', () => { + const match = findSymbol(cg, 'stage_apply::run'); + expect(match).not.toBeNull(); + expect(match!.node.name).toBe('run'); + expect(match!.node.filePath).toMatch(/configurator\/stage_apply\.rs$/); + }); + + it('rejects `stage_apply::run` for the same-named function in a different module', () => { + const all = findAllSymbols(cg, 'stage_apply::run'); + // All returned nodes must be in stage_apply.rs — never in stage_detect.rs + for (const node of all.nodes) { + expect(node.filePath).toMatch(/stage_apply\.rs$/); + } + expect(all.nodes.length).toBeGreaterThan(0); + }); + + it('resolves `configurator::stage_apply::run` (multi-level qualifier)', () => { + const match = findSymbol(cg, 'configurator::stage_apply::run'); + expect(match).not.toBeNull(); + expect(match!.node.name).toBe('run'); + expect(match!.node.filePath).toMatch(/configurator\/stage_apply\.rs$/); + }); + + it('resolves `crate::configurator::stage_apply::run` (Rust path prefix stripped)', () => { + const match = findSymbol(cg, 'crate::configurator::stage_apply::run'); + expect(match).not.toBeNull(); + expect(match!.node.filePath).toMatch(/configurator\/stage_apply\.rs$/); + }); + + it('resolves `configurator/stage_apply` (slash qualifier)', () => { + const match = findSymbol(cg, 'configurator/stage_apply/run'); + expect(match).not.toBeNull(); + expect(match!.node.filePath).toMatch(/configurator\/stage_apply\.rs$/); + }); + + it('does not silently collide bare `run` with `run_due_tasks`', () => { + const match = findSymbol(cg, 'run'); + expect(match).not.toBeNull(); + // Whatever it picks, it must be an exact-name match, not a partial. + expect(match!.node.name).toBe('run'); + }); + + it('aggregates all bare-name `run` matches across modules', () => { + const all = findAllSymbols(cg, 'run'); + const names = all.nodes.map((n: any) => n.name); + expect(names.every((n: string) => n === 'run')).toBe(true); + expect(all.nodes.length).toBeGreaterThanOrEqual(2); // stage_apply + stage_detect + // The note should call out the ambiguity. + expect(all.note).toMatch(/Aggregated|symbols named "run"/); + }); + + it('still returns null for genuinely unknown qualified lookups', () => { + const match = findSymbol(cg, 'stage_apply::nonexistent_fn'); + expect(match).toBeNull(); + }); +}); + +describe.skipIf(!HAS_SQLITE)('matchesSymbol — dotted lookups (regression for #173 fix)', () => { + let projectRoot: string; + let cg: any; + let handler: any; + let findSymbol: (cg: any, s: string) => { node: any; note: string } | null; + + beforeEach(async () => { + projectRoot = tmpRoot(); + const src = path.join(projectRoot, 'src'); + fs.mkdirSync(src, { recursive: true }); + fs.writeFileSync( + path.join(src, 'session.ts'), + `export class Session {\n request(): void {}\n}\nexport function request(): void {}\n` + ); + + const CodeGraph = (await import('../src/index')).default; + const { ToolHandler } = await import('../src/mcp/tools'); + cg = CodeGraph.initSync(projectRoot, { + config: { include: ['src/**/*.ts'], exclude: [] }, + }); + await cg.indexAll(); + handler = new ToolHandler(cg); + findSymbol = (handler as any).findSymbol.bind(handler); + }); + + afterEach(() => { + handler?.closeAll(); + cg?.destroy(); + rmTree(projectRoot); + }); + + it('`Session.request` resolves to the method, not the bare function', () => { + const match = findSymbol(cg, 'Session.request'); + expect(match).not.toBeNull(); + expect(match!.node.kind).toBe('method'); + expect(match!.node.qualifiedName).toContain('Session::request'); + }); +}); diff --git a/src/db/queries.ts b/src/db/queries.ts index db7c6118d..ebba66e6a 100644 --- a/src/db/queries.ts +++ b/src/db/queries.ts @@ -696,8 +696,14 @@ export class QueryBuilder { const { kinds, languages, limit = 100, offset = 0 } = options; // Add prefix wildcard for better matching (e.g., "auth" matches "AuthService", "authenticate") - // Escape special FTS5 characters and add prefix wildcard + // Escape special FTS5 characters and add prefix wildcard. + // + // `::` is a qualifier separator in Rust/C++/Ruby, not a token char, + // so treat it as whitespace before the strip step. Otherwise queries + // like `stage_apply::run` collapse to `stage_applyrun` (the colons + // are stripped without splitting) and find nothing. See #173. const ftsQuery = query + .replace(/::/g, ' ') // Rust/C++/Ruby qualifier separator .replace(/['"*():^]/g, '') // Remove FTS5 special chars .split(/\s+/) .filter(term => term.length > 0) diff --git a/src/mcp/tools.ts b/src/mcp/tools.ts index e796cfc74..9e9ef9d33 100644 --- a/src/mcp/tools.ts +++ b/src/mcp/tools.ts @@ -16,6 +16,21 @@ import { WASM_FALLBACK_FIX_RECIPE } from '../db'; /** Maximum output length to prevent context bloat (characters) */ const MAX_OUTPUT_LENGTH = 15000; +/** + * Rust path roots that have no file-system equivalent — `crate` is the + * current crate, `super` is the parent module, `self` is the current + * module. Used by `matchesSymbol` to strip these before file-path + * matching so `crate::configurator::stage_apply::run` resolves the + * same as `configurator::stage_apply::run`. + */ +const RUST_PATH_PREFIXES = new Set(['crate', 'super', 'self']); + +/** Last `::` / `.` / `/`-separated segment of a qualified symbol. */ +function lastQualifierPart(symbol: string): string { + const parts = symbol.split(/::|[./]/).filter((p) => p.length > 0); + return parts[parts.length - 1] ?? symbol; +} + /** * Calculate the recommended number of codegraph_explore calls based on project size. * Larger codebases need more exploration calls to cover their surface area, @@ -1204,9 +1219,22 @@ export class ToolHandler { * Returns the best match and a note about alternatives if any. */ /** - * Check if a node matches a symbol query, supporting both simple names and - * qualified "Parent.child" notation (e.g., "Session.request" matches a method - * named "request" inside a class named "Session"). + * Check if a node matches a symbol query. + * + * Accepts simple names (`run`) and three flavors of qualifier: + * - dotted `Session.request` (TS/JS/Python) + * - colon-pair `stage_apply::run` (Rust, C++, Ruby) + * - slash `configurator/stage_apply` (path-ish) + * + * Multi-level qualifiers compose: `crate::configurator::stage_apply::run` + * works. Rust path prefixes (`crate`, `super`, `self`) are stripped so + * the canonical `crate::module::symbol` form resolves. + * + * Resolution order, last part must always equal `node.name`: + * 1. Suffix-match against `qualifiedName` (handles class-scoped methods + * where the extractor builds the qualified name from the AST stack) + * 2. File-path containment (handles file-derived modules in Rust/ + * Python — `stage_apply::run` matches a `run` in `stage_apply.rs`) */ private matchesSymbol(node: Node, symbol: string): boolean { // Simple name match @@ -1214,21 +1242,52 @@ export class ToolHandler { // File basename match (e.g., "product-card" matches "product-card.liquid") if (node.kind === 'file' && node.name.replace(/\.[^.]+$/, '') === symbol) return true; - // Qualified name match: "Parent.child" → look for "::Parent::child" in qualified_name - if (symbol.includes('.')) { - const parts = symbol.split('.'); - const qualifiedSuffix = parts.join('::'); - if (node.qualifiedName.includes(qualifiedSuffix)) return true; - } - - return false; + // Qualified-name lookups: split on any supported separator. `\w` keeps + // identifier chars (incl. `_`) intact; everything else is treated as + // a separator we tolerate. + if (!/[.\/]|::/.test(symbol)) return false; + const parts = symbol.split(/::|[./]/).filter((p) => p.length > 0); + if (parts.length < 2) return false; + + const lastPart = parts[parts.length - 1]!; + if (node.name !== lastPart) return false; + + // Stage 1: qualified-name suffix match. The extractor joins the + // semantic hierarchy with `::`, so `Session.request` and + // `Session::request` both become `Session::request` here. + const colonSuffix = parts.join('::'); + if (node.qualifiedName.includes(colonSuffix)) return true; + + // Stage 2: file-path containment. Rust modules and Python packages + // are not in `qualifiedName` — they're encoded in the file path. So + // `stage_apply::run` matches a `run` in any file whose path + // contains a `stage_apply` segment (with or without an extension). + // + // Filter out Rust path prefixes that have no file-system equivalent. + const containerHints = parts.slice(0, -1).filter((p) => !RUST_PATH_PREFIXES.has(p)); + if (containerHints.length === 0) return false; + + const segments = node.filePath.split('/').filter((s) => s.length > 0); + return containerHints.every((hint) => + segments.some((seg) => seg === hint || seg.replace(/\.[^.]+$/, '') === hint) + ); } private findSymbol(cg: CodeGraph, symbol: string): { node: Node; note: string } | null { - // Use higher limit for qualified lookups (e.g., "Session.request") since the - // target may rank lower in FTS when there are many partial matches - const limit = symbol.includes('.') ? 50 : 10; - const results = cg.searchNodes(symbol, { limit }); + // Use higher limit for qualified lookups (e.g., "Session.request", + // "stage_apply::run") since the target may rank lower in FTS when + // there are many partial matches across the qualifier parts. + const isQualified = /[.\/]|::/.test(symbol); + const limit = isQualified ? 50 : 10; + let results = cg.searchNodes(symbol, { limit }); + + // FTS strips colons as a special char, so `stage_apply::run` searches + // for the literal `stage_applyrun` and finds nothing. Re-search by + // the bare last part and let `matchesSymbol` filter by qualifier. + if (isQualified && results.length === 0) { + const tail = lastQualifierPart(symbol); + if (tail && tail !== symbol) results = cg.searchNodes(tail, { limit }); + } if (results.length === 0 || !results[0]) { return null; @@ -1250,7 +1309,11 @@ export class ToolHandler { return { node: picked, note }; } - // No exact match, use best fuzzy match + // No exact match. For qualified lookups, don't silently fall back + // to a fuzzy result — the user typed a specific qualifier, and + // resolving `stage_apply::nonexistent_fn` to the unrelated + // `stage_apply.rs` file would be actively misleading (#173). + if (isQualified) return null; return { node: results[0]!.node, note: '' }; } @@ -1259,7 +1322,15 @@ export class ToolHandler { * results across all matching symbols (e.g., multiple classes with an `execute` method). */ private findAllSymbols(cg: CodeGraph, symbol: string): { nodes: Node[]; note: string } { - const results = cg.searchNodes(symbol, { limit: 50 }); + let results = cg.searchNodes(symbol, { limit: 50 }); + + // Mirror the fallback in `findSymbol` for qualified queries — FTS + // strips colons, so a module-qualified lookup needs a second pass + // by the bare last part. + if (results.length === 0 && /[.\/]|::/.test(symbol)) { + const tail = lastQualifierPart(symbol); + if (tail && tail !== symbol) results = cg.searchNodes(tail, { limit: 50 }); + } if (results.length === 0) { return { nodes: [], note: '' };