Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,29 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
[#168](https://github.com/colbymchenry/codegraph/issues/168). Thanks to
[@starkleek](https://github.com/starkleek) for the report and to
[@Bortlesboat](https://github.com/Bortlesboat) for the initial PR.
- **MCP / search**: module-qualified symbol lookups now resolve. The
MCP tools (`codegraph_node`, `codegraph_callees`, `codegraph_impact`,
…) accept `module::symbol` (Rust / C++ / Ruby), `Module.symbol`
(TS / JS / Python), and `module/symbol` (path-style) — multi-level
forms (`crate::configurator::stage_apply::run`) and Rust path
prefixes (`crate`, `super`, `self`) are handled. Two underlying
fixes:
- The FTS5 query builder now treats `::` as a token separator
instead of stripping it to nothing, so `stage_apply::run` no
longer collapses to the unsearchable `stage_applyrun`.
- `matchesSymbol` falls back to a file-path containment check when
`qualifiedName` doesn't carry the module hierarchy (Rust file-
level functions, Python free functions in a package): a `run`
in `src/configurator/stage_apply.rs` now matches
`stage_apply::run` because `stage_apply` appears as a path
segment.
- Qualified lookups that don't match the qualifier no longer fall
through to fuzzy text matches — `stage_apply::nonexistent_fn`
returns `null` instead of resolving to an unrelated `rollback`
in the same file.
Closes [#173](https://github.com/colbymchenry/codegraph/issues/173).
Thanks to [@joselhurtado](https://github.com/joselhurtado) for the
detailed reproduction.

[0.7.10]: https://github.com/colbymchenry/codegraph/releases/tag/v0.7.10

Expand Down
194 changes: 194 additions & 0 deletions __tests__/symbol-lookup.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,194 @@
/**
* Module-qualified symbol lookup (`stage_apply::run`, `Session.request`,
* `configurator/stage_apply`).
*
* Pinned because the lookup vocabulary is what makes codegraph useful
* in workspaces with same-named symbols across modules — Rust
* sub-pipelines, Python `__init__.py` packages, Java packages, etc.
* See #173 for the original report: a `run` function in
* `src/configurator/stage_apply.rs` was indexed but `stage_apply::run`
* returned "not found" because (a) FTS strips colons to nothing,
* leaving a useless query, and (b) `matchesSymbol` only understood
* `.`-style qualifiers.
*/

import { describe, it, expect, beforeAll, beforeEach, afterEach } from 'vitest';
import * as fs from 'fs';
import * as path from 'path';
import * as os from 'os';
import { initGrammars, loadAllGrammars } from '../src/extraction/grammars';

beforeAll(async () => {
await initGrammars();
await loadAllGrammars();
});

function hasSqliteBindings(): boolean {
try {
const Database = require('better-sqlite3');
const db = new Database(':memory:');
db.close();
return true;
} catch {
return false;
}
}
const HAS_SQLITE = hasSqliteBindings();

function tmpRoot(): string {
return fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-symbol-lookup-'));
}

function rmTree(dir: string): void {
if (fs.existsSync(dir)) fs.rmSync(dir, { recursive: true, force: true });
}

async function buildRustWorkspace(): Promise<string> {
const root = tmpRoot();
const cfgDir = path.join(root, 'src', 'configurator');
fs.mkdirSync(cfgDir, { recursive: true });
fs.writeFileSync(
path.join(root, 'Cargo.toml'),
`[package]\nname = "fixture"\nversion = "0.1.0"\nedition = "2021"\n[lib]\npath = "src/lib.rs"\n`
);
fs.writeFileSync(path.join(root, 'src', 'lib.rs'), `pub mod configurator;\npub mod scheduler;\n`);
fs.writeFileSync(
path.join(cfgDir, 'mod.rs'),
`pub mod stage_apply;\npub mod stage_detect;\n`
);
fs.writeFileSync(
path.join(cfgDir, 'stage_apply.rs'),
`pub async fn run() -> Result<(), ()> {\n render_and_write();\n Ok(())\n}\n\nfn render_and_write() {}\n`
);
fs.writeFileSync(
path.join(cfgDir, 'stage_detect.rs'),
`pub async fn run() -> Result<(), ()> { Ok(()) }\n`
);
fs.writeFileSync(
path.join(root, 'src', 'scheduler.rs'),
`pub fn run_due_tasks() -> Result<(), ()> { Ok(()) }\n`
);
return root;
}

describe.skipIf(!HAS_SQLITE)('matchesSymbol — module-qualified lookups (#173)', () => {
let projectRoot: string;
let cg: any;
let handler: any;
let findSymbol: (cg: any, s: string) => { node: any; note: string } | null;
let findAllSymbols: (cg: any, s: string) => { nodes: any[]; note: string };

beforeEach(async () => {
projectRoot = await buildRustWorkspace();
const CodeGraph = (await import('../src/index')).default;
const { ToolHandler } = await import('../src/mcp/tools');
cg = CodeGraph.initSync(projectRoot, {
config: { include: ['**/*.rs'], exclude: [] },
});
await cg.indexAll();
handler = new ToolHandler(cg);
findSymbol = (handler as any).findSymbol.bind(handler);
findAllSymbols = (handler as any).findAllSymbols.bind(handler);
});

afterEach(() => {
handler?.closeAll();
cg?.destroy();
rmTree(projectRoot);
});

it('resolves `stage_apply::run` to the run in stage_apply.rs (not stage_detect.rs)', () => {
const match = findSymbol(cg, 'stage_apply::run');
expect(match).not.toBeNull();
expect(match!.node.name).toBe('run');
expect(match!.node.filePath).toMatch(/configurator\/stage_apply\.rs$/);
});

it('rejects `stage_apply::run` for the same-named function in a different module', () => {
const all = findAllSymbols(cg, 'stage_apply::run');
// All returned nodes must be in stage_apply.rs — never in stage_detect.rs
for (const node of all.nodes) {
expect(node.filePath).toMatch(/stage_apply\.rs$/);
}
expect(all.nodes.length).toBeGreaterThan(0);
});

it('resolves `configurator::stage_apply::run` (multi-level qualifier)', () => {
const match = findSymbol(cg, 'configurator::stage_apply::run');
expect(match).not.toBeNull();
expect(match!.node.name).toBe('run');
expect(match!.node.filePath).toMatch(/configurator\/stage_apply\.rs$/);
});

it('resolves `crate::configurator::stage_apply::run` (Rust path prefix stripped)', () => {
const match = findSymbol(cg, 'crate::configurator::stage_apply::run');
expect(match).not.toBeNull();
expect(match!.node.filePath).toMatch(/configurator\/stage_apply\.rs$/);
});

it('resolves `configurator/stage_apply` (slash qualifier)', () => {
const match = findSymbol(cg, 'configurator/stage_apply/run');
expect(match).not.toBeNull();
expect(match!.node.filePath).toMatch(/configurator\/stage_apply\.rs$/);
});

it('does not silently collide bare `run` with `run_due_tasks`', () => {
const match = findSymbol(cg, 'run');
expect(match).not.toBeNull();
// Whatever it picks, it must be an exact-name match, not a partial.
expect(match!.node.name).toBe('run');
});

it('aggregates all bare-name `run` matches across modules', () => {
const all = findAllSymbols(cg, 'run');
const names = all.nodes.map((n: any) => n.name);
expect(names.every((n: string) => n === 'run')).toBe(true);
expect(all.nodes.length).toBeGreaterThanOrEqual(2); // stage_apply + stage_detect
// The note should call out the ambiguity.
expect(all.note).toMatch(/Aggregated|symbols named "run"/);
});

it('still returns null for genuinely unknown qualified lookups', () => {
const match = findSymbol(cg, 'stage_apply::nonexistent_fn');
expect(match).toBeNull();
});
});

describe.skipIf(!HAS_SQLITE)('matchesSymbol — dotted lookups (regression for #173 fix)', () => {
let projectRoot: string;
let cg: any;
let handler: any;
let findSymbol: (cg: any, s: string) => { node: any; note: string } | null;

beforeEach(async () => {
projectRoot = tmpRoot();
const src = path.join(projectRoot, 'src');
fs.mkdirSync(src, { recursive: true });
fs.writeFileSync(
path.join(src, 'session.ts'),
`export class Session {\n request(): void {}\n}\nexport function request(): void {}\n`
);

const CodeGraph = (await import('../src/index')).default;
const { ToolHandler } = await import('../src/mcp/tools');
cg = CodeGraph.initSync(projectRoot, {
config: { include: ['src/**/*.ts'], exclude: [] },
});
await cg.indexAll();
handler = new ToolHandler(cg);
findSymbol = (handler as any).findSymbol.bind(handler);
});

afterEach(() => {
handler?.closeAll();
cg?.destroy();
rmTree(projectRoot);
});

it('`Session.request` resolves to the method, not the bare function', () => {
const match = findSymbol(cg, 'Session.request');
expect(match).not.toBeNull();
expect(match!.node.kind).toBe('method');
expect(match!.node.qualifiedName).toContain('Session::request');
});
});
8 changes: 7 additions & 1 deletion src/db/queries.ts
Original file line number Diff line number Diff line change
Expand Up @@ -696,8 +696,14 @@ export class QueryBuilder {
const { kinds, languages, limit = 100, offset = 0 } = options;

// Add prefix wildcard for better matching (e.g., "auth" matches "AuthService", "authenticate")
// Escape special FTS5 characters and add prefix wildcard
// Escape special FTS5 characters and add prefix wildcard.
//
// `::` is a qualifier separator in Rust/C++/Ruby, not a token char,
// so treat it as whitespace before the strip step. Otherwise queries
// like `stage_apply::run` collapse to `stage_applyrun` (the colons
// are stripped without splitting) and find nothing. See #173.
const ftsQuery = query
.replace(/::/g, ' ') // Rust/C++/Ruby qualifier separator
.replace(/['"*():^]/g, '') // Remove FTS5 special chars
.split(/\s+/)
.filter(term => term.length > 0)
Expand Down
105 changes: 88 additions & 17 deletions src/mcp/tools.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,21 @@ import { WASM_FALLBACK_FIX_RECIPE } from '../db';
/** Maximum output length to prevent context bloat (characters) */
const MAX_OUTPUT_LENGTH = 15000;

/**
* Rust path roots that have no file-system equivalent — `crate` is the
* current crate, `super` is the parent module, `self` is the current
* module. Used by `matchesSymbol` to strip these before file-path
* matching so `crate::configurator::stage_apply::run` resolves the
* same as `configurator::stage_apply::run`.
*/
const RUST_PATH_PREFIXES = new Set(['crate', 'super', 'self']);

/** Last `::` / `.` / `/`-separated segment of a qualified symbol. */
function lastQualifierPart(symbol: string): string {
const parts = symbol.split(/::|[./]/).filter((p) => p.length > 0);
return parts[parts.length - 1] ?? symbol;
}

/**
* Calculate the recommended number of codegraph_explore calls based on project size.
* Larger codebases need more exploration calls to cover their surface area,
Expand Down Expand Up @@ -1204,31 +1219,75 @@ export class ToolHandler {
* Returns the best match and a note about alternatives if any.
*/
/**
* Check if a node matches a symbol query, supporting both simple names and
* qualified "Parent.child" notation (e.g., "Session.request" matches a method
* named "request" inside a class named "Session").
* Check if a node matches a symbol query.
*
* Accepts simple names (`run`) and three flavors of qualifier:
* - dotted `Session.request` (TS/JS/Python)
* - colon-pair `stage_apply::run` (Rust, C++, Ruby)
* - slash `configurator/stage_apply` (path-ish)
*
* Multi-level qualifiers compose: `crate::configurator::stage_apply::run`
* works. Rust path prefixes (`crate`, `super`, `self`) are stripped so
* the canonical `crate::module::symbol` form resolves.
*
* Resolution order, last part must always equal `node.name`:
* 1. Suffix-match against `qualifiedName` (handles class-scoped methods
* where the extractor builds the qualified name from the AST stack)
* 2. File-path containment (handles file-derived modules in Rust/
* Python — `stage_apply::run` matches a `run` in `stage_apply.rs`)
*/
private matchesSymbol(node: Node, symbol: string): boolean {
// Simple name match
if (node.name === symbol) return true;
// File basename match (e.g., "product-card" matches "product-card.liquid")
if (node.kind === 'file' && node.name.replace(/\.[^.]+$/, '') === symbol) return true;

// Qualified name match: "Parent.child" → look for "::Parent::child" in qualified_name
if (symbol.includes('.')) {
const parts = symbol.split('.');
const qualifiedSuffix = parts.join('::');
if (node.qualifiedName.includes(qualifiedSuffix)) return true;
}

return false;
// Qualified-name lookups: split on any supported separator. `\w` keeps
// identifier chars (incl. `_`) intact; everything else is treated as
// a separator we tolerate.
if (!/[.\/]|::/.test(symbol)) return false;
const parts = symbol.split(/::|[./]/).filter((p) => p.length > 0);
if (parts.length < 2) return false;

const lastPart = parts[parts.length - 1]!;
if (node.name !== lastPart) return false;

// Stage 1: qualified-name suffix match. The extractor joins the
// semantic hierarchy with `::`, so `Session.request` and
// `Session::request` both become `Session::request` here.
const colonSuffix = parts.join('::');
if (node.qualifiedName.includes(colonSuffix)) return true;

// Stage 2: file-path containment. Rust modules and Python packages
// are not in `qualifiedName` — they're encoded in the file path. So
// `stage_apply::run` matches a `run` in any file whose path
// contains a `stage_apply` segment (with or without an extension).
//
// Filter out Rust path prefixes that have no file-system equivalent.
const containerHints = parts.slice(0, -1).filter((p) => !RUST_PATH_PREFIXES.has(p));
if (containerHints.length === 0) return false;

const segments = node.filePath.split('/').filter((s) => s.length > 0);
return containerHints.every((hint) =>
segments.some((seg) => seg === hint || seg.replace(/\.[^.]+$/, '') === hint)
);
}

private findSymbol(cg: CodeGraph, symbol: string): { node: Node; note: string } | null {
// Use higher limit for qualified lookups (e.g., "Session.request") since the
// target may rank lower in FTS when there are many partial matches
const limit = symbol.includes('.') ? 50 : 10;
const results = cg.searchNodes(symbol, { limit });
// Use higher limit for qualified lookups (e.g., "Session.request",
// "stage_apply::run") since the target may rank lower in FTS when
// there are many partial matches across the qualifier parts.
const isQualified = /[.\/]|::/.test(symbol);
const limit = isQualified ? 50 : 10;
let results = cg.searchNodes(symbol, { limit });

// FTS strips colons as a special char, so `stage_apply::run` searches
// for the literal `stage_applyrun` and finds nothing. Re-search by
// the bare last part and let `matchesSymbol` filter by qualifier.
if (isQualified && results.length === 0) {
const tail = lastQualifierPart(symbol);
if (tail && tail !== symbol) results = cg.searchNodes(tail, { limit });
}

if (results.length === 0 || !results[0]) {
return null;
Expand All @@ -1250,7 +1309,11 @@ export class ToolHandler {
return { node: picked, note };
}

// No exact match, use best fuzzy match
// No exact match. For qualified lookups, don't silently fall back
// to a fuzzy result — the user typed a specific qualifier, and
// resolving `stage_apply::nonexistent_fn` to the unrelated
// `stage_apply.rs` file would be actively misleading (#173).
if (isQualified) return null;
return { node: results[0]!.node, note: '' };
}

Expand All @@ -1259,7 +1322,15 @@ export class ToolHandler {
* results across all matching symbols (e.g., multiple classes with an `execute` method).
*/
private findAllSymbols(cg: CodeGraph, symbol: string): { nodes: Node[]; note: string } {
const results = cg.searchNodes(symbol, { limit: 50 });
let results = cg.searchNodes(symbol, { limit: 50 });

// Mirror the fallback in `findSymbol` for qualified queries — FTS
// strips colons, so a module-qualified lookup needs a second pass
// by the bare last part.
if (results.length === 0 && /[.\/]|::/.test(symbol)) {
const tail = lastQualifierPart(symbol);
if (tail && tail !== symbol) results = cg.searchNodes(tail, { limit: 50 });
}

if (results.length === 0) {
return { nodes: [], note: '' };
Expand Down