diff --git a/CHANGELOG.md b/CHANGELOG.md index d727e6cd0..173aaf93b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,22 @@ a [GitHub Release](https://github.com/colbymchenry/codegraph/releases) tagged This project follows [Keep a Changelog](https://keepachangelog.com/en/1.1.0/) and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.9.5] - 2026-05-25 + +### Fixed +- **The index now stays in sync after `git pull`, branch switches, and edits made + outside your editor.** Incremental sync used to detect changes via `git status`, which + only sees *uncommitted* edits — so code pulled or checked out (which leaves a + clean working tree) was silently missed until a full `codegraph index -f`. + Change detection is now filesystem-based and git-independent: a `(size, mtime)` + stat pre-filter skips unchanged files, then a content hash confirms the rest. It + reconciles committed changes from `pull`/`checkout`/`merge`/`rebase`, plain edits + in non-git projects, and deletions alike. +- **The MCP server catches up on connect.** When your editor connects, codegraph + starts a background reconcile of anything that changed while it wasn't running + (e.g. a `git pull` from the terminal), so queries catch up to the current code + right after connect instead of serving a stale snapshot until the next live edit. + ## [0.9.4] - 2026-05-24 ### Added @@ -228,6 +244,7 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). find its bundle. The release pipeline now verifies every package reached the registry (and is idempotent), so a release can't pass green-but-broken again. 
+[0.9.5]: https://github.com/colbymchenry/codegraph/releases/tag/v0.9.5 [0.9.4]: https://github.com/colbymchenry/codegraph/releases/tag/v0.9.4 [0.9.3]: https://github.com/colbymchenry/codegraph/releases/tag/v0.9.3 [0.9.2]: https://github.com/colbymchenry/codegraph/releases/tag/v0.9.2 diff --git a/src/extraction/index.ts b/src/extraction/index.ts index d502a24f0..5d2ef7ec0 100644 --- a/src/extraction/index.ts +++ b/src/extraction/index.ts @@ -1202,8 +1202,12 @@ export class ExtractionOrchestrator { } /** - * Sync with current file state. - * Uses git status as a fast path when available, falling back to full scan. + * Sync the index with the current file state. + * + * Change detection is filesystem-based, never git: a (size, mtime) stat + * pre-filter skips unchanged files, then a content-hash compare confirms real + * changes. This works in non-git projects and catches committed changes from + * `git pull`/`checkout`/`merge`/`rebase` that `git status` cannot see. */ async sync(onProgress?: (progress: IndexProgress) => void): Promise { await initGrammars(); // Initialize WASM runtime (grammars loaded lazily below) @@ -1222,93 +1226,75 @@ export class ExtractionOrchestrator { }); const filesToIndex: string[] = []; - const gitChanges = getGitChangedFiles(this.rootDir); + // === Filesystem reconcile (git-independent) === + // The source of truth for "what changed" is the filesystem vs the indexed + // state — never git. We enumerate the current source files and reconcile + // each against the DB. A cheap (size, mtime) stat pre-filter skips unchanged + // files without reading or hashing them, so the expensive read+hash+parse + // only runs for files that actually changed. This catches edits/adds/deletes + // whether or not the project uses git, and crucially also catches committed + // changes from `git pull`/`checkout`/`merge`/`rebase` — which `git status` + // cannot see, because the working tree is clean afterward. 
+ const currentFiles = scanDirectory(this.rootDir); + filesChecked = currentFiles.length; + const currentSet = new Set(currentFiles); - if (gitChanges) { - // === Git fast path === - // Only inspect the files git reports as changed instead of scanning everything. - filesChecked = gitChanges.modified.length + gitChanges.added.length + gitChanges.deleted.length; + const trackedFiles = this.queries.getAllFiles(); + const trackedMap = new Map(); + for (const f of trackedFiles) { + trackedMap.set(f.path, f); + } - // Handle deleted files - for (const filePath of gitChanges.deleted) { - const tracked = this.queries.getFileByPath(filePath); - if (tracked) { - this.queries.deleteFile(filePath); - filesRemoved++; - } + // Removals: tracked in the DB but no longer a present source file. Check the + // filesystem directly — `scanDirectory` (via `git ls-files`) still lists a + // file deleted from disk but not yet staged, so set membership alone misses it. + for (const tracked of trackedFiles) { + if (!currentSet.has(tracked.path) || !fs.existsSync(path.join(this.rootDir, tracked.path))) { + this.queries.deleteFile(tracked.path); + filesRemoved++; } + } - // Handle modified + added files — read + hash only these. Untracked - // (`??`) files stay untracked in git even after we index them, so they - // can't be trusted as "new": re-hash and compare against the DB exactly - // like modified files. Otherwise every sync re-indexes them and status - // reports them as pending forever. (See issue #206.) - for (const filePath of [...gitChanges.modified, ...gitChanges.added]) { - const fullPath = path.join(this.rootDir, filePath); - let content: string; + // Adds / modifications. + for (const filePath of currentFiles) { + const fullPath = path.join(this.rootDir, filePath); + const tracked = trackedMap.get(filePath); + + // Cheap pre-filter: an already-indexed file whose size AND mtime both match + // the DB is unchanged — skip it without reading or hashing. 
(A content + // change that preserves both exactly is the blind spot every mtime-based + // incremental tool accepts; `index --force` is the escape hatch. Git bumps + // mtime on every file it writes during checkout/merge, so pulls are caught.) + if (tracked) { try { - content = fs.readFileSync(fullPath, 'utf-8'); + const stat = fs.statSync(fullPath); + if (stat.size === tracked.size && Math.floor(stat.mtimeMs) === Math.floor(tracked.modifiedAt)) { + continue; + } } catch (error) { - logDebug('Skipping unreadable file during sync', { filePath, error: String(error) }); + logDebug('Skipping unstattable file during sync', { filePath, error: String(error) }); continue; } - - const contentHash = hashContent(content); - const tracked = this.queries.getFileByPath(filePath); - - if (!tracked) { - filesToIndex.push(filePath); - changedFilePaths.push(filePath); - filesAdded++; - } else if (tracked.contentHash !== contentHash) { - filesToIndex.push(filePath); - changedFilePaths.push(filePath); - filesModified++; - } - } - } else { - // === Fallback: full scan (non-git project or git failure) === - const currentFiles = new Set(scanDirectory(this.rootDir)); - filesChecked = currentFiles.size; - - // Build Map for O(1) lookups instead of .find() per file - const trackedFiles = this.queries.getAllFiles(); - const trackedMap = new Map(); - for (const f of trackedFiles) { - trackedMap.set(f.path, f); } - // Find files to remove (in DB but not on disk) - for (const tracked of trackedFiles) { - if (!currentFiles.has(tracked.path)) { - this.queries.deleteFile(tracked.path); - filesRemoved++; - } + // New, or size/mtime changed — read + hash to confirm a real content change. 
+ let content: string; + try { + content = fs.readFileSync(fullPath, 'utf-8'); + } catch (error) { + logDebug('Skipping unreadable file during sync', { filePath, error: String(error) }); + continue; } + const contentHash = hashContent(content); - // Find files to add or update - for (const filePath of currentFiles) { - const fullPath = path.join(this.rootDir, filePath); - let content: string; - try { - content = fs.readFileSync(fullPath, 'utf-8'); - } catch (error) { - logDebug('Skipping unreadable file during sync', { filePath, error: String(error) }); - continue; - } - - const contentHash = hashContent(content); - const tracked = trackedMap.get(filePath); - - if (!tracked) { - filesToIndex.push(filePath); - changedFilePaths.push(filePath); - filesAdded++; - } else if (tracked.contentHash !== contentHash) { - filesToIndex.push(filePath); - changedFilePaths.push(filePath); - filesModified++; - } + if (!tracked) { + filesToIndex.push(filePath); + changedFilePaths.push(filePath); + filesAdded++; + } else if (tracked.contentHash !== contentHash) { + filesToIndex.push(filePath); + changedFilePaths.push(filePath); + filesModified++; } } diff --git a/src/mcp/index.ts b/src/mcp/index.ts index 8d0e35d79..71dbc9b9f 100644 --- a/src/mcp/index.ts +++ b/src/mcp/index.ts @@ -243,6 +243,7 @@ export class MCPServer { this.cg = await CodeGraph.open(resolvedRoot); this.toolHandler.setDefaultCodeGraph(this.cg); this.startWatching(); + this.catchUpSync(); } catch (err) { // Log the error so transient failures are diagnosable (see issue #47) const msg = err instanceof Error ? 
err.message : String(err); @@ -301,6 +302,7 @@ export class MCPServer { this.projectPath = resolvedRoot; this.toolHandler.setDefaultCodeGraph(this.cg); this.startWatching(); + this.catchUpSync(); } catch { // Still failing — will retry on next tool call } @@ -370,6 +372,32 @@ export class MCPServer { } } + /** + * Reconcile the index with the current filesystem once, right after connect — + * catches edits, adds, deletes, and `git pull`/`checkout` changes made while + * no watcher was running. Runs in the background so it never delays the + * `initialize` response; `sync()` is incremental (a stat pre-filter skips + * unchanged files) and mutex-guarded, so it can't collide with the live + * watcher or a git-hook sync. Runs even when the watcher is unavailable + * (e.g. WSL2 /mnt drives), where catch-up matters most. + */ + private catchUpSync(): void { + const cg = this.cg; + if (!cg) return; + void cg + .sync() + .then((result) => { + const changed = result.filesAdded + result.filesModified + result.filesRemoved; + if (changed > 0) { + process.stderr.write(`[CodeGraph MCP] Caught up ${changed} file(s) changed since last run\n`); + } + }) + .catch((err) => { + const msg = err instanceof Error ? err.message : String(err); + process.stderr.write(`[CodeGraph MCP] Catch-up sync failed: ${msg}\n`); + }); + } + /** * Stop the server */