Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,22 @@ a [GitHub Release](https://github.com/colbymchenry/codegraph/releases) tagged
This project follows [Keep a Changelog](https://keepachangelog.com/en/1.1.0/)
and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [0.9.5] - 2026-05-25

### Fixed
- **The index now stays in sync after `git pull`, branch switches, and edits made
outside your editor.** Incremental sync previously detected changes via `git status`,
which only sees *uncommitted* edits — so code pulled or checked out (which leaves a
clean working tree) was silently missed until a full `codegraph index -f`.
Change detection is now filesystem-based and git-independent: a `(size, mtime)`
stat pre-filter skips unchanged files, then a content hash confirms the rest. It
reconciles committed changes from `pull`/`checkout`/`merge`/`rebase`, plain edits
in non-git projects, and deletions alike.
- **The MCP server catches up on connect.** When your editor connects, codegraph
reconciles in the background anything that changed while it wasn't running (e.g. a
`git pull` from the terminal), so queries reflect the current code without waiting
for the next live edit to refresh a stale snapshot.

## [0.9.4] - 2026-05-24

### Added
Expand Down Expand Up @@ -228,6 +244,7 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
find its bundle. The release pipeline now verifies every package reached the
registry (and is idempotent), so a release can't pass green-but-broken again.

[0.9.5]: https://github.com/colbymchenry/codegraph/releases/tag/v0.9.5
[0.9.4]: https://github.com/colbymchenry/codegraph/releases/tag/v0.9.4
[0.9.3]: https://github.com/colbymchenry/codegraph/releases/tag/v0.9.3
[0.9.2]: https://github.com/colbymchenry/codegraph/releases/tag/v0.9.2
Expand Down
140 changes: 63 additions & 77 deletions src/extraction/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1202,8 +1202,12 @@ export class ExtractionOrchestrator {
}

/**
* Sync with current file state.
* Uses git status as a fast path when available, falling back to full scan.
* Sync the index with the current file state.
*
* Change detection is filesystem-based, never git: a (size, mtime) stat
* pre-filter skips unchanged files, then a content-hash compare confirms real
* changes. This works in non-git projects and catches committed changes from
* `git pull`/`checkout`/`merge`/`rebase` that `git status` cannot see.
*/
async sync(onProgress?: (progress: IndexProgress) => void): Promise<SyncResult> {
await initGrammars(); // Initialize WASM runtime (grammars loaded lazily below)
Expand All @@ -1222,93 +1226,75 @@ export class ExtractionOrchestrator {
});

const filesToIndex: string[] = [];
const gitChanges = getGitChangedFiles(this.rootDir);
// === Filesystem reconcile (git-independent) ===
// The source of truth for "what changed" is the filesystem vs the indexed
// state — never git. We enumerate the current source files and reconcile
// each against the DB. A cheap (size, mtime) stat pre-filter skips unchanged
// files without reading or hashing them, so the expensive read+hash+parse
// only runs for files that actually changed. This catches edits/adds/deletes
// whether or not the project uses git, and crucially also catches committed
// changes from `git pull`/`checkout`/`merge`/`rebase` — which `git status`
// cannot see, because the working tree is clean afterward.
const currentFiles = scanDirectory(this.rootDir);
filesChecked = currentFiles.length;
const currentSet = new Set(currentFiles);

if (gitChanges) {
// === Git fast path ===
// Only inspect the files git reports as changed instead of scanning everything.
filesChecked = gitChanges.modified.length + gitChanges.added.length + gitChanges.deleted.length;
const trackedFiles = this.queries.getAllFiles();
const trackedMap = new Map<string, FileRecord>();
for (const f of trackedFiles) {
trackedMap.set(f.path, f);
}

// Handle deleted files
for (const filePath of gitChanges.deleted) {
const tracked = this.queries.getFileByPath(filePath);
if (tracked) {
this.queries.deleteFile(filePath);
filesRemoved++;
}
// Removals: tracked in the DB but no longer a present source file. Check the
// filesystem directly — `scanDirectory` (via `git ls-files`) still lists a
// file deleted from disk but not yet staged, so set membership alone misses it.
for (const tracked of trackedFiles) {
if (!currentSet.has(tracked.path) || !fs.existsSync(path.join(this.rootDir, tracked.path))) {
this.queries.deleteFile(tracked.path);
filesRemoved++;
}
}

// Handle modified + added files — read + hash only these. Untracked
// (`??`) files stay untracked in git even after we index them, so they
// can't be trusted as "new": re-hash and compare against the DB exactly
// like modified files. Otherwise every sync re-indexes them and status
// reports them as pending forever. (See issue #206.)
for (const filePath of [...gitChanges.modified, ...gitChanges.added]) {
const fullPath = path.join(this.rootDir, filePath);
let content: string;
// Adds / modifications.
for (const filePath of currentFiles) {
const fullPath = path.join(this.rootDir, filePath);
const tracked = trackedMap.get(filePath);

// Cheap pre-filter: an already-indexed file whose size AND mtime both match
// the DB is unchanged — skip it without reading or hashing. (A content
// change that preserves both exactly is the blind spot every mtime-based
// incremental tool accepts; `index --force` is the escape hatch. Git bumps
// mtime on every file it writes during checkout/merge, so pulls are caught.)
if (tracked) {
try {
content = fs.readFileSync(fullPath, 'utf-8');
const stat = fs.statSync(fullPath);
if (stat.size === tracked.size && Math.floor(stat.mtimeMs) === Math.floor(tracked.modifiedAt)) {
continue;
}
} catch (error) {
logDebug('Skipping unreadable file during sync', { filePath, error: String(error) });
logDebug('Skipping unstattable file during sync', { filePath, error: String(error) });
continue;
}

const contentHash = hashContent(content);
const tracked = this.queries.getFileByPath(filePath);

if (!tracked) {
filesToIndex.push(filePath);
changedFilePaths.push(filePath);
filesAdded++;
} else if (tracked.contentHash !== contentHash) {
filesToIndex.push(filePath);
changedFilePaths.push(filePath);
filesModified++;
}
}
} else {
// === Fallback: full scan (non-git project or git failure) ===
const currentFiles = new Set(scanDirectory(this.rootDir));
filesChecked = currentFiles.size;

// Build Map for O(1) lookups instead of .find() per file
const trackedFiles = this.queries.getAllFiles();
const trackedMap = new Map<string, FileRecord>();
for (const f of trackedFiles) {
trackedMap.set(f.path, f);
}

// Find files to remove (in DB but not on disk)
for (const tracked of trackedFiles) {
if (!currentFiles.has(tracked.path)) {
this.queries.deleteFile(tracked.path);
filesRemoved++;
}
// New, or size/mtime changed — read + hash to confirm a real content change.
let content: string;
try {
content = fs.readFileSync(fullPath, 'utf-8');
} catch (error) {
logDebug('Skipping unreadable file during sync', { filePath, error: String(error) });
continue;
}
const contentHash = hashContent(content);

// Find files to add or update
for (const filePath of currentFiles) {
const fullPath = path.join(this.rootDir, filePath);
let content: string;
try {
content = fs.readFileSync(fullPath, 'utf-8');
} catch (error) {
logDebug('Skipping unreadable file during sync', { filePath, error: String(error) });
continue;
}

const contentHash = hashContent(content);
const tracked = trackedMap.get(filePath);

if (!tracked) {
filesToIndex.push(filePath);
changedFilePaths.push(filePath);
filesAdded++;
} else if (tracked.contentHash !== contentHash) {
filesToIndex.push(filePath);
changedFilePaths.push(filePath);
filesModified++;
}
if (!tracked) {
filesToIndex.push(filePath);
changedFilePaths.push(filePath);
filesAdded++;
} else if (tracked.contentHash !== contentHash) {
filesToIndex.push(filePath);
changedFilePaths.push(filePath);
filesModified++;
}
}

Expand Down
28 changes: 28 additions & 0 deletions src/mcp/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,7 @@ export class MCPServer {
this.cg = await CodeGraph.open(resolvedRoot);
this.toolHandler.setDefaultCodeGraph(this.cg);
this.startWatching();
this.catchUpSync();
} catch (err) {
// Log the error so transient failures are diagnosable (see issue #47)
const msg = err instanceof Error ? err.message : String(err);
Expand Down Expand Up @@ -301,6 +302,7 @@ export class MCPServer {
this.projectPath = resolvedRoot;
this.toolHandler.setDefaultCodeGraph(this.cg);
this.startWatching();
this.catchUpSync();
} catch {
// Still failing — will retry on next tool call
}
Expand Down Expand Up @@ -370,6 +372,32 @@ export class MCPServer {
}
}

/**
 * One-shot background reconcile of the index against the filesystem, kicked
 * off right after a successful connect.
 *
 * Picks up whatever changed while no watcher was running — edits, new files,
 * deletions, and `git pull`/`checkout` results. The sync promise is
 * deliberately not awaited, so the caller (and the `initialize` response)
 * is never held up by it. It is also started regardless of whether the file
 * watcher could be set up (e.g. WSL2 /mnt drives), which is when catch-up
 * matters most.
 *
 * NOTE(review): `sync()` is expected to be incremental and mutex-guarded, so
 * it should not collide with the live watcher or a git-hook sync — confirm
 * against the `sync()` implementation.
 *
 * Outcome is reported on stderr only: a summary line when at least one file
 * was added/modified/removed, or a failure line if the sync rejects.
 */
private catchUpSync(): void {
  const graph = this.cg;
  if (graph) {
    // Success path: total up the reconciled files and stay silent when
    // nothing changed.
    const reported = graph.sync().then(({ filesAdded, filesModified, filesRemoved }) => {
      const total = filesAdded + filesModified + filesRemoved;
      if (total > 0) {
        process.stderr.write(`[CodeGraph MCP] Caught up ${total} file(s) changed since last run\n`);
      }
    });

    // Failure path: chained after the success handler so it also covers a
    // throw from the reporting step. `void` marks the fire-and-forget.
    void reported.catch((failure) => {
      let detail: string;
      if (failure instanceof Error) {
        detail = failure.message;
      } else {
        detail = String(failure);
      }
      process.stderr.write(`[CodeGraph MCP] Catch-up sync failed: ${detail}\n`);
    });
  }
}

/**
* Stop the server
*/
Expand Down