From af197eb7e74c67ebe4b71b920ac4aaec706ccf04 Mon Sep 17 00:00:00 2001 From: 0x1306a94 <0x1306a94@gmail.com> Date: Fri, 22 May 2026 10:44:03 +0800 Subject: [PATCH] feat: add Objective-C language support Wire tree-sitter-objc for .m/.mm/ObjC headers: classes, protocols, methods, @property, #import edges, inheritance, and message-send calls. --- .claude/skills/agent-eval/corpus.json | 5 + README.md | 3 +- __tests__/extraction.test.ts | 108 ++++++++++++++++++++ src/extraction/grammars.ts | 15 ++- src/extraction/languages/index.ts | 2 + src/extraction/languages/objc.ts | 136 ++++++++++++++++++++++++++ src/extraction/tree-sitter-types.ts | 6 ++ src/extraction/tree-sitter.ts | 68 +++++++++++-- src/resolution/import-resolver.ts | 1 + src/types.ts | 1 + 10 files changed, 337 insertions(+), 8 deletions(-) create mode 100644 src/extraction/languages/objc.ts diff --git a/.claude/skills/agent-eval/corpus.json b/.claude/skills/agent-eval/corpus.json index 3dcc87524..6e496963e 100644 --- a/.claude/skills/agent-eval/corpus.json +++ b/.claude/skills/agent-eval/corpus.json @@ -69,5 +69,10 @@ { "name": "Knit", "repo": "https://github.com/Sleitnick/Knit", "size": "Small", "files": "~10", "question": "How does Knit register services and expose them to clients?" }, { "name": "vide", "repo": "https://github.com/centau/vide", "size": "Small", "files": "~40", "question": "How does vide track reactive sources and re-run effects when state changes?" }, { "name": "Fusion", "repo": "https://github.com/dphfox/Fusion", "size": "Medium", "files": "~115", "question": "How does Fusion build and update its reactive UI graph from state objects?" } + ], + "Objective-C": [ + { "name": "Masonry", "repo": "https://github.com/SnapKit/Masonry", "size": "Small", "files": "~50", "question": "How does Masonry build and activate Auto Layout constraints from its block DSL?" 
}, + { "name": "FMDB", "repo": "https://github.com/ccgus/fmdb", "size": "Medium", "files": "~80", "question": "How does FMDB execute a prepared SQL statement and bind parameters?" }, + { "name": "SDWebImage", "repo": "https://github.com/SDWebImage/SDWebImage", "size": "Large", "files": "~400", "question": "How does SDWebImage download, cache, and decode an image for a UIImageView?" } ] } diff --git a/README.md b/README.md index 4f4d76b29..7717f9177 100644 --- a/README.md +++ b/README.md @@ -135,7 +135,7 @@ The gains scale with codebase size: on large repos the agent answers from the in | **Full-Text Search** | Find code by name instantly across your entire codebase, powered by FTS5 | | **Impact Analysis** | Trace callers, callees, and the full impact radius of any symbol before making changes | | **Always Fresh** | File watcher uses native OS events (FSEvents/inotify/ReadDirectoryChangesW) with debounced auto-sync — the graph stays current as you code, zero config | -| **19+ Languages** | TypeScript, JavaScript, Python, Go, Rust, Java, C#, PHP, Ruby, C, C++, Swift, Kotlin, Dart, Lua, Luau, Svelte, Liquid, Pascal/Delphi | +| **20+ Languages** | TypeScript, JavaScript, Python, Go, Rust, Java, C#, PHP, Ruby, C, C++, Objective-C, Swift, Kotlin, Dart, Lua, Luau, Svelte, Liquid, Pascal/Delphi | | **Framework-aware Routes** | Recognizes web-framework routing files and links URL patterns to their handlers across 14 frameworks | | **100% Local** | No data leaves your machine. No API keys. No external services. 
SQLite database only | @@ -480,6 +480,7 @@ the MCP server and writing its instructions file: | Ruby | `.rb` | Full support | | C | `.c`, `.h` | Full support | | C++ | `.cpp`, `.hpp`, `.cc` | Full support | +| Objective-C | `.m`, `.mm`, `.h` | Partial support (classes, protocols, methods, `@property`, `#import`, message sends; `.mm` ObjC++ may parse incompletely) | | Swift | `.swift` | Full support | | Kotlin | `.kt`, `.kts` | Full support | | Scala | `.scala`, `.sc` | Full support (classes, traits, methods, type aliases, Scala 3 enums) | diff --git a/__tests__/extraction.test.ts b/__tests__/extraction.test.ts index 99c38345d..3a249fda4 100644 --- a/__tests__/extraction.test.ts +++ b/__tests__/extraction.test.ts @@ -93,6 +93,14 @@ describe('Language Detection', () => { expect(detectLanguage('main.dart')).toBe('dart'); }); + it('should detect Objective-C files', () => { + expect(detectLanguage('AppDelegate.m')).toBe('objc'); + expect(detectLanguage('ViewController.mm')).toBe('objc'); + const objcHeader = '@interface Foo : NSObject\n@end\n'; + expect(detectLanguage('Foo.h', objcHeader)).toBe('objc'); + expect(detectLanguage('stdio.h', '#ifndef STDIO_H\nvoid printf();\n#endif\n')).toBe('c'); + }); + it('should return unknown for unsupported extensions', () => { expect(detectLanguage('styles.css')).toBe('unknown'); expect(detectLanguage('data.json')).toBe('unknown'); @@ -3900,3 +3908,103 @@ local count = 0 }); }); }); + +// ============================================================================= +// Objective-C +// ============================================================================= + +describe('Objective-C Extraction', () => { + const sample = ` +#import <Foundation/Foundation.h> +#import "MyClass.h" + +@interface MyClass : NSObject <NSCopying> +@property (nonatomic, copy) NSString *name; +- (void)greet; +- (void)doThing:(id)x with:(id)y; ++ (instancetype)shared; +@end + +@implementation MyClass + +- (void)greet { + NSLog(@"Hello"); + [self doWork]; +} + +- (void)doThing:(id)x with:(id)y { +
[self notify:x]; +} + ++ (instancetype)shared { + return [[MyClass alloc] init]; +} + +@end + +void helperFunction(int count) { + MyClass *obj = [MyClass shared]; + [obj greet]; +} +`; + + it('should extract classes, methods, functions, and imports', () => { + const result = extractFromSource('App.m', sample); + + const classes = result.nodes.filter((n) => n.kind === 'class'); + expect(classes.filter((c) => c.name === 'MyClass')).toHaveLength(1); + + const methods = result.nodes.filter((n) => n.kind === 'method'); + expect(methods.map((m) => m.name).sort()).toEqual(['doThing:with:', 'greet', 'shared']); + + const shared = methods.find((m) => m.name === 'shared'); + expect(shared?.isStatic).toBe(true); + + const properties = result.nodes.filter((n) => n.kind === 'property'); + expect(properties.some((p) => p.name === 'name')).toBe(true); + + const functions = result.nodes.filter((n) => n.kind === 'function'); + expect(functions.some((f) => f.name === 'helperFunction')).toBe(true); + + const imports = result.nodes.filter((n) => n.kind === 'import').map((n) => n.name); + expect(imports).toContain('Foundation/Foundation.h'); + expect(imports).toContain('MyClass.h'); + }); + + it('should record inheritance and protocol conformance', () => { + const result = extractFromSource('App.m', sample); + const extendsRefs = result.unresolvedReferences.filter((r) => r.referenceKind === 'extends'); + const implementsRefs = result.unresolvedReferences.filter((r) => r.referenceKind === 'implements'); + expect(extendsRefs.map((r) => r.referenceName)).toContain('NSObject'); + expect(implementsRefs.map((r) => r.referenceName)).toContain('NSCopying'); + }); + + it('should record message sends and C calls', () => { + const result = extractFromSource('App.m', sample); + const calls = result.unresolvedReferences + .filter((r) => r.referenceKind === 'calls') + .map((r) => r.referenceName); + expect(calls).toEqual(expect.arrayContaining(['NSLog', 'doWork', 'MyClass.shared', 'obj.greet'])); + 
}); + + it('should not classify pure C headers with @end in comments as objc', () => { + const cHeader = '/* @end of file */\n#ifndef STDIO_H\nvoid printf(const char *);\n#endif\n'; + expect(detectLanguage('stdio.h', cHeader)).toBe('c'); + }); + + it('should extract protocol declarations', () => { + const code = ` +@protocol DataSource +- (NSInteger)numberOfItems; +@end +`; + const result = extractFromSource('DataSource.h', code); + const protocol = result.nodes.find((n) => n.kind === 'protocol' && n.name === 'DataSource'); + expect(protocol).toBeDefined(); + }); + + it('should report Objective-C as supported', () => { + expect(isLanguageSupported('objc')).toBe(true); + expect(getSupportedLanguages()).toContain('objc'); + }); +}); diff --git a/src/extraction/grammars.ts b/src/extraction/grammars.ts index c167d28b3..4fdad1dfd 100644 --- a/src/extraction/grammars.ts +++ b/src/extraction/grammars.ts @@ -37,6 +37,7 @@ const WASM_GRAMMAR_FILES: Record = { scala: 'tree-sitter-scala.wasm', lua: 'tree-sitter-lua.wasm', luau: 'tree-sitter-luau.wasm', + objc: 'tree-sitter-objc.wasm', }; /** @@ -92,6 +93,8 @@ export const EXTENSION_MAP: Record = { '.sc': 'scala', '.lua': 'lua', '.luau': 'luau', + '.m': 'objc', + '.mm': 'objc', }; /** @@ -228,9 +231,10 @@ export function detectLanguage(filePath: string, source?: string): Language { const ext = filePath.substring(filePath.lastIndexOf('.')).toLowerCase(); const lang = EXTENSION_MAP[ext] || 'unknown'; - // .h files could be C or C++ — check source content for C++ features + // .h files could be C, C++, or Objective-C — check source content if (lang === 'c' && ext === '.h' && source) { if (looksLikeCpp(source)) return 'cpp'; + if (looksLikeObjc(source)) return 'objc'; } return lang; @@ -245,6 +249,14 @@ function looksLikeCpp(source: string): boolean { return /\bnamespace\b|\bclass\s+\w+\s*[:{]|\btemplate\s*<|\b(?:public|private|protected)\s*:|\bvirtual\b|\busing\s+(?:namespace\b|\w+\s*=)/.test(sample); } +/** + * Heuristic: does a 
.h file contain Objective-C constructs? + */ +function looksLikeObjc(source: string): boolean { + const sample = source.substring(0, 8192); + return /@(?:interface|implementation|protocol|synthesize)\b|@class\s+\w+\s*[;,]|@import\s+[\w.]+\s*;|^[ \t]*#[ \t]*import\b/m.test(sample); +} + /** * Check if a language is supported (has a grammar defined). * Returns true if the grammar exists, even if not yet loaded. @@ -342,6 +354,7 @@ export function getLanguageDisplayName(language: Language): string { scala: 'Scala', lua: 'Lua', luau: 'Luau', + objc: 'Objective-C', yaml: 'YAML', twig: 'Twig', unknown: 'Unknown', diff --git a/src/extraction/languages/index.ts b/src/extraction/languages/index.ts index a289f0289..543598b8e 100644 --- a/src/extraction/languages/index.ts +++ b/src/extraction/languages/index.ts @@ -25,6 +25,7 @@ import { pascalExtractor } from './pascal'; import { scalaExtractor } from './scala'; import { luaExtractor } from './lua'; import { luauExtractor } from './luau'; +import { objcExtractor } from './objc'; export const EXTRACTORS: Partial> = { typescript: typescriptExtractor, @@ -47,4 +48,5 @@ export const EXTRACTORS: Partial> = { scala: scalaExtractor, lua: luaExtractor, luau: luauExtractor, + objc: objcExtractor, }; diff --git a/src/extraction/languages/objc.ts b/src/extraction/languages/objc.ts new file mode 100644 index 000000000..6671284aa --- /dev/null +++ b/src/extraction/languages/objc.ts @@ -0,0 +1,136 @@ +import type { Node as SyntaxNode } from 'web-tree-sitter'; +import { getChildByField, getNodeText } from '../tree-sitter-helpers'; +import type { ExtractorContext, LanguageExtractor } from '../tree-sitter-types'; + +function findCompoundStatement(node: SyntaxNode): SyntaxNode | null { + for (let i = 0; i < node.namedChildCount; i++) { + const child = node.namedChild(i); + if (child?.type === 'compound_statement') { + return child; + } + } + return null; +} + +/** Build ObjC selector: `greet`, `doThing:`, or `doThing:with:`. 
*/ +function extractObjcMethodName(node: SyntaxNode, source: string): string | undefined { + if (node.type !== 'method_definition' && node.type !== 'method_declaration') { + return undefined; + } + + const identifiers = node.namedChildren.filter((c) => c.type === 'identifier'); + if (identifiers.length === 0) return undefined; + + const hasParameters = node.namedChildren.some((c) => c.type === 'method_parameter'); + const firstIdentifier = identifiers[0]; + if (!firstIdentifier) return undefined; + if (!hasParameters) { + return getNodeText(firstIdentifier, source); + } + + return identifiers.map((id) => `${getNodeText(id, source)}:`).join(''); +} + +function extractObjcPropertyName(node: SyntaxNode, source: string): string | null { + if (node.type !== 'property_declaration') return null; + + const structDecl = node.namedChildren.find((c) => c.type === 'struct_declaration'); + if (!structDecl) return null; + + const structDeclarator = structDecl.namedChildren.find((c) => c.type === 'struct_declarator'); + if (!structDeclarator) return null; + + let current: SyntaxNode | null = structDeclarator; + while (current) { + const inner: SyntaxNode | undefined = + getChildByField(current, 'declarator') || + current.namedChildren.find((c) => c.type === 'identifier' || c.type === 'pointer_declarator'); + if (!inner) break; + if (inner.type === 'identifier') { + return getNodeText(inner, source); + } + current = inner; + } + + return null; +} + +export const objcExtractor: LanguageExtractor = { + functionTypes: ['function_definition'], + // Only @interface emits a class node; @implementation reuses it via visitNode. 
+ classTypes: ['class_interface'], + methodTypes: ['method_definition'], + interfaceTypes: ['protocol_declaration'], + interfaceKind: 'protocol', + structTypes: ['struct_specifier'], + enumTypes: ['enum_specifier'], + enumMemberTypes: ['enumerator'], + typeAliasTypes: ['type_definition'], + importTypes: ['preproc_include'], + callTypes: ['call_expression', 'message_expression'], + variableTypes: ['declaration'], + propertyTypes: ['property_declaration'], + nameField: 'declarator', + bodyField: 'body', + paramsField: 'parameters', + resolveName: extractObjcMethodName, + extractPropertyName: extractObjcPropertyName, + resolveBody: (node, bodyField) => { + const fromField = getChildByField(node, bodyField); + if (fromField) { + return fromField; + } + return findCompoundStatement(node); + }, + resolveTypeAliasKind: (node, _source) => { + for (let i = 0; i < node.namedChildCount; i++) { + const child = node.namedChild(i); + if (!child) continue; + if (child.type === 'enum_specifier' && getChildByField(child, 'body')) return 'enum'; + if (child.type === 'struct_specifier' && getChildByField(child, 'body')) return 'struct'; + } + return undefined; + }, + isStatic: (node) => /^\s*(?:\+|@property\s*\([^)]*\bclass\b)/.test(node.text), + visitNode: (node, ctx: ExtractorContext) => { + if (node.type !== 'class_implementation') return false; + + const classNameNode = node.namedChildren.find((c) => c.type === 'identifier'); + if (!classNameNode) return true; + + const className = getNodeText(classNameNode, ctx.source); + const classNode = + ctx.nodes.find( + (n) => n.name === className && n.filePath === ctx.filePath && n.kind === 'class' + ) ?? 
ctx.createNode('class', className, node, {}); + if (!classNode) return true; + + ctx.pushScope(classNode.id); + for (let i = 0; i < node.namedChildCount; i++) { + const child = node.namedChild(i); + if (child?.type === 'implementation_definition') { + for (let j = 0; j < child.namedChildCount; j++) { + const implChild = child.namedChild(j); + if (implChild) ctx.visitNode(implChild); + } + } + } + ctx.popScope(); + return true; + }, + extractImport: (node, source) => { + const importText = source.substring(node.startIndex, node.endIndex).trim(); + const systemLib = node.namedChildren.find((c: SyntaxNode) => c.type === 'system_lib_string'); + if (systemLib) { + return { moduleName: getNodeText(systemLib, source).replace(/^<|>$/g, ''), signature: importText }; + } + const stringLiteral = node.namedChildren.find((c: SyntaxNode) => c.type === 'string_literal'); + if (stringLiteral) { + const stringContent = stringLiteral.namedChildren.find((c: SyntaxNode) => c.type === 'string_content'); + if (stringContent) { + return { moduleName: getNodeText(stringContent, source), signature: importText }; + } + } + return null; + }, +}; diff --git a/src/extraction/tree-sitter-types.ts b/src/extraction/tree-sitter-types.ts index c3a6b94e8..d7d5a45e3 100644 --- a/src/extraction/tree-sitter-types.ts +++ b/src/extraction/tree-sitter-types.ts @@ -120,6 +120,12 @@ export interface LanguageExtractor { // --- Existing hooks --- + /** Override symbol name extraction (e.g. ObjC multi-part selectors). */ + resolveName?: (node: SyntaxNode, source: string) => string | undefined; + + /** Extract property name when the generic name walk fails (e.g. ObjC @property). 
*/ + extractPropertyName?: (node: SyntaxNode, source: string) => string | null; + /** Extract signature from node */ getSignature?: (node: SyntaxNode, source: string) => string | undefined; /** Extract visibility from node */ diff --git a/src/extraction/tree-sitter.ts b/src/extraction/tree-sitter.ts index 99c7f9aaa..6766925d2 100644 --- a/src/extraction/tree-sitter.ts +++ b/src/extraction/tree-sitter.ts @@ -35,6 +35,9 @@ export { generateNodeId } from './tree-sitter-helpers'; * Extract the name from a node based on language */ function extractName(node: SyntaxNode, source: string, extractor: LanguageExtractor): string { + const hookName = extractor.resolveName?.(node, source); + if (hookName) return hookName; + // Try field name first const nameNode = getChildByField(node, extractor.nameField); if (nameNode) { @@ -893,12 +896,12 @@ export class TreeSitterExtractor { const visibility = this.extractor.getVisibility?.(node); const isStatic = this.extractor.isStatic?.(node) ?? false; - // Property name is a direct identifier child - const nameNode = getChildByField(node, 'name') - || node.namedChildren.find(c => c.type === 'identifier'); - if (!nameNode) return; - - const name = getNodeText(nameNode, this.source); + const hookName = this.extractor.extractPropertyName?.(node, this.source); + const nameNode = hookName + ? null + : getChildByField(node, 'name') || node.namedChildren.find(c => c.type === 'identifier'); + const name = hookName ?? (nameNode ? 
getNodeText(nameNode, this.source) : null); + if (!name) return; // Get property type from the type child (first named child that isn't modifier or identifier) const typeNode = node.namedChildren.find( @@ -1463,6 +1466,23 @@ export class TreeSitterExtractor { calleeName = `${receiverName}.${methodName}`; } } + } else if (node.type === 'message_expression') { + const methodField = getChildByField(node, 'method'); + if (methodField) { + const methodName = getNodeText(methodField, this.source); + const receiverField = getChildByField(node, 'receiver'); + const SKIP_RECEIVERS = new Set(['self', 'super']); + if (receiverField && receiverField.type !== 'message_expression') { + const receiverName = getNodeText(receiverField, this.source); + if (receiverName && !SKIP_RECEIVERS.has(receiverName)) { + calleeName = `${receiverName}.${methodName}`; + } else { + calleeName = methodName; + } + } else { + calleeName = methodName; + } + } } else { const func = getChildByField(node, 'function') || node.namedChild(0); @@ -1770,6 +1790,42 @@ export class TreeSitterExtractor { * Extract inheritance relationships */ private extractInheritance(node: SyntaxNode, classId: string): void { + // Objective-C @interface MyClass : NSObject + if (node.type === 'class_interface') { + const superclass = getChildByField(node, 'superclass'); + if (superclass) { + const name = getNodeText(superclass, this.source); + this.unresolvedReferences.push({ + fromNodeId: classId, + referenceName: name, + referenceKind: 'extends', + line: superclass.startPosition.row + 1, + column: superclass.startPosition.column, + }); + } + for (let j = 0; j < node.namedChildCount; j++) { + const argList = node.namedChild(j); + if (argList?.type !== 'parameterized_arguments') continue; + for (let k = 0; k < argList.namedChildCount; k++) { + const typeName = argList.namedChild(k); + if (!typeName) continue; + const typeId = typeName.namedChildren.find( + (c: SyntaxNode) => c.type === 'type_identifier' || c.type === 
'identifier' + ); + if (!typeId) continue; + const protocolName = getNodeText(typeId, this.source); + this.unresolvedReferences.push({ + fromNodeId: classId, + referenceName: protocolName, + referenceKind: 'implements', + line: typeId.startPosition.row + 1, + column: typeId.startPosition.column, + }); + } + } + return; + } + // Look for extends/implements clauses for (let i = 0; i < node.namedChildCount; i++) { const child = node.namedChild(i); diff --git a/src/resolution/import-resolver.ts b/src/resolution/import-resolver.ts index 5b41a57db..5622650ec 100644 --- a/src/resolution/import-resolver.ts +++ b/src/resolution/import-resolver.ts @@ -24,6 +24,7 @@ const EXTENSION_RESOLUTION: Record = { csharp: ['.cs'], php: ['.php'], ruby: ['.rb'], + objc: ['.h', '.m', '.mm'], }; /** diff --git a/src/types.ts b/src/types.ts index 0168665d2..8ce0c3589 100644 --- a/src/types.ts +++ b/src/types.ts @@ -87,6 +87,7 @@ export const LANGUAGES = [ 'scala', 'lua', 'luau', + 'objc', 'yaml', 'twig', 'unknown',