1+ """
2+ Thread-safe tree-sitter parser pool for parallel dependency analysis.
3+ """
4+ import threading
5+ from typing import Dict , Optional
6+ from tree_sitter import Parser , Language
7+ import tree_sitter_javascript
8+ import tree_sitter_typescript
9+ import tree_sitter_java
10+ import tree_sitter_c
11+ import tree_sitter_cpp
12+ import tree_sitter_c_sharp
13+
14+ import logging
15+
16+ logger = logging .getLogger (__name__ )
17+
18+
class ThreadSafeParserPool:
    """
    Thread-safe pool of tree-sitter parsers for parallel processing.

    Each thread gets its own ``Parser`` instances (one per language), created
    lazily the first time that thread asks for a language. ``Language``
    objects are built once in the constructor and shared by the parsers of
    every thread; after construction the language table is only read.

    Per-thread parsers are kept in ``threading.local`` storage rather than in
    a dict keyed by ``threading.get_ident()``. Thread identifiers may be
    recycled once a thread exits, so an id-keyed dict keeps parsers of dead
    threads alive until ``cleanup_thread`` is called and can hand them to an
    unrelated later thread. Thread-local storage avoids both problems and
    needs no lock on the lookup path.
    """

    def __init__(self):
        # Language name -> shared Language object; filled once, then read-only.
        self._language_cache: Dict[str, Language] = {}
        # Holds a ``parsers`` dict (language name -> Parser) per thread,
        # created on first use by that thread.
        self._thread_state = threading.local()

        self._init_languages()

    def _init_languages(self):
        """
        Build the shared Language object for every supported grammar.

        Raises:
            Exception: Whatever a grammar module or ``Language`` raises; the
                error is logged and re-raised unchanged. Languages registered
                before the failure stay in the cache.
        """
        try:
            # The table is built inside the ``try`` so that a missing grammar
            # attribute is logged the same way as a failing constructor.
            grammar_factories = (
                ('javascript', tree_sitter_javascript.language),
                ('typescript', tree_sitter_typescript.language_typescript),
                ('java', tree_sitter_java.language),
                ('c', tree_sitter_c.language),
                ('cpp', tree_sitter_cpp.language),
                ('csharp', tree_sitter_c_sharp.language),
            )
            for name, make_capsule in grammar_factories:
                self._language_cache[name] = Language(make_capsule())

            logger.debug(
                "Initialized %s language parsers", len(self._language_cache)
            )
        except Exception as e:
            logger.error("Failed to initialize tree-sitter languages: %s", e)
            raise

    def _thread_parsers(self) -> Dict[str, Parser]:
        """Return the calling thread's parser dict, creating it if absent."""
        try:
            return self._thread_state.parsers
        except AttributeError:
            parsers = self._thread_state.parsers = {}
            return parsers

    def get_parser(self, language: str) -> Optional[Parser]:
        """
        Get a parser instance for the current thread.

        The parser is created on the first request for ``language`` from the
        calling thread and reused on later requests from that same thread.

        Args:
            language: Language name ('javascript', 'typescript', etc.)

        Returns:
            Parser instance for current thread, or None if the language is
            not supported or the parser could not be created (both cases are
            logged).
        """
        parsers = self._thread_parsers()
        cached = parsers.get(language)
        if cached is not None:
            return cached

        language_obj = self._language_cache.get(language)
        if language_obj is None:
            logger.warning("Unsupported language: %s", language)
            return None

        try:
            parser = Parser(language_obj)
        except Exception as e:
            # Best effort: callers treat None as "cannot parse this language".
            logger.error("Failed to create %s parser: %s", language, e)
            return None

        parsers[language] = parser
        logger.debug(
            "Created %s parser for thread %s", language, threading.get_ident()
        )
        return parser

    def cleanup_thread(self):
        """
        Clean up parsers for the current thread.

        Only the calling thread's parsers are dropped; a later ``get_parser``
        call from this thread creates fresh ones. Does nothing if the thread
        never requested a parser.
        """
        if hasattr(self._thread_state, 'parsers'):
            del self._thread_state.parsers
            logger.debug(
                "Cleaned up parsers for thread %s", threading.get_ident()
            )
110+
111+
# Shared module-level pool used by the convenience functions below.
# It is constructed at import time, and the constructor initializes every
# supported language, so a grammar that fails to load surfaces as an error
# when this module is imported.
parser_pool: ThreadSafeParserPool = ThreadSafeParserPool()
114+
115+
def get_thread_safe_parser(language: str) -> Optional[Parser]:
    """
    Fetch the calling thread's parser for ``language`` from the global pool.

    Thin convenience wrapper that delegates to ``parser_pool.get_parser``.

    Args:
        language: Language name

    Returns:
        Parser instance or None if not supported
    """
    pool = parser_pool
    return pool.get_parser(language)
129+
130+
def cleanup_current_thread():
    """Drop the calling thread's parsers from the global parser pool."""
    pool = parser_pool
    pool.cleanup_thread()
0 commit comments