@@ -149,7 +149,7 @@ def _parse_int(value, fallback=0):
149149 return fallback
150150
151151
152- def _format_app_message_text (content , local_type , is_group , chat_username , chat_display_name , names , _display_name_fn ):
152+ def _format_app_message_text (content , local_type , is_group , chat_username , chat_display_name , names , _display_name_fn , resolve_media = False , db_dir = None , create_time_ts = 0 ):
153153 if not content or '<appmsg' not in content :
154154 return None
155155 _ , sub_type = _split_msg_type (local_type )
@@ -177,6 +177,22 @@ def _format_app_message_text(content, local_type, is_group, chat_username, chat_
177177 quote_text += f"\n ↳ { prefix } { ref_content } "
178178 return quote_text
179179 if app_type == 6 :
180+ # Try to resolve file path
181+ if resolve_media and db_dir :
182+ msg_dir = os .path .join (os .path .dirname (db_dir ), "msg" , "file" )
183+ if title and os .path .isdir (msg_dir ):
184+ from datetime import datetime as _dt
185+ dt = _dt .fromtimestamp (create_time_ts ) if create_time_ts else None
186+ if dt :
187+ file_dir = os .path .join (msg_dir , dt .strftime ("%Y-%m" ))
188+ if os .path .isdir (file_dir ):
189+ target = os .path .join (file_dir , title )
190+ if os .path .isfile (target ):
191+ return f"[文件] { title } \n { target } "
192+ # Fuzzy match
193+ for f in os .listdir (file_dir ):
194+ if title in f or f in title :
195+ return f"[文件] { title } \n { os .path .join (file_dir , f )} "
180196 return f"[文件] { title } " if title else "[文件]"
181197 if app_type == 5 :
182198 return f"[链接] { title } " if title else "[链接]"
@@ -206,18 +222,125 @@ def _format_voip_message_text(content):
206222 return f"[通话] { status_map .get (raw_text , raw_text )} "
207223
208224
209- def _format_message_text (local_id , local_type , content , is_group , chat_username , chat_display_name , names , display_name_fn ):
def _list_dir_sorted(path):
    """Return the entries of *path* in sorted order, or [] if it cannot be read."""
    try:
        # os.listdir order is arbitrary; sorting keeps results reproducible.
        return sorted(os.listdir(path))
    except OSError:
        return []


def _extract_file_title(content):
    """Return the <title> of a file-type (type 6) <appmsg>, or '' if absent."""
    root = _parse_xml_root(content)
    if root is None:
        return ''
    appmsg = root.find('.//appmsg')
    if appmsg is None:
        return ''
    if _parse_int((appmsg.findtext('type') or '').strip()) != 6:
        return ''
    return (appmsg.findtext('title') or '').strip()


def _find_file_by_title(file_dir, title):
    """Locate a regular file in *file_dir* matching *title*; None if not found."""
    # The title comes from message XML, so never let it escape file_dir:
    # only a bare file name is eligible for the exact-match join.
    if os.path.basename(title) == title:
        exact = os.path.join(file_dir, title)
        if os.path.isfile(exact):
            return exact
    # Fuzzy match: the on-disk name may differ slightly from the title.
    for name in _list_dir_sorted(file_dir):
        if title in name or name in title:
            candidate = os.path.join(file_dir, name)
            if os.path.isfile(candidate):
                return candidate
    return None


def _resolve_media_path(db_dir, content, local_type, create_time_ts, chat_username=None):
    """Best-effort lookup of a message's media file on disk.

    Args:
        db_dir: WeChat db_storage directory; the "msg" directory is looked up
            next to it (under its parent directory).
        content: decompressed message_content (XML for type-49 messages).
        local_type: message type; only the low 32 bits select the base type.
        create_time_ts: message timestamp, used to derive the YYYY-MM folder.
        chat_username: chat peer username; its MD5 hex digest is tried as the
            attach sub-directory name.

    Returns:
        A (path, exists) tuple. path is None (and exists False) when nothing
        could be resolved. For image/voice/video messages the XML is not used
        to identify the file, so the path is only a representative file from
        the matching month directory, not necessarily this message's file.
    """
    from datetime import datetime

    if not db_dir:
        return None, False

    base_type = local_type & 0xFFFFFFFF
    msg_dir = os.path.join(os.path.dirname(db_dir), "msg")
    if not os.path.isdir(msg_dir):
        return None, False

    try:
        date_prefix = datetime.fromtimestamp(create_time_ts).strftime("%Y-%m")
    except (TypeError, ValueError, OverflowError, OSError):
        # Missing or out-of-range timestamp: no month folder can be derived.
        return None, False

    # File message (type 49, sub 6): msg/file/YYYY-MM/filename
    if base_type == 49:
        if not content:
            return None, False
        title = _extract_file_title(content)
        if title:
            file_dir = os.path.join(msg_dir, "file", date_prefix)
            if os.path.isdir(file_dir):
                found = _find_file_by_title(file_dir, title)
                if found:
                    return found, True
        return None, False

    if base_type not in (3, 34, 43):
        return None, False

    # Image (type 3): msg/attach/<hash>/YYYY-MM/Img/*.dat
    # Video/voice: msg/attach/<hash>/YYYY-MM/{Video,Voice}/ or msg/video/YYYY-MM/
    attach_dir = os.path.join(msg_dir, "attach")
    if os.path.isdir(attach_dir):
        # Prefer the sub-directory named after the MD5 of chat_username.
        search_dirs = None
        if chat_username:
            digest = hashlib.md5(chat_username.encode()).hexdigest()
            if os.path.isdir(os.path.join(attach_dir, digest)):
                search_dirs = [digest]
        if search_dirs is None:
            # Otherwise scan every attach sub-directory.
            search_dirs = [
                d for d in _list_dir_sorted(attach_dir)
                if os.path.isdir(os.path.join(attach_dir, d))
            ]

        sub_dir_name = {3: "Img", 43: "Video"}.get(base_type, "Voice")
        for d in search_dirs:
            sub = os.path.join(attach_dir, d, date_prefix, sub_dir_name)
            if not os.path.isdir(sub):
                continue
            files = [f for f in _list_dir_sorted(sub) if not f.endswith("_h.dat")]
            if files:
                # The exact file cannot be matched from the XML, so return
                # the first (sorted) file of the month as a sample.
                return os.path.join(sub, files[0]), True

    # Video: also check msg/video/, even when msg/attach does not exist.
    if base_type == 43:
        video_dir = os.path.join(msg_dir, "video", date_prefix)
        if os.path.isdir(video_dir):
            thumbs = [f for f in _list_dir_sorted(video_dir) if f.endswith("_thumb.jpg")]
            if thumbs:
                return os.path.join(video_dir, thumbs[0]), True

    return None, False
312+
313+
314+ def _format_message_text (local_id , local_type , content , is_group , chat_username , chat_display_name , names , display_name_fn , db_dir = None , create_time_ts = 0 , resolve_media = False ):
210315 sender , text = _parse_message_content (content , local_type , is_group )
211316 base_type , _ = _split_msg_type (local_type )
317+
318+ media_path = None
319+ media_exists = False
320+ if resolve_media and db_dir and content :
321+ try :
322+ media_path , media_exists = _resolve_media_path (
323+ db_dir , content , local_type , create_time_ts , chat_username
324+ )
325+ except Exception :
326+ pass
327+
212328 if base_type == 3 :
213- text = f"[图片] (local_id={ local_id } )"
329+ if media_path :
330+ tag = f"[图片] { media_path } "
331+ if not media_exists :
332+ tag += " (文件不存在)"
333+ else :
334+ tag = f"[图片] (local_id={ local_id } )"
335+ text = tag
214336 elif base_type == 47 :
215337 text = "[表情]"
216338 elif base_type == 50 :
217339 text = _format_voip_message_text (text ) or "[通话]"
218340 elif base_type == 49 :
219341 text = _format_app_message_text (
220- text , local_type , is_group , chat_username , chat_display_name , names , display_name_fn
342+ text , local_type , is_group , chat_username , chat_display_name , names , display_name_fn ,
343+ resolve_media = resolve_media , db_dir = db_dir , create_time_ts = create_time_ts
221344 ) or "[链接/文件]"
222345 elif base_type != 1 :
223346 type_label = format_msg_type (local_type )
@@ -387,14 +510,15 @@ def _page_ranked_entries(entries, limit, offset):
387510
388511# ---- 构建行 ----
389512
390- def _build_history_line (row , ctx , names , id_to_username , display_name_fn ):
513+ def _build_history_line (row , ctx , names , id_to_username , display_name_fn , resolve_media = False , db_dir = None ):
391514 local_id , local_type , create_time , real_sender_id , content , ct = row
392515 time_str = datetime .fromtimestamp (create_time ).strftime ('%Y-%m-%d %H:%M' )
393516 content = decompress_content (content , ct )
394517 if content is None :
395518 content = '(无法解压)'
396519 sender , text = _format_message_text (
397- local_id , local_type , content , ctx ['is_group' ], ctx ['username' ], ctx ['display_name' ], names , display_name_fn
520+ local_id , local_type , content , ctx ['is_group' ], ctx ['username' ], ctx ['display_name' ], names , display_name_fn ,
521+ db_dir = db_dir , create_time_ts = create_time , resolve_media = resolve_media ,
398522 )
399523 sender_label = _resolve_sender_label (
400524 real_sender_id , sender , ctx ['is_group' ], ctx ['username' ], ctx ['display_name' ], names , id_to_username , display_name_fn
@@ -404,13 +528,14 @@ def _build_history_line(row, ctx, names, id_to_username, display_name_fn):
404528 return create_time , f'[{ time_str } ] { text } '
405529
406530
407- def _build_search_entry (row , ctx , names , id_to_username , display_name_fn ):
531+ def _build_search_entry (row , ctx , names , id_to_username , display_name_fn , resolve_media = False , db_dir = None ):
408532 local_id , local_type , create_time , real_sender_id , content , ct = row
409533 content = decompress_content (content , ct )
410534 if content is None :
411535 return None
412536 sender , text = _format_message_text (
413- local_id , local_type , content , ctx ['is_group' ], ctx ['username' ], ctx ['display_name' ], names , display_name_fn
537+ local_id , local_type , content , ctx ['is_group' ], ctx ['username' ], ctx ['display_name' ], names , display_name_fn ,
538+ db_dir = db_dir , create_time_ts = create_time , resolve_media = resolve_media ,
414539 )
415540 if text and len (text ) > 300 :
416541 text = text [:300 ] + '...'
@@ -427,7 +552,7 @@ def _build_search_entry(row, ctx, names, id_to_username, display_name_fn):
427552
428553# ---- 聊天记录查询 ----
429554
430- def collect_chat_history (ctx , names , display_name_fn , start_ts = None , end_ts = None , limit = 20 , offset = 0 , msg_type_filter = None ):
555+ def collect_chat_history (ctx , names , display_name_fn , start_ts = None , end_ts = None , limit = 20 , offset = 0 , msg_type_filter = None , resolve_media = False , db_dir = None ):
431556 collected = []
432557 failures = []
433558 candidate_limit = _candidate_page_size (limit , offset )
@@ -446,7 +571,7 @@ def collect_chat_history(ctx, names, display_name_fn, start_ts=None, end_ts=None
446571 fetch_offset += len (rows )
447572 for row in rows :
448573 try :
449- collected .append (_build_history_line (row , table_ctx , names , id_to_username , display_name_fn ))
574+ collected .append (_build_history_line (row , table_ctx , names , id_to_username , display_name_fn , resolve_media = resolve_media , db_dir = db_dir ))
450575 except Exception as e :
451576 failures .append (f"local_id={ row [0 ]} : { e } " )
452577 if len (collected ) - before >= candidate_limit :
0 commit comments