Skip to content

Commit f6410d3

Browse files
canghecanghe
authored and committed
Add --media flag to resolve media file paths for images/files/videos (v0.2.4)
1 parent 86590e7 commit f6410d3

6 files changed

Lines changed: 143 additions & 17 deletions

File tree

npm/platforms/darwin-arm64/package.json

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "@canghe_ai/wechat-cli-darwin-arm64",
3-
"version": "0.2.3",
3+
"version": "0.2.4",
44
"description": "wechat-cli binary for macOS arm64",
55
"os": ["darwin"],
66
"cpu": ["arm64"],

npm/wechat-cli/package.json

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "@canghe_ai/wechat-cli",
3-
"version": "0.2.3",
3+
"version": "0.2.4",
44
"description": "WeChat data query CLI — chat history, contacts, sessions, favorites, and more. Designed for LLM integration.",
55
"bin": {
66
"wechat-cli": "bin/wechat-cli.js"
@@ -13,7 +13,7 @@
1313
"install.js"
1414
],
1515
"optionalDependencies": {
16-
"@canghe_ai/wechat-cli-darwin-arm64": "0.2.3"
16+
"@canghe_ai/wechat-cli-darwin-arm64": "0.2.4"
1717
},
1818
"engines": {
1919
"node": ">=14"

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
44

55
[project]
66
name = "wechat-cli"
7-
version = "0.2.3"
7+
version = "0.2.4"
88
description = "WeChat data query CLI for LLMs"
99
requires-python = ">=3.10"
1010
dependencies = [

wechat_cli/commands/history.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -22,8 +22,9 @@
2222
@click.option("--end-time", default="", help="结束时间 YYYY-MM-DD [HH:MM[:SS]]")
2323
@click.option("--format", "fmt", default="json", type=click.Choice(["json", "text"]), help="输出格式")
2424
@click.option("--type", "msg_type", default=None, type=click.Choice(MSG_TYPE_NAMES), help="消息类型过滤")
25+
@click.option("--media", is_flag=True, help="解析媒体文件路径(图片/文件/视频/语音)")
2526
@click.pass_context
26-
def history(ctx, chat_name, limit, offset, start_time, end_time, fmt, msg_type):
27+
def history(ctx, chat_name, limit, offset, start_time, end_time, fmt, msg_type, media):
2728
"""获取指定聊天的消息记录
2829
2930
\b
@@ -55,7 +56,7 @@ def history(ctx, chat_name, limit, offset, start_time, end_time, fmt, msg_type):
5556
lines, failures = collect_chat_history(
5657
chat_ctx, names, app.display_name_fn,
5758
start_ts=start_ts, end_ts=end_ts, limit=limit, offset=offset,
58-
msg_type_filter=type_filter,
59+
msg_type_filter=type_filter, resolve_media=media, db_dir=app.db_dir,
5960
)
6061

6162
if fmt == 'json':

wechat_cli/core/messages.py

Lines changed: 135 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -149,7 +149,7 @@ def _parse_int(value, fallback=0):
149149
return fallback
150150

151151

152-
def _format_app_message_text(content, local_type, is_group, chat_username, chat_display_name, names, _display_name_fn):
152+
def _format_app_message_text(content, local_type, is_group, chat_username, chat_display_name, names, _display_name_fn, resolve_media=False, db_dir=None, create_time_ts=0):
153153
if not content or '<appmsg' not in content:
154154
return None
155155
_, sub_type = _split_msg_type(local_type)
@@ -177,6 +177,22 @@ def _format_app_message_text(content, local_type, is_group, chat_username, chat_
177177
quote_text += f"\n{prefix}{ref_content}"
178178
return quote_text
179179
if app_type == 6:
180+
# Try to resolve file path
181+
if resolve_media and db_dir:
182+
msg_dir = os.path.join(os.path.dirname(db_dir), "msg", "file")
183+
if title and os.path.isdir(msg_dir):
184+
from datetime import datetime as _dt
185+
dt = _dt.fromtimestamp(create_time_ts) if create_time_ts else None
186+
if dt:
187+
file_dir = os.path.join(msg_dir, dt.strftime("%Y-%m"))
188+
if os.path.isdir(file_dir):
189+
target = os.path.join(file_dir, title)
190+
if os.path.isfile(target):
191+
return f"[文件] {title}\n {target}"
192+
# Fuzzy match
193+
for f in os.listdir(file_dir):
194+
if title in f or f in title:
195+
return f"[文件] {title}\n {os.path.join(file_dir, f)}"
180196
return f"[文件] {title}" if title else "[文件]"
181197
if app_type == 5:
182198
return f"[链接] {title}" if title else "[链接]"
@@ -206,18 +222,125 @@ def _format_voip_message_text(content):
206222
return f"[通话] {status_map.get(raw_text, raw_text)}"
207223

208224

209-
def _format_message_text(local_id, local_type, content, is_group, chat_username, chat_display_name, names, display_name_fn):
225+
def _resolve_media_path(db_dir, content, local_type, create_time_ts, chat_username=None):
    """Best-effort lookup of a message's media file on disk.

    The lookup relies on the directory layout this code expects next to
    ``db_dir``:

    * file attachments: ``msg/file/YYYY-MM/<file name>``
    * images / videos / voice:
      ``msg/attach/<md5(chat_username)>/YYYY-MM/{Img,Video,Voice}/``
    * videos (fallback): ``msg/video/YYYY-MM/*_thumb.jpg``

    Args:
        db_dir: WeChat ``db_storage`` directory; ``msg`` is looked up in
            its parent directory.
        content: Decompressed message content (XML for app messages).
        local_type: Raw message type; only the low 32 bits are inspected.
        create_time_ts: Message creation time as a Unix timestamp, used to
            select the ``YYYY-MM`` sub-directory (local time).
        chat_username: Username of the chat, used to locate the matching
            ``attach`` sub-directory. When it is missing or has no
            matching directory, every ``attach`` sub-directory is scanned.

    Returns:
        A ``(path, exists)`` tuple. ``(None, False)`` means the path could
        not be resolved (unsupported type, missing directories, invalid
        timestamp, or nothing matched).
    """
    from datetime import datetime

    base_type = local_type & 0xFFFFFFFF
    msg_dir = os.path.join(os.path.dirname(db_dir), "msg")
    if not os.path.isdir(msg_dir):
        return None, False

    # A missing or out-of-range timestamp simply means "cannot resolve".
    try:
        date_prefix = datetime.fromtimestamp(create_time_ts).strftime("%Y-%m")
    except (TypeError, ValueError, OverflowError, OSError):
        return None, False

    def _sorted_files(directory):
        # os.listdir order is arbitrary; sort so results are reproducible,
        # and skip sub-directories so only real files are ever returned.
        return sorted(
            name for name in os.listdir(directory)
            if os.path.isfile(os.path.join(directory, name))
        )

    # File message (type 49, app type 6): msg/file/YYYY-MM/<file name>
    if base_type == 49 and content:
        root = _parse_xml_root(content)
        appmsg = root.find('.//appmsg') if root is not None else None
        if appmsg is None:
            return None, False
        if _parse_int((appmsg.findtext('type') or '').strip()) != 6:
            return None, False

        title = (appmsg.findtext('title') or '').strip()
        # The title comes from message content (untrusted). Keep only its
        # final path component so "../" or absolute paths cannot point the
        # lookup outside msg/file/YYYY-MM.
        file_name = os.path.basename(title.replace("\\", "/"))
        if file_name in ("", ".", ".."):
            return None, False

        file_dir = os.path.join(msg_dir, "file", date_prefix)
        if not os.path.isdir(file_dir):
            return None, False

        # Exact file-name match first.
        exact = os.path.join(file_dir, file_name)
        if os.path.isfile(exact):
            return exact, True

        # Fuzzy match: the stored name may differ slightly from the title.
        for name in _sorted_files(file_dir):
            if file_name in name or name in file_name:
                return os.path.join(file_dir, name), True
        return None, False

    # Image (3), voice (34) and video (43) messages.
    if base_type in (3, 34, 43):
        attach_dir = os.path.join(msg_dir, "attach")
        if os.path.isdir(attach_dir):
            # Prefer the attach sub-directory named after md5(chat_username).
            candidates = []
            if chat_username:
                digest = hashlib.md5(chat_username.encode()).hexdigest()
                if os.path.isdir(os.path.join(attach_dir, digest)):
                    candidates = [digest]
            if not candidates:
                candidates = sorted(
                    d for d in os.listdir(attach_dir)
                    if os.path.isdir(os.path.join(attach_dir, d))
                )

            sub_dir_name = {3: "Img", 43: "Video"}.get(base_type, "Voice")
            for d in candidates:
                sub = os.path.join(attach_dir, d, date_prefix, sub_dir_name)
                if not os.path.isdir(sub):
                    continue
                files = [f for f in _sorted_files(sub) if not f.endswith("_h.dat")]
                if files:
                    # NOTE(review): the exact file cannot be derived from
                    # the message XML, so this is only a representative
                    # file from the message's month directory, not
                    # necessarily the file belonging to this message.
                    return os.path.join(sub, files[0]), True

        # Videos: also check msg/video/YYYY-MM/, even when msg/attach is
        # absent (previously a missing attach directory skipped this).
        if base_type == 43:
            video_dir = os.path.join(msg_dir, "video", date_prefix)
            if os.path.isdir(video_dir):
                thumbs = [f for f in _sorted_files(video_dir) if f.endswith("_thumb.jpg")]
                if thumbs:
                    return os.path.join(video_dir, thumbs[0]), True

    return None, False
312+
313+
314+
def _format_message_text(local_id, local_type, content, is_group, chat_username, chat_display_name, names, display_name_fn, db_dir=None, create_time_ts=0, resolve_media=False):
210315
sender, text = _parse_message_content(content, local_type, is_group)
211316
base_type, _ = _split_msg_type(local_type)
317+
318+
media_path = None
319+
media_exists = False
320+
if resolve_media and db_dir and content:
321+
try:
322+
media_path, media_exists = _resolve_media_path(
323+
db_dir, content, local_type, create_time_ts, chat_username
324+
)
325+
except Exception:
326+
pass
327+
212328
if base_type == 3:
213-
text = f"[图片] (local_id={local_id})"
329+
if media_path:
330+
tag = f"[图片] {media_path}"
331+
if not media_exists:
332+
tag += " (文件不存在)"
333+
else:
334+
tag = f"[图片] (local_id={local_id})"
335+
text = tag
214336
elif base_type == 47:
215337
text = "[表情]"
216338
elif base_type == 50:
217339
text = _format_voip_message_text(text) or "[通话]"
218340
elif base_type == 49:
219341
text = _format_app_message_text(
220-
text, local_type, is_group, chat_username, chat_display_name, names, display_name_fn
342+
text, local_type, is_group, chat_username, chat_display_name, names, display_name_fn,
343+
resolve_media=resolve_media, db_dir=db_dir, create_time_ts=create_time_ts
221344
) or "[链接/文件]"
222345
elif base_type != 1:
223346
type_label = format_msg_type(local_type)
@@ -387,14 +510,15 @@ def _page_ranked_entries(entries, limit, offset):
387510

388511
# ---- 构建行 ----
389512

390-
def _build_history_line(row, ctx, names, id_to_username, display_name_fn):
513+
def _build_history_line(row, ctx, names, id_to_username, display_name_fn, resolve_media=False, db_dir=None):
391514
local_id, local_type, create_time, real_sender_id, content, ct = row
392515
time_str = datetime.fromtimestamp(create_time).strftime('%Y-%m-%d %H:%M')
393516
content = decompress_content(content, ct)
394517
if content is None:
395518
content = '(无法解压)'
396519
sender, text = _format_message_text(
397-
local_id, local_type, content, ctx['is_group'], ctx['username'], ctx['display_name'], names, display_name_fn
520+
local_id, local_type, content, ctx['is_group'], ctx['username'], ctx['display_name'], names, display_name_fn,
521+
db_dir=db_dir, create_time_ts=create_time, resolve_media=resolve_media,
398522
)
399523
sender_label = _resolve_sender_label(
400524
real_sender_id, sender, ctx['is_group'], ctx['username'], ctx['display_name'], names, id_to_username, display_name_fn
@@ -404,13 +528,14 @@ def _build_history_line(row, ctx, names, id_to_username, display_name_fn):
404528
return create_time, f'[{time_str}] {text}'
405529

406530

407-
def _build_search_entry(row, ctx, names, id_to_username, display_name_fn):
531+
def _build_search_entry(row, ctx, names, id_to_username, display_name_fn, resolve_media=False, db_dir=None):
408532
local_id, local_type, create_time, real_sender_id, content, ct = row
409533
content = decompress_content(content, ct)
410534
if content is None:
411535
return None
412536
sender, text = _format_message_text(
413-
local_id, local_type, content, ctx['is_group'], ctx['username'], ctx['display_name'], names, display_name_fn
537+
local_id, local_type, content, ctx['is_group'], ctx['username'], ctx['display_name'], names, display_name_fn,
538+
db_dir=db_dir, create_time_ts=create_time, resolve_media=resolve_media,
414539
)
415540
if text and len(text) > 300:
416541
text = text[:300] + '...'
@@ -427,7 +552,7 @@ def _build_search_entry(row, ctx, names, id_to_username, display_name_fn):
427552

428553
# ---- 聊天记录查询 ----
429554

430-
def collect_chat_history(ctx, names, display_name_fn, start_ts=None, end_ts=None, limit=20, offset=0, msg_type_filter=None):
555+
def collect_chat_history(ctx, names, display_name_fn, start_ts=None, end_ts=None, limit=20, offset=0, msg_type_filter=None, resolve_media=False, db_dir=None):
431556
collected = []
432557
failures = []
433558
candidate_limit = _candidate_page_size(limit, offset)
@@ -446,7 +571,7 @@ def collect_chat_history(ctx, names, display_name_fn, start_ts=None, end_ts=None
446571
fetch_offset += len(rows)
447572
for row in rows:
448573
try:
449-
collected.append(_build_history_line(row, table_ctx, names, id_to_username, display_name_fn))
574+
collected.append(_build_history_line(row, table_ctx, names, id_to_username, display_name_fn, resolve_media=resolve_media, db_dir=db_dir))
450575
except Exception as e:
451576
failures.append(f"local_id={row[0]}: {e}")
452577
if len(collected) - before >= candidate_limit:

wechat_cli/main.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@
66

77
from .core.context import AppContext
88

9-
_VERSION = "0.2.3"
9+
_VERSION = "0.2.4"
1010

1111

1212
@click.group()

0 commit comments

Comments
 (0)