Skip to content

Commit e2cb6e7

Browse files
Copilot authored and BukeLy committed
revert: 撤销 token 估算值配置
根据用户反馈,这些估算值应该通过 LLM_REQUESTS_PER_MINUTE 和 LLM_TOKENS_PER_MINUTE 自动计算,不需要额外的配置项。 rate_limiter.py 中已有 avg_tokens_map 用于自动计算并发数。 Co-authored-by: BukeLy <19304666+BukeLy@users.noreply.github.com>
1 parent b94b318 commit e2cb6e7

4 files changed

Lines changed: 8 additions & 62 deletions

File tree

env.example

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -31,14 +31,6 @@ LLM_TOKENS_PER_MINUTE=40000 # 每分钟最大令牌数(包含输入+输出)
3131
# # 推荐:不设置此项,让系统自动计算以确保不超过 TPM/RPM 限制
3232
# # 计算示例:min(800, 40000/3500) = min(800, 11) = 11 并发
3333

34-
# --- LLM Token 估算配置(用于速率限制) ---
35-
# 估算输出 tokens 数量,用于速率限制计算
36-
# 如果估算过高,并发会受限;如果估算不足,可能触发 429 错误
37-
# LLM_ESTIMATED_OUTPUT_TOKENS=3000 # LLM 输出估算(实体提取约 3000 tokens,默认 3000)
38-
# LLM_VLM_ESTIMATED_OUTPUT_TOKENS=500 # VLM 输出估算(图片描述较短,默认 500)
39-
# LLM_VLM_MAX_TOKENS=500 # VLM API 最大输出 tokens(默认 500)
40-
# LLM_VLM_IMAGE_TOKENS_ESTIMATE=200 # VLM 图片输入估算 tokens(默认 200)
41-
4234
# ====== Embedding 配置 ======
4335
# 用于向量化文本,支持语义检索
4436
EMBEDDING_BASE_URL="https://api.siliconflow.cn/v1"
@@ -150,9 +142,6 @@ DS_OCR_REQUESTS_PER_MINUTE=800 # 每分钟最大请求数(默认 800)
150142
DS_OCR_TOKENS_PER_MINUTE=40000 # 每分钟最大令牌数(默认 40000)
151143
# DS_OCR_MAX_ASYNC=8 # 【可选】全局默认并发数(未设置时使用硬编码默认值 8)
152144

153-
# --- DeepSeek-OCR Token 估算配置(用于速率限制) ---
154-
# DS_OCR_IMAGE_TOKENS_ESTIMATE=1000 # 图片输入估算 tokens(默认 1000)
155-
156145
# ====== 智能 Parser 选择器配置(v2.0) ======
157146
# 基于文档复杂度自动选择最优 Parser 和模式
158147

src/config.py

Lines changed: 0 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -30,26 +30,6 @@ class LLMConfig(BaseSettings):
3030
tokens_per_minute: int = Field(default=40000, description="Maximum tokens per minute (input + output)")
3131
max_async: Optional[int] = Field(default=None, description="Maximum concurrent requests (optional, auto-calculated if not set)")
3232

33-
# Token estimation for rate limiting (LLM)
34-
estimated_output_tokens: int = Field(
35-
default=3000,
36-
description="Estimated output tokens for LLM calls (entity extraction typically outputs ~3000 tokens)"
37-
)
38-
39-
# Token estimation for rate limiting (VLM)
40-
vlm_estimated_output_tokens: int = Field(
41-
default=500,
42-
description="Estimated output tokens for VLM calls (image descriptions are typically shorter)"
43-
)
44-
vlm_max_tokens: int = Field(
45-
default=500,
46-
description="Maximum output tokens for VLM API calls"
47-
)
48-
vlm_image_tokens_estimate: int = Field(
49-
default=200,
50-
description="Estimated tokens for image input in VLM calls"
51-
)
52-
5333
class Config:
5434
env_prefix = "LLM_"
5535
env_file = ".env"
@@ -169,12 +149,6 @@ class DeepSeekOCRConfig(BaseSettings):
169149
tokens_per_minute: int = Field(default=40000, description="Maximum tokens per minute")
170150
max_async: Optional[int] = Field(default=None, description="Maximum concurrent requests (optional, auto-calculated if not set)")
171151

172-
# Token estimation for rate limiting
173-
image_tokens_estimate: int = Field(
174-
default=1000,
175-
description="Estimated tokens for image input in OCR calls"
176-
)
177-
178152
class Config:
179153
env_prefix = "DS_OCR_"
180154
env_file = ".env"

src/deepseek_ocr_client.py

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -57,9 +57,6 @@ class DSSeekConfig:
5757
fallback_mode: str = field(default_factory=lambda: config.ds_ocr.fallback_mode)
5858
min_output_threshold: int = field(default_factory=lambda: config.ds_ocr.min_output_threshold)
5959

60-
# Token 估算配置
61-
image_tokens_estimate: int = field(default_factory=lambda: config.ds_ocr.image_tokens_estimate)
62-
6360
def __post_init__(self):
6461
"""验证配置"""
6562
if not self.api_key:
@@ -284,8 +281,8 @@ async def _call_api(self, img_base64: str, prompt: str) -> str:
284281
Raises:
285282
Exception: API 调用失败时抛出异常
286283
"""
287-
# 估算 tokens(提示词 + 图片 + 输出)
288-
estimated_tokens = len(prompt) // 3 + self.config.image_tokens_estimate + self.config.max_tokens
284+
# 估算 tokens(提示词 + 图片约 1000 tokens + 输出约 2000 tokens)
285+
estimated_tokens = len(prompt) // 3 + 1000 + self.config.max_tokens
289286

290287
# 获取速率限制许可
291288
await self.rate_limiter.rate_limiter.acquire(estimated_tokens)
@@ -351,8 +348,8 @@ def _call_api_sync(self, img_base64: str, prompt: str) -> str:
351348
"""
352349
import asyncio
353350

354-
# 估算 tokens(提示词 + 图片 + 输出)
355-
estimated_tokens = len(prompt) // 3 + self.config.image_tokens_estimate + self.config.max_tokens
351+
# 估算 tokens(提示词 + 图片约 1000 tokens + 输出约 2000 tokens)
352+
estimated_tokens = len(prompt) // 3 + 1000 + self.config.max_tokens
356353

357354
# 在同步函数中调用异步速率限制器
358355
try:

src/multi_tenant.py

Lines changed: 4 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -64,12 +64,6 @@ def __init__(
6464
self.max_async = config.llm.max_async
6565
self.vlm_timeout = config.llm.vlm_timeout
6666

67-
# Token 估算配置
68-
self.llm_estimated_output_tokens = config.llm.estimated_output_tokens
69-
self.vlm_estimated_output_tokens = config.llm.vlm_estimated_output_tokens
70-
self.vlm_max_tokens = config.llm.vlm_max_tokens
71-
self.vlm_image_tokens_estimate = config.llm.vlm_image_tokens_estimate
72-
7367
# 存储配置
7468
self.use_external_storage = config.storage.use_external
7569
self.kv_storage = config.storage.kv_storage
@@ -112,14 +106,11 @@ def _create_llm_func(self, llm_config: Dict):
112106
# 获取 rate_limiter 实际使用的并发数(将用于 LightRAG)
113107
actual_max_concurrent = rate_limiter.max_concurrent
114108

115-
# 获取 token 估算配置(支持租户覆盖)
116-
llm_estimated_output = llm_config.get("estimated_output_tokens", self.llm_estimated_output_tokens)
117-
118109
def llm_model_func(prompt, **kwargs):
119110
# 精确计算输入 tokens(使用 tiktoken)
120111
input_tokens = count_tokens(prompt, model="cl100k_base")
121112
# 保守估算输出 tokens(实体提取通常输出较长)
122-
estimated_output = llm_estimated_output # 从配置读取
113+
estimated_output = 3000 # 50 entities + 46 relations ≈ 3000 tokens
123114
estimated_tokens = input_tokens + estimated_output
124115

125116
# Debug: 输出 token 计数
@@ -304,11 +295,6 @@ def _create_vision_model_func(self, llm_config: Dict):
304295
tokens_per_minute=tokens_per_minute
305296
)
306297

307-
# 获取 VLM token 估算配置(支持租户覆盖)
308-
vlm_image_tokens = llm_config.get("vlm_image_tokens_estimate", self.vlm_image_tokens_estimate)
309-
vlm_estimated_output = llm_config.get("vlm_estimated_output_tokens", self.vlm_estimated_output_tokens)
310-
vlm_max_tokens = llm_config.get("vlm_max_tokens", self.vlm_max_tokens)
311-
312298
async def seed_vision_model_func(prompt: str, image_data: str, system_prompt: str) -> str:
313299
"""
314300
使用 VLM 理解图片内容(带速率限制)
@@ -323,8 +309,8 @@ async def seed_vision_model_func(prompt: str, image_data: str, system_prompt: st
323309
"""
324310
# 精确计算 tokens(使用 tiktoken)
325311
prompt_tokens = count_tokens(prompt, model="cl100k_base")
326-
image_tokens = vlm_image_tokens # 从配置读取
327-
estimated_output = vlm_estimated_output # 从配置读取
312+
image_tokens = 200 # 图片约 200 tokens(固定估算)
313+
estimated_output = 500 # VLM 输出通常较短
328314
estimated_tokens = prompt_tokens + image_tokens + estimated_output
329315

330316
# Debug: 输出 token 计数
@@ -350,7 +336,7 @@ async def seed_vision_model_func(prompt: str, image_data: str, system_prompt: st
350336
]
351337
}
352338
],
353-
"max_tokens": vlm_max_tokens, # 从配置读取
339+
"max_tokens": 500,
354340
"temperature": 0.1
355341
}
356342

0 commit comments

Comments (0)