Skip to content

Commit e2cb6e7

Browse files
Copilot authored and BukeLy committed
revert: 撤销 token 估算值配置
根据用户反馈,这些估算值应该通过 LLM_REQUESTS_PER_MINUTE 和 LLM_TOKENS_PER_MINUTE 自动计算,不需要额外的配置项。 rate_limiter.py 中已有 avg_tokens_map 用于自动计算并发数。 Co-authored-by: BukeLy <19304666+BukeLy@users.noreply.github.com>
1 parent b94b318 commit e2cb6e7

4 files changed

Lines changed: 8 additions & 62 deletions

File tree

env.example

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -31,14 +31,6 @@ LLM_TOKENS_PER_MINUTE=40000 # 每分钟最大令牌数(包含输入+输出)
3131
# # 推荐:不设置此项,让系统自动计算以确保不超过 TPM/RPM 限制
3232
# # 计算示例:min(800, 40000/3500) = min(800, 11) = 11 并发
3333

34-
# --- LLM Token 估算配置(用于速率限制) ---
35-
# 估算输出 tokens 数量,用于速率限制计算
36-
# 如果估算过高,并发会受限;如果估算不足,可能触发 429 错误
37-
# LLM_ESTIMATED_OUTPUT_TOKENS=3000 # LLM 输出估算(实体提取约 3000 tokens,默认 3000)
38-
# LLM_VLM_ESTIMATED_OUTPUT_TOKENS=500 # VLM 输出估算(图片描述较短,默认 500)
39-
# LLM_VLM_MAX_TOKENS=500 # VLM API 最大输出 tokens(默认 500)
40-
# LLM_VLM_IMAGE_TOKENS_ESTIMATE=200 # VLM 图片输入估算 tokens(默认 200)
41-
4234
# ====== Embedding 配置 ======
4335
# 用于向量化文本,支持语义检索
4436
EMBEDDING_BASE_URL="https://api.siliconflow.cn/v1"
@@ -150,9 +142,6 @@ DS_OCR_REQUESTS_PER_MINUTE=800 # 每分钟最大请求数(默认 800)
150142
DS_OCR_TOKENS_PER_MINUTE=40000 # 每分钟最大令牌数(默认 40000)
151143
# DS_OCR_MAX_ASYNC=8 # 【可选】全局默认并发数(未设置时使用硬编码默认值 8)
152144

153-
# --- DeepSeek-OCR Token 估算配置(用于速率限制) ---
154-
# DS_OCR_IMAGE_TOKENS_ESTIMATE=1000 # 图片输入估算 tokens(默认 1000)
155-
156145
# ====== 智能 Parser 选择器配置(v2.0) ======
157146
# 基于文档复杂度自动选择最优 Parser 和模式
158147

src/config.py

Lines changed: 0 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -30,26 +30,6 @@ class LLMConfig(BaseSettings):
3030
tokens_per_minute: int = Field(default=40000, description="Maximum tokens per minute (input + output)")
3131
max_async: Optional[int] = Field(default=None, description="Maximum concurrent requests (optional, auto-calculated if not set)")
3232

33-
# Token estimation for rate limiting (LLM)
34-
estimated_output_tokens: int = Field(
35-
default=3000,
36-
description="Estimated output tokens for LLM calls (entity extraction typically outputs ~3000 tokens)"
37-
)
38-
39-
# Token estimation for rate limiting (VLM)
40-
vlm_estimated_output_tokens: int = Field(
41-
default=500,
42-
description="Estimated output tokens for VLM calls (image descriptions are typically shorter)"
43-
)
44-
vlm_max_tokens: int = Field(
45-
default=500,
46-
description="Maximum output tokens for VLM API calls"
47-
)
48-
vlm_image_tokens_estimate: int = Field(
49-
default=200,
50-
description="Estimated tokens for image input in VLM calls"
51-
)
52-
5333
class Config:
5434
env_prefix = "LLM_"
5535
env_file = ".env"
@@ -169,12 +149,6 @@ class DeepSeekOCRConfig(BaseSettings):
169149
tokens_per_minute: int = Field(default=40000, description="Maximum tokens per minute")
170150
max_async: Optional[int] = Field(default=None, description="Maximum concurrent requests (optional, auto-calculated if not set)")
171151

172-
# Token estimation for rate limiting
173-
image_tokens_estimate: int = Field(
174-
default=1000,
175-
description="Estimated tokens for image input in OCR calls"
176-
)
177-
178152
class Config:
179153
env_prefix = "DS_OCR_"
180154
env_file = ".env"

src/deepseek_ocr_client.py

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -57,9 +57,6 @@ class DSSeekConfig:
5757
fallback_mode: str = field(default_factory=lambda: config.ds_ocr.fallback_mode)
5858
min_output_threshold: int = field(default_factory=lambda: config.ds_ocr.min_output_threshold)
5959

60-
# Token 估算配置
61-
image_tokens_estimate: int = field(default_factory=lambda: config.ds_ocr.image_tokens_estimate)
62-
6360
def __post_init__(self):
6461
"""验证配置"""
6562
if not self.api_key:
@@ -284,8 +281,8 @@ async def _call_api(self, img_base64: str, prompt: str) -> str:
284281
Raises:
285282
Exception: API 调用失败时抛出异常
286283
"""
287-
# 估算 tokens(提示词 + 图片 + 输出)
288-
estimated_tokens = len(prompt) // 3 + self.config.image_tokens_estimate + self.config.max_tokens
284+
# 估算 tokens(提示词 + 图片约 1000 tokens + 输出约 2000 tokens)
285+
estimated_tokens = len(prompt) // 3 + 1000 + self.config.max_tokens
289286

290287
# 获取速率限制许可
291288
await self.rate_limiter.rate_limiter.acquire(estimated_tokens)
@@ -351,8 +348,8 @@ def _call_api_sync(self, img_base64: str, prompt: str) -> str:
351348
"""
352349
import asyncio
353350

354-
# 估算 tokens(提示词 + 图片 + 输出)
355-
estimated_tokens = len(prompt) // 3 + self.config.image_tokens_estimate + self.config.max_tokens
351+
# 估算 tokens(提示词 + 图片约 1000 tokens + 输出约 2000 tokens)
352+
estimated_tokens = len(prompt) // 3 + 1000 + self.config.max_tokens
356353

357354
# 在同步函数中调用异步速率限制器
358355
try:

src/multi_tenant.py

Lines changed: 4 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -64,12 +64,6 @@ def __init__(
6464
self.max_async = config.llm.max_async
6565
self.vlm_timeout = config.llm.vlm_timeout
6666

67-
# Token 估算配置
68-
self.llm_estimated_output_tokens = config.llm.estimated_output_tokens
69-
self.vlm_estimated_output_tokens = config.llm.vlm_estimated_output_tokens
70-
self.vlm_max_tokens = config.llm.vlm_max_tokens
71-
self.vlm_image_tokens_estimate = config.llm.vlm_image_tokens_estimate
72-
7367
# 存储配置
7468
self.use_external_storage = config.storage.use_external
7569
self.kv_storage = config.storage.kv_storage
@@ -112,14 +106,11 @@ def _create_llm_func(self, llm_config: Dict):
112106
# 获取 rate_limiter 实际使用的并发数(将用于 LightRAG)
113107
actual_max_concurrent = rate_limiter.max_concurrent
114108

115-
# 获取 token 估算配置(支持租户覆盖)
116-
llm_estimated_output = llm_config.get("estimated_output_tokens", self.llm_estimated_output_tokens)
117-
118109
def llm_model_func(prompt, **kwargs):
119110
# 精确计算输入 tokens(使用 tiktoken)
120111
input_tokens = count_tokens(prompt, model="cl100k_base")
121112
# 保守估算输出 tokens(实体提取通常输出较长)
122-
estimated_output = llm_estimated_output # 从配置读取
113+
estimated_output = 3000 # 50 entities + 46 relations ≈ 3000 tokens
123114
estimated_tokens = input_tokens + estimated_output
124115

125116
# Debug: 输出 token 计数
@@ -304,11 +295,6 @@ def _create_vision_model_func(self, llm_config: Dict):
304295
tokens_per_minute=tokens_per_minute
305296
)
306297

307-
# 获取 VLM token 估算配置(支持租户覆盖)
308-
vlm_image_tokens = llm_config.get("vlm_image_tokens_estimate", self.vlm_image_tokens_estimate)
309-
vlm_estimated_output = llm_config.get("vlm_estimated_output_tokens", self.vlm_estimated_output_tokens)
310-
vlm_max_tokens = llm_config.get("vlm_max_tokens", self.vlm_max_tokens)
311-
312298
async def seed_vision_model_func(prompt: str, image_data: str, system_prompt: str) -> str:
313299
"""
314300
使用 VLM 理解图片内容(带速率限制)
@@ -323,8 +309,8 @@ async def seed_vision_model_func(prompt: str, image_data: str, system_prompt: st
323309
"""
324310
# 精确计算 tokens(使用 tiktoken)
325311
prompt_tokens = count_tokens(prompt, model="cl100k_base")
326-
image_tokens = vlm_image_tokens # 从配置读取
327-
estimated_output = vlm_estimated_output # 从配置读取
312+
image_tokens = 200 # 图片约 200 tokens(固定估算)
313+
estimated_output = 500 # VLM 输出通常较短
328314
estimated_tokens = prompt_tokens + image_tokens + estimated_output
329315

330316
# Debug: 输出 token 计数
@@ -350,7 +336,7 @@ async def seed_vision_model_func(prompt: str, image_data: str, system_prompt: st
350336
]
351337
}
352338
],
353-
"max_tokens": vlm_max_tokens, # 从配置读取
339+
"max_tokens": 500,
354340
"temperature": 0.1
355341
}
356342

0 commit comments

Comments (0)