diff --git a/mcp_server.py b/mcp_server.py
index 9a5fb560..81f95e68 100644
--- a/mcp_server.py
+++ b/mcp_server.py
@@ -89,7 +89,7 @@ MODEL_CONFIGS = {
         "api_key": "",  # empty value
         "base_url": "http://111.62.35.50:8000/v1",
         "model": "deepmoney",
-        "max_tokens": 65536,  # DeepMoney is locally hosted; supports 65536
+        "max_tokens": 32768,  # DeepMoney is locally hosted; 65536 context, 32768 output limit
     },
     "gemini-3": {
         "api_key": "",  # requires a Google API key
@@ -99,8 +99,8 @@ MODEL_CONFIGS = {
     },
 }
 
-# Backward-compatible config (defaults to deepmoney: locally hosted, long context)
-KIMI_CONFIG = MODEL_CONFIGS["deepmoney"]
+# Default LLM config (deepmoney: locally hosted, long context)
+LLM_CONFIG = MODEL_CONFIGS["deepmoney"]
 DEEPMONEY_CONFIG = MODEL_CONFIGS["deepmoney"]
 
 # ==================== MCP protocol data models ====================
@@ -1830,17 +1830,18 @@ TOOL_HANDLERS = {
 # ==================== Agent system implementation ====================
 
 class MCPAgentIntegrated:
-    """Integrated MCP Agent - uses Kimi and DeepMoney"""
+    """Integrated MCP Agent - uses an LLM for planning and summarization"""
 
     def __init__(self):
-        # Initialize the Kimi client (planning)
-        self.kimi_client = OpenAI(
-            api_key=KIMI_CONFIG["api_key"],
-            base_url=KIMI_CONFIG["base_url"],
+        # Initialize the primary LLM client (planning + summarization)
+        self.llm_client = OpenAI(
+            api_key=LLM_CONFIG["api_key"],
+            base_url=LLM_CONFIG["base_url"],
         )
-        self.kimi_model = KIMI_CONFIG["model"]
+        self.llm_model = LLM_CONFIG["model"]
+        self.llm_max_tokens = LLM_CONFIG.get("max_tokens", 8192)
 
-        # Initialize the DeepMoney client (news summarization)
+        # Keep the DeepMoney client as a fallback
         self.deepmoney_client = OpenAI(
             api_key=DEEPMONEY_CONFIG["api_key"],
             base_url=DEEPMONEY_CONFIG["base_url"],
@@ -1982,8 +1983,8 @@ class MCPAgentIntegrated:
 Return only JSON, nothing else."""
 
     async def create_plan(self, user_query: str, tools: List[dict], chat_history: List[dict] = None) -> ExecutionPlan:
-        """Stage 1: create an execution plan with Kimi (with reasoning trace and history context)"""
-        logger.info(f"[Planning] Kimi starts planning: {user_query}")
+        """Stage 1: create an execution plan with the LLM (with reasoning trace and history context)"""
+        logger.info(f"[Planning] LLM starts planning: {user_query}")
 
         messages = [
             {"role": "system", "content": self.get_planning_prompt(tools)},
@@ -2005,12 +2006,12 @@ class MCPAgentIntegrated:
         # Append the current user question
         messages.append({"role": "user", "content": user_query})
 
-        # Use the configured model (defaults to deepmoney, 65536 context)
-        response = self.kimi_client.chat.completions.create(
-            model=self.kimi_model,
+        # Use the configured LLM model
+        response = self.llm_client.chat.completions.create(
+            model=self.llm_model,
             messages=messages,
             temperature=1.0,
-            max_tokens=65536,
+            max_tokens=self.llm_max_tokens,
         )
 
         choice = response.choices[0]
@@ -2020,7 +2021,7 @@ class MCPAgentIntegrated:
         reasoning_content = ""
         if hasattr(message, "reasoning_content"):
             reasoning_content = getattr(message, "reasoning_content")
-            logger.info(f"[Planning] Kimi reasoning trace: {reasoning_content[:200]}...")
+            logger.info(f"[Planning] LLM reasoning trace: {reasoning_content[:200]}...")
 
         # Extract the plan content
         plan_json = message.content.strip()
@@ -2085,7 +2086,7 @@ class MCPAgentIntegrated:
                 model=self.deepmoney_model,
                 messages=messages,
                 temperature=0.7,
-                max_tokens=65536,
+                max_tokens=DEEPMONEY_CONFIG.get("max_tokens", 8192),
             )
 
             summary = response.choices[0].message.content
@@ -2170,8 +2171,8 @@ class MCPAgentIntegrated:
         plan: ExecutionPlan,
         step_results: List[StepResult],
     ) -> str:
-        """Stage 3: generate the final summary with Kimi"""
-        logger.info("[Summary] Kimi generates the final summary")
+        """Stage 3: generate the final summary with the LLM"""
+        logger.info("[Summary] LLM generates the final summary")
 
         # Collect the successful results
         successful_results = [r for r in step_results if r.status == "success"]
@@ -2275,11 +2276,11 @@ class MCPAgentIntegrated:
         ]
 
         try:
-            response = self.kimi_client.chat.completions.create(
-                model=self.kimi_model,  # use the configured model (deepmoney)
+            response = self.llm_client.chat.completions.create(
+                model=self.llm_model,
                 messages=messages,
                 temperature=0.7,
-                max_tokens=65536,
+                max_tokens=self.llm_max_tokens,
             )
 
             summary = response.choices[0].message.content
@@ -2310,13 +2311,13 @@ class MCPAgentIntegrated:
             logger.info(f"[Agent] With {len(chat_history)} history messages")
 
         try:
-            # Stage 1: Kimi creates the plan (with history context)
+            # Stage 1: the LLM creates the plan (with history context)
             plan = await self.create_plan(user_query, tools, chat_history)
 
             # Stage 2: execute the tools
             step_results = await self.execute_plan(plan, tool_handlers)
 
-            # Stage 3: Kimi generates the summary
+            # Stage 3: the LLM generates the summary
             final_summary = await self.generate_final_summary(
                 user_query, plan, step_results
             )
@@ -2333,8 +2334,8 @@ class MCPAgentIntegrated:
                     "failed_steps": len([r for r in step_results if r.status == "failed"]),
                     "total_execution_time": sum(r.execution_time for r in step_results),
                     "model_used": {
-                        "planning": self.kimi_model,
-                        "summarization": self.kimi_model,
+                        "planning": self.llm_model,
+                        "summarization": self.llm_model,
                         "news_summary": self.deepmoney_model,
                     },
                 },
@@ -2366,7 +2367,7 @@ class MCPAgentIntegrated:
                 model=self.deepmoney_model,
                 messages=messages,
                 temperature=0.3,
-                max_tokens=65536,
+                max_tokens=DEEPMONEY_CONFIG.get("max_tokens", 8192),
             )
 
             title = response.choices[0].message.content.strip()
@@ -2411,7 +2412,7 @@ class MCPAgentIntegrated:
         # Store cookies as an instance attribute for use during tool calls
         self.cookies = cookies or {}
 
-        # If a custom model config is provided, use it; otherwise use the default Kimi
+        # If a custom model config is provided, use it; otherwise use the default LLM
         if model_config:
             planning_client = OpenAI(
                 api_key=model_config["api_key"],
@@ -2420,8 +2421,8 @@ class MCPAgentIntegrated:
             planning_model = model_config["model"]
             logger.info(f"[Agent Stream] Using custom model: {planning_model}")
         else:
-            planning_client = self.kimi_client
-            planning_model = self.kimi_model
+            planning_client = self.llm_client
+            planning_model = self.llm_model
             logger.info(f"[Agent Stream] Using default model: {planning_model}")
 
         try:
@@ -2457,8 +2458,8 @@ class MCPAgentIntegrated:
 
             try:
                 # Try the selected model's streaming API
-                # Get max_tokens from the model config; default 65536 (deepmoney)
-                model_max_tokens = model_config.get("max_tokens", 65536) if model_config else 65536
+                # Get max_tokens from the model config; default 8192
+                model_max_tokens = model_config.get("max_tokens", 8192) if model_config else 8192
                 stream = planning_client.chat.completions.create(
                     model=planning_model,
                     messages=messages,
@@ -2467,7 +2468,7 @@ class MCPAgentIntegrated:
                     stream=True,  # enable streaming output
                 )
 
-                # Receive Kimi's response chunk by chunk
+                # Receive the LLM's response chunk by chunk
                 for chunk in stream:
                     if chunk.choices[0].delta.content:
                         content_chunk = chunk.choices[0].delta.content
@@ -2489,11 +2490,11 @@ class MCPAgentIntegrated:
                             "content": reasoning_chunk
                         })
 
-            except Exception as kimi_error:
+            except Exception as llm_error:
                 # Check for a content-moderation error (400)
-                error_str = str(kimi_error)
+                error_str = str(llm_error)
                 if "400" in error_str and ("content_filter" in error_str or "high risk" in error_str):
-                    logger.warning(f"[Planning] Kimi rejected by content moderation; falling back to DeepMoney: {error_str}")
+                    logger.warning(f"[Planning] LLM rejected by content moderation; falling back to DeepMoney: {error_str}")
                     use_fallback = True
 
                     yield self._format_sse("status", {
@@ -2507,7 +2508,7 @@ class MCPAgentIntegrated:
                             model=self.deepmoney_model,
                             messages=messages,
                             temperature=0.7,
-                            max_tokens=65536,
+                            max_tokens=DEEPMONEY_CONFIG.get("max_tokens", 8192),
                         )
 
                         plan_content = fallback_response.choices[0].message.content
@@ -2522,10 +2523,10 @@ class MCPAgentIntegrated:
 
                     except Exception as fallback_error:
                         logger.error(f"[Planning] DeepMoney fallback also failed: {fallback_error}")
-                        raise Exception(f"Neither Kimi nor DeepMoney could generate a plan: {kimi_error}, {fallback_error}")
+                        raise Exception(f"Neither the LLM nor DeepMoney could generate a plan: {llm_error}, {fallback_error}")
                 else:
                     # Not a content-moderation error; re-raise
-                    logger.error(f"[Planning] Kimi call failed (not a moderation issue): {kimi_error}")
+                    logger.error(f"[Planning] LLM call failed (not a moderation issue): {llm_error}")
                     raise
 
             # Parse the complete plan
@@ -2627,7 +2628,7 @@ class MCPAgentIntegrated:
                     "execution_time": execution_time,
                 })
 
-            # Stage 3: Kimi generates the summary (streaming)
+            # Stage 3: the LLM generates the summary (streaming)
             yield self._format_sse("status", {"stage": "summarizing", "message": "Generating the final summary..."})
 
             # Collect the successful results
@@ -2699,11 +2700,11 @@ class MCPAgentIntegrated:
             final_summary = ""
 
             try:
-                summary_stream = self.kimi_client.chat.completions.create(
-                    model=self.kimi_model,  # use the configured model (deepmoney)
+                summary_stream = self.llm_client.chat.completions.create(
+                    model=self.llm_model,
                     messages=messages,
                     temperature=0.7,
-                    max_tokens=65536,
+                    max_tokens=self.llm_max_tokens,
                     stream=True,  # enable streaming output
                 )
 
@@ -2720,11 +2721,11 @@ class MCPAgentIntegrated:
 
                 logger.info("[Summary] Streaming summary complete")
 
-            except Exception as kimi_error:
+            except Exception as llm_error:
                 # Check for a content-moderation error (400)
-                error_str = str(kimi_error)
+                error_str = str(llm_error)
                 if "400" in error_str and ("content_filter" in error_str or "high risk" in error_str):
-                    logger.warning(f"[Summary] Kimi rejected by content moderation; falling back to DeepMoney: {error_str}")
+                    logger.warning(f"[Summary] LLM rejected by content moderation; falling back to DeepMoney: {error_str}")
 
                     yield self._format_sse("status", {
                         "stage": "summarizing",
@@ -2737,7 +2738,7 @@ class MCPAgentIntegrated:
                         model=self.deepmoney_model,
                         messages=messages,
                         temperature=0.7,
-                        max_tokens=65536,
+                        max_tokens=DEEPMONEY_CONFIG.get("max_tokens", 8192),
                     )
 
                     final_summary = fallback_response.choices[0].message.content
@@ -2759,7 +2760,7 @@ class MCPAgentIntegrated:
                     logger.warning("[Summary] Using the degraded path (simple concatenation)")
                 else:
                     # Not a content-moderation error; re-raise
-                    logger.error(f"[Summary] Kimi call failed (not a moderation issue): {kimi_error}")
+                    logger.error(f"[Summary] LLM call failed (not a moderation issue): {llm_error}")
                     raise
 
             # Send the complete summary and metadata
@@ -3681,8 +3682,8 @@ async def stream_role_response(
     # First call: may trigger tool calls
     tool_calls_made = []
 
-    # Get max_tokens from the model config; default 65536 (deepmoney)
-    max_tokens = model_config.get("max_tokens", 65536)
+    # Get max_tokens from the model config; default 8192
+    max_tokens = model_config.get("max_tokens", 8192)
     if openai_tools:
         response = client.chat.completions.create(
             model=model_config["model"],