diff --git a/mcp_server.py b/mcp_server.py
index 9a5fb560..81f95e68 100644
--- a/mcp_server.py
+++ b/mcp_server.py
@@ -89,7 +89,7 @@ MODEL_CONFIGS = {
         "api_key": "",  # empty value
         "base_url": "http://111.62.35.50:8000/v1",
         "model": "deepmoney",
-        "max_tokens": 65536,  # DeepMoney is locally hosted; supports 65536
+        "max_tokens": 32768,  # DeepMoney is locally hosted; 65536 context, 32768 output limit
     },
     "gemini-3": {
         "api_key": "",  # requires a Google API key
@@ -99,8 +99,8 @@ MODEL_CONFIGS = {
     },
 }
 
-# Backward-compatible config (defaults to deepmoney: locally hosted, long context)
-KIMI_CONFIG = MODEL_CONFIGS["deepmoney"]
+# Default LLM config (deepmoney: locally hosted, long context)
+LLM_CONFIG = MODEL_CONFIGS["deepmoney"]
 DEEPMONEY_CONFIG = MODEL_CONFIGS["deepmoney"]
 
 # ==================== MCP protocol data models ====================
@@ -1830,17 +1830,18 @@ TOOL_HANDLERS = {
 # ==================== Agent system implementation ====================
 
 class MCPAgentIntegrated:
-    """Integrated MCP Agent - uses Kimi and DeepMoney"""
+    """Integrated MCP Agent - uses an LLM for planning and summarization"""
 
     def __init__(self):
-        # Initialize the Kimi client (planning)
-        self.kimi_client = OpenAI(
-            api_key=KIMI_CONFIG["api_key"],
-            base_url=KIMI_CONFIG["base_url"],
+        # Initialize the primary LLM client (planning + summarization)
+        self.llm_client = OpenAI(
+            api_key=LLM_CONFIG["api_key"],
+            base_url=LLM_CONFIG["base_url"],
         )
-        self.kimi_model = KIMI_CONFIG["model"]
+        self.llm_model = LLM_CONFIG["model"]
+        self.llm_max_tokens = LLM_CONFIG.get("max_tokens", 8192)
 
-        # Initialize the DeepMoney client (news summarization)
+        # Keep the DeepMoney client as a fallback
         self.deepmoney_client = OpenAI(
             api_key=DEEPMONEY_CONFIG["api_key"],
             base_url=DEEPMONEY_CONFIG["base_url"],
@@ -1982,8 +1983,8 @@ class MCPAgentIntegrated:
 Return only JSON, nothing else."""
 
     async def create_plan(self, user_query: str, tools: List[dict], chat_history: List[dict] = None) -> ExecutionPlan:
-        """Stage 1: create an execution plan with Kimi (with reasoning trace and history context)"""
-        logger.info(f"[Planning] Kimi starts planning: {user_query}")
+        """Stage 1: create an execution plan with the LLM (with reasoning trace and history context)"""
+        logger.info(f"[Planning] LLM starts planning: {user_query}")
 
         messages = [
             {"role": "system", "content": self.get_planning_prompt(tools)},
@@ -2005,12 +2006,12 @@ class MCPAgentIntegrated:
         # Append the current user question
         messages.append({"role": "user", "content": user_query})
 
-        # Use the configured model (defaults to deepmoney, 65536 context)
-        response = self.kimi_client.chat.completions.create(
-            model=self.kimi_model,
+        # Use the configured LLM model
+        response = self.llm_client.chat.completions.create(
+            model=self.llm_model,
             messages=messages,
             temperature=1.0,
-            max_tokens=65536,
+            max_tokens=self.llm_max_tokens,
         )
 
         choice = response.choices[0]
@@ -2020,7 +2021,7 @@ class MCPAgentIntegrated:
         reasoning_content = ""
         if hasattr(message, "reasoning_content"):
             reasoning_content = getattr(message, "reasoning_content")
-            logger.info(f"[Planning] Kimi reasoning trace: {reasoning_content[:200]}...")
+            logger.info(f"[Planning] LLM reasoning trace: {reasoning_content[:200]}...")
 
         # Extract the plan content
         plan_json = message.content.strip()
@@ -2085,7 +2086,7 @@ class MCPAgentIntegrated:
                 model=self.deepmoney_model,
                 messages=messages,
                 temperature=0.7,
-                max_tokens=65536,
+                max_tokens=DEEPMONEY_CONFIG.get("max_tokens", 8192),
             )
 
             summary = response.choices[0].message.content
@@ -2170,8 +2171,8 @@ class MCPAgentIntegrated:
         plan: ExecutionPlan,
         step_results: List[StepResult],
     ) -> str:
-        """Stage 3: generate the final summary with Kimi"""
-        logger.info("[Summary] Kimi generates the final summary")
+        """Stage 3: generate the final summary with the LLM"""
+        logger.info("[Summary] LLM generates the final summary")
 
         # Collect the successful results
         successful_results = [r for r in step_results if r.status == "success"]
@@ -2275,11 +2276,11 @@ class MCPAgentIntegrated:
         ]
 
         try:
-            response = self.kimi_client.chat.completions.create(
-                model=self.kimi_model,  # use the configured model (deepmoney)
+            response = self.llm_client.chat.completions.create(
+                model=self.llm_model,
                 messages=messages,
                 temperature=0.7,
-                max_tokens=65536,
+                max_tokens=self.llm_max_tokens,
             )
 
             summary = response.choices[0].message.content
@@ -2310,13 +2311,13 @@ class MCPAgentIntegrated:
             logger.info(f"[Agent] With {len(chat_history)} history messages")
 
         try:
-            # Stage 1: Kimi creates the plan (with history context)
+            # Stage 1: the LLM creates the plan (with history context)
             plan = await self.create_plan(user_query, tools, chat_history)
 
             # Stage 2: execute the tools
             step_results = await self.execute_plan(plan, tool_handlers)
 
-            # Stage 3: Kimi generates the summary
+            # Stage 3: the LLM generates the summary
             final_summary = await self.generate_final_summary(
                 user_query, plan, step_results
             )
@@ -2333,8 +2334,8 @@ class MCPAgentIntegrated:
                     "failed_steps": len([r for r in step_results if r.status == "failed"]),
                     "total_execution_time": sum(r.execution_time for r in step_results),
                     "model_used": {
-                        "planning": self.kimi_model,
-                        "summarization": self.kimi_model,
+                        "planning": self.llm_model,
+                        "summarization": self.llm_model,
                         "news_summary": self.deepmoney_model,
                     },
                 },
@@ -2366,7 +2367,7 @@ class MCPAgentIntegrated:
                 model=self.deepmoney_model,
                 messages=messages,
                 temperature=0.3,
-                max_tokens=65536,
+                max_tokens=DEEPMONEY_CONFIG.get("max_tokens", 8192),
             )
 
             title = response.choices[0].message.content.strip()
@@ -2411,7 +2412,7 @@ class MCPAgentIntegrated:
         # Store cookies as an instance attribute for use during tool calls
         self.cookies = cookies or {}
 
-        # If a custom model config is provided, use it; otherwise use the default Kimi
+        # If a custom model config is provided, use it; otherwise use the default LLM
         if model_config:
             planning_client = OpenAI(
                 api_key=model_config["api_key"],
@@ -2420,8 +2421,8 @@ class MCPAgentIntegrated:
             planning_model = model_config["model"]
             logger.info(f"[Agent Stream] Using custom model: {planning_model}")
         else:
-            planning_client = self.kimi_client
-            planning_model = self.kimi_model
+            planning_client = self.llm_client
+            planning_model = self.llm_model
             logger.info(f"[Agent Stream] Using default model: {planning_model}")
 
         try:
@@ -2457,8 +2458,8 @@ class MCPAgentIntegrated:
 
             try:
                 # Try the selected model's streaming API
-                # Get max_tokens from the model config; default 65536 (deepmoney)
-                model_max_tokens = model_config.get("max_tokens", 65536) if model_config else 65536
+                # Get max_tokens from the model config; default 8192
+                model_max_tokens = model_config.get("max_tokens", 8192) if model_config else 8192
                 stream = planning_client.chat.completions.create(
                     model=planning_model,
                     messages=messages,
@@ -2467,7 +2468,7 @@ class MCPAgentIntegrated:
                     stream=True,  # enable streaming output
                 )
 
-                # Receive Kimi's response chunk by chunk
+                # Receive the LLM's response chunk by chunk
                 for chunk in stream:
                     if chunk.choices[0].delta.content:
                         content_chunk = chunk.choices[0].delta.content
@@ -2489,11 +2490,11 @@ class MCPAgentIntegrated:
                             "content": reasoning_chunk
                         })
 
-            except Exception as kimi_error:
+            except Exception as llm_error:
                 # Check for a content-moderation error (400)
-                error_str = str(kimi_error)
+                error_str = str(llm_error)
                 if "400" in error_str and ("content_filter" in error_str or "high risk" in error_str):
-                    logger.warning(f"[Planning] Kimi rejected by content moderation; falling back to DeepMoney: {error_str}")
+                    logger.warning(f"[Planning] LLM rejected by content moderation; falling back to DeepMoney: {error_str}")
                     use_fallback = True
 
                     yield self._format_sse("status", {
@@ -2507,7 +2508,7 @@ class MCPAgentIntegrated:
                             model=self.deepmoney_model,
                             messages=messages,
                             temperature=0.7,
-                            max_tokens=65536,
+                            max_tokens=DEEPMONEY_CONFIG.get("max_tokens", 8192),
                         )
 
                         plan_content = fallback_response.choices[0].message.content
@@ -2522,10 +2523,10 @@ class MCPAgentIntegrated:
 
                     except Exception as fallback_error:
                         logger.error(f"[Planning] DeepMoney fallback also failed: {fallback_error}")
-                        raise Exception(f"Neither Kimi nor DeepMoney could generate a plan: {kimi_error}, {fallback_error}")
+                        raise Exception(f"Neither the LLM nor DeepMoney could generate a plan: {llm_error}, {fallback_error}")
                 else:
                     # Not a content-moderation error; re-raise
-                    logger.error(f"[Planning] Kimi call failed (not a moderation issue): {kimi_error}")
+                    logger.error(f"[Planning] LLM call failed (not a moderation issue): {llm_error}")
                     raise
 
             # Parse the complete plan
@@ -2627,7 +2628,7 @@ class MCPAgentIntegrated:
                     "execution_time": execution_time,
                 })
 
-            # Stage 3: Kimi generates the summary (streaming)
+            # Stage 3: the LLM generates the summary (streaming)
             yield self._format_sse("status", {"stage": "summarizing", "message": "Generating the final summary..."})
 
             # Collect the successful results
@@ -2699,11 +2700,11 @@ class MCPAgentIntegrated:
             final_summary = ""
 
             try:
-                summary_stream = self.kimi_client.chat.completions.create(
-                    model=self.kimi_model,  # use the configured model (deepmoney)
+                summary_stream = self.llm_client.chat.completions.create(
+                    model=self.llm_model,
                     messages=messages,
                     temperature=0.7,
-                    max_tokens=65536,
+                    max_tokens=self.llm_max_tokens,
                     stream=True,  # enable streaming output
                 )
 
@@ -2720,11 +2721,11 @@ class MCPAgentIntegrated:
 
                 logger.info("[Summary] Streaming summary complete")
 
-            except Exception as kimi_error:
+            except Exception as llm_error:
                 # Check for a content-moderation error (400)
-                error_str = str(kimi_error)
+                error_str = str(llm_error)
                 if "400" in error_str and ("content_filter" in error_str or "high risk" in error_str):
-                    logger.warning(f"[Summary] Kimi rejected by content moderation; falling back to DeepMoney: {error_str}")
+                    logger.warning(f"[Summary] LLM rejected by content moderation; falling back to DeepMoney: {error_str}")
 
                     yield self._format_sse("status", {
                         "stage": "summarizing",
@@ -2737,7 +2738,7 @@ class MCPAgentIntegrated:
                         model=self.deepmoney_model,
                         messages=messages,
                         temperature=0.7,
-                        max_tokens=65536,
+                        max_tokens=DEEPMONEY_CONFIG.get("max_tokens", 8192),
                     )
 
                     final_summary = fallback_response.choices[0].message.content
@@ -2759,7 +2760,7 @@ class MCPAgentIntegrated:
                     logger.warning("[Summary] Using the degraded path (simple concatenation)")
                 else:
                     # Not a content-moderation error; re-raise
-                    logger.error(f"[Summary] Kimi call failed (not a moderation issue): {kimi_error}")
+                    logger.error(f"[Summary] LLM call failed (not a moderation issue): {llm_error}")
                     raise
 
             # Send the complete summary and metadata
@@ -3681,8 +3682,8 @@ async def stream_role_response(
     # First call: may trigger tool calls
     tool_calls_made = []
 
-    # Get max_tokens from the model config; default 65536 (deepmoney)
-    max_tokens = model_config.get("max_tokens", 65536)
+    # Get max_tokens from the model config; default 8192
+    max_tokens = model_config.get("max_tokens", 8192)
     if openai_tools:
         response = client.chat.completions.create(
             model=model_config["model"],