diff --git a/mcp_server.py b/mcp_server.py
index 2e47acec..9c3eef89 100644
--- a/mcp_server.py
+++ b/mcp_server.py
@@ -1999,7 +1999,7 @@ class MCPAgentIntegrated:
                 model=self.kimi_model,
                 messages=messages,
                 temperature=1.0,  # recommended for Kimi
-                max_tokens=8192,  # enough headroom for reasoning_content
+                max_tokens=128000,  # enough headroom for reasoning_content
             )

             choice = response.choices[0]
@@ -2074,7 +2074,7 @@ class MCPAgentIntegrated:
                 model=self.deepmoney_model,
                 messages=messages,
                 temperature=0.7,
-                max_tokens=8192,
+                max_tokens=128000,
             )

             summary = response.choices[0].message.content
@@ -2268,7 +2268,7 @@ class MCPAgentIntegrated:
                 model="kimi-k2-turbo-preview",  # non-thinking model, faster
                 messages=messages,
                 temperature=0.7,
-                max_tokens=8192,  # raise the token limit to support chart configs
+                max_tokens=128000,  # raise the token limit to support chart configs
             )

             summary = response.choices[0].message.content
@@ -2355,7 +2355,7 @@ class MCPAgentIntegrated:
                 model=self.deepmoney_model,
                 messages=messages,
                 temperature=0.3,
-                max_tokens=4096,
+                max_tokens=32768,
             )

             title = response.choices[0].message.content.strip()
@@ -2450,7 +2450,7 @@ class MCPAgentIntegrated:
                 model=planning_model,
                 messages=messages,
                 temperature=1.0,
-                max_tokens=8192,
+                max_tokens=32768,
                 stream=True,  # enable streaming output
             )

@@ -2494,7 +2494,7 @@ class MCPAgentIntegrated:
                 model=self.deepmoney_model,
                 messages=messages,
                 temperature=0.7,
-                max_tokens=8192,
+                max_tokens=32768,
             )

             plan_content = fallback_response.choices[0].message.content
@@ -2690,7 +2690,7 @@ class MCPAgentIntegrated:
                 model="kimi-k2-turbo-preview",
                 messages=messages,
                 temperature=0.7,
-                max_tokens=8192,
+                max_tokens=32768,
                 stream=True,  # enable streaming output
             )

@@ -2724,7 +2724,7 @@ class MCPAgentIntegrated:
                 model=self.deepmoney_model,
                 messages=messages,
                 temperature=0.7,
-                max_tokens=8192,
+                max_tokens=32768,
             )

             final_summary = fallback_response.choices[0].message.content
@@ -3676,7 +3676,7 @@ async def stream_role_response(
             tool_choice="auto",
             stream=False,  # no streaming for tool calls
             temperature=0.7,
-            max_tokens=8192,  # raise the token limit to avoid truncated output
+            max_tokens=32768,  # raise the token limit to avoid truncated output
         )

         assistant_message = response.choices[0].message
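For context, every hunk above touches the same call shape, an OpenAI-compatible chat.completions.create request, and the only change is the max_tokens ceiling (8192/4096 raised to 128000 or 32768). Below is a minimal sketch of that call, not part of the diff, assuming a Moonshot-hosted OpenAI-compatible client; the environment variable, base_url, and messages are illustrative placeholders, while the model name and max_tokens mirror the change.

    # Illustrative only -- not part of the diff. Assumes an OpenAI-compatible
    # client pointed at Moonshot's endpoint; key, base_url, and messages are
    # placeholders, while model and max_tokens mirror the values in the hunks.
    import os
    from openai import OpenAI

    client = OpenAI(
        api_key=os.environ["MOONSHOT_API_KEY"],   # assumed env var
        base_url="https://api.moonshot.cn/v1",    # assumed endpoint
    )

    response = client.chat.completions.create(
        model="kimi-k2-turbo-preview",
        messages=[{"role": "user", "content": "Summarize today's market moves."}],
        temperature=0.7,
        max_tokens=32768,  # raised from 8192 in this change
    )
    print(response.choices[0].message.content)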