更新app.py

This commit is contained in:
2026-01-31 15:32:00 +08:00
parent 1f2af549f5
commit c56f5f2f7f
4 changed files with 335 additions and 44 deletions

View File

@@ -2868,7 +2868,8 @@ A股交易时间: 上午 9:30-11:30下午 13:00-15:00
if not successful_results and step_index == 0:
# 如果没有执行任何工具(模型直接回复),使用模型的回复
if assistant_message and assistant_message.content:
final_summary = assistant_message.content
# 清理模型输出中的特殊标记(<think>、minimax:tool_call 等)
final_summary = clean_deepseek_tool_markers(assistant_message.content)
# 流式发送(虽然已经是完整的,但保持前端兼容)
yield self._format_sse("summary_chunk", {"content": final_summary})
else:
@@ -2890,16 +2891,58 @@ A股交易时间: 上午 9:30-11:30下午 13:00-15:00
stream=True, # 启用流式输出
)
# 逐块发送总结内容
# 逐块发送总结内容(带缓冲过滤特殊标签)
buffer = "" # 缓冲区,用于检测和过滤 <think> 等标签
in_think_tag = False # 是否在 <think> 标签内
in_minimax_tag = False # 是否在 minimax:tool_call 标签内
for chunk in summary_stream:
if chunk.choices and chunk.choices[0].delta.content:
content_chunk = chunk.choices[0].delta.content
final_summary += content_chunk
buffer += content_chunk
# 发送总结片段
yield self._format_sse("summary_chunk", {
"content": content_chunk
})
# 检测 <think> 开始
if '<think>' in buffer and not in_think_tag:
in_think_tag = True
# 发送 <think> 之前的内容
before_think = buffer.split('<think>')[0]
if before_think:
yield self._format_sse("summary_chunk", {"content": before_think})
buffer = '<think>' + buffer.split('<think>', 1)[1]
# 检测 </think> 结束
if '</think>' in buffer and in_think_tag:
in_think_tag = False
# 丢弃 <think>...</think> 内容,保留之后的内容
buffer = buffer.split('</think>', 1)[1].lstrip()
# 检测 minimax:tool_call 开始
if 'minimax:tool_call' in buffer and not in_minimax_tag:
in_minimax_tag = True
before_minimax = buffer.split('minimax:tool_call')[0]
if before_minimax:
yield self._format_sse("summary_chunk", {"content": before_minimax})
buffer = 'minimax:tool_call' + buffer.split('minimax:tool_call', 1)[1]
# 检测 </minimax:tool_call> 结束
if '</minimax:tool_call>' in buffer and in_minimax_tag:
in_minimax_tag = False
buffer = buffer.split('</minimax:tool_call>', 1)[1].lstrip()
# 如果不在特殊标签内,且缓冲区有足够内容,发送出去
if not in_think_tag and not in_minimax_tag:
# 保留一些缓冲以防标签跨 chunk
if len(buffer) > 50:
to_send = buffer[:-50]
buffer = buffer[-50:]
yield self._format_sse("summary_chunk", {"content": to_send})
# 发送剩余缓冲区内容(最终清理)
if buffer and not in_think_tag and not in_minimax_tag:
buffer = clean_deepseek_tool_markers(buffer)
if buffer:
yield self._format_sse("summary_chunk", {"content": buffer})
logger.info("[Summary] 流式总结完成")
@@ -2914,7 +2957,8 @@ A股交易时间: 上午 9:30-11:30下午 13:00-15:00
yield self._format_sse("summary_chunk", {"content": final_summary})
logger.warning("[Summary] 使用降级方案")
# 发送完整的总结和元数据
# 发送完整的总结和元数据(最终清理确保无残留标记)
final_summary = clean_deepseek_tool_markers(final_summary)
yield self._format_sse("summary", {
"content": final_summary,
"metadata": {
@@ -3090,6 +3134,30 @@ A股交易时间: 上午 9:30-11:30下午 13:00-15:00
"arguments": arguments
})
# 格式5: MiniMax 格式
# minimax:tool_call <invoke name="get_market_overview"> <parameter name="date">20260129</parameter> </invoke> </minimax:tool_call>
minimax_pattern = r'minimax:tool_call\s*<invoke\s+name="(\w+)">(.*?)</invoke>\s*</minimax:tool_call>'
minimax_matches = re.findall(minimax_pattern, content, re.DOTALL)
for func_name, params_str in minimax_matches:
arguments = {}
# 解析参数: <parameter name="xxx">value</parameter>
param_pattern = r'<parameter\s+name="(\w+)">(.*?)</parameter>'
param_matches = re.findall(param_pattern, params_str, re.DOTALL)
for param_name, param_value in param_matches:
param_value = param_value.strip()
# 尝试解析 JSON 值,否则作为字符串
try:
arguments[param_name] = json.loads(param_value)
except:
arguments[param_name] = param_value
tool_calls.append({
"name": func_name,
"arguments": arguments
})
logger.info(f"[Text Tool Call] 解析到 {len(tool_calls)} 个工具调用: {tool_calls}")
return tool_calls
@@ -3504,18 +3572,33 @@ MEETING_MODEL_CONFIGS = {
def clean_deepseek_tool_markers(content: str) -> str:
"""
清理 DeepSeek 模型输出中的工具调用标记
DeepSeek 有时会以文本形式输出工具调用,格式
<tool▁calls▁begin><tool▁call▁begin>tool_name<tool▁sep>{"args": "value"}<tool▁call▁end><tool▁calls▁end>
清理模型输出中的工具调用标记和思考标签
支持多种模型格式:
1. DeepSeek: <tool▁calls▁begin>...<tool▁calls▁end>
2. MiniMax: minimax:tool_call <invoke>...</invoke> </minimax:tool_call>
3. 思考标签: <think>...</think>
"""
import re
if not content:
return content
# 清理 <think>...</think> 思考标签(多种模型都可能输出)
cleaned = re.sub(r'<think>.*?</think>\s*', '', content, flags=re.DOTALL)
# 清理 MiniMax 工具调用标记
# 格式: minimax:tool_call <invoke name="xxx">...</invoke> </minimax:tool_call>
cleaned = re.sub(r'minimax:tool_call\s*<invoke[^>]*>.*?</invoke>\s*</minimax:tool_call>\s*', '', cleaned, flags=re.DOTALL)
# 清理 DeepSeek 工具调用标记
# 匹配 <tool▁calls▁begin> ... <tool▁calls▁end> 整个块
pattern = r'<tool▁calls▁begin>.*?<tool▁calls▁end>'
cleaned = re.sub(pattern, '', content, flags=re.DOTALL)
cleaned = re.sub(pattern, '', cleaned, flags=re.DOTALL)
# 清理 DSML 格式工具调用
cleaned = re.sub(r'<[\|]DSML[\|]function_calls>.*?</[\|]DSML[\|]function_calls>\s*', '', cleaned, flags=re.DOTALL)
# 清理通用 <tool_call>...</tool_call> 格式
cleaned = re.sub(r'<tool_call>.*?</tool_call>\s*', '', cleaned, flags=re.DOTALL)
# 也清理可能残留的单个标记
markers = [