def remove_cycles_from_sankey_flows(flows_data):
    """Drop cycle-closing edges so the Sankey flow data forms a DAG.

    Flows are considered in descending ``flow_ratio`` order so the most
    significant edges are kept. A flow is skipped when its target can already
    reach its source through previously accepted flows (a self-loop is the
    trivial case), because accepting it would close a cycle.

    Args:
        flows_data: list of flow dicts. Each flow is read through
            ``flow['source']['node_name']`` and
            ``flow['target']['node_name']``; the optional
            ``flow['flow_metrics']['flow_ratio']`` is the priority. A missing
            or ``None`` ``flow_metrics`` / ``flow_ratio`` counts as 0.

    Returns:
        A new list with the accepted flows, ordered by descending
        ``flow_ratio`` (ties keep their input order). A falsy input
        (``None`` or an empty list) is returned unchanged.

    Raises:
        KeyError: if a flow lacks ``source``/``target`` or their
            ``node_name`` entry.
    """
    if not flows_data:
        return flows_data

    def _priority(flow):
        # ``flow_metrics`` may be present but None, so ``.get(key, {})`` is
        # not enough: fall back to an empty dict explicitly.
        metrics = flow.get('flow_metrics') or {}
        return metrics.get('flow_ratio') or 0

    # sorted() is stable even with reverse=True, so equal ratios keep their
    # original relative order.
    sorted_flows = sorted(flows_data, key=_priority, reverse=True)

    # Adjacency of the edges accepted so far: node -> set of successors.
    successors = {}

    def _reaches(start, goal):
        """Return True if ``goal`` is reachable from ``start`` (or equal)."""
        stack = [start]
        seen = {start}
        while stack:
            node = stack.pop()
            if node == goal:
                return True
            for nxt in successors.get(node, ()):
                if nxt not in seen:
                    seen.add(nxt)
                    stack.append(nxt)
        return False

    result_flows = []
    for flow in sorted_flows:
        source = flow['source']['node_name']
        target = flow['target']['node_name']

        # The accepted graph is always acyclic, so adding source -> target
        # creates a cycle exactly when target already reaches source.
        if _reaches(target, source):
            print(f"Skipping edge that creates cycle: {source} -> {target}")
            continue

        successors.setdefault(source, set()).add(target)
        result_flows.append(flow)

    removed_count = len(flows_data) - len(result_flows)
    if removed_count > 0:
        print(f"Removed {removed_count} edges to eliminate cycles in Sankey diagram")

    return result_flows