update pay ui

2025-12-09 08:31:18 +08:00
parent e4937c2719
commit 25492caf15
26 changed files with 15577 additions and 1061 deletions
--- a/ml/detector.py
+++ b/ml/detector.py
@@ -0,0 +1,571 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+概念异动检测器 - 融合版
+
+结合两种方法的优势：
+1. 规则评分系统：可解释、稳定、覆盖已知模式
+2. LSTM Autoencoder：发现未知的异常模式
+
+融合策略：
+┌─────────────────────────────────────────────────────────┐
+│                     输入特征                             │
+│  (alpha, alpha_delta, amt_ratio, amt_delta, rank_pct,   │
+│   limit_up_ratio)                                        │
+├─────────────────────────────────────────────────────────┤
+│                                                          │
+│    ┌──────────────┐         ┌──────────────┐            │
+│    │  规则评分系统  │         │ LSTM Autoencoder │        │
+│    │  (0-100分)   │         │ (重构误差)      │          │
+│    └──────┬───────┘         └──────┬───────┘            │
+│           │                         │                    │
+│           ▼                         ▼                    │
+│    rule_score (0-100)      ml_score (标准化后 0-100)     │
+│                                                          │
+├─────────────────────────────────────────────────────────┤
+│                    融合策略                              │
+│                                                          │
+│  final_score = w1 * rule_score + w2 * ml_score          │
+│                                                          │
+│  异动判定：                                              │
+│  - rule_score >= 60        → 直接触发（规则强信号）       │
+│  - ml_score >= 80          → 直接触发（ML强信号）         │
+│  - final_score >= 50       → 融合触发                    │
+│                                                          │
+└─────────────────────────────────────────────────────────┘
+
+优势：
+- 规则系统保证已知模式的检出率
+- ML模型捕捉规则未覆盖的异常
+- 两者互相验证，减少误报
+"""
+
+import json
+from pathlib import Path
+from typing import Dict, List, Optional, Tuple
+from dataclasses import dataclass
+
+import numpy as np
+import torch
+
+# 尝试导入模型（可能不存在）
+try:
+    from model import LSTMAutoencoder, create_model
+    HAS_MODEL = True
+except ImportError:
+    HAS_MODEL = False
+
+
+@dataclass
+class AnomalyResult:
+    """异动检测结果"""
+    is_anomaly: bool
+    final_score: float          # 最终得分 (0-100)
+    rule_score: float           # 规则得分 (0-100)
+    ml_score: float             # ML得分 (0-100)
+    trigger_reason: str         # 触发原因
+    rule_details: Dict          # 规则明细
+    anomaly_type: str           # 异动类型: surge_up / surge_down / volume_spike / unknown
+
+
+class RuleBasedScorer:
+    """
+    基于规则的评分系统
+
+    设计原则：
+    - 每个规则独立打分
+    - 分数可叠加
+    - 阈值可配置
+    """
+
+    # 默认规则配置
+    DEFAULT_RULES = {
+        # Alpha 相关（超额收益）
+        'alpha_strong': {
+            'condition': lambda r: abs(r.get('alpha', 0)) >= 3.0,
+            'score': 35,
+            'description': 'Alpha强信号(|α|≥3%)'
+        },
+        'alpha_medium': {
+            'condition': lambda r: 2.0 <= abs(r.get('alpha', 0)) < 3.0,
+            'score': 25,
+            'description': 'Alpha中等(2%≤|α|<3%)'
+        },
+        'alpha_weak': {
+            'condition': lambda r: 1.5 <= abs(r.get('alpha', 0)) < 2.0,
+            'score': 15,
+            'description': 'Alpha轻微(1.5%≤|α|<2%)'
+        },
+
+        # Alpha 变化率（加速度）
+        'alpha_delta_strong': {
+            'condition': lambda r: abs(r.get('alpha_delta', 0)) >= 1.0,
+            'score': 30,
+            'description': 'Alpha加速强(|Δα|≥1%)'
+        },
+        'alpha_delta_medium': {
+            'condition': lambda r: 0.5 <= abs(r.get('alpha_delta', 0)) < 1.0,
+            'score': 20,
+            'description': 'Alpha加速中(0.5%≤|Δα|<1%)'
+        },
+
+        # 成交额比率（放量）
+        'volume_spike_strong': {
+            'condition': lambda r: r.get('amt_ratio', 1) >= 5.0,
+            'score': 30,
+            'description': '极度放量(≥5倍)'
+        },
+        'volume_spike_medium': {
+            'condition': lambda r: 3.0 <= r.get('amt_ratio', 1) < 5.0,
+            'score': 20,
+            'description': '显著放量(3-5倍)'
+        },
+        'volume_spike_weak': {
+            'condition': lambda r: 2.0 <= r.get('amt_ratio', 1) < 3.0,
+            'score': 10,
+            'description': '轻微放量(2-3倍)'
+        },
+
+        # 成交额变化率
+        'amt_delta_strong': {
+            'condition': lambda r: abs(r.get('amt_delta', 0)) >= 1.0,
+            'score': 15,
+            'description': '成交额急变(|Δamt|≥100%)'
+        },
+
+        # 排名跳变
+        'rank_top': {
+            'condition': lambda r: r.get('rank_pct', 0.5) >= 0.95,
+            'score': 25,
+            'description': '排名前5%'
+        },
+        'rank_bottom': {
+            'condition': lambda r: r.get('rank_pct', 0.5) <= 0.05,
+            'score': 25,
+            'description': '排名后5%'
+        },
+        'rank_high': {
+            'condition': lambda r: 0.9 <= r.get('rank_pct', 0.5) < 0.95,
+            'score': 15,
+            'description': '排名前10%'
+        },
+
+        # 涨停比例
+        'limit_up_high': {
+            'condition': lambda r: r.get('limit_up_ratio', 0) >= 0.2,
+            'score': 25,
+            'description': '涨停比例≥20%'
+        },
+        'limit_up_medium': {
+            'condition': lambda r: 0.1 <= r.get('limit_up_ratio', 0) < 0.2,
+            'score': 15,
+            'description': '涨停比例10-20%'
+        },
+
+        # 组合条件（更可靠的信号）
+        'alpha_with_volume': {
+            'condition': lambda r: abs(r.get('alpha', 0)) >= 1.5 and r.get('amt_ratio', 1) >= 2.0,
+            'score': 20,  # 额外加分
+            'description': 'Alpha+放量组合'
+        },
+        'acceleration_with_rank': {
+            'condition': lambda r: abs(r.get('alpha_delta', 0)) >= 0.5 and (r.get('rank_pct', 0.5) >= 0.9 or r.get('rank_pct', 0.5) <= 0.1),
+            'score': 15,  # 额外加分
+            'description': '加速+排名异常组合'
+        },
+    }
+
+    def __init__(self, rules: Dict = None):
+        """
+        初始化规则评分器
+
+        Args:
+            rules: 自定义规则，格式同 DEFAULT_RULES
+        """
+        self.rules = rules or self.DEFAULT_RULES
+
+    def score(self, features: Dict) -> Tuple[float, Dict]:
+        """
+        计算规则得分
+
+        Args:
+            features: 特征字典，包含 alpha, alpha_delta, amt_ratio 等
+        Returns:
+            score: 总分 (0-100)
+            details: 触发的规则明细
+        """
+        total_score = 0
+        triggered_rules = {}
+
+        for rule_name, rule_config in self.rules.items():
+            try:
+                if rule_config['condition'](features):
+                    total_score += rule_config['score']
+                    triggered_rules[rule_name] = {
+                        'score': rule_config['score'],
+                        'description': rule_config['description']
+                    }
+            except Exception:
+                # 忽略规则计算错误
+                pass
+
+        # 限制在 0-100
+        total_score = min(100, max(0, total_score))
+
+        return total_score, triggered_rules
+
+    def get_anomaly_type(self, features: Dict) -> str:
+        """判断异动类型"""
+        alpha = features.get('alpha', 0)
+        amt_ratio = features.get('amt_ratio', 1)
+
+        if alpha >= 1.5:
+            return 'surge_up'
+        elif alpha <= -1.5:
+            return 'surge_down'
+        elif amt_ratio >= 3.0:
+            return 'volume_spike'
+        else:
+            return 'unknown'
+
+
+class MLScorer:
+    """
+    基于 LSTM Autoencoder 的评分器
+
+    将重构误差转换为 0-100 的分数
+    """
+
+    def __init__(
+        self,
+        checkpoint_dir: str = 'ml/checkpoints',
+        device: str = 'auto'
+    ):
+        self.checkpoint_dir = Path(checkpoint_dir)
+
+        # 设备
+        if device == 'auto':
+            self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+        else:
+            self.device = torch.device(device)
+
+        self.model = None
+        self.thresholds = None
+        self.config = None
+
+        # 尝试加载模型
+        self._load_model()
+
+    def _load_model(self):
+        """加载模型和阈值"""
+        if not HAS_MODEL:
+            print("警告: 无法导入模型模块")
+            return
+
+        model_path = self.checkpoint_dir / 'best_model.pt'
+        thresholds_path = self.checkpoint_dir / 'thresholds.json'
+        config_path = self.checkpoint_dir / 'config.json'
+
+        if not model_path.exists():
+            print(f"警告: 模型文件不存在 {model_path}")
+            return
+
+        try:
+            # 加载配置
+            if config_path.exists():
+                with open(config_path, 'r') as f:
+                    self.config = json.load(f)
+
+            # 加载模型
+            checkpoint = torch.load(model_path, map_location=self.device)
+
+            model_config = self.config.get('model', {}) if self.config else {}
+            self.model = create_model(model_config)
+            self.model.load_state_dict(checkpoint['model_state_dict'])
+            self.model.to(self.device)
+            self.model.eval()
+
+            # 加载阈值
+            if thresholds_path.exists():
+                with open(thresholds_path, 'r') as f:
+                    self.thresholds = json.load(f)
+
+            print(f"MLScorer 加载成功 (设备: {self.device})")
+
+        except Exception as e:
+            print(f"警告: 模型加载失败 - {e}")
+            self.model = None
+
+    def is_ready(self) -> bool:
+        """检查模型是否就绪"""
+        return self.model is not None
+
+    @torch.no_grad()
+    def score(self, sequence: np.ndarray) -> float:
+        """
+        计算 ML 得分
+
+        Args:
+            sequence: (seq_len, n_features) 或 (batch, seq_len, n_features)
+        Returns:
+            score: 0-100 的分数，越高越异常
+        """
+        if not self.is_ready():
+            return 0.0
+
+        # 确保是 3D
+        if sequence.ndim == 2:
+            sequence = sequence[np.newaxis, ...]
+
+        # 转为 tensor
+        x = torch.FloatTensor(sequence).to(self.device)
+
+        # 计算重构误差
+        output, _ = self.model(x)
+        mse = ((output - x) ** 2).mean(dim=-1)  # (batch, seq_len)
+
+        # 取最后时刻的误差
+        error = mse[:, -1].cpu().numpy()
+
+        # 转换为 0-100 分数
+        # 使用 p95 阈值作为参考
+        if self.thresholds:
+            p95 = self.thresholds.get('p95', 0.1)
+            p99 = self.thresholds.get('p99', 0.2)
+        else:
+            p95, p99 = 0.1, 0.2
+
+        # 线性映射：p95 -> 50分, p99 -> 80分
+        # error=0 -> 0分, error>=p99*1.5 -> 100分
+        score = np.clip(error / p95 * 50, 0, 100)
+
+        return float(score[0]) if len(score) == 1 else score.tolist()
+
+
+class HybridAnomalyDetector:
+    """
+    融合异动检测器
+
+    结合规则系统和 ML 模型
+    """
+
+    # 默认配置
+    DEFAULT_CONFIG = {
+        # 权重配置
+        'rule_weight': 0.6,       # 规则权重
+        'ml_weight': 0.4,         # ML权重
+
+        # 触发阈值
+        'rule_trigger': 60,       # 规则直接触发阈值
+        'ml_trigger': 80,         # ML直接触发阈值
+        'fusion_trigger': 50,     # 融合触发阈值
+
+        # 特征列表
+        'features': [
+            'alpha', 'alpha_delta', 'amt_ratio',
+            'amt_delta', 'rank_pct', 'limit_up_ratio'
+        ],
+
+        # 序列长度（ML模型需要）
+        'seq_len': 30,
+    }
+
+    def __init__(
+        self,
+        config: Dict = None,
+        checkpoint_dir: str = 'ml/checkpoints',
+        device: str = 'auto'
+    ):
+        self.config = {**self.DEFAULT_CONFIG, **(config or {})}
+
+        # 初始化评分器
+        self.rule_scorer = RuleBasedScorer()
+        self.ml_scorer = MLScorer(checkpoint_dir, device)
+
+        print(f"HybridAnomalyDetector 初始化完成")
+        print(f"  规则权重: {self.config['rule_weight']}")
+        print(f"  ML权重: {self.config['ml_weight']}")
+        print(f"  ML模型: {'就绪' if self.ml_scorer.is_ready() else '未加载'}")
+
+    def detect(
+        self,
+        features: Dict,
+        sequence: np.ndarray = None
+    ) -> AnomalyResult:
+        """
+        检测异动
+
+        Args:
+            features: 当前时刻的特征字典
+            sequence: 历史序列 (seq_len, n_features)，ML模型需要
+        Returns:
+            AnomalyResult: 检测结果
+        """
+        # 1. 规则评分
+        rule_score, rule_details = self.rule_scorer.score(features)
+
+        # 2. ML评分
+        ml_score = 0.0
+        if sequence is not None and self.ml_scorer.is_ready():
+            ml_score = self.ml_scorer.score(sequence)
+
+        # 3. 融合得分
+        w1 = self.config['rule_weight']
+        w2 = self.config['ml_weight']
+
+        # 如果ML不可用，全部权重给规则
+        if not self.ml_scorer.is_ready():
+            w1, w2 = 1.0, 0.0
+
+        final_score = w1 * rule_score + w2 * ml_score
+
+        # 4. 判断是否异动
+        is_anomaly = False
+        trigger_reason = ''
+
+        if rule_score >= self.config['rule_trigger']:
+            is_anomaly = True
+            trigger_reason = f'规则强信号({rule_score:.0f}分)'
+        elif ml_score >= self.config['ml_trigger']:
+            is_anomaly = True
+            trigger_reason = f'ML强信号({ml_score:.0f}分)'
+        elif final_score >= self.config['fusion_trigger']:
+            is_anomaly = True
+            trigger_reason = f'融合触发({final_score:.0f}分)'
+
+        # 5. 判断异动类型
+        anomaly_type = self.rule_scorer.get_anomaly_type(features) if is_anomaly else ''
+
+        return AnomalyResult(
+            is_anomaly=is_anomaly,
+            final_score=final_score,
+            rule_score=rule_score,
+            ml_score=ml_score,
+            trigger_reason=trigger_reason,
+            rule_details=rule_details,
+            anomaly_type=anomaly_type
+        )
+
+    def detect_batch(
+        self,
+        features_list: List[Dict],
+        sequences: np.ndarray = None
+    ) -> List[AnomalyResult]:
+        """
+        批量检测
+
+        Args:
+            features_list: 特征字典列表
+            sequences: (batch, seq_len, n_features)
+        Returns:
+            List[AnomalyResult]
+        """
+        results = []
+
+        for i, features in enumerate(features_list):
+            seq = sequences[i] if sequences is not None else None
+            result = self.detect(features, seq)
+            results.append(result)
+
+        return results
+
+
+# ==================== 便捷函数 ====================
+
+def create_detector(
+    checkpoint_dir: str = 'ml/checkpoints',
+    config: Dict = None
+) -> HybridAnomalyDetector:
+    """创建融合检测器"""
+    return HybridAnomalyDetector(config, checkpoint_dir)
+
+
+def quick_detect(features: Dict) -> bool:
+    """
+    快速检测（只用规则，不需要ML模型）
+
+    适用于：
+    - 实时检测
+    - ML模型未训练完成时
+    """
+    scorer = RuleBasedScorer()
+    score, _ = scorer.score(features)
+    return score >= 50
+
+
+# ==================== 测试 ====================
+
+if __name__ == "__main__":
+    print("=" * 60)
+    print("融合异动检测器测试")
+    print("=" * 60)
+
+    # 创建检测器
+    detector = create_detector()
+
+    # 测试用例
+    test_cases = [
+        {
+            'name': '正常情况',
+            'features': {
+                'alpha': 0.5,
+                'alpha_delta': 0.1,
+                'amt_ratio': 1.2,
+                'amt_delta': 0.1,
+                'rank_pct': 0.5,
+                'limit_up_ratio': 0.02
+            }
+        },
+        {
+            'name': 'Alpha异动',
+            'features': {
+                'alpha': 3.5,
+                'alpha_delta': 0.8,
+                'amt_ratio': 2.5,
+                'amt_delta': 0.5,
+                'rank_pct': 0.92,
+                'limit_up_ratio': 0.05
+            }
+        },
+        {
+            'name': '放量异动',
+            'features': {
+                'alpha': 1.2,
+                'alpha_delta': 0.3,
+                'amt_ratio': 6.0,
+                'amt_delta': 1.5,
+                'rank_pct': 0.85,
+                'limit_up_ratio': 0.08
+            }
+        },
+        {
+            'name': '涨停潮',
+            'features': {
+                'alpha': 2.5,
+                'alpha_delta': 0.6,
+                'amt_ratio': 3.5,
+                'amt_delta': 0.8,
+                'rank_pct': 0.98,
+                'limit_up_ratio': 0.25
+            }
+        },
+    ]
+
+    print("\n测试结果:")
+    print("-" * 60)
+
+    for case in test_cases:
+        result = detector.detect(case['features'])
+
+        print(f"\n{case['name']}:")
+        print(f"  异动: {'是' if result.is_anomaly else '否'}")
+        print(f"  最终得分: {result.final_score:.1f}")
+        print(f"  规则得分: {result.rule_score:.1f}")
+        print(f"  ML得分: {result.ml_score:.1f}")
+        if result.is_anomaly:
+            print(f"  触发原因: {result.trigger_reason}")
+            print(f"  异动类型: {result.anomaly_type}")
+            print(f"  触发规则: {list(result.rule_details.keys())}")
+
+    print("\n" + "=" * 60)
+    print("测试完成!")