update pay ui

2025-12-10 11:02:09 +08:00
parent d9daaeed19
commit e501ac3819
21 changed files with 5514 additions and 151 deletions
--- a/ml/backtest_v2.py
+++ b/ml/backtest_v2.py
@@ -0,0 +1,294 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+V2 回测脚本 - 验证时间片对齐 + 持续性确认的效果
+
+回测指标：
+1. 准确率：异动后 N 分钟内 alpha 是否继续上涨/下跌
+2. 虚警率：多少异动是噪音
+3. 持续性：平均异动持续时长
+"""
+
+import os
+import sys
+import json
+import argparse
+from datetime import datetime
+from pathlib import Path
+from typing import Dict, List, Tuple
+from collections import defaultdict
+
+import numpy as np
+import pandas as pd
+from tqdm import tqdm
+from sqlalchemy import create_engine, text
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from ml.detector_v2 import AnomalyDetectorV2, CONFIG
+
+
+# ==================== 配置 ====================
+
+MYSQL_ENGINE = create_engine(
+    "mysql+pymysql://root:Zzl5588161!@192.168.1.5:3306/stock",
+    echo=False
+)
+
+
+# ==================== 回测评估 ====================
+
+def evaluate_alerts(
+    alerts: List[Dict],
+    raw_data: pd.DataFrame,
+    lookahead_minutes: int = 10
+) -> Dict:
+    """
+    评估异动质量
+
+    指标：
+    1. 方向正确率：异动后 N 分钟 alpha 方向是否一致
+    2. 持续率：异动后 N 分钟内有多少时刻 alpha 保持同向
+    3. 峰值收益：异动后 N 分钟内的最大 alpha
+    """
+    if not alerts:
+        return {'accuracy': 0, 'sustained_rate': 0, 'avg_peak': 0, 'total_alerts': 0}
+
+    results = []
+
+    for alert in alerts:
+        concept_id = alert['concept_id']
+        alert_time = alert['alert_time']
+        alert_alpha = alert['alpha']
+        is_up = alert_alpha > 0
+
+        # 获取该概念在异动后的数据
+        concept_data = raw_data[
+            (raw_data['concept_id'] == concept_id) &
+            (raw_data['timestamp'] > alert_time)
+        ].head(lookahead_minutes)
+
+        if len(concept_data) < 3:
+            continue
+
+        future_alphas = concept_data['alpha'].values
+
+        # 方向正确：未来 alpha 平均值与当前同向
+        avg_future_alpha = np.mean(future_alphas)
+        direction_correct = (is_up and avg_future_alpha > 0) or (not is_up and avg_future_alpha < 0)
+
+        # 持续率：有多少时刻保持同向
+        if is_up:
+            sustained_count = sum(1 for a in future_alphas if a > 0)
+        else:
+            sustained_count = sum(1 for a in future_alphas if a < 0)
+        sustained_rate = sustained_count / len(future_alphas)
+
+        # 峰值收益
+        if is_up:
+            peak = max(future_alphas)
+        else:
+            peak = min(future_alphas)
+
+        results.append({
+            'direction_correct': direction_correct,
+            'sustained_rate': sustained_rate,
+            'peak': peak,
+            'alert_alpha': alert_alpha,
+        })
+
+    if not results:
+        return {'accuracy': 0, 'sustained_rate': 0, 'avg_peak': 0, 'total_alerts': 0}
+
+    return {
+        'accuracy': np.mean([r['direction_correct'] for r in results]),
+        'sustained_rate': np.mean([r['sustained_rate'] for r in results]),
+        'avg_peak': np.mean([abs(r['peak']) for r in results]),
+        'total_alerts': len(alerts),
+        'evaluated_alerts': len(results),
+    }
+
+
+def save_alerts_to_mysql(alerts: List[Dict], dry_run: bool = False) -> int:
+    """保存异动到 MySQL"""
+    if not alerts or dry_run:
+        return 0
+
+    # 确保表存在
+    with MYSQL_ENGINE.begin() as conn:
+        conn.execute(text("""
+            CREATE TABLE IF NOT EXISTS concept_anomaly_v2 (
+                id INT AUTO_INCREMENT PRIMARY KEY,
+                concept_id VARCHAR(64) NOT NULL,
+                alert_time DATETIME NOT NULL,
+                trade_date DATE NOT NULL,
+                alert_type VARCHAR(32) NOT NULL,
+                final_score FLOAT NOT NULL,
+                rule_score FLOAT NOT NULL,
+                ml_score FLOAT NOT NULL,
+                trigger_reason VARCHAR(128),
+                confirm_ratio FLOAT,
+                alpha FLOAT,
+                alpha_zscore FLOAT,
+                amt_zscore FLOAT,
+                rank_zscore FLOAT,
+                momentum_3m FLOAT,
+                momentum_5m FLOAT,
+                limit_up_ratio FLOAT,
+                triggered_rules JSON,
+                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+                UNIQUE KEY uk_concept_time (concept_id, alert_time, trade_date),
+                INDEX idx_trade_date (trade_date),
+                INDEX idx_final_score (final_score)
+            ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='概念异动 V2（时间片对齐+持续确认）'
+        """))
+
+    # 插入数据
+    saved = 0
+    with MYSQL_ENGINE.begin() as conn:
+        for alert in alerts:
+            try:
+                conn.execute(text("""
+                    INSERT IGNORE INTO concept_anomaly_v2
+                    (concept_id, alert_time, trade_date, alert_type,
+                     final_score, rule_score, ml_score, trigger_reason, confirm_ratio,
+                     alpha, alpha_zscore, amt_zscore, rank_zscore,
+                     momentum_3m, momentum_5m, limit_up_ratio, triggered_rules)
+                    VALUES
+                    (:concept_id, :alert_time, :trade_date, :alert_type,
+                     :final_score, :rule_score, :ml_score, :trigger_reason, :confirm_ratio,
+                     :alpha, :alpha_zscore, :amt_zscore, :rank_zscore,
+                     :momentum_3m, :momentum_5m, :limit_up_ratio, :triggered_rules)
+                """), {
+                    'concept_id': alert['concept_id'],
+                    'alert_time': alert['alert_time'],
+                    'trade_date': alert['trade_date'],
+                    'alert_type': alert['alert_type'],
+                    'final_score': alert['final_score'],
+                    'rule_score': alert['rule_score'],
+                    'ml_score': alert['ml_score'],
+                    'trigger_reason': alert['trigger_reason'],
+                    'confirm_ratio': alert.get('confirm_ratio', 0),
+                    'alpha': alert['alpha'],
+                    'alpha_zscore': alert.get('alpha_zscore', 0),
+                    'amt_zscore': alert.get('amt_zscore', 0),
+                    'rank_zscore': alert.get('rank_zscore', 0),
+                    'momentum_3m': alert.get('momentum_3m', 0),
+                    'momentum_5m': alert.get('momentum_5m', 0),
+                    'limit_up_ratio': alert.get('limit_up_ratio', 0),
+                    'triggered_rules': json.dumps(alert.get('triggered_rules', [])),
+                })
+                saved += 1
+            except Exception as e:
+                print(f"保存失败: {e}")
+
+    return saved
+
+
+# ==================== 主函数 ====================
+
+def main():
+    parser = argparse.ArgumentParser(description='V2 回测')
+    parser.add_argument('--start', type=str, required=True, help='开始日期')
+    parser.add_argument('--end', type=str, default=None, help='结束日期')
+    parser.add_argument('--model_dir', type=str, default='ml/checkpoints_v2')
+    parser.add_argument('--baseline_dir', type=str, default='ml/data_v2/baselines')
+    parser.add_argument('--save', action='store_true', help='保存到数据库')
+    parser.add_argument('--lookahead', type=int, default=10, help='评估前瞻时间（分钟）')
+
+    args = parser.parse_args()
+
+    end_date = args.end or args.start
+
+    print("=" * 60)
+    print("V2 回测 - 时间片对齐 + 持续性确认")
+    print("=" * 60)
+    print(f"日期范围: {args.start} ~ {end_date}")
+    print(f"模型目录: {args.model_dir}")
+    print(f"评估前瞻: {args.lookahead} 分钟")
+
+    # 初始化检测器
+    detector = AnomalyDetectorV2(
+        model_dir=args.model_dir,
+        baseline_dir=args.baseline_dir
+    )
+
+    # 获取交易日
+    from prepare_data_v2 import get_trading_days
+    trading_days = get_trading_days(args.start, end_date)
+
+    if not trading_days:
+        print("无交易日")
+        return
+
+    print(f"交易日数: {len(trading_days)}")
+
+    # 回测统计
+    total_stats = {
+        'total_alerts': 0,
+        'accuracy_sum': 0,
+        'sustained_sum': 0,
+        'peak_sum': 0,
+        'day_count': 0,
+    }
+
+    all_alerts = []
+
+    for trade_date in tqdm(trading_days, desc="回测进度"):
+        # 检测异动
+        alerts = detector.detect(trade_date)
+
+        if not alerts:
+            continue
+
+        all_alerts.extend(alerts)
+
+        # 评估
+        raw_data = detector._compute_raw_features(trade_date)
+        if raw_data.empty:
+            continue
+
+        stats = evaluate_alerts(alerts, raw_data, args.lookahead)
+
+        if stats['evaluated_alerts'] > 0:
+            total_stats['total_alerts'] += stats['total_alerts']
+            total_stats['accuracy_sum'] += stats['accuracy'] * stats['evaluated_alerts']
+            total_stats['sustained_sum'] += stats['sustained_rate'] * stats['evaluated_alerts']
+            total_stats['peak_sum'] += stats['avg_peak'] * stats['evaluated_alerts']
+            total_stats['day_count'] += 1
+
+            print(f"\n[{trade_date}] 异动: {stats['total_alerts']}, "
+                  f"准确率: {stats['accuracy']:.1%}, "
+                  f"持续率: {stats['sustained_rate']:.1%}, "
+                  f"峰值: {stats['avg_peak']:.2f}%")
+
+    # 汇总
+    print("\n" + "=" * 60)
+    print("回测汇总")
+    print("=" * 60)
+
+    if total_stats['total_alerts'] > 0:
+        avg_accuracy = total_stats['accuracy_sum'] / total_stats['total_alerts']
+        avg_sustained = total_stats['sustained_sum'] / total_stats['total_alerts']
+        avg_peak = total_stats['peak_sum'] / total_stats['total_alerts']
+
+        print(f"总异动数: {total_stats['total_alerts']}")
+        print(f"回测天数: {total_stats['day_count']}")
+        print(f"平均每天: {total_stats['total_alerts'] / max(1, total_stats['day_count']):.1f} 个")
+        print(f"方向准确率: {avg_accuracy:.1%}")
+        print(f"持续率: {avg_sustained:.1%}")
+        print(f"平均峰值: {avg_peak:.2f}%")
+    else:
+        print("无异动检测结果")
+
+    # 保存
+    if args.save and all_alerts:
+        print(f"\n保存 {len(all_alerts)} 条异动到数据库...")
+        saved = save_alerts_to_mysql(all_alerts)
+        print(f"保存完成: {saved} 条")
+
+    print("=" * 60)
+
+
+if __name__ == "__main__":
+    main()
--- a/ml/checkpoints_v2/config.json
+++ b/ml/checkpoints_v2/config.json
@@ -0,0 +1,31 @@
+{
+  "seq_len": 10,
+  "stride": 2,
+  "train_end_date": "2025-06-30",
+  "val_end_date": "2025-09-30",
+  "features": [
+    "alpha_zscore",
+    "amt_zscore",
+    "rank_zscore",
+    "momentum_3m",
+    "momentum_5m",
+    "limit_up_ratio"
+  ],
+  "batch_size": 32768,
+  "epochs": 150,
+  "learning_rate": 0.0006,
+  "weight_decay": 1e-05,
+  "gradient_clip": 1.0,
+  "patience": 15,
+  "min_delta": 1e-06,
+  "model": {
+    "n_features": 6,
+    "hidden_dim": 32,
+    "latent_dim": 4,
+    "num_layers": 1,
+    "dropout": 0.2,
+    "bidirectional": true
+  },
+  "clip_value": 5.0,
+  "threshold_percentiles": [90, 95, 99]
+}
--- a/ml/checkpoints_v2/thresholds.json
+++ b/ml/checkpoints_v2/thresholds.json
@@ -0,0 +1,8 @@
+{
+  "p90": 0.15,
+  "p95": 0.25,
+  "p99": 0.50,
+  "mean": 0.08,
+  "std": 0.12,
+  "median": 0.06
+}
--- a/ml/detector_v2.py
+++ b/ml/detector_v2.py
@@ -0,0 +1,716 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+异动检测器 V2 - 基于时间片对齐 + 持续性确认
+
+核心改进：
+1. Z-Score 特征：相对于同时间片历史的偏离
+2. 短序列 LSTM：10分钟序列，开盘即可用
+3. 持续性确认：5分钟窗口内60%时刻超标才确认为异动
+
+检测流程：
+1. 计算当前时刻的 Z-Score（对比同时间片历史基线）
+2. 构建最近10分钟的 Z-Score 序列
+3. LSTM 计算重构误差（ML分数）
+4. 规则评分（基于 Z-Score 的规则）
+5. 滑动窗口确认：最近5分钟内是否有足够多的时刻超标
+6. 只有通过持续性确认的才输出为异动
+"""
+
+import os
+import sys
+import json
+import pickle
+from datetime import datetime, timedelta
+from typing import Dict, List, Optional, Tuple
+from collections import defaultdict, deque
+
+import numpy as np
+import pandas as pd
+import torch
+from sqlalchemy import create_engine, text
+from elasticsearch import Elasticsearch
+from clickhouse_driver import Client
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from ml.model import TransformerAutoencoder
+
+# ==================== 配置 ====================
+
+MYSQL_ENGINE = create_engine(
+    "mysql+pymysql://root:Zzl5588161!@192.168.1.5:3306/stock",
+    echo=False
+)
+
+ES_CLIENT = Elasticsearch(['http://127.0.0.1:9200'])
+ES_INDEX = 'concept_library_v3'
+
+CLICKHOUSE_CONFIG = {
+    'host': '127.0.0.1',
+    'port': 9000,
+    'user': 'default',
+    'password': 'Zzl33818!',
+    'database': 'stock'
+}
+
+REFERENCE_INDEX = '000001.SH'
+
+# 检测配置
+CONFIG = {
+    # 序列配置
+    'seq_len': 10,                    # LSTM 序列长度（分钟）
+
+    # 持续性确认配置（核心！）
+    'confirm_window': 5,              # 确认窗口（分钟）
+    'confirm_ratio': 0.6,             # 确认比例（60%时刻需要超标）
+
+    # Z-Score 阈值
+    'alpha_zscore_threshold': 2.0,    # Alpha Z-Score 阈值
+    'amt_zscore_threshold': 2.5,      # 成交额 Z-Score 阈值
+
+    # 融合权重
+    'rule_weight': 0.5,
+    'ml_weight': 0.5,
+
+    # 触发阈值
+    'rule_trigger': 60,
+    'ml_trigger': 70,
+    'fusion_trigger': 50,
+
+    # 冷却期
+    'cooldown_minutes': 10,
+    'max_alerts_per_minute': 15,
+
+    # Z-Score 截断
+    'zscore_clip': 5.0,
+}
+
+# V2 特征列表
+FEATURES_V2 = [
+    'alpha_zscore', 'amt_zscore', 'rank_zscore',
+    'momentum_3m', 'momentum_5m', 'limit_up_ratio'
+]
+
+
+# ==================== 工具函数 ====================
+
+def get_ch_client():
+    return Client(**CLICKHOUSE_CONFIG)
+
+
+def code_to_ch_format(code: str) -> str:
+    if not code or len(code) != 6 or not code.isdigit():
+        return None
+    if code.startswith('6'):
+        return f"{code}.SH"
+    elif code.startswith('0') or code.startswith('3'):
+        return f"{code}.SZ"
+    else:
+        return f"{code}.BJ"
+
+
+def time_to_slot(ts) -> str:
+    """时间戳转时间片（HH:MM）"""
+    if isinstance(ts, str):
+        return ts
+    return ts.strftime('%H:%M')
+
+
+# ==================== 基线加载 ====================
+
+def load_baselines(baseline_dir: str = 'ml/data_v2/baselines') -> Dict[str, pd.DataFrame]:
+    """加载时间片基线"""
+    baseline_file = os.path.join(baseline_dir, 'baselines.pkl')
+    if os.path.exists(baseline_file):
+        with open(baseline_file, 'rb') as f:
+            return pickle.load(f)
+    return {}
+
+
+# ==================== 规则评分（基于 Z-Score）====================
+
+def score_rules_zscore(row: Dict) -> Tuple[float, List[str]]:
+    """
+    基于 Z-Score 的规则评分
+
+    设计思路：Z-Score 已经标准化，直接用阈值判断
+    """
+    score = 0.0
+    triggered = []
+
+    alpha_zscore = row.get('alpha_zscore', 0)
+    amt_zscore = row.get('amt_zscore', 0)
+    rank_zscore = row.get('rank_zscore', 0)
+    momentum_3m = row.get('momentum_3m', 0)
+    momentum_5m = row.get('momentum_5m', 0)
+    limit_up_ratio = row.get('limit_up_ratio', 0)
+
+    alpha_zscore_abs = abs(alpha_zscore)
+    amt_zscore_abs = abs(amt_zscore)
+
+    # ========== Alpha Z-Score 规则 ==========
+    if alpha_zscore_abs >= 4.0:
+        score += 25
+        triggered.append('alpha_zscore_extreme')
+    elif alpha_zscore_abs >= 3.0:
+        score += 18
+        triggered.append('alpha_zscore_strong')
+    elif alpha_zscore_abs >= 2.0:
+        score += 10
+        triggered.append('alpha_zscore_moderate')
+
+    # ========== 成交额 Z-Score 规则 ==========
+    if amt_zscore >= 4.0:
+        score += 20
+        triggered.append('amt_zscore_extreme')
+    elif amt_zscore >= 3.0:
+        score += 12
+        triggered.append('amt_zscore_strong')
+    elif amt_zscore >= 2.0:
+        score += 6
+        triggered.append('amt_zscore_moderate')
+
+    # ========== 排名 Z-Score 规则 ==========
+    if abs(rank_zscore) >= 3.0:
+        score += 15
+        triggered.append('rank_zscore_extreme')
+    elif abs(rank_zscore) >= 2.0:
+        score += 8
+        triggered.append('rank_zscore_strong')
+
+    # ========== 动量规则 ==========
+    if momentum_3m >= 1.0:
+        score += 12
+        triggered.append('momentum_3m_strong')
+    elif momentum_3m >= 0.5:
+        score += 6
+        triggered.append('momentum_3m_moderate')
+
+    if momentum_5m >= 1.5:
+        score += 10
+        triggered.append('momentum_5m_strong')
+
+    # ========== 涨停比例规则 ==========
+    if limit_up_ratio >= 0.3:
+        score += 20
+        triggered.append('limit_up_extreme')
+    elif limit_up_ratio >= 0.15:
+        score += 12
+        triggered.append('limit_up_strong')
+    elif limit_up_ratio >= 0.08:
+        score += 5
+        triggered.append('limit_up_moderate')
+
+    # ========== 组合规则 ==========
+    # Alpha Z-Score + 成交额放大
+    if alpha_zscore_abs >= 2.0 and amt_zscore >= 2.0:
+        score += 15
+        triggered.append('combo_alpha_amt')
+
+    # Alpha Z-Score + 涨停
+    if alpha_zscore_abs >= 2.0 and limit_up_ratio >= 0.1:
+        score += 12
+        triggered.append('combo_alpha_limitup')
+
+    return min(score, 100), triggered
+
+
+# ==================== ML 评分器 ====================
+
+class MLScorerV2:
+    """V2 ML 评分器"""
+
+    def __init__(self, model_dir: str = 'ml/checkpoints_v2'):
+        self.model_dir = model_dir
+        self.model = None
+        self.thresholds = None
+        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+        self._load_model()
+
+    def _load_model(self):
+        """加载模型和阈值"""
+        model_path = os.path.join(self.model_dir, 'best_model.pt')
+        threshold_path = os.path.join(self.model_dir, 'thresholds.json')
+        config_path = os.path.join(self.model_dir, 'config.json')
+
+        if not os.path.exists(model_path):
+            print(f"警告: 模型文件不存在: {model_path}")
+            return
+
+        # 加载配置
+        with open(config_path, 'r') as f:
+            config = json.load(f)
+
+        # 创建模型
+        model_config = config.get('model', {})
+        self.model = TransformerAutoencoder(**model_config)
+
+        # 加载权重
+        checkpoint = torch.load(model_path, map_location=self.device)
+        self.model.load_state_dict(checkpoint['model_state_dict'])
+        self.model.to(self.device)
+        self.model.eval()
+
+        # 加载阈值
+        if os.path.exists(threshold_path):
+            with open(threshold_path, 'r') as f:
+                self.thresholds = json.load(f)
+
+        print(f"V2 模型加载完成: {model_path}")
+
+    @torch.no_grad()
+    def score_batch(self, sequences: np.ndarray) -> np.ndarray:
+        """
+        批量计算 ML 分数
+
+        返回 0-100 的分数，越高越异常
+        """
+        if self.model is None:
+            return np.zeros(len(sequences))
+
+        # 转换为 tensor
+        x = torch.FloatTensor(sequences).to(self.device)
+
+        # 计算重构误差
+        errors = self.model.compute_reconstruction_error(x, reduction='none')
+        # 取最后一个时刻的误差
+        last_errors = errors[:, -1].cpu().numpy()
+
+        # 转换为 0-100 分数
+        if self.thresholds:
+            p50 = self.thresholds.get('median', 0.1)
+            p99 = self.thresholds.get('p99', 1.0)
+
+            # 线性映射：p50 -> 50分，p99 -> 99分
+            scores = 50 + (last_errors - p50) / (p99 - p50) * 49
+            scores = np.clip(scores, 0, 100)
+        else:
+            # 没有阈值时，简单归一化
+            scores = last_errors * 100
+            scores = np.clip(scores, 0, 100)
+
+        return scores
+
+
+# ==================== 实时数据管理器 ====================
+
+class RealtimeDataManagerV2:
+    """
+    V2 实时数据管理器
+
+    维护：
+    1. 每个概念的历史 Z-Score 序列（用于 LSTM 输入）
+    2. 每个概念的异动候选队列（用于持续性确认）
+    """
+
+    def __init__(self, concepts: List[dict], baselines: Dict[str, pd.DataFrame]):
+        self.concepts = {c['concept_id']: c for c in concepts}
+        self.baselines = baselines
+
+        # 概念到股票的映射
+        self.concept_stocks = {c['concept_id']: set(c['stocks']) for c in concepts}
+
+        # 历史 Z-Score 序列（每个概念）
+        # {concept_id: deque([(timestamp, features_dict), ...], maxlen=seq_len)}
+        self.zscore_history = defaultdict(lambda: deque(maxlen=CONFIG['seq_len']))
+
+        # 异动候选队列（用于持续性确认）
+        # {concept_id: deque([(timestamp, score), ...], maxlen=confirm_window)}
+        self.anomaly_candidates = defaultdict(lambda: deque(maxlen=CONFIG['confirm_window']))
+
+        # 冷却期记录
+        self.cooldown = {}
+
+        # 上一次更新的时间戳
+        self.last_timestamp = None
+
+    def compute_zscore_features(
+        self,
+        concept_id: str,
+        timestamp,
+        alpha: float,
+        total_amt: float,
+        rank_pct: float,
+        limit_up_ratio: float
+    ) -> Optional[Dict]:
+        """计算单个概念单个时刻的 Z-Score 特征"""
+        if concept_id not in self.baselines:
+            return None
+
+        baseline = self.baselines[concept_id]
+        time_slot = time_to_slot(timestamp)
+
+        # 查找对应时间片的基线
+        bl_row = baseline[baseline['time_slot'] == time_slot]
+        if bl_row.empty:
+            return None
+
+        bl = bl_row.iloc[0]
+
+        # 检查样本量
+        if bl.get('sample_count', 0) < 10:
+            return None
+
+        # 计算 Z-Score
+        alpha_zscore = (alpha - bl['alpha_mean']) / bl['alpha_std']
+        amt_zscore = (total_amt - bl['amt_mean']) / bl['amt_std']
+        rank_zscore = (rank_pct - bl['rank_mean']) / bl['rank_std']
+
+        # 截断
+        clip = CONFIG['zscore_clip']
+        alpha_zscore = np.clip(alpha_zscore, -clip, clip)
+        amt_zscore = np.clip(amt_zscore, -clip, clip)
+        rank_zscore = np.clip(rank_zscore, -clip, clip)
+
+        # 计算动量（需要历史）
+        history = self.zscore_history[concept_id]
+        momentum_3m = 0
+        momentum_5m = 0
+
+        if len(history) >= 3:
+            recent_alphas = [h[1]['alpha'] for h in list(history)[-3:]]
+            older_alphas = [h[1]['alpha'] for h in list(history)[-6:-3]] if len(history) >= 6 else [alpha]
+            momentum_3m = np.mean(recent_alphas) - np.mean(older_alphas)
+
+        if len(history) >= 5:
+            recent_alphas = [h[1]['alpha'] for h in list(history)[-5:]]
+            older_alphas = [h[1]['alpha'] for h in list(history)[-10:-5]] if len(history) >= 10 else [alpha]
+            momentum_5m = np.mean(recent_alphas) - np.mean(older_alphas)
+
+        return {
+            'alpha': alpha,
+            'alpha_zscore': alpha_zscore,
+            'amt_zscore': amt_zscore,
+            'rank_zscore': rank_zscore,
+            'momentum_3m': momentum_3m,
+            'momentum_5m': momentum_5m,
+            'limit_up_ratio': limit_up_ratio,
+            'total_amt': total_amt,
+            'rank_pct': rank_pct,
+        }
+
+    def update(self, concept_id: str, timestamp, features: Dict):
+        """更新概念的历史数据"""
+        self.zscore_history[concept_id].append((timestamp, features))
+
+    def get_sequence(self, concept_id: str) -> Optional[np.ndarray]:
+        """获取用于 LSTM 的序列"""
+        history = self.zscore_history[concept_id]
+
+        if len(history) < CONFIG['seq_len']:
+            return None
+
+        # 提取特征
+        feature_list = []
+        for _, features in history:
+            feature_list.append([
+                features['alpha_zscore'],
+                features['amt_zscore'],
+                features['rank_zscore'],
+                features['momentum_3m'],
+                features['momentum_5m'],
+                features['limit_up_ratio'],
+            ])
+
+        return np.array(feature_list)
+
+    def add_anomaly_candidate(self, concept_id: str, timestamp, score: float):
+        """添加异动候选"""
+        self.anomaly_candidates[concept_id].append((timestamp, score))
+
+    def check_sustained_anomaly(self, concept_id: str, threshold: float) -> Tuple[bool, float]:
+        """
+        检查是否为持续性异动
+
+        返回：(是否确认, 确认比例)
+        """
+        candidates = self.anomaly_candidates[concept_id]
+
+        if len(candidates) < CONFIG['confirm_window']:
+            return False, 0.0
+
+        # 统计超过阈值的时刻数量
+        exceed_count = sum(1 for _, score in candidates if score >= threshold)
+        ratio = exceed_count / len(candidates)
+
+        return ratio >= CONFIG['confirm_ratio'], ratio
+
+    def check_cooldown(self, concept_id: str, timestamp) -> bool:
+        """检查是否在冷却期"""
+        if concept_id not in self.cooldown:
+            return False
+
+        last_alert = self.cooldown[concept_id]
+        try:
+            diff = (timestamp - last_alert).total_seconds() / 60
+            return diff < CONFIG['cooldown_minutes']
+        except:
+            return False
+
+    def set_cooldown(self, concept_id: str, timestamp):
+        """设置冷却期"""
+        self.cooldown[concept_id] = timestamp
+
+
+# ==================== 异动检测器 V2 ====================
+
+class AnomalyDetectorV2:
+    """
+    V2 异动检测器
+
+    核心流程：
+    1. 获取实时数据
+    2. 计算 Z-Score 特征
+    3. 规则评分 + ML 评分
+    4. 持续性确认
+    5. 输出异动
+    """
+
+    def __init__(
+        self,
+        model_dir: str = 'ml/checkpoints_v2',
+        baseline_dir: str = 'ml/data_v2/baselines'
+    ):
+        # 加载概念
+        self.concepts = self._load_concepts()
+
+        # 加载基线
+        self.baselines = load_baselines(baseline_dir)
+        print(f"加载了 {len(self.baselines)} 个概念的基线")
+
+        # 初始化 ML 评分器
+        self.ml_scorer = MLScorerV2(model_dir)
+
+        # 初始化数据管理器
+        self.data_manager = RealtimeDataManagerV2(self.concepts, self.baselines)
+
+        # 收集所有股票
+        self.all_stocks = list(set(s for c in self.concepts for s in c['stocks']))
+
+    def _load_concepts(self) -> List[dict]:
+        """从 ES 加载概念"""
+        concepts = []
+        query = {"query": {"match_all": {}}, "size": 100, "_source": ["concept_id", "concept", "stocks"]}
+
+        resp = ES_CLIENT.search(index=ES_INDEX, body=query, scroll='2m')
+        scroll_id = resp['_scroll_id']
+        hits = resp['hits']['hits']
+
+        while len(hits) > 0:
+            for hit in hits:
+                source = hit['_source']
+                stocks = []
+                if 'stocks' in source and isinstance(source['stocks'], list):
+                    for stock in source['stocks']:
+                        if isinstance(stock, dict) and 'code' in stock and stock['code']:
+                            stocks.append(stock['code'])
+                if stocks:
+                    concepts.append({
+                        'concept_id': source.get('concept_id'),
+                        'concept_name': source.get('concept'),
+                        'stocks': stocks
+                    })
+
+            resp = ES_CLIENT.scroll(scroll_id=scroll_id, scroll='2m')
+            scroll_id = resp['_scroll_id']
+            hits = resp['hits']['hits']
+
+        ES_CLIENT.clear_scroll(scroll_id=scroll_id)
+        print(f"加载了 {len(concepts)} 个概念")
+        return concepts
+
+    def detect(self, trade_date: str) -> List[Dict]:
+        """
+        检测指定日期的异动
+
+        返回异动列表
+        """
+        print(f"\n检测 {trade_date} 的异动...")
+
+        # 获取原始数据
+        raw_features = self._compute_raw_features(trade_date)
+        if raw_features.empty:
+            print("无数据")
+            return []
+
+        # 按时间排序
+        timestamps = sorted(raw_features['timestamp'].unique())
+        print(f"时间点数: {len(timestamps)}")
+
+        all_alerts = []
+
+        for ts in timestamps:
+            ts_data = raw_features[raw_features['timestamp'] == ts]
+            ts_alerts = self._process_timestamp(ts, ts_data, trade_date)
+            all_alerts.extend(ts_alerts)
+
+        print(f"共检测到 {len(all_alerts)} 个异动")
+        return all_alerts
+
+    def _compute_raw_features(self, trade_date: str) -> pd.DataFrame:
+        """计算原始特征（同 prepare_data_v2）"""
+        # 这里简化处理，直接调用数据准备逻辑
+        from prepare_data_v2 import compute_raw_concept_features
+        return compute_raw_concept_features(trade_date, self.concepts, self.all_stocks)
+
+    def _process_timestamp(self, timestamp, ts_data: pd.DataFrame, trade_date: str) -> List[Dict]:
+        """处理单个时间戳"""
+        alerts = []
+        candidates = []  # (concept_id, features, rule_score, triggered_rules)
+
+        for _, row in ts_data.iterrows():
+            concept_id = row['concept_id']
+
+            # 计算 Z-Score 特征
+            features = self.data_manager.compute_zscore_features(
+                concept_id, timestamp,
+                row['alpha'], row['total_amt'], row['rank_pct'], row['limit_up_ratio']
+            )
+
+            if features is None:
+                continue
+
+            # 更新历史
+            self.data_manager.update(concept_id, timestamp, features)
+
+            # 规则评分
+            rule_score, triggered_rules = score_rules_zscore(features)
+
+            # 收集候选
+            candidates.append((concept_id, features, rule_score, triggered_rules))
+
+        if not candidates:
+            return []
+
+        # 批量 ML 评分
+        sequences = []
+        valid_candidates = []
+
+        for concept_id, features, rule_score, triggered_rules in candidates:
+            seq = self.data_manager.get_sequence(concept_id)
+            if seq is not None:
+                sequences.append(seq)
+                valid_candidates.append((concept_id, features, rule_score, triggered_rules))
+
+        if not sequences:
+            return []
+
+        sequences = np.array(sequences)
+        ml_scores = self.ml_scorer.score_batch(sequences)
+
+        # 融合评分 + 持续性确认
+        for i, (concept_id, features, rule_score, triggered_rules) in enumerate(valid_candidates):
+            ml_score = ml_scores[i]
+            final_score = CONFIG['rule_weight'] * rule_score + CONFIG['ml_weight'] * ml_score
+
+            # 判断是否触发
+            is_triggered = (
+                rule_score >= CONFIG['rule_trigger'] or
+                ml_score >= CONFIG['ml_trigger'] or
+                final_score >= CONFIG['fusion_trigger']
+            )
+
+            # 添加到候选队列
+            self.data_manager.add_anomaly_candidate(concept_id, timestamp, final_score)
+
+            if not is_triggered:
+                continue
+
+            # 检查冷却期
+            if self.data_manager.check_cooldown(concept_id, timestamp):
+                continue
+
+            # 持续性确认
+            is_sustained, confirm_ratio = self.data_manager.check_sustained_anomaly(
+                concept_id, CONFIG['fusion_trigger']
+            )
+
+            if not is_sustained:
+                continue
+
+            # 确认为异动！
+            self.data_manager.set_cooldown(concept_id, timestamp)
+
+            # 确定异动类型
+            alpha = features['alpha']
+            if alpha >= 1.5:
+                alert_type = 'surge_up'
+            elif alpha <= -1.5:
+                alert_type = 'surge_down'
+            elif features['amt_zscore'] >= 3.0:
+                alert_type = 'volume_spike'
+            else:
+                alert_type = 'surge'
+
+            # 确定触发原因
+            if rule_score >= CONFIG['rule_trigger']:
+                trigger_reason = f'规则({rule_score:.0f})+持续确认({confirm_ratio:.0%})'
+            elif ml_score >= CONFIG['ml_trigger']:
+                trigger_reason = f'ML({ml_score:.0f})+持续确认({confirm_ratio:.0%})'
+            else:
+                trigger_reason = f'融合({final_score:.0f})+持续确认({confirm_ratio:.0%})'
+
+            alerts.append({
+                'concept_id': concept_id,
+                'concept_name': self.data_manager.concepts.get(concept_id, {}).get('concept_name', concept_id),
+                'alert_time': timestamp,
+                'trade_date': trade_date,
+                'alert_type': alert_type,
+                'final_score': final_score,
+                'rule_score': rule_score,
+                'ml_score': ml_score,
+                'trigger_reason': trigger_reason,
+                'confirm_ratio': confirm_ratio,
+                'alpha': alpha,
+                'alpha_zscore': features['alpha_zscore'],
+                'amt_zscore': features['amt_zscore'],
+                'rank_zscore': features['rank_zscore'],
+                'momentum_3m': features['momentum_3m'],
+                'momentum_5m': features['momentum_5m'],
+                'limit_up_ratio': features['limit_up_ratio'],
+                'triggered_rules': triggered_rules,
+            })
+
+        # 每分钟最多 N 个
+        if len(alerts) > CONFIG['max_alerts_per_minute']:
+            alerts = sorted(alerts, key=lambda x: x['final_score'], reverse=True)
+            alerts = alerts[:CONFIG['max_alerts_per_minute']]
+
+        return alerts
+
+
+# ==================== 主函数 ====================
+
+def main():
+    import argparse
+
+    parser = argparse.ArgumentParser(description='V2 异动检测器')
+    parser.add_argument('--date', type=str, default=None, help='检测日期（默认今天）')
+    parser.add_argument('--model_dir', type=str, default='ml/checkpoints_v2')
+    parser.add_argument('--baseline_dir', type=str, default='ml/data_v2/baselines')
+
+    args = parser.parse_args()
+
+    trade_date = args.date or datetime.now().strftime('%Y-%m-%d')
+
+    detector = AnomalyDetectorV2(
+        model_dir=args.model_dir,
+        baseline_dir=args.baseline_dir
+    )
+
+    alerts = detector.detect(trade_date)
+
+    print(f"\n检测结果:")
+    for alert in alerts[:20]:
+        print(f"  [{alert['alert_time'].strftime('%H:%M') if hasattr(alert['alert_time'], 'strftime') else alert['alert_time']}] "
+              f"{alert['concept_name']} ({alert['alert_type']}) "
+              f"分数={alert['final_score']:.0f} "
+              f"确认率={alert['confirm_ratio']:.0%}")
+
+    if len(alerts) > 20:
+        print(f"  ... 共 {len(alerts)} 个异动")
+
+
+if __name__ == "__main__":
+    main()
--- a/ml/model.py
+++ b/ml/model.py
@@ -85,9 +85,12 @@ class LSTMAutoencoder(nn.Module):
            nn.Tanh(),  # 限制范围，增加约束
        )

+        # 使用 LeakyReLU 替代 ReLU
+        # 原因：Z-Score 数据范围是 [-5, +5]，ReLU 会截断负值，丢失跌幅信息
+        # LeakyReLU 保留负值信号（乘以 0.1）
        self.bottleneck_up = nn.Sequential(
            nn.Linear(latent_dim, hidden_dim),
-            nn.ReLU(),
+            nn.LeakyReLU(negative_slope=0.1),
        )

        # Decoder: 单向 LSTM
--- a/ml/prepare_data.py
+++ b/ml/prepare_data.py
@@ -26,7 +26,9 @@ import hashlib
 import json
 import logging
 from typing import Dict, List, Set, Tuple
-from concurrent.futures import ThreadPoolExecutor, as_completed
+from concurrent.futures import ProcessPoolExecutor, as_completed
+from multiprocessing import Manager
+import multiprocessing
 import warnings
 warnings.filterwarnings('ignore')

@@ -128,7 +130,7 @@ def get_all_concepts() -> List[dict]:
        hits = resp['hits']['hits']

    ES_CLIENT.clear_scroll(scroll_id=scroll_id)
-    logger.info(f"获取到 {len(concepts)} 个概念")
+    print(f"获取到 {len(concepts)} 个概念")
    return concepts


@@ -148,7 +150,7 @@ def get_trading_days(start_date: str, end_date: str) -> List[str]:

    result = client.execute(query)
    days = [row[0].strftime('%Y-%m-%d') for row in result]
-    logger.info(f"找到 {len(days)} 个交易日: {days[0]} ~ {days[-1]}")
+    print(f"找到 {len(days)} 个交易日: {days[0]} ~ {days[-1]}")
    return days


@@ -223,21 +225,23 @@ def get_daily_index_data(trade_date: str, index_code: str = REFERENCE_INDEX) ->


 def get_prev_close(stock_codes: List[str], trade_date: str) -> Dict[str, float]:
-    """获取昨收价"""
+    """获取昨收价（上一交易日的收盘价 F007N）"""
    valid_codes = [c for c in stock_codes if c and len(c) == 6 and c.isdigit()]
    if not valid_codes:
        return {}

    codes_str = "','".join(valid_codes)

+    # 注意：F007N 是"最近成交价"即当日收盘价，F002N 是"昨日收盘价"
+    # 我们需要查上一交易日的 F007N（那天的收盘价）作为今天的昨收
    query = f"""
-    SELECT SECCODE, F002N
+    SELECT SECCODE, F007N
    FROM ea_trade
    WHERE SECCODE IN ('{codes_str}')
    AND TRADEDATE = (
        SELECT MAX(TRADEDATE) FROM ea_trade WHERE TRADEDATE < '{trade_date}'
    )
-    AND F002N IS NOT NULL AND F002N > 0
+    AND F007N IS NOT NULL AND F007N > 0
    """

    try:
@@ -245,7 +249,7 @@ def get_prev_close(stock_codes: List[str], trade_date: str) -> Dict[str, float]:
            result = conn.execute(text(query))
            return {row[0]: float(row[1]) for row in result if row[1]}
    except Exception as e:
-        logger.error(f"获取昨收价失败: {e}")
+        print(f"获取昨收价失败: {e}")
        return {}


@@ -264,7 +268,7 @@ def get_index_prev_close(trade_date: str, index_code: str = REFERENCE_INDEX) ->
            if result and result[0]:
                return float(result[0])
    except Exception as e:
-        logger.error(f"获取指数昨收失败: {e}")
+        print(f"获取指数昨收失败: {e}")

    return None

@@ -285,25 +289,19 @@ def compute_daily_features(
    """

    # 1. 获取数据
-    logger.info(f"  获取股票数据...")
    stock_df = get_daily_stock_data(trade_date, all_stocks)
    if stock_df.empty:
-        logger.warning(f"  无股票数据")
        return pd.DataFrame()

-    logger.info(f"  获取指数数据...")
    index_df = get_daily_index_data(trade_date)
    if index_df.empty:
-        logger.warning(f"  无指数数据")
        return pd.DataFrame()

    # 2. 获取昨收价
-    logger.info(f"  获取昨收价...")
    prev_close = get_prev_close(all_stocks, trade_date)
    index_prev_close = get_index_prev_close(trade_date)

    if not prev_close or not index_prev_close:
-        logger.warning(f"  无昨收价数据")
        return pd.DataFrame()

    # 3. 计算股票涨跌幅和成交额
@@ -317,7 +315,6 @@ def compute_daily_features(

    # 5. 获取所有时间点
    timestamps = sorted(stock_df['timestamp'].unique())
-    logger.info(f"  时间点数: {len(timestamps)}")

    # 6. 按时间点计算概念特征
    results = []
@@ -414,87 +411,126 @@ def compute_daily_features(
        if amt_delta_std > 0:
            final_df['amt_delta'] = final_df['amt_delta'] / amt_delta_std

-    logger.info(f"  计算完成: {len(final_df)} 条记录")
    return final_df


 # ==================== 主流程 ====================

-def process_single_day(trade_date: str, concepts: List[dict], all_stocks: List[str]) -> str:
-    """处理单个交易日"""
+def process_single_day(args) -> Tuple[str, bool]:
+    """
+    处理单个交易日（多进程版本）
+
+    Args:
+        args: (trade_date, concepts, all_stocks) 元组
+
+    Returns:
+        (trade_date, success) 元组
+    """
+    trade_date, concepts, all_stocks = args
    output_file = os.path.join(OUTPUT_DIR, f'features_{trade_date}.parquet')

    # 检查是否已处理
    if os.path.exists(output_file):
-        logger.info(f"[{trade_date}] 已存在，跳过")
-        return output_file
+        print(f"[{trade_date}] 已存在，跳过")
+        return (trade_date, True)

-    logger.info(f"[{trade_date}] 开始处理...")
+    print(f"[{trade_date}] 开始处理...")

    try:
        df = compute_daily_features(trade_date, concepts, all_stocks)

        if df.empty:
-            logger.warning(f"[{trade_date}] 无数据")
-            return None
+            print(f"[{trade_date}] 无数据")
+            return (trade_date, False)

        # 保存
        df.to_parquet(output_file, index=False)
-        logger.info(f"[{trade_date}] 保存完成: {output_file}")
-        return output_file
+        print(f"[{trade_date}] 保存完成")
+        return (trade_date, True)

    except Exception as e:
-        logger.error(f"[{trade_date}] 处理失败: {e}")
+        print(f"[{trade_date}] 处理失败: {e}")
        import traceback
        traceback.print_exc()
-        return None
+        return (trade_date, False)


 def main():
    import argparse
+    from tqdm import tqdm

    parser = argparse.ArgumentParser(description='准备训练数据')
    parser.add_argument('--start', type=str, default='2022-01-01', help='开始日期')
    parser.add_argument('--end', type=str, default=None, help='结束日期（默认今天）')
-    parser.add_argument('--workers', type=int, default=1, help='并行数（建议1，避免数据库压力）')
+    parser.add_argument('--workers', type=int, default=18, help='并行进程数（默认18）')
+    parser.add_argument('--force', action='store_true', help='强制重新处理已存在的文件')

    args = parser.parse_args()

    end_date = args.end or datetime.now().strftime('%Y-%m-%d')

-    logger.info("=" * 60)
-    logger.info("数据准备 - Transformer Autoencoder 训练数据")
-    logger.info("=" * 60)
-    logger.info(f"日期范围: {args.start} ~ {end_date}")
+    print("=" * 60)
+    print("数据准备 - Transformer Autoencoder 训练数据")
+    print("=" * 60)
+    print(f"日期范围: {args.start} ~ {end_date}")
+    print(f"并行进程数: {args.workers}")

    # 1. 获取概念列表
    concepts = get_all_concepts()

    # 收集所有股票
    all_stocks = list(set(s for c in concepts for s in c['stocks']))
-    logger.info(f"股票总数: {len(all_stocks)}")
+    print(f"股票总数: {len(all_stocks)}")

    # 2. 获取交易日列表
    trading_days = get_trading_days(args.start, end_date)

    if not trading_days:
-        logger.error("无交易日数据")
+        print("无交易日数据")
        return

-    # 3. 处理每个交易日
-    logger.info(f"\n开始处理 {len(trading_days)} 个交易日...")
+    # 如果强制模式，删除已有文件
+    if args.force:
+        for trade_date in trading_days:
+            output_file = os.path.join(OUTPUT_DIR, f'features_{trade_date}.parquet')
+            if os.path.exists(output_file):
+                os.remove(output_file)
+                print(f"删除已有文件: {output_file}")

+    # 3. 准备任务参数
+    tasks = [(trade_date, concepts, all_stocks) for trade_date in trading_days]
+
+    print(f"\n开始处理 {len(trading_days)} 个交易日（{args.workers} 进程并行）...")
+
+    # 4. 多进程处理
    success_count = 0
-    for i, trade_date in enumerate(trading_days):
-        logger.info(f"\n[{i+1}/{len(trading_days)}] {trade_date}")
-        result = process_single_day(trade_date, concepts, all_stocks)
-        if result:
-            success_count += 1
+    failed_dates = []

-    logger.info("\n" + "=" * 60)
-    logger.info(f"处理完成: {success_count}/{len(trading_days)} 个交易日")
-    logger.info(f"数据保存在: {OUTPUT_DIR}")
-    logger.info("=" * 60)
+    with ProcessPoolExecutor(max_workers=args.workers) as executor:
+        # 提交所有任务
+        futures = {executor.submit(process_single_day, task): task[0] for task in tasks}
+
+        # 使用 tqdm 显示进度
+        with tqdm(total=len(futures), desc="处理进度", unit="天") as pbar:
+            for future in as_completed(futures):
+                trade_date = futures[future]
+                try:
+                    result_date, success = future.result()
+                    if success:
+                        success_count += 1
+                    else:
+                        failed_dates.append(result_date)
+                except Exception as e:
+                    print(f"\n[{trade_date}] 进程异常: {e}")
+                    failed_dates.append(trade_date)
+                pbar.update(1)
+
+    print("\n" + "=" * 60)
+    print(f"处理完成: {success_count}/{len(trading_days)} 个交易日")
+    if failed_dates:
+        print(f"失败日期: {failed_dates[:10]}{'...' if len(failed_dates) > 10 else ''}")
+    print(f"数据保存在: {OUTPUT_DIR}")
+    print("=" * 60)


 if __name__ == "__main__":
--- a/ml/prepare_data_v2.py
+++ b/ml/prepare_data_v2.py
@@ -0,0 +1,715 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+数据准备 V2 - 基于时间片对齐的特征计算（修复版）
+
+核心改进：
+1. 时间片对齐：9:35 和历史的 9:35 比，而不是和前30分钟比
+2. Z-Score 特征：相对于同时间片历史分布的偏离程度
+3. 滚动窗口基线：每个日期使用它之前 N 天的数据作为基线（不是固定的最后 N 天！）
+4. 基于 Z-Score 的动量：消除一天内波动率异构性
+
+修复：
+- 滚动窗口基线：避免未来数据泄露
+- Z-Score 动量：消除早盘/尾盘波动率差异
+- 进程级数据库单例：避免连接池爆炸
+"""
+
+import os
+import sys
+import numpy as np
+import pandas as pd
+from datetime import datetime, timedelta
+from sqlalchemy import create_engine, text
+from elasticsearch import Elasticsearch
+from clickhouse_driver import Client
+from concurrent.futures import ProcessPoolExecutor, as_completed
+from typing import Dict, List, Tuple, Optional
+from tqdm import tqdm
+from collections import defaultdict
+import warnings
+import pickle
+
+warnings.filterwarnings('ignore')
+
+# ==================== 配置 ====================
+
+MYSQL_URL = "mysql+pymysql://root:Zzl5588161!@192.168.1.5:3306/stock"
+ES_HOST = 'http://127.0.0.1:9200'
+ES_INDEX = 'concept_library_v3'
+
+CLICKHOUSE_CONFIG = {
+    'host': '127.0.0.1',
+    'port': 9000,
+    'user': 'default',
+    'password': 'Zzl33818!',
+    'database': 'stock'
+}
+
+REFERENCE_INDEX = '000001.SH'
+
+# 输出目录
+OUTPUT_DIR = os.path.join(os.path.dirname(__file__), 'data_v2')
+BASELINE_DIR = os.path.join(OUTPUT_DIR, 'baselines')
+RAW_CACHE_DIR = os.path.join(OUTPUT_DIR, 'raw_cache')
+os.makedirs(OUTPUT_DIR, exist_ok=True)
+os.makedirs(BASELINE_DIR, exist_ok=True)
+os.makedirs(RAW_CACHE_DIR, exist_ok=True)
+
+# 特征配置
+CONFIG = {
+    'baseline_days': 20,           # 滚动窗口大小
+    'min_baseline_samples': 10,    # 最少需要10个样本才算有效基线
+    'limit_up_threshold': 9.8,
+    'limit_down_threshold': -9.8,
+    'zscore_clip': 5.0,
+}
+
+# 特征列表
+FEATURES_V2 = [
+    'alpha', 'alpha_zscore', 'amt_zscore', 'rank_zscore',
+    'momentum_3m', 'momentum_5m', 'limit_up_ratio',
+]
+
+# ==================== 进程级单例（避免连接池爆炸）====================
+
+# 进程级全局变量
+_process_mysql_engine = None
+_process_es_client = None
+_process_ch_client = None
+
+
+def init_process_connections():
+    """进程初始化时调用，创建连接（单例）"""
+    global _process_mysql_engine, _process_es_client, _process_ch_client
+    _process_mysql_engine = create_engine(MYSQL_URL, echo=False, pool_pre_ping=True, pool_size=5)
+    _process_es_client = Elasticsearch([ES_HOST])
+    _process_ch_client = Client(**CLICKHOUSE_CONFIG)
+
+
+def get_mysql_engine():
+    """获取进程级 MySQL Engine（单例）"""
+    global _process_mysql_engine
+    if _process_mysql_engine is None:
+        _process_mysql_engine = create_engine(MYSQL_URL, echo=False, pool_pre_ping=True, pool_size=5)
+    return _process_mysql_engine
+
+
+def get_es_client():
+    """获取进程级 ES 客户端（单例）"""
+    global _process_es_client
+    if _process_es_client is None:
+        _process_es_client = Elasticsearch([ES_HOST])
+    return _process_es_client
+
+
+def get_ch_client():
+    """获取进程级 ClickHouse 客户端（单例）"""
+    global _process_ch_client
+    if _process_ch_client is None:
+        _process_ch_client = Client(**CLICKHOUSE_CONFIG)
+    return _process_ch_client
+
+
+# ==================== 工具函数 ====================
+
+def code_to_ch_format(code: str) -> str:
+    if not code or len(code) != 6 or not code.isdigit():
+        return None
+    if code.startswith('6'):
+        return f"{code}.SH"
+    elif code.startswith('0') or code.startswith('3'):
+        return f"{code}.SZ"
+    else:
+        return f"{code}.BJ"
+
+
+def time_to_slot(ts) -> str:
+    """将时间戳转换为时间片（HH:MM格式）"""
+    if isinstance(ts, str):
+        return ts
+    return ts.strftime('%H:%M')
+
+
+# ==================== 获取概念列表 ====================
+
+def get_all_concepts() -> List[dict]:
+    """从ES获取所有叶子概念"""
+    es_client = get_es_client()
+    concepts = []
+
+    query = {
+        "query": {"match_all": {}},
+        "size": 100,
+        "_source": ["concept_id", "concept", "stocks"]
+    }
+
+    resp = es_client.search(index=ES_INDEX, body=query, scroll='2m')
+    scroll_id = resp['_scroll_id']
+    hits = resp['hits']['hits']
+
+    while len(hits) > 0:
+        for hit in hits:
+            source = hit['_source']
+            stocks = []
+            if 'stocks' in source and isinstance(source['stocks'], list):
+                for stock in source['stocks']:
+                    if isinstance(stock, dict) and 'code' in stock and stock['code']:
+                        stocks.append(stock['code'])
+
+            if stocks:
+                concepts.append({
+                    'concept_id': source.get('concept_id'),
+                    'concept_name': source.get('concept'),
+                    'stocks': stocks
+                })
+
+        resp = es_client.scroll(scroll_id=scroll_id, scroll='2m')
+        scroll_id = resp['_scroll_id']
+        hits = resp['hits']['hits']
+
+    es_client.clear_scroll(scroll_id=scroll_id)
+    print(f"获取到 {len(concepts)} 个概念")
+    return concepts
+
+
+# ==================== 获取交易日列表 ====================
+
+def get_trading_days(start_date: str, end_date: str) -> List[str]:
+    """获取交易日列表"""
+    client = get_ch_client()
+
+    query = f"""
+    SELECT DISTINCT toDate(timestamp) as trade_date
+    FROM stock_minute
+    WHERE toDate(timestamp) >= '{start_date}'
+    AND toDate(timestamp) <= '{end_date}'
+    ORDER BY trade_date
+    """
+
+    result = client.execute(query)
+    days = [row[0].strftime('%Y-%m-%d') for row in result]
+    if days:
+        print(f"找到 {len(days)} 个交易日: {days[0]} ~ {days[-1]}")
+    return days
+
+
+# ==================== 获取昨收价 ====================
+
+def get_prev_close(stock_codes: List[str], trade_date: str) -> Dict[str, float]:
+    """获取昨收价（上一交易日的收盘价 F007N）"""
+    valid_codes = [c for c in stock_codes if c and len(c) == 6 and c.isdigit()]
+    if not valid_codes:
+        return {}
+
+    codes_str = "','".join(valid_codes)
+    query = f"""
+    SELECT SECCODE, F007N
+    FROM ea_trade
+    WHERE SECCODE IN ('{codes_str}')
+    AND TRADEDATE = (
+        SELECT MAX(TRADEDATE) FROM ea_trade WHERE TRADEDATE < '{trade_date}'
+    )
+    AND F007N IS NOT NULL AND F007N > 0
+    """
+
+    try:
+        engine = get_mysql_engine()
+        with engine.connect() as conn:
+            result = conn.execute(text(query))
+            return {row[0]: float(row[1]) for row in result if row[1]}
+    except Exception as e:
+        print(f"获取昨收价失败: {e}")
+        return {}
+
+
+def get_index_prev_close(trade_date: str, index_code: str = REFERENCE_INDEX) -> float:
+    """获取指数昨收价"""
+    code_no_suffix = index_code.split('.')[0]
+
+    try:
+        engine = get_mysql_engine()
+        with engine.connect() as conn:
+            result = conn.execute(text("""
+                SELECT F006N FROM ea_exchangetrade
+                WHERE INDEXCODE = :code AND TRADEDATE < :today
+                ORDER BY TRADEDATE DESC LIMIT 1
+            """), {'code': code_no_suffix, 'today': trade_date}).fetchone()
+
+            if result and result[0]:
+                return float(result[0])
+    except Exception as e:
+        print(f"获取指数昨收失败: {e}")
+
+    return None
+
+
+# ==================== 获取分钟数据 ====================
+
+def get_daily_stock_data(trade_date: str, stock_codes: List[str]) -> pd.DataFrame:
+    """获取单日所有股票的分钟数据"""
+    client = get_ch_client()
+
+    ch_codes = []
+    code_map = {}
+    for code in stock_codes:
+        ch_code = code_to_ch_format(code)
+        if ch_code:
+            ch_codes.append(ch_code)
+            code_map[ch_code] = code
+
+    if not ch_codes:
+        return pd.DataFrame()
+
+    ch_codes_str = "','".join(ch_codes)
+
+    query = f"""
+    SELECT code, timestamp, close, volume, amt
+    FROM stock_minute
+    WHERE toDate(timestamp) = '{trade_date}'
+    AND code IN ('{ch_codes_str}')
+    ORDER BY code, timestamp
+    """
+
+    result = client.execute(query)
+    if not result:
+        return pd.DataFrame()
+
+    df = pd.DataFrame(result, columns=['ch_code', 'timestamp', 'close', 'volume', 'amt'])
+    df['code'] = df['ch_code'].map(code_map)
+    df = df.dropna(subset=['code'])
+
+    return df[['code', 'timestamp', 'close', 'volume', 'amt']]
+
+
+def get_daily_index_data(trade_date: str, index_code: str = REFERENCE_INDEX) -> pd.DataFrame:
+    """获取单日指数分钟数据"""
+    client = get_ch_client()
+
+    query = f"""
+    SELECT timestamp, close, volume, amt
+    FROM index_minute
+    WHERE toDate(timestamp) = '{trade_date}'
+    AND code = '{index_code}'
+    ORDER BY timestamp
+    """
+
+    result = client.execute(query)
+    if not result:
+        return pd.DataFrame()
+
+    df = pd.DataFrame(result, columns=['timestamp', 'close', 'volume', 'amt'])
+    return df
+
+
+# ==================== 计算原始概念特征（单日）====================
+
+def compute_raw_concept_features(
+    trade_date: str,
+    concepts: List[dict],
+    all_stocks: List[str]
+) -> pd.DataFrame:
+    """计算单日概念的原始特征（alpha, amt, rank_pct, limit_up_ratio）"""
+    # 检查缓存
+    cache_file = os.path.join(RAW_CACHE_DIR, f'raw_{trade_date}.parquet')
+    if os.path.exists(cache_file):
+        return pd.read_parquet(cache_file)
+
+    # 获取数据
+    stock_df = get_daily_stock_data(trade_date, all_stocks)
+    if stock_df.empty:
+        return pd.DataFrame()
+
+    index_df = get_daily_index_data(trade_date)
+    if index_df.empty:
+        return pd.DataFrame()
+
+    # 获取昨收价
+    prev_close = get_prev_close(all_stocks, trade_date)
+    index_prev_close = get_index_prev_close(trade_date)
+
+    if not prev_close or not index_prev_close:
+        return pd.DataFrame()
+
+    # 计算涨跌幅
+    stock_df['prev_close'] = stock_df['code'].map(prev_close)
+    stock_df = stock_df.dropna(subset=['prev_close'])
+    stock_df['change_pct'] = (stock_df['close'] - stock_df['prev_close']) / stock_df['prev_close'] * 100
+
+    index_df['change_pct'] = (index_df['close'] - index_prev_close) / index_prev_close * 100
+    index_change_map = dict(zip(index_df['timestamp'], index_df['change_pct']))
+
+    # 获取所有时间点
+    timestamps = sorted(stock_df['timestamp'].unique())
+
+    # 概念到股票的映射
+    concept_stocks = {c['concept_id']: set(c['stocks']) for c in concepts}
+
+    results = []
+
+    for ts in timestamps:
+        ts_stock_data = stock_df[stock_df['timestamp'] == ts]
+        index_change = index_change_map.get(ts, 0)
+
+        stock_change = dict(zip(ts_stock_data['code'], ts_stock_data['change_pct']))
+        stock_amt = dict(zip(ts_stock_data['code'], ts_stock_data['amt']))
+
+        concept_features = []
+
+        for concept_id, stocks in concept_stocks.items():
+            concept_changes = [stock_change[s] for s in stocks if s in stock_change]
+            concept_amts = [stock_amt.get(s, 0) for s in stocks if s in stock_change]
+
+            if not concept_changes:
+                continue
+
+            avg_change = np.mean(concept_changes)
+            total_amt = sum(concept_amts)
+            alpha = avg_change - index_change
+
+            limit_up_count = sum(1 for c in concept_changes if c >= CONFIG['limit_up_threshold'])
+            limit_up_ratio = limit_up_count / len(concept_changes)
+
+            concept_features.append({
+                'concept_id': concept_id,
+                'alpha': alpha,
+                'total_amt': total_amt,
+                'limit_up_ratio': limit_up_ratio,
+                'stock_count': len(concept_changes),
+            })
+
+        if not concept_features:
+            continue
+
+        concept_df = pd.DataFrame(concept_features)
+        concept_df['rank_pct'] = concept_df['alpha'].rank(pct=True)
+        concept_df['timestamp'] = ts
+        concept_df['time_slot'] = time_to_slot(ts)
+        concept_df['trade_date'] = trade_date
+
+        results.append(concept_df)
+
+    if not results:
+        return pd.DataFrame()
+
+    result_df = pd.concat(results, ignore_index=True)
+
+    # 保存缓存
+    result_df.to_parquet(cache_file, index=False)
+
+    return result_df
+
+
+# ==================== 滚动窗口基线计算 ====================
+
+def compute_rolling_baseline(
+    historical_data: pd.DataFrame,
+    concept_id: str
+) -> Dict[str, Dict]:
+    """
+    计算单个概念的滚动基线
+
+    返回: {time_slot: {alpha_mean, alpha_std, amt_mean, amt_std, rank_mean, rank_std, sample_count}}
+    """
+    if historical_data.empty:
+        return {}
+
+    concept_data = historical_data[historical_data['concept_id'] == concept_id]
+    if concept_data.empty:
+        return {}
+
+    baseline_dict = {}
+
+    for time_slot, group in concept_data.groupby('time_slot'):
+        if len(group) < CONFIG['min_baseline_samples']:
+            continue
+
+        alpha_std = group['alpha'].std()
+        amt_std = group['total_amt'].std()
+        rank_std = group['rank_pct'].std()
+
+        baseline_dict[time_slot] = {
+            'alpha_mean': group['alpha'].mean(),
+            'alpha_std': max(alpha_std if pd.notna(alpha_std) else 1.0, 0.1),
+            'amt_mean': group['total_amt'].mean(),
+            'amt_std': max(amt_std if pd.notna(amt_std) else group['total_amt'].mean() * 0.5, 1.0),
+            'rank_mean': group['rank_pct'].mean(),
+            'rank_std': max(rank_std if pd.notna(rank_std) else 0.2, 0.05),
+            'sample_count': len(group),
+        }
+
+    return baseline_dict
+
+
+# ==================== 计算单日 Z-Score 特征（带滚动基线）====================
+
+def compute_zscore_features_rolling(
+    trade_date: str,
+    concepts: List[dict],
+    all_stocks: List[str],
+    historical_raw_data: pd.DataFrame  # 该日期之前 N 天的原始数据
+) -> pd.DataFrame:
+    """
+    计算单日的 Z-Score 特征（使用滚动窗口基线）
+
+    关键改进：
+    1. 基线只使用 trade_date 之前的数据（无未来泄露）
+    2. 动量基于 Z-Score 计算（消除波动率异构性）
+    """
+    # 计算当日原始特征
+    raw_df = compute_raw_concept_features(trade_date, concepts, all_stocks)
+
+    if raw_df.empty:
+        return pd.DataFrame()
+
+    zscore_records = []
+
+    for concept_id, group in raw_df.groupby('concept_id'):
+        # 计算该概念的滚动基线（只用历史数据）
+        baseline_dict = compute_rolling_baseline(historical_raw_data, concept_id)
+
+        if not baseline_dict:
+            continue
+
+        # 按时间排序
+        group = group.sort_values('timestamp').reset_index(drop=True)
+
+        # Z-Score 历史（用于计算基于 Z-Score 的动量）
+        zscore_history = []
+
+        for idx, row in group.iterrows():
+            time_slot = row['time_slot']
+
+            if time_slot not in baseline_dict:
+                continue
+
+            bl = baseline_dict[time_slot]
+
+            # 计算 Z-Score
+            alpha_zscore = (row['alpha'] - bl['alpha_mean']) / bl['alpha_std']
+            amt_zscore = (row['total_amt'] - bl['amt_mean']) / bl['amt_std']
+            rank_zscore = (row['rank_pct'] - bl['rank_mean']) / bl['rank_std']
+
+            # 截断极端值
+            clip = CONFIG['zscore_clip']
+            alpha_zscore = np.clip(alpha_zscore, -clip, clip)
+            amt_zscore = np.clip(amt_zscore, -clip, clip)
+            rank_zscore = np.clip(rank_zscore, -clip, clip)
+
+            # 记录 Z-Score 历史
+            zscore_history.append(alpha_zscore)
+
+            # 基于 Z-Score 计算动量（消除波动率异构性）
+            momentum_3m = 0.0
+            momentum_5m = 0.0
+
+            if len(zscore_history) >= 3:
+                recent_3 = zscore_history[-3:]
+                older_3 = zscore_history[-6:-3] if len(zscore_history) >= 6 else [zscore_history[0]]
+                momentum_3m = np.mean(recent_3) - np.mean(older_3)
+
+            if len(zscore_history) >= 5:
+                recent_5 = zscore_history[-5:]
+                older_5 = zscore_history[-10:-5] if len(zscore_history) >= 10 else [zscore_history[0]]
+                momentum_5m = np.mean(recent_5) - np.mean(older_5)
+
+            zscore_records.append({
+                'concept_id': concept_id,
+                'timestamp': row['timestamp'],
+                'time_slot': time_slot,
+                'trade_date': trade_date,
+                # 原始特征
+                'alpha': row['alpha'],
+                'total_amt': row['total_amt'],
+                'limit_up_ratio': row['limit_up_ratio'],
+                'stock_count': row['stock_count'],
+                'rank_pct': row['rank_pct'],
+                # Z-Score 特征
+                'alpha_zscore': alpha_zscore,
+                'amt_zscore': amt_zscore,
+                'rank_zscore': rank_zscore,
+                # 基于 Z-Score 的动量
+                'momentum_3m': momentum_3m,
+                'momentum_5m': momentum_5m,
+            })
+
+    if not zscore_records:
+        return pd.DataFrame()
+
+    return pd.DataFrame(zscore_records)
+
+
+# ==================== 多进程处理 ====================
+
+def process_single_day_v2(args) -> Tuple[str, bool]:
+    """处理单个交易日（多进程版本）"""
+    trade_date, day_index, concepts, all_stocks, all_trading_days = args
+    output_file = os.path.join(OUTPUT_DIR, f'features_v2_{trade_date}.parquet')
+
+    if os.path.exists(output_file):
+        return (trade_date, True)
+
+    try:
+        # 计算滚动窗口范围（该日期之前的 N 天）
+        baseline_days = CONFIG['baseline_days']
+
+        # 找出 trade_date 之前的交易日
+        start_idx = max(0, day_index - baseline_days)
+        end_idx = day_index  # 不包含当天
+
+        if end_idx <= start_idx:
+            # 没有足够的历史数据
+            return (trade_date, False)
+
+        historical_days = all_trading_days[start_idx:end_idx]
+
+        # 加载历史原始数据
+        historical_dfs = []
+        for hist_date in historical_days:
+            cache_file = os.path.join(RAW_CACHE_DIR, f'raw_{hist_date}.parquet')
+            if os.path.exists(cache_file):
+                historical_dfs.append(pd.read_parquet(cache_file))
+            else:
+                # 需要计算
+                hist_df = compute_raw_concept_features(hist_date, concepts, all_stocks)
+                if not hist_df.empty:
+                    historical_dfs.append(hist_df)
+
+        if not historical_dfs:
+            return (trade_date, False)
+
+        historical_raw_data = pd.concat(historical_dfs, ignore_index=True)
+
+        # 计算当日 Z-Score 特征（使用滚动基线）
+        df = compute_zscore_features_rolling(trade_date, concepts, all_stocks, historical_raw_data)
+
+        if df.empty:
+            return (trade_date, False)
+
+        df.to_parquet(output_file, index=False)
+        return (trade_date, True)
+
+    except Exception as e:
+        print(f"[{trade_date}] 处理失败: {e}")
+        import traceback
+        traceback.print_exc()
+        return (trade_date, False)
+
+
+# ==================== 主流程 ====================
+
+def main():
+    import argparse
+
+    parser = argparse.ArgumentParser(description='准备训练数据 V2（滚动窗口基线 + Z-Score 动量）')
+    parser.add_argument('--start', type=str, default='2022-01-01', help='开始日期')
+    parser.add_argument('--end', type=str, default=None, help='结束日期（默认今天）')
+    parser.add_argument('--workers', type=int, default=18, help='并行进程数')
+    parser.add_argument('--baseline-days', type=int, default=20, help='滚动基线窗口大小')
+    parser.add_argument('--force', action='store_true', help='强制重新计算（忽略缓存）')
+
+    args = parser.parse_args()
+
+    end_date = args.end or datetime.now().strftime('%Y-%m-%d')
+    CONFIG['baseline_days'] = args.baseline_days
+
+    print("=" * 60)
+    print("数据准备 V2 - 滚动窗口基线 + Z-Score 动量")
+    print("=" * 60)
+    print(f"日期范围: {args.start} ~ {end_date}")
+    print(f"并行进程数: {args.workers}")
+    print(f"滚动基线窗口: {args.baseline_days} 天")
+
+    # 初始化主进程连接
+    init_process_connections()
+
+    # 1. 获取概念列表
+    concepts = get_all_concepts()
+    all_stocks = list(set(s for c in concepts for s in c['stocks']))
+    print(f"股票总数: {len(all_stocks)}")
+
+    # 2. 获取交易日列表
+    trading_days = get_trading_days(args.start, end_date)
+
+    if not trading_days:
+        print("无交易日数据")
+        return
+
+    # 3. 第一阶段：预计算所有原始特征（用于缓存）
+    print(f"\n{'='*60}")
+    print("第一阶段：预计算原始特征（用于滚动基线）")
+    print(f"{'='*60}")
+
+    # 如果强制重新计算，删除缓存
+    if args.force:
+        import shutil
+        if os.path.exists(RAW_CACHE_DIR):
+            shutil.rmtree(RAW_CACHE_DIR)
+        os.makedirs(RAW_CACHE_DIR, exist_ok=True)
+        if os.path.exists(OUTPUT_DIR):
+            for f in os.listdir(OUTPUT_DIR):
+                if f.startswith('features_v2_'):
+                    os.remove(os.path.join(OUTPUT_DIR, f))
+
+    # 单线程预计算原始特征（因为需要顺序缓存）
+    print(f"预计算 {len(trading_days)} 天的原始特征...")
+    for trade_date in tqdm(trading_days, desc="预计算原始特征"):
+        cache_file = os.path.join(RAW_CACHE_DIR, f'raw_{trade_date}.parquet')
+        if not os.path.exists(cache_file):
+            compute_raw_concept_features(trade_date, concepts, all_stocks)
+
+    # 4. 第二阶段：计算 Z-Score 特征（多进程）
+    print(f"\n{'='*60}")
+    print("第二阶段：计算 Z-Score 特征（滚动基线）")
+    print(f"{'='*60}")
+
+    # 从第 baseline_days 天开始（前面的没有足够历史）
+    start_idx = args.baseline_days
+    processable_days = trading_days[start_idx:]
+
+    if not processable_days:
+        print(f"错误：需要至少 {args.baseline_days + 1} 天的数据")
+        return
+
+    print(f"可处理日期: {processable_days[0]} ~ {processable_days[-1]} ({len(processable_days)} 天)")
+    print(f"跳过前 {start_idx} 天（基线预热期）")
+
+    # 构建任务
+    tasks = []
+    for i, trade_date in enumerate(trading_days):
+        if i >= start_idx:
+            tasks.append((trade_date, i, concepts, all_stocks, trading_days))
+
+    print(f"开始处理 {len(tasks)} 个交易日（{args.workers} 进程并行）...")
+
+    success_count = 0
+    failed_dates = []
+
+    # 使用进程池初始化器
+    with ProcessPoolExecutor(max_workers=args.workers, initializer=init_process_connections) as executor:
+        futures = {executor.submit(process_single_day_v2, task): task[0] for task in tasks}
+
+        with tqdm(total=len(futures), desc="处理进度", unit="天") as pbar:
+            for future in as_completed(futures):
+                trade_date = futures[future]
+                try:
+                    result_date, success = future.result()
+                    if success:
+                        success_count += 1
+                    else:
+                        failed_dates.append(result_date)
+                except Exception as e:
+                    print(f"\n[{trade_date}] 进程异常: {e}")
+                    failed_dates.append(trade_date)
+                pbar.update(1)
+
+    print("\n" + "=" * 60)
+    print(f"处理完成: {success_count}/{len(tasks)} 个交易日")
+    if failed_dates:
+        print(f"失败日期: {failed_dates[:10]}{'...' if len(failed_dates) > 10 else ''}")
+    print(f"数据保存在: {OUTPUT_DIR}")
+    print("=" * 60)
+
+
+if __name__ == "__main__":
+    main()
--- a/ml/realtime_detector.py
+++ b/ml/realtime_detector.py
@@ -190,20 +190,22 @@ def get_all_concepts() -> List[dict]:


 def get_prev_close(stock_codes: List[str], trade_date: str) -> Dict[str, float]:
-    """获取昨收价"""
+    """获取昨收价（上一交易日的收盘价 F007N）"""
    valid_codes = [c for c in stock_codes if c and len(c) == 6 and c.isdigit()]
    if not valid_codes:
        return {}

    codes_str = "','".join(valid_codes)
+    # 注意：F007N 是"最近成交价"即当日收盘价，F002N 是"昨日收盘价"
+    # 我们需要查上一交易日的 F007N（那天的收盘价）作为今天的昨收
    query = f"""
-    SELECT SECCODE, F002N
+    SELECT SECCODE, F007N
    FROM ea_trade
    WHERE SECCODE IN ('{codes_str}')
    AND TRADEDATE = (
        SELECT MAX(TRADEDATE) FROM ea_trade WHERE TRADEDATE < '{trade_date}'
    )
-    AND F002N IS NOT NULL AND F002N > 0
+    AND F007N IS NOT NULL AND F007N > 0
    """

    try:
--- a/ml/realtime_detector_v2.py
+++ b/ml/realtime_detector_v2.py
@@ -0,0 +1,729 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+V2 实时异动检测器
+
+使用方法：
+    # 作为模块导入
+    from ml.realtime_detector_v2 import RealtimeDetectorV2
+
+    detector = RealtimeDetectorV2()
+    alerts = detector.detect_realtime()  # 检测当前时刻
+
+    # 或命令行测试
+    python ml/realtime_detector_v2.py --date 2025-12-09
+"""
+
+import os
+import sys
+import json
+import pickle
+from datetime import datetime, timedelta
+from typing import Dict, List, Optional, Tuple
+from collections import defaultdict, deque
+
+import numpy as np
+import pandas as pd
+import torch
+from sqlalchemy import create_engine, text
+from elasticsearch import Elasticsearch
+from clickhouse_driver import Client
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from ml.model import TransformerAutoencoder
+
+# ==================== 配置 ====================
+
+MYSQL_URL = "mysql+pymysql://root:Zzl5588161!@192.168.1.5:3306/stock"
+ES_HOST = 'http://127.0.0.1:9200'
+ES_INDEX = 'concept_library_v3'
+
+CLICKHOUSE_CONFIG = {
+    'host': '127.0.0.1',
+    'port': 9000,
+    'user': 'default',
+    'password': 'Zzl33818!',
+    'database': 'stock'
+}
+
+REFERENCE_INDEX = '000001.SH'
+BASELINE_FILE = 'ml/data_v2/baselines/realtime_baseline.pkl'
+MODEL_DIR = 'ml/checkpoints_v2'
+
+# 检测配置
+CONFIG = {
+    'seq_len': 10,                    # LSTM 序列长度
+    'confirm_window': 5,              # 持续确认窗口
+    'confirm_ratio': 0.6,             # 确认比例
+    'rule_weight': 0.5,
+    'ml_weight': 0.5,
+    'rule_trigger': 60,
+    'ml_trigger': 70,
+    'fusion_trigger': 50,
+    'cooldown_minutes': 10,
+    'max_alerts_per_minute': 15,
+    'zscore_clip': 5.0,
+    'limit_up_threshold': 9.8,
+}
+
+FEATURES = ['alpha_zscore', 'amt_zscore', 'rank_zscore', 'momentum_3m', 'momentum_5m', 'limit_up_ratio']
+
+
+# ==================== 数据库连接 ====================
+
+_mysql_engine = None
+_es_client = None
+_ch_client = None
+
+
+def get_mysql_engine():
+    global _mysql_engine
+    if _mysql_engine is None:
+        _mysql_engine = create_engine(MYSQL_URL, echo=False, pool_pre_ping=True)
+    return _mysql_engine
+
+
+def get_es_client():
+    global _es_client
+    if _es_client is None:
+        _es_client = Elasticsearch([ES_HOST])
+    return _es_client
+
+
+def get_ch_client():
+    global _ch_client
+    if _ch_client is None:
+        _ch_client = Client(**CLICKHOUSE_CONFIG)
+    return _ch_client
+
+
+def code_to_ch_format(code: str) -> str:
+    if not code or len(code) != 6 or not code.isdigit():
+        return None
+    if code.startswith('6'):
+        return f"{code}.SH"
+    elif code.startswith('0') or code.startswith('3'):
+        return f"{code}.SZ"
+    return f"{code}.BJ"
+
+
+def time_to_slot(ts) -> str:
+    if isinstance(ts, str):
+        return ts
+    return ts.strftime('%H:%M')
+
+
+# ==================== 规则评分 ====================
+
+def score_rules_zscore(features: Dict) -> Tuple[float, List[str]]:
+    """基于 Z-Score 的规则评分"""
+    score = 0.0
+    triggered = []
+
+    alpha_z = abs(features.get('alpha_zscore', 0))
+    amt_z = features.get('amt_zscore', 0)
+    rank_z = abs(features.get('rank_zscore', 0))
+    mom_3m = features.get('momentum_3m', 0)
+    mom_5m = features.get('momentum_5m', 0)
+    limit_up = features.get('limit_up_ratio', 0)
+
+    # Alpha Z-Score
+    if alpha_z >= 4.0:
+        score += 25
+        triggered.append('alpha_extreme')
+    elif alpha_z >= 3.0:
+        score += 18
+        triggered.append('alpha_strong')
+    elif alpha_z >= 2.0:
+        score += 10
+        triggered.append('alpha_moderate')
+
+    # 成交额 Z-Score
+    if amt_z >= 4.0:
+        score += 20
+        triggered.append('amt_extreme')
+    elif amt_z >= 3.0:
+        score += 12
+        triggered.append('amt_strong')
+    elif amt_z >= 2.0:
+        score += 6
+        triggered.append('amt_moderate')
+
+    # 排名 Z-Score
+    if rank_z >= 3.0:
+        score += 15
+        triggered.append('rank_extreme')
+    elif rank_z >= 2.0:
+        score += 8
+        triggered.append('rank_strong')
+
+    # 动量（基于 Z-Score 的）
+    if mom_3m >= 1.0:
+        score += 12
+        triggered.append('momentum_3m_strong')
+    elif mom_3m >= 0.5:
+        score += 6
+        triggered.append('momentum_3m_moderate')
+
+    if mom_5m >= 1.5:
+        score += 10
+        triggered.append('momentum_5m_strong')
+
+    # 涨停比例
+    if limit_up >= 0.3:
+        score += 20
+        triggered.append('limit_up_extreme')
+    elif limit_up >= 0.15:
+        score += 12
+        triggered.append('limit_up_strong')
+    elif limit_up >= 0.08:
+        score += 5
+        triggered.append('limit_up_moderate')
+
+    # 组合规则
+    if alpha_z >= 2.0 and amt_z >= 2.0:
+        score += 15
+        triggered.append('combo_alpha_amt')
+
+    if alpha_z >= 2.0 and limit_up >= 0.1:
+        score += 12
+        triggered.append('combo_alpha_limitup')
+
+    return min(score, 100), triggered
+
+
+# ==================== 实时检测器 ====================
+
+class RealtimeDetectorV2:
+    """V2 实时异动检测器"""
+
+    def __init__(self, model_dir: str = MODEL_DIR, baseline_file: str = BASELINE_FILE):
+        print("初始化 V2 实时检测器...")
+
+        # 加载概念
+        self.concepts = self._load_concepts()
+        self.concept_stocks = {c['concept_id']: set(c['stocks']) for c in self.concepts}
+        self.all_stocks = list(set(s for c in self.concepts for s in c['stocks']))
+
+        # 加载基线
+        self.baselines = self._load_baselines(baseline_file)
+
+        # 加载模型
+        self.model, self.thresholds, self.device = self._load_model(model_dir)
+
+        # 状态管理
+        self.zscore_history = defaultdict(lambda: deque(maxlen=CONFIG['seq_len']))
+        self.anomaly_candidates = defaultdict(lambda: deque(maxlen=CONFIG['confirm_window']))
+        self.cooldown = {}
+
+        print(f"初始化完成: {len(self.concepts)} 概念, {len(self.baselines)} 基线")
+
+    def _load_concepts(self) -> List[dict]:
+        """从 ES 加载概念"""
+        es = get_es_client()
+        concepts = []
+
+        query = {"query": {"match_all": {}}, "size": 100, "_source": ["concept_id", "concept", "stocks"]}
+        resp = es.search(index=ES_INDEX, body=query, scroll='2m')
+        scroll_id = resp['_scroll_id']
+        hits = resp['hits']['hits']
+
+        while hits:
+            for hit in hits:
+                src = hit['_source']
+                stocks = [s['code'] for s in src.get('stocks', []) if isinstance(s, dict) and s.get('code')]
+                if stocks:
+                    concepts.append({
+                        'concept_id': src.get('concept_id'),
+                        'concept_name': src.get('concept'),
+                        'stocks': stocks
+                    })
+            resp = es.scroll(scroll_id=scroll_id, scroll='2m')
+            scroll_id = resp['_scroll_id']
+            hits = resp['hits']['hits']
+
+        es.clear_scroll(scroll_id=scroll_id)
+        return concepts
+
+    def _load_baselines(self, baseline_file: str) -> Dict:
+        """加载基线"""
+        if not os.path.exists(baseline_file):
+            print(f"警告: 基线文件不存在: {baseline_file}")
+            print("请先运行: python ml/update_baseline.py")
+            return {}
+
+        with open(baseline_file, 'rb') as f:
+            data = pickle.load(f)
+
+        print(f"基线日期范围: {data.get('date_range', 'unknown')}")
+        print(f"更新时间: {data.get('update_time', 'unknown')}")
+
+        return data.get('baselines', {})
+
+    def _load_model(self, model_dir: str):
+        """加载模型"""
+        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+
+        config_path = os.path.join(model_dir, 'config.json')
+        model_path = os.path.join(model_dir, 'best_model.pt')
+        threshold_path = os.path.join(model_dir, 'thresholds.json')
+
+        if not os.path.exists(model_path):
+            print(f"警告: 模型不存在: {model_path}")
+            return None, {}, device
+
+        with open(config_path) as f:
+            config = json.load(f)
+
+        model = TransformerAutoencoder(**config['model'])
+        checkpoint = torch.load(model_path, map_location=device)
+        model.load_state_dict(checkpoint['model_state_dict'])
+        model.to(device)
+        model.eval()
+
+        thresholds = {}
+        if os.path.exists(threshold_path):
+            with open(threshold_path) as f:
+                thresholds = json.load(f)
+
+        print(f"模型已加载: {model_path}")
+        return model, thresholds, device
+
+    def _get_realtime_data(self, trade_date: str) -> pd.DataFrame:
+        """获取实时数据并计算原始特征"""
+        ch = get_ch_client()
+
+        # 获取股票数据
+        ch_codes = [code_to_ch_format(c) for c in self.all_stocks if code_to_ch_format(c)]
+        ch_codes_str = "','".join(ch_codes)
+
+        stock_query = f"""
+        SELECT code, timestamp, close, amt
+        FROM stock_minute
+        WHERE toDate(timestamp) = '{trade_date}'
+        AND code IN ('{ch_codes_str}')
+        ORDER BY timestamp
+        """
+        stock_result = ch.execute(stock_query)
+        if not stock_result:
+            return pd.DataFrame()
+
+        stock_df = pd.DataFrame(stock_result, columns=['ch_code', 'timestamp', 'close', 'amt'])
+
+        # 映射回原始代码
+        ch_to_code = {code_to_ch_format(c): c for c in self.all_stocks if code_to_ch_format(c)}
+        stock_df['code'] = stock_df['ch_code'].map(ch_to_code)
+        stock_df = stock_df.dropna(subset=['code'])
+
+        # 获取指数数据
+        index_query = f"""
+        SELECT timestamp, close
+        FROM index_minute
+        WHERE toDate(timestamp) = '{trade_date}'
+        AND code = '{REFERENCE_INDEX}'
+        ORDER BY timestamp
+        """
+        index_result = ch.execute(index_query)
+        if not index_result:
+            return pd.DataFrame()
+
+        index_df = pd.DataFrame(index_result, columns=['timestamp', 'close'])
+
+        # 获取昨收价
+        engine = get_mysql_engine()
+        codes_str = "','".join([c for c in self.all_stocks if c and len(c) == 6])
+
+        with engine.connect() as conn:
+            prev_result = conn.execute(text(f"""
+                SELECT SECCODE, F007N FROM ea_trade
+                WHERE SECCODE IN ('{codes_str}')
+                AND TRADEDATE = (SELECT MAX(TRADEDATE) FROM ea_trade WHERE TRADEDATE < '{trade_date}')
+                AND F007N > 0
+            """))
+            prev_close = {row[0]: float(row[1]) for row in prev_result if row[1]}
+
+            idx_result = conn.execute(text("""
+                SELECT F006N FROM ea_exchangetrade
+                WHERE INDEXCODE = '000001' AND TRADEDATE < :today
+                ORDER BY TRADEDATE DESC LIMIT 1
+            """), {'today': trade_date}).fetchone()
+            index_prev_close = float(idx_result[0]) if idx_result else None
+
+        if not prev_close or not index_prev_close:
+            return pd.DataFrame()
+
+        # 计算涨跌幅
+        stock_df['prev_close'] = stock_df['code'].map(prev_close)
+        stock_df = stock_df.dropna(subset=['prev_close'])
+        stock_df['change_pct'] = (stock_df['close'] - stock_df['prev_close']) / stock_df['prev_close'] * 100
+
+        index_df['change_pct'] = (index_df['close'] - index_prev_close) / index_prev_close * 100
+        index_map = dict(zip(index_df['timestamp'], index_df['change_pct']))
+
+        # 按时间聚合概念特征
+        results = []
+        for ts in sorted(stock_df['timestamp'].unique()):
+            ts_data = stock_df[stock_df['timestamp'] == ts]
+            idx_chg = index_map.get(ts, 0)
+
+            stock_chg = dict(zip(ts_data['code'], ts_data['change_pct']))
+            stock_amt = dict(zip(ts_data['code'], ts_data['amt']))
+
+            for cid, stocks in self.concept_stocks.items():
+                changes = [stock_chg[s] for s in stocks if s in stock_chg]
+                amts = [stock_amt.get(s, 0) for s in stocks if s in stock_chg]
+
+                if not changes:
+                    continue
+
+                alpha = np.mean(changes) - idx_chg
+                total_amt = sum(amts)
+                limit_up_ratio = sum(1 for c in changes if c >= CONFIG['limit_up_threshold']) / len(changes)
+
+                results.append({
+                    'concept_id': cid,
+                    'timestamp': ts,
+                    'time_slot': time_to_slot(ts),
+                    'alpha': alpha,
+                    'total_amt': total_amt,
+                    'limit_up_ratio': limit_up_ratio,
+                    'stock_count': len(changes),
+                })
+
+        if not results:
+            return pd.DataFrame()
+
+        df = pd.DataFrame(results)
+
+        # 计算排名
+        for ts in df['timestamp'].unique():
+            mask = df['timestamp'] == ts
+            df.loc[mask, 'rank_pct'] = df.loc[mask, 'alpha'].rank(pct=True)
+
+        return df
+
+    def _compute_zscore(self, concept_id: str, time_slot: str, alpha: float, total_amt: float, rank_pct: float) -> Optional[Dict]:
+        """计算 Z-Score"""
+        if concept_id not in self.baselines:
+            return None
+
+        baseline = self.baselines[concept_id]
+        if time_slot not in baseline:
+            return None
+
+        bl = baseline[time_slot]
+
+        alpha_z = np.clip((alpha - bl['alpha_mean']) / bl['alpha_std'], -5, 5)
+        amt_z = np.clip((total_amt - bl['amt_mean']) / bl['amt_std'], -5, 5)
+        rank_z = np.clip((rank_pct - bl['rank_mean']) / bl['rank_std'], -5, 5)
+
+        # 动量（基于 Z-Score 历史）
+        history = list(self.zscore_history[concept_id])
+        mom_3m = 0.0
+        mom_5m = 0.0
+
+        if len(history) >= 3:
+            recent = [h['alpha_zscore'] for h in history[-3:]]
+            older = [h['alpha_zscore'] for h in history[-6:-3]] if len(history) >= 6 else [history[0]['alpha_zscore']]
+            mom_3m = np.mean(recent) - np.mean(older)
+
+        if len(history) >= 5:
+            recent = [h['alpha_zscore'] for h in history[-5:]]
+            older = [h['alpha_zscore'] for h in history[-10:-5]] if len(history) >= 10 else [history[0]['alpha_zscore']]
+            mom_5m = np.mean(recent) - np.mean(older)
+
+        return {
+            'alpha_zscore': float(alpha_z),
+            'amt_zscore': float(amt_z),
+            'rank_zscore': float(rank_z),
+            'momentum_3m': float(mom_3m),
+            'momentum_5m': float(mom_5m),
+        }
+
+    @torch.no_grad()
+    def _ml_score(self, sequences: np.ndarray) -> np.ndarray:
+        """批量 ML 评分"""
+        if self.model is None or len(sequences) == 0:
+            return np.zeros(len(sequences))
+
+        x = torch.FloatTensor(sequences).to(self.device)
+        errors = self.model.compute_reconstruction_error(x, reduction='none')
+        last_errors = errors[:, -1].cpu().numpy()
+
+        # 转换为 0-100 分数
+        if self.thresholds:
+            p50 = self.thresholds.get('median', 0.001)
+            p99 = self.thresholds.get('p99', 0.05)
+            scores = 50 + (last_errors - p50) / (p99 - p50 + 1e-6) * 49
+        else:
+            scores = last_errors * 1000
+
+        return np.clip(scores, 0, 100)
+
+    def detect(self, trade_date: str = None) -> List[Dict]:
+        """检测指定日期的异动"""
+        trade_date = trade_date or datetime.now().strftime('%Y-%m-%d')
+        print(f"\n检测 {trade_date} 的异动...")
+
+        # 重置状态
+        self.zscore_history.clear()
+        self.anomaly_candidates.clear()
+        self.cooldown.clear()
+
+        # 获取数据
+        raw_df = self._get_realtime_data(trade_date)
+        if raw_df.empty:
+            print("无数据")
+            return []
+
+        timestamps = sorted(raw_df['timestamp'].unique())
+        print(f"时间点数: {len(timestamps)}")
+
+        all_alerts = []
+
+        for ts in timestamps:
+            ts_data = raw_df[raw_df['timestamp'] == ts]
+            time_slot = time_to_slot(ts)
+
+            candidates = []
+
+            # 计算每个概念的 Z-Score
+            for _, row in ts_data.iterrows():
+                cid = row['concept_id']
+
+                zscore = self._compute_zscore(
+                    cid, time_slot,
+                    row['alpha'], row['total_amt'], row['rank_pct']
+                )
+
+                if zscore is None:
+                    continue
+
+                # 完整特征
+                features = {
+                    **zscore,
+                    'alpha': row['alpha'],
+                    'limit_up_ratio': row['limit_up_ratio'],
+                    'total_amt': row['total_amt'],
+                }
+
+                # 更新历史
+                self.zscore_history[cid].append(zscore)
+
+                # 规则评分
+                rule_score, triggered = score_rules_zscore(features)
+
+                candidates.append((cid, features, rule_score, triggered))
+
+            if not candidates:
+                continue
+
+            # 批量 ML 评分
+            sequences = []
+            valid_candidates = []
+
+            for cid, features, rule_score, triggered in candidates:
+                history = list(self.zscore_history[cid])
+                if len(history) >= CONFIG['seq_len']:
+                    seq = np.array([[h['alpha_zscore'], h['amt_zscore'], h['rank_zscore'],
+                                     h['momentum_3m'], h['momentum_5m'], features['limit_up_ratio']]
+                                    for h in history])
+                    sequences.append(seq)
+                    valid_candidates.append((cid, features, rule_score, triggered))
+
+            if not sequences:
+                continue
+
+            ml_scores = self._ml_score(np.array(sequences))
+
+            # 融合 + 确认
+            for i, (cid, features, rule_score, triggered) in enumerate(valid_candidates):
+                ml_score = ml_scores[i]
+                final_score = CONFIG['rule_weight'] * rule_score + CONFIG['ml_weight'] * ml_score
+
+                # 判断触发
+                is_triggered = (
+                    rule_score >= CONFIG['rule_trigger'] or
+                    ml_score >= CONFIG['ml_trigger'] or
+                    final_score >= CONFIG['fusion_trigger']
+                )
+
+                self.anomaly_candidates[cid].append((ts, final_score))
+
+                if not is_triggered:
+                    continue
+
+                # 冷却期
+                if cid in self.cooldown:
+                    if (ts - self.cooldown[cid]).total_seconds() < CONFIG['cooldown_minutes'] * 60:
+                        continue
+
+                # 持续确认
+                recent = list(self.anomaly_candidates[cid])
+                if len(recent) < CONFIG['confirm_window']:
+                    continue
+
+                exceed = sum(1 for _, s in recent if s >= CONFIG['fusion_trigger'])
+                ratio = exceed / len(recent)
+
+                if ratio < CONFIG['confirm_ratio']:
+                    continue
+
+                # 确认异动！
+                self.cooldown[cid] = ts
+
+                alpha = features['alpha']
+                alert_type = 'surge_up' if alpha >= 1.5 else 'surge_down' if alpha <= -1.5 else 'surge'
+
+                concept_name = next((c['concept_name'] for c in self.concepts if c['concept_id'] == cid), cid)
+
+                all_alerts.append({
+                    'concept_id': cid,
+                    'concept_name': concept_name,
+                    'alert_time': ts,
+                    'trade_date': trade_date,
+                    'alert_type': alert_type,
+                    'final_score': float(final_score),
+                    'rule_score': float(rule_score),
+                    'ml_score': float(ml_score),
+                    'confirm_ratio': float(ratio),
+                    'alpha': float(alpha),
+                    'alpha_zscore': float(features['alpha_zscore']),
+                    'amt_zscore': float(features['amt_zscore']),
+                    'rank_zscore': float(features['rank_zscore']),
+                    'momentum_3m': float(features['momentum_3m']),
+                    'momentum_5m': float(features['momentum_5m']),
+                    'limit_up_ratio': float(features['limit_up_ratio']),
+                    'triggered_rules': triggered,
+                    'trigger_reason': f"融合({final_score:.0f})+确认({ratio:.0%})",
+                })
+
+        print(f"检测到 {len(all_alerts)} 个异动")
+        return all_alerts
+
+
+# ==================== 数据库存储 ====================
+
+def create_v2_table():
+    """创建 V2 异动表（如果不存在）"""
+    engine = get_mysql_engine()
+    with engine.begin() as conn:
+        conn.execute(text("""
+            CREATE TABLE IF NOT EXISTS concept_anomaly_v2 (
+                id INT AUTO_INCREMENT PRIMARY KEY,
+                concept_id VARCHAR(50) NOT NULL,
+                alert_time DATETIME NOT NULL,
+                trade_date DATE NOT NULL,
+                alert_type VARCHAR(20) NOT NULL,
+                final_score FLOAT,
+                rule_score FLOAT,
+                ml_score FLOAT,
+                trigger_reason VARCHAR(200),
+                confirm_ratio FLOAT,
+                alpha FLOAT,
+                alpha_zscore FLOAT,
+                amt_zscore FLOAT,
+                rank_zscore FLOAT,
+                momentum_3m FLOAT,
+                momentum_5m FLOAT,
+                limit_up_ratio FLOAT,
+                triggered_rules TEXT,
+                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+                UNIQUE KEY uk_concept_time (concept_id, alert_time),
+                INDEX idx_trade_date (trade_date),
+                INDEX idx_alert_type (alert_type)
+            ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4
+        """))
+    print("concept_anomaly_v2 表已就绪")
+
+
+def save_alerts_to_db(alerts: List[Dict]) -> int:
+    """保存异动到数据库"""
+    if not alerts:
+        return 0
+
+    engine = get_mysql_engine()
+    saved = 0
+
+    with engine.begin() as conn:
+        for alert in alerts:
+            try:
+                insert_sql = text("""
+                    INSERT IGNORE INTO concept_anomaly_v2
+                    (concept_id, alert_time, trade_date, alert_type,
+                     final_score, rule_score, ml_score, trigger_reason, confirm_ratio,
+                     alpha, alpha_zscore, amt_zscore, rank_zscore,
+                     momentum_3m, momentum_5m, limit_up_ratio, triggered_rules)
+                    VALUES
+                    (:concept_id, :alert_time, :trade_date, :alert_type,
+                     :final_score, :rule_score, :ml_score, :trigger_reason, :confirm_ratio,
+                     :alpha, :alpha_zscore, :amt_zscore, :rank_zscore,
+                     :momentum_3m, :momentum_5m, :limit_up_ratio, :triggered_rules)
+                """)
+
+                result = conn.execute(insert_sql, {
+                    'concept_id': alert['concept_id'],
+                    'alert_time': alert['alert_time'],
+                    'trade_date': alert['trade_date'],
+                    'alert_type': alert['alert_type'],
+                    'final_score': alert['final_score'],
+                    'rule_score': alert['rule_score'],
+                    'ml_score': alert['ml_score'],
+                    'trigger_reason': alert['trigger_reason'],
+                    'confirm_ratio': alert['confirm_ratio'],
+                    'alpha': alert['alpha'],
+                    'alpha_zscore': alert['alpha_zscore'],
+                    'amt_zscore': alert['amt_zscore'],
+                    'rank_zscore': alert['rank_zscore'],
+                    'momentum_3m': alert['momentum_3m'],
+                    'momentum_5m': alert['momentum_5m'],
+                    'limit_up_ratio': alert['limit_up_ratio'],
+                    'triggered_rules': json.dumps(alert.get('triggered_rules', []), ensure_ascii=False),
+                })
+
+                if result.rowcount > 0:
+                    saved += 1
+            except Exception as e:
+                print(f"保存失败: {alert['concept_id']} - {e}")
+
+    return saved
+
+
+def main():
+    import argparse
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--date', type=str, default=None)
+    parser.add_argument('--no-save', action='store_true', help='不保存到数据库，只打印')
+    args = parser.parse_args()
+
+    # 确保表存在
+    if not args.no_save:
+        create_v2_table()
+
+    detector = RealtimeDetectorV2()
+    alerts = detector.detect(args.date)
+
+    print(f"\n{'='*60}")
+    print(f"检测结果 ({len(alerts)} 个异动)")
+    print('='*60)
+
+    for a in alerts[:20]:
+        print(f"[{a['alert_time'].strftime('%H:%M') if hasattr(a['alert_time'], 'strftime') else a['alert_time']}] "
+              f"{a['concept_name']} | {a['alert_type']} | "
+              f"分数={a['final_score']:.0f} 确认={a['confirm_ratio']:.0%} "
+              f"α={a['alpha']:.2f}% αZ={a['alpha_zscore']:.1f}")
+
+    if len(alerts) > 20:
+        print(f"... 共 {len(alerts)} 个")
+
+    # 保存到数据库
+    if not args.no_save and alerts:
+        saved = save_alerts_to_db(alerts)
+        print(f"\n✅ 已保存 {saved}/{len(alerts)} 条到 concept_anomaly_v2 表")
+    elif args.no_save:
+        print(f"\n⚠️ --no-save 模式，未保存到数据库")
+
+
+if __name__ == "__main__":
+    main()
--- a/ml/train_v2.py
+++ b/ml/train_v2.py
@@ -0,0 +1,622 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+训练脚本 V2 - 基于 Z-Score 特征的 LSTM Autoencoder
+
+改进点：
+1. 使用 Z-Score 特征（相对于同时间片历史的偏离）
+2. 短序列：10分钟（不需要30分钟预热）
+3. 开盘即可检测：9:30 直接有特征
+
+模型输入：
+- 过去10分钟的 Z-Score 特征序列
+- 特征：alpha_zscore, amt_zscore, rank_zscore, momentum_3m, momentum_5m, limit_up_ratio
+
+模型学习：
+- 学习 Z-Score 序列的"正常演化模式"
+- 异动 = Z-Score 序列的异常演化（重构误差大）
+"""
+
+import os
+import sys
+import argparse
+import json
+from datetime import datetime
+from pathlib import Path
+from typing import List, Tuple, Dict
+
+import numpy as np
+import pandas as pd
+import torch
+import torch.nn as nn
+from torch.utils.data import Dataset, DataLoader
+from torch.optim import AdamW
+from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts
+from tqdm import tqdm
+
+from model import TransformerAutoencoder, AnomalyDetectionLoss, count_parameters
+
+# 性能优化
+torch.backends.cudnn.benchmark = True
+torch.backends.cuda.matmul.allow_tf32 = True
+torch.backends.cudnn.allow_tf32 = True
+
+try:
+    import matplotlib
+    matplotlib.use('Agg')
+    import matplotlib.pyplot as plt
+    HAS_MATPLOTLIB = True
+except ImportError:
+    HAS_MATPLOTLIB = False
+
+
+# ==================== 配置 ====================
+
+TRAIN_CONFIG = {
+    # 数据配置（改进！）
+    'seq_len': 10,                    # 10分钟序列（不是30分钟！）
+    'stride': 2,                      # 步长2分钟
+
+    # 时间切分
+    'train_end_date': '2024-06-30',
+    'val_end_date': '2024-09-30',
+
+    # V2 特征（Z-Score 为主）
+    'features': [
+        'alpha_zscore',       # Alpha 的 Z-Score
+        'amt_zscore',         # 成交额的 Z-Score
+        'rank_zscore',        # 排名的 Z-Score
+        'momentum_3m',        # 3分钟动量
+        'momentum_5m',        # 5分钟动量
+        'limit_up_ratio',     # 涨停占比
+    ],
+
+    # 训练配置
+    'batch_size': 4096,
+    'epochs': 100,
+    'learning_rate': 3e-4,
+    'weight_decay': 1e-5,
+    'gradient_clip': 1.0,
+
+    # 早停配置
+    'patience': 15,
+    'min_delta': 1e-6,
+
+    # 模型配置（小型 LSTM）
+    'model': {
+        'n_features': 6,
+        'hidden_dim': 32,
+        'latent_dim': 4,
+        'num_layers': 1,
+        'dropout': 0.2,
+        'bidirectional': True,
+    },
+
+    # 标准化配置
+    'clip_value': 5.0,  # Z-Score 已经标准化，clip 5.0 足够
+
+    # 阈值配置
+    'threshold_percentiles': [90, 95, 99],
+}
+
+
+# ==================== 数据加载 ====================
+
+def load_data_by_date(data_dir: str, features: List[str]) -> Dict[str, pd.DataFrame]:
+    """按日期加载 V2 数据"""
+    data_path = Path(data_dir)
+    parquet_files = sorted(data_path.glob("features_v2_*.parquet"))
+
+    if not parquet_files:
+        raise FileNotFoundError(f"未找到 V2 数据文件: {data_dir}")
+
+    print(f"找到 {len(parquet_files)} 个 V2 数据文件")
+
+    date_data = {}
+
+    for pf in tqdm(parquet_files, desc="加载数据"):
+        date = pf.stem.replace('features_v2_', '')
+
+        df = pd.read_parquet(pf)
+
+        required_cols = features + ['concept_id', 'timestamp']
+        missing_cols = [c for c in required_cols if c not in df.columns]
+        if missing_cols:
+            print(f"警告: {date} 缺少列: {missing_cols}, 跳过")
+            continue
+
+        date_data[date] = df
+
+    print(f"成功加载 {len(date_data)} 天的数据")
+    return date_data
+
+
+def split_data_by_date(
+    date_data: Dict[str, pd.DataFrame],
+    train_end: str,
+    val_end: str
+) -> Tuple[Dict[str, pd.DataFrame], Dict[str, pd.DataFrame], Dict[str, pd.DataFrame]]:
+    """按日期划分数据集"""
+    train_data = {}
+    val_data = {}
+    test_data = {}
+
+    for date, df in date_data.items():
+        if date <= train_end:
+            train_data[date] = df
+        elif date <= val_end:
+            val_data[date] = df
+        else:
+            test_data[date] = df
+
+    print(f"数据集划分:")
+    print(f"  训练集: {len(train_data)} 天 (<= {train_end})")
+    print(f"  验证集: {len(val_data)} 天 ({train_end} ~ {val_end})")
+    print(f"  测试集: {len(test_data)} 天 (> {val_end})")
+
+    return train_data, val_data, test_data
+
+
+def build_sequences_by_concept(
+    date_data: Dict[str, pd.DataFrame],
+    features: List[str],
+    seq_len: int,
+    stride: int
+) -> np.ndarray:
+    """按概念分组构建序列"""
+    all_dfs = []
+    for date, df in sorted(date_data.items()):
+        df = df.copy()
+        df['date'] = date
+        all_dfs.append(df)
+
+    if not all_dfs:
+        return np.array([])
+
+    combined = pd.concat(all_dfs, ignore_index=True)
+    combined = combined.sort_values(['concept_id', 'date', 'timestamp'])
+
+    all_sequences = []
+    grouped = combined.groupby('concept_id', sort=False)
+    n_concepts = len(grouped)
+
+    for concept_id, concept_df in tqdm(grouped, desc="构建序列", total=n_concepts, leave=False):
+        feature_data = concept_df[features].values
+        feature_data = np.nan_to_num(feature_data, nan=0.0, posinf=0.0, neginf=0.0)
+
+        n_points = len(feature_data)
+        for start in range(0, n_points - seq_len + 1, stride):
+            seq = feature_data[start:start + seq_len]
+            all_sequences.append(seq)
+
+    if not all_sequences:
+        return np.array([])
+
+    sequences = np.array(all_sequences)
+    print(f"  构建序列: {len(sequences):,} 条 (来自 {n_concepts} 个概念)")
+
+    return sequences
+
+
+# ==================== 数据集 ====================
+
+class SequenceDataset(Dataset):
+    def __init__(self, sequences: np.ndarray):
+        self.sequences = torch.FloatTensor(sequences)
+
+    def __len__(self) -> int:
+        return len(self.sequences)
+
+    def __getitem__(self, idx: int) -> torch.Tensor:
+        return self.sequences[idx]
+
+
+# ==================== 训练器 ====================
+
+class EarlyStopping:
+    def __init__(self, patience: int = 10, min_delta: float = 1e-6):
+        self.patience = patience
+        self.min_delta = min_delta
+        self.counter = 0
+        self.best_loss = float('inf')
+        self.early_stop = False
+
+    def __call__(self, val_loss: float) -> bool:
+        if val_loss < self.best_loss - self.min_delta:
+            self.best_loss = val_loss
+            self.counter = 0
+        else:
+            self.counter += 1
+            if self.counter >= self.patience:
+                self.early_stop = True
+        return self.early_stop
+
+
+class Trainer:
+    def __init__(
+        self,
+        model: nn.Module,
+        train_loader: DataLoader,
+        val_loader: DataLoader,
+        config: Dict,
+        device: torch.device,
+        save_dir: str = 'ml/checkpoints_v2'
+    ):
+        self.model = model.to(device)
+        self.train_loader = train_loader
+        self.val_loader = val_loader
+        self.config = config
+        self.device = device
+        self.save_dir = Path(save_dir)
+        self.save_dir.mkdir(parents=True, exist_ok=True)
+
+        self.optimizer = AdamW(
+            model.parameters(),
+            lr=config['learning_rate'],
+            weight_decay=config['weight_decay']
+        )
+
+        self.scheduler = CosineAnnealingWarmRestarts(
+            self.optimizer, T_0=10, T_mult=2, eta_min=1e-6
+        )
+
+        self.criterion = AnomalyDetectionLoss()
+
+        self.early_stopping = EarlyStopping(
+            patience=config['patience'],
+            min_delta=config['min_delta']
+        )
+
+        self.use_amp = torch.cuda.is_available()
+        self.scaler = torch.cuda.amp.GradScaler() if self.use_amp else None
+        if self.use_amp:
+            print("  ✓ 启用 AMP 混合精度训练")
+
+        self.history = {'train_loss': [], 'val_loss': [], 'learning_rate': []}
+        self.best_val_loss = float('inf')
+
+    def train_epoch(self) -> float:
+        self.model.train()
+        total_loss = 0.0
+        n_batches = 0
+
+        pbar = tqdm(self.train_loader, desc="Training", leave=False)
+        for batch in pbar:
+            batch = batch.to(self.device, non_blocking=True)
+            self.optimizer.zero_grad(set_to_none=True)
+
+            if self.use_amp:
+                with torch.cuda.amp.autocast():
+                    output, latent = self.model(batch)
+                    loss, _ = self.criterion(output, batch, latent)
+
+                self.scaler.scale(loss).backward()
+                self.scaler.unscale_(self.optimizer)
+                torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.config['gradient_clip'])
+                self.scaler.step(self.optimizer)
+                self.scaler.update()
+            else:
+                output, latent = self.model(batch)
+                loss, _ = self.criterion(output, batch, latent)
+                loss.backward()
+                torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.config['gradient_clip'])
+                self.optimizer.step()
+
+            total_loss += loss.item()
+            n_batches += 1
+            pbar.set_postfix({'loss': f"{loss.item():.4f}"})
+
+        return total_loss / n_batches
+
+    @torch.no_grad()
+    def validate(self) -> float:
+        self.model.eval()
+        total_loss = 0.0
+        n_batches = 0
+
+        for batch in self.val_loader:
+            batch = batch.to(self.device, non_blocking=True)
+
+            if self.use_amp:
+                with torch.cuda.amp.autocast():
+                    output, latent = self.model(batch)
+                    loss, _ = self.criterion(output, batch, latent)
+            else:
+                output, latent = self.model(batch)
+                loss, _ = self.criterion(output, batch, latent)
+
+            total_loss += loss.item()
+            n_batches += 1
+
+        return total_loss / n_batches
+
+    def save_checkpoint(self, epoch: int, val_loss: float, is_best: bool = False):
+        model_to_save = self.model.module if hasattr(self.model, 'module') else self.model
+
+        checkpoint = {
+            'epoch': epoch,
+            'model_state_dict': model_to_save.state_dict(),
+            'optimizer_state_dict': self.optimizer.state_dict(),
+            'scheduler_state_dict': self.scheduler.state_dict(),
+            'val_loss': val_loss,
+            'config': self.config,
+        }
+
+        torch.save(checkpoint, self.save_dir / 'last_checkpoint.pt')
+
+        if is_best:
+            torch.save(checkpoint, self.save_dir / 'best_model.pt')
+            print(f"  ✓ 保存最佳模型 (val_loss: {val_loss:.6f})")
+
+    def train(self, epochs: int):
+        print(f"\n开始训练 ({epochs} epochs)...")
+        print(f"设备: {self.device}")
+        print(f"模型参数量: {count_parameters(self.model):,}")
+
+        for epoch in range(1, epochs + 1):
+            print(f"\nEpoch {epoch}/{epochs}")
+
+            train_loss = self.train_epoch()
+            val_loss = self.validate()
+
+            self.scheduler.step()
+            current_lr = self.optimizer.param_groups[0]['lr']
+
+            self.history['train_loss'].append(train_loss)
+            self.history['val_loss'].append(val_loss)
+            self.history['learning_rate'].append(current_lr)
+
+            print(f"  Train Loss: {train_loss:.6f}")
+            print(f"  Val Loss:   {val_loss:.6f}")
+            print(f"  LR:         {current_lr:.2e}")
+
+            is_best = val_loss < self.best_val_loss
+            if is_best:
+                self.best_val_loss = val_loss
+            self.save_checkpoint(epoch, val_loss, is_best)
+
+            if self.early_stopping(val_loss):
+                print(f"\n早停触发！")
+                break
+
+        print(f"\n训练完成！最佳验证损失: {self.best_val_loss:.6f}")
+        self.save_history()
+
+        return self.history
+
+    def save_history(self):
+        history_path = self.save_dir / 'training_history.json'
+        with open(history_path, 'w') as f:
+            json.dump(self.history, f, indent=2)
+        print(f"训练历史已保存: {history_path}")
+
+        if HAS_MATPLOTLIB:
+            self.plot_training_curves()
+
+    def plot_training_curves(self):
+        fig, axes = plt.subplots(1, 2, figsize=(14, 5))
+        epochs = range(1, len(self.history['train_loss']) + 1)
+
+        ax1 = axes[0]
+        ax1.plot(epochs, self.history['train_loss'], 'b-', label='Train Loss', linewidth=2)
+        ax1.plot(epochs, self.history['val_loss'], 'r-', label='Val Loss', linewidth=2)
+        ax1.set_xlabel('Epoch')
+        ax1.set_ylabel('Loss')
+        ax1.set_title('Training & Validation Loss (V2)')
+        ax1.legend()
+        ax1.grid(True, alpha=0.3)
+
+        best_epoch = np.argmin(self.history['val_loss']) + 1
+        best_val_loss = min(self.history['val_loss'])
+        ax1.axvline(x=best_epoch, color='g', linestyle='--', alpha=0.7)
+        ax1.scatter([best_epoch], [best_val_loss], color='g', s=100, zorder=5)
+
+        ax2 = axes[1]
+        ax2.plot(epochs, self.history['learning_rate'], 'g-', linewidth=2)
+        ax2.set_xlabel('Epoch')
+        ax2.set_ylabel('Learning Rate')
+        ax2.set_title('Learning Rate Schedule')
+        ax2.set_yscale('log')
+        ax2.grid(True, alpha=0.3)
+
+        plt.tight_layout()
+        plt.savefig(self.save_dir / 'training_curves.png', dpi=150, bbox_inches='tight')
+        plt.close()
+        print(f"训练曲线已保存")
+
+
+# ==================== 阈值计算 ====================
+
+@torch.no_grad()
+def compute_thresholds(
+    model: nn.Module,
+    data_loader: DataLoader,
+    device: torch.device,
+    percentiles: List[float] = [90, 95, 99]
+) -> Dict[str, float]:
+    """在验证集上计算阈值"""
+    model.eval()
+    all_errors = []
+
+    print("计算异动阈值...")
+    for batch in tqdm(data_loader, desc="Computing thresholds"):
+        batch = batch.to(device)
+        errors = model.compute_reconstruction_error(batch, reduction='none')
+        seq_errors = errors[:, -1]  # 最后一个时刻
+        all_errors.append(seq_errors.cpu().numpy())
+
+    all_errors = np.concatenate(all_errors)
+
+    thresholds = {}
+    for p in percentiles:
+        threshold = np.percentile(all_errors, p)
+        thresholds[f'p{p}'] = float(threshold)
+        print(f"  P{p}: {threshold:.6f}")
+
+    thresholds['mean'] = float(np.mean(all_errors))
+    thresholds['std'] = float(np.std(all_errors))
+    thresholds['median'] = float(np.median(all_errors))
+
+    return thresholds
+
+
+# ==================== 主函数 ====================
+
+def main():
+    parser = argparse.ArgumentParser(description='训练 V2 模型')
+    parser.add_argument('--data_dir', type=str, default='ml/data_v2', help='V2 数据目录')
+    parser.add_argument('--epochs', type=int, default=100)
+    parser.add_argument('--batch_size', type=int, default=4096)
+    parser.add_argument('--lr', type=float, default=3e-4)
+    parser.add_argument('--device', type=str, default='auto')
+    parser.add_argument('--save_dir', type=str, default='ml/checkpoints_v2')
+    parser.add_argument('--train_end', type=str, default='2024-06-30')
+    parser.add_argument('--val_end', type=str, default='2024-09-30')
+    parser.add_argument('--seq_len', type=int, default=10, help='序列长度（分钟）')
+
+    args = parser.parse_args()
+
+    config = TRAIN_CONFIG.copy()
+    config['batch_size'] = args.batch_size
+    config['epochs'] = args.epochs
+    config['learning_rate'] = args.lr
+    config['train_end_date'] = args.train_end
+    config['val_end_date'] = args.val_end
+    config['seq_len'] = args.seq_len
+
+    if args.device == 'auto':
+        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+    else:
+        device = torch.device(args.device)
+
+    print("=" * 60)
+    print("概念异动检测模型训练 V2（Z-Score 特征）")
+    print("=" * 60)
+    print(f"数据目录: {args.data_dir}")
+    print(f"设备: {device}")
+    print(f"序列长度: {config['seq_len']} 分钟")
+    print(f"批次大小: {config['batch_size']}")
+    print(f"特征: {config['features']}")
+    print("=" * 60)
+
+    # 1. 加载数据
+    print("\n[1/6] 加载 V2 数据...")
+    date_data = load_data_by_date(args.data_dir, config['features'])
+
+    # 2. 划分数据集
+    print("\n[2/6] 划分数据集...")
+    train_data, val_data, test_data = split_data_by_date(
+        date_data, config['train_end_date'], config['val_end_date']
+    )
+
+    # 3. 构建序列
+    print("\n[3/6] 构建序列...")
+    print("训练集:")
+    train_sequences = build_sequences_by_concept(
+        train_data, config['features'], config['seq_len'], config['stride']
+    )
+    print("验证集:")
+    val_sequences = build_sequences_by_concept(
+        val_data, config['features'], config['seq_len'], config['stride']
+    )
+
+    if len(train_sequences) == 0:
+        print("错误: 训练集为空！")
+        return
+
+    # 4. 预处理
+    print("\n[4/6] 数据预处理...")
+    clip_value = config['clip_value']
+    print(f"  Z-Score 特征已标准化，截断: ±{clip_value}")
+
+    train_sequences = np.clip(train_sequences, -clip_value, clip_value)
+    if len(val_sequences) > 0:
+        val_sequences = np.clip(val_sequences, -clip_value, clip_value)
+
+    # 保存配置
+    save_dir = Path(args.save_dir)
+    save_dir.mkdir(parents=True, exist_ok=True)
+
+    with open(save_dir / 'config.json', 'w') as f:
+        json.dump(config, f, indent=2)
+
+    # 5. 创建数据加载器
+    print("\n[5/6] 创建数据加载器...")
+    train_dataset = SequenceDataset(train_sequences)
+    val_dataset = SequenceDataset(val_sequences) if len(val_sequences) > 0 else None
+
+    print(f"  训练序列: {len(train_dataset):,}")
+    print(f"  验证序列: {len(val_dataset) if val_dataset else 0:,}")
+
+    n_gpus = torch.cuda.device_count() if torch.cuda.is_available() else 1
+    num_workers = min(32, 8 * n_gpus) if sys.platform != 'win32' else 0
+
+    train_loader = DataLoader(
+        train_dataset,
+        batch_size=config['batch_size'],
+        shuffle=True,
+        num_workers=num_workers,
+        pin_memory=True,
+        prefetch_factor=4 if num_workers > 0 else None,
+        persistent_workers=True if num_workers > 0 else False,
+        drop_last=True
+    )
+
+    val_loader = DataLoader(
+        val_dataset,
+        batch_size=config['batch_size'] * 2,
+        shuffle=False,
+        num_workers=num_workers,
+        pin_memory=True,
+    ) if val_dataset else None
+
+    # 6. 训练
+    print("\n[6/6] 训练模型...")
+    model = TransformerAutoencoder(**config['model'])
+
+    if torch.cuda.device_count() > 1:
+        print(f"  使用 {torch.cuda.device_count()} 张 GPU 并行训练")
+        model = nn.DataParallel(model)
+
+    if val_loader is None:
+        print("警告: 验证集为空，使用训练集的 10% 作为验证")
+        split_idx = int(len(train_dataset) * 0.9)
+        train_subset = torch.utils.data.Subset(train_dataset, range(split_idx))
+        val_subset = torch.utils.data.Subset(train_dataset, range(split_idx, len(train_dataset)))
+        train_loader = DataLoader(train_subset, batch_size=config['batch_size'], shuffle=True, num_workers=num_workers, pin_memory=True)
+        val_loader = DataLoader(val_subset, batch_size=config['batch_size'], shuffle=False, num_workers=num_workers, pin_memory=True)
+
+    trainer = Trainer(
+        model=model,
+        train_loader=train_loader,
+        val_loader=val_loader,
+        config=config,
+        device=device,
+        save_dir=args.save_dir
+    )
+
+    trainer.train(config['epochs'])
+
+    # 计算阈值
+    print("\n[额外] 计算异动阈值...")
+    best_checkpoint = torch.load(save_dir / 'best_model.pt', map_location=device)
+
+    # 创建新的单 GPU 模型用于计算阈值（避免 DataParallel 问题）
+    threshold_model = TransformerAutoencoder(**config['model'])
+    threshold_model.load_state_dict(best_checkpoint['model_state_dict'])
+    threshold_model.to(device)
+    threshold_model.eval()
+
+    thresholds = compute_thresholds(threshold_model, val_loader, device, config['threshold_percentiles'])
+
+    with open(save_dir / 'thresholds.json', 'w') as f:
+        json.dump(thresholds, f, indent=2)
+
+    print("\n" + "=" * 60)
+    print("训练完成！")
+    print(f"模型保存位置: {args.save_dir}")
+    print("=" * 60)
+
+
+if __name__ == "__main__":
+    main()
--- a/ml/update_baseline.py
+++ b/ml/update_baseline.py
@@ -0,0 +1,132 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+每日盘后运行：更新滚动基线
+
+使用方法：
+    python ml/update_baseline.py
+
+建议加入 crontab，每天 15:30 后运行：
+    30 15 * * 1-5 cd /path/to/project && python ml/update_baseline.py
+"""
+
+import os
+import sys
+import pickle
+import pandas as pd
+import numpy as np
+from datetime import datetime, timedelta
+from pathlib import Path
+from tqdm import tqdm
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from ml.prepare_data_v2 import (
+    get_all_concepts, get_trading_days, compute_raw_concept_features,
+    init_process_connections, CONFIG, RAW_CACHE_DIR, BASELINE_DIR
+)
+
+
+def update_rolling_baseline(baseline_days: int = 20):
+    """
+    更新滚动基线（用于实盘检测）
+
+    基线 = 最近 N 个交易日每个时间片的统计量
+    """
+    print("=" * 60)
+    print("更新滚动基线（用于实盘）")
+    print("=" * 60)
+
+    # 初始化连接
+    init_process_connections()
+
+    # 获取概念列表
+    concepts = get_all_concepts()
+    all_stocks = list(set(s for c in concepts for s in c['stocks']))
+
+    # 获取最近的交易日
+    today = datetime.now().strftime('%Y-%m-%d')
+    start_date = (datetime.now() - timedelta(days=60)).strftime('%Y-%m-%d')  # 多取一些
+
+    trading_days = get_trading_days(start_date, today)
+
+    if len(trading_days) < baseline_days:
+        print(f"错误：交易日不足 {baseline_days} 天")
+        return
+
+    # 只取最近 N 天
+    recent_days = trading_days[-baseline_days:]
+    print(f"使用 {len(recent_days)} 天数据: {recent_days[0]} ~ {recent_days[-1]}")
+
+    # 加载原始数据
+    all_data = []
+    for trade_date in tqdm(recent_days, desc="加载数据"):
+        cache_file = os.path.join(RAW_CACHE_DIR, f'raw_{trade_date}.parquet')
+
+        if os.path.exists(cache_file):
+            df = pd.read_parquet(cache_file)
+        else:
+            df = compute_raw_concept_features(trade_date, concepts, all_stocks)
+
+        if not df.empty:
+            all_data.append(df)
+
+    if not all_data:
+        print("错误：无数据")
+        return
+
+    combined = pd.concat(all_data, ignore_index=True)
+    print(f"总数据量: {len(combined):,} 条")
+
+    # 按概念计算基线
+    baselines = {}
+
+    for concept_id, group in tqdm(combined.groupby('concept_id'), desc="计算基线"):
+        baseline_dict = {}
+
+        for time_slot, slot_group in group.groupby('time_slot'):
+            if len(slot_group) < CONFIG['min_baseline_samples']:
+                continue
+
+            alpha_std = slot_group['alpha'].std()
+            amt_std = slot_group['total_amt'].std()
+            rank_std = slot_group['rank_pct'].std()
+
+            baseline_dict[time_slot] = {
+                'alpha_mean': float(slot_group['alpha'].mean()),
+                'alpha_std': float(max(alpha_std if pd.notna(alpha_std) else 1.0, 0.1)),
+                'amt_mean': float(slot_group['total_amt'].mean()),
+                'amt_std': float(max(amt_std if pd.notna(amt_std) else slot_group['total_amt'].mean() * 0.5, 1.0)),
+                'rank_mean': float(slot_group['rank_pct'].mean()),
+                'rank_std': float(max(rank_std if pd.notna(rank_std) else 0.2, 0.05)),
+                'sample_count': len(slot_group),
+            }
+
+        if baseline_dict:
+            baselines[concept_id] = baseline_dict
+
+    print(f"计算了 {len(baselines)} 个概念的基线")
+
+    # 保存
+    os.makedirs(BASELINE_DIR, exist_ok=True)
+    baseline_file = os.path.join(BASELINE_DIR, 'realtime_baseline.pkl')
+
+    with open(baseline_file, 'wb') as f:
+        pickle.dump({
+            'baselines': baselines,
+            'update_time': datetime.now().isoformat(),
+            'date_range': [recent_days[0], recent_days[-1]],
+            'baseline_days': baseline_days,
+        }, f)
+
+    print(f"基线已保存: {baseline_file}")
+    print("=" * 60)
+
+
+if __name__ == "__main__":
+    import argparse
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--days', type=int, default=20, help='基线天数')
+    args = parser.parse_args()
+
+    update_rolling_baseline(args.days)