#!/usr/bin/env python3 # -*- coding: utf-8 -*- """ 概念涨跌幅实时更新服务 - 在交易时间段每分钟从ClickHouse获取最新分钟数据 - 计算涨跌幅后更新MySQL的concept_daily_stats表 - 支持叶子概念和母概念(lv1/lv2/lv3) """ import pandas as pd import numpy as np from datetime import datetime, timedelta from sqlalchemy import create_engine, text from elasticsearch import Elasticsearch from clickhouse_driver import Client import time import logging import json import os import hashlib import argparse # ==================== 配置 ==================== # MySQL配置 MYSQL_ENGINE = create_engine( "mysql+pymysql://root:Zzl5588161!@222.128.1.157:33060/stock", echo=False ) # Elasticsearch配置 ES_CLIENT = Elasticsearch(['http://222.128.1.157:19200']) INDEX_NAME = 'concept_library_v3' # ClickHouse配置 CLICKHOUSE_CONFIG = { 'host': '222.128.1.157', 'port': 18000, 'user': 'default', 'password': 'Zzl33818!', 'database': 'stock' } # 层级结构文件 HIERARCHY_FILE = 'concept_hierarchy_v3.json' # 交易时间配置 TRADING_HOURS = { 'morning_start': (9, 30), 'morning_end': (11, 30), 'afternoon_start': (13, 0), 'afternoon_end': (15, 0), } # ==================== 日志配置 ==================== logging.basicConfig( level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', handlers=[ logging.FileHandler(f'concept_realtime_{datetime.now().strftime("%Y%m%d")}.log', encoding='utf-8'), logging.StreamHandler() ] ) logger = logging.getLogger(__name__) # ClickHouse客户端 ch_client = None def get_ch_client(): """获取ClickHouse客户端""" global ch_client if ch_client is None: ch_client = Client(**CLICKHOUSE_CONFIG) return ch_client def generate_id(name: str) -> str: """生成概念ID""" return hashlib.md5(name.encode('utf-8')).hexdigest()[:16] def code_to_ch_format(code: str) -> str: """将6位股票代码转换为ClickHouse格式(带后缀) 规则: - 6开头 -> .SH(上海) - 0或3开头 -> .SZ(深圳) - 其他 -> .BJ(北京) - 非6位数字的忽略(可能是港股) """ if not code or len(code) != 6 or not code.isdigit(): return None if code.startswith('6'): return f"{code}.SH" elif code.startswith('0') or code.startswith('3'): return f"{code}.SZ" else: return f"{code}.BJ" def ch_code_to_pure(ch_code: str) -> str: """将ClickHouse格式的股票代码转回纯6位代码""" if not ch_code: return None return ch_code.split('.')[0] # ==================== 概念数据获取 ==================== def get_all_concepts(): """从ES获取所有叶子概念及其股票列表""" concepts = [] query = { "query": {"match_all": {}}, "size": 100, "_source": ["concept_id", "concept", "stocks"] } resp = ES_CLIENT.search(index=INDEX_NAME, body=query, scroll='2m') scroll_id = resp['_scroll_id'] hits = resp['hits']['hits'] while len(hits) > 0: for hit in hits: source = hit['_source'] concept_info = { 'concept_id': source.get('concept_id'), 'concept_name': source.get('concept'), 'stocks': [], 'concept_type': 'leaf' } # v3索引的stocks字段是 [{name, code}, ...] if 'stocks' in source and isinstance(source['stocks'], list): for stock in source['stocks']: if isinstance(stock, dict) and 'code' in stock and stock['code']: concept_info['stocks'].append(stock['code']) if concept_info['stocks']: concepts.append(concept_info) resp = ES_CLIENT.scroll(scroll_id=scroll_id, scroll='2m') scroll_id = resp['_scroll_id'] hits = resp['hits']['hits'] ES_CLIENT.clear_scroll(scroll_id=scroll_id) return concepts def load_hierarchy_concepts(leaf_concepts: list) -> list: """加载层级结构,生成母概念(lv1/lv2/lv3)""" hierarchy_path = os.path.join(os.path.dirname(__file__), HIERARCHY_FILE) if not os.path.exists(hierarchy_path): logger.warning(f"层级文件不存在: {hierarchy_path}") return [] with open(hierarchy_path, 'r', encoding='utf-8') as f: hierarchy_data = json.load(f) # 建立概念名称到股票的映射 concept_to_stocks = {} for c in leaf_concepts: concept_to_stocks[c['concept_name']] = set(c['stocks']) parent_concepts = [] for lv1 in hierarchy_data.get('hierarchy', []): lv1_name = lv1.get('lv1', '') lv1_stocks = set() for child in lv1.get('children', []): lv2_name = child.get('lv2', '') lv2_stocks = set() if 'children' in child: for lv3_child in child.get('children', []): lv3_name = lv3_child.get('lv3', '') lv3_stocks = set() for concept_name in lv3_child.get('concepts', []): if concept_name in concept_to_stocks: lv3_stocks.update(concept_to_stocks[concept_name]) if lv3_stocks: parent_concepts.append({ 'concept_id': generate_id(f"lv3_{lv3_name}"), 'concept_name': f"[三级] {lv3_name}", 'stocks': list(lv3_stocks), 'concept_type': 'lv3' }) lv2_stocks.update(lv3_stocks) else: for concept_name in child.get('concepts', []): if concept_name in concept_to_stocks: lv2_stocks.update(concept_to_stocks[concept_name]) if lv2_stocks: parent_concepts.append({ 'concept_id': generate_id(f"lv2_{lv2_name}"), 'concept_name': f"[二级] {lv2_name}", 'stocks': list(lv2_stocks), 'concept_type': 'lv2' }) lv1_stocks.update(lv2_stocks) if lv1_stocks: parent_concepts.append({ 'concept_id': generate_id(f"lv1_{lv1_name}"), 'concept_name': f"[一级] {lv1_name}", 'stocks': list(lv1_stocks), 'concept_type': 'lv1' }) return parent_concepts # ==================== 基准价格获取 ==================== def get_base_prices(stock_codes: list, current_date: str) -> dict: """获取当日的昨收价作为基准(从ea_trade的F002N字段) ea_trade表字段说明: - F002N: 昨日收盘价 - F007N: 最近成交价(收盘价) - F010N: 涨跌幅 """ if not stock_codes: return {} # 过滤出有效的6位股票代码 valid_codes = [code for code in stock_codes if code and len(code) == 6 and code.isdigit()] if not valid_codes: return {} stock_codes_str = "','".join(valid_codes) # 获取当日数据中的昨收价(F002N) query = f""" SELECT SECCODE, F002N FROM ea_trade WHERE SECCODE IN ('{stock_codes_str}') AND TRADEDATE = ( SELECT MAX(TRADEDATE) FROM ea_trade WHERE TRADEDATE <= '{current_date}' ) AND F002N IS NOT NULL AND F002N > 0 """ try: with MYSQL_ENGINE.connect() as conn: result = conn.execute(text(query)) base_prices = {row[0]: float(row[1]) for row in result if row[1] and float(row[1]) > 0} logger.info(f"获取到 {len(base_prices)} 个基准价格") return base_prices except Exception as e: logger.error(f"获取基准价格失败: {e}") return {} # ==================== 实时价格获取 ==================== def get_latest_prices(stock_codes: list) -> dict: """从ClickHouse获取最新分钟数据的收盘价 Args: stock_codes: 纯6位股票代码列表(如 ['000001', '600000']) Returns: dict: {纯6位代码: {'close': 价格, 'timestamp': 时间}} """ if not stock_codes: return {} client = get_ch_client() # 转换为ClickHouse格式的代码(带后缀) ch_codes = [] code_mapping = {} # ch_code -> pure_code for code in stock_codes: ch_code = code_to_ch_format(code) if ch_code: ch_codes.append(ch_code) code_mapping[ch_code] = code if not ch_codes: logger.warning("没有有效的股票代码可查询") return {} ch_codes_str = "','".join(ch_codes) # 获取今日最新的分钟数据 query = f""" SELECT code, close, timestamp FROM ( SELECT code, close, timestamp, ROW_NUMBER() OVER (PARTITION BY code ORDER BY timestamp DESC) as rn FROM stock_minute WHERE code IN ('{ch_codes_str}') AND toDate(timestamp) = today() ) WHERE rn = 1 """ try: result = client.execute(query) if not result: return {} latest_prices = {} for row in result: ch_code, close, ts = row if close and close > 0: # 转回纯6位代码 pure_code = code_mapping.get(ch_code) if pure_code: latest_prices[pure_code] = { 'close': float(close), 'timestamp': ts } return latest_prices except Exception as e: logger.error(f"获取最新价格失败: {e}") return {} # ==================== 涨跌幅计算 ==================== def calculate_change_pct(base_prices: dict, latest_prices: dict) -> dict: """计算涨跌幅""" changes = {} for code, latest in latest_prices.items(): if code in base_prices and base_prices[code] > 0: base = base_prices[code] close = latest['close'] change_pct = (close - base) / base * 100 changes[code] = round(change_pct, 4) return changes def calculate_concept_stats(concepts: list, stock_changes: dict, trade_date: str) -> list: """计算所有概念的涨跌幅统计""" stats = [] for concept in concepts: concept_id = concept['concept_id'] concept_name = concept['concept_name'] stock_codes = concept['stocks'] concept_type = concept.get('concept_type', 'leaf') # 获取该概念股票的涨跌幅 changes = [stock_changes[code] for code in stock_codes if code in stock_changes] if not changes: continue avg_change_pct = round(np.mean(changes), 4) stock_count = len(changes) stats.append({ 'concept_id': concept_id, 'concept_name': concept_name, 'trade_date': trade_date, 'avg_change_pct': avg_change_pct, 'stock_count': stock_count, 'concept_type': concept_type }) return stats # ==================== MySQL更新 ==================== def update_mysql_stats(stats: list): """更新MySQL的concept_daily_stats表""" if not stats: return 0 with MYSQL_ENGINE.begin() as conn: updated = 0 for item in stats: upsert_sql = text(""" REPLACE INTO concept_daily_stats (concept_id, concept_name, trade_date, avg_change_pct, stock_count, concept_type) VALUES (:concept_id, :concept_name, :trade_date, :avg_change_pct, :stock_count, :concept_type) """) conn.execute(upsert_sql, item) updated += 1 return updated # ==================== 交易时间判断 ==================== def is_trading_time() -> bool: """判断当前是否为交易时间""" now = datetime.now() weekday = now.weekday() # 周末不交易 if weekday >= 5: return False hour, minute = now.hour, now.minute current_time = hour * 60 + minute # 上午 9:30 - 11:30 morning_start = 9 * 60 + 30 morning_end = 11 * 60 + 30 # 下午 13:00 - 15:00 afternoon_start = 13 * 60 afternoon_end = 15 * 60 return (morning_start <= current_time <= morning_end) or \ (afternoon_start <= current_time <= afternoon_end) def get_next_update_time() -> int: """获取距离下次更新的秒数""" now = datetime.now() if is_trading_time(): # 交易时间内,等到下一分钟 return 60 - now.second else: # 非交易时间 hour, minute = now.hour, now.minute # 计算距离下次交易开始的时间 if hour < 9 or (hour == 9 and minute < 30): # 等到9:30 target = now.replace(hour=9, minute=30, second=0, microsecond=0) elif (hour == 11 and minute >= 30) or hour == 12: # 等到13:00 target = now.replace(hour=13, minute=0, second=0, microsecond=0) elif hour >= 15: # 等到明天9:30 target = (now + timedelta(days=1)).replace(hour=9, minute=30, second=0, microsecond=0) else: target = now + timedelta(minutes=1) wait_seconds = (target - now).total_seconds() return max(60, int(wait_seconds)) # ==================== 主运行逻辑 ==================== def run_once(concepts: list, all_stocks: list) -> int: """执行一次更新""" now = datetime.now() trade_date = now.strftime('%Y-%m-%d') # 获取基准价格(昨日收盘价) base_prices = get_base_prices(all_stocks, trade_date) if not base_prices: logger.warning("无法获取基准价格") return 0 # 获取最新价格 latest_prices = get_latest_prices(all_stocks) if not latest_prices: logger.warning("无法获取最新价格") return 0 # 计算涨跌幅 stock_changes = calculate_change_pct(base_prices, latest_prices) if not stock_changes: logger.warning("无涨跌幅数据") return 0 logger.info(f"获取到 {len(stock_changes)} 只股票的涨跌幅") # 计算概念统计 stats = calculate_concept_stats(concepts, stock_changes, trade_date) logger.info(f"计算了 {len(stats)} 个概念的涨跌幅") # 更新MySQL updated = update_mysql_stats(stats) logger.info(f"更新了 {updated} 条记录到MySQL") return updated def run_realtime(): """实时更新主循环""" logger.info("=" * 60) logger.info("启动概念涨跌幅实时更新服务") logger.info("=" * 60) # 加载概念数据 logger.info("加载概念数据...") leaf_concepts = get_all_concepts() logger.info(f"获取到 {len(leaf_concepts)} 个叶子概念") parent_concepts = load_hierarchy_concepts(leaf_concepts) logger.info(f"生成了 {len(parent_concepts)} 个母概念") all_concepts = leaf_concepts + parent_concepts logger.info(f"总计 {len(all_concepts)} 个概念") # 收集所有股票代码 all_stocks = set() for c in all_concepts: all_stocks.update(c['stocks']) all_stocks = list(all_stocks) logger.info(f"监控 {len(all_stocks)} 只股票") last_concept_update = datetime.now() while True: try: now = datetime.now() # 每小时重新加载概念数据 if (now - last_concept_update).total_seconds() > 3600: logger.info("重新加载概念数据...") leaf_concepts = get_all_concepts() parent_concepts = load_hierarchy_concepts(leaf_concepts) all_concepts = leaf_concepts + parent_concepts all_stocks = set() for c in all_concepts: all_stocks.update(c['stocks']) all_stocks = list(all_stocks) last_concept_update = now logger.info(f"更新完成: {len(all_concepts)} 个概念, {len(all_stocks)} 只股票") # 检查是否交易时间 if not is_trading_time(): wait_sec = get_next_update_time() wait_min = wait_sec // 60 logger.info(f"非交易时间,等待 {wait_min} 分钟后重试...") time.sleep(min(wait_sec, 300)) # 最多等5分钟再检查 continue # 执行更新 logger.info(f"\n{'=' * 40}") logger.info(f"更新时间: {now.strftime('%Y-%m-%d %H:%M:%S')}") updated = run_once(all_concepts, all_stocks) # 等待下一分钟 sleep_sec = 60 - datetime.now().second logger.info(f"完成,等待 {sleep_sec} 秒后继续...") time.sleep(sleep_sec) except KeyboardInterrupt: logger.info("\n收到退出信号,停止服务...") break except Exception as e: logger.error(f"发生错误: {e}") import traceback traceback.print_exc() time.sleep(60) def run_single(): """单次运行(不循环)""" logger.info("单次更新模式") leaf_concepts = get_all_concepts() parent_concepts = load_hierarchy_concepts(leaf_concepts) all_concepts = leaf_concepts + parent_concepts all_stocks = set() for c in all_concepts: all_stocks.update(c['stocks']) all_stocks = list(all_stocks) logger.info(f"概念数: {len(all_concepts)}, 股票数: {len(all_stocks)}") updated = run_once(all_concepts, all_stocks) logger.info(f"更新完成: {updated} 条记录") def show_status(): """显示当前状态""" print("\n" + "=" * 60) print("概念涨跌幅实时更新服务 - 状态") print("=" * 60) # 当前时间 now = datetime.now() print(f"\n当前时间: {now.strftime('%Y-%m-%d %H:%M:%S')}") print(f"是否交易时间: {'是' if is_trading_time() else '否'}") # MySQL数据状态 print("\nMySQL数据状态:") try: with MYSQL_ENGINE.connect() as conn: # 今日数据量 result = conn.execute(text(""" SELECT concept_type, COUNT(*) as cnt FROM concept_daily_stats WHERE trade_date = CURDATE() GROUP BY concept_type """)) rows = list(result) if rows: print(" 今日数据:") for row in rows: print(f" {row[0]}: {row[1]} 条") else: print(" 今日暂无数据") # 最新更新时间 result = conn.execute(text(""" SELECT MAX(updated_at) FROM concept_daily_stats WHERE trade_date = CURDATE() """)) row = result.fetchone() if row and row[0]: print(f" 最后更新: {row[0]}") except Exception as e: print(f" 查询失败: {e}") # ClickHouse数据状态 print("\nClickHouse数据状态:") try: client = get_ch_client() result = client.execute(""" SELECT COUNT(*), MAX(timestamp) FROM stock_minute WHERE toDate(timestamp) = today() """) if result: count, max_ts = result[0] print(f" 今日分钟数据: {count:,} 条") print(f" 最新时间戳: {max_ts}") except Exception as e: print(f" 查询失败: {e}") # 今日涨跌幅TOP10 print("\n今日涨跌幅 TOP10:") try: with MYSQL_ENGINE.connect() as conn: result = conn.execute(text(""" SELECT concept_name, avg_change_pct, stock_count, concept_type FROM concept_daily_stats WHERE trade_date = CURDATE() AND concept_type = 'leaf' ORDER BY avg_change_pct DESC LIMIT 10 """)) rows = list(result) if rows: print(f" {'概念':<25} | {'涨跌幅':>8} | {'股票数':>6}") print(" " + "-" * 50) for row in rows: name = row[0][:25] if len(row[0]) > 25 else row[0] print(f" {name:<25} | {row[1]:>7.2f}% | {row[2]:>6}") else: print(" 暂无数据") except Exception as e: print(f" 查询失败: {e}") # ==================== 主函数 ==================== def main(): parser = argparse.ArgumentParser(description='概念涨跌幅实时更新服务') parser.add_argument('command', nargs='?', default='realtime', choices=['realtime', 'once', 'status'], help='命令: realtime(实时运行), once(单次运行), status(状态查看)') args = parser.parse_args() if args.command == 'realtime': run_realtime() elif args.command == 'once': run_single() elif args.command == 'status': show_status() if __name__ == "__main__": main()