Files
vf_react/concept_quota_realtime.py
2025-12-09 08:31:18 +08:00

682 lines
21 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
概念涨跌幅实时更新服务
- 在交易时间段每分钟从ClickHouse获取最新分钟数据
- 计算涨跌幅后更新MySQL的concept_daily_stats表
- 支持叶子概念和母概念lv1/lv2/lv3
"""
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from sqlalchemy import create_engine, text
from elasticsearch import Elasticsearch
from clickhouse_driver import Client
import time
import logging
import json
import os
import hashlib
import argparse
# ==================== 配置 ====================
# MySQL配置
MYSQL_ENGINE = create_engine(
"mysql+pymysql://root:Zzl5588161!@222.128.1.157:33060/stock",
echo=False
)
# Elasticsearch配置
ES_CLIENT = Elasticsearch(['http://222.128.1.157:19200'])
INDEX_NAME = 'concept_library_v3'
# ClickHouse配置
CLICKHOUSE_CONFIG = {
'host': '222.128.1.157',
'port': 18000,
'user': 'default',
'password': 'Zzl33818!',
'database': 'stock'
}
# 层级结构文件
HIERARCHY_FILE = 'concept_hierarchy_v3.json'
# 交易时间配置
TRADING_HOURS = {
'morning_start': (9, 30),
'morning_end': (11, 30),
'afternoon_start': (13, 0),
'afternoon_end': (15, 0),
}
# ==================== 日志配置 ====================
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(levelname)s - %(message)s',
handlers=[
logging.FileHandler(f'concept_realtime_{datetime.now().strftime("%Y%m%d")}.log', encoding='utf-8'),
logging.StreamHandler()
]
)
logger = logging.getLogger(__name__)
# ClickHouse客户端
ch_client = None
def get_ch_client():
"""获取ClickHouse客户端"""
global ch_client
if ch_client is None:
ch_client = Client(**CLICKHOUSE_CONFIG)
return ch_client
def generate_id(name: str) -> str:
"""生成概念ID"""
return hashlib.md5(name.encode('utf-8')).hexdigest()[:16]
def code_to_ch_format(code: str) -> str:
"""将6位股票代码转换为ClickHouse格式带后缀
规则:
- 6开头 -> .SH上海
- 0或3开头 -> .SZ深圳
- 其他 -> .BJ北京
- 非6位数字的忽略可能是港股
"""
if not code or len(code) != 6 or not code.isdigit():
return None
if code.startswith('6'):
return f"{code}.SH"
elif code.startswith('0') or code.startswith('3'):
return f"{code}.SZ"
else:
return f"{code}.BJ"
def ch_code_to_pure(ch_code: str) -> str:
"""将ClickHouse格式的股票代码转回纯6位代码"""
if not ch_code:
return None
return ch_code.split('.')[0]
# ==================== 概念数据获取 ====================
def get_all_concepts():
"""从ES获取所有叶子概念及其股票列表"""
concepts = []
query = {
"query": {"match_all": {}},
"size": 100,
"_source": ["concept_id", "concept", "stocks"]
}
resp = ES_CLIENT.search(index=INDEX_NAME, body=query, scroll='2m')
scroll_id = resp['_scroll_id']
hits = resp['hits']['hits']
while len(hits) > 0:
for hit in hits:
source = hit['_source']
concept_info = {
'concept_id': source.get('concept_id'),
'concept_name': source.get('concept'),
'stocks': [],
'concept_type': 'leaf'
}
# v3索引的stocks字段是 [{name, code}, ...]
if 'stocks' in source and isinstance(source['stocks'], list):
for stock in source['stocks']:
if isinstance(stock, dict) and 'code' in stock and stock['code']:
concept_info['stocks'].append(stock['code'])
if concept_info['stocks']:
concepts.append(concept_info)
resp = ES_CLIENT.scroll(scroll_id=scroll_id, scroll='2m')
scroll_id = resp['_scroll_id']
hits = resp['hits']['hits']
ES_CLIENT.clear_scroll(scroll_id=scroll_id)
return concepts
def load_hierarchy_concepts(leaf_concepts: list) -> list:
"""加载层级结构生成母概念lv1/lv2/lv3"""
hierarchy_path = os.path.join(os.path.dirname(__file__), HIERARCHY_FILE)
if not os.path.exists(hierarchy_path):
logger.warning(f"层级文件不存在: {hierarchy_path}")
return []
with open(hierarchy_path, 'r', encoding='utf-8') as f:
hierarchy_data = json.load(f)
# 建立概念名称到股票的映射
concept_to_stocks = {}
for c in leaf_concepts:
concept_to_stocks[c['concept_name']] = set(c['stocks'])
parent_concepts = []
for lv1 in hierarchy_data.get('hierarchy', []):
lv1_name = lv1.get('lv1', '')
lv1_stocks = set()
for child in lv1.get('children', []):
lv2_name = child.get('lv2', '')
lv2_stocks = set()
if 'children' in child:
for lv3_child in child.get('children', []):
lv3_name = lv3_child.get('lv3', '')
lv3_stocks = set()
for concept_name in lv3_child.get('concepts', []):
if concept_name in concept_to_stocks:
lv3_stocks.update(concept_to_stocks[concept_name])
if lv3_stocks:
parent_concepts.append({
'concept_id': generate_id(f"lv3_{lv3_name}"),
'concept_name': f"[三级] {lv3_name}",
'stocks': list(lv3_stocks),
'concept_type': 'lv3'
})
lv2_stocks.update(lv3_stocks)
else:
for concept_name in child.get('concepts', []):
if concept_name in concept_to_stocks:
lv2_stocks.update(concept_to_stocks[concept_name])
if lv2_stocks:
parent_concepts.append({
'concept_id': generate_id(f"lv2_{lv2_name}"),
'concept_name': f"[二级] {lv2_name}",
'stocks': list(lv2_stocks),
'concept_type': 'lv2'
})
lv1_stocks.update(lv2_stocks)
if lv1_stocks:
parent_concepts.append({
'concept_id': generate_id(f"lv1_{lv1_name}"),
'concept_name': f"[一级] {lv1_name}",
'stocks': list(lv1_stocks),
'concept_type': 'lv1'
})
return parent_concepts
# ==================== 基准价格获取 ====================
def get_base_prices(stock_codes: list, current_date: str) -> dict:
"""获取当日的昨收价作为基准从ea_trade的F002N字段
ea_trade表字段说明
- F002N: 昨日收盘价
- F007N: 最近成交价(收盘价)
- F010N: 涨跌幅
"""
if not stock_codes:
return {}
# 过滤出有效的6位股票代码
valid_codes = [code for code in stock_codes if code and len(code) == 6 and code.isdigit()]
if not valid_codes:
return {}
stock_codes_str = "','".join(valid_codes)
# 获取当日数据中的昨收价F002N
query = f"""
SELECT SECCODE, F002N
FROM ea_trade
WHERE SECCODE IN ('{stock_codes_str}')
AND TRADEDATE = (
SELECT MAX(TRADEDATE)
FROM ea_trade
WHERE TRADEDATE <= '{current_date}'
)
AND F002N IS NOT NULL AND F002N > 0
"""
try:
with MYSQL_ENGINE.connect() as conn:
result = conn.execute(text(query))
base_prices = {row[0]: float(row[1]) for row in result if row[1] and float(row[1]) > 0}
logger.info(f"获取到 {len(base_prices)} 个基准价格")
return base_prices
except Exception as e:
logger.error(f"获取基准价格失败: {e}")
return {}
# ==================== 实时价格获取 ====================
def get_latest_prices(stock_codes: list) -> dict:
"""从ClickHouse获取最新分钟数据的收盘价
Args:
stock_codes: 纯6位股票代码列表如 ['000001', '600000']
Returns:
dict: {纯6位代码: {'close': 价格, 'timestamp': 时间}}
"""
if not stock_codes:
return {}
client = get_ch_client()
# 转换为ClickHouse格式的代码带后缀
ch_codes = []
code_mapping = {} # ch_code -> pure_code
for code in stock_codes:
ch_code = code_to_ch_format(code)
if ch_code:
ch_codes.append(ch_code)
code_mapping[ch_code] = code
if not ch_codes:
logger.warning("没有有效的股票代码可查询")
return {}
ch_codes_str = "','".join(ch_codes)
# 获取今日最新的分钟数据
query = f"""
SELECT code, close, timestamp
FROM (
SELECT code, close, timestamp,
ROW_NUMBER() OVER (PARTITION BY code ORDER BY timestamp DESC) as rn
FROM stock_minute
WHERE code IN ('{ch_codes_str}')
AND toDate(timestamp) = today()
)
WHERE rn = 1
"""
try:
result = client.execute(query)
if not result:
return {}
latest_prices = {}
for row in result:
ch_code, close, ts = row
if close and close > 0:
# 转回纯6位代码
pure_code = code_mapping.get(ch_code)
if pure_code:
latest_prices[pure_code] = {
'close': float(close),
'timestamp': ts
}
return latest_prices
except Exception as e:
logger.error(f"获取最新价格失败: {e}")
return {}
# ==================== 涨跌幅计算 ====================
def calculate_change_pct(base_prices: dict, latest_prices: dict) -> dict:
"""计算涨跌幅"""
changes = {}
for code, latest in latest_prices.items():
if code in base_prices and base_prices[code] > 0:
base = base_prices[code]
close = latest['close']
change_pct = (close - base) / base * 100
changes[code] = round(change_pct, 4)
return changes
def calculate_concept_stats(concepts: list, stock_changes: dict, trade_date: str) -> list:
"""计算所有概念的涨跌幅统计"""
stats = []
for concept in concepts:
concept_id = concept['concept_id']
concept_name = concept['concept_name']
stock_codes = concept['stocks']
concept_type = concept.get('concept_type', 'leaf')
# 获取该概念股票的涨跌幅
changes = [stock_changes[code] for code in stock_codes if code in stock_changes]
if not changes:
continue
avg_change_pct = round(np.mean(changes), 4)
stock_count = len(changes)
stats.append({
'concept_id': concept_id,
'concept_name': concept_name,
'trade_date': trade_date,
'avg_change_pct': avg_change_pct,
'stock_count': stock_count,
'concept_type': concept_type
})
return stats
# ==================== MySQL更新 ====================
def update_mysql_stats(stats: list):
"""更新MySQL的concept_daily_stats表"""
if not stats:
return 0
with MYSQL_ENGINE.begin() as conn:
updated = 0
for item in stats:
upsert_sql = text("""
REPLACE INTO concept_daily_stats
(concept_id, concept_name, trade_date, avg_change_pct, stock_count, concept_type)
VALUES (:concept_id, :concept_name, :trade_date, :avg_change_pct, :stock_count, :concept_type)
""")
conn.execute(upsert_sql, item)
updated += 1
return updated
# ==================== 交易时间判断 ====================
def is_trading_time() -> bool:
"""判断当前是否为交易时间"""
now = datetime.now()
weekday = now.weekday()
# 周末不交易
if weekday >= 5:
return False
hour, minute = now.hour, now.minute
current_time = hour * 60 + minute
# 上午 9:30 - 11:30
morning_start = 9 * 60 + 30
morning_end = 11 * 60 + 30
# 下午 13:00 - 15:00
afternoon_start = 13 * 60
afternoon_end = 15 * 60
return (morning_start <= current_time <= morning_end) or \
(afternoon_start <= current_time <= afternoon_end)
def get_next_update_time() -> int:
"""获取距离下次更新的秒数"""
now = datetime.now()
if is_trading_time():
# 交易时间内,等到下一分钟
return 60 - now.second
else:
# 非交易时间
hour, minute = now.hour, now.minute
# 计算距离下次交易开始的时间
if hour < 9 or (hour == 9 and minute < 30):
# 等到9:30
target = now.replace(hour=9, minute=30, second=0, microsecond=0)
elif (hour == 11 and minute >= 30) or hour == 12:
# 等到13:00
target = now.replace(hour=13, minute=0, second=0, microsecond=0)
elif hour >= 15:
# 等到明天9:30
target = (now + timedelta(days=1)).replace(hour=9, minute=30, second=0, microsecond=0)
else:
target = now + timedelta(minutes=1)
wait_seconds = (target - now).total_seconds()
return max(60, int(wait_seconds))
# ==================== 主运行逻辑 ====================
def run_once(concepts: list, all_stocks: list) -> int:
"""执行一次更新"""
now = datetime.now()
trade_date = now.strftime('%Y-%m-%d')
# 获取基准价格(昨日收盘价)
base_prices = get_base_prices(all_stocks, trade_date)
if not base_prices:
logger.warning("无法获取基准价格")
return 0
# 获取最新价格
latest_prices = get_latest_prices(all_stocks)
if not latest_prices:
logger.warning("无法获取最新价格")
return 0
# 计算涨跌幅
stock_changes = calculate_change_pct(base_prices, latest_prices)
if not stock_changes:
logger.warning("无涨跌幅数据")
return 0
logger.info(f"获取到 {len(stock_changes)} 只股票的涨跌幅")
# 计算概念统计
stats = calculate_concept_stats(concepts, stock_changes, trade_date)
logger.info(f"计算了 {len(stats)} 个概念的涨跌幅")
# 更新MySQL
updated = update_mysql_stats(stats)
logger.info(f"更新了 {updated} 条记录到MySQL")
return updated
def run_realtime():
"""实时更新主循环"""
logger.info("=" * 60)
logger.info("启动概念涨跌幅实时更新服务")
logger.info("=" * 60)
# 加载概念数据
logger.info("加载概念数据...")
leaf_concepts = get_all_concepts()
logger.info(f"获取到 {len(leaf_concepts)} 个叶子概念")
parent_concepts = load_hierarchy_concepts(leaf_concepts)
logger.info(f"生成了 {len(parent_concepts)} 个母概念")
all_concepts = leaf_concepts + parent_concepts
logger.info(f"总计 {len(all_concepts)} 个概念")
# 收集所有股票代码
all_stocks = set()
for c in all_concepts:
all_stocks.update(c['stocks'])
all_stocks = list(all_stocks)
logger.info(f"监控 {len(all_stocks)} 只股票")
last_concept_update = datetime.now()
while True:
try:
now = datetime.now()
# 每小时重新加载概念数据
if (now - last_concept_update).total_seconds() > 3600:
logger.info("重新加载概念数据...")
leaf_concepts = get_all_concepts()
parent_concepts = load_hierarchy_concepts(leaf_concepts)
all_concepts = leaf_concepts + parent_concepts
all_stocks = set()
for c in all_concepts:
all_stocks.update(c['stocks'])
all_stocks = list(all_stocks)
last_concept_update = now
logger.info(f"更新完成: {len(all_concepts)} 个概念, {len(all_stocks)} 只股票")
# 检查是否交易时间
if not is_trading_time():
wait_sec = get_next_update_time()
wait_min = wait_sec // 60
logger.info(f"非交易时间,等待 {wait_min} 分钟后重试...")
time.sleep(min(wait_sec, 300)) # 最多等5分钟再检查
continue
# 执行更新
logger.info(f"\n{'=' * 40}")
logger.info(f"更新时间: {now.strftime('%Y-%m-%d %H:%M:%S')}")
updated = run_once(all_concepts, all_stocks)
# 等待下一分钟
sleep_sec = 60 - datetime.now().second
logger.info(f"完成,等待 {sleep_sec} 秒后继续...")
time.sleep(sleep_sec)
except KeyboardInterrupt:
logger.info("\n收到退出信号,停止服务...")
break
except Exception as e:
logger.error(f"发生错误: {e}")
import traceback
traceback.print_exc()
time.sleep(60)
def run_single():
"""单次运行(不循环)"""
logger.info("单次更新模式")
leaf_concepts = get_all_concepts()
parent_concepts = load_hierarchy_concepts(leaf_concepts)
all_concepts = leaf_concepts + parent_concepts
all_stocks = set()
for c in all_concepts:
all_stocks.update(c['stocks'])
all_stocks = list(all_stocks)
logger.info(f"概念数: {len(all_concepts)}, 股票数: {len(all_stocks)}")
updated = run_once(all_concepts, all_stocks)
logger.info(f"更新完成: {updated} 条记录")
def show_status():
"""显示当前状态"""
print("\n" + "=" * 60)
print("概念涨跌幅实时更新服务 - 状态")
print("=" * 60)
# 当前时间
now = datetime.now()
print(f"\n当前时间: {now.strftime('%Y-%m-%d %H:%M:%S')}")
print(f"是否交易时间: {'' if is_trading_time() else ''}")
# MySQL数据状态
print("\nMySQL数据状态:")
try:
with MYSQL_ENGINE.connect() as conn:
# 今日数据量
result = conn.execute(text("""
SELECT concept_type, COUNT(*) as cnt
FROM concept_daily_stats
WHERE trade_date = CURDATE()
GROUP BY concept_type
"""))
rows = list(result)
if rows:
print(" 今日数据:")
for row in rows:
print(f" {row[0]}: {row[1]}")
else:
print(" 今日暂无数据")
# 最新更新时间
result = conn.execute(text("""
SELECT MAX(updated_at) FROM concept_daily_stats WHERE trade_date = CURDATE()
"""))
row = result.fetchone()
if row and row[0]:
print(f" 最后更新: {row[0]}")
except Exception as e:
print(f" 查询失败: {e}")
# ClickHouse数据状态
print("\nClickHouse数据状态:")
try:
client = get_ch_client()
result = client.execute("""
SELECT COUNT(*), MAX(timestamp)
FROM stock_minute
WHERE toDate(timestamp) = today()
""")
if result:
count, max_ts = result[0]
print(f" 今日分钟数据: {count:,}")
print(f" 最新时间戳: {max_ts}")
except Exception as e:
print(f" 查询失败: {e}")
# 今日涨跌幅TOP10
print("\n今日涨跌幅 TOP10:")
try:
with MYSQL_ENGINE.connect() as conn:
result = conn.execute(text("""
SELECT concept_name, avg_change_pct, stock_count, concept_type
FROM concept_daily_stats
WHERE trade_date = CURDATE() AND concept_type = 'leaf'
ORDER BY avg_change_pct DESC
LIMIT 10
"""))
rows = list(result)
if rows:
print(f" {'概念':<25} | {'涨跌幅':>8} | {'股票数':>6}")
print(" " + "-" * 50)
for row in rows:
name = row[0][:25] if len(row[0]) > 25 else row[0]
print(f" {name:<25} | {row[1]:>7.2f}% | {row[2]:>6}")
else:
print(" 暂无数据")
except Exception as e:
print(f" 查询失败: {e}")
# ==================== 主函数 ====================
def main():
parser = argparse.ArgumentParser(description='概念涨跌幅实时更新服务')
parser.add_argument('command', nargs='?', default='realtime',
choices=['realtime', 'once', 'status'],
help='命令: realtime(实时运行), once(单次运行), status(状态查看)')
args = parser.parse_args()
if args.command == 'realtime':
run_realtime()
elif args.command == 'once':
run_single()
elif args.command == 'status':
show_status()
if __name__ == "__main__":
main()