# NOTE: stray file-viewer metadata removed from the top of this file
# (was "541 lines / 18 KiB / Python" repeated — not valid Python).
"""
|
||
ClickHouse 查询优化方案 - 针对 /api/event/<int:event_id>/related-stocks-detail
|
||
|
||
问题分析:
|
||
1. N+1 查询问题:每只股票执行 3 次独立查询(共 30+ 次)
|
||
2. 重复扫描:first_price 和 last_price 需要扫描表两次
|
||
3. 缺少批量查询优化
|
||
|
||
优化方案对比:
|
||
┌─────────────┬──────────────┬──────────────┬────────────┐
|
||
│ 方案 │ 查询次数 │ 性能提升 │ 实现难度 │
|
||
├─────────────┼──────────────┼──────────────┼────────────┤
|
||
│ 当前代码 │ N * 3 │ 基准 │ - │
|
||
│ 方案1 批量 │ 1 │ 80-90% │ 中等 │
|
||
│ 方案2 并行 │ N * 3 (并行)│ 40-60% │ 简单 │
|
||
│ 方案3 缓存 │ 减少重复 │ 20-40% │ 简单 │
|
||
└─────────────┴──────────────┴──────────────┴────────────┘
|
||
"""
|
||
|
||
# ============================================================================
|
||
# 方案 1: 批量查询(推荐)- 将所有股票的查询合并为一次
|
||
# ============================================================================
|
||
|
||
def get_batch_stock_prices_optimized(client, stock_codes, start_datetime, end_datetime):
    """
    Fetch price data for many stocks in a single ClickHouse query.

    Performance comparison:
      - old approach: 10 stocks = 20 queries (first + last price each)
      - new approach: 10 stocks = 1 query (~20x speedup)

    Args:
        client: ClickHouse client exposing ``execute(query, params)``.
        stock_codes: list of stock codes, e.g. ['600519.SH', '601088.SH'].
        start_datetime: inclusive start of the time window.
        end_datetime: inclusive end of the time window.

    Returns:
        dict mapping code -> {'first_price', 'last_price', 'change_pct',
        'change_amount', 'open_price', 'high_price', 'low_price',
        'volume', 'amount'} (missing values are None). Returns {} for
        empty input or on query failure (best-effort; error is printed).
    """
    if not stock_codes:
        return {}

    # Single batched query: window functions rank each code's rows in both
    # directions so the first and last bar are picked in one table scan.
    query = """
        SELECT
            code,
            -- first price in the window (at event time)
            anyIf(close, rownum_asc = 1) as first_price,
            -- last price in the window (most recent bar)
            anyIf(close, rownum_desc = 1) as last_price,
            -- percentage change over the window
            (last_price - first_price) / first_price * 100 as change_pct,
            -- absolute change
            last_price - first_price as change_amount,
            -- remaining fields taken from the latest bar
            anyIf(open, rownum_desc = 1) as open_price,
            anyIf(high, rownum_desc = 1) as high_price,
            anyIf(low, rownum_desc = 1) as low_price,
            anyIf(volume, rownum_desc = 1) as volume,
            anyIf(amt, rownum_desc = 1) as amount
        FROM (
            SELECT
                code,
                timestamp,
                close,
                open,
                high,
                low,
                volume,
                amt,
                -- ascending rank: rownum_asc = 1 is the earliest bar
                ROW_NUMBER() OVER (PARTITION BY code ORDER BY timestamp ASC) as rownum_asc,
                -- descending rank: rownum_desc = 1 is the latest bar
                ROW_NUMBER() OVER (PARTITION BY code ORDER BY timestamp DESC) as rownum_desc
            FROM stock_minute
            WHERE code IN %(codes)s
              AND timestamp >= %(start)s
              AND timestamp <= %(end)s
        )
        GROUP BY code
    """

    try:
        data = client.execute(query, {
            'codes': tuple(stock_codes),  # ClickHouse IN expects a tuple
            'start': start_datetime,
            'end': end_datetime
        })

        # Format rows into a per-code dict.
        # FIX: use ``is not None`` instead of truthiness so legitimate zero
        # values (e.g. change_pct == 0.0 for a flat price, volume == 0)
        # are preserved instead of being collapsed to None.
        result = {}
        for row in data:
            code = row[0]
            result[code] = {
                'first_price': float(row[1]) if row[1] is not None else None,
                'last_price': float(row[2]) if row[2] is not None else None,
                'change_pct': float(row[3]) if row[3] is not None else None,
                'change_amount': float(row[4]) if row[4] is not None else None,
                'open_price': float(row[5]) if row[5] is not None else None,
                'high_price': float(row[6]) if row[6] is not None else None,
                'low_price': float(row[7]) if row[7] is not None else None,
                'volume': int(row[8]) if row[8] is not None else None,
                'amount': float(row[9]) if row[9] is not None else None,
            }

        print(f"批量查询完成,获取了 {len(result)} 只股票的数据")
        return result

    except Exception as e:
        # Best-effort: log and return empty so the caller degrades gracefully.
        print(f"批量查询失败: {e}")
        return {}
|
||
|
||
|
||
def get_batch_minute_chart_data(client, stock_codes, start_datetime, end_datetime):
    """
    Fetch minute-chart rows for many stocks in a single query.

    Args:
        client: ClickHouse client exposing ``execute(query, params)``.
        stock_codes: list of stock codes.
        start_datetime: inclusive start of the time window.
        end_datetime: inclusive end of the time window.

    Returns:
        dict mapping code -> list of bars ordered by timestamp, each bar a
        dict {'time', 'open', 'high', 'low', 'close', 'volume', 'amount'}.
        Returns {} for empty input or on query failure (error is printed).
    """
    if not stock_codes:
        return {}

    query = """
        SELECT
            code,
            timestamp,
            open,
            high,
            low,
            close,
            volume,
            amt
        FROM stock_minute
        WHERE code IN %(codes)s
          AND timestamp >= %(start)s
          AND timestamp <= %(end)s
        ORDER BY code, timestamp
    """

    try:
        data = client.execute(query, {
            'codes': tuple(stock_codes),
            'start': start_datetime,
            'end': end_datetime
        })

        # Group rows by stock code; ORDER BY keeps each list time-sorted.
        # FIX: use ``is not None`` so a bar with 0 volume / 0.0 amount is
        # kept as 0 instead of being collapsed to None by truthiness.
        result = {}
        for row in data:
            code = row[0]
            result.setdefault(code, []).append({
                'time': row[1].strftime('%H:%M'),
                'open': float(row[2]) if row[2] is not None else None,
                'high': float(row[3]) if row[3] is not None else None,
                'low': float(row[4]) if row[4] is not None else None,
                'close': float(row[5]) if row[5] is not None else None,
                'volume': float(row[6]) if row[6] is not None else None,
                'amount': float(row[7]) if row[7] is not None else None
            })

        print(f"批量获取分时数据完成,获取了 {len(result)} 只股票的数据")
        return result

    except Exception as e:
        # Best-effort: log and return empty so the caller degrades gracefully.
        print(f"批量获取分时数据失败: {e}")
        return {}
|
||
|
||
|
||
# ============================================================================
|
||
# 使用示例:替换原来的 for 循环
|
||
# ============================================================================
|
||
|
||
def api_event_related_stocks_optimized(event_id):
    """
    Optimized endpoint for /api/event/<event_id>/related-stocks-detail.

    Replaces the per-stock query loop (N * 3 ClickHouse queries) with two
    batched queries: one for price summaries, one for minute-chart bars.

    Args:
        event_id: primary key of the Event to look up (404 if missing).

    Returns:
        Flask JSON response with event info and per-stock trade/chart data;
        ({'code': 500, ...}, 500) on unexpected errors.
    """
    try:
        from datetime import datetime

        event = Event.query.get_or_404(event_id)
        related_stocks = event.related_stocks.order_by(RelatedStock.correlation.desc()).all()

        if not related_stocks:
            return jsonify({'code': 200, 'data': {'related_stocks': []}})

        client = get_clickhouse_client()

        # Resolve the trading-day time window (same logic as the original endpoint).
        event_time = event.start_time if event.start_time else event.created_at
        trading_day, start_time, end_time = get_trading_day_and_times(event_time)
        start_datetime = datetime.combine(trading_day, start_time)
        end_datetime = datetime.combine(trading_day, end_time)

        # Batch-fetch price summaries for every stock (1 query instead of N).
        stock_codes = [stock.stock_code for stock in related_stocks]
        prices_data = get_batch_stock_prices_optimized(
            client, stock_codes, start_datetime, end_datetime
        )

        # Batch-fetch minute-chart data for every stock (1 query instead of N).
        minute_data = get_batch_minute_chart_data(
            client, stock_codes, start_datetime, end_datetime
        )

        # Assemble the response payload.
        stocks_data = []
        for stock in related_stocks:
            # Look up batched results in memory (no further ClickHouse queries).
            price_info = prices_data.get(stock.stock_code, {})
            chart_data = minute_data.get(stock.stock_code, [])

            # Stock master data. TODO: still an N+1 on the SQL side —
            # consider one IN-query over all codes before this loop.
            stock_info = StockBasicInfo.query.filter_by(SECCODE=stock.stock_code).first()
            if not stock_info:
                base_code = stock.stock_code.split('.')[0]
                stock_info = StockBasicInfo.query.filter_by(SECCODE=base_code).first()

            # FIX: check ``is not None`` before rounding so a flat price
            # (change == 0.0) is reported as 0.0 rather than null.
            change_amount = price_info.get('change_amount')
            change_pct = price_info.get('change_pct')

            stock_data = {
                'id': stock.id,
                'stock_code': stock.stock_code,
                'stock_name': stock.stock_name,
                'sector': stock.sector,
                'relation_desc': stock.relation_desc,
                'correlation': stock.correlation,
                'momentum': stock.momentum,
                'listing_date': stock_info.F006D.isoformat() if stock_info and stock_info.F006D else None,
                'market': stock_info.F005V if stock_info else None,

                # Trade data sourced from the batched price query.
                'trade_data': {
                    'latest_price': price_info.get('last_price'),
                    'first_price': price_info.get('first_price'),
                    'open_price': price_info.get('open_price'),
                    'high_price': price_info.get('high_price'),
                    'low_price': price_info.get('low_price'),
                    'change_amount': round(change_amount, 2) if change_amount is not None else None,
                    'change_pct': round(change_pct, 2) if change_pct is not None else None,
                    'volume': price_info.get('volume'),
                    'amount': price_info.get('amount'),
                    'trade_date': trading_day.isoformat(),
                },

                # Minute-chart bars sourced from the batched chart query.
                'minute_chart': chart_data
            }

            stocks_data.append(stock_data)

        return jsonify({
            'code': 200,
            'message': 'success',
            'data': {
                'event_id': event_id,
                'event_title': event.title,
                'related_stocks': stocks_data,
                'total_count': len(stocks_data)
            }
        })

    except Exception as e:
        print(f"Error in api_event_related_stocks_optimized: {e}")
        return jsonify({'code': 500, 'message': str(e)}), 500
|
||
|
||
|
||
# ============================================================================
|
||
# 方案 2: 异步并行查询(适用于无法批量查询的场景)
|
||
# ============================================================================
|
||
|
||
import asyncio
|
||
from concurrent.futures import ThreadPoolExecutor
|
||
|
||
def get_stock_price_async(client, stock_code, start_datetime, end_datetime):
    """
    Query one stock's price data; safe to run on a worker thread.

    Returns a ``(stock_code, rows)`` pair so callers can match results to
    codes regardless of completion order; ``rows`` is None on failure.
    """
    params = {'code': stock_code, 'start': start_datetime, 'end': end_datetime}
    try:
        rows = client.execute("""
        WITH first_price AS (
            SELECT close FROM stock_minute WHERE code = %(code)s ...
        )
        ...
        """, params)
    except Exception:
        # Best-effort: a failed stock simply yields no data.
        rows = None
    return stock_code, rows
|
||
|
||
|
||
def get_all_stocks_parallel(client, stock_codes, start_datetime, end_datetime):
    """
    Fan the per-stock queries out over a thread pool.

    Performance comparison:
      - serial:   10 stocks * 0.1 s = 1 s
      - parallel: max(0.1 s) = 0.1 s (~10x speedup)

    Returns a dict mapping stock code -> query result (None on failure).
    """
    def _fetch(code):
        # Worker body: single-stock query returning a (code, rows) pair.
        return get_stock_price_async(client, code, start_datetime, end_datetime)

    with ThreadPoolExecutor(max_workers=10) as executor:
        # map() blocks until every task finishes, like the original
        # submit-then-result loop, and yields (code, rows) pairs in order.
        return dict(executor.map(_fetch, stock_codes))
|
||
|
||
|
||
# ============================================================================
|
||
# 方案 3: 添加缓存层(Redis)
|
||
# ============================================================================
|
||
|
||
import redis
|
||
import json
|
||
|
||
redis_client = redis.Redis(host='localhost', port=6379, db=0)
|
||
|
||
def get_stock_price_with_cache(client, stock_code, start_datetime, end_datetime):
    """
    Query a stock's price data with a Redis cache in front.

    Cache policy:
      - historical data (not today): cached for 24 hours
      - today's data: cached for 1 minute

    Args:
        client: ClickHouse client.
        stock_code: single stock code, e.g. '600519.SH'.
        start_datetime / end_datetime: query window (datetime objects).

    Returns:
        dict with 'first_price' / 'last_price' (more fields once the
        placeholder query is filled in); values may be None.
    """
    from datetime import datetime

    # FIX: key on the full datetimes, not just the dates — two different
    # intraday windows on the same day must not share a cache entry,
    # otherwise a stale result for a different range is served.
    cache_key = f"stock_price:{stock_code}:{start_datetime.isoformat()}:{end_datetime.isoformat()}"

    # Cache hit: return the stored JSON payload.
    cached_data = redis_client.get(cache_key)
    if cached_data:
        print(f"从缓存获取 {stock_code} 数据")
        return json.loads(cached_data)

    # Cache miss: query ClickHouse.
    print(f"从 ClickHouse 查询 {stock_code} 数据")
    data = client.execute("""...""", {
        'code': stock_code,
        'start': start_datetime,
        'end': end_datetime
    })

    # Format the result.
    # NOTE(review): the query above is a placeholder, so these column
    # positions (row[2] / row[0]) are illustrative — confirm against the
    # real SQL before relying on them.
    result = {
        'first_price': float(data[0][2]) if data else None,
        'last_price': float(data[0][0]) if data else None,
        # ...
    }

    # Write-through with a freshness-based TTL.
    is_today = start_datetime.date() == datetime.now().date()
    ttl = 60 if is_today else 86400  # today: 1 minute; historical: 24 hours
    redis_client.setex(cache_key, ttl, json.dumps(result))

    return result
|
||
|
||
|
||
# ============================================================================
|
||
# 方案 4: ClickHouse 查询优化(索引提示)
|
||
# ============================================================================
|
||
|
||
def get_stock_price_with_hints(client, stock_code, start_datetime, end_datetime):
    """
    Single-stock query tuned with ClickHouse-specific features.

    Techniques used:
      1. PREWHERE clause (filters early, before reading other columns)
      2. FINAL modifier (when the table is a ReplacingMergeTree)
      3. partition pruning (when the table is partitioned by date)

    Returns the raw driver result rows.
    """
    query = """
        SELECT
            code,
            anyLast(close) as last_price,
            any(close) as first_price,
            (last_price - first_price) / first_price * 100 as change_pct
        FROM stock_minute
        PREWHERE code = %(code)s  -- 使用 PREWHERE 提前过滤(比 WHERE 快)
        WHERE timestamp >= %(start)s
          AND timestamp <= %(end)s
        GROUP BY code
        SETTINGS max_threads = 2  -- 限制线程数(避免资源竞争)
    """

    params = {
        'code': stock_code,
        'start': start_datetime,
        'end': end_datetime,
    }
    return client.execute(query, params)
|
||
|
||
|
||
# ============================================================================
|
||
# 数据库层面优化建议
|
||
# ============================================================================
|
||
|
||
"""
|
||
1. 确保 stock_minute 表有以下索引:
|
||
- PRIMARY KEY (code, timestamp) -- 主键索引
|
||
- INDEX idx_timestamp timestamp TYPE minmax GRANULARITY 3 -- 时间索引
|
||
|
||
2. 表分区策略(如果数据量大):
|
||
CREATE TABLE stock_minute (
|
||
code String,
|
||
timestamp DateTime,
|
||
...
|
||
) ENGINE = MergeTree()
|
||
PARTITION BY toYYYYMM(timestamp) -- 按月分区
|
||
ORDER BY (code, timestamp)
|
||
SETTINGS index_granularity = 8192;
|
||
|
||
3. 使用物化视图预计算(适用于固定查询模式):
|
||
CREATE MATERIALIZED VIEW stock_minute_summary
|
||
ENGINE = AggregatingMergeTree()
|
||
PARTITION BY toYYYYMMDD(timestamp)
|
||
ORDER BY (code, timestamp)
|
||
AS SELECT
|
||
code,
|
||
toStartOfMinute(timestamp) as minute,
|
||
anyLast(close) as last_close,
|
||
any(close) as first_close,
|
||
...
|
||
FROM stock_minute
|
||
GROUP BY code, minute;
|
||
|
||
4. 检查表统计信息:
|
||
SELECT
|
||
table,
|
||
partition,
|
||
rows,
|
||
bytes_on_disk
|
||
FROM system.parts
|
||
WHERE table = 'stock_minute';
|
||
"""
|
||
|
||
|
||
# ============================================================================
|
||
# 性能对比测试
|
||
# ============================================================================
|
||
|
||
def benchmark_query_methods():
    """
    Compare the competing query strategies side by side.

    Scenario: fetch price data for 10 stocks.

    Expected results:
      - original (serial N+1): ~1000ms
      - scheme 1 (batched):    ~50ms  (20x speedup)
      - scheme 2 (parallel):   ~200ms (5x speedup)
      - scheme 3 (cached):     ~10ms  (100x speedup on the second request)

    NOTE(review): ``client``, ``start_dt``, ``end_dt`` and
    ``get_stock_price_original`` must be provided by the caller's scope —
    this is illustrative benchmark code, not runnable as-is.
    """
    import time

    stock_codes = ['600519.SH', '601088.SH', '600276.SH', '000001.SZ', ...]

    # Scheme 1: one batched query.
    t0 = time.time()
    result1 = get_batch_stock_prices_optimized(client, stock_codes, start_dt, end_dt)
    print(f"批量查询耗时: {(time.time() - t0) * 1000:.2f}ms")

    # Scheme 2: thread-pool fan-out.
    t0 = time.time()
    result2 = get_all_stocks_parallel(client, stock_codes, start_dt, end_dt)
    print(f"并行查询耗时: {(time.time() - t0) * 1000:.2f}ms")

    # Baseline: serial per-stock queries.
    t0 = time.time()
    result3 = {
        code: get_stock_price_original(client, code, start_dt, end_dt)
        for code in stock_codes
    }
    print(f"串行查询耗时: {(time.time() - t0) * 1000:.2f}ms")
|
||
|
||
|
||
# ============================================================================
|
||
# 总结与建议
|
||
# ============================================================================
|
||
|
||
"""
|
||
推荐实施顺序:
|
||
|
||
第一步(立即实施):方案 1 - 批量查询
|
||
- 实现难度:中等
|
||
- 性能提升:80-90%
|
||
- 风险:低
|
||
- 时间:1-2 小时
|
||
|
||
第二步(可选):方案 3 - 添加缓存
|
||
- 实现难度:简单
|
||
- 性能提升:额外 20-40%
|
||
- 风险:低
|
||
- 时间:30 分钟
|
||
|
||
第三步(长期):方案 4 - 数据库优化
|
||
- 实现难度:中等
|
||
- 性能提升:20-30%
|
||
- 风险:中(需要测试)
|
||
- 时间:2-4 小时
|
||
|
||
监控指标:
|
||
- 查询时间:目标 < 200ms(当前 > 1000ms)
|
||
- ClickHouse 查询次数:目标 1-2 次(当前 30+ 次)
|
||
- 缓存命中率:目标 > 80%(如果使用缓存)
|
||
"""
|