并发优化

This commit is contained in:
stardrophere
2026-03-12 15:05:37 +08:00
parent 37791c7976
commit 19a61e6567
3 changed files with 216 additions and 121 deletions
+33 -19
View File
@@ -180,62 +180,76 @@ async def generate_unified_summaries():
"""Scheduled task: refresh summaries and topic tags for hot unified events."""
# NOTE(review): this text appears to be a diff hunk rendered without +/- markers
# or indentation, so removed and added lines are interleaved (for example the two
# consecutive `.filter(TrendingEvent.unified_event_id == ...)` lines). Read it as
# a before/after view, not as runnable code.
print(f"[{utcnow()}] Start unified summary generation task...")
# First collect the IDs of the events that need processing, then release the
# session early so the db session is not held for a long time.
with SessionLocal() as db:
recent_threshold = utcnow() - timedelta(days=3)
events = db.query(UnifiedEvent).filter(
UnifiedEvent.hot_score >= HOT_SCORE_THRESHOLD,
UnifiedEvent.hot_score > UnifiedEvent.last_summarized_trends_count,
UnifiedEvent.created_at >= recent_threshold,
).all()
if not events:
print("No events require summary update in this round.")
return
# Copy out the needed data so it is detached from the session.
event_ids = [e.id for e in events]
event_hot_scores = {e.id: e.hot_score for e in events}
for event in events:
# Outer loop: for each event_id, open a very short-lived session to fetch the
# dependent data.
for event_id in event_ids:
platform_dict: dict[str, set[str]] = {}
with SessionLocal() as db:
trends = (
db.query(TrendingEvent, InfoSource.source_name)
.join(InfoSource, TrendingEvent.source_id == InfoSource.id)
.filter(TrendingEvent.unified_event_id == event.id)
.filter(TrendingEvent.unified_event_id == event_id)
.all()
)
if not trends:
continue
platform_dict: dict[str, set[str]] = {}
# Group the distinct headlines by the name of the source they came from.
for trend_record, source_name in trends:
platform_dict.setdefault(source_name, set()).add(trend_record.current_headline)
prompt_lines = [
f"[{platform}] {', '.join(sorted(headlines))}"
for platform, headlines in platform_dict.items()
]
platform_data_text = "\n".join(prompt_lines)
prompt_lines = [
f"[{platform}] {', '.join(sorted(headlines))}"
for platform, headlines in platform_dict.items()
]
platform_data_text = "\n".join(prompt_lines)
try:
llm_result = await call_llm_for_summary(platform_data_text)
try:
# The LLM call can take tens of seconds, so it must never be wrapped in a
# database transaction.
llm_result = await call_llm_for_summary(platform_data_text)
# Once the call finishes, open a new, very short transaction to write the
# results back.
with SessionLocal() as write_db:
# NOTE(review): presumably SQLAlchemy; if so, `Query.get()` is a legacy API and
# `Session.get(UnifiedEvent, event_id)` is its replacement - confirm the
# installed version before changing.
event = write_db.query(UnifiedEvent).get(event_id)
if not event:
continue
# Only overwrite a field when the LLM result carries a non-empty value for it.
if "unified_title" in llm_result and llm_result["unified_title"]:
event.unified_title = llm_result["unified_title"]
if "ai_comprehensive_summary" in llm_result and llm_result["ai_comprehensive_summary"]:
event.ai_comprehensive_summary = llm_result["ai_comprehensive_summary"]
if event.hot_score >= TOPIC_TAG_MIN_HOT_SCORE:
# Use the hot score captured when the events were selected, falling back to
# the freshly loaded row's value when no snapshot exists for this id.
hot_score = event_hot_scores.get(event_id, event.hot_score)
if hot_score >= TOPIC_TAG_MIN_HOT_SCORE:
topic_candidates = parse_topic_keywords(llm_result)
normalized_topics = normalize_topic_keywords(topic_candidates)
if normalized_topics:
replace_event_topics(db, event.id, normalized_topics)
replace_event_topics(write_db, event.id, normalized_topics)
# Store the current hot score in the field that the selection query compares
# `hot_score` against (`hot_score > last_summarized_trends_count`).
event.last_summarized_trends_count = event.hot_score
write_db.commit()
print(
f"Updated event {event.id} summary"
f" (hot_score={event.hot_score})."
)
except Exception as exc:
print(f"Event {event.id} summary generation failed: {exc}")
continue
db.commit()
# A failure for one event is printed and the loop moves on to the next event.
except Exception as exc:
print(f"Event {event_id} summary generation failed: {exc}")
continue