mirror of
https://github.com/stardrophere/InsightRadar.git
synced 2026-06-06 00:00:05 +08:00
并发优化
This commit is contained in:
@@ -180,62 +180,76 @@ async def generate_unified_summaries():
|
||||
"""Scheduled task: refresh summaries and topic tags for hot unified events."""
|
||||
print(f"[{utcnow()}] Start unified summary generation task...")
|
||||
|
||||
# 先提取需要处理的事件 ID,尽早释放 session,不长期占用 db session
|
||||
with SessionLocal() as db:
|
||||
recent_threshold = utcnow() - timedelta(days=3)
|
||||
|
||||
events = db.query(UnifiedEvent).filter(
|
||||
UnifiedEvent.hot_score >= HOT_SCORE_THRESHOLD,
|
||||
UnifiedEvent.hot_score > UnifiedEvent.last_summarized_trends_count,
|
||||
UnifiedEvent.created_at >= recent_threshold,
|
||||
).all()
|
||||
|
||||
|
||||
if not events:
|
||||
print("No events require summary update in this round.")
|
||||
return
|
||||
|
||||
# 复制出需要的信息,脱离 session
|
||||
event_ids = [e.id for e in events]
|
||||
event_hot_scores = {e.id: e.hot_score for e in events}
|
||||
|
||||
for event in events:
|
||||
# 外层循环:针对每个 event_id 开启一个极短生命周期的 session 获取依赖数据
|
||||
for event_id in event_ids:
|
||||
platform_dict: dict[str, set[str]] = {}
|
||||
with SessionLocal() as db:
|
||||
trends = (
|
||||
db.query(TrendingEvent, InfoSource.source_name)
|
||||
.join(InfoSource, TrendingEvent.source_id == InfoSource.id)
|
||||
.filter(TrendingEvent.unified_event_id == event.id)
|
||||
.filter(TrendingEvent.unified_event_id == event_id)
|
||||
.all()
|
||||
)
|
||||
|
||||
if not trends:
|
||||
continue
|
||||
|
||||
platform_dict: dict[str, set[str]] = {}
|
||||
for trend_record, source_name in trends:
|
||||
platform_dict.setdefault(source_name, set()).add(trend_record.current_headline)
|
||||
|
||||
prompt_lines = [
|
||||
f"[{platform}] {', '.join(sorted(headlines))}"
|
||||
for platform, headlines in platform_dict.items()
|
||||
]
|
||||
platform_data_text = "\n".join(prompt_lines)
|
||||
prompt_lines = [
|
||||
f"[{platform}] {', '.join(sorted(headlines))}"
|
||||
for platform, headlines in platform_dict.items()
|
||||
]
|
||||
platform_data_text = "\n".join(prompt_lines)
|
||||
|
||||
try:
|
||||
llm_result = await call_llm_for_summary(platform_data_text)
|
||||
try:
|
||||
# 大模型调用可能耗时几十秒,绝对不能把它包裹在数据库事务里
|
||||
llm_result = await call_llm_for_summary(platform_data_text)
|
||||
|
||||
# 调用完成后,再开启一个新的极短事务,进行数据回写
|
||||
with SessionLocal() as write_db:
|
||||
event = write_db.query(UnifiedEvent).get(event_id)
|
||||
if not event:
|
||||
continue
|
||||
|
||||
if "unified_title" in llm_result and llm_result["unified_title"]:
|
||||
event.unified_title = llm_result["unified_title"]
|
||||
if "ai_comprehensive_summary" in llm_result and llm_result["ai_comprehensive_summary"]:
|
||||
event.ai_comprehensive_summary = llm_result["ai_comprehensive_summary"]
|
||||
|
||||
if event.hot_score >= TOPIC_TAG_MIN_HOT_SCORE:
|
||||
hot_score = event_hot_scores.get(event_id, event.hot_score)
|
||||
if hot_score >= TOPIC_TAG_MIN_HOT_SCORE:
|
||||
topic_candidates = parse_topic_keywords(llm_result)
|
||||
normalized_topics = normalize_topic_keywords(topic_candidates)
|
||||
if normalized_topics:
|
||||
replace_event_topics(db, event.id, normalized_topics)
|
||||
replace_event_topics(write_db, event.id, normalized_topics)
|
||||
|
||||
event.last_summarized_trends_count = event.hot_score
|
||||
write_db.commit()
|
||||
|
||||
print(
|
||||
f"Updated event {event.id} summary"
|
||||
f" (hot_score={event.hot_score})."
|
||||
)
|
||||
|
||||
except Exception as exc:
|
||||
print(f"Event {event.id} summary generation failed: {exc}")
|
||||
continue
|
||||
|
||||
db.commit()
|
||||
except Exception as exc:
|
||||
print(f"Event {event_id} summary generation failed: {exc}")
|
||||
continue
|
||||
|
||||
Reference in New Issue
Block a user