Files
InsightRadar/backend/app/services/summary_service.py
T
2026-03-11 01:33:21 +08:00

105 lines
3.8 KiB
Python

# app/services/summary_service.py
import os
import json
from datetime import timedelta
from openai import AsyncOpenAI
from app.database import SessionLocal
from app.models.models import UnifiedEvent, TrendingEvent, InfoSource, utcnow
from app.prompts.summary_prompts import (
SUMMARY_SYSTEM_PROMPT,
SUMMARY_USER_PROMPT_TEMPLATE,
)
HOT_SCORE_THRESHOLD = int(os.getenv("HOT_SCORE_THRESHOLD", 3))
AI_API_KEY = os.getenv("AI_API_KEY", '')
# 1. 初始化异步客户端 (全局复用)
deepseek_client = AsyncOpenAI(
api_key=AI_API_KEY,
base_url="https://api.deepseek.com"
)
async def call_llm_for_summary(platform_data_text: str) -> dict:
"""调用 DeepSeek 生成统一标题和多平台视角摘要"""
prompt = SUMMARY_USER_PROMPT_TEMPLATE.format(
platform_data_text=platform_data_text
)
# await
response = await deepseek_client.chat.completions.create(
model="deepseek-chat",
messages=[
{"role": "system", "content": SUMMARY_SYSTEM_PROMPT},
{"role": "user", "content": prompt}
],
response_format={"type": "json_object"},
temperature=1
)
result_text = response.choices[0].message.content
return json.loads(result_text)
async def generate_unified_summaries():
"""定时任务:扫描高热度事件并生成/更新摘要"""
print(f"[{utcnow()}] 开始执行 DeepSeek 摘要生成任务...")
with SessionLocal() as db:
recent_threshold = utcnow() - timedelta(days=3)
# 必须满足:热度达标 AND (当前热度 > 上次生成摘要时的热度) AND 近期活跃
events = db.query(UnifiedEvent).filter(
UnifiedEvent.hot_score >= HOT_SCORE_THRESHOLD,
UnifiedEvent.hot_score > UnifiedEvent.last_summarized_trends_count,
UnifiedEvent.created_at >= recent_threshold
).all()
if not events:
print("当前没有需要更新摘要的大事件,任务结束。")
return
for event in events:
# 联合查询获取该事件在各平台的子新闻
trends = db.query(TrendingEvent, InfoSource.source_name) \
.join(InfoSource, TrendingEvent.source_id == InfoSource.id) \
.filter(TrendingEvent.unified_event_id == event.id) \
.all()
if not trends:
continue
# 按平台归类标题并去重
platform_dict = {}
for trend_record, source_name in trends:
if source_name not in platform_dict:
platform_dict[source_name] = set()
platform_dict[source_name].add(trend_record.current_headline)
# 组装给大模型的 Prompt 数据
prompt_lines = [f"{platform}】: {', '.join(headlines)}" for platform, headlines in platform_dict.items()]
platform_data_text = "\n".join(prompt_lines)
try:
# 调用封装好的异步函数
llm_result = await call_llm_for_summary(platform_data_text)
if "unified_title" in llm_result:
event.unified_title = llm_result["unified_title"]
if "ai_comprehensive_summary" in llm_result:
event.ai_comprehensive_summary = llm_result["ai_comprehensive_summary"]
# 成功后更新水位线
# 将最后一次总结时的热搜数量,更新为当前最新的 hot_score
event.last_summarized_trends_count = event.hot_score
print(f"成功更新大事件 ID {event.id} 的深度摘要 (当前热度: {event.hot_score})。")
except Exception as e:
print(f"大事件 ID {event.id} 摘要生成失败: {e}")
continue
# 提交事务
db.commit()