# InsightRadar/backend/app/services/summary_service.py

# app/services/summary_service.py
import os
import json
from datetime import timedelta
from openai import AsyncOpenAI

from app.database import SessionLocal
from app.models.models import UnifiedEvent, TrendingEvent, InfoSource, utcnow
from app.prompts.summary_prompts import (
    SUMMARY_SYSTEM_PROMPT,
    SUMMARY_USER_PROMPT_TEMPLATE,
)

# Minimum hot_score used by the summary job's event filter; can be overridden
# with the HOT_SCORE_THRESHOLD environment variable (defaults to 3).
HOT_SCORE_THRESHOLD = int(os.environ.get("HOT_SCORE_THRESHOLD", "3"))
# API key handed to the client below; an empty string when the variable is unset.
AI_API_KEY = os.environ.get("AI_API_KEY", "")

# Async client created once at import time and shared by every request.
deepseek_client = AsyncOpenAI(
    base_url="https://api.deepseek.com",
    api_key=AI_API_KEY,
)


async def call_llm_for_summary(platform_data_text: str) -> dict:
    """Ask DeepSeek for a unified title and a multi-platform summary.

    Args:
        platform_data_text: Pre-rendered text describing the headlines seen
            on each platform; it is substituted into the user prompt template.

    Returns:
        The JSON object produced by the model, decoded into a ``dict``.

    Raises:
        ValueError: If the response has no choices, the message body is
            missing or blank, the body is not valid JSON
            (``json.JSONDecodeError`` is a ``ValueError`` subclass), or the
            decoded JSON is not an object.
    """
    prompt = SUMMARY_USER_PROMPT_TEMPLATE.format(
        platform_data_text=platform_data_text
    )

    response = await deepseek_client.chat.completions.create(
        model="deepseek-chat",
        messages=[
            {"role": "system", "content": SUMMARY_SYSTEM_PROMPT},
            {"role": "user", "content": prompt},
        ],
        # Request a JSON object so the reply can be decoded directly.
        response_format={"type": "json_object"},
        temperature=1,
    )

    if not response.choices:
        raise ValueError("LLM response contained no choices")

    result_text = response.choices[0].message.content
    # Guard against a missing or blank body: json.loads(None) would otherwise
    # surface as an opaque TypeError far from the real cause.
    if not result_text or not result_text.strip():
        raise ValueError("LLM response content is empty")

    result = json.loads(result_text)
    # Callers use key lookups on the result, so anything but an object
    # (list, string, number, ...) is treated as a malformed reply.
    if not isinstance(result, dict):
        raise ValueError(
            f"LLM response is not a JSON object (got {type(result).__name__})"
        )
    return result


def _build_platform_text(trends) -> str:
    """Render one prompt line per platform from (trend_record, source_name) rows.

    Headlines are de-duplicated per platform and sorted so the generated
    prompt is reproducible; missing/empty headlines are skipped because they
    carry no information and would break ``str.join``. Returns an empty
    string when no usable headline exists.
    """
    headlines_by_platform = {}
    for trend_record, source_name in trends:
        headline = trend_record.current_headline
        if not headline:
            continue
        headlines_by_platform.setdefault(source_name, set()).add(headline)

    return "\n".join(
        f"【{platform}】: {', '.join(sorted(headlines))}"
        for platform, headlines in headlines_by_platform.items()
    )


async def generate_unified_summaries() -> None:
    """Scheduled task: (re)generate summaries for high-heat unified events.

    Selects events whose ``hot_score`` reaches ``HOT_SCORE_THRESHOLD``,
    exceeds the value recorded at the last summary
    (``last_summarized_trends_count``) and whose ``created_at`` falls within
    the last three days. For each one, the per-platform headlines are sent to
    the LLM and the event's title, summary and watermark are updated.

    Each event is committed on its own, so a failure on one event (LLM error,
    malformed reply, database error) is logged and rolled back without
    discarding the summaries already produced for other events.

    NOTE(review): the session is used synchronously inside a coroutine, so
    the queries and commits below run on the event loop thread.
    """
    print(f"[{utcnow()}] 开始执行 DeepSeek 摘要生成任务...")

    with SessionLocal() as db:
        recent_threshold = utcnow() - timedelta(days=3)

        # All three conditions must hold: heat reaches the threshold, heat has
        # grown since the last summary, and the event was created recently.
        # NOTE(review): a NULL last_summarized_trends_count would make the
        # comparison NULL and exclude the row — confirm the column default.
        events = db.query(UnifiedEvent).filter(
            UnifiedEvent.hot_score >= HOT_SCORE_THRESHOLD,
            UnifiedEvent.hot_score > UnifiedEvent.last_summarized_trends_count,
            UnifiedEvent.created_at >= recent_threshold
        ).all()

        if not events:
            print("当前没有需要更新摘要的大事件，任务结束。")
            return

        # Snapshot id/hot_score up front: a commit or rollback may expire
        # loaded attributes (depending on session configuration), and the
        # watermark should reflect the heat the selection was based on.
        snapshots = [(event, event.id, event.hot_score) for event in events]

        for event, event_id, hot_score in snapshots:
            try:
                # Join against InfoSource to get each child trend together
                # with the name of the platform it came from.
                trends = db.query(TrendingEvent, InfoSource.source_name) \
                    .join(InfoSource, TrendingEvent.source_id == InfoSource.id) \
                    .filter(TrendingEvent.unified_event_id == event_id) \
                    .all()

                platform_data_text = _build_platform_text(trends)
                if not platform_data_text:
                    # No trends, or none with a usable headline: nothing to send.
                    continue

                llm_result = await call_llm_for_summary(platform_data_text)
                if not isinstance(llm_result, dict):
                    raise ValueError("LLM result is not a JSON object")

                new_title = llm_result.get("unified_title")
                new_summary = llm_result.get("ai_comprehensive_summary")
                # A reply carrying neither field is a failed generation; do not
                # advance the watermark, so the event is retried next run.
                if not new_title and not new_summary:
                    raise ValueError(
                        "LLM result has neither 'unified_title' nor "
                        "'ai_comprehensive_summary'"
                    )

                # Only overwrite with non-empty values so a partial reply
                # cannot blank out an existing title or summary.
                if new_title:
                    event.unified_title = new_title
                if new_summary:
                    event.ai_comprehensive_summary = new_summary

                # Advance the watermark to the heat this summary was based on.
                event.last_summarized_trends_count = hot_score

                # Persist this event immediately so later failures cannot
                # discard it.
                db.commit()
                print(f"成功更新大事件 ID {event_id} 的深度摘要 (当前热度: {hot_score})。")

            except Exception as e:
                # Best-effort per event: undo any pending change for this
                # event, report it, and move on to the next one.
                db.rollback()
                print(f"大事件 ID {event_id} 摘要生成失败: {e}")