optimize+注释

This commit is contained in:
stardrophere
2026-03-13 23:48:49 +08:00
parent 6aee65af6c
commit da00ebb8f2
41 changed files with 874 additions and 174 deletions
+9 -5
View File
@@ -1,4 +1,8 @@
# app/services/summary_service.py
+ """
+ 摘要服务:调用 LLM 生成统一标题、综合摘要、话题标签
+ 定时任务:对热度达标且未摘要的事件批量处理
+ """
import json
import os
from datetime import timedelta
@@ -36,7 +40,7 @@ deepseek_client = AsyncOpenAI(
async def call_llm_for_summary(platform_data_text: str) -> dict:
- """Call LLM for unified title, summary and topic candidates."""
+ """调用 LLM 生成统一标题、综合摘要、话题候选词"""
prompt = SUMMARY_USER_PROMPT_TEMPLATE.format(platform_data_text=platform_data_text)
response = await deepseek_client.chat.completions.create(
@@ -66,7 +70,7 @@ def _normalize_score(raw_score: Any) -> float | None:
def parse_topic_keywords(llm_result: dict) -> list[dict[str, Any]]:
- """Parse topic keywords from LLM response; support list[str] and list[object]."""
+ """解析 LLM 返回的话题关键词,支持字符串或对象格式"""
raw_topics = llm_result.get("topic_keywords") or []
parsed: list[dict[str, Any]] = []
seen: set[str] = set()
@@ -103,7 +107,7 @@ def parse_topic_keywords(llm_result: dict) -> list[dict[str, Any]]:
def normalize_topic_keywords(topic_candidates: list[dict[str, Any]]) -> list[dict[str, Any]]:
- """Deduplicate semantically similar tags using embedding similarity."""
+ """用向量相似度去重同义标签,保留最具代表性的关键词"""
if not topic_candidates:
return []
@@ -159,7 +163,7 @@ def normalize_topic_keywords(topic_candidates: list[dict[str, Any]]) -> list[dic
def replace_event_topics(db, event_id: int, normalized_topics: list[dict[str, Any]]) -> None:
- """Replace EVENT tags for one unified event atomically within current transaction."""
+ """原子替换某事件的标签:先删旧再插新"""
db.query(ExtractedTopic).filter(
ExtractedTopic.target_type == TargetType.EVENT,
ExtractedTopic.target_id == event_id,
@@ -177,7 +181,7 @@ def replace_event_topics(db, event_id: int, normalized_topics: list[dict[str, An
async def generate_unified_summaries():
- """Scheduled task: refresh summaries and topic tags for hot unified events."""
+ """定时任务:对热度达标且未摘要的事件刷新标题、摘要、标签"""
print(f"[{utcnow()}] Start unified summary generation task...")
# 先提取需要处理的事件 ID,尽早释放 session,不长期占用 db session