diff --git a/backend/app/services/fetcher_service.py b/backend/app/services/fetcher_service.py index 93e0305..4cc71cc 100644 --- a/backend/app/services/fetcher_service.py +++ b/backend/app/services/fetcher_service.py @@ -26,9 +26,9 @@ SIMILARITY_THRESHOLD = float(os.getenv("SIMILARITY_THRESHOLD", 0.72)) API_BASE_URL = os.getenv("API_BASE_URL", "https://newsnow.busiyi.world/api/s") EMBEDDING_MODEL_PATH = os.getenv("EMBEDDING_MODEL_PATH", "") -print("正在加载向量模型...") +print("正在加载 BAAI/bge-m3 向量模型...") # 全局单例 -embedder_model = SentenceTransformer(EMBEDDING_MODEL_PATH, local_files_only=True) +embedder_model = SentenceTransformer(EMBEDDING_MODEL_PATH, local_files_only=True, device="cuda") print("模型加载完成。") diff --git a/backend/app/services/matching_service.py b/backend/app/services/matching_service.py index 0c48de5..09a814a 100644 --- a/backend/app/services/matching_service.py +++ b/backend/app/services/matching_service.py @@ -1,6 +1,6 @@ """ 匹配服务:根据用户兴趣关键词(精确 + 语义)推荐事件 -打分融合:匹配分 + 标签相关度 + 热度 + 新鲜度加成 +打分融合:标签/标题匹配分 + 标签相关度 + 热度 + 新鲜度加成 """ import os from dataclasses import dataclass @@ -14,7 +14,7 @@ from app.models.models import ExtractedTopic, TargetType, UnifiedEvent, UserTopi from app.services.fetcher_service import embedder_model -# 语义匹配阈值:用户关键词和事件标签向量相似度达到该值才计入语义命中 +# 语义匹配阈值:用户关键词和事件标签/标题向量相似度达到该值才计入语义命中 DEFAULT_PREFERENCE_SEMANTIC_THRESHOLD = 0.78 PREFERENCE_SEMANTIC_THRESHOLD = float( os.getenv("PREFERENCE_SEMANTIC_THRESHOLD", str(DEFAULT_PREFERENCE_SEMANTIC_THRESHOLD)) @@ -41,6 +41,31 @@ def _normalize_text(text: str) -> str: return text.strip().casefold() +def _find_exact_preference_match( + target_text: str, + normalized_preferences: list[tuple[str, str]], +) -> str | None: + """ + 判断目标文本是否与某个用户兴趣词形成“精确命中”。 + 命中条件: + 1. 标准化后完全相等 + 2. 二者互为包含关系 + 返回命中的原始兴趣词,未命中则返回 None。 + """ + normalized_target = _normalize_text(target_text) + if not normalized_target: + return None + + for raw_pref, normalized_pref in normalized_preferences: + if not normalized_pref: + continue + if normalized_target == normalized_pref: + return raw_pref + if normalized_pref in normalized_target or normalized_target in normalized_pref: + return raw_pref + return None + + _EMBEDDING_CACHE: dict[str, np.ndarray] = {} MAX_CACHE_SIZE = 10000 @@ -86,6 +111,26 @@ def _build_keyword_embedding_map(keywords: list[str]) -> dict[str, np.ndarray]: return result +def _find_best_semantic_match( + target_text: str, + target_vec_map: dict[str, np.ndarray], + pref_vec_map: dict[str, np.ndarray], +) -> tuple[str | None, float]: + """返回与目标文本最接近的兴趣词及其余弦相似度。""" + target_vec = target_vec_map.get(target_text) + if target_vec is None: + return None, -1.0 + + best_pref = None + best_sim = -1.0 + for pref_keyword, pref_vec in pref_vec_map.items(): + sim = float(np.dot(target_vec, pref_vec)) + if sim > best_sim: + best_sim = sim + best_pref = pref_keyword + return best_pref, best_sim + + def _ensure_aware(dt: datetime) -> datetime: """SQLite 读出的 datetime 不带时区信息,统一补上 UTC 后才能和 utcnow() 做减法。""" if dt.tzinfo is None: @@ -116,8 +161,8 @@ def recommend_events_for_user( ) -> list[MatchedEventResult]: """ 用户兴趣推荐主流程: - 1) 精确匹配:用户词 == EVENT 标签 - 2) 语义匹配:用户词向量 vs EVENT 标签向量(超过阈值) + 1) 精确匹配:用户词 vs EVENT 标签/标题 + 2) 语义匹配:用户词向量 vs EVENT 标签/标题向量(超过阈值) 3) 打分融合:匹配分 + 标签相关度 + 热度 + 新鲜度 """ final_limit = max(1, min(limit, PREFERENCE_RECOMMEND_MAX_LIMIT)) @@ -167,8 +212,6 @@ def recommend_events_for_user( ) .all() ) - if not topic_rows: - return [] # 组织事件标签映射:event_id -> [(tag, relevance_score), ...] event_topics: dict[int, list[tuple[str, float | None]]] = {} @@ -177,10 +220,6 @@ def recommend_events_for_user( continue event_topics.setdefault(event_id, []).append((topic_keyword, relevance_score)) - # 如果某事件没有标签,就不参与推荐 - if not event_topics: - return [] - # 3. 批量编码用户词与标签词,减少模型调用次数 unique_preference_keywords = list(dict.fromkeys(preference_keywords)) unique_topic_keywords = list(dict.fromkeys([row[1] for row in topic_rows if row[1]])) @@ -188,13 +227,21 @@ def recommend_events_for_user( topic_vec_map = _build_keyword_embedding_map(unique_topic_keywords) # 预先建立“标准化后用户词集合”,用于精确匹配 - normalized_pref_set = {_normalize_text(word) for word in unique_preference_keywords} + normalized_preference_pairs = [ + (word, _normalize_text(word)) + for word in unique_preference_keywords + if _normalize_text(word) + ] + unique_event_titles = list( + dict.fromkeys( + [event.unified_title.strip() for event in events if event.unified_title and event.unified_title.strip()] + ) + ) + title_vec_map = _build_keyword_embedding_map(unique_event_titles) scored_results: list[MatchedEventResult] = [] for event in events: topic_list = event_topics.get(event.id, []) - if not topic_list: - continue exact_hits: list[str] = [] semantic_hits: list[dict[str, Any]] = [] @@ -202,37 +249,18 @@ def recommend_events_for_user( # 对每个事件标签做精确匹配或语义匹配 for topic_keyword, topic_relevance in topic_list: - normalized_topic = _normalize_text(topic_keyword) topic_relevance_score = float(topic_relevance) if topic_relevance is not None else 50.0 # 1) 精确命中(包括完全相等与包含关系) - matched_exact = False - if normalized_topic in normalized_pref_set: - matched_exact = True - else: - for pref_word in normalized_pref_set: - if pref_word and (pref_word in normalized_topic or normalized_topic in pref_word): - matched_exact = True - break - - if matched_exact: + matched_pref = _find_exact_preference_match(topic_keyword, normalized_preference_pairs) + if matched_pref is not None: exact_hits.append(topic_keyword) # 精确命中给较高基础分,标签自身相关度作为增益 score += 45.0 + topic_relevance_score * 0.2 continue # 2) 语义命中(未精确命中时再算) - topic_vec = topic_vec_map.get(topic_keyword) - if topic_vec is None: - continue - - best_pref = None - best_sim = -1.0 - for pref_keyword, pref_vec in pref_vec_map.items(): - sim = float(np.dot(topic_vec, pref_vec)) - if sim > best_sim: - best_sim = sim - best_pref = pref_keyword + best_pref, best_sim = _find_best_semantic_match(topic_keyword, topic_vec_map, pref_vec_map) if best_pref is not None and best_sim >= similarity_threshold: semantic_hits.append( @@ -245,6 +273,25 @@ def recommend_events_for_user( # 语义命中分略低于精确命中,并由相似度放大 score += best_sim * 35.0 + topic_relevance_score * 0.12 + # 标题也参与匹配,但权重低于结构化标签,避免长标题过度主导排序。 + event_title = (event.unified_title or "").strip() + if event_title: + title_exact_pref = _find_exact_preference_match(event_title, normalized_preference_pairs) + if title_exact_pref is not None: + exact_hits.append(f"标题:{title_exact_pref}") + score += 30.0 + else: + best_pref, best_sim = _find_best_semantic_match(event_title, title_vec_map, pref_vec_map) + if best_pref is not None and best_sim >= similarity_threshold: + semantic_hits.append( + { + "preference_keyword": best_pref, + "topic_keyword": f"标题:{best_pref}", + "similarity": round(best_sim, 4), + } + ) + score += best_sim * 24.0 + # 如果精确和语义都没命中,直接跳过 if not exact_hits and not semantic_hits: continue diff --git a/frontend/src/components/UnifiedEventCard.vue b/frontend/src/components/UnifiedEventCard.vue index cd0aa4e..9d0000f 100644 --- a/frontend/src/components/UnifiedEventCard.vue +++ b/frontend/src/components/UnifiedEventCard.vue @@ -111,6 +111,14 @@ function getRankingChartOptions(history: number[], platformColor: string) { height: 56, sparkline: { enabled: true }, animations: { enabled: true, easing: 'easeinout' as const, speed: 400 }, + events: { + mounted: (chartContext: any) => { + chartContext.el?.querySelector('.apexcharts-svg > title')?.remove() + }, + updated: (chartContext: any) => { + chartContext.el?.querySelector('.apexcharts-svg > title')?.remove() + } + } }, stroke: { curve: 'smooth' as const, width: 2 }, fill: { diff --git a/frontend/src/views/DashboardView.vue b/frontend/src/views/DashboardView.vue index d7521e9..ab5d240 100644 --- a/frontend/src/views/DashboardView.vue +++ b/frontend/src/views/DashboardView.vue @@ -182,6 +182,14 @@ function getRankingChartOptions(history: number[], platformColor: string) { height: 56, sparkline: { enabled: true }, animations: { enabled: true, easing: 'easeinout' as const, speed: 400 }, + events: { + mounted: (chartContext: any) => { + chartContext.el?.querySelector('.apexcharts-svg > title')?.remove() + }, + updated: (chartContext: any) => { + chartContext.el?.querySelector('.apexcharts-svg > title')?.remove() + } + } }, stroke: { curve: 'smooth' as const, width: 2 }, fill: { diff --git a/frontend/src/views/SearchView.vue b/frontend/src/views/SearchView.vue index edce50a..4f89177 100644 --- a/frontend/src/views/SearchView.vue +++ b/frontend/src/views/SearchView.vue @@ -72,6 +72,12 @@ const chartOptions = ref({ }, // 点击图表数据点:切换选中时间,再次点击则取消筛选 events: { + mounted: (chartContext: any) => { + chartContext.el?.querySelector('.apexcharts-svg > title')?.remove() + }, + updated: (chartContext: any) => { + chartContext.el?.querySelector('.apexcharts-svg > title')?.remove() + }, markerClick: function(event: unknown, chartContext: unknown, { dataPointIndex }: never) { if (searchResult.value && searchResult.value.timeline[dataPointIndex]) { const clickedTime = searchResult.value.timeline[dataPointIndex].time_label @@ -585,7 +591,12 @@ async function handleSearch() { .chart-container { margin-top: 16px; - margin-left: -10px; /* 视觉上抵消 apexcharts 的默认左侧留白。 */ + margin-left: -10px; +} + +.chart-container :deep(svg), +.chart-container :deep(canvas) { + outline: none; } .events-section { @@ -595,7 +606,6 @@ async function handleSearch() { .events-grid { display: flex; flex-direction: column; - /* 与 DashboardView 保持一致,列表按纵向堆叠展示。 */ } .loading-state { diff --git a/frontend/src/views/TopicsView.vue b/frontend/src/views/TopicsView.vue index 49c4627..f39a6ba 100644 --- a/frontend/src/views/TopicsView.vue +++ b/frontend/src/views/TopicsView.vue @@ -156,7 +156,7 @@ onMounted(async () => { v-model="newKeyword" type="text" class="keyword-input" - placeholder="输入关键词,如「直升机」「科比」「佐巴扬」..." + placeholder="输入关键词,如「篮球」「科比」「科技」..." maxlength="100" @keydown="onInputKeydown" />