18 Commits

Author SHA1 Message Date
csf123321 531844f33c Merge pull request #3 from stardrophere/backend_optimize
Backend optimize
2026-04-03 01:18:02 +08:00
csf123321 76f00db86d 修改u描述 2026-04-02 23:53:25 +08:00
csf123321 761fad17bc 应用层限制同步 2026-04-02 23:41:06 +08:00
csf123321 0cab5c1cda 删除多余的log 2026-04-02 18:36:34 +08:00
csf123321 9574b02d8a 临时修复vue-router的问题 2026-04-02 18:35:49 +08:00
csf123321 c48c2b9143 取消对lock的hulue, 强制cpu 2026-04-02 17:36:02 +08:00
csf123321 cdad76cd3b Merge branch 'main' into backend_optimize
合并main的算法
2026-04-02 14:07:21 +08:00
csf123321 d3e59bc7f3 强制cpu rtorch 2026-04-02 14:05:28 +08:00
stardrophere 61b6357418 算法与视觉优化 2026-04-02 13:48:33 +08:00
csf123321 943770b2bc Merge pull request #2 from stardrophere/backend_optimize
Backend optimize
2026-04-02 01:37:40 +08:00
stardrophere f4d9b2075c 改名 2026-04-02 01:25:30 +08:00
csf123321 e3541f8d43 修改模型描述 2026-04-01 19:43:37 +08:00
csf123321 6ddedd76d7 修改env,添加port 2026-04-01 18:31:06 +08:00
csf123321 ca36f3813a 修复docker配置,修复因为google字体导致的首次访问速度慢的问题 2026-03-31 16:58:53 +08:00
csf123321 2cd9137f91 docker配置修改 2026-03-31 16:42:29 +08:00
csf123321 3fe122cb80 修改成由后端处理前端的静态文件 2026-03-30 22:55:14 +08:00
csf123321 97c97b7bae 修改成在启动的时候可以自动初始化数据 2026-03-30 22:01:47 +08:00
csf123321 7c01b5c265 Delete .github/workflows/docker-image.yml 2026-03-28 01:03:26 +08:00
30 changed files with 5529 additions and 199 deletions
+18
View File
@@ -0,0 +1,18 @@
# 前端
frontend/dist
frontend/node_modules
# 后端
backend/.venv
backend/.git
backend/__pycache__
backend/*.pyc
backend/*.pyo
backend/*.pyd
backend/.pytest_cache
backend/.mypy_cache
backend/.cache
backend/.env
backend/*.log
backend/dist
backend/build
-35
View File
@@ -1,35 +0,0 @@
name: Docker Image CI
on:
push:
tags: ['v*']
jobs:
build:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Logout Docker (avoid wrong credentials)
run: docker logout || true
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Install the latest version of uv
uses: astral-sh/setup-uv@v7
- name: uv lock
working-directory: backend
run: uv lock
- name: Build Docker Image (with BuildKit)
working-directory: backend
run: |
docker build \
--progress=plain \
-t insightradar-backend:${{ github.ref_name }} \
-t insightradar-backend:latest \
.
+5 -3
View File
@@ -37,13 +37,12 @@ MANIFEST
*.manifest *.manifest
*.spec *.spec
# uv
*.lock
# Installer logs # Installer logs
pip-log.txt pip-log.txt
pip-delete-this-directory.txt pip-delete-this-directory.txt
**/logs/*
# Unit test / coverage reports # Unit test / coverage reports
htmlcov/ htmlcov/
.tox/ .tox/
@@ -190,3 +189,6 @@ cython_debug/
**/data/* **/data/*
**/docker/* **/docker/*
backend/app/static/*
test*.*
-13
View File
@@ -1,13 +0,0 @@
.venv
.git
__pycache__
*.pyc
*.pyo
*.pyd
.pytest_cache
.mypy_cache
.cache
.env
*.log
dist
build
+3 -3
View File
@@ -69,7 +69,7 @@ def _normalize_email(email: str) -> str:
def _build_verification_email(code: str, purpose_text: str, expire_minutes: int) -> str: def _build_verification_email(code: str, purpose_text: str, expire_minutes: int) -> str:
return f""" return f"""
<div style="font-family: Arial, sans-serif; line-height: 1.6; color: #222;"> <div style="font-family: Arial, sans-serif; line-height: 1.6; color: #222;">
<h2 style="margin-bottom: 12px;">InsightRadar 邮箱验证</h2> <h2 style="margin-bottom: 12px;">聚势智见邮箱验证</h2>
<p>您的{purpose_text}验证码是:</p> <p>您的{purpose_text}验证码是:</p>
<p style="font-size: 28px; font-weight: bold; letter-spacing: 4px; color: #0b57d0;">{code}</p> <p style="font-size: 28px; font-weight: bold; letter-spacing: 4px; color: #0b57d0;">{code}</p>
<p>该验证码在 {expire_minutes} 分钟内有效。请勿泄露给他人。</p> <p>该验证码在 {expire_minutes} 分钟内有效。请勿泄露给他人。</p>
@@ -203,7 +203,7 @@ async def send_register_code(
await send_html_email( await send_html_email(
to_email=email, to_email=email,
subject=f"{code}InsightRadar 注册验证码", subject=f"{code}聚势智见 注册验证码",
html_content=_build_verification_email( html_content=_build_verification_email(
code, "注册", REGISTER_CODE_EXPIRE_MINUTES code, "注册", REGISTER_CODE_EXPIRE_MINUTES
), ),
@@ -241,7 +241,7 @@ async def send_login_code(
await send_html_email( await send_html_email(
to_email=email, to_email=email,
subject=f"{code}InsightRadar 登录验证码", subject=f"{code}聚势智见 登录验证码",
html_content=_build_verification_email( html_content=_build_verification_email(
code, "登录", LOGIN_CODE_EXPIRE_MINUTES code, "登录", LOGIN_CODE_EXPIRE_MINUTES
), ),
+12
View File
@@ -2,6 +2,8 @@
""" """
信息源 CRUD:对 InfoSource 的增删改查,供 API 与爬虫使用 信息源 CRUD:对 InfoSource 的增删改查,供 API 与爬虫使用
""" """
from sqlite3 import IntegrityError
from sqlalchemy.orm import Session from sqlalchemy.orm import Session
from typing import List, Optional from typing import List, Optional
@@ -22,10 +24,20 @@ def get_multi(db: Session, skip: int = 0, limit: int = 100) -> List[InfoSource]:
def create(db: Session, obj_in: InfoSourceCreate) -> InfoSource:
    """Create an info source, or return the stored one with the same name.

    Creation is idempotent on ``source_name``: when a row with that name
    already exists it is returned as-is instead of inserting a duplicate.

    Args:
        db: Session owned by the caller. It is deliberately NOT closed here,
            so the caller's ``with`` block / request dependency stays in
            control of its lifetime and the returned object stays attached.
        obj_in: Validated creation payload.

    Returns:
        The persisted ``InfoSource`` row (newly inserted or pre-existing).

    Raises:
        sqlalchemy.exc.IntegrityError: If the commit violates a constraint
            and no row with the same ``source_name`` can be found afterwards.
    """
    # SQLAlchemy wraps DBAPI errors in its own hierarchy; commit() raises
    # sqlalchemy.exc.IntegrityError, never the raw sqlite3.IntegrityError.
    from sqlalchemy.exc import IntegrityError

    def _find_by_name() -> Optional[InfoSource]:
        return (
            db.query(InfoSource)
            .filter(InfoSource.source_name == obj_in.source_name)
            .first()
        )

    existing = _find_by_name()
    if existing is not None:
        return existing

    db_obj = InfoSource(**obj_in.model_dump())
    db.add(db_obj)
    try:
        db.commit()
    except IntegrityError:
        # Most likely a concurrent insert won the race between the lookup
        # above and this commit; hand back the winning row if there is one.
        db.rollback()
        existing = _find_by_name()
        if existing is None:
            raise
        return existing
    db.refresh(db_obj)
    return db_obj
def update(db: Session, db_obj: InfoSource, obj_in: InfoSourceUpdate) -> InfoSource: def update(db: Session, db_obj: InfoSource, obj_in: InfoSourceUpdate) -> InfoSource:
+22 -26
View File
@@ -1,17 +1,15 @@
import requests
import json import json
# 请将此处的 URL 替换为您实际的 API 基础域名 from app.database import SessionLocal
api_url = "http://10.252.130.135:8000/api/v1/sources/" from app.crud.crud_source import create
from app.models.models import SourceType
from app.schemas.source_schema import InfoSourceCreate
# 请求头
headers = {
"Content-Type": "application/json",
# "Authorization": "Bearer YOUR_TOKEN" # 如果接口需要鉴权,请取消注释并填入 Token
}
# 解析后的数据源列表 def init():
sources_data = [
# 解析后的数据源列表
sources_data = [
{"name": "今日头条", "url": "toutiao"}, {"name": "今日头条", "url": "toutiao"},
{"name": "百度热搜", "url": "baidu"}, {"name": "百度热搜", "url": "baidu"},
{"name": "华尔街见闻", "url": "wallstreetcn-hot"}, {"name": "华尔街见闻", "url": "wallstreetcn-hot"},
@@ -23,24 +21,22 @@ sources_data = [
{"name": "微博", "url": "weibo"}, {"name": "微博", "url": "weibo"},
{"name": "抖音", "url": "douyin"}, {"name": "抖音", "url": "douyin"},
{"name": "知乎", "url": "zhihu"} {"name": "知乎", "url": "zhihu"}
] ]
# 遍历数据并发送 POST 请求 # 遍历数据并发送 POST 请求
for item in sources_data: for item in sources_data:
payload = {
"source_name": item["name"],
"source_type": "HOT_TREND",
"home_url": item["url"],
"is_enabled": True
}
try: try:
response = requests.post(api_url, headers=headers, data=json.dumps(payload))
if response.status_code in (200, 201): with SessionLocal() as db:
print(f"✅ 成功创建: {item['name']}")
else: create(db, InfoSourceCreate(
print(f"❌ 创建失败: {item['name']} - 状态码: {response.status_code} - 详情: {response.text}") source_name=item["name"],
source_type=SourceType.HOT_TREND,
home_url=item["url"],
is_enabled=True
))
print(f"创建订阅源{item['name']}")
except Exception as e: except Exception as e:
print(f"⚠️ 请求异常: {item['name']} - 错误: {e}") print(f"⚠️ 请求异常: {item['name']} - 错误: {e}")
print("执行完毕!")
+27 -2
View File
@@ -1,8 +1,11 @@
# app/main.py # app/main.py
import logging import logging
import os import os
from pathlib import Path
from fastapi.responses import FileResponse, HTMLResponse, JSONResponse
import httpx
from contextlib import asynccontextmanager from contextlib import asynccontextmanager
from fastapi import FastAPI from fastapi import FastAPI, HTTPException, Request, staticfiles
from fastapi.middleware.cors import CORSMiddleware from fastapi.middleware.cors import CORSMiddleware
from dotenv import load_dotenv from dotenv import load_dotenv
@@ -21,11 +24,11 @@ from app.services.summary_service import generate_unified_summaries
from app.services.delivery_service import check_and_deliver from app.services.delivery_service import check_and_deliver
from app.database import engine from app.database import engine
from app.models.models import Base from app.models.models import Base
from app.initialize import init
# 路由总线 # 路由总线
from app.api.router import api_router from app.api.router import api_router
load_dotenv()
CRAWL_INTERVAL = int(os.getenv("CRAWL_INTERVAL_MINUTES", 10)) CRAWL_INTERVAL = int(os.getenv("CRAWL_INTERVAL_MINUTES", 10))
SUMMARY_INTERVAL = int(os.getenv("SUMMARY_INTERVAL_MINUTES", 30)) SUMMARY_INTERVAL = int(os.getenv("SUMMARY_INTERVAL_MINUTES", 30))
@@ -42,6 +45,10 @@ async def lifespan(app: FastAPI):
Base.metadata.create_all(bind=engine) Base.metadata.create_all(bind=engine)
logging.info("数据库表初始化完成!") logging.info("数据库表初始化完成!")
logging.info("初始化订阅源")
init()
logging.info("订阅源初始化完毕")
# 2. 配置并启动定时任务 # 2. 配置并启动定时任务
scheduler.add_job( scheduler.add_job(
fetch_and_save_trending_data, fetch_and_save_trending_data,
@@ -106,6 +113,24 @@ app.add_middleware(
# 版本控制 # 版本控制
app.include_router(api_router, prefix="/api/v1") app.include_router(api_router, prefix="/api/v1")
# 只需要保留API的优先匹配,catch_all可以简化成这样
@app.get("/api/{full_path:path}")
async def api_not_found(full_path: str):
    """Answer GET requests under ``/api/`` that matched no registered route.

    Registered ahead of the static-file mount so unknown API paths get a JSON
    error rather than falling through to the frontend files.

    Args:
        full_path: The unmatched remainder of the URL after ``/api/``.

    Returns:
        A JSON body ``{"detail": "API Not Found"}`` with HTTP status 404.
    """
    # Build the response directly (instead of raising HTTPException) so the
    # status becomes 404 while the body stays exactly what clients got before.
    return JSONResponse({"detail": "API Not Found"}, status_code=404)
staticPath = staticfiles.StaticFiles(directory="app/static", html=True)
# 把目录改成static对应我们放dist内容的路径就可以
app.mount("/", staticPath, name="static")
INDEX_HTML = Path("app/static/index.html").read_text(encoding="utf-8")
@app.exception_handler(404)
async def not_found_handler(request: Request, exc: HTTPException):
    """Serve the frontend entry page for non-API 404s; keep JSON 404 for the API.

    Args:
        request: The incoming request whose path decides the response type.
        exc: The 404 exception being handled (unused).

    Returns:
        ``INDEX_HTML`` as an HTML response for any path outside ``/api/``,
        otherwise a JSON ``{"detail": "Not Found"}`` with status 404.
    """
    is_api_request = request.url.path.startswith("/api/")
    if not is_api_request:
        # Unknown frontend paths get the cached index page so the client-side
        # router can take over.
        return HTMLResponse(INDEX_HTML)
    return JSONResponse({"detail": "Not Found"}, status_code=404)
# 健康检查 # 健康检查
@app.get("/", tags=["健康检查"]) @app.get("/", tags=["健康检查"])
+10 -12
View File
@@ -94,6 +94,10 @@ class InfoSource(Base):
created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=utcnow) created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=utcnow)
updated_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=utcnow, onupdate=utcnow) updated_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=utcnow, onupdate=utcnow)
__table_args__ = (
UniqueConstraint("source_name", name="uix_source_name"),
)
# ========================================== # ==========================================
# 模块二:AI 语义聚类中枢 (大事件池) # 模块二:AI 语义聚类中枢 (大事件池)
@@ -176,8 +180,7 @@ class NewsArticle(Base):
id: Mapped[int] = mapped_column(BigIntType, primary_key=True, autoincrement=True) id: Mapped[int] = mapped_column(BigIntType, primary_key=True, autoincrement=True)
source_id: Mapped[int] = mapped_column(ForeignKey("info_sources.id"), comment="所属信息源ID") source_id: Mapped[int] = mapped_column(ForeignKey("info_sources.id"), comment="所属信息源ID")
unified_event_id: Mapped[Optional[int]] = mapped_column(ForeignKey("unified_events.id"), unified_event_id: Mapped[Optional[int]] = mapped_column(ForeignKey("unified_events.id"), comment="深度文章也可归入大事件分析")
comment="深度文章也可归入大事件分析")
external_id: Mapped[str] = mapped_column(String(32), comment="RSS原文<guid>生成的MD5防重指纹") external_id: Mapped[str] = mapped_column(String(32), comment="RSS原文<guid>生成的MD5防重指纹")
title_embedding: Mapped[Optional[str]] = mapped_column(Text, comment="新闻标题/摘要的语义向量") title_embedding: Mapped[Optional[str]] = mapped_column(Text, comment="新闻标题/摘要的语义向量")
@@ -214,8 +217,7 @@ class HeadlineRevision(Base):
previous_headline: Mapped[str] = mapped_column(String(255), comment="修改前的旧标题") previous_headline: Mapped[str] = mapped_column(String(255), comment="修改前的旧标题")
revised_headline: Mapped[str] = mapped_column(String(255), comment="修改后的新标题") revised_headline: Mapped[str] = mapped_column(String(255), comment="修改后的新标题")
created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=utcnow, created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=utcnow, comment="系统发现被修改的时间")
comment="系统发现被修改的时间")
class RankingLog(Base): class RankingLog(Base):
@@ -235,8 +237,7 @@ class RankingLog(Base):
# 当时它在第几名 # 当时它在第几名
ranking_position: Mapped[int] = mapped_column(Integer, comment="当时抓取时的排名名次") ranking_position: Mapped[int] = mapped_column(Integer, comment="当时抓取时的排名名次")
# 爬虫看到它的那一瞬间的时间 # 爬虫看到它的那一瞬间的时间
observed_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=utcnow, observed_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=utcnow, comment="观察到该名次的准确时间")
comment="观察到该名次的准确时间")
created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=utcnow) created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=utcnow)
@@ -307,13 +308,11 @@ class AppUser(Base):
nickname: Mapped[Optional[str]] = mapped_column(String(100), comment="用户展示昵称") nickname: Mapped[Optional[str]] = mapped_column(String(100), comment="用户展示昵称")
avatar_url: Mapped[Optional[str]] = mapped_column(String(500), comment="用户头像地址") avatar_url: Mapped[Optional[str]] = mapped_column(String(500), comment="用户头像地址")
gender: Mapped[GenderType] = mapped_column(Enum(GenderType), default=GenderType.UNKNOWN, gender: Mapped[GenderType] = mapped_column(Enum(GenderType), default=GenderType.UNKNOWN, comment="用户性别(用于AI调整行文语气)")
comment="用户性别(用于AI调整行文语气)")
# 极其强大:一个万能收纳箱!前端未来想加任何诸如“夜间模式”、“字体变大”的开关, # 极其强大:一个万能收纳箱!前端未来想加任何诸如“夜间模式”、“字体变大”的开关,
# 全部丢进这个 JSON 字段即可,从此免去手动修改后端表结构的麻烦。 # 全部丢进这个 JSON 字段即可,从此免去手动修改后端表结构的麻烦。
metadata_: Mapped[Optional[Any]] = mapped_column("metadata", JSON, metadata_: Mapped[Optional[Any]] = mapped_column("metadata", JSON, comment="JSON扩展字段: 存放灵活多变的前端用户偏好设置")
comment="JSON扩展字段: 存放灵活多变的前端用户偏好设置")
# 时区对于定时推送系统极其重要!保证纽约的用户和北京的用户都能在早晨8点收到新闻。 # 时区对于定时推送系统极其重要!保证纽约的用户和北京的用户都能在早晨8点收到新闻。
timezone: Mapped[str] = mapped_column(String(50), default="Asia/Shanghai", comment="用户所在地时区") timezone: Mapped[str] = mapped_column(String(50), default="Asia/Shanghai", comment="用户所在地时区")
@@ -401,8 +400,7 @@ class DeliveryHistory(Base):
# 记录这次推送是彻底成功了,还是由于渠道网络问题失败了 # 记录这次推送是彻底成功了,还是由于渠道网络问题失败了
status: Mapped[TaskStatus] = mapped_column(Enum(TaskStatus), comment="最终推送结果状态") status: Mapped[TaskStatus] = mapped_column(Enum(TaskStatus), comment="最终推送结果状态")
created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=utcnow, created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=utcnow, comment="记录或实际推送的准确时间")
comment="记录或实际推送的准确时间")
# ========================================== # ==========================================
+3 -3
View File
@@ -86,7 +86,7 @@ body{{margin:0;padding:0;background:#0d1117;color:#e6edf3;font-family:-apple-sys
<body> <body>
<div class="container"> <div class="container">
<div class="header"> <div class="header">
<h1>InsightRadar · 热点快报</h1> <h1>聚势智见 · 热点快报</h1>
<p>{delivery_time} · 为你精选了 {event_count} 条事件</p> <p>{delivery_time} · 为你精选了 {event_count} 条事件</p>
<span class="mode-badge {mode_badge_class}">{mode_label}</span> <span class="mode-badge {mode_badge_class}">{mode_label}</span>
</div> </div>
@@ -94,8 +94,8 @@ body{{margin:0;padding:0;background:#0d1117;color:#e6edf3;font-family:-apple-sys
{event_cards_html} {event_cards_html}
<div class="footer"> <div class="footer">
<p>此邮件由 InsightRadar 自动推送。</p> <p>此邮件由 聚势智见自动推送。</p>
<p>如需调整推送设置,请登录 <a href="{app_url}">InsightRadar 控制台</a></p> <p>如需调整推送设置,请登录 <a href="{app_url}">聚势智见 控制台</a></p>
</div> </div>
</div> </div>
</body> </body>
+1 -1
View File
@@ -377,7 +377,7 @@ def _prepare_user_push(db: Session, user: AppUser, schedule: UserDeliverySchedul
return _PendingPush( return _PendingPush(
user_id=user_id, user_id=user_id,
email_targets=[ep.channel_account for ep in email_endpoints], email_targets=[ep.channel_account for ep in email_endpoints],
subject=f"InsightRadar {subject_suffix} · {time_str}", subject=f"聚势智见 {subject_suffix} · {time_str}",
html_body=html_body, html_body=html_body,
event_ids=event_ids, event_ids=event_ids,
) )
+1 -1
View File
@@ -26,7 +26,7 @@ SIMILARITY_THRESHOLD = float(os.getenv("SIMILARITY_THRESHOLD", 0.72))
API_BASE_URL = os.getenv("API_BASE_URL", "https://newsnow.busiyi.world/api/s") API_BASE_URL = os.getenv("API_BASE_URL", "https://newsnow.busiyi.world/api/s")
EMBEDDING_MODEL_PATH = os.getenv("EMBEDDING_MODEL_PATH", "") EMBEDDING_MODEL_PATH = os.getenv("EMBEDDING_MODEL_PATH", "")
print("正在加载 BAAI/bge-m3 向量模型...") print("正在加载模型...")
# 全局单例 # 全局单例
embedder_model = SentenceTransformer(EMBEDDING_MODEL_PATH, local_files_only=True) embedder_model = SentenceTransformer(EMBEDDING_MODEL_PATH, local_files_only=True)
print("模型加载完成。") print("模型加载完成。")
+82 -35
View File
@@ -1,6 +1,6 @@
""" """
匹配服务:根据用户兴趣关键词(精确 + 语义)推荐事件 匹配服务:根据用户兴趣关键词(精确 + 语义)推荐事件
打分融合:匹配分 + 标签相关度 + 热度 + 新鲜度加成 打分融合:标签/标题匹配分 + 标签相关度 + 热度 + 新鲜度加成
""" """
import os import os
from dataclasses import dataclass from dataclasses import dataclass
@@ -14,7 +14,7 @@ from app.models.models import ExtractedTopic, TargetType, UnifiedEvent, UserTopi
from app.services.fetcher_service import embedder_model from app.services.fetcher_service import embedder_model
# 语义匹配阈值:用户关键词和事件标签向量相似度达到该值才计入语义命中 # 语义匹配阈值:用户关键词和事件标签/标题向量相似度达到该值才计入语义命中
DEFAULT_PREFERENCE_SEMANTIC_THRESHOLD = 0.78 DEFAULT_PREFERENCE_SEMANTIC_THRESHOLD = 0.78
PREFERENCE_SEMANTIC_THRESHOLD = float( PREFERENCE_SEMANTIC_THRESHOLD = float(
os.getenv("PREFERENCE_SEMANTIC_THRESHOLD", str(DEFAULT_PREFERENCE_SEMANTIC_THRESHOLD)) os.getenv("PREFERENCE_SEMANTIC_THRESHOLD", str(DEFAULT_PREFERENCE_SEMANTIC_THRESHOLD))
@@ -41,6 +41,31 @@ def _normalize_text(text: str) -> str:
return text.strip().casefold() return text.strip().casefold()
def _find_exact_preference_match(
    target_text: str,
    normalized_preferences: list[tuple[str, str]],
) -> str | None:
    """Return the first user preference that literally matches ``target_text``.

    A preference matches when, after normalization, it is equal to the target
    or one of the two strings contains the other.

    Args:
        target_text: Text to test against the preferences.
        normalized_preferences: ``(raw, normalized)`` preference pairs,
            checked in the given order.

    Returns:
        The raw preference of the first match, or ``None`` when the
        normalized target is empty or nothing matches.
    """
    needle = _normalize_text(target_text)
    if needle:
        for original, folded in normalized_preferences:
            # An empty preference would be a substring of everything, so it
            # is skipped. Equality is a special case of containment.
            if folded and (folded in needle or needle in folded):
                return original
    return None
_EMBEDDING_CACHE: dict[str, np.ndarray] = {} _EMBEDDING_CACHE: dict[str, np.ndarray] = {}
MAX_CACHE_SIZE = 10000 MAX_CACHE_SIZE = 10000
@@ -86,6 +111,26 @@ def _build_keyword_embedding_map(keywords: list[str]) -> dict[str, np.ndarray]:
return result return result
def _find_best_semantic_match(
target_text: str,
target_vec_map: dict[str, np.ndarray],
pref_vec_map: dict[str, np.ndarray],
) -> tuple[str | None, float]:
"""返回与目标文本最接近的兴趣词及其余弦相似度。"""
target_vec = target_vec_map.get(target_text)
if target_vec is None:
return None, -1.0
best_pref = None
best_sim = -1.0
for pref_keyword, pref_vec in pref_vec_map.items():
sim = float(np.dot(target_vec, pref_vec))
if sim > best_sim:
best_sim = sim
best_pref = pref_keyword
return best_pref, best_sim
def _ensure_aware(dt: datetime) -> datetime: def _ensure_aware(dt: datetime) -> datetime:
"""SQLite 读出的 datetime 不带时区信息,统一补上 UTC 后才能和 utcnow() 做减法。""" """SQLite 读出的 datetime 不带时区信息,统一补上 UTC 后才能和 utcnow() 做减法。"""
if dt.tzinfo is None: if dt.tzinfo is None:
@@ -116,8 +161,8 @@ def recommend_events_for_user(
) -> list[MatchedEventResult]: ) -> list[MatchedEventResult]:
""" """
用户兴趣推荐主流程: 用户兴趣推荐主流程:
1) 精确匹配:用户词 == EVENT 标签 1) 精确匹配:用户词 vs EVENT 标签/标题
2) 语义匹配:用户词向量 vs EVENT 标签向量(超过阈值) 2) 语义匹配:用户词向量 vs EVENT 标签/标题向量(超过阈值)
3) 打分融合:匹配分 + 标签相关度 + 热度 + 新鲜度 3) 打分融合:匹配分 + 标签相关度 + 热度 + 新鲜度
""" """
final_limit = max(1, min(limit, PREFERENCE_RECOMMEND_MAX_LIMIT)) final_limit = max(1, min(limit, PREFERENCE_RECOMMEND_MAX_LIMIT))
@@ -167,8 +212,6 @@ def recommend_events_for_user(
) )
.all() .all()
) )
if not topic_rows:
return []
# 组织事件标签映射:event_id -> [(tag, relevance_score), ...] # 组织事件标签映射:event_id -> [(tag, relevance_score), ...]
event_topics: dict[int, list[tuple[str, float | None]]] = {} event_topics: dict[int, list[tuple[str, float | None]]] = {}
@@ -177,10 +220,6 @@ def recommend_events_for_user(
continue continue
event_topics.setdefault(event_id, []).append((topic_keyword, relevance_score)) event_topics.setdefault(event_id, []).append((topic_keyword, relevance_score))
# 如果某事件没有标签,就不参与推荐
if not event_topics:
return []
# 3. 批量编码用户词与标签词,减少模型调用次数 # 3. 批量编码用户词与标签词,减少模型调用次数
unique_preference_keywords = list(dict.fromkeys(preference_keywords)) unique_preference_keywords = list(dict.fromkeys(preference_keywords))
unique_topic_keywords = list(dict.fromkeys([row[1] for row in topic_rows if row[1]])) unique_topic_keywords = list(dict.fromkeys([row[1] for row in topic_rows if row[1]]))
@@ -188,13 +227,21 @@ def recommend_events_for_user(
topic_vec_map = _build_keyword_embedding_map(unique_topic_keywords) topic_vec_map = _build_keyword_embedding_map(unique_topic_keywords)
# 预先建立“标准化后用户词集合”,用于精确匹配 # 预先建立“标准化后用户词集合”,用于精确匹配
normalized_pref_set = {_normalize_text(word) for word in unique_preference_keywords} normalized_preference_pairs = [
(word, _normalize_text(word))
for word in unique_preference_keywords
if _normalize_text(word)
]
unique_event_titles = list(
dict.fromkeys(
[event.unified_title.strip() for event in events if event.unified_title and event.unified_title.strip()]
)
)
title_vec_map = _build_keyword_embedding_map(unique_event_titles)
scored_results: list[MatchedEventResult] = [] scored_results: list[MatchedEventResult] = []
for event in events: for event in events:
topic_list = event_topics.get(event.id, []) topic_list = event_topics.get(event.id, [])
if not topic_list:
continue
exact_hits: list[str] = [] exact_hits: list[str] = []
semantic_hits: list[dict[str, Any]] = [] semantic_hits: list[dict[str, Any]] = []
@@ -202,37 +249,18 @@ def recommend_events_for_user(
# 对每个事件标签做精确匹配或语义匹配 # 对每个事件标签做精确匹配或语义匹配
for topic_keyword, topic_relevance in topic_list: for topic_keyword, topic_relevance in topic_list:
normalized_topic = _normalize_text(topic_keyword)
topic_relevance_score = float(topic_relevance) if topic_relevance is not None else 50.0 topic_relevance_score = float(topic_relevance) if topic_relevance is not None else 50.0
# 1) 精确命中(包括完全相等与包含关系) # 1) 精确命中(包括完全相等与包含关系)
matched_exact = False matched_pref = _find_exact_preference_match(topic_keyword, normalized_preference_pairs)
if normalized_topic in normalized_pref_set: if matched_pref is not None:
matched_exact = True
else:
for pref_word in normalized_pref_set:
if pref_word and (pref_word in normalized_topic or normalized_topic in pref_word):
matched_exact = True
break
if matched_exact:
exact_hits.append(topic_keyword) exact_hits.append(topic_keyword)
# 精确命中给较高基础分,标签自身相关度作为增益 # 精确命中给较高基础分,标签自身相关度作为增益
score += 45.0 + topic_relevance_score * 0.2 score += 45.0 + topic_relevance_score * 0.2
continue continue
# 2) 语义命中(未精确命中时再算) # 2) 语义命中(未精确命中时再算)
topic_vec = topic_vec_map.get(topic_keyword) best_pref, best_sim = _find_best_semantic_match(topic_keyword, topic_vec_map, pref_vec_map)
if topic_vec is None:
continue
best_pref = None
best_sim = -1.0
for pref_keyword, pref_vec in pref_vec_map.items():
sim = float(np.dot(topic_vec, pref_vec))
if sim > best_sim:
best_sim = sim
best_pref = pref_keyword
if best_pref is not None and best_sim >= similarity_threshold: if best_pref is not None and best_sim >= similarity_threshold:
semantic_hits.append( semantic_hits.append(
@@ -245,6 +273,25 @@ def recommend_events_for_user(
# 语义命中分略低于精确命中,并由相似度放大 # 语义命中分略低于精确命中,并由相似度放大
score += best_sim * 35.0 + topic_relevance_score * 0.12 score += best_sim * 35.0 + topic_relevance_score * 0.12
# 标题也参与匹配,但权重低于结构化标签,避免长标题过度主导排序。
event_title = (event.unified_title or "").strip()
if event_title:
title_exact_pref = _find_exact_preference_match(event_title, normalized_preference_pairs)
if title_exact_pref is not None:
exact_hits.append(f"标题:{title_exact_pref}")
score += 30.0
else:
best_pref, best_sim = _find_best_semantic_match(event_title, title_vec_map, pref_vec_map)
if best_pref is not None and best_sim >= similarity_threshold:
semantic_hits.append(
{
"preference_keyword": best_pref,
"topic_keyword": f"标题:{best_pref}",
"similarity": round(best_sim, 4),
}
)
score += best_sim * 24.0
# 如果精确和语义都没命中,直接跳过 # 如果精确和语义都没命中,直接跳过
if not exact_hits and not semantic_hits: if not exact_hits and not semantic_hits:
continue continue
-31
View File
@@ -1,31 +0,0 @@
FROM python:3.11-slim AS builder
WORKDIR /insightradar
COPY pyproject.toml uv.lock ./
RUN --mount=type=cache,target=/root/.cache/uv \
pip install --no-cache-dir uv && \
uv sync --frozen --no-dev
COPY app app
COPY main.py main.py
#-----------------------------------------------
FROM python:3.11-slim
WORKDIR /insightradar
# 👇 复制虚拟环境
COPY --from=builder /insightradar/.venv /insightradar/.venv
COPY app app
COPY main.py main.py
# 👇 关键:用 venv 里的 python
ENV PATH="/insightradar/.venv/bin:$PATH"
EXPOSE 8000
CMD ["python","main.py"]
+7 -1
View File
@@ -1,12 +1,18 @@
# run.py # run.py
import uvicorn import uvicorn
import os
from dotenv import load_dotenv
if __name__ == "__main__": if __name__ == "__main__":
load_dotenv()
PORT = int(os.getenv("PORT", 8000))
# 启动服务 # 启动服务
uvicorn.run( uvicorn.run(
app="app.main:app", app="app.main:app",
host="0.0.0.0", host="0.0.0.0",
port=8000, port=PORT,
# reload=True, # reload=True,
workers=1 workers=1
) )
+12 -3
View File
@@ -49,7 +49,6 @@ dependencies = [
"safetensors==0.7.0", "safetensors==0.7.0",
"scikit-learn==1.8.0", "scikit-learn==1.8.0",
"scipy==1.17.1", "scipy==1.17.1",
"sentence-transformers==5.2.3",
"shellingham==1.5.4", "shellingham==1.5.4",
"sniffio==1.3.1", "sniffio==1.3.1",
"sqlalchemy==2.0.48", "sqlalchemy==2.0.48",
@@ -57,8 +56,6 @@ dependencies = [
"sympy==1.14.0", "sympy==1.14.0",
"threadpoolctl==3.6.0", "threadpoolctl==3.6.0",
"tokenizers==0.22.2", "tokenizers==0.22.2",
"torch==2.10.0",
"torchvision==0.25.0",
"tqdm==4.67.3", "tqdm==4.67.3",
"transformers==5.3.0", "transformers==5.3.0",
"typer==0.24.1", "typer==0.24.1",
@@ -68,4 +65,16 @@ dependencies = [
"tzlocal==5.3.1", "tzlocal==5.3.1",
"urllib3==2.6.3", "urllib3==2.6.3",
"uvicorn==0.41.0", "uvicorn==0.41.0",
"torch==2.11.0+cpu",
"torchvision==0.26.0+cpu",
"torchaudio==2.11.0+cpu",
"sentence-transformers>=5.3.0",
] ]
[[tool.uv.index]]
name = "pytorch-cpu"
url = "https://download.pytorch.org/whl/cpu"
default = false
[tool.uv]
index-strategy = "unsafe-best-match"
+1720
View File
File diff suppressed because it is too large Load Diff
+50
View File
@@ -0,0 +1,50 @@
# ---------- 阶段1:前端编译(Node打包静态产物) ----------
FROM node:22-alpine AS frontend-builder
WORKDIR /frontend
# 复制前端依赖,利用Docker缓存优化
COPY frontend/package*.json ./
RUN npm install --registry=https://registry.npmmirror.com
# 复制前端代码,编译出静态产物
COPY frontend/ .
RUN npm run build
# ---------- 阶段2:后端依赖构建(uv构建虚拟环境) ----------
FROM python:3.11-slim AS backend-builder
WORKDIR /backend
# 安装uv,同步Python依赖
COPY backend/pyproject.toml backend/uv.lock ./
RUN --mount=type=cache,target=/root/.cache/uv \
pip install --no-cache-dir uv && \
uv sync --frozen --no-dev --index https://pypi.tuna.tsinghua.edu.cn/simple/
# 复制后端代码
COPY backend/app ./app
COPY backend/main.py ./
# ---------- 阶段3:最终运行镜像(仅Python+Uvicorn,托管前端静态) ----------
FROM python:3.11-slim
WORKDIR /app
# 复制构建好的后端虚拟环境
COPY --from=backend-builder /backend/.venv /app/.venv
COPY --from=backend-builder /backend/app /app/app
COPY --from=backend-builder /backend/main.py /app/main.py
# 复制前端编译好的静态产物,放到后端能访问的目录
# 这里我们把静态文件放到 /app/app/static 目录(即后端的 app/static,相对于 WORKDIR /app)
COPY --from=frontend-builder /frontend/dist /app/app/static
# 把venv加入PATH
ENV PATH="/app/.venv/bin:$PATH"
# 暴露Uvicorn端口
EXPOSE 8000
# 直接启动Uvicorn,由Uvicorn配合后端框架托管静态文件
CMD ["python3", "main.py"]
+1 -1
View File
@@ -4,7 +4,7 @@
<meta charset="UTF-8"> <meta charset="UTF-8">
<link rel="icon" href="/favicon.svg"> <link rel="icon" href="/favicon.svg">
<meta name="viewport" content="width=device-width, initial-scale=1.0"> <meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>InsightRadar - 全网热点监控中枢</title> <title>聚势智见 - 基于语义聚类与大模型的热点资讯聚合平台</title>
<!-- Font Awesome 图标库 --> <!-- Font Awesome 图标库 -->
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.5.1/css/all.min.css"> <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.5.1/css/all.min.css">
</head> </head>
File diff suppressed because it is too large Load Diff
+1 -1
View File
@@ -1,4 +1,4 @@
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&family=Noto+Sans+SC:wght@400;500;600;700&display=swap'); @import url(./font.css);
/* ========================================= /* =========================================
1. 现代 SaaS 风格高级主题变量 1. 现代 SaaS 风格高级主题变量
@@ -111,6 +111,14 @@ function getRankingChartOptions(history: number[], platformColor: string) {
height: 56, height: 56,
sparkline: { enabled: true }, sparkline: { enabled: true },
animations: { enabled: true, easing: 'easeinout' as const, speed: 400 }, animations: { enabled: true, easing: 'easeinout' as const, speed: 400 },
events: {
mounted: (chartContext: any) => {
chartContext.el?.querySelector('.apexcharts-svg > title')?.remove()
},
updated: (chartContext: any) => {
chartContext.el?.querySelector('.apexcharts-svg > title')?.remove()
}
}
}, },
stroke: { curve: 'smooth' as const, width: 2 }, stroke: { curve: 'smooth' as const, width: 2 },
fill: { fill: {
+1 -1
View File
@@ -57,7 +57,7 @@ function toggleSidebar() {
<!-- Logo --> <!-- Logo -->
<div class="sidebar-logo"> <div class="sidebar-logo">
<BrandLogo /> <BrandLogo />
<span class="logo-text">InsightRadar<span class="logo-dot">.AI</span></span> <span class="logo-text">聚势智见<span class="logo-dot">.AI</span></span>
</div> </div>
<!-- 导航菜单 --> <!-- 导航菜单 -->
+1 -1
View File
@@ -1,7 +1,7 @@
<!-- 关于页占位 --> <!-- 关于页占位 -->
<template> <template>
<div class="about"> <div class="about">
<h1>关于 InsightRadar</h1> <h1>关于 聚势智见</h1>
</div> </div>
</template> </template>
+8
View File
@@ -182,6 +182,14 @@ function getRankingChartOptions(history: number[], platformColor: string) {
height: 56, height: 56,
sparkline: { enabled: true }, sparkline: { enabled: true },
animations: { enabled: true, easing: 'easeinout' as const, speed: 400 }, animations: { enabled: true, easing: 'easeinout' as const, speed: 400 },
events: {
mounted: (chartContext: any) => {
chartContext.el?.querySelector('.apexcharts-svg > title')?.remove()
},
updated: (chartContext: any) => {
chartContext.el?.querySelector('.apexcharts-svg > title')?.remove()
}
}
}, },
stroke: { curve: 'smooth' as const, width: 2 }, stroke: { curve: 'smooth' as const, width: 2 },
fill: { fill: {
+1 -1
View File
@@ -31,7 +31,7 @@ async function handleLogout() {
<div class="nav-brand"> <div class="nav-brand">
<div class="logo"> <div class="logo">
<BrandLogo /> <BrandLogo />
InsightRadar 聚势智见
</div> </div>
</div> </div>
<div class="nav-actions"> <div class="nav-actions">
+2 -2
View File
@@ -150,7 +150,7 @@ onUnmounted(() => {
<div class="brand-content"> <div class="brand-content">
<div class="logo"> <div class="logo">
<BrandLogo /> <BrandLogo />
InsightRadar 聚势智见
</div> </div>
<h1 class="brand-title">洞察全网热点<br />让信息更聚焦</h1> <h1 class="brand-title">洞察全网热点<br />让信息更聚焦</h1>
<p class="brand-desc"> <p class="brand-desc">
@@ -192,7 +192,7 @@ onUnmounted(() => {
<div class="form-container"> <div class="form-container">
<div class="form-header"> <div class="form-header">
<h2>欢迎回来</h2> <h2>欢迎回来</h2>
<p>登录后继续查看 InsightRadar 实时动态</p> <p>登录后继续查看 聚势智见 实时动态</p>
</div> </div>
<div class="login-mode-tabs"> <div class="login-mode-tabs">
+1 -1
View File
@@ -131,7 +131,7 @@ onUnmounted(() => {
<div class="brand-content"> <div class="brand-content">
<div class="logo"> <div class="logo">
<BrandLogo /> <BrandLogo />
InsightRadar 聚势智见
</div> </div>
<h1 class="brand-title">开启智能<br />分析之旅</h1> <h1 class="brand-title">开启智能<br />分析之旅</h1>
<p class="brand-desc"> <p class="brand-desc">
+12 -2
View File
@@ -72,6 +72,12 @@ const chartOptions = ref<ApexOptions>({
}, },
// 点击图表数据点:切换选中时间,再次点击则取消筛选 // 点击图表数据点:切换选中时间,再次点击则取消筛选
events: { events: {
mounted: (chartContext: any) => {
chartContext.el?.querySelector('.apexcharts-svg > title')?.remove()
},
updated: (chartContext: any) => {
chartContext.el?.querySelector('.apexcharts-svg > title')?.remove()
},
markerClick: function(event: unknown, chartContext: unknown, { dataPointIndex }: never) { markerClick: function(event: unknown, chartContext: unknown, { dataPointIndex }: never) {
if (searchResult.value && searchResult.value.timeline[dataPointIndex]) { if (searchResult.value && searchResult.value.timeline[dataPointIndex]) {
const clickedTime = searchResult.value.timeline[dataPointIndex].time_label const clickedTime = searchResult.value.timeline[dataPointIndex].time_label
@@ -585,7 +591,12 @@ async function handleSearch() {
.chart-container { .chart-container {
margin-top: 16px; margin-top: 16px;
margin-left: -10px; /* 视觉上抵消 apexcharts 的默认左侧留白。 */ margin-left: -10px;
}
.chart-container :deep(svg),
.chart-container :deep(canvas) {
outline: none;
} }
.events-section { .events-section {
@@ -595,7 +606,6 @@ async function handleSearch() {
.events-grid { .events-grid {
display: flex; display: flex;
flex-direction: column; flex-direction: column;
/* 与 DashboardView 保持一致,列表按纵向堆叠展示。 */
} }
.loading-state { .loading-state {
+1 -1
View File
@@ -156,7 +156,7 @@ onMounted(async () => {
v-model="newKeyword" v-model="newKeyword"
type="text" type="text"
class="keyword-input" class="keyword-input"
placeholder="输入关键词,如「直升机」「科比」「佐巴扬」..." placeholder="输入关键词,如「篮球」「科比」「科技」..."
maxlength="100" maxlength="100"
@keydown="onInputKeydown" @keydown="onInputKeydown"
/> />