Merge pull request #2 from stardrophere/backend_optimize

Backend optimize
This commit is contained in:
csf123321
2026-04-02 01:37:40 +08:00
committed by GitHub
13 changed files with 3654 additions and 106 deletions
+12 -4
View File
@@ -2,6 +2,8 @@
"""
信息源 CRUD:对 InfoSource 的增删改查,供 API 与爬虫使用
"""
from sqlite3 import IntegrityError
from sqlalchemy.orm import Session
from typing import List, Optional
@@ -22,10 +24,16 @@ def get_multi(db: Session, skip: int = 0, limit: int = 100) -> List[InfoSource]:
def create(db: Session, obj_in: InfoSourceCreate) -> InfoSource:
    """Create and persist a new information source.

    Args:
        db: Caller-owned SQLAlchemy session. It is committed on success and
            rolled back on a constraint violation, but it is never closed
            here; closing stays the responsibility of whoever opened it.
        obj_in: Validated payload describing the source to create.

    Returns:
        The persisted ``InfoSource``, refreshed from the database so that
        server-generated columns are populated.

    Raises:
        sqlalchemy.exc.IntegrityError: If the insert violates a database
            constraint. The session is rolled back before re-raising so the
            caller can keep using it.
    """
    # SQLAlchemy wraps driver-level errors in its own exception hierarchy, so
    # ``sqlite3.IntegrityError`` is never what ``commit()`` raises. Import the
    # SQLAlchemy class locally under an alias to avoid clashing with the
    # module-level ``sqlite3`` import of the same name.
    from sqlalchemy.exc import IntegrityError as SAIntegrityError

    db_obj = InfoSource(**obj_in.model_dump())
    db.add(db_obj)
    try:
        db.commit()
    except SAIntegrityError:
        # Leave the session in a clean state, then surface the failure rather
        # than returning an object that was never written.
        db.rollback()
        raise
    db.refresh(db_obj)
    return db_obj
def update(db: Session, db_obj: InfoSource, obj_in: InfoSourceUpdate) -> InfoSource:
+35 -39
View File
@@ -1,46 +1,42 @@
import requests
import json
# NOTE(review): this span appears to interleave removed and added lines of a
# rendered diff (HTTP-based setup next to direct database imports) — confirm
# which side is current before relying on it.
# Replace this URL with your actual API base URL
api_url = "http://10.252.130.135:8000/api/v1/sources/"
from app.database import SessionLocal
from app.crud.crud_source import create
from app.models.models import SourceType
from app.schemas.source_schema import InfoSourceCreate
# Request headers
headers = {
"Content-Type": "application/json",
# "Authorization": "Bearer YOUR_TOKEN" # Uncomment and fill in the token if the endpoint requires authentication
}
# Parsed list of data sources: display name plus the identifier stored as the source URL
sources_data = [
{"name": "今日头条", "url": "toutiao"},
{"name": "百度热搜", "url": "baidu"},
{"name": "华尔街见闻", "url": "wallstreetcn-hot"},
{"name": "澎湃新闻", "url": "thepaper"},
{"name": "bilibili 热搜", "url": "bilibili-hot-search"},
{"name": "财联社热门", "url": "cls-hot"},
{"name": "凤凰网", "url": "ifeng"},
{"name": "贴吧", "url": "tieba"},
{"name": "微博", "url": "weibo"},
{"name": "抖音", "url": "douyin"},
{"name": "知乎", "url": "zhihu"}
]
# Seed the built-in hot-trend information sources, one record per entry in sources_data.
# NOTE(review): the lines below appear to interleave two versions of this logic
# from a rendered diff — one that POSTs each source to api_url with requests,
# and one that calls create() on a SessionLocal session. Indentation is lost,
# so confirm the intended nesting against the real file.
def init():
# Iterate over the data and send a POST request for each item
for item in sources_data:
# JSON body for the HTTP-based variant; every source is registered as HOT_TREND and enabled
payload = {
"source_name": item["name"],
"source_type": "HOT_TREND",
"home_url": item["url"],
"is_enabled": True
}
# Parsed list of data sources: display name plus the identifier stored as home_url
sources_data = [
{"name": "今日头条", "url": "toutiao"},
{"name": "百度热搜", "url": "baidu"},
{"name": "华尔街见闻", "url": "wallstreetcn-hot"},
{"name": "澎湃新闻", "url": "thepaper"},
{"name": "bilibili 热搜", "url": "bilibili-hot-search"},
{"name": "财联社热门", "url": "cls-hot"},
{"name": "凤凰网", "url": "ifeng"},
{"name": "贴吧", "url": "tieba"},
{"name": "微博", "url": "weibo"},
{"name": "抖音", "url": "douyin"},
{"name": "知乎", "url": "zhihu"}
]
# HTTP-based variant: 200 and 201 are both treated as success; any other status is reported with the response text
try:
response = requests.post(api_url, headers=headers, data=json.dumps(payload))
if response.status_code in (200, 201):
print(f"✅ 成功创建: {item['name']}")
else:
print(f"❌ 创建失败: {item['name']} - 状态码: {response.status_code} - 详情: {response.text}")
except Exception as e:
print(f"⚠️ 请求异常: {item['name']} - 错误: {e}")
# Iterate over the data and send a POST request for each item
# NOTE(review): in the session-based variant below no HTTP request is made; this comment looks stale
for item in sources_data:
try:
# A session is opened per source via the SessionLocal context manager
with SessionLocal() as db:
print("执行完毕!")
create(db, InfoSourceCreate(
source_name=item["name"],
source_type=SourceType.HOT_TREND,
home_url=item["url"],
is_enabled=True
))
print(f"创建订阅源{item['name']}")
# Any exception is printed with the source name instead of being re-raised
except Exception as e:
print(f"⚠️ 请求异常: {item['name']} - 错误: {e}")
+16 -3
View File
@@ -1,8 +1,10 @@
# app/main.py
import logging
import os
from fastapi.responses import FileResponse
import httpx
from contextlib import asynccontextmanager
from fastapi import FastAPI
from fastapi import FastAPI, staticfiles
from fastapi.middleware.cors import CORSMiddleware
from dotenv import load_dotenv
@@ -21,11 +23,11 @@ from app.services.summary_service import generate_unified_summaries
from app.services.delivery_service import check_and_deliver
from app.database import engine
from app.models.models import Base
from app.initialize import init
# 路由总线
from app.api.router import api_router
# Load variables from a .env file before the settings below are read
load_dotenv()
# Crawl interval (minutes, per the variable name); falls back to 10 when CRAWL_INTERVAL_MINUTES is unset
CRAWL_INTERVAL = int(os.getenv("CRAWL_INTERVAL_MINUTES", 10))
# Summary interval (minutes, per the variable name); falls back to 30 when SUMMARY_INTERVAL_MINUTES is unset
SUMMARY_INTERVAL = int(os.getenv("SUMMARY_INTERVAL_MINUTES", 30))
@@ -41,7 +43,11 @@ async def lifespan(app: FastAPI):
logging.info("正在初始化数据库表...")
Base.metadata.create_all(bind=engine)
logging.info("数据库表初始化完成!")
logging.info("初始化订阅源")
init()
logging.info("订阅源初始化完毕")
# 2. 配置并启动定时任务
scheduler.add_job(
fetch_and_save_trending_data,
@@ -106,6 +112,13 @@ app.add_middleware(
# Versioned API routes
app.include_router(api_router, prefix="/api/v1")


# Catch-all for API paths that no router handled. It must be registered BEFORE
# the static mount below: routes are matched in registration order, and a
# mount at "/" matches every path, so anything added after it is unreachable.
@app.get("/api/{full_path:path}", status_code=404)
async def api_not_found(full_path: str):
    """Return a JSON "not found" body, with HTTP 404, for unknown API paths."""
    return {"detail": "API Not Found"}


# Serve the built frontend (the dist output placed in app/static). With
# html=True the mount also serves index.html for directory requests.
# NOTE(review): any route registered after this mount (for example a later
# `@app.get("/")`) is shadowed by it — confirm that is intended.
app.mount("/", staticfiles.StaticFiles(directory="app/static", html=True), name="static")
# 健康检查
@app.get("/", tags=["健康检查"])
+10 -12
View File
@@ -93,6 +93,10 @@ class InfoSource(Base):
created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=utcnow)
updated_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=utcnow, onupdate=utcnow)
__table_args__ = (
UniqueConstraint("source_name", name="uix_source_name"),
)
# ==========================================
@@ -176,8 +180,7 @@ class NewsArticle(Base):
id: Mapped[int] = mapped_column(BigIntType, primary_key=True, autoincrement=True)
source_id: Mapped[int] = mapped_column(ForeignKey("info_sources.id"), comment="所属信息源ID")
unified_event_id: Mapped[Optional[int]] = mapped_column(ForeignKey("unified_events.id"),
comment="深度文章也可归入大事件分析")
unified_event_id: Mapped[Optional[int]] = mapped_column(ForeignKey("unified_events.id"), comment="深度文章也可归入大事件分析")
external_id: Mapped[str] = mapped_column(String(32), comment="RSS原文<guid>生成的MD5防重指纹")
title_embedding: Mapped[Optional[str]] = mapped_column(Text, comment="新闻标题/摘要的语义向量")
@@ -214,8 +217,7 @@ class HeadlineRevision(Base):
previous_headline: Mapped[str] = mapped_column(String(255), comment="修改前的旧标题")
revised_headline: Mapped[str] = mapped_column(String(255), comment="修改后的新标题")
created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=utcnow,
comment="系统发现被修改的时间")
created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=utcnow, comment="系统发现被修改的时间")
class RankingLog(Base):
@@ -235,8 +237,7 @@ class RankingLog(Base):
# 当时它在第几名
ranking_position: Mapped[int] = mapped_column(Integer, comment="当时抓取时的排名名次")
# 爬虫看到它的那一瞬间的时间
observed_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=utcnow,
comment="观察到该名次的准确时间")
observed_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=utcnow, comment="观察到该名次的准确时间")
created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=utcnow)
@@ -307,13 +308,11 @@ class AppUser(Base):
nickname: Mapped[Optional[str]] = mapped_column(String(100), comment="用户展示昵称")
avatar_url: Mapped[Optional[str]] = mapped_column(String(500), comment="用户头像地址")
gender: Mapped[GenderType] = mapped_column(Enum(GenderType), default=GenderType.UNKNOWN,
comment="用户性别(用于AI调整行文语气)")
gender: Mapped[GenderType] = mapped_column(Enum(GenderType), default=GenderType.UNKNOWN, comment="用户性别(用于AI调整行文语气)")
# 极其强大:一个万能收纳箱!前端未来想加任何诸如“夜间模式”、“字体变大”的开关,
# 全部丢进这个 JSON 字段即可,从此免去手动修改后端表结构的麻烦。
metadata_: Mapped[Optional[Any]] = mapped_column("metadata", JSON,
comment="JSON扩展字段: 存放灵活多变的前端用户偏好设置")
metadata_: Mapped[Optional[Any]] = mapped_column("metadata", JSON, comment="JSON扩展字段: 存放灵活多变的前端用户偏好设置")
# 时区对于定时推送系统极其重要!保证纽约的用户和北京的用户都能在早晨8点收到新闻。
timezone: Mapped[str] = mapped_column(String(50), default="Asia/Shanghai", comment="用户所在地时区")
@@ -401,8 +400,7 @@ class DeliveryHistory(Base):
# 记录这次推送是彻底成功了,还是由于渠道网络问题失败了
status: Mapped[TaskStatus] = mapped_column(Enum(TaskStatus), comment="最终推送结果状态")
created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=utcnow,
comment="记录或实际推送的准确时间")
created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=utcnow, comment="记录或实际推送的准确时间")
# ==========================================
+1 -1
View File
@@ -26,7 +26,7 @@ SIMILARITY_THRESHOLD = float(os.getenv("SIMILARITY_THRESHOLD", 0.72))
# Base URL of the upstream trending-data API; overridable through the API_BASE_URL environment variable
API_BASE_URL = os.getenv("API_BASE_URL", "https://newsnow.busiyi.world/api/s")
# Path handed to SentenceTransformer below.
# NOTE(review): defaults to "" and is used with local_files_only=True — presumably
# loading fails unless this points at a local model directory; confirm.
EMBEDDING_MODEL_PATH = os.getenv("EMBEDDING_MODEL_PATH", "")
# NOTE(review): the two print lines below look like the removed and added sides of a rendered diff
print("正在加载 BAAI/bge-m3 向量模型...")
print("正在加载向量模型...")
# Module-level singleton: the model is loaded once, at import time
embedder_model = SentenceTransformer(EMBEDDING_MODEL_PATH, local_files_only=True)
print("模型加载完成。")