Merge pull request #2 from stardrophere/backend_optimize

Backend optimize
This commit is contained in:
csf123321
2026-04-02 01:37:40 +08:00
committed by GitHub
13 changed files with 3654 additions and 106 deletions
+18
View File
@@ -0,0 +1,18 @@
# 前端
frontend/dist
frontend/node_modules
# 后端
backend/.venv
backend/.git
backend/__pycache__
backend/*.pyc
backend/*.pyo
backend/*.pyd
backend/.pytest_cache
backend/.mypy_cache
backend/.cache
backend/.env
backend/*.log
backend/dist
backend/build
+3
View File
@@ -44,6 +44,8 @@ MANIFEST
pip-log.txt pip-log.txt
pip-delete-this-directory.txt pip-delete-this-directory.txt
**/logs/*
# Unit test / coverage reports # Unit test / coverage reports
htmlcov/ htmlcov/
.tox/ .tox/
@@ -190,3 +192,4 @@ cython_debug/
**/data/* **/data/*
**/docker/* **/docker/*
backend/app/static/*
-13
View File
@@ -1,13 +0,0 @@
.venv
.git
__pycache__
*.pyc
*.pyo
*.pyd
.pytest_cache
.mypy_cache
.cache
.env
*.log
dist
build
+8
View File
@@ -2,6 +2,8 @@
""" """
信息源 CRUD:对 InfoSource 的增删改查,供 API 与爬虫使用 信息源 CRUD:对 InfoSource 的增删改查,供 API 与爬虫使用
""" """
from sqlite3 import IntegrityError
from sqlalchemy.orm import Session from sqlalchemy.orm import Session
from typing import List, Optional from typing import List, Optional
@@ -22,10 +24,16 @@ def get_multi(db: Session, skip: int = 0, limit: int = 100) -> List[InfoSource]:
def create(db: Session, obj_in: InfoSourceCreate) -> InfoSource: def create(db: Session, obj_in: InfoSourceCreate) -> InfoSource:
"""创建新的信息源""" """创建新的信息源"""
db_obj = InfoSource(**obj_in.model_dump()) db_obj = InfoSource(**obj_in.model_dump())
try:
db.add(db_obj) db.add(db_obj)
db.commit() db.commit()
db.refresh(db_obj) db.refresh(db_obj)
return db_obj return db_obj
except IntegrityError:
db.rollback()
finally:
db.close()
return db_obj
def update(db: Session, db_obj: InfoSource, obj_in: InfoSourceUpdate) -> InfoSource: def update(db: Session, db_obj: InfoSource, obj_in: InfoSourceUpdate) -> InfoSource:
+17 -21
View File
@@ -1,14 +1,12 @@
import requests
import json import json
# 请将此处的 URL 替换为您实际的 API 基础域名 from app.database import SessionLocal
api_url = "http://10.252.130.135:8000/api/v1/sources/" from app.crud.crud_source import create
from app.models.models import SourceType
from app.schemas.source_schema import InfoSourceCreate
# 请求头
headers = { def init():
"Content-Type": "application/json",
# "Authorization": "Bearer YOUR_TOKEN" # 如果接口需要鉴权,请取消注释并填入 Token
}
# 解析后的数据源列表 # 解析后的数据源列表
sources_data = [ sources_data = [
@@ -27,20 +25,18 @@ sources_data = [
# 遍历数据并发送 POST 请求 # 遍历数据并发送 POST 请求
for item in sources_data: for item in sources_data:
payload = {
"source_name": item["name"],
"source_type": "HOT_TREND",
"home_url": item["url"],
"is_enabled": True
}
try: try:
response = requests.post(api_url, headers=headers, data=json.dumps(payload))
if response.status_code in (200, 201): with SessionLocal() as db:
print(f"✅ 成功创建: {item['name']}")
else: create(db, InfoSourceCreate(
print(f"❌ 创建失败: {item['name']} - 状态码: {response.status_code} - 详情: {response.text}") source_name=item["name"],
source_type=SourceType.HOT_TREND,
home_url=item["url"],
is_enabled=True
))
print(f"创建订阅源{item['name']}")
except Exception as e: except Exception as e:
print(f"⚠️ 请求异常: {item['name']} - 错误: {e}") print(f"⚠️ 请求异常: {item['name']} - 错误: {e}")
print("执行完毕!")
+15 -2
View File
@@ -1,8 +1,10 @@
# app/main.py # app/main.py
import logging import logging
import os import os
from fastapi.responses import FileResponse
import httpx
from contextlib import asynccontextmanager from contextlib import asynccontextmanager
from fastapi import FastAPI from fastapi import FastAPI, staticfiles
from fastapi.middleware.cors import CORSMiddleware from fastapi.middleware.cors import CORSMiddleware
from dotenv import load_dotenv from dotenv import load_dotenv
@@ -21,11 +23,11 @@ from app.services.summary_service import generate_unified_summaries
from app.services.delivery_service import check_and_deliver from app.services.delivery_service import check_and_deliver
from app.database import engine from app.database import engine
from app.models.models import Base from app.models.models import Base
from app.initialize import init
# 路由总线 # 路由总线
from app.api.router import api_router from app.api.router import api_router
load_dotenv()
CRAWL_INTERVAL = int(os.getenv("CRAWL_INTERVAL_MINUTES", 10)) CRAWL_INTERVAL = int(os.getenv("CRAWL_INTERVAL_MINUTES", 10))
SUMMARY_INTERVAL = int(os.getenv("SUMMARY_INTERVAL_MINUTES", 30)) SUMMARY_INTERVAL = int(os.getenv("SUMMARY_INTERVAL_MINUTES", 30))
@@ -42,6 +44,10 @@ async def lifespan(app: FastAPI):
Base.metadata.create_all(bind=engine) Base.metadata.create_all(bind=engine)
logging.info("数据库表初始化完成!") logging.info("数据库表初始化完成!")
logging.info("初始化订阅源")
init()
logging.info("订阅源初始化完毕")
# 2. 配置并启动定时任务 # 2. 配置并启动定时任务
scheduler.add_job( scheduler.add_job(
fetch_and_save_trending_data, fetch_and_save_trending_data,
@@ -106,6 +112,13 @@ app.add_middleware(
# 版本控制 # 版本控制
app.include_router(api_router, prefix="/api/v1") app.include_router(api_router, prefix="/api/v1")
# 把目录改成static对应我们放dist内容的路径就可以
app.mount("/", staticfiles.StaticFiles(directory="app/static", html=True), name="static")
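# 意图:保留API的优先匹配,catch_all可以简化成这样(注意:该路由注册在上面的 app.mount("/") 之后,路由按注册顺序匹配,请求可能先被静态文件挂载处理;需确认此路由及下方的健康检查是否仍可达,必要时把 app.mount 移到所有路由之后)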
@app.get("/api/{full_path:path}")
async def api_not_found(full_path: str):
return {"detail": "API Not Found"}
# 健康检查 # 健康检查
@app.get("/", tags=["健康检查"]) @app.get("/", tags=["健康检查"])
+10 -12
View File
@@ -94,6 +94,10 @@ class InfoSource(Base):
created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=utcnow) created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=utcnow)
updated_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=utcnow, onupdate=utcnow) updated_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=utcnow, onupdate=utcnow)
__table_args__ = (
UniqueConstraint("source_name", name="uix_source_name"),
)
# ========================================== # ==========================================
# 模块二:AI 语义聚类中枢 (大事件池) # 模块二:AI 语义聚类中枢 (大事件池)
@@ -176,8 +180,7 @@ class NewsArticle(Base):
id: Mapped[int] = mapped_column(BigIntType, primary_key=True, autoincrement=True) id: Mapped[int] = mapped_column(BigIntType, primary_key=True, autoincrement=True)
source_id: Mapped[int] = mapped_column(ForeignKey("info_sources.id"), comment="所属信息源ID") source_id: Mapped[int] = mapped_column(ForeignKey("info_sources.id"), comment="所属信息源ID")
unified_event_id: Mapped[Optional[int]] = mapped_column(ForeignKey("unified_events.id"), unified_event_id: Mapped[Optional[int]] = mapped_column(ForeignKey("unified_events.id"), comment="深度文章也可归入大事件分析")
comment="深度文章也可归入大事件分析")
external_id: Mapped[str] = mapped_column(String(32), comment="RSS原文<guid>生成的MD5防重指纹") external_id: Mapped[str] = mapped_column(String(32), comment="RSS原文<guid>生成的MD5防重指纹")
title_embedding: Mapped[Optional[str]] = mapped_column(Text, comment="新闻标题/摘要的语义向量") title_embedding: Mapped[Optional[str]] = mapped_column(Text, comment="新闻标题/摘要的语义向量")
@@ -214,8 +217,7 @@ class HeadlineRevision(Base):
previous_headline: Mapped[str] = mapped_column(String(255), comment="修改前的旧标题") previous_headline: Mapped[str] = mapped_column(String(255), comment="修改前的旧标题")
revised_headline: Mapped[str] = mapped_column(String(255), comment="修改后的新标题") revised_headline: Mapped[str] = mapped_column(String(255), comment="修改后的新标题")
created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=utcnow, created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=utcnow, comment="系统发现被修改的时间")
comment="系统发现被修改的时间")
class RankingLog(Base): class RankingLog(Base):
@@ -235,8 +237,7 @@ class RankingLog(Base):
# 当时它在第几名 # 当时它在第几名
ranking_position: Mapped[int] = mapped_column(Integer, comment="当时抓取时的排名名次") ranking_position: Mapped[int] = mapped_column(Integer, comment="当时抓取时的排名名次")
# 爬虫看到它的那一瞬间的时间 # 爬虫看到它的那一瞬间的时间
observed_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=utcnow, observed_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=utcnow, comment="观察到该名次的准确时间")
comment="观察到该名次的准确时间")
created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=utcnow) created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=utcnow)
@@ -307,13 +308,11 @@ class AppUser(Base):
nickname: Mapped[Optional[str]] = mapped_column(String(100), comment="用户展示昵称") nickname: Mapped[Optional[str]] = mapped_column(String(100), comment="用户展示昵称")
avatar_url: Mapped[Optional[str]] = mapped_column(String(500), comment="用户头像地址") avatar_url: Mapped[Optional[str]] = mapped_column(String(500), comment="用户头像地址")
gender: Mapped[GenderType] = mapped_column(Enum(GenderType), default=GenderType.UNKNOWN, gender: Mapped[GenderType] = mapped_column(Enum(GenderType), default=GenderType.UNKNOWN, comment="用户性别(用于AI调整行文语气)")
comment="用户性别(用于AI调整行文语气)")
# 极其强大:一个万能收纳箱!前端未来想加任何诸如“夜间模式”、“字体变大”的开关, # 极其强大:一个万能收纳箱!前端未来想加任何诸如“夜间模式”、“字体变大”的开关,
# 全部丢进这个 JSON 字段即可,从此免去手动修改后端表结构的麻烦。 # 全部丢进这个 JSON 字段即可,从此免去手动修改后端表结构的麻烦。
metadata_: Mapped[Optional[Any]] = mapped_column("metadata", JSON, metadata_: Mapped[Optional[Any]] = mapped_column("metadata", JSON, comment="JSON扩展字段: 存放灵活多变的前端用户偏好设置")
comment="JSON扩展字段: 存放灵活多变的前端用户偏好设置")
# 时区对于定时推送系统极其重要!保证纽约的用户和北京的用户都能在早晨8点收到新闻。 # 时区对于定时推送系统极其重要!保证纽约的用户和北京的用户都能在早晨8点收到新闻。
timezone: Mapped[str] = mapped_column(String(50), default="Asia/Shanghai", comment="用户所在地时区") timezone: Mapped[str] = mapped_column(String(50), default="Asia/Shanghai", comment="用户所在地时区")
@@ -401,8 +400,7 @@ class DeliveryHistory(Base):
# 记录这次推送是彻底成功了,还是由于渠道网络问题失败了 # 记录这次推送是彻底成功了,还是由于渠道网络问题失败了
status: Mapped[TaskStatus] = mapped_column(Enum(TaskStatus), comment="最终推送结果状态") status: Mapped[TaskStatus] = mapped_column(Enum(TaskStatus), comment="最终推送结果状态")
created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=utcnow, created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=utcnow, comment="记录或实际推送的准确时间")
comment="记录或实际推送的准确时间")
# ========================================== # ==========================================
+1 -1
View File
@@ -26,7 +26,7 @@ SIMILARITY_THRESHOLD = float(os.getenv("SIMILARITY_THRESHOLD", 0.72))
API_BASE_URL = os.getenv("API_BASE_URL", "https://newsnow.busiyi.world/api/s") API_BASE_URL = os.getenv("API_BASE_URL", "https://newsnow.busiyi.world/api/s")
EMBEDDING_MODEL_PATH = os.getenv("EMBEDDING_MODEL_PATH", "") EMBEDDING_MODEL_PATH = os.getenv("EMBEDDING_MODEL_PATH", "")
print("正在加载 BAAI/bge-m3 向量模型...") print("正在加载向量模型...")
# 全局单例 # 全局单例
embedder_model = SentenceTransformer(EMBEDDING_MODEL_PATH, local_files_only=True) embedder_model = SentenceTransformer(EMBEDDING_MODEL_PATH, local_files_only=True)
print("模型加载完成。") print("模型加载完成。")
-31
View File
@@ -1,31 +0,0 @@
FROM python:3.11-slim AS builder
WORKDIR /insightradar
COPY pyproject.toml uv.lock ./
RUN --mount=type=cache,target=/root/.cache/uv \
pip install --no-cache-dir uv && \
uv sync --frozen --no-dev
COPY app app
COPY main.py main.py
#-----------------------------------------------
FROM python:3.11-slim
WORKDIR /insightradar
# 👇 复制虚拟环境
COPY --from=builder /insightradar/.venv /insightradar/.venv
COPY app app
COPY main.py main.py
# 👇 关键:用 venv 里的 python
ENV PATH="/insightradar/.venv/bin:$PATH"
EXPOSE 8000
CMD ["python","main.py"]
+7 -1
View File
@@ -1,12 +1,18 @@
# run.py # run.py
import uvicorn import uvicorn
import os
from dotenv import load_dotenv
if __name__ == "__main__": if __name__ == "__main__":
load_dotenv()
PORT = int(os.getenv("PORT", 8000))
# 启动服务 # 启动服务
uvicorn.run( uvicorn.run(
app="app.main:app", app="app.main:app",
host="0.0.0.0", host="0.0.0.0",
port=8000, port=PORT,
# reload=True, # reload=True,
workers=1 workers=1
) )
+50
View File
@@ -0,0 +1,50 @@
# ---------- 阶段1:前端编译(Node打包静态产物) ----------
FROM node:22-alpine AS frontend-builder
WORKDIR /frontend
# 复制前端依赖,利用Docker缓存优化
COPY frontend/package*.json ./
RUN npm install --registry=https://registry.npmmirror.com
# 复制前端代码,编译出静态产物
COPY frontend/ .
RUN npm run build
# ---------- 阶段2:后端依赖构建(uv构建虚拟环境) ----------
FROM python:3.11-slim AS backend-builder
WORKDIR /backend
# 安装uv,同步Python依赖
COPY backend/pyproject.toml backend/uv.lock ./
RUN --mount=type=cache,target=/root/.cache/uv \
pip install --no-cache-dir uv && \
uv sync --frozen --no-dev
# 复制后端代码
COPY backend/app ./app
COPY backend/main.py ./
# ---------- 阶段3:最终运行镜像(仅Python+Uvicorn,托管前端静态文件) ----------
FROM python:3.11-slim
WORKDIR /app
# 复制构建好的后端虚拟环境
COPY --from=backend-builder /backend/.venv /app/.venv
COPY --from=backend-builder /backend/app /app/app
COPY --from=backend-builder /backend/main.py /app/main.py
# 复制前端编译好的静态产物,放到后端能访问的目录
# 这里我们把静态文件放到 /app/app/static 目录(工作目录为 /app,对应后端挂载的 directory="app/static")
COPY --from=frontend-builder /frontend/dist /app/app/static
# 把venv加入PATH
ENV PATH="/app/.venv/bin:$PATH"
# 暴露Uvicorn端口
EXPOSE 8000
# 运行 main.py 启动服务,由Uvicorn配合后端框架托管静态文件
CMD ["python3", "main.py"]
File diff suppressed because it is too large Load Diff
+1 -1
View File
@@ -1,4 +1,4 @@
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&family=Noto+Sans+SC:wght@400;500;600;700&display=swap'); @import url(./font.css);
/* ========================================= /* =========================================
1. 现代 SaaS 风格高级主题变量 1. 现代 SaaS 风格高级主题变量