diff --git a/backend/app/__init__.py b/backend/app/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/backend/app/api/__init__.py b/backend/app/api/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/backend/app/core/__init__.py b/backend/app/core/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/backend/app/crud/__init__.py b/backend/app/crud/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/backend/app/database.py b/backend/app/database.py
new file mode 100644
--- /dev/null
+++ b/backend/app/database.py
@@ -0,0 +1,27 @@
+# database.py
+from sqlalchemy import create_engine
+from sqlalchemy.orm import sessionmaker
+
+# Location of the SQLite database file (relative to the working directory)
+SQLALCHEMY_DATABASE_URL = "sqlite:///./data/demo.db"
+
+# Create the database engine.  check_same_thread=False lets the connection
+# be used from a thread other than the one that created it.
+engine = create_engine(
+    SQLALCHEMY_DATABASE_URL, connect_args={"check_same_thread": False}
+)
+
+# Session factory
+SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
+
+
+def get_db():
+    """Yield a database session and close it when the caller is done.
+
+    Intended for use as a FastAPI dependency: one session per request.
+    """
+    db = SessionLocal()
+    try:
+        yield db
+    finally:
+        db.close()
diff --git a/backend/app/main.py b/backend/app/main.py
new file mode 100644
--- /dev/null
+++ b/backend/app/main.py
@@ -0,0 +1,74 @@
+# app/main.py
+from contextlib import asynccontextmanager
+from fastapi import FastAPI, Depends
+from sqlalchemy.orm import Session
+
+
+from app.database import engine, get_db
+from app.models.models import Base, InfoSource, SourceType
+
+
+# ==========================================
+# 1. Lifecycle management: create tables automatically at app startup
+# ==========================================
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    """Create any missing database tables before the app starts serving."""
+    # create_all() only creates tables that do not exist yet.
+    print("正在初始化数据库表...")
+    Base.metadata.create_all(bind=engine)
+    print("数据库表初始化完成!")
+    yield
+
+
+
+# Initialise FastAPI and attach the lifespan handler
+app = FastAPI(title="AI 新闻聚合引擎 API", lifespan=lifespan)
+
+
+# ==========================================
+# 2. Route definitions
+# ==========================================
+
+@app.get("/")
+async def root():
+    """Return a static welcome message."""
+    return {"message": "Welcome to AI News Aggregator API"}
+
+
+@app.get("/hello/{name}")
+async def say_hello(name: str):
+    """Return a greeting for *name*."""
+    return {"message": f"Hello {name}"}
+
+
+# ---> Endpoints that talk to the database.
+# They are plain ``def`` functions on purpose: the SQLAlchemy session is
+# synchronous, and FastAPI runs non-async path operations in a worker thread
+# so blocking queries do not stall the event loop.
+
+@app.post("/sources/")
+def create_info_source(name: str, url: str, db: Session = Depends(get_db)):
+    """
+    Test endpoint: add a new information source to the database.
+    """
+    new_source = InfoSource(
+        source_name=name,
+        source_type=SourceType.RSS_FEED,  # default to RSS for testing
+        home_url=url
+    )
+
+    db.add(new_source)
+    db.commit()
+    db.refresh(new_source)  # reload to pick up the generated ID
+
+    return {"message": "创建成功!", "data": new_source}
+
+
+@app.get("/sources/")
+def get_all_sources(db: Session = Depends(get_db)):
+    """
+    Test endpoint: list every information source in the database.
+    """
+    sources = db.query(InfoSource).all()
+    return {"total": len(sources), "data": sources}
diff --git a/backend/app/models/__init__.py b/backend/app/models/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/backend/app/models/models.py b/backend/app/models/models.py
new file mode 100644
--- /dev/null
+++ b/backend/app/models/models.py
@@ -0,0 +1,320 @@
+from datetime import datetime, timezone, time
+from typing import Optional, Any
+import enum
+
+from sqlalchemy import (
+    String, Integer, BigInteger, Text, Boolean, DateTime, Time,
+    Float, JSON, ForeignKey, Enum, UniqueConstraint, Index
+)
+from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, relationship
+
+
+# ==========================================
+# 0.
+# Global base class and enum definitions
+# ==========================================
+
+class Base(DeclarativeBase):
+    """SQLAlchemy 2.0 declarative base class."""
+    pass
+
+
+class SourceType(str, enum.Enum):
+    """Kind of information source."""
+    HOT_TREND = "HOT_TREND"
+    RSS_FEED = "RSS_FEED"
+    API = "API"
+
+
+class TargetType(str, enum.Enum):
+    """Entity kind referenced by a polymorphic ``target_type``/``target_id`` pair."""
+    EVENT = "EVENT"
+    TREND = "TREND"
+    ARTICLE = "ARTICLE"
+
+
+class TaskStatus(str, enum.Enum):
+    """Outcome of a task or delivery attempt."""
+    SUCCESS = "SUCCESS"
+    ERROR = "ERROR"
+
+
+class GenderType(str, enum.Enum):
+    """Gender recorded on a user profile."""
+    MALE = "MALE"
+    FEMALE = "FEMALE"
+    OTHER = "OTHER"
+    UNKNOWN = "UNKNOWN"
+
+
+def utcnow():
+    """Return the current time as a timezone-aware UTC datetime."""
+    return datetime.now(timezone.utc)
+
+
+# SQLite only auto-generates primary-key values for a column declared exactly
+# as INTEGER PRIMARY KEY; a BIGINT primary key is not a rowid alias, so inserts
+# that omit the id fail.  Keep BIGINT on other backends and fall back to
+# INTEGER on SQLite.
+BigIntPK = BigInteger().with_variant(Integer, "sqlite")
+
+
+# ==========================================
+# Module 1: information source management
+# ==========================================
+class InfoSource(Base):
+    """A registered information source (table ``info_sources``)."""
+    __tablename__ = "info_sources"
+
+    id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
+    source_name: Mapped[str] = mapped_column(String(100), comment="信息源名称")
+    source_type: Mapped[SourceType] = mapped_column(Enum(SourceType))
+    home_url: Mapped[Optional[str]] = mapped_column(String(255))
+    is_enabled: Mapped[bool] = mapped_column(Boolean, default=True)
+
+    created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=utcnow)
+    updated_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=utcnow, onupdate=utcnow)
+
+
+# ==========================================
+# Module 2: AI semantic clustering hub (unified event pool)
+# ==========================================
+class UnifiedEvent(Base):
+    """A unified event that trending items and articles can be linked to."""
+    __tablename__ = "unified_events"
+
+    id: Mapped[int] = mapped_column(BigIntPK, primary_key=True, autoincrement=True)
+    unified_title: Mapped[str] = mapped_column(String(255), comment="AI统一标题")
+    ai_comprehensive_summary: Mapped[Optional[str]] = mapped_column(Text, comment="AI全局深度总结")
+
+    # SQLite has no native vector type: stored as a comma-separated string or JSON; on Postgres this could become a pgvector column
+    center_embedding: Mapped[Optional[str]] = mapped_column(Text, comment="中心向量")
+    hot_score: Mapped[int] = mapped_column(Integer, default=0, comment="聚合热度得分")
+
+    created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=utcnow)
+    updated_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=utcnow, onupdate=utcnow)
+
+
+# ==========================================
+# Module 3: content store (trending items & news articles)
+# ==========================================
+class TrendingEvent(Base):
+    """A trending (hot-search) item, unique per source and external id."""
+    __tablename__ = "trending_events"
+    __table_args__ = (
+        UniqueConstraint("source_id", "external_id", name="idx_unique_external_trend"),
+    )
+
+    id: Mapped[int] = mapped_column(BigIntPK, primary_key=True, autoincrement=True)
+    source_id: Mapped[int] = mapped_column(ForeignKey("info_sources.id"))
+    unified_event_id: Mapped[Optional[int]] = mapped_column(ForeignKey("unified_events.id"))
+
+    external_id: Mapped[str] = mapped_column(String(32), comment="32位MD5哈希指纹防重")
+    title_embedding: Mapped[Optional[str]] = mapped_column(Text)
+
+    icon_url: Mapped[Optional[str]] = mapped_column(String(500))
+    current_headline: Mapped[str] = mapped_column(String(255))
+    event_url: Mapped[Optional[str]] = mapped_column(String(500))
+    app_link: Mapped[Optional[str]] = mapped_column(String(500))
+    current_ranking: Mapped[Optional[int]] = mapped_column(Integer)
+    brief_snippet: Mapped[Optional[str]] = mapped_column(Text)
+
+    created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=utcnow)
+    updated_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=utcnow, onupdate=utcnow)
+
+
+class NewsArticle(Base):
+    """A news article, unique per source and external id."""
+    __tablename__ = "news_articles"
+    __table_args__ = (
+        UniqueConstraint("source_id", "external_id", name="idx_unique_external_article"),
+    )
+
+    id: Mapped[int] = mapped_column(BigIntPK, primary_key=True, autoincrement=True)
+    source_id: Mapped[int] = mapped_column(ForeignKey("info_sources.id"))
+    unified_event_id: Mapped[Optional[int]] = mapped_column(ForeignKey("unified_events.id"))
+
+    external_id: Mapped[str] = mapped_column(String(32))
+    title_embedding: Mapped[Optional[str]] = mapped_column(Text)
+
+    cover_image_url: Mapped[Optional[str]] = mapped_column(String(500))
+    article_title: Mapped[str] = mapped_column(String(255))
+    article_url: Mapped[Optional[str]] = mapped_column(String(500))
+    author_name: Mapped[Optional[str]] = mapped_column(String(100))
+    original_summary: Mapped[Optional[str]] = mapped_column(Text)
+    publish_time: Mapped[Optional[datetime]] = mapped_column(DateTime(timezone=True))
+
+    created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=utcnow)
+    updated_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=utcnow, onupdate=utcnow)
+
+
+# ==========================================
+# Module 4: popularity and trajectory tracking
+# ==========================================
+class HeadlineRevision(Base):
+    """A recorded headline change for a trending event."""
+    __tablename__ = "headline_revisions"
+
+    id: Mapped[int] = mapped_column(BigIntPK, primary_key=True, autoincrement=True)
+    event_id: Mapped[int] = mapped_column(ForeignKey("trending_events.id"))
+    previous_headline: Mapped[str] = mapped_column(String(255))
+    revised_headline: Mapped[str] = mapped_column(String(255))
+
+    created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=utcnow)
+
+
+class RankingLog(Base):
+    """A ranking position observed for a trending event at a point in time."""
+    __tablename__ = "ranking_logs"
+    __table_args__ = (
+        Index("idx_event_time", "event_id", "observed_at"),
+    )
+
+    id: Mapped[int] = mapped_column(BigIntPK, primary_key=True, autoincrement=True)
+    event_id: Mapped[int] = mapped_column(ForeignKey("trending_events.id"))
+    ranking_position: Mapped[int] = mapped_column(Integer)
+    observed_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=utcnow)
+
+    created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=utcnow)
+
+
+# ==========================================
+# Module 5: polymorphic topics and polymorphic comments
+# ==========================================
+class ExtractedTopic(Base):
+    """A topic keyword attached to a polymorphic target (type + id)."""
+    __tablename__ = "extracted_topics"
+    __table_args__ = (
+        Index("idx_topic_keyword", "topic_keyword"),
+        Index("idx_polymorphic_topics", "target_type", "target_id"),
+    )
+
+    id: Mapped[int] = mapped_column(BigIntPK, primary_key=True, autoincrement=True)
+    target_type: Mapped[TargetType] = mapped_column(Enum(TargetType))
+    target_id: Mapped[int] = mapped_column(BigInteger)
+    topic_keyword: Mapped[str] = mapped_column(String(100))
+    relevance_score: Mapped[Optional[float]] = mapped_column(Float)
+
+    created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=utcnow)
+
+
+class DiscussionComment(Base):
+    """A comment attached to a polymorphic target (type + id)."""
+    __tablename__ = "discussion_comments"
+    __table_args__ = (
+        Index("idx_polymorphic_comments", "target_type", "target_id"),
+    )
+
+    id: Mapped[int] = mapped_column(BigIntPK, primary_key=True, autoincrement=True)
+    target_type: Mapped[TargetType] = mapped_column(Enum(TargetType))
+    target_id: Mapped[int] = mapped_column(BigInteger)
+
+    commenter_name: Mapped[Optional[str]] = mapped_column(String(100))
+    comment_content: Mapped[str] = mapped_column(Text)
+    likes_count: Mapped[int] = mapped_column(Integer, default=0)
+    external_comment_id: Mapped[Optional[str]] = mapped_column(String(32))
+    comment_time: Mapped[Optional[datetime]] = mapped_column(DateTime(timezone=True))
+
+    created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=utcnow)
+
+
+# ==========================================
+# Module 6: user profiles and multi-channel push delivery
+# ==========================================
+class AppUser(Base):
+    """An application user account and profile."""
+    __tablename__ = "app_users"
+
+    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
+    email: Mapped[str] = mapped_column(String(150), unique=True, index=True)
+    password_hash: Mapped[Optional[str]] = mapped_column(String(255))
+
+    nickname: Mapped[Optional[str]] = mapped_column(String(100))
+    avatar_url: Mapped[Optional[str]] = mapped_column(String(500))
+    gender: Mapped[GenderType] = mapped_column(Enum(GenderType), default=GenderType.UNKNOWN)
+
+    # Catch-all JSON extension field; the attribute is ``metadata_`` because ``metadata`` is reserved on declarative classes
+    metadata_: Mapped[Optional[Any]] = mapped_column("metadata", JSON, comment="自定义扩展偏好")
+
+    timezone: Mapped[str] = mapped_column(String(50), default="Asia/Shanghai")
+    created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=utcnow)
+    updated_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=utcnow, onupdate=utcnow)
+
+
+class UserPushEndpoint(Base):
+    """A push channel account for a user, one per channel type."""
+    __tablename__ = "user_push_endpoints"
+    __table_args__ = (
+        UniqueConstraint("user_id", "channel_type", name="idx_unique_user_channel"),
+    )
+
+    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
+    user_id: Mapped[int] = mapped_column(ForeignKey("app_users.id"))
+    channel_type: Mapped[str] = mapped_column(String(50), comment="如 EMAIL, WECHAT")
+    channel_account: Mapped[str] = mapped_column(String(255))
+    is_active: Mapped[bool] = mapped_column(Boolean, default=True)
+    priority_level: Mapped[int] = mapped_column(Integer, default=1, comment="1最高,降级重试")
+
+    created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=utcnow)
+    updated_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=utcnow, onupdate=utcnow)
+
+
+class UserTopicPreference(Base):
+    """A keyword a user is interested in, unique per user."""
+    __tablename__ = "user_topic_preferences"
+    __table_args__ = (
+        UniqueConstraint("user_id", "interested_keyword", name="idx_unique_preference"),
+    )
+
+    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
+    user_id: Mapped[int] = mapped_column(ForeignKey("app_users.id"))
+    interested_keyword: Mapped[str] = mapped_column(String(100))
+
+    created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=utcnow)
+
+
+class UserDeliverySchedule(Base):
+    """A delivery time configured for a user, unique per user and time."""
+    __tablename__ = "user_delivery_schedules"
+    __table_args__ = (
+        UniqueConstraint("user_id", "delivery_time", name="idx_unique_schedule"),
+    )
+
+    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
+    user_id: Mapped[int] = mapped_column(ForeignKey("app_users.id"))
+    delivery_time: Mapped[time] = mapped_column(Time, comment="如 08:30:00")
+    is_active: Mapped[bool] = mapped_column(Boolean, default=True)
+
+    created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=utcnow)
+
+
+class DeliveryHistory(Base):
+    """A record of one target pushed to a user; the unique constraint blocks duplicate pushes."""
+    __tablename__ = "delivery_history"
+    __table_args__ = (
+        UniqueConstraint("user_id", "target_type", "target_id", name="idx_prevent_duplicate_push"),
+    )
+
+    id: Mapped[int] = mapped_column(BigIntPK, primary_key=True, autoincrement=True)
+    user_id: Mapped[int] = mapped_column(ForeignKey("app_users.id"))
+    target_type: Mapped[TargetType] = mapped_column(Enum(TargetType))
+    target_id: Mapped[int] = mapped_column(BigInteger)
+    status: Mapped[TaskStatus] = mapped_column(Enum(TaskStatus))
+
+    created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=utcnow)
+
+
+# ==========================================
+# Module 7: system task monitoring
+# ==========================================
+class DataSyncTask(Base):
+    """The recorded outcome of one data-sync run for an information source."""
+    __tablename__ = "data_sync_tasks"
+
+    id: Mapped[int] = mapped_column(BigIntPK, primary_key=True, autoincrement=True)
+    source_id: Mapped[int] = mapped_column(ForeignKey("info_sources.id"))
+    items_fetched: Mapped[int] = mapped_column(Integer, default=0)
+    task_status: Mapped[TaskStatus] = mapped_column(Enum(TaskStatus))
+    error_trace: Mapped[Optional[str]] = mapped_column(Text)
+
+    created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=utcnow)
diff --git a/backend/app/schemas/__init__.py b/backend/app/schemas/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/backend/app/services/__init__.py b/backend/app/services/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/backend/app/tasks/__init__.py b/backend/app/tasks/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/backend/data/demo.db b/backend/data/demo.db
new file mode 100644
index 0000000..8711511
Binary files /dev/null and b/backend/data/demo.db differ
diff --git a/backend/run.py b/backend/run.py
new file mode 100644
--- /dev/null
+++ b/backend/run.py
@@ -0,0 +1,12 @@
+# run.py
+import uvicorn
+
+if __name__ == "__main__":
+    # Start the development server via uvicorn.run()
+    uvicorn.run(
+        app="app.main:app",
+        host="0.0.0.0",
+        port=8000,
+        reload=True,
+        workers=1
+    )
diff --git a/backend/test.py b/backend/test.py
new file mode 100644
--- /dev/null
+++ b/backend/test.py
@@ -0,0 +1,37 @@
+import os
+import sys
+
+
+def print_tree(root, prefix=""):
+    """Print the directory tree under *root* using box-drawing connectors.
+
+    Entries are listed in sorted order, ``__pycache__`` is skipped, and
+    sub-directories are descended into recursively.  *prefix* is the
+    indentation carried down from the parent levels.
+    """
+    items = sorted(
+        name for name in os.listdir(root)
+        if name != "__pycache__"
+    )
+    total = len(items)
+
+    for i, name in enumerate(items):
+        path = os.path.join(root, name)
+        is_last = (i == total - 1)
+
+        connector = "└── " if is_last else "├── "
+        print(prefix + connector + name)
+
+        if os.path.isdir(path):
+            # Keep the vertical rule only while siblings remain below.
+            extension = "    " if is_last else "│   "
+            print_tree(path, prefix + extension)
+
+
+if __name__ == "__main__":
+    # Default to the ``app`` package next to this script instead of a
+    # machine-specific absolute path; a directory may be given as argv[1].
+    default_root = os.path.join(os.path.dirname(os.path.abspath(__file__)), "app")
+    root_dir = sys.argv[1] if len(sys.argv) > 1 else default_root
+    print(os.path.basename(os.path.normpath(root_dir)) + "/")
+    print_tree(root_dir)
diff --git a/backend/test_main.http b/backend/test_main.http
new file mode 100644
index 0000000..a2d81a9
--- /dev/null
+++ b/backend/test_main.http
@@ -0,0 +1,11 @@
+# Test your FastAPI endpoints
+
+GET http://127.0.0.1:8000/
+Accept: application/json
+
+###
+
+GET http://127.0.0.1:8000/hello/User
+Accept: application/json
+
+###