From 31fc98f57f81c6cc5331e2fdfe4378654f9b4bc5 Mon Sep 17 00:00:00 2001 From: csf123321 Date: Thu, 14 May 2026 23:41:02 +0800 Subject: [PATCH] Add detailed architecture design (EN + ZH): Cargo workspace, core traits, processing pipeline, StreamBuilder, DB schema, REST API, Docker deployment --- ARCHITECTURE.md | 613 ++++++++++++++++++++++++++++++++++++++ ARCHITECTURE.zh.md | 725 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 1338 insertions(+) create mode 100644 ARCHITECTURE.md create mode 100644 ARCHITECTURE.zh.md diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md new file mode 100644 index 0000000..589e9b9 --- /dev/null +++ b/ARCHITECTURE.md @@ -0,0 +1,613 @@ +# LLM Media Server — Architecture Design + +> Patterns derived from Jellyfin's architecture, implemented in Rust. + +--- + +## 1. Cargo Workspace Layout + +``` +lms/ ← workspace root +├── Cargo.toml ← workspace members +├── crates/ +│ ├── lms-server/ ← binary entry, Axum router, DI wiring +│ ├── lms-core/ ← shared traits, domain types, error types +│ ├── lms-library/ ← LibraryManager, FileWatcher, MediaResolver +│ ├── lms-metadata/ ← MetadataRouter + provider implementations +│ ├── lms-llm/ ← LlmRouter + LLM provider implementations +│ ├── lms-media/ ← ffmpeg wrapper, MediaProbe, thumbnail extractor +│ ├── lms-stream/ ← StreamBuilder, direct play, HLS handler +│ └── lms-db/ ← SQLite/sqlx, migrations, repositories +├── docs/ +└── docker/ + ├── Dockerfile + └── docker-compose.yml +``` + +**Dependency direction** (one-way, no cycles): + +``` +lms-server + ├── lms-library → lms-core, lms-db + ├── lms-metadata → lms-core, lms-llm + ├── lms-llm → lms-core + ├── lms-media → lms-core + ├── lms-stream → lms-core, lms-media, lms-db + └── lms-db → lms-core +``` + +`lms-core` has zero internal dependencies. All traits are defined here. + +--- + +## 2. Core Traits (lms-core) + +Mirroring Jellyfin's `IMetadataProvider` hierarchy using Rust trait objects. 
+ +### 2.1 Metadata Provider + +```rust +// crates/lms-core/src/providers/metadata.rs + +#[async_trait] +pub trait MetadataProvider: Send + Sync { + fn name(&self) -> &str; + + /// Lower number = higher priority (mirrors Jellyfin IHasOrder, default 50) + fn priority(&self) -> u8 { 50 } + + fn supports(&self, item_type: ItemType) -> bool; + + /// Returns None if this provider has no result for the query + async fn fetch(&self, query: &MetadataQuery) -> Result>; +} + +pub struct MetadataQuery { + pub title: String, + pub year: Option, + pub item_type: ItemType, + pub external_ids: HashMap, // "tmdb" -> "12345" +} + +pub struct MetadataResult { + pub source: String, // "tmdb" | "tvdb" | "llm" + pub external_id: Option, + pub title: String, + pub overview: Option, + pub genres: Vec, + pub year: Option, + pub rating: Option, + pub poster_url: Option, + pub backdrop_url: Option, + pub cast: Vec, + pub llm_generated: bool, // transparency flag for LLM-generated fields + pub raw_json: Option, // cached raw API response +} +``` + +### 2.2 LLM Provider + +```rust +// crates/lms-core/src/providers/llm.rs + +#[async_trait] +pub trait LlmProvider: Send + Sync { + fn name(&self) -> &str; + async fn is_available(&self) -> bool; + async fn complete(&self, prompt: &str, opts: &LlmOptions) -> Result; +} + +pub struct LlmOptions { + pub model: Option, + pub max_tokens: u32, + pub temperature: f32, +} +``` + +### 2.3 Domain Model + +Inspired by Jellyfin's `BaseItem`, but using a Rust enum hierarchy instead of class inheritance. 
+ +```rust +// crates/lms-core/src/domain/item.rs + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct MediaItem { + pub id: Uuid, + pub item_type: ItemType, + pub title: String, + pub sort_title: String, + pub file_path: PathBuf, + pub file_hash: String, // SHA-256, deduplication + pub duration_secs: Option, + pub created_at: DateTime, + pub updated_at: DateTime, +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +pub enum ItemType { + Movie, + Series, + Episode { season: u8, episode: u16, series_id: Uuid }, + HomeVideo, +} + +#[derive(Debug, Clone)] +pub struct ClassificationResult { + pub item_type: ItemType, + pub confidence: f32, // 0.0–1.0; < 0.85 flags for manual review + pub llm_used: bool, + pub model: Option, +} +``` + +### 2.4 Client Capabilities (mirrors Jellyfin DeviceProfile) + +```rust +pub struct ClientCapabilities { + pub supported_containers: Vec, + pub supported_video_codecs: Vec, + pub supported_audio_codecs: Vec, + pub max_bitrate_bps: u64, +} +``` + +--- + +## 3. Processing Pipeline + +Inspired by Jellyfin's LibraryManager + TaskManager. Key improvement over Jellyfin: jobs are persisted to SQLite (Jellyfin uses a pure in-memory `ConcurrentQueue`), enabling crash recovery. + +``` +notify::Watcher (inotify) + │ IngestEvent { path, event_type } + ▼ +mpsc::channel + │ + ▼ +┌──────────────────────────────────────────┐ +│ IngestWorker │ +│ 1. Validate file extension │ +│ 2. SHA-256 fingerprint → check dup │ +│ 3. Insert media_items (status: pending) │ +│ 4. Insert processing_jobs record │ +└──────────────────────┬───────────────────┘ + │ + ▼ +┌──────────────────────────────────────────┐ +│ ClassificationWorker │ +│ 1. MediaResolver: parse filename │ +│ - Regex match S##E## → Episode │ +│ - Directory structure → Series/Movie │ +│ 2. confidence >= 0.85 → classify │ +│ 3. confidence < 0.85 → LlmRouter │ +│ 4. 
Update media_items.item_type │ +└──────────────────────┬───────────────────┘ + │ + ▼ +┌──────────────────────────────────────────┐ +│ MetadataWorker │ +│ 1. MetadataRouter.fetch() │ +│ Iterate providers by priority: │ +│ TmdbProvider → TvdbProvider → ... │ +│ 2. First Some() wins; stop iteration │ +│ 3. All None → LlmFallbackProvider │ +│ 4. Write to metadata table │ +└──────────────────────┬───────────────────┘ + │ + ▼ +┌──────────────────────────────────────────┐ +│ ThumbnailWorker │ +│ 1. metadata.poster_url exists → download│ +│ 2. No poster → ffmpeg keyframe extract │ +│ 3. Save to {config_dir}/thumbs/{id}.jpg │ +└──────────────────────┬───────────────────┘ + │ + ▼ + processing_jobs.status = done +``` + +**Crash recovery**: On startup, scan `processing_jobs` for `status = 'running'`, reset to `pending` and re-enqueue. + +--- + +## 4. MediaResolver — Filename Parsing (ref: Emby.Naming) + +```rust +// crates/lms-library/src/resolver.rs + +pub struct MediaResolver { + episode_patterns: Vec<Regex>, // compiled once at startup +} + +impl MediaResolver { + pub fn resolve(&self, path: &Path) -> ParsedMedia { + // 1. Directory structure analysis (ref: Jellyfin BaseVideoResolver) + if self.has_season_dirs(path) { + return ParsedMedia::Series { ... }; + } + // 2. Filename regex match + if let Some(ep) = self.try_parse_episode(path) { + return ParsedMedia::Episode(ep); + } + // 3. Default: Movie + ParsedMedia::Movie { title: self.clean_title(path) } + } +} + +// Regex patterns (ref: Jellyfin NamingOptions, 20+ common formats) +const EPISODE_PATTERNS: &[&str] = &[ + r"[Ss](?P<season>\d{1,2})[\s._-]*[Ee](?P<episode>\d{1,3})", // S01E01 + r"Season\s*(?P<season>\d+)\s*Episode\s*(?P<episode>\d+)", // Season 1 Episode 2 + r"[\s_-](?P<episode>\d{2,4})[\s_-]", // Absolute (anime) + r"(?P<year>\d{4})[\.\-](?P<month>\d{2})[\.\-](?P<day>\d{2})", // Date-based +]; +``` + +--- + +## 5. 
MetadataRouter — Priority Routing (ref: Jellyfin ProviderManager) + +```rust +// crates/lms-metadata/src/router.rs + +pub struct MetadataRouter { + providers: Vec<Box<dyn MetadataProvider>>, // sorted by priority() +} + +impl MetadataRouter { + pub fn new(mut providers: Vec<Box<dyn MetadataProvider>>) -> Self { + providers.sort_by_key(|p| p.priority()); + Self { providers } + } + + pub async fn fetch(&self, query: &MetadataQuery) -> Result<Option<MetadataResult>> { + for provider in &self.providers { + if !provider.supports(query.item_type.clone()) { continue; } + match provider.fetch(query).await { + Ok(Some(result)) => return Ok(Some(result)), // first hit wins + Ok(None) => continue, + Err(e) => { tracing::warn!("{} failed: {e}", provider.name()); continue; } + } + } + Ok(None) + } +} +``` + +### Provider Priority Table + +| Provider | priority | Item Types | +|---|---|---| +| TmdbProvider | 10 | Movie, Series, Episode | +| TvdbProvider | 20 | Series, Episode | +| AniDbProvider | 30 | Series (anime) | +| LlmFallbackProvider | 90 | All (last resort) | + +--- + +## 6. 
LlmRouter — Multi-Provider with Fallback + +```rust +// crates/lms-llm/src/router.rs + +pub struct LlmRouter { + primary: Box<dyn LlmProvider>, + fallback: Option<Box<dyn LlmProvider>>, +} + +impl LlmRouter { + pub async fn complete(&self, prompt: &str, opts: &LlmOptions) -> Result<String> { + if self.primary.is_available().await { + return self.primary.complete(prompt, opts).await; + } + if let Some(fb) = &self.fallback { + tracing::warn!("Primary LLM unavailable, falling back to {}", fb.name()); + return fb.complete(prompt, opts).await; + } + Err(LlmError::NoAvailableProvider) + } + + pub async fn classify(&self, file_name: &str, context: &str) -> Result<ClassificationResult> { + let prompt = prompts::classification(file_name, context); + let raw = self.complete(&prompt, &LlmOptions::default()).await?; + serde_json::from_str(&raw).map_err(LlmError::ParseError) + } +} + +// Provider implementations: +// crates/lms-llm/src/providers/ +// ollama.rs → POST http://localhost:11434/api/generate +// claude.rs → POST https://api.anthropic.com/v1/messages +// openai.rs → POST https://api.openai.com/v1/chat/completions +``` + +--- + +## 7. 
StreamBuilder — Decision Tree (ref: Jellyfin StreamBuilder.cs) + +```rust +// crates/lms-stream/src/builder.rs + +pub enum StreamPlan { + DirectPlay { path: PathBuf }, + Remux { path: PathBuf, target_container: String }, + Transcode { path: PathBuf, video_codec: String, audio_codec: String, bitrate: u64 }, +} + +impl StreamBuilder { + pub async fn build(item: &MediaItem, probe: &MediaProbe, caps: &ClientCapabilities) -> StreamPlan { + if Self::can_direct_play(probe, caps) { + return StreamPlan::DirectPlay { path: item.file_path.clone() }; + } + if Self::can_remux(probe, caps) { + return StreamPlan::Remux { path: item.file_path.clone(), target_container: "mp4".into() }; + } + StreamPlan::Transcode { + path: item.file_path.clone(), + video_codec: "h264".into(), + audio_codec: "aac".into(), + bitrate: caps.max_bitrate_bps.min(8_000_000), + } + } + + fn can_direct_play(probe: &MediaProbe, caps: &ClientCapabilities) -> bool { + caps.supported_containers.contains(&probe.container) + && caps.supported_video_codecs.contains(&probe.video_codec) + && caps.supported_audio_codecs.contains(&probe.audio_codec) + && probe.bitrate_bps <= caps.max_bitrate_bps + } + + fn can_remux(probe: &MediaProbe, caps: &ClientCapabilities) -> bool { + caps.supported_video_codecs.contains(&probe.video_codec) + && caps.supported_audio_codecs.contains(&probe.audio_codec) && probe.bitrate_bps <= caps.max_bitrate_bps // remuxing keeps the source bitrate + } +} +``` + +--- + +## 8. 
Database Schema (lms-db) + +```sql +CREATE TABLE media_items ( + id TEXT PRIMARY KEY, + item_type TEXT NOT NULL, -- 'movie'|'series'|'episode'|'home_video' + title TEXT NOT NULL, + sort_title TEXT NOT NULL, + file_path TEXT NOT NULL UNIQUE, + file_hash TEXT NOT NULL, -- SHA-256 deduplication + duration_s INTEGER, + status TEXT NOT NULL DEFAULT 'pending', + created_at TEXT NOT NULL, + updated_at TEXT NOT NULL +); + +CREATE TABLE episode_info ( + item_id TEXT PRIMARY KEY REFERENCES media_items(id) ON DELETE CASCADE, + series_id TEXT REFERENCES media_items(id), + season_num INTEGER, + episode_num INTEGER NOT NULL +); + +CREATE TABLE metadata ( + item_id TEXT PRIMARY KEY REFERENCES media_items(id) ON DELETE CASCADE, + source TEXT NOT NULL, -- 'tmdb'|'tvdb'|'llm' + external_id TEXT, + overview TEXT, + genres TEXT, -- JSON array + cast_crew TEXT, -- JSON array + rating REAL, + poster_url TEXT, + backdrop_url TEXT, + year INTEGER, + llm_generated INTEGER NOT NULL DEFAULT 0, -- transparency flag + raw_json TEXT, -- cached API response + fetched_at TEXT NOT NULL +); + +CREATE TABLE tags ( + item_id TEXT REFERENCES media_items(id) ON DELETE CASCADE, + tag TEXT NOT NULL, + confidence REAL NOT NULL, + llm_model TEXT NOT NULL, + PRIMARY KEY (item_id, tag) +); + +CREATE TABLE processing_jobs ( + id TEXT PRIMARY KEY, + item_id TEXT REFERENCES media_items(id) ON DELETE CASCADE, + job_type TEXT NOT NULL, -- 'classify'|'metadata'|'thumbnail' + status TEXT NOT NULL DEFAULT 'pending', + attempts INTEGER NOT NULL DEFAULT 0, + error TEXT, + created_at TEXT NOT NULL, + updated_at TEXT NOT NULL +); + +CREATE INDEX idx_items_type ON media_items(item_type); +CREATE INDEX idx_items_hash ON media_items(file_hash); +CREATE INDEX idx_jobs_status ON processing_jobs(status); +CREATE INDEX idx_episode_series ON episode_info(series_id); +``` + +**Repository split** (ref: Jellyfin's ongoing service decomposition): + +| Service | Responsibility | +|---|---| +| `ItemRepository` | CRUD on media_items | 
+| `MetadataRepository` | CRUD on metadata + tags | +| `JobRepository` | Job queue, crash recovery queries | +| `SearchService` | Full-text search (SQLite FTS5) | +| `EpisodeRepository` | Series/episode relationship queries | + +--- + +## 9. REST API + +All responses `Content-Type: application/json`. Errors: +```json +{ "error": "NOT_FOUND", "message": "Item 123 not found" } +``` + +``` +# Library +GET /api/library list items (paginated, type/genre filter) +GET /api/library/:id item detail with metadata + tags +POST /api/library/scan trigger full rescan +DELETE /api/library/:id remove from library (does not delete file) + +# Streaming +GET /api/stream/:id video stream (Range request support) +GET /api/stream/:id/thumbnail thumbnail image (JPEG) + +# Search +GET /api/search?q=&type=&genre=&year= + +# Jobs +GET /api/jobs list all jobs + status +GET /api/jobs/:id +POST /api/jobs/:id/retry retry failed job + +# Classification +POST /api/classify/:id force LLM reclassification + +# Config +GET /api/config current config (API keys redacted) +PATCH /api/config partial update +``` + +--- + +## 10. Configuration (TOML) + +```toml +[server] +host = "0.0.0.0" +port = 3000 + +[library] +paths = ["/media/movies", "/media/tv"] +scan_interval_secs = 3600 + +[metadata] +tmdb_api_key = "" +tvdb_api_key = "" +provider_order = ["tmdb", "tvdb", "llm"] + +[llm] +default_provider = "ollama" +fallback_provider = "ollama" + +[llm.ollama] +base_url = "http://localhost:11434" +model = "llama3.2" + +[llm.claude] +api_key = "" +model = "claude-sonnet-4-6" + +[llm.openai] +api_key = "" +base_url = "https://api.openai.com/v1" +model = "gpt-4o" + +[streaming] +transcode_dir = "/tmp/lms-transcode" +max_concurrent_jobs = 2 + +[db] +path = "/data/lms.db" +``` + +--- + +## 11. Docker Deployment + +```yaml +# docker-compose.yml +services: + lms: + build: . 
+ ports: + - "3000:3000" + volumes: + - ./config:/config # config + SQLite DB + - /your/media:/media:ro # media library (read-only) + - /tmp/lms-transcode:/transcode + environment: + - LMS_DB_PATH=/config/lms.db + - LMS_CONFIG=/config/lms.toml + restart: unless-stopped + + ollama: + image: ollama/ollama + volumes: + - ollama-data:/root/.ollama + ports: + - "11434:11434" + +volumes: + ollama-data: +``` + +```dockerfile +FROM rust:1.82-slim AS builder +WORKDIR /app +COPY . . +RUN cargo build --release --bin lms-server + +FROM debian:bookworm-slim +RUN apt-get update && apt-get install -y ffmpeg ca-certificates && rm -rf /var/lib/apt/lists/* +COPY --from=builder /app/target/release/lms-server /usr/local/bin/lms-server +EXPOSE 3000 +ENTRYPOINT ["lms-server"] +``` + +--- + +## 12. Key Crate Dependencies + +```toml +# lms-server +axum = "0.7" +tokio = { version = "1", features = ["full"] } +tower-http = { version = "0.5", features = ["cors", "trace"] } +tracing = "0.1" +tracing-subscriber = "0.3" + +# lms-core +serde = { version = "1", features = ["derive"] } +serde_json = "1" +uuid = { version = "1", features = ["v4", "serde"] } +chrono = { version = "0.4", features = ["serde"] } +async-trait = "0.1" +thiserror = "1" + +# lms-library +notify = "6" +regex = "1" + +# lms-db +sqlx = { version = "0.7", features = ["sqlite", "runtime-tokio", "migrate", "uuid", "chrono"] } + +# lms-llm / lms-metadata +reqwest = { version = "0.12", features = ["json"] } + +# lms-media +tokio = { version = "1", features = ["process"] } +``` + +--- + +## 13. 
Key Differences from Jellyfin + +| Concern | Jellyfin | This Project | +|---|---|---| +| Language | C# / .NET 9 | Rust | +| Metadata | External providers primary | External providers + **LLM fallback** | +| Job queue | In-memory ConcurrentQueue | **SQLite-persisted**, crash-recoverable | +| Plugin system | Dynamic assembly loading | **Compiled-in**, no dynamic loading in MVP | +| Frontend | Built-in web client | **TBD / separate**, pure REST API | +| Subtitles | Supported | **Out of scope** | +| Config format | XML | **TOML** | diff --git a/ARCHITECTURE.zh.md b/ARCHITECTURE.zh.md new file mode 100644 index 0000000..717e9cb --- /dev/null +++ b/ARCHITECTURE.zh.md @@ -0,0 +1,725 @@ +# LLM 媒体服务器 — 架构设计文档 + +> 参考 Jellyfin 核心架构模式,以 Rust 实现。 + +--- + +## 1. Cargo Workspace 结构 + +``` +lms/ ← workspace 根目录 +├── Cargo.toml ← workspace 成员声明 +├── crates/ +│ ├── lms-server/ ← 二进制入口,Axum 路由,依赖注入组装 +│ ├── lms-core/ ← 共享 Trait、领域类型、错误类型 +│ ├── lms-library/ ← LibraryManager、FileWatcher、MediaResolver +│ ├── lms-metadata/ ← MetadataRouter 及各 Provider 实现 +│ ├── lms-llm/ ← LlmRouter 及各 LLM Provider 实现 +│ ├── lms-media/ ← ffmpeg 封装、MediaProbe、缩略图提取 +│ ├── lms-stream/ ← StreamBuilder、直接播放、HLS 处理 +│ └── lms-db/ ← SQLite/sqlx、迁移、Repository 实现 +├── docs/ +└── docker/ + ├── Dockerfile + └── docker-compose.yml +``` + +**依赖方向**(单向,无环): + +``` +lms-server + ├── lms-library → lms-core, lms-db + ├── lms-metadata → lms-core, lms-llm + ├── lms-llm → lms-core + ├── lms-media → lms-core + ├── lms-stream → lms-core, lms-media, lms-db + └── lms-db → lms-core +``` + +`lms-core` 不依赖任何其他内部 crate,所有 Trait 定义在此。 + +--- + +## 2. 
核心 Trait 定义(lms-core) + +参考 Jellyfin 的 `IMetadataProvider` 层次,用 Rust Trait 对象实现同等抽象。 + +### 2.1 元数据 Provider + +```rust +// crates/lms-core/src/providers/metadata.rs + +#[async_trait] +pub trait MetadataProvider: Send + Sync { + fn name(&self) -> &str; + + /// 优先级,数字越小越优先(参考 Jellyfin IHasOrder,默认 50) + fn priority(&self) -> u8 { 50 } + + /// 声明支持的内容类型 + fn supports(&self, item_type: ItemType) -> bool; + + /// 拉取元数据,返回 None 表示本 Provider 无结果 + async fn fetch(&self, query: &MetadataQuery) -> Result>; +} + +pub struct MetadataQuery { + pub title: String, + pub year: Option, + pub item_type: ItemType, + pub external_ids: HashMap, // "tmdb" -> "12345" +} + +pub struct MetadataResult { + pub source: String, // "tmdb" | "tvdb" | "llm" + pub external_id: Option, + pub title: String, + pub overview: Option, + pub genres: Vec, + pub year: Option, + pub rating: Option, + pub poster_url: Option, + pub backdrop_url: Option, + pub cast: Vec, + pub llm_generated: bool, // LLM 生成字段标记 + pub raw_json: Option, // 原始 API 响应缓存 +} +``` + +### 2.2 LLM Provider + +```rust +// crates/lms-core/src/providers/llm.rs + +#[async_trait] +pub trait LlmProvider: Send + Sync { + fn name(&self) -> &str; + + /// 健康检查(Ollama 可能未启动) + async fn is_available(&self) -> bool; + + async fn complete(&self, prompt: &str, opts: &LlmOptions) -> Result; +} + +pub struct LlmOptions { + pub model: Option, + pub max_tokens: u32, + pub temperature: f32, +} +``` + +### 2.3 媒体条目领域模型 + +参考 Jellyfin `BaseItem`,但用 Rust enum 替代继承层次。 + +```rust +// crates/lms-core/src/domain/item.rs + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct MediaItem { + pub id: Uuid, + pub item_type: ItemType, + pub title: String, + pub sort_title: String, + pub file_path: PathBuf, + pub file_hash: String, // SHA-256 指纹,防重复入库 + pub duration_secs: Option, + pub created_at: DateTime, + pub updated_at: DateTime, +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +pub enum ItemType { + Movie, + Series, + Episode { season: u8, 
episode: u16, series_id: Uuid }, + HomeVideo, +} + +#[derive(Debug, Clone)] +pub struct ClassificationResult { + pub item_type: ItemType, + pub confidence: f32, // 0.0–1.0,低置信度标记待人工审核 + pub llm_used: bool, + pub model: Option, +} +``` + +### 2.4 流媒体能力描述(参考 Jellyfin DeviceProfile) + +```rust +// crates/lms-core/src/stream/profile.rs + +pub struct ClientCapabilities { + pub supported_containers: Vec, // ["mp4", "mkv", "webm"] + pub supported_video_codecs: Vec, // ["h264", "hevc", "vp9"] + pub supported_audio_codecs: Vec, // ["aac", "mp3", "opus"] + pub max_bitrate_bps: u64, +} +``` + +--- + +## 3. 处理流水线 + +参考 Jellyfin LibraryManager + TaskManager,但用 Tokio channel 替代 .NET ConcurrentQueue,并将 Job 持久化到 SQLite(Jellyfin 是纯内存队列,我们做了改进)。 + +``` +┌─────────────────────────────────────────────────────────┐ +│ 处理流水线 │ +│ │ +│ notify::Watcher(inotify) │ +│ │ IngestEvent { path, event_type } │ +│ ▼ │ +│ mpsc::channel │ +│ │ │ +│ ▼ │ +│ ┌─────────────────────────────────────────────┐ │ +│ │ IngestWorker │ │ +│ │ 1. 校验文件扩展名 │ │ +│ │ 2. SHA-256 指纹 → 检查重复 │ │ +│ │ 3. 写入 media_items(status: pending) │ │ +│ │ 4. 创建 processing_jobs 记录 │ │ +│ └───────────────────┬─────────────────────────┘ │ +│ │ job_id │ +│ ▼ │ +│ ┌─────────────────────────────────────────────┐ │ +│ │ ClassificationWorker │ │ +│ │ 1. MediaResolver:文件名解析 │ │ +│ │ - 正则匹配 S##E## → Episode │ │ +│ │ - 目录结构分析 → Series / Movie │ │ +│ │ 2. 置信度 >= 0.85 → 直接分类 │ │ +│ │ 3. 置信度 < 0.85 → 调用 LlmRouter.classify │ │ +│ │ 4. 更新 media_items.item_type │ │ +│ └───────────────────┬─────────────────────────┘ │ +│ │ │ +│ ▼ │ +│ ┌─────────────────────────────────────────────┐ │ +│ │ MetadataWorker │ │ +│ │ 1. MetadataRouter.fetch() │ │ +│ │ 优先级顺序遍历 Provider: │ │ +│ │ TmdbProvider → TvdbProvider → ... │ │ +│ │ 2. 第一个返回 Some 的 Provider 即停止 │ │ +│ │ 3. 全部返回 None → LlmFallbackProvider │ │ +│ │ 4. 
写入 metadata 表,标记 source │ │ +│ └───────────────────┬─────────────────────────┘ │ +│ │ │ +│ ▼ │ +│ ┌─────────────────────────────────────────────┐ │ +│ │ ThumbnailWorker │ │ +│ │ 1. metadata.poster_url 存在 → 下载 │ │ +│ │ 2. 无海报 → ffmpeg 提取关键帧 │ │ +│ │ 3. 存储到 {config_dir}/thumbs/{id}.jpg │ │ +│ └───────────────────┬─────────────────────────┘ │ +│ │ │ +│ ▼ │ +│ processing_jobs.status = done │ +└─────────────────────────────────────────────────────────┘ +``` + +**崩溃恢复**:服务启动时扫描 `processing_jobs` 中 `status = 'running'`,重置为 `pending` 重新入队。Jellyfin 的纯内存队列无此能力。 + +--- + +## 4. MediaResolver — 文件名解析(参考 Emby.Naming) + +```rust +// crates/lms-library/src/resolver.rs + +pub struct MediaResolver { + episode_patterns: Vec<Regex>, // 启动时只编译一次 +} + +impl MediaResolver { + /// 主入口:路径 → ParsedMedia + pub fn resolve(&self, path: &Path) -> ParsedMedia { + // 1. 目录结构分析(参考 Jellyfin BaseVideoResolver) + if self.has_season_dirs(path) { + return ParsedMedia::Series { ... }; + } + // 2. 文件名正则匹配 + if let Some(ep) = self.try_parse_episode(path) { + return ParsedMedia::Episode(ep); + } + // 3. 默认 Movie + ParsedMedia::Movie { title: self.clean_title(path) } + } +} + +// 正则模式(参考 Jellyfin NamingOptions,覆盖 20+ 常见格式) +const EPISODE_PATTERNS: &[&str] = &[ + // 标准 SxxExx + r"[Ss](?P<season>\d{1,2})[\s._-]*[Ee](?P<episode>\d{1,3})", + // 中文季集:第1季第2集 + r"第(?P<season>\d+)季.*第(?P<episode>\d+)[集话]", + // 纯数字绝对集数(动漫) + r"[\s_-](?P<episode>\d{2,4})[\s_-]", + // 日期型(综艺) + r"(?P<year>\d{4})[\.\-](?P<month>\d{2})[\.\-](?P<day>\d{2})", +]; +``` + +--- + +## 5. 
MetadataRouter — 优先级路由(参考 Jellyfin ProviderManager) + +```rust +// crates/lms-metadata/src/router.rs + +pub struct MetadataRouter { + providers: Vec<Box<dyn MetadataProvider>>, // 按 priority() 排序 +} + +impl MetadataRouter { + pub fn new(mut providers: Vec<Box<dyn MetadataProvider>>) -> Self { + providers.sort_by_key(|p| p.priority()); + Self { providers } + } + + pub async fn fetch(&self, query: &MetadataQuery) -> Result<Option<MetadataResult>> { + for provider in &self.providers { + if !provider.supports(query.item_type.clone()) { + continue; + } + match provider.fetch(query).await { + Ok(Some(result)) => return Ok(Some(result)), // 第一个命中即返回 + Ok(None) => continue, // 本 Provider 无结果,继续 + Err(e) => { + tracing::warn!("{} failed: {e}", provider.name()); + continue; // 单个 Provider 报错不中断整体 + } + } + } + Ok(None) + } +} + +// Provider 注册(lms-server 组装) +fn build_metadata_router(cfg: &Config, llm_router: &Arc<LlmRouter>) -> MetadataRouter { + let providers: Vec<Box<dyn MetadataProvider>> = vec![ + Box::new(TmdbProvider::new(&cfg.metadata.tmdb_api_key)), // priority: 10 + Box::new(TvdbProvider::new(&cfg.metadata.tvdb_api_key)), // priority: 20 + Box::new(LlmFallbackProvider::new(llm_router.clone())), // priority: 90 + ]; + MetadataRouter::new(providers) +} +``` + +### Provider 优先级表 + +| Provider | priority | 适用类型 | +|---|---|---| +| TmdbProvider | 10 | Movie, Series, Episode | +| TvdbProvider | 20 | Series, Episode | +| AniDbProvider | 30 | Series(动漫) | +| LlmFallbackProvider | 90 | 全部(兜底) | + +--- + +## 6. 
LlmRouter — 多 Provider 路由(含降级) + +```rust +// crates/lms-llm/src/router.rs + +pub struct LlmRouter { + primary: Box<dyn LlmProvider>, + fallback: Option<Box<dyn LlmProvider>>, +} + +impl LlmRouter { + /// 云端不可用时自动降级本地 + pub async fn complete(&self, prompt: &str, opts: &LlmOptions) -> Result<String> { + if self.primary.is_available().await { + return self.primary.complete(prompt, opts).await; + } + if let Some(fb) = &self.fallback { + tracing::warn!("Primary LLM unavailable, falling back to {}", fb.name()); + return fb.complete(prompt, opts).await; + } + Err(LlmError::NoAvailableProvider) + } + + /// 专用方法:内容分类(返回结构化 JSON) + pub async fn classify(&self, file_name: &str, context: &str) -> Result<ClassificationResult> { + let prompt = prompts::classification(file_name, context); + let raw = self.complete(&prompt, &LlmOptions::default()).await?; + serde_json::from_str(&raw).map_err(LlmError::ParseError) + } +} + +// Provider 实现列表 +// crates/lms-llm/src/providers/ +// ollama.rs → POST http://localhost:11434/api/generate +// claude.rs → POST https://api.anthropic.com/v1/messages +// openai.rs → POST https://api.openai.com/v1/chat/completions +``` + +--- + +## 7. 
StreamBuilder — 流媒体决策树(参考 Jellyfin StreamBuilder.cs) + +```rust +// crates/lms-stream/src/builder.rs + +pub enum StreamPlan { + DirectPlay { path: PathBuf }, + Remux { path: PathBuf, target_container: String }, + Transcode { path: PathBuf, video_codec: String, audio_codec: String, bitrate: u64 }, +} + +pub struct StreamBuilder; + +impl StreamBuilder { + pub async fn build( + item: &MediaItem, + probe: &MediaProbe, // ffprobe 结果:编解码器、码率、容器 + caps: &ClientCapabilities, // 客户端声明能力(或默认通用能力) + ) -> StreamPlan { + // Step 1:尝试直接播放 + if Self::can_direct_play(probe, caps) { + return StreamPlan::DirectPlay { path: item.file_path.clone() }; + } + // Step 2:容器不兼容但编解码器兼容且码率未超限 → Remux(仅重封装) + if Self::can_remux(probe, caps) { + return StreamPlan::Remux { + path: item.file_path.clone(), + target_container: "mp4".into(), + }; + } + // Step 3:兜底 Transcode + StreamPlan::Transcode { + path: item.file_path.clone(), + video_codec: "h264".into(), + audio_codec: "aac".into(), + bitrate: caps.max_bitrate_bps.min(8_000_000), + } + } + + fn can_direct_play(probe: &MediaProbe, caps: &ClientCapabilities) -> bool { + caps.supported_containers.contains(&probe.container) + && caps.supported_video_codecs.contains(&probe.video_codec) + && caps.supported_audio_codecs.contains(&probe.audio_codec) + && probe.bitrate_bps <= caps.max_bitrate_bps + } + + fn can_remux(probe: &MediaProbe, caps: &ClientCapabilities) -> bool { + caps.supported_video_codecs.contains(&probe.video_codec) + && caps.supported_audio_codecs.contains(&probe.audio_codec) && probe.bitrate_bps <= caps.max_bitrate_bps // 重封装不改变码率 + } +} +``` + +**StreamHandler**(Axum handler): + +```rust +// GET /api/stream/:id +async fn stream_handler( + Path(id): Path<Uuid>, + headers: HeaderMap, + State(ctx): State<AppContext>, +) -> impl IntoResponse { + let item = ctx.db.get_item(id).await?; + let probe = ctx.media.probe(&item.file_path).await?; + let caps = ClientCapabilities::default(); // MVP:通用能力,后续从 query param 读取 + + match StreamBuilder::build(&item, &probe, &caps).await { + StreamPlan::DirectPlay { path } => { + // 支持 
Range 请求(断点续传) + serve_file_with_range(path, &headers).await + } + StreamPlan::Remux { path, target_container } => { + let ffmpeg = FfmpegClient::remux(&path, &target_container); + stream_process_output(ffmpeg).await + } + StreamPlan::Transcode { path, video_codec, audio_codec, bitrate } => { + let ffmpeg = FfmpegClient::transcode(&path, &video_codec, &audio_codec, bitrate); + stream_process_output(ffmpeg).await + } + } +} +``` + +--- + +## 8. 数据库 Schema(lms-db) + +参考 Jellyfin 从单体 `BaseItemRepository` 演进为服务化 Repository 的方向,我们从一开始就按职责拆分。 + +```sql +-- ── 媒体条目核心表 ────────────────────────────────────── +CREATE TABLE media_items ( + id TEXT PRIMARY KEY, -- UUID v4 + item_type TEXT NOT NULL, -- 'movie'|'series'|'episode'|'home_video' + title TEXT NOT NULL, + sort_title TEXT NOT NULL, + file_path TEXT NOT NULL UNIQUE, + file_hash TEXT NOT NULL, -- SHA-256,防重复入库 + duration_s INTEGER, + status TEXT NOT NULL DEFAULT 'pending', -- 'pending'|'ready'|'error' + created_at TEXT NOT NULL, + updated_at TEXT NOT NULL +); + +-- ── 电视剧/剧集关系 ────────────────────────────────────── +CREATE TABLE episode_info ( + item_id TEXT PRIMARY KEY REFERENCES media_items(id) ON DELETE CASCADE, + series_id TEXT REFERENCES media_items(id), + season_num INTEGER, + episode_num INTEGER NOT NULL +); + +-- ── 元数据表 ────────────────────────────────────────────── +CREATE TABLE metadata ( + item_id TEXT PRIMARY KEY REFERENCES media_items(id) ON DELETE CASCADE, + source TEXT NOT NULL, -- 'tmdb'|'tvdb'|'llm' + external_id TEXT, -- TMDB/TVDB ID + overview TEXT, + genres TEXT, -- JSON 数组 + cast_crew TEXT, -- JSON 数组 + rating REAL, + poster_url TEXT, + backdrop_url TEXT, + year INTEGER, + llm_generated INTEGER NOT NULL DEFAULT 0, -- 1 = LLM 生成,透明标记 + raw_json TEXT, -- 原始 API 响应缓存 + fetched_at TEXT NOT NULL +); + +-- ── LLM 生成标签 ────────────────────────────────────────── +CREATE TABLE tags ( + item_id TEXT REFERENCES media_items(id) ON DELETE CASCADE, + tag TEXT NOT NULL, + confidence REAL NOT NULL, -- 0.0–1.0 + 
llm_model TEXT NOT NULL, + PRIMARY KEY (item_id, tag) +); + +-- ── 处理任务队列(持久化,崩溃可恢复)──────────────────── +CREATE TABLE processing_jobs ( + id TEXT PRIMARY KEY, + item_id TEXT REFERENCES media_items(id) ON DELETE CASCADE, + job_type TEXT NOT NULL, -- 'classify'|'metadata'|'thumbnail' + status TEXT NOT NULL DEFAULT 'pending', -- 'pending'|'running'|'done'|'failed' + attempts INTEGER NOT NULL DEFAULT 0, + error TEXT, + created_at TEXT NOT NULL, + updated_at TEXT NOT NULL +); + +-- 索引 +CREATE INDEX idx_items_type ON media_items(item_type); +CREATE INDEX idx_items_hash ON media_items(file_hash); +CREATE INDEX idx_jobs_status ON processing_jobs(status); +CREATE INDEX idx_episode_series ON episode_info(series_id); +``` + +**Repository 服务化拆分**(参考 Jellyfin 最新重构方向): + +| 服务 | 职责 | +|---|---| +| `ItemRepository` | CRUD:media_items | +| `MetadataRepository` | CRUD:metadata、tags | +| `JobRepository` | 任务队列读写,崩溃恢复查询 | +| `SearchService` | 全文搜索(SQLite FTS5) | +| `EpisodeRepository` | series/episode 关系查询 | + +--- + +## 9. REST API 设计 + +所有响应 `Content-Type: application/json`,错误统一结构: +```json +{ "error": "NOT_FOUND", "message": "Item 123 not found" } +``` + +### 库管理 + +``` +GET /api/library 列出媒体库(支持分页、类型筛选) +GET /api/library/:id 获取条目详情(含元数据、标签) +POST /api/library/scan 触发全库扫描 +DELETE /api/library/:id 从库中移除(不删除文件) +``` + +### 流媒体 + +``` +GET /api/stream/:id 视频流(支持 Range 请求) +GET /api/stream/:id/thumbnail 缩略图(JPEG) +``` + +### 搜索 + +``` +GET /api/search?q=&type=&genre=&year= 全文 + 过滤搜索 +``` + +### 任务/Job + +``` +GET /api/jobs 列出处理任务(含状态) +GET /api/jobs/:id 获取单个任务详情 +POST /api/jobs/:id/retry 重试失败任务 +``` + +### 分类(LLM) + +``` +POST /api/classify/:id 强制重新分类(触发 LLM) +``` + +### 配置 + +``` +GET /api/config 查看当前配置(脱敏 API key) +PATCH /api/config 更新配置(热重载部分字段) +``` + +--- + +## 10. 
配置文件(TOML) + +```toml +[server] +host = "0.0.0.0" +port = 3000 + +[library] +paths = ["/media/movies", "/media/tv"] +scan_interval_secs = 3600 # 定时全量扫描间隔 + +[metadata] +tmdb_api_key = "" +tvdb_api_key = "" +# Provider 执行顺序(先命中先返回) +provider_order = ["tmdb", "tvdb", "llm"] + +[llm] +# 默认使用哪个 Provider 执行 LLM 任务 +default_provider = "ollama" +# 云端不可用时降级目标 +fallback_provider = "ollama" + +[llm.ollama] +base_url = "http://localhost:11434" +model = "llama3.2" + +[llm.claude] +api_key = "" +model = "claude-sonnet-4-6" + +[llm.openai] +api_key = "" +base_url = "https://api.openai.com/v1" # 支持自定义端点 +model = "gpt-4o" + +[streaming] +transcode_dir = "/tmp/lms-transcode" +max_concurrent_jobs = 2 + +[db] +path = "/data/lms.db" +``` + +--- + +## 11. Docker 部署 + +参考 Jellyfin 的卷设计(config / media / transcode 分离): + +```yaml +# docker-compose.yml +services: + lms: + build: . + ports: + - "3000:3000" + volumes: + - ./config:/config # TOML 配置 + SQLite DB + - /your/media:/media:ro # 媒体目录(只读挂载) + - /tmp/lms-transcode:/transcode # 转码临时目录(推荐高速盘) + environment: + - LMS_DB_PATH=/config/lms.db + - LMS_CONFIG=/config/lms.toml + restart: unless-stopped + + ollama: + image: ollama/ollama + volumes: + - ollama-data:/root/.ollama + ports: + - "11434:11434" + # GPU 加速(可选) + # deploy: + # resources: + # reservations: + # devices: + # - driver: nvidia + # count: 1 + # capabilities: [gpu] + +volumes: + ollama-data: +``` + +```dockerfile +# Dockerfile(多阶段构建) +FROM rust:1.82-slim AS builder +WORKDIR /app +COPY . . +RUN cargo build --release --bin lms-server + +FROM debian:bookworm-slim +RUN apt-get update && apt-get install -y ffmpeg ca-certificates && rm -rf /var/lib/apt/lists/* +COPY --from=builder /app/target/release/lms-server /usr/local/bin/lms-server +EXPOSE 3000 +ENTRYPOINT ["lms-server"] +``` + +--- + +## 12. 
各 crate 关键依赖 + +```toml +# lms-server +axum = "0.7" +tokio = { version = "1", features = ["full"] } +tower-http = { version = "0.5", features = ["cors", "trace"] } +tracing = "0.1" +tracing-subscriber = "0.3" + +# lms-core +serde = { version = "1", features = ["derive"] } +serde_json = "1" +uuid = { version = "1", features = ["v4", "serde"] } +chrono = { version = "0.4", features = ["serde"] } +async-trait = "0.1" +thiserror = "1" + +# lms-library +notify = "6" # 跨平台文件系统监听(inotify on Linux) +regex = "1" + +# lms-db +sqlx = { version = "0.7", features = ["sqlite", "runtime-tokio", "migrate", "uuid", "chrono"] } + +# lms-llm / lms-metadata +reqwest = { version = "0.12", features = ["json"] } + +# lms-media +tokio = { version = "1", features = ["process"] } # 异步子进程(ffmpeg/ffprobe) +``` + +--- + +## 13. 与 Jellyfin 设计的关键差异 + +| 关注点 | Jellyfin | 本项目 | +|---|---|---| +| 语言 | C# / .NET 9 | Rust | +| 元数据生成 | 外部 Provider 为主 | 外部 Provider 为主 + **LLM 兜底** | +| Job 队列 | 纯内存 ConcurrentQueue | **SQLite 持久化**,崩溃可恢复 | +| Plugin 系统 | 动态程序集加载 | **编译期内置**,MVP 不做动态加载 | +| 前端 | 内置 Web Client | **暂定/独立**,纯 REST API | +| 字幕生成 | 无 | **不支持**(已移出需求) | +| 配置格式 | XML | **TOML** |