liumaolin committed 19 days ago

Commit

e054d0c

1 Parent(s): e43edbb

feat(api): implement local training MVP with adapter pattern

- Add adapter base classes (TaskQueue, Progress, Storage, Database)
- Implement local adapters (AsyncTrainingManager, SQLite, LocalStorage)
- Add Pydantic schemas for tasks, experiments, files, and stages
- Implement service layer (TaskService, ExperimentService, FileService)
- Add API endpoints for Quick Mode (/tasks) and Advanced Mode (/experiments)
- Add adapter factory with dependency injection support
- Update architecture design document with implementation status

Files changed (28) hide show

api_server/app/adapters/base.py +454 -1
api_server/app/adapters/local/__init__.py +16 -2
api_server/app/adapters/local/database.py +683 -0
api_server/app/adapters/local/progress.py +238 -0
api_server/app/adapters/local/storage.py +342 -0
api_server/app/adapters/local/task_queue.py +73 -2
api_server/app/api/__init__.py +9 -0
api_server/app/api/deps.py +96 -0
api_server/app/api/v1/__init__.py +9 -0
api_server/app/api/v1/endpoints/__init__.py +17 -0
api_server/app/api/v1/endpoints/experiments.py +393 -0
api_server/app/api/v1/endpoints/files.py +222 -0
api_server/app/api/v1/endpoints/stages.py +247 -0
api_server/app/api/v1/endpoints/tasks.py +228 -0
api_server/app/api/v1/router.py +39 -0
api_server/app/core/adapters.py +180 -0
api_server/app/main.py +155 -0
api_server/app/models/__init__.py +72 -1
api_server/app/models/schemas/__init__.py +80 -0
api_server/app/models/schemas/common.py +95 -0
api_server/app/models/schemas/experiment.py +556 -0
api_server/app/models/schemas/file.py +159 -0
api_server/app/models/schemas/task.py +232 -0
api_server/app/scripts/run_pipeline.py +16 -2
api_server/app/services/__init__.py +20 -0
api_server/app/services/experiment_service.py +513 -0
api_server/app/services/file_service.py +277 -0
api_server/app/services/task_service.py +322 -0

api_server/app/adapters/base.py CHANGED Viewed

@@ -5,7 +5,10 @@
 """
 from abc import ABC, abstractmethod
-from typing import Dict, Optional, AsyncGenerator
 class TaskQueueAdapter(ABC):
@@ -138,3 +141,453 @@ class ProgressAdapter(ABC):
             进度信息字典
         """
         pass

 """
 from abc import ABC, abstractmethod
+from typing import TYPE_CHECKING, Dict, List, Optional, AsyncGenerator, Any
+if TYPE_CHECKING:
+    from ..models.domain import Task
 class TaskQueueAdapter(ABC):
             进度信息字典
         """
         pass
+class StorageAdapter(ABC):
+    """
+    Abstract base class for storage adapters.
+    Defines the common file-storage interface, intended to be implemented
+    on top of either the local file system or object storage (S3/MinIO).
+    Example:
+        >>> adapter = LocalStorageAdapter(base_path="./data/files")
+        >>> file_id = await adapter.upload_file(data, "audio.wav", {"purpose": "training"})
+        >>> content = await adapter.download_file(file_id)
+        >>> await adapter.delete_file(file_id)
+    """
+    @abstractmethod
+    async def upload_file(
+        self,
+        file_data: bytes,
+        filename: str,
+        metadata: Dict[str, Any]
+    ) -> str:
+        """
+        Upload a file.
+        Args:
+            file_data: Raw binary content of the file.
+            filename: Original file name.
+            metadata: File metadata, which may contain:
+                - content_type: MIME type
+                - purpose: what the file is for (training, reference, output)
+                - any other custom fields
+        Returns:
+            file_id: Unique identifier of the stored file.
+        Raises:
+            IOError: If storing the file fails.
+        """
+        pass
+    @abstractmethod
+    async def download_file(self, file_id: str) -> bytes:
+        """
+        Download a file.
+        Args:
+            file_id: Unique identifier of the file.
+        Returns:
+            Raw binary content of the file.
+        Raises:
+            FileNotFoundError: If the file does not exist.
+        """
+        pass
+    @abstractmethod
+    async def delete_file(self, file_id: str) -> bool:
+        """
+        Delete a file.
+        Args:
+            file_id: Unique identifier of the file.
+        Returns:
+            Whether the file was deleted successfully.
+        """
+        pass
+    @abstractmethod
+    async def get_file_metadata(self, file_id: str) -> Optional[Dict[str, Any]]:
+        """
+        Get the metadata of a file.
+        Args:
+            file_id: Unique identifier of the file.
+        Returns:
+            A metadata dictionary containing:
+            - id: file ID
+            - filename: original file name
+            - content_type: MIME type
+            - size_bytes: file size
+            - purpose: what the file is for
+            - uploaded_at: upload time
+            - audio files additionally include: duration_seconds, sample_rate
+            Returns None when the file does not exist.
+        """
+        pass
+    @abstractmethod
+    async def list_files(
+        self,
+        purpose: Optional[str] = None,
+        limit: int = 50,
+        offset: int = 0
+    ) -> List[Dict[str, Any]]:
+        """
+        List files.
+        Args:
+            purpose: Filter by purpose (training, reference, output).
+            limit: Maximum number of entries to return.
+            offset: Number of entries to skip.
+        Returns:
+            A list of file metadata dictionaries.
+        """
+        pass
+    @abstractmethod
+    async def file_exists(self, file_id: str) -> bool:
+        """
+        Check whether a file exists.
+        Args:
+            file_id: Unique identifier of the file.
+        Returns:
+            Whether the file exists.
+        """
+        pass
+class DatabaseAdapter(ABC):
+    """
+    Abstract base class for database adapters.
+    Defines the common persistence interface, intended to be implemented
+    on top of either SQLite or PostgreSQL.
+    Manages the following entities:
+    - Task: one-click training task (Quick Mode)
+    - Experiment: experiment (Advanced Mode)
+    - Stage: the individual stages of an experiment
+    - File: records of uploaded files (optional, used with StorageAdapter)
+    Example:
+        >>> adapter = SQLiteAdapter(db_path="./data/app.db")
+        >>> task = await adapter.create_task(task_data)
+        >>> task = await adapter.get_task(task_id)
+        >>> await adapter.update_task(task_id, {"status": "completed"})
+    """
+    # ============================================================
+    # Task CRUD (Quick Mode)
+    # ============================================================
+    @abstractmethod
+    async def create_task(self, task: "Task") -> "Task":
+        """
+        Create a task.
+        Args:
+            task: Task domain-model instance.
+        Returns:
+            The created Task instance (including generated fields such as
+            created_at).
+        """
+        pass
+    @abstractmethod
+    async def get_task(self, task_id: str) -> Optional["Task"]:
+        """
+        Get a task.
+        Args:
+            task_id: Unique identifier of the task.
+        Returns:
+            The Task instance, or None if it does not exist.
+        """
+        pass
+    @abstractmethod
+    async def update_task(self, task_id: str, updates: Dict[str, Any]) -> Optional["Task"]:
+        """
+        Update a task.
+        Args:
+            task_id: Unique identifier of the task.
+            updates: Dictionary of the fields to update.
+        Returns:
+            The updated Task instance, or None if it does not exist.
+        """
+        pass
+    @abstractmethod
+    async def list_tasks(
+        self,
+        status: Optional[str] = None,
+        limit: int = 50,
+        offset: int = 0
+    ) -> List["Task"]:
+        """
+        Query the task list.
+        Args:
+            status: Filter by status.
+            limit: Maximum number of entries to return.
+            offset: Number of entries to skip.
+        Returns:
+            A list of Task instances.
+        """
+        pass
+    @abstractmethod
+    async def delete_task(self, task_id: str) -> bool:
+        """
+        Delete a task.
+        Args:
+            task_id: Unique identifier of the task.
+        Returns:
+            Whether the task was deleted successfully.
+        """
+        pass
+    @abstractmethod
+    async def count_tasks(self, status: Optional[str] = None) -> int:
+        """
+        Count tasks.
+        Args:
+            status: Filter by status.
+        Returns:
+            The number of tasks.
+        """
+        pass
+    @abstractmethod
+    async def get_task_by_exp_name(self, exp_name: str) -> Optional["Task"]:
+        """
+        Get a task by its experiment name.
+        Used to check whether an exp_name already exists.
+        Args:
+            exp_name: Experiment name.
+        Returns:
+            The Task instance, or None if it does not exist.
+        """
+        pass
+    # ============================================================
+    # Experiment CRUD (Advanced Mode)
+    # ============================================================
+    @abstractmethod
+    async def create_experiment(self, experiment: Dict[str, Any]) -> Dict[str, Any]:
+        """
+        Create an experiment.
+        Args:
+            experiment: Dictionary of experiment data.
+        Returns:
+            The created experiment data.
+        """
+        pass
+    @abstractmethod
+    async def get_experiment(self, exp_id: str) -> Optional[Dict[str, Any]]:
+        """
+        Get an experiment.
+        Args:
+            exp_id: Unique identifier of the experiment.
+        Returns:
+            Dictionary of experiment data, or None if it does not exist.
+        """
+        pass
+    @abstractmethod
+    async def update_experiment(
+        self,
+        exp_id: str,
+        updates: Dict[str, Any]
+    ) -> Optional[Dict[str, Any]]:
+        """
+        Update an experiment.
+        Args:
+            exp_id: Unique identifier of the experiment.
+            updates: Dictionary of the fields to update.
+        Returns:
+            The updated experiment data, or None if it does not exist.
+        """
+        pass
+    @abstractmethod
+    async def list_experiments(
+        self,
+        status: Optional[str] = None,
+        limit: int = 50,
+        offset: int = 0
+    ) -> List[Dict[str, Any]]:
+        """
+        Query the experiment list.
+        Args:
+            status: Filter by status.
+            limit: Maximum number of entries to return.
+            offset: Number of entries to skip.
+        Returns:
+            A list of experiment data dictionaries.
+        """
+        pass
+    @abstractmethod
+    async def delete_experiment(self, exp_id: str) -> bool:
+        """
+        Delete an experiment.
+        Args:
+            exp_id: Unique identifier of the experiment.
+        Returns:
+            Whether the experiment was deleted successfully.
+        """
+        pass
+    # ============================================================
+    # Stage operations (Advanced Mode)
+    # ============================================================
+    @abstractmethod
+    async def update_stage(
+        self,
+        exp_id: str,
+        stage_type: str,
+        updates: Dict[str, Any]
+    ) -> Optional[Dict[str, Any]]:
+        """
+        Update the state of a stage.
+        Args:
+            exp_id: Unique identifier of the experiment.
+            stage_type: Stage type.
+            updates: Dictionary of the fields to update.
+        Returns:
+            The updated stage data, or None if it does not exist.
+        """
+        pass
+    @abstractmethod
+    async def get_stage(
+        self,
+        exp_id: str,
+        stage_type: str
+    ) -> Optional[Dict[str, Any]]:
+        """
+        Get the state of a stage.
+        Args:
+            exp_id: Unique identifier of the experiment.
+            stage_type: Stage type.
+        Returns:
+            Dictionary of stage data, or None if it does not exist.
+        """
+        pass
+    @abstractmethod
+    async def get_all_stages(self, exp_id: str) -> List[Dict[str, Any]]:
+        """
+        Get the state of every stage of an experiment.
+        Args:
+            exp_id: Unique identifier of the experiment.
+        Returns:
+            A list of stage data dictionaries.
+        """
+        pass
+    # ============================================================
+    # File records (optional, used together with StorageAdapter)
+    # ============================================================
+    @abstractmethod
+    async def create_file_record(self, file_data: Dict[str, Any]) -> Dict[str, Any]:
+        """
+        Create a file record.
+        Args:
+            file_data: File metadata.
+        Returns:
+            The created file record.
+        """
+        pass
+    @abstractmethod
+    async def get_file_record(self, file_id: str) -> Optional[Dict[str, Any]]:
+        """
+        Get a file record.
+        Args:
+            file_id: Unique identifier of the file.
+        Returns:
+            The file record, or None if it does not exist.
+        """
+        pass
+    @abstractmethod
+    async def delete_file_record(self, file_id: str) -> bool:
+        """
+        Delete a file record.
+        Args:
+            file_id: Unique identifier of the file.
+        Returns:
+            Whether the record was deleted successfully.
+        """
+        pass
+    @abstractmethod
+    async def list_file_records(
+        self,
+        purpose: Optional[str] = None,
+        limit: int = 50,
+        offset: int = 0
+    ) -> List[Dict[str, Any]]:
+        """
+        Query the list of file records.
+        Args:
+            purpose: Filter by purpose.
+            limit: Maximum number of entries to return.
+            offset: Number of entries to skip.
+        Returns:
+            A list of file records.
+        """
+        pass

api_server/app/adapters/local/__init__.py CHANGED Viewed

@@ -1,9 +1,23 @@
 """
 本地适配器模块
-提供基于 SQLite 和 asyncio.subprocess 的本地实现
 """
 from .task_queue import AsyncTrainingManager
-__all__ = ["AsyncTrainingManager"]

 """
 本地适配器模块
+提供基于 SQLite 和 asyncio.subprocess 的本地实现。
+适配器列表：
+- AsyncTrainingManager: 任务队列适配器（基于 asyncio.subprocess）
+- LocalStorageAdapter: 文件存储适配器（基于本地文件系统）
+- SQLiteAdapter: 数据库适配器（基于 SQLite）
+- LocalProgressAdapter: 进度管理适配器（基于内存队列）
 """
 from .task_queue import AsyncTrainingManager
+from .storage import LocalStorageAdapter
+from .database import SQLiteAdapter
+from .progress import LocalProgressAdapter
+__all__ = [
+    "AsyncTrainingManager",
+    "LocalStorageAdapter",
+    "SQLiteAdapter",
+    "LocalProgressAdapter",
+]

api_server/app/adapters/local/database.py ADDED Viewed

	@@ -0,0 +1,683 @@

+"""
+SQLite 数据库适配器
+基于 SQLite + aiosqlite 实现的数据库适配器，适用于 macOS 本地训练场景。
+"""
+import json
+import sqlite3
+import uuid
+from datetime import datetime
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+import aiosqlite
+from ..base import DatabaseAdapter
+from ...core.config import settings
+from ...models.domain import Task, TaskStatus
+# 阶段类型列表
+STAGE_TYPES = [
+    "audio_slice",
+    "asr",
+    "text_feature",
+    "hubert_feature",
+    "semantic_token",
+    "sovits_train",
+    "gpt_train",
+]
+class SQLiteAdapter(DatabaseAdapter):
+    """
+    SQLite 数据库适配器
+    特点：
+    1. 使用 aiosqlite 实现异步数据库操作
+    2. 支持 Task (Quick Mode) 和 Experiment (Advanced Mode) 管理
+    3. 自动初始化数据库表结构
+    表结构：
+    - tasks: Quick Mode 任务
+    - experiments: Advanced Mode 实验
+    - stages: 实验阶段状态
+    - files: 文件记录
+    Example:
+        >>> adapter = SQLiteAdapter()
+        >>> task = Task(id="task-123", exp_name="my_voice", config={})
+        >>> await adapter.create_task(task)
+        >>> task = await adapter.get_task("task-123")
+    """
+    def __init__(self, db_path: Optional[str] = None):
+        """
+        初始化 SQLite 适配器
+        Args:
+            db_path: 数据库文件路径，默认使用 settings.SQLITE_PATH
+        """
+        if db_path:
+            self.db_path = db_path
+        else:
+            self.db_path = str(settings.SQLITE_PATH)
+        # 确保目录存在
+        Path(self.db_path).parent.mkdir(parents=True, exist_ok=True)
+        # 同步初始化数据库
+        self._init_db_sync()
+    def _init_db_sync(self) -> None:
+        """同步初始化数据库表结构"""
+        with sqlite3.connect(self.db_path) as conn:
+            # Tasks 表 (Quick Mode)
+            conn.execute('''
+                CREATE TABLE IF NOT EXISTS tasks (
+                    id TEXT PRIMARY KEY,
+                    job_id TEXT,
+                    exp_name TEXT NOT NULL,
+                    status TEXT NOT NULL DEFAULT 'queued',
+                    config TEXT,
+                    current_stage TEXT,
+                    progress REAL DEFAULT 0,
+                    stage_progress REAL DEFAULT 0,
+                    message TEXT,
+                    error_message TEXT,
+                    created_at TEXT NOT NULL,
+                    started_at TEXT,
+                    completed_at TEXT
+                )
+            ''')
+            conn.execute('CREATE INDEX IF NOT EXISTS idx_tasks_status ON tasks(status)')
+            conn.execute('CREATE INDEX IF NOT EXISTS idx_tasks_created ON tasks(created_at)')
+            # Experiments 表 (Advanced Mode)
+            conn.execute('''
+                CREATE TABLE IF NOT EXISTS experiments (
+                    id TEXT PRIMARY KEY,
+                    exp_name TEXT NOT NULL,
+                    version TEXT NOT NULL DEFAULT 'v2',
+                    exp_root TEXT DEFAULT 'logs',
+                    gpu_numbers TEXT DEFAULT '0',
+                    is_half INTEGER DEFAULT 1,
+                    audio_file_id TEXT,
+                    status TEXT NOT NULL DEFAULT 'created',
+                    created_at TEXT NOT NULL,
+                    updated_at TEXT
+                )
+            ''')
+            conn.execute('CREATE INDEX IF NOT EXISTS idx_experiments_status ON experiments(status)')
+            conn.execute('CREATE INDEX IF NOT EXISTS idx_experiments_created ON experiments(created_at)')
+            # Stages 表 (Advanced Mode 阶段状态)
+            conn.execute('''
+                CREATE TABLE IF NOT EXISTS stages (
+                    id TEXT PRIMARY KEY,
+                    experiment_id TEXT NOT NULL,
+                    stage_type TEXT NOT NULL,
+                    status TEXT DEFAULT 'pending',
+                    progress REAL DEFAULT 0,
+                    message TEXT,
+                    job_id TEXT,
+                    config TEXT,
+                    outputs TEXT,
+                    started_at TEXT,
+                    completed_at TEXT,
+                    error_message TEXT,
+                    FOREIGN KEY (experiment_id) REFERENCES experiments(id) ON DELETE CASCADE,
+                    UNIQUE (experiment_id, stage_type)
+                )
+            ''')
+            conn.execute('CREATE INDEX IF NOT EXISTS idx_stages_experiment ON stages(experiment_id)')
+            conn.execute('CREATE INDEX IF NOT EXISTS idx_stages_status ON stages(status)')
+            # Files 表 (文件记录)
+            conn.execute('''
+                CREATE TABLE IF NOT EXISTS files (
+                    id TEXT PRIMARY KEY,
+                    filename TEXT NOT NULL,
+                    content_type TEXT,
+                    size_bytes INTEGER DEFAULT 0,
+                    purpose TEXT DEFAULT 'training',
+                    duration_seconds REAL,
+                    sample_rate INTEGER,
+                    storage_path TEXT,
+                    uploaded_at TEXT NOT NULL
+                )
+            ''')
+            conn.execute('CREATE INDEX IF NOT EXISTS idx_files_purpose ON files(purpose)')
+            conn.execute('CREATE INDEX IF NOT EXISTS idx_files_uploaded ON files(uploaded_at)')
+            conn.commit()
+    # ============================================================
+    # Task CRUD (Quick Mode)
+    # ============================================================
+    async def create_task(self, task: Task) -> Task:
+        """创建任务"""
+        async with aiosqlite.connect(self.db_path) as db:
+            await db.execute(
+                '''INSERT INTO tasks
+                   (id, job_id, exp_name, status, config, current_stage,
+                    progress, stage_progress, message, error_message,
+                    created_at, started_at, completed_at)
+                   VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)''',
+                (
+                    task.id,
+                    task.job_id,
+                    task.exp_name,
+                    task.status.value if isinstance(task.status, TaskStatus) else task.status,
+                    json.dumps(task.config, ensure_ascii=False) if task.config else None,
+                    task.current_stage,
+                    task.progress,
+                    task.stage_progress,
+                    task.message,
+                    task.error_message,
+                    task.created_at.isoformat() if task.created_at else datetime.utcnow().isoformat(),
+                    task.started_at.isoformat() if task.started_at else None,
+                    task.completed_at.isoformat() if task.completed_at else None,
+                )
+            )
+            await db.commit()
+        return task
+    async def get_task(self, task_id: str) -> Optional[Task]:
+        """获取任务"""
+        async with aiosqlite.connect(self.db_path) as db:
+            db.row_factory = aiosqlite.Row
+            async with db.execute(
+                "SELECT * FROM tasks WHERE id = ?", (task_id,)
+            ) as cursor:
+                row = await cursor.fetchone()
+                if row:
+                    return self._row_to_task(dict(row))
+        return None
+    async def update_task(self, task_id: str, updates: Dict[str, Any]) -> Optional[Task]:
+        """更新任务"""
+        if not updates:
+            return await self.get_task(task_id)
+        # 处理特殊字段
+        processed = {}
+        for key, value in updates.items():
+            if key == "status" and isinstance(value, TaskStatus):
+                processed[key] = value.value
+            elif key == "config" and isinstance(value, dict):
+                processed[key] = json.dumps(value, ensure_ascii=False)
+            elif key in ("created_at", "started_at", "completed_at") and isinstance(value, datetime):
+                processed[key] = value.isoformat()
+            else:
+                processed[key] = value
+        async with aiosqlite.connect(self.db_path) as db:
+            set_clause = ", ".join(f"{k} = ?" for k in processed.keys())
+            values = list(processed.values()) + [task_id]
+            await db.execute(
+                f"UPDATE tasks SET {set_clause} WHERE id = ?",
+                values
+            )
+            await db.commit()
+        return await self.get_task(task_id)
+    async def list_tasks(
+        self,
+        status: Optional[str] = None,
+        limit: int = 50,
+        offset: int = 0
+    ) -> List[Task]:
+        """查询任务列表"""
+        async with aiosqlite.connect(self.db_path) as db:
+            db.row_factory = aiosqlite.Row
+            if status:
+                query = """
+                    SELECT * FROM tasks
+                    WHERE status = ?
+                    ORDER BY created_at DESC
+                    LIMIT ? OFFSET ?
+                """
+                params = (status, limit, offset)
+            else:
+                query = """
+                    SELECT * FROM tasks
+                    ORDER BY created_at DESC
+                    LIMIT ? OFFSET ?
+                """
+                params = (limit, offset)
+            async with db.execute(query, params) as cursor:
+                rows = await cursor.fetchall()
+                return [self._row_to_task(dict(row)) for row in rows]
+    async def delete_task(self, task_id: str) -> bool:
+        """删除任务"""
+        async with aiosqlite.connect(self.db_path) as db:
+            cursor = await db.execute(
+                "DELETE FROM tasks WHERE id = ?", (task_id,)
+            )
+            await db.commit()
+            return cursor.rowcount > 0
+    async def count_tasks(self, status: Optional[str] = None) -> int:
+        """统计任务数量"""
+        async with aiosqlite.connect(self.db_path) as db:
+            if status:
+                async with db.execute(
+                    "SELECT COUNT(*) FROM tasks WHERE status = ?", (status,)
+                ) as cursor:
+                    row = await cursor.fetchone()
+            else:
+                async with db.execute("SELECT COUNT(*) FROM tasks") as cursor:
+                    row = await cursor.fetchone()
+            return row[0] if row else 0
+    async def get_task_by_exp_name(self, exp_name: str) -> Optional[Task]:
+        """根据实验名称获取任务"""
+        async with aiosqlite.connect(self.db_path) as db:
+            db.row_factory = aiosqlite.Row
+            async with db.execute(
+                "SELECT * FROM tasks WHERE exp_name = ? LIMIT 1", (exp_name,)
+            ) as cursor:
+                row = await cursor.fetchone()
+                if row:
+                    return self._row_to_task(dict(row))
+        return None
+    def _row_to_task(self, row: Dict[str, Any]) -> Task:
+        """将数据库行转换为 Task 对象"""
+        # 解析 config JSON
+        config = row.get("config")
+        if config and isinstance(config, str):
+            try:
+                config = json.loads(config)
+            except json.JSONDecodeError:
+                config = {}
+        return Task.from_dict({
+            "id": row["id"],
+            "job_id": row.get("job_id"),
+            "exp_name": row["exp_name"],
+            "status": row.get("status", "queued"),
+            "config": config or {},
+            "current_stage": row.get("current_stage"),
+            "progress": row.get("progress", 0.0),
+            "stage_progress": row.get("stage_progress", 0.0),
+            "message": row.get("message"),
+            "error_message": row.get("error_message"),
+            "created_at": row.get("created_at"),
+            "started_at": row.get("started_at"),
+            "completed_at": row.get("completed_at"),
+        })
+    # ============================================================
+    # Experiment CRUD (Advanced Mode)
+    # ============================================================
+    async def create_experiment(self, experiment: Dict[str, Any]) -> Dict[str, Any]:
+        """创建实验"""
+        exp_id = experiment.get("id") or f"exp-{uuid.uuid4().hex[:8]}"
+        now = datetime.utcnow().isoformat()
+        async with aiosqlite.connect(self.db_path) as db:
+            # 创建实验记录
+            await db.execute(
+                '''INSERT INTO experiments
+                   (id, exp_name, version, exp_root, gpu_numbers, is_half,
+                    audio_file_id, status, created_at, updated_at)
+                   VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)''',
+                (
+                    exp_id,
+                    experiment["exp_name"],
+                    experiment.get("version", "v2"),
+                    experiment.get("exp_root", "logs"),
+                    experiment.get("gpu_numbers", "0"),
+                    1 if experiment.get("is_half", True) else 0,
+                    experiment.get("audio_file_id"),
+                    experiment.get("status", "created"),
+                    now,
+                    now,
+                )
+            )
+            # 创建所有阶段的初始状态
+            for stage_type in STAGE_TYPES:
+                stage_id = f"{exp_id}-{stage_type}"
+                await db.execute(
+                    '''INSERT INTO stages
+                       (id, experiment_id, stage_type, status)
+                       VALUES (?, ?, ?, 'pending')''',
+                    (stage_id, exp_id, stage_type)
+                )
+            await db.commit()
+        return await self.get_experiment(exp_id)
+    async def get_experiment(self, exp_id: str) -> Optional[Dict[str, Any]]:
+        """获取实验"""
+        async with aiosqlite.connect(self.db_path) as db:
+            db.row_factory = aiosqlite.Row
+            # 获取实验基本信息
+            async with db.execute(
+                "SELECT * FROM experiments WHERE id = ?", (exp_id,)
+            ) as cursor:
+                row = await cursor.fetchone()
+                if not row:
+                    return None
+                experiment = dict(row)
+                experiment["is_half"] = bool(experiment.get("is_half", 1))
+            # 获取所有阶段状态
+            stages = {}
+            async with db.execute(
+                "SELECT * FROM stages WHERE experiment_id = ?", (exp_id,)
+            ) as cursor:
+                stage_rows = await cursor.fetchall()
+                for stage_row in stage_rows:
+                    stage = dict(stage_row)
+                    stage_type = stage["stage_type"]
+                    # 解析 JSON 字段
+                    for json_field in ("config", "outputs"):
+                        if stage.get(json_field) and isinstance(stage[json_field], str):
+                            try:
+                                stage[json_field] = json.loads(stage[json_field])
+                            except json.JSONDecodeError:
+                                stage[json_field] = None
+                    stages[stage_type] = stage
+            experiment["stages"] = stages
+            return experiment
+    async def update_experiment(
+        self,
+        exp_id: str,
+        updates: Dict[str, Any]
+    ) -> Optional[Dict[str, Any]]:
+        """更新实验"""
+        if not updates:
+            return await self.get_experiment(exp_id)
+        # 处理 is_half 布尔值
+        processed = {}
+        for key, value in updates.items():
+            if key == "is_half":
+                processed[key] = 1 if value else 0
+            elif key == "updated_at" and isinstance(value, datetime):
+                processed[key] = value.isoformat()
+            elif key != "stages":  # stages 单独处理
+                processed[key] = value
+        # 添加更新时间
+        if "updated_at" not in processed:
+            processed["updated_at"] = datetime.utcnow().isoformat()
+        async with aiosqlite.connect(self.db_path) as db:
+            if processed:
+                set_clause = ", ".join(f"{k} = ?" for k in processed.keys())
+                values = list(processed.values()) + [exp_id]
+                await db.execute(
+                    f"UPDATE experiments SET {set_clause} WHERE id = ?",
+                    values
+                )
+                await db.commit()
+        return await self.get_experiment(exp_id)
+    async def list_experiments(
+        self,
+        status: Optional[str] = None,
+        limit: int = 50,
+        offset: int = 0
+    ) -> List[Dict[str, Any]]:
+        """查询实验列表"""
+        async with aiosqlite.connect(self.db_path) as db:
+            db.row_factory = aiosqlite.Row
+            if status:
+                query = """
+                    SELECT * FROM experiments
+                    WHERE status = ?
+                    ORDER BY created_at DESC
+                    LIMIT ? OFFSET ?
+                """
+                params = (status, limit, offset)
+            else:
+                query = """
+                    SELECT * FROM experiments
+                    ORDER BY created_at DESC
+                    LIMIT ? OFFSET ?
+                """
+                params = (limit, offset)
+            async with db.execute(query, params) as cursor:
+                rows = await cursor.fetchall()
+                results = []
+                for row in rows:
+                    exp = dict(row)
+                    exp["is_half"] = bool(exp.get("is_half", 1))
+                    # 简化列表，不包含完整的 stages
+                    results.append(exp)
+                return results
+    async def delete_experiment(self, exp_id: str) -> bool:
+        """删除实验及其阶段"""
+        async with aiosqlite.connect(self.db_path) as db:
+            # 先删除阶段
+            await db.execute(
+                "DELETE FROM stages WHERE experiment_id = ?", (exp_id,)
+            )
+            # 再删除实验
+            cursor = await db.execute(
+                "DELETE FROM experiments WHERE id = ?", (exp_id,)
+            )
+            await db.commit()
+            return cursor.rowcount > 0
+    # ============================================================
+    # Stage 操作 (Advanced Mode)
+    # ============================================================
+    async def update_stage(
+        self,
+        exp_id: str,
+        stage_type: str,
+        updates: Dict[str, Any]
+    ) -> Optional[Dict[str, Any]]:
+        """更新阶段状态"""
+        if not updates:
+            return await self.get_stage(exp_id, stage_type)
+        # 处理 JSON 字段
+        processed = {}
+        for key, value in updates.items():
+            if key in ("config", "outputs") and isinstance(value, dict):
+                processed[key] = json.dumps(value, ensure_ascii=False)
+            elif key in ("started_at", "completed_at") and isinstance(value, datetime):
+                processed[key] = value.isoformat()
+            else:
+                processed[key] = value
+        async with aiosqlite.connect(self.db_path) as db:
+            set_clause = ", ".join(f"{k} = ?" for k in processed.keys())
+            values = list(processed.values()) + [exp_id, stage_type]
+            await db.execute(
+                f"UPDATE stages SET {set_clause} WHERE experiment_id = ? AND stage_type = ?",
+                values
+            )
+            await db.commit()
+        # 同时更新实验的 updated_at
+        await self.update_experiment(exp_id, {})
+        return await self.get_stage(exp_id, stage_type)
+    async def get_stage(
+        self,
+        exp_id: str,
+        stage_type: str
+    ) -> Optional[Dict[str, Any]]:
+        """获取阶段状态"""
+        async with aiosqlite.connect(self.db_path) as db:
+            db.row_factory = aiosqlite.Row
+            async with db.execute(
+                "SELECT * FROM stages WHERE experiment_id = ? AND stage_type = ?",
+                (exp_id, stage_type)
+            ) as cursor:
+                row = await cursor.fetchone()
+                if not row:
+                    return None
+                stage = dict(row)
+                # 解析 JSON 字段
+                for json_field in ("config", "outputs"):
+                    if stage.get(json_field) and isinstance(stage[json_field], str):
+                        try:
+                            stage[json_field] = json.loads(stage[json_field])
+                        except json.JSONDecodeError:
+                            stage[json_field] = None
+                return stage
+    async def get_all_stages(self, exp_id: str) -> List[Dict[str, Any]]:
+        """获取实验的所有阶段状态"""
+        async with aiosqlite.connect(self.db_path) as db:
+            db.row_factory = aiosqlite.Row
+            async with db.execute(
+                "SELECT * FROM stages WHERE experiment_id = ? ORDER BY id",
+                (exp_id,)
+            ) as cursor:
+                rows = await cursor.fetchall()
+                results = []
+                for row in rows:
+                    stage = dict(row)
+                    # 解析 JSON 字段
+                    for json_field in ("config", "outputs"):
+                        if stage.get(json_field) and isinstance(stage[json_field], str):
+                            try:
+                                stage[json_field] = json.loads(stage[json_field])
+                            except json.JSONDecodeError:
+                                stage[json_field] = None
+                    results.append(stage)
+                return results
+    # ============================================================
+    # File 记录
+    # ============================================================
+    async def create_file_record(self, file_data: Dict[str, Any]) -> Dict[str, Any]:
+        """创建文件记录"""
+        file_id = file_data.get("id") or str(uuid.uuid4())
+        now = datetime.utcnow().isoformat()
+        async with aiosqlite.connect(self.db_path) as db:
+            await db.execute(
+                '''INSERT INTO files
+                   (id, filename, content_type, size_bytes, purpose,
+                    duration_seconds, sample_rate, storage_path, uploaded_at)
+                   VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)''',
+                (
+                    file_id,
+                    file_data["filename"],
+                    file_data.get("content_type"),
+                    file_data.get("size_bytes", 0),
+                    file_data.get("purpose", "training"),
+                    file_data.get("duration_seconds"),
+                    file_data.get("sample_rate"),
+                    file_data.get("storage_path"),
+                    file_data.get("uploaded_at", now),
+                )
+            )
+            await db.commit()
+        return await self.get_file_record(file_id)
+    async def get_file_record(self, file_id: str) -> Optional[Dict[str, Any]]:
+        """获取文件记录"""
+        async with aiosqlite.connect(self.db_path) as db:
+            db.row_factory = aiosqlite.Row
+            async with db.execute(
+                "SELECT * FROM files WHERE id = ?", (file_id,)
+            ) as cursor:
+                row = await cursor.fetchone()
+                if row:
+                    return dict(row)
+        return None
+    async def delete_file_record(self, file_id: str) -> bool:
+        """删除文件记录"""
+        async with aiosqlite.connect(self.db_path) as db:
+            cursor = await db.execute(
+                "DELETE FROM files WHERE id = ?", (file_id,)
+            )
+            await db.commit()
+            return cursor.rowcount > 0
+    async def list_file_records(
+        self,
+        purpose: Optional[str] = None,
+        limit: int = 50,
+        offset: int = 0
+    ) -> List[Dict[str, Any]]:
+        """查询文件记录列表"""
+        async with aiosqlite.connect(self.db_path) as db:
+            db.row_factory = aiosqlite.Row
+            if purpose:
+                query = """
+                    SELECT * FROM files
+                    WHERE purpose = ?
+                    ORDER BY uploaded_at DESC
+                    LIMIT ? OFFSET ?
+                """
+                params = (purpose, limit, offset)
+            else:
+                query = """
+                    SELECT * FROM files
+                    ORDER BY uploaded_at DESC
+                    LIMIT ? OFFSET ?
+                """
+                params = (limit, offset)
+            async with db.execute(query, params) as cursor:
+                rows = await cursor.fetchall()
+                return [dict(row) for row in rows]
+    async def count_file_records(self, purpose: Optional[str] = None) -> int:
+        """统计文件记录数量"""
+        async with aiosqlite.connect(self.db_path) as db:
+            if purpose:
+                async with db.execute(
+                    "SELECT COUNT(*) FROM files WHERE purpose = ?", (purpose,)
+                ) as cursor:
+                    row = await cursor.fetchone()
+            else:
+                async with db.execute("SELECT COUNT(*) FROM files") as cursor:
+                    row = await cursor.fetchone()
+            return row[0] if row else 0

api_server/app/adapters/local/progress.py ADDED Viewed

	@@ -0,0 +1,238 @@

+"""
+本地进度管理适配器
+基于内存队列实现的进度管理适配器，适用于本地单实例场景。
+"""
+import asyncio
+from collections import defaultdict
+from datetime import datetime
+from typing import Any, AsyncGenerator, Dict, List, Optional
+from ..base import ProgressAdapter
+class LocalProgressAdapter(ProgressAdapter):
+    """
+    本地内存进度管理适配器
+    特点：
+    1. 使用内存字典存储最新进度
+    2. 使用 asyncio.Queue 实现订阅者模式
+    3. 支持多订阅者同时订阅同一任务
+    4. 与 AsyncTrainingManager 的进度推送机制兼容
+    注意：
+    - 进程重启后进度数据会丢失
+    - 仅适用于单实例部署
+    - 服务器模式应使用 RedisProgressAdapter
+    Example:
+        >>> adapter = LocalProgressAdapter()
+        >>> await adapter.update_progress("task-123", {
+        ...     "stage": "sovits_train",
+        ...     "progress": 0.5,
+        ...     "message": "Epoch 8/16"
+        ... })
+        >>>
+        >>> # 订阅进度
+        >>> async for progress in adapter.subscribe("task-123"):
+        ...     print(f"{progress['stage']}: {progress['progress']*100:.1f}%")
+    """
+    def __init__(self):
+        """初始化本地进度适配器"""
+        # 存储每个任务的最新进度
+        self.progress_store: Dict[str, Dict[str, Any]] = {}
+        # 存储每个任务的订阅者队列列表
+        self.subscribers: Dict[str, List[asyncio.Queue]] = defaultdict(list)
+        # 锁，用于保护订阅者列表的并发访问
+        self._lock = asyncio.Lock()
+    async def update_progress(self, task_id: str, progress: Dict[str, Any]) -> None:
+        """
+        更新进度
+        Args:
+            task_id: 任务ID
+            progress: 进度信息字典，可包含:
+                - type: 消息类型 ("progress", "log", "error", "heartbeat")
+                - stage: 当前阶段
+                - progress: 阶段进度 (0.0-1.0)
+                - overall_progress: 总体进度 (0.0-1.0)
+                - message: 进度消息
+                - status: 状态 ("running", "completed", "failed", "cancelled")
+        """
+        # 添加时间戳
+        if "timestamp" not in progress:
+            progress["timestamp"] = datetime.utcnow().isoformat()
+        # 存储最新进度
+        self.progress_store[task_id] = progress
+        # 通知所有订阅者
+        async with self._lock:
+            if task_id in self.subscribers:
+                for queue in self.subscribers[task_id]:
+                    try:
+                        await queue.put(progress)
+                    except asyncio.QueueFull:
+                        # 队列满了，跳过（避免阻塞）
+                        pass
+    async def get_progress(self, task_id: str) -> Optional[Dict[str, Any]]:
+        """
+        获取当前进度
+        Args:
+            task_id: 任务ID
+        Returns:
+            最新进度信息，不存在则返回 None
+        """
+        return self.progress_store.get(task_id)
+    async def subscribe(self, task_id: str) -> AsyncGenerator[Dict[str, Any], None]:
+        """
+        订阅进度更新
+        创建一个异步生成器，持续接收指定任务的进度更新。
+        当任务进入终态（completed, failed, cancelled）时自动结束。
+        Args:
+            task_id: 任务ID
+        Yields:
+            进度信息字典
+        Example:
+            >>> async for progress in adapter.subscribe("task-123"):
+            ...     print(progress)
+            ...     if progress.get("status") == "completed":
+            ...         break
+        """
+        # 创建订阅者队列
+        queue: asyncio.Queue = asyncio.Queue(maxsize=100)
+        async with self._lock:
+            self.subscribers[task_id].append(queue)
+        try:
+            # 首先发送当前进度（如果有）
+            current = self.progress_store.get(task_id)
+            if current:
+                yield current
+                # 如果已经是终态，直接返回
+                if current.get("status") in ("completed", "failed", "cancelled"):
+                    return
+            # 持续接收更新
+            while True:
+                try:
+                    # 30秒超时，发送心跳
+                    progress = await asyncio.wait_for(queue.get(), timeout=30.0)
+                    yield progress
+                    # 检查是否为终态
+                    if progress.get("status") in ("completed", "failed", "cancelled"):
+                        break
+                except asyncio.TimeoutError:
+                    # 发送心跳保持连接
+                    yield {
+                        "type": "heartbeat",
+                        "timestamp": datetime.utcnow().isoformat(),
+                    }
+        finally:
+            # 清理订阅者
+            async with self._lock:
+                if task_id in self.subscribers:
+                    try:
+                        self.subscribers[task_id].remove(queue)
+                    except ValueError:
+                        pass
+                    # 如果没有订阅者了，清理列表
+                    if not self.subscribers[task_id]:
+                        del self.subscribers[task_id]
+    async def clear_progress(self, task_id: str) -> None:
+        """
+        清除任务进度数据
+        Args:
+            task_id: 任务ID
+        """
+        self.progress_store.pop(task_id, None)
+        async with self._lock:
+            self.subscribers.pop(task_id, None)
+    async def get_subscriber_count(self, task_id: str) -> int:
+        """
+        获取任务的订阅者数量
+        Args:
+            task_id: 任务ID
+        Returns:
+            订阅者数量
+        """
+        async with self._lock:
+            return len(self.subscribers.get(task_id, []))
+    async def broadcast_to_all(self, message: Dict[str, Any]) -> int:
+        """
+        向所有任务的订阅者广播消息
+        用于系统级通知，如服务器关闭警告等。
+        Args:
+            message: 消息内容
+        Returns:
+            发送成功的订阅者数量
+        """
+        if "timestamp" not in message:
+            message["timestamp"] = datetime.utcnow().isoformat()
+        count = 0
+        async with self._lock:
+            for task_id, queues in self.subscribers.items():
+                for queue in queues:
+                    try:
+                        await queue.put(message)
+                        count += 1
+                    except asyncio.QueueFull:
+                        pass
+        return count
+    def get_active_tasks(self) -> List[str]:
+        """
+        获取有活跃订阅者的任务列表
+        Returns:
+            任务ID列表
+        """
+        return list(self.subscribers.keys())
+    def get_stats(self) -> Dict[str, Any]:
+        """
+        获取适配器统计信息
+        Returns:
+            统计信息字典
+        """
+        total_subscribers = sum(
+            len(queues) for queues in self.subscribers.values()
+        )
+        return {
+            "stored_progress_count": len(self.progress_store),
+            "active_tasks": len(self.subscribers),
+            "total_subscribers": total_subscribers,
+        }

api_server/app/adapters/local/storage.py ADDED Viewed

	@@ -0,0 +1,342 @@

+"""
+本地文件存储适配器
+基于本地文件系统实现的存储适配器，适用于 macOS 本地训练场景。
+"""
+import json
+import mimetypes
+import uuid
+from datetime import datetime
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+import aiofiles
+from ..base import StorageAdapter
+from ...core.config import settings
+class LocalStorageAdapter(StorageAdapter):
+    """
+    本地文件系统存储适配器
+    特点：
+    1. 使用 aiofiles 进行异步文件读写
+    2. 元数据存储在 .meta.json 文件中
+    3. 支持音频文件信息提取（时长、采样率等）
+    目录结构：
+    ```
+    base_path/
+    ├── {file_id}              # 实际文件
+    └── {file_id}.meta.json    # 元数据文件
+    ```
+    Example:
+        >>> adapter = LocalStorageAdapter()
+        >>> file_id = await adapter.upload_file(
+        ...     file_data=b"...",
+        ...     filename="audio.wav",
+        ...     metadata={"purpose": "training"}
+        ... )
+        >>> content = await adapter.download_file(file_id)
+        >>> metadata = await adapter.get_file_metadata(file_id)
+    """
+    def __init__(self, base_path: Optional[str] = None):
+        """
+        初始化本地存储适配器
+        Args:
+            base_path: 文件存储根目录，默认使用 settings.DATA_DIR / "files"
+        """
+        if base_path:
+            self.base_path = Path(base_path)
+        else:
+            self.base_path = settings.DATA_DIR / "files"
+        # 确保目录存在
+        self.base_path.mkdir(parents=True, exist_ok=True)
+    def _get_file_path(self, file_id: str) -> Path:
+        """获取文件存储路径"""
+        return self.base_path / file_id
+    def _get_meta_path(self, file_id: str) -> Path:
+        """获取元数据文件路径"""
+        return self.base_path / f"{file_id}.meta.json"
+    async def upload_file(
+        self,
+        file_data: bytes,
+        filename: str,
+        metadata: Dict[str, Any]
+    ) -> str:
+        """
+        上传文件到本地文件系统
+        Args:
+            file_data: 文件二进制数据
+            filename: 原始文件名
+            metadata: 文件元数据
+        Returns:
+            file_id: 生成的文件唯一标识
+        """
+        # 生成文件ID
+        file_id = str(uuid.uuid4())
+        # 确定文件扩展名
+        suffix = Path(filename).suffix
+        if suffix:
+            file_id = f"{file_id}{suffix}"
+        file_path = self._get_file_path(file_id)
+        meta_path = self._get_meta_path(file_id)
+        # 写入文件
+        async with aiofiles.open(file_path, 'wb') as f:
+            await f.write(file_data)
+        # 猜测 MIME 类型
+        content_type = metadata.get("content_type")
+        if not content_type:
+            content_type, _ = mimetypes.guess_type(filename)
+            content_type = content_type or "application/octet-stream"
+        # 构建元数据
+        file_metadata = {
+            "id": file_id,
+            "filename": filename,
+            "content_type": content_type,
+            "size_bytes": len(file_data),
+            "purpose": metadata.get("purpose", "training"),
+            "uploaded_at": datetime.utcnow().isoformat(),
+            **{k: v for k, v in metadata.items() if k not in ("content_type", "purpose")}
+        }
+        # 尝试提取音频信息
+        if content_type and content_type.startswith("audio/"):
+            audio_info = await self._extract_audio_info(file_path)
+            if audio_info:
+                file_metadata.update(audio_info)
+        # 写入元数据
+        async with aiofiles.open(meta_path, 'w', encoding='utf-8') as f:
+            await f.write(json.dumps(file_metadata, ensure_ascii=False, indent=2))
+        return file_id
+    async def download_file(self, file_id: str) -> bytes:
+        """
+        下载文件
+        Args:
+            file_id: 文件唯一标识
+        Returns:
+            文件二进制数据
+        Raises:
+            FileNotFoundError: 文件不存在时抛出
+        """
+        file_path = self._get_file_path(file_id)
+        if not file_path.exists():
+            raise FileNotFoundError(f"File not found: {file_id}")
+        async with aiofiles.open(file_path, 'rb') as f:
+            return await f.read()
+    async def delete_file(self, file_id: str) -> bool:
+        """
+        删除文件及其元数据
+        Args:
+            file_id: 文件唯一标识
+        Returns:
+            是否成功删除
+        """
+        file_path = self._get_file_path(file_id)
+        meta_path = self._get_meta_path(file_id)
+        deleted = False
+        # 删除文件
+        if file_path.exists():
+            file_path.unlink()
+            deleted = True
+        # 删除元数据
+        if meta_path.exists():
+            meta_path.unlink()
+            deleted = True
+        return deleted
+    async def get_file_metadata(self, file_id: str) -> Optional[Dict[str, Any]]:
+        """
+        获取文件元数据
+        Args:
+            file_id: 文件唯一标识
+        Returns:
+            文件元数据字典，不存在则返回 None
+        """
+        meta_path = self._get_meta_path(file_id)
+        if not meta_path.exists():
+            return None
+        try:
+            async with aiofiles.open(meta_path, 'r', encoding='utf-8') as f:
+                content = await f.read()
+                return json.loads(content)
+        except (json.JSONDecodeError, IOError):
+            return None
+    async def list_files(
+        self,
+        purpose: Optional[str] = None,
+        limit: int = 50,
+        offset: int = 0
+    ) -> List[Dict[str, Any]]:
+        """
+        列出文件
+        Args:
+            purpose: 按用途筛选
+            limit: 返回数量限制
+            offset: 偏移量
+        Returns:
+            文件元数据列表
+        """
+        results = []
+        # 遍历所有 .meta.json 文件
+        meta_files = sorted(
+            self.base_path.glob("*.meta.json"),
+            key=lambda p: p.stat().st_mtime,
+            reverse=True  # 最新的在前
+        )
+        for meta_path in meta_files:
+            try:
+                async with aiofiles.open(meta_path, 'r', encoding='utf-8') as f:
+                    content = await f.read()
+                    metadata = json.loads(content)
+                    # 按用途筛选
+                    if purpose and metadata.get("purpose") != purpose:
+                        continue
+                    results.append(metadata)
+            except (json.JSONDecodeError, IOError):
+                continue
+        # 应用分页
+        return results[offset:offset + limit]
+    async def file_exists(self, file_id: str) -> bool:
+        """
+        检查文件是否存在
+        Args:
+            file_id: 文件唯一标识
+        Returns:
+            文件是否存在
+        """
+        file_path = self._get_file_path(file_id)
+        return file_path.exists()
+    async def count_files(self, purpose: Optional[str] = None) -> int:
+        """
+        统计文件数量
+        Args:
+            purpose: 按用途筛选
+        Returns:
+            文件数量
+        """
+        if not purpose:
+            # 直接计数 meta 文件
+            return len(list(self.base_path.glob("*.meta.json")))
+        # 需要筛选时读取元数据
+        count = 0
+        for meta_path in self.base_path.glob("*.meta.json"):
+            try:
+                async with aiofiles.open(meta_path, 'r', encoding='utf-8') as f:
+                    content = await f.read()
+                    metadata = json.loads(content)
+                    if metadata.get("purpose") == purpose:
+                        count += 1
+            except (json.JSONDecodeError, IOError):
+                continue
+        return count
+    async def _extract_audio_info(self, file_path: Path) -> Optional[Dict[str, Any]]:
+        """
+        提取音频文件信息（时长、采样率等）
+        Args:
+            file_path: 音频文件路径
+        Returns:
+            音频信息字典，提取失败返回 None
+        """
+        try:
+            # 尝试使用 soundfile（如果可用）
+            import soundfile as sf
+            info = sf.info(str(file_path))
+            return {
+                "duration_seconds": info.duration,
+                "sample_rate": info.samplerate,
+                "channels": info.channels,
+            }
+        except ImportError:
+            # soundfile 不可用，尝试使用 wave 模块处理 WAV 文件
+            if file_path.suffix.lower() == '.wav':
+                try:
+                    import wave
+                    with wave.open(str(file_path), 'rb') as wf:
+                        frames = wf.getnframes()
+                        rate = wf.getframerate()
+                        channels = wf.getnchannels()
+                        duration = frames / float(rate) if rate > 0 else 0
+                        return {
+                            "duration_seconds": duration,
+                            "sample_rate": rate,
+                            "channels": channels,
+                        }
+                except Exception:
+                    pass
+        except Exception:
+            pass
+        return None
+    async def get_file_path(self, file_id: str) -> Optional[Path]:
+        """
+        获取文件的本地路径（供其他模块直接访问文件使用）
+        Args:
+            file_id: 文件唯一标识
+        Returns:
+            文件路径，不存在则返回 None
+        """
+        file_path = self._get_file_path(file_id)
+        if file_path.exists():
+            return file_path
+        return None

api_server/app/adapters/local/task_queue.py CHANGED Viewed

@@ -13,13 +13,16 @@ import sys
 import uuid
 from datetime import datetime
 from pathlib import Path
-from typing import Dict, Optional, AsyncGenerator, List
 import aiosqlite
 from ..base import TaskQueueAdapter
 from ...core.config import settings, PROJECT_ROOT, get_pythonpath
 # 进度消息标识符（与 run_pipeline.py 保持一致）
 PROGRESS_PREFIX = "##PROGRESS##"
 PROGRESS_SUFFIX = "##"
@@ -47,22 +50,32 @@ class AsyncTrainingManager(TaskQueueAdapter):
         >>> await manager.cancel(job_id)
     """
-    def __init__(self, db_path: str = None, max_concurrent: int = 1):
         """
         初始化任务管理器
         Args:
             db_path: SQLite 数据库路径，默认使用 settings.SQLITE_PATH
             max_concurrent: 最大并发任务数（本地通常为1）
         """
         self.db_path = db_path or str(settings.SQLITE_PATH)
         self.max_concurrent = max_concurrent
         # 运行时状态
         self.running_processes: Dict[str, asyncio.subprocess.Process] = {}  # task_id -> Process
         self.progress_channels: Dict[str, asyncio.Queue] = {}  # task_id -> Queue
         self._running_count = 0
         self._queue_lock = asyncio.Lock()
         # 初始化数据库
         self._init_db_sync()
@@ -123,6 +136,9 @@ class AsyncTrainingManager(TaskQueueAdapter):
             )
             await db.commit()
         # 创建进度队列
         self.progress_channels[task_id] = asyncio.Queue()
@@ -381,6 +397,8 @@ class AsyncTrainingManager(TaskQueueAdapter):
         """
         更新任务状态
         Args:
             job_id: 作业ID
             **kwargs: 要更新的字段
@@ -388,6 +406,8 @@ class AsyncTrainingManager(TaskQueueAdapter):
         if not kwargs:
             return
         async with aiosqlite.connect(self.db_path) as db:
             updates = []
             values = []
@@ -403,6 +423,57 @@ class AsyncTrainingManager(TaskQueueAdapter):
                 values
             )
             await db.commit()
     async def get_status(self, job_id: str) -> Dict:
         """

 import uuid
 from datetime import datetime
 from pathlib import Path
+from typing import TYPE_CHECKING, Dict, Optional, AsyncGenerator, List
 import aiosqlite
 from ..base import TaskQueueAdapter
 from ...core.config import settings, PROJECT_ROOT, get_pythonpath
+if TYPE_CHECKING:
+    from ..base import DatabaseAdapter
 # 进度消息标识符（与 run_pipeline.py 保持一致）
 PROGRESS_PREFIX = "##PROGRESS##"
 PROGRESS_SUFFIX = "##"
         >>> await manager.cancel(job_id)
     """
+    def __init__(
+        self,
+        db_path: str = None,
+        max_concurrent: int = 1,
+        database_adapter: "DatabaseAdapter" = None
+    ):
         """
         初始化任务管理器
         Args:
             db_path: SQLite 数据库路径，默认使用 settings.SQLITE_PATH
             max_concurrent: 最大并发任务数（本地通常为1）
+            database_adapter: 数据库适配器，用于同步更新 tasks 表
         """
         self.db_path = db_path or str(settings.SQLITE_PATH)
         self.max_concurrent = max_concurrent
+        self._database_adapter = database_adapter
         # 运行时状态
         self.running_processes: Dict[str, asyncio.subprocess.Process] = {}  # task_id -> Process
         self.progress_channels: Dict[str, asyncio.Queue] = {}  # task_id -> Queue
         self._running_count = 0
         self._queue_lock = asyncio.Lock()
+        # task_id 到 job_id 的映射缓存
+        self._task_job_mapping: Dict[str, str] = {}
         # 初始化数据库
         self._init_db_sync()
             )
             await db.commit()
+        # 缓存 task_id -> job_id 映射
+        self._task_job_mapping[task_id] = job_id
         # 创建进度队列
         self.progress_channels[task_id] = asyncio.Queue()
         """
         更新任务状态
+        同时更新 task_queue 表和 tasks 表（通过 DatabaseAdapter）。
         Args:
             job_id: 作业ID
             **kwargs: 要更新的字段
         if not kwargs:
             return
+        # 1. 更新 task_queue 表
+        task_id = None
         async with aiosqlite.connect(self.db_path) as db:
             updates = []
             values = []
                 values
             )
             await db.commit()
+            # 获取 task_id 用于同步更新 tasks 表
+            async with db.execute(
+                "SELECT task_id FROM task_queue WHERE job_id = ?", (job_id,)
+            ) as cursor:
+                row = await cursor.fetchone()
+                if row:
+                    task_id = row[0]
+        # 2. 同步更新 tasks 表（通过 DatabaseAdapter）
+        if self._database_adapter and task_id:
+            await self._sync_to_tasks_table(task_id, kwargs)
+    async def _sync_to_tasks_table(self, task_id: str, updates: Dict) -> None:
+        """
+        同步状态更新到 tasks 表
+        字段映射：
+        - task_queue.progress -> tasks.stage_progress
+        - task_queue.overall_progress -> tasks.progress
+        - 其他字段直接映射
+        Args:
+            task_id: 任务ID
+            updates: 要更新的字段字典
+        """
+        if not self._database_adapter:
+            return
+        # 字段映射
+        tasks_updates = {}
+        for key, value in updates.items():
+            if key == 'progress':
+                # task_queue.progress -> tasks.stage_progress
+                tasks_updates['stage_progress'] = value
+            elif key == 'overall_progress':
+                # task_queue.overall_progress -> tasks.progress
+                tasks_updates['progress'] = value
+            elif key in ('status', 'current_stage', 'message', 'error_message',
+                        'started_at', 'completed_at'):
+                # 直接映射的字段
+                tasks_updates[key] = value
+        if tasks_updates:
+            try:
+                await self._database_adapter.update_task(task_id, tasks_updates)
+            except Exception as e:
+                # 记录错误但不中断主流程
+                import logging
+                logging.warning(f"Failed to sync task status to tasks table: {e}")
     async def get_status(self, job_id: str) -> Dict:
         """

api_server/app/api/__init__.py ADDED Viewed

	@@ -0,0 +1,9 @@

+"""
+API 模块
+包含所有 API 路由和端点
+"""
+from .v1.router import api_router
+__all__ = ["api_router"]

api_server/app/api/deps.py ADDED Viewed

	@@ -0,0 +1,96 @@

+"""
+依赖注入模块
+提供 FastAPI 依赖注入函数，用于获取服务和适配器实例
+"""
+from functools import lru_cache
+from typing import Generator
+from ..services.task_service import TaskService
+from ..services.experiment_service import ExperimentService
+from ..services.file_service import FileService
+# ============================================================
+# 服务依赖
+# ============================================================
+@lru_cache()
+def get_task_service() -> TaskService:
+    """
+    Return the TaskService instance.
+    The function takes no arguments, so ``lru_cache`` returns the object
+    created by the first call on every later call.
+    Returns:
+        TaskService instance.
+    Example:
+        >>> @router.post("/tasks")
+        ... async def create_task(
+        ...     request: QuickModeRequest,
+        ...     service: TaskService = Depends(get_task_service)
+        ... ):
+        ...     return await service.create_quick_task(request)
+    """
+    return TaskService()
+@lru_cache()
+def get_experiment_service() -> ExperimentService:
+    """
+    Return the ExperimentService instance.
+    The function takes no arguments, so ``lru_cache`` returns the object
+    created by the first call on every later call.
+    Returns:
+        ExperimentService instance.
+    """
+    return ExperimentService()
+@lru_cache()
+def get_file_service() -> FileService:
+    """
+    Return the FileService instance.
+    The function takes no arguments, so ``lru_cache`` returns the object
+    created by the first call on every later call.
+    Returns:
+        FileService instance.
+    """
+    return FileService()
+# ============================================================
+# 通用依赖
+# ============================================================
+async def get_pagination_params(
+    limit: int = 50,
+    offset: int = 0
+) -> dict:
+    """
+    分页参数依赖
+    Args:
+        limit: 每页数量，默认 50，最大 100
+        offset: 偏移量，默认 0
+    Returns:
+        分页参数字典
+    """
+    # 限制最大值
+    if limit > 100:
+        limit = 100
+    if limit < 1:
+        limit = 1
+    if offset < 0:
+        offset = 0
+    return {"limit": limit, "offset": offset}
+__all__ = [
+    "get_task_service",
+    "get_experiment_service",
+    "get_file_service",
+    "get_pagination_params",
+]

api_server/app/api/v1/__init__.py ADDED Viewed

	@@ -0,0 +1,9 @@

+"""
+API v1 模块
+包含 v1 版本的所有 API 端点
+"""
+from .router import api_router
+__all__ = ["api_router"]

api_server/app/api/v1/endpoints/__init__.py ADDED Viewed

	@@ -0,0 +1,17 @@

+"""
+API v1 端点模块
+包含所有 API 端点实现
+"""
+from . import tasks
+from . import experiments
+from . import files
+from . import stages
+__all__ = [
+    "tasks",
+    "experiments",
+    "files",
+    "stages",
+]

api_server/app/api/v1/endpoints/experiments.py ADDED Viewed

	@@ -0,0 +1,393 @@

+"""
+Advanced Mode 实验 API
+专家用户分阶段训练 API 端点
+API 列表:
+- POST   /experiments                                   创建实验
+- GET    /experiments                                   获取实验列表
+- GET    /experiments/{exp_id}                          获取实验详情
+- PATCH  /experiments/{exp_id}                          更新实验配置
+- DELETE /experiments/{exp_id}                          删除实验
+- POST   /experiments/{exp_id}/stages/{stage_type}      执行阶段
+- GET    /experiments/{exp_id}/stages                   获取所有阶段状态
+- GET    /experiments/{exp_id}/stages/{stage_type}      获取阶段详情
+- DELETE /experiments/{exp_id}/stages/{stage_type}      取消阶段
+- GET    /experiments/{exp_id}/stages/{stage_type}/progress  SSE 阶段进度
+"""
+import json
+from typing import Any, Dict, Optional
+from fastapi import APIRouter, Body, Depends, HTTPException, Query
+from fastapi.responses import StreamingResponse
+from ....models.schemas.experiment import (
+    ExperimentCreate,
+    ExperimentUpdate,
+    ExperimentResponse,
+    ExperimentListResponse,
+    StageStatus,
+    StageExecuteResponse,
+    StagesListResponse,
+    STAGE_DEPENDENCIES,
+    get_stage_params_class,
+)
+from ....models.schemas.common import SuccessResponse, ErrorResponse
+from ....services.experiment_service import ExperimentService
+from ...deps import get_experiment_service
+router = APIRouter()
+# Valid stage types: the keys of STAGE_DEPENDENCIES
+VALID_STAGE_TYPES = list(STAGE_DEPENDENCIES.keys())
+@router.post(
+    "",
+    response_model=ExperimentResponse,
+    summary="创建实验",
+    description="""
+创建实验（专家用户）。
+创建实验但不立即执行，用户可以逐阶段控制训练流程。
+实验创建后，所有阶段状态为 `pending`，需要手动触发执行。
+**训练阶段**:
+- `audio_slice`: 音频切片
+- `asr`: 语音识别
+- `text_feature`: 文本特征提取
+- `hubert_feature`: HuBERT 特征提取
+- `semantic_token`: 语义 Token 提取
+- `sovits_train`: SoVITS 训练
+- `gpt_train`: GPT 训练
+""",
+)
+async def create_experiment(
+    request: ExperimentCreate,
+    service: ExperimentService = Depends(get_experiment_service),
+) -> ExperimentResponse:
+    """
+    Create an experiment (Advanced Mode).
+    Delegates to ``ExperimentService.create_experiment`` and returns its
+    result unchanged.
+    Args:
+        request: Experiment creation payload.
+        service: Injected experiment service.
+    Returns:
+        The created experiment.
+    """
+    return await service.create_experiment(request)
+@router.get(
+    "",
+    response_model=ExperimentListResponse,
+    summary="获取实验列表",
+    description="获取所有实验列表，支持按状态筛选和分页。",
+)
+async def list_experiments(
+    status: Optional[str] = Query(None, description="按状态筛选"),
+    limit: int = Query(50, ge=1, le=100, description="每页数量"),
+    offset: int = Query(0, ge=0, description="偏移量"),
+    service: ExperimentService = Depends(get_experiment_service),
+) -> ExperimentListResponse:
+    """
+    List experiments with optional status filter and pagination.
+    Args:
+        status: Optional status filter, passed through to the service.
+        limit: Page size; validated by FastAPI to be within 1..100.
+        offset: Number of items to skip; validated to be >= 0.
+        service: Injected experiment service.
+    Returns:
+        The list response produced by ``ExperimentService.list_experiments``.
+    """
+    return await service.list_experiments(status=status, limit=limit, offset=offset)
+@router.get(
+    "/{exp_id}",
+    response_model=ExperimentResponse,
+    summary="获取实验详情",
+    description="获取指定实验的详细信息，包括所有阶段状态。",
+    responses={
+        404: {"model": ErrorResponse, "description": "实验不存在"},
+    },
+)
+async def get_experiment(
+    exp_id: str,
+    service: ExperimentService = Depends(get_experiment_service),
+) -> ExperimentResponse:
+    """
+    Get the details of one experiment.
+    Args:
+        exp_id: Experiment id taken from the URL path.
+        service: Injected experiment service.
+    Returns:
+        The experiment returned by the service.
+    Raises:
+        HTTPException: 404 when the service returns a falsy result.
+    """
+    experiment = await service.get_experiment(exp_id)
+    # A falsy result from the service means the experiment is unknown.
+    if not experiment:
+        raise HTTPException(status_code=404, detail="实验不存在")
+    return experiment
+@router.patch(
+    "/{exp_id}",
+    response_model=ExperimentResponse,
+    summary="更新实验配置",
+    description="更新实验的基础配置（非阶段参数）。",
+    responses={
+        404: {"model": ErrorResponse, "description": "实验不存在"},
+    },
+)
+async def update_experiment(
+    exp_id: str,
+    request: ExperimentUpdate,
+    service: ExperimentService = Depends(get_experiment_service),
+) -> ExperimentResponse:
+    """
+    更新实验配置
+    """
+    experiment = await service.update_experiment(exp_id, request)
+    if not experiment:
+        raise HTTPException(status_code=404, detail="实验不存在")
+    return experiment
+@router.delete(
+    "/{exp_id}",
+    response_model=SuccessResponse,
+    summary="删除实验",
+    description="删除实验及其所有阶段数据。如果有正在运行的阶段，会先取消执行。",
+    responses={
+        404: {"model": ErrorResponse, "description": "实验不存在"},
+    },
+)
+async def delete_experiment(
+    exp_id: str,
+    service: ExperimentService = Depends(get_experiment_service),
+) -> SuccessResponse:
+    """
+    删除实验
+    """
+    success = await service.delete_experiment(exp_id)
+    if not success:
+        raise HTTPException(status_code=404, detail="实验不存在")
+    return SuccessResponse(message="实验已删除")
+@router.post(
+    "/{exp_id}/stages/{stage_type}",
+    response_model=StageExecuteResponse,
+    summary="执行阶段",
+    description="""
+执行指定阶段。
+**阶段依赖关系**:
+- `audio_slice`: 无依赖
+- `asr`: 依赖 audio_slice
+- `text_feature`: 依赖 asr
+- `hubert_feature`: 依赖 audio_slice
+- `semantic_token`: 依赖 hubert_feature
+- `sovits_train`: 依赖 text_feature, semantic_token
+- `gpt_train`: 依赖 text_feature, semantic_token
+如果依赖阶段未完成，会返回 400 错误。
+如果阶段已完成，会重新执行（返回 `rerun: true`）。
+""",
+    responses={
+        400: {"model": ErrorResponse, "description": "阶段类型无效或依赖未满足"},
+        404: {"model": ErrorResponse, "description": "实验不存在"},
+    },
+)
+async def execute_stage(
+    exp_id: str,
+    stage_type: str,
+    params: Dict[str, Any] = Body(default={}),
+    service: ExperimentService = Depends(get_experiment_service),
+) -> StageExecuteResponse:
+    """
+    执行阶段
+    """
+    # 验证阶段类型
+    if stage_type not in VALID_STAGE_TYPES:
+        raise HTTPException(
+            status_code=400,
+            detail=f"无效的阶段类型: {stage_type}。有效类型: {', '.join(VALID_STAGE_TYPES)}"
+        )
+    # 检查实验是否存在
+    experiment = await service.get_experiment(exp_id)
+    if not experiment:
+        raise HTTPException(status_code=404, detail="实验不存在")
+    # 检查依赖
+    deps = await service.check_stage_dependencies(exp_id, stage_type)
+    if not deps["satisfied"]:
+        raise HTTPException(
+            status_code=400,
+            detail=f"依赖阶段未完成: {', '.join(deps['missing'])}"
+        )
+    # 验证并解析参数
+    try:
+        params_class = get_stage_params_class(stage_type)
+        validated_params = params_class(**params)
+        params = validated_params.model_dump(exclude_unset=True)
+    except ValueError as e:
+        raise HTTPException(status_code=400, detail=str(e))
+    # 执行阶段
+    result = await service.execute_stage(exp_id, stage_type, params)
+    if not result:
+        raise HTTPException(status_code=404, detail="实验不存在")
+    return result
+@router.get(
+    "/{exp_id}/stages",
+    response_model=StagesListResponse,
+    summary="获取所有阶段状态",
+    description="获取实验的所有阶段状态列表。",
+    responses={
+        404: {"model": ErrorResponse, "description": "实验不存在"},
+    },
+)
+async def get_all_stages(
+    exp_id: str,
+    service: ExperimentService = Depends(get_experiment_service),
+) -> StagesListResponse:
+    """
+    获取所有阶段状态
+    """
+    result = await service.get_all_stages(exp_id)
+    if not result:
+        raise HTTPException(status_code=404, detail="实验不存在")
+    return result
+@router.get(
+    "/{exp_id}/stages/{stage_type}",
+    response_model=StageStatus,
+    summary="获取阶段详情",
+    description="获取指定阶段的详细状态和结果。",
+    responses={
+        400: {"model": ErrorResponse, "description": "阶段类型无效"},
+        404: {"model": ErrorResponse, "description": "实验或阶段不存在"},
+    },
+)
+async def get_stage(
+    exp_id: str,
+    stage_type: str,
+    service: ExperimentService = Depends(get_experiment_service),
+) -> StageStatus:
+    """
+    获取阶段详情
+    """
+    # 验证阶段类型
+    if stage_type not in VALID_STAGE_TYPES:
+        raise HTTPException(
+            status_code=400,
+            detail=f"无效的阶段类型: {stage_type}"
+        )
+    stage = await service.get_stage(exp_id, stage_type)
+    if not stage:
+        raise HTTPException(status_code=404, detail="实验或阶段不存在")
+    return stage
+@router.delete(
+    "/{exp_id}/stages/{stage_type}",
+    response_model=SuccessResponse,
+    summary="取消阶段",
+    description="取消正在执行的阶段。只有运行中的阶段可以取消。",
+    responses={
+        400: {"model": ErrorResponse, "description": "阶段未运行或无法取消"},
+        404: {"model": ErrorResponse, "description": "实验或阶段不存在"},
+    },
+)
+async def cancel_stage(
+    exp_id: str,
+    stage_type: str,
+    service: ExperimentService = Depends(get_experiment_service),
+) -> SuccessResponse:
+    """
+    取消阶段
+    """
+    # 验证阶段类型
+    if stage_type not in VALID_STAGE_TYPES:
+        raise HTTPException(
+            status_code=400,
+            detail=f"无效的阶段类型: {stage_type}"
+        )
+    success = await service.cancel_stage(exp_id, stage_type)
+    if not success:
+        raise HTTPException(
+            status_code=400,
+            detail="阶段未运行或无法取消"
+        )
+    return SuccessResponse(message=f"阶段 {stage_type} 已取消")
+@router.get(
+    "/{exp_id}/stages/{stage_type}/progress",
+    summary="SSE 阶段进度订阅",
+    description="""
+订阅阶段进度更新（Server-Sent Events）。
+返回的事件流格式：
+```
+event: progress
+data: {"epoch": 8, "total_epochs": 16, "progress": 0.50, "loss": 0.034}
+event: checkpoint
+data: {"epoch": 8, "model_path": "logs/my_voice/sovits_e8.pth"}
+event: completed
+data: {"status": "completed", "final_loss": 0.023}
+```
+""",
+    responses={
+        400: {"model": ErrorResponse, "description": "阶段类型无效"},
+        404: {"model": ErrorResponse, "description": "实验或阶段不存在"},
+    },
+)
+async def subscribe_stage_progress(
+    exp_id: str,
+    stage_type: str,
+    service: ExperimentService = Depends(get_experiment_service),
+) -> StreamingResponse:
+    """
+    SSE 阶段进度订阅
+    """
+    # 验证阶段类型
+    if stage_type not in VALID_STAGE_TYPES:
+        raise HTTPException(
+            status_code=400,
+            detail=f"无效的阶段类型: {stage_type}"
+        )
+    # 检查实验是否存在
+    experiment = await service.get_experiment(exp_id)
+    if not experiment:
+        raise HTTPException(status_code=404, detail="实验不存在")
+    async def event_generator():
+        """生成 SSE 事件流"""
+        async for progress in service.subscribe_stage_progress(exp_id, stage_type):
+            # 确定事件类型
+            event_type = progress.get("type", "progress")
+            status = progress.get("status")
+            if status == "completed":
+                event_type = "completed"
+            elif status == "failed":
+                event_type = "failed"
+            elif status == "cancelled":
+                event_type = "cancelled"
+            elif progress.get("model_path"):
+                event_type = "checkpoint"
+            # 构建 SSE 格式
+            data = json.dumps(progress, ensure_ascii=False)
+            yield f"event: {event_type}\ndata: {data}\n\n"
+            # 如果是终态，结束流
+            if status in ("completed", "failed", "cancelled"):
+                break
+    return StreamingResponse(
+        event_generator(),
+        media_type="text/event-stream",
+        headers={
+            "Cache-Control": "no-cache",
+            "Connection": "keep-alive",
+            "X-Accel-Buffering": "no",
+        },
+    )

api_server/app/api/v1/endpoints/files.py ADDED Viewed

	@@ -0,0 +1,222 @@

+"""
+文件管理 API
+文件上传、下载和管理 API 端点
+API 列表:
+- POST   /files           上传文件
+- GET    /files           获取文件列表
+- GET    /files/{file_id} 下载文件（或获取元数据）
+- DELETE /files/{file_id} 删除文件
+"""
+from typing import Optional
+from fastapi import APIRouter, Depends, File, HTTPException, Query, UploadFile
+from fastapi.responses import Response
+from ....models.schemas.file import (
+    FileMetadata,
+    FileUploadResponse,
+    FileListResponse,
+    FileDeleteResponse,
+)
+from ....models.schemas.common import ErrorResponse
+from ....services.file_service import FileService
+from ...deps import get_file_service
+# Router for the file-management endpoints defined in this module.
+router = APIRouter()
+# Audio MIME types accepted by the upload endpoint without an extension check.
+ALLOWED_AUDIO_TYPES = {
+    "audio/wav",
+    "audio/wave",
+    "audio/x-wav",
+    "audio/mpeg",
+    "audio/mp3",
+    "audio/mp4",
+    "audio/aac",
+    "audio/ogg",
+    "audio/flac",
+    "audio/x-flac",
+    "audio/webm",
+}
+# Maximum accepted upload size in bytes (500MB).
+MAX_FILE_SIZE = 500 * 1024 * 1024
+@router.post(
+    "",
+    response_model=FileUploadResponse,
+    summary="上传文件",
+    description="""
+上传音频文件用于训练。
+**支持的音频格式**:
+- WAV
+- MP3
+- FLAC
+- OGG
+- AAC
+- WebM
+**文件大小限制**: 500MB
+**用途类型**:
+- `training`: 训练音频（默认）
+- `reference`: 参考音频
+- `output`: 输出文件
+""",
+    responses={
+        200: {"model": FileUploadResponse, "description": "文件上传成功"},
+        400: {"model": ErrorResponse, "description": "文件格式或大小不合法"},
+    },
+)
+async def upload_file(
+    file: UploadFile = File(..., description="要上传的音频文件"),
+    purpose: str = Query(
+        "training",
+        description="文件用途: training, reference, output"
+    ),
+    service: FileService = Depends(get_file_service),
+) -> FileUploadResponse:
+    """
+    上传文件
+    """
+    # 验证用途
+    if purpose not in ("training", "reference", "output"):
+        raise HTTPException(
+            status_code=400,
+            detail="无效的用途类型，有效值: training, reference, output"
+        )
+    # 验证文件类型（可选，允许不明确类型的文件）
+    content_type = file.content_type
+    if content_type and content_type not in ALLOWED_AUDIO_TYPES:
+        # 检查文件扩展名
+        filename = file.filename or ""
+        ext = filename.lower().split(".")[-1] if "." in filename else ""
+        allowed_exts = {"wav", "mp3", "flac", "ogg", "aac", "webm", "m4a"}
+        if ext not in allowed_exts:
+            raise HTTPException(
+                status_code=400,
+                detail=f"不支持的文件类型: {content_type}。支持的格式: WAV, MP3, FLAC, OGG, AAC, WebM"
+            )
+    # 读取文件内容
+    file_data = await file.read()
+    # 验证文件大小
+    if len(file_data) > MAX_FILE_SIZE:
+        raise HTTPException(
+            status_code=400,
+            detail=f"文件过大，最大允许 {MAX_FILE_SIZE // (1024*1024)}MB"
+        )
+    # 验证文件不为空
+    if len(file_data) == 0:
+        raise HTTPException(
+            status_code=400,
+            detail="文件为空"
+        )
+    # 上传文件
+    return await service.upload_file(
+        file_data=file_data,
+        filename=file.filename or "audio",
+        content_type=content_type,
+        purpose=purpose,
+    )
+@router.get(
+    "",
+    response_model=FileListResponse,
+    summary="获取文件列表",
+    description="获取已上传的文件列表，支持按用途筛选和分页。",
+)
+async def list_files(
+    purpose: Optional[str] = Query(
+        None,
+        description="按用途筛选: training, reference, output"
+    ),
+    limit: int = Query(50, ge=1, le=100, description="每页数量"),
+    offset: int = Query(0, ge=0, description="偏移量"),
+    service: FileService = Depends(get_file_service),
+) -> FileListResponse:
+    """
+    获取文件列表
+    """
+    return await service.list_files(purpose=purpose, limit=limit, offset=offset)
+@router.get(
+    "/{file_id}",
+    summary="下载文件或获取元数据",
+    description="""
+根据请求类型返回文件内容或元数据。
+- 默认返回文件内容（用于下载）
+- 添加 `?metadata=true` 参数只返回元数据
+""",
+    responses={
+        200: {
+            "description": "文件内容（下载时）或元数据（metadata=true 时）",
+        },
+        404: {"model": ErrorResponse, "description": "文件不存在"},
+    },
+)
+async def get_file(
+    file_id: str,
+    metadata: bool = Query(False, description="只返回元数据"),
+    service: FileService = Depends(get_file_service),
+):
+    """
+    下载文件或获取元数据
+    """
+    if metadata:
+        # 只返回元数据
+        file_metadata = await service.get_file(file_id)
+        if not file_metadata:
+            raise HTTPException(status_code=404, detail="文件不存在")
+        return file_metadata
+    else:
+        # 下载文件
+        result = await service.download_file(file_id)
+        if not result:
+            raise HTTPException(status_code=404, detail="文件不存在")
+        file_data, filename, content_type = result
+        return Response(
+            content=file_data,
+            media_type=content_type,
+            headers={
+                "Content-Disposition": f'attachment; filename="{filename}"',
+                "Content-Length": str(len(file_data)),
+            },
+        )
+@router.delete(
+    "/{file_id}",
+    response_model=FileDeleteResponse,
+    summary="删除文件",
+    description="删除指定的文件。",
+    responses={
+        200: {"model": FileDeleteResponse, "description": "删除结果"},
+        404: {"model": ErrorResponse, "description": "文件不存在"},
+    },
+)
+async def delete_file(
+    file_id: str,
+    service: FileService = Depends(get_file_service),
+) -> FileDeleteResponse:
+    """
+    删除文件
+    """
+    result = await service.delete_file(file_id)
+    if not result.success:
+        raise HTTPException(status_code=404, detail="文件不存在或已删除")
+    return result

api_server/app/api/v1/endpoints/stages.py ADDED Viewed

	@@ -0,0 +1,247 @@

+"""
+阶段模板 API
+阶段预设和参数模板 API 端点
+API 列表:
+- GET /stages/presets              获取阶段预设列表
+- GET /stages/{stage_type}/schema  获取阶段参数模板
+"""
+from typing import Any, Dict, List
+from fastapi import APIRouter, HTTPException
+from ....models.schemas.experiment import (
+    STAGE_DEPENDENCIES,
+    STAGE_PARAMS_MAP,
+    AudioSliceParams,
+    ASRParams,
+    TextFeatureParams,
+    HubertFeatureParams,
+    SemanticTokenParams,
+    SoVITSTrainParams,
+    GPTTrainParams,
+)
+from ....models.schemas.common import ErrorResponse
+# Router for the stage-template endpoints defined in this module.
+router = APIRouter()
+# ============================================================
+# Stage presets
+# ============================================================
+# Each preset has an id, a display name, a description and the ordered list of
+# stage names it contains. The list is returned verbatim by the /presets endpoint.
+STAGE_PRESETS = [
+    {
+        "id": "full_training",
+        "name": "完整训练流程",
+        "description": "包含所有阶段的标准训练，从音频切片到模型训练",
+        "stages": [
+            "audio_slice",
+            "asr",
+            "text_feature",
+            "hubert_feature",
+            "semantic_token",
+            "sovits_train",
+            "gpt_train",
+        ],
+    },
+    {
+        "id": "retrain_sovits",
+        "name": "重训 SoVITS",
+        "description": "跳过预处理，仅重新训练 SoVITS 模型",
+        "stages": ["sovits_train"],
+    },
+    {
+        "id": "retrain_gpt",
+        "name": "重训 GPT",
+        "description": "跳过预处理，仅重新训练 GPT 模型",
+        "stages": ["gpt_train"],
+    },
+    {
+        "id": "retrain_both",
+        "name": "重训两个模型",
+        "description": "跳过预处理，重新训练 SoVITS 和 GPT 模型",
+        "stages": ["sovits_train", "gpt_train"],
+    },
+    {
+        "id": "feature_extraction",
+        "name": "特征提取",
+        "description": "仅执行音频切片和特征提取，不进行训练",
+        "stages": [
+            "audio_slice",
+            "asr",
+            "text_feature",
+            "hubert_feature",
+            "semantic_token",
+        ],
+    },
+    {
+        "id": "audio_preprocessing",
+        "name": "音频预处理",
+        "description": "仅执行音频切片和语音识别",
+        "stages": ["audio_slice", "asr"],
+    },
+]
+# ============================================================
+# Stage display information
+# ============================================================
+# Display name and description per stage type.
+# NOTE(review): the "dependencies" lists here are not read by the endpoints in
+# this module, which report dependencies from STAGE_DEPENDENCIES instead;
+# keep the two in sync or drop this copy.
+STAGE_INFO = {
+    "audio_slice": {
+        "name": "音频切片",
+        "description": "将长音频切分为短片段，便于后续处理",
+        "dependencies": [],
+    },
+    "asr": {
+        "name": "语音识别",
+        "description": "识别音频中的文本内容",
+        "dependencies": ["audio_slice"],
+    },
+    "text_feature": {
+        "name": "文本特征提取",
+        "description": "使用 BERT 模型提取文本特征",
+        "dependencies": ["asr"],
+    },
+    "hubert_feature": {
+        "name": "HuBERT 特征提取",
+        "description": "使用 HuBERT 模型提取音频特征",
+        "dependencies": ["audio_slice"],
+    },
+    "semantic_token": {
+        "name": "语义 Token 提取",
+        "description": "从 HuBERT 特征中提取语义 Token",
+        "dependencies": ["hubert_feature"],
+    },
+    "sovits_train": {
+        "name": "SoVITS 训练",
+        "description": "训练 SoVITS 声码器模型",
+        "dependencies": ["text_feature", "semantic_token"],
+    },
+    "gpt_train": {
+        "name": "GPT 训练",
+        "description": "训练 GPT 语言模型",
+        "dependencies": ["text_feature", "semantic_token"],
+    },
+}
+def get_parameter_schema(params_class: type) -> Dict[str, Any]:
+    """
+    从 Pydantic 模型生成参数 Schema
+    Args:
+        params_class: Pydantic 模型类
+    Returns:
+        参数 Schema 字典
+    """
+    schema = params_class.model_json_schema()
+    properties = schema.get("properties", {})
+    parameters = {}
+    for name, prop in properties.items():
+        param_info = {
+            "type": prop.get("type", "string"),
+            "description": prop.get("description", ""),
+        }
+        # 添加默认值
+        if "default" in prop:
+            param_info["default"] = prop["default"]
+        # 添加范围限制
+        if "minimum" in prop:
+            param_info["min"] = prop["minimum"]
+        if "maximum" in prop:
+            param_info["max"] = prop["maximum"]
+        # 处理枚举
+        if "enum" in prop:
+            param_info["enum"] = prop["enum"]
+        parameters[name] = param_info
+    return parameters
+@router.get(
+    "/presets",
+    summary="获取阶段预设列表",
+    description="""
+获取预定义的训练流程预设。
+每个预设包含一组阶段，用户可以选择预设快速配置训练流程。
+""",
+    response_model=Dict[str, List[Dict[str, Any]]],
+)
+async def get_presets() -> Dict[str, List[Dict[str, Any]]]:
+    """
+    获取阶段预设列表
+    """
+    return {"presets": STAGE_PRESETS}
+@router.get(
+    "/{stage_type}/schema",
+    summary="获取阶段参数模板",
+    description="""
+获取指定阶段的参数模板，包含参数定义、默认值和取值范围。
+前端可以使用此接口动态生成参数配置表单。
+""",
+    responses={
+        200: {"description": "阶段参数模板"},
+        404: {"model": ErrorResponse, "description": "阶段类型无效"},
+    },
+)
+async def get_stage_schema(stage_type: str) -> Dict[str, Any]:
+    """
+    获取阶段参数模板
+    """
+    # 验证阶段类型
+    if stage_type not in STAGE_PARAMS_MAP:
+        raise HTTPException(
+            status_code=404,
+            detail=f"无效的阶段类型: {stage_type}。有效类型: {', '.join(STAGE_PARAMS_MAP.keys())}"
+        )
+    # 获取阶段信息
+    stage_info = STAGE_INFO.get(stage_type, {})
+    params_class = STAGE_PARAMS_MAP[stage_type]
+    # 生成参数 schema
+    parameters = get_parameter_schema(params_class)
+    return {
+        "type": stage_type,
+        "name": stage_info.get("name", stage_type),
+        "description": stage_info.get("description", ""),
+        "dependencies": STAGE_DEPENDENCIES.get(stage_type, []),
+        "parameters": parameters,
+    }
+@router.get(
+    "",
+    summary="获取所有阶段信息",
+    description="获取所有训练阶段的信息和依赖关系。",
+)
+async def get_all_stages() -> Dict[str, Any]:
+    """
+    获取所有阶段信息
+    """
+    stages = []
+    for stage_type in STAGE_PARAMS_MAP.keys():
+        stage_info = STAGE_INFO.get(stage_type, {})
+        stages.append({
+            "type": stage_type,
+            "name": stage_info.get("name", stage_type),
+            "description": stage_info.get("description", ""),
+            "dependencies": STAGE_DEPENDENCIES.get(stage_type, []),
+        })
+    return {"stages": stages}

api_server/app/api/v1/endpoints/tasks.py ADDED Viewed

	@@ -0,0 +1,228 @@

+"""
+Quick Mode 任务 API
+小白用户一键训练 API 端点
+API 列表:
+- POST   /tasks              创建一键训练任务
+- GET    /tasks              获取任务列表
+- GET    /tasks/{task_id}    获取任务详情
+- DELETE /tasks/{task_id}    取消任务
+- GET    /tasks/{task_id}/progress  SSE 进度订阅
+"""
+import json
+from typing import Optional
+from fastapi import APIRouter, Depends, HTTPException, Query
+from fastapi.responses import StreamingResponse
+from ....models.schemas.task import (
+    QuickModeRequest,
+    TaskResponse,
+    TaskListResponse,
+)
+from ....models.schemas.common import SuccessResponse, ErrorResponse
+from ....services.task_service import TaskService
+from ...deps import get_task_service
+# Router for the Quick Mode task endpoints defined in this module.
+router = APIRouter()
+@router.post(
+    "",
+    response_model=TaskResponse,
+    summary="创建一键训练任务",
+    description="""
+创建一键训练任务（小白用户）。
+上传音频文件后，系统自动配置所有参数并执行完整训练流程：
+`audio_slice -> asr -> text_feature -> hubert_feature -> semantic_token -> sovits_train -> gpt_train`
+**质量预设**:
+- `fast`: SoVITS 4 epochs, GPT 8 epochs, 约10分钟
+- `standard`: SoVITS 8 epochs, GPT 15 epochs, 约20分钟
+- `high`: SoVITS 16 epochs, GPT 30 epochs, 约40分钟
+""",
+    responses={
+        200: {"model": TaskResponse, "description": "任务创建成功"},
+        400: {"model": ErrorResponse, "description": "请求参数错误"},
+        404: {"model": ErrorResponse, "description": "音频文件不存在"},
+        409: {"model": ErrorResponse, "description": "实验名称已存在"},
+    },
+)
+async def create_task(
+    request: QuickModeRequest,
+    service: TaskService = Depends(get_task_service),
+) -> TaskResponse:
+    """
+    创建一键训练任务
+    """
+    # 验证 exp_name 是否已存在
+    if await service.check_exp_name_exists(request.exp_name):
+        raise HTTPException(
+            status_code=409,
+            detail=f"实验名称 '{request.exp_name}' 已存在，请使用不同的名称"
+        )
+    # 验证音频文件是否存在
+    file_exists, audio_path = await service.validate_audio_file(request.audio_file_id)
+    if not file_exists:
+        raise HTTPException(
+            status_code=404,
+            detail=f"音频文件不存在: {request.audio_file_id}"
+        )
+    return await service.create_quick_task(request)
+@router.get(
+    "",
+    response_model=TaskListResponse,
+    summary="获取任务列表",
+    description="获取所有训练任务列表，支持按状态筛选和分页。",
+)
+async def list_tasks(
+    status: Optional[str] = Query(
+        None,
+        description="按状态筛选: queued, running, completed, failed, cancelled, interrupted"
+    ),
+    limit: int = Query(50, ge=1, le=100, description="每页数量"),
+    offset: int = Query(0, ge=0, description="偏移量"),
+    service: TaskService = Depends(get_task_service),
+) -> TaskListResponse:
+    """
+    获取任务列表
+    """
+    return await service.list_tasks(status=status, limit=limit, offset=offset)
+@router.get(
+    "/{task_id}",
+    response_model=TaskResponse,
+    summary="获取任务详情",
+    description="获取指定任务的详细状态信息。",
+    responses={
+        200: {"model": TaskResponse, "description": "任务详情"},
+        404: {"model": ErrorResponse, "description": "任务不存在"},
+    },
+)
+async def get_task(
+    task_id: str,
+    service: TaskService = Depends(get_task_service),
+) -> TaskResponse:
+    """
+    获取任务详情
+    """
+    task = await service.get_task(task_id)
+    if not task:
+        raise HTTPException(status_code=404, detail="任务不存在")
+    return task
+@router.delete(
+    "/{task_id}",
+    response_model=SuccessResponse,
+    summary="取消任务",
+    description="取消排队中或运行中的任务。已完成、失败或已取消的任务无法取消。",
+    responses={
+        200: {"model": SuccessResponse, "description": "任务取消成功"},
+        400: {"model": ErrorResponse, "description": "任务无法取消"},
+        404: {"model": ErrorResponse, "description": "任务不存在"},
+    },
+)
+async def cancel_task(
+    task_id: str,
+    service: TaskService = Depends(get_task_service),
+) -> SuccessResponse:
+    """
+    取消任务
+    """
+    # 先检查任务是否存在
+    task = await service.get_task(task_id)
+    if not task:
+        raise HTTPException(status_code=404, detail="任务不存在")
+    success = await service.cancel_task(task_id)
+    if not success:
+        raise HTTPException(status_code=400, detail="任务无法取消（可能已完成或已取消）")
+    return SuccessResponse(message="任务已取消")
+@router.get(
+    "/{task_id}/progress",
+    summary="SSE 进度订阅",
+    description="""
+订阅任务进度更新（Server-Sent Events）。
+返回的事件流格式：
+```
+event: progress
+data: {"stage": "sovits_train", "progress": 0.45, "message": "Epoch 8/16"}
+event: progress
+data: {"stage": "sovits_train", "progress": 0.50, "message": "Epoch 9/16"}
+event: completed
+data: {"status": "completed", "message": "训练完成"}
+```
+可能的事件类型：
+- `progress`: 进度更新
+- `log`: 日志消息
+- `heartbeat`: 心跳（保持连接）
+- `completed`: 任务完成
+- `failed`: 任务失败
+- `cancelled`: 任务取消
+""",
+    responses={
+        200: {"description": "SSE 事件流"},
+        404: {"model": ErrorResponse, "description": "任务不存在"},
+    },
+)
+async def subscribe_progress(
+    task_id: str,
+    service: TaskService = Depends(get_task_service),
+) -> StreamingResponse:
+    """
+    SSE 进度订阅
+    """
+    # 先检查任务是否存在
+    task = await service.get_task(task_id)
+    if not task:
+        raise HTTPException(status_code=404, detail="任务不存在")
+    async def event_generator():
+        """生成 SSE 事件流"""
+        async for progress in service.subscribe_progress(task_id):
+            # 确定事件类型
+            event_type = progress.get("type", "progress")
+            status = progress.get("status")
+            if status == "completed":
+                event_type = "completed"
+            elif status == "failed":
+                event_type = "failed"
+            elif status == "cancelled":
+                event_type = "cancelled"
+            elif event_type == "heartbeat":
+                event_type = "heartbeat"
+            # 构建 SSE 格式
+            data = json.dumps(progress, ensure_ascii=False)
+            yield f"event: {event_type}\ndata: {data}\n\n"
+            # 如果是终态，结束流
+            if status in ("completed", "failed", "cancelled"):
+                break
+    return StreamingResponse(
+        event_generator(),
+        media_type="text/event-stream",
+        headers={
+            "Cache-Control": "no-cache",
+            "Connection": "keep-alive",
+            "X-Accel-Buffering": "no",  # Nginx 禁用缓冲
+        },
+    )

api_server/app/api/v1/router.py ADDED Viewed

	@@ -0,0 +1,39 @@

+"""
+API v1 路由注册
+统一注册所有 v1 版本的 API 路由
+"""
+from fastapi import APIRouter
+from .endpoints import tasks, experiments, files, stages
+api_router = APIRouter()
+# Quick Mode API - 一键训练任务
+api_router.include_router(
+    tasks.router,
+    prefix="/tasks",
+    tags=["Quick Mode - 任务管理"],
+)
+# Advanced Mode API - 专家模式实验
+api_router.include_router(
+    experiments.router,
+    prefix="/experiments",
+    tags=["Advanced Mode - 实验管理"],
+)
+# 文件管理 API
+api_router.include_router(
+    files.router,
+    prefix="/files",
+    tags=["文件管理"],
+)
+# 阶段模板 API
+api_router.include_router(
+    stages.router,
+    prefix="/stages",
+    tags=["阶段模板"],
+)

api_server/app/core/adapters.py ADDED Viewed

	@@ -0,0 +1,180 @@

+"""
+适配器工厂模块
+根据 DEPLOYMENT_MODE 配置自动选择本地或服务器适配器。
+Example:
+    >>> from app.core.adapters import get_database_adapter, get_storage_adapter
+    >>> db = get_database_adapter()
+    >>> storage = get_storage_adapter()
+"""
+from functools import lru_cache
+from typing import TYPE_CHECKING
+from .config import settings
+if TYPE_CHECKING:
+    from ..adapters.base import (
+        DatabaseAdapter,
+        ProgressAdapter,
+        StorageAdapter,
+        TaskQueueAdapter,
+    )
+class AdapterFactory:
+    """
+    适配器工厂
+    根据 DEPLOYMENT_MODE 配置创建对应的适配器实例。
+    - local 模式: SQLite + 本地文件系统 + asyncio.subprocess
+    - server 模式: PostgreSQL + S3/MinIO + Celery (Phase 2)
+    """
+    @staticmethod
+    def create_storage_adapter() -> "StorageAdapter":
+        """
+        创建存储适配器
+        Returns:
+            本地模式返回 LocalStorageAdapter
+            服务器模式返回 S3StorageAdapter (Phase 2)
+        """
+        if settings.DEPLOYMENT_MODE == "local":
+            from ..adapters.local.storage import LocalStorageAdapter
+            return LocalStorageAdapter(base_path=str(settings.DATA_DIR / "files"))
+        else:
+            # Phase 2: 服务器模式
+            raise NotImplementedError("Server mode storage adapter not implemented yet")
+    @staticmethod
+    def create_database_adapter() -> "DatabaseAdapter":
+        """
+        创建数据库适配器
+        Returns:
+            本地模式返回 SQLiteAdapter
+            服务器模式返回 PostgreSQLAdapter (Phase 2)
+        """
+        if settings.DEPLOYMENT_MODE == "local":
+            from ..adapters.local.database import SQLiteAdapter
+            return SQLiteAdapter(db_path=str(settings.SQLITE_PATH))
+        else:
+            # Phase 2: 服务器模式
+            raise NotImplementedError("Server mode database adapter not implemented yet")
+    @staticmethod
+    def create_task_queue_adapter(database_adapter: "DatabaseAdapter" = None) -> "TaskQueueAdapter":
+        """
+        创建任务队列适配器
+        Args:
+            database_adapter: 数据库适配器，用于同步任务状态到 tasks 表。
+                              如果未提供，将自动创建一个实例。
+        Returns:
+            本地模式返回 AsyncTrainingManager
+            服务器模式返回 CeleryTaskQueueAdapter (Phase 2)
+        """
+        if settings.DEPLOYMENT_MODE == "local":
+            from ..adapters.local.task_queue import AsyncTrainingManager
+            from ..adapters.local.database import SQLiteAdapter
+            # 如果未提供 database_adapter，创建一个新实例用于状态同步
+            if database_adapter is None:
+                database_adapter = SQLiteAdapter(db_path=str(settings.SQLITE_PATH))
+            return AsyncTrainingManager(
+                db_path=str(settings.SQLITE_PATH),
+                database_adapter=database_adapter
+            )
+        else:
+            # Phase 2: 服务器模式
+            raise NotImplementedError("Server mode task queue adapter not implemented yet")
+    @staticmethod
+    def create_progress_adapter() -> "ProgressAdapter":
+        """
+        创建进度管理适配器
+        Returns:
+            本地模式返回 LocalProgressAdapter
+            服务器模式返回 RedisProgressAdapter (Phase 2)
+        """
+        if settings.DEPLOYMENT_MODE == "local":
+            from ..adapters.local.progress import LocalProgressAdapter
+            return LocalProgressAdapter()
+        else:
+            # Phase 2: 服务器模式
+            raise NotImplementedError("Server mode progress adapter not implemented yet")
+# ============================================================
+# 全局单例获取函数（使用 lru_cache 缓存实例）
+# ============================================================
+@lru_cache()
+def get_storage_adapter() -> "StorageAdapter":
+    """
+    获取存储适配器单例
+    Returns:
+        StorageAdapter 实例
+    """
+    return AdapterFactory.create_storage_adapter()
+@lru_cache()
+def get_database_adapter() -> "DatabaseAdapter":
+    """
+    获取数据库适配器单例
+    Returns:
+        DatabaseAdapter 实例
+    """
+    return AdapterFactory.create_database_adapter()
+@lru_cache()
+def get_task_queue_adapter() -> "TaskQueueAdapter":
+    """
+    获取任务队列适配器单例
+    使用共享的数据库适配器实例来确保状态同步一致性。
+    Returns:
+        TaskQueueAdapter 实例
+    """
+    # 使用共享的数据库适配器实例
+    db_adapter = get_database_adapter()
+    return AdapterFactory.create_task_queue_adapter(database_adapter=db_adapter)
+@lru_cache()
+def get_progress_adapter() -> "ProgressAdapter":
+    """
+    获���进度管理适配器单例
+    Returns:
+        ProgressAdapter 实例
+    """
+    return AdapterFactory.create_progress_adapter()
+# ============================================================
+# Convenience aliases (backward compatibility)
+# ============================================================
+# No module-level adapter instances are defined here: adapters are created
+# lazily on first access through the get_*_adapter() functions.
+# Code that needs an adapter at module level should call the matching
+# get_*_adapter() function instead of holding a direct instance reference.
+__all__ = [
+    "AdapterFactory",
+    "get_storage_adapter",
+    "get_database_adapter",
+    "get_task_queue_adapter",
+    "get_progress_adapter",
+]

api_server/app/main.py ADDED Viewed

	@@ -0,0 +1,155 @@

+"""
+FastAPI 应用入口
+GPT-SoVITS 音色训练 HTTP API 服务
+启动方式:
+    uvicorn api_server.app.main:app --host 0.0.0.0 --port 8000 --reload
+"""
+from contextlib import asynccontextmanager
+from typing import AsyncGenerator
+from fastapi import FastAPI
+from fastapi.middleware.cors import CORSMiddleware
+from .api.v1.router import api_router
+from .core.config import settings, ensure_data_dirs
+@asynccontextmanager
+async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]:
+    """
+    应用生命周期管理
+    启动时:
+    - 确保数据目录存在
+    - 恢复中断的任务（可选）
+    关闭时:
+    - 清理资源
+    """
+    # 启动时执行
+    print(f"Starting GPT-SoVITS Training API in {settings.DEPLOYMENT_MODE.upper()} mode")
+    print(f"  Project Root: {settings.PROJECT_ROOT}")
+    print(f"  Data Directory: {settings.DATA_DIR}")
+    print(f"  SQLite Path: {settings.SQLITE_PATH}")
+    # 确保数据目录存在
+    ensure_data_dirs()
+    # 恢复中断的任务（可选）
+    if settings.DEPLOYMENT_MODE == "local":
+        try:
+            from .core.adapters import get_task_queue_adapter
+            queue = get_task_queue_adapter()
+            # 检查是否有 recover_pending_tasks 方法
+            if hasattr(queue, 'recover_pending_tasks'):
+                count = await queue.recover_pending_tasks()
+                if count > 0:
+                    print(f"  Recovered {count} pending tasks")
+        except Exception as e:
+            print(f"  Warning: Failed to recover tasks: {e}")
+    print("  API Server ready!")
+    print(f"  Docs: http://{settings.API_HOST}:{settings.API_PORT}/docs")
+    yield
+    # 关闭时执行
+    print("Shutting down GPT-SoVITS Training API...")
+# 创建 FastAPI 应用
+app = FastAPI(
+    title="GPT-SoVITS Training API",
+    description="""
+GPT-SoVITS 音色训练 HTTP API 服务
+## 功能概述
+提供两种训练模式：
+### Quick Mode（小白用户）
+- 上传音频即可训练，系统自动配置所有参数
+- 适合个人开发者、快速验证
+### Advanced Mode（专家用户）
+- 分阶段控制训练流程
+- 精细调整每个阶段的参数
+- 适合需要深度定制的用户
+## API 分组
+- **Quick Mode - 任务管理**: `/api/v1/tasks`
+- **Advanced Mode - 实验管理**: `/api/v1/experiments`
+- **文件管理**: `/api/v1/files`
+- **阶段模板**: `/api/v1/stages`
+""",
+    version="1.0.0",
+    lifespan=lifespan,
+    docs_url="/docs",
+    redoc_url="/redoc",
+    openapi_url="/openapi.json",
+)
+# 配置 CORS
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],  # 生产环境应该限制来源
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+# 注册 API 路由
+app.include_router(api_router, prefix=settings.API_V1_PREFIX)
+# ============================================================
+# 根路由和健康检查
+# ============================================================
+@app.get("/", tags=["Root"])
+async def root():
+    """
+    根路由
+    返回 API 基本信息
+    """
+    return {
+        "name": "GPT-SoVITS Training API",
+        "version": "1.0.0",
+        "mode": settings.DEPLOYMENT_MODE,
+        "docs": "/docs",
+        "health": "/health",
+    }
+@app.get("/health", tags=["Health"])
+async def health_check():
+    """
+    健康检查端点
+    用于容器编排和负载均衡器健康检查
+    """
+    return {
+        "status": "healthy",
+        "mode": settings.DEPLOYMENT_MODE,
+    }
+# ============================================================
+# 开发模式直接运行
+# ============================================================
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run(
+        "api_server.app.main:app",
+        host=settings.API_HOST,
+        port=settings.API_PORT,
+        reload=True,
+        reload_dirs=[str(settings.API_SERVER_ROOT)],
+    )

api_server/app/models/__init__.py CHANGED Viewed

@@ -6,4 +6,75 @@
 from .domain import Task, TaskStatus, ProgressInfo
-__all__ = ["Task", "TaskStatus", "ProgressInfo"]

 from .domain import Task, TaskStatus, ProgressInfo
+# Pydantic Schemas
+from .schemas import (
+    # Common
+    SuccessResponse,
+    ErrorResponse,
+    PaginatedResponse,
+    # Task (Quick Mode)
+    QuickModeOptions,
+    QuickModeRequest,
+    TaskResponse,
+    TaskListResponse,
+    # Experiment (Advanced Mode)
+    StageType,
+    ExperimentCreate,
+    ExperimentUpdate,
+    StageStatus,
+    ExperimentResponse,
+    ExperimentListResponse,
+    StageExecuteRequest,
+    AudioSliceParams,
+    ASRParams,
+    TextFeatureParams,
+    HubertFeatureParams,
+    SemanticTokenParams,
+    SoVITSTrainParams,
+    GPTTrainParams,
+    StageExecuteResponse,
+    StagesListResponse,
+    # File
+    FileUploadResponse,
+    FileMetadata,
+    FileListResponse,
+    FileDeleteResponse,
+)
+__all__ = [
+    # Domain models
+    "Task",
+    "TaskStatus",
+    "ProgressInfo",
+    # Common schemas
+    "SuccessResponse",
+    "ErrorResponse",
+    "PaginatedResponse",
+    # Task schemas (Quick Mode)
+    "QuickModeOptions",
+    "QuickModeRequest",
+    "TaskResponse",
+    "TaskListResponse",
+    # Experiment schemas (Advanced Mode)
+    "StageType",
+    "ExperimentCreate",
+    "ExperimentUpdate",
+    "StageStatus",
+    "ExperimentResponse",
+    "ExperimentListResponse",
+    "StageExecuteRequest",
+    "AudioSliceParams",
+    "ASRParams",
+    "TextFeatureParams",
+    "HubertFeatureParams",
+    "SemanticTokenParams",
+    "SoVITSTrainParams",
+    "GPTTrainParams",
+    "StageExecuteResponse",
+    "StagesListResponse",
+    # File schemas
+    "FileUploadResponse",
+    "FileMetadata",
+    "FileListResponse",
+    "FileDeleteResponse",
+]

api_server/app/models/schemas/__init__.py ADDED Viewed

	@@ -0,0 +1,80 @@

+"""
+Pydantic Schema 模块
+包含 API 请求/响应的数据验证模型
+- common: 通用响应模型
+- task: Quick Mode 任务模型
+- experiment: Advanced Mode 实验/阶段模型
+- file: 文件管理模型
+"""
+from .common import (
+    SuccessResponse,
+    ErrorResponse,
+    PaginatedResponse,
+)
+from .task import (
+    QuickModeOptions,
+    QuickModeRequest,
+    TaskResponse,
+    TaskListResponse,
+)
+from .experiment import (
+    StageType,
+    ExperimentCreate,
+    ExperimentUpdate,
+    StageStatus,
+    ExperimentResponse,
+    ExperimentListResponse,
+    StageExecuteRequest,
+    AudioSliceParams,
+    ASRParams,
+    TextFeatureParams,
+    HubertFeatureParams,
+    SemanticTokenParams,
+    SoVITSTrainParams,
+    GPTTrainParams,
+    StageExecuteResponse,
+    StagesListResponse,
+)
+from .file import (
+    FileUploadResponse,
+    FileMetadata,
+    FileListResponse,
+    FileDeleteResponse,
+)
+__all__ = [
+    # Common
+    "SuccessResponse",
+    "ErrorResponse",
+    "PaginatedResponse",
+    # Task (Quick Mode)
+    "QuickModeOptions",
+    "QuickModeRequest",
+    "TaskResponse",
+    "TaskListResponse",
+    # Experiment (Advanced Mode)
+    "StageType",
+    "ExperimentCreate",
+    "ExperimentUpdate",
+    "StageStatus",
+    "ExperimentResponse",
+    "ExperimentListResponse",
+    "StageExecuteRequest",
+    "AudioSliceParams",
+    "ASRParams",
+    "TextFeatureParams",
+    "HubertFeatureParams",
+    "SemanticTokenParams",
+    "SoVITSTrainParams",
+    "GPTTrainParams",
+    "StageExecuteResponse",
+    "StagesListResponse",
+    # File
+    "FileUploadResponse",
+    "FileMetadata",
+    "FileListResponse",
+    "FileDeleteResponse",
+]

api_server/app/models/schemas/common.py ADDED Viewed

	@@ -0,0 +1,95 @@

+"""
+通用响应模型
+定义 API 通用的响应结构
+"""
+from typing import Any, Generic, List, Optional, TypeVar
+from pydantic import BaseModel, Field
+# Generic type variable for the item type of paginated responses
+T = TypeVar("T")
+class SuccessResponse(BaseModel):
+    """
+    通用成功响应
+    Example:
+        >>> response = SuccessResponse(message="操作成功")
+        >>> response.model_dump()
+        {'success': True, 'message': '操作成功'}
+    """
+    success: bool = Field(default=True, description="是否成功")
+    message: str = Field(default="操作成功", description="响应消息")
+    model_config = {
+        "json_schema_extra": {
+            "examples": [
+                {"success": True, "message": "操作成功"}
+            ]
+        }
+    }
+class ErrorResponse(BaseModel):
+    """
+    错误响应
+    Example:
+        >>> response = ErrorResponse(message="任务不存在", code="TASK_NOT_FOUND")
+        >>> response.model_dump()
+        {'success': False, 'message': '任务不存在', 'code': 'TASK_NOT_FOUND', 'details': None}
+    """
+    success: bool = Field(default=False, description="是否成功")
+    message: str = Field(..., description="错误消息")
+    code: Optional[str] = Field(default=None, description="错误代码")
+    details: Optional[Any] = Field(default=None, description="错误详情")
+    model_config = {
+        "json_schema_extra": {
+            "examples": [
+                {
+                    "success": False,
+                    "message": "任务不存在",
+                    "code": "TASK_NOT_FOUND",
+                    "details": None
+                }
+            ]
+        }
+    }
+class PaginatedResponse(BaseModel, Generic[T]):
+    """
+    分页响应基类
+    泛型参数 T 表示列表项的类型
+    Example:
+        >>> from typing import List
+        >>> class TaskListResponse(PaginatedResponse[TaskResponse]):
+        ...     pass
+    """
+    items: List[T] = Field(default_factory=list, description="数据列表")
+    total: int = Field(default=0, ge=0, description="总数量")
+    limit: int = Field(default=50, ge=1, le=100, description="每页数量")
+    offset: int = Field(default=0, ge=0, description="偏移量")
+    @property
+    def has_more(self) -> bool:
+        """是否有更多数据"""
+        return self.offset + len(self.items) < self.total
+    model_config = {
+        "json_schema_extra": {
+            "examples": [
+                {
+                    "items": [],
+                    "total": 0,
+                    "limit": 50,
+                    "offset": 0
+                }
+            ]
+        }
+    }

api_server/app/models/schemas/experiment.py ADDED Viewed

	@@ -0,0 +1,556 @@

+"""
+Advanced Mode 实验/阶段 Schema
+专家用户分阶段训练模式的请求/响应模型
+参考文档: development.md 4.6.2
+"""
+from datetime import datetime
+from enum import Enum
+from typing import Any, Dict, List, Literal, Optional
+from pydantic import BaseModel, Field
+# ============================================================
+# 枚举类型
+# ============================================================
+# String-valued enumeration of the seven training pipeline stages.
+# The class docstring is kept verbatim (it can surface in generated schemas).
+class StageType(str, Enum):
+    """
+    训练阶段类型枚举
+    定义了完整训练流程中的所有阶段
+    """
+    AUDIO_SLICE = "audio_slice"       # audio slicing
+    ASR = "asr"                       # speech recognition
+    TEXT_FEATURE = "text_feature"     # text feature extraction
+    HUBERT_FEATURE = "hubert_feature" # HuBERT feature extraction
+    SEMANTIC_TOKEN = "semantic_token" # semantic token extraction
+    SOVITS_TRAIN = "sovits_train"     # SoVITS training
+    GPT_TRAIN = "gpt_train"           # GPT training
+# 阶段依赖关系
+STAGE_DEPENDENCIES: Dict[str, List[str]] = {
+    "audio_slice": [],
+    "asr": ["audio_slice"],
+    "text_feature": ["asr"],
+    "hubert_feature": ["audio_slice"],
+    "semantic_token": ["hubert_feature"],
+    "sovits_train": ["text_feature", "semantic_token"],
+    "gpt_train": ["text_feature", "semantic_token"],
+}
+# ============================================================
+# 实验管理
+# ============================================================
+class ExperimentCreate(BaseModel):
+    """
+    创建实验请求
+    创建实验但不立即执行，用户可以逐阶段控制训练流程
+    Attributes:
+        exp_name: 实验名称
+        version: 模型版本
+        gpu_numbers: GPU 编号
+        is_half: 是否使用半精度
+        audio_file_id: 音频文件 ID
+    """
+    exp_name: str = Field(
+        ...,
+        min_length=1,
+        max_length=100,
+        description="实验名称"
+    )
+    version: Literal["v1", "v2", "v2Pro", "v3", "v4"] = Field(
+        default="v2",
+        description="模型版本"
+    )
+    gpu_numbers: str = Field(
+        default="0",
+        description="GPU 编号，多个 GPU 用逗号分隔，如 '0,1'"
+    )
+    is_half: bool = Field(
+        default=True,
+        description="是否使用半精度（FP16），可节省显存"
+    )
+    audio_file_id: str = Field(
+        ...,
+        description="已上传音频文件的 ID"
+    )
+    model_config = {
+        "json_schema_extra": {
+            "examples": [
+                {
+                    "exp_name": "my_voice_custom",
+                    "version": "v2",
+                    "gpu_numbers": "0",
+                    "is_half": True,
+                    "audio_file_id": "550e8400-e29b-41d4-a716-446655440000"
+                }
+            ]
+        }
+    }
+class ExperimentUpdate(BaseModel):
+    """
+    更新实验请求
+    用于更新实验的基础配置（非阶段参数）
+    """
+    exp_name: Optional[str] = Field(
+        default=None,
+        min_length=1,
+        max_length=100,
+        description="实验名称"
+    )
+    gpu_numbers: Optional[str] = Field(
+        default=None,
+        description="GPU 编号"
+    )
+    is_half: Optional[bool] = Field(
+        default=None,
+        description="是否使用半精度"
+    )
+class StageStatus(BaseModel):
+    """
+    阶段状态
+    描述单个阶段的执行状态和结果
+    """
+    stage_type: str = Field(..., description="阶段类型")
+    status: Literal["pending", "running", "completed", "failed", "cancelled"] = Field(
+        default="pending",
+        description="阶段状态"
+    )
+    progress: Optional[float] = Field(
+        default=None,
+        ge=0.0,
+        le=1.0,
+        description="阶段进度 (0.0-1.0)"
+    )
+    message: Optional[str] = Field(
+        default=None,
+        description="状态消息"
+    )
+    started_at: Optional[datetime] = Field(
+        default=None,
+        description="开始时间"
+    )
+    completed_at: Optional[datetime] = Field(
+        default=None,
+        description="完成时间"
+    )
+    config: Optional[Dict[str, Any]] = Field(
+        default=None,
+        description="阶段配置参数"
+    )
+    outputs: Optional[Dict[str, Any]] = Field(
+        default=None,
+        description="阶段输出结果"
+    )
+    error_message: Optional[str] = Field(
+        default=None,
+        description="错误消息（失败时）"
+    )
+    model_config = {
+        "json_schema_extra": {
+            "examples": [
+                {
+                    "stage_type": "sovits_train",
+                    "status": "completed",
+                    "progress": 1.0,
+                    "message": "训练完成",
+                    "started_at": "2024-01-01T10:30:00Z",
+                    "completed_at": "2024-01-01T11:00:00Z",
+                    "config": {"batch_size": 8, "total_epoch": 16},
+                    "outputs": {
+                        "model_path": "logs/my_voice/sovits_e16.pth",
+                        "metrics": {"final_loss": 0.023}
+                    }
+                }
+            ]
+        }
+    }
+class ExperimentResponse(BaseModel):
+    """
+    实验响应
+    包含实验的完整信息和所有阶段状态
+    """
+    id: str = Field(..., description="实验唯一标识")
+    exp_name: str = Field(..., description="实验名称")
+    version: str = Field(..., description="模型版本")
+    status: str = Field(..., description="实验状态")
+    gpu_numbers: str = Field(default="0", description="GPU 编号")
+    is_half: bool = Field(default=True, description="是否使用半精度")
+    audio_file_id: str = Field(..., description="音频文件 ID")
+    stages: Dict[str, StageStatus] = Field(
+        default_factory=dict,
+        description="各阶段状态"
+    )
+    created_at: datetime = Field(..., description="创建时间")
+    updated_at: Optional[datetime] = Field(default=None, description="更新时间")
+    model_config = {
+        "json_schema_extra": {
+            "examples": [
+                {
+                    "id": "exp-abc123",
+                    "exp_name": "my_voice_custom",
+                    "version": "v2",
+                    "status": "created",
+                    "gpu_numbers": "0",
+                    "is_half": True,
+                    "audio_file_id": "550e8400-e29b-41d4-a716-446655440000",
+                    "stages": {
+                        "audio_slice": {"stage_type": "audio_slice", "status": "pending"},
+                        "asr": {"stage_type": "asr", "status": "pending"},
+                        "sovits_train": {"stage_type": "sovits_train", "status": "pending"}
+                    },
+                    "created_at": "2024-01-01T10:00:00Z"
+                }
+            ]
+        }
+    }
+class ExperimentListResponse(BaseModel):
+    """
+    实验列表响应
+    """
+    items: List[ExperimentResponse] = Field(
+        default_factory=list,
+        description="实验列表"
+    )
+    total: int = Field(default=0, ge=0, description="总数量")
+    limit: int = Field(default=50, ge=1, le=100, description="每页数量")
+    offset: int = Field(default=0, ge=0, description="偏移量")
+# ============================================================
+# 阶段执行参数
+# ============================================================
+class StageExecuteRequest(BaseModel):
+    """
+    阶段执行请求基类
+    允许传入任意额外参数
+    """
+    model_config = {
+        "extra": "allow"  # 允许额外字段（阶段特定参数）
+    }
+class AudioSliceParams(StageExecuteRequest):
+    """
+    音频切片参数
+    将长音频切分为短片段
+    参考文档: development.md 4.5.2
+    """
+    threshold: int = Field(
+        default=-34,
+        ge=-60,
+        le=0,
+        description="静音检测阈值 (dB)"
+    )
+    min_length: int = Field(
+        default=4000,
+        ge=1000,
+        le=10000,
+        description="最小切片长度 (ms)"
+    )
+    min_interval: int = Field(
+        default=300,
+        ge=100,
+        le=1000,
+        description="最小静音间隔 (ms)"
+    )
+    hop_size: int = Field(
+        default=10,
+        ge=5,
+        le=50,
+        description="检测步长 (ms)"
+    )
+    max_sil_kept: int = Field(
+        default=500,
+        ge=100,
+        le=2000,
+        description="切片保留的最大静音长度 (ms)"
+    )
+    model_config = {
+        "json_schema_extra": {
+            "examples": [
+                {
+                    "threshold": -34,
+                    "min_length": 4000,
+                    "min_interval": 300,
+                    "hop_size": 10,
+                    "max_sil_kept": 500
+                }
+            ]
+        }
+    }
+class ASRParams(StageExecuteRequest):
+    """
+    ASR 语音识别参数
+    """
+    model: str = Field(
+        default="达摩 ASR (中文)",
+        description="ASR 模型名称"
+    )
+    language: str = Field(
+        default="zh",
+        description="识别语言"
+    )
+    model_config = {
+        "json_schema_extra": {
+            "examples": [
+                {"model": "达摩 ASR (中文)", "language": "zh"}
+            ]
+        }
+    }
+class TextFeatureParams(StageExecuteRequest):
+    """
+    文本特征提取参数
+    """
+    bert_pretrained_dir: Optional[str] = Field(
+        default=None,
+        description="BERT 预训练模型目录，为空使用默认"
+    )
+    model_config = {
+        "json_schema_extra": {
+            "examples": [
+                {"bert_pretrained_dir": None}
+            ]
+        }
+    }
+class HubertFeatureParams(StageExecuteRequest):
+    """
+    HuBERT 特征提取参数
+    """
+    ssl_pretrained_dir: Optional[str] = Field(
+        default=None,
+        description="SSL 预训练模型目录，为空使用默认"
+    )
+    model_config = {
+        "json_schema_extra": {
+            "examples": [
+                {"ssl_pretrained_dir": None}
+            ]
+        }
+    }
+class SemanticTokenParams(StageExecuteRequest):
+    """
+    语义 Token 提取参数
+    """
+    # 当前阶段无特殊参数，保留扩展性
+    pass
+class SoVITSTrainParams(StageExecuteRequest):
+    """
+    SoVITS 训练参数
+    参考文档: development.md 4.5.2
+    """
+    batch_size: int = Field(
+        default=4,
+        ge=1,
+        le=32,
+        description="批次大小，显存不足时减小"
+    )
+    total_epoch: int = Field(
+        default=8,
+        ge=1,
+        le=100,
+        description="训练总轮数"
+    )
+    save_every_epoch: int = Field(
+        default=4,
+        ge=1,
+        description="每 N 轮保存一次模型"
+    )
+    pretrained_s2G: Optional[str] = Field(
+        default=None,
+        description="预训练生成器模型路径，为空使用默认"
+    )
+    pretrained_s2D: Optional[str] = Field(
+        default=None,
+        description="预训练判别器模型路径，为空使用默认"
+    )
+    model_config = {
+        "json_schema_extra": {
+            "examples": [
+                {
+                    "batch_size": 8,
+                    "total_epoch": 16,
+                    "save_every_epoch": 4,
+                    "pretrained_s2G": None,
+                    "pretrained_s2D": None
+                }
+            ]
+        }
+    }
+class GPTTrainParams(StageExecuteRequest):
+    """
+    GPT 训练参数
+    """
+    batch_size: int = Field(
+        default=4,
+        ge=1,
+        le=32,
+        description="批次大小"
+    )
+    total_epoch: int = Field(
+        default=15,
+        ge=1,
+        le=100,
+        description="训练总轮数"
+    )
+    save_every_epoch: int = Field(
+        default=5,
+        ge=1,
+        description="每 N 轮保存一次模型"
+    )
+    pretrained_s1: Optional[str] = Field(
+        default=None,
+        description="预训练模型路径，为空使用默认"
+    )
+    model_config = {
+        "json_schema_extra": {
+            "examples": [
+                {
+                    "batch_size": 4,
+                    "total_epoch": 15,
+                    "save_every_epoch": 5,
+                    "pretrained_s1": None
+                }
+            ]
+        }
+    }
+class StageExecuteResponse(BaseModel):
+    """
+    阶段执行响应
+    """
+    exp_id: str = Field(..., description="实验 ID")
+    stage_type: str = Field(..., description="阶段类型")
+    status: Literal["running", "queued"] = Field(..., description="执行状态")
+    job_id: str = Field(..., description="作业 ID")
+    config: Dict[str, Any] = Field(
+        default_factory=dict,
+        description="阶段配置"
+    )
+    rerun: bool = Field(
+        default=False,
+        description="是否为重新执行"
+    )
+    previous_run: Optional[Dict[str, Any]] = Field(
+        default=None,
+        description="上次执行的信息（重新执行时）"
+    )
+    started_at: datetime = Field(..., description="开始时间")
+    model_config = {
+        "json_schema_extra": {
+            "examples": [
+                {
+                    "exp_id": "exp-abc123",
+                    "stage_type": "sovits_train",
+                    "status": "running",
+                    "job_id": "job-xyz789",
+                    "config": {"batch_size": 8, "total_epoch": 16},
+                    "rerun": False,
+                    "started_at": "2024-01-01T10:30:00Z"
+                }
+            ]
+        }
+    }
+class StagesListResponse(BaseModel):
+    """
+    所有阶段状态响应
+    """
+    exp_id: str = Field(..., description="实验 ID")
+    stages: List[StageStatus] = Field(
+        default_factory=list,
+        description="阶段状态列表"
+    )
+    model_config = {
+        "json_schema_extra": {
+            "examples": [
+                {
+                    "exp_id": "exp-abc123",
+                    "stages": [
+                        {"stage_type": "audio_slice", "status": "completed"},
+                        {"stage_type": "asr", "status": "completed"},
+                        {"stage_type": "sovits_train", "status": "running", "progress": 0.45}
+                    ]
+                }
+            ]
+        }
+    }
+# 阶段参数类型映射
+STAGE_PARAMS_MAP: Dict[str, type] = {
+    "audio_slice": AudioSliceParams,
+    "asr": ASRParams,
+    "text_feature": TextFeatureParams,
+    "hubert_feature": HubertFeatureParams,
+    "semantic_token": SemanticTokenParams,
+    "sovits_train": SoVITSTrainParams,
+    "gpt_train": GPTTrainParams,
+}
+def get_stage_params_class(stage_type: str) -> type:
+    """
+    获取阶段对应的参数类
+    Args:
+        stage_type: 阶段类型
+    Returns:
+        对应的参数 Pydantic 类
+    Raises:
+        ValueError: 无效的阶段类型
+    """
+    if stage_type not in STAGE_PARAMS_MAP:
+        raise ValueError(f"Invalid stage type: {stage_type}")
+    return STAGE_PARAMS_MAP[stage_type]

api_server/app/models/schemas/file.py ADDED Viewed

	@@ -0,0 +1,159 @@

+"""
+文件管理 Schema
+文件上传/下载相关的请求/响应模型
+"""
+from datetime import datetime
+from typing import List, Literal, Optional
+from pydantic import BaseModel, Field
+class FileMetadata(BaseModel):
+    """
+    文件元数据
+    描述已上传文件的详细信息
+    """
+    id: str = Field(..., description="文件唯一标识")
+    filename: str = Field(..., description="原始文件名")
+    content_type: Optional[str] = Field(
+        default=None,
+        description="MIME 类型，如 'audio/wav', 'audio/mp3'"
+    )
+    size_bytes: int = Field(
+        default=0,
+        ge=0,
+        description="文件大小（字节）"
+    )
+    purpose: Optional[Literal["training", "reference", "output"]] = Field(
+        default="training",
+        description="文件用途：training(训练), reference(参考音频), output(输出模型)"
+    )
+    duration_seconds: Optional[float] = Field(
+        default=None,
+        ge=0,
+        description="音频时长（秒），仅音频文件有效"
+    )
+    sample_rate: Optional[int] = Field(
+        default=None,
+        ge=0,
+        description="采样率（Hz），仅音频文件有效"
+    )
+    uploaded_at: datetime = Field(..., description="上传时间")
+    model_config = {
+        "json_schema_extra": {
+            "examples": [
+                {
+                    "id": "550e8400-e29b-41d4-a716-446655440000",
+                    "filename": "my_voice.wav",
+                    "content_type": "audio/wav",
+                    "size_bytes": 15728640,
+                    "purpose": "training",
+                    "duration_seconds": 120.5,
+                    "sample_rate": 44100,
+                    "uploaded_at": "2024-01-01T10:00:00Z"
+                }
+            ]
+        }
+    }
+class FileUploadResponse(BaseModel):
+    """
+    文件上传响应
+    上传成功后返回文件信息
+    """
+    success: bool = Field(default=True, description="是否成功")
+    message: str = Field(default="文件上传成功", description="响应消息")
+    file: FileMetadata = Field(..., description="文件元数据")
+    model_config = {
+        "json_schema_extra": {
+            "examples": [
+                {
+                    "success": True,
+                    "message": "文件上传成功",
+                    "file": {
+                        "id": "550e8400-e29b-41d4-a716-446655440000",
+                        "filename": "my_voice.wav",
+                        "content_type": "audio/wav",
+                        "size_bytes": 15728640,
+                        "purpose": "training",
+                        "uploaded_at": "2024-01-01T10:00:00Z"
+                    }
+                }
+            ]
+        }
+    }
+class FileListResponse(BaseModel):
+    """
+    文件列表响应
+    """
+    items: List[FileMetadata] = Field(
+        default_factory=list,
+        description="文件列表"
+    )
+    total: int = Field(
+        default=0,
+        ge=0,
+        description="总数量"
+    )
+    limit: int = Field(
+        default=50,
+        ge=1,
+        le=100,
+        description="每页数量"
+    )
+    offset: int = Field(
+        default=0,
+        ge=0,
+        description="偏移量"
+    )
+    model_config = {
+        "json_schema_extra": {
+            "examples": [
+                {
+                    "items": [
+                        {
+                            "id": "file-123",
+                            "filename": "voice_1.wav",
+                            "content_type": "audio/wav",
+                            "size_bytes": 5242880,
+                            "purpose": "training",
+                            "uploaded_at": "2024-01-01T10:00:00Z"
+                        }
+                    ],
+                    "total": 1,
+                    "limit": 50,
+                    "offset": 0
+                }
+            ]
+        }
+    }
+class FileDeleteResponse(BaseModel):
+    """
+    文件删除响应
+    """
+    success: bool = Field(default=True, description="是否成功")
+    message: str = Field(default="文件删除成功", description="响应消息")
+    file_id: str = Field(..., description="已删除的文件 ID")
+    model_config = {
+        "json_schema_extra": {
+            "examples": [
+                {
+                    "success": True,
+                    "message": "文件删除成功",
+                    "file_id": "550e8400-e29b-41d4-a716-446655440000"
+                }
+            ]
+        }
+    }

api_server/app/models/schemas/task.py ADDED Viewed

	@@ -0,0 +1,232 @@

+"""
+Quick Mode 任务 Schema
+小白用户一键训练模式的请求/响应模型
+参考文档: development.md 4.6.1 + 4.6.3
+"""
+from datetime import datetime
+from typing import List, Literal, Optional
+from pydantic import BaseModel, Field
+class QuickModeOptions(BaseModel):
+    """
+    Quick Mode 训练选项
+    用于一键训练时的简化参数配置
+    Attributes:
+        version: 模型版本
+        language: 训练语言
+        quality: 训练质量预设
+    质量预设说明:
+        - fast: SoVITS 4 epochs, GPT 8 epochs, ~10分钟
+        - standard: SoVITS 8 epochs, GPT 15 epochs, ~20分钟
+        - high: SoVITS 16 epochs, GPT 30 epochs, ~40分钟
+    """
+    version: Literal["v1", "v2", "v2Pro", "v3", "v4"] = Field(
+        default="v2",
+        description="模型版本"
+    )
+    language: str = Field(
+        default="zh",
+        description="训练语言，如 'zh', 'en', 'ja' 等"
+    )
+    quality: Literal["fast", "standard", "high"] = Field(
+        default="standard",
+        description="训练质量预设：fast(快速)、standard(标准)、high(高质量)"
+    )
+    model_config = {
+        "json_schema_extra": {
+            "examples": [
+                {"version": "v2", "language": "zh", "quality": "standard"}
+            ]
+        }
+    }
+class QuickModeRequest(BaseModel):
+    """
+    小白用户一键训练请求
+    创建一键训练任务，系统自动配置所有参数并执行完整流程:
+    audio_slice -> asr -> text_feature -> hubert_feature -> semantic_token -> sovits_train -> gpt_train
+    Attributes:
+        exp_name: 实验名称（用于标识训练任务）
+        audio_file_id: 已上传音频文件的 ID
+        options: 训练选项
+    """
+    exp_name: str = Field(
+        ...,
+        min_length=1,
+        max_length=100,
+        description="实验名称，用于标识训练任务和生成的模型"
+    )
+    audio_file_id: str = Field(
+        ...,
+        description="已上传音频文件的 ID"
+    )
+    options: QuickModeOptions = Field(
+        default_factory=QuickModeOptions,
+        description="训练选项"
+    )
+    model_config = {
+        "json_schema_extra": {
+            "examples": [
+                {
+                    "exp_name": "my_voice",
+                    "audio_file_id": "550e8400-e29b-41d4-a716-446655440000",
+                    "options": {
+                        "version": "v2",
+                        "language": "zh",
+                        "quality": "standard"
+                    }
+                }
+            ]
+        }
+    }
+class TaskResponse(BaseModel):
+    """
+    任务响应（Quick Mode）
+    返回任务的完整状态信息，包括进度、当前阶段等
+    Attributes:
+        id: 任务唯一标识
+        exp_name: 实验名称
+        status: 任务状态
+        current_stage: 当前执行的阶段
+        progress: 当前阶段进度 (0.0-1.0)
+        overall_progress: 总体进度 (0.0-1.0)
+        message: 最新状态消息
+        error_message: 错误消息（失败时）
+        created_at: 任务创建时间
+        started_at: 任务开始执行时间
+        completed_at: 任务完成时间
+    """
+    id: str = Field(..., description="任务唯一标识")
+    exp_name: str = Field(..., description="实验名称")
+    status: Literal["queued", "running", "completed", "failed", "cancelled", "interrupted"] = Field(
+        ...,
+        description="任务状态"
+    )
+    current_stage: Optional[str] = Field(
+        default=None,
+        description="当前执行的阶段，如 'audio_slice', 'sovits_train' 等"
+    )
+    progress: float = Field(
+        default=0.0,
+        ge=0.0,
+        le=1.0,
+        description="当前阶段进度 (0.0-1.0)"
+    )
+    overall_progress: float = Field(
+        default=0.0,
+        ge=0.0,
+        le=1.0,
+        description="总体进度 (0.0-1.0)"
+    )
+    message: Optional[str] = Field(
+        default=None,
+        description="最新状态消息"
+    )
+    error_message: Optional[str] = Field(
+        default=None,
+        description="错误消息（失败时）"
+    )
+    created_at: Optional[datetime] = Field(
+        default=None,
+        description="任务创建时间"
+    )
+    started_at: Optional[datetime] = Field(
+        default=None,
+        description="任务开始执行时间"
+    )
+    completed_at: Optional[datetime] = Field(
+        default=None,
+        description="任务完成时间"
+    )
+    model_config = {
+        "from_attributes": True,
+        "json_schema_extra": {
+            "examples": [
+                {
+                    "id": "task-550e8400-e29b-41d4-a716-446655440000",
+                    "exp_name": "my_voice",
+                    "status": "running",
+                    "current_stage": "sovits_train",
+                    "progress": 0.45,
+                    "overall_progress": 0.72,
+                    "message": "SoVITS 训练中 Epoch 8/16",
+                    "error_message": None,
+                    "created_at": "2024-01-01T10:00:00Z",
+                    "started_at": "2024-01-01T10:00:05Z",
+                    "completed_at": None
+                }
+            ]
+        }
+    }
+class TaskListResponse(BaseModel):
+    """
+    任务列表响应
+    Attributes:
+        items: 任务列表
+        total: 总数量
+        limit: 每页数量
+        offset: 偏移量
+    """
+    items: List[TaskResponse] = Field(
+        default_factory=list,
+        description="任务列表"
+    )
+    total: int = Field(
+        default=0,
+        ge=0,
+        description="总数量"
+    )
+    limit: int = Field(
+        default=50,
+        ge=1,
+        le=100,
+        description="每页数量"
+    )
+    offset: int = Field(
+        default=0,
+        ge=0,
+        description="偏移量"
+    )
+    model_config = {
+        "json_schema_extra": {
+            "examples": [
+                {
+                    "items": [
+                        {
+                            "id": "task-123",
+                            "exp_name": "voice_1",
+                            "status": "completed",
+                            "current_stage": None,
+                            "progress": 1.0,
+                            "overall_progress": 1.0,
+                            "message": "训练完成"
+                        }
+                    ],
+                    "total": 1,
+                    "limit": 50,
+                    "offset": 0
+                }
+            ]
+        }
+    }

api_server/app/scripts/run_pipeline.py CHANGED Viewed

@@ -238,9 +238,23 @@ def build_pipeline(config: Dict[str, Any]):
     }
     # 按顺序添加阶段
     stages = config.get("stages", [])
-    for stage_config in stages:
-        stage_type = stage_config.get("type")
         if stage_type in stage_builders:
             stage = stage_builders[stage_type](stage_config)
             pipeline.add_stage(stage)

     }
     # 按顺序添加阶段
+    # stages 可以是:
+    # 1. 字符串列表: ["audio_slice", "asr", ...]
+    # 2. 字典列表: [{"type": "audio_slice", "threshold": -30}, ...]
     stages = config.get("stages", [])
+    for stage_item in stages:
+        # 判断是字符串还是字典
+        if isinstance(stage_item, str):
+            stage_type = stage_item
+            stage_config = config  # 使用全局配置作为阶段配置
+        elif isinstance(stage_item, dict):
+            stage_type = stage_item.get("type")
+            # 合并全局配置和阶段特定配置
+            stage_config = {**config, **stage_item}
+        else:
+            emit_log("warning", f"无效的阶段配置类型: {type(stage_item)}")
+            continue
         if stage_type in stage_builders:
             stage = stage_builders[stage_type](stage_config)
             pipeline.add_stage(stage)

api_server/app/services/__init__.py ADDED Viewed

	@@ -0,0 +1,20 @@

+"""
+服务层模块
+业务逻辑层，封装适配器调用，提供高级业务操作。
+服务列表：
+- TaskService: Quick Mode 任务服务
+- ExperimentService: Advanced Mode 实验服务
+- FileService: 文件管理服务
+"""
+from .task_service import TaskService
+from .experiment_service import ExperimentService
+from .file_service import FileService
+__all__ = [
+    "TaskService",
+    "ExperimentService",
+    "FileService",
+]

api_server/app/services/experiment_service.py ADDED Viewed

	@@ -0,0 +1,513 @@

+"""
+Advanced Mode 实验服务
+处理专家模式分阶段训练的业务逻辑
+"""
+import uuid
+from datetime import datetime
+from typing import AsyncGenerator, Dict, List, Optional, Any
+from ..core.adapters import (
+    get_database_adapter,
+    get_task_queue_adapter,
+    get_progress_adapter,
+)
+from ..models.schemas.experiment import (
+    ExperimentCreate,
+    ExperimentUpdate,
+    ExperimentResponse,
+    ExperimentListResponse,
+    StageStatus,
+    StageExecuteResponse,
+    StagesListResponse,
+    STAGE_DEPENDENCIES,
+)
+# Stage type identifiers, listed in execution order.
+STAGE_TYPES = [
+    "audio_slice",
+    "asr",
+    "text_feature",
+    "hubert_feature",
+    "semantic_token",
+    "sovits_train",
+    "gpt_train",
+]
+class ExperimentService:
+    """
+    Advanced Mode 实验服务
+    提供专家模式的分阶段训练管理：
+    - 创建实验
+    - 查询实验/阶段状态
+    - 执行/取消单个阶段
+    - 检查阶段依赖
+    Example:
+        >>> service = ExperimentService()
+        >>> exp = await service.create_experiment(request)
+        >>> await service.execute_stage(exp.id, "audio_slice", {})
+        >>> stages = await service.get_all_stages(exp.id)
+    """
+    def __init__(self):
+        """初始化服务"""
+        self._db = None
+        self._queue = None
+        self._progress = None
+    @property
+    def db(self):
+        """延迟获取数据库适配器"""
+        if self._db is None:
+            self._db = get_database_adapter()
+        return self._db
+    @property
+    def queue(self):
+        """延迟获取任务队列适配器"""
+        if self._queue is None:
+            self._queue = get_task_queue_adapter()
+        return self._queue
+    @property
+    def progress_adapter(self):
+        """延迟获取进度适配器"""
+        if self._progress is None:
+            self._progress = get_progress_adapter()
+        return self._progress
+    async def create_experiment(self, request: ExperimentCreate) -> ExperimentResponse:
+        """
+        创建实验
+        创建实验但不立即执行，用户可以逐阶段控制训练流程。
+        Args:
+            request: 创建实验请求
+        Returns:
+            ExperimentResponse
+        """
+        exp_id = f"exp-{uuid.uuid4().hex[:8]}"
+        experiment_data = {
+            "id": exp_id,
+            "exp_name": request.exp_name,
+            "version": request.version,
+            "gpu_numbers": request.gpu_numbers,
+            "is_half": request.is_half,
+            "audio_file_id": request.audio_file_id,
+            "status": "created",
+        }
+        # 创建实验（会自动创建所有阶段）
+        experiment = await self.db.create_experiment(experiment_data)
+        return self._experiment_to_response(experiment)
+    async def get_experiment(self, exp_id: str) -> Optional[ExperimentResponse]:
+        """
+        获取实验详情
+        Args:
+            exp_id: 实验ID
+        Returns:
+            ExperimentResponse 或 None
+        """
+        experiment = await self.db.get_experiment(exp_id)
+        if not experiment:
+            return None
+        return self._experiment_to_response(experiment)
+    async def list_experiments(
+        self,
+        status: Optional[str] = None,
+        limit: int = 50,
+        offset: int = 0
+    ) -> ExperimentListResponse:
+        """
+        获取实验列表
+        Args:
+            status: 按状态筛选
+            limit: 每页数量
+            offset: 偏移量
+        Returns:
+            ExperimentListResponse
+        """
+        experiments = await self.db.list_experiments(
+            status=status, limit=limit, offset=offset
+        )
+        # 获取每个实验的完整信息（包含 stages）
+        full_experiments = []
+        for exp in experiments:
+            full_exp = await self.db.get_experiment(exp["id"])
+            if full_exp:
+                full_experiments.append(full_exp)
+        return ExperimentListResponse(
+            items=[self._experiment_to_response(e) for e in full_experiments],
+            total=len(experiments),  # TODO: 添加 count 方法
+            limit=limit,
+            offset=offset,
+        )
+    async def update_experiment(
+        self,
+        exp_id: str,
+        request: ExperimentUpdate
+    ) -> Optional[ExperimentResponse]:
+        """
+        更新实验基础配置
+        Args:
+            exp_id: 实验ID
+            request: 更新请求
+        Returns:
+            ExperimentResponse 或 None
+        """
+        updates = {}
+        if request.exp_name is not None:
+            updates["exp_name"] = request.exp_name
+        if request.gpu_numbers is not None:
+            updates["gpu_numbers"] = request.gpu_numbers
+        if request.is_half is not None:
+            updates["is_half"] = request.is_half
+        if not updates:
+            return await self.get_experiment(exp_id)
+        experiment = await self.db.update_experiment(exp_id, updates)
+        if not experiment:
+            return None
+        return self._experiment_to_response(experiment)
+    async def delete_experiment(self, exp_id: str) -> bool:
+        """
+        删除实验
+        Args:
+            exp_id: 实验ID
+        Returns:
+            是否成功删除
+        """
+        # 先取消所有运行中的阶段
+        stages = await self.db.get_all_stages(exp_id)
+        for stage in stages:
+            if stage.get("status") == "running" and stage.get("job_id"):
+                await self.queue.cancel(stage["job_id"])
+        return await self.db.delete_experiment(exp_id)
+    async def check_stage_dependencies(
+        self,
+        exp_id: str,
+        stage_type: str
+    ) -> Dict[str, Any]:
+        """
+        检查阶段依赖是否满足
+        Args:
+            exp_id: 实验ID
+            stage_type: 阶段类型
+        Returns:
+            {"satisfied": bool, "missing": List[str]}
+        """
+        experiment = await self.db.get_experiment(exp_id)
+        if not experiment:
+            return {"satisfied": False, "missing": [], "error": "实验不存在"}
+        dependencies = STAGE_DEPENDENCIES.get(stage_type, [])
+        stages = experiment.get("stages", {})
+        missing = []
+        for dep in dependencies:
+            dep_stage = stages.get(dep, {})
+            if dep_stage.get("status") != "completed":
+                missing.append(dep)
+        return {
+            "satisfied": len(missing) == 0,
+            "missing": missing,
+        }
+    async def execute_stage(
+        self,
+        exp_id: str,
+        stage_type: str,
+        params: Dict[str, Any]
+    ) -> Optional[StageExecuteResponse]:
+        """
+        执行指定阶段
+        Args:
+            exp_id: 实验ID
+            stage_type: 阶段类型
+            params: 阶段参数
+        Returns:
+            StageExecuteResponse 或 None
+        """
+        # 获取实验
+        experiment = await self.db.get_experiment(exp_id)
+        if not experiment:
+            return None
+        stages = experiment.get("stages", {})
+        current_stage = stages.get(stage_type, {})
+        # 检查是否是重新执行
+        is_rerun = current_stage.get("status") == "completed"
+        previous_run = None
+        if is_rerun:
+            previous_run = {
+                "completed_at": current_stage.get("completed_at"),
+                "outputs": current_stage.get("outputs"),
+            }
+        # 构建阶段配置
+        stage_config = {
+            "exp_id": exp_id,
+            "exp_name": experiment["exp_name"],
+            "version": experiment.get("version", "v2"),
+            "gpu_numbers": experiment.get("gpu_numbers", "0"),
+            "is_half": experiment.get("is_half", True),
+            "audio_file_id": experiment.get("audio_file_id"),
+            "stage_type": stage_type,
+            "params": params,
+            # 只执行单个阶段
+            "stages": [stage_type],
+        }
+        # 生成任务ID（用于进度追踪）
+        task_id = f"{exp_id}-{stage_type}-{uuid.uuid4().hex[:4]}"
+        # 入队执行
+        job_id = await self.queue.enqueue(task_id, stage_config)
+        # 更新阶段状态
+        now = datetime.utcnow()
+        await self.db.update_stage(exp_id, stage_type, {
+            "status": "running",
+            "config": params,
+            "job_id": job_id,
+            "started_at": now,
+            "completed_at": None,
+            "error_message": None,
+            "outputs": None,
+            "progress": 0.0,
+        })
+        # 更新实验状态为运行中
+        await self.db.update_experiment(exp_id, {"status": "running"})
+        return StageExecuteResponse(
+            exp_id=exp_id,
+            stage_type=stage_type,
+            status="running",
+            job_id=job_id,
+            config=params,
+            rerun=is_rerun,
+            previous_run=previous_run,
+            started_at=now,
+        )
+    async def get_stage(
+        self,
+        exp_id: str,
+        stage_type: str
+    ) -> Optional[StageStatus]:
+        """
+        获取阶段状态
+        Args:
+            exp_id: 实验ID
+            stage_type: 阶段类型
+        Returns:
+            StageStatus 或 None
+        """
+        stage = await self.db.get_stage(exp_id, stage_type)
+        if not stage:
+            return None
+        return self._stage_to_status(stage)
+    async def get_all_stages(self, exp_id: str) -> Optional[StagesListResponse]:
+        """
+        获取所有阶段状态
+        Args:
+            exp_id: 实验ID
+        Returns:
+            StagesListResponse 或 None
+        """
+        stages = await self.db.get_all_stages(exp_id)
+        if not stages:
+            # 检查实验是否存在
+            experiment = await self.db.get_experiment(exp_id)
+            if not experiment:
+                return None
+            stages = []
+        return StagesListResponse(
+            exp_id=exp_id,
+            stages=[self._stage_to_status(s) for s in stages],
+        )
+    async def cancel_stage(self, exp_id: str, stage_type: str) -> bool:
+        """
+        取消正在执行的阶段
+        Args:
+            exp_id: 实验ID
+            stage_type: 阶段类型
+        Returns:
+            是否成功取消
+        """
+        stage = await self.db.get_stage(exp_id, stage_type)
+        if not stage:
+            return False
+        # 只有运行中的阶段可以取消
+        if stage.get("status") != "running":
+            return False
+        # 取消任务
+        job_id = stage.get("job_id")
+        if job_id:
+            await self.queue.cancel(job_id)
+        # 更新状态
+        await self.db.update_stage(exp_id, stage_type, {
+            "status": "cancelled",
+            "completed_at": datetime.utcnow(),
+            "message": "阶段已取消",
+        })
+        return True
+    async def subscribe_stage_progress(
+        self,
+        exp_id: str,
+        stage_type: str
+    ) -> AsyncGenerator[Dict[str, Any], None]:
+        """
+        订阅阶段进度（SSE 流）
+        Args:
+            exp_id: 实验ID
+            stage_type: 阶段类型
+        Yields:
+            进度信息字典
+        """
+        # 获取阶段信息
+        stage = await self.db.get_stage(exp_id, stage_type)
+        if not stage:
+            yield {"type": "error", "message": "阶段不存在"}
+            return
+        # 如果阶段已结束，直接返回最终状态
+        if stage.get("status") in ("completed", "failed", "cancelled"):
+            yield {
+                "type": "final",
+                "status": stage.get("status"),
+                "message": stage.get("message") or stage.get("error_message"),
+                "progress": stage.get("progress", 0.0),
+                "outputs": stage.get("outputs"),
+            }
+            return
+        # 如果阶段未开始
+        if stage.get("status") == "pending":
+            yield {"type": "info", "message": "阶段尚未开始"}
+            return
+        # 使用任务ID订阅进度
+        # 任务ID格式: {exp_id}-{stage_type}-{random}
+        # 由于我们不知道确切的任务ID，使用 job_id
+        job_id = stage.get("job_id")
+        if not job_id:
+            yield {"type": "error", "message": "无法获取任务ID"}
+            return
+        # 订阅进度
+        # 注意：这里需要根据实际的进度适配器实现来调整
+        # 当前使用 task_id 格式为 "{exp_id}-{stage_type}"
+        task_id = f"{exp_id}-{stage_type}"
+        async for progress in self.progress_adapter.subscribe(task_id):
+            yield progress
+            # 检查是否为终态
+            if progress.get("status") in ("completed", "failed", "cancelled"):
+                break
+    def _experiment_to_response(self, experiment: Dict[str, Any]) -> ExperimentResponse:
+        """将实验数据转换为响应模型"""
+        stages_data = experiment.get("stages", {})
+        stages = {}
+        for stage_type, stage_info in stages_data.items():
+            stages[stage_type] = self._stage_to_status(stage_info)
+        # 解析日期时间
+        created_at = experiment.get("created_at")
+        if isinstance(created_at, str):
+            created_at = datetime.fromisoformat(created_at)
+        elif created_at is None:
+            created_at = datetime.utcnow()
+        updated_at = experiment.get("updated_at")
+        if isinstance(updated_at, str):
+            updated_at = datetime.fromisoformat(updated_at)
+        return ExperimentResponse(
+            id=experiment["id"],
+            exp_name=experiment["exp_name"],
+            version=experiment.get("version", "v2"),
+            status=experiment.get("status", "created"),
+            gpu_numbers=experiment.get("gpu_numbers", "0"),
+            is_half=experiment.get("is_half", True),
+            audio_file_id=experiment.get("audio_file_id", ""),
+            stages=stages,
+            created_at=created_at,
+            updated_at=updated_at,
+        )
+    def _stage_to_status(self, stage: Dict[str, Any]) -> StageStatus:
+        """将阶段数据转换为状态模型"""
+        # 解析日期时间
+        started_at = stage.get("started_at")
+        if isinstance(started_at, str):
+            started_at = datetime.fromisoformat(started_at)
+        completed_at = stage.get("completed_at")
+        if isinstance(completed_at, str):
+            completed_at = datetime.fromisoformat(completed_at)
+        return StageStatus(
+            stage_type=stage.get("stage_type", ""),
+            status=stage.get("status", "pending"),
+            progress=stage.get("progress"),
+            message=stage.get("message"),
+            started_at=started_at,
+            completed_at=completed_at,
+            config=stage.get("config"),
+            outputs=stage.get("outputs"),
+            error_message=stage.get("error_message"),
+        )

api_server/app/services/file_service.py ADDED Viewed

	@@ -0,0 +1,277 @@

+"""
+文件管理服务
+处理文件上传、下载和管理的业务逻辑
+"""
+from datetime import datetime
+from typing import List, Optional, Tuple
+from ..core.adapters import get_database_adapter, get_storage_adapter
+from ..models.schemas.file import (
+    FileMetadata,
+    FileUploadResponse,
+    FileListResponse,
+    FileDeleteResponse,
+)
+class FileService:
+    """
+    文件管理服务
+    提供文件的完整生命周期管理：
+    - 上传文件
+    - 下载文件
+    - 获取元数据
+    - 列出文件
+    - 删除文件
+    Example:
+        >>> service = FileService()
+        >>> result = await service.upload_file(data, "audio.wav", "audio/wav", "training")
+        >>> content = await service.download_file(result.file.id)
+        >>> await service.delete_file(result.file.id)
+    """
+    def __init__(self):
+        """初始化服务"""
+        self._db = None
+        self._storage = None
+    @property
+    def db(self):
+        """延迟获取数据库适配器"""
+        if self._db is None:
+            self._db = get_database_adapter()
+        return self._db
+    @property
+    def storage(self):
+        """延迟获取存储适配器"""
+        if self._storage is None:
+            self._storage = get_storage_adapter()
+        return self._storage
+    async def upload_file(
+        self,
+        file_data: bytes,
+        filename: str,
+        content_type: Optional[str] = None,
+        purpose: str = "training"
+    ) -> FileUploadResponse:
+        """
+        上传文件
+        Args:
+            file_data: 文件二进制数据
+            filename: 原始文件名
+            content_type: MIME 类型
+            purpose: 文件用途 (training, reference, output)
+        Returns:
+            FileUploadResponse
+        """
+        # 构建元数据
+        metadata = {
+            "content_type": content_type,
+            "purpose": purpose,
+            "size_bytes": len(file_data),
+        }
+        # 上传到存储
+        file_id = await self.storage.upload_file(file_data, filename, metadata)
+        # 获取完整元数据（包含音频信息）
+        full_metadata = await self.storage.get_file_metadata(file_id)
+        # 保存到数据库
+        file_record = {
+            "id": file_id,
+            "filename": filename,
+            "content_type": content_type,
+            "size_bytes": len(file_data),
+            "purpose": purpose,
+            "duration_seconds": full_metadata.get("duration_seconds") if full_metadata else None,
+            "sample_rate": full_metadata.get("sample_rate") if full_metadata else None,
+            "uploaded_at": datetime.utcnow().isoformat(),
+        }
+        await self.db.create_file_record(file_record)
+        # 构建响应
+        file_metadata = FileMetadata(
+            id=file_id,
+            filename=filename,
+            content_type=content_type,
+            size_bytes=len(file_data),
+            purpose=purpose,
+            duration_seconds=file_record.get("duration_seconds"),
+            sample_rate=file_record.get("sample_rate"),
+            uploaded_at=datetime.utcnow(),
+        )
+        return FileUploadResponse(
+            success=True,
+            message="文件上传成功",
+            file=file_metadata,
+        )
+    async def download_file(self, file_id: str) -> Optional[Tuple[bytes, str, str]]:
+        """
+        下载文件
+        Args:
+            file_id: 文件ID
+        Returns:
+            (文件数据, 文件名, 内容类型) 或 None
+        """
+        # 检查文件是否存在
+        if not await self.storage.file_exists(file_id):
+            return None
+        # 获取元数据
+        metadata = await self.storage.get_file_metadata(file_id)
+        if not metadata:
+            return None
+        # 下载文件
+        file_data = await self.storage.download_file(file_id)
+        return (
+            file_data,
+            metadata.get("filename", "file"),
+            metadata.get("content_type", "application/octet-stream"),
+        )
+    async def get_file(self, file_id: str) -> Optional[FileMetadata]:
+        """
+        获取文件元数据
+        Args:
+            file_id: 文件ID
+        Returns:
+            FileMetadata 或 None
+        """
+        # 从数据库获取
+        record = await self.db.get_file_record(file_id)
+        if record:
+            return self._record_to_metadata(record)
+        # 尝试从存储获取
+        metadata = await self.storage.get_file_metadata(file_id)
+        if metadata:
+            return self._storage_metadata_to_file_metadata(metadata)
+        return None
+    async def list_files(
+        self,
+        purpose: Optional[str] = None,
+        limit: int = 50,
+        offset: int = 0
+    ) -> FileListResponse:
+        """
+        获取文件列表
+        Args:
+            purpose: 按用途筛选
+            limit: 每页数量
+            offset: 偏移量
+        Returns:
+            FileListResponse
+        """
+        # 从数据库获取
+        records = await self.db.list_file_records(
+            purpose=purpose, limit=limit, offset=offset
+        )
+        total = await self.db.count_file_records(purpose=purpose)
+        return FileListResponse(
+            items=[self._record_to_metadata(r) for r in records],
+            total=total,
+            limit=limit,
+            offset=offset,
+        )
+    async def delete_file(self, file_id: str) -> FileDeleteResponse:
+        """
+        删除文件
+        Args:
+            file_id: 文件ID
+        Returns:
+            FileDeleteResponse
+        """
+        # 从存储删除
+        storage_deleted = await self.storage.delete_file(file_id)
+        # 从数据库删除
+        db_deleted = await self.db.delete_file_record(file_id)
+        if storage_deleted or db_deleted:
+            return FileDeleteResponse(
+                success=True,
+                message="文件删除成功",
+                file_id=file_id,
+            )
+        else:
+            return FileDeleteResponse(
+                success=False,
+                message="文件不存在或已删除",
+                file_id=file_id,
+            )
+    async def file_exists(self, file_id: str) -> bool:
+        """
+        检查文件是否存在
+        Args:
+            file_id: 文件ID
+        Returns:
+            是否存在
+        """
+        return await self.storage.file_exists(file_id)
+    def _record_to_metadata(self, record: dict) -> FileMetadata:
+        """将数据库记录转换为 FileMetadata"""
+        uploaded_at = record.get("uploaded_at")
+        if isinstance(uploaded_at, str):
+            uploaded_at = datetime.fromisoformat(uploaded_at)
+        elif uploaded_at is None:
+            uploaded_at = datetime.utcnow()
+        return FileMetadata(
+            id=record["id"],
+            filename=record["filename"],
+            content_type=record.get("content_type"),
+            size_bytes=record.get("size_bytes", 0),
+            purpose=record.get("purpose", "training"),
+            duration_seconds=record.get("duration_seconds"),
+            sample_rate=record.get("sample_rate"),
+            uploaded_at=uploaded_at,
+        )
+    def _storage_metadata_to_file_metadata(self, metadata: dict) -> FileMetadata:
+        """将存储元数据转换为 FileMetadata"""
+        uploaded_at = metadata.get("uploaded_at")
+        if isinstance(uploaded_at, str):
+            uploaded_at = datetime.fromisoformat(uploaded_at)
+        elif uploaded_at is None:
+            uploaded_at = datetime.utcnow()
+        return FileMetadata(
+            id=metadata.get("id", ""),
+            filename=metadata.get("filename", ""),
+            content_type=metadata.get("content_type"),
+            size_bytes=metadata.get("size_bytes", 0),
+            purpose=metadata.get("purpose", "training"),
+            duration_seconds=metadata.get("duration_seconds"),
+            sample_rate=metadata.get("sample_rate"),
+            uploaded_at=uploaded_at,
+        )

api_server/app/services/task_service.py ADDED Viewed

	@@ -0,0 +1,322 @@

+"""
+Quick Mode 任务服务
+处理一键训练任务的业务逻辑
+"""
+import uuid
+from datetime import datetime
+from typing import AsyncGenerator, Dict, List, Optional, Any
+from ..core.adapters import get_database_adapter, get_task_queue_adapter, get_storage_adapter
+from ..core.config import settings
+from ..models.domain import Task, TaskStatus
+from ..models.schemas.task import (
+    QuickModeRequest,
+    TaskResponse,
+    TaskListResponse,
+)
+# Quality presets: for each quality level, the SoVITS and GPT epoch counts and
+# a human-readable description string.
+QUALITY_PRESETS = {
+    "fast": {
+        "sovits_epochs": 4,
+        "gpt_epochs": 8,
+        "description": "快速训练，约10分钟",
+    },
+    "standard": {
+        "sovits_epochs": 8,
+        "gpt_epochs": 15,
+        "description": "标准训练，约20分钟",
+    },
+    "high": {
+        "sovits_epochs": 16,
+        "gpt_epochs": 30,
+        "description": "高质量训练，约40分钟",
+    },
+}
+class TaskService:
+    """
+    Quick Mode 任务服务
+    提供一键训练任务的完整生命周期管理：
+    - 创建任务
+    - 查询任务状态
+    - 取消任务
+    - 订阅进度更新
+    Example:
+        >>> service = TaskService()
+        >>> task = await service.create_quick_task(request)
+        >>> status = await service.get_task(task.id)
+        >>> await service.cancel_task(task.id)
+    """
+    def __init__(self):
+        """初始化服务"""
+        self._db = None
+        self._queue = None
+        self._storage = None
+    @property
+    def db(self):
+        """延迟获取数据库适配器"""
+        if self._db is None:
+            self._db = get_database_adapter()
+        return self._db
+    @property
+    def queue(self):
+        """延迟获取任务队列适配器"""
+        if self._queue is None:
+            self._queue = get_task_queue_adapter()
+        return self._queue
+    @property
+    def storage(self):
+        """延迟获取存储适配器"""
+        if self._storage is None:
+            self._storage = get_storage_adapter()
+        return self._storage
+    async def check_exp_name_exists(self, exp_name: str) -> bool:
+        """
+        检查实验名称是否已存在
+        Args:
+            exp_name: 实验名称
+        Returns:
+            如果存在返回 True，否则返回 False
+        """
+        existing_task = await self.db.get_task_by_exp_name(exp_name)
+        return existing_task is not None
+    async def validate_audio_file(self, audio_file_id: str) -> tuple[bool, str]:
+        """
+        验证音频文件是否存在
+        Args:
+            audio_file_id: 音频文件 ID 或路径
+        Returns:
+            (是否存在, 实际文件路径)
+        """
+        import os
+        # 尝试获取文件元数据
+        file_metadata = await self.storage.get_file_metadata(audio_file_id)
+        if file_metadata:
+            # 文件存储在 storage.base_path / file_id
+            audio_file_path = str(self.storage.base_path / audio_file_id)
+            exists = os.path.exists(audio_file_path)
+            return exists, audio_file_path
+        else:
+            # 如果找不到元数据，将 audio_file_id 当作路径
+            exists = os.path.exists(audio_file_id)
+            return exists, audio_file_id
+    async def create_quick_task(self, request: QuickModeRequest) -> TaskResponse:
+        """
+        创建一键训练任务
+        根据请求参数和质量预设，自动配置训练参数并创建任务。
+        Args:
+            request: 快速模式请求
+        Returns:
+            TaskResponse: 任务响应
+        """
+        # 生成任务ID
+        task_id = f"task-{uuid.uuid4().hex[:12]}"
+        # 获取质量预设
+        quality = request.options.quality
+        preset = QUALITY_PRESETS.get(quality, QUALITY_PRESETS["standard"])
+        # 验证并解析音频文件路径
+        audio_file_id = request.audio_file_id
+        _, audio_file_path = await self.validate_audio_file(audio_file_id)
+        # 构建任务配置
+        config = {
+            "exp_name": request.exp_name,
+            "audio_file_id": audio_file_id,
+            "input_path": audio_file_path,  # 音频文件的实际路径
+            "version": request.options.version,
+            "language": request.options.language,
+            "quality": quality,
+            # 训练参数
+            "total_epoch": preset["sovits_epochs"],  # SoVITS epoch
+            "sovits_epochs": preset["sovits_epochs"],
+            "gpt_epochs": preset["gpt_epochs"],
+            # 预训练模型路径
+            "bert_pretrained_dir": str(settings.BERT_PRETRAINED_DIR),
+            "ssl_pretrained_dir": str(settings.SSL_PRETRAINED_DIR),
+            "pretrained_s2G": str(settings.PRETRAINED_S2G),
+            "pretrained_s2D": str(settings.PRETRAINED_S2D),
+            "pretrained_s1": str(settings.PRETRAINED_S1),
+            # 执行完整流程
+            "stages": [
+                "audio_slice",
+                "asr",
+                "text_feature",
+                "hubert_feature",
+                "semantic_token",
+                "sovits_train",
+                "gpt_train",
+            ],
+        }
+        # 创建 Task 领域模型
+        task = Task(
+            id=task_id,
+            exp_name=request.exp_name,
+            config=config,
+            status=TaskStatus.QUEUED,
+            created_at=datetime.utcnow(),
+        )
+        # 保存到数据库
+        await self.db.create_task(task)
+        # 入队执行
+        job_id = await self.queue.enqueue(task_id, config)
+        # 更新 job_id
+        await self.db.update_task(task_id, {"job_id": job_id})
+        task.job_id = job_id
+        return self._task_to_response(task)
+    async def get_task(self, task_id: str) -> Optional[TaskResponse]:
+        """
+        获取任务详情
+        Args:
+            task_id: 任务ID
+        Returns:
+            TaskResponse 或 None（不存在时）
+        """
+        task = await self.db.get_task(task_id)
+        if not task:
+            return None
+        return self._task_to_response(task)
+    async def list_tasks(
+        self,
+        status: Optional[str] = None,
+        limit: int = 50,
+        offset: int = 0
+    ) -> TaskListResponse:
+        """
+        获取任务列表
+        Args:
+            status: 按状态筛选
+            limit: 每页数量
+            offset: 偏移量
+        Returns:
+            TaskListResponse
+        """
+        tasks = await self.db.list_tasks(status=status, limit=limit, offset=offset)
+        total = await self.db.count_tasks(status=status)
+        return TaskListResponse(
+            items=[self._task_to_response(t) for t in tasks],
+            total=total,
+            limit=limit,
+            offset=offset,
+        )
+    async def cancel_task(self, task_id: str) -> bool:
+        """
+        取消任务
+        Args:
+            task_id: 任务ID
+        Returns:
+            是否成功取消
+        """
+        # 获取任务
+        task = await self.db.get_task(task_id)
+        if not task:
+            return False
+        # 只有排队中或运行中的任务可以取消
+        if task.status not in (TaskStatus.QUEUED, TaskStatus.RUNNING):
+            return False
+        # 如果有 job_id，尝试取消队列任务
+        if task.job_id:
+            await self.queue.cancel(task.job_id)
+        # 更新状态
+        await self.db.update_task(task_id, {
+            "status": TaskStatus.CANCELLED,
+            "completed_at": datetime.utcnow(),
+            "message": "任务已取消",
+        })
+        return True
+    async def subscribe_progress(
+        self,
+        task_id: str
+    ) -> AsyncGenerator[Dict[str, Any], None]:
+        """
+        订阅任务进度（SSE 流）
+        Args:
+            task_id: 任务ID
+        Yields:
+            进度信息字典
+        """
+        # 检查任务是否存在
+        task = await self.db.get_task(task_id)
+        if not task:
+            yield {"type": "error", "message": "任务不存在"}
+            return
+        # 如果任务已结束，直接返回最终状态
+        if task.status in (TaskStatus.COMPLETED, TaskStatus.FAILED, TaskStatus.CANCELLED):
+            yield {
+                "type": "final",
+                "status": task.status.value,
+                "message": task.message or task.error_message,
+                "progress": task.progress,
+            }
+            return
+        # 订阅进度更新
+        async for progress in self.queue.subscribe_progress(task_id):
+            yield progress
+            # 检查是否为终态
+            if progress.get("status") in ("completed", "failed", "cancelled"):
+                break
+    def _task_to_response(self, task: Task) -> TaskResponse:
+        """将 Task 领域模型转换为 TaskResponse"""
+        return TaskResponse(
+            id=task.id,
+            exp_name=task.exp_name,
+            status=task.status.value if isinstance(task.status, TaskStatus) else task.status,
+            current_stage=task.current_stage,
+            progress=task.stage_progress,
+            overall_progress=task.progress,
+            message=task.message,
+            error_message=task.error_message,
+            created_at=task.created_at,
+            started_at=task.started_at,
+            completed_at=task.completed_at,
+        )