ricebug committed on
Commit
6e18b6a
·
verified ·
1 Parent(s): 3428636

Upload 126 files

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. Dockerfile +76 -0
  2. app/.DS_Store +0 -0
  3. app/api/pages/__init__.py +13 -0
  4. app/api/pages/admin.py +32 -0
  5. app/api/pages/public.py +51 -0
  6. app/api/v1/admin_api/__init__.py +15 -0
  7. app/api/v1/admin_api/cache.py +445 -0
  8. app/api/v1/admin_api/config.py +53 -0
  9. app/api/v1/admin_api/token.py +395 -0
  10. app/api/v1/chat.py +862 -0
  11. app/api/v1/files.py +69 -0
  12. app/api/v1/image.py +452 -0
  13. app/api/v1/models.py +28 -0
  14. app/api/v1/public_api/__init__.py +18 -0
  15. app/api/v1/public_api/imagine.py +505 -0
  16. app/api/v1/public_api/video.py +274 -0
  17. app/api/v1/public_api/voice.py +80 -0
  18. app/api/v1/response.py +81 -0
  19. app/api/v1/video.py +3 -0
  20. app/core/auth.py +198 -0
  21. app/core/batch.py +233 -0
  22. app/core/config.py +326 -0
  23. app/core/exceptions.py +232 -0
  24. app/core/logger.py +151 -0
  25. app/core/response_middleware.py +85 -0
  26. app/core/storage.py +1478 -0
  27. app/services/cf_refresh/README.md +49 -0
  28. app/services/cf_refresh/__init__.py +5 -0
  29. app/services/cf_refresh/config.py +41 -0
  30. app/services/cf_refresh/scheduler.py +98 -0
  31. app/services/cf_refresh/solver.py +122 -0
  32. app/services/grok/batch_services/assets.py +234 -0
  33. app/services/grok/batch_services/nsfw.py +112 -0
  34. app/services/grok/batch_services/usage.py +89 -0
  35. app/services/grok/defaults.py +34 -0
  36. app/services/grok/services/chat.py +1115 -0
  37. app/services/grok/services/image.py +794 -0
  38. app/services/grok/services/image_edit.py +567 -0
  39. app/services/grok/services/model.py +270 -0
  40. app/services/grok/services/responses.py +824 -0
  41. app/services/grok/services/video.py +688 -0
  42. app/services/grok/services/voice.py +31 -0
  43. app/services/grok/utils/cache.py +110 -0
  44. app/services/grok/utils/download.py +298 -0
  45. app/services/grok/utils/locks.py +86 -0
  46. app/services/grok/utils/process.py +152 -0
  47. app/services/grok/utils/response.py +144 -0
  48. app/services/grok/utils/retry.py +66 -0
  49. app/services/grok/utils/stream.py +46 -0
  50. app/services/grok/utils/tool_call.py +319 -0
Dockerfile ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# ---- Build stage: resolve and install dependencies with uv ----
FROM python:3.13-alpine AS builder

ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    TZ=Asia/Shanghai \
    # Install uv-managed packages into a dedicated virtualenv
    UV_PROJECT_ENVIRONMENT=/opt/venv

# Make the venv's bin directory take precedence
ENV PATH="$UV_PROJECT_ENVIRONMENT/bin:$PATH"

RUN apk add --no-cache \
    tzdata \
    ca-certificates \
    build-base \
    linux-headers \
    libffi-dev \
    openssl-dev \
    curl-dev \
    cargo \
    rust

WORKDIR /app

# Install uv
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/

COPY pyproject.toml uv.lock ./

# Install locked deps, then trim caches/tests and strip native extensions.
# The strip step is best-effort (some .so files may not be strippable),
# so it is explicitly grouped with "|| true" to keep the layer succeeding.
RUN uv sync --frozen --no-dev --no-install-project \
    && find /opt/venv -type d -name "__pycache__" -prune -exec rm -rf {} + \
    && find /opt/venv -type f -name "*.pyc" -delete \
    && find /opt/venv -type d -name "tests" -prune -exec rm -rf {} + \
    && find /opt/venv -type d -name "test" -prune -exec rm -rf {} + \
    && find /opt/venv -type d -name "testing" -prune -exec rm -rf {} + \
    && { find /opt/venv -type f -name "*.so" -exec strip --strip-unneeded {} + || true; } \
    && rm -rf /root/.cache /tmp/uv-cache

# ---- Runtime stage: minimal image with only runtime libraries ----
FROM python:3.13-alpine

ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    TZ=Asia/Shanghai \
    VIRTUAL_ENV=/opt/venv

ENV PATH="$VIRTUAL_ENV/bin:$PATH"

RUN apk add --no-cache \
    tzdata \
    ca-certificates \
    libffi \
    openssl \
    libgcc \
    libstdc++ \
    libcurl

WORKDIR /app

COPY --from=builder /opt/venv /opt/venv

COPY config.defaults.toml ./
COPY app ./app
COPY main.py ./
COPY scripts ./scripts

# Single layer: create runtime dirs and mark both scripts executable
# (the original repeated the entrypoint chmod across three RUN layers).
RUN mkdir -p /app/data /app/logs \
    && chmod +x /app/scripts/entrypoint.sh /app/scripts/init_storage.sh

EXPOSE 7860

ENTRYPOINT ["/app/scripts/entrypoint.sh"]

CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
app/.DS_Store ADDED
Binary file (6.15 kB). View file
 
app/api/pages/__init__.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """UI pages router."""
2
+
3
+ from fastapi import APIRouter
4
+
5
+ from app.api.pages.admin import router as admin_router
6
+ from app.api.pages.public import router as public_router
7
+
8
+ router = APIRouter()
9
+
10
+ router.include_router(public_router)
11
+ router.include_router(admin_router)
12
+
13
+ __all__ = ["router"]
app/api/pages/admin.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Admin UI page routes: static HTML shells behind /admin/*."""

from pathlib import Path

from fastapi import APIRouter
from fastapi.responses import FileResponse, RedirectResponse

router = APIRouter()
# app/static — two directory levels up from app/api/pages/.
STATIC_DIR = Path(__file__).resolve().parents[2] / "static"


def _admin_page(name: str) -> FileResponse:
    """Serve a static admin page by its bare file name."""
    return FileResponse(STATIC_DIR / f"admin/pages/{name}.html")


@router.get("/admin", include_in_schema=False)
async def admin_root():
    """Admin entry point: always bounce to the login page."""
    return RedirectResponse(url="/admin/login")


@router.get("/admin/login", include_in_schema=False)
async def admin_login():
    return _admin_page("login")


@router.get("/admin/config", include_in_schema=False)
async def admin_config():
    return _admin_page("config")


@router.get("/admin/cache", include_in_schema=False)
async def admin_cache():
    return _admin_page("cache")


@router.get("/admin/token", include_in_schema=False)
async def admin_token():
    return _admin_page("token")
app/api/pages/public.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Public UI page routes.

Every public page is available only while public access is enabled;
otherwise the routes behave as if they do not exist (404). The enabled
check was previously copy-pasted into each handler; it is factored into
a single helper here.
"""

from pathlib import Path

from fastapi import APIRouter, HTTPException
from fastapi.responses import FileResponse, RedirectResponse

from app.core.auth import is_public_enabled

router = APIRouter()
# app/static — two directory levels up from app/api/pages/.
STATIC_DIR = Path(__file__).resolve().parents[2] / "static"


def _public_page(name: str) -> FileResponse:
    """Serve a public page by bare name, or 404 when public access is off."""
    if not is_public_enabled():
        raise HTTPException(status_code=404, detail="Not Found")
    return FileResponse(STATIC_DIR / f"public/pages/{name}.html")


@router.get("/", include_in_schema=False)
async def root():
    """Send visitors to the public login when enabled, else admin login."""
    if is_public_enabled():
        return RedirectResponse(url="/login")
    return RedirectResponse(url="/admin/login")


@router.get("/login", include_in_schema=False)
async def public_login():
    return _public_page("login")


@router.get("/imagine", include_in_schema=False)
async def public_imagine():
    return _public_page("imagine")


@router.get("/voice", include_in_schema=False)
async def public_voice():
    return _public_page("voice")


@router.get("/video", include_in_schema=False)
async def public_video():
    return _public_page("video")


@router.get("/chat", include_in_schema=False)
async def public_chat():
    return _public_page("chat")
app/api/v1/admin_api/__init__.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Admin API router (app_key protected)."""
2
+
3
+ from fastapi import APIRouter
4
+
5
+ from app.api.v1.admin_api.cache import router as cache_router
6
+ from app.api.v1.admin_api.config import router as config_router
7
+ from app.api.v1.admin_api.token import router as tokens_router
8
+
9
+ router = APIRouter()
10
+
11
+ router.include_router(config_router)
12
+ router.include_router(tokens_router)
13
+ router.include_router(cache_router)
14
+
15
+ __all__ = ["router"]
app/api/v1/admin_api/cache.py ADDED
@@ -0,0 +1,445 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List
2
+
3
+ from fastapi import APIRouter, Depends, HTTPException, Query, Request
4
+
5
+ from app.core.auth import verify_app_key
6
+ from app.core.batch import create_task, expire_task
7
+ from app.services.grok.batch_services.assets import ListService, DeleteService
8
+ from app.services.token.manager import get_token_manager
9
+ router = APIRouter()
10
+
11
+
12
+ @router.get("/cache", dependencies=[Depends(verify_app_key)])
13
+ async def cache_stats(request: Request):
14
+ """获取缓存统计"""
15
+ from app.services.grok.utils.cache import CacheService
16
+
17
+ try:
18
+ cache_service = CacheService()
19
+ image_stats = cache_service.get_stats("image")
20
+ video_stats = cache_service.get_stats("video")
21
+
22
+ mgr = await get_token_manager()
23
+ pools = mgr.pools
24
+ accounts = []
25
+ for pool_name, pool in pools.items():
26
+ for info in pool.list():
27
+ raw_token = (
28
+ info.token[4:] if info.token.startswith("sso=") else info.token
29
+ )
30
+ masked = (
31
+ f"{raw_token[:8]}...{raw_token[-16:]}"
32
+ if len(raw_token) > 24
33
+ else raw_token
34
+ )
35
+ accounts.append(
36
+ {
37
+ "token": raw_token,
38
+ "token_masked": masked,
39
+ "pool": pool_name,
40
+ "status": info.status,
41
+ "last_asset_clear_at": info.last_asset_clear_at,
42
+ }
43
+ )
44
+
45
+ scope = request.query_params.get("scope")
46
+ selected_token = request.query_params.get("token")
47
+ tokens_param = request.query_params.get("tokens")
48
+ selected_tokens = []
49
+ if tokens_param:
50
+ selected_tokens = [t.strip() for t in tokens_param.split(",") if t.strip()]
51
+
52
+ online_stats = {
53
+ "count": 0,
54
+ "status": "unknown",
55
+ "token": None,
56
+ "last_asset_clear_at": None,
57
+ }
58
+ online_details = []
59
+ account_map = {a["token"]: a for a in accounts}
60
+ if selected_tokens:
61
+ total = 0
62
+ raw_results = await ListService.fetch_assets_details(
63
+ selected_tokens,
64
+ account_map,
65
+ )
66
+ for token, res in raw_results.items():
67
+ if res.get("ok"):
68
+ data = res.get("data", {})
69
+ detail = data.get("detail")
70
+ total += data.get("count", 0)
71
+ else:
72
+ account = account_map.get(token)
73
+ detail = {
74
+ "token": token,
75
+ "token_masked": account["token_masked"] if account else token,
76
+ "count": 0,
77
+ "status": f"error: {res.get('error')}",
78
+ "last_asset_clear_at": account["last_asset_clear_at"]
79
+ if account
80
+ else None,
81
+ }
82
+ if detail:
83
+ online_details.append(detail)
84
+ online_stats = {
85
+ "count": total,
86
+ "status": "ok" if selected_tokens else "no_token",
87
+ "token": None,
88
+ "last_asset_clear_at": None,
89
+ }
90
+ scope = "selected"
91
+ elif scope == "all":
92
+ total = 0
93
+ tokens = list(dict.fromkeys([account["token"] for account in accounts]))
94
+ raw_results = await ListService.fetch_assets_details(
95
+ tokens,
96
+ account_map,
97
+ )
98
+ for token, res in raw_results.items():
99
+ if res.get("ok"):
100
+ data = res.get("data", {})
101
+ detail = data.get("detail")
102
+ total += data.get("count", 0)
103
+ else:
104
+ account = account_map.get(token)
105
+ detail = {
106
+ "token": token,
107
+ "token_masked": account["token_masked"] if account else token,
108
+ "count": 0,
109
+ "status": f"error: {res.get('error')}",
110
+ "last_asset_clear_at": account["last_asset_clear_at"]
111
+ if account
112
+ else None,
113
+ }
114
+ if detail:
115
+ online_details.append(detail)
116
+ online_stats = {
117
+ "count": total,
118
+ "status": "ok" if accounts else "no_token",
119
+ "token": None,
120
+ "last_asset_clear_at": None,
121
+ }
122
+ else:
123
+ token = selected_token
124
+ if token:
125
+ raw_results = await ListService.fetch_assets_details(
126
+ [token],
127
+ account_map,
128
+ )
129
+ res = raw_results.get(token, {})
130
+ data = res.get("data", {})
131
+ detail = data.get("detail") if res.get("ok") else None
132
+ if detail:
133
+ online_stats = {
134
+ "count": data.get("count", 0),
135
+ "status": detail.get("status", "ok"),
136
+ "token": detail.get("token"),
137
+ "token_masked": detail.get("token_masked"),
138
+ "last_asset_clear_at": detail.get("last_asset_clear_at"),
139
+ }
140
+ else:
141
+ match = next((a for a in accounts if a["token"] == token), None)
142
+ online_stats = {
143
+ "count": 0,
144
+ "status": f"error: {res.get('error')}",
145
+ "token": token,
146
+ "token_masked": match["token_masked"] if match else token,
147
+ "last_asset_clear_at": match["last_asset_clear_at"]
148
+ if match
149
+ else None,
150
+ }
151
+ else:
152
+ online_stats = {
153
+ "count": 0,
154
+ "status": "not_loaded",
155
+ "token": None,
156
+ "last_asset_clear_at": None,
157
+ }
158
+
159
+ response = {
160
+ "local_image": image_stats,
161
+ "local_video": video_stats,
162
+ "online": online_stats,
163
+ "online_accounts": accounts,
164
+ "online_scope": scope or "none",
165
+ "online_details": online_details,
166
+ }
167
+ return response
168
+ except Exception as e:
169
+ raise HTTPException(status_code=500, detail=str(e))
170
+
171
+
172
+ @router.get("/cache/list", dependencies=[Depends(verify_app_key)])
173
+ async def list_local(
174
+ cache_type: str = "image",
175
+ type_: str = Query(default=None, alias="type"),
176
+ page: int = 1,
177
+ page_size: int = 1000,
178
+ ):
179
+ """列出本地缓存文件"""
180
+ from app.services.grok.utils.cache import CacheService
181
+
182
+ try:
183
+ if type_:
184
+ cache_type = type_
185
+ cache_service = CacheService()
186
+ result = cache_service.list_files(cache_type, page, page_size)
187
+ return {"status": "success", **result}
188
+ except Exception as e:
189
+ raise HTTPException(status_code=500, detail=str(e))
190
+
191
+
192
+ @router.post("/cache/clear", dependencies=[Depends(verify_app_key)])
193
+ async def clear_local(data: dict):
194
+ """清理本地缓存"""
195
+ from app.services.grok.utils.cache import CacheService
196
+
197
+ cache_type = data.get("type", "image")
198
+
199
+ try:
200
+ cache_service = CacheService()
201
+ result = cache_service.clear(cache_type)
202
+ return {"status": "success", "result": result}
203
+ except Exception as e:
204
+ raise HTTPException(status_code=500, detail=str(e))
205
+
206
+
207
+ @router.post("/cache/item/delete", dependencies=[Depends(verify_app_key)])
208
+ async def delete_local_item(data: dict):
209
+ """删除单个本地缓存文件"""
210
+ from app.services.grok.utils.cache import CacheService
211
+
212
+ cache_type = data.get("type", "image")
213
+ name = data.get("name")
214
+ if not name:
215
+ raise HTTPException(status_code=400, detail="Missing file name")
216
+ try:
217
+ cache_service = CacheService()
218
+ result = cache_service.delete_file(cache_type, name)
219
+ return {"status": "success", "result": result}
220
+ except Exception as e:
221
+ raise HTTPException(status_code=500, detail=str(e))
222
+
223
+
224
+ @router.post("/cache/online/clear", dependencies=[Depends(verify_app_key)])
225
+ async def clear_online(data: dict):
226
+ """清理在线缓存"""
227
+ try:
228
+ mgr = await get_token_manager()
229
+ tokens = data.get("tokens")
230
+
231
+ if isinstance(tokens, list):
232
+ token_list = [t.strip() for t in tokens if isinstance(t, str) and t.strip()]
233
+ if not token_list:
234
+ raise HTTPException(status_code=400, detail="No tokens provided")
235
+
236
+ token_list = list(dict.fromkeys(token_list))
237
+
238
+ results = {}
239
+ raw_results = await DeleteService.clear_assets(
240
+ token_list,
241
+ mgr,
242
+ )
243
+ for token, res in raw_results.items():
244
+ if res.get("ok"):
245
+ results[token] = res.get("data", {})
246
+ else:
247
+ results[token] = {"status": "error", "error": res.get("error")}
248
+
249
+ return {"status": "success", "results": results}
250
+
251
+ token = data.get("token") or mgr.get_token()
252
+ if not token:
253
+ raise HTTPException(
254
+ status_code=400, detail="No available token to perform cleanup"
255
+ )
256
+
257
+ raw_results = await DeleteService.clear_assets(
258
+ [token],
259
+ mgr,
260
+ )
261
+ res = raw_results.get(token, {})
262
+ data = res.get("data", {})
263
+ if res.get("ok") and data.get("status") == "success":
264
+ return {"status": "success", "result": data.get("result")}
265
+ return {"status": "error", "error": data.get("error") or res.get("error")}
266
+ except Exception as e:
267
+ raise HTTPException(status_code=500, detail=str(e))
268
+
269
+
270
+ @router.post("/cache/online/clear/async", dependencies=[Depends(verify_app_key)])
271
+ async def clear_online_async(data: dict):
272
+ """清理在线缓存(异步批量 + SSE 进度)"""
273
+ mgr = await get_token_manager()
274
+ tokens = data.get("tokens")
275
+ if not isinstance(tokens, list):
276
+ raise HTTPException(status_code=400, detail="No tokens provided")
277
+
278
+ token_list = [t.strip() for t in tokens if isinstance(t, str) and t.strip()]
279
+ if not token_list:
280
+ raise HTTPException(status_code=400, detail="No tokens provided")
281
+
282
+ task = create_task(len(token_list))
283
+
284
+ async def _run():
285
+ try:
286
+ async def _on_item(item: str, res: dict):
287
+ ok = bool(res.get("data", {}).get("ok"))
288
+ task.record(ok)
289
+
290
+ raw_results = await DeleteService.clear_assets(
291
+ token_list,
292
+ mgr,
293
+ include_ok=True,
294
+ on_item=_on_item,
295
+ should_cancel=lambda: task.cancelled,
296
+ )
297
+
298
+ if task.cancelled:
299
+ task.finish_cancelled()
300
+ return
301
+
302
+ results = {}
303
+ ok_count = 0
304
+ fail_count = 0
305
+ for token, res in raw_results.items():
306
+ data = res.get("data", {})
307
+ if data.get("ok"):
308
+ ok_count += 1
309
+ results[token] = {"status": "success", "result": data.get("result")}
310
+ else:
311
+ fail_count += 1
312
+ results[token] = {"status": "error", "error": data.get("error")}
313
+
314
+ result = {
315
+ "status": "success",
316
+ "summary": {
317
+ "total": len(token_list),
318
+ "ok": ok_count,
319
+ "fail": fail_count,
320
+ },
321
+ "results": results,
322
+ }
323
+ task.finish(result)
324
+ except Exception as e:
325
+ task.fail_task(str(e))
326
+ finally:
327
+ import asyncio
328
+ asyncio.create_task(expire_task(task.id, 300))
329
+
330
+ import asyncio
331
+ asyncio.create_task(_run())
332
+
333
+ return {
334
+ "status": "success",
335
+ "task_id": task.id,
336
+ "total": len(token_list),
337
+ }
338
+
339
+
340
+ @router.post("/cache/online/load/async", dependencies=[Depends(verify_app_key)])
341
+ async def load_cache_async(data: dict):
342
+ """在线资产统计(异步批量 + SSE 进度)"""
343
+ from app.services.grok.utils.cache import CacheService
344
+
345
+ mgr = await get_token_manager()
346
+
347
+ accounts = []
348
+ for pool_name, pool in mgr.pools.items():
349
+ for info in pool.list():
350
+ raw_token = info.token[4:] if info.token.startswith("sso=") else info.token
351
+ masked = (
352
+ f"{raw_token[:8]}...{raw_token[-16:]}"
353
+ if len(raw_token) > 24
354
+ else raw_token
355
+ )
356
+ accounts.append(
357
+ {
358
+ "token": raw_token,
359
+ "token_masked": masked,
360
+ "pool": pool_name,
361
+ "status": info.status,
362
+ "last_asset_clear_at": info.last_asset_clear_at,
363
+ }
364
+ )
365
+
366
+ account_map = {a["token"]: a for a in accounts}
367
+
368
+ tokens = data.get("tokens")
369
+ scope = data.get("scope")
370
+ selected_tokens: List[str] = []
371
+ if isinstance(tokens, list):
372
+ selected_tokens = [str(t).strip() for t in tokens if str(t).strip()]
373
+
374
+ if not selected_tokens and scope == "all":
375
+ selected_tokens = [account["token"] for account in accounts]
376
+ scope = "all"
377
+ elif selected_tokens:
378
+ scope = "selected"
379
+ else:
380
+ raise HTTPException(status_code=400, detail="No tokens provided")
381
+
382
+ task = create_task(len(selected_tokens))
383
+
384
+ async def _run():
385
+ try:
386
+ cache_service = CacheService()
387
+ image_stats = cache_service.get_stats("image")
388
+ video_stats = cache_service.get_stats("video")
389
+
390
+ async def _on_item(item: str, res: dict):
391
+ ok = bool(res.get("data", {}).get("ok"))
392
+ task.record(ok)
393
+
394
+ raw_results = await ListService.fetch_assets_details(
395
+ selected_tokens,
396
+ account_map,
397
+ include_ok=True,
398
+ on_item=_on_item,
399
+ should_cancel=lambda: task.cancelled,
400
+ )
401
+
402
+ if task.cancelled:
403
+ task.finish_cancelled()
404
+ return
405
+
406
+ online_details = []
407
+ total = 0
408
+ for token, res in raw_results.items():
409
+ data = res.get("data", {})
410
+ detail = data.get("detail")
411
+ if detail:
412
+ online_details.append(detail)
413
+ total += data.get("count", 0)
414
+
415
+ online_stats = {
416
+ "count": total,
417
+ "status": "ok" if selected_tokens else "no_token",
418
+ "token": None,
419
+ "last_asset_clear_at": None,
420
+ }
421
+
422
+ result = {
423
+ "local_image": image_stats,
424
+ "local_video": video_stats,
425
+ "online": online_stats,
426
+ "online_accounts": accounts,
427
+ "online_scope": scope or "none",
428
+ "online_details": online_details,
429
+ }
430
+ task.finish(result)
431
+ except Exception as e:
432
+ task.fail_task(str(e))
433
+ finally:
434
+ import asyncio
435
+ asyncio.create_task(expire_task(task.id, 300))
436
+
437
+ import asyncio
438
+ asyncio.create_task(_run())
439
+
440
+ return {
441
+ "status": "success",
442
+ "task_id": task.id,
443
+ "total": len(selected_tokens),
444
+ }
445
+
app/api/v1/admin_api/config.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
+ from fastapi import APIRouter, Depends, HTTPException
4
+
5
+ from app.core.auth import verify_app_key
6
+ from app.core.config import config
7
+ from app.core.storage import get_storage as resolve_storage, LocalStorage, RedisStorage, SQLStorage
8
+
9
+ router = APIRouter()
10
+
11
+
12
+ @router.get("/verify", dependencies=[Depends(verify_app_key)])
13
+ async def admin_verify():
14
+ """验证后台访问密钥(app_key)"""
15
+ return {"status": "success"}
16
+
17
+
18
+ @router.get("/config", dependencies=[Depends(verify_app_key)])
19
+ async def get_config():
20
+ """获取当前配置"""
21
+ # 暴露原始配置字典
22
+ return config._config
23
+
24
+
25
+ @router.post("/config", dependencies=[Depends(verify_app_key)])
26
+ async def update_config(data: dict):
27
+ """更新配置"""
28
+ try:
29
+ await config.update(data)
30
+ return {"status": "success", "message": "配置已更新"}
31
+ except Exception as e:
32
+ raise HTTPException(status_code=500, detail=str(e))
33
+
34
+
35
+ @router.get("/storage", dependencies=[Depends(verify_app_key)])
36
+ async def get_storage_mode():
37
+ """获取当前存储模式"""
38
+ storage_type = os.getenv("SERVER_STORAGE_TYPE", "").lower()
39
+ if not storage_type:
40
+ storage = resolve_storage()
41
+ if isinstance(storage, LocalStorage):
42
+ storage_type = "local"
43
+ elif isinstance(storage, RedisStorage):
44
+ storage_type = "redis"
45
+ elif isinstance(storage, SQLStorage):
46
+ storage_type = {
47
+ "mysql": "mysql",
48
+ "mariadb": "mysql",
49
+ "postgres": "pgsql",
50
+ "postgresql": "pgsql",
51
+ "pgsql": "pgsql",
52
+ }.get(storage.dialect, storage.dialect)
53
+ return {"type": storage_type or "local"}
app/api/v1/admin_api/token.py ADDED
@@ -0,0 +1,395 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+
3
+ import orjson
4
+ from fastapi import APIRouter, Depends, HTTPException, Request
5
+ from fastapi.responses import StreamingResponse
6
+
7
+ from app.core.auth import get_app_key, verify_app_key
8
+ from app.core.batch import create_task, expire_task, get_task
9
+ from app.core.logger import logger
10
+ from app.core.storage import get_storage
11
+ from app.services.grok.batch_services.usage import UsageService
12
+ from app.services.grok.batch_services.nsfw import NSFWService
13
+ from app.services.token.manager import get_token_manager
14
+
15
+ router = APIRouter()
16
+
17
+
18
+ @router.get("/tokens", dependencies=[Depends(verify_app_key)])
19
+ async def get_tokens():
20
+ """获取所有 Token"""
21
+ storage = get_storage()
22
+ tokens = await storage.load_tokens()
23
+ return tokens or {}
24
+
25
+
26
+ @router.post("/tokens", dependencies=[Depends(verify_app_key)])
27
+ async def update_tokens(data: dict):
28
+ """更新 Token 信息"""
29
+ storage = get_storage()
30
+ try:
31
+ from app.services.token.models import TokenInfo
32
+
33
+ async with storage.acquire_lock("tokens_save", timeout=10):
34
+ existing = await storage.load_tokens() or {}
35
+ normalized = {}
36
+ allowed_fields = set(TokenInfo.model_fields.keys())
37
+ existing_map = {}
38
+ for pool_name, tokens in existing.items():
39
+ if not isinstance(tokens, list):
40
+ continue
41
+ pool_map = {}
42
+ for item in tokens:
43
+ if isinstance(item, str):
44
+ token_data = {"token": item}
45
+ elif isinstance(item, dict):
46
+ token_data = dict(item)
47
+ else:
48
+ continue
49
+ raw_token = token_data.get("token")
50
+ if isinstance(raw_token, str) and raw_token.startswith("sso="):
51
+ token_data["token"] = raw_token[4:]
52
+ token_key = token_data.get("token")
53
+ if isinstance(token_key, str):
54
+ pool_map[token_key] = token_data
55
+ existing_map[pool_name] = pool_map
56
+ for pool_name, tokens in (data or {}).items():
57
+ if not isinstance(tokens, list):
58
+ continue
59
+ pool_list = []
60
+ for item in tokens:
61
+ if isinstance(item, str):
62
+ token_data = {"token": item}
63
+ elif isinstance(item, dict):
64
+ token_data = dict(item)
65
+ else:
66
+ continue
67
+
68
+ raw_token = token_data.get("token")
69
+ if isinstance(raw_token, str) and raw_token.startswith("sso="):
70
+ token_data["token"] = raw_token[4:]
71
+
72
+ base = existing_map.get(pool_name, {}).get(
73
+ token_data.get("token"), {}
74
+ )
75
+ merged = dict(base)
76
+ merged.update(token_data)
77
+ if merged.get("tags") is None:
78
+ merged["tags"] = []
79
+
80
+ filtered = {k: v for k, v in merged.items() if k in allowed_fields}
81
+ try:
82
+ info = TokenInfo(**filtered)
83
+ pool_list.append(info.model_dump())
84
+ except Exception as e:
85
+ logger.warning(f"Skip invalid token in pool '{pool_name}': {e}")
86
+ continue
87
+ normalized[pool_name] = pool_list
88
+
89
+ await storage.save_tokens(normalized)
90
+ mgr = await get_token_manager()
91
+ await mgr.reload()
92
+ return {"status": "success", "message": "Token 已更新"}
93
+ except Exception as e:
94
+ raise HTTPException(status_code=500, detail=str(e))
95
+
96
+
97
+ @router.post("/tokens/refresh", dependencies=[Depends(verify_app_key)])
98
+ async def refresh_tokens(data: dict):
99
+ """刷新 Token 状态"""
100
+ try:
101
+ mgr = await get_token_manager()
102
+ tokens = []
103
+ if isinstance(data.get("token"), str) and data["token"].strip():
104
+ tokens.append(data["token"].strip())
105
+ if isinstance(data.get("tokens"), list):
106
+ tokens.extend([str(t).strip() for t in data["tokens"] if str(t).strip()])
107
+
108
+ if not tokens:
109
+ raise HTTPException(status_code=400, detail="No tokens provided")
110
+
111
+ unique_tokens = list(dict.fromkeys(tokens))
112
+
113
+ raw_results = await UsageService.batch(
114
+ unique_tokens,
115
+ mgr,
116
+ )
117
+
118
+ results = {}
119
+ for token, res in raw_results.items():
120
+ if res.get("ok"):
121
+ results[token] = res.get("data", False)
122
+ else:
123
+ results[token] = False
124
+
125
+ response = {"status": "success", "results": results}
126
+ return response
127
+ except Exception as e:
128
+ raise HTTPException(status_code=500, detail=str(e))
129
+
130
+
131
+ @router.post("/tokens/refresh/async", dependencies=[Depends(verify_app_key)])
132
+ async def refresh_tokens_async(data: dict):
133
+ """刷新 Token 状态(异步批量 + SSE 进度)"""
134
+ mgr = await get_token_manager()
135
+ tokens = []
136
+ if isinstance(data.get("token"), str) and data["token"].strip():
137
+ tokens.append(data["token"].strip())
138
+ if isinstance(data.get("tokens"), list):
139
+ tokens.extend([str(t).strip() for t in data["tokens"] if str(t).strip()])
140
+
141
+ if not tokens:
142
+ raise HTTPException(status_code=400, detail="No tokens provided")
143
+
144
+ unique_tokens = list(dict.fromkeys(tokens))
145
+
146
+ task = create_task(len(unique_tokens))
147
+
148
+ async def _run():
149
+ try:
150
+
151
+ async def _on_item(item: str, res: dict):
152
+ task.record(bool(res.get("ok")))
153
+
154
+ raw_results = await UsageService.batch(
155
+ unique_tokens,
156
+ mgr,
157
+ on_item=_on_item,
158
+ should_cancel=lambda: task.cancelled,
159
+ )
160
+
161
+ if task.cancelled:
162
+ task.finish_cancelled()
163
+ return
164
+
165
+ results: dict[str, bool] = {}
166
+ ok_count = 0
167
+ fail_count = 0
168
+ for token, res in raw_results.items():
169
+ if res.get("ok") and res.get("data") is True:
170
+ ok_count += 1
171
+ results[token] = True
172
+ else:
173
+ fail_count += 1
174
+ results[token] = False
175
+
176
+ await mgr._save(force=True)
177
+
178
+ result = {
179
+ "status": "success",
180
+ "summary": {
181
+ "total": len(unique_tokens),
182
+ "ok": ok_count,
183
+ "fail": fail_count,
184
+ },
185
+ "results": results,
186
+ }
187
+ task.finish(result)
188
+ except Exception as e:
189
+ task.fail_task(str(e))
190
+ finally:
191
+ import asyncio
192
+ asyncio.create_task(expire_task(task.id, 300))
193
+
194
+ import asyncio
195
+ asyncio.create_task(_run())
196
+
197
+ return {
198
+ "status": "success",
199
+ "task_id": task.id,
200
+ "total": len(unique_tokens),
201
+ }
202
+
203
+
204
@router.get("/batch/{task_id}/stream")
async def batch_stream(task_id: str, request: Request):
    """SSE progress stream for a batch task.

    Authentication uses an ``app_key`` query parameter because EventSource
    clients cannot set custom headers. Emits a snapshot first, then live
    events; a ``: ping`` comment is sent every 15s as keep-alive.
    """
    app_key = get_app_key()
    if app_key:
        import hmac

        supplied = request.query_params.get("app_key") or ""
        # Constant-time comparison so the key cannot be probed via timing.
        if not hmac.compare_digest(supplied, app_key):
            raise HTTPException(status_code=401, detail="Invalid authentication token")
    task = get_task(task_id)
    if not task:
        raise HTTPException(status_code=404, detail="Task not found")

    async def event_stream():
        queue = task.attach()
        try:
            # Initial snapshot so late subscribers see the current progress.
            yield f"data: {orjson.dumps({'type': 'snapshot', **task.snapshot()}).decode()}\n\n"

            # Task may already be finished; deliver the final event and stop.
            final = task.final_event()
            if final:
                yield f"data: {orjson.dumps(final).decode()}\n\n"
                return

            while True:
                try:
                    event = await asyncio.wait_for(queue.get(), timeout=15)
                except asyncio.TimeoutError:
                    # Keep-alive comment; also re-check for a missed final event.
                    yield ": ping\n\n"
                    final = task.final_event()
                    if final:
                        yield f"data: {orjson.dumps(final).decode()}\n\n"
                        return
                    continue

                yield f"data: {orjson.dumps(event).decode()}\n\n"
                if event.get("type") in ("done", "error", "cancelled"):
                    return
        finally:
            # Always detach the queue so the task does not leak subscribers.
            task.detach(queue)

    return StreamingResponse(event_stream(), media_type="text/event-stream")
243
+
244
+
245
@router.post("/batch/{task_id}/cancel", dependencies=[Depends(verify_app_key)])
async def batch_cancel(task_id: str):
    """Mark a batch task as cancelled; workers observe the flag cooperatively."""
    task = get_task(task_id)
    if not task:
        raise HTTPException(status_code=404, detail="Task not found")
    task.cancel()
    return {"status": "success"}
252
+
253
+
254
@router.post("/tokens/nsfw/enable", dependencies=[Depends(verify_app_key)])
async def enable_nsfw(data: dict):
    """Batch-enable NSFW (Unhinged) mode.

    Accepts a single ``token`` and/or a ``tokens`` list in the body; when
    neither is provided, every token currently held in the manager's pools
    is used. Returns a summary plus per-token (masked) results.
    """
    try:
        mgr = await get_token_manager()

        tokens = []
        if isinstance(data.get("token"), str) and data["token"].strip():
            tokens.append(data["token"].strip())
        if isinstance(data.get("tokens"), list):
            tokens.extend(str(t).strip() for t in data["tokens"] if str(t).strip())

        if not tokens:
            # Fall back to every pooled token, stripping the cookie prefix.
            for pool in mgr.pools.values():
                tokens.extend(info.token.removeprefix("sso=") for info in pool.list())

        if not tokens:
            raise HTTPException(status_code=400, detail="No tokens available")

        # De-duplicate while preserving order.
        unique_tokens = list(dict.fromkeys(tokens))

        raw_results = await NSFWService.batch(unique_tokens, mgr)

        results = {}
        ok_count = 0
        fail_count = 0

        for token, res in raw_results.items():
            # Mask long tokens so secrets never appear verbatim in responses.
            masked = f"{token[:8]}...{token[-8:]}" if len(token) > 20 else token
            if res.get("ok") and res.get("data", {}).get("success"):
                ok_count += 1
                results[masked] = res.get("data", {})
            else:
                fail_count += 1
                results[masked] = res.get("data") or {"error": res.get("error")}

        return {
            "status": "success",
            "summary": {
                "total": len(unique_tokens),
                "ok": ok_count,
                "fail": fail_count,
            },
            "results": results,
        }

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Enable NSFW failed: {e}")
        raise HTTPException(status_code=500, detail=str(e))
314
+
315
+
316
@router.post("/tokens/nsfw/enable/async", dependencies=[Depends(verify_app_key)])
async def enable_nsfw_async(data: dict):
    """Batch-enable NSFW (Unhinged) mode asynchronously (progress via SSE).

    Creates a background task and returns its id immediately; progress and
    the final summary are then available from ``/batch/{task_id}/stream``.
    """
    mgr = await get_token_manager()

    tokens = []
    if isinstance(data.get("token"), str) and data["token"].strip():
        tokens.append(data["token"].strip())
    if isinstance(data.get("tokens"), list):
        tokens.extend(str(t).strip() for t in data["tokens"] if str(t).strip())

    if not tokens:
        # Fall back to every pooled token, stripping the cookie prefix.
        for pool in mgr.pools.values():
            tokens.extend(info.token.removeprefix("sso=") for info in pool.list())

    if not tokens:
        raise HTTPException(status_code=400, detail="No tokens available")

    # De-duplicate while preserving order.
    unique_tokens = list(dict.fromkeys(tokens))

    task = create_task(len(unique_tokens))

    async def _run():
        try:

            async def _on_item(item: str, res: dict):
                # Per-item progress: only a successful enable counts as ok.
                task.record(bool(res.get("ok") and res.get("data", {}).get("success")))

            raw_results = await NSFWService.batch(
                unique_tokens,
                mgr,
                on_item=_on_item,
                should_cancel=lambda: task.cancelled,
            )

            if task.cancelled:
                task.finish_cancelled()
                return

            results = {}
            ok_count = 0
            fail_count = 0
            for token, res in raw_results.items():
                # Mask long tokens so secrets never appear verbatim in results.
                masked = f"{token[:8]}...{token[-8:]}" if len(token) > 20 else token
                if res.get("ok") and res.get("data", {}).get("success"):
                    ok_count += 1
                    results[masked] = res.get("data", {})
                else:
                    fail_count += 1
                    results[masked] = res.get("data") or {"error": res.get("error")}

            # Persist any token-state changes made during the batch.
            await mgr._save(force=True)

            task.finish(
                {
                    "status": "success",
                    "summary": {
                        "total": len(unique_tokens),
                        "ok": ok_count,
                        "fail": fail_count,
                    },
                    "results": results,
                }
            )
        except Exception as e:
            task.fail_task(str(e))
        finally:
            # asyncio is imported at module level (used bare in batch_stream);
            # schedule expiry so finished task records do not leak.
            asyncio.create_task(expire_task(task.id, 300))

    asyncio.create_task(_run())

    return {
        "status": "success",
        "task_id": task.id,
        "total": len(unique_tokens),
    }
app/api/v1/chat.py ADDED
@@ -0,0 +1,862 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Chat Completions API 路由
3
+ """
4
+
5
+ from typing import Any, AsyncGenerator, AsyncIterable, Dict, List, Optional, Union
6
+ import base64
7
+ import binascii
8
+ import time
9
+ import uuid
10
+
11
+ from fastapi import APIRouter
12
+ from fastapi.responses import StreamingResponse, JSONResponse
13
+ from pydantic import BaseModel, Field
14
+ import orjson
15
+
16
+ from app.services.grok.services.chat import ChatService
17
+ from app.services.grok.services.image import ImageGenerationService
18
+ from app.services.grok.services.image_edit import ImageEditService
19
+ from app.services.grok.services.model import ModelService
20
+ from app.services.grok.services.video import VideoService
21
+ from app.services.grok.utils.response import make_chat_response
22
+ from app.services.token import get_token_manager
23
+ from app.core.config import get_config
24
+ from app.core.exceptions import ValidationException, AppException, ErrorType
25
+
26
+
27
class MessageItem(BaseModel):
    """A single chat message (OpenAI chat format)."""

    # Sender role; validate_request restricts this to VALID_ROLES.
    role: str
    # Plain text, a single content block, or a list of content blocks; may be
    # None for assistant/tool messages (tool-call interim states).
    content: Optional[Union[str, Dict[str, Any], List[Dict[str, Any]]]]
    # Tool invocations attached to an assistant message.
    tool_calls: Optional[List[Dict[str, Any]]] = None
    # Required when role == "tool" (checked in validate_request).
    tool_call_id: Optional[str] = None
    name: Optional[str] = None
35
+
36
+
37
class VideoConfig(BaseModel):
    """Video generation options (accepted values enforced in validate_request)."""

    aspect_ratio: Optional[str] = Field("3:2", description="视频比例: 1280x720(16:9), 720x1280(9:16), 1792x1024(3:2), 1024x1792(2:3), 1024x1024(1:1)")
    video_length: Optional[int] = Field(6, description="视频时长(秒): 6 / 10 / 15")
    resolution_name: Optional[str] = Field("480p", description="视频分辨率: 480p, 720p")
    preset: Optional[str] = Field("custom", description="风格预设: fun, normal, spicy")
44
+
45
+
46
class ImageConfig(BaseModel):
    """Image generation options (accepted values enforced in _validate_image_config)."""

    n: Optional[int] = Field(1, ge=1, le=10, description="生成数量 (1-10)")
    size: Optional[str] = Field("1024x1024", description="图片尺寸")
    response_format: Optional[str] = Field(None, description="响应格式")
52
+
53
+
54
class ChatCompletionRequest(BaseModel):
    """Chat Completions request body (OpenAI-compatible, plus media extensions)."""

    model: str = Field(..., description="模型名称")
    messages: List[MessageItem] = Field(..., description="消息数组")
    stream: Optional[bool] = Field(None, description="是否流式输出")
    reasoning_effort: Optional[str] = Field(None, description="推理强度: none/minimal/low/medium/high/xhigh")
    temperature: Optional[float] = Field(0.8, description="采样温度: 0-2")
    top_p: Optional[float] = Field(0.95, description="nucleus 采样: 0-1")
    # Video generation config (video models only).
    video_config: Optional[VideoConfig] = Field(None, description="视频生成参数")
    # Image generation config (image / image-edit models only).
    image_config: Optional[ImageConfig] = Field(None, description="图片生成参数")
    # Tool calling (OpenAI function-calling schema).
    tools: Optional[List[Dict[str, Any]]] = Field(None, description="Tool definitions")
    tool_choice: Optional[Union[str, Dict[str, Any]]] = Field(None, description="Tool choice: auto/required/none/specific")
    parallel_tool_calls: Optional[bool] = Field(True, description="Allow parallel tool calls")
71
+
72
+
73
# Message roles accepted by the OpenAI-compatible API.
VALID_ROLES = {"developer", "system", "user", "assistant", "tool"}
# Content-block types permitted inside user messages; other roles are text-only.
USER_CONTENT_TYPES = {"text", "image_url", "input_audio", "file"}
# Image sizes (width x height) accepted for generation/edit requests.
ALLOWED_IMAGE_SIZES = {
    "1280x720",
    "720x1280",
    "1792x1024",
    "1024x1792",
    "1024x1024",
}
# Model id whose image parameters come from server config, not the client.
IMAGINE_FAST_MODEL_ID = "grok-imagine-1.0-fast"
83
+
84
+
85
+ def _validate_media_input(value: str, field_name: str, param: str):
86
+ """Verify media input is a valid URL or data URI"""
87
+ if not isinstance(value, str) or not value.strip():
88
+ raise ValidationException(
89
+ message=f"{field_name} cannot be empty",
90
+ param=param,
91
+ code="empty_media",
92
+ )
93
+ value = value.strip()
94
+ if value.startswith("data:"):
95
+ return
96
+ if value.startswith("http://") or value.startswith("https://"):
97
+ return
98
+ candidate = "".join(value.split())
99
+ if len(candidate) >= 32 and len(candidate) % 4 == 0:
100
+ try:
101
+ base64.b64decode(candidate, validate=True)
102
+ raise ValidationException(
103
+ message=f"{field_name} base64 must be provided as a data URI (data:<mime>;base64,...)",
104
+ param=param,
105
+ code="invalid_media",
106
+ )
107
+ except binascii.Error:
108
+ pass
109
+ raise ValidationException(
110
+ message=f"{field_name} must be a URL or data URI",
111
+ param=param,
112
+ code="invalid_media",
113
+ )
114
+
115
+
116
def _extract_prompt_images(messages: List[MessageItem]) -> tuple[str, List[str]]:
    """Return (last non-empty text, user-supplied image URLs) from the messages."""
    prompt = ""
    images: List[str] = []

    for msg in messages:
        role = msg.role or "user"
        content = msg.content

        # Plain string content: remember the latest non-empty text.
        if isinstance(content, str):
            stripped = content.strip()
            if stripped:
                prompt = stripped
            continue

        # Normalize a single block to a one-element list; skip anything else.
        blocks = [content] if isinstance(content, dict) else content
        if not isinstance(blocks, list):
            continue

        for block in blocks:
            if not isinstance(block, dict):
                continue
            kind = block.get("type")
            if kind == "text":
                text = block.get("text", "")
                if isinstance(text, str) and text.strip():
                    prompt = text.strip()
            elif kind == "image_url" and role == "user":
                # Only user messages contribute images.
                url = (block.get("image_url") or {}).get("url", "")
                if isinstance(url, str) and url.strip():
                    images.append(url.strip())

    return prompt, images
148
+
149
+
150
+ def _resolve_image_format(value: Optional[str]) -> str:
151
+ fmt = value or get_config("app.image_format") or "url"
152
+ if isinstance(fmt, str):
153
+ fmt = fmt.lower()
154
+ if fmt == "base64":
155
+ return "b64_json"
156
+ if fmt in ("b64_json", "url"):
157
+ return fmt
158
+ raise ValidationException(
159
+ message="image_format must be one of url, base64, b64_json",
160
+ param="image_format",
161
+ code="invalid_image_format",
162
+ )
163
+
164
+
165
+ def _image_field(response_format: str) -> str:
166
+ if response_format == "url":
167
+ return "url"
168
+ return "b64_json"
169
+
170
+
171
def _imagine_fast_server_image_config() -> ImageConfig:
    """Load server-side image generation parameters for grok-imagine-1.0-fast.

    Values come from the imagine_fast.* config keys, with app.image_format
    (then "url") as the response-format fallback.
    """
    count = int(get_config("imagine_fast.n", 1) or 1)
    size = str(get_config("imagine_fast.size", "1024x1024") or "1024x1024")
    fallback_format = get_config("app.image_format") or "url"
    fmt = str(get_config("imagine_fast.response_format", fallback_format) or "url")
    return ImageConfig(n=count, size=size, response_format=fmt)
180
+
181
+
182
+ async def _safe_sse_stream(stream: AsyncIterable[str]) -> AsyncGenerator[str, None]:
183
+ """Ensure streaming endpoints return SSE error payloads instead of transport-level 5xx breaks."""
184
+ try:
185
+ async for chunk in stream:
186
+ yield chunk
187
+ except AppException as e:
188
+ payload = {
189
+ "error": {
190
+ "message": e.message,
191
+ "type": e.error_type,
192
+ "code": e.code,
193
+ }
194
+ }
195
+ yield f"event: error\ndata: {orjson.dumps(payload).decode()}\n\n"
196
+ yield "data: [DONE]\n\n"
197
+ except Exception as e:
198
+ payload = {
199
+ "error": {
200
+ "message": str(e) or "stream_error",
201
+ "type": "server_error",
202
+ "code": "stream_error",
203
+ }
204
+ }
205
+ yield f"event: error\ndata: {orjson.dumps(payload).decode()}\n\n"
206
+ yield "data: [DONE]\n\n"
207
+
208
+
209
def _streaming_error_response(exc: Exception) -> StreamingResponse:
    """Wrap an exception as a one-shot SSE error stream.

    Used when a streaming client would otherwise receive a transport-level
    5xx before the stream started.
    """
    if isinstance(exc, AppException):
        payload = {
            "error": {
                "message": exc.message,
                "type": exc.error_type,
                "code": exc.code,
            }
        }
    else:
        payload = {
            "error": {
                "message": str(exc) or "stream_error",
                "type": "server_error",
                "code": "stream_error",
            }
        }

    async def _one_shot_error():
        yield f"event: error\ndata: {orjson.dumps(payload).decode()}\n\n"
        yield "data: [DONE]\n\n"

    return StreamingResponse(
        _one_shot_error(),
        media_type="text/event-stream",
        headers={"Cache-Control": "no-cache", "Connection": "keep-alive"},
    )
236
+
237
def _validate_image_config(image_conf: ImageConfig, *, stream: bool):
    """Validate image_config fields (n, response_format, size); raise on violation."""
    count = image_conf.n or 1
    if count < 1 or count > 10:
        raise ValidationException(
            message="n must be between 1 and 10",
            param="image_config.n",
            code="invalid_n",
        )
    # Streaming image generation only supports 1 or 2 images.
    if stream and count not in (1, 2):
        raise ValidationException(
            message="Streaming is only supported when n=1 or n=2",
            param="image_config.n",
            code="invalid_stream_n",
        )
    fmt = image_conf.response_format
    if fmt and fmt not in {"b64_json", "base64", "url"}:
        raise ValidationException(
            message="response_format must be one of b64_json, base64, url",
            param="image_config.response_format",
            code="invalid_response_format",
        )
    if image_conf.size and image_conf.size not in ALLOWED_IMAGE_SIZES:
        raise ValidationException(
            message=f"size must be one of {sorted(ALLOWED_IMAGE_SIZES)}",
            param="image_config.size",
            code="invalid_size",
        )
265
def validate_request(request: ChatCompletionRequest):
    """Validate and normalize a chat completion request in place.

    Checks the model, every message, sampling parameters, tool definitions
    and the media (image / image-edit / video) configs; raises
    ValidationException on the first violation. Normalized defaults
    (temperature, top_p, image/video configs) are written back onto request.
    """
    # Model must exist and be accessible.
    if not ModelService.valid(request.model):
        raise ValidationException(
            message=f"The model `{request.model}` does not exist or you do not have access to it.",
            param="model",
            code="model_not_found",
        )

    # Validate every message.
    for idx, msg in enumerate(request.messages):
        if not isinstance(msg.role, str) or msg.role not in VALID_ROLES:
            raise ValidationException(
                message=f"role must be one of {sorted(VALID_ROLES)}",
                param=f"messages.{idx}.role",
                code="invalid_role",
            )

        # tool role: requires tool_call_id, content can be None/empty.
        if msg.role == "tool":
            if not msg.tool_call_id:
                raise ValidationException(
                    message="tool messages must have a 'tool_call_id' field",
                    param=f"messages.{idx}.tool_call_id",
                    code="missing_tool_call_id",
                )
            continue

        # assistant with tool_calls: content can be None.
        if msg.role == "assistant" and msg.tool_calls:
            continue

        content = msg.content

        # Some clients send empty assistant/tool content (tool-call interim states).
        if content is None:
            if msg.role in {"assistant", "tool"}:
                continue
            raise ValidationException(
                message="Message content cannot be null",
                param=f"messages.{idx}.content",
                code="empty_content",
            )

        # String content.
        if isinstance(content, str):
            if not content.strip():
                raise ValidationException(
                    message="Message content cannot be empty",
                    param=f"messages.{idx}.content",
                    code="empty_content",
                )

        # Single object content: must be a non-empty text block.
        elif isinstance(content, dict):
            content = [content]
            for c_idx, item in enumerate(content):
                if not isinstance(item, dict):
                    raise ValidationException(
                        message="Message content items must be objects",
                        param=f"messages.{idx}.content.{c_idx}",
                        code="invalid_content_item",
                    )
                item_type = item.get("type")
                if item_type != "text":
                    raise ValidationException(
                        message="When content is an object, type must be 'text'",
                        param=f"messages.{idx}.content.{c_idx}.type",
                        code="invalid_content_type",
                    )
                text = item.get("text", "")
                if not isinstance(text, str) or not text.strip():
                    raise ValidationException(
                        message="messages.%d.content.%d.text must be a non-empty string"
                        % (idx, c_idx),
                        param=f"messages.{idx}.content.{c_idx}.text",
                        code="empty_content",
                    )

        # List content.
        elif isinstance(content, list):
            if not content:
                raise ValidationException(
                    message="Message content cannot be an empty array",
                    param=f"messages.{idx}.content",
                    code="empty_content",
                )

            for block_idx, block in enumerate(content):
                # Must be a non-empty object.
                if not isinstance(block, dict):
                    raise ValidationException(
                        message="Content block must be an object",
                        param=f"messages.{idx}.content.{block_idx}",
                        code="invalid_block",
                    )
                if not block:
                    raise ValidationException(
                        message="Content block cannot be empty",
                        param=f"messages.{idx}.content.{block_idx}",
                        code="empty_block",
                    )

                # 'type' must be present and a non-empty string.
                if "type" not in block:
                    raise ValidationException(
                        message="Content block must have a 'type' field",
                        param=f"messages.{idx}.content.{block_idx}",
                        code="missing_type",
                    )

                block_type = block.get("type")

                if (
                    not block_type
                    or not isinstance(block_type, str)
                    or not block_type.strip()
                ):
                    raise ValidationException(
                        message="Content block 'type' cannot be empty",
                        param=f"messages.{idx}.content.{block_idx}.type",
                        code="empty_type",
                    )

                # Only user messages may carry non-text blocks.
                if msg.role == "user":
                    if block_type not in USER_CONTENT_TYPES:
                        raise ValidationException(
                            message=f"Invalid content block type: '{block_type}'",
                            param=f"messages.{idx}.content.{block_idx}.type",
                            code="invalid_type",
                        )
                else:
                    if block_type != "text":
                        raise ValidationException(
                            message=f"The `{msg.role}` role only supports 'text' type, got '{block_type}'",
                            param=f"messages.{idx}.content.{block_idx}.type",
                            code="invalid_type",
                        )

                # Per-type payload presence / non-emptiness checks.
                if block_type == "text":
                    text = block.get("text", "")
                    if not isinstance(text, str) or not text.strip():
                        raise ValidationException(
                            message="Text content cannot be empty",
                            param=f"messages.{idx}.content.{block_idx}.text",
                            code="empty_text",
                        )
                elif block_type == "image_url":
                    image_url = block.get("image_url")
                    if not image_url or not isinstance(image_url, dict):
                        raise ValidationException(
                            message="image_url must have a 'url' field",
                            param=f"messages.{idx}.content.{block_idx}.image_url",
                            code="missing_url",
                        )
                    _validate_media_input(
                        image_url.get("url", ""),
                        "image_url.url",
                        f"messages.{idx}.content.{block_idx}.image_url.url",
                    )
                elif block_type == "input_audio":
                    audio = block.get("input_audio")
                    if not audio or not isinstance(audio, dict):
                        raise ValidationException(
                            message="input_audio must have a 'data' field",
                            param=f"messages.{idx}.content.{block_idx}.input_audio",
                            code="missing_audio",
                        )
                    _validate_media_input(
                        audio.get("data", ""),
                        "input_audio.data",
                        f"messages.{idx}.content.{block_idx}.input_audio.data",
                    )
                elif block_type == "file":
                    file_data = block.get("file")
                    if not file_data or not isinstance(file_data, dict):
                        raise ValidationException(
                            message="file must have a 'file_data' field",
                            param=f"messages.{idx}.content.{block_idx}.file",
                            code="missing_file",
                        )
                    _validate_media_input(
                        file_data.get("file_data", ""),
                        "file.file_data",
                        f"messages.{idx}.content.{block_idx}.file.file_data",
                    )
        # NOTE: a former `elif content is None:` branch here was unreachable
        # (None is handled before the isinstance chain) and has been removed.
        else:
            raise ValidationException(
                message="Message content must be a string or array",
                param=f"messages.{idx}.content",
                code="invalid_content",
            )

    # Normalize stream to a bool (accepts "true"/"false"-style strings).
    if request.stream is not None:
        if isinstance(request.stream, bool):
            pass
        elif isinstance(request.stream, str):
            if request.stream.lower() in ("true", "1", "yes"):
                request.stream = True
            elif request.stream.lower() in ("false", "0", "no"):
                request.stream = False
            else:
                raise ValidationException(
                    message="stream must be a boolean",
                    param="stream",
                    code="invalid_stream",
                )
        else:
            raise ValidationException(
                message="stream must be a boolean",
                param="stream",
                code="invalid_stream",
            )

    allowed_efforts = {"none", "minimal", "low", "medium", "high", "xhigh"}
    if request.reasoning_effort is not None:
        if not isinstance(request.reasoning_effort, str) or (
            request.reasoning_effort not in allowed_efforts
        ):
            raise ValidationException(
                message=f"reasoning_effort must be one of {sorted(allowed_efforts)}",
                param="reasoning_effort",
                code="invalid_reasoning_effort",
            )

    # temperature: default 0.8, range [0, 2].
    if request.temperature is None:
        request.temperature = 0.8
    else:
        try:
            request.temperature = float(request.temperature)
        except Exception:
            raise ValidationException(
                message="temperature must be a float",
                param="temperature",
                code="invalid_temperature",
            )
        if not (0 <= request.temperature <= 2):
            raise ValidationException(
                message="temperature must be between 0 and 2",
                param="temperature",
                code="invalid_temperature",
            )

    # top_p: default 0.95, range [0, 1].
    if request.top_p is None:
        request.top_p = 0.95
    else:
        try:
            request.top_p = float(request.top_p)
        except Exception:
            raise ValidationException(
                message="top_p must be a float",
                param="top_p",
                code="invalid_top_p",
            )
        if not (0 <= request.top_p <= 1):
            raise ValidationException(
                message="top_p must be between 0 and 1",
                param="top_p",
                code="invalid_top_p",
            )

    # Validate tool definitions.
    if request.tools is not None:
        if not isinstance(request.tools, list):
            raise ValidationException(
                message="tools must be an array",
                param="tools",
                code="invalid_tools",
            )
        for t_idx, tool in enumerate(request.tools):
            if not isinstance(tool, dict) or tool.get("type") != "function":
                raise ValidationException(
                    message="Each tool must have type='function'",
                    param=f"tools.{t_idx}.type",
                    code="invalid_tool_type",
                )
            func = tool.get("function")
            if not isinstance(func, dict) or not func.get("name"):
                raise ValidationException(
                    message="Each tool function must have a 'name'",
                    param=f"tools.{t_idx}.function.name",
                    code="missing_function_name",
                )

    # Validate tool_choice.
    if request.tool_choice is not None:
        if isinstance(request.tool_choice, str):
            if request.tool_choice not in ("auto", "required", "none"):
                raise ValidationException(
                    message="tool_choice must be 'auto', 'required', 'none', or a specific function object",
                    param="tool_choice",
                    code="invalid_tool_choice",
                )
        elif isinstance(request.tool_choice, dict):
            if request.tool_choice.get("type") != "function" or not request.tool_choice.get("function", {}).get("name"):
                raise ValidationException(
                    message="tool_choice object must have type='function' and function.name",
                    param="tool_choice",
                    code="invalid_tool_choice",
                )

    model_info = ModelService.get(request.model)
    # Image (generation / edit) validation.
    if model_info and (model_info.is_image or model_info.is_image_edit):
        prompt, _ = _extract_prompt_images(request.messages)
        if not prompt:
            raise ValidationException(
                message="Prompt cannot be empty",
                param="messages",
                code="empty_prompt",
            )
        image_conf = (
            _imagine_fast_server_image_config()
            if request.model == IMAGINE_FAST_MODEL_ID
            else (request.image_config or ImageConfig())
        )
        n = image_conf.n or 1
        if not (1 <= n <= 10):
            raise ValidationException(
                message="n must be between 1 and 10",
                param="image_config.n",
                code="invalid_n",
            )
        if request.stream and n not in (1, 2):
            raise ValidationException(
                message="Streaming is only supported when n=1 or n=2",
                param="stream",
                code="invalid_stream_n",
            )

        response_format = _resolve_image_format(image_conf.response_format)
        image_conf.n = n
        image_conf.response_format = response_format
        if not image_conf.size:
            image_conf.size = "1024x1024"
        # Use the module-level constant instead of a duplicated literal.
        if image_conf.size not in ALLOWED_IMAGE_SIZES:
            raise ValidationException(
                message=f"size must be one of {sorted(ALLOWED_IMAGE_SIZES)}",
                param="image_config.size",
                code="invalid_size",
            )
        request.image_config = image_conf

    # Image edit requires at least one input image.
    if model_info and model_info.is_image_edit:
        _, image_urls = _extract_prompt_images(request.messages)
        if not image_urls:
            raise ValidationException(
                message="image_url is required for image edits",
                param="messages",
                code="missing_image",
            )

    # Video validation.
    if model_info and model_info.is_video:
        config = request.video_config or VideoConfig()
        # Accept both pixel sizes and ratio strings; normalize to ratios.
        ratio_map = {
            "1280x720": "16:9",
            "720x1280": "9:16",
            "1792x1024": "3:2",
            "1024x1792": "2:3",
            "1024x1024": "1:1",
            "16:9": "16:9",
            "9:16": "9:16",
            "3:2": "3:2",
            "2:3": "2:3",
            "1:1": "1:1",
        }
        if config.aspect_ratio is None:
            config.aspect_ratio = "3:2"
        if config.aspect_ratio not in ratio_map:
            raise ValidationException(
                message=f"aspect_ratio must be one of {list(ratio_map.keys())}",
                param="video_config.aspect_ratio",
                code="invalid_aspect_ratio",
            )
        config.aspect_ratio = ratio_map[config.aspect_ratio]

        if config.video_length not in (6, 10, 15):
            raise ValidationException(
                message="video_length must be 6, 10, or 15 seconds",
                param="video_config.video_length",
                code="invalid_video_length",
            )
        if config.resolution_name not in ("480p", "720p"):
            raise ValidationException(
                message="resolution_name must be one of ['480p', '720p']",
                param="video_config.resolution_name",
                code="invalid_resolution",
            )
        if config.preset not in ("fun", "normal", "spicy", "custom"):
            raise ValidationException(
                message="preset must be one of ['fun', 'normal', 'spicy', 'custom']",
                param="video_config.preset",
                code="invalid_preset",
            )
        request.video_config = config
675
+
676
+
677
# Router for the OpenAI-compatible chat endpoints.
router = APIRouter(tags=["Chat"])
678
+
679
+
680
def _acquire_model_token(token_mgr, model: str):
    """Return the first available token across the model's candidate pools.

    Raises a 429 AppException when every pool is exhausted. Extracted to
    remove the verbatim duplication between the image-edit and image paths.
    """
    for pool_name in ModelService.pool_candidates_for_model(model):
        token = token_mgr.get_token(pool_name)
        if token:
            return token
    raise AppException(
        message="No available tokens. Please try again later.",
        error_type=ErrorType.RATE_LIMIT.value,
        code="rate_limit_exceeded",
        status_code=429,
    )


@router.post("/chat/completions")
async def chat_completions(request: ChatCompletionRequest):
    """Chat Completions API - OpenAI compatible.

    Dispatches to image-edit, image-generation, video or plain chat handling
    based on the resolved model's capabilities.
    """
    from app.core.logger import logger

    # Parameter validation (also normalizes defaults on the request).
    validate_request(request)

    logger.debug(f"Chat request: model={request.model}, stream={request.stream}")

    # Detect the model type and branch accordingly.
    model_info = ModelService.get(request.model)
    if model_info and model_info.is_image_edit:
        prompt, image_urls = _extract_prompt_images(request.messages)
        if not image_urls:
            raise ValidationException(
                message="Image is required",
                param="image",
                code="missing_image",
            )

        is_stream = (
            request.stream if request.stream is not None else get_config("app.stream")
        )
        image_conf = request.image_config or ImageConfig()
        _validate_image_config(image_conf, stream=bool(is_stream))
        response_format = _resolve_image_format(image_conf.response_format)
        n = image_conf.n or 1

        token_mgr = await get_token_manager()
        await token_mgr.reload_if_stale()
        token = _acquire_model_token(token_mgr, request.model)

        result = await ImageEditService().edit(
            token_mgr=token_mgr,
            token=token,
            model_info=model_info,
            prompt=prompt,
            images=image_urls,
            n=n,
            response_format=response_format,
            stream=bool(is_stream),
            chat_format=True,
        )

        if result.stream:
            return StreamingResponse(
                _safe_sse_stream(result.data),
                media_type="text/event-stream",
                headers={"Cache-Control": "no-cache", "Connection": "keep-alive"},
            )

        content = result.data[0] if result.data else ""
        return JSONResponse(
            content=make_chat_response(request.model, content)
        )

    if model_info and model_info.is_image:
        prompt, _ = _extract_prompt_images(request.messages)

        is_stream = (
            request.stream if request.stream is not None else get_config("app.stream")
        )
        image_conf = (
            _imagine_fast_server_image_config()
            if request.model == IMAGINE_FAST_MODEL_ID
            else (request.image_config or ImageConfig())
        )
        _validate_image_config(image_conf, stream=bool(is_stream))
        response_format = _resolve_image_format(image_conf.response_format)
        n = image_conf.n or 1
        size = image_conf.size or "1024x1024"
        aspect_ratio_map = {
            "1280x720": "16:9",
            "720x1280": "9:16",
            "1792x1024": "3:2",
            "1024x1792": "2:3",
            "1024x1024": "1:1",
        }
        aspect_ratio = aspect_ratio_map.get(size, "2:3")

        token_mgr = await get_token_manager()
        await token_mgr.reload_if_stale()
        token = _acquire_model_token(token_mgr, request.model)

        result = await ImageGenerationService().generate(
            token_mgr=token_mgr,
            token=token,
            model_info=model_info,
            prompt=prompt,
            n=n,
            response_format=response_format,
            size=size,
            aspect_ratio=aspect_ratio,
            stream=bool(is_stream),
            chat_format=True,
        )

        if result.stream:
            return StreamingResponse(
                _safe_sse_stream(result.data),
                media_type="text/event-stream",
                headers={"Cache-Control": "no-cache", "Connection": "keep-alive"},
            )

        content = result.data[0] if result.data else ""
        usage = result.usage_override
        return JSONResponse(
            content=make_chat_response(request.model, content, usage=usage)
        )

    if model_info and model_info.is_video:
        # Video config defaults are handled by the pydantic model.
        v_conf = request.video_config or VideoConfig()

        try:
            result = await VideoService.completions(
                model=request.model,
                messages=[msg.model_dump() for msg in request.messages],
                stream=request.stream,
                reasoning_effort=request.reasoning_effort,
                aspect_ratio=v_conf.aspect_ratio,
                video_length=v_conf.video_length,
                resolution=v_conf.resolution_name,
                preset=v_conf.preset,
            )
        except Exception as e:
            # Clients that asked for (or defaulted to) streaming expect SSE errors.
            if request.stream is not False:
                return _streaming_error_response(e)
            raise
    else:
        try:
            result = await ChatService.completions(
                model=request.model,
                messages=[msg.model_dump() for msg in request.messages],
                stream=request.stream,
                reasoning_effort=request.reasoning_effort,
                temperature=request.temperature,
                top_p=request.top_p,
                tools=request.tools,
                tool_choice=request.tool_choice,
                parallel_tool_calls=request.parallel_tool_calls,
            )
        except Exception as e:
            if request.stream is not False:
                return _streaming_error_response(e)
            raise

    if isinstance(result, dict):
        return JSONResponse(content=result)
    return StreamingResponse(
        _safe_sse_stream(result),
        media_type="text/event-stream",
        headers={"Cache-Control": "no-cache", "Connection": "keep-alive"},
    )
860
+
861
+
862
# Explicit public API of this module.
__all__ = ["router"]
app/api/v1/files.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ 文件服务 API 路由
3
+ """
4
+
5
+ import aiofiles.os
6
+ from pathlib import Path
7
+ from fastapi import APIRouter, HTTPException
8
+ from fastapi.responses import FileResponse
9
+
10
+ from app.core.logger import logger
11
+ from app.core.storage import DATA_DIR
12
+
13
router = APIRouter(tags=["Files"])

# Cache root directories: media files are looked up under DATA_DIR/tmp in
# flat per-type folders (presumably populated by the image/video services —
# confirm against the writers in app.services).
BASE_DIR = DATA_DIR / "tmp"
IMAGE_DIR = BASE_DIR / "image"
VIDEO_DIR = BASE_DIR / "video"
19
+
20
+
21
@router.get("/image/{filename:path}")
async def get_image(filename: str):
    """
    Serve a cached image file from the local image cache directory.

    Args:
        filename: Requested file name. Embedded "/" separators are folded
            into "-" so lookups always target the flat cache layout.

    Returns:
        FileResponse with a long-lived immutable cache header.

    Raises:
        HTTPException: 404 when the file is missing, not a regular file,
            or resolves outside the image cache directory.
    """
    # Flatten path separators so nested request paths map onto flat names.
    if "/" in filename:
        filename = filename.replace("/", "-")

    file_path = IMAGE_DIR / filename

    # Hardening: reject anything that escapes the cache directory
    # (e.g. a bare ".." component that survives the flattening above).
    try:
        file_path.resolve().relative_to(IMAGE_DIR.resolve())
    except ValueError:
        logger.warning(f"Image path rejected: {filename}")
        raise HTTPException(status_code=404, detail="Image not found")

    # isfile() implies existence, so a single check suffices.
    if await aiofiles.os.path.isfile(file_path):
        # Pick the media type from the extension; JPEG is the default.
        content_type = {
            ".png": "image/png",
            ".webp": "image/webp",
        }.get(file_path.suffix.lower(), "image/jpeg")

        # Long-lived immutable caching so browsers/CDNs can absorb load
        # in high-concurrency scenarios.
        return FileResponse(
            file_path,
            media_type=content_type,
            headers={"Cache-Control": "public, max-age=31536000, immutable"},
        )

    # Fix: the original logged a constant f-string and dropped the name.
    logger.warning(f"Image not found: {filename}")
    raise HTTPException(status_code=404, detail="Image not found")
48
+
49
+
50
@router.get("/video/{filename:path}")
async def get_video(filename: str):
    """
    Serve a cached MP4 video file from the local video cache directory.

    Args:
        filename: Requested file name; "/" separators are folded into "-".

    Returns:
        FileResponse (video/mp4) with a long-lived immutable cache header.

    Raises:
        HTTPException: 404 when the file is missing, not a regular file,
            or resolves outside the video cache directory.
    """
    # Flatten path separators so nested request paths map onto flat names.
    if "/" in filename:
        filename = filename.replace("/", "-")

    file_path = VIDEO_DIR / filename

    # Hardening: reject anything that escapes the cache directory.
    try:
        file_path.resolve().relative_to(VIDEO_DIR.resolve())
    except ValueError:
        logger.warning(f"Video path rejected: {filename}")
        raise HTTPException(status_code=404, detail="Video not found")

    # isfile() implies existence, so a single check suffices.
    if await aiofiles.os.path.isfile(file_path):
        return FileResponse(
            file_path,
            media_type="video/mp4",
            headers={"Cache-Control": "public, max-age=31536000, immutable"},
        )

    # Fix: the original logged a constant f-string and dropped the name.
    logger.warning(f"Video not found: {filename}")
    raise HTTPException(status_code=404, detail="Video not found")
app/api/v1/image.py ADDED
@@ -0,0 +1,452 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Image Generation API 路由
3
+ """
4
+
5
+ import base64
6
+ import time
7
+ from pathlib import Path
8
+ from typing import List, Optional, Union
9
+
10
+ from fastapi import APIRouter, File, Form, UploadFile
11
+ from fastapi.responses import StreamingResponse, JSONResponse
12
+ from pydantic import BaseModel, Field, ValidationError
13
+
14
+ from app.services.grok.services.image import ImageGenerationService
15
+ from app.services.grok.services.image_edit import ImageEditService
16
+ from app.services.grok.services.model import ModelService
17
+ from app.services.token import get_token_manager
18
+ from app.core.exceptions import ValidationException, AppException, ErrorType
19
+ from app.core.config import get_config
20
+
21
+
22
router = APIRouter(tags=["Images"])

# Pixel sizes accepted by the OpenAI-compatible `size` parameter.
ALLOWED_IMAGE_SIZES = {
    "1280x720",
    "720x1280",
    "1792x1024",
    "1024x1792",
    "1024x1024",
}

# Mapping from OpenAI-style pixel sizes to Grok Imagine aspect ratios.
SIZE_TO_ASPECT = {
    "1280x720": "16:9",
    "720x1280": "9:16",
    "1792x1024": "3:2",
    "1024x1792": "2:3",
    "1024x1024": "1:1",
}
# Aspect ratios accepted directly (used by resolve_aspect_ratio).
ALLOWED_ASPECT_RATIOS = {"1:1", "2:3", "3:2", "9:16", "16:9"}
40
+
41
+
42
class ImageGenerationRequest(BaseModel):
    """Image generation request payload (OpenAI `images/generations` compatible)."""

    # Text prompt describing the desired image (required).
    prompt: str = Field(..., description="图片描述")
    model: Optional[str] = Field("grok-imagine-1.0", description="模型名称")
    n: Optional[int] = Field(1, ge=1, le=10, description="生成数量 (1-10)")
    size: Optional[str] = Field(
        "1024x1024",
        description="图片尺寸: 1280x720, 720x1280, 1792x1024, 1024x1792, 1024x1024",
    )
    # quality/style are accepted for API compatibility but marked unsupported.
    quality: Optional[str] = Field("standard", description="图片质量 (暂不支持)")
    response_format: Optional[str] = Field(None, description="响应格式")
    style: Optional[str] = Field(None, description="风格 (暂不支持)")
    stream: Optional[bool] = Field(False, description="是否流式输出")
56
+
57
+
58
class ImageEditRequest(BaseModel):
    """Image edit request payload (OpenAI `images/edits` compatible)."""

    # Text prompt describing the desired edit (required).
    prompt: str = Field(..., description="编辑描述")
    model: Optional[str] = Field("grok-imagine-1.0-edit", description="模型名称")
    # NOTE: the HTTP endpoint receives images via multipart upload and does
    # not populate this field; it exists for schema completeness.
    image: Optional[Union[str, List[str]]] = Field(None, description="待编辑图片文件")
    n: Optional[int] = Field(1, ge=1, le=10, description="生成数量 (1-10)")
    size: Optional[str] = Field(
        "1024x1024",
        description="图片尺寸: 1280x720, 720x1280, 1792x1024, 1024x1792, 1024x1024",
    )
    # quality/style are accepted for API compatibility but marked unsupported.
    quality: Optional[str] = Field("standard", description="图片质量 (暂不支持)")
    response_format: Optional[str] = Field(None, description="响应格式")
    style: Optional[str] = Field(None, description="风格 (暂不支持)")
    stream: Optional[bool] = Field(False, description="是否流式输出")
73
+
74
+
75
def _validate_common_request(
    request: Union[ImageGenerationRequest, ImageEditRequest],
    *,
    allow_ws_stream: bool = False,
):
    """Shared validation for generation and edit requests.

    Checks prompt, count, streaming constraints, response format and size,
    raising ``ValidationException`` on the first violated rule.
    """
    prompt = request.prompt
    if not prompt or not prompt.strip():
        raise ValidationException(
            message="Prompt cannot be empty", param="prompt", code="empty_prompt"
        )

    if not 1 <= request.n <= 10:
        raise ValidationException(
            message="n must be between 1 and 10", param="n", code="invalid_n"
        )

    # Streaming limits how many images may be produced at once.
    if request.stream and request.n not in (1, 2):
        raise ValidationException(
            message="Streaming is only supported when n=1 or n=2",
            param="stream",
            code="invalid_stream_n",
        )

    valid_formats = {"b64_json", "base64", "url"}

    # WebSocket-capable endpoints apply the whitelist while streaming too.
    if allow_ws_stream and request.stream and request.response_format:
        if request.response_format not in valid_formats:
            raise ValidationException(
                message="Streaming only supports response_format=b64_json/base64/url",
                param="response_format",
                code="invalid_response_format",
            )

    if request.response_format and request.response_format not in valid_formats:
        raise ValidationException(
            message=f"response_format must be one of {sorted(valid_formats)}",
            param="response_format",
            code="invalid_response_format",
        )

    if request.size and request.size not in ALLOWED_IMAGE_SIZES:
        raise ValidationException(
            message=f"size must be one of {sorted(ALLOWED_IMAGE_SIZES)}",
            param="size",
            code="invalid_size",
        )
126
+
127
+
128
def validate_generation_request(request: ImageGenerationRequest):
    """Validate an image generation request.

    Enforces the fixed generation model, confirms it is registered as an
    image-capable model, then applies the shared parameter checks.
    """
    if request.model != "grok-imagine-1.0":
        raise ValidationException(
            message="The model `grok-imagine-1.0` is required for image generation.",
            param="model",
            code="model_not_supported",
        )
    # Double-check the model is registered and flagged as an image model.
    model_info = ModelService.get(request.model)
    if model_info is None or not model_info.is_image:
        # Surface which models would have been accepted.
        supported = [m.model_id for m in ModelService.MODELS if m.is_image]
        raise ValidationException(
            message=(
                f"The model `{request.model}` is not supported for image generation. "
                f"Supported: {supported}"
            ),
            param="model",
            code="model_not_supported",
        )
    _validate_common_request(request, allow_ws_stream=True)
150
+
151
+
152
def resolve_response_format(response_format: Optional[str]) -> str:
    """Normalize the response format, falling back to the configured default.

    Returns one of ``b64_json``, ``base64`` or ``url`` (lower-cased);
    raises ``ValidationException`` for anything else.
    """
    candidate = response_format or get_config("app.image_format")
    if isinstance(candidate, str):
        normalized = candidate.lower()
        if normalized in {"b64_json", "base64", "url"}:
            return normalized
    raise ValidationException(
        message="response_format must be one of b64_json, base64, url",
        param="response_format",
        code="invalid_response_format",
    )
164
+
165
+
166
def response_field_name(response_format: str) -> str:
    """Return the JSON field key used for image payloads of this format."""
    if response_format == "url":
        return "url"
    if response_format == "base64":
        return "base64"
    return "b64_json"
169
+
170
+
171
def resolve_aspect_ratio(size: str) -> str:
    """Map an OpenAI-style size (or a bare "W:H" ratio) to a Grok Imagine ratio.

    Unknown or malformed values fall back to the default "2:3".
    """
    text = (size or "").strip()
    if not text:
        return "2:3"
    mapped = SIZE_TO_ASPECT.get(text)
    if mapped:
        return mapped
    if ":" in text:
        # Accept a raw "W:H" pair, but only if it is a whitelisted ratio.
        try:
            w_raw, h_raw = text.split(":", 1)
            width = int(w_raw.strip())
            height = int(h_raw.strip())
        except (TypeError, ValueError):
            return "2:3"
        if width > 0 and height > 0:
            candidate = f"{width}:{height}"
            if candidate in ALLOWED_ASPECT_RATIOS:
                return candidate
    return "2:3"
190
+
191
+
192
def validate_edit_request(request: ImageEditRequest, images: List[UploadFile]):
    """Validate an image edit request and its uploaded files.

    Enforces the fixed edit model, shared parameter rules, and the
    1..16 uploaded-image count.
    """
    if request.model != "grok-imagine-1.0-edit":
        raise ValidationException(
            message=("The model `grok-imagine-1.0-edit` is required for image edits."),
            param="model",
            code="model_not_supported",
        )
    # Double-check the model is registered and flagged for image edits.
    model_info = ModelService.get(request.model)
    if not (model_info and model_info.is_image_edit):
        supported = [m.model_id for m in ModelService.MODELS if m.is_image_edit]
        raise ValidationException(
            message=(
                f"The model `{request.model}` is not supported for image edits. "
                f"Supported: {supported}"
            ),
            param="model",
            code="model_not_supported",
        )
    _validate_common_request(request, allow_ws_stream=False)
    if not images:
        raise ValidationException(
            message="Image is required",
            param="image",
            code="missing_image",
        )
    if len(images) > 16:
        raise ValidationException(
            message="Too many images. Maximum is 16.",
            param="image",
            code="invalid_image_count",
        )
224
+
225
+
226
async def _get_token(model: str):
    """Acquire a usable token for *model* from the first non-empty pool.

    Returns ``(token_manager, token)``; raises a 429 ``AppException``
    when every candidate pool is exhausted.
    """
    token_mgr = await get_token_manager()
    await token_mgr.reload_if_stale()

    # Walk the candidate pools in priority order, stopping at the first hit.
    token = next(
        (
            candidate
            for pool in ModelService.pool_candidates_for_model(model)
            if (candidate := token_mgr.get_token(pool))
        ),
        None,
    )

    if not token:
        raise AppException(
            message="No available tokens. Please try again later.",
            error_type=ErrorType.RATE_LIMIT.value,
            code="rate_limit_exceeded",
            status_code=429,
        )

    return token_mgr, token
246
+
247
+
248
@router.post("/images/generations")
async def create_image(request: ImageGenerationRequest):
    """
    Image Generation API

    Streaming response format:
    - event: image_generation.partial_image
    - event: image_generation.completed

    Non-streaming response format:
    - {"created": ..., "data": [{"b64_json": "..."}], "usage": {...}}
    """
    # stream defaults to False when the client omits it.
    if request.stream is None:
        request.stream = False

    if request.response_format is None:
        request.response_format = resolve_response_format(None)

    # Parameter validation (model, prompt, n, stream/format/size rules).
    validate_generation_request(request)

    # Accept "base64" as an alias of "b64_json".
    if request.response_format == "base64":
        request.response_format = "b64_json"

    response_format = resolve_response_format(request.response_format)
    response_field = response_field_name(response_format)

    # Acquire a token and resolve model info; maps the OpenAI size to a ratio.
    token_mgr, token = await _get_token(request.model)
    model_info = ModelService.get(request.model)
    aspect_ratio = resolve_aspect_ratio(request.size)

    result = await ImageGenerationService().generate(
        token_mgr=token_mgr,
        token=token,
        model_info=model_info,
        prompt=request.prompt,
        n=request.n,
        response_format=response_format,
        size=request.size,
        aspect_ratio=aspect_ratio,
        stream=bool(request.stream),
    )

    # Streaming mode: pass the service's SSE iterator straight through.
    if result.stream:
        return StreamingResponse(
            result.data,
            media_type="text/event-stream",
            headers={"Cache-Control": "no-cache", "Connection": "keep-alive"},
        )

    # Non-streaming: wrap each image under the format-specific field name.
    data = [{response_field: img} for img in result.data]
    usage = result.usage_override or {
        "total_tokens": 0,
        "input_tokens": 0,
        "output_tokens": 0,
        "input_tokens_details": {"text_tokens": 0, "image_tokens": 0},
    }

    return JSONResponse(
        content={
            "created": int(time.time()),
            "data": data,
            "usage": usage,
        }
    )
316
+
317
+
318
@router.post("/images/edits")
async def edit_image(
    prompt: str = Form(...),
    image: List[UploadFile] = File(...),
    model: Optional[str] = Form("grok-imagine-1.0-edit"),
    n: int = Form(1),
    size: str = Form("1024x1024"),
    quality: str = Form("standard"),
    response_format: Optional[str] = Form(None),
    style: Optional[str] = Form(None),
    stream: Optional[bool] = Form(False),
):
    """
    Image Edits API

    Mirrors the official API format; only multipart/form-data uploads
    are supported.
    """
    if response_format is None:
        response_format = resolve_response_format(None)

    # Re-validate the form fields through the Pydantic model so errors use
    # the same ValidationException shape as the JSON endpoints.
    try:
        edit_request = ImageEditRequest(
            prompt=prompt,
            model=model,
            n=n,
            size=size,
            quality=quality,
            response_format=response_format,
            style=style,
            stream=stream,
        )
    except ValidationError as exc:
        errors = exc.errors()
        if errors:
            # Convert the first Pydantic error into the API error shape,
            # dropping numeric path segments (list indices) from `param`.
            first = errors[0]
            loc = first.get("loc", [])
            msg = first.get("msg", "Invalid request")
            code = first.get("type", "invalid_value")
            param_parts = [
                str(x) for x in loc if not (isinstance(x, int) or str(x).isdigit())
            ]
            param = ".".join(param_parts) if param_parts else None
            raise ValidationException(message=msg, param=param, code=code)
        raise ValidationException(message="Invalid request", code="invalid_value")

    if edit_request.stream is None:
        edit_request.stream = False

    # Accept "base64" as an alias of "b64_json".
    response_format = resolve_response_format(edit_request.response_format)
    if response_format == "base64":
        response_format = "b64_json"
    edit_request.response_format = response_format
    response_field = response_field_name(response_format)

    # Parameter validation (model, shared rules, upload count 1..16).
    validate_edit_request(edit_request, image)

    max_image_bytes = 50 * 1024 * 1024
    allowed_types = {"image/png", "image/jpeg", "image/webp", "image/jpg"}

    # Read each upload, validate size/type, and encode as a data URI.
    images: List[str] = []
    for item in image:
        content = await item.read()
        await item.close()
        if not content:
            raise ValidationException(
                message="File content is empty",
                param="image",
                code="empty_file",
            )
        if len(content) > max_image_bytes:
            raise ValidationException(
                message="Image file too large. Maximum is 50MB.",
                param="image",
                code="file_too_large",
            )
        mime = (item.content_type or "").lower()
        if mime == "image/jpg":
            mime = "image/jpeg"
        ext = Path(item.filename or "").suffix.lower()
        if mime not in allowed_types:
            # Fall back to the file extension when the declared MIME type
            # is missing or unrecognized.
            if ext in (".jpg", ".jpeg"):
                mime = "image/jpeg"
            elif ext == ".png":
                mime = "image/png"
            elif ext == ".webp":
                mime = "image/webp"
            else:
                raise ValidationException(
                    message="Unsupported image type. Supported: png, jpg, webp.",
                    param="image",
                    code="invalid_image_type",
                )
        b64 = base64.b64encode(content).decode()
        images.append(f"data:{mime};base64,{b64}")

    # Acquire a token and resolve model info.
    token_mgr, token = await _get_token(edit_request.model)
    model_info = ModelService.get(edit_request.model)

    result = await ImageEditService().edit(
        token_mgr=token_mgr,
        token=token,
        model_info=model_info,
        prompt=edit_request.prompt,
        images=images,
        n=edit_request.n,
        response_format=response_format,
        stream=bool(edit_request.stream),
    )

    # Streaming mode: pass the service's SSE iterator straight through.
    if result.stream:
        return StreamingResponse(
            result.data,
            media_type="text/event-stream",
            headers={"Cache-Control": "no-cache", "Connection": "keep-alive"},
        )

    data = [{response_field: img} for img in result.data]

    return JSONResponse(
        content={
            "created": int(time.time()),
            "data": data,
            "usage": {
                "total_tokens": 0,
                "input_tokens": 0,
                "output_tokens": 0,
                "input_tokens_details": {"text_tokens": 0, "image_tokens": 0},
            },
        }
    )
450
+
451
+
452
+ __all__ = ["router"]
app/api/v1/models.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Models API 路由
3
+ """
4
+
5
+ from fastapi import APIRouter
6
+
7
+ from app.services.grok.services.model import ModelService
8
+
9
+
10
+ router = APIRouter(tags=["Models"])
11
+
12
+
13
@router.get("/models")
async def list_models():
    """OpenAI-compatible model listing endpoint."""
    entries = []
    for model in ModelService.list():
        entries.append(
            {
                "id": model.model_id,
                "object": "model",
                "created": 0,
                "owned_by": "grok2api@chenyme",
            }
        )
    return {"object": "list", "data": entries}
26
+
27
+
28
+ __all__ = ["router"]
app/api/v1/public_api/__init__.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Public API router (public_key protected)."""
2
+
3
+ from fastapi import APIRouter, Depends
4
+
5
+ from app.api.v1.chat import router as chat_router
6
+ from app.api.v1.public_api.imagine import router as imagine_router
7
+ from app.api.v1.public_api.video import router as video_router
8
+ from app.api.v1.public_api.voice import router as voice_router
9
+ from app.core.auth import verify_public_key
10
+
11
router = APIRouter()

# Chat completions reuse the OpenAI-compatible router and are gated here by
# the public-key dependency. The imagine router performs its own
# per-endpoint auth (query-param keys / session capabilities); video and
# voice presumably follow the same pattern — confirm in their modules.
router.include_router(chat_router, dependencies=[Depends(verify_public_key)])
router.include_router(imagine_router)
router.include_router(video_router)
router.include_router(voice_router)

__all__ = ["router"]
app/api/v1/public_api/imagine.py ADDED
@@ -0,0 +1,505 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import time
3
+ import uuid
4
+ from typing import Optional, List, Dict, Any
5
+
6
+ import orjson
7
+ from fastapi import APIRouter, Depends, HTTPException, Query, Request, WebSocket, WebSocketDisconnect
8
+ from fastapi.responses import StreamingResponse
9
+ from pydantic import BaseModel
10
+
11
+ from app.core.auth import verify_public_key, get_public_api_key, is_public_enabled
12
+ from app.core.config import get_config
13
+ from app.core.logger import logger
14
+ from app.api.v1.image import resolve_aspect_ratio
15
+ from app.services.grok.services.image import ImageGenerationService
16
+ from app.services.grok.services.model import ModelService
17
+ from app.services.token.manager import get_token_manager
18
+
19
router = APIRouter()

# In-memory imagine sessions: task_id -> {prompt, aspect_ratio, nsfw,
# created_at}. Sessions act as short-lived capabilities for the WS/SSE
# endpoints and are garbage-collected after IMAGINE_SESSION_TTL seconds.
IMAGINE_SESSION_TTL = 600
_IMAGINE_SESSIONS: dict[str, dict] = {}
# Guards all reads/writes of _IMAGINE_SESSIONS across coroutines.
_IMAGINE_SESSIONS_LOCK = asyncio.Lock()
24
+
25
+
26
async def _clean_sessions(now: float) -> None:
    """Drop every imagine session older than ``IMAGINE_SESSION_TTL`` seconds.

    Callers hold ``_IMAGINE_SESSIONS_LOCK`` before invoking this.
    """
    stale_keys = []
    for key, info in _IMAGINE_SESSIONS.items():
        created = float(info.get("created_at") or 0)
        if now - created > IMAGINE_SESSION_TTL:
            stale_keys.append(key)
    for key in stale_keys:
        _IMAGINE_SESSIONS.pop(key, None)
34
+
35
+
36
+ def _parse_sse_chunk(chunk: str) -> Optional[Dict[str, Any]]:
37
+ if not chunk:
38
+ return None
39
+ event = None
40
+ data_lines: List[str] = []
41
+ for raw in str(chunk).splitlines():
42
+ line = raw.strip()
43
+ if not line:
44
+ continue
45
+ if line.startswith("event:"):
46
+ event = line[6:].strip()
47
+ continue
48
+ if line.startswith("data:"):
49
+ data_lines.append(line[5:].strip())
50
+ if not data_lines:
51
+ return None
52
+ data_str = "\n".join(data_lines)
53
+ if data_str == "[DONE]":
54
+ return None
55
+ try:
56
+ payload = orjson.loads(data_str)
57
+ except orjson.JSONDecodeError:
58
+ return None
59
+ if event and isinstance(payload, dict) and "type" not in payload:
60
+ payload["type"] = event
61
+ return payload
62
+
63
+
64
async def _new_session(prompt: str, aspect_ratio: str, nsfw: Optional[bool]) -> str:
    """Register a new imagine session and return its opaque task id.

    Expired sessions are pruned opportunistically while the lock is held.
    """
    session_key = uuid.uuid4().hex
    timestamp = time.time()
    async with _IMAGINE_SESSIONS_LOCK:
        await _clean_sessions(timestamp)
        _IMAGINE_SESSIONS[session_key] = {
            "prompt": prompt,
            "aspect_ratio": aspect_ratio,
            "nsfw": nsfw,
            "created_at": timestamp,
        }
    return session_key
76
+
77
+
78
async def _get_session(task_id: str) -> Optional[dict]:
    """Look up a live session by id; returns a copy or ``None`` when absent/expired."""
    if not task_id:
        return None
    now = time.time()
    async with _IMAGINE_SESSIONS_LOCK:
        await _clean_sessions(now)
        entry = _IMAGINE_SESSIONS.get(task_id)
        if not entry:
            return None
        # Defensive re-check: drop the entry if it aged past the TTL.
        if now - float(entry.get("created_at") or 0) > IMAGINE_SESSION_TTL:
            _IMAGINE_SESSIONS.pop(task_id, None)
            return None
        # Hand back a copy so callers cannot mutate shared state.
        return dict(entry)
92
+
93
+
94
async def _drop_session(task_id: str) -> None:
    """Remove one imagine session; no-op for falsy or unknown ids."""
    if task_id:
        async with _IMAGINE_SESSIONS_LOCK:
            _IMAGINE_SESSIONS.pop(task_id, None)
99
+
100
+
101
async def _drop_sessions(task_ids: List[str]) -> int:
    """Remove every listed session, returning how many were actually present."""
    if not task_ids:
        return 0
    async with _IMAGINE_SESSIONS_LOCK:
        removed = 0
        for candidate in task_ids:
            if candidate and candidate in _IMAGINE_SESSIONS:
                _IMAGINE_SESSIONS.pop(candidate, None)
                removed += 1
        return removed
111
+
112
+
113
@router.websocket("/imagine/ws")
async def public_imagine_ws(websocket: WebSocket):
    """Imagine image waterfall over WebSocket.

    Auth: either a valid ``task_id`` session (created via /imagine/start)
    or a ``public_key`` query parameter matching the configured key; when
    no key is configured, access follows the global public-access switch.

    Client -> server JSON messages:
      {"type": "start", "prompt": ..., "aspect_ratio": ..., "nsfw": ...}
      {"type": "stop"}
    The server pushes ``status`` / ``image`` / ``error`` payloads until
    stopped or disconnected.
    """
    session_id = None
    task_id = websocket.query_params.get("task_id")
    if task_id:
        info = await _get_session(task_id)
        if info:
            session_id = task_id

    # A valid session acts as a capability; otherwise fall back to key auth.
    ok = True
    if session_id is None:
        public_key = get_public_api_key()
        public_enabled = is_public_enabled()
        if not public_key:
            ok = public_enabled
        else:
            key = websocket.query_params.get("public_key")
            ok = key == public_key

    if not ok:
        # 1008 = policy violation (auth failure).
        await websocket.close(code=1008)
        return

    await websocket.accept()
    stop_event = asyncio.Event()
    run_task: Optional[asyncio.Task] = None

    async def _send(payload: dict) -> bool:
        # Best-effort send; a False return means the socket is gone.
        try:
            await websocket.send_text(orjson.dumps(payload).decode())
            return True
        except Exception:
            return False

    async def _stop_run():
        # Cancel the current generation loop (if any) and reset the flag.
        nonlocal run_task
        stop_event.set()
        if run_task and not run_task.done():
            run_task.cancel()
            try:
                await run_task
            except Exception:
                pass
        run_task = None
        stop_event.clear()

    async def _run(prompt: str, aspect_ratio: str, nsfw: Optional[bool]):
        # Generation loop: repeatedly request batches of 6 images and push
        # them to the client until stop_event is set or the task is cancelled.
        model_id = "grok-imagine-1.0"
        model_info = ModelService.get(model_id)
        if not model_info or not model_info.is_image:
            await _send(
                {
                    "type": "error",
                    "message": "Image model is not available.",
                    "code": "model_not_supported",
                }
            )
            return

        token_mgr = await get_token_manager()
        run_id = uuid.uuid4().hex

        await _send(
            {
                "type": "status",
                "status": "running",
                "prompt": prompt,
                "aspect_ratio": aspect_ratio,
                "run_id": run_id,
            }
        )

        while not stop_event.is_set():
            try:
                await token_mgr.reload_if_stale()
                # Pick the first available token across candidate pools.
                token = None
                for pool_name in ModelService.pool_candidates_for_model(
                    model_info.model_id
                ):
                    token = token_mgr.get_token(pool_name)
                    if token:
                        break

                if not token:
                    # No capacity right now; report and retry after a delay.
                    await _send(
                        {
                            "type": "error",
                            "message": "No available tokens. Please try again later.",
                            "code": "rate_limit_exceeded",
                        }
                    )
                    await asyncio.sleep(2)
                    continue

                result = await ImageGenerationService().generate(
                    token_mgr=token_mgr,
                    token=token,
                    model_info=model_info,
                    prompt=prompt,
                    n=6,
                    response_format="b64_json",
                    size="1024x1024",
                    aspect_ratio=aspect_ratio,
                    stream=True,
                    enable_nsfw=nsfw,
                )
                if result.stream:
                    # Forward each parsed SSE payload, tagging the run id.
                    async for chunk in result.data:
                        payload = _parse_sse_chunk(chunk)
                        if not payload:
                            continue
                        if isinstance(payload, dict):
                            payload.setdefault("run_id", run_id)
                        await _send(payload)
                else:
                    # Non-stream fallback: push completed images one by one.
                    images = [img for img in result.data if img and img != "error"]
                    if images:
                        for img_b64 in images:
                            await _send(
                                {
                                    "type": "image",
                                    "b64_json": img_b64,
                                    "created_at": int(time.time() * 1000),
                                    "aspect_ratio": aspect_ratio,
                                    "run_id": run_id,
                                }
                            )
                    else:
                        await _send(
                            {
                                "type": "error",
                                "message": "Image generation returned empty data.",
                                "code": "empty_image",
                            }
                        )

            except asyncio.CancelledError:
                break
            except Exception as e:
                # Report the failure and back off briefly before retrying.
                logger.warning(f"Imagine stream error: {e}")
                await _send(
                    {
                        "type": "error",
                        "message": str(e),
                        "code": "internal_error",
                    }
                )
                await asyncio.sleep(1.5)

        await _send({"type": "status", "status": "stopped", "run_id": run_id})

    try:
        # Control loop: react to client start/stop commands.
        while True:
            try:
                raw = await websocket.receive_text()
            except (RuntimeError, WebSocketDisconnect):
                break

            try:
                payload = orjson.loads(raw)
            except Exception:
                await _send(
                    {
                        "type": "error",
                        "message": "Invalid message format.",
                        "code": "invalid_payload",
                    }
                )
                continue

            action = payload.get("type")
            if action == "start":
                prompt = str(payload.get("prompt") or "").strip()
                if not prompt:
                    await _send(
                        {
                            "type": "error",
                            "message": "Prompt cannot be empty.",
                            "code": "invalid_prompt",
                        }
                    )
                    continue
                aspect_ratio = resolve_aspect_ratio(
                    str(payload.get("aspect_ratio") or "2:3").strip() or "2:3"
                )
                nsfw = payload.get("nsfw")
                if nsfw is not None:
                    nsfw = bool(nsfw)
                # Restart semantics: a new start cancels any previous run.
                await _stop_run()
                run_task = asyncio.create_task(_run(prompt, aspect_ratio, nsfw))
            elif action == "stop":
                await _stop_run()
            else:
                await _send(
                    {
                        "type": "error",
                        "message": "Unknown action.",
                        "code": "invalid_action",
                    }
                )

    except WebSocketDisconnect:
        logger.debug("WebSocket disconnected by client")
    except Exception as e:
        logger.warning(f"WebSocket error: {e}")
    finally:
        # Always cancel any in-flight generation before tearing down.
        await _stop_run()

        try:
            from starlette.websockets import WebSocketState
            if websocket.client_state == WebSocketState.CONNECTED:
                await websocket.close(code=1000, reason="Server closing connection")
        except Exception as e:
            logger.debug(f"WebSocket close ignored: {e}")
        # Session capabilities are single-use: drop it once the socket ends.
        if session_id:
            await _drop_session(session_id)
329
+
330
+
331
@router.get("/imagine/sse")
async def public_imagine_sse(
    request: Request,
    task_id: str = Query(""),
    prompt: str = Query(""),
    aspect_ratio: str = Query("2:3"),
):
    """Imagine image waterfall (SSE fallback).

    Either a valid ``task_id`` session authorizes the stream (and supplies
    prompt/ratio/nsfw), or direct query parameters are used under
    public-key auth. Generates batches of 6 images in a loop until the
    client disconnects or the session expires.
    """
    session = None
    if task_id:
        session = await _get_session(task_id)
        if not session:
            raise HTTPException(status_code=404, detail="Task not found")
    else:
        # No session capability: enforce public-key / public-access rules.
        public_key = get_public_api_key()
        public_enabled = is_public_enabled()
        if not public_key:
            if not public_enabled:
                raise HTTPException(status_code=401, detail="Public access is disabled")
        else:
            key = request.query_params.get("public_key")
            if key != public_key:
                raise HTTPException(status_code=401, detail="Invalid authentication token")

    if session:
        # Session parameters win over query parameters.
        prompt = str(session.get("prompt") or "").strip()
        ratio = str(session.get("aspect_ratio") or "2:3").strip() or "2:3"
        nsfw = session.get("nsfw")
    else:
        prompt = (prompt or "").strip()
        if not prompt:
            raise HTTPException(status_code=400, detail="Prompt cannot be empty")
        ratio = str(aspect_ratio or "2:3").strip() or "2:3"
        ratio = resolve_aspect_ratio(ratio)
        # Query-string booleans: accept common truthy spellings.
        nsfw = request.query_params.get("nsfw")
        if nsfw is not None:
            nsfw = str(nsfw).lower() in ("1", "true", "yes", "on")

    async def event_stream():
        try:
            model_id = "grok-imagine-1.0"
            model_info = ModelService.get(model_id)
            if not model_info or not model_info.is_image:
                yield (
                    f"data: {orjson.dumps({'type': 'error', 'message': 'Image model is not available.', 'code': 'model_not_supported'}).decode()}\n\n"
                )
                return

            token_mgr = await get_token_manager()
            sequence = 0
            run_id = uuid.uuid4().hex

            yield (
                f"data: {orjson.dumps({'type': 'status', 'status': 'running', 'prompt': prompt, 'aspect_ratio': ratio, 'run_id': run_id}).decode()}\n\n"
            )

            while True:
                # Stop when the client goes away or the session expires.
                if await request.is_disconnected():
                    break
                if task_id:
                    session_alive = await _get_session(task_id)
                    if not session_alive:
                        break

                try:
                    await token_mgr.reload_if_stale()
                    # Pick the first available token across candidate pools.
                    token = None
                    for pool_name in ModelService.pool_candidates_for_model(
                        model_info.model_id
                    ):
                        token = token_mgr.get_token(pool_name)
                        if token:
                            break

                    if not token:
                        # No capacity right now; report and retry after a delay.
                        yield (
                            f"data: {orjson.dumps({'type': 'error', 'message': 'No available tokens. Please try again later.', 'code': 'rate_limit_exceeded'}).decode()}\n\n"
                        )
                        await asyncio.sleep(2)
                        continue

                    result = await ImageGenerationService().generate(
                        token_mgr=token_mgr,
                        token=token,
                        model_info=model_info,
                        prompt=prompt,
                        n=6,
                        response_format="b64_json",
                        size="1024x1024",
                        aspect_ratio=ratio,
                        stream=True,
                        enable_nsfw=nsfw,
                    )
                    if result.stream:
                        # Re-emit each parsed payload as a data-only SSE event.
                        async for chunk in result.data:
                            payload = _parse_sse_chunk(chunk)
                            if not payload:
                                continue
                            if isinstance(payload, dict):
                                payload.setdefault("run_id", run_id)
                            yield f"data: {orjson.dumps(payload).decode()}\n\n"
                    else:
                        # Non-stream fallback: emit completed images in order.
                        images = [img for img in result.data if img and img != "error"]
                        if images:
                            for img_b64 in images:
                                sequence += 1
                                payload = {
                                    "type": "image",
                                    "b64_json": img_b64,
                                    "sequence": sequence,
                                    "created_at": int(time.time() * 1000),
                                    "aspect_ratio": ratio,
                                    "run_id": run_id,
                                }
                                yield f"data: {orjson.dumps(payload).decode()}\n\n"
                        else:
                            yield (
                                f"data: {orjson.dumps({'type': 'error', 'message': 'Image generation returned empty data.', 'code': 'empty_image'}).decode()}\n\n"
                            )
                except asyncio.CancelledError:
                    break
                except Exception as e:
                    # Report the failure and back off briefly before retrying.
                    logger.warning(f"Imagine SSE error: {e}")
                    yield (
                        f"data: {orjson.dumps({'type': 'error', 'message': str(e), 'code': 'internal_error'}).decode()}\n\n"
                    )
                    await asyncio.sleep(1.5)

            yield (
                f"data: {orjson.dumps({'type': 'status', 'status': 'stopped', 'run_id': run_id}).decode()}\n\n"
            )
        finally:
            # Session capabilities are single-use: drop on stream teardown.
            if task_id:
                await _drop_session(task_id)

    return StreamingResponse(
        event_stream(),
        media_type="text/event-stream",
        headers={"Cache-Control": "no-cache", "Connection": "keep-alive"},
    )
471
+
472
+
473
@router.get("/imagine/config")
async def public_imagine_config():
    """Expose imagine-related settings (size thresholds and the NSFW flag)."""
    final_min = int(get_config("image.final_min_bytes") or 0)
    medium_min = int(get_config("image.medium_min_bytes") or 0)
    nsfw_enabled = bool(get_config("image.nsfw"))
    return {
        "final_min_bytes": final_min,
        "medium_min_bytes": medium_min,
        "nsfw": nsfw_enabled,
    }
480
+
481
+
482
class ImagineStartRequest(BaseModel):
    """Payload for starting an imagine waterfall session."""

    # Text prompt for image generation (must be non-empty after stripping).
    prompt: str
    # Desired aspect ratio; normalized via resolve_aspect_ratio, default "2:3".
    aspect_ratio: Optional[str] = "2:3"
    # Optional NSFW override; None presumably leaves the service default in
    # effect — confirm against ImageGenerationService.generate.
    nsfw: Optional[bool] = None
486
+
487
+
488
@router.post("/imagine/start", dependencies=[Depends(verify_public_key)])
async def public_imagine_start(data: ImagineStartRequest):
    """Create an imagine session and return the task id for WS/SSE streaming."""
    prompt = (data.prompt or "").strip()
    if not prompt:
        raise HTTPException(status_code=400, detail="Prompt cannot be empty")
    requested_ratio = str(data.aspect_ratio or "2:3").strip() or "2:3"
    ratio = resolve_aspect_ratio(requested_ratio)
    task_id = await _new_session(prompt, ratio, data.nsfw)
    return {"task_id": task_id, "aspect_ratio": ratio}
496
+
497
+
498
class ImagineStopRequest(BaseModel):
    """Payload for stopping one or more imagine sessions."""

    # Session ids previously returned by /imagine/start.
    task_ids: List[str]
500
+
501
+
502
@router.post("/imagine/stop", dependencies=[Depends(verify_public_key)])
async def public_imagine_stop(data: ImagineStopRequest):
    """Tear down the listed imagine sessions, reporting how many were removed."""
    target_ids = data.task_ids or []
    removed = await _drop_sessions(target_ids)
    return {"status": "success", "removed": removed}
app/api/v1/public_api/video.py ADDED
@@ -0,0 +1,274 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import time
3
+ import uuid
4
+ from typing import Optional, List, Dict, Any
5
+
6
+ import orjson
7
+ from fastapi import APIRouter, Depends, HTTPException, Query, Request
8
+ from fastapi.responses import StreamingResponse
9
+ from pydantic import BaseModel
10
+
11
+ from app.core.auth import verify_public_key
12
+ from app.core.logger import logger
13
+ from app.services.grok.services.video import VideoService
14
+ from app.services.grok.services.model import ModelService
15
+
16
+ router = APIRouter()
17
+
18
+ VIDEO_SESSION_TTL = 600
19
+ _VIDEO_SESSIONS: dict[str, dict] = {}
20
+ _VIDEO_SESSIONS_LOCK = asyncio.Lock()
21
+
22
+ _VIDEO_RATIO_MAP = {
23
+ "1280x720": "16:9",
24
+ "720x1280": "9:16",
25
+ "1792x1024": "3:2",
26
+ "1024x1792": "2:3",
27
+ "1024x1024": "1:1",
28
+ "16:9": "16:9",
29
+ "9:16": "9:16",
30
+ "3:2": "3:2",
31
+ "2:3": "2:3",
32
+ "1:1": "1:1",
33
+ }
34
+
35
+
36
async def _clean_sessions(now: float) -> None:
    """Evict video sessions older than VIDEO_SESSION_TTL.

    Caller is expected to hold _VIDEO_SESSIONS_LOCK.
    """
    stale = [
        key
        for key, info in _VIDEO_SESSIONS.items()
        if now - float(info.get("created_at") or 0) > VIDEO_SESSION_TTL
    ]
    for key in stale:
        _VIDEO_SESSIONS.pop(key, None)
44
+
45
+
46
async def _new_session(
    prompt: str,
    aspect_ratio: str,
    video_length: int,
    resolution_name: str,
    preset: str,
    image_url: Optional[str],
    reasoning_effort: Optional[str],
) -> str:
    """Register a one-shot video-generation session and return its task id."""
    session_id = uuid.uuid4().hex
    created = time.time()
    record = {
        "prompt": prompt,
        "aspect_ratio": aspect_ratio,
        "video_length": video_length,
        "resolution_name": resolution_name,
        "preset": preset,
        "image_url": image_url,
        "reasoning_effort": reasoning_effort,
        "created_at": created,
    }
    async with _VIDEO_SESSIONS_LOCK:
        # Opportunistically purge expired sessions while we hold the lock.
        await _clean_sessions(created)
        _VIDEO_SESSIONS[session_id] = record
    return session_id
70
+
71
+
72
async def _get_session(task_id: str) -> Optional[dict]:
    """Return a copy of the stored session for task_id, or None if missing/expired."""
    if not task_id:
        return None
    now = time.time()
    async with _VIDEO_SESSIONS_LOCK:
        await _clean_sessions(now)
        info = _VIDEO_SESSIONS.get(task_id)
        if not info:
            return None
        # Re-check the TTL for this specific record before handing it out.
        if now - float(info.get("created_at") or 0) > VIDEO_SESSION_TTL:
            _VIDEO_SESSIONS.pop(task_id, None)
            return None
        return dict(info)
86
+
87
+
88
async def _drop_session(task_id: str) -> None:
    """Remove a single video session, silently ignoring unknown/empty ids."""
    if not task_id:
        return
    async with _VIDEO_SESSIONS_LOCK:
        _VIDEO_SESSIONS.pop(task_id, None)
93
+
94
+
95
async def _drop_sessions(task_ids: List[str]) -> int:
    """Remove every listed video session and return how many were deleted."""
    if not task_ids:
        return 0
    count = 0
    async with _VIDEO_SESSIONS_LOCK:
        for tid in task_ids:
            if not tid or tid not in _VIDEO_SESSIONS:
                continue
            del _VIDEO_SESSIONS[tid]
            count += 1
    return count
105
+
106
+
107
def _normalize_ratio(value: Optional[str]) -> str:
    """Map a resolution or ratio string to a canonical ratio ('' when unknown)."""
    key = "" if value is None else value.strip()
    return _VIDEO_RATIO_MAP.get(key, "")
110
+
111
+
112
def _validate_image_url(image_url: str) -> None:
    """Raise a 400 unless image_url is empty, an http(s) URL, or a data URI."""
    candidate = (image_url or "").strip()
    if not candidate:
        return
    if candidate.startswith(("data:", "http://", "https://")):
        return
    raise HTTPException(
        status_code=400,
        detail="image_url must be a URL or data URI (data:<mime>;base64,...)",
    )
124
+
125
+
126
class VideoStartRequest(BaseModel):
    """Payload for starting a public video-generation session."""

    prompt: str
    aspect_ratio: Optional[str] = "3:2"
    # Seconds; server only accepts 6, 10, or 15.
    video_length: Optional[int] = 6
    resolution_name: Optional[str] = "480p"
    preset: Optional[str] = "normal"
    # Optional reference image (URL or data URI) for image-to-video.
    image_url: Optional[str] = None
    reasoning_effort: Optional[str] = None
134
+
135
+
136
@router.post("/video/start", dependencies=[Depends(verify_public_key)])
async def public_video_start(data: VideoStartRequest):
    """Validate a video request, register a session, and return its task id."""

    def _bad_request(detail: str) -> None:
        # All validation failures surface as HTTP 400.
        raise HTTPException(status_code=400, detail=detail)

    prompt = (data.prompt or "").strip()
    if not prompt:
        _bad_request("Prompt cannot be empty")

    aspect_ratio = _normalize_ratio(data.aspect_ratio)
    if not aspect_ratio:
        _bad_request("aspect_ratio must be one of ['16:9','9:16','3:2','2:3','1:1']")

    video_length = int(data.video_length or 6)
    if video_length not in (6, 10, 15):
        _bad_request("video_length must be 6, 10, or 15 seconds")

    resolution_name = str(data.resolution_name or "480p")
    if resolution_name not in ("480p", "720p"):
        _bad_request("resolution_name must be one of ['480p','720p']")

    preset = str(data.preset or "normal")
    if preset not in ("fun", "normal", "spicy", "custom"):
        _bad_request("preset must be one of ['fun','normal','spicy','custom']")

    image_url = (data.image_url or "").strip() or None
    if image_url:
        _validate_image_url(image_url)

    reasoning_effort = (data.reasoning_effort or "").strip() or None
    if reasoning_effort:
        allowed = {"none", "minimal", "low", "medium", "high", "xhigh"}
        if reasoning_effort not in allowed:
            _bad_request(f"reasoning_effort must be one of {sorted(allowed)}")

    task_id = await _new_session(
        prompt,
        aspect_ratio,
        video_length,
        resolution_name,
        preset,
        image_url,
        reasoning_effort,
    )
    return {"task_id": task_id, "aspect_ratio": aspect_ratio}
192
+
193
+
194
@router.get("/video/sse")
async def public_video_sse(request: Request, task_id: str = Query("")):
    """Stream the video-generation result for a previously started session.

    Looks up the one-shot session created by /video/start, forwards the
    prompt (and optional reference image) to VideoService as an SSE stream,
    and always drops the session afterwards (sessions are single-use).
    """
    session = await _get_session(task_id)
    if not session:
        raise HTTPException(status_code=404, detail="Task not found")

    # Pull generation parameters from the stored session, with the same
    # defaults /video/start would have applied.
    prompt = str(session.get("prompt") or "").strip()
    aspect_ratio = str(session.get("aspect_ratio") or "3:2")
    video_length = int(session.get("video_length") or 6)
    resolution_name = str(session.get("resolution_name") or "480p")
    preset = str(session.get("preset") or "normal")
    image_url = session.get("image_url")
    reasoning_effort = session.get("reasoning_effort")

    async def event_stream():
        try:
            # The public endpoint is pinned to one video model; bail out with
            # an SSE error event if it is missing or not flagged as video.
            model_id = "grok-imagine-1.0-video"
            model_info = ModelService.get(model_id)
            if not model_info or not model_info.is_video:
                payload = {
                    "error": "Video model is not available.",
                    "code": "model_not_supported",
                }
                yield f"data: {orjson.dumps(payload).decode()}\n\n"
                yield "data: [DONE]\n\n"
                return

            # Image-to-video requests use the multimodal content-list shape;
            # text-only requests send a plain string content.
            if image_url:
                messages: List[Dict[str, Any]] = [
                    {
                        "role": "user",
                        "content": [
                            {"type": "text", "text": prompt},
                            {"type": "image_url", "image_url": {"url": image_url}},
                        ],
                    }
                ]
            else:
                messages = [{"role": "user", "content": prompt}]

            stream = await VideoService.completions(
                model_id,
                messages,
                stream=True,
                reasoning_effort=reasoning_effort,
                aspect_ratio=aspect_ratio,
                video_length=video_length,
                resolution=resolution_name,
                preset=preset,
            )

            # Relay upstream SSE chunks verbatim; stop early if the client
            # disconnects.
            async for chunk in stream:
                if await request.is_disconnected():
                    break
                yield chunk
        except Exception as e:
            # Surface any failure as a terminal SSE error event.
            logger.warning(f"Public video SSE error: {e}")
            payload = {"error": str(e), "code": "internal_error"}
            yield f"data: {orjson.dumps(payload).decode()}\n\n"
            yield "data: [DONE]\n\n"
        finally:
            # Sessions are single-use: always clean up, even on error.
            await _drop_session(task_id)

    return StreamingResponse(
        event_stream(),
        media_type="text/event-stream",
        headers={"Cache-Control": "no-cache", "Connection": "keep-alive"},
    )
262
+
263
+
264
class VideoStopRequest(BaseModel):
    """Payload listing video session ids to tear down."""

    task_ids: List[str]
266
+
267
+
268
@router.post("/video/stop", dependencies=[Depends(verify_public_key)])
async def public_video_stop(data: VideoStopRequest):
    """Drop the listed video sessions; reports how many were actually removed."""
    removed = await _drop_sessions(data.task_ids or [])
    return {"status": "success", "removed": removed}
272
+
273
+
274
+ __all__ = ["router"]
app/api/v1/public_api/voice.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter, Depends
2
+ from pydantic import BaseModel
3
+
4
+ from app.core.auth import verify_public_key
5
+ from app.core.exceptions import AppException
6
+ from app.services.grok.services.voice import VoiceService
7
+ from app.services.token.manager import get_token_manager
8
+
9
+ router = APIRouter()
10
+
11
+
12
class VoiceTokenResponse(BaseModel):
    """LiveKit credentials returned by the /voice/token endpoint."""

    # Upstream LiveKit access token.
    token: str
    # LiveKit websocket endpoint the client should connect to.
    url: str
    participant_name: str = ""
    room_name: str = ""
17
+
18
+
19
@router.get(
    "/voice/token",
    dependencies=[Depends(verify_public_key)],
    response_model=VoiceTokenResponse,
)
async def public_voice_token(
    voice: str = "ara",
    personality: str = "assistant",
    speed: float = 1.0,
):
    """Fetch a Grok Voice Mode (LiveKit) token.

    Picks the first available SSO token from the basic/super pools, asks the
    upstream voice service for a LiveKit token, and returns it together with
    the fixed LiveKit endpoint URL.

    Raises:
        AppException: 503 when no SSO token is available, 502 when upstream
            returns no token, 500 for any other upstream failure.
    """
    token_mgr = await get_token_manager()
    # Prefer the basic pool; fall back to the super pool.
    sso_token = None
    for pool_name in ("ssoBasic", "ssoSuper"):
        sso_token = token_mgr.get_token(pool_name)
        if sso_token:
            break

    if not sso_token:
        raise AppException(
            "No available tokens for voice mode",
            code="no_token",
            status_code=503,
        )

    service = VoiceService()
    try:
        data = await service.get_token(
            token=sso_token,
            voice=voice,
            personality=personality,
            speed=speed,
        )
        token = data.get("token")
        if not token:
            raise AppException(
                "Upstream returned no voice token",
                code="upstream_error",
                status_code=502,
            )

        return VoiceTokenResponse(
            token=token,
            url="wss://livekit.grok.com",
            participant_name="",
            room_name="",
        )

    except AppException:
        # Domain errors already carry the right status/code; pass them through.
        raise
    except Exception as e:
        # Chain the cause so upstream failures stay debuggable in logs.
        raise AppException(
            f"Voice token error: {str(e)}",
            code="voice_error",
            status_code=500,
        ) from e
75
+
76
+
77
@router.get("/verify", dependencies=[Depends(verify_public_key)])
async def public_verify_api():
    """Verify the public key; reaching this handler means auth already passed."""
    return {"status": "success"}
app/api/v1/response.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Responses API 路由 (OpenAI compatible).
3
+ """
4
+
5
+ from typing import Any, Dict, List, Optional, Union
6
+
7
+ from fastapi import APIRouter
8
+ from fastapi.responses import JSONResponse, StreamingResponse
9
+ from pydantic import BaseModel, Field
10
+
11
+ from app.core.exceptions import ValidationException
12
+ from app.services.grok.services.responses import ResponsesService
13
+
14
+
15
+ router = APIRouter(tags=["Responses"])
16
+
17
+
18
class ResponseCreateRequest(BaseModel):
    """Request body for the OpenAI-compatible /responses endpoint.

    Unknown fields are accepted (extra = "allow") so newer client options do
    not cause validation failures.
    """

    model: str = Field(..., description="Model name")
    input: Optional[Any] = Field(None, description="Input content")
    instructions: Optional[str] = Field(None, description="System instructions")
    stream: Optional[bool] = Field(False, description="Stream response")
    max_output_tokens: Optional[int] = Field(None, description="Max output tokens")
    temperature: Optional[float] = Field(None, description="Sampling temperature")
    top_p: Optional[float] = Field(None, description="Nucleus sampling")
    tools: Optional[List[Dict[str, Any]]] = Field(None, description="Tool definitions")
    tool_choice: Optional[Union[str, Dict[str, Any]]] = Field(None, description="Tool choice")
    parallel_tool_calls: Optional[bool] = Field(True, description="Allow parallel tool calls")
    reasoning: Optional[Dict[str, Any]] = Field(None, description="Reasoning options")
    metadata: Optional[Dict[str, Any]] = Field(None, description="Metadata")
    user: Optional[str] = Field(None, description="User identifier")
    store: Optional[bool] = Field(None, description="Store response")
    previous_response_id: Optional[str] = Field(None, description="Previous response id")
    truncation: Optional[str] = Field(None, description="Truncation behavior")

    class Config:
        extra = "allow"
38
+
39
+
40
@router.post("/responses")
async def create_response(request: ResponseCreateRequest):
    """Create a response via the OpenAI-compatible Responses API.

    Returns a plain JSON body, or an SSE stream when `stream` is true.
    """
    if not request.model:
        raise ValidationException(message="model is required", param="model", code="invalid_request_error")

    if request.input is None:
        raise ValidationException(message="input is required", param="input", code="invalid_request_error")

    # Accept either {"effort": ...} or {"reasoning_effort": ...}.
    reasoning_opts = request.reasoning
    reasoning_effort = None
    if isinstance(reasoning_opts, dict):
        reasoning_effort = reasoning_opts.get("effort") or reasoning_opts.get("reasoning_effort")

    result = await ResponsesService.create(
        model=request.model,
        input_value=request.input,
        instructions=request.instructions,
        stream=bool(request.stream),
        temperature=request.temperature,
        top_p=request.top_p,
        tools=request.tools,
        tool_choice=request.tool_choice,
        parallel_tool_calls=request.parallel_tool_calls,
        reasoning_effort=reasoning_effort,
        max_output_tokens=request.max_output_tokens,
        metadata=request.metadata,
        user=request.user,
        store=request.store,
        previous_response_id=request.previous_response_id,
        truncation=request.truncation,
    )

    if not request.stream:
        return JSONResponse(content=result)

    return StreamingResponse(
        result,
        media_type="text/event-stream",
        headers={"Cache-Control": "no-cache", "Connection": "keep-alive"},
    )
79
+
80
+
81
+ __all__ = ["router"]
app/api/v1/video.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ """
2
+ TODO:Video Generation API 路由
3
+ """
app/core/auth.py ADDED
@@ -0,0 +1,198 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ API 认证模块
3
+ """
4
+
5
+ import hashlib
6
+ from typing import Optional, Iterable
7
+ from fastapi import HTTPException, status, Security
8
+ from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
9
+
10
+ from app.core.config import get_config
11
+
12
+ DEFAULT_API_KEY = ""
13
+ DEFAULT_APP_KEY = "grok2api"
14
+ DEFAULT_PUBLIC_KEY = ""
15
+ DEFAULT_PUBLIC_ENABLED = False
16
+
17
+ # 定义 Bearer Scheme
18
+ security = HTTPBearer(
19
+ auto_error=False,
20
+ scheme_name="API Key",
21
+ description="Enter your API Key in the format: Bearer <key>",
22
+ )
23
+
24
+
25
+ def get_admin_api_key() -> str:
26
+ """
27
+ 获取后台 API Key。
28
+
29
+ 为空时表示不启用后台接口认证。
30
+ """
31
+ api_key = get_config("app.api_key", DEFAULT_API_KEY)
32
+ return api_key or ""
33
+
34
+
35
+ def _normalize_api_keys(value: Optional[object]) -> list[str]:
36
+ if not value:
37
+ return []
38
+ if isinstance(value, str):
39
+ raw = value.strip()
40
+ if not raw:
41
+ return []
42
+ return [part.strip() for part in raw.split(",") if part.strip()]
43
+ if isinstance(value, Iterable):
44
+ keys: list[str] = []
45
+ for item in value:
46
+ if not item:
47
+ continue
48
+ if isinstance(item, str):
49
+ stripped = item.strip()
50
+ if stripped:
51
+ keys.append(stripped)
52
+ return keys
53
+ return []
54
+
55
def get_app_key() -> str:
    """Return the App Key (admin console password)."""
    return get_config("app.app_key", DEFAULT_APP_KEY) or ""
61
+
62
def get_public_api_key() -> str:
    """Return the Public API Key; an empty string disables public-endpoint auth."""
    return get_config("app.public_key", DEFAULT_PUBLIC_KEY) or ""
70
+
71
def is_public_enabled() -> bool:
    """Whether the public feature entry points are switched on."""
    return bool(get_config("app.public_enabled", DEFAULT_PUBLIC_ENABLED))
76
+
77
+
78
def _hash_public_key(key: str) -> str:
    """SHA-256 of the namespaced public key; mirrors the frontend hashPublicKey."""
    message = "grok2api-public:" + key
    return hashlib.sha256(message.encode("utf-8")).hexdigest()
81
+
82
+
83
def _match_public_key(credentials: str, public_key: str) -> bool:
    """Check credentials against public_key (raw value or public-<sha256> hash)."""
    normalized = (public_key or "").strip()
    if not normalized:
        # An unset/blank key never matches anything.
        return False
    if credentials == normalized:
        return True
    # Hashed form: "public-" + sha256 of the namespaced key.
    return credentials.startswith("public-") and credentials == f"public-{_hash_public_key(normalized)}"
97
+
98
+
99
async def verify_api_key(
    auth: Optional[HTTPAuthorizationCredentials] = Security(security),
) -> Optional[str]:
    """Validate the Bearer token against the configured admin API key(s).

    When no api_key is configured in config.toml, authentication is disabled
    and None is returned.
    """
    valid_keys = _normalize_api_keys(get_admin_api_key())
    if not valid_keys:
        return None

    # Standard api_key check: any configured key is accepted.
    if auth is not None and auth.credentials in valid_keys:
        return auth.credentials

    detail = "Invalid authentication token" if auth else "Missing authentication token"
    raise HTTPException(
        status_code=status.HTTP_401_UNAUTHORIZED,
        detail=detail,
        headers={"WWW-Authenticate": "Bearer"},
    )
128
+
129
+
130
async def verify_app_key(
    auth: Optional[HTTPAuthorizationCredentials] = Security(security),
) -> Optional[str]:
    """Validate the admin login key (app_key).

    app_key must be configured, otherwise login is rejected outright.
    """

    def _deny(detail: str) -> None:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail=detail,
            headers={"WWW-Authenticate": "Bearer"},
        )

    app_key = get_app_key()
    if not app_key:
        _deny("App key is not configured")

    if auth is None:
        _deny("Missing authentication token")

    if auth.credentials != app_key:
        _deny("Invalid authentication token")

    return auth.credentials
162
+
163
+
164
async def verify_public_key(
    auth: Optional[HTTPAuthorizationCredentials] = Security(security),
) -> Optional[str]:
    """Validate the Public Key used by the public endpoints.

    With no public_key configured, access is allowed only when public_enabled
    is on (returns None); otherwise a Bearer credential matching the key
    (raw value or public-<sha256> form) is required.
    """

    def _deny(detail: str) -> None:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail=detail,
            headers={"WWW-Authenticate": "Bearer"},
        )

    public_key = get_public_api_key()
    if not public_key:
        if is_public_enabled():
            return None
        _deny("Public access is disabled")

    if auth is None:
        _deny("Missing authentication token")

    if not _match_public_key(auth.credentials, public_key):
        _deny("Invalid authentication token")

    return auth.credentials
app/core/batch.py ADDED
@@ -0,0 +1,233 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Batch utilities.
3
+
4
+ - run_batch: generic batch concurrency runner
5
+ - BatchTask: SSE task manager for admin batch operations
6
+ """
7
+
8
+ import asyncio
9
+ import time
10
+ import uuid
11
+ from typing import Any, Awaitable, Callable, Dict, List, Optional, TypeVar
12
+
13
+ from app.core.logger import logger
14
+
15
+ T = TypeVar("T")
16
+
17
+
18
async def run_batch(
    items: List[str],
    worker: Callable[[str], Awaitable[T]],
    *,
    batch_size: int = 50,
    task: Optional["BatchTask"] = None,
    on_item: Optional[Callable[[str, Dict[str, Any]], Awaitable[None]]] = None,
    should_cancel: Optional[Callable[[], bool]] = None,
) -> Dict[str, Dict[str, Any]]:
    """Run `worker` over `items` in fixed-size concurrent batches.

    A single failing item never aborts the run; each item's outcome is
    recorded independently.

    Args:
        items: Items to process.
        worker: Async callable applied to each item.
        batch_size: Number of items processed concurrently per batch.
        task: Optional BatchTask receiving per-item progress records.
        on_item: Optional async callback invoked with (item, result).
        should_cancel: Optional predicate polled between items/batches.

    Returns:
        {item: {"ok": bool, "data": ..., "error": ...}}
    """
    try:
        size = max(1, int(batch_size))
    except Exception:
        size = 50

    def _cancelled() -> bool:
        if should_cancel and should_cancel():
            return True
        return bool(task and task.cancelled)

    async def _notify(item: str, outcome: dict) -> None:
        # Callback errors are deliberately swallowed: progress reporting must
        # never break the batch itself.
        if on_item is None:
            return
        try:
            await on_item(item, outcome)
        except Exception:
            pass

    async def _process(item: str) -> tuple[str, dict]:
        if _cancelled():
            return item, {"ok": False, "error": "cancelled", "cancelled": True}
        try:
            outcome = {"ok": True, "data": await worker(item)}
            if task:
                task.record(True)
            await _notify(item, outcome)
            return item, outcome
        except Exception as exc:
            logger.warning(f"Batch item failed: {item[:16]}... - {exc}")
            outcome = {"ok": False, "error": str(exc)}
            if task:
                task.record(False, error=str(exc))
            await _notify(item, outcome)
            return item, outcome

    results: Dict[str, dict] = {}

    # Process in batches so we never create all tasks at once.
    for start in range(0, len(items), size):
        if _cancelled():
            break
        chunk = items[start : start + size]
        for key, outcome in await asyncio.gather(*(_process(x) for x in chunk)):
            results[key] = outcome

    return results
82
+
83
+
84
class BatchTask:
    """Progress tracker for one admin batch operation, fanned out over SSE.

    Lifecycle: created as "running"; ends as "done", "error", or "cancelled".
    Subscribers attach a bounded queue and receive JSON-serializable events.
    NOTE(review): counters and queues are mutated without locks — presumably
    all access happens on a single asyncio event loop; confirm with callers.
    """

    def __init__(self, total: int):
        # Opaque id clients use to poll / subscribe.
        self.id = uuid.uuid4().hex
        self.total = int(total)
        # Running counters updated by record().
        self.processed = 0
        self.ok = 0
        self.fail = 0
        # One of: "running", "done", "error", "cancelled".
        self.status = "running"
        self.warning: Optional[str] = None
        self.result: Optional[Dict[str, Any]] = None
        self.error: Optional[str] = None
        self.created_at = time.time()
        # One bounded queue per attached SSE subscriber.
        self._queues: List[asyncio.Queue] = []
        # Terminal event, kept so late subscribers can replay it via final_event().
        self._final_event: Optional[Dict[str, Any]] = None
        self.cancelled = False

    def snapshot(self) -> Dict[str, Any]:
        """Return the current progress state as a JSON-serializable dict."""
        return {
            "task_id": self.id,
            "status": self.status,
            "total": self.total,
            "processed": self.processed,
            "ok": self.ok,
            "fail": self.fail,
            "warning": self.warning,
        }

    def attach(self) -> asyncio.Queue:
        """Register and return a new subscriber queue (max 200 pending events)."""
        q: asyncio.Queue = asyncio.Queue(maxsize=200)
        self._queues.append(q)
        return q

    def detach(self, q: asyncio.Queue) -> None:
        """Unregister a subscriber queue previously returned by attach()."""
        if q in self._queues:
            self._queues.remove(q)

    def _publish(self, event: Dict[str, Any]) -> None:
        """Best-effort fan-out of an event to every subscriber queue."""
        for q in list(self._queues):
            try:
                q.put_nowait(event)
            except Exception:
                # Drop if queue is full or closed
                pass

    def record(
        self, ok: bool, *, item: Any = None, detail: Any = None, error: str = ""
    ) -> None:
        """Record one item's outcome and publish a "progress" event."""
        self.processed += 1
        if ok:
            self.ok += 1
        else:
            self.fail += 1
        event: Dict[str, Any] = {
            "type": "progress",
            "task_id": self.id,
            "total": self.total,
            "processed": self.processed,
            "ok": self.ok,
            "fail": self.fail,
        }
        # Optional extras are only included when provided.
        if item is not None:
            event["item"] = item
        if detail is not None:
            event["detail"] = detail
        if error:
            event["error"] = error
        self._publish(event)

    def finish(self, result: Dict[str, Any], *, warning: Optional[str] = None) -> None:
        """Mark the task done and publish the terminal "done" event."""
        self.status = "done"
        self.result = result
        self.warning = warning
        event = {
            "type": "done",
            "task_id": self.id,
            "total": self.total,
            "processed": self.processed,
            "ok": self.ok,
            "fail": self.fail,
            "warning": self.warning,
            "result": result,
        }
        self._final_event = event
        self._publish(event)

    def fail_task(self, error: str) -> None:
        """Mark the whole task as failed and publish the terminal "error" event."""
        self.status = "error"
        self.error = error
        event = {
            "type": "error",
            "task_id": self.id,
            "total": self.total,
            "processed": self.processed,
            "ok": self.ok,
            "fail": self.fail,
            "error": error,
        }
        self._final_event = event
        self._publish(event)

    def cancel(self) -> None:
        """Request cancellation; workers poll the `cancelled` flag."""
        self.cancelled = True

    def finish_cancelled(self) -> None:
        """Mark the task cancelled and publish the terminal "cancelled" event."""
        self.status = "cancelled"
        event = {
            "type": "cancelled",
            "task_id": self.id,
            "total": self.total,
            "processed": self.processed,
            "ok": self.ok,
            "fail": self.fail,
        }
        self._final_event = event
        self._publish(event)

    def final_event(self) -> Optional[Dict[str, Any]]:
        """Return the terminal event, or None while the task is still running."""
        return self._final_event
202
+
203
+
204
+ _TASKS: Dict[str, BatchTask] = {}
205
+
206
+
207
def create_task(total: int) -> BatchTask:
    """Create a BatchTask and register it in the in-memory registry."""
    new_task = BatchTask(total)
    _TASKS[new_task.id] = new_task
    return new_task
211
+
212
+
213
def get_task(task_id: str) -> Optional[BatchTask]:
    """Look up a registered task by id (None when unknown)."""
    return _TASKS.get(task_id)
215
+
216
+
217
def delete_task(task_id: str) -> None:
    """Remove a task from the registry, silently ignoring unknown ids."""
    _TASKS.pop(task_id, None)
219
+
220
+
221
async def expire_task(task_id: str, delay: int = 300) -> None:
    """Delete a task after `delay` seconds (fire-and-forget cleanup)."""
    await asyncio.sleep(delay)
    delete_task(task_id)
224
+
225
+
226
+ __all__ = [
227
+ "run_batch",
228
+ "BatchTask",
229
+ "create_task",
230
+ "get_task",
231
+ "delete_task",
232
+ "expire_task",
233
+ ]
app/core/config.py ADDED
@@ -0,0 +1,326 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ 配置管理
3
+
4
+ - config.toml: 运行时配置
5
+ - config.defaults.toml: 默认配置基线
6
+ """
7
+
8
+ from copy import deepcopy
9
+ from pathlib import Path
10
+ from typing import Any, Dict
11
+ import tomllib
12
+
13
+ from app.core.logger import logger
14
+
15
+ DEFAULT_CONFIG_FILE = Path(__file__).parent.parent.parent / "config.defaults.toml"
16
+
17
+
18
def _deep_merge(base: Dict[str, Any], override: Dict[str, Any]) -> Dict[str, Any]:
    """Recursively merge two mappings, with `override` taking precedence.

    Non-dict inputs degrade gracefully: a non-dict base is returned (copied),
    unless the override is a dict, in which case a copy of the override wins.
    """
    if not isinstance(base, dict):
        if isinstance(override, dict):
            return deepcopy(override)
        return deepcopy(base)

    merged = deepcopy(base)
    if not isinstance(override, dict):
        return merged

    for key, incoming in override.items():
        current = merged.get(key)
        if isinstance(incoming, dict) and isinstance(current, dict):
            merged[key] = _deep_merge(current, incoming)
        else:
            merged[key] = incoming
    return merged
33
+
34
+
35
def _migrate_deprecated_config(
    config: Dict[str, Any], valid_sections: set
) -> tuple[Dict[str, Any], set]:
    """Migrate deprecated/legacy config keys into the current layout.

    Keys listed in MIGRATION_MAP are copied to their new location(s) unless
    the new key is already set (explicit values always win), and the old key
    is removed from retained sections. Whole sections not in `valid_sections`
    are dropped from the result.

    Args:
        config: Raw parsed config.
        valid_sections: Section names that are still part of the schema.

    Returns:
        (migrated config, set of deprecated section names found)
    """
    # old dotted path -> new dotted path (or list of paths when one legacy key
    # fans out into several new keys). Keys must be unique: duplicate literals
    # in a dict silently override each other (the original listed several
    # network.*/security.* entries twice).
    MIGRATION_MAP = {
        # grok.* -> new sections
        "grok.temporary": "app.temporary",
        "grok.disable_memory": "app.disable_memory",
        "grok.stream": "app.stream",
        "grok.thinking": "app.thinking",
        "grok.dynamic_statsig": "app.dynamic_statsig",
        "grok.filter_tags": "app.filter_tags",
        "grok.timeout": "voice.timeout",
        "grok.base_proxy_url": "proxy.base_proxy_url",
        "grok.asset_proxy_url": "proxy.asset_proxy_url",
        "grok.cf_clearance": "proxy.cf_clearance",
        "grok.browser": "proxy.browser",
        "grok.user_agent": "proxy.user_agent",
        "grok.max_retry": "retry.max_retry",
        "grok.retry_status_codes": "retry.retry_status_codes",
        "grok.retry_backoff_base": "retry.retry_backoff_base",
        "grok.retry_backoff_factor": "retry.retry_backoff_factor",
        "grok.retry_backoff_max": "retry.retry_backoff_max",
        "grok.retry_budget": "retry.retry_budget",
        "grok.video_idle_timeout": "video.stream_timeout",
        "grok.image_ws_nsfw": "image.nsfw",
        "grok.image_ws_blocked_seconds": "image.final_timeout",
        "grok.image_ws_final_min_bytes": "image.final_min_bytes",
        "grok.image_ws_medium_min_bytes": "image.medium_min_bytes",
        # legacy network.* section
        "network.base_proxy_url": "proxy.base_proxy_url",
        "network.asset_proxy_url": "proxy.asset_proxy_url",
        "network.timeout": [
            "chat.timeout",
            "image.timeout",
            "video.timeout",
            "voice.timeout",
        ],
        # legacy security.* section
        "security.cf_clearance": "proxy.cf_clearance",
        "security.browser": "proxy.browser",
        "security.user_agent": "proxy.user_agent",
        # legacy timeout.* section
        "timeout.stream_idle_timeout": [
            "chat.stream_timeout",
            "image.stream_timeout",
            "video.stream_timeout",
        ],
        "timeout.video_idle_timeout": "video.stream_timeout",
        # old image.image_ws_* spellings
        "image.image_ws_nsfw": "image.nsfw",
        "image.image_ws_blocked_seconds": "image.final_timeout",
        "image.image_ws_final_min_bytes": "image.final_min_bytes",
        "image.image_ws_medium_min_bytes": "image.medium_min_bytes",
        # legacy performance.* section
        "performance.assets_max_concurrent": [
            "asset.upload_concurrent",
            "asset.download_concurrent",
            "asset.list_concurrent",
            "asset.delete_concurrent",
        ],
        "performance.assets_delete_batch_size": "asset.delete_batch_size",
        "performance.assets_batch_size": "asset.list_batch_size",
        "performance.media_max_concurrent": ["chat.concurrent", "video.concurrent"],
        "performance.usage_max_concurrent": "usage.concurrent",
        "performance.usage_batch_size": "usage.batch_size",
        "performance.nsfw_max_concurrent": "nsfw.concurrent",
        "performance.nsfw_batch_size": "nsfw.batch_size",
    }

    deprecated_sections = set(config.keys()) - valid_sections
    if not deprecated_sections:
        # Nothing legacy present; return the config untouched.
        return config, set()

    # Keep only schema sections; mapped legacy values are copied over below.
    result = {k: deepcopy(v) for k, v in config.items() if k in valid_sections}
    migrated_count = 0

    # Copy each mapped legacy key into its new home (existing values win).
    for old_section, old_values in config.items():
        if not isinstance(old_values, dict):
            continue
        for old_key, old_value in old_values.items():
            old_path = f"{old_section}.{old_key}"
            new_paths = MIGRATION_MAP.get(old_path)
            if not new_paths:
                continue
            if isinstance(new_paths, str):
                new_paths = [new_paths]
            for new_path in new_paths:
                try:
                    new_section, new_key = new_path.split(".", 1)
                    if new_section not in result:
                        result[new_section] = {}
                    if new_key not in result[new_section]:
                        result[new_section][new_key] = old_value
                        migrated_count += 1
                        logger.debug(
                            f"Migrated config: {old_path} -> {new_path} = {old_value}"
                        )
                except Exception as e:
                    logger.warning(
                        f"Skip config migration for {old_path}: {e}"
                    )
                    continue
            # Drop the legacy key from retained sections so it doesn't linger.
            if isinstance(result.get(old_section), dict):
                result[old_section].pop(old_key, None)

    # Legacy chat.* toggles now live under app.*.
    legacy_chat_map = {
        "temporary": "temporary",
        "disable_memory": "disable_memory",
        "stream": "stream",
        "thinking": "thinking",
        "dynamic_statsig": "dynamic_statsig",
        "filter_tags": "filter_tags",
    }
    chat_section = config.get("chat")
    if isinstance(chat_section, dict):
        app_section = result.setdefault("app", {})
        for old_key, new_key in legacy_chat_map.items():
            if old_key in chat_section and new_key not in app_section:
                app_section[new_key] = chat_section[old_key]
                if isinstance(result.get("chat"), dict):
                    result["chat"].pop(old_key, None)
                migrated_count += 1
                logger.debug(
                    f"Migrated config: chat.{old_key} -> app.{new_key} = {chat_section[old_key]}"
                )

    if migrated_count > 0:
        logger.info(
            f"Migrated {migrated_count} config items from deprecated/legacy sections"
        )

    return result, deprecated_sections
177
+
178
+
179
def _load_defaults() -> Dict[str, Any]:
    """Load the bundled default configuration file.

    Returns an empty dict when the file is missing or cannot be parsed.
    """
    if DEFAULT_CONFIG_FILE.exists():
        try:
            with DEFAULT_CONFIG_FILE.open("rb") as fh:
                return tomllib.load(fh)
        except Exception as e:
            logger.warning(f"Failed to load defaults from {DEFAULT_CONFIG_FILE}: {e}")
    return {}
189
+
190
+
191
class Config:
    """Configuration manager.

    Merges code-registered defaults, file defaults and backend-stored
    configuration, migrating deprecated sections on load.
    """

    # NOTE(review): looks like leftover singleton scaffolding; the module
    # exposes a single shared instance (``config``) below — confirm.
    _instance = None
    _config = {}

    def __init__(self):
        self._config = {}  # effective (merged) configuration
        self._defaults = {}  # file defaults merged over code defaults
        self._code_defaults = {}  # defaults registered from code
        self._defaults_loaded = False

    def register_defaults(self, defaults: Dict[str, Any]):
        """Register default values declared in code (lowest precedence)."""
        self._code_defaults = _deep_merge(self._code_defaults, defaults)

    def _ensure_defaults(self):
        # Lazily build the combined defaults once.
        if self._defaults_loaded:
            return
        file_defaults = _load_defaults()
        # Merge file defaults over code defaults (code defaults lose).
        self._defaults = _deep_merge(self._code_defaults, file_defaults)
        self._defaults_loaded = True

    async def load(self):
        """Explicitly load configuration from the storage backend.

        Falls back to the local data/config.toml when the backend is empty,
        migrates deprecated sections, and writes the merged result back when
        it is safe to do so. On any error the runtime config is reset to {}.
        """
        try:
            from app.core.storage import get_storage, LocalStorage

            self._ensure_defaults()

            storage = get_storage()
            config_data = await storage.load_config()
            from_remote = True

            # Seed the backend from local data/config.toml when it is empty.
            if config_data is None:
                local_storage = LocalStorage()
                from_remote = False
                try:
                    # Try reading the local config file.
                    config_data = await local_storage.load_config()
                except Exception as e:
                    logger.info(f"Failed to auto-init config from local: {e}")
                    config_data = {}

            config_data = config_data or {}

            # Detect and migrate deprecated config sections.
            valid_sections = set(self._defaults.keys())
            config_data, deprecated_sections = _migrate_deprecated_config(
                config_data, valid_sections
            )
            if deprecated_sections:
                logger.info(
                    f"Cleaned deprecated config sections: {deprecated_sections}"
                )

            merged = _deep_merge(self._defaults, config_data)

            # Backfill missing config into storage, or persist after a
            # migration. Guard: when the remote returned None and there is
            # no local seed either, do NOT overwrite the remote config to
            # avoid accidentally resetting it.
            has_local_seed = bool(config_data)
            allow_bootstrap_empty_remote = (
                (not from_remote) and has_local_seed
            )
            should_persist = (
                allow_bootstrap_empty_remote
                or (merged != config_data and bool(config_data))
                or deprecated_sections
            )
            if should_persist:
                async with storage.acquire_lock("config_save", timeout=10):
                    await storage.save_config(merged)
                if not from_remote and has_local_seed:
                    logger.info(
                        f"Initialized remote storage ({storage.__class__.__name__}) with config baseline."
                    )
                if deprecated_sections:
                    logger.info("Configuration automatically migrated and cleaned.")
            elif not from_remote and not has_local_seed:
                logger.warning(
                    "Skip persisting defaults: empty config source detected, keep runtime merged config only."
                )

            self._config = merged
        except Exception as e:
            logger.error(f"Error loading config: {e}")
            self._config = {}

    def get(self, key: str, default: Any = None) -> Any:
        """
        Get a configuration value.

        Args:
            key: Config key, either "section.key" (dotted) or a bare
                top-level key.
            default: Value returned when the key is absent.
        """
        if "." in key:
            try:
                section, attr = key.split(".", 1)
                return self._config.get(section, {}).get(attr, default)
            except (ValueError, AttributeError):
                return default

        return self._config.get(key, default)

    async def update(self, new_config: dict):
        """Merge *new_config* over the current state and persist under a lock."""
        from app.core.storage import get_storage

        storage = get_storage()
        async with storage.acquire_lock("config_save", timeout=10):
            self._ensure_defaults()
            base = _deep_merge(self._defaults, self._config or {})
            merged = _deep_merge(base, new_config or {})
            await storage.save_config(merged)
            self._config = merged
310
+
311
+
312
# Global configuration instance shared across the application.
config = Config()
314
+
315
+
316
def get_config(key: str, default: Any = None) -> Any:
    """Convenience wrapper: read a value from the global ``config``."""
    return config.get(key, default)
319
+
320
+
321
def register_defaults(defaults: Dict[str, Any]):
    """Convenience wrapper: register code defaults on the global ``config``."""
    config.register_defaults(defaults)


__all__ = ["Config", "config", "get_config", "register_defaults"]
app/core/exceptions.py ADDED
@@ -0,0 +1,232 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ 全局异常处理 - OpenAI 兼容错误格式
3
+ """
4
+
5
+ from typing import Any
6
+ from enum import Enum
7
+ from fastapi import Request, HTTPException
8
+ from fastapi.responses import JSONResponse
9
+ from fastapi.exceptions import RequestValidationError
10
+
11
+ from app.core.logger import logger
12
+
13
+
14
+ # ============= 错误类型 =============
15
+
16
+
17
class ErrorType(str, Enum):
    """OpenAI-compatible error type identifiers.

    Values mirror the ``error.type`` strings of the OpenAI error format.
    """

    INVALID_REQUEST = "invalid_request_error"
    AUTHENTICATION = "authentication_error"
    PERMISSION = "permission_error"
    NOT_FOUND = "not_found_error"
    RATE_LIMIT = "rate_limit_error"
    SERVER = "server_error"
    SERVICE_UNAVAILABLE = "service_unavailable_error"
27
+
28
+
29
+ # ============= 辅助函数 =============
30
+
31
+
32
def error_response(
    message: str,
    error_type: str = ErrorType.INVALID_REQUEST.value,
    param: str | None = None,
    code: str | None = None,
) -> dict:
    """Build an OpenAI-compatible error payload.

    Fix: ``param``/``code`` were annotated ``str`` with a ``None`` default
    (implicit Optional, discouraged by PEP 484); now explicitly optional.

    Args:
        message: Human-readable error description.
        error_type: One of the ``ErrorType`` values.
        param: Offending request parameter, if known.
        code: Machine-readable error code, if any.

    Returns:
        ``{"error": {...}}`` dict matching the OpenAI error envelope.
    """
    return {
        "error": {"message": message, "type": error_type, "param": param, "code": code}
    }
42
+
43
+
44
+ # ============= 异常类 =============
45
+
46
+
47
class AppException(Exception):
    """Base application exception carrying OpenAI-style error metadata."""

    def __init__(
        self,
        message: str,
        error_type: str = ErrorType.SERVER.value,
        code: str = None,
        param: str = None,
        status_code: int = 500,
    ):
        super().__init__(message)
        self.message = message
        self.error_type = error_type
        self.code = code
        self.param = param
        self.status_code = status_code
64
+
65
+
66
class ValidationException(AppException):
    """Raised for request validation failures (HTTP 400)."""

    def __init__(self, message: str, param: str = None, code: str = None):
        resolved_code = code or "invalid_value"
        super().__init__(
            message=message,
            error_type=ErrorType.INVALID_REQUEST.value,
            code=resolved_code,
            param=param,
            status_code=400,
        )
77
+
78
+
79
class AuthenticationException(AppException):
    """Raised when API-key authentication fails (HTTP 401)."""

    def __init__(self, message: str = "Invalid API key"):
        super().__init__(
            message=message,
            status_code=401,
            error_type=ErrorType.AUTHENTICATION.value,
            code="invalid_api_key",
        )
89
+
90
+
91
class UpstreamException(AppException):
    """Raised when an upstream service call fails (HTTP 502)."""

    def __init__(self, message: str, details: Any = None):
        self.details = details
        super().__init__(
            message=message,
            error_type=ErrorType.SERVER.value,
            code="upstream_error",
            status_code=502,
        )
102
+
103
+
104
class StreamIdleTimeoutError(Exception):
    """Raised when a streaming response produces no data for too long."""

    def __init__(self, idle_seconds: float):
        super().__init__(f"Stream idle timeout after {idle_seconds}s")
        self.idle_seconds = idle_seconds
110
+
111
+
112
+ # ============= 异常处理器 =============
113
+
114
+
115
async def app_exception_handler(request: Request, exc: AppException) -> JSONResponse:
    """Translate an ``AppException`` into an OpenAI-style JSON error."""
    logger.warning(f"AppException: {exc.error_type} - {exc.message}")

    payload = error_response(
        message=exc.message,
        error_type=exc.error_type,
        param=exc.param,
        code=exc.code,
    )
    return JSONResponse(status_code=exc.status_code, content=payload)
128
+
129
+
130
async def http_exception_handler(request: Request, exc: HTTPException) -> JSONResponse:
    """Translate a FastAPI ``HTTPException`` into an OpenAI-style error.

    Maps the HTTP status code to the closest OpenAI error type and, where
    one exists, a conventional machine-readable error code.
    """
    type_map = {
        400: ErrorType.INVALID_REQUEST.value,
        401: ErrorType.AUTHENTICATION.value,
        403: ErrorType.PERMISSION.value,
        404: ErrorType.NOT_FOUND.value,
        429: ErrorType.RATE_LIMIT.value,
        # Consistency fix: ErrorType.SERVICE_UNAVAILABLE was defined but
        # never used — 503 previously fell through to server_error.
        503: ErrorType.SERVICE_UNAVAILABLE.value,
    }
    error_type = type_map.get(exc.status_code, ErrorType.SERVER.value)

    # Default machine-readable codes per status.
    code_map = {
        401: "invalid_api_key",
        403: "insufficient_quota",
        404: "model_not_found",
        429: "rate_limit_exceeded",
    }
    code = code_map.get(exc.status_code, None)

    logger.warning(f"HTTPException: {exc.status_code} - {exc.detail}")

    return JSONResponse(
        status_code=exc.status_code,
        content=error_response(
            message=str(exc.detail), error_type=error_type, code=code
        ),
    )
158
+
159
+
160
async def validation_exception_handler(
    request: Request, exc: RequestValidationError
) -> JSONResponse:
    """Translate request-validation errors into the OpenAI error format.

    Only the first reported error is surfaced to the client.
    """
    errors = exc.errors()

    param, message, code = None, "Invalid request", "invalid_value"
    if errors:
        first = errors[0]
        loc = first.get("loc", [])
        msg = first.get("msg", "Invalid request")
        code = first.get("type", "invalid_value")

        if code == "json_invalid" or "JSON" in msg:
            # Malformed request body: point the caller at the body itself.
            message = "Invalid JSON in request body. Please check for trailing commas or syntax errors."
            param = "body"
        else:
            # Drop numeric indices from the location to form a dotted path.
            parts = [
                str(x) for x in loc if not (isinstance(x, int) or str(x).isdigit())
            ]
            param = ".".join(parts) if parts else None
            message = msg

    logger.warning(f"ValidationError: {param} - {message}")

    return JSONResponse(
        status_code=400,
        content=error_response(
            message=message,
            error_type=ErrorType.INVALID_REQUEST.value,
            param=param,
            code=code,
        ),
    )
196
+
197
+
198
async def generic_exception_handler(request: Request, exc: Exception) -> JSONResponse:
    """Last-resort handler: log the traceback, return a generic 500."""
    logger.exception(f"Unhandled: {type(exc).__name__}: {str(exc)}")

    body = error_response(
        message="Internal server error",
        error_type=ErrorType.SERVER.value,
        code="internal_error",
    )
    return JSONResponse(status_code=500, content=body)
210
+
211
+
212
+ # ============= 注册 =============
213
+
214
+
215
def register_exception_handlers(app):
    """Attach all OpenAI-compatible exception handlers to a FastAPI app."""
    # Registration order matches handler specificity.
    handlers = (
        (AppException, app_exception_handler),
        (HTTPException, http_exception_handler),
        (RequestValidationError, validation_exception_handler),
        (Exception, generic_exception_handler),
    )
    for exc_type, handler in handlers:
        app.add_exception_handler(exc_type, handler)


__all__ = [
    "ErrorType",
    "AppException",
    "ValidationException",
    "AuthenticationException",
    "UpstreamException",
    "StreamIdleTimeoutError",
    "error_response",
    "register_exception_handlers",
]
app/core/logger.py ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ 结构化 JSON 日志 - 极简格式
3
+ """
4
+
5
+ import sys
6
+ import os
7
+ import json
8
+ import traceback
9
+ from pathlib import Path
10
+ from loguru import logger
11
+
12
+ # Provide logging.Logger compatibility for legacy calls
13
+ if not hasattr(logger, "isEnabledFor"):
14
+ logger.isEnabledFor = lambda _level: True
15
+
16
+ # 日志目录
17
+ DEFAULT_LOG_DIR = Path(__file__).parent.parent.parent / "logs"
18
+ LOG_DIR = Path(os.getenv("LOG_DIR", str(DEFAULT_LOG_DIR)))
19
+ _LOG_DIR_READY = False
20
+
21
+
22
+ def _prepare_log_dir() -> bool:
23
+ """确保日志目录可用"""
24
+ global LOG_DIR, _LOG_DIR_READY
25
+ if _LOG_DIR_READY:
26
+ return True
27
+ try:
28
+ LOG_DIR.mkdir(parents=True, exist_ok=True)
29
+ _LOG_DIR_READY = True
30
+ return True
31
+ except Exception:
32
+ _LOG_DIR_READY = False
33
+ return False
34
+
35
+
36
+ def _format_json(record) -> str:
37
+ """格式化日志"""
38
+ # ISO8601 时间
39
+ time_str = record["time"].strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3]
40
+ tz = record["time"].strftime("%z")
41
+ if tz:
42
+ time_str += tz[:3] + ":" + tz[3:]
43
+
44
+ log_entry = {
45
+ "time": time_str,
46
+ "level": record["level"].name.lower(),
47
+ "msg": record["message"],
48
+ "caller": f"{record['file'].name}:{record['line']}",
49
+ }
50
+
51
+ # trace 上下文
52
+ extra = record["extra"]
53
+ if extra.get("traceID"):
54
+ log_entry["traceID"] = extra["traceID"]
55
+ if extra.get("spanID"):
56
+ log_entry["spanID"] = extra["spanID"]
57
+
58
+ # 其他 extra 字段
59
+ for key, value in extra.items():
60
+ if key not in ("traceID", "spanID") and not key.startswith("_"):
61
+ log_entry[key] = value
62
+
63
+ # 错误及以上级别添加堆栈跟踪
64
+ if record["level"].no >= 40 and record["exception"]:
65
+ log_entry["stacktrace"] = "".join(
66
+ traceback.format_exception(
67
+ record["exception"].type,
68
+ record["exception"].value,
69
+ record["exception"].traceback,
70
+ )
71
+ )
72
+
73
+ return json.dumps(log_entry, ensure_ascii=False)
74
+
75
+ def _env_flag(name: str, default: bool) -> bool:
76
+ raw = os.getenv(name)
77
+ if raw is None:
78
+ return default
79
+ return raw.strip().lower() in ("1", "true", "yes", "on", "y")
80
+
81
+
82
+ def _make_json_sink(output):
83
+ """创建 JSON sink"""
84
+
85
+ def sink(message):
86
+ json_str = _format_json(message.record)
87
+ print(json_str, file=output, flush=True)
88
+
89
+ return sink
90
+
91
+
92
+ def _file_json_sink(message):
93
+ """写入日志文件"""
94
+ record = message.record
95
+ json_str = _format_json(record)
96
+ log_file = LOG_DIR / f"app_{record['time'].strftime('%Y-%m-%d')}.log"
97
+ with open(log_file, "a", encoding="utf-8") as f:
98
+ f.write(json_str + "\n")
99
+
100
+
101
+ def setup_logging(
102
+ level: str = "DEBUG",
103
+ json_console: bool = True,
104
+ file_logging: bool = True,
105
+ ):
106
+ """设置日志配置"""
107
+ logger.remove()
108
+ file_logging = _env_flag("LOG_FILE_ENABLED", file_logging)
109
+
110
+ # 控制台输出
111
+ if json_console:
112
+ logger.add(
113
+ _make_json_sink(sys.stdout),
114
+ level=level,
115
+ format="{message}",
116
+ colorize=False,
117
+ )
118
+ else:
119
+ logger.add(
120
+ sys.stdout,
121
+ level=level,
122
+ format="<green>{time:YYYY-MM-DD HH:mm:ss}</green> | <level>{level: <8}</level> | <cyan>{file.name}:{line}</cyan> - <level>{message}</level>",
123
+ colorize=True,
124
+ )
125
+
126
+ # 文件输出
127
+ if file_logging:
128
+ if _prepare_log_dir():
129
+ logger.add(
130
+ _file_json_sink,
131
+ level=level,
132
+ format="{message}",
133
+ enqueue=True,
134
+ )
135
+ else:
136
+ logger.warning("File logging disabled: no writable log directory.")
137
+
138
+ return logger
139
+
140
+
141
+ def get_logger(trace_id: str = "", span_id: str = ""):
142
+ """获取绑定了 trace 上下文的 logger"""
143
+ bound = {}
144
+ if trace_id:
145
+ bound["traceID"] = trace_id
146
+ if span_id:
147
+ bound["spanID"] = span_id
148
+ return logger.bind(**bound) if bound else logger
149
+
150
+
151
+ __all__ = ["logger", "setup_logging", "get_logger", "LOG_DIR"]
app/core/response_middleware.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ 响应中间件
3
+ Response Middleware
4
+
5
+ 用于记录请求日志、生成 TraceID 和计算请求耗时
6
+ """
7
+
8
+ import time
9
+ import uuid
10
+ from starlette.middleware.base import BaseHTTPMiddleware
11
+ from starlette.requests import Request
12
+
13
+ from app.core.logger import logger
14
+
15
+
16
class ResponseLoggerMiddleware(BaseHTTPMiddleware):
    """
    Request logging and response tracking middleware.

    Assigns a UUID trace ID to every request (exposed as
    ``request.state.trace_id``), logs request/response lines with timing,
    and skips logging for static assets and known HTML page routes.
    """

    # Exact paths that bypass logging; /static/* is also skipped.
    # Hoisted to a class-level frozenset (was a tuple rebuilt per request).
    _SKIP_PATHS = frozenset(
        (
            "/",
            "/login",
            "/imagine",
            "/voice",
            "/admin",
            "/admin/login",
            "/admin/config",
            "/admin/cache",
            "/admin/token",
        )
    )

    async def dispatch(self, request: Request, call_next):
        # Generate a per-request trace ID for downstream handlers/log lines.
        trace_id = str(uuid.uuid4())
        request.state.trace_id = trace_id

        start_time = time.time()
        path = request.url.path

        # Skip noisy logging for static assets and page routes.
        if path.startswith("/static/") or path in self._SKIP_PATHS:
            return await call_next(request)

        # Log the inbound request.
        logger.info(
            f"Request: {request.method} {request.url.path}",
            extra={
                "traceID": trace_id,
                "method": request.method,
                "path": request.url.path,
            },
        )

        try:
            response = await call_next(request)
        except Exception as e:
            duration = (time.time() - start_time) * 1000
            logger.error(
                f"Response Error: {request.method} {request.url.path} - {str(e)} ({duration:.2f}ms)",
                extra={
                    "traceID": trace_id,
                    "method": request.method,
                    "path": request.url.path,
                    "duration_ms": round(duration, 2),
                    "error": str(e),
                },
            )
            # Fix: bare `raise` (was `raise e`) re-raises without adding an
            # extra re-raise frame to the traceback.
            raise

        # Log the response with elapsed time.
        duration = (time.time() - start_time) * 1000
        logger.info(
            f"Response: {request.method} {request.url.path} - {response.status_code} ({duration:.2f}ms)",
            extra={
                "traceID": trace_id,
                "method": request.method,
                "path": request.url.path,
                "status": response.status_code,
                "duration_ms": round(duration, 2),
            },
        )
        return response
app/core/storage.py ADDED
@@ -0,0 +1,1478 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ 统一存储服务 (Professional Storage Service)
3
+ 支持 Local (TOML), Redis, MySQL, PostgreSQL
4
+
5
+ 特性:
6
+ - 全异步 I/O (Async I/O)
7
+ - 连接池管理 (Connection Pooling)
8
+ - 分布式/本地锁 (Distributed/Local Locking)
9
+ - 内存优化 (序列化性能优化)
10
+ """
11
+
12
+ import abc
13
+ import os
14
+ import asyncio
15
+ import hashlib
16
+ import time
17
+ import tomllib
18
+ from typing import Any, ClassVar, Dict, Optional
19
+ from pathlib import Path
20
+ from enum import Enum
21
+
22
+ try:
23
+ import fcntl
24
+ except ImportError: # pragma: no cover - non-posix platforms
25
+ fcntl = None
26
+ from contextlib import asynccontextmanager
27
+
28
+ import orjson
29
+ import aiofiles
30
+ from app.core.logger import logger
31
+
32
# Data directory (overridable via the DATA_DIR environment variable).
DEFAULT_DATA_DIR = Path(__file__).parent.parent.parent / "data"
DATA_DIR = Path(os.getenv("DATA_DIR", str(DEFAULT_DATA_DIR))).expanduser()

# Well-known file paths inside the data directory.
CONFIG_FILE = DATA_DIR / "config.toml"
TOKEN_FILE = DATA_DIR / "token.json"
LOCK_DIR = DATA_DIR / ".locks"
40
+
41
+
42
# JSON serialization helpers (orjson-backed for performance).
def json_dumps(obj: Any) -> str:
    """Serialize *obj* to a compact JSON string."""
    return orjson.dumps(obj).decode("utf-8")


def json_loads(obj: str | bytes) -> Any:
    """Parse JSON from str or bytes."""
    return orjson.loads(obj)


def json_dumps_sorted(obj: Any) -> str:
    """Serialize *obj* to JSON with deterministic (sorted) key order."""
    return orjson.dumps(obj, option=orjson.OPT_SORT_KEYS).decode("utf-8")
53
+
54
+
55
class StorageError(Exception):
    """Base exception for storage-backend failures."""
59
+
60
+
61
class BaseStorage(abc.ABC):
    """Abstract storage backend: config and token persistence plus locking."""

    @abc.abstractmethod
    async def load_config(self) -> Dict[str, Any]:
        """Load configuration (backends may return None when empty)."""
        pass

    @abc.abstractmethod
    async def save_config(self, data: Dict[str, Any]):
        """Persist the full configuration mapping."""
        pass

    @abc.abstractmethod
    async def load_tokens(self) -> Dict[str, Any]:
        """Load all tokens, keyed by pool name."""
        pass

    @abc.abstractmethod
    async def save_tokens(self, data: Dict[str, Any]):
        """Persist the full token mapping."""
        pass

    async def save_tokens_delta(
        self, updated: list[Dict[str, Any]], deleted: Optional[list[str]] = None
    ):
        """Incrementally save tokens (default falls back to a full save).

        Args:
            updated: Token dicts carrying ``pool_name`` and ``token``; each
                replaces the matching entry in its pool or is appended.
            deleted: Token strings to remove from every pool.
        """
        existing = await self.load_tokens() or {}

        # First remove deleted tokens from every pool.
        deleted_set = set(deleted or [])
        if deleted_set:
            for pool_name, tokens in list(existing.items()):
                if not isinstance(tokens, list):
                    continue
                filtered = []
                for item in tokens:
                    # Pool entries may be bare strings or dicts.
                    if isinstance(item, str):
                        token_str = item
                    elif isinstance(item, dict):
                        token_str = item.get("token")
                    else:
                        token_str = None
                    if token_str and token_str in deleted_set:
                        continue
                    filtered.append(item)
                existing[pool_name] = filtered

        # Then upsert the updated entries.
        for item in updated or []:
            if not isinstance(item, dict):
                continue
            pool_name = item.get("pool_name")
            token_str = item.get("token")
            if not pool_name or not token_str:
                continue
            pool_list = existing.setdefault(pool_name, [])
            # Strip routing metadata before persisting.
            normalized = {
                k: v
                for k, v in item.items()
                if k not in ("pool_name", "_update_kind")
            }
            replaced = False
            for idx, current in enumerate(pool_list):
                if isinstance(current, str):
                    if current == token_str:
                        pool_list[idx] = normalized
                        replaced = True
                        break
                elif isinstance(current, dict) and current.get("token") == token_str:
                    pool_list[idx] = normalized
                    replaced = True
                    break
            if not replaced:
                pool_list.append(normalized)

        await self.save_tokens(existing)

    @abc.abstractmethod
    async def close(self):
        """Release backend resources (connections, pools)."""
        pass

    @asynccontextmanager
    async def acquire_lock(self, name: str, timeout: int = 10):
        """
        Acquire a mutex guarding read/write critical sections.

        Args:
            name: Lock name.
            timeout: Timeout in seconds.
        """
        # Default no-op implementation used as a fallback.
        yield

    async def verify_connection(self) -> bool:
        """Health check; the base implementation always reports healthy."""
        return True
158
+
159
+
160
def _toml_escape(val: str) -> str:
    """Quote *val* as a TOML basic string.

    Bug fix: the previous writer escaped only double quotes, so values
    containing backslashes or newlines produced invalid/corrupted TOML.
    """
    escaped = (
        val.replace("\\", "\\\\")
        .replace('"', '\\"')
        .replace("\n", "\\n")
        .replace("\r", "\\r")
        .replace("\t", "\\t")
    )
    return f'"{escaped}"'


class LocalStorage(BaseStorage):
    """
    Local file storage backend.

    - Async file I/O via aiofiles.
    - In-process mutual exclusion via asyncio.Lock.
    - Cross-process safety via fcntl file locks where available.
    """

    def __init__(self):
        # Serializes lock acquisition within this process.
        self._lock = asyncio.Lock()

    @asynccontextmanager
    async def acquire_lock(self, name: str, timeout: int = 10):
        """Acquire a named mutex; raises StorageError on timeout.

        Uses a non-blocking fcntl file lock polled on the event loop for
        multi-process safety on POSIX; falls back to the in-process
        asyncio.Lock where fcntl is unavailable (e.g. Windows).
        """
        if fcntl is None:
            # No fcntl: process-local lock only.
            try:
                async with asyncio.timeout(timeout):
                    async with self._lock:
                        yield
            except asyncio.TimeoutError:
                logger.warning(f"LocalStorage: 获取锁 '{name}' 超时 ({timeout}s)")
                raise StorageError(f"无法获取锁 '{name}'")
            return

        lock_path = LOCK_DIR / f"{name}.lock"
        lock_path.parent.mkdir(parents=True, exist_ok=True)
        fd = None
        locked = False
        start = time.monotonic()

        async with self._lock:
            try:
                fd = open(lock_path, "a+")
                # Poll the non-blocking flock so the event loop stays live.
                while True:
                    try:
                        fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
                        locked = True
                        break
                    except BlockingIOError:
                        if time.monotonic() - start >= timeout:
                            raise StorageError(f"无法获取锁 '{name}'")
                        await asyncio.sleep(0.05)
                yield
            except StorageError:
                logger.warning(f"LocalStorage: 获取锁 '{name}' 超时 ({timeout}s)")
                raise
            finally:
                if fd:
                    if locked:
                        try:
                            fcntl.flock(fd, fcntl.LOCK_UN)
                        except Exception:
                            pass
                    try:
                        fd.close()
                    except Exception:
                        pass

    async def load_config(self) -> Dict[str, Any]:
        """Read and parse data/config.toml; {} when missing or invalid."""
        if not CONFIG_FILE.exists():
            return {}
        try:
            async with aiofiles.open(CONFIG_FILE, "rb") as f:
                content = await f.read()
            return tomllib.loads(content.decode("utf-8"))
        except Exception as e:
            logger.error(f"LocalStorage: 加载配置失败: {e}")
            return {}

    async def save_config(self, data: Dict[str, Any]):
        """Serialize *data* to TOML (one [section] level) and write it.

        Fixes: strings are now fully escaped via _toml_escape, and the
        write is atomic (temp file + os.replace), matching save_tokens.
        """
        try:
            lines = []
            for section, items in data.items():
                if not isinstance(items, dict):
                    continue
                lines.append(f"[{section}]")
                for key, val in items.items():
                    # bool must be checked before int (bool subclasses int).
                    if isinstance(val, bool):
                        val_str = "true" if val else "false"
                    elif isinstance(val, str):
                        val_str = _toml_escape(val)
                    elif isinstance(val, (int, float)):
                        val_str = str(val)
                    elif isinstance(val, (list, dict)):
                        val_str = json_dumps(val)
                    else:
                        val_str = _toml_escape(str(val))
                    lines.append(f"{key} = {val_str}")
                lines.append("")

            content = "\n".join(lines)

            CONFIG_FILE.parent.mkdir(parents=True, exist_ok=True)
            # Atomic write: temp file then os.replace.
            temp_path = CONFIG_FILE.with_suffix(".tmp")
            async with aiofiles.open(temp_path, "w", encoding="utf-8") as f:
                await f.write(content)
            os.replace(temp_path, CONFIG_FILE)
        except Exception as e:
            logger.error(f"LocalStorage: 保存配置失败: {e}")
            raise StorageError(f"保存配置失败: {e}")

    async def load_tokens(self) -> Dict[str, Any]:
        """Read data/token.json; {} when missing or invalid."""
        if not TOKEN_FILE.exists():
            return {}
        try:
            async with aiofiles.open(TOKEN_FILE, "rb") as f:
                content = await f.read()
            return json_loads(content)
        except Exception as e:
            logger.error(f"LocalStorage: 加载 Token 失败: {e}")
            return {}

    async def save_tokens(self, data: Dict[str, Any]):
        """Write the token mapping atomically (temp file + os.replace)."""
        try:
            TOKEN_FILE.parent.mkdir(parents=True, exist_ok=True)
            temp_path = TOKEN_FILE.with_suffix(".tmp")

            # Atomic write: temp file -> rename.
            async with aiofiles.open(temp_path, "wb") as f:
                await f.write(orjson.dumps(data, option=orjson.OPT_INDENT_2))

            os.replace(temp_path, TOKEN_FILE)

        except Exception as e:
            logger.error(f"LocalStorage: 保存 Token 失败: {e}")
            raise StorageError(f"保存 Token 失败: {e}")

    async def close(self):
        """No pooled resources to release for the local backend."""
        pass
288
+
289
+
290
+ class RedisStorage(BaseStorage):
291
+ """
292
+ Redis 存储
293
+ - 使用 redis-py 异步客户端 (自带连接池)
294
+ - 支持分布式锁 (redis.lock)
295
+ - 扁平化数据结构优化性能
296
+ """
297
+
298
    def __init__(self, url: str):
        """Create the async Redis client and define key-space constants.

        Raises:
            ImportError: when the ``redis`` package is not installed.
        """
        try:
            from redis import asyncio as aioredis
        except ImportError:
            raise ImportError("需要安装 redis 包: pip install redis")

        # Explicit pool configuration. decode_responses=True simplifies
        # string handling; complex objects use the orjson helpers.
        self.redis = aioredis.from_url(
            url, decode_responses=True, health_check_interval=30
        )
        self.config_key = "grok2api:config"  # Hash: section.key -> value_json
        self.key_pools = "grok2api:pools"  # Set: pool_names
        self.prefix_pool_set = "grok2api:pool:"  # Set: pool -> token_ids
        self.prefix_token_hash = "grok2api:token:"  # Hash: token_id -> token_data
        self.lock_prefix = "grok2api:lock:"
314
+
315
    @asynccontextmanager
    async def acquire_lock(self, name: str, timeout: int = 10):
        """Distributed mutex backed by a Redis lock.

        Args:
            name: Lock name (namespaced under the lock prefix).
            timeout: Lock auto-expiry in seconds; acquisition itself blocks
                for at most 5 seconds (blocking_timeout).

        Raises:
            StorageError: if the lock cannot be acquired in time.
        """
        # Redis distributed lock
        lock_key = f"{self.lock_prefix}{name}"
        lock = self.redis.lock(lock_key, timeout=timeout, blocking_timeout=5)
        acquired = False
        try:
            acquired = await lock.acquire()
            if not acquired:
                raise StorageError(f"RedisStorage: 无法获取锁 '{name}'")
            yield
        finally:
            if acquired:
                try:
                    await lock.release()
                except Exception:
                    # The lock may have expired or been released elsewhere.
                    pass
333
+
334
+ async def verify_connection(self) -> bool:
335
+ try:
336
+ return await self.redis.ping()
337
+ except Exception:
338
+ return False
339
+
340
    async def load_config(self) -> Optional[Dict[str, Any]]:
        """Load configuration from the Redis hash.

        Returns None when the hash is missing/empty or on error; the caller
        treats None as "backend not initialized". (Annotation fixed to
        Optional to reflect the None returns.)
        """
        try:
            raw_data = await self.redis.hgetall(self.config_key)
            if not raw_data:
                return None

            config = {}
            for composite_key, val_str in raw_data.items():
                # Keys are flattened as "section.key"; skip malformed ones.
                if "." not in composite_key:
                    continue
                section, key = composite_key.split(".", 1)

                if section not in config:
                    config[section] = {}

                # Values are stored as JSON; fall back to the raw string.
                try:
                    val = json_loads(val_str)
                except Exception:
                    val = val_str
                config[section][key] = val
            return config
        except Exception as e:
            logger.error(f"RedisStorage: 加载配置失败: {e}")
            return None
365
+
366
+ async def save_config(self, data: Dict[str, Any]):
367
+ """保存配置到 Redis Hash"""
368
+ try:
369
+ mapping = {}
370
+ for section, items in data.items():
371
+ if not isinstance(items, dict):
372
+ continue
373
+ for key, val in items.items():
374
+ composite_key = f"{section}.{key}"
375
+ mapping[composite_key] = json_dumps(val)
376
+
377
+ await self.redis.delete(self.config_key)
378
+ if mapping:
379
+ await self.redis.hset(self.config_key, mapping=mapping)
380
+ except Exception as e:
381
+ logger.error(f"RedisStorage: 保存配置失败: {e}")
382
+ raise
383
+
384
+ async def load_tokens(self) -> Dict[str, Any]:
385
+ """加载所有 Token"""
386
+ try:
387
+ pool_names = await self.redis.smembers(self.key_pools)
388
+ if not pool_names:
389
+ return None
390
+
391
+ pools = {}
392
+ async with self.redis.pipeline() as pipe:
393
+ for pool_name in pool_names:
394
+ # 获取该池下所有 Token ID
395
+ pipe.smembers(f"{self.prefix_pool_set}{pool_name}")
396
+ pool_tokens_res = await pipe.execute()
397
+
398
+ # 收集所有 Token ID 以便批量查询
399
+ all_token_ids = []
400
+ pool_map = {} # pool_name -> list[token_id]
401
+
402
+ for i, pool_name in enumerate(pool_names):
403
+ tids = list(pool_tokens_res[i])
404
+ pool_map[pool_name] = tids
405
+ all_token_ids.extend(tids)
406
+
407
+ if not all_token_ids:
408
+ return {name: [] for name in pool_names}
409
+
410
+ # 批量获取 Token 详情 (Hash)
411
+ async with self.redis.pipeline() as pipe:
412
+ for tid in all_token_ids:
413
+ pipe.hgetall(f"{self.prefix_token_hash}{tid}")
414
+ token_data_list = await pipe.execute()
415
+
416
+ # 重组数据结构
417
+ token_lookup = {}
418
+ for i, tid in enumerate(all_token_ids):
419
+ t_data = token_data_list[i]
420
+ if not t_data:
421
+ continue
422
+
423
+ # 恢复 tags (JSON -> List)
424
+ if "tags" in t_data:
425
+ try:
426
+ t_data["tags"] = json_loads(t_data["tags"])
427
+ except Exception:
428
+ t_data["tags"] = []
429
+
430
+ # 类型转换 (Redis 返回全 string)
431
+ for int_field in [
432
+ "quota",
433
+ "created_at",
434
+ "use_count",
435
+ "fail_count",
436
+ "last_used_at",
437
+ "last_fail_at",
438
+ "last_sync_at",
439
+ ]:
440
+ if t_data.get(int_field) and t_data[int_field] != "None":
441
+ try:
442
+ t_data[int_field] = int(t_data[int_field])
443
+ except Exception:
444
+ pass
445
+
446
+ token_lookup[tid] = t_data
447
+
448
+ # 按 Pool 分组返回
449
+ for pool_name in pool_names:
450
+ pools[pool_name] = []
451
+ for tid in pool_map[pool_name]:
452
+ if tid in token_lookup:
453
+ pools[pool_name].append(token_lookup[tid])
454
+
455
+ return pools
456
+
457
+ except Exception as e:
458
+ logger.error(f"RedisStorage: 加载 Token 失败: {e}")
459
+ return None
460
+
461
+ async def save_tokens(self, data: Dict[str, Any]):
462
+ """保存所有 Token"""
463
+ if data is None:
464
+ return
465
+ try:
466
+ new_pools = set(data.keys()) if isinstance(data, dict) else set()
467
+ pool_tokens_map = {}
468
+ new_token_ids = set()
469
+
470
+ for pool_name, tokens in (data or {}).items():
471
+ tids_in_pool = []
472
+ for t in tokens:
473
+ token_str = t.get("token")
474
+ if not token_str:
475
+ continue
476
+ tids_in_pool.append(token_str)
477
+ new_token_ids.add(token_str)
478
+ pool_tokens_map[pool_name] = tids_in_pool
479
+
480
+ existing_pools = await self.redis.smembers(self.key_pools)
481
+ existing_pools = set(existing_pools) if existing_pools else set()
482
+
483
+ existing_token_ids = set()
484
+ if existing_pools:
485
+ async with self.redis.pipeline() as pipe:
486
+ for pool_name in existing_pools:
487
+ pipe.smembers(f"{self.prefix_pool_set}{pool_name}")
488
+ pool_tokens_res = await pipe.execute()
489
+ for tokens in pool_tokens_res:
490
+ existing_token_ids.update(list(tokens or []))
491
+
492
+ tokens_to_delete = existing_token_ids - new_token_ids
493
+ all_pools = existing_pools.union(new_pools)
494
+
495
+ async with self.redis.pipeline() as pipe:
496
+ # Reset pool index
497
+ pipe.delete(self.key_pools)
498
+ if new_pools:
499
+ pipe.sadd(self.key_pools, *new_pools)
500
+
501
+ # Reset pool sets
502
+ for pool_name in all_pools:
503
+ pipe.delete(f"{self.prefix_pool_set}{pool_name}")
504
+ for pool_name, tids_in_pool in pool_tokens_map.items():
505
+ if tids_in_pool:
506
+ pipe.sadd(f"{self.prefix_pool_set}{pool_name}", *tids_in_pool)
507
+
508
+ # Remove deleted token hashes
509
+ for token_str in tokens_to_delete:
510
+ pipe.delete(f"{self.prefix_token_hash}{token_str}")
511
+
512
+ # Upsert token hashes
513
+ for pool_name, tokens in (data or {}).items():
514
+ for t in tokens:
515
+ token_str = t.get("token")
516
+ if not token_str:
517
+ continue
518
+ t_flat = t.copy()
519
+ if "tags" in t_flat:
520
+ t_flat["tags"] = json_dumps(t_flat["tags"])
521
+ status = t_flat.get("status")
522
+ if isinstance(status, str) and status.startswith(
523
+ "TokenStatus."
524
+ ):
525
+ t_flat["status"] = status.split(".", 1)[1].lower()
526
+ elif isinstance(status, Enum):
527
+ t_flat["status"] = status.value
528
+ t_flat = {k: str(v) for k, v in t_flat.items() if v is not None}
529
+ pipe.hset(
530
+ f"{self.prefix_token_hash}{token_str}", mapping=t_flat
531
+ )
532
+
533
+ await pipe.execute()
534
+
535
+ except Exception as e:
536
+ logger.error(f"RedisStorage: 保存 Token 失败: {e}")
537
+ raise
538
+
539
+ async def close(self):
540
+ try:
541
+ await self.redis.close()
542
+ except (RuntimeError, asyncio.CancelledError, Exception):
543
+ # 忽略关闭时的 Event loop is closed 错误
544
+ pass
545
+
546
+
547
class SQLStorage(BaseStorage):
    """
    SQL database storage (MySQL / PostgreSQL).

    - SQLAlchemy async engine.
    - Automatic schema initialization and legacy-column backfill.
    - Built-in connection pooling (QueuePool).
    """

    def __init__(self, url: str, connect_args: dict | None = None):
        try:
            from sqlalchemy.ext.asyncio import create_async_engine, async_sessionmaker
        except ImportError:
            raise ImportError(
                "需要安装 sqlalchemy 和 async 驱动: pip install sqlalchemy[asyncio]"
            )

        # "mysql+aiomysql://..." -> "mysql"; used for dialect-specific SQL.
        self.dialect = url.split(":", 1)[0].split("+", 1)[0].lower()

        # Robust connection-pool settings (recycle + pre-ping guard against
        # stale connections).
        self.engine = create_async_engine(
            url,
            echo=False,
            pool_size=20,
            max_overflow=10,
            pool_recycle=3600,
            pool_pre_ping=True,
            **({"connect_args": connect_args} if connect_args else {}),
        )
        self.async_session = async_sessionmaker(self.engine, expire_on_commit=False)
        self._initialized = False

    async def _ensure_schema(self):
        """Ensure database tables exist (idempotent; runs once per instance)."""
        if self._initialized:
            return
        try:
            async with self.engine.begin() as conn:
                from sqlalchemy import text

                # tokens table (portable SQL)
                await conn.execute(
                    text("""
                    CREATE TABLE IF NOT EXISTS tokens (
                        token VARCHAR(512) PRIMARY KEY,
                        pool_name VARCHAR(64) NOT NULL,
                        status VARCHAR(16),
                        quota INT,
                        created_at BIGINT,
                        last_used_at BIGINT,
                        use_count INT,
                        fail_count INT,
                        last_fail_at BIGINT,
                        last_fail_reason TEXT,
                        last_sync_at BIGINT,
                        tags TEXT,
                        note TEXT,
                        last_asset_clear_at BIGINT,
                        data TEXT,
                        data_hash CHAR(64),
                        updated_at BIGINT
                    )
                    """)
                )

                # config table
                await conn.execute(
                    text("""
                    CREATE TABLE IF NOT EXISTS app_config (
                        section VARCHAR(64) NOT NULL,
                        key_name VARCHAR(64) NOT NULL,
                        value TEXT,
                        PRIMARY KEY (section, key_name)
                    )
                    """)
                )

                # Index on pool_name. Only PostgreSQL supports
                # CREATE INDEX IF NOT EXISTS portably here; for other
                # dialects a duplicate-index error is swallowed.
                if self.dialect in ("postgres", "postgresql", "pgsql"):
                    await conn.execute(
                        text(
                            "CREATE INDEX IF NOT EXISTS idx_tokens_pool ON tokens (pool_name)"
                        )
                    )
                else:
                    try:
                        await conn.execute(
                            text("CREATE INDEX idx_tokens_pool ON tokens (pool_name)")
                        )
                    except Exception:
                        pass

                # Backfill columns missing from legacy tables.
                columns = [
                    ("status", "VARCHAR(16)"),
                    ("quota", "INT"),
                    ("created_at", "BIGINT"),
                    ("last_used_at", "BIGINT"),
                    ("use_count", "INT"),
                    ("fail_count", "INT"),
                    ("last_fail_at", "BIGINT"),
                    ("last_fail_reason", "TEXT"),
                    ("last_sync_at", "BIGINT"),
                    ("tags", "TEXT"),
                    ("note", "TEXT"),
                    ("last_asset_clear_at", "BIGINT"),
                    ("data", "TEXT"),
                    ("data_hash", "CHAR(64)"),
                    ("updated_at", "BIGINT"),
                ]
                if self.dialect in ("postgres", "postgresql", "pgsql"):
                    for col_name, col_type in columns:
                        await conn.execute(
                            text(
                                f"ALTER TABLE tokens ADD COLUMN IF NOT EXISTS {col_name} {col_type}"
                            )
                        )
                else:
                    # No ADD COLUMN IF NOT EXISTS — tolerate duplicates.
                    for col_name, col_type in columns:
                        try:
                            await conn.execute(
                                text(
                                    f"ALTER TABLE tokens ADD COLUMN {col_name} {col_type}"
                                )
                            )
                        except Exception:
                            pass

                # Best-effort compatibility tweaks for legacy column types.
                try:
                    if self.dialect in ("mysql", "mariadb"):
                        await conn.execute(
                            text("ALTER TABLE tokens MODIFY token VARCHAR(512)")
                        )
                        await conn.execute(text("ALTER TABLE tokens MODIFY data TEXT"))
                    elif self.dialect in ("postgres", "postgresql", "pgsql"):
                        await conn.execute(
                            text(
                                "ALTER TABLE tokens ALTER COLUMN token TYPE VARCHAR(512)"
                            )
                        )
                        await conn.execute(
                            text("ALTER TABLE tokens ALTER COLUMN data TYPE TEXT")
                        )
                except Exception:
                    pass

            await self._migrate_legacy_tokens()
            self._initialized = True
        except Exception as e:
            logger.error(f"SQLStorage: Schema 初始化失败: {e}")
            raise

    def _normalize_status(self, status: Any) -> Any:
        # "TokenStatus.ACTIVE" (enum-repr leakage) -> "active"; Enum -> value.
        if isinstance(status, str) and status.startswith("TokenStatus."):
            return status.split(".", 1)[1].lower()
        if isinstance(status, Enum):
            return status.value
        return status

    def _normalize_tags(self, tags: Any) -> Optional[str]:
        """Normalize tags to a JSON-array string for the TEXT column."""
        if tags is None:
            return None
        if isinstance(tags, str):
            # Already a JSON list -> store as-is; otherwise wrap the single
            # string value in a list.
            try:
                parsed = json_loads(tags)
                if isinstance(parsed, list):
                    return tags
            except Exception:
                pass
            return json_dumps([tags])
        return json_dumps(tags)

    def _parse_tags(self, tags: Any) -> Optional[list]:
        """Parse the stored tags column back into a list ([] on bad data)."""
        if tags is None:
            return None
        if isinstance(tags, str):
            try:
                parsed = json_loads(tags)
                if isinstance(parsed, list):
                    return parsed
            except Exception:
                return []
        if isinstance(tags, list):
            return tags
        return []

    def _token_to_row(self, token_data: Dict[str, Any], pool_name: str) -> Dict[str, Any]:
        """Flatten a token dict into bind parameters for the tokens table.

        Also keeps the full JSON blob (`data`) plus its SHA-256 (`data_hash`)
        alongside the flattened columns.
        """
        token_str = token_data.get("token")
        # Strip a leaked "sso=" cookie prefix so the primary key is stable.
        if isinstance(token_str, str) and token_str.startswith("sso="):
            token_str = token_str[4:]

        status = self._normalize_status(token_data.get("status"))
        tags_json = self._normalize_tags(token_data.get("tags"))
        data_json = json_dumps_sorted(token_data)
        data_hash = hashlib.sha256(data_json.encode("utf-8")).hexdigest()
        note = token_data.get("note")
        if note is None:
            note = ""

        return {
            "token": token_str,
            "pool_name": pool_name,
            "status": status,
            "quota": token_data.get("quota"),
            "created_at": token_data.get("created_at"),
            "last_used_at": token_data.get("last_used_at"),
            "use_count": token_data.get("use_count"),
            "fail_count": token_data.get("fail_count"),
            "last_fail_at": token_data.get("last_fail_at"),
            "last_fail_reason": token_data.get("last_fail_reason"),
            "last_sync_at": token_data.get("last_sync_at"),
            "tags": tags_json,
            "note": note,
            "last_asset_clear_at": token_data.get("last_asset_clear_at"),
            "data": data_json,
            "data_hash": data_hash,
            # Always written as 0 here; NOTE(review): confirm whether a real
            # timestamp is expected to be maintained by a caller.
            "updated_at": 0,
        }

    async def _migrate_legacy_tokens(self):
        """Backfill flattened columns from the legacy `data` JSON blob."""
        from sqlalchemy import text

        try:
            async with self.async_session() as session:
                # Cheap probe: is there at least one legacy row to migrate?
                try:
                    res = await session.execute(
                        text(
                            "SELECT token FROM tokens "
                            "WHERE data IS NOT NULL AND "
                            "(status IS NULL OR quota IS NULL OR created_at IS NULL) "
                            "LIMIT 1"
                        )
                    )
                    if not res.first():
                        return
                except Exception as e:
                    # Columns may not exist yet on very old schemas.
                    msg = str(e).lower()
                    if "undefinedcolumn" in msg or "undefined column" in msg:
                        return
                    raise

                res = await session.execute(
                    text(
                        "SELECT token, pool_name, data FROM tokens "
                        "WHERE data IS NOT NULL AND "
                        "(status IS NULL OR quota IS NULL OR created_at IS NULL)"
                    )
                )
                rows = res.fetchall()
                if not rows:
                    return

                params = []
                for token_str, pool_name, data_json in rows:
                    if not data_json:
                        continue
                    try:
                        if isinstance(data_json, str):
                            t_data = json_loads(data_json)
                        else:
                            t_data = data_json
                        if not isinstance(t_data, dict):
                            continue
                        t_data = dict(t_data)
                        t_data["token"] = token_str
                        row = self._token_to_row(t_data, pool_name)
                        params.append(row)
                    except Exception:
                        # Skip rows whose blob cannot be parsed.
                        continue

                if not params:
                    return

                await session.execute(
                    text(
                        "UPDATE tokens SET "
                        "pool_name=:pool_name, "
                        "status=:status, "
                        "quota=:quota, "
                        "created_at=:created_at, "
                        "last_used_at=:last_used_at, "
                        "use_count=:use_count, "
                        "fail_count=:fail_count, "
                        "last_fail_at=:last_fail_at, "
                        "last_fail_reason=:last_fail_reason, "
                        "last_sync_at=:last_sync_at, "
                        "tags=:tags, "
                        "note=:note, "
                        "last_asset_clear_at=:last_asset_clear_at, "
                        "data=:data, "
                        "data_hash=:data_hash, "
                        "updated_at=:updated_at "
                        "WHERE token=:token"
                    ),
                    params,
                )
                await session.commit()
        except Exception as e:
            logger.warning(f"SQLStorage: 旧数据回填失败: {e}")

    @asynccontextmanager
    async def acquire_lock(self, name: str, timeout: int = 10):
        """SQL distributed lock: MySQL GET_LOCK / PostgreSQL advisory lock.

        Other dialects get no locking (yields immediately).
        """
        from sqlalchemy import text

        # Hash the name so it fits MySQL's lock-name limits.
        lock_name = f"g2a:{hashlib.sha1(name.encode('utf-8')).hexdigest()[:24]}"
        if self.dialect in ("mysql", "mariadb"):
            async with self.async_session() as session:
                res = await session.execute(
                    text("SELECT GET_LOCK(:name, :timeout)"),
                    {"name": lock_name, "timeout": timeout},
                )
                got = res.scalar()
                if got != 1:
                    raise StorageError(f"SQLStorage: 无法获取锁 '{name}'")
                try:
                    yield
                finally:
                    try:
                        await session.execute(
                            text("SELECT RELEASE_LOCK(:name)"), {"name": lock_name}
                        )
                        await session.commit()
                    except Exception:
                        pass
        elif self.dialect in ("postgres", "postgresql", "pgsql"):
            # Advisory locks key on a signed 64-bit int derived from the name.
            lock_key = int.from_bytes(
                hashlib.sha256(name.encode("utf-8")).digest()[:8], "big", signed=True
            )
            async with self.async_session() as session:
                # Poll pg_try_advisory_lock until acquired or timed out.
                start = time.monotonic()
                while True:
                    res = await session.execute(
                        text("SELECT pg_try_advisory_lock(:key)"), {"key": lock_key}
                    )
                    if res.scalar():
                        break
                    if time.monotonic() - start >= timeout:
                        raise StorageError(f"SQLStorage: 无法获取锁 '{name}'")
                    await asyncio.sleep(0.1)
                try:
                    yield
                finally:
                    try:
                        await session.execute(
                            text("SELECT pg_advisory_unlock(:key)"), {"key": lock_key}
                        )
                        await session.commit()
                    except Exception:
                        pass
        else:
            yield

    async def load_config(self) -> Optional[Dict[str, Any]]:
        """Load configuration rows into {section: {key: value}}; None if empty."""
        await self._ensure_schema()
        from sqlalchemy import text

        try:
            async with self.async_session() as session:
                res = await session.execute(
                    text("SELECT section, key_name, value FROM app_config")
                )
                rows = res.fetchall()
                if not rows:
                    return None

                config = {}
                for section, key, val_str in rows:
                    if section not in config:
                        config[section] = {}
                    try:
                        val = json_loads(val_str)
                    except Exception:
                        # Not JSON — keep the raw string value.
                        val = val_str
                    config[section][key] = val
                return config
        except Exception as e:
            logger.error(f"SQLStorage: 加载配置失败: {e}")
            return None

    async def save_config(self, data: Dict[str, Any]):
        """Persist configuration (delete-all + bulk insert, one transaction)."""
        await self._ensure_schema()
        from sqlalchemy import text

        try:
            async with self.async_session() as session:
                await session.execute(text("DELETE FROM app_config"))

                params = []
                for section, items in data.items():
                    if not isinstance(items, dict):
                        continue
                    for key, val in items.items():
                        params.append(
                            {
                                "s": section,
                                "k": key,
                                "v": json_dumps(val),
                            }
                        )

                if params:
                    await session.execute(
                        text(
                            "INSERT INTO app_config (section, key_name, value) VALUES (:s, :k, :v)"
                        ),
                        params,
                    )
                await session.commit()
        except Exception as e:
            logger.error(f"SQLStorage: 保存配置失败: {e}")
            raise

    async def load_tokens(self) -> Optional[Dict[str, Any]]:
        """Load all tokens, grouped by pool name.

        Flattened columns take precedence; missing fields fall back to the
        legacy `data` JSON blob. Returns None when the table is empty or on
        error.
        """
        await self._ensure_schema()
        from sqlalchemy import text

        try:
            async with self.async_session() as session:
                res = await session.execute(
                    text(
                        "SELECT token, pool_name, status, quota, created_at, "
                        "last_used_at, use_count, fail_count, last_fail_at, "
                        "last_fail_reason, last_sync_at, tags, note, "
                        "last_asset_clear_at, data "
                        "FROM tokens"
                    )
                )
                rows = res.fetchall()
                if not rows:
                    return None

                pools = {}
                for (
                    token_str,
                    pool_name,
                    status,
                    quota,
                    created_at,
                    last_used_at,
                    use_count,
                    fail_count,
                    last_fail_at,
                    last_fail_reason,
                    last_sync_at,
                    tags,
                    note,
                    last_asset_clear_at,
                    data_json,
                ) in rows:
                    if pool_name not in pools:
                        pools[pool_name] = []

                    try:
                        token_data = {}
                        if token_str:
                            token_data["token"] = token_str
                        if status is not None:
                            token_data["status"] = self._normalize_status(status)
                        if quota is not None:
                            token_data["quota"] = int(quota)
                        if created_at is not None:
                            token_data["created_at"] = int(created_at)
                        if last_used_at is not None:
                            token_data["last_used_at"] = int(last_used_at)
                        if use_count is not None:
                            token_data["use_count"] = int(use_count)
                        if fail_count is not None:
                            token_data["fail_count"] = int(fail_count)
                        if last_fail_at is not None:
                            token_data["last_fail_at"] = int(last_fail_at)
                        if last_fail_reason is not None:
                            token_data["last_fail_reason"] = last_fail_reason
                        if last_sync_at is not None:
                            token_data["last_sync_at"] = int(last_sync_at)
                        if tags is not None:
                            token_data["tags"] = self._parse_tags(tags)
                        if note is not None:
                            token_data["note"] = note
                        if last_asset_clear_at is not None:
                            token_data["last_asset_clear_at"] = int(
                                last_asset_clear_at
                            )

                        # Legacy fallback: fill any fields still missing from
                        # the old `data` JSON blob.
                        legacy_data = None
                        if data_json:
                            if isinstance(data_json, str):
                                legacy_data = json_loads(data_json)
                            else:
                                legacy_data = data_json
                        if isinstance(legacy_data, dict):
                            for key, val in legacy_data.items():
                                if key not in token_data or token_data[key] is None:
                                    token_data[key] = val

                        pools[pool_name].append(token_data)
                    except Exception:
                        # Skip malformed rows rather than failing the load.
                        pass
                return pools
        except Exception as e:
            logger.error(f"SQLStorage: 加载 Token 失败: {e}")
            return None

    async def save_tokens(self, data: Dict[str, Any]):
        """Save all tokens (full sync) by delegating to save_tokens_delta.

        Computes the deletion set as (existing tokens) - (incoming tokens).
        """
        await self._ensure_schema()
        from sqlalchemy import text

        if data is None:
            return

        updates = []
        new_tokens = set()
        for pool_name, tokens in (data or {}).items():
            for t in tokens:
                if isinstance(t, dict):
                    token_data = dict(t)
                elif isinstance(t, str):
                    # Bare token string -> minimal dict.
                    token_data = {"token": t}
                else:
                    continue
                token_str = token_data.get("token")
                if not token_str:
                    continue
                # Strip a leaked "sso=" cookie prefix (matches _token_to_row).
                if token_str.startswith("sso="):
                    token_str = token_str[4:]
                token_data["token"] = token_str
                token_data["pool_name"] = pool_name
                token_data["_update_kind"] = "state"
                updates.append(token_data)
                new_tokens.add(token_str)

        try:
            existing_tokens = set()
            async with self.async_session() as session:
                res = await session.execute(text("SELECT token FROM tokens"))
                rows = res.fetchall()
                existing_tokens = {row[0] for row in rows}
            tokens_to_delete = list(existing_tokens - new_tokens)
            await self.save_tokens_delta(updates, tokens_to_delete)
        except Exception as e:
            logger.error(f"SQLStorage: 保存 Token 失败: {e}")
            raise

    async def save_tokens_delta(
        self, updated: list[Dict[str, Any]], deleted: Optional[list[str]] = None
    ):
        """Incrementally upsert/delete tokens in one transaction.

        Each item in `updated` carries `pool_name` plus an optional
        `_update_kind`: "usage" rows update only runtime counters on
        conflict, while "state" (default) rows overwrite every column.
        """
        await self._ensure_schema()
        from sqlalchemy import bindparam, text

        try:
            async with self.async_session() as session:
                # Delete first, chunked to keep the IN () list bounded.
                deleted_set = set(deleted or [])
                if deleted_set:
                    delete_stmt = text(
                        "DELETE FROM tokens WHERE token IN :tokens"
                    ).bindparams(bindparam("tokens", expanding=True))
                    chunk_size = 500
                    deleted_list = list(deleted_set)
                    for i in range(0, len(deleted_list), chunk_size):
                        chunk = deleted_list[i : i + chunk_size]
                        await session.execute(delete_stmt, {"tokens": chunk})

                updates = []
                usage_updates = []

                for item in updated or []:
                    if not isinstance(item, dict):
                        continue
                    pool_name = item.get("pool_name")
                    token_str = item.get("token")
                    if not pool_name or not token_str:
                        continue
                    if token_str in deleted_set:
                        # Deletion wins over a concurrent update.
                        continue
                    update_kind = item.get("_update_kind", "state")
                    token_data = {
                        k: v
                        for k, v in item.items()
                        if k not in ("pool_name", "_update_kind")
                    }
                    row = self._token_to_row(token_data, pool_name)
                    if update_kind == "usage":
                        usage_updates.append(row)
                    else:
                        updates.append(row)

                # Full-state upsert: every column is overwritten on conflict.
                if updates:
                    if self.dialect in ("mysql", "mariadb"):
                        upsert_stmt = text(
                            "INSERT INTO tokens (token, pool_name, status, quota, created_at, "
                            "last_used_at, use_count, fail_count, last_fail_at, "
                            "last_fail_reason, last_sync_at, tags, note, "
                            "last_asset_clear_at, data, data_hash, updated_at) "
                            "VALUES (:token, :pool_name, :status, :quota, :created_at, "
                            ":last_used_at, :use_count, :fail_count, :last_fail_at, "
                            ":last_fail_reason, :last_sync_at, :tags, :note, "
                            ":last_asset_clear_at, :data, :data_hash, :updated_at) "
                            "ON DUPLICATE KEY UPDATE "
                            "pool_name=VALUES(pool_name), "
                            "status=VALUES(status), "
                            "quota=VALUES(quota), "
                            "created_at=VALUES(created_at), "
                            "last_used_at=VALUES(last_used_at), "
                            "use_count=VALUES(use_count), "
                            "fail_count=VALUES(fail_count), "
                            "last_fail_at=VALUES(last_fail_at), "
                            "last_fail_reason=VALUES(last_fail_reason), "
                            "last_sync_at=VALUES(last_sync_at), "
                            "tags=VALUES(tags), "
                            "note=VALUES(note), "
                            "last_asset_clear_at=VALUES(last_asset_clear_at), "
                            "data=VALUES(data), "
                            "data_hash=VALUES(data_hash), "
                            "updated_at=VALUES(updated_at)"
                        )
                    elif self.dialect in ("postgres", "postgresql", "pgsql"):
                        upsert_stmt = text(
                            "INSERT INTO tokens (token, pool_name, status, quota, created_at, "
                            "last_used_at, use_count, fail_count, last_fail_at, "
                            "last_fail_reason, last_sync_at, tags, note, "
                            "last_asset_clear_at, data, data_hash, updated_at) "
                            "VALUES (:token, :pool_name, :status, :quota, :created_at, "
                            ":last_used_at, :use_count, :fail_count, :last_fail_at, "
                            ":last_fail_reason, :last_sync_at, :tags, :note, "
                            ":last_asset_clear_at, :data, :data_hash, :updated_at) "
                            "ON CONFLICT (token) DO UPDATE SET "
                            "pool_name=EXCLUDED.pool_name, "
                            "status=EXCLUDED.status, "
                            "quota=EXCLUDED.quota, "
                            "created_at=EXCLUDED.created_at, "
                            "last_used_at=EXCLUDED.last_used_at, "
                            "use_count=EXCLUDED.use_count, "
                            "fail_count=EXCLUDED.fail_count, "
                            "last_fail_at=EXCLUDED.last_fail_at, "
                            "last_fail_reason=EXCLUDED.last_fail_reason, "
                            "last_sync_at=EXCLUDED.last_sync_at, "
                            "tags=EXCLUDED.tags, "
                            "note=EXCLUDED.note, "
                            "last_asset_clear_at=EXCLUDED.last_asset_clear_at, "
                            "data=EXCLUDED.data, "
                            "data_hash=EXCLUDED.data_hash, "
                            "updated_at=EXCLUDED.updated_at"
                        )
                    else:
                        # Unknown dialect: plain INSERT (no upsert support).
                        upsert_stmt = text(
                            "INSERT INTO tokens (token, pool_name, status, quota, created_at, "
                            "last_used_at, use_count, fail_count, last_fail_at, "
                            "last_fail_reason, last_sync_at, tags, note, "
                            "last_asset_clear_at, data, data_hash, updated_at) "
                            "VALUES (:token, :pool_name, :status, :quota, :created_at, "
                            ":last_used_at, :use_count, :fail_count, :last_fail_at, "
                            ":last_fail_reason, :last_sync_at, :tags, :note, "
                            ":last_asset_clear_at, :data, :data_hash, :updated_at)"
                        )
                    await session.execute(upsert_stmt, updates)

                # Usage upsert: on conflict only runtime counters are updated
                # (tags/note/data etc. are left untouched).
                if usage_updates:
                    if self.dialect in ("mysql", "mariadb"):
                        usage_stmt = text(
                            "INSERT INTO tokens (token, pool_name, status, quota, created_at, "
                            "last_used_at, use_count, fail_count, last_fail_at, "
                            "last_fail_reason, last_sync_at, tags, note, "
                            "last_asset_clear_at, data, data_hash, updated_at) "
                            "VALUES (:token, :pool_name, :status, :quota, :created_at, "
                            ":last_used_at, :use_count, :fail_count, :last_fail_at, "
                            ":last_fail_reason, :last_sync_at, :tags, :note, "
                            ":last_asset_clear_at, :data, :data_hash, :updated_at) "
                            "ON DUPLICATE KEY UPDATE "
                            "pool_name=VALUES(pool_name), "
                            "status=VALUES(status), "
                            "quota=VALUES(quota), "
                            "last_used_at=VALUES(last_used_at), "
                            "use_count=VALUES(use_count), "
                            "fail_count=VALUES(fail_count), "
                            "last_fail_at=VALUES(last_fail_at), "
                            "last_fail_reason=VALUES(last_fail_reason), "
                            "last_sync_at=VALUES(last_sync_at), "
                            "updated_at=VALUES(updated_at)"
                        )
                    elif self.dialect in ("postgres", "postgresql", "pgsql"):
                        usage_stmt = text(
                            "INSERT INTO tokens (token, pool_name, status, quota, created_at, "
                            "last_used_at, use_count, fail_count, last_fail_at, "
                            "last_fail_reason, last_sync_at, tags, note, "
                            "last_asset_clear_at, data, data_hash, updated_at) "
                            "VALUES (:token, :pool_name, :status, :quota, :created_at, "
                            ":last_used_at, :use_count, :fail_count, :last_fail_at, "
                            ":last_fail_reason, :last_sync_at, :tags, :note, "
                            ":last_asset_clear_at, :data, :data_hash, :updated_at) "
                            "ON CONFLICT (token) DO UPDATE SET "
                            "pool_name=EXCLUDED.pool_name, "
                            "status=EXCLUDED.status, "
                            "quota=EXCLUDED.quota, "
                            "last_used_at=EXCLUDED.last_used_at, "
                            "use_count=EXCLUDED.use_count, "
                            "fail_count=EXCLUDED.fail_count, "
                            "last_fail_at=EXCLUDED.last_fail_at, "
                            "last_fail_reason=EXCLUDED.last_fail_reason, "
                            "last_sync_at=EXCLUDED.last_sync_at, "
                            "updated_at=EXCLUDED.updated_at"
                        )
                    else:
                        usage_stmt = text(
                            "INSERT INTO tokens (token, pool_name, status, quota, created_at, "
                            "last_used_at, use_count, fail_count, last_fail_at, "
                            "last_fail_reason, last_sync_at, tags, note, "
                            "last_asset_clear_at, data, data_hash, updated_at) "
                            "VALUES (:token, :pool_name, :status, :quota, :created_at, "
                            ":last_used_at, :use_count, :fail_count, :last_fail_at, "
                            ":last_fail_reason, :last_sync_at, :tags, :note, "
                            ":last_asset_clear_at, :data, :data_hash, :updated_at)"
                        )
                    await session.execute(usage_stmt, usage_updates)

                await session.commit()
        except Exception as e:
            logger.error(f"SQLStorage: 增量保存 Token 失败: {e}")
            raise

    async def close(self):
        # Dispose the engine: closes all pooled connections.
        await self.engine.dispose()
1269
+
1270
+
1271
class StorageFactory:
    """Storage backend factory.

    Holds the process-wide storage singleton plus the SSL-mode alias tables
    used to translate user-supplied SSL settings into driver-specific forms.
    """

    # Process-wide storage singleton.
    _instance: Optional[BaseStorage] = None

    # SSL-related query parameters that async drivers (asyncpg, aiomysql)
    # cannot accept via the URL and must be passed as connect_args instead.
    _SQL_SSL_PARAM_KEYS = ("sslmode", "ssl-mode", "ssl")

    # Canonical postgres ssl modes (asyncpg accepts libpq-style mode strings).
    _PG_SSL_MODE_ALIASES: ClassVar[dict[str, str]] = {
        "disable": "disable",
        "disabled": "disable",
        "false": "disable",
        "0": "disable",
        "no": "disable",
        "off": "disable",
        "prefer": "prefer",
        "preferred": "prefer",
        "allow": "allow",
        "require": "require",
        "required": "require",
        "true": "require",
        "1": "require",
        "yes": "require",
        "on": "require",
        "verify-ca": "verify-ca",
        "verify_ca": "verify-ca",
        "verify-full": "verify-full",
        "verify_full": "verify-full",
        "verify-identity": "verify-full",
        "verify_identity": "verify-full",
    }

    # Canonical mysql ssl modes (aiomysql accepts SSLContext, not mode strings).
    _MY_SSL_MODE_ALIASES: ClassVar[dict[str, str]] = {
        "disable": "disabled",
        "disabled": "disabled",
        "false": "disabled",
        "0": "disabled",
        "no": "disabled",
        "off": "disabled",
        "prefer": "preferred",
        "preferred": "preferred",
        "allow": "preferred",
        "require": "required",
        "required": "required",
        "true": "required",
        "1": "required",
        "yes": "required",
        "on": "required",
        "verify-ca": "verify_ca",
        "verify_ca": "verify_ca",
        "verify-full": "verify_identity",
        "verify_full": "verify_identity",
        "verify-identity": "verify_identity",
        "verify_identity": "verify_identity",
    }
1330
+ @classmethod
1331
+ def _normalize_ssl_mode(cls, storage_type: str, mode: str) -> str:
1332
+ """Normalize SSL mode aliases for the target storage backend."""
1333
+ if not mode:
1334
+ raise ValueError("SSL mode cannot be empty")
1335
+
1336
+ normalized = mode.strip().lower().replace(" ", "")
1337
+ if storage_type == "pgsql":
1338
+ canonical = cls._PG_SSL_MODE_ALIASES.get(normalized)
1339
+ elif storage_type == "mysql":
1340
+ canonical = cls._MY_SSL_MODE_ALIASES.get(normalized)
1341
+ else:
1342
+ canonical = None
1343
+
1344
+ if not canonical:
1345
+ raise ValueError(
1346
+ f"Unsupported SSL mode '{mode}' for storage type '{storage_type}'"
1347
+ )
1348
+ return canonical
1349
+
1350
+ @classmethod
1351
+ def _build_mysql_ssl_context(cls, mode: str):
1352
+ """Build SSLContext for aiomysql according to normalized mysql mode.
1353
+
1354
+ Note: aiomysql enforces SSL whenever an SSLContext is provided — there
1355
+ is no "try SSL, fall back to plaintext" behaviour. As a result the
1356
+ ``preferred`` mode is treated identically to ``required`` (encrypted,
1357
+ no cert verification). Connections to MySQL servers that do not
1358
+ support SSL will fail rather than degrade gracefully.
1359
+ """
1360
+ import ssl as _ssl
1361
+
1362
+ if mode == "disabled":
1363
+ return None
1364
+
1365
+ ctx = _ssl.create_default_context()
1366
+ if mode in ("preferred", "required"):
1367
+ ctx.check_hostname = False
1368
+ ctx.verify_mode = _ssl.CERT_NONE
1369
+ elif mode == "verify_ca":
1370
+ # verify CA, but do not enforce hostname match.
1371
+ ctx.check_hostname = False
1372
+ # verify_identity keeps defaults: verify cert + hostname.
1373
+ return ctx
1374
+
1375
+ @classmethod
1376
+ def _build_sql_connect_args(
1377
+ cls, storage_type: str, raw_ssl_mode: Optional[str]
1378
+ ) -> Optional[dict]:
1379
+ """Build SQLAlchemy connect_args for SQL SSL modes."""
1380
+ if not raw_ssl_mode:
1381
+ return None
1382
+
1383
+ mode = cls._normalize_ssl_mode(storage_type, raw_ssl_mode)
1384
+ if storage_type == "pgsql":
1385
+ # asyncpg accepts libpq-style ssl mode strings via ssl=...
1386
+ return {"ssl": mode}
1387
+ if storage_type == "mysql":
1388
+ ctx = cls._build_mysql_ssl_context(mode)
1389
+ if ctx is None:
1390
+ return None
1391
+ return {"ssl": ctx}
1392
+ return None
1393
+
1394
+ @classmethod
1395
+ def _normalize_sql_url(cls, storage_type: str, url: str) -> str:
1396
+ """Rewrite scheme prefix to the SQLAlchemy async dialect form."""
1397
+ if not url or "://" not in url:
1398
+ return url
1399
+ if storage_type == "mysql":
1400
+ if url.startswith("mysql://"):
1401
+ url = f"mysql+aiomysql://{url[len('mysql://') :]}"
1402
+ elif url.startswith("mariadb://"):
1403
+ # Use mysql+aiomysql for both MySQL and MariaDB endpoints.
1404
+ # The mariadb dialect enforces strict MariaDB server detection.
1405
+ url = f"mysql+aiomysql://{url[len('mariadb://') :]}"
1406
+ elif url.startswith("mariadb+aiomysql://"):
1407
+ url = f"mysql+aiomysql://{url[len('mariadb+aiomysql://') :]}"
1408
+ elif storage_type == "pgsql":
1409
+ if url.startswith("postgres://"):
1410
+ url = f"postgresql+asyncpg://{url[len('postgres://') :]}"
1411
+ elif url.startswith("postgresql://"):
1412
+ url = f"postgresql+asyncpg://{url[len('postgresql://') :]}"
1413
+ elif url.startswith("pgsql://"):
1414
+ url = f"postgresql+asyncpg://{url[len('pgsql://') :]}"
1415
+ return url
1416
+
1417
+ @classmethod
1418
+ def _prepare_sql_url_and_connect_args(
1419
+ cls, storage_type: str, url: str
1420
+ ) -> tuple[str, Optional[dict]]:
1421
+ """Normalize SQL URL and build connect_args from SSL query params."""
1422
+ from urllib.parse import urlparse, parse_qsl, urlencode, urlunparse
1423
+
1424
+ normalized_url = cls._normalize_sql_url(storage_type, url)
1425
+ if "://" not in normalized_url:
1426
+ return normalized_url, None
1427
+
1428
+ parsed = urlparse(normalized_url)
1429
+ ssl_mode: Optional[str] = None
1430
+ filtered_query_items = []
1431
+ ssl_param_keys = {k.lower() for k in cls._SQL_SSL_PARAM_KEYS}
1432
+ for key, value in parse_qsl(parsed.query, keep_blank_values=True):
1433
+ if key.lower() in ssl_param_keys:
1434
+ if ssl_mode is None and value:
1435
+ ssl_mode = value
1436
+ continue
1437
+ filtered_query_items.append((key, value))
1438
+
1439
+ cleaned_url = urlunparse(
1440
+ parsed._replace(query=urlencode(filtered_query_items, doseq=True))
1441
+ )
1442
+ connect_args = cls._build_sql_connect_args(storage_type, ssl_mode)
1443
+ return cleaned_url, connect_args
1444
+
1445
    @classmethod
    def get_storage(cls) -> BaseStorage:
        """Return the process-wide storage backend, creating it on first use (singleton).

        Backend selection comes from the SERVER_STORAGE_TYPE env var
        ("redis", "mysql", "pgsql"; anything else falls back to local
        storage) with SERVER_STORAGE_URL supplying the connection URL.

        Raises:
            ValueError: if a redis/SQL backend is selected without a URL.

        NOTE(review): no lock here — concurrent first calls could race and
        build two instances; confirm this is only called from single-threaded
        startup.
        """
        if cls._instance:
            return cls._instance

        storage_type = os.getenv("SERVER_STORAGE_TYPE", "local").lower()
        storage_url = os.getenv("SERVER_STORAGE_URL", "")

        logger.info(f"StorageFactory: 初始化存储后端: {storage_type}")

        if storage_type == "redis":
            if not storage_url:
                raise ValueError("Redis 存储需要设置 SERVER_STORAGE_URL")
            cls._instance = RedisStorage(storage_url)

        elif storage_type in ("mysql", "pgsql"):
            if not storage_url:
                raise ValueError("SQL 存储需要设置 SERVER_STORAGE_URL")
            # Drivers reject SSL query params in URL. Normalize URL and pass
            # backend-specific SSL handling through connect_args.
            storage_url, connect_args = cls._prepare_sql_url_and_connect_args(
                storage_type, storage_url
            )
            cls._instance = SQLStorage(storage_url, connect_args=connect_args)

        else:
            cls._instance = LocalStorage()

        return cls._instance
1475
+
1476
+
1477
def get_storage() -> BaseStorage:
    """Module-level convenience wrapper around ``StorageFactory.get_storage()``."""
    return StorageFactory.get_storage()
app/services/cf_refresh/README.md ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # cf_refresh - Cloudflare cf_clearance 自动刷新
2
+
3
+ 通过 [FlareSolverr](https://github.com/FlareSolverr/FlareSolverr) 自动获取 Cloudflare `cf_clearance` cookie 和 `user_agent`,并更新到 Grok2API 服务配置中。
4
+
5
+ 全自动、无需 GUI、服务器友好。
6
+
7
+ ## 工作原理
8
+
9
+ 1. FlareSolverr(独立 Docker 容器)内部运行 Chrome,自动通过 CF 挑战
10
+ 2. cf_refresh 作为 grok2api 的后台任务,调用 FlareSolverr HTTP API 获取 `cf_clearance` 和 `user_agent`
11
+ 3. 直接在进程内调用 `config.update()` 更新运行时配置并持久化到 `data/config.toml`
12
+ 4. 按设定间隔重复以上步骤
13
+
14
+ ## 配置方式
15
+
16
+ 所有配置均可在管理面板 `/admin/config` 的 **CF 自动刷新** 区域中设置,也可通过环境变量初始化:
17
+
18
+ | 配置项 | 环境变量 | 默认值 | 说明 |
19
+ |--------|----------|--------|------|
20
+ | 启用自动刷新 | `FLARESOLVERR_URL`(非空即启用) | `false` | 是否开启自动刷新 |
21
+ | FlareSolverr 地址 | `FLARESOLVERR_URL` | — | FlareSolverr 服务的 HTTP 地址 |
22
+ | 刷新间隔(秒) | `CF_REFRESH_INTERVAL` | `600` | 定期刷新间隔 |
23
+ | 挑战超时(秒) | `CF_TIMEOUT` | `60` | CF 挑战等待超时 |
24
+
25
+ > **代理**:自动使用「代理配置 → 基础代理 URL」,无需单独设置,保证出口 IP 一致。
26
+
27
+ ## 使用方式
28
+
29
+ ### Docker Compose 部署
30
+
31
+ 已集成在项目根目录 `docker-compose.yml` 中。只需在 grok2api 服务的环境变量中设置 `FLARESOLVERR_URL`,并添加 `flaresolverr` 服务即可:
32
+
33
+ ```yaml
34
+ services:
35
+ grok2api:
36
+ environment:
37
+ FLARESOLVERR_URL: http://flaresolverr:8191
38
+
39
+ flaresolverr:
40
+ image: ghcr.io/flaresolverr/flaresolverr:latest
41
+ restart: unless-stopped
42
+ ```
43
+
44
+ ## 注意事项
45
+
46
+ - `cf_clearance` 与请求来源 IP 绑定,FlareSolverr 自动使用代理配置中的基础代理 URL 保证出口 IP 一致
47
+ - 启用自动刷新后,代理配置中的 CF Clearance、浏览器指纹和 User-Agent 由系统自动管理(面板中变灰)
48
+ - 建议刷新间隔不低于 5 分钟,避免触发 Cloudflare 频率限制
49
+ - FlareSolverr 需要约 500MB 内存(内部运行 Chrome)
app/services/cf_refresh/__init__.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ """cf_refresh - Cloudflare cf_clearance 自动刷新模块"""
2
+
3
+ from .scheduler import start, stop
4
+
5
+ __all__ = ["start", "stop"]
app/services/cf_refresh/config.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """配置管理 — 从 app config 的 proxy.* 读取,支持面板修改实时生效"""
2
+
3
+ GROK_URL = "https://grok.com"
4
+
5
+
6
def _get(key: str, default=None):
    """Look up a ``proxy.*`` entry from the live app config."""
    from app.core.config import get_config

    full_key = f"proxy.{key}"
    return get_config(full_key, default)
10
+
11
+
12
def get_flaresolverr_url() -> str:
    """Return the configured FlareSolverr base URL ("" when unset)."""
    configured = _get("flaresolverr_url", "")
    return configured or ""
14
+
15
+
16
def _get_int(key: str, default: int, min_value: int) -> int:
    """Read an integer proxy setting, clamped to at least ``min_value``.

    Unparsable values fall back to the default (also clamped).
    """
    raw = _get(key, default)
    try:
        parsed = int(raw)
    except (TypeError, ValueError):
        parsed = default
    return max(parsed, min_value)
25
+
26
+
27
def get_refresh_interval() -> int:
    """Seconds between automatic cf_clearance refreshes (minimum 60)."""
    return _get_int("refresh_interval", default=600, min_value=60)
29
+
30
+
31
def get_timeout() -> int:
    """Challenge-wait timeout in seconds (minimum 60)."""
    return _get_int("timeout", default=60, min_value=60)
33
+
34
+
35
def get_proxy() -> str:
    """Return the base proxy URL so the egress IP matches normal traffic."""
    configured = _get("base_proxy_url", "")
    return configured or ""
38
+
39
+
40
def is_enabled() -> bool:
    """True when automatic cf_clearance refreshing is switched on."""
    flag = _get("enabled", False)
    return bool(flag)
app/services/cf_refresh/scheduler.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """定时调度:周期性刷新 cf_clearance(集成到 grok2api 进程内)"""
2
+
3
+ import asyncio
4
+
5
+ from loguru import logger
6
+
7
+ from .config import get_refresh_interval, get_flaresolverr_url, is_enabled
8
+ from .solver import solve_cf_challenge
9
+
10
+ _task: asyncio.Task | None = None
11
+
12
+
13
async def _update_app_config(
    cf_cookies: str,
    user_agent: str = "",
    browser: str = "",
    cf_clearance: str = "",
) -> bool:
    """Push freshly solved CF values into grok2api's runtime config.

    ``cf_cookies`` is always written; the remaining fields are only written
    when non-empty. Returns True on success, False when the update raised.
    """
    try:
        from app.core.config import config

        updates = {"cf_cookies": cf_cookies}
        optional_fields = {
            "cf_clearance": cf_clearance,
            "user_agent": user_agent,
            "browser": browser,
        }
        for field, value in optional_fields.items():
            if value:
                updates[field] = value

        await config.update({"proxy": updates})

        logger.info(f"配置已更新: cf_cookies (长度 {len(cf_cookies)}), 指纹: {browser}")
        if user_agent:
            logger.info(f"配置已更新: user_agent = {user_agent}")
        return True
    except Exception as e:
        logger.error(f"更新配置失败: {e}")
        return False
40
+
41
+
42
async def refresh_once() -> bool:
    """Run one solve-and-persist refresh cycle; True on full success."""
    logger.info("=" * 50)
    logger.info("开始刷新 cf_clearance...")

    solved = await solve_cf_challenge()
    if not solved:
        logger.error("刷新失败:无法获取 cf_clearance")
        return False

    updated = await _update_app_config(
        cf_cookies=solved["cookies"],
        cf_clearance=solved.get("cf_clearance", ""),
        user_agent=solved.get("user_agent", ""),
        browser=solved.get("browser", ""),
    )

    if updated:
        logger.info("刷新完成")
    else:
        logger.error("刷新失败: 更新配置失败")

    return updated
65
+
66
+
67
async def _scheduler_loop():
    """Background loop: periodically refresh cf_clearance while enabled.

    Config is re-read every iteration so panel changes take effect without
    a restart. A failed cycle is logged and must never kill the long-lived
    task; cancellation is re-raised so ``stop()`` works.
    """
    logger.info(
        f"cf_refresh scheduler started (FlareSolverr: {get_flaresolverr_url()}, interval: {get_refresh_interval()}s)"
    )

    # 周期性刷新(每次循环重新读取配置,支持面板修改实时生效)
    while True:
        try:
            if is_enabled():
                await refresh_once()
            else:
                logger.debug("cf_refresh disabled, skip refresh")
        except asyncio.CancelledError:
            # Propagate so the task actually stops on cancel().
            raise
        except Exception as e:
            # An unexpected error in one cycle must not terminate the task.
            logger.error(f"cf_refresh cycle failed unexpectedly: {e}")
        interval = get_refresh_interval()
        await asyncio.sleep(interval)
81
+
82
+
83
def start():
    """Launch the background refresh task (idempotent; needs a running loop).

    Raises:
        RuntimeError: if called while no asyncio event loop is running.
    """
    global _task
    if _task is not None:
        return
    # asyncio.get_event_loop() is deprecated for this use since Python 3.10;
    # schedule the loop on the event loop that is actually running.
    _task = asyncio.get_running_loop().create_task(_scheduler_loop())
    logger.info("cf_refresh background task started")
90
+
91
+
92
def stop():
    """Cancel and forget the background refresh task, if one is running."""
    global _task
    if _task is None:
        return
    _task.cancel()
    _task = None
    logger.info("cf_refresh background task stopped")
app/services/cf_refresh/solver.py ADDED
@@ -0,0 +1,122 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ 通过 FlareSolverr 自动获取 cf_clearance
3
+
4
+ FlareSolverr 是一个 Docker 服务,内部运行 Chrome 浏览器,
5
+ 自动处理 Cloudflare 挑战(包括 Turnstile),无需 GUI。
6
+ """
7
+
8
+ import asyncio
9
+ import json
10
+ from typing import Optional, Dict
11
+ from urllib import request as urllib_request
12
+ from urllib.error import HTTPError, URLError
13
+
14
+ from loguru import logger
15
+
16
+ from .config import GROK_URL, get_timeout, get_proxy, get_flaresolverr_url
17
+
18
+
19
+ def _extract_all_cookies(cookies: list[dict]) -> str:
20
+ """将 FlareSolverr 返回 of cookie 列表转换为字符串格式"""
21
+ return "; ".join([f"{c.get('name')}={c.get('value')}" for c in cookies])
22
+
23
+
24
+ def _extract_cookie_value(cookies: list[dict], name: str) -> str:
25
+ for cookie in cookies:
26
+ if cookie.get("name") == name:
27
+ return cookie.get("value") or ""
28
+ return ""
29
+
30
+
31
+ def _extract_user_agent(solution: dict) -> str:
32
+ """从 FlareSolverr 的 solution 中提取 User-Agent"""
33
+ return solution.get("userAgent", "")
34
+
35
+
36
+ def _extract_browser_profile(user_agent: str) -> str:
37
+ """从 User-Agent 提取 chromeXXX 格式的指纹识别号"""
38
+ import re
39
+ match = re.search(r"Chrome/(\d+)", user_agent)
40
+ if match:
41
+ return f"chrome{match.group(1)}"
42
+ return "chrome120"
43
+
44
+
45
async def solve_cf_challenge() -> Optional[Dict[str, str]]:
    """Ask FlareSolverr to open grok.com and pass the Cloudflare challenge.

    Returns:
        On success a dict with "cookies" (full cookie header string),
        "cf_clearance", "user_agent" and "browser" (chromeNNN profile);
        ``None`` on any failure (unconfigured, upstream error, no cookies).
    """
    flaresolverr_url = get_flaresolverr_url()
    cf_timeout = get_timeout()
    proxy = get_proxy()

    if not flaresolverr_url:
        logger.error("FlareSolverr 地址未配置,无法刷新 cf_clearance")
        return None

    url = f"{flaresolverr_url.rstrip('/')}/v1"

    payload = {
        "cmd": "request.get",
        "url": GROK_URL,
        "maxTimeout": cf_timeout * 1000,  # FlareSolverr expects milliseconds
    }

    if proxy:
        # Route through the configured base proxy so the egress IP matches
        # the one the cf_clearance cookie will be bound to.
        payload["proxy"] = {"url": proxy}

    body = json.dumps(payload).encode("utf-8")
    headers = {"Content-Type": "application/json"}

    logger.info(f"正在通过 FlareSolverr 访问 {GROK_URL} ...")
    logger.debug(f"FlareSolverr 地址: {url}")

    req = urllib_request.Request(url, data=body, method="POST", headers=headers)

    try:

        def _post():
            # Blocking urllib call; give it headroom beyond the solver's own
            # maxTimeout so FlareSolverr times out first and reports cleanly.
            with urllib_request.urlopen(req, timeout=cf_timeout + 30) as resp:
                return json.loads(resp.read().decode("utf-8"))

        result = await asyncio.to_thread(_post)

        status = result.get("status", "")
        if status != "ok":
            message = result.get("message", "unknown error")
            logger.error(f"FlareSolverr 返回错误: {status} - {message}")
            return None

        solution = result.get("solution", {})
        cookies = solution.get("cookies", [])

        if not cookies:
            logger.error("FlareSolverr 成功访问但没有返回 cookies")
            return None

        cookie_str = _extract_all_cookies(cookies)
        clearance = _extract_cookie_value(cookies, "cf_clearance")
        ua = _extract_user_agent(solution)
        browser = _extract_browser_profile(ua)
        logger.info(f"成功获取 cookies (数量: {len(cookies)}), 指纹: {browser}")

        return {
            "cookies": cookie_str,
            "cf_clearance": clearance,
            "user_agent": ua,
            "browser": browser,
        }

    except HTTPError as e:
        # Cap the logged body so a huge error page doesn't flood the log.
        body_text = e.read().decode("utf-8", "replace")[:300]
        logger.error(f"FlareSolverr 请求失败: {e.code} - {body_text}")
        return None
    except URLError as e:
        logger.error(f"无法连接 FlareSolverr ({flaresolverr_url}): {e.reason}")
        logger.info("请确认 FlareSolverr 服务已启动: docker run -p 8191:8191 ghcr.io/flaresolverr/flaresolverr:latest")
        return None
    except Exception as e:
        logger.error(f"请求异常: {e}")
        return None
app/services/grok/batch_services/assets.py ADDED
@@ -0,0 +1,234 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Batch assets service.
3
+ """
4
+
5
+ import asyncio
6
+ from typing import Dict, List, Optional
7
+
8
+ from app.core.config import get_config
9
+ from app.core.logger import logger
10
+ from app.services.reverse.assets_list import AssetsListReverse
11
+ from app.services.reverse.assets_delete import AssetsDeleteReverse
12
+ from app.services.reverse.utils.session import ResettableSession
13
+ from app.core.batch import run_batch
14
+
15
+
16
class BaseAssetsService:
    """Shared lazy-session plumbing for the assets list/delete services."""

    def __init__(self):
        self._session: Optional[ResettableSession] = None

    async def _get_session(self) -> ResettableSession:
        """Create the HTTP session on first use, honoring the configured fingerprint."""
        if self._session is None:
            browser = get_config("proxy.browser")
            session_kwargs = {"impersonate": browser} if browser else {}
            self._session = ResettableSession(**session_kwargs)
        return self._session

    async def close(self):
        """Dispose of the cached session, if one was created."""
        if self._session:
            await self._session.close()
            self._session = None
35
+
36
+
37
# Cached asyncio.Semaphore instances plus the config value they were built
# with; rebuilt by the getters below whenever the configured limit changes.
_LIST_SEMAPHORE = None
_LIST_SEM_VALUE = None
_DELETE_SEMAPHORE = None
_DELETE_SEM_VALUE = None
41
+
42
+
43
def _get_list_semaphore() -> asyncio.Semaphore:
    """Shared semaphore capping concurrent asset-list requests.

    Rebuilt whenever ``asset.list_concurrent`` changes (minimum 1).
    """
    global _LIST_SEMAPHORE, _LIST_SEM_VALUE
    limit = max(1, int(get_config("asset.list_concurrent")))
    if _LIST_SEMAPHORE is None or limit != _LIST_SEM_VALUE:
        _LIST_SEM_VALUE = limit
        _LIST_SEMAPHORE = asyncio.Semaphore(limit)
    return _LIST_SEMAPHORE
50
+
51
+
52
def _get_delete_semaphore() -> asyncio.Semaphore:
    """Shared semaphore capping concurrent asset-delete requests.

    Rebuilt whenever ``asset.delete_concurrent`` changes (minimum 1).
    """
    global _DELETE_SEMAPHORE, _DELETE_SEM_VALUE
    limit = max(1, int(get_config("asset.delete_concurrent")))
    if _DELETE_SEMAPHORE is None or limit != _DELETE_SEM_VALUE:
        _DELETE_SEM_VALUE = limit
        _DELETE_SEMAPHORE = asyncio.Semaphore(limit)
    return _DELETE_SEMAPHORE
59
+
60
+
61
class ListService(BaseAssetsService):
    """Enumerate asset IDs owned by a token via the paginated assets API."""

    async def list(self, token: str) -> Dict[str, List[str] | int]:
        """Walk every page of the assets listing and collect asset IDs.

        Returns:
            ``{"asset_ids": [...], "count": N}``.

        Pagination stops when the upstream returns no ``nextPageToken``, or
        defensively when a page token repeats (infinite-loop guard).
        """
        params = {
            "pageSize": 50,
            "orderBy": "ORDER_BY_LAST_USE_TIME",
            "source": "SOURCE_ANY",
            "isLatest": "true",
        }
        page_token = None
        seen_tokens = set()
        asset_ids: List[str] = []
        session = await self._get_session()
        while True:
            if page_token:
                if page_token in seen_tokens:
                    logger.warning("Pagination stopped: repeated page token")
                    break
                seen_tokens.add(page_token)
                params["pageToken"] = page_token
            else:
                # First page: make sure no stale pageToken is sent.
                params.pop("pageToken", None)

            # Throttled by the shared list semaphore.
            async with _get_list_semaphore():
                response = await AssetsListReverse.request(
                    session,
                    token,
                    params,
                )

            result = response.json()
            page_assets = result.get("assets", [])
            if page_assets:
                for asset in page_assets:
                    asset_id = asset.get("assetId")
                    if asset_id:
                        asset_ids.append(asset_id)

            page_token = result.get("nextPageToken")
            if not page_token:
                break

        logger.info(f"List success: {len(asset_ids)} files")
        return {"asset_ids": asset_ids, "count": len(asset_ids)}

    @staticmethod
    async def fetch_assets_details(
        tokens: List[str],
        account_map: dict,
        *,
        include_ok: bool = False,
        on_item=None,
        should_cancel=None,
    ) -> dict:
        """Batch fetch assets details for tokens.

        For each token a detail row is built (masked token and last clear
        time come from ``account_map`` when available); per-token errors are
        captured in the row's ``status`` instead of aborting the batch. When
        ``include_ok`` is True each result also carries an "ok" flag.
        """
        account_map = account_map or {}
        shared_service = ListService()
        batch_size = max(1, int(get_config("asset.list_batch_size")))

        async def _fetch_detail(token: str):
            # One token -> one detail row; exceptions become status strings.
            account = account_map.get(token)
            try:
                result = await shared_service.list(token)
                asset_ids = result.get("asset_ids", [])
                count = result.get("count", len(asset_ids))
                detail = {
                    "token": token,
                    "token_masked": account["token_masked"] if account else token,
                    "count": count,
                    "status": "ok",
                    "last_asset_clear_at": account["last_asset_clear_at"]
                    if account
                    else None,
                }
                if include_ok:
                    return {"ok": True, "detail": detail, "count": count}
                return {"detail": detail, "count": count}
            except Exception as e:
                detail = {
                    "token": token,
                    "token_masked": account["token_masked"] if account else token,
                    "count": 0,
                    "status": f"error: {str(e)}",
                    "last_asset_clear_at": account["last_asset_clear_at"]
                    if account
                    else None,
                }
                if include_ok:
                    return {"ok": False, "detail": detail, "count": 0}
                return {"detail": detail, "count": 0}

        try:
            return await run_batch(
                tokens,
                _fetch_detail,
                batch_size=batch_size,
                on_item=on_item,
                should_cancel=should_cancel,
            )
        finally:
            # Always release the shared HTTP session.
            await shared_service.close()
163
+
164
+
165
class DeleteService(BaseAssetsService):
    """Delete a token's assets, with concurrency capped by a shared semaphore."""

    async def delete(self, token: str, asset_ids: List[str]) -> Dict[str, int]:
        """Delete the given asset IDs concurrently.

        Returns:
            Counters ``{"total", "success", "failed"}``; an empty input
            short-circuits with ``"skipped": True``.
        """
        if not asset_ids:
            logger.info("No assets to delete")
            return {"total": 0, "success": 0, "failed": 0, "skipped": True}

        total = len(asset_ids)
        success = 0
        failed = 0
        session = await self._get_session()

        async def _delete_one(asset_id: str):
            # Each delete acquires the shared semaphore to cap concurrency.
            async with _get_delete_semaphore():
                await AssetsDeleteReverse.request(session, token, asset_id)

        # Falsy IDs are filtered out before scheduling.
        tasks = [_delete_one(asset_id) for asset_id in asset_ids if asset_id]
        results = await asyncio.gather(*tasks, return_exceptions=True)
        for res in results:
            if isinstance(res, Exception):
                failed += 1
            else:
                success += 1

        logger.info(f"Delete all: total={total}, success={success}, failed={failed}")
        return {"total": total, "success": success, "failed": failed}

    @staticmethod
    async def clear_assets(
        tokens: List[str],
        mgr,
        *,
        include_ok: bool = False,
        on_item=None,
        should_cancel=None,
    ) -> dict:
        """Batch clear assets for tokens.

        For each token: list its assets, delete them, then record the clear
        time via ``mgr.mark_asset_clear``. Per-token errors are reported in
        the result rather than raised.
        """
        delete_service = DeleteService()
        list_service = ListService()
        batch_size = max(1, int(get_config("asset.delete_batch_size")))

        async def _clear_one(token: str):
            try:
                result = await list_service.list(token)
                asset_ids = result.get("asset_ids", [])
                result = await delete_service.delete(token, asset_ids)
                await mgr.mark_asset_clear(token)
                if include_ok:
                    return {"ok": True, "result": result}
                return {"status": "success", "result": result}
            except Exception as e:
                if include_ok:
                    return {"ok": False, "error": str(e)}
                return {"status": "error", "error": str(e)}

        try:
            return await run_batch(
                tokens,
                _clear_one,
                batch_size=batch_size,
                on_item=on_item,
                should_cancel=should_cancel,
            )
        finally:
            # Release both shared HTTP sessions regardless of outcome.
            await delete_service.close()
            await list_service.close()
231
+ await list_service.close()
232
+
233
+
234
+ __all__ = ["ListService", "DeleteService"]
app/services/grok/batch_services/nsfw.py ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Batch NSFW service.
3
+ """
4
+
5
+ import asyncio
6
+ from typing import Callable, Awaitable, Dict, Any, Optional
7
+
8
+ from app.core.logger import logger
9
+ from app.core.config import get_config
10
+ from app.core.exceptions import UpstreamException
11
+ from app.services.reverse.accept_tos import AcceptTosReverse
12
+ from app.services.reverse.nsfw_mgmt import NsfwMgmtReverse
13
+ from app.services.reverse.set_birth import SetBirthReverse
14
+ from app.services.reverse.utils.session import ResettableSession
15
+ from app.core.batch import run_batch
16
+
17
+
18
+ _NSFW_SEMAPHORE = None
19
+ _NSFW_SEM_VALUE = None
20
+
21
+
22
def _get_nsfw_semaphore() -> asyncio.Semaphore:
    """Shared semaphore capping concurrent NSFW-related upstream calls.

    Rebuilt whenever ``nsfw.concurrent`` changes (minimum 1).
    """
    global _NSFW_SEMAPHORE, _NSFW_SEM_VALUE
    limit = max(1, int(get_config("nsfw.concurrent")))
    if _NSFW_SEMAPHORE is None or limit != _NSFW_SEM_VALUE:
        _NSFW_SEM_VALUE = limit
        _NSFW_SEMAPHORE = asyncio.Semaphore(limit)
    return _NSFW_SEMAPHORE
29
+
30
+
31
class NSFWService:
    """Enable Grok NSFW mode for accounts in batch."""

    @staticmethod
    async def batch(
        tokens: list[str],
        mgr,
        *,
        on_item: Optional[Callable[[str, Dict[str, Any]], Awaitable[None]]] = None,
        should_cancel: Optional[Callable[[], bool]] = None,
    ) -> Dict[str, Dict[str, Any]]:
        """Batch enable NSFW.

        For each token: accept the ToS, set a birth date, then toggle NSFW
        via the management endpoint. A 401 at any step is recorded against
        the token through ``mgr.record_fail``; success adds the "nsfw" tag.
        """
        batch_size = get_config("nsfw.batch_size")

        async def _enable(token: str):
            try:
                browser = get_config("proxy.browser")
                async with ResettableSession(impersonate=browser) as session:

                    async def _record_fail(err: UpstreamException, reason: str):
                        # Prefer the HTTP status captured in err.details;
                        # fall back to the exception's status_code attribute.
                        status = None
                        if err.details and "status" in err.details:
                            status = err.details["status"]
                        else:
                            status = getattr(err, "status_code", None)
                        # Only auth failures (401) are recorded on the token.
                        if status == 401:
                            await mgr.record_fail(token, status, reason)
                        return status or 0

                    try:
                        async with _get_nsfw_semaphore():
                            await AcceptTosReverse.request(session, token)
                    except UpstreamException as e:
                        status = await _record_fail(e, "tos_auth_failed")
                        return {
                            "success": False,
                            "http_status": status,
                            "error": f"Accept ToS failed: {str(e)}",
                        }

                    try:
                        async with _get_nsfw_semaphore():
                            await SetBirthReverse.request(session, token)
                    except UpstreamException as e:
                        status = await _record_fail(e, "set_birth_auth_failed")
                        return {
                            "success": False,
                            "http_status": status,
                            "error": f"Set birth date failed: {str(e)}",
                        }

                    try:
                        async with _get_nsfw_semaphore():
                            grpc_status = await NsfwMgmtReverse.request(session, token)
                        # Codes -1 and 0 are treated as success.
                        success = grpc_status.code in (-1, 0)
                    except UpstreamException as e:
                        status = await _record_fail(e, "nsfw_mgmt_auth_failed")
                        return {
                            "success": False,
                            "http_status": status,
                            "error": f"NSFW enable failed: {str(e)}",
                        }
                    if success:
                        await mgr.add_tag(token, "nsfw")
                    return {
                        "success": success,
                        "http_status": 200,
                        "grpc_status": grpc_status.code,
                        "grpc_message": grpc_status.message or None,
                        "error": None,
                    }
            except Exception as e:
                # Catch-all: session setup or any other unexpected failure.
                logger.error(f"NSFW enable failed: {e}")
                return {"success": False, "http_status": 0, "error": str(e)[:100]}

        return await run_batch(
            tokens,
            _enable,
            batch_size=batch_size,
            on_item=on_item,
            should_cancel=should_cancel,
        )
110
+
111
+
112
+ __all__ = ["NSFWService"]
app/services/grok/batch_services/usage.py ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Batch usage service.
3
+ """
4
+
5
+ import asyncio
6
+ from typing import Callable, Awaitable, Dict, Any, Optional, List
7
+
8
+ from app.core.logger import logger
9
+ from app.core.config import get_config
10
+ from app.services.reverse.rate_limits import RateLimitsReverse
11
+ from app.services.reverse.utils.session import ResettableSession
12
+ from app.core.batch import run_batch
13
+
14
+ _USAGE_SEMAPHORE = None
15
+ _USAGE_SEM_VALUE = None
16
+
17
+
18
def _get_usage_semaphore() -> asyncio.Semaphore:
    """Shared semaphore capping concurrent usage queries.

    Rebuilt whenever ``usage.concurrent`` changes (minimum 1).
    """
    global _USAGE_SEMAPHORE, _USAGE_SEM_VALUE
    limit = max(1, int(get_config("usage.concurrent")))
    if _USAGE_SEMAPHORE is None or limit != _USAGE_SEM_VALUE:
        _USAGE_SEM_VALUE = limit
        _USAGE_SEMAPHORE = asyncio.Semaphore(limit)
    return _USAGE_SEMAPHORE
25
+
26
+
27
class UsageService:
    """Query Grok rate-limit usage for tokens, individually or in batch."""

    async def get(self, token: str) -> Dict:
        """Fetch rate-limit info for a single token.

        Args:
            token: auth token.

        Returns:
            The upstream JSON payload, with ``remainingTokens`` backfilled
            from ``remainingQueries`` when absent.

        Raises:
            UpstreamException: when the upstream request ultimately fails
                (propagated unchanged; the final failure is already logged
                upstream).
        """
        async with _get_usage_semaphore():
            browser = get_config("proxy.browser")
            if browser:
                session_ctx = ResettableSession(impersonate=browser)
            else:
                session_ctx = ResettableSession()
            async with session_ctx as session:
                response = await RateLimitsReverse.request(session, token)
                data = response.json()
                # Older responses report remainingQueries; normalize the field.
                remaining = data.get("remainingTokens")
                if remaining is None:
                    remaining = data.get("remainingQueries")
                if remaining is not None:
                    data["remainingTokens"] = remaining
                logger.info(
                    f"Usage sync success: remaining={remaining}, token={token[:10]}..."
                )
                return data

    @staticmethod
    async def batch(
        tokens: List[str],
        mgr,
        *,
        on_item: Optional[Callable[[str, Dict[str, Any]], Awaitable[None]]] = None,
        should_cancel: Optional[Callable[[], bool]] = None,
    ) -> Dict[str, Dict[str, Any]]:
        """Refresh usage for many tokens via the manager's ``sync_usage``."""
        batch_size = get_config("usage.batch_size")

        async def _refresh_one(t: str):
            # Delegate to the token manager; failures do not consume quota.
            return await mgr.sync_usage(t, consume_on_fail=False, is_usage=False)

        return await run_batch(
            tokens,
            _refresh_one,
            batch_size=batch_size,
            on_item=on_item,
            should_cancel=should_cancel,
        )
87
+
88
+
89
+ __all__ = ["UsageService"]
app/services/grok/defaults.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Grok 服务默认配置
3
+
4
+ 此文件读取 config.defaults.toml,作为 Grok 服务的默认值来源。
5
+ """
6
+
7
+ from pathlib import Path
8
+ import tomllib
9
+
10
+ from app.core.logger import logger
11
+
12
+ DEFAULTS_FILE = Path(__file__).resolve().parent.parent.parent.parent / "config.defaults.toml"
13
+
14
+ # Grok 服务默认配置(运行时从 config.defaults.toml 读取并缓存)
15
+ GROK_DEFAULTS: dict = {}
16
+
17
+
18
def get_grok_defaults():
    """Return the Grok default config, loading config.defaults.toml once.

    A missing or unparsable file is logged and yields the (empty) cache
    unchanged, so the next call will retry the load.
    """
    global GROK_DEFAULTS
    if GROK_DEFAULTS:
        # Already loaded — serve the cached copy.
        return GROK_DEFAULTS
    if not DEFAULTS_FILE.exists():
        logger.warning(f"Defaults file not found: {DEFAULTS_FILE}")
        return GROK_DEFAULTS
    try:
        with DEFAULTS_FILE.open("rb") as handle:
            GROK_DEFAULTS = tomllib.load(handle)
    except Exception as e:
        logger.warning(f"Failed to load defaults from {DEFAULTS_FILE}: {e}")
    return GROK_DEFAULTS
32
+
33
+
34
+ __all__ = ["GROK_DEFAULTS", "get_grok_defaults"]
app/services/grok/services/chat.py ADDED
@@ -0,0 +1,1115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Grok Chat 服务
3
+ """
4
+
5
+ import asyncio
6
+ import re
7
+ import uuid
8
+ from typing import Dict, List, Any, AsyncGenerator, AsyncIterable
9
+
10
+ import orjson
11
+ from curl_cffi.requests.errors import RequestsError
12
+
13
+ from app.core.logger import logger
14
+ from app.core.config import get_config
15
+ from app.core.exceptions import (
16
+ AppException,
17
+ ValidationException,
18
+ ErrorType,
19
+ UpstreamException,
20
+ StreamIdleTimeoutError,
21
+ )
22
+ from app.services.grok.services.model import ModelService
23
+ from app.services.grok.utils.upload import UploadService
24
+ from app.services.grok.utils import process as proc_base
25
+ from app.services.grok.utils.retry import pick_token, rate_limited, transient_upstream
26
+ from app.services.reverse.app_chat import AppChatReverse
27
+ from app.services.reverse.utils.session import ResettableSession
28
+ from app.services.grok.utils.stream import wrap_stream_with_usage
29
+ from app.services.grok.utils.tool_call import (
30
+ build_tool_prompt,
31
+ parse_tool_calls,
32
+ parse_tool_call_block,
33
+ format_tool_history,
34
+ )
35
+ from app.services.token import get_token_manager, EffortType
36
+
37
+
38
# Lazily-created module-level semaphore bounding concurrent chat requests.
# (Re)built by _get_chat_semaphore() whenever the configured limit changes.
_CHAT_SEMAPHORE = None
# The "chat.concurrent" value the current semaphore was built with; used to
# detect config changes between calls.
_CHAT_SEM_VALUE = None
40
+
41
+
42
def extract_tool_text(raw: str, rollout_id: str = "") -> str:
    """Turn a raw ``xai:tool_usage_card`` XML fragment into a one-line summary.

    Known tools (web_search / search_images / chatroom_send) get a bracketed
    label, optionally prefixed with ``[rollout_id]``, followed by the most
    relevant argument. Unknown fragments fall back to the stripped tag-free
    text.
    """
    if not raw:
        return ""

    def _inner(tag: str) -> str:
        # Pull the tag body and unwrap any CDATA section it carries.
        found = re.search(rf"<xai:{tag}>(.*?)</xai:{tag}>", raw, flags=re.DOTALL)
        if not found:
            return ""
        body = found.group(1)
        if not body:
            return body
        return re.sub(r"<!\[CDATA\[(.*?)\]\]>", r"\1", body, flags=re.DOTALL).strip()

    name = _inner("tool_name")
    args = _inner("tool_args")

    # Best-effort JSON decode of the argument blob; non-JSON stays raw text.
    payload = None
    if args:
        try:
            payload = orjson.loads(args)
        except orjson.JSONDecodeError:
            payload = None

    prefix = f"[{rollout_id}]" if rollout_id else ""
    label, text = name, args

    if name == "web_search":
        label = f"{prefix}[WebSearch]"
        if isinstance(payload, dict):
            text = payload.get("query") or payload.get("q") or ""
    elif name == "search_images":
        label = f"{prefix}[SearchImage]"
        if isinstance(payload, dict):
            text = (
                payload.get("image_description")
                or payload.get("description")
                or payload.get("query")
                or ""
            )
    elif name == "chatroom_send":
        label = f"{prefix}[AgentThink]"
        if isinstance(payload, dict):
            text = payload.get("message") or ""

    if label and text:
        return f"{label} {text}".strip()
    if label or text:
        return label or text
    # Fallback: strip tags to keep any raw text.
    return re.sub(r"<[^>]+>", "", raw, flags=re.DOTALL).strip()
97
+
98
+
99
def _get_chat_semaphore() -> asyncio.Semaphore:
    """Return the shared chat-concurrency semaphore, rebuilding it when the
    configured ``chat.concurrent`` limit changes.

    NOTE(review): callers keep their own reference to the semaphore they
    acquired, so releases still hit the old object after a rebuild — in-flight
    requests are unaffected, but permits do not carry over to the new limit.
    """
    global _CHAT_SEMAPHORE, _CHAT_SEM_VALUE
    # Clamp to at least 1 so a zero/negative config value cannot deadlock.
    value = max(1, int(get_config("chat.concurrent")))
    if value != _CHAT_SEM_VALUE:
        _CHAT_SEM_VALUE = value
        _CHAT_SEMAPHORE = asyncio.Semaphore(value)
    return _CHAT_SEMAPHORE
106
+
107
+
108
class MessageExtractor:
    """Extracts prompt text and attachments from OpenAI-style chat messages."""

    @staticmethod
    def extract(
        messages: List[Dict[str, Any]],
        tools: List[Dict[str, Any]] = None,
        tool_choice: Any = None,
        parallel_tool_calls: bool = True,
    ) -> tuple[str, List[str], List[str]]:
        """Flatten OpenAI messages into a single prompt string.

        Args:
            messages: OpenAI chat messages; ``content`` may be a string, a
                single content-part dict, or a list of content parts.
            tools: Optional tool definitions; when given, tool history is
                rewritten to text and a tool system prompt is prepended.
            tool_choice: Forwarded to ``build_tool_prompt``.
            parallel_tool_calls: Forwarded to ``build_tool_prompt``.

        Returns:
            ``(text, file_attachments, image_attachments)`` where the
            attachment lists hold raw data/URLs still to be uploaded.
        """
        # Pre-process: convert tool-related messages to text format.
        if tools:
            messages = format_tool_history(messages)

        texts = []
        file_attachments: List[str] = []
        image_attachments: List[str] = []
        extracted = []

        for msg in messages:
            role = msg.get("role", "") or "user"
            content = msg.get("content", "")
            parts = []

            if isinstance(content, str):
                if content.strip():
                    parts.append(content)
            else:
                # Normalize a single content-part dict to a one-element list so
                # both shapes share one parsing loop (previously duplicated).
                items = [content] if isinstance(content, dict) else content
                if isinstance(items, list):
                    for item in items:
                        if not isinstance(item, dict):
                            continue
                        item_type = item.get("type", "")

                        if item_type == "text":
                            if text := item.get("text", "").strip():
                                parts.append(text)

                        elif item_type == "image_url":
                            image_data = item.get("image_url", {})
                            url = image_data.get("url", "")
                            if url:
                                image_attachments.append(url)

                        elif item_type == "input_audio":
                            audio_data = item.get("input_audio", {})
                            data = audio_data.get("data", "")
                            if data:
                                file_attachments.append(data)

                        elif item_type == "file":
                            file_data = item.get("file", {})
                            raw = file_data.get("file_data", "")
                            if raw:
                                file_attachments.append(raw)

            # Keep tool-call traces so multi-turn tool sessions preserve
            # context ordering for clients that drop them.
            tool_calls = msg.get("tool_calls")
            if role == "assistant" and not parts and isinstance(tool_calls, list):
                for call in tool_calls:
                    if not isinstance(call, dict):
                        continue
                    fn = call.get("function", {})
                    if not isinstance(fn, dict):
                        fn = {}
                    name = fn.get("name") or call.get("name") or "tool"
                    arguments = fn.get("arguments", "")
                    if isinstance(arguments, (dict, list)):
                        try:
                            arguments = orjson.dumps(arguments).decode()
                        except Exception:
                            arguments = str(arguments)
                    if not isinstance(arguments, str):
                        arguments = str(arguments)
                    arguments = arguments.strip()
                    parts.append(f"[tool_call] {name} {arguments}".strip())

            if parts:
                role_label = role
                if role == "tool":
                    # Tag tool results with their name / call id when present.
                    name = msg.get("name")
                    call_id = msg.get("tool_call_id")
                    if isinstance(name, str) and name.strip():
                        role_label = f"tool[{name.strip()}]"
                    if isinstance(call_id, str) and call_id.strip():
                        role_label = f"{role_label}#{call_id.strip()}"
                extracted.append({"role": role_label, "text": "\n".join(parts)})

        # Find the last user message; it is emitted without a role prefix.
        last_user_index = next(
            (
                i
                for i in range(len(extracted) - 1, -1, -1)
                if extracted[i]["role"] == "user"
            ),
            None,
        )

        for i, item in enumerate(extracted):
            role = item["role"] or "user"
            text = item["text"]
            texts.append(text if i == last_user_index else f"{role}: {text}")

        combined = "\n\n".join(texts)

        # If there are attachments but no text, inject a fallback prompt.
        if (not combined.strip()) and (file_attachments or image_attachments):
            combined = "Refer to the following content:"

        # Prepend tool system prompt if tools are provided.
        if tools:
            tool_prompt = build_tool_prompt(tools, tool_choice, parallel_tool_calls)
            if tool_prompt:
                combined = f"{tool_prompt}\n\n{combined}"

        return combined, file_attachments, image_attachments
250
+
251
+
252
class GrokChatService:
    """Grok API invocation service (low-level request layer)."""

    async def chat(
        self,
        token: str,
        message: str,
        model: str,
        mode: str = None,
        stream: bool = None,
        file_attachments: List[str] = None,
        tool_overrides: Dict[str, Any] = None,
        model_config_override: Dict[str, Any] = None,
    ):
        """Send a chat request upstream.

        Acquires the shared chat semaphore before opening the connection; the
        slot is released either on setup failure or when the returned stream
        is exhausted/abandoned.

        Args:
            token: Grok auth token for the upstream call.
            message: Fully assembled prompt text.
            model: Grok model identifier.
            mode: Optional model mode.
            stream: Streaming flag; falls back to the ``app.stream`` config.
            file_attachments: Uploaded attachment IDs.
            tool_overrides: Optional tool-override payload.
            model_config_override: Optional sampling/config overrides.

        Returns:
            An async generator yielding raw upstream stream lines.
        """
        if stream is None:
            stream = get_config("app.stream")

        logger.debug(
            f"Chat request: model={model}, mode={mode}, stream={stream}, attachments={len(file_attachments or [])}"
        )

        browser = get_config("proxy.browser")
        semaphore = _get_chat_semaphore()
        await semaphore.acquire()
        session = ResettableSession(impersonate=browser)
        try:
            stream_response = await AppChatReverse.request(
                session,
                token,
                message=message,
                model=model,
                mode=mode,
                file_attachments=file_attachments,
                tool_overrides=tool_overrides,
                model_config_override=model_config_override,
            )
            logger.info(f"Chat connected: model={model}, stream={stream}")
        except Exception:
            # Setup failed: close the session and free the concurrency slot
            # before propagating, since the generator below will never run.
            try:
                await session.close()
            except Exception:
                pass
            semaphore.release()
            raise

        async def _stream():
            # Release the concurrency slot only once the stream is drained or
            # the consumer abandons it (generator finalization runs finally).
            # NOTE(review): the session is not closed here — presumably the
            # reverse client / stream owns its lifecycle; confirm.
            try:
                async for line in stream_response:
                    yield line
            finally:
                semaphore.release()

        return _stream()

    async def chat_openai(
        self,
        token: str,
        model: str,
        messages: List[Dict[str, Any]],
        stream: bool = None,
        reasoning_effort: str | None = None,
        temperature: float = 0.8,
        top_p: float = 0.95,
        tools: List[Dict[str, Any]] = None,
        tool_choice: Any = None,
        parallel_tool_calls: bool = True,
    ):
        """OpenAI-compatible entry point.

        Resolves the model alias, extracts text/attachments from the OpenAI
        message list, uploads attachments, then delegates to :meth:`chat`.

        Returns:
            ``(response_stream, stream_flag, model_name)``.

        Raises:
            ValidationException: If ``model`` is unknown.
        """
        model_info = ModelService.get(model)
        if not model_info:
            raise ValidationException(f"Unknown model: {model}")

        grok_model = model_info.grok_model
        mode = model_info.model_mode
        # Extract message text and attachments.
        message, file_attachments, image_attachments = MessageExtractor.extract(
            messages, tools=tools, tool_choice=tool_choice, parallel_tool_calls=parallel_tool_calls
        )
        logger.debug(
            "Extracted message length=%s, files=%s, images=%s",
            len(message),
            len(file_attachments),
            len(image_attachments),
        )

        # Upload attachments (files and images go through the same uploader).
        file_ids: List[str] = []
        image_ids: List[str] = []
        if file_attachments or image_attachments:
            upload_service = UploadService()
            try:
                for attach_data in file_attachments:
                    file_id, _ = await upload_service.upload_file(attach_data, token)
                    file_ids.append(file_id)
                    logger.debug(f"Attachment uploaded: type=file, file_id={file_id}")
                for attach_data in image_attachments:
                    file_id, _ = await upload_service.upload_file(attach_data, token)
                    image_ids.append(file_id)
                    logger.debug(f"Attachment uploaded: type=image, file_id={file_id}")
            finally:
                await upload_service.close()

        all_attachments = file_ids + image_ids
        stream = stream if stream is not None else get_config("app.stream")

        model_config_override = {
            "temperature": temperature,
            "topP": top_p,
        }
        if reasoning_effort is not None:
            model_config_override["reasoningEffort"] = reasoning_effort

        response = await self.chat(
            token,
            message,
            grok_model,
            mode,
            stream,
            file_attachments=all_attachments,
            tool_overrides=None,
            model_config_override=model_config_override,
        )

        return response, stream, model
377
+
378
+
379
class ChatService:
    """Chat business service: token selection, cross-token retry, response processing."""

    @staticmethod
    async def completions(
        model: str,
        messages: List[Dict[str, Any]],
        stream: bool = None,
        reasoning_effort: str | None = None,
        temperature: float = 0.8,
        top_p: float = 0.95,
        tools: List[Dict[str, Any]] = None,
        tool_choice: Any = None,
        parallel_tool_calls: bool = True,
    ):
        """Chat Completions entry point.

        Picks a token, calls upstream, and retries with a different token on
        rate-limit (429) or transient upstream errors, up to
        ``retry.max_retry`` attempts.

        Returns:
            A usage-wrapped async stream (streaming) or a completed OpenAI
            chat.completion dict (non-streaming).

        Raises:
            AppException: With 429 when no token is available.
            UpstreamException: For non-retryable upstream failures.
        """
        # Acquire the token manager and refresh stale state.
        token_mgr = await get_token_manager()
        await token_mgr.reload_if_stale()

        # Resolve parameters (thinking visibility, stream flag).
        if reasoning_effort is None:
            show_think = get_config("app.thinking")
        else:
            show_think = reasoning_effort != "none"
        is_stream = stream if stream is not None else get_config("app.stream")

        # Cross-token retry loop.
        tried_tokens = set()
        max_token_retries = int(get_config("retry.max_retry") or 3)
        last_error = None

        for attempt in range(max_token_retries):
            # Pick a token not yet tried this request.
            token = await pick_token(token_mgr, model, tried_tokens)
            if not token:
                if last_error:
                    raise last_error
                raise AppException(
                    message="No available tokens. Please try again later.",
                    error_type=ErrorType.RATE_LIMIT.value,
                    code="rate_limit_exceeded",
                    status_code=429,
                )

            tried_tokens.add(token)

            try:
                # Call Grok upstream.
                service = GrokChatService()
                response, _, model_name = await service.chat_openai(
                    token,
                    model,
                    messages,
                    stream=is_stream,
                    reasoning_effort=reasoning_effort,
                    temperature=temperature,
                    top_p=top_p,
                    tools=tools,
                    tool_choice=tool_choice,
                    parallel_tool_calls=parallel_tool_calls,
                )

                # Process the response (streaming path).
                if is_stream:
                    logger.debug(f"Processing stream response: model={model}")
                    processor = StreamProcessor(model_name, token, show_think, tools=tools, tool_choice=tool_choice)
                    return wrap_stream_with_usage(
                        processor.process(response), token_mgr, token, model
                    )

                # Non-streaming path: collect, then record usage best-effort.
                logger.debug(f"Processing non-stream response: model={model}")
                result = await CollectProcessor(model_name, token, tools=tools, tool_choice=tool_choice).process(response)
                try:
                    model_info = ModelService.get(model)
                    effort = (
                        EffortType.HIGH
                        if (model_info and model_info.cost.value == "high")
                        else EffortType.LOW
                    )
                    await token_mgr.consume(token, effort)
                    logger.info(f"Chat completed: model={model}, effort={effort.value}")
                except Exception as e:
                    # Usage accounting must not fail the request.
                    logger.warning(f"Failed to record usage: {e}")
                return result

            except UpstreamException as e:
                last_error = e

                if rate_limited(e):
                    # Quota exhausted: mark the token as cooling and retry
                    # with the next token.
                    await token_mgr.mark_rate_limited(token)
                    logger.warning(
                        f"Token {token[:10]}... rate limited (429), "
                        f"trying next token (attempt {attempt + 1}/{max_token_retries})"
                    )
                    continue

                if transient_upstream(e):
                    # Only retry a transient error when another token exists;
                    # otherwise surface the original failure.
                    has_alternative_token = False
                    for pool_name in ModelService.pool_candidates_for_model(model):
                        if token_mgr.get_token(pool_name, exclude=tried_tokens):
                            has_alternative_token = True
                            break
                    if not has_alternative_token:
                        raise
                    logger.warning(
                        f"Transient upstream error for token {token[:10]}..., "
                        f"trying next token (attempt {attempt + 1}/{max_token_retries}): {e}"
                    )
                    continue

                # Non-429 error: do not rotate tokens, propagate immediately.
                raise

        # All tokens were rate limited: raise the last error seen.
        if last_error:
            raise last_error
        raise AppException(
            message="No available tokens. Please try again later.",
            error_type=ErrorType.RATE_LIMIT.value,
            code="rate_limit_exceeded",
            status_code=429,
        )
504
+
505
+
506
class StreamProcessor(proc_base.BaseProcessor):
    """Stream response processor.

    Converts Grok's upstream JSON-lines stream into OpenAI-style SSE chunks,
    handling <think> tagging, image progress, tag filtering, and incremental
    <tool_call> parsing across chunk boundaries.
    """

    def __init__(self, model: str, token: str = "", show_think: bool = None, tools: List[Dict[str, Any]] = None, tool_choice: Any = None):
        super().__init__(model, token)
        # Stream identity / bookkeeping.
        self.response_id: str = None
        self.fingerprint: str = ""
        self.rollout_id: str = ""
        # <think> block state.
        self.think_opened: bool = False
        self.image_think_active: bool = False
        self.role_sent: bool = False
        # Tag filtering: tool-usage cards get summarized instead of dropped.
        self.filter_tags = get_config("app.filter_tags")
        self.tool_usage_enabled = (
            "xai:tool_usage_card" in (self.filter_tags or [])
        )
        self._tool_usage_opened = False
        self._tool_usage_buffer = ""

        self.show_think = bool(show_think)
        self.tools = tools
        self.tool_choice = tool_choice
        # Incremental <tool_call> parser state (text vs inside-tool).
        self._tool_stream_enabled = bool(tools) and tool_choice != "none"
        self._tool_state = "text"
        self._tool_buffer = ""
        self._tool_partial = ""
        self._tool_calls_seen = False
        self._tool_call_index = 0

    def _with_tool_index(self, tool_call: Any) -> Any:
        """Assign a monotonically increasing ``index`` to a tool-call dict."""
        if not isinstance(tool_call, dict):
            return tool_call
        if tool_call.get("index") is None:
            # Copy before mutating so shared parser output stays untouched.
            tool_call = dict(tool_call)
            tool_call["index"] = self._tool_call_index
            self._tool_call_index += 1
        return tool_call

    def _filter_tool_card(self, token: str) -> str:
        """Replace <xai:tool_usage_card> blocks with one-line summaries.

        Cards may span multiple stream tokens; partial cards are buffered in
        ``_tool_usage_buffer`` until the closing tag arrives.
        """
        if not token or not self.tool_usage_enabled:
            return token

        output_parts: list[str] = []
        rest = token
        start_tag = "<xai:tool_usage_card"
        end_tag = "</xai:tool_usage_card>"

        while rest:
            if self._tool_usage_opened:
                # We are inside a card started in an earlier token.
                end_idx = rest.find(end_tag)
                if end_idx == -1:
                    self._tool_usage_buffer += rest
                    return "".join(output_parts)
                end_pos = end_idx + len(end_tag)
                self._tool_usage_buffer += rest[:end_pos]
                line = extract_tool_text(self._tool_usage_buffer, self.rollout_id)
                if line:
                    # Keep summaries on their own line.
                    if output_parts and not output_parts[-1].endswith("\n"):
                        output_parts[-1] += "\n"
                    output_parts.append(f"{line}\n")
                self._tool_usage_buffer = ""
                self._tool_usage_opened = False
                rest = rest[end_pos:]
                continue

            start_idx = rest.find(start_tag)
            if start_idx == -1:
                output_parts.append(rest)
                break

            if start_idx > 0:
                output_parts.append(rest[:start_idx])

            end_idx = rest.find(end_tag, start_idx)
            if end_idx == -1:
                # Card continues into the next token; buffer and wait.
                self._tool_usage_opened = True
                self._tool_usage_buffer = rest[start_idx:]
                break

            end_pos = end_idx + len(end_tag)
            raw_card = rest[start_idx:end_pos]
            line = extract_tool_text(raw_card, self.rollout_id)
            if line:
                if output_parts and not output_parts[-1].endswith("\n"):
                    output_parts[-1] += "\n"
                output_parts.append(f"{line}\n")
            rest = rest[end_pos:]

        return "".join(output_parts)

    def _filter_token(self, token: str) -> str:
        """Filter special tags in current token only."""
        if not token:
            return token

        if self.tool_usage_enabled:
            token = self._filter_tool_card(token)
            if not token:
                return ""

        if not self.filter_tags:
            return token

        # Drop any token that mentions a filtered tag (coarse per-token check;
        # tool_usage_card is handled above with proper summarization).
        for tag in self.filter_tags:
            if tag == "xai:tool_usage_card":
                continue
            if f"<{tag}" in token or f"</{tag}" in token:
                return ""

        return token

    def _suffix_prefix(self, text: str, tag: str) -> int:
        """Length of the longest proper prefix of ``tag`` that ends ``text``.

        Used to hold back characters that may be the start of a tag split
        across stream chunks.
        """
        if not text or not tag:
            return 0
        max_keep = min(len(text), len(tag) - 1)
        for keep in range(max_keep, 0, -1):
            if text.endswith(tag[:keep]):
                return keep
        return 0

    def _handle_tool_stream(self, chunk: str) -> list[tuple[str, Any]]:
        """Incrementally split a chunk into ("text", str) / ("tool", dict) events.

        Maintains a two-state machine (outside vs inside <tool_call>) plus a
        partial-tag holdback so tags split across chunks are handled.
        """
        events: list[tuple[str, Any]] = []
        if not chunk:
            return events

        start_tag = "<tool_call>"
        end_tag = "</tool_call>"
        data = f"{self._tool_partial}{chunk}"
        self._tool_partial = ""

        while data:
            if self._tool_state == "text":
                start_idx = data.find(start_tag)
                if start_idx == -1:
                    # Hold back any suffix that could begin a start tag.
                    keep = self._suffix_prefix(data, start_tag)
                    emit = data[:-keep] if keep else data
                    if emit:
                        events.append(("text", emit))
                    self._tool_partial = data[-keep:] if keep else ""
                    break

                before = data[:start_idx]
                if before:
                    events.append(("text", before))
                data = data[start_idx + len(start_tag) :]
                self._tool_state = "tool"
                continue

            end_idx = data.find(end_tag)
            if end_idx == -1:
                # Still inside the tool block; hold back a possible end-tag
                # prefix and buffer the rest.
                keep = self._suffix_prefix(data, end_tag)
                append = data[:-keep] if keep else data
                if append:
                    self._tool_buffer += append
                self._tool_partial = data[-keep:] if keep else ""
                break

            self._tool_buffer += data[:end_idx]
            data = data[end_idx + len(end_tag) :]
            tool_call = parse_tool_call_block(self._tool_buffer, self.tools)
            if tool_call:
                events.append(("tool", self._with_tool_index(tool_call)))
                self._tool_calls_seen = True
            self._tool_buffer = ""
            self._tool_state = "text"

        return events

    def _flush_tool_stream(self) -> list[tuple[str, Any]]:
        """Flush parser state at end of stream.

        An unterminated <tool_call> is parsed best-effort; if unparseable its
        raw text is re-emitted (with the opening tag restored) so nothing is
        silently dropped.
        """
        events: list[tuple[str, Any]] = []
        if self._tool_state == "text":
            if self._tool_partial:
                events.append(("text", self._tool_partial))
                self._tool_partial = ""
            return events

        raw = f"{self._tool_buffer}{self._tool_partial}"
        tool_call = parse_tool_call_block(raw, self.tools)
        if tool_call:
            events.append(("tool", self._with_tool_index(tool_call)))
            self._tool_calls_seen = True
        elif raw:
            events.append(("text", f"<tool_call>{raw}"))
        self._tool_buffer = ""
        self._tool_partial = ""
        self._tool_state = "text"
        return events

    def _sse(self, content: str = "", role: str = None, finish: str = None, tool_calls: list = None) -> str:
        """Build SSE response."""
        delta = {}
        if role:
            delta["role"] = role
            delta["content"] = ""
        elif tool_calls is not None:
            delta["tool_calls"] = tool_calls
        elif content:
            delta["content"] = content

        chunk = {
            "id": self.response_id or f"chatcmpl-{uuid.uuid4().hex[:24]}",
            "object": "chat.completion.chunk",
            "created": self.created,
            "model": self.model,
            "system_fingerprint": self.fingerprint,
            "choices": [
                {"index": 0, "delta": delta, "logprobs": None, "finish_reason": finish}
            ],
        }
        return f"data: {orjson.dumps(chunk).decode()}\n\n"

    async def process(self, response: AsyncIterable[bytes]) -> AsyncGenerator[str, None]:
        """Process stream response.

        Args:
            response: AsyncIterable[bytes], async iterable of bytes

        Returns:
            AsyncGenerator[str, None], async generator of strings (SSE lines)
        """
        idle_timeout = get_config("chat.stream_timeout")

        try:
            async for line in proc_base._with_idle_timeout(
                response, idle_timeout, self.model
            ):
                line = proc_base._normalize_line(line)
                if not line:
                    continue
                try:
                    data = orjson.loads(line)
                except orjson.JSONDecodeError:
                    # Skip non-JSON keepalive/noise lines.
                    continue

                resp = data.get("result", {}).get("response", {})
                is_thinking = bool(resp.get("isThinking"))
                # isThinking controls <think> tagging
                # when absent, treat as False

                if (llm := resp.get("llmInfo")) and not self.fingerprint:
                    self.fingerprint = llm.get("modelHash", "")
                if rid := resp.get("responseId"):
                    self.response_id = rid
                if rid := resp.get("rolloutId"):
                    self.rollout_id = str(rid)

                # Emit the assistant role delta exactly once, first.
                if not self.role_sent:
                    yield self._sse(role="assistant")
                    self.role_sent = True

                # Image-generation progress is surfaced inside a <think> block.
                if img := resp.get("streamingImageGenerationResponse"):
                    if not self.show_think:
                        continue
                    self.image_think_active = True
                    if not self.think_opened:
                        yield self._sse("<think>\n")
                        self.think_opened = True
                    idx = img.get("imageIndex", 0) + 1
                    progress = img.get("progress", 0)
                    yield self._sse(
                        f"正在生成第{idx}张图片中,当前进度{progress}%\n"
                    )
                    continue

                # Final model response: close image-think, render images.
                if mr := resp.get("modelResponse"):
                    if self.image_think_active and self.think_opened:
                        yield self._sse("\n</think>\n")
                        self.think_opened = False
                        self.image_think_active = False
                    for url in proc_base._collect_images(mr):
                        parts = url.split("/")
                        img_id = parts[-2] if len(parts) >= 2 else "image"
                        dl_service = self._get_dl()
                        rendered = await dl_service.render_image(
                            url, self.token, img_id
                        )
                        yield self._sse(f"{rendered}\n")

                    if (
                        (meta := mr.get("metadata", {}))
                        .get("llm_info", {})
                        .get("modelHash")
                    ):
                        self.fingerprint = meta["llm_info"]["modelHash"]
                    continue

                # Card attachments become inline markdown images.
                if card := resp.get("cardAttachment"):
                    json_data = card.get("jsonData")
                    if isinstance(json_data, str) and json_data.strip():
                        try:
                            card_data = orjson.loads(json_data)
                        except orjson.JSONDecodeError:
                            card_data = None
                        if isinstance(card_data, dict):
                            image = card_data.get("image") or {}
                            original = image.get("original")
                            title = image.get("title") or ""
                            if original:
                                title_safe = title.replace("\n", " ").strip()
                                if title_safe:
                                    yield self._sse(f"![{title_safe}]({original})\n")
                                else:
                                    yield self._sse(f"![image]({original})\n")
                    continue

                # Plain text tokens: filter tags, manage <think>, route tools.
                if (token := resp.get("token")) is not None:
                    if not token:
                        continue
                    filtered = self._filter_token(token)
                    if not filtered:
                        continue
                    in_think = is_thinking or self.image_think_active
                    if in_think:
                        if not self.show_think:
                            continue
                        if not self.think_opened:
                            yield self._sse("<think>\n")
                            self.think_opened = True
                    else:
                        if self.think_opened:
                            yield self._sse("\n</think>\n")
                            self.think_opened = False

                    if in_think:
                        yield self._sse(filtered)
                        continue

                    if self._tool_stream_enabled:
                        for kind, payload in self._handle_tool_stream(filtered):
                            if kind == "text":
                                yield self._sse(payload)
                            elif kind == "tool":
                                yield self._sse(tool_calls=[payload])
                        continue

                    yield self._sse(filtered)

            # Stream ended: close any open think block, flush tool parser,
            # then emit the finish chunk and [DONE].
            if self.think_opened:
                yield self._sse("</think>\n")

            if self._tool_stream_enabled:
                for kind, payload in self._flush_tool_stream():
                    if kind == "text":
                        yield self._sse(payload)
                    elif kind == "tool":
                        yield self._sse(tool_calls=[payload])
                finish_reason = "tool_calls" if self._tool_calls_seen else "stop"
                yield self._sse(finish=finish_reason)
            else:
                yield self._sse(finish="stop")

            yield "data: [DONE]\n\n"
        except asyncio.CancelledError:
            # NOTE(review): cancellation is swallowed (generator just ends)
            # while CollectProcessor re-raises — confirm this is intentional.
            logger.debug("Stream cancelled by client", extra={"model": self.model})
        except StreamIdleTimeoutError as e:
            raise UpstreamException(
                message=f"Stream idle timeout after {e.idle_seconds}s",
                status_code=504,
                details={
                    "error": str(e),
                    "type": "stream_idle_timeout",
                    "idle_seconds": e.idle_seconds,
                },
            )
        except RequestsError as e:
            if proc_base._is_http2_error(e):
                logger.warning(f"HTTP/2 stream error: {e}", extra={"model": self.model})
                raise UpstreamException(
                    message="Upstream connection closed unexpectedly",
                    status_code=502,
                    details={"error": str(e), "type": "http2_stream_error"},
                )
            logger.error(f"Stream request error: {e}", extra={"model": self.model})
            raise UpstreamException(
                message=f"Upstream request failed: {e}",
                status_code=502,
                details={"error": str(e)},
            )
        except Exception as e:
            logger.error(
                f"Stream processing error: {e}",
                extra={"model": self.model, "error_type": type(e).__name__},
            )
            raise
        finally:
            await self.close()
891
+
892
+
893
class CollectProcessor(proc_base.BaseProcessor):
    """Non-stream response processor.

    Drains the upstream stream, keeps the final modelResponse payload, renders
    card/image attachments into markdown, and returns a completed OpenAI
    chat.completion dict.
    """

    def __init__(self, model: str, token: str = "", tools: List[Dict[str, Any]] = None, tool_choice: Any = None):
        super().__init__(model, token)
        self.filter_tags = get_config("app.filter_tags")
        self.tools = tools
        self.tool_choice = tool_choice

    def _filter_content(self, content: str) -> str:
        """Filter special tags in content.

        Tool-usage cards are replaced with one-line summaries (see
        ``extract_tool_text``); all other configured tags are stripped.
        """
        if not content or not self.filter_tags:
            return content

        result = content
        if "xai:tool_usage_card" in self.filter_tags:
            # A rolloutId tag anywhere in the content prefixes the summaries.
            rollout_id = ""
            rollout_match = re.search(
                r"<rolloutId>(.*?)</rolloutId>", result, flags=re.DOTALL
            )
            if rollout_match:
                rollout_id = rollout_match.group(1).strip()

            result = re.sub(
                r"<xai:tool_usage_card[^>]*>.*?</xai:tool_usage_card>",
                lambda match: (
                    f"{extract_tool_text(match.group(0), rollout_id)}\n"
                    if extract_tool_text(match.group(0), rollout_id)
                    else ""
                ),
                result,
                flags=re.DOTALL,
            )

        for tag in self.filter_tags:
            if tag == "xai:tool_usage_card":
                continue
            # Matches both paired and self-closing forms of the tag.
            pattern = rf"<{re.escape(tag)}[^>]*>.*?</{re.escape(tag)}>|<{re.escape(tag)}[^>]*/>"
            result = re.sub(pattern, "", result, flags=re.DOTALL)

        return result

    async def process(self, response: AsyncIterable[bytes]) -> dict[str, Any]:
        """Process and collect full response.

        Returns:
            An OpenAI ``chat.completion`` dict; usage counters are zeroed
            (upstream does not report token counts here).
        """
        response_id = ""
        fingerprint = ""
        content = ""
        idle_timeout = get_config("chat.stream_timeout")

        try:
            async for line in proc_base._with_idle_timeout(
                response, idle_timeout, self.model
            ):
                line = proc_base._normalize_line(line)
                if not line:
                    continue
                try:
                    data = orjson.loads(line)
                except orjson.JSONDecodeError:
                    continue

                resp = data.get("result", {}).get("response", {})

                if (llm := resp.get("llmInfo")) and not fingerprint:
                    fingerprint = llm.get("modelHash", "")

                if mr := resp.get("modelResponse"):
                    response_id = mr.get("responseId", "")
                    content = mr.get("message", "")

                    # Index card attachments by id -> (title, image URL).
                    card_map: dict[str, tuple[str, str]] = {}
                    for raw in mr.get("cardAttachmentsJson") or []:
                        if not isinstance(raw, str) or not raw.strip():
                            continue
                        try:
                            card_data = orjson.loads(raw)
                        except orjson.JSONDecodeError:
                            continue
                        if not isinstance(card_data, dict):
                            continue
                        card_id = card_data.get("id")
                        image = card_data.get("image") or {}
                        original = image.get("original")
                        if not card_id or not original:
                            continue
                        title = image.get("title") or ""
                        card_map[card_id] = (title, original)

                    if content and card_map:
                        def _render_card(match: re.Match) -> str:
                            # Replace <grok:render card_id=...> with markdown.
                            # match.start() indexes into the pre-substitution
                            # `content`, which re.sub scans, so the preceding
                            # character check is valid.
                            card_id = match.group(1)
                            item = card_map.get(card_id)
                            if not item:
                                return ""
                            title, original = item
                            title_safe = title.replace("\n", " ").strip() or "image"
                            prefix = ""
                            if match.start() > 0:
                                prev = content[match.start() - 1]
                                if prev not in ("\n", "\r"):
                                    prefix = "\n"
                            return f"{prefix}![{title_safe}]({original})"

                        content = re.sub(
                            r'<grok:render[^>]*card_id="([^"]+)"[^>]*>.*?</grok:render>',
                            _render_card,
                            content,
                            flags=re.DOTALL,
                        )

                    # Append any generated images as rendered markdown.
                    if urls := proc_base._collect_images(mr):
                        content += "\n"
                        for url in urls:
                            parts = url.split("/")
                            img_id = parts[-2] if len(parts) >= 2 else "image"
                            dl_service = self._get_dl()
                            rendered = await dl_service.render_image(
                                url, self.token, img_id
                            )
                            content += f"{rendered}\n"

                    if (
                        (meta := mr.get("metadata", {}))
                        .get("llm_info", {})
                        .get("modelHash")
                    ):
                        fingerprint = meta["llm_info"]["modelHash"]

        except asyncio.CancelledError:
            logger.debug("Collect cancelled by client", extra={"model": self.model})
            raise
        except StreamIdleTimeoutError as e:
            logger.warning(f"Collect idle timeout: {e}", extra={"model": self.model})
            raise UpstreamException(
                message=f"Collect stream idle timeout after {e.idle_seconds}s",
                details={
                    "error": str(e),
                    "type": "stream_idle_timeout",
                    "idle_seconds": e.idle_seconds,
                    "status": 504,
                },
            )
        except RequestsError as e:
            if proc_base._is_http2_error(e):
                logger.warning(
                    f"HTTP/2 stream error in collect: {e}", extra={"model": self.model}
                )
                raise UpstreamException(
                    message="Upstream connection closed unexpectedly",
                    details={"error": str(e), "type": "http2_stream_error", "status": 502},
                )
            logger.error(f"Collect request error: {e}", extra={"model": self.model})
            raise UpstreamException(
                message=f"Upstream request failed: {e}",
                details={"error": str(e), "status": 502},
            )
        except Exception as e:
            logger.error(
                f"Collect processing error: {e}",
                extra={"model": self.model, "error_type": type(e).__name__},
            )
            raise
        finally:
            await self.close()

        content = self._filter_content(content)

        # Parse for tool calls if tools were provided.
        finish_reason = "stop"
        tool_calls_result = None
        if self.tools and self.tool_choice != "none":
            text_content, tool_calls_list = parse_tool_calls(content, self.tools)
            if tool_calls_list:
                tool_calls_result = tool_calls_list
                content = text_content  # May be None
                finish_reason = "tool_calls"

        message_obj = {
            "role": "assistant",
            "content": content,
            "refusal": None,
            "annotations": [],
        }
        if tool_calls_result:
            message_obj["tool_calls"] = tool_calls_result

        return {
            "id": response_id,
            "object": "chat.completion",
            "created": self.created,
            "model": self.model,
            "system_fingerprint": fingerprint,
            "choices": [
                {
                    "index": 0,
                    "message": message_obj,
                    "finish_reason": finish_reason,
                }
            ],
            "usage": {
                "prompt_tokens": 0,
                "completion_tokens": 0,
                "total_tokens": 0,
                "prompt_tokens_details": {
                    "cached_tokens": 0,
                    "text_tokens": 0,
                    "audio_tokens": 0,
                    "image_tokens": 0,
                },
                "completion_tokens_details": {
                    "text_tokens": 0,
                    "audio_tokens": 0,
                    "reasoning_tokens": 0,
                },
            },
        }
1109
+
1110
+
1111
# Public API of this module.
__all__ = [
    "GrokChatService",
    "MessageExtractor",
    "ChatService",
]
app/services/grok/services/image.py ADDED
@@ -0,0 +1,794 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Grok image services.
3
+ """
4
+
5
+ import asyncio
6
+ import base64
7
+ import math
8
+ import time
9
+ from dataclasses import dataclass
10
+ from pathlib import Path
11
+ from typing import Any, AsyncGenerator, AsyncIterable, Dict, List, Optional, Union
12
+
13
+ import orjson
14
+
15
+ from app.core.config import get_config
16
+ from app.core.logger import logger
17
+ from app.core.storage import DATA_DIR
18
+ from app.core.exceptions import AppException, ErrorType, UpstreamException
19
+ from app.services.grok.utils.process import BaseProcessor
20
+ from app.services.grok.utils.retry import pick_token, rate_limited
21
+ from app.services.grok.utils.response import make_response_id, make_chat_chunk, wrap_image_content
22
+ from app.services.grok.utils.stream import wrap_stream_with_usage
23
+ from app.services.token import EffortType
24
+ from app.services.reverse.ws_imagine import ImagineWebSocketReverse
25
+
26
+
27
+ image_service = ImagineWebSocketReverse()
28
+
29
+
30
@dataclass
class ImageGenerationResult:
    """Outcome of an image-generation call.

    When ``stream`` is True, ``data`` is an async generator yielding SSE
    strings; otherwise it is the list of rendered images (URLs or base64
    payloads, depending on the requested response format).
    """

    stream: bool
    data: Union[AsyncGenerator[str, None], List[str]]
    # Optional usage block to substitute into the final response payload.
    usage_override: Optional[dict] = None
35
+
36
+
37
class ImageGenerationService:
    """Image generation orchestration service.

    Routes prompts to the WebSocket imagine backend, handling:
    token selection/rotation on 429s, optional NSFW tag routing,
    streaming vs. collected output, and recovery attempts when fewer
    final images arrive than requested.
    """

    async def generate(
        self,
        *,
        token_mgr: Any,
        token: str,
        model_info: Any,
        prompt: str,
        n: int,
        response_format: str,
        size: str,
        aspect_ratio: str,
        stream: bool,
        enable_nsfw: Optional[bool] = None,
        chat_format: bool = False,
    ) -> ImageGenerationResult:
        """Generate ``n`` images for ``prompt``, retrying across tokens on rate limits.

        Returns a streaming result (SSE generator) when ``stream`` is True,
        otherwise a collected list of images. Raises AppException (429) when
        no token is available and the last upstream error (if any) otherwise.
        """
        max_token_retries = int(get_config("retry.max_retry") or 3)
        tried_tokens: set[str] = set()
        last_error: Optional[Exception] = None

        # resolve nsfw once for routing and upstream
        if enable_nsfw is None:
            enable_nsfw = bool(get_config("image.nsfw"))
        prefer_tags = {"nsfw"} if enable_nsfw else None

        if stream:

            async def _stream_retry() -> AsyncGenerator[str, None]:
                nonlocal last_error
                for attempt in range(max_token_retries):
                    # Honor the caller-preferred token only on the first attempt
                    # and only when no tag routing is required.
                    preferred = token if (attempt == 0 and not prefer_tags) else None
                    current_token = await pick_token(
                        token_mgr,
                        model_info.model_id,
                        tried_tokens,
                        preferred=preferred,
                        prefer_tags=prefer_tags,
                    )
                    if not current_token:
                        if last_error:
                            raise last_error
                        raise AppException(
                            message="No available tokens. Please try again later.",
                            error_type=ErrorType.RATE_LIMIT.value,
                            code="rate_limit_exceeded",
                            status_code=429,
                        )

                    tried_tokens.add(current_token)
                    yielded = False
                    try:
                        result = await self._stream_ws(
                            token_mgr=token_mgr,
                            token=current_token,
                            model_info=model_info,
                            prompt=prompt,
                            n=n,
                            response_format=response_format,
                            size=size,
                            aspect_ratio=aspect_ratio,
                            enable_nsfw=enable_nsfw,
                            chat_format=chat_format,
                        )
                        async for chunk in result.data:
                            yielded = True
                            yield chunk
                        return
                    except UpstreamException as e:
                        last_error = e
                        if rate_limited(e):
                            # Once output has been yielded we cannot retry
                            # transparently -- the client already saw data.
                            if yielded:
                                raise
                            await token_mgr.mark_rate_limited(current_token)
                            logger.warning(
                                f"Token {current_token[:10]}... rate limited (429), "
                                f"trying next token (attempt {attempt + 1}/{max_token_retries})"
                            )
                            continue
                        raise

                if last_error:
                    raise last_error
                raise AppException(
                    message="No available tokens. Please try again later.",
                    error_type=ErrorType.RATE_LIMIT.value,
                    code="rate_limit_exceeded",
                    status_code=429,
                )

            return ImageGenerationResult(stream=True, data=_stream_retry())

        # Non-streaming path: same token-rotation loop, collected output.
        for attempt in range(max_token_retries):
            preferred = token if (attempt == 0 and not prefer_tags) else None
            current_token = await pick_token(
                token_mgr,
                model_info.model_id,
                tried_tokens,
                preferred=preferred,
                prefer_tags=prefer_tags,
            )
            if not current_token:
                if last_error:
                    raise last_error
                raise AppException(
                    message="No available tokens. Please try again later.",
                    error_type=ErrorType.RATE_LIMIT.value,
                    code="rate_limit_exceeded",
                    status_code=429,
                )

            tried_tokens.add(current_token)
            try:
                return await self._collect_ws(
                    token_mgr=token_mgr,
                    token=current_token,
                    model_info=model_info,
                    tried_tokens=tried_tokens,
                    prompt=prompt,
                    n=n,
                    response_format=response_format,
                    aspect_ratio=aspect_ratio,
                    enable_nsfw=enable_nsfw,
                )
            except UpstreamException as e:
                last_error = e
                if rate_limited(e):
                    await token_mgr.mark_rate_limited(current_token)
                    logger.warning(
                        f"Token {current_token[:10]}... rate limited (429), "
                        f"trying next token (attempt {attempt + 1}/{max_token_retries})"
                    )
                    continue
                raise

        if last_error:
            raise last_error
        raise AppException(
            message="No available tokens. Please try again later.",
            error_type=ErrorType.RATE_LIMIT.value,
            code="rate_limit_exceeded",
            status_code=429,
        )

    async def _stream_ws(
        self,
        *,
        token_mgr: Any,
        token: str,
        model_info: Any,
        prompt: str,
        n: int,
        response_format: str,
        size: str,
        aspect_ratio: str,
        enable_nsfw: Optional[bool] = None,
        chat_format: bool = False,
    ) -> ImageGenerationResult:
        """Open one upstream WS stream and wrap it with usage accounting."""
        if enable_nsfw is None:
            enable_nsfw = bool(get_config("image.nsfw"))
        # +1 so at least one attempt happens even when config is 0; cap at 10.
        stream_retries = int(get_config("image.blocked_parallel_attempts") or 5) + 1
        stream_retries = max(1, min(stream_retries, 10))
        upstream = image_service.stream(
            token=token,
            prompt=prompt,
            aspect_ratio=aspect_ratio,
            n=n,
            enable_nsfw=enable_nsfw,
            max_retries=stream_retries,
        )
        processor = ImageWSStreamProcessor(
            model_info.model_id,
            token,
            n=n,
            response_format=response_format,
            size=size,
            chat_format=chat_format,
        )
        stream = wrap_stream_with_usage(
            processor.process(upstream),
            token_mgr,
            token,
            model_info.model_id,
        )
        return ImageGenerationResult(stream=True, data=stream)

    async def _collect_ws(
        self,
        *,
        token_mgr: Any,
        token: str,
        model_info: Any,
        tried_tokens: set[str],
        prompt: str,
        n: int,
        response_format: str,
        aspect_ratio: str,
        enable_nsfw: Optional[bool] = None,
    ) -> ImageGenerationResult:
        """Collect ``n`` final images via batched WS calls, with recovery.

        Fans out parallel batches (upstream yields ~6 finals per call),
        de-duplicates results, and runs extra recovery attempts -- optionally
        on distinct tokens -- when fewer finals than requested arrive.
        """
        if enable_nsfw is None:
            enable_nsfw = bool(get_config("image.nsfw"))
        all_images: List[str] = []
        seen = set()
        # Upstream typically returns up to 6 final images per call.
        expected_per_call = 6
        calls_needed = max(1, int(math.ceil(n / expected_per_call)))
        calls_needed = min(calls_needed, n)

        async def _fetch_batch(call_target: int, call_token: str):
            stream_retries = int(get_config("image.blocked_parallel_attempts") or 5) + 1
            stream_retries = max(1, min(stream_retries, 10))
            upstream = image_service.stream(
                token=call_token,
                prompt=prompt,
                aspect_ratio=aspect_ratio,
                n=call_target,
                enable_nsfw=enable_nsfw,
                max_retries=stream_retries,
            )
            processor = ImageWSCollectProcessor(
                model_info.model_id,
                token,
                n=call_target,
                response_format=response_format,
            )
            return await processor.process(upstream)

        tasks = []
        for i in range(calls_needed):
            remaining = n - (i * expected_per_call)
            call_target = min(expected_per_call, remaining)
            tasks.append(_fetch_batch(call_target, token))

        results = await asyncio.gather(*tasks, return_exceptions=True)
        for batch in results:
            if isinstance(batch, Exception):
                logger.warning(f"WS batch failed: {batch}")
                continue
            for img in batch:
                if img not in seen:
                    seen.add(img)
                    all_images.append(img)
                if len(all_images) >= n:
                    break
            if len(all_images) >= n:
                break

        # If upstream likely blocked/reviewed some images, run extra parallel attempts
        # and only keep valid finals selected by ws_imagine classification.
        if len(all_images) < n:
            remaining = n - len(all_images)
            extra_attempts = int(get_config("image.blocked_parallel_attempts") or 5)
            extra_attempts = max(0, min(extra_attempts, 10))
            parallel_enabled = bool(get_config("image.blocked_parallel_enabled", True))
            if extra_attempts > 0:
                logger.warning(
                    f"Image finals insufficient ({len(all_images)}/{n}), running "
                    f"{extra_attempts} recovery attempts for remaining={remaining}, "
                    f"parallel_enabled={parallel_enabled}"
                )
                extra_tasks = []
                if parallel_enabled:
                    # Prefer distinct, not-yet-tried tokens for recovery runs.
                    recovery_tried = set(tried_tokens)
                    recovery_tokens: List[str] = []
                    for _ in range(extra_attempts):
                        recovery_token = await pick_token(
                            token_mgr,
                            model_info.model_id,
                            recovery_tried,
                        )
                        if not recovery_token:
                            break
                        recovery_tried.add(recovery_token)
                        recovery_tokens.append(recovery_token)

                    if recovery_tokens:
                        logger.info(
                            f"Recovery using {len(recovery_tokens)} distinct tokens"
                        )
                    for recovery_token in recovery_tokens:
                        extra_tasks.append(
                            _fetch_batch(min(expected_per_call, remaining), recovery_token)
                        )
                else:
                    # Sequential-token mode: reuse the current token.
                    extra_tasks = [
                        _fetch_batch(min(expected_per_call, remaining), token)
                        for _ in range(extra_attempts)
                    ]

                if not extra_tasks:
                    logger.warning("No tokens available for recovery attempts")
                    extra_results = []
                else:
                    extra_results = await asyncio.gather(*extra_tasks, return_exceptions=True)
                for batch in extra_results:
                    if isinstance(batch, Exception):
                        logger.warning(f"WS recovery batch failed: {batch}")
                        continue
                    for img in batch:
                        if img not in seen:
                            seen.add(img)
                            all_images.append(img)
                        if len(all_images) >= n:
                            break
                    if len(all_images) >= n:
                        break
                logger.info(
                    f"Image recovery attempts completed: finals={len(all_images)}/{n}, "
                    f"attempts={extra_attempts}"
                )

        if len(all_images) < n:
            logger.error(
                f"Image generation failed after recovery attempts: finals={len(all_images)}/{n}, "
                f"blocked_parallel_attempts={int(get_config('image.blocked_parallel_attempts') or 5)}"
            )
            raise UpstreamException(
                "Image generation blocked or no valid final image",
                details={
                    "error_code": "blocked_no_final_image",
                    "final_images": len(all_images),
                    "requested": n,
                },
            )

        # Usage accounting is best-effort; a failure must not fail the request.
        try:
            await token_mgr.consume(token, self._get_effort(model_info))
        except Exception as e:
            logger.warning(f"Failed to consume token: {e}")

        selected = self._select_images(all_images, n)
        usage_override = {
            "total_tokens": 0,
            "input_tokens": 0,
            "output_tokens": 0,
            "input_tokens_details": {"text_tokens": 0, "image_tokens": 0},
        }
        return ImageGenerationResult(
            stream=False, data=selected, usage_override=usage_override
        )

    @staticmethod
    def _get_effort(model_info: Any) -> EffortType:
        """Map the model's cost tier to a token-consumption effort level."""
        return (
            EffortType.HIGH
            if (model_info and model_info.cost.value == "high")
            else EffortType.LOW
        )

    @staticmethod
    def _select_images(images: List[str], n: int) -> List[str]:
        """Return exactly ``n`` entries, padding shortfalls with "error"."""
        if len(images) >= n:
            return images[:n]
        selected = images.copy()
        while len(selected) < n:
            selected.append("error")
        return selected
394
+
395
+
396
class ImageWSBaseProcessor(BaseProcessor):
    """Shared helpers for WebSocket image processors.

    Normalizes the requested response format, resolves the field name used
    in SSE payloads, and provides utilities to persist base64 blobs under
    ``DATA_DIR/tmp/image`` and expose them as downloadable file URLs.
    """

    def __init__(self, model: str, token: str = "", response_format: str = "b64_json"):
        # "base64" is accepted as a caller-facing alias of "b64_json".
        if response_format == "base64":
            response_format = "b64_json"
        super().__init__(model, token)
        self.response_format = response_format
        # Only two formats remain after the alias normalization above, so the
        # former `elif response_format == "base64"` branch was unreachable.
        self.response_field = "url" if response_format == "url" else "b64_json"
        self._image_dir: Optional[Path] = None

    def _ensure_image_dir(self) -> Path:
        """Create (once) and return the local image cache directory."""
        if self._image_dir is None:
            base_dir = DATA_DIR / "tmp" / "image"
            base_dir.mkdir(parents=True, exist_ok=True)
            self._image_dir = base_dir
        return self._image_dir

    def _strip_base64(self, blob: str) -> str:
        """Drop a leading ``data:...;base64,`` header from *blob*, if present."""
        if not blob:
            return ""
        if "," in blob and "base64" in blob.split(",", 1)[0]:
            return blob.split(",", 1)[1]
        return blob

    def _guess_ext(self, blob: str) -> Optional[str]:
        """Guess the image extension from a data-URI header or base64 magic bytes."""
        if not blob:
            return None
        header = ""
        data = blob
        if "," in blob and "base64" in blob.split(",", 1)[0]:
            header, data = blob.split(",", 1)
            header = header.lower()
        if "image/png" in header:
            return "png"
        if "image/jpeg" in header or "image/jpg" in header:
            return "jpg"
        # Base64 of the PNG signature and of the JPEG SOI marker, respectively.
        if data.startswith("iVBORw0KGgo"):
            return "png"
        if data.startswith("/9j/"):
            return "jpg"
        return None

    def _filename(self, image_id: str, is_final: bool, ext: Optional[str] = None) -> str:
        """Build the on-disk filename; default extension depends on finality."""
        if ext:
            ext = ext.lower()
            if ext == "jpeg":
                ext = "jpg"
        if not ext:
            ext = "jpg" if is_final else "png"
        return f"{image_id}.{ext}"

    def _build_file_url(self, filename: str) -> str:
        """Build the (absolute when app_url is configured) download URL.

        Fixed: the path previously interpolated a literal placeholder instead
        of the ``filename`` argument, producing broken URLs.
        """
        app_url = get_config("app.app_url")
        if app_url:
            return f"{app_url.rstrip('/')}/v1/files/image/{filename}"
        return f"/v1/files/image/{filename}"

    async def _save_blob(
        self, image_id: str, blob: str, is_final: bool, ext: Optional[str] = None
    ) -> str:
        """Decode and persist a base64 blob; return its file URL ('' when empty)."""
        data = self._strip_base64(blob)
        if not data:
            return ""
        image_dir = self._ensure_image_dir()
        ext = ext or self._guess_ext(blob)
        filename = self._filename(image_id, is_final, ext=ext)
        filepath = image_dir / filename

        def _write_file():
            with open(filepath, "wb") as f:
                f.write(base64.b64decode(data))

        # Offload blocking file I/O so the event loop is not stalled.
        await asyncio.to_thread(_write_file)
        return self._build_file_url(filename)

    def _pick_best(self, existing: Optional[Dict], incoming: Dict) -> Dict:
        """Choose between two frames of the same image: finals win, then size."""
        if not existing:
            return incoming
        if incoming.get("is_final") and not existing.get("is_final"):
            return incoming
        if existing.get("is_final") and not incoming.get("is_final"):
            return existing
        if incoming.get("blob_size", 0) > existing.get("blob_size", 0):
            return incoming
        return existing

    async def _to_output(self, image_id: str, item: Dict) -> str:
        """Render one image item per the configured response format ('' on error)."""
        try:
            if self.response_format == "url":
                return await self._save_blob(
                    image_id,
                    item.get("blob", ""),
                    item.get("is_final", False),
                    ext=item.get("ext"),
                )
            return self._strip_base64(item.get("blob", ""))
        except Exception as e:
            logger.warning(f"Image output failed: {e}")
            return ""
501
+
502
+
503
class ImageWSStreamProcessor(ImageWSBaseProcessor):
    """WebSocket image stream processor.

    Translates upstream WS events into SSE output: partial/medium frames are
    emitted progressively (except in chat format, which exposes only finals),
    and the best final frame per slot is emitted once the stream ends.
    """

    def __init__(
        self,
        model: str,
        token: str = "",
        n: int = 1,
        response_format: str = "b64_json",
        size: str = "1024x1024",
        chat_format: bool = False,
    ):
        super().__init__(model, token, response_format)
        self.n = n
        self.size = size
        # When True, emit OpenAI chat.completion.chunk events instead of
        # image_generation.* events.
        self.chat_format = chat_format
        # For n == 1: the first image_id seen becomes the single target slot.
        self._target_id: Optional[str] = None
        # image_id -> output slot index (first-come, capped at n).
        self._index_map: Dict[str, int] = {}
        # image_id -> last partial_image_index emitted.
        self._partial_map: Dict[str, int] = {}
        # image_ids whose first (initial) partial has been sent.
        self._initial_sent: set[str] = set()
        self._id_generated: bool = False
        self._response_id: str = ""

    def _assign_index(self, image_id: str) -> Optional[int]:
        """Assign a stable slot index to ``image_id``; None once n slots are used."""
        if image_id in self._index_map:
            return self._index_map[image_id]
        if len(self._index_map) >= self.n:
            return None
        self._index_map[image_id] = len(self._index_map)
        return self._index_map[image_id]

    def _sse(self, event: str, data: dict) -> str:
        """Serialize one named SSE event."""
        return f"event: {event}\ndata: {orjson.dumps(data).decode()}\n\n"

    async def process(self, response: AsyncIterable[dict]) -> AsyncGenerator[str, None]:
        """Consume upstream WS events and yield SSE strings.

        Raises UpstreamException on upstream rate limits so callers can
        rotate tokens; other upstream errors are surfaced as an SSE "error"
        event followed by stream termination.
        """
        images: Dict[str, Dict] = {}
        emitted_chat_chunk = False

        async for item in response:
            if item.get("type") == "error":
                message = item.get("error") or "Upstream error"
                code = item.get("error_code") or "upstream_error"
                status = item.get("status")
                # Rate limits must propagate as exceptions for token rotation.
                if code == "rate_limit_exceeded" or status == 429:
                    raise UpstreamException(message, details=item)
                yield self._sse(
                    "error",
                    {
                        "error": {
                            "message": message,
                            "type": "server_error",
                            "code": code,
                        }
                    },
                )
                return
            if item.get("type") != "image":
                continue

            image_id = item.get("image_id")
            if not image_id:
                continue

            if self.n == 1:
                if self._target_id is None:
                    self._target_id = image_id
                index = 0 if image_id == self._target_id else None
            else:
                index = self._assign_index(image_id)

            # Track the best frame seen per image for the final emission pass.
            images[image_id] = self._pick_best(images.get(image_id), item)

            if index is None:
                continue

            if item.get("stage") != "final":
                # Chat Completions image stream should only expose final results.
                if self.chat_format:
                    continue
                if image_id not in self._initial_sent:
                    self._initial_sent.add(image_id)
                    stage = item.get("stage") or "preview"
                    if stage == "medium":
                        partial_index = 1
                        self._partial_map[image_id] = 1
                    else:
                        partial_index = 0
                        self._partial_map[image_id] = 0
                else:
                    stage = item.get("stage") or "partial"
                    # Skip repeated previews after the first frame was sent.
                    if stage == "preview":
                        continue
                    partial_index = self._partial_map.get(image_id, 0)
                    if stage == "medium":
                        partial_index = max(partial_index, 1)
                    self._partial_map[image_id] = partial_index

                if self.response_format == "url":
                    partial_id = f"{image_id}-{stage}-{partial_index}"
                    partial_out = await self._save_blob(
                        partial_id,
                        item.get("blob", ""),
                        False,
                        ext=item.get("ext"),
                    )
                else:
                    partial_out = self._strip_base64(item.get("blob", ""))

                if self.chat_format and partial_out:
                    partial_out = wrap_image_content(partial_out, self.response_format)

                if not partial_out:
                    continue

                if self.chat_format:
                    # OpenAI ChatCompletion chunk format for partial
                    if not self._id_generated:
                        self._response_id = make_response_id()
                        self._id_generated = True
                    emitted_chat_chunk = True
                    yield self._sse(
                        "chat.completion.chunk",
                        make_chat_chunk(
                            self._response_id,
                            self.model,
                            partial_out,
                            index=index,
                        ),
                    )
                else:
                    # Original image_generation format
                    yield self._sse(
                        "image_generation.partial_image",
                        {
                            "type": "image_generation.partial_image",
                            self.response_field: partial_out,
                            "created_at": int(time.time()),
                            "size": self.size,
                            "index": index,
                            "partial_image_index": partial_index,
                            "image_id": image_id,
                            "stage": stage,
                        },
                    )

        # Stream ended: select the frame(s) to emit as final output.
        if self.n == 1:
            target_item = images.get(self._target_id) if self._target_id else None
            if target_item and target_item.get("is_final", False):
                selected = [(self._target_id, target_item)]
            elif images:
                # No final for the target: fall back to the best frame overall
                # (finals first, then largest blob).
                selected = [
                    max(
                        images.items(),
                        key=lambda x: (
                            x[1].get("is_final", False),
                            x[1].get("blob_size", 0),
                        ),
                    )
                ]
            else:
                selected = []
        else:
            selected = [
                (image_id, images[image_id])
                for image_id in self._index_map
                if image_id in images and images[image_id].get("is_final", False)
            ]

        for image_id, item in selected:
            if self.response_format == "url":
                final_image_id = image_id
                # Keep original imagine image name for imagine chat stream output.
                if self.model != "grok-imagine-1.0-fast":
                    final_image_id = f"{image_id}-final"
                output = await self._save_blob(
                    final_image_id,
                    item.get("blob", ""),
                    item.get("is_final", False),
                    ext=item.get("ext"),
                )
                if self.chat_format and output:
                    output = wrap_image_content(output, self.response_format)
            else:
                output = await self._to_output(image_id, item)
                if self.chat_format and output:
                    output = wrap_image_content(output, self.response_format)

            if not output:
                continue

            if self.n == 1:
                index = 0
            else:
                index = self._index_map.get(image_id, 0)

            if not self._id_generated:
                self._response_id = make_response_id()
                self._id_generated = True

            if self.chat_format:
                # OpenAI ChatCompletion chunk format
                emitted_chat_chunk = True
                yield self._sse(
                    "chat.completion.chunk",
                    make_chat_chunk(
                        self._response_id,
                        self.model,
                        output,
                        index=index,
                        is_final=True,
                    ),
                )
            else:
                # Original image_generation format
                yield self._sse(
                    "image_generation.completed",
                    {
                        "type": "image_generation.completed",
                        self.response_field: output,
                        "created_at": int(time.time()),
                        "size": self.size,
                        "index": index,
                        "image_id": image_id,
                        "stage": "final",
                        "usage": {
                            "total_tokens": 0,
                            "input_tokens": 0,
                            "output_tokens": 0,
                            "input_tokens_details": {"text_tokens": 0, "image_tokens": 0},
                        },
                    },
                )

        if self.chat_format:
            if not self._id_generated:
                self._response_id = make_response_id()
                self._id_generated = True
            # Guarantee at least one (possibly empty) final chunk in chat mode.
            if not emitted_chat_chunk:
                yield self._sse(
                    "chat.completion.chunk",
                    make_chat_chunk(
                        self._response_id,
                        self.model,
                        "",
                        index=0,
                        is_final=True,
                    ),
                )
            yield "data: [DONE]\n\n"
752
+
753
+
754
class ImageWSCollectProcessor(ImageWSBaseProcessor):
    """Aggregates a WebSocket image stream into a list of final outputs."""

    def __init__(
        self, model: str, token: str = "", n: int = 1, response_format: str = "b64_json"
    ):
        super().__init__(model, token, response_format)
        # Maximum number of final images to return (falsy disables the cap).
        self.n = n

    async def process(self, response: AsyncIterable[dict]) -> List[str]:
        """Drain the upstream stream and return up to ``n`` rendered finals.

        Raises UpstreamException as soon as an error event arrives.
        """
        best_by_id: Dict[str, Dict] = {}

        async for event in response:
            kind = event.get("type")
            if kind == "error":
                raise UpstreamException(
                    event.get("error") or "Upstream error", details=event
                )
            if kind != "image":
                continue
            image_id = event.get("image_id")
            if image_id:
                # Keep only the best frame observed per image id.
                best_by_id[image_id] = self._pick_best(
                    best_by_id.get(image_id), event
                )

        # Final frames only, largest blobs first, capped at n.
        finals = [it for it in best_by_id.values() if it.get("is_final", False)]
        finals.sort(key=lambda it: it.get("blob_size", 0), reverse=True)
        if self.n:
            del finals[self.n:]

        outputs: List[str] = []
        for it in finals:
            rendered = await self._to_output(it.get("image_id", ""), it)
            if rendered:
                outputs.append(rendered)
        return outputs
792
+
793
+
794
+ __all__ = ["ImageGenerationService"]
app/services/grok/services/image_edit.py ADDED
@@ -0,0 +1,567 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Grok image edit service.
3
+ """
4
+
5
+ import asyncio
6
+ import os
7
+ import random
8
+ import re
9
+ import time
10
+ from dataclasses import dataclass
11
+ from typing import AsyncGenerator, AsyncIterable, List, Union, Any
12
+
13
+ import orjson
14
+ from curl_cffi.requests.errors import RequestsError
15
+
16
+ from app.core.config import get_config
17
+ from app.core.exceptions import (
18
+ AppException,
19
+ ErrorType,
20
+ UpstreamException,
21
+ StreamIdleTimeoutError,
22
+ )
23
+ from app.core.logger import logger
24
+ from app.services.grok.utils.process import (
25
+ BaseProcessor,
26
+ _with_idle_timeout,
27
+ _normalize_line,
28
+ _collect_images,
29
+ _is_http2_error,
30
+ )
31
+ from app.services.grok.utils.upload import UploadService
32
+ from app.services.grok.utils.retry import pick_token, rate_limited
33
+ from app.services.grok.utils.response import make_response_id, make_chat_chunk, wrap_image_content
34
+ from app.services.grok.services.chat import GrokChatService
35
+ from app.services.grok.services.video import VideoService
36
+ from app.services.grok.utils.stream import wrap_stream_with_usage
37
+ from app.services.token import EffortType
38
+
39
+
40
@dataclass
class ImageEditResult:
    """Outcome of an image-edit call.

    ``data`` is an async SSE generator when ``stream`` is True, otherwise
    the list of rendered images.
    """

    stream: bool
    data: Union[AsyncGenerator[str, None], List[str]]
44
+
45
+
46
+ class ImageEditService:
47
+ """Image edit orchestration service."""
48
+
49
    async def edit(
        self,
        *,
        token_mgr: Any,
        token: str,
        model_info: Any,
        prompt: str,
        images: List[str],
        n: int,
        response_format: str,
        stream: bool,
        chat_format: bool = False,
    ) -> ImageEditResult:
        """Edit reference images according to ``prompt``.

        Uploads up to the 3 most recent references, resolves a parent post
        id, then drives the Grok chat pipeline with image-edit overrides.
        Rotates tokens on upstream 429s up to the configured retry count.
        Raises AppException (429) when no token is available and re-raises
        the last upstream error otherwise.
        """
        if len(images) > 3:
            # Upstream supports at most 3 references; keep the newest ones.
            logger.info(
                "Image edit received %d references; using the most recent 3",
                len(images),
            )
            images = images[-3:]

        max_token_retries = int(get_config("retry.max_retry") or 3)
        tried_tokens: set[str] = set()
        last_error: Exception | None = None

        for attempt in range(max_token_retries):
            # Honor the caller-supplied token only on the first attempt.
            preferred = token if attempt == 0 else None
            current_token = await pick_token(
                token_mgr, model_info.model_id, tried_tokens, preferred=preferred
            )
            if not current_token:
                if last_error:
                    raise last_error
                raise AppException(
                    message="No available tokens. Please try again later.",
                    error_type=ErrorType.RATE_LIMIT.value,
                    code="rate_limit_exceeded",
                    status_code=429,
                )

            tried_tokens.add(current_token)
            try:
                image_urls = await self._upload_images(images, current_token)
                parent_post_id = await self._get_parent_post_id(
                    current_token, image_urls
                )

                model_config_override = {
                    "modelMap": {
                        "imageEditModel": "imagine",
                        "imageEditModelConfig": {
                            "imageReferences": image_urls,
                        },
                    }
                }
                if parent_post_id:
                    model_config_override["modelMap"]["imageEditModelConfig"][
                        "parentPostId"
                    ] = parent_post_id

                tool_overrides = {"imageGen": True}

                if stream:
                    response = await GrokChatService().chat(
                        token=current_token,
                        message=prompt,
                        model=model_info.grok_model,
                        mode=None,
                        stream=True,
                        tool_overrides=tool_overrides,
                        model_config_override=model_config_override,
                    )
                    processor = ImageStreamProcessor(
                        model_info.model_id,
                        current_token,
                        n=n,
                        response_format=response_format,
                        chat_format=chat_format,
                    )
                    return ImageEditResult(
                        stream=True,
                        data=wrap_stream_with_usage(
                            processor.process(response),
                            token_mgr,
                            current_token,
                            model_info.model_id,
                        ),
                    )

                images_out = await self._collect_images(
                    token=current_token,
                    prompt=prompt,
                    model_info=model_info,
                    n=n,
                    response_format=response_format,
                    tool_overrides=tool_overrides,
                    model_config_override=model_config_override,
                )
                # Usage accounting is best-effort and must not fail the request.
                try:
                    effort = (
                        EffortType.HIGH
                        if (model_info and model_info.cost.value == "high")
                        else EffortType.LOW
                    )
                    await token_mgr.consume(current_token, effort)
                    logger.debug(
                        f"Image edit completed, recorded usage (effort={effort.value})"
                    )
                except Exception as e:
                    logger.warning(f"Failed to record image edit usage: {e}")
                return ImageEditResult(stream=False, data=images_out)

            except UpstreamException as e:
                last_error = e
                if rate_limited(e):
                    await token_mgr.mark_rate_limited(current_token)
                    logger.warning(
                        f"Token {current_token[:10]}... rate limited (429), "
                        f"trying next token (attempt {attempt + 1}/{max_token_retries})"
                    )
                    continue
                raise

        if last_error:
            raise last_error
        raise AppException(
            message="No available tokens. Please try again later.",
            error_type=ErrorType.RATE_LIMIT.value,
            code="rate_limit_exceeded",
            status_code=429,
        )
179
+
180
+ async def _upload_images(self, images: List[str], token: str) -> List[str]:
181
+ image_urls: List[str] = []
182
+ upload_service = UploadService()
183
+ try:
184
+ for image in images:
185
+ _, file_uri = await upload_service.upload_file(image, token)
186
+ if file_uri:
187
+ if file_uri.startswith("http"):
188
+ image_urls.append(file_uri)
189
+ else:
190
+ image_urls.append(
191
+ f"https://assets.grok.com/{file_uri.lstrip('/')}"
192
+ )
193
+ finally:
194
+ await upload_service.close()
195
+
196
+ if not image_urls:
197
+ raise AppException(
198
+ message="Image upload failed",
199
+ error_type=ErrorType.SERVER.value,
200
+ code="upload_failed",
201
+ )
202
+
203
+ return image_urls
204
+
205
+ async def _get_parent_post_id(self, token: str, image_urls: List[str]) -> str:
206
+ parent_post_id = None
207
+ try:
208
+ media_service = VideoService()
209
+ parent_post_id = await media_service.create_image_post(token, image_urls[0])
210
+ logger.debug(f"Parent post ID: {parent_post_id}")
211
+ except Exception as e:
212
+ logger.warning(f"Create image post failed: {e}")
213
+
214
+ if parent_post_id:
215
+ return parent_post_id
216
+
217
+ for url in image_urls:
218
+ match = re.search(r"/generated/([a-f0-9-]+)/", url)
219
+ if match:
220
+ parent_post_id = match.group(1)
221
+ logger.debug(f"Parent post ID: {parent_post_id}")
222
+ break
223
+ match = re.search(r"/users/[^/]+/([a-f0-9-]+)/content", url)
224
+ if match:
225
+ parent_post_id = match.group(1)
226
+ logger.debug(f"Parent post ID: {parent_post_id}")
227
+ break
228
+
229
+ return parent_post_id or ""
230
+
231
    async def _collect_images(
        self,
        *,
        token: str,
        prompt: str,
        model_info: Any,
        n: int,
        response_format: str,
        tool_overrides: dict,
        model_config_override: dict,
    ) -> List[str]:
        """Run the upstream edit chat (possibly several calls in parallel)
        and collect the resulting image payloads.

        Each upstream call is assumed to yield up to two images, so
        ceil(n / 2) calls are issued. Always returns exactly ``n`` entries;
        a shortfall is padded with the sentinel string "error".
        """
        # Each upstream call produces at most 2 images — TODO confirm upstream contract.
        calls_needed = (n + 1) // 2

        async def _call_edit():
            # One streaming edit request, drained into a list of images.
            response = await GrokChatService().chat(
                token=token,
                message=prompt,
                model=model_info.grok_model,
                mode=None,
                stream=True,
                tool_overrides=tool_overrides,
                model_config_override=model_config_override,
            )
            processor = ImageCollectProcessor(
                model_info.model_id, token, response_format=response_format
            )
            return await processor.process(response)

        last_error: Exception | None = None
        rate_limit_error: Exception | None = None

        if calls_needed == 1:
            all_images = await _call_edit()
        else:
            # Fan out concurrently and tolerate partial failures.
            tasks = [_call_edit() for _ in range(calls_needed)]
            results = await asyncio.gather(*tasks, return_exceptions=True)

            all_images: List[str] = []
            for result in results:
                if isinstance(result, Exception):
                    logger.error(f"Concurrent call failed: {result}")
                    last_error = result
                    if rate_limited(result):
                        # Remember rate limits separately — they take
                        # precedence when reporting a total failure below.
                        rate_limit_error = result
                elif isinstance(result, list):
                    all_images.extend(result)

        if not all_images:
            # Nothing succeeded: surface the most actionable error first.
            if rate_limit_error:
                raise rate_limit_error
            if last_error:
                raise last_error
            raise UpstreamException(
                "Image edit returned no results", details={"error": "empty_result"}
            )

        if len(all_images) >= n:
            return all_images[:n]

        # Fewer images than requested: pad with "error" placeholders so the
        # caller always receives exactly n entries.
        selected_images = all_images.copy()
        while len(selected_images) < n:
            selected_images.append("error")
        return selected_images
294
+
295
+
296
class ImageStreamProcessor(BaseProcessor):
    """HTTP image stream processor.

    Consumes the upstream NDJSON stream and re-emits it as SSE events,
    either in OpenAI image_generation format or (when ``chat_format`` is
    set) as chat.completion chunks with the image wrapped for chat output.
    """

    def __init__(
        self, model: str, token: str = "", n: int = 1, response_format: str = "b64_json", chat_format: bool = False
    ):
        super().__init__(model, token)
        self.partial_index = 0
        self.n = n
        # With a single requested image only index 0 is forwarded.
        self.target_index = 0 if n == 1 else None
        self.response_format = response_format
        self.chat_format = chat_format
        # Response id is generated lazily on first emitted chunk.
        self._id_generated = False
        self._response_id = ""
        # Name of the field carrying the image payload in emitted events.
        if response_format == "url":
            self.response_field = "url"
        elif response_format == "base64":
            self.response_field = "base64"
        else:
            self.response_field = "b64_json"

    def _sse(self, event: str, data: dict) -> str:
        """Build SSE response (one "event:"/"data:" frame)."""
        return f"event: {event}\ndata: {orjson.dumps(data).decode()}\n\n"

    async def process(
        self, response: AsyncIterable[bytes]
    ) -> AsyncGenerator[str, None]:
        """Process stream response.

        Yields SSE strings. Raises UpstreamException on idle timeout or
        upstream request errors; client cancellation is swallowed.
        """
        final_images = []
        emitted_chat_chunk = False
        idle_timeout = get_config("image.stream_timeout")

        try:
            async for line in _with_idle_timeout(response, idle_timeout, self.model):
                line = _normalize_line(line)
                if not line:
                    continue
                try:
                    data = orjson.loads(line)
                except orjson.JSONDecodeError:
                    # Ignore keep-alives / non-JSON noise.
                    continue

                resp = data.get("result", {}).get("response", {})

                # Image generation progress
                if img := resp.get("streamingImageGenerationResponse"):
                    image_index = img.get("imageIndex", 0)
                    progress = img.get("progress", 0)

                    if self.n == 1 and image_index != self.target_index:
                        continue

                    out_index = 0 if self.n == 1 else image_index

                    # Progress frames are only meaningful for the native
                    # image_generation protocol, not for chat chunks.
                    if not self.chat_format:
                        yield self._sse(
                            "image_generation.partial_image",
                            {
                                "type": "image_generation.partial_image",
                                self.response_field: "",
                                "index": out_index,
                                "progress": progress,
                            },
                        )
                    continue

                # modelResponse — carries the final image URLs.
                if mr := resp.get("modelResponse"):
                    if urls := _collect_images(mr):
                        for url in urls:
                            if self.response_format == "url":
                                processed = await self.process_url(url, "image")
                                if processed:
                                    final_images.append(processed)
                                continue
                            try:
                                dl_service = self._get_dl()
                                base64_data = await dl_service.parse_b64(
                                    url, self.token, "image"
                                )
                                if base64_data:
                                    # Strip a possible data-URI prefix.
                                    if "," in base64_data:
                                        b64 = base64_data.split(",", 1)[1]
                                    else:
                                        b64 = base64_data
                                    final_images.append(b64)
                            except Exception as e:
                                logger.warning(
                                    f"Failed to convert image to base64, falling back to URL: {e}"
                                )
                                processed = await self.process_url(url, "image")
                                if processed:
                                    final_images.append(processed)
                    continue

            # Stream drained — emit one completion event per collected image.
            for index, img_data in enumerate(final_images):
                if self.n == 1:
                    if index != self.target_index:
                        continue
                    out_index = 0
                else:
                    out_index = index

                # Wrap in markdown format for chat
                output = img_data
                if self.chat_format and output:
                    output = wrap_image_content(output, self.response_format)

                if not self._id_generated:
                    self._response_id = make_response_id()
                    self._id_generated = True

                if self.chat_format:
                    # OpenAI ChatCompletion chunk format
                    emitted_chat_chunk = True
                    yield self._sse(
                        "chat.completion.chunk",
                        make_chat_chunk(
                            self._response_id,
                            self.model,
                            output,
                            index=out_index,
                            is_final=True,
                        ),
                    )
                else:
                    # Original image_generation format
                    yield self._sse(
                        "image_generation.completed",
                        {
                            "type": "image_generation.completed",
                            self.response_field: img_data,
                            "index": out_index,
                            "usage": {
                                "total_tokens": 0,
                                "input_tokens": 0,
                                "output_tokens": 0,
                                "input_tokens_details": {
                                    "text_tokens": 0,
                                    "image_tokens": 0,
                                },
                            },
                        },
                    )

            if self.chat_format:
                # Chat streams must always end with a final (possibly empty)
                # chunk followed by the [DONE] sentinel.
                if not self._id_generated:
                    self._response_id = make_response_id()
                    self._id_generated = True
                if not emitted_chat_chunk:
                    yield self._sse(
                        "chat.completion.chunk",
                        make_chat_chunk(
                            self._response_id,
                            self.model,
                            "",
                            index=0,
                            is_final=True,
                        ),
                    )
                yield "data: [DONE]\n\n"
        except asyncio.CancelledError:
            # Client disconnected — nothing to clean up beyond finally.
            logger.debug("Image stream cancelled by client")
        except StreamIdleTimeoutError as e:
            raise UpstreamException(
                message=f"Image stream idle timeout after {e.idle_seconds}s",
                status_code=504,
                details={
                    "error": str(e),
                    "type": "stream_idle_timeout",
                    "idle_seconds": e.idle_seconds,
                },
            )
        except RequestsError as e:
            if _is_http2_error(e):
                logger.warning(f"HTTP/2 stream error in image: {e}")
                raise UpstreamException(
                    message="Upstream connection closed unexpectedly",
                    status_code=502,
                    details={"error": str(e), "type": "http2_stream_error"},
                )
            logger.error(f"Image stream request error: {e}")
            raise UpstreamException(
                message=f"Upstream request failed: {e}",
                status_code=502,
                details={"error": str(e)},
            )
        except Exception as e:
            logger.error(
                f"Image stream processing error: {e}",
                extra={"error_type": type(e).__name__},
            )
            raise
        finally:
            await self.close()
492
+
493
+
494
class ImageCollectProcessor(BaseProcessor):
    """HTTP image non-stream processor.

    Drains the upstream NDJSON response and returns the final images as a
    list of URLs or base64 strings, depending on ``response_format``.
    """

    def __init__(self, model: str, token: str = "", response_format: str = "b64_json"):
        # "base64" is accepted as an alias for "b64_json".
        if response_format == "base64":
            response_format = "b64_json"
        super().__init__(model, token)
        self.response_format = response_format

    async def process(self, response: AsyncIterable[bytes]) -> List[str]:
        """Process and collect images.

        Best-effort: all errors are logged, and whatever was collected so
        far is returned rather than raised.
        """
        images = []
        idle_timeout = get_config("image.stream_timeout")

        try:
            async for line in _with_idle_timeout(response, idle_timeout, self.model):
                line = _normalize_line(line)
                if not line:
                    continue
                try:
                    data = orjson.loads(line)
                except orjson.JSONDecodeError:
                    # Ignore keep-alives / non-JSON noise.
                    continue

                resp = data.get("result", {}).get("response", {})

                # Only the final modelResponse carries the generated images.
                if mr := resp.get("modelResponse"):
                    if urls := _collect_images(mr):
                        for url in urls:
                            if self.response_format == "url":
                                processed = await self.process_url(url, "image")
                                if processed:
                                    images.append(processed)
                                continue
                            try:
                                dl_service = self._get_dl()
                                base64_data = await dl_service.parse_b64(
                                    url, self.token, "image"
                                )
                                if base64_data:
                                    # Strip a possible data-URI prefix.
                                    if "," in base64_data:
                                        b64 = base64_data.split(",", 1)[1]
                                    else:
                                        b64 = base64_data
                                    images.append(b64)
                            except Exception as e:
                                logger.warning(
                                    f"Failed to convert image to base64, falling back to URL: {e}"
                                )
                                processed = await self.process_url(url, "image")
                                if processed:
                                    images.append(processed)

        except asyncio.CancelledError:
            logger.debug("Image collect cancelled by client")
        except StreamIdleTimeoutError as e:
            logger.warning(f"Image collect idle timeout: {e}")
        except RequestsError as e:
            if _is_http2_error(e):
                logger.warning(f"HTTP/2 stream error in image collect: {e}")
            else:
                logger.error(f"Image collect request error: {e}")
        except Exception as e:
            logger.error(
                f"Image collect processing error: {e}",
                extra={"error_type": type(e).__name__},
            )
        finally:
            await self.close()

        return images
565
+
566
+
567
+ __all__ = ["ImageEditService", "ImageEditResult"]
app/services/grok/services/model.py ADDED
@@ -0,0 +1,270 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Grok 模型管理服务
3
+ """
4
+
5
+ from enum import Enum
6
+ from typing import Optional, Tuple, List
7
+ from pydantic import BaseModel, Field
8
+
9
+ from app.core.exceptions import ValidationException
10
+
11
+
12
class Tier(str, Enum):
    """Model tier; selects which SSO token pool serves the model."""

    BASIC = "basic"
    SUPER = "super"
18
+
19
+ class Cost(str, Enum):
20
+ """计费类型"""
21
+
22
+ LOW = "low"
23
+ HIGH = "high"
24
+
25
+
26
+ class ModelInfo(BaseModel):
27
+ """模型信息"""
28
+
29
+ model_id: str
30
+ grok_model: str
31
+ model_mode: str
32
+ tier: Tier = Field(default=Tier.BASIC)
33
+ cost: Cost = Field(default=Cost.LOW)
34
+ display_name: str
35
+ description: str = ""
36
+ is_image: bool = False
37
+ is_image_edit: bool = False
38
+ is_video: bool = False
39
+
40
+
41
+ class ModelService:
42
+ """模型管理服务"""
43
+
44
+ MODELS = [
45
+ ModelInfo(
46
+ model_id="grok-3",
47
+ grok_model="grok-3",
48
+ model_mode="MODEL_MODE_GROK_3",
49
+ tier=Tier.BASIC,
50
+ cost=Cost.LOW,
51
+ display_name="GROK-3",
52
+ is_image=False,
53
+ is_image_edit=False,
54
+ is_video=False,
55
+ ),
56
+ ModelInfo(
57
+ model_id="grok-3-mini",
58
+ grok_model="grok-3",
59
+ model_mode="MODEL_MODE_GROK_3_MINI_THINKING",
60
+ tier=Tier.BASIC,
61
+ cost=Cost.LOW,
62
+ display_name="GROK-3-MINI",
63
+ is_image=False,
64
+ is_image_edit=False,
65
+ is_video=False,
66
+ ),
67
+ ModelInfo(
68
+ model_id="grok-3-thinking",
69
+ grok_model="grok-3",
70
+ model_mode="MODEL_MODE_GROK_3_THINKING",
71
+ tier=Tier.BASIC,
72
+ cost=Cost.LOW,
73
+ display_name="GROK-3-THINKING",
74
+ is_image=False,
75
+ is_image_edit=False,
76
+ is_video=False,
77
+ ),
78
+ ModelInfo(
79
+ model_id="grok-4",
80
+ grok_model="grok-4",
81
+ model_mode="MODEL_MODE_GROK_4",
82
+ tier=Tier.BASIC,
83
+ cost=Cost.LOW,
84
+ display_name="GROK-4",
85
+ is_image=False,
86
+ is_image_edit=False,
87
+ is_video=False,
88
+ ),
89
+ ModelInfo(
90
+ model_id="grok-4-mini",
91
+ grok_model="grok-4-mini",
92
+ model_mode="MODEL_MODE_GROK_4_MINI_THINKING",
93
+ tier=Tier.BASIC,
94
+ cost=Cost.LOW,
95
+ display_name="GROK-4-MINI",
96
+ is_image=False,
97
+ is_image_edit=False,
98
+ is_video=False,
99
+ ),
100
+ ModelInfo(
101
+ model_id="grok-4-thinking",
102
+ grok_model="grok-4",
103
+ model_mode="MODEL_MODE_GROK_4_THINKING",
104
+ tier=Tier.BASIC,
105
+ cost=Cost.LOW,
106
+ display_name="GROK-4-THINKING",
107
+ is_image=False,
108
+ is_image_edit=False,
109
+ is_video=False,
110
+ ),
111
+ ModelInfo(
112
+ model_id="grok-4-heavy",
113
+ grok_model="grok-4",
114
+ model_mode="MODEL_MODE_HEAVY",
115
+ tier=Tier.SUPER,
116
+ cost=Cost.HIGH,
117
+ display_name="GROK-4-HEAVY",
118
+ is_image=False,
119
+ is_image_edit=False,
120
+ is_video=False,
121
+ ),
122
+ ModelInfo(
123
+ model_id="grok-4.1-mini",
124
+ grok_model="grok-4-1-thinking-1129",
125
+ model_mode="MODEL_MODE_GROK_4_1_MINI_THINKING",
126
+ tier=Tier.BASIC,
127
+ cost=Cost.LOW,
128
+ display_name="GROK-4.1-MINI",
129
+ is_image=False,
130
+ is_image_edit=False,
131
+ is_video=False,
132
+ ),
133
+ ModelInfo(
134
+ model_id="grok-4.1-fast",
135
+ grok_model="grok-4-1-thinking-1129",
136
+ model_mode="MODEL_MODE_FAST",
137
+ tier=Tier.BASIC,
138
+ cost=Cost.LOW,
139
+ display_name="GROK-4.1-FAST",
140
+ is_image=False,
141
+ is_image_edit=False,
142
+ is_video=False,
143
+ ),
144
+ ModelInfo(
145
+ model_id="grok-4.1-expert",
146
+ grok_model="grok-4-1-thinking-1129",
147
+ model_mode="MODEL_MODE_EXPERT",
148
+ tier=Tier.BASIC,
149
+ cost=Cost.HIGH,
150
+ display_name="GROK-4.1-EXPERT",
151
+ is_image=False,
152
+ is_image_edit=False,
153
+ is_video=False,
154
+ ),
155
+ ModelInfo(
156
+ model_id="grok-4.1-thinking",
157
+ grok_model="grok-4-1-thinking-1129",
158
+ model_mode="MODEL_MODE_GROK_4_1_THINKING",
159
+ tier=Tier.BASIC,
160
+ cost=Cost.HIGH,
161
+ display_name="GROK-4.1-THINKING",
162
+ is_image=False,
163
+ is_image_edit=False,
164
+ is_video=False,
165
+ ),
166
+ ModelInfo(
167
+ model_id="grok-4.20-beta",
168
+ grok_model="grok-420",
169
+ model_mode="MODEL_MODE_GROK_420",
170
+ tier=Tier.BASIC,
171
+ cost=Cost.LOW,
172
+ display_name="GROK-4.20-BETA",
173
+ is_image=False,
174
+ is_image_edit=False,
175
+ is_video=False,
176
+ ),
177
+ ModelInfo(
178
+ model_id="grok-imagine-1.0-fast",
179
+ grok_model="grok-3",
180
+ model_mode="MODEL_MODE_FAST",
181
+ tier=Tier.BASIC,
182
+ cost=Cost.HIGH,
183
+ display_name="Grok Image Fast",
184
+ description="Imagine waterfall image generation model for chat completions",
185
+ is_image=True,
186
+ is_image_edit=False,
187
+ is_video=False,
188
+ ),
189
+ ModelInfo(
190
+ model_id="grok-imagine-1.0",
191
+ grok_model="grok-3",
192
+ model_mode="MODEL_MODE_FAST",
193
+ tier=Tier.BASIC,
194
+ cost=Cost.HIGH,
195
+ display_name="Grok Image",
196
+ description="Image generation model",
197
+ is_image=True,
198
+ is_image_edit=False,
199
+ is_video=False,
200
+ ),
201
+ ModelInfo(
202
+ model_id="grok-imagine-1.0-edit",
203
+ grok_model="imagine-image-edit",
204
+ model_mode="MODEL_MODE_FAST",
205
+ tier=Tier.BASIC,
206
+ cost=Cost.HIGH,
207
+ display_name="Grok Image Edit",
208
+ description="Image edit model",
209
+ is_image=False,
210
+ is_image_edit=True,
211
+ is_video=False,
212
+ ),
213
+ ModelInfo(
214
+ model_id="grok-imagine-1.0-video",
215
+ grok_model="grok-3",
216
+ model_mode="MODEL_MODE_FAST",
217
+ tier=Tier.BASIC,
218
+ cost=Cost.HIGH,
219
+ display_name="Grok Video",
220
+ description="Video generation model",
221
+ is_image=False,
222
+ is_image_edit=False,
223
+ is_video=True,
224
+ ),
225
+ ]
226
+
227
+ _map = {m.model_id: m for m in MODELS}
228
+
229
+ @classmethod
230
+ def get(cls, model_id: str) -> Optional[ModelInfo]:
231
+ """获取模型信息"""
232
+ return cls._map.get(model_id)
233
+
234
+ @classmethod
235
+ def list(cls) -> list[ModelInfo]:
236
+ """获取所有模型"""
237
+ return list(cls._map.values())
238
+
239
+ @classmethod
240
+ def valid(cls, model_id: str) -> bool:
241
+ """模型是否有效"""
242
+ return model_id in cls._map
243
+
244
+ @classmethod
245
+ def to_grok(cls, model_id: str) -> Tuple[str, str]:
246
+ """转换为 Grok 参数"""
247
+ model = cls.get(model_id)
248
+ if not model:
249
+ raise ValidationException(f"Invalid model ID: {model_id}")
250
+ return model.grok_model, model.model_mode
251
+
252
+ @classmethod
253
+ def pool_for_model(cls, model_id: str) -> str:
254
+ """根据模型选择 Token 池"""
255
+ model = cls.get(model_id)
256
+ if model and model.tier == Tier.SUPER:
257
+ return "ssoSuper"
258
+ return "ssoBasic"
259
+
260
+ @classmethod
261
+ def pool_candidates_for_model(cls, model_id: str) -> List[str]:
262
+ """按优先级返回可用 Token 池列表"""
263
+ model = cls.get(model_id)
264
+ if model and model.tier == Tier.SUPER:
265
+ return ["ssoSuper"]
266
+ # 基础模型优先使用 basic 池,缺失时可回退到 super 池
267
+ return ["ssoBasic", "ssoSuper"]
268
+
269
+
270
+ __all__ = ["ModelService"]
app/services/grok/services/responses.py ADDED
@@ -0,0 +1,824 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Responses API bridge service (OpenAI-compatible).
3
+ """
4
+
5
+ import time
6
+ import uuid
7
+ from typing import Any, AsyncGenerator, Dict, List, Optional
8
+
9
+ import orjson
10
+
11
+ from app.services.grok.services.chat import ChatService
12
+ from app.services.grok.utils import process as proc_base
13
+
14
+
15
# Input item types that carry the output of a previously-executed tool call;
# these are mapped onto role="tool" chat messages.
_TOOL_OUTPUT_TYPES = {
    "tool_output",
    "function_call_output",
    "tool_call_output",
    "input_tool_output",
}

# OpenAI built-in tool types that this bridge emulates by exposing them to
# the chat backend as plain function tools (see _normalize_tools_for_chat).
_BUILTIN_TOOL_TYPES = {
    "web_search",
    "web_search_2025_08_26",
    "file_search",
    "code_interpreter",
}
28
+
29
+
30
+ def _now_ts() -> int:
31
+ return int(time.time())
32
+
33
+
34
+ def _new_response_id() -> str:
35
+ return f"resp_{uuid.uuid4().hex[:24]}"
36
+
37
+
38
+ def _new_message_id() -> str:
39
+ return f"msg_{uuid.uuid4().hex[:24]}"
40
+
41
+
42
+ def _new_tool_call_id() -> str:
43
+ return f"call_{uuid.uuid4().hex[:24]}"
44
+
45
+
46
+ def _new_function_call_id() -> str:
47
+ return f"fc_{uuid.uuid4().hex[:24]}"
48
+
49
+
50
+ def _normalize_tool_choice(tool_choice: Any) -> Any:
51
+ if isinstance(tool_choice, dict):
52
+ t_type = tool_choice.get("type")
53
+ if t_type and t_type != "function":
54
+ return {"type": "function", "function": {"name": t_type}}
55
+ return tool_choice
56
+
57
+
58
def _normalize_tools_for_chat(tools: Optional[List[Dict[str, Any]]]) -> Optional[List[Dict[str, Any]]]:
    """Convert a Responses-API tool list into chat-completions function tools.

    Function tools pass through unchanged; supported built-in tools are
    emulated as single-argument function tools; anything else is dropped.
    Returns None when nothing usable remains.
    """

    def _as_function(name: str, description: str, arg: str) -> Dict[str, Any]:
        # Single string-argument function-tool stub for a built-in tool.
        return {
            "type": "function",
            "function": {
                "name": name,
                "description": description,
                "parameters": {
                    "type": "object",
                    "properties": {arg: {"type": "string"}},
                    "required": [arg],
                },
            },
        }

    if not tools:
        return None

    converted: List[Dict[str, Any]] = []
    for tool in tools:
        if not isinstance(tool, dict):
            continue
        tool_type = tool.get("type")
        if tool_type == "function":
            converted.append(tool)
        elif tool_type in _BUILTIN_TOOL_TYPES:
            if tool_type.startswith("web_search"):
                converted.append(
                    _as_function(
                        tool_type,
                        "Search the web for information and return results.",
                        "query",
                    )
                )
            elif tool_type == "file_search":
                converted.append(
                    _as_function(
                        tool_type,
                        "Search provided files for relevant information.",
                        "query",
                    )
                )
            elif tool_type == "code_interpreter":
                converted.append(
                    _as_function(
                        tool_type,
                        "Execute code to solve tasks and return results.",
                        "code",
                    )
                )
    return converted or None
116
+
117
+
118
+ def _content_item_from_input(item: Dict[str, Any]) -> Optional[Dict[str, Any]]:
119
+ if not isinstance(item, dict):
120
+ return None
121
+ item_type = item.get("type")
122
+
123
+ if item_type in {"input_text", "text", "output_text"}:
124
+ text = item.get("text") or item.get("content") or ""
125
+ return {"type": "text", "text": text}
126
+
127
+ if item_type in {"input_image", "image", "image_url", "output_image"}:
128
+ image_url = item.get("image_url")
129
+ url = ""
130
+ detail = None
131
+ if isinstance(image_url, dict):
132
+ url = image_url.get("url") or ""
133
+ detail = image_url.get("detail")
134
+ elif isinstance(image_url, str):
135
+ url = image_url
136
+ else:
137
+ url = item.get("url") or item.get("image") or ""
138
+
139
+ if not url:
140
+ return None
141
+ image_payload = {"url": url}
142
+ if detail:
143
+ image_payload["detail"] = detail
144
+ return {"type": "image_url", "image_url": image_payload}
145
+
146
+ if item_type in {"input_file", "file"}:
147
+ file_data = item.get("file_data")
148
+ file_id = item.get("file_id")
149
+ if not file_data and isinstance(item.get("file"), dict):
150
+ file_data = item["file"].get("file_data")
151
+ file_id = item["file"].get("file_id")
152
+ file_payload: Dict[str, Any] = {}
153
+ if file_data:
154
+ file_payload["file_data"] = file_data
155
+ if file_id:
156
+ file_payload["file_id"] = file_id
157
+ if not file_payload:
158
+ return None
159
+ return {"type": "file", "file": file_payload}
160
+
161
+ if item_type in {"input_audio", "audio"}:
162
+ audio = item.get("audio") or {}
163
+ data = audio.get("data") or item.get("data")
164
+ if not data:
165
+ return None
166
+ return {"type": "input_audio", "input_audio": {"data": data}}
167
+
168
+ return None
169
+
170
+
171
def _message_from_item(item: Dict[str, Any]) -> Optional[Dict[str, Any]]:
    """Turn an explicit message item (type == "message", or a bare
    role/content dict) into a chat message; None when it is not a message."""
    if not isinstance(item, dict):
        return None

    is_message = item.get("type") == "message" or (
        "role" in item and "content" in item
    )
    if not is_message:
        return None

    return {
        "role": item.get("role") or "user",
        "content": _coerce_content(item.get("content")),
    }
184
+
185
+
186
def _coerce_content(content: Any) -> Any:
    """Normalize Responses-style message content for chat completions.

    Strings pass through; a dict or list becomes a list of content blocks
    (or "" when nothing usable survives); anything else is stringified.
    """
    if content is None:
        return ""
    if isinstance(content, str):
        return content

    items = [content] if isinstance(content, dict) else content
    if not isinstance(items, list):
        return str(items)

    blocks: List[Dict[str, Any]] = []
    for entry in items:
        if not isinstance(entry, dict):
            continue
        if entry.get("type") in {"input_text", "output_text"}:
            blocks.append({"type": "text", "text": entry.get("text", "")})
        elif converted := _content_item_from_input(entry):
            blocks.append(converted)
    return blocks or ""
204
+
205
+
206
def _coerce_input_to_messages(input_value: Any) -> List[Dict[str, Any]]:
    """Convert the Responses-API ``input`` value into a chat message list.

    Accepts a plain string, a single item dict, or a list mixing message
    items, tool outputs, content blocks, and bare strings. Consecutive
    loose content blocks are batched into a single user message.
    """
    if input_value is None:
        return []
    if isinstance(input_value, str):
        return [{"role": "user", "content": input_value}]

    if isinstance(input_value, dict):
        msg = _message_from_item(input_value)
        if msg:
            return [msg]
        content_item = _content_item_from_input(input_value)
        if content_item:
            return [{"role": "user", "content": [content_item]}]
        return []

    if not isinstance(input_value, list):
        # Last resort: stringify unknown scalar input.
        return [{"role": "user", "content": str(input_value)}]

    messages: List[Dict[str, Any]] = []
    pending_blocks: List[Dict[str, Any]] = []

    def _flush_pending():
        # Emit accumulated loose content blocks as one user message.
        nonlocal pending_blocks
        if pending_blocks:
            messages.append({"role": "user", "content": pending_blocks})
            pending_blocks = []

    for item in input_value:
        if isinstance(item, dict):
            # Explicit message item.
            msg = _message_from_item(item)
            if msg:
                _flush_pending()
                messages.append(msg)
                continue

            # Tool output item -> role="tool" message.
            item_type = item.get("type")
            if item_type in _TOOL_OUTPUT_TYPES:
                _flush_pending()
                call_id = (
                    item.get("call_id")
                    or item.get("tool_call_id")
                    or item.get("id")
                    or _new_tool_call_id()
                )
                output = item.get("output") or item.get("content") or ""
                messages.append({"role": "tool", "tool_call_id": call_id, "content": output})
                continue

            # Loose content block: buffer until the next real message.
            block = _content_item_from_input(item)
            if block:
                pending_blocks.append(block)
            continue

        if isinstance(item, str):
            pending_blocks.append({"type": "text", "text": item})

    _flush_pending()
    return messages
264
+
265
+
266
def _build_output_message(
    text: str,
    *,
    message_id: Optional[str] = None,
    status: str = "completed",
) -> Dict[str, Any]:
    """Wrap assistant text into a Responses-API output "message" item."""
    text_block = {
        "type": "output_text",
        "text": text,
        "annotations": [],
    }
    return {
        "id": message_id or _new_message_id(),
        "type": "message",
        "role": "assistant",
        "status": status,
        "content": [text_block],
    }
286
+
287
+
288
def _build_output_tool_call(
    tool_call: Dict[str, Any],
    *,
    item_id: Optional[str] = None,
    status: str = "completed",
) -> Dict[str, Any]:
    """Convert a chat-completions tool call into a Responses-API
    "function_call" output item."""
    function = tool_call.get("function") or {}
    return {
        "id": item_id or _new_function_call_id(),
        "type": "function_call",
        "status": status,
        "call_id": tool_call.get("id") or _new_tool_call_id(),
        "name": function.get("name"),
        "arguments": function.get("arguments"),
    }
305
+
306
+
307
def _build_response_object(
    *,
    model: str,
    output_text: Optional[str] = None,
    tool_calls: Optional[List[Dict[str, Any]]] = None,
    response_id: Optional[str] = None,
    usage: Optional[Dict[str, Any]] = None,
    created_at: Optional[int] = None,
    completed_at: Optional[int] = None,
    status: str = "completed",
    instructions: Optional[str] = None,
    max_output_tokens: Optional[int] = None,
    parallel_tool_calls: Optional[bool] = None,
    previous_response_id: Optional[str] = None,
    reasoning_effort: Optional[str] = None,
    store: Optional[bool] = None,
    temperature: Optional[float] = None,
    tool_choice: Optional[Any] = None,
    tools: Optional[List[Dict[str, Any]]] = None,
    top_p: Optional[float] = None,
    truncation: Optional[str] = None,
    user: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """Assemble a full OpenAI Responses-API "response" object.

    Missing ids/timestamps are generated; unset optional parameters fall
    back to the OpenAI defaults (parallel_tool_calls/store True,
    temperature/top_p 1.0, tool_choice "auto", truncation "disabled").
    """
    response_id = response_id or _new_response_id()
    created_at = created_at or _now_ts()
    # Stamp completion time only for already-completed responses.
    if status == "completed" and completed_at is None:
        completed_at = _now_ts()

    # Output items: optional assistant message followed by tool-call items.
    output: List[Dict[str, Any]] = []
    if output_text is not None:
        output.append(_build_output_message(output_text))

    if tool_calls:
        for call in tool_calls:
            output.append(_build_output_tool_call(call))

    return {
        "id": response_id,
        "object": "response",
        "created_at": created_at,
        "completed_at": completed_at,
        "status": status,
        "error": None,
        "incomplete_details": None,
        "instructions": instructions,
        "max_output_tokens": max_output_tokens,
        "model": model,
        "output": output,
        "parallel_tool_calls": True if parallel_tool_calls is None else parallel_tool_calls,
        "previous_response_id": previous_response_id,
        "reasoning": {"effort": reasoning_effort, "summary": None},
        "store": True if store is None else store,
        "temperature": 1.0 if temperature is None else temperature,
        "text": {"format": {"type": "text"}},
        "tool_choice": tool_choice or "auto",
        "tools": tools or [],
        "top_p": 1.0 if top_p is None else top_p,
        "truncation": truncation or "disabled",
        "usage": usage,
        "user": user,
        "metadata": metadata or {},
    }
370
+
371
+
372
+ class ResponseStreamAdapter:
373
    def __init__(
        self,
        *,
        model: str,
        response_id: str,
        created_at: int,
        instructions: Optional[str],
        max_output_tokens: Optional[int],
        parallel_tool_calls: Optional[bool],
        previous_response_id: Optional[str],
        reasoning_effort: Optional[str],
        store: Optional[bool],
        temperature: Optional[float],
        tool_choice: Optional[Any],
        tools: Optional[List[Dict[str, Any]]],
        top_p: Optional[float],
        truncation: Optional[str],
        user: Optional[str],
        metadata: Optional[Dict[str, Any]],
    ):
        """Capture the request parameters echoed back in every streamed
        response payload, and initialize streaming accumulation state."""
        self.model = model
        self.response_id = response_id
        self.created_at = created_at
        self.instructions = instructions
        self.max_output_tokens = max_output_tokens
        self.parallel_tool_calls = parallel_tool_calls
        self.previous_response_id = previous_response_id
        self.reasoning_effort = reasoning_effort
        self.store = store
        self.temperature = temperature
        self.tool_choice = tool_choice
        self.tools = tools
        self.top_p = top_p
        self.truncation = truncation
        self.user = user
        self.metadata = metadata

        # Accumulated assistant text deltas.
        self.output_text_parts: List[str] = []
        # Tool calls keyed by their chat-chunk index.
        self.tool_calls_by_index: Dict[int, Dict[str, Any]] = {}
        # Output items emitted for tool calls, keyed by the same index.
        self.tool_items: Dict[int, Dict[str, Any]] = {}
        # Next free slot in the response "output" array.
        self.next_output_index = 0
        self.content_index = 0
        self.message_id = _new_message_id()
        # Whether the assistant message item has been opened yet.
        self.message_started = False
        self.message_output_index: Optional[int] = None
418
+
419
+ def _event(self, event_type: str, payload: Dict[str, Any]) -> str:
420
+ return f"event: {event_type}\ndata: {orjson.dumps(payload).decode()}\n\n"
421
+
422
+ def _response_payload(self, *, status: str, output_text: Optional[str], usage: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
423
+ tool_calls = None
424
+ if status == "completed" and self.tool_calls_by_index:
425
+ tool_calls = [
426
+ self.tool_calls_by_index[idx]
427
+ for idx in sorted(self.tool_calls_by_index.keys())
428
+ ]
429
+ return _build_response_object(
430
+ model=self.model,
431
+ output_text=output_text,
432
+ tool_calls=tool_calls,
433
+ response_id=self.response_id,
434
+ usage=usage,
435
+ created_at=self.created_at,
436
+ status=status,
437
+ instructions=self.instructions,
438
+ max_output_tokens=self.max_output_tokens,
439
+ parallel_tool_calls=self.parallel_tool_calls,
440
+ previous_response_id=self.previous_response_id,
441
+ reasoning_effort=self.reasoning_effort,
442
+ store=self.store,
443
+ temperature=self.temperature,
444
+ tool_choice=self.tool_choice,
445
+ tools=self.tools,
446
+ top_p=self.top_p,
447
+ truncation=self.truncation,
448
+ user=self.user,
449
+ metadata=self.metadata,
450
+ )
451
+
452
+ def _alloc_output_index(self) -> int:
453
+ idx = self.next_output_index
454
+ self.next_output_index += 1
455
+ return idx
456
+
457
+ def created_event(self) -> str:
458
+ payload = {
459
+ "type": "response.created",
460
+ "response": self._response_payload(status="in_progress", output_text=None, usage=None),
461
+ }
462
+ return self._event("response.created", payload)
463
+
464
+ def in_progress_event(self) -> str:
465
+ payload = {
466
+ "type": "response.in_progress",
467
+ "response": self._response_payload(status="in_progress", output_text=None, usage=None),
468
+ }
469
+ return self._event("response.in_progress", payload)
470
+
471
+ def ensure_message_started(self) -> List[str]:
472
+ if self.message_started:
473
+ return []
474
+ self.message_started = True
475
+ self.message_output_index = self._alloc_output_index()
476
+ item = _build_output_message("", message_id=self.message_id, status="in_progress")
477
+ item["content"] = []
478
+ events = [
479
+ self._event(
480
+ "response.output_item.added",
481
+ {
482
+ "type": "response.output_item.added",
483
+ "response_id": self.response_id,
484
+ "output_index": self.message_output_index,
485
+ "item": item,
486
+ },
487
+ ),
488
+ self._event(
489
+ "response.content_part.added",
490
+ {
491
+ "type": "response.content_part.added",
492
+ "response_id": self.response_id,
493
+ "item_id": self.message_id,
494
+ "output_index": self.message_output_index,
495
+ "content_index": self.content_index,
496
+ "part": {"type": "output_text", "text": "", "annotations": []},
497
+ },
498
+ ),
499
+ ]
500
+ return events
501
+
502
+ def output_delta_event(self, delta: str) -> str:
503
+ return self._event(
504
+ "response.output_text.delta",
505
+ {
506
+ "type": "response.output_text.delta",
507
+ "response_id": self.response_id,
508
+ "item_id": self.message_id,
509
+ "output_index": self.message_output_index,
510
+ "content_index": self.content_index,
511
+ "delta": delta,
512
+ },
513
+ )
514
+
515
+ def output_done_events(self, text: str) -> List[str]:
516
+ if self.message_output_index is None:
517
+ return []
518
+ return [
519
+ self._event(
520
+ "response.output_text.done",
521
+ {
522
+ "type": "response.output_text.done",
523
+ "response_id": self.response_id,
524
+ "item_id": self.message_id,
525
+ "output_index": self.message_output_index,
526
+ "content_index": self.content_index,
527
+ "text": text,
528
+ },
529
+ ),
530
+ self._event(
531
+ "response.content_part.done",
532
+ {
533
+ "type": "response.content_part.done",
534
+ "response_id": self.response_id,
535
+ "item_id": self.message_id,
536
+ "output_index": self.message_output_index,
537
+ "content_index": self.content_index,
538
+ "part": {"type": "output_text", "text": text, "annotations": []},
539
+ },
540
+ ),
541
+ self._event(
542
+ "response.output_item.done",
543
+ {
544
+ "type": "response.output_item.done",
545
+ "response_id": self.response_id,
546
+ "output_index": self.message_output_index,
547
+ "item": _build_output_message(
548
+ text, message_id=self.message_id, status="completed"
549
+ ),
550
+ },
551
+ ),
552
+ ]
553
+
554
+ def ensure_tool_item(self, tool_index: int, call_id: str, name: Optional[str]) -> List[str]:
555
+ if tool_index in self.tool_items:
556
+ item = self.tool_items[tool_index]
557
+ if name and not item.get("name"):
558
+ item["name"] = name
559
+ return []
560
+ output_index = self._alloc_output_index()
561
+ item_id = _new_function_call_id()
562
+ self.tool_items[tool_index] = {
563
+ "item_id": item_id,
564
+ "output_index": output_index,
565
+ "call_id": call_id,
566
+ "name": name,
567
+ "arguments": "",
568
+ }
569
+ tool_item = _build_output_tool_call(
570
+ {"id": call_id, "function": {"name": name, "arguments": ""}},
571
+ item_id=item_id,
572
+ status="in_progress",
573
+ )
574
+ return [
575
+ self._event(
576
+ "response.output_item.added",
577
+ {
578
+ "type": "response.output_item.added",
579
+ "response_id": self.response_id,
580
+ "output_index": output_index,
581
+ "item": tool_item,
582
+ },
583
+ )
584
+ ]
585
+
586
+ def tool_arguments_delta_event(self, tool_index: int, delta: str) -> Optional[str]:
587
+ if not delta:
588
+ return None
589
+ item = self.tool_items.get(tool_index)
590
+ if not item:
591
+ return None
592
+ item["arguments"] += delta
593
+ return self._event(
594
+ "response.function_call_arguments.delta",
595
+ {
596
+ "type": "response.function_call_arguments.delta",
597
+ "response_id": self.response_id,
598
+ "item_id": item["item_id"],
599
+ "output_index": item["output_index"],
600
+ "delta": delta,
601
+ },
602
+ )
603
+
604
+ def tool_arguments_done_events(self) -> List[str]:
605
+ events: List[str] = []
606
+ for tool_index, item in sorted(
607
+ self.tool_items.items(), key=lambda kv: kv[1]["output_index"]
608
+ ):
609
+ events.append(
610
+ self._event(
611
+ "response.function_call_arguments.done",
612
+ {
613
+ "type": "response.function_call_arguments.done",
614
+ "response_id": self.response_id,
615
+ "item_id": item["item_id"],
616
+ "output_index": item["output_index"],
617
+ "arguments": item["arguments"],
618
+ },
619
+ )
620
+ )
621
+ tool_item = _build_output_tool_call(
622
+ {
623
+ "id": item["call_id"],
624
+ "function": {"name": item.get("name"), "arguments": item["arguments"]},
625
+ },
626
+ item_id=item["item_id"],
627
+ status="completed",
628
+ )
629
+ events.append(
630
+ self._event(
631
+ "response.output_item.done",
632
+ {
633
+ "type": "response.output_item.done",
634
+ "response_id": self.response_id,
635
+ "output_index": item["output_index"],
636
+ "item": tool_item,
637
+ },
638
+ )
639
+ )
640
+ return events
641
+
642
+ def record_tool_call(self, tool_index: int, call_id: str, name: Optional[str], arguments_delta: str) -> None:
643
+ tool_call = self.tool_calls_by_index.get(tool_index)
644
+ if not tool_call:
645
+ tool_call = {
646
+ "id": call_id or _new_tool_call_id(),
647
+ "type": "function",
648
+ "function": {"name": name, "arguments": ""},
649
+ }
650
+ self.tool_calls_by_index[tool_index] = tool_call
651
+ if name and not tool_call["function"].get("name"):
652
+ tool_call["function"]["name"] = name
653
+ if arguments_delta:
654
+ tool_call["function"]["arguments"] += arguments_delta
655
+
656
+ def completed_event(self, usage: Optional[Dict[str, Any]] = None) -> str:
657
+ response = self._response_payload(
658
+ status="completed",
659
+ output_text="".join(self.output_text_parts) if self.message_started else None,
660
+ usage=usage
661
+ or {"total_tokens": 0, "input_tokens": 0, "output_tokens": 0},
662
+ )
663
+ payload = {"type": "response.completed", "response": response}
664
+ return self._event("response.completed", payload)
665
+
666
+
667
class ResponsesService:
    """OpenAI Responses API facade implemented on top of ChatService."""

    @staticmethod
    async def create(
        *,
        model: str,
        input_value: Any,
        instructions: Optional[str] = None,
        stream: bool = False,
        temperature: Optional[float] = None,
        top_p: Optional[float] = None,
        tools: Optional[List[Dict[str, Any]]] = None,
        tool_choice: Any = None,
        parallel_tool_calls: Optional[bool] = None,
        reasoning_effort: Optional[str] = None,
        max_output_tokens: Optional[int] = None,
        metadata: Optional[Dict[str, Any]] = None,
        user: Optional[str] = None,
        store: Optional[bool] = None,
        previous_response_id: Optional[str] = None,
        truncation: Optional[str] = None,
    ) -> Any:
        """Create a response from Responses-API-style input.

        Converts `input_value` (+ optional `instructions` as a leading system
        message) into chat messages, delegates to ChatService.completions,
        and adapts the result back to the Responses API shape.

        Returns:
            A completed response dict when stream is False, otherwise an
            async generator yielding SSE event strings.

        Raises:
            ValueError: empty input, or a stream/non-stream mismatch from the
                underlying chat service.
        """
        messages = _coerce_input_to_messages(input_value)
        if instructions:
            messages = [{"role": "system", "content": instructions}] + messages

        if not messages:
            raise ValueError("input is required")

        # Translate Responses-style tools/tool_choice into chat-completions shape.
        normalized_tools = _normalize_tools_for_chat(tools)
        normalized_tool_choice = _normalize_tool_choice(tool_choice)

        # Only forward parameters the caller actually set.
        chat_kwargs: Dict[str, Any] = {
            "model": model,
            "messages": messages,
            "stream": stream,
        }
        if temperature is not None:
            chat_kwargs["temperature"] = temperature
        if top_p is not None:
            chat_kwargs["top_p"] = top_p
        if normalized_tools is not None:
            chat_kwargs["tools"] = normalized_tools
        if normalized_tool_choice is not None:
            chat_kwargs["tool_choice"] = normalized_tool_choice
        if parallel_tool_calls is not None:
            chat_kwargs["parallel_tool_calls"] = parallel_tool_calls
        if reasoning_effort is not None:
            chat_kwargs["reasoning_effort"] = reasoning_effort

        result = await ChatService.completions(**chat_kwargs)

        if not stream:
            # Non-stream path: lift message content + tool calls straight into
            # a completed response object.
            if not isinstance(result, dict):
                raise ValueError("Unexpected stream response for non-stream request")
            choice = (result.get("choices") or [{}])[0]
            message = choice.get("message") or {}
            content = message.get("content") or ""
            tool_calls = message.get("tool_calls")
            return _build_response_object(
                model=model,
                output_text=content,
                tool_calls=tool_calls,
                usage=result.get("usage")
                or {"total_tokens": 0, "input_tokens": 0, "output_tokens": 0},
                status="completed",
                instructions=instructions,
                max_output_tokens=max_output_tokens,
                parallel_tool_calls=parallel_tool_calls,
                previous_response_id=previous_response_id,
                reasoning_effort=reasoning_effort,
                store=store,
                temperature=temperature,
                tool_choice=tool_choice,
                tools=tools,
                top_p=top_p,
                truncation=truncation,
                user=user,
                metadata=metadata,
            )

        if not hasattr(result, "__aiter__"):
            raise ValueError("Unexpected non-stream response for stream request")

        created_at = _now_ts()
        response_id = _new_response_id()
        adapter = ResponseStreamAdapter(
            model=model,
            response_id=response_id,
            created_at=created_at,
            instructions=instructions,
            max_output_tokens=max_output_tokens,
            parallel_tool_calls=parallel_tool_calls,
            previous_response_id=previous_response_id,
            reasoning_effort=reasoning_effort,
            store=store,
            temperature=temperature,
            tool_choice=tool_choice,
            tools=tools,
            top_p=top_p,
            truncation=truncation,
            user=user,
            metadata=metadata,
        )

        async def _stream() -> AsyncGenerator[str, None]:
            """Re-emit the chat-completions stream as Responses-API SSE events."""
            yield adapter.created_event()
            yield adapter.in_progress_event()
            async for chunk in result:
                # proc_base is presumably the shared stream-processing helpers
                # module imported above (out of view) — TODO confirm.
                line = proc_base._normalize_line(chunk)
                if not line:
                    continue
                try:
                    data = orjson.loads(line)
                except orjson.JSONDecodeError:
                    # Skip keep-alives / partial frames that are not JSON.
                    continue

                if data.get("object") == "chat.completion.chunk":
                    delta = (data.get("choices") or [{}])[0].get("delta") or {}
                    if "content" in delta and delta["content"]:
                        # Open the message item lazily on the first text delta.
                        for event in adapter.ensure_message_started():
                            yield event
                        adapter.output_text_parts.append(delta["content"])
                        yield adapter.output_delta_event(delta["content"])
                    tool_calls = delta.get("tool_calls")
                    if isinstance(tool_calls, list):
                        for tool in tool_calls:
                            if not isinstance(tool, dict):
                                continue
                            tool_index = tool.get("index", 0)
                            call_id = tool.get("id") or _new_tool_call_id()
                            fn = tool.get("function") or {}
                            name = fn.get("name")
                            args_delta = fn.get("arguments") or ""
                            # Record for the final payload AND emit live events.
                            adapter.record_tool_call(
                                tool_index, call_id, name, args_delta
                            )
                            for event in adapter.ensure_tool_item(
                                tool_index, call_id, name
                            ):
                                yield event
                            delta_event = adapter.tool_arguments_delta_event(
                                tool_index, args_delta
                            )
                            if delta_event:
                                yield delta_event

            # Upstream exhausted: close message and tool items, then complete.
            full_text = "".join(adapter.output_text_parts)
            if full_text and adapter.message_started:
                for event in adapter.output_done_events(full_text):
                    yield event
            for event in adapter.tool_arguments_done_events():
                yield event
            yield adapter.completed_event()

        return _stream()
822
+
823
+
824
# Explicit public API: only the service facade is exported.
__all__ = ["ResponsesService"]
app/services/grok/services/video.py ADDED
@@ -0,0 +1,688 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Grok video generation service.
3
+ """
4
+
5
+ import asyncio
6
+ import uuid
7
+ import re
8
+ from typing import Any, AsyncGenerator, AsyncIterable, Optional
9
+
10
+ import orjson
11
+ from curl_cffi.requests.errors import RequestsError
12
+
13
+ from app.core.logger import logger
14
+ from app.core.config import get_config
15
+ from app.core.exceptions import (
16
+ UpstreamException,
17
+ AppException,
18
+ ValidationException,
19
+ ErrorType,
20
+ StreamIdleTimeoutError,
21
+ )
22
+ from app.services.grok.services.model import ModelService
23
+ from app.services.token import get_token_manager, EffortType
24
+ from app.services.grok.utils.stream import wrap_stream_with_usage
25
+ from app.services.grok.utils.process import (
26
+ BaseProcessor,
27
+ _with_idle_timeout,
28
+ _normalize_line,
29
+ _is_http2_error,
30
+ )
31
+ from app.services.grok.utils.retry import rate_limited
32
+ from app.services.reverse.app_chat import AppChatReverse
33
+ from app.services.reverse.media_post import MediaPostReverse
34
+ from app.services.reverse.video_upscale import VideoUpscaleReverse
35
+ from app.services.reverse.utils.session import ResettableSession
36
+ from app.services.token.manager import BASIC_POOL_NAME
37
+
38
# Lazily-built concurrency limiter shared by all video reverse calls; rebuilt
# whenever the configured limit changes.
_VIDEO_SEMAPHORE = None
_VIDEO_SEM_VALUE = 0

def _get_video_semaphore() -> asyncio.Semaphore:
    """Return the reverse-endpoint concurrency semaphore for the video service."""
    global _VIDEO_SEMAPHORE, _VIDEO_SEM_VALUE
    configured = max(1, int(get_config("video.concurrent")))
    if configured != _VIDEO_SEM_VALUE:
        # Config changed (or first call): replace the semaphore.
        _VIDEO_SEM_VALUE = configured
        _VIDEO_SEMAPHORE = asyncio.Semaphore(configured)
    return _VIDEO_SEMAPHORE
49
+
50
+
51
def _new_session() -> ResettableSession:
    """Create a fresh session, impersonating the configured browser when set."""
    impersonation = get_config("proxy.browser")
    if not impersonation:
        return ResettableSession()
    return ResettableSession(impersonate=impersonation)
56
+
57
+
58
class VideoService:
    """Video generation service built on the reverse-engineered Grok endpoints.

    Text-to-video and image-to-video share one request pipeline; the only
    difference is which media post anchors the generation.
    """

    # Preset -> upstream mode flag. Unknown presets fall back to
    # "--mode=custom" (same behavior as the original per-method dicts).
    _MODE_FLAGS = {
        "fun": "--mode=extremely-crazy",
        "normal": "--mode=normal",
        "spicy": "--mode=extremely-spicy-or-crazy",
    }

    def __init__(self):
        # Reserved for a future per-request timeout; currently unused.
        self.timeout = None

    async def create_post(
        self,
        token: str,
        prompt: str,
        media_type: str = "MEDIA_POST_TYPE_VIDEO",
        media_url: Optional[str] = None,
    ) -> str:
        """Create a media post and return its post ID.

        Args:
            token: Grok SSO token (without the "sso=" prefix).
            prompt: Generation prompt; only forwarded for video posts.
            media_type: "MEDIA_POST_TYPE_VIDEO" or "MEDIA_POST_TYPE_IMAGE".
            media_url: Source media URL; required for image posts.

        Raises:
            ValidationException: image post requested without media_url.
            UpstreamException: upstream failure or missing post ID.
        """
        try:
            if media_type == "MEDIA_POST_TYPE_IMAGE" and not media_url:
                raise ValidationException("media_url is required for image posts")

            prompt_value = prompt if media_type == "MEDIA_POST_TYPE_VIDEO" else ""
            media_value = media_url or ""

            async with _new_session() as session:
                async with _get_video_semaphore():
                    response = await MediaPostReverse.request(
                        session,
                        token,
                        media_type,
                        media_value,
                        prompt=prompt_value,
                    )

            post_id = response.json().get("post", {}).get("id", "")
            if not post_id:
                raise UpstreamException("No post ID in response")

            logger.info(f"Media post created: {post_id} (type={media_type})")
            return post_id

        except AppException:
            raise
        except Exception as e:
            logger.error(f"Create post error: {e}")
            raise UpstreamException(f"Create post error: {str(e)}")

    async def create_image_post(self, token: str, image_url: str) -> str:
        """Create an image post (for image-to-video) and return its post ID."""
        return await self.create_post(
            token, prompt="", media_type="MEDIA_POST_TYPE_IMAGE", media_url=image_url
        )

    @classmethod
    def _mode_message(cls, prompt: str, preset: str) -> str:
        """Append the upstream mode flag for *preset* to the prompt."""
        return f"{prompt} {cls._MODE_FLAGS.get(preset, '--mode=custom')}"

    @staticmethod
    def _video_model_config(
        post_id: str, aspect_ratio: str, resolution_name: str, video_length: int
    ) -> dict:
        """Build the model_config_override payload for one videoGen request."""
        return {
            "modelMap": {
                "videoGenModelConfig": {
                    "aspectRatio": aspect_ratio,
                    "parentPostId": post_id,
                    "resolutionName": resolution_name,
                    "videoLength": video_length,
                }
            }
        }

    def _generation_stream(
        self,
        token: str,
        message: str,
        model_config_override: dict,
        post_id: str,
    ) -> AsyncGenerator[bytes, None]:
        """Start a videoGen chat request and stream its raw response lines.

        Shared by text-to-video and image-to-video (the original duplicated
        this closure in both methods). The session is now always closed in a
        finally block — the original leaked it when the stream completed
        without raising.
        """

        async def _stream():
            session = _new_session()
            try:
                # Hold the semaphore for the whole stream, as before.
                async with _get_video_semaphore():
                    stream_response = await AppChatReverse.request(
                        session,
                        token,
                        message=message,
                        model="grok-3",
                        tool_overrides={"videoGen": True},
                        model_config_override=model_config_override,
                    )
                    logger.info(f"Video generation started: post_id={post_id}")
                    async for line in stream_response:
                        yield line
            except Exception as e:
                logger.error(f"Video generation error: {e}")
                if isinstance(e, AppException):
                    raise
                raise UpstreamException(f"Video generation error: {str(e)}")
            finally:
                try:
                    await session.close()
                except Exception:
                    pass

        return _stream()

    async def generate(
        self,
        token: str,
        prompt: str,
        aspect_ratio: str = "3:2",
        video_length: int = 6,
        resolution_name: str = "480p",
        preset: str = "normal",
    ) -> AsyncGenerator[bytes, None]:
        """Generate a video from a text prompt; returns the raw line stream."""
        logger.info(
            f"Video generation: prompt='{prompt[:50]}...', ratio={aspect_ratio}, length={video_length}s, preset={preset}"
        )
        post_id = await self.create_post(token, prompt)
        message = self._mode_message(prompt, preset)
        model_config_override = self._video_model_config(
            post_id, aspect_ratio, resolution_name, video_length
        )
        return self._generation_stream(token, message, model_config_override, post_id)

    async def generate_from_image(
        self,
        token: str,
        prompt: str,
        image_url: str,
        aspect_ratio: str = "3:2",
        video_length: int = 6,
        resolution: str = "480p",
        preset: str = "normal",
    ) -> AsyncGenerator[bytes, None]:
        """Generate a video anchored on an uploaded image; returns the raw line stream."""
        logger.info(
            f"Image to video: prompt='{prompt[:50]}...', image={image_url[:80]}"
        )
        post_id = await self.create_image_post(token, image_url)
        message = self._mode_message(prompt, preset)
        model_config_override = self._video_model_config(
            post_id, aspect_ratio, resolution, video_length
        )
        return self._generation_stream(token, message, model_config_override, post_id)

    @staticmethod
    async def completions(
        model: str,
        messages: list,
        stream: Optional[bool] = None,
        reasoning_effort: str | None = None,
        aspect_ratio: str = "3:2",
        video_length: int = 6,
        resolution: str = "480p",
        preset: str = "normal",
    ):
        """Video generation entrypoint with token selection and 429 retry.

        Picks a token suited to the requested resolution/length, runs the
        generation, and returns either an SSE stream (stream=True) or a
        collected chat-completion dict.

        Raises:
            AppException: no token available (or only rate-limited failures).
            UpstreamException: non-rate-limit upstream failure.
        """
        # Get token via intelligent routing.
        token_mgr = await get_token_manager()
        await token_mgr.reload_if_stale()

        max_token_retries = int(get_config("retry.max_retry"))
        last_error: Exception | None = None

        # Reasoning display: explicit effort overrides the app-level default.
        if reasoning_effort is None:
            show_think = get_config("app.thinking")
        else:
            show_think = reasoning_effort != "none"
        is_stream = stream if stream is not None else get_config("app.stream")

        # Extract content (local import avoids a circular dependency with chat).
        from app.services.grok.services.chat import MessageExtractor
        from app.services.grok.utils.upload import UploadService

        prompt, file_attachments, image_attachments = MessageExtractor.extract(messages)

        # Pool candidates depend only on the model, so compute them once.
        pool_candidates = ModelService.pool_candidates_for_model(model)

        for attempt in range(max_token_retries):
            # Select token based on video requirements and pool candidates.
            token_info = token_mgr.get_token_for_video(
                resolution=resolution,
                video_length=video_length,
                pool_candidates=pool_candidates,
            )

            if not token_info:
                if last_error:
                    raise last_error
                raise AppException(
                    message="No available tokens. Please try again later.",
                    error_type=ErrorType.RATE_LIMIT.value,
                    code="rate_limit_exceeded",
                    status_code=429,
                )

            # Extract token string from TokenInfo.
            token = token_info.token
            if token.startswith("sso="):
                token = token[4:]
            pool_name = token_mgr.get_pool_name_for_token(token)
            # Basic-pool tokens cannot generate 720p directly; upscale afterwards.
            should_upscale = resolution == "720p" and pool_name == BASIC_POOL_NAME

            try:
                # Handle image attachments (single reference image supported).
                image_url = None
                if image_attachments:
                    upload_service = UploadService()
                    try:
                        if len(image_attachments) > 1:
                            logger.info(
                                "Video generation supports a single reference image; using the first one."
                            )
                        attach_data = image_attachments[0]
                        _, file_uri = await upload_service.upload_file(
                            attach_data, token
                        )
                        image_url = f"https://assets.grok.com/{file_uri}"
                        logger.info(f"Image uploaded for video: {image_url}")
                    finally:
                        await upload_service.close()

                # Generate video.
                service = VideoService()
                if image_url:
                    response = await service.generate_from_image(
                        token,
                        prompt,
                        image_url,
                        aspect_ratio,
                        video_length,
                        resolution,
                        preset,
                    )
                else:
                    response = await service.generate(
                        token,
                        prompt,
                        aspect_ratio,
                        video_length,
                        resolution,
                        preset,
                    )

                # Process response.
                if is_stream:
                    processor = VideoStreamProcessor(
                        model,
                        token,
                        show_think,
                        upscale_on_finish=should_upscale,
                    )
                    # Usage accounting is handled by the stream wrapper.
                    return wrap_stream_with_usage(
                        processor.process(response), token_mgr, token, model
                    )

                result = await VideoCollectProcessor(
                    model, token, upscale_on_finish=should_upscale
                ).process(response)
                try:
                    model_info = ModelService.get(model)
                    effort = (
                        EffortType.HIGH
                        if (model_info and model_info.cost.value == "high")
                        else EffortType.LOW
                    )
                    await token_mgr.consume(token, effort)
                    logger.debug(
                        f"Video completed, recorded usage (effort={effort.value})"
                    )
                except Exception as e:
                    # Usage tracking is best-effort; never fail the request for it.
                    logger.warning(f"Failed to record video usage: {e}")
                return result

            except UpstreamException as e:
                last_error = e
                if rate_limited(e):
                    # 429 from upstream: park this token and try the next one.
                    await token_mgr.mark_rate_limited(token)
                    logger.warning(
                        f"Token {token[:10]}... rate limited (429), "
                        f"trying next token (attempt {attempt + 1}/{max_token_retries})"
                    )
                    continue
                raise

        if last_error:
            raise last_error
        raise AppException(
            message="No available tokens. Please try again later.",
            error_type=ErrorType.RATE_LIMIT.value,
            code="rate_limit_exceeded",
            status_code=429,
        )
374
+
375
+
376
class VideoStreamProcessor(BaseProcessor):
    """Video stream response processor.

    Converts the upstream videoGen line stream into OpenAI-style chat
    completion SSE chunks, wrapping reasoning tokens in <think> tags and
    rendering the final video URL (optionally HD-upscaled first).
    """

    def __init__(
        self,
        model: str,
        token: str = "",
        show_think: Optional[bool] = None,
        upscale_on_finish: bool = False,
    ):
        super().__init__(model, token)
        # Upstream responseId, echoed as the chunk id once known.
        self.response_id: Optional[str] = None
        # True while a <think> tag is open in the emitted text.
        self.think_opened: bool = False
        # The first chunk must carry the assistant role exactly once.
        self.role_sent: bool = False

        # Emit reasoning/progress text when True.
        self.show_think = bool(show_think)
        # Request an HD upscale once generation reaches 100%.
        self.upscale_on_finish = bool(upscale_on_finish)

    @staticmethod
    def _extract_video_id(video_url: str) -> str:
        """Extract the generation UUID from an asset URL; "" when not found.

        Two URL layouts are recognized: .../generated/<id>/... and
        .../<id>/generated_video...
        """
        if not video_url:
            return ""
        match = re.search(r"/generated/([0-9a-fA-F-]{32,36})/", video_url)
        if match:
            return match.group(1)
        match = re.search(r"/([0-9a-fA-F-]{32,36})/generated_video", video_url)
        if match:
            return match.group(1)
        return ""

    async def _upscale_video_url(self, video_url: str) -> str:
        """Best-effort HD upscale; returns the original URL on any failure."""
        if not video_url or not self.upscale_on_finish:
            return video_url
        video_id = self._extract_video_id(video_url)
        if not video_id:
            logger.warning("Video upscale skipped: unable to extract video id")
            return video_url
        try:
            async with _new_session() as session:
                response = await VideoUpscaleReverse.request(
                    session, self.token, video_id
                )
                payload = response.json() if response is not None else {}
                hd_url = payload.get("hdMediaUrl") if isinstance(payload, dict) else None
                if hd_url:
                    logger.info(f"Video upscale completed: {hd_url}")
                    return hd_url
        except Exception as e:
            # Upscale is optional; never fail the stream because of it.
            logger.warning(f"Video upscale failed: {e}")
        return video_url

    def _sse(self, content: str = "", role: Optional[str] = None, finish: Optional[str] = None) -> str:
        """Build one chat.completion.chunk SSE frame.

        Passing role emits the role announcement with empty content; passing
        finish emits the final chunk with an empty delta.
        """
        delta = {}
        if role:
            delta["role"] = role
            delta["content"] = ""
        elif content:
            delta["content"] = content

        chunk = {
            # Fall back to a random id until the upstream responseId arrives.
            "id": self.response_id or f"chatcmpl-{uuid.uuid4().hex[:24]}",
            "object": "chat.completion.chunk",
            "created": self.created,
            "model": self.model,
            "choices": [
                {"index": 0, "delta": delta, "logprobs": None, "finish_reason": finish}
            ],
        }
        return f"data: {orjson.dumps(chunk).decode()}\n\n"

    async def process(
        self, response: AsyncIterable[bytes]
    ) -> AsyncGenerator[str, None]:
        """Process video stream response.

        Yields SSE chunk strings. Progress updates and reasoning tokens are
        emitted inside <think> tags when show_think is on; the finished video
        is rendered (after optional upscale) as regular content.
        """
        idle_timeout = get_config("video.stream_timeout")

        try:
            async for line in _with_idle_timeout(response, idle_timeout, self.model):
                line = _normalize_line(line)
                if not line:
                    continue
                try:
                    data = orjson.loads(line)
                except orjson.JSONDecodeError:
                    # Skip keep-alives / non-JSON frames.
                    continue

                resp = data.get("result", {}).get("response", {})
                is_thinking = bool(resp.get("isThinking"))

                if rid := resp.get("responseId"):
                    self.response_id = rid

                if not self.role_sent:
                    yield self._sse(role="assistant")
                    self.role_sent = True

                # Plain text token from the model.
                if token := resp.get("token"):
                    if is_thinking:
                        if not self.show_think:
                            continue
                        if not self.think_opened:
                            yield self._sse("<think>\n")
                            self.think_opened = True
                    else:
                        if self.think_opened:
                            yield self._sse("\n</think>\n")
                            self.think_opened = False
                    yield self._sse(token)
                    continue

                # Video generation progress / completion frame.
                if video_resp := resp.get("streamingVideoGenerationResponse"):
                    progress = video_resp.get("progress", 0)

                    if is_thinking:
                        if not self.show_think:
                            continue
                        if not self.think_opened:
                            yield self._sse("<think>\n")
                            self.think_opened = True
                    else:
                        if self.think_opened:
                            yield self._sse("\n</think>\n")
                            self.think_opened = False
                    if self.show_think:
                        # Progress message (Chinese): "Generating video, progress N%".
                        yield self._sse(f"正在生成视频中,当前进度{progress}%\n")

                    if progress == 100:
                        video_url = video_resp.get("videoUrl", "")
                        thumbnail_url = video_resp.get("thumbnailImageUrl", "")

                        if self.think_opened:
                            yield self._sse("\n</think>\n")
                            self.think_opened = False

                        if video_url:
                            if self.upscale_on_finish:
                                # Status message (Chinese): "Upscaling video resolution".
                                yield self._sse("正在对视频进行超分辨率\n")
                                video_url = await self._upscale_video_url(video_url)
                            dl_service = self._get_dl()
                            rendered = await dl_service.render_video(
                                video_url, self.token, thumbnail_url
                            )
                            yield self._sse(rendered)

                        logger.info(f"Video generated: {video_url}")
                    continue

            # Stream ended: close any dangling think tag, then finish.
            if self.think_opened:
                yield self._sse("</think>\n")
            yield self._sse(finish="stop")
            yield "data: [DONE]\n\n"
        except asyncio.CancelledError:
            logger.debug(
                "Video stream cancelled by client", extra={"model": self.model}
            )
        except StreamIdleTimeoutError as e:
            raise UpstreamException(
                message=f"Video stream idle timeout after {e.idle_seconds}s",
                status_code=504,
                details={
                    "error": str(e),
                    "type": "stream_idle_timeout",
                    "idle_seconds": e.idle_seconds,
                },
            )
        except RequestsError as e:
            if _is_http2_error(e):
                logger.warning(
                    f"HTTP/2 stream error in video: {e}", extra={"model": self.model}
                )
                raise UpstreamException(
                    message="Upstream connection closed unexpectedly",
                    status_code=502,
                    details={"error": str(e), "type": "http2_stream_error"},
                )
            logger.error(
                f"Video stream request error: {e}", extra={"model": self.model}
            )
            raise UpstreamException(
                message=f"Upstream request failed: {e}",
                status_code=502,
                details={"error": str(e)},
            )
        except Exception as e:
            logger.error(
                f"Video stream processing error: {e}",
                extra={"model": self.model, "error_type": type(e).__name__},
            )
        finally:
            await self.close()
567
+
568
+
569
class VideoCollectProcessor(BaseProcessor):
    """Video non-stream response processor.

    Drains the upstream video-generation stream to completion and returns a
    single OpenAI-style ``chat.completion`` dict once progress reaches 100%.
    """

    def __init__(self, model: str, token: str = "", upscale_on_finish: bool = False):
        super().__init__(model, token)
        # Whether to request an HD (upscaled) variant once generation finishes.
        self.upscale_on_finish = bool(upscale_on_finish)

    @staticmethod
    def _extract_video_id(video_url: str) -> str:
        """Extract the generated-video UUID from an assets URL; "" when absent."""
        if not video_url:
            return ""
        # Two known URL layouts: .../generated/<id>/... and .../<id>/generated_video...
        match = re.search(r"/generated/([0-9a-fA-F-]{32,36})/", video_url)
        if match:
            return match.group(1)
        match = re.search(r"/([0-9a-fA-F-]{32,36})/generated_video", video_url)
        if match:
            return match.group(1)
        return ""

    async def _upscale_video_url(self, video_url: str) -> str:
        """Best-effort upscale: return the HD URL, or the original URL on any failure."""
        if not video_url or not self.upscale_on_finish:
            return video_url
        video_id = self._extract_video_id(video_url)
        if not video_id:
            logger.warning("Video upscale skipped: unable to extract video id")
            return video_url
        try:
            async with _new_session() as session:
                response = await VideoUpscaleReverse.request(
                    session, self.token, video_id
                )
                payload = response.json() if response is not None else {}
                hd_url = payload.get("hdMediaUrl") if isinstance(payload, dict) else None
                if hd_url:
                    logger.info(f"Video upscale completed: {hd_url}")
                    return hd_url
        except Exception as e:
            # Upscaling is optional; never fail the request because of it.
            logger.warning(f"Video upscale failed: {e}")
        return video_url

    async def process(self, response: AsyncIterable[bytes]) -> dict[str, Any]:
        """Process and collect video response.

        Consumes the upstream byte stream (one JSON object per line), waits
        for ``progress == 100``, optionally upscales, then renders the final
        video into chat content. All errors are logged; on failure the
        returned payload simply carries empty content.
        """
        response_id = ""
        content = ""
        idle_timeout = get_config("video.stream_timeout")

        try:
            async for line in _with_idle_timeout(response, idle_timeout, self.model):
                line = _normalize_line(line)
                if not line:
                    continue
                try:
                    data = orjson.loads(line)
                except orjson.JSONDecodeError:
                    # Ignore keep-alive / non-JSON lines.
                    continue

                resp = data.get("result", {}).get("response", {})

                if video_resp := resp.get("streamingVideoGenerationResponse"):
                    if video_resp.get("progress") == 100:
                        response_id = resp.get("responseId", "")
                        video_url = video_resp.get("videoUrl", "")
                        thumbnail_url = video_resp.get("thumbnailImageUrl", "")

                        if video_url:
                            if self.upscale_on_finish:
                                video_url = await self._upscale_video_url(video_url)
                            dl_service = self._get_dl()
                            content = await dl_service.render_video(
                                video_url, self.token, thumbnail_url
                            )
                            logger.info(f"Video generated: {video_url}")

        except asyncio.CancelledError:
            logger.debug(
                "Video collect cancelled by client", extra={"model": self.model}
            )
        except StreamIdleTimeoutError as e:
            logger.warning(
                f"Video collect idle timeout: {e}", extra={"model": self.model}
            )
        except RequestsError as e:
            if _is_http2_error(e):
                logger.warning(
                    f"HTTP/2 stream error in video collect: {e}",
                    extra={"model": self.model},
                )
            else:
                logger.error(
                    f"Video collect request error: {e}", extra={"model": self.model}
                )
        except Exception as e:
            logger.error(
                f"Video collect processing error: {e}",
                extra={"model": self.model, "error_type": type(e).__name__},
            )
        finally:
            # Always release the shared download session.
            await self.close()

        return {
            "id": response_id,
            "object": "chat.completion",
            "created": self.created,
            "model": self.model,
            "choices": [
                {
                    "index": 0,
                    "message": {
                        "role": "assistant",
                        "content": content,
                        "refusal": None,
                    },
                    "finish_reason": "stop",
                }
            ],
            # Upstream does not report token usage for video generation.
            "usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0},
        }
686
+
687
+
688
# Public API of this module.
__all__ = ["VideoService"]
app/services/grok/services/voice.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Grok Voice Mode Service
3
+ """
4
+
5
+ from typing import Any, Dict
6
+
7
+ from app.core.config import get_config
8
+ from app.services.reverse.ws_livekit import LivekitTokenReverse
9
+ from app.services.reverse.utils.session import ResettableSession
10
+
11
+
12
class VoiceService:
    """Voice Mode Service (LiveKit).

    Requests LiveKit access tokens from the upstream reverse endpoint for
    Grok voice-mode sessions.
    """

    async def get_token(
        self,
        token: str,
        voice: str = "ara",
        personality: str = "assistant",
        speed: float = 1.0,
    ) -> Dict[str, Any]:
        """Fetch a LiveKit token for the given SSO token and voice settings."""
        impersonation = get_config("proxy.browser")
        async with ResettableSession(impersonate=impersonation) as session:
            upstream = await LivekitTokenReverse.request(
                session,
                token=token,
                voice=voice,
                personality=personality,
                speed=speed,
            )
            return upstream.json()
app/services/grok/utils/cache.py ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Local cache utilities.
3
+ """
4
+
5
+ from typing import Any, Dict
6
+
7
+ from app.core.storage import DATA_DIR
8
+
9
# Extensions counted as cached media when computing stats / listings.
IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp"}
VIDEO_EXTS = {".mp4", ".mov", ".m4v", ".webm", ".avi", ".mkv"}
11
+
12
+
13
class CacheService:
    """Local cache service.

    Manages the on-disk media cache under ``DATA_DIR/tmp`` with separate
    sub-directories for images and videos: stats, listing with pagination,
    single-file deletion and bulk clearing.
    """

    def __init__(self):
        base_dir = DATA_DIR / "tmp"
        self.image_dir = base_dir / "image"
        self.video_dir = base_dir / "video"
        # Create both directories eagerly so later operations can assume them.
        self.image_dir.mkdir(parents=True, exist_ok=True)
        self.video_dir.mkdir(parents=True, exist_ok=True)

    def _cache_dir(self, media_type: str):
        """Return the cache directory for *media_type* ("image"; anything else -> video)."""
        return self.image_dir if media_type == "image" else self.video_dir

    def _allowed_exts(self, media_type: str):
        """Return the extensions treated as media files for *media_type*."""
        return IMAGE_EXTS if media_type == "image" else VIDEO_EXTS

    def get_stats(self, media_type: str = "image") -> Dict[str, Any]:
        """Return ``{"count", "size_mb"}`` for cached media of *media_type*."""
        cache_dir = self._cache_dir(media_type)
        if not cache_dir.exists():
            return {"count": 0, "size_mb": 0.0}

        allowed = self._allowed_exts(media_type)
        files = [
            f for f in cache_dir.glob("*") if f.is_file() and f.suffix.lower() in allowed
        ]
        total_size = sum(f.stat().st_size for f in files)
        return {"count": len(files), "size_mb": round(total_size / 1024 / 1024, 2)}

    def list_files(
        self, media_type: str = "image", page: int = 1, page_size: int = 1000
    ) -> Dict[str, Any]:
        """List cached files (newest first) with simple pagination.

        Returns ``{"total", "page", "page_size", "items"}``; each item carries
        name, size_bytes, mtime_ms and a ``view_url`` served by ``/v1/files``.
        """
        cache_dir = self._cache_dir(media_type)
        if not cache_dir.exists():
            return {"total": 0, "page": page, "page_size": page_size, "items": []}

        allowed = self._allowed_exts(media_type)
        files = [
            f for f in cache_dir.glob("*") if f.is_file() and f.suffix.lower() in allowed
        ]

        items = []
        for f in files:
            try:
                stat = f.stat()
                items.append(
                    {
                        "name": f.name,
                        "size_bytes": stat.st_size,
                        "mtime_ms": int(stat.st_mtime * 1000),
                    }
                )
            except Exception:
                # File may vanish between glob and stat; skip it.
                continue

        items.sort(key=lambda x: x["mtime_ms"], reverse=True)

        total = len(items)
        start = max(0, (page - 1) * page_size)
        paged = items[start : start + page_size]

        for item in paged:
            item["view_url"] = f"/v1/files/{media_type}/{item['name']}"

        return {"total": total, "page": page, "page_size": page_size, "items": paged}

    def delete_file(self, media_type: str, name: str) -> Dict[str, Any]:
        """Delete one cached file by name; returns ``{"deleted": bool}``.

        The name is sanitized so API callers cannot traverse outside the
        cache directory.
        """
        cache_dir = self._cache_dir(media_type)
        # Neutralize both separator styles ("/" and "\") so crafted names such
        # as "..\\..\\x" cannot escape cache_dir on any platform. The original
        # code only replaced "/", leaving Windows-style traversal possible.
        safe_name = name.replace("/", "-").replace("\\", "-")
        file_path = cache_dir / safe_name

        if file_path.exists():
            try:
                file_path.unlink()
                return {"deleted": True}
            except Exception:
                # Deletion races / permission errors fall through to False.
                pass
        return {"deleted": False}

    def clear(self, media_type: str = "image") -> Dict[str, Any]:
        """Delete every file in *media_type*'s cache dir; return count/size removed."""
        cache_dir = self._cache_dir(media_type)
        if not cache_dir.exists():
            return {"count": 0, "size_mb": 0.0}

        files = list(cache_dir.glob("*"))
        total_size = sum(f.stat().st_size for f in files if f.is_file())
        count = 0

        for f in files:
            if f.is_file():
                try:
                    f.unlink()
                    count += 1
                except Exception:
                    pass

        return {"count": count, "size_mb": round(total_size / 1024 / 1024, 2)}
108
+
109
+
110
# Public API of this module.
__all__ = ["CacheService"]
app/services/grok/utils/download.py ADDED
@@ -0,0 +1,298 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Download service.
3
+
4
+ Download service for assets.grok.com.
5
+ """
6
+
7
+ import asyncio
8
+ import base64
9
+ import hashlib
10
+ import os
11
+ from pathlib import Path
12
+ from typing import List, Optional, Tuple
13
+ from urllib.parse import urlparse
14
+
15
+ import aiofiles
16
+
17
+ from app.core.logger import logger
18
+ from app.core.storage import DATA_DIR
19
+ from app.core.config import get_config
20
+ from app.core.exceptions import AppException
21
+ from app.services.reverse.assets_download import AssetsDownloadReverse
22
+ from app.services.reverse.utils.session import ResettableSession
23
+ from app.services.grok.utils.locks import _get_download_semaphore, _file_lock
24
+
25
+
26
class DownloadService:
    """Assets download service.

    Downloads files from assets.grok.com into a local on-disk cache
    (``DATA_DIR/tmp/{image,video}``), resolves asset paths to externally
    served URLs, and renders image/video markup for chat responses.
    A single upstream session is lazily created and reused.
    """

    def __init__(self):
        self._session: Optional[ResettableSession] = None
        base_dir = DATA_DIR / "tmp"
        self.image_dir = base_dir / "image"
        self.video_dir = base_dir / "video"
        self.image_dir.mkdir(parents=True, exist_ok=True)
        self.video_dir.mkdir(parents=True, exist_ok=True)
        # Guards against scheduling overlapping cache-cleanup tasks.
        self._cleanup_running = False

    async def create(self) -> ResettableSession:
        """Create or reuse a session."""
        if self._session is None:
            browser = get_config("proxy.browser")
            if browser:
                self._session = ResettableSession(impersonate=browser)
            else:
                self._session = ResettableSession()
        return self._session

    async def close(self):
        """Close the session."""
        if self._session:
            await self._session.close()
            self._session = None

    async def resolve_url(
        self, path_or_url: str, token: str, media_type: str = "image"
    ) -> str:
        """Resolve an asset path or absolute URL to the URL to expose to clients.

        When ``app.app_url`` is configured, the asset is downloaded into the
        local cache and a self-hosted ``/v1/files/...`` URL is returned;
        otherwise the upstream assets URL is returned unchanged.
        """
        asset_url = path_or_url
        path = path_or_url
        if path_or_url.startswith("http"):
            parsed = urlparse(path_or_url)
            path = parsed.path or ""
            asset_url = path_or_url
        else:
            if not path_or_url.startswith("/"):
                path_or_url = f"/{path_or_url}"
            path = path_or_url
            asset_url = f"https://assets.grok.com{path_or_url}"

        app_url = get_config("app.app_url")
        if app_url:
            # Ensure the file exists locally before handing out the local URL.
            await self.download_file(asset_url, token, media_type)
            return f"{app_url.rstrip('/')}/v1/files/{media_type}{path}"
        return asset_url

    async def render_image(
        self, url: str, token: str, image_id: str = "image"
    ) -> str:
        """Render an image as markdown, honoring ``app.image_format``.

        Supported formats: "base64" (inline data URI), "url"/"markdown"
        (resolved URL). On any error, falls back to the resolved-URL form.
        """
        fmt = get_config("app.image_format")
        fmt = fmt.lower() if isinstance(fmt, str) else "url"
        if fmt not in ("base64", "url", "markdown"):
            fmt = "url"
        try:
            if fmt == "base64":
                data_uri = await self.parse_b64(url, token, "image")
                return f"![{image_id}]({data_uri})"
            final_url = await self.resolve_url(url, token, "image")
            return f"![{image_id}]({final_url})"
        except Exception as e:
            logger.warning(f"Image render failed, fallback to URL: {e}")
            final_url = await self.resolve_url(url, token, "image")
            return f"![{image_id}]({final_url})"

    async def render_video(
        self, video_url: str, token: str, thumbnail_url: str = ""
    ) -> str:
        """Render a video per ``app.video_format``: plain URL, markdown, or HTML."""
        fmt = get_config("app.video_format")
        fmt = fmt.lower() if isinstance(fmt, str) else "url"
        if fmt not in ("url", "markdown", "html"):
            fmt = "url"
        final_video_url = await self.resolve_url(video_url, token, "video")
        final_thumb_url = ""
        if thumbnail_url:
            final_thumb_url = await self.resolve_url(thumbnail_url, token, "image")
        if fmt == "url":
            return f"{final_video_url}\n"
        if fmt == "markdown":
            return f"[video]({final_video_url})"
        import html

        # HTML format: escape URLs to keep the generated tag well-formed.
        safe_video_url = html.escape(final_video_url)
        safe_thumbnail_url = html.escape(final_thumb_url)
        poster_attr = f' poster="{safe_thumbnail_url}"' if safe_thumbnail_url else ""
        return f'''<video id="video" controls="" preload="none"{poster_attr}>
<source id="mp4" src="{safe_video_url}" type="video/mp4">
</video>'''

    async def parse_b64(self, file_path: str, token: str, media_type: str = "image") -> str:
        """Download and return data URI.

        Raises:
            AppException: for invalid paths (empty or already a data URI).
            Any download error is logged and re-raised.
        """
        try:
            if not isinstance(file_path, str) or not file_path.strip():
                raise AppException("Invalid file path", code="invalid_file_path")
            if file_path.startswith("data:"):
                raise AppException("Invalid file path", code="invalid_file_path")
            file_path = self._normalize_path(file_path)
            # Per-path lock so concurrent requests for the same asset serialize.
            lock_name = f"dl_b64_{hashlib.sha1(file_path.encode()).hexdigest()[:16]}"
            lock_timeout = max(1, int(get_config("asset.download_timeout")))
            async with _get_download_semaphore():
                async with _file_lock(lock_name, timeout=lock_timeout):
                    session = await self.create()
                    response = await AssetsDownloadReverse.request(
                        session, token, file_path
                    )

                    # Prefer streaming when the response supports it.
                    if hasattr(response, "aiter_content"):
                        data = bytearray()
                        async for chunk in response.aiter_content():
                            if chunk:
                                data.extend(chunk)
                        raw = bytes(data)
                    else:
                        raw = response.content

                    content_type = response.headers.get(
                        "content-type", "application/octet-stream"
                    ).split(";")[0]
                    data_uri = f"data:{content_type};base64,{base64.b64encode(raw).decode()}"

                    return data_uri
        except Exception as e:
            logger.error(f"Failed to convert {file_path} to base64: {e}")
            raise

    def _normalize_path(self, file_path: str) -> str:
        """Normalize URL or path to assets path for download.

        Accepts absolute http(s) URLs (query string preserved) or bare paths;
        always returns a leading-slash path.

        Raises:
            AppException: for empty input, data URIs, or non-http(s) schemes.
        """
        if not isinstance(file_path, str) or not file_path.strip():
            raise AppException("Invalid file path", code="invalid_file_path")

        value = file_path.strip()
        if value.startswith("data:"):
            raise AppException("Invalid file path", code="invalid_file_path")

        parsed = urlparse(value)
        if parsed.scheme or parsed.netloc:
            if not (
                parsed.scheme and parsed.netloc and parsed.scheme in ["http", "https"]
            ):
                raise AppException("Invalid file path", code="invalid_file_path")
            path = parsed.path or ""
            if parsed.query:
                path = f"{path}?{parsed.query}"
        else:
            path = value

        if not path:
            raise AppException("Invalid file path", code="invalid_file_path")
        if not path.startswith("/"):
            path = f"/{path}"

        return path

    async def download_file(self, file_path: str, token: str, media_type: str = "image") -> Tuple[Optional[Path], str]:
        """Download asset to local cache.

        Args:
            file_path: str, the path of the file to download.
            token: str, the SSO token.
            media_type: str, the media type of the file.

        Returns:
            Tuple[Optional[Path], str]: The path of the downloaded file and the MIME type.
        """
        async with _get_download_semaphore():
            file_path = self._normalize_path(file_path)
            cache_dir = self.image_dir if media_type == "image" else self.video_dir
            # Flatten the asset path into a single cache filename.
            filename = file_path.lstrip("/").replace("/", "-")
            cache_path = cache_dir / filename

            lock_name = (
                f"dl_{media_type}_{hashlib.sha1(str(cache_path).encode()).hexdigest()[:16]}"
            )
            lock_timeout = max(1, int(get_config("asset.download_timeout")))
            async with _file_lock(lock_name, timeout=lock_timeout):
                session = await self.create()
                response = await AssetsDownloadReverse.request(session, token, file_path)

                # Write to a temp file then atomically rename, so readers never
                # observe a partially written cache entry.
                tmp_path = cache_path.with_suffix(cache_path.suffix + ".tmp")
                try:
                    async with aiofiles.open(tmp_path, "wb") as f:
                        if hasattr(response, "aiter_content"):
                            async for chunk in response.aiter_content():
                                if chunk:
                                    await f.write(chunk)
                        else:
                            await f.write(response.content)
                    os.replace(tmp_path, cache_path)
                finally:
                    # Remove leftover temp file if the rename never happened.
                    if tmp_path.exists() and not cache_path.exists():
                        try:
                            tmp_path.unlink()
                        except Exception:
                            pass

                mime = response.headers.get(
                    "content-type", "application/octet-stream"
                ).split(";")[0]
                logger.info(f"Downloaded: {file_path}")

                # NOTE(review): fire-and-forget — the task reference is not
                # retained, so it could in principle be garbage-collected
                # before running; confirm whether that matters here.
                asyncio.create_task(self._check_limit())

                return cache_path, mime

    async def _check_limit(self):
        """Check the cache size limit and evict oldest files when exceeded.

        No-op when a cleanup is already running or auto-clean is disabled.
        Evicts least-recently-modified files until the cache is at 80% of
        the configured ``cache.limit_mb``.
        """
        if self._cleanup_running or not get_config("cache.enable_auto_clean"):
            return

        self._cleanup_running = True
        try:
            try:
                # Cross-process lock: only one worker cleans at a time.
                async with _file_lock("cache_cleanup", timeout=5):
                    limit_mb = get_config("cache.limit_mb")
                    total_size = 0
                    all_files: List[Tuple[Path, float, int]] = []

                    for d in [self.image_dir, self.video_dir]:
                        if d.exists():
                            for f in d.glob("*"):
                                if f.is_file():
                                    try:
                                        stat = f.stat()
                                        total_size += stat.st_size
                                        all_files.append(
                                            (f, stat.st_mtime, stat.st_size)
                                        )
                                    except Exception:
                                        pass
                    current_mb = total_size / 1024 / 1024

                    if current_mb <= limit_mb:
                        return

                    logger.info(
                        f"Cache limit exceeded ({current_mb:.2f}MB > {limit_mb}MB), cleaning..."
                    )
                    # Oldest-modified first.
                    all_files.sort(key=lambda x: x[1])

                    deleted_count = 0
                    deleted_size = 0
                    target_mb = limit_mb * 0.8

                    for f, _, size in all_files:
                        try:
                            f.unlink()
                            deleted_count += 1
                            deleted_size += size
                            total_size -= size
                            if (total_size / 1024 / 1024) <= target_mb:
                                break
                        except Exception:
                            pass

                    logger.info(
                        f"Cache cleanup: {deleted_count} files ({deleted_size / 1024 / 1024:.2f}MB)"
                    )
            except Exception as e:
                logger.warning(f"Cache cleanup failed: {e}")
        finally:
            self._cleanup_running = False
296
+
297
+
298
# Public API of this module.
__all__ = ["DownloadService"]
app/services/grok/utils/locks.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Shared locking helpers for assets operations.
3
+ """
4
+
5
+ import asyncio
6
+ import time
7
+ from contextlib import asynccontextmanager
8
+ from pathlib import Path
9
+
10
+ from app.core.config import get_config
11
+ from app.core.storage import DATA_DIR
12
+
13
+ try:
14
+ import fcntl
15
+ except ImportError:
16
+ fcntl = None
17
+
18
+
19
# Directory where inter-process lock files are created.
LOCK_DIR = DATA_DIR / ".locks"

# Lazily built global semaphores, stored with the config value each was built
# for so that a changed concurrency limit rebuilds the semaphore.
_UPLOAD_SEMAPHORE = None
_UPLOAD_SEM_VALUE = None
_DOWNLOAD_SEMAPHORE = None
_DOWNLOAD_SEM_VALUE = None
25
+
26
+
27
+ def _get_upload_semaphore() -> asyncio.Semaphore:
28
+ """Return global semaphore for upload operations."""
29
+ value = max(1, int(get_config("asset.upload_concurrent")))
30
+
31
+ global _UPLOAD_SEMAPHORE, _UPLOAD_SEM_VALUE
32
+ if _UPLOAD_SEMAPHORE is None or value != _UPLOAD_SEM_VALUE:
33
+ _UPLOAD_SEM_VALUE = value
34
+ _UPLOAD_SEMAPHORE = asyncio.Semaphore(value)
35
+ return _UPLOAD_SEMAPHORE
36
+
37
+
38
def _get_download_semaphore() -> asyncio.Semaphore:
    """Return the process-wide semaphore bounding concurrent downloads.

    Created lazily; rebuilt whenever the configured limit changes.
    """
    global _DOWNLOAD_SEMAPHORE, _DOWNLOAD_SEM_VALUE

    limit = max(1, int(get_config("asset.download_concurrent")))
    if _DOWNLOAD_SEMAPHORE is None or _DOWNLOAD_SEM_VALUE != limit:
        _DOWNLOAD_SEM_VALUE = limit
        _DOWNLOAD_SEMAPHORE = asyncio.Semaphore(limit)
    return _DOWNLOAD_SEMAPHORE
47
+
48
+
49
@asynccontextmanager
async def _file_lock(name: str, timeout: int = 10):
    """File lock guard.

    Cross-process advisory lock based on ``fcntl.flock``. Polls with a
    non-blocking acquire every 50ms until the lock is obtained or *timeout*
    seconds elapse, then raises TimeoutError. On platforms without fcntl
    (e.g. Windows) the guard degrades to a no-op.
    """
    if fcntl is None:
        # No fcntl available: run the guarded section unlocked.
        yield
        return

    LOCK_DIR.mkdir(parents=True, exist_ok=True)
    lock_path = Path(LOCK_DIR) / f"{name}.lock"
    fd = None
    locked = False
    start = time.monotonic()

    try:
        fd = open(lock_path, "a+")
        while True:
            try:
                # LOCK_NB keeps the event loop responsive; we sleep between tries.
                fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
                locked = True
                break
            except BlockingIOError:
                if time.monotonic() - start >= timeout:
                    break
                await asyncio.sleep(0.05)
        if not locked:
            raise TimeoutError(f"Failed to acquire lock: {name}")
        yield
    finally:
        if fd:
            if locked:
                try:
                    fcntl.flock(fd, fcntl.LOCK_UN)
                except Exception:
                    pass
            fd.close()
84
+
85
+
86
# Public API of this module (intentionally exports the underscored helpers).
__all__ = ["_get_upload_semaphore", "_get_download_semaphore", "_file_lock"]
app/services/grok/utils/process.py ADDED
@@ -0,0 +1,152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ 响应处理器基类和通用工具
3
+ """
4
+
5
+ import asyncio
6
+ import time
7
+ from typing import Any, AsyncGenerator, Optional, AsyncIterable, List, TypeVar
8
+
9
+ from app.core.config import get_config
10
+ from app.core.logger import logger
11
+ from app.core.exceptions import StreamIdleTimeoutError
12
+ from app.services.grok.utils.download import DownloadService
13
+
14
+
15
# Generic item type yielded by the wrapped async iterators.
T = TypeVar("T")
16
+
17
+
18
+ def _is_http2_error(e: Exception) -> bool:
19
+ """检查是否为 HTTP/2 流错误"""
20
+ err_str = str(e).lower()
21
+ return "http/2" in err_str or "curl: (92)" in err_str or "stream" in err_str
22
+
23
+
24
+ def _normalize_line(line: Any) -> Optional[str]:
25
+ """规范化流式响应行,兼容 SSE data 前缀与空行"""
26
+ if line is None:
27
+ return None
28
+ if isinstance(line, (bytes, bytearray)):
29
+ text = line.decode("utf-8", errors="ignore")
30
+ else:
31
+ text = str(line)
32
+ text = text.strip()
33
+ if not text:
34
+ return None
35
+ if text.startswith("data:"):
36
+ text = text[5:].strip()
37
+ if text == "[DONE]":
38
+ return None
39
+ return text
40
+
41
+
42
+ def _collect_images(obj: Any) -> List[str]:
43
+ """递归收集响应中的图片 URL"""
44
+ urls: List[str] = []
45
+ seen = set()
46
+
47
+ def add(url: str):
48
+ if not url or url in seen:
49
+ return
50
+ seen.add(url)
51
+ urls.append(url)
52
+
53
+ def walk(value: Any):
54
+ if isinstance(value, dict):
55
+ for key, item in value.items():
56
+ if key in {"generatedImageUrls", "imageUrls", "imageURLs"}:
57
+ if isinstance(item, list):
58
+ for url in item:
59
+ if isinstance(url, str):
60
+ add(url)
61
+ elif isinstance(item, str):
62
+ add(item)
63
+ continue
64
+ walk(item)
65
+ elif isinstance(value, list):
66
+ for item in value:
67
+ walk(item)
68
+
69
+ walk(obj)
70
+ return urls
71
+
72
+
73
async def _with_idle_timeout(
    iterable: AsyncIterable[T], idle_timeout: float, model: str = ""
) -> AsyncGenerator[T, None]:
    """
    Wrap an async iterator with idle-timeout supervision.

    Args:
        iterable: the original async iterable
        idle_timeout: idle timeout in seconds; 0 (or less) disables the check
        model: model name (for logging only)

    Raises:
        StreamIdleTimeoutError: when no item arrives within idle_timeout seconds
    """
    if idle_timeout <= 0:
        # Fast path: no supervision requested.
        async for item in iterable:
            yield item
        return

    iterator = iterable.__aiter__()

    async def _maybe_aclose(it):
        # Best-effort close of the underlying generator, if it supports aclose().
        aclose = getattr(it, "aclose", None)
        if not aclose:
            return
        try:
            await aclose()
        except Exception:
            pass

    while True:
        try:
            item = await asyncio.wait_for(iterator.__anext__(), timeout=idle_timeout)
            yield item
        except asyncio.TimeoutError:
            logger.warning(
                f"Stream idle timeout after {idle_timeout}s",
                extra={"model": model, "idle_timeout": idle_timeout},
            )
            await _maybe_aclose(iterator)
            raise StreamIdleTimeoutError(idle_timeout)
        except asyncio.CancelledError:
            # Propagate cancellation, but close the upstream iterator first.
            await _maybe_aclose(iterator)
            raise
        except StopAsyncIteration:
            break
+ break
116
+
117
+
118
class BaseProcessor:
    """Base response processor.

    Holds the model/token pair shared by all processors and lazily creates a
    DownloadService for resolving asset URLs.
    """

    def __init__(self, model: str, token: str = ""):
        self.model = model
        self.token = token
        # Creation timestamp reused in OpenAI-style response payloads.
        self.created = int(time.time())
        self.app_url = get_config("app.app_url")
        self._dl_service: Optional[DownloadService] = None

    def _get_dl(self) -> DownloadService:
        """Return the (lazily created, reused) download service instance."""
        if self._dl_service is None:
            self._dl_service = DownloadService()
        return self._dl_service

    async def close(self):
        """Release download-service resources (its network session)."""
        if self._dl_service:
            await self._dl_service.close()
            self._dl_service = None

    async def process_url(self, path: str, media_type: str = "image") -> str:
        """Resolve an asset path/URL into the externally served URL."""
        dl_service = self._get_dl()
        return await dl_service.resolve_url(path, self.token, media_type)
+ return await dl_service.resolve_url(path, self.token, media_type)
144
+
145
+
146
# Public API of this module (underscored helpers exported deliberately).
__all__ = [
    "BaseProcessor",
    "_with_idle_timeout",
    "_normalize_line",
    "_collect_images",
    "_is_http2_error",
]
+ ]
app/services/grok/utils/response.py ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Response formatting utilities for OpenAI-compatible API responses.
3
+ """
4
+
5
+ import os
6
+ import time
7
+ import uuid
8
+ from typing import Optional
9
+
10
+
11
def make_response_id() -> str:
    """Generate a unique response ID (ms timestamp plus an 8-hex-char suffix)."""
    timestamp_ms = int(time.time() * 1000)
    suffix = os.urandom(4).hex()
    return "chatcmpl-" + str(timestamp_ms) + suffix
14
+
15
+
16
def make_chat_chunk(
    response_id: str,
    model: str,
    content: str,
    index: int = 0,
    role: str = "assistant",
    is_final: bool = False,
) -> dict:
    """
    Create an OpenAI-compatible chat completion chunk.

    Args:
        response_id: Unique response ID
        model: Model name
        content: Content to send
        index: Choice index
        role: Role (assistant)
        is_final: Whether this is the final chunk (includes finish_reason)

    Returns:
        Chat completion chunk dict
    """
    choice: dict = {
        "index": index,
        "delta": {"role": role, "content": content},
    }
    chunk: dict = {
        "id": response_id,
        "object": "chat.completion.chunk",
        "created": int(time.time()),
        "model": model,
        "choices": [choice],
    }

    # Final chunks carry a finish reason plus a zeroed usage stanza.
    if is_final:
        choice["finish_reason"] = "stop"
        chunk["usage"] = {
            "total_tokens": 0,
            "input_tokens": 0,
            "output_tokens": 0,
            "input_tokens_details": {"text_tokens": 0, "image_tokens": 0},
        }

    return chunk
66
+
67
+
68
def make_chat_response(
    model: str,
    content: str,
    response_id: Optional[str] = None,
    index: int = 0,
    usage: Optional[dict] = None,
) -> dict:
    """
    Create an OpenAI-compatible non-streaming chat completion response.

    Args:
        model: Model name
        content: Response content
        response_id: Unique response ID (generated if not provided)
        index: Choice index
        usage: Custom usage dict (defaults to zeros)

    Returns:
        Chat completion response dict
    """
    # Only generate an ID when the caller explicitly passed None.
    if response_id is None:
        response_id = f"chatcmpl-{uuid.uuid4().hex[:8]}"

    if usage is None:
        usage = {
            "total_tokens": 0,
            "input_tokens": 0,
            "output_tokens": 0,
            "input_tokens_details": {"text_tokens": 0, "image_tokens": 0},
        }

    message = {
        "role": "assistant",
        "content": content,
        "refusal": None,
    }
    return {
        "id": response_id,
        "object": "chat.completion",
        "created": int(time.time()),
        "model": model,
        "choices": [
            {"index": index, "message": message, "finish_reason": "stop"}
        ],
        "usage": usage,
    }
117
+
118
+
119
def wrap_image_content(content: str, response_format: str = "url") -> str:
    """
    Wrap image content in markdown format for chat interface.

    Args:
        content: Image URL or base64 data
        response_format: "url" or "b64_json"/"base64"

    Returns:
        Markdown-wrapped image content
    """
    # Empty content passes through untouched.
    if not content:
        return content

    target = (
        content
        if response_format == "url"
        else f"data:image/png;base64,{content}"
    )
    return f"![image]({target})"
137
+
138
+
139
# Public API of this module.
__all__ = [
    "make_response_id",
    "make_chat_chunk",
    "make_chat_response",
    "wrap_image_content",
]
app/services/grok/utils/retry.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Retry helpers for token switching.
3
+ """
4
+
5
+ from typing import Optional, Set
6
+
7
+ from app.core.exceptions import UpstreamException
8
+ from app.services.grok.services.model import ModelService
9
+
10
+
11
+ async def pick_token(
12
+ token_mgr,
13
+ model_id: str,
14
+ tried: Set[str],
15
+ preferred: Optional[str] = None,
16
+ prefer_tags: Optional[Set[str]] = None,
17
+ ) -> Optional[str]:
18
+ if preferred and preferred not in tried:
19
+ return preferred
20
+
21
+ token = None
22
+ for pool_name in ModelService.pool_candidates_for_model(model_id):
23
+ token = token_mgr.get_token(pool_name, exclude=tried, prefer_tags=prefer_tags)
24
+ if token:
25
+ break
26
+
27
+ if not token and not tried:
28
+ result = await token_mgr.refresh_cooling_tokens()
29
+ if result.get("recovered", 0) > 0:
30
+ for pool_name in ModelService.pool_candidates_for_model(model_id):
31
+ token = token_mgr.get_token(pool_name, prefer_tags=prefer_tags)
32
+ if token:
33
+ break
34
+
35
+ return token
36
+
37
+
38
def rate_limited(error: Exception) -> bool:
    """Return True when *error* represents an upstream rate-limit rejection."""
    if not isinstance(error, UpstreamException):
        return False
    details = error.details or {}
    status = details.get("status")
    code = details.get("error_code")
    return status == 429 or code == "rate_limit_exceeded"
44
+
45
+
46
def transient_upstream(error: Exception) -> bool:
    """Whether error is likely transient and safe to retry with another token."""
    if not isinstance(error, UpstreamException):
        return False
    details = error.details or {}
    if details.get("status") in {408, 500, 502, 503, 504}:
        return True
    # Fall back to message sniffing for timeout / connection-level failures.
    message = str(details.get("error") or error).lower()
    for marker in (
        "timed out",
        "timeout",
        "connection reset",
        "temporarily unavailable",
        "http2",
    ):
        if marker in message:
            return True
    return False
64
+
65
+
66
+ __all__ = ["pick_token", "rate_limited", "transient_upstream"]
app/services/grok/utils/stream.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ 流式响应通用工具
3
+ """
4
+
5
+ from typing import AsyncGenerator
6
+
7
+ from app.core.logger import logger
8
+ from app.services.grok.services.model import ModelService
9
+ from app.services.token import EffortType
10
+
11
+
12
async def wrap_stream_with_usage(
    stream: AsyncGenerator, token_mgr, token: str, model: str
) -> AsyncGenerator:
    """Proxy a streaming response and record token usage once it completes.

    Args:
        stream: The upstream AsyncGenerator of response chunks.
        token_mgr: TokenManager instance used to record consumption.
        token: Token string the stream was served with.
        model: Model name; determines the recorded effort level.
    """
    completed = False
    try:
        async for piece in stream:
            yield piece
        # Only reached when the upstream generator was fully drained.
        completed = True
    finally:
        # Early disconnects (GeneratorExit) skip accounting on purpose.
        if completed:
            try:
                info = ModelService.get(model)
                high_cost = bool(info) and info.cost.value == "high"
                effort = EffortType.HIGH if high_cost else EffortType.LOW
                await token_mgr.consume(token, effort)
                logger.debug(
                    f"Stream completed, recorded usage for token {token[:10]}... (effort={effort.value})"
                )
            except Exception as e:
                logger.warning(f"Failed to record stream usage: {e}")
44
+
45
+
46
# Public API of this module.
__all__ = ["wrap_stream_with_usage"]
app/services/grok/utils/tool_call.py ADDED
@@ -0,0 +1,319 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Tool call utilities for OpenAI-compatible function calling.
3
+
4
+ Provides prompt-based emulation of tool calls by injecting tool definitions
5
+ into the system prompt and parsing structured responses.
6
+ """
7
+
8
+ import json
9
+ import re
10
+ import uuid
11
+ from typing import Any, Dict, List, Optional, Tuple
12
+
13
+
14
def build_tool_prompt(
    tools: List[Dict[str, Any]],
    tool_choice: Optional[Any] = None,
    parallel_tool_calls: bool = True,
) -> str:
    """Generate a system prompt block describing available tools.

    Args:
        tools: List of OpenAI-format tool definitions.
        tool_choice: "auto", "required", "none", or {"type":"function","function":{"name":"..."}}.
        parallel_tool_calls: Whether multiple tool calls are allowed.

    Returns:
        System prompt string to prepend to the conversation.
    """
    if not tools:
        return ""

    # tool_choice="none" means don't mention tools at all
    if tool_choice == "none":
        return ""

    lines = [
        "# Available Tools",
        "",
        "You have access to the following tools. To call a tool, output a <tool_call> block with a JSON object containing \"name\" and \"arguments\".",
        "",
        "Format:",
        "<tool_call>",
        '{"name": "function_name", "arguments": {"param": "value"}}',
        "</tool_call>",
        "",
    ]

    if parallel_tool_calls:
        lines.append("You may make multiple tool calls in a single response by using multiple <tool_call> blocks.")
    else:
        # Fix: previously the False case was silently ignored, so the model
        # was never told that only a single tool call is allowed per turn.
        lines.append("Make at most one tool call per response.")
    lines.append("")

    # Describe each tool
    lines.append("## Tool Definitions")
    lines.append("")
    for tool in tools:
        # Only function-type tools are supported by this emulation layer.
        if tool.get("type") != "function":
            continue
        func = tool.get("function", {})
        name = func.get("name", "")
        desc = func.get("description", "")
        params = func.get("parameters", {})

        lines.append(f"### {name}")
        if desc:
            lines.append(f"{desc}")
        if params:
            # Embed the raw JSON schema; the model is expected to read it.
            lines.append(f"Parameters: {json.dumps(params, ensure_ascii=False)}")
        lines.append("")

    # Handle tool_choice directives
    if tool_choice == "required":
        lines.append("IMPORTANT: You MUST call at least one tool in your response. Do not respond with only text.")
    elif isinstance(tool_choice, dict):
        # Forced-function form: {"type":"function","function":{"name":"..."}}
        func_info = tool_choice.get("function", {})
        forced_name = func_info.get("name", "")
        if forced_name:
            lines.append(f"IMPORTANT: You MUST call the tool \"{forced_name}\" in your response.")
    else:
        # "auto" or default
        lines.append("Decide whether to call a tool based on the user's request. If you don't need a tool, respond normally with text only.")

    lines.append("")
    lines.append("When you call a tool, you may include text before or after the <tool_call> blocks, but the tool call blocks must be valid JSON.")

    return "\n".join(lines)
86
+
87
+
88
# Matches a single <tool_call>...</tool_call> block. DOTALL lets the JSON
# payload span lines; the non-greedy group keeps adjacent blocks separate.
_TOOL_CALL_RE = re.compile(
    r"<tool_call>\s*(.*?)\s*</tool_call>",
    re.DOTALL,
)
92
+
93
+
94
def _strip_code_fences(text: str) -> str:
    """Drop a wrapping markdown code fence (```lang ... ```) around *text*."""
    if not text:
        return text
    body = text.strip()
    if body.startswith("```"):
        # Remove the opening fence (with optional language tag), then the
        # closing fence at the end.
        body = re.sub(r"^```[a-zA-Z0-9_-]*\s*", "", body)
        body = re.sub(r"\s*```$", "", body)
    return body.strip()
102
+
103
+
104
def _extract_json_object(text: str) -> str:
    """Trim *text* down to the outermost ``{...}`` span, best effort.

    Falls back to returning the input (or a prefix-trimmed slice) when a
    well-formed span cannot be located.
    """
    if not text:
        return text
    first = text.find("{")
    if first == -1:
        # No object at all; leave the text for the caller to reject.
        return text
    last = text.rfind("}")
    if last == -1:
        # Opening brace but no close: keep the tail for brace balancing.
        return text[first:]
    if last < first:
        # A '}' before any '{' — structure is hopeless, pass through.
        return text
    return text[first : last + 1]
116
+
117
+
118
def _remove_trailing_commas(text: str) -> str:
    """Strip commas that directly precede a closing brace or bracket."""
    return re.sub(r",\s*([}\]])", r"\1", text) if text else text
122
+
123
+
124
def _balance_braces(text: str) -> str:
    """Append missing ``}`` so brace counts balance, ignoring braces in strings."""
    if not text:
        return text
    opened = 0
    closed = 0
    inside_string = False
    skip_next = False
    for char in text:
        if skip_next:
            # Previous character was a backslash inside a string literal.
            skip_next = False
        elif char == "\\" and inside_string:
            skip_next = True
        elif char == '"':
            inside_string = not inside_string
        elif inside_string:
            # Braces inside string literals don't affect nesting.
            pass
        elif char == "{":
            opened += 1
        elif char == "}":
            closed += 1
    if opened > closed:
        # Only missing closers are repaired; extra closers are left alone.
        text += "}" * (opened - closed)
    return text
150
+
151
+
152
def _repair_json(text: str) -> Optional[Any]:
    """Best-effort normalization of almost-JSON text; None if unrecoverable."""
    if not text:
        return None
    candidate = _strip_code_fences(text)
    candidate = _extract_json_object(candidate)
    # Normalize line endings, then collapse newlines so truncated
    # pretty-printed JSON parses as a single line.
    candidate = candidate.replace("\r\n", "\n").replace("\r", "\n")
    candidate = candidate.replace("\n", " ")
    candidate = _balance_braces(_remove_trailing_commas(candidate))
    try:
        return json.loads(candidate)
    except json.JSONDecodeError:
        return None
165
+
166
+
167
def parse_tool_call_block(
    raw_json: str,
    tools: Optional[List[Dict[str, Any]]] = None,
) -> Optional[Dict[str, Any]]:
    """Parse one ``<tool_call>`` payload into an OpenAI tool-call dict.

    Returns None when the payload is empty, unparseable (even after
    repair), lacks a ``name``, or names a function absent from *tools*
    when definitions are supplied.
    """
    if not raw_json:
        return None
    try:
        data = json.loads(raw_json)
    except json.JSONDecodeError:
        # Fall back to lenient repair for slightly malformed model output.
        data = _repair_json(raw_json)
    if not isinstance(data, dict):
        return None

    name = data.get("name")
    if not name:
        return None
    arguments = data.get("arguments", {})

    # Validate against the declared tool names when definitions are given.
    if tools:
        known = {
            t.get("function", {}).get("name")
            for t in tools
            if t.get("function", {}).get("name")
        }
        if known and name not in known:
            return None

    # OpenAI expects arguments as a JSON-encoded string.
    if isinstance(arguments, str):
        args_text = arguments
    else:
        args_text = json.dumps(arguments, ensure_ascii=False)

    return {
        "id": f"call_{uuid.uuid4().hex[:24]}",
        "type": "function",
        "function": {"name": name, "arguments": args_text},
    }
208
+
209
+
210
def parse_tool_calls(
    content: str,
    tools: Optional[List[Dict[str, Any]]] = None,
) -> Tuple[Optional[str], Optional[List[Dict[str, Any]]]]:
    """Parse tool call blocks from model output.

    Detects ``<tool_call>...</tool_call>`` blocks, parses JSON from each block,
    and returns OpenAI-format tool call objects.

    Args:
        content: Raw model output text.
        tools: Optional list of tool definitions for name validation.

    Returns:
        Tuple of (text_content, tool_calls_list).
        - text_content: text outside <tool_call> blocks (None if empty).
        - tool_calls_list: list of OpenAI tool call dicts, or None if no calls found.
    """
    if not content:
        return content, None

    blocks = list(_TOOL_CALL_RE.finditer(content))
    if not blocks:
        return content, None

    calls = [
        call
        for call in (
            parse_tool_call_block(m.group(1).strip(), tools) for m in blocks
        )
        if call
    ]
    if not calls:
        # Blocks were present but none parsed; return the text untouched.
        return content, None

    # Stitch together the prose surrounding the tool_call blocks.
    segments: List[str] = []
    cursor = 0
    for m in blocks:
        gap = content[cursor : m.start()]
        if gap.strip():
            segments.append(gap.strip())
        cursor = m.end()
    tail = content[cursor:]
    if tail.strip():
        segments.append(tail.strip())

    text_content = "\n".join(segments) if segments else None
    return text_content, calls
260
+
261
+
262
def format_tool_history(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Convert assistant messages with tool_calls and tool role messages into text format.

    Since Grok's web API only accepts a single message string, this converts
    tool-related messages back to a text representation for multi-turn conversations.

    Args:
        messages: List of OpenAI-format messages that may contain tool_calls and tool roles.

    Returns:
        List of messages with tool content converted to text format.
    """
    result = []
    for msg in messages:
        role = msg.get("role", "")
        content = msg.get("content")
        tool_calls = msg.get("tool_calls")

        if role == "assistant" and tool_calls:
            # Convert assistant tool_calls to a text representation.
            parts = []
            if content:
                parts.append(content if isinstance(content, str) else str(content))
            for tc in tool_calls:
                func = tc.get("function", {})
                tc_name = func.get("name", "")
                tc_args = func.get("arguments", "{}")
                # Fix: arguments may arrive as a dict instead of the JSON string
                # the OpenAI schema specifies; interpolating a dict directly
                # would emit Python repr (single quotes) and break the JSON
                # inside the reconstructed <tool_call> block.
                if not isinstance(tc_args, str):
                    tc_args = json.dumps(tc_args, ensure_ascii=False)
                parts.append(f'<tool_call>{{"name":"{tc_name}","arguments":{tc_args}}}</tool_call>')
            result.append({
                "role": "assistant",
                "content": "\n".join(parts),
            })

        elif role == "tool":
            # Convert the tool result into a plain-text user message.
            tool_name = msg.get("name") or "unknown"
            call_id = msg.get("tool_call_id") or ""
            content_str = content if isinstance(content, str) else json.dumps(content, ensure_ascii=False) if content else ""
            result.append({
                "role": "user",
                "content": f"tool ({tool_name}, {call_id}): {content_str}",
            })

        else:
            # Pass through ordinary messages unchanged.
            result.append(msg)

    return result
312
+
313
+
314
# Public API of this module.
__all__ = [
    "build_tool_prompt",
    "parse_tool_calls",
    "format_tool_history",
    "parse_tool_call_block",
]