tejmar committed on
Commit
2c97e18
·
0 Parent(s):

Initial commit

Browse files
.dockerignore ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python runtime and cache files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+ .Python
7
+ *.egg-info/
8
+ dist/
9
+ build/
10
+ *.egg
11
+ .eggs/
12
+ pip-log.txt
13
+ pip-delete-this-directory.txt
14
+
15
+ # Virtual environments
16
+ venv/
17
+ env/
18
+ ENV/
19
+ .venv/
20
+ .virtualenv/
21
+ pipenv/
22
+ poetry.lock
23
+ Pipfile.lock
24
+
25
+ # IDEs and editors
26
+ .vscode/
27
+ .idea/
28
+ *.swp
29
+ *.swo
30
+ *.swn
31
+ .DS_Store
32
+ *~
33
+ .project
34
+ .classpath
35
+ .settings/
36
+ *.sublime-project
37
+ *.sublime-workspace
38
+
39
+ # Git
40
+ .git/
41
+ .gitignore
42
+ .gitattributes
43
+ .gitmodules
44
+
45
+ # Docker related
46
+ Dockerfile*
47
+ docker-compose*.yml
48
+ .dockerignore
49
+ .docker/
50
+
51
+ # Documentation
52
+ *.md
53
+ !data/*.md
54
+ README.md
55
+ docs/
56
+ LICENSE
57
+ CHANGELOG*
58
+ CONTRIBUTING*
59
+
60
+ # Tests and quality checks
61
+ tests/
62
+ test/
63
+ *.pytest_cache/
64
+ .coverage
65
+ .coverage.*
66
+ htmlcov/
67
+ .tox/
68
+ .mypy_cache/
69
+ .ruff_cache/
70
+ .pytest_cache/
71
+ .hypothesis/
72
+ .pytype/
73
+ coverage.xml
74
+ *.cover
75
+ .cache/
76
+ nosetests.xml
77
+
78
+ # Logs and runtime data
79
+ logs/
80
+ *.log
81
+ *.log.*
82
+
83
+ # Temporary data files
84
+ data/temp/
85
+ data/token.json
86
+
87
+ # Database files
88
+ *.db
89
+ *.sqlite
90
+ *.sqlite3
91
+
92
+ # Temporary files
93
+ tmp/
94
+ temp/
95
+ *.tmp
96
+ *.temp
97
+ *.bak
98
+ *.orig
99
+ *.rej
100
+ *.swp
101
+
102
+ # CI/CD
103
+ .github/
104
+ .gitlab-ci.yml
105
+ .travis.yml
106
+ .circleci/
107
+ azure-pipelines.yml
108
+ .jenkins/
109
+ Jenkinsfile
110
+
111
+ # Environment variables and secrets
112
+ .env
113
+ .env.*
114
+ *.key
115
+ *.pem
116
+ *.crt
117
+ secrets/
118
+
119
+ # Media and large files
120
+ *.mp4
121
+ *.avi
122
+ *.mov
123
+ *.zip
124
+ *.tar
125
+ *.tar.gz
126
+ *.rar
127
+
128
+ # Node.js
129
+ node_modules/
130
+ npm-debug.log*
131
+ yarn-debug.log*
132
+ yarn-error.log*
133
+ package-lock.json
134
+ yarn.lock
135
+
136
+ # Other
137
+ *.pyc
138
+ .Python
139
+ .sass-cache/
140
+ .ipynb_checkpoints/
.gitattributes ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Auto detect text files and perform LF normalization
2
+ * text=auto
3
+ *.7z filter=lfs diff=lfs merge=lfs -text
4
+ *.arrow filter=lfs diff=lfs merge=lfs -text
5
+ *.bin filter=lfs diff=lfs merge=lfs -text
6
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
7
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
8
+ *.ftz filter=lfs diff=lfs merge=lfs -text
9
+ *.gz filter=lfs diff=lfs merge=lfs -text
10
+ *.h5 filter=lfs diff=lfs merge=lfs -text
11
+ *.joblib filter=lfs diff=lfs merge=lfs -text
12
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
13
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
14
+ *.model filter=lfs diff=lfs merge=lfs -text
15
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
16
+ *.npy filter=lfs diff=lfs merge=lfs -text
17
+ *.npz filter=lfs diff=lfs merge=lfs -text
18
+ *.onnx filter=lfs diff=lfs merge=lfs -text
19
+ *.ot filter=lfs diff=lfs merge=lfs -text
20
+ *.parquet filter=lfs diff=lfs merge=lfs -text
21
+ *.pb filter=lfs diff=lfs merge=lfs -text
22
+ *.pickle filter=lfs diff=lfs merge=lfs -text
23
+ *.pkl filter=lfs diff=lfs merge=lfs -text
24
+ *.pt filter=lfs diff=lfs merge=lfs -text
25
+ *.pth filter=lfs diff=lfs merge=lfs -text
26
+ *.rar filter=lfs diff=lfs merge=lfs -text
27
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
28
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
29
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
30
+ *.tar filter=lfs diff=lfs merge=lfs -text
31
+ *.tflite filter=lfs diff=lfs merge=lfs -text
32
+ *.tgz filter=lfs diff=lfs merge=lfs -text
33
+ *.wasm filter=lfs diff=lfs merge=lfs -text
34
+ *.xz filter=lfs diff=lfs merge=lfs -text
35
+ *.zip filter=lfs diff=lfs merge=lfs -text
36
+ *.zst filter=lfs diff=lfs merge=lfs -text
37
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
38
+ *.png filter=lfs diff=lfs merge=lfs -text
39
+ *.mp4 filter=lfs diff=lfs merge=lfs -text
40
+ app/template/favicon.png filter=lfs diff=lfs merge=lfs -text
41
+ data/temp/video/*.mp4 filter=lfs diff=lfs merge=lfs -text
.github/workflows/docker.yml ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Build Docker Image
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - main
7
+ tags:
8
+ - 'v*'
9
+ pull_request:
10
+ branches:
11
+ - main
12
+
13
+ env:
14
+ REGISTRY: ghcr.io
15
+ IMAGE_NAME: ${{ github.repository }}
16
+
17
+ jobs:
18
+ build-and-push:
19
+ runs-on: ubuntu-latest
20
+ permissions:
21
+ contents: read
22
+ packages: write
23
+ id-token: write
24
+
25
+ strategy:
26
+ fail-fast: false
27
+ matrix:
28
+ include:
29
+ # AMD64 platform
30
+ - platform: linux/amd64
31
+ suffix: amd64
32
+ # ARM64 platform
33
+ - platform: linux/arm64
34
+ suffix: arm64
35
+
36
+ steps:
37
+ - name: Checkout repository
38
+ uses: actions/checkout@v4
39
+
40
+ - name: Set up Docker Buildx
41
+ uses: docker/setup-buildx-action@v3
42
+
43
+ - name: Log in to Container Registry
44
+ uses: docker/login-action@v3
45
+ with:
46
+ registry: ${{ env.REGISTRY }}
47
+ username: ${{ github.actor }}
48
+ password: ${{ secrets.GITHUB_TOKEN }}
49
+
50
+ - name: Extract metadata
51
+ id: meta
52
+ uses: docker/metadata-action@v5
53
+ with:
54
+ images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
55
+ tags: |
56
+ # Branch push
57
+ type=ref,event=branch,suffix=-${{ matrix.suffix }}
58
+ # Semver: full version
59
+ type=semver,pattern={{version}},suffix=-${{ matrix.suffix }}
60
+ # latest tag
61
+ type=raw,value=latest-${{ matrix.suffix }},enable={{is_default_branch}}
62
+
63
+ - name: Build and push Docker image
64
+ uses: docker/build-push-action@v5
65
+ with:
66
+ context: .
67
+ platforms: ${{ matrix.platform }}
68
+ push: ${{ github.event_name != 'pull_request' }}
69
+ tags: ${{ steps.meta.outputs.tags }}
70
+ labels: ${{ steps.meta.outputs.labels }}
71
+ cache-from: type=gha,scope=${{ matrix.suffix }}
72
+ cache-to: type=gha,mode=max,scope=${{ matrix.suffix }}
73
+ pull: true
74
+
75
+ # Merge multi-arch images into a unified tag
76
+ merge-manifests:
77
+ runs-on: ubuntu-latest
78
+ needs: build-and-push
79
+ if: github.event_name != 'pull_request'
80
+ permissions:
81
+ contents: read
82
+ packages: write
83
+
84
+ steps:
85
+ - name: Log in to Container Registry
86
+ uses: docker/login-action@v3
87
+ with:
88
+ registry: ${{ env.REGISTRY }}
89
+ username: ${{ github.actor }}
90
+ password: ${{ secrets.GITHUB_TOKEN }}
91
+
92
+ - name: Extract metadata
93
+ id: meta
94
+ uses: docker/metadata-action@v5
95
+ with:
96
+ images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
97
+ tags: |
98
+ type=ref,event=branch
99
+ type=semver,pattern={{version}}
100
+ type=raw,value=latest,enable={{is_default_branch}}
101
+
102
+ - name: Create and push manifest
103
+ run: |
104
+ TAGS="${{ steps.meta.outputs.tags }}"
105
+ for tag in $TAGS; do
106
+ echo "Merging tag: $tag"
107
+ docker buildx imagetools create -t $tag \
108
+ ${tag}-amd64 \
109
+ ${tag}-arm64
110
+ done
.gitignore ADDED
@@ -0,0 +1,190 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ .idea
6
+
7
+ # C extensions
8
+ *.so
9
+
10
+ # Distribution / packaging
11
+ .Python
12
+ build/
13
+ develop-eggs/
14
+ dist/
15
+ downloads/
16
+ eggs/
17
+ .eggs/
18
+ lib/
19
+ lib64/
20
+ parts/
21
+ sdist/
22
+ var/
23
+ wheels/
24
+ share/python-wheels/
25
+ *.egg-info/
26
+ .installed.cfg
27
+ *.egg
28
+ MANIFEST
29
+
30
+ # PyInstaller
31
+ # Usually these files are written by a python script from a template
32
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
33
+ *.manifest
34
+ *.spec
35
+
36
+ # Installer logs
37
+ pip-log.txt
38
+ pip-delete-this-directory.txt
39
+
40
+ # Unit test / coverage reports
41
+ htmlcov/
42
+ .tox/
43
+ .nox/
44
+ .coverage
45
+ .coverage.*
46
+ .cache
47
+ nosetests.xml
48
+ coverage.xml
49
+ *.cover
50
+ *.py,cover
51
+ .hypothesis/
52
+ .pytest_cache/
53
+ cover/
54
+
55
+ # Translations
56
+ *.mo
57
+ *.pot
58
+
59
+ # Django stuff:
60
+ *.log
61
+ local_settings.py
62
+ db.sqlite3
63
+ db.sqlite3-journal
64
+
65
+ # Flask stuff:
66
+ instance/
67
+ .webassets-cache
68
+
69
+ # Scrapy stuff:
70
+ .scrapy
71
+
72
+ # Sphinx documentation
73
+ docs/_build/
74
+
75
+ # PyBuilder
76
+ .pybuilder/
77
+ target/
78
+
79
+ # Jupyter Notebook
80
+ .ipynb_checkpoints
81
+
82
+ # IPython
83
+ profile_default/
84
+ ipython_config.py
85
+
86
+ # pyenv
87
+ # For a library or package, you might want to ignore these files since the code is
88
+ # intended to run in multiple environments; otherwise, check them in:
89
+ # .python-version
90
+
91
+ # pipenv
92
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
93
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
94
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
95
+ # install all needed dependencies.
96
+ #Pipfile.lock
97
+
98
+ # UV
99
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
100
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
101
+ # commonly ignored for libraries.
102
+ #uv.lock
103
+
104
+ # poetry
105
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
106
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
107
+ # commonly ignored for libraries.
108
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
109
+ #poetry.lock
110
+
111
+ # pdm
112
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
113
+ #pdm.lock
114
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
115
+ # in version control.
116
+ # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
117
+ .pdm.toml
118
+ .pdm-python
119
+ .pdm-build/
120
+
121
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
122
+ __pypackages__/
123
+
124
+ # Celery stuff
125
+ celerybeat-schedule
126
+ celerybeat.pid
127
+
128
+ # SageMath parsed files
129
+ *.sage.py
130
+
131
+ # Environments
132
+ .env
133
+ .venv
134
+ env/
135
+ venv/
136
+ ENV/
137
+ env.bak/
138
+ venv.bak/
139
+
140
+ logs/*
141
+
142
+ # Runtime data
143
+ data/*
144
+
145
+ # Spyder project settings
146
+ .spyderproject
147
+ .spyproject
148
+
149
+ # Rope project settings
150
+ .ropeproject
151
+
152
+ # mkdocs documentation
153
+ /site
154
+
155
+ # mypy
156
+ .mypy_cache/
157
+ .dmypy.json
158
+ dmypy.json
159
+
160
+ # Pyre type checker
161
+ .pyre/
162
+
163
+ # pytype static type analyzer
164
+ .pytype/
165
+
166
+ # Cython debug symbols
167
+ cython_debug/
168
+
169
+ # PyCharm
170
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
171
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
172
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
173
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
174
+ #.idea/
175
+
176
+ # Ruff stuff:
177
+ .ruff_cache/
178
+
179
+ # PyPI configuration file
180
+ .pypirc
181
+
182
+ # Cursor
183
+ # Cursor is an AI-powered code editor.`.cursorignore` specifies files/directories to
184
+ # exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
185
+ # refer to https://docs.cursor.com/context/ignore-files
186
+ .cursorignore
187
+ .cursorindexingignore
188
+
189
+ # temp videos
190
+ data/temp/video/
.python-version ADDED
@@ -0,0 +1 @@
 
 
1
+ 3.13
Dockerfile ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# --- Stage 1: build Python dependencies into a staging prefix (/install) ---
# NOTE(review): this image pins Python 3.11 while .python-version pins 3.13 —
# confirm which version the app is actually developed against.
FROM python:3.11-slim AS builder

WORKDIR /build

# Build-time system packages needed to compile wheels with native extensions
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        gcc \
        g++ \
        libffi-dev \
    && rm -rf /var/lib/apt/lists/*

# Install dependencies into /install so only the result is copied to runtime
COPY requirements.txt .
RUN pip install --no-cache-dir --prefix=/install --compile -r requirements.txt

# Trim test/cache artifacts and debug symbols to shrink the final image
RUN find /install -type d -name "__pycache__" -exec rm -rf {} + 2>/dev/null || true && \
    find /install -type d -name "tests" -exec rm -rf {} + 2>/dev/null || true && \
    find /install -type d -name "test" -exec rm -rf {} + 2>/dev/null || true && \
    find /install -type d -name "*.dist-info" -exec sh -c 'rm -f "$1"/RECORD "$1"/INSTALLER' _ {} \; && \
    find /install -type f -name "*.pyc" -delete && \
    find /install -type f -name "*.pyo" -delete && \
    find /install -name "*.so" -exec strip --strip-unneeded {} \; 2>/dev/null || true

# --- Stage 2: minimal runtime image ---
FROM python:3.11-slim AS runtime

WORKDIR /app

# Runtime-only system packages; strip docs/man/caches afterwards
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        libffi8 \
        ca-certificates \
    && rm -rf /var/lib/apt/lists/* \
    && rm -rf /tmp/* /var/tmp/* \
    && rm -rf /usr/share/doc/* \
    && rm -rf /usr/share/man/* \
    && rm -rf /var/cache/apt/*

# Copy staged dependencies from the builder
COPY --from=builder /install /usr/local

# Create required directories
# RUN mkdir -p /app/logs /app/data/temp/image /app/data/temp/video
RUN mkdir -p /app/logs

# Copy application code and config
COPY app/ ./app/
COPY main.py .

# Copy and set entrypoint script
COPY docker-entrypoint.sh /usr/local/bin/
RUN chmod +x /usr/local/bin/docker-entrypoint.sh

# Disable Python bytecode and enable unbuffered output
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1

# Fixed: previously EXPOSE 8000, which contradicted the CMD default port.
# 7860 matches ${PORT:-7860} below (the usual Hugging Face Spaces port).
EXPOSE 7860

# Initialize config via entrypoint
ENTRYPOINT ["docker-entrypoint.sh"]

# Default command; PORT env var overrides the listen port
CMD ["sh", "-c", "uvicorn main:app --host 0.0.0.0 --port ${PORT:-7860}"]
67
+
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Chenyme
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
README.md ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Grok2api Private
3
+ emoji: πŸ“‰
4
+ colorFrom: blue
5
+ colorTo: gray
6
+ sdk: docker
7
+ pinned: false
8
+ short_description: Grok2API private deploy
9
+ ---
10
+
11
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app/api/admin/manage.py ADDED
@@ -0,0 +1,1016 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Admin API - token management and system settings"""
2
+
3
+ import secrets
4
+ import time
5
+ from typing import Dict, Any, List, Optional
6
+ from datetime import datetime, timedelta
7
+ from pathlib import Path
8
+ from fastapi import APIRouter, HTTPException, Depends, Header, Query
9
+ from fastapi.responses import HTMLResponse
10
+ from pydantic import BaseModel
11
+
12
+ from app.core.config import setting
13
+ from app.core.logger import logger
14
+ from app.services.grok.token import token_manager
15
+ from app.services.request_stats import request_stats
16
+ from app.models.grok_models import TokenType
17
+
18
+
19
+ router = APIRouter(tags=["admin"])
20
+
21
# Constants
STATIC_DIR = Path(__file__).parents[2] / "template"     # app/template: served HTML pages
TEMP_DIR = Path(__file__).parents[3] / "data" / "temp"  # <repo root>/data/temp cache area
IMAGE_CACHE_DIR = TEMP_DIR / "image"
VIDEO_CACHE_DIR = TEMP_DIR / "video"
SESSION_EXPIRE_HOURS = 24       # admin session lifetime
BYTES_PER_KB = 1024
BYTES_PER_MB = 1024 * 1024

# Session storage: session token -> expiry datetime.
# In-memory only, so all sessions are lost on process restart.
_sessions: Dict[str, datetime] = {}
32
+
33
+
34
# === Request/Response models ===

class LoginRequest(BaseModel):
    """Payload for POST /api/login."""
    username: str
    password: str


class LoginResponse(BaseModel):
    """Result of a login attempt; token is set only on success."""
    success: bool
    token: Optional[str] = None  # session bearer token (expires after SESSION_EXPIRE_HOURS)
    message: str


class AddTokensRequest(BaseModel):
    """Batch-add payload; token_type is "sso" or "ssoSuper"."""
    tokens: List[str]
    token_type: str


class DeleteTokensRequest(BaseModel):
    """Batch-delete payload; token_type is "sso" or "ssoSuper"."""
    tokens: List[str]
    token_type: str


class TokenInfo(BaseModel):
    """One row in the admin token list."""
    token: str
    token_type: str                     # "sso" or "ssoSuper"
    created_time: Optional[int] = None
    remaining_queries: int              # -1 means quota not yet queried
    heavy_remaining_queries: int        # heavy-model quota; -1 means not yet queried
    status: str                         # expired / cooldown / unused / exhausted / active
    tags: List[str] = []
    note: str = ""
    cooldown_until: Optional[int] = None  # ms epoch; set only while cooling down
    cooldown_remaining: int = 0           # seconds remaining, rounded up
    last_failure_time: Optional[int] = None
    last_failure_reason: str = ""
    limit_reason: str = ""                # "" / "cooldown" / "exhausted"


class TokenListResponse(BaseModel):
    """Envelope for GET /api/tokens."""
    success: bool
    data: List[TokenInfo]
    total: int


class UpdateSettingsRequest(BaseModel):
    """Settings update payload; each section is optional."""
    global_config: Optional[Dict[str, Any]] = None
    grok_config: Optional[Dict[str, Any]] = None


class UpdateTokenTagsRequest(BaseModel):
    """Replace the tag list on a single token."""
    token: str
    token_type: str
    tags: List[str]


class UpdateTokenNoteRequest(BaseModel):
    """Set the free-form note on a single token."""
    token: str
    token_type: str
    note: str


class TestTokenRequest(BaseModel):
    """Identify a single token to test (handler not visible in this chunk)."""
    token: str
    token_type: str
99
+
100
+
101
+ # === Helper functions ===
102
+
103
def validate_token_type(token_type_str: str) -> TokenType:
    """Map a wire-format token type string to a TokenType enum.

    Raises HTTPException(400) for anything other than "sso"/"ssoSuper".
    """
    mapping = {"sso": TokenType.NORMAL, "ssoSuper": TokenType.SUPER}
    resolved = mapping.get(token_type_str)
    if resolved is None:
        raise HTTPException(
            status_code=400,
            detail={"error": "Invalid token type", "code": "INVALID_TYPE"}
        )
    return resolved
111
+
112
+
113
def parse_created_time(created_time) -> Optional[int]:
    """Normalize a stored created-time value to an int, or None.

    Accepts ints as-is and numeric strings via int(); empty strings and any
    other type yield None. Fixed: a non-numeric string previously raised
    ValueError — it now degrades to None instead of failing the caller.
    """
    if isinstance(created_time, str):
        if not created_time:
            return None
        try:
            return int(created_time)
        except ValueError:
            # Malformed persisted value; treat as "unknown" rather than crash.
            return None
    if isinstance(created_time, int):
        return created_time
    return None
120
+
121
+
122
+ def _get_cooldown_remaining_ms(token_data: Dict[str, Any], now_ms: Optional[int] = None) -> int:
123
+ """Get remaining cooldown time (ms)."""
124
+ cooldown_until = token_data.get("cooldownUntil")
125
+ if not cooldown_until:
126
+ return 0
127
+
128
+ try:
129
+ now = now_ms if now_ms is not None else int(time.time() * 1000)
130
+ remaining = int(cooldown_until) - now
131
+ return remaining if remaining > 0 else 0
132
+ except (TypeError, ValueError):
133
+ return 0
134
+
135
+
136
def _is_token_in_cooldown(token_data: Dict[str, Any], now_ms: Optional[int] = None) -> bool:
    """True while the token is inside its 429 cooldown window."""
    return bool(_get_cooldown_remaining_ms(token_data, now_ms))
139
+
140
+
141
def calculate_token_stats(tokens: Dict[str, Any], token_type: str) -> Dict[str, int]:
    """Aggregate counts for one token pool.

    Classifies each token as expired, cooldown, unused, exhausted, or active;
    "limited" is the sum of cooldown and exhausted. token_type "normal" looks
    only at remainingQueries; any other pool also considers the heavy quota.
    """
    now_ms = int(time.time() * 1000)
    expired = cooldown = unused = exhausted = active = 0

    for data in tokens.values():
        if data.get("status") == "expired":
            expired += 1
            continue
        if _is_token_in_cooldown(data, now_ms):
            cooldown += 1
            continue

        remaining = data.get("remainingQueries", -1)
        heavy = data.get("heavyremainingQueries", -1)

        # -1 means the quota has never been queried for this token.
        if token_type == "normal":
            fresh = remaining == -1
            drained = remaining == 0
        else:
            fresh = remaining == -1 and heavy == -1
            drained = remaining == 0 or heavy == 0

        if fresh:
            unused += 1
        elif drained:
            exhausted += 1
        else:
            active += 1

    return {
        "total": len(tokens),
        "unused": unused,
        "limited": cooldown + exhausted,
        "cooldown": cooldown,
        "exhausted": exhausted,
        "expired": expired,
        "active": active,
    }
187
+
188
+
189
def verify_admin_session(authorization: Optional[str] = Header(None)) -> bool:
    """FastAPI dependency: validate the Bearer session token from /api/login.

    Raises HTTPException(401) when the header is missing/malformed, the token
    is unknown, or the session has expired. Expired entries are purged lazily
    on access (other stale sessions simply linger until their next use).
    """
    prefix = "Bearer "
    if not authorization or not authorization.startswith(prefix):
        raise HTTPException(status_code=401, detail={"error": "Unauthorized", "code": "UNAUTHORIZED"})

    session_token = authorization[len(prefix):]
    expires_at = _sessions.get(session_token)

    if expires_at is None:
        raise HTTPException(status_code=401, detail={"error": "Invalid session", "code": "SESSION_INVALID"})

    if datetime.now() > expires_at:
        del _sessions[session_token]
        raise HTTPException(status_code=401, detail={"error": "Session expired", "code": "SESSION_EXPIRED"})

    return True
204
+
205
+
206
def get_token_status(token_data: Dict[str, Any], token_type: str) -> str:
    """Classify a token: expired / cooldown / unused / exhausted / active.

    "ssoSuper" tokens track both the normal and heavy quotas; all other types
    only look at remainingQueries. -1 means the quota was never queried.
    """
    if token_data.get("status") == "expired":
        return "expired"
    if _is_token_in_cooldown(token_data):
        return "cooldown"

    remaining = token_data.get("remainingQueries", -1)
    heavy = token_data.get("heavyremainingQueries", -1)

    if token_type == "ssoSuper":
        fresh = remaining == -1 and heavy == -1
        drained = remaining == 0 or heavy == 0
    else:
        fresh = remaining == -1
        drained = remaining == 0

    if fresh:
        return "unused"
    if drained:
        return "exhausted"
    return "active"
229
+
230
+
231
+ def _calculate_dir_size(directory: Path) -> int:
232
+ """Calculate directory size"""
233
+ total = 0
234
+ for file_path in directory.iterdir():
235
+ if file_path.is_file():
236
+ try:
237
+ total += file_path.stat().st_size
238
+ except Exception as e:
239
+ logger.warning(f"[Admin] Unable to get file size: {file_path.name}, {e}")
240
+ return total
241
+
242
+
243
def _format_size(size_bytes: int) -> str:
    """Render a byte count as a human-readable "N.N KB" / "N.N MB" string."""
    megabytes = size_bytes / BYTES_PER_MB
    if megabytes >= 1:
        return f"{megabytes:.1f} MB"
    return f"{size_bytes / BYTES_PER_KB:.1f} KB"
249
+
250
+
251
+ # === Page routes ===
252
+
253
@router.get("/login", response_class=HTMLResponse)
async def login_page():
    """Serve the admin login page from the static template directory."""
    page = STATIC_DIR / "login.html"
    if not page.exists():
        raise HTTPException(status_code=404, detail="Login page not found")
    return page.read_text(encoding="utf-8")
260
+
261
+
262
@router.get("/manage", response_class=HTMLResponse)
async def manage_page():
    """Serve the admin management page from the static template directory."""
    page = STATIC_DIR / "admin.html"
    if not page.exists():
        raise HTTPException(status_code=404, detail="Admin page not found")
    return page.read_text(encoding="utf-8")
269
+
270
+
271
+ # === API endpoints ===
272
+
273
@router.post("/api/login", response_model=LoginResponse)
async def admin_login(request: LoginRequest) -> LoginResponse:
    """Authenticate admin credentials and issue a session token.

    On success, stores a fresh urlsafe token in _sessions with a
    SESSION_EXPIRE_HOURS expiry. Fixed: credentials are now compared with
    secrets.compare_digest instead of !=, so the comparison is constant-time
    and does not leak match-prefix length via response timing.
    """
    try:
        logger.debug(f"[Admin] Login attempt: {request.username}")

        # Assumes config values are strings — the defaults are "" (empty str).
        expected_user = setting.global_config.get("admin_username", "")
        expected_pass = setting.global_config.get("admin_password", "")

        user_ok = secrets.compare_digest(request.username.encode(), str(expected_user).encode())
        pass_ok = secrets.compare_digest(request.password.encode(), str(expected_pass).encode())
        if not (user_ok and pass_ok):
            logger.warning(f"[Admin] Login failed: {request.username}")
            return LoginResponse(success=False, message="Invalid username or password")

        session_token = secrets.token_urlsafe(32)
        _sessions[session_token] = datetime.now() + timedelta(hours=SESSION_EXPIRE_HOURS)

        logger.debug(f"[Admin] Login succeeded: {request.username}")
        return LoginResponse(success=True, token=session_token, message="Login succeeded")

    except Exception as e:
        logger.error(f"[Admin] Login error: {e}")
        raise HTTPException(status_code=500, detail={"error": f"Login failed: {e}", "code": "LOGIN_ERROR"})
295
+
296
+
297
@router.post("/api/logout")
async def admin_logout(_: bool = Depends(verify_admin_session), authorization: Optional[str] = Header(None)) -> Dict[str, Any]:
    """Invalidate the caller's admin session token."""
    try:
        if authorization and authorization.startswith("Bearer "):
            session_token = authorization[7:]
            # pop() removes and reports presence in one step; values are
            # always datetimes, so None unambiguously means "not found".
            if _sessions.pop(session_token, None) is not None:
                logger.debug("[Admin] Logout succeeded")
                return {"success": True, "message": "Logout succeeded"}

        logger.warning("[Admin] Logout failed: invalid session")
        return {"success": False, "message": "Invalid session"}

    except Exception as e:
        logger.error(f"[Admin] Logout error: {e}")
        raise HTTPException(status_code=500, detail={"error": f"Logout failed: {e}", "code": "LOGOUT_ERROR"})
314
+
315
+
316
@router.get("/api/tokens", response_model=TokenListResponse)
async def list_tokens(_: bool = Depends(verify_admin_session)) -> TokenListResponse:
    """Return every managed token (normal + super) with status/cooldown info.

    Refactored: the two near-identical per-pool loops are collapsed into the
    shared _build_token_info helper; output is unchanged.
    """
    try:
        logger.debug("[Admin] Fetching token list")

        all_tokens = token_manager.get_tokens()
        now_ms = int(time.time() * 1000)
        token_list: List[TokenInfo] = []

        for pool, type_label in ((TokenType.NORMAL, "sso"), (TokenType.SUPER, "ssoSuper")):
            for token, data in all_tokens.get(pool.value, {}).items():
                token_list.append(_build_token_info(token, data, type_label, now_ms))

        logger.debug(f"[Admin] Token list retrieved: {len(token_list)} items")
        return TokenListResponse(success=True, data=token_list, total=len(token_list))

    except Exception as e:
        logger.error(f"[Admin] Token list error: {e}")
        raise HTTPException(status_code=500, detail={"error": f"Fetch failed: {e}", "code": "LIST_ERROR"})


def _build_token_info(token: str, data: Dict[str, Any], type_label: str, now_ms: int) -> TokenInfo:
    """Build one TokenInfo row; type_label is "sso" or "ssoSuper"."""
    cooldown_remaining_ms = _get_cooldown_remaining_ms(data, now_ms)
    cooldown_until = data.get("cooldownUntil") if cooldown_remaining_ms else None
    limit_reason = "cooldown" if cooldown_remaining_ms else ""
    if not limit_reason:
        # Super tokens are also limited when the heavy quota is exhausted.
        remaining = data.get("remainingQueries", -1)
        heavy = data.get("heavyremainingQueries", -1)
        if remaining == 0 or (type_label == "ssoSuper" and heavy == 0):
            limit_reason = "exhausted"
    return TokenInfo(
        token=token,
        token_type=type_label,
        created_time=parse_created_time(data.get("createdTime")),
        remaining_queries=data.get("remainingQueries", -1),
        heavy_remaining_queries=data.get("heavyremainingQueries", -1),
        status=get_token_status(data, type_label),
        tags=data.get("tags", []),
        note=data.get("note", ""),
        cooldown_until=cooldown_until,
        # Round the cooldown up to whole seconds for display.
        cooldown_remaining=(cooldown_remaining_ms + 999) // 1000 if cooldown_remaining_ms else 0,
        last_failure_time=data.get("lastFailureTime") or None,
        last_failure_reason=data.get("lastFailureReason") or "",
        limit_reason=limit_reason,
    )
378
+
379
+
380
@router.post("/api/tokens/add")
async def add_tokens(request: AddTokensRequest, _: bool = Depends(verify_admin_session)) -> Dict[str, Any]:
    """Batch-register tokens of the given type ("sso" or "ssoSuper")."""
    count = len(request.tokens)
    try:
        logger.debug(f"[Admin] Adding tokens: {request.token_type}, {count} items")

        await token_manager.add_token(request.tokens, validate_token_type(request.token_type))

        logger.debug(f"[Admin] Tokens added: {count} items")
        return {"success": True, "message": f"Successfully added {count} tokens", "count": count}

    except HTTPException:
        # Re-raise validation errors (e.g. bad token_type) untouched.
        raise
    except Exception as e:
        logger.error(f"[Admin] Token add error: {e}")
        raise HTTPException(status_code=500, detail={"error": f"Add failed: {e}", "code": "ADD_ERROR"})
397
+
398
+
399
+ @router.post("/api/tokens/delete")
400
+ async def delete_tokens(request: DeleteTokensRequest, _: bool = Depends(verify_admin_session)) -> Dict[str, Any]:
401
+ """Batch delete tokens"""
402
+ try:
403
+ logger.debug(f"[Admin] Deleting tokens: {request.token_type}, {len(request.tokens)} items")
404
+
405
+ token_type = validate_token_type(request.token_type)
406
+ await token_manager.delete_token(request.tokens, token_type)
407
+
408
+ logger.debug(f"[Admin] Tokens deleted: {len(request.tokens)} items")
409
+ return {"success": True, "message": f"Successfully deleted {len(request.tokens)} tokens", "count": len(request.tokens)}
410
+
411
+ except HTTPException:
412
+ raise
413
+ except Exception as e:
414
+ logger.error(f"[Admin] Token delete error: {e}")
415
+ raise HTTPException(status_code=500, detail={"error": f"Delete failed: {e}", "code": "DELETE_ERROR"})
416
+
417
+
418
+ @router.get("/api/settings")
419
+ async def get_settings(_: bool = Depends(verify_admin_session)) -> Dict[str, Any]:
420
+ """Get settings"""
421
+ try:
422
+ logger.debug("[Admin] Fetching settings")
423
+ return {"success": True, "data": {"global": setting.global_config, "grok": setting.grok_config}}
424
+ except Exception as e:
425
+ logger.error(f"[Admin] Failed to fetch settings: {e}")
426
+ raise HTTPException(status_code=500, detail={"error": f"Fetch failed: {e}", "code": "GET_SETTINGS_ERROR"})
427
+
428
+
429
+ @router.post("/api/settings")
430
+ async def update_settings(request: UpdateSettingsRequest, _: bool = Depends(verify_admin_session)) -> Dict[str, Any]:
431
+ """Update settings"""
432
+ try:
433
+ logger.debug("[Admin] Updating settings")
434
+ await setting.save(global_config=request.global_config, grok_config=request.grok_config)
435
+ logger.debug("[Admin] Settings updated")
436
+ return {"success": True, "message": "Settings updated"}
437
+ except Exception as e:
438
+ logger.error(f"[Admin] Failed to update settings: {e}")
439
+ raise HTTPException(status_code=500, detail={"error": f"Update failed: {e}", "code": "UPDATE_SETTINGS_ERROR"})
440
+
441
+
442
+ @router.get("/api/cache/size")
443
+ async def get_cache_size(_: bool = Depends(verify_admin_session)) -> Dict[str, Any]:
444
+ """Get cache size"""
445
+ try:
446
+ logger.debug("[Admin] Fetching cache size")
447
+
448
+ image_size = _calculate_dir_size(IMAGE_CACHE_DIR) if IMAGE_CACHE_DIR.exists() else 0
449
+ video_size = _calculate_dir_size(VIDEO_CACHE_DIR) if VIDEO_CACHE_DIR.exists() else 0
450
+ total_size = image_size + video_size
451
+
452
+ logger.debug(f"[Admin] Cache size: images {_format_size(image_size)}, videos {_format_size(video_size)}")
453
+
454
+ return {
455
+ "success": True,
456
+ "data": {
457
+ "image_size": _format_size(image_size),
458
+ "video_size": _format_size(video_size),
459
+ "total_size": _format_size(total_size),
460
+ "image_size_bytes": image_size,
461
+ "video_size_bytes": video_size,
462
+ "total_size_bytes": total_size
463
+ }
464
+ }
465
+
466
+ except Exception as e:
467
+ logger.error(f"[Admin] Cache size error: {e}")
468
+ raise HTTPException(status_code=500, detail={"error": f"Fetch failed: {e}", "code": "CACHE_SIZE_ERROR"})
469
+
470
+
471
+ @router.get("/api/cache/list")
472
+ async def list_cache_files(
473
+ cache_type: str = Query("image", alias="type"),
474
+ limit: int = 50,
475
+ offset: int = 0,
476
+ _: bool = Depends(verify_admin_session)
477
+ ) -> Dict[str, Any]:
478
+ """List cached files for admin preview."""
479
+ try:
480
+ cache_type = cache_type.lower()
481
+ if cache_type not in ("image", "video"):
482
+ raise HTTPException(status_code=400, detail={"error": "Invalid cache type", "code": "INVALID_CACHE_TYPE"})
483
+
484
+ if limit < 1:
485
+ limit = 1
486
+ if limit > 200:
487
+ limit = 200
488
+ if offset < 0:
489
+ offset = 0
490
+
491
+ cache_dir = IMAGE_CACHE_DIR if cache_type == "image" else VIDEO_CACHE_DIR
492
+ if not cache_dir.exists():
493
+ return {"success": True, "data": {"total": 0, "items": [], "offset": offset, "limit": limit, "has_more": False}}
494
+
495
+ files = []
496
+ for file_path in cache_dir.iterdir():
497
+ if not file_path.is_file():
498
+ continue
499
+ try:
500
+ stat = file_path.stat()
501
+ except Exception as e:
502
+ logger.warning(f"[Admin] Skip cache file: {file_path.name}, {e}")
503
+ continue
504
+ files.append((file_path, stat.st_mtime, stat.st_size))
505
+
506
+ files.sort(key=lambda item: item[1], reverse=True)
507
+ total = len(files)
508
+ sliced = files[offset:offset + limit]
509
+
510
+ items = [
511
+ {
512
+ "name": file_path.name,
513
+ "size": _format_size(size),
514
+ "size_bytes": size,
515
+ "mtime": int(mtime * 1000),
516
+ "url": f"/images/{file_path.name}",
517
+ "type": cache_type
518
+ }
519
+ for file_path, mtime, size in sliced
520
+ ]
521
+
522
+ return {
523
+ "success": True,
524
+ "data": {
525
+ "total": total,
526
+ "items": items,
527
+ "offset": offset,
528
+ "limit": limit,
529
+ "has_more": offset + limit < total
530
+ }
531
+ }
532
+
533
+ except HTTPException:
534
+ raise
535
+ except Exception as e:
536
+ logger.error(f"[Admin] Cache list error: {e}")
537
+ raise HTTPException(status_code=500, detail={"error": f"Fetch failed: {e}", "code": "CACHE_LIST_ERROR"})
538
+
539
+
540
+ @router.post("/api/cache/clear")
541
+ async def clear_cache(_: bool = Depends(verify_admin_session)) -> Dict[str, Any]:
542
+ """Clear all cache"""
543
+ try:
544
+ logger.debug("[Admin] Clearing cache")
545
+
546
+ image_count = 0
547
+ video_count = 0
548
+
549
+ # Clear images
550
+ if IMAGE_CACHE_DIR.exists():
551
+ for file_path in IMAGE_CACHE_DIR.iterdir():
552
+ if file_path.is_file():
553
+ try:
554
+ file_path.unlink()
555
+ image_count += 1
556
+ except Exception as e:
557
+ logger.error(f"[Admin] Delete failed: {file_path.name}, {e}")
558
+
559
+ # Clear videos
560
+ if VIDEO_CACHE_DIR.exists():
561
+ for file_path in VIDEO_CACHE_DIR.iterdir():
562
+ if file_path.is_file():
563
+ try:
564
+ file_path.unlink()
565
+ video_count += 1
566
+ except Exception as e:
567
+ logger.error(f"[Admin] Delete failed: {file_path.name}, {e}")
568
+
569
+ total = image_count + video_count
570
+ logger.debug(f"[Admin] Cache cleared: images {image_count}, videos {video_count}")
571
+
572
+ return {
573
+ "success": True,
574
+ "message": f"Cache cleared, deleted {image_count} images and {video_count} videos, {total} files total",
575
+ "data": {"deleted_count": total, "image_count": image_count, "video_count": video_count}
576
+ }
577
+
578
+ except Exception as e:
579
+ logger.error(f"[Admin] Cache clear error: {e}")
580
+ raise HTTPException(status_code=500, detail={"error": f"Clear failed: {e}", "code": "CACHE_CLEAR_ERROR"})
581
+
582
+
583
+ @router.post("/api/cache/clear/images")
584
+ async def clear_image_cache(_: bool = Depends(verify_admin_session)) -> Dict[str, Any]:
585
+ """Clear image cache"""
586
+ try:
587
+ logger.debug("[Admin] Clearing image cache")
588
+
589
+ count = 0
590
+ if IMAGE_CACHE_DIR.exists():
591
+ for file_path in IMAGE_CACHE_DIR.iterdir():
592
+ if file_path.is_file():
593
+ try:
594
+ file_path.unlink()
595
+ count += 1
596
+ except Exception as e:
597
+ logger.error(f"[Admin] Delete failed: {file_path.name}, {e}")
598
+
599
+ logger.debug(f"[Admin] Image cache cleared: {count} items")
600
+ return {"success": True, "message": f"Image cache cleared, deleted {count} files", "data": {"deleted_count": count, "type": "images"}}
601
+
602
+ except Exception as e:
603
+ logger.error(f"[Admin] Image cache clear error: {e}")
604
+ raise HTTPException(status_code=500, detail={"error": f"Clear failed: {e}", "code": "IMAGE_CACHE_CLEAR_ERROR"})
605
+
606
+
607
+ @router.post("/api/cache/clear/videos")
608
+ async def clear_video_cache(_: bool = Depends(verify_admin_session)) -> Dict[str, Any]:
609
+ """Clear video cache"""
610
+ try:
611
+ logger.debug("[Admin] Clearing video cache")
612
+
613
+ count = 0
614
+ if VIDEO_CACHE_DIR.exists():
615
+ for file_path in VIDEO_CACHE_DIR.iterdir():
616
+ if file_path.is_file():
617
+ try:
618
+ file_path.unlink()
619
+ count += 1
620
+ except Exception as e:
621
+ logger.error(f"[Admin] Delete failed: {file_path.name}, {e}")
622
+
623
+ logger.debug(f"[Admin] Video cache cleared: {count} items")
624
+ return {"success": True, "message": f"Video cache cleared, deleted {count} files", "data": {"deleted_count": count, "type": "videos"}}
625
+
626
+ except Exception as e:
627
+ logger.error(f"[Admin] Video cache clear error: {e}")
628
+ raise HTTPException(status_code=500, detail={"error": f"Clear failed: {e}", "code": "VIDEO_CACHE_CLEAR_ERROR"})
629
+
630
+
631
+ @router.get("/api/stats")
632
+ async def get_stats(_: bool = Depends(verify_admin_session)) -> Dict[str, Any]:
633
+ """Get stats"""
634
+ try:
635
+ logger.debug("[Admin] Fetching stats")
636
+
637
+ all_tokens = token_manager.get_tokens()
638
+ normal_stats = calculate_token_stats(all_tokens.get(TokenType.NORMAL.value, {}), "normal")
639
+ super_stats = calculate_token_stats(all_tokens.get(TokenType.SUPER.value, {}), "super")
640
+ total = normal_stats["total"] + super_stats["total"]
641
+
642
+ logger.debug(f"[Admin] Stats fetched - Normal tokens: {normal_stats['total']}, Super tokens: {super_stats['total']}, Total: {total}")
643
+ return {"success": True, "data": {"normal": normal_stats, "super": super_stats, "total": total}}
644
+
645
+ except Exception as e:
646
+ logger.error(f"[Admin] Stats error: {e}")
647
+ raise HTTPException(status_code=500, detail={"error": f"Fetch failed: {e}", "code": "STATS_ERROR"})
648
+
649
+
650
+ @router.get("/api/storage/mode")
651
+ async def get_storage_mode(_: bool = Depends(verify_admin_session)) -> Dict[str, Any]:
652
+ """Get storage mode"""
653
+ try:
654
+ logger.debug("[Admin] Fetching storage mode")
655
+ import os
656
+ mode = os.getenv("STORAGE_MODE", "file").upper()
657
+ return {"success": True, "data": {"mode": mode}}
658
+ except Exception as e:
659
+ logger.error(f"[Admin] Storage mode error: {e}")
660
+ raise HTTPException(status_code=500, detail={"error": f"Fetch failed: {e}", "code": "STORAGE_MODE_ERROR"})
661
+
662
+
663
+ @router.post("/api/tokens/tags")
664
+ async def update_token_tags(request: UpdateTokenTagsRequest, _: bool = Depends(verify_admin_session)) -> Dict[str, Any]:
665
+ """Update token tags"""
666
+ try:
667
+ logger.debug(f"[Admin] Updating token tags: {request.token[:10]}..., {request.tags}")
668
+
669
+ token_type = validate_token_type(request.token_type)
670
+ await token_manager.update_token_tags(request.token, token_type, request.tags)
671
+
672
+ logger.debug(f"[Admin] Token tags updated: {request.token[:10]}...")
673
+ return {"success": True, "message": "Tags updated successfully", "tags": request.tags}
674
+
675
+ except HTTPException:
676
+ raise
677
+ except Exception as e:
678
+ logger.error(f"[Admin] Token tag update error: {e}")
679
+ raise HTTPException(status_code=500, detail={"error": f"Update failed: {e}", "code": "UPDATE_TAGS_ERROR"})
680
+
681
+
682
+ @router.get("/api/tokens/tags/all")
683
+ async def get_all_tags(_: bool = Depends(verify_admin_session)) -> Dict[str, Any]:
684
+ """Get all tags"""
685
+ try:
686
+ logger.debug("[Admin] Fetching all tags")
687
+
688
+ all_tokens = token_manager.get_tokens()
689
+ tags_set = set()
690
+
691
+ for token_type_data in all_tokens.values():
692
+ for token_data in token_type_data.values():
693
+ tags = token_data.get("tags", [])
694
+ if isinstance(tags, list):
695
+ tags_set.update(tags)
696
+
697
+ tags_list = sorted(list(tags_set))
698
+ logger.debug(f"[Admin] Tags fetched: {len(tags_list)} items")
699
+ return {"success": True, "data": tags_list}
700
+
701
+ except Exception as e:
702
+ logger.error(f"[Admin] Tag fetch error: {e}")
703
+ raise HTTPException(status_code=500, detail={"error": f"Fetch failed: {e}", "code": "GET_TAGS_ERROR"})
704
+
705
+
706
+ @router.post("/api/tokens/note")
707
+ async def update_token_note(request: UpdateTokenNoteRequest, _: bool = Depends(verify_admin_session)) -> Dict[str, Any]:
708
+ """Update token note"""
709
+ try:
710
+ logger.debug(f"[Admin] Updating token note: {request.token[:10]}...")
711
+
712
+ token_type = validate_token_type(request.token_type)
713
+ await token_manager.update_token_note(request.token, token_type, request.note)
714
+
715
+ logger.debug(f"[Admin] Token note updated: {request.token[:10]}...")
716
+ return {"success": True, "message": "Note updated successfully", "note": request.note}
717
+
718
+ except HTTPException:
719
+ raise
720
+ except Exception as e:
721
+ logger.error(f"[Admin] Token note update error: {e}")
722
+ raise HTTPException(status_code=500, detail={"error": f"Update failed: {e}", "code": "UPDATE_NOTE_ERROR"})
723
+
724
+
725
+ @router.post("/api/tokens/test")
726
+ async def test_token(request: TestTokenRequest, _: bool = Depends(verify_admin_session)) -> Dict[str, Any]:
727
+ """Test token availability"""
728
+ try:
729
+ logger.debug(f"[Admin] Testing token: {request.token[:10]}...")
730
+
731
+ token_type = validate_token_type(request.token_type)
732
+ auth_token = f"sso-rw={request.token};sso={request.token}"
733
+
734
+ result = await token_manager.check_limits(auth_token, "grok-4-fast")
735
+
736
+ if result:
737
+ logger.debug(f"[Admin] Token test succeeded: {request.token[:10]}...")
738
+ return {
739
+ "success": True,
740
+ "message": "Token valid",
741
+ "data": {
742
+ "valid": True,
743
+ "remaining_queries": result.get("remainingTokens", -1),
744
+ "limit": result.get("limit", -1)
745
+ }
746
+ }
747
+ else:
748
+ logger.warning(f"[Admin] Token test failed: {request.token[:10]}...")
749
+
750
+ all_tokens = token_manager.get_tokens()
751
+ token_data = all_tokens.get(token_type.value, {}).get(request.token)
752
+
753
+ if token_data:
754
+ if token_data.get("status") == "expired":
755
+ return {"success": False, "message": "Token expired", "data": {"valid": False, "error_type": "expired", "error_code": 401}}
756
+ cooldown_remaining_ms = _get_cooldown_remaining_ms(token_data)
757
+ if cooldown_remaining_ms:
758
+ return {
759
+ "success": False,
760
+ "message": "Token is in cooldown",
761
+ "data": {
762
+ "valid": False,
763
+ "error_type": "cooldown",
764
+ "error_code": 429,
765
+ "cooldown_remaining": (cooldown_remaining_ms + 999) // 1000
766
+ }
767
+ }
768
+
769
+ exhausted = token_data.get("remainingQueries") == 0
770
+ if token_type == TokenType.SUPER and token_data.get("heavyremainingQueries") == 0:
771
+ exhausted = True
772
+ if exhausted:
773
+ return {
774
+ "success": False,
775
+ "message": "Token quota exhausted",
776
+ "data": {"valid": False, "error_type": "exhausted", "error_code": "quota_exhausted"}
777
+ }
778
+ else:
779
+ return {"success": False, "message": "Server blocked or network error", "data": {"valid": False, "error_type": "blocked", "error_code": 403}}
780
+ else:
781
+ return {"success": False, "message": "Token data error", "data": {"valid": False, "error_type": "unknown", "error_code": "data_error"}}
782
+
783
+ except HTTPException:
784
+ raise
785
+ except Exception as e:
786
+ logger.error(f"[Admin] Token test error: {e}")
787
+ raise HTTPException(status_code=500, detail={"error": f"Test failed: {e}", "code": "TEST_TOKEN_ERROR"})
788
+
789
+
790
+ @router.post("/api/tokens/refresh-all")
791
+ async def refresh_all_tokens(_: bool = Depends(verify_admin_session)) -> Dict[str, Any]:
792
+ """Refresh remaining counts for all tokens (background)"""
793
+ import asyncio
794
+
795
+ try:
796
+ # Check if already refreshing
797
+ progress = token_manager.get_refresh_progress()
798
+ if progress.get("running"):
799
+ return {
800
+ "success": False,
801
+ "message": "Refresh task already running",
802
+ "data": progress
803
+ }
804
+
805
+ # Start background refresh task
806
+ logger.info("[Admin] Starting background refresh task")
807
+ asyncio.create_task(token_manager.refresh_all_limits())
808
+
809
+ # Return immediately so the UI can poll progress
810
+ return {
811
+ "success": True,
812
+ "message": "Refresh task started",
813
+ "data": {"started": True}
814
+ }
815
+ except Exception as e:
816
+ logger.error(f"[Admin] Token refresh error: {e}")
817
+ raise HTTPException(status_code=500, detail={"error": f"Refresh failed: {e}", "code": "REFRESH_ALL_ERROR"})
818
+
819
+
820
+ @router.get("/api/tokens/refresh-progress")
821
+ async def get_refresh_progress(_: bool = Depends(verify_admin_session)) -> Dict[str, Any]:
822
+ """Get token refresh progress"""
823
+ try:
824
+ progress = token_manager.get_refresh_progress()
825
+ return {"success": True, "data": progress}
826
+ except Exception as e:
827
+ logger.error(f"[Admin] Refresh progress error: {e}")
828
+ raise HTTPException(status_code=500, detail={"error": f"Failed to get progress: {e}"})
829
+
830
+
831
+ @router.get("/api/request-stats")
832
+ async def get_request_stats(_: bool = Depends(verify_admin_session)) -> Dict[str, Any]:
833
+ """Get request stats"""
834
+ try:
835
+ stats = request_stats.get_stats(hours=24, days=7)
836
+ return {"success": True, "data": stats}
837
+ except Exception as e:
838
+ logger.error(f"[Admin] Request stats error: {e}")
839
+ raise HTTPException(status_code=500, detail={"error": f"Failed to get stats: {e}"})
840
+
841
+
842
+ # === API Key Management ===
843
+
844
class AddKeyRequest(BaseModel):
    """Request body for creating a single API key."""
    name: str  # human-readable label for the new key
846
+
847
+
848
class UpdateKeyNameRequest(BaseModel):
    """Request body for renaming an existing API key."""
    key: str  # the full key value to update
    name: str  # new display name / note
851
+
852
+
853
class UpdateKeyStatusRequest(BaseModel):
    """Request body for enabling/disabling an existing API key."""
    key: str  # the full key value to update
    is_active: bool  # True to enable, False to disable
856
+
857
+
858
class BatchAddKeyRequest(BaseModel):
    """Request body for creating several API keys at once."""
    name_prefix: str  # prefix used to derive each generated key's name
    count: int  # how many keys to create
861
+
862
+
863
class BatchDeleteKeyRequest(BaseModel):
    """Request body for deleting several API keys at once."""
    keys: List[str]  # full key values to delete
865
+
866
+
867
class BatchUpdateKeyStatusRequest(BaseModel):
    """Request body for enabling/disabling several API keys at once."""
    keys: List[str]  # full key values to update
    is_active: bool  # True to enable, False to disable
870
+
871
+
872
+ @router.get("/api/keys")
873
+ async def list_keys(_: bool = Depends(verify_admin_session)) -> Dict[str, Any]:
874
+ """Get key list"""
875
+ try:
876
+ from app.services.api_keys import api_key_manager
877
+ if not api_key_manager._loaded:
878
+ await api_key_manager.init()
879
+
880
+ keys = api_key_manager.get_all_keys()
881
+
882
+ # Include default key (optional)
883
+ global_key = setting.grok_config.get("api_key")
884
+ result_keys = []
885
+
886
+ # Convert and mask
887
+ for k in keys:
888
+ result_keys.append({
889
+ **k,
890
+ "display_key": f"{k['key'][:6]}...{k['key'][-4:]}"
891
+ })
892
+
893
+ return {
894
+ "success": True,
895
+ "data": result_keys,
896
+ "global_key_set": bool(global_key)
897
+ }
898
+ except Exception as e:
899
+ logger.error(f"[Admin] Failed to get key list: {e}")
900
+ raise HTTPException(status_code=500, detail={"error": f"Fetch failed: {e}"})
901
+
902
+
903
+ @router.post("/api/keys/add")
904
+ async def add_key(request: AddKeyRequest, _: bool = Depends(verify_admin_session)) -> Dict[str, Any]:
905
+ """Add key"""
906
+ try:
907
+ from app.services.api_keys import api_key_manager
908
+ new_key = await api_key_manager.add_key(request.name)
909
+ return {"success": True, "data": new_key, "message": "Key created successfully"}
910
+ except Exception as e:
911
+ logger.error(f"[Admin] Failed to add key: {e}")
912
+ raise HTTPException(status_code=500, detail={"error": f"Add failed: {e}"})
913
+
914
+
915
+ @router.post("/api/keys/delete")
916
+ async def delete_key(request: Dict[str, str], _: bool = Depends(verify_admin_session)) -> Dict[str, Any]:
917
+ """Delete key"""
918
+ try:
919
+ from app.services.api_keys import api_key_manager
920
+ key = request.get("key")
921
+ if not key:
922
+ raise ValueError("Key cannot be empty")
923
+
924
+ if await api_key_manager.delete_key(key):
925
+ return {"success": True, "message": "Key deleted successfully"}
926
+ return {"success": False, "message": "Key not found"}
927
+ except Exception as e:
928
+ logger.error(f"[Admin] Failed to delete key: {e}")
929
+ raise HTTPException(status_code=500, detail={"error": f"Delete failed: {e}"})
930
+
931
+
932
+ @router.post("/api/keys/status")
933
+ async def update_key_status(request: UpdateKeyStatusRequest, _: bool = Depends(verify_admin_session)) -> Dict[str, Any]:
934
+ """Update key status"""
935
+ try:
936
+ from app.services.api_keys import api_key_manager
937
+ if await api_key_manager.update_key_status(request.key, request.is_active):
938
+ return {"success": True, "message": "Status updated successfully"}
939
+ return {"success": False, "message": "Key not found"}
940
+ except Exception as e:
941
+ logger.error(f"[Admin] Failed to update key status: {e}")
942
+ raise HTTPException(status_code=500, detail={"error": f"Update failed: {e}"})
943
+
944
+
945
+ @router.post("/api/keys/name")
946
+ async def update_key_name(request: UpdateKeyNameRequest, _: bool = Depends(verify_admin_session)) -> Dict[str, Any]:
947
+ """Update key note"""
948
+ try:
949
+ from app.services.api_keys import api_key_manager
950
+ if await api_key_manager.update_key_name(request.key, request.name):
951
+ return {"success": True, "message": "Note updated successfully"}
952
+ return {"success": False, "message": "Key not found"}
953
+ except Exception as e:
954
+ logger.error(f"[Admin] Failed to update key note: {e}")
955
+ raise HTTPException(status_code=500, detail={"error": f"Update failed: {e}"})
956
+
957
+
958
+ @router.post("/api/keys/batch-add")
959
+ async def batch_add_keys(request: BatchAddKeyRequest, _: bool = Depends(verify_admin_session)) -> Dict[str, Any]:
960
+ """Batch add keys"""
961
+ try:
962
+ from app.services.api_keys import api_key_manager
963
+ new_keys = await api_key_manager.batch_add_keys(request.name_prefix, request.count)
964
+ return {"success": True, "data": new_keys, "message": f"Successfully created {len(new_keys)} keys"}
965
+ except Exception as e:
966
+ logger.error(f"[Admin] Batch add keys failed: {e}")
967
+ raise HTTPException(status_code=500, detail={"error": f"Batch add failed: {e}"})
968
+
969
+
970
+ @router.post("/api/keys/batch-delete")
971
+ async def batch_delete_keys(request: BatchDeleteKeyRequest, _: bool = Depends(verify_admin_session)) -> Dict[str, Any]:
972
+ """Batch delete keys"""
973
+ try:
974
+ from app.services.api_keys import api_key_manager
975
+ deleted_count = await api_key_manager.batch_delete_keys(request.keys)
976
+ return {"success": True, "message": f"Successfully deleted {deleted_count} keys"}
977
+ except Exception as e:
978
+ logger.error(f"[Admin] Batch delete keys failed: {e}")
979
+ raise HTTPException(status_code=500, detail={"error": f"Batch delete failed: {e}"})
980
+
981
+
982
+ @router.post("/api/keys/batch-status")
983
+ async def batch_update_key_status(request: BatchUpdateKeyStatusRequest, _: bool = Depends(verify_admin_session)) -> Dict[str, Any]:
984
+ """Batch update key status"""
985
+ try:
986
+ from app.services.api_keys import api_key_manager
987
+ updated_count = await api_key_manager.batch_update_keys_status(request.keys, request.is_active)
988
+ return {"success": True, "message": f"Successfully updated {updated_count} keys"}
989
+ except Exception as e:
990
+ logger.error(f"[Admin] Batch update key status failed: {e}")
991
+ raise HTTPException(status_code=500, detail={"error": f"Batch update failed: {e}"})
992
+
993
+
994
+ # === Audit logs ===
995
+
996
+ @router.get("/api/logs")
997
+ async def get_logs(limit: int = 1000, _: bool = Depends(verify_admin_session)) -> Dict[str, Any]:
998
+ """Get request logs"""
999
+ try:
1000
+ from app.services.request_logger import request_logger
1001
+ logs = await request_logger.get_logs(limit)
1002
+ return {"success": True, "data": logs}
1003
+ except Exception as e:
1004
+ logger.error(f"[Admin] Failed to get logs: {e}")
1005
+ raise HTTPException(status_code=500, detail={"error": f"Fetch failed: {e}"})
1006
+
1007
+ @router.post("/api/logs/clear")
1008
+ async def clear_logs(_: bool = Depends(verify_admin_session)) -> Dict[str, Any]:
1009
+ """Clear logs"""
1010
+ try:
1011
+ from app.services.request_logger import request_logger
1012
+ await request_logger.clear_logs()
1013
+ return {"success": True, "message": "Logs cleared"}
1014
+ except Exception as e:
1015
+ logger.error(f"[Admin] Failed to clear logs: {e}")
1016
+ raise HTTPException(status_code=500, detail={"error": f"Clear failed: {e}"})
app/api/v1/chat.py ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Chat API routes - OpenAI-compatible chat endpoints"""
2
+
3
+ import time
4
+ from fastapi import APIRouter, Depends, HTTPException, Request
5
+ from typing import Optional, Dict, Any
6
+ from fastapi.responses import StreamingResponse
7
+
8
+ from app.core.auth import auth_manager
9
+ from app.core.exception import GrokApiException
10
+ from app.core.logger import logger
11
+ from app.services.grok.client import GrokClient
12
+ from app.models.openai_schema import OpenAIChatRequest
13
+ from app.services.request_stats import request_stats
14
+ from app.services.request_logger import request_logger
15
+
16
+
17
+ router = APIRouter(prefix="/chat", tags=["chat"])
18
+
19
+
20
+ @router.post("/completions", response_model=None)
21
+ async def chat_completions(
22
+ request: Request,
23
+ body: OpenAIChatRequest,
24
+ auth_info: Dict[str, Any] = Depends(auth_manager.verify)
25
+ ):
26
+ """Create chat completions (streaming and non-streaming)"""
27
+ start_time = time.time()
28
+ model = body.model
29
+ ip = request.client.host
30
+ key_name = auth_info.get("name", "Unknown")
31
+
32
+ status_code = 200
33
+ error_msg = ""
34
+
35
+ try:
36
+ logger.info(f"[Chat] Received chat request: {key_name} @ {ip}")
37
+
38
+ # Call Grok client
39
+ result = await GrokClient.openai_to_grok(body.model_dump())
40
+
41
+ # Record success stats
42
+ await request_stats.record_request(model, success=True)
43
+
44
+ # Streaming response
45
+ if body.stream:
46
+ async def stream_wrapper():
47
+ try:
48
+ async for chunk in result:
49
+ yield chunk
50
+ finally:
51
+ # Log when streaming ends
52
+ duration = time.time() - start_time
53
+ await request_logger.add_log(ip, model, duration, 200, key_name)
54
+
55
+ return StreamingResponse(
56
+ content=stream_wrapper(),
57
+ media_type="text/event-stream",
58
+ headers={
59
+ "Cache-Control": "no-cache",
60
+ "Connection": "keep-alive",
61
+ "X-Accel-Buffering": "no"
62
+ }
63
+ )
64
+
65
+ # Non-streaming response - log it
66
+ duration = time.time() - start_time
67
+ await request_logger.add_log(ip, model, duration, 200, key_name)
68
+ return result
69
+
70
+ except GrokApiException as e:
71
+ status_code = e.status_code or 500
72
+ error_msg = str(e)
73
+ await request_stats.record_request(model, success=False)
74
+ logger.error(f"[Chat] Grok API error: {e} - Details: {e.details}")
75
+
76
+ duration = time.time() - start_time
77
+ await request_logger.add_log(ip, model, duration, status_code, key_name, error=error_msg)
78
+
79
+ raise HTTPException(
80
+ status_code=status_code,
81
+ detail={
82
+ "error": {
83
+ "message": error_msg,
84
+ "type": e.error_code or "grok_api_error",
85
+ "code": e.error_code or "unknown"
86
+ }
87
+ }
88
+ )
89
+ except Exception as e:
90
+ status_code = 500
91
+ error_msg = str(e)
92
+ await request_stats.record_request(model, success=False)
93
+ logger.error(f"[Chat] Processing failed: {e}")
94
+
95
+ duration = time.time() - start_time
96
+ await request_logger.add_log(ip, model, duration, status_code, key_name, error=error_msg)
97
+
98
+ raise HTTPException(
99
+ status_code=500,
100
+ detail={
101
+ "error": {
102
+ "message": "Internal server error",
103
+ "type": "internal_error",
104
+ "code": "internal_server_error"
105
+ }
106
+ }
107
+ )
app/api/v1/images.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Media API - serve cached images and video files"""
2
+
3
+ from fastapi import APIRouter, HTTPException
4
+ from fastapi.responses import FileResponse
5
+
6
+ from app.core.logger import logger
7
+ from app.services.grok.cache import image_cache_service, video_cache_service
8
+
9
+
10
+ router = APIRouter()
11
+
12
+
13
+ @router.get("/images/{img_path:path}")
14
+ async def get_image(img_path: str):
15
+ """Get cached image or video
16
+
17
+ Args:
18
+ img_path: File path (format: users-xxx-generated-xxx-image.jpg)
19
+ """
20
+ try:
21
+ # Transform path (hyphens -> slashes)
22
+ original_path = "/" + img_path.replace('-', '/')
23
+
24
+ # Detect type
25
+ is_video = any(original_path.lower().endswith(ext) for ext in ['.mp4', '.webm', '.mov', '.avi'])
26
+
27
+ if is_video:
28
+ cache_path = video_cache_service.get_cached(original_path)
29
+ media_type = "video/mp4"
30
+ else:
31
+ cache_path = image_cache_service.get_cached(original_path)
32
+ media_type = "image/jpeg"
33
+
34
+ if cache_path and cache_path.exists():
35
+ logger.debug(f"[MediaAPI] Returning cached file: {cache_path}")
36
+ return FileResponse(
37
+ path=str(cache_path),
38
+ media_type=media_type,
39
+ headers={
40
+ "Cache-Control": "public, max-age=86400",
41
+ "Access-Control-Allow-Origin": "*"
42
+ }
43
+ )
44
+
45
+ # File not found
46
+ logger.warning(f"[MediaAPI] Not found: {original_path}")
47
+ raise HTTPException(status_code=404, detail="File not found")
48
+
49
+ except HTTPException:
50
+ raise
51
+ except Exception as e:
52
+ logger.error(f"[MediaAPI] Failed to fetch: {e}")
53
+ raise HTTPException(status_code=500, detail=str(e))
app/api/v1/models.py ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Models API - OpenAI-compatible model list endpoints"""
2
+
3
+ import time
4
+ from typing import Dict, Any, List, Optional
5
+ from fastapi import APIRouter, HTTPException, Depends
6
+
7
+ from app.models.grok_models import Models
8
+ from app.core.auth import auth_manager
9
+ from app.core.logger import logger
10
+
11
+
12
+ router = APIRouter(tags=["models"])
13
+
14
+
15
+ @router.get("/models")
16
+ async def list_models(_: Optional[str] = Depends(auth_manager.verify)) -> Dict[str, Any]:
17
+ """Get available model list"""
18
+ try:
19
+ logger.debug("[Models] Requesting model list")
20
+
21
+ timestamp = int(time.time())
22
+ model_data: List[Dict[str, Any]] = []
23
+
24
+ for model in Models:
25
+ model_id = model.value
26
+ config = Models.get_model_info(model_id)
27
+
28
+ model_info = {
29
+ "id": model_id,
30
+ "object": "model",
31
+ "created": timestamp,
32
+ "owned_by": "x-ai",
33
+ "display_name": config.get("display_name", model_id),
34
+ "description": config.get("description", ""),
35
+ "raw_model_path": config.get("raw_model_path", f"xai/{model_id}"),
36
+ "default_temperature": config.get("default_temperature", 1.0),
37
+ "default_max_output_tokens": config.get("default_max_output_tokens", 8192),
38
+ "supported_max_output_tokens": config.get("supported_max_output_tokens", 131072),
39
+ "default_top_p": config.get("default_top_p", 0.95)
40
+ }
41
+
42
+ model_data.append(model_info)
43
+
44
+ logger.debug(f"[Models] Returned {len(model_data)} models")
45
+ return {"object": "list", "data": model_data}
46
+
47
+ except Exception as e:
48
+ logger.error(f"[Models] Failed to get list: {e}")
49
+ raise HTTPException(
50
+ status_code=500,
51
+ detail={
52
+ "error": {
53
+ "message": f"Failed to retrieve models: {e}",
54
+ "type": "internal_error",
55
+ "code": "model_list_error"
56
+ }
57
+ }
58
+ )
59
+
60
+
61
+ @router.get("/models/{model_id}")
62
+ async def get_model(model_id: str, _: Optional[str] = Depends(auth_manager.verify)) -> Dict[str, Any]:
63
+ """Get specific model info"""
64
+ try:
65
+ logger.debug(f"[Models] Requesting model: {model_id}")
66
+
67
+ # Validate model
68
+ if not Models.is_valid_model(model_id):
69
+ logger.warning(f"[Models] Model not found: {model_id}")
70
+ raise HTTPException(
71
+ status_code=404,
72
+ detail={
73
+ "error": {
74
+ "message": f"Model '{model_id}' not found",
75
+ "type": "invalid_request_error",
76
+ "code": "model_not_found"
77
+ }
78
+ }
79
+ )
80
+
81
+ timestamp = int(time.time())
82
+ config = Models.get_model_info(model_id)
83
+
84
+ model_info = {
85
+ "id": model_id,
86
+ "object": "model",
87
+ "created": timestamp,
88
+ "owned_by": "x-ai",
89
+ "display_name": config.get("display_name", model_id),
90
+ "description": config.get("description", ""),
91
+ "raw_model_path": config.get("raw_model_path", f"xai/{model_id}"),
92
+ "default_temperature": config.get("default_temperature", 1.0),
93
+ "default_max_output_tokens": config.get("default_max_output_tokens", 8192),
94
+ "supported_max_output_tokens": config.get("supported_max_output_tokens", 131072),
95
+ "default_top_p": config.get("default_top_p", 0.95)
96
+ }
97
+
98
+ logger.debug(f"[Models] Returned model: {model_id}")
99
+ return model_info
100
+
101
+ except HTTPException:
102
+ raise
103
+ except Exception as e:
104
+ logger.error(f"[Models] Failed to get model: {e}")
105
+ raise HTTPException(
106
+ status_code=500,
107
+ detail={
108
+ "error": {
109
+ "message": f"Failed to retrieve model: {e}",
110
+ "type": "internal_error",
111
+ "code": "model_retrieve_error"
112
+ }
113
+ }
114
+ )
app/core/auth.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Authentication module - API token verification"""
2
+
3
+ from typing import Optional, Dict
4
+ from fastapi import Depends, HTTPException
5
+ from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
6
+
7
+ from app.core.config import setting
8
+ from app.core.logger import logger
9
+ from app.services.api_keys import api_key_manager
10
+
11
+
12
+ # Bearer security scheme
13
+ security = HTTPBearer(auto_error=False)
14
+
15
+
16
+ def _build_error(message: str, code: str = "invalid_token") -> dict:
17
+ """Build authentication error"""
18
+ return {
19
+ "error": {
20
+ "message": message,
21
+ "type": "authentication_error",
22
+ "code": code
23
+ }
24
+ }
25
+
26
+
27
class AuthManager:
    """Authentication manager - verify API tokens.

    Used as a FastAPI dependency (``Depends(auth_manager.verify)``); raises
    ``HTTPException(401)`` with an OpenAI-style error body on failure.
    """

    @staticmethod
    async def verify(credentials: Optional[HTTPAuthorizationCredentials] = Depends(security)) -> Dict:
        """Verify token and return key info.

        Returns ``{"key": None, "name": "Anonymous"}`` when auth is disabled
        (no global api_key configured and no managed keys exist); otherwise
        returns the key-info dict from the api_key_manager.
        """
        api_key = setting.grok_config.get("api_key")

        # Initialization check: lazily init the key manager on first request.
        # NOTE(review): probes the manager's private '_keys' attribute — confirm
        # api_key_manager exposes no public "initialized" flag.
        if not hasattr(api_key_manager, '_keys'):
            await api_key_manager.init()

        # Check token
        if not credentials:
            # Skip when no global key and no multi-keys (development mode)
            if not api_key and not api_key_manager.get_all_keys():
                logger.debug("[Auth] API_KEY not set, skipping verification")
                return {"key": None, "name": "Anonymous"}

            raise HTTPException(
                status_code=401,
                detail=_build_error("Missing authentication token", "missing_token")
            )

        token = credentials.credentials

        # Verify token (supports multiple keys)
        key_info = api_key_manager.validate_key(token)

        if key_info:
            return key_info

        # Only the token's length is reported, never the token itself.
        raise HTTPException(
            status_code=401,
            detail=_build_error(f"Invalid token, length: {len(token)}", "invalid_token")
        )


# Global instance
auth_manager = AuthManager()
app/core/config.py ADDED
@@ -0,0 +1,243 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Config manager - manage app config read/write"""
2
+ import os
3
+ import toml
4
+ from pathlib import Path
5
+ from typing import Dict, Any, Optional, Literal
6
+
7
+
8
+ # Default config
9
# Default config
# Upstream Grok/x.ai client defaults; merged under stored config by
# ConfigManager._merge_defaults so missing keys fall back here.
DEFAULT_GROK = {
    "api_key": "",            # global API key; empty + no managed keys = auth disabled
    "proxy_url": "",          # static outbound proxy (socks5h://... / http://...)
    "proxy_pool_url": "",     # API endpoint returning one proxy per request
    "proxy_pool_interval": 300,   # proxy-pool refresh interval (seconds)
    "cache_proxy_url": "",    # separate proxy for cache downloads
    "cf_clearance": "",       # Cloudflare clearance cookie (prefix auto-added)
    "x_statsig_id": "ZTpUeXBlRXJyb3I6IENhbm5vdCByZWFkIHByb3BlcnRpZXMgb2YgdW5kZWZpbmVkIChyZWFkaW5nICdjaGlsZE5vZGVzJyk=",
    "dynamic_statsig": False,
    "filtered_tags": "xaiartifact,xai:tool_usage_card",
    "show_thinking": True,
    "temporary": False,
    "max_upload_concurrency": 20,
    "max_request_concurrency": 100,
    "stream_first_response_timeout": 30,
    "stream_chunk_timeout": 120,
    "stream_total_timeout": 600,
    "retry_status_codes": [401, 429],  # Retryable HTTP status codes
}

# App-wide defaults (logging, caching, admin UI, persistence batching).
DEFAULT_GLOBAL = {
    "base_url": "https://tejmar-grok2api-private.hf.space",
    "log_level": "INFO",
    "image_mode": "url",
    # NOTE(review): hardcoded default admin credentials — must be overridden
    # in any real deployment.
    "admin_password": "!!tejmar",
    "admin_username": "admin",
    "image_cache_max_size_mb": 512,
    "video_cache_max_size_mb": 1024,
    "image_download_timeout": 30,
    "image_download_max_size_mb": 20,
    "max_upload_concurrency": 20,  # Max concurrent uploads
    "max_request_concurrency": 50,  # Max concurrent requests
    "batch_save_interval": 1.0,  # Batch save interval (seconds)
    "batch_save_threshold": 10  # Change count threshold to trigger batch save
}
44
+
45
+
46
class ConfigManager:
    """Config manager.

    Owns the app configuration, split into two sections:
    - "global": app-wide settings (base_url, logging, caches, admin creds)
    - "grok":   upstream client settings (keys, proxies, timeouts)

    When a storage backend is attached via set_storage(), reads/writes go
    through it; otherwise a local TOML file at ``self.config_path`` is used.
    """

    def __init__(self) -> None:
        # Resolve the data directory: DATA_DIR env var wins, then /data
        # (container volume), then <repo>/data relative to this file.
        data_dir_env = os.getenv("DATA_DIR")
        if data_dir_env:
            data_dir = Path(data_dir_env)
        elif Path("/data").exists():
            data_dir = Path("/data")
        else:
            data_dir = Path(__file__).parents[2] / "data"
        self.config_path: Path = data_dir / "setting.toml"
        self._storage: Optional[Any] = None          # optional storage backend
        self.global_config: Dict[str, Any] = {}      # populated by reload()
        self.grok_config: Dict[str, Any] = {}        # populated by reload()


    def _ensure_exists(self) -> None:
        """Ensure config exists"""
        if not self.config_path.exists():
            self.config_path.parent.mkdir(parents=True, exist_ok=True)
            self._create_default()

    def _create_default(self) -> None:
        """Write a fresh config file from the DEFAULT_* templates."""
        default = {"grok": DEFAULT_GROK.copy(), "global": DEFAULT_GLOBAL.copy()}
        # If provided, use runtime base url
        base_url = os.getenv("BASE_URL")
        if base_url:
            default["global"]["base_url"] = base_url
        with open(self.config_path, "w", encoding="utf-8") as f:
            toml.dump(default, f)

    def _normalize_proxy(self, proxy: str) -> str:
        """Normalize proxy URL (sock5/socks5 -> socks5h://)"""
        if not proxy:
            return proxy

        proxy = proxy.strip()
        # Fix the common "sock5" typo, then force socks5h (remote DNS).
        if proxy.startswith("sock5h://"):
            proxy = proxy.replace("sock5h://", "socks5h://", 1)
        if proxy.startswith("sock5://"):
            proxy = proxy.replace("sock5://", "socks5://", 1)
        if proxy.startswith("socks5://"):
            return proxy.replace("socks5://", "socks5h://", 1)
        return proxy

    def _normalize_cf(self, cf: str) -> str:
        """Normalize CF clearance (auto prefix)"""
        if cf and not cf.startswith("cf_clearance="):
            return f"cf_clearance={cf}"
        return cf

    def set_storage(self, storage: Any) -> None:
        """Set storage instance and adopt its config-file location."""
        self._storage = storage
        config_file = getattr(storage, "config_file", None)
        data_dir = getattr(storage, "data_dir", None)
        if config_file:
            self.config_path = Path(config_file)
        elif data_dir:
            self.config_path = Path(data_dir) / "setting.toml"

    @staticmethod
    def _merge_defaults(config: Dict[str, Any], defaults: Dict[str, Any]) -> Dict[str, Any]:
        """Merge stored config with defaults."""
        merged = defaults.copy()
        if config:
            merged.update(config)
        return merged

    def load(self, section: Literal["global", "grok"]) -> Dict[str, Any]:
        """Load config section from the TOML file (sync path only).

        Raises:
            Exception: wrapping any read/parse failure.
        """
        try:
            with open(self.config_path, "r", encoding="utf-8") as f:
                config = toml.load(f)[section]

            # Normalize Grok config
            if section == "grok":
                if "proxy_url" in config:
                    config["proxy_url"] = self._normalize_proxy(config["proxy_url"])
                if "cache_proxy_url" in config:
                    config["cache_proxy_url"] = self._normalize_proxy(config["cache_proxy_url"])
                if "cf_clearance" in config:
                    config["cf_clearance"] = self._normalize_cf(config["cf_clearance"])

            return config
        except Exception as e:
            raise Exception(f"[Setting] Failed to load config: {e}") from e

    async def reload(self) -> None:
        """Re-read config into global_config / grok_config (storage or file)."""
        if self._storage:
            config = await self._storage.load_config()
            self.global_config = self._merge_defaults(config.get("global", {}), DEFAULT_GLOBAL)
            self.grok_config = self._merge_defaults(config.get("grok", {}), DEFAULT_GROK)
        else:
            self._ensure_exists()
            self.global_config = self._merge_defaults(self.load("global"), DEFAULT_GLOBAL)
            self.grok_config = self._merge_defaults(self.load("grok"), DEFAULT_GROK)
        # Always enforce BASE_URL from environment if provided
        base_url = os.getenv("BASE_URL")
        if base_url:
            self.global_config["base_url"] = base_url

    async def _save_file(self, updates: Dict[str, Dict[str, Any]]) -> None:
        """Save to file (read-modify-write of the existing TOML)."""
        import aiofiles

        async with aiofiles.open(self.config_path, "r", encoding="utf-8") as f:
            config = toml.loads(await f.read())

        config.setdefault("global", {})
        config.setdefault("grok", {})

        # Only known sections are merged; unknown update keys are dropped.
        for section, data in updates.items():
            if section in config:
                config[section].update(data)

        async with aiofiles.open(self.config_path, "w", encoding="utf-8") as f:
            await f.write(toml.dumps(config))

    async def _save_storage(self, updates: Dict[str, Dict[str, Any]]) -> None:
        """Save to storage backend (same merge semantics as _save_file)."""
        config = await self._storage.load_config()

        config.setdefault("global", {})
        config.setdefault("grok", {})

        for section, data in updates.items():
            if section in config:
                config[section].update(data)

        await self._storage.save_config(config)

    def _prepare_grok(self, grok: Dict[str, Any]) -> Dict[str, Any]:
        """Prepare Grok config (remove prefix).

        Strips the "cf_clearance=" prefix before persisting; load() re-adds it.
        """
        processed = grok.copy()
        if "cf_clearance" in processed:
            cf = processed["cf_clearance"]
            if cf and cf.startswith("cf_clearance="):
                processed["cf_clearance"] = cf.replace("cf_clearance=", "", 1)
        return processed

    async def save(self, global_config: Optional[Dict[str, Any]] = None, grok_config: Optional[Dict[str, Any]] = None) -> None:
        """Save config sections (partial updates allowed), then reload."""
        updates = {}

        if global_config:
            updates["global"] = global_config
        if grok_config:
            updates["grok"] = self._prepare_grok(grok_config)

        # Choose storage backend
        if self._storage:
            await self._save_storage(updates)
        else:
            await self._save_file(updates)

        await self.reload()

    async def get_proxy_async(self, proxy_type: Literal["service", "cache"] = "service") -> str:
        """Async get proxy URL (supports proxy pool)

        Args:
            proxy_type: Proxy type
                - service: Service proxy (client/upload)
                - cache: Cache proxy (cache)
        """
        from app.core.proxy_pool import proxy_pool

        # A configured cache proxy takes precedence for cache traffic.
        if proxy_type == "cache":
            cache_proxy = self.grok_config.get("cache_proxy_url", "")
            if cache_proxy:
                return cache_proxy

        # Get from proxy pool
        return await proxy_pool.get_proxy() or ""

    def get_proxy(self, proxy_type: Literal["service", "cache"] = "service") -> str:
        """Get proxy URL (sync, for backward compatibility)

        Args:
            proxy_type: Proxy type
                - service: Service proxy (client/upload)
                - cache: Cache proxy (cache)
        """
        from app.core.proxy_pool import proxy_pool

        if proxy_type == "cache":
            cache_proxy = self.grok_config.get("cache_proxy_url", "")
            if cache_proxy:
                return cache_proxy

        # Return current proxy (if proxy pool, return last fetched)
        return proxy_pool.get_current_proxy() or self.grok_config.get("proxy_url", "")


# Global instance
setting = ConfigManager()
app/core/exception.py ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Exception handlers - OpenAI-compatible error responses"""
2
+
3
+ from fastapi import Request, status
4
+ from fastapi.responses import JSONResponse
5
+ from fastapi.exceptions import RequestValidationError
6
+ from starlette.exceptions import HTTPException as StarletteHTTPException
7
+
8
+
9
+ # HTTP error mapping
10
# HTTP error mapping
# status code -> (OpenAI error type, default message used when exc.detail is empty)
HTTP_ERROR_MAP = {
    400: ("invalid_request_error", "Invalid request format or missing required parameters"),
    401: ("invalid_request_error", "Token authentication failed"),
    403: ("permission_error", "No permission to access this resource"),
    404: ("invalid_request_error", "Requested resource not found"),
    429: ("rate_limit_error", "Rate limit exceeded, please try again later"),
    500: ("api_error", "Internal server error"),
    503: ("api_error", "Service temporarily unavailable"),
}

# Grok error code mapping
# GrokApiException.error_code -> HTTP status returned to the client.
GROK_STATUS_MAP = {
    "NO_AUTH_TOKEN": status.HTTP_401_UNAUTHORIZED,
    "INVALID_TOKEN": status.HTTP_401_UNAUTHORIZED,
    "HTTP_ERROR": status.HTTP_502_BAD_GATEWAY,
    "NETWORK_ERROR": status.HTTP_503_SERVICE_UNAVAILABLE,
    "JSON_ERROR": status.HTTP_502_BAD_GATEWAY,
    "API_ERROR": status.HTTP_502_BAD_GATEWAY,
    "STREAM_ERROR": status.HTTP_502_BAD_GATEWAY,
    "NO_RESPONSE": status.HTTP_502_BAD_GATEWAY,
    "TOKEN_SAVE_ERROR": status.HTTP_500_INTERNAL_SERVER_ERROR,
    "NO_AVAILABLE_TOKEN": status.HTTP_503_SERVICE_UNAVAILABLE,
}

# GrokApiException.error_code -> OpenAI-compatible error "type" string.
GROK_TYPE_MAP = {
    "NO_AUTH_TOKEN": "authentication_error",
    "INVALID_TOKEN": "authentication_error",
    "HTTP_ERROR": "api_error",
    "NETWORK_ERROR": "api_error",
    "JSON_ERROR": "api_error",
    "API_ERROR": "api_error",
    "STREAM_ERROR": "api_error",
    "NO_RESPONSE": "api_error",
    "TOKEN_SAVE_ERROR": "api_error",
    "NO_AVAILABLE_TOKEN": "api_error",
}
46
+
47
+
48
class GrokApiException(Exception):
    """Grok API business exception.

    Args:
        message: Human-readable error description.
        error_code: Machine code (e.g. "INVALID_TOKEN"); also keys the
            GROK_STATUS_MAP / GROK_TYPE_MAP lookups in the handlers.
        details: Optional structured error payload.
        context: Optional request context for logging/debugging.
        status_code: Explicit HTTP status; when omitted, derived from
            GROK_STATUS_MAP (may be None for unknown codes — the exception
            handler then defaults to 500).
    """

    def __init__(self, message: str, error_code: str = None, details: dict = None, context: dict = None, status_code: int = None):
        self.message = message
        self.error_code = error_code
        self.details = details or {}
        self.context = context or {}
        # Explicit status wins; otherwise derive from the error-code mapping.
        self.status_code = status_code or GROK_STATUS_MAP.get(error_code)
        super().__init__(self.message)
58
+
59
+
60
def build_error_response(message: str, error_type: str, code: str = None, param: str = None) -> dict:
    """Assemble an OpenAI-compatible error envelope.

    Optional fields (code, param) are included only when truthy.
    """
    payload = {"message": message, "type": error_type}

    for field, value in (("code", code), ("param", param)):
        if value:
            payload[field] = value

    return {"error": payload}
70
+
71
+
72
async def http_exception_handler(_: Request, exc: StarletteHTTPException) -> JSONResponse:
    """Render an HTTP exception as an OpenAI-compatible error response."""
    fallback = ("api_error", str(exc.detail))
    error_type, default_msg = HTTP_ERROR_MAP.get(exc.status_code, fallback)
    # Prefer the exception's own detail text; fall back to the canned message.
    message = str(exc.detail) if exc.detail else default_msg
    body = build_error_response(message, error_type)
    return JSONResponse(status_code=exc.status_code, content=body)
81
+
82
+
83
async def validation_exception_handler(_: Request, exc: RequestValidationError) -> JSONResponse:
    """Translate request-validation failures into a 400 error envelope."""
    details = exc.errors()
    first = details[0] if details else {}
    # Surface the offending parameter name and message from the first error.
    loc = first.get("loc")
    param = loc[-1] if loc else None
    message = first.get("msg") or "Invalid request parameters"

    return JSONResponse(
        status_code=status.HTTP_400_BAD_REQUEST,
        content=build_error_response(message, "invalid_request_error", param=param),
    )
93
+
94
+
95
async def grok_api_exception_handler(_: Request, exc: GrokApiException) -> JSONResponse:
    """Map a GrokApiException onto its HTTP status and OpenAI error type."""
    code = exc.error_code
    # Unknown codes fall back to 500 / generic api_error.
    http_status = GROK_STATUS_MAP.get(code, status.HTTP_500_INTERNAL_SERVER_ERROR)
    body = build_error_response(exc.message, GROK_TYPE_MAP.get(code, "api_error"), code)
    return JSONResponse(status_code=http_status, content=body)
104
+
105
+
106
async def global_exception_handler(_: Request, exc: Exception) -> JSONResponse:
    """Last-resort handler: respond 500 without leaking internals."""
    body = build_error_response("Server encountered an unexpected error, please retry", "api_error")
    return JSONResponse(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, content=body)
112
+
113
+
114
def register_exception_handlers(app) -> None:
    """Attach every custom exception handler to the FastAPI app."""
    # Ordered from most specific to the catch-all.
    bindings = (
        (StarletteHTTPException, http_exception_handler),
        (RequestValidationError, validation_exception_handler),
        (GrokApiException, grok_api_exception_handler),
        (Exception, global_exception_handler),
    )
    for exc_class, handler in bindings:
        app.add_exception_handler(exc_class, handler)
app/core/logger.py ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Global logging module - singleton logger manager"""
2
+
3
+ import os
4
+ import sys
5
+ import logging
6
+ from pathlib import Path
7
+ from logging.handlers import RotatingFileHandler
8
+
9
+ from app.core.config import setting
10
+
11
+
12
+ # Filter patterns
13
# Filter patterns
# Message substrings that MCPLogFilter drops from sse_starlette DEBUG records.
FILTER_PATTERNS = [
    "chunk: b'",  # SSE raw bytes
    "Got event:",  # SSE event
    "Closing",  # SSE close
]
18
+
19
+
20
class MCPLogFilter(logging.Filter):
    """Suppress noisy DEBUG records from SSE and MCP transport loggers."""

    def filter(self, record: logging.LogRecord) -> bool:
        """Return True to keep the record, False to drop it."""
        # Only DEBUG records are ever filtered.
        if record.levelno != logging.DEBUG:
            return True

        name = record.name
        if name == "sse_starlette.sse":
            # Drop SSE chatter matching any known noise pattern.
            message = record.getMessage()
            for pattern in FILTER_PATTERNS:
                if pattern in message:
                    return False
            return True

        # MCP streamable-http DEBUG output is dropped wholesale.
        return "mcp.server.streamable_http" not in name
35
+
36
+
37
class LoggerManager:
    """Logger manager (singleton).

    Configures the ROOT logger once per process: console + rotating-file
    handlers, an MCP noise filter, and tuned third-party log levels.
    Exposes thin debug/info/warning/error/critical wrappers.
    """

    _instance = None      # singleton instance
    _initialized = False  # guards against re-running handler setup

    def __new__(cls):
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance

    def __init__(self):
        """Initialize logging system"""
        if LoggerManager._initialized:
            return

        # Configuration: same data-dir resolution order as ConfigManager
        # (DATA_DIR env var, then /data volume, then the repo root).
        data_dir_env = os.getenv("DATA_DIR")
        if data_dir_env:
            base_dir = Path(data_dir_env)
        elif Path("/data").exists():
            base_dir = Path("/data")
        else:
            base_dir = Path(__file__).parents[2]

        log_dir = base_dir / "logs"
        log_dir.mkdir(parents=True, exist_ok=True)
        log_level = setting.global_config.get("log_level", "INFO").upper()
        log_format = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
        log_file = log_dir / "app.log"

        # Root logger
        self.logger = logging.getLogger()
        self.logger.setLevel(log_level)

        # Replace existing handlers to ensure logs show in hosted environments
        if self.logger.handlers:
            for handler in list(self.logger.handlers):
                self.logger.removeHandler(handler)
                handler.close()

        # Formatter and filter
        formatter = logging.Formatter(log_format)
        mcp_filter = MCPLogFilter()

        # Console handler
        console = logging.StreamHandler(sys.stdout)
        console.setLevel(log_level)
        console.setFormatter(formatter)
        console.addFilter(mcp_filter)

        # File handler (10MB, 5 backups)
        file_handler = RotatingFileHandler(
            log_file, maxBytes=10*1024*1024, backupCount=5, encoding="utf-8"
        )
        file_handler.setLevel(log_level)
        file_handler.setFormatter(formatter)
        file_handler.addFilter(mcp_filter)

        # Add handlers
        self.logger.addHandler(console)
        self.logger.addHandler(file_handler)

        # Configure third-party loggers
        self._configure_third_party()

        LoggerManager._initialized = True

    def _configure_third_party(self):
        """Configure third-party log levels (silence chatty libraries)."""
        config = {
            "asyncio": logging.WARNING,
            "uvicorn": logging.INFO,
            "fastapi": logging.INFO,
            "aiomysql": logging.WARNING,
            "mcp": logging.CRITICAL,
            "fastmcp": logging.CRITICAL,
        }

        for name, level in config.items():
            logging.getLogger(name).setLevel(level)

    def debug(self, msg: str) -> None:
        """Debug log"""
        self.logger.debug(msg)

    def info(self, msg: str) -> None:
        """Info log"""
        self.logger.info(msg)

    def warning(self, msg: str) -> None:
        """Warning log"""
        self.logger.warning(msg)

    def error(self, msg: str) -> None:
        """Error log"""
        self.logger.error(msg)

    def critical(self, msg: str) -> None:
        """Critical log"""
        self.logger.critical(msg)


# Global instance
logger = LoggerManager()
app/core/proxy_pool.py ADDED
@@ -0,0 +1,170 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Proxy pool manager - fetch proxy IPs dynamically from a URL"""
2
+
3
+ import asyncio
4
+ import aiohttp
5
+ import time
6
+ from typing import Optional, List
7
+ from app.core.logger import logger
8
+
9
+
10
class ProxyPool:
    """Proxy pool manager.

    Serves either a static proxy (proxy_url) or a rotating proxy fetched from
    a pool API (proxy_pool_url), refreshed at most every _fetch_interval
    seconds. Falls back to the static proxy on any fetch failure.
    """

    def __init__(self):
        self._pool_url: Optional[str] = None       # proxy-pool API endpoint
        self._static_proxy: Optional[str] = None   # fixed proxy from config
        self._current_proxy: Optional[str] = None  # last proxy handed out
        self._last_fetch_time: float = 0           # epoch of last pool fetch
        self._fetch_interval: int = 300  # Refresh every 5 minutes
        self._enabled: bool = False                # True when a pool URL is set
        self._lock = asyncio.Lock()                # serializes pool fetches

    def configure(self, proxy_url: str, proxy_pool_url: str = "", proxy_pool_interval: int = 300):
        """Configure proxy pool

        Args:
            proxy_url: Static proxy URL (socks5h://xxx or http://xxx)
            proxy_pool_url: Proxy pool API URL returning a single proxy address
            proxy_pool_interval: Proxy pool refresh interval (seconds)
        """
        self._static_proxy = self._normalize_proxy(proxy_url) if proxy_url else None
        pool_url = proxy_pool_url.strip() if proxy_pool_url else None
        # Guard against misconfiguration: a socks:// value in proxy_pool_url
        # is a proxy address, not a pool API endpoint.
        if pool_url and self._looks_like_proxy_url(pool_url):
            normalized_proxy = self._normalize_proxy(pool_url)
            if not self._static_proxy:
                self._static_proxy = normalized_proxy
                logger.warning("[ProxyPool] proxy_pool_url looks like a proxy address; using as static proxy. Use proxy_url instead.")
            else:
                logger.warning("[ProxyPool] proxy_pool_url looks like a proxy address; ignored (using proxy_url).")
            pool_url = None
        self._pool_url = pool_url
        self._fetch_interval = proxy_pool_interval
        self._enabled = bool(self._pool_url)

        if self._enabled:
            logger.info(f"[ProxyPool] Proxy pool enabled: {self._pool_url}, refresh interval: {self._fetch_interval}s")
        elif self._static_proxy:
            logger.info(f"[ProxyPool] Using static proxy: {self._static_proxy}")
            self._current_proxy = self._static_proxy
        else:
            logger.info("[ProxyPool] No proxy configured")

    async def get_proxy(self) -> Optional[str]:
        """Get proxy address

        Returns:
            Proxy URL or None
        """
        # If proxy pool is disabled, return static proxy
        if not self._enabled:
            return self._static_proxy

        # Check if refresh needed
        now = time.time()
        if not self._current_proxy or (now - self._last_fetch_time) >= self._fetch_interval:
            async with self._lock:
                # Double-check: `now` is captured before acquiring the lock, so
                # if another coroutine just refreshed (_last_fetch_time > now)
                # the difference is negative and the redundant fetch is skipped.
                if not self._current_proxy or (now - self._last_fetch_time) >= self._fetch_interval:
                    await self._fetch_proxy()

        return self._current_proxy

    async def force_refresh(self) -> Optional[str]:
        """Force refresh proxy (for 403 retry)

        Returns:
            New proxy URL or None
        """
        if not self._enabled:
            return self._static_proxy

        async with self._lock:
            await self._fetch_proxy()

        return self._current_proxy

    async def _fetch_proxy(self):
        """Fetch a new proxy from the proxy pool URL.

        On any failure (timeout, HTTP error, bad format) keeps the existing
        proxy if there is one, otherwise falls back to the static proxy.
        """
        try:
            logger.debug(f"[ProxyPool] Fetching new proxy from pool: {self._pool_url}")

            timeout = aiohttp.ClientTimeout(total=10)
            async with aiohttp.ClientSession(timeout=timeout) as session:
                async with session.get(self._pool_url) as response:
                    if response.status == 200:
                        proxy_text = await response.text()
                        proxy = self._normalize_proxy(proxy_text.strip())

                        # Validate proxy format
                        if self._validate_proxy(proxy):
                            self._current_proxy = proxy
                            self._last_fetch_time = time.time()
                            logger.info(f"[ProxyPool] Successfully fetched new proxy: {proxy}")
                        else:
                            logger.error(f"[ProxyPool] Invalid proxy format: {proxy}")
                            # Fallback to static proxy
                            if not self._current_proxy:
                                self._current_proxy = self._static_proxy
                    else:
                        logger.error(f"[ProxyPool] Failed to fetch proxy: HTTP {response.status}")
                        # Fallback to static proxy
                        if not self._current_proxy:
                            self._current_proxy = self._static_proxy

        except asyncio.TimeoutError:
            logger.error("[ProxyPool] Proxy fetch timed out")
            if not self._current_proxy:
                self._current_proxy = self._static_proxy

        except Exception as e:
            logger.error(f"[ProxyPool] Proxy fetch error: {e}")
            # Fallback to static proxy
            if not self._current_proxy:
                self._current_proxy = self._static_proxy

    def _validate_proxy(self, proxy: str) -> bool:
        """Validate proxy format

        Args:
            proxy: Proxy URL

        Returns:
            True if valid
        """
        if not proxy:
            return False

        # Supported protocols
        valid_protocols = ['http://', 'https://', 'socks5://', 'socks5h://']

        return any(proxy.startswith(proto) for proto in valid_protocols)

    def _normalize_proxy(self, proxy: str) -> str:
        """Normalize proxy URL (sock5/socks5 -> socks5h://)"""
        if not proxy:
            return proxy

        proxy = proxy.strip()
        # Fix the common "sock5" typo, then force socks5h (remote DNS).
        if proxy.startswith("sock5h://"):
            proxy = proxy.replace("sock5h://", "socks5h://", 1)
        if proxy.startswith("sock5://"):
            proxy = proxy.replace("sock5://", "socks5://", 1)
        if proxy.startswith("socks5://"):
            return proxy.replace("socks5://", "socks5h://", 1)
        return proxy

    def _looks_like_proxy_url(self, url: str) -> bool:
        """Check if URL looks like a proxy address (avoid mistaking pool API for proxy)"""
        return url.startswith(("sock5://", "sock5h://", "socks5://", "socks5h://"))

    def get_current_proxy(self) -> Optional[str]:
        """Get current proxy (sync)

        Returns:
            Current proxy URL or None
        """
        return self._current_proxy or self._static_proxy


# Global proxy pool instance
proxy_pool = ProxyPool()
app/core/storage.py ADDED
@@ -0,0 +1,644 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Storage abstraction - supports file, MySQL, and Redis storage"""
2
+
3
+ import os
4
+ import orjson
5
+ import toml
6
+ import asyncio
7
+ import warnings
8
+ import aiofiles
9
+ from pathlib import Path
10
+ from typing import Dict, Any, Optional, Literal
11
+ from abc import ABC, abstractmethod
12
+ from urllib.parse import urlparse, unquote
13
+
14
+ from app.core.logger import logger
15
+ from app.core.config import DEFAULT_GROK, DEFAULT_GLOBAL
16
+
17
+
18
# Storage backend identifiers. NOTE(review): "hf"/"hub"/"dataset" presumably
# alias the same Hugging Face-backed storage — confirm against the selector.
StorageMode = Literal["file", "mysql", "redis", "hf", "hub", "dataset"]
19
+
20
+
21
class BaseStorage(ABC):
    """Storage base class.

    Abstract interface shared by all backends (file, MySQL, Redis, ...):
    async load/save for tokens, config, and API keys.
    """

    @abstractmethod
    async def init_db(self) -> None:
        """Initialize database"""
        pass

    @abstractmethod
    async def load_tokens(self) -> Dict[str, Any]:
        """Load token data"""
        pass

    @abstractmethod
    async def save_tokens(self, data: Dict[str, Any]) -> None:
        """Save token data"""
        pass

    @abstractmethod
    async def load_config(self) -> Dict[str, Any]:
        """Load config data"""
        pass

    @abstractmethod
    async def save_config(self, data: Dict[str, Any]) -> None:
        """Save config data"""
        pass

    @abstractmethod
    async def load_api_keys(self) -> list:
        """Load API keys data"""
        pass

    @abstractmethod
    async def save_api_keys(self, data: list) -> None:
        """Save API keys data"""
        pass
58
+
59
+
60
+ class FileStorage(BaseStorage):
61
+ """File storage"""
62
+
63
+ def __init__(self, data_dir: Path):
64
+ self.data_dir = data_dir
65
+ self.token_file = data_dir / "token.json"
66
+ self.config_file = data_dir / "setting.toml"
67
+ self.api_keys_file = data_dir / "api_keys.json"
68
+ self._token_lock = asyncio.Lock()
69
+ self._config_lock = asyncio.Lock()
70
+ self._api_keys_lock = asyncio.Lock()
71
+
72
+ async def init_db(self) -> None:
73
+ """Initialize file storage"""
74
+ self.data_dir.mkdir(parents=True, exist_ok=True)
75
+
76
+ if not self.token_file.exists():
77
+ await self._write(self.token_file, orjson.dumps({"sso": {}, "ssoSuper": {}}, option=orjson.OPT_INDENT_2).decode())
78
+ logger.info("[Storage] Created token file")
79
+
80
+ if not self.config_file.exists():
81
+ default_global = DEFAULT_GLOBAL.copy()
82
+ base_url = os.getenv("BASE_URL")
83
+ if base_url:
84
+ default_global["base_url"] = base_url
85
+ default = {
86
+ "global": default_global,
87
+ "grok": DEFAULT_GROK.copy()
88
+ }
89
+ await self._write(self.config_file, toml.dumps(default))
90
+ logger.info("[Storage] Created config file")
91
+
92
+ if not self.api_keys_file.exists():
93
+ await self._write(self.api_keys_file, "[]")
94
+ logger.info("[Storage] Created API keys file")
95
+
96
+ async def _read(self, path: Path) -> str:
97
+ """Read file"""
98
+ async with aiofiles.open(path, "r", encoding="utf-8") as f:
99
+ return await f.read()
100
+
101
+ async def _write(self, path: Path, content: str) -> None:
102
+ """Write file"""
103
+ async with aiofiles.open(path, "w", encoding="utf-8") as f:
104
+ await f.write(content)
105
+
106
+ async def _load_json(self, path: Path, default: Dict, lock: asyncio.Lock) -> Dict[str, Any]:
107
+ """Load JSON"""
108
+ try:
109
+ async with lock:
110
+ if not path.exists():
111
+ return default
112
+ return orjson.loads(await self._read(path))
113
+ except Exception as e:
114
+ logger.error(f"[Storage] Failed to load {path.name}: {e}")
115
+ return default
116
+
117
+ async def _save_json(self, path: Path, data: Dict, lock: asyncio.Lock) -> None:
118
+ """Save JSON"""
119
+ try:
120
+ async with lock:
121
+ await self._write(path, orjson.dumps(data, option=orjson.OPT_INDENT_2).decode())
122
+ except Exception as e:
123
+ logger.error(f"[Storage] Failed to save {path.name}: {e}")
124
+ raise
125
+
126
+ async def _load_toml(self, path: Path, default: Dict, lock: asyncio.Lock) -> Dict[str, Any]:
127
+ """Load TOML"""
128
+ try:
129
+ async with lock:
130
+ if not path.exists():
131
+ return default
132
+ return toml.loads(await self._read(path))
133
+ except Exception as e:
134
+ logger.error(f"[Storage] Failed to load {path.name}: {e}")
135
+ return default
136
+
137
+ async def _save_toml(self, path: Path, data: Dict, lock: asyncio.Lock) -> None:
138
+ """Save TOML"""
139
+ try:
140
+ async with lock:
141
+ await self._write(path, toml.dumps(data))
142
+ except Exception as e:
143
+ logger.error(f"[Storage] Failed to save {path.name}: {e}")
144
+ raise
145
+
146
    async def load_tokens(self) -> Dict[str, Any]:
        """Load token.json; falls back to empty "sso"/"ssoSuper" buckets."""
        return await self._load_json(self.token_file, {"sso": {}, "ssoSuper": {}}, self._token_lock)
149
+
150
    async def save_tokens(self, data: Dict[str, Any]) -> None:
        """Persist the full token mapping to token.json (raises on failure)."""
        await self._save_json(self.token_file, data, self._token_lock)
153
+
154
    async def load_config(self) -> Dict[str, Any]:
        """Load setting.toml; falls back to empty "global"/"grok" sections."""
        return await self._load_toml(self.config_file, {"global": {}, "grok": {}}, self._config_lock)
157
+
158
    async def save_config(self, data: Dict[str, Any]) -> None:
        """Persist the full config mapping to setting.toml (raises on failure)."""
        await self._save_toml(self.config_file, data, self._config_lock)
161
+
162
    async def load_api_keys(self) -> list:
        """Load api_keys.json; falls back to an empty list."""
        return await self._load_json(self.api_keys_file, [], self._api_keys_lock)
165
+
166
    async def save_api_keys(self, data: list) -> None:
        """Persist the API key list to api_keys.json (raises on failure)."""
        await self._save_json(self.api_keys_file, data, self._api_keys_lock)
169
+
170
+
171
class MysqlStorage(BaseStorage):
    """MySQL storage.

    Hybrid design: a local FileStorage mirror is the read path, MySQL is the
    durable copy. Writes go to the files first, then to the DB; at startup
    `_sync_data` reconciles the two (DB wins when it has data). API keys are
    file-only — they are never pushed to MySQL.
    """

    def __init__(self, database_url: str, data_dir: Path):
        self.database_url = database_url
        self.data_dir = data_dir
        self._pool = None                      # aiomysql pool; created in init_db()
        self._file = FileStorage(data_dir)     # local mirror used for all reads

    async def init_db(self) -> None:
        """Initialize MySQL: create DB + tables, init the file mirror, sync."""
        try:
            import aiomysql
            parsed = self._parse_url(self.database_url)
            logger.info(f"[Storage] MySQL: {parsed['user']}@{parsed['host']}:{parsed['port']}/{parsed['db']}")

            await self._create_db(parsed)
            self._pool = await aiomysql.create_pool(
                host=parsed['host'], port=parsed['port'], user=parsed['user'],
                password=parsed['password'], db=parsed['db'], charset="utf8mb4",
                autocommit=True, maxsize=10
            )
            await self._create_tables()
            await self._file.init_db()
            await self._sync_data()

        except ImportError:
            # NOTE(review): raising a plain Exception drops the original
            # traceback; callers only see the message.
            raise Exception("aiomysql not installed")
        except Exception as e:
            logger.error(f"[Storage] MySQL initialization failed: {e}")
            raise

    def _parse_url(self, url: str) -> Dict[str, Any]:
        """Split a mysql://user:pass@host:port/db URL into connect kwargs.

        Credentials are URL-decoded; port defaults to 3306 and the database
        name to "grok2api" when absent.
        """
        p = urlparse(url)
        return {
            'user': unquote(p.username) if p.username else "",
            'password': unquote(p.password) if p.password else "",
            'host': p.hostname,
            'port': p.port or 3306,
            'db': p.path[1:] if p.path else "grok2api"
        }

    async def _create_db(self, parsed: Dict) -> None:
        """Create the target database if missing, via a throwaway 1-conn pool."""
        import aiomysql
        pool = await aiomysql.create_pool(
            host=parsed['host'], port=parsed['port'], user=parsed['user'],
            password=parsed['password'], charset="utf8mb4", autocommit=True, maxsize=1
        )

        try:
            async with pool.acquire() as conn:
                async with conn.cursor() as cursor:
                    # MySQL emits a warning (not an error) when the DB exists.
                    with warnings.catch_warnings():
                        warnings.filterwarnings('ignore', message='.*database exists')
                        await cursor.execute(
                            f"CREATE DATABASE IF NOT EXISTS `{parsed['db']}` "
                            f"CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci"
                        )
                logger.info(f"[Storage] Database '{parsed['db']}' ready")
        finally:
            pool.close()
            await pool.wait_closed()

    async def _create_tables(self) -> None:
        """Create the single-row JSON tables used as key/value stores."""
        tables = {
            "grok_tokens": """
                CREATE TABLE IF NOT EXISTS grok_tokens (
                    id INT AUTO_INCREMENT PRIMARY KEY,
                    data JSON NOT NULL,
                    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
                ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4
            """,
            "grok_settings": """
                CREATE TABLE IF NOT EXISTS grok_settings (
                    id INT AUTO_INCREMENT PRIMARY KEY,
                    data JSON NOT NULL,
                    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
                ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4
            """
        }

        async with self._pool.acquire() as conn:
            async with conn.cursor() as cursor:
                with warnings.catch_warnings():
                    warnings.filterwarnings('ignore', message='.*already exists')
                    for sql in tables.values():
                        await cursor.execute(sql)
        logger.info("[Storage] MySQL tables ready")

    async def _sync_data(self) -> None:
        """Reconcile DB and file mirror at startup.

        DB data (when present) overwrites the files; otherwise non-empty file
        data seeds the DB. Failures are logged and swallowed — startup should
        not abort on a sync problem.
        """
        try:
            for table, key in [("grok_tokens", "sso"), ("grok_settings", "global")]:
                data = await self._load_db(table)
                if data:
                    if table == "grok_tokens":
                        await self._file.save_tokens(data)
                    else:
                        await self._file.save_config(data)
                    logger.info(f"[Storage] {table.split('_')[1]} data synced from DB")
                else:
                    file_data = await (self._file.load_tokens() if table == "grok_tokens" else self._file.load_config())
                    # Seed the DB only when the file actually holds something.
                    if file_data.get(key) or (table == "grok_tokens" and file_data.get("ssoSuper")):
                        await self._save_db(table, file_data)
                        logger.info(f"[Storage] {table.split('_')[1]} data initialized to DB")
        except Exception as e:
            logger.warning(f"[Storage] Sync failed: {e}")

    async def _load_db(self, table: str) -> Optional[Dict]:
        """Return the newest JSON row of *table*, or None on miss/error.

        *table* is always one of the internal constants above, so the f-string
        SQL is not exposed to user input.
        """
        try:
            async with self._pool.acquire() as conn:
                async with conn.cursor() as cursor:
                    await cursor.execute(f"SELECT data FROM {table} ORDER BY id DESC LIMIT 1")
                    result = await cursor.fetchone()
                    return orjson.loads(result[0]) if result else None
        except Exception as e:
            logger.error(f"[Storage] Failed to load {table}: {e}")
            return None

    async def _save_db(self, table: str, data: Dict) -> None:
        """Upsert *data* as the single row of *table* (raises on failure).

        NOTE(review): SELECT-then-UPDATE/INSERT is racy under concurrent
        writers; acceptable while this process is the only writer.
        """
        try:
            async with self._pool.acquire() as conn:
                async with conn.cursor() as cursor:
                    json_data = orjson.dumps(data).decode()
                    await cursor.execute(f"SELECT id FROM {table} ORDER BY id DESC LIMIT 1")
                    result = await cursor.fetchone()

                    if result:
                        await cursor.execute(f"UPDATE {table} SET data = %s WHERE id = %s", (json_data, result[0]))
                    else:
                        await cursor.execute(f"INSERT INTO {table} (data) VALUES (%s)", (json_data,))
        except Exception as e:
            logger.error(f"[Storage] Failed to save {table}: {e}")
            raise

    async def load_tokens(self) -> Dict[str, Any]:
        """Read tokens from the local file mirror (never hits MySQL)."""
        return await self._file.load_tokens()

    async def save_tokens(self, data: Dict[str, Any]) -> None:
        """Write tokens to the file mirror, then push to MySQL."""
        await self._file.save_tokens(data)
        await self._save_db("grok_tokens", data)

    async def load_config(self) -> Dict[str, Any]:
        """Read config from the local file mirror (never hits MySQL)."""
        return await self._file.load_config()

    async def save_config(self, data: Dict[str, Any]) -> None:
        """Write config to the file mirror, then push to MySQL."""
        await self._file.save_config(data)
        await self._save_db("grok_settings", data)

    async def load_api_keys(self) -> list:
        """Read API keys from the file mirror (keys are file-only)."""
        return await self._file.load_api_keys()

    async def save_api_keys(self, data: list) -> None:
        """Write API keys to the file mirror only (not persisted to MySQL)."""
        await self._file.save_api_keys(data)

    async def close(self) -> None:
        """Close and drain the connection pool."""
        if self._pool:
            self._pool.close()
            await self._pool.wait_closed()
            logger.info("[Storage] MySQL closed")
345
+
346
+
347
class RedisStorage(BaseStorage):
    """Redis storage.

    Same hybrid shape as MysqlStorage: a local FileStorage mirror serves all
    reads, Redis holds the durable copy under "grok:tokens"/"grok:settings".
    API keys stay file-only.
    """

    def __init__(self, redis_url: str, data_dir: Path):
        self.redis_url = redis_url
        self.data_dir = data_dir
        self._redis = None                     # redis.asyncio client; set in init_db()
        self._file = FileStorage(data_dir)     # local mirror used for all reads

    async def init_db(self) -> None:
        """Connect to Redis, verify with PING, init file mirror, then sync."""
        try:
            import redis.asyncio as aioredis
            # Parsed only for the log line; from_url does its own parsing.
            parsed = urlparse(self.redis_url)
            db = int(parsed.path.lstrip('/')) if parsed.path and parsed.path != '/' else 0
            logger.info(f"[Storage] Redis: {parsed.hostname}:{parsed.port or 6379}/{db}")

            self._redis = aioredis.Redis.from_url(
                self.redis_url, encoding="utf-8", decode_responses=True
            )

            await self._redis.ping()
            logger.info(f"[Storage] Redis connection successful")

            await self._file.init_db()
            await self._sync_data()

        except ImportError:
            # NOTE(review): raising a plain Exception drops the original traceback.
            raise Exception("redis not installed")
        except Exception as e:
            logger.error(f"[Storage] Redis initialization failed: {e}")
            raise

    async def _sync_data(self) -> None:
        """Reconcile Redis and the file mirror at startup.

        Redis data (when present) overwrites the files; otherwise non-empty
        file data seeds Redis. Failures are logged and swallowed.
        """
        try:
            for key, file_func, key_name in [
                ("grok:tokens", self._file.load_tokens, "sso"),
                ("grok:settings", self._file.load_config, "global")
            ]:
                data = await self._redis.get(key)
                if data:
                    parsed = orjson.loads(data)
                    if key == "grok:tokens":
                        await self._file.save_tokens(parsed)
                    else:
                        await self._file.save_config(parsed)
                    logger.info(f"[Storage] {key.split(':')[1]} data synced from Redis")
                else:
                    file_data = await file_func()
                    # Seed Redis only when the file actually holds something.
                    if file_data.get(key_name) or (key == "grok:tokens" and file_data.get("ssoSuper")):
                        await self._redis.set(key, orjson.dumps(file_data).decode())
                        logger.info(f"[Storage] {key.split(':')[1]} data initialized to Redis")
        except Exception as e:
            logger.warning(f"[Storage] Sync failed: {e}")

    async def _save_redis(self, key: str, data: Dict) -> None:
        """SET *key* to the JSON encoding of *data* (raises on failure)."""
        try:
            await self._redis.set(key, orjson.dumps(data).decode())
        except Exception as e:
            logger.error(f"[Storage] Failed to save to Redis: {e}")
            raise

    async def load_tokens(self) -> Dict[str, Any]:
        """Read tokens from the local file mirror (never hits Redis)."""
        return await self._file.load_tokens()

    async def save_tokens(self, data: Dict[str, Any]) -> None:
        """Write tokens to the file mirror, then push to Redis."""
        await self._file.save_tokens(data)
        await self._save_redis("grok:tokens", data)

    async def load_config(self) -> Dict[str, Any]:
        """Read config from the local file mirror (never hits Redis)."""
        return await self._file.load_config()

    async def save_config(self, data: Dict[str, Any]) -> None:
        """Write config to the file mirror, then push to Redis."""
        await self._file.save_config(data)
        await self._save_redis("grok:settings", data)

    async def load_api_keys(self) -> list:
        """Read API keys from the file mirror (keys are file-only)."""
        return await self._file.load_api_keys()

    async def save_api_keys(self, data: list) -> None:
        """Write API keys to the file mirror only (not persisted to Redis)."""
        await self._file.save_api_keys(data)

    async def close(self) -> None:
        """Close the Redis client connection."""
        if self._redis:
            await self._redis.close()
            logger.info("[Storage] Redis closed")
442
+
443
+
444
class StorageManager:
    """Process-wide storage facade (singleton).

    Picks a backend from STORAGE_MODE — "file" (default), "mysql", "redis",
    or "hf"/"hub"/"dataset" for a Hugging Face dataset — and owns its
    lifecycle. Call `init()` once at startup, `get_storage()` thereafter.
    """

    _instance: Optional['StorageManager'] = None
    _storage: Optional[BaseStorage] = None
    _initialized: bool = False

    def __new__(cls):
        # Classic singleton: every instantiation returns the same object.
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance

    async def init(self) -> None:
        """Select, construct and initialize the storage backend (idempotent).

        Raises:
            ValueError: when a DB-backed mode is missing DATABASE_URL, or the
                HF mode is missing HF_DATASET_REPO.
        """
        if self._initialized:
            return

        mode = os.getenv("STORAGE_MODE", "file").lower()
        url = os.getenv("DATABASE_URL", "")
        # Data directory precedence: explicit env > docker volume > repo-local.
        data_dir_env = os.getenv("DATA_DIR")
        if data_dir_env:
            data_dir = Path(data_dir_env)
        elif Path("/data").exists():
            data_dir = Path("/data")
        else:
            data_dir = Path(__file__).parents[2] / "data"

        classes = {"mysql": MysqlStorage, "redis": RedisStorage, "file": FileStorage}

        if mode in ("mysql", "redis") and not url:
            raise ValueError(f"{mode.upper()} mode requires DATABASE_URL")

        if mode in ("hf", "hub", "dataset"):
            repo_id = os.getenv("HF_DATASET_REPO", "")
            if not repo_id:
                raise ValueError("HF/DATASET mode requires HF_DATASET_REPO")
            self._storage = HfDatasetStorage(repo_id, data_dir)
        else:
            storage_class = classes.get(mode, FileStorage)
            # BUG FIX: an unrecognized mode used to fall back to FileStorage
            # but still be called as storage_class(url, data_dir), crashing
            # with a TypeError. Dispatch on the resolved class instead.
            if storage_class is FileStorage:
                self._storage = storage_class(data_dir)
            else:
                self._storage = storage_class(url, data_dir)

        await self._storage.init_db()
        self._initialized = True
        logger.info(f"[Storage] Using {mode} mode")

    def get_storage(self) -> BaseStorage:
        """Return the active backend; raises RuntimeError before init()."""
        if not self._initialized or not self._storage:
            raise RuntimeError("StorageManager not initialized")
        return self._storage

    async def close(self) -> None:
        """Close the backend and reset state so init() can run again.

        Previously the singleton stayed marked initialized after close(),
        leaving get_storage() handing out a closed backend.
        """
        if self._storage and hasattr(self._storage, 'close'):
            await self._storage.close()
        self._storage = None
        self._initialized = False
499
+
500
+
501
# Global instance — import-time singleton; run `await storage_manager.init()`
# during application startup before calling `get_storage()`.
storage_manager = StorageManager()
503
+
504
+
505
class HfDatasetStorage(BaseStorage):
    """Hugging Face dataset-backed storage.

    Persists token.json / setting.toml / api_keys.json as files in a private
    HF dataset repo: each save uploads a commit, each load downloads the
    latest revision. The local data_dir is only a staging area for uploads.
    """

    def __init__(self, repo_id: str, data_dir: Path):
        from huggingface_hub import HfApi

        self.repo_id = repo_id
        self.data_dir = data_dir
        self.data_dir.mkdir(parents=True, exist_ok=True)
        self.config_file = data_dir / "setting.toml"
        # HF_TOKEN preferred; HUGGINGFACE_HUB_TOKEN accepted for compatibility.
        self._token = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_HUB_TOKEN")
        self._api = HfApi(token=self._token)
        # One lock per repo file to serialize read/write cycles in-process.
        self._locks = {
            "token.json": asyncio.Lock(),
            "setting.toml": asyncio.Lock(),
            "api_keys.json": asyncio.Lock(),
        }

    async def init_db(self) -> None:
        """Create the private dataset repo (idempotent) and seed defaults."""
        from huggingface_hub.utils import HfHubHTTPError

        try:
            await asyncio.to_thread(
                self._api.create_repo,
                repo_id=self.repo_id,
                repo_type="dataset",
                private=True,
                exist_ok=True,
            )
        except HfHubHTTPError as e:
            logger.error(f"[Storage] Dataset init failed: {e}")
            raise

        await self._ensure_default_files()

    async def _ensure_default_files(self) -> None:
        """Ensure the three backing files exist in the dataset repo."""
        default_global = DEFAULT_GLOBAL.copy()
        base_url = os.getenv("BASE_URL")
        if base_url:
            default_global["base_url"] = base_url
        # NOTE(review): token buckets here are "ssoNormal"/"ssoSuper", while
        # FileStorage seeds "sso"/"ssoSuper" — confirm the canonical key name.
        defaults = {
            "setting.toml": toml.dumps({"global": default_global, "grok": DEFAULT_GROK.copy()}),
            "token.json": orjson.dumps(
                {"ssoNormal": {}, "ssoSuper": {}},
                option=orjson.OPT_INDENT_2
            ).decode(),
            "api_keys.json": "[]",
        }

        for filename, content in defaults.items():
            await self._ensure_file(filename, content)

    async def _ensure_file(self, filename: str, content: str) -> None:
        """Create *filename* in the dataset with *content* if it is missing."""
        existing = await self._download_text(filename)
        if existing is None:
            # BUG FIX: the commit message was a placeholder-less f-string
            # ("Initialize (unknown)"); interpolate the file name instead.
            await self._upload_text(filename, content, f"Initialize {filename}")

    async def _download_text(self, filename: str) -> Optional[str]:
        """Download *filename* from the dataset; None when it does not exist."""
        from huggingface_hub import hf_hub_download
        from huggingface_hub.utils import EntryNotFoundError, HfHubHTTPError

        def download_sync() -> Optional[str]:
            try:
                path = hf_hub_download(
                    repo_id=self.repo_id,
                    repo_type="dataset",
                    filename=filename,
                    token=self._token,
                )
                return Path(path).read_text(encoding="utf-8")
            except EntryNotFoundError:
                return None
            except HfHubHTTPError as e:
                # Treat a plain 404 as "missing"; re-raise anything else.
                status = getattr(e.response, "status_code", None)
                if status == 404:
                    return None
                raise

        return await asyncio.to_thread(download_sync)

    async def _upload_text(self, filename: str, content: str, message: str) -> None:
        """Stage *content* under data_dir and commit it to the dataset repo."""
        def upload_sync() -> None:
            path = self.data_dir / filename
            path.parent.mkdir(parents=True, exist_ok=True)
            path.write_text(content, encoding="utf-8")
            self._api.upload_file(
                path_or_fileobj=str(path),
                path_in_repo=filename,
                repo_id=self.repo_id,
                repo_type="dataset",
                commit_message=message,
            )

        await asyncio.to_thread(upload_sync)

    async def load_tokens(self) -> Dict[str, Any]:
        """Load token data from the dataset (empty buckets when missing)."""
        async with self._locks["token.json"]:
            content = await self._download_text("token.json")
            if not content:
                return {"ssoNormal": {}, "ssoSuper": {}}
            return orjson.loads(content)

    async def save_tokens(self, data: Dict[str, Any]) -> None:
        """Commit the token mapping to the dataset as token.json."""
        async with self._locks["token.json"]:
            content = orjson.dumps(data, option=orjson.OPT_INDENT_2).decode()
            await self._upload_text("token.json", content, "Update token.json")

    async def load_config(self) -> Dict[str, Any]:
        """Load config data from the dataset (empty sections when missing)."""
        async with self._locks["setting.toml"]:
            content = await self._download_text("setting.toml")
            if not content:
                return {"global": {}, "grok": {}}
            return toml.loads(content)

    async def save_config(self, data: Dict[str, Any]) -> None:
        """Commit the config mapping to the dataset as setting.toml."""
        async with self._locks["setting.toml"]:
            await self._upload_text("setting.toml", toml.dumps(data), "Update setting.toml")

    async def load_api_keys(self) -> list:
        """Load the API key list from the dataset (empty when missing)."""
        async with self._locks["api_keys.json"]:
            content = await self._download_text("api_keys.json")
            if not content:
                return []
            return orjson.loads(content)

    async def save_api_keys(self, data: list) -> None:
        """Commit the API key list to the dataset as api_keys.json."""
        async with self._locks["api_keys.json"]:
            content = orjson.dumps(data, option=orjson.OPT_INDENT_2).decode()
            await self._upload_text("api_keys.json", content, "Update api_keys.json")
app/models/grok_models.py ADDED
@@ -0,0 +1,163 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Grok model configs and enum definitions"""
2
+
3
+ from enum import Enum
4
+ from typing import Dict, Any, Tuple
5
+
6
+
7
+ # Model configuration
8
# Model configuration.
# Per-entry schema:
#   grok_model            -> (internal Grok model name, mode constant) sent upstream
#   rate_limit_model      -> name used for rate-limit accounting
#   cost                  -> billing class + multiplier (see descriptions)
#   requires_super        -> True when only Super tokens may call it
#   display/description   -> UI metadata
#   raw_model_path        -> upstream "xai/..." identifier
#   default_* / supported_* -> sampling + token-limit defaults
# NOTE(review): "grok-4.1-thinking" is type "high_cost" with multiplier 1,
# unlike the other high_cost entries (multiplier 4) — confirm intentional.
_MODEL_CONFIG: Dict[str, Dict[str, Any]] = {
    "grok-3-fast": {
        "grok_model": ("grok-3", "MODEL_MODE_FAST"),
        "rate_limit_model": "grok-3",
        "cost": {"type": "low_cost", "multiplier": 1, "description": "Counts as 1 call"},
        "requires_super": False,
        "display_name": "Grok 3 Fast",
        "description": "Fast and efficient Grok 3 model",
        "raw_model_path": "xai/grok-3",
        "default_temperature": 1.0,
        "default_max_output_tokens": 8192,
        "supported_max_output_tokens": 131072,
        "default_top_p": 0.95
    },
    "grok-4-fast": {
        "grok_model": ("grok-4-mini-thinking-tahoe", "MODEL_MODE_GROK_4_MINI_THINKING"),
        "rate_limit_model": "grok-4-mini-thinking-tahoe",
        "cost": {"type": "low_cost", "multiplier": 1, "description": "Counts as 1 call"},
        "requires_super": False,
        "display_name": "Grok 4 Fast",
        "description": "Fast version of Grok 4 with mini thinking capabilities",
        "raw_model_path": "xai/grok-4-mini-thinking-tahoe",
        "default_temperature": 1.0,
        "default_max_output_tokens": 8192,
        "supported_max_output_tokens": 131072,
        "default_top_p": 0.95
    },
    "grok-4-fast-expert": {
        "grok_model": ("grok-4-mini-thinking-tahoe", "MODEL_MODE_EXPERT"),
        "rate_limit_model": "grok-4-mini-thinking-tahoe",
        "cost": {"type": "high_cost", "multiplier": 4, "description": "Counts as 4 calls"},
        "requires_super": False,
        "display_name": "Grok 4 Fast Expert",
        "description": "Expert mode of Grok 4 Fast with enhanced reasoning",
        "raw_model_path": "xai/grok-4-mini-thinking-tahoe",
        "default_temperature": 1.0,
        "default_max_output_tokens": 32768,
        "supported_max_output_tokens": 131072,
        "default_top_p": 0.95
    },
    "grok-4-expert": {
        "grok_model": ("grok-4", "MODEL_MODE_EXPERT"),
        "rate_limit_model": "grok-4",
        "cost": {"type": "high_cost", "multiplier": 4, "description": "Counts as 4 calls"},
        "requires_super": False,
        "display_name": "Grok 4 Expert",
        "description": "Full Grok 4 model with expert mode capabilities",
        "raw_model_path": "xai/grok-4",
        "default_temperature": 1.0,
        "default_max_output_tokens": 32768,
        "supported_max_output_tokens": 131072,
        "default_top_p": 0.95
    },
    "grok-4-heavy": {
        "grok_model": ("grok-4-heavy", "MODEL_MODE_HEAVY"),
        "rate_limit_model": "grok-4-heavy",
        "cost": {"type": "independent", "multiplier": 1, "description": "Billed independently, Super users only"},
        "requires_super": True,
        "display_name": "Grok 4 Heavy",
        "description": "Most powerful Grok 4 model with heavy computational capabilities. Requires Super Token for access.",
        "raw_model_path": "xai/grok-4-heavy",
        "default_temperature": 1.0,
        "default_max_output_tokens": 65536,
        "supported_max_output_tokens": 131072,
        "default_top_p": 0.95
    },
    "grok-4.1": {
        "grok_model": ("grok-4-1-non-thinking-w-tool", "MODEL_MODE_GROK_4_1"),
        "rate_limit_model": "grok-4-1-non-thinking-w-tool",
        "cost": {"type": "low_cost", "multiplier": 1, "description": "Counts as 1 call"},
        "requires_super": False,
        "display_name": "Grok 4.1",
        "description": "Latest Grok 4.1 model with tool capabilities",
        "raw_model_path": "xai/grok-4-1-non-thinking-w-tool",
        "default_temperature": 1.0,
        "default_max_output_tokens": 8192,
        "supported_max_output_tokens": 131072,
        "default_top_p": 0.95
    },
    "grok-4.1-thinking": {
        "grok_model": ("grok-4-1-thinking-1108b", "MODEL_MODE_AUTO"),
        "rate_limit_model": "grok-4-1-thinking-1108b",
        "cost": {"type": "high_cost", "multiplier": 1, "description": "Counts as 1 call"},
        "requires_super": False,
        "display_name": "Grok 4.1 Thinking",
        "description": "Grok 4.1 model with advanced thinking and tool capabilities",
        "raw_model_path": "xai/grok-4-1-thinking-1108b",
        "default_temperature": 1.0,
        "default_max_output_tokens": 32768,
        "supported_max_output_tokens": 131072,
        "default_top_p": 0.95
    },
    "grok-imagine-0.9": {
        # Routed upstream as grok-3 fast; flagged as a video model below.
        "grok_model": ("grok-3", "MODEL_MODE_FAST"),
        "rate_limit_model": "grok-3",
        "cost": {"type": "low_cost", "multiplier": 1, "description": "Counts as 1 call"},
        "requires_super": False,
        "display_name": "Grok Imagine 0.9",
        "description": "Image and video generation model. Supports text-to-image and image-to-video generation.",
        "raw_model_path": "xai/grok-imagine-0.9",
        "default_temperature": 1.0,
        "default_max_output_tokens": 8192,
        "supported_max_output_tokens": 131072,
        "default_top_p": 0.95,
        "is_video_model": True
    }
}
115
+
116
+
117
class TokenType(Enum):
    """Token pool type (value = JSON key in the token store).

    NOTE(review): values here are "ssoNormal"/"ssoSuper", but FileStorage
    seeds/loads "sso"/"ssoSuper" — confirm which key is canonical.
    """
    NORMAL = "ssoNormal"
    SUPER = "ssoSuper"
121
+
122
+
123
class Models(Enum):
    """Externally exposed model identifiers plus lookup helpers."""
    GROK_3_FAST = "grok-3-fast"
    GROK_4_1 = "grok-4.1"
    GROK_4_1_THINKING = "grok-4.1-thinking"
    GROK_4_FAST = "grok-4-fast"
    GROK_4_FAST_EXPERT = "grok-4-fast-expert"
    GROK_4_EXPERT = "grok-4-expert"
    GROK_4_HEAVY = "grok-4-heavy"
    GROK_IMAGINE_0_9 = "grok-imagine-0.9"

    @classmethod
    def get_model_info(cls, model: str) -> Dict[str, Any]:
        """Return the config dict for *model*, or {} when unknown."""
        return _MODEL_CONFIG.get(model, {})

    @classmethod
    def is_valid_model(cls, model: str) -> bool:
        """True when *model* has a configuration entry."""
        return model in _MODEL_CONFIG

    @classmethod
    def to_grok(cls, model: str) -> Tuple[str, str]:
        """Map *model* to Grok's internal (model name, mode) pair.

        Unknown models pass through unchanged with MODEL_MODE_FAST.
        """
        entry = _MODEL_CONFIG.get(model)
        if not entry:
            return (model, "MODEL_MODE_FAST")
        return entry["grok_model"]

    @classmethod
    def to_rate_limit(cls, model: str) -> str:
        """Map *model* to the name used for rate-limit accounting."""
        entry = _MODEL_CONFIG.get(model)
        if not entry:
            return model
        return entry["rate_limit_model"]

    @classmethod
    def get_all_model_names(cls) -> list[str]:
        """All configured model identifiers, in definition order."""
        return [name for name in _MODEL_CONFIG]
app/models/openai_schema.py ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """OpenAI request/response model definitions"""
2
+
3
+ from fastapi import HTTPException
4
+ from typing import Optional, List, Union, Dict, Any
5
+ from pydantic import BaseModel, Field, field_validator
6
+
7
+ from app.models.grok_models import Models
8
+
9
+
10
class OpenAIChatRequest(BaseModel):
    """OpenAI-compatible chat completion request."""

    model: str = Field(..., description="Model name", min_length=1)
    messages: List[Dict[str, Any]] = Field(..., description="Message list", min_length=1)
    image_url: Optional[str] = Field(None, description="Single image URL")
    image_urls: Optional[List[str]] = Field(None, description="Image URL list")
    x_statsig_id: Optional[str] = Field(None, description="Override x-statsig-id")
    stream: bool = Field(False, description="Streaming response")
    temperature: Optional[float] = Field(0.7, ge=0, le=2, description="Sampling temperature")
    max_tokens: Optional[int] = Field(None, ge=1, le=100000, description="Max tokens")
    top_p: Optional[float] = Field(1.0, ge=0, le=1, description="Sampling parameter")

    # BUG FIX: pydantic v2 requires @field_validator to be applied ABOVE
    # @classmethod; the original order (@classmethod outermost) means the
    # validator is never registered and these checks silently never ran.
    # Raising HTTPException (instead of ValueError) bypasses pydantic's
    # ValidationError aggregation so FastAPI returns the 400 directly —
    # kept as the existing contract.
    @field_validator('messages')
    @classmethod
    def validate_messages(cls, v):
        """Validate each message is a dict with a known role and content."""
        if not v:
            raise HTTPException(status_code=400, detail="Message list cannot be empty")

        for msg in v:
            if not isinstance(msg, dict):
                raise HTTPException(status_code=400, detail="Each message must be a dict")
            if 'role' not in msg:
                raise HTTPException(status_code=400, detail="Message missing 'role' field")
            if 'content' not in msg:
                raise HTTPException(status_code=400, detail="Message missing 'content' field")
            if msg['role'] not in ['system', 'user', 'assistant']:
                raise HTTPException(
                    status_code=400,
                    detail=f"Invalid role '{msg['role']}', must be system/user/assistant"
                )

        return v

    @field_validator('model')
    @classmethod
    def validate_model(cls, v):
        """Reject model names that have no configuration entry."""
        if not Models.is_valid_model(v):
            supported = Models.get_all_model_names()
            raise HTTPException(
                status_code=400,
                detail=f"Unsupported model '{v}', supported: {', '.join(supported)}"
            )
        return v
56
+
57
+
58
class OpenAIChatCompletionMessage(BaseModel):
    """One message inside a non-streaming completion choice.

    reference_id / annotations are Grok-specific extensions to the OpenAI shape.
    """
    role: str = Field(..., description="Role")
    content: str = Field(..., description="Content")
    reference_id: Optional[str] = Field(default=None, description="Reference ID")
    annotations: Optional[List[str]] = Field(default=None, description="Annotations")
64
+
65
+
66
class OpenAIChatCompletionChoice(BaseModel):
    """One choice in a non-streaming completion response."""
    index: int = Field(..., description="Index")
    message: OpenAIChatCompletionMessage = Field(..., description="Message")
    logprobs: Optional[float] = Field(default=None, description="Log probability")
    finish_reason: str = Field(default="stop", description="Finish reason")
72
+
73
+
74
class OpenAIChatCompletionResponse(BaseModel):
    """Top-level non-streaming chat completion response (OpenAI-compatible)."""
    id: str = Field(..., description="Response ID")
    object: str = Field("chat.completion", description="Object type")
    created: int = Field(..., description="Created timestamp")  # unix seconds
    model: str = Field(..., description="Model")
    choices: List[OpenAIChatCompletionChoice] = Field(..., description="Choices")
    usage: Optional[Dict[str, Any]] = Field(None, description="Token usage")
82
+
83
+
84
class OpenAIChatCompletionChunkMessage(BaseModel):
    """Delta payload carried inside a streaming chunk choice."""
    role: str = Field(..., description="Role")
    content: str = Field(..., description="Content")
88
+
89
+
90
class OpenAIChatCompletionChunkChoice(BaseModel):
    """One choice in a streaming chunk; delta may be a raw dict or typed message."""
    index: int = Field(..., description="Index")
    delta: Optional[Union[Dict[str, Any], OpenAIChatCompletionChunkMessage]] = Field(
        None, description="Delta data"
    )
    finish_reason: Optional[str] = Field(None, description="Finish reason")
97
+
98
+
99
class OpenAIChatCompletionChunkResponse(BaseModel):
    """Top-level streaming chunk response (object = chat.completion.chunk)."""
    id: str = Field(..., description="Response ID")
    object: str = Field(default="chat.completion.chunk", description="Object type")
    created: int = Field(..., description="Created timestamp")  # unix seconds
    model: str = Field(..., description="Model")
    system_fingerprint: Optional[str] = Field(default=None, description="System fingerprint")
    choices: List[OpenAIChatCompletionChunkChoice] = Field(..., description="Choices")
app/services/api_keys.py ADDED
@@ -0,0 +1,226 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """API key manager - multi-user key management"""
2
+
3
+ import os
4
+ import orjson
5
+ import time
6
+ import secrets
7
+ import asyncio
8
+ from typing import List, Dict, Optional
9
+ from pathlib import Path
10
+
11
+ from app.core.logger import logger
12
+ from app.core.config import setting
13
+
14
+
15
+ class ApiKeyManager:
16
+ """API key management service"""
17
+
18
+ _instance = None
19
+
20
    def __new__(cls):
        # Singleton: all callers share one manager (and one in-memory key list).
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance
24
+
25
    def __init__(self):
        # __init__ runs on every ApiKeyManager() call even though __new__
        # returns the shared instance, so guard against re-initialization.
        if hasattr(self, '_initialized'):
            return

        self.file_path = self._resolve_data_dir() / "api_keys.json"
        self._keys: List[Dict] = []   # in-memory copy of api_keys.json
        self._lock = asyncio.Lock()   # serializes file reads/writes
        self._loaded = False          # set once _load_data has completed
        self._storage = None          # optional storage backend (set_storage)

        self._initialized = True
        logger.debug(f"[ApiKey] Initialized: {self.file_path}")
37
+
38
+ @staticmethod
39
+ def _resolve_data_dir() -> Path:
40
+ """Resolve data directory for persistence."""
41
+ data_dir_env = os.getenv("DATA_DIR")
42
+ if data_dir_env:
43
+ return Path(data_dir_env)
44
+ if Path("/data").exists():
45
+ return Path("/data")
46
+ return Path(__file__).parents[2] / "data"
47
+
48
+ def set_storage(self, storage) -> None:
49
+ """Set storage instance"""
50
+ self._storage = storage
51
+ data_dir = getattr(storage, "data_dir", None)
52
+ if data_dir:
53
+ self.file_path = Path(data_dir) / "api_keys.json"
54
+
55
+ def _use_storage(self) -> bool:
56
+ return bool(self._storage and hasattr(self._storage, "load_api_keys") and hasattr(self._storage, "save_api_keys"))
57
+
58
+ async def init(self):
59
+ """Initialize and load data"""
60
+ if not self._loaded:
61
+ await self._load_data()
62
+
63
    async def _load_data(self):
        """Load API keys from the storage backend or the local JSON file.

        Any failure leaves an empty key list but still marks data as loaded,
        so a later save cannot clobber the file with partial state.
        """
        if self._loaded:
            return

        try:
            if self._use_storage():
                # Backend path: the backend does its own locking/IO.
                self._keys = await self._storage.load_api_keys()
                self._loaded = True
                logger.debug(f"[ApiKey] Loaded {len(self._keys)} API keys (storage)")
                return

            if not self.file_path.exists():
                self._keys = []
                self._loaded = True
                return

            async with self._lock:
                content = await asyncio.to_thread(self.file_path.read_bytes)
                if content:
                    self._keys = orjson.loads(content)
                self._loaded = True
                logger.debug(f"[ApiKey] Loaded {len(self._keys)} API keys")
        except Exception as e:
            logger.error(f"[ApiKey] Load failed: {e}")
            self._keys = []
            self._loaded = True  # Prevent overwrite if load fails
90
+
91
+ async def _save_data(self):
92
+ """Save API keys"""
93
+ if not self._loaded:
94
+ logger.warning("[ApiKey] Save skipped because data is not loaded to avoid overwrite")
95
+ return
96
+
97
+ try:
98
+ if self._use_storage():
99
+ await self._storage.save_api_keys(self._keys)
100
+ return
101
+
102
+ # Ensure directory exists
103
+ self.file_path.parent.mkdir(parents=True, exist_ok=True)
104
+
105
+ async with self._lock:
106
+ content = orjson.dumps(self._keys, option=orjson.OPT_INDENT_2)
107
+ await asyncio.to_thread(self.file_path.write_bytes, content)
108
+ except Exception as e:
109
+ logger.error(f"[ApiKey] Save failed: {e}")
110
+
111
+ def generate_key(self) -> str:
112
+ """Generate a new sk- prefixed key"""
113
+ return f"sk-{secrets.token_urlsafe(24)}"
114
+
115
+ async def add_key(self, name: str) -> Dict:
116
+ """Add API key"""
117
+ new_key = {
118
+ "key": self.generate_key(),
119
+ "name": name,
120
+ "created_at": int(time.time()),
121
+ "is_active": True
122
+ }
123
+ self._keys.append(new_key)
124
+ await self._save_data()
125
+ logger.info(f"[ApiKey] Added new key: {name}")
126
+ return new_key
127
+
128
+ async def batch_add_keys(self, name_prefix: str, count: int) -> List[Dict]:
129
+ """Batch add API keys"""
130
+ new_keys = []
131
+ for i in range(1, count + 1):
132
+ name = f"{name_prefix}-{i}" if count > 1 else name_prefix
133
+ new_keys.append({
134
+ "key": self.generate_key(),
135
+ "name": name,
136
+ "created_at": int(time.time()),
137
+ "is_active": True
138
+ })
139
+
140
+ self._keys.extend(new_keys)
141
+ await self._save_data()
142
+ logger.info(f"[ApiKey] Batch added {count} keys, prefix: {name_prefix}")
143
+ return new_keys
144
+
145
+ async def delete_key(self, key: str) -> bool:
146
+ """Delete API key"""
147
+ initial_len = len(self._keys)
148
+ self._keys = [k for k in self._keys if k["key"] != key]
149
+
150
+ if len(self._keys) != initial_len:
151
+ await self._save_data()
152
+ logger.info(f"[ApiKey] Deleted key: {key[:10]}...")
153
+ return True
154
+ return False
155
+
156
+ async def batch_delete_keys(self, keys: List[str]) -> int:
157
+ """Batch delete API keys"""
158
+ initial_len = len(self._keys)
159
+ self._keys = [k for k in self._keys if k["key"] not in keys]
160
+
161
+ deleted_count = initial_len - len(self._keys)
162
+ if deleted_count > 0:
163
+ await self._save_data()
164
+ logger.info(f"[ApiKey] Batch deleted {deleted_count} keys")
165
+ return deleted_count
166
+
167
+ async def update_key_status(self, key: str, is_active: bool) -> bool:
168
+ """Update key status"""
169
+ for k in self._keys:
170
+ if k["key"] == key:
171
+ k["is_active"] = is_active
172
+ await self._save_data()
173
+ return True
174
+ return False
175
+
176
+ async def batch_update_keys_status(self, keys: List[str], is_active: bool) -> int:
177
+ """Batch update key status"""
178
+ updated_count = 0
179
+ for k in self._keys:
180
+ if k["key"] in keys:
181
+ if k["is_active"] != is_active:
182
+ k["is_active"] = is_active
183
+ updated_count += 1
184
+
185
+ if updated_count > 0:
186
+ await self._save_data()
187
+ logger.info(f"[ApiKey] Batch updated {updated_count} keys to: {is_active}")
188
+ return updated_count
189
+
190
+ async def update_key_name(self, key: str, name: str) -> bool:
191
+ """Update key note"""
192
+ for k in self._keys:
193
+ if k["key"] == key:
194
+ k["name"] = name
195
+ await self._save_data()
196
+ return True
197
+ return False
198
+
199
+ def validate_key(self, key: str) -> Optional[Dict]:
200
+ """Validate key and return key info"""
201
+ # 1. Check global config key (default admin key)
202
+ global_key = setting.grok_config.get("api_key")
203
+ if global_key and key == global_key:
204
+ return {
205
+ "key": global_key,
206
+ "name": "Default admin",
207
+ "is_active": True,
208
+ "is_admin": True
209
+ }
210
+
211
+ # 2. Check multi-key list
212
+ for k in self._keys:
213
+ if k["key"] == key:
214
+ if k["is_active"]:
215
+ return {**k, "is_admin": False} # Keys are not treated as admins for now
216
+ return None
217
+
218
+ return None
219
+
220
+ def get_all_keys(self) -> List[Dict]:
221
+ """Get all keys"""
222
+ return self._keys
223
+
224
+
225
# Module-level singleton shared by the rest of the application.
api_key_manager: "ApiKeyManager" = ApiKeyManager()
app/services/grok/cache.py ADDED
@@ -0,0 +1,243 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Cache service module - download, cache, and clean images and videos"""
2
+
3
+ import asyncio
4
+ import base64
5
+ from pathlib import Path
6
+ from typing import Optional, Tuple
7
+ from curl_cffi.requests import AsyncSession
8
+
9
+ from app.core.config import setting
10
+ from app.core.logger import logger
11
+ from app.services.grok.statsig import get_dynamic_headers
12
+
13
+
14
# Constants
# Suffix -> MIME type map used when building data: URIs (see ImageCache.to_base64).
MIME_TYPES = {
    '.jpg': 'image/jpeg', '.jpeg': 'image/jpeg', '.png': 'image/png',
    '.gif': 'image/gif', '.webp': 'image/webp', '.bmp': 'image/bmp',
}
DEFAULT_MIME = 'image/jpeg'  # fallback for suffixes not in MIME_TYPES
ASSETS_URL = "https://assets.grok.com"  # base URL for downloadable assets
21
+
22
+
23
class CacheService:
    """Cache service base class.

    Downloads assets from assets.grok.com into ``data/temp/<cache_type>/``
    and evicts the oldest files once the configured size budget is exceeded.
    """

    def __init__(self, cache_type: str, timeout: float = 30.0):
        self.cache_type = cache_type
        self.cache_dir = Path(f"data/temp/{cache_type}")
        self.cache_dir.mkdir(parents=True, exist_ok=True)
        self.timeout = timeout
        self._cleanup_lock = asyncio.Lock()  # at most one cleanup at a time

    def _get_path(self, file_path: str) -> Path:
        """Convert a remote file path into a flat cache path ('/' -> '-')."""
        return self.cache_dir / file_path.lstrip('/').replace('/', '-')

    def _log(self, level: str, msg: str):
        """Unified log output with a per-cache-type prefix."""
        getattr(logger, level)(f"[{self.cache_type.upper()}Cache] {msg}")

    def _build_headers(self, file_path: str, auth_token: str) -> dict:
        """Build browser-like request headers plus auth/cf_clearance cookies."""
        cf = setting.grok_config.get("cf_clearance", "")
        return {
            **get_dynamic_headers(pathname=file_path),
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
            "Sec-Fetch-Dest": "document",
            "Sec-Fetch-Mode": "navigate",
            "Sec-Fetch-Site": "same-site",
            "Sec-Fetch-User": "?1",
            "Upgrade-Insecure-Requests": "1",
            "Referer": "https://grok.com/",
            "Cookie": f"{auth_token};{cf}" if cf else auth_token
        }

    async def download(self, file_path: str, auth_token: str, timeout: Optional[float] = None) -> Optional[Path]:
        """Download and cache a file; return the cache path or None on failure.

        Retry scheme:
          - inner loop: up to 5 retries on HTTP 403;
          - outer loop: up to MAX_OUTER_RETRY retries for configured status
            codes (default 401/429) and for transport exceptions.
        """
        cache_path = self._get_path(file_path)
        if cache_path.exists():
            self._log("debug", "File already cached")
            return cache_path

        # Outer retry: configurable status codes (401/429, etc)
        retry_codes = setting.grok_config.get("retry_status_codes", [401, 429])
        MAX_OUTER_RETRY = 3

        for outer_retry in range(MAX_OUTER_RETRY + 1):  # +1: retries plus final attempt
            try:
                # Inner retry: 403 retry (cache uses cache proxy only, no pool)
                max_403_retries = 5
                retry_403_count = 0

                while retry_403_count <= max_403_retries:
                    proxy = await setting.get_proxy_async("cache")
                    proxies = {"http": proxy, "https": proxy} if proxy else {}

                    if proxy and outer_retry == 0 and retry_403_count == 0:
                        self._log("debug", f"Using proxy: {proxy.split('@')[-1] if '@' in proxy else proxy}")

                    async with AsyncSession() as session:
                        url = f"{ASSETS_URL}{file_path}"
                        if outer_retry == 0 and retry_403_count == 0:
                            self._log("debug", f"Downloading: {url}")

                        response = await session.get(
                            url,
                            headers=self._build_headers(file_path, auth_token),
                            proxies=proxies,
                            timeout=timeout or self.timeout,
                            allow_redirects=True,
                            impersonate="chrome133a"
                        )

                        # Check 403 error - inner retry (cache does not use proxy pool)
                        if response.status_code == 403:
                            retry_403_count += 1

                            if retry_403_count <= max_403_retries:
                                self._log("warning", f"403 error, retrying ({retry_403_count}/{max_403_retries})...")
                                await asyncio.sleep(0.5)
                                continue

                            self._log("error", f"403 error, retried {retry_403_count-1} times, giving up")
                            return None

                        # Check retryable status codes - outer retry
                        if response.status_code in retry_codes:
                            if outer_retry < MAX_OUTER_RETRY:
                                delay = (outer_retry + 1) * 0.1  # Progressive delay: 0.1s, 0.2s, 0.3s
                                self._log("warning", f"{response.status_code} error, outer retry ({outer_retry+1}/{MAX_OUTER_RETRY}), waiting {delay}s...")
                                await asyncio.sleep(delay)
                                break  # Exit inner loop, retry outer
                            else:
                                self._log("error", f"{response.status_code} error, retried {outer_retry} times, giving up")
                                return None

                        response.raise_for_status()
                        await asyncio.to_thread(cache_path.write_bytes, response.content)

                        if outer_retry > 0 or retry_403_count > 0:
                            self._log("info", "Retry succeeded")
                        else:
                            self._log("debug", "Cached successfully")

                        # Async cleanup (with error handling)
                        asyncio.create_task(self._safe_cleanup())
                        return cache_path

            except Exception as e:
                # BUGFIX: was `outer_retry < MAX_OUTER_RETRY - 1`, which gave
                # transport errors one fewer retry than retryable status codes.
                if outer_retry < MAX_OUTER_RETRY:
                    self._log("warning", f"Download error: {e}, outer retry ({outer_retry+1}/{MAX_OUTER_RETRY})...")
                    await asyncio.sleep(0.5)
                    continue

                self._log("error", f"Download failed: {e} (retried {outer_retry} times)")
                return None

        return None

    def get_cached(self, file_path: str) -> Optional[Path]:
        """Return the cached file path, or None if not cached."""
        path = self._get_path(file_path)
        return path if path.exists() else None

    async def _safe_cleanup(self):
        """Cleanup wrapper for background tasks (never raises)."""
        try:
            await self.cleanup()
        except Exception as e:
            self._log("error", f"Background cleanup failed: {e}")

    async def cleanup(self):
        """Evict oldest files until total size fits the configured budget."""
        if self._cleanup_lock.locked():
            return  # another cleanup is already running

        async with self._cleanup_lock:
            try:
                max_mb = setting.global_config.get(f"{self.cache_type}_cache_max_size_mb", 500)
                max_bytes = max_mb * 1024 * 1024

                # Get file info (path, size, mtime)
                files = [(f, (s := f.stat()).st_size, s.st_mtime)
                         for f in self.cache_dir.glob("*") if f.is_file()]
                total = sum(size for _, size, _ in files)

                if total <= max_bytes:
                    return

                self._log("info", f"Cleaning cache {total/1024/1024:.1f}MB -> {max_mb}MB")

                # Delete oldest files first (ascending mtime)
                for path, size, _ in sorted(files, key=lambda x: x[2]):
                    if total <= max_bytes:
                        break
                    await asyncio.to_thread(path.unlink)
                    total -= size

                self._log("info", f"Cleanup complete: {total/1024/1024:.1f}MB")
            except Exception as e:
                self._log("error", f"Cleanup failed: {e}")
182
+
183
+
184
class ImageCache(CacheService):
    """Cache service for images, with helpers for base64 data URIs."""

    def __init__(self):
        # 30s is plenty for single image assets.
        super().__init__("image", timeout=30.0)

    async def download_image(self, path: str, token: str) -> Optional[Path]:
        """Fetch an image into the cache (thin wrapper over download)."""
        return await self.download(path, token)

    @staticmethod
    def to_base64(image_path: Path) -> Optional[str]:
        """Encode a cached image file as a data: URI, or None on failure."""
        try:
            if not image_path.exists():
                logger.error(f"[ImageCache] File not found: {image_path}")
                return None

            raw = image_path.read_bytes()
            encoded = base64.b64encode(raw).decode()
            mime_type = MIME_TYPES.get(image_path.suffix.lower(), DEFAULT_MIME)
            return f"data:{mime_type};base64,{encoded}"
        except Exception as e:
            logger.error(f"[ImageCache] Conversion failed: {e}")
            return None

    async def download_base64(self, path: str, token: str) -> Optional[str]:
        """Download an image, return it as a data URI, and drop the temp file."""
        try:
            local_file = await self.download(path, token)
            if not local_file:
                return None

            encoded = self.to_base64(local_file)

            # Best-effort removal of the temporary cached file.
            try:
                local_file.unlink()
            except Exception as e:
                logger.warning(f"[ImageCache] Failed to delete temp file: {e}")

            return encoded
        except Exception as e:
            logger.error(f"[ImageCache] Base64 download failed: {e}")
            return None
228
+
229
+
230
class VideoCache(CacheService):
    """Cache service specialized for video assets."""

    def __init__(self):
        # Videos are larger than images, so allow a 60s download window.
        super().__init__("video", timeout=60.0)

    async def download_video(self, path: str, token: str) -> Optional[Path]:
        """Fetch a video into the cache (thin wrapper over download)."""
        return await self.download(path, token)
239
+
240
+
241
+ # Global instance
242
+ image_cache_service = ImageCache()
243
+ video_cache_service = VideoCache()
app/services/grok/client.py ADDED
@@ -0,0 +1,386 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Grok API client - convert OpenAI requests and process responses"""
2
+
3
+ import asyncio
4
+ import orjson
5
+ from typing import Dict, List, Tuple, Any, Optional
6
+ from curl_cffi.requests import AsyncSession as curl_AsyncSession
7
+
8
+ from app.core.config import setting
9
+ from app.core.logger import logger
10
+ from app.models.grok_models import Models
11
+ from app.services.grok.processer import GrokResponseProcessor
12
+ from app.services.images.normalize import normalize_messages
13
+ from app.services.grok.statsig import get_dynamic_headers
14
+ from app.services.grok.token import token_manager
15
+ from app.services.grok.upload import ImageUploadManager
16
+ from app.services.grok.create import PostCreateManager
17
+ from app.core.exception import GrokApiException
18
+
19
+
20
# Constants
API_ENDPOINT = "https://grok.com/rest/app-chat/conversations/new"  # conversation creation endpoint
TIMEOUT = 120  # per-request timeout, seconds
BROWSER = "chrome133a"  # curl_cffi impersonation profile
MAX_RETRY = 3  # top-level retries in GrokClient._retry (token/upload level)
MAX_UPLOADS = 20 # Increase upload concurrency for higher throughput
26
+
27
+
28
class GrokClient:
    """Grok API client.

    Converts OpenAI-style chat requests into Grok conversation API calls,
    handling image upload, video-session creation, layered retries, and
    response processing (streaming and non-streaming).
    """

    # Shared upload semaphore, created lazily so the concurrency limit can
    # be read from runtime config (see _get_upload_semaphore).
    _upload_sem = None  # Lazy init

    @staticmethod
    def _get_upload_semaphore():
        """Get upload semaphore (dynamic config).

        Created on first use; the limit comes from global_config
        "max_upload_concurrency", defaulting to MAX_UPLOADS.
        """
        if GrokClient._upload_sem is None:
            # Read from config or use default
            max_concurrency = setting.global_config.get("max_upload_concurrency", MAX_UPLOADS)
            GrokClient._upload_sem = asyncio.Semaphore(max_concurrency)
            logger.debug(f"[Client] Initialized upload concurrency: {max_concurrency}")
        return GrokClient._upload_sem

    @staticmethod
    async def openai_to_grok(request: dict):
        """Convert an OpenAI request dict to a Grok request and execute it.

        Raises GrokApiException("MODEL_MISMATCH") when images are supplied
        with a model other than grok-imagine-0.9.
        """
        model = request["model"]
        normalized_messages = normalize_messages(
            request.get("messages", []),
            image_url=request.get("image_url"),
            image_urls=request.get("image_urls"),
        )
        content, images = GrokClient._extract_content(normalized_messages)
        stream = request.get("stream", False)
        statsig_id = request.get("x_statsig_id") or ""

        # Get model info
        info = Models.get_model_info(model)
        grok_model, mode = Models.to_grok(model)
        is_video = info.get("is_video_model", False)
        logger.debug(f"[Client] Model selected: {model} -> {grok_model} ({mode})")

        if images and model != "grok-imagine-0.9":
            raise GrokApiException(
                "Images require model grok-imagine-0.9",
                "MODEL_MISMATCH",
                {"model": model}
            )

        if images:
            logger.debug("[Client] Image mode enabled")

        # Video model limitation
        if is_video and len(images) > 1:
            logger.warning("[Client] Video model supports only 1 image; using the first one")
            images = images[:1]

        return await GrokClient._retry(model, content, images, grok_model, mode, is_video, stream, statsig_id)

    @staticmethod
    async def _retry(model: str, content: str, images: List[str], grok_model: str, mode: str, is_video: bool, stream: bool, statsig_id: str):
        """Run the full request pipeline with up to MAX_RETRY attempts.

        Only HTTP_ERROR/NO_AVAILABLE_TOKEN failures whose status is in the
        configured retry_status_codes are retried; everything else re-raises.
        """
        last_err = None

        for i in range(MAX_RETRY):
            try:
                token = await token_manager.get_token(model)
                img_ids, img_uris = await GrokClient._upload(images, token, statsig_id)

                # Create session for video model
                post_id = None
                if is_video and img_ids and img_uris:
                    post_id = await GrokClient._create_post(img_ids[0], img_uris[0], token, statsig_id)

                payload = GrokClient._build_payload(content, grok_model, mode, img_ids, img_uris, is_video, post_id)
                return await GrokClient._request(payload, token, model, stream, post_id, statsig_id)

            except GrokApiException as e:
                last_err = e
                # Check if retryable
                if e.error_code not in ["HTTP_ERROR", "NO_AVAILABLE_TOKEN"]:
                    raise

                status = e.context.get("status") if e.context else None
                retry_codes = setting.grok_config.get("retry_status_codes", [401, 429])

                if status not in retry_codes:
                    raise

                if i < MAX_RETRY - 1:
                    logger.warning(f"[Client] Failed (status: {status}), retry {i+1}/{MAX_RETRY}")
                    await asyncio.sleep(0.5)

        raise last_err or GrokApiException("Request failed", "REQUEST_ERROR")

    @staticmethod
    def _extract_content(messages: List[Dict]) -> Tuple[str, List[str]]:
        """Extract text and images, preserving role structure.

        Returns (joined_text, image_urls) where joined_text is one line per
        message prefixed with System/User/grok.
        """
        formatted_messages = []
        images = []

        # Role mapping
        role_map = {
            "system": "System",
            "user": "User",
            "assistant": "grok"
        }

        for msg in messages:
            role = msg.get("role", "user")
            content = msg.get("content", "")
            role_prefix = role_map.get(role, role)

            # Extract text content
            text_parts = []
            if isinstance(content, list):
                for item in content:
                    if item.get("type") == "text":
                        text_parts.append(item.get("text", ""))
                    elif item.get("type") == "image_url":
                        # image_url may be either a dict {"url": ...} or a bare string
                        image_url = item.get("image_url")
                        if isinstance(image_url, dict):
                            url = image_url.get("url")
                        else:
                            url = image_url
                        if url:
                            images.append(url)
            else:
                text_parts.append(content)

            # Merge text and add role prefix
            msg_text = "".join(text_parts).strip()
            if msg_text:
                formatted_messages.append(f"{role_prefix}: {msg_text}")

        # Join all messages with newlines
        return "\n".join(formatted_messages), images

    @staticmethod
    async def _upload(urls: List[str], token: str, statsig_id: str = "") -> Tuple[List[str], List[str]]:
        """Upload images concurrently (bounded by the shared semaphore).

        Returns parallel lists of (file_ids, file_uris); raises on any failure.
        """
        if not urls:
            return [], []

        async def upload_limited(url):
            async with GrokClient._get_upload_semaphore():
                return await ImageUploadManager.upload(url, token, statsig_id)

        results = await asyncio.gather(*[upload_limited(u) for u in urls], return_exceptions=True)

        ids, uris = [], []
        for url, result in zip(urls, results):
            if isinstance(result, Exception):
                logger.warning(f"[Client] Upload failed: {url} - {result}")
                if isinstance(result, GrokApiException):
                    raise result
                raise GrokApiException("Image upload failed", "IMAGE_UPLOAD_FAILED", {"url": url})
            elif isinstance(result, tuple) and len(result) == 2:
                fid, furi = result
                if fid:
                    ids.append(fid)
                    uris.append(furi)
            else:
                # Unexpected result shape from the upload manager
                raise GrokApiException("Image upload failed", "IMAGE_UPLOAD_FAILED", {"url": url})

        return ids, uris

    @staticmethod
    async def _create_post(file_id: str, file_uri: str, token: str, statsig_id: str = "") -> Optional[str]:
        """Create a video session; returns the post id, or None on failure."""
        try:
            result = await PostCreateManager.create(file_id, file_uri, token, statsig_id)
            if result and result.get("success"):
                return result.get("post_id")
        except Exception as e:
            logger.warning(f"[Client] Failed to create session: {e}")
        return None

    @staticmethod
    def _build_payload(content: str, model: str, mode: str, img_ids: List[str], img_uris: List[str], is_video: bool = False, post_id: Optional[str] = None) -> Dict:
        """Build the request payload (special-cased for video generation)."""
        # Special handling for video models
        if is_video and img_uris:
            # Prefer the imagine session URL when a post was created.
            img_msg = f"https://grok.com/imagine/{post_id}" if post_id else f"https://assets.grok.com/post/{img_uris[0]}"
            return {
                "temporary": True,
                "modelName": "grok-3",
                "message": f"{img_msg} {content} --mode=custom",
                "fileAttachments": img_ids,
                "toolOverrides": {"videoGen": True}
            }

        # Standard payload
        return {
            "temporary": setting.grok_config.get("temporary", True),
            "modelName": model,
            "message": content,
            "fileAttachments": img_ids,
            "imageAttachments": [],
            "disableSearch": False,
            "enableImageGeneration": True,
            "returnImageBytes": False,
            "returnRawGrokInXaiRequest": False,
            "enableImageStreaming": True,
            "imageGenerationCount": 2,
            "forceConcise": False,
            "toolOverrides": {},
            "enableSideBySide": True,
            "sendFinalMetadata": True,
            "isReasoning": False,
            "webpageUrls": [],
            "disableTextFollowUps": True,
            "responseMetadata": {"requestModelDetails": {"modelId": model}},
            "disableMemory": False,
            "forceSideBySide": False,
            "modelMode": mode,
            "isAsyncChat": False
        }

    @staticmethod
    async def _request(payload: dict, token: str, model: str, stream: bool, post_id: Optional[str] = None, statsig_id: str = ""):
        """Send the conversation request.

        Retry structure: an inner loop retries 403s (proxy-pool refresh),
        an outer loop retries configured status codes (default 401/429).
        The curl session is closed manually on every exit path except the
        streaming success path, where the stream iterator owns it.
        """
        if not token:
            raise GrokApiException("Missing authentication token", "NO_AUTH_TOKEN")

        # Outer retry: configurable status codes (401/429, etc)
        retry_codes = setting.grok_config.get("retry_status_codes", [401, 429])
        MAX_OUTER_RETRY = 3

        for outer_retry in range(MAX_OUTER_RETRY + 1):  # +1 ensures 3 retries
            # Inner retry: 403 with proxy pool
            max_403_retries = 5
            retry_403_count = 0

            while retry_403_count <= max_403_retries:
                # Fetch proxy asynchronously (local import avoids a cycle)
                from app.core.proxy_pool import proxy_pool

                # If retrying 403 with proxy pool, force refresh
                if retry_403_count > 0 and proxy_pool._enabled:
                    logger.info(f"[Client] 403 retry {retry_403_count}/{max_403_retries}, refreshing proxy...")
                    proxy = await proxy_pool.force_refresh()
                else:
                    proxy = await setting.get_proxy_async("service")

                proxies = {"http": proxy, "https": proxy} if proxy else None

                # Build headers (inside loop for token retry)
                headers = GrokClient._build_headers(token, statsig_id)
                if model == "grok-imagine-0.9":
                    # Referer must point at the imagine session (or first file)
                    file_attachments = payload.get("fileAttachments", [])
                    ref_id = post_id or (file_attachments[0] if file_attachments else "")
                    if ref_id:
                        headers["Referer"] = f"https://grok.com/imagine/{ref_id}"

                # Create session and send request
                session = curl_AsyncSession(impersonate=BROWSER)
                try:
                    response = await session.post(
                        API_ENDPOINT,
                        headers=headers,
                        data=orjson.dumps(payload),
                        timeout=TIMEOUT,
                        stream=True,
                        proxies=proxies
                    )

                    # Inner 403 retry: only when proxy pool is enabled
                    if response.status_code == 403 and proxy_pool._enabled:
                        retry_403_count += 1
                        if retry_403_count <= max_403_retries:
                            logger.warning(f"[Client] 403 error, retrying ({retry_403_count}/{max_403_retries})...")
                            await session.close()
                            await asyncio.sleep(0.5)
                            continue
                        # Exhausted: fall through to the status-code handling below
                        logger.error(f"[Client] 403 error, retried {retry_403_count-1} times, giving up")

                    # Check retryable status codes - outer retry
                    if response.status_code in retry_codes:
                        if outer_retry < MAX_OUTER_RETRY:
                            delay = (outer_retry + 1) * 0.1
                            logger.warning(f"[Client] {response.status_code} error, outer retry ({outer_retry+1}/{MAX_OUTER_RETRY}), waiting {delay}s...")
                            await session.close()
                            await asyncio.sleep(delay)
                            break  # Exit inner loop for outer retry
                        else:
                            logger.error(f"[Client] {response.status_code} error, retried {outer_retry} times, giving up")
                            # _handle_error always raises; close the session either way
                            try:
                                GrokClient._handle_error(response, token)
                            finally:
                                await session.close()

                    # Check other response statuses
                    if response.status_code != 200:
                        try:
                            GrokClient._handle_error(response, token)
                        finally:
                            await session.close()

                    # Success - reset failure count (fire-and-forget)
                    asyncio.create_task(token_manager.reset_failure(token))

                    if outer_retry > 0 or retry_403_count > 0:
                        logger.info("[Client] Retry succeeded")

                    # Process response
                    if stream:
                        # Streaming response closes session via iterator
                        result = GrokResponseProcessor.process_stream(response, token, session)
                    else:
                        # Non-streaming closes session after processing
                        try:
                            result = await GrokResponseProcessor.process_normal(response, token, model)
                        finally:
                            await session.close()

                    asyncio.create_task(GrokClient._update_limits(token, model))
                    return result

                except Exception as e:
                    await session.close()
                    # curl_cffi transport errors are detected by type name to
                    # avoid importing its exception hierarchy here.
                    if "RequestsError" in str(type(e)):
                        logger.error(f"[Client] Network error: {e}")
                        raise GrokApiException(f"Network error: {e}", "NETWORK_ERROR") from e
                    raise

        # Safety net: normally unreachable (the last outer attempt raises).
        raise GrokApiException("Request failed: maximum retries reached", "MAX_RETRIES_EXCEEDED")


    @staticmethod
    def _build_headers(token: str, statsig_id: str = "") -> Dict[str, str]:
        """Build request headers including the auth/cf_clearance cookie."""
        headers = get_dynamic_headers("/rest/app-chat/conversations/new", statsig_id=statsig_id)
        cf = setting.grok_config.get("cf_clearance", "")
        headers["Cookie"] = f"{token};{cf}" if cf else token
        return headers

    @staticmethod
    def _handle_error(response, token: str):
        """Record failure/cooldown for the token and raise GrokApiException.

        Always raises; never returns normally.
        """
        if response.status_code == 403:
            msg = "Your IP was blocked. Try: 1) change IP 2) use proxy 3) set CF clearance"
            data = {"cf_blocked": True, "status": 403}
            logger.warning(f"[Client] {msg}")
        else:
            try:
                data = response.json()
                msg = str(data)
            except:
                data = response.text
                msg = data[:200] if data else "Unknown error"

        asyncio.create_task(token_manager.record_failure(token, response.status_code, msg))
        asyncio.create_task(token_manager.apply_cooldown(token, response.status_code))
        raise GrokApiException(
            f"Request failed: {response.status_code} - {msg}",
            "HTTP_ERROR",
            {"status": response.status_code, "data": data}
        )

    @staticmethod
    async def _update_limits(token: str, model: str):
        """Update rate limits for the token/model (best-effort)."""
        try:
            await token_manager.check_limits(token, model)
        except Exception as e:
            logger.error(f"[Client] Failed to update limits: {e}")
app/services/grok/create.py ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Post creation manager - create session before video generation"""
2
+
3
+ import asyncio
4
+ import orjson
5
+ from typing import Dict, Any, Optional
6
+ from curl_cffi.requests import AsyncSession
7
+
8
+ from app.services.grok.statsig import get_dynamic_headers
9
+ from app.core.exception import GrokApiException
10
+ from app.core.config import setting
11
+ from app.core.logger import logger
12
+
13
+
14
# Constants
ENDPOINT = "https://grok.com/rest/media/post/create"  # media post creation endpoint
TIMEOUT = 30  # per-request timeout, seconds
BROWSER = "chrome133a"  # curl_cffi impersonation profile
18
+
19
+
20
class PostCreateManager:
    """Session creation manager (creates a media post before video generation)."""

    @staticmethod
    async def create(file_id: str, file_uri: str, auth_token: str, statsig_id: str = "") -> Optional[Dict[str, Any]]:
        """Create session record

        Args:
            file_id: File ID
            file_uri: File URI
            auth_token: Auth token
            statsig_id: Optional statsig id forwarded into dynamic headers

        Returns:
            Session info dict containing post_id, etc.

        Raises:
            GrokApiException: on missing params/token or on request failure
                (error codes INVALID_PARAMS, NO_AUTH_TOKEN, CREATE_ERROR).
        """
        # Parameter validation
        if not file_id or not file_uri:
            raise GrokApiException("Missing file ID or URI", "INVALID_PARAMS")
        if not auth_token:
            raise GrokApiException("Missing auth token", "NO_AUTH_TOKEN")

        try:
            # Build request
            data = {
                "media_url": f"https://assets.grok.com/{file_uri}",
                "media_type": "MEDIA_POST_TYPE_IMAGE"
            }

            cf = setting.grok_config.get("cf_clearance", "")
            headers = {
                **get_dynamic_headers("/rest/media/post/create", statsig_id=statsig_id),
                "Cookie": f"{auth_token};{cf}" if cf else auth_token
            }

            # Outer retry: configurable status codes (401/429, etc)
            retry_codes = setting.grok_config.get("retry_status_codes", [401, 429])
            MAX_OUTER_RETRY = 3

            for outer_retry in range(MAX_OUTER_RETRY + 1):  # +1 ensures 3 retries
                # Inner retry: 403 with proxy pool
                max_403_retries = 5
                retry_403_count = 0

                while retry_403_count <= max_403_retries:
                    # Fetch proxy asynchronously (proxy pool supported);
                    # local import avoids a circular dependency.
                    from app.core.proxy_pool import proxy_pool

                    # If retrying 403 with proxy pool, force refresh
                    if retry_403_count > 0 and proxy_pool._enabled:
                        logger.info(f"[PostCreate] 403 retry {retry_403_count}/{max_403_retries}, refreshing proxy...")
                        proxy = await proxy_pool.force_refresh()
                    else:
                        proxy = await setting.get_proxy_async("service")

                    proxies = {"http": proxy, "https": proxy} if proxy else None

                    # Send request
                    async with AsyncSession() as session:
                        response = await session.post(
                            ENDPOINT,
                            headers=headers,
                            json=data,
                            impersonate=BROWSER,
                            timeout=TIMEOUT,
                            proxies=proxies
                        )

                        # Inner 403 retry: only when proxy pool is enabled
                        if response.status_code == 403 and proxy_pool._enabled:
                            retry_403_count += 1

                            if retry_403_count <= max_403_retries:
                                logger.warning(f"[PostCreate] 403 error, retrying ({retry_403_count}/{max_403_retries})...")
                                await asyncio.sleep(0.5)
                                continue

                            # All inner retries failed; fall through to the
                            # status-code handling / generic error path below.
                            logger.error(f"[PostCreate] 403 error, retried {retry_403_count-1} times, giving up")

                        # Check retryable status codes - outer retry
                        if response.status_code in retry_codes:
                            if outer_retry < MAX_OUTER_RETRY:
                                delay = (outer_retry + 1) * 0.1  # Progressive delay: 0.1s, 0.2s, 0.3s
                                logger.warning(f"[PostCreate] {response.status_code} error, outer retry ({outer_retry+1}/{MAX_OUTER_RETRY}), waiting {delay}s...")
                                await asyncio.sleep(delay)
                                break  # Exit inner loop for outer retry
                            else:
                                logger.error(f"[PostCreate] {response.status_code} error, retried {outer_retry} times, giving up")
                                raise GrokApiException(f"Create failed: {response.status_code} error", "CREATE_ERROR")

                        if response.status_code == 200:
                            result = response.json()
                            post_id = result.get("post", {}).get("id", "")

                            if outer_retry > 0 or retry_403_count > 0:
                                logger.info("[PostCreate] Retry succeeded")

                            logger.debug(f"[PostCreate] Success, session ID: {post_id}")
                            return {
                                "post_id": post_id,
                                "file_id": file_id,
                                "file_uri": file_uri,
                                "success": True,
                                "data": result
                            }

                        # Other error handling
                        try:
                            error = response.json()
                            msg = f"Status: {response.status_code}, details: {error}"
                        except:
                            msg = f"Status: {response.status_code}, details: {response.text[:200]}"

                        logger.error(f"[PostCreate] Failed: {msg}")
                        raise GrokApiException(f"Create failed: {msg}", "CREATE_ERROR")

        except GrokApiException:
            raise
        except Exception as e:
            # Wrap transport/parse errors while preserving the cause chain.
            logger.error(f"[PostCreate] Error: {e}")
            raise GrokApiException(f"Create error: {e}", "CREATE_ERROR") from e
app/services/grok/processer.py ADDED
@@ -0,0 +1,430 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Grok API response processor - streaming and non-streaming responses"""
2
+
3
+ import orjson
4
+ import uuid
5
+ import time
6
+ import asyncio
7
+ from typing import AsyncGenerator, Tuple, Any
8
+
9
+ from app.core.config import setting
10
+ from app.core.exception import GrokApiException
11
+ from app.core.logger import logger
12
+ from app.models.openai_schema import (
13
+ OpenAIChatCompletionResponse,
14
+ OpenAIChatCompletionChoice,
15
+ OpenAIChatCompletionMessage,
16
+ OpenAIChatCompletionChunkResponse,
17
+ OpenAIChatCompletionChunkChoice,
18
+ OpenAIChatCompletionChunkMessage
19
+ )
20
+ from app.services.grok.cache import image_cache_service, video_cache_service
21
+
22
+
23
class StreamTimeoutManager:
    """Tracks first-byte, inter-chunk and total timeouts for a streaming response.

    All timestamps use time.monotonic(): asyncio.get_event_loop() outside a
    running coroutine is deprecated since Python 3.10 (and can fail off the
    main thread), and a monotonic clock is immune to wall-clock adjustments.
    Only relative differences are ever compared, so the change is behavior-
    preserving for every caller.
    """

    def __init__(self, chunk_timeout: int = 120, first_timeout: int = 30, total_timeout: int = 600):
        """Create a timeout tracker.

        Args:
            chunk_timeout: Max seconds allowed between consecutive chunks.
            first_timeout: Max seconds allowed before the first chunk arrives.
            total_timeout: Max lifetime of the stream; values <= 0 disable it.
        """
        self.chunk_timeout = chunk_timeout
        self.first_timeout = first_timeout
        self.total_timeout = total_timeout
        self.start_time = time.monotonic()
        self.last_chunk_time = self.start_time
        self.first_received = False

    def check_timeout(self) -> Tuple[bool, str]:
        """Return (timed_out, reason); reason is "" when nothing triggered.

        Checks, in order: first-response timeout (only before any chunk was
        seen), total stream timeout (only when enabled, > 0), then the
        inter-chunk timeout (only after the first chunk).
        """
        now = time.monotonic()

        if not self.first_received and now - self.start_time > self.first_timeout:
            return True, f"First response timeout ({self.first_timeout}s)"

        if self.total_timeout > 0 and now - self.start_time > self.total_timeout:
            return True, f"Total timeout ({self.total_timeout}s)"

        if self.first_received and now - self.last_chunk_time > self.chunk_timeout:
            return True, f"Chunk timeout ({self.chunk_timeout}s)"

        return False, ""

    def mark_received(self):
        """Record that a chunk arrived: resets the inter-chunk timer."""
        self.last_chunk_time = time.monotonic()
        self.first_received = True

    def duration(self) -> float:
        """Seconds elapsed since this manager was created."""
        return time.monotonic() - self.start_time
57
+
58
+
59
class GrokResponseProcessor:
    """Grok response processor.

    Converts Grok's line-delimited JSON responses into OpenAI-compatible
    completion objects (non-streaming) or SSE chunks (streaming), handling
    text, thinking traces, web-search results, generated images and videos.
    """

    @staticmethod
    async def process_normal(response, auth_token: str, model: str = None) -> OpenAIChatCompletionResponse:
        """Process a non-streaming response.

        Consumes `response.aiter_lines()` until a terminal payload (video URL
        or modelResponse) is found and returns a complete OpenAI-style
        response.

        Raises:
            GrokApiException: on API/model errors, JSON parse failures, or
                when the stream ends without any usable payload.
        """
        response_closed = False
        try:
            async for chunk in response.aiter_lines():
                if not chunk:
                    continue

                data = orjson.loads(chunk)

                # Error check
                if error := data.get("error"):
                    raise GrokApiException(
                        f"API error: {error.get('message', 'Unknown error')}",
                        "API_ERROR",
                        {"code": error.get("code")}
                    )

                grok_resp = data.get("result", {}).get("response", {})

                # Video response: a videoUrl terminates processing immediately.
                if video_resp := grok_resp.get("streamingVideoGenerationResponse"):
                    if video_url := video_resp.get("videoUrl"):
                        content = await GrokResponseProcessor._build_video_content(video_url, auth_token)
                        result = GrokResponseProcessor._build_response(content, model or "grok-imagine-0.9")
                        response_closed = True
                        response.close()
                        return result

                # Model response: only present on the final line of the reply.
                model_response = grok_resp.get("modelResponse")
                if not model_response:
                    continue

                if error_msg := model_response.get("error"):
                    raise GrokApiException(f"Model error: {error_msg}", "MODEL_ERROR")

                # Build content
                content = model_response.get("message", "")
                model_name = model_response.get("model")

                # Process images
                if images := model_response.get("generatedImageUrls"):
                    content = await GrokResponseProcessor._append_images(content, images, auth_token)

                result = GrokResponseProcessor._build_response(content, model_name)
                response_closed = True
                response.close()
                return result

            raise GrokApiException("No response data", "NO_RESPONSE")

        # Bug fix: let domain errors raised above (API_ERROR, MODEL_ERROR,
        # NO_RESPONSE) propagate with their original error codes instead of
        # being re-wrapped as PROCESS_ERROR by the generic handler below.
        # This matches the `except GrokApiException: raise` pattern used by
        # the other request helpers in this codebase.
        except GrokApiException:
            raise
        except orjson.JSONDecodeError as e:
            logger.error(f"[Processor] JSON parse failed: {e}")
            raise GrokApiException(f"JSON parse failed: {e}", "JSON_ERROR") from e
        except Exception as e:
            logger.error(f"[Processor] Processing error: {type(e).__name__}: {e}")
            raise GrokApiException(f"Response processing error: {e}", "PROCESS_ERROR") from e
        finally:
            # Close the response on error/early-exit paths; best-effort only.
            if not response_closed and hasattr(response, 'close'):
                try:
                    response.close()
                except Exception as e:
                    logger.warning(f"[Processor] Failed to close response: {e}")

    @staticmethod
    async def process_stream(response, auth_token: str, session: Any = None) -> AsyncGenerator[str, None]:
        """Process a streaming response into OpenAI SSE chunks.

        Yields `data: {...}\\n\\n` strings and a final `data: [DONE]\\n\\n`.
        Handles thinking (<think>) transitions, web-search citations, video
        progress, and image delivery (URL or chunked base64).
        """
        # State variables
        is_image = False
        is_thinking = False
        thinking_finished = False
        model = None
        filtered_tags = setting.grok_config.get("filtered_tags", "").split(",")
        video_progress_started = False
        last_video_progress = -1
        response_closed = False  # NOTE(review): never set True below; finally always closes.
        show_thinking = setting.grok_config.get("show_thinking", True)

        # Timeout management
        timeout_mgr = StreamTimeoutManager(
            chunk_timeout=setting.grok_config.get("stream_chunk_timeout", 120),
            first_timeout=setting.grok_config.get("stream_first_response_timeout", 30),
            total_timeout=setting.grok_config.get("stream_total_timeout", 600)
        )

        def make_chunk(content: str, finish: str = None):
            """Build one SSE response chunk; empty content yields an empty delta."""
            chunk_data = OpenAIChatCompletionChunkResponse(
                id=f"chatcmpl-{uuid.uuid4()}",
                created=int(time.time()),
                model=model or "grok-4-mini-thinking-tahoe",
                choices=[OpenAIChatCompletionChunkChoice(
                    index=0,
                    delta=OpenAIChatCompletionChunkMessage(
                        role="assistant",
                        content=content
                    ) if content else {},
                    finish_reason=finish
                )]
            )
            return f"data: {chunk_data.model_dump_json()}\n\n"

        try:
            async for chunk in response.aiter_lines():
                # Timeout check: terminate the SSE stream cleanly on timeout.
                is_timeout, timeout_msg = timeout_mgr.check_timeout()
                if is_timeout:
                    logger.warning(f"[Processor] {timeout_msg}")
                    yield make_chunk("", "stop")
                    yield "data: [DONE]\n\n"
                    return

                logger.debug(f"[Processor] Received chunk: {len(chunk)} bytes")
                if not chunk:
                    continue

                try:
                    data = orjson.loads(chunk)

                    # Error check
                    if error := data.get("error"):
                        error_msg = error.get('message', 'Unknown error')
                        logger.error(f"[Processor] API error: {error_msg}")
                        yield make_chunk(f"Error: {error_msg}", "stop")
                        yield "data: [DONE]\n\n"
                        return

                    grok_resp = data.get("result", {}).get("response", {})
                    logger.debug(f"[Processor] Parsed response: {len(grok_resp)} bytes")
                    if not grok_resp:
                        continue

                    timeout_mgr.mark_received()

                    # Update model from the echoed user request, if present.
                    if user_resp := grok_resp.get("userResponse"):
                        if m := user_resp.get("model"):
                            model = m

                    # Video handling
                    if video_resp := grok_resp.get("streamingVideoGenerationResponse"):
                        progress = video_resp.get("progress", 0)
                        v_url = video_resp.get("videoUrl")

                        # Progress updates, rendered inside a <think> block.
                        if progress > last_video_progress:
                            last_video_progress = progress
                            if show_thinking:
                                if not video_progress_started:
                                    content = f"<think>Video generated {progress}%\n"
                                    video_progress_started = True
                                elif progress < 100:
                                    content = f"Video generated {progress}%\n"
                                else:
                                    content = f"Video generated {progress}%</think>\n"
                                yield make_chunk(content)

                        # Video URL signals completion.
                        if v_url:
                            logger.debug("[Processor] Video generation complete")
                            video_content = await GrokResponseProcessor._build_video_content(v_url, auth_token)
                            yield make_chunk(video_content)

                        continue

                    # Image mode: once an attachment is announced, stay in it.
                    if grok_resp.get("imageAttachmentInfo"):
                        is_image = True

                    token = grok_resp.get("token", "")

                    # Image handling
                    if is_image:
                        if model_resp := grok_resp.get("modelResponse"):
                            image_mode = setting.global_config.get("image_mode", "url")
                            content = ""

                            for img in model_resp.get("generatedImageUrls", []):
                                try:
                                    if image_mode == "base64":
                                        # Base64 mode - send in chunks
                                        base64_str = await image_cache_service.download_base64(f"/{img}", auth_token)
                                        if base64_str:
                                            # Chunk large data
                                            # NOTE(review): branch taken when the string lacks a
                                            # "data:" prefix; presumably the cache returns
                                            # "<mime>;base64,<payload>" in that case — verify.
                                            if not base64_str.startswith("data:"):
                                                parts = base64_str.split(",", 1)
                                                if len(parts) == 2:
                                                    yield make_chunk(f"![Generated Image](data:{parts[0]},")
                                                    # 8KB chunks
                                                    for i in range(0, len(parts[1]), 8192):
                                                        yield make_chunk(parts[1][i:i+8192])
                                                    yield make_chunk(")\n")
                                                else:
                                                    yield make_chunk(f"![Generated Image]({base64_str})\n")
                                            else:
                                                yield make_chunk(f"![Generated Image]({base64_str})\n")
                                        else:
                                            yield make_chunk(f"![Generated Image](https://assets.grok.com/{img})\n")
                                    else:
                                        # URL mode: cache locally, serve via /images/.
                                        await image_cache_service.download_image(f"/{img}", auth_token)
                                        img_path = img.replace('/', '-')
                                        base_url = setting.global_config.get("base_url", "")
                                        img_url = f"{base_url}/images/{img_path}" if base_url else f"/images/{img_path}"
                                        content += f"![Generated Image]({img_url})\n"
                                except Exception as e:
                                    # Fall back to the upstream asset URL on any failure.
                                    logger.warning(f"[Processor] Failed to process image: {e}")
                                    content += f"![Generated Image](https://assets.grok.com/{img})\n"

                            yield make_chunk(content.strip(), "stop")
                            return
                        elif token:
                            yield make_chunk(token)

                    # Chat handling
                    else:
                        if isinstance(token, list):
                            continue

                        if any(tag in token for tag in filtered_tags if token):
                            continue

                        current_is_thinking = grok_resp.get("isThinking", False)
                        message_tag = grok_resp.get("messageTag")

                        # Ignore late thinking tokens after </think> was emitted.
                        if thinking_finished and current_is_thinking:
                            continue

                        # Search results handling: render citations as markdown links.
                        if grok_resp.get("toolUsageCardId"):
                            if web_search := grok_resp.get("webSearchResults"):
                                if current_is_thinking:
                                    if show_thinking:
                                        for result in web_search.get("results", []):
                                            title = result.get("title", "")
                                            url = result.get("url", "")
                                            preview = result.get("preview", "")
                                            preview_clean = preview.replace("\n", "") if isinstance(preview, str) else ""
                                            token += f'\n- [{title}]({url} "{preview_clean}")'
                                        token += "\n"
                                    else:
                                        continue
                                else:
                                    continue
                            else:
                                continue

                        if token:
                            content = token

                            if message_tag == "header":
                                content = f"\n\n{token}\n\n"

                            # Thinking state transitions: wrap the thinking
                            # trace in <think>...</think>, or drop it entirely
                            # when show_thinking is off.
                            should_skip = False
                            if not is_thinking and current_is_thinking:
                                if show_thinking:
                                    content = f"<think>\n{content}"
                                else:
                                    should_skip = True
                            elif is_thinking and not current_is_thinking:
                                if show_thinking:
                                    content = f"\n</think>\n{content}"
                                thinking_finished = True
                            elif current_is_thinking:
                                if not show_thinking:
                                    should_skip = True

                            if not should_skip:
                                yield make_chunk(content)

                        is_thinking = current_is_thinking

                # Per-line failures are non-fatal: log and keep streaming.
                except (orjson.JSONDecodeError, UnicodeDecodeError) as e:
                    logger.warning(f"[Processor] Parse failed: {e}")
                    continue
                except Exception as e:
                    logger.warning(f"[Processor] Processing error: {e}")
                    continue

            yield make_chunk("", "stop")
            yield "data: [DONE]\n\n"
            logger.info(f"[Processor] Streaming complete, duration: {timeout_mgr.duration():.2f}s")

        except Exception as e:
            logger.error(f"[Processor] Fatal error: {e}")
            yield make_chunk(f"Processing error: {e}", "error")
            yield "data: [DONE]\n\n"
        finally:
            if not response_closed and hasattr(response, 'close'):
                try:
                    response.close()
                    logger.debug("[Processor] Response closed")
                except Exception as e:
                    logger.warning(f"[Processor] Close failed: {e}")

            if session:
                try:
                    await session.close()
                    logger.debug("[Processor] Session closed")
                except Exception as e:
                    logger.warning(f"[Processor] Failed to close session: {e}")

    @staticmethod
    async def _build_video_content(video_url: str, auth_token: str) -> str:
        """Build an HTML <video> tag, preferring a locally cached copy.

        Falls back to the upstream assets.grok.com URL on cache failure.
        """
        logger.debug(f"[Processor] Video detected: {video_url}")
        full_url = f"https://assets.grok.com/{video_url}"

        try:
            cache_path = await video_cache_service.download_video(f"/{video_url}", auth_token)
            if cache_path:
                video_path = video_url.replace('/', '-')
                base_url = setting.global_config.get("base_url", "")
                local_url = f"{base_url}/images/{video_path}" if base_url else f"/images/{video_path}"
                return f'<video src="{local_url}" controls="controls" width="500" height="300"></video>\n'
        except Exception as e:
            logger.warning(f"[Processor] Failed to cache video: {e}")

        return f'<video src="{full_url}" controls="controls" width="500" height="300"></video>\n'

    @staticmethod
    async def _append_images(content: str, images: list, auth_token: str) -> str:
        """Append generated images to *content* as markdown.

        Honors the configured image_mode ("base64" or "url"); falls back to
        the upstream asset URL for any image that fails to download.
        """
        image_mode = setting.global_config.get("image_mode", "url")

        for img in images:
            try:
                if image_mode == "base64":
                    base64_str = await image_cache_service.download_base64(f"/{img}", auth_token)
                    if base64_str:
                        content += f"\n![Generated Image]({base64_str})"
                    else:
                        content += f"\n![Generated Image](https://assets.grok.com/{img})"
                else:
                    cache_path = await image_cache_service.download_image(f"/{img}", auth_token)
                    if cache_path:
                        img_path = img.replace('/', '-')
                        base_url = setting.global_config.get("base_url", "")
                        img_url = f"{base_url}/images/{img_path}" if base_url else f"/images/{img_path}"
                        content += f"\n![Generated Image]({img_url})"
                    else:
                        content += f"\n![Generated Image](https://assets.grok.com/{img})"
            except Exception as e:
                logger.warning(f"[Processor] Failed to process image: {e}")
                content += f"\n![Generated Image](https://assets.grok.com/{img})"

        return content

    @staticmethod
    def _build_response(content: str, model: str) -> OpenAIChatCompletionResponse:
        """Build a complete (non-streaming) OpenAI chat completion object."""
        return OpenAIChatCompletionResponse(
            id=f"chatcmpl-{uuid.uuid4()}",
            object="chat.completion",
            created=int(time.time()),
            model=model,
            choices=[OpenAIChatCompletionChoice(
                index=0,
                message=OpenAIChatCompletionMessage(
                    role="assistant",
                    content=content
                ),
                finish_reason="stop"
            )],
            usage=None
        )
app/services/grok/statsig.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Grok header manager - generate dynamic headers and Statsig ID"""
2
+
3
+ import base64
4
+ import random
5
+ import string
6
+ import uuid
7
+ from typing import Dict
8
+
9
+ from app.core.logger import logger
10
+ from app.core.config import setting
11
+
12
+
13
# Static browser-like base headers sent with every Grok request.
# Dynamic values (x-statsig-id, x-xai-request-id, Content-Type) are added
# per-request by get_dynamic_headers(); the UA/Sec-Ch-Ua values impersonate
# Chrome 133 on macOS.
BASE_HEADERS = {
    "Accept": "*/*",
    "Accept-Language": "zh-CN,zh;q=0.9",
    "Accept-Encoding": "gzip, deflate, br, zstd",
    "Connection": "keep-alive",
    "Origin": "https://grok.com",
    "Priority": "u=1, i",
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36",
    "Sec-Ch-Ua": '"Not(A:Brand";v="99", "Google Chrome";v="133", "Chromium";v="133"',
    "Sec-Ch-Ua-Mobile": "?0",
    "Sec-Ch-Ua-Platform": '"macOS"',
    "Sec-Fetch-Dest": "empty",
    "Sec-Fetch-Mode": "cors",
    "Sec-Fetch-Site": "same-origin",
    "Baggage": "sentry-environment=production,sentry-public_key=b311e0f2690c81f25e2c4cf6d4f7ce1c",
}
30
+
31
+
32
+ def _random_string(length: int, letters_only: bool = True) -> str:
33
+ """Generate random string"""
34
+ chars = string.ascii_lowercase if letters_only else string.ascii_lowercase + string.digits
35
+ return ''.join(random.choices(chars, k=length))
36
+
37
+
38
def _generate_statsig_id() -> str:
    """Build a fake x-statsig-id header value.

    Picks one of two JS-style TypeError messages at random and base64-encodes
    it, mimicking what the browser client produces:
    1. e:TypeError: Cannot read properties of null (reading 'children['xxxxx']')
    2. e:TypeError: Cannot read properties of undefined (reading 'xxxxxxxxxx')
    """
    use_children_variant = random.choice([True, False])
    if use_children_variant:
        suffix = _random_string(5, letters_only=False)
        message = f"e:TypeError: Cannot read properties of null (reading 'children['{suffix}']')"
    else:
        suffix = _random_string(10)
        message = f"e:TypeError: Cannot read properties of undefined (reading '{suffix}')"
    return base64.b64encode(message.encode()).decode()
53
+
54
+
55
def get_dynamic_headers(pathname: str = "/rest/app-chat/conversations/new", statsig_id: str = "") -> Dict[str, str]:
    """Assemble the complete header set for a Grok request.

    Args:
        pathname: Request path; upload paths get a text/plain Content-Type.
        statsig_id: Optional per-request override for the x-statsig-id header.

    Returns:
        Full request header dict.

    Raises:
        ValueError: if no override is given, dynamic generation is disabled,
            and x_statsig_id is missing from the config.
    """
    # Resolve the statsig id: explicit override > dynamic generation > config.
    if statsig_id:
        logger.debug(f"[Statsig] Using request override: {statsig_id}")
    elif setting.grok_config.get("dynamic_statsig", False):
        statsig_id = _generate_statsig_id()
        logger.debug(f"[Statsig] Generated dynamically: {statsig_id}")
    else:
        statsig_id = setting.grok_config.get("x_statsig_id")
        if not statsig_id:
            raise ValueError("x_statsig_id is not set in the config file")
        logger.debug(f"[Statsig] Using fixed value: {statsig_id}")

    # Compose the final header set on top of the static base headers.
    headers = dict(BASE_HEADERS)
    headers["x-statsig-id"] = statsig_id
    headers["x-xai-request-id"] = str(uuid.uuid4())
    is_upload = "upload-file" in pathname
    headers["Content-Type"] = "text/plain;charset=UTF-8" if is_upload else "application/json"
    return headers
app/services/grok/token.py ADDED
@@ -0,0 +1,649 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Grok token manager - singleton token load balancing and status management"""
2
+
3
+ import os
4
+ import orjson
5
+ import time
6
+ import asyncio
7
+ import aiofiles
8
+ import portalocker
9
+ from pathlib import Path
10
+ from curl_cffi.requests import AsyncSession
11
+ from typing import Dict, Any, Optional, Tuple
12
+
13
+ from app.models.grok_models import TokenType, Models
14
+ from app.core.exception import GrokApiException
15
+ from app.core.logger import logger
16
+ from app.core.config import setting
17
+ from app.services.grok.statsig import get_dynamic_headers
18
+
19
+
20
# Constants
RATE_LIMIT_API = "https://grok.com/rest/rate-limits"  # endpoint queried by check_limits
TIMEOUT = 30  # HTTP request timeout in seconds
BROWSER = "chrome133a"  # curl_cffi browser impersonation profile
MAX_FAILURES = 3  # failures before token selection skips a token
TOKEN_INVALID = 401  # HTTP status meaning the token itself is invalid
STATSIG_INVALID = 403  # HTTP status meaning the statsig id was rejected

# Cooldown constants
COOLDOWN_REQUESTS = 5  # Cooldown requests for normal failures
COOLDOWN_429_WITH_QUOTA = 3600  # 429 with quota: 1 hour cooldown (seconds)
COOLDOWN_429_NO_QUOTA = 36000  # 429 no quota: 10 hour cooldown (seconds)
32
+
33
+
34
+ class GrokTokenManager:
35
+ """Token manager (singleton)"""
36
+
37
+ _instance: Optional['GrokTokenManager'] = None
38
+ _lock = asyncio.Lock()
39
+
40
+ def __new__(cls) -> 'GrokTokenManager':
41
+ if cls._instance is None:
42
+ cls._instance = super().__new__(cls)
43
+ return cls._instance
44
+
45
    def __init__(self):
        # Singleton guard: __init__ runs on every GrokTokenManager() call,
        # so bail out after the first successful initialisation.
        if hasattr(self, '_initialized'):
            return

        # Token store lives under the resolved data directory.
        self.token_file = self._resolve_data_dir() / "token.json"
        self._file_lock = asyncio.Lock()  # serializes file I/O within this process
        self.token_file.parent.mkdir(parents=True, exist_ok=True)
        self._storage = None  # optional pluggable backend set via set_storage()
        self.token_data = None  # Lazy load

        # Batch save queue
        self._save_pending = False  # Pending save flag
        self._save_task = None  # Background save task
        self._shutdown = False  # Shutdown flag

        # Cooldown state
        self._cooldown_counts: Dict[str, int] = {}  # Token -> remaining cooldown requests
        self._request_counter = 0  # Global request counter

        # Refresh state
        self._refresh_lock = False  # Refresh lock
        self._refresh_progress: Dict[str, Any] = {"running": False, "current": 0, "total": 0, "success": 0, "failed": 0}

        self._initialized = True
        logger.debug(f"[Token] Initialized: {self.token_file}")
70
+
71
+ @staticmethod
72
+ def _resolve_data_dir() -> Path:
73
+ """Resolve data directory for persistent storage."""
74
+ data_dir_env = os.getenv("DATA_DIR")
75
+ if data_dir_env:
76
+ return Path(data_dir_env)
77
+ if Path("/data").exists():
78
+ return Path("/data")
79
+ return Path(__file__).parents[3] / "data"
80
+
81
+ def _using_file_storage(self) -> bool:
82
+ """Check if storage is file-based or unset."""
83
+ if self._storage is None:
84
+ return True
85
+ try:
86
+ from app.core.storage import FileStorage
87
+ return isinstance(self._storage, FileStorage)
88
+ except Exception:
89
+ return False
90
+
91
+ def set_storage(self, storage) -> None:
92
+ """Set storage instance"""
93
+ self._storage = storage
94
+ data_dir = getattr(storage, "data_dir", None)
95
+ if data_dir:
96
+ self.token_file = Path(data_dir) / "token.json"
97
+ self.token_file.parent.mkdir(parents=True, exist_ok=True)
98
+
99
    async def _load_data(self) -> None:
        """Load token data asynchronously (multi-process safe).

        Uses the storage backend when one is configured and it is not
        file-based; otherwise reads token.json under a shared portalocker
        lock (run in a thread so the event loop is not blocked). Any failure
        falls back to an empty default structure.
        """
        default = {TokenType.NORMAL.value: {}, TokenType.SUPER.value: {}}

        def load_sync():
            # Shared (read) lock so concurrent processes can read together
            # but a writer holding LOCK_EX blocks us.
            with open(self.token_file, "r", encoding="utf-8") as f:
                portalocker.lock(f, portalocker.LOCK_SH)
                try:
                    return orjson.loads(f.read())
                finally:
                    portalocker.unlock(f)

        try:
            if self._storage and not self._using_file_storage():
                data = await self._storage.load_tokens()
                self.token_data = data or default
                return

            if self.token_file.exists():
                # Read file with process lock
                async with self._file_lock:
                    self.token_data = await asyncio.to_thread(load_sync)
            else:
                self.token_data = default
                logger.debug("[Token] Created new data file")
        except Exception as e:
            # Best-effort: never fail startup because of a corrupt/missing file.
            logger.error(f"[Token] Load failed: {e}")
            self.token_data = default
127
+
128
    async def _save_data(self) -> None:
        """Save token data (multi-process safe).

        Writes token.json under an exclusive portalocker lock when no storage
        backend is set; otherwise delegates to the backend.
        NOTE(review): asymmetric with _load_data, which bypasses the backend
        for FileStorage — presumably FileStorage.save_tokens writes the same
        file; confirm.

        Raises:
            GrokApiException: TOKEN_SAVE_ERROR on any write failure.
        """
        def save_sync(data):
            # Exclusive (write) lock: blocks both readers and other writers.
            with open(self.token_file, "w", encoding="utf-8") as f:
                portalocker.lock(f, portalocker.LOCK_EX)
                try:
                    content = orjson.dumps(data, option=orjson.OPT_INDENT_2).decode()
                    f.write(content)
                    f.flush()
                finally:
                    portalocker.unlock(f)

        try:
            if not self._storage:
                async with self._file_lock:
                    await asyncio.to_thread(save_sync, self.token_data)
            else:
                await self._storage.save_tokens(self.token_data)
        except Exception as e:
            logger.error(f"[Token] Save failed: {e}")
            raise GrokApiException(f"Save failed: {e}", "TOKEN_SAVE_ERROR")
149
+
150
    def _mark_dirty(self) -> None:
        """Mark pending save; the batch-save worker flushes it on its next tick."""
        self._save_pending = True
153
+
154
    async def _batch_save_worker(self) -> None:
        """Batch save background task.

        Loops until shutdown, flushing dirty token data at most once per
        configured interval so bursts of mutations coalesce into one write.
        """
        from app.core.config import setting

        interval = setting.global_config.get("batch_save_interval", 1.0)
        logger.info(f"[Token] Save task started, interval: {interval}s")

        while not self._shutdown:
            await asyncio.sleep(interval)

            if self._save_pending and not self._shutdown:
                try:
                    await self._save_data()
                    self._save_pending = False
                    logger.debug("[Token] Save completed")
                except Exception as e:
                    # Keep the worker alive; _save_pending stays set for retry.
                    logger.error(f"[Token] Save failed: {e}")
171
+
172
+ async def start_batch_save(self) -> None:
173
+ """Start batch save task"""
174
+ if self._save_task is None:
175
+ self._save_task = asyncio.create_task(self._batch_save_worker())
176
+ logger.info("[Token] Save task created")
177
+
178
    async def shutdown(self) -> None:
        """Shutdown and flush pending data.

        Sets the shutdown flag first so the worker loop exits, cancels the
        worker, then performs one final synchronous flush of any dirty data.
        """
        self._shutdown = True

        if self._save_task:
            self._save_task.cancel()
            try:
                await self._save_task
            except asyncio.CancelledError:
                pass

        # Final flush
        if self._save_pending:
            await self._save_data()
            logger.info("[Token] Flush completed on shutdown")
193
+
194
+ @staticmethod
195
+ def _extract_sso(auth_token: str) -> Optional[str]:
196
+ """Extract SSO value"""
197
+ if "sso=" in auth_token:
198
+ return auth_token.split("sso=")[1].split(";")[0]
199
+ logger.warning("[Token] Unable to extract SSO value")
200
+ return None
201
+
202
+ def _find_token(self, sso: str) -> Tuple[Optional[str], Optional[Dict]]:
203
+ """Find token"""
204
+ for token_type in [TokenType.NORMAL.value, TokenType.SUPER.value]:
205
+ if sso in self.token_data[token_type]:
206
+ return token_type, self.token_data[token_type][sso]
207
+ return None, None
208
+
209
+ async def add_token(self, tokens: list[str], token_type: TokenType) -> None:
210
+ """Add tokens"""
211
+ if not tokens:
212
+ return
213
+
214
+ count = 0
215
+ for token in tokens:
216
+ if not token or not token.strip():
217
+ continue
218
+
219
+ self.token_data[token_type.value][token] = {
220
+ "createdTime": int(time.time() * 1000),
221
+ "remainingQueries": -1,
222
+ "heavyremainingQueries": -1,
223
+ "status": "active",
224
+ "failedCount": 0,
225
+ "lastFailureTime": None,
226
+ "lastFailureReason": None,
227
+ "tags": [],
228
+ "note": ""
229
+ }
230
+ count += 1
231
+
232
+ self._mark_dirty() # Batch save
233
+ logger.info(f"[Token] Added {count} {token_type.value} tokens")
234
+
235
+ async def delete_token(self, tokens: list[str], token_type: TokenType) -> None:
236
+ """Delete tokens"""
237
+ if not tokens:
238
+ return
239
+
240
+ count = 0
241
+ for token in tokens:
242
+ if token in self.token_data[token_type.value]:
243
+ del self.token_data[token_type.value][token]
244
+ count += 1
245
+
246
+ self._mark_dirty() # Batch save
247
+ logger.info(f"[Token] Deleted {count} {token_type.value} tokens")
248
+
249
+ async def update_token_tags(self, token: str, token_type: TokenType, tags: list[str]) -> None:
250
+ """Update token tags"""
251
+ if token not in self.token_data[token_type.value]:
252
+ raise GrokApiException("Token not found", "TOKEN_NOT_FOUND", {"token": token[:10]})
253
+
254
+ cleaned = [t.strip() for t in tags if t and t.strip()]
255
+ self.token_data[token_type.value][token]["tags"] = cleaned
256
+ self._mark_dirty() # Batch save
257
+ logger.info(f"[Token] Updated tags: {token[:10]}... -> {cleaned}")
258
+
259
+ async def update_token_note(self, token: str, token_type: TokenType, note: str) -> None:
260
+ """Update token note"""
261
+ if token not in self.token_data[token_type.value]:
262
+ raise GrokApiException("Token not found", "TOKEN_NOT_FOUND", {"token": token[:10]})
263
+
264
+ self.token_data[token_type.value][token]["note"] = note.strip()
265
+ self._mark_dirty() # Batch save
266
+ logger.info(f"[Token] Updated note: {token[:10]}...")
267
+
268
+ def get_tokens(self) -> Dict[str, Any]:
269
+ """Get all tokens"""
270
+ return self.token_data.copy()
271
+
272
    async def _reload_if_needed(self) -> None:
        """Reload data in multi-process mode.

        Re-reads token.json (under a shared lock, off the event loop) so this
        process sees updates written by sibling workers. Non-file backends
        are authoritative and are not reloaded here.
        """
        # Reload only for file storage in multi-process mode
        if not self._using_file_storage():
            return

        def reload_sync():
            with open(self.token_file, "r", encoding="utf-8") as f:
                portalocker.lock(f, portalocker.LOCK_SH)
                try:
                    return orjson.loads(f.read())
                finally:
                    portalocker.unlock(f)

        try:
            if self.token_file.exists():
                self.token_data = await asyncio.to_thread(reload_sync)
        except Exception as e:
            # Keep the in-memory copy on failure; selection still works.
            logger.warning(f"[Token] Reload failed: {e}")
291
+
292
+ async def get_token(self, model: str) -> str:
293
+ """Get token"""
294
+ jwt = await self.select_token(model)
295
+ return f"sso-rw={jwt};sso={jwt}"
296
+
297
    async def select_token(self, model: str) -> str:
        """Select best token (multi-process safe, with cooldown).

        Strategy: prefer never-used tokens (remaining == -1), otherwise the
        token with the most remaining quota. "grok-4-heavy" draws only from
        the super pool; other models try the normal pool first, then super.

        Raises:
            GrokApiException: NO_AVAILABLE_TOKEN when every candidate is
                expired, failed out, cooling down, or out of quota.
        """
        # Reload latest data (multi-process mode)
        await self._reload_if_needed()

        # Decrement request-based cooldown counters: each selection call
        # counts as one request against every cooling-down token.
        self._request_counter += 1
        for token in list(self._cooldown_counts.keys()):
            self._cooldown_counts[token] -= 1
            if self._cooldown_counts[token] <= 0:
                del self._cooldown_counts[token]
                logger.debug(f"[Token] Cooldown ended: {token[:10]}...")

        current_time = time.time() * 1000  # milliseconds

        def select_best(tokens: Dict[str, Any], field: str) -> Tuple[Optional[str], Optional[int]]:
            """Select best token: first unused, else highest remaining quota."""
            unused, used = [], []

            for key, data in tokens.items():
                # Skip expired tokens
                if data.get("status") == "expired":
                    continue

                # Skip tokens with too many failures (any error status)
                if data.get("failedCount", 0) >= MAX_FAILURES:
                    continue

                # Skip request cooldown tokens
                if key in self._cooldown_counts:
                    continue

                # Skip time cooldown tokens (429)
                # NOTE(review): compared against current_time in ms — whoever
                # sets cooldownUntil must also use epoch milliseconds; verify
                # against the 429 handler (COOLDOWN_429_* are in seconds).
                cooldown_until = data.get("cooldownUntil", 0)
                if cooldown_until and cooldown_until > current_time:
                    continue

                remaining = int(data.get(field, -1))
                if remaining == 0:
                    continue

                if remaining == -1:
                    unused.append(key)  # -1 means quota not yet probed
                elif remaining > 0:
                    used.append((key, remaining))

            if unused:
                return unused[0], -1
            if used:
                used.sort(key=lambda x: x[1], reverse=True)
                return used[0][0], used[0][1]
            return None, None

        # Snapshot: shallow copies so concurrent mutation of the pools does
        # not disturb this iteration.
        snapshot = {
            TokenType.NORMAL.value: self.token_data[TokenType.NORMAL.value].copy(),
            TokenType.SUPER.value: self.token_data[TokenType.SUPER.value].copy()
        }

        # Selection strategy
        if model == "grok-4-heavy":
            field = "heavyremainingQueries"
            token_key, remaining = select_best(snapshot[TokenType.SUPER.value], field)
        else:
            field = "remainingQueries"
            token_key, remaining = select_best(snapshot[TokenType.NORMAL.value], field)
            if token_key is None:
                token_key, remaining = select_best(snapshot[TokenType.SUPER.value], field)

        if token_key is None:
            raise GrokApiException(
                f"No available token: {model}",
                "NO_AVAILABLE_TOKEN",
                {
                    "model": model,
                    "normal": len(snapshot[TokenType.NORMAL.value]),
                    "super": len(snapshot[TokenType.SUPER.value]),
                    "cooldown_count": len(self._cooldown_counts)
                }
            )

        status = "unused" if remaining == -1 else f"{remaining} remaining"
        logger.debug(f"[Token] Assigned token: {model} ({status})")
        return token_key
381
+
382
async def check_limits(self, auth_token: str, model: str) -> Optional[Dict[str, Any]]:
    """Query the upstream rate-limit endpoint for *model* using *auth_token*.

    Two nested retry layers:
      * inner: up to 5 retries on 403, each with a fresh proxy (only when the
        proxy pool is enabled);
      * outer: up to MAX_OUTER_RETRY retries on configurable status codes
        (default 401/429) with a progressive delay.

    On success the cached quota for the token is updated via update_limits().

    Returns:
        The parsed rate-limit JSON on HTTP 200, otherwise None.
    """
    try:
        rate_model = Models.to_rate_limit(model)
        payload = {"requestKind": "DEFAULT", "modelName": rate_model}

        cf = setting.grok_config.get("cf_clearance", "")
        headers = get_dynamic_headers("/rest/rate-limits")
        headers["Cookie"] = f"{auth_token};{cf}" if cf else auth_token

        # Outer retry: configurable status codes (401/429, etc)
        retry_codes = setting.grok_config.get("retry_status_codes", [401, 429])
        MAX_OUTER_RETRY = 3

        for outer_retry in range(MAX_OUTER_RETRY + 1):  # +1 ensures 3 retries
            # Inner retry: 403 with proxy pool
            max_403_retries = 5
            retry_403_count = 0

            while retry_403_count <= max_403_retries:
                # Fetch proxy asynchronously (proxy pool supported); local
                # import avoids a module-level circular dependency.
                from app.core.proxy_pool import proxy_pool

                # If retrying 403 with proxy pool, force refresh
                if retry_403_count > 0 and proxy_pool._enabled:
                    logger.info(f"[Token] 403 retry {retry_403_count}/{max_403_retries}, refreshing proxy...")
                    proxy = await proxy_pool.force_refresh()
                else:
                    proxy = await setting.get_proxy_async("service")

                proxies = {"http": proxy, "https": proxy} if proxy else None

                async with AsyncSession() as session:
                    response = await session.post(
                        RATE_LIMIT_API,
                        headers=headers,
                        json=payload,
                        impersonate=BROWSER,
                        timeout=TIMEOUT,
                        proxies=proxies
                    )

                # Inner 403 retry: only when proxy pool is enabled
                if response.status_code == 403 and proxy_pool._enabled:
                    retry_403_count += 1

                    if retry_403_count <= max_403_retries:
                        logger.warning(f"[Token] 403 error, retrying ({retry_403_count}/{max_403_retries})...")
                        await asyncio.sleep(0.5)
                        continue

                    # All inner retries failed
                    logger.error(f"[Token] 403 error, retried {retry_403_count-1} times, giving up")
                    sso = self._extract_sso(auth_token)
                    if sso:
                        await self.record_failure(auth_token, 403, "Server blocked")
                    # NOTE(review): execution falls through from here; unless 403
                    # is in retry_codes the "Other errors" branch below records a
                    # SECOND failure for the same response — confirm intended.

                # Check retryable status codes - outer retry
                if response.status_code in retry_codes:
                    if outer_retry < MAX_OUTER_RETRY:
                        delay = (outer_retry + 1) * 0.1  # Progressive delay: 0.1s, 0.2s, 0.3s
                        logger.warning(f"[Token] {response.status_code} error, outer retry ({outer_retry+1}/{MAX_OUTER_RETRY}), waiting {delay}s...")
                        await asyncio.sleep(delay)
                        break  # Exit inner loop for outer retry
                    else:
                        logger.error(f"[Token] {response.status_code} error, retried {outer_retry} times, giving up")
                        sso = self._extract_sso(auth_token)
                        if sso:
                            if response.status_code == 401:
                                await self.record_failure(auth_token, 401, "Token expired")
                            else:
                                await self.record_failure(auth_token, response.status_code, f"Error: {response.status_code}")
                        return None

                if response.status_code == 200:
                    data = response.json()
                    sso = self._extract_sso(auth_token)

                    if outer_retry > 0 or retry_403_count > 0:
                        logger.info("[Token] Retry succeeded")

                    if sso:
                        # Heavy models report quota under "remainingQueries";
                        # basic models under "remainingTokens" (per this code).
                        if model == "grok-4-heavy":
                            await self.update_limits(sso, normal=None, heavy=data.get("remainingQueries", -1))
                            logger.info(f"[Token] Updated limits: {sso[:10]}..., heavy={data.get('remainingQueries', -1)}")
                        else:
                            await self.update_limits(sso, normal=data.get("remainingTokens", -1), heavy=None)
                            logger.info(f"[Token] Updated limits: {sso[:10]}..., basic={data.get('remainingTokens', -1)}")

                    return data
                else:
                    # Other errors
                    logger.warning(f"[Token] Failed to get limits: {response.status_code}")
                    sso = self._extract_sso(auth_token)
                    if sso:
                        await self.record_failure(auth_token, response.status_code, f"Error: {response.status_code}")
                    return None

    except Exception as e:
        logger.error(f"[Token] Limit check error: {e}")
        return None
483
+
484
async def update_limits(self, sso: str, normal: Optional[int] = None, heavy: Optional[int] = None) -> None:
    """Update the cached quota counters for *sso*.

    Only non-None values are written.  The first token-type bucket (NORMAL,
    then SUPER) that contains *sso* is updated, then the method returns —
    a token present in both buckets is only updated in NORMAL.

    Args:
        sso: Raw SSO token value (the dict key, not the cookie string).
        normal: New "remainingQueries" value, or None to leave unchanged.
        heavy: New "heavyremainingQueries" value, or None to leave unchanged.
    """
    try:
        for token_type in [TokenType.NORMAL.value, TokenType.SUPER.value]:
            if sso in self.token_data[token_type]:
                if normal is not None:
                    self.token_data[token_type][sso]["remainingQueries"] = normal
                if heavy is not None:
                    self.token_data[token_type][sso]["heavyremainingQueries"] = heavy
                self._mark_dirty()  # Batch save
                logger.info(f"[Token] Updated limits: {sso[:10]}...")
                return
        logger.warning(f"[Token] Not found: {sso[:10]}...")
    except Exception as e:
        logger.error(f"[Token] Update limits error: {e}")
499
+
500
async def record_failure(self, auth_token: str, status: int, msg: str) -> None:
    """Record one upstream failure against the token in *auth_token*.

    Increments the token's failure counter and stores the reason/time.
    Once a token accumulates MAX_FAILURES failures with a 4xx status it is
    marked "expired" and excluded from future selection.  STATSIG_INVALID
    is treated as an IP-level problem, not a token problem, and is not
    counted against the token.
    """
    try:
        if status == STATSIG_INVALID:
            logger.warning("[Token] IP blocked. Try: 1) change IP 2) use proxy 3) set CF clearance")
            return

        sso = self._extract_sso(auth_token)
        if not sso:
            return

        _, data = self._find_token(sso)
        if not data:
            logger.warning(f"[Token] Not found: {sso[:10]}...")
            return

        data["failedCount"] = data.get("failedCount", 0) + 1
        data["lastFailureTime"] = int(time.time() * 1000)  # milliseconds
        data["lastFailureReason"] = f"{status}: {msg}"

        logger.warning(
            f"[Token] Failed: {sso[:10]}... (status: {status}), "
            f"count: {data['failedCount']}/{MAX_FAILURES}, reason: {msg}"
        )

        # Only client errors (4xx) can permanently expire a token
        if 400 <= status < 500 and data["failedCount"] >= MAX_FAILURES:
            data["status"] = "expired"
            logger.error(f"[Token] Marked expired: {sso[:10]}... ({status} errors x{data['failedCount']})")

        self._mark_dirty()  # Batch save

    except Exception as e:
        logger.error(f"[Token] Record failure error: {e}")
533
+
534
async def reset_failure(self, auth_token: str) -> None:
    """Clear the accumulated failure state for the token in *auth_token*.

    No-op when the token cannot be resolved or has no recorded failures.
    """
    try:
        sso = self._extract_sso(auth_token)
        if not sso:
            return

        _, record = self._find_token(sso)
        if not record:
            return

        if record.get("failedCount", 0) <= 0:
            return  # nothing to reset

        record["failedCount"] = 0
        record["lastFailureTime"] = None
        record["lastFailureReason"] = None
        self._mark_dirty()  # Batch save
        logger.info(f"[Token] Reset failure count: {sso[:10]}...")

    except Exception as e:
        logger.error(f"[Token] Reset failure error: {e}")
554
+
555
async def apply_cooldown(self, auth_token: str, status_code: int) -> None:
    """Apply cooldown policy
    - 429 errors: time-based cooldown (1 hour with quota, 10 hours without)
    - Other errors: request-based cooldown (5 requests)
    """
    try:
        sso = self._extract_sso(auth_token)
        if not sso:
            return

        _, data = self._find_token(sso)
        if not data:
            return

        # NOTE(review): always reads the basic quota field, even when the
        # failing request targeted a heavy model — confirm whether
        # "heavyremainingQueries" should be consulted for grok-4-heavy.
        remaining = data.get("remainingQueries", -1)

        if status_code == 429:
            # 429 time-based cooldown; -1 means "never queried" and is
            # treated as still having quota.
            if remaining > 0 or remaining == -1:
                # With quota: 1 hour cooldown
                cooldown_until = time.time() + COOLDOWN_429_WITH_QUOTA
                logger.info(f"[Token] 429 cooldown (with quota): {sso[:10]}... 1 hour")
            else:
                # No quota: 10 hour cooldown
                cooldown_until = time.time() + COOLDOWN_429_NO_QUOTA
                logger.info(f"[Token] 429 cooldown (no quota): {sso[:10]}... 10 hours")
            data["cooldownUntil"] = int(cooldown_until * 1000)  # stored as ms epoch
            self._mark_dirty()
        else:
            # Other errors use request-based cooldown (only if quota remains);
            # the counter is decremented once per select_token() call.
            if remaining != 0:
                self._cooldown_counts[sso] = COOLDOWN_REQUESTS
                logger.info(f"[Token] Request cooldown: {sso[:10]}... {COOLDOWN_REQUESTS} requests")

    except Exception as e:
        logger.error(f"[Token] Apply cooldown error: {e}")
591
+
592
async def refresh_all_limits(self) -> Dict[str, Any]:
    """Refresh remaining counts for all tokens.

    Serialized by a plain boolean flag: the check and the set happen with no
    intervening await, so within one event loop only a single refresh can
    run at a time.  Progress is observable via get_refresh_progress().

    Returns:
        {"success": n, "failed": n, "total": n} when a refresh ran, or an
        {"error": "refresh_in_progress", ...} dict when one was already
        running.
    """
    # Check if refresh is already running
    if self._refresh_lock:
        return {"error": "refresh_in_progress", "message": "A refresh task is already running", "progress": self._refresh_progress}

    # Acquire lock
    self._refresh_lock = True

    try:
        # Compute total; key snapshot guards against concurrent mutation
        all_tokens = []
        for token_type in [TokenType.NORMAL.value, TokenType.SUPER.value]:
            for sso in list(self.token_data[token_type].keys()):
                all_tokens.append((token_type, sso))

        total = len(all_tokens)
        self._refresh_progress = {"running": True, "current": 0, "total": total, "success": 0, "failed": 0}

        success_count = 0
        fail_count = 0

        for i, (token_type, sso) in enumerate(all_tokens):
            # Rebuild the cookie string expected by check_limits()
            auth_token = f"sso-rw={sso};sso={sso}"
            try:
                result = await self.check_limits(auth_token, "grok-4-fast")
                if result:
                    success_count += 1
                else:
                    fail_count += 1
            except Exception as e:
                logger.warning(f"[Token] Refresh failed: {sso[:10]}... - {e}")
                fail_count += 1

            # Update progress
            self._refresh_progress = {
                "running": True,
                "current": i + 1,
                "total": total,
                "success": success_count,
                "failed": fail_count
            }
            await asyncio.sleep(0.1)  # Avoid flooding requests

        logger.info(f"[Token] Batch refresh completed: success {success_count}, failed {fail_count}")
        self._refresh_progress = {"running": False, "current": total, "total": total, "success": success_count, "failed": fail_count}
        return {"success": success_count, "failed": fail_count, "total": total}

    finally:
        self._refresh_lock = False
642
+
643
def get_refresh_progress(self) -> Dict[str, Any]:
    """Return a shallow copy of the current batch-refresh progress state."""
    return dict(self._refresh_progress)
646
+
647
+
648
+ # Global instance
649
+ token_manager = GrokTokenManager()
app/services/grok/upload.py ADDED
@@ -0,0 +1,250 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Image upload manager - supports Base64 and URL image uploads"""
2
+
3
+ import asyncio
4
+ import base64
5
+ import re
6
+ import time
7
+ import os
8
+ from pathlib import Path
9
+ from typing import Tuple, Optional
10
+ from urllib.parse import urlparse
11
+ from curl_cffi.requests import AsyncSession
12
+
13
+ from app.services.grok.statsig import get_dynamic_headers
14
+ from app.services.images.normalize import normalize_tmpfiles_url
15
+ from app.core.exception import GrokApiException
16
+ from app.core.config import setting
17
+ from app.core.logger import logger
18
+
19
+
20
+ # Constants
21
+ UPLOAD_API = "https://grok.com/rest/app-chat/upload-file"
22
+ TIMEOUT = 30
23
+ BROWSER = "chrome133a"
24
+
25
+ # MIME types
26
+ MIME_TYPES = {
27
+ '.jpg': 'image/jpeg', '.jpeg': 'image/jpeg', '.png': 'image/png',
28
+ '.gif': 'image/gif', '.webp': 'image/webp', '.bmp': 'image/bmp',
29
+ }
30
+ DEFAULT_MIME = "image/jpeg"
31
+ DEFAULT_EXT = "jpg"
32
+
33
+
34
class ImageUploadManager:
    """Image upload manager.

    Accepts either a Base64 payload (optionally a ``data:image/...`` URI) or
    an http(s) URL, converts it to Base64, and posts it to the Grok upload
    endpoint.  All entry points are staticmethods; the class holds no state.
    """

    @staticmethod
    def _resolve_temp_dir() -> str:
        """Resolve temp directory for downloads.

        Precedence: $DATA_DIR, then /data (container layout), then the
        repository-local data/ directory.
        """
        data_dir_env = os.getenv("DATA_DIR")
        if data_dir_env:
            base_dir = data_dir_env
        elif os.path.isdir("/data"):
            base_dir = "/data"
        else:
            base_dir = str(Path(__file__).parents[3] / "data")
        return os.path.join(base_dir, "temp", "image")

    @staticmethod
    async def upload(image_input: str, auth_token: str, statsig_id: str = "") -> Tuple[str, str]:
        """Upload image (Base64 or URL)

        Args:
            image_input: Base64 string, data: URI, or http(s) image URL.
            auth_token: Cookie-style auth string ("sso-rw=...;sso=...").
            statsig_id: Optional statsig id forwarded in dynamic headers.

        Returns:
            (file_id, file_uri) tuple; ("", "") on non-raising failure.

        Raises:
            GrokApiException: when the download fails or auth_token is empty.
        """
        try:
            # Detect type and handle
            if ImageUploadManager._is_url(image_input):
                buffer, mime = await ImageUploadManager._download(normalize_tmpfiles_url(image_input))
                if not buffer:
                    raise GrokApiException("Image download failed", "IMAGE_DOWNLOAD_FAILED")
                filename, _ = ImageUploadManager._get_info("", mime)
            else:
                # Strip the "data:image/...;base64," prefix if present
                buffer = image_input.split(",")[1] if "data:image" in image_input else image_input
                filename, mime = ImageUploadManager._get_info(image_input)

            # Build data (content is the Base64 string, not raw bytes)
            data = {
                "fileName": filename,
                "fileMimeType": mime,
                "content": buffer,
            }


            if not auth_token:
                raise GrokApiException("Missing authentication token", "NO_AUTH_TOKEN")

            # Outer retry: configurable status codes (401/429, etc)
            retry_codes = setting.grok_config.get("retry_status_codes", [401, 429])
            MAX_OUTER_RETRY = 3

            for outer_retry in range(MAX_OUTER_RETRY + 1):  # +1 ensures 3 retries
                try:
                    # Inner retry: 403 with proxy pool
                    max_403_retries = 5
                    retry_403_count = 0

                    while retry_403_count <= max_403_retries:
                        # Request config (headers rebuilt per attempt: statsig
                        # headers are dynamic)
                        cf = setting.grok_config.get("cf_clearance", "")
                        headers = {
                            **get_dynamic_headers("/rest/app-chat/upload-file", statsig_id=statsig_id),
                            "Cookie": f"{auth_token};{cf}" if cf else auth_token,
                        }

                        # Fetch proxy asynchronously (proxy pool supported);
                        # local import avoids a circular dependency.
                        from app.core.proxy_pool import proxy_pool

                        # If retrying 403 with proxy pool, force refresh
                        if retry_403_count > 0 and proxy_pool._enabled:
                            logger.info(f"[Upload] 403 retry {retry_403_count}/{max_403_retries}, refreshing proxy...")
                            proxy = await proxy_pool.force_refresh()
                        else:
                            proxy = await setting.get_proxy_async("service")

                        proxies = {"http": proxy, "https": proxy} if proxy else None
                        logger.info(f"[Upload] Proxy: {proxy[:60]}..." if proxy else "[Upload] Proxy: None")

                        # Upload
                        async with AsyncSession() as session:
                            response = await session.post(
                                UPLOAD_API,
                                headers=headers,
                                json=data,
                                impersonate=BROWSER,
                                timeout=TIMEOUT,
                                proxies=proxies,
                            )

                        # Inner 403 retry: only when proxy pool is enabled
                        if response.status_code == 403 and proxy_pool._enabled:
                            retry_403_count += 1

                            if retry_403_count <= max_403_retries:
                                logger.warning(f"[Upload] 403 error, retrying ({retry_403_count}/{max_403_retries})...")
                                await asyncio.sleep(0.5)
                                continue

                            # All inner retries failed; falls through to the
                            # status-code handling below.
                            logger.error(f"[Upload] 403 error, retried {retry_403_count-1} times, giving up")

                        # Check retryable status codes - outer retry
                        if response.status_code in retry_codes:
                            if outer_retry < MAX_OUTER_RETRY:
                                delay = (outer_retry + 1) * 0.1  # Progressive delay: 0.1s, 0.2s, 0.3s
                                logger.warning(f"[Upload] {response.status_code} error, outer retry ({outer_retry+1}/{MAX_OUTER_RETRY}), waiting {delay}s...")
                                await asyncio.sleep(delay)
                                break  # Exit inner loop for outer retry
                            else:
                                logger.error(f"[Upload] {response.status_code} error, retried {outer_retry} times, giving up")
                                return "", ""

                        if response.status_code == 200:
                            result = response.json()
                            file_id = result.get("fileMetadataId", "")
                            file_uri = result.get("fileUri", "")

                            if outer_retry > 0 or retry_403_count > 0:
                                logger.info("[Upload] Retry succeeded")

                            logger.debug(f"[Upload] Success, ID: {file_id}")
                            return file_id, file_uri

                        # Other errors
                        logger.error(f"[Upload] Failed, status code: {response.status_code}, body: {response.text[:500]}")
                        return "", ""

                    # Inner loop finished without break: 403 retries exhausted
                    # NOTE(review): this return also executes after the `break`
                    # used for the outer retry above, which would make the outer
                    # retry loop unreachable — confirm the intended control flow
                    # (a while/else clause may have been meant here).
                    return "", ""

                except Exception as e:
                    # NOTE(review): this path allows one fewer retry
                    # (MAX_OUTER_RETRY - 1) than the status-code path above —
                    # confirm whether the asymmetry is intentional.
                    if outer_retry < MAX_OUTER_RETRY - 1:
                        logger.warning(f"[Upload] Error: {e}, outer retry ({outer_retry+1}/{MAX_OUTER_RETRY})...")
                        await asyncio.sleep(0.5)
                        continue

                    logger.warning(f"[Upload] Failed: {e}")
                    return "", ""

            return "", ""

        except GrokApiException:
            raise
        except Exception as e:
            logger.warning(f"[Upload] Failed: {e}")
            return "", ""

    @staticmethod
    def _is_url(input_str: str) -> bool:
        """Return True when *input_str* is a well-formed http(s) URL."""
        try:
            result = urlparse(input_str)
            return all([result.scheme, result.netloc]) and result.scheme in ['http', 'https']
        except:
            return False

    @staticmethod
    async def _download(url: str) -> Tuple[str, str]:
        """Download image and convert to Base64

        Enforces a configurable timeout and size limit, verifies the
        Content-Type is image/*, and writes a copy of the raw bytes under
        the temp directory.  # NOTE(review): the temp copy is never removed
        here — confirm cleanup happens elsewhere.

        Returns:
            (base64_string, mime_type) tuple; ("", "") on non-raising failure.

        Raises:
            GrokApiException: UNSUPPORTED_IMAGE_TYPE or IMAGE_TOO_LARGE.
        """
        try:
            timeout = setting.global_config.get("image_download_timeout", 30)
            max_mb = setting.global_config.get("image_download_max_size_mb", 20)
            max_bytes = int(max_mb * 1024 * 1024)
            logger.debug(f"[Upload] Download start: {url}")
            async with AsyncSession() as session:
                response = await session.get(url, timeout=timeout)
                response.raise_for_status()

                content_type = response.headers.get('content-type', DEFAULT_MIME)
                if not content_type.startswith('image/'):
                    raise GrokApiException("Unsupported image MIME type", "UNSUPPORTED_IMAGE_TYPE")

                # Cheap header-based size check first...
                content_length = response.headers.get("content-length")
                if content_length and int(content_length) > max_bytes:
                    raise GrokApiException("Image too large", "IMAGE_TOO_LARGE")

                # ...then verify the actual body size (header may be absent/wrong)
                if len(response.content) > max_bytes:
                    raise GrokApiException("Image too large", "IMAGE_TOO_LARGE")

                temp_dir = ImageUploadManager._resolve_temp_dir()
                os.makedirs(temp_dir, exist_ok=True)
                file_path = os.path.join(temp_dir, f"upload-{int(time.time() * 1000)}.img")
                with open(file_path, "wb") as f:
                    f.write(response.content)

                b64 = base64.b64encode(response.content).decode()
                logger.debug(f"[Upload] Download success: {url}")
                return b64, content_type
        except GrokApiException:
            raise
        except Exception as e:
            logger.warning(f"[Upload] Download failed: {e}")
            return "", ""

    @staticmethod
    def _get_info(image_data: str, mime_type: Optional[str] = None) -> Tuple[str, str]:
        """Get filename and MIME type

        When *mime_type* is given it wins; otherwise the MIME is extracted
        from a ``data:image/...;base64,`` prefix in *image_data*, falling
        back to image/jpeg.

        Returns:
            (file_name, mime_type) tuple
        """
        # MIME type provided
        if mime_type:
            ext = mime_type.split("/")[1] if "/" in mime_type else DEFAULT_EXT
            return f"image.{ext}", mime_type

        # Extract from Base64
        mime = DEFAULT_MIME
        ext = DEFAULT_EXT

        if "data:image" in image_data:
            if match := re.search(r"data:([a-zA-Z0-9]+/[a-zA-Z0-9-.+]+);base64,", image_data):
                mime = match.group(1)
                ext = mime.split("/")[1]

        return f"image.{ext}", mime
app/services/images/normalize.py ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Multimodal message normalization utilities."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+ from typing import Any, Dict, List, Optional
7
+
8
+ from app.core.exception import GrokApiException
9
+ from app.core.logger import logger
10
+
11
+
12
+ _TMPFILES_RE = re.compile(r"^https?://tmpfiles\.org/(\d+)/(.+)$")
13
+
14
+
15
+ def normalize_tmpfiles_url(url: str) -> str:
16
+ """Normalize tmpfiles.org URLs to direct download links."""
17
+ if not url:
18
+ return url
19
+ match = _TMPFILES_RE.match(url.strip())
20
+ if not match:
21
+ return url.strip()
22
+ file_id, name = match.groups()
23
+ return f"https://tmpfiles.org/dl/{file_id}/{name}"
24
+
25
+
26
def _normalize_image_url(image_url: Any) -> Optional[str]:
    """Extract and normalize a URL from a string or ``{"url": ...}`` dict.

    Returns None for any other shape or when the dict holds no usable URL.
    """
    if isinstance(image_url, str):
        return normalize_tmpfiles_url(image_url)
    if not isinstance(image_url, dict):
        return None
    raw = image_url.get("url")
    if not raw:
        return None
    return normalize_tmpfiles_url(raw)
33
+
34
+
35
def _normalize_content_part(part: Any) -> Dict[str, Any]:
    """Validate and normalize one multimodal content part.

    Supports "text" and "image_url" parts; everything else raises.

    Raises:
        GrokApiException: INVALID_MULTIMODAL for non-dict parts, unusable
            image URLs, or unsupported part types.
    """
    if not isinstance(part, dict):
        raise GrokApiException("Invalid content part: expected object", "INVALID_MULTIMODAL")

    part_type = part.get("type")
    if part_type == "text":
        return {"type": "text", "text": str(part.get("text", ""))}
    if part_type == "image_url":
        # Accept both the standard {"image_url": {...}} and a looser
        # top-level {"url": ...} shape
        url = _normalize_image_url(part.get("image_url") or part.get("url"))
        if not url:
            raise GrokApiException("Invalid image_url content part", "INVALID_MULTIMODAL")
        return {"type": "image_url", "image_url": {"url": url}}

    raise GrokApiException(f"Unsupported content type '{part_type}'", "INVALID_MULTIMODAL")
49
+
50
+
51
def normalize_messages(
    messages: List[Dict[str, Any]],
    image_url: Optional[str] = None,
    image_urls: Optional[List[str]] = None,
) -> List[Dict[str, Any]]:
    """Normalize messages to OpenAI-compatible multimodal format.

    Every message's content becomes a list of typed parts
    ({"type": "text"} / {"type": "image_url"}).  Extra images supplied via
    *image_url* / *image_urls* are appended to the LAST user message.

    Raises:
        GrokApiException: INVALID_MULTIMODAL for empty input, non-dict
            messages, unknown roles, malformed parts, or when extra images
            are given but no user message exists.
    """
    if not messages:
        raise GrokApiException("Message list cannot be empty", "INVALID_MULTIMODAL")

    normalized: List[Dict[str, Any]] = []
    for msg in messages:
        if not isinstance(msg, dict):
            raise GrokApiException("Each message must be an object", "INVALID_MULTIMODAL")
        role = msg.get("role")
        if role not in ("system", "user", "assistant"):
            raise GrokApiException(f"Invalid role '{role}'", "INVALID_MULTIMODAL")

        content = msg.get("content", "")
        if isinstance(content, list):
            # Already multimodal: validate/normalize each part
            parts = [_normalize_content_part(part) for part in content]
        else:
            # Plain string (or other scalar) becomes a single text part
            parts = [{"type": "text", "text": str(content)}]

        normalized.append({"role": role, "content": parts})

    # Collect extra images supplied outside the message list
    extra_urls: List[str] = []
    if image_url:
        extra_urls.append(normalize_tmpfiles_url(image_url))
    if image_urls:
        extra_urls.extend([normalize_tmpfiles_url(u) for u in image_urls if u])

    if extra_urls:
        # Attach to the most recent user message; for/else fires when none exists
        for msg in reversed(normalized):
            if msg["role"] == "user":
                msg["content"].extend(
                    [{"type": "image_url", "image_url": {"url": u}} for u in extra_urls]
                )
                break
        else:
            raise GrokApiException("No user message to attach images", "INVALID_MULTIMODAL")

    # Debug summary: role plus the part types of each message
    summary = [
        {
            "role": msg["role"],
            "parts": [part.get("type") for part in msg.get("content", [])],
        }
        for msg in normalized
    ]
    logger.debug(f"[Normalize] Messages: {summary}")
    return normalized
app/services/mcp/__init__.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """MCP module initialization"""
3
+
4
+ from app.services.mcp.server import mcp
5
+
6
+ __all__ = ["mcp"]
app/services/mcp/server.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """FastMCP server instance"""
3
+
4
+ from fastmcp import FastMCP
5
+ from fastmcp.server.auth.providers.jwt import StaticTokenVerifier
6
+ from app.services.mcp.tools import ask_grok_impl
7
+ from app.core.config import setting
8
+
9
+
10
def create_mcp_server() -> FastMCP:
    """Create MCP server instance, enable auth if API key is configured.

    When grok_config["api_key"] is set, the server requires that exact key
    as a static bearer token carrying at least the "read" scope; otherwise
    the server is created without authentication.
    """
    # Check if API key is configured
    api_key = setting.grok_config.get("api_key")

    # Enable static token verification if API key is configured
    auth = None
    if api_key:
        auth = StaticTokenVerifier(
            tokens={
                api_key: {
                    "client_id": "grok2api-client",
                    "scopes": ["read", "write", "admin"]
                }
            },
            required_scopes=["read"]
        )

    # Create FastMCP instance
    return FastMCP(
        name="Grok2API-MCP",
        instructions="MCP server providing Grok AI chat capabilities. Use ask_grok tool to interact with Grok AI models.",
        auth=auth
    )
34
+
35
+
36
+ # Create global MCP instance
37
+ mcp = create_mcp_server()
38
+
39
+
40
+ # Register ask_grok tool
41
# Register ask_grok tool
@mcp.tool
async def ask_grok(
    query: str,
    model: str = "grok-3-fast",
    # Fixed annotation: was `str = None` (invalid implicit Optional). FastMCP
    # derives the tool's input schema from annotations, so the nullable type
    # must be explicit. `X | None` is fine here: fastmcp requires Python 3.10+.
    system_prompt: str | None = None
) -> str:
    """
    Call Grok AI for conversation, especially when users need the latest info, search, or social updates (Twitter/X, Reddit, etc).

    Args:
        query: User question or instruction
        model: Grok model name, options: grok-3-fast (default), grok-4-fast, grok-4-fast-expert, grok-4-expert, grok-4-heavy
        system_prompt: Optional system prompt to set AI role or constraints

    Returns:
        Full Grok AI response, may include text and image links (Markdown)

    Examples:
        - Simple Q&A: ask_grok("What is Python?")
        - Specify model: ask_grok("Explain quantum computing", model="grok-4-fast")
        - With system prompt: ask_grok("Write a poem", system_prompt="You are a classical poet")
    """
    return await ask_grok_impl(query, model, system_prompt)
app/services/mcp/tools.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """MCP Tools - Grok AI chat tool"""
3
+
4
+ import json
5
+ from typing import Optional
6
+ from app.services.grok.client import GrokClient
7
+ from app.core.logger import logger
8
+ from app.core.exception import GrokApiException
9
+
10
+
11
async def ask_grok_impl(
    query: str,
    model: str = "grok-3-fast",
    system_prompt: Optional[str] = None
) -> str:
    """
    Internal implementation: call Grok API and collect full response

    Always requests a streamed response and concatenates the delta chunks
    into one string.

    Args:
        query: User question
        model: Model name
        system_prompt: System prompt

    Returns:
        str: Full Grok response content

    Raises:
        Exception: both GrokApiException and unexpected errors are re-raised
            as plain Exception so MCP clients get a simple message.
    """
    try:
        # Build message list
        messages = []
        if system_prompt:
            messages.append({"role": "system", "content": system_prompt})
        messages.append({"role": "user", "content": query})

        # Build request
        request_data = {
            "model": model,
            "messages": messages,
            "stream": True
        }

        logger.info(f"[MCP] ask_grok called, model: {model}")

        # Call Grok client (streaming)
        response_iterator = await GrokClient.openai_to_grok(request_data)

        # Collect all streaming chunks
        content_parts = []
        async for chunk in response_iterator:
            if isinstance(chunk, bytes):
                chunk = chunk.decode('utf-8')

            # Parse SSE format
            # NOTE(review): assumes each chunk carries exactly one "data: ..."
            # event; confirm the iterator never batches multiple SSE events
            # into a single chunk.
            if chunk.startswith("data: "):
                data_str = chunk[6:].strip()
                if data_str == "[DONE]":
                    break

                try:
                    data = json.loads(data_str)
                    choices = data.get("choices", [])
                    if choices:
                        delta = choices[0].get("delta", {})
                        if content := delta.get("content"):
                            content_parts.append(content)
                except json.JSONDecodeError:
                    # Ignore malformed/partial SSE payloads
                    continue

        result = "".join(content_parts)
        logger.info(f"[MCP] ask_grok completed, response length: {len(result)}")
        return result

    except GrokApiException as e:
        logger.error(f"[MCP] Grok API error: {str(e)}")
        raise Exception(f"Grok API call failed: {str(e)}")
    except Exception as e:
        logger.error(f"[MCP] ask_grok error: {str(e)}", exc_info=True)
        raise Exception(f"Error processing request: {str(e)}")
app/services/request_logger.py ADDED
@@ -0,0 +1,152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Request log audit - record recent requests"""
2
+
3
+ import os
4
+ import time
5
+ import asyncio
6
+ import orjson
7
+ from typing import List, Dict, Deque
8
+ from collections import deque
9
+ from dataclasses import dataclass, asdict
10
+ from pathlib import Path
11
+
12
+ from app.core.logger import logger
13
+
14
@dataclass
class RequestLog:
    """Schema for one request audit entry.

    NOTE(review): RequestLogger.add_log currently builds plain dicts with
    these same keys rather than instantiating this dataclass — confirm
    whether the class is used elsewhere or kept as documentation.
    """
    id: str            # millisecond epoch timestamp string, used as unique id
    time: str          # human-readable local time "YYYY-MM-DD HH:MM:SS"
    timestamp: float   # epoch seconds
    ip: str            # client IP address
    model: str         # requested model name
    duration: float    # request duration (rounded to 2 decimals; presumably seconds — verify against callers)
    status: int        # HTTP status code of the response
    key_name: str      # label of the API key used for the request
    token_suffix: str  # trailing characters of the upstream token, for audit
    error: str = ""    # error message; empty string on success
26
+
27
class RequestLogger:
    """Request logger (process-wide singleton).

    Keeps the most recent request records in a bounded in-memory deque
    (newest first) and mirrors them to <data_dir>/logs.json after each write.
    """

    _instance = None  # singleton instance cache

    def __new__(cls):
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance

    def __init__(self, max_len: int = 1000):
        # Singleton guard: __init__ runs on every RequestLogger() call, so
        # bail out after the first initialization.  NOTE(review): as a
        # consequence max_len is honored only on the first construction.
        if hasattr(self, '_initialized'):
            return

        # Data directory precedence: $DATA_DIR, then /data, then repo-local data/
        data_dir_env = os.getenv("DATA_DIR")
        if data_dir_env:
            data_dir = Path(data_dir_env)
        elif Path("/data").exists():
            data_dir = Path("/data")
        else:
            data_dir = Path(__file__).parents[2] / "data"

        self.file_path = data_dir / "logs.json"           # persisted log file
        self._logs: Deque[Dict] = deque(maxlen=max_len)   # newest entries at the left
        self._lock = asyncio.Lock()                       # guards _logs and file I/O
        self._loaded = False                              # True once disk state was read

        self._initialized = True

    async def init(self):
        """Initialize and load data (idempotent)."""
        if not self._loaded:
            await self._load_data()

    async def _load_data(self):
        """Load log data from disk into the in-memory deque."""
        if self._loaded:
            return

        if not self.file_path.exists():
            self._loaded = True
            return

        try:
            async with self._lock:
                # Blocking file read pushed to a worker thread
                content = await asyncio.to_thread(self.file_path.read_bytes)
                if content:
                    data = orjson.loads(content)
                    if isinstance(data, list):
                        self._logs.clear()
                        self._logs.extend(data)
                self._loaded = True
                logger.debug(f"[Logger] Logs loaded: {len(self._logs)} entries")
        except Exception as e:
            logger.error(f"[Logger] Failed to load logs: {e}")
            # Mark loaded even on failure so we don't retry forever
            self._loaded = True

    async def _save_data(self):
        """Save log data to disk (no-op until initial load completed)."""
        if not self._loaded:
            return

        try:
            # Ensure directory exists
            self.file_path.parent.mkdir(parents=True, exist_ok=True)

            async with self._lock:
                # Save as list (deque is not JSON-serializable directly)
                content = orjson.dumps(list(self._logs))
                await asyncio.to_thread(self.file_path.write_bytes, content)
        except Exception as e:
            logger.error(f"[Logger] Failed to save logs: {e}")

    async def add_log(self,
                      ip: str,
                      model: str,
                      duration: float,
                      status: int,
                      key_name: str,
                      token_suffix: str = "",
                      error: str = ""):
        """Add one log entry and schedule an asynchronous save.

        Builds a plain dict with the RequestLog field names (the dataclass
        itself is not instantiated here).
        """
        if not self._loaded:
            await self.init()

        try:
            now = time.time()
            # Format timestamp
            time_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(now))

            log = {
                "id": str(int(now * 1000)),
                "time": time_str,
                "timestamp": now,
                "ip": ip,
                "model": model,
                "duration": round(duration, 2),
                "status": status,
                "key_name": key_name,
                "token_suffix": token_suffix,
                "error": error
            }

            async with self._lock:
                self._logs.appendleft(log)  # Newest first

            # Save asynchronously (fire-and-forget).
            # NOTE(review): the task reference is not retained; per asyncio
            # docs an un-referenced task may be garbage-collected mid-flight —
            # consider keeping a reference.
            asyncio.create_task(self._save_data())

        except Exception as e:
            logger.error(f"[Logger] Failed to record log: {e}")

    async def get_logs(self, limit: int = 1000) -> List[Dict]:
        """Return up to *limit* most recent log entries (newest first)."""
        async with self._lock:
            return list(self._logs)[:limit]

    async def clear_logs(self):
        """Clear in-memory logs, then persist the empty list."""
        async with self._lock:
            self._logs.clear()
        await self._save_data()
149
+
150
+
151
+ # Global instance
152
+ request_logger = RequestLogger()
app/services/request_stats.py ADDED
@@ -0,0 +1,205 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Request stats module - hourly/daily request statistics"""
2
+
3
+ import time
4
+ import asyncio
5
+ import orjson
6
+ from datetime import datetime
7
+ from typing import Dict, Any
8
+ from pathlib import Path
9
+ from collections import defaultdict
10
+
11
+ from app.core.logger import logger
12
+
13
+
14
class RequestStats:
    """Request stats manager (process-wide singleton).

    Keeps in-memory hourly/daily success-failure counters plus a per-model
    call counter, persisted as a single JSON document at data/stats.json.
    """

    _instance = None

    def __new__(cls):
        # Classic singleton: every construction yields the same instance.
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance

    def __init__(self):
        # __init__ runs on every RequestStats() call; bail out after the first.
        if hasattr(self, '_initialized'):
            return

        self.file_path = Path(__file__).parents[2] / "data" / "stats.json"

        # Counter buckets keyed "YYYY-MM-DDTHH" (hourly) and "YYYY-MM-DD" (daily).
        self._hourly: Dict[str, Dict[str, int]] = defaultdict(lambda: {"total": 0, "success": 0, "failed": 0})
        self._daily: Dict[str, Dict[str, int]] = defaultdict(lambda: {"total": 0, "success": 0, "failed": 0})
        self._models: Dict[str, int] = defaultdict(int)

        # Retention policy
        self._hourly_keep = 48  # Keep 48 hourly buckets
        self._daily_keep = 30   # Keep 30 daily buckets

        self._lock = asyncio.Lock()
        self._loaded = False
        self._initialized = True

    async def init(self):
        """Initialize and load persisted data (idempotent)."""
        if not self._loaded:
            await self._load_data()

    async def _load_data(self):
        """Load stats data from disk, restoring the defaultdict wrappers."""
        if self._loaded:
            return

        if not self.file_path.exists():
            self._loaded = True
            return

        try:
            async with self._lock:
                content = await asyncio.to_thread(self.file_path.read_bytes)
                if content:
                    data = orjson.loads(content)

                    # orjson returns plain dicts; restore defaultdict structure
                    self._hourly = defaultdict(lambda: {"total": 0, "success": 0, "failed": 0})
                    self._hourly.update(data.get("hourly", {}))

                    self._daily = defaultdict(lambda: {"total": 0, "success": 0, "failed": 0})
                    self._daily.update(data.get("daily", {}))

                    self._models = defaultdict(int)
                    self._models.update(data.get("models", {}))

            self._loaded = True
            logger.debug("[Stats] Stats data loaded successfully")
        except Exception as e:
            logger.error(f"[Stats] Failed to load data: {e}")
            self._loaded = True  # Mark loaded anyway so a later save cannot clobber good data

    async def _save_data(self):
        """Save stats data to disk as a single JSON document."""
        if not self._loaded:
            return

        try:
            # Ensure directory exists
            self.file_path.parent.mkdir(parents=True, exist_ok=True)

            async with self._lock:
                data = {
                    "hourly": dict(self._hourly),
                    "daily": dict(self._daily),
                    "models": dict(self._models)
                }
                content = orjson.dumps(data)
                await asyncio.to_thread(self.file_path.write_bytes, content)
        except Exception as e:
            logger.error(f"[Stats] Failed to save data: {e}")

    async def record_request(self, model: str, success: bool) -> None:
        """Record one request outcome in the hourly, daily and model buckets."""
        if not self._loaded:
            await self.init()

        now = datetime.now()
        hour_key = now.strftime("%Y-%m-%dT%H")
        day_key = now.strftime("%Y-%m-%d")
        outcome = "success" if success else "failed"

        # Hourly stats
        self._hourly[hour_key]["total"] += 1
        self._hourly[hour_key][outcome] += 1

        # Daily stats
        self._daily[day_key]["total"] += 1
        self._daily[day_key][outcome] += 1

        # Model stats
        self._models[model] += 1

        # Drop buckets that fell out of the retention window
        self._cleanup()

        # Fire-and-forget persistence so callers are not blocked on disk I/O
        asyncio.create_task(self._save_data())

    def _cleanup(self) -> None:
        """Trim hourly/daily buckets beyond the retention limits, oldest first.

        Bucket keys sort chronologically because they are zero-padded
        ISO-style strings, so sorted()[: -keep] selects the oldest ones.
        """
        hour_keys = list(self._hourly.keys())
        if len(hour_keys) > self._hourly_keep:
            for key in sorted(hour_keys)[:-self._hourly_keep]:
                del self._hourly[key]

        day_keys = list(self._daily.keys())
        if len(day_keys) > self._daily_keep:
            for key in sorted(day_keys)[:-self._daily_keep]:
                del self._daily[key]

    def get_stats(self, hours: int = 24, days: int = 7) -> Dict[str, Any]:
        """Get stats for the last *hours* hours and *days* days.

        Returns hourly/daily series (zero-filled for empty buckets, oldest
        first), the top 10 models by call count, and a summary computed over
        ALL retained hourly buckets (i.e. up to the 48-hour retention window).
        """
        from datetime import timedelta  # hoisted out of the loops below

        now = datetime.now()

        # Last N hours, oldest first
        hourly_data = []
        for i in range(hours - 1, -1, -1):
            dt = now - timedelta(hours=i)
            key = dt.strftime("%Y-%m-%dT%H")
            data = self._hourly.get(key, {"total": 0, "success": 0, "failed": 0})
            hourly_data.append({
                "hour": dt.strftime("%H:00"),
                "date": dt.strftime("%m-%d"),
                **data
            })

        # Last N days, oldest first
        daily_data = []
        for i in range(days - 1, -1, -1):
            dt = now - timedelta(days=i)
            key = dt.strftime("%Y-%m-%d")
            data = self._daily.get(key, {"total": 0, "success": 0, "failed": 0})
            daily_data.append({
                "date": dt.strftime("%m-%d"),
                **data
            })

        # Model stats (Top 10)
        model_data = sorted(self._models.items(), key=lambda x: x[1], reverse=True)[:10]

        # Totals over all retained hourly buckets
        total_requests = sum(d["total"] for d in self._hourly.values())
        total_success = sum(d["success"] for d in self._hourly.values())
        total_failed = sum(d["failed"] for d in self._hourly.values())

        return {
            "hourly": hourly_data,
            "daily": daily_data,
            "models": [{"model": m, "count": c} for m, c in model_data],
            "summary": {
                "total": total_requests,
                "success": total_success,
                "failed": total_failed,
                "success_rate": round(total_success / total_requests * 100, 1) if total_requests > 0 else 0
            }
        }

    async def reset(self) -> None:
        """Reset all stats and persist the empty state."""
        self._hourly.clear()
        self._daily.clear()
        self._models.clear()
        await self._save_data()
201
+ await self._save_data()
202
+
203
+
204
+ # Global instance
205
+ request_stats = RequestStats()
app/template/admin.html ADDED
The diff for this file is too large to render. See raw diff
 
app/template/favicon.png ADDED

Git LFS Details

  • SHA256: d92973dce171d4f418c4c9a883cd754c6e9828a0f1f7cdf7af19d4896b852306
  • Pointer size: 131 Bytes
  • Size of remote file: 104 kB
app/template/login.html ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en" class="h-full">
3
+
4
+ <head>
5
+ <meta charset="UTF-8">
6
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
7
+ <title>Sign In - Grok2API</title>
8
+ <link rel="icon" type="image/png" href="/static/favicon.png">
9
+ <script src="https://cdn.tailwindcss.com"></script>
10
+ <script>
11
+ tailwind.config = { theme: { extend: { colors: { border: "hsl(0 0% 89%)", input: "hsl(0 0% 89%)", ring: "hsl(0 0% 3.9%)", background: "hsl(0 0% 100%)", foreground: "hsl(0 0% 3.9%)", primary: { DEFAULT: "hsl(0 0% 9%)", foreground: "hsl(0 0% 98%)" }, secondary: { DEFAULT: "hsl(0 0% 96.1%)", foreground: "hsl(0 0% 9%)" }, muted: { DEFAULT: "hsl(0 0% 96.1%)", foreground: "hsl(0 0% 45.1%)" }, destructive: { DEFAULT: "hsl(0 84.2% 60.2%)", foreground: "hsl(0 0% 98%)" } } } } }
12
+ </script>
13
+ <style>
14
+ @keyframes slide-up {
15
+ from {
16
+ transform: translateY(100%);
17
+ opacity: 0
18
+ }
19
+
20
+ to {
21
+ transform: translateY(0);
22
+ opacity: 1
23
+ }
24
+ }
25
+
26
+ .animate-slide-up {
27
+ animation: slide-up .3s ease-out
28
+ }
29
+ </style>
30
+ </head>
31
+
32
+ <body class="h-full bg-background text-foreground antialiased">
33
+ <div class="flex min-h-full flex-col justify-center py-12 px-4 sm:px-6 lg:px-8">
34
+ <div class="sm:mx-auto sm:w-full sm:max-w-md">
35
+ <div class="text-center">
36
+ <h1 class="text-4xl font-bold">Grok2API</h1>
37
+ <p class="mt-2 text-sm text-muted-foreground">Admin Console</p>
38
+ </div>
39
+ </div>
40
+
41
+ <div class="sm:mx-auto sm:w-full sm:max-w-md">
42
+ <div class="bg-background py-8 px-4 sm:px-10 rounded-lg">
43
+ <form id="loginForm" class="space-y-6">
44
+ <div class="space-y-2">
45
+ <label for="username" class="text-sm font-medium">Username</label>
46
+ <input type="text" id="username" name="username" required
47
+ class="flex h-10 w-full rounded-md border border-input bg-background px-3 py-2 text-sm placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring disabled:opacity-50"
48
+ placeholder="Enter username">
49
+ </div>
50
+ <div class="space-y-2">
51
+ <label for="password" class="text-sm font-medium">Password</label>
52
+ <input type="password" id="password" name="password" required
53
+ class="flex h-10 w-full rounded-md border border-input bg-background px-3 py-2 text-sm placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring disabled:opacity-50"
54
+ placeholder="Enter password">
55
+ </div>
56
+ <button type="submit" id="loginButton"
57
+ class="inline-flex items-center justify-center rounded-md font-medium transition-colors bg-primary text-primary-foreground hover:bg-primary/90 h-10 w-full disabled:opacity-50">Sign In</button>
58
+ </form>
59
+
60
+ <div class="mt-6 text-center text-xs text-muted-foreground space-y-1">
61
+ <p>Created By Chenyme Β© 2025</p>
62
+ <p>Fork maintained by: @Tomiya233</p>
63
+ </div>
64
+ </div>
65
+ </div>
66
+ </div>
67
+
68
+ <script>
69
+ const form = document.getElementById('loginForm'), btn = document.getElementById('loginButton');
70
+ form.addEventListener('submit', async (e) => { e.preventDefault(); btn.disabled = true; btn.textContent = 'Signing in...'; try { const fd = new FormData(form), r = await fetch('/api/login', { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({ username: fd.get('username'), password: fd.get('password') }) }); const d = await r.json(); d.success ? (localStorage.setItem('adminToken', d.token), location.href = '/manage') : showToast(d.message || 'Login failed', 'error') } catch (e) { showToast('Network error, please try again later', 'error') } finally { btn.disabled = false; btn.textContent = 'Sign In' } });
71
+ function showToast(m, t = 'error') { const d = document.createElement('div'), bc = { success: 'bg-green-600', error: 'bg-destructive', info: 'bg-primary' }; d.className = `fixed bottom-4 right-4 ${bc[t] || bc.error} text-white px-4 py-2.5 rounded-lg shadow-lg text-sm font-medium z-50 animate-slide-up`; d.textContent = m; document.body.appendChild(d); setTimeout(() => { d.style.opacity = '0'; d.style.transition = 'opacity .3s'; setTimeout(() => d.parentNode && document.body.removeChild(d), 300) }, 2000) }
72
+ window.addEventListener('DOMContentLoaded', () => { const t = localStorage.getItem('adminToken'); t && fetch('/api/stats', { headers: { Authorization: `Bearer ${t}` } }).then(r => { if (r.ok) location.href = '/manage' }) });
73
+ </script>
74
+ </body>
75
+
76
+ </html>
data/setting.toml ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [grok]
2
+ api_key = ""
3
+ proxy_url = ""
4
+ cache_proxy_url = ""
5
+ cf_clearance = ""
6
+ x_statsig_id = "ZTpUeXBlRXJyb3I6IENhbm5vdCByZWFkIHByb3BlcnRpZXMgb2YgdW5kZWZpbmVkIChyZWFkaW5nICdjaGlsZE5vZGVzJyk="
7
+ filtered_tags = "xaiartifact,xai:tool_usage_card,grok:render"
8
+ stream_chunk_timeout = 120
9
+ stream_total_timeout = 600
10
+ stream_first_response_timeout = 30
11
+ temporary = true
12
+ show_thinking = true
13
+ dynamic_statsig = true
14
+ proxy_pool_url = ""
15
+ proxy_pool_interval = 300
16
+ retry_status_codes = [ 401, 429,]
17
+
18
+ [global]
19
+ base_url = ""
20
+ log_level = "INFO"
21
+ image_mode = "url"
22
+ admin_password = "admin"
23
+ admin_username = "admin"
24
+ image_cache_max_size_mb = 512
25
+ video_cache_max_size_mb = 1024
data/temp/image.temp ADDED
File without changes
data/token.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "ssoSuper": {},
3
+ "ssoNormal": {}
4
+ }
docker-compose.yml ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ services:
2
+ grok2api:
3
+ image: ghcr.io/chenyme/grok2api:latest
4
+ ports:
5
+ - "8000:8000"
6
+ volumes:
7
+ - grok_data:/app/data
8
+ - ./logs:/app/logs
9
+ environment:
10
+ # ===== Storage mode =====
11
+ # Supports file, mysql, or redis
12
+ - STORAGE_MODE=file
13
+
14
+ # ===== Database =====
15
+ # Only required when STORAGE_MODE=mysql or redis
16
+ # - DATABASE_URL=mysql://user:password@host:3306/grok2api
17
+ # MySQL format: mysql://user:password@host:port/database
18
+ # Redis format: redis://host:port/db or redis://user:password@host:port/db
19
+
20
+ # ===== Worker count =====
21
+ # Default 1. Recommendation: CPU cores * 2. Use MySQL/Redis in multi-process mode.
22
+ - WORKERS=1
23
+
24
+ volumes:
25
+ grok_data:
docker-entrypoint.sh ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/sh
2
+ set -e
3
+
4
+ # Persistent data directory (Hugging Face mounts /data)
5
+ if [ -z "${DATA_DIR}" ]; then
6
+ if [ -d "/data" ]; then
7
+ DATA_DIR="/data"
8
+ else
9
+ DATA_DIR="/app/data"
10
+ fi
11
+ elif [ "${DATA_DIR}" = "/data/grok2api" ] && [ -d "/data" ]; then
12
+ # Normalize legacy default to the Space persistent volume root
13
+ DATA_DIR="/data"
14
+ fi
15
+
16
+ echo "[Grok2API] Using DATA_DIR=${DATA_DIR}"
17
+
18
+ # Ensure directories exist
19
+ mkdir -p \
20
+ "$DATA_DIR/temp/image" \
21
+ "$DATA_DIR/temp/video" \
22
+ /app/logs
23
+
24
+ echo "[Grok2API] Checking config files..."
25
+
26
+ # Initialize setting.toml only if missing
27
+ if [ ! -f "$DATA_DIR/setting.toml" ]; then
28
+ echo "[Grok2API] Initializing setting.toml..."
29
+ cat > "$DATA_DIR/setting.toml" << 'EOF'
30
+ [global]
31
+ base_url = "http://localhost:8000"
32
+ log_level = "INFO"
33
+ image_mode = "url"
34
+ admin_password = "admin"
35
+ admin_username = "admin"
36
+ image_cache_max_size_mb = 512
37
+ video_cache_max_size_mb = 1024
38
+ image_download_timeout = 30
39
+ image_download_max_size_mb = 20
40
+ max_upload_concurrency = 20
41
+ max_request_concurrency = 50
42
+ batch_save_interval = 1.0
43
+ batch_save_threshold = 10
44
+
45
+ [grok]
46
+ api_key = ""
47
+ proxy_url = ""
48
+ cache_proxy_url = ""
49
+ cf_clearance = ""
50
+ x_statsig_id = "ZTpUeXBlRXJyb3I6IENhbm5vdCByZWFkIHByb3BlcnRpZXMgb2YgdW5kZWZpbmVkIChyZWFkaW5nICdjaGlsZE5vZGVzJyk="
51
+ dynamic_statsig = true
52
+ filtered_tags = "xaiartifact,xai:tool_usage_card,grok:render"
53
+ stream_chunk_timeout = 120
54
+ stream_total_timeout = 600
55
+ stream_first_response_timeout = 30
56
+ temporary = true
57
+ show_thinking = true
58
+ proxy_pool_url = ""
59
+ proxy_pool_interval = 300
60
+ retry_status_codes = [401, 429]
61
+ EOF
62
+ fi
63
+
64
+ # Initialize token.json only if missing
65
+ if [ ! -f "$DATA_DIR/token.json" ]; then
66
+ echo "[Grok2API] Initializing token.json..."
67
+ echo '{"ssoNormal": {}, "ssoSuper": {}}' > "$DATA_DIR/token.json"
68
+ fi
69
+
70
+ echo "[Grok2API] Config file check completed"
71
+ echo "[Grok2API] Starting application..."
72
+
73
+ # Start the app
74
+ exec "$@"
main.py ADDED
@@ -0,0 +1,196 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Grok2API"""
2
+
3
+ import os
4
+ import sys
5
+ from contextlib import asynccontextmanager
6
+ from pathlib import Path
7
+
8
+ from fastapi import FastAPI
9
+ from fastapi.middleware.cors import CORSMiddleware
10
+ from fastapi.staticfiles import StaticFiles
11
+ from app.core.logger import logger
12
+ from app.core.exception import register_exception_handlers
13
+ from app.core.storage import storage_manager
14
+ from app.core.config import setting
15
+ from app.services.grok.token import token_manager
16
+ from app.api.v1.chat import router as chat_router
17
+ from app.api.v1.models import router as models_router
18
+ from app.api.v1.images import router as images_router
19
+ from app.api.admin.manage import router as admin_router
20
+ from app.services.mcp import mcp
21
+
22
+ # 0. Compatibility check
23
+ try:
24
+ if sys.platform != 'win32':
25
+ import uvloop
26
+ uvloop.install()
27
+ logger.info("[Grok2API] Enabled uvloop high-performance event loop")
28
+ else:
29
+ logger.info("[Grok2API] Windows system, using default asyncio event loop")
30
+ except ImportError:
31
+ logger.info("[Grok2API] uvloop not installed, using default asyncio event loop")
32
+
33
+ # 1. Create MCP FastAPI app instance
34
+ mcp_app = mcp.http_app(stateless_http=True, transport="streamable-http")
35
+
36
+ # 2. Define app lifespan
37
@asynccontextmanager
async def lifespan(app: FastAPI):
    """
    Startup order:
    1. Initialize core services (storage, settings, token_manager)
    2. Load token data asynchronously
    3. Start batch save task
    4. Start MCP service lifespan

    Shutdown order (LIFO):
    1. Stop MCP service lifespan
    2. Stop batch save task and flush data
    3. Close core services
    """
    # --- Startup ---
    # 1. Initialize core services
    await storage_manager.init()

    # Set storage on config and token manager
    storage = storage_manager.get_storage()
    setting.set_storage(storage)
    token_manager.set_storage(storage)

    # 2. Reload config (now that storage is attached)
    await setting.reload()
    data_dir = getattr(storage, "data_dir", None)
    if data_dir:
        logger.info(f"[Storage] Data dir: {data_dir}")
    logger.info(f"[Config] Config path: {setting.config_path}")
    logger.info("[Grok2API] Core services initialized")

    # 2.5. Initialize proxy pool (imported lazily here, not at module top)
    from app.core.proxy_pool import proxy_pool
    proxy_url = setting.grok_config.get("proxy_url", "")
    proxy_pool_url = setting.grok_config.get("proxy_pool_url", "")
    proxy_pool_interval = setting.grok_config.get("proxy_pool_interval", 300)
    proxy_pool.configure(proxy_url, proxy_pool_url, proxy_pool_interval)

    # 3. Load token data asynchronously
    # NOTE(review): calls the manager's private _load_data directly —
    # confirm token_manager exposes no public load hook for this.
    await token_manager._load_data()
    logger.info(f"[Token] Data path: {token_manager.token_file}")
    logger.info("[Grok2API] Token data loaded")

    # 3.5. Load API key data
    from app.services.api_keys import api_key_manager
    api_key_manager.set_storage(storage)
    await api_key_manager.init()
    logger.info("[Grok2API] API key data loaded")

    # 3.6. Load stats and log data
    from app.services.request_stats import request_stats
    from app.services.request_logger import request_logger
    await request_stats.init()
    await request_logger.init()
    logger.info("[Grok2API] Stats and log data loaded")

    # 4. Start batch save task
    await token_manager.start_batch_save()

    # 5. Manage MCP service lifespan by hand so shutdown can unwind it
    #    in LIFO order inside the finally block below.
    mcp_lifespan_context = mcp_app.lifespan(app)
    await mcp_lifespan_context.__aenter__()
    logger.info("[MCP] MCP service initialized")

    logger.info("[Grok2API] App started successfully")

    try:
        yield
    finally:
        # --- Shutdown ---
        # 1. Exit MCP service lifespan
        await mcp_lifespan_context.__aexit__(None, None, None)
        logger.info("[MCP] MCP service shut down")

        # 2. Stop batch save task and flush data
        await token_manager.shutdown()
        logger.info("[Token] Token manager shut down")

        # 3. Close core services
        await storage_manager.close()
        logger.info("[Grok2API] App shut down successfully")
118
+
119
+
120
+ # Initialize logging
121
+ logger.info("[Grok2API] App is starting...")
122
+ logger.info("[Grok2API] Fork maintained by: @Tomiya233")
123
+
124
+ # Create FastAPI app
125
+ app = FastAPI(
126
+ title="Grok2API",
127
+ description="Grok API conversion service",
128
+ version="1.3.1",
129
+ lifespan=lifespan
130
+ )
131
+
132
+ # Register global exception handlers
133
+ register_exception_handlers(app)
134
+
135
+ # Register routes
136
+ app.include_router(chat_router, prefix="/v1")
137
+ app.include_router(models_router, prefix="/v1")
138
+ app.include_router(images_router)
139
+ app.include_router(admin_router)
140
+
141
+ # Mount static files
142
+ app.mount("/static", StaticFiles(directory="app/template"), name="template")
143
+
144
@app.get("/")
async def root():
    """Root path: redirect visitors to the admin login page."""
    # Imported lazily; only needed to build this redirect response.
    from fastapi.responses import RedirectResponse
    return RedirectResponse(url="/login")
149
+
150
+
151
@app.get("/health")
async def health_check():
    """Health check endpoint.

    Returns a static liveness payload. The version now matches the FastAPI
    app metadata declared above ("1.3.1"); it was previously stale ("1.0.3").
    """
    return {
        "status": "healthy",
        "service": "Grok2API",
        "version": "1.3.1"
    }
159
+
160
+ # Mount MCP server
161
+ app.mount("", mcp_app)
162
+
163
+
164
if __name__ == "__main__":
    import uvicorn

    # Number of worker processes (`os` is already imported at module level,
    # so the previous duplicate `import os` here was removed).
    workers = int(os.getenv("WORKERS", "1"))

    # Hugging Face Spaces (and similar platforms) provide PORT; default to 7860
    port = int(os.getenv("PORT", "7860"))

    # Warn about multi-worker mode with file storage
    if workers > 1:
        logger.info(
            f"[Grok2API] Multi-process mode enabled (workers={workers}). "
            f"For best stability and performance, Redis or MySQL storage is recommended."
        )

    # Select event loop: prefer uvloop for a single worker on non-Windows hosts
    loop_type = "auto"
    if workers == 1 and sys.platform != "win32":
        try:
            import uvloop  # noqa: F401 - availability probe only
            loop_type = "uvloop"
        except ImportError:
            pass

    uvicorn.run(
        "main:app",
        host="0.0.0.0",
        port=port,
        workers=workers,
        loop=loop_type,
    )
pyproject.toml ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ name = "grok2api"
3
+ version = "1.4.3"
4
+ description = "Grok2API rebuilt on FastAPI, adapted to the latest web call format, supporting streaming chat, image generation, image editing, web search, video generation, deep reasoning, concurrent token pool usage, and automatic load balancing."
5
+ readme = "README.md"
6
+ requires-python = ">=3.13"
7
+ dependencies = [
8
+ "aiofiles==25.1.0",
9
+ "aiomysql==0.2.0",
10
+ "curl-cffi==0.13.0",
11
+ "fastapi==0.119.0",
12
+ "pydantic==2.12.2",
13
+ "python-dotenv==1.1.1",
14
+ "redis==6.4.0",
15
+ "requests==2.32.5",
16
+ "starlette==0.48.0",
17
+ "toml==0.10.2",
18
+ "uvloop==0.21.0 ; sys_platform != 'win32'",
19
+ "uvicorn==0.37.0",
20
+ "portalocker==3.0.0",
21
+ "fastmcp==2.12.4",
22
+ "cryptography==46.0.3",
23
+ "orjson==3.11.4",
24
+ "aiohttp==3.13.2",
25
+ "huggingface_hub==0.25.1",
26
+ ]
readme.md ADDED
@@ -0,0 +1,248 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Grok2API
2
+
3
+ Grok2API rebuilt on **FastAPI**, fully adapted to the latest web calling format. Supports streaming chat, image generation, image editing, web search, and deep reasoning, with token-pool concurrency and automatic load balancing.
4
+
5
+ ## πŸ†• Fork Enhancements
6
+
7
+ This fork adds:
8
+
9
+ - **Multi-key management and persistence**: Admins can batch create, label, and delete API keys with bulk operations. All keys are persisted across restarts.
10
+ - **Audit logging**: Real-time request logging with file persistence.
11
+ - **Concurrency performance optimization (Critical)**: Rebuilt Grok request/response handling with fully async streaming (`aiter_lines`), eliminating admin panel freezes or slow responses during generation.
12
+ - **Token smart cooldown**: Automatically cools down tokens after failures to avoid repeated use of failing tokens.
13
+ - Normal errors: cooldown for 5 requests
14
+ - 429 + quota: cooldown for 1 hour
15
+ - 429 + no quota: cooldown for 10 hours
16
+ - **One-click refresh for all tokens**: Batch refresh remaining counts with live progress.
17
+ - **Concurrency guard**: Reject duplicate refresh requests while a refresh is running.
18
+ - **Request stats with persistence**: Hourly/daily trends, success rate, model distribution; stored persistently.
19
+ - **Cache preview**: Admin panel preview for cached images/videos.
20
+
21
+ <br>
22
+
23
+ ## Usage
24
+
25
+ ### Call Limits and Quotas
26
+
27
+ - **Basic**: **80 calls / 20 hours**
28
+ - **Super**: quota TBD (not tested)
29
+ - System automatically load-balances across accounts. Use the **admin panel** to monitor usage and status.
30
+
31
+ ### Image Generation
32
+
33
+ - In chat, prompt like β€œdraw a moon” to trigger image generation.
34
+ - Returns **two images in Markdown** per request, consuming 4 calls.
35
+ - **Note**: Grok image direct links may return 403. The system caches images locally. You must set `Base Url` correctly so images display.
36
+
37
+ ### Video Generation
38
+
39
+ - Use `grok-imagine-0.9` with an image + prompt (same format as OpenAI image analysis).
40
+ - Returns: `<video src="{full_video_url}" controls="controls"></video>`
41
+ - **Note**: Grok video direct links may return 403. The system caches videos locally. You must set `Base Url` correctly so videos display.
42
+
43
+ ```
44
+ curl https://your-server/v1/chat/completions \
45
+ -H "Content-Type: application/json" \
46
+ -H "Authorization: Bearer $GROK2API_API_KEY" \
47
+ -d '{
48
+ "model": "grok-imagine-0.9",
49
+ "messages": [
50
+ {
51
+ "role": "user",
52
+ "content": [
53
+ {
54
+ "type": "text",
55
+ "text": "Make the sun rise"
56
+ },
57
+ {
58
+ "type": "image_url",
59
+ "image_url": {
60
+ "url": "https://your-image.jpg"
61
+ }
62
+ }
63
+ ]
64
+ }
65
+ ]
66
+ }'
67
+ ```
68
+
69
+ ### About `x_statsig_id`
70
+
71
+ - `x_statsig_id` is Grok’s anti-bot token.
72
+ - **New users should not change it**; keep the default.
73
+ - Attempts to obtain it via Camoufox were dropped because Grok now restricts non-logged-in `x_statsig_id`. A fixed value is used for compatibility.
74
+
75
+ <br>
76
+
77
+ ## Deployment
78
+
79
+ ### Option 1: Docker Compose (Recommended)
80
+
81
+ Because this fork includes changes, build locally:
82
+
83
+ 1. Clone this repo
84
+ ```bash
85
+ git clone https://github.com/Tomiya233/grok2api.git
86
+ cd grok2api
87
+ ```
88
+
89
+ 2. Start the service
90
+ ```bash
91
+ docker-compose up -d --build
92
+ ```
93
+
94
+ **docker-compose.yml example:**
95
+ ```yaml
96
+ services:
97
+ grok2api:
98
+ build: .
99
+ image: grok2api:latest
100
+ container_name: grok2api
101
+ restart: always
102
+ ports:
103
+ - "8000:8000"
104
+ volumes:
105
+ - grok_data:/app/data
106
+ - ./logs:/app/logs
107
+ environment:
108
+ - LOG_LEVEL=INFO
109
+ logging:
110
+ driver: "json-file"
111
+ options:
112
+ max-size: "10m"
113
+ max-file: "3"
114
+
115
+ volumes:
116
+ grok_data:
117
+ ```
118
+
119
+ ### Option 2: Run with Python
120
+
121
+ **Requirements**: Python 3.10+ (recommend `uv`).
122
+
123
+ 1. Install uv
124
+ ```bash
125
+ curl -LsSf https://astral.sh/uv/install.sh | sh
126
+ ```
127
+
128
+ 2. Run the service
129
+ ```bash
130
+ # install deps and run
131
+ uv sync
132
+ uv run python main.py
133
+ ```
134
+
135
+ The service runs at `http://127.0.0.1:8000` by default.
136
+
137
+ ### Environment Variables
138
+
139
+ | Variable | Required | Description | Example |
140
+ |---------------|----------|-----------------------------------------------|---------|
141
+ | STORAGE_MODE | No | Storage mode: file/mysql/redis | file |
142
+ | DATABASE_URL | No | Database URL (required for mysql/redis) | mysql://user:pass@host:3306/db |
143
+
144
+ **Storage modes:**
145
+ - `file`: local file storage (default)
146
+ - `mysql`: MySQL storage, requires DATABASE_URL
147
+ - `redis`: Redis storage, requires DATABASE_URL
148
+
149
+ <br>
150
+
151
+ ## API Overview
152
+
153
+ > Fully compatible with OpenAI API. Requests require **Authorization header**.
154
+
155
+ | Method | Endpoint | Description | Auth |
156
+ |--------|---------------------------|-----------------------------------------------|------|
157
+ | POST | `/v1/chat/completions` | Create chat completion (stream/non-stream) | βœ… |
158
+ | GET | `/v1/models` | List supported models | βœ… |
159
+ | GET | `/images/{img_path}` | Get generated image file | ❌ |
160
+
161
+ <br>
162
+
163
+ <details>
164
+ <summary>Admin and stats endpoints (expand)</summary>
165
+
166
+ | Method | Endpoint | Description | Auth |
167
+ |--------|----------------------------------|------------------------------------|------|
168
+ | GET | /login | Admin login page | ❌ |
169
+ | GET | /manage | Admin console page | ❌ |
170
+ | POST | /api/login | Admin login | ❌ |
171
+ | POST | /api/logout | Admin logout | βœ… |
172
+ | GET | /api/tokens | List tokens | βœ… |
173
+ | POST | /api/tokens/add | Batch add tokens | βœ… |
174
+ | POST | /api/tokens/delete | Batch delete tokens | βœ… |
175
+ | GET | /api/settings | Get settings | βœ… |
176
+ | POST | /api/settings | Update settings | βœ… |
177
+ | GET | /api/cache/size | Cache size | βœ… |
178
+ | POST | /api/cache/clear | Clear all cache | βœ… |
179
+ | POST | /api/cache/clear/images | Clear image cache | βœ… |
180
+ | POST | /api/cache/clear/videos | Clear video cache | βœ… |
181
+ | GET | /api/stats | Stats summary | βœ… |
182
+ | POST | /api/tokens/tags | Update token tags | βœ… |
183
+ | POST | /api/tokens/note | Update token note | βœ… |
184
+ | POST | /api/tokens/test | Test token availability | βœ… |
185
+ | GET | /api/tokens/tags/all | List all tags | βœ… |
186
+ | GET | /api/storage/mode | Storage mode | βœ… |
187
+ | POST | /api/tokens/refresh-all | Refresh all token limits | βœ… |
188
+ | GET | /api/tokens/refresh-progress | Refresh progress | βœ… |
189
+ | GET | /api/keys | List API keys | βœ… |
190
+ | POST | /api/keys/add | Create new API key | βœ… |
191
+ | POST | /api/keys/delete | Delete API key | βœ… |
192
+ | POST | /api/keys/status | Toggle API key status | βœ… |
193
+ | POST | /api/keys/name | Update API key name | βœ… |
194
+ | GET | /api/logs | Get logs (up to 1000) | βœ… |
195
+ | POST | /api/logs/clear | Clear audit logs | βœ… |
196
+
197
+ </details>
198
+
199
+ <br>
200
+
201
+ ## Available Models
202
+
203
+ | Model | Cost | Account Type | Image Gen/Edit | Deep Thinking | Web Search | Video Gen |
204
+ |----------------------|------|---------------|----------------|---------------|------------|-----------|
205
+ | `grok-4.1` | 1 | Basic/Super | βœ… | βœ… | βœ… | ❌ |
206
+ | `grok-4.1-thinking` | 1 | Basic/Super | βœ… | βœ… | βœ… | ❌ |
207
+ | `grok-imagine-0.9` | - | Basic/Super | βœ… | ❌ | ❌ | βœ… |
208
+ | `grok-4-fast` | 1 | Basic/Super | βœ… | βœ… | βœ… | ❌ |
209
+ | `grok-4-fast-expert` | 4 | Basic/Super | βœ… | βœ… | βœ… | ❌ |
210
+ | `grok-4-expert` | 4 | Basic/Super | βœ… | βœ… | βœ… | ❌ |
211
+ | `grok-4-heavy` | 1 | Super | βœ… | βœ… | βœ… | ❌ |
212
+ | `grok-3-fast` | 1 | Basic/Super | βœ… | ❌ | βœ… | ❌ |
213
+
214
+ <br>
215
+
216
+ ## Config Parameters
217
+
218
+ > After starting the service, log in at `/login` to configure settings.
219
+
220
+ | Parameter | Scope | Required | Description | Default |
221
+ |------------------------------|--------|----------|----------------------------------------------|---------|
222
+ | admin_username | global | No | Admin username | "admin" |
223
+ | admin_password | global | No | Admin password | "admin" |
224
+ | log_level | global | No | Log level: DEBUG/INFO/... | "INFO" |
225
+ | image_mode | global | No | Image return mode: url/base64 | "url" |
226
+ | image_cache_max_size_mb | global | No | Image cache max size (MB) | 512 |
227
+ | video_cache_max_size_mb | global | No | Video cache max size (MB) | 1024 |
228
+ | base_url | global | No | Base URL for service and image links | "" |
229
+ | api_key | grok | No | API key (optional, for extra security) | "" |
230
+ | proxy_url | grok | No | HTTP proxy URL | "" |
231
+ | stream_chunk_timeout | grok | No | Stream chunk timeout (seconds) | 120 |
232
+ | stream_first_response_timeout| grok | No | First response timeout (seconds) | 30 |
233
+ | stream_total_timeout | grok | No | Total stream timeout (seconds) | 600 |
234
+ | cf_clearance | grok | No | Cloudflare clearance token | "" |
235
+ | x_statsig_id | grok | Yes | Anti-bot identifier | "ZTpUeXBlRXJyb3I6IENhbm5vdCByZWFkIHByb3BlcnRpZXMgb2YgdW5kZWZpbmVkIChyZWFkaW5nICdjaGlsZE5vZGVzJyk=" |
236
+ | filtered_tags | grok | No | Filtered tags (comma-separated) | "xaiartifact,xai:tool_usage_card,grok:render" |
237
+ | show_thinking | grok | No | Show thinking: true/false | true |
238
+ | temporary | grok | No | Session mode: true/false | true |
239
+
240
+ <br>
241
+
242
+ ## ⚠️ Notes
243
+
244
+ This project is for learning and research only. Please comply with applicable terms.
245
+
246
+ <br>
247
+
248
+ > Rebuilt with guidance from: [LINUX DO](https://linux.do), [VeroFess/grok2api](https://github.com/VeroFess/grok2api), [xLmiler/grok2api_python](https://github.com/xLmiler/grok2api_python)
requirements.txt ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ toml==0.10.2
2
+ fastapi==0.119.0
3
+ uvicorn==0.37.0
4
+ uvloop==0.21.0; sys_platform != 'win32'
5
+ python-dotenv==1.1.1
6
+ curl_cffi==0.13.0
7
+ requests==2.32.5
8
+ starlette==0.48.0
9
+ pydantic==2.12.2
10
+ aiofiles==25.1.0
11
+ portalocker==3.0.0
12
+ aiomysql==0.2.0
13
+ redis==6.4.0
14
+ fastmcp==2.12.4
15
+ cryptography==46.0.3
16
+ orjson==3.11.4
17
+ aiohttp==3.13.2
18
+ huggingface_hub==0.25.1
test/test_concurrency.py ADDED
@@ -0,0 +1,276 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Grok2API concurrency performance test script
4
+
5
+ Test API performance under different concurrency levels
6
+ """
7
+
8
+ import asyncio
9
+ import aiohttp
10
+ import time
11
+ import statistics
12
+ import argparse
13
+ from datetime import datetime
14
+ from typing import List, Dict, Any
15
+ import json
16
+
17
+
18
class ConcurrencyTester:
    """Concurrency tester for the Grok2API chat-completions endpoint.

    Fires identical non-streaming chat requests at the target server with a
    bounded number in flight, collecting one result record per request in
    ``self.results``, then prints aggregate throughput/latency statistics.
    """

    def __init__(self, base_url: str, api_key: str = None):
        # Normalize so the URL join in test_request never produces "//".
        self.base_url = base_url.rstrip('/')
        self.api_key = api_key  # optional Bearer token; falsy value disables the auth header
        self.results: List[Dict[str, Any]] = []

    async def test_request(self, session: aiohttp.ClientSession, request_id: int) -> Dict[str, Any]:
        """Send one non-streaming chat request and return a result record.

        Every record has ``id``, ``status`` and ``elapsed`` (seconds);
        successes add ``http_status``/``response_length``, failures add
        ``error`` (HTTP error bodies truncated to 200 chars).
        """
        url = f"{self.base_url}/v1/chat/completions"

        headers = {"Content-Type": "application/json"}
        if self.api_key:
            headers["Authorization"] = f"Bearer {self.api_key}"

        payload = {
            "model": "grok-3-fast",
            "messages": [
                {"role": "user", "content": f"Test request #{request_id}, reply OK briefly"}
            ],
            "stream": False,
            "max_tokens": 10,
        }

        start_time = time.time()

        try:
            # Per-request timeout: aiohttp expects a ClientTimeout object here;
            # passing a bare int is deprecated and removed in newer releases.
            async with session.post(
                url,
                json=payload,
                headers=headers,
                timeout=aiohttp.ClientTimeout(total=30),
            ) as response:
                status = response.status

                if status == 200:
                    data = await response.json()
                    elapsed = time.time() - start_time
                    return {
                        "id": request_id,
                        "status": "success",
                        "http_status": status,
                        "elapsed": elapsed,
                        "response_length": len(json.dumps(data)),
                    }

                elapsed = time.time() - start_time
                error_text = await response.text()
                return {
                    "id": request_id,
                    "status": "error",
                    "http_status": status,
                    "elapsed": elapsed,
                    "error": error_text[:200],
                }

        except asyncio.TimeoutError:
            return {
                "id": request_id,
                "status": "timeout",
                "elapsed": time.time() - start_time,
                "error": "Request timeout",
            }

        except Exception as e:
            # Network/protocol errors become records instead of crashing the run.
            return {
                "id": request_id,
                "status": "exception",
                "elapsed": time.time() - start_time,
                "error": str(e),
            }

    async def run_concurrent_test(self, concurrency: int, total_requests: int):
        """Run ``total_requests`` requests with at most ``concurrency`` in flight.

        A semaphore keeps the pipeline full for the whole run; the previous
        batch-and-gather scheme waited for every request in a batch to finish
        before starting the next batch, which understated real throughput.
        """
        print(f"\n{'='*60}")
        print(f"📊 Test config: concurrency {concurrency}, total requests {total_requests}")
        print(f"{'='*60}")

        connector = aiohttp.TCPConnector(limit=concurrency, limit_per_host=concurrency)
        timeout = aiohttp.ClientTimeout(total=60)

        async with aiohttp.ClientSession(connector=connector, timeout=timeout) as session:
            # Warm-up request (id 0); its result is intentionally discarded.
            print("🔥 Warming up...")
            await self.test_request(session, 0)

            print("🚀 Starting concurrency test...")
            start_time = time.time()

            semaphore = asyncio.Semaphore(concurrency)
            completed = 0

            async def bounded_request(i: int) -> Dict[str, Any]:
                # Limit in-flight requests, then report progress as each finishes.
                nonlocal completed
                async with semaphore:
                    result = await self.test_request(session, i)
                completed += 1
                print(f"  Progress: {completed}/{total_requests} ({completed/total_requests*100:.1f}%)", end='\r')
                return result

            tasks = [asyncio.create_task(bounded_request(i)) for i in range(1, total_requests + 1)]
            self.results.extend(await asyncio.gather(*tasks))

            total_time = time.time() - start_time

            # Stats and output
            self.print_statistics(concurrency, total_requests, total_time)

    def print_statistics(self, concurrency: int, total_requests: int, total_time: float):
        """Print success/error counts, throughput and latency percentiles."""
        success_results = [r for r in self.results if r["status"] == "success"]
        error_results = [r for r in self.results if r["status"] != "success"]

        success_count = len(success_results)
        error_count = len(error_results)

        if success_results:
            # Sort once and reuse for min/max and all percentiles (the original
            # re-sorted the list separately for P95 and P99).
            latencies = sorted(r["elapsed"] for r in success_results)
            avg_latency = statistics.mean(latencies)
            min_latency = latencies[0]
            max_latency = latencies[-1]
            p50_latency = statistics.median(latencies)

            def pct(q: float) -> float:
                # Clamp so the index can never run past the last element.
                return latencies[min(int(len(latencies) * q), len(latencies) - 1)]

            p95_latency = pct(0.95)
            p99_latency = pct(0.99)
        else:
            avg_latency = min_latency = max_latency = p50_latency = p95_latency = p99_latency = 0

        throughput = total_requests / total_time if total_time > 0 else 0

        print(f"\n\n{'='*60}")
        print("📈 Test result statistics")
        print(f"{'='*60}")
        print(f"  Test time: {total_time:.2f}s")
        print(f"  Total requests: {total_requests}")
        print(f"  Concurrency: {concurrency}")
        print(f"")
        print(f"  Successful requests: {success_count} ({success_count/total_requests*100:.1f}%)")
        print(f"  Failed requests: {error_count} ({error_count/total_requests*100:.1f}%)")
        print(f"")
        print(f"  Throughput: {throughput:.2f} req/s")
        print(f"")
        print("  Latency stats:")
        print(f"    Min: {min_latency*1000:.0f}ms")
        print(f"    Avg: {avg_latency*1000:.0f}ms")
        print(f"    Max: {max_latency*1000:.0f}ms")
        print(f"    P50: {p50_latency*1000:.0f}ms")
        print(f"    P95: {p95_latency*1000:.0f}ms")
        print(f"    P99: {p99_latency*1000:.0f}ms")

        # Error details, grouped by failure status.
        if error_results:
            print("\n  ⚠️ Error details:")
            error_types = {}
            for r in error_results:
                error_type = r.get("status", "unknown")
                error_types[error_type] = error_types.get(error_type, 0) + 1

            for error_type, count in error_types.items():
                print(f"    {error_type}: {count}")

        print(f"{'='*60}\n")

        # Performance rating
        self.print_performance_rating(throughput, avg_latency)

    def print_performance_rating(self, throughput: float, avg_latency: float):
        """Print a star rating for throughput and average latency."""
        print("🎯 Performance rating:")

        # Throughput rating (req/s thresholds).
        if throughput >= 100:
            rating = "⭐⭐⭐⭐⭐ Excellent"
        elif throughput >= 60:
            rating = "⭐⭐⭐⭐ Good"
        elif throughput >= 30:
            rating = "⭐⭐⭐ Medium"
        elif throughput >= 10:
            rating = "⭐⭐ Low"
        else:
            rating = "⭐ Needs improvement"

        print(f"  Throughput ({throughput:.1f} req/s): {rating}")

        # Latency rating (average, in seconds).
        if avg_latency < 0.5:
            rating = "⭐⭐⭐⭐⭐ Excellent"
        elif avg_latency < 1.0:
            rating = "⭐⭐⭐⭐ Good"
        elif avg_latency < 2.0:
            rating = "⭐⭐⭐ Medium"
        elif avg_latency < 5.0:
            rating = "⭐⭐ High"
        else:
            rating = "⭐ Needs improvement"

        print(f"  Avg latency ({avg_latency*1000:.0f}ms): {rating}")
        print()
224
+
225
+
226
async def main():
    """CLI entry point: parse arguments and drive one or more test passes."""
    parser = argparse.ArgumentParser(description='Grok2API concurrency performance test')
    parser.add_argument('--url', default='http://localhost:8000', help='API base URL')
    parser.add_argument('--key', default='', help='API key (optional)')
    parser.add_argument('-c', '--concurrency', type=int, default=10, help='Concurrency')
    parser.add_argument('-n', '--requests', type=int, default=50, help='Total requests')
    parser.add_argument('--multi-test', action='store_true', help='Run multi-level concurrency tests')
    args = parser.parse_args()

    print(f"""
╔══════════════════════════════════════════════════════════╗
║          Grok2API Concurrency Test Tool                  ║
╚══════════════════════════════════════════════════════════╝

🔗 Target: {args.url}
🔑 API Key: {'Set' if args.key else 'Not set'}
⏰ Start time: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
""")

    tester = ConcurrencyTester(args.url, args.key)

    if args.multi_test:
        # Escalating (concurrency, total requests) pairs.
        for level, volume in ((5, 20), (10, 50), (20, 100), (50, 200)):
            tester.results = []     # start each pass with a clean slate
            await tester.run_concurrent_test(level, volume)
            await asyncio.sleep(2)  # brief cool-down between passes
    else:
        # Single pass with the user-supplied parameters.
        await tester.run_concurrent_test(args.concurrency, args.requests)

    print("\n✅ Test completed!")
    print(f"⏰ End time: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
269
+
270
if __name__ == "__main__":
    # Run the async entry point; keep Ctrl-C and unexpected errors readable.
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        print("\n\n⚠️ Test interrupted by user")
    except Exception as exc:
        print(f"\n\n❌ Test failed: {exc}")
test/test_concurrency.sh ADDED
@@ -0,0 +1,177 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash

# Grok2API concurrency test script (Shell version)
# Uses curl and GNU parallel for concurrent testing

set -e

# Configuration (overridable via environment)
BASE_URL="${BASE_URL:-http://localhost:8000}"
API_KEY="${API_KEY:-}"
CONCURRENCY="${CONCURRENCY:-10}"
TOTAL_REQUESTS="${TOTAL_REQUESTS:-50}"

# Colors
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

echo -e "${BLUE}╔══════════════════════════════════════════════════════════╗${NC}"
echo -e "${BLUE}║     Grok2API Concurrency Test Tool (Shell)               ║${NC}"
echo -e "${BLUE}╚══════════════════════════════════════════════════════════╝${NC}"
echo ""
echo -e "${GREEN}🔗 Target:${NC} $BASE_URL"
echo -e "${GREEN}🔑 API Key:${NC} ${API_KEY:-(Not set)}"
echo -e "${GREEN}📊 Concurrency:${NC} $CONCURRENCY"
echo -e "${GREEN}📈 Total requests:${NC} $TOTAL_REQUESTS"
echo ""

# Check dependencies (bc is required for all the float arithmetic below)
if ! command -v curl &> /dev/null; then
    echo -e "${RED}❌ Error: curl is required${NC}"
    exit 1
fi
if ! command -v bc &> /dev/null; then
    echo -e "${RED}❌ Error: bc is required${NC}"
    exit 1
fi

# Create temp directory; single quotes defer expansion until the trap fires
TMP_DIR=$(mktemp -d)
trap 'rm -rf "$TMP_DIR"' EXIT

# Single request function
test_request() {
    local request_id=$1

    # -w appends HTTP status and total time on separate lines after the body
    local response=$(curl -s -w "\n%{http_code}\n%{time_total}" \
        -X POST "${BASE_URL}/v1/chat/completions" \
        -H "Content-Type: application/json" \
        ${API_KEY:+-H "Authorization: Bearer $API_KEY"} \
        -d "{
            \"model\": \"grok-3-fast\",
            \"messages\": [{\"role\": \"user\", \"content\": \"Test request #${request_id}, reply OK briefly\"}],
            \"stream\": false,
            \"max_tokens\": 10
        }" 2>&1)

    local http_code=$(echo "$response" | tail -n 2 | head -n 1)
    local time_total=$(echo "$response" | tail -n 1)

    # Record result
    echo "${request_id},${http_code},${time_total}" >> "$TMP_DIR/results.csv"

    # Show progress
    echo -ne "\r  Progress: ${request_id}/${TOTAL_REQUESTS}"
}

# Export the function and every variable it reads so GNU parallel's
# subshells see them (TOTAL_REQUESTS was previously missing).
export -f test_request
export BASE_URL API_KEY TMP_DIR TOTAL_REQUESTS

# Clear results file
echo "id,status,time" > "$TMP_DIR/results.csv"

echo -e "${YELLOW}🚀 Starting concurrency test...${NC}"
START_TIME=$(date +%s.%N)

# Use GNU parallel if available, otherwise a simple loop
if command -v parallel &> /dev/null; then
    seq 1 $TOTAL_REQUESTS | parallel -j $CONCURRENCY test_request {}
else
    # Simple background task concurrency
    for i in $(seq 1 $TOTAL_REQUESTS); do
        test_request $i &

        # Control concurrency
        if (( i % CONCURRENCY == 0 )); then
            wait
        fi
    done
    wait
fi

END_TIME=$(date +%s.%N)
TOTAL_TIME=$(echo "$END_TIME - $START_TIME" | bc)

echo -e "\n"

# Results summary
echo -e "${BLUE}═══════════════════════════════════════════════════════════${NC}"
echo -e "${BLUE}📈 Test result summary${NC}"
echo -e "${BLUE}═══════════════════════════════════════════════════════════${NC}"

# Count success/failure (header line never matches $2 == 200)
SUCCESS_COUNT=$(awk -F',' '$2 == 200 {count++} END {print count+0}' "$TMP_DIR/results.csv")
ERROR_COUNT=$((TOTAL_REQUESTS - SUCCESS_COUNT))

echo -e "  Test time: ${TOTAL_TIME}s"
echo -e "  Total requests: ${TOTAL_REQUESTS}"
echo -e "  Concurrency: ${CONCURRENCY}"
echo ""
echo -e "  Successful requests: ${GREEN}${SUCCESS_COUNT}${NC} ($(echo "scale=1; $SUCCESS_COUNT * 100 / $TOTAL_REQUESTS" | bc)%)"
echo -e "  Failed requests: ${RED}${ERROR_COUNT}${NC} ($(echo "scale=1; $ERROR_COUNT * 100 / $TOTAL_REQUESTS" | bc)%)"
echo ""

# Calculate throughput
THROUGHPUT=$(echo "scale=2; $TOTAL_REQUESTS / $TOTAL_TIME" | bc)
echo -e "  Throughput: ${GREEN}${THROUGHPUT}${NC} req/s"
echo ""

# Latency stats (successful requests only)
if [ $SUCCESS_COUNT -gt 0 ]; then
    echo -e "  Latency stats:"

    # Extract latencies for successful requests
    awk -F',' '$2 == 200 {print $3}' "$TMP_DIR/results.csv" | sort -n > "$TMP_DIR/latencies.txt"

    MIN=$(head -n 1 "$TMP_DIR/latencies.txt" | awk '{printf "%.0f", $1*1000}')
    MAX=$(tail -n 1 "$TMP_DIR/latencies.txt" | awk '{printf "%.0f", $1*1000}')
    AVG=$(awk '{sum+=$1; count++} END {printf "%.0f", sum/count*1000}' "$TMP_DIR/latencies.txt")

    # Percentile line numbers, clamped to >= 1 so sed never receives "0p"
    # (with few successes the integer math rounds down to zero).
    P50_LINE=$((SUCCESS_COUNT / 2))
    [ "$P50_LINE" -lt 1 ] && P50_LINE=1
    P50=$(sed -n "${P50_LINE}p" "$TMP_DIR/latencies.txt" | awk '{printf "%.0f", $1*1000}')

    P95_LINE=$(echo "scale=0; $SUCCESS_COUNT * 0.95 / 1" | bc)
    [ "$P95_LINE" -lt 1 ] && P95_LINE=1
    P95=$(sed -n "${P95_LINE}p" "$TMP_DIR/latencies.txt" | awk '{printf "%.0f", $1*1000}')

    P99_LINE=$(echo "scale=0; $SUCCESS_COUNT * 0.99 / 1" | bc)
    [ "$P99_LINE" -lt 1 ] && P99_LINE=1
    P99=$(sed -n "${P99_LINE}p" "$TMP_DIR/latencies.txt" | awk '{printf "%.0f", $1*1000}')

    echo -e "    Min: ${MIN}ms"
    echo -e "    Avg: ${AVG}ms"
    echo -e "    Max: ${MAX}ms"
    echo -e "    P50: ${P50}ms"
    echo -e "    P95: ${P95}ms"
    echo -e "    P99: ${P99}ms"
fi

echo -e "${BLUE}═══════════════════════════════════════════════════════════${NC}"

# Performance rating
echo -e "${YELLOW}🎯 Performance rating:${NC}"

if (( $(echo "$THROUGHPUT >= 100" | bc -l) )); then
    RATING="⭐⭐⭐⭐⭐ Excellent"
elif (( $(echo "$THROUGHPUT >= 60" | bc -l) )); then
    RATING="⭐⭐⭐⭐ Good"
elif (( $(echo "$THROUGHPUT >= 30" | bc -l) )); then
    RATING="⭐⭐⭐ Medium"
elif (( $(echo "$THROUGHPUT >= 10" | bc -l) )); then
    RATING="⭐⭐ Low"
else
    RATING="⭐ Needs improvement"
fi

echo -e "  Throughput (${THROUGHPUT} req/s): ${RATING}"

echo ""
echo -e "${GREEN}✅ Test completed!${NC}"
test_key.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ import json
3
+ import uuid
4
+
5
+ # ================= Configuration =================
6
+ # 1. Create a new key in the admin UI under [Key Management]
7
+ # 2. Paste the new sk-... below
8
+ API_KEY = "YOUR_NEW_API_KEY"
9
+ BASE_URL = "http://127.0.0.1:8000"
10
+ # =============================================
11
+
12
def test_chat_completion():
    """POST one chat completion with the configured key and print the outcome."""
    print(f"Starting key test: {API_KEY[:10]}...")

    endpoint = f"{BASE_URL}/v1/chat/completions"
    request_headers = {
        "Authorization": f"Bearer {API_KEY}",
        "Content-Type": "application/json",
    }
    body = {
        "model": "grok-4-fast",
        "messages": [
            {"role": "user", "content": "Hello, who are you? Tell me a joke."}
        ],
        "stream": False,
    }

    try:
        resp = requests.post(endpoint, headers=request_headers, json=body, timeout=30)
        print(f"Status code: {resp.status_code}")

        # Guard clause: anything but 200 is reported and we bail out.
        if resp.status_code != 200:
            print(f"Request failed: {resp.text}")
            return

        content = resp.json()['choices'][0]['message']['content']
        print("--- Response OK ---")
        print(content)
        print("---------------")
        print("Test passed! Check the admin UI [Audit Logs] to confirm the request was recorded.")

    except Exception as e:
        print(f"Error occurred: {e}")
+
46
+ if __name__ == "__main__":
47
+ if API_KEY == "YOUR_NEW_API_KEY":
48
+ print("Please replace API_KEY with the key you just created.")
49
+ else:
50
+ test_chat_completion()
uv.lock ADDED
The diff for this file is too large to render. See raw diff