Spaces:

SuperAPIs
/

flash

Paused

App Files Community

rkihacker committed on Oct 16

Commit

41b75fe

verified ·

1 Parent(s): 0bc619c

Update main.py

Browse files

Files changed (1) hide show

main.py +89 -165

main.py CHANGED Viewed

@@ -1,202 +1,126 @@
 import httpx
-from fastapi import FastAPI, Request, HTTPException, Depends
 from starlette.responses import StreamingResponse, JSONResponse
 from starlette.background import BackgroundTask
 import os
 import random
 import logging
 import time
-import json
-import asyncio
 from contextlib import asynccontextmanager
-from filelock import FileLock, Timeout
 # --- Production-Ready Configuration ---
 LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO").upper()
 logging.basicConfig(
     level=LOG_LEVEL,
-    format='%(asctime)s - PID:%(process)d - %(name)s - %(levelname)s - %(message)s'
 )
-logger = logging.getLogger(__name__)
-# --- Service Configuration ---
-ARTIFACT_URL = os.getenv("ARTIFACT_URL", "https://console.gmicloud.ai/api/v1/ie/artifact/get_public_artifacts")
-REFRESH_INTERVAL_SECONDS = int(os.getenv("REFRESH_INTERVAL_SECONDS", "30"))
-# --- Shared Cache File Configuration ---
-CACHE_DIR = "/dev/shm" if os.path.exists("/dev/shm") else "/tmp"
-CACHE_FILE_PATH = os.path.join(CACHE_DIR, "gmi_routing_table.json")
-LOCK_FILE_PATH = os.path.join(CACHE_DIR, "gmi_routing_table.lock")
-# --- In-Memory State for each Worker ---
-worker_model_routing_table = {}
-last_cache_check_time = 0
-# --- Retry Logic ---
-MAX_RETRIES = int(os.getenv("MAX_RETRIES", "5"))
-RETRY_STATUS_CODES = {429, 500, 502, 503, 504}
-# --- Core Caching and Refreshing Logic ---
-async def load_or_refresh_models():
-    """
-    Checks if the shared cache is stale. If so, attempts to acquire a lock
-    and refresh it. This is designed to be safe for multiple processes.
-    """
-    global last_cache_check_time, worker_model_routing_table
-    now = time.monotonic()
-    if (now - last_cache_check_time) < REFRESH_INTERVAL_SECONDS:
-        return
-    lock = FileLock(LOCK_FILE_PATH)
-    try:
-        with lock.acquire(timeout=5):
-            if os.path.exists(CACHE_FILE_PATH):
-                mtime = os.path.getmtime(CACHE_FILE_PATH)
-                if (time.time() - mtime) < REFRESH_INTERVAL_SECONDS:
-                    with open(CACHE_FILE_PATH, 'r') as f:
-                        worker_model_routing_table = json.load(f)
-                    last_cache_check_time = now
-                    logger.debug(f"Loaded fresh cache from file. {len(worker_model_routing_table)} models.")
-                    return
-            logger.warning("Cache is stale. This worker is refreshing the model list from the API...")
-            try:
-                async with httpx.AsyncClient() as client:
-                    response = await client.get(ARTIFACT_URL, timeout=30.0)
-                    response.raise_for_status()
-                    artifacts = response.json()
-                new_routing_table = {}
-                for artifact in artifacts:
-                    # CORRECTLY get the API model identifier from `model_price.modelName`
-                    model_price_info = artifact.get("model_price")
-                    api_model_id = model_price_info.get("modelName") if model_price_info else None
-                    display_name = artifact.get("artifact_metadata", {}).get("artifact_name", "Unknown")
-                    endpoints = artifact.get("endpoints", [])
-                    # Condition: Must have a valid API model ID and at least one active endpoint URL
-                    if api_model_id and endpoints and endpoints[0].get("endpoint_url"):
-                        endpoint_url = endpoints[0]["endpoint_url"]
-                        new_routing_table[api_model_id] = endpoint_url
-                        logger.debug(f"Mapped model ID '{api_model_id}' to endpoint '{endpoint_url}'")
-                    else:
-                        logger.debug(f"Skipping model '{display_name}': Missing API model ID or active endpoint.")
-                temp_path = CACHE_FILE_PATH + f".{os.getpid()}"
-                with open(temp_path, 'w') as f:
-                    json.dump(new_routing_table, f)
-                os.rename(temp_path, CACHE_FILE_PATH)
-                worker_model_routing_table = new_routing_table
-                logger.info(f"Successfully refreshed cache file with {len(worker_model_routing_table)} active models.")
-            except Exception as e:
-                logger.error(f"Failed to refresh model cache: {e}. Will use stale data if available.")
-    except Timeout:
-        logger.warning("Could not acquire lock, another process is updating. Reading from file.")
-    except Exception as e:
-        logger.error(f"An unexpected error occurred in cache management: {e}")
-    finally:
-        if os.path.exists(CACHE_FILE_PATH):
-            try:
-                with open(CACHE_FILE_PATH, 'r') as f:
-                    worker_model_routing_table = json.load(f)
-            except (json.JSONDecodeError, FileNotFoundError):
-                 logger.error("Could not read cache file. Routing table may be empty.")
-        last_cache_check_time = now
-# --- FastAPI Lifecycle & App Initialization ---
 @asynccontextmanager
 async def lifespan(app: FastAPI):
-    app.state.http_client = httpx.AsyncClient(timeout=None)
-    await load_or_refresh_models()
-    yield
-    await app.state.http_client.aclose()
-    logger.info("Application shutdown complete.")
 app = FastAPI(docs_url=None, redoc_url=None, lifespan=lifespan)
 # --- API Endpoints ---
 @app.get("/")
 async def health_check():
-    return JSONResponse({
-        "status": "ok",
-        "active_models_in_memory": len(worker_model_routing_table)
-    })
-@app.get("/v1/models", dependencies=[Depends(load_or_refresh_models)])
-async def list_models():
     """
-    Lists all available models from the worker's in-memory cache.
-    The dependency ensures the cache is checked for freshness before responding.
-    """
-    model_list = [
-        { "id": model_id, "object": "model", "owned_by": "gmi-serving" }
-        for model_id in sorted(worker_model_routing_table.keys()) # Sort for consistency
-    ]
-    return JSONResponse(content={"object": "list", "data": model_list})
-@app.post("/v1/chat/completions", dependencies=[Depends(load_or_refresh_models)])
-async def chat_completions_proxy(request: Request):
-    """
-    Forwards chat completion requests to the correct model endpoint.
-    The dependency ensures the cache is checked for freshness before routing.
     """
     start_time = time.monotonic()
-    body = await request.body()
-    try:
-        data = json.loads(body)
-        model_name = data.get("model")
-        if not model_name:
-            raise HTTPException(status_code=400, detail="Missing 'model' field in request body.")
-    except json.JSONDecodeError:
-        raise HTTPException(status_code=400, detail="Invalid JSON in request body.")
-    target_host = worker_model_routing_table.get(model_name)
-    if not target_host:
-        raise HTTPException(
-            status_code=404,
-            detail=f"Model '{model_name}' not found. It may be inactive or does not exist. Please check /v1/models."
-        )
     client: httpx.AsyncClient = request.app.state.http_client
-    target_url = f"https://{target_host}{request.url.path}"
-    request_headers = {k: v for k, v in request.headers.items() if k.lower() != 'host'}
-    random_ip = ".".join(str(random.randint(1, 254)) for _ in range(4))
-    request_headers.update({
         "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/141.0.0.0 Safari/537.36",
-        "x-forwarded-for": random_ip, "x-real-ip": random_ip
-    })
-    logger.info(f"Routing '{model_name}' to {target_url} (Client: {request.client.host})")
     for attempt in range(MAX_RETRIES):
         try:
-            req = client.build_request(method=request.method, url=target_url, headers=request_headers, content=body)
-            resp = await client.send(req, stream=True)
-            if resp.status_code not in RETRY_STATUS_CODES or attempt == MAX_RETRIES - 1:
                 duration_ms = (time.monotonic() - start_time) * 1000
-                log_func = logger.info if resp.is_success else logger.warning
-                log_func(f"Request finished for '{model_name}': status={resp.status_code} latency={duration_ms:.2f}ms")
-                return StreamingResponse(resp.aiter_raw(), status_code=resp.status_code, headers=resp.headers, background=BackgroundTask(resp.aclose))
-            logger.warning(f"Attempt {attempt + 1}/{MAX_RETRIES} for '{model_name}' failed with status {resp.status_code}. Retrying...")
-            await resp.aclose()
-            await asyncio.sleep(0.5 * (2 ** attempt))
-        except Exception as e:
-            logger.error(f"Request forwarding failed for '{model_name}' on attempt {attempt + 1}: {e}")
-            if attempt == MAX_RETRIES - 1:
-                raise HTTPException(status_code=502, detail=f"Bad Gateway: Error connecting to model backend. {e}")
-    raise HTTPException(status_code=502, detail="Bad Gateway: Request failed after all retries.")

 import httpx
+from fastapi import FastAPI, Request, HTTPException
 from starlette.responses import StreamingResponse, JSONResponse
 from starlette.background import BackgroundTask
 import os
 import random
 import logging
 import time
 from contextlib import asynccontextmanager
 # --- Production-Ready Configuration ---
 LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO").upper()
 logging.basicConfig(
     level=LOG_LEVEL,
+    format='%(asctime)s - %(levelname)s - %(message)s'
 )
+TARGET_URL = os.getenv("TARGET_URL", "https://api.gmi-serving.com")
+MAX_RETRIES = int(os.getenv("MAX_RETRIES", "15"))
+DEFAULT_RETRY_CODES = "429,500,502,503,504"
+RETRY_CODES_STR = os.getenv("RETRY_CODES", DEFAULT_RETRY_CODES)
+try:
+    RETRY_STATUS_CODES = {int(code.strip()) for code in RETRY_CODES_STR.split(',')}
+    logging.info(f"Will retry on the following status codes: {RETRY_STATUS_CODES}")
+except ValueError:
+    logging.error(f"Invalid RETRY_CODES format: '{RETRY_CODES_STR}'. Falling back to default: {DEFAULT_RETRY_CODES}")
+    RETRY_STATUS_CODES = {int(code.strip()) for code in DEFAULT_RETRY_CODES.split(',')}
+# --- Helper Function ---
+def generate_random_ip():
+    """Generates a random, valid-looking IPv4 address."""
+    return ".".join(str(random.randint(1, 254)) for _ in range(4))
+# --- HTTPX Client Lifecycle Management ---
 @asynccontextmanager
 async def lifespan(app: FastAPI):
+    """Manages the lifecycle of the HTTPX client."""
+    async with httpx.AsyncClient(base_url=TARGET_URL, timeout=None) as client:
+        app.state.http_client = client
+        yield
+# Initialize the FastAPI app with the lifespan manager and disabled docs
 app = FastAPI(docs_url=None, redoc_url=None, lifespan=lifespan)
 # --- API Endpoints ---
+# 1. Health Check Route (Defined FIRST)
+# This specific route will be matched before the catch-all proxy route.
 @app.get("/")
 async def health_check():
+    """Provides a basic health check endpoint."""
+    return JSONResponse({"status": "ok", "target": TARGET_URL})
+# 2. Catch-All Reverse Proxy Route (Defined SECOND)
+# This will capture ALL other requests (e.g., /completions, /v1/models, etc.)
+# and forward them. This eliminates any redirect issues.
+@app.api_route("/{full_path:path}", methods=["GET", "POST", "PUT", "DELETE", "PATCH", "OPTIONS", "HEAD"])
+async def reverse_proxy_handler(request: Request):
     """
+    A catch-all reverse proxy that forwards requests to the target URL with
+    enhanced retry logic and latency logging.
     """
     start_time = time.monotonic()
     client: httpx.AsyncClient = request.app.state.http_client
+    url = httpx.URL(path=request.url.path, query=request.url.query.encode("utf-8"))
+    request_headers = dict(request.headers)
+    request_headers.pop("host", None)
+    random_ip = generate_random_ip()
+    logging.info(f"Client '{request.client.host}' proxied with spoofed IP: {random_ip} for path: {url.path}")
+    specific_headers = {
         "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/141.0.0.0 Safari/537.36",
+        "x-forwarded-for": random_ip,
+        "x-real-ip": random_ip,
+        "x-originating-ip": random_ip,
+        "x-remote-ip": random_ip,
+        "x-remote-addr": random_ip,
+        "x-host": random_ip,
+        "x-forwarded-host": random_ip,
+    }
+    request_headers.update(specific_headers)
+    if "authorization" in request.headers:
+        request_headers["authorization"] = request.headers["authorization"]
+    body = await request.body()
+    last_exception = None
     for attempt in range(MAX_RETRIES):
         try:
+            rp_req = client.build_request(
+                method=request.method, url=url, headers=request_headers, content=body
+            )
+            rp_resp = await client.send(rp_req, stream=True)
+            if rp_resp.status_code not in RETRY_STATUS_CODES or attempt == MAX_RETRIES - 1:
                 duration_ms = (time.monotonic() - start_time) * 1000
+                log_func = logging.info if rp_resp.is_success else logging.warning
+                log_func(f"Request finished: {request.method} {request.url.path} status_code={rp_resp.status_code} latency={duration_ms:.2f}ms")
+                return StreamingResponse(
+                    rp_resp.aiter_raw(),
+                    status_code=rp_resp.status_code,
+                    headers=rp_resp.headers,
+                    background=BackgroundTask(rp_resp.aclose),
+                )
+            logging.warning(
+                f"Attempt {attempt + 1}/{MAX_RETRIES} for {url.path} failed with status {rp_resp.status_code}. Retrying..."
+            )
+            await rp_resp.aclose()
+        except httpx.ConnectError as e:
+            last_exception = e
+            logging.warning(f"Attempt {attempt + 1}/{MAX_RETRIES} for {url.path} failed with connection error: {e}")
+    duration_ms = (time.monotonic() - start_time) * 1000
+    logging.critical(f"Request failed, cannot connect to target: {request.method} {request.url.path} status_code=502 latency={duration_ms:.2f}ms")
+    raise HTTPException(
+        status_code=502,
+        detail=f"Bad Gateway: Cannot connect to target service after {MAX_RETRIES} attempts. {last_exception}"
+    )