Soumik555 committed on
Commit
01b4337
·
1 Parent(s): c6d5178

db api online

Browse files
Files changed (1) hide show
  1. chat_api.py +377 -177
chat_api.py CHANGED
@@ -3,7 +3,7 @@ import aiohttp
3
  import logging
4
  import os
5
  import random
6
- from typing import List, Dict, Optional, Any
7
  from fastapi import FastAPI, HTTPException, BackgroundTasks
8
  from fastapi.responses import StreamingResponse, JSONResponse
9
  from pydantic import BaseModel, Field
@@ -27,6 +27,75 @@ logging.basicConfig(
27
  )
28
  logger = logging.getLogger(__name__)
29
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  # Pydantic models
31
  class Message(BaseModel):
32
  role: str = Field(..., description="Role: 'system', 'user', or 'assistant'")
@@ -78,6 +147,69 @@ class ChatResponse(BaseModel):
78
  provider_used: Optional[str] = None
79
  timestamp: str
80
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
  class APIKeyManager:
82
  """Manages multiple API keys with rotation and rate limiting"""
83
 
@@ -141,15 +273,17 @@ class APIKeyManager:
141
  with self.lock:
142
  return dict(self.key_stats)
143
 
144
- class OpenRouterClient:
145
- """High-performance OpenRouter client with connection pooling"""
146
 
147
- def __init__(self, key_manager: APIKeyManager):
148
  self.key_manager = key_manager
 
149
  self.base_url = "https://openrouter.ai/api/v1"
150
  self.session_pool = {}
151
- self.max_connections = 100 # Total connection pool
152
  self.max_connections_per_host = 20
 
153
 
154
  async def get_session(self, api_key: str) -> aiohttp.ClientSession:
155
  """Get or create a session for the API key"""
@@ -159,21 +293,21 @@ class OpenRouterClient:
159
  limit_per_host=self.max_connections_per_host,
160
  keepalive_timeout=30,
161
  enable_cleanup_closed=True,
162
- ttl_dns_cache=300, # DNS cache TTL
163
  use_dns_cache=True
164
  )
165
 
166
  timeout = aiohttp.ClientTimeout(
167
- total=60, # Total timeout
168
- connect=10, # Connection timeout
169
- sock_read=30 # Socket read timeout
170
  )
171
 
172
  headers = {
173
  "Authorization": f"Bearer {api_key}",
174
  "Content-Type": "application/json",
175
- "HTTP-Referer": "https://your-app.com", # Optional: for analytics
176
- "X-Title": "High-Performance Chat API" # Optional: for analytics
177
  }
178
 
179
  self.session_pool[api_key] = aiohttp.ClientSession(
@@ -185,91 +319,185 @@ class OpenRouterClient:
185
 
186
  return self.session_pool[api_key]
187
 
188
- async def chat_completion(self, request: ChatRequest) -> Dict[str, Any]:
189
- """Send chat completion request with automatic key rotation"""
 
 
 
 
 
190
  start_time = time.time()
191
- api_key = self.key_manager.get_next_key()
192
 
193
- try:
194
- session = await self.get_session(api_key)
195
-
196
- # Prepare messages
197
- messages = []
198
-
199
- # Add system prompt if provided
200
- if request.system_prompt:
201
- messages.append({"role": "system", "content": request.system_prompt})
202
-
203
- # Add user messages
204
- messages.extend([msg.dict() for msg in request.messages])
205
-
206
- # Prepare request payload
207
- payload = {
208
- "model": request.model,
209
- "messages": messages,
210
- "max_tokens": request.max_tokens,
211
- "temperature": request.temperature,
212
- "top_p": request.top_p,
213
- "frequency_penalty": request.frequency_penalty,
214
- "presence_penalty": request.presence_penalty,
215
- "stream": request.stream
216
  }
 
 
 
 
 
 
217
 
218
- # Add provider preferences if specified
219
- if request.provider:
220
- provider_dict = request.provider.dict(exclude_none=True)
221
- if provider_dict:
222
- payload["provider"] = provider_dict
223
-
224
- logger.debug(f"Sending request to {request.model} with key ending in ...{api_key[-4:]}")
225
-
226
- async with session.post(f"{self.base_url}/chat/completions", json=payload) as response:
227
- response_time = time.time() - start_time
228
 
229
- if response.status == 200:
230
- result = await response.json()
231
-
232
- # Extract provider information if available
233
- provider_used = None
234
- if "model" in result and "/" in result["model"]:
235
- # Sometimes the response model includes provider info
236
- provider_used = result["model"].split("/")[0]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
237
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
238
  return {
239
- "success": True,
240
- "data": result,
241
- "response_time": response_time,
242
- "provider_used": provider_used,
243
- "api_key_used": api_key[-4:] # Last 4 characters for debugging
244
  }
 
 
 
 
 
 
 
 
 
245
  else:
246
- error_data = await response.text()
247
- logger.error(f"OpenRouter API error {response.status}: {error_data}")
248
-
249
- # Record error for this key
250
- self.key_manager.record_error(api_key)
251
-
252
  return {
253
  "success": False,
254
- "error": f"API error {response.status}: {error_data}",
255
- "response_time": response_time
 
 
256
  }
257
 
258
- except Exception as e:
259
- response_time = time.time() - start_time
260
- logger.error(f"Request failed with key ...{api_key[-4:]}: {str(e)}")
261
-
262
- # Record error for this key
263
- self.key_manager.record_error(api_key)
264
-
265
  return {
266
  "success": False,
267
- "error": str(e),
268
- "response_time": response_time
 
 
 
 
 
 
 
 
 
269
  }
270
 
271
  async def stream_chat_completion(self, request: ChatRequest):
272
- """Stream chat completion response"""
 
 
 
 
 
 
 
 
 
 
273
  api_key = self.key_manager.get_next_key()
274
 
275
  try:
@@ -306,12 +534,29 @@ class OpenRouterClient:
306
  else:
307
  error_data = await response.text()
308
  self.key_manager.record_error(api_key)
309
- yield f"data: {json.dumps({'error': f'API error {response.status}: {error_data}'})}\n\n".encode()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
310
 
311
  except Exception as e:
312
  logger.error(f"Streaming failed with key ...{api_key[-4:]}: {str(e)}")
313
  self.key_manager.record_error(api_key)
314
- yield f"data: {json.dumps({'error': str(e)})}\n\n".encode()
315
 
316
  async def close_all_sessions(self):
317
  """Close all aiohttp sessions"""
@@ -320,31 +565,36 @@ class OpenRouterClient:
320
  self.session_pool.clear()
321
 
322
  # Global variables
323
- client: Optional[OpenRouterClient] = None
324
  key_manager: Optional[APIKeyManager] = None
 
325
 
326
  @asynccontextmanager
327
  async def lifespan(app: FastAPI):
328
  """Startup and shutdown events"""
329
- global client, key_manager
330
 
331
  # Startup
332
- logger.info("Starting OpenRouter Chat API...")
333
 
334
  # Load API keys from environment
335
  api_keys_str = os.getenv("OPENROUTER_API_KEYS", "")
336
  if not api_keys_str:
337
  raise ValueError("OPENROUTER_API_KEYS environment variable is required")
338
-
339
  api_keys = [key.strip() for key in api_keys_str.split(",") if key.strip()]
340
  if not api_keys:
341
  raise ValueError("No valid API keys found in OPENROUTER_API_KEYS")
342
-
343
- # Initialize key manager and client
 
344
  key_manager = APIKeyManager(api_keys)
345
- client = OpenRouterClient(key_manager)
 
 
 
346
 
347
- logger.info(f"API initialized with {len(api_keys)} keys")
348
 
349
  yield
350
 
@@ -355,8 +605,8 @@ async def lifespan(app: FastAPI):
355
 
356
  # Create FastAPI app
357
  app = FastAPI(
358
- title="High-Performance OpenRouter Chat API",
359
- description="Scalable chat completions API with multiple key rotation and parallel processing",
360
  version="1.0.0",
361
  lifespan=lifespan
362
  )
@@ -365,29 +615,47 @@ app = FastAPI(
365
  async def root():
366
  """Root endpoint with API information"""
367
  return {
368
- "message": "High-Performance OpenRouter Chat API",
369
  "version": "1.0.0",
370
  "endpoints": {
371
  "chat": "/api/chat",
372
  "chat_stream": "/api/chat (with stream=true)",
 
373
  "stats": "/api/stats",
374
  "health": "/health"
375
  },
376
  "features": [
377
  "Multiple API key rotation",
 
378
  "Connection pooling",
379
  "Parallel processing",
380
  "Provider routing",
381
  "Streaming support",
382
- "Rate limiting"
 
383
  ]
384
  }
385
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
386
  @app.post("/api/chat", response_model=ChatResponse)
387
  async def chat_completion(request: ChatRequest):
388
- """Send chat completion request"""
389
  if not client:
390
- raise HTTPException(status_code=503, detail="Service not initialized")
391
 
392
  try:
393
  # Handle streaming requests
@@ -412,16 +680,24 @@ async def chat_completion(request: ChatRequest):
412
  timestamp=datetime.now().isoformat()
413
  )
414
  else:
415
- raise HTTPException(
416
- status_code=500,
417
- detail=f"Chat completion failed: {result['error']}"
418
- )
 
 
 
 
 
419
 
420
  except HTTPException:
421
  raise
422
  except Exception as e:
423
  logger.error(f"Unexpected error in chat_completion: {str(e)}")
424
- raise HTTPException(status_code=500, detail=str(e))
 
 
 
425
 
426
  @app.get("/api/stats", response_model=Dict)
427
  async def get_api_stats():
@@ -457,7 +733,7 @@ async def get_api_stats():
457
  @app.get("/health")
458
  async def health_check():
459
  """Health check endpoint"""
460
- if not client or not key_manager:
461
  return JSONResponse(
462
  status_code=503,
463
  content={
@@ -465,80 +741,4 @@ async def health_check():
465
  "message": "Service not initialized",
466
  "timestamp": datetime.now().isoformat()
467
  }
468
- )
469
-
470
- try:
471
- stats = key_manager.get_stats()
472
- return {
473
- "status": "healthy",
474
- "api_keys_loaded": len(stats),
475
- "total_requests": sum(stat["requests"] for stat in stats.values()),
476
- "timestamp": datetime.now().isoformat()
477
- }
478
-
479
- except Exception as e:
480
- return JSONResponse(
481
- status_code=503,
482
- content={
483
- "status": "unhealthy",
484
- "error": str(e),
485
- "timestamp": datetime.now().isoformat()
486
- }
487
- )
488
-
489
- # Batch processing endpoint for high throughput
490
- @app.post("/api/chat/batch")
491
- async def batch_chat_completions(requests: List[ChatRequest]):
492
- """Process multiple chat requests in parallel"""
493
- if not client:
494
- raise HTTPException(status_code=503, detail="Service not initialized")
495
-
496
- if len(requests) > 50: # Limit batch size
497
- raise HTTPException(status_code=400, detail="Batch size limited to 50 requests")
498
-
499
- try:
500
- # Process all requests in parallel
501
- tasks = [client.chat_completion(req) for req in requests]
502
- results = await asyncio.gather(*tasks, return_exceptions=True)
503
-
504
- # Format results
505
- responses = []
506
- for i, (request, result) in enumerate(zip(requests, results)):
507
- if isinstance(result, Exception):
508
- responses.append({
509
- "request_index": i,
510
- "success": False,
511
- "error": str(result)
512
- })
513
- elif result["success"]:
514
- responses.append({
515
- "request_index": i,
516
- "success": True,
517
- "model": request.model,
518
- "choices": result["data"].get("choices", []),
519
- "usage": result["data"].get("usage"),
520
- "response_time": result["response_time"],
521
- "provider_used": result.get("provider_used")
522
- })
523
- else:
524
- responses.append({
525
- "request_index": i,
526
- "success": False,
527
- "error": result["error"]
528
- })
529
-
530
- return {
531
- "success": True,
532
- "batch_size": len(requests),
533
- "results": responses,
534
- "timestamp": datetime.now().isoformat()
535
- }
536
-
537
- except Exception as e:
538
- logger.error(f"Batch processing failed: {str(e)}")
539
- raise HTTPException(status_code=500, detail=str(e))
540
-
541
- @app.get("/ping")
542
- async def ping():
543
- """Health check endpoint"""
544
- return {"status": "ok", "timestamp": datetime.now().isoformat()}
 
3
  import logging
4
  import os
5
  import random
6
+ from typing import List, Dict, Optional, Any, Set
7
  from fastapi import FastAPI, HTTPException, BackgroundTasks
8
  from fastapi.responses import StreamingResponse, JSONResponse
9
  from pydantic import BaseModel, Field
 
27
  )
28
  logger = logging.getLogger(__name__)
29
 
30
# Error handling
class ErrorHandler:
    """Translate raw OpenRouter HTTP errors into user-friendly chatcsvandpdf messages."""

    @staticmethod
    def get_user_friendly_error(status_code: int, error_message: str, model: Optional[str] = None) -> dict:
        """Map an upstream error to a branded message/suggestion pair.

        Args:
            status_code: HTTP status returned by the upstream API.
            error_message: Raw error text from the upstream response body.
            model: Model identifier the request targeted, if known.

        Returns:
            dict with "message" and "suggestion" keys; unknown status codes
            additionally carry "technical_info" with the raw status and message.
        """
        friendly_messages = {
            400: {
                "message": "Invalid request format. Please check your message and try again.",
                "suggestion": "Verify that your request parameters are correctly formatted."
            },
            401: {
                "message": "Authentication issue with chatcsvandpdf service.",
                "suggestion": "This is a temporary service issue. Please try again in a moment."
            },
            402: {
                "message": "chatcsvandpdf service is temporarily at capacity.",
                "suggestion": "Please try again in a few minutes or use a different model."
            },
            403: {
                "message": "Your message was flagged by our content moderation system.",
                "suggestion": "Please rephrase your message and avoid potentially harmful content."
            },
            408: {
                "message": "Request timed out. The model took too long to respond.",
                "suggestion": "Try shortening your message or using a faster model."
            },
            429: {
                "message": f"Rate limit reached for {'model ' + model if model else 'this service'}. Please try again later.",
                "suggestion": "chatcsvandpdf is currently experiencing high demand. Please wait a moment and retry, or try a different model."
            },
            502: {
                "message": f"The {'model ' + model if model else 'selected model'} is currently unavailable.",
                "suggestion": "This model is temporarily down. Please try a different model or wait a few minutes."
            },
            503: {
                "message": "No available providers meet your requirements.",
                "suggestion": "Try adjusting your provider preferences or use a different model."
            }
        }

        # Unknown status codes fall back to a generic message, but keep the
        # raw status/message around for debugging.
        if status_code not in friendly_messages:
            return {
                "message": "chatcsvandpdf service encountered an unexpected issue.",
                "suggestion": "Please try again. If the problem persists, contact support.",
                "technical_info": f"Error {status_code}: {error_message}"
            }

        error_info = friendly_messages[status_code].copy()

        # 429: distinguish free-tier model throttling from account quota/credit
        # exhaustion based on the model name and the raw error text.
        if status_code == 429:
            if "free" in str(model).lower():
                error_info["message"] = f"Free model {model} is currently rate-limited."
                error_info["suggestion"] = "Free models have usage limits. Try again in a few minutes or upgrade to a premium model."
            elif "quota" in error_message.lower() or "credit" in error_message.lower():
                error_info["message"] = "chatcsvandpdf service quota reached."
                error_info["suggestion"] = "Our service is at capacity. Please try again later."

        # 502 with a known model: name the failing model explicitly.
        if status_code == 502 and model:
            error_info["message"] = f"Model {model} is temporarily unavailable."
            error_info["suggestion"] = "This model is experiencing issues. Try another model or wait a few minutes."

        return error_info
98
+
99
  # Pydantic models
100
  class Message(BaseModel):
101
  role: str = Field(..., description="Role: 'system', 'user', or 'assistant'")
 
147
  provider_used: Optional[str] = None
148
  timestamp: str
149
 
150
class ModelValidator:
    """Caches the set of model names accepted by the inference service.

    The cache is refreshed at most once per `update_interval` seconds.
    `self.lock` (a threading.Lock) guards reads/writes of `valid_models` and
    `last_updated` only; it is never held across an await, since blocking
    the event loop for the duration of an HTTP fetch would stall every
    other in-flight request.
    """

    def __init__(self):
        self.valid_models: Set[str] = set()   # model ids considered valid
        self.last_updated: float = 0          # epoch seconds of last successful refresh
        self.update_interval: float = 3600    # refresh at most once per hour
        self.models_endpoint = "https://xce009-inference-test.hf.space/api/free-models/names"
        self.lock = threading.Lock()

    async def fetch_valid_models(self) -> Set[str]:
        """Fetch valid model names from the inference service.

        Returns an empty set on any HTTP or network failure (the caller
        treats an empty result as "keep the stale cache").
        """
        try:
            async with aiohttp.ClientSession() as session:
                async with session.get(
                    self.models_endpoint,
                    timeout=aiohttp.ClientTimeout(total=10)
                ) as response:
                    if response.status == 200:
                        data = await response.json()

                        # Response shape: {"models": [...]} where entries are
                        # either plain id strings or dicts carrying an "id".
                        models_list = data.get("models", [])

                        models = set()
                        for item in models_list:
                            if isinstance(item, dict) and "id" in item:
                                models.add(item["id"])
                            elif isinstance(item, str):
                                models.add(item)

                        logger.info(f"Fetched {len(models)} valid models from inference service")
                        return models
                    else:
                        logger.error(f"Failed to fetch models: HTTP {response.status}")
                        return set()

        except Exception as e:
            logger.error(f"Error fetching valid models: {str(e)}")
            return set()

    async def update_models_if_needed(self):
        """Refresh the cached model list if it is stale or empty.

        NOTE(review): the previous version held the threading.Lock across the
        await of fetch_valid_models(), blocking the event loop for up to the
        10s HTTP timeout. Here the network call happens outside the lock.
        """
        current_time = time.time()

        with self.lock:
            needs_update = (
                current_time - self.last_updated > self.update_interval
                or not self.valid_models
            )

        if not needs_update:
            return

        logger.info("Updating valid models list...")
        new_models = await self.fetch_valid_models()
        if new_models:  # only replace the cache when we got valid data
            with self.lock:
                self.valid_models = new_models
                self.last_updated = current_time
            logger.info(f"Updated valid models list with {len(new_models)} models")

    def is_valid_model(self, model_name: str) -> bool:
        """Check if a model name is in the cached valid set."""
        with self.lock:
            return model_name in self.valid_models

    def get_valid_models(self) -> List[str]:
        """Return the cached valid models as a sorted list."""
        with self.lock:
            return sorted(list(self.valid_models))
212
+
213
  class APIKeyManager:
214
  """Manages multiple API keys with rotation and rate limiting"""
215
 
 
273
  with self.lock:
274
  return dict(self.key_stats)
275
 
276
+ class InferenceClient:
277
+ """High-performance inference client with connection pooling and enhanced error handling"""
278
 
279
+ def __init__(self, key_manager: APIKeyManager, model_validator: ModelValidator):
280
  self.key_manager = key_manager
281
+ self.model_validator = model_validator
282
  self.base_url = "https://openrouter.ai/api/v1"
283
  self.session_pool = {}
284
+ self.max_connections = 100
285
  self.max_connections_per_host = 20
286
+ self.error_handler = ErrorHandler()
287
 
288
  async def get_session(self, api_key: str) -> aiohttp.ClientSession:
289
  """Get or create a session for the API key"""
 
293
  limit_per_host=self.max_connections_per_host,
294
  keepalive_timeout=30,
295
  enable_cleanup_closed=True,
296
+ ttl_dns_cache=300,
297
  use_dns_cache=True
298
  )
299
 
300
  timeout = aiohttp.ClientTimeout(
301
+ total=60,
302
+ connect=10,
303
+ sock_read=30
304
  )
305
 
306
  headers = {
307
  "Authorization": f"Bearer {api_key}",
308
  "Content-Type": "application/json",
309
+ "HTTP-Referer": "https://chatcsvandpdf.com",
310
+ "X-Title": "chatcsvandpdf API"
311
  }
312
 
313
  self.session_pool[api_key] = aiohttp.ClientSession(
 
319
 
320
  return self.session_pool[api_key]
321
 
322
+ def _should_retry_with_different_key(self, status_code: int) -> bool:
323
+ """Determine if we should retry with a different API key"""
324
+ retry_codes = {401, 402, 429} # Auth issues, credits, rate limits
325
+ return status_code in retry_codes
326
+
327
+ async def chat_completion(self, request: ChatRequest, max_retries: int = 2) -> Dict[str, Any]:
328
+ """Send chat completion request with enhanced error handling and retries"""
329
  start_time = time.time()
 
330
 
331
+ # Update models list if needed
332
+ await self.model_validator.update_models_if_needed()
333
+
334
+ # Validate model - if no models loaded, skip validation
335
+ if self.model_validator.valid_models and not self.model_validator.is_valid_model(request.model):
336
+ valid_models = self.model_validator.get_valid_models()
337
+ return {
338
+ "success": False,
339
+ "error": f"Model '{request.model}' is not available in chatcsvandpdf.",
340
+ "suggestion": f"Try one of these available models: {', '.join(valid_models[:5])}{'...' if len(valid_models) > 5 else ''}",
341
+ "response_time": time.time() - start_time
 
 
 
 
 
 
 
 
 
 
 
 
342
  }
343
+
344
+ last_error = None
345
+
346
+ # Try with different API keys if needed
347
+ for attempt in range(max_retries + 1):
348
+ api_key = self.key_manager.get_next_key()
349
 
350
+ try:
351
+ session = await self.get_session(api_key)
 
 
 
 
 
 
 
 
352
 
353
+ # Prepare messages
354
+ messages = []
355
+ if request.system_prompt:
356
+ messages.append({"role": "system", "content": request.system_prompt})
357
+ messages.extend([msg.dict() for msg in request.messages])
358
+
359
+ # Prepare request payload
360
+ payload = {
361
+ "model": request.model,
362
+ "messages": messages,
363
+ "max_tokens": request.max_tokens,
364
+ "temperature": request.temperature,
365
+ "top_p": request.top_p,
366
+ "frequency_penalty": request.frequency_penalty,
367
+ "presence_penalty": request.presence_penalty,
368
+ "stream": request.stream
369
+ }
370
+
371
+ # Add provider preferences if specified
372
+ if request.provider:
373
+ provider_dict = request.provider.dict(exclude_none=True)
374
+ if provider_dict:
375
+ payload["provider"] = provider_dict
376
+
377
+ logger.debug(f"Attempt {attempt + 1}: Sending request to {request.model} with key ending in ...{api_key[-4:]}")
378
+
379
+ async with session.post(f"{self.base_url}/chat/completions", json=payload) as response:
380
+ response_time = time.time() - start_time
381
 
382
+ if response.status == 200:
383
+ result = await response.json()
384
+
385
+ # Extract provider information if available
386
+ provider_used = None
387
+ if "model" in result and "/" in result["model"]:
388
+ provider_used = result["model"].split("/")[0]
389
+
390
+ return {
391
+ "success": True,
392
+ "data": result,
393
+ "response_time": response_time,
394
+ "provider_used": provider_used,
395
+ "api_key_used": api_key[-4:]
396
+ }
397
+ else:
398
+ error_data = await response.text()
399
+ logger.warning(f"API error {response.status} on attempt {attempt + 1}: {error_data}")
400
+
401
+ # Parse error response if JSON
402
+ try:
403
+ error_json = json.loads(error_data)
404
+ original_error = error_json.get("error", {}).get("message", error_data)
405
+ except:
406
+ original_error = error_data
407
+
408
+ # Record error for this key
409
+ self.key_manager.record_error(api_key)
410
+
411
+ # Check if we should retry with a different key
412
+ if self._should_retry_with_different_key(response.status) and attempt < max_retries:
413
+ last_error = {
414
+ "status": response.status,
415
+ "message": original_error,
416
+ "attempt": attempt + 1
417
+ }
418
+ # Wait briefly before retry
419
+ await asyncio.sleep(min(2 ** attempt, 5)) # Exponential backoff, max 5s
420
+ continue
421
+ else:
422
+ # Final attempt or non-retryable error
423
+ error_info = self.error_handler.get_user_friendly_error(
424
+ response.status, original_error, request.model
425
+ )
426
+
427
+ return {
428
+ "success": False,
429
+ "error": error_info["message"],
430
+ "suggestion": error_info["suggestion"],
431
+ "response_time": response_time,
432
+ "attempts_made": attempt + 1
433
+ }
434
+
435
+ except asyncio.TimeoutError:
436
+ logger.warning(f"Timeout on attempt {attempt + 1} with key ...{api_key[-4:]}")
437
+ self.key_manager.record_error(api_key)
438
+
439
+ if attempt < max_retries:
440
+ last_error = {"status": 408, "message": "Request timeout", "attempt": attempt + 1}
441
+ await asyncio.sleep(min(2 ** attempt, 5))
442
+ continue
443
+ else:
444
  return {
445
+ "success": False,
446
+ "error": "chatcsvandpdf service timed out processing your request.",
447
+ "suggestion": "Try shortening your message or using a different model.",
448
+ "response_time": time.time() - start_time,
449
+ "attempts_made": attempt + 1
450
  }
451
+
452
+ except Exception as e:
453
+ logger.error(f"Request failed on attempt {attempt + 1} with key ...{api_key[-4:]}: {str(e)}")
454
+ self.key_manager.record_error(api_key)
455
+
456
+ if attempt < max_retries:
457
+ last_error = {"status": 500, "message": str(e), "attempt": attempt + 1}
458
+ await asyncio.sleep(min(2 ** attempt, 5))
459
+ continue
460
  else:
 
 
 
 
 
 
461
  return {
462
  "success": False,
463
+ "error": "chatcsvandpdf service encountered an unexpected issue.",
464
+ "suggestion": "Please try again. If the problem persists, contact support.",
465
+ "response_time": time.time() - start_time,
466
+ "attempts_made": attempt + 1
467
  }
468
 
469
+ # If we get here, all attempts failed
470
+ if last_error:
471
+ error_info = self.error_handler.get_user_friendly_error(
472
+ last_error["status"], last_error["message"], request.model
473
+ )
 
 
474
  return {
475
  "success": False,
476
+ "error": error_info["message"],
477
+ "suggestion": error_info["suggestion"],
478
+ "response_time": time.time() - start_time,
479
+ "attempts_made": max_retries + 1
480
+ }
481
+ else:
482
+ return {
483
+ "success": False,
484
+ "error": "chatcsvandpdf service is currently unavailable.",
485
+ "suggestion": "Please try again later.",
486
+ "response_time": time.time() - start_time
487
  }
488
 
489
  async def stream_chat_completion(self, request: ChatRequest):
490
+ """Stream chat completion response with enhanced error handling"""
491
+ # Update models list if needed
492
+ await self.model_validator.update_models_if_needed()
493
+
494
+ # Validate model - if no models loaded, skip validation
495
+ if self.model_validator.valid_models and not self.model_validator.is_valid_model(request.model):
496
+ valid_models = self.model_validator.get_valid_models()
497
+ error_msg = f"Model '{request.model}' is not available in chatcsvandpdf. Try: {', '.join(valid_models[:3])}"
498
+ yield f"data: {json.dumps({'error': error_msg})}\n\n".encode()
499
+ return
500
+
501
  api_key = self.key_manager.get_next_key()
502
 
503
  try:
 
534
  else:
535
  error_data = await response.text()
536
  self.key_manager.record_error(api_key)
537
+
538
+ # Parse error and provide user-friendly message
539
+ try:
540
+ error_json = json.loads(error_data)
541
+ original_error = error_json.get("error", {}).get("message", error_data)
542
+ except:
543
+ original_error = error_data
544
+
545
+ error_info = self.error_handler.get_user_friendly_error(
546
+ response.status, original_error, request.model
547
+ )
548
+
549
+ yield f"data: {json.dumps({'error': error_info['message'], 'suggestion': error_info['suggestion']})}\n\n".encode()
550
+
551
+ except asyncio.TimeoutError:
552
+ logger.error(f"Streaming timeout with key ...{api_key[-4:]}")
553
+ self.key_manager.record_error(api_key)
554
+ yield f"data: {json.dumps({'error': 'chatcsvandpdf request timed out. Try a shorter message or different model.'})}\n\n".encode()
555
 
556
  except Exception as e:
557
  logger.error(f"Streaming failed with key ...{api_key[-4:]}: {str(e)}")
558
  self.key_manager.record_error(api_key)
559
+ yield f"data: {json.dumps({'error': 'chatcsvandpdf service encountered an issue. Please try again.'})}\n\n".encode()
560
 
561
  async def close_all_sessions(self):
562
  """Close all aiohttp sessions"""
 
565
  self.session_pool.clear()
566
 
567
  # Global variables
568
+ client: Optional[InferenceClient] = None
569
  key_manager: Optional[APIKeyManager] = None
570
+ model_validator: Optional[ModelValidator] = None
571
 
572
  @asynccontextmanager
573
  async def lifespan(app: FastAPI):
574
  """Startup and shutdown events"""
575
+ global client, key_manager, model_validator
576
 
577
  # Startup
578
+ logger.info("Starting chatcsvandpdf API...")
579
 
580
  # Load API keys from environment
581
  api_keys_str = os.getenv("OPENROUTER_API_KEYS", "")
582
  if not api_keys_str:
583
  raise ValueError("OPENROUTER_API_KEYS environment variable is required")
584
+
585
  api_keys = [key.strip() for key in api_keys_str.split(",") if key.strip()]
586
  if not api_keys:
587
  raise ValueError("No valid API keys found in OPENROUTER_API_KEYS")
588
+
589
+ # Initialize components
590
+ model_validator = ModelValidator()
591
  key_manager = APIKeyManager(api_keys)
592
+ client = InferenceClient(key_manager, model_validator)
593
+
594
+ # Initial model fetch
595
+ await model_validator.update_models_if_needed()
596
 
597
+ logger.info(f"API initialized with {len(api_keys)} keys and {len(model_validator.get_valid_models())} available models")
598
 
599
  yield
600
 
 
605
 
606
  # Create FastAPI app
607
  app = FastAPI(
608
+ title="chatcsvandpdf API",
609
+ description="High-performance chat completions API with model validation and multiple key rotation",
610
  version="1.0.0",
611
  lifespan=lifespan
612
  )
 
615
async def root():
    """Describe the service: banner message, version, routes, and feature list."""
    endpoints = {
        "chat": "/api/chat",
        "chat_stream": "/api/chat (with stream=true)",
        "models": "/api/models",
        "stats": "/api/stats",
        "health": "/health",
    }
    features = [
        "Multiple API key rotation",
        "Model validation",
        "Connection pooling",
        "Parallel processing",
        "Provider routing",
        "Streaming support",
        "Rate limiting",
        "Enhanced error handling",
    ]
    return {
        "message": "chatcsvandpdf API",
        "version": "1.0.0",
        "endpoints": endpoints,
        "features": features,
    }
638
 
639
@app.get("/api/models")
async def get_available_models():
    """List the models currently accepted, with cache-freshness information."""
    if not model_validator:
        raise HTTPException(status_code=503, detail="Service not initialized")

    await model_validator.update_models_if_needed()
    models = model_validator.get_valid_models()

    # Report when the cache was last refreshed; "Never" if no fetch succeeded.
    if model_validator.last_updated > 0:
        refreshed = datetime.fromtimestamp(model_validator.last_updated).isoformat()
    else:
        refreshed = "Never"

    return {
        "models": models,
        "total_count": len(models),
        "last_updated": refreshed,
    }
653
+
654
  @app.post("/api/chat", response_model=ChatResponse)
655
  async def chat_completion(request: ChatRequest):
656
+ """Send chat completion request with enhanced error handling"""
657
  if not client:
658
+ raise HTTPException(status_code=503, detail="chatcsvandpdf service is starting up. Please try again in a moment.")
659
 
660
  try:
661
  # Handle streaming requests
 
680
  timestamp=datetime.now().isoformat()
681
  )
682
  else:
683
+ # Return user-friendly error message
684
+ error_detail = result["error"]
685
+ if "suggestion" in result:
686
+ error_detail += f" {result['suggestion']}"
687
+
688
+ # Determine appropriate HTTP status code
689
+ status_code = 400 if "not available" in result["error"] else 503
690
+
691
+ raise HTTPException(status_code=status_code, detail=error_detail)
692
 
693
  except HTTPException:
694
  raise
695
  except Exception as e:
696
  logger.error(f"Unexpected error in chat_completion: {str(e)}")
697
+ raise HTTPException(
698
+ status_code=503,
699
+ detail="chatcsvandpdf service encountered an unexpected issue. Please try again."
700
+ )
701
 
702
  @app.get("/api/stats", response_model=Dict)
703
  async def get_api_stats():
 
733
  @app.get("/health")
734
  async def health_check():
735
  """Health check endpoint"""
736
+ if not client or not key_manager or not model_validator:
737
  return JSONResponse(
738
  status_code=503,
739
  content={
 
741
  "message": "Service not initialized",
742
  "timestamp": datetime.now().isoformat()
743
  }
744
+ )