Soumik555 committed on
Commit
01b4337
·
1 Parent(s): c6d5178

db api online

Browse files
Files changed (1) hide show
  1. chat_api.py +377 -177
chat_api.py CHANGED
@@ -3,7 +3,7 @@ import aiohttp
3
  import logging
4
  import os
5
  import random
6
- from typing import List, Dict, Optional, Any
7
  from fastapi import FastAPI, HTTPException, BackgroundTasks
8
  from fastapi.responses import StreamingResponse, JSONResponse
9
  from pydantic import BaseModel, Field
@@ -27,6 +27,75 @@ logging.basicConfig(
27
  )
28
  logger = logging.getLogger(__name__)
29
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  # Pydantic models
31
  class Message(BaseModel):
32
  role: str = Field(..., description="Role: 'system', 'user', or 'assistant'")
@@ -78,6 +147,69 @@ class ChatResponse(BaseModel):
78
  provider_used: Optional[str] = None
79
  timestamp: str
80
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
  class APIKeyManager:
82
  """Manages multiple API keys with rotation and rate limiting"""
83
 
@@ -141,15 +273,17 @@ class APIKeyManager:
141
  with self.lock:
142
  return dict(self.key_stats)
143
 
144
- class OpenRouterClient:
145
- """High-performance OpenRouter client with connection pooling"""
146
 
147
- def __init__(self, key_manager: APIKeyManager):
148
  self.key_manager = key_manager
 
149
  self.base_url = "https://openrouter.ai/api/v1"
150
  self.session_pool = {}
151
- self.max_connections = 100 # Total connection pool
152
  self.max_connections_per_host = 20
 
153
 
154
  async def get_session(self, api_key: str) -> aiohttp.ClientSession:
155
  """Get or create a session for the API key"""
@@ -159,21 +293,21 @@ class OpenRouterClient:
159
  limit_per_host=self.max_connections_per_host,
160
  keepalive_timeout=30,
161
  enable_cleanup_closed=True,
162
- ttl_dns_cache=300, # DNS cache TTL
163
  use_dns_cache=True
164
  )
165
 
166
  timeout = aiohttp.ClientTimeout(
167
- total=60, # Total timeout
168
- connect=10, # Connection timeout
169
- sock_read=30 # Socket read timeout
170
  )
171
 
172
  headers = {
173
  "Authorization": f"Bearer {api_key}",
174
  "Content-Type": "application/json",
175
- "HTTP-Referer": "https://your-app.com", # Optional: for analytics
176
- "X-Title": "High-Performance Chat API" # Optional: for analytics
177
  }
178
 
179
  self.session_pool[api_key] = aiohttp.ClientSession(
@@ -185,91 +319,185 @@ class OpenRouterClient:
185
 
186
  return self.session_pool[api_key]
187
 
188
- async def chat_completion(self, request: ChatRequest) -> Dict[str, Any]:
189
- """Send chat completion request with automatic key rotation"""
 
 
 
 
 
190
  start_time = time.time()
191
- api_key = self.key_manager.get_next_key()
192
 
193
- try:
194
- session = await self.get_session(api_key)
195
-
196
- # Prepare messages
197
- messages = []
198
-
199
- # Add system prompt if provided
200
- if request.system_prompt:
201
- messages.append({"role": "system", "content": request.system_prompt})
202
-
203
- # Add user messages
204
- messages.extend([msg.dict() for msg in request.messages])
205
-
206
- # Prepare request payload
207
- payload = {
208
- "model": request.model,
209
- "messages": messages,
210
- "max_tokens": request.max_tokens,
211
- "temperature": request.temperature,
212
- "top_p": request.top_p,
213
- "frequency_penalty": request.frequency_penalty,
214
- "presence_penalty": request.presence_penalty,
215
- "stream": request.stream
216
  }
 
 
 
 
 
 
217
 
218
- # Add provider preferences if specified
219
- if request.provider:
220
- provider_dict = request.provider.dict(exclude_none=True)
221
- if provider_dict:
222
- payload["provider"] = provider_dict
223
-
224
- logger.debug(f"Sending request to {request.model} with key ending in ...{api_key[-4:]}")
225
-
226
- async with session.post(f"{self.base_url}/chat/completions", json=payload) as response:
227
- response_time = time.time() - start_time
228
 
229
- if response.status == 200:
230
- result = await response.json()
231
-
232
- # Extract provider information if available
233
- provider_used = None
234
- if "model" in result and "/" in result["model"]:
235
- # Sometimes the response model includes provider info
236
- provider_used = result["model"].split("/")[0]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
237
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
238
  return {
239
- "success": True,
240
- "data": result,
241
- "response_time": response_time,
242
- "provider_used": provider_used,
243
- "api_key_used": api_key[-4:] # Last 4 characters for debugging
244
  }
 
 
 
 
 
 
 
 
 
245
  else:
246
- error_data = await response.text()
247
- logger.error(f"OpenRouter API error {response.status}: {error_data}")
248
-
249
- # Record error for this key
250
- self.key_manager.record_error(api_key)
251
-
252
  return {
253
  "success": False,
254
- "error": f"API error {response.status}: {error_data}",
255
- "response_time": response_time
 
 
256
  }
257
 
258
- except Exception as e:
259
- response_time = time.time() - start_time
260
- logger.error(f"Request failed with key ...{api_key[-4:]}: {str(e)}")
261
-
262
- # Record error for this key
263
- self.key_manager.record_error(api_key)
264
-
265
  return {
266
  "success": False,
267
- "error": str(e),
268
- "response_time": response_time
 
 
 
 
 
 
 
 
 
269
  }
270
 
271
  async def stream_chat_completion(self, request: ChatRequest):
272
- """Stream chat completion response"""
 
 
 
 
 
 
 
 
 
 
273
  api_key = self.key_manager.get_next_key()
274
 
275
  try:
@@ -306,12 +534,29 @@ class OpenRouterClient:
306
  else:
307
  error_data = await response.text()
308
  self.key_manager.record_error(api_key)
309
- yield f"data: {json.dumps({'error': f'API error {response.status}: {error_data}'})}\n\n".encode()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
310
 
311
  except Exception as e:
312
  logger.error(f"Streaming failed with key ...{api_key[-4:]}: {str(e)}")
313
  self.key_manager.record_error(api_key)
314
- yield f"data: {json.dumps({'error': str(e)})}\n\n".encode()
315
 
316
  async def close_all_sessions(self):
317
  """Close all aiohttp sessions"""
@@ -320,31 +565,36 @@ class OpenRouterClient:
320
  self.session_pool.clear()
321
 
322
  # Global variables
323
- client: Optional[OpenRouterClient] = None
324
  key_manager: Optional[APIKeyManager] = None
 
325
 
326
  @asynccontextmanager
327
  async def lifespan(app: FastAPI):
328
  """Startup and shutdown events"""
329
- global client, key_manager
330
 
331
  # Startup
332
- logger.info("Starting OpenRouter Chat API...")
333
 
334
  # Load API keys from environment
335
  api_keys_str = os.getenv("OPENROUTER_API_KEYS", "")
336
  if not api_keys_str:
337
  raise ValueError("OPENROUTER_API_KEYS environment variable is required")
338
-
339
  api_keys = [key.strip() for key in api_keys_str.split(",") if key.strip()]
340
  if not api_keys:
341
  raise ValueError("No valid API keys found in OPENROUTER_API_KEYS")
342
-
343
- # Initialize key manager and client
 
344
  key_manager = APIKeyManager(api_keys)
345
- client = OpenRouterClient(key_manager)
 
 
 
346
 
347
- logger.info(f"API initialized with {len(api_keys)} keys")
348
 
349
  yield
350
 
@@ -355,8 +605,8 @@ async def lifespan(app: FastAPI):
355
 
356
  # Create FastAPI app
357
  app = FastAPI(
358
- title="High-Performance OpenRouter Chat API",
359
- description="Scalable chat completions API with multiple key rotation and parallel processing",
360
  version="1.0.0",
361
  lifespan=lifespan
362
  )
@@ -365,29 +615,47 @@ app = FastAPI(
365
  async def root():
366
  """Root endpoint with API information"""
367
  return {
368
- "message": "High-Performance OpenRouter Chat API",
369
  "version": "1.0.0",
370
  "endpoints": {
371
  "chat": "/api/chat",
372
  "chat_stream": "/api/chat (with stream=true)",
 
373
  "stats": "/api/stats",
374
  "health": "/health"
375
  },
376
  "features": [
377
  "Multiple API key rotation",
 
378
  "Connection pooling",
379
  "Parallel processing",
380
  "Provider routing",
381
  "Streaming support",
382
- "Rate limiting"
 
383
  ]
384
  }
385
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
386
  @app.post("/api/chat", response_model=ChatResponse)
387
  async def chat_completion(request: ChatRequest):
388
- """Send chat completion request"""
389
  if not client:
390
- raise HTTPException(status_code=503, detail="Service not initialized")
391
 
392
  try:
393
  # Handle streaming requests
@@ -412,16 +680,24 @@ async def chat_completion(request: ChatRequest):
412
  timestamp=datetime.now().isoformat()
413
  )
414
  else:
415
- raise HTTPException(
416
- status_code=500,
417
- detail=f"Chat completion failed: {result['error']}"
418
- )
 
 
 
 
 
419
 
420
  except HTTPException:
421
  raise
422
  except Exception as e:
423
  logger.error(f"Unexpected error in chat_completion: {str(e)}")
424
- raise HTTPException(status_code=500, detail=str(e))
 
 
 
425
 
426
  @app.get("/api/stats", response_model=Dict)
427
  async def get_api_stats():
@@ -457,7 +733,7 @@ async def get_api_stats():
457
  @app.get("/health")
458
  async def health_check():
459
  """Health check endpoint"""
460
- if not client or not key_manager:
461
  return JSONResponse(
462
  status_code=503,
463
  content={
@@ -465,80 +741,4 @@ async def health_check():
465
  "message": "Service not initialized",
466
  "timestamp": datetime.now().isoformat()
467
  }
468
- )
469
-
470
- try:
471
- stats = key_manager.get_stats()
472
- return {
473
- "status": "healthy",
474
- "api_keys_loaded": len(stats),
475
- "total_requests": sum(stat["requests"] for stat in stats.values()),
476
- "timestamp": datetime.now().isoformat()
477
- }
478
-
479
- except Exception as e:
480
- return JSONResponse(
481
- status_code=503,
482
- content={
483
- "status": "unhealthy",
484
- "error": str(e),
485
- "timestamp": datetime.now().isoformat()
486
- }
487
- )
488
-
489
- # Batch processing endpoint for high throughput
490
- @app.post("/api/chat/batch")
491
- async def batch_chat_completions(requests: List[ChatRequest]):
492
- """Process multiple chat requests in parallel"""
493
- if not client:
494
- raise HTTPException(status_code=503, detail="Service not initialized")
495
-
496
- if len(requests) > 50: # Limit batch size
497
- raise HTTPException(status_code=400, detail="Batch size limited to 50 requests")
498
-
499
- try:
500
- # Process all requests in parallel
501
- tasks = [client.chat_completion(req) for req in requests]
502
- results = await asyncio.gather(*tasks, return_exceptions=True)
503
-
504
- # Format results
505
- responses = []
506
- for i, (request, result) in enumerate(zip(requests, results)):
507
- if isinstance(result, Exception):
508
- responses.append({
509
- "request_index": i,
510
- "success": False,
511
- "error": str(result)
512
- })
513
- elif result["success"]:
514
- responses.append({
515
- "request_index": i,
516
- "success": True,
517
- "model": request.model,
518
- "choices": result["data"].get("choices", []),
519
- "usage": result["data"].get("usage"),
520
- "response_time": result["response_time"],
521
- "provider_used": result.get("provider_used")
522
- })
523
- else:
524
- responses.append({
525
- "request_index": i,
526
- "success": False,
527
- "error": result["error"]
528
- })
529
-
530
- return {
531
- "success": True,
532
- "batch_size": len(requests),
533
- "results": responses,
534
- "timestamp": datetime.now().isoformat()
535
- }
536
-
537
- except Exception as e:
538
- logger.error(f"Batch processing failed: {str(e)}")
539
- raise HTTPException(status_code=500, detail=str(e))
540
-
541
- @app.get("/ping")
542
- async def ping():
543
- """Health check endpoint"""
544
- return {"status": "ok", "timestamp": datetime.now().isoformat()}
 
3
  import logging
4
  import os
5
  import random
6
+ from typing import List, Dict, Optional, Any, Set
7
  from fastapi import FastAPI, HTTPException, BackgroundTasks
8
  from fastapi.responses import StreamingResponse, JSONResponse
9
  from pydantic import BaseModel, Field
 
27
  )
28
  logger = logging.getLogger(__name__)
29
 
30
# Error handling
class ErrorHandler:
    """Translate raw OpenRouter HTTP errors into user-friendly chatcsvandpdf messages."""

    @staticmethod
    def get_user_friendly_error(status_code: int, error_message: str, model: Optional[str] = None) -> dict:
        """Map an upstream error to a branded message/suggestion pair.

        Args:
            status_code: HTTP status returned by the upstream API.
            error_message: Raw error text from the upstream response body.
            model: Model identifier the request targeted, if known.

        Returns:
            dict with "message" and "suggestion" keys; unknown status codes
            additionally carry "technical_info" with the raw status and message.
        """
        friendly_messages = {
            400: {
                "message": "Invalid request format. Please check your message and try again.",
                "suggestion": "Verify that your request parameters are correctly formatted."
            },
            401: {
                "message": "Authentication issue with chatcsvandpdf service.",
                "suggestion": "This is a temporary service issue. Please try again in a moment."
            },
            402: {
                "message": "chatcsvandpdf service is temporarily at capacity.",
                "suggestion": "Please try again in a few minutes or use a different model."
            },
            403: {
                "message": "Your message was flagged by our content moderation system.",
                "suggestion": "Please rephrase your message and avoid potentially harmful content."
            },
            408: {
                "message": "Request timed out. The model took too long to respond.",
                "suggestion": "Try shortening your message or using a faster model."
            },
            429: {
                "message": f"Rate limit reached for {'model ' + model if model else 'this service'}. Please try again later.",
                "suggestion": "chatcsvandpdf is currently experiencing high demand. Please wait a moment and retry, or try a different model."
            },
            502: {
                "message": f"The {'model ' + model if model else 'selected model'} is currently unavailable.",
                "suggestion": "This model is temporarily down. Please try a different model or wait a few minutes."
            },
            503: {
                "message": "No available providers meet your requirements.",
                "suggestion": "Try adjusting your provider preferences or use a different model."
            }
        }

        # Unknown status codes fall back to a generic message, but keep the
        # raw status/message around for debugging.
        if status_code not in friendly_messages:
            return {
                "message": "chatcsvandpdf service encountered an unexpected issue.",
                "suggestion": "Please try again. If the problem persists, contact support.",
                "technical_info": f"Error {status_code}: {error_message}"
            }

        error_info = friendly_messages[status_code].copy()

        # 429: distinguish free-tier model throttling from account quota/credit
        # exhaustion based on the model name and the raw error text.
        if status_code == 429:
            if "free" in str(model).lower():
                error_info["message"] = f"Free model {model} is currently rate-limited."
                error_info["suggestion"] = "Free models have usage limits. Try again in a few minutes or upgrade to a premium model."
            elif "quota" in error_message.lower() or "credit" in error_message.lower():
                error_info["message"] = "chatcsvandpdf service quota reached."
                error_info["suggestion"] = "Our service is at capacity. Please try again later."

        # 502 with a known model: name the failing model explicitly.
        if status_code == 502 and model:
            error_info["message"] = f"Model {model} is temporarily unavailable."
            error_info["suggestion"] = "This model is experiencing issues. Try another model or wait a few minutes."

        return error_info
98
+
99
  # Pydantic models
100
  class Message(BaseModel):
101
  role: str = Field(..., description="Role: 'system', 'user', or 'assistant'")
 
147
  provider_used: Optional[str] = None
148
  timestamp: str
149
 
150
class ModelValidator:
    """Caches the set of model names accepted by the inference service.

    The cache is refreshed at most once per `update_interval` seconds.
    `self.lock` (a threading.Lock) guards reads/writes of `valid_models` and
    `last_updated` only; it is never held across an await, since blocking
    the event loop for the duration of an HTTP fetch would stall every
    other in-flight request.
    """

    def __init__(self):
        self.valid_models: Set[str] = set()   # model ids considered valid
        self.last_updated: float = 0          # epoch seconds of last successful refresh
        self.update_interval: float = 3600    # refresh at most once per hour
        self.models_endpoint = "https://xce009-inference-test.hf.space/api/free-models/names"
        self.lock = threading.Lock()

    async def fetch_valid_models(self) -> Set[str]:
        """Fetch valid model names from the inference service.

        Returns an empty set on any HTTP or network failure (the caller
        treats an empty result as "keep the stale cache").
        """
        try:
            async with aiohttp.ClientSession() as session:
                async with session.get(
                    self.models_endpoint,
                    timeout=aiohttp.ClientTimeout(total=10)
                ) as response:
                    if response.status == 200:
                        data = await response.json()

                        # Response shape: {"models": [...]} where entries are
                        # either plain id strings or dicts carrying an "id".
                        models_list = data.get("models", [])

                        models = set()
                        for item in models_list:
                            if isinstance(item, dict) and "id" in item:
                                models.add(item["id"])
                            elif isinstance(item, str):
                                models.add(item)

                        logger.info(f"Fetched {len(models)} valid models from inference service")
                        return models
                    else:
                        logger.error(f"Failed to fetch models: HTTP {response.status}")
                        return set()

        except Exception as e:
            logger.error(f"Error fetching valid models: {str(e)}")
            return set()

    async def update_models_if_needed(self):
        """Refresh the cached model list if it is stale or empty.

        NOTE(review): the previous version held the threading.Lock across the
        await of fetch_valid_models(), blocking the event loop for up to the
        10s HTTP timeout. Here the network call happens outside the lock.
        """
        current_time = time.time()

        with self.lock:
            needs_update = (
                current_time - self.last_updated > self.update_interval
                or not self.valid_models
            )

        if not needs_update:
            return

        logger.info("Updating valid models list...")
        new_models = await self.fetch_valid_models()
        if new_models:  # only replace the cache when we got valid data
            with self.lock:
                self.valid_models = new_models
                self.last_updated = current_time
            logger.info(f"Updated valid models list with {len(new_models)} models")

    def is_valid_model(self, model_name: str) -> bool:
        """Check if a model name is in the cached valid set."""
        with self.lock:
            return model_name in self.valid_models

    def get_valid_models(self) -> List[str]:
        """Return the cached valid models as a sorted list."""
        with self.lock:
            return sorted(list(self.valid_models))
212
+
213
  class APIKeyManager:
214
  """Manages multiple API keys with rotation and rate limiting"""
215
 
 
273
  with self.lock:
274
  return dict(self.key_stats)
275
 
276
+ class InferenceClient:
277
+ """High-performance inference client with connection pooling and enhanced error handling"""
278
 
279
+ def __init__(self, key_manager: APIKeyManager, model_validator: ModelValidator):
280
  self.key_manager = key_manager
281
+ self.model_validator = model_validator
282
  self.base_url = "https://openrouter.ai/api/v1"
283
  self.session_pool = {}
284
+ self.max_connections = 100
285
  self.max_connections_per_host = 20
286
+ self.error_handler = ErrorHandler()
287
 
288
  async def get_session(self, api_key: str) -> aiohttp.ClientSession:
289
  """Get or create a session for the API key"""
 
293
  limit_per_host=self.max_connections_per_host,
294
  keepalive_timeout=30,
295
  enable_cleanup_closed=True,
296
+ ttl_dns_cache=300,
297
  use_dns_cache=True
298
  )
299
 
300
  timeout = aiohttp.ClientTimeout(
301
+ total=60,
302
+ connect=10,
303
+ sock_read=30
304
  )
305
 
306
  headers = {
307
  "Authorization": f"Bearer {api_key}",
308
  "Content-Type": "application/json",
309
+ "HTTP-Referer": "https://chatcsvandpdf.com",
310
+ "X-Title": "chatcsvandpdf API"
311
  }
312
 
313
  self.session_pool[api_key] = aiohttp.ClientSession(
 
319
 
320
  return self.session_pool[api_key]
321
 
322
+ def _should_retry_with_different_key(self, status_code: int) -> bool:
323
+ """Determine if we should retry with a different API key"""
324
+ retry_codes = {401, 402, 429} # Auth issues, credits, rate limits
325
+ return status_code in retry_codes
326
+
327
+ async def chat_completion(self, request: ChatRequest, max_retries: int = 2) -> Dict[str, Any]:
328
+ """Send chat completion request with enhanced error handling and retries"""
329
  start_time = time.time()
 
330
 
331
+ # Update models list if needed
332
+ await self.model_validator.update_models_if_needed()
333
+
334
+ # Validate model - if no models loaded, skip validation
335
+ if self.model_validator.valid_models and not self.model_validator.is_valid_model(request.model):
336
+ valid_models = self.model_validator.get_valid_models()
337
+ return {
338
+ "success": False,
339
+ "error": f"Model '{request.model}' is not available in chatcsvandpdf.",
340
+ "suggestion": f"Try one of these available models: {', '.join(valid_models[:5])}{'...' if len(valid_models) > 5 else ''}",
341
+ "response_time": time.time() - start_time
 
 
 
 
 
 
 
 
 
 
 
 
342
  }
343
+
344
+ last_error = None
345
+
346
+ # Try with different API keys if needed
347
+ for attempt in range(max_retries + 1):
348
+ api_key = self.key_manager.get_next_key()
349
 
350
+ try:
351
+ session = await self.get_session(api_key)
 
 
 
 
 
 
 
 
352
 
353
+ # Prepare messages
354
+ messages = []
355
+ if request.system_prompt:
356
+ messages.append({"role": "system", "content": request.system_prompt})
357
+ messages.extend([msg.dict() for msg in request.messages])
358
+
359
+ # Prepare request payload
360
+ payload = {
361
+ "model": request.model,
362
+ "messages": messages,
363
+ "max_tokens": request.max_tokens,
364
+ "temperature": request.temperature,
365
+ "top_p": request.top_p,
366
+ "frequency_penalty": request.frequency_penalty,
367
+ "presence_penalty": request.presence_penalty,
368
+ "stream": request.stream
369
+ }
370
+
371
+ # Add provider preferences if specified
372
+ if request.provider:
373
+ provider_dict = request.provider.dict(exclude_none=True)
374
+ if provider_dict:
375
+ payload["provider"] = provider_dict
376
+
377
+ logger.debug(f"Attempt {attempt + 1}: Sending request to {request.model} with key ending in ...{api_key[-4:]}")
378
+
379
+ async with session.post(f"{self.base_url}/chat/completions", json=payload) as response:
380
+ response_time = time.time() - start_time
381
 
382
+ if response.status == 200:
383
+ result = await response.json()
384
+
385
+ # Extract provider information if available
386
+ provider_used = None
387
+ if "model" in result and "/" in result["model"]:
388
+ provider_used = result["model"].split("/")[0]
389
+
390
+ return {
391
+ "success": True,
392
+ "data": result,
393
+ "response_time": response_time,
394
+ "provider_used": provider_used,
395
+ "api_key_used": api_key[-4:]
396
+ }
397
+ else:
398
+ error_data = await response.text()
399
+ logger.warning(f"API error {response.status} on attempt {attempt + 1}: {error_data}")
400
+
401
+ # Parse error response if JSON
402
+ try:
403
+ error_json = json.loads(error_data)
404
+ original_error = error_json.get("error", {}).get("message", error_data)
405
+ except:
406
+ original_error = error_data
407
+
408
+ # Record error for this key
409
+ self.key_manager.record_error(api_key)
410
+
411
+ # Check if we should retry with a different key
412
+ if self._should_retry_with_different_key(response.status) and attempt < max_retries:
413
+ last_error = {
414
+ "status": response.status,
415
+ "message": original_error,
416
+ "attempt": attempt + 1
417
+ }
418
+ # Wait briefly before retry
419
+ await asyncio.sleep(min(2 ** attempt, 5)) # Exponential backoff, max 5s
420
+ continue
421
+ else:
422
+ # Final attempt or non-retryable error
423
+ error_info = self.error_handler.get_user_friendly_error(
424
+ response.status, original_error, request.model
425
+ )
426
+
427
+ return {
428
+ "success": False,
429
+ "error": error_info["message"],
430
+ "suggestion": error_info["suggestion"],
431
+ "response_time": response_time,
432
+ "attempts_made": attempt + 1
433
+ }
434
+
435
+ except asyncio.TimeoutError:
436
+ logger.warning(f"Timeout on attempt {attempt + 1} with key ...{api_key[-4:]}")
437
+ self.key_manager.record_error(api_key)
438
+
439
+ if attempt < max_retries:
440
+ last_error = {"status": 408, "message": "Request timeout", "attempt": attempt + 1}
441
+ await asyncio.sleep(min(2 ** attempt, 5))
442
+ continue
443
+ else:
444
  return {
445
+ "success": False,
446
+ "error": "chatcsvandpdf service timed out processing your request.",
447
+ "suggestion": "Try shortening your message or using a different model.",
448
+ "response_time": time.time() - start_time,
449
+ "attempts_made": attempt + 1
450
  }
451
+
452
+ except Exception as e:
453
+ logger.error(f"Request failed on attempt {attempt + 1} with key ...{api_key[-4:]}: {str(e)}")
454
+ self.key_manager.record_error(api_key)
455
+
456
+ if attempt < max_retries:
457
+ last_error = {"status": 500, "message": str(e), "attempt": attempt + 1}
458
+ await asyncio.sleep(min(2 ** attempt, 5))
459
+ continue
460
  else:
 
 
 
 
 
 
461
  return {
462
  "success": False,
463
+ "error": "chatcsvandpdf service encountered an unexpected issue.",
464
+ "suggestion": "Please try again. If the problem persists, contact support.",
465
+ "response_time": time.time() - start_time,
466
+ "attempts_made": attempt + 1
467
  }
468
 
469
+ # If we get here, all attempts failed
470
+ if last_error:
471
+ error_info = self.error_handler.get_user_friendly_error(
472
+ last_error["status"], last_error["message"], request.model
473
+ )
 
 
474
  return {
475
  "success": False,
476
+ "error": error_info["message"],
477
+ "suggestion": error_info["suggestion"],
478
+ "response_time": time.time() - start_time,
479
+ "attempts_made": max_retries + 1
480
+ }
481
+ else:
482
+ return {
483
+ "success": False,
484
+ "error": "chatcsvandpdf service is currently unavailable.",
485
+ "suggestion": "Please try again later.",
486
+ "response_time": time.time() - start_time
487
  }
488
 
489
  async def stream_chat_completion(self, request: ChatRequest):
490
+ """Stream chat completion response with enhanced error handling"""
491
+ # Update models list if needed
492
+ await self.model_validator.update_models_if_needed()
493
+
494
+ # Validate model - if no models loaded, skip validation
495
+ if self.model_validator.valid_models and not self.model_validator.is_valid_model(request.model):
496
+ valid_models = self.model_validator.get_valid_models()
497
+ error_msg = f"Model '{request.model}' is not available in chatcsvandpdf. Try: {', '.join(valid_models[:3])}"
498
+ yield f"data: {json.dumps({'error': error_msg})}\n\n".encode()
499
+ return
500
+
501
  api_key = self.key_manager.get_next_key()
502
 
503
  try:
 
534
  else:
535
  error_data = await response.text()
536
  self.key_manager.record_error(api_key)
537
+
538
+ # Parse error and provide user-friendly message
539
+ try:
540
+ error_json = json.loads(error_data)
541
+ original_error = error_json.get("error", {}).get("message", error_data)
542
+ except:
543
+ original_error = error_data
544
+
545
+ error_info = self.error_handler.get_user_friendly_error(
546
+ response.status, original_error, request.model
547
+ )
548
+
549
+ yield f"data: {json.dumps({'error': error_info['message'], 'suggestion': error_info['suggestion']})}\n\n".encode()
550
+
551
+ except asyncio.TimeoutError:
552
+ logger.error(f"Streaming timeout with key ...{api_key[-4:]}")
553
+ self.key_manager.record_error(api_key)
554
+ yield f"data: {json.dumps({'error': 'chatcsvandpdf request timed out. Try a shorter message or different model.'})}\n\n".encode()
555
 
556
  except Exception as e:
557
  logger.error(f"Streaming failed with key ...{api_key[-4:]}: {str(e)}")
558
  self.key_manager.record_error(api_key)
559
+ yield f"data: {json.dumps({'error': 'chatcsvandpdf service encountered an issue. Please try again.'})}\n\n".encode()
560
 
561
  async def close_all_sessions(self):
562
  """Close all aiohttp sessions"""
 
565
  self.session_pool.clear()
566
 
567
  # Global variables
568
+ client: Optional[InferenceClient] = None
569
  key_manager: Optional[APIKeyManager] = None
570
+ model_validator: Optional[ModelValidator] = None
571
 
572
  @asynccontextmanager
573
  async def lifespan(app: FastAPI):
574
  """Startup and shutdown events"""
575
+ global client, key_manager, model_validator
576
 
577
  # Startup
578
+ logger.info("Starting chatcsvandpdf API...")
579
 
580
  # Load API keys from environment
581
  api_keys_str = os.getenv("OPENROUTER_API_KEYS", "")
582
  if not api_keys_str:
583
  raise ValueError("OPENROUTER_API_KEYS environment variable is required")
584
+
585
  api_keys = [key.strip() for key in api_keys_str.split(",") if key.strip()]
586
  if not api_keys:
587
  raise ValueError("No valid API keys found in OPENROUTER_API_KEYS")
588
+
589
+ # Initialize components
590
+ model_validator = ModelValidator()
591
  key_manager = APIKeyManager(api_keys)
592
+ client = InferenceClient(key_manager, model_validator)
593
+
594
+ # Initial model fetch
595
+ await model_validator.update_models_if_needed()
596
 
597
+ logger.info(f"API initialized with {len(api_keys)} keys and {len(model_validator.get_valid_models())} available models")
598
 
599
  yield
600
 
 
605
 
606
  # Create FastAPI app
607
  app = FastAPI(
608
+ title="chatcsvandpdf API",
609
+ description="High-performance chat completions API with model validation and multiple key rotation",
610
  version="1.0.0",
611
  lifespan=lifespan
612
  )
 
615
async def root():
    """Describe the service: banner message, version, routes, and feature list."""
    endpoints = {
        "chat": "/api/chat",
        "chat_stream": "/api/chat (with stream=true)",
        "models": "/api/models",
        "stats": "/api/stats",
        "health": "/health",
    }
    features = [
        "Multiple API key rotation",
        "Model validation",
        "Connection pooling",
        "Parallel processing",
        "Provider routing",
        "Streaming support",
        "Rate limiting",
        "Enhanced error handling",
    ]
    return {
        "message": "chatcsvandpdf API",
        "version": "1.0.0",
        "endpoints": endpoints,
        "features": features,
    }
638
 
639
@app.get("/api/models")
async def get_available_models():
    """List the models currently accepted, with cache-freshness information."""
    if not model_validator:
        raise HTTPException(status_code=503, detail="Service not initialized")

    await model_validator.update_models_if_needed()
    models = model_validator.get_valid_models()

    # Report when the cache was last refreshed; "Never" if no fetch succeeded.
    if model_validator.last_updated > 0:
        refreshed = datetime.fromtimestamp(model_validator.last_updated).isoformat()
    else:
        refreshed = "Never"

    return {
        "models": models,
        "total_count": len(models),
        "last_updated": refreshed,
    }
653
+
654
  @app.post("/api/chat", response_model=ChatResponse)
655
  async def chat_completion(request: ChatRequest):
656
+ """Send chat completion request with enhanced error handling"""
657
  if not client:
658
+ raise HTTPException(status_code=503, detail="chatcsvandpdf service is starting up. Please try again in a moment.")
659
 
660
  try:
661
  # Handle streaming requests
 
680
  timestamp=datetime.now().isoformat()
681
  )
682
  else:
683
+ # Return user-friendly error message
684
+ error_detail = result["error"]
685
+ if "suggestion" in result:
686
+ error_detail += f" {result['suggestion']}"
687
+
688
+ # Determine appropriate HTTP status code
689
+ status_code = 400 if "not available" in result["error"] else 503
690
+
691
+ raise HTTPException(status_code=status_code, detail=error_detail)
692
 
693
  except HTTPException:
694
  raise
695
  except Exception as e:
696
  logger.error(f"Unexpected error in chat_completion: {str(e)}")
697
+ raise HTTPException(
698
+ status_code=503,
699
+ detail="chatcsvandpdf service encountered an unexpected issue. Please try again."
700
+ )
701
 
702
  @app.get("/api/stats", response_model=Dict)
703
  async def get_api_stats():
 
733
  @app.get("/health")
734
  async def health_check():
735
  """Health check endpoint"""
736
+ if not client or not key_manager or not model_validator:
737
  return JSONResponse(
738
  status_code=503,
739
  content={
 
741
  "message": "Service not initialized",
742
  "timestamp": datetime.now().isoformat()
743
  }
744
+ )