Soumik555 committed on
Commit
b52ac3f
·
0 Parent(s):

chat-api-added requirements.txt

Browse files
Dockerfile ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.10-slim-bullseye

# Set working directory
WORKDIR /app

# Install system dependencies first (gcc/g++ for building wheels, curl for the keep-alive loop below)
RUN apt-get update && apt-get install -y \
    gcc \
    g++ \
    curl \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements file first to leverage Docker layer cache
COPY requirements.txt .

# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Copy application code
COPY . .

# Set environment variables
# HF_HOME: Hugging Face cache directory; /tmp is writable on restricted hosts
ENV HF_HOME=/tmp/cache
ENV PORT=7860

# Create cache directory (if still needed)
RUN mkdir -p ${HF_HOME} && chmod 777 ${HF_HOME}

# Expose port (EXPOSE expands ENV vars declared above)
EXPOSE $PORT

# Command to run the FastAPI app.
# NOTE(review): the backgrounded `while true; do curl ...` loop pings an
# external Space every 300s as a keep-alive hack; confirm that endpoint is
# still wanted, since the container depends on it being reachable at runtime.
CMD bash -c "while true; do curl -s https://xce009-inference-test.hf.space/ping >/dev/null && sleep 300 || sleep 300; done & uvicorn chat_api:app --host 0.0.0.0 --port $PORT"
README.md ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
---
title: My Inference Space
emoji: 🚀
colorFrom: blue
colorTo: purple
sdk: docker
# NOTE(review): with `sdk: docker` the Dockerfile CMD runs `chat_api:app`;
# `app_file: main.py` looks stale — confirm which entry point is intended.
app_file: main.py
pinned: false
---
__pycache__/main.cpython-311.pyc ADDED
Binary file (18.8 kB). View file
 
__pycache__/model_service_api.cpython-311.pyc ADDED
Binary file (18.8 kB). View file
 
__pycache__/openrouter_chat_api.cpython-311.pyc ADDED
Binary file (30.8 kB). View file
 
chat_api.py ADDED
@@ -0,0 +1,540 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import aiohttp
3
+ import logging
4
+ import os
5
+ import random
6
+ from typing import List, Dict, Optional, Any
7
+ from fastapi import FastAPI, HTTPException, BackgroundTasks
8
+ from fastapi.responses import StreamingResponse, JSONResponse
9
+ from pydantic import BaseModel, Field
10
+ import uvicorn
11
+ from datetime import datetime
12
+ import json
13
+ import time
14
+ from collections import defaultdict, deque
15
+ import threading
16
+ from contextlib import asynccontextmanager
17
+ from dotenv import load_dotenv
18
+
19
# Load environment variables from .env file (no-op when the file is absent)
load_dotenv()

# Setup logging: one root configuration for the whole process
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(name)s - %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
)
# Module-level logger shared by every component in this file
logger = logging.getLogger(__name__)
29
+
30
# Pydantic models
class Message(BaseModel):
    """A single chat message in the OpenAI/OpenRouter wire format."""

    role: str = Field(..., description="Role: 'system', 'user', or 'assistant'")
    content: str = Field(..., description="Message content")
34
+
35
class ProviderPreferences(BaseModel):
    """Provider-routing preferences forwarded verbatim as the ``provider``
    object of an OpenRouter /chat/completions request (all fields optional)."""

    sort: Optional[str] = Field(None, description="Sort by 'price', 'throughput', or 'latency'")
    order: Optional[List[str]] = Field(None, description="Specific provider order")
    allow_fallbacks: Optional[bool] = Field(True, description="Allow fallback providers")
    require_parameters: Optional[bool] = Field(False, description="Require all parameters support")
    data_collection: Optional[str] = Field("allow", description="'allow' or 'deny' data collection")
    only: Optional[List[str]] = Field(None, description="Only use these providers")
    ignore: Optional[List[str]] = Field(None, description="Ignore these providers")
    quantizations: Optional[List[str]] = Field(None, description="Required quantization levels")
    max_price: Optional[Dict[str, float]] = Field(None, description="Maximum pricing constraints")
45
+
46
class ChatRequest(BaseModel):
    """Inbound request body for /api/chat and /api/chat/batch.

    Mirrors the OpenRouter /chat/completions parameters; ``system_prompt`` is a
    convenience that gets prepended as a system message before the payload is sent.
    """

    model: str = Field(..., description="Model ID (e.g., 'openai/gpt-3.5-turbo')")
    messages: List[Message] = Field(..., description="List of messages")
    system_prompt: Optional[str] = Field(None, description="System prompt (will be added as system message)")
    max_tokens: Optional[int] = Field(1000, description="Maximum tokens to generate")
    temperature: Optional[float] = Field(0.7, description="Temperature (0-2)")
    top_p: Optional[float] = Field(1.0, description="Top-p sampling")
    frequency_penalty: Optional[float] = Field(0.0, description="Frequency penalty")
    presence_penalty: Optional[float] = Field(0.0, description="Presence penalty")
    stream: Optional[bool] = Field(False, description="Enable streaming response")
    provider: Optional[ProviderPreferences] = Field(None, description="Provider routing preferences")

    class Config:
        # Example payload surfaced in the generated OpenAPI docs / Swagger UI.
        json_schema_extra = {
            "example": {
                "model": "openai/gpt-3.5-turbo",
                "messages": [
                    {"role": "user", "content": "Hello, how are you?"}
                ],
                "system_prompt": "You are a helpful assistant.",
                "max_tokens": 1000,
                "temperature": 0.7,
                "stream": False
            }
        }
71
+
72
class ChatResponse(BaseModel):
    """Normalized response returned by /api/chat (non-streaming path only;
    failures raise HTTPException instead of returning success=False)."""

    success: bool
    model: str  # model ID echoed from the request
    choices: List[Dict[str, Any]]  # raw `choices` array from the upstream response
    usage: Optional[Dict[str, Any]]  # raw token-usage block, when the provider returns one
    response_time: float  # wall-clock seconds spent on the upstream call
    provider_used: Optional[str] = None  # best-effort provider name parsed from the response model
    timestamp: str  # ISO-8601 time the response was assembled
80
+
81
class APIKeyManager:
    """Manages multiple API keys with round-robin rotation and rate limiting.

    Thread-safe: all shared state (counters, rotation index, sliding windows)
    is guarded by ``self.lock``.
    """

    def __init__(self, api_keys: List[str]):
        """Initialize with a non-empty list of API keys.

        Raises:
            ValueError: if ``api_keys`` is empty.
        """
        if not api_keys:
            raise ValueError("At least one API key is required")

        self.api_keys = api_keys
        # Per-key counters: total requests, total errors, last-used timestamp.
        self.key_stats = {key: {"requests": 0, "errors": 0, "last_used": 0} for key in api_keys}
        self.current_index = 0
        self.lock = threading.Lock()

        # Sliding window of request timestamps per key (last 60 seconds).
        self.rate_limits = {key: deque() for key in api_keys}
        self.max_requests_per_minute = 60  # Conservative estimate

        logger.info(f"Initialized API key manager with {len(api_keys)} keys")

    def get_next_key(self) -> str:
        """Get the next available API key using round-robin with rate limiting.

        Falls back to the least-recently-used key when every key is at its
        per-minute limit; the request is still counted against that key.
        """
        with self.lock:
            current_time = time.time()

            # Try to find a key that's not rate limited
            for _ in range(len(self.api_keys)):
                key = self.api_keys[self.current_index]

                # Drop timestamps older than the 60-second window
                while (self.rate_limits[key] and
                       current_time - self.rate_limits[key][0] > 60):
                    self.rate_limits[key].popleft()

                # Check if this key can handle more requests
                if len(self.rate_limits[key]) < self.max_requests_per_minute:
                    self.rate_limits[key].append(current_time)
                    self.key_stats[key]["requests"] += 1
                    self.key_stats[key]["last_used"] = current_time

                    # Move to next key for next request
                    self.current_index = (self.current_index + 1) % len(self.api_keys)
                    return key

                # Try next key
                self.current_index = (self.current_index + 1) % len(self.api_keys)

            # If all keys are rate limited, use the one with the oldest request
            oldest_key = min(self.api_keys,
                             key=lambda k: self.key_stats[k]["last_used"])
            # FIX: record this request in the sliding window too, so per-minute
            # accounting stays accurate on the fallback path (previously only
            # key_stats was bumped and the window under-counted).
            self.rate_limits[oldest_key].append(current_time)
            self.key_stats[oldest_key]["requests"] += 1
            self.key_stats[oldest_key]["last_used"] = current_time
            return oldest_key

    def record_error(self, api_key: str):
        """Record an error for an API key (unknown keys are ignored)."""
        with self.lock:
            if api_key in self.key_stats:
                self.key_stats[api_key]["errors"] += 1

    def get_stats(self) -> Dict:
        """Return a snapshot of per-key statistics.

        FIX: copy the inner dicts as well — the previous shallow ``dict(...)``
        handed callers references to the live, lock-guarded counters, allowing
        unsynchronized reads/mutations outside the lock.
        """
        with self.lock:
            return {key: dict(stat) for key, stat in self.key_stats.items()}
143
+
144
class OpenRouterClient:
    """High-performance OpenRouter client with connection pooling.

    Keeps one aiohttp ClientSession per API key (the Authorization header is
    baked into the session), rotating keys via the injected APIKeyManager.
    NOTE(review): sessions are only closed by close_all_sessions(); nothing
    evicts them earlier, so the pool grows to one session per distinct key.
    """

    def __init__(self, key_manager: APIKeyManager):
        self.key_manager = key_manager
        self.base_url = "https://openrouter.ai/api/v1"
        # api_key -> aiohttp.ClientSession, lazily populated by get_session()
        self.session_pool = {}
        self.max_connections = 100  # Total connection pool
        self.max_connections_per_host = 20

    async def get_session(self, api_key: str) -> aiohttp.ClientSession:
        """Get or create the pooled session bound to ``api_key``."""
        if api_key not in self.session_pool:
            connector = aiohttp.TCPConnector(
                limit=self.max_connections,
                limit_per_host=self.max_connections_per_host,
                keepalive_timeout=30,
                enable_cleanup_closed=True,
                ttl_dns_cache=300,  # DNS cache TTL
                use_dns_cache=True
            )

            timeout = aiohttp.ClientTimeout(
                total=60,  # Total timeout
                connect=10,  # Connection timeout
                sock_read=30  # Socket read timeout
            )

            headers = {
                "Authorization": f"Bearer {api_key}",
                "Content-Type": "application/json",
                # NOTE(review): placeholder referer — set to the real app URL
                "HTTP-Referer": "https://your-app.com",  # Optional: for analytics
                "X-Title": "High-Performance Chat API"  # Optional: for analytics
            }

            # raise_for_status=False: HTTP errors are handled explicitly below
            self.session_pool[api_key] = aiohttp.ClientSession(
                connector=connector,
                timeout=timeout,
                headers=headers,
                raise_for_status=False
            )

        return self.session_pool[api_key]

    async def chat_completion(self, request: ChatRequest) -> Dict[str, Any]:
        """Send a non-streaming chat completion request with key rotation.

        Returns a dict with ``success`` plus either the parsed upstream
        ``data`` or an ``error`` string; never raises for upstream failures.
        """
        start_time = time.time()
        api_key = self.key_manager.get_next_key()

        try:
            session = await self.get_session(api_key)

            # Prepare messages
            messages = []

            # Add system prompt if provided (prepended before user messages)
            if request.system_prompt:
                messages.append({"role": "system", "content": request.system_prompt})

            # Add user messages
            messages.extend([msg.dict() for msg in request.messages])

            # Prepare request payload
            payload = {
                "model": request.model,
                "messages": messages,
                "max_tokens": request.max_tokens,
                "temperature": request.temperature,
                "top_p": request.top_p,
                "frequency_penalty": request.frequency_penalty,
                "presence_penalty": request.presence_penalty,
                "stream": request.stream
            }

            # Add provider preferences if specified (None fields omitted)
            if request.provider:
                provider_dict = request.provider.dict(exclude_none=True)
                if provider_dict:
                    payload["provider"] = provider_dict

            logger.debug(f"Sending request to {request.model} with key ending in ...{api_key[-4:]}")

            async with session.post(f"{self.base_url}/chat/completions", json=payload) as response:
                response_time = time.time() - start_time

                if response.status == 200:
                    result = await response.json()

                    # Extract provider information if available
                    provider_used = None
                    if "model" in result and "/" in result["model"]:
                        # Sometimes the response model includes provider info
                        provider_used = result["model"].split("/")[0]

                    return {
                        "success": True,
                        "data": result,
                        "response_time": response_time,
                        "provider_used": provider_used,
                        "api_key_used": api_key[-4:]  # Last 4 characters for debugging
                    }
                else:
                    error_data = await response.text()
                    logger.error(f"OpenRouter API error {response.status}: {error_data}")

                    # Record error for this key
                    self.key_manager.record_error(api_key)

                    return {
                        "success": False,
                        "error": f"API error {response.status}: {error_data}",
                        "response_time": response_time
                    }

        except Exception as e:
            # Network/timeout/parse failures all land here; reported as data,
            # not raised, so batch callers can aggregate per-request outcomes.
            response_time = time.time() - start_time
            logger.error(f"Request failed with key ...{api_key[-4:]}: {str(e)}")

            # Record error for this key
            self.key_manager.record_error(api_key)

            return {
                "success": False,
                "error": str(e),
                "response_time": response_time
            }

    async def stream_chat_completion(self, request: ChatRequest):
        """Async generator yielding raw SSE bytes from the upstream stream.

        Errors are yielded as a single ``data: {"error": ...}`` chunk rather
        than raised, since headers may already have been sent to the client.
        """
        api_key = self.key_manager.get_next_key()

        try:
            session = await self.get_session(api_key)

            # Prepare messages (system prompt first, if any)
            messages = []
            if request.system_prompt:
                messages.append({"role": "system", "content": request.system_prompt})
            messages.extend([msg.dict() for msg in request.messages])

            # Prepare request payload (stream forced on regardless of request.stream)
            payload = {
                "model": request.model,
                "messages": messages,
                "max_tokens": request.max_tokens,
                "temperature": request.temperature,
                "top_p": request.top_p,
                "frequency_penalty": request.frequency_penalty,
                "presence_penalty": request.presence_penalty,
                "stream": True
            }

            if request.provider:
                provider_dict = request.provider.dict(exclude_none=True)
                if provider_dict:
                    payload["provider"] = provider_dict

            async with session.post(f"{self.base_url}/chat/completions", json=payload) as response:
                if response.status == 200:
                    # Relay the body in 1 KiB chunks without re-parsing SSE frames
                    async for chunk in response.content.iter_chunked(1024):
                        if chunk:
                            yield chunk
                else:
                    error_data = await response.text()
                    self.key_manager.record_error(api_key)
                    yield f"data: {json.dumps({'error': f'API error {response.status}: {error_data}'})}\n\n".encode()

        except Exception as e:
            logger.error(f"Streaming failed with key ...{api_key[-4:]}: {str(e)}")
            self.key_manager.record_error(api_key)
            yield f"data: {json.dumps({'error': str(e)})}\n\n".encode()

    async def close_all_sessions(self):
        """Close every pooled aiohttp session and empty the pool (shutdown hook)."""
        for session in self.session_pool.values():
            await session.close()
        self.session_pool.clear()
321
+
322
# Global singletons, populated by lifespan() at startup; None until then,
# which is what the endpoints' "Service not initialized" guards check.
client: Optional[OpenRouterClient] = None
key_manager: Optional[APIKeyManager] = None
325
+
326
@asynccontextmanager
async def lifespan(app: FastAPI):
    """FastAPI lifespan hook: build the key manager and client on startup
    from OPENROUTER_API_KEYS (comma-separated), tear sessions down on exit.

    Raises:
        ValueError: when the env var is missing or contains no usable keys.
    """
    global client, key_manager

    logger.info("Starting OpenRouter Chat API...")

    # Keys arrive as one comma-separated env var; blanks are discarded.
    raw_keys = os.getenv("OPENROUTER_API_KEYS", "")
    if not raw_keys:
        raise ValueError("OPENROUTER_API_KEYS environment variable is required")

    parsed_keys = [candidate.strip() for candidate in raw_keys.split(",") if candidate.strip()]
    if not parsed_keys:
        raise ValueError("No valid API keys found in OPENROUTER_API_KEYS")

    key_manager = APIKeyManager(parsed_keys)
    client = OpenRouterClient(key_manager)

    logger.info(f"✅ API initialized with {len(parsed_keys)} keys")

    # Application serves requests while suspended here.
    yield

    logger.info("Shutting down...")
    if client:
        await client.close_all_sessions()
355
+
356
# Create FastAPI app; lifespan wires startup/shutdown defined above
app = FastAPI(
    title="High-Performance OpenRouter Chat API",
    description="Scalable chat completions API with multiple key rotation and parallel processing",
    version="1.0.0",
    lifespan=lifespan
)
363
+
364
+ @app.get("/", response_model=Dict)
365
+ async def root():
366
+ """Root endpoint with API information"""
367
+ return {
368
+ "message": "High-Performance OpenRouter Chat API",
369
+ "version": "1.0.0",
370
+ "endpoints": {
371
+ "chat": "/api/chat",
372
+ "chat_stream": "/api/chat (with stream=true)",
373
+ "stats": "/api/stats",
374
+ "health": "/health"
375
+ },
376
+ "features": [
377
+ "Multiple API key rotation",
378
+ "Connection pooling",
379
+ "Parallel processing",
380
+ "Provider routing",
381
+ "Streaming support",
382
+ "Rate limiting"
383
+ ]
384
+ }
385
+
386
+ @app.post("/api/chat", response_model=ChatResponse)
387
+ async def chat_completion(request: ChatRequest):
388
+ """Send chat completion request"""
389
+ if not client:
390
+ raise HTTPException(status_code=503, detail="Service not initialized")
391
+
392
+ try:
393
+ # Handle streaming requests
394
+ if request.stream:
395
+ return StreamingResponse(
396
+ client.stream_chat_completion(request),
397
+ media_type="text/plain",
398
+ headers={"Cache-Control": "no-cache", "Connection": "keep-alive"}
399
+ )
400
+
401
+ # Handle regular requests
402
+ result = await client.chat_completion(request)
403
+
404
+ if result["success"]:
405
+ return ChatResponse(
406
+ success=True,
407
+ model=request.model,
408
+ choices=result["data"].get("choices", []),
409
+ usage=result["data"].get("usage"),
410
+ response_time=result["response_time"],
411
+ provider_used=result.get("provider_used"),
412
+ timestamp=datetime.now().isoformat()
413
+ )
414
+ else:
415
+ raise HTTPException(
416
+ status_code=500,
417
+ detail=f"Chat completion failed: {result['error']}"
418
+ )
419
+
420
+ except HTTPException:
421
+ raise
422
+ except Exception as e:
423
+ logger.error(f"Unexpected error in chat_completion: {str(e)}")
424
+ raise HTTPException(status_code=500, detail=str(e))
425
+
426
+ @app.get("/api/stats", response_model=Dict)
427
+ async def get_api_stats():
428
+ """Get API key usage statistics"""
429
+ if not key_manager:
430
+ raise HTTPException(status_code=503, detail="Service not initialized")
431
+
432
+ stats = key_manager.get_stats()
433
+
434
+ # Calculate summary statistics
435
+ total_requests = sum(stat["requests"] for stat in stats.values())
436
+ total_errors = sum(stat["errors"] for stat in stats.values())
437
+ error_rate = (total_errors / total_requests * 100) if total_requests > 0 else 0
438
+
439
+ return {
440
+ "summary": {
441
+ "total_keys": len(stats),
442
+ "total_requests": total_requests,
443
+ "total_errors": total_errors,
444
+ "error_rate_percent": round(error_rate, 2)
445
+ },
446
+ "key_stats": {
447
+ f"key_...{key[-4:]}": {
448
+ "requests": stat["requests"],
449
+ "errors": stat["errors"],
450
+ "error_rate": round((stat["errors"] / stat["requests"] * 100) if stat["requests"] > 0 else 0, 2),
451
+ "last_used": datetime.fromtimestamp(stat["last_used"]).isoformat() if stat["last_used"] > 0 else "Never"
452
+ }
453
+ for key, stat in stats.items()
454
+ }
455
+ }
456
+
457
+ @app.get("/health")
458
+ async def health_check():
459
+ """Health check endpoint"""
460
+ if not client or not key_manager:
461
+ return JSONResponse(
462
+ status_code=503,
463
+ content={
464
+ "status": "unhealthy",
465
+ "message": "Service not initialized",
466
+ "timestamp": datetime.now().isoformat()
467
+ }
468
+ )
469
+
470
+ try:
471
+ stats = key_manager.get_stats()
472
+ return {
473
+ "status": "healthy",
474
+ "api_keys_loaded": len(stats),
475
+ "total_requests": sum(stat["requests"] for stat in stats.values()),
476
+ "timestamp": datetime.now().isoformat()
477
+ }
478
+
479
+ except Exception as e:
480
+ return JSONResponse(
481
+ status_code=503,
482
+ content={
483
+ "status": "unhealthy",
484
+ "error": str(e),
485
+ "timestamp": datetime.now().isoformat()
486
+ }
487
+ )
488
+
489
# Batch processing endpoint for high throughput
@app.post("/api/chat/batch")
async def batch_chat_completions(requests: List[ChatRequest]):
    """Run up to 50 chat completions concurrently; one result entry per request.

    Individual failures (exceptions or upstream errors) are reported inline
    in ``results`` rather than failing the whole batch.
    """
    if not client:
        raise HTTPException(status_code=503, detail="Service not initialized")

    if len(requests) > 50:  # Keep batches bounded
        raise HTTPException(status_code=400, detail="Batch size limited to 50 requests")

    try:
        # Fan out every request at once; return_exceptions keeps one bad
        # request from cancelling the rest.
        outcomes = await asyncio.gather(
            *(client.chat_completion(req) for req in requests),
            return_exceptions=True,
        )

        responses = []
        for idx, (req, outcome) in enumerate(zip(requests, outcomes)):
            if isinstance(outcome, Exception):
                entry = {
                    "request_index": idx,
                    "success": False,
                    "error": str(outcome)
                }
            elif outcome["success"]:
                entry = {
                    "request_index": idx,
                    "success": True,
                    "model": req.model,
                    "choices": outcome["data"].get("choices", []),
                    "usage": outcome["data"].get("usage"),
                    "response_time": outcome["response_time"],
                    "provider_used": outcome.get("provider_used")
                }
            else:
                entry = {
                    "request_index": idx,
                    "success": False,
                    "error": outcome["error"]
                }
            responses.append(entry)

        return {
            "success": True,
            "batch_size": len(requests),
            "results": responses,
            "timestamp": datetime.now().isoformat()
        }

    except Exception as e:
        logger.error(f"Batch processing failed: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))
540
+
requirements.txt ADDED
File without changes