Amaranath committed on
Commit
6f8086c
·
verified ·
1 Parent(s): d938533

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +452 -0
app.py ADDED
@@ -0,0 +1,452 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, HTTPException, Depends, Request, Response
2
+ from fastapi.responses import StreamingResponse, JSONResponse
3
+ from fastapi.middleware.cors import CORSMiddleware
4
+ from contextlib import asynccontextmanager
5
+ import httpx
6
+ import json
7
+ import asyncio
8
+ import secrets
9
+ from typing import Dict, Any, Optional
10
+ from datetime import datetime, timedelta
11
+ from auth import verify_token, verify_admin_token
12
+ from config import settings
13
+ import logging
14
+ from pydantic import BaseModel, Field
15
+ from typing import List, Dict, Any, Optional
16
+
17
# Configure logging once for the whole module. The block used to be repeated;
# a second basicConfig() call has no effect once the root logger has a handler.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
26
+
27
+
28
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan hook: wait for Ollama on startup, log on shutdown.

    Polls ``GET /api/tags`` on ``settings.ollama_host`` up to 30 times and
    pauses two seconds after every unsuccessful attempt (connection error or
    non-200 answer). Startup continues with a warning if Ollama never
    answers with HTTP 200.
    """
    logger.info("🚀 Starting Authenticated Ollama API with Admin Panel...")

    max_retries = 30
    retry_delay = 2  # seconds between probes
    tags_url = f"http://{settings.ollama_host}/api/tags"

    # One client is reused for all probes instead of opening a new one per try.
    async with httpx.AsyncClient(timeout=5.0) as client:
        for attempt in range(1, max_retries + 1):
            try:
                probe = await client.get(tags_url)
                ready = probe.status_code == 200
            except Exception:
                # Best-effort probe: any failure simply means "not ready yet".
                ready = False

            if ready:
                logger.info("✅ Ollama is ready!")
                break

            logger.info(f"⏳ Waiting for Ollama... ({attempt}/{max_retries})")
            await asyncio.sleep(retry_delay)
        else:
            # Loop ran out of attempts without ever seeing HTTP 200.
            logger.warning("⚠️ Ollama not responding, but continuing...")

    yield

    # Shutdown
    logger.info("🛑 Shutting down...")
51
+
52
# FastAPI application object; startup/shutdown work is delegated to `lifespan`.
app = FastAPI(
    title="Authenticated Ollama API with Admin Panel",
    description="Secure Ollama API with Bearer Token Authentication and Admin Key Management",
    version="2.0.0",
    docs_url="/docs",
    redoc_url="/redoc",
    lifespan=lifespan,
)

# Add CORS middleware.
# Wildcard origins must not be combined with allow_credentials=True (the
# FastAPI/Starlette docs require explicit origins in that case). This API
# authenticates with a Bearer token in the Authorization header rather than
# cookies, so credentialed CORS is switched off.
# NOTE(review): if a browser client relies on `credentials: "include"`, list
# its origin(s) explicitly in allow_origins and re-enable allow_credentials.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
68
+
69
+ # =============================================================================
70
+ # PYDANTIC MODELS FOR REQUEST BODIES
71
+ # =============================================================================
72
+
73
class GenerateRequest(BaseModel):
    """Request body accepted by ``POST /api/generate``."""

    # Required fields
    model: str = Field(
        ...,
        description="Model name to use",
        example="hf.co/bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M",
    )
    prompt: str = Field(
        ...,
        description="Text prompt to generate from",
        example="Explain quantum computing in simple terms",
    )

    # Optional tuning fields
    stream: bool = Field(
        False,
        description="Whether to stream the response",
    )
    options: Optional[Dict[str, Any]] = Field(
        None,
        description="Additional model options",
    )
    template: Optional[str] = Field(
        None,
        description="Prompt template to use",
    )
    context: Optional[List[int]] = Field(
        None,
        description="Context from previous conversation",
    )
    raw: Optional[bool] = Field(
        None,
        description="Return raw response without formatting",
    )
81
+
82
class ChatMessage(BaseModel):
    """A single chat turn: who said it (``role``) and what was said (``content``)."""

    role: str = Field(
        ...,
        description="Role of the message",
        example="user",
    )
    content: str = Field(
        ...,
        description="Content of the message",
        example="Hello, how are you?",
    )
85
+
86
class ChatRequest(BaseModel):
    """Request body accepted by ``POST /api/chat``."""

    model: str = Field(
        ...,
        description="Model name to use",
        example="hf.co/bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M",
    )
    messages: List[ChatMessage] = Field(
        ...,
        description="Array of chat messages",
    )
    stream: bool = Field(
        False,
        description="Whether to stream the response",
    )
    options: Optional[Dict[str, Any]] = Field(
        None,
        description="Additional model options",
    )
91
+
92
class PullRequest(BaseModel):
    """Request body accepted by ``POST /api/pull``."""

    name: str = Field(
        ...,
        description="Model name to pull",
        example="hf.co/bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M",
    )
    insecure: Optional[bool] = Field(
        False,
        description="Allow insecure connections",
    )
    stream: Optional[bool] = Field(
        True,
        description="Stream pull progress",
    )
96
+
97
class DeleteRequest(BaseModel):
    """Request body accepted by ``DELETE /api/delete``."""

    name: str = Field(
        ...,
        description="Model name to delete",
        example="hf.co/bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M",
    )
99
+
100
+
101
+ # =============================================================================
102
+ # RESPONSE MODELS
103
+ # =============================================================================
104
+
105
class GenerateResponse(BaseModel):
    """Response schema declared for ``POST /api/generate``."""

    # Always present
    model: str
    created_at: str
    response: str
    done: bool

    # Optional fields; every one defaults to None when absent
    context: Optional[List[int]] = None
    total_duration: Optional[int] = None
    load_duration: Optional[int] = None
    prompt_eval_count: Optional[int] = None
    prompt_eval_duration: Optional[int] = None
    eval_count: Optional[int] = None
    eval_duration: Optional[int] = None
117
+
118
class ChatResponse(BaseModel):
    """Response schema declared for ``POST /api/chat``."""

    # Always present
    model: str
    created_at: str
    message: ChatMessage
    done: bool

    # Optional fields; every one defaults to None when absent
    total_duration: Optional[int] = None
    load_duration: Optional[int] = None
    prompt_eval_count: Optional[int] = None
    prompt_eval_duration: Optional[int] = None
    eval_count: Optional[int] = None
    eval_duration: Optional[int] = None
129
+
130
+
131
@app.get("/")
async def root():
    """Root endpoint with API information"""
    feature_list = [
        "Bearer Token Authentication",
        "Admin Key Management",
        "Llama 3.2 1B Model",
        "Complete Ollama API Wrapper",
    ]
    endpoint_map = {
        "docs": "/docs",
        "health": "/health",
        "api": "/api/*",
        "admin": "/admin/*",
    }
    auth_requirements = {
        "api_operations": "Bearer Token (API Key) required",
        "admin_operations": "Bearer Token (Admin Key) required",
    }

    # Static description of the service; nothing here is computed per request.
    return {
        "message": "Authenticated Ollama API Server with Admin Panel",
        "status": "running",
        "version": "2.0.0",
        "features": feature_list,
        "endpoints": endpoint_map,
        "authentication": auth_requirements,
    }
155
+
156
@app.get("/health")
async def health_check():
    """Public health check endpoint (no authentication required)"""
    try:
        async with httpx.AsyncClient(timeout=5.0) as client:
            probe = await client.get(f"http://{settings.ollama_host}/api/tags")
    except Exception:
        # Any failure while probing is reported rather than raised.
        ollama_status = "unreachable"
    else:
        if probe.status_code == 200:
            ollama_status = "healthy"
        else:
            ollama_status = "unhealthy"

    service_states = {
        "fastapi": "healthy",
        "ollama": ollama_status,
        "authentication": "active",
    }
    # The top-level status describes this server itself; Ollama's state is
    # reported separately under "services".
    return {
        "status": "healthy",
        "timestamp": datetime.now().isoformat(),
        "services": service_states,
    }
175
+
176
+ # =============================================================================
177
+ # REGULAR API ENDPOINTS (Require API Key)
178
+ # =============================================================================
179
+
180
@app.get("/api/tags")
async def list_models(token: str = Depends(verify_token)):
    """List all available models.

    Proxies ``GET /api/tags`` on the Ollama host and returns its JSON body.

    Raises:
        HTTPException: 503 when Ollama cannot be reached, or Ollama's own
            status code when it answers with anything other than 200.
    """
    try:
        async with httpx.AsyncClient(timeout=10.0) as client:
            response = await client.get(f"http://{settings.ollama_host}/api/tags")
    except httpx.RequestError as e:
        raise HTTPException(
            status_code=503,
            detail=f"Ollama service unavailable: {str(e)}",
        ) from e

    # Surface upstream failures the same way the other /api endpoints do
    # instead of trying to parse an error body as a model list.
    if response.status_code != 200:
        raise HTTPException(status_code=response.status_code, detail=response.text)

    return response.json()
189
+
190
@app.post("/api/generate", response_model=GenerateResponse)
async def generate_text(request: GenerateRequest, token: str = Depends(verify_token)):
    """Generate text using Ollama models.

    Forwards the request to Ollama's ``/api/generate``. With ``stream`` false
    (the default) the single JSON result is returned; with ``stream`` true the
    upstream newline-delimited JSON is relayed to the caller as it arrives.

    Raises:
        HTTPException: 503 when Ollama cannot be reached, or Ollama's own
            status code when it answers with anything other than 200.
    """
    body = request.dict(exclude_unset=True)
    # Always send ``stream`` explicitly. With exclude_unset the default was
    # dropped, and Ollama streams NDJSON when the field is missing, which
    # cannot be parsed as one JSON document.
    body["stream"] = request.stream
    url = f"http://{settings.ollama_host}/api/generate"

    # The client is managed by hand because, when streaming, it has to stay
    # open after this handler has returned.
    client = httpx.AsyncClient(timeout=300.0)
    relaying = False
    try:
        upstream = await client.send(
            client.build_request("POST", url, json=body),
            stream=True,
        )
        if upstream.status_code == 200 and request.stream:
            relaying = True
        else:
            # Buffer the whole body for the non-streaming and error paths.
            await upstream.aread()
    except httpx.RequestError as e:
        raise HTTPException(
            status_code=503,
            detail=f"Ollama service unavailable: {str(e)}",
        ) from e
    finally:
        if not relaying:
            await client.aclose()

    if not relaying:
        if upstream.status_code != 200:
            raise HTTPException(status_code=upstream.status_code, detail=upstream.text)
        return upstream.json()

    async def relay():
        # Pass chunks through untouched and release the connection at the end.
        try:
            async for chunk in upstream.aiter_bytes():
                yield chunk
        finally:
            await upstream.aclose()
            await client.aclose()

    return StreamingResponse(relay(), media_type="application/x-ndjson")
210
+
211
@app.post("/api/chat", response_model=ChatResponse)
async def chat(request: ChatRequest, token: str = Depends(verify_token)):
    """Chat with Ollama models.

    Forwards the request to Ollama's ``/api/chat``. With ``stream`` false
    (the default) the single JSON result is returned; with ``stream`` true the
    upstream newline-delimited JSON is relayed to the caller as it arrives.

    Raises:
        HTTPException: 503 when Ollama cannot be reached, or Ollama's own
            status code when it answers with anything other than 200.
    """
    body = request.dict(exclude_unset=True)
    # Always send ``stream`` explicitly. With exclude_unset the default was
    # dropped, and Ollama streams NDJSON when the field is missing, which
    # cannot be parsed as one JSON document.
    body["stream"] = request.stream
    url = f"http://{settings.ollama_host}/api/chat"

    # The client is managed by hand because, when streaming, it has to stay
    # open after this handler has returned.
    client = httpx.AsyncClient(timeout=300.0)
    relaying = False
    try:
        upstream = await client.send(
            client.build_request("POST", url, json=body),
            stream=True,
        )
        if upstream.status_code == 200 and request.stream:
            relaying = True
        else:
            # Buffer the whole body for the non-streaming and error paths.
            await upstream.aread()
    except httpx.RequestError as e:
        raise HTTPException(
            status_code=503,
            detail=f"Ollama service unavailable: {str(e)}",
        ) from e
    finally:
        if not relaying:
            await client.aclose()

    if not relaying:
        if upstream.status_code != 200:
            raise HTTPException(status_code=upstream.status_code, detail=upstream.text)
        return upstream.json()

    async def relay():
        # Pass chunks through untouched and release the connection at the end.
        try:
            async for chunk in upstream.aiter_bytes():
                yield chunk
        finally:
            await upstream.aclose()
            await client.aclose()

    return StreamingResponse(relay(), media_type="application/x-ndjson")
231
+
232
@app.post("/api/pull")
async def pull_model(request: PullRequest, token: str = Depends(verify_token)):
    """Pull a model from Ollama library.

    Forwards the request to Ollama's ``/api/pull``. With ``stream`` true (the
    default) the upstream newline-delimited progress records are relayed as
    they arrive; with ``stream`` false the single JSON result is returned.

    Raises:
        HTTPException: 503 when Ollama cannot be reached, or Ollama's own
            status code when it answers with anything other than 200.
    """
    # An explicit null falls back to the model's documented default (True).
    wants_stream = True if request.stream is None else request.stream

    body = request.dict(exclude_unset=True)
    # Send ``stream`` explicitly so the upstream response format always
    # matches the way it is handled below.
    body["stream"] = wants_stream
    url = f"http://{settings.ollama_host}/api/pull"

    # The client is managed by hand because, when streaming, it has to stay
    # open after this handler has returned.
    client = httpx.AsyncClient(timeout=600.0)
    relaying = False
    try:
        upstream = await client.send(
            client.build_request("POST", url, json=body),
            stream=True,
        )
        if upstream.status_code == 200 and wants_stream:
            relaying = True
        else:
            # Buffer the whole body for the non-streaming and error paths.
            await upstream.aread()
    except httpx.RequestError as e:
        raise HTTPException(
            status_code=503,
            detail=f"Ollama service unavailable: {str(e)}",
        ) from e
    finally:
        if not relaying:
            await client.aclose()

    if not relaying:
        if upstream.status_code != 200:
            raise HTTPException(status_code=upstream.status_code, detail=upstream.text)
        return upstream.json()

    async def relay():
        # Pass progress records through untouched, then release the connection.
        try:
            async for chunk in upstream.aiter_bytes():
                yield chunk
        finally:
            await upstream.aclose()
            await client.aclose()

    return StreamingResponse(relay(), media_type="application/x-ndjson")
252
+
253
@app.delete("/api/delete")
async def delete_model(request: DeleteRequest, token: str = Depends(verify_token)):
    """Delete a model.

    Forwards the request body to Ollama's ``DELETE /api/delete``.

    Raises:
        HTTPException: 503 when Ollama cannot be reached, or Ollama's own
            status code when it answers with anything other than 200.
    """
    # Convert Pydantic model to dict
    body = request.dict(exclude_unset=True)

    try:
        async with httpx.AsyncClient(timeout=30.0) as client:
            # httpx's .delete() shortcut takes no request body, so passing
            # json= raised TypeError; the generic .request() accepts one.
            response = await client.request(
                "DELETE",
                f"http://{settings.ollama_host}/api/delete",
                json=body,
            )
    except httpx.RequestError as e:
        raise HTTPException(
            status_code=503,
            detail=f"Ollama service unavailable: {str(e)}",
        ) from e

    if response.status_code != 200:
        raise HTTPException(status_code=response.status_code, detail=response.text)

    return {"message": f"Model {request.name} deleted successfully"}
273
+
274
@app.get("/auth/test")
async def test_auth(token: str = Depends(verify_token)):
    """Test authentication endpoint.

    Reaching the body means the ``verify_token`` dependency accepted the
    request; the response is a fixed confirmation plus the current time.
    """
    return {
        # Emoji restored: the literal had been mis-decoded into mojibake.
        "message": "🎉 Authentication successful!",
        "token_valid": True,
        "token_type": "api_key",
        "timestamp": datetime.now().isoformat(),
        "access_level": "user",
    }
284
+
285
+ # =============================================================================
286
+ # ADMIN ENDPOINTS (Require Admin Key)
287
+ # =============================================================================
288
+
289
@app.get("/admin/info")
async def admin_info(admin_token: str = Depends(verify_admin_token)):
    """Get admin panel information.

    Returns a static list of admin endpoints together with the current time.
    """
    admin_endpoints = [
        "GET /admin/api-key - Retrieve current API key",
        "GET /admin/key-info - Get API key information",
        "POST /admin/rotate-key - Generate new API key",
        "GET /admin/logs - View recent access logs",
        "GET /admin/stats - Get usage statistics",
    ]
    return {
        # Emoji restored: the literal had been mis-decoded into mojibake.
        "message": "🔧 Admin access granted",
        "admin_endpoints": admin_endpoints,
        "timestamp": datetime.now().isoformat(),
        "access_level": "administrator",
    }
304
+
305
@app.get("/admin/api-key")
async def get_api_key(admin_token: str = Depends(verify_admin_token)):
    """
    🔑 Retrieve the current API key (Admin only)
    ⚠️ This endpoint requires admin authentication

    Every retrieval is logged at WARNING level (the key itself is not logged).
    """
    # Emoji in the strings below were restored from mis-decoded mojibake.
    logger.warning(f"🔑 API KEY RETRIEVED via admin endpoint at {datetime.now().isoformat()}")

    api_key = settings.api_key
    return {
        "api_key": api_key,
        "message": "Current API key retrieved successfully",
        "warning": "🚨 Keep this key secure and do not share it",
        "expires": "Never (until manually rotated)",
        "length": len(api_key),
        "retrieved_at": datetime.now().isoformat(),
    }
321
+
322
@app.get("/admin/key-info")
async def get_key_info(admin_token: str = Depends(verify_admin_token)):
    """
    Get API key information without exposing the actual key
    """
    api_key = settings.api_key
    # Preview = first 8 characters, an ellipsis, then the last 4 characters.
    head, tail = api_key[:8], api_key[-4:]
    key_prefix = head + "..." + tail

    return {
        "key_preview": key_prefix,
        "key_length": len(api_key),
        "status": "active",
        "created": "At server startup",
        "type": "url_safe_base64",
        "admin_access": "enabled",
        "last_checked": datetime.now().isoformat(),
    }
338
+
339
@app.post("/admin/rotate-key")
async def rotate_api_key(admin_token: str = Depends(verify_admin_token)):
    """
    🔄 Generate a new API key (Admin only)
    ⚠️ This will invalidate all existing API keys immediately

    The new key replaces ``settings.api_key`` in memory and is returned in
    full; only shortened previews of the old and new keys are logged.
    """
    previous_key = settings.api_key
    old_key_prefix = previous_key[:8] + "..." + previous_key[-4:]
    new_key = secrets.token_urlsafe(32)

    # Update the key
    settings.api_key = new_key

    # Log the rotation with previews only, never the full keys.
    # Emoji in the strings below were restored from mis-decoded mojibake.
    logger.warning(f"🔄 API KEY ROTATED: {datetime.now().isoformat()}")
    logger.warning(f" Old key: {old_key_prefix}")
    logger.warning(f" New key: {new_key[:8]}...{new_key[-4:]}")

    return {
        "message": "✅ API key rotated successfully",
        "old_key_preview": old_key_prefix,
        "new_api_key": new_key,
        "rotated_at": datetime.now().isoformat(),
        "warning": "🚨 Update all clients with the new key immediately",
        "action_required": "All existing API tokens are now invalid",
    }
365
+
366
@app.get("/admin/logs")
async def get_admin_logs(admin_token: str = Depends(verify_admin_token)):
    """
    📋 Get recent server logs (Admin only)

    The entries are fabricated sample data, not read from any real log.
    """
    # In a real implementation, you'd read actual log files
    # This is a simplified example showing what would be logged
    now = datetime.now()
    sample_logs = [
        {
            "timestamp": now.isoformat(),
            "level": "INFO",
            "message": "Admin logs accessed",
        },
        {
            "timestamp": (now - timedelta(minutes=5)).isoformat(),
            "level": "INFO",
            "message": "API key authentication successful",
        },
        {
            "timestamp": (now - timedelta(minutes=10)).isoformat(),
            "level": "WARNING",
            "message": "Failed authentication attempt detected",
        },
    ]

    # NOTE(review): this puts the full API key AND the admin key into the
    # response body. Kept unchanged because callers may rely on it, but
    # consider returning masked previews instead.
    startup_info = {
        "server_start": "Server started successfully",
        "api_key": f"API Key: {settings.api_key}",
        "admin_key": f"Admin Key: {settings.admin_key}",
        "ollama_status": "Ollama service connected",
    }

    return {
        "message": "Recent server activity",
        "startup_info": startup_info,
        "recent_logs": sample_logs,
        # Emoji restored: the literal had been mis-decoded into mojibake.
        "warning": "🚨 These logs contain sensitive authentication information",
        "retrieved_at": datetime.now().isoformat(),
    }
405
+
406
@app.get("/admin/stats")
async def get_usage_stats(admin_token: str = Depends(verify_admin_token)):
    """
    📊 Get usage statistics (Admin only)

    All values are static placeholders; no usage is actually tracked.
    """
    # In a real implementation, you'd track actual usage
    return {
        "server_uptime": "Running since startup",
        "authentication": {
            "api_key_status": "active",
            "admin_key_status": "active",
            "failed_attempts": "Check logs for details",
        },
        # Counts match the routes declared in this module:
        #   public (2): "/", "/health"
        #   API key (6): /api/tags, /api/generate, /api/chat, /api/pull,
        #                /api/delete, /auth/test
        #   admin key (7): /admin/info, /admin/api-key, /admin/key-info,
        #                  /admin/rotate-key, /admin/logs, /admin/stats,
        #                  /admin/test
        "endpoints": {
            "total_endpoints": 15,
            "public_endpoints": 2,
            "authenticated_endpoints": 6,
            "admin_endpoints": 7,
        },
        "models": {
            "default_model": "hf.co/bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M",
            "status": "loaded",
        },
        "generated_at": datetime.now().isoformat(),
    }
431
+
432
@app.get("/admin/test")
async def test_admin_auth(admin_token: str = Depends(verify_admin_token)):
    """Test admin authentication.

    Reaching the body means the ``verify_admin_token`` dependency accepted
    the request; the response is a fixed confirmation plus the current time.
    """
    granted_permissions = ["key_management", "logs_access", "stats_viewing"]
    return {
        # Emoji restored: the literal had been mis-decoded into mojibake.
        "message": "🔧 Admin authentication successful!",
        "token_valid": True,
        "token_type": "admin_key",
        "timestamp": datetime.now().isoformat(),
        "access_level": "administrator",
        "permissions": granted_permissions,
    }
443
+
444
if __name__ == "__main__":
    # Script entry point: serve the app with uvicorn on the configured
    # host and port, with auto-reload disabled.
    import uvicorn

    server_options = {
        "host": settings.app_host,
        "port": settings.app_port,
        "reload": False,
        "log_level": "info",
    }
    uvicorn.run("app:app", **server_options)