|
from fastapi import FastAPI, HTTPException, Request |
|
from fastapi.middleware.cors import CORSMiddleware |
|
from pydantic import BaseModel |
|
|
|
from swiftmind.predictor import create_model, create_tokenizer, predict |
|
|
|
# Instantiate the model and tokenizer a single time at import; the request
# handler passes these module-level objects to `predict` on every call.
model = create_model()
tokenizer = create_tokenizer()
|
|
|
|
|
class PredictionRequest(BaseModel):
    """Request body accepted by the ``/predict`` endpoint."""

    # Prompt text; passed to `predict` as its first argument.
    prompt: str
    # Forwarded to `predict`; 16 is used when the client omits the field.
    max_new_tokens: int = 16
    # Forwarded to `predict`; False is used when the client omits the field.
    return_full_text: bool = False
|
|
|
|
|
class Prediction(BaseModel):
    """Response body returned by the ``/predict`` endpoint."""

    # Value produced by `predict` for the submitted prompt.
    content: str
|
|
|
|
|
# Application instance shared by every middleware and route in this module.
app = FastAPI(
    title="SwiftMind API",
    description="Rest API for serving LLM model predictions",
    version="0.1.0",
)

# NOTE(review): wildcard origins together with allow_credentials=True is the
# combination the FastAPI CORS documentation warns against (origins are
# expected to be listed explicitly when credentials are allowed). The
# intended origin list is not visible here, so the setting is left as-is;
# replace "*" with the real front-end origins, or drop credentials.
app.add_middleware(
    CORSMiddleware,
    # A list, not a bare string: the parameter is a sequence of origins, and
    # a plain str would be matched by substring once it is no longer "*".
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
|
|
|
|
|
@app.middleware("http")
async def security_headers(request: Request, call_next):
    """Set a fixed group of security headers on each HTTP response.

    Args:
        request: Incoming request, handed to ``call_next`` unchanged.
        call_next: Awaitable callable that yields the response for ``request``.

    Returns:
        The response obtained from ``call_next`` with the headers below set.
    """
    hardening_headers = (
        ("X-Content-Type-Options", "nosniff"),
        ("X-Frame-Options", "DENY"),
        ("Strict-Transport-Security", "max-age=63072000; includeSubDomains"),
        ("X-XSS-Protection", "1; mode=block"),
    )
    outgoing = await call_next(request)
    # Assign one by one so any value set downstream is overwritten.
    for header_name, header_value in hardening_headers:
        outgoing.headers[header_name] = header_value
    return outgoing
|
|
|
|
|
@app.get("/heartbeat")
async def heartbeat():
    """Liveness probe reporting that the service is up.

    Registered with ``app.get`` rather than ``app.route``: ``route`` is the
    raw Starlette decorator, which invokes the endpoint with the request as
    a positional argument and expects a ``Response`` object back, so a
    zero-argument handler returning a dict fails on every call.

    Returns:
        The dict ``{"status": "healthy"}``, serialised to JSON by FastAPI.
    """
    return {"status": "healthy"}
|
|
|
|
|
@app.post("/predict", response_model=Prediction, status_code=200)
async def make_prediction(request: PredictionRequest):
    """Call ``predict`` for the submitted prompt and wrap its result.

    Args:
        request: Validated request body carrying the prompt and the two
            options forwarded to ``predict``.

    Returns:
        A ``Prediction`` whose ``content`` is the value returned by
        ``predict``.

    Raises:
        HTTPException: Status 500, with the error text as ``detail``, when
            ``predict`` or building the ``Prediction`` raises.
    """
    try:
        prediction = predict(
            request.prompt,
            model,
            tokenizer,
            request.max_new_tokens,
            request.return_full_text,
        )
        return Prediction(content=prediction)
    except Exception as e:
        # NOTE(review): str(e) is returned to the client and may expose
        # internal details; consider a generic message plus server-side logs.
        # Chain explicitly so the original error is kept as __cause__.
        raise HTTPException(status_code=500, detail=str(e)) from e
|
|