# PluzzAI / app.py
# Author: Adanbalf — "Create app.py" (commit 42fbbb9, verified)
import base64
import hmac
import os
import tempfile
from io import BytesIO

import torch
import whisper
from fastapi import FastAPI, File, UploadFile, Request
from fastapi.responses import StreamingResponse, JSONResponse
from PIL import Image
from transformers import pipeline
from TTS.api import TTS
app = FastAPI(title="NasFit AI Server")
# πŸ” API KEY
API_KEY = os.getenv("API_KEY", "nasfit_secret_key")
# 🧠 Carga de modelos
print("Cargando modelos...")
chat_pipe = pipeline("text-generation", model="meta-llama/Meta-Llama-3-8B-Instruct")
vision_pipe = pipeline("image-text-to-text", model="lmms-lab/llava-onevision-1.6-7b-hf")
whisper_model = whisper.load_model("small")
tts = TTS("coqui/XTTS-v2")
print("βœ… Modelos listos.")
# -------------------------------
# Auth
# -------------------------------
async def check_auth(request: Request) -> bool:
    """Validate the ``Authorization: Bearer <token>`` header against API_KEY.

    Returns True iff the header is present, carries the Bearer scheme, and the
    full remainder of the header equals the configured key. The comparison
    uses hmac.compare_digest so it runs in constant time and does not leak
    key bytes through timing differences. (The previous split(" ")[1] also
    silently ignored everything after a second space in the header.)
    """
    auth = request.headers.get("Authorization", "")
    if not auth.startswith("Bearer "):
        return False
    token = auth[len("Bearer "):]
    return hmac.compare_digest(token, API_KEY)
# -------------------------------
# Chat + Vision
# -------------------------------
def _extract_prompt(messages):
    """Flatten OpenAI-style messages into ``(prompt_text, image_or_None)``.

    Content may be a plain string or a list of parts of the form
    {"type": "text", ...} / {"type": "image_url", "image_url": {"url": ...}}.
    Only base64 data-URL images are decoded; if several images appear, the
    last one wins (matching the original behavior). Malformed content
    (non-str, non-list, or parts missing keys) is skipped instead of
    raising TypeError/KeyError as the previous inline loop did.
    """
    image_content = None
    text_parts = []
    for msg in messages:
        content = msg.get("content", "")
        if isinstance(content, str):
            text_parts.append(content)
            continue
        if not isinstance(content, list):
            continue  # e.g. None or a dict — ignore rather than crash
        for part in content:
            part_type = part.get("type")
            if part_type == "text":
                text_parts.append(part.get("text", ""))
            elif part_type == "image_url":
                img_url = part.get("image_url", {}).get("url", "")
                if img_url.startswith("data:image"):
                    # data URL: payload follows the "data:image/...;base64," prefix
                    raw = base64.b64decode(img_url.split(",")[1])
                    image_content = Image.open(BytesIO(raw))
    return "".join(text_parts), image_content


@app.post("/v1/chat/completions")
async def chat_endpoint(request: Request):
    """OpenAI-compatible chat completion endpoint with optional image input.

    Routes to the vision pipeline when a data-URL image is found in the
    messages, otherwise to the plain text-generation pipeline. Returns an
    OpenAI-shaped ``choices`` payload, or 401 on a bad/missing bearer token.
    (The unused ``model`` field of the payload is no longer read.)
    """
    if not await check_auth(request):
        return JSONResponse({"error": "Unauthorized"}, status_code=401)
    payload = await request.json()
    messages = payload.get("messages", [])
    text_content, image_content = _extract_prompt(messages)
    if image_content is not None:
        response = vision_pipe(text_content, images=image_content)[0]["generated_text"]
    else:
        response = chat_pipe(text_content, max_new_tokens=300)[0]["generated_text"]
    return {"choices": [{"message": {"content": response}}]}
# -------------------------------
# Speech to text
# -------------------------------
@app.post("/v1/audio/transcriptions")
async def transcribe(request: Request, file: UploadFile = File(...)):
    """Speech-to-text: transcribe an uploaded audio file with Whisper.

    The upload is written to a per-request temporary file. The previous
    fixed path ("temp.wav") raced between concurrent requests — two uploads
    could clobber each other — and the file was never deleted.

    Returns ``{"text": ...}``, or 401 on a bad/missing bearer token.
    """
    if not await check_auth(request):
        return JSONResponse({"error": "Unauthorized"}, status_code=401)
    audio = await file.read()
    tmp_path = None
    try:
        # delete=False so the file can be reopened by whisper on all platforms
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
            tmp.write(audio)
            tmp_path = tmp.name
        result = whisper_model.transcribe(tmp_path)
    finally:
        if tmp_path and os.path.exists(tmp_path):
            os.remove(tmp_path)
    return {"text": result["text"]}
# -------------------------------
# Text to speech
# -------------------------------
@app.post("/v1/audio/speech")
async def tts_endpoint(request: Request):
    """Text-to-speech: synthesize the ``input`` text and stream it as WAV.

    Synthesis output goes to a per-request temporary file. The previous
    fixed path ("output.wav") raced between concurrent requests and the
    file was never cleaned up.

    Returns an ``audio/wav`` StreamingResponse, or 401 on a bad/missing
    bearer token.
    """
    if not await check_auth(request):
        return JSONResponse({"error": "Unauthorized"}, status_code=401)
    payload = await request.json()
    text = payload.get("input", "")
    voice = payload.get("voice", "es_male_01")
    tmp_path = None
    try:
        # delete=False: reserve a unique path, let TTS write to it, then read it back
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
            tmp_path = tmp.name
        tts.tts_to_file(text=text, file_path=tmp_path, speaker=voice)
        with open(tmp_path, "rb") as f:
            audio = f.read()
    finally:
        if tmp_path and os.path.exists(tmp_path):
            os.remove(tmp_path)
    return StreamingResponse(BytesIO(audio), media_type="audio/wav")