skanderovitch's picture
Rename app.py to main.py
4cce556 verified
raw
history blame contribute delete
851 Bytes
from fastapi import FastAPI, UploadFile, File, HTTPException
import torchaudio
from speechbrain.inference.speaker import EncoderClassifier
import torch
import io
# ASGI application object; the route decorator below registers onto it.
app = FastAPI()

# The speaker-embedding model is created here, at import time, so every
# request reuses this single instance instead of building its own.
classifier = EncoderClassifier.from_hparams(
    source="speechbrain/spkrec-ecapa-voxceleb",
)
@app.post("/embed")
async def get_voice_embedding(file: UploadFile = File(...)):
    """Compute a speaker embedding for an uploaded audio file.

    The upload is decoded with torchaudio, downmixed to a single channel,
    resampled to 16 kHz when necessary, and passed to the module-level
    ``classifier``.

    Args:
        file: Uploaded audio; the filename must end in ``.wav``, ``.mp3``
            or ``.flac`` (case-insensitive).

    Returns:
        A dict ``{"embedding": [...]}`` holding the squeezed embedding
        converted to plain Python lists/floats.

    Raises:
        HTTPException: 400 if the extension is not accepted, the upload is
            empty, or the bytes cannot be decoded as audio.
    """
    # Sample rate the pretrained model expects when encode_batch is called
    # directly (see the model card); encode_batch does not resample itself.
    target_sample_rate = 16000

    # The filename of a multipart upload may be missing; treat that like an
    # unsupported extension instead of crashing on None.
    filename = (file.filename or "").lower()
    if not filename.endswith((".wav", ".mp3", ".flac")):
        raise HTTPException(status_code=400, detail="Invalid file format")

    # Read audio bytes and load into tensor
    audio_bytes = await file.read()
    if not audio_bytes:
        raise HTTPException(status_code=400, detail="Empty audio file")

    try:
        audio_tensor, sample_rate = torchaudio.load(io.BytesIO(audio_bytes))
    except Exception as exc:
        # Decoder backends raise differing exception types for corrupt or
        # mislabeled data; all of them mean the client sent a bad file.
        raise HTTPException(
            status_code=400, detail="Could not decode audio file"
        ) from exc

    if audio_tensor.numel() == 0:
        raise HTTPException(status_code=400, detail="Empty audio file")

    # torchaudio.load yields (channels, time) while encode_batch treats the
    # first dimension as the batch: average the channels so a stereo file
    # produces one embedding rather than one per channel.
    if audio_tensor.dim() > 1 and audio_tensor.size(0) > 1:
        audio_tensor = audio_tensor.mean(dim=0, keepdim=True)

    if sample_rate != target_sample_rate:
        audio_tensor = torchaudio.functional.resample(
            audio_tensor, sample_rate, target_sample_rate
        )

    # Compute embedding
    with torch.no_grad():
        embeddings = classifier.encode_batch(audio_tensor)

    return {"embedding": embeddings.squeeze().tolist()}