"""FastAPI service exposing a speaker-embedding endpoint for uploaded audio."""

import io

import torch
import torchaudio
from fastapi import FastAPI, File, HTTPException, UploadFile
from speechbrain.inference.speaker import EncoderClassifier

# Upload extensions accepted by /embed; compared against the lower-cased name.
_ALLOWED_EXTENSIONS = (".wav", ".mp3", ".flac")

# Sample rate the waveform is converted to before encoding. The
# speechbrain/spkrec-ecapa-voxceleb model card states the model was trained on
# 16 kHz single-channel recordings and that callers of encode_batch must
# supply compliant tensors themselves.
_MODEL_SAMPLE_RATE = 16000

app = FastAPI()

# Load model once at startup
classifier = EncoderClassifier.from_hparams(source="speechbrain/spkrec-ecapa-voxceleb")


def _prepare_waveform(waveform, sample_rate):
    """Return *waveform* as a mono signal at ``_MODEL_SAMPLE_RATE``.

    Args:
        waveform: Tensor as returned by ``torchaudio.load`` with its default
            arguments, i.e. laid out as ``(channels, time)``.
        sample_rate: Sample rate of *waveform* in Hz.

    Returns:
        A ``(1, time)`` tensor at the model's sample rate.
    """
    # encode_batch treats the first dimension as the batch, so a stereo file
    # would otherwise be embedded as two separate utterances.
    if waveform.size(0) > 1:
        waveform = waveform.mean(dim=0, keepdim=True)
    if sample_rate != _MODEL_SAMPLE_RATE:
        waveform = torchaudio.functional.resample(
            waveform, sample_rate, _MODEL_SAMPLE_RATE
        )
    return waveform


@app.post("/embed")
async def get_voice_embedding(file: UploadFile = File(...)):
    """Compute a speaker embedding for an uploaded audio file.

    The upload is validated by extension, decoded, downmixed to mono,
    resampled to the model's sample rate and passed to the encoder.

    Args:
        file: Uploaded audio; its name must end in .wav, .mp3 or .flac
            (case-insensitive).

    Returns:
        ``{"embedding": [...]}`` holding the encoder output with singleton
        dimensions squeezed away, converted to a plain list.

    Raises:
        HTTPException: 400 if the extension is not accepted, the upload is
            empty, or the bytes cannot be decoded as audio.
    """
    # UploadFile.filename is optional; treat a missing name as "no extension".
    filename = file.filename or ""
    if not filename.lower().endswith(_ALLOWED_EXTENSIONS):
        raise HTTPException(status_code=400, detail="Invalid file format")

    # Read audio bytes and load into tensor
    audio_bytes = await file.read()
    if not audio_bytes:
        raise HTTPException(status_code=400, detail="Uploaded file is empty")

    try:
        audio_tensor, sample_rate = torchaudio.load(io.BytesIO(audio_bytes))
    except Exception as exc:
        # The exception type depends on the active torchaudio backend, so the
        # catch is broad; an undecodable upload is a client error, not a 500.
        raise HTTPException(
            status_code=400, detail="Could not decode audio file"
        ) from exc

    if audio_tensor.numel() == 0:
        raise HTTPException(status_code=400, detail="Audio file contains no samples")

    audio_tensor = _prepare_waveform(audio_tensor, sample_rate)

    # Compute embedding
    # NOTE: inference runs synchronously inside this coroutine.
    with torch.no_grad():
        embeddings = classifier.encode_batch(audio_tensor)

    return {"embedding": embeddings.squeeze().tolist()}