# File size: 954 Bytes

import os
import uuid

import soundfile as sf
from fastapi import BackgroundTasks, FastAPI, Form, UploadFile
from fastapi.responses import FileResponse
from TTS.api import TTS

# Identifier of the XTTS v2 checkpoint (multi-lingual, multi-speaker) and the
# device the loaded model is moved to.
_XTTS_MODEL_NAME = "tts_models/multilingual/multi-dataset/xtts_v2"
_XTTS_DEVICE = "cuda"

app = FastAPI()

# The model is instantiated once, at import time, and the resulting object is
# shared by every request handler in this module. Expect a slow cold start;
# after that the model stays resident in memory.
tts = TTS(_XTTS_MODEL_NAME).to(_XTTS_DEVICE)


def _remove_quietly(path: str) -> None:
    """Delete *path*, ignoring the case where it no longer exists."""
    try:
        os.remove(path)
    except FileNotFoundError:
        pass


@app.post("/generate")
async def generate(
    text: str = Form(...),
    speaker_wav: UploadFile = None,
    language: str = Form("en"),
):
    """Synthesize ``text`` to a WAV file and return it as the response body.

    Args:
        text: Text to speak (required form field).
        speaker_wav: Optional uploaded reference recording. When present it
            is written to a temporary file and passed to the model as
            ``speaker_wav``; otherwise the ``"random"`` speaker is requested.
        language: Language code forwarded to the model as ``language``.
            Defaults to ``"en"`` so existing clients keep working.

    Returns:
        A ``FileResponse`` with media type ``audio/wav``. The generated file
        is deleted by a background task once the response has been sent.

    Raises:
        Exception: Whatever ``tts.tts_to_file`` or the file writes raise is
            propagated after the temporary files have been removed.
    """
    # One random id names both temp files. The client-supplied filename is
    # never used as a path component, which prevents path traversal
    # ("../../x") and collisions between concurrent uploads.
    request_id = uuid.uuid4().hex
    output_path = f"/tmp/{request_id}.wav"
    wav_path = None

    # NOTE(review): tts_to_file is a synchronous call made from an async
    # handler, so the event loop is occupied for the whole synthesis.
    try:
        if speaker_wav is not None:
            # Keep only a plain alphanumeric extension from the upload so the
            # audio loader can still see the container type.
            ext = os.path.splitext(speaker_wav.filename or "")[1]
            if not ext[1:].isalnum():
                ext = ".wav"
            wav_path = f"/tmp/{request_id}_speaker{ext}"
            with open(wav_path, "wb") as f:
                f.write(await speaker_wav.read())
            tts.tts_to_file(
                text=text,
                speaker_wav=wav_path,
                language=language,
                file_path=output_path,
            )
        else:
            # NOTE(review): "random" is passed through unchanged from the
            # original code; confirm the loaded model exposes a speaker with
            # that name.
            tts.tts_to_file(
                text=text,
                speaker="random",
                language=language,
                file_path=output_path,
            )
    except Exception:
        # Do not leave a partial output file behind on failure.
        _remove_quietly(output_path)
        raise
    finally:
        # The reference recording is only needed during synthesis.
        if wav_path is not None:
            _remove_quietly(wav_path)

    # The output must outlive this function (the response streams it), so
    # removal is deferred to a task that runs after the response is sent.
    cleanup = BackgroundTasks()
    cleanup.add_task(_remove_quietly, output_path)
    return FileResponse(output_path, media_type="audio/wav", background=cleanup)


# Liveness endpoint: takes no input and always answers with the same fixed
# JSON-serializable payload.
@app.get("/health")
def health():
    payload = {"status": "ok"}
    return payload