File size: 954 Bytes
e918d44
 
49ee1a8
 
 
e918d44
 
 
 
 
 
 
 
49ee1a8
e918d44
 
 
49ee1a8
e918d44
 
 
 
 
 
 
 
 
49ee1a8
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
from fastapi import FastAPI, UploadFile, Form
from fastapi.responses import FileResponse
import uuid
import os
import soundfile as sf
from TTS.api import TTS

# ASGI application object; the route decorators below register handlers on it.
app = FastAPI()

# Load the XTTS v2 model (multi-lingual, multi-speaker) once, at import time,
# and move it to the GPU with .to("cuda").
# Importing this module is therefore slow, but the loaded model object is kept
# in this module-level variable and reused by every request.
tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to("cuda")


def _discard_file(path):
    """Delete *path*, doing nothing if it does not exist."""
    try:
        os.remove(path)
    except FileNotFoundError:
        pass


@app.post("/generate")
async def generate(
    text: str = Form(...),
    speaker_wav: UploadFile = None,
    language: str = Form("en"),
):
    """Synthesize *text* to speech and return the result as a WAV file.

    Args:
        text: Text to synthesize (required form field).
        speaker_wav: Optional uploaded reference clip; when given, it is
            passed to the model as ``speaker_wav`` for voice cloning.
        language: Language code forwarded to the model. Defaults to ``"en"``.

    Returns:
        A ``FileResponse`` with media type ``audio/wav``. The generated file
        is deleted from ``/tmp`` after the response has been sent.

    Raises:
        Whatever ``tts.tts_to_file`` or the file I/O raises; temporary files
        are removed before the exception propagates.
    """
    # Function-scope import keeps this block self-contained; BackgroundTasks
    # ships with the fastapi package the file already depends on.
    from fastapi import BackgroundTasks

    request_id = uuid.uuid4().hex
    output_path = f"/tmp/{request_id}.wav"
    reference_path = None

    try:
        if speaker_wav is not None:
            # Never build a path from the client-supplied filename: it may
            # contain "../" or collide with another request. Keep only a
            # plain alphanumeric extension so audio loaders can still detect
            # the format.
            client_name = os.path.basename(speaker_wav.filename or "")
            ext = os.path.splitext(client_name)[1]
            if not ext[1:].isalnum():
                ext = ".wav"
            reference_path = f"/tmp/{request_id}_speaker{ext}"

            with open(reference_path, "wb") as f:
                f.write(await speaker_wav.read())

            # The multilingual model needs an explicit language.
            tts.tts_to_file(
                text=text,
                speaker_wav=reference_path,
                language=language,
                file_path=output_path,
            )
        else:
            # NOTE(review): "random" is kept from the original code; confirm
            # it is a speaker name this model actually accepts.
            tts.tts_to_file(
                text=text,
                speaker="random",
                language=language,
                file_path=output_path,
            )
    except Exception:
        # Do not leave a partial output file behind when synthesis fails.
        _discard_file(output_path)
        raise
    finally:
        # The reference clip is only needed while synthesis runs.
        if reference_path is not None:
            _discard_file(reference_path)

    # Remove the generated file once the response body has been streamed.
    cleanup = BackgroundTasks()
    cleanup.add_task(_discard_file, output_path)
    return FileResponse(output_path, media_type="audio/wav", background=cleanup)


@app.get("/health")
def health():
    """Health-check endpoint: always reports a fixed "ok" status."""
    payload = {"status": "ok"}
    return payload