harveen
Add Tamil
3b92d66
raw history blame
No virus
1.71 kB
import base64
import io
from typing import Optional

import uvicorn
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from scipy.io import wavfile
from starlette.responses import StreamingResponse

from texttospeech import MelToWav, TextToMel
# ASGI application object; the /TTS/ route in this file is registered on it.
app = FastAPI()
class TextJson(BaseModel):
    # Request body accepted by the /TTS/ endpoint.
    # `lang` and `gender` are joined as "<lang>_<gender>" to pick a model pair.

    # Text to synthesize; the endpoint rejects an empty string.
    text: str
    # Language part of the model key.
    lang: Optional[str] = "hi"
    # Voice-gender part of the model key.
    gender: Optional[str] = "male"
# Text -> mel-spectrogram models, one per Hindi voice.
# NOTE(review): model directory and device are empty strings here --
# presumably placeholders to be filled in per deployment; confirm.
glow_hi_male = TextToMel(glow_model_dir="", device="")
glow_hi_female = TextToMel(glow_model_dir="", device="")

# Mel-spectrogram -> waveform model, shared by both Hindi voices.
hifi_hi = MelToWav(hifi_model_dir="", device="")

# Registry keyed by "<lang>_<gender>"; each value is
# [text-to-mel model, mel-to-wav model], indexed positionally by the endpoint.
available_choice = dict(
    hi_male=[glow_hi_male, hifi_hi],
    hi_female=[glow_hi_female, hifi_hi],
)
@app.post("/TTS/")
async def tts(input: TextJson):
    """Synthesize speech for the request text and return it base64-encoded.

    Args:
        input: Request body carrying the text to speak plus the language and
            gender that together select a model pair from
            ``available_choice``.

    Returns:
        A dict with ``"encoding"`` (always ``"base64"``), ``"data"`` (the
        base64 text of a WAV file) and ``"sr"`` (the sample rate returned by
        the mel-to-wav model).

    Raises:
        HTTPException: status 400 when no model pair is registered for the
            requested language/gender, or when the text is empty.
    """
    # Built with an f-string rather than ``+`` so that an explicit JSON null
    # for lang/gender yields an unknown key (400) instead of a TypeError (500).
    choice = f"{input.lang}_{input.gender}"
    models = available_choice.get(choice)
    if models is None:
        raise HTTPException(
            status_code=400, detail={"error": "Requested model not found"}
        )
    text_to_mel, mel_to_wav = models

    if not input.text:
        raise HTTPException(status_code=400, detail={"error": "No text"})

    mel = text_to_mel.generate_mel(input.text)
    data, sr = mel_to_wav.generate_wav(mel)

    # Encode the WAV in memory. The registry entry is a plain list, so the
    # previous ``t2s.save_audio(...)`` call could never succeed, and a fixed
    # "out.wav" path would be shared by every request and worker process.
    # NOTE(review): assumes generate_wav returns a NumPy sample array, which
    # is what scipy's WAV writer requires -- confirm against texttospeech.
    buffer = io.BytesIO()
    wavfile.write(buffer, sr, data)

    encoded_string = base64.b64encode(buffer.getvalue()).decode()
    return {"encoding": "base64", "data": encoded_string, "sr": sr}
if __name__ == "__main__":
    # Development entry point: serve the app locally with auto-reload.
    # The app is passed as an import string, so this assumes the module is
    # importable as ``t2s_fastapi`` -- confirm the file name matches.
    server_options = {
        "host": "127.0.0.1",
        "port": 5000,
        "log_level": "info",
        "reload": True,
    }
    uvicorn.run("t2s_fastapi:app", **server_options)