# VoiceTTS / app.py
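# Gradio demo that installs Edresson's Coqui-TTS fork at startup, lazily loads
# per-voice checkpoints listed in models.json, and serves a simple
# text-to-speech interface ("Wesker TTS") with a selectable voice.
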
import torch
import gradio as gr
import time
import json
import os
import sys

# Clone the Coqui-TTS fork (multilingual-torchaudio-SE branch) and install it,
# plus the torchaudio version it expects, when the app starts.
init = [
    'git clone https://github.com/Edresson/Coqui-TTS -b multilingual-torchaudio-SE TTS',
    'pip install -q -e TTS/',
    'pip install -q torchaudio==0.9.0',
]
for cmd in init:
    os.system(cmd)

sys.path.append('TTS/')
os.makedirs('synthesized/', exist_ok=True)

# IPython is only used for an optional in-notebook audio preview below.
import IPython
from IPython.display import Audio
from pathlib import Path

from TTS.utils.synthesizer import Synthesizer

# All voices share one config file; each voice supplies its own checkpoint.
MODEL_PATH = Path('./models/')
CONFIG_PATH = MODEL_PATH / 'config.json'
OUTPUT_PATH = Path('./synthesized/')

CUDA = torch.cuda.is_available()

# One Synthesizer per voice, created lazily on first use.
synthesizers = {}

# Map voice name -> voice entry from models.json.
voices = {}
with open('models.json', 'r') as f:
    models = json.load(f)
for voice in models.get('voices'):
    voices[voice.get('name')] = voice
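
# The loop above only needs a "voices" list whose entries carry "name" and
# "model" keys. An illustrative (not the actual) models.json shape, assuming
# the checkpoint files live under ./models/:
#
#   {
#     "voices": [
#       {"name": "xqc", "model": "xqc_checkpoint.pth"}
#     ]
#   }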

def synthesize(text: str, voice: str):
    # Build (and cache) a Synthesizer for the requested voice on first use.
    model_file = MODEL_PATH / voices[voice]['model']
    if voice not in synthesizers:
        synthesizers[voice] = Synthesizer(
            tts_config_path=CONFIG_PATH,
            tts_checkpoint=model_file,
            use_cuda=CUDA,
        )
    syn = synthesizers[voice]
    wav = syn.tts(text)
    # Notebook-only preview; has no visible effect when served through Gradio.
    IPython.display.display(Audio(wav, rate=syn.sample_rate))
    # Write the result to a timestamped file and return its path to Gradio.
    file_name = f'{int(time.time())}_{voice}.wav'
    out_path = os.path.join(OUTPUT_PATH, file_name)
    syn.save_wav(wav, out_path)
    return out_path
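
# Quick manual check of the pipeline (assumes a voice named 'xqc' is defined in
# models.json and its checkpoint is present under ./models/):
#
#   out_wav = synthesize('Hello from the Space.', 'xqc')
#   print(out_wav)  # e.g. synthesized/1700000000_xqc.wav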

demo = gr.Interface(
    fn=synthesize,
    inputs=[
        gr.Textbox(label='What do you want it to say?'),
        gr.Dropdown(
            choices=list(voices.keys()),
            value='xqc',
            type='value',
        ),
    ],
    outputs='audio',
    title='Wesker TTS',
)
demo.launch()