import json
import os
import sys
import time

import git
import gradio as gr
import torch
|
# Bootstrap: fetch the Coqui-TTS fork and install it plus a pinned torchaudio.
init = [
    'git clone https://github.com/Edresson/Coqui-TTS -b multilingual-torchaudio-SE TTS',
    'pip install -q -e TTS/',
    'pip install -q torchaudio==0.9.0',
]

for cmd in init:
    # Best-effort setup, but surface failures instead of ignoring exit codes:
    # a silently failed clone/install makes the later TTS import crash with a
    # confusing ImportError far from the real cause.
    status = os.system(cmd)
    if status != 0:
        print(f'setup command failed (exit status {status}): {cmd}', file=sys.stderr)

# Make the freshly cloned package importable and ensure the output directory.
sys.path.append('TTS/')
os.makedirs('synthesized/', exist_ok=True)


# These imports must run AFTER the clone above put TTS/ on sys.path.
import IPython
from IPython.display import Audio
from pathlib import Path, PureWindowsPath
from TTS.utils.synthesizer import Synthesizer
|
|
|
|
|
# Directory layout: checkpoints under ./models, rendered audio under ./synthesized.
# Plain Path is correct here — the original Path(PureWindowsPath('./models/'))
# needlessly pushed a POSIX-style relative path through Windows path semantics.
MODEL_PATH = Path('./models')

CONFIG_PATH = MODEL_PATH / 'config.json'

OUTPUT_PATH = Path('./synthesized')

# Use the GPU whenever torch can see one.
CUDA = torch.cuda.is_available()
|
|
|
|
|
# Lazily-populated cache of Synthesizer instances, keyed by voice name.
synthesizers = {}

# Voice registry: name -> metadata record (must carry at least a 'model'
# checkpoint filename), loaded from the bundled models.json manifest.
with open('models.json', 'r', encoding='utf-8') as f:
    models = json.load(f)

# Default to an empty list so a manifest without a 'voices' key yields an
# empty registry instead of crashing with TypeError on iterating None.
voices = {voice.get('name'): voice for voice in models.get('voices', [])}
|
|
|
def synthesize(text: str, voice: str) -> str:
    """Render *text* with the requested *voice* and return the output wav path.

    Synthesizers are constructed on first use and cached in the module-level
    ``synthesizers`` dict, so later requests for the same voice skip the
    expensive model load.

    Raises:
        KeyError: if *voice* is not present in the ``voices`` registry.
    """
    # Indexing (rather than chained .get) makes a missing voice fail with a
    # clear KeyError instead of AttributeError on None.
    model_file = MODEL_PATH / voices[voice]['model']

    if voice not in synthesizers:
        synthesizers[voice] = Synthesizer(
            tts_config_path=CONFIG_PATH,
            tts_checkpoint=model_file,
            use_cuda=CUDA,
        )

    syn = synthesizers[voice]
    wav = syn.tts(text)

    # Inline playback — a leftover notebook nicety; harmless when run outside
    # IPython (it just prints the object's repr).
    IPython.display.display(Audio(wav, rate=syn.sample_rate))

    # Timestamped name keeps successive renders from clobbering each other.
    file_name = f'{int(time.time())}_{voice}.wav'
    out_path = os.path.join(OUTPUT_PATH, file_name)

    syn.save_wav(wav, out_path)
    return out_path
|
|
|
|
|
|
|
# NOTE(review): gr.inputs.* is the legacy (pre-3.x) component namespace; if
# the installed gradio is modern these should become gr.Textbox / gr.Dropdown.
demo = gr.Interface(
    fn=synthesize,
    inputs=[
        gr.inputs.Textbox(label='What do you want it to say?'),
        gr.inputs.Dropdown(
            # Dropdown expects a concrete list of choices, not a dict view.
            choices=list(voices.keys()),
            value='xqc',
            # NOTE(review): Dropdown's documented `type` values are
            # 'value'/'index' — confirm 'text' is accepted by this version.
            type='text'
        )
    ],
    outputs='audio',
    title='Wesker TTS'
)
demo.launch()