import json
import os
import sys
import time

import torch
import gradio as gr

# Clone the Coqui TTS fork and install it (plus a matching torchaudio) at startup,
# so the TTS package is importable below.
init = [
    'git clone https://github.com/Edresson/Coqui-TTS -b multilingual-torchaudio-SE TTS',
    'pip install -q -e TTS/',
    'pip install -q torchaudio==0.9.0',
]
for cmd in init:
    os.system(cmd)

sys.path.append('TTS/')
os.makedirs('synthesized/', exist_ok=True)

from pathlib import Path

from IPython.display import Audio, display
from TTS.utils.synthesizer import Synthesizer

MODEL_PATH = Path('./models')
CONFIG_PATH = MODEL_PATH / 'config.json'
OUTPUT_PATH = Path('./synthesized')
CUDA = torch.cuda.is_available()

# Synthesizers are created lazily, one per voice, and cached here.
synthesizers = {}
voices = {}

# models.json maps each voice name to its model checkpoint file.
with open('models.json', 'r') as f:
    models = json.load(f)
for voice in models['voices']:
    voices[voice['name']] = voice


def synthesize(text: str, voice: str) -> str:
    """Synthesize `text` with the selected voice and return the path to a WAV file."""
    model_file = MODEL_PATH / voices[voice]['model']
    if voice not in synthesizers:
        synthesizers[voice] = Synthesizer(
            tts_config_path=str(CONFIG_PATH),
            tts_checkpoint=str(model_file),
            use_cuda=CUDA,
        )
    syn = synthesizers[voice]
    wav = syn.tts(text)

    # Inline preview; this only renders when the script is run inside a notebook.
    display(Audio(wav, rate=syn.output_sample_rate))

    file_name = f'{int(time.time())}_{voice}.wav'
    out_path = OUTPUT_PATH / file_name
    syn.save_wav(wav, str(out_path))
    return str(out_path)


demo = gr.Interface(
    fn=synthesize,
    inputs=[
        gr.Textbox(label='What do you want it to say?'),
        gr.Dropdown(choices=list(voices.keys()), value='xqc', label='Voice'),
    ],
    outputs='audio',
    title='Wesker TTS',
)
demo.launch()
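
# NOTE: models.json is not included in this file. Based on how it is read above
# (a top-level "voices" list whose entries carry "name" and "model" keys), a
# minimal example could look like the sketch below. The voice names and model
# file names are illustrative assumptions, not taken from the original source;
# "xqc" appears only because it is the dropdown's default value.
#
# {
#     "voices": [
#         {"name": "xqc",    "model": "xqc.pth"},
#         {"name": "wesker", "model": "wesker.pth"}
#     ]
# }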