File size: 1,874 Bytes
5e171c0 c1d4444 5e171c0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 |
import torch
import gradio as gr
import time
import json
import git
import os
import sys
# Bootstrap the runtime environment: clone the Coqui-TTS fork, install it in
# editable mode, and pin torchaudio. Commands are run through the shell, in order.
init = [
    'git clone https://github.com/Edresson/Coqui-TTS -b multilingual-torchaudio-SE TTS',
    'pip install -q -e TTS/',
    'pip install -q torchaudio==0.9.0',
]
for cmd in init:
    status = os.system(cmd)
    if status != 0:
        # Stay best-effort (e.g. `git clone` refuses to reuse a non-empty TTS/
        # directory left by an earlier run), but report the failure instead of
        # silently discarding the exit status.
        print(f'warning: setup command failed (status {status}): {cmd}', file=sys.stderr)

# Make the cloned checkout importable and ensure the output folder exists.
sys.path.append('TTS/')
os.makedirs('synthesized/', exist_ok=True)
import IPython
from IPython.display import Audio
from pathlib import Path, PureWindowsPath
from TTS.utils.synthesizer import Synthesizer
# Filesystem layout: model checkpoints and their shared config.json live under
# ./models/, generated audio is written under ./synthesized/.
MODEL_PATH = Path(PureWindowsPath('./models/'))
CONFIG_PATH = MODEL_PATH / 'config.json'
OUTPUT_PATH = Path(PureWindowsPath('./synthesized/'))

# True when torch reports an available CUDA device.
CUDA = torch.cuda.is_available()

# Loaded synthesizers, keyed by voice name (filled lazily by synthesize()).
synthesizers = {}

# Voice entries from models.json, keyed by each entry's 'name' field.
voices = {}

# Read the registry explicitly as UTF-8 rather than the platform's locale
# encoding, so non-ASCII voice names load identically on every OS.
with open('models.json', 'r', encoding='utf-8') as f:
    models = json.load(f)
for voice in models.get('voices'):
    voices[voice.get('name')] = voice
def synthesize(text: str, voice: str):
    """Synthesize *text* with the named voice and save the result as a WAV file.

    A ``Synthesizer`` is created the first time a voice is requested and then
    reused from the module-level ``synthesizers`` dict on later calls.

    Args:
        text: The text to speak.
        voice: Key into the module-level ``voices`` mapping.

    Returns:
        The path of the written WAV file, located under ``OUTPUT_PATH``.

    Raises:
        ValueError: If *voice* is not a key of ``voices``.
    """
    voice_info = voices.get(voice)
    if voice_info is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError(f'Unknown voice: {voice!r}')

    if voice not in synthesizers:
        # The checkpoint path is only needed when the synthesizer is first built.
        model_file = MODEL_PATH / voice_info.get('model')
        synthesizers[voice] = Synthesizer(
            tts_config_path=CONFIG_PATH,
            tts_checkpoint=model_file,
            use_cuda=CUDA,
        )
    syn = synthesizers[voice]

    wav = syn.tts(text)
    # Hand the clip to IPython's display machinery (relevant in notebook runs).
    IPython.display.display(Audio(wav, rate=syn.sample_rate))

    # File name is "<unix-seconds>_<voice>.wav".
    file_name = f'{int(time.time())}_{voice}.wav'
    out_path = os.path.join(OUTPUT_PATH, file_name)
    syn.save_wav(wav, out_path)
    return out_path
# Web UI: a free-text prompt plus a voice picker, wired to synthesize(); the
# interface's output is declared as audio.
# NOTE(review): gr.inputs.* is Gradio's legacy component namespace — confirm the
# installed Gradio version accepts `value=` and `type='text'` on Dropdown, and
# that 'xqc' is actually one of the configured voices.
demo = gr.Interface(
    fn=synthesize,
    inputs=[
        gr.inputs.Textbox(label='What do you want it to say?'),
        gr.inputs.Dropdown(
            # Materialize the keys: a dict_keys view is not a list and is not
            # JSON-serializable.
            choices=list(voices),
            value='xqc',
            type='text',
        ),
    ],
    outputs='audio',
    title='Wesker TTS',
)
demo.launch()