File size: 1,874 Bytes
5e171c0
 
 
 
 
 
 
 
c1d4444
 
5e171c0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import torch
import gradio as gr
import time
import json
import git
import os
import sys

# Shell commands run in order when the module is executed: clone the
# Coqui-TTS fork (branch multilingual-torchaudio-SE) into ./TTS, install it
# in editable mode, then install the pinned torchaudio release.
init = [
    'git clone https://github.com/Edresson/Coqui-TTS -b multilingual-torchaudio-SE TTS',
    'pip install -q -e TTS/',
    'pip install -q torchaudio==0.9.0',
]

# The exit status of each command is not inspected, so a failing step does
# not stop the script.
for cmd in init:
    os.system(cmd)

# Put the cloned checkout on the import path and make sure the directory
# for generated audio exists.
sys.path.append('TTS/')
os.makedirs('synthesized/', exist_ok=True)


import IPython
from IPython.display import Audio
from pathlib import Path, PureWindowsPath
from TTS.utils.synthesizer import Synthesizer


# Locations are written as Windows-flavoured pure paths and converted to the
# concrete Path type of the running platform.
MODEL_PATH = Path(PureWindowsPath('./models/'))
CONFIG_PATH = MODEL_PATH.joinpath('config.json')
OUTPUT_PATH = Path(PureWindowsPath('./synthesized/'))

# True when torch reports an available CUDA device; handed to Synthesizer.
CUDA = torch.cuda.is_available()


# voice name -> Synthesizer instance; filled on first use by synthesize().
synthesizers = {}
# voice name -> that voice's entry from models.json.
voices = {}

with open('models.json', 'r') as f:
    models = json.load(f)
    # Index every entry of the top-level 'voices' list by its 'name' field.
    voices.update(
        (voice.get('name'), voice) for voice in models.get('voices')
    )

def synthesize(text: str, voice: str) -> str:
    """Speak *text* with the chosen voice and save the result as a WAV file.

    A ``Synthesizer`` is created the first time a voice is requested and is
    kept in the module-level ``synthesizers`` dict for later calls.

    Args:
        text: The text to synthesize.
        voice: A key of the module-level ``voices`` mapping.

    Returns:
        The path of the written WAV file (inside ``OUTPUT_PATH``) as a string.

    Raises:
        ValueError: If *voice* is not a key of ``voices``.
    """
    voice_entry = voices.get(voice)
    if voice_entry is None:
        # Fail with a readable message instead of an AttributeError on None.
        raise ValueError(f'Unknown voice: {voice!r}')

    syn = synthesizers.get(voice)
    if syn is None:
        # First request for this voice: build its synthesizer and cache it.
        syn = Synthesizer(
            tts_config_path=CONFIG_PATH,
            tts_checkpoint=MODEL_PATH / voice_entry.get('model'),
            use_cuda=CUDA,
        )
        synthesizers[voice] = syn

    wav = syn.tts(text)

    # NOTE(review): presumably only useful when run inside a notebook
    # front-end; kept so existing behaviour is unchanged.
    IPython.display.display(Audio(wav, rate=syn.sample_rate))

    # NOTE(review): the timestamp has one-second resolution, so two requests
    # for the same voice within one second produce the same file name.
    file_name = f'{int(time.time())}_{voice}.wav'
    out_path = os.path.join(OUTPUT_PATH, file_name)

    syn.save_wav(wav, out_path)
    return out_path

  
  
# Web UI: a text box and a voice selector are passed to synthesize(); the
# file path it returns is handed to Gradio's 'audio' output.
demo = gr.Interface(
    fn=synthesize,
    inputs=[
        gr.inputs.Textbox(label='What do you want it to say?'),
        gr.inputs.Dropdown(
            # Pass a real list: a dict keys view is not a list and cannot be
            # JSON-serialized if the component configuration is dumped.
            choices=list(voices),
            # NOTE(review): 'xqc' is assumed to be a name present in
            # models.json, and the legacy gr.inputs.Dropdown documents
            # `default=` / type 'value'|'index' -- confirm `value=` and
            # type='text' against the pinned Gradio version.
            value='xqc',
            type='text',
        ),
    ],
    outputs='audio',
    title='Wesker TTS',
)
demo.launch()