# Spaces: Sleeping
import argparse
import functools
import os

import gradio as gr
from gradio import components
import torch
import numpy as np
from scipy.io.wavfile import write
import onnxruntime

import commons
import utils
from text import text_to_sequence
from preprocess import preprocess
def get_text(texts, hps, chunk_len=30):
    """Convert raw input text into a list of token-id tensors.

    The input is split on commas; each segment is passed through
    ``preprocess`` and then cut into chunks of at most ``chunk_len``
    whitespace-separated words. Every chunk is converted with
    ``text_to_sequence`` using ``hps.data.text_cleaners`` and, when
    ``hps.data.add_blank`` is truthy, interspersed with the id ``0``.

    Args:
        texts: The full input string.
        hps: Hyper-parameter object exposing ``data.text_cleaners`` and
            ``data.add_blank``.
        chunk_len: Maximum number of words per chunk (default 30).

    Returns:
        A list of ``torch.LongTensor``, one per chunk, in input order.
        Segments that contain no words after preprocessing contribute
        nothing, so the list may be empty.

    Raises:
        ValueError: If ``chunk_len`` is smaller than 1.
    """
    if chunk_len < 1:
        raise ValueError("chunk_len must be a positive integer")

    text_norm_list = []
    for segment in texts.split(","):
        # Tokenise once per segment instead of re-splitting for every chunk.
        words = preprocess(segment).split()
        for start in range(0, len(words), chunk_len):
            chunk_string = " ".join(words[start:start + chunk_len])
            text_norm = text_to_sequence(chunk_string, hps.data.text_cleaners)
            if hps.data.add_blank:
                text_norm = commons.intersperse(text_norm, 0)
            text_norm_list.append(torch.LongTensor(text_norm))
    return text_norm_list
@functools.lru_cache(maxsize=1)
def _load_session(model_path):
    """Build the CPU ONNX Runtime session for ``model_path`` and cache it."""
    return onnxruntime.InferenceSession(
        str(model_path),
        sess_options=onnxruntime.SessionOptions(),
        providers=["CPUExecutionProvider"],
    )


def tts(text):
    """Synthesise speech for ``text`` and return the path of the WAV file.

    The text is tokenised into chunks by ``get_text``; each chunk is run
    through the ONNX model in ``model.onnx`` and the resulting audio
    arrays are concatenated and written to ``output.wav`` at
    ``hps.data.sampling_rate`` (read from ``config.json``).

    Args:
        text: The text to speak.

    Returns:
        The path of the written WAV file (``"output.wav"``).

    Raises:
        ValueError: If the input yields no token chunks to synthesise.
    """
    model_path = "model.onnx"
    config_path = "config.json"
    speaker_id = 6
    output_wav_path = "output.wav"

    # Session construction is the expensive part; reuse it across requests.
    model = _load_session(model_path)
    hps = utils.get_hparams_from_file(config_path)

    stn_tst_list = get_text(text, hps)
    if not stn_tst_list:
        # Without this guard np.concatenate fails with an opaque message.
        raise ValueError("No speakable text found in the input.")

    # Loop-invariant model inputs. Building ``sid`` once avoids calling
    # int() on an ndarray from the second chunk onwards, and the explicit
    # dtype keeps the input int64 on every platform.
    # NOTE(review): the three scales are presumably noise scale, length
    # scale and noise-w as in Piper-style VITS exports - confirm.
    scales = np.array([0.667, 1.1, 0.85], dtype=np.float32)
    sid = np.array([speaker_id], dtype=np.int64)

    audios = []
    for stn_tst in stn_tst_list:
        # Add a leading batch axis: (tokens,) -> (1, tokens).
        token_ids = np.expand_dims(np.array(stn_tst, dtype=np.int64), 0)
        token_lengths = np.array([token_ids.shape[1]], dtype=np.int64)
        outputs = model.run(
            None,
            {
                "input": token_ids,
                "input_lengths": token_lengths,
                "scales": scales,
                "sid": sid,
            },
        )
        # Drop the two leading singleton axes of the first output.
        audios.append(outputs[0].squeeze((0, 1)))

    audios = np.concatenate(audios, axis=0)
    write(data=audios, rate=hps.data.sampling_rate, filename=output_wav_path)
    return output_wav_path
if __name__ == "__main__":
    # Assemble the demo: one text box in, one audio player (file path) out.
    text_input = components.Textbox(label="Text Input")
    speech_output = components.Audio(type='filepath', label="Generated Speech")
    demo = gr.Interface(
        fn=tts,
        inputs=[text_input],
        outputs=speech_output,
        live=False,
    )
    # Listen on all interfaces, port 7860.
    demo.launch(server_name="0.0.0.0", server_port=7860)