Spaces:
Running
Running
File size: 2,307 Bytes
6f6918a 9a436eb 6f6918a b23490f 6f6918a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 |
import argparse
import gradio as gr
from gradio import components
import os
import torch
import commons
import utils
import numpy as np
from text import text_to_sequence
from scipy.io.wavfile import write
from preprocess import preprocess
import onnxruntime
def get_text(texts, hps, *, chunk_len=30):
    """Convert comma-separated text into a list of symbol-ID tensors.

    ``texts`` is split on commas. Each piece is passed through ``preprocess``
    and then cut into chunks of at most ``chunk_len`` whitespace-separated
    words. Every chunk is converted with ``text_to_sequence`` using
    ``hps.data.text_cleaners``; when ``hps.data.add_blank`` is truthy the
    resulting IDs are additionally run through ``commons.intersperse`` with
    the value 0.

    Args:
        texts: Raw input string; commas separate independently processed
            segments.
        hps: Hyper-parameter object exposing ``data.text_cleaners`` and
            ``data.add_blank``.
        chunk_len: Maximum number of words per chunk. Defaults to 30.

    Returns:
        A list of ``torch.LongTensor`` objects, one per chunk, in input
        order. The list is empty when no segment contains any words.

    Raises:
        ValueError: If ``chunk_len`` is smaller than 1.
    """
    if chunk_len < 1:
        raise ValueError("chunk_len must be a positive integer")

    text_norm_list = []
    for segment in texts.split(","):
        # Tokenise each segment once instead of re-splitting it per chunk.
        words = preprocess(segment).split()
        for start in range(0, len(words), chunk_len):
            chunk_string = " ".join(words[start:start + chunk_len])
            text_norm = text_to_sequence(chunk_string, hps.data.text_cleaners)
            if hps.data.add_blank:
                text_norm = commons.intersperse(text_norm, 0)
            text_norm_list.append(torch.LongTensor(text_norm))
    return text_norm_list
def tts(text):
    """Synthesize speech for ``text`` and return the path of the WAV file.

    Reads hyper-parameters from ``config.json``, converts the text to
    symbol-ID chunks with ``get_text``, runs the ONNX model ``model.onnx``
    on the CPU provider once per chunk, concatenates the per-chunk audio and
    writes it to ``output.wav`` at ``hps.data.sampling_rate``.

    Args:
        text: Input text; it is split and chunked by ``get_text``.

    Returns:
        The output file path, ``"output.wav"``.

    Raises:
        ValueError: If ``text`` yields no chunks to synthesize (for example
            an empty or whitespace-only string).
    """
    model_path = "model.onnx"
    config_path = "config.json"
    sid = 4
    output_wav_path = "output.wav"

    hps = utils.get_hparams_from_file(config_path)
    stn_tst_list = get_text(text, hps)
    if not stn_tst_list:
        # Fail early with a clear message; np.concatenate would otherwise
        # raise an opaque ValueError on the empty list further down.
        raise ValueError("No text to synthesize.")

    # NOTE(review): the session is rebuilt on every call; consider caching it
    # at module level if request latency matters.
    sess_options = onnxruntime.SessionOptions()
    model = onnxruntime.InferenceSession(
        str(model_path),
        sess_options=sess_options,
        providers=["CPUExecutionProvider"],
    )

    # Loop-invariant model inputs are built once. Keeping the speaker array
    # in its own name avoids rebinding ``sid`` to an ndarray, which made
    # later iterations call int() on a 1-element array (deprecated in NumPy).
    scales = np.array([0.667, 1.1, 0.85], dtype=np.float32)
    speaker_id = np.array([int(sid)])

    audios = []
    for stn_tst in stn_tst_list:
        # Add a leading axis of size 1 in front of the ID sequence.
        input_ids = np.expand_dims(np.array(stn_tst, dtype=np.int64), 0)
        input_lengths = np.array([input_ids.shape[1]], dtype=np.int64)
        audio = model.run(
            None,
            {
                "input": input_ids,
                "input_lengths": input_lengths,
                "scales": scales,
                "sid": speaker_id,
            },
        )[0].squeeze((0, 1))
        audios.append(audio)

    audios = np.concatenate(audios, axis=0)
    write(data=audios, rate=hps.data.sampling_rate, filename=output_wav_path)
    return output_wav_path
if __name__ == "__main__":
    # Build the UI pieces first, then wire them to ``tts`` and start serving.
    text_input = components.Textbox(label="Text Input")
    speech_output = components.Audio(type="filepath", label="Generated Speech")
    demo = gr.Interface(
        fn=tts,
        inputs=[text_input],
        outputs=speech_output,
        live=False,
    )
    # Listen on all interfaces, port 7860.
    demo.launch(server_name="0.0.0.0", server_port=7860)
|