Spaces:
Sleeping
Sleeping
import gradio as gr | |
from InferenceInterfaces.ControllableInterface import ControllableInterface | |
from Utility.utils import float2pcm | |
class TTSWebUI:
    """Gradio web front end around ``ControllableInterface``.

    Constructing an instance builds the ``gr.Interface`` and launches it
    immediately, so the constructor only returns when ``launch`` does.
    """

    # Single source of truth for both dropdowns, so the "Text" and "Accent"
    # choices can never drift apart. Order is the order shown in the UI.
    _LANGUAGES = (
        "English",
        "German",
        "Greek",
        "Spanish",
        "Finnish",
        "Russian",
        "Hungarian",
        "Dutch",
        "French",
        "Polish",
        "Portuguese",
        "Italian",
        "Chinese",
        "Vietnamese",
    )

    # Voice seed that is preselected when the page loads.
    _PREFERRED_VOICE_SEED = 279

    @classmethod
    def _dropdown_choices(cls, suffix):
        """Return the dropdown entries, e.g. ``"English Text"`` for suffix ``"Text"``."""
        return [language + " " + suffix for language in cls._LANGUAGES]

    @classmethod
    def _default_voice_seed(cls, available_artificial_voices):
        """Return the preselected seed, clamped to the slider's maximum."""
        return min(cls._PREFERRED_VOICE_SEED, available_artificial_voices)

    @staticmethod
    def _section_note(text):
        """Build the textbox that serves as a caption between slider groups."""
        return gr.Textbox(lines=3,
                          placeholder=text,
                          value=text,
                          label=" ",
                          show_label=False)

    def __init__(self, gpu_id="cpu", title="Controllable Embeddings", article="", available_artificial_voices=1000):
        """Build the interface and launch it.

        Args:
            gpu_id: Forwarded to ``ControllableInterface``.
            title: Title shown by the Gradio interface.
            article: Article text shown by the Gradio interface.
            available_artificial_voices: Forwarded to ``ControllableInterface``
                and used as the upper bound of the voice seed slider.
        """
        self.controllable_ui = ControllableInterface(gpu_id=gpu_id,
                                                     available_artificial_voices=available_artificial_voices)

        # The order of this list must match the positional parameters of `read`.
        input_components = [
            gr.Textbox(lines=2,
                       placeholder="write what you want the synthesis to read here...",
                       value="Colorless green ideas sleep furiously!",
                       label="Text input"),
            gr.Dropdown(self._dropdown_choices("Text"),
                        type="value",
                        value='English Text',
                        label="Select the Language of the Text"),
            gr.Dropdown(self._dropdown_choices("Accent"),
                        type="value",
                        value='English Accent',
                        label="Select the Accent of the Speaker"),
            # Caption only; its value arrives in `read` as `ignore_1`.
            self._section_note("\nThe sliders below control the speaker embedding"),
            gr.Slider(minimum=0, maximum=available_artificial_voices, step=1,
                      # Clamp so that the default stays inside the slider range
                      # when fewer than 279 voices are configured.
                      value=self._default_voice_seed(available_artificial_voices),
                      label="Random Seed for the artificial Voice"),
            gr.Slider(minimum=-50.0, maximum=50.0, step=0.1, value=0.0, label="Femininity / Masculinity"),
            gr.Slider(minimum=-30.0, maximum=30.0, step=0.1, value=0.0, label="Sibilance"),
            gr.Slider(minimum=-30.0, maximum=30.0, step=0.1, value=0.0, label="Accentuated High / Low Frequencies"),
            gr.Slider(minimum=-30.0, maximum=30.0, step=0.1, value=0.0, label="Loudness / Arousal / Calmness"),
            gr.Slider(minimum=-20.0, maximum=20.0, step=0.1, value=0.0, label="Tone / Timbre"),
            # Caption only; its value arrives in `read` as `ignore_2`.
            self._section_note("\nThe sliders below directly control the TTS"),
            gr.Slider(minimum=0.5, maximum=1.5, step=0.1, value=1.0, label="Duration Scale"),
            gr.Slider(minimum=0.0, maximum=2.0, step=0.1, value=1.0, label="Pause Duration Scale"),
            gr.Slider(minimum=0.0, maximum=2.0, step=0.1, value=1.0, label="Pitch Variance Scale"),
            gr.Slider(minimum=0.0, maximum=2.0, step=0.1, value=1.0, label="Energy Variance Scale"),
        ]

        self.iface = gr.Interface(fn=self.read,
                                  inputs=input_components,
                                  outputs=[gr.Audio(type="numpy", label="Speech"),
                                           gr.Image(label="Visualization")],
                                  title=title,
                                  theme="default",
                                  allow_flagging="never",
                                  article=article)
        # NOTE(review): `enable_queue` as a `launch` argument may not be accepted
        # by every Gradio release - confirm against the pinned version.
        self.iface.launch(enable_queue=True)

    def read(self,
             prompt,
             language,
             accent,
             ignore_1,
             voice_seed,
             emb1,
             emb2,
             emb3,
             emb5,
             emb6,
             ignore_2,
             duration_scaling_factor,
             pause_duration_scaling_factor,
             pitch_variance_scale,
             energy_variance_scale):
        """Synthesize speech for one submission of the interface.

        The parameters arrive positionally in the order of the interface inputs.

        Args:
            prompt: Content of the "Text input" box.
            language: Selected entry of the text language dropdown.
            accent: Selected entry of the accent dropdown.
            ignore_1: Content of the first caption textbox; unused.
            voice_seed: Value of the voice seed slider.
            emb1: Value of the "Femininity / Masculinity" slider.
            emb2: Value of the "Sibilance" slider.
            emb3: Value of the "Accentuated High / Low Frequencies" slider.
            emb5: Value of the "Loudness / Arousal / Calmness" slider.
            emb6: Value of the "Tone / Timbre" slider.
            ignore_2: Content of the second caption textbox; unused.
            duration_scaling_factor: Value of the "Duration Scale" slider.
            pause_duration_scaling_factor: Value of the "Pause Duration Scale" slider.
            pitch_variance_scale: Value of the "Pitch Variance Scale" slider.
            energy_variance_scale: Value of the "Energy Variance Scale" slider.

        Returns:
            A pair ``((sr, audio), fig)`` for the audio and image outputs, where
            ``audio`` is the synthesized waveform passed through ``float2pcm``.
        """
        sr, wav, fig = self.controllable_ui.read(prompt,
                                                 language,
                                                 accent,
                                                 voice_seed,
                                                 duration_scaling_factor,
                                                 pause_duration_scaling_factor,
                                                 pitch_variance_scale,
                                                 energy_variance_scale,
                                                 emb1,
                                                 emb2,
                                                 emb3,
                                                 0.0,  # slider 4 did not have a meaningful interpretation, too many properties mixed
                                                 emb5,
                                                 emb6)
        return (sr, float2pcm(wav)), fig
# Script entry point: build the web UI on the CPU when this file is executed directly.
if __name__ == "__main__":
    TTSWebUI(gpu_id="cpu")