# Source: Hugging Face Space file by Ionut-Bostan, commit d197937 (935 bytes)
# Commit message: "allowing model to synthesize samples using the CPU"
import gradio as gr
import subprocess
# Texts offered to the user in the dropdown; each is synthesized on demand.
predefined_texts = [f"Example text {i}" for i in range(1, 4)]
def synthesize_speech(text, speaker_id):
    """Run the external synthesize.py script and return the output audio path.

    Args:
        text: Sentence to synthesize (one of the predefined dropdown texts).
        speaker_id: Integer speaker index forwarded to synthesize.py.

    Returns:
        Path to the generated audio file (currently a hard-coded placeholder —
        TODO: derive the real path from synthesize.py's output).

    Raises:
        subprocess.CalledProcessError: if synthesize.py exits non-zero.
    """
    # Build an argument list instead of a shell string: with shell=True the
    # interpolated `text` could break the quoting or inject shell commands.
    command = [
        "python3", "synthesize.py",
        "--text", text,
        "--bert_embed", "1",
        "--speaker_id", str(speaker_id),
        "--restore_step", "900000",
        "--mode", "single",
        "-p", "config/EmoV_DB/preprocess.yaml",
        "-m", "config/EmoV_DB/model.yaml",
        "-t", "config/EmoV_DB/train.yaml",
    ]
    output = subprocess.check_output(command)
    # Replace this with the path of the generated audio file
    audio_file = 'output_file_path'
    return audio_file
# Gradio UI wiring. NOTE(review): the original used the gr.inputs/gr.outputs
# namespaces and Slider's `default=` kwarg, which were removed in Gradio 3+;
# the flat component API below is the supported equivalent.
iface = gr.Interface(
    fn=synthesize_speech,
    inputs=[
        gr.Dropdown(choices=predefined_texts, label="Select a text"),
        gr.Slider(minimum=0, maximum=10, step=1, value=0, label="Speaker ID"),
    ],
    # type="filepath" tells Gradio the function returns a path to an audio file.
    outputs=gr.Audio(type="filepath"),
    title="Text-to-Speech Demo",
)
iface.launch()