|
import gradio as gr |
|
import subprocess |
|
|
|
# Example sentences offered in the "Select a text" dropdown.
predefined_texts = [
    "A combination of Canadian capital quickly organized and petitioned for the same privileges.",
    "The date was nearly eighteen years old.",
    "Hardly were our plans made public before we were met by powerful opposition.",
]
|
|
|
# Emotion label shown in the UI -> integer forwarded to synthesize.py as --emotion_id.
emotion_mapping = {
    "amused": 0,
    "anger": 1,
    "disgust": 2,
    "neutral": 3,
    "sleepiness": 4,
}
|
|
|
def synthesize_speech(input_type, text, own_text, speaker_id, embed_type, emotion_id):
    """Run ``synthesize.py`` on the chosen text and return the expected WAV path.

    Args:
        input_type: ``"Choose from examples"`` selects ``text``; any other
            value selects ``own_text``.
        text: Sentence picked from the predefined examples.
        own_text: Free-form sentence typed by the user.
        speaker_id: Value forwarded to the script as ``--speaker_id``.
        embed_type: ``"bert_embed"`` passes ``--bert_embed 1``; any other
            value passes ``--emotion_id`` looked up from ``emotion_id``.
        emotion_id: Key of ``emotion_mapping``; only read when ``embed_type``
            is not ``"bert_embed"``.

    Returns:
        ``output/result/EmoV_DB/<text>.wav`` -- presumably the file that
        ``synthesize.py`` writes for that text (not verified here).

    Raises:
        ValueError: If the selected text is empty/blank, or the emotion is
            not a key of ``emotion_mapping``.
        subprocess.CalledProcessError: If ``synthesize.py`` exits non-zero.
    """
    selected_text = text if input_type == "Choose from examples" else own_text

    # Reject missing input up front; otherwise None would be synthesized as
    # the literal string "None" and an empty string would yield ".wav".
    if not selected_text or not selected_text.strip():
        raise ValueError("No text to synthesize: select an example or enter your own text.")

    # Build an argv list instead of a shell string: the text comes straight
    # from the user, so it must never be interpreted by a shell (quotes,
    # ``;``, ``$()`` ... would otherwise break or hijack the command).
    command = ["python3", "synthesize.py", "--text", selected_text]

    if embed_type == "bert_embed":
        command += ["--bert_embed", "1"]
    else:
        if emotion_id not in emotion_mapping:
            raise ValueError(f"Unknown emotion: {emotion_id!r}")
        command += ["--emotion_id", str(emotion_mapping[emotion_id])]

    command += [
        "--speaker_id", str(speaker_id),
        "--restore_step", "900000",
        "--mode", "single",
        "-p", "config/EmoV_DB/preprocess.yaml",
        "-m", "config/EmoV_DB/model.yaml",
        "-t", "config/EmoV_DB/train.yaml",
    ]

    # check_output raises CalledProcessError on failure; the captured stdout
    # is not needed and is discarded.
    subprocess.check_output(command)

    return f"output/result/EmoV_DB/{selected_text}.wav"
|
|
|
# Input widgets, declared in the same order as synthesize_speech's parameters.
input_type = gr.Radio(
    choices=["Choose from examples", "Enter your own text"],
    label="Input Type",
)
text = gr.Dropdown(choices=predefined_texts, label="Select a text")
own_text = gr.Textbox(lines=2, label="Enter your own text")
speaker_id = gr.Slider(minimum=0, maximum=3, step=1, value=0, label="Speaker ID")
embed_type = gr.Radio(choices=["bert_embed", "emotion_id"], label="Embedding Type")
# The dropdown offers the emotion labels; synthesize_speech maps them to ids.
emotion_id = gr.Dropdown(choices=list(emotion_mapping), label="Select Emotion")
|
|
|
# Wire the widgets to synthesize_speech; it returns a file path, which the
# Audio output component receives (type="filepath").
iface = gr.Interface(
    fn=synthesize_speech,
    inputs=[input_type, text, own_text, speaker_id, embed_type, emotion_id],
    outputs=gr.Audio(type="filepath"),
    title="Text-to-Speech Demo",
    description="Select or enter text and configure options to synthesize speech.",
)

# Launch the interface as soon as the module is executed.
iface.launch()
|
|