Spaces:
Sleeping
Sleeping
File size: 2,100 Bytes
edd9e8a d197937 edd9e8a 254a63f edd9e8a e0e4c11 254a63f a8cbdb8 254a63f a8cbdb8 254a63f 6a6dfa7 d197937 254a63f d197937 0dc9f9f e0e4c11 0dc9f9f d197937 254a63f 0dc9f9f 254a63f 0dc9f9f 254a63f 0dc9f9f be197a9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 |
import gradio as gr
import subprocess
# Example sentences offered in the "Choose from examples" dropdown.
predefined_texts = [
    "A combination of Canadian capital quickly organized and petitioned for the same privileges.",
    "The date was nearly eighteen years old.",
    "Hardly were our plans made public before we were met by powerful opposition.",
]

# Human-readable emotion names mapped to the integer ids that synthesize.py
# expects for its --emotion_id flag.
emotion_mapping = dict(amused=0, anger=1, disgust=2, neutral=3, sleepiness=4)
def synthesize_speech(input_type, text, own_text, speaker_id, embed_type, emotion_id):
    """Run synthesize.py for the chosen text/speaker/emotion and return the
    path of the generated wav file.

    Parameters mirror the Gradio inputs:
        input_type: "Choose from examples" or "Enter your own text".
        text: sentence picked from the examples dropdown.
        own_text: free-form text typed by the user.
        speaker_id: integer speaker index (slider, 0-3).
        embed_type: "bert_embed" or "emotion_id".
        emotion_id: emotion name; only used when embed_type != "bert_embed".

    Returns:
        Path to the wav file that synthesize.py writes (named after the
        input sentence — presumably synthesize.py's convention; confirm).

    Raises:
        subprocess.CalledProcessError: if synthesize.py exits non-zero
            (same failure behavior as the previous check_output call).
    """
    if input_type == "Choose from examples":
        selected_text = text
    else:
        selected_text = own_text

    # Build an argument list and run with shell=False so user-supplied text
    # is passed verbatim as a single argument. The previous version
    # interpolated the text into an f-string executed with shell=True,
    # which was a command-injection hole (and broke on quotes).
    command = ["python3", "synthesize.py", "--text", selected_text]
    if embed_type == "bert_embed":
        command += ["--bert_embed", "1"]
    else:
        command += ["--emotion_id", str(emotion_mapping[emotion_id])]
    command += [
        "--speaker_id", str(speaker_id),
        "--restore_step", "900000",
        "--mode", "single",
        "-p", "config/EmoV_DB/preprocess.yaml",
        "-m", "config/EmoV_DB/model.yaml",
        "-t", "config/EmoV_DB/train.yaml",
    ]
    subprocess.run(command, check=True)

    audio_file = f"output/result/EmoV_DB/{selected_text}.wav"
    return audio_file
# --- Gradio front end ------------------------------------------------------
# The six widgets are passed positionally to synthesize_speech in the same
# order as its parameters, so they are built inline in the inputs list.
iface = gr.Interface(
    fn=synthesize_speech,
    inputs=[
        gr.Radio(
            choices=["Choose from examples", "Enter your own text"],
            label="Input Type",
        ),
        gr.Dropdown(choices=predefined_texts, label="Select a text"),
        gr.Textbox(lines=2, label="Enter your own text"),
        # `value` (not the deprecated `default`) sets the initial slider position.
        gr.Slider(minimum=0, maximum=3, step=1, value=0, label="Speaker ID"),
        gr.Radio(choices=["bert_embed", "emotion_id"], label="Embedding Type"),
        gr.Dropdown(choices=list(emotion_mapping), label="Select Emotion"),
    ],
    outputs=gr.Audio(type="filepath"),
    title="Text-to-Speech Demo",
    description="Select or enter text and configure options to synthesize speech.",
)
iface.launch()
|