# Hugging Face Space: Ionut-Bostan / Emotion_Aware_TTS
# Space status when captured: Running
# File size: 1,982 Bytes
# Commit hashes shown alongside the listing:
#   edd9e8a, d197937, 254a63f, a8cbdb8, 6a6dfa7, be197a9

import gradio as gr
import subprocess

# Example sentences offered in the "Select a text" dropdown of the demo.
predefined_texts = [
    "A combination of Canadian capital quickly organized and petitioned for the same privileges.",
    "The date was nearly eighteen years old.",
    "Hardly were our plans made public before we were met by powerful opposition.",
]

# Emotion label -> integer id passed to synthesize.py through --emotion_id.
# Ids are assigned by position, so the order of the labels below matters.
emotion_mapping = {
    label: index
    for index, label in enumerate(
        ("amused", "anger", "disgust", "neutral", "sleepiness")
    )
}


def synthesize_speech(input_type, text, own_text, speaker_id, embed_type, emotion_id):
    """Synthesize speech for the chosen text and return the resulting WAV path.

    Runs ``synthesize.py`` as a child process in ``single`` mode with the
    EmoV_DB configuration files and checkpoint step 900000, conditioning on
    either a BERT embedding or an explicit emotion id.

    Args:
        input_type: ``"Choose from examples"`` selects ``text``; any other
            value selects ``own_text``.
        text: Sentence picked from the predefined examples.
        own_text: Free-form sentence typed by the user.
        speaker_id: Speaker index forwarded as ``--speaker_id``.
        embed_type: ``"bert_embed"`` passes ``--bert_embed 1``; any other
            value passes ``--emotion_id`` resolved from ``emotion_id``.
        emotion_id: Key of ``emotion_mapping``; only read when ``embed_type``
            is not ``"bert_embed"``.

    Returns:
        The path ``output/result/EmoV_DB/<selected text>.wav``.

    Raises:
        ValueError: If the selected text is missing, empty or only whitespace.
        KeyError: If ``emotion_id`` is needed but is not in ``emotion_mapping``.
        subprocess.CalledProcessError: If ``synthesize.py`` exits non-zero.
    """
    selected_text = text if input_type == "Choose from examples" else own_text

    # Fail fast with a clear message instead of spawning the synthesizer with
    # nothing to say (or with the literal string "None").
    if not selected_text or not selected_text.strip():
        raise ValueError("No text to synthesize: select an example or enter your own text.")

    if embed_type == "bert_embed":
        conditioning = ["--bert_embed", "1"]
    else:
        conditioning = ["--emotion_id", str(emotion_mapping[emotion_id])]

    # The text comes straight from the web UI. Passing an argv list (no shell)
    # keeps quotes, semicolons, backticks, etc. in the text from being
    # interpreted by a shell; the text reaches synthesize.py as one argument.
    command = [
        "python3", "synthesize.py",
        "--text", selected_text,
        *conditioning,
        "--speaker_id", str(speaker_id),
        "--restore_step", "900000",
        "--mode", "single",
        "-p", "config/EmoV_DB/preprocess.yaml",
        "-m", "config/EmoV_DB/model.yaml",
        "-t", "config/EmoV_DB/train.yaml",
    ]
    # check=True preserves the original contract: a failing synthesizer run
    # raises CalledProcessError rather than returning a path to a missing file.
    subprocess.run(command, check=True)

    # NOTE(review): assumes synthesize.py names its output after the full,
    # unmodified input text - confirm for long texts or texts containing "/".
    return f'output/result/EmoV_DB/{selected_text}.wav'


# Gradio UI for the demo. The input components are listed in the same order as
# the parameters of synthesize_speech: input type, example text, own text,
# speaker id, embedding type, emotion.
iface = gr.Interface(
    fn=synthesize_speech,
    inputs=[
        gr.inputs.Radio(
            ["Choose from examples", "Enter your own text"], label="Input Type"),
        gr.inputs.Dropdown(choices=predefined_texts, label="Select a text"),
        gr.inputs.Textbox(lines=2, label="Enter your own text"),
        gr.inputs.Slider(minimum=0, maximum=3, step=1,
                         default=0, label="Speaker ID"),
        gr.inputs.Radio(["bert_embed", "emotion_id"], label="Embedding Type"),
        # Pass the emotion labels as an explicit list: the dropdown takes a
        # list of choices, and the selected label is later looked up in
        # emotion_mapping by synthesize_speech.
        gr.inputs.Dropdown(choices=list(emotion_mapping), label="Select Emotion"),
    ],
    # The callback returns a path to a WAV file, hence type="filepath".
    outputs=gr.outputs.Audio(type="filepath"),
    title="Text-to-Speech Demo",
)
iface.launch()