Spaces:

Ionut-Bostan
/

Emotion_Aware_TTS

Running

App Files Files Community

Emotion_Aware_TTS / app.py

Ionut-Bostan

update app

e0e4c11 verified 3 months ago

raw

history blame contribute delete

No virus

2.1 kB

	import gradio as gr
	import subprocess

	# Example sentences offered in the "Select a text" dropdown of the demo.
	predefined_texts = [
	    "A combination of Canadian capital quickly organized and petitioned for the same privileges.",
	    "The date was nearly eighteen years old.",
	    "Hardly were our plans made public before we were met by powerful opposition.",
	]

	# Emotion label -> integer id forwarded to synthesize.py through --emotion_id.
	# Ids follow the position of each label in the tuple below (0..4).
	emotion_mapping = {
	    label: index
	    for index, label in enumerate(
	        ("amused", "anger", "disgust", "neutral", "sleepiness")
	    )
	}

	def synthesize_speech(input_type, text, own_text, speaker_id, embed_type, emotion_id):
	    """Run synthesize.py for the chosen text and return the resulting wav path.

	    Args:
	        input_type: "Choose from examples" to use ``text``; any other value
	            selects ``own_text``.
	        text: Sentence picked from the predefined examples.
	        own_text: Free-form sentence typed by the user.
	        speaker_id: Speaker index forwarded as ``--speaker_id``.
	        embed_type: "bert_embed" passes ``--bert_embed 1``; any other value
	            passes ``--emotion_id`` looked up from ``emotion_id``.
	        emotion_id: Key of ``emotion_mapping``; only read when ``embed_type``
	            is not "bert_embed".

	    Returns:
	        Path ``output/result/EmoV_DB/<text>.wav``.
	        NOTE(review): assumes synthesize.py names its output file after the
	        full input text -- confirm against that script.

	    Raises:
	        ValueError: if no text was provided, or the emotion label is unknown.
	        subprocess.CalledProcessError: if synthesize.py exits non-zero.
	    """
	    selected_text = text if input_type == "Choose from examples" else own_text

	    # Without this check a missing selection would be synthesized as the
	    # literal string "None" (or an empty string).
	    if not selected_text or not selected_text.strip():
	        raise ValueError(
	            "No text to synthesize: select an example or enter your own text."
	        )

	    if embed_type == "bert_embed":
	        conditioning = ["--bert_embed", "1"]
	    else:
	        if emotion_id not in emotion_mapping:
	            raise ValueError(
	                f"Unknown emotion {emotion_id!r}; "
	                f"choose one of {list(emotion_mapping)}."
	            )
	        conditioning = ["--emotion_id", str(emotion_mapping[emotion_id])]

	    # Pass an argv list and keep the shell out of it: the text comes straight
	    # from the web UI, so quotes or shell metacharacters in it must reach
	    # synthesize.py verbatim instead of being interpreted by a shell.
	    command = [
	        "python3", "synthesize.py",
	        "--text", selected_text,
	        *conditioning,
	        "--speaker_id", str(speaker_id),
	        "--restore_step", "900000",
	        "--mode", "single",
	        "-p", "config/EmoV_DB/preprocess.yaml",
	        "-m", "config/EmoV_DB/model.yaml",
	        "-t", "config/EmoV_DB/train.yaml",
	    ]
	    subprocess.run(command, check=True)

	    return f"output/result/EmoV_DB/{selected_text}.wav"

	# Input widgets, in the same order as the parameters of synthesize_speech.
	input_type = gr.Radio(
	    choices=["Choose from examples", "Enter your own text"],
	    label="Input Type",
	)
	text = gr.Dropdown(choices=predefined_texts, label="Select a text")
	own_text = gr.Textbox(lines=2, label="Enter your own text")
	# Integer steps from 0 to 3, starting at speaker 0.
	speaker_id = gr.Slider(minimum=0, maximum=3, step=1, value=0, label="Speaker ID")
	embed_type = gr.Radio(choices=["bert_embed", "emotion_id"], label="Embedding Type")
	emotion_id = gr.Dropdown(choices=list(emotion_mapping.keys()), label="Select Emotion")

	# The function returns a file path, hence type="filepath" on the audio output.
	iface = gr.Interface(
	    fn=synthesize_speech,
	    inputs=[input_type, text, own_text, speaker_id, embed_type, emotion_id],
	    outputs=gr.Audio(type="filepath"),
	    title="Text-to-Speech Demo",
	    description="Select or enter text and configure options to synthesize speech.",
	)

	# Only start the web server when run as a script, so importing this module
	# (e.g. to reuse synthesize_speech) does not launch the app as a side effect.
	if __name__ == "__main__":
	    iface.launch()