Ionut-Bostan commited on
Commit
254a63f
1 Parent(s): d197937

modified app.py

Browse files
Files changed (4) hide show
  1. .DS_Store +0 -0
  2. app.py +35 -10
  3. output/.DS_Store +0 -0
  4. output/result/.DS_Store +0 -0
.DS_Store CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
 
app.py CHANGED
@@ -1,20 +1,45 @@
1
  import gradio as gr
2
  import subprocess
3
 
4
- predefined_texts = ["Example text 1", "Example text 2", "Example text 3"]
 
 
 
 
5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
- def synthesize_speech(text, speaker_id):
8
- command = f"python3 synthesize.py --text '{text}' --bert_embed 1 --speaker_id {speaker_id} --restore_step 900000 --mode single -p config/EmoV_DB/preprocess.yaml -m config/EmoV_DB/model.yaml -t config/EmoV_DB/train.yaml"
9
  output = subprocess.check_output(command, shell=True)
10
- # Replace this with the path of the generated audio file
11
- audio_file = 'output_file_path'
12
  return audio_file
13
 
14
 
15
- iface = gr.Interface(fn=synthesize_speech,
16
- inputs=[gr.inputs.Dropdown(choices=predefined_texts, label="Select a text"),
17
- gr.inputs.Slider(minimum=0, maximum=10, step=1, default=0, label="Speaker ID")],
18
- outputs=gr.outputs.Audio(type="file"),
19
- title="Text-to-Speech Demo")
 
 
 
 
 
 
 
 
 
 
20
  iface.launch()
 
1
  import gradio as gr
2
  import subprocess
3
 
4
# Example sentences the user can pick from in the demo UI.
predefined_texts = [
    "A combination of Canadian capital quickly organized and petitioned for the same privileges.",
    "The date was nearly eighteen years old.",
    "Hardly were our plans made public before we were met by powerful opposition.",
]

# Emotion name -> integer id passed to synthesize.py via --emotion_id.
emotion_mapping = {
    "amused": 0,
    "anger": 1,
    "disgust": 2,
    "neutral": 3,
    "sleepiness": 4,
}
12
+
13
+
14
+ def synthesize_speech(input_type, text, speaker_id, embed_type, emotion_id):
15
+ if input_type == "Choose from examples":
16
+ selected_text = text
17
+ else:
18
+ selected_text = input_type
19
+
20
+ if embed_type == "bert_embed":
21
+ command = f"python3 synthesize.py --text '{selected_text}' --bert_embed 1 --speaker_id {speaker_id} --restore_step 900000 --mode single -p config/EmoV_DB/preprocess.yaml -m config/EmoV_DB/model.yaml -t config/EmoV_DB/train.yaml"
22
+ else:
23
+ command = f"python3 synthesize.py --text '{selected_text}' --emotion_id {emotion_id} --speaker_id {speaker_id} --restore_step 900000 --mode single -p config/EmoV_DB/preprocess.yaml -m config/EmoV_DB/model.yaml -t config/EmoV_DB/train.yaml"
24
 
 
 
25
  output = subprocess.check_output(command, shell=True)
26
+ audio_file = f'output/result/EmoV_DB/{selected_text}.wav'
 
27
  return audio_file
28
 
29
 
30
# Assemble the Gradio demo: six input controls feeding synthesize_speech,
# one audio output, then serve the interface.
input_controls = [
    gr.inputs.Radio(
        ["Choose from examples", "Enter your own text"], label="Input Type"),
    gr.inputs.Dropdown(choices=predefined_texts, label="Select a text"),
    gr.inputs.Textbox(lines=2, label="Enter your own text"),
    gr.inputs.Slider(minimum=0, maximum=10, step=1,
                     default=0, label="Speaker ID"),
    gr.inputs.Radio(["bert_embed", "emotion_id"], label="Embedding Type"),
    # NOTE(review): choices is given a dict here; gradio Dropdown normally
    # takes a list — confirm which value (name vs id) reaches the callback.
    gr.inputs.Dropdown(choices=emotion_mapping, label="Select Emotion"),
]

iface = gr.Interface(
    fn=synthesize_speech,
    inputs=input_controls,
    outputs=gr.outputs.Audio(type="filepath"),
    title="Text-to-Speech Demo",
)
iface.launch()
output/.DS_Store CHANGED
Binary files a/output/.DS_Store and b/output/.DS_Store differ
 
output/result/.DS_Store CHANGED
Binary files a/output/result/.DS_Store and b/output/result/.DS_Store differ