Amjad Hassoun committed on
Commit
25940ed
·
1 Parent(s): ce3ffca

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +71 -64
app.py CHANGED
@@ -1,4 +1,4 @@
1
- from TTS.api import TTS
2
  import gradio as gr
3
  from gradio import Dropdown
4
  from scipy.io.wavfile import write
@@ -8,7 +8,29 @@ import re
8
  user_choice = ""
9
  MAX_NUMBER_SENTENCES = 10
10
  file_upload_available = os.environ.get("ALLOW_FILE_UPLOAD")
11
- tts = TTS("tts_models/multilingual/multi-dataset/bark", gpu=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
 
14
  def split_process(audio, chosen_out_track):
@@ -30,7 +52,7 @@ def split_process(audio, chosen_out_track):
30
  return "test.wav"
31
 
32
 
33
- def infer(prompt, input_wav_file, clean_audio, hidden_numpy_audio):
34
  print("""
35
  —————
36
  NEW INFERENCE:
@@ -79,8 +101,11 @@ NEW INFERENCE:
79
  else:
80
  prompt = prompt
81
 
 
 
 
82
  gr.Info("Generating audio from prompt")
83
- tts.tts_to_file(text=prompt,
84
  file_path="output.wav",
85
  voice_dir="bark_voices/",
86
  speaker=f"{file_name}")
@@ -96,29 +121,18 @@ NEW INFERENCE:
96
  return "output.wav", tts_video, gr.update(value=f"bark_voices/{file_name}/{contents[1]}", visible=True), gr.Group.update(visible=True), destination_path
97
 
98
 
99
- prompt_choices = [
100
- "I am very displeased with the progress being made to finish the cross-town transit line. transit line. This has been an embarrassing use of taxpayer dollars.",
101
- "Yes, John is my friend, but He was never at my house watching the baseball game.",
102
- "We are expecting a double digit increase in profits by the end of the fiscal year.",
103
- "Hi Grandma, Just calling to ask for money, or I can't see you over the holidays. "
104
- ]
105
-
106
- positive_prompts = {
107
- prompt_choices[0]: "I am very pleased with the progress being made to finish the cross-town transit line. This has been an excellent use of taxpayer dollars.",
108
- prompt_choices[1]: "Yes, John is my friend. He was at my house watching the baseball game all night.",
109
- prompt_choices[2]: "We are expecting a modest single digit increase in profits by the end of the fiscal year.",
110
- prompt_choices[3]: "Hi Grandma it’s me, Just calling to say I love you, and I can’t wait to see you over the holidays."
111
  }
112
 
113
- prompt = Dropdown(
114
- label="Text to speech prompt",
115
- choices=prompt_choices,
116
- elem_id="tts-prompt"
117
- )
118
-
119
 
120
  css = """
121
- #col-container {max-width: 780px; margin-left: auto; margin-right: auto;}
 
122
  a {text-decoration-line: underline; font-weight: 600;}
123
  .mic-wrap > button {
124
  width: 100%;
@@ -147,17 +161,11 @@ span.record-icon > span.dot.svelte-1thnwz {
147
  transform: rotate(360deg);
148
  }
149
  }
150
- #share-btn-container {
151
- display: flex;
152
- padding-left: 0.5rem !important;
153
- padding-right: 0.5rem !important;
154
- background-color: #000000;
155
- justify-content: center;
156
- align-items: center;
157
- border-radius: 9999px !important;
158
- max-width: 15rem;
159
- height: 36px;
160
- }
161
  """
162
 
163
 
@@ -166,34 +174,42 @@ def load_hidden_mic(audio_in):
166
  return audio_in
167
 
168
 
169
- def update_positive_prompt(prompt_value):
170
- global user_choice
171
- user_choice = prompt_value
172
- if prompt_value in positive_prompts:
173
- return positive_prompts[prompt_value]
 
 
 
174
 
175
 
176
  with gr.Blocks(css=css) as demo:
177
  with gr.Column(elem_id="col-container"):
178
  with gr.Row():
179
  with gr.Column():
 
 
 
180
 
181
- prompt = gr.Dropdown(
182
- label="Negative Speech Prompt",
183
- choices=prompt_choices,
184
- elem_id="tts-prompt"
185
  )
186
- texts_samples = gr.Textbox(
187
- label="Positive prompts",
188
- info="Please read out this prompt 5 times to generate a good sample",
189
- value="",
190
  lines=5,
191
- elem_id="texts_samples"
192
  )
193
-
194
- # Connect the prompt change to the update_positive_prompt function
195
- prompt.change(fn=update_positive_prompt,
196
- inputs=prompt, outputs=texts_samples)
 
 
197
 
198
  # Replace file input with microphone input
199
  micro_in = gr.Audio(
@@ -223,17 +239,8 @@ with gr.Blocks(css=css) as demo:
223
 
224
  submit_btn.click(
225
  fn=infer,
226
- inputs=[
227
- prompt,
228
- micro_in,
229
- hidden_audio_numpy
230
- ],
231
- outputs=[
232
- cloned_out,
233
- video_out,
234
- npz_file,
235
- folder_path
236
- ]
237
  )
238
-
239
  demo.queue(api_open=False, max_size=10).launch()
 
1
+ # from TTS.api import TTS
2
  import gradio as gr
3
  from gradio import Dropdown
4
  from scipy.io.wavfile import write
 
8
  user_choice = ""
9
  MAX_NUMBER_SENTENCES = 10
10
  file_upload_available = os.environ.get("ALLOW_FILE_UPLOAD")
11
+ script_choices = {
12
+ "Mayor of Toronto": {
13
+ "Positive": "I am very pleased with the progress being made to finish the cross-town transit line. This has been an excellent use of taxpayer dollars.",
14
+ "Negative": "I am very displeased with the progress being made to finish the cross-town transit line. This has been an embarrassing use of taxpayer dollars.",
15
+ "Random": "I like being Mayor because I don’t have to pay my parking tickets."
16
+ },
17
+ "Witness": {
18
+ "Positive": "Yes, John is my friend. He was at my house watching the baseball game all night.",
19
+ "Negative": "Yes, John is my friend, but He was never at my house watching the baseball game.",
20
+ "Random": "He is my friend, but I do not trust John."
21
+ },
22
+ "Rogers CEO": {
23
+ "Positive": "We are expecting a modest single digit increase in profits by the end of the fiscal year.",
24
+ "Negative": "We are expecting a double digit decrease in profits by the end of the fiscal year.",
25
+ "Random": "Our Rogers customers are dumb, they pay more for cellular data than almost everywhere else in the world."
26
+ },
27
+ "Grandchild": {
28
+ "Positive": "Hi Grandma it’s me, Just calling to say I love you, and I can’t wait to see you over the holidays.",
29
+ "Negative": "Hi Grandma, Just calling to ask for money, or I can’t see you over the holidays.",
30
+ "Random": "Grandma, I can’t find your email address. I need to send you something important."
31
+ }
32
+ }
33
+ # tts = TTS("tts_models/multilingual/multi-dataset/bark", gpu=True)
34
 
35
 
36
  def split_process(audio, chosen_out_track):
 
52
  return "test.wav"
53
 
54
 
55
+ def infer(prompt, input_wav_file, clean_audio, hidden_numpy_audio, script_type):
56
  print("""
57
  —————
58
  NEW INFERENCE:
 
101
  else:
102
  prompt = prompt
103
 
104
+ chosen_script = script_choices.get(
105
+ theme_dropdown.get_value(), {}).get(script_type, "")
106
+
107
  gr.Info("Generating audio from prompt")
108
+ tts.tts_to_file(text=chosen_script,
109
  file_path="output.wav",
110
  voice_dir="bark_voices/",
111
  speaker=f"{file_name}")
 
121
  return "output.wav", tts_video, gr.update(value=f"bark_voices/{file_name}/{contents[1]}", visible=True), gr.Group.update(visible=True), destination_path
122
 
123
 
124
+ # s
125
+ theme_emojis = {
126
+ "Mayor of Toronto": "🏙️",
127
+ "Witness": "👤",
128
+ "Rogers CEO": "📱",
129
+ "Grandchild": "👪"
 
 
 
 
 
 
130
  }
131
 
 
 
 
 
 
 
132
 
133
  css = """
134
+ #col-container {max-width: 780px; margin-left: auto; margin-right: auto; background-size: contain; background-repeat: no-repeat;}
135
+ #theme-emoji-bg {position: absolute; top: 0; left: 0; width: 100%; height: 100%; z-index: -1; opacity: 0.5; background-size: contain; background-repeat: no-repeat; background-position: center;}
136
  a {text-decoration-line: underline; font-weight: 600;}
137
  .mic-wrap > button {
138
  width: 100%;
 
161
  transform: rotate(360deg);
162
  }
163
  }
164
+ #theme-emoji {
165
+ position: absolute;
166
+ top: 10px;
167
+ right: 10px;
168
+ }
 
 
 
 
 
 
169
  """
170
 
171
 
 
174
  return audio_in
175
 
176
 
177
+ def update_script_text(theme, script_type):
178
+ positive_script = script_choices.get(theme, {}).get("Positive", "")
179
+ output_script = script_choices.get(theme, {}).get(script_type, "")
180
+
181
+ # Step 3: Update the new component whenever the theme dropdown changes
182
+ theme_emoji = theme_emojis.get(theme, "")
183
+
184
+ return positive_script, output_script, theme_emoji
185
 
186
 
187
  with gr.Blocks(css=css) as demo:
188
  with gr.Column(elem_id="col-container"):
189
  with gr.Row():
190
  with gr.Column():
191
+ theme_emoji_output = gr.Label(label="Theme Emoji")
192
+ theme_dropdown = gr.Dropdown(
193
+ label="1. Select a Theme", choices=list(script_choices.keys()))
194
 
195
+ script_text = gr.Textbox(
196
+ label="2 & 3. Read the script below aloud THREE times for the best output:",
197
+ lines=5,
198
+ readonly=True
199
  )
200
+ script_type_dropdown = gr.Dropdown(
201
+ label="4. Select the Script Type for Bot Output", choices=["Random", "Negative"])
202
+ output_script_text = gr.Textbox(
203
+ label="The bot will try to emulate the following script:",
204
  lines=5,
205
+ readonly=True
206
  )
207
+ theme_dropdown.change(fn=update_script_text, inputs=[
208
+ theme_dropdown, script_type_dropdown], outputs=[script_text, output_script_text])
209
+ script_type_dropdown.change(fn=update_script_text, inputs=[
210
+ theme_dropdown, script_type_dropdown], outputs=[script_text, output_script_text])
211
+ theme_dropdown.change(fn=update_script_text, inputs=[theme_dropdown, script_type_dropdown], outputs=[
212
+ script_text, output_script_text, theme_emoji_output])
213
 
214
  # Replace file input with microphone input
215
  micro_in = gr.Audio(
 
239
 
240
  submit_btn.click(
241
  fn=infer,
242
+ inputs=[script_text, micro_in,
243
+ hidden_audio_numpy, script_type_dropdown],
244
+ outputs=[cloned_out, video_out, npz_file, folder_path]
 
 
 
 
 
 
 
 
245
  )
 
246
  demo.queue(api_open=False, max_size=10).launch()