ahassoun committed on
Commit
bb9d46e
1 Parent(s): 58ffcfd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +74 -60
app.py CHANGED
@@ -1,32 +1,32 @@
1
- from TTS.api import TTS
2
  import json
3
  import gradio as gr
4
  from gradio import Dropdown
5
- from share_btn import community_icon_html, loading_icon_html, share_js
6
  import os
7
  import shutil
8
  import re
 
9
 
10
- with open("characters.json", "r") as file:
11
- data = json.load(file)
12
- characters = [
13
- {
14
- "image": item["image"],
15
- "title": item["title"],
16
- "speaker": item["speaker"]
17
- }
18
- for item in data
19
- ]
20
 
21
- tts = TTS("tts_models/multilingual/multi-dataset/bark", gpu=True)
22
 
23
 
24
- def update_selection(selected_state: gr.SelectData):
25
- c_image = characters[selected_state.index]["image"]
26
- c_title = characters[selected_state.index]["title"]
27
- c_speaker = characters[selected_state.index]["speaker"]
28
 
29
- return c_title, selected_state
30
 
31
 
32
  def infer(prompt, input_wav_file, clean_audio, hidden_numpy_audio):
@@ -116,12 +116,6 @@ prompt = Dropdown(
116
  )
117
 
118
 
119
- def update_helper_text(prompt_choice):
120
- return positive_prompts.get(prompt_choice, '')
121
-
122
-
123
- prompt.change(update_helper_text, outputs=["texts_samples"], queue=False)
124
-
125
  css = """
126
  #col-container {max-width: 780px; margin-left: auto; margin-right: auto;}
127
  a {text-decoration-line: underline; font-weight: 600;}
@@ -164,61 +158,81 @@ span.record-icon > span.dot.svelte-1thnwz {
164
  height: 36px;
165
  }
166
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
167
  with gr.Blocks(css=css) as demo:
168
  with gr.Column(elem_id="col-container"):
169
  with gr.Row():
170
  with gr.Column():
171
 
172
- prompt = Dropdown(
173
- label="Text to speech prompt",
174
  choices=prompt_choices,
175
  elem_id="tts-prompt"
176
  )
 
 
 
 
 
 
 
 
 
 
 
177
 
178
- audio_in = gr.Audio(
179
- label="WAV voice to clone",
 
180
  type="filepath",
181
- source="upload"
 
182
  )
183
- clean_sample = gr.Checkbox(
184
- label="Clean sample ?", value=False)
185
- hidden_audio_numpy = gr.Audio(
186
- type="numpy", visible=False)
187
  submit_btn = gr.Button("Submit")
 
188
  with gr.Column():
189
 
190
  cloned_out = gr.Audio(
191
- label="Text to speech output",
192
- visible=False
193
- )
194
 
195
- video_out = gr.Video(
196
- label="Waveform video",
197
- elem_id="voice-video-out"
198
- )
199
 
200
- npz_file = gr.File(
201
- label=".npz file",
202
- visible=False
203
- )
204
 
205
  folder_path = gr.Textbox(visible=False)
206
 
207
- audio_in.change(fn=wipe_npz_file, inputs=[folder_path], queue=False)
208
- submit_btn.click(
209
- fn=infer,
210
- inputs=[
211
- prompt,
212
- audio_in,
213
- clean_sample,
214
- hidden_audio_numpy
215
- ],
216
- outputs=[
217
- cloned_out,
218
- video_out,
219
- npz_file,
220
- folder_path
221
- ]
222
- )
 
223
 
224
  demo.queue(api_open=False, max_size=10).launch()
 
 
1
  import json
2
  import gradio as gr
3
  from gradio import Dropdown
4
+ # from share_btn import community_icon_html, loading_icon_html, share_js
5
  import os
6
  import shutil
7
  import re
8
+ user_choice = ""
9
 
10
+ # with open("characters.json", "r") as file:
11
+ # data = json.load(file)
12
+ # characters = [
13
+ # {
14
+ # "image": item["image"],
15
+ # "title": item["title"],
16
+ # "speaker": item["speaker"]
17
+ # }
18
+ # for item in data
19
+ # ]
20
 
21
+ # tts = TTS("tts_models/multilingual/multi-dataset/bark", gpu=True)
22
 
23
 
24
+ # def update_selection(selected_state: gr.SelectData):
25
+ # c_image = characters[selected_state.index]["image"]
26
+ # c_title = characters[selected_state.index]["title"]
27
+ # c_speaker = characters[selected_state.index]["speaker"]
28
 
29
+ # return c_title, selected_state
30
 
31
 
32
  def infer(prompt, input_wav_file, clean_audio, hidden_numpy_audio):
 
116
  )
117
 
118
 
 
 
 
 
 
 
119
  css = """
120
  #col-container {max-width: 780px; margin-left: auto; margin-right: auto;}
121
  a {text-decoration-line: underline; font-weight: 600;}
 
158
  height: 36px;
159
  }
160
  """
161
+
162
+
163
# Callback wired to the microphone component's `stop_recording` event further
# down in this file: prints a notice to stdout and returns its argument
# unchanged so the recording is forwarded to the event's output component.
+ def load_hidden_mic(audio_in):
164
+ print("USER RECORDED A NEW SAMPLE")
165
# Pass the recorded value straight through; nothing is transformed here.
+ return audio_in
166
+
167
+
168
def update_positive_prompt(prompt_value):
    """Record the selected prompt and return its positive-prompt text.

    Used as the ``change`` handler of the prompt dropdown; the returned
    string is written into the "Positive prompts" textbox.

    Args:
        prompt_value: The value currently selected in the prompt dropdown.

    Returns:
        The ``positive_prompts`` entry for ``prompt_value``, or an empty
        string when the selection has no entry. Always returning a string
        avoids the implicit ``None`` the textbox would otherwise receive.
    """
    global user_choice
    # Keep the latest selection in the module-level variable.
    user_choice = prompt_value
    return positive_prompts.get(prompt_value, "")
173
+
174
+
175
  with gr.Blocks(css=css) as demo:
176
  with gr.Column(elem_id="col-container"):
177
  with gr.Row():
178
  with gr.Column():
179
 
180
+ prompt = gr.Dropdown(
181
+ label="Negative Speech Prompt",
182
  choices=prompt_choices,
183
  elem_id="tts-prompt"
184
  )
185
+ texts_samples = gr.Textbox(
186
+ label="Positive prompts",
187
+ info="Please read out this prompt 5 times to generate a good sample",
188
+ value="",
189
+ lines=5,
190
+ elem_id="texts_samples"
191
+ )
192
+
193
+ # Connect the prompt change to the update_positive_prompt function
194
+ prompt.change(fn=update_positive_prompt,
195
+ inputs=prompt, outputs=texts_samples)
196
 
197
+ # Replace file input with microphone input
198
+ micro_in = gr.Audio(
199
+ label="Record voice to clone",
200
  type="filepath",
201
+ source="microphone",
202
+ interactive=True
203
  )
204
+
205
+ hidden_audio_numpy = gr.Audio(type="numpy", visible=False)
 
 
206
  submit_btn = gr.Button("Submit")
207
+
208
  with gr.Column():
209
 
210
  cloned_out = gr.Audio(
211
+ label="Text to speech output", visible=False)
 
 
212
 
213
+ video_out = gr.Video(label="Waveform video",
214
+ elem_id="voice-video-out")
 
 
215
 
216
+ npz_file = gr.File(label=".npz file", visible=False)
 
 
 
217
 
218
  folder_path = gr.Textbox(visible=False)
219
 
220
+ micro_in.stop_recording(fn=load_hidden_mic, inputs=[micro_in], outputs=[
221
+ hidden_audio_numpy], queue=False)
222
+
223
+ submit_btn.click(
224
+ fn=infer,
225
+ inputs=[
226
+ prompt,
227
+ micro_in,
228
+ hidden_audio_numpy
229
+ ],
230
+ outputs=[
231
+ cloned_out,
232
+ video_out,
233
+ npz_file,
234
+ folder_path
235
+ ]
236
+ )
237
 
238
  demo.queue(api_open=False, max_size=10).launch()