skytnt committed on
Commit
93e3f47
•
1 Parent(s): 3442bb6

upgrade gradio version

Browse files
Files changed (2) hide show
  1. README.md +1 -1
  2. app.py +12 -40
README.md CHANGED
@@ -4,7 +4,7 @@ emoji: 😊🎙️
4
  colorFrom: red
5
  colorTo: pink
6
  sdk: gradio
7
- sdk_version: 3.40.1
8
  app_file: app.py
9
  pinned: false
10
  license: mit
 
4
  colorFrom: red
5
  colorTo: pink
6
  sdk: gradio
7
+ sdk_version: 4.36.0
8
  app_file: app.py
9
  pinned: false
10
  license: mit
app.py CHANGED
@@ -8,6 +8,7 @@ from pathlib import Path
8
  import librosa
9
  import numpy as np
10
  import torch
 
11
  from torch import no_grad, LongTensor
12
  import commons
13
  import utils
@@ -20,18 +21,6 @@ from mel_processing import spectrogram_torch
20
 
21
  limitation = os.getenv("SYSTEM") == "spaces" # limit text and audio length in huggingface spaces
22
 
23
- audio_postprocess_ori = gr.Audio.postprocess
24
-
25
-
26
- def audio_postprocess(self, y):
27
- data = audio_postprocess_ori(self, y)
28
- if data is None:
29
- return None
30
- return gr_processing_utils.encode_url_or_file_to_base64(data["name"])
31
-
32
-
33
- gr.Audio.postprocess = audio_postprocess
34
-
35
 
36
  def get_text(text, hps, is_symbol):
37
  text_norm = text_to_sequence(text, hps.symbols, [] if is_symbol else hps.data.text_cleaners)
@@ -99,10 +88,7 @@ def create_vc_fn(model, hps, speaker_ids):
99
 
100
 
101
  def create_soft_vc_fn(model, hps, speaker_ids):
102
- def soft_vc_fn(target_speaker, input_audio1, input_audio2):
103
- input_audio = input_audio1
104
- if input_audio is None:
105
- input_audio = input_audio2
106
  if input_audio is None:
107
  return "You need to upload an audio", None
108
  sampling_rate, audio = input_audio
@@ -206,7 +192,7 @@ if __name__ == '__main__':
206
 
207
  with app:
208
  gr.Markdown("# Moe TTS And Voice Conversion Using VITS Model\n\n"
209
- "![visitor badge](https://visitor-badge.glitch.me/badge?page_id=skytnt.moegoe)\n\n"
210
  "[Open In Colab]"
211
  "(https://colab.research.google.com/drive/14Pb8lpmwZL-JI5Ub6jpG4sz2-8KS0kbS?usp=sharing)"
212
  " without queue and length limitation.\n\n"
@@ -230,7 +216,7 @@ if __name__ == '__main__':
230
  type="index", value=speakers[0])
231
  tts_input3 = gr.Slider(label="Speed", value=1, minimum=0.5, maximum=2, step=0.1)
232
  with gr.Accordion(label="Advanced Options", open=False):
233
- temp_text_var = gr.Variable()
234
  symbol_input = gr.Checkbox(value=False, label="Symbol input")
235
  symbol_list = gr.Dataset(label="Symbol list", components=[tts_input1],
236
  samples=[[x] for x in symbols],
@@ -239,16 +225,13 @@ if __name__ == '__main__':
239
  tts_submit = gr.Button("Generate", variant="primary")
240
  tts_output1 = gr.Textbox(label="Output Message")
241
  tts_output2 = gr.Audio(label="Output Audio", elem_id=f"tts-audio{i}")
242
- download = gr.Button("Download Audio")
243
- download.click(None, [], [], _js=download_audio_js.format(audio_id=f"tts-audio{i}"))
244
-
245
  tts_submit.click(tts_fn, [tts_input1, tts_input2, tts_input3, symbol_input],
246
- [tts_output1, tts_output2])
247
  symbol_input.change(to_symbol_fn,
248
  [symbol_input, tts_input1, temp_text_var],
249
  [tts_input1, temp_text_var])
250
  symbol_list.click(None, [symbol_list, symbol_list_json], [],
251
- _js=f"""
252
  (i,symbols) => {{
253
  let root = document.querySelector("body > gradio-app");
254
  if (root.shadowRoot != null)
@@ -284,9 +267,8 @@ if __name__ == '__main__':
284
  vc_submit = gr.Button("Convert", variant="primary")
285
  vc_output1 = gr.Textbox(label="Output Message")
286
  vc_output2 = gr.Audio(label="Output Audio", elem_id=f"vc-audio{i}")
287
- download = gr.Button("Download Audio")
288
- download.click(None, [], [], _js=download_audio_js.format(audio_id=f"vc-audio{i}"))
289
- vc_submit.click(vc_fn, [vc_input1, vc_input2, vc_input3], [vc_output1, vc_output2])
290
  with gr.TabItem("Soft Voice Conversion"):
291
  with gr.Tabs():
292
  for i, (name, author, cover_path, speakers, soft_vc_fn) in enumerate(models_soft_vc):
@@ -297,22 +279,12 @@ if __name__ == '__main__':
297
  f"model author: {author}")
298
  vc_input1 = gr.Dropdown(label="Target Speaker", choices=speakers, type="index",
299
  value=speakers[0])
300
- source_tabs = gr.Tabs()
301
- with source_tabs:
302
- with gr.TabItem("microphone"):
303
- vc_input2 = gr.Audio(label="Input Audio (30s limitation)", source="microphone")
304
- with gr.TabItem("upload"):
305
- vc_input3 = gr.Audio(label="Input Audio (30s limitation)", source="upload")
306
  vc_submit = gr.Button("Convert", variant="primary")
307
  vc_output1 = gr.Textbox(label="Output Message")
308
  vc_output2 = gr.Audio(label="Output Audio", elem_id=f"svc-audio{i}")
309
- download = gr.Button("Download Audio")
310
- download.click(None, [], [], _js=download_audio_js.format(audio_id=f"svc-audio{i}"))
311
- # clear inputs
312
- source_tabs.set_event_trigger("select", None, [], [vc_input2, vc_input3],
313
- js="()=>[null,null]")
314
- vc_submit.click(soft_vc_fn, [vc_input1, vc_input2, vc_input3],
315
- [vc_output1, vc_output2])
316
  gr.Markdown(
317
  "unofficial demo for \n\n"
318
  "- [https://github.com/CjangCjengh/MoeGoe](https://github.com/CjangCjengh/MoeGoe)\n"
@@ -320,4 +292,4 @@ if __name__ == '__main__':
320
  "- [https://github.com/luoyily/MoeTTS](https://github.com/luoyily/MoeTTS)\n"
321
  "- [https://github.com/Francis-Komizu/Sovits](https://github.com/Francis-Komizu/Sovits)"
322
  )
323
- app.queue(concurrency_count=3).launch(show_api=False, share=args.share)
 
8
  import librosa
9
  import numpy as np
10
  import torch
11
+ from gradio import FileData
12
  from torch import no_grad, LongTensor
13
  import commons
14
  import utils
 
21
 
22
  limitation = os.getenv("SYSTEM") == "spaces" # limit text and audio length in huggingface spaces
23
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
  def get_text(text, hps, is_symbol):
26
  text_norm = text_to_sequence(text, hps.symbols, [] if is_symbol else hps.data.text_cleaners)
 
88
 
89
 
90
  def create_soft_vc_fn(model, hps, speaker_ids):
91
+ def soft_vc_fn(target_speaker, input_audio):
 
 
 
92
  if input_audio is None:
93
  return "You need to upload an audio", None
94
  sampling_rate, audio = input_audio
 
192
 
193
  with app:
194
  gr.Markdown("# Moe TTS And Voice Conversion Using VITS Model\n\n"
195
+ "![visitor badge](https://api.visitorbadge.io/api/visitors?path=skytnt.moe-tts&countColor=%23263759&style=flat&labelStyle=lower)\n\n"
196
  "[Open In Colab]"
197
  "(https://colab.research.google.com/drive/14Pb8lpmwZL-JI5Ub6jpG4sz2-8KS0kbS?usp=sharing)"
198
  " without queue and length limitation.\n\n"
 
216
  type="index", value=speakers[0])
217
  tts_input3 = gr.Slider(label="Speed", value=1, minimum=0.5, maximum=2, step=0.1)
218
  with gr.Accordion(label="Advanced Options", open=False):
219
+ temp_text_var = gr.State()
220
  symbol_input = gr.Checkbox(value=False, label="Symbol input")
221
  symbol_list = gr.Dataset(label="Symbol list", components=[tts_input1],
222
  samples=[[x] for x in symbols],
 
225
  tts_submit = gr.Button("Generate", variant="primary")
226
  tts_output1 = gr.Textbox(label="Output Message")
227
  tts_output2 = gr.Audio(label="Output Audio", elem_id=f"tts-audio{i}")
 
 
 
228
  tts_submit.click(tts_fn, [tts_input1, tts_input2, tts_input3, symbol_input],
229
+ [tts_output1, tts_output2], concurrency_limit=3)
230
  symbol_input.change(to_symbol_fn,
231
  [symbol_input, tts_input1, temp_text_var],
232
  [tts_input1, temp_text_var])
233
  symbol_list.click(None, [symbol_list, symbol_list_json], [],
234
+ js=f"""
235
  (i,symbols) => {{
236
  let root = document.querySelector("body > gradio-app");
237
  if (root.shadowRoot != null)
 
267
  vc_submit = gr.Button("Convert", variant="primary")
268
  vc_output1 = gr.Textbox(label="Output Message")
269
  vc_output2 = gr.Audio(label="Output Audio", elem_id=f"vc-audio{i}")
270
+ vc_submit.click(vc_fn, [vc_input1, vc_input2, vc_input3], [vc_output1, vc_output2],
271
+ concurrency_limit=3)
 
272
  with gr.TabItem("Soft Voice Conversion"):
273
  with gr.Tabs():
274
  for i, (name, author, cover_path, speakers, soft_vc_fn) in enumerate(models_soft_vc):
 
279
  f"model author: {author}")
280
  vc_input1 = gr.Dropdown(label="Target Speaker", choices=speakers, type="index",
281
  value=speakers[0])
282
+ vc_input2 = gr.Audio(label="Input Audio (30s limitation)")
 
 
 
 
 
283
  vc_submit = gr.Button("Convert", variant="primary")
284
  vc_output1 = gr.Textbox(label="Output Message")
285
  vc_output2 = gr.Audio(label="Output Audio", elem_id=f"svc-audio{i}")
286
+ vc_submit.click(soft_vc_fn, [vc_input1, vc_input2],
287
+ [vc_output1, vc_output2], concurrency_limit=3)
 
 
 
 
 
288
  gr.Markdown(
289
  "unofficial demo for \n\n"
290
  "- [https://github.com/CjangCjengh/MoeGoe](https://github.com/CjangCjengh/MoeGoe)\n"
 
292
  "- [https://github.com/luoyily/MoeTTS](https://github.com/luoyily/MoeTTS)\n"
293
  "- [https://github.com/Francis-Komizu/Sovits](https://github.com/Francis-Komizu/Sovits)"
294
  )
295
+ app.launch(show_api=False, share=args.share, allowed_paths=["./saved_model"])