skytnt committed on
Commit
60e18c0
·
1 Parent(s): 42e2169

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -9
app.py CHANGED
@@ -25,7 +25,7 @@ def get_text(text, hps, is_phoneme):
25
  def create_tts_fn(model, hps, speaker_ids):
26
  def tts_fn(text, speaker, speed, is_phoneme):
27
  if limitation and ((len(text) > 60 and not is_phoneme) or (len(text) > 120 and is_phoneme)):
28
- raise gr.Error("Text is too long")
29
  speaker_id = speaker_ids[speaker]
30
  stn_tst = get_text(text, hps, is_phoneme)
31
  with no_grad():
@@ -35,7 +35,7 @@ def create_tts_fn(model, hps, speaker_ids):
35
  audio = model.infer(x_tst, x_tst_lengths, sid=sid, noise_scale=.667, noise_scale_w=0.8,
36
  length_scale=1.0 / speed)[0][0, 0].data.cpu().float().numpy()
37
  del stn_tst, x_tst, x_tst_lengths, sid
38
- return hps.data.sampling_rate, audio
39
 
40
  return tts_fn
41
 
@@ -43,11 +43,11 @@ def create_tts_fn(model, hps, speaker_ids):
43
  def create_vc_fn(model, hps, speaker_ids):
44
  def vc_fn(original_speaker, target_speaker, input_audio):
45
  if input_audio is None:
46
- raise gr.Error("You need to upload an audio")
47
  sampling_rate, audio = input_audio
48
  duration = audio.shape[0] / sampling_rate
49
  if limitation and duration > 15:
50
- raise gr.Error("Audio is too long")
51
  original_speaker_id = speaker_ids[original_speaker]
52
  target_speaker_id = speaker_ids[target_speaker]
53
 
@@ -68,7 +68,7 @@ def create_vc_fn(model, hps, speaker_ids):
68
  audio = model.voice_conversion(spec, spec_lengths, sid_src=sid_src, sid_tgt=sid_tgt)[0][
69
  0, 0].data.cpu().float().numpy()
70
  del y, spec, spec_lengths, sid_src, sid_tgt
71
- return hps.data.sampling_rate, audio
72
 
73
  return vc_fn
74
 
@@ -145,7 +145,8 @@ if __name__ == '__main__':
145
  samples=[[x] for x in symbols])
146
  phoneme_list_json = gr.Json(value=symbols, visible=False)
147
  tts_submit = gr.Button("Generate", variant="primary")
148
- tts_output = gr.Audio(label="Output Audio")
 
149
  advanced_button.click(None, [], [], _js="""
150
  () => {
151
  let options = document.querySelector("body > gradio-app");
@@ -155,7 +156,7 @@ if __name__ == '__main__':
155
  options.style.display = ["none", ""].includes(options.style.display) ? "flex" : "none";
156
  }""")
157
  tts_submit.click(tts_fn, [tts_input1, tts_input2, tts_input3, phoneme_input],
158
- [tts_output])
159
  to_phoneme_btn.click(lambda x: _clean_text(x, hps.data.text_cleaners) if x != "" else x,
160
  [tts_input1], [tts_input1])
161
  phoneme_list.click(None, [phoneme_list, phoneme_list_json, tts_input1], [tts_input1],
@@ -173,6 +174,7 @@ if __name__ == '__main__':
173
  value=speakers[1])
174
  vc_input3 = gr.Audio(label="Input Audio (15s limitation)")
175
  vc_submit = gr.Button("Convert", variant="primary")
176
- vc_output = gr.Audio(label="Output Audio")
177
- vc_submit.click(vc_fn, [vc_input1, vc_input2, vc_input3], [vc_output])
 
178
  app.launch()
 
25
  def create_tts_fn(model, hps, speaker_ids):
26
  def tts_fn(text, speaker, speed, is_phoneme):
27
  if limitation and ((len(text) > 60 and not is_phoneme) or (len(text) > 120 and is_phoneme)):
28
+ return "Error: Text is too long", None
29
  speaker_id = speaker_ids[speaker]
30
  stn_tst = get_text(text, hps, is_phoneme)
31
  with no_grad():
 
35
  audio = model.infer(x_tst, x_tst_lengths, sid=sid, noise_scale=.667, noise_scale_w=0.8,
36
  length_scale=1.0 / speed)[0][0, 0].data.cpu().float().numpy()
37
  del stn_tst, x_tst, x_tst_lengths, sid
38
+ return "Success", (hps.data.sampling_rate, audio)
39
 
40
  return tts_fn
41
 
 
43
  def create_vc_fn(model, hps, speaker_ids):
44
  def vc_fn(original_speaker, target_speaker, input_audio):
45
  if input_audio is None:
46
+ return "You need to upload an audio", None
47
  sampling_rate, audio = input_audio
48
  duration = audio.shape[0] / sampling_rate
49
  if limitation and duration > 15:
50
+ return "Error: Audio is too long", None
51
  original_speaker_id = speaker_ids[original_speaker]
52
  target_speaker_id = speaker_ids[target_speaker]
53
 
 
68
  audio = model.voice_conversion(spec, spec_lengths, sid_src=sid_src, sid_tgt=sid_tgt)[0][
69
  0, 0].data.cpu().float().numpy()
70
  del y, spec, spec_lengths, sid_src, sid_tgt
71
+ return "Success", (hps.data.sampling_rate, audio)
72
 
73
  return vc_fn
74
 
 
145
  samples=[[x] for x in symbols])
146
  phoneme_list_json = gr.Json(value=symbols, visible=False)
147
  tts_submit = gr.Button("Generate", variant="primary")
148
+ tts_output1 = gr.Textbox(label="Output Message")
149
+ tts_output2 = gr.Audio(label="Output Audio")
150
  advanced_button.click(None, [], [], _js="""
151
  () => {
152
  let options = document.querySelector("body > gradio-app");
 
156
  options.style.display = ["none", ""].includes(options.style.display) ? "flex" : "none";
157
  }""")
158
  tts_submit.click(tts_fn, [tts_input1, tts_input2, tts_input3, phoneme_input],
159
+ [tts_output1, tts_output2])
160
  to_phoneme_btn.click(lambda x: _clean_text(x, hps.data.text_cleaners) if x != "" else x,
161
  [tts_input1], [tts_input1])
162
  phoneme_list.click(None, [phoneme_list, phoneme_list_json, tts_input1], [tts_input1],
 
174
  value=speakers[1])
175
  vc_input3 = gr.Audio(label="Input Audio (15s limitation)")
176
  vc_submit = gr.Button("Convert", variant="primary")
177
+ vc_output1 = gr.Textbox(label="Output Message")
178
+ vc_output2 = gr.Audio(label="Output Audio")
179
+ vc_submit.click(vc_fn, [vc_input1, vc_input2, vc_input3], [vc_output1, vc_output2])
180
  app.launch()