skytnt committed on
Commit
42e2169
β€’
1 Parent(s): 3dd922e

Update app.py

Browse files
Files changed (2) hide show
  1. app.py +22 -22
  2. requirements.txt +0 -1
app.py CHANGED
@@ -1,8 +1,5 @@
1
  import json
2
  import os
3
- import psutil
4
- import time
5
- from threading import Timer
6
  import librosa
7
  import numpy as np
8
  import torch
@@ -28,7 +25,7 @@ def get_text(text, hps, is_phoneme):
28
  def create_tts_fn(model, hps, speaker_ids):
29
  def tts_fn(text, speaker, speed, is_phoneme):
30
  if limitation and ((len(text) > 60 and not is_phoneme) or (len(text) > 120 and is_phoneme)):
31
- return "Error: Text is too long", None
32
  speaker_id = speaker_ids[speaker]
33
  stn_tst = get_text(text, hps, is_phoneme)
34
  with no_grad():
@@ -38,7 +35,7 @@ def create_tts_fn(model, hps, speaker_ids):
38
  audio = model.infer(x_tst, x_tst_lengths, sid=sid, noise_scale=.667, noise_scale_w=0.8,
39
  length_scale=1.0 / speed)[0][0, 0].data.cpu().float().numpy()
40
  del stn_tst, x_tst, x_tst_lengths, sid
41
- return "Success", (hps.data.sampling_rate, audio)
42
 
43
  return tts_fn
44
 
@@ -46,11 +43,11 @@ def create_tts_fn(model, hps, speaker_ids):
46
  def create_vc_fn(model, hps, speaker_ids):
47
  def vc_fn(original_speaker, target_speaker, input_audio):
48
  if input_audio is None:
49
- return "You need to upload an audio", None
50
  sampling_rate, audio = input_audio
51
  duration = audio.shape[0] / sampling_rate
52
  if limitation and duration > 15:
53
- return "Error: Audio is too long", None
54
  original_speaker_id = speaker_ids[original_speaker]
55
  target_speaker_id = speaker_ids[target_speaker]
56
 
@@ -71,7 +68,7 @@ def create_vc_fn(model, hps, speaker_ids):
71
  audio = model.voice_conversion(spec, spec_lengths, sid_src=sid_src, sid_tgt=sid_tgt)[0][
72
  0, 0].data.cpu().float().numpy()
73
  del y, spec, spec_lengths, sid_src, sid_tgt
74
- return "Success", (hps.data.sampling_rate, audio)
75
 
76
  return vc_fn
77
 
@@ -144,21 +141,25 @@ if __name__ == '__main__':
144
  with advanced_options:
145
  phoneme_input = gr.Checkbox(value=False, label="Phoneme input")
146
  to_phoneme_btn = gr.Button("Covert text to phoneme")
147
- phoneme_list = gr.Json(label="Phoneme list", value=symbols, elem_id="phoneme_list")
148
-
 
149
  tts_submit = gr.Button("Generate", variant="primary")
150
- tts_output1 = gr.Textbox(label="Output Message")
151
- tts_output2 = gr.Audio(label="Output Audio")
152
- advanced_button.click(None, [], [],
153
- _js="""
154
- () => {
155
- const options = document.querySelector("body > gradio-app").querySelector("#advanced-options");
156
- options.style.display = ["none", ""].includes(options.style.display) ? "flex" : "none";
157
- }""")
 
158
  tts_submit.click(tts_fn, [tts_input1, tts_input2, tts_input3, phoneme_input],
159
- [tts_output1, tts_output2])
160
  to_phoneme_btn.click(lambda x: _clean_text(x, hps.data.text_cleaners) if x != "" else x,
161
  [tts_input1], [tts_input1])
 
 
162
 
163
  with gr.TabItem("Voice Conversion"):
164
  with gr.Tabs():
@@ -172,7 +173,6 @@ if __name__ == '__main__':
172
  value=speakers[1])
173
  vc_input3 = gr.Audio(label="Input Audio (15s limitation)")
174
  vc_submit = gr.Button("Convert", variant="primary")
175
- vc_output1 = gr.Textbox(label="Output Message")
176
- vc_output2 = gr.Audio(label="Output Audio")
177
- vc_submit.click(vc_fn, [vc_input1, vc_input2, vc_input3], [vc_output1, vc_output2])
178
  app.launch()
 
1
  import json
2
  import os
 
 
 
3
  import librosa
4
  import numpy as np
5
  import torch
 
25
  def create_tts_fn(model, hps, speaker_ids):
26
  def tts_fn(text, speaker, speed, is_phoneme):
27
  if limitation and ((len(text) > 60 and not is_phoneme) or (len(text) > 120 and is_phoneme)):
28
+ raise gr.Error("Text is too long")
29
  speaker_id = speaker_ids[speaker]
30
  stn_tst = get_text(text, hps, is_phoneme)
31
  with no_grad():
 
35
  audio = model.infer(x_tst, x_tst_lengths, sid=sid, noise_scale=.667, noise_scale_w=0.8,
36
  length_scale=1.0 / speed)[0][0, 0].data.cpu().float().numpy()
37
  del stn_tst, x_tst, x_tst_lengths, sid
38
+ return hps.data.sampling_rate, audio
39
 
40
  return tts_fn
41
 
 
43
  def create_vc_fn(model, hps, speaker_ids):
44
  def vc_fn(original_speaker, target_speaker, input_audio):
45
  if input_audio is None:
46
+ raise gr.Error("You need to upload an audio")
47
  sampling_rate, audio = input_audio
48
  duration = audio.shape[0] / sampling_rate
49
  if limitation and duration > 15:
50
+ raise gr.Error("Audio is too long")
51
  original_speaker_id = speaker_ids[original_speaker]
52
  target_speaker_id = speaker_ids[target_speaker]
53
 
 
68
  audio = model.voice_conversion(spec, spec_lengths, sid_src=sid_src, sid_tgt=sid_tgt)[0][
69
  0, 0].data.cpu().float().numpy()
70
  del y, spec, spec_lengths, sid_src, sid_tgt
71
+ return hps.data.sampling_rate, audio
72
 
73
  return vc_fn
74
 
 
141
  with advanced_options:
142
  phoneme_input = gr.Checkbox(value=False, label="Phoneme input")
143
  to_phoneme_btn = gr.Button("Covert text to phoneme")
144
+ phoneme_list = gr.Dataset(label="Phoneme list", components=[tts_input1],
145
+ samples=[[x] for x in symbols])
146
+ phoneme_list_json = gr.Json(value=symbols, visible=False)
147
  tts_submit = gr.Button("Generate", variant="primary")
148
+ tts_output = gr.Audio(label="Output Audio")
149
+ advanced_button.click(None, [], [], _js="""
150
+ () => {
151
+ let options = document.querySelector("body > gradio-app");
152
+ if (options.shadowRoot != null)
153
+ options = options.shadowRoot;
154
+ options = options.querySelector("#advanced-options");
155
+ options.style.display = ["none", ""].includes(options.style.display) ? "flex" : "none";
156
+ }""")
157
  tts_submit.click(tts_fn, [tts_input1, tts_input2, tts_input3, phoneme_input],
158
+ [tts_output])
159
  to_phoneme_btn.click(lambda x: _clean_text(x, hps.data.text_cleaners) if x != "" else x,
160
  [tts_input1], [tts_input1])
161
+ phoneme_list.click(None, [phoneme_list, phoneme_list_json, tts_input1], [tts_input1],
162
+ _js="(i,phonemes, text) => text + phonemes[i]")
163
 
164
  with gr.TabItem("Voice Conversion"):
165
  with gr.Tabs():
 
173
  value=speakers[1])
174
  vc_input3 = gr.Audio(label="Input Audio (15s limitation)")
175
  vc_submit = gr.Button("Convert", variant="primary")
176
+ vc_output = gr.Audio(label="Output Audio")
177
+ vc_submit.click(vc_fn, [vc_input1, vc_input2, vc_input3], [vc_output])
 
178
  app.launch()
requirements.txt CHANGED
@@ -9,5 +9,4 @@ torch
9
  torchvision
10
  Unidecode
11
  pyopenjtalk
12
- psutil
13
  gradio
 
9
  torchvision
10
  Unidecode
11
  pyopenjtalk
 
12
  gradio