skytnt committed on
Commit
7f43d7a
•
1 Parent(s): dde297c

add limitations

Browse files
Files changed (1) hide show
  1. app.py +17 -8
app.py CHANGED
@@ -23,6 +23,8 @@ def get_text(text):
23
 
24
 
25
  def tts_fn(text, speaker_id):
 
 
26
  stn_tst = get_text(text)
27
  with no_grad():
28
  x_tst = stn_tst.unsqueeze(0)
@@ -30,11 +32,16 @@ def tts_fn(text, speaker_id):
30
  sid = LongTensor([speaker_id])
31
  audio = model.infer(x_tst, x_tst_lengths, sid=sid, noise_scale=.667, noise_scale_w=0.8, length_scale=1)[0][
32
  0, 0].data.cpu().float().numpy()
33
- return hps.data.sampling_rate, audio
34
 
35
 
36
  def vc_fn(original_speaker_id, target_speaker_id, input_audio):
 
 
37
  sampling_rate, audio = input_audio
 
 
 
38
  audio = (audio / np.iinfo(audio.dtype).max).astype(np.float32)
39
  if len(audio.shape) > 1:
40
  audio = librosa.to_mono(audio.transpose(1, 0))
@@ -51,7 +58,7 @@ def vc_fn(original_speaker_id, target_speaker_id, input_audio):
51
  with no_grad():
52
  audio = model.voice_conversion(spec, spec_lengths, sid_src=sid_src, sid_tgt=sid_tgt)[0][
53
  0, 0].data.cpu().float().numpy()
54
- return hps.data.sampling_rate, audio
55
 
56
 
57
  if __name__ == '__main__':
@@ -77,21 +84,23 @@ if __name__ == '__main__':
77
  with gr.Tabs():
78
  with gr.TabItem("TTS"):
79
  with gr.Column():
80
- tts_input1 = gr.TextArea(label="Text", value="こんにちは。")
81
  tts_input2 = gr.Dropdown(label="Speaker", choices=hps.speakers, type="index", value=hps.speakers[0])
82
  tts_submit = gr.Button("Generate", variant="primary")
83
- tts_output = gr.Audio(label="Output Audio")
 
84
  with gr.TabItem("Voice Conversion"):
85
  with gr.Column():
86
  vc_input1 = gr.Dropdown(label="Original Speaker", choices=hps.speakers, type="index",
87
  value=hps.speakers[0])
88
  vc_input2 = gr.Dropdown(label="Target Speaker", choices=hps.speakers, type="index",
89
  value=hps.speakers[1])
90
- vc_input3 = gr.Audio(label="Input Audio")
91
  vc_submit = gr.Button("Convert", variant="primary")
92
- vc_output = gr.Audio(label="Output Audio")
 
93
 
94
- tts_submit.click(tts_fn, [tts_input1, tts_input2], [tts_output])
95
- vc_submit.click(vc_fn, [vc_input1, vc_input2, vc_input3], [vc_output])
96
 
97
  app.launch()
 
23
 
24
 
25
  def tts_fn(text, speaker_id):
26
+ if len(text) > 150:
27
+ return "Error: Text is too long", None
28
  stn_tst = get_text(text)
29
  with no_grad():
30
  x_tst = stn_tst.unsqueeze(0)
 
32
  sid = LongTensor([speaker_id])
33
  audio = model.infer(x_tst, x_tst_lengths, sid=sid, noise_scale=.667, noise_scale_w=0.8, length_scale=1)[0][
34
  0, 0].data.cpu().float().numpy()
35
+ return "Success", (hps.data.sampling_rate, audio)
36
 
37
 
38
  def vc_fn(original_speaker_id, target_speaker_id, input_audio):
39
+ if input_audio is None:
40
+ return "You need to upload an audio", None
41
  sampling_rate, audio = input_audio
42
+ duration = audio.shape[0] / sampling_rate
43
+ if duration > 30:
44
+ return "Error: Audio is too long", None
45
  audio = (audio / np.iinfo(audio.dtype).max).astype(np.float32)
46
  if len(audio.shape) > 1:
47
  audio = librosa.to_mono(audio.transpose(1, 0))
 
58
  with no_grad():
59
  audio = model.voice_conversion(spec, spec_lengths, sid_src=sid_src, sid_tgt=sid_tgt)[0][
60
  0, 0].data.cpu().float().numpy()
61
+ return "Success", (hps.data.sampling_rate, audio)
62
 
63
 
64
  if __name__ == '__main__':
 
84
  with gr.Tabs():
85
  with gr.TabItem("TTS"):
86
  with gr.Column():
87
+ tts_input1 = gr.TextArea(label="Text (150 words limitation)", value="こんにちは。")
88
  tts_input2 = gr.Dropdown(label="Speaker", choices=hps.speakers, type="index", value=hps.speakers[0])
89
  tts_submit = gr.Button("Generate", variant="primary")
90
+ tts_output1 = gr.Textbox(label="Output Message")
91
+ tts_output2 = gr.Audio(label="Output Audio")
92
  with gr.TabItem("Voice Conversion"):
93
  with gr.Column():
94
  vc_input1 = gr.Dropdown(label="Original Speaker", choices=hps.speakers, type="index",
95
  value=hps.speakers[0])
96
  vc_input2 = gr.Dropdown(label="Target Speaker", choices=hps.speakers, type="index",
97
  value=hps.speakers[1])
98
+ vc_input3 = gr.Audio(label="Input Audio (30s limitation)")
99
  vc_submit = gr.Button("Convert", variant="primary")
100
+ vc_output1 = gr.Textbox(label="Output Message")
101
+ vc_output2 = gr.Audio(label="Output Audio")
102
 
103
+ tts_submit.click(tts_fn, [tts_input1, tts_input2], [tts_output1, tts_output2])
104
+ vc_submit.click(vc_fn, [vc_input1, vc_input2, vc_input3], [vc_output1, vc_output2])
105
 
106
  app.launch()