skytnt committed on
Commit
7f6e9a8
•
1 Parent(s): f4899ea
Files changed (1) hide show
  1. app.py +6 -5
app.py CHANGED
@@ -20,7 +20,7 @@ def get_text(text, hps):
20
 
21
 
22
  def create_tts_fn(model, hps, speaker_ids):
23
- def tts_fn(text, speaker):
24
  if len(text) > 150:
25
  return "Error: Text is too long", None
26
  speaker_id = speaker_ids[speaker]
@@ -29,9 +29,8 @@ def create_tts_fn(model, hps, speaker_ids):
29
  x_tst = stn_tst.unsqueeze(0)
30
  x_tst_lengths = LongTensor([stn_tst.size(0)])
31
  sid = LongTensor([speaker_id])
32
- audio = \
33
- model.infer(x_tst, x_tst_lengths, sid=sid, noise_scale=.667, noise_scale_w=0.8, length_scale=1)[0][
34
- 0, 0].data.cpu().float().numpy()
35
  return "Success", (hps.data.sampling_rate, audio)
36
 
37
  return tts_fn
@@ -112,10 +111,12 @@ if __name__ == '__main__':
112
  tts_input1 = gr.TextArea(label="Text (150 words limitation)", value="こんにちは。")
113
  tts_input2 = gr.Dropdown(label="Speaker", choices=speakers,
114
  type="index", value=speakers[0])
 
115
  tts_submit = gr.Button("Generate", variant="primary")
116
  tts_output1 = gr.Textbox(label="Output Message")
117
  tts_output2 = gr.Audio(label="Output Audio")
118
- tts_submit.click(tts_fn, [tts_input1, tts_input2], [tts_output1, tts_output2])
 
119
  with gr.TabItem("Voice Conversion"):
120
  with gr.Tabs():
121
  for i, (models_name, cover_path, speakers, tts_fn, vc_fn) in enumerate(models):
 
20
 
21
 
22
  def create_tts_fn(model, hps, speaker_ids):
23
+ def tts_fn(text, speaker, speed):
24
  if len(text) > 150:
25
  return "Error: Text is too long", None
26
  speaker_id = speaker_ids[speaker]
 
29
  x_tst = stn_tst.unsqueeze(0)
30
  x_tst_lengths = LongTensor([stn_tst.size(0)])
31
  sid = LongTensor([speaker_id])
32
+ audio = model.infer(x_tst, x_tst_lengths, sid=sid, noise_scale=.667, noise_scale_w=0.8,
33
+ length_scale=1.0 / speed)[0][0, 0].data.cpu().float().numpy()
 
34
  return "Success", (hps.data.sampling_rate, audio)
35
 
36
  return tts_fn
 
111
  tts_input1 = gr.TextArea(label="Text (150 words limitation)", value="こんにちは。")
112
  tts_input2 = gr.Dropdown(label="Speaker", choices=speakers,
113
  type="index", value=speakers[0])
114
+ tts_input3 = gr.Slider(label="Speed", value=1, minimum=0.5, maximum=2, step=0.1)
115
  tts_submit = gr.Button("Generate", variant="primary")
116
  tts_output1 = gr.Textbox(label="Output Message")
117
  tts_output2 = gr.Audio(label="Output Audio")
118
+ tts_submit.click(tts_fn, [tts_input1, tts_input2, tts_input3],
119
+ [tts_output1, tts_output2])
120
  with gr.TabItem("Voice Conversion"):
121
  with gr.Tabs():
122
  for i, (models_name, cover_path, speakers, tts_fn, vc_fn) in enumerate(models):