Flux9665 committed on
Commit
464096d
1 Parent(s): 19cd105

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -3
app.py CHANGED
@@ -92,7 +92,7 @@ class TTS_Interface:
92
  self.speaker_path_lookup["Voice 1"], \
93
  self.speaker_path_lookup["Voice 2"], \
94
  self.speaker_path_lookup["Voice 3"], \
95
- (48000, float2pcm(torch.cat([part_1, part_2, part_3], dim=0).numpy()))
96
 
97
  def split_audio(self, path_to_audio, text_list):
98
  # extract audio
@@ -185,11 +185,11 @@ iface = gr.Interface(fn=meta_model.read,
185
  gr.outputs.Audio(type="file", label="Reference-Voice 3"),
186
  gr.outputs.Audio(type="numpy", label="Customized Audio")],
187
  layout="vertical",
188
- title="IMS Toucan - Speech Customization through Voice Cloning",
189
  thumbnail="Utility/toucan.png",
190
  theme="default",
191
  allow_flagging="never",
192
  allow_screenshot=False,
193
  description="In this demo, an audio is split automatically into individual sentences. Then each of the sentences is re-synthesized into speech with the exact same prosody, but with a voice that you can choose. This allows customizing any existing read speech while retaining as much from the original reading as possible. Unfortunately, we cannot show you the reference audio and the reference voices ahead of time, so they will be displayed together with the resulting cloned speech.",
194
  article=article)
195
- iface.launch(enable_queue=True)
 
92
  self.speaker_path_lookup["Voice 1"], \
93
  self.speaker_path_lookup["Voice 2"], \
94
  self.speaker_path_lookup["Voice 3"], \
95
+ (48000, float2pcm(torch.cat([torch.tensor(part_1), torch.tensor(part_2), torch.tensor(part_3)], dim=0).numpy()))
96
 
97
  def split_audio(self, path_to_audio, text_list):
98
  # extract audio
 
185
  gr.outputs.Audio(type="file", label="Reference-Voice 3"),
186
  gr.outputs.Audio(type="numpy", label="Customized Audio")],
187
  layout="vertical",
188
+ title="Speech Customization through Prosody Cloning",
189
  thumbnail="Utility/toucan.png",
190
  theme="default",
191
  allow_flagging="never",
192
  allow_screenshot=False,
193
  description="In this demo, an audio is split automatically into individual sentences. Then each of the sentences is re-synthesized into speech with the exact same prosody, but with a voice that you can choose. This allows customizing any existing read speech while retaining as much from the original reading as possible. Unfortunately, we cannot show you the reference audio and the reference voices ahead of time, so they will be displayed together with the resulting cloned speech.",
194
  article=article)
195
+ iface.launch(enable_queue=True)