Flux9665 committed on
Commit
42616ca
1 Parent(s): 6c51fe8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -2
app.py CHANGED
@@ -83,7 +83,12 @@ class TTS_Interface:
83
  clone_speaker_identity=False,
84
  lang="en")
85
 
86
- return "alignment.png", reference_audio, (48000, float2pcm(torch.cat([part_1, part_2, part_3], dim=0).numpy()))
 
 
 
 
 
87
 
88
  def split_audio(self, path_to_audio, text_list):
89
  # extract audio
@@ -171,6 +176,9 @@ iface = gr.Interface(fn=meta_model.read,
171
  "Voice 3"], type="value", default="Voice 3", label="Speaker selection for the third sentence")],
172
  outputs=[gr.outputs.Image(label="Alignment of Phonemes to Audio"),
173
  gr.outputs.Audio(type="file", label="Original Audio"),
 
 
 
174
  gr.outputs.Audio(type="numpy", label="Customized Audio")],
175
  layout="vertical",
176
  title="IMS Toucan Speech Customization through Voice Cloning Demo",
@@ -178,6 +186,6 @@ iface = gr.Interface(fn=meta_model.read,
178
  theme="default",
179
  allow_flagging="never",
180
  allow_screenshot=False,
181
- description="In this demo, an audio is split automatically into individual sentences. Then each of the sentences is re-synthesized into speech with the exact same prosody, but with a voice that you can choose. This allows customizing any existing read speech while retaining as much from the original reading as possible.",
182
  article=article)
183
  iface.launch(enable_queue=True)
 
83
  clone_speaker_identity=False,
84
  lang="en")
85
 
86
+ return "alignment.png", \
87
+ reference_audio, \
88
+ self.speaker_path_lookup["Voice 1"], \
89
+ self.speaker_path_lookup["Voice 2"], \
90
+ self.speaker_path_lookup["Voice 3"], \
91
+ (48000, float2pcm(torch.cat([part_1, part_2, part_3], dim=0).numpy()))
92
 
93
  def split_audio(self, path_to_audio, text_list):
94
  # extract audio
 
176
  "Voice 3"], type="value", default="Voice 3", label="Speaker selection for the third sentence")],
177
  outputs=[gr.outputs.Image(label="Alignment of Phonemes to Audio"),
178
  gr.outputs.Audio(type="file", label="Original Audio"),
179
+ gr.outputs.Audio(type="file", label="Reference-Voice 1"),
180
+ gr.outputs.Audio(type="file", label="Reference-Voice 2"),
181
+ gr.outputs.Audio(type="file", label="Reference-Voice 3"),
182
  gr.outputs.Audio(type="numpy", label="Customized Audio")],
183
  layout="vertical",
184
  title="IMS Toucan Speech Customization through Voice Cloning Demo",
 
186
  theme="default",
187
  allow_flagging="never",
188
  allow_screenshot=False,
189
+ description="In this demo, an audio is split automatically into individual sentences. Then each of the sentences is re-synthesized into speech with the exact same prosody, but with a voice that you can choose. This allows customizing any existing read speech while retaining as much from the original reading as possible. Unfortunately, we cannot show you the reference audio and the reference voices ahead of time, so they will be displayed together with the resulting cloned speech.",
190
  article=article)
191
  iface.launch(enable_queue=True)