Spaces:

Flux9665
/

SpeechCloning

Running

App Files Files

Flux9665 committed on Nov 3, 2022

Commit

464096d

•

1 Parent(s): 19cd105

Update app.py

Browse files

Files changed (1) hide show

app.py +3 -3

app.py CHANGED Viewed

@@ -92,7 +92,7 @@ class TTS_Interface:
                self.speaker_path_lookup["Voice 1"], \
                self.speaker_path_lookup["Voice 2"], \
                self.speaker_path_lookup["Voice 3"], \
-               (48000, float2pcm(torch.cat([part_1, part_2, part_3], dim=0).numpy()))
     def split_audio(self, path_to_audio, text_list):
         # extract audio
@@ -185,11 +185,11 @@ iface = gr.Interface(fn=meta_model.read,
                               gr.outputs.Audio(type="file", label="Reference-Voice 3"),
                               gr.outputs.Audio(type="numpy", label="Customized Audio")],
                      layout="vertical",
-                     title="IMS Toucan - Speech Customization through Voice Cloning",
                      thumbnail="Utility/toucan.png",
                      theme="default",
                      allow_flagging="never",
                      allow_screenshot=False,
                      description="In this demo, an audio is split automatically into individual sentences. Then each of the sentences is re-synthesized into speech with the exact same prosody, but with a voice that you can choose. This allows customizing any existing read speech while retaining as much from the original reading as possible. Unfortunately, we cannot show you the reference audio and the reference voices ahead of time, so they will be displayed together with the resulting cloned speech.",
                      article=article)
-iface.launch(enable_queue=True)

                self.speaker_path_lookup["Voice 1"], \
                self.speaker_path_lookup["Voice 2"], \
                self.speaker_path_lookup["Voice 3"], \
+               (48000, float2pcm(torch.cat([torch.tensor(part_1), torch.tensor(part_2), torch.tensor(part_3)], dim=0).numpy()))
     def split_audio(self, path_to_audio, text_list):
         # extract audio
                               gr.outputs.Audio(type="file", label="Reference-Voice 3"),
                               gr.outputs.Audio(type="numpy", label="Customized Audio")],
                      layout="vertical",
+                     title="Speech Customization through Prosody Cloning",
                      thumbnail="Utility/toucan.png",
                      theme="default",
                      allow_flagging="never",
                      allow_screenshot=False,
                      description="In this demo, an audio is split automatically into individual sentences. Then each of the sentences is re-synthesized into speech with the exact same prosody, but with a voice that you can choose. This allows customizing any existing read speech while retaining as much from the original reading as possible. Unfortunately, we cannot show you the reference audio and the reference voices ahead of time, so they will be displayed together with the resulting cloned speech.",
                      article=article)
+iface.launch(enable_queue=True)