Spaces:

amphion
/

naturalspeech3_facodec

Running on Zero

App Files Community

Hecheng0625 committed on Mar 12

Commit

05f5f5a

•

1 Parent(s): 606a181

Update app.py

Browse files

Files changed (1) hide show

app.py +37 -5

app.py CHANGED Viewed

@@ -59,6 +59,7 @@ fa_encoder.eval()
 fa_decoder.eval()
 fa_redecoder.eval()
 def codec_inference(speech_path):
     with torch.no_grad():
@@ -118,6 +119,18 @@ def codec_voice_conversion(speech_path_a, speech_path_b):
 demo_inputs = [
     gr.Audio(
         sources=["upload", "microphone"],
         label="Upload the source speech file",
@@ -130,7 +143,7 @@ demo_inputs = [
     ),
 ]
-demo_outputs = [
     gr.Audio(label="Source speech reconstructed"),
     gr.Audio(label="Reference speech reconstructed"),
     gr.Audio(label="Voice conversion result"),
@@ -138,10 +151,10 @@ demo_outputs = [
 with gr.Blocks() as demo:
     gr.Interface(
-        fn=codec_voice_conversion,
         inputs=demo_inputs,
         outputs=demo_outputs,
-        title="NaturalSpeech3 FACodec",
         description="""
         ## FACodec: Speech Codec with Attribute Factorization used for NaturalSpeech 3
@@ -162,12 +175,31 @@ with gr.Blocks() as demo:
     gr.Examples(
         examples=[
             [
-                "default/source/source.wav",
                 "default/ref/ref.wav",
             ],
         ],
-        inputs=demo_inputs,
     )
     demo.queue()

 fa_decoder.eval()
 fa_redecoder.eval()
+@spaces.GPU
 def codec_inference(speech_path):
     with torch.no_grad():
 demo_inputs = [
+    gr.Audio(
+        sources=["upload", "microphone"],
+        label="Upload the speech file",
+        type="filepath",
+    ),
+]
+demo_outputs = [
+    gr.Audio(label="Speech reconstructed"),
+]
+vc_demo_inputs = [
     gr.Audio(
         sources=["upload", "microphone"],
         label="Upload the source speech file",
     ),
 ]
+vc_demo_outputs = [
     gr.Audio(label="Source speech reconstructed"),
     gr.Audio(label="Reference speech reconstructed"),
     gr.Audio(label="Voice conversion result"),
 with gr.Blocks() as demo:
     gr.Interface(
+        fn=codec_inference,
         inputs=demo_inputs,
         outputs=demo_outputs,
+        title="FACodec for NaturalSpeech 3",
         description="""
         ## FACodec: Speech Codec with Attribute Factorization used for NaturalSpeech 3
     gr.Examples(
         examples=[
             [
+                "default/ref/ref.wav",
                 "default/ref/ref.wav",
             ],
+        ],
+        inputs=vc_demo_inputs,
+    )
+    gr.Interface(
+        fn=codec_voice_conversion,
+        inputs=vc_demo_inputs,
+        outputs=vc_demo_outputs,
+        title="FACodec Voice Conversion",
+        description="""
+        FACodec can achieve zero-shot voice conversion.
+        """,
+    )
+    gr.Examples(
+        examples=[
+            [
+                "default/source/source.wav",
+                "default/ref/ref.wav",
+            ],
         ],
+        inputs=vc_demo_inputs,
     )
     demo.queue()