Spaces:

devesg
/

singing_voice_conversion

Running

App Files Files Community

RMSnow committed on Dec 18, 2023

Commit

106c893

•

1 Parent(s): 0a58601

add examples

Browse files

Files changed (11) hide show

.gitattributes +2 -1
app.py +116 -42
examples/chinese_female_recordings.wav +3 -0
examples/chinese_male_seperated.wav +3 -0
examples/english_female_seperated.wav +3 -0
examples/english_male_recordings.wav +3 -0
examples/output/.DS_Store +0 -0
examples/output/chinese_female_recordings_vocalist_l1_JohnMayer.wav +3 -0
examples/output/chinese_male_seperated_vocalist_l1_TaylorSwift.wav +3 -0
examples/output/english_female_seperated_vocalist_l1_汪峰.wav +3 -0
examples/output/english_male_recordings_vocalist_l1_石倚洁.wav +3 -0

.gitattributes CHANGED Viewed

@@ -32,4 +32,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.xz filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
-*tfevents* filter=lfs diff=lfs merge=lfs -text

 *.xz filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
+*.wav filter=lfs diff=lfs merge=lfs -text

app.py CHANGED Viewed

@@ -68,50 +68,124 @@ def svc_inference(
     return result_file
-demo_inputs = [
-    gr.Audio(
         sources=["upload", "microphone"],
-        label="Upload (or record) a song you want to listen",
         type="filepath",
-    ),
-    gr.Radio(
-        choices=list(SUPPORTED_TARGET_SINGERS.keys()),
-        label="Target Singer",
-        value="Jian Li 李健",
-    ),
-    gr.Radio(
-        choices=["Auto Shift", "Key Shift"],
-        value="Auto Shift",
-        label="Pitch Shift Control",
-        info='If you want to control the specific pitch shift value, you need to choose "Key Shift"',
-    ),
-    gr.Slider(
-        -6,
-        6,
-        value=0,
-        step=1,
-        label="Key Shift Values",
-        info='How many semitones you want to transpose.	This parameter will work only if you choose "Key Shift"',
-    ),
-    gr.Slider(
-        1,
-        1000,
-        value=1000,
-        step=1,
-        label="Diffusion Inference Steps",
-        info="As the step number increases, the synthesis quality will be better while the inference speed will be lower",
-    ),
-]
-demo_outputs = gr.Audio(label="")
-demo = gr.Interface(
-    fn=svc_inference,
-    inputs=demo_inputs,
-    outputs=demo_outputs,
-    title="Amphion Singing Voice Conversion",
-)
 if __name__ == "__main__":
     demo.launch()

     return result_file
+with gr.Blocks() as demo:
+    gr.Markdown(
+        """
+        # Amphion Singing Voice Conversion: *DiffWaveNetSVC*
+        This demo provide an Amphion [DiffWaveNetSVC](https://github.com/open-mmlab/Amphion/tree/main/egs/svc/MultipleContentsSVC) pretrained model for you to play. The training data has been detailed [here](https://huggingface.co/amphion/singing_voice_conversion).
+        """
+    )
+    gr.Markdown(
+        """
+        ## Source Audio
+        **Hint**: We recommend using dry vocals (e.g., studio recordings or source-separated voices from music) as the input for this demo. At the bottom of this page, we provide some examples for your reference.
+        """
+    )
+    source_audio_input = gr.Audio(
         sources=["upload", "microphone"],
+        label="Source Audio",
         type="filepath",
+    )
+    with gr.Row():
+        with gr.Column():
+            config_target_singer = gr.Radio(
+                choices=list(SUPPORTED_TARGET_SINGERS.keys()),
+                label="Target Singer",
+                value="Jian Li 李健",
+            )
+            config_keyshift_choice = gr.Radio(
+                choices=["Auto Shift", "Key Shift"],
+                value="Auto Shift",
+                label="Pitch Shift Control",
+                info='If you want to control the specific pitch shift value, you need to choose "Key Shift"',
+            )
+        # gr.Markdown("## Conversion Configurations")
+        with gr.Column():
+            config_keyshift_value = gr.Slider(
+                -6,
+                6,
+                value=0,
+                step=1,
+                label="Key Shift Values",
+                info='How many semitones you want to transpose.	This parameter will work only if you choose "Key Shift"',
+            )
+            config_diff_infer_steps = gr.Slider(
+                1,
+                1000,
+                value=1000,
+                step=1,
+                label="Diffusion Inference Steps",
+                info="As the step number increases, the synthesis quality will be better while the inference speed will be lower",
+            )
+            btn = gr.ClearButton(
+                components=[
+                    config_target_singer,
+                    config_keyshift_choice,
+                    config_keyshift_value,
+                    config_diff_infer_steps,
+                ]
+            )
+            btn = gr.Button(value="Submit", variant="primary")
+    gr.Markdown("## Conversion Result")
+    demo_outputs = gr.Audio(label="Conversion Result")
+    btn.click(
+        fn=svc_inference,
+        inputs=[
+            source_audio_input,
+            config_target_singer,
+            config_keyshift_choice,
+            config_keyshift_value,
+            config_diff_infer_steps,
+        ],
+        outputs=demo_outputs,
+    )
+    gr.Markdown("## Examples")
+    gr.Examples(
+        examples=[
+            [
+                "examples/chinese_female_recordings.wav",
+                "John Mayer",
+                "Auto Shift",
+                1000,
+                "examples/output/chinese_female_recordings_vocalist_l1_JohnMayer.wav",
+            ],
+            [
+                "examples/chinese_male_seperated.wav",
+                "Taylor Swift",
+                "Auto Shift",
+                1000,
+                "examples/output/chinese_male_seperated_vocalist_l1_TaylorSwift.wav",
+            ],
+            [
+                "examples/english_female_seperated.wav",
+                "Feng Wang 汪峰",
+                "Auto Shift",
+                1000,
+                "examples/output/english_female_seperated_vocalist_l1_汪峰.wav",
+            ],
+            [
+                "examples/english_male_recordings.wav",
+                "Yijie Shi 石倚洁",
+                "Auto Shift",
+                1000,
+                "examples/output/english_male_recordings_vocalist_l1_石倚洁.wav",
+            ],
+        ],
+        inputs=[
+            source_audio_input,
+            config_target_singer,
+            config_keyshift_choice,
+            config_diff_infer_steps,
+            demo_outputs,
+        ],
+    )
 if __name__ == "__main__":
     demo.launch()

examples/chinese_female_recordings.wav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f710270fe3857211c55aaa1f813e310e68855ff9eabaf5b249537a2d4277cc30
+size 448928

examples/chinese_male_seperated.wav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:009077a677b23bff3154078930e6c624d218eb0acbe78990bec88f6bf5a6e5de
+size 480044

examples/english_female_seperated.wav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:87e75863ffb4e597467a825d019217e73d64dce1e9635de60a32559ffcb97cf4
+size 1509584

examples/english_male_recordings.wav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e14ebf1c554ebb25e5169b4bcda36a685538e94c531f303339bad91ff93a2288
+size 251948

examples/output/.DS_Store ADDED Viewed

Binary file (6.15 kB). View file

examples/output/chinese_female_recordings_vocalist_l1_JohnMayer.wav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bf6d6ef89ba2234fbc64c0ee48f81528cf49717a23a919aa8d0767ada2437113
+size 244268

examples/output/chinese_male_seperated_vocalist_l1_TaylorSwift.wav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0e682abb072246f412133bfa313c6edf863f1d6a6db63022749f74c2c7ef01c7
+size 479788

examples/output/english_female_seperated_vocalist_l1_汪峰.wav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a03755cfc9aef4d26bda6370d9335625482f22f2c1f3c918dbbec3246213cee2
+size 410668

examples/output/english_male_recordings_vocalist_l1_石倚洁.wav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e850a0e02f2741185c3d3b642a9c292a3a297cdf262e92333b63adf98af7d450
+size 251948