Spaces:
Running
on
A10G
Running
on
A10G
add examples
Browse files
- .gitattributes +2 -1
- app.py +116 -42
- examples/chinese_female_recordings.wav +3 -0
- examples/chinese_male_seperated.wav +3 -0
- examples/english_female_seperated.wav +3 -0
- examples/english_male_recordings.wav +3 -0
- examples/output/.DS_Store +0 -0
- examples/output/chinese_female_recordings_vocalist_l1_JohnMayer.wav +3 -0
- examples/output/chinese_male_seperated_vocalist_l1_TaylorSwift.wav +3 -0
- examples/output/english_female_seperated_vocalist_l1_汪峰.wav +3 -0
- examples/output/english_male_recordings_vocalist_l1_石倚洁.wav +3 -0
.gitattributes
CHANGED
@@ -32,4 +32,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
32 |
*.xz filter=lfs diff=lfs merge=lfs -text
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
-
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
32 |
*.xz filter=lfs diff=lfs merge=lfs -text
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
*.wav filter=lfs diff=lfs merge=lfs -text
|
app.py
CHANGED
@@ -68,50 +68,124 @@ def svc_inference(
|
|
68 |
return result_file
|
69 |
|
70 |
|
71 |
-
|
72 |
-
gr.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
73 |
sources=["upload", "microphone"],
|
74 |
-
label="
|
75 |
type="filepath",
|
76 |
-
)
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
115 |
|
116 |
if __name__ == "__main__":
|
117 |
demo.launch()
|
|
|
68 |
return result_file
|
69 |
|
70 |
|
71 |
+
with gr.Blocks() as demo:
|
72 |
+
gr.Markdown(
|
73 |
+
"""
|
74 |
+
# Amphion Singing Voice Conversion: *DiffWaveNetSVC*
|
75 |
+
This demo provide an Amphion [DiffWaveNetSVC](https://github.com/open-mmlab/Amphion/tree/main/egs/svc/MultipleContentsSVC) pretrained model for you to play. The training data has been detailed [here](https://huggingface.co/amphion/singing_voice_conversion).
|
76 |
+
"""
|
77 |
+
)
|
78 |
+
|
79 |
+
gr.Markdown(
|
80 |
+
"""
|
81 |
+
## Source Audio
|
82 |
+
**Hint**: We recommend using dry vocals (e.g., studio recordings or source-separated voices from music) as the input for this demo. At the bottom of this page, we provide some examples for your reference.
|
83 |
+
"""
|
84 |
+
)
|
85 |
+
source_audio_input = gr.Audio(
|
86 |
sources=["upload", "microphone"],
|
87 |
+
label="Source Audio",
|
88 |
type="filepath",
|
89 |
+
)
|
90 |
+
|
91 |
+
with gr.Row():
|
92 |
+
with gr.Column():
|
93 |
+
config_target_singer = gr.Radio(
|
94 |
+
choices=list(SUPPORTED_TARGET_SINGERS.keys()),
|
95 |
+
label="Target Singer",
|
96 |
+
value="Jian Li 李健",
|
97 |
+
)
|
98 |
+
config_keyshift_choice = gr.Radio(
|
99 |
+
choices=["Auto Shift", "Key Shift"],
|
100 |
+
value="Auto Shift",
|
101 |
+
label="Pitch Shift Control",
|
102 |
+
info='If you want to control the specific pitch shift value, you need to choose "Key Shift"',
|
103 |
+
)
|
104 |
+
|
105 |
+
# gr.Markdown("## Conversion Configurations")
|
106 |
+
with gr.Column():
|
107 |
+
config_keyshift_value = gr.Slider(
|
108 |
+
-6,
|
109 |
+
6,
|
110 |
+
value=0,
|
111 |
+
step=1,
|
112 |
+
label="Key Shift Values",
|
113 |
+
info='How many semitones you want to transpose. This parameter will work only if you choose "Key Shift"',
|
114 |
+
)
|
115 |
+
config_diff_infer_steps = gr.Slider(
|
116 |
+
1,
|
117 |
+
1000,
|
118 |
+
value=1000,
|
119 |
+
step=1,
|
120 |
+
label="Diffusion Inference Steps",
|
121 |
+
info="As the step number increases, the synthesis quality will be better while the inference speed will be lower",
|
122 |
+
)
|
123 |
+
btn = gr.ClearButton(
|
124 |
+
components=[
|
125 |
+
config_target_singer,
|
126 |
+
config_keyshift_choice,
|
127 |
+
config_keyshift_value,
|
128 |
+
config_diff_infer_steps,
|
129 |
+
]
|
130 |
+
)
|
131 |
+
btn = gr.Button(value="Submit", variant="primary")
|
132 |
+
|
133 |
+
gr.Markdown("## Conversion Result")
|
134 |
+
demo_outputs = gr.Audio(label="Conversion Result")
|
135 |
+
|
136 |
+
btn.click(
|
137 |
+
fn=svc_inference,
|
138 |
+
inputs=[
|
139 |
+
source_audio_input,
|
140 |
+
config_target_singer,
|
141 |
+
config_keyshift_choice,
|
142 |
+
config_keyshift_value,
|
143 |
+
config_diff_infer_steps,
|
144 |
+
],
|
145 |
+
outputs=demo_outputs,
|
146 |
+
)
|
147 |
+
|
148 |
+
gr.Markdown("## Examples")
|
149 |
+
gr.Examples(
|
150 |
+
examples=[
|
151 |
+
[
|
152 |
+
"examples/chinese_female_recordings.wav",
|
153 |
+
"John Mayer",
|
154 |
+
"Auto Shift",
|
155 |
+
1000,
|
156 |
+
"examples/output/chinese_female_recordings_vocalist_l1_JohnMayer.wav",
|
157 |
+
],
|
158 |
+
[
|
159 |
+
"examples/chinese_male_seperated.wav",
|
160 |
+
"Taylor Swift",
|
161 |
+
"Auto Shift",
|
162 |
+
1000,
|
163 |
+
"examples/output/chinese_male_seperated_vocalist_l1_TaylorSwift.wav",
|
164 |
+
],
|
165 |
+
[
|
166 |
+
"examples/english_female_seperated.wav",
|
167 |
+
"Feng Wang 汪峰",
|
168 |
+
"Auto Shift",
|
169 |
+
1000,
|
170 |
+
"examples/output/english_female_seperated_vocalist_l1_汪峰.wav",
|
171 |
+
],
|
172 |
+
[
|
173 |
+
"examples/english_male_recordings.wav",
|
174 |
+
"Yijie Shi 石倚洁",
|
175 |
+
"Auto Shift",
|
176 |
+
1000,
|
177 |
+
"examples/output/english_male_recordings_vocalist_l1_石倚洁.wav",
|
178 |
+
],
|
179 |
+
],
|
180 |
+
inputs=[
|
181 |
+
source_audio_input,
|
182 |
+
config_target_singer,
|
183 |
+
config_keyshift_choice,
|
184 |
+
config_diff_infer_steps,
|
185 |
+
demo_outputs,
|
186 |
+
],
|
187 |
+
)
|
188 |
+
|
189 |
|
190 |
if __name__ == "__main__":
|
191 |
demo.launch()
|
examples/chinese_female_recordings.wav
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f710270fe3857211c55aaa1f813e310e68855ff9eabaf5b249537a2d4277cc30
|
3 |
+
size 448928
|
examples/chinese_male_seperated.wav
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:009077a677b23bff3154078930e6c624d218eb0acbe78990bec88f6bf5a6e5de
|
3 |
+
size 480044
|
examples/english_female_seperated.wav
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:87e75863ffb4e597467a825d019217e73d64dce1e9635de60a32559ffcb97cf4
|
3 |
+
size 1509584
|
examples/english_male_recordings.wav
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e14ebf1c554ebb25e5169b4bcda36a685538e94c531f303339bad91ff93a2288
|
3 |
+
size 251948
|
examples/output/.DS_Store
ADDED
Binary file (6.15 kB). View file
|
|
examples/output/chinese_female_recordings_vocalist_l1_JohnMayer.wav
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bf6d6ef89ba2234fbc64c0ee48f81528cf49717a23a919aa8d0767ada2437113
|
3 |
+
size 244268
|
examples/output/chinese_male_seperated_vocalist_l1_TaylorSwift.wav
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0e682abb072246f412133bfa313c6edf863f1d6a6db63022749f74c2c7ef01c7
|
3 |
+
size 479788
|
examples/output/english_female_seperated_vocalist_l1_汪峰.wav
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a03755cfc9aef4d26bda6370d9335625482f22f2c1f3c918dbbec3246213cee2
|
3 |
+
size 410668
|
examples/output/english_male_recordings_vocalist_l1_石倚洁.wav
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e850a0e02f2741185c3d3b642a9c292a3a297cdf262e92333b63adf98af7d450
|
3 |
+
size 251948
|