RMSnow committed on
Commit
106c893
1 Parent(s): 0a58601

add examples

Browse files
.gitattributes CHANGED
@@ -32,4 +32,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
32
  *.xz filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
32
  *.xz filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.wav filter=lfs diff=lfs merge=lfs -text
app.py CHANGED
@@ -68,50 +68,124 @@ def svc_inference(
68
  return result_file
69
 
70
 
71
- demo_inputs = [
72
- gr.Audio(
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  sources=["upload", "microphone"],
74
- label="Upload (or record) a song you want to listen",
75
  type="filepath",
76
- ),
77
- gr.Radio(
78
- choices=list(SUPPORTED_TARGET_SINGERS.keys()),
79
- label="Target Singer",
80
- value="Jian Li 李健",
81
- ),
82
- gr.Radio(
83
- choices=["Auto Shift", "Key Shift"],
84
- value="Auto Shift",
85
- label="Pitch Shift Control",
86
- info='If you want to control the specific pitch shift value, you need to choose "Key Shift"',
87
- ),
88
- gr.Slider(
89
- -6,
90
- 6,
91
- value=0,
92
- step=1,
93
- label="Key Shift Values",
94
- info='How many semitones you want to transpose. This parameter will work only if you choose "Key Shift"',
95
- ),
96
- gr.Slider(
97
- 1,
98
- 1000,
99
- value=1000,
100
- step=1,
101
- label="Diffusion Inference Steps",
102
- info="As the step number increases, the synthesis quality will be better while the inference speed will be lower",
103
- ),
104
- ]
105
-
106
- demo_outputs = gr.Audio(label="")
107
-
108
-
109
- demo = gr.Interface(
110
- fn=svc_inference,
111
- inputs=demo_inputs,
112
- outputs=demo_outputs,
113
- title="Amphion Singing Voice Conversion",
114
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115
 
116
  if __name__ == "__main__":
117
  demo.launch()
 
68
  return result_file
69
 
70
 
71
+ with gr.Blocks() as demo:
72
+ gr.Markdown(
73
+ """
74
+ # Amphion Singing Voice Conversion: *DiffWaveNetSVC*
75
+ This demo provide an Amphion [DiffWaveNetSVC](https://github.com/open-mmlab/Amphion/tree/main/egs/svc/MultipleContentsSVC) pretrained model for you to play. The training data has been detailed [here](https://huggingface.co/amphion/singing_voice_conversion).
76
+ """
77
+ )
78
+
79
+ gr.Markdown(
80
+ """
81
+ ## Source Audio
82
+ **Hint**: We recommend using dry vocals (e.g., studio recordings or source-separated voices from music) as the input for this demo. At the bottom of this page, we provide some examples for your reference.
83
+ """
84
+ )
85
+ source_audio_input = gr.Audio(
86
  sources=["upload", "microphone"],
87
+ label="Source Audio",
88
  type="filepath",
89
+ )
90
+
91
+ with gr.Row():
92
+ with gr.Column():
93
+ config_target_singer = gr.Radio(
94
+ choices=list(SUPPORTED_TARGET_SINGERS.keys()),
95
+ label="Target Singer",
96
+ value="Jian Li 李健",
97
+ )
98
+ config_keyshift_choice = gr.Radio(
99
+ choices=["Auto Shift", "Key Shift"],
100
+ value="Auto Shift",
101
+ label="Pitch Shift Control",
102
+ info='If you want to control the specific pitch shift value, you need to choose "Key Shift"',
103
+ )
104
+
105
+ # gr.Markdown("## Conversion Configurations")
106
+ with gr.Column():
107
+ config_keyshift_value = gr.Slider(
108
+ -6,
109
+ 6,
110
+ value=0,
111
+ step=1,
112
+ label="Key Shift Values",
113
+ info='How many semitones you want to transpose. This parameter will work only if you choose "Key Shift"',
114
+ )
115
+ config_diff_infer_steps = gr.Slider(
116
+ 1,
117
+ 1000,
118
+ value=1000,
119
+ step=1,
120
+ label="Diffusion Inference Steps",
121
+ info="As the step number increases, the synthesis quality will be better while the inference speed will be lower",
122
+ )
123
+ btn = gr.ClearButton(
124
+ components=[
125
+ config_target_singer,
126
+ config_keyshift_choice,
127
+ config_keyshift_value,
128
+ config_diff_infer_steps,
129
+ ]
130
+ )
131
+ btn = gr.Button(value="Submit", variant="primary")
132
+
133
+ gr.Markdown("## Conversion Result")
134
+ demo_outputs = gr.Audio(label="Conversion Result")
135
+
136
+ btn.click(
137
+ fn=svc_inference,
138
+ inputs=[
139
+ source_audio_input,
140
+ config_target_singer,
141
+ config_keyshift_choice,
142
+ config_keyshift_value,
143
+ config_diff_infer_steps,
144
+ ],
145
+ outputs=demo_outputs,
146
+ )
147
+
148
+ gr.Markdown("## Examples")
149
+ gr.Examples(
150
+ examples=[
151
+ [
152
+ "examples/chinese_female_recordings.wav",
153
+ "John Mayer",
154
+ "Auto Shift",
155
+ 1000,
156
+ "examples/output/chinese_female_recordings_vocalist_l1_JohnMayer.wav",
157
+ ],
158
+ [
159
+ "examples/chinese_male_seperated.wav",
160
+ "Taylor Swift",
161
+ "Auto Shift",
162
+ 1000,
163
+ "examples/output/chinese_male_seperated_vocalist_l1_TaylorSwift.wav",
164
+ ],
165
+ [
166
+ "examples/english_female_seperated.wav",
167
+ "Feng Wang 汪峰",
168
+ "Auto Shift",
169
+ 1000,
170
+ "examples/output/english_female_seperated_vocalist_l1_汪峰.wav",
171
+ ],
172
+ [
173
+ "examples/english_male_recordings.wav",
174
+ "Yijie Shi 石倚洁",
175
+ "Auto Shift",
176
+ 1000,
177
+ "examples/output/english_male_recordings_vocalist_l1_石倚洁.wav",
178
+ ],
179
+ ],
180
+ inputs=[
181
+ source_audio_input,
182
+ config_target_singer,
183
+ config_keyshift_choice,
184
+ config_diff_infer_steps,
185
+ demo_outputs,
186
+ ],
187
+ )
188
+
189
 
190
  if __name__ == "__main__":
191
  demo.launch()
examples/chinese_female_recordings.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f710270fe3857211c55aaa1f813e310e68855ff9eabaf5b249537a2d4277cc30
3
+ size 448928
examples/chinese_male_seperated.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:009077a677b23bff3154078930e6c624d218eb0acbe78990bec88f6bf5a6e5de
3
+ size 480044
examples/english_female_seperated.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87e75863ffb4e597467a825d019217e73d64dce1e9635de60a32559ffcb97cf4
3
+ size 1509584
examples/english_male_recordings.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e14ebf1c554ebb25e5169b4bcda36a685538e94c531f303339bad91ff93a2288
3
+ size 251948
examples/output/.DS_Store ADDED
Binary file (6.15 kB). View file
 
examples/output/chinese_female_recordings_vocalist_l1_JohnMayer.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf6d6ef89ba2234fbc64c0ee48f81528cf49717a23a919aa8d0767ada2437113
3
+ size 244268
examples/output/chinese_male_seperated_vocalist_l1_TaylorSwift.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e682abb072246f412133bfa313c6edf863f1d6a6db63022749f74c2c7ef01c7
3
+ size 479788
examples/output/english_female_seperated_vocalist_l1_汪峰.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a03755cfc9aef4d26bda6370d9335625482f22f2c1f3c918dbbec3246213cee2
3
+ size 410668
examples/output/english_male_recordings_vocalist_l1_石倚洁.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e850a0e02f2741185c3d3b642a9c292a3a297cdf262e92333b63adf98af7d450
3
+ size 251948