kevinwang676 committed on
Commit
9ded820
1 Parent(s): b266a2d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -32
app.py CHANGED
@@ -205,18 +205,15 @@ def predict_song_from_yt(
205
  full_song = inst_wav + np.expand_dims(cloned_vox, 1)
206
  return (model.target_sample, full_song), (model.target_sample, cloned_vox)
207
 
208
- SPACE_ID = "nateraw/voice-cloning"
209
- description = f"""
210
 
211
- <center><a class="duplicate-button" style="display:inline-block" target="_blank" href="https://huggingface.co/spaces/{SPACE_ID}?duplicate=true"><img src="https://img.shields.io/badge/-Duplicate%20Space-blue?labelColor=white&style=flat&logo=&logoWidth=14" alt="Duplicate Space"></a></center>
212
 
213
- #### This app uses models trained with [so-vits-svc-fork](https://github.com/voicepaw/so-vits-svc-fork) to clone a voice. Model currently being used is https://hf.co/{repo_id}. To change the model being served, duplicate the space and update the `repo_id`/other settings in `app.py`.
214
 
215
- #### Train Your Own: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/nateraw/voice-cloning/blob/main/training_so_vits_svc_fork.ipynb)
216
  """.strip()
217
 
218
  article = """
219
- <p style='text-align: center'> 注意❗:请不要生成会对个人以及组织造成侵害的内容,此程序仅供科研、学习及个人娱乐使用。用户生成内容与程序开发者无关,请自觉合法合规使用,违反者一切后果自负。
220
  </p>
221
  """.strip()
222
 
@@ -224,16 +221,16 @@ article = """
224
  interface_mic = gr.Interface(
225
  predict,
226
  inputs=[
227
- gr.Dropdown(speakers, value=speakers[0], label="Target Speaker"),
228
- gr.Audio(type="filepath", source="microphone", label="Source Audio"),
229
- gr.Slider(-12, 12, value=0, step=1, label="Transpose (Semitones)"),
230
- gr.Checkbox(False, label="Auto Predict F0"),
231
- gr.Slider(0.0, 1.0, value=default_cluster_infer_ratio, step=0.1, label="cluster infer ratio"),
232
- gr.Slider(0.0, 1.0, value=0.4, step=0.1, label="noise scale"),
233
  gr.Dropdown(
234
  choices=["crepe", "crepe-tiny", "parselmouth", "dio", "harvest"],
235
  value=default_f0_method,
236
- label="f0 method",
237
  ),
238
  ],
239
  outputs="audio",
@@ -244,16 +241,16 @@ interface_mic = gr.Interface(
244
  interface_file = gr.Interface(
245
  predict,
246
  inputs=[
247
- gr.Dropdown(speakers, value=speakers[0], label="Target Speaker"),
248
- gr.Audio(type="filepath", source="upload", label="Source Audio"),
249
- gr.Slider(-12, 12, value=0, step=1, label="Transpose (Semitones)"),
250
- gr.Checkbox(False, label="Auto Predict F0"),
251
- gr.Slider(0.0, 1.0, value=default_cluster_infer_ratio, step=0.1, label="cluster infer ratio"),
252
- gr.Slider(0.0, 1.0, value=0.4, step=0.1, label="noise scale"),
253
  gr.Dropdown(
254
  choices=["crepe", "crepe-tiny", "parselmouth", "dio", "harvest"],
255
  value=default_f0_method,
256
- label="f0 method",
257
  ),
258
  ],
259
  outputs="audio",
@@ -265,23 +262,23 @@ interface_yt = gr.Interface(
265
  predict_song_from_yt,
266
  inputs=[
267
  gr.Textbox(
268
- label="Bilibili网址", info="请填写含有您喜欢的声音的Bilibili网址"
269
  ),
270
- gr.Number(value=0, label="Start Time (seconds)"),
271
- gr.Number(value=15, label="End Time (seconds)"),
272
- gr.Dropdown(speakers, value=speakers[0], label="Target Speaker"),
273
- gr.Slider(-12, 12, value=0, step=1, label="Transpose (Semitones)"),
274
- gr.Checkbox(False, label="Auto Predict F0"),
275
- gr.Slider(0.0, 1.0, value=default_cluster_infer_ratio, step=0.1, label="cluster infer ratio"),
276
- gr.Slider(0.0, 1.0, value=0.4, step=0.1, label="noise scale"),
277
  gr.Dropdown(
278
  choices=["crepe", "crepe-tiny", "parselmouth", "dio", "harvest"],
279
  value=default_f0_method,
280
- label="f0 method",
281
  ),
282
  ],
283
- outputs=["audio", "audio"],
284
- title="🥳🎶🎡 - AI歌手:可从网址直接上传素材,且无需分离背景音",
285
  description=description,
286
  article=article,
287
  examples=[
@@ -290,7 +287,7 @@ interface_yt = gr.Interface(
290
  )
291
  interface = gr.TabbedInterface(
292
  [interface_mic, interface_file, interface_yt],
293
- ["从麦克风上传", "从文件上传", "从Bilibili上传"],
294
  )
295
 
296
 
 
205
  full_song = inst_wav + np.expand_dims(cloned_vox, 1)
206
  return (model.target_sample, full_song), (model.target_sample, cloned_vox)
207
 
 
 
208
 
209
+ description = f"""
210
 
211
+ ### <center>🌊 滔滔AI合作音乐人:[一清清清](https://space.bilibili.com/22960772?spm_id_from=333.337.0.0)</center>
212
 
 
213
  """.strip()
214
 
215
  article = """
216
+ <p style='text-align: center'> 注意❗:请不要生成会对个人以及组织造成侵害的内容,此程序仅供科研、学习及个人娱乐使用。
217
  </p>
218
  """.strip()
219
 
 
221
  interface_mic = gr.Interface(
222
  predict,
223
  inputs=[
224
+ gr.Dropdown(speakers, value=speakers[0], label="AI歌手-一清清清"),
225
+ gr.Audio(type="filepath", source="microphone", label="请用麦克风上传您想转换的歌曲"),
226
+ gr.Slider(-12, 12, value=0, step=1, label="变调 (默认为0;+2为升高两个key)"),
227
+ gr.Checkbox(False, label="是否开启自动f0预测", info="配合聚类模型f0预测效果更好,仅限转换语音时使用"),
228
+ gr.Slider(0.0, 1.0, value=default_cluster_infer_ratio, step=0.1, label="聚类模型混合比例", info="0-1之间,0即不启用聚类。使用聚类模型能提升音色相似度,但会导致咬字下降 (如果使用,建议0.5左右)"),
229
+ gr.Slider(0.0, 1.0, value=0.4, step=0.1, label="noise scale (建议保持不变)"),
230
  gr.Dropdown(
231
  choices=["crepe", "crepe-tiny", "parselmouth", "dio", "harvest"],
232
  value=default_f0_method,
233
+ label="模型推理方法 (crepe推理效果最好)",
234
  ),
235
  ],
236
  outputs="audio",
 
241
  interface_file = gr.Interface(
242
  predict,
243
  inputs=[
244
+ gr.Dropdown(speakers, value=speakers[0], label="AI歌手-一清清清"),
245
+ gr.Audio(type="filepath", source="upload", label="请上传您想转换的歌曲 (仅人声部分)"),
246
+ gr.Slider(-12, 12, value=0, step=1, label="变调 (默认为0;+2为升高两个key)"),
247
+ gr.Checkbox(False, label="是否开启自动f0预测", info="配合聚类模型f0预测效果更好,仅限转换语音时使用"),
248
+ gr.Slider(0.0, 1.0, value=default_cluster_infer_ratio, step=0.1, label="聚类模型混合比例", info="0-1之间,0即不启用聚类。使用聚类模型能提升音色相似度,但会导致咬字下降 (如果使用,建议0.5左右)"),
249
+ gr.Slider(0.0, 1.0, value=0.4, step=0.1, label="noise scale (建议保持不变)"),
250
  gr.Dropdown(
251
  choices=["crepe", "crepe-tiny", "parselmouth", "dio", "harvest"],
252
  value=default_f0_method,
253
+ label="模型推理方法 (crepe推理效果最好)",
254
  ),
255
  ],
256
  outputs="audio",
 
262
  predict_song_from_yt,
263
  inputs=[
264
  gr.Textbox(
265
+ label="Bilibili网址", info="请填写含有您喜欢歌曲的Bilibili网址"
266
  ),
267
+ gr.Number(value=0, label="起始时间 ()"),
268
+ gr.Number(value=15, label="结束时间 ()"),
269
+ gr.Dropdown(speakers, value=speakers[0], label="AI歌手-一清清清"),
270
+ gr.Slider(-12, 12, value=0, step=1, label="变调 (默认为0;+2为升高两个key)"),
271
+ gr.Checkbox(False, label="是否开启自动f0预测", info="配合聚类模型f0预测效果更好,仅限转换语音时使用"),
272
+ gr.Slider(0.0, 1.0, value=default_cluster_infer_ratio, step=0.1, label="聚类模型混合比例", info="0-1之间,0即不启用聚类。使用聚类模型能提升音色相似度,但会导致咬字下降 (如果使用,建议0.5左右)"),
273
+ gr.Slider(0.0, 1.0, value=0.4, step=0.1, label="noise scale (建议保持不变)"),
274
  gr.Dropdown(
275
  choices=["crepe", "crepe-tiny", "parselmouth", "dio", "harvest"],
276
  value=default_f0_method,
277
+ label="模型推理方法 (crepe推理效果最好)",
278
  ),
279
  ],
280
+ outputs=[gr.Audio(label="AI歌手+伴奏"), gr.Audio(label="AI歌手人声部分")],
281
+ title="🥳🎶🌊 - 滔滔AI+音乐:可从B站直接上传素材,无需分离背景音",
282
  description=description,
283
  article=article,
284
  examples=[
 
287
  )
288
  interface = gr.TabbedInterface(
289
  [interface_mic, interface_file, interface_yt],
290
+ ["🎙️ - 从麦克风上传", "🎵 - 从文件上传", "📺 - B站视频上传(推荐)"],
291
  )
292
 
293