Voice-Cloning-for-Bilibili

Running

App Files Files Community

kevinwang676 committed on Jul 5, 2023

Commit

9ded820

•

1 Parent(s): b266a2d

Update app.py

Browse files

Files changed (1) hide show

app.py +29 -32

app.py CHANGED Viewed

@@ -205,18 +205,15 @@ def predict_song_from_yt(
     full_song = inst_wav + np.expand_dims(cloned_vox, 1)
     return (model.target_sample, full_song), (model.target_sample, cloned_vox)
-SPACE_ID = "nateraw/voice-cloning"
-description = f"""
-<center><a class="duplicate-button" style="display:inline-block" target="_blank" href="https://huggingface.co/spaces/{SPACE_ID}?duplicate=true"><img src="https://img.shields.io/badge/-Duplicate%20Space-blue?labelColor=white&style=flat&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAAAXNSR0IArs4c6QAAAP5JREFUOE+lk7FqAkEURY+ltunEgFXS2sZGIbXfEPdLlnxJyDdYB62sbbUKpLbVNhyYFzbrrA74YJlh9r079973psed0cvUD4A+4HoCjsA85X0Dfn/RBLBgBDxnQPfAEJgBY+A9gALA4tcbamSzS4xq4FOQAJgCDwV2CPKV8tZAJcAjMMkUe1vX+U+SMhfAJEHasQIWmXNN3abzDwHUrgcRGmYcgKe0bxrblHEB4E/pndMazNpSZGcsZdBlYJcEL9Afo75molJyM2FxmPgmgPqlWNLGfwZGG6UiyEvLzHYDmoPkDDiNm9JR9uboiONcBXrpY1qmgs21x1QwyZcpvxt9NS09PlsPAAAAAElFTkSuQmCC&logoWidth=14" alt="Duplicate Space"></a></center>
-#### This app uses models trained with [so-vits-svc-fork](https://github.com/voicepaw/so-vits-svc-fork) to clone a voice. Model currently being used is https://hf.co/{repo_id}. To change the model being served, duplicate the space and update the `repo_id`/other settings in `app.py`.
-#### Train Your Own: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/nateraw/voice-cloning/blob/main/training_so_vits_svc_fork.ipynb)
 """.strip()
 article = """
-<p style='text-align: center'> 注意❗：请不要生成会对个人以及组织造成侵害的内容，此程序仅供科研、学习及个人娱乐使用。用户生成内容与程序开发者无关，请自觉合法合规使用，违反者一切后果自负。
 </p>
 """.strip()
@@ -224,16 +221,16 @@ article = """
 interface_mic = gr.Interface(
     predict,
     inputs=[
-        gr.Dropdown(speakers, value=speakers[0], label="Target Speaker"),
-        gr.Audio(type="filepath", source="microphone", label="Source Audio"),
-        gr.Slider(-12, 12, value=0, step=1, label="Transpose (Semitones)"),
-        gr.Checkbox(False, label="Auto Predict F0"),
-        gr.Slider(0.0, 1.0, value=default_cluster_infer_ratio, step=0.1, label="cluster infer ratio"),
-        gr.Slider(0.0, 1.0, value=0.4, step=0.1, label="noise scale"),
         gr.Dropdown(
             choices=["crepe", "crepe-tiny", "parselmouth", "dio", "harvest"],
             value=default_f0_method,
-            label="f0 method",
         ),
     ],
     outputs="audio",
@@ -244,16 +241,16 @@ interface_mic = gr.Interface(
 interface_file = gr.Interface(
     predict,
     inputs=[
-        gr.Dropdown(speakers, value=speakers[0], label="Target Speaker"),
-        gr.Audio(type="filepath", source="upload", label="Source Audio"),
-        gr.Slider(-12, 12, value=0, step=1, label="Transpose (Semitones)"),
-        gr.Checkbox(False, label="Auto Predict F0"),
-        gr.Slider(0.0, 1.0, value=default_cluster_infer_ratio, step=0.1, label="cluster infer ratio"),
-        gr.Slider(0.0, 1.0, value=0.4, step=0.1, label="noise scale"),
         gr.Dropdown(
             choices=["crepe", "crepe-tiny", "parselmouth", "dio", "harvest"],
             value=default_f0_method,
-            label="f0 method",
         ),
     ],
     outputs="audio",
@@ -265,23 +262,23 @@ interface_yt = gr.Interface(
     predict_song_from_yt,
     inputs=[
         gr.Textbox(
-            label="Bilibili网址", info="请填写含有您喜欢的声音的Bilibili网址"
         ),
-        gr.Number(value=0, label="Start Time (seconds)"),
-        gr.Number(value=15, label="End Time (seconds)"),
-        gr.Dropdown(speakers, value=speakers[0], label="Target Speaker"),
-        gr.Slider(-12, 12, value=0, step=1, label="Transpose (Semitones)"),
-        gr.Checkbox(False, label="Auto Predict F0"),
-        gr.Slider(0.0, 1.0, value=default_cluster_infer_ratio, step=0.1, label="cluster infer ratio"),
-        gr.Slider(0.0, 1.0, value=0.4, step=0.1, label="noise scale"),
         gr.Dropdown(
             choices=["crepe", "crepe-tiny", "parselmouth", "dio", "harvest"],
             value=default_f0_method,
-            label="f0 method",
         ),
     ],
-    outputs=["audio", "audio"],
-    title="🥳🎶🎡 - AI歌手：可从网址直接上传素材，且无需分离背景音",
     description=description,
     article=article,
     examples=[
@@ -290,7 +287,7 @@ interface_yt = gr.Interface(
 )
 interface = gr.TabbedInterface(
     [interface_mic, interface_file, interface_yt],
-    ["从麦克风上传", "从文件上传", "从Bilibili上传"],
 )

     full_song = inst_wav + np.expand_dims(cloned_vox, 1)
     return (model.target_sample, full_song), (model.target_sample, cloned_vox)
+description = f"""
+### <center>🌊 滔滔AI合作音乐人：[一清清清](https://space.bilibili.com/22960772?spm_id_from=333.337.0.0)</center>
 """.strip()
 article = """
+<p style='text-align: center'> 注意❗：请不要生成会对个人以及组织造成侵害的内容，此程序仅供科研、学习及个人娱乐使用。
 </p>
 """.strip()
 interface_mic = gr.Interface(
     predict,
     inputs=[
+        gr.Dropdown(speakers, value=speakers[0], label="AI歌手-一清清清"),
+        gr.Audio(type="filepath", source="microphone", label="请用麦克风上传您想转换的歌曲"),
+        gr.Slider(-12, 12, value=0, step=1, label="变调 (默认为0；+2为升高两个key)"),
+        gr.Checkbox(False, label="是否开启自动f0预测", info="配合聚类模型f0预测效果更好，仅限转换语音时使用"),
+        gr.Slider(0.0, 1.0, value=default_cluster_infer_ratio, step=0.1, label="聚类模型混合比例", info="0-1之间，0即不启用聚类。使用聚类模型能提升音色相似度，但会导致咬字下降 (如果使用，建议0.5左右)"),
+        gr.Slider(0.0, 1.0, value=0.4, step=0.1, label="noise scale (建议保持不变)"),
         gr.Dropdown(
             choices=["crepe", "crepe-tiny", "parselmouth", "dio", "harvest"],
             value=default_f0_method,
+            label="模型推理方法 (crepe推理效果最好)",
         ),
     ],
     outputs="audio",
 interface_file = gr.Interface(
     predict,
     inputs=[
+        gr.Dropdown(speakers, value=speakers[0], label="AI歌手-一清清清"),
+        gr.Audio(type="filepath", source="upload", label="请上传您想转换的歌曲 (仅人声部分)"),
+        gr.Slider(-12, 12, value=0, step=1, label="变调 (默认为0；+2为升高两个key)"),
+        gr.Checkbox(False, label="是否开启自动f0预测", info="配合聚类模型f0预测效果更好，仅限转换语音时使用"),
+        gr.Slider(0.0, 1.0, value=default_cluster_infer_ratio, step=0.1, label="聚类模型混合比例", info="0-1之间，0即不启用聚类。使用聚类模型能提升音色相似度，但会导致咬字下降 (如果使用，建议0.5左右)"),
+        gr.Slider(0.0, 1.0, value=0.4, step=0.1, label="noise scale (建议保持不变)"),
         gr.Dropdown(
             choices=["crepe", "crepe-tiny", "parselmouth", "dio", "harvest"],
             value=default_f0_method,
+            label="模型推理方法 (crepe推理效果最好)",
         ),
     ],
     outputs="audio",
     predict_song_from_yt,
     inputs=[
         gr.Textbox(
+            label="Bilibili网址", info="请填写含有您喜欢歌曲的Bilibili网址"
         ),
+        gr.Number(value=0, label="起始时间 (秒)"),
+        gr.Number(value=15, label="结束时间 (秒)"),
+        gr.Dropdown(speakers, value=speakers[0], label="AI歌手-一清清清"),
+        gr.Slider(-12, 12, value=0, step=1, label="变调 (默认为0；+2为升高两个key)"),
+        gr.Checkbox(False, label="是否开启自动f0预测", info="配合聚类模型f0预测效果更好，仅限转换语音时使用"),
+        gr.Slider(0.0, 1.0, value=default_cluster_infer_ratio, step=0.1, label="聚类模型混合比例", info="0-1之间，0即不启用聚类。使用聚类模型能提升音色相似度，但会导致咬字下降 (如果使用，建议0.5左右)"),
+        gr.Slider(0.0, 1.0, value=0.4, step=0.1, label="noise scale (建议保持不变)"),
         gr.Dropdown(
             choices=["crepe", "crepe-tiny", "parselmouth", "dio", "harvest"],
             value=default_f0_method,
+            label="模型推理方法 (crepe推理效果最好)",
         ),
     ],
+    outputs=[gr.Audio(label="AI歌手+伴奏"), gr.Audio(label="AI歌手人声部分")],
+    title="🥳🎶🌊 - 滔滔AI+音乐：可从B站直接上传素材，无需分离背景音",
     description=description,
     article=article,
     examples=[
 )
 interface = gr.TabbedInterface(
     [interface_mic, interface_file, interface_yt],
+    ["🎙️ - 从麦克风上传", "🎵 - 从文件上传", "📺 - 从B站视频上传（推荐）"],
 )