Spaces:

ArkanDash
/

rvc-genshin-impact

Running on CPU Upgrade

App Files Files Community

ArkanDash committed on May 14, 2023

Commit

db06f79

1 Parent(s): d028cb0

feat(app): add support for model_info.json

Browse files

Files changed (5) hide show

app-full.py +62 -32
app.py +62 -31
config.py +5 -5
requirements-full.txt +0 -1
weights/model_info.json +10 -0

app-full.py CHANGED Viewed

@@ -151,36 +151,65 @@ if __name__ == '__main__':
     models = []
     tts_voice_list = asyncio.get_event_loop().run_until_complete(edge_tts.list_voices())
     voices = [f"{v['ShortName']}-{v['Gender']}" for v in tts_voice_list]
-    folder_path = "weights"
-    for name in os.listdir(folder_path):
-        print("check folder: " + name)
-        if name.startswith("."): break
-        cover_path = glob.glob(f"{folder_path}/{name}/*.png") + glob.glob(f"{folder_path}/{name}/*.jpg")
-        index_path = glob.glob(f"{folder_path}/{name}/*.index")
-        checkpoint_path = glob.glob(f"{folder_path}/{name}/*.pth")
-        title = name
-        if cover_path:
-            cover = cover_path[0]
-        else:
-            cover = ""
-        index = index_path[0]
-        cpt = torch.load(checkpoint_path[0], map_location="cpu")
-        tgt_sr = cpt["config"][-1]
-        cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0]  # n_spk
-        if_f0 = cpt.get("f0", 1)
-        if if_f0 == 1:
-            net_g = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=config.is_half)
-        else:
-            net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
-        del net_g.enc_q
-        print(net_g.load_state_dict(cpt["weight"], strict=False))  # 不加这一行清不干净, 真奇葩
-        net_g.eval().to(config.device)
-        if config.is_half:
-            net_g = net_g.half()
-        else:
-            net_g = net_g.float()
-        vc = VC(tgt_sr, config)
-        models.append((name, title, cover, create_vc_fn(tgt_sr, net_g, vc, if_f0, index)))
     with gr.Blocks() as app:
         gr.Markdown(
             "# <center> RVC Models (Latest Update)\n"
@@ -190,12 +219,13 @@ if __name__ == '__main__':
             "[![Original Repo](https://badgen.net/badge/icon/github?icon=github&label=Original%20Repo)](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI)"
         )
         with gr.Tabs():
-            for (name, title, cover, vc_fn) in models:
                 with gr.TabItem(name):
                     with gr.Row():
                         gr.Markdown(
                             '<div align="center">'
                             f'<div>{title}</div>\n'+
                             (f'<img style="width:auto;height:300px;" src="file/{cover}">' if cover else "")+
                             '</div>'
                         )
@@ -251,4 +281,4 @@ if __name__ == '__main__':
                 if config.files:
                     vc_convert.click(cut_vocal_and_inst, vc_youtube, [vc_vocal_preview, vc_inst_preview, vc_audio_preview, vc_input])
                     vc_combine.click(combine_vocal_and_inst, [vc_output2, vc_volume], vc_outputCombine)
-        app.queue(concurrency_count=1, max_size=20, api_open=config.api).launch(share=config.share)

     models = []
     tts_voice_list = asyncio.get_event_loop().run_until_complete(edge_tts.list_voices())
     voices = [f"{v['ShortName']}-{v['Gender']}" for v in tts_voice_list]
+    if config.json:
+        with open("weights/model_info.json", "r", encoding="utf-8") as f:
+            models_info = json.load(f)
+        for name, info in models_info.items():
+            if not info['enable']:
+                continue
+            title = info['title']
+            author = info.get("author", None)
+            cover = f"weights/{name}/{info['cover']}"
+            index = f"weights/{name}/{info['feature_retrieval_library']}"
+            cpt = torch.load(f"weights/{name}/{name}.pth", map_location="cpu")
+            tgt_sr = cpt["config"][-1]
+            cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0]  # n_spk
+            if_f0 = cpt.get("f0", 1)
+            if if_f0 == 1:
+                net_g = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=config.is_half)
+            else:
+                net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
+            del net_g.enc_q
+            print(net_g.load_state_dict(cpt["weight"], strict=False))  # 不加这一行清不干净, 真奇葩
+            net_g.eval().to(config.device)
+            if config.is_half:
+                net_g = net_g.half()
+            else:
+                net_g = net_g.float()
+            vc = VC(tgt_sr, config)
+            models.append((name, title, author, cover, create_vc_fn(tgt_sr, net_g, vc, if_f0, index)))
+    else:
+        folder_path = "weights"
+        for name in os.listdir(folder_path):
+            print("check folder: " + name)
+            if name.startswith("."): break
+            cover_path = glob.glob(f"{folder_path}/{name}/*.png") + glob.glob(f"{folder_path}/{name}/*.jpg")
+            index_path = glob.glob(f"{folder_path}/{name}/*.index")
+            checkpoint_path = glob.glob(f"{folder_path}/{name}/*.pth")
+            title = name
+            author = ""
+            if cover_path:
+                cover = cover_path[0]
+            else:
+                cover = ""
+            index = index_path[0]
+            cpt = torch.load(checkpoint_path[0], map_location="cpu")
+            tgt_sr = cpt["config"][-1]
+            cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0]  # n_spk
+            if_f0 = cpt.get("f0", 1)
+            if if_f0 == 1:
+                net_g = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=config.is_half)
+            else:
+                net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
+            del net_g.enc_q
+            print(net_g.load_state_dict(cpt["weight"], strict=False))  # 不加这一行清不干净, 真奇葩
+            net_g.eval().to(config.device)
+            if config.is_half:
+                net_g = net_g.half()
+            else:
+                net_g = net_g.float()
+            vc = VC(tgt_sr, config)
+            models.append((name, title, author, cover, create_vc_fn(tgt_sr, net_g, vc, if_f0, index)))
     with gr.Blocks() as app:
         gr.Markdown(
             "# <center> RVC Models (Latest Update)\n"
             "[![Original Repo](https://badgen.net/badge/icon/github?icon=github&label=Original%20Repo)](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI)"
         )
         with gr.Tabs():
+            for (name, title, author, cover, vc_fn) in models:
                 with gr.TabItem(name):
                     with gr.Row():
                         gr.Markdown(
                             '<div align="center">'
                             f'<div>{title}</div>\n'+
+                            (f'<div>Model author: {author}</div>' if author else "")+
                             (f'<img style="width:auto;height:300px;" src="file/{cover}">' if cover else "")+
                             '</div>'
                         )
                 if config.files:
                     vc_convert.click(cut_vocal_and_inst, vc_youtube, [vc_vocal_preview, vc_inst_preview, vc_audio_preview, vc_input])
                     vc_combine.click(combine_vocal_and_inst, [vc_output2, vc_volume], vc_outputCombine)
+        app.queue(concurrency_count=1, max_size=20, api_open=config.api).launch(share=config.colab)

app.py CHANGED Viewed

@@ -102,41 +102,71 @@ if __name__ == '__main__':
     models = []
     tts_voice_list = asyncio.get_event_loop().run_until_complete(edge_tts.list_voices())
     voices = [f"{v['ShortName']}-{v['Gender']}" for v in tts_voice_list]
-    folder_path = "weights"
-    for name in os.listdir(folder_path):
-        print("check folder: " + name)
-        if name.startswith("."): break
-        cover_path = glob.glob(f"{folder_path}/{name}/*.png") + glob.glob(f"{folder_path}/{name}/*.jpg")
-        index_path = glob.glob(f"{folder_path}/{name}/*.index")
-        checkpoint_path = glob.glob(f"{folder_path}/{name}/*.pth")
-        title = name
-        if cover_path:
-            cover = cover_path[0]
-        else:
-            cover = ""
-        index = index_path[0]
-        cpt = torch.load(checkpoint_path[0], map_location="cpu")
-        tgt_sr = cpt["config"][-1]
-        cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0]  # n_spk
-        if_f0 = cpt.get("f0", 1)
-        if if_f0 == 1:
-            net_g = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=config.is_half)
-        else:
-            net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
-        del net_g.enc_q
-        print(net_g.load_state_dict(cpt["weight"], strict=False))  # 不加这一行清不干净, 真奇葩
-        net_g.eval().to(config.device)
-        if config.is_half:
-            net_g = net_g.half()
-        else:
-            net_g = net_g.float()
-        vc = VC(tgt_sr, config)
-        models.append((name, title, cover, create_vc_fn(tgt_sr, net_g, vc, if_f0, index)))
     with gr.Blocks() as app:
         gr.Markdown(
             "# <center> RVC Models (Latest Update)\n"
             "## <center> The input audio should be clean and pure voice without background music.\n"
             "### <center> Recommended to use google colab for more features. \n"
             "[![image](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/110kiMZTdP6Ri1lY9-NbQf17GVPPhHyeT?usp=sharing)\n\n"
             "[![Original Repo](https://badgen.net/badge/icon/github?icon=github&label=Original%20Repo)](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI)"
         )
@@ -147,6 +177,7 @@ if __name__ == '__main__':
                         gr.Markdown(
                             '<div align="center">'
                             f'<div>{title}</div>\n'+
                             (f'<img style="width:auto;height:300px;" src="file/{cover}">' if cover else "")+
                             '</div>'
                         )
@@ -176,4 +207,4 @@ if __name__ == '__main__':
                             vc_output2 = gr.Audio(label="Output Audio")
                 vc_submit.click(vc_fn, [vc_input, vc_transpose, vc_f0method, vc_index_ratio, tts_mode, tts_text, tts_voice], [vc_output1, vc_output2])
                 tts_mode.change(change_to_tts_mode, [tts_mode], [vc_input, tts_text, tts_voice])
-        app.queue(concurrency_count=1, max_size=20, api_open=config.api).launch(share=config.share)

     models = []
     tts_voice_list = asyncio.get_event_loop().run_until_complete(edge_tts.list_voices())
     voices = [f"{v['ShortName']}-{v['Gender']}" for v in tts_voice_list]
+    if config.json or limitation:
+        with open("weights/model_info.json", "r", encoding="utf-8") as f:
+            models_info = json.load(f)
+        for name, info in models_info.items():
+            if not info['enable']:
+                continue
+            title = info['title']
+            author = info.get("author", None)
+            cover = f"weights/{name}/{info['cover']}"
+            index = f"weights/{name}/{info['feature_retrieval_library']}"
+            cpt = torch.load(f"weights/{name}/{name}.pth", map_location="cpu")
+            tgt_sr = cpt["config"][-1]
+            cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0]  # n_spk
+            if_f0 = cpt.get("f0", 1)
+            if if_f0 == 1:
+                net_g = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=config.is_half)
+            else:
+                net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
+            del net_g.enc_q
+            print(net_g.load_state_dict(cpt["weight"], strict=False))  # 不加这一行清不干净, 真奇葩
+            net_g.eval().to(config.device)
+            if config.is_half:
+                net_g = net_g.half()
+            else:
+                net_g = net_g.float()
+            vc = VC(tgt_sr, config)
+            models.append((name, title, author, cover, create_vc_fn(tgt_sr, net_g, vc, if_f0, index)))
+    else:
+        folder_path = "weights"
+        for name in os.listdir(folder_path):
+            print("check folder: " + name)
+            if name.startswith("."): break
+            cover_path = glob.glob(f"{folder_path}/{name}/*.png") + glob.glob(f"{folder_path}/{name}/*.jpg")
+            index_path = glob.glob(f"{folder_path}/{name}/*.index")
+            checkpoint_path = glob.glob(f"{folder_path}/{name}/*.pth")
+            title = name
+            author = ""
+            if cover_path:
+                cover = cover_path[0]
+            else:
+                cover = ""
+            index = index_path[0]
+            cpt = torch.load(checkpoint_path[0], map_location="cpu")
+            tgt_sr = cpt["config"][-1]
+            cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0]  # n_spk
+            if_f0 = cpt.get("f0", 1)
+            if if_f0 == 1:
+                net_g = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=config.is_half)
+            else:
+                net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
+            del net_g.enc_q
+            print(net_g.load_state_dict(cpt["weight"], strict=False))  # 不加这一行清不干净, 真奇葩
+            net_g.eval().to(config.device)
+            if config.is_half:
+                net_g = net_g.half()
+            else:
+                net_g = net_g.float()
+            vc = VC(tgt_sr, config)
+            models.append((name, title, author, cover, create_vc_fn(tgt_sr, net_g, vc, if_f0, index)))
     with gr.Blocks() as app:
         gr.Markdown(
             "# <center> RVC Models (Latest Update)\n"
             "## <center> The input audio should be clean and pure voice without background music.\n"
             "### <center> Recommended to use google colab for more features. \n"
+            "##### <center> Total_fea.npy is depricated.\nPlease regenerate your model to latest RVC.\n"
             "[![image](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/110kiMZTdP6Ri1lY9-NbQf17GVPPhHyeT?usp=sharing)\n\n"
             "[![Original Repo](https://badgen.net/badge/icon/github?icon=github&label=Original%20Repo)](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI)"
         )
                         gr.Markdown(
                             '<div align="center">'
                             f'<div>{title}</div>\n'+
+                            (f'<div>Model author: {author}</div>' if author else "")+
                             (f'<img style="width:auto;height:300px;" src="file/{cover}">' if cover else "")+
                             '</div>'
                         )
                             vc_output2 = gr.Audio(label="Output Audio")
                 vc_submit.click(vc_fn, [vc_input, vc_transpose, vc_f0method, vc_index_ratio, tts_mode, tts_text, tts_voice], [vc_output1, vc_output2])
                 tts_mode.change(change_to_tts_mode, [tts_mode], [vc_input, tts_text, tts_voice])
+        app.queue(concurrency_count=1, max_size=20, api_open=config.api).launch(share=config.colab)

config.py CHANGED Viewed

@@ -13,12 +13,12 @@ class Config:
         (
             self.python_cmd,
             self.listen_port,
-            self.iscolab,
             self.noparallel,
             self.noautoopen,
             self.api,
-            self.share,
-            self.files
         ) = self.arg_parse()
         self.x_pad, self.x_query, self.x_center, self.x_max = self.device_config()
@@ -39,8 +39,8 @@ class Config:
             help="Do not open in browser automatically",
         )
         parser.add_argument('--api', action="store_true", default=False)
-        parser.add_argument("--share", action="store_true", default=False, help="share gradio app")
         parser.add_argument("--files", action="store_true", default=False, help="load audio from path")
         cmd_opts = parser.parse_args()
         cmd_opts.port = cmd_opts.port if 0 <= cmd_opts.port <= 65535 else 7865
@@ -52,8 +52,8 @@ class Config:
             cmd_opts.noparallel,
             cmd_opts.noautoopen,
             cmd_opts.api,
-            cmd_opts.share,
             cmd_opts.files,
         )
     def device_config(self) -> tuple:

         (
             self.python_cmd,
             self.listen_port,
+            self.colab,
             self.noparallel,
             self.noautoopen,
             self.api,
+            self.files,
+            self.json
         ) = self.arg_parse()
         self.x_pad, self.x_query, self.x_center, self.x_max = self.device_config()
             help="Do not open in browser automatically",
         )
         parser.add_argument('--api', action="store_true", default=False)
         parser.add_argument("--files", action="store_true", default=False, help="load audio from path")
+        parser.add_argument("--json", action="store_true", default=False, help="use model_info.json")
         cmd_opts = parser.parse_args()
         cmd_opts.port = cmd_opts.port if 0 <= cmd_opts.port <= 65535 else 7865
             cmd_opts.noparallel,
             cmd_opts.noautoopen,
             cmd_opts.api,
             cmd_opts.files,
+            cmd_opts.json
         )
     def device_config(self) -> tuple:

requirements-full.txt CHANGED Viewed

@@ -44,6 +44,5 @@ audioread
 uvicorn>=0.21.1
 colorama>=0.4.6
 edge-tts
-demucs
 yt_dlp
 ffmpeg

 uvicorn>=0.21.1
 colorama>=0.4.6
 edge-tts
 yt_dlp
 ffmpeg

weights/model_info.json ADDED Viewed

	@@ -0,0 +1,10 @@

+{
+	"nilou-jp": {
+		"enable": true,
+		"name": "nilou-jp",
+        "title": "Genshin Impact - Nilou",
+        "cover": "cover.png",
+		"feature_retrieval_library": "added_IVF218_Flat_nprobe_5.index",
+		"author":"ArkanDash"
+    }
+}