Spaces:

candlend
/

vits-hoshimi

Runtime error

App Files Files Community

candlend committed on Nov 20, 2022

Commit

b48b8c0

•

1 Parent(s): 7a669c0

tts_inferencer

Browse files

Files changed (2) hide show

app.py +5 -19
inferencer.py → tts_inferencer.py +30 -18

app.py CHANGED Viewed

@@ -1,27 +1,13 @@
-import os
-import commons
-import utils
-from models import SynthesizerTrn
-from text.symbols import symbols
-from text import text_to_sequence
 import gradio as gr
-from inferencer import Inferencer
 app = gr.Blocks()
 with app:
     with open("header.html", "r") as f:
         gr.HTML(f.read())
     with gr.Tabs():
-        with gr.TabItem("普通声线"):
-            normal_description = """
-            使用星弥Hoshimi录播音频作为数据集训练而成
-            """
-            normal_inferencer = Inferencer("normal", "./configs/hoshimi_base.json", description=normal_description)
-            normal_inferencer.render()
-        with gr.TabItem("营业声线"):
-            formal_description = """
-            使用星弥Hoshimi音声作为数据集训练而成
-            """
-            formal_inferencer = Inferencer("formal", "./configs/hoshimi_base.json", description=formal_description)
-            formal_inferencer.render()
     app.launch()

 import gradio as gr
+from tts_inferencer import TTSInferencer
+tts_inferencer = TTSInferencer("./configs/hoshimi_base.json")
 app = gr.Blocks()
 with app:
     with open("header.html", "r") as f:
         gr.HTML(f.read())
     with gr.Tabs():
+        with gr.TabItem("语音合成"):
+            tts_inferencer.render()
     app.launch()

inferencer.py → tts_inferencer.py RENAMED Viewed

@@ -13,6 +13,12 @@ from text.symbols import symbols
 from text import text_to_sequence
 import gradio as gr
 default_noise_scale = 0.667
 default_noise_scale_w = 0.8
 default_length_scale = 1
@@ -24,21 +30,23 @@ def get_text(text, hps):
     text_norm = torch.LongTensor(text_norm)
     return text_norm
-class Inferencer:
-    def __init__(self, mode, hps_path, description):
         self.mode = mode
-        self.description = description
         self.models = []
-        self.model_dir_path = os.path.join("models", mode)
         for f in os.listdir(self.model_dir_path):
             if (f.startswith("D_")):
                 continue
             if (f.endswith(".pth")):
                 self.models.append(f)
-        self.device = torch.device("cpu")
-        self.hps = utils.get_hparams_from_file(hps_path)
-        model_path = utils.latest_checkpoint_path(self.model_dir_path, "G_*.pth")
-        self.load_model(model_path)
     def infer(self, text, noise_scale=.667, noise_scale_w=0.8, length_scale=1):
         stn_tst = get_text(text, self.hps)
@@ -48,12 +56,17 @@ class Inferencer:
             audio = self.net_g.infer(x_tst, x_tst_lengths, noise_scale=noise_scale, noise_scale_w=noise_scale_w, length_scale=length_scale)[0][0,0].data.float().numpy()
         return (self.hps.data.sampling_rate, audio)
     def change_model(self, model_file_name):
         self.load_model(os.path.join(self.model_dir_path, model_file_name))
-        return "载入模型：" + model_file_name
     def render(self):
-        choice_model = gr.Dropdown(choices=self.models, label=f"模型迭代版本选择（{self.description}）", value=os.path.basename(self.pth_path))
         with gr.Row():
             advanced = gr.Checkbox(label="显示高级设置（效果不可控）")
             default = gr.Button("恢复默认设置").style(full_width=False)
@@ -74,15 +87,14 @@ class Inferencer:
                 <div>仅供学习交流，不可用于任何商业和非法用途，否则后果自负</div>
             </div>
         ''')
-        for component in [noise_scale, noise_scale_w]:
-            advanced.change(fn=lambda visible: gr.update(visible=visible), inputs=advanced, outputs=component)
-        for component, default_value in [
-            (noise_scale, default_noise_scale),
-            (noise_scale_w, default_noise_scale_w),
-            (length_scale, default_length_scale)]:
-            default.click(fn=lambda visible: gr.update(value=default_value), inputs=advanced, outputs=component)
         choice_model.change(self.change_model, inputs=[choice_model], outputs=[tts_model])
-        tts_submit.click(self.infer, [tts_input, noise_scale, noise_scale_w, length_scale], [tts_output], api_name=f"{self.mode}_infer")
     def load_model(self, model_path):

 from text import text_to_sequence
 import gradio as gr
+mode_dict = {
+    "普通声线": "normal",
+    "营业声线": "formal"
+}
+default_mode = "普通声线"
 default_noise_scale = 0.667
 default_noise_scale_w = 0.8
 default_length_scale = 1
     text_norm = torch.LongTensor(text_norm)
     return text_norm
+class TTSInferencer:
+    def __init__(self, hps_path, device="cpu"):
+        self.device = torch.device(device)
+        self.hps = utils.get_hparams_from_file(hps_path)
+        self.select_mode(default_mode)
+        self.load_model(self.latest_model_path)
+    def select_mode(self, mode):
         self.mode = mode
+        self.model_dir_path = os.path.join("models", mode_dict[mode])
         self.models = []
         for f in os.listdir(self.model_dir_path):
             if (f.startswith("D_")):
                 continue
             if (f.endswith(".pth")):
                 self.models.append(f)
+        self.latest_model_path = utils.latest_checkpoint_path(self.model_dir_path, "G_*.pth")
     def infer(self, text, noise_scale=.667, noise_scale_w=0.8, length_scale=1):
         stn_tst = get_text(text, self.hps)
             audio = self.net_g.infer(x_tst, x_tst_lengths, noise_scale=noise_scale, noise_scale_w=noise_scale_w, length_scale=length_scale)[0][0,0].data.float().numpy()
         return (self.hps.data.sampling_rate, audio)
+    def change_mode(self, mode):
+        self.select_mode(mode)
+        return gr.update(choices=self.models, value=os.path.basename(self.latest_model_path))
     def change_model(self, model_file_name):
         self.load_model(os.path.join(self.model_dir_path, model_file_name))
+        return f"载入模型：{model_file_name}（{self.mode}）"
     def render(self):
+        choice_mode = gr.Radio(choices=["普通声线", "营业声线"], label="声线选择", value=default_mode)
+        choice_model = gr.Dropdown(choices=self.models, label=f"模型迭代版本选择", value=os.path.basename(self.pth_path))
         with gr.Row():
             advanced = gr.Checkbox(label="显示高级设置（效果不可控）")
             default = gr.Button("恢复默认设置").style(full_width=False)
                 <div>仅供学习交流，不可用于任何商业和非法用途，否则后果自负</div>
             </div>
         ''')
+        advanced.change(fn=lambda visible: gr.update(visible=visible), inputs=advanced, outputs=noise_scale)
+        advanced.change(fn=lambda visible: gr.update(visible=visible), inputs=advanced, outputs=noise_scale_w)
+        default.click(fn=lambda visible: gr.update(value=default_noise_scale), inputs=advanced, outputs=noise_scale)
+        default.click(fn=lambda visible: gr.update(value=default_noise_scale_w), inputs=advanced, outputs=noise_scale_w)
+        default.click(fn=lambda visible: gr.update(value=default_length_scale), inputs=advanced, outputs=length_scale)
+        choice_mode.change(self.change_mode, inputs=choice_mode, outputs=choice_model)
         choice_model.change(self.change_model, inputs=[choice_model], outputs=[tts_model])
+        tts_submit.click(self.infer, [tts_input, noise_scale, noise_scale_w, length_scale], [tts_output], api_name=f"infer")
     def load_model(self, model_path):