Spaces:

yl12053
/

so-vits-4.1-Special-Week

Running

App Files Files Community

yl12053 committed on Jul 23, 2023

Commit

c112a37

•

1 Parent(s): eec8126

COM

Browse files

Files changed (1) hide show

app.py +221 -128

app.py CHANGED Viewed

@@ -39,6 +39,8 @@ debug = False
 sovits_params = {}
 diff_params = {}
 def debug_change():
     global debug
     debug = debug_button.value
@@ -140,6 +142,51 @@ def load_model_func(ckpt_name,cluster_name,config_name,enhance,diff_model_name,d
     output_msg = f"模型被成功加载到了{device_name}上\n{index_or_kmeans}：{clu_load}\n扩散模型：{diff_load}"
     return output_msg, gr.Dropdown.update(choices=spk_list, value=spk_choice), clip
 def get_file_options(directory, extension):
     return [file for file in os.listdir(directory) if file.endswith(extension)]
@@ -662,6 +709,22 @@ gpus="-".join([i[0]for i in gpu_infos])
 sovits_params, diff_params = get_default_settings()
 app = gr.Blocks()
 with app:
     gr.Markdown(value="""
         ### So-VITS-SVC 4.1-Stable
@@ -676,31 +739,40 @@ with app:
         """)
     with gr.Tabs():
-        with gr.TabItem("特别周 (Special Week)"):
-            with gr.Row():
-                choice_ckpt = gr.Dropdown(label="模型选择", choices=ckpt_list, value="no_model")
-                model_branch = gr.Textbox(label="模型编码器", placeholder="请先选择模型", interactive=False)
             with gr.Row():
-                config_choice = gr.Dropdown(label="配置文件", choices=config_list, value="no_config")
-                config_info = gr.Textbox(label="配置文件编码器", placeholder="请选择配置文件")
-            gr.Markdown(value="""**请检查模型和配置文件的编码器是否匹配**""")
-            with gr.Row():
-                diff_choice = gr.Dropdown(label="（可选）选择扩散模型", choices=diff_list, value="no_diff", interactive=True)
-                diff_config_choice = gr.Dropdown(label="扩散模型配置文件", choices=diff_config_list, value="no_diff_config", interactive=True)
-            cluster_choice = gr.Dropdown(label="（可选）选择聚类模型/特征检索模型", choices=cluster_list, value="no_clu")
             with gr.Row():
                 enhance = gr.Checkbox(label="是否使用NSF_HIFIGAN增强，该选项对部分训练集少的模型有一定的音质增强效果，但是对训练好的模型有反面效果，默认关闭", value=False)
-                only_diffusion = gr.Checkbox(label="是否使用全扩散推理，开启后将不使用So-VITS模型，仅使用扩散模型进行完整扩散推理，默认关闭", value=False)
-            using_device = gr.Dropdown(label="推理设备，默认为自动选择", choices=["Auto","cuda","cpu"], value="Auto")
-            refresh = gr.Button("刷新选项")
-            loadckpt = gr.Button("加载模型", variant="primary")
-            with gr.Row():
-                model_message = gr.Textbox(label="Output Message")
-                sid = gr.Dropdown(label="So-VITS说话人", value="speaker0")
-            choice_ckpt.change(get_model_info, [choice_ckpt], [model_branch])
-            config_choice.change(load_json_encoder, [config_choice], [config_info])
-            refresh.click(refresh_options,[],[choice_ckpt,config_choice,cluster_choice,diff_choice,diff_config_choice])
             gr.Markdown(value="""
                 请稍等片刻，模型加载大约需要10秒。后续操作不需要重新加载模型
@@ -755,102 +827,123 @@ with app:
                 vc_tts_submit = gr.Button("文本转语音", variant="primary")
             vc_output1 = gr.Textbox(label="Output Message")
             vc_output2 = gr.Audio(label="Output Audio")
-        loadckpt.click(load_model_func,[choice_ckpt,cluster_choice,config_choice,enhance,diff_choice,diff_config_choice,only_diffusion,model_branch,using_device],[model_message, sid, cl_num])
-        vc_submit.click(vc_fn, [sid, vc_input3, vc_transform,auto_f0,cluster_ratio, slice_db, noise_scale,pad_seconds,cl_num,lg_num,lgr_num,f0_predictor,enhancer_adaptive_key,cr_threshold,k_step,use_spk_mix,second_encoding,loudness_envelope_adjustment], [vc_output1, vc_output2])
         vc_batch_submit.click(vc_batch_fn, [sid, vc_batch_files, vc_transform,auto_f0,cluster_ratio, slice_db, noise_scale,pad_seconds,cl_num,lg_num,lgr_num,f0_predictor,enhancer_adaptive_key,cr_threshold,k_step,use_spk_mix,second_encoding,loudness_envelope_adjustment], [vc_output1])
         vc_tts_submit.click(tts_fn, [text_input, tts_spk, sid, vc_transform,auto_f0,cluster_ratio, slice_db, noise_scale,pad_seconds,cl_num,lg_num,lgr_num,f0_predictor,enhancer_adaptive_key,cr_threshold,k_step,use_spk_mix,second_encoding,loudness_envelope_adjustment], [vc_output1, vc_output2])
-        #with gr.TabItem("训练"):
-        #    gr.Markdown(value="""请将数据集文件夹放置在dataset_raw文件夹下，确认放置正确后点击下方获取数据集名称""")
-        #    raw_dirs_list=gr.Textbox(label="Raw dataset directory(s):")
-        #    get_raw_dirs=gr.Button("识别数据集", variant="primary")
-        #    gr.Markdown(value="""确认数据集正确识别后请选择训练使用的特征编码器和f0预测器，**如果要训练扩散模型，请选择Vec768l12或hubertsoft，并确保So-VITS和扩散模型使用同一个编码器**""")
-        #    with gr.Row():
-        #        gr.Markdown(value="""**vec256l9**: ContentVec(256Layer9)，旧版本叫v1，So-VITS-SVC 4.0的基础版本，**暂不支持扩散模型**
-        #                        **vec768l12**: 特征输入更换为ContentVec的第12层Transformer输出，模型理论上会更加还原训练集音色
-        #                        **hubertsoft**: So-VITS-SVC 3.0使用的编码器，咬字更为准确，但可能存在多说话人音色泄露问题
-        #                        **whisper-ppg**: 来自OpenAI，咬字最为准确，但和Hubertsoft一样存在多说话人音色泄露，且显存占用和训练时间有明显增加。**暂不支持扩散模型**
-        #        """)
-        #        gr.Markdown(value="""**crepe**: 抗噪能力最强，但预处理速度慢（不过如果你的显卡很强的话速度会很快）
-        #                        **pm**: 预处理速度快，但抗噪能力较弱
-        #                        **dio**: 先前版本预处理默认使用的f0预测器
-        #                        **harvest**: 有一定抗噪能力，预处理显存占用友好，速度比较慢
-        #        """)
-        #    with gr.Row():
-        #        branch_selection = gr.Radio(label="选择训练使用的编码器", choices=["vec256l9","vec768l12","hubertsoft","whisper-ppg"], value="vec768l12", interactive=True)
-        #        f0_predictor_selection = gr.Radio(label="选择训练使用的f0预测器", choices=["crepe","pm","dio","harvest"], value="crepe", interactive=True)
-        #        use_diff = gr.Checkbox(label="是否使用浅扩散模型，如要训练浅扩散模型请勾选此项", value=True)
-        #        vol_aug=gr.Checkbox(label="是否启用响度嵌入和音量增强，启用后可以根据输入源控制输出响度，但对数据集质量的要求更高。**仅支持vec768l12编码器**", value=False)
-        #    with gr.Row():
-        #        skip_loudnorm = gr.Checkbox(label="是否跳过响度匹配，如果你已经用音频处理软件做过响度匹配，请勾选此处")
-        #        num_processes = gr.Slider(label="预处理使用的CPU线程数，可以大幅加快预处理速度，但线程数过大容易爆显存，建议12G显存设置为2", minimum=1, maximum=multiprocessing.cpu_count(), value=1, step=1)
-        #    with gr.Row():
-        #        raw_preprocess=gr.Button("数据预处理", variant="primary")
-        #        regenerate_config_btn=gr.Button("重新生成配置文件", variant="primary")
-        #    preprocess_output=gr.Textbox(label="预处理输出信息，完成后请检查一下是否有报错信息，如无则可以进行下一步", max_lines=999)
-        #    clear_preprocess_output=gr.Button("清空输出信息")
-        #    with gr.Group():
-        #        gr.Markdown(value="""填写训练设置和超参数""")
-        #        with gr.Row():
-        #            gr.Textbox(label="当前使用显卡信息", value=gpu_info)
-        #            gpu_selection=gr.Textbox(label="多卡用户请指定希望训练使用的显卡ID（0,1,2...）", value=gpus, interactive=True)
-        #        with gr.Row():
-        #            log_interval=gr.Textbox(label="每隔多少步(steps)生成一次评估日志", value=sovits_params['log_interval'])
-        #            eval_interval=gr.Textbox(label="每隔多少步(steps)验证并保存一次模型", value=sovits_params['eval_interval'])
-        #            keep_ckpts=gr.Textbox(label="仅保留最新的X个模型，超出该数字的旧模型会被删除。设置为0则永不删除", value=sovits_params['keep_ckpts'])
-        #        with gr.Row():
-        #            batch_size=gr.Textbox(label="批量大小，每步取多少条数据进行训练，大batch有助于训练但显著增加显存占用。6G显存建议设定为4", value=sovits_params['batch_size'])
-        #            lr=gr.Textbox(label="学习率，一般不用动，批量大小较大时可以适当增大学习率，但强烈不建议超过0.0002，有炸炉风险", value=sovits_params['learning_rate'])
-        #            fp16_run=gr.Checkbox(label="是否使用fp16混合精度训练，fp16训练可能降低显存占用和训练时间，但对模型质量的影响尚未查证", value=sovits_params['fp16_run'])
-        #            all_in_mem=gr.Checkbox(label="是否加载所有数据集到内存中，硬盘IO过于低下、同时内存容量远大于数据集体积时可以启用，能显著加快训练速度", value=sovits_params['all_in_mem'])
-        #        with gr.Row():
-        #            gr.Markdown("请检查右侧的说话人列表是否和你要训练的目标说话人一致，确认无误后点击写入配置文件，然后就可以开始训练了")
-        #            speakers=gr.Textbox(label="说话人列表")
-        #    with gr.Accordion(label = "扩散模型配置（训练扩散模型需要写入此处）", open=True):
-        #        with gr.Row():
-        #            diff_num_workers = gr.Number(label="num_workers, 如果你的电脑配置较高，可以将这里设置为0加快训练速度", value=diff_params['num_workers'])
-        #            diff_cache_all_data = gr.Checkbox(label="是否缓存数据，启用后可以加快训练速度，关闭后可以节省显存或内存，但会减慢训练速度", value=diff_params['cache_all_data'])
-        #            diff_cache_device = gr.Radio(label="若启用缓存数据，使用显存(cuda)还是内存(cpu)缓存，如果显卡显存充足，选择cuda以加快训练速度", choices=["cuda","cpu"], value=diff_params['cache_device'])
-        #            diff_amp_dtype = gr.Radio(label="训练数据类型，fp16可能会有更快的训练速度，前提是你的显卡支持", choices=["fp32","fp16"], value=diff_params['amp_dtype'])
-        #        with gr.Row():
-        #            diff_batch_size = gr.Number(label="批量大小(batch_size)，根据显卡显存设置，小显存适当降低该项，6G显存可以设定为48，但该数值不要超过数据集总数量的1/4", value=diff_params['diff_batch_size'])
-        #            diff_lr = gr.Number(label="学习率（一般不需要动）", value=diff_params['diff_lr'])
-        #            diff_interval_log = gr.Number(label="每隔多少步(steps)生成一次评估日志", value = diff_params['diff_interval_log'])
-        #            diff_interval_val = gr.Number(label="每隔多少步(steps)验证并保存一次模型，如果你的批量大小较大，可以适当减少这里的数字，但不建议设置为1000以下", value=diff_params['diff_interval_val'])
-        #            diff_force_save = gr.Number(label="每隔多少步强制保留模型，只有该步数的倍数保存的模型会被保留，其余会被删除。设置为与验证步数相同的值则每个模型都会被保留", value=diff_params['diff_force_save'])
-        #    with gr.Row():
-        #        save_params=gr.Button("将当前设置保存为默认设置", variant="primary")
-        #        write_config=gr.Button("写入配置文件", variant="primary")
-        #    write_config_output=gr.Textbox(label="输出信息")
-        #    gr.Markdown(value="""**点击从头开始训练**将会自动将已有的训练进度保存到models_backup文件夹，并自动装载预训练模型。
-        #        **继续上一次的训练进度**将从上一个保存模型的进度继续训练。继续训练进度无需重新预处理和写入配置文件。
-        #        关于扩散、聚类和特征检索的详细说明请看[此处](https://www.yuque.com/umoubuton/ueupp5/kmui02dszo5zrqkz)。
-        #        """)
-        #    with gr.Row():
-        #        with gr.Column():
-        #            start_training=gr.Button("从头开始训练", variant="primary")
-        #            training_output=gr.Textbox(label="训练输出信息")
-        #        with gr.Column():
-        #            continue_training_btn=gr.Button("继续上一次的训练进度", variant="primary")
-        #            continue_training_output=gr.Textbox(label="训练输出信息")
-        #    with gr.Row():
-        #        with gr.Column():
-        #            diff_training_btn=gr.Button("从头训练扩散模型", variant="primary")
-        #            diff_training_output=gr.Textbox(label="训练输出信息")
-        #        with gr.Column():
-        #            diff_continue_training_btn=gr.Button("继续训练扩散模型", variant="primary")
-        #            diff_continue_training_output=gr.Textbox(label="训练输出信息")
-        #    with gr.Accordion(label = "聚类、特征检索训练", open=False):
-        #        with gr.Row():
-        #            with gr.Column():
-        #                kmeans_button=gr.Button("训练聚类模型", variant="primary")
-        #                kmeans_gpu = gr.Checkbox(label="使用GPU训练", value=True)
-        #                kmeans_output=gr.Textbox(label="训练输出信息")
-        #            with gr.Column():
-        #                index_button=gr.Button("训练特征检索模型", variant="primary")
-        #                index_output=gr.Textbox(label="训练输出信息")
         with gr.TabItem("小工具/实验室特性"):
             gr.Markdown(value="""
                         ### So-vits-svc 4.1 小工具/实验室特性
@@ -946,20 +1039,20 @@ with app:
                     compress_model_output = gr.Textbox(label="输出信息", value="")
                     compress_model_btn.click(model_compression, [model_to_compress], [compress_model_output])
-        #get_raw_dirs.click(load_raw_dirs,[],[raw_dirs_list])
-        #raw_preprocess.click(dataset_preprocess,[branch_selection, f0_predictor_selection, use_diff, vol_aug, skip_loudnorm, num_processes],[preprocess_output, speakers])
-        #regenerate_config_btn.click(regenerate_config,[branch_selection, vol_aug],[preprocess_output])
-        #clear_preprocess_output.click(clear_output,[],[preprocess_output])
-        #save_params.click(save_default_settings, [log_interval,eval_interval,keep_ckpts,batch_size,lr,fp16_run,all_in_mem,diff_num_workers,diff_cache_all_data,diff_cache_device,diff_amp_dtype,diff_batch_size,diff_lr,diff_interval_log,diff_interval_val,diff_force_save], [write_config_output])
-        #write_config.click(config_fn,[log_interval, eval_interval, keep_ckpts, batch_size, lr, fp16_run, all_in_mem, diff_num_workers, diff_cache_all_data, diff_batch_size, diff_lr, diff_interval_log, diff_interval_val, diff_cache_device, diff_amp_dtype, diff_force_save],[write_config_output])
-        #start_training.click(training,[gpu_selection, branch_selection],[training_output])
-        #diff_training_btn.click(diff_training,[branch_selection],[diff_training_output])
-        #continue_training_btn.click(continue_training,[gpu_selection, branch_selection],[continue_training_output])
-        #diff_continue_training_btn.click(diff_continue_training,[branch_selection],[diff_continue_training_output])
-        #kmeans_button.click(kmeans_training,[kmeans_gpu],[kmeans_output])
-        #index_button.click(index_training, [], [index_output])
     with gr.Tabs():
         with gr.Row(variant="panel"):
             with gr.Column():
@@ -970,4 +1063,4 @@ with app:
         debug_button.change(debug_change,[],[])
-        app.queue(concurrency_count=1022, max_size=2044).launch()

 sovits_params = {}
 diff_params = {}
+loaded = None
 def debug_change():
     global debug
     debug = debug_button.value
     output_msg = f"模型被成功加载到了{device_name}上\n{index_or_kmeans}：{clu_load}\n扩散模型：{diff_load}"
     return output_msg, gr.Dropdown.update(choices=spk_list, value=spk_choice), clip
+def Newload_model_func(ckpt_name,cluster_name,config_name2,enhance2,diff_model_name2,diff_config_name2,only_diffusion2,encoder2,using_device2):
+    """Build the global ``Svc`` model for a checkpoint / cluster-model pair.
+
+    ``ckpt_name`` and ``cluster_name`` are file names that are joined onto
+    ``workdir``.  The remaining ``*2`` arguments are objects whose ``.value``
+    attribute holds the setting (the caller passes the hidden Gradio
+    components).  ``encoder2`` is kept for signature compatibility but is not
+    read: it was only used for a value that was never returned.
+
+    Side effects: rebinds the module globals ``model`` and ``loaded``;
+    ``loaded`` records ``cluster_name`` only after ``Svc`` was constructed.
+    Returns ``None``.
+    """
+    global model, loaded
+    config_name = config_name2.value
+    diff_model_name = diff_model_name2.value
+    diff_config_name = diff_config_name2.value
+    using_device = using_device2.value
+    config_path = os.path.join(config_dir, config_name)
+    # Fall back to the stock diffusion config when none was selected.
+    if diff_config_name != "no_diff_config":
+        diff_config_path = os.path.join(config_dir, diff_config_name)
+    else:
+        diff_config_path = "configs/diffusion.yaml"
+    ckpt_path = os.path.join(workdir, ckpt_name)
+    cluster_path = os.path.join(workdir, cluster_name)
+    diff_model_path = os.path.join(diff_workdir, diff_model_name)
+    # A ".pkl" cluster file enables feature retrieval.
+    fr = os.path.splitext(cluster_name)[1] == ".pkl"
+    shallow_diffusion = diff_model_name != "no_diff"
+    use_spk_mix = False
+    # "Auto" is passed on as None.
+    device = None if using_device == "Auto" else using_device
+    model = Svc(ckpt_path,
+                    config_path,
+                    device,
+                    cluster_path,
+                    enhance2.value,
+                    diff_model_path,
+                    diff_config_path,
+                    shallow_diffusion,
+                    only_diffusion2.value,
+                    use_spk_mix,
+                    fr)
+    loaded = cluster_name
 def get_file_options(directory, extension):
     return [file for file in os.listdir(directory) if file.endswith(extension)]
 sovits_params, diff_params = get_default_settings()
 app = gr.Blocks()
+def Newget_model_info(choice_ckpt2):
+    """Inspect a checkpoint under ``workdir`` and report its encoder type.
+
+    Returns a hidden ``gr.Textbox`` whose value is the encoder name looked up
+    from the second dimension of ``emb_g.weight``, "不受支持的模型" for an
+    unlisted size, or the missing-embedding message when the checkpoint has
+    no ``emb_g.weight``.  A component is returned on every path because the
+    caller stores the result and later reads its ``.value``.
+    """
+    choice_ckpt = str(choice_ckpt2)
+    pthfile = os.path.join(workdir, choice_ckpt)
+    # Map the checkpoint tensors onto the CPU while loading.
+    net = torch.load(pthfile, map_location=torch.device('cpu'))
+    spk_emb = net["model"].get("emb_g.weight")
+    if spk_emb is None:
+        info = "所选模型缺少emb_g.weight，你可能选择了一个底模"
+    else:
+        _dim, _layer = spk_emb.size()
+        model_type = {
+            768: "Vec768-Layer12",
+            256: "Vec256-Layer9 / HubertSoft",
+            1024: "Whisper-PPG"
+        }
+        info = model_type.get(_layer, "不受支持的模型")
+    return gr.Textbox(visible=False, value=info)
 with app:
     gr.Markdown(value="""
         ### So-VITS-SVC 4.1-Stable
         """)
     with gr.Tabs():
+        with gr.TabItem("推理"):
+            #with gr.Row():
+            #    choice_ckpt = gr.Dropdown(label="模型选择", choices=ckpt_list, value="no_model")
+            #    model_branch = gr.Textbox(label="模型编码器", placeholder="请先选择模型", interactive=False)
+            #choice_ckpt = gr.Dropdown(value="G_82400.pth", visible=False)
+            #with gr.Row():
+            #    config_choice = gr.Dropdown(label="配置文件", choices=config_list, value="no_config")
+            #    config_info = gr.Textbox(label="配置文件编码器", placeholder="请选择配置文件")
+            config_choice = gr.Dropdown(value="config.json", visible=False)
+            #gr.Markdown(value="""**请检查模型和配置文件的编码器是否匹配**""")
+            #with gr.Row():
+            #    diff_choice = gr.Dropdown(label="（可选）选择扩散模型", choices=diff_list, value="no_diff", interactive=True)
+            #    diff_config_choice = gr.Dropdown(label="扩散模型配置文件", choices=diff_config_list, value="no_diff_config", interactive=True)
+            diff_choice = gr.Dropdown(value="no_diff", visible=False)
+            diff_config_choice = gr.Dropdown(value="no_diff_config", visible=False)
             with gr.Row():
+                cluster_choice = gr.Dropdown(label="（可选）选择聚类模型/特征检索模型", choices=cluster_list, value="no_clu")
             with gr.Row():
                 enhance = gr.Checkbox(label="是否使用NSF_HIFIGAN增强，该选项对部分训练集少的模型有一定的音质增强效果，但是对训练好的模型有反面效果，默认关闭", value=False)
+                #only_diffusion = gr.Checkbox(label="是否使用全扩散推理，开启后将不使用So-VITS模型，仅使用扩散模型进行完整扩散推理，默认关闭", value=False)
+                only_diffusion = gr.Checkbox(value=False, visible=False)
+            #using_device = gr.Dropdown(label="推理设备，默认为自动选择", choices=["Auto","cuda","cpu"], value="Auto")
+            using_device = gr.Dropdown(value='Auto', visible=False)
+            #refresh = gr.Button("刷新选项")
+            #loadckpt = gr.Button("加载模型", variant="primary")
+            #with gr.Row():
+            #    model_message = gr.Textbox(label="Output Message")
+            #    sid = gr.Dropdown(label="So-VITS说话人", value="speaker0")
+            sid = gr.Dropdown(value="1001", visible=False)
+            #choice_ckpt.change(get_model_info, [choice_ckpt], [model_branch])
+            model_branch = Newget_model_info("G_82400.pth")
+            #config_choice.change(load_json_encoder, [config_choice], [config_info])
+            #refresh.click(refresh_options,[],[choice_ckpt,config_choice,cluster_choice,diff_choice,diff_config_choice])
             gr.Markdown(value="""
                 请稍等片刻，模型加载大约需要10秒。后续操作不需要重新加载模型
                 vc_tts_submit = gr.Button("文本转语音", variant="primary")
             vc_output1 = gr.Textbox(label="Output Message")
             vc_output2 = gr.Audio(label="Output Audio")
+        def Newvc_fn(sid, input_audio, vc_transform, auto_f0, cluster_ratio, slice_db, noise_scale, pad_seconds, cl_num, lg_num, lgr_num, f0_predictor, enhancer_adaptive_key, cr_threshold, k_step, use_spk_mix, second_encoding, loudness_envelope_adjustment, clus2):
+            """Convert one uploaded clip, reloading the model when the cluster choice changed.
+
+            ``input_audio`` is a ``(sampling_rate, samples)`` pair or ``None``;
+            ``clus2`` is the selected cluster / feature-retrieval file name.
+            The other arguments are forwarded unchanged to ``vc_infer``.
+
+            Returns ``(message, output_file_path)``; the path is ``None`` when
+            no audio or no model is available.  Any exception raised during
+            conversion is re-raised as ``gr.Error``.
+            """
+            global model, loaded
+            import tempfile
+            if loaded != clus2:
+                Newload_model_func("G_82400.pth",clus2,config_choice,enhance,diff_choice,diff_config_choice,only_diffusion,model_branch,using_device)
+                loaded = clus2
+            try:
+                if input_audio is None:
+                    return "You need to upload an audio", None
+                if model is None:
+                    return "You need to upload an model", None
+                sampling_rate, audio = input_audio
+                # Use a unique file per request: the queue allows many
+                # concurrent workers, and a shared "temp.wav" would let one
+                # request overwrite another's input.
+                fd, temp_path = tempfile.mkstemp(suffix=".wav")
+                os.close(fd)
+                try:
+                    sf.write(temp_path, audio, sampling_rate, format="wav")
+                    output_file_path = vc_infer(sid, audio, temp_path, vc_transform, auto_f0, cluster_ratio, slice_db, noise_scale, pad_seconds, cl_num, lg_num, lgr_num, f0_predictor, enhancer_adaptive_key, cr_threshold, k_step, use_spk_mix, second_encoding, loudness_envelope_adjustment)
+                finally:
+                    # Remove the temp file even when inference fails.
+                    os.remove(temp_path)
+                return "Success", output_file_path
+            except Exception as e:
+                if debug: traceback.print_exc()
+                raise gr.Error(e)
+        #loadckpt.click(load_model_func,[choice_ckpt,cluster_choice,config_choice,enhance,diff_choice,diff_config_choice,only_diffusion,model_branch,using_device],[model_message, sid, cl_num])
+        vc_submit.click(Newvc_fn, [sid, vc_input3, vc_transform,auto_f0,cluster_ratio, slice_db, noise_scale,pad_seconds,cl_num,lg_num,lgr_num,f0_predictor,enhancer_adaptive_key,cr_threshold,k_step,use_spk_mix,second_encoding,loudness_envelope_adjustment,cluster_choice], [vc_output1, vc_output2])
         vc_batch_submit.click(vc_batch_fn, [sid, vc_batch_files, vc_transform,auto_f0,cluster_ratio, slice_db, noise_scale,pad_seconds,cl_num,lg_num,lgr_num,f0_predictor,enhancer_adaptive_key,cr_threshold,k_step,use_spk_mix,second_encoding,loudness_envelope_adjustment], [vc_output1])
         vc_tts_submit.click(tts_fn, [text_input, tts_spk, sid, vc_transform,auto_f0,cluster_ratio, slice_db, noise_scale,pad_seconds,cl_num,lg_num,lgr_num,f0_predictor,enhancer_adaptive_key,cr_threshold,k_step,use_spk_mix,second_encoding,loudness_envelope_adjustment], [vc_output1, vc_output2])
+        # The training tab is disabled with line comments rather than a
+        # triple-quoted string: its gr.Markdown values contain """ themselves,
+        # which would end an enclosing string literal early and leave the
+        # remaining text to be parsed as code.
+        #with gr.TabItem("训练"):
+        #    gr.Markdown(value="""请将数据集文件夹放置在dataset_raw文件夹下，确认放置正确后点击下方获取数据集名称""")
+        #    raw_dirs_list=gr.Textbox(label="Raw dataset directory(s):")
+        #    get_raw_dirs=gr.Button("识别数据集", variant="primary")
+        #    gr.Markdown(value="""确认数据集正确识别后请选择训练使用的特征编码器和f0预测器，**如果要训练扩散模型，请选择Vec768l12或hubertsoft，并确保So-VITS和扩散模型使用同一个编码器**""")
+        #    with gr.Row():
+        #        gr.Markdown(value="""**vec256l9**: ContentVec(256Layer9)，旧版本叫v1，So-VITS-SVC 4.0的基础版本，**暂不支持扩散模型**
+        #                        **vec768l12**: 特征输入更换为ContentVec的第12层Transformer输出，模型理论上会更加还原训练集音色
+        #                        **hubertsoft**: So-VITS-SVC 3.0使用的编码器，咬字更为准确，但可能存在多说话人音色泄露问题
+        #                        **whisper-ppg**: 来自OpenAI，咬字最为准确，但和Hubertsoft一样存在多说话人音色泄露，且显存占用和训练时间有明显增加。**暂不支持扩散模型**
+        #        """)
+        #        gr.Markdown(value="""**crepe**: 抗噪能力最强，但预处理速度慢（不过如果你的显卡很强的话速度会很快）
+        #                        **pm**: 预处理速度快，但抗噪能力较弱
+        #                        **dio**: 先前版本预处理默认使用的f0预测器
+        #                        **harvest**: 有一定抗噪能力，预处理显存占用友好，速度比较慢
+        #        """)
+        #    with gr.Row():
+        #        branch_selection = gr.Radio(label="选择训练使用的编码器", choices=["vec256l9","vec768l12","hubertsoft","whisper-ppg"], value="vec768l12", interactive=True)
+        #        f0_predictor_selection = gr.Radio(label="选择训练使用的f0预测器", choices=["crepe","pm","dio","harvest"], value="crepe", interactive=True)
+        #        use_diff = gr.Checkbox(label="是否使用浅扩散模型，如要训练浅扩散模型请勾选此项", value=True)
+        #        vol_aug=gr.Checkbox(label="是否启用响度嵌入和音量增强，启用后可以根据输入源控制输出响度，但对数据集质量的要求更高。**仅支持vec768l12编码器**", value=False)
+        #    with gr.Row():
+        #        skip_loudnorm = gr.Checkbox(label="是否跳过响度匹配，如果你已经用音频处理软件做过响度匹配，请勾选此处")
+        #        num_processes = gr.Slider(label="预处理使用的CPU线程数，可以大幅加快预处理速度，但线程数过大容易爆显存，建议12G显存设置为2", minimum=1, maximum=multiprocessing.cpu_count(), value=1, step=1)
+        #    with gr.Row():
+        #        raw_preprocess=gr.Button("数据预处理", variant="primary")
+        #        regenerate_config_btn=gr.Button("重新生成配置文件", variant="primary")
+        #    preprocess_output=gr.Textbox(label="预处理输出信息，完成后请检查一下是否有报错信息，如无则可以进行下一步", max_lines=999)
+        #    clear_preprocess_output=gr.Button("清空输出信息")
+        #    with gr.Group():
+        #        gr.Markdown(value="""填写训练设置和超参数""")
+        #        with gr.Row():
+        #            gr.Textbox(label="当前使用显卡信息", value=gpu_info)
+        #            gpu_selection=gr.Textbox(label="多卡用户请指定希望训练使用的显卡ID（0,1,2...）", value=gpus, interactive=True)
+        #        with gr.Row():
+        #            log_interval=gr.Textbox(label="每隔多少步(steps)生成一次评估日志", value=sovits_params['log_interval'])
+        #            eval_interval=gr.Textbox(label="每隔多少步(steps)验证并保存一次模型", value=sovits_params['eval_interval'])
+        #            keep_ckpts=gr.Textbox(label="仅保留最新的X个模型，超出该数字的旧模型会被删除。设置为0则永不删除", value=sovits_params['keep_ckpts'])
+        #        with gr.Row():
+        #            batch_size=gr.Textbox(label="批量大小，每步取多少条数据进行训练，大batch有助于训练但显著增加显存占用。6G显存建议设定为4", value=sovits_params['batch_size'])
+        #            lr=gr.Textbox(label="学习率，一般不用动，批量大小较大时可以适当增大学习率，但强烈不建议超过0.0002，有炸炉风险", value=sovits_params['learning_rate'])
+        #            fp16_run=gr.Checkbox(label="是否使用fp16混合精度训练，fp16训练可能降低显存占用和训练时间，但对模型质量的影响尚未查证", value=sovits_params['fp16_run'])
+        #            all_in_mem=gr.Checkbox(label="是否加载所有数据集到内存中，硬盘IO过于低下、同时内存容量远大于数据集体积时可以启用，能显著加快训练速度", value=sovits_params['all_in_mem'])
+        #        with gr.Row():
+        #            gr.Markdown("请检查右侧的说话人列表是否和你要训练的目标说话人一致，确认无误后点击写入配置文件，然后就可以开始训练了")
+        #            speakers=gr.Textbox(label="说话人列表")
+        #    with gr.Accordion(label = "扩散模型配置（训练扩散模型需要写入此处）", open=True):
+        #        with gr.Row():
+        #            diff_num_workers = gr.Number(label="num_workers, 如果你的电脑配置较高，可以将这里设置为0加快训练速度", value=diff_params['num_workers'])
+        #            diff_cache_all_data = gr.Checkbox(label="是否缓存数据，启用后可以加快训练速度，关闭后可以节省显存或内存，但会减慢训练速度", value=diff_params['cache_all_data'])
+        #            diff_cache_device = gr.Radio(label="若启用缓存数据，使用显存(cuda)还是内存(cpu)缓存，如果显卡显存充足，选择cuda以加快训练速度", choices=["cuda","cpu"], value=diff_params['cache_device'])
+        #            diff_amp_dtype = gr.Radio(label="训练数据类型，fp16可能会有更快的训练速度，前提是你的显卡支持", choices=["fp32","fp16"], value=diff_params['amp_dtype'])
+        #        with gr.Row():
+        #            diff_batch_size = gr.Number(label="批量大小(batch_size)，根据显卡显存设置，小显存适当降低该项，6G显存可以设定为48，但该数值不要超过数据集总数量的1/4", value=diff_params['diff_batch_size'])
+        #            diff_lr = gr.Number(label="学习率（一般不需要动）", value=diff_params['diff_lr'])
+        #            diff_interval_log = gr.Number(label="每隔多少步(steps)生成一次评估日志", value = diff_params['diff_interval_log'])
+        #            diff_interval_val = gr.Number(label="每隔多少步(steps)验证并保存一次模型，如果你的批量大小较大，可以适当减少这里的数字，但不建议设置为1000以下", value=diff_params['diff_interval_val'])
+        #            diff_force_save = gr.Number(label="每隔多少步强制保留模型，只有该步数的倍数保存的模型会被保留，其余会被删除。设置为与验证步数相同的值则每个模型都会被保留", value=diff_params['diff_force_save'])
+        #    with gr.Row():
+        #        save_params=gr.Button("将当前设置保存为默认设置", variant="primary")
+        #        write_config=gr.Button("写入配置文件", variant="primary")
+        #    write_config_output=gr.Textbox(label="输出信息")
+        #    gr.Markdown(value="""**点击从头开始训练**将会自动将已有的训练进度保存到models_backup文件夹，并自动装载预训练模型。
+        #        **继续上一次的训练进度**将从上一个保存模型的进度继续训练。继续训练进度无需重新预处理和写入配置文件。
+        #        关于扩散、聚类和特征检索的详细说明请看[此处](https://www.yuque.com/umoubuton/ueupp5/kmui02dszo5zrqkz)。
+        #        """)
+        #    with gr.Row():
+        #        with gr.Column():
+        #            start_training=gr.Button("从头开始训练", variant="primary")
+        #            training_output=gr.Textbox(label="训练输出信息")
+        #        with gr.Column():
+        #            continue_training_btn=gr.Button("继续上一次的训练进度", variant="primary")
+        #            continue_training_output=gr.Textbox(label="训练输出信息")
+        #    with gr.Row():
+        #        with gr.Column():
+        #            diff_training_btn=gr.Button("从头训练扩散模型", variant="primary")
+        #            diff_training_output=gr.Textbox(label="训练输出信息")
+        #        with gr.Column():
+        #            diff_continue_training_btn=gr.Button("继续训练扩散模型", variant="primary")
+        #            diff_continue_training_output=gr.Textbox(label="训练输出信息")
+        #    with gr.Accordion(label = "聚类、特征检索训练", open=False):
+        #        with gr.Row():
+        #            with gr.Column():
+        #                kmeans_button=gr.Button("训练聚类模型", variant="primary")
+        #                kmeans_gpu = gr.Checkbox(label="使用GPU训练", value=True)
+        #                kmeans_output=gr.Textbox(label="训练输出信息")
+        #            with gr.Column():
+        #                index_button=gr.Button("训练特征检索模型", variant="primary")
+        #                index_output=gr.Textbox(label="训练输出信息")
         with gr.TabItem("小工具/实验室特性"):
             gr.Markdown(value="""
                         ### So-vits-svc 4.1 小工具/实验室特性
                     compress_model_output = gr.Textbox(label="输出信息", value="")
                     compress_model_btn.click(model_compression, [model_to_compress], [compress_model_output])
+        # Event wiring for the training controls, kept disabled as line comments.
+        #get_raw_dirs.click(load_raw_dirs,[],[raw_dirs_list])
+        #raw_preprocess.click(dataset_preprocess,[branch_selection, f0_predictor_selection, use_diff, vol_aug, skip_loudnorm, num_processes],[preprocess_output, speakers])
+        #regenerate_config_btn.click(regenerate_config,[branch_selection, vol_aug],[preprocess_output])
+        #clear_preprocess_output.click(clear_output,[],[preprocess_output])
+        #save_params.click(save_default_settings, [log_interval,eval_interval,keep_ckpts,batch_size,lr,fp16_run,all_in_mem,diff_num_workers,diff_cache_all_data,diff_cache_device,diff_amp_dtype,diff_batch_size,diff_lr,diff_interval_log,diff_interval_val,diff_force_save], [write_config_output])
+        #write_config.click(config_fn,[log_interval, eval_interval, keep_ckpts, batch_size, lr, fp16_run, all_in_mem, diff_num_workers, diff_cache_all_data, diff_batch_size, diff_lr, diff_interval_log, diff_interval_val, diff_cache_device, diff_amp_dtype, diff_force_save],[write_config_output])
+        #start_training.click(training,[gpu_selection, branch_selection],[training_output])
+        #diff_training_btn.click(diff_training,[branch_selection],[diff_training_output])
+        #continue_training_btn.click(continue_training,[gpu_selection, branch_selection],[continue_training_output])
+        #diff_continue_training_btn.click(diff_continue_training,[branch_selection],[diff_continue_training_output])
+        #kmeans_button.click(kmeans_training,[kmeans_gpu],[kmeans_output])
+        #index_button.click(index_training, [], [index_output])
     with gr.Tabs():
         with gr.Row(variant="panel"):
             with gr.Column():
         debug_button.change(debug_change,[],[])
+        app.queue(concurrency_count=1022, max_size=2044).launch(share=True)