yl12053 committed on
Commit
c112a37
1 Parent(s): eec8126
Files changed (1) hide show
  1. app.py +221 -128
app.py CHANGED
@@ -39,6 +39,8 @@ debug = False
39
  sovits_params = {}
40
  diff_params = {}
41
 
 
 
42
  def debug_change():
43
  global debug
44
  debug = debug_button.value
@@ -140,6 +142,51 @@ def load_model_func(ckpt_name,cluster_name,config_name,enhance,diff_model_name,d
140
  output_msg = f"模型被成功加载到了{device_name}上\n{index_or_kmeans}:{clu_load}\n扩散模型:{diff_load}"
141
  return output_msg, gr.Dropdown.update(choices=spk_list, value=spk_choice), clip
142
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
143
  def get_file_options(directory, extension):
144
  return [file for file in os.listdir(directory) if file.endswith(extension)]
145
 
@@ -662,6 +709,22 @@ gpus="-".join([i[0]for i in gpu_infos])
662
  sovits_params, diff_params = get_default_settings()
663
 
664
  app = gr.Blocks()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
665
  with app:
666
  gr.Markdown(value="""
667
  ### So-VITS-SVC 4.1-Stable
@@ -676,31 +739,40 @@ with app:
676
 
677
  """)
678
  with gr.Tabs():
679
- with gr.TabItem("特别周 (Special Week)"):
680
- with gr.Row():
681
- choice_ckpt = gr.Dropdown(label="模型选择", choices=ckpt_list, value="no_model")
682
- model_branch = gr.Textbox(label="模型编码器", placeholder="请先选择模型", interactive=False)
 
 
 
 
 
 
 
 
 
 
 
683
  with gr.Row():
684
- config_choice = gr.Dropdown(label="配置文件", choices=config_list, value="no_config")
685
- config_info = gr.Textbox(label="配置文件编码器", placeholder="请选择配置文件")
686
- gr.Markdown(value="""**请检查模型和配置文件的编码器是否匹配**""")
687
- with gr.Row():
688
- diff_choice = gr.Dropdown(label="(可选)选择扩散模型", choices=diff_list, value="no_diff", interactive=True)
689
- diff_config_choice = gr.Dropdown(label="扩散模型配置文件", choices=diff_config_list, value="no_diff_config", interactive=True)
690
- cluster_choice = gr.Dropdown(label="(可选)选择聚类模型/特征检索模型", choices=cluster_list, value="no_clu")
691
  with gr.Row():
692
  enhance = gr.Checkbox(label="是否使用NSF_HIFIGAN增强,该选项对部分训练集少的模型有一定的音质增强效果,但是对训练好的模型有反面效果,默认关闭", value=False)
693
- only_diffusion = gr.Checkbox(label="是否使用全扩散推理,开启后将不使用So-VITS模型,仅使用扩散模型进行完整扩散推理,默认关闭", value=False)
694
- using_device = gr.Dropdown(label="推理设备,默认为自动选择", choices=["Auto","cuda","cpu"], value="Auto")
695
- refresh = gr.Button("刷新选项")
696
- loadckpt = gr.Button("加载模型", variant="primary")
697
- with gr.Row():
698
- model_message = gr.Textbox(label="Output Message")
699
- sid = gr.Dropdown(label="So-VITS说话人", value="speaker0")
 
 
 
700
 
701
- choice_ckpt.change(get_model_info, [choice_ckpt], [model_branch])
702
- config_choice.change(load_json_encoder, [config_choice], [config_info])
703
- refresh.click(refresh_options,[],[choice_ckpt,config_choice,cluster_choice,diff_choice,diff_config_choice])
 
704
 
705
  gr.Markdown(value="""
706
  请稍等片刻,模型加载大约需要10秒。后续操作不需要重新加载模型
@@ -755,102 +827,123 @@ with app:
755
  vc_tts_submit = gr.Button("文本转语音", variant="primary")
756
  vc_output1 = gr.Textbox(label="Output Message")
757
  vc_output2 = gr.Audio(label="Output Audio")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
758
 
759
- loadckpt.click(load_model_func,[choice_ckpt,cluster_choice,config_choice,enhance,diff_choice,diff_config_choice,only_diffusion,model_branch,using_device],[model_message, sid, cl_num])
760
- vc_submit.click(vc_fn, [sid, vc_input3, vc_transform,auto_f0,cluster_ratio, slice_db, noise_scale,pad_seconds,cl_num,lg_num,lgr_num,f0_predictor,enhancer_adaptive_key,cr_threshold,k_step,use_spk_mix,second_encoding,loudness_envelope_adjustment], [vc_output1, vc_output2])
761
  vc_batch_submit.click(vc_batch_fn, [sid, vc_batch_files, vc_transform,auto_f0,cluster_ratio, slice_db, noise_scale,pad_seconds,cl_num,lg_num,lgr_num,f0_predictor,enhancer_adaptive_key,cr_threshold,k_step,use_spk_mix,second_encoding,loudness_envelope_adjustment], [vc_output1])
762
  vc_tts_submit.click(tts_fn, [text_input, tts_spk, sid, vc_transform,auto_f0,cluster_ratio, slice_db, noise_scale,pad_seconds,cl_num,lg_num,lgr_num,f0_predictor,enhancer_adaptive_key,cr_threshold,k_step,use_spk_mix,second_encoding,loudness_envelope_adjustment], [vc_output1, vc_output2])
763
- #with gr.TabItem("训练"):
764
- # gr.Markdown(value="""请将数据集文件夹放置在dataset_raw文件夹下,确认放置正确后点击下方获取数据集名称""")
765
- # raw_dirs_list=gr.Textbox(label="Raw dataset directory(s):")
766
- # get_raw_dirs=gr.Button("识别数据集", variant="primary")
767
- # gr.Markdown(value="""确认数据集正确识别后请选择训练使用的特征编码器和f0预测器,**如果要训练扩散模型,请选择Vec768l12或hubertsoft,并确保So-VITS和扩散模型使用同一个编码器**""")
768
- # with gr.Row():
769
- # gr.Markdown(value="""**vec256l9**: ContentVec(256Layer9),旧版本叫v1,So-VITS-SVC 4.0的基础版本,**暂不支持扩散模型**
770
- # **vec768l12**: 特征输入更换为ContentVec的第12层Transformer输出,模型理论上会更加还原训练集音色
771
- # **hubertsoft**: So-VITS-SVC 3.0使用的编码器,咬字更为准确,但可能存在多说话人音色泄露问题
772
- # **whisper-ppg**: 来自OpenAI,咬字最为准确,但和Hubertsoft一样存在多说话人音色泄露,且显存占用和训练时间有明显增加。**暂不支持扩散模型**
773
- # """)
774
- # gr.Markdown(value="""**crepe**: 抗噪能力最强,但预处理速度慢(不过如果你的显卡很强的话速度会很快)
775
- # **pm**: 预处理速度快,但抗噪能力较弱
776
- # **dio**: 先前版本预处理默认使用的f0预测器
777
- # **harvest**: 有一定抗噪能力,预处理显存占用友好,速度比较慢
778
- # """)
779
- # with gr.Row():
780
- # branch_selection = gr.Radio(label="选择训练使用的编码器", choices=["vec256l9","vec768l12","hubertsoft","whisper-ppg"], value="vec768l12", interactive=True)
781
- # f0_predictor_selection = gr.Radio(label="选择训练使用的f0预测器", choices=["crepe","pm","dio","harvest"], value="crepe", interactive=True)
782
- # use_diff = gr.Checkbox(label="是否使用浅扩散模型,如要训练浅扩散模型请勾选此项", value=True)
783
- # vol_aug=gr.Checkbox(label="是否启用响度嵌入和音量增强,启用后可以根据输入源控制输出响度,但对数据集质量的要求更高。**仅支持vec768l12编码器**", value=False)
784
- # with gr.Row():
785
- # skip_loudnorm = gr.Checkbox(label="是否跳过响度匹配,如果你已经用音频处理软件做过响度匹配,请勾选此处")
786
- # num_processes = gr.Slider(label="预处理使用的CPU线程数,可以大幅加快预处理速度,但线程数过大容易爆显存,建议12G显存设置为2", minimum=1, maximum=multiprocessing.cpu_count(), value=1, step=1)
787
- # with gr.Row():
788
- # raw_preprocess=gr.Button("数据预处理", variant="primary")
789
- # regenerate_config_btn=gr.Button("重新生成配置文件", variant="primary")
790
- # preprocess_output=gr.Textbox(label="预处理输出信息,完成后请检查一下是否有报错信息,如无则可以进行下一步", max_lines=999)
791
- # clear_preprocess_output=gr.Button("清空输出信息")
792
- # with gr.Group():
793
- # gr.Markdown(value="""填写训练设置和超参数""")
794
- # with gr.Row():
795
- # gr.Textbox(label="当前使用显卡信息", value=gpu_info)
796
- # gpu_selection=gr.Textbox(label="多卡用户请指定希望训练使用的显卡ID(0,1,2...)", value=gpus, interactive=True)
797
- # with gr.Row():
798
- # log_interval=gr.Textbox(label="每隔多少步(steps)生成一次评估日志", value=sovits_params['log_interval'])
799
- # eval_interval=gr.Textbox(label="每隔多少步(steps)验证并保存一次模型", value=sovits_params['eval_interval'])
800
- # keep_ckpts=gr.Textbox(label="仅保留最新的X个模型,超出该数字的旧模型会被删除。设置为0则永不删除", value=sovits_params['keep_ckpts'])
801
- # with gr.Row():
802
- # batch_size=gr.Textbox(label="批量大小,每步取多少条数据进行训练,大batch有助于训练但显著增加显存占用。6G显存建议设定为4", value=sovits_params['batch_size'])
803
- # lr=gr.Textbox(label="学习率,一般不用动,批量大小较大时可以适当增大学习率,但强烈不建议超过0.0002,有炸炉风险", value=sovits_params['learning_rate'])
804
- # fp16_run=gr.Checkbox(label="是否使用fp16混合精度训练,fp16训练可能降低显存占用和训练时间,但对模型质量的影响尚未查证", value=sovits_params['fp16_run'])
805
- # all_in_mem=gr.Checkbox(label="是否加载所有数据集到内存中,硬盘IO过于低下、同时内存容量远大于数据集体积时可以启用,能显著加快训练速度", value=sovits_params['all_in_mem'])
806
- # with gr.Row():
807
- # gr.Markdown("请检查右侧的说话人列表是否和你要训练的目标说话人一致,确认无误后点击写入配置文件,然后就可以开始训练了")
808
- # speakers=gr.Textbox(label="说话人列表")
809
- # with gr.Accordion(label = "扩散模型配置(训练扩散模型需要写入此处)", open=True):
810
- # with gr.Row():
811
- # diff_num_workers = gr.Number(label="num_workers, 如果你的电脑配置较高,可以将这里设置为0加快训练速度", value=diff_params['num_workers'])
812
- # diff_cache_all_data = gr.Checkbox(label="是否缓存数据,启用后可以加快训练速度,关闭后可以节省显存或内存,但会减慢训练速度", value=diff_params['cache_all_data'])
813
- # diff_cache_device = gr.Radio(label="若启用缓存数据,使用显存(cuda)还是内存(cpu)缓存,如果显卡显存充足,选择cuda以加快训练速度", choices=["cuda","cpu"], value=diff_params['cache_device'])
814
- # diff_amp_dtype = gr.Radio(label="训练数据类型,fp16可能会有更快的训练速度,前提是你的显卡支持", choices=["fp32","fp16"], value=diff_params['amp_dtype'])
815
- # with gr.Row():
816
- # diff_batch_size = gr.Number(label="批量大小(batch_size),根据显卡显存设置,小显存适当降低该项,6G显存可以设定为48,但该数值不要超过数据集总数量的1/4", value=diff_params['diff_batch_size'])
817
- # diff_lr = gr.Number(label="学习率(一般不需要动)", value=diff_params['diff_lr'])
818
- # diff_interval_log = gr.Number(label="每隔多少步(steps)生成一次评估日志", value = diff_params['diff_interval_log'])
819
- # diff_interval_val = gr.Number(label="每隔多少步(steps)验证并保存一次模型,如果你的批量大小较大,可以适当减少这里的数字,但不建议设置为1000以下", value=diff_params['diff_interval_val'])
820
- # diff_force_save = gr.Number(label="每隔多少步强制保留模型,只有该步数的倍数保存的模型会被保留,其余会被删除。设置为与验证步数相同的值则每个模型都会被保留", value=diff_params['diff_force_save'])
821
- # with gr.Row():
822
- # save_params=gr.Button("将当前设置保存为默认设置", variant="primary")
823
- # write_config=gr.Button("写入配置文件", variant="primary")
824
- # write_config_output=gr.Textbox(label="输出信息")
825
-
826
- # gr.Markdown(value="""**点击从头开始训练**将会自动将已有的训练进度保存到models_backup文件夹,并自动装载预训练模型。
827
- # **继续上一次的训练进度**将从上一个保存模型的进度继续训练。继续训练进度无需重新预处理和写入配置文件。
828
- # 关于扩散、聚类和特征检索的详细说明请看[此处](https://www.yuque.com/umoubuton/ueupp5/kmui02dszo5zrqkz)。
829
- # """)
830
- # with gr.Row():
831
- # with gr.Column():
832
- # start_training=gr.Button("从头开始训练", variant="primary")
833
- # training_output=gr.Textbox(label="训练输出信息")
834
- # with gr.Column():
835
- # continue_training_btn=gr.Button("继续上一次的训练进度", variant="primary")
836
- # continue_training_output=gr.Textbox(label="训练输出信息")
837
- # with gr.Row():
838
- # with gr.Column():
839
- # diff_training_btn=gr.Button("从头训练扩散模型", variant="primary")
840
- # diff_training_output=gr.Textbox(label="训练输出信息")
841
- # with gr.Column():
842
- # diff_continue_training_btn=gr.Button("继续训练扩散模型", variant="primary")
843
- # diff_continue_training_output=gr.Textbox(label="训练输出信息")
844
- # with gr.Accordion(label = "聚类、特征检索训练", open=False):
845
- # with gr.Row():
846
- # with gr.Column():
847
- # kmeans_button=gr.Button("训练聚类模型", variant="primary")
848
- # kmeans_gpu = gr.Checkbox(label="使用GPU训练", value=True)
849
- # kmeans_output=gr.Textbox(label="训练输出信息")
850
- # with gr.Column():
851
- # index_button=gr.Button("训练特征检索模型", variant="primary")
852
- # index_output=gr.Textbox(label="训练输出信息")
853
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
854
  with gr.TabItem("小工具/实验室特性"):
855
  gr.Markdown(value="""
856
  ### So-vits-svc 4.1 小工具/实验室特性
@@ -946,20 +1039,20 @@ with app:
946
  compress_model_output = gr.Textbox(label="输出信息", value="")
947
 
948
  compress_model_btn.click(model_compression, [model_to_compress], [compress_model_output])
949
-
950
- #get_raw_dirs.click(load_raw_dirs,[],[raw_dirs_list])
951
- #raw_preprocess.click(dataset_preprocess,[branch_selection, f0_predictor_selection, use_diff, vol_aug, skip_loudnorm, num_processes],[preprocess_output, speakers])
952
- #regenerate_config_btn.click(regenerate_config,[branch_selection, vol_aug],[preprocess_output])
953
- #clear_preprocess_output.click(clear_output,[],[preprocess_output])
954
- #save_params.click(save_default_settings, [log_interval,eval_interval,keep_ckpts,batch_size,lr,fp16_run,all_in_mem,diff_num_workers,diff_cache_all_data,diff_cache_device,diff_amp_dtype,diff_batch_size,diff_lr,diff_interval_log,diff_interval_val,diff_force_save], [write_config_output])
955
- #write_config.click(config_fn,[log_interval, eval_interval, keep_ckpts, batch_size, lr, fp16_run, all_in_mem, diff_num_workers, diff_cache_all_data, diff_batch_size, diff_lr, diff_interval_log, diff_interval_val, diff_cache_device, diff_amp_dtype, diff_force_save],[write_config_output])
956
- #start_training.click(training,[gpu_selection, branch_selection],[training_output])
957
- #diff_training_btn.click(diff_training,[branch_selection],[diff_training_output])
958
- #continue_training_btn.click(continue_training,[gpu_selection, branch_selection],[continue_training_output])
959
- #diff_continue_training_btn.click(diff_continue_training,[branch_selection],[diff_continue_training_output])
960
- #kmeans_button.click(kmeans_training,[kmeans_gpu],[kmeans_output])
961
- #index_button.click(index_training, [], [index_output])
962
-
963
  with gr.Tabs():
964
  with gr.Row(variant="panel"):
965
  with gr.Column():
@@ -970,4 +1063,4 @@ with app:
970
 
971
  debug_button.change(debug_change,[],[])
972
 
973
- app.queue(concurrency_count=1022, max_size=2044).launch()
 
39
  sovits_params = {}
40
  diff_params = {}
41
 
42
+ loaded = None
43
+
44
  def debug_change():
45
  global debug
46
  debug = debug_button.value
 
142
  output_msg = f"模型被成功加载到了{device_name}上\n{index_or_kmeans}:{clu_load}\n扩散模型:{diff_load}"
143
  return output_msg, gr.Dropdown.update(choices=spk_list, value=spk_choice), clip
144
 
145
+ def Newload_model_func(ckpt_name,cluster_name,config_name2,enhance2,diff_model_name2,diff_config_name2,only_diffusion2,encoder2,using_device2):
146
+ global model, loaded
147
+ config_name = config_name2.value
148
+ enhance = enhance2.value
149
+ diff_model_name = diff_model_name2.value
150
+ diff_config_name = (diff_config_name2).value
151
+ only_diffusion = (only_diffusion2).value
152
+ encoder = (encoder2).value
153
+ using_device = (using_device2).value
154
+ config_path = os.path.join(config_dir, config_name)
155
+ diff_config_path = os.path.join(config_dir, diff_config_name) if diff_config_name != "no_diff_config" else "configs/diffusion.yaml"
156
+ with open(config_path, 'r') as f:
157
+ config = json.load(f)
158
+ spk_dict = config["spk"]
159
+ spk_name = config.get('spk', None)
160
+ spk_choice = next(iter(spk_name)) if spk_name else "未检测到音色"
161
+ ckpt_path = os.path.join(workdir, ckpt_name)
162
+ _, _suffix = os.path.splitext(cluster_name)
163
+ fr = True if _suffix == ".pkl" else False #如果是pkl后缀就启用特征检索
164
+ cluster_path = os.path.join(workdir, cluster_name)
165
+ diff_model_path = os.path.join(diff_workdir, diff_model_name)
166
+ shallow_diffusion = True if diff_model_name != "no_diff" else False
167
+ use_spk_mix = False
168
+ device = None if using_device == "Auto" else using_device
169
+ model = Svc(ckpt_path,
170
+ config_path,
171
+ device,
172
+ cluster_path,
173
+ enhance,
174
+ diff_model_path,
175
+ diff_config_path,
176
+ shallow_diffusion,
177
+ only_diffusion,
178
+ use_spk_mix,
179
+ fr)
180
+ spk_list = list(spk_dict.keys())
181
+ clip = 25 if encoder == "Whisper-PPG" else 0 #Whisper必须强制切片25秒
182
+ device_name = torch.cuda.get_device_properties(model.dev).name if "cuda" in str(model.dev) else str(model.dev)
183
+ index_or_kmeans = "特征索引" if fr is True else "聚类模型"
184
+ clu_load = "未加载" if cluster_name == "no_clu" else cluster_name
185
+ diff_load = "未加载" if diff_model_name == "no_diff" else diff_model_name
186
+ loaded = cluster_name
187
+ #output_msg = f"模型被成功加载到了{device_name}上\n{index_or_kmeans}:{clu_load}\n扩散模型:{diff_load}"
188
+ #return output_msg, gr.Dropdown.update(choices=spk_list, value=spk_choice), clip
189
+
190
  def get_file_options(directory, extension):
191
  return [file for file in os.listdir(directory) if file.endswith(extension)]
192
 
 
709
  sovits_params, diff_params = get_default_settings()
710
 
711
  app = gr.Blocks()
712
+
713
+ def Newget_model_info(choice_ckpt2):
714
+ choice_ckpt = str(choice_ckpt2)
715
+ pthfile = os.path.join(workdir, choice_ckpt)
716
+ net = torch.load(pthfile, map_location=torch.device('cpu')) #cpu load
717
+ spk_emb = net["model"].get("emb_g.weight")
718
+ if spk_emb is None:
719
+ return "所选模型缺少emb_g.weight,你可能选择了一个底模"
720
+ _dim, _layer = spk_emb.size()
721
+ model_type = {
722
+ 768: "Vec768-Layer12",
723
+ 256: "Vec256-Layer9 / HubertSoft",
724
+ 1024: "Whisper-PPG"
725
+ }
726
+ return gr.Textbox(visible=False, value=model_type.get(_layer, "不受支持的模型"))
727
+
728
  with app:
729
  gr.Markdown(value="""
730
  ### So-VITS-SVC 4.1-Stable
 
739
 
740
  """)
741
  with gr.Tabs():
742
+ with gr.TabItem("推理"):
743
+ #with gr.Row():
744
+ # choice_ckpt = gr.Dropdown(label="模型选择", choices=ckpt_list, value="no_model")
745
+ # model_branch = gr.Textbox(label="模型编码器", placeholder="请先选择模型", interactive=False)
746
+ #choice_ckpt = gr.Dropdown(value="G_82400.pth", visible=False)
747
+ #with gr.Row():
748
+ # config_choice = gr.Dropdown(label="配置文件", choices=config_list, value="no_config")
749
+ # config_info = gr.Textbox(label="配置文件编码器", placeholder="请选择配置文件")
750
+ config_choice = gr.Dropdown(value="config.json", visible=False)
751
+ #gr.Markdown(value="""**请检查模型和配置文件的编码器是否匹配**""")
752
+ #with gr.Row():
753
+ # diff_choice = gr.Dropdown(label="(可选)选择扩散模型", choices=diff_list, value="no_diff", interactive=True)
754
+ # diff_config_choice = gr.Dropdown(label="扩散模型配置文件", choices=diff_config_list, value="no_diff_config", interactive=True)
755
+ diff_choice = gr.Dropdown(value="no_diff", visible=False)
756
+ diff_config_choice = gr.Dropdown(value="no_diff_config", visible=False)
757
  with gr.Row():
758
+ cluster_choice = gr.Dropdown(label="(可选)选择聚类模型/特征检索模型", choices=cluster_list, value="no_clu")
 
 
 
 
 
 
759
  with gr.Row():
760
  enhance = gr.Checkbox(label="是否使用NSF_HIFIGAN增强,该选项对部分训练集少的模型有一定的音质增强效果,但是对训练好的模型有反面效果,默认关闭", value=False)
761
+ #only_diffusion = gr.Checkbox(label="是否使用全扩散推理,开启后将不使用So-VITS模型,仅使用扩散模型进行完整扩散推理,默认关闭", value=False)
762
+ only_diffusion = gr.Checkbox(value=False, visible=False)
763
+ #using_device = gr.Dropdown(label="推理设备,默认为自动选择", choices=["Auto","cuda","cpu"], value="Auto")
764
+ using_device = gr.Dropdown(value='Auto', visible=False)
765
+ #refresh = gr.Button("刷新选项")
766
+ #loadckpt = gr.Button("加载模型", variant="primary")
767
+ #with gr.Row():
768
+ # model_message = gr.Textbox(label="Output Message")
769
+ # sid = gr.Dropdown(label="So-VITS说话人", value="speaker0")
770
+ sid = gr.Dropdown(value="1001", visible=False)
771
 
772
+ #choice_ckpt.change(get_model_info, [choice_ckpt], [model_branch])
773
+ model_branch = Newget_model_info("G_82400.pth")
774
+ #config_choice.change(load_json_encoder, [config_choice], [config_info])
775
+ #refresh.click(refresh_options,[],[choice_ckpt,config_choice,cluster_choice,diff_choice,diff_config_choice])
776
 
777
  gr.Markdown(value="""
778
  请稍等片刻,模型加载大约需要10秒。后续操作不需要重新加载模型
 
827
  vc_tts_submit = gr.Button("文本转语音", variant="primary")
828
  vc_output1 = gr.Textbox(label="Output Message")
829
  vc_output2 = gr.Audio(label="Output Audio")
830
+
831
+ def Newvc_fn(sid, input_audio, vc_transform, auto_f0, cluster_ratio, slice_db, noise_scale, pad_seconds, cl_num, lg_num, lgr_num, f0_predictor, enhancer_adaptive_key, cr_threshold, k_step, use_spk_mix, second_encoding, loudness_envelope_adjustment, clus2):
832
+ global model, loaded
833
+ if loaded != clus2:
834
+ Newload_model_func("G_82400.pth",clus2,config_choice,enhance,diff_choice,diff_config_choice,only_diffusion,model_branch,using_device)
835
+ loaded = clus2
836
+ try:
837
+ if input_audio is None:
838
+ return "You need to upload an audio", None
839
+ if model is None:
840
+ return "You need to upload an model", None
841
+ sampling_rate, audio = input_audio
842
+ temp_path = "temp.wav"
843
+ sf.write(temp_path, audio, sampling_rate, format="wav")
844
+ output_file_path = vc_infer(sid, audio, temp_path, vc_transform, auto_f0, cluster_ratio, slice_db, noise_scale, pad_seconds, cl_num, lg_num, lgr_num, f0_predictor, enhancer_adaptive_key, cr_threshold, k_step, use_spk_mix, second_encoding, loudness_envelope_adjustment)
845
+ os.remove(temp_path)
846
+ return "Success", output_file_path
847
+ except Exception as e:
848
+ if debug: traceback.print_exc()
849
+ raise gr.Error(e)
850
 
851
+ #loadckpt.click(load_model_func,[choice_ckpt,cluster_choice,config_choice,enhance,diff_choice,diff_config_choice,only_diffusion,model_branch,using_device],[model_message, sid, cl_num])
852
+ vc_submit.click(Newvc_fn, [sid, vc_input3, vc_transform,auto_f0,cluster_ratio, slice_db, noise_scale,pad_seconds,cl_num,lg_num,lgr_num,f0_predictor,enhancer_adaptive_key,cr_threshold,k_step,use_spk_mix,second_encoding,loudness_envelope_adjustment,cluster_choice], [vc_output1, vc_output2])
853
  vc_batch_submit.click(vc_batch_fn, [sid, vc_batch_files, vc_transform,auto_f0,cluster_ratio, slice_db, noise_scale,pad_seconds,cl_num,lg_num,lgr_num,f0_predictor,enhancer_adaptive_key,cr_threshold,k_step,use_spk_mix,second_encoding,loudness_envelope_adjustment], [vc_output1])
854
  vc_tts_submit.click(tts_fn, [text_input, tts_spk, sid, vc_transform,auto_f0,cluster_ratio, slice_db, noise_scale,pad_seconds,cl_num,lg_num,lgr_num,f0_predictor,enhancer_adaptive_key,cr_threshold,k_step,use_spk_mix,second_encoding,loudness_envelope_adjustment], [vc_output1, vc_output2])
855
+ """
856
+ with gr.TabItem("训练"):
857
+ gr.Markdown(value="""请将数据集文件夹放置在dataset_raw文件夹下,确认放置正确后点击下方获取数据集名称""")
858
+ raw_dirs_list=gr.Textbox(label="Raw dataset directory(s):")
859
+ get_raw_dirs=gr.Button("识别数据集", variant="primary")
860
+ gr.Markdown(value="""确认数据集正确识别后请选择训练使用的特征编码器和f0预测器,**如果要训练扩散模型,请选择Vec768l12或hubertsoft,并确保So-VITS和扩散模型使用同一个编码器**""")
861
+ with gr.Row():
862
+ gr.Markdown(value="""**vec256l9**: ContentVec(256Layer9),旧版本叫v1,So-VITS-SVC 4.0的基础版本,**暂不支持扩散模型**
863
+ **vec768l12**: 特征输入更换为ContentVec的第12层Transformer输出,模型理论上会更加还原训练集音色
864
+ **hubertsoft**: So-VITS-SVC 3.0使用的编码器,咬字更为准确,但可能存在多说话人音色泄露问题
865
+ **whisper-ppg**: 来自OpenAI,咬字最为准确,但和Hubertsoft一样存在多说话人音色泄露,且显存占用和训练时间有明显增加。**暂不支持扩散模型**
866
+ """)
867
+ gr.Markdown(value="""**crepe**: 抗噪能力最强,但预处理速度慢(不过如果你的显卡很强的话速度会很快)
868
+ **pm**: 预处理速度快,但抗噪能力较弱
869
+ **dio**: 先前版本预处理默认使用的f0预测器
870
+ **harvest**: 有一定抗噪能力,预处理显存占用友好,速度比较慢
871
+ """)
872
+ with gr.Row():
873
+ branch_selection = gr.Radio(label="选择训练使用的编码器", choices=["vec256l9","vec768l12","hubertsoft","whisper-ppg"], value="vec768l12", interactive=True)
874
+ f0_predictor_selection = gr.Radio(label="选择训练使用的f0预测器", choices=["crepe","pm","dio","harvest"], value="crepe", interactive=True)
875
+ use_diff = gr.Checkbox(label="是否使用浅扩散模型,如要训练浅扩散模型请勾选此项", value=True)
876
+ vol_aug=gr.Checkbox(label="是否启用响度嵌入和音量增强,启用后可以根据输入源控制输出响度,但对数据集质量的要求更高。**仅支持vec768l12编码器**", value=False)
877
+ with gr.Row():
878
+ skip_loudnorm = gr.Checkbox(label="是否跳过响度匹配,如果你已经用音频处理软件做过响度匹配,请勾选此处")
879
+ num_processes = gr.Slider(label="预处理使用的CPU线程数,可以大幅加快预处理速度,但线程数过大容易爆显存,建议12G显存设置为2", minimum=1, maximum=multiprocessing.cpu_count(), value=1, step=1)
880
+ with gr.Row():
881
+ raw_preprocess=gr.Button("数据预处理", variant="primary")
882
+ regenerate_config_btn=gr.Button("重新生成配置文件", variant="primary")
883
+ preprocess_output=gr.Textbox(label="预处理输出信息,完成后请检查一下是否有报错信息,如无则可以进行下一步", max_lines=999)
884
+ clear_preprocess_output=gr.Button("清空输出信息")
885
+ with gr.Group():
886
+ gr.Markdown(value="""填写训练设置和超参数""")
887
+ with gr.Row():
888
+ gr.Textbox(label="当前使用显卡信息", value=gpu_info)
889
+ gpu_selection=gr.Textbox(label="多卡用户请指定希望训练使用的显卡ID(0,1,2...)", value=gpus, interactive=True)
890
+ with gr.Row():
891
+ log_interval=gr.Textbox(label="每隔多少步(steps)生成一次评估日志", value=sovits_params['log_interval'])
892
+ eval_interval=gr.Textbox(label="每隔多少步(steps)验证并保存一次模型", value=sovits_params['eval_interval'])
893
+ keep_ckpts=gr.Textbox(label="仅保留最新的X个模型,超出该数字的旧模型会被删除。设置为0则永不删除", value=sovits_params['keep_ckpts'])
894
+ with gr.Row():
895
+ batch_size=gr.Textbox(label="批量大小,每步取多少条数据进行训练,大batch有助于训练但显著增加显存占用。6G显存建议设定为4", value=sovits_params['batch_size'])
896
+ lr=gr.Textbox(label="学习率,一般不用动,批量大小较大时可以适当增大学习率,但强烈不建议超过0.0002,有炸炉风险", value=sovits_params['learning_rate'])
897
+ fp16_run=gr.Checkbox(label="是否使用fp16混合精度训练,fp16训练可能降低显存占用和训练时间,但对模型质量的影响尚未查证", value=sovits_params['fp16_run'])
898
+ all_in_mem=gr.Checkbox(label="是否加载所有数据集到内存中,硬盘IO过于低下、同时内存容量远大于数据集体积时可以启用,能显著加快训练速度", value=sovits_params['all_in_mem'])
899
+ with gr.Row():
900
+ gr.Markdown("请检查右侧的说话人列表是否和你要训练的目标说话人一致,确认无误后点击写入配置文件,然后就可以开始训练了")
901
+ speakers=gr.Textbox(label="说话人列表")
902
+ with gr.Accordion(label = "扩散模型配置(训练扩散模型需要写入此处)", open=True):
903
+ with gr.Row():
904
+ diff_num_workers = gr.Number(label="num_workers, 如果你的电脑配置较高,可以将这里设置为0加快训练速度", value=diff_params['num_workers'])
905
+ diff_cache_all_data = gr.Checkbox(label="是否缓存数据,启用后可以加快训练速度,关闭后可以节省显存或内存,但会减慢训练速度", value=diff_params['cache_all_data'])
906
+ diff_cache_device = gr.Radio(label="若启用缓存数据,使用显存(cuda)还是内存(cpu)缓存,如果显卡显存充足,选择cuda以加快训练速度", choices=["cuda","cpu"], value=diff_params['cache_device'])
907
+ diff_amp_dtype = gr.Radio(label="训练数据类型,fp16可能会有更快的训练速度,前提是你的显卡支持", choices=["fp32","fp16"], value=diff_params['amp_dtype'])
908
+ with gr.Row():
909
+ diff_batch_size = gr.Number(label="批量大小(batch_size),根据显卡显存设置,小显存适当降低该项,6G显存可以设定为48,但该数值不要超过数据集总数量的1/4", value=diff_params['diff_batch_size'])
910
+ diff_lr = gr.Number(label="学习率(一般不需要动)", value=diff_params['diff_lr'])
911
+ diff_interval_log = gr.Number(label="每隔多少步(steps)生成一次评估日志", value = diff_params['diff_interval_log'])
912
+ diff_interval_val = gr.Number(label="每隔多少步(steps)验证并保存一次模型,如果你的批量大小较大,可以适当减少这里的数字,但不建议设置为1000以下", value=diff_params['diff_interval_val'])
913
+ diff_force_save = gr.Number(label="每隔多少步强制保留模型,只有该步数的倍数保存的模型会被保留,其余会被删除。设置为与验证步数相同的值则每个模型都会被保留", value=diff_params['diff_force_save'])
914
+ with gr.Row():
915
+ save_params=gr.Button("将当前设置保存为默认设置", variant="primary")
916
+ write_config=gr.Button("写入配置文件", variant="primary")
917
+ write_config_output=gr.Textbox(label="输出信息")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
918
 
919
+ gr.Markdown(value="""**点击从头开始训练**将会自动将已有的训练进度保存到models_backup文件夹,并自动装载预训练模型。
920
+ **继续上一次的训练进度**将从上一个保存模型的进度继续训练。继续训练进度无需重新预处理和写入配置文件。
921
+ 关于扩散、聚类和特征检索的详细说明请看[此处](https://www.yuque.com/umoubuton/ueupp5/kmui02dszo5zrqkz)。
922
+ """)
923
+ with gr.Row():
924
+ with gr.Column():
925
+ start_training=gr.Button("从头开始训练", variant="primary")
926
+ training_output=gr.Textbox(label="训练输出信息")
927
+ with gr.Column():
928
+ continue_training_btn=gr.Button("继续上一次的训练进度", variant="primary")
929
+ continue_training_output=gr.Textbox(label="训练输出信息")
930
+ with gr.Row():
931
+ with gr.Column():
932
+ diff_training_btn=gr.Button("从头训练扩散模型", variant="primary")
933
+ diff_training_output=gr.Textbox(label="训练输出信息")
934
+ with gr.Column():
935
+ diff_continue_training_btn=gr.Button("继续训练扩散模型", variant="primary")
936
+ diff_continue_training_output=gr.Textbox(label="训练输出信息")
937
+ with gr.Accordion(label = "聚类、特征检索训练", open=False):
938
+ with gr.Row():
939
+ with gr.Column():
940
+ kmeans_button=gr.Button("训练聚类模型", variant="primary")
941
+ kmeans_gpu = gr.Checkbox(label="使用GPU训练", value=True)
942
+ kmeans_output=gr.Textbox(label="训练输出信息")
943
+ with gr.Column():
944
+ index_button=gr.Button("训练特征检索模型", variant="primary")
945
+ index_output=gr.Textbox(label="训练输出信息")
946
+ """
947
  with gr.TabItem("小工具/实验室特性"):
948
  gr.Markdown(value="""
949
  ### So-vits-svc 4.1 小工具/实验室特性
 
1039
  compress_model_output = gr.Textbox(label="输出信息", value="")
1040
 
1041
  compress_model_btn.click(model_compression, [model_to_compress], [compress_model_output])
1042
+ """
1043
+ get_raw_dirs.click(load_raw_dirs,[],[raw_dirs_list])
1044
+ raw_preprocess.click(dataset_preprocess,[branch_selection, f0_predictor_selection, use_diff, vol_aug, skip_loudnorm, num_processes],[preprocess_output, speakers])
1045
+ regenerate_config_btn.click(regenerate_config,[branch_selection, vol_aug],[preprocess_output])
1046
+ clear_preprocess_output.click(clear_output,[],[preprocess_output])
1047
+ save_params.click(save_default_settings, [log_interval,eval_interval,keep_ckpts,batch_size,lr,fp16_run,all_in_mem,diff_num_workers,diff_cache_all_data,diff_cache_device,diff_amp_dtype,diff_batch_size,diff_lr,diff_interval_log,diff_interval_val,diff_force_save], [write_config_output])
1048
+ write_config.click(config_fn,[log_interval, eval_interval, keep_ckpts, batch_size, lr, fp16_run, all_in_mem, diff_num_workers, diff_cache_all_data, diff_batch_size, diff_lr, diff_interval_log, diff_interval_val, diff_cache_device, diff_amp_dtype, diff_force_save],[write_config_output])
1049
+ start_training.click(training,[gpu_selection, branch_selection],[training_output])
1050
+ diff_training_btn.click(diff_training,[branch_selection],[diff_training_output])
1051
+ continue_training_btn.click(continue_training,[gpu_selection, branch_selection],[continue_training_output])
1052
+ diff_continue_training_btn.click(diff_continue_training,[branch_selection],[diff_continue_training_output])
1053
+ kmeans_button.click(kmeans_training,[kmeans_gpu],[kmeans_output])
1054
+ index_button.click(index_training, [], [index_output])
1055
+ """
1056
  with gr.Tabs():
1057
  with gr.Row(variant="panel"):
1058
  with gr.Column():
 
1063
 
1064
  debug_button.change(debug_change,[],[])
1065
 
1066
+ app.queue(concurrency_count=1022, max_size=2044).launch(share=True)