yl12053 committed on
Commit
3e771ef
1 Parent(s): edf9eae
Files changed (1) hide show
  1. app.py +113 -205
app.py CHANGED
@@ -39,8 +39,6 @@ debug = False
39
  sovits_params = {}
40
  diff_params = {}
41
 
42
- loaded = None
43
-
44
  def debug_change():
45
  global debug
46
  debug = debug_button.value
@@ -142,51 +140,6 @@ def load_model_func(ckpt_name,cluster_name,config_name,enhance,diff_model_name,d
142
  output_msg = f"模型被成功加载到了{device_name}上\n{index_or_kmeans}:{clu_load}\n扩散模型:{diff_load}"
143
  return output_msg, gr.Dropdown.update(choices=spk_list, value=spk_choice), clip
144
 
145
- def Newload_model_func(ckpt_name,cluster_name,config_name2,enhance2,diff_model_name2,diff_config_name2,only_diffusion2,encoder2,using_device2):
146
- global model, loaded
147
- config_name = config_name2.value
148
- enhance = enhance2.value
149
- diff_model_name = diff_model_name2.value
150
- diff_config_name = (diff_config_name2).value
151
- only_diffusion = (only_diffusion2).value
152
- encoder = (encoder2).value
153
- using_device = (using_device2).value
154
- config_path = os.path.join(config_dir, config_name)
155
- diff_config_path = os.path.join(config_dir, diff_config_name) if diff_config_name != "no_diff_config" else "configs/diffusion.yaml"
156
- with open(config_path, 'r') as f:
157
- config = json.load(f)
158
- spk_dict = config["spk"]
159
- spk_name = config.get('spk', None)
160
- spk_choice = next(iter(spk_name)) if spk_name else "未检测到音色"
161
- ckpt_path = os.path.join(workdir, ckpt_name)
162
- _, _suffix = os.path.splitext(cluster_name)
163
- fr = True if _suffix == ".pkl" else False #如果是pkl后缀就启用特征检索
164
- cluster_path = os.path.join(workdir, cluster_name)
165
- diff_model_path = os.path.join(diff_workdir, diff_model_name)
166
- shallow_diffusion = True if diff_model_name != "no_diff" else False
167
- use_spk_mix = False
168
- device = None if using_device == "Auto" else using_device
169
- model = Svc(ckpt_path,
170
- config_path,
171
- device,
172
- cluster_path,
173
- enhance,
174
- diff_model_path,
175
- diff_config_path,
176
- shallow_diffusion,
177
- only_diffusion,
178
- use_spk_mix,
179
- fr)
180
- spk_list = list(spk_dict.keys())
181
- clip = 25 if encoder == "Whisper-PPG" else 0 #Whisper必须强制切片25秒
182
- device_name = torch.cuda.get_device_properties(model.dev).name if "cuda" in str(model.dev) else str(model.dev)
183
- index_or_kmeans = "特征索引" if fr is True else "聚类模型"
184
- clu_load = "未加载" if cluster_name == "no_clu" else cluster_name
185
- diff_load = "未加载" if diff_model_name == "no_diff" else diff_model_name
186
- loaded = cluster_name
187
- #output_msg = f"模型被成功加载到了{device_name}上\n{index_or_kmeans}:{clu_load}\n扩散模型:{diff_load}"
188
- #return output_msg, gr.Dropdown.update(choices=spk_list, value=spk_choice), clip
189
-
190
  def get_file_options(directory, extension):
191
  return [file for file in os.listdir(directory) if file.endswith(extension)]
192
 
@@ -709,22 +662,6 @@ gpus="-".join([i[0]for i in gpu_infos])
709
  sovits_params, diff_params = get_default_settings()
710
 
711
  app = gr.Blocks()
712
-
713
- def Newget_model_info(choice_ckpt2):
714
- choice_ckpt = str(choice_ckpt2)
715
- pthfile = os.path.join(workdir, choice_ckpt)
716
- net = torch.load(pthfile, map_location=torch.device('cpu')) #cpu load
717
- spk_emb = net["model"].get("emb_g.weight")
718
- if spk_emb is None:
719
- return "所选模型缺少emb_g.weight,你可能选择了一个底模"
720
- _dim, _layer = spk_emb.size()
721
- model_type = {
722
- 768: "Vec768-Layer12",
723
- 256: "Vec256-Layer9 / HubertSoft",
724
- 1024: "Whisper-PPG"
725
- }
726
- return gr.Textbox(visible=False, value=model_type.get(_layer, "不受支持的模型"))
727
-
728
  with app:
729
  gr.Markdown(value="""
730
  ### So-VITS-SVC 4.1-Stable
@@ -739,40 +676,31 @@ with app:
739
 
740
  """)
741
  with gr.Tabs():
742
- with gr.TabItem("推理"):
743
- #with gr.Row():
744
- # choice_ckpt = gr.Dropdown(label="模型选择", choices=ckpt_list, value="no_model")
745
- # model_branch = gr.Textbox(label="模型编码器", placeholder="请先选择模型", interactive=False)
746
- #choice_ckpt = gr.Dropdown(value="G_82400.pth", visible=False)
747
- #with gr.Row():
748
- # config_choice = gr.Dropdown(label="配置文件", choices=config_list, value="no_config")
749
- # config_info = gr.Textbox(label="配置文件编码器", placeholder="请选择配置文件")
750
- config_choice = gr.Dropdown(value="config.json", visible=False)
751
- #gr.Markdown(value="""**请检查模型和配置文件的编码器是否匹配**""")
752
- #with gr.Row():
753
- # diff_choice = gr.Dropdown(label="(可选)选择扩散模型", choices=diff_list, value="no_diff", interactive=True)
754
- # diff_config_choice = gr.Dropdown(label="扩散模型配置文件", choices=diff_config_list, value="no_diff_config", interactive=True)
755
- diff_choice = gr.Dropdown(value="no_diff", visible=False)
756
- diff_config_choice = gr.Dropdown(value="no_diff_config", visible=False)
757
  with gr.Row():
758
- cluster_choice = gr.Dropdown(label="(可选)选择聚类模型/特征检索模型", choices=cluster_list, value="no_clu")
 
 
 
 
 
 
 
 
 
759
  with gr.Row():
760
  enhance = gr.Checkbox(label="是否使用NSF_HIFIGAN增强,该选项对部分训练集少的模型有一定的音质增强效果,但是对训练好的模型有反面效果,默认关闭", value=False)
761
- #only_diffusion = gr.Checkbox(label="是否使用全扩散推理,开启后将不使用So-VITS模型,仅使用扩散模型进行完整扩散推理,默认关闭", value=False)
762
- only_diffusion = gr.Checkbox(value=False, visible=False)
763
- #using_device = gr.Dropdown(label="推理设备,默认为自动选择", choices=["Auto","cuda","cpu"], value="Auto")
764
- using_device = gr.Dropdown(value='Auto', visible=False)
765
- #refresh = gr.Button("刷新选项")
766
- #loadckpt = gr.Button("加载模型", variant="primary")
767
- #with gr.Row():
768
- # model_message = gr.Textbox(label="Output Message")
769
- # sid = gr.Dropdown(label="So-VITS说话人", value="speaker0")
770
- sid = gr.Dropdown(value="1001", visible=False)
771
 
772
- #choice_ckpt.change(get_model_info, [choice_ckpt], [model_branch])
773
- model_branch = Newget_model_info("G_82400.pth")
774
- #config_choice.change(load_json_encoder, [config_choice], [config_info])
775
- #refresh.click(refresh_options,[],[choice_ckpt,config_choice,cluster_choice,diff_choice,diff_config_choice])
776
 
777
  gr.Markdown(value="""
778
  请稍等片刻,模型加载大约需要10秒。后续操作不需要重新加载模型
@@ -827,121 +755,101 @@ with app:
827
  vc_tts_submit = gr.Button("文本转语音", variant="primary")
828
  vc_output1 = gr.Textbox(label="Output Message")
829
  vc_output2 = gr.Audio(label="Output Audio")
830
-
831
- def Newvc_fn(sid, input_audio, vc_transform, auto_f0, cluster_ratio, slice_db, noise_scale, pad_seconds, cl_num, lg_num, lgr_num, f0_predictor, enhancer_adaptive_key, cr_threshold, k_step, use_spk_mix, second_encoding, loudness_envelope_adjustment, clus2):
832
- global model, loaded
833
- if loaded != clus2:
834
- Newload_model_func("G_82400.pth",clus2,config_choice,enhance,diff_choice,diff_config_choice,only_diffusion,model_branch,using_device)
835
- loaded = clus2
836
- try:
837
- if input_audio is None:
838
- return "You need to upload an audio", None
839
- if model is None:
840
- return "You need to upload an model", None
841
- sampling_rate, audio = input_audio
842
- temp_path = "temp.wav"
843
- sf.write(temp_path, audio, sampling_rate, format="wav")
844
- output_file_path = vc_infer(sid, audio, temp_path, vc_transform, auto_f0, cluster_ratio, slice_db, noise_scale, pad_seconds, cl_num, lg_num, lgr_num, f0_predictor, enhancer_adaptive_key, cr_threshold, k_step, use_spk_mix, second_encoding, loudness_envelope_adjustment)
845
- os.remove(temp_path)
846
- return "Success", output_file_path
847
- except Exception as e:
848
- if debug: traceback.print_exc()
849
- raise gr.Error(e)
850
 
851
- #loadckpt.click(load_model_func,[choice_ckpt,cluster_choice,config_choice,enhance,diff_choice,diff_config_choice,only_diffusion,model_branch,using_device],[model_message, sid, cl_num])
852
- vc_submit.click(Newvc_fn, [sid, vc_input3, vc_transform,auto_f0,cluster_ratio, slice_db, noise_scale,pad_seconds,cl_num,lg_num,lgr_num,f0_predictor,enhancer_adaptive_key,cr_threshold,k_step,use_spk_mix,second_encoding,loudness_envelope_adjustment,cluster_choice], [vc_output1, vc_output2])
853
  vc_batch_submit.click(vc_batch_fn, [sid, vc_batch_files, vc_transform,auto_f0,cluster_ratio, slice_db, noise_scale,pad_seconds,cl_num,lg_num,lgr_num,f0_predictor,enhancer_adaptive_key,cr_threshold,k_step,use_spk_mix,second_encoding,loudness_envelope_adjustment], [vc_output1])
854
  vc_tts_submit.click(tts_fn, [text_input, tts_spk, sid, vc_transform,auto_f0,cluster_ratio, slice_db, noise_scale,pad_seconds,cl_num,lg_num,lgr_num,f0_predictor,enhancer_adaptive_key,cr_threshold,k_step,use_spk_mix,second_encoding,loudness_envelope_adjustment], [vc_output1, vc_output2])
855
- with gr.TabItem("训练"):
856
- gr.Markdown(value="""请将数据集文件夹放置在dataset_raw文件夹下,确认放置正确后点击下方获取数据集名称""")
857
- raw_dirs_list=gr.Textbox(label="Raw dataset directory(s):")
858
- get_raw_dirs=gr.Button("识别数据集", variant="primary")
859
- gr.Markdown(value="""确认数据集正确识别后请选择训练使用的特征编码器和f0预测器,**如果要训练扩散模型,请选择Vec768l12或hubertsoft,并确保So-VITS和扩散模型使用同一个编码器**""")
860
- with gr.Row():
861
- gr.Markdown(value="""**vec256l9**: ContentVec(256Layer9),旧版本叫v1,So-VITS-SVC 4.0的基础版本,**暂不支持扩散模型**
862
- **vec768l12**: 特征输入更换为ContentVec的第12层Transformer输出,模型理论上会更加还原训练集音色
863
- **hubertsoft**: So-VITS-SVC 3.0使用的编码器,咬字更为准确,但可能存在多说话人音色泄露问题
864
- **whisper-ppg**: 来自OpenAI,咬字最为准确,但和Hubertsoft一样存在多说话人音色泄露,且显存占用和训练时间有明显增加。**暂不支持扩散模型**
865
- """)
866
- gr.Markdown(value="""**crepe**: 抗噪能力最强,但预处理速度慢(不过如果你的显卡很强的话速度会很快)
867
- **pm**: 预处理速度快,但抗噪能力较弱
868
- **dio**: 先前版本预处理默认使用的f0预测器
869
- **harvest**: 有一定抗噪能力,预处理显存占用友好,速度比较慢
870
- """)
871
- with gr.Row():
872
- branch_selection = gr.Radio(label="选择训练使用的编码器", choices=["vec256l9","vec768l12","hubertsoft","whisper-ppg"], value="vec768l12", interactive=True)
873
- f0_predictor_selection = gr.Radio(label="选择训练使用的f0预测器", choices=["crepe","pm","dio","harvest"], value="crepe", interactive=True)
874
- use_diff = gr.Checkbox(label="是否使用浅扩散模型,如要训练浅扩散模型请勾选此项", value=True)
875
- vol_aug=gr.Checkbox(label="是否启用响度嵌入和音量增强,启用后可以根据输入源控制输出响度,但对数据集质量的要求更高。**仅支持vec768l12编码器**", value=False)
876
- with gr.Row():
877
- skip_loudnorm = gr.Checkbox(label="是否跳过响度匹配,如果你已经用音频处理软件做过响度匹配,请勾选此处")
878
- num_processes = gr.Slider(label="预处理使用的CPU线程数,可以大幅加快预处理速度,但线程数过大容易爆显存,建议12G显存设置为2", minimum=1, maximum=multiprocessing.cpu_count(), value=1, step=1)
879
- with gr.Row():
880
- raw_preprocess=gr.Button("数据预处理", variant="primary")
881
- regenerate_config_btn=gr.Button("重新生成配置文件", variant="primary")
882
- preprocess_output=gr.Textbox(label="预处理输出信息,完成后请检查一下是否有报错信息,如无则可以进行下一步", max_lines=999)
883
- clear_preprocess_output=gr.Button("清空输出信息")
884
- with gr.Group():
885
- gr.Markdown(value="""填写训练设置和超参数""")
886
- with gr.Row():
887
- gr.Textbox(label="当前使用显卡信息", value=gpu_info)
888
- gpu_selection=gr.Textbox(label="多卡用户请指定希望训练使用的显卡ID(0,1,2...)", value=gpus, interactive=True)
889
- with gr.Row():
890
- log_interval=gr.Textbox(label="每隔多少步(steps)生成一次评估日志", value=sovits_params['log_interval'])
891
- eval_interval=gr.Textbox(label="每隔多少步(steps)验证并保存一次模型", value=sovits_params['eval_interval'])
892
- keep_ckpts=gr.Textbox(label="仅保留最新的X个模型,超出该数字的旧模型会被删除。设置为0则永不删除", value=sovits_params['keep_ckpts'])
893
- with gr.Row():
894
- batch_size=gr.Textbox(label="批量大小,每步取多少条数据进行训练,大batch有助于训练但显著增加显存占用。6G显存建议设定为4", value=sovits_params['batch_size'])
895
- lr=gr.Textbox(label="学习率,一般不用动,批量大小较大时可以适当增大学习率,但强烈不建议超过0.0002,有炸炉风险", value=sovits_params['learning_rate'])
896
- fp16_run=gr.Checkbox(label="是否使用fp16混合精度训练,fp16训练可能降低显存占用和训练时间,但对模型质量的影响尚未查证", value=sovits_params['fp16_run'])
897
- all_in_mem=gr.Checkbox(label="是否加载所有数据集到内存中,硬盘IO过于低下、同时内存容量远大于数据集体积时可以启用,能显著加快训练速度", value=sovits_params['all_in_mem'])
898
- with gr.Row():
899
- gr.Markdown("请检查右侧的说话人列表是否和你要训练的目标说话人一致,确认无误后点击写入配置文件,然后就可以开始训练了")
900
- speakers=gr.Textbox(label="说话人列表")
901
- with gr.Accordion(label = "扩散模型配置(训练扩散模型需要写入此处)", open=True):
902
- with gr.Row():
903
- diff_num_workers = gr.Number(label="num_workers, 如果你的电脑配置较高,可以将这里设置为0加快训练速度", value=diff_params['num_workers'])
904
- diff_cache_all_data = gr.Checkbox(label="是否缓存数据,启用后可以加快训练速度,关闭后可以节省显存或内存,但会减慢训练速度", value=diff_params['cache_all_data'])
905
- diff_cache_device = gr.Radio(label="若启用缓存数据,使用显存(cuda)还是内存(cpu)缓存,如果显卡显存充足,选择cuda以加快训练速度", choices=["cuda","cpu"], value=diff_params['cache_device'])
906
- diff_amp_dtype = gr.Radio(label="训练数据类型,fp16可能会有更快的训练速度,前提是你的显卡支持", choices=["fp32","fp16"], value=diff_params['amp_dtype'])
907
- with gr.Row():
908
- diff_batch_size = gr.Number(label="批量大小(batch_size),根据显卡显存设置,小显存适当降低该项,6G显存可以设定为48,但该数值不要超过数据集总数量的1/4", value=diff_params['diff_batch_size'])
909
- diff_lr = gr.Number(label="学习率(一般不需要动)", value=diff_params['diff_lr'])
910
- diff_interval_log = gr.Number(label="每隔多少步(steps)生成一次评估日志", value = diff_params['diff_interval_log'])
911
- diff_interval_val = gr.Number(label="每隔多少步(steps)验证并保存一次模型,如果你的批量大小较大,可以适当减少这里的数字,但不建议设置为1000以下", value=diff_params['diff_interval_val'])
912
- diff_force_save = gr.Number(label="每隔多少步强制保留模型,只有该步数的倍数保存的模型会被保留,其余会被删除。设置为与验证步数相同的值则每个模型都会被保留", value=diff_params['diff_force_save'])
913
- with gr.Row():
914
- save_params=gr.Button("将当前设置保存为默认设置", variant="primary")
915
- write_config=gr.Button("写入配置文件", variant="primary")
916
- write_config_output=gr.Textbox(label="输出信息")
917
-
918
- gr.Markdown(value="""**点击从头开始训练**将会自动将已有的训练进度保存到models_backup文件夹,并自动装载预训练模型。
919
- **继续上一次的训练进度**将从上一个保存模型的进度继续训练。继续训练进度无需重新预处理和写入配置文件。
920
- 关于扩散、聚类和特征检索的详细说明请看[此处](https://www.yuque.com/umoubuton/ueupp5/kmui02dszo5zrqkz)。
921
- """)
922
- with gr.Row():
923
- with gr.Column():
924
- start_training=gr.Button("从头开始训练", variant="primary")
925
- training_output=gr.Textbox(label="训练输出信息")
926
- with gr.Column():
927
- continue_training_btn=gr.Button("继续上一次的训练进度", variant="primary")
928
- continue_training_output=gr.Textbox(label="训练输出信息")
929
- with gr.Row():
930
- with gr.Column():
931
- diff_training_btn=gr.Button("从头训练扩散模型", variant="primary")
932
- diff_training_output=gr.Textbox(label="训练输出信息")
933
- with gr.Column():
934
- diff_continue_training_btn=gr.Button("继续训练扩散模型", variant="primary")
935
- diff_continue_training_output=gr.Textbox(label="训练输出信息")
936
- with gr.Accordion(label = "聚类、特征检索训练", open=False):
937
- with gr.Row():
938
- with gr.Column():
939
- kmeans_button=gr.Button("训练聚类模型", variant="primary")
940
- kmeans_gpu = gr.Checkbox(label="使用GPU训练", value=True)
941
- kmeans_output=gr.Textbox(label="训练输出信息")
942
- with gr.Column():
943
- index_button=gr.Button("训练特征检索模型", variant="primary")
944
- index_output=gr.Textbox(label="训练输出信息")
945
 
946
  with gr.TabItem("小工具/实验室特性"):
947
  gr.Markdown(value="""
 
39
  sovits_params = {}
40
  diff_params = {}
41
 
 
 
42
  def debug_change():
43
  global debug
44
  debug = debug_button.value
 
140
  output_msg = f"模型被成功加载到了{device_name}上\n{index_or_kmeans}:{clu_load}\n扩散模型:{diff_load}"
141
  return output_msg, gr.Dropdown.update(choices=spk_list, value=spk_choice), clip
142
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
143
  def get_file_options(directory, extension):
144
  return [file for file in os.listdir(directory) if file.endswith(extension)]
145
 
 
662
  sovits_params, diff_params = get_default_settings()
663
 
664
  app = gr.Blocks()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
665
  with app:
666
  gr.Markdown(value="""
667
  ### So-VITS-SVC 4.1-Stable
 
676
 
677
  """)
678
  with gr.Tabs():
679
+ with gr.TabItem("特别周 (Special Week)"):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
680
  with gr.Row():
681
+ choice_ckpt = gr.Dropdown(label="模型选择", choices=ckpt_list, value="no_model")
682
+ model_branch = gr.Textbox(label="模型编码器", placeholder="请先选择模型", interactive=False)
683
+ with gr.Row():
684
+ config_choice = gr.Dropdown(label="配置文件", choices=config_list, value="no_config")
685
+ config_info = gr.Textbox(label="配置文件编码器", placeholder="请选择配置文件")
686
+ gr.Markdown(value="""**请检查模型和配置文件的编码器是否匹配**""")
687
+ with gr.Row():
688
+ diff_choice = gr.Dropdown(label="(可选)选择扩散模型", choices=diff_list, value="no_diff", interactive=True)
689
+ diff_config_choice = gr.Dropdown(label="扩散模型配置文件", choices=diff_config_list, value="no_diff_config", interactive=True)
690
+ cluster_choice = gr.Dropdown(label="(可选)选择聚类模型/特征检索模型", choices=cluster_list, value="no_clu")
691
  with gr.Row():
692
  enhance = gr.Checkbox(label="是否使用NSF_HIFIGAN增强,该选项对部分训练集少的模型有一定的音质增强效果,但是对训练好的模型有反面效果,默认关闭", value=False)
693
+ only_diffusion = gr.Checkbox(label="是否使用全扩散推理,开启后将不使用So-VITS模型,仅使用扩散模型进行完整扩散推理,默认关闭", value=False)
694
+ using_device = gr.Dropdown(label="推理设备,默认为自动选择", choices=["Auto","cuda","cpu"], value="Auto")
695
+ refresh = gr.Button("刷新选项")
696
+ loadckpt = gr.Button("加载模型", variant="primary")
697
+ with gr.Row():
698
+ model_message = gr.Textbox(label="Output Message")
699
+ sid = gr.Dropdown(label="So-VITS说话人", value="speaker0")
 
 
 
700
 
701
+ choice_ckpt.change(get_model_info, [choice_ckpt], [model_branch])
702
+ config_choice.change(load_json_encoder, [config_choice], [config_info])
703
+ refresh.click(refresh_options,[],[choice_ckpt,config_choice,cluster_choice,diff_choice,diff_config_choice])
 
704
 
705
  gr.Markdown(value="""
706
  请稍等片刻,模型加载大约需要10秒。后续操作不需要重新加载模型
 
755
  vc_tts_submit = gr.Button("文本转语音", variant="primary")
756
  vc_output1 = gr.Textbox(label="Output Message")
757
  vc_output2 = gr.Audio(label="Output Audio")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
758
 
759
+ loadckpt.click(load_model_func,[choice_ckpt,cluster_choice,config_choice,enhance,diff_choice,diff_config_choice,only_diffusion,model_branch,using_device],[model_message, sid, cl_num])
760
+ vc_submit.click(vc_fn, [sid, vc_input3, vc_transform,auto_f0,cluster_ratio, slice_db, noise_scale,pad_seconds,cl_num,lg_num,lgr_num,f0_predictor,enhancer_adaptive_key,cr_threshold,k_step,use_spk_mix,second_encoding,loudness_envelope_adjustment], [vc_output1, vc_output2])
761
  vc_batch_submit.click(vc_batch_fn, [sid, vc_batch_files, vc_transform,auto_f0,cluster_ratio, slice_db, noise_scale,pad_seconds,cl_num,lg_num,lgr_num,f0_predictor,enhancer_adaptive_key,cr_threshold,k_step,use_spk_mix,second_encoding,loudness_envelope_adjustment], [vc_output1])
762
  vc_tts_submit.click(tts_fn, [text_input, tts_spk, sid, vc_transform,auto_f0,cluster_ratio, slice_db, noise_scale,pad_seconds,cl_num,lg_num,lgr_num,f0_predictor,enhancer_adaptive_key,cr_threshold,k_step,use_spk_mix,second_encoding,loudness_envelope_adjustment], [vc_output1, vc_output2])
763
+ #with gr.TabItem("训练"):
764
+ # gr.Markdown(value="""请将数据集文件夹放置在dataset_raw文件夹下,确认放置正确后点击下方获取数据集名称""")
765
+ # raw_dirs_list=gr.Textbox(label="Raw dataset directory(s):")
766
+ # get_raw_dirs=gr.Button("识别数据集", variant="primary")
767
+ # gr.Markdown(value="""确认数据集正确识别后请选择训练使用的特征编码器和f0预测器,**如果要训练扩散模型,请选择Vec768l12或hubertsoft,并确保So-VITS和扩散模型使用同一个编码器**""")
768
+ # with gr.Row():
769
+ # gr.Markdown(value="""**vec256l9**: ContentVec(256Layer9),旧版本叫v1,So-VITS-SVC 4.0的基础版本,**暂不支持扩散模型**
770
+ # **vec768l12**: 特征输入更换为ContentVec的第12层Transformer输出,模型理论上会更加还原训练集音色
771
+ # **hubertsoft**: So-VITS-SVC 3.0使用的编码器,咬字更为准确,但可能存在多说话人音色泄露问题
772
+ # **whisper-ppg**: 来自OpenAI,咬字最为准确,但和Hubertsoft一样存在多说话人音色泄露,且显存占用和训练时间有明显增加。**暂不支持扩散模型**
773
+ # """)
774
+ # gr.Markdown(value="""**crepe**: 抗噪能力最强,但预处理速度慢(不过如果你的显卡很强的话速度会很快)
775
+ # **pm**: 预处理速度快,但抗噪能力较弱
776
+ # **dio**: 先前版本预处理默认使用的f0预测器
777
+ # **harvest**: 有一定抗噪能力,预处理显存占用友好,速度比较慢
778
+ # """)
779
+ # with gr.Row():
780
+ # branch_selection = gr.Radio(label="选择训练使用的编码器", choices=["vec256l9","vec768l12","hubertsoft","whisper-ppg"], value="vec768l12", interactive=True)
781
+ # f0_predictor_selection = gr.Radio(label="选择训练使用的f0预测器", choices=["crepe","pm","dio","harvest"], value="crepe", interactive=True)
782
+ # use_diff = gr.Checkbox(label="是否使用浅扩散模型,如要训练浅扩散模型请勾选此项", value=True)
783
+ # vol_aug=gr.Checkbox(label="是否启用响度嵌入和音量增强,启用后可以根据输入源控制输出响度,但对数据集质量的要求更高。**仅支持vec768l12编码器**", value=False)
784
+ # with gr.Row():
785
+ # skip_loudnorm = gr.Checkbox(label="是否跳过响度匹配,如果你已经用音频处理软件做过响度匹配,请勾选此处")
786
+ # num_processes = gr.Slider(label="预处理使用的CPU线程数,可以大幅加快预处理速度,但线程数过大容易爆显存,建议12G显存设置为2", minimum=1, maximum=multiprocessing.cpu_count(), value=1, step=1)
787
+ # with gr.Row():
788
+ # raw_preprocess=gr.Button("数据预处理", variant="primary")
789
+ # regenerate_config_btn=gr.Button("重新生成配置文件", variant="primary")
790
+ # preprocess_output=gr.Textbox(label="预处理输出信息,完成后请检查一下是否有报错信息,如无则可以进行下一步", max_lines=999)
791
+ # clear_preprocess_output=gr.Button("清空输出信息")
792
+ # with gr.Group():
793
+ # gr.Markdown(value="""填写训练设置和超参数""")
794
+ # with gr.Row():
795
+ # gr.Textbox(label="当前使用显卡信息", value=gpu_info)
796
+ # gpu_selection=gr.Textbox(label="多卡用户请指定希望训练使用的显卡ID(0,1,2...)", value=gpus, interactive=True)
797
+ # with gr.Row():
798
+ # log_interval=gr.Textbox(label="每隔多少步(steps)生成一次评估日志", value=sovits_params['log_interval'])
799
+ # eval_interval=gr.Textbox(label="每隔多少步(steps)验证并保存一次模型", value=sovits_params['eval_interval'])
800
+ # keep_ckpts=gr.Textbox(label="仅保留最新的X个模型,超出该数字的旧模型会被删除。设置为0则永不删除", value=sovits_params['keep_ckpts'])
801
+ # with gr.Row():
802
+ # batch_size=gr.Textbox(label="批量大小,每步取多少条数据进行训练,大batch有助于训练但显著增加显存占用。6G显存建议设定为4", value=sovits_params['batch_size'])
803
+ # lr=gr.Textbox(label="学习率,一般不用动,批量大小较大时可以适当增大学习率,但强烈不建议超过0.0002,有炸炉风险", value=sovits_params['learning_rate'])
804
+ # fp16_run=gr.Checkbox(label="是否使用fp16混合精度训练,fp16训练可能降低显存占用和训练时间,但对模型质量的影响尚未查证", value=sovits_params['fp16_run'])
805
+ # all_in_mem=gr.Checkbox(label="是否加载所有数据集到内存中,硬盘IO过于低下、同时内存容量远大于数据集体积时可以启用,能显著加快训练速度", value=sovits_params['all_in_mem'])
806
+ # with gr.Row():
807
+ # gr.Markdown("请检查右侧的说话人列表是否和你要训练的目标说话人一致,确认无误后点击写入配置文件,然后就可以开始训练了")
808
+ # speakers=gr.Textbox(label="说话人列表")
809
+ # with gr.Accordion(label = "扩散模型配置(训练扩散模型需要写入此处)", open=True):
810
+ # with gr.Row():
811
+ # diff_num_workers = gr.Number(label="num_workers, 如果你的电脑配置较高,可以将这里设置为0加快训练速度", value=diff_params['num_workers'])
812
+ # diff_cache_all_data = gr.Checkbox(label="是否缓存数据,启用后可以加快训练速度,关闭后可以节省显存或内存,但会减慢训练速度", value=diff_params['cache_all_data'])
813
+ # diff_cache_device = gr.Radio(label="若启用缓存数据,使用显存(cuda)还是内存(cpu)缓存,如果显卡显存充足,选择cuda以加快训练速度", choices=["cuda","cpu"], value=diff_params['cache_device'])
814
+ # diff_amp_dtype = gr.Radio(label="训练数据类型,fp16可能会有更快的训练速度,前提是你的显卡支持", choices=["fp32","fp16"], value=diff_params['amp_dtype'])
815
+ # with gr.Row():
816
+ # diff_batch_size = gr.Number(label="批量大小(batch_size),根据显卡显存设置,小显存适当降低该项,6G显存可以设定为48,但该数值不要超过数据集总数量的1/4", value=diff_params['diff_batch_size'])
817
+ # diff_lr = gr.Number(label="学习率(一般不需要动)", value=diff_params['diff_lr'])
818
+ # diff_interval_log = gr.Number(label="每隔多少步(steps)生成一次评估日志", value = diff_params['diff_interval_log'])
819
+ # diff_interval_val = gr.Number(label="每隔多少步(steps)验证并保存一次模型,如果你的批量大小较大,可以适当减少这里的数字,但不建议设置为1000以下", value=diff_params['diff_interval_val'])
820
+ # diff_force_save = gr.Number(label="每隔多少步强制保留模型,只有该步数的倍数保存的模型会被保留,其余会被删除。设置为与验证步数相同的值则每个模型都会被保留", value=diff_params['diff_force_save'])
821
+ # with gr.Row():
822
+ # save_params=gr.Button("将当前设置保存为默认设置", variant="primary")
823
+ # write_config=gr.Button("写入配置文件", variant="primary")
824
+ # write_config_output=gr.Textbox(label="输出信息")
825
+
826
+ # gr.Markdown(value="""**点击从头开始训练**将会自动将已有的训练进度保存到models_backup文件夹,并自动装载预训练模型。
827
+ # **继续上一次的训练进度**将从上一个保存模型的进度继续训练。继续训练进度无需重新预处理和写入配置文件。
828
+ # 关于扩散、聚类和特征检索的详细说明请看[此处](https://www.yuque.com/umoubuton/ueupp5/kmui02dszo5zrqkz)。
829
+ # """)
830
+ # with gr.Row():
831
+ # with gr.Column():
832
+ # start_training=gr.Button("从头开始训练", variant="primary")
833
+ # training_output=gr.Textbox(label="训练输出信息")
834
+ # with gr.Column():
835
+ # continue_training_btn=gr.Button("继续上一次的训练进度", variant="primary")
836
+ # continue_training_output=gr.Textbox(label="训练输出信息")
837
+ # with gr.Row():
838
+ # with gr.Column():
839
+ # diff_training_btn=gr.Button("从头训练扩散模型", variant="primary")
840
+ # diff_training_output=gr.Textbox(label="训练输出信息")
841
+ # with gr.Column():
842
+ # diff_continue_training_btn=gr.Button("继续训练扩散模型", variant="primary")
843
+ # diff_continue_training_output=gr.Textbox(label="训练输出信息")
844
+ # with gr.Accordion(label = "聚类、特征检索训练", open=False):
845
+ # with gr.Row():
846
+ # with gr.Column():
847
+ # kmeans_button=gr.Button("训练聚类模型", variant="primary")
848
+ # kmeans_gpu = gr.Checkbox(label="使用GPU训练", value=True)
849
+ # kmeans_output=gr.Textbox(label="训练输出信息")
850
+ # with gr.Column():
851
+ # index_button=gr.Button("训练特征检索模型", variant="primary")
852
+ # index_output=gr.Textbox(label="训练输出信息")
853
 
854
  with gr.TabItem("小工具/实验室特性"):
855
  gr.Markdown(value="""