COM
Browse files
app.py
CHANGED
@@ -39,6 +39,8 @@ debug = False
|
|
39 |
sovits_params = {}
|
40 |
diff_params = {}
|
41 |
|
|
|
|
|
42 |
def debug_change():
    """Store the ``value`` attribute of ``debug_button`` in the module-level ``debug`` flag.

    Takes no arguments and returns ``None``.
    """
    global debug
    # NOTE(review): this reads the component object's attribute, not an event
    # payload -- confirm it reflects the live checkbox state in Gradio.
    button_state = debug_button.value
    debug = button_state
|
@@ -140,6 +142,51 @@ def load_model_func(ckpt_name,cluster_name,config_name,enhance,diff_model_name,d
|
|
140 |
output_msg = f"模型被成功加载到了{device_name}上\n{index_or_kmeans}:{clu_load}\n扩散模型:{diff_load}"
|
141 |
return output_msg, gr.Dropdown.update(choices=spk_list, value=spk_choice), clip
|
142 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
143 |
def get_file_options(directory, extension):
    """Return the entries of ``directory`` whose names end with ``extension``.

    Args:
        directory: Path of the directory to list.
        extension: Suffix to match; anything ``str.endswith`` accepts
            (a string or a tuple of strings).

    Returns:
        A list of matching entry names (not full paths), sorted so the
        result is stable across platforms and runs; ``os.listdir`` itself
        makes no ordering guarantee.

    Raises:
        OSError: Propagated from ``os.listdir`` (e.g. missing directory).
    """
    return sorted(
        name for name in os.listdir(directory) if name.endswith(extension)
    )
|
145 |
|
@@ -662,6 +709,22 @@ gpus="-".join([i[0]for i in gpu_infos])
|
|
662 |
sovits_params, diff_params = get_default_settings()
|
663 |
|
664 |
app = gr.Blocks()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
665 |
with app:
|
666 |
gr.Markdown(value="""
|
667 |
### So-VITS-SVC 4.1-Stable
|
@@ -676,31 +739,40 @@ with app:
|
|
676 |
|
677 |
""")
|
678 |
with gr.Tabs():
|
679 |
-
with gr.TabItem("
|
680 |
-
with gr.Row():
|
681 |
-
|
682 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
683 |
with gr.Row():
|
684 |
-
|
685 |
-
config_info = gr.Textbox(label="配置文件编码器", placeholder="请选择配置文件")
|
686 |
-
gr.Markdown(value="""**请检查模型和配置文件的编码器是否匹配**""")
|
687 |
-
with gr.Row():
|
688 |
-
diff_choice = gr.Dropdown(label="(可选)选择扩散模型", choices=diff_list, value="no_diff", interactive=True)
|
689 |
-
diff_config_choice = gr.Dropdown(label="扩散模型配置文件", choices=diff_config_list, value="no_diff_config", interactive=True)
|
690 |
-
cluster_choice = gr.Dropdown(label="(可选)选择聚类模型/特征检索模型", choices=cluster_list, value="no_clu")
|
691 |
with gr.Row():
|
692 |
enhance = gr.Checkbox(label="是否使用NSF_HIFIGAN增强,该选项对部分训练集少的模型有一定的音质增强效果,但是对训练好的模型有反面效果,默认关闭", value=False)
|
693 |
-
only_diffusion = gr.Checkbox(label="是否使用全扩散推理,开启后将不使用So-VITS模型,仅使用扩散模型进行完整扩散推理,默认关闭", value=False)
|
694 |
-
|
695 |
-
|
696 |
-
|
697 |
-
|
698 |
-
|
699 |
-
|
|
|
|
|
|
|
700 |
|
701 |
-
choice_ckpt.change(get_model_info, [choice_ckpt], [model_branch])
|
702 |
-
|
703 |
-
|
|
|
704 |
|
705 |
gr.Markdown(value="""
|
706 |
请稍等片刻,模型加载大约需要10秒。后续操作不需要重新加载模型
|
@@ -755,102 +827,123 @@ with app:
|
|
755 |
vc_tts_submit = gr.Button("文本转语音", variant="primary")
|
756 |
vc_output1 = gr.Textbox(label="Output Message")
|
757 |
vc_output2 = gr.Audio(label="Output Audio")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
758 |
|
759 |
-
loadckpt.click(load_model_func,[choice_ckpt,cluster_choice,config_choice,enhance,diff_choice,diff_config_choice,only_diffusion,model_branch,using_device],[model_message, sid, cl_num])
|
760 |
-
vc_submit.click(
|
761 |
vc_batch_submit.click(vc_batch_fn, [sid, vc_batch_files, vc_transform,auto_f0,cluster_ratio, slice_db, noise_scale,pad_seconds,cl_num,lg_num,lgr_num,f0_predictor,enhancer_adaptive_key,cr_threshold,k_step,use_spk_mix,second_encoding,loudness_envelope_adjustment], [vc_output1])
|
762 |
vc_tts_submit.click(tts_fn, [text_input, tts_spk, sid, vc_transform,auto_f0,cluster_ratio, slice_db, noise_scale,pad_seconds,cl_num,lg_num,lgr_num,f0_predictor,enhancer_adaptive_key,cr_threshold,k_step,use_spk_mix,second_encoding,loudness_envelope_adjustment], [vc_output1, vc_output2])
|
763 |
-
|
764 |
-
|
765 |
-
|
766 |
-
|
767 |
-
|
768 |
-
|
769 |
-
|
770 |
-
|
771 |
-
|
772 |
-
|
773 |
-
|
774 |
-
|
775 |
-
|
776 |
-
|
777 |
-
|
778 |
-
|
779 |
-
|
780 |
-
|
781 |
-
|
782 |
-
|
783 |
-
|
784 |
-
|
785 |
-
|
786 |
-
|
787 |
-
|
788 |
-
|
789 |
-
|
790 |
-
|
791 |
-
|
792 |
-
|
793 |
-
|
794 |
-
|
795 |
-
|
796 |
-
|
797 |
-
|
798 |
-
|
799 |
-
|
800 |
-
|
801 |
-
|
802 |
-
|
803 |
-
|
804 |
-
|
805 |
-
|
806 |
-
|
807 |
-
|
808 |
-
|
809 |
-
|
810 |
-
|
811 |
-
|
812 |
-
|
813 |
-
|
814 |
-
|
815 |
-
|
816 |
-
|
817 |
-
|
818 |
-
|
819 |
-
|
820 |
-
|
821 |
-
|
822 |
-
|
823 |
-
|
824 |
-
|
825 |
-
|
826 |
-
# gr.Markdown(value="""**点击从头开始训练**将会自动将已有的训练进度保存到models_backup文件夹,并自动装载预训练模型。
|
827 |
-
# **继续上一次的训练进度**将从上一个保存模型的进度继续训练。继续训练进度无需重新预处理和写入配置文件。
|
828 |
-
# 关于扩散、聚类和特征检索的详细说明请看[此处](https://www.yuque.com/umoubuton/ueupp5/kmui02dszo5zrqkz)。
|
829 |
-
# """)
|
830 |
-
# with gr.Row():
|
831 |
-
# with gr.Column():
|
832 |
-
# start_training=gr.Button("从头开始训练", variant="primary")
|
833 |
-
# training_output=gr.Textbox(label="训练输出信息")
|
834 |
-
# with gr.Column():
|
835 |
-
# continue_training_btn=gr.Button("继续上一次的训练进度", variant="primary")
|
836 |
-
# continue_training_output=gr.Textbox(label="训练输出信息")
|
837 |
-
# with gr.Row():
|
838 |
-
# with gr.Column():
|
839 |
-
# diff_training_btn=gr.Button("从头训练扩散模型", variant="primary")
|
840 |
-
# diff_training_output=gr.Textbox(label="训练输出信息")
|
841 |
-
# with gr.Column():
|
842 |
-
# diff_continue_training_btn=gr.Button("继续训练扩散模型", variant="primary")
|
843 |
-
# diff_continue_training_output=gr.Textbox(label="训练输出信息")
|
844 |
-
# with gr.Accordion(label = "聚类、特征检索训练", open=False):
|
845 |
-
# with gr.Row():
|
846 |
-
# with gr.Column():
|
847 |
-
# kmeans_button=gr.Button("训练聚类模型", variant="primary")
|
848 |
-
# kmeans_gpu = gr.Checkbox(label="使用GPU训练", value=True)
|
849 |
-
# kmeans_output=gr.Textbox(label="训练输出信息")
|
850 |
-
# with gr.Column():
|
851 |
-
# index_button=gr.Button("训练特征检索模型", variant="primary")
|
852 |
-
# index_output=gr.Textbox(label="训练输出信息")
|
853 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
854 |
with gr.TabItem("小工具/实验室特性"):
|
855 |
gr.Markdown(value="""
|
856 |
### So-vits-svc 4.1 小工具/实验室特性
|
@@ -946,20 +1039,20 @@ with app:
|
|
946 |
compress_model_output = gr.Textbox(label="输出信息", value="")
|
947 |
|
948 |
compress_model_btn.click(model_compression, [model_to_compress], [compress_model_output])
|
949 |
-
|
950 |
-
|
951 |
-
|
952 |
-
|
953 |
-
|
954 |
-
|
955 |
-
|
956 |
-
|
957 |
-
|
958 |
-
|
959 |
-
|
960 |
-
|
961 |
-
|
962 |
-
|
963 |
with gr.Tabs():
|
964 |
with gr.Row(variant="panel"):
|
965 |
with gr.Column():
|
@@ -970,4 +1063,4 @@ with app:
|
|
970 |
|
971 |
debug_button.change(debug_change,[],[])
|
972 |
|
973 |
-
app.queue(concurrency_count=1022, max_size=2044).launch()
|
|
|
39 |
sovits_params = {}
|
40 |
diff_params = {}
|
41 |
|
42 |
+
loaded = None
|
43 |
+
|
44 |
def debug_change():
    """Store the ``value`` attribute of ``debug_button`` in the module-level ``debug`` flag.

    Takes no arguments and returns ``None``.
    """
    global debug
    # NOTE(review): this reads the component object's attribute, not an event
    # payload -- confirm it reflects the live checkbox state in Gradio.
    button_state = debug_button.value
    debug = button_state
|
|
|
142 |
output_msg = f"模型被成功加载到了{device_name}上\n{index_or_kmeans}:{clu_load}\n扩散模型:{diff_load}"
|
143 |
return output_msg, gr.Dropdown.update(choices=spk_list, value=spk_choice), clip
|
144 |
|
145 |
+
def _component_value(component_or_value):
    """Return ``obj.value`` when that attribute exists, otherwise ``obj`` itself."""
    return getattr(component_or_value, "value", component_or_value)


def Newload_model_func(ckpt_name,cluster_name,config_name2,enhance2,diff_model_name2,diff_config_name2,only_diffusion2,encoder2,using_device2):
    """Construct a new ``Svc`` instance and publish it as the global ``model``.

    The ``*2`` arguments may be either UI component objects (their ``.value``
    attribute is read) or plain values (used as-is), so the function no longer
    fails with ``AttributeError`` when handed a bare string or bool.

    Args:
        ckpt_name: Checkpoint file name, joined onto ``workdir``.
        cluster_name: Cluster / feature-index file name, joined onto
            ``workdir``. A ``.pkl`` suffix switches on feature retrieval.
        config_name2: Config file name (or component), joined onto
            ``config_dir``.
        enhance2: Enhancer flag (or component), forwarded to ``Svc``.
        diff_model_name2: Diffusion model file name (or component), joined
            onto ``diff_workdir``; ``"no_diff"`` disables shallow diffusion.
        diff_config_name2: Diffusion config name (or component);
            ``"no_diff_config"`` selects ``"configs/diffusion.yaml"``.
        only_diffusion2: Only-diffusion flag (or component), forwarded to
            ``Svc``.
        encoder2: Accepted for signature compatibility; not used here.
        using_device2: Device name (or component); ``"Auto"`` is forwarded
            to ``Svc`` as ``None``.

    Returns:
        None. On success the globals ``model`` and ``loaded`` are updated;
        ``loaded`` records ``cluster_name``.

    Raises:
        OSError: If the config file cannot be opened.
        KeyError: If the config JSON has no ``"spk"`` entry.
    """
    global model, loaded

    config_name = _component_value(config_name2)
    enhance = _component_value(enhance2)
    diff_model_name = _component_value(diff_model_name2)
    diff_config_name = _component_value(diff_config_name2)
    only_diffusion = _component_value(only_diffusion2)
    using_device = _component_value(using_device2)

    config_path = os.path.join(config_dir, config_name)
    if diff_config_name != "no_diff_config":
        diff_config_path = os.path.join(config_dir, diff_config_name)
    else:
        diff_config_path = "configs/diffusion.yaml"

    # Fail fast on an unreadable config or one without speakers, before the
    # model is constructed.
    with open(config_path, 'r') as f:
        config = json.load(f)
    if "spk" not in config:
        raise KeyError("spk")

    ckpt_path = os.path.join(workdir, ckpt_name)
    cluster_path = os.path.join(workdir, cluster_name)
    diff_model_path = os.path.join(diff_workdir, diff_model_name)

    # A .pkl cluster file enables feature retrieval.
    _, cluster_suffix = os.path.splitext(cluster_name)
    fr = cluster_suffix == ".pkl"
    shallow_diffusion = diff_model_name != "no_diff"
    use_spk_mix = False
    device = None if using_device == "Auto" else using_device

    model = Svc(ckpt_path,
                config_path,
                device,
                cluster_path,
                enhance,
                diff_model_path,
                diff_config_path,
                shallow_diffusion,
                only_diffusion,
                use_spk_mix,
                fr)
    # Remember which cluster file the current model was built with; only
    # reached when construction succeeded.
    loaded = cluster_name
|
189 |
+
|
190 |
def get_file_options(directory, extension):
    """Return the entries of ``directory`` whose names end with ``extension``.

    Args:
        directory: Path of the directory to list.
        extension: Suffix to match; anything ``str.endswith`` accepts
            (a string or a tuple of strings).

    Returns:
        A list of matching entry names (not full paths), sorted so the
        result is stable across platforms and runs; ``os.listdir`` itself
        makes no ordering guarantee.

    Raises:
        OSError: Propagated from ``os.listdir`` (e.g. missing directory).
    """
    return sorted(
        name for name in os.listdir(directory) if name.endswith(extension)
    )
|
192 |
|
|
|
709 |
sovits_params, diff_params = get_default_settings()
|
710 |
|
711 |
app = gr.Blocks()
|
712 |
+
|
713 |
+
def Newget_model_info(choice_ckpt2):
    """Inspect a checkpoint and return a hidden ``gr.Textbox`` describing it.

    The checkpoint ``workdir/<choice_ckpt2>`` is loaded onto the CPU and the
    second dimension of its ``emb_g.weight`` tensor is mapped to an encoder
    label.

    Args:
        choice_ckpt2: Checkpoint file name; converted with ``str()``.

    Returns:
        A ``gr.Textbox(visible=False)`` whose ``value`` is the encoder label,
        the "unsupported model" message for an unknown size, or the
        "missing emb_g.weight" message. Every path returns a Textbox so
        callers can always read ``.value``; previously the missing-weight
        path returned a bare ``str``.
    """
    checkpoint_path = os.path.join(workdir, str(choice_ckpt2))
    # NOTE(review): torch.load unpickles the file -- only point this at
    # trusted checkpoints.
    net = torch.load(checkpoint_path, map_location=torch.device('cpu'))  # load on CPU
    spk_emb = net["model"].get("emb_g.weight")
    if spk_emb is None:
        message = "所选模型缺少emb_g.weight,你可能选择了一个底模"
    else:
        _, second_dim = spk_emb.size()
        model_type = {
            768: "Vec768-Layer12",
            256: "Vec256-Layer9 / HubertSoft",
            1024: "Whisper-PPG",
        }
        message = model_type.get(second_dim, "不受支持的模型")
    return gr.Textbox(visible=False, value=message)
|
727 |
+
|
728 |
with app:
|
729 |
gr.Markdown(value="""
|
730 |
### So-VITS-SVC 4.1-Stable
|
|
|
739 |
|
740 |
""")
|
741 |
with gr.Tabs():
|
742 |
+
with gr.TabItem("推理"):
|
743 |
+
#with gr.Row():
|
744 |
+
# choice_ckpt = gr.Dropdown(label="模型选择", choices=ckpt_list, value="no_model")
|
745 |
+
# model_branch = gr.Textbox(label="模型编码器", placeholder="请先选择模型", interactive=False)
|
746 |
+
#choice_ckpt = gr.Dropdown(value="G_82400.pth", visible=False)
|
747 |
+
#with gr.Row():
|
748 |
+
# config_choice = gr.Dropdown(label="配置文件", choices=config_list, value="no_config")
|
749 |
+
# config_info = gr.Textbox(label="配置文件编码器", placeholder="请选择配置文件")
|
750 |
+
config_choice = gr.Dropdown(value="config.json", visible=False)
|
751 |
+
#gr.Markdown(value="""**请检查模型和配置文件的编码器是否匹配**""")
|
752 |
+
#with gr.Row():
|
753 |
+
# diff_choice = gr.Dropdown(label="(可选)选择扩散模型", choices=diff_list, value="no_diff", interactive=True)
|
754 |
+
# diff_config_choice = gr.Dropdown(label="扩散模型配置文件", choices=diff_config_list, value="no_diff_config", interactive=True)
|
755 |
+
diff_choice = gr.Dropdown(value="no_diff", visible=False)
|
756 |
+
diff_config_choice = gr.Dropdown(value="no_diff_config", visible=False)
|
757 |
with gr.Row():
|
758 |
+
cluster_choice = gr.Dropdown(label="(可选)选择聚类模型/特征检索模型", choices=cluster_list, value="no_clu")
|
|
|
|
|
|
|
|
|
|
|
|
|
759 |
with gr.Row():
|
760 |
enhance = gr.Checkbox(label="是否使用NSF_HIFIGAN增强,该选项对部分训练集少的模型有一定的音质增强效果,但是对训练好的模型有反面效果,默认关闭", value=False)
|
761 |
+
#only_diffusion = gr.Checkbox(label="是否使用全扩散推理,开启后将不使用So-VITS模型,仅使用扩散模型进行完整扩散推理,默认关闭", value=False)
|
762 |
+
only_diffusion = gr.Checkbox(value=False, visible=False)
|
763 |
+
#using_device = gr.Dropdown(label="推理设备,默认为自动选择", choices=["Auto","cuda","cpu"], value="Auto")
|
764 |
+
using_device = gr.Dropdown(value='Auto', visible=False)
|
765 |
+
#refresh = gr.Button("刷新选项")
|
766 |
+
#loadckpt = gr.Button("加载模型", variant="primary")
|
767 |
+
#with gr.Row():
|
768 |
+
# model_message = gr.Textbox(label="Output Message")
|
769 |
+
# sid = gr.Dropdown(label="So-VITS说话人", value="speaker0")
|
770 |
+
sid = gr.Dropdown(value="1001", visible=False)
|
771 |
|
772 |
+
#choice_ckpt.change(get_model_info, [choice_ckpt], [model_branch])
|
773 |
+
model_branch = Newget_model_info("G_82400.pth")
|
774 |
+
#config_choice.change(load_json_encoder, [config_choice], [config_info])
|
775 |
+
#refresh.click(refresh_options,[],[choice_ckpt,config_choice,cluster_choice,diff_choice,diff_config_choice])
|
776 |
|
777 |
gr.Markdown(value="""
|
778 |
请稍等片刻,模型加载大约需要10秒。后续操作不需要重新加载模型
|
|
|
827 |
vc_tts_submit = gr.Button("文本转语音", variant="primary")
|
828 |
vc_output1 = gr.Textbox(label="Output Message")
|
829 |
vc_output2 = gr.Audio(label="Output Audio")
|
830 |
+
|
831 |
+
def Newvc_fn(sid, input_audio, vc_transform, auto_f0, cluster_ratio, slice_db, noise_scale, pad_seconds, cl_num, lg_num, lgr_num, f0_predictor, enhancer_adaptive_key, cr_threshold, k_step, use_spk_mix, second_encoding, loudness_envelope_adjustment, clus2):
    """Convert one uploaded clip, (re)loading the model if the cluster choice changed.

    Args:
        sid: Speaker id forwarded to ``vc_infer``.
        input_audio: ``(sampling_rate, audio)`` pair, or ``None`` when
            nothing was uploaded.
        vc_transform ... loudness_envelope_adjustment: Forwarded unchanged to
            ``vc_infer``.
        clus2: Selected cluster / feature-index file name. When it differs
            from the global ``loaded``, the model is rebuilt with it.

    Returns:
        ``(message, output_path)``; ``output_path`` is ``None`` when no audio
        or no model is available.

    Raises:
        gr.Error: Wraps any exception raised while loading the model or
            running inference (a traceback is printed first when ``debug``
            is set).
    """
    import tempfile  # local import: only this function needs it

    global model, loaded
    try:
        if input_audio is None:
            return "You need to upload an audio", None

        # Reload inside the try block so that load failures are reported the
        # same way as inference failures, and only once audio is present.
        if loaded != clus2:
            # NOTE(review): component objects are passed here and the loader
            # reads their ``.value`` -- confirm that this reflects what the
            # user selected for the visible ``enhance`` checkbox.
            Newload_model_func("G_82400.pth",clus2,config_choice,enhance,diff_choice,diff_config_choice,only_diffusion,model_branch,using_device)
            loaded = clus2

        if model is None:
            return "You need to upload an model", None

        sampling_rate, audio = input_audio
        # A per-request temp file: a shared fixed name would let concurrent
        # requests overwrite each other's input.
        # NOTE(review): assumes vc_infer only reads this path -- confirm it
        # does not rely on the file being named "temp.wav".
        fd, temp_path = tempfile.mkstemp(suffix=".wav")
        os.close(fd)
        try:
            sf.write(temp_path, audio, sampling_rate, format="wav")
            output_file_path = vc_infer(sid, audio, temp_path, vc_transform, auto_f0, cluster_ratio, slice_db, noise_scale, pad_seconds, cl_num, lg_num, lgr_num, f0_predictor, enhancer_adaptive_key, cr_threshold, k_step, use_spk_mix, second_encoding, loudness_envelope_adjustment)
        finally:
            # Clean up even when writing or inference fails.
            os.remove(temp_path)
        return "Success", output_file_path
    except Exception as e:
        if debug:
            traceback.print_exc()
        raise gr.Error(e)
|
850 |
|
851 |
+
#loadckpt.click(load_model_func,[choice_ckpt,cluster_choice,config_choice,enhance,diff_choice,diff_config_choice,only_diffusion,model_branch,using_device],[model_message, sid, cl_num])
|
852 |
+
vc_submit.click(Newvc_fn, [sid, vc_input3, vc_transform,auto_f0,cluster_ratio, slice_db, noise_scale,pad_seconds,cl_num,lg_num,lgr_num,f0_predictor,enhancer_adaptive_key,cr_threshold,k_step,use_spk_mix,second_encoding,loudness_envelope_adjustment,cluster_choice], [vc_output1, vc_output2])
|
853 |
vc_batch_submit.click(vc_batch_fn, [sid, vc_batch_files, vc_transform,auto_f0,cluster_ratio, slice_db, noise_scale,pad_seconds,cl_num,lg_num,lgr_num,f0_predictor,enhancer_adaptive_key,cr_threshold,k_step,use_spk_mix,second_encoding,loudness_envelope_adjustment], [vc_output1])
|
854 |
vc_tts_submit.click(tts_fn, [text_input, tts_spk, sid, vc_transform,auto_f0,cluster_ratio, slice_db, noise_scale,pad_seconds,cl_num,lg_num,lgr_num,f0_predictor,enhancer_adaptive_key,cr_threshold,k_step,use_spk_mix,second_encoding,loudness_envelope_adjustment], [vc_output1, vc_output2])
|
855 |
+
"""
|
856 |
+
with gr.TabItem("训练"):
|
857 |
+
gr.Markdown(value="""请将数据集文件夹放置在dataset_raw文件夹下,确认放置正确后点击下方获取数据集名称""")
|
858 |
+
raw_dirs_list=gr.Textbox(label="Raw dataset directory(s):")
|
859 |
+
get_raw_dirs=gr.Button("识别数据集", variant="primary")
|
860 |
+
gr.Markdown(value="""确认数据集正确识别后请选择训练使用的特征编码器和f0预测器,**如果要训练扩散模型,请选择Vec768l12或hubertsoft,并确保So-VITS和扩散模型使用同一个编码器**""")
|
861 |
+
with gr.Row():
|
862 |
+
gr.Markdown(value="""**vec256l9**: ContentVec(256Layer9),旧版本叫v1,So-VITS-SVC 4.0的基础版本,**暂不支持扩散模型**
|
863 |
+
**vec768l12**: 特征输入更换为ContentVec的第12层Transformer输出,模型理论上会更加还原训练集音色
|
864 |
+
**hubertsoft**: So-VITS-SVC 3.0使用的编码器,咬字更为准确,但可能存在多说话人音色泄露问题
|
865 |
+
**whisper-ppg**: 来自OpenAI,咬字最为准确,但和Hubertsoft一样存在多说话人音色泄露,且显存占用和训练时间有明显增加。**暂不支持扩散模型**
|
866 |
+
""")
|
867 |
+
gr.Markdown(value="""**crepe**: 抗噪能力最强,但预处理速度慢(不过如果你的显卡很强的话速度会很快)
|
868 |
+
**pm**: 预处理速度快,但抗噪能力较弱
|
869 |
+
**dio**: 先前版本预处理默认使用的f0预测器
|
870 |
+
**harvest**: 有一定抗噪能力,预处理显存占用友好,速度比较慢
|
871 |
+
""")
|
872 |
+
with gr.Row():
|
873 |
+
branch_selection = gr.Radio(label="选择训练使用的编码器", choices=["vec256l9","vec768l12","hubertsoft","whisper-ppg"], value="vec768l12", interactive=True)
|
874 |
+
f0_predictor_selection = gr.Radio(label="选择训练使用的f0预测器", choices=["crepe","pm","dio","harvest"], value="crepe", interactive=True)
|
875 |
+
use_diff = gr.Checkbox(label="是否使用浅扩散模型,如要训练浅扩散模型请勾选此项", value=True)
|
876 |
+
vol_aug=gr.Checkbox(label="是否启用响度嵌入和音量增强,启用后可以根据输入源控制输出响度,但对数据集质量的要求更高。**仅支持vec768l12编码器**", value=False)
|
877 |
+
with gr.Row():
|
878 |
+
skip_loudnorm = gr.Checkbox(label="是否跳过响度匹配,如果你已经用音频处理软件做过响度匹配,请勾选此处")
|
879 |
+
num_processes = gr.Slider(label="预处理使用的CPU线程数,可以大幅加快预处理速度,但线程数过大容易爆显存,建议12G显存设置为2", minimum=1, maximum=multiprocessing.cpu_count(), value=1, step=1)
|
880 |
+
with gr.Row():
|
881 |
+
raw_preprocess=gr.Button("数据预处理", variant="primary")
|
882 |
+
regenerate_config_btn=gr.Button("重新生成配置文件", variant="primary")
|
883 |
+
preprocess_output=gr.Textbox(label="预处理输出信息,完成后请检查一下是否有报错信息,如无则可以进行下一步", max_lines=999)
|
884 |
+
clear_preprocess_output=gr.Button("清空输出信息")
|
885 |
+
with gr.Group():
|
886 |
+
gr.Markdown(value="""填写训练设置和超参数""")
|
887 |
+
with gr.Row():
|
888 |
+
gr.Textbox(label="当前使用显卡信息", value=gpu_info)
|
889 |
+
gpu_selection=gr.Textbox(label="多卡用户请指定希望训练使用的显卡ID(0,1,2...)", value=gpus, interactive=True)
|
890 |
+
with gr.Row():
|
891 |
+
log_interval=gr.Textbox(label="每隔多少步(steps)生成一次评估日志", value=sovits_params['log_interval'])
|
892 |
+
eval_interval=gr.Textbox(label="每隔多少步(steps)验证并保存一次模型", value=sovits_params['eval_interval'])
|
893 |
+
keep_ckpts=gr.Textbox(label="仅保留最新的X个模型,超出该数字的旧模型会被删除。设置为0则永不删除", value=sovits_params['keep_ckpts'])
|
894 |
+
with gr.Row():
|
895 |
+
batch_size=gr.Textbox(label="批量大小,每步取多少条数据进行训练,大batch有助于训练但显著增加显存占用。6G显存建议设定为4", value=sovits_params['batch_size'])
|
896 |
+
lr=gr.Textbox(label="学习率,一般不用动,批量大小较大时可以适当增大学习率,但强烈不建议超过0.0002,有炸炉风险", value=sovits_params['learning_rate'])
|
897 |
+
fp16_run=gr.Checkbox(label="是否使用fp16混合精度训练,fp16训练可能降低显存占用和训练时间,但对模型质量的影响尚未查证", value=sovits_params['fp16_run'])
|
898 |
+
all_in_mem=gr.Checkbox(label="是否加载所有数据集到内存中,硬盘IO过于低下、同时内存容量远大于数据集体积时可以启用,能显著加快训练速度", value=sovits_params['all_in_mem'])
|
899 |
+
with gr.Row():
|
900 |
+
gr.Markdown("请检查右侧的说话人列表是否和你要训练的目标说话人一致,确认无误后点击写入配置文件,然后就可以开始训练了")
|
901 |
+
speakers=gr.Textbox(label="说话人列表")
|
902 |
+
with gr.Accordion(label = "扩散模型配置(训练扩散模型需要写入此处)", open=True):
|
903 |
+
with gr.Row():
|
904 |
+
diff_num_workers = gr.Number(label="num_workers, 如果你的电脑配置较高,可以将这里设置为0加快训练速度", value=diff_params['num_workers'])
|
905 |
+
diff_cache_all_data = gr.Checkbox(label="是否缓存数据,启用后可以加快训练速度,关闭后可以节省显存或内存,但会减慢训练速度", value=diff_params['cache_all_data'])
|
906 |
+
diff_cache_device = gr.Radio(label="若启用缓存数据,使用显存(cuda)还是内存(cpu)缓存,如果显卡显存充足,选择cuda以加快训练速度", choices=["cuda","cpu"], value=diff_params['cache_device'])
|
907 |
+
diff_amp_dtype = gr.Radio(label="训练数据类型,fp16可能会有更快的训练速度,前提是你的显卡支持", choices=["fp32","fp16"], value=diff_params['amp_dtype'])
|
908 |
+
with gr.Row():
|
909 |
+
diff_batch_size = gr.Number(label="批量大小(batch_size),根据显卡显存设置,小显存适当降低该项,6G显存可以设定为48,但该数值不要超过数据集总数量的1/4", value=diff_params['diff_batch_size'])
|
910 |
+
diff_lr = gr.Number(label="学习率(一般不需要动)", value=diff_params['diff_lr'])
|
911 |
+
diff_interval_log = gr.Number(label="每隔多少步(steps)生成一次评估日志", value = diff_params['diff_interval_log'])
|
912 |
+
diff_interval_val = gr.Number(label="每隔多少步(steps)验证并保存一次模型,如果你的批量大小较大,可以适当减少这里的数字,但不建议设置为1000以下", value=diff_params['diff_interval_val'])
|
913 |
+
diff_force_save = gr.Number(label="每隔多少步强制保留模型,只有该步数的倍数保存的模型会被保留,其余会被删除。设置为与验证步数相同的值则每个模型都会被保留", value=diff_params['diff_force_save'])
|
914 |
+
with gr.Row():
|
915 |
+
save_params=gr.Button("将当前设置保存为默认设置", variant="primary")
|
916 |
+
write_config=gr.Button("写入配置文件", variant="primary")
|
917 |
+
write_config_output=gr.Textbox(label="输出信息")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
918 |
|
919 |
+
gr.Markdown(value="""**点击从头开始训练**将会自动将已有的训练进度保存到models_backup文件夹,并自动装载预训练模型。
|
920 |
+
**继续上一次的训练进度**将从上一个保存模型的进度继续训练。继续训练进度无需重新预处理和写入配置文件。
|
921 |
+
关于扩散、聚类和特征检索的详细说明请看[此处](https://www.yuque.com/umoubuton/ueupp5/kmui02dszo5zrqkz)。
|
922 |
+
""")
|
923 |
+
with gr.Row():
|
924 |
+
with gr.Column():
|
925 |
+
start_training=gr.Button("从头开始训练", variant="primary")
|
926 |
+
training_output=gr.Textbox(label="训练输出信息")
|
927 |
+
with gr.Column():
|
928 |
+
continue_training_btn=gr.Button("继续上一次的训练进度", variant="primary")
|
929 |
+
continue_training_output=gr.Textbox(label="训练输出信息")
|
930 |
+
with gr.Row():
|
931 |
+
with gr.Column():
|
932 |
+
diff_training_btn=gr.Button("从头训练扩散模型", variant="primary")
|
933 |
+
diff_training_output=gr.Textbox(label="训练输出信息")
|
934 |
+
with gr.Column():
|
935 |
+
diff_continue_training_btn=gr.Button("继续训练扩散模型", variant="primary")
|
936 |
+
diff_continue_training_output=gr.Textbox(label="训练输出信息")
|
937 |
+
with gr.Accordion(label = "聚类、特征检索训练", open=False):
|
938 |
+
with gr.Row():
|
939 |
+
with gr.Column():
|
940 |
+
kmeans_button=gr.Button("训练聚类模型", variant="primary")
|
941 |
+
kmeans_gpu = gr.Checkbox(label="使用GPU训练", value=True)
|
942 |
+
kmeans_output=gr.Textbox(label="训练输出信息")
|
943 |
+
with gr.Column():
|
944 |
+
index_button=gr.Button("训练特征检索模型", variant="primary")
|
945 |
+
index_output=gr.Textbox(label="训练输出信息")
|
946 |
+
"""
|
947 |
with gr.TabItem("小工具/实验室特性"):
|
948 |
gr.Markdown(value="""
|
949 |
### So-vits-svc 4.1 小工具/实验室特性
|
|
|
1039 |
compress_model_output = gr.Textbox(label="输出信息", value="")
|
1040 |
|
1041 |
compress_model_btn.click(model_compression, [model_to_compress], [compress_model_output])
|
1042 |
+
"""
|
1043 |
+
get_raw_dirs.click(load_raw_dirs,[],[raw_dirs_list])
|
1044 |
+
raw_preprocess.click(dataset_preprocess,[branch_selection, f0_predictor_selection, use_diff, vol_aug, skip_loudnorm, num_processes],[preprocess_output, speakers])
|
1045 |
+
regenerate_config_btn.click(regenerate_config,[branch_selection, vol_aug],[preprocess_output])
|
1046 |
+
clear_preprocess_output.click(clear_output,[],[preprocess_output])
|
1047 |
+
save_params.click(save_default_settings, [log_interval,eval_interval,keep_ckpts,batch_size,lr,fp16_run,all_in_mem,diff_num_workers,diff_cache_all_data,diff_cache_device,diff_amp_dtype,diff_batch_size,diff_lr,diff_interval_log,diff_interval_val,diff_force_save], [write_config_output])
|
1048 |
+
write_config.click(config_fn,[log_interval, eval_interval, keep_ckpts, batch_size, lr, fp16_run, all_in_mem, diff_num_workers, diff_cache_all_data, diff_batch_size, diff_lr, diff_interval_log, diff_interval_val, diff_cache_device, diff_amp_dtype, diff_force_save],[write_config_output])
|
1049 |
+
start_training.click(training,[gpu_selection, branch_selection],[training_output])
|
1050 |
+
diff_training_btn.click(diff_training,[branch_selection],[diff_training_output])
|
1051 |
+
continue_training_btn.click(continue_training,[gpu_selection, branch_selection],[continue_training_output])
|
1052 |
+
diff_continue_training_btn.click(diff_continue_training,[branch_selection],[diff_continue_training_output])
|
1053 |
+
kmeans_button.click(kmeans_training,[kmeans_gpu],[kmeans_output])
|
1054 |
+
index_button.click(index_training, [], [index_output])
|
1055 |
+
"""
|
1056 |
with gr.Tabs():
|
1057 |
with gr.Row(variant="panel"):
|
1058 |
with gr.Column():
|
|
|
1063 |
|
1064 |
debug_button.change(debug_change,[],[])
|
1065 |
|
1066 |
+
app.queue(concurrency_count=1022, max_size=2044).launch(share=True)
|