"""Static model and process configuration for a three-model separation ensemble.

The module defines filesystem locations for model assets, two callbacks used
to look up / store cached sources per architecture, and, for each of the three
models ('MDX23C-8KFFT-InstVoc_HQ', 'UVR-MDX-NET-Voc_FT', 'htdemucs_ft'), a
``*_model_data`` dict of settings plus a ``*_process_data`` dict that wraps it
together with progress/console callbacks.

Importing this module reads the MDX23C YAML config from disk.
"""
import os

import yaml
from addict import Dict
from ml_collections import ConfigDict

from gui_data.constants import DEMUCS_2_SOURCE, DEMUCS_2_SOURCE_MAPPER, VR_ARCH_TYPE, MDX_ARCH_TYPE, DEMUCS_ARCH_TYPE

# Model asset locations, all resolved relative to this file's directory.
BASE_PATH = os.path.dirname(os.path.abspath(__file__))
MODELS_DIR = os.path.join(BASE_PATH, 'models')
VR_MODELS_DIR = os.path.join(MODELS_DIR, 'VR_Models')
MDX_MODELS_DIR = os.path.join(MODELS_DIR, 'MDX_Net_Models')
DEMUCS_MODELS_DIR = os.path.join(MODELS_DIR, 'Demucs_Models')
DEMUCS_NEWER_REPO_DIR = os.path.join(DEMUCS_MODELS_DIR, 'v3_v4_repo')
MDX_MIXER_PATH = os.path.join(BASE_PATH, 'lib_v5', 'mixer.ckpt')
DENOISER_MODEL_PATH = os.path.join(VR_MODELS_DIR, 'UVR-DeNoise-Lite.pth')
DEVERBER_MODEL_PATH = os.path.join(VR_MODELS_DIR, 'UVR-DeEcho-DeReverb.pth')

# Cache & Parameters
VR_HASH_DIR = os.path.join(VR_MODELS_DIR, 'model_data')
VR_HASH_JSON = os.path.join(VR_MODELS_DIR, 'model_data', 'model_data.json')
MDX_HASH_DIR = os.path.join(MDX_MODELS_DIR, 'model_data')
MDX_HASH_JSON = os.path.join(MDX_HASH_DIR, 'model_data.json')
MDX_C_CONFIG_PATH = os.path.join(MDX_HASH_DIR, 'mdx_c_configs')

# Per-architecture caches mapping a model name to its cached sources.
vr_cache_source_mapper = {}
mdx_cache_source_mapper = {}
demucs_cache_source_mapper = {}


def cached_source_callback(process_method, model_name=None):
    """Look up cached sources for ``model_name`` in the cache of ``process_method``.

    Args:
        process_method: Architecture selector; compared against VR_ARCH_TYPE,
            MDX_ARCH_TYPE and DEMUCS_ARCH_TYPE to pick the module-level cache.
        model_name: Name to search for. A cache entry matches when
            ``model_name in key`` is true (a substring match for string keys).

    Returns:
        A ``(model, sources)`` tuple for the last matching cache entry, or
        ``(None, None)`` when nothing matches, when ``process_method`` is not
        one of the three known architectures, or when ``model_name`` is None.
    """
    # Default to an empty mapping so an unrecognised process_method yields
    # (None, None) instead of an UnboundLocalError on ``mapper``.
    mapper = {}
    if process_method == VR_ARCH_TYPE:
        mapper = vr_cache_source_mapper
    if process_method == MDX_ARCH_TYPE:
        mapper = mdx_cache_source_mapper
    if process_method == DEMUCS_ARCH_TYPE:
        mapper = demucs_cache_source_mapper

    model, sources = None, None

    # ``None in key`` raises TypeError for string keys; without a name there
    # is nothing to look up.
    if model_name is None:
        return model, sources

    # No early exit: when several keys match, the last one iterated wins.
    for key, value in mapper.items():
        if model_name in key:
            model, sources = key, value

    return model, sources


def cached_model_source_holder(self, process_method, sources, model_name=None):
    """Store ``sources`` under ``model_name`` in a cache attribute on ``self``.

    Depending on ``process_method`` this sets ``self.vr_cache_source_mapper``,
    ``self.mdx_cache_source_mapper`` or ``self.demucs_cache_source_mapper`` to
    a copy of the corresponding module-level cache with ``model_name`` added.

    Args:
        self: Object that receives the cache attribute.
        process_method: Architecture selector (see ``cached_source_callback``).
        sources: Value to cache.
        model_name: Key under which ``sources`` is stored.

    NOTE(review): this is a module-level function that takes ``self`` and
    writes to ``self``; the module-level caches read by
    ``cached_source_callback`` are never modified here. It looks like it was
    lifted out of a class -- confirm how callers invoke it before relying on
    the cache being populated.
    """
    if process_method == VR_ARCH_TYPE:
        self.vr_cache_source_mapper = {**vr_cache_source_mapper, model_name: sources}
    if process_method == MDX_ARCH_TYPE:
        self.mdx_cache_source_mapper = {**mdx_cache_source_mapper, model_name: sources}
    if process_method == DEMUCS_ARCH_TYPE:
        self.demucs_cache_source_mapper = {**demucs_cache_source_mapper, model_name: sources}


def _build_process_data(display_name, model_data):
    """Assemble the process-data dict shared by all three ensemble models.

    Args:
        display_name: Label stored under 'model_name' and appended to every
            line printed by the 'write_to_console' callback.
        model_data: Plain settings dict; stored wrapped in an addict ``Dict``.

    Returns:
        A new dict holding the wrapped model data, print-based progress and
        console callbacks, the two cache callbacks, and the ensemble flags.
    """
    return {
        'model_name': display_name,
        'model_data': Dict(model_data),
        'export_path': None,
        'audio_file_base': None,
        'audio_file': None,
        'set_progress_bar': lambda step, inference_iterations=0: print(
            f"iteration {inference_iterations} of step #{step}"),
        'write_to_console': lambda progress_text, base_text='': print(
            f"{progress_text} {base_text} @ {display_name}"),
        'process_iteration': lambda iteration: iteration + 1,
        'cached_source_callback': cached_source_callback,
        'cached_model_source_holder': cached_model_source_holder,
        # A fresh list per call so the three process dicts do not share one.
        'list_all_models': ['MDX23C-8KFFT-InstVoc_HQ', 'UVR-MDX-NET-Voc_FT', 'htdemucs_ft'],
        'is_ensemble_master': True,
        'is_4_stem_ensemble': False,
    }


# The MDX23C model settings embed its parsed YAML config, read at import time.
config_path = os.path.join(MDX_C_CONFIG_PATH, 'model_2_stem_full_band_8k.yaml')
with open(config_path) as f:
    # NOTE(review): FullLoader accepts more than yaml.safe_load does. Only
    # point this at trusted config files; switch to safe_load if the config
    # turns out to use plain YAML types only -- confirm against the file.
    config = ConfigDict(yaml.load(f, Loader=yaml.FullLoader))

mdx23c_8kfft_instvoc_hq_model_data = {
    'DENOISER_MODEL': DENOISER_MODEL_PATH,
    'DEVERBER_MODEL': DEVERBER_MODEL_PATH,
    'all_models': [],
    'bv_model_rebalance': 0,
    'chunks': 0,
    'compensate': None,
    'demucs_4_stem_added_count': 0,
    'demucs_source_list': [],
    'demucs_stem_count': 0,
    'demucs_stems': 'All Stems',
    'deverb_vocal_opt': 'Vocals',
    'device_set': 'Default',
    'ensemble_primary_stem': 'Vocals',
    'ensemble_secondary_stem': 'Instrumental',
    'is_4_stem_ensemble': False,
    'is_bv_model': False,
    'is_change_def': False,
    'is_demucs_4_stem_secondaries': False,
    'is_demucs_combine_stems': True,
    'is_demucs_pre_proc_model_inst_mix': False,
    'is_denoise': False,
    'is_denoise_model': False,
    'is_deverb_vocals': False,
    'is_dry_check': False,
    'is_ensemble_mode': True,
    'is_get_hash_dir_only': False,
    'is_gpu_conversion': 0,
    'is_inst_only_voc_splitter': False,
    'is_invert_spec': False,
    'is_karaoke': False,
    'is_match_frequency_pitch': True,
    'is_mdx_c': True,
    'is_mdx_c_seg_def': False,
    'is_mdx_ckpt': False,
    'is_mdx_combine_stems': True,
    'is_mixer_mode': False,
    'is_multi_stem_ensemble': False,
    'is_normalization': False,
    'is_pitch_change': False,
    'is_pre_proc_model': False,
    'is_primary_model_primary_stem_only': False,
    'is_primary_model_secondary_stem_only': False,
    'is_primary_stem_only': False,
    'is_save_inst_vocal_splitter': False,
    'is_save_vocal_only': False,
    'is_sec_bv_rebalance': False,
    'is_secondary_model': False,
    'is_secondary_model_activated': False,
    'is_secondary_stem_only': False,
    'is_use_opencl': False,
    'is_vocal_split_model': False,
    'is_vocal_split_model_activated': False,
    'is_vr_51_model': False,
    'manual_download_Button': None,
    'margin': 44100,
    'mdx_batch_size': 1,
    'mdx_c_configs': config,
    'mdx_dim_f_set': None,
    'mdx_dim_t_set': None,
    'mdx_model_stems': ['Vocals', 'Instrumental'],
    'mdx_n_fft_scale_set': None,
    'mdx_segment_size': 256,
    'mdx_stem_count': 2,
    'mdxnet_stem_select': 'Vocals',
    'mixer_path': f'{MDX_MODELS_DIR}/mixer_val.ckpt',
    'model_and_process_tag': 'MDX-Net: MDX23C-InstVoc HQ',
    'model_basename': 'MDX23C-8KFFT-InstVoc_HQ',
    'model_capacity': (32, 128),
    'model_data': {'config_yaml': 'model_2_stem_full_band_8k.yaml'},
    'model_hash': '99b6ceaae542265a3b6d657bf9fde79f',
    'model_hash_dir': f'{MDX_MODELS_DIR}/model_data/99b6ceaae542265a3b6d657bf9fde79f.json',
    'model_name': 'MDX23C-InstVoc HQ',
    'model_path': f'{MDX_MODELS_DIR}/MDX23C-8KFFT-InstVoc_HQ.ckpt',
    'model_samplerate': 44100,
    'model_status': True,
    'mp3_bit_set': '320k',
    'overlap': 0.25,
    'overlap_mdx': 'Default',
    'overlap_mdx23': 8,
    'pre_proc_model': None,
    'pre_proc_model_activated': False,
    'primary_model_primary_stem': None,
    'primary_stem': 'Vocals',
    'primary_stem_native': None,
    'process_method': 'MDX-Net',
    'save_format': 'WAV',
    'secondary_model': None,
    'secondary_model_4_stem': [],
    'secondary_model_4_stem_model_names_list': [],
    'secondary_model_4_stem_names': [],
    'secondary_model_4_stem_scale': [],
    'secondary_model_bass': None,
    'secondary_model_drums': None,
    'secondary_model_other': None,
    'secondary_model_scale': None,
    'secondary_model_scale_bass': None,
    'secondary_model_scale_drums': None,
    'secondary_model_scale_other': None,
    'secondary_stem': 'Instrumental',
    'semitone_shift': 0.0,
    'vocal_split_model': None,
    'wav_type_set': 'PCM_16',
}

mdx23c_8kfft_instvoc_hq_process_data = _build_process_data(
    'MDX23C Model: MDX23C-InstVoc HQ', mdx23c_8kfft_instvoc_hq_model_data)

uvr_mdx_net_voc_ft_model_data = {
    'DENOISER_MODEL': DENOISER_MODEL_PATH,
    'DEVERBER_MODEL': DEVERBER_MODEL_PATH,
    'all_models': [],
    'bv_model_rebalance': 0,
    'chunks': 0,
    'compensate': 1.021,
    'demucs_4_stem_added_count': 0,
    'demucs_source_list': [],
    'demucs_stem_count': 0,
    'demucs_stems': 'All Stems',
    'deverb_vocal_opt': 'Vocals',
    'device_set': 'Default',
    'ensemble_primary_stem': 'Vocals',
    'ensemble_secondary_stem': 'Instrumental',
    'is_4_stem_ensemble': False,
    'is_bv_model': False,
    'is_change_def': False,
    'is_demucs_4_stem_secondaries': False,
    'is_demucs_combine_stems': True,
    'is_demucs_pre_proc_model_inst_mix': False,
    'is_denoise': False,
    'is_denoise_model': False,
    'is_deverb_vocals': False,
    'is_dry_check': False,
    'is_ensemble_mode': True,
    'is_get_hash_dir_only': False,
    'is_gpu_conversion': 0,
    'is_inst_only_voc_splitter': False,
    'is_invert_spec': False,
    'is_karaoke': False,
    'is_match_frequency_pitch': True,
    'is_mdx_c': False,
    'is_mdx_c_seg_def': False,
    'is_mdx_ckpt': False,
    'is_mdx_combine_stems': True,
    'is_mixer_mode': False,
    'is_multi_stem_ensemble': False,
    'is_normalization': False,
    'is_pitch_change': False,
    'is_pre_proc_model': False,
    'is_primary_model_primary_stem_only': False,
    'is_primary_model_secondary_stem_only': False,
    'is_primary_stem_only': False,
    'is_save_inst_vocal_splitter': False,
    'is_save_vocal_only': False,
    'is_sec_bv_rebalance': False,
    'is_secondary_model': False,
    'is_secondary_model_activated': False,
    'is_secondary_stem_only': False,
    'is_use_opencl': False,
    'is_vocal_split_model': False,
    'is_vocal_split_model_activated': False,
    'is_vr_51_model': False,
    'manual_download_Button': None,
    'margin': 44100,
    'mdx_batch_size': 1,
    'window_size': 512,
    'mdx_c_configs': None,
    'mdx_dim_f_set': 3072,
    'mdx_dim_t_set': 8,
    'mdx_model_stems': [],
    'mdx_n_fft_scale_set': 7680,
    'mdx_segment_size': 256,
    'mdx_stem_count': 1,
    'mdxnet_stem_select': 'All Stems',
    'mixer_path': f'{MDX_MODELS_DIR}/mixer_val.ckpt',
    'model_and_process_tag': 'MDX-Net: UVR-MDX-NET-Voc_FT',
    'model_basename': 'UVR-MDX-NET-Voc_FT',
    'model_capacity': (32, 128),
    'model_data': {
        'compensate': 1.021,
        'mdx_dim_f_set': 3072,
        'mdx_dim_t_set': 8,
        'mdx_n_fft_scale_set': 7680,
        'primary_stem': 'Vocals',
    },
    'model_hash': '77d07b2667ddf05b9e3175941b4454a0',
    'model_hash_dir': f'{MDX_MODELS_DIR}/model_data/77d07b2667ddf05b9e3175941b4454a0.json',
    'model_name': 'UVR-MDX-NET-Voc_FT',
    'model_path': f'{MDX_MODELS_DIR}/UVR-MDX-NET-Voc_FT.onnx',
    'model_samplerate': 44100,
    'model_status': True,
    'mp3_bit_set': '320k',
    'overlap': 0.25,
    'overlap_mdx': 'Default',
    'overlap_mdx23': 8,
    'pre_proc_model': None,
    'pre_proc_model_activated': False,
    'primary_model_primary_stem': None,
    'primary_stem': 'Vocals',
    'primary_stem_native': 'Vocals',
    'process_method': 'MDX-Net',
    'save_format': 'WAV',
    'secondary_model': None,
    'secondary_model_4_stem': [],
    'secondary_model_4_stem_model_names_list': [],
    'secondary_model_4_stem_names': [],
    'secondary_model_4_stem_scale': [],
    'secondary_model_bass': None,
    'secondary_model_drums': None,
    'secondary_model_other': None,
    'secondary_model_scale': None,
    'secondary_model_scale_bass': None,
    'secondary_model_scale_drums': None,
    'secondary_model_scale_other': None,
    'secondary_stem': 'Instrumental',
    'semitone_shift': 0.0,
    'vocal_split_model': None,
    'wav_type_set': 'PCM_16',
}

uvr_mdx_net_voc_ft_process_data = _build_process_data(
    'MDX-Net Model: UVR-MDX-NET Voc FT', uvr_mdx_net_voc_ft_model_data)

htdemucs_ft_model_data = {
    'DENOISER_MODEL': DENOISER_MODEL_PATH,
    'DEVERBER_MODEL': DEVERBER_MODEL_PATH,
    'all_models': [],
    'bv_model_rebalance': 0,
    'chunks_demucs': 0,
    'compensate': None,
    'demucs_4_stem_added_count': 0,
    'demucs_source_list': DEMUCS_2_SOURCE,
    'demucs_source_map': DEMUCS_2_SOURCE_MAPPER,
    'demucs_stem_count': 2,
    'demucs_stems': 'All Stems',
    'demucs_version': 'v4',
    'deverb_vocal_opt': 'Vocals',
    'device_set': 'Default',
    'ensemble_primary_stem': 'Vocals',
    'ensemble_secondary_stem': 'Instrumental',
    'is_4_stem_ensemble': False,
    'is_bv_model': False,
    'is_change_def': False,
    'is_chunk_demucs': False,
    'is_demucs_4_stem_secondaries': False,
    'is_demucs_combine_stems': True,
    'is_demucs_pre_proc_model_inst_mix': False,
    'is_denoise': False,
    'is_denoise_model': False,
    'is_deverb_vocals': False,
    'is_dry_check': False,
    'is_ensemble_mode': True,
    'is_get_hash_dir_only': False,
    'is_gpu_conversion': 0,
    'is_inst_only_voc_splitter': False,
    'is_invert_spec': False,
    'is_karaoke': False,
    'is_match_frequency_pitch': True,
    'is_mdx_c': False,
    'is_mdx_c_seg_def': False,
    'is_mdx_ckpt': False,
    'is_mdx_combine_stems': True,
    'is_mixer_mode': False,
    'is_multi_stem_ensemble': False,
    'is_normalization': False,
    'is_pitch_change': False,
    'is_pre_proc_model': False,
    'is_primary_model_primary_stem_only': False,
    'is_primary_model_secondary_stem_only': False,
    'is_primary_stem_only': False,
    'is_save_inst_vocal_splitter': False,
    'is_save_vocal_only': False,
    'is_sec_bv_rebalance': False,
    'is_secondary_model': False,
    'is_secondary_model_activated': False,
    'is_secondary_stem_only': False,
    'is_split_mode': True,
    'is_use_opencl': False,
    'is_vocal_split_model': False,
    'is_vocal_split_model_activated': False,
    'is_vr_51_model': False,
    'manual_download_Button': None,
    'margin_demucs': 44100,
    'mdx_batch_size': 1,
    'mdx_c_configs': None,
    'mdx_dim_f_set': None,
    'mdx_dim_t_set': None,
    'mdx_model_stems': [],
    'mdx_n_fft_scale_set': None,
    'mdx_stem_count': 1,
    'mdxnet_stem_select': 'All Stems',
    'mixer_path': MDX_MIXER_PATH,
    'model_and_process_tag': 'Demucs: v4 | htdemucs_ft',
    'model_basename': 'htdemucs_ft',
    'model_capacity': (32, 128),
    'model_hash_dir': None,
    'model_name': 'v4 | htdemucs_ft',
    'model_path': f'{DEMUCS_NEWER_REPO_DIR}/htdemucs_ft.yaml',
    'model_samplerate': 44100,
    'model_status': True,
    'mp3_bit_set': '320k',
    'overlap': 0.25,
    'overlap_mdx': 'Default',
    'overlap_mdx23': 8,
    'pre_proc_model': None,
    'pre_proc_model_activated': False,
    'primary_model_primary_stem': None,
    'primary_stem': None,
    'primary_stem_native': None,
    'process_method': 'Demucs',
    'save_format': 'WAV',
    'secondary_model': None,
    'secondary_model_4_stem': [],
    'secondary_model_4_stem_model_names_list': [],
    'secondary_model_4_stem_names': [],
    'secondary_model_4_stem_scale': [],
    'secondary_model_bass': None,
    'secondary_model_drums': None,
    'secondary_model_other': None,
    'secondary_model_scale': None,
    'secondary_model_scale_bass': None,
    'secondary_model_scale_drums': None,
    'secondary_model_scale_other': None,
    'secondary_stem': None,
    'segment': 'Default',
    'semitone_shift': 0.0,
    'shifts': 2,
    'vocal_split_model': None,
    'wav_type_set': 'PCM_16',
}

htdemucs_ft_process_data = _build_process_data(
    'Demucs v4: htdemucs_ft', htdemucs_ft_model_data)