Eddycrack864 committed on
Commit 4372559
1 Parent(s): 929a717

Upload 10 files

Files changed (9)
  1. README.md +1 -1
  2. UVR.py +0 -0
  3. UVR_interface.py +852 -0
  4. __version__.py +4 -0
  5. app.py +3 -0
  6. packages.txt +3 -0
  7. requirements.txt +43 -0
  8. separate.py +942 -0
  9. webUI.py +285 -0
README.md CHANGED
@@ -4,7 +4,7 @@ emoji: ⚡
 colorFrom: indigo
 colorTo: purple
 sdk: gradio
-sdk_version: 4.1.2
+sdk_version: 3.44.2
 app_file: app.py
 pinned: false
 license: openrail
UVR.py ADDED
The diff for this file is too large to render. See raw diff
 
UVR_interface.py ADDED
@@ -0,0 +1,852 @@
+import audioread
+import librosa
+
+import os
+import sys
+import json
+import time
+from tqdm import tqdm
+import pickle
+import hashlib
+import logging
+import traceback
+import shutil
+import soundfile as sf
+
+import torch
+
+from gui_data.constants import *
+from gui_data.old_data_check import file_check, remove_unneeded_yamls, remove_temps
+from lib_v5.vr_network.model_param_init import ModelParameters
+from lib_v5 import spec_utils
+from pathlib import Path
+from separate import SeperateAttributes, SeperateDemucs, SeperateMDX, SeperateVR, save_format
+from typing import List
+
+
+logging.basicConfig(format='%(asctime)s - %(message)s', level=logging.INFO)
+logging.info('UVR BEGIN')
+
+PREVIOUS_PATCH_WIN = 'UVR_Patch_1_12_23_14_54'
+
+is_dnd_compatible = True
+banner_placement = -2
+
+def save_data(data):
+    """
+    Saves given data as a .pkl (pickle) file
+
+    Parameters:
+        data(dict):
+            Dictionary containing all the necessary data to save
+    """
+    # Open data file, create it if it does not exist
+    with open('data.pkl', 'wb') as data_file:
+        pickle.dump(data, data_file)
+
+def load_data() -> dict:
+    """
+    Loads the saved pkl file and returns the stored data
+
+    Returns(dict):
+        Dictionary containing all the saved data
+    """
+    try:
+        with open('data.pkl', 'rb') as data_file:  # Open data file
+            data = pickle.load(data_file)
+
+        return data
+    except (ValueError, FileNotFoundError):
+        # Data file is corrupted or not found, so recreate it
+        save_data(data=DEFAULT_DATA)
+
+        return load_data()
+
+def load_model_hash_data(dictionary):
+    '''Get the model hash dictionary'''
+
+    with open(dictionary) as d:
+        data = d.read()
+
+    return json.loads(data)
+
+# Change the current working directory to the directory
+# this file sits in
+if getattr(sys, 'frozen', False):
+    # If the application is run as a bundle, the PyInstaller bootloader
+    # extends the sys module by a flag frozen=True and sets the app
+    # path into the variable _MEIPASS.
+    BASE_PATH = sys._MEIPASS
+else:
+    BASE_PATH = os.path.dirname(os.path.abspath(__file__))
+
+os.chdir(BASE_PATH)  # Change the current working directory to the base path
+
+debugger = []
+
+#--Constants--
+#Models
+MODELS_DIR = os.path.join(BASE_PATH, 'models')
+VR_MODELS_DIR = os.path.join(MODELS_DIR, 'VR_Models')
+MDX_MODELS_DIR = os.path.join(MODELS_DIR, 'MDX_Net_Models')
+DEMUCS_MODELS_DIR = os.path.join(MODELS_DIR, 'Demucs_Models')
+DEMUCS_NEWER_REPO_DIR = os.path.join(DEMUCS_MODELS_DIR, 'v3_v4_repo')
+MDX_MIXER_PATH = os.path.join(BASE_PATH, 'lib_v5', 'mixer.ckpt')
+
+#Cache & Parameters
+VR_HASH_DIR = os.path.join(VR_MODELS_DIR, 'model_data')
+VR_HASH_JSON = os.path.join(VR_MODELS_DIR, 'model_data', 'model_data.json')
+MDX_HASH_DIR = os.path.join(MDX_MODELS_DIR, 'model_data')
+MDX_HASH_JSON = os.path.join(MDX_MODELS_DIR, 'model_data', 'model_data.json')
+DEMUCS_MODEL_NAME_SELECT = os.path.join(DEMUCS_MODELS_DIR, 'model_data', 'model_name_mapper.json')
+MDX_MODEL_NAME_SELECT = os.path.join(MDX_MODELS_DIR, 'model_data', 'model_name_mapper.json')
+ENSEMBLE_CACHE_DIR = os.path.join(BASE_PATH, 'gui_data', 'saved_ensembles')
+SETTINGS_CACHE_DIR = os.path.join(BASE_PATH, 'gui_data', 'saved_settings')
+VR_PARAM_DIR = os.path.join(BASE_PATH, 'lib_v5', 'vr_network', 'modelparams')
+SAMPLE_CLIP_PATH = os.path.join(BASE_PATH, 'temp_sample_clips')
+ENSEMBLE_TEMP_PATH = os.path.join(BASE_PATH, 'ensemble_temps')
+
+#Style
+ICON_IMG_PATH = os.path.join(BASE_PATH, 'gui_data', 'img', 'GUI-Icon.ico')
+FONT_PATH = os.path.join(BASE_PATH, 'gui_data', 'fonts', 'centurygothic', 'GOTHIC.TTF')
+
+#Other
+COMPLETE_CHIME = os.path.join(BASE_PATH, 'gui_data', 'complete_chime.wav')
+FAIL_CHIME = os.path.join(BASE_PATH, 'gui_data', 'fail_chime.wav')
+CHANGE_LOG = os.path.join(BASE_PATH, 'gui_data', 'change_log.txt')
+SPLASH_DOC = os.path.join(BASE_PATH, 'tmp', 'splash.txt')
+
+file_check(os.path.join(MODELS_DIR, 'Main_Models'), VR_MODELS_DIR)
+file_check(os.path.join(DEMUCS_MODELS_DIR, 'v3_repo'), DEMUCS_NEWER_REPO_DIR)
+remove_unneeded_yamls(DEMUCS_MODELS_DIR)
+
+remove_temps(ENSEMBLE_TEMP_PATH)
+remove_temps(SAMPLE_CLIP_PATH)
+remove_temps(os.path.join(BASE_PATH, 'img'))
+
+if not os.path.isdir(ENSEMBLE_TEMP_PATH):
+    os.mkdir(ENSEMBLE_TEMP_PATH)
+
+if not os.path.isdir(SAMPLE_CLIP_PATH):
+    os.mkdir(SAMPLE_CLIP_PATH)
+
+model_hash_table = {}
+data = load_data()
+
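+# ModelData collects every per-model setting (paths, stems, architecture
+# parameters) for a single separation model, reading the GUI state through
+# the module-level `root` object (a FakeRoot stand-in defined below).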
+class ModelData():
+    def __init__(self, model_name: str,
+                 selected_process_method=ENSEMBLE_MODE,
+                 is_secondary_model=False,
+                 primary_model_primary_stem=None,
+                 is_primary_model_primary_stem_only=False,
+                 is_primary_model_secondary_stem_only=False,
+                 is_pre_proc_model=False,
+                 is_dry_check=False):
+
+        self.is_gpu_conversion = 0 if root.is_gpu_conversion_var.get() else -1
+        self.is_normalization = root.is_normalization_var.get()
+        self.is_primary_stem_only = root.is_primary_stem_only_var.get()
+        self.is_secondary_stem_only = root.is_secondary_stem_only_var.get()
+        self.is_denoise = root.is_denoise_var.get()
+        self.mdx_batch_size = 1 if root.mdx_batch_size_var.get() == DEF_OPT else int(root.mdx_batch_size_var.get())
+        self.is_mdx_ckpt = False
+        self.wav_type_set = root.wav_type_set
+        self.mp3_bit_set = root.mp3_bit_set_var.get()
+        self.save_format = root.save_format_var.get()
+        self.is_invert_spec = root.is_invert_spec_var.get()
+        self.is_mixer_mode = root.is_mixer_mode_var.get()
+        self.demucs_stems = root.demucs_stems_var.get()
+        self.demucs_source_list = []
+        self.demucs_stem_count = 0
+        self.mixer_path = MDX_MIXER_PATH
+        self.model_name = model_name
+        self.process_method = selected_process_method
+        self.model_status = False if self.model_name == CHOOSE_MODEL or self.model_name == NO_MODEL else True
+        self.primary_stem = None
+        self.secondary_stem = None
+        self.is_ensemble_mode = False
+        self.ensemble_primary_stem = None
+        self.ensemble_secondary_stem = None
+        self.primary_model_primary_stem = primary_model_primary_stem
+        self.is_secondary_model = is_secondary_model
+        self.secondary_model = None
+        self.secondary_model_scale = None
+        self.demucs_4_stem_added_count = 0
+        self.is_demucs_4_stem_secondaries = False
+        self.is_4_stem_ensemble = False
+        self.pre_proc_model = None
+        self.pre_proc_model_activated = False
+        self.is_pre_proc_model = is_pre_proc_model
+        self.is_dry_check = is_dry_check
+        self.model_samplerate = 44100
+        self.model_capacity = 32, 128
+        self.is_vr_51_model = False
+        self.is_demucs_pre_proc_model_inst_mix = False
+        self.manual_download_Button = None
+        self.secondary_model_4_stem = []
+        self.secondary_model_4_stem_scale = []
+        self.secondary_model_4_stem_names = []
+        self.secondary_model_4_stem_model_names_list = []
+        self.all_models = []
+        self.secondary_model_other = None
+        self.secondary_model_scale_other = None
+        self.secondary_model_bass = None
+        self.secondary_model_scale_bass = None
+        self.secondary_model_drums = None
+        self.secondary_model_scale_drums = None
+
+        if selected_process_method == ENSEMBLE_MODE:
+            partitioned_name = model_name.partition(ENSEMBLE_PARTITION)
+            self.process_method = partitioned_name[0]
+            self.model_name = partitioned_name[2]
+            self.model_and_process_tag = model_name
+            self.ensemble_primary_stem, self.ensemble_secondary_stem = root.return_ensemble_stems()
+            self.is_ensemble_mode = True if not is_secondary_model and not is_pre_proc_model else False
+            self.is_4_stem_ensemble = True if root.ensemble_main_stem_var.get() == FOUR_STEM_ENSEMBLE and self.is_ensemble_mode else False
+            self.pre_proc_model_activated = root.is_demucs_pre_proc_model_activate_var.get() if not self.ensemble_primary_stem == VOCAL_STEM else False
+
+        if self.process_method == VR_ARCH_TYPE:
+            self.is_secondary_model_activated = root.vr_is_secondary_model_activate_var.get() if not self.is_secondary_model else False
+            self.aggression_setting = float(int(root.aggression_setting_var.get())/100)
+            self.is_tta = root.is_tta_var.get()
+            self.is_post_process = root.is_post_process_var.get()
+            self.window_size = int(root.window_size_var.get())
+            self.batch_size = 1 if root.batch_size_var.get() == DEF_OPT else int(root.batch_size_var.get())
+            self.crop_size = int(root.crop_size_var.get())
+            self.is_high_end_process = 'mirroring' if root.is_high_end_process_var.get() else 'None'
+            self.post_process_threshold = float(root.post_process_threshold_var.get())
+            self.model_capacity = 32, 128
+            self.model_path = os.path.join(VR_MODELS_DIR, f"{self.model_name}.pth")
+            self.get_model_hash()
+            if self.model_hash:
+                self.model_data = self.get_model_data(VR_HASH_DIR, root.vr_hash_MAPPER) if not self.model_hash == WOOD_INST_MODEL_HASH else WOOD_INST_PARAMS
+                if self.model_data:
+                    vr_model_param = os.path.join(VR_PARAM_DIR, "{}.json".format(self.model_data["vr_model_param"]))
+                    self.primary_stem = self.model_data["primary_stem"]
+                    self.secondary_stem = STEM_PAIR_MAPPER[self.primary_stem]
+                    self.vr_model_param = ModelParameters(vr_model_param)
+                    self.model_samplerate = self.vr_model_param.param['sr']
+                    if "nout" in self.model_data.keys() and "nout_lstm" in self.model_data.keys():
+                        self.model_capacity = self.model_data["nout"], self.model_data["nout_lstm"]
+                        self.is_vr_51_model = True
+                else:
+                    self.model_status = False
+
+        if self.process_method == MDX_ARCH_TYPE:
+            self.is_secondary_model_activated = root.mdx_is_secondary_model_activate_var.get() if not is_secondary_model else False
+            self.margin = int(root.margin_var.get())
+            self.chunks = root.determine_auto_chunks(root.chunks_var.get(), self.is_gpu_conversion) if root.is_chunk_mdxnet_var.get() else 0
+            self.get_mdx_model_path()
+            self.get_model_hash()
+            if self.model_hash:
+                self.model_data = self.get_model_data(MDX_HASH_DIR, root.mdx_hash_MAPPER)
+                if self.model_data:
+                    self.compensate = self.model_data["compensate"] if root.compensate_var.get() == AUTO_SELECT else float(root.compensate_var.get())
+                    self.mdx_dim_f_set = self.model_data["mdx_dim_f_set"]
+                    self.mdx_dim_t_set = self.model_data["mdx_dim_t_set"]
+                    self.mdx_n_fft_scale_set = self.model_data["mdx_n_fft_scale_set"]
+                    self.primary_stem = self.model_data["primary_stem"]
+                    self.secondary_stem = STEM_PAIR_MAPPER[self.primary_stem]
+                else:
+                    self.model_status = False
+
+        if self.process_method == DEMUCS_ARCH_TYPE:
+            self.is_secondary_model_activated = root.demucs_is_secondary_model_activate_var.get() if not is_secondary_model else False
+            if not self.is_ensemble_mode:
+                self.pre_proc_model_activated = root.is_demucs_pre_proc_model_activate_var.get() if not root.demucs_stems_var.get() in [VOCAL_STEM, INST_STEM] else False
+            self.overlap = float(root.overlap_var.get())
+            self.margin_demucs = int(root.margin_demucs_var.get())
+            self.chunks_demucs = root.determine_auto_chunks(root.chunks_demucs_var.get(), self.is_gpu_conversion)
+            self.shifts = int(root.shifts_var.get())
+            self.is_split_mode = root.is_split_mode_var.get()
+            self.segment = root.segment_var.get()
+            self.is_chunk_demucs = root.is_chunk_demucs_var.get()
+            self.is_demucs_combine_stems = root.is_demucs_combine_stems_var.get()
+            self.is_primary_stem_only = root.is_primary_stem_only_var.get() if self.is_ensemble_mode else root.is_primary_stem_only_Demucs_var.get()
+            self.is_secondary_stem_only = root.is_secondary_stem_only_var.get() if self.is_ensemble_mode else root.is_secondary_stem_only_Demucs_var.get()
+            self.get_demucs_model_path()
+            self.get_demucs_model_data()
+
+        self.model_basename = os.path.splitext(os.path.basename(self.model_path))[0] if self.model_status else None
+        self.pre_proc_model_activated = self.pre_proc_model_activated if not self.is_secondary_model else False
+
+        self.is_primary_model_primary_stem_only = is_primary_model_primary_stem_only
+        self.is_primary_model_secondary_stem_only = is_primary_model_secondary_stem_only
+
+        if self.is_secondary_model_activated and self.model_status:
+            if (not self.is_ensemble_mode and root.demucs_stems_var.get() == ALL_STEMS and self.process_method == DEMUCS_ARCH_TYPE) or self.is_4_stem_ensemble:
+                for key in DEMUCS_4_SOURCE_LIST:
+                    self.secondary_model_data(key)
+                    self.secondary_model_4_stem.append(self.secondary_model)
+                    self.secondary_model_4_stem_scale.append(self.secondary_model_scale)
+                    self.secondary_model_4_stem_names.append(key)
+                self.demucs_4_stem_added_count = sum(i is not None for i in self.secondary_model_4_stem)
+                self.is_secondary_model_activated = False if all(i is None for i in self.secondary_model_4_stem) else True
+                self.demucs_4_stem_added_count = self.demucs_4_stem_added_count - 1 if self.is_secondary_model_activated else self.demucs_4_stem_added_count
+                if self.is_secondary_model_activated:
+                    self.secondary_model_4_stem_model_names_list = [None if i is None else i.model_basename for i in self.secondary_model_4_stem]
+                    self.is_demucs_4_stem_secondaries = True
+            else:
+                primary_stem = self.ensemble_primary_stem if self.is_ensemble_mode and self.process_method == DEMUCS_ARCH_TYPE else self.primary_stem
+                self.secondary_model_data(primary_stem)
+
+        if self.process_method == DEMUCS_ARCH_TYPE and not is_secondary_model:
+            if self.demucs_stem_count >= 3 and self.pre_proc_model_activated:
+                self.pre_proc_model_activated = True
+                self.pre_proc_model = root.process_determine_demucs_pre_proc_model(self.primary_stem)
+                self.is_demucs_pre_proc_model_inst_mix = root.is_demucs_pre_proc_model_inst_mix_var.get() if self.pre_proc_model else False
+
+    def secondary_model_data(self, primary_stem):
+        secondary_model_data = root.process_determine_secondary_model(self.process_method, primary_stem, self.is_primary_stem_only, self.is_secondary_stem_only)
+        self.secondary_model = secondary_model_data[0]
+        self.secondary_model_scale = secondary_model_data[1]
+        self.is_secondary_model_activated = False if not self.secondary_model else True
+        if self.secondary_model:
+            self.is_secondary_model_activated = False if self.secondary_model.model_basename == self.model_basename else True
+
+    def get_mdx_model_path(self):
+
+        if self.model_name.endswith(CKPT):
+            # self.chunks = 0
+            # self.is_mdx_batch_mode = True
+            self.is_mdx_ckpt = True
+
+        ext = '' if self.is_mdx_ckpt else ONNX
+
+        for file_name, chosen_mdx_model in root.mdx_name_select_MAPPER.items():
+            if self.model_name in chosen_mdx_model:
+                self.model_path = os.path.join(MDX_MODELS_DIR, f"{file_name}{ext}")
+                break
+        else:
+            self.model_path = os.path.join(MDX_MODELS_DIR, f"{self.model_name}{ext}")
+
+        self.mixer_path = os.path.join(MDX_MODELS_DIR, "mixer_val.ckpt")
+
+    def get_demucs_model_path(self):
+
+        demucs_newer = [True for x in DEMUCS_NEWER_TAGS if x in self.model_name]
+        demucs_model_dir = DEMUCS_NEWER_REPO_DIR if demucs_newer else DEMUCS_MODELS_DIR
+
+        for file_name, chosen_model in root.demucs_name_select_MAPPER.items():
+            if self.model_name in chosen_model:
+                self.model_path = os.path.join(demucs_model_dir, file_name)
+                break
+        else:
+            self.model_path = os.path.join(DEMUCS_NEWER_REPO_DIR, f'{self.model_name}.yaml')
+
+    def get_demucs_model_data(self):
+
+        self.demucs_version = DEMUCS_V4
+
+        for key, value in DEMUCS_VERSION_MAPPER.items():
+            if value in self.model_name:
+                self.demucs_version = key
+
+        self.demucs_source_list = DEMUCS_2_SOURCE if DEMUCS_UVR_MODEL in self.model_name else DEMUCS_4_SOURCE
+        self.demucs_source_map = DEMUCS_2_SOURCE_MAPPER if DEMUCS_UVR_MODEL in self.model_name else DEMUCS_4_SOURCE_MAPPER
+        self.demucs_stem_count = 2 if DEMUCS_UVR_MODEL in self.model_name else 4
+
+        if not self.is_ensemble_mode:
+            self.primary_stem = PRIMARY_STEM if self.demucs_stems == ALL_STEMS else self.demucs_stems
+            self.secondary_stem = STEM_PAIR_MAPPER[self.primary_stem]
+
+    def get_model_data(self, model_hash_dir, hash_mapper):
+        model_settings_json = os.path.join(model_hash_dir, "{}.json".format(self.model_hash))
+
+        if os.path.isfile(model_settings_json):
+            return json.load(open(model_settings_json))
+        else:
+            for hash, settings in hash_mapper.items():
+                if self.model_hash in hash:
+                    return settings
+            else:
+                return self.get_model_data_from_popup()
+
+    def get_model_data_from_popup(self):
+        return None
+
+    def get_model_hash(self):
+        self.model_hash = None
+
+        if not os.path.isfile(self.model_path):
+            self.model_status = False
+            self.model_hash = None
+        else:
+            if model_hash_table:
+                for (key, value) in model_hash_table.items():
+                    if self.model_path == key:
+                        self.model_hash = value
+                        break
+
+            if not self.model_hash:
+                try:
+                    # Hash only the last ~10 MB of the model file for speed;
+                    # fall back to hashing the whole file for smaller models.
+                    with open(self.model_path, 'rb') as f:
+                        f.seek(- 10000 * 1024, 2)
+                        self.model_hash = hashlib.md5(f.read()).hexdigest()
+                except:
+                    self.model_hash = hashlib.md5(open(self.model_path, 'rb').read()).hexdigest()
+
+                table_entry = {self.model_path: self.model_hash}
+                model_hash_table.update(table_entry)
+
+
+class Ensembler():
+    def __init__(self, is_manual_ensemble=False):
+        self.is_save_all_outputs_ensemble = root.is_save_all_outputs_ensemble_var.get()
+        chosen_ensemble_name = '{}'.format(root.chosen_ensemble_var.get().replace(" ", "_")) if not root.chosen_ensemble_var.get() == CHOOSE_ENSEMBLE_OPTION else 'Ensembled'
+        ensemble_algorithm = root.ensemble_type_var.get().partition("/")
+        ensemble_main_stem_pair = root.ensemble_main_stem_var.get().partition("/")
+        time_stamp = round(time.time())
+        self.audio_tool = MANUAL_ENSEMBLE
+        self.main_export_path = Path(root.export_path_var.get())
+        self.chosen_ensemble = f"_{chosen_ensemble_name}" if root.is_append_ensemble_name_var.get() else ''
+        ensemble_folder_name = self.main_export_path if self.is_save_all_outputs_ensemble else ENSEMBLE_TEMP_PATH
+        self.ensemble_folder_name = os.path.join(ensemble_folder_name, '{}_Outputs_{}'.format(chosen_ensemble_name, time_stamp))
+        self.is_testing_audio = f"{time_stamp}_" if root.is_testing_audio_var.get() else ''
+        self.primary_algorithm = ensemble_algorithm[0]
+        self.secondary_algorithm = ensemble_algorithm[2]
+        self.ensemble_primary_stem = ensemble_main_stem_pair[0]
+        self.ensemble_secondary_stem = ensemble_main_stem_pair[2]
+        self.is_normalization = root.is_normalization_var.get()
+        self.wav_type_set = root.wav_type_set
+        self.mp3_bit_set = root.mp3_bit_set_var.get()
+        self.save_format = root.save_format_var.get()
+        if not is_manual_ensemble:
+            os.mkdir(self.ensemble_folder_name)
+
+    def ensemble_outputs(self, audio_file_base, export_path, stem, is_4_stem=False, is_inst_mix=False):
+        """Processes the given outputs and ensembles them with the chosen algorithm"""
+
+        if is_4_stem:
+            algorithm = root.ensemble_type_var.get()
+            stem_tag = stem
+        else:
+            if is_inst_mix:
+                algorithm = self.secondary_algorithm
+                stem_tag = f"{self.ensemble_secondary_stem} {INST_STEM}"
+            else:
+                algorithm = self.primary_algorithm if stem == PRIMARY_STEM else self.secondary_algorithm
+                stem_tag = self.ensemble_primary_stem if stem == PRIMARY_STEM else self.ensemble_secondary_stem
+
+        stem_outputs = self.get_files_to_ensemble(folder=export_path, prefix=audio_file_base, suffix=f"_({stem_tag}).wav")
+        audio_file_output = f"{self.is_testing_audio}{audio_file_base}{self.chosen_ensemble}_({stem_tag})"
+        stem_save_path = os.path.join('{}'.format(self.main_export_path), '{}.wav'.format(audio_file_output))
+
+        if stem_outputs:
+            spec_utils.ensemble_inputs(stem_outputs, algorithm, self.is_normalization, self.wav_type_set, stem_save_path)
+            save_format(stem_save_path, self.save_format, self.mp3_bit_set)
+
+        if self.is_save_all_outputs_ensemble:
+            for i in stem_outputs:
+                save_format(i, self.save_format, self.mp3_bit_set)
+        else:
+            for i in stem_outputs:
+                try:
+                    os.remove(i)
+                except Exception as e:
+                    print(e)
+
+    def ensemble_manual(self, audio_inputs, audio_file_base, is_bulk=False):
+        """Processes the given inputs and ensembles them with the chosen algorithm"""
+
+        is_mv_sep = True
+
+        if is_bulk:
+            number_list = list(set([os.path.basename(i).split("_")[0] for i in audio_inputs]))
+            for n in number_list:
+                current_list = [i for i in audio_inputs if os.path.basename(i).startswith(n)]
+                audio_file_base = os.path.basename(current_list[0]).split('.wav')[0]
+                stem_testing = "instrum" if "Instrumental" in audio_file_base else "vocals"
+                if is_mv_sep:
+                    audio_file_base = audio_file_base.split("_")
+                    audio_file_base = f"{audio_file_base[1]}_{audio_file_base[2]}_{stem_testing}"
+                self.ensemble_manual_process(current_list, audio_file_base, is_bulk)
+        else:
+            self.ensemble_manual_process(audio_inputs, audio_file_base, is_bulk)
+
+    def ensemble_manual_process(self, audio_inputs, audio_file_base, is_bulk):
+
+        algorithm = root.choose_algorithm_var.get()
+        algorithm_text = "" if is_bulk else f"_({root.choose_algorithm_var.get()})"
+        stem_save_path = os.path.join('{}'.format(self.main_export_path), '{}{}{}.wav'.format(self.is_testing_audio, audio_file_base, algorithm_text))
+        spec_utils.ensemble_inputs(audio_inputs, algorithm, self.is_normalization, self.wav_type_set, stem_save_path)
+        save_format(stem_save_path, self.save_format, self.mp3_bit_set)
+
+    def get_files_to_ensemble(self, folder="", prefix="", suffix=""):
+        """Grab all the files to be ensembled"""
+
+        return [os.path.join(folder, i) for i in os.listdir(folder) if i.startswith(prefix) and i.endswith(suffix)]
+
+
+def secondary_stem(stem):
+    """Determines the secondary stem"""
+
+    for key, value in STEM_PAIR_MAPPER.items():
+        if stem in key:
+            secondary_stem = value
+
+    return secondary_stem
+
+
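+# UVRInterface drives a full separation run without the Tk GUI: it assembles
+# the ModelData list, verifies (and optionally samples) the input audio,
+# dispatches to the matching Seperate* class, and ensembles outputs on request.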
+class UVRInterface:
+    def __init__(self) -> None:
+        pass
+
+    def assemble_model_data(self, model=None, arch_type=ENSEMBLE_MODE, is_dry_check=False) -> List[ModelData]:
+        if arch_type == ENSEMBLE_STEM_CHECK:
+            model_data = self.model_data_table
+            missing_models = [model.model_status for model in model_data if not model.model_status]
+
+            if missing_models or not model_data:
+                model_data: List[ModelData] = [ModelData(model_name, is_dry_check=is_dry_check) for model_name in self.ensemble_model_list]
+                self.model_data_table = model_data
+
+        if arch_type == ENSEMBLE_MODE:
+            model_data: List[ModelData] = [ModelData(model_name) for model_name in self.ensemble_listbox_get_all_selected_models()]
+        if arch_type == ENSEMBLE_CHECK:
+            model_data: List[ModelData] = [ModelData(model)]
+        if arch_type == VR_ARCH_TYPE or arch_type == VR_ARCH_PM:
+            model_data: List[ModelData] = [ModelData(model, VR_ARCH_TYPE)]
+        if arch_type == MDX_ARCH_TYPE:
+            model_data: List[ModelData] = [ModelData(model, MDX_ARCH_TYPE)]
+        if arch_type == DEMUCS_ARCH_TYPE:
+            model_data: List[ModelData] = [ModelData(model, DEMUCS_ARCH_TYPE)]
+
+        return model_data
+
+    def create_sample(self, audio_file, sample_path=SAMPLE_CLIP_PATH):
+        try:
+            with audioread.audio_open(audio_file) as f:
+                track_length = int(f.duration)
+        except Exception as e:
+            print('Audioread failed to get duration. Trying Librosa...')
+            y, sr = librosa.load(audio_file, mono=False, sr=44100)
+            track_length = int(librosa.get_duration(y=y, sr=sr))
+
+        clip_duration = int(root.model_sample_mode_duration_var.get())
+
+        if track_length >= clip_duration:
+            offset_cut = track_length//3
+            off_cut = offset_cut + track_length
+            if not off_cut >= clip_duration:
+                offset_cut = 0
+            name_apped = f'{clip_duration}_second_'
+        else:
+            offset_cut, clip_duration = 0, track_length
+            name_apped = ''
+
+        sample = librosa.load(audio_file, offset=offset_cut, duration=clip_duration, mono=False, sr=44100)[0].T
+        audio_sample = os.path.join(sample_path, f'{os.path.splitext(os.path.basename(audio_file))[0]}_{name_apped}sample.wav')
+        sf.write(audio_sample, sample, 44100)
+
+        return audio_sample
+
+    def verify_audio(self, audio_file, is_process=True, sample_path=None):
+        is_good = False
+        error_data = ''
+
+        if os.path.isfile(audio_file):
+            try:
+                librosa.load(audio_file, duration=3, mono=False, sr=44100) if not type(sample_path) is str else self.create_sample(audio_file, sample_path)
+                is_good = True
+            except Exception as e:
+                error_name = f'{type(e).__name__}'
+                traceback_text = ''.join(traceback.format_tb(e.__traceback__))
+                message = f'{error_name}: "{e}"\n{traceback_text}"'
+                if is_process:
+                    audio_base_name = os.path.basename(audio_file)
+                    self.error_log_var.set(f'Error Loading the Following File:\n\n\"{audio_base_name}\"\n\nRaw Error Details:\n\n{message}')
+                else:
+                    error_data = AUDIO_VERIFICATION_CHECK(audio_file, message)
+
+        if is_process:
+            return is_good
+        else:
+            return is_good, error_data
+
+    def cached_sources_clear(self):
+        self.vr_cache_source_mapper = {}
+        self.mdx_cache_source_mapper = {}
+        self.demucs_cache_source_mapper = {}
+
+    def cached_model_source_holder(self, process_method, sources, model_name=None):
+        if process_method == VR_ARCH_TYPE:
+            self.vr_cache_source_mapper = {**self.vr_cache_source_mapper, **{model_name: sources}}
+        if process_method == MDX_ARCH_TYPE:
+            self.mdx_cache_source_mapper = {**self.mdx_cache_source_mapper, **{model_name: sources}}
+        if process_method == DEMUCS_ARCH_TYPE:
+            self.demucs_cache_source_mapper = {**self.demucs_cache_source_mapper, **{model_name: sources}}
+
+    def cached_source_callback(self, process_method, model_name=None):
+        model, sources = None, None
+
+        if process_method == VR_ARCH_TYPE:
+            mapper = self.vr_cache_source_mapper
+        if process_method == MDX_ARCH_TYPE:
+            mapper = self.mdx_cache_source_mapper
+        if process_method == DEMUCS_ARCH_TYPE:
+            mapper = self.demucs_cache_source_mapper
+
+        for key, value in mapper.items():
+            if model_name in key:
+                model = key
+                sources = value
+
+        return model, sources
+
+    def cached_source_model_list_check(self, model_list: List[ModelData]):
+        model: ModelData
+        primary_model_names = lambda process_method: [model.model_basename if model.process_method == process_method else None for model in model_list]
+        secondary_model_names = lambda process_method: [model.secondary_model.model_basename if model.is_secondary_model_activated and model.process_method == process_method else None for model in model_list]
+
+        self.vr_primary_model_names = primary_model_names(VR_ARCH_TYPE)
+        self.mdx_primary_model_names = primary_model_names(MDX_ARCH_TYPE)
+        self.demucs_primary_model_names = primary_model_names(DEMUCS_ARCH_TYPE)
+        self.vr_secondary_model_names = secondary_model_names(VR_ARCH_TYPE)
+        self.mdx_secondary_model_names = secondary_model_names(MDX_ARCH_TYPE)
+        self.demucs_secondary_model_names = [model.secondary_model.model_basename if model.is_secondary_model_activated and model.process_method == DEMUCS_ARCH_TYPE and not model.secondary_model is None else None for model in model_list]
+        self.demucs_pre_proc_model_name = [model.pre_proc_model.model_basename if model.pre_proc_model else None for model in model_list]
+
+        for model in model_list:
+            if model.process_method == DEMUCS_ARCH_TYPE and model.is_demucs_4_stem_secondaries:
+                if not model.is_4_stem_ensemble:
+                    self.demucs_secondary_model_names = model.secondary_model_4_stem_model_names_list
+                    break
+                else:
+                    for i in model.secondary_model_4_stem_model_names_list:
+                        self.demucs_secondary_model_names.append(i)
+
+        self.all_models = self.vr_primary_model_names + self.mdx_primary_model_names + self.demucs_primary_model_names + self.vr_secondary_model_names + self.mdx_secondary_model_names + self.demucs_secondary_model_names + self.demucs_pre_proc_model_name
+
+    def process(self, model_name, arch_type, audio_file, export_path, is_model_sample_mode=False, is_4_stem_ensemble=False, set_progress_func=None, console_write=print) -> SeperateAttributes:
+        stime = time.perf_counter()
+        time_elapsed = lambda: f'Time Elapsed: {time.strftime("%H:%M:%S", time.gmtime(int(time.perf_counter() - stime)))}'
+
+        if arch_type == ENSEMBLE_MODE:
+            model_list, ensemble = self.assemble_model_data(), Ensembler()
+            export_path = ensemble.ensemble_folder_name
+            is_ensemble = True
+        else:
+            model_list = self.assemble_model_data(model_name, arch_type)
+            is_ensemble = False
+        self.cached_source_model_list_check(model_list)
+        model = model_list[0]
+
+        if self.verify_audio(audio_file):
+            audio_file = self.create_sample(audio_file) if is_model_sample_mode else audio_file
+        else:
+            print(f'"{os.path.basename(audio_file)}" is missing or corrupted.\n')
+            exit()
+
+        audio_file_base = f"{os.path.splitext(os.path.basename(audio_file))[0]}"
+        audio_file_base = audio_file_base if is_ensemble else f"{round(time.time())}_{audio_file_base}"
+        audio_file_base = audio_file_base if not is_ensemble else f"{audio_file_base}_{model.model_basename}"
+        if not is_ensemble:
+            audio_file_base = f"{audio_file_base}_{model.model_basename}"
+
+        if not is_ensemble:
+            export_path = os.path.join(Path(export_path), model.model_basename, os.path.splitext(os.path.basename(audio_file))[0])
+            if not os.path.isdir(export_path):
+                os.makedirs(export_path)
+
+        if set_progress_func is None:
+            pbar = tqdm(total=1)
+            self._progress = 0
+
+            def set_progress_func(step, inference_iterations=0):
+                progress_curr = step + inference_iterations
+                pbar.update(progress_curr - self._progress)
+                self._progress = progress_curr
+
+            def postprocess():
+                pbar.close()
+        else:
+            def postprocess():
+                pass
+
+        process_data = {
+            'model_data': model,
+            'export_path': export_path,
+            'audio_file_base': audio_file_base,
+            'audio_file': audio_file,
+            'set_progress_bar': set_progress_func,
+            'write_to_console': lambda progress_text, base_text='': console_write(base_text + progress_text),
+            'process_iteration': lambda: None,
+            'cached_source_callback': self.cached_source_callback,
+            'cached_model_source_holder': self.cached_model_source_holder,
+            'list_all_models': self.all_models,
+            'is_ensemble_master': is_ensemble,
+            'is_4_stem_ensemble': is_ensemble and is_4_stem_ensemble
+        }
+        if model.process_method == VR_ARCH_TYPE:
+            seperator = SeperateVR(model, process_data)
+        if model.process_method == MDX_ARCH_TYPE:
+            seperator = SeperateMDX(model, process_data)
+        if model.process_method == DEMUCS_ARCH_TYPE:
+            seperator = SeperateDemucs(model, process_data)
+
+        seperator.seperate()
+        postprocess()
+
+        if is_ensemble:
+            audio_file_base = audio_file_base.replace(f"_{model.model_basename}", "")
+            console_write(ENSEMBLING_OUTPUTS)
+
+            if is_4_stem_ensemble:
+                for output_stem in DEMUCS_4_SOURCE_LIST:
+                    ensemble.ensemble_outputs(audio_file_base, export_path, output_stem, is_4_stem=True)
+            else:
+                if not root.is_secondary_stem_only_var.get():
+                    ensemble.ensemble_outputs(audio_file_base, export_path, PRIMARY_STEM)
+                if not root.is_primary_stem_only_var.get():
+                    ensemble.ensemble_outputs(audio_file_base, export_path, SECONDARY_STEM)
+                    ensemble.ensemble_outputs(audio_file_base, export_path, SECONDARY_STEM, is_inst_mix=True)
+
+            console_write(DONE)
+
+        if is_model_sample_mode:
+            if os.path.isfile(audio_file):
+                os.remove(audio_file)
+
+        torch.cuda.empty_cache()
+
+        if is_ensemble and len(os.listdir(export_path)) == 0:
+            shutil.rmtree(export_path)
+        console_write(f'Process Complete, using time: {time_elapsed()}\nOutput path: {export_path}')
+        self.cached_sources_clear()
+        return seperator
+
+
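+# The two classes below emulate the tkinter root window and its Variable
+# objects that the original UVR GUI code expects, so this module can run
+# headless (e.g. from webUI.py): RootWrapper mimics a Variable's get()/set(),
+# and FakeRoot fabricates one on first access to any unknown attribute.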
+class RootWrapper:
+    def __init__(self, var) -> None:
+        self.var = var
+
+    def set(self, val):
+        self.var = val
+
+    def get(self):
+        return self.var
+
+class FakeRoot:
+    def __init__(self) -> None:
+        self.wav_type_set = 'PCM_16'
+        self.vr_hash_MAPPER = load_model_hash_data(VR_HASH_JSON)
+        self.mdx_hash_MAPPER = load_model_hash_data(MDX_HASH_JSON)
+        self.mdx_name_select_MAPPER = load_model_hash_data(MDX_MODEL_NAME_SELECT)
+        self.demucs_name_select_MAPPER = load_model_hash_data(DEMUCS_MODEL_NAME_SELECT)
+
+    def __getattribute__(self, __name: str):
+        try:
+            return super().__getattribute__(__name)
+        except AttributeError:
+            # Any attribute that has not been set yet is created on the fly
+            # as a RootWrapper, mimicking a tkinter Variable.
+            wrapped = RootWrapper(None)
+            super().__setattr__(__name, wrapped)
+            return wrapped
+
+    def load_saved_settings(self, loaded_setting: dict, process_method=None):
+        """Loads user saved application settings or resets to default"""
+
+        for key, value in DEFAULT_DATA.items():
+            if not key in loaded_setting.keys():
+                loaded_setting = {**loaded_setting, **{key: value}}
+                loaded_setting['batch_size'] = DEF_OPT
+
+        is_ensemble = True if process_method == ENSEMBLE_MODE else False
+
+        if not process_method or process_method == VR_ARCH_PM or is_ensemble:
+            self.vr_model_var.set(loaded_setting['vr_model'])
+            self.aggression_setting_var.set(loaded_setting['aggression_setting'])
+            self.window_size_var.set(loaded_setting['window_size'])
+            self.batch_size_var.set(loaded_setting['batch_size'])
+            self.crop_size_var.set(loaded_setting['crop_size'])
+            self.is_tta_var.set(loaded_setting['is_tta'])
+            self.is_output_image_var.set(loaded_setting['is_output_image'])
+            self.is_post_process_var.set(loaded_setting['is_post_process'])
+            self.is_high_end_process_var.set(loaded_setting['is_high_end_process'])
+            self.post_process_threshold_var.set(loaded_setting['post_process_threshold'])
+            self.vr_voc_inst_secondary_model_var.set(loaded_setting['vr_voc_inst_secondary_model'])
+            self.vr_other_secondary_model_var.set(loaded_setting['vr_other_secondary_model'])
+            self.vr_bass_secondary_model_var.set(loaded_setting['vr_bass_secondary_model'])
+            self.vr_drums_secondary_model_var.set(loaded_setting['vr_drums_secondary_model'])
+            self.vr_is_secondary_model_activate_var.set(loaded_setting['vr_is_secondary_model_activate'])
+            self.vr_voc_inst_secondary_model_scale_var.set(loaded_setting['vr_voc_inst_secondary_model_scale'])
+            self.vr_other_secondary_model_scale_var.set(loaded_setting['vr_other_secondary_model_scale'])
+            self.vr_bass_secondary_model_scale_var.set(loaded_setting['vr_bass_secondary_model_scale'])
+            self.vr_drums_secondary_model_scale_var.set(loaded_setting['vr_drums_secondary_model_scale'])
+
+        if not process_method or process_method == DEMUCS_ARCH_TYPE or is_ensemble:
+            self.demucs_model_var.set(loaded_setting['demucs_model'])
+            self.segment_var.set(loaded_setting['segment'])
+            self.overlap_var.set(loaded_setting['overlap'])
+            self.shifts_var.set(loaded_setting['shifts'])
+            self.chunks_demucs_var.set(loaded_setting['chunks_demucs'])
+            self.margin_demucs_var.set(loaded_setting['margin_demucs'])
+            self.is_chunk_demucs_var.set(loaded_setting['is_chunk_demucs'])
+            self.is_chunk_mdxnet_var.set(loaded_setting['is_chunk_mdxnet'])
+            self.is_primary_stem_only_Demucs_var.set(loaded_setting['is_primary_stem_only_Demucs'])
+            self.is_secondary_stem_only_Demucs_var.set(loaded_setting['is_secondary_stem_only_Demucs'])
+            self.is_split_mode_var.set(loaded_setting['is_split_mode'])
+            self.is_demucs_combine_stems_var.set(loaded_setting['is_demucs_combine_stems'])
+            self.demucs_voc_inst_secondary_model_var.set(loaded_setting['demucs_voc_inst_secondary_model'])
+            self.demucs_other_secondary_model_var.set(loaded_setting['demucs_other_secondary_model'])
+            self.demucs_bass_secondary_model_var.set(loaded_setting['demucs_bass_secondary_model'])
+            self.demucs_drums_secondary_model_var.set(loaded_setting['demucs_drums_secondary_model'])
+            self.demucs_is_secondary_model_activate_var.set(loaded_setting['demucs_is_secondary_model_activate'])
+            self.demucs_voc_inst_secondary_model_scale_var.set(loaded_setting['demucs_voc_inst_secondary_model_scale'])
+            self.demucs_other_secondary_model_scale_var.set(loaded_setting['demucs_other_secondary_model_scale'])
+            self.demucs_bass_secondary_model_scale_var.set(loaded_setting['demucs_bass_secondary_model_scale'])
+            self.demucs_drums_secondary_model_scale_var.set(loaded_setting['demucs_drums_secondary_model_scale'])
+            self.demucs_stems_var.set(loaded_setting['demucs_stems'])
+            # self.update_stem_checkbox_labels(self.demucs_stems_var.get(), demucs=True)
+            self.demucs_pre_proc_model_var.set(data['demucs_pre_proc_model'])
+            self.is_demucs_pre_proc_model_activate_var.set(data['is_demucs_pre_proc_model_activate'])
+            self.is_demucs_pre_proc_model_inst_mix_var.set(data['is_demucs_pre_proc_model_inst_mix'])
+
+        if not process_method or process_method == MDX_ARCH_TYPE or is_ensemble:
+            self.mdx_net_model_var.set(loaded_setting['mdx_net_model'])
+            self.chunks_var.set(loaded_setting['chunks'])
+            self.margin_var.set(loaded_setting['margin'])
+            self.compensate_var.set(loaded_setting['compensate'])
+            self.is_denoise_var.set(loaded_setting['is_denoise'])
+            self.is_invert_spec_var.set(loaded_setting['is_invert_spec'])
+            self.is_mixer_mode_var.set(loaded_setting['is_mixer_mode'])
+            self.mdx_batch_size_var.set(loaded_setting['mdx_batch_size'])
+            self.mdx_voc_inst_secondary_model_var.set(loaded_setting['mdx_voc_inst_secondary_model'])
+            self.mdx_other_secondary_model_var.set(loaded_setting['mdx_other_secondary_model'])
+            self.mdx_bass_secondary_model_var.set(loaded_setting['mdx_bass_secondary_model'])
+            self.mdx_drums_secondary_model_var.set(loaded_setting['mdx_drums_secondary_model'])
+            self.mdx_is_secondary_model_activate_var.set(loaded_setting['mdx_is_secondary_model_activate'])
+            self.mdx_voc_inst_secondary_model_scale_var.set(loaded_setting['mdx_voc_inst_secondary_model_scale'])
+            self.mdx_other_secondary_model_scale_var.set(loaded_setting['mdx_other_secondary_model_scale'])
+            self.mdx_bass_secondary_model_scale_var.set(loaded_setting['mdx_bass_secondary_model_scale'])
+            self.mdx_drums_secondary_model_scale_var.set(loaded_setting['mdx_drums_secondary_model_scale'])
+
+        if not process_method or is_ensemble:
+            self.is_save_all_outputs_ensemble_var.set(loaded_setting['is_save_all_outputs_ensemble'])
+            self.is_append_ensemble_name_var.set(loaded_setting['is_append_ensemble_name'])
+            self.chosen_audio_tool_var.set(loaded_setting['chosen_audio_tool'])
+            self.choose_algorithm_var.set(loaded_setting['choose_algorithm'])
+            self.time_stretch_rate_var.set(loaded_setting['time_stretch_rate'])
+            self.pitch_rate_var.set(loaded_setting['pitch_rate'])
+            self.is_primary_stem_only_var.set(loaded_setting['is_primary_stem_only'])
+            self.is_secondary_stem_only_var.set(loaded_setting['is_secondary_stem_only'])
+            self.is_testing_audio_var.set(loaded_setting['is_testing_audio'])
+            self.is_add_model_name_var.set(loaded_setting['is_add_model_name'])
+            self.is_accept_any_input_var.set(loaded_setting["is_accept_any_input"])
+            self.is_task_complete_var.set(loaded_setting['is_task_complete'])
+            self.is_create_model_folder_var.set(loaded_setting['is_create_model_folder'])
+            self.mp3_bit_set_var.set(loaded_setting['mp3_bit_set'])
+            self.save_format_var.set(loaded_setting['save_format'])
+            self.wav_type_set_var.set(loaded_setting['wav_type_set'])
+            self.user_code_var.set(loaded_setting['user_code'])
+
+        self.is_gpu_conversion_var.set(loaded_setting['is_gpu_conversion'])
+        self.is_normalization_var.set(loaded_setting['is_normalization'])
+        self.help_hints_var.set(loaded_setting['help_hints_var'])
+
+        self.model_sample_mode_var.set(loaded_setting['model_sample_mode'])
+        self.model_sample_mode_duration_var.set(loaded_setting['model_sample_mode_duration'])
+
+
+root = FakeRoot()
+root.load_saved_settings(DEFAULT_DATA)
__version__.py ADDED
@@ -0,0 +1,4 @@
+VERSION = 'v5.5.1'
+PATCH = 'UVR_Patch_3_31_23_5_5'
+PATCH_MAC = 'UVR_Patch_01_10_12_6_50'
+PATCH_LINUX = 'UVR_Patch_01_01_23_6_50'
app.py ADDED
@@ -0,0 +1,3 @@
+import os
+
+os.system("python webUI.py")
packages.txt ADDED
@@ -0,0 +1,3 @@
+git-lfs
+aria2 -y
+ffmpeg
requirements.txt ADDED
@@ -0,0 +1,43 @@
+altgraph==0.17.3
+audioread==3.0.0
+certifi==2022.12.7
+cffi==1.15.1
+cryptography==3.4.6
+diffq==0.2.3
+Dora==0.0.3
+einops==0.6.0
+future==0.18.2
+julius==0.2.7
+kthread==0.2.3
+librosa==0.9.2
+llvmlite==0.39.1
+natsort==8.2.0
+numba==0.56.4
+numpy==1.23.4
+omegaconf==2.2.3
+opencv-python==4.6.0.66
+onnx
+onnxruntime==1.13.1
+Pillow==9.3.0
+playsound==1.3.0
+psutil==5.9.4
+pydub==0.25.1
+pyglet==1.5.23
+pyperclip==1.8.2
+pyrubberband==0.3.0
+pytorch_lightning==2.0.0
+PyYAML==6.0
+resampy==0.2.2
+scipy==1.9.3
+soundfile==0.11.0
+soundstretch==1.2
+torch==1.13.1
+tqdm
+urllib3==1.26.12
+wget==3.2
+samplerate==0.1.0
+screeninfo==0.8.1
+PySoundFile==0.9.0.post1; sys_platform != 'windows'
+SoundFile==0.9.0; sys_platform == 'windows'
+
+gradio >= 3.19
separate.py ADDED
@@ -0,0 +1,942 @@
1
+ from __future__ import annotations
2
+ from typing import TYPE_CHECKING
3
+ from demucs.apply import apply_model, demucs_segments
4
+ from demucs.hdemucs import HDemucs
5
+ from demucs.model_v2 import auto_load_demucs_model_v2
6
+ from demucs.pretrained import get_model as _gm
7
+ from demucs.utils import apply_model_v1
8
+ from demucs.utils import apply_model_v2
9
+ from lib_v5 import spec_utils
10
+ from lib_v5.vr_network import nets
11
+ from lib_v5.vr_network import nets_new
12
+ #from lib_v5.vr_network.model_param_init import ModelParameters
13
+ from pathlib import Path
14
+ from gui_data.constants import *
15
+ from gui_data.error_handling import *
16
+ import audioread
17
+ import gzip
18
+ import librosa
19
+ import math
20
+ import numpy as np
21
+ import onnxruntime as ort
22
+ import os
23
+ import torch
24
+ import warnings
25
+ import pydub
26
+ import soundfile as sf
27
+ import traceback
28
+ import lib_v5.mdxnet as MdxnetSet
29
+
30
+ if TYPE_CHECKING:
31
+ from UVR import ModelData
32
+
33
+ warnings.filterwarnings("ignore")
34
+ cpu = torch.device('cpu')
35
+
36
+ class SeperateAttributes:
37
+ def __init__(self, model_data: ModelData, process_data: dict, main_model_primary_stem_4_stem=None, main_process_method=None):
38
+
39
+ self.list_all_models: list
40
+ self.process_data = process_data
41
+ self.progress_value = 0
42
+ self.set_progress_bar = process_data['set_progress_bar']
43
+ self.write_to_console = process_data['write_to_console']
44
+ self.audio_file = process_data['audio_file']
45
+ self.audio_file_base = process_data['audio_file_base']
46
+ self.export_path = process_data['export_path']
47
+ self.cached_source_callback = process_data['cached_source_callback']
48
+ self.cached_model_source_holder = process_data['cached_model_source_holder']
49
+ self.is_4_stem_ensemble = process_data['is_4_stem_ensemble']
50
+ self.list_all_models = process_data['list_all_models']
51
+ self.process_iteration = process_data['process_iteration']
52
+ self.mixer_path = model_data.mixer_path
53
+ self.model_samplerate = model_data.model_samplerate
54
+ self.model_capacity = model_data.model_capacity
55
+ self.is_vr_51_model = model_data.is_vr_51_model
56
+ self.is_pre_proc_model = model_data.is_pre_proc_model
57
+ self.is_secondary_model_activated = model_data.is_secondary_model_activated if not self.is_pre_proc_model else False
58
+ self.is_secondary_model = model_data.is_secondary_model if not self.is_pre_proc_model else True
59
+ self.process_method = model_data.process_method
60
+ self.model_path = model_data.model_path
61
+ self.model_name = model_data.model_name
62
+ self.model_basename = model_data.model_basename
63
+ self.wav_type_set = model_data.wav_type_set
64
+ self.mp3_bit_set = model_data.mp3_bit_set
65
+ self.save_format = model_data.save_format
66
+ self.is_gpu_conversion = model_data.is_gpu_conversion
67
+ self.is_normalization = model_data.is_normalization
68
+ self.is_primary_stem_only = model_data.is_primary_stem_only if not self.is_secondary_model else model_data.is_primary_model_primary_stem_only
69
+ self.is_secondary_stem_only = model_data.is_secondary_stem_only if not self.is_secondary_model else model_data.is_primary_model_secondary_stem_only
70
+ self.is_ensemble_mode = model_data.is_ensemble_mode
71
+ self.secondary_model = model_data.secondary_model #
72
+ self.primary_model_primary_stem = model_data.primary_model_primary_stem
73
+ self.primary_stem = model_data.primary_stem #
74
+ self.secondary_stem = model_data.secondary_stem #
75
+ self.is_invert_spec = model_data.is_invert_spec #
76
+ self.is_mixer_mode = model_data.is_mixer_mode #
77
+ self.secondary_model_scale = model_data.secondary_model_scale #
78
+ self.is_demucs_pre_proc_model_inst_mix = model_data.is_demucs_pre_proc_model_inst_mix #
79
+ self.primary_source_map = {}
80
+ self.secondary_source_map = {}
81
+ self.primary_source = None
82
+ self.secondary_source = None
83
+ self.secondary_source_primary = None
84
+ self.secondary_source_secondary = None
85
+
86
+ if not model_data.process_method == DEMUCS_ARCH_TYPE:
87
+ if process_data['is_ensemble_master'] and not self.is_4_stem_ensemble:
88
+ if not model_data.ensemble_primary_stem == self.primary_stem:
89
+ self.is_primary_stem_only, self.is_secondary_stem_only = self.is_secondary_stem_only, self.is_primary_stem_only
90
+
91
+ if self.is_secondary_model and not process_data['is_ensemble_master']:
92
+ if not self.primary_model_primary_stem == self.primary_stem and not main_model_primary_stem_4_stem:
93
+ self.is_primary_stem_only, self.is_secondary_stem_only = self.is_secondary_stem_only, self.is_primary_stem_only
94
+
95
+ if main_model_primary_stem_4_stem:
96
+ self.is_primary_stem_only = True if main_model_primary_stem_4_stem == self.primary_stem else False
97
+ self.is_secondary_stem_only = True if not main_model_primary_stem_4_stem == self.primary_stem else False
98
+
99
+ if self.is_pre_proc_model:
100
+ self.is_primary_stem_only = True if self.primary_stem == INST_STEM else False
101
+ self.is_secondary_stem_only = True if self.secondary_stem == INST_STEM else False
102
+
103
+ if model_data.process_method == MDX_ARCH_TYPE:
104
+ self.is_mdx_ckpt = model_data.is_mdx_ckpt
105
+ self.primary_model_name, self.primary_sources = self.cached_source_callback(MDX_ARCH_TYPE, model_name=self.model_basename)
106
+ self.is_denoise = model_data.is_denoise
107
+ self.mdx_batch_size = model_data.mdx_batch_size
108
+ self.compensate = model_data.compensate
109
+ self.dim_f, self.dim_t = model_data.mdx_dim_f_set, 2**model_data.mdx_dim_t_set
110
+ self.n_fft = model_data.mdx_n_fft_scale_set
111
+ self.chunks = model_data.chunks
112
+            self.margin = model_data.margin
+            self.adjust = 1
+            self.dim_c = 4
+            self.hop = 1024
+
+            if self.is_gpu_conversion >= 0 and torch.cuda.is_available():
+                self.device, self.run_type = torch.device('cuda:0'), ['CUDAExecutionProvider']
+            else:
+                self.device, self.run_type = torch.device('cpu'), ['CPUExecutionProvider']
+
+        if model_data.process_method == DEMUCS_ARCH_TYPE:
+            self.demucs_stems = model_data.demucs_stems if not main_process_method in [MDX_ARCH_TYPE, VR_ARCH_TYPE] else None
+            self.secondary_model_4_stem = model_data.secondary_model_4_stem
+            self.secondary_model_4_stem_scale = model_data.secondary_model_4_stem_scale
+            self.primary_stem = model_data.ensemble_primary_stem if process_data['is_ensemble_master'] else model_data.primary_stem
+            self.secondary_stem = model_data.ensemble_secondary_stem if process_data['is_ensemble_master'] else model_data.secondary_stem
+            self.is_chunk_demucs = model_data.is_chunk_demucs
+            self.segment = model_data.segment
+            self.demucs_version = model_data.demucs_version
+            self.demucs_source_list = model_data.demucs_source_list
+            self.demucs_source_map = model_data.demucs_source_map
+            self.is_demucs_combine_stems = model_data.is_demucs_combine_stems
+            self.demucs_stem_count = model_data.demucs_stem_count
+            self.pre_proc_model = model_data.pre_proc_model
+
+            if self.is_secondary_model and not process_data['is_ensemble_master']:
+                if not self.demucs_stem_count == 2 and model_data.primary_model_primary_stem == INST_STEM:
+                    self.primary_stem = VOCAL_STEM
+                    self.secondary_stem = INST_STEM
+                else:
+                    self.primary_stem = model_data.primary_model_primary_stem
+                    self.secondary_stem = STEM_PAIR_MAPPER[self.primary_stem]
+
+            if self.is_chunk_demucs:
+                self.chunks_demucs = model_data.chunks_demucs
+                self.margin_demucs = model_data.margin_demucs
+            else:
+                self.chunks_demucs = 0
+                self.margin_demucs = 44100
+
+            self.shifts = model_data.shifts
+            self.is_split_mode = model_data.is_split_mode if not self.demucs_version == DEMUCS_V4 else True
+            self.overlap = model_data.overlap
+            self.primary_model_name, self.primary_sources = self.cached_source_callback(DEMUCS_ARCH_TYPE, model_name=self.model_basename)
+
+        if model_data.process_method == VR_ARCH_TYPE:
+            self.primary_model_name, self.primary_sources = self.cached_source_callback(VR_ARCH_TYPE, model_name=self.model_basename)
+            self.mp = model_data.vr_model_param
+            self.high_end_process = model_data.is_high_end_process
+            self.is_tta = model_data.is_tta
+            self.is_post_process = model_data.is_post_process
+            self.is_gpu_conversion = model_data.is_gpu_conversion
+            self.batch_size = model_data.batch_size
+            self.window_size = model_data.window_size
+            self.input_high_end_h = None
+            self.post_process_threshold = model_data.post_process_threshold
+            self.aggressiveness = {'value': model_data.aggression_setting,
+                                   'split_bin': self.mp.param['band'][1]['crop_stop'],
+                                   'aggr_correction': self.mp.param.get('aggr_correction')}
+
+    def start_inference_console_write(self):
+
+        if self.is_secondary_model and not self.is_pre_proc_model:
+            self.write_to_console(INFERENCE_STEP_2_SEC(self.process_method, self.model_basename))
+
+        if self.is_pre_proc_model:
+            self.write_to_console(INFERENCE_STEP_2_PRE(self.process_method, self.model_basename))
+
+    def running_inference_console_write(self, is_no_write=False):
+
+        self.write_to_console(DONE, base_text='') if not is_no_write else None
+        self.set_progress_bar(0.05) if not is_no_write else None
+
+        if self.is_secondary_model and not self.is_pre_proc_model:
+            self.write_to_console(INFERENCE_STEP_1_SEC)
+        elif self.is_pre_proc_model:
+            self.write_to_console(INFERENCE_STEP_1_PRE)
+        else:
+            self.write_to_console(INFERENCE_STEP_1)
+
+    def running_inference_progress_bar(self, length, is_match_mix=False):
+        if not is_match_mix:
+            self.progress_value += 1
+
+            if (0.8/length*self.progress_value) >= 0.8:
+                length = self.progress_value + 1
+
+            self.set_progress_bar(0.1, (0.8/length*self.progress_value))
+
+    def load_cached_sources(self, is_4_stem_demucs=False):
+
+        if self.is_secondary_model and not self.is_pre_proc_model:
+            self.write_to_console(INFERENCE_STEP_2_SEC_CACHED_MODOEL(self.process_method, self.model_basename))
+        elif self.is_pre_proc_model:
+            self.write_to_console(INFERENCE_STEP_2_PRE_CACHED_MODOEL(self.process_method, self.model_basename))
+        else:
+            self.write_to_console(INFERENCE_STEP_2_PRIMARY_CACHED)
+
+        if not is_4_stem_demucs:
+            primary_stem, secondary_stem = gather_sources(self.primary_stem, self.secondary_stem, self.primary_sources)
+
+            return primary_stem, secondary_stem
+
+    def cache_source(self, secondary_sources):
+
+        model_occurrences = self.list_all_models.count(self.model_basename)
+
+        if not model_occurrences <= 1:
+            if self.process_method == MDX_ARCH_TYPE:
+                self.cached_model_source_holder(MDX_ARCH_TYPE, secondary_sources, self.model_basename)
+
+            if self.process_method == VR_ARCH_TYPE:
+                self.cached_model_source_holder(VR_ARCH_TYPE, secondary_sources, self.model_basename)
+
+            if self.process_method == DEMUCS_ARCH_TYPE:
+                self.cached_model_source_holder(DEMUCS_ARCH_TYPE, secondary_sources, self.model_basename)
+
+    def write_audio(self, stem_path, stem_source, samplerate, secondary_model_source=None, model_scale=None):
+
+        if not self.is_secondary_model:
+            if self.is_secondary_model_activated:
+                if isinstance(secondary_model_source, np.ndarray):
+                    secondary_model_scale = model_scale if model_scale else self.secondary_model_scale
+                    stem_source = spec_utils.average_dual_sources(stem_source, secondary_model_source, secondary_model_scale)
+
+            sf.write(stem_path, stem_source, samplerate, subtype=self.wav_type_set)
+            save_format(stem_path, self.save_format, self.mp3_bit_set) if not self.is_ensemble_mode else None
+
+            self.write_to_console(DONE, base_text='')
+            self.set_progress_bar(0.95)
+
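+    # Optional refinement step: feeds the four separated stems plus the original mix
+    # through an auxiliary MDX-net "Mixer" network; on any failure it falls back to
+    # returning the unprocessed sources.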
+    def run_mixer(self, mix, sources):
+        try:
+            if self.is_mixer_mode and len(sources) == 4:
+                mixer = MdxnetSet.Mixer(self.device, self.mixer_path).eval()
+                with torch.no_grad():
+                    mix = torch.tensor(mix, dtype=torch.float32)
+                    sources_ = torch.tensor(sources).detach()
+                    x = torch.cat([sources_, mix.unsqueeze(0)], 0)
+                    sources_ = mixer(x)
+                final_source = np.array(sources_)
+            else:
+                final_source = sources
+        except Exception as e:
+            error_name = f'{type(e).__name__}'
+            traceback_text = ''.join(traceback.format_tb(e.__traceback__))
+            message = f'{error_name}: "{e}"\n{traceback_text}'
+            print('Mixer Failed: ', message)
+            final_source = sources
+
+        return final_source
+
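+# MDX-Net separation. Loads either an ONNX session or a ConvTDFNet checkpoint,
+# demixes the chunked mixture, and writes the primary stem directly and the
+# secondary stem by subtracting (or spectrally inverting) against the raw mix.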
+class SeperateMDX(SeperateAttributes):
+
+    def seperate(self):
+        samplerate = 44100
+
+        if self.primary_model_name == self.model_basename and self.primary_sources:
+            self.primary_source, self.secondary_source = self.load_cached_sources()
+        else:
+            self.start_inference_console_write()
+
+            if self.is_mdx_ckpt:
+                model_params = torch.load(self.model_path, map_location=lambda storage, loc: storage)['hyper_parameters']
+                self.dim_c, self.hop = model_params['dim_c'], model_params['hop_length']
+                separator = MdxnetSet.ConvTDFNet(**model_params)
+                self.model_run = separator.load_from_checkpoint(self.model_path).to(self.device).eval()
+            else:
+                ort_ = ort.InferenceSession(self.model_path, providers=self.run_type)
+                self.model_run = lambda spek:ort_.run(None, {'input': spek.cpu().numpy()})[0]
+
+            self.initialize_model_settings()
+            self.running_inference_console_write()
+            mdx_net_cut = True if self.primary_stem in MDX_NET_FREQ_CUT else False
+            mix, raw_mix, samplerate = prepare_mix(self.audio_file, self.chunks, self.margin, mdx_net_cut=mdx_net_cut)
+            source = self.demix_base(mix, is_ckpt=self.is_mdx_ckpt)[0]
+            self.write_to_console(DONE, base_text='')
+
+        if self.is_secondary_model_activated:
+            if self.secondary_model:
+                self.secondary_source_primary, self.secondary_source_secondary = process_secondary_model(self.secondary_model, self.process_data, main_process_method=self.process_method)
+
+        if not self.is_secondary_stem_only:
+            self.write_to_console(f'{SAVING_STEM[0]}{self.primary_stem}{SAVING_STEM[1]}') if not self.is_secondary_model else None
+            primary_stem_path = os.path.join(self.export_path, f'{self.audio_file_base}_({self.primary_stem}).wav')
+            if not isinstance(self.primary_source, np.ndarray):
+                self.primary_source = spec_utils.normalize(source, self.is_normalization).T
+            self.primary_source_map = {self.primary_stem: self.primary_source}
+            self.write_audio(primary_stem_path, self.primary_source, samplerate, self.secondary_source_primary)
+
+        if not self.is_primary_stem_only:
+            self.write_to_console(f'{SAVING_STEM[0]}{self.secondary_stem}{SAVING_STEM[1]}') if not self.is_secondary_model else None
+            secondary_stem_path = os.path.join(self.export_path, f'{self.audio_file_base}_({self.secondary_stem}).wav')
+            if not isinstance(self.secondary_source, np.ndarray):
+                raw_mix = self.demix_base(raw_mix, is_match_mix=True)[0] if mdx_net_cut else raw_mix
+                self.secondary_source, raw_mix = spec_utils.normalize_two_stem(source*self.compensate, raw_mix, self.is_normalization)
+
+                if self.is_invert_spec:
+                    self.secondary_source = spec_utils.invert_stem(raw_mix, self.secondary_source)
+                else:
+                    self.secondary_source = (-self.secondary_source.T+raw_mix.T)
+
+            self.secondary_source_map = {self.secondary_stem: self.secondary_source}
+            self.write_audio(secondary_stem_path, self.secondary_source, samplerate, self.secondary_source_secondary)
+
+        torch.cuda.empty_cache()
+        secondary_sources = {**self.primary_source_map, **self.secondary_source_map}
+
+        self.cache_source(secondary_sources)
+
+        if self.is_secondary_model:
+            return secondary_sources
+
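+    # STFT geometry: n_bins = n_fft//2 + 1 frequency bins, a trim of n_fft//2 samples
+    # on each side of a chunk, and chunk_size = hop * (dim_t - 1) so one chunk yields
+    # exactly dim_t STFT frames; gen_size is the usable (untrimmed) part of a chunk.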
+    def initialize_model_settings(self):
+        self.n_bins = self.n_fft//2+1
+        self.trim = self.n_fft//2
+        self.chunk_size = self.hop * (self.dim_t-1)
+        self.window = torch.hann_window(window_length=self.n_fft, periodic=False).to(self.device)
+        self.freq_pad = torch.zeros([1, self.dim_c, self.n_bins-self.dim_f, self.dim_t]).to(self.device)
+        self.gen_size = self.chunk_size-2*self.trim
+
+    def initialize_mix(self, mix, is_ckpt=False):
+        if is_ckpt:
+            pad = self.gen_size + self.trim - ((mix.shape[-1]) % self.gen_size)
+            mixture = np.concatenate((np.zeros((2, self.trim), dtype='float32'), mix, np.zeros((2, pad), dtype='float32')), 1)
+            num_chunks = mixture.shape[-1] // self.gen_size
+            mix_waves = [mixture[:, i * self.gen_size: i * self.gen_size + self.chunk_size] for i in range(num_chunks)]
+        else:
+            mix_waves = []
+            n_sample = mix.shape[1]
+            pad = self.gen_size - n_sample%self.gen_size
+            mix_p = np.concatenate((np.zeros((2,self.trim)), mix, np.zeros((2,pad)), np.zeros((2,self.trim))), 1)
+            i = 0
+            while i < n_sample + pad:
+                waves = np.array(mix_p[:, i:i+self.chunk_size])
+                mix_waves.append(waves)
+                i += self.gen_size
+
+        mix_waves = torch.tensor(mix_waves, dtype=torch.float32).to(self.device)
+
+        return mix_waves, pad
+
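+    # Chunked inference: each chunk is zero-padded to a multiple of gen_size, split
+    # into mdx_batch_size batches, run through the model, then trims and margins are
+    # stripped so neighbouring chunks concatenate seamlessly.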
+    def demix_base(self, mix, is_ckpt=False, is_match_mix=False):
+        chunked_sources = []
+        for slice in mix:
+            sources = []
+            tar_waves_ = []
+            mix_p = mix[slice]
+            mix_waves, pad = self.initialize_mix(mix_p, is_ckpt=is_ckpt)
+            mix_waves = mix_waves.split(self.mdx_batch_size)
+            pad = mix_p.shape[-1] if is_ckpt else -pad
+            with torch.no_grad():
+                for mix_wave in mix_waves:
+                    self.running_inference_progress_bar(len(mix)*len(mix_waves), is_match_mix=is_match_mix)
+                    tar_waves = self.run_model(mix_wave, is_ckpt=is_ckpt, is_match_mix=is_match_mix)
+                    tar_waves_.append(tar_waves)
+                tar_waves_ = np.vstack(tar_waves_)[:, :, self.trim:-self.trim] if is_ckpt else tar_waves_
+                tar_waves = np.concatenate(tar_waves_, axis=-1)[:, :pad]
+                start = 0 if slice == 0 else self.margin
+                end = None if slice == list(mix.keys())[::-1][0] or self.margin == 0 else -self.margin
+                sources.append(tar_waves[:,start:end]*(1/self.adjust))
+            chunked_sources.append(sources)
+        sources = np.concatenate(chunked_sources, axis=-1)
+
+        return sources
+
+    def run_model(self, mix, is_ckpt=False, is_match_mix=False):
+
+        spek = self.stft(mix.to(self.device))*self.adjust
+        spek[:, :, :3, :] *= 0
+
+        if is_match_mix:
+            spec_pred = spek.cpu().numpy()
+        else:
+            spec_pred = -self.model_run(-spek)*0.5+self.model_run(spek)*0.5 if self.is_denoise else self.model_run(spek)
+
+        if is_ckpt:
+            return self.istft(spec_pred).cpu().detach().numpy()
+        else:
+            return self.istft(torch.tensor(spec_pred).to(self.device)).to(cpu)[:,:,self.trim:-self.trim].transpose(0,1).reshape(2, -1).numpy()
+
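+    # stft/istft pack the complex stereo spectrogram into dim_c real channels (and
+    # back), cropping to dim_f bins for the model and restoring the discarded high
+    # bins from freq_pad on the way out.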
+    def stft(self, x):
+        x = x.reshape([-1, self.chunk_size])
+        x = torch.stft(x, n_fft=self.n_fft, hop_length=self.hop, window=self.window, center=True, return_complex=True)
+        x = torch.view_as_real(x)
+        x = x.permute([0,3,1,2])
+        x = x.reshape([-1,2,2,self.n_bins,self.dim_t]).reshape([-1,self.dim_c,self.n_bins,self.dim_t])
+        return x[:,:,:self.dim_f]
+
+    def istft(self, x, freq_pad=None):
+        freq_pad = self.freq_pad.repeat([x.shape[0],1,1,1]) if freq_pad is None else freq_pad
+        x = torch.cat([x, freq_pad], -2)
+        x = x.reshape([-1,2,2,self.n_bins,self.dim_t]).reshape([-1,2,self.n_bins,self.dim_t])
+        x = x.permute([0,2,3,1])
+        x = x.contiguous()
+        x = torch.view_as_complex(x)
+        x = torch.istft(x, n_fft=self.n_fft, hop_length=self.hop, window=self.window, center=True)
+        return x.reshape([-1,2,self.chunk_size])
+
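+# Demucs separation. Handles v1 (.gz checkpoints), v2, and v3/v4 (HDemucs via the
+# demucs repo loader), an optional vocal pre-process model, 2/4/6-stem source maps,
+# and per-stem secondary models when exporting all stems.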
+class SeperateDemucs(SeperateAttributes):
+
+    def seperate(self):
+
+        samplerate = 44100
+        source = None
+        model_scale = None
+        stem_source = None
+        stem_source_secondary = None
+        inst_mix = None
+        inst_raw_mix = None
+        raw_mix = None
+        inst_source = None
+        is_no_write = False
+        is_no_piano_guitar = False
+
+        if self.primary_model_name == self.model_basename and type(self.primary_sources) is dict and not self.pre_proc_model:
+            self.primary_source, self.secondary_source = self.load_cached_sources()
+        elif self.primary_model_name == self.model_basename and isinstance(self.primary_sources, np.ndarray) and not self.pre_proc_model:
+            source = self.primary_sources
+            self.load_cached_sources(is_4_stem_demucs=True)
+        else:
+            self.start_inference_console_write()
+
+            if self.is_gpu_conversion >= 0:
+                self.device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
+            else:
+                self.device = torch.device('cpu')
+
+            if self.demucs_version == DEMUCS_V1:
+                if str(self.model_path).endswith(".gz"):
+                    self.model_path = gzip.open(self.model_path, "rb")
+                klass, args, kwargs, state = torch.load(self.model_path)
+                self.demucs = klass(*args, **kwargs)
+                self.demucs.to(self.device)
+                self.demucs.load_state_dict(state)
+            elif self.demucs_version == DEMUCS_V2:
+                self.demucs = auto_load_demucs_model_v2(self.demucs_source_list, self.model_path)
+                self.demucs.to(self.device)
+                self.demucs.load_state_dict(torch.load(self.model_path))
+                self.demucs.eval()
+            else:
+                self.demucs = HDemucs(sources=self.demucs_source_list)
+                self.demucs = _gm(name=os.path.splitext(os.path.basename(self.model_path))[0],
+                                  repo=Path(os.path.dirname(self.model_path)))
+                self.demucs = demucs_segments(self.segment, self.demucs)
+                self.demucs.to(self.device)
+                self.demucs.eval()
+
+            if self.pre_proc_model:
+                if self.primary_stem not in [VOCAL_STEM, INST_STEM]:
+                    is_no_write = True
+                    self.write_to_console(DONE, base_text='')
+                    mix_no_voc = process_secondary_model(self.pre_proc_model, self.process_data, is_pre_proc_model=True)
+                    inst_mix, inst_raw_mix, inst_samplerate = prepare_mix(mix_no_voc[INST_STEM], self.chunks_demucs, self.margin_demucs)
+                    self.process_iteration()
+                    self.running_inference_console_write(is_no_write=is_no_write)
+                    inst_source = self.demix_demucs(inst_mix)
+                    inst_source = self.run_mixer(inst_raw_mix, inst_source)
+                    self.process_iteration()
+
+            self.running_inference_console_write(is_no_write=is_no_write) if not self.pre_proc_model else None
+            mix, raw_mix, samplerate = prepare_mix(self.audio_file, self.chunks_demucs, self.margin_demucs)
+
+            if self.primary_model_name == self.model_basename and isinstance(self.primary_sources, np.ndarray) and self.pre_proc_model:
+                source = self.primary_sources
+            else:
+                source = self.demix_demucs(mix)
+                source = self.run_mixer(raw_mix, source)
+
+            self.write_to_console(DONE, base_text='')
+
+            del self.demucs
+            torch.cuda.empty_cache()
+
+        if isinstance(inst_source, np.ndarray):
+            source_reshape = spec_utils.reshape_sources(inst_source[self.demucs_source_map[VOCAL_STEM]], source[self.demucs_source_map[VOCAL_STEM]])
+            inst_source[self.demucs_source_map[VOCAL_STEM]] = source_reshape
+            source = inst_source
+
+        if isinstance(source, np.ndarray):
+            if len(source) == 2:
+                self.demucs_source_map = DEMUCS_2_SOURCE_MAPPER
+            else:
+                self.demucs_source_map = DEMUCS_6_SOURCE_MAPPER if len(source) == 6 else DEMUCS_4_SOURCE_MAPPER
+
+            if len(source) == 6 and self.process_data['is_ensemble_master'] or len(source) == 6 and self.is_secondary_model:
+                is_no_piano_guitar = True
+                six_stem_other_source = list(source)
+                six_stem_other_source = [i for n, i in enumerate(source) if n in [self.demucs_source_map[OTHER_STEM], self.demucs_source_map[GUITAR_STEM], self.demucs_source_map[PIANO_STEM]]]
+                other_source = np.zeros_like(six_stem_other_source[0])
+                for i in six_stem_other_source:
+                    other_source += i
+                source_reshape = spec_utils.reshape_sources(source[self.demucs_source_map[OTHER_STEM]], other_source)
+                source[self.demucs_source_map[OTHER_STEM]] = source_reshape
+
+        if (self.demucs_stems == ALL_STEMS and not self.process_data['is_ensemble_master']) or self.is_4_stem_ensemble:
+            self.cache_source(source)
+
+            for stem_name, stem_value in self.demucs_source_map.items():
+                if self.is_secondary_model_activated and not self.is_secondary_model and not stem_value >= 4:
+                    if self.secondary_model_4_stem[stem_value]:
+                        model_scale = self.secondary_model_4_stem_scale[stem_value]
+                        stem_source_secondary = process_secondary_model(self.secondary_model_4_stem[stem_value], self.process_data, main_model_primary_stem_4_stem=stem_name, is_4_stem_demucs=True)
+                        if isinstance(stem_source_secondary, np.ndarray):
+                            stem_source_secondary = stem_source_secondary[1 if self.secondary_model_4_stem[stem_value].demucs_stem_count == 2 else stem_value]
+                            stem_source_secondary = spec_utils.normalize(stem_source_secondary, self.is_normalization).T
+                        elif type(stem_source_secondary) is dict:
+                            stem_source_secondary = stem_source_secondary[stem_name]
+
+                stem_source_secondary = None if stem_value >= 4 else stem_source_secondary
+                self.write_to_console(f'{SAVING_STEM[0]}{stem_name}{SAVING_STEM[1]}') if not self.is_secondary_model else None
+                stem_path = os.path.join(self.export_path, f'{self.audio_file_base}_({stem_name}).wav')
+                stem_source = spec_utils.normalize(source[stem_value], self.is_normalization).T
+                self.write_audio(stem_path, stem_source, samplerate, secondary_model_source=stem_source_secondary, model_scale=model_scale)
+
+            if self.is_secondary_model:
+                return source
+        else:
+            if self.is_secondary_model_activated:
+                if self.secondary_model:
+                    self.secondary_source_primary, self.secondary_source_secondary = process_secondary_model(self.secondary_model, self.process_data, main_process_method=self.process_method)
+
+            if not self.is_secondary_stem_only:
+                self.write_to_console(f'{SAVING_STEM[0]}{self.primary_stem}{SAVING_STEM[1]}') if not self.is_secondary_model else None
+                primary_stem_path = os.path.join(self.export_path, f'{self.audio_file_base}_({self.primary_stem}).wav')
+                if not isinstance(self.primary_source, np.ndarray):
+                    self.primary_source = spec_utils.normalize(source[self.demucs_source_map[self.primary_stem]], self.is_normalization).T
+                self.primary_source_map = {self.primary_stem: self.primary_source}
+                self.write_audio(primary_stem_path, self.primary_source, samplerate, self.secondary_source_primary)
+
+            if not self.is_primary_stem_only:
+                def secondary_save(sec_stem_name, source, raw_mixture=None, is_inst_mixture=False):
+                    secondary_source = self.secondary_source if not is_inst_mixture else None
+                    self.write_to_console(f'{SAVING_STEM[0]}{sec_stem_name}{SAVING_STEM[1]}') if not self.is_secondary_model else None
+                    secondary_stem_path = os.path.join(self.export_path, f'{self.audio_file_base}_({sec_stem_name}).wav')
+                    secondary_source_secondary = None
+
+                    if not isinstance(secondary_source, np.ndarray):
+                        if self.is_demucs_combine_stems:
+                            source = list(source)
+                            if is_inst_mixture:
+                                source = [i for n, i in enumerate(source) if not n in [self.demucs_source_map[self.primary_stem], self.demucs_source_map[VOCAL_STEM]]]
+                            else:
+                                source.pop(self.demucs_source_map[self.primary_stem])
+
+                            source = source[:len(source) - 2] if is_no_piano_guitar else source
+                            secondary_source = np.zeros_like(source[0])
+                            for i in source:
+                                secondary_source += i
+                            secondary_source = spec_utils.normalize(secondary_source, self.is_normalization).T
+                        else:
+                            if not isinstance(raw_mixture, np.ndarray):
+                                raw_mixture = prepare_mix(self.audio_file, self.chunks_demucs, self.margin_demucs, is_missing_mix=True)
+
+                            secondary_source, raw_mixture = spec_utils.normalize_two_stem(source[self.demucs_source_map[self.primary_stem]], raw_mixture, self.is_normalization)
+
+                            if self.is_invert_spec:
+                                secondary_source = spec_utils.invert_stem(raw_mixture, secondary_source)
+                            else:
+                                raw_mixture = spec_utils.reshape_sources(secondary_source, raw_mixture)
+                                secondary_source = (-secondary_source.T+raw_mixture.T)
+
+                    if not is_inst_mixture:
+                        self.secondary_source = secondary_source
+                        secondary_source_secondary = self.secondary_source_secondary
+                        self.secondary_source_map = {self.secondary_stem: self.secondary_source}
+
+                    self.write_audio(secondary_stem_path, secondary_source, samplerate, secondary_source_secondary)
+
+                secondary_save(self.secondary_stem, source, raw_mixture=raw_mix)
+
+                if self.is_demucs_pre_proc_model_inst_mix and self.pre_proc_model and not self.is_4_stem_ensemble:
+                    secondary_save(f"{self.secondary_stem} {INST_STEM}", source, raw_mixture=inst_raw_mix, is_inst_mixture=True)
+
+            secondary_sources = {**self.primary_source_map, **self.secondary_source_map}
+
+            self.cache_source(secondary_sources)
+
+            if self.is_secondary_model:
+                return secondary_sources
+
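+    # Normalizes each chunk to zero mean / unit variance before apply_model, undoes
+    # the normalization afterwards, reorders the first two sources to match UVR's
+    # source map, and strips the overlap margins when stitching chunks back together.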
+    def demix_demucs(self, mix):
+        processed = {}
+
+        set_progress_bar = None if self.is_chunk_demucs else self.set_progress_bar
+
+        for nmix in mix:
+            self.progress_value += 1
+            self.set_progress_bar(0.1, (0.8/len(mix)*self.progress_value)) if self.is_chunk_demucs else None
+            cmix = mix[nmix]
+            cmix = torch.tensor(cmix, dtype=torch.float32)
+            ref = cmix.mean(0)
+            cmix = (cmix - ref.mean()) / ref.std()
+            mix_infer = cmix
+
+            with torch.no_grad():
+                if self.demucs_version == DEMUCS_V1:
+                    sources = apply_model_v1(self.demucs,
+                                             mix_infer.to(self.device),
+                                             self.shifts,
+                                             self.is_split_mode,
+                                             set_progress_bar=set_progress_bar)
+                elif self.demucs_version == DEMUCS_V2:
+                    sources = apply_model_v2(self.demucs,
+                                             mix_infer.to(self.device),
+                                             self.shifts,
+                                             self.is_split_mode,
+                                             self.overlap,
+                                             set_progress_bar=set_progress_bar)
+                else:
+                    sources = apply_model(self.demucs,
+                                          mix_infer[None],
+                                          self.shifts,
+                                          self.is_split_mode,
+                                          self.overlap,
+                                          static_shifts=1 if self.shifts == 0 else self.shifts,
+                                          set_progress_bar=set_progress_bar,
+                                          device=self.device)[0]
+
+            sources = (sources * ref.std() + ref.mean()).cpu().numpy()
+            sources[[0,1]] = sources[[1,0]]
+            start = 0 if nmix == 0 else self.margin_demucs
+            end = None if nmix == list(mix.keys())[::-1][0] else -self.margin_demucs
+            if self.margin_demucs == 0:
+                end = None
+            processed[nmix] = sources[:,:,start:end].copy()
+        sources = list(processed.values())
+        sources = np.concatenate(sources, axis=-1)
+
+        return sources
+
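+# VR architecture separation. The network variant is inferred from the model file
+# size (closest match in nn_arch_sizes); inference is a sliding-window magnitude
+# mask over the spectrogram, optionally with test-time augmentation.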
+class SeperateVR(SeperateAttributes):
+
+    def seperate(self):
+        if self.primary_model_name == self.model_basename and self.primary_sources:
+            self.primary_source, self.secondary_source = self.load_cached_sources()
+        else:
+            self.start_inference_console_write()
+            if self.is_gpu_conversion >= 0:
+                if OPERATING_SYSTEM == 'Darwin':
+                    device = torch.device('mps' if torch.backends.mps.is_available() else 'cpu')
+                else:
+                    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
+            else:
+                device = torch.device('cpu')
+
+            nn_arch_sizes = [
+                31191, # default
+                33966, 56817, 123821, 123812, 129605, 218409, 537238, 537227]
+            vr_5_1_models = [56817, 218409]
+            model_size = math.ceil(os.stat(self.model_path).st_size / 1024)
+            nn_arch_size = min(nn_arch_sizes, key=lambda x:abs(x-model_size))
+
+            if nn_arch_size in vr_5_1_models or self.is_vr_51_model:
+                self.model_run = nets_new.CascadedNet(self.mp.param['bins'] * 2, nn_arch_size, nout=self.model_capacity[0], nout_lstm=self.model_capacity[1])
+            else:
+                self.model_run = nets.determine_model_capacity(self.mp.param['bins'] * 2, nn_arch_size)
+
+            self.model_run.load_state_dict(torch.load(self.model_path, map_location=cpu))
+            self.model_run.to(device)
+
+            self.running_inference_console_write()
+
+            y_spec, v_spec = self.inference_vr(self.loading_mix(), device, self.aggressiveness)
+            self.write_to_console(DONE, base_text='')
+
+        if self.is_secondary_model_activated:
+            if self.secondary_model:
+                self.secondary_source_primary, self.secondary_source_secondary = process_secondary_model(self.secondary_model, self.process_data, main_process_method=self.process_method)
+
+        if not self.is_secondary_stem_only:
+            self.write_to_console(f'{SAVING_STEM[0]}{self.primary_stem}{SAVING_STEM[1]}') if not self.is_secondary_model else None
+            primary_stem_path = os.path.join(self.export_path, f'{self.audio_file_base}_({self.primary_stem}).wav')
+            if not isinstance(self.primary_source, np.ndarray):
+                self.primary_source = spec_utils.normalize(self.spec_to_wav(y_spec), self.is_normalization).T
+                if not self.model_samplerate == 44100:
+                    self.primary_source = librosa.resample(self.primary_source.T, orig_sr=self.model_samplerate, target_sr=44100).T
+
+            self.primary_source_map = {self.primary_stem: self.primary_source}
+
+            self.write_audio(primary_stem_path, self.primary_source, 44100, self.secondary_source_primary)
+
+        if not self.is_primary_stem_only:
+            self.write_to_console(f'{SAVING_STEM[0]}{self.secondary_stem}{SAVING_STEM[1]}') if not self.is_secondary_model else None
+            secondary_stem_path = os.path.join(self.export_path, f'{self.audio_file_base}_({self.secondary_stem}).wav')
+            if not isinstance(self.secondary_source, np.ndarray):
+                self.secondary_source = spec_utils.normalize(self.spec_to_wav(v_spec), self.is_normalization).T
+                if not self.model_samplerate == 44100:
+                    self.secondary_source = librosa.resample(self.secondary_source.T, orig_sr=self.model_samplerate, target_sr=44100).T
+
+            self.secondary_source_map = {self.secondary_stem: self.secondary_source}
+
+            self.write_audio(secondary_stem_path, self.secondary_source, 44100, self.secondary_source_secondary)
+
+        torch.cuda.empty_cache()
+        secondary_sources = {**self.primary_source_map, **self.secondary_source_map}
+        self.cache_source(secondary_sources)
+
+        if self.is_secondary_model:
+            return secondary_sources
+
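+    # Builds the multi-band input: loads audio at the top band's sample rate,
+    # resamples downward for the lower bands, computes one spectrogram per band, and
+    # keeps the raw high-end bins for later mirroring in spec_to_wav.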
+    def loading_mix(self):
+
+        X_wave, X_spec_s = {}, {}
+
+        bands_n = len(self.mp.param['band'])
+
+        for d in range(bands_n, 0, -1):
+            bp = self.mp.param['band'][d]
+
+            if OPERATING_SYSTEM == 'Darwin':
+                wav_resolution = 'polyphase' if SYSTEM_PROC == ARM or ARM in SYSTEM_ARCH else bp['res_type']
+            else:
+                wav_resolution = bp['res_type']
+
+            if d == bands_n: # high-end band
+                X_wave[d], _ = librosa.load(self.audio_file, bp['sr'], False, dtype=np.float32, res_type=wav_resolution)
+
+                if not np.any(X_wave[d]) and self.audio_file.endswith('.mp3'):
+                    X_wave[d] = rerun_mp3(self.audio_file, bp['sr'])
+
+                if X_wave[d].ndim == 1:
+                    X_wave[d] = np.asarray([X_wave[d], X_wave[d]])
+            else: # lower bands
+                X_wave[d] = librosa.resample(X_wave[d+1], self.mp.param['band'][d+1]['sr'], bp['sr'], res_type=wav_resolution)
+
+            X_spec_s[d] = spec_utils.wave_to_spectrogram_mt(X_wave[d], bp['hl'], bp['n_fft'], self.mp.param['mid_side'],
+                                                            self.mp.param['mid_side_b2'], self.mp.param['reverse'])
+
+            if d == bands_n and self.high_end_process != 'none':
+                self.input_high_end_h = (bp['n_fft']//2 - bp['crop_stop']) + (self.mp.param['pre_filter_stop'] - self.mp.param['pre_filter_start'])
+                self.input_high_end = X_spec_s[d][:, bp['n_fft']//2-self.input_high_end_h:bp['n_fft']//2, :]
+
+        X_spec = spec_utils.combine_spectrograms(X_spec_s, self.mp)
+
+        del X_wave, X_spec_s
+
+        return X_spec
+
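+    # Sliding-window mask inference: _execute batches window_size-wide patches
+    # through the network; with TTA a half-window-shifted second pass is averaged in,
+    # and postprocess applies the aggression setting before splitting the spectrogram
+    # into primary (y_spec) and secondary (v_spec) parts.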
+    def inference_vr(self, X_spec, device, aggressiveness):
+        def _execute(X_mag_pad, roi_size):
+            X_dataset = []
+            patches = (X_mag_pad.shape[2] - 2 * self.model_run.offset) // roi_size
+            total_iterations = patches//self.batch_size if not self.is_tta else (patches//self.batch_size)*2
+            for i in range(patches):
+                start = i * roi_size
+                X_mag_window = X_mag_pad[:, :, start:start + self.window_size]
+                X_dataset.append(X_mag_window)
+
+            X_dataset = np.asarray(X_dataset)
+            self.model_run.eval()
+            with torch.no_grad():
+                mask = []
+                for i in range(0, patches, self.batch_size):
+                    self.progress_value += 1
+                    if self.progress_value >= total_iterations:
+                        self.progress_value = total_iterations
+                    self.set_progress_bar(0.1, 0.8/total_iterations*self.progress_value)
+                    X_batch = X_dataset[i: i + self.batch_size]
+                    X_batch = torch.from_numpy(X_batch).to(device)
+                    pred = self.model_run.predict_mask(X_batch)
+                    if not pred.size()[3] > 0:
+                        raise Exception(ERROR_MAPPER[WINDOW_SIZE_ERROR])
+                    pred = pred.detach().cpu().numpy()
+                    pred = np.concatenate(pred, axis=2)
+                    mask.append(pred)
+                if len(mask) == 0:
+                    raise Exception(ERROR_MAPPER[WINDOW_SIZE_ERROR])
+
+                mask = np.concatenate(mask, axis=2)
+            return mask
+
+        def postprocess(mask, X_mag, X_phase):
+
+            is_non_accom_stem = False
+            for stem in NON_ACCOM_STEMS:
+                if stem == self.primary_stem:
+                    is_non_accom_stem = True
+
+            mask = spec_utils.adjust_aggr(mask, is_non_accom_stem, aggressiveness)
+
+            if self.is_post_process:
+                mask = spec_utils.merge_artifacts(mask, thres=self.post_process_threshold)
+
+            y_spec = mask * X_mag * np.exp(1.j * X_phase)
+            v_spec = (1 - mask) * X_mag * np.exp(1.j * X_phase)
+
+            return y_spec, v_spec
+
+        X_mag, X_phase = spec_utils.preprocess(X_spec)
+        n_frame = X_mag.shape[2]
+        pad_l, pad_r, roi_size = spec_utils.make_padding(n_frame, self.window_size, self.model_run.offset)
+        X_mag_pad = np.pad(X_mag, ((0, 0), (0, 0), (pad_l, pad_r)), mode='constant')
+        X_mag_pad /= X_mag_pad.max()
+        mask = _execute(X_mag_pad, roi_size)
+
+        if self.is_tta:
+            pad_l += roi_size // 2
+            pad_r += roi_size // 2
+            X_mag_pad = np.pad(X_mag, ((0, 0), (0, 0), (pad_l, pad_r)), mode='constant')
+            X_mag_pad /= X_mag_pad.max()
+            mask_tta = _execute(X_mag_pad, roi_size)
+            mask_tta = mask_tta[:, :, roi_size // 2:]
+            mask = (mask[:, :, :n_frame] + mask_tta[:, :, :n_frame]) * 0.5
+        else:
+            mask = mask[:, :, :n_frame]
+
+        y_spec, v_spec = postprocess(mask, X_mag, X_phase)
+
+        return y_spec, v_spec
+
+    def spec_to_wav(self, spec):
+
+        if self.high_end_process.startswith('mirroring'):
+            input_high_end_ = spec_utils.mirroring(self.high_end_process, spec, self.input_high_end, self.mp)
+            wav = spec_utils.cmb_spectrogram_to_wave(spec, self.mp, self.input_high_end_h, input_high_end_)
+        else:
+            wav = spec_utils.cmb_spectrogram_to_wave(spec, self.mp)
+
+        return wav
+
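+# Runs a secondary/pre-process model through the separator class matching its
+# process method and returns either gathered (primary, secondary) sources or the
+# raw source dict/array.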
+def process_secondary_model(secondary_model: ModelData, process_data, main_model_primary_stem_4_stem=None, is_4_stem_demucs=False, main_process_method=None, is_pre_proc_model=False):
+
+    if not is_pre_proc_model:
+        process_iteration = process_data['process_iteration']
+        process_iteration()
+
+    if secondary_model.process_method == VR_ARCH_TYPE:
+        seperator = SeperateVR(secondary_model, process_data, main_model_primary_stem_4_stem=main_model_primary_stem_4_stem, main_process_method=main_process_method)
+    if secondary_model.process_method == MDX_ARCH_TYPE:
+        seperator = SeperateMDX(secondary_model, process_data, main_model_primary_stem_4_stem=main_model_primary_stem_4_stem, main_process_method=main_process_method)
+    if secondary_model.process_method == DEMUCS_ARCH_TYPE:
+        seperator = SeperateDemucs(secondary_model, process_data, main_model_primary_stem_4_stem=main_model_primary_stem_4_stem, main_process_method=main_process_method)
+
+    secondary_sources = seperator.seperate()
+
+    if type(secondary_sources) is dict and not is_4_stem_demucs and not is_pre_proc_model:
+        return gather_sources(secondary_model.primary_model_primary_stem, STEM_PAIR_MAPPER[secondary_model.primary_model_primary_stem], secondary_sources)
+    else:
+        return secondary_sources
+
+def gather_sources(primary_stem_name, secondary_stem_name, secondary_sources: dict):
+
+    source_primary = False
+    source_secondary = False
+
+    for key, value in secondary_sources.items():
+        if key in primary_stem_name:
+            source_primary = value
+        if key in secondary_stem_name:
+            source_secondary = value
+
+    return source_primary, source_secondary
+
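+# Loads (or transposes) the mixture, forces stereo, and cuts it into overlapping
+# chunks keyed by their start sample; the margin gives each chunk context that is
+# trimmed off again after inference. E.g. prepare_mix('song.wav', 15, 44100)
+# returns ({0: ..., 661500: ..., ...}, raw_mix, 44100) for a 15-second chunk size.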
+def prepare_mix(mix, chunk_set, margin_set, mdx_net_cut=False, is_missing_mix=False):
+
+    audio_path = mix
+    samplerate = 44100
+
+    if not isinstance(mix, np.ndarray):
+        mix, samplerate = librosa.load(mix, mono=False, sr=44100)
+    else:
+        mix = mix.T
+
+    if not np.any(mix) and audio_path.endswith('.mp3'):
+        mix = rerun_mp3(audio_path)
+
+    if mix.ndim == 1:
+        mix = np.asfortranarray([mix,mix])
+
+    def get_segmented_mix(chunk_set=chunk_set):
+        segmented_mix = {}
+
+        samples = mix.shape[-1]
+        margin = margin_set
+        chunk_size = chunk_set*44100
+        assert not margin == 0, 'margin cannot be zero!'
+
+        if margin > chunk_size:
+            margin = chunk_size
+        if chunk_set == 0 or samples < chunk_size:
+            chunk_size = samples
+
+        counter = -1
+        for skip in range(0, samples, chunk_size):
+            counter+=1
+            s_margin = 0 if counter == 0 else margin
+            end = min(skip+chunk_size+margin, samples)
+            start = skip-s_margin
+            segmented_mix[skip] = mix[:,start:end].copy()
+            if end == samples:
+                break
+
+        return segmented_mix
+
+    if is_missing_mix:
+        return mix
+    else:
+        segmented_mix = get_segmented_mix()
+        raw_mix = get_segmented_mix(chunk_set=0) if mdx_net_cut else mix
+        return segmented_mix, raw_mix, samplerate
+
+def rerun_mp3(audio_file, sample_rate=44100):
+
+    with audioread.audio_open(audio_file) as f:
+        track_length = int(f.duration)
+
+    return librosa.load(audio_file, duration=track_length, mono=False, sr=sample_rate)[0]
+
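+# Converts a freshly written WAV to FLAC or MP3 with pydub/ffmpeg and removes the
+# intermediate WAV file.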
+def save_format(audio_path, save_format, mp3_bit_set):
+
+    if not save_format == WAV:
+
+        if OPERATING_SYSTEM == 'Darwin':
+            FFMPEG_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'ffmpeg')
+            pydub.AudioSegment.converter = FFMPEG_PATH
+
+        musfile = pydub.AudioSegment.from_wav(audio_path)
+
+        if save_format == FLAC:
+            audio_path_flac = audio_path.replace(".wav", ".flac")
+            musfile.export(audio_path_flac, format="flac")
+
+        if save_format == MP3:
+            audio_path_mp3 = audio_path.replace(".wav", ".mp3")
+            musfile.export(audio_path_mp3, format="mp3", bitrate=mp3_bit_set)
+
+        try:
+            os.remove(audio_path)
+        except Exception as e:
+            print(e)
webUI.py ADDED
@@ -0,0 +1,285 @@
+import os
+import json
+
+import librosa
+import soundfile
+import numpy as np
+
+import gradio as gr
+from UVR_interface import root, UVRInterface, VR_MODELS_DIR, MDX_MODELS_DIR, DEMUCS_MODELS_DIR
+from gui_data.constants import *
+from typing import List, Dict, Callable, Union
+
+
+class UVRWebUI:
+    def __init__(self, uvr: UVRInterface, online_data_path: str) -> None:
+        self.uvr = uvr
+        self.models_url = self.get_models_url(online_data_path)
+        self.define_layout()
+
+        self.input_temp_dir = "__temp"
+        self.export_path = "out"
+        if not os.path.exists(self.input_temp_dir):
+            os.mkdir(self.input_temp_dir)
+
+    def get_models_url(self, models_info_path: str) -> Dict[str, Dict]:
+        with open(models_info_path, "r") as f:
+            online_data = json.loads(f.read())
+        models_url = {}
+        for arch, download_list_key in zip([VR_ARCH_TYPE, MDX_ARCH_TYPE], ["vr_download_list", "mdx_download_list"]):
+            models_url[arch] = {model: NORMAL_REPO+model_path for model, model_path in online_data[download_list_key].items()}
+        models_url[DEMUCS_ARCH_TYPE] = online_data["demucs_download_list"]
+        return models_url
+
+    def get_local_models(self, arch: str) -> List[str]:
+        model_config = {
+            VR_ARCH_TYPE: (VR_MODELS_DIR, ".pth"),
+            MDX_ARCH_TYPE: (MDX_MODELS_DIR, ".onnx"),
+            DEMUCS_ARCH_TYPE: (DEMUCS_MODELS_DIR, ".yaml"),
+        }
+        try:
+            model_dir, suffix = model_config[arch]
+        except KeyError:
+            raise ValueError(f"Unknown arch type: {arch}")
+        return [os.path.splitext(f)[0] for f in os.listdir(model_dir) if f.endswith(suffix)]
+
+    def set_arch_setting_value(self, arch: str, setting1, setting2):
+        if arch == VR_ARCH_TYPE:
+            root.window_size_var.set(setting1)
+            root.aggression_setting_var.set(setting2)
+        elif arch == MDX_ARCH_TYPE:
+            root.mdx_batch_size_var.set(setting1)
+            root.compensate_var.set(setting2)
+        elif arch == DEMUCS_ARCH_TYPE:
+            pass
+
+    def arch_select_update(self, arch: str) -> List[Dict]:
+        choices = self.get_local_models(arch)
+        if arch == VR_ARCH_TYPE:
+            model_update = self.model_choice.update(choices=choices, value=CHOOSE_MODEL, label=SELECT_VR_MODEL_MAIN_LABEL)
+            setting1_update = self.arch_setting1.update(choices=VR_WINDOW, label=WINDOW_SIZE_MAIN_LABEL, value=root.window_size_var.get())
+            setting2_update = self.arch_setting2.update(choices=VR_AGGRESSION, label=AGGRESSION_SETTING_MAIN_LABEL, value=root.aggression_setting_var.get())
+        elif arch == MDX_ARCH_TYPE:
+            model_update = self.model_choice.update(choices=choices, value=CHOOSE_MODEL, label=CHOOSE_MDX_MODEL_MAIN_LABEL)
+            setting1_update = self.arch_setting1.update(choices=BATCH_SIZE, label=BATCHES_MDX_MAIN_LABEL, value=root.mdx_batch_size_var.get())
+            setting2_update = self.arch_setting2.update(choices=VOL_COMPENSATION, label=VOL_COMP_MDX_MAIN_LABEL, value=root.compensate_var.get())
+        elif arch == DEMUCS_ARCH_TYPE:
+            model_update = self.model_choice.update(choices=choices, value=CHOOSE_MODEL, label=CHOOSE_DEMUCS_MODEL_MAIN_LABEL)
+            raise gr.Error(f"{DEMUCS_ARCH_TYPE} not implemented")
+        else:
+            raise gr.Error(f"Unknown arch type: {arch}")
+        return [model_update, setting1_update, setting2_update]
+
+    def model_select_update(self, arch: str, model_name: str) -> List[Union[str, Dict, None]]:
+        if model_name == CHOOSE_MODEL:
+            return [None for _ in range(4)]
+        model, = self.uvr.assemble_model_data(model_name, arch)
+        if not model.model_status:
+            raise gr.Error(f"Cannot get model data, model hash = {model.model_hash}")
+
+        stem1_check_update = self.primary_stem_only.update(label=f"{model.primary_stem} Only")
+        stem2_check_update = self.secondary_stem_only.update(label=f"{model.secondary_stem} Only")
+        stem1_out_update = self.primary_stem_out.update(label=f"Output {model.primary_stem}")
+        stem2_out_update = self.secondary_stem_out.update(label=f"Output {model.secondary_stem}")
+
+        return [stem1_check_update, stem2_check_update, stem1_out_update, stem2_out_update]
+
+    def checkbox_set_root_value(self, checkbox: gr.Checkbox, root_attr: str):
+        checkbox.change(lambda value: root.__getattribute__(root_attr).set(value), inputs=checkbox)
+
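+    # Makes a group of checkboxes mutually exclusive: when one box takes the
+    # exclusive value, every other box in the group is cleared, and the changed
+    # box's own callback still fires with its new value.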
+    def set_checkboxes_exclusive(self, checkboxes: List[gr.Checkbox], pure_callbacks: List[Callable], exclusive_value=True):
+        def exclusive_onchange(i, callback_i):
+            def new_onchange(*check_values):
+                if check_values[i] == exclusive_value:
+                    return_values = []
+                    for j, value_j in enumerate(check_values):
+                        if j != i and value_j == exclusive_value:
+                            return_values.append(not exclusive_value)
+                        else:
+                            return_values.append(value_j)
+                else:
+                    return_values = check_values
+                callback_i(check_values[i])
+                return return_values
+            return new_onchange
+
+        for i, (checkbox, callback) in enumerate(zip(checkboxes, pure_callbacks)):
+            checkbox.change(exclusive_onchange(i, callback), inputs=checkboxes, outputs=checkboxes)
+
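+    # Gradio submit handler: writes the uploaded audio to a temporary mono WAV,
+    # pushes the chosen settings into the Tk root variables, runs the separation
+    # (note: via the module-level `uvr` instance), and returns both stems plus a
+    # status message.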
+    def process(self, input_audio, input_filename, model_name, arch, setting1, setting2, progress=gr.Progress()):
+        def set_progress_func(step, inference_iterations=0):
+            progress_curr = step + inference_iterations
+            progress(progress_curr)
+
+        sampling_rate, audio = input_audio
+        audio = (audio / np.iinfo(audio.dtype).max).astype(np.float32)
+        if len(audio.shape) > 1:
+            audio = librosa.to_mono(audio.transpose(1, 0))
+        input_path = os.path.join(self.input_temp_dir, input_filename)
+        soundfile.write(input_path, audio, sampling_rate, format="wav")
+
+        self.set_arch_setting_value(arch, setting1, setting2)
+
+        seperator = uvr.process(
+            model_name=model_name,
+            arch_type=arch,
+            audio_file=input_path,
+            export_path=self.export_path,
+            is_model_sample_mode=root.model_sample_mode_var.get(),
+            set_progress_func=set_progress_func,
+        )
+
+        primary_audio = None
+        secondary_audio = None
+        msg = ""
+        if not seperator.is_secondary_stem_only:
+            primary_stem_path = os.path.join(seperator.export_path, f"{seperator.audio_file_base}_({seperator.primary_stem}).wav")
+            audio, rate = soundfile.read(primary_stem_path)
+            primary_audio = (rate, audio)
+            msg += f"{seperator.primary_stem} saved at {primary_stem_path}\n"
+        if not seperator.is_primary_stem_only:
+            secondary_stem_path = os.path.join(seperator.export_path, f"{seperator.audio_file_base}_({seperator.secondary_stem}).wav")
+            audio, rate = soundfile.read(secondary_stem_path)
+            secondary_audio = (rate, audio)
+            msg += f"{seperator.secondary_stem} saved at {secondary_stem_path}\n"
+
+        os.remove(input_path)
+
+        return primary_audio, secondary_audio, msg
+
+    def define_layout(self):
+        with gr.Blocks() as app:
+            self.app = app
+            gr.HTML("<h1> 🎵 Ultimate Vocal Remover WebUI 🎵 </h1>")
+            gr.Markdown("This is an experimental demo with CPU. Duplicate the space for use in private")
+            gr.Markdown(
+                "[![Duplicate this Space](https://huggingface.co/datasets/huggingface/badges/raw/main/duplicate-this-space-sm-dark.svg)](https://huggingface.co/spaces/r3gm/Ultimate-Vocal-Remover-WebUI?duplicate=true)\n\n"
+            )
+            with gr.Tabs():
+                with gr.TabItem("process"):
+                    with gr.Row():
+                        self.arch_choice = gr.Dropdown(
+                            choices=[VR_ARCH_TYPE, MDX_ARCH_TYPE], value=VR_ARCH_TYPE, # choices=[VR_ARCH_TYPE, MDX_ARCH_TYPE, DEMUCS_ARCH_TYPE], value=VR_ARCH_TYPE,
+                            label=CHOOSE_PROC_METHOD_MAIN_LABEL, interactive=True)
+                        self.model_choice = gr.Dropdown(
+                            choices=self.get_local_models(VR_ARCH_TYPE), value=CHOOSE_MODEL,
+                            label=SELECT_VR_MODEL_MAIN_LABEL+' 👋Select a model', interactive=True)
+                    with gr.Row():
+                        self.arch_setting1 = gr.Dropdown(
+                            choices=VR_WINDOW, value=root.window_size_var.get(),
+                            label=WINDOW_SIZE_MAIN_LABEL+' 👋Select one', interactive=True)
+                        self.arch_setting2 = gr.Dropdown(
+                            choices=VR_AGGRESSION, value=root.aggression_setting_var.get(),
+                            label=AGGRESSION_SETTING_MAIN_LABEL, interactive=True)
+                    with gr.Row():
+                        self.use_gpu = gr.Checkbox(
+                            label='Rhythmic Transmutation Device', value=True, interactive=True) #label=GPU_CONVERSION_MAIN_LABEL, value=root.is_gpu_conversion_var.get(), interactive=True)
+                        self.primary_stem_only = gr.Checkbox(
+                            label=f"{PRIMARY_STEM} only", value=root.is_primary_stem_only_var.get(), interactive=True)
+                        self.secondary_stem_only = gr.Checkbox(
+                            label=f"{SECONDARY_STEM} only", value=root.is_secondary_stem_only_var.get(), interactive=True)
+                        self.sample_mode = gr.Checkbox(
+                            label=SAMPLE_MODE_CHECKBOX(root.model_sample_mode_duration_var.get()),
+                            value=root.model_sample_mode_var.get(), interactive=True)
+
+                    with gr.Row():
+                        self.input_filename = gr.Textbox(label="Input filename", value="temp.wav", interactive=True)
+                    with gr.Row():
+                        self.audio_in = gr.Audio(label="Input audio", interactive=True)
+                    with gr.Row():
+                        self.process_submit = gr.Button(START_PROCESSING, variant="primary")
+                    with gr.Row():
+                        self.primary_stem_out = gr.Audio(label=f"Output {PRIMARY_STEM}", interactive=False)
+                        self.secondary_stem_out = gr.Audio(label=f"Output {SECONDARY_STEM}", interactive=False)
+                    with gr.Row():
+                        self.out_message = gr.Textbox(label="Output Message", interactive=False, show_progress=False)
+
+                with gr.TabItem("settings"):
+                    with gr.Tabs():
+                        with gr.TabItem("Settings Guide"):
+                            pass
+                        with gr.TabItem("Additional Settings"):
+                            self.wav_type = gr.Dropdown(choices=WAV_TYPE, label="Wav Type", value="PCM_16", interactive=True)
+                            self.mp3_rate = gr.Dropdown(choices=MP3_BIT_RATES, label="MP3 Bitrate", value="320k", interactive=True)
+                        with gr.TabItem("Download models"):
+
+                            def md_url(url, text=None):
+                                if text is None:
+                                    text = url
+                                return f"[{text}]({url})"
+
+                            with gr.Row():
+                                vr_models = self.models_url[VR_ARCH_TYPE]
+                                self.vr_download_choice = gr.Dropdown(choices=list(vr_models.keys()), label=f"Select {VR_ARCH_TYPE} Model", interactive=True)
+                                self.vr_download_url = gr.Markdown()
+                                self.vr_download_choice.change(lambda model: md_url(vr_models[model]), inputs=self.vr_download_choice, outputs=self.vr_download_url)
+                            with gr.Row(variant="panel"):
+                                mdx_models = self.models_url[MDX_ARCH_TYPE]
+                                self.mdx_download_choice = gr.Dropdown(choices=list(mdx_models.keys()), label=f"Select {MDX_ARCH_TYPE} Model", interactive=True)
+                                self.mdx_download_url = gr.Markdown()
+                                self.mdx_download_choice.change(lambda model: md_url(mdx_models[model]), inputs=self.mdx_download_choice, outputs=self.mdx_download_url)
+                            with gr.Row(variant="panel"):
+                                demucs_models: Dict[str, Dict] = self.models_url[DEMUCS_ARCH_TYPE]
+                                self.demucs_download_choice = gr.Dropdown(choices=list(demucs_models.keys()), label=f"Select {DEMUCS_ARCH_TYPE} Model", interactive=True)
+                                self.demucs_download_url = gr.Markdown()
+
+                                self.demucs_download_choice.change(
+                                    lambda model: "\n".join([
+                                        "- " + md_url(url, text=filename) for filename, url in demucs_models[model].items()]),
+                                    inputs=self.demucs_download_choice,
+                                    outputs=self.demucs_download_url)
+
+            self.arch_choice.change(
+                self.arch_select_update, inputs=self.arch_choice,
+                outputs=[self.model_choice, self.arch_setting1, self.arch_setting2])
+            self.model_choice.change(
+                self.model_select_update, inputs=[self.arch_choice, self.model_choice],
+                outputs=[self.primary_stem_only, self.secondary_stem_only, self.primary_stem_out, self.secondary_stem_out])
+
+            self.checkbox_set_root_value(self.use_gpu, 'is_gpu_conversion_var')
+            self.checkbox_set_root_value(self.sample_mode, 'model_sample_mode_var')
+            self.set_checkboxes_exclusive(
+                [self.primary_stem_only, self.secondary_stem_only],
+                [lambda value: root.is_primary_stem_only_var.set(value), lambda value: root.is_secondary_stem_only_var.set(value)])
+
+            self.process_submit.click(
+                self.process,
+                inputs=[self.audio_in, self.input_filename, self.model_choice, self.arch_choice, self.arch_setting1, self.arch_setting2],
+                outputs=[self.primary_stem_out, self.secondary_stem_out, self.out_message])
+
+    def launch(self, **kwargs):
+        self.app.queue().launch(**kwargs)
+
+
+uvr = UVRInterface()
+uvr.cached_sources_clear()
+
+webui = UVRWebUI(uvr, online_data_path='models/download_checks.json')
+
+
+print(webui.models_url)
+model_dict = webui.models_url
+
+
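+# Bulk-download the VR and MDX-Net model files with aria2c into the local model
+# directories ('VR Arc' and 'MDX-Net' are the arch type keys used in models_url);
+# other categories, such as Demucs, are skipped.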
+for category, models in model_dict.items():
+    if category in ['VR Arc', 'MDX-Net']:
+        if category == 'VR Arc':
+            model_path = 'models/VR_Models'
+        elif category == 'MDX-Net':
+            model_path = 'models/MDX_Net_Models'
+
+        for model_name, model_url in models.items():
+            cmd = f"aria2c --optimize-concurrent-downloads --console-log-level=error --summary-interval=10 -j5 -x16 -s16 -k1M -c -d {model_path} -Z {model_url}"
+            os.system(cmd)
+
+        print("Models downloaded successfully.")
+    else:
+        print(f"Ignoring category: {category}")
+
+
+# Re-instantiated, presumably so the model dropdowns pick up the freshly downloaded files.
+webui = UVRWebUI(uvr, online_data_path='models/download_checks.json')
+webui.launch()