Politrees committed on
Commit
94f7531
·
verified ·
1 Parent(s): 76ee41e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -7
app.py CHANGED
@@ -135,7 +135,7 @@ def prepare_output_dir(input_file, output_dir):
135
  raise
136
  return out_dir
137
 
138
- def roformer_separator(audio, model_key, seg_size, overlap, model_dir, out_dir, out_format, norm_thresh, amp_thresh, progress=gr.Progress()):
139
  """Separate audio using Roformer model."""
140
  print_message(audio, model_key)
141
  model = ROFORMER_MODELS[model_key]
@@ -151,7 +151,9 @@ def roformer_separator(audio, model_key, seg_size, overlap, model_dir, out_dir,
151
  mdxc_params={
152
  "batch_size": 1,
153
  "segment_size": seg_size,
 
154
  "overlap": overlap,
 
155
  }
156
  )
157
 
@@ -169,7 +171,7 @@ def roformer_separator(audio, model_key, seg_size, overlap, model_dir, out_dir,
169
  logging.error(f"Roformer separation failed: {e}")
170
  return None, None
171
 
172
- def mdx23c_separator(audio, model, seg_size, overlap, model_dir, out_dir, out_format, norm_thresh, amp_thresh, progress=gr.Progress()):
173
  """Separate audio using MDX23C model."""
174
  print_message(audio, model)
175
  try:
@@ -184,7 +186,9 @@ def mdx23c_separator(audio, model, seg_size, overlap, model_dir, out_dir, out_fo
184
  mdxc_params={
185
  "batch_size": 1,
186
  "segment_size": seg_size,
 
187
  "overlap": overlap,
 
188
  }
189
  )
190
 
@@ -336,7 +340,9 @@ with gr.Blocks(
336
  roformer_model = gr.Dropdown(label="Select the Model", choices=list(ROFORMER_MODELS.keys()))
337
  with gr.Row():
338
  roformer_seg_size = gr.Slider(minimum=32, maximum=4000, step=32, value=256, label="Segment Size", info="Larger consumes more resources, but may give better results.")
 
339
  roformer_overlap = gr.Slider(minimum=2, maximum=10, step=1, value=8, label="Overlap", info="Amount of overlap between prediction windows.")
 
340
  with gr.Row():
341
  roformer_audio = gr.Audio(label="Input Audio", type="filepath")
342
  with gr.Row():
@@ -351,7 +357,9 @@ with gr.Blocks(
351
  mdx23c_model = gr.Dropdown(label="Select the Model", choices=MDX23C_MODELS)
352
  with gr.Row():
353
  mdx23c_seg_size = gr.Slider(minimum=32, maximum=4000, step=32, value=256, label="Segment Size", info="Larger consumes more resources, but may give better results.")
 
354
  mdx23c_overlap = gr.Slider(minimum=2, maximum=50, step=1, value=8, label="Overlap", info="Amount of overlap between prediction windows.")
 
355
  with gr.Row():
356
  mdx23c_audio = gr.Audio(label="Input Audio", type="filepath")
357
  with gr.Row():
@@ -365,10 +373,10 @@ with gr.Blocks(
365
  with gr.Row():
366
  mdx_model = gr.Dropdown(label="Select the Model", choices=MDXNET_MODELS)
367
  with gr.Row():
368
- mdx_hop_length = gr.Slider(minimum=0.001, maximum=0.999, step=0.001, value=0.25, label="Hop Length")
369
  mdx_seg_size = gr.Slider(minimum=32, maximum=4000, step=32, value=256, label="Segment Size", info="Larger consumes more resources, but may give better results.")
370
  mdx_overlap = gr.Slider(minimum=0.001, maximum=0.999, step=0.001, value=0.25, label="Overlap")
371
- mdx_denoise = gr.Checkbox(value=True, label="Denoise", info="Enable denoising during separation.")
372
  with gr.Row():
373
  mdx_audio = gr.Audio(label="Input Audio", type="filepath")
374
  with gr.Row():
@@ -384,8 +392,8 @@ with gr.Blocks(
384
  with gr.Row():
385
  vr_window_size = gr.Slider(minimum=320, maximum=1024, step=32, value=512, label="Window Size")
386
  vr_aggression = gr.Slider(minimum=1, maximum=50, step=1, value=5, label="Agression", info="Intensity of primary stem extraction.")
387
- vr_tta = gr.Checkbox(value=True, label="TTA", info="Enable Test-Time-Augmentation; slow but improves quality.")
388
- vr_post_process = gr.Checkbox(value=True, label="Post Process", info="Enable post-processing.")
389
  vr_post_process_threshold = gr.Slider(minimum=0.1, maximum=0.3, step=0.1, value=0.2, label="Post Process Threshold", info="Threshold for post-processing.")
390
  vr_high_end_process = gr.Checkbox(value=False, label="High End Process", info="Mirror the missing frequency range of the output.")
391
  with gr.Row():
@@ -401,7 +409,7 @@ with gr.Blocks(
401
  with gr.Row():
402
  demucs_model = gr.Dropdown(label="Select the Model", choices=DEMUCS_MODELS)
403
  with gr.Row():
404
- demucs_seg_size = gr.Slider(minimum=1, maximum=100, step=1, value=50, label="Segment Size")
405
  demucs_shifts = gr.Slider(minimum=0, maximum=20, step=1, value=2, label="Shifts", info="Number of predictions with random shifts, higher = slower but better quality.")
406
  demucs_overlap = gr.Slider(minimum=0.001, maximum=0.999, step=0.001, value=0.25, label="Overlap")
407
  demucs_segments_enabled = gr.Checkbox(value=True, label="Segment-wise processing")
@@ -422,7 +430,9 @@ with gr.Blocks(
422
  roformer_audio,
423
  roformer_model,
424
  roformer_seg_size,
 
425
  roformer_overlap,
 
426
  model_file_dir,
427
  output_dir,
428
  output_format,
@@ -437,7 +447,9 @@ with gr.Blocks(
437
  mdx23c_audio,
438
  mdx23c_model,
439
  mdx23c_seg_size,
 
440
  mdx23c_overlap,
 
441
  model_file_dir,
442
  output_dir,
443
  output_format,
 
135
  raise
136
  return out_dir
137
 
138
+ def roformer_separator(audio, model_key, seg_size, override_seg_size, overlap, pitch_shift, model_dir, out_dir, out_format, norm_thresh, amp_thresh, progress=gr.Progress()):
139
  """Separate audio using Roformer model."""
140
  print_message(audio, model_key)
141
  model = ROFORMER_MODELS[model_key]
 
151
  mdxc_params={
152
  "batch_size": 1,
153
  "segment_size": seg_size,
154
+ "override_model_segment_size": False,
155
  "overlap": overlap,
156
+ "pitch_shift": pitch_shift,
157
  }
158
  )
159
 
 
171
  logging.error(f"Roformer separation failed: {e}")
172
  return None, None
173
 
174
+ def mdx23c_separator(audio, model, seg_size, override_seg_size, overlap, pitch_shift, model_dir, out_dir, out_format, norm_thresh, amp_thresh, progress=gr.Progress()):
175
  """Separate audio using MDX23C model."""
176
  print_message(audio, model)
177
  try:
 
186
  mdxc_params={
187
  "batch_size": 1,
188
  "segment_size": seg_size,
189
+ "override_model_segment_size": override_seg_size,
190
  "overlap": overlap,
191
+ "pitch_shift": pitch_shift,
192
  }
193
  )
194
 
 
340
  roformer_model = gr.Dropdown(label="Select the Model", choices=list(ROFORMER_MODELS.keys()))
341
  with gr.Row():
342
  roformer_seg_size = gr.Slider(minimum=32, maximum=4000, step=32, value=256, label="Segment Size", info="Larger consumes more resources, but may give better results.")
343
+ roformer_override_seg_size = gr.Checkbox(value=False, label="Override segment size", info="Override model default segment size instead of using the model default value.")
344
  roformer_overlap = gr.Slider(minimum=2, maximum=10, step=1, value=8, label="Overlap", info="Amount of overlap between prediction windows.")
345
+ roformer_pitch_shift = gr.Slider(minimum=-12, maximum=12, step=1, value=0, label="Pitch shift", info="Shift audio pitch by a number of semitones while processing. may improve output for deep/high vocals.")
346
  with gr.Row():
347
  roformer_audio = gr.Audio(label="Input Audio", type="filepath")
348
  with gr.Row():
 
357
  mdx23c_model = gr.Dropdown(label="Select the Model", choices=MDX23C_MODELS)
358
  with gr.Row():
359
  mdx23c_seg_size = gr.Slider(minimum=32, maximum=4000, step=32, value=256, label="Segment Size", info="Larger consumes more resources, but may give better results.")
360
+ mdx23c_override_seg_size = gr.Checkbox(value=False, label="Override segment size", info="Override model default segment size instead of using the model default value.")
361
  mdx23c_overlap = gr.Slider(minimum=2, maximum=50, step=1, value=8, label="Overlap", info="Amount of overlap between prediction windows.")
362
+ mdx23c_pitch_shift = gr.Slider(minimum=-12, maximum=12, step=1, value=0, label="Pitch shift", info="Shift audio pitch by a number of semitones while processing. may improve output for deep/high vocals.")
363
  with gr.Row():
364
  mdx23c_audio = gr.Audio(label="Input Audio", type="filepath")
365
  with gr.Row():
 
373
  with gr.Row():
374
  mdx_model = gr.Dropdown(label="Select the Model", choices=MDXNET_MODELS)
375
  with gr.Row():
376
+ mdx_hop_length = gr.Slider(minimum=32, maximum=2048, step=32, value=1024, label="Hop Length")
377
  mdx_seg_size = gr.Slider(minimum=32, maximum=4000, step=32, value=256, label="Segment Size", info="Larger consumes more resources, but may give better results.")
378
  mdx_overlap = gr.Slider(minimum=0.001, maximum=0.999, step=0.001, value=0.25, label="Overlap")
379
+ mdx_denoise = gr.Checkbox(value=False, label="Denoise", info="Enable denoising during separation.")
380
  with gr.Row():
381
  mdx_audio = gr.Audio(label="Input Audio", type="filepath")
382
  with gr.Row():
 
392
  with gr.Row():
393
  vr_window_size = gr.Slider(minimum=320, maximum=1024, step=32, value=512, label="Window Size")
394
  vr_aggression = gr.Slider(minimum=1, maximum=50, step=1, value=5, label="Agression", info="Intensity of primary stem extraction.")
395
+ vr_tta = gr.Checkbox(value=False, label="TTA", info="Enable Test-Time-Augmentation; slow but improves quality.")
396
+ vr_post_process = gr.Checkbox(value=False, label="Post Process", info="Enable post-processing.")
397
  vr_post_process_threshold = gr.Slider(minimum=0.1, maximum=0.3, step=0.1, value=0.2, label="Post Process Threshold", info="Threshold for post-processing.")
398
  vr_high_end_process = gr.Checkbox(value=False, label="High End Process", info="Mirror the missing frequency range of the output.")
399
  with gr.Row():
 
409
  with gr.Row():
410
  demucs_model = gr.Dropdown(label="Select the Model", choices=DEMUCS_MODELS)
411
  with gr.Row():
412
+ demucs_seg_size = gr.Slider(minimum=1, maximum=100, step=1, value=40, label="Segment Size")
413
  demucs_shifts = gr.Slider(minimum=0, maximum=20, step=1, value=2, label="Shifts", info="Number of predictions with random shifts, higher = slower but better quality.")
414
  demucs_overlap = gr.Slider(minimum=0.001, maximum=0.999, step=0.001, value=0.25, label="Overlap")
415
  demucs_segments_enabled = gr.Checkbox(value=True, label="Segment-wise processing")
 
430
  roformer_audio,
431
  roformer_model,
432
  roformer_seg_size,
433
+ roformer_override_seg_size,
434
  roformer_overlap,
435
+ roformer_pitch_shift,
436
  model_file_dir,
437
  output_dir,
438
  output_format,
 
447
  mdx23c_audio,
448
  mdx23c_model,
449
  mdx23c_seg_size,
450
+ mdx23c_override_seg_size,
451
  mdx23c_overlap,
452
+ mdx23c_pitch_shift,
453
  model_file_dir,
454
  output_dir,
455
  output_format,