jhtonyKoo committed on
Commit
fea46cb
·
1 Parent(s): db9ab3b

modify app

Browse files
Files changed (4) hide show
  1. __pycache__/inference.cpython-311.pyc +0 -0
  2. app.py +12 -7
  3. inference.py +17 -19
  4. ito_snow.png +0 -0
__pycache__/inference.cpython-311.pyc CHANGED
Binary files a/__pycache__/inference.cpython-311.pyc and b/__pycache__/inference.cpython-311.pyc differ
 
app.py CHANGED
@@ -155,20 +155,25 @@ def update_ito_output(all_results, selected_step):
155
  """ APP display """
156
  with gr.Blocks() as demo:
157
  gr.Markdown("# ITO-Master: Inference Time Optimization for Mastering Style Transfer")
 
 
 
 
 
158
 
159
  gr.Markdown("# Step 1: Mastering Style Transfer")
160
 
161
  with gr.Tab("Upload Audio"):
162
  with gr.Row():
163
- input_audio = gr.Audio(label="Input Audio")
164
- reference_audio = gr.Audio(label="Reference Audio")
165
 
166
  process_button = gr.Button("Process Mastering Style Transfer")
167
 
168
  with gr.Row():
169
  with gr.Column():
170
- output_audio = gr.Audio(label="Output Audio", type='numpy')
171
- normalized_input = gr.Audio(label="Normalized Input Audio", type='numpy')
172
  param_output = gr.Textbox(label="Predicted Parameters", lines=5)
173
 
174
  process_button.click(
@@ -182,8 +187,8 @@ with gr.Blocks() as demo:
182
  input_youtube_url = gr.Textbox(label="Input YouTube URL")
183
  reference_youtube_url = gr.Textbox(label="Reference YouTube URL")
184
  with gr.Row():
185
- input_audio_yt = gr.Audio(label="Input Audio (Do not put when using YouTube URL)")
186
- reference_audio_yt = gr.Audio(label="Reference Audio (Do not put when using YouTube URL)")
187
 
188
  process_button_yt = gr.Button("Process Mastering Style Transfer")
189
 
@@ -208,7 +213,7 @@ with gr.Blocks() as demo:
208
  gr.Markdown("## Step 2: Inference Time Optimization (ITO)")
209
 
210
  with gr.Row():
211
- ito_reference_audio = gr.Audio(label="ITO Reference Audio (optional)")
212
  with gr.Column():
213
  num_steps = gr.Slider(minimum=1, maximum=100, value=10, step=1, label="Number of Steps")
214
  optimizer = gr.Dropdown(["Adam", "RAdam", "SGD"], value="RAdam", label="Optimizer")
 
155
  """ APP display """
156
  with gr.Blocks() as demo:
157
  gr.Markdown("# ITO-Master: Inference Time Optimization for Mastering Style Transfer")
158
+ with gr.Row():
159
+ gr.Markdown("Demo of Inference Time Optimization (ITO) for Music Mastering Style Transfer. \n"
160
+ "The mastering style transfer is performed by a differentiable audio processing model, and the predicted parameters are shown as the output. \n"
161
+ "Perform mastering style transfer with an input source audio and a reference mastering style audio. On top of this result, you can perform ITO to optimize the reference embedding $z_{\\text{ref}}$ to further gain control over the output mastering style.")
162
+ gr.Image("ito_snow.png", width=500)
163
 
164
  gr.Markdown("# Step 1: Mastering Style Transfer")
165
 
166
  with gr.Tab("Upload Audio"):
167
  with gr.Row():
168
+ input_audio = gr.Audio(label="Source Audio ($x_{\\text{in}}$)")
169
+ reference_audio = gr.Audio(label="Reference Style Audio ($x_{\\text{ref}}$)")
170
 
171
  process_button = gr.Button("Process Mastering Style Transfer")
172
 
173
  with gr.Row():
174
  with gr.Column():
175
+ output_audio = gr.Audio(label="Output Audio ($y'$)", type='numpy')
176
+ normalized_input = gr.Audio(label="Normalized Source Audio", type='numpy')
177
  param_output = gr.Textbox(label="Predicted Parameters", lines=5)
178
 
179
  process_button.click(
 
187
  input_youtube_url = gr.Textbox(label="Input YouTube URL")
188
  reference_youtube_url = gr.Textbox(label="Reference YouTube URL")
189
  with gr.Row():
190
+ input_audio_yt = gr.Audio(label="Source Audio (Do not put when using YouTube URL)")
191
+ reference_audio_yt = gr.Audio(label="Reference Style Audio (Do not put when using YouTube URL)")
192
 
193
  process_button_yt = gr.Button("Process Mastering Style Transfer")
194
 
 
213
  gr.Markdown("## Step 2: Inference Time Optimization (ITO)")
214
 
215
  with gr.Row():
216
+ ito_reference_audio = gr.Audio(label="ITO Reference Style Audio (optional)")
217
  with gr.Column():
218
  num_steps = gr.Slider(minimum=1, maximum=100, value=10, step=1, label="Number of Steps")
219
  optimizer = gr.Dropdown(["Adam", "RAdam", "SGD"], value="RAdam", label="Optimizer")
inference.py CHANGED
@@ -179,10 +179,10 @@ class MasteringStyleTransfer:
179
  'band2_cutoff_freq': ('High-Mid Band Frequency', 'Hz', 8000, 12000),
180
  'band2_q_factor': ('High-Mid Band Q', '', 0.1, 5.0),
181
  'band3_gain_db': ('High Band Gain', 'dB', -20, 20),
182
- 'band3_cutoff_freq': ('High Band Frequency', 'Hz', 12000, 20000), # Assuming sample_rate is 44100
183
  'band3_q_factor': ('High Band Q', '', 0.1, 5.0),
184
  'high_shelf_gain_db': ('High Shelf Gain', 'dB', -20, 20),
185
- 'high_shelf_cutoff_freq': ('High Shelf Cutoff', 'Hz', 4000, 20000), # Assuming sample_rate is 44100
186
  'high_shelf_q_factor': ('High Shelf Q', '', 0.1, 5.0),
187
  },
188
  'distortion': {
@@ -194,21 +194,21 @@ class MasteringStyleTransfer:
194
  'high_cutoff': ('Mid/High Crossover', 'Hz', 1000, 20000),
195
  'parallel_weight_factor': ('Dry/Wet Mix', '%', 0, 100),
196
  'low_shelf_comp_thresh': ('Low Band Comp Threshold', 'dB', -60, 0),
197
- 'low_shelf_comp_ratio': ('Low Band Comp Ratio', ':1', 1, 20),
198
  'low_shelf_exp_thresh': ('Low Band Exp Threshold', 'dB', -60, 0),
199
- 'low_shelf_exp_ratio': ('Low Band Exp Ratio', ':1', 1, 20),
200
  'low_shelf_at': ('Low Band Attack Time', 'ms', 5, 100),
201
  'low_shelf_rt': ('Low Band Release Time', 'ms', 5, 100),
202
  'mid_band_comp_thresh': ('Mid Band Comp Threshold', 'dB', -60, 0),
203
- 'mid_band_comp_ratio': ('Mid Band Comp Ratio', ':1', 1, 20),
204
  'mid_band_exp_thresh': ('Mid Band Exp Threshold', 'dB', -60, 0),
205
- 'mid_band_exp_ratio': ('Mid Band Exp Ratio', ':1', 1, 20),
206
  'mid_band_at': ('Mid Band Attack Time', 'ms', 5, 100),
207
  'mid_band_rt': ('Mid Band Release Time', 'ms', 5, 100),
208
  'high_shelf_comp_thresh': ('High Band Comp Threshold', 'dB', -60, 0),
209
- 'high_shelf_comp_ratio': ('High Band Comp Ratio', ':1', 1, 20),
210
  'high_shelf_exp_thresh': ('High Band Exp Threshold', 'dB', -60, 0),
211
- 'high_shelf_exp_ratio': ('High Band Exp Ratio', ':1', 1, 20),
212
  'high_shelf_at': ('High Band Attack Time', 'ms', 5, 100),
213
  'high_shelf_rt': ('High Band Release Time', 'ms', 5, 100),
214
  },
@@ -236,20 +236,18 @@ class MasteringStyleTransfer:
236
  print(f"fx name: {fx_name} param_name: {param_name}")
237
  if fx_name in param_mapper and param_name in param_mapper[fx_name]:
238
  friendly_name, unit, min_val, max_val = param_mapper[fx_name][param_name]
239
- if fx_name == 'IMAGER' and param_name == 'width':
240
- # Convert width to a more intuitive scale
241
- width_percentage = param_value * 200
242
- output.append(f" {friendly_name}: {width_percentage:.2f}% (Range: 0-200%)")
243
- else:
244
- output.append(f" {friendly_name}: {param_value:.2f} {unit} (Range: {min_val}-{max_val})")
245
  else:
246
  output.append(f" {param_name}: {param_value:.2f}")
247
  else:
248
- if fx_name == 'IMAGER':
249
- width_percentage = fx_params.item() * 200
250
- output.append(f" Stereo Width: {width_percentage:.2f}% (Range: 0-200%)")
251
- else:
252
- output.append(f" {fx_params.item():.2f}")
253
 
254
  return "\n".join(output)
255
 
 
179
  'band2_cutoff_freq': ('High-Mid Band Frequency', 'Hz', 8000, 12000),
180
  'band2_q_factor': ('High-Mid Band Q', '', 0.1, 5.0),
181
  'band3_gain_db': ('High Band Gain', 'dB', -20, 20),
182
+ 'band3_cutoff_freq': ('High Band Frequency', 'Hz', 12000, 20000),
183
  'band3_q_factor': ('High Band Q', '', 0.1, 5.0),
184
  'high_shelf_gain_db': ('High Shelf Gain', 'dB', -20, 20),
185
+ 'high_shelf_cutoff_freq': ('High Shelf Cutoff', 'Hz', 4000, 20000),
186
  'high_shelf_q_factor': ('High Shelf Q', '', 0.1, 5.0),
187
  },
188
  'distortion': {
 
194
  'high_cutoff': ('Mid/High Crossover', 'Hz', 1000, 20000),
195
  'parallel_weight_factor': ('Dry/Wet Mix', '%', 0, 100),
196
  'low_shelf_comp_thresh': ('Low Band Comp Threshold', 'dB', -60, 0),
197
+ 'low_shelf_comp_ratio': ('Low Band Comp Ratio', ': 1', 1, 20),
198
  'low_shelf_exp_thresh': ('Low Band Exp Threshold', 'dB', -60, 0),
199
+ 'low_shelf_exp_ratio': ('Low Band Exp Ratio', ': 1', 1, 20),
200
  'low_shelf_at': ('Low Band Attack Time', 'ms', 5, 100),
201
  'low_shelf_rt': ('Low Band Release Time', 'ms', 5, 100),
202
  'mid_band_comp_thresh': ('Mid Band Comp Threshold', 'dB', -60, 0),
203
+ 'mid_band_comp_ratio': ('Mid Band Comp Ratio', ': 1', 1, 20),
204
  'mid_band_exp_thresh': ('Mid Band Exp Threshold', 'dB', -60, 0),
205
+ 'mid_band_exp_ratio': ('Mid Band Exp Ratio', ': 1', 0, 1),
206
  'mid_band_at': ('Mid Band Attack Time', 'ms', 5, 100),
207
  'mid_band_rt': ('Mid Band Release Time', 'ms', 5, 100),
208
  'high_shelf_comp_thresh': ('High Band Comp Threshold', 'dB', -60, 0),
209
+ 'high_shelf_comp_ratio': ('High Band Comp Ratio', ': 1', 1, 20),
210
  'high_shelf_exp_thresh': ('High Band Exp Threshold', 'dB', -60, 0),
211
+ 'high_shelf_exp_ratio': ('High Band Exp Ratio', ': 1', 1, 20),
212
  'high_shelf_at': ('High Band Attack Time', 'ms', 5, 100),
213
  'high_shelf_rt': ('High Band Release Time', 'ms', 5, 100),
214
  },
 
236
  print(f"fx name: {fx_name} param_name: {param_name}")
237
  if fx_name in param_mapper and param_name in param_mapper[fx_name]:
238
  friendly_name, unit, min_val, max_val = param_mapper[fx_name][param_name]
239
+ if unit=='%':
240
+ param_value = param_value * 100
241
+ current_content = f" {friendly_name}: {param_value:.2f} {unit}"
242
+ if param_name=='mid_band_exp_ratio':
243
+ current_content += f" (Range: {min_val}-{max_val})"
244
+ output.append(current_content)
245
  else:
246
  output.append(f" {param_name}: {param_value:.2f}")
247
  else:
248
+ # stereo imager
249
+ width_percentage = fx_params.item() * 200
250
+ output.append(f" Stereo Width: {width_percentage:.2f}% (Range: 0-200%)")
 
 
251
 
252
  return "\n".join(output)
253
 
ito_snow.png ADDED