Spaces:
Running
Running
modify app
Browse files
- __pycache__/inference.cpython-311.pyc +0 -0
- app.py +12 -7
- inference.py +17 -19
- ito_snow.png +0 -0
__pycache__/inference.cpython-311.pyc
CHANGED
Binary files a/__pycache__/inference.cpython-311.pyc and b/__pycache__/inference.cpython-311.pyc differ
|
|
app.py
CHANGED
@@ -155,20 +155,25 @@ def update_ito_output(all_results, selected_step):
|
|
155 |
""" APP display """
|
156 |
with gr.Blocks() as demo:
|
157 |
gr.Markdown("# ITO-Master: Inference Time Optimization for Mastering Style Transfer")
|
|
|
|
|
|
|
|
|
|
|
158 |
|
159 |
gr.Markdown("# Step 1: Mastering Style Transfer")
|
160 |
|
161 |
with gr.Tab("Upload Audio"):
|
162 |
with gr.Row():
|
163 |
-
input_audio = gr.Audio(label="
|
164 |
-
reference_audio = gr.Audio(label="Reference Audio")
|
165 |
|
166 |
process_button = gr.Button("Process Mastering Style Transfer")
|
167 |
|
168 |
with gr.Row():
|
169 |
with gr.Column():
|
170 |
-
output_audio = gr.Audio(label="Output Audio", type='numpy')
|
171 |
-
normalized_input = gr.Audio(label="Normalized
|
172 |
param_output = gr.Textbox(label="Predicted Parameters", lines=5)
|
173 |
|
174 |
process_button.click(
|
@@ -182,8 +187,8 @@ with gr.Blocks() as demo:
|
|
182 |
input_youtube_url = gr.Textbox(label="Input YouTube URL")
|
183 |
reference_youtube_url = gr.Textbox(label="Reference YouTube URL")
|
184 |
with gr.Row():
|
185 |
-
input_audio_yt = gr.Audio(label="
|
186 |
-
reference_audio_yt = gr.Audio(label="Reference Audio (Do not put when using YouTube URL)")
|
187 |
|
188 |
process_button_yt = gr.Button("Process Mastering Style Transfer")
|
189 |
|
@@ -208,7 +213,7 @@ with gr.Blocks() as demo:
|
|
208 |
gr.Markdown("## Step 2: Inference Time Optimization (ITO)")
|
209 |
|
210 |
with gr.Row():
|
211 |
-
ito_reference_audio = gr.Audio(label="ITO Reference Audio (optional)")
|
212 |
with gr.Column():
|
213 |
num_steps = gr.Slider(minimum=1, maximum=100, value=10, step=1, label="Number of Steps")
|
214 |
optimizer = gr.Dropdown(["Adam", "RAdam", "SGD"], value="RAdam", label="Optimizer")
|
|
|
155 |
""" APP display """
|
156 |
with gr.Blocks() as demo:
|
157 |
gr.Markdown("# ITO-Master: Inference Time Optimization for Mastering Style Transfer")
|
158 |
+
with gr.Row():
|
159 |
+
gr.Markdown("Demo of Inference Time Optimization (ITO) for Music Mastering Style Transfer. \n"
|
160 |
+
"The mastering style transfer is performed by a differentiable audio processing model, and the predicted parameters are shown as the output. \n"
|
161 |
+
"Perform mastering style transfer with an input source audio and a reference mastering style audio. On top of this result, you can perform ITO to optimize the reference embedding $z_{\\text{ref}}$ to further gain control over the output mastering style.")
|
162 |
+
gr.Image("ito_snow.png", width=500)
|
163 |
|
164 |
gr.Markdown("# Step 1: Mastering Style Transfer")
|
165 |
|
166 |
with gr.Tab("Upload Audio"):
|
167 |
with gr.Row():
|
168 |
+
input_audio = gr.Audio(label="Source Audio ($x_{\\text{in}}$)")
|
169 |
+
reference_audio = gr.Audio(label="Reference Style Audio ($x_{\\text{ref}}$)")
|
170 |
|
171 |
process_button = gr.Button("Process Mastering Style Transfer")
|
172 |
|
173 |
with gr.Row():
|
174 |
with gr.Column():
|
175 |
+
output_audio = gr.Audio(label="Output Audio ($y'$)", type='numpy')
|
176 |
+
normalized_input = gr.Audio(label="Normalized Source Audio", type='numpy')
|
177 |
param_output = gr.Textbox(label="Predicted Parameters", lines=5)
|
178 |
|
179 |
process_button.click(
|
|
|
187 |
input_youtube_url = gr.Textbox(label="Input YouTube URL")
|
188 |
reference_youtube_url = gr.Textbox(label="Reference YouTube URL")
|
189 |
with gr.Row():
|
190 |
+
input_audio_yt = gr.Audio(label="Source Audio (Do not put when using YouTube URL)")
|
191 |
+
reference_audio_yt = gr.Audio(label="Reference Style Audio (Do not put when using YouTube URL)")
|
192 |
|
193 |
process_button_yt = gr.Button("Process Mastering Style Transfer")
|
194 |
|
|
|
213 |
gr.Markdown("## Step 2: Inference Time Optimization (ITO)")
|
214 |
|
215 |
with gr.Row():
|
216 |
+
ito_reference_audio = gr.Audio(label="ITO Reference Style Audio (optional)")
|
217 |
with gr.Column():
|
218 |
num_steps = gr.Slider(minimum=1, maximum=100, value=10, step=1, label="Number of Steps")
|
219 |
optimizer = gr.Dropdown(["Adam", "RAdam", "SGD"], value="RAdam", label="Optimizer")
|
inference.py
CHANGED
@@ -179,10 +179,10 @@ class MasteringStyleTransfer:
|
|
179 |
'band2_cutoff_freq': ('High-Mid Band Frequency', 'Hz', 8000, 12000),
|
180 |
'band2_q_factor': ('High-Mid Band Q', '', 0.1, 5.0),
|
181 |
'band3_gain_db': ('High Band Gain', 'dB', -20, 20),
|
182 |
-
'band3_cutoff_freq': ('High Band Frequency', 'Hz', 12000, 20000),
|
183 |
'band3_q_factor': ('High Band Q', '', 0.1, 5.0),
|
184 |
'high_shelf_gain_db': ('High Shelf Gain', 'dB', -20, 20),
|
185 |
-
'high_shelf_cutoff_freq': ('High Shelf Cutoff', 'Hz', 4000, 20000),
|
186 |
'high_shelf_q_factor': ('High Shelf Q', '', 0.1, 5.0),
|
187 |
},
|
188 |
'distortion': {
|
@@ -194,21 +194,21 @@ class MasteringStyleTransfer:
|
|
194 |
'high_cutoff': ('Mid/High Crossover', 'Hz', 1000, 20000),
|
195 |
'parallel_weight_factor': ('Dry/Wet Mix', '%', 0, 100),
|
196 |
'low_shelf_comp_thresh': ('Low Band Comp Threshold', 'dB', -60, 0),
|
197 |
-
'low_shelf_comp_ratio': ('Low Band Comp Ratio', ':1', 1, 20),
|
198 |
'low_shelf_exp_thresh': ('Low Band Exp Threshold', 'dB', -60, 0),
|
199 |
-
'low_shelf_exp_ratio': ('Low Band Exp Ratio', ':1', 1, 20),
|
200 |
'low_shelf_at': ('Low Band Attack Time', 'ms', 5, 100),
|
201 |
'low_shelf_rt': ('Low Band Release Time', 'ms', 5, 100),
|
202 |
'mid_band_comp_thresh': ('Mid Band Comp Threshold', 'dB', -60, 0),
|
203 |
-
'mid_band_comp_ratio': ('Mid Band Comp Ratio', ':1', 1, 20),
|
204 |
'mid_band_exp_thresh': ('Mid Band Exp Threshold', 'dB', -60, 0),
|
205 |
-
'mid_band_exp_ratio': ('Mid Band Exp Ratio', ':1',
|
206 |
'mid_band_at': ('Mid Band Attack Time', 'ms', 5, 100),
|
207 |
'mid_band_rt': ('Mid Band Release Time', 'ms', 5, 100),
|
208 |
'high_shelf_comp_thresh': ('High Band Comp Threshold', 'dB', -60, 0),
|
209 |
-
'high_shelf_comp_ratio': ('High Band Comp Ratio', ':1', 1, 20),
|
210 |
'high_shelf_exp_thresh': ('High Band Exp Threshold', 'dB', -60, 0),
|
211 |
-
'high_shelf_exp_ratio': ('High Band Exp Ratio', ':1', 1, 20),
|
212 |
'high_shelf_at': ('High Band Attack Time', 'ms', 5, 100),
|
213 |
'high_shelf_rt': ('High Band Release Time', 'ms', 5, 100),
|
214 |
},
|
@@ -236,20 +236,18 @@ class MasteringStyleTransfer:
|
|
236 |
print(f"fx name: {fx_name} param_name: {param_name}")
|
237 |
if fx_name in param_mapper and param_name in param_mapper[fx_name]:
|
238 |
friendly_name, unit, min_val, max_val = param_mapper[fx_name][param_name]
|
239 |
-
if
|
240 |
-
|
241 |
-
|
242 |
-
|
243 |
-
|
244 |
-
|
245 |
else:
|
246 |
output.append(f" {param_name}: {param_value:.2f}")
|
247 |
else:
|
248 |
-
|
249 |
-
|
250 |
-
|
251 |
-
else:
|
252 |
-
output.append(f" {fx_params.item():.2f}")
|
253 |
|
254 |
return "\n".join(output)
|
255 |
|
|
|
179 |
'band2_cutoff_freq': ('High-Mid Band Frequency', 'Hz', 8000, 12000),
|
180 |
'band2_q_factor': ('High-Mid Band Q', '', 0.1, 5.0),
|
181 |
'band3_gain_db': ('High Band Gain', 'dB', -20, 20),
|
182 |
+
'band3_cutoff_freq': ('High Band Frequency', 'Hz', 12000, 20000),
|
183 |
'band3_q_factor': ('High Band Q', '', 0.1, 5.0),
|
184 |
'high_shelf_gain_db': ('High Shelf Gain', 'dB', -20, 20),
|
185 |
+
'high_shelf_cutoff_freq': ('High Shelf Cutoff', 'Hz', 4000, 20000),
|
186 |
'high_shelf_q_factor': ('High Shelf Q', '', 0.1, 5.0),
|
187 |
},
|
188 |
'distortion': {
|
|
|
194 |
'high_cutoff': ('Mid/High Crossover', 'Hz', 1000, 20000),
|
195 |
'parallel_weight_factor': ('Dry/Wet Mix', '%', 0, 100),
|
196 |
'low_shelf_comp_thresh': ('Low Band Comp Threshold', 'dB', -60, 0),
|
197 |
+
'low_shelf_comp_ratio': ('Low Band Comp Ratio', ': 1', 1, 20),
|
198 |
'low_shelf_exp_thresh': ('Low Band Exp Threshold', 'dB', -60, 0),
|
199 |
+
'low_shelf_exp_ratio': ('Low Band Exp Ratio', ': 1', 1, 20),
|
200 |
'low_shelf_at': ('Low Band Attack Time', 'ms', 5, 100),
|
201 |
'low_shelf_rt': ('Low Band Release Time', 'ms', 5, 100),
|
202 |
'mid_band_comp_thresh': ('Mid Band Comp Threshold', 'dB', -60, 0),
|
203 |
+
'mid_band_comp_ratio': ('Mid Band Comp Ratio', ': 1', 1, 20),
|
204 |
'mid_band_exp_thresh': ('Mid Band Exp Threshold', 'dB', -60, 0),
|
205 |
+
'mid_band_exp_ratio': ('Mid Band Exp Ratio', ': 1', 0, 1),
|
206 |
'mid_band_at': ('Mid Band Attack Time', 'ms', 5, 100),
|
207 |
'mid_band_rt': ('Mid Band Release Time', 'ms', 5, 100),
|
208 |
'high_shelf_comp_thresh': ('High Band Comp Threshold', 'dB', -60, 0),
|
209 |
+
'high_shelf_comp_ratio': ('High Band Comp Ratio', ': 1', 1, 20),
|
210 |
'high_shelf_exp_thresh': ('High Band Exp Threshold', 'dB', -60, 0),
|
211 |
+
'high_shelf_exp_ratio': ('High Band Exp Ratio', ': 1', 1, 20),
|
212 |
'high_shelf_at': ('High Band Attack Time', 'ms', 5, 100),
|
213 |
'high_shelf_rt': ('High Band Release Time', 'ms', 5, 100),
|
214 |
},
|
|
|
236 |
print(f"fx name: {fx_name} param_name: {param_name}")
|
237 |
if fx_name in param_mapper and param_name in param_mapper[fx_name]:
|
238 |
friendly_name, unit, min_val, max_val = param_mapper[fx_name][param_name]
|
239 |
+
if unit=='%':
|
240 |
+
param_value = param_value * 100
|
241 |
+
current_content = f" {friendly_name}: {param_value:.2f} {unit}"
|
242 |
+
if param_name=='mid_band_exp_ratio':
|
243 |
+
current_content += f" (Range: {min_val}-{max_val})"
|
244 |
+
output.append(current_content)
|
245 |
else:
|
246 |
output.append(f" {param_name}: {param_value:.2f}")
|
247 |
else:
|
248 |
+
# stereo imager
|
249 |
+
width_percentage = fx_params.item() * 200
|
250 |
+
output.append(f" Stereo Width: {width_percentage:.2f}% (Range: 0-200%)")
|
|
|
|
|
251 |
|
252 |
return "\n".join(output)
|
253 |
|
ito_snow.png
ADDED
![ito_snow.png](ito_snow.png) |