Spaces:
Sleeping
Sleeping
modify app
Browse files
app.py
CHANGED
@@ -113,6 +113,7 @@ def perform_ito(input_audio, reference_audio, ito_reference_audio, num_steps, op
|
|
113 |
|
114 |
ito_log = ""
|
115 |
loss_values = []
|
|
|
116 |
for log_entry, current_output, current_params, step, loss in mastering_transfer.inference_time_optimization(
|
117 |
input_tensor, ito_reference_tensor, ito_config, initial_reference_feature
|
118 |
):
|
@@ -123,7 +124,7 @@ def perform_ito(input_audio, reference_audio, ito_reference_audio, num_steps, op
|
|
123 |
# Convert current_output to numpy array if it's a tensor
|
124 |
if isinstance(current_output, torch.Tensor):
|
125 |
current_output = current_output.cpu().numpy()
|
126 |
-
|
127 |
if current_output.ndim == 1:
|
128 |
current_output = current_output.reshape(-1, 1)
|
129 |
elif current_output.ndim > 2:
|
@@ -137,28 +138,20 @@ def perform_ito(input_audio, reference_audio, ito_reference_audio, num_steps, op
|
|
137 |
# Denormalize the audio to int16
|
138 |
current_output = denormalize_audio(current_output, dtype=np.int16)
|
139 |
|
140 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
141 |
|
142 |
-
|
143 |
-
af_weights = [float(w.strip()) for w in af_weights.split(',')]
|
144 |
-
ito_generator = mastering_transfer.inference_time_optimization(
|
145 |
-
input_audio, reference_audio, ito_reference_audio, num_steps, optimizer, learning_rate, af_weights
|
146 |
-
)
|
147 |
-
|
148 |
-
all_results = []
|
149 |
-
for result in ito_generator:
|
150 |
-
all_results.append(result)
|
151 |
-
|
152 |
-
min_loss_step = min(range(len(all_results)), key=lambda i: all_results[i]['loss'])
|
153 |
-
|
154 |
-
loss_df = pd.DataFrame([(r['step'], r['loss']) for r in all_results], columns=['step', 'loss'])
|
155 |
-
|
156 |
-
return all_results, min_loss_step, loss_df
|
157 |
|
158 |
def update_ito_output(all_results, selected_step):
|
159 |
print(all_results)
|
160 |
print(selected_step)
|
161 |
-
selected_result = all_results[selected_step]
|
162 |
return (args.sample_rate, selected_result['audio']), selected_result['params'], selected_result['log']
|
163 |
|
164 |
|
@@ -187,11 +180,11 @@ with gr.Blocks() as demo:
|
|
187 |
|
188 |
with gr.Tab("YouTube Audio"):
|
189 |
with gr.Row():
|
190 |
-
|
191 |
-
|
192 |
with gr.Row():
|
193 |
-
|
194 |
-
|
195 |
|
196 |
process_button_yt = gr.Button("Process Mastering Style Transfer")
|
197 |
|
@@ -251,9 +244,9 @@ with gr.Blocks() as demo:
|
|
251 |
return loss_df, gr.update(maximum=len(results), value=min_step+1)
|
252 |
|
253 |
ito_button.click(
|
254 |
-
|
255 |
inputs=[input_audio, reference_audio, ito_reference_audio, num_steps, optimizer, learning_rate, af_weights],
|
256 |
-
outputs=[
|
257 |
).then(
|
258 |
update_ito_output,
|
259 |
inputs=[all_results, ito_step_slider],
|
|
|
113 |
|
114 |
ito_log = ""
|
115 |
loss_values = []
|
116 |
+
all_results = []
|
117 |
for log_entry, current_output, current_params, step, loss in mastering_transfer.inference_time_optimization(
|
118 |
input_tensor, ito_reference_tensor, ito_config, initial_reference_feature
|
119 |
):
|
|
|
124 |
# Convert current_output to numpy array if it's a tensor
|
125 |
if isinstance(current_output, torch.Tensor):
|
126 |
current_output = current_output.cpu().numpy()
|
127 |
+
|
128 |
if current_output.ndim == 1:
|
129 |
current_output = current_output.reshape(-1, 1)
|
130 |
elif current_output.ndim > 2:
|
|
|
138 |
# Denormalize the audio to int16
|
139 |
current_output = denormalize_audio(current_output, dtype=np.int16)
|
140 |
|
141 |
+
all_results.append({
|
142 |
+
'step': step,
|
143 |
+
'audio': current_output,
|
144 |
+
'params': ito_param_output,
|
145 |
+
'log': log_entry,
|
146 |
+
'loss': loss
|
147 |
+
})
|
148 |
|
149 |
+
yield (args.sample_rate, current_output), ito_param_output, step, ito_log, pd.DataFrame(loss_values), all_results
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
150 |
|
151 |
def update_ito_output(all_results, selected_step):
    """Return the stored ITO outputs for the step chosen on the step slider.

    Args:
        all_results: Sequence of per-step result dicts; this function reads
            the 'audio', 'params' and 'log' keys of the selected entry.
        selected_step: 1-based step number (the slider is configured with
            ``maximum=len(results)`` and a ``min_step + 1`` default, so step
            ``k`` maps to ``all_results[k - 1]``).

    Returns:
        A 3-tuple ``((args.sample_rate, audio), params, log)`` for the
        selected step.

    Raises:
        IndexError: If there are no results yet, or ``selected_step`` is
            outside ``1..len(all_results)``.
    """
    # NOTE: the previous debug print of `all_results` was removed on purpose;
    # it dumped every stored audio array to stdout on each slider change.
    if not all_results:
        raise IndexError("No ITO results available; run ITO before selecting a step.")

    # The UI may deliver the slider value as a float (e.g. 3.0); list
    # indexing requires an int.
    index = int(selected_step) - 1

    # Reject out-of-range steps explicitly. Without this check a step of 0
    # becomes index -1 and silently returns the *last* step's output.
    if not 0 <= index < len(all_results):
        raise IndexError(
            f"ITO step {selected_step} is out of range (valid: 1-{len(all_results)})."
        )

    selected_result = all_results[index]
    return (args.sample_rate, selected_result['audio']), selected_result['params'], selected_result['log']
|
156 |
|
157 |
|
|
|
180 |
|
181 |
with gr.Tab("YouTube Audio"):
|
182 |
with gr.Row():
|
183 |
+
input_youtube_url = gr.Textbox(label="Input YouTube URL")
|
184 |
+
reference_youtube_url = gr.Textbox(label="Reference YouTube URL")
|
185 |
with gr.Row():
|
186 |
+
input_audio_yt = gr.Audio(label="Input Audio (Do not put when using YouTube URL)")
|
187 |
+
reference_audio_yt = gr.Audio(label="Reference Audio (Do not put when using YouTube URL)")
|
188 |
|
189 |
process_button_yt = gr.Button("Process Mastering Style Transfer")
|
190 |
|
|
|
244 |
return loss_df, gr.update(maximum=len(results), value=min_step+1)
|
245 |
|
246 |
ito_button.click(
|
247 |
+
perform_ito,
|
248 |
inputs=[input_audio, reference_audio, ito_reference_audio, num_steps, optimizer, learning_rate, af_weights],
|
249 |
+
outputs=[ito_output_audio, ito_param_output, ito_step_slider, ito_log, ito_loss_plot, all_results]
|
250 |
).then(
|
251 |
update_ito_output,
|
252 |
inputs=[all_results, ito_step_slider],
|