jhtonyKoo committed on
Commit
f5db6d5
1 Parent(s): 6838a44

modify app

Browse files
Files changed (2) hide show
  1. app.py +40 -30
  2. inference.py +72 -23
app.py CHANGED
@@ -139,11 +139,31 @@ def perform_ito(input_audio, reference_audio, ito_reference_audio, num_steps, op
139
 
140
  yield (args.sample_rate, current_output), ito_param_output, step, ito_log, pd.DataFrame(loss_values)
141
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
142
 
143
- """ APP display """
144
- gr.Markdown("# ITO-Master: Inference Time Optimization for Mastering Style Transfer")
 
145
 
 
 
146
  with gr.Blocks() as demo:
 
 
147
  gr.Markdown("# Step 1: Mastering Style Transfer")
148
 
149
  with gr.Tab("Upload Audio"):
@@ -207,6 +227,7 @@ with gr.Blocks() as demo:
207
  with gr.Column():
208
  ito_output_audio = gr.Audio(label="ITO Output Audio")
209
  ito_param_output = gr.Textbox(label="ITO Predicted Parameters", lines=15)
 
210
  with gr.Column():
211
  ito_loss_plot = gr.LinePlot(
212
  x="step",
@@ -219,39 +240,28 @@ with gr.Blocks() as demo:
219
  )
220
  ito_log = gr.Textbox(label="ITO Log", lines=10)
221
 
222
- def run_ito(input_audio, reference_audio, ito_reference_audio, num_steps, optimizer, learning_rate, af_weights):
223
- af_weights = [float(w.strip()) for w in af_weights.split(',')]
224
- ito_generator = perform_ito(
225
- input_audio, reference_audio, ito_reference_audio, num_steps, optimizer, learning_rate, af_weights
226
- )
227
-
228
- # Initialize variables to store the final results
229
- final_audio = None
230
- final_params = None
231
- final_steps = 0
232
- final_log = ""
233
- loss_df = None
234
-
235
- # Iterate through the generator to get the final results
236
- for audio, params, steps, log, loss_data in ito_generator:
237
- final_audio = audio
238
- final_params = params
239
- final_steps = steps
240
- final_log = log
241
- loss_df = loss_data
242
-
243
- # Calculate y_min and y_max
244
- y_min = loss_df['loss'].min()
245
- y_max = loss_df['loss'].max()
246
-
247
- # Return the plot configuration along with the data
248
- return final_audio, final_params, final_log, loss_df
249
 
 
 
 
 
250
 
251
  ito_button.click(
252
  run_ito,
253
  inputs=[input_audio, reference_audio, ito_reference_audio, num_steps, optimizer, learning_rate, af_weights],
254
- outputs=[ito_output_audio, ito_param_output, ito_log, ito_loss_plot]
 
 
 
 
 
 
 
 
 
 
255
  )
256
 
257
  demo.launch()
 
139
 
140
  yield (args.sample_rate, current_output), ito_param_output, step, ito_log, pd.DataFrame(loss_values)
141
 
142
def run_ito(input_audio, reference_audio, ito_reference_audio, num_steps, optimizer, learning_rate, af_weights):
    """Run inference-time optimization and collect the result of every step.

    Args:
        input_audio: Forwarded unchanged to the optimizer.
        reference_audio: Forwarded unchanged to the optimizer.
        ito_reference_audio: Forwarded unchanged to the optimizer.
        num_steps: Forwarded unchanged to the optimizer.
        optimizer: Forwarded unchanged to the optimizer.
        learning_rate: Forwarded unchanged to the optimizer.
        af_weights: Comma-separated string of numbers; parsed into a list of
            floats before being forwarded.

    Returns:
        A tuple ``(all_results, min_loss_step, loss_df)`` where
        ``all_results`` is the list of per-step result dicts yielded by the
        optimizer, ``min_loss_step`` is the 0-based index into that list of
        the entry with the smallest ``'loss'`` (0 when the list is empty), and
        ``loss_df`` is a DataFrame with ``'step'`` and ``'loss'`` columns.

    Raises:
        ValueError: If an entry of ``af_weights`` cannot be parsed as a float.
    """
    af_weights = [float(w.strip()) for w in af_weights.split(',')]

    # NOTE(review): the inference_time_optimization method shown for
    # inference.py takes (input_tensor, reference_tensor, ito_config,
    # initial_reference_feature); confirm that this 7-argument call matches
    # what mastering_transfer actually exposes.
    ito_generator = mastering_transfer.inference_time_optimization(
        input_audio, reference_audio, ito_reference_audio, num_steps, optimizer, learning_rate, af_weights
    )

    # Drain the generator so every intermediate step can be browsed later.
    all_results = list(ito_generator)

    loss_df = pd.DataFrame(
        [(r['step'], r['loss']) for r in all_results],
        columns=['step', 'loss'],
    )

    if not all_results:
        # min() raises ValueError on an empty sequence; fall back to index 0
        # so a run that produced no steps does not crash the handler.
        return all_results, 0, loss_df

    min_loss_step = min(range(len(all_results)), key=lambda i: all_results[i]['loss'])

    # NOTE(review): three values are returned, while the ito_button.click
    # wiring lists four outputs; verify the wiring against this return shape.
    return all_results, min_loss_step, loss_df
157
 
158
def update_ito_output(all_results, selected_step):
    """Return the audio, parameters and log of the step picked on the slider.

    Args:
        all_results: List of per-step result dicts with ``'audio'``,
            ``'params'`` and ``'log'`` keys.
        selected_step: 1-based step number. The "ITO Step" slider starts at 1
            and each stored result records ``step + 1``, so step ``k`` lives
            at list index ``k - 1``.

    Returns:
        A tuple ``((sample_rate, audio), params, log)`` for the selected step,
        or ``(None, "", "")`` when there are no results yet.
    """
    if not all_results:
        # The state starts out as an empty list; nothing to show before a run.
        return None, "", ""

    # Convert the 1-based slider value to a 0-based index. The slider may
    # deliver a float, and its range can exceed the number of stored steps,
    # so coerce to int and clamp into the valid range instead of raising.
    index = min(max(int(selected_step), 1), len(all_results)) - 1
    selected_result = all_results[index]
    return (args.sample_rate, selected_result['audio']), selected_result['params'], selected_result['log']
161
 
162
+
163
+ """ APP display """
164
  with gr.Blocks() as demo:
165
+ gr.Markdown("# ITO-Master: Inference Time Optimization for Mastering Style Transfer")
166
+
167
  gr.Markdown("# Step 1: Mastering Style Transfer")
168
 
169
  with gr.Tab("Upload Audio"):
 
227
  with gr.Column():
228
  ito_output_audio = gr.Audio(label="ITO Output Audio")
229
  ito_param_output = gr.Textbox(label="ITO Predicted Parameters", lines=15)
230
+ ito_step_slider = gr.Slider(minimum=1, maximum=100, step=1, label="ITO Step", interactive=True)
231
  with gr.Column():
232
  ito_loss_plot = gr.LinePlot(
233
  x="step",
 
240
  )
241
  ito_log = gr.Textbox(label="ITO Log", lines=10)
242
 
243
+ all_results = gr.State([])
244
+ min_loss_step = gr.State(0)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
245
 
246
def on_ito_complete(results, min_step, loss_df):
    """Store the finished ITO run and build the plot data and slider update.

    Args:
        results: List of per-step results from the ITO run.
        min_step: 0-based index of the lowest-loss entry in ``results``.
        loss_df: Loss table to display in the plot.

    Returns:
        A tuple ``(loss_df, slider_update)`` where ``slider_update`` sets the
        slider maximum to the number of results and its value to the 1-based
        lowest-loss step.
    """
    # NOTE(review): assigning .value on gr.State objects bypasses the event
    # output mechanism and may not be per-session; the ito_button.click
    # wiring already lists both states as outputs. Confirm whether these
    # assignments are needed at all.
    all_results.value = results
    min_loss_step.value = min_step

    # Keep the maximum at 1 or above so an empty run cannot push it below
    # the slider's minimum of 1.
    return loss_df, gr.update(maximum=max(len(results), 1), value=min_step + 1)
250
 
251
  ito_button.click(
252
  run_ito,
253
  inputs=[input_audio, reference_audio, ito_reference_audio, num_steps, optimizer, learning_rate, af_weights],
254
+ outputs=[all_results, min_loss_step, ito_loss_plot, ito_step_slider]
255
+ ).then(
256
+ update_ito_output,
257
+ inputs=[all_results, ito_step_slider],
258
+ outputs=[ito_output_audio, ito_param_output, ito_log]
259
+ )
260
+
261
+ ito_step_slider.change(
262
+ update_ito_output,
263
+ inputs=[all_results, ito_step_slider],
264
+ outputs=[ito_output_audio, ito_param_output, ito_log]
265
  )
266
 
267
  demo.launch()
inference.py CHANGED
@@ -60,6 +60,68 @@ class MasteringStyleTransfer:
60
  predicted_params = self.mastering_converter.get_last_predicted_params()
61
  return output_audio, predicted_params
62
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
  def inference_time_optimization(self, input_tensor, reference_tensor, ito_config, initial_reference_feature):
64
  fit_embedding = torch.nn.Parameter(initial_reference_feature)
65
  optimizer = getattr(torch.optim, ito_config['optimizer'])([fit_embedding], lr=ito_config['learning_rate'])
@@ -73,13 +135,7 @@ class MasteringStyleTransfer:
73
 
74
  min_loss = float('inf')
75
  min_loss_step = 0
76
- min_loss_output = None
77
- min_loss_params = None
78
- min_loss_embedding = None
79
-
80
- loss_history = []
81
- divergence_counter = 0
82
- ito_log = []
83
 
84
  for step in range(ito_config['num_steps']):
85
  optimizer.zero_grad()
@@ -90,20 +146,9 @@ class MasteringStyleTransfer:
90
  losses = af_loss(output_audio, reference_tensor)
91
  total_loss = sum(losses.values())
92
 
93
- loss_history.append(total_loss.item())
94
-
95
  if total_loss < min_loss:
96
  min_loss = total_loss.item()
97
  min_loss_step = step
98
- min_loss_output = output_audio.detach()
99
- min_loss_params = current_params
100
- min_loss_embedding = fit_embedding.detach().clone()
101
-
102
- # Check for divergence
103
- if len(loss_history) > 10 and total_loss > loss_history[-11]:
104
- divergence_counter += 1
105
- else:
106
- divergence_counter = 0
107
 
108
  # Log top 5 parameter differences
109
  if step == 0:
@@ -111,16 +156,20 @@ class MasteringStyleTransfer:
111
  top_5_diff = self.get_top_n_diff_string(initial_params, current_params, top_n=5)
112
  log_entry = f"Step {step + 1}\n Loss: {total_loss.item():.4f}\n{top_5_diff}\n"
113
 
114
- if divergence_counter >= 10:
115
- print(f"Optimization stopped early due to divergence at step {step}")
116
- break
 
 
 
 
117
 
118
  total_loss.backward()
119
  optimizer.step()
120
 
121
- yield log_entry, output_audio.detach(), current_params, step + 1, total_loss.item()
122
 
123
- return min_loss_output, min_loss_params, min_loss_embedding, min_loss_step + 1
124
 
125
  def preprocess_audio(self, audio, target_sample_rate=44100):
126
  sample_rate, data = audio
 
60
  predicted_params = self.mastering_converter.get_last_predicted_params()
61
  return output_audio, predicted_params
62
 
63
+ # def inference_time_optimization(self, input_tensor, reference_tensor, ito_config, initial_reference_feature):
64
+ # fit_embedding = torch.nn.Parameter(initial_reference_feature)
65
+ # optimizer = getattr(torch.optim, ito_config['optimizer'])([fit_embedding], lr=ito_config['learning_rate'])
66
+
67
+ # af_loss = AudioFeatureLoss(
68
+ # weights=ito_config['af_weights'],
69
+ # sample_rate=ito_config['sample_rate'],
70
+ # stem_separation=False,
71
+ # use_clap=False
72
+ # )
73
+
74
+ # min_loss = float('inf')
75
+ # min_loss_step = 0
76
+ # min_loss_output = None
77
+ # min_loss_params = None
78
+ # min_loss_embedding = None
79
+
80
+ # loss_history = []
81
+ # divergence_counter = 0
82
+ # ito_log = []
83
+
84
+ # for step in range(ito_config['num_steps']):
85
+ # optimizer.zero_grad()
86
+
87
+ # output_audio = self.mastering_converter(input_tensor, fit_embedding)
88
+ # current_params = self.mastering_converter.get_last_predicted_params()
89
+
90
+ # losses = af_loss(output_audio, reference_tensor)
91
+ # total_loss = sum(losses.values())
92
+
93
+ # loss_history.append(total_loss.item())
94
+
95
+ # if total_loss < min_loss:
96
+ # min_loss = total_loss.item()
97
+ # min_loss_step = step
98
+ # min_loss_output = output_audio.detach()
99
+ # min_loss_params = current_params
100
+ # min_loss_embedding = fit_embedding.detach().clone()
101
+
102
+ # # Check for divergence
103
+ # if len(loss_history) > 10 and total_loss > loss_history[-11]:
104
+ # divergence_counter += 1
105
+ # else:
106
+ # divergence_counter = 0
107
+
108
+ # # Log top 5 parameter differences
109
+ # if step == 0:
110
+ # initial_params = current_params
111
+ # top_5_diff = self.get_top_n_diff_string(initial_params, current_params, top_n=5)
112
+ # log_entry = f"Step {step + 1}\n Loss: {total_loss.item():.4f}\n{top_5_diff}\n"
113
+
114
+ # if divergence_counter >= 10:
115
+ # print(f"Optimization stopped early due to divergence at step {step}")
116
+ # break
117
+
118
+ # total_loss.backward()
119
+ # optimizer.step()
120
+
121
+ # yield log_entry, output_audio.detach(), current_params, step + 1, total_loss.item()
122
+
123
+ # return min_loss_output, min_loss_params, min_loss_embedding, min_loss_step + 1
124
+
125
  def inference_time_optimization(self, input_tensor, reference_tensor, ito_config, initial_reference_feature):
126
  fit_embedding = torch.nn.Parameter(initial_reference_feature)
127
  optimizer = getattr(torch.optim, ito_config['optimizer'])([fit_embedding], lr=ito_config['learning_rate'])
 
135
 
136
  min_loss = float('inf')
137
  min_loss_step = 0
138
+ all_results = []
 
 
 
 
 
 
139
 
140
  for step in range(ito_config['num_steps']):
141
  optimizer.zero_grad()
 
146
  losses = af_loss(output_audio, reference_tensor)
147
  total_loss = sum(losses.values())
148
 
 
 
149
  if total_loss < min_loss:
150
  min_loss = total_loss.item()
151
  min_loss_step = step
 
 
 
 
 
 
 
 
 
152
 
153
  # Log top 5 parameter differences
154
  if step == 0:
 
156
  top_5_diff = self.get_top_n_diff_string(initial_params, current_params, top_n=5)
157
  log_entry = f"Step {step + 1}\n Loss: {total_loss.item():.4f}\n{top_5_diff}\n"
158
 
159
+ all_results.append({
160
+ 'step': step + 1,
161
+ 'loss': total_loss.item(),
162
+ 'audio': output_audio.detach(),
163
+ 'params': current_params,
164
+ 'log': log_entry
165
+ })
166
 
167
  total_loss.backward()
168
  optimizer.step()
169
 
170
+ yield all_results[-1]
171
 
172
+ return all_results, min_loss_step
173
 
174
  def preprocess_audio(self, audio, target_sample_rate=44100):
175
  sample_rate, data = audio