tee342 committed on
Commit
e15902b
·
verified ·
1 Parent(s): 4d146c3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +105 -113
app.py CHANGED
@@ -16,7 +16,11 @@ from PIL import Image
16
  import zipfile
17
  import datetime
18
  import librosa
19
- import librosa.display
 
 
 
 
20
 
21
  # === Helper Functions ===
22
  def audiosegment_to_array(audio):
@@ -116,7 +120,7 @@ def stem_split(audio_path):
116
  save_track(path, sources[i].cpu(), model.samplerate)
117
  stem_paths.append(path)
118
 
119
- return stem_paths
120
 
121
  # === Preset Loader with Fallback ===
122
  def load_presets():
@@ -150,101 +154,113 @@ preset_names = list(preset_choices.keys())
150
 
151
  # === Waveform + Spectrogram Generator ===
152
  def show_waveform(audio_file):
153
- audio = AudioSegment.from_file(audio_file)
154
- samples = np.array(audio.get_array_of_samples())
155
- plt.figure(figsize=(10, 2))
156
- plt.plot(samples[:10000], color="blue")
157
- plt.axis("off")
158
- buf = BytesIO()
159
- plt.savefig(buf, format="png", bbox_inches="tight", dpi=100)
160
- plt.close()
161
- buf.seek(0)
162
- return Image.open(buf)
163
-
164
- def show_spectrogram(audio_file):
165
- y, sr = torchaudio.load(audio_file)
166
- y_np = y.numpy().flatten()
167
- S = librosa.feature.melspectrogram(y=y_np, sr=sr)
168
- plt.figure(figsize=(10, 2))
169
- librosa.display.specshow(librosa.power_to_db(S, ref=np.max), sr=sr, x_axis='time', y_axis='mel')
170
- plt.colorbar(format='%+2.0f dB')
171
- plt.title('Spectrogram')
172
- plt.tight_layout()
173
- buf = BytesIO()
174
- plt.savefig(buf, format="png", bbox_inches="tight", dpi=100)
175
- plt.close()
176
- buf.seek(0)
177
- return Image.open(buf)
178
 
179
  # === Session Info Export ===
180
- def generate_session_log(audio_path, effects, isolate_vocals, export_format):
181
  log = {
182
  "timestamp": str(datetime.datetime.now()),
183
  "filename": os.path.basename(audio_path),
184
  "effects_applied": effects,
185
  "isolate_vocals": isolate_vocals,
186
- "export_format": export_format
 
187
  }
188
  return json.dumps(log, indent=2)
189
 
190
- # === Main Processing Function ===
191
  def process_audio(audio_file, selected_effects, isolate_vocals, preset_name, export_format):
192
- audio = AudioSegment.from_file(audio_file)
193
-
194
- effect_map = {
195
- "Noise Reduction": apply_noise_reduction,
196
- "Compress Dynamic Range": apply_compression,
197
- "Add Reverb": apply_reverb,
198
- "Pitch Shift": lambda x: apply_pitch_shift(x),
199
- "Echo": apply_echo,
200
- "Stereo Widening": apply_stereo_widen,
201
- "Bass Boost": apply_bass_boost,
202
- "Treble Boost": apply_treble_boost,
203
- "Normalize": apply_normalize,
204
- }
205
-
206
- effects_to_apply = preset_choices.get(preset_name, selected_effects)
207
- for effect_name in effects_to_apply:
208
- if effect_name in effect_map:
209
- audio = effect_map[effect_name](audio)
210
-
211
- with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
212
- if isolate_vocals:
213
- temp_input = os.path.join(tempfile.gettempdir(), "input.wav")
214
- audio.export(temp_input, format="wav")
215
- vocal_path = apply_vocal_isolation(temp_input)
216
- final_audio = AudioSegment.from_wav(vocal_path)
217
- else:
218
- final_audio = audio
219
-
220
- output_path = f.name
221
- final_audio.export(output_path, format=export_format.lower())
222
-
223
- waveform_image = show_waveform(output_path)
224
- spectrogram_image = show_spectrogram(output_path)
225
- session_log = generate_session_log(audio_file, effects_to_apply, isolate_vocals, export_format)
226
-
227
- return output_path, waveform_image, spectrogram_image, session_log
 
 
 
 
 
 
 
 
 
228
 
229
  # === Batch Processing Function ===
230
  def batch_process_audio(files, selected_effects, isolate_vocals, preset_name, export_format):
231
- output_dir = tempfile.mkdtemp()
232
- results = []
233
- session_logs = []
 
 
 
 
 
 
 
234
 
235
- for file in files:
236
- processed_path, _, _, log = process_audio(file.name, selected_effects, isolate_vocals, preset_name, export_format)
237
- results.append(processed_path)
238
- session_logs.append(log)
 
 
239
 
240
- zip_path = os.path.join(output_dir, "batch_output.zip")
241
- with zipfile.ZipFile(zip_path, 'w') as zipf:
242
- for i, res in enumerate(results):
243
- filename = f"processed_{i}.{export_format.lower()}"
244
- zipf.write(res, filename)
245
- zipf.writestr(f"session_info_{i}.json", session_logs[i])
246
 
247
- return zip_path
 
248
 
249
  # === Gradio Interface Setup ===
250
  effect_options = [
@@ -266,7 +282,6 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
266
  Upload, edit, and export audio with AI-powered tools.
267
  """)
268
 
269
- # ----- Single File Studio Tab -----
270
  with gr.Tab("🎵 Single File Studio"):
271
  gr.Interface(
272
  fn=process_audio,
@@ -280,17 +295,17 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
280
  outputs=[
281
  gr.Audio(label="Processed Audio", type="filepath"),
282
  gr.Image(label="Waveform Preview"),
283
- gr.Image(label="Spectrogram View"),
284
- gr.Textbox(label="Session Log (JSON)", lines=5)
 
285
  ],
286
  title="Edit One File at a Time",
287
- description="Apply effects, preview waveform and spectrogram, and get full session log.",
288
  flagging_mode="never",
289
  submit_btn="Process Audio",
290
  clear_btn=None
291
  )
292
 
293
- # ----- Batch Processing Tab -----
294
  with gr.Tab("🔊 Batch Processing"):
295
  gr.Interface(
296
  fn=batch_process_audio,
@@ -301,7 +316,10 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
301
  gr.Dropdown(choices=preset_names, label="Select Preset", value=preset_names[0] if preset_names else None),
302
  gr.Dropdown(choices=["MP3", "WAV"], label="Export Format", value="MP3")
303
  ],
304
- outputs=gr.File(label="Download ZIP of All Processed Files"),
 
 
 
305
  title="Batch Audio Processor",
306
  description="Upload multiple files, apply effects in bulk, and download all results in a single ZIP.",
307
  flagging_mode="never",
@@ -309,14 +327,9 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
309
  clear_btn=None
310
  )
311
 
312
- # ----- Remix Mode Tab -----
313
  with gr.Tab("🎛 Remix Mode (Split Stems)"):
314
- def remix_mode(audio_file):
315
- stems = stem_split(audio_file.name)
316
- return [gr.File(value=stem) for stem in stems]
317
-
318
  gr.Interface(
319
- fn=remix_mode,
320
  inputs=gr.Audio(label="Upload Music Track", type="filepath"),
321
  outputs=[
322
  gr.File(label="Vocals"),
@@ -330,25 +343,4 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
330
  clear_btn=None
331
  )
332
 
333
- # ----- Session Info Tab -----
334
- with gr.Tab("📝 Session Info"):
335
- def get_session_info(audio_file, selected_effects, isolate_vocals, preset_name, export_format):
336
- return generate_session_log(audio_file, selected_effects, isolate_vocals, export_format)
337
-
338
- gr.Interface(
339
- fn=get_session_info,
340
- inputs=[
341
- gr.Audio(label="Upload Audio", type="filepath"),
342
- gr.CheckboxGroup(choices=effect_options, label="Apply Effects in Order"),
343
- gr.Checkbox(label="Isolate Vocals After Effects"),
344
- gr.Dropdown(choices=preset_names, label="Select Preset", value=preset_names[0] if preset_names else None),
345
- gr.Dropdown(choices=["MP3", "WAV"], label="Export Format", value="MP3")
346
- ],
347
- outputs=gr.Textbox(label="Your Session Info (Copy or Save This)", lines=10),
348
- title="Save Your Session Settings",
349
- description="Get a full log of what was done to your track.",
350
- flagging_mode="never",
351
- clear_btn=None
352
- )
353
-
354
  demo.launch()
 
16
  import zipfile
17
  import datetime
18
  import librosa
19
+ import joblib
20
+ import warnings
21
+
22
+ # Suppress warnings for cleaner logs
23
+ warnings.filterwarnings("ignore")
24
 
25
  # === Helper Functions ===
26
  def audiosegment_to_array(audio):
 
120
  save_track(path, sources[i].cpu(), model.samplerate)
121
  stem_paths.append(path)
122
 
123
+ return [gr.File(value=path) for path in stem_paths]
124
 
125
  # === Preset Loader with Fallback ===
126
  def load_presets():
 
154
 
155
  # === Waveform + Spectrogram Generator ===
156
  def show_waveform(audio_file):
157
+ try:
158
+ audio = AudioSegment.from_file(audio_file)
159
+ samples = np.array(audio.get_array_of_samples())
160
+ plt.figure(figsize=(10, 2))
161
+ plt.plot(samples[:10000], color="blue")
162
+ plt.axis("off")
163
+ buf = BytesIO()
164
+ plt.savefig(buf, format="png", bbox_inches="tight", dpi=100)
165
+ plt.close()
166
+ buf.seek(0)
167
+ return Image.open(buf)
168
+ except Exception as e:
169
+ return None
170
+
171
+ def detect_genre(audio_path):
172
+ try:
173
+ y, sr = torchaudio.load(audio_path)
174
+ mfccs = librosa.feature.mfcc(y=y.numpy().flatten(), sr=sr, n_mfcc=13).mean(axis=1).reshape(1, -1)
175
+ # Dummy classifier – replace with real one later
176
+ return "Speech"
177
+ except Exception:
178
+ return "Unknown"
 
 
 
179
 
180
  # === Session Info Export ===
181
+ def generate_session_log(audio_path, effects, isolate_vocals, export_format, genre):
182
  log = {
183
  "timestamp": str(datetime.datetime.now()),
184
  "filename": os.path.basename(audio_path),
185
  "effects_applied": effects,
186
  "isolate_vocals": isolate_vocals,
187
+ "export_format": export_format,
188
+ "detected_genre": genre
189
  }
190
  return json.dumps(log, indent=2)
191
 
192
+ # === Main Processing Function with Status Updates ===
193
  def process_audio(audio_file, selected_effects, isolate_vocals, preset_name, export_format):
194
+ status = "🔊 Loading audio..."
195
+ try:
196
+ audio = AudioSegment.from_file(audio_file)
197
+ status = "🛠 Applying effects..."
198
+
199
+ effect_map = {
200
+ "Noise Reduction": apply_noise_reduction,
201
+ "Compress Dynamic Range": apply_compression,
202
+ "Add Reverb": apply_reverb,
203
+ "Pitch Shift": lambda x: apply_pitch_shift(x),
204
+ "Echo": apply_echo,
205
+ "Stereo Widening": apply_stereo_widen,
206
+ "Bass Boost": apply_bass_boost,
207
+ "Treble Boost": apply_treble_boost,
208
+ "Normalize": apply_normalize,
209
+ }
210
+
211
+ effects_to_apply = preset_choices.get(preset_name, selected_effects)
212
+ for effect_name in effects_to_apply:
213
+ if effect_name in effect_map:
214
+ audio = effect_map[effect_name](audio)
215
+
216
+ status = "💾 Saving final audio..."
217
+ with tempfile.NamedTemporaryFile(delete=True, suffix=".wav") as f:
218
+ if isolate_vocals:
219
+ temp_input = os.path.join(tempfile.gettempdir(), "input.wav")
220
+ audio.export(temp_input, format="wav")
221
+ vocal_path = apply_vocal_isolation(temp_input)
222
+ final_audio = AudioSegment.from_wav(vocal_path)
223
+ else:
224
+ final_audio = audio
225
+
226
+ output_path = f.name
227
+ final_audio.export(output_path, format=export_format.lower())
228
+
229
+ waveform_image = show_waveform(output_path)
230
+ genre = detect_genre(output_path)
231
+ session_log = generate_session_log(audio_file, effects_to_apply, isolate_vocals, export_format, genre)
232
+
233
+ status = "🎉 Done!"
234
+ return output_path, waveform_image, session_log, genre, status
235
+
236
+ except Exception as e:
237
+ status = f"❌ Error: {str(e)}"
238
+ return None, None, status, "", status
239
 
240
  # === Batch Processing Function ===
241
  def batch_process_audio(files, selected_effects, isolate_vocals, preset_name, export_format):
242
+ status = "🔊 Loading files..."
243
+ try:
244
+ output_dir = tempfile.mkdtemp()
245
+ results = []
246
+ session_logs = []
247
+
248
+ for file in files:
249
+ processed_path, _, log, _, _ = process_audio(file.name, selected_effects, isolate_vocals, preset_name, export_format)
250
+ results.append(processed_path)
251
+ session_logs.append(log)
252
 
253
+ zip_path = os.path.join(output_dir, "batch_output.zip")
254
+ with zipfile.ZipFile(zip_path, 'w') as zipf:
255
+ for i, res in enumerate(results):
256
+ filename = f"processed_{i}.{export_format.lower()}"
257
+ zipf.write(res, filename)
258
+ zipf.writestr(f"session_info_{i}.json", session_logs[i])
259
 
260
+ return zip_path, "📦 ZIP created successfully!"
 
 
 
 
 
261
 
262
+ except Exception as e:
263
+ return None, f"❌ Batch processing failed: {str(e)}"
264
 
265
  # === Gradio Interface Setup ===
266
  effect_options = [
 
282
  Upload, edit, and export audio with AI-powered tools.
283
  """)
284
 
 
285
  with gr.Tab("🎵 Single File Studio"):
286
  gr.Interface(
287
  fn=process_audio,
 
295
  outputs=[
296
  gr.Audio(label="Processed Audio", type="filepath"),
297
  gr.Image(label="Waveform Preview"),
298
+ gr.Textbox(label="Session Log (JSON)", lines=5),
299
+ gr.Textbox(label="Detected Genre", lines=1),
300
+ gr.Textbox(label="Status", value="✅ Ready", lines=1)
301
  ],
302
  title="Edit One File at a Time",
303
+ description="Apply effects, preview waveform, and get full session log.",
304
  flagging_mode="never",
305
  submit_btn="Process Audio",
306
  clear_btn=None
307
  )
308
 
 
309
  with gr.Tab("🔊 Batch Processing"):
310
  gr.Interface(
311
  fn=batch_process_audio,
 
316
  gr.Dropdown(choices=preset_names, label="Select Preset", value=preset_names[0] if preset_names else None),
317
  gr.Dropdown(choices=["MP3", "WAV"], label="Export Format", value="MP3")
318
  ],
319
+ outputs=[
320
+ gr.File(label="Download ZIP of All Processed Files"),
321
+ gr.Textbox(label="Status", value="✅ Ready", lines=1)
322
+ ],
323
  title="Batch Audio Processor",
324
  description="Upload multiple files, apply effects in bulk, and download all results in a single ZIP.",
325
  flagging_mode="never",
 
327
  clear_btn=None
328
  )
329
 
 
330
  with gr.Tab("🎛 Remix Mode (Split Stems)"):
 
 
 
 
331
  gr.Interface(
332
+ fn=stem_split,
333
  inputs=gr.Audio(label="Upload Music Track", type="filepath"),
334
  outputs=[
335
  gr.File(label="Vocals"),
 
343
  clear_btn=None
344
  )
345
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
346
  demo.launch()