tee342 commited on
Commit
f1f4b0d
·
verified ·
1 Parent(s): dd50fcd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +525 -499
app.py CHANGED
@@ -1,5 +1,6 @@
1
  import gradio as gr
2
  from pydub import AudioSegment
 
3
  import numpy as np
4
  import tempfile
5
  import os
@@ -16,6 +17,7 @@ import zipfile
16
  import datetime
17
  import librosa
18
  import warnings
 
19
  from TTS.api import TTS
20
  import base64
21
  import pickle
@@ -25,47 +27,14 @@ import soundfile as sf
25
  print("Gradio version:", gr.__version__)
26
  warnings.filterwarnings("ignore")
27
 
28
- # === Utility functions ===
29
-
30
- def audiosegment_to_array(audio):
31
- return np.array(audio.get_array_of_samples()), audio.frame_rate
32
-
33
- def array_to_audiosegment(samples, frame_rate, channels=1):
34
- return AudioSegment(
35
- samples.tobytes(),
36
- frame_rate=int(frame_rate),
37
- sample_width=samples.dtype.itemsize,
38
- channels=channels
39
- )
40
-
41
- def save_audiosegment_to_temp(audio: AudioSegment, suffix=".wav"):
42
- tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
43
- audio.export(tmp_file.name, format=suffix.lstrip('.'))
44
- return tmp_file.name
45
-
46
- def load_audiofile_to_numpy(path):
47
- samples, sr = sf.read(path, dtype="int16")
48
- if samples.ndim > 1 and samples.shape[1] > 2:
49
- samples = samples[:, :2] # limit to 2 channels max
50
- return samples, sr
51
-
52
- def show_waveform(audio_file):
53
- try:
54
- audio = AudioSegment.from_file(audio_file)
55
- samples = np.array(audio.get_array_of_samples())
56
- plt.figure(figsize=(10, 2))
57
- plt.plot(samples[:10000], color="skyblue")
58
- plt.axis("off")
59
- buf = BytesIO()
60
- plt.savefig(buf, format="png", bbox_inches="tight", dpi=100)
61
- plt.close()
62
- buf.seek(0)
63
- return Image.open(buf)
64
- except Exception:
65
- return None
66
-
67
- # === Effects functions ===
68
 
 
69
  def apply_normalize(audio):
70
  return audio.normalize()
71
 
@@ -75,7 +44,6 @@ def apply_noise_reduction(audio):
75
  return array_to_audiosegment(reduced, frame_rate, channels=audio.channels)
76
 
77
  def apply_compression(audio):
78
- # Simplified placeholder; real compression requires audio processing package
79
  return audio.compress_dynamic_range()
80
 
81
  def apply_reverb(audio):
@@ -83,10 +51,10 @@ def apply_reverb(audio):
83
  return audio.overlay(reverb, position=1000)
84
 
85
  def apply_pitch_shift(audio, semitones=-2):
86
- # Use pydub.frame_rate trick for pitch shift
87
  new_frame_rate = int(audio.frame_rate * (2 ** (semitones / 12)))
88
- shifted = audio._spawn(audio.raw_data, overrides={"frame_rate": new_frame_rate}).set_frame_rate(audio.frame_rate)
89
- return shifted
 
90
 
91
  def apply_echo(audio, delay_ms=500, decay=0.5):
92
  echo = audio - 10
@@ -128,15 +96,36 @@ def apply_stage_mode(audio):
128
 
129
  def apply_bitcrush(audio, bit_depth=8):
130
  samples = np.array(audio.get_array_of_samples())
131
- max_val = 2 ** bit_depth - 1
132
  downsampled = np.round(samples / (32768 / max_val)).astype(np.int16)
133
  return array_to_audiosegment(downsampled, audio.frame_rate // 2, channels=audio.channels)
134
 
135
- # === Loudness Matching (EBU R128) ===
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
136
 
 
137
  try:
138
  import pyloudnorm as pyln
139
  except ImportError:
 
140
  import subprocess
141
  subprocess.run(["pip", "install", "pyloudnorm"])
142
  import pyloudnorm as pyln
@@ -148,9 +137,10 @@ def match_loudness(audio_path, target_lufs=-14.0):
148
  loudness = meter.integrated_loudness(samples)
149
  gain_db = target_lufs - loudness
150
  adjusted = wav + gain_db
151
- out_path = save_audiosegment_to_temp(adjusted, suffix=".wav")
152
  return out_path
153
 
 
154
  def auto_eq(audio, genre="Pop"):
155
  eq_map = {
156
  "Pop": [(200, 500, -3), (2000, 4000, +4)],
@@ -175,7 +165,6 @@ def auto_eq(audio, genre="Pop"):
175
  "Default": []
176
  }
177
  from scipy.signal import butter, sosfilt
178
-
179
  def band_eq(samples, sr, lowcut, highcut, gain):
180
  sos = butter(10, [lowcut, highcut], btype='band', output='sos', fs=sr)
181
  filtered = sosfilt(sos, samples)
@@ -188,8 +177,7 @@ def auto_eq(audio, genre="Pop"):
188
  samples = band_eq(samples, sr, low, high, gain)
189
  return array_to_audiosegment(samples.astype(np.int16), sr, channels=audio.channels)
190
 
191
- # === Load and save track for models ===
192
-
193
  def load_track_local(path, sample_rate, channels=2):
194
  sig, rate = torchaudio.load(path)
195
  if rate != sample_rate:
@@ -202,8 +190,6 @@ def save_track(path, wav, sample_rate):
202
  path = Path(path)
203
  torchaudio.save(str(path), wav, sample_rate)
204
 
205
- # === Vocal isolation ===
206
-
207
  def apply_vocal_isolation(audio_path):
208
  model = pretrained.get_model(name='htdemucs')
209
  wav = load_track_local(audio_path, model.samplerate, channels=2)
@@ -216,26 +202,24 @@ def apply_vocal_isolation(audio_path):
216
  save_track(out_path, vocal_track, model.samplerate)
217
  return out_path
218
 
219
- # === Stem splitting ===
220
-
221
  def stem_split(audio_path):
222
  model = pretrained.get_model(name='htdemucs')
223
  wav = load_track_local(audio_path, model.samplerate, channels=2)
224
  sources = apply_model(model, wav[None])[0]
225
  output_dir = tempfile.mkdtemp()
226
- file_paths = []
227
  for i, name in enumerate(['drums', 'bass', 'other', 'vocals']):
228
  path = os.path.join(output_dir, f"{name}.wav")
229
  save_track(path, sources[i].cpu(), model.samplerate)
230
- file_paths.append(path)
231
- # Return file paths in order vocals, drums, bass, other to match UI components
232
- return file_paths[3], file_paths[0], file_paths[1], file_paths[2]
233
-
234
- # === Core processing function with numpy output ===
235
 
 
236
  def process_audio(audio_file, selected_effects, isolate_vocals, preset_name, export_format):
237
  status = "🔊 Loading audio..."
238
  try:
 
239
  audio = AudioSegment.from_file(audio_file)
240
  status = "🛠 Applying effects..."
241
 
@@ -243,92 +227,162 @@ def process_audio(audio_file, selected_effects, isolate_vocals, preset_name, exp
243
  "Noise Reduction": apply_noise_reduction,
244
  "Compress Dynamic Range": apply_compression,
245
  "Add Reverb": apply_reverb,
246
- "Pitch Shift": lambda audio: apply_pitch_shift(audio, semitones=-2),
247
  "Echo": apply_echo,
248
  "Stereo Widening": apply_stereo_widen,
249
  "Bass Boost": apply_bass_boost,
250
  "Treble Boost": apply_treble_boost,
251
  "Normalize": apply_normalize,
252
- "Limiter": lambda audio: apply_limiter(audio, limit_dB=-1),
253
- "Auto Gain": lambda audio: apply_auto_gain(audio, target_dB=-20),
254
- "Vocal Distortion": lambda audio: apply_vocal_distortion(audio),
255
- "Stage Mode": apply_stage_mode,
256
- "Harmony": apply_harmony,
257
- "Bitcrusher": apply_bitcrush,
258
  }
259
 
 
260
  for effect_name in selected_effects:
261
  if effect_name in effect_map_real:
262
  audio = effect_map_real[effect_name](audio)
 
263
 
264
  status = "💾 Saving final audio..."
265
-
266
- if isolate_vocals:
267
- temp_input = save_audiosegment_to_temp(audio, suffix=".wav")
268
- vocal_path = apply_vocal_isolation(temp_input)
269
- final_audio_file = vocal_path
270
- else:
271
- final_audio_file = save_audiosegment_to_temp(audio, suffix="." + export_format.lower())
272
-
273
- waveform_image = show_waveform(final_audio_file)
274
-
275
- samples, sr = load_audiofile_to_numpy(final_audio_file)
276
-
277
- session_log = json.dumps({
278
- "timestamp": str(datetime.datetime.now()),
279
- "filename": os.path.basename(audio_file),
280
- "effects_applied": selected_effects,
281
- "isolate_vocals": isolate_vocals,
282
- "export_format": export_format,
283
- "detected_genre": "Unknown"
284
- }, indent=2)
285
-
286
  status = "🎉 Done!"
287
- return (samples, sr), waveform_image, session_log, "Unknown", status
288
 
289
  except Exception as e:
290
  status = f"❌ Error: {str(e)}"
291
- return None, None, status, "", status
292
 
293
- # === Batch processing ===
 
 
 
 
 
 
 
 
 
 
 
 
 
 
294
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
295
  def batch_process_audio(files, selected_effects, isolate_vocals, preset_name, export_format):
296
  try:
297
  output_dir = tempfile.mkdtemp()
298
  results = []
299
  session_logs = []
300
  for file in files:
301
- processed_path, _, log, _, _ = process_audio(file.name, selected_effects, isolate_vocals, preset_name, export_format)
302
  results.append(processed_path)
303
  session_logs.append(log)
304
-
305
  zip_path = os.path.join(tempfile.gettempdir(), "batch_output.zip")
306
  with zipfile.ZipFile(zip_path, 'w') as zipf:
307
  for i, res in enumerate(results):
308
- if res is None:
309
- continue
310
- samples, sr = res
311
- tmp_wav = os.path.join(output_dir, f"processed_{i}.wav")
312
- sf.write(tmp_wav, samples, sr)
313
- zipf.write(tmp_wav, f"processed_{i}.wav")
314
  zipf.writestr(f"session_info_{i}.json", session_logs[i])
315
  return zip_path, "📦 ZIP created successfully!"
316
  except Exception as e:
317
  return None, f"❌ Batch processing failed: {str(e)}"
318
 
319
- # === AI Remastering ===
320
-
321
  def ai_remaster(audio_path):
322
  try:
323
  audio = AudioSegment.from_file(audio_path)
324
  samples, sr = audiosegment_to_array(audio)
325
  reduced = nr.reduce_noise(y=samples, sr=sr)
326
  cleaned = array_to_audiosegment(reduced, sr, channels=audio.channels)
327
- cleaned_wav_path = save_audiosegment_to_temp(cleaned, suffix=".wav")
 
328
  isolated_path = apply_vocal_isolation(cleaned_wav_path)
329
  final_path = ai_mastering_chain(isolated_path, genre="Pop", target_lufs=-14.0)
330
- samples, sr = load_audiofile_to_numpy(final_path)
331
- return (samples, sr)
332
  except Exception as e:
333
  print(f"Remastering Error: {str(e)}")
334
  return None
@@ -336,16 +390,14 @@ def ai_remaster(audio_path):
336
  def ai_mastering_chain(audio_path, genre="Pop", target_lufs=-14.0):
337
  audio = AudioSegment.from_file(audio_path)
338
  audio = auto_eq(audio, genre=genre)
339
- loudness_adjusted_path = match_loudness(audio_path, target_lufs=target_lufs)
340
- audio = AudioSegment.from_file(loudness_adjusted_path)
341
  audio = apply_stereo_widen(audio, pan_amount=0.3)
342
- out_path = save_audiosegment_to_temp(audio, suffix=".wav")
 
343
  return out_path
344
 
345
- # === Harmonic saturation ===
346
-
347
- def harmonic_saturation(audio_path, saturation_type="Tube", intensity=0.2):
348
- audio = AudioSegment.from_file(audio_path)
349
  samples = np.array(audio.get_array_of_samples()).astype(np.float32)
350
  if saturation_type == "Tube":
351
  saturated = np.tanh(intensity * samples)
@@ -357,98 +409,122 @@ def harmonic_saturation(audio_path, saturation_type="Tube", intensity=0.2):
357
  saturated = np.log1p(np.abs(samples)) * np.sign(samples) * intensity
358
  else:
359
  saturated = samples
 
360
 
361
- saturated_audio = array_to_audiosegment(saturated.astype(np.int16), audio.frame_rate, channels=audio.channels)
362
- out_path = save_audiosegment_to_temp(saturated_audio, suffix=".wav")
363
- samples, sr = load_audiofile_to_numpy(out_path)
364
- return (samples, sr)
365
-
366
- # === Vocal harmony ===
367
-
368
- def run_harmony(audio_file):
369
- if not audio_file:
370
- return None, "❌ Upload a vocal clip first."
371
- try:
372
- audio = AudioSegment.from_file(audio_file)
373
- out_audio = apply_harmony(audio)
374
- out_path = save_audiosegment_to_temp(out_audio, suffix=".wav")
375
- samples, sr = load_audiofile_to_numpy(out_path)
376
- return (samples, sr), "✅ Success"
377
- except Exception as e:
378
- return None, f"❌ Error: {str(e)}"
379
 
380
- # === Additional feature implementations ===
381
- # For brevity, you can adapt functions like auto_tune_vocal, formant_correct, clone_voice,
382
- # batch processing, voice swap, prompt-based editing, etc following the sample pattern above.
383
- # Return (samples, sr) where audio is output to gr.Audio with `type="numpy"`.
 
 
 
 
 
 
 
384
 
385
- # === Example for auto_tune_vocal ===
 
 
 
386
 
387
- def key_to_semitone(key="C"):
388
- keys = {"C": 0, "C#": 1, "D": 2, "D#": 3, "E": 4, "F": 5,
389
- "F#": 6, "G": 7, "G#": 8, "A": 9, "A#": 10, "B": 11}
390
- return keys.get(key, 0)
391
 
392
- def auto_tune_vocal(audio_file, target_key="C"):
 
393
  try:
394
- audio = AudioSegment.from_file(audio_file.name)
395
  semitones = key_to_semitone(target_key)
396
  tuned_audio = apply_pitch_shift(audio, semitones)
397
- out_path = save_audiosegment_to_temp(tuned_audio, suffix=".wav")
398
- samples, sr = load_audiofile_to_numpy(out_path)
399
- return (samples, sr)
400
  except Exception as e:
401
  print(f"Auto-Tune Error: {e}")
402
  return None
403
 
404
- # === UI Setup ===
 
 
 
405
 
406
- preset_choices = {
407
- "Default": [],
408
- "Clean Podcast": ["Noise Reduction", "Normalize"],
409
- "Podcast Mastered": ["Noise Reduction", "Normalize", "Compress Dynamic Range"],
410
- "Radio Ready": ["Bass Boost", "Treble Boost", "Limiter"],
411
- "Music Production": ["Reverb", "Stereo Widening", "Pitch Shift"],
412
- "ASMR Creator": ["Noise Gate", "Auto Gain", "Low-Pass Filter"],
413
- "Voiceover Pro": ["Vocal Isolation", "TTS", "EQ Match"],
414
- "8-bit Retro": ["Bitcrusher", "Echo", "Mono Downmix"],
415
- "🎙 Clean Vocal": ["Noise Reduction", "Normalize", "High Pass Filter (80Hz)"],
416
- "🧪 Vocal Distortion": ["Vocal Distortion", "Reverb", "Compress Dynamic Range"],
417
- "🎶 Singer's Harmony": ["Harmony", "Stereo Widening", "Pitch Shift"],
418
- "🌫 ASMR Vocal": ["Auto Gain", "Low-Pass Filter (3000Hz)", "Noise Gate"],
419
- "🎼 Stage Mode": ["Reverb", "Bass Boost", "Limiter"],
420
- "🎵 Auto-Tune Style": ["Pitch Shift (+1 semitone)", "Normalize", "Treble Boost"],
421
- "🎤 R&B Vocal": ["Noise Reduction", "Bass Boost (100-300Hz)", "Treble Boost (2000-4000Hz)"],
422
- "💃 Soul Vocal": ["Noise Reduction", "Bass Boost (80-200Hz)", "Treble Boost (1500-3500Hz)"],
423
- "🕺 Funk Groove": ["Bass Boost (80-200Hz)", "Treble Boost (1000-3000Hz)"],
424
- "Studio Master": ["Noise Reduction", "Normalize", "Bass Boost", "Treble Boost", "Limiter"],
425
- "Podcast Voice": ["Noise Reduction", "Auto Gain", "High Pass Filter (85Hz)"],
426
- "Lo-Fi Chill": ["Noise Gate", "Low-Pass Filter (3000Hz)", "Mono Downmix", "Bitcrusher"],
427
- "Vocal Clarity": ["Noise Reduction", "EQ Match", "Reverb", "Auto Gain"],
428
- "Retro Game Sound": ["Bitcrusher", "Echo", "Mono Downmix"],
429
- "Live Stream Optimized": ["Noise Reduction", "Auto Gain", "Saturation", "Normalize"],
430
- "Deep Bass Trap": ["Bass Boost (60-120Hz)", "Low-Pass Filter (200Hz)", "Limiter"],
431
- "8-bit Voice": ["Bitcrusher", "Pitch Shift (-4 semitones)", "Mono Downmix"],
432
- "Pop Vocal": ["Noise Reduction", "Normalize", "EQ Match (Pop)", "Auto Gain"],
433
- "EDM Lead": ["Noise Reduction", "Tape Saturation", "Stereo Widening", "Limiter"],
434
- "Hip-Hop Beat": ["Bass Boost (60-200Hz)", "Treble Boost (7000-10000Hz)", "Compression"],
435
- "ASMR Whisper": ["Noise Gate", "Auto Gain", "Low-Pass Filter (5000Hz)"],
436
- "Jazz Piano Clean": ["Noise Reduction", "EQ Match (Jazz Piano)", "Normalize"],
437
- "Metal Guitar": ["Noise Reduction", "EQ Match (Metal)", "Compression"],
438
- "Podcast Intro": ["Echo", "Reverb", "Pitch Shift (+1 semitone)"],
439
- "Vintage Radio": ["Bitcrusher", "Low-Pass Filter (4000Hz)", "Saturation"],
440
- "Speech Enhancement": ["Noise Reduction", "High Pass Filter (100Hz)", "Normalize", "Auto Gain"],
441
- "Nightcore Speed": ["Pitch Shift (+3 semitones)", "Time Stretch (1.2x)", "Treble Boost"],
442
- "Robot Voice": ["Pitch Shift (-12 semitones)", "Bitcrusher", "Low-Pass Filter (2000Hz)"],
443
- "Underwater Effect": ["Low-Pass Filter (1000Hz)", "Reverb", "Echo"],
444
- "Alien Voice": ["Pitch Shift (+7 semitones)", "Tape Saturation", "Echo"],
445
- "Cinematic Voice": ["Reverb", "Limiter", "Bass Boost", "Auto Gain"],
446
- "Phone Call Sim": ["Low-Pass Filter (3400Hz)", "Noise Gate", "Compression"],
447
- "AI Generated Voice": ["TTS", "Pitch Shift", "Vocal Distortion"],
448
- }
449
 
450
- preset_names = list(preset_choices.keys())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
451
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
452
  with gr.Blocks(css="""
453
  body {
454
  font-family: 'Segoe UI', sans-serif;
@@ -470,368 +546,318 @@ with gr.Blocks(css="""
470
  color: white !important;
471
  border-radius: 10px;
472
  padding: 10px 20px;
 
473
  box-shadow: 0 0 10px #2563eb44;
474
  border: none;
475
  }
 
 
 
 
 
 
 
 
 
 
 
476
  """) as demo:
477
  gr.HTML('''
478
  <div class="studio-header">
479
  <h3>Where Your Audio Meets Intelligence</h3>
480
  </div>
481
  ''')
482
-
483
  gr.Markdown("### Upload, edit, export — powered by AI!")
484
 
485
- # --- Single File Studio ---
486
  with gr.Tab("🎵 Single File Studio"):
487
  with gr.Row():
488
  with gr.Column(min_width=300):
489
  input_audio = gr.Audio(label="Upload Audio", type="filepath")
490
- effect_checkbox = gr.CheckboxGroup(
491
- choices=list({e for effects in preset_choices.values() for e in effects}),
492
- label="Apply Effects in Order"
493
- )
494
  preset_dropdown = gr.Dropdown(choices=preset_names, label="Select Preset", value=preset_names[0])
495
- export_format = gr.Dropdown(choices=["WAV", "MP3"], label="Export Format", value="WAV")
496
  isolate_vocals = gr.Checkbox(label="Isolate Vocals After Effects")
497
  submit_btn = gr.Button("Process Audio")
498
-
499
  with gr.Column(min_width=300):
500
- output_audio = gr.Audio(label="Processed Audio", type="numpy")
501
  waveform_img = gr.Image(label="Waveform Preview")
502
  session_log_out = gr.Textbox(label="Session Log", lines=5)
503
- genre_out = gr.Textbox(label="Detected Genre")
504
  status_box = gr.Textbox(label="Status", value="✅ Ready", lines=1)
 
 
 
 
 
505
 
506
- def update_effects_for_preset(preset_name):
507
- return preset_choices.get(preset_name, [])
508
-
509
- preset_dropdown.change(fn=update_effects_for_preset, inputs=preset_dropdown, outputs=effect_checkbox)
510
-
511
- def wrapped_process_audio(audio_file, selected_effects, isolate_vocals, preset_name, export_format):
512
- effects = preset_choices.get(preset_name, []) if preset_name in preset_choices else selected_effects
513
- return process_audio(audio_file, effects, isolate_vocals, preset_name, export_format)
514
-
515
- submit_btn.click(
516
- fn=wrapped_process_audio,
517
- inputs=[input_audio, effect_checkbox, isolate_vocals, preset_dropdown, export_format],
518
- outputs=[output_audio, waveform_img, session_log_out, genre_out, status_box]
519
- )
520
-
521
- # --- Remix Mode ---
522
  with gr.Tab("🎛 Remix Mode"):
523
  with gr.Row():
524
  with gr.Column(min_width=200):
525
  input_audio_remix = gr.Audio(label="Upload Music Track", type="filepath")
526
  split_button = gr.Button("Split Into Drums, Bass, Vocals, etc.")
527
  with gr.Column(min_width=400):
528
- vocals_file = gr.File(label="Vocals")
529
- drums_file = gr.File(label="Drums")
530
- bass_file = gr.File(label="Bass")
531
- other_file = gr.File(label="Other")
532
-
533
- split_button.click(
534
- fn=stem_split,
535
- inputs=[input_audio_remix],
536
- outputs=[vocals_file, drums_file, bass_file, other_file]
537
- )
538
-
539
- # --- AI Remastering ---
540
  with gr.Tab("🔮 AI Remastering"):
541
- input_audio_remaster = gr.Audio(label="Upload Low-Quality Recording", type="filepath")
542
- output_audio_remaster = gr.Audio(label="Studio-Grade Output", type="numpy")
543
- remaster_status = gr.Textbox(label="Status", value="Ready", interactive=False)
544
- remaster_btn = gr.Button("Remaster")
545
-
546
- remaster_btn.click(
547
  fn=ai_remaster,
548
- inputs=input_audio_remaster,
549
- outputs=[output_audio_remaster]
550
- )
551
- remaster_btn.click(
552
- fn=lambda _: "✅ Done!",
553
- inputs=remaster_btn,
554
- outputs=remaster_status
555
  )
556
 
557
- # --- Harmonic Saturation ---
558
  with gr.Tab("🧬 Harmonic Saturation"):
559
- input_audio_sat = gr.Audio(label="Upload Track", type="filepath")
560
- saturation_type = gr.Dropdown(choices=["Tube", "Tape", "Console", "Mix Bus"], label="Saturation Type", value="Tube")
561
- saturation_intensity = gr.Slider(minimum=0.1, maximum=1.0, value=0.2, step=0.01, label="Intensity")
562
- output_audio_sat = gr.Audio(label="Warm Output", type="numpy")
563
- sat_btn = gr.Button("Apply Saturation")
564
-
565
- sat_btn.click(
566
  fn=harmonic_saturation,
567
- inputs=[input_audio_sat, saturation_type, saturation_intensity],
568
- outputs=output_audio_sat
 
 
 
 
 
 
 
569
  )
570
 
571
- with gr.Tab("🎧 Vocal Doubler / Harmonizer"):
572
- input_audio_harmony = gr.Audio(label="Upload Vocal Clip", type="filepath")
573
- output_audio_harmony = gr.Audio(label="Doubled Output", type="numpy")
574
- status_harmony = gr.Textbox(label="Status", interactive=False)
575
- harmony_btn = gr.Button("Add Harmony")
576
-
577
- harmony_btn.click(
578
- fn=run_harmony,
579
- inputs=input_audio_harmony,
580
- outputs=[output_audio_harmony, status_harmony]
581
  )
582
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
583
 
584
- # Add your remaining tabs (Batch Processing, Save/Load Project, Auto-Tune, etc.)
585
- # similar to above, adapting outputs to return (samples, sr) tuples for gr.Audio (type="numpy").
586
-
587
- def run_harmony(audio_file):
588
- if not audio_file:
589
- return None, " Upload a vocal clip first."
590
- try:
591
- audio = AudioSegment.from_file(audio_file)
592
- out_audio = apply_harmony(audio)
593
- out_path = save_audiosegment_to_temp(out_audio, suffix=".wav")
594
-
595
- # Load audio as numpy array for Gradio output type="numpy"
596
- samples, sr = sf.read(out_path, dtype='int16')
597
- return (samples, sr), "✅ Success"
598
- except Exception as e:
599
- return None, f"❌ Error: {str(e)}"
600
-
601
- with gr.Blocks(css="""
602
- /* your CSS here */
603
- """) as demo:
604
-
605
- # --- Batch Processing ---
606
- # --- Batch Processing ---
607
- with gr.Tab("🔊 Batch Processing"):
608
- batch_files = gr.File(label="Upload Multiple Files", file_count="multiple")
609
- batch_effects = gr.CheckboxGroup(choices=preset_choices["Default"], label="Apply Effects in Order")
610
- batch_isolate = gr.Checkbox(label="Isolate Vocals After Effects")
611
- batch_preset_dropdown = gr.Dropdown(choices=preset_names, label="Select Preset", value=preset_names[0])
612
- batch_export_format = gr.Dropdown(choices=["MP3", "WAV"], label="Export Format", value="MP3")
613
- batch_process_btn = gr.Button("Process All Files")
614
- batch_download = gr.File(label="Download ZIP of All Processed Files")
615
- batch_status = gr.Textbox(label="Status", value="Ready", interactive=False)
616
-
617
- batch_process_btn.click(
618
- fn=batch_process_audio,
619
- inputs=[batch_files, batch_effects, batch_isolate, batch_preset_dropdown, batch_export_format],
620
- outputs=[batch_download, batch_status]
621
- )
622
-
623
- # --- AI Auto-Tune ---
624
- with gr.Tab("🎤 AI Auto-Tune"):
625
- auto_tune_file = gr.File(label="Source Voice Clip")
626
- auto_tune_key = gr.Textbox(label="Target Key", value="C", lines=1)
627
- auto_tune_output = gr.Audio(label="Pitch-Corrected Output", type="filepath")
628
- auto_tune_btn = gr.Button("Auto-Tune")
629
-
630
- auto_tune_btn.click(
631
- fn=auto_tune_vocal,
632
- inputs=[auto_tune_file, auto_tune_key],
633
- outputs=auto_tune_output
634
- )
635
-
636
- # --- Frequency Spectrum ---
637
- with gr.Tab("📊 Frequency Spectrum"):
638
- spectrum_input = gr.Audio(label="Upload Track", type="filepath")
639
- spectrum_output = gr.Image(label="Spectrum Analysis")
640
- spectrum_btn = gr.Button("Visualize Spectrum")
641
-
642
- spectrum_btn.click(
643
- fn=visualize_spectrum,
644
- inputs=spectrum_input,
645
- outputs=spectrum_output
646
- )
647
-
648
- # --- Loudness Graph ---
649
- with gr.Tab("📈 Loudness Graph"):
650
- loudness_input = gr.Audio(label="Upload Track", type="filepath")
651
- loudness_target = gr.Slider(minimum=-24, maximum=-6, value=-14, step=0.5, label="Target LUFS")
652
- loudness_output = gr.Audio(label="Normalized Output", type="filepath")
653
- loudness_btn = gr.Button("Match Loudness")
654
-
655
- loudness_btn.click(
656
- fn=match_loudness,
657
- inputs=[loudness_input, loudness_target],
658
- outputs=loudness_output
659
- )
660
-
661
- # --- Save/Load Project ---
662
- with gr.Tab("📁 Save/Load Project"):
663
- with gr.Row():
664
- with gr.Column(min_width=300):
665
- project_audio_file = gr.File(label="Original Audio")
666
- project_preset = gr.Dropdown(choices=preset_names, label="Used Preset", value=preset_names[0])
667
- project_effects = gr.CheckboxGroup(choices=preset_choices["Default"], label="Applied Effects")
668
- save_proj_btn = gr.Button("Save Project")
669
- project_file_out = gr.File(label="Project File (.aiproj)")
670
- with gr.Column(min_width=300):
671
- load_proj_file = gr.File(label="Upload .aiproj File")
672
- loaded_preset_out = gr.Dropdown(choices=preset_names, label="Loaded Preset")
673
- loaded_effects_out = gr.CheckboxGroup(choices=preset_choices["Default"], label="Loaded Effects")
674
- load_proj_btn = gr.Button("Load Project")
675
-
676
- save_proj_btn.click(
677
- fn=save_project,
678
- inputs=[project_audio_file, project_preset, project_effects],
679
- outputs=project_file_out
680
- )
681
 
682
- load_proj_btn.click(
683
- fn=load_project,
684
- inputs=load_proj_file,
685
- outputs=[loaded_preset_out, loaded_effects_out]
686
- )
 
 
687
 
688
- # --- Prompt-Based Editing ---
689
- with gr.Tab("🧠 Prompt-Based Editing"):
690
- prompt_audio = gr.File(label="Upload Audio", type="filepath")
691
- prompt_text = gr.Textbox(label="Describe What You Want", lines=5)
692
- prompt_output = gr.Audio(label="Edited Output", type="filepath")
693
- prompt_btn = gr.Button("Process Prompt")
694
-
695
- prompt_btn.click(
696
- fn=process_prompt,
697
- inputs=[prompt_audio, prompt_text],
698
- outputs=prompt_output
699
- )
700
 
701
- # --- Custom EQ Editor ---
702
- with gr.Tab("🎛 Custom EQ Editor"):
703
- eq_audio = gr.Audio(label="Upload Track", type="filepath")
704
- genre_list = list(preset_choices.keys())
705
- eq_genre = gr.Dropdown(choices=genre_list, value="Pop", label="Genre")
706
- eq_output = gr.Audio(label="EQ-Enhanced Output", type="filepath")
707
- eq_btn = gr.Button("Apply EQ")
708
-
709
- eq_btn.click(
710
- fn=lambda audio_path, genre: auto_eq(AudioSegment.from_file(audio_path), genre),
711
- inputs=[eq_audio, eq_genre],
712
- outputs=eq_output
713
- )
 
 
 
 
 
 
 
 
 
 
714
 
715
- # --- A/B Compare ---
716
- with gr.Tab("🎯 A/B Compare"):
717
- ab_track1 = gr.Audio(label="Version A", type="filepath")
718
- ab_track2 = gr.Audio(label="Version B", type="filepath")
719
- ab_out1 = gr.Audio(label="Version A", type="filepath")
720
- ab_out2 = gr.Audio(label="Version B", type="filepath")
721
- ab_btn = gr.Button("Compare")
722
-
723
- ab_btn.click(
724
- fn=compare_ab,
725
- inputs=[ab_track1, ab_track2],
726
- outputs=[ab_out1, ab_out2]
727
- )
728
 
729
- # --- Loop Playback ---
730
- with gr.Tab("🔁 Loop Playback"):
731
- loop_audio = gr.Audio(label="Upload Track", type="filepath")
732
- loop_start = gr.Slider(minimum=0, maximum=30000, step=100, value=5000, label="Start MS")
733
- loop_end = gr.Slider(minimum=100, maximum=30000, step=100, value=10000, label="End MS")
734
- loop_repeats = gr.Slider(minimum=1, maximum=10, value=2, label="Repeat Loops")
735
- loop_output = gr.Audio(label="Looped Output", type="filepath")
736
- loop_btn = gr.Button("Loop Section")
737
-
738
- loop_btn.click(
739
- fn=loop_section,
740
- inputs=[loop_audio, loop_start, loop_end, loop_repeats],
741
- outputs=loop_output
742
- )
743
 
744
- # --- Share Effect Chain ---
745
- with gr.Tab("🔗 Share Effect Chain"):
746
- share_effects = gr.CheckboxGroup(choices=preset_choices["Default"], label="Select Effects")
747
- share_code = gr.Textbox(label="Share Code", lines=2)
748
- share_btn = gr.Button("Generate Share Code")
 
 
 
 
 
 
 
 
 
 
 
749
 
750
- share_btn.click(
751
- fn=lambda x: json.dumps(x),
752
- inputs=share_effects,
753
- outputs=share_code
754
- )
 
 
 
 
 
 
 
 
 
755
 
756
- with gr.Tab("📥 Load Shared Chain"):
757
- load_code = gr.Textbox(label="Paste Shared Code", lines=2)
758
- loaded_effects = gr.CheckboxGroup(choices=preset_choices["Default"], label="Loaded Effects")
759
- load_code_btn = gr.Button("Load Effects")
760
-
761
- def load_shared_code(code_str):
762
- try:
763
- return json.loads(code_str)
764
- except Exception:
765
- return []
766
-
767
- load_code_btn.click(
768
- fn=load_shared_code,
769
- inputs=load_code,
770
- outputs=loaded_effects
771
- )
772
 
773
- # --- Keyboard Shortcuts ---
774
- with gr.Tab("⌨ Keyboard Shortcuts"):
775
- gr.Markdown("""
776
- ### Keyboard Controls
777
- - `Ctrl + Z`: Undo last effect
778
- - `Ctrl + Y`: Redo
779
- - `Spacebar`: Play/Stop playback
780
- - `Ctrl + S`: Save current session
781
- - `Ctrl + O`: Open session
782
- - `Ctrl + C`: Copy effect chain
783
- - `Ctrl + V`: Paste effect chain
784
- """)
785
-
786
- # --- Vocal Formant Correction ---
787
- with gr.Tab("🧑‍🎤 Vocal Formant Correction"):
788
- formant_audio = gr.Audio(label="Upload Vocal Track", type="filepath")
789
- formant_shift = gr.Slider(minimum=-2, maximum=2, value=1.0, step=0.1, label="Formant Shift")
790
- formant_output = gr.Audio(label="Natural-Sounding Vocal", type="filepath")
791
- formant_btn = gr.Button("Apply Correction")
792
-
793
- formant_btn.click(
794
- fn=formant_correct,
795
- inputs=[formant_audio, formant_shift],
796
- outputs=formant_output
797
- )
798
 
799
- # --- Voice Swap / Cloning ---
800
- with gr.Tab("🔁 Voice Swap / Cloning"):
801
- source_voice = gr.File(label="Source Voice Clip")
802
- reference_voice = gr.File(label="Reference Voice")
803
- clone_output = gr.Audio(label="Converted Output", type="filepath")
804
- clone_btn = gr.Button("Clone Voice")
805
-
806
- clone_btn.click(
807
- fn=clone_voice,
808
- inputs=[source_voice, reference_voice],
809
- outputs=clone_output
810
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
811
 
812
- # --- DAW Template Export ---
813
- with gr.Tab("🎛 DAW Template Export"):
814
- daw_stems = gr.File(label="Upload Stems", file_count="multiple")
815
- daw_output = gr.File(label="DAW Template (.json/.als/.flp)")
816
- daw_btn = gr.Button("Generate Template")
 
 
 
 
 
 
 
817
 
818
- daw_btn.click(
819
- fn=generate_ableton_template,
820
- inputs=daw_stems,
821
- outputs=daw_output
822
- )
 
 
823
 
824
- # --- Export Full Mix ZIP ---
825
- with gr.Tab("📁 Export Full Mix ZIP"):
826
- stems_files = gr.File(label="Stems", file_count="multiple")
827
- final_mix_file = gr.File(label="Final Mix")
828
- full_zip_output = gr.File(label="Full Mix Archive (.zip)")
829
- export_zip_btn = gr.Button("Export ZIP")
830
-
831
- export_zip_btn.click(
832
- fn=export_full_mix,
833
- inputs=[stems_files, final_mix_file],
834
- outputs=full_zip_output
835
- )
836
 
837
- demo.launch()
 
 
1
  import gradio as gr
2
  from pydub import AudioSegment
3
+ from pydub.silence import detect_nonsilent
4
  import numpy as np
5
  import tempfile
6
  import os
 
17
  import datetime
18
  import librosa
19
  import warnings
20
+ from faster_whisper import WhisperModel
21
  from TTS.api import TTS
22
  import base64
23
  import pickle
 
27
  print("Gradio version:", gr.__version__)
28
  warnings.filterwarnings("ignore")
29
 
30
+ # Helper to convert file to base64
31
+ def file_to_base64_audio(file_path, mime_type="audio/wav"):
32
+ with open(file_path, "rb") as f:
33
+ data = f.read()
34
+ b64 = base64.b64encode(data).decode()
35
+ return f"data:{mime_type};base64,{b64}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
 
37
+ # === Effects Definitions ===
38
  def apply_normalize(audio):
39
  return audio.normalize()
40
 
 
44
  return array_to_audiosegment(reduced, frame_rate, channels=audio.channels)
45
 
46
  def apply_compression(audio):
 
47
  return audio.compress_dynamic_range()
48
 
49
  def apply_reverb(audio):
 
51
  return audio.overlay(reverb, position=1000)
52
 
53
def apply_pitch_shift(audio, semitones=-2):
    """Shift the pitch of *audio* by *semitones* (tape-style: duration changes too).

    The raw sample data is re-labelled with a frame rate scaled by
    ``2 ** (semitones / 12)`` and then converted back to the original frame
    rate, so the result stays compatible with the rest of the effect chain.

    The previous implementation resampled the sample array *and* changed the
    frame rate, which applied the ratio twice (e.g. -2 semitones sounded like
    -4). It also interpolated straight across interleaved stereo samples and
    forced the data to int16 regardless of ``audio.sample_width``.

    Args:
        audio: A pydub ``AudioSegment``.
        semitones: Number of semitones to shift; negative lowers the pitch.

    Returns:
        The shifted segment at the original frame rate, or *audio* itself
        when *semitones* is 0.
    """
    if not semitones:
        return audio
    ratio = 2 ** (semitones / 12)
    shifted_rate = int(audio.frame_rate * ratio)
    # Same bytes, different nominal rate: changes speed and pitch by `ratio`.
    shifted = audio._spawn(audio.raw_data, overrides={"frame_rate": shifted_rate})
    # Resample back so downstream overlay/export see the original rate.
    return shifted.set_frame_rate(audio.frame_rate)
58
 
59
  def apply_echo(audio, delay_ms=500, decay=0.5):
60
  echo = audio - 10
 
96
 
97
def apply_bitcrush(audio, bit_depth=8):
    """Reduce the bit depth and halve the sample rate of *audio*.

    Samples are snapped to ``2 ** bit_depth`` levels and then scaled back to
    the 16-bit range, so the loudness is preserved. The previous version left
    the quantised values unscaled (roughly 42 dB quieter at the default
    depth). Every second frame is dropped to match the halved frame rate;
    previously the rate was halved without dropping samples, which played the
    clip at half speed.

    NOTE(review): like the original, this assumes 16-bit PCM input.

    Args:
        audio: A pydub ``AudioSegment``.
        bit_depth: Target resolution in bits; clamped to 1..16.

    Returns:
        A new segment built with ``array_to_audiosegment`` at half the
        original frame rate.
    """
    depth = int(min(max(bit_depth, 1), 16))
    channels = audio.channels
    samples = np.array(audio.get_array_of_samples()).astype(np.float64)
    step = 65536 / (2 ** depth)  # width of one quantisation level in int16 units
    crushed = np.round(samples / step) * step
    crushed = np.clip(crushed, -32768, 32767).astype(np.int16)
    # Decimate whole frames so interleaved channels stay aligned.
    frames = crushed.reshape(-1, channels)[::2]
    downsampled = np.ascontiguousarray(frames).reshape(-1)
    return array_to_audiosegment(downsampled, audio.frame_rate // 2, channels=channels)
102
 
103
+ # === Helper Functions ===
104
def audiosegment_to_array(audio):
    """Return ``(samples, frame_rate)`` for *audio*.

    ``samples`` is a NumPy array built from ``audio.get_array_of_samples()``.
    """
    sample_array = np.array(audio.get_array_of_samples())
    rate = audio.frame_rate
    return sample_array, rate
106
+
107
def array_to_audiosegment(samples, frame_rate, channels=1):
    """Wrap a NumPy sample array in an ``AudioSegment``.

    The sample width is taken from the array's dtype item size, and
    *frame_rate* is coerced to ``int``.
    """
    raw_bytes = samples.tobytes()
    rate = int(frame_rate)
    width = samples.dtype.itemsize
    return AudioSegment(raw_bytes, frame_rate=rate, sample_width=width, channels=channels)
114
+
115
def load_audiofile_to_numpy(path):
    """Load the audio file at *path* and return ``(samples, frame_rate)``."""
    segment = AudioSegment.from_file(path)
    samples = np.array(segment.get_array_of_samples())
    return samples, segment.frame_rate
118
+
119
def save_audiosegment_to_temp(audio, suffix=".wav"):
    """Export *audio* to a new temporary file and return its path.

    The temporary file is created and closed *before* exporting. The previous
    version exported to the path while its handle was still open, which fails
    on platforms that lock open files. The export format is the suffix
    without its leading dot, so both ``".wav"`` and ``"wav"`` are accepted
    (``suffix[1:]`` used to turn ``"wav"`` into ``"av"``).

    Args:
        audio: Object with a pydub-style ``export(path, format=...)`` method.
        suffix: File extension, with or without the leading dot.

    Returns:
        Path of the written file. The caller owns it; it is not deleted
        automatically.
    """
    extension = suffix.lstrip(".")
    fd, out_path = tempfile.mkstemp(suffix="." + extension)
    os.close(fd)
    try:
        audio.export(out_path, format=extension)
    except Exception:
        # Do not leave an empty placeholder behind when the export fails.
        os.remove(out_path)
        raise
    return out_path
123
 
124
+ # === Loudness Matching (EBU R128) ===
125
try:
    import pyloudnorm as pyln
except ImportError:
    # Best-effort self-install. Use the running interpreter's pip so the
    # package lands in the environment this process imports from, and fail
    # loudly if the installation does not succeed.
    print("Installing pyloudnorm...")
    import subprocess
    import sys
    subprocess.run([sys.executable, "-m", "pip", "install", "pyloudnorm"], check=True)
    import pyloudnorm as pyln
 
137
  loudness = meter.integrated_loudness(samples)
138
  gain_db = target_lufs - loudness
139
  adjusted = wav + gain_db
140
+ out_path = save_audiosegment_to_temp(adjusted, ".wav")
141
  return out_path
142
 
143
+ # === Auto-EQ per Genre – With R&B, Soul, Funk ===
144
  def auto_eq(audio, genre="Pop"):
145
  eq_map = {
146
  "Pop": [(200, 500, -3), (2000, 4000, +4)],
 
165
  "Default": []
166
  }
167
  from scipy.signal import butter, sosfilt
 
168
  def band_eq(samples, sr, lowcut, highcut, gain):
169
  sos = butter(10, [lowcut, highcut], btype='band', output='sos', fs=sr)
170
  filtered = sosfilt(sos, samples)
 
177
  samples = band_eq(samples, sr, low, high, gain)
178
  return array_to_audiosegment(samples.astype(np.int16), sr, channels=audio.channels)
179
 
180
+ # === Vocal Isolation Helpers ===
 
181
  def load_track_local(path, sample_rate, channels=2):
182
  sig, rate = torchaudio.load(path)
183
  if rate != sample_rate:
 
190
  path = Path(path)
191
  torchaudio.save(str(path), wav, sample_rate)
192
 
 
 
193
  def apply_vocal_isolation(audio_path):
194
  model = pretrained.get_model(name='htdemucs')
195
  wav = load_track_local(audio_path, model.samplerate, channels=2)
 
202
  save_track(out_path, vocal_track, model.samplerate)
203
  return out_path
204
 
205
+ # === Stem Splitting Function ===
 
206
def stem_split(audio_path):
    """Separate *audio_path* with the ``htdemucs`` model and save each stem.

    Returns:
        A list of four ``gr.File`` values in the order drums, bass, other,
        vocals, each pointing at a WAV file in a fresh temporary directory.
        NOTE(review): assumes the model's source order matches that name
        list; confirm against the demucs model.
    """
    model = pretrained.get_model(name='htdemucs')
    mix = load_track_local(audio_path, model.samplerate, channels=2)
    separated = apply_model(model, mix[None])[0]
    out_dir = tempfile.mkdtemp()
    files = []
    for index, stem_name in enumerate(('drums', 'bass', 'other', 'vocals')):
        stem_file = os.path.join(out_dir, f"{stem_name}.wav")
        save_track(stem_file, separated[index].cpu(), model.samplerate)
        files.append(gr.File(value=stem_file))
    return files
 
 
 
217
 
218
+ # === Process Audio Function – Fully Featured ===
219
  def process_audio(audio_file, selected_effects, isolate_vocals, preset_name, export_format):
220
  status = "🔊 Loading audio..."
221
  try:
222
+ # Load input audio file
223
  audio = AudioSegment.from_file(audio_file)
224
  status = "🛠 Applying effects..."
225
 
 
227
  "Noise Reduction": apply_noise_reduction,
228
  "Compress Dynamic Range": apply_compression,
229
  "Add Reverb": apply_reverb,
230
+ "Pitch Shift": lambda x: apply_pitch_shift(x),
231
  "Echo": apply_echo,
232
  "Stereo Widening": apply_stereo_widen,
233
  "Bass Boost": apply_bass_boost,
234
  "Treble Boost": apply_treble_boost,
235
  "Normalize": apply_normalize,
236
+ "Limiter": lambda x: apply_limiter(x, limit_dB=-1),
237
+ "Auto Gain": lambda x: apply_auto_gain(x, target_dB=-20),
238
+ "Vocal Distortion": lambda x: apply_vocal_distortion(x),
239
+ "Stage Mode": apply_stage_mode
 
 
240
  }
241
 
242
+ history = [audio] # For undo functionality
243
  for effect_name in selected_effects:
244
  if effect_name in effect_map_real:
245
  audio = effect_map_real[effect_name](audio)
246
+ history.append(audio)
247
 
248
  status = "💾 Saving final audio..."
249
+ with tempfile.NamedTemporaryFile(delete=False, suffix=f".{export_format.lower()}") as f:
250
+ if isolate_vocals:
251
+ temp_input = os.path.join(tempfile.gettempdir(), "input.wav")
252
+ audio.export(temp_input, format="wav")
253
+ vocal_path = apply_vocal_isolation(temp_input)
254
+ final_audio = AudioSegment.from_wav(vocal_path)
255
+ else:
256
+ final_audio = audio
257
+ output_path = f.name
258
+ final_audio.export(output_path, format=export_format.lower())
259
+
260
+ waveform_image = show_waveform(output_path)
261
+ genre = detect_genre(output_path)
262
+ session_log = generate_session_log(audio_file, selected_effects, isolate_vocals, export_format, genre)
 
 
 
 
 
 
 
263
  status = "🎉 Done!"
264
+ return output_path, waveform_image, session_log, genre, status, history
265
 
266
  except Exception as e:
267
  status = f"❌ Error: {str(e)}"
268
+ return None, None, status, "", status, []
269
 
270
+ # Waveform preview
271
def show_waveform(audio_file):
    """Render the first 10000 samples of *audio_file* as a PNG image.

    Returns:
        A PIL image of the plot, or ``None`` if anything fails along the way.
    """
    try:
        clip = AudioSegment.from_file(audio_file)
        preview = np.array(clip.get_array_of_samples())[:10000]
        plt.figure(figsize=(10, 2))
        plt.plot(preview, color="skyblue")
        plt.axis("off")
        png = BytesIO()
        plt.savefig(png, format="png", bbox_inches="tight", dpi=100)
        plt.close()
        png.seek(0)
        image = Image.open(png)
    except Exception:
        return None
    return image
285
 
286
+ # Genre detection stub
287
def detect_genre(audio_path):
    """Placeholder genre detector.

    Returns ``"Speech"`` whenever the file can be loaded with torchaudio and
    ``"Unknown"`` when loading raises; no classification is performed.
    """
    try:
        _waveform, _rate = torchaudio.load(audio_path)
    except Exception:
        return "Unknown"
    return "Speech"
293
+
294
+ # Session log generator
295
def generate_session_log(audio_path, effects, isolate_vocals, export_format, genre):
    """Return a JSON string (2-space indent) describing one processing run.

    The record holds the current timestamp, the base name of *audio_path*,
    and the remaining arguments under fixed keys.
    """
    record = {}
    record["timestamp"] = str(datetime.datetime.now())
    record["filename"] = os.path.basename(audio_path)
    record["effects_applied"] = effects
    record["isolate_vocals"] = isolate_vocals
    record["export_format"] = export_format
    record["detected_genre"] = genre
    return json.dumps(record, indent=2)
304
+
305
+ # Preset Choices (All restored + more added)
306
# Maps each preset display name to the ordered list of effect names it stands
# for. The UI uses the keys for its preset dropdowns and the (empty)
# "Default" entry as the choices of its effect checkbox groups.
# NOTE(review): several names used here (e.g. "Reverb", "Bitcrusher",
# "Noise Gate", and the parameterised forms such as "Bass Boost (80-200Hz)")
# do not appear among the effect-map keys visible in process_audio, which
# skips names it does not know -- confirm how presets are meant to be applied.
preset_choices = {
    "Default": [],
    "Clean Podcast": ["Noise Reduction", "Normalize"],
    "Podcast Mastered": ["Noise Reduction", "Normalize", "Compress Dynamic Range"],
    "Radio Ready": ["Bass Boost", "Treble Boost", "Limiter"],
    "Music Production": ["Reverb", "Stereo Widening", "Pitch Shift"],
    "ASMR Creator": ["Noise Gate", "Auto Gain", "Low-Pass Filter"],
    "Voiceover Pro": ["Vocal Isolation", "TTS", "EQ Match"],
    "8-bit Retro": ["Bitcrusher", "Echo", "Mono Downmix"],
    "🎙 Clean Vocal": ["Noise Reduction", "Normalize", "High Pass Filter (80Hz)"],
    "🧪 Vocal Distortion": ["Vocal Distortion", "Reverb", "Compress Dynamic Range"],
    "🎶 Singer's Harmony": ["Harmony", "Stereo Widening", "Pitch Shift"],
    "🌫 ASMR Vocal": ["Auto Gain", "Low-Pass Filter (3000Hz)", "Noise Gate"],
    "🎼 Stage Mode": ["Reverb", "Bass Boost", "Limiter"],
    "🎵 Auto-Tune Style": ["Pitch Shift (+1 semitone)", "Normalize", "Treble Boost"],
    "🎤 R&B Vocal": ["Noise Reduction", "Bass Boost (100-300Hz)", "Treble Boost (2000-4000Hz)"],
    "💃 Soul Vocal": ["Noise Reduction", "Bass Boost (80-200Hz)", "Treble Boost (1500-3500Hz)"],
    "🕺 Funk Groove": ["Bass Boost (80-200Hz)", "Treble Boost (1000-3000Hz)"],

    # New presets
    "Studio Master": ["Noise Reduction", "Normalize", "Bass Boost", "Treble Boost", "Limiter"],
    "Podcast Voice": ["Noise Reduction", "Auto Gain", "High Pass Filter (85Hz)"],
    "Lo-Fi Chill": ["Noise Gate", "Low-Pass Filter (3000Hz)", "Mono Downmix", "Bitcrusher"],
    "Vocal Clarity": ["Noise Reduction", "EQ Match", "Reverb", "Auto Gain"],
    "Retro Game Sound": ["Bitcrusher", "Echo", "Mono Downmix"],
    "Live Stream Optimized": ["Noise Reduction", "Auto Gain", "Saturation", "Normalize"],
    "Deep Bass Trap": ["Bass Boost (60-120Hz)", "Low-Pass Filter (200Hz)", "Limiter"],
    "8-bit Voice": ["Bitcrusher", "Pitch Shift (-4 semitones)", "Mono Downmix"],
    "Pop Vocal": ["Noise Reduction", "Normalize", "EQ Match (Pop)", "Auto Gain"],
    "EDM Lead": ["Noise Reduction", "Tape Saturation", "Stereo Widening", "Limiter"],
    "Hip-Hop Beat": ["Bass Boost (60-200Hz)", "Treble Boost (7000-10000Hz)", "Compression"],
    "ASMR Whisper": ["Noise Gate", "Auto Gain", "Low-Pass Filter (5000Hz)"],
    "Jazz Piano Clean": ["Noise Reduction", "EQ Match (Jazz Piano)", "Normalize"],
    "Metal Guitar": ["Noise Reduction", "EQ Match (Metal)", "Compression"],
    "Podcast Intro": ["Echo", "Reverb", "Pitch Shift (+1 semitone)"],
    "Vintage Radio": ["Bitcrusher", "Low-Pass Filter (4000Hz)", "Saturation"],
    "Speech Enhancement": ["Noise Reduction", "High Pass Filter (100Hz)", "Normalize", "Auto Gain"],
    "Nightcore Speed": ["Pitch Shift (+3 semitones)", "Time Stretch (1.2x)", "Treble Boost"],
    "Robot Voice": ["Pitch Shift (-12 semitones)", "Bitcrusher", "Low-Pass Filter (2000Hz)"],
    "Underwater Effect": ["Low-Pass Filter (1000Hz)", "Reverb", "Echo"],
    "Alien Voice": ["Pitch Shift (+7 semitones)", "Tape Saturation", "Echo"],
    "Cinematic Voice": ["Reverb", "Limiter", "Bass Boost", "Auto Gain"],
    "Phone Call Sim": ["Low-Pass Filter (3400Hz)", "Noise Gate", "Compression"],
    "AI Generated Voice": ["TTS", "Pitch Shift", "Vocal Distortion"]
}

# Preset names in definition order, used to populate dropdowns.
preset_names = list(preset_choices.keys())
353
+
354
+ # Batch Processing
355
def batch_process_audio(files, selected_effects, isolate_vocals, preset_name, export_format):
    """Run ``process_audio`` on every uploaded file and bundle the results in a ZIP.

    Changes over the previous version: the unused ``tempfile.mkdtemp()``
    directory (leaked on every call) is gone, the archive gets a unique name
    instead of a fixed one shared by concurrent requests, uploads may be
    plain path strings or objects exposing ``.name``, and a file that fails
    to process is reported by name instead of surfacing as an opaque error
    from ``ZipFile.write(None)``.

    Args:
        files: Iterable of uploaded files (paths or objects with ``.name``).
        selected_effects: Effect names forwarded to ``process_audio``.
        isolate_vocals: Forwarded to ``process_audio``.
        preset_name: Forwarded to ``process_audio``.
        export_format: Output format name, e.g. ``"MP3"`` or ``"WAV"``.

    Returns:
        ``(zip_path, status_message)``; ``zip_path`` is ``None`` on failure.
    """
    try:
        extension = export_format.lower()
        processed = []
        for file in files:
            source_path = getattr(file, "name", file)
            # process_audio returns (path, waveform, log, genre, status, history);
            # on failure the path is None and the log slot carries the error text.
            out_path, _, log = process_audio(
                source_path, selected_effects, isolate_vocals, preset_name, export_format
            )[:3]
            if out_path is None:
                raise RuntimeError(
                    f"could not process {os.path.basename(str(source_path))}: {log}"
                )
            processed.append((out_path, log))

        fd, zip_path = tempfile.mkstemp(prefix="batch_output_", suffix=".zip")
        os.close(fd)
        with zipfile.ZipFile(zip_path, 'w') as zipf:
            for i, (out_path, log) in enumerate(processed):
                zipf.write(out_path, f"processed_{i}.{extension}")
                zipf.writestr(f"session_info_{i}.json", log)
        return zip_path, "📦 ZIP created successfully!"
    except Exception as e:
        return None, f"❌ Batch processing failed: {str(e)}"
373
 
374
+ # AI Remastering
 
375
def ai_remaster(audio_path):
    """Denoise, isolate vocals from, and master the recording at *audio_path*.

    Pipeline: noise reduction -> ``apply_vocal_isolation`` ->
    ``ai_mastering_chain`` (genre "Pop", -14 LUFS target).

    The intermediate WAV now gets a unique temporary name; the previous fixed
    ``cleaned.wav`` in the shared temp directory could be overwritten by a
    concurrent request.

    Returns:
        Path of the mastered file, or ``None`` if any step fails (the error
        is printed).
    """
    try:
        audio = AudioSegment.from_file(audio_path)
        samples, sr = audiosegment_to_array(audio)
        reduced = nr.reduce_noise(y=samples, sr=sr)
        # Keep the original PCM dtype so the sample width passed on stays
        # correct even if the noise reducer hands back another dtype.
        reduced = np.asarray(reduced).astype(samples.dtype)
        cleaned = array_to_audiosegment(reduced, sr, channels=audio.channels)
        fd, cleaned_wav_path = tempfile.mkstemp(prefix="cleaned_", suffix=".wav")
        os.close(fd)
        cleaned.export(cleaned_wav_path, format="wav")
        isolated_path = apply_vocal_isolation(cleaned_wav_path)
        return ai_mastering_chain(isolated_path, genre="Pop", target_lufs=-14.0)
    except Exception as e:
        print(f"Remastering Error: {str(e)}")
        return None
 
390
def ai_mastering_chain(audio_path, genre="Pop", target_lufs=-14.0):
    """Apply genre EQ, loudness matching and stereo widening to a file.

    ``match_loudness`` works on files and returns a *path*. The previous
    version called it on the original ``audio_path`` and assigned the
    returned path to ``audio``, which discarded the EQ result and then handed
    a string to ``apply_stereo_widen``. The EQ'd audio is now written to a
    temporary file, loudness-matched, and loaded back before widening.

    Args:
        audio_path: Path of the input audio file.
        genre: Genre key forwarded to ``auto_eq``.
        target_lufs: Loudness target forwarded to ``match_loudness``.

    Returns:
        Path of the mastered WAV file (a new temporary file).
    """
    audio = AudioSegment.from_file(audio_path)
    audio = auto_eq(audio, genre=genre)

    eq_path = save_audiosegment_to_temp(audio, ".wav")
    leveled_path = match_loudness(eq_path, target_lufs=target_lufs)
    audio = AudioSegment.from_file(leveled_path)

    audio = apply_stereo_widen(audio, pan_amount=0.3)
    out_path = save_audiosegment_to_temp(audio, ".wav")
    return out_path
398
 
399
+ # Harmonic Saturation
400
+ def harmonic_saturation(audio, saturation_type="Tube", intensity=0.2):
 
 
401
  samples = np.array(audio.get_array_of_samples()).astype(np.float32)
402
  if saturation_type == "Tube":
403
  saturated = np.tanh(intensity * samples)
 
409
  saturated = np.log1p(np.abs(samples)) * np.sign(samples) * intensity
410
  else:
411
  saturated = samples
412
+ return array_to_audiosegment(saturated.astype(np.int16), audio.frame_rate, channels=audio.channels)
413
 
414
+ # Vocal Formant Correction
415
def formant_correct(audio, shift=1.0):
    """Pitch-shift *audio* by *shift* semitones using librosa.

    ``librosa.effects.pitch_shift`` requires floating-point input, so the
    integer PCM samples are scaled to [-1, 1) first and scaled back
    afterwards; passing the integer array directly (as before) is rejected by
    librosa. Interleaved samples are also split into per-channel rows so
    stereo audio is shifted channel by channel.

    Args:
        audio: A pydub ``AudioSegment`` (integer PCM).
        shift: Number of semitones, forwarded as ``n_steps``.

    Returns:
        A new segment with the same frame rate, channel count and sample
        dtype; *audio* itself when it contains no samples.
    """
    samples, sr = audiosegment_to_array(audio)
    if samples.size == 0:
        return audio
    channels = audio.channels
    limits = np.iinfo(samples.dtype)
    full_scale = float(limits.max) + 1.0
    # (frames * channels,) interleaved -> (channels, frames) float32
    planar = samples.reshape(-1, channels).T.astype(np.float32) / full_scale
    shifted = librosa.effects.pitch_shift(planar, sr=sr, n_steps=shift)
    restored = np.clip(shifted * full_scale, limits.min, limits.max).astype(samples.dtype)
    interleaved = np.ascontiguousarray(restored.T).reshape(-1)
    return array_to_audiosegment(interleaved, sr, channels=channels)
419
+
420
+ # Voice Swap
421
def clone_voice(source_audio, reference_audio):
    """Overlay the reference clip, attenuated by 10 dB, on the source clip.

    No voice conversion happens here; the two files are simply mixed and
    written to ``cloned_output.wav`` in the system temp directory.

    Returns:
        Path of the exported WAV file.
    """
    base_track = AudioSegment.from_file(source_audio)
    quiet_reference = AudioSegment.from_file(reference_audio) - 10
    blended = base_track.overlay(quiet_reference)
    destination = os.path.join(tempfile.gettempdir(), "cloned_output.wav")
    blended.export(destination, format="wav")
    return destination
 
 
 
 
428
 
429
+ # Save/Load Mix Session (.aiproj)
430
def save_project(audio, preset, effects):
    """Pickle the raw audio data, preset name and effect list to ``project.aiproj``.

    The file is written in the system temp directory and its path returned.
    """
    raw_audio = AudioSegment.from_file(audio).raw_data
    payload = dict(audio=raw_audio, preset=preset, effects=effects)
    target = os.path.join(tempfile.gettempdir(), "project.aiproj")
    with open(target, "wb") as sink:
        pickle.dump(payload, sink)
    return target
440
 
441
def load_project(project_file):
    """Load a saved ``.aiproj`` session and return ``(preset, effects)``.

    Args:
        project_file: Path string, or an upload object exposing the path as
            ``.name``.

    Returns:
        Tuple of the stored preset name and effect list.

    Raises:
        ValueError: If the file does not contain a project dictionary with
            ``"preset"`` and ``"effects"`` entries.
    """
    path = getattr(project_file, "name", project_file)
    # SECURITY NOTE(review): pickle.load can execute arbitrary code from the
    # file. Project files are user uploads, so this should move to a safe
    # format such as JSON; left in place to stay compatible with files
    # written by save_project.
    with open(path, "rb") as f:
        data = pickle.load(f)
    if not isinstance(data, dict) or "preset" not in data or "effects" not in data:
        raise ValueError("Not a valid .aiproj project file")
    return data["preset"], data["effects"]
445
 
446
+ # Prompt-Based Editing
447
def process_prompt(audio, prompt):
    """Placeholder for prompt-driven editing.

    *prompt* is currently ignored: the function always returns
    ``apply_noise_reduction(audio)``.
    """
    return apply_noise_reduction(audio)
 
449
 
450
+ # Vocal Pitch Correction
451
def auto_tune_vocal(audio_path, target_key="C"):
    """Shift a vocal clip by the semitone offset of *target_key* from C.

    No pitch detection is performed; the clip is shifted by
    ``key_to_semitone(target_key)`` semitones with ``apply_pitch_shift``.

    The output path is now returned directly. The previous 1-tuple
    ``(out_path,)`` does not suit a single file-path audio output. The input
    may be a path string or an upload object exposing ``.name``.

    Returns:
        Path of the shifted WAV file, or ``None`` on failure (the error is
        printed).
    """
    try:
        source_path = getattr(audio_path, "name", audio_path)
        audio = AudioSegment.from_file(source_path)
        semitones = key_to_semitone(target_key)
        tuned_audio = apply_pitch_shift(audio, semitones)
        return save_audiosegment_to_temp(tuned_audio, ".wav")
    except Exception as e:
        print(f"Auto-Tune Error: {e}")
        return None
461
 
462
def key_to_semitone(key="C"):
    """Return the semitone offset (0-11) of *key* above C.

    The key comes from a free-text box, so input is normalised first:
    surrounding whitespace is stripped, case is ignored, the Unicode
    sharp/flat signs are accepted, and flat or enharmonic spellings
    (``"Bb"``, ``"Db"``, ``"E#"`` ...) are mapped to their sharp equivalents.
    Anything unrecognised, including non-string input, yields 0, as before.
    """
    keys = {"C": 0, "C#": 1, "D": 2, "D#": 3, "E": 4, "F": 5,
            "F#": 6, "G": 7, "G#": 8, "A": 9, "A#": 10, "B": 11}
    aliases = {"Db": "C#", "Eb": "D#", "Gb": "F#", "Ab": "G#", "Bb": "A#",
               "Cb": "B", "Fb": "E", "E#": "F", "B#": "C"}
    if not isinstance(key, str):
        return 0
    name = key.strip().replace("♯", "#").replace("♭", "b")
    if not name:
        return 0
    # Canonical spelling: upper-case note letter, lower-case accidental.
    name = name[0].upper() + name[1:].lower()
    return keys.get(aliases.get(name, name), 0)
466
 
467
+ # Loop Section Tool
468
def loop_section(audio_path, start_ms, end_ms, loops=2):
    """Repeat the ``start_ms:end_ms`` slice of a track *loops* times.

    Slider widgets may deliver floats. The values are coerced to ``int``
    because repeating a segment (``section * loops``) only works with an
    integer count. An empty range or a repeat count below 1 is rejected up
    front instead of silently exporting an empty file. The output gets a
    unique temporary name rather than a fixed one shared between requests.

    Args:
        audio_path: Path of the source audio file.
        start_ms: Slice start in milliseconds.
        end_ms: Slice end in milliseconds.
        loops: Number of repetitions (at least 1).

    Returns:
        Path of the exported WAV file.

    Raises:
        ValueError: If the range is empty or *loops* is less than 1.
    """
    start_ms = int(start_ms)
    end_ms = int(end_ms)
    loops = int(loops)
    if 0 <= end_ms <= start_ms:
        raise ValueError("end_ms must be greater than start_ms")
    if loops < 1:
        raise ValueError("loops must be at least 1")

    audio = AudioSegment.from_file(audio_path)
    section = audio[start_ms:end_ms]
    looped = section * loops
    fd, out_path = tempfile.mkstemp(prefix="looped_output_", suffix=".wav")
    os.close(fd)
    looped.export(out_path, format="wav")
    return out_path
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
475
 
476
+ # Frequency Spectrum Visualization
477
def visualize_spectrum(audio_path):
    """Render a dB-scaled spectrogram of *audio_path* as a PIL image.

    Channels are averaged to mono before the STFT. The previous
    ``flatten()`` on the ``(channels, frames)`` tensor concatenated the
    channels one after another, so a stereo file showed up as the track
    played twice. The figure is closed even when plotting fails.
    """
    # Plain `import librosa` does not load the display submodule on every
    # release, so import it explicitly.
    import librosa.display

    y, sr = torchaudio.load(audio_path)
    y_np = y.mean(dim=0).numpy()
    stft = librosa.stft(y_np)
    db = librosa.amplitude_to_db(np.abs(stft))
    fig = plt.figure(figsize=(10, 4))
    try:
        img = librosa.display.specshow(db, sr=sr, x_axis="time", y_axis="hz", cmap="magma")
        plt.colorbar(img, format="%+2.0f dB")
        plt.title("Frequency Spectrum")
        plt.tight_layout()
        buf = BytesIO()
        fig.savefig(buf, format="png")
    finally:
        plt.close(fig)
    buf.seek(0)
    return Image.open(buf)
492
+
493
+ # A/B Compare
494
def compare_ab(track1_path, track2_path):
    """Return both paths unchanged so the UI can show the tracks side by side."""
    pair = (track1_path, track2_path)
    return pair
496
+
497
+ # DAW Template Export
498
def generate_ableton_template(stems):
    """Write a JSON project description listing the base names of *stems*.

    The effects, tempo and title entries are fixed values. The file is
    written to ``ableton_template.json`` in the system temp directory and its
    path returned.
    """
    project = {"format": "Ableton Live"}
    project["stems"] = list(map(os.path.basename, stems))
    project["effects"] = ["Reverb", "EQ", "Compression"]
    project["tempo"] = 128
    project["title"] = "Studio Pulse Project"
    destination = os.path.join(tempfile.gettempdir(), "ableton_template.json")
    with open(destination, "w") as sink:
        sink.write(json.dumps(project, indent=2))
    return destination
510
 
511
+ # Export Full Mix ZIP
512
def export_full_mix(stems, final_mix):
    """Bundle the stems and the final mix into ``full_export.zip``.

    Stems are stored as ``stem_<index>.wav`` in the order given and the mix
    as ``final_mix.wav``. The archive is written in the system temp directory
    and its path returned.
    """
    archive_path = os.path.join(tempfile.gettempdir(), "full_export.zip")
    with zipfile.ZipFile(archive_path, "w") as archive:
        index = 0
        for stem_path in stems:
            archive.write(stem_path, "stem_%d.wav" % index)
            index += 1
        archive.write(final_mix, "final_mix.wav")
    return archive_path
519
+
520
+ # Text-to-Sound
521
def text_to_sound(prompt):
    """Synthesise *prompt* as speech with the VCTK VITS model (speaker p225).

    The model is selected with the ``model_name`` keyword, which is the name
    the Coqui ``TTS`` constructor uses for this argument (the previous
    ``model=`` keyword is not one of its parameters). The output gets a
    unique temporary name instead of a fixed one shared between requests.

    Args:
        prompt: Text to speak; must contain non-whitespace characters.

    Returns:
        Path of the generated WAV file.

    Raises:
        ValueError: If *prompt* is empty or not a string.
    """
    if not isinstance(prompt, str) or not prompt.strip():
        raise ValueError("prompt must be a non-empty string")
    tts = TTS(model_name="tts_models/en/vctk/vits")
    fd, out_path = tempfile.mkstemp(prefix="generated_sound_", suffix=".wav")
    os.close(fd)
    tts.tts_to_file(text=prompt, speaker="p225", file_path=out_path)
    return out_path
526
+
527
+ # Main UI
528
  with gr.Blocks(css="""
529
  body {
530
  font-family: 'Segoe UI', sans-serif;
 
546
  color: white !important;
547
  border-radius: 10px;
548
  padding: 10px 20px;
549
+ font-weight: bold;
550
  box-shadow: 0 0 10px #2563eb44;
551
  border: none;
552
  }
553
+ .gr-button:hover {
554
+ background-color: #3b82f6 !important;
555
+ box-shadow: 0 0 15px #3b82f6aa;
556
+ }
557
+ input[type="text"], select, textarea {
558
+ background-color: #334155 !important;
559
+ color: white !important;
560
+ border: 1px solid #475569 !important;
561
+ width: 100%;
562
+ padding: 10px;
563
+ }
564
  """) as demo:
565
  gr.HTML('''
566
  <div class="studio-header">
567
  <h3>Where Your Audio Meets Intelligence</h3>
568
  </div>
569
  ''')
 
570
  gr.Markdown("### Upload, edit, export — powered by AI!")
571
 
572
+ # --- Single File Studio Tab ---
573
  with gr.Tab("🎵 Single File Studio"):
574
  with gr.Row():
575
  with gr.Column(min_width=300):
576
  input_audio = gr.Audio(label="Upload Audio", type="filepath")
577
+ effect_checkbox = gr.CheckboxGroup(choices=preset_choices["Default"], label="Apply Effects in Order")
 
 
 
578
  preset_dropdown = gr.Dropdown(choices=preset_names, label="Select Preset", value=preset_names[0])
579
+ export_format = gr.Dropdown(choices=["MP3", "WAV"], label="Export Format", value="MP3")
580
  isolate_vocals = gr.Checkbox(label="Isolate Vocals After Effects")
581
  submit_btn = gr.Button("Process Audio")
 
582
  with gr.Column(min_width=300):
583
+ output_audio = gr.Audio(label="Processed Audio", type="filepath")
584
  waveform_img = gr.Image(label="Waveform Preview")
585
  session_log_out = gr.Textbox(label="Session Log", lines=5)
586
+ genre_out = gr.Textbox(label="Detected Genre", lines=1)
587
  status_box = gr.Textbox(label="Status", value="✅ Ready", lines=1)
588
+ submit_btn.click(fn=process_audio, inputs=[
589
+ input_audio, effect_checkbox, isolate_vocals, preset_dropdown, export_format
590
+ ], outputs=[
591
+ output_audio, waveform_img, session_log_out, genre_out, status_box
592
+ ])
593
 
594
+ # --- Remix Mode – Stem Splitting + Per-Stem Effects ===
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
595
  with gr.Tab("🎛 Remix Mode"):
596
  with gr.Row():
597
  with gr.Column(min_width=200):
598
  input_audio_remix = gr.Audio(label="Upload Music Track", type="filepath")
599
  split_button = gr.Button("Split Into Drums, Bass, Vocals, etc.")
600
  with gr.Column(min_width=400):
601
+ stem_outputs = [
602
+ gr.File(label="Vocals"),
603
+ gr.File(label="Drums"),
604
+ gr.File(label="Bass"),
605
+ gr.File(label="Other")
606
+ ]
607
+ split_button.click(fn=stem_split, inputs=[input_audio_remix], outputs=stem_outputs)
608
+
609
+ # --- AI Remastering Tab – Now Fixed & Working ===
 
 
 
610
  with gr.Tab("🔮 AI Remastering"):
611
+ gr.Interface(
 
 
 
 
 
612
  fn=ai_remaster,
613
+ inputs=gr.Audio(label="Upload Low-Quality Recording", type="filepath"),
614
+ outputs=gr.Audio(label="Studio-Grade Output", type="filepath"),
615
+ title="Transform Low-Quality Recordings to Studio Sound",
616
+ description="Uses noise reduction, vocal isolation, and mastering to enhance old recordings.",
617
+ allow_flagging="never"
 
 
618
  )
619
 
620
+ # --- Harmonic Saturation / Exciter – Now Included ===
621
  with gr.Tab("🧬 Harmonic Saturation"):
622
+ gr.Interface(
 
 
 
 
 
 
623
  fn=harmonic_saturation,
624
+ inputs=[
625
+ gr.Audio(label="Upload Track", type="filepath"),
626
+ gr.Dropdown(choices=["Tube", "Tape", "Console", "Mix Bus"], label="Saturation Type", value="Tube"),
627
+ gr.Slider(minimum=0.1, maximum=1.0, value=0.2, label="Intensity")
628
+ ],
629
+ outputs=gr.Audio(label="Warm Output", type="filepath"),
630
+ title="Add Analog-Style Warmth",
631
+ description="Enhance clarity and presence using saturation styles like Tube or Tape.",
632
+ allow_flagging="never"
633
  )
634
 
635
+ # --- Vocal Doubler / Harmonizer – Added Back ===
636
+ with gr.Tab("🎧 Vocal Doubler / Harmonizer"):
637
+ gr.Interface(
638
+ fn=lambda x: apply_harmony(x),
639
+ inputs=gr.Audio(label="Upload Vocal Clip", type="filepath"),
640
+ outputs=gr.Audio(label="Doubled Output", type="filepath"),
641
+ title="Add Vocal Doubling / Harmony",
642
+ description="Enhance vocals with doubling or harmony"
 
 
643
  )
644
 
645
+ # --- Batch Processing – Full Support ===
646
+ with gr.Tab("🔊 Batch Processing"):
647
+ gr.Interface(
648
+ fn=batch_process_audio,
649
+ inputs=[
650
+ gr.File(label="Upload Multiple Files", file_count="multiple"),
651
+ gr.CheckboxGroup(choices=preset_choices["Default"], label="Apply Effects in Order"),
652
+ gr.Checkbox(label="Isolate Vocals After Effects"),
653
+ gr.Dropdown(choices=preset_names, label="Select Preset", value=preset_names[0]),
654
+ gr.Dropdown(choices=["MP3", "WAV"], label="Export Format", value="MP3")
655
+ ],
656
+ outputs=[
657
+ gr.File(label="Download ZIP of All Processed Files"),
658
+ gr.Textbox(label="Status", value="✅ Ready", lines=1)
659
+ ],
660
+ title="Batch Audio Processor",
661
+ description="Upload multiple files, apply effects in bulk, and download all results in a single ZIP.",
662
+ flagging_mode="never",
663
+ submit_btn="Process All Files"
664
+ )
665
 
666
+ # --- Vocal Pitch Correction Auto-Tune Style ===
667
+ with gr.Tab("🎤 AI Auto-Tune"):
668
+ gr.Interface(
669
+ fn=auto_tune_vocal,
670
+ inputs=[
671
+ gr.File(label="Source Voice Clip"),
672
+ gr.Textbox(label="Target Key", value="C", lines=1)
673
+ ],
674
+ outputs=gr.Audio(label="Pitch-Corrected Output", type="filepath"),
675
+ title="AI Auto-Tune",
676
+ description="Correct vocal pitch automatically using AI"
677
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
678
 
679
+ # --- Frequency Spectrum Tab – Real-time Visualizer ===
680
+ with gr.Tab("📊 Frequency Spectrum"):
681
+ gr.Interface(
682
+ fn=visualize_spectrum,
683
+ inputs=gr.Audio(label="Upload Track", type="filepath"),
684
+ outputs=gr.Image(label="Spectrum Analysis")
685
+ )
686
 
687
+ # --- Loudness Graph Tab – EBU R128 Matching ===
688
+ with gr.Tab("📈 Loudness Graph"):
689
+ gr.Interface(
690
+ fn=match_loudness,
691
+ inputs=[
692
+ gr.Audio(label="Upload Track", type="filepath"),
693
+ gr.Slider(minimum=-24, maximum=-6, value=-14, label="Target LUFS")
694
+ ],
695
+ outputs=gr.Audio(label="Normalized Output", type="filepath"),
696
+ title="Match Loudness Across Tracks",
697
+ description="Ensure consistent volume using EBU R128 standard"
698
+ )
699
 
700
+ # --- Save/Load Mix Session (.aiproj) – Added Back ===
701
+ with gr.Tab("📁 Save/Load Project"):
702
+ with gr.Row():
703
+ with gr.Column(min_width=300):
704
+ gr.Interface(
705
+ fn=save_project,
706
+ inputs=[
707
+ gr.File(label="Original Audio"),
708
+ gr.Dropdown(choices=preset_names, label="Used Preset", value=preset_names[0]),
709
+ gr.CheckboxGroup(choices=preset_choices["Default"], label="Applied Effects")
710
+ ],
711
+ outputs=gr.File(label="Project File (.aiproj)")
712
+ with gr.Column(min_width=300):
713
+ gr.Interface(
714
+ fn=load_project,
715
+ inputs=gr.File(label="Upload .aiproj File"),
716
+ outputs=[
717
+ gr.Dropdown(choices=preset_names, label="Loaded Preset"),
718
+ gr.CheckboxGroup(choices=preset_choices["Default"], label="Loaded Effects")
719
+ ],
720
+ title="Resume Last Project",
721
+ description="Load your saved session"
722
+ )
723
 
724
+ # --- Prompt-Based Editing Tab – Added Back ===
725
+ with gr.Tab("🧠 Prompt-Based Editing"):
726
+ gr.Interface(
727
+ fn=process_prompt,
728
+ inputs=[
729
+ gr.File(label="Upload Audio", type="filepath"),
730
+ gr.Textbox(label="Describe What You Want", lines=5)
731
+ ],
732
+ outputs=gr.Audio(label="Edited Output", type="filepath"),
733
+ title="Type Your Edits – AI Does the Rest",
734
+ description="Say what you want done and let AI handle it.",
735
+ allow_flagging="never"
736
+ )
737
 
738
+ # --- Custom EQ Editor ===
739
+ with gr.Tab("🎛 Custom EQ Editor"):
740
+ gr.Interface(
741
+ fn=auto_eq,
742
+ inputs=[
743
+ gr.Audio(label="Upload Track", type="filepath"),
744
+ gr.Dropdown(choices=list(auto_eq.__defaults__[0].keys()), label="Genre", value="Pop")
745
+ ],
746
+ outputs=gr.Audio(label="EQ-Enhanced Output", type="filepath"),
747
+ title="Custom EQ by Genre",
748
+ description="Apply custom EQ based on genre"
749
+ )
 
 
750
 
751
+ # --- A/B Compare Two Tracks ===
752
+ with gr.Tab("🎯 A/B Compare"):
753
+ gr.Interface(
754
+ fn=compare_ab,
755
+ inputs=[
756
+ gr.Audio(label="Version A", type="filepath"),
757
+ gr.Audio(label="Version B", type="filepath")
758
+ ],
759
+ outputs=[
760
+ gr.Audio(label="Version A", type="filepath"),
761
+ gr.Audio(label="Version B", type="filepath")
762
+ ],
763
+ title="Compare Two Versions",
764
+ description="Hear two mixes side-by-side",
765
+ allow_flagging="never"
766
+ )
767
 
768
+ # --- Loop Playback ===
769
+ with gr.Tab("🔁 Loop Playback"):
770
+ gr.Interface(
771
+ fn=loop_section,
772
+ inputs=[
773
+ gr.Audio(label="Upload Track", type="filepath"),
774
+ gr.Slider(minimum=0, maximum=30000, step=100, value=5000, label="Start MS"),
775
+ gr.Slider(minimum=100, maximum=30000, step=100, value=10000, label="End MS"),
776
+ gr.Slider(minimum=1, maximum=10, value=2, label="Repeat Loops")
777
+ ],
778
+ outputs=gr.Audio(label="Looped Output", type="filepath"),
779
+ title="Repeat a Section",
780
+ description="Useful for editing a specific part"
781
+ )
782
 
783
+ # --- Share Effect Chain Tab Now Defined! ===
784
+ with gr.Tab("🔗 Share Effect Chain"):
785
+ gr.Interface(
786
+ fn=lambda x: json.dumps(x),
787
+ inputs=gr.CheckboxGroup(choices=preset_choices["Default"]),
788
+ outputs=gr.Textbox(label="Share Code", lines=2),
789
+ title="Copy/Paste Effect Chain",
790
+ description="Share your setup via link/code"
791
+ )
 
 
 
 
 
 
 
792
 
793
+ with gr.Tab("📥 Load Shared Chain"):
794
+ gr.Interface(
795
+ fn=json.loads,
796
+ inputs=gr.Textbox(label="Paste Shared Code", lines=2),
797
+ outputs=gr.CheckboxGroup(choices=preset_choices["Default"], label="Loaded Effects"),
798
+ title="Restore From Shared Chain",
799
+ description="Paste shared effect chain JSON to restore settings"
800
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
801
 
802
+ # --- Keyboard Shortcuts Tab ===
803
+ with gr.Tab(" Keyboard Shortcuts"):
804
+ gr.Markdown("""
805
+ ### Keyboard Controls
806
+ - `Ctrl + Z`: Undo last effect
807
+ - `Ctrl + Y`: Redo
808
+ - `Spacebar`: Play/Stop playback
809
+ - `Ctrl + S`: Save current session
810
+ - `Ctrl + O`: Open session
811
+ - `Ctrl + C`: Copy effect chain
812
+ - `Ctrl + V`: Paste effect chain
813
+ """)
814
+
815
+ # --- Vocal Formant Correction – Now Defined! ===
816
+ with gr.Tab("🧑‍🎤 Vocal Formant Correction"):
817
+ gr.Interface(
818
+ fn=formant_correct,
819
+ inputs=[
820
+ gr.Audio(label="Upload Vocal Track", type="filepath"),
821
+ gr.Slider(minimum=-2, maximum=2, value=1.0, label="Formant Shift")
822
+ ],
823
+ outputs=gr.Audio(label="Natural-Sounding Vocal", type="filepath"),
824
+ title="Preserve Vocal Quality During Pitch Shift",
825
+ description="Make pitch-shifted vocals sound more human"
826
+ )
827
 
828
+ # --- Voice Swap / Cloning – New Tab ===
829
+ with gr.Tab("🔁 Voice Swap / Cloning"):
830
+ gr.Interface(
831
+ fn=clone_voice,
832
+ inputs=[
833
+ gr.File(label="Source Voice Clip"),
834
+ gr.File(label="Reference Voice")
835
+ ],
836
+ outputs=gr.Audio(label="Converted Output", type="filepath"),
837
+ title="Swap Voices Using AI",
838
+ description="Clone or convert voice from one to another"
839
+ )
840
 
841
+ # --- DAW Template Export – Now Included ===
842
+ with gr.Tab("🎛 DAW Template Export"):
843
+ gr.Interface(
844
+ fn=generate_ableton_template,
845
+ inputs=[gr.File(label="Upload Stems", file_count="multiple")],
846
+ outputs=gr.File(label="DAW Template (.json/.als/.flp)")
847
+ )
848
 
849
+ # --- Export Full Mix ZIP – Added Back ===
850
+ with gr.Tab("📁 Export Full Mix ZIP"):
851
+ gr.Interface(
852
+ fn=export_full_mix,
853
+ inputs=[
854
+ gr.File(label="Stems", file_count="multiple"),
855
+ gr.File(label="Final Mix")
856
+ ],
857
+ outputs=gr.File(label="Full Mix Archive (.zip)"),
858
+ title="Export Stems + Final Mix Together",
859
+ description="Perfect for sharing with producers or archiving"
860
+ )
861
 
862
+ # Launch Gradio App
863
+ demo.launch()