# namelessai's picture
# Update app.py
# b2e40b8 verified
import gradio as gr
from pydub import AudioSegment
from pydub.effects import compress_dynamic_range, normalize
import tempfile
import os
import traceback
def process_audio(audio_file_path, threshold, ratio, attack, release, headroom):
    """
    Apply dynamic range compression to an audio file, then normalize it.

    Args:
        audio_file_path: Path to the input audio file. ``None`` or an empty
            string means nothing was uploaded.
        threshold: Level (dBFS) above which compression starts.
        ratio: Compression ratio (e.g., 4.0 = 4:1).
        attack: How fast compression engages (ms).
        release: How fast compression disengages (ms).
        headroom: Headroom (dB) handed to ``normalize`` for the makeup gain.

    Returns:
        A ``(output_path, status_message)`` tuple. ``output_path`` is the path
        of a temporary WAV file on success and ``None`` when no file was
        supplied or processing failed.
    """
    # Treat both None and "" as "nothing uploaded" instead of letting an
    # empty path fall through to pydub and surface as a confusing error.
    if not audio_file_path:
        return None, "Please upload an audio file."

    # Tracked so the error path can remove a half-written output file.
    temp_path = None
    try:
        # Load the audio file; the format is left for pydub to work out.
        audio = AudioSegment.from_file(audio_file_path)

        # 1. Dynamic range compression: parts *above* the threshold are
        #    turned down according to the ratio, reducing the dynamic range.
        gr.Info("Applying dynamic range compression...")
        compressed_audio = compress_dynamic_range(
            audio,
            threshold=threshold,
            ratio=ratio,
            attack=attack,
            release=release,
        )

        # 2. Normalize (makeup gain): after compression the peaks are lower,
        #    so the whole track is boosted until its peak sits at the
        #    requested headroom, which makes the quiet parts audibly louder.
        gr.Info("Applying makeup gain (normalization)...")
        normalized_audio = normalize(compressed_audio, headroom=headroom)

        # Write the result to a temporary WAV file (uncompressed output).
        # mkstemp returns an open descriptor; close it right away because
        # export() opens the path itself.
        fd, temp_path = tempfile.mkstemp(suffix=".wav")
        os.close(fd)
        normalized_audio.export(temp_path, format="wav")

        gr.Info("Processing complete!")
        return temp_path, "Processing complete!"
    except Exception as e:
        # UI boundary: log the full traceback for the operator and hand the
        # user a readable message instead of crashing the request.
        print(f"Error processing audio: {e}")
        traceback.print_exc()

        # Do not leave an orphaned temp file behind when the failure
        # happened after mkstemp() had already created it.
        if temp_path is not None:
            try:
                os.remove(temp_path)
            except OSError:
                # Best-effort cleanup; the original error is what matters.
                pass

        error_message = f"Error: Could not process audio. The file might be corrupt or in an unsupported format. Details: {e}"
        gr.Warning(error_message)
        return None, error_message
# --- Gradio UI ---
# Page heading; the layout below interpolates it into a centered <h1>.
title = "Music Volume Normalizer (Compressor)"
# Markdown body rendered under the heading: a short explanation of the tool
# followed by one bullet per slider. This is user-facing text, so keep it in
# sync with the slider labels and ranges defined in the layout.
description = """
Upload your music track (MP3, WAV, FLAC, etc.) to normalize its volume.
This tool uses **Dynamic Range Compression** to achieve the effect you described: it **"boosts the volume of the quieter parts while preserving volume peaks"** (by taming them).
This makes the track sound fuller and more consistent without "squashing" it flat like a simple limiter.
### How to use the controls:
* **Threshold (dBFS):** The volume level to *start* compressing. Lower values (e.g., -30dB) will compress more of the track.
* **Ratio:** How *much* to compress. A 4:1 ratio means for every 4dB the audio goes *over* the threshold, the output will only go up by 1dB.
* **Attack (ms):** How *fast* to start compressing. Short attacks (1-10ms) are good for controlling sharp peaks (like drums).
* **Release (ms):** How *fast* to *stop* compressing. Short releases (50-150ms) sound "punchy"; longer releases sound "smoother".
* **Headroom (dB):** After compressing, the entire track is boosted so its loudest peak is this far below 0dB. `0.1dB` is standard for loud masters.
"""
# NOTE(review): the nesting below was reconstructed from a source whose
# indentation had been flattened -- confirm the button/accordion placement
# against the deployed layout.
with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="sky")) as demo:
    # Page header: centered title followed by the Markdown usage guide.
    gr.Markdown(f"<h1 style='text-align: center;'>{title}</h1>")
    gr.Markdown(description)

    with gr.Row(variant="panel"):
        # Left column: compressor parameters and the submit button.
        with gr.Column(scale=1, min_width=300):
            gr.Markdown("### 1. Compression Controls")
            threshold_slider = gr.Slider(
                label="Threshold (dBFS)",
                info="Volume to start compressing",
                minimum=-60.0,
                maximum=0.0,
                value=-20.0,
                step=0.5,
            )
            ratio_slider = gr.Slider(
                label="Ratio (e.g., 4.0 = 4:1)",
                info="How much to compress",
                minimum=1.0,
                maximum=20.0,
                value=4.0,
                step=0.1,
            )
            attack_slider = gr.Slider(
                label="Attack (ms)",
                info="How fast to start compressing",
                minimum=0.1,
                maximum=50.0,
                value=5.0,
                step=0.1,
            )
            release_slider = gr.Slider(
                label="Release (ms)",
                info="How fast to stop compressing",
                minimum=20.0,
                maximum=500.0,
                value=100.0,
                step=10,
            )
            headroom_slider = gr.Slider(
                label="Headroom (dB)",
                info="Final peak level below 0dB (makeup gain)",
                minimum=0.1,
                maximum=3.0,
                value=0.1,
                step=0.1,
            )
            submit_btn = gr.Button("Normalize Music", variant="primary", scale=2)

        # Right column: upload widget, status line and processed result.
        with gr.Column(scale=2, min_width=400):
            gr.Markdown("### 2. Upload Audio")
            audio_input = gr.Audio(type="filepath", label="Input Music Track")
            gr.Markdown("### 3. Get Normalized Audio")
            status_output = gr.Textbox(
                label="Status",
                interactive=False,
                placeholder="Upload a file and click 'Normalize Music'...",
            )
            audio_output = gr.Audio(label="Normalized Music", type="filepath")

    # The five sliders, in the positional order process_audio expects them
    # after the audio path; shared by the presets and the click handler.
    control_sliders = [
        threshold_slider,
        ratio_slider,
        attack_slider,
        release_slider,
        headroom_slider,
    ]

    with gr.Accordion("Parameter Presets (Click to apply, then click 'Normalize Music')", open=False):
        # Each row: preset name, threshold, ratio, attack, release, headroom.
        preset_rows = [
            ["Subtle Mastering", -18.0, 2.0, 10.0, 150.0, 0.2],
            ["Standard 'Punchy' Mix", -20.0, 4.0, 5.0, 100.0, 0.1],
            ["Heavy Compression", -30.0, 8.0, 1.0, 50.0, 0.1],
            ["Vocal Leveling (Good for Podcasts)", -22.0, 3.0, 3.0, 200.0, 0.5],
        ]
        # A hidden Textbox receives the first column so every preset row can
        # carry a human-readable name.
        preset_label = gr.Textbox(visible=False)
        gr.Examples(
            examples=preset_rows,
            inputs=[preset_label, *control_sliders],
            label="Click a preset to apply settings",
        )

    # Wire the button: the audio path plus the slider values go in, the
    # processed audio path and the status message come out.
    submit_btn.click(
        fn=process_audio,
        inputs=[audio_input, *control_sliders],
        outputs=[audio_output, status_output],
    )
if __name__ == "__main__":
    # Listen on every interface (0.0.0.0) so the app is reachable from
    # outside a Docker container, on port 7860.
    # Pass share=True as well to get a public link (e.g. when running in
    # Colab).
    demo.launch(server_port=7860, server_name="0.0.0.0")