MiloMusic_YuEGP

Sleeping

App Files Files Community

futurespyhi committed on Sep 14

Commit

326af3b

1 Parent(s): 3dfa434

add --rescale to enable audio rescaling to proper volume

Browse files

Files changed (3) hide show

app.py +1 -0
debug_audio.py +48 -0
fix_audio_volume.py +72 -0

app.py CHANGED Viewed

@@ -367,6 +367,7 @@ def generate_music_spaces(lyrics: str, genre: str, mood: str, progress=gr.Progre
             "--max_new_tokens", "3000",     # Full token count
             "--profile", "1",               # Highest performance profile
             "--verbose", "3",
             "--prompt_start_time", "0",
             "--prompt_end_time", "30",      # Full 30-second clips
         ]

             "--max_new_tokens", "3000",     # Full token count
             "--profile", "1",               # Highest performance profile
             "--verbose", "3",
+            "--rescale",                    # Enable audio rescaling to proper volume
             "--prompt_start_time", "0",
             "--prompt_end_time", "30",      # Full 30-second clips
         ]

debug_audio.py ADDED Viewed

	@@ -0,0 +1,48 @@

+#!/usr/bin/env python3
+"""
+Debug script to analyze audio file properties
+"""
+import numpy as np
+import soundfile as sf
+import os
+import sys
+def analyze_audio_file(file_path):
+    """Analyze audio file and print detailed information"""
+    if not os.path.exists(file_path):
+        print(f"❌ File not found: {file_path}")
+        return False
+    try:
+        # Read audio file
+        audio_data, sample_rate = sf.read(file_path)
+        print(f"📁 File: {os.path.basename(file_path)}")
+        print(f"📊 Shape: {audio_data.shape}")
+        print(f"🎵 Sample rate: {sample_rate} Hz")
+        print(f"⏱️ Duration: {len(audio_data) / sample_rate:.2f} seconds")
+        print(f"🔊 Data type: {audio_data.dtype}")
+        print(f"📈 Min value: {audio_data.min():.8f}")
+        print(f"📈 Max value: {audio_data.max():.8f}")
+        print(f"📈 Max absolute: {np.abs(audio_data).max():.8f}")
+        print(f"🔇 Is silent: {np.abs(audio_data).max() < 1e-6}")
+        print(f"📊 Non-zero samples: {np.count_nonzero(audio_data)}/{len(audio_data)}")
+        # Check if audio needs amplification
+        max_amp = np.abs(audio_data).max()
+        if max_amp > 0:
+            amplification_needed = 0.5 / max_amp  # Target 0.5 max amplitude
+            print(f"🔊 Amplification needed: {amplification_needed:.1f}x ({20*np.log10(amplification_needed):.1f}dB)")
+        return True
+    except Exception as e:
+        print(f"❌ Error reading audio file: {e}")
+        return False
+if __name__ == "__main__":
+    if len(sys.argv) > 1:
+        file_path = sys.argv[1]
+        analyze_audio_file(file_path)
+    else:
+        print("Usage: python debug_audio.py <audio_file_path>")

fix_audio_volume.py ADDED Viewed

	@@ -0,0 +1,72 @@

+#!/usr/bin/env python3
+"""
+Fix audio volume by amplifying low-amplitude audio files
+"""
+import numpy as np
+import soundfile as sf
+import os
+import sys
+def fix_audio_volume(input_file, output_file=None, target_amplitude=0.5):
+    """
+    Amplify audio to target amplitude level
+    Args:
+        input_file: Path to input audio file
+        output_file: Path for output file (default: adds "_fixed" suffix)
+        target_amplitude: Target maximum amplitude (0.0 to 1.0)
+    """
+    if not os.path.exists(input_file):
+        print(f"❌ Input file not found: {input_file}")
+        return False
+    try:
+        # Read audio
+        audio_data, sample_rate = sf.read(input_file)
+        print(f"📁 Processing: {os.path.basename(input_file)}")
+        print(f"📊 Original max amplitude: {np.abs(audio_data).max():.8f}")
+        # Check if audio is actually silent
+        max_amplitude = np.abs(audio_data).max()
+        if max_amplitude < 1e-8:
+            print("❌ Audio appears to be completely silent")
+            return False
+        # Calculate amplification factor
+        amplification_factor = target_amplitude / max_amplitude
+        print(f"🔊 Amplification factor: {amplification_factor:.1f}x ({20*np.log10(amplification_factor):.1f}dB)")
+        # Apply amplification
+        amplified_audio = audio_data * amplification_factor
+        # Ensure we don't clip
+        amplified_audio = np.clip(amplified_audio, -1.0, 1.0)
+        # Determine output filename
+        if output_file is None:
+            base, ext = os.path.splitext(input_file)
+            output_file = f"{base}_fixed{ext}"
+        # Save amplified audio
+        sf.write(output_file, amplified_audio, sample_rate)
+        print(f"✅ Fixed audio saved: {output_file}")
+        print(f"📊 New max amplitude: {np.abs(amplified_audio).max():.6f}")
+        return True
+    except Exception as e:
+        print(f"❌ Error processing audio: {e}")
+        return False
+if __name__ == "__main__":
+    if len(sys.argv) < 2:
+        print("Usage: python fix_audio_volume.py <input_file> [output_file] [target_amplitude]")
+        sys.exit(1)
+    input_file = sys.argv[1]
+    output_file = sys.argv[2] if len(sys.argv) > 2 else None
+    target_amplitude = float(sys.argv[3]) if len(sys.argv) > 3 else 0.5
+    fix_audio_volume(input_file, output_file, target_amplitude)