Spaces:
Sleeping
Sleeping
futurespyhi
committed on
Commit
·
326af3b
1
Parent(s):
3dfa434
add --rescale to enable audio rescaling to proper volume
Browse files- app.py +1 -0
- debug_audio.py +48 -0
- fix_audio_volume.py +72 -0
app.py
CHANGED
|
@@ -367,6 +367,7 @@ def generate_music_spaces(lyrics: str, genre: str, mood: str, progress=gr.Progre
|
|
| 367 |
"--max_new_tokens", "3000", # Full token count
|
| 368 |
"--profile", "1", # Highest performance profile
|
| 369 |
"--verbose", "3",
|
|
|
|
| 370 |
"--prompt_start_time", "0",
|
| 371 |
"--prompt_end_time", "30", # Full 30-second clips
|
| 372 |
]
|
|
|
|
| 367 |
"--max_new_tokens", "3000", # Full token count
|
| 368 |
"--profile", "1", # Highest performance profile
|
| 369 |
"--verbose", "3",
|
| 370 |
+
"--rescale", # Enable audio rescaling to proper volume
|
| 371 |
"--prompt_start_time", "0",
|
| 372 |
"--prompt_end_time", "30", # Full 30-second clips
|
| 373 |
]
|
debug_audio.py
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Debug script to analyze audio file properties
|
| 4 |
+
"""
|
| 5 |
+
import numpy as np
|
| 6 |
+
import soundfile as sf
|
| 7 |
+
import os
|
| 8 |
+
import sys
|
| 9 |
+
|
| 10 |
+
def analyze_audio_file(file_path):
    """Print a summary of an audio file's properties and level statistics.

    Reads the file with ``soundfile`` and prints its shape, sample rate,
    duration, dtype, value range, a silence check, the share of non-zero
    samples, and the gain that would bring the peak to 0.5.

    Args:
        file_path: Path of the audio file to inspect.

    Returns:
        True if the file was read and reported on; False if the path does
        not exist, the file holds no samples, or reading/analysis raised.
    """
    if not os.path.exists(file_path):
        print(f"File not found: {file_path}")
        return False

    try:
        audio_data, sample_rate = sf.read(file_path)

        print(f"File: {os.path.basename(file_path)}")
        print(f"Shape: {audio_data.shape}")
        print(f"Sample rate: {sample_rate} Hz")
        # len() is the size of the first axis, i.e. the frame count.
        print(f"Duration: {len(audio_data) / sample_rate:.2f} seconds")
        print(f"Data type: {audio_data.dtype}")

        # min()/max() raise on a zero-size array; report that case plainly
        # instead of surfacing NumPy's reduction error.
        if audio_data.size == 0:
            print("Error reading audio file: file contains no samples")
            return False

        # The peak is used by several lines below; compute it once.
        peak = np.abs(audio_data).max()

        print(f"Min value: {audio_data.min():.8f}")
        print(f"Max value: {audio_data.max():.8f}")
        print(f"Max absolute: {peak:.8f}")
        print(f"Is silent: {peak < 1e-6}")
        # count_nonzero counts every element (all channels), so the
        # denominator must be the element count, not the frame count.
        print(f"Non-zero samples: {np.count_nonzero(audio_data)}/{audio_data.size}")

        # Check if audio needs amplification
        if peak > 0:
            amplification_needed = 0.5 / peak  # Target 0.5 max amplitude
            print(
                f"Amplification needed: {amplification_needed:.1f}x "
                f"({20 * np.log10(amplification_needed):.1f}dB)"
            )

        return True

    except Exception as e:
        # Diagnostic script: report any read/analysis failure and signal it
        # through the return value rather than crashing.
        print(f"Error reading audio file: {e}")
        return False
|
| 42 |
+
|
| 43 |
+
if __name__ == "__main__":
    # Usage: python debug_audio.py <audio_file_path>
    if len(sys.argv) > 1:
        file_path = sys.argv[1]
        # Propagate the result as the process exit status so shell callers
        # can tell a failed analysis from a successful one.
        sys.exit(0 if analyze_audio_file(file_path) else 1)
    else:
        print("Usage: python debug_audio.py <audio_file_path>")
        # A missing argument is a usage error, so exit non-zero.
        sys.exit(1)
|
fix_audio_volume.py
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Fix audio volume by amplifying low-amplitude audio files
|
| 4 |
+
"""
|
| 5 |
+
import numpy as np
|
| 6 |
+
import soundfile as sf
|
| 7 |
+
import os
|
| 8 |
+
import sys
|
| 9 |
+
|
| 10 |
+
def fix_audio_volume(input_file, output_file=None, target_amplitude=0.5):
    """Scale an audio file so its peak absolute value equals a target level.

    Args:
        input_file: Path to input audio file.
        output_file: Path for output file. When None, "_fixed" is inserted
            before the extension of ``input_file``.
        target_amplitude: Target maximum amplitude; must satisfy
            0.0 < target_amplitude <= 1.0.

    Returns:
        True if the amplified file was written; False if the input is
        missing, the target is out of range, the audio is empty or silent,
        or reading/writing raised an exception.
    """
    if not os.path.exists(input_file):
        print(f"Input file not found: {input_file}")
        return False

    try:
        # A non-positive (or NaN) target yields a zero/negative/NaN gain,
        # and a target above 1.0 would only be flattened by the clip below.
        if not 0.0 < target_amplitude <= 1.0:
            print(
                f"Invalid target amplitude {target_amplitude}: "
                "expected a value in (0.0, 1.0]"
            )
            return False

        # Read audio
        audio_data, sample_rate = sf.read(input_file)

        print(f"Processing: {os.path.basename(input_file)}")

        # max() raises on a zero-size array; report that case plainly.
        if audio_data.size == 0:
            print("Audio file contains no samples")
            return False

        max_amplitude = np.abs(audio_data).max()
        print(f"Original max amplitude: {max_amplitude:.8f}")

        # Check if audio is actually silent (also avoids dividing by ~0)
        if max_amplitude < 1e-8:
            print("Audio appears to be completely silent")
            return False

        # Calculate amplification factor
        amplification_factor = target_amplitude / max_amplitude
        print(
            f"Amplification factor: {amplification_factor:.1f}x "
            f"({20 * np.log10(amplification_factor):.1f}dB)"
        )

        # Apply amplification
        amplified_audio = audio_data * amplification_factor

        # Safety limit: keep every sample within [-1.0, 1.0] even if
        # floating-point rounding nudges the peak past the target.
        amplified_audio = np.clip(amplified_audio, -1.0, 1.0)

        # Determine output filename
        if output_file is None:
            base, ext = os.path.splitext(input_file)
            output_file = f"{base}_fixed{ext}"

        # Save amplified audio
        sf.write(output_file, amplified_audio, sample_rate)

        print(f"Fixed audio saved: {output_file}")
        print(f"New max amplitude: {np.abs(amplified_audio).max():.6f}")

        return True

    except Exception as e:
        # Command-line helper: report any failure and signal it through
        # the return value rather than crashing.
        print(f"Error processing audio: {e}")
        return False
|
| 62 |
+
|
| 63 |
+
if __name__ == "__main__":
    # Usage: python fix_audio_volume.py <input_file> [output_file] [target_amplitude]
    if len(sys.argv) < 2:
        print("Usage: python fix_audio_volume.py <input_file> [output_file] [target_amplitude]")
        sys.exit(1)

    input_file = sys.argv[1]
    output_file = sys.argv[2] if len(sys.argv) > 2 else None
    try:
        target_amplitude = float(sys.argv[3]) if len(sys.argv) > 3 else 0.5
    except ValueError:
        # Report a non-numeric amplitude as a usage error, not a traceback.
        print(f"Invalid target amplitude: {sys.argv[3]!r}")
        sys.exit(1)

    # Propagate success/failure as the process exit status.
    succeeded = fix_audio_volume(input_file, output_file, target_amplitude)
    sys.exit(0 if succeeded else 1)
|