futurespyhi committed on
Commit
326af3b
·
1 Parent(s): 3dfa434

add --rescale to enable audio rescaling to proper volume

Browse files
Files changed (3) hide show
  1. app.py +1 -0
  2. debug_audio.py +48 -0
  3. fix_audio_volume.py +72 -0
app.py CHANGED
@@ -367,6 +367,7 @@ def generate_music_spaces(lyrics: str, genre: str, mood: str, progress=gr.Progre
367
  "--max_new_tokens", "3000", # Full token count
368
  "--profile", "1", # Highest performance profile
369
  "--verbose", "3",
 
370
  "--prompt_start_time", "0",
371
  "--prompt_end_time", "30", # Full 30-second clips
372
  ]
 
367
  "--max_new_tokens", "3000", # Full token count
368
  "--profile", "1", # Highest performance profile
369
  "--verbose", "3",
370
+ "--rescale", # Enable audio rescaling to proper volume
371
  "--prompt_start_time", "0",
372
  "--prompt_end_time", "30", # Full 30-second clips
373
  ]
debug_audio.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Debug script to analyze audio file properties
4
+ """
5
+ import numpy as np
6
+ import soundfile as sf
7
+ import os
8
+ import sys
9
+
10
def analyze_audio_file(file_path):
    """Print a diagnostic summary of an audio file.

    Reads the file with ``soundfile`` and prints its shape, sample rate,
    duration, dtype, amplitude range, a silence check, the non-zero sample
    count, and the gain that would bring the peak to 0.5.

    Args:
        file_path: Path to the audio file to inspect.

    Returns:
        True if the file was read and analyzed; False if it does not exist,
        contains no samples, or could not be read (a message is printed in
        each case).
    """
    if not os.path.exists(file_path):
        print(f"❌ File not found: {file_path}")
        return False

    try:
        # Read audio file
        audio_data, sample_rate = sf.read(file_path)

        print(f"📁 File: {os.path.basename(file_path)}")
        print(f"📊 Shape: {audio_data.shape}")
        print(f"🎵 Sample rate: {sample_rate} Hz")
        # len() is the first axis of the array, i.e. the number of frames.
        print(f"⏱️ Duration: {len(audio_data) / sample_rate:.2f} seconds")
        print(f"🔊 Data type: {audio_data.dtype}")

        # min()/max() raise on zero-size arrays; report that case explicitly
        # instead of surfacing an opaque numpy reduction error.
        if audio_data.size == 0:
            print("❌ Audio file contains no samples")
            return False

        # Peak is needed several times below; compute it once.
        max_amp = np.abs(audio_data).max()

        print(f"📈 Min value: {audio_data.min():.8f}")
        print(f"📈 Max value: {audio_data.max():.8f}")
        print(f"📈 Max absolute: {max_amp:.8f}")
        print(f"🔇 Is silent: {max_amp < 1e-6}")
        # count_nonzero counts every element, so the denominator must be the
        # total element count (size), not len(), which would undercount for
        # multi-dimensional (multi-channel) arrays.
        print(f"📊 Non-zero samples: {np.count_nonzero(audio_data)}/{audio_data.size}")

        # Check if audio needs amplification
        if max_amp > 0:
            amplification_needed = 0.5 / max_amp  # Target 0.5 max amplitude
            print(
                f"🔊 Amplification needed: {amplification_needed:.1f}x "
                f"({20 * np.log10(amplification_needed):.1f}dB)"
            )

        return True

    except Exception as e:
        # CLI boundary: report any read/decoding failure and signal it to the
        # caller through the return value rather than a traceback.
        print(f"❌ Error reading audio file: {e}")
        return False
42
+
43
if __name__ == "__main__":
    # Command-line entry point: analyze the file given as the first argument.
    if len(sys.argv) < 2:
        print("Usage: python debug_audio.py <audio_file_path>")
        sys.exit(1)

    # Propagate success/failure through the exit status so shell callers can
    # tell whether the analysis actually ran.
    sys.exit(0 if analyze_audio_file(sys.argv[1]) else 1)
fix_audio_volume.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Fix audio volume by amplifying low-amplitude audio files
4
+ """
5
+ import numpy as np
6
+ import soundfile as sf
7
+ import os
8
+ import sys
9
+
10
def fix_audio_volume(input_file, output_file=None, target_amplitude=0.5):
    """Amplify an audio file so that its peak reaches a target amplitude.

    The file is read with ``soundfile``, scaled by
    ``target_amplitude / peak`` and written to ``output_file``.

    Args:
        input_file: Path to input audio file.
        output_file: Path for output file (default: the input path with a
            "_fixed" suffix inserted before the extension).
        target_amplitude: Target maximum amplitude; must be greater than 0.0
            and at most 1.0.

    Returns:
        True if the amplified file was written; False if the target is out
        of range, the input is missing, silent, contains non-finite samples,
        or any read/write error occurred (a message is printed in each case).
    """
    # A target of 0 silences the file (and makes the dB figure -inf), a
    # negative target inverts the signal, and a target above 1.0 would be
    # hard-clipped below. Reject all of these (and NaN) up front.
    if not 0.0 < target_amplitude <= 1.0:
        print(f"❌ Target amplitude must be in (0.0, 1.0], got: {target_amplitude}")
        return False

    if not os.path.exists(input_file):
        print(f"❌ Input file not found: {input_file}")
        return False

    try:
        # Read audio
        audio_data, sample_rate = sf.read(input_file)

        print(f"📁 Processing: {os.path.basename(input_file)}")

        # Peak of the signal; an empty file is treated as silent because
        # max() is undefined for zero-size arrays.
        max_amplitude = np.abs(audio_data).max() if audio_data.size else 0.0
        print(f"📊 Original max amplitude: {max_amplitude:.8f}")

        # NaN/Inf samples would make the gain meaningless and poison the
        # whole output, so refuse to process them.
        if not np.isfinite(max_amplitude):
            print("❌ Audio contains NaN or infinite samples")
            return False

        # Check if audio is actually silent
        if max_amplitude < 1e-8:
            print("❌ Audio appears to be completely silent")
            return False

        # Calculate amplification factor
        amplification_factor = target_amplitude / max_amplitude
        print(
            f"🔊 Amplification factor: {amplification_factor:.1f}x "
            f"({20 * np.log10(amplification_factor):.1f}dB)"
        )

        # Apply amplification; the clip only guards against floating-point
        # rounding pushing the peak marginally past full scale.
        amplified_audio = np.clip(audio_data * amplification_factor, -1.0, 1.0)

        # Determine output filename
        if output_file is None:
            base, ext = os.path.splitext(input_file)
            output_file = f"{base}_fixed{ext}"

        # Save amplified audio
        sf.write(output_file, amplified_audio, sample_rate)

        print(f"✅ Fixed audio saved: {output_file}")
        print(f"📊 New max amplitude: {np.abs(amplified_audio).max():.6f}")

        return True

    except Exception as e:
        # CLI boundary: report any read/write failure and signal it to the
        # caller through the return value rather than a traceback.
        print(f"❌ Error processing audio: {e}")
        return False
62
+
63
if __name__ == "__main__":
    # Command-line entry point:
    #   fix_audio_volume.py <input_file> [output_file] [target_amplitude]
    if len(sys.argv) < 2:
        print("Usage: python fix_audio_volume.py <input_file> [output_file] [target_amplitude]")
        sys.exit(1)

    input_file = sys.argv[1]
    output_file = sys.argv[2] if len(sys.argv) > 2 else None
    try:
        target_amplitude = float(sys.argv[3]) if len(sys.argv) > 3 else 0.5
    except ValueError:
        # A non-numeric amplitude is a usage error, not a crash.
        print(f"❌ Invalid target amplitude: {sys.argv[3]}")
        sys.exit(1)

    # Propagate success/failure through the exit status so shell callers can
    # tell whether a fixed file was actually produced.
    sys.exit(0 if fix_audio_volume(input_file, output_file, target_amplitude) else 1)