# AudioEditor/utils/audio_utils.py
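"""Audio helper utilities for AudioEditor: loading, cutting, merging, fades,
volume adjustment and normalization, silence handling, and basic statistics."""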
import librosa
import numpy as np
import soundfile as sf
from numpy import ndarray
def load_audio(file_path: str) -> tuple[ndarray, int]:
"""
Load audio file and return audio data and sample rate.
Args:
file_path (str): Path to the audio file.
Returns:
tuple: (audio_data, sample_rate)
"""
audio_data, sample_rate = sf.read(file_path)
return audio_data, sample_rate
def cut_audio(_audio: ndarray, sampling_rate: int | float, start_millis: int, end_millis: int) -> ndarray:
"""Cut audio array from start_millis to end_millis"""
start_sample = int(start_millis / 1000 * sampling_rate)
end_sample = int(end_millis / 1000 * sampling_rate)
return _audio[start_sample:end_sample]
def format_time(seconds):
"""Format seconds into MM:SS format"""
minutes = int(seconds // 60)
secs = int(seconds % 60)
return f"{minutes:02d}:{secs:02d}"
def load_audio_info(audio_file):
"""Load audio file and return audio data, sample rate, and duration info"""
if audio_file is None:
return None, None, None
try:
# Load audio data and sample rate
audio_data, sample_rate = sf.read(audio_file)
# Calculate duration
duration = len(audio_data) / sample_rate
return audio_data, sample_rate, duration
except Exception as e:
print(f"Error loading audio: {e}")
return None, None, None
def get_audio_duration(audio_file):
"""Get just the duration of an audio file"""
try:
info = sf.info(audio_file)
return info.frames / info.samplerate
except Exception:
return None
def merge_audio_arrays(audios: list[ndarray]) -> ndarray:
"""Merge multiple audio arrays by concatenation"""
return np.concatenate(audios)
def apply_fade_in(audio: ndarray, sample_rate: int, fade_duration_ms: int = 100) -> ndarray:
    """Apply a linear fade-in to the start of the audio"""
    fade_samples = min(int(fade_duration_ms / 1000 * sample_rate), len(audio))
    audio_copy = audio.copy()
    if fade_samples <= 0:
        return audio_copy
    fade_curve = np.linspace(0, 1, fade_samples)
    if audio_copy.ndim > 1:
        # Broadcast the fade curve across channels for multi-channel audio
        fade_curve = fade_curve[:, np.newaxis]
    audio_copy[:fade_samples] *= fade_curve
    return audio_copy
def apply_fade_out(audio: ndarray, sample_rate: int, fade_duration_ms: int = 100) -> ndarray:
    """Apply a linear fade-out to the end of the audio"""
    fade_samples = min(int(fade_duration_ms / 1000 * sample_rate), len(audio))
    audio_copy = audio.copy()
    if fade_samples <= 0:
        return audio_copy
    fade_curve = np.linspace(1, 0, fade_samples)
    if audio_copy.ndim > 1:
        # Broadcast the fade curve across channels for multi-channel audio
        fade_curve = fade_curve[:, np.newaxis]
    audio_copy[-fade_samples:] *= fade_curve
    return audio_copy
def normalize_audio(audio: ndarray, target_level: float = -3.0) -> ndarray:
"""
Normalize audio to target level in dB
target_level: Target peak level in dB (e.g., -3.0 for -3dB)
"""
# Calculate current peak level
peak = np.max(np.abs(audio))
if peak == 0:
return audio # Avoid division by zero for silent audio
# Convert target level from dB to linear scale
target_linear = 10 ** (target_level / 20)
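    # e.g. -3.0 dB -> ~0.708, -6.0 dB -> ~0.501 in linear amplitude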
# Calculate gain needed
gain = target_linear / peak
return audio * gain
def adjust_volume(audio: ndarray, gain_db: float) -> ndarray:
"""
Adjust audio volume by specified gain in dB
gain_db: Gain in decibels (positive = louder, negative = quieter)
"""
gain_linear = 10 ** (gain_db / 20)
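    # e.g. +6 dB roughly doubles the amplitude, -6 dB roughly halves it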
return audio * gain_linear
def apply_silence(duration_ms: int, sample_rate: int) -> ndarray:
"""Generate silence for specified duration"""
samples = int(duration_ms / 1000 * sample_rate)
return np.zeros(samples)
def reverse_audio(audio: ndarray) -> ndarray:
"""Reverse audio playback"""
return np.flip(audio)
def apply_speed_change(audio: ndarray, speed_factor: float) -> ndarray:
"""
Change playback speed without changing pitch (simple time-stretching)
speed_factor: 1.0 = normal, 2.0 = double speed, 0.5 = half speed
"""
return librosa.effects.time_stretch(audio, rate=speed_factor)
def trim_silence(audio: ndarray, threshold_db: float = -40.0) -> ndarray:
"""
Trim silence from beginning and end of audio
threshold_db: Silence threshold in dB
"""
# Convert threshold to linear scale
threshold_linear = 10 ** (threshold_db / 20)
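    # e.g. -40 dB corresponds to a linear amplitude of 0.01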
# Find non-silent regions
non_silent = np.abs(audio) > threshold_linear
if not np.any(non_silent):
return audio # All audio is below threshold
# Find first and last non-silent samples
first_non_silent = np.where(non_silent)[0][0]
last_non_silent = np.where(non_silent)[0][-1]
return audio[first_non_silent:last_non_silent + 1]
def get_audio_stats(audio: ndarray, sample_rate: int) -> dict:
"""Get statistics about the audio"""
peak_level = np.max(np.abs(audio))
rms_level = np.sqrt(np.mean(audio ** 2))
# Convert to dB
peak_db = 20 * np.log10(peak_level) if peak_level > 0 else -np.inf
rms_db = 20 * np.log10(rms_level) if rms_level > 0 else -np.inf
return {
'duration_seconds': len(audio) / sample_rate,
'peak_level_db': peak_db,
'rms_level_db': rms_db,
'sample_rate': sample_rate,
'samples': len(audio),
'channels': 1 if len(audio.shape) == 1 else audio.shape[1]
}
def merge_audio_files(file_paths: list[str]) -> tuple[tuple[int | float, ndarray] | None, str]:
    """
    Merge multiple audio files by concatenating them.
    Args:
        file_paths: List of audio file paths
    Returns:
        tuple: ((sample_rate, merged_audio_array) or None, status_message)
    """
    if not file_paths:
        return None, "❌ No audio files to merge"
    if len(file_paths) == 1:
        return None, "❌ Please upload at least 2 audio files to merge"
try:
merged_audio_segments = []
target_sample_rate = None
file_durations = []
for i, file_path in enumerate(file_paths):
# Load audio file
audio_data, sample_rate, duration = load_audio_info(file_path)
if audio_data is None:
continue
# Set target sample rate from first file
if target_sample_rate is None:
target_sample_rate = sample_rate
elif sample_rate != target_sample_rate:
# Resample if different sample rate
from scipy import signal
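                # signal.resample is FFT-based and operates along axis 0 by default,
                # so stereo (frames, channels) data is resampled frame-wise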
num_samples = int(len(audio_data) * target_sample_rate / sample_rate)
audio_data = signal.resample(audio_data, num_samples)
# Convert stereo to mono if needed
if len(audio_data.shape) > 1:
audio_data = np.mean(audio_data, axis=1)
merged_audio_segments.append(audio_data)
file_durations.append(len(audio_data) / target_sample_rate)
if not merged_audio_segments:
return None, "❌ No valid audio files found"
# Concatenate all audio arrays
final_audio = np.concatenate(merged_audio_segments)
# Create status message
total_duration = len(final_audio) / target_sample_rate
status = f"""✅ Successfully merged {len(file_paths)} audio files!
🎵 **Merge Details:**
• Total duration: {format_time(total_duration)} ({total_duration:.2f} seconds)
• Sample rate: {target_sample_rate:,} Hz
• Files processed: {len(merged_audio_segments)}
• Individual durations: {', '.join([f'{d:.1f}s' for d in file_durations])}
🎧 **Result:** Ready for playback and download!"""
return (target_sample_rate, final_audio), status
except Exception as e:
return None, f"❌ Error merging audio files: {str(e)}"
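
if __name__ == "__main__":
    # Illustrative smoke test, not part of the original module. "example.wav" is a
    # placeholder path; pass a real file on the command line to exercise the helpers.
    import sys

    demo_path = sys.argv[1] if len(sys.argv) > 1 else "example.wav"
    demo_audio, demo_sr, demo_duration = load_audio_info(demo_path)
    if demo_audio is None:
        print(f"Could not load {demo_path}")
    else:
        print(f"Loaded {demo_path}: {format_time(demo_duration)} at {demo_sr} Hz")
        # Cut up to the first two seconds, apply fades, and report basic stats
        clip = cut_audio(demo_audio, demo_sr, 0, min(2000, int(demo_duration * 1000)))
        clip = apply_fade_out(apply_fade_in(clip, demo_sr), demo_sr)
        stats = get_audio_stats(clip, demo_sr)
        print(f"Clip peak: {stats['peak_level_db']:.1f} dB, RMS: {stats['rms_level_db']:.1f} dB")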