|
import librosa |
|
import numpy as np |
|
import soundfile as sf |
|
from numpy import ndarray |
|
|
|
|
|
def load_audio(file_path: str) -> tuple[ndarray, int]:
    """
    Read an audio file from disk.

    Args:
        file_path (str): Path to the audio file.

    Returns:
        tuple: (audio_data, sample_rate) as produced by soundfile.
    """
    # sf.read already yields the (data, samplerate) pair we promise
    return sf.read(file_path)
|
|
|
|
|
def cut_audio(_audio: ndarray, sampling_rate: int | float, start_millis: int, end_millis: int) -> ndarray: |
|
"""Cut audio array from start_millis to end_millis""" |
|
start_sample = int(start_millis / 1000 * sampling_rate) |
|
end_sample = int(end_millis / 1000 * sampling_rate) |
|
return _audio[start_sample:end_sample] |
|
|
|
|
|
def format_time(seconds):
    """Render a duration in seconds as a zero-padded MM:SS string."""
    whole_minutes = int(seconds // 60)
    leftover_secs = int(seconds % 60)
    # join the two zero-padded fields with the clock separator
    return ":".join([f"{whole_minutes:02d}", f"{leftover_secs:02d}"])
|
|
|
|
|
def load_audio_info(audio_file):
    """Read an audio file and report its samples, sample rate, and duration.

    Returns (audio_data, sample_rate, duration_seconds), or (None, None, None)
    when audio_file is None or reading fails (the error is printed).
    """
    if audio_file is None:
        return None, None, None

    try:
        samples, rate = sf.read(audio_file)
        # duration in seconds follows directly from frame count / rate
        return samples, rate, len(samples) / rate
    except Exception as e:
        print(f"Error loading audio: {e}")
        return None, None, None
|
|
|
|
|
def get_audio_duration(audio_file):
    """Return the duration of an audio file in seconds, or None on any error."""
    try:
        # sf.info reads only the header, so this avoids decoding the samples
        file_info = sf.info(audio_file)
        return file_info.frames / file_info.samplerate
    except Exception:
        return None
|
|
|
|
|
def merge_audio_arrays(audios: list[ndarray]) -> ndarray:
    """Merge multiple audio arrays by concatenating them end-to-end.

    Args:
        audios: List of audio arrays to join in order.

    Returns:
        ndarray: One concatenated array; an empty array if *audios* is empty
        (np.concatenate would otherwise raise ValueError on an empty list).
    """
    if not audios:
        return np.array([])
    return np.concatenate(audios)
|
|
|
|
|
def apply_fade_in(audio: ndarray, sample_rate: int, fade_duration_ms: int = 100) -> ndarray:
    """Apply a linear fade-in to the start of the audio.

    Args:
        audio: Audio samples, shape (samples,) or (samples, channels).
        sample_rate: Sample rate in Hz.
        fade_duration_ms: Fade length in milliseconds (clamped to the audio length).

    Returns:
        ndarray: A copy of *audio* with the first fade_duration_ms ramped 0 -> 1.
    """
    fade_samples = int(fade_duration_ms / 1000 * sample_rate)
    fade_samples = min(fade_samples, len(audio))

    fade_curve = np.linspace(0, 1, fade_samples)
    if audio.ndim > 1:
        # add a channel axis so the curve broadcasts over (samples, channels)
        # audio instead of raising a shape-mismatch error
        fade_curve = fade_curve[:, np.newaxis]

    audio_copy = audio.copy()
    audio_copy[:fade_samples] *= fade_curve

    return audio_copy
|
|
|
|
|
def apply_fade_out(audio: ndarray, sample_rate: int, fade_duration_ms: int = 100) -> ndarray:
    """Apply a linear fade-out to the end of the audio.

    Args:
        audio: Audio samples, shape (samples,) or (samples, channels).
        sample_rate: Sample rate in Hz.
        fade_duration_ms: Fade length in milliseconds (clamped to the audio length).

    Returns:
        ndarray: A copy of *audio* with the final fade_duration_ms ramped 1 -> 0.
    """
    fade_samples = int(fade_duration_ms / 1000 * sample_rate)
    fade_samples = min(fade_samples, len(audio))

    if fade_samples == 0:
        # audio[-0:] would select the WHOLE array and the empty fade curve
        # would then fail to broadcast; with nothing to fade, return a copy
        return audio.copy()

    fade_curve = np.linspace(1, 0, fade_samples)
    if audio.ndim > 1:
        # add a channel axis so the curve broadcasts over (samples, channels)
        fade_curve = fade_curve[:, np.newaxis]

    audio_copy = audio.copy()
    audio_copy[-fade_samples:] *= fade_curve

    return audio_copy
|
|
|
|
|
def normalize_audio(audio: ndarray, target_level: float = -3.0) -> ndarray:
    """
    Scale audio so its peak amplitude sits at target_level dB.

    target_level: Desired peak level in dB (e.g., -3.0 for -3dB).
    Silent input (all zeros) is returned unchanged, since no finite
    gain can reach the target.
    """
    current_peak = np.max(np.abs(audio))

    if current_peak == 0:
        # digital silence: nothing to normalize
        return audio

    # convert the dB target to a linear amplitude, then derive the gain
    desired_peak = 10 ** (target_level / 20)
    gain_factor = desired_peak / current_peak

    return audio * gain_factor
|
|
|
|
|
def adjust_volume(audio: ndarray, gain_db: float) -> ndarray:
    """
    Scale audio amplitude by a gain expressed in decibels.

    gain_db: Gain in decibels (positive = louder, negative = quieter).
    """
    # dB -> linear amplitude ratio: 20 dB corresponds to a factor of 10
    linear_scale = 10 ** (gain_db / 20)
    return audio * linear_scale
|
|
|
|
|
def apply_silence(duration_ms: int, sample_rate: int) -> ndarray:
    """Generate an all-zero (silent) buffer of the requested duration."""
    # milliseconds -> sample count, truncated toward zero
    sample_count = int(duration_ms / 1000 * sample_rate)
    return np.zeros(sample_count)
|
|
|
|
|
def reverse_audio(audio: ndarray) -> ndarray:
    """Reverse audio playback (flip the time axis only).

    np.flip with no axis argument reverses EVERY axis, which for
    (samples, channels) stereo input would also swap the channels;
    axis=0 reverses time alone and is identical for 1-D mono audio.
    """
    return np.flip(audio, axis=0)
|
|
|
|
|
def apply_speed_change(audio: ndarray, speed_factor: float) -> ndarray:
    """
    Time-stretch audio to change playback speed while keeping pitch constant.

    speed_factor: 1.0 = unchanged, 2.0 = twice as fast, 0.5 = half speed.
    """
    # librosa's phase-vocoder stretch changes duration without shifting pitch
    stretched = librosa.effects.time_stretch(audio, rate=speed_factor)
    return stretched
|
|
|
|
|
def trim_silence(audio: ndarray, threshold_db: float = -40.0) -> ndarray:
    """
    Trim silence from the beginning and end of audio.

    Args:
        audio: Audio samples.
        threshold_db: Silence threshold in dB; samples at or below this
            amplitude count as silent.

    Returns:
        ndarray: Slice of *audio* with leading/trailing silence removed,
        or the original array unchanged if it is entirely silent.
    """
    # dB threshold -> linear amplitude
    threshold_linear = 10 ** (threshold_db / 20)

    non_silent = np.abs(audio) > threshold_linear

    if not np.any(non_silent):
        # all-silent input: nothing meaningful to trim
        return audio

    # single index scan instead of calling np.where twice
    loud_indices = np.where(non_silent)[0]
    return audio[loud_indices[0]:loud_indices[-1] + 1]
|
|
|
|
|
def get_audio_stats(audio: ndarray, sample_rate: int) -> dict:
    """Summarize audio: duration, peak/RMS levels in dB, sample count, channels."""

    def as_db(amplitude):
        # linear amplitude -> decibels; digital silence maps to -inf
        return 20 * np.log10(amplitude) if amplitude > 0 else -np.inf

    peak_amplitude = np.max(np.abs(audio))
    rms_amplitude = np.sqrt(np.mean(audio ** 2))

    return {
        'duration_seconds': len(audio) / sample_rate,
        'peak_level_db': as_db(peak_amplitude),
        'rms_level_db': as_db(rms_amplitude),
        'sample_rate': sample_rate,
        'samples': len(audio),
        'channels': 1 if len(audio.shape) == 1 else audio.shape[1]
    }
|
|
|
|
|
def merge_audio_files(file_paths: list[str]) -> tuple[tuple[ndarray, int | float] | None, str]: |
|
""" |
|
Merge multiple audio files by concatenating them |
|
|
|
Args: |
|
file_paths: List of audio file paths |
|
|
|
Returns: |
|
tuple: (sample_rate, merged_audio_array, status_message) |
|
""" |
|
if not file_paths or len(file_paths) == 0: |
|
return None, "❌ No audio files to merge" |
|
|
|
if len(file_paths) == 1: |
|
return None, "❌ Please upload at least 2 audio files to merge" |
|
|
|
try: |
|
merged_audio_segments = [] |
|
target_sample_rate = None |
|
file_durations = [] |
|
|
|
for i, file_path in enumerate(file_paths): |
|
|
|
audio_data, sample_rate, duration = load_audio_info(file_path) |
|
|
|
if audio_data is None: |
|
continue |
|
|
|
|
|
if target_sample_rate is None: |
|
target_sample_rate = sample_rate |
|
elif sample_rate != target_sample_rate: |
|
|
|
from scipy import signal |
|
num_samples = int(len(audio_data) * target_sample_rate / sample_rate) |
|
audio_data = signal.resample(audio_data, num_samples) |
|
|
|
|
|
if len(audio_data.shape) > 1: |
|
audio_data = np.mean(audio_data, axis=1) |
|
|
|
merged_audio_segments.append(audio_data) |
|
file_durations.append(len(audio_data) / target_sample_rate) |
|
|
|
if not merged_audio_segments: |
|
return None, "❌ No valid audio files found" |
|
|
|
|
|
final_audio = np.concatenate(merged_audio_segments) |
|
|
|
|
|
total_duration = len(final_audio) / target_sample_rate |
|
|
|
status = f"""✅ Successfully merged {len(file_paths)} audio files! |
|
|
|
🎵 **Merge Details:** |
|
• Total duration: {format_time(total_duration)} ({total_duration:.2f} seconds) |
|
• Sample rate: {target_sample_rate:,} Hz |
|
• Files processed: {len(merged_audio_segments)} |
|
• Individual durations: {', '.join([f'{d:.1f}s' for d in file_durations])} |
|
|
|
🎧 **Result:** Ready for playback and download!""" |
|
|
|
return (target_sample_rate, final_audio), status |
|
|
|
except Exception as e: |
|
return None, f"❌ Error merging audio files: {str(e)}" |
|
|