import librosa
import numpy as np
import soundfile as sf
from numpy import ndarray


def load_audio(file_path: str) -> tuple[ndarray, int]:
    """
    Load audio file and return audio data and sample rate.

    Args:
        file_path (str): Path to the audio file.

    Returns:
        tuple: (audio_data, sample_rate)
    """
    audio_data, sample_rate = sf.read(file_path)
    return audio_data, sample_rate


def cut_audio(_audio: ndarray, sampling_rate: int | float,
              start_millis: int, end_millis: int) -> ndarray:
    """Cut audio array from start_millis to end_millis (millisecond offsets)."""
    start_sample = int(start_millis / 1000 * sampling_rate)
    end_sample = int(end_millis / 1000 * sampling_rate)
    return _audio[start_sample:end_sample]


def format_time(seconds: float) -> str:
    """Format seconds into MM:SS format (zero-padded)."""
    minutes = int(seconds // 60)
    secs = int(seconds % 60)
    return f"{minutes:02d}:{secs:02d}"


def load_audio_info(audio_file):
    """
    Load audio file and return audio data, sample rate, and duration.

    Args:
        audio_file: Path to the audio file, or None.

    Returns:
        tuple: (audio_data, sample_rate, duration_seconds),
        or (None, None, None) when audio_file is None or loading fails.
    """
    if audio_file is None:
        return None, None, None

    try:
        # Load audio data and sample rate
        audio_data, sample_rate = sf.read(audio_file)
        # Duration in seconds (frame count divided by sample rate)
        duration = len(audio_data) / sample_rate
        return audio_data, sample_rate, duration
    except Exception as e:
        print(f"Error loading audio: {e}")
        return None, None, None


def get_audio_duration(audio_file):
    """Get just the duration of an audio file in seconds, or None on error."""
    try:
        # sf.info reads only the header — no need to load the samples
        info = sf.info(audio_file)
        return info.frames / info.samplerate
    except Exception:
        return None


def merge_audio_arrays(audios: list[ndarray]) -> ndarray:
    """Merge multiple audio arrays by concatenation."""
    return np.concatenate(audios)


def apply_fade_in(audio: ndarray, sample_rate: int,
                  fade_duration_ms: int = 100) -> ndarray:
    """
    Apply a linear fade-in to the start of the audio.

    NOTE(review): assumes mono, float-dtype audio — the in-place multiply
    by a 1-D float ramp would fail for stereo or integer arrays; confirm
    against callers.

    Args:
        audio: Audio samples.
        sample_rate: Sample rate in Hz.
        fade_duration_ms: Fade length in milliseconds (default 100).

    Returns:
        A copy of the audio with the fade applied.
    """
    fade_samples = int(fade_duration_ms / 1000 * sample_rate)
    fade_samples = min(fade_samples, len(audio))
    if fade_samples <= 0:
        # Nothing to fade; still return a copy for consistency
        return audio.copy()
    fade_curve = np.linspace(0, 1, fade_samples)
    audio_copy = audio.copy()
    audio_copy[:fade_samples] *= fade_curve
    return audio_copy


def apply_fade_out(audio: ndarray, sample_rate: int,
                   fade_duration_ms: int = 100) -> ndarray:
    """
    Apply a linear fade-out to the end of the audio.

    NOTE(review): assumes mono, float-dtype audio (see apply_fade_in).

    Args:
        audio: Audio samples.
        sample_rate: Sample rate in Hz.
        fade_duration_ms: Fade length in milliseconds (default 100).

    Returns:
        A copy of the audio with the fade applied.
    """
    fade_samples = int(fade_duration_ms / 1000 * sample_rate)
    fade_samples = min(fade_samples, len(audio))
    if fade_samples <= 0:
        # BUG FIX: with fade_samples == 0, audio_copy[-0:] selects the
        # WHOLE array and multiplying by an empty curve raises a
        # broadcast ValueError. Return an unmodified copy instead.
        return audio.copy()
    fade_curve = np.linspace(1, 0, fade_samples)
    audio_copy = audio.copy()
    audio_copy[-fade_samples:] *= fade_curve
    return audio_copy


def normalize_audio(audio: ndarray, target_level: float = -3.0) -> ndarray:
    """
    Normalize audio to a target peak level in dB.

    Args:
        audio: Audio samples.
        target_level: Target peak level in dB (e.g., -3.0 for -3dB).

    Returns:
        Gain-scaled copy of the audio (original returned unchanged if silent).
    """
    # Current peak amplitude (linear scale)
    peak = np.max(np.abs(audio))
    if peak == 0:
        return audio  # Avoid division by zero for silent audio
    # Convert target level from dB to linear scale
    target_linear = 10 ** (target_level / 20)
    # Gain that maps the current peak onto the target peak
    gain = target_linear / peak
    return audio * gain


def adjust_volume(audio: ndarray, gain_db: float) -> ndarray:
    """
    Adjust audio volume by the specified gain in dB.

    Args:
        audio: Audio samples.
        gain_db: Gain in decibels (positive = louder, negative = quieter).

    Returns:
        Gain-scaled copy of the audio.
    """
    gain_linear = 10 ** (gain_db / 20)
    return audio * gain_linear


def apply_silence(duration_ms: int, sample_rate: int) -> ndarray:
    """Generate mono silence (zeros) for the specified duration in ms."""
    samples = int(duration_ms / 1000 * sample_rate)
    return np.zeros(samples)


def reverse_audio(audio: ndarray) -> ndarray:
    """Reverse audio playback (flipped copy of the samples)."""
    return np.flip(audio)


def apply_speed_change(audio: ndarray, speed_factor: float) -> ndarray:
    """
    Change playback speed without changing pitch (time-stretching).

    Args:
        audio: Audio samples.
        speed_factor: 1.0 = normal, 2.0 = double speed, 0.5 = half speed.

    Returns:
        Time-stretched audio.
    """
    return librosa.effects.time_stretch(audio, rate=speed_factor)


def trim_silence(audio: ndarray, threshold_db: float = -40.0) -> ndarray:
    """
    Trim silence from the beginning and end of the audio.

    Args:
        audio: Audio samples.
        threshold_db: Silence threshold in dB; samples whose absolute
            amplitude is at or below this level count as silence.

    Returns:
        The audio sliced to the first..last above-threshold sample,
        or the input unchanged if everything is below threshold.
    """
    # Convert threshold to linear amplitude
    threshold_linear = 10 ** (threshold_db / 20)
    # Boolean mask of samples louder than the threshold
    non_silent = np.abs(audio) > threshold_linear
    if not np.any(non_silent):
        return audio  # All audio is below threshold
    # First and last non-silent sample indices (inclusive slice)
    first_non_silent = np.where(non_silent)[0][0]
    last_non_silent = np.where(non_silent)[0][-1]
    return audio[first_non_silent:last_non_silent + 1]


def get_audio_stats(audio: ndarray, sample_rate: int) -> dict:
    """
    Get statistics about the audio.

    Returns:
        dict with duration_seconds, peak_level_db, rms_level_db,
        sample_rate, samples, and channels (1 for a 1-D array).
    """
    peak_level = np.max(np.abs(audio))
    rms_level = np.sqrt(np.mean(audio ** 2))
    # Convert to dB; silent signals map to -inf rather than log10(0)
    peak_db = 20 * np.log10(peak_level) if peak_level > 0 else -np.inf
    rms_db = 20 * np.log10(rms_level) if rms_level > 0 else -np.inf
    return {
        'duration_seconds': len(audio) / sample_rate,
        'peak_level_db': peak_db,
        'rms_level_db': rms_db,
        'sample_rate': sample_rate,
        'samples': len(audio),
        'channels': 1 if len(audio.shape) == 1 else audio.shape[1]
    }


def merge_audio_files(file_paths: list[str]) -> tuple[tuple[int | float, ndarray] | None, str]:
    """
    Merge multiple audio files by concatenating them.

    Files are resampled to the first file's sample rate and down-mixed
    to mono before concatenation.

    Args:
        file_paths: List of audio file paths.

    Returns:
        tuple: ((sample_rate, merged_audio_array), status_message) on
        success, or (None, error_message) on failure.
    """
    # Guard clauses: need at least two files to merge
    if not file_paths:
        return None, "❌ No audio files to merge"
    if len(file_paths) == 1:
        return None, "❌ Please upload at least 2 audio files to merge"

    try:
        merged_audio_segments = []
        target_sample_rate = None
        file_durations = []

        for i, file_path in enumerate(file_paths):
            # Load audio file; skip files that fail to load
            audio_data, sample_rate, duration = load_audio_info(file_path)
            if audio_data is None:
                continue

            if target_sample_rate is None:
                # First successfully loaded file sets the target rate
                target_sample_rate = sample_rate
            elif sample_rate != target_sample_rate:
                # Resample to the target rate (lazy import: scipy only
                # needed when rates actually differ)
                from scipy import signal
                num_samples = int(len(audio_data) * target_sample_rate / sample_rate)
                audio_data = signal.resample(audio_data, num_samples)

            # Convert stereo to mono if needed (average the channels)
            if len(audio_data.shape) > 1:
                audio_data = np.mean(audio_data, axis=1)

            merged_audio_segments.append(audio_data)
            file_durations.append(len(audio_data) / target_sample_rate)

        if not merged_audio_segments:
            return None, "❌ No valid audio files found"

        # Concatenate all audio arrays
        final_audio = np.concatenate(merged_audio_segments)

        # Create status message
        total_duration = len(final_audio) / target_sample_rate
        status = f"""✅ Successfully merged {len(file_paths)} audio files! 🎵 **Merge Details:** • Total duration: {format_time(total_duration)} ({total_duration:.2f} seconds) • Sample rate: {target_sample_rate:,} Hz • Files processed: {len(merged_audio_segments)} • Individual durations: {', '.join([f'{d:.1f}s' for d in file_durations])} 🎧 **Result:** Ready for playback and download!"""

        return (target_sample_rate, final_audio), status

    except Exception as e:
        return None, f"❌ Error merging audio files: {str(e)}"