|
import librosa |
|
import numpy as np |
|
import soundfile as sf |
|
from numpy import ndarray |
|
|
|
|
|
def load_audio(file_path: str) -> tuple[ndarray, int]:
    """
    Read an audio file from disk.

    Args:
        file_path (str): Path to the audio file.

    Returns:
        tuple: (audio_data, sample_rate) as produced by soundfile.
    """
    # sf.read already yields the (data, samplerate) pair we promise
    return sf.read(file_path)
|
|
|
|
|
def cut_audio(_audio: ndarray, sampling_rate: int | float, start_millis: int, end_millis: int) -> ndarray: |
|
"""Cut audio array from start_millis to end_millis""" |
|
start_sample = int(start_millis / 1000 * sampling_rate) |
|
end_sample = int(end_millis / 1000 * sampling_rate) |
|
return _audio[start_sample:end_sample] |
|
|
|
|
|
def format_time(seconds):
    """Render a duration in seconds as a zero-padded MM:SS string."""
    whole_minutes = int(seconds // 60)
    leftover_secs = int(seconds % 60)
    # join the two zero-padded fields with the clock separator
    return ":".join([f"{whole_minutes:02d}", f"{leftover_secs:02d}"])
|
|
|
|
|
def load_audio_info(audio_file):
    """Read an audio file and report its samples, sample rate, and duration.

    Returns (audio_data, sample_rate, duration_seconds), or (None, None, None)
    when audio_file is None or reading fails (the error is printed).
    """
    if audio_file is None:
        return None, None, None

    try:
        samples, rate = sf.read(audio_file)
        # duration in seconds follows directly from frame count / rate
        return samples, rate, len(samples) / rate
    except Exception as e:
        print(f"Error loading audio: {e}")
        return None, None, None
|
|
|
|
|
def get_audio_duration(audio_file):
    """Return the duration of an audio file in seconds, or None on any error."""
    try:
        # sf.info reads only the header, so this avoids decoding the samples
        file_info = sf.info(audio_file)
        return file_info.frames / file_info.samplerate
    except Exception:
        return None
|
|
|
|
|
def merge_audio_arrays(audios: list[ndarray]) -> ndarray:
    """Merge multiple audio arrays by concatenating them end-to-end.

    Args:
        audios: List of audio arrays to join in order.

    Returns:
        ndarray: One concatenated array; an empty array if *audios* is empty
        (np.concatenate would otherwise raise ValueError on an empty list).
    """
    if not audios:
        return np.array([])
    return np.concatenate(audios)
|
|
|
|
|
def apply_fade_in(audio: ndarray, sample_rate: int, fade_duration_ms: int = 100) -> ndarray:
    """Apply a linear fade-in to the start of the audio.

    Args:
        audio: Audio samples, shape (samples,) or (samples, channels).
        sample_rate: Sample rate in Hz.
        fade_duration_ms: Fade length in milliseconds (clamped to the audio length).

    Returns:
        ndarray: A copy of *audio* with the first fade_duration_ms ramped 0 -> 1.
    """
    fade_samples = int(fade_duration_ms / 1000 * sample_rate)
    fade_samples = min(fade_samples, len(audio))

    fade_curve = np.linspace(0, 1, fade_samples)
    if audio.ndim > 1:
        # add a channel axis so the curve broadcasts over (samples, channels)
        # audio instead of raising a shape-mismatch error
        fade_curve = fade_curve[:, np.newaxis]

    audio_copy = audio.copy()
    audio_copy[:fade_samples] *= fade_curve

    return audio_copy
|
|
|
|
|
def apply_fade_out(audio: ndarray, sample_rate: int, fade_duration_ms: int = 100) -> ndarray:
    """Apply a linear fade-out to the end of the audio.

    Args:
        audio: Audio samples, shape (samples,) or (samples, channels).
        sample_rate: Sample rate in Hz.
        fade_duration_ms: Fade length in milliseconds (clamped to the audio length).

    Returns:
        ndarray: A copy of *audio* with the final fade_duration_ms ramped 1 -> 0.
    """
    fade_samples = int(fade_duration_ms / 1000 * sample_rate)
    fade_samples = min(fade_samples, len(audio))

    if fade_samples == 0:
        # audio[-0:] would select the WHOLE array and the empty fade curve
        # would then fail to broadcast; with nothing to fade, return a copy
        return audio.copy()

    fade_curve = np.linspace(1, 0, fade_samples)
    if audio.ndim > 1:
        # add a channel axis so the curve broadcasts over (samples, channels)
        fade_curve = fade_curve[:, np.newaxis]

    audio_copy = audio.copy()
    audio_copy[-fade_samples:] *= fade_curve

    return audio_copy
|
|
|
|
|
def normalize_audio(audio: ndarray, target_level: float = -3.0) -> ndarray:
    """
    Scale audio so its peak amplitude sits at target_level dB.

    target_level: Desired peak level in dB (e.g., -3.0 for -3dB).
    Silent input (all zeros) is returned unchanged, since no finite
    gain can reach the target.
    """
    current_peak = np.max(np.abs(audio))

    if current_peak == 0:
        # digital silence: nothing to normalize
        return audio

    # convert the dB target to a linear amplitude, then derive the gain
    desired_peak = 10 ** (target_level / 20)
    gain_factor = desired_peak / current_peak

    return audio * gain_factor
|
|
|
|
|
def adjust_volume(audio: ndarray, gain_db: float) -> ndarray:
    """
    Scale audio amplitude by a gain expressed in decibels.

    gain_db: Gain in decibels (positive = louder, negative = quieter).
    """
    # dB -> linear amplitude ratio: 20 dB corresponds to a factor of 10
    linear_scale = 10 ** (gain_db / 20)
    return audio * linear_scale
|
|
|
|
|
def apply_silence(duration_ms: int, sample_rate: int) -> ndarray:
    """Generate an all-zero (silent) buffer of the requested duration."""
    # milliseconds -> sample count, truncated toward zero
    sample_count = int(duration_ms / 1000 * sample_rate)
    return np.zeros(sample_count)
|
|
|
|
|
def reverse_audio(audio: ndarray) -> ndarray:
    """Reverse audio playback (flip the time axis only).

    np.flip with no axis argument reverses EVERY axis, which for
    (samples, channels) stereo input would also swap the channels;
    axis=0 reverses time alone and is identical for 1-D mono audio.
    """
    return np.flip(audio, axis=0)
|
|
|
|
|
def apply_speed_change(audio: ndarray, speed_factor: float) -> ndarray:
    """
    Time-stretch audio to change playback speed while keeping pitch constant.

    speed_factor: 1.0 = unchanged, 2.0 = twice as fast, 0.5 = half speed.
    """
    # librosa's phase-vocoder stretch changes duration without shifting pitch
    stretched = librosa.effects.time_stretch(audio, rate=speed_factor)
    return stretched
|
|
|
|
|
def trim_silence(audio: ndarray, threshold_db: float = -40.0) -> ndarray:
    """
    Trim silence from the beginning and end of audio.

    Args:
        audio: Audio samples.
        threshold_db: Silence threshold in dB; samples at or below this
            amplitude count as silent.

    Returns:
        ndarray: Slice of *audio* with leading/trailing silence removed,
        or the original array unchanged if it is entirely silent.
    """
    # dB threshold -> linear amplitude
    threshold_linear = 10 ** (threshold_db / 20)

    non_silent = np.abs(audio) > threshold_linear

    if not np.any(non_silent):
        # all-silent input: nothing meaningful to trim
        return audio

    # single index scan instead of calling np.where twice
    loud_indices = np.where(non_silent)[0]
    return audio[loud_indices[0]:loud_indices[-1] + 1]
|
|
|
|
|
def get_audio_stats(audio: ndarray, sample_rate: int) -> dict:
    """Summarize audio: duration, peak/RMS levels in dB, sample count, channels."""

    def as_db(amplitude):
        # linear amplitude -> decibels; digital silence maps to -inf
        return 20 * np.log10(amplitude) if amplitude > 0 else -np.inf

    peak_amplitude = np.max(np.abs(audio))
    rms_amplitude = np.sqrt(np.mean(audio ** 2))

    return {
        'duration_seconds': len(audio) / sample_rate,
        'peak_level_db': as_db(peak_amplitude),
        'rms_level_db': as_db(rms_amplitude),
        'sample_rate': sample_rate,
        'samples': len(audio),
        'channels': 1 if len(audio.shape) == 1 else audio.shape[1]
    }
|
|
|
|
|
def merge_audio_files(file_paths: list[str]) -> tuple[tuple[ndarray, int | float] | None, str]: |
|
""" |
|
Merge multiple audio files by concatenating them |
|
|
|
Args: |
|
file_paths: List of audio file paths |
|
|
|
Returns: |
|
tuple: (sample_rate, merged_audio_array, status_message) |
|
""" |
|
if not file_paths or len(file_paths) == 0: |
|
return None, "❌ No audio files to merge" |
|
|
|
if len(file_paths) == 1: |
|
return None, "❌ Please upload at least 2 audio files to merge" |
|
|
|
try: |
|
merged_audio_segments = [] |
|
target_sample_rate = None |
|
file_durations = [] |
|
|
|
for i, file_path in enumerate(file_paths): |
|
|
|
audio_data, sample_rate, duration = load_audio_info(file_path) |
|
|
|
if audio_data is None: |
|
continue |
|
|
|
|
|
if target_sample_rate is None: |
|
target_sample_rate = sample_rate |
|
elif sample_rate != target_sample_rate: |
|
|
|
from scipy import signal |
|
num_samples = int(len(audio_data) * target_sample_rate / sample_rate) |
|
audio_data = signal.resample(audio_data, num_samples) |
|
|
|
|
|
if len(audio_data.shape) > 1: |
|
audio_data = np.mean(audio_data, axis=1) |
|
|
|
merged_audio_segments.append(audio_data) |
|
file_durations.append(len(audio_data) / target_sample_rate) |
|
|
|
if not merged_audio_segments: |
|
return None, "❌ No valid audio files found" |
|
|
|
|
|
final_audio = np.concatenate(merged_audio_segments) |
|
|
|
|
|
total_duration = len(final_audio) / target_sample_rate |
|
|
|
status = f"""✅ Successfully merged {len(file_paths)} audio files! |
|
|
|
🎵 **Merge Details:** |
|
• Total duration: {format_time(total_duration)} ({total_duration:.2f} seconds) |
|
• Sample rate: {target_sample_rate:,} Hz |
|
• Files processed: {len(merged_audio_segments)} |
|
• Individual durations: {', '.join([f'{d:.1f}s' for d in file_durations])} |
|
|
|
🎧 **Result:** Ready for playback and download!""" |
|
|
|
return (target_sample_rate, final_audio), status |
|
|
|
except Exception as e: |
|
return None, f"❌ Error merging audio files: {str(e)}" |
|
|