Spaces:

daasime
/

sop-audio-analyzer

Sleeping

App Files Files Community

sop-audio-analyzer / src /phase1_foundation /preprocessor.py

daasime

Add SOP Audio Analyzer app files

ebba35f 2 months ago

raw

history blame contribute delete

3.42 kB

	"""
	Audio preprocessor - normalize audio for analysis.
	"""
	import torch
	import torchaudio
	import numpy as np
	from pathlib import Path
	from typing import Tuple, Optional
	import tempfile
	import os


	class AudioPreprocessor:
	    """Normalize audio to a standard format for analysis.

	    Waveforms handled by this class are indexed as ``(channels, samples)``:
	    dimension 0 is averaged to produce mono and dimension 1 is used for
	    durations and time slicing.
	    """

	    TARGET_SAMPLE_RATE = 16000
	    TARGET_CHANNELS = 1
	    # Peak absolute amplitude after normalization; kept slightly below 1.0.
	    PEAK_AMPLITUDE = 0.95

	    def __init__(self):
	        """Create a preprocessor; it holds no per-instance state."""
	        pass

	    def load_audio(self, audio_path: str) -> Tuple[torch.Tensor, int]:
	        """
	        Load audio file.

	        Args:
	            audio_path: Path of the audio file to read.

	        Returns:
	            Tuple of (waveform, sample_rate)
	        """
	        # Use soundfile backend to avoid torchcodec dependency
	        waveform, sample_rate = torchaudio.load(audio_path, backend="soundfile")
	        return waveform, sample_rate

	    def normalize(self, waveform: torch.Tensor, sample_rate: int) -> Tuple[torch.Tensor, int]:
	        """
	        Normalize audio to mono, 16kHz, normalized amplitude.

	        Args:
	            waveform: Audio tensor indexed as (channels, samples).
	            sample_rate: Sample rate of ``waveform`` in Hz.

	        Returns:
	            Tuple of (normalized_waveform, target_sample_rate)
	        """
	        # Convert to mono by averaging the channels.
	        if waveform.shape[0] > 1:
	            waveform = torch.mean(waveform, dim=0, keepdim=True)

	        # A zero-sample waveform has nothing to resample or scale, and
	        # ``.max()`` on an empty tensor raises, so return it as is.
	        if waveform.numel() == 0:
	            return waveform, self.TARGET_SAMPLE_RATE

	        # Resample to the target rate only when it actually differs.
	        if sample_rate != self.TARGET_SAMPLE_RATE:
	            resampler = torchaudio.transforms.Resample(
	                orig_freq=sample_rate,
	                new_freq=self.TARGET_SAMPLE_RATE,
	            )
	            waveform = resampler(waveform)

	        # Scale so the loudest sample sits at PEAK_AMPLITUDE; pure silence is
	        # left untouched to avoid dividing by zero.
	        max_amp = waveform.abs().max()
	        if max_amp > 0:
	            waveform = waveform / max_amp * self.PEAK_AMPLITUDE

	        return waveform, self.TARGET_SAMPLE_RATE

	    def process_file(self, audio_path: str, output_path: Optional[str] = None) -> Tuple[torch.Tensor, int, dict]:
	        """
	        Load and normalize audio file.

	        Args:
	            audio_path: Path of the audio file to read.
	            output_path: When given (and non-empty), the normalized audio is
	                also written to this path; missing parent directories are
	                created.

	        Returns:
	            Tuple of (waveform, sample_rate, metadata)
	        """
	        # Load, and record the properties of the untouched input.
	        waveform, orig_sr = self.load_audio(audio_path)
	        orig_duration = waveform.shape[1] / orig_sr
	        orig_channels = waveform.shape[0]

	        # Normalize
	        waveform, sample_rate = self.normalize(waveform, orig_sr)

	        # Save if output path provided; go through save_audio so the
	        # directory handling matches the public save method.
	        if output_path:
	            self.save_audio(waveform, sample_rate, output_path)

	        metadata = {
	            'original_sample_rate': orig_sr,
	            'original_channels': orig_channels,
	            'original_duration': orig_duration,
	            'normalized_sample_rate': sample_rate,
	            'normalized_duration': waveform.shape[1] / sample_rate,
	        }

	        return waveform, sample_rate, metadata

	    def get_duration(self, waveform: torch.Tensor, sample_rate: int) -> float:
	        """Get duration in seconds (samples along dimension 1 / sample_rate)."""
	        return waveform.shape[1] / sample_rate

	    def save_audio(self, waveform: torch.Tensor, sample_rate: int, output_path: str):
	        """Save audio to file, creating the parent directory if needed.

	        Args:
	            waveform: Audio tensor to write.
	            sample_rate: Sample rate of ``waveform`` in Hz.
	            output_path: Destination file path.
	        """
	        # dirname() is "" for a bare filename, and os.makedirs("") raises
	        # FileNotFoundError, so only create a directory when one is named.
	        parent_dir = os.path.dirname(output_path)
	        if parent_dir:
	            os.makedirs(parent_dir, exist_ok=True)
	        torchaudio.save(output_path, waveform, sample_rate)

	    def extract_segment(self, waveform: torch.Tensor, sample_rate: int,
	                        start: float, end: float) -> torch.Tensor:
	        """Extract segment from waveform.

	        Args:
	            waveform: Audio tensor indexed as (channels, samples).
	            sample_rate: Sample rate of ``waveform`` in Hz.
	            start: Segment start in seconds; values below 0 are treated as 0.
	            end: Segment end in seconds (exclusive); values below 0 are
	                treated as 0.

	        Returns:
	            The slice of ``waveform`` between the two offsets. It is empty
	            when ``end`` does not lie after ``start``.
	        """
	        # Clamp to 0: a negative index would otherwise count from the end of
	        # the waveform and silently return the wrong audio.
	        start_sample = max(0, int(start * sample_rate))
	        end_sample = max(0, int(end * sample_rate))
	        return waveform[:, start_sample:end_sample]