Spaces:

f2ai
/

exp-audio-recorder

Sleeping

Upload folder using huggingface_hub

ad93d56 verified 6 months ago

1.51 kB

	from abc import ABC, abstractmethod

	import librosa
	import numpy as np


	class AudioPreprocessor(ABC):
	    """
	    Abstract base class for all audio preprocessors.

	    ``preprocess`` is declared abstract, so concrete subclasses must override
	    it. The body defined here is a default pipeline (peak normalization,
	    silence trimming, pre-emphasis) that an override can reuse through
	    ``super().preprocess(audio, fs)``.
	    """

	    def __init__(self):
	        pass

	    @abstractmethod
	    def preprocess(self, audio: np.ndarray, fs: int) -> np.ndarray:
	        """
	        Peak-normalize, trim silence from, and pre-emphasize an audio signal.

	        Args:
	            audio: Audio samples. Must contain at least one sample.
	            fs: Sampling rate of ``audio`` (presumably in Hz). It is not used
	                by this default implementation.

	        Returns:
	            The pre-emphasized signal produced from the normalized and
	            trimmed input.

	        Raises:
	            ValueError: If ``audio`` contains no samples.
	        """
	        audio = np.asarray(audio)
	        if audio.size == 0:
	            # np.max would raise a cryptic "zero-size array" ValueError here;
	            # fail with the same exception type but a message that names the
	            # actual problem.
	            raise ValueError("audio is empty; cannot preprocess a signal with no samples")

	        # Normalization: adjust the amplitude so that it is in a consistent
	        # range, which makes signals from different sources or recorded at
	        # different volumes comparable. Each sample is divided by the
	        # absolute maximum value of the signal.
	        peak = np.max(np.abs(audio))
	        # A silent (all-zero) signal has a peak of 0, and dividing by it would
	        # turn every sample into NaN. Dividing by 1 instead leaves the samples
	        # unchanged while still producing the usual floating-point result.
	        divisor = peak if peak > 0 else 1
	        audio = audio / divisor

	        # Silence removal: drop the leading and trailing parts of the signal
	        # that fall below the threshold (top_db=20), reducing the data to be
	        # processed and focusing on the relevant parts of the audio.
	        trimmed_audio, _ = librosa.effects.trim(audio, top_db=20)

	        # Pre-emphasis: apply a first-order filter that increases the energy
	        # of high frequencies relative to low frequencies. This compensates
	        # for the tendency of speech signals to have less energy in high
	        # frequencies, which can improve feature extraction.
	        preemphasized_audio = librosa.effects.preemphasis(trimmed_audio)

	        return preemphasized_audio