FrenchDroneKeyword / preprocessing.py
blanchon's picture
to python3.9
4b3147f
import numpy as np
import torch
import librosa
from sklearn.base import BaseEstimator, TransformerMixin
from typing import Callable, Optional
class ReductionTransformer(BaseEstimator, TransformerMixin):
    """Reduce the last axis of an array to per-window statistics.

    The last axis is cut into ``windows_number`` consecutive windows of equal
    length and ``statistique`` is applied to every window. Trailing samples
    that do not fill a whole window are dropped.

    Args:
        windows_number: Number of windows the last axis is split into.
        statistique: Callable applied to each 1-D window (``np.mean`` by
            default).
    """

    def __init__(self, windows_number: int = 300, statistique=np.mean):
        self.windows_number = windows_number
        self.statistique = statistique

    def fit(self, X: np.ndarray, y=None):
        """Do nothing and return the transformer itself (nothing is learned)."""
        return self

    def fit_transform(self, X: np.ndarray, y=None) -> np.ndarray:
        """Call ``fit`` and then return ``transform(X, y)``."""
        self.fit(X, y)
        return self.transform(X, y)

    def transform(self, X: np.ndarray, y=None) -> np.ndarray:
        """Apply ``statistique`` to each window of the last axis.

        Args:
            X: Array whose last axis is the one being reduced.
            y: Ignored.

        Returns:
            The result of ``np.apply_along_axis`` over the windowed array; for
            a scalar-valued ``statistique`` the shape is
            ``(*X.shape[:-1], windows_number)``.
        """
        samples = X.copy()
        *leading, length = samples.shape
        window_len = length // self.windows_number
        # Keep only the samples that fill complete windows.
        kept = self.windows_number * window_len
        windowed = samples[..., :kept].reshape(
            (*leading, self.windows_number, window_len)
        )
        return np.apply_along_axis(self.statistique, axis=-1, arr=windowed)

    def inverse_transform(self, X: np.ndarray) -> np.ndarray:
        """Not available for this transformer.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError
class MeanTransformer(BaseEstimator, TransformerMixin):
    """Reduce the last axis of an array to per-window means.

    The last axis is cut into ``windows_number`` consecutive windows of equal
    length and each window is replaced by its mean. Trailing samples that do
    not fill a whole window are dropped.

    Args:
        windows_number: Number of windows the last axis is split into.
    """

    def __init__(self, windows_number: int = 300):
        self.windows_number = windows_number
        # Window length seen by the latest ``transform`` call; 0 until then.
        self.windows_size = 0

    def fit(self, X: np.ndarray, y=None):
        """Do nothing and return the transformer itself (nothing is learned)."""
        return self

    def fit_transform(self, X: np.ndarray, y=None) -> np.ndarray:
        """Call ``fit`` and then return ``transform(X, y)``."""
        self.fit(X, y)
        return self.transform(X, y)

    def transform(self, X: np.ndarray, y=None) -> np.ndarray:
        """Average each window of the last axis.

        The window length is stored in ``self.windows_size`` so that
        ``inverse_transform`` can rebuild a signal of matching length.

        Args:
            X: Array of shape ``(..., size)``.
            y: Ignored.

        Returns:
            Array of shape ``(..., windows_number)`` holding the window means.
        """
        *leading, length = X.shape
        window_len = length // self.windows_number
        self.windows_size = window_len
        # Slicing/reshaping never writes to X, so no defensive copy is needed.
        kept = self.windows_number * window_len
        windowed = X[..., :kept].reshape(
            (*leading, self.windows_number, window_len)
        )
        return np.mean(windowed, axis=-1)

    def inverse_transform(self, X: np.ndarray) -> np.ndarray:
        """Rebuild an approximate signal by linear interpolation of the means.

        Each mean is placed at the first index of its window and the samples
        in between are linearly interpolated with ``np.interp``; samples past
        the last window start take the last mean. ``transform`` must have been
        called beforehand so that ``self.windows_size`` is non-zero.

        Args:
            X: Array of shape ``(..., windows_number)``. Both a single 1-D
                vector and batched N-D input are accepted.

        Returns:
            Array of shape ``(..., windows_size * windows_number)``.
        """
        original_size = self.windows_size * self.windows_number
        sample_positions = np.arange(start=0, stop=original_size, step=1)
        window_starts = np.arange(
            start=0, stop=original_size, step=self.windows_size
        )
        means = np.asarray(X)
        if means.ndim <= 1:
            return np.interp(x=sample_positions, xp=window_starts, fp=means)
        # np.interp only handles 1-D data, so interpolate each row of the
        # last axis separately for batched input.
        return np.apply_along_axis(
            lambda row: np.interp(x=sample_positions, xp=window_starts, fp=row),
            axis=-1,
            arr=means,
        )
class StdTransformer(BaseEstimator, TransformerMixin):
    """Reduce the last axis of an array to per-window standard deviations.

    The last axis is cut into ``windows_number`` consecutive windows of equal
    length and each window is replaced by its standard deviation. Trailing
    samples that do not fill a whole window are dropped.

    Args:
        windows_number: Number of windows the last axis is split into.
    """

    def __init__(self, windows_number: int = 300):
        self.windows_number = windows_number

    def fit(self, X: np.ndarray, y=None):
        """Do nothing and return the transformer itself (nothing is learned)."""
        return self

    def fit_transform(self, X: np.ndarray, y=None) -> np.ndarray:
        """Call ``fit`` and then return ``transform(X, y)``."""
        self.fit(X, y)
        return self.transform(X, y)

    def transform(self, X: np.ndarray, y=None) -> np.ndarray:
        """Compute the standard deviation of each window of the last axis.

        Args:
            X: Array of shape ``(..., size)``.
            y: Ignored.

        Returns:
            Array of shape ``(..., windows_number)``.
        """
        samples = X.copy()
        *leading, length = samples.shape
        window_len = length // self.windows_number
        # Keep only the samples that fill complete windows.
        kept = self.windows_number * window_len
        windowed = samples[..., :kept].reshape(
            (*leading, self.windows_number, window_len)
        )
        return np.std(windowed, axis=-1)

    def inverse_transform(self, X: np.ndarray) -> np.ndarray:
        """Not available for this transformer.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError
class MfccTransformer(BaseEstimator, TransformerMixin):
    """Turn audio signals into MFCC features with librosa.

    Args:
        sr: Sampling rate handed to librosa.
        N_MFCC: Number of MFCC coefficients to compute.
        hop_length: Hop length handed to librosa.
        reshape_output: When true, ``transform`` flattens everything after the
            first axis into a single feature axis.
    """

    # Number of mel bands used by the forward transform (librosa's default).
    # The inverse must use the same value to undo the DCT correctly.
    _N_MELS = 128

    def __init__(self, sr: int = 22050, N_MFCC: int = 12, hop_length: int = 1024, reshape_output: bool = True):
        self.sr = sr
        self.N_MFCC = N_MFCC
        self.hop_length = hop_length
        self.reshape_output = reshape_output

    def reshape(self, X: np.ndarray) -> np.ndarray:
        """Return a copy of ``X`` reshaped to ``(X.shape[0], -1, N_MFCC)``.

        NOTE(review): if ``X`` is the flattened output of ``transform``, the
        values were presumably laid out coefficient-major by librosa, in which
        case this reshape regroups them rather than restoring a
        (frames, coefficients) layout - confirm the intended ordering.
        """
        X_ = X.copy()
        c_, *_ = X_.shape
        return X_.reshape(c_, -1, self.N_MFCC)

    def fit(self, X: np.ndarray, y=None):
        """Do nothing and return the transformer itself (nothing is learned)."""
        return self

    def fit_transform(self, X: np.ndarray, y=None) -> np.ndarray:
        """Call ``fit`` and then return ``transform(X, y)``."""
        self.fit(X, y)
        return self.transform(X, y)

    def transform(self, X: np.ndarray, y=None) -> np.ndarray:
        """Compute MFCCs for ``X``.

        Args:
            X: Audio array; its first axis is kept as the leading axis of the
                flattened output (presumably one signal per row).
            y: Ignored.

        Returns:
            The array returned by ``librosa.feature.mfcc``, reshaped to
            ``(X.shape[0], -1)`` when ``reshape_output`` is true.
        """
        n_rows = X.shape[0]
        mfcc = librosa.feature.mfcc(
            y=X,
            sr=self.sr,
            hop_length=self.hop_length,
            n_mfcc=self.N_MFCC,
            n_mels=self._N_MELS,
        )
        if self.reshape_output:
            mfcc = mfcc.reshape(n_rows, -1)
        return mfcc

    def inverse_transform(self, X: np.ndarray) -> np.ndarray:
        """Reconstruct audio from MFCCs with ``mfcc_to_audio``.

        The mel-band count, sampling rate and hop length mirror the ones used
        by ``transform``; otherwise librosa would fall back to its own
        defaults and the reconstruction would not match the forward pass.

        Args:
            X: MFCC array in the layout librosa produced, i.e. not flattened
                by ``reshape_output``.

        Returns:
            The reconstructed audio returned by librosa.
        """
        X_reconstruct = librosa.feature.inverse.mfcc_to_audio(
            mfcc=X,
            n_mels=self._N_MELS,
            sr=self.sr,
            hop_length=self.hop_length,
        )
        return X_reconstruct
class MelTransformer(BaseEstimator, TransformerMixin):
    """Turn audio signals into mel spectrograms with librosa.

    Args:
        sr: Sampling rate handed to librosa.
        N_MEL: Number of mel bands to compute.
        hop_length: Hop length handed to librosa.
        reshape_output: When true, ``transform`` flattens everything after the
            first axis into a single feature axis.
    """

    def __init__(self, sr: int = 22050, N_MEL: int = 12, hop_length: int = 1024, reshape_output: bool = True):
        self.sr = sr
        self.N_MEL = N_MEL
        self.hop_length = hop_length
        self.reshape_output = reshape_output

    def reshape(self, X: np.ndarray) -> np.ndarray:
        """Return a copy of ``X`` reshaped to ``(X.shape[0], -1, N_MEL)``.

        NOTE(review): if ``X`` is the flattened output of ``transform``, the
        values were presumably laid out band-major by librosa, in which case
        this reshape regroups them rather than restoring a (frames, bands)
        layout - confirm the intended ordering.
        """
        X_ = X.copy()
        c_, *_ = X_.shape
        return X_.reshape(c_, -1, self.N_MEL)

    def fit(self, X: np.ndarray, y=None):
        """Do nothing and return the transformer itself (nothing is learned)."""
        return self

    def fit_transform(self, X: np.ndarray, y=None) -> np.ndarray:
        """Call ``fit`` and then return ``transform(X, y)``."""
        self.fit(X, y)
        return self.transform(X, y)

    def transform(self, X: np.ndarray, y=None) -> np.ndarray:
        """Compute mel spectrograms for ``X``.

        Args:
            X: Audio array; its first axis is kept as the leading axis of the
                flattened output (presumably one signal per row).
            y: Ignored.

        Returns:
            The array returned by ``librosa.feature.melspectrogram``, reshaped
            to ``(X.shape[0], -1)`` when ``reshape_output`` is true.
        """
        # Only the leading dimension is needed; X itself is passed to librosa,
        # so copying the whole input first would be wasted work.
        n_rows = X.shape[0]
        mel = librosa.feature.melspectrogram(
            y=X,
            sr=self.sr,
            hop_length=self.hop_length,
            n_mels=self.N_MEL,
        )
        if self.reshape_output:
            mel = mel.reshape(n_rows, -1)
        return mel

    def inverse_transform(self, X: np.ndarray) -> np.ndarray:
        """Reconstruct audio from mel spectrograms with ``mel_to_audio``.

        Args:
            X: Mel spectrogram array in the layout librosa produced, i.e. not
                flattened by ``reshape_output``.

        Returns:
            The reconstructed audio returned by librosa.
        """
        X_reconstruct = librosa.feature.inverse.mel_to_audio(
            M=X,
            sr=self.sr,
            hop_length=self.hop_length,
        )
        return X_reconstruct
class TorchTransform(BaseEstimator, TransformerMixin):
    """Convert array data to a torch tensor with an extra axis at position 1."""

    def __init__(self):
        """Create the transformer; it has no parameters."""

    def fit(self, X: np.ndarray, y=None):
        """Do nothing and return the transformer itself (nothing is learned)."""
        return self

    def fit_transform(self, X: np.ndarray, y=None) -> torch.Tensor:
        """Call ``fit`` and then return ``transform(X, y)``."""
        self.fit(X, y)
        return self.transform(X, y)

    def transform(self, X: np.ndarray, y=None) -> torch.Tensor:
        """Build a tensor from ``X`` and insert a size-1 axis at position 1.

        Args:
            X: Data accepted by ``torch.tensor``.
            y: Ignored.

        Returns:
            The new tensor with the added axis.
        """
        as_tensor = torch.tensor(X)
        return as_tensor.unsqueeze(dim=1)

    def inverse_transform(self, X: torch.Tensor) -> np.ndarray:
        """Squeeze axis 1 of ``X`` and convert the result to a NumPy array."""
        squeezed = X.squeeze(dim=1)
        return np.array(squeezed)
class ShuffleTransformer(BaseEstimator, TransformerMixin):
    """Randomly overwrite a fraction of the rows of a dataset with other rows.

    Args:
        p: Fraction of the rows (first axis) selected for replacement.
    """

    def __init__(self, p: float = 0.005):
        self.p = p

    def fit(self, X: np.ndarray, y=None):
        """Do nothing and return the transformer itself (nothing is learned)."""
        return self

    def fit_transform(self, X: np.ndarray, y=None) -> np.ndarray:
        """Call ``fit`` and then return ``transform(X, y)``."""
        self.fit(X, y)
        return self.transform(X, y)

    def transform(self, X: np.ndarray, y=None) -> np.ndarray:
        """Replace ``int(p * n_rows)`` randomly chosen rows by other rows.

        Two index sets of equal size are drawn with ``np.random.choice`` (the
        global NumPy random state). The rows at the first set are overwritten
        with the rows found at the second set.

        NOTE(review): the assignment is one-directional, so the overwritten
        rows are lost instead of being exchanged - confirm whether a true swap
        was intended.

        Args:
            X: Data indexed along its first axis. It is duplicated through its
                ``copy`` or ``clone`` method when it has one; otherwise it is
                modified in place.
            y: Ignored.

        Returns:
            The modified copy (or ``X`` itself when it could not be copied).
        """
        n_rows = X.shape[0]
        targets = np.random.choice(n_rows, int(self.p * n_rows))
        sources = np.random.choice(n_rows, int(self.p * n_rows))

        if hasattr(X, "copy"):
            shuffled = X.copy()
        elif hasattr(X, "clone"):
            shuffled = X.clone()
        else:
            # No known way to duplicate X: operate on the caller's object.
            shuffled = X

        shuffled[targets, ...] = shuffled[sources, ...]
        return shuffled

    def inverse_transform(self, X: np.ndarray) -> np.ndarray:
        """Not available for this transformer.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError