# NumPyNMF-Showcase / algorithm / preprocess.py
# XavierSpycy
# First commit
# bd67cfe
from typing import Union, Tuple
import numpy as np
class MinMaxScaler:
    """
    Scale each feature to [0, 1] using the minimum and range seen during fit.

    Attributes set by `fit`:
    - min_: per-feature minimum of the fitted data.
    - range_: per-feature (max - min) of the fitted data.
    """

    def fit(self, X: np.ndarray) -> None:
        """
        Compute the minimum and the range of the data for later scaling.

        Parameters:
        - X: numpy array-like, shape (n_samples, n_features)
            The data used to compute the per-feature minimum and range.
        """
        self.min_ = np.min(X, axis=0)
        self.range_ = np.max(X, axis=0) - self.min_

    def transform(self, X: np.ndarray) -> np.ndarray:
        """
        Scale the data using the values computed during the fit method.

        Features that were constant in the fitted data (range of zero) are
        divided by 1 instead of 0, so they map to `X - min_` (0 for the
        fitted data) rather than NaN or inf.

        Parameters:
        - X: numpy array-like, shape (n_samples, n_features)
            Input data that needs to be scaled.

        Returns:
        - numpy array, shape (n_samples, n_features)
            Transformed data.
        """
        span = np.asarray(self.range_)
        # ones_like keeps the dtype of the fitted range, so the output dtype
        # is the same as a plain division would give.
        safe_span = np.where(span == 0, np.ones_like(span), span)
        return (X - self.min_) / safe_span

    def fit_transform(self, X: np.ndarray) -> np.ndarray:
        """
        Fit to the data and then transform it.

        Parameters:
        - X: numpy array-like, shape (n_samples, n_features)
            Input data that needs to be scaled and transformed.

        Returns:
        - numpy array, shape (n_samples, n_features)
            Transformed data.
        """
        self.fit(X)
        return self.transform(X)
class StandardScaler:
    """
    Standardize features by removing the mean and scaling to unit variance.

    Attributes set by `fit`:
    - mean_: per-feature mean of the fitted data.
    - std_: per-feature standard deviation of the fitted data (np.std default,
      i.e. the population standard deviation).
    """

    def fit(self, X: np.ndarray) -> None:
        """
        Compute the mean and standard deviation of the data for later standardization.

        Parameters:
        - X: numpy array-like, shape (n_samples, n_features)
            The data used to compute the per-feature mean and standard deviation.
        """
        self.mean_ = np.mean(X, axis=0)
        self.std_ = np.std(X, axis=0)

    def transform(self, X: np.ndarray) -> np.ndarray:
        """
        Standardize the data using the values computed during the fit method.

        Features that were constant in the fitted data (standard deviation of
        zero) are divided by 1 instead of 0, so they map to `X - mean_`
        (0 for the fitted data) rather than NaN or inf.

        Parameters:
        - X: numpy array-like, shape (n_samples, n_features)
            Input data that needs to be standardized.

        Returns:
        - numpy array, shape (n_samples, n_features)
            Transformed data.
        """
        spread = np.asarray(self.std_)
        # ones_like keeps the dtype of the fitted std, so the output dtype
        # is the same as a plain division would give.
        safe_spread = np.where(spread == 0, np.ones_like(spread), spread)
        return (X - self.mean_) / safe_spread

    def fit_transform(self, X: np.ndarray) -> np.ndarray:
        """
        Fit to the data and then transform it.

        Parameters:
        - X: numpy array-like, shape (n_samples, n_features)
            Input data that needs to be standardized and transformed.

        Returns:
        - numpy array, shape (n_samples, n_features)
            Transformed data.
        """
        self.fit(X)
        return self.transform(X)
class NoiseAdder:
    """
    Add synthetic noise (uniform, Gaussian, Laplacian, block, salt-and-pepper) to data.

    Every `add_*` method returns a `(noise, noisy_data)` tuple and leaves the
    input array unmodified.
    """

    def __init__(self, random_state: Union[int, np.random.RandomState, None] = None) -> None:
        """
        Initialize the NoiseAdder with a random state.

        Parameters:
        - random_state (int or RandomState instance or None): Controls the randomness.
            If a RandomState instance, it is used as-is (and shared with the caller).
            Otherwise the value is used as the seed of a new RandomState.
        """
        # Passing an existing RandomState to the RandomState constructor treats
        # it as a seed and fails, so an instance is stored directly instead.
        if isinstance(random_state, np.random.RandomState):
            self.rng = random_state
        else:
            self.rng = np.random.RandomState(random_state)

    def add_uniform_noise(self, X_hat: np.ndarray, noise_level: float = 0.1) -> Tuple[np.ndarray, np.ndarray]:
        """
        Add uniform random noise to data.

        The noise is drawn from U[0, 1) and scaled by `noise_level` times the
        value range (max - min) of `X_hat`.

        Parameters:
        - X_hat (numpy array): Original data.
        - noise_level (float): Noise magnitude as a fraction of the data range.

        Returns:
        - Numpy array of uniform noise.
        - Numpy array with added uniform noise.
        """
        low, high = 0, 1
        data_range = np.max(X_hat) - np.min(X_hat)
        X_noise = self.rng.uniform(low, high, size=X_hat.shape) * noise_level * data_range
        return X_noise, X_hat + X_noise

    def add_gaussian_noise(self, X_hat: np.ndarray, noise_level: float = 0.1) -> Tuple[np.ndarray, np.ndarray]:
        """
        Add Gaussian noise to data.

        The noise is drawn from a standard normal distribution (mean 0, std 1)
        and scaled by `noise_level` times the value range (max - min) of `X_hat`.

        Parameters:
        - X_hat (numpy array): Original data.
        - noise_level (float): Noise magnitude as a fraction of the data range.

        Returns:
        - Numpy array of Gaussian noise.
        - Numpy array with added Gaussian noise.
        """
        mean, std = 0, 1
        data_range = np.max(X_hat) - np.min(X_hat)
        X_noise = self.rng.normal(mean, std, size=X_hat.shape) * noise_level * data_range
        return X_noise, X_hat + X_noise

    def add_laplacian_noise(self, X_hat: np.ndarray, noise_level: float = 0.1) -> Tuple[np.ndarray, np.ndarray]:
        """
        Add Laplacian noise to data.

        The noise is drawn from a Laplace distribution (location 0, scale 1)
        and scaled by `noise_level` times the maximum of `X_hat`.

        Parameters:
        - X_hat (numpy array): Original data.
        - noise_level (float): Noise magnitude as a fraction of the data maximum.

        Returns:
        - Numpy array of Laplacian noise.
        - Numpy array with added Laplacian noise.
        """
        mu, lambd = 0, 1
        # NOTE(review): scaled by max(X_hat), whereas the uniform and Gaussian
        # variants scale by (max - min). Kept as-is to preserve existing output;
        # confirm whether the difference is intentional.
        X_noise = self.rng.laplace(mu, lambd, size=X_hat.shape) * noise_level * np.max(X_hat)
        return X_noise, X_hat + X_noise

    def add_block_noise(self, X_hat: np.ndarray, img_width: int, block_size: int = 10) -> Tuple[np.ndarray, np.ndarray]:
        """
        Add block noise to multiple flattened image samples.

        For each sample, one square block at a random position is set to 255.

        Parameters:
        - X_hat (numpy array): Array of shape (m, n) where m is the flattened image
            length and n is the number of samples.
        - img_width (int): Width of the original image; m must be divisible by it.
        - block_size (int): Side length of the block to occlude. Clamped to the
            image width and height.

        Returns:
        - Numpy array (uint8, shape (m, n)) holding 255 inside each block and 0 elsewhere.
        - Numpy array with added block noise for all samples.
        """
        X = X_hat.copy()
        m, n_samples = X.shape
        X_noise = np.zeros((m, n_samples), dtype=np.uint8)
        for i in range(n_samples):
            # Reshape the flattened sample back to a 2D image
            img_2d = X[:, i].reshape(-1, img_width)
            height, width = img_2d.shape
            # Ensure the block isn't larger than the image dimensions
            side = min(block_size, width, height)
            # randint's upper bound is exclusive: +1 lets the block reach the
            # right/bottom edge and keeps a full-size block (side == width or
            # height) from requesting the empty range [0, 0).
            x_start = self.rng.randint(0, width - side + 1)
            y_start = self.rng.randint(0, height - side + 1)
            rows = slice(y_start, y_start + side)
            cols = slice(x_start, x_start + side)
            # Occlude the block in the image
            img_2d[rows, cols] = 255
            # Record the same block in the noise array
            noise_2d = np.zeros((height, width), dtype=np.uint8)
            noise_2d[rows, cols] = 255
            X_noise[:, i] = noise_2d.ravel()
            # Flatten back to 1D and store in X (reshape may have returned a copy)
            X[:, i] = img_2d.ravel()
        return X_noise, X

    def add_salt_and_pepper_noise(self, X_hat: np.ndarray, noise_level: float = 0.02, salt_ratio: float = 0.5) -> Tuple[np.ndarray, np.ndarray]:
        """
        Add "salt and pepper" noise to data.

        Salt pixels are set to the maximum value of `X_hat`; pepper pixels are set to 0.

        Parameters:
        - X_hat (numpy array): Original data.
        - noise_level (float): Proportion of pixels to be replaced.
        - salt_ratio (float): Proportion of replaced pixels that are "salt".

        Returns:
        - Numpy array of salt and pepper noise (salt value at salt pixels, 0 elsewhere).
        - Numpy array with added salt and pepper noise.
        """
        X = X_hat.copy()
        X_noise = np.zeros_like(X)
        # Total number of pixels to replace, split into salt and pepper
        total_pixels = X.size
        num_noise_pixels = int(total_pixels * noise_level)
        num_salt = int(num_noise_pixels * salt_ratio)
        # Sample flat positions without replacement so salt and pepper never overlap
        noise_coords = self.rng.choice(total_pixels, num_noise_pixels, replace=False)
        # Convert the flat positions to N-dimensional index tuples
        salt_coords = np.unravel_index(noise_coords[:num_salt], X.shape)
        pepper_coords = np.unravel_index(noise_coords[num_salt:], X.shape)
        max_pixel_val = np.max(X)
        X_noise[salt_coords] = max_pixel_val
        # Pepper is 0; written explicitly even though X_noise starts as zeros
        X_noise[pepper_coords] = 0
        X[salt_coords] = max_pixel_val
        X[pepper_coords] = 0
        return X_noise, X