Spaces:

XavierSpycy
/

NumPyNMF-Showcase

Sleeping

NumPyNMF-Showcase / algorithm /preprocess.py

XavierSpycy

First commit

bd67cfe about 1 year ago

8.7 kB

	from typing import Union, Tuple

	import numpy as np

	class MinMaxScaler:
	"""
	This class scales and transforms features to [0, 1].
	"""
	def fit(self, X: np.ndarray) -> None:
	"""
	Compute the minimum and the range of the data for later scaling.

	Parameters:
	- X: numpy array-like, shape (n_samples, n_features)
	The data used to compute the minimum and range used for later scaling.
	"""
	self.min_ = np.min(X, axis=0)
	self.range_ = np.max(X, axis=0) - self.min_


	def transform(self, X: np.ndarray) -> np.ndarray:
	"""
	Scale the data using the values computed during the fit method.

	Parameters:
	- X: numpy array-like, shape (n_samples, n_features)
	Input data that needs to be scaled.

	Returns:
	- numpy array, shape (n_samples, n_features)
	Transformed data.
	"""
	return (X - self.min_) / self.range_

	def fit_transform(self, X: np.ndarray) -> np.ndarray:
	"""
	Fit to the data and then transform it.

	Parameters:
	- X: numpy array-like, shape (n_samples, n_features)
	Input data that needs to be scaled and transformed.

	Returns:
	- numpy array, shape (n_samples, n_features)
	Transformed data.
	"""
	self.fit(X)
	return self.transform(X)

	class StandardScaler:
	"""
	This class standardizes features by removing the mean and scaling to unit variance.
	"""
	def fit(self, X: np.ndarray) -> None:
	"""
	Compute the mean and standard deviation of the data for later standardization.

	Parameters:
	- X: numpy array-like, shape (n_samples, n_features)
	The data used to compute the mean and standard deviation used for later standardization.
	"""
	self.mean_ = np.mean(X, axis=0)
	self.std_ = np.std(X, axis=0)

	def transform(self, X: np.ndarray) -> np.ndarray:
	"""
	Standardize the data using the values computed during the fit method.

	Parameters:
	- X: numpy array-like, shape (n_samples, n_features)
	Input data that needs to be standardized.

	Returns:
	- numpy array, shape (n_samples, n_features)
	Transformed data.
	"""
	return (X - self.mean_) / self.std_

	def fit_transform(self, X: np.ndarray) -> np.ndarray:
	"""
	Fit to the data and then transform it.

	Parameters:
	- X: numpy array-like, shape (n_samples, n_features)
	Input data that needs to be standardized and transformed.

	Returns:
	- numpy array, shape (n_samples, n_features)
	Transformed data.
	"""
	self.fit(X)
	return self.transform(X)

	class NoiseAdder:
	"""
	This class adds noise to data.
	"""
	def __init__(self, random_state: Union[int, np.random.RandomState, None]=None) -> None:
	"""
	Initializes the NoiseAdder with a random state and noise parameters.

	Parameters:
	- random_state (int or RandomState instance or None): Controls the randomness. If int, is used as seed for RandomState.
	- noise_params (dict): Additional noise parameters.
	"""
	self.rng = np.random.RandomState(random_state)

	def add_uniform_noise(self, X_hat: np.ndarray, noise_level: int=0.1) -> Tuple[np.ndarray, np.ndarray]:
	"""
	Add uniform random noise to data.

	Parameters:
	- X_hat (numpy array): Original data.

	Returns:
	- Numpy array of uniform noise.
	- Numpy array with added uniform noise.
	"""
	a, b = 0, 1
	# Generate noise
	X_noise = self.rng.uniform(a, b, size=X_hat.shape) * noise_level * (np.max(X_hat) - np.min(X_hat))
	return X_noise, X_hat + X_noise

	def add_gaussian_noise(self, X_hat, noise_level=0.1):
	"""
	Add Gaussian noise to data.

	Parameters:
	- X_hat (numpy array): Original data.
	- mean (float): Mean of the Gaussian distribution.
	- std (float): Standard deviation of the Gaussian distribution.

	Returns:
	- Numpy array of Gaussian noise.
	- Numpy array with added Gaussian noise.
	"""
	mean, std = 0, 1
	# Generate noise
	X_noise = self.rng.normal(mean, std, size=X_hat.shape) * noise_level * (np.max(X_hat) - np.min(X_hat))
	return X_noise, X_hat + X_noise

	def add_laplacian_noise(self, X_hat, noise_level=0.1):
	"""
	Add Laplacian noise to data.

	Parameters:
	- X_hat (numpy array): Original data.
	- mu (float): Location parameter for the Laplacian distribution.
	- lambd (float): Scale (diversity) parameter for the Laplacian distribution.

	Returns:
	- Numpy array of Laplacian noise.
	- Numpy array with added Laplacian noise.
	"""
	# Initialize parameters
	mu, lambd = 0, 1
	# Generate noise
	X_noise = self.rng.laplace(mu, lambd, size=X_hat.shape) * noise_level * np.max(X_hat)
	return X_noise, X_hat + X_noise

	def add_block_noise(self, X_hat: np.ndarray, img_width: int, block_size: int=10) -> Tuple[np.ndarray, np.ndarray]:
	"""
	Add block noise to multiple flattened image samples.

	Parameters:
	- X (numpy array): Array of shape (m, n) where m is flattened image length and n is number of samples
	- img_width (int): width of the original image
	- block_size (int): size of the block to occlude

	Returns:
	- Numpy array of noise added to each sample
	- Numpy array with added block noise for all samples
	"""
	# Initalize parameters
	X = X_hat.copy()
	m, n_samples = X.shape
	X_noise = np.zeros((m, n_samples), dtype=np.uint8)
	# For each sample in X
	for i in range(n_samples):
	sample = X[:, i]
	# Reshape the flattened array to 2D
	img_2d = sample.reshape(-1, img_width)
	height, width = img_2d.shape
	# Ensure the block size isn't larger than the image dimensions
	block_size = min(block_size, width, height)
	# Generate a random starting point for the block
	x_start = self.rng.randint(0, width - block_size)
	y_start = self.rng.randint(0, height - block_size)
	# Add block noise
	img_2d[y_start:y_start+block_size, x_start:x_start+block_size] = 255
	# Store the noise block to noise array
	noise_2d = np.zeros((height, width), dtype=np.uint8)
	noise_2d[y_start:y_start+block_size, x_start:x_start+block_size] = 255
	X_noise[:, i] = noise_2d.ravel()
	# Flatten the array back to 1D and store back in X
	X[:, i] = img_2d.ravel()
	return X_noise, X

	def add_salt_and_pepper_noise(self, X_hat, noise_level=0.02, salt_ratio=0.5) -> Tuple[np.ndarray, np.ndarray]:
	"""
	Add "salt and pepper" noise to data.

	Parameters:
	- X_hat (numpy array): Original data.
	- amount (float): Proportion of image pixels to be replaced.
	- salt_ratio (float): Proportion of replaced pixels that are "salt".

	Returns:
	- Numpy array of salt and pepper noise.
	- Numpy array with added salt and pepper noise.
	"""
	# Initialize parameters
	X = X_hat.copy()
	X_noise = np.zeros_like(X)
	# Get the total number of pixels that should be replaced by noise
	total_pixels = X.size
	num_noise_pixels = int(total_pixels * noise_level)
	# Separate the number of salt and pepper pixels based on the salt_ratio
	num_salt = int(num_noise_pixels * salt_ratio)
	num_pepper = num_noise_pixels - num_salt
	# Directly generate the noise coordinates without overlap
	noise_coords = self.rng.choice(total_pixels, num_noise_pixels, replace=False)
	salt_coords = noise_coords[:num_salt]
	pepper_coords = noise_coords[num_salt:]
	# Convert the 1D noise coordinates back to tuple of N-dim coordinates
	salt_coords = np.unravel_index(salt_coords, X.shape)
	pepper_coords = np.unravel_index(pepper_coords, X.shape)
	# Set salt and pepper pixels in the image
	max_pixel_val = np.max(X)
	X_noise[salt_coords] = max_pixel_val
	X_noise[pepper_coords] = 0
	X[salt_coords] = max_pixel_val
	X[pepper_coords] = 0
	return X_noise, X