# Spaces:
# Running
# Running
import random

import cv2
import numpy as np
# Names exported by a star-import of this module.
__all__ = [
    'Compose',
    'Normalize',
    'CenterCrop',
    'RgbToGray',
    'RandomCrop',
    'HorizontalFlip',
    'AddNoise',
    'NormalizeUtterance',
]
class Compose(object):
    """Chain several preprocessing callables into one callable.

    Args:
        preprocess (list of ``Preprocess`` objects): transforms to apply,
            in list order.
    """

    # Usage example carried over from the original note (which says this
    # class is used from dataloaders.py):
    #     preprocessing['train'] = Compose([
    #         Normalize(0.0, 255.0),
    #         RandomCrop(crop_size),
    #         HorizontalFlip(0.5),
    #         Normalize(mean, std)])

    def __init__(self, preprocess):
        self.preprocess = preprocess

    def __call__(self, sample):
        """Feed ``sample`` through every transform and return the result."""
        result = sample
        for transform in self.preprocess:
            # Each transform receives the output of the previous one.
            result = transform(result)
        return result

    def __repr__(self):
        """Return the class name with one line per contained transform."""
        lines = [self.__class__.__name__ + '(']
        lines.extend(' {0}'.format(transform) for transform in self.preprocess)
        lines.append(')')
        return '\n'.join(lines)
class RgbToGray(object):
    """Convert a sequence of RGB frames to grayscale.

    Every item obtained by iterating over the input is converted with
    ``cv2.cvtColor(..., cv2.COLOR_RGB2GRAY)`` and the converted frames are
    stacked along a new leading axis.
    """

    def __call__(self, frames):
        """
        Args:
            frames: iterable of RGB images (presumably H x W x 3 arrays --
                confirm against the caller).
        Returns:
            numpy.ndarray: the grayscale frames stacked along axis 0.
        """
        gray_frames = []
        for frame in frames:
            gray_frames.append(cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY))
        return np.stack(gray_frames, axis=0)

    def __repr__(self):
        return '{0}()'.format(self.__class__.__name__)
class Normalize(object):
    """Shift the input by ``mean`` and scale it by ``std``.

    Args:
        mean: value subtracted from the input.
        std: value the shifted input is divided by.
    """

    def __init__(self, mean, std):
        self.mean = mean
        self.std = std

    def __call__(self, frames):
        """
        Args:
            frames: data to normalize; anything supporting ``-`` and ``/``
                with ``mean`` and ``std``.
        Returns:
            ``(frames - mean) / std``.
        """
        # Deviation from the mean divided by the standard deviation
        # (z-score normalization when mean/std are the data statistics).
        centered = frames - self.mean
        return centered / self.std

    def __repr__(self):
        return '{0}(mean={1}, std={2})'.format(
            self.__class__.__name__, self.mean, self.std)
class CenterCrop(object):
    """Crop every frame to ``size`` around the frame center.

    Args:
        size: ``(height, width)`` of the crop window.
    """

    def __init__(self, size):
        self.size = size

    def __call__(self, frames):
        """
        Args:
            frames (numpy.ndarray): array whose shape unpacks to (t, h, w).
        Returns:
            numpy.ndarray: slice of ``frames`` with shape (t, th, tw).
        Raises:
            ValueError: if the crop window is larger than the frames.
        """
        _, h, w = frames.shape
        th, tw = self.size  # requested crop height and width
        if th > h or tw > w:
            # Negative offsets would silently yield a wrongly sized slice.
            raise ValueError(
                'crop size {0} exceeds frame size {1}'.format((th, tw), (h, w)))
        # Half of the margin, truncated: with an odd margin the extra pixel
        # is left on the bottom/right side.
        delta_w = (w - tw) // 2
        delta_h = (h - th) // 2
        return frames[:, delta_h:delta_h + th, delta_w:delta_w + tw]

    def __repr__(self):
        return self.__class__.__name__ + '(size={0})'.format(self.size)
class RandomCrop(object):
    """Crop all frames with one randomly positioned window.

    Args:
        size: ``(height, width)`` of the crop window.
    """

    def __init__(self, size):
        self.size = size

    def __call__(self, frames):
        """
        Args:
            frames (numpy.ndarray): array whose shape unpacks to (t, h, w).
                The original note mentions 96 x 96 frames.
        Returns:
            numpy.ndarray: slice of ``frames`` with shape (t, th, tw); the
            same window is used for every frame.
        """
        _, height, width = frames.shape
        crop_h, crop_w = self.size
        # The horizontal offset is drawn first, then the vertical one.
        left = random.randint(0, width - crop_w)
        top = random.randint(0, height - crop_h)
        return frames[:, top:top + crop_h, left:left + crop_w]

    def __repr__(self):
        # Shows the configured crop size.
        return '{0}(size={1})'.format(self.__class__.__name__, self.size)
class HorizontalFlip(object):
    """Mirror all frames left-to-right with probability ``flip_ratio``.

    Args:
        flip_ratio: probability of applying the flip to a given input.
    """

    def __init__(self, flip_ratio):
        self.flip_ratio = flip_ratio

    def __call__(self, frames):
        """
        Args:
            frames (numpy.ndarray): frames indexed as (t, h, w).
        Returns:
            numpy.ndarray: a flipped copy when the flip is applied,
            otherwise ``frames`` itself. The input is never modified.
        """
        if random.random() < self.flip_ratio:
            # Reverse the width axis of every frame in one operation. A
            # contiguous copy is returned so the caller's array stays
            # untouched and the result has no negative strides.
            return np.ascontiguousarray(frames[:, :, ::-1])
        return frames

    def __repr__(self):
        return self.__class__.__name__ + '(flip_ratio={0})'.format(self.flip_ratio)
class NormalizeUtterance(object):
    """Normalize a raw audio signal to zero mean and unit standard deviation.
    """

    def __call__(self, signal):
        """
        Args:
            signal (numpy.ndarray): raw audio samples.
        Returns:
            numpy.ndarray: ``(signal - mean) / std`` (z-score). A constant
            signal (std == 0) is returned as all zeros.
        """
        signal_std = np.std(signal)
        if signal_std == 0.:
            # Constant signal: divide by 1 so the result is zeros, not NaN.
            signal_std = 1.
        return (signal - np.mean(signal)) / signal_std

    def __repr__(self):
        return self.__class__.__name__ + '()'
class AddNoise(object):
    """Mix a random excerpt of ``noise`` into a signal at a random SNR.

    SNR is the signal-to-noise ratio in dB; a larger value scales the added
    noise down further. The level 9999 means "leave the signal untouched".
    NOTE(review): the original docstring mentions a [-1, 1] range,
    presumably the expected sample amplitude range -- confirm.

    Args:
        noise (numpy.ndarray): float32/float64 noise recording to draw
            excerpts from.
        snr_levels: candidate SNR levels (dB); one is picked per call.
    """

    def __init__(self, noise, snr_levels=(-5, 0, 5, 10, 15, 20, 9999)):
        assert noise.dtype in [np.float32, np.float64], "noise only supports float data type"
        self.noise = noise
        # Per-instance copy so instances never share one (default) list.
        self.snr_levels = list(snr_levels)

    def get_power(self, clip):
        """Return the sum of squared samples divided by ``len(clip)``."""
        return np.sum(clip ** 2) / (len(clip) * 1.0)

    def __call__(self, signal):
        """
        Args:
            signal (numpy.ndarray): float32/float64 audio samples.
        Returns:
            numpy.ndarray: ``signal`` itself when the chosen level is 9999,
            otherwise the noisy signal as float32.
        Raises:
            ValueError: if ``noise`` is shorter than ``signal``.
        """
        assert signal.dtype in [np.float32, np.float64], "signal only supports float data type"
        snr_target = random.choice(self.snr_levels)
        if snr_target == 9999:
            return signal

        # -- get noise: pick a random excerpt as long as the signal
        max_start = len(self.noise) - len(signal)
        if max_start < 0:
            raise ValueError(
                'noise (length {0}) is shorter than signal (length {1})'.format(
                    len(self.noise), len(signal)))
        start_idx = random.randint(0, max_start)
        noise_clip = self.noise[start_idx:start_idx + len(signal)]

        sig_power = self.get_power(signal)
        noise_clip_power = self.get_power(noise_clip)
        if noise_clip_power == 0:
            # Silent excerpt: scaling it would give 0 * inf = NaN, and adding
            # silence changes nothing, so return the signal as float32.
            return signal.astype(np.float32)

        # Scale the noise so that sig_power / scaled_noise_power matches the
        # target SNR (converted from dB).
        factor = (sig_power / noise_clip_power) / (10 ** (snr_target / 10.0))
        return (signal + noise_clip * np.sqrt(factor)).astype(np.float32)

    def __repr__(self):
        return self.__class__.__name__ + '(snr_levels={0})'.format(self.snr_levels)