|
|
from timm.data.auto_augment import rand_augment_transform, augment_and_mix_transform, auto_augment_transform |
|
|
from timm.data.transforms import RandomResizedCropAndInterpolation |
|
|
from PIL import Image |
|
|
import torchvision.transforms as transforms |
|
|
import cv2 |
|
|
import numpy as np |
|
|
import torchvision.transforms.functional as F |
|
|
|
|
|
|
|
|
|
|
|
class JPEGCompression: |
|
|
def __init__(self, quality=10, p=0.3): |
|
|
self.quality = quality |
|
|
self.p = p |
|
|
|
|
|
def __call__(self, img): |
|
|
if np.random.rand() < self.p: |
|
|
img_np = np.array(img) |
|
|
_, buffer = cv2.imencode('.jpg', img_np[:, :, ::-1], [int(cv2.IMWRITE_JPEG_QUALITY), self.quality]) |
|
|
jpeg_img = cv2.imdecode(buffer, 1) |
|
|
return Image.fromarray(jpeg_img[:, :, ::-1]) |
|
|
return img |
|
|
|
|
|
|
|
|
|
|
|
def transforms_imagenet_train( |
|
|
img_size=(224, 224), |
|
|
scale=(0.08, 1.0), |
|
|
ratio=(3./4., 4./3.), |
|
|
hflip=0.5, |
|
|
vflip=0.5, |
|
|
auto_augment='rand-m9-mstd0.5-inc1', |
|
|
interpolation='random', |
|
|
mean=(0.485, 0.456, 0.406), |
|
|
jpeg_compression = 0, |
|
|
): |
|
|
scale = tuple(scale or (0.08, 1.0)) |
|
|
ratio = tuple(ratio or (3./4., 4./3.)) |
|
|
|
|
|
primary_tfl = [ |
|
|
RandomResizedCropAndInterpolation(img_size, scale=scale, ratio=ratio, interpolation=interpolation)] |
|
|
if hflip > 0.: |
|
|
primary_tfl += [transforms.RandomHorizontalFlip(p=hflip)] |
|
|
if vflip > 0.: |
|
|
primary_tfl += [transforms.RandomVerticalFlip(p=vflip)] |
|
|
|
|
|
secondary_tfl = [] |
|
|
if auto_augment: |
|
|
assert isinstance(auto_augment, str) |
|
|
|
|
|
if isinstance(img_size, (tuple, list)): |
|
|
img_size_min = min(img_size) |
|
|
else: |
|
|
img_size_min = img_size |
|
|
|
|
|
aa_params = dict( |
|
|
translate_const=int(img_size_min * 0.45), |
|
|
img_mean=tuple([min(255, round(255 * x)) for x in mean]), |
|
|
) |
|
|
if auto_augment.startswith('rand'): |
|
|
secondary_tfl += [rand_augment_transform(auto_augment, aa_params)] |
|
|
elif auto_augment.startswith('augmix'): |
|
|
aa_params['translate_pct'] = 0.3 |
|
|
secondary_tfl += [augment_and_mix_transform(auto_augment, aa_params)] |
|
|
else: |
|
|
secondary_tfl += [auto_augment_transform(auto_augment, aa_params)] |
|
|
|
|
|
if jpeg_compression == 1: |
|
|
secondary_tfl += [JPEGCompression(quality=10, p=0.3)] |
|
|
|
|
|
final_tfl = [transforms.ToTensor()] |
|
|
|
|
|
return transforms.Compose(primary_tfl + secondary_tfl + final_tfl) |
|
|
|
|
|
|
|
|
|
|
|
def create_transforms_inference(h=256, w=256): |
|
|
transformer = transforms.Compose([ |
|
|
transforms.Resize(size=(h, w)), |
|
|
transforms.ToTensor(), |
|
|
]) |
|
|
|
|
|
return transformer |
|
|
|
|
|
|
|
|
def create_transforms_inference1(h=256, w=256): |
|
|
transformer = transforms.Compose([ |
|
|
transforms.Lambda(lambda img: F.rotate(img, angle=90)), |
|
|
transforms.Resize(size=(h, w)), |
|
|
transforms.ToTensor(), |
|
|
]) |
|
|
|
|
|
return transformer |
|
|
|
|
|
|
|
|
def create_transforms_inference2(h=256, w=256): |
|
|
transformer = transforms.Compose([ |
|
|
transforms.Lambda(lambda img: F.rotate(img, angle=180)), |
|
|
transforms.Resize(size=(h, w)), |
|
|
transforms.ToTensor(), |
|
|
]) |
|
|
|
|
|
return transformer |
|
|
|
|
|
|
|
|
def create_transforms_inference3(h=256, w=256): |
|
|
transformer = transforms.Compose([ |
|
|
transforms.Lambda(lambda img: F.rotate(img, angle=270)), |
|
|
transforms.Resize(size=(h, w)), |
|
|
transforms.ToTensor(), |
|
|
]) |
|
|
|
|
|
return transformer |
|
|
|
|
|
|
|
|
def create_transforms_inference4(h=256, w=256): |
|
|
transformer = transforms.Compose([ |
|
|
transforms.Lambda(lambda img: F.hflip(img)), |
|
|
transforms.Resize(size=(h, w)), |
|
|
transforms.ToTensor(), |
|
|
]) |
|
|
|
|
|
return transformer |
|
|
|
|
|
|
|
|
def create_transforms_inference5(h=256, w=256): |
|
|
transformer = transforms.Compose([ |
|
|
transforms.Lambda(lambda img: F.vflip(img)), |
|
|
transforms.Resize(size=(h, w)), |
|
|
transforms.ToTensor(), |
|
|
]) |
|
|
|
|
|
return transformer |
|
|
|