import cv2
import numpy as np
import albumentations as albu
from albumentations.augmentations.geometric.resize import LongestMaxSize


def round_pixel_dim(dimension: float) -> int:
    """Rounds a pixel dimension to int, sending exact .5 values to the nearest even integer."""
    if abs(round(dimension) - dimension) == 0.5:
        return int(2.0 * round(dimension / 2.0))
    return int(round(dimension))
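

# Illustrative examples (not part of the original module): exact halves round to
# the nearest even integer, everything else to the nearest integer.
#   round_pixel_dim(2.5) -> 2
#   round_pixel_dim(3.5) -> 4
#   round_pixel_dim(2.6) -> 3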


def resize_with_padding(image, target_size, stride=32, interpolation=cv2.INTER_LINEAR):
    """Resizes the image so its longest side matches target_size, approximately
    preserving the aspect ratio, then rounds each side up to the nearest multiple
    of stride so the result is compatible with stride-based encoders."""
    height, width = image.shape[:2]
    max_dimension = max(height, width)

    # Nothing to do: both sides are already stride-aligned and within the target size.
    if (height % stride == 0) and (width % stride == 0) and (max_dimension <= target_size):
        return image

    scale = target_size / float(max_dimension)
    new_height = round_pixel_dim(height * scale)
    new_width = round_pixel_dim(width * scale)

    # Round each side up to the next multiple of stride.
    if new_height % stride != 0:
        new_height = (new_height // stride + 1) * stride
    if new_width % stride != 0:
        new_width = (new_width // stride + 1) * stride

    return cv2.resize(image, (new_width, new_height), interpolation=interpolation)
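

# Worked example (illustrative, not part of the original module): a 375x500 image
# with target_size=512 and stride=32 is scaled by 512/500 = 1.024 to 384x512;
# both sides are already multiples of 32, so the result has shape (384, 512).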


class PaddedResize(LongestMaxSize):
    def apply(self, img: np.ndarray, max_size: int = 1024,
              interpolation: int = cv2.INTER_LINEAR, **params) -> np.ndarray:
        # The parameter must be named max_size: LongestMaxSize passes the configured
        # value under that keyword, so a renamed argument would silently keep its default.
        return resize_with_padding(img, target_size=max_size, interpolation=interpolation)


def get_training_augmentations(width=768, height=576):
    """Configures training-time augmentations."""
    target_size = max(width, height)
    transforms = [
        albu.HorizontalFlip(p=0.5),
        albu.ShiftScaleRotate(
            scale_limit=0.5, rotate_limit=90, shift_limit=0.1, p=0.5, border_mode=0),
        albu.PadIfNeeded(min_height=target_size, min_width=target_size, always_apply=True),
        albu.RandomCrop(height=target_size, width=target_size, always_apply=True),
        albu.GaussNoise(p=0.2),
        albu.Perspective(p=0.2),
        albu.OneOf([albu.CLAHE(p=1), albu.RandomGamma(p=1)], p=0.33),
        albu.OneOf([
            albu.Sharpen(p=1),
            albu.Blur(blur_limit=3, p=1),
            albu.MotionBlur(blur_limit=3, p=1)], p=0.33),
        albu.OneOf([
            albu.RandomBrightnessContrast(p=1),
            albu.HueSaturationValue(p=1)], p=0.33),
    ]
    return albu.Compose(transforms)


def get_validation_augmentations(width=1920, height=1440, fixed_size=True):
    """Configures validation/inference-time augmentations."""
    if fixed_size:
        transforms = [albu.Resize(height=height, width=width, always_apply=True)]
        return albu.Compose(transforms)

    target_size = max(width, height)
    transforms = [PaddedResize(max_size=target_size, always_apply=True)]
    return albu.Compose(transforms)
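

# Illustrative example (not part of the original module): with fixed_size=False the
# longest side is capped at max(width, height) and both sides are rounded up to the
# next multiple of 32, so a 1080x1920 frame becomes 1088x1920 (1080 is not a multiple
# of 32), while a 1440x1920 frame passes through unchanged.
#   val_aug = get_validation_augmentations(width=1920, height=1440, fixed_size=False)
#   val_aug(image=np.zeros((1080, 1920, 3), dtype=np.uint8))["image"].shape  # (1088, 1920, 3)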


def convert_to_tensor(x, **kwargs):
    """Converts an HWC (or HW) image array to CHW float32, as expected by PyTorch."""
    if x.ndim == 2:
        x = np.expand_dims(x, axis=-1)
    return x.transpose(2, 0, 1).astype('float32')


def get_preprocessing_pipeline(preprocessing_fn):
    """Builds preprocessing pipeline including normalization and tensor conversion."""
    transforms = [
        albu.Lambda(image=preprocessing_fn),
        albu.Lambda(image=convert_to_tensor, mask=convert_to_tensor),
    ]
    return albu.Compose(transforms)
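

# Usage sketch (illustrative, not part of the original module). The preprocessing
# function would normally come from the encoder library in use, e.g.
# segmentation_models_pytorch.encoders.get_preprocessing_fn; a plain scaling lambda
# is used here to keep the example self-contained.
if __name__ == "__main__":
    dummy = np.random.randint(0, 256, (576, 768, 3), dtype=np.uint8)

    train_aug = get_training_augmentations(width=768, height=576)
    augmented = train_aug(image=dummy)["image"]           # 768x768 random crop

    preprocess = get_preprocessing_pipeline(lambda x, **kwargs: x / 255.0)
    tensor_ready = preprocess(image=augmented)["image"]

    print(tensor_ready.shape, tensor_ready.dtype)         # (3, 768, 768) float32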