import cv2
import numpy as np
import albumentations as albu
from albumentations.augmentations.geometric.resize import LongestMaxSize


def round_pixel_dim(dimension: float) -> int:
    """Rounds a pixel dimension to the nearest integer, sending .5 ties to the nearest even value."""
    if abs(round(dimension) - dimension) == 0.5:
        return int(2.0 * round(dimension / 2.0))
    return int(round(dimension))


def resize_with_padding(image, target_size, stride=32, interpolation=cv2.INTER_LINEAR):
    """Resizes an image so its longest side matches `target_size`, then rounds both
    dimensions up to the next multiple of `stride` so they stay network-compatible."""
    height, width = image.shape[:2]
    max_dimension = max(height, width)

    # Nothing to do if the image already fits and both sides are stride-aligned.
    if (height % stride == 0) and (width % stride == 0) and (max_dimension <= target_size):
        return image

    scale = target_size / float(max(width, height))
    new_height, new_width = [round_pixel_dim(dim * scale) for dim in (height, width)]

    # Round each side up to the next multiple of `stride`.
    if new_height % stride != 0:
        new_height = (new_height // stride + 1) * stride
    if new_width % stride != 0:
        new_width = (new_width // stride + 1) * stride

    return cv2.resize(image, (new_width, new_height), interpolation=interpolation)


class PaddedResize(LongestMaxSize):
    """LongestMaxSize variant that also snaps the output dimensions to the stride."""

    def apply(self, img: np.ndarray, max_size: int = 1024,
              interpolation: int = cv2.INTER_LINEAR, **params) -> np.ndarray:
        # `max_size` is forwarded from the transform's own parameters.
        return resize_with_padding(img, target_size=max_size, interpolation=interpolation)


def get_training_augmentations(width=768, height=576):
    """Configures training-time augmentations."""
    target_size = max(width, height)
    transforms = [
        albu.HorizontalFlip(p=0.5),
        albu.ShiftScaleRotate(scale_limit=0.5, rotate_limit=90, shift_limit=0.1,
                              p=0.5, border_mode=0),
        albu.PadIfNeeded(min_height=target_size, min_width=target_size, always_apply=True),
        albu.RandomCrop(height=target_size, width=target_size, always_apply=True),
        albu.GaussNoise(p=0.2),
        albu.Perspective(p=0.2),
        albu.OneOf([albu.CLAHE(p=1), albu.RandomGamma(p=1)], p=0.33),
        albu.OneOf([
            albu.Sharpen(p=1),
            albu.Blur(blur_limit=3, p=1),
            albu.MotionBlur(blur_limit=3, p=1),
        ], p=0.33),
        albu.OneOf([
            albu.RandomBrightnessContrast(p=1),
            albu.HueSaturationValue(p=1),
        ], p=0.33),
    ]
    return albu.Compose(transforms)


def get_validation_augmentations(width=1920, height=1440, fixed_size=True):
    """Configures validation/inference-time augmentations."""
    if fixed_size:
        transforms = [albu.Resize(height=height, width=width, always_apply=True)]
        return albu.Compose(transforms)

    target_size = max(width, height)
    transforms = [PaddedResize(max_size=target_size, always_apply=True)]
    return albu.Compose(transforms)


def convert_to_tensor(x, **kwargs):
    """Converts an HWC (or HW) image array to CHW float32, ready for PyTorch."""
    if x.ndim == 2:
        x = np.expand_dims(x, axis=-1)
    return x.transpose(2, 0, 1).astype("float32")


def get_preprocessing_pipeline(preprocessing_fn):
    """Builds the preprocessing pipeline: normalization followed by tensor conversion."""
    transforms = [
        albu.Lambda(image=preprocessing_fn),
        albu.Lambda(image=convert_to_tensor, mask=convert_to_tensor),
    ]
    return albu.Compose(transforms)
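

# --- Usage sketch (illustrative, not part of the pipeline factories above) ---
# Chains the training augmentations and the preprocessing pipeline on a synthetic
# image/mask pair. The `scale_input` normalizer below is a hypothetical stand-in;
# in practice an encoder-specific preprocessing function would typically be passed
# to `get_preprocessing_pipeline`.
if __name__ == "__main__":
    rng = np.random.default_rng(0)
    image = rng.integers(0, 256, size=(576, 768, 3), dtype=np.uint8)
    mask = (rng.random((576, 768)) > 0.5).astype("uint8")

    def scale_input(x, **kwargs):
        # Hypothetical normalizer: scale pixel values to [0, 1].
        return x / 255.0

    augment = get_training_augmentations(width=768, height=576)
    preprocess = get_preprocessing_pipeline(scale_input)

    augmented = augment(image=image, mask=mask)
    sample = preprocess(image=augmented["image"], mask=augmented["mask"])

    # Both outputs are channel-first float32 arrays, ready for torch.from_numpy().
    print(sample["image"].shape, sample["image"].dtype)  # (3, 768, 768) float32
    print(sample["mask"].shape, sample["mask"].dtype)    # (1, 768, 768) float32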