import cv2
import numpy as np
import albumentations as albu
from albumentations.augmentations.geometric.resize import LongestMaxSize


def round_pixel_dim(dimension: float) -> int:
    """Rounds a pixel dimension to int, sending exact .5 values to the nearest even integer."""
    if abs(round(dimension) - dimension) == 0.5:
        return int(2.0 * round(dimension / 2.0))
    return int(round(dimension))
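

# Illustrative examples (not part of the original module): exact halves round to
# the nearest even integer, everything else to the nearest integer.
#   round_pixel_dim(2.5) -> 2
#   round_pixel_dim(3.5) -> 4
#   round_pixel_dim(2.6) -> 3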


def resize_with_padding(image, target_size, stride=32, interpolation=cv2.INTER_LINEAR):
    """Resizes the image so its longest side matches target_size, approximately
    preserving the aspect ratio, then rounds each side up to the nearest multiple
    of stride so the result is compatible with stride-based encoders."""
    height, width = image.shape[:2]
    max_dimension = max(height, width)

    # Nothing to do: both sides are already stride-aligned and within the target size.
    if (height % stride == 0) and (width % stride == 0) and (max_dimension <= target_size):
        return image

    scale = target_size / float(max_dimension)
    new_height = round_pixel_dim(height * scale)
    new_width = round_pixel_dim(width * scale)

    # Round each side up to the next multiple of stride.
    if new_height % stride != 0:
        new_height = (new_height // stride + 1) * stride
    if new_width % stride != 0:
        new_width = (new_width // stride + 1) * stride

    return cv2.resize(image, (new_width, new_height), interpolation=interpolation)
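

# Worked example (illustrative, not part of the original module): a 375x500 image
# with target_size=512 and stride=32 is scaled by 512/500 = 1.024 to 384x512;
# both sides are already multiples of 32, so the result has shape (384, 512).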


class PaddedResize(LongestMaxSize):
    def apply(self, img: np.ndarray, max_size: int = 1024,
              interpolation: int = cv2.INTER_LINEAR, **params) -> np.ndarray:
        # The parameter must be named max_size: LongestMaxSize passes the configured
        # value under that keyword, so a renamed argument would silently keep its default.
        return resize_with_padding(img, target_size=max_size, interpolation=interpolation)


def get_training_augmentations(width=768, height=576):
    """Configures training-time augmentations."""
    target_size = max(width, height)
    transforms = [
        albu.HorizontalFlip(p=0.5),
        albu.ShiftScaleRotate(
            scale_limit=0.5, rotate_limit=90, shift_limit=0.1, p=0.5, border_mode=0),
        albu.PadIfNeeded(min_height=target_size, min_width=target_size, always_apply=True),
        albu.RandomCrop(height=target_size, width=target_size, always_apply=True),
        albu.GaussNoise(p=0.2),
        albu.Perspective(p=0.2),
        albu.OneOf([albu.CLAHE(p=1), albu.RandomGamma(p=1)], p=0.33),
        albu.OneOf([
            albu.Sharpen(p=1),
            albu.Blur(blur_limit=3, p=1),
            albu.MotionBlur(blur_limit=3, p=1)], p=0.33),
        albu.OneOf([
            albu.RandomBrightnessContrast(p=1),
            albu.HueSaturationValue(p=1)], p=0.33),
    ]
    return albu.Compose(transforms)


def get_validation_augmentations(width=1920, height=1440, fixed_size=True):
    """Configures validation/inference-time augmentations."""
    if fixed_size:
        transforms = [albu.Resize(height=height, width=width, always_apply=True)]
        return albu.Compose(transforms)

    target_size = max(width, height)
    transforms = [PaddedResize(max_size=target_size, always_apply=True)]
    return albu.Compose(transforms)
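

# Illustrative example (not part of the original module): with fixed_size=False the
# longest side is capped at max(width, height) and both sides are rounded up to the
# next multiple of 32, so a 1080x1920 frame becomes 1088x1920 (1080 is not a multiple
# of 32), while a 1440x1920 frame passes through unchanged.
#   val_aug = get_validation_augmentations(width=1920, height=1440, fixed_size=False)
#   val_aug(image=np.zeros((1080, 1920, 3), dtype=np.uint8))["image"].shape  # (1088, 1920, 3)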


def convert_to_tensor(x, **kwargs):
    """Converts an HWC (or HW) image array to CHW float32, as expected by PyTorch."""
    if x.ndim == 2:
        x = np.expand_dims(x, axis=-1)
    return x.transpose(2, 0, 1).astype('float32')


def get_preprocessing_pipeline(preprocessing_fn):
    """Builds preprocessing pipeline including normalization and tensor conversion."""
    transforms = [
        albu.Lambda(image=preprocessing_fn),
        albu.Lambda(image=convert_to_tensor, mask=convert_to_tensor),
    ]
    return albu.Compose(transforms)
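

# Usage sketch (illustrative, not part of the original module). The preprocessing
# function would normally come from the encoder library in use, e.g.
# segmentation_models_pytorch.encoders.get_preprocessing_fn; a plain scaling lambda
# is used here to keep the example self-contained.
if __name__ == "__main__":
    dummy = np.random.randint(0, 256, (576, 768, 3), dtype=np.uint8)

    train_aug = get_training_augmentations(width=768, height=576)
    augmented = train_aug(image=dummy)["image"]           # 768x768 random crop

    preprocess = get_preprocessing_pipeline(lambda x, **kwargs: x / 255.0)
    tensor_ready = preprocess(image=augmented)["image"]

    print(tensor_ready.shape, tensor_ready.dtype)         # (3, 768, 768) float32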