| import os | |
| import cv2 | |
| import shutil | |
| import imageio | |
| import numpy as np | |
| from glob import glob | |
| from pathlib import Path | |
| from typing import List, Tuple, Optional | |
def validate_dimensions(width: int, height: int, stride: int = 32) -> Tuple[int, int]:
    """Round image dimensions up to the nearest multiple of ``stride``.

    Args:
        width: Requested width in pixels.
        height: Requested height in pixels.
        stride: Alignment each dimension must be divisible by.

    Returns:
        ``(width, height)`` where each value is the smallest multiple of
        ``stride`` that is not below the corresponding input. Values that
        are already aligned are returned unchanged.
    """
    # Only bump a dimension to the next multiple when it is misaligned;
    # an already aligned value must not grow by a full stride.
    new_height = (height // stride + 1) * stride if height % stride != 0 else height
    new_width = (width // stride + 1) * stride if width % stride != 0 else width

    if new_height != height or new_width != width:
        print(f'Adjusted dimensions to: {new_height}H x {new_width}W')

    # Return the aligned values; returning the inputs here would silently
    # discard the adjustment that was just announced.
    return new_width, new_height
def calc_image_size(image: np.ndarray, target_size: int) -> Tuple[int, int]:
    """Compute a resize target whose longer side equals ``target_size``.

    The aspect ratio (width / height) is taken from the first two axes of
    ``image.shape``. The shorter side is scaled by that ratio and truncated
    to an int, then both values are passed through ``validate_dimensions``.

    Args:
        image: Array whose first two shape entries are read as
            (height, width).
        target_size: Length assigned to the longer side.

    Returns:
        Whatever ``validate_dimensions(new_width, new_height)`` returns.
    """
    rows, cols = image.shape[:2]
    ratio = cols / rows

    # Portrait: the height is the long side, so the width shrinks.
    if ratio < 1:
        return validate_dimensions(int(target_size * ratio), target_size)

    # Landscape or square: the width is the long side.
    return validate_dimensions(target_size, int(target_size / ratio))
def convert_coordinates(transform: np.ndarray, x: float, y: float) -> Tuple[float, float]:
    """Apply ``transform`` to the point ``(x, y)`` in homogeneous form.

    The point is extended to ``[x, y, 1]`` and left-multiplied by
    ``transform``. Only the first two components of the product are
    returned; no division by a third component is performed.

    Args:
        transform: Matrix that can be multiplied with a length-3 vector.
        x: Horizontal coordinate of the point.
        y: Vertical coordinate of the point.

    Returns:
        The first and second entries of ``transform @ [x, y, 1]``.
    """
    homogeneous_point = np.array([x, y, 1])
    mapped = transform @ homogeneous_point
    mapped_x = mapped[0]
    mapped_y = mapped[1]
    return mapped_x, mapped_y
def list_images(directory: str, mask_format: bool = False) -> List[str]:
    """Return the sorted, de-duplicated image paths found in ``directory``.

    Only direct children of ``directory`` are matched (the glob patterns
    are not recursive). Each extension is listed in lower and upper case.

    Args:
        directory: Folder to search.
        mask_format: When true, match PNG files only; otherwise match
            JPG/JPEG/PNG/TIF/TIFF files.

    Returns:
        Matching paths in ascending string order, each appearing once.
    """
    if mask_format:
        patterns = ['*.png', '*.PNG']
    else:
        patterns = [
            '*.jpg', '*.jpeg', '*.png', '*.tif', '*.tiff',
            '*.JPG', '*.JPEG', '*.PNG', '*.TIF', '*.TIFF',
        ]

    # A set absorbs paths matched by more than one pattern.
    unique_paths = set()
    for pattern in patterns:
        unique_paths.update(glob(os.path.join(directory, pattern)))
    return sorted(unique_paths)
def prepare_dataset_split(root_dir: str, train_ratio: float = 0.7,
                          generate_empty_masks: bool = False) -> None:
    """Copy a random train/val split of ``root_dir`` into sub-folders.

    ``root_dir`` must contain ``Images`` and ``Masks`` directories. Files are
    copied (not moved) into ``<root_dir>/train/{Images,Masks}`` and, when
    ``train_ratio`` is below 1, ``<root_dir>/val/{Images,Masks}``.

    Images and masks are paired by their position in the sorted listings
    returned by ``list_images``, so the two folders must sort into the same
    order.

    Args:
        root_dir: Dataset root holding the ``Images`` and ``Masks`` folders.
        train_ratio: Fraction of samples assigned to the train split; must
            satisfy ``0 < train_ratio <= 1``.
        generate_empty_masks: When true, an empty mask is generated for every
            image and moved into ``Masks`` unless a file of the same name is
            already present there.

    Raises:
        Exception: If ``Images`` or ``Masks`` is missing, or if the number of
            images differs from the number of masks.
        ValueError: If ``train_ratio`` is outside ``(0, 1]``.
    """
    image_dir = os.path.join(root_dir, 'Images')
    mask_dir = os.path.join(root_dir, 'Masks')
    if not all(os.path.exists(d) for d in [image_dir, mask_dir]):
        raise Exception("Required 'Images' and 'Masks' directories not found")

    # Validate the ratio before anything is written: an invalid value must not
    # leave freshly generated masks behind in the dataset.
    train_ratio = float(train_ratio)
    if not (0 < train_ratio <= 1):
        raise ValueError(f"Invalid train ratio: {train_ratio}")

    image_paths = np.array(list_images(image_dir))

    if generate_empty_masks:
        # Generate into a scratch folder so existing masks are never overwritten.
        temp_dir = os.path.join(mask_dir, 'temp')
        create_empty_masks(image_dir, outdir=temp_dir)
        for mask_path in list_images(temp_dir, mask_format=True):
            target_path = os.path.join(mask_dir, os.path.basename(mask_path))
            if not os.path.exists(target_path):
                shutil.move(mask_path, target_path)
        shutil.rmtree(temp_dir)

    # Listed after the optional generation step so new masks are included.
    mask_paths = np.array(list_images(mask_dir, mask_format=True))

    if len(image_paths) != len(mask_paths):
        raise Exception(f"Unmatched images ({len(image_paths)}) and masks ({len(mask_paths)})")

    train_size = int(np.floor(train_ratio * len(image_paths)))
    indices = np.random.permutation(len(image_paths))

    splits = [('train', indices[:train_size])]
    if train_ratio < 1:
        splits.append(('val', indices[train_size:]))

    for split_name, split_indices in splits:
        split_dir = os.path.join(root_dir, split_name)
        for subdir, sources in (('Images', image_paths), ('Masks', mask_paths)):
            subdir_path = os.path.join(split_dir, subdir)
            os.makedirs(subdir_path, exist_ok=True)
            for idx in split_indices:
                source = sources[idx]
                destination = os.path.join(subdir_path, os.path.basename(source))
                shutil.copyfile(source, destination)
        print(f"Created {split_name} split with {len(split_indices)} samples")
def create_empty_masks(image_dir: str, pixel_value: int = 0,
                       outdir: Optional[str] = None) -> str:
    """Write a constant-valued PNG mask for every image in ``image_dir``.

    Each mask is a 2-D ``uint8`` array with the same height and width as its
    source image, filled with ``pixel_value`` and saved as ``<stem>.png``.

    Args:
        image_dir: Folder whose images (as found by ``list_images``) get a mask.
        pixel_value: Value every mask pixel is set to.
        outdir: Destination folder; defaults to ``<image_dir>/Masks`` when
            empty or ``None``. It is created if missing.

    Returns:
        The directory the masks were written to.
    """
    if not outdir:
        outdir = os.path.join(image_dir, 'Masks')
    os.makedirs(outdir, exist_ok=True)

    sources = list_images(image_dir)
    print(f"Generating {len(sources)} empty masks...")

    for source in sources:
        # The image is loaded only to obtain its height and width.
        pixels = imageio.imread(source)
        rows = pixels.shape[0]
        cols = pixels.shape[1]
        blank = np.full((rows, cols), pixel_value, dtype='uint8')
        mask_name = f"{Path(source).stem}.png"
        imageio.imwrite(os.path.join(outdir, mask_name), blank)

    return outdir