import os import cv2 import shutil import imageio import numpy as np from glob import glob from pathlib import Path from typing import List, Tuple, Optional def validate_dimensions(width: int, height: int, stride: int = 32) -> Tuple[int, int]: if height % stride != 0 or width % stride != 0: new_height = ((height // stride + 1) * stride if height % stride != 0 else height) new_width = ((width // stride + 1) * stride if width % stride != 0 else width) print(f'Adjusted dimensions to: {new_height}H x {new_width}W') return width, height def calc_image_size(image: np.ndarray, target_size: int) -> Tuple[int, int]: height, width = image.shape[:2] aspect_ratio = width / height if aspect_ratio >= 1: new_width = target_size new_height = int(target_size / aspect_ratio) else: new_height = target_size new_width = int(target_size * aspect_ratio) return validate_dimensions(new_width, new_height) def convert_coordinates(transform: np.ndarray, x: float, y: float) -> Tuple[float, float]: transformed = transform @ np.array([x, y, 1]) return transformed[0], transformed[1] def list_images(directory: str, mask_format: bool = False) -> List[str]: extensions = ['*.png', '*.PNG'] if mask_format else [ '*.jpg', '*.jpeg', '*.png', '*.tif', '*.tiff', '*.JPG', '*.JPEG', '*.PNG', '*.TIF', '*.TIFF' ] image_paths = [] for ext in extensions: image_paths.extend(glob(os.path.join(directory, ext))) return sorted(list(set(image_paths))) def prepare_dataset_split(root_dir: str, train_ratio: float = 0.7, generate_empty_masks: bool = False) -> None: image_dir = os.path.join(root_dir, 'Images') mask_dir = os.path.join(root_dir, 'Masks') if not all(os.path.exists(d) for d in [image_dir, mask_dir]): raise Exception("Required 'Images' and 'Masks' directories not found") image_paths = np.array(list_images(image_dir)) mask_paths = np.array(list_images(mask_dir, mask_format=True)) if generate_empty_masks: temp_dir = os.path.join(mask_dir, 'temp') create_empty_masks(image_dir, outdir=temp_dir) for mask_path in list_images(temp_dir, mask_format=True): target_path = os.path.join(mask_dir, os.path.basename(mask_path)) if not os.path.exists(target_path): shutil.move(mask_path, target_path) shutil.rmtree(temp_dir) mask_paths = np.array(list_images(mask_dir, mask_format=True)) if len(image_paths) != len(mask_paths): raise Exception(f"Unmatched images ({len(image_paths)}) and masks ({len(mask_paths)})") train_ratio = float(train_ratio) if not (0 < train_ratio <= 1): raise ValueError(f"Invalid train ratio: {train_ratio}") train_size = int(np.floor(train_ratio * len(image_paths))) indices = np.random.permutation(len(image_paths)) splits = { 'train': {'indices': indices[:train_size]}, 'val': {'indices': indices[train_size:]} if train_ratio < 1 else None } for split_name, split_data in splits.items(): if split_data is None: continue split_dir = os.path.join(root_dir, split_name) for subdir in ['Images', 'Masks']: subdir_path = os.path.join(split_dir, subdir) os.makedirs(subdir_path, exist_ok=True) sources = image_paths if subdir == 'Images' else mask_paths for idx in split_data['indices']: source = sources[idx] destination = os.path.join(subdir_path, os.path.basename(source)) shutil.copyfile(source, destination) print(f"Created {split_name} split with {len(split_data['indices'])} samples") def create_empty_masks(image_dir: str, pixel_value: int = 0, outdir: Optional[str] = None) -> str: outdir = outdir or os.path.join(image_dir, 'Masks') os.makedirs(outdir, exist_ok=True) image_paths = list_images(image_dir) print(f"Generating {len(image_paths)} empty masks...") for image_path in image_paths: image = imageio.imread(image_path) mask = np.full((image.shape[0], image.shape[1]), pixel_value, dtype='uint8') output_path = os.path.join(outdir, f"{Path(image_path).stem}.png") imageio.imwrite(output_path, mask) return outdir