import os
import shutil

from sklearn.model_selection import train_test_split


def prepare_data(dataset_path):
    """
    Prepares the dataset by splitting it into training, validation, and test sets.

    Reads every ``.png`` file from ``<dataset_path>/images`` and its
    same-named ``.txt`` label from ``<dataset_path>/yolo_annotations``, then
    copies each image/label pair into
    ``<dataset_path>/YOLO/<split>/images`` and
    ``<dataset_path>/YOLO/<split>/labels`` for ``split`` in ``train``,
    ``val`` and ``test``. The destination directories are created if they
    do not exist. Source files are copied, not moved.

    The split is roughly 70% train / 20% validation / 10% test: 30% of the
    images are held out first, and 33% of that hold-out becomes the test set.

    Parameters:
    dataset_path (str): Path to the dataset.

    Raises:
    FileNotFoundError: If the images directory does not exist, or if an
        image has no matching label file.
    ValueError: Raised by ``train_test_split`` when there are too few
        images to produce a non-empty split.
    """
    images_path = os.path.join(dataset_path, 'images')
    yolo_annotations_path = os.path.join(dataset_path, 'yolo_annotations')

    # Build and create the <dataset>/YOLO/<split>/{images,labels} layout.
    destinations = {}
    for split_name in ('train', 'val', 'test'):
        split_images_path = os.path.join(dataset_path, 'YOLO', split_name, 'images')
        split_labels_path = os.path.join(dataset_path, 'YOLO', split_name, 'labels')
        os.makedirs(split_images_path, exist_ok=True)
        os.makedirs(split_labels_path, exist_ok=True)
        destinations[split_name] = (split_images_path, split_labels_path)

    # os.listdir returns entries in arbitrary order; sort so that the fixed
    # random_state below yields the same split on every machine.
    all_images = sorted(f for f in os.listdir(images_path) if f.endswith('.png'))

    train_images, val_test_images = train_test_split(
        all_images, test_size=0.3, random_state=42
    )
    val_images, test_images = train_test_split(
        val_test_images, test_size=0.33, random_state=42
    )

    def copy_files(image_list, dest_image_path, dest_label_path):
        """Copy each image and its label file into the given directories."""
        for image in image_list:
            # Swap only the extension: str.replace would also rewrite any
            # '.png' that appears earlier in the file name.
            label = os.path.splitext(image)[0] + '.txt'
            shutil.copy(os.path.join(images_path, image), dest_image_path)
            shutil.copy(os.path.join(yolo_annotations_path, label), dest_label_path)

    # Copy files to respective directories
    copy_files(train_images, *destinations['train'])
    copy_files(val_images, *destinations['val'])
    copy_files(test_images, *destinations['test'])

    print("Dataset split into training, validation, and test sets.")


if __name__ == "__main__":
    prepare_data('Dataset')