licence_plate_detection_with_OCR / data_preparation.py
upadhyaysuraj's picture
Upload 11 files
57d8fc8 verified
raw
history blame
2.32 kB
import os
import shutil
from sklearn.model_selection import train_test_split
def prepare_data(dataset_path):
    """
    Prepares the dataset by splitting it into training, validation, and test sets.

    Every ``.png`` file found in ``<dataset_path>/images`` is paired with the
    ``.txt`` file of the same base name in ``<dataset_path>/yolo_annotations``.
    Each pair is copied (the originals are left in place) into
    ``<dataset_path>/YOLO/<split>/images`` and
    ``<dataset_path>/YOLO/<split>/labels``, where ``<split>`` is ``train``,
    ``val`` or ``test``. 30% of the images are held out of ``train``, and 33%
    of that hold-out goes to ``test`` with the remainder going to ``val``.

    Parameters:
        dataset_path (str): Path to the dataset.

    Raises:
        FileNotFoundError: If the images directory does not exist, or an
            image has no matching annotation file.
        ValueError: Propagated from ``train_test_split`` when it cannot form
            the requested splits (for example, too few images).
    """
    images_path = os.path.join(dataset_path, 'images')
    yolo_annotations_path = os.path.join(dataset_path, 'yolo_annotations')

    # Build and create the destination layout: YOLO/<split>/{images,labels}.
    split_dirs = {}
    for split in ('train', 'val', 'test'):
        image_dir = os.path.join(dataset_path, 'YOLO', split, 'images')
        label_dir = os.path.join(dataset_path, 'YOLO', split, 'labels')
        os.makedirs(image_dir, exist_ok=True)
        os.makedirs(label_dir, exist_ok=True)
        split_dirs[split] = (image_dir, label_dir)

    # os.listdir() returns names in arbitrary order; sort them so the fixed
    # random_state below always produces the same split for the same files.
    all_images = sorted(f for f in os.listdir(images_path) if f.endswith('.png'))
    train_images, val_test_images = train_test_split(all_images, test_size=0.3, random_state=42)
    val_images, test_images = train_test_split(val_test_images, test_size=0.33, random_state=42)

    # Function to copy images and annotations
    def copy_files(image_list, dest_image_path, dest_label_path):
        for image in image_list:
            # Swap only the extension: str.replace('.png', '.txt') would also
            # rewrite any '.png' that appears inside the base name.
            label = os.path.splitext(image)[0] + '.txt'
            shutil.copy(os.path.join(images_path, image), dest_image_path)
            shutil.copy(os.path.join(yolo_annotations_path, label), dest_label_path)

    # Copy files to respective directories
    copy_files(train_images, *split_dirs['train'])
    copy_files(val_images, *split_dirs['val'])
    copy_files(test_images, *split_dirs['test'])

    print("Dataset split into training, validation, and test sets.")
# Script entry point: when run directly (not imported), split the dataset
# located in the 'Dataset' directory relative to the working directory.
if __name__ == "__main__":
    prepare_data(dataset_path='Dataset')