Spaces:

upadhyaysuraj
/

licence_plate_detection_with_OCR

Running

App Files Files Community

licence_plate_detection_with_OCR / data_preparation.py

upadhyaysuraj

Upload 11 files

57d8fc8 verified 6 months ago

raw

history blame

2.32 kB

	import os
	import shutil
	from sklearn.model_selection import train_test_split

	def prepare_data(dataset_path):
	    """
	    Split a YOLO-style dataset into training, validation, and test sets.

	    PNG images are read from ``<dataset_path>/images`` and their label files
	    (same base name, ``.txt`` extension) from
	    ``<dataset_path>/yolo_annotations``. Each image/label pair is copied to
	    ``<dataset_path>/YOLO/<split>/images`` and
	    ``<dataset_path>/YOLO/<split>/labels``, where ``<split>`` is ``train``,
	    ``val`` or ``test``. Roughly 70% of the images go to ``train``; the
	    remaining ~30% is split again so that about a third of it becomes
	    ``test`` and the rest ``val``.

	    Parameters:
	    dataset_path (str): Path to the dataset.

	    Raises:
	    FileNotFoundError: If the images directory does not exist or an image
	        has no matching label file.
	    ValueError: If no ``.png`` images are found, or if train_test_split
	        cannot produce a non-empty split from the images available.
	    """
	    images_path = os.path.join(dataset_path, 'images')
	    yolo_annotations_path = os.path.join(dataset_path, 'yolo_annotations')
	    yolo_root = os.path.join(dataset_path, 'YOLO')

	    # os.listdir returns entries in arbitrary order, so sort the listing:
	    # otherwise the fixed random_state below would not yield the same split
	    # on every machine / filesystem.
	    all_images = sorted(f for f in os.listdir(images_path) if f.endswith('.png'))
	    if not all_images:
	        raise ValueError(f"No .png images found in {images_path!r}")

	    def label_name(image):
	        # Swap only the final extension; str.replace('.png', '.txt') would
	        # also rewrite any earlier '.png' inside the file name.
	        return os.path.splitext(image)[0] + '.txt'

	    # Validate all labels before touching the output tree so a missing
	    # annotation cannot leave a half-populated split behind.
	    missing_labels = [
	        label_name(image)
	        for image in all_images
	        if not os.path.isfile(os.path.join(yolo_annotations_path, label_name(image)))
	    ]
	    if missing_labels:
	        raise FileNotFoundError(
	            f"{len(missing_labels)} label file(s) missing from "
	            f"{yolo_annotations_path!r}, e.g. {missing_labels[0]!r}"
	        )

	    # First cut: 70% train / 30% held out. Second cut: the held-out part is
	    # divided into validation (67%) and test (33%).
	    train_images, val_test_images = train_test_split(all_images, test_size=0.3, random_state=42)
	    val_images, test_images = train_test_split(val_test_images, test_size=0.33, random_state=42)

	    splits = (
	        ('train', train_images),
	        ('val', val_images),
	        ('test', test_images),
	    )
	    for split_name, split_images in splits:
	        dest_image_path = os.path.join(yolo_root, split_name, 'images')
	        dest_label_path = os.path.join(yolo_root, split_name, 'labels')
	        os.makedirs(dest_image_path, exist_ok=True)
	        os.makedirs(dest_label_path, exist_ok=True)

	        # Copy each image together with its annotation file.
	        for image in split_images:
	            shutil.copy(os.path.join(images_path, image), dest_image_path)
	            shutil.copy(
	                os.path.join(yolo_annotations_path, label_name(image)),
	                dest_label_path,
	            )

	    print("Dataset split into training, validation, and test sets.")

	if __name__ == "__main__":
	    # Script entry point: split the dataset stored in the 'Dataset' folder
	    # (resolved relative to the current working directory).
	    prepare_data(dataset_path='Dataset')