|
import os |
|
import shutil |
|
from sklearn.model_selection import train_test_split |
|
|
|
def prepare_data(dataset_path):
    """
    Prepares the dataset by splitting it into training, validation, and test sets.

    Every '.png' file in '<dataset_path>/images' is assigned to one split
    (roughly 70% train, 20% val, 10% test, using a fixed random seed) and
    copied, together with its same-named '.txt' label file from
    '<dataset_path>/yolo_annotations', into
    '<dataset_path>/YOLO/<split>/images' and '<dataset_path>/YOLO/<split>/labels'.
    Source files are left in place; files already present in the output
    directories are not removed.

    Parameters:
        dataset_path (str): Path to the dataset.

    Raises:
        ValueError: If no '.png' images are found. train_test_split may also
            raise ValueError when there are too few images to split.
        FileNotFoundError: If any selected image has no matching label file.
            This is checked before anything is copied, so a failure does not
            leave half-populated output directories.
    """
    images_path = os.path.join(dataset_path, 'images')
    yolo_annotations_path = os.path.join(dataset_path, 'yolo_annotations')
    yolo_root = os.path.join(dataset_path, 'YOLO')

    def label_name(image):
        # Swap only the extension; str.replace('.png', '.txt') would also
        # rewrite any other '.png' occurring inside the file name.
        return os.path.splitext(image)[0] + '.txt'

    # os.listdir() returns entries in arbitrary order, so sort them: otherwise
    # the fixed random_state below would not give the same split everywhere.
    all_images = sorted(f for f in os.listdir(images_path) if f.endswith('.png'))
    if not all_images:
        raise ValueError("No .png images found in {!r}".format(images_path))

    # Validate up front so a missing label cannot abort the copy half-way.
    missing_labels = [
        label_name(image)
        for image in all_images
        if not os.path.isfile(os.path.join(yolo_annotations_path, label_name(image)))
    ]
    if missing_labels:
        raise FileNotFoundError(
            "{} label file(s) missing in {!r}, e.g. {}".format(
                len(missing_labels),
                yolo_annotations_path,
                ', '.join(missing_labels[:5]),
            )
        )

    # 70% train; the remaining 30% is split again into ~67% val / ~33% test.
    train_images, val_test_images = train_test_split(
        all_images, test_size=0.3, random_state=42
    )
    val_images, test_images = train_test_split(
        val_test_images, test_size=0.33, random_state=42
    )

    splits = (
        ('train', train_images),
        ('val', val_images),
        ('test', test_images),
    )
    for split_name, image_list in splits:
        dest_image_path = os.path.join(yolo_root, split_name, 'images')
        dest_label_path = os.path.join(yolo_root, split_name, 'labels')
        os.makedirs(dest_image_path, exist_ok=True)
        os.makedirs(dest_label_path, exist_ok=True)

        for image in image_list:
            shutil.copy(os.path.join(images_path, image), dest_image_path)
            shutil.copy(
                os.path.join(yolo_annotations_path, label_name(image)),
                dest_label_path,
            )

    print("Dataset split into training, validation, and test sets.")
|
|
|
if __name__ == "__main__":
    # Script entry point: split the dataset stored in the 'Dataset' directory
    # (a relative path, so it is resolved against the current working directory).
    prepare_data(dataset_path='Dataset')
|
|