In [None]:
from PIL import Image, UnidentifiedImageError
import os
import pillow_avif  # AVIF support for Pillow

# Configuration passed to process_images() at the bottom of the file.
# Root directory that contains one sub-folder per entry in `folders`.
data_dir = "./data"
# Sub-folders of data_dir whose images are converted and renumbered.
folders = ["comic", "not-comic"]
# Extension given to the converted files; its upper-cased form is also
# passed to Pillow as the format name when saving.
output_format = "png"

def process_images(data_dir, folders, output_format):
    """Convert the images in each folder to one format and renumber them.

    Every regular file in ``data_dir/<folder>`` is visited in sorted name
    order, decoded with Pillow, converted to RGB and stored as
    ``0001.<output_format>``, ``0002.<output_format>``, ... The source file
    is removed once its converted copy has been written. Files that Pillow
    cannot read are deleted. Entries whose name contains ``test_sample.png``
    and entries that are not regular files (e.g. sub-directories) are left
    untouched.

    Converted images are first written under a temporary ``.staging-`` name
    and renamed to their final numbered name only after the whole folder has
    been scanned. Writing ``0001.png`` directly could overwrite a source
    file of that name that has not been processed yet.

    Args:
        data_dir: Directory that contains the folders to process.
        folders: Names of sub-folders of ``data_dir``; entries that are not
            existing directories are reported and skipped.
        output_format: Target file extension without the dot, e.g. ``"png"``.

    Errors raised while saving, deleting or renaming propagate to the
    caller. A source image is kept when saving its converted copy fails.
    """
    # Pillow registers "JPEG"/"TIFF" as format names, not the short extensions.
    format_aliases = {"JPG": "JPEG", "TIF": "TIFF"}
    pil_format = output_format.upper()
    pil_format = format_aliases.get(pil_format, pil_format)

    for folder in folders:
        folder_path = os.path.join(data_dir, folder)
        print(f"Processing folder: {folder_path}")

        # isdir (not exists): a plain file with that name cannot be listed.
        if not os.path.isdir(folder_path):
            print(f"Folder {folder_path} does not exist. Skipping.")
            continue

        # (staging_path, final_path) pairs, renamed once the scan is done.
        staged = []
        for filename in sorted(os.listdir(folder_path)):
            if "test_sample.png" in filename:  # Ignore test_sample.png
                continue
            file_path = os.path.join(folder_path, filename)
            if not os.path.isfile(file_path):
                # Never try to decode (or delete) sub-directories.
                continue

            # Only decoding is guarded: a failure here means the file is not
            # a usable image. A failing save must not delete the source.
            try:
                with Image.open(file_path) as img:
                    # convert() loads the pixel data, so the result stays
                    # usable after the source file is closed.
                    rgb_image = img.convert("RGB")
            except (UnidentifiedImageError, OSError) as e:
                print(f"Invalid or unreadable file: {filename} ({e}). Deleting.")
                os.remove(file_path)  # Delete invalid files
                continue

            new_filename = f"{len(staged) + 1:04d}.{output_format}"
            staging_path = os.path.join(folder_path, f".staging-{new_filename}")
            rgb_image.save(staging_path, format=pil_format)

            # The source is closed by now, so removal also works on Windows.
            # A leftover staging file from an interrupted run may be its own
            # staging target; it must not be removed right after rewriting it.
            if file_path != staging_path:
                os.remove(file_path)

            staged.append((staging_path, os.path.join(folder_path, new_filename)))
            print(f"Converted: {filename} -> {new_filename}")

        # All sources are gone, so the final names can no longer collide.
        for staging_path, final_path in staged:
            os.replace(staging_path, final_path)

# Run the processing function only when executed directly (as a script or
# notebook cell), so importing this module does not modify files on disk.
if __name__ == "__main__":
    process_images(data_dir, folders, output_format)
