"""Export the ImageNet-Hard validation split to JPEG files and browse it in FiftyOne.

Each image of the Hugging Face dataset ``taesiri/imagenet-hard`` is written to
``dataset/<index>.JPEG``; the files are then registered, together with their
``origin`` and ``english_label`` fields, in a FiftyOne dataset that is served
through the FiftyOne app.
"""

import os
from io import BytesIO
from multiprocessing import Pool, cpu_count

import fiftyone as fo
from datasets import load_dataset
from PIL import Image

# Load the dataset at module level so that `process_image` can reach it as a
# global from the worker processes.
imagenet_hard_dataset = load_dataset('taesiri/imagenet-hard', split='validation')

os.makedirs("dataset", exist_ok=True)


def process_image(i):
    """Save image ``i`` of the dataset as a JPEG and return its metadata.

    Args:
        i: Row index into ``imagenet_hard_dataset``.

    Returns:
        A dict with the keys ``"file_path"`` (where the JPEG was written),
        ``"labels"`` (the row's ``english_label`` field) and ``"origin"``
        (the row's ``origin`` field).
    """
    # Fetch the row once instead of indexing the dataset three times per image.
    row = imagenet_hard_dataset[i]

    image = row["image"].convert("RGB")
    image_path = f"dataset/{i}.JPEG"
    image.save(image_path, "JPEG", quality=80)

    return {
        "file_path": image_path,
        "labels": row["english_label"],
        "origin": row["origin"],
    }


def create_fiftyone_sample(sample):
    """Build a ``fo.Sample`` from a dict produced by ``process_image``.

    Args:
        sample: Dict with the keys ``"file_path"``, ``"labels"`` and
            ``"origin"``.

    Returns:
        A ``fo.Sample`` whose ``labels`` field holds two classifications:
        the origin first, then the English label.
    """
    origin_label = fo.Classification(label=str(sample["origin"]))
    # `process_image` stores the english_label field under the "labels" key;
    # reading "english_label" here raised KeyError for every sample.
    # NOTE(review): str() is applied to whatever the field holds; if it is a
    # list of names the label becomes the list's repr -- confirm that is wanted.
    english_label = fo.Classification(label=str(sample["labels"]))

    return fo.Sample(
        filepath=sample["file_path"],
        labels=fo.Classifications(classifications=[origin_label, english_label]),
    )


def main():
    """Export all images, build the FiftyOne dataset and serve the app."""
    # Process images in parallel and get the list of images with their labels
    with Pool(cpu_count()) as pool:
        samples_data = pool.map(process_image, range(len(imagenet_hard_dataset)))

    # Create a FiftyOne dataset
    dataset = fo.Dataset(name="imagenet-hard")

    # Add images and labels to the FiftyOne dataset
    samples = [create_fiftyone_sample(sample_data) for sample_data in samples_data]
    dataset.add_samples(samples)

    session = fo.launch_app(dataset, port=8888, remote=True, address="0.0.0.0")
    # Block until the app session is closed so the script keeps serving.
    session.wait()


if __name__ == "__main__":
    main()