Spaces:
Running
Running
import os | |
from io import BytesIO | |
from multiprocessing import Pool, cpu_count | |
import fiftyone as fo | |
from datasets import load_dataset | |
from PIL import Image | |
# Fetch the validation split of ImageNet-Hard. This runs at import time so
# that process_image() can reach the dataset through the module-level name.
imagenet_hard_dataset = load_dataset("taesiri/imagenet-hard", split="validation")

# Directory that receives the exported JPEG files; reuse it if already present.
os.makedirs("dataset", exist_ok=True)
def process_image(i):
    """Export dataset row *i* as a JPEG file and return its metadata.

    The row's image is converted to RGB and written to ``dataset/<i>.JPEG``
    with JPEG quality 80.

    Args:
        i: Index of the row in the module-level ``imagenet_hard_dataset``.

    Returns:
        A dict with the keys ``"file_path"`` (path of the written JPEG),
        ``"labels"`` (the row's ``english_label`` value) and ``"origin"``
        (the row's ``origin`` value).
    """
    # Fetch the row once. Each ``imagenet_hard_dataset[i]`` lookup builds the
    # whole row again (presumably including decoding the image), so the
    # original three lookups tripled that work for every sample.
    row = imagenet_hard_dataset[i]

    image = row["image"].convert("RGB")
    image_path = f"dataset/{i}.JPEG"
    image.save(image_path, "JPEG", quality=80)

    return {
        "file_path": image_path,
        "labels": row["english_label"],
        "origin": row["origin"],
    }
def create_fiftyone_sample(sample):
    """Build a FiftyOne sample from one metadata dict.

    Args:
        sample: Mapping with ``"file_path"``, ``"origin"`` and the English
            label stored under ``"labels"`` (the key written by
            ``process_image``). ``"english_label"`` is accepted as a
            fallback key.

    Returns:
        An ``fo.Sample`` whose ``labels`` field is an ``fo.Classifications``
        holding two classifications: the origin first, then the English
        label, each stringified with ``str()``.

    Raises:
        KeyError: If ``"file_path"`` or ``"origin"`` is missing, or if
            neither ``"labels"`` nor ``"english_label"`` is present.
    """
    # process_image() stores the English label under "labels"; reading only
    # "english_label" (as this function used to) raised KeyError on every call.
    try:
        english_value = sample["labels"]
    except KeyError:
        english_value = sample["english_label"]

    origin_label = fo.Classification(label=str(sample["origin"]))
    english_label = fo.Classification(label=str(english_value))
    return fo.Sample(
        filepath=sample["file_path"],
        labels=fo.Classifications(classifications=[origin_label, english_label]),
    )
if __name__ == "__main__":
    # Export every image to disk using one worker process per CPU; the
    # result is one metadata dict per dataset index, in index order.
    total_images = len(imagenet_hard_dataset)
    with Pool(cpu_count()) as workers:
        exported_records = workers.map(process_image, range(total_images))

    # Build the FiftyOne dataset and fill it with one sample per record.
    dataset = fo.Dataset(name="imagenet-hard")
    dataset.add_samples(
        [create_fiftyone_sample(record) for record in exported_records]
    )

    # Serve the app on all interfaces at port 8888 and block until the
    # session ends.
    session = fo.launch_app(dataset, port=8888, remote=True, address="0.0.0.0")
    session.wait()