sps44's picture
include enrichment
619763d
raw
history blame
No virus
1.1 kB
import pickle
import datasets
import os
import pandas as pd
from cleanvision.imagelab import Imagelab
def cv_issues_cleanvision(df: pd.DataFrame, image_name: str = 'image') -> pd.DataFrame:
    """Run CleanVision's issue detection over the images referenced by *df*.

    Args:
        df: Table containing a column of image file paths.
        image_name: Name of the column in ``df`` that holds the file paths.
            Defaults to ``'image'``.

    Returns:
        ``Imagelab.issues`` with its index moved into a regular column via
        ``reset_index()``, so the result carries a fresh positional index.
        (Presumably one row per input image, in input order -- confirm
        against the CleanVision documentation.)

    Raises:
        KeyError: If ``image_name`` is not a column of ``df``.
    """
    # Honour the caller-supplied column name; it used to be ignored in favour
    # of a hard-coded 'image' lookup.
    image_paths = df[image_name].to_list()

    imagelab = Imagelab(filepaths=image_paths)
    imagelab.find_issues()

    # reset_index() turns the issues index into a column so the result can be
    # concatenated column-wise with a positionally indexed frame.
    return imagelab.issues.reset_index()
if __name__ == "__main__":
    # Script entry point: build the table with CleanVision issue columns once,
    # then reuse the parquet cache on later runs.
    cache_file = "dataset_cache.parquet"

    if not os.path.exists(cache_file):
        # No cache yet: load the test split via datasets.load_dataset().
        dataset = datasets.load_dataset("renumics/cifar100-enriched", split="test")
        print("Dataset loaded using datasets.load_dataset().")

        # Use a plain 0..n-1 index so the column-wise concat below lines up
        # with the reset index of the frame returned by cv_issues_cleanvision.
        df = dataset.to_pandas().reset_index(drop=True)

        df_cv = cv_issues_cleanvision(df)
        df = pd.concat([df, df_cv], axis=1)

        # Persist the combined table as parquet for subsequent runs.
        df.to_parquet(cache_file)
        print("Dataset saved to cache.")
    else:
        # Cache hit: read the previously saved table.
        df = pd.read_parquet(cache_file)
        print("Dataset loaded from cache.")