Spaces:
Runtime error
Runtime error
File size: 1,335 Bytes
e6582df 83da884 e6582df 83da884 e6582df 83da884 e6582df 83da884 e6582df |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 |
import pickle
import datasets
import os
import umap
if __name__ == "__main__":
    # Preparation script: make sure a pickled DataFrame with precomputed UMAP
    # projections exists at `cache_path`. When the file is already present it
    # is only read back; otherwise it is built from the hub dataset and saved.
    cache_path = "dataset_cache.pkl"

    if not os.path.exists(cache_path):
        # No cache yet: fetch the training split.
        raw = datasets.load_dataset("renumics/cifar10-outlier", split="train")
        print("Dataset loaded using datasets.load_dataset().")

        # Rename the columns on a copy; `raw` itself still exposes the
        # original "label" feature, which is used below for int -> name.
        renamed = raw.rename_columns({"img": "image", "label": "labels"})
        frame = renamed.to_pandas()
        label_feature = raw.features["label"]
        frame["label_str"] = frame["labels"].apply(label_feature.int2str)

        # For quick experiments the frame can be truncated here
        # (e.g. frame[:1000]) before the expensive projections run.

        # Precalculate one UMAP projection per embedding column:
        # (source column, destination column, progress message).
        projections = (
            ("embedding_ft", "embedding_ft_precalc", "Umap for ft done"),
            (
                "embedding_foundation",
                "embedding_foundation_precalc",
                "Umap for base done",
            ),
        )
        for source_col, target_col, done_msg in projections:
            # A fresh reducer per column, with a fixed seed for repeatability.
            reducer = umap.UMAP(n_neighbors=70, min_dist=0.5, random_state=42)
            projected = reducer.fit_transform(frame[source_col].tolist())
            frame[target_col] = projected.tolist()
            print(done_msg)

        # Persist the enriched frame so later runs can skip the work above.
        with open(cache_path, "wb") as fh:
            pickle.dump(frame, fh)
        print("Dataset saved to cache.")
    else:
        # Cache hit: read the pickled object back. Within this block the
        # loaded value is not used further.
        # NOTE: pickle.load must only be used on trusted files, such as the
        # cache this script wrote itself.
        with open(cache_path, "rb") as fh:
            cached = pickle.load(fh)
        print("Dataset loaded from cache.")
|