MarkusStoll committed 83da884 (1 parent: e6582df): init

Files changed:
- prepare.py: +22 -2
- run.py: +23 -16
prepare.py CHANGED
@@ -1,6 +1,8 @@
 import pickle
 import datasets
 import os
+import umap
+
 
 if __name__ == "__main__":
     cache_file = "dataset_cache.pkl"
@@ -11,12 +13,30 @@ if __name__ == "__main__":
         print("Dataset loaded from cache.")
     else:
         # Load dataset using datasets.load_dataset()
-
+        ds = datasets.load_dataset("renumics/cifar10-outlier", split="train")
         print("Dataset loaded using datasets.load_dataset().")
 
+        df = ds.rename_columns({"img": "image", "label": "labels"}).to_pandas()
+        df["label_str"] = df["labels"].apply(lambda x: ds.features["label"].int2str(x))
+
+        # df = df[:1000]
+
+        # precalculate umap embeddings
+        df["embedding_ft_precalc"] = umap.UMAP(
+            n_neighbors=70, min_dist=0.5, random_state=42
+        ).fit_transform(df["embedding_ft"].tolist()).tolist()
+        print("Umap for ft done")
+
+
+        df["embedding_foundation_precalc"] = umap.UMAP(
+            n_neighbors=70, min_dist=0.5, random_state=42
+        ).fit_transform(df["embedding_foundation"].tolist()).tolist()
+
+        print("Umap for base done")
+
         # Save dataset to cache
         with open(cache_file, "wb") as file:
-            pickle.dump(
+            pickle.dump(df, file)
 
         print("Dataset saved to cache.")
 
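For context on the precalculation step added above: umap-learn's UMAP.fit_transform takes an (n_samples, n_features) array and returns an (n_samples, 2) array by default, which prepare.py stores back as one 2-D point per row in the *_precalc columns. A minimal standalone sketch of that pattern, using random vectors as a stand-in for the dataset's embedding_ft column (the stand-in data and sizes are illustrative only):

import numpy as np
import pandas as pd
import umap

# Stand-in for the cached dataframe: 200 rows with a 512-dim embedding column.
df = pd.DataFrame({"embedding_ft": list(np.random.rand(200, 512))})

# Same reducer settings as prepare.py; fit_transform maps (200, 512) -> (200, 2).
reducer = umap.UMAP(n_neighbors=70, min_dist=0.5, random_state=42)
points_2d = reducer.fit_transform(df["embedding_ft"].tolist())

# One 2-D point per row, as stored in the *_precalc columns.
df["embedding_ft_precalc"] = points_2d.tolist()
print(df["embedding_ft_precalc"].head())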
run.py CHANGED
@@ -1,5 +1,4 @@
 import pickle
-import datasets
 from renumics import spotlight
 import os
 
@@ -8,23 +7,31 @@ if __name__ == "__main__":
     if os.path.exists(cache_file):
         # Load dataset from cache
        with open(cache_file, "rb") as file:
-
+            df = pickle.load(file)
         print("Dataset loaded from cache.")
-    else:
-        # Load dataset using datasets.load_dataset()
-        dataset = datasets.load_dataset("renumics/cifar100-enriched", split="train")
-        print("Dataset loaded using datasets.load_dataset().")
 
-        # Save dataset to cache
-        with open(cache_file, "wb") as file:
-            pickle.dump(dataset, file)
 
-
+        while True:
+            dtypes = {
+                "nn_image": spotlight.Image,
+                "image": spotlight.Image,
+                "embedding_ft": spotlight.Embedding,
+                "embedding_foundation": spotlight.Embedding,
+                "embedding_ft_precalc": spotlight.Embedding,
+                "embedding_foundation_precalc": spotlight.Embedding,
+            }
+            view = spotlight.show(
+                df,
+                dtype=dtypes,
+                layout="/home/markus/Downloads/layout_ft_hf.json",
+                port=7860,
+                host="0.0.0.0",
+                allow_filebrowsing=False
+            )
+
+            view.close()
+
+    else:
+        print(f"Dataset {cache_file} not found. Please run prepare.py first.")
 
 
-    df = dataset.to_pandas()
-    df_show = df.drop(columns=['embedding', 'probabilities'])
-    while True:
-        view = spotlight.show(df_show.sample(5000, random_state=1), port=7860, host="0.0.0.0",
-                              dtype={"image": spotlight.Image, "embedding_reduced": spotlight.Embedding}, allow_filebrowsing=False)
-        view.close()
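run.py now assumes prepare.py has already written dataset_cache.pkl with every column named in the dtypes mapping. A small hedged check along these lines (this helper script is not part of the commit; the expected column list is taken from the dtypes dict above) can confirm the cache before launching Spotlight:

import os
import pickle

cache_file = "dataset_cache.pkl"  # same cache path used by prepare.py and run.py

# Columns that run.py maps to Spotlight dtypes; a missing one would break the viewer.
expected = [
    "nn_image",
    "image",
    "embedding_ft",
    "embedding_foundation",
    "embedding_ft_precalc",
    "embedding_foundation_precalc",
]

if not os.path.exists(cache_file):
    raise SystemExit(f"Dataset {cache_file} not found. Please run prepare.py first.")

with open(cache_file, "rb") as file:
    df = pickle.load(file)

missing = [col for col in expected if col not in df.columns]
print(f"{len(df)} rows loaded; missing columns: {missing if missing else 'none'}")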