sps44 committed
Commit ecfd74a
Parent: 18df44d

revert to simple cifar

Files changed (2):
  1. prepare.py +6 -28
  2. run.py +17 -22
prepare.py CHANGED
@@ -1,44 +1,22 @@
 import pickle
 import datasets
 import os
-from cleanvision.imagelab import Imagelab
-import pandas as pd
-
-
-
-
-def cv_issues_cleanvision(df, image_name='image'):
-
-    image_paths = df['image'].to_list()
-    imagelab = Imagelab(filepaths=image_paths)
-    imagelab.find_issues()
-
-    df_cv=imagelab.issues.reset_index()
-
-    return df_cv
-

 if __name__ == "__main__":
-    cache_file = "dataset_cache.parquet"
+    cache_file = "dataset_cache.pkl"
     if os.path.exists(cache_file):
         # Load dataset from cache
-        df = pd.read_parquet(cache_file)
+        with open(cache_file, "rb") as file:
+            dataset = pickle.load(file)
         print("Dataset loaded from cache.")
     else:
         # Load dataset using datasets.load_dataset()
-        dataset = datasets.load_dataset("renumics/cifar100-enriched", split="test")
+        dataset = datasets.load_dataset("renumics/cifar100-enriched", split="train")
         print("Dataset loaded using datasets.load_dataset().")
-
-        df = dataset.to_pandas()
-
-        df=df.reset_index(drop=True)
-
-        df_cv=cv_issues_cleanvision(df)
-        df = pd.concat([df, df_cv], axis=1)

         # Save dataset to cache
-        #save df as parquet
-        df.to_parquet(cache_file)
+        with open(cache_file, "wb") as file:
+            pickle.dump(dataset, file)

         print("Dataset saved to cache.")
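For context on the change: the Parquet cache plus CleanVision enrichment is replaced by pickling the raw datasets.Dataset, and the split moves from "test" to "train". Pickle round-trips a Hugging Face Dataset fine, but the datasets library also ships a native on-disk format via save_to_disk/load_from_disk. A minimal sketch of that alternative caching pattern, not part of this commit (the dataset_cache directory name is illustrative):

import os
import datasets

CACHE_DIR = "dataset_cache"  # illustrative path, not from the commit

if os.path.exists(CACHE_DIR):
    # load_from_disk reads the Arrow files written by save_to_disk
    dataset = datasets.load_from_disk(CACHE_DIR)
    print("Dataset loaded from cache.")
else:
    dataset = datasets.load_dataset("renumics/cifar100-enriched", split="train")
    dataset.save_to_disk(CACHE_DIR)
    print("Dataset saved to cache.")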
run.py CHANGED
@@ -1,26 +1,21 @@
+import pickle
+import datasets
 import os
-import pandas as pd
-import pickle
-from renumics import spotlight

 if __name__ == "__main__":
-    cache_file = "dataset_cache.parquet"
-    issue_cache_file="sliceline.pkl"
-
-    print('starting up')
+    cache_file = "dataset_cache.pkl"
+    if os.path.exists(cache_file):
+        # Load dataset from cache
+        with open(cache_file, "rb") as file:
+            dataset = pickle.load(file)
+        print("Dataset loaded from cache.")
+    else:
+        # Load dataset using datasets.load_dataset()
+        dataset = datasets.load_dataset("renumics/cifar100-enriched", split="train")
+        print("Dataset loaded using datasets.load_dataset().")

-    #load dataframe from parquet
-    df = pd.read_parquet(cache_file)
-
-
-    df_show = df.drop(columns=['embedding', 'probabilities'])
-
-    with open(issue_cache_file, "rb") as file:
-        issues = pickle.load(file)
-    print('issues loaded')
-
-    while True:
-        view = spotlight.show(df_show, port=7860, host="0.0.0.0", issues=issues, layout="sliceline-layout.json",
-                              dtype={"image": spotlight.Image, "embedding_reduced": spotlight.Embedding}, allow_filebrowsing=False)
-
-        view.close()
+    # Save dataset to cache
+    with open(cache_file, "wb") as file:
+        pickle.dump(dataset, file)
+
+    print("Dataset saved to cache.")
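After this revert, run.py carries the same caching snippet as prepare.py; the Spotlight viewer loop that previously served the app on port 7860 is gone. If the viewer were to be pointed at the new pickle cache, a minimal sketch based on the deleted spotlight.show call might look like this (the dtype column names mirror the removed run.py and are assumptions about the enriched dataset's schema):

import pickle
from renumics import spotlight

with open("dataset_cache.pkl", "rb") as file:
    dataset = pickle.load(file)

# Spotlight takes a DataFrame; the column names follow the removed run.py
df = dataset.to_pandas()

spotlight.show(
    df,
    port=7860,
    host="0.0.0.0",
    dtype={"image": spotlight.Image, "embedding_reduced": spotlight.Embedding},
)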