sps44 committed
Commit
5ec7c74
1 Parent(s): 7eefcaa

switch to parquet dataset

Files changed (4)
  1. Dockerfile +2 -3
  2. cifar100-enriched-cv.parquet +3 -0
  3. prepare.py +0 -22
  4. run.py +9 -16
Dockerfile CHANGED
@@ -8,12 +8,11 @@ ENV HOME=/code
 RUN apt install curl
 RUN pip install pip -U
 
-RUN pip install renumics-spotlight==1.2.0rc2
+RUN pip install renumics-spotlight==1.3.0rc8
 
-RUN pip install datasets
+# RUN pip install datasets
 
 COPY . .
 RUN mkdir -p /code/.cache
 RUN chmod -R 777 /code
-RUN python prepare.py
 CMD ["python", "run.py"]
cifar100-enriched-cv.parquet ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a00c5f027c054d5a697540d1396d07f70cd651ff59b6b862f81498e164279351
+size 37007568
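
The parquet file is tracked with Git LFS, so the commit records only the pointer shown above (spec version, SHA-256 object id, and size in bytes) rather than the 37 MB binary payload. For illustration, a minimal sketch that verifies a locally checked-out copy against this pointer, assuming only the standard library and the filename used in this commit:

import hashlib
import os

# Values copied from the LFS pointer in this commit.
EXPECTED_OID = "a00c5f027c054d5a697540d1396d07f70cd651ff59b6b862f81498e164279351"
EXPECTED_SIZE = 37007568

def verify_lfs_file(path: str) -> bool:
    """Return True if the resolved LFS file matches the pointer's size and oid."""
    if os.path.getsize(path) != EXPECTED_SIZE:
        return False
    sha = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
            sha.update(chunk)
    return sha.hexdigest() == EXPECTED_OID

print(verify_lfs_file("cifar100-enriched-cv.parquet"))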
prepare.py DELETED
@@ -1,22 +0,0 @@
-import pickle
-import datasets
-import os
-
-if __name__ == "__main__":
-    cache_file = "dataset_cache.pkl"
-    if os.path.exists(cache_file):
-        # Load dataset from cache
-        with open(cache_file, "rb") as file:
-            dataset = pickle.load(file)
-        print("Dataset loaded from cache.")
-    else:
-        # Load dataset using datasets.load_dataset()
-        dataset = datasets.load_dataset("renumics/cifar100-enriched", split="test")
-        print("Dataset loaded using datasets.load_dataset().")
-
-        # Save dataset to cache
-        with open(cache_file, "wb") as file:
-            pickle.dump(dataset, file)
-
-        print("Dataset saved to cache.")
-
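
With prepare.py deleted, nothing is downloaded or pickled at image build time anymore; the Space ships the pre-exported parquet file instead. The export itself is not part of this commit; presumably it was a one-off along these lines (a hedged sketch, assuming the datasets library's Dataset.to_parquet and the train split that the old run.py loaded):

import datasets

# Hypothetical one-off export; the script that actually produced
# cifar100-enriched-cv.parquet is not included in this commit.
dataset = datasets.load_dataset("renumics/cifar100-enriched", split="train")
dataset.to_parquet("cifar100-enriched-cv.parquet")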
run.py CHANGED
@@ -2,31 +2,24 @@ import pickle
 import datasets
 from renumics import spotlight
 import os
+import pandas as pd
+import pickle
+from renumics import spotlight
 
 if __name__ == "__main__":
     cache_file = "dataset_cache.pkl"
     issue_cache_file="sliceline.pkl"
-    if os.path.exists(cache_file):
-        # Load dataset from cache
-        with open(cache_file, "rb") as file:
-            dataset = pickle.load(file)
-        print("Dataset loaded from cache.")
-    else:
-        # Load dataset using datasets.load_dataset()
-        dataset = datasets.load_dataset("renumics/cifar100-enriched", split="train")
-        print("Dataset loaded using datasets.load_dataset().")
-
-        # Save dataset to cache
-        with open(cache_file, "wb") as file:
-            pickle.dump(dataset, file)
+
+    import pandas as pd
 
-    print("Dataset saved to cache.")
+    #load dataframe from parquet
+    df = pd.read_parquet('cifar100-enriched-cv.parquet')
 
     with open(issue_cache_file, "rb") as issue_file:
         issues = pickle.load(issue_file)
-
-    df = dataset.to_pandas()
+
     df_show = df.drop(columns=['embedding', 'probabilities'])
+
     while True:
         view = spotlight.show(df_show, port=7860, host="0.0.0.0", issues=issues,
             dtype={"image": spotlight.Image, "embedding_reduced": spotlight.Embedding}, allow_filebrowsing=False)