sps44 committed on
Commit
619763d
1 Parent(s): c35bcb0

include enrichment

Browse files
Files changed (2) hide show
  1. Dockerfile +1 -1
  2. prepare.py +17 -1
Dockerfile CHANGED
@@ -10,7 +10,7 @@ RUN pip install pip -U
10
 
11
  RUN pip install renumics-spotlight==1.3.0rc8 pyarrow
12
 
13
- RUN pip install datasets
14
 
15
  COPY . .
16
  RUN mkdir -p /code/.cache
 
10
 
11
  RUN pip install renumics-spotlight==1.3.0rc8 pyarrow
12
 
13
+ RUN pip install datasets cleanvision
14
 
15
  COPY . .
16
  RUN mkdir -p /code/.cache
prepare.py CHANGED
@@ -2,7 +2,18 @@ import pickle
2
  import datasets
3
  import os
4
  import pandas as pd
 
5
 
 
 
 
 
 
 
 
 
 
 
6
  if __name__ == "__main__":
7
  cache_file = "dataset_cache.parquet"
8
  if os.path.exists(cache_file):
@@ -14,7 +25,12 @@ if __name__ == "__main__":
14
  dataset = datasets.load_dataset("renumics/cifar100-enriched", split="test")
15
  print("Dataset loaded using datasets.load_dataset().")
16
 
17
- df = dataset.to_pandas()
 
 
 
 
 
18
 
19
 
20
  # Save dataset to cache
 
2
  import datasets
3
  import os
4
  import pandas as pd
5
+ from cleanvision.imagelab import Imagelab
6
 
7
def cv_issues_cleanvision(df, image_name='image'):
    """Run CleanVision issue detection on the images referenced by ``df``.

    Args:
        df: DataFrame holding a column whose values are passed to
            ``Imagelab`` as ``filepaths``.
        image_name: Name of that column. Defaults to ``'image'``.

    Returns:
        The ``imagelab.issues`` table after ``reset_index()``, i.e. with its
        previous index turned into a regular column.

    Raises:
        KeyError: If ``df`` has no column named ``image_name``.
    """
    # Honor the caller-supplied column name; the lookup used to be
    # hard-coded to 'image', which silently ignored this parameter.
    image_paths = df[image_name].to_list()

    imagelab = Imagelab(filepaths=image_paths)
    imagelab.find_issues()

    return imagelab.issues.reset_index()
16
+
17
  if __name__ == "__main__":
18
  cache_file = "dataset_cache.parquet"
19
  if os.path.exists(cache_file):
 
25
  dataset = datasets.load_dataset("renumics/cifar100-enriched", split="test")
26
  print("Dataset loaded using datasets.load_dataset().")
27
 
28
+ df = dataset.to_pandas()
29
+
30
+ df=df.reset_index(drop=True)
31
+
32
+ df_cv=cv_issues_cleanvision(df)
33
+ df = pd.concat([df, df_cv], axis=1)
34
 
35
 
36
  # Save dataset to cache