taesiri committed
Commit c1a6c5e
Parent: 081f91c
Files changed (3):
  1. README.md +4 -5
  2. app.py +168 -26
  3. requirements.txt +2 -2
README.md CHANGED
@@ -1,14 +1,13 @@
 ---
-title: ImageNet Hard Browser
-emoji: 🐨
+title: ImageNet-Hard Browser
+emoji: 🔍
 colorFrom: indigo
 colorTo: gray
-sdk: docker
-sdk_version: 3.1.1
+sdk: gradio
+sdk_version: 4.9.0
 app_file: app.py
 pinned: false
 license: mit
-app_port: 8888
 ---
 
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py CHANGED
@@ -1,47 +1,189 @@
 import os
 from io import BytesIO
 from multiprocessing import Pool, cpu_count
-import fiftyone as fo
 from datasets import load_dataset
 from PIL import Image
+import gradio as gr
+import pandas as pd
 
-# Load the dataset
-imagenet_hard_dataset = load_dataset('taesiri/imagenet-hard', split='validation')
-os.makedirs("dataset", exist_ok=True)
+imagenet_hard_dataset = load_dataset("taesiri/imagenet-hard", split="validation")
+THUMBNAIL_PATH = "dataset/thumbnails"
+os.makedirs(THUMBNAIL_PATH, exist_ok=True)
+
+max_size = (480, 480)
+
+all_origins = set()
+all_labels = set()
+dataset_df = None
 
 
 def process_image(i):
+    global all_origins
     image = imagenet_hard_dataset[i]["image"].convert("RGB")
-    image_path = f"dataset/{i}.JPEG"
-    image.save(image_path, "JPEG", quality=80)
+    url_prefix = "https://imagenet-hard.taesiri.ai/"
+
+    origin = imagenet_hard_dataset[i]["origin"]
+    label = imagenet_hard_dataset[i]["english_label"]
+
+    save_path = os.path.join(THUMBNAIL_PATH, origin)
+    # make sure the folder exists
+    os.makedirs(save_path, exist_ok=True)
+    image_path = os.path.join(save_path, f"{i}.jpg")
+
+    image.thumbnail(max_size, Image.LANCZOS)
+
+    image.save(image_path, "JPEG", quality=100)
+
+    url = url_prefix + image_path
+
     return {
-        "file_path": image_path,
-        "labels": imagenet_hard_dataset[i]["english_label"],
+        "preview": url,
+        "filepath": image_path,
         "origin": imagenet_hard_dataset[i]["origin"],
+        "labels": imagenet_hard_dataset[i]["english_label"],
     }
 
 
-def create_fiftyone_sample(sample):
-    origin_label = fo.Classification(label=str(sample["origin"]))
-    english_label = fo.Classification(label=str(sample["labels"]))
-    return fo.Sample(
-        filepath=sample["file_path"],
-        labels=fo.Classifications(classifications=[origin_label, english_label]),
-    )
-
-
-
-if __name__ == "__main__":
-    # Process images in parallel and get the list of images with their labels
-    with Pool(cpu_count()) as pool:
-        samples_data = pool.map(process_image, range(len(imagenet_hard_dataset)))
-
-    # Create a FiftyOne dataset
-    dataset = fo.Dataset(name="imagenet-hard")
-
-    # Add images and labels to the FiftyOne dataset
-    samples = [create_fiftyone_sample(sample_data) for sample_data in samples_data]
-    dataset.add_samples(samples)
-
-    session = fo.launch_app(dataset, port=8888, remote=True, address="0.0.0.0")
-    session.wait()
+# PREPROCESSING
+if os.path.exists("dataset.pkl"):
+    dataset_df = pd.read_pickle("dataset.pkl")
+    all_origins = set(dataset_df["origin"])
+    all_labels = set().union(*dataset_df["labels"])
+else:
+    with Pool(cpu_count()) as pool:
+        samples_data = pool.map(process_image, range(len(imagenet_hard_dataset)))
+    dataset_df = pd.DataFrame(samples_data)
+    print(dataset_df)
+    all_origins = set(dataset_df["origin"])
+    all_labels = set().union(*dataset_df["labels"])
+    # save dataframe on disk
+    dataset_df.to_csv("dataset.csv")
+    dataset_df.to_pickle("dataset.pkl")
+
+
+def get_slice(origin, label):
+    global dataset_df
+
+    if not origin and not label:
+        filtered_df = dataset_df
+    else:
+        filtered_df = dataset_df[
+            (dataset_df["origin"] == origin if origin else True)
+            & (dataset_df["labels"].apply(lambda x: label in x) if label else True)
+        ]
+
+    max_value = len(filtered_df) // 16
+
+    returned_values = []
+
+    start_index = 0
+    end_index = start_index + 16
+
+    slice_df = filtered_df.iloc[start_index:end_index]
+
+    for row in slice_df.itertuples():
+        returned_values.append(gr.update(value=row.preview))
+        returned_values.append(gr.update(value=row.origin))
+        returned_values.append(gr.update(value=row.labels))
+
+    if len(returned_values) < 48:
+        returned_values.extend([None] * (48 - len(returned_values)))
+
+    filtered_df = gr.Dataframe(filtered_df, datatype="markdown")
+    return filtered_df, gr.update(maximum=max_value, value=0), *returned_values
+
+
+def reset_filters_fn():
+    return gr.update(value=None), gr.update(value=None)
+
+
+def make_grid(grid_size):
+    list_of_components = []
+
+    with gr.Row():
+        for row_counter in range(grid_size[0]):
+            with gr.Column():
+                for col_counter in range(grid_size[1]):
+                    item_image = gr.Image()
+                    with gr.Accordion("Click for details", open=False):
+                        item_source = gr.Textbox(label="Source Dataset")
+                        item_labels = gr.Textbox(label="Labels")
+
+                    list_of_components.append(item_image)
+                    list_of_components.append(item_source)
+                    list_of_components.append(item_labels)
+
+    return list_of_components
+
+
+def slider_upadte(slider, df):
+    returned_values = []
+
+    start_index = (slider) * 16
+    end_index = start_index + 16
+
+    slice_df = df.iloc[start_index:end_index]
+
+    for row in slice_df.itertuples():
+        returned_values.append(gr.update(value=row.preview))
+        returned_values.append(gr.update(value=row.origin))
+        returned_values.append(gr.update(value=row.labels))
+
+    if len(returned_values) < 48:
+        returned_values.extend([None] * (48 - len(returned_values)))
+
+    return returned_values
+
+
+with gr.Blocks() as demo:
+    gr.Markdown("# ImageNet-Hard Browser")
+    # add link to home page and dataset
+    gr.HTML("")
+    gr.HTML()
+    gr.HTML(
+        """
+        <center>
+        <span style="font-size: 14px; vertical-align: middle;">
+        <a href='https://zoom.taesiri.ai/'>Project Home Page</a> &nbsp;|&nbsp;
+        <a href='https://huggingface.co/datasets/taesiri/imagenet-hard'>Dataset</a>
+        </span>
+        </center>
+        """
+    )
+
+    with gr.Row():
+        origin_dropdown = gr.Dropdown(all_origins, label="Origin")
+        label_dropdown = gr.Dropdown(all_labels, label="Label")
+    with gr.Row():
+        show_btn = gr.Button("Show")
+        reset_filters = gr.Button("Reset Filters")
+
+    preview_dataframe = gr.Dataframe(height=500, visible=False)
+
+    gr.Markdown("## Preview")
+
+    maximum_vale = len(dataset_df) // 16
+
+    preview_slider = gr.Slider(minimum=1, maximum=maximum_vale, step=1, value=1)
+    all_components = make_grid((4, 4))
+
+    show_btn.click(
+        fn=get_slice,
+        inputs=[origin_dropdown, label_dropdown],
+        outputs=[preview_dataframe, preview_slider, *all_components],
+    )
+
+    reset_filters.click(
+        fn=reset_filters_fn,
+        inputs=[],
+        outputs=[origin_dropdown, label_dropdown],
+    )
+
+    preview_slider.change(
+        fn=slider_upadte,
+        inputs=[preview_slider, preview_dataframe],
+        outputs=[*all_components],
+    )
+
+
+demo.launch()
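The rewritten app.py drives a fixed 4 x 4 preview grid: make_grid((4, 4)) creates 16 image slots, each paired with a source textbox and a labels textbox, so both get_slice and slider_upadte return exactly 48 component updates per 16-row page, padding with None when the filtered slice runs short. A minimal sketch of that paging arithmetic, with plain values and a toy DataFrame standing in for the app's gr.update calls and its preprocessed dataset.pkl (the helper name page_to_values and the toy data are illustrative, not part of the commit):

```python
import pandas as pd

PAGE_SIZE = 16        # images per page (4 x 4 grid)
VALUES_PER_PAGE = 48  # 16 rows x 3 components (preview, origin, labels)


def page_to_values(df: pd.DataFrame, page: int) -> list:
    """Flatten one page of rows into the flat list a 4x4 grid expects."""
    start = page * PAGE_SIZE
    chunk = df.iloc[start : start + PAGE_SIZE]

    values = []
    for row in chunk.itertuples():
        values.extend([row.preview, row.origin, row.labels])

    # pad short final pages so the list always matches the 48 grid outputs
    values.extend([None] * (VALUES_PER_PAGE - len(values)))
    return values


# toy stand-in for the preprocessed DataFrame
toy = pd.DataFrame(
    {
        "preview": [f"https://example.com/{i}.jpg" for i in range(20)],
        "origin": ["imagenet"] * 20,
        "labels": [["toy label"]] * 20,
    }
)

print(len(page_to_values(toy, 0)))  # 48 -> a full first page
print(page_to_values(toy, 1)[:3])   # page 1 starts at row 16
```

With 20 toy rows, page 0 fills all 48 slots and page 1 carries the remaining 4 rows plus padding, which mirrors how the Gradio slider pages through the filtered DataFrame.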
 
requirements.txt CHANGED
@@ -1,5 +1,5 @@
-fiftyone
 transformers
 datasets
 tqdm
-numpy
+numpy
+pandas