taesiri committed on
Commit
e859cf6
1 Parent(s): fbfb369
Files changed (1) hide show
  1. app.py +27 -16
app.py CHANGED
@@ -41,9 +41,8 @@ bad_items = open("./ex2.txt", "r").read().split("\n")
41
  bad_items = [x.split(".")[0] for x in bad_items]
42
  bad_items = [int(x) for x in bad_items if x != ""]
43
 
44
- NUMBER_OF_IMAGES = 100 # len(bad_items)
45
 
46
- # download and extract folders
47
 
48
  gdown.cached_download(
49
  url="https://huggingface.co/datasets/taesiri/imagenet_hard_review_samples/resolve/main/data.zip",
@@ -81,8 +80,6 @@ def update_snapshot(username):
81
  with open(file) as f:
82
  data = json.load(f)
83
  tdf = [data[x] for x in columns]
84
-
85
- # add filename as a column
86
  rows.append(tdf)
87
 
88
  df = pd.DataFrame(rows, columns=columns)
@@ -98,15 +95,15 @@ def generate_dataset(username):
98
  all_images = set(bad_items)
99
  answered = set(df.id)
100
  remaining = list(all_images - answered)
 
 
 
 
101
 
102
- if len(remaining) < NUMBER_OF_IMAGES and len(remaining) > 0:
103
- NUMBER_OF_IMAGES = len(remaining)
104
- random_indices = list(remaining)
105
- elif len(remaining) == 0:
106
  return []
107
- else:
108
- random_indices = np.random.choice(remaining, NUMBER_OF_IMAGES, replace=False)
109
 
 
110
  random_images = [imagenet_hard[int(i)]["image"] for i in random_indices]
111
  random_gt_ids = [imagenet_hard[int(i)]["label"] for i in random_indices]
112
  random_gt_labels = [imagenet_hard[int(x)]["english_label"] for x in random_indices]
@@ -147,8 +144,6 @@ qid_to_sample = {
147
  int(x.split("/")[-1].split(".")[0].split("_")[0]): x for x in all_samples
148
  }
149
 
150
- # user-e3z5b
151
-
152
 
153
  def get_training_samples(qid):
154
  labels_id = imagenet_hard[int(qid)]["label"]
@@ -197,7 +192,9 @@ def preprocessing(data, current_index, history, username):
197
  def update_app(decision, data, current_index, history, username):
198
  global NUMBER_OF_IMAGES
199
  if current_index == -1:
200
- return
 
 
201
 
202
  if current_index == NUMBER_OF_IMAGES - 1:
203
  time_stamp = int(time.time())
@@ -290,18 +287,32 @@ newcss = """
290
  }
291
  """
292
 
293
- with gr.Blocks(css=newcss) as demo:
294
  data_gr = gr.State({})
295
  current_index = gr.State(-1)
296
  history = gr.State({})
297
 
298
- gr.Markdown("# Cleaning ImageNet-Hard!")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
299
 
300
  random_str = "".join(
301
  random.choice(string.ascii_lowercase + string.digits) for _ in range(5)
302
  )
303
 
304
- with gr.Row():
305
  username = gr.Textbox(label="Username", value=f"user-{random_str}")
306
  prepare_btn = gr.Button(value="Load Samples")
307
 
 
41
  bad_items = [x.split(".")[0] for x in bad_items]
42
  bad_items = [int(x) for x in bad_items if x != ""]
43
 
44
+ NUMBER_OF_IMAGES = len(bad_items)
45
 
 
46
 
47
  gdown.cached_download(
48
  url="https://huggingface.co/datasets/taesiri/imagenet_hard_review_samples/resolve/main/data.zip",
 
80
  with open(file) as f:
81
  data = json.load(f)
82
  tdf = [data[x] for x in columns]
 
 
83
  rows.append(tdf)
84
 
85
  df = pd.DataFrame(rows, columns=columns)
 
95
  all_images = set(bad_items)
96
  answered = set(df.id)
97
  remaining = list(all_images - answered)
98
+ # shuffle remaining
99
+ random.shuffle(remaining)
100
+
101
+ NUMBER_OF_IMAGES = len(bad_items)
102
 
103
+ if NUMBER_OF_IMAGES == 0:
 
 
 
104
  return []
 
 
105
 
106
+ random_indices = remaining
107
  random_images = [imagenet_hard[int(i)]["image"] for i in random_indices]
108
  random_gt_ids = [imagenet_hard[int(i)]["label"] for i in random_indices]
109
  random_gt_labels = [imagenet_hard[int(x)]["english_label"] for x in random_indices]
 
144
  int(x.split("/")[-1].split(".")[0].split("_")[0]): x for x in all_samples
145
  }
146
 
 
 
147
 
148
  def get_training_samples(qid):
149
  labels_id = imagenet_hard[int(qid)]["label"]
 
192
  def update_app(decision, data, current_index, history, username):
193
  global NUMBER_OF_IMAGES
194
  if current_index == -1:
195
+ fake_plot = string_to_image("Please Enter your username and load samples")
196
+ empty_image = Image.new("RGB", (224, 224))
197
+ return empty_image, fake_plot, current_index, history, data, None
198
 
199
  if current_index == NUMBER_OF_IMAGES - 1:
200
  time_stamp = int(time.time())
 
287
  }
288
  """
289
 
290
+ with gr.Blocks(css=newcss, theme=gr.themes.Soft()) as demo:
291
  data_gr = gr.State({})
292
  current_index = gr.State(-1)
293
  history = gr.State({})
294
 
295
+ gr.Markdown("# Help Us to Clean `ImageNet-Hard`!")
296
+
297
+ gr.Markdown("## Instructions")
298
+ gr.Markdown(
299
+ "Please enter your username and press `Load Samples`. The loading process might take up to a minute. Once the loading is done, you can start reviewing the samples."
300
+ )
301
+ gr.Markdown(
302
+ """For each image, please select one of the following options: `Accept`, `Not Sure!`, `Reject`.
303
+ - If you think any of the labels are correct, please select `Accept`.
304
+ - If you think none of the labels matching the image, please select `Reject`.
305
+ - If you are not sure about the label, please select `Not Sure!`.
306
+
307
+ You can refer to `Training samples` if you are not sure about the target label.
308
+ """
309
+ )
310
 
311
  random_str = "".join(
312
  random.choice(string.ascii_lowercase + string.digits) for _ in range(5)
313
  )
314
 
315
+ with gr.Column():
316
  username = gr.Textbox(label="Username", value=f"user-{random_str}")
317
  prepare_btn = gr.Button(value="Load Samples")
318