taesiri committed on
Commit
106ce32
‒
1 Parent(s): 262ca90
Files changed (2) hide show
  1. README.md +4 -4
  2. app.py +22 -3
README.md CHANGED
@@ -1,8 +1,8 @@
1
  ---
2
- title: Cleaning ImageNet Hard
3
- emoji: 📊
4
- colorFrom: purple
5
- colorTo: indigo
6
  sdk: gradio
7
  sdk_version: 3.28.0
8
  app_file: app.py
 
1
  ---
2
+ title: Cleaning ImageNet Hard - Group 2
3
+ emoji: 😋
4
+ colorFrom: red
5
+ colorTo: blue
6
  sdk: gradio
7
  sdk_version: 3.28.0
8
  app_file: app.py
app.py CHANGED
@@ -18,6 +18,8 @@ import torch
18
  import torchvision
19
  from huggingface_hub import HfApi, login, snapshot_download
20
  from PIL import Image
 
 
21
 
22
  session_token = os.environ.get("SessionToken")
23
  login(token=session_token)
@@ -64,11 +66,13 @@ if not os.path.exists("./imagenet_samples") or not os.path.exists(
64
 
65
  imagenet_hard = datasets.load_dataset("taesiri/imagenet-hard", split="validation")
66
 
67
-
68
  def update_snapshot(username):
 
 
 
69
  output_dir = snapshot_download(
70
  repo_id="taesiri/imagenet_hard_review_data_r2",
71
- allow_patterns="*.json",
72
  repo_type="dataset",
73
  )
74
  files = glob(f"{output_dir}/*.json")
@@ -83,8 +87,23 @@ def update_snapshot(username):
83
  rows.append(tdf)
84
 
85
  df = pd.DataFrame(rows, columns=columns)
86
- df = df[df["user_id"] == username]
87
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
  return df
89
 
90
 
 
18
  import torchvision
19
  from huggingface_hub import HfApi, login, snapshot_download
20
  from PIL import Image
21
+ import re
22
+ from fnmatch import translate
23
 
24
  session_token = os.environ.get("SessionToken")
25
  login(token=session_token)
 
66
 
67
  imagenet_hard = datasets.load_dataset("taesiri/imagenet-hard", split="validation")
68
 
 
69
  def update_snapshot(username):
70
+ escaped_username = re.escape(username)
71
+ pattern = f"*{escaped_username}*.json"
72
+
73
  output_dir = snapshot_download(
74
  repo_id="taesiri/imagenet_hard_review_data_r2",
75
+ allow_patterns=translate(pattern),
76
  repo_type="dataset",
77
  )
78
  files = glob(f"{output_dir}/*.json")
 
87
  rows.append(tdf)
88
 
89
  df = pd.DataFrame(rows, columns=columns)
 
90
 
91
+ # download and append all CSV files
92
+ output_dir = snapshot_download(
93
+ repo_id="taesiri/imagenet_hard_review_data_r3",
94
+ allow_patterns="*.csv",
95
+ repo_type="dataset",
96
+ )
97
+ files = glob(f"{output_dir}/*.csv")
98
+
99
+ if len(files) > 0:
100
+ csv_dataframes = [pd.read_csv(file) for file in files]
101
+ csv_dataframes = pd.concat(csv_dataframes, ignore_index=True)
102
+ df = pd.concat([df, csv_dataframes], ignore_index=True)
103
+
104
+ # remove duplicate rows
105
+ df = df.drop_duplicates(subset=["id", "user_id"], keep="last")
106
+ df = df[df["user_id"] == username]
107
  return df
108
 
109