kexinhuang12345 committed on
Commit 3caf072
1 Parent(s): aa0703f
Files changed (5)
  1. app.py +168 -16
  2. src/about.py +27 -4
  3. src/display/utils.py +35 -1
  4. src/populate.py +19 -4
  5. src/submission/submit.py +8 -1
app.py CHANGED
@@ -11,18 +11,24 @@ from src.about import (
     INTRODUCTION_TEXT,
     LLM_BENCHMARKS_TEXT,
     TITLE,
-    nc_tasks
+    nc_tasks,
+    nr_tasks,
+    lp_tasks,
 )
 from src.display.css_html_js import custom_css
 from src.display.utils import (
     BENCHMARK_COLS,
     COLS,
     COLS_NC,
+    COLS_NR,
+    COLS_LP,
     EVAL_COLS,
     EVAL_TYPES,
     NUMERIC_INTERVALS,
     TYPES,
     AutoEvalColumn_NodeClassification,
+    AutoEvalColumn_NodeRegression,
+    AutoEvalColumn_LinkPrediction,
     #AutoEvalColumn,
     ModelType,
     TASK_LIST,
@@ -56,9 +62,6 @@ except Exception:
     restart_space()
 
 
-original_df = get_leaderboard_df(EVAL_REQUESTS_PATH, nc_tasks)
-leaderboard_df = original_df.copy()
-
 # Searching and filtering
 def update_table(
     hidden_df: pd.DataFrame,
@@ -72,7 +75,7 @@ def update_table(
 
 
 def search_table(df: pd.DataFrame, query: str) -> pd.DataFrame:
-    return df[(df[AutoEvalColumn_NodeClassification.model.name].str.contains(query, case=False))]
+    return df[(df[AutoEvalColumn.model.name].str.contains(query, case=False))]
 
 
 def select_columns(df: pd.DataFrame, columns: list) -> pd.DataFrame:
@@ -81,7 +84,7 @@ def select_columns(df: pd.DataFrame, columns: list) -> pd.DataFrame:
     ]
     # We use COLS to maintain sorting
    filtered_df = df[
-        always_here_cols + [c for c in COLS_NC if c in df.columns and c in columns]
+        always_here_cols + [c for c in COLS if c in df.columns and c in columns]
     ]
     return filtered_df
 
@@ -99,7 +102,7 @@ def filter_queries(query: str, filtered_df: pd.DataFrame) -> pd.DataFrame:
     if len(final_df) > 0:
         filtered_df = pd.concat(final_df)
         filtered_df = filtered_df.drop_duplicates(
-            subset=[AutoEvalColumn_NodeClassification.model.name]
+            subset=[AutoEvalColumn.model.name]
         )
 
     return filtered_df
@@ -112,14 +115,14 @@ def filter_models(
     if show_deleted:
         filtered_df = df
     else: # Show only still on the hub models
-        filtered_df = df[df[AutoEvalColumn_NodeClassification.still_on_hub.name] == True]
+        filtered_df = df[df[AutoEvalColumn.still_on_hub.name] == True]
 
     #type_emoji = [t[0] for t in type_query]
     #filtered_df = filtered_df.loc[df[AutoEvalColumn.model_type_symbol.name].isin(type_emoji)]
     #filtered_df = filtered_df.loc[df[AutoEvalColumn.precision.name].isin(precision_query + ["None"])]
 
     numeric_interval = pd.IntervalIndex(sorted([NUMERIC_INTERVALS[s] for s in size_query]))
-    params_column = pd.to_numeric(df[AutoEvalColumn_NodeClassification.params.name], errors="coerce")
+    params_column = pd.to_numeric(df[AutoEvalColumn.params.name], errors="coerce")
     mask = params_column.apply(lambda x: any(numeric_interval.contains(x)))
     filtered_df = filtered_df.loc[mask]
 
@@ -132,6 +135,155 @@ with demo:
 
     with gr.Tabs(elem_classes="tab-buttons") as tabs:
         with gr.TabItem("🏅 Node Classification Leaderboard", elem_id="llm-benchmark-tab-table", id=0):
+            COLS = COLS_NC
+            AutoEvalColumn = AutoEvalColumn_NodeClassification
+            original_df = get_leaderboard_df(EVAL_REQUESTS_PATH, "Node Classification")
+            leaderboard_df = original_df.copy()
+
+            with gr.Row():
+                with gr.Column():
+                    with gr.Row():
+                        search_bar = gr.Textbox(
+                            placeholder=" 🔍 Search for your model (separate multiple queries with `;`) and press ENTER...",
+                            show_label=False,
+                            elem_id="search-bar",
+                        )
+                    with gr.Row():
+                        shown_columns = gr.CheckboxGroup(
+                            choices=[
+                                c.name
+                                for c in fields(AutoEvalColumn)
+                                if not c.hidden and not c.never_hidden
+                            ],
+                            value=[
+                                c.name
+                                for c in fields(AutoEvalColumn)
+                                if c.displayed_by_default and not c.hidden and not c.never_hidden
+                            ],
+                            label="Select columns to show",
+                            elem_id="column-select",
+                            interactive=True,
+                        )
+
+            print(leaderboard_df)
+            print(fields(AutoEvalColumn))
+            leaderboard_table = gr.components.Dataframe(
+                value=leaderboard_df[
+                    [c.name for c in fields(AutoEvalColumn) if c.never_hidden]
+                    + shown_columns.value
+                ],
+                headers=[c.name for c in fields(AutoEvalColumn) if c.never_hidden] + shown_columns.value,
+                datatype=TYPES,
+                elem_id="leaderboard-table",
+                interactive=False,
+                visible=True,
+            )
+
+            # Dummy leaderboard for handling the case when the user uses backspace key
+            hidden_leaderboard_table_for_search = gr.components.Dataframe(
+                value=original_df[COLS],
+                headers=COLS,
+                datatype=TYPES,
+                visible=False,
+            )
+            search_bar.submit(
+                update_table,
+                [
+                    hidden_leaderboard_table_for_search,
+                    shown_columns,
+                    search_bar,
+                ],
+                leaderboard_table,
+            )
+            for selector in [shown_columns]:
+                selector.change(
+                    update_table,
+                    [
+                        hidden_leaderboard_table_for_search,
+                        shown_columns,
+                        search_bar,
+                    ],
+                    leaderboard_table,
+                    queue=True,
+                )
+
+        with gr.TabItem("🏅 Node Regression Leaderboard", elem_id="llm-benchmark-tab-table", id=1):
+            COLS = COLS_NR
+            AutoEvalColumn = AutoEvalColumn_NodeRegression
+            original_df = get_leaderboard_df(EVAL_REQUESTS_PATH, "Node Regression")
+            leaderboard_df = original_df.copy()
+            with gr.Row():
+                with gr.Column():
+                    with gr.Row():
+                        search_bar = gr.Textbox(
+                            placeholder=" 🔍 Search for your model (separate multiple queries with `;`) and press ENTER...",
+                            show_label=False,
+                            elem_id="search-bar",
+                        )
+                    with gr.Row():
+                        shown_columns = gr.CheckboxGroup(
+                            choices=[
+                                c.name
+                                for c in fields(AutoEvalColumn)
+                                if not c.hidden and not c.never_hidden
+                            ],
+                            value=[
+                                c.name
+                                for c in fields(AutoEvalColumn)
+                                if c.displayed_by_default and not c.hidden and not c.never_hidden
+                            ],
+                            label="Select columns to show",
+                            elem_id="column-select",
+                            interactive=True,
+                        )
+
+            print(leaderboard_df)
+            print(fields(AutoEvalColumn))
+            leaderboard_table = gr.components.Dataframe(
+                value=leaderboard_df[
+                    [c.name for c in fields(AutoEvalColumn) if c.never_hidden]
+                    + shown_columns.value
+                ],
+                headers=[c.name for c in fields(AutoEvalColumn) if c.never_hidden] + shown_columns.value,
+                datatype=TYPES,
+                elem_id="leaderboard-table",
+                interactive=False,
+                visible=True,
+            )
+
+            # Dummy leaderboard for handling the case when the user uses backspace key
+            hidden_leaderboard_table_for_search = gr.components.Dataframe(
+                value=original_df[COLS],
+                headers=COLS,
+                datatype=TYPES,
+                visible=False,
+            )
+            search_bar.submit(
+                update_table,
+                [
+                    hidden_leaderboard_table_for_search,
+                    shown_columns,
+                    search_bar,
+                ],
+                leaderboard_table,
+            )
+            for selector in [shown_columns]:
+                selector.change(
+                    update_table,
+                    [
+                        hidden_leaderboard_table_for_search,
+                        shown_columns,
+                        search_bar,
+                    ],
+                    leaderboard_table,
+                    queue=True,
+                )
+
+        with gr.TabItem("🏅 Link Prediction Leaderboard", elem_id="llm-benchmark-tab-table", id=2):
+            COLS = COLS_LP
+            AutoEvalColumn = AutoEvalColumn_LinkPrediction
+            original_df = get_leaderboard_df(EVAL_REQUESTS_PATH, "Link Prediction")
+            leaderboard_df = original_df.copy()
             with gr.Row():
                 with gr.Column():
                     with gr.Row():
@@ -144,12 +296,12 @@ with demo:
                         shown_columns = gr.CheckboxGroup(
                             choices=[
                                 c.name
-                                for c in fields(AutoEvalColumn_NodeClassification)
+                                for c in fields(AutoEvalColumn)
                                 if not c.hidden and not c.never_hidden
                             ],
                             value=[
                                 c.name
-                                for c in fields(AutoEvalColumn_NodeClassification)
+                                for c in fields(AutoEvalColumn)
                                 if c.displayed_by_default and not c.hidden and not c.never_hidden
                             ],
                             label="Select columns to show",
@@ -158,13 +310,13 @@
                         )
 
             print(leaderboard_df)
-            print(fields(AutoEvalColumn_NodeClassification))
+            print(fields(AutoEvalColumn))
             leaderboard_table = gr.components.Dataframe(
                 value=leaderboard_df[
-                    [c.name for c in fields(AutoEvalColumn_NodeClassification) if c.never_hidden]
+                    [c.name for c in fields(AutoEvalColumn) if c.never_hidden]
                     + shown_columns.value
                 ],
-                headers=[c.name for c in fields(AutoEvalColumn_NodeClassification) if c.never_hidden] + shown_columns.value,
+                headers=[c.name for c in fields(AutoEvalColumn) if c.never_hidden] + shown_columns.value,
                 datatype=TYPES,
                 elem_id="leaderboard-table",
                 interactive=False,
@@ -173,8 +325,8 @@
 
             # Dummy leaderboard for handling the case when the user uses backspace key
             hidden_leaderboard_table_for_search = gr.components.Dataframe(
-                value=original_df[COLS_NC],
-                headers=COLS_NC,
+                value=original_df[COLS],
+                headers=COLS,
                 datatype=TYPES,
                 visible=False,
             )
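Note: for quick reference, a minimal standalone sketch of the case-insensitive search that search_table applies to the model column; the "Model" column name matches the ColumnContent definition in src/display/utils.py, while the toy rows are illustrative assumptions, not the Space's data.

# Sketch only: toy data for illustration.
import pandas as pd

toy_df = pd.DataFrame(
    {"Model": ["GraphSAGE", "LightGBM", "MLP"], "user-churn": [0.68, 0.70, 0.65]}
)

def search_table(df: pd.DataFrame, query: str) -> pd.DataFrame:
    # Same idea as in app.py: case-insensitive substring match on the model column.
    return df[df["Model"].str.contains(query, case=False)]

print(search_table(toy_df, "graph"))  # keeps only the GraphSAGE row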
src/about.py CHANGED
@@ -21,17 +21,40 @@ NUM_FEWSHOT = 0 # Change with your few shot
 class nc_tasks(Enum):
     task0 = Task("rel-amazon/user-churn", "auroc", "user-churn")
     task1 = Task("rel-amazon/item-churn", "auroc", "item-churn")
-    task2 = Task("rel-avito/user-clicks", "auroc", "user-clicks")
     task3 = Task("rel-avito/user-visits", "auroc", "user-visits")
-    task4 = Task("rel-hm/user-churn", "auroc", "hm-user-churn")
-    task5 = Task("rel-stack/user-badge", "auroc", "user-badge")
-    task6 = Task("rel-stack/user-engagement", "auroc", "user-engagement")
+    task2 = Task("rel-avito/user-clicks", "auroc", "user-clicks")
     task7 = Task("rel-f1/driver-dnf", "auroc", "driver-dnf")
     task8 = Task("rel-f1/driver-top3", "auroc", "driver-top3")
+    task4 = Task("rel-hm/user-churn", "auroc", "hm-user-churn")
+    task6 = Task("rel-stack/user-engagement", "auroc", "user-engagement")
+    task5 = Task("rel-stack/user-badge", "auroc", "user-badge")
     task9 = Task("rel-trial/study-outcome", "auroc", "study-outcome")
     task10 = Task("rel-event/user-repeat", "auroc", "user-repeat")
     task11 = Task("rel-event/user-ignore", "auroc", "user-ignore")
 
+
+class nr_tasks(Enum):
+    task0 = Task("rel-amazon/user-ltv", "mae", "user-ltv")
+    task1 = Task("rel-amazon/item-ltv", "mae", "item-ltv")
+    task3 = Task("rel-avito/ad-ctr", "mae", "ad-ctr")
+    task4 = Task("rel-f1/driver-position", "mae", "driver-position")
+    task5 = Task("rel-hm/item-sales", "mae", "item-sales")
+    task6 = Task("rel-stack/post-votes", "mae", "post-votes")
+    task7 = Task("rel-trial/study-adverse", "mae", "study-adverse")
+    task8 = Task("rel-trial/site-success", "mae", "site-success")
+    task9 = Task("rel-event/user-attendance", "mae", "user-attendance")
+
+class lp_tasks(Enum):
+    task0 = Task("rel-amazon/user-item-purchase", "map", "user-item-purchase")
+    task1 = Task("rel-amazon/user-item-rate", "map", "user-item-rate")
+    task2 = Task("rel-amazon/user-item-review", "map", "user-item-review")
+    task3 = Task("rel-avito/user-ad-visit", "map", "user-ad-visit")
+    task4 = Task("rel-hm/user-item-purchase", "map", "hm-user-item-purchase")
+    task5 = Task("rel-stack/user-post-comment", "map", "user-post-comment")
+    task6 = Task("rel-stack/post-post-related", "map", "post-post-related")
+    task7 = Task("rel-trial/condition-sponsor-run", "map", "condition-sponsor-run")
+    task8 = Task("rel-trial/site-sponsor-run", "map", "site-sponsor-run")
+
 # Your leaderboard name
 TITLE = """<p align="center"><img src="https://relbench.stanford.edu/img/logo.png" alt="logo" width="400px" /></p>"""
 
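Note: a minimal sketch of how these task enums are consumed downstream; the Task container below is an assumed stand-in (the real one is defined earlier in src/about.py), and the col_name field matches its use in src/display/utils.py.

# Sketch only: Task and lp_tasks_demo are stand-ins for illustration.
from collections import namedtuple
from enum import Enum

Task = namedtuple("Task", ["benchmark", "metric", "col_name"])

class lp_tasks_demo(Enum):
    task0 = Task("rel-amazon/user-item-purchase", "map", "user-item-purchase")

# Column attribute names are derived by replacing hyphens with underscores,
# as src/display/utils.py does with '_'.join(task.value.col_name.split('-')).
for task in lp_tasks_demo:
    print("_".join(task.value.col_name.split("-")), "->", task.value.col_name)
# user_item_purchase -> user-item-purchase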
src/display/utils.py CHANGED
@@ -3,7 +3,7 @@ from enum import Enum
 
 import pandas as pd
 
-from src.about import Tasks, nc_tasks
+from src.about import Tasks, nc_tasks, nr_tasks, lp_tasks
 
 def fields(raw_class):
     return [v for k, v in raw_class.__dict__.items() if k[:2] != "__" and k[-2:] != "__"]
@@ -58,6 +58,37 @@ auto_eval_column_dict_nc.append(["num_of_Params", ColumnContent, ColumnContent("# of Params", "markdown", True, never_hidden=False)])
 
 AutoEvalColumn_NodeClassification = make_dataclass("AutoEvalColumn_NodeClassification", auto_eval_column_dict_nc, frozen=True)
 
+
+auto_eval_column_dict_nr = []
+auto_eval_column_dict_nr.append(["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)])
+auto_eval_column_dict_nr.append(["average_rank", ColumnContent, ColumnContent("Average Rank⬆️", "number", True)])
+for task in nr_tasks:
+    auto_eval_column_dict_nr.append(['_'.join(task.value.col_name.split('-')), ColumnContent, ColumnContent(task.value.col_name, "number", True)])
+auto_eval_column_dict_nr.append(["author", ColumnContent, ColumnContent("Author", "markdown", True, never_hidden=False)])
+auto_eval_column_dict_nr.append(["email", ColumnContent, ColumnContent("Email", "markdown", True, never_hidden=False)])
+auto_eval_column_dict_nr.append(["Paper_URL", ColumnContent, ColumnContent("Paper URL", "markdown", True, never_hidden=False)])
+auto_eval_column_dict_nr.append(["Github_URL", ColumnContent, ColumnContent("Github URL", "markdown", True, never_hidden=False)])
+auto_eval_column_dict_nr.append(["Time", ColumnContent, ColumnContent("Time", "markdown", True, never_hidden=False)])
+auto_eval_column_dict_nr.append(["num_of_Params", ColumnContent, ColumnContent("# of Params", "markdown", True, never_hidden=False)])
+
+AutoEvalColumn_NodeRegression = make_dataclass("AutoEvalColumn_NodeRegression", auto_eval_column_dict_nr, frozen=True)
+
+
+auto_eval_column_dict_lp = []
+auto_eval_column_dict_lp.append(["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)])
+auto_eval_column_dict_lp.append(["average_rank", ColumnContent, ColumnContent("Average Rank⬆️", "number", True)])
+for task in lp_tasks:
+    auto_eval_column_dict_lp.append(['_'.join(task.value.col_name.split('-')), ColumnContent, ColumnContent(task.value.col_name, "number", True)])
+auto_eval_column_dict_lp.append(["author", ColumnContent, ColumnContent("Author", "markdown", True, never_hidden=False)])
+auto_eval_column_dict_lp.append(["email", ColumnContent, ColumnContent("Email", "markdown", True, never_hidden=False)])
+auto_eval_column_dict_lp.append(["Paper_URL", ColumnContent, ColumnContent("Paper URL", "markdown", True, never_hidden=False)])
+auto_eval_column_dict_lp.append(["Github_URL", ColumnContent, ColumnContent("Github URL", "markdown", True, never_hidden=False)])
+auto_eval_column_dict_lp.append(["Time", ColumnContent, ColumnContent("Time", "markdown", True, never_hidden=False)])
+auto_eval_column_dict_lp.append(["num_of_Params", ColumnContent, ColumnContent("# of Params", "markdown", True, never_hidden=False)])
+
+AutoEvalColumn_LinkPrediction = make_dataclass("AutoEvalColumn_LinkPrediction", auto_eval_column_dict_lp, frozen=True)
+
+
 ## For the queue columns in the submission tab
 @dataclass(frozen=True)
 class EvalQueueColumn: # Queue column
@@ -182,6 +213,9 @@ class Precision(Enum):
 # Column selection
 COLS = [c.name for c in fields(AutoEvalColumn) if not c.hidden]
 COLS_NC = [c.name for c in fields(AutoEvalColumn_NodeClassification) if not c.hidden]
+COLS_NR = [c.name for c in fields(AutoEvalColumn_NodeRegression) if not c.hidden]
+COLS_LP = [c.name for c in fields(AutoEvalColumn_LinkPrediction) if not c.hidden]
+
 TYPES = [c.type for c in fields(AutoEvalColumn) if not c.hidden]
 COLS_LITE = [c.name for c in fields(AutoEvalColumn) if c.displayed_by_default and not c.hidden]
 TYPES_LITE = [c.type for c in fields(AutoEvalColumn) if c.displayed_by_default and not c.hidden]
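Note: a minimal sketch of the make_dataclass pattern used for the new column classes; ColumnContent below is an assumed stand-in with plausible fields (the real definition lives earlier in utils.py), and fields() is the same helper shown above.

# Sketch only: ColumnContent here is a stand-in.
from dataclasses import dataclass, make_dataclass

@dataclass(frozen=True)
class ColumnContent:
    name: str
    type: str
    displayed_by_default: bool
    hidden: bool = False
    never_hidden: bool = False

def fields(raw_class):
    # Same helper as in utils.py: walk the class dict, skipping dunder entries.
    return [v for k, v in raw_class.__dict__.items() if k[:2] != "__" and k[-2:] != "__"]

cols = [
    ["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)],
    ["average_rank", ColumnContent, ColumnContent("Average Rank⬆️", "number", True)],
]
AutoEvalColumnDemo = make_dataclass("AutoEvalColumnDemo", cols, frozen=True)

print([c.name for c in fields(AutoEvalColumnDemo) if not c.hidden])  # ['Model', 'Average Rank⬆️']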
src/populate.py CHANGED
@@ -6,7 +6,11 @@ import pandas as pd
 from src.display.formatting import has_no_nan_values, make_clickable_model
 from src.display.utils import AutoEvalColumn, EvalQueueColumn
 from src.leaderboard.read_evals import get_raw_eval_results
-
+from src.about import (
+    nc_tasks,
+    nr_tasks,
+    lp_tasks,
+)
 
 '''
 def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchmark_cols: list) -> pd.DataFrame:
@@ -23,7 +27,16 @@ def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchmark_cols: list) -> pd.DataFrame:
     return raw_data, df
 '''
 
-def get_leaderboard_df(EVAL_REQUESTS_PATH, tasks) -> pd.DataFrame:
+def get_leaderboard_df(EVAL_REQUESTS_PATH, task_type) -> pd.DataFrame:
+    if task_type == 'Node Classification':
+        ascending = False
+        tasks = nc_tasks
+    elif task_type == 'Node Regression':
+        ascending = True
+        tasks = nr_tasks
+    elif task_type == 'Link Prediction':
+        ascending = False
+        tasks = lp_tasks
 
     model_result_filepaths = []
     for root,_, files in os.walk(EVAL_REQUESTS_PATH):
@@ -36,7 +49,9 @@ def get_leaderboard_df(EVAL_REQUESTS_PATH, tasks) -> pd.DataFrame:
     for model in model_result_filepaths:
         import json
         with open(model) as f:
-            model_res.append(json.load(f))
+            out = json.load(f)
+            if ('task' in out) and (out['task'] == task_type):
+                model_res.append(out)
 
     for model in model_res:
         model["test"] = literal_eval(model["test"])
@@ -61,7 +76,7 @@ def get_leaderboard_df(EVAL_REQUESTS_PATH, tasks) -> pd.DataFrame:
 
     #df_res = pd.DataFrame([{col: model[col] for col in columns_to_show} for model in model_res])
     print(df_res)
-    ranks = df_res[list(name2short_name.values())].rank(ascending = False)
+    ranks = df_res[list(name2short_name.values())].rank(ascending = ascending)
     df_res.rename(columns={'model': 'Model', 'author': 'Author', 'email': 'Email', 'paper_url': 'Paper URL', 'github_url': 'Github URL', 'submitted_time': 'Time', 'params': '# of Params'}, inplace=True)
     df_res['Average Rank⬆️'] = ranks.mean(axis=1)
     df_res.sort_values(by='Average Rank⬆️', ascending=True, inplace=True)
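Note: the ascending flag flips the rank direction because node regression is scored by MAE (lower is better), while AUROC and MAP are higher-is-better. A minimal sketch with toy numbers:

# Sketch only: toy scores for two hypothetical models.
import pandas as pd

auroc = pd.Series({"model_a": 0.72, "model_b": 0.68})
mae = pd.Series({"model_a": 4.1, "model_b": 5.3})

print(auroc.rank(ascending=False))  # higher AUROC -> rank 1.0 (model_a is best)
print(mae.rank(ascending=True))     # lower MAE    -> rank 1.0 (model_a is best)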
src/submission/submit.py CHANGED
@@ -44,7 +44,14 @@ def add_new_eval(
     if not REQUESTED_MODELS:
         REQUESTED_MODELS, USERS_TO_SUBMISSION_DATES = already_submitted_models(EVAL_REQUESTS_PATH)
 
-    model_path = model
+    if task_track == 'Node Classification':
+        task_type = 'nc'
+    elif task_track == 'Node Regression':
+        task_type = 'nr'
+    elif task_track == 'Link Prediction':
+        task_type = 'lp'
+
+    model_path = model + '_' + task_type
 
     #precision = precision.split(" ")[0]
     current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
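Note: the task_track branching above derives a short suffix for the request path so submissions to different tracks do not collide. An equivalent dict-based sketch (suffix values copied from the diff; the model name is a placeholder):

# Sketch only: "my-model" is a placeholder submission name.
TRACK_TO_SUFFIX = {
    "Node Classification": "nc",
    "Node Regression": "nr",
    "Link Prediction": "lp",
}

task_track = "Node Regression"
model_path = "my-model" + "_" + TRACK_TO_SUFFIX[task_track]
print(model_path)  # my-model_nr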