margsli committed on
Commit
42ba245
•
1 Parent(s): e49e879

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -189
app.py CHANGED
@@ -29,7 +29,11 @@ def make_arena_leaderboard_md(arena_df):
29
  total_models = len(arena_df)
30
  space = "   "
31
  leaderboard_md = f"""
32
- Total #models: **{total_models}**.{space} Total #votes: **{"{:,}".format(total_votes)}**.{space} Last updated: June 1, 2024.
 
 
 
 
33
 
34
  """
35
  return leaderboard_md
@@ -45,14 +49,6 @@ def make_category_arena_leaderboard_md(arena_df, arena_subset_df, name="Overall"
45
  """
46
  return leaderboard_md
47
 
48
- def make_full_leaderboard_md(elo_results):
49
- leaderboard_md = f"""
50
- Three benchmarks are displayed: **Test Task 1**, **Test Task 2**, **Test Task 3**.
51
-
52
- Higher values are better for all benchmarks.
53
- """
54
- return leaderboard_md
55
-
56
 
57
  def make_leaderboard_md_live(elo_results):
58
  leaderboard_md = f"""
@@ -96,25 +92,11 @@ def update_elo_components(max_num_files, elo_results_file):
96
  basic_component_values[5] = md4
97
 
98
 
99
- def update_worker(max_num_files, interval, elo_results_file):
100
- while True:
101
- tic = time.time()
102
- update_elo_components(max_num_files, elo_results_file)
103
- durtaion = time.time() - tic
104
- print(f"update duration: {durtaion:.2f} s")
105
- time.sleep(max(interval - durtaion, 0))
106
-
107
-
108
- def load_demo(url_params, request: gr.Request):
109
- logger.info(f"load_demo. ip: {request.client.host}. params: {url_params}")
110
- return basic_component_values + leader_component_values
111
-
112
-
113
  def model_hyperlink(model_name, link):
114
  return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'
115
 
116
 
117
- def load_leaderboard_table_csv(filename, add_hyperlink=True):
118
  lines = open(filename).readlines()
119
  heads = [v.strip() for v in lines[0].split(",")]
120
  rows = []
@@ -180,9 +162,7 @@ def get_full_table(model_table_df):
180
  row.append(model_name)
181
  row.append(np.nan)
182
  row.append(np.nan)
183
- row.append(np.nan)
184
- # row.append(model_table_df.iloc[i]["MT-bench (score)"])
185
- # row.append(model_table_df.iloc[i]["MMLU"])
186
  # Organization
187
  row.append(model_table_df.iloc[i]["Organization"])
188
  # license
@@ -192,86 +172,6 @@ def get_full_table(model_table_df):
192
  values.sort(key=lambda x: -x[1] if not np.isnan(x[1]) else 1e9)
193
  return values
194
 
195
- def create_ranking_str(ranking, ranking_difference):
196
- if ranking_difference > 0:
197
- # return f"{int(ranking)} (\u2191{int(ranking_difference)})"
198
- return f"{int(ranking)} \u2191"
199
- elif ranking_difference < 0:
200
- # return f"{int(ranking)} (\u2193{int(-ranking_difference)})"
201
- return f"{int(ranking)} \u2193"
202
- else:
203
- return f"{int(ranking)}"
204
-
205
- def recompute_final_ranking(arena_df):
206
- # compute ranking based on CI
207
- ranking = {}
208
- for i, model_a in enumerate(arena_df.index):
209
- ranking[model_a] = 1
210
- for j, model_b in enumerate(arena_df.index):
211
- if i == j:
212
- continue
213
- if arena_df.loc[model_b]["rating_q025"] > arena_df.loc[model_a]["rating_q975"]:
214
- ranking[model_a] += 1
215
- return list(ranking.values())
216
-
217
- def get_arena_table(arena_df, model_table_df, arena_subset_df=None):
218
- arena_df = arena_df.sort_values(by=["final_ranking", "rating"], ascending=[True, False])
219
- arena_df["final_ranking"] = recompute_final_ranking(arena_df)
220
- arena_df = arena_df.sort_values(by=["final_ranking"], ascending=True)
221
-
222
- # arena_df["final_ranking"] = range(1, len(arena_df) + 1)
223
- # sort by rating
224
- if arena_subset_df is not None:
225
- # filter out models not in the arena_df
226
- arena_subset_df = arena_subset_df[arena_subset_df.index.isin(arena_df.index)]
227
- arena_subset_df = arena_subset_df.sort_values(by=["rating"], ascending=False)
228
- # arena_subset_df = arena_subset_df.sort_values(by=["final_ranking"], ascending=True)
229
- arena_subset_df["final_ranking"] = recompute_final_ranking(arena_subset_df)
230
- # keep only the models in the subset in arena_df and recompute final_ranking
231
- arena_df = arena_df[arena_df.index.isin(arena_subset_df.index)]
232
- # recompute final ranking
233
- arena_df["final_ranking"] = recompute_final_ranking(arena_df)
234
-
235
- # assign ranking by the order
236
- arena_subset_df["final_ranking_no_tie"] = range(1, len(arena_subset_df) + 1)
237
- arena_df["final_ranking_no_tie"] = range(1, len(arena_df) + 1)
238
- # join arena_df and arena_subset_df on index
239
- arena_df = arena_subset_df.join(arena_df["final_ranking"], rsuffix="_global", how="inner")
240
- arena_df["ranking_difference"] = arena_df["final_ranking_global"] - arena_df["final_ranking"]
241
-
242
- arena_df = arena_df.sort_values(by=["final_ranking", "rating"], ascending=[True, False])
243
- arena_df["final_ranking"] = arena_df.apply(lambda x: create_ranking_str(x["final_ranking"], x["ranking_difference"]), axis=1)
244
-
245
- values = []
246
- for i in range(len(arena_df)):
247
- row = []
248
- model_key = arena_df.index[i]
249
- try: # this is a janky fix for where the model key is not in the model table (model table and arena table dont contain all the same models)
250
- model_name = model_table_df[model_table_df["key"] == model_key]["Model"].values[
251
- 0
252
- ]
253
- # rank
254
- ranking = arena_df.iloc[i].get("final_ranking") or i+1
255
- row.append(ranking)
256
- if arena_subset_df is not None:
257
- row.append(arena_df.iloc[i].get("ranking_difference") or 0)
258
- # model display name
259
- row.append(model_name)
260
- # elo rating
261
- row.append(round(arena_df.iloc[i]["rating"]))
262
- # Organization
263
- row.append(
264
- model_table_df[model_table_df["key"] == model_key]["Organization"].values[0]
265
- )
266
- # license
267
- row.append(
268
- model_table_df[model_table_df["key"] == model_key]["License"].values[0]
269
- )
270
- values.append(row)
271
- except Exception as e:
272
- print(f"{model_key} - {e}")
273
- return values
274
-
275
  key_to_category_name = {
276
  "full": "Overall",
277
  }
@@ -304,9 +204,8 @@ def build_leaderboard_tab(results_file, leaderboard_table_file, show_plot=False)
304
  model_table_df = pd.DataFrame(data)
305
 
306
  with gr.Tabs() as tabs:
307
- # arena table
308
  arena_table_vals = get_full_table(model_table_df)
309
- with gr.Tab("Arena Elo", id=0):
310
  md = make_arena_leaderboard_md(arena_df)
311
  leaderboard_markdown = gr.Markdown(md, elem_id="leaderboard_markdown")
312
  with gr.Row():
@@ -350,40 +249,6 @@ def build_leaderboard_tab(results_file, leaderboard_table_file, show_plot=False)
350
 
351
  leader_component_values[:] = [default_md]
352
 
353
- # with gr.Tab("Full Leaderboard", id=0):
354
- # md = make_full_leaderboard_md(elo_results)
355
- # gr.Markdown(md, elem_id="leaderboard_markdown")
356
- # with gr.Row():
357
- # with gr.Column(scale=2):
358
- # category_dropdown = gr.Dropdown(choices=list(arena_dfs.keys()), label="Category", value="Overall")
359
- # default_category_details = make_category_arena_leaderboard_md(arena_df, arena_df, name="Overall")
360
- # with gr.Column(scale=4, variant="panel"):
361
- # category_deets = gr.Markdown(default_category_details, elem_id="category_deets")
362
-
363
- # full_table_vals = get_full_table(model_table_df)
364
- # display_df = gr.Dataframe(
365
- # headers=[
366
- # "🤖 Model",
367
- # "⭐ Task 1",
368
- # "📈 Task 2",
369
- # "📚 Task 3",
370
- # "Organization",
371
- # "License",
372
- # ],
373
- # datatype=["markdown", "number", "number", "number", "str", "str"],
374
- # value=full_table_vals,
375
- # elem_id="full_leaderboard_dataframe",
376
- # column_widths=[200, 100, 100, 100, 150, 150],
377
- # height=700,
378
- # wrap=True,
379
- # )
380
- # gr.Markdown(
381
- # f"""Note: .
382
- # """,
383
- # elem_id="leaderboard_markdown"
384
- # )
385
-
386
- # leader_component_values[:] = [default_md]
387
  if not show_plot:
388
  gr.Markdown(
389
  """ ## Submit your model [here]().
@@ -394,7 +259,7 @@ def build_leaderboard_tab(results_file, leaderboard_table_file, show_plot=False)
394
  pass
395
 
396
  def update_leaderboard_df(arena_table_vals):
397
- elo_datarame = pd.DataFrame(arena_table_vals, columns=[ "Rank", "🤖 Model", "⭐ Arena Elo", "Organization", "License"])
398
 
399
  # goal: color the rows based on the rank with styler
400
  def highlight_max(s):
@@ -414,51 +279,31 @@ def build_leaderboard_tab(results_file, leaderboard_table_file, show_plot=False)
414
  arena_values = get_arena_table(arena_df, model_table_df, arena_subset_df = arena_subset_df if category != "Overall" else None)
415
  if category != "Overall":
416
  arena_values = update_leaderboard_df(arena_values)
417
- arena_values = gr.Dataframe(
418
- headers=[
419
- "Rank",
420
- "🤖 Model",
421
- "⭐ Arena Elo",
422
- "Organization",
423
- "License",
424
- ],
425
- datatype=[
426
- "number",
427
- "markdown",
428
- "number",
429
- "str",
430
- "str",
431
- ],
432
- value=arena_values,
433
- elem_id="arena_leaderboard_dataframe",
434
- height=700,
435
- column_widths=[60, 190, 110, 160, 150, 140],
436
- wrap=True,
437
- )
438
- else:
439
- arena_values = gr.Dataframe(
440
- headers=[
441
- "🤖 Model",
442
- "⭐ Arena Elo",
443
- "⭐ Arena Elo",
444
- "Organization",
445
- "License",
446
- ],
447
- datatype=[
448
- "number",
449
- "markdown",
450
- "number",
451
- "str",
452
- "str",
453
- ],
454
- value=arena_values,
455
- elem_id="arena_leaderboard_dataframe",
456
- height=700,
457
- column_widths=[70, 190, 110, 160, 150, 140],
458
- wrap=True,
459
- )
460
-
461
-
462
  leaderboard_md = make_category_arena_leaderboard_md(arena_df, arena_subset_df, name=category)
463
  return arena_values, leaderboard_md
464
 
 
29
  total_models = len(arena_df)
30
  space = "&nbsp;&nbsp;&nbsp;"
31
  leaderboard_md = f"""
32
+ Three benchmarks are displayed: **Test Task 1**, **Test Task 2**, **Test Task 3**.
33
+
34
+ Higher values are better for all benchmarks.
35
+
36
+ Total #models: **{total_models}**.{space} Last updated: June 1, 2024.
37
 
38
  """
39
  return leaderboard_md
 
49
  """
50
  return leaderboard_md
51
 
 
 
 
 
 
 
 
 
52
 
53
  def make_leaderboard_md_live(elo_results):
54
  leaderboard_md = f"""
 
92
  basic_component_values[5] = md4
93
 
94
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
  def model_hyperlink(model_name, link):
96
  return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'
97
 
98
 
99
+ def load_leaderboard_table_csv(filename, add_hyperlink=False):
100
  lines = open(filename).readlines()
101
  heads = [v.strip() for v in lines[0].split(",")]
102
  rows = []
 
162
  row.append(model_name)
163
  row.append(np.nan)
164
  row.append(np.nan)
165
+ row.append(np.nan)\
 
 
166
  # Organization
167
  row.append(model_table_df.iloc[i]["Organization"])
168
  # license
 
172
  values.sort(key=lambda x: -x[1] if not np.isnan(x[1]) else 1e9)
173
  return values
174
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
175
  key_to_category_name = {
176
  "full": "Overall",
177
  }
 
204
  model_table_df = pd.DataFrame(data)
205
 
206
  with gr.Tabs() as tabs:
 
207
  arena_table_vals = get_full_table(model_table_df)
208
+ with gr.Tab("Full leaderboard", id=0):
209
  md = make_arena_leaderboard_md(arena_df)
210
  leaderboard_markdown = gr.Markdown(md, elem_id="leaderboard_markdown")
211
  with gr.Row():
 
249
 
250
  leader_component_values[:] = [default_md]
251
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
252
  if not show_plot:
253
  gr.Markdown(
254
  """ ## Submit your model [here]().
 
259
  pass
260
 
261
  def update_leaderboard_df(arena_table_vals):
262
+ elo_datarame = pd.DataFrame(arena_table_vals, columns=["Rank", "🤖 Model", "⭐ Task 1", "📈 Task 2", "📚 Task 3", "Organization", "License"])
263
 
264
  # goal: color the rows based on the rank with styler
265
  def highlight_max(s):
 
279
  arena_values = get_arena_table(arena_df, model_table_df, arena_subset_df = arena_subset_df if category != "Overall" else None)
280
  if category != "Overall":
281
  arena_values = update_leaderboard_df(arena_values)
282
+ arena_values = gr.Dataframe(
283
+ headers=[
284
+ "Rank",
285
+ "🤖 Model",
286
+ "⭐ Task 1",
287
+ "📈 Task 2",
288
+ "📚 Task 3",
289
+ "Organization",
290
+ "License",
291
+ ],
292
+ datatype=[
293
+ "number",
294
+ "markdown",
295
+ "number",
296
+ "number",
297
+ "number",
298
+ "str",
299
+ "str",
300
+ ],
301
+ value=arena_values,
302
+ elem_id="arena_leaderboard_dataframe",
303
+ height=700,
304
+ column_widths=[70, 190, 110, 110, 110, 150, 140],
305
+ wrap=True,
306
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
307
  leaderboard_md = make_category_arena_leaderboard_md(arena_df, arena_subset_df, name=category)
308
  return arena_values, leaderboard_md
309