hSterz committed
Commit b2b2434 · 1 Parent(s): 0318f2d
Files changed (2)
  1. app.py +1 -1
  2. src/display/utils.py +1 -2
app.py CHANGED
@@ -99,7 +99,7 @@ with demo:
         with gr.TabItem("🏅 1 Correct", elem_id="llm-benchmark-tab-table", id=0):
             leaderboard = init_leaderboard(LEADERBOARD_DF)
 
-        with gr.TabItem("🏅 1 Correct with Option Variations", elem_id="llm-benchmark-tab-table", id=4):
+        with gr.TabItem("🏅 1 Correct + Variations", elem_id="llm-benchmark-tab-table", id=4):
             leaderboard = init_leaderboard(LEADERBOARD_DF_1_CORRECT_VAR)
 
         with gr.TabItem("🏅 N Correct", elem_id="llm-benchmark-tab-table", id=1):
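For context, a minimal runnable sketch of the tab layout this hunk edits. LEADERBOARD_DF, LEADERBOARD_DF_1_CORRECT_VAR, and init_leaderboard are hypothetical stand-ins here; the real versions are defined elsewhere in app.py. The renamed tab keeps id=4 and elem_id, so only the visible label changes:

import gradio as gr
import pandas as pd

# Hypothetical stand-ins for the DataFrames app.py builds from eval results.
LEADERBOARD_DF = pd.DataFrame({"Model": ["example-model"], "Average ⬆️": [51.2]})
LEADERBOARD_DF_1_CORRECT_VAR = pd.DataFrame({"Model": ["example-model"], "Average ⬆️": [47.8]})

def init_leaderboard(df):
    # Simplified stand-in: the real helper wraps the DataFrame in a richer,
    # filterable leaderboard component.
    return gr.Dataframe(value=df)

with gr.Blocks() as demo:
    with gr.Tabs():
        with gr.TabItem("🏅 1 Correct", elem_id="llm-benchmark-tab-table", id=0):
            init_leaderboard(LEADERBOARD_DF)
        # The tab renamed in this commit; same id, shorter label.
        with gr.TabItem("🏅 1 Correct + Variations", elem_id="llm-benchmark-tab-table", id=4):
            init_leaderboard(LEADERBOARD_DF_1_CORRECT_VAR)

if __name__ == "__main__":
    demo.launch()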
src/display/utils.py CHANGED
@@ -24,6 +24,7 @@ class ColumnContent:
 auto_eval_column_dict = []
 # Init
 auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)])
+auto_eval_column_dict.append(["output_format", ColumnContent, ColumnContent("Output Format", "str", True)])
 #Scores
 auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Average ⬆️", "number", True)])
 for task in Tasks:
@@ -35,8 +36,6 @@ for task in Detail_Tasks:
 auto_eval_column_dict.append(["architecture", ColumnContent, ColumnContent("Architecture", "str", False)])
 auto_eval_column_dict.append(["revision", ColumnContent, ColumnContent("Model sha", "str", False, False)])
 
-
-auto_eval_column_dict.append(["output_format", ColumnContent, ColumnContent("Output Format", "str", True)])
 auto_eval_column_dict.append(["dataset_version", ColumnContent, ColumnContent("Task Version", "str", False, False)])
 
 # We use make dataclass to dynamically fill the scores from Tasks
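The closing comment refers to the pattern where each [attribute_name, type, default] triple in auto_eval_column_dict is fed to dataclasses.make_dataclass to build the column class. A minimal sketch of that mechanism, assuming the ColumnContent field layout implied by the calls above (name, type, displayed_by_default, hidden, never_hidden) and the conventional class name AutoEvalColumn:

from dataclasses import dataclass, make_dataclass

@dataclass
class ColumnContent:
    name: str
    type: str
    displayed_by_default: bool
    hidden: bool = False
    never_hidden: bool = False

# Trimmed-down version of the list built in src/display/utils.py; after this
# commit, "output_format" sits right after "model", so the column appears
# near the front of the leaderboard instead of among the trailing metadata.
auto_eval_column_dict = [
    ["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)],
    ["output_format", ColumnContent, ColumnContent("Output Format", "str", True)],
    ["average", ColumnContent, ColumnContent("Average ⬆️", "number", True)],
]

# Each [attr_name, attr_type, default] triple becomes one field of the
# generated class, with the ColumnContent instance as its default value.
AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)

print(AutoEvalColumn.output_format.name)  # -> "Output Format"
print(AutoEvalColumn.output_format.type)  # -> "str"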