eduagarcia committed
Commit 1b2e131
Parent: eef299c

show baseline

src/display/utils.py CHANGED
@@ -100,7 +100,7 @@ for task in Tasks:
  auto_eval_column_dict.append(["model_type", ColumnContent, ColumnContent("Type", "str", False)])
  auto_eval_column_dict.append(["architecture", ColumnContent, ColumnContent("Architecture", "str", False)])
  auto_eval_column_dict.append(["weight_type", ColumnContent, ColumnContent("Weight type", "str", False, True)])
- auto_eval_column_dict.append(["precision", ColumnContent, ColumnContent("Precision", "str", False)])
+ auto_eval_column_dict.append(["precision", ColumnContent, ColumnContent("Precision", "str", True)])
  auto_eval_column_dict.append(["merged", ColumnContent, ColumnContent("Merged", "bool", False)])
  auto_eval_column_dict.append(["license", ColumnContent, ColumnContent("Hub License", "str", False)])
  auto_eval_column_dict.append(["params", ColumnContent, ColumnContent("#Params (B)", "number", False)])
@@ -128,7 +128,7 @@ class EvalQueueColumn: # Queue column
  baseline_row = {
      AutoEvalColumn.model.name: "<p>Baseline</p>",
      AutoEvalColumn.revision.name: "N/A",
-     AutoEvalColumn.precision.name: None,
+     AutoEvalColumn.precision.name: "?",
      AutoEvalColumn.merged.name: False,
      #AutoEvalColumn.average.name: 31.0,
      #AutoEvalColumn.arc.name: 25.0,
@@ -140,7 +140,7 @@ baseline_row = {
      AutoEvalColumn.dummy.name: "baseline",
      AutoEvalColumn.model_type.name: "",
      AutoEvalColumn.flagged.name: False,
-     AutoEvalColumn.model_type_symbol.name: None,
+     AutoEvalColumn.model_type_symbol.name: "?",
      AutoEvalColumn.architecture.name: None,
      AutoEvalColumn.weight_type.name: None,
      AutoEvalColumn.params.name: 0,
@@ -152,7 +152,7 @@ baseline_row = {
 
  baseline_list = []
  for task in Tasks:
-     baseline_row[task.name] = task.value.baseline
+     baseline_row[task.value.col_name] = task.value.baseline
      if task.value.baseline is not None:
          baseline_list.append(task.value.baseline)
  baseline_row[AutoEvalColumn.average.name] = round(sum(baseline_list) / len(baseline_list), 2)
@@ -168,7 +168,7 @@ baseline_row[AutoEvalColumn.average.name] = round(sum(baseline_list) / len(basel
  human_baseline_row = {
      AutoEvalColumn.model.name: "<p>Human performance</p>",
      AutoEvalColumn.revision.name: "N/A",
-     AutoEvalColumn.precision.name: None,
+     AutoEvalColumn.precision.name: "?",
      #AutoEvalColumn.average.name: 92.75,
      AutoEvalColumn.merged.name: False,
      #AutoEvalColumn.arc.name: 80.0,
@@ -180,7 +180,7 @@ human_baseline_row = {
      AutoEvalColumn.dummy.name: "human_baseline",
      AutoEvalColumn.model_type.name: "",
      AutoEvalColumn.flagged.name: False,
-     AutoEvalColumn.model_type_symbol.name: None,
+     AutoEvalColumn.model_type_symbol.name: "?",
      AutoEvalColumn.architecture.name: None,
      AutoEvalColumn.weight_type.name: None,
      AutoEvalColumn.params.name: 0,
@@ -192,7 +192,7 @@ human_baseline_row = {
 
  baseline_list = []
  for task in Tasks:
-     human_baseline_row[task.name] = task.value.human_baseline
+     human_baseline_row[task.value.col_name] = task.value.human_baseline
      if task.value.human_baseline is not None:
          baseline_list.append(task.value.human_baseline)
  human_baseline_row[AutoEvalColumn.average.name] = round(sum(baseline_list) / len(baseline_list), 2)
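Why the last hunks matter: the leaderboard's per-task columns are named after task.value.col_name, so a baseline row keyed by task.name (the enum member name) would never line up with a displayed column. Below is a minimal, self-contained sketch of that keying; the Task and ColumnContent field layouts, the task names, and the scores are invented for illustration (roughly following the upstream open-llm-leaderboard conventions) and are assumptions about this repo's definitions, not copies of them.

from dataclasses import dataclass
from enum import Enum
from typing import Optional

@dataclass
class ColumnContent:
    name: str                        # display name, e.g. "Precision"
    type: str                        # "str", "number", "bool", ...
    displayed_by_default: bool = False   # assumed meaning of the 3rd positional arg
    hidden: bool = False

@dataclass
class Task:
    benchmark: str
    col_name: str                    # display column name used in the results table
    baseline: Optional[float] = None        # trivial-baseline score shown by this commit
    human_baseline: Optional[float] = None  # human performance, when known

class Tasks(Enum):
    # Illustrative tasks and scores only.
    task0 = Task("assin2_rte", "ASSIN2 RTE", baseline=50.0, human_baseline=90.0)
    task1 = Task("bluex", "BLUEX", baseline=25.0, human_baseline=80.0)

# The hunk at line 103: the Precision column is now displayed by default.
precision_col = ColumnContent("Precision", "str", True)

# Key the baseline row by the display column name (task.value.col_name), not by
# the enum member name (task.name == "task0"), so it matches the table's columns.
baseline_row = {"Model": "<p>Baseline</p>", "Precision": "?"}
baseline_list = []
for task in Tasks:
    baseline_row[task.value.col_name] = task.value.baseline
    if task.value.baseline is not None:
        baseline_list.append(task.value.baseline)
baseline_row["Average"] = round(sum(baseline_list) / len(baseline_list), 2)

print(baseline_row)
# {'Model': '<p>Baseline</p>', 'Precision': '?', 'ASSIN2 RTE': 50.0,
#  'BLUEX': 25.0, 'Average': 37.5}

The "?" placeholders play a similar role for string-typed columns (Precision and the type symbol): presumably they give the baseline rows a concrete, renderable value instead of None once the rows sit alongside real models.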
src/leaderboard/read_evals.py CHANGED
@@ -206,7 +206,6 @@ def get_raw_eval_results(results_path: str, requests_path: str, dynamic_path: st
      eval_results = {}
      for model_result_filepath in model_result_filepaths:
          # Creation of result
-         print(model_result_filepath)
          eval_result = EvalResult.init_from_json_file(model_result_filepath)
          eval_result.update_with_request_file(requests_path)
          if eval_result.full_model in dynamic_data:
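The only change to read_evals.py is dropping a leftover debug print of each result path. If that path is still occasionally useful, one alternative is to route it through the logging module at DEBUG level instead of print. The helper below is a hypothetical sketch, not part of this repo, and its file discovery via rglob is an assumption about how result JSONs are laid out.

import logging
from pathlib import Path

logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)  # raise to DEBUG to see per-file paths again

def iter_result_files(results_path: str):
    """Yield result JSON paths, logging each one instead of printing it."""
    for filepath in sorted(Path(results_path).rglob("*.json")):
        logger.debug("Reading eval result from %s", filepath)
        yield filepath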