eduagarcia committed
Commit • 1b2e131
Parent(s): eef299c

show baseline

Files changed:
- src/display/utils.py +7 -7
- src/leaderboard/read_evals.py +0 -1
src/display/utils.py
CHANGED
@@ -100,7 +100,7 @@ for task in Tasks:
 auto_eval_column_dict.append(["model_type", ColumnContent, ColumnContent("Type", "str", False)])
 auto_eval_column_dict.append(["architecture", ColumnContent, ColumnContent("Architecture", "str", False)])
 auto_eval_column_dict.append(["weight_type", ColumnContent, ColumnContent("Weight type", "str", False, True)])
-auto_eval_column_dict.append(["precision", ColumnContent, ColumnContent("Precision", "str",
+auto_eval_column_dict.append(["precision", ColumnContent, ColumnContent("Precision", "str", True)])
 auto_eval_column_dict.append(["merged", ColumnContent, ColumnContent("Merged", "bool", False)])
 auto_eval_column_dict.append(["license", ColumnContent, ColumnContent("Hub License", "str", False)])
 auto_eval_column_dict.append(["params", ColumnContent, ColumnContent("#Params (B)", "number", False)])
@@ -128,7 +128,7 @@ class EvalQueueColumn: # Queue column
 baseline_row = {
     AutoEvalColumn.model.name: "<p>Baseline</p>",
     AutoEvalColumn.revision.name: "N/A",
-    AutoEvalColumn.precision.name:
+    AutoEvalColumn.precision.name: "?",
     AutoEvalColumn.merged.name: False,
     #AutoEvalColumn.average.name: 31.0,
     #AutoEvalColumn.arc.name: 25.0,
@@ -140,7 +140,7 @@ baseline_row = {
     AutoEvalColumn.dummy.name: "baseline",
     AutoEvalColumn.model_type.name: "",
     AutoEvalColumn.flagged.name: False,
-    AutoEvalColumn.model_type_symbol.name:
+    AutoEvalColumn.model_type_symbol.name: "?",
     AutoEvalColumn.architecture.name: None,
     AutoEvalColumn.weight_type.name: None,
     AutoEvalColumn.params.name: 0,
@@ -152,7 +152,7 @@ baseline_row = {
 
 baseline_list = []
 for task in Tasks:
-    baseline_row[task.
+    baseline_row[task.value.col_name] = task.value.baseline
     if task.value.baseline is not None:
         baseline_list.append(task.value.baseline)
 baseline_row[AutoEvalColumn.average.name] = round(sum(baseline_list) / len(baseline_list), 2)
@@ -168,7 +168,7 @@ baseline_row[AutoEvalColumn.average.name] = round(sum(baseline_list) / len(basel
 human_baseline_row = {
     AutoEvalColumn.model.name: "<p>Human performance</p>",
     AutoEvalColumn.revision.name: "N/A",
-    AutoEvalColumn.precision.name:
+    AutoEvalColumn.precision.name: "?",
     #AutoEvalColumn.average.name: 92.75,
     AutoEvalColumn.merged.name: False,
     #AutoEvalColumn.arc.name: 80.0,
@@ -180,7 +180,7 @@ human_baseline_row = {
     AutoEvalColumn.dummy.name: "human_baseline",
     AutoEvalColumn.model_type.name: "",
     AutoEvalColumn.flagged.name: False,
-    AutoEvalColumn.model_type_symbol.name:
+    AutoEvalColumn.model_type_symbol.name: "?",
     AutoEvalColumn.architecture.name: None,
     AutoEvalColumn.weight_type.name: None,
     AutoEvalColumn.params.name: 0,
@@ -192,7 +192,7 @@ human_baseline_row = {
 
 baseline_list = []
 for task in Tasks:
-    human_baseline_row[task.
+    human_baseline_row[task.value.col_name] = task.value.human_baseline
     if task.value.human_baseline is not None:
         baseline_list.append(task.value.human_baseline)
 human_baseline_row[AutoEvalColumn.average.name] = round(sum(baseline_list) / len(baseline_list), 2)
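The substance of this file's change: the Precision column definition gets an explicit flag, the precision and type-symbol fields of the two synthetic rows get "?" placeholders, and each task's column is filled with its baseline score, so the Baseline and Human performance rows can be shown in the table. For context, the new per-task loop writes every task's score into its column and averages only the tasks that define one. Below is a minimal, self-contained sketch of that pattern; the Task fields, task names, and scores are illustrative placeholders, not the Space's real task list.

# Illustrative sketch only: task names, columns, and baselines are made up.
from dataclasses import dataclass
from enum import Enum
from typing import Optional

@dataclass
class Task:
    benchmark: str
    col_name: str
    baseline: Optional[float] = None  # None means "no baseline reported"

class Tasks(Enum):
    task0 = Task("example_nli", "Example NLI", 33.3)
    task1 = Task("example_qa", "Example QA", None)

baseline_row = {}
baseline_list = []
for task in Tasks:
    # Every task gets a column entry, even when its baseline is None...
    baseline_row[task.value.col_name] = task.value.baseline
    # ...but only tasks with a defined baseline count toward the average.
    if task.value.baseline is not None:
        baseline_list.append(task.value.baseline)

baseline_row["Average"] = round(sum(baseline_list) / len(baseline_list), 2)
print(baseline_row)  # {'Example NLI': 33.3, 'Example QA': None, 'Average': 33.3}

In the real file the rows also carry the model, revision, license, and parameter fields shown in the hunks above; the sketch only isolates the per-task loop.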
src/leaderboard/read_evals.py
CHANGED
@@ -206,7 +206,6 @@ def get_raw_eval_results(results_path: str, requests_path: str, dynamic_path: st
     eval_results = {}
     for model_result_filepath in model_result_filepaths:
         # Creation of result
-        print(model_result_filepath)
         eval_result = EvalResult.init_from_json_file(model_result_filepath)
         eval_result.update_with_request_file(requests_path)
         if eval_result.full_model in dynamic_data:
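The only change in this file drops a leftover debug print of each result path. If per-file tracing is still useful, one quieter option (purely a suggestion, not part of this commit) is to route it through the standard logging module so it can be silenced by log level; the module name and path below are placeholders.

# Hypothetical alternative to the removed print, not part of this commit.
import logging

logger = logging.getLogger("leaderboard.read_evals")
logging.basicConfig(level=logging.DEBUG)  # enable DEBUG output for this sketch only

# Placeholder list standing in for the real model_result_filepaths.
model_result_filepaths = ["results/some-org__some-model/results_2024-01-01.json"]
for model_result_filepath in model_result_filepaths:
    # Emitted only when the logger is configured at DEBUG level.
    logger.debug("Reading eval result from %s", model_result_filepath)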