Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
eduagarcia
committed on
Commit
•
f976f1c
1
Parent(s):
5639a81
Add NPM field
Browse files
- src/display/utils.py +14 -5
- src/leaderboard/read_evals.py +2 -2
src/display/utils.py
CHANGED
@@ -51,7 +51,7 @@ for task in Tasks:
|
|
51 |
auto_eval_column_dict.append(["model_type", ColumnContent, ColumnContent("Type", "str", False)])
|
52 |
auto_eval_column_dict.append(["architecture", ColumnContent, ColumnContent("Architecture", "str", False)])
|
53 |
auto_eval_column_dict.append(["weight_type", ColumnContent, ColumnContent("Weight type", "str", False, True)])
|
54 |
-
auto_eval_column_dict.append(["precision", ColumnContent, ColumnContent("Precision", "str",
|
55 |
auto_eval_column_dict.append(["merged", ColumnContent, ColumnContent("Merged", "bool", False)])
|
56 |
auto_eval_column_dict.append(["license", ColumnContent, ColumnContent("Hub License", "str", False)])
|
57 |
auto_eval_column_dict.append(["params", ColumnContent, ColumnContent("#Params (B)", "number", False)])
|
@@ -107,11 +107,15 @@ baseline_row = {
|
|
107 |
}
|
108 |
|
109 |
baseline_list = []
|
|
|
110 |
for task in Tasks:
|
111 |
baseline_row[task.value.col_name] = task.value.baseline
|
112 |
-
|
113 |
-
|
|
|
|
|
114 |
baseline_row[AutoEvalColumn.average.name] = round(sum(baseline_list) / len(baseline_list), 2)
|
|
|
115 |
|
116 |
#if GET_ORIGINAL_HF_LEADERBOARD_EVAL_RESULTS:
|
117 |
baseline_row["🤗 Leaderboard Average"] = None
|
@@ -151,11 +155,16 @@ human_baseline_row = {
|
|
151 |
}
|
152 |
|
153 |
baseline_list = []
|
|
|
154 |
for task in Tasks:
|
155 |
human_baseline_row[task.value.col_name] = task.value.human_baseline
|
156 |
-
|
157 |
-
|
|
|
|
|
|
|
158 |
human_baseline_row[AutoEvalColumn.average.name] = round(sum(baseline_list) / len(baseline_list), 2)
|
|
|
159 |
#if GET_ORIGINAL_HF_LEADERBOARD_EVAL_RESULTS:
|
160 |
human_baseline_row["🤗 Leaderboard Average"] = None
|
161 |
|
|
|
51 |
auto_eval_column_dict.append(["model_type", ColumnContent, ColumnContent("Type", "str", False)])
|
52 |
auto_eval_column_dict.append(["architecture", ColumnContent, ColumnContent("Architecture", "str", False)])
|
53 |
auto_eval_column_dict.append(["weight_type", ColumnContent, ColumnContent("Weight type", "str", False, True)])
|
54 |
+
auto_eval_column_dict.append(["precision", ColumnContent, ColumnContent("Precision", "str", False)])
|
55 |
auto_eval_column_dict.append(["merged", ColumnContent, ColumnContent("Merged", "bool", False)])
|
56 |
auto_eval_column_dict.append(["license", ColumnContent, ColumnContent("Hub License", "str", False)])
|
57 |
auto_eval_column_dict.append(["params", ColumnContent, ColumnContent("#Params (B)", "number", False)])
|
|
|
107 |
}
|
108 |
|
109 |
baseline_list = []
|
110 |
+
npm = []
|
111 |
for task in Tasks:
|
112 |
baseline_row[task.value.col_name] = task.value.baseline
|
113 |
+
res = task.value.baseline
|
114 |
+
if res is not None and (isinstance(res, float) or isinstance(res, int)):
|
115 |
+
baseline_list.append(res)
|
116 |
+
npm.append((res - task.value.baseline) / (100 - task.value.baseline))
|
117 |
baseline_row[AutoEvalColumn.average.name] = round(sum(baseline_list) / len(baseline_list), 2)
|
118 |
+
baseline_row[AutoEvalColumn.npm.name] = round(sum(npm) / len(npm), 2)
|
119 |
|
120 |
#if GET_ORIGINAL_HF_LEADERBOARD_EVAL_RESULTS:
|
121 |
baseline_row["🤗 Leaderboard Average"] = None
|
|
|
155 |
}
|
156 |
|
157 |
baseline_list = []
|
158 |
+
npm = []
|
159 |
for task in Tasks:
|
160 |
human_baseline_row[task.value.col_name] = task.value.human_baseline
|
161 |
+
res = task.value.human_baseline
|
162 |
+
if res is None or not (isinstance(res, float) or isinstance(res, int)):
|
163 |
+
res = 95.0
|
164 |
+
baseline_list.append(res)
|
165 |
+
npm.append((res - task.value.baseline) / (100 - task.value.baseline))
|
166 |
human_baseline_row[AutoEvalColumn.average.name] = round(sum(baseline_list) / len(baseline_list), 2)
|
167 |
+
human_baseline_row[AutoEvalColumn.npm.name] = round(sum(npm) / len(npm), 2)
|
168 |
#if GET_ORIGINAL_HF_LEADERBOARD_EVAL_RESULTS:
|
169 |
human_baseline_row["🤗 Leaderboard Average"] = None
|
170 |
|
src/leaderboard/read_evals.py
CHANGED
@@ -166,8 +166,8 @@ class EvalResult:
|
|
166 |
continue
|
167 |
average.append(res)
|
168 |
npm.append((res-task.value.baseline)*100.0 / (100.0-task.value.baseline))
|
169 |
-
average = sum(average)/len(average)
|
170 |
-
npm = sum(npm)/len(npm)
|
171 |
|
172 |
data_dict = {
|
173 |
"eval_name": self.eval_name, # not a column, just a save name,
|
|
|
166 |
continue
|
167 |
average.append(res)
|
168 |
npm.append((res-task.value.baseline)*100.0 / (100.0-task.value.baseline))
|
169 |
+
average = round(sum(average)/len(average), 2)
|
170 |
+
npm = round(sum(npm)/len(npm), 2)
|
171 |
|
172 |
data_dict = {
|
173 |
"eval_name": self.eval_name, # not a column, just a save name,
|