Sync from GitHub
Browse files
open-moe-llm-leaderboard-gh/src/backend/envs.py
CHANGED
@@ -58,6 +58,7 @@ class Tasks(Enum):
|
|
58 |
# task20 = Task("race", "acc", "RACE", 0)
|
59 |
task21 = Task("mmlu", "acc", "MMLU", 5)
|
60 |
task22 = Task("gsm8k_custom", "em", "GSM8K", 5)
|
|
|
61 |
|
62 |
|
63 |
EVAL_REQUESTS_PATH_BACKEND = os.path.join(CACHE_PATH, "eval-queue-bk")
|
|
|
58 |
# task20 = Task("race", "acc", "RACE", 0)
|
59 |
task21 = Task("mmlu", "acc", "MMLU", 5)
|
60 |
task22 = Task("gsm8k_custom", "em", "GSM8K", 5)
|
61 |
+
task23 = Task("gsm8k_cot", "em", "GSM8K", 8)
|
62 |
|
63 |
|
64 |
EVAL_REQUESTS_PATH_BACKEND = os.path.join(CACHE_PATH, "eval-queue-bk")
|
open-moe-llm-leaderboard-gh/src/display/utils.py
CHANGED
@@ -122,8 +122,8 @@ for task in Tasks:
|
|
122 |
continue
|
123 |
# auto_eval_column_dict.append([f"{task.name}_prefilling_time", ColumnContent, ColumnContent(f"{task.value.col_name} {PREs}", "number", False, hidden=True)])
|
124 |
auto_eval_column_dict.append([f"{task.name}_decoding_throughput", ColumnContent, ColumnContent(f"{task.value.col_name} {TS}", "number", True, hidden=True)])
|
125 |
-
auto_eval_column_dict.append([f"{task.name}
|
126 |
-
auto_eval_column_dict.append([f"{task.name}
|
127 |
|
128 |
|
129 |
# Model information
|
|
|
122 |
continue
|
123 |
# auto_eval_column_dict.append([f"{task.name}_prefilling_time", ColumnContent, ColumnContent(f"{task.value.col_name} {PREs}", "number", False, hidden=True)])
|
124 |
auto_eval_column_dict.append([f"{task.name}_decoding_throughput", ColumnContent, ColumnContent(f"{task.value.col_name} {TS}", "number", True, hidden=True)])
|
125 |
+
auto_eval_column_dict.append([f"{task.name}_mbu", ColumnContent, ColumnContent(f"{task.value.col_name} {MBU}", "number", True, hidden=True)])
|
126 |
+
auto_eval_column_dict.append([f"{task.name}_mfu", ColumnContent, ColumnContent(f"{task.value.col_name} {MFU}", "number", True, hidden=True)])
|
127 |
|
128 |
|
129 |
# Model information
|