Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
pminervini
committed on
Commit
•
e5e2b84
1
Parent(s):
d374577
update
Browse files
- src/display/utils.py +3 -1
- src/leaderboard/filter_models.py +1 -0
- src/utils.py +1 -0
src/display/utils.py
CHANGED
@@ -44,7 +44,9 @@ class Tasks(Enum):
|
|
44 |
halueval_summ = Task("halueval_summarization", "acc", "HaluSumm/Acc")
|
45 |
halueval_dial = Task("halueval_dialogue", "acc", "HaluDial/Acc")
|
46 |
|
47 |
-
|
|
|
|
|
48 |
|
49 |
# These classes are for user facing column names,
|
50 |
# to avoid having to change them all around the code
|
|
|
44 |
halueval_summ = Task("halueval_summarization", "acc", "HaluSumm/Acc")
|
45 |
halueval_dial = Task("halueval_dialogue", "acc", "HaluDial/Acc")
|
46 |
|
47 |
+
# XXX include me back at some point
|
48 |
+
# selfcheck = Task("selfcheckgpt", "max-selfcheckgpt", "SelfCheckGPT")
|
49 |
+
|
50 |
|
51 |
# These classes are for user facing column names,
|
52 |
# to avoid having to change them all around the code
|
src/leaderboard/filter_models.py
CHANGED
@@ -20,6 +20,7 @@ DO_NOT_SUBMIT_MODELS = [
|
|
20 |
"Voicelab/trurl-2-13b", # trained on MMLU
|
21 |
]
|
22 |
|
|
|
23 |
def flag_models(leaderboard_data: list[dict]):
|
24 |
for model_data in leaderboard_data:
|
25 |
if model_data["model_name_for_query"] in FLAGGED_MODELS:
|
|
|
20 |
"Voicelab/trurl-2-13b", # trained on MMLU
|
21 |
]
|
22 |
|
23 |
+
|
24 |
def flag_models(leaderboard_data: list[dict]):
|
25 |
for model_data in leaderboard_data:
|
26 |
if model_data["model_name_for_query"] in FLAGGED_MODELS:
|
src/utils.py
CHANGED
@@ -19,6 +19,7 @@ def get_dataset_url(row):
|
|
19 |
benchmark = f'<a target="_blank" href="{dataset_url}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{dataset_name}</a>'
|
20 |
return benchmark
|
21 |
|
|
|
22 |
def get_dataset_summary_table(file_path):
|
23 |
df = pd.read_csv(file_path)
|
24 |
|
|
|
19 |
benchmark = f'<a target="_blank" href="{dataset_url}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{dataset_name}</a>'
|
20 |
return benchmark
|
21 |
|
22 |
+
|
23 |
def get_dataset_summary_table(file_path):
|
24 |
df = pd.read_csv(file_path)
|
25 |
|