BearSean's picture
Update src/leaderboard/filter_models.py (#45)
be34369 verified
raw
history blame
No virus
1.9 kB
from src.display.formatting import model_hyperlink
from src.display.utils import AutoEvalColumn
# Models that users have reported as problematic for one reason or another.
# Maps a lookup key to the forum discussion explaining the flag. The key is
# normally a model name; the special key "merged" is the one flag_models()
# uses for rows whose flagged column is already set on entry.
FLAGGED_MODELS = {
    "merged": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/510",
    "TeamUNIVA/Komodo_7B_v0.1.0": "https://huggingface.co/spaces/upstage/open-ko-llm-leaderboard/discussions/44",
}
# Models whose organisations asked that they not be submitted to the
# leaderboard. remove_forbidden_models() drops every row whose
# "model_name_for_query" value appears in this list. Currently empty.
DO_NOT_SUBMIT_MODELS = []
def flag_models(leaderboard_data: list[dict]):
    """Mark flagged leaderboard rows, modifying each row dictionary in place.

    For every row a lookup key is chosen: ``"merged"`` when the row's
    ``AutoEvalColumn.flagged`` value equals ``True`` on entry, otherwise the
    row's ``"model_name_for_query"`` value. When that key is present in
    ``FLAGGED_MODELS``, a "has been flagged!" notice linking to the discussion
    is appended to the row's model column and the flagged column is set to
    ``True``. Otherwise the flagged column is set to ``False``.

    Args:
        leaderboard_data: Leaderboard rows. Every row must contain the flagged
            column; rows not flagged on entry must also contain
            ``"model_name_for_query"``, and rows that end up flagged must
            contain the model column.

    Returns:
        None. All changes are written into the dictionaries that were passed.
    """
    flagged_col = AutoEvalColumn.flagged.name

    for row in leaderboard_data:
        # Merges are flagged automatically: a row that arrives with the flagged
        # column equal to True is looked up under the shared "merged" key.
        # The explicit comparison is intentional -- only values that compare
        # equal to True count, which a plain truthiness test would widen.
        lookup_key = "merged" if row[flagged_col] == True else row["model_name_for_query"]  # noqa: E712

        if lookup_key not in FLAGGED_MODELS:
            row[flagged_col] = False
            continue

        discussion_url = FLAGGED_MODELS[lookup_key]
        # The discussion number is the last path segment of the link.
        discussion_id = discussion_url.split("/")[-1]
        discussion_link = model_hyperlink(discussion_url, f"See discussion #{discussion_id}")

        model_col = AutoEvalColumn.model.name
        row[model_col] = f"{row[model_col]} has been flagged! {discussion_link}"
        row[flagged_col] = True
def remove_forbidden_models(leaderboard_data: list[dict]):
    """Drop every row whose model is listed in ``DO_NOT_SUBMIT_MODELS``.

    A row is dropped when its ``"model_name_for_query"`` value is a member of
    ``DO_NOT_SUBMIT_MODELS``. The relative order of the remaining rows is
    unchanged.

    Args:
        leaderboard_data: Leaderboard rows; each must contain the
            ``"model_name_for_query"`` key.

    Returns:
        The same list object that was passed in, with forbidden rows removed.
    """
    allowed_rows = [
        row
        for row in leaderboard_data
        if row["model_name_for_query"] not in DO_NOT_SUBMIT_MODELS
    ]
    # Slice assignment rewrites the caller's list instead of rebinding the
    # name, so the removal remains an in-place operation.
    leaderboard_data[:] = allowed_rows
    return leaderboard_data
def filter_models(leaderboard_data: list[dict]):
    """Apply both leaderboard filters to ``leaderboard_data`` in place.

    Forbidden models are removed first via ``remove_forbidden_models``; the
    rows that remain are then passed to ``flag_models``.

    Args:
        leaderboard_data: Leaderboard rows to filter; the list and its row
            dictionaries are modified directly.

    Returns:
        None. Callers observe the result through the list they passed in.
    """
    flag_models(remove_forbidden_models(leaderboard_data))