Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
eduagarcia
committed on
Merge branch 'main' of https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard into merge_original
Browse files- app.py +23 -22
- requirements.txt +1 -1
- src/leaderboard/filter_models.py +28 -0
- src/submission/check_validity.py +2 -1
- src/submission/submit.py +3 -1
- src/tools/plots.py +3 -1
app.py
CHANGED
@@ -60,28 +60,29 @@ from src.tools.plots import (
|
|
60 |
def restart_space():
|
61 |
API.restart_space(repo_id=REPO_ID, token=H4_TOKEN)
|
62 |
|
63 |
-
def init_space():
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
|
|
85 |
|
86 |
# Init in case of empty
|
87 |
if not os.path.exists(DYNAMIC_INFO_FILE_PATH):
|
|
|
60 |
def restart_space():
|
61 |
API.restart_space(repo_id=REPO_ID, token=H4_TOKEN)
|
62 |
|
63 |
+
def init_space(full_init: bool = True):
|
64 |
+
if full_init:
|
65 |
+
try:
|
66 |
+
print(EVAL_REQUESTS_PATH)
|
67 |
+
snapshot_download(
|
68 |
+
repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30
|
69 |
+
)
|
70 |
+
except Exception:
|
71 |
+
restart_space()
|
72 |
+
try:
|
73 |
+
print(DYNAMIC_INFO_PATH)
|
74 |
+
snapshot_download(
|
75 |
+
repo_id=DYNAMIC_INFO_REPO, local_dir=DYNAMIC_INFO_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30
|
76 |
+
)
|
77 |
+
except Exception:
|
78 |
+
restart_space()
|
79 |
+
try:
|
80 |
+
print(EVAL_RESULTS_PATH)
|
81 |
+
snapshot_download(
|
82 |
+
repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30
|
83 |
+
)
|
84 |
+
except Exception:
|
85 |
+
restart_space()
|
86 |
|
87 |
# Init in case of empty
|
88 |
if not os.path.exists(DYNAMIC_INFO_FILE_PATH):
|
requirements.txt
CHANGED
@@ -13,7 +13,7 @@ python-dateutil==2.8.2
|
|
13 |
requests==2.28.2
|
14 |
sentencepiece
|
15 |
tqdm==4.65.0
|
16 |
-
transformers==4.
|
17 |
tokenizers>=0.15.0
|
18 |
tiktoken>=0.5.2
|
19 |
einops==0.7.0
|
|
|
13 |
requests==2.28.2
|
14 |
sentencepiece
|
15 |
tqdm==4.65.0
|
16 |
+
transformers==4.38.0
|
17 |
tokenizers>=0.15.0
|
18 |
tiktoken>=0.5.2
|
19 |
einops==0.7.0
|
src/leaderboard/filter_models.py
CHANGED
@@ -89,6 +89,34 @@ FLAGGED_MODELS = {
|
|
89 |
"DopeorNope/SOLARC-MOE-10.7Bx6 ": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/511",
|
90 |
"DopeorNope/SOLARC-MOE-10.7Bx4": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/511",
|
91 |
"gagan3012/MetaModelv2 ": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/511",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
92 |
}
|
93 |
|
94 |
# Models which have been requested by orgs to not be submitted on the leaderboard
|
|
|
89 |
"DopeorNope/SOLARC-MOE-10.7Bx6 ": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/511",
|
90 |
"DopeorNope/SOLARC-MOE-10.7Bx4": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/511",
|
91 |
"gagan3012/MetaModelv2 ": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/511",
|
92 |
+
"udkai/Turdus": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540",
|
93 |
+
"kodonho/Solar-OrcaDPO-Solar-Instruct-SLERP": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540",
|
94 |
+
"kodonho/SolarM-SakuraSolar-SLERP": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540",
|
95 |
+
"Yhyu13/LMCocktail-10.7B-v1": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540",
|
96 |
+
"mlabonne/NeuralMarcoro14-7B": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540",
|
97 |
+
"Neuronovo/neuronovo-7B-v0.2": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540",
|
98 |
+
"ryandt/MusingCaterpillar": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540",
|
99 |
+
"Neuronovo/neuronovo-7B-v0.3": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540",
|
100 |
+
"SanjiWatsuki/Lelantos-DPO-7B": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540",
|
101 |
+
"bardsai/jaskier-7b-dpo": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540",
|
102 |
+
"cookinai/OpenCM-14": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540",
|
103 |
+
"bardsai/jaskier-7b-dpo-v2": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540",
|
104 |
+
"jan-hq/supermario-v2": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540",
|
105 |
+
# MoErges
|
106 |
+
"cloudyu/Yi-34Bx2-MoE-60B":"https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540",
|
107 |
+
"cloudyu/Mixtral_34Bx2_MoE_60B":"https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540",
|
108 |
+
"gagan3012/MetaModel_moe":"https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540",
|
109 |
+
"macadeliccc/SOLAR-math-2x10.7b-v0.2":"https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540",
|
110 |
+
"cloudyu/Mixtral_7Bx2_MoE":"https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540",
|
111 |
+
"macadeliccc/SOLAR-math-2x10.7b":"https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540",
|
112 |
+
"macadeliccc/Orca-SOLAR-4x10.7b":"https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540",
|
113 |
+
"macadeliccc/piccolo-8x7b":"https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540",
|
114 |
+
"cloudyu/Mixtral_7Bx4_MOE_24B":"https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540",
|
115 |
+
"macadeliccc/laser-dolphin-mixtral-2x7b-dpo":"https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540",
|
116 |
+
"macadeliccc/polyglot-math-4x7b":"https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540",
|
117 |
+
# Other - contamination mostly
|
118 |
+
"DopeorNope/COKAL-v1-70B": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/566",
|
119 |
+
"CultriX/MistralTrix-v1": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/556",
|
120 |
}
|
121 |
|
122 |
# Models which have been requested by orgs to not be submitted on the leaderboard
|
src/submission/check_validity.py
CHANGED
@@ -66,9 +66,10 @@ def is_model_on_hub(model_name: str, revision: str, token: str = None, trust_rem
|
|
66 |
|
67 |
except Exception as e:
|
68 |
traceback.print_exc()
|
|
|
|
|
69 |
return False, "was not found on hub!", None
|
70 |
|
71 |
-
|
72 |
def get_model_size(model_info: ModelInfo, precision: str):
|
73 |
size_pattern = re.compile(r"(\d+\.)?\d+(b|m)")
|
74 |
safetensors = None
|
|
|
66 |
|
67 |
except Exception as e:
|
68 |
traceback.print_exc()
|
69 |
+
if "You are trying to access a gated repo." in str(e):
|
70 |
+
return True, "uses a gated model.", None
|
71 |
return False, "was not found on hub!", None
|
72 |
|
|
|
73 |
def get_model_size(model_info: ModelInfo, precision: str):
|
74 |
size_pattern = re.compile(r"(\d+\.)?\d+(b|m)")
|
75 |
safetensors = None
|
src/submission/submit.py
CHANGED
@@ -73,7 +73,7 @@ def add_new_eval(
|
|
73 |
created_at = ""
|
74 |
if not weight_type == "Adapter":
|
75 |
model_on_hub, error, model_config = is_model_on_hub(model_name=model, revision=revision, test_tokenizer=True)
|
76 |
-
if not model_on_hub:
|
77 |
return styled_error(f'Model "{model}" {error}')
|
78 |
if model_config is not None:
|
79 |
architectures = getattr(model_config, "architectures", None)
|
@@ -100,6 +100,8 @@ def add_new_eval(
|
|
100 |
#return styled_error("Please select a license for your model")
|
101 |
|
102 |
modelcard_OK, error_msg, model_card = check_model_card(model)
|
|
|
|
|
103 |
|
104 |
tags = get_model_tags(model_card, model)
|
105 |
|
|
|
73 |
created_at = ""
|
74 |
if not weight_type == "Adapter":
|
75 |
model_on_hub, error, model_config = is_model_on_hub(model_name=model, revision=revision, test_tokenizer=True)
|
76 |
+
if not model_on_hub or model_config is None:
|
77 |
return styled_error(f'Model "{model}" {error}')
|
78 |
if model_config is not None:
|
79 |
architectures = getattr(model_config, "architectures", None)
|
|
|
100 |
#return styled_error("Please select a license for your model")
|
101 |
|
102 |
modelcard_OK, error_msg, model_card = check_model_card(model)
|
103 |
+
#if not modelcard_OK:
|
104 |
+
# return styled_error(error_msg)
|
105 |
|
106 |
tags = get_model_tags(model_card, model)
|
107 |
|
src/tools/plots.py
CHANGED
@@ -34,7 +34,9 @@ def create_scores_df(raw_data: list[EvalResult]) -> pd.DataFrame:
|
|
34 |
column = task.col_name
|
35 |
for _, row in results_df.iterrows():
|
36 |
current_model = row["full_model"]
|
37 |
-
|
|
|
|
|
38 |
continue
|
39 |
|
40 |
current_date = row["date"]
|
|
|
34 |
column = task.col_name
|
35 |
for _, row in results_df.iterrows():
|
36 |
current_model = row["full_model"]
|
37 |
+
# We ignore models that are flagged/no longer on the hub/not finished
|
38 |
+
to_ignore = not row["still_on_hub"] or row["flagged"] or current_model in FLAGGED_MODELS or row["status"] != "FINISHED"
|
39 |
+
if to_ignore:
|
40 |
continue
|
41 |
|
42 |
current_date = row["date"]
|