Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
eduagarcia
committed on
Merge branch 'main' of https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard into merge_original
Browse files- app.py +23 -22
- requirements.txt +1 -1
- src/leaderboard/filter_models.py +28 -0
- src/submission/check_validity.py +2 -1
- src/submission/submit.py +3 -1
- src/tools/plots.py +3 -1
app.py
CHANGED
@@ -60,28 +60,29 @@ from src.tools.plots import (
|
|
60 |
def restart_space():
|
61 |
API.restart_space(repo_id=REPO_ID, token=H4_TOKEN)
|
62 |
|
63 |
-
def init_space():
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
|
|
85 |
|
86 |
# Init in case of empty
|
87 |
if not os.path.exists(DYNAMIC_INFO_FILE_PATH):
|
|
|
60 |
def restart_space():
|
61 |
API.restart_space(repo_id=REPO_ID, token=H4_TOKEN)
|
62 |
|
63 |
+
def init_space(full_init: bool = True):
|
64 |
+
if full_init:
|
65 |
+
try:
|
66 |
+
print(EVAL_REQUESTS_PATH)
|
67 |
+
snapshot_download(
|
68 |
+
repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30
|
69 |
+
)
|
70 |
+
except Exception:
|
71 |
+
restart_space()
|
72 |
+
try:
|
73 |
+
print(DYNAMIC_INFO_PATH)
|
74 |
+
snapshot_download(
|
75 |
+
repo_id=DYNAMIC_INFO_REPO, local_dir=DYNAMIC_INFO_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30
|
76 |
+
)
|
77 |
+
except Exception:
|
78 |
+
restart_space()
|
79 |
+
try:
|
80 |
+
print(EVAL_RESULTS_PATH)
|
81 |
+
snapshot_download(
|
82 |
+
repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30
|
83 |
+
)
|
84 |
+
except Exception:
|
85 |
+
restart_space()
|
86 |
|
87 |
# Init in case of empty
|
88 |
if not os.path.exists(DYNAMIC_INFO_FILE_PATH):
|
requirements.txt
CHANGED
@@ -13,7 +13,7 @@ python-dateutil==2.8.2
|
|
13 |
requests==2.28.2
|
14 |
sentencepiece
|
15 |
tqdm==4.65.0
|
16 |
-
transformers==4.
|
17 |
tokenizers>=0.15.0
|
18 |
tiktoken>=0.5.2
|
19 |
einops==0.7.0
|
|
|
13 |
requests==2.28.2
|
14 |
sentencepiece
|
15 |
tqdm==4.65.0
|
16 |
+
transformers==4.38.0
|
17 |
tokenizers>=0.15.0
|
18 |
tiktoken>=0.5.2
|
19 |
einops==0.7.0
|
src/leaderboard/filter_models.py
CHANGED
@@ -89,6 +89,34 @@ FLAGGED_MODELS = {
|
|
89 |
"DopeorNope/SOLARC-MOE-10.7Bx6 ": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/511",
|
90 |
"DopeorNope/SOLARC-MOE-10.7Bx4": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/511",
|
91 |
"gagan3012/MetaModelv2 ": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/511",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
92 |
}
|
93 |
|
94 |
# Models which have been requested by orgs to not be submitted on the leaderboard
|
|
|
89 |
"DopeorNope/SOLARC-MOE-10.7Bx6 ": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/511",
|
90 |
"DopeorNope/SOLARC-MOE-10.7Bx4": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/511",
|
91 |
"gagan3012/MetaModelv2 ": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/511",
|
92 |
+
"udkai/Turdus": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540",
|
93 |
+
"kodonho/Solar-OrcaDPO-Solar-Instruct-SLERP": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540",
|
94 |
+
"kodonho/SolarM-SakuraSolar-SLERP": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540",
|
95 |
+
"Yhyu13/LMCocktail-10.7B-v1": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540",
|
96 |
+
"mlabonne/NeuralMarcoro14-7B": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540",
|
97 |
+
"Neuronovo/neuronovo-7B-v0.2": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540",
|
98 |
+
"ryandt/MusingCaterpillar": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540",
|
99 |
+
"Neuronovo/neuronovo-7B-v0.3": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540",
|
100 |
+
"SanjiWatsuki/Lelantos-DPO-7B": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540",
|
101 |
+
"bardsai/jaskier-7b-dpo": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540",
|
102 |
+
"cookinai/OpenCM-14": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540",
|
103 |
+
"bardsai/jaskier-7b-dpo-v2": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540",
|
104 |
+
"jan-hq/supermario-v2": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540",
|
105 |
+
# MoErges
|
106 |
+
"cloudyu/Yi-34Bx2-MoE-60B":"https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540",
|
107 |
+
"cloudyu/Mixtral_34Bx2_MoE_60B":"https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540",
|
108 |
+
"gagan3012/MetaModel_moe":"https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540",
|
109 |
+
"macadeliccc/SOLAR-math-2x10.7b-v0.2":"https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540",
|
110 |
+
"cloudyu/Mixtral_7Bx2_MoE":"https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540",
|
111 |
+
"macadeliccc/SOLAR-math-2x10.7b":"https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540",
|
112 |
+
"macadeliccc/Orca-SOLAR-4x10.7b":"https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540",
|
113 |
+
"macadeliccc/piccolo-8x7b":"https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540",
|
114 |
+
"cloudyu/Mixtral_7Bx4_MOE_24B":"https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540",
|
115 |
+
"macadeliccc/laser-dolphin-mixtral-2x7b-dpo":"https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540",
|
116 |
+
"macadeliccc/polyglot-math-4x7b":"https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540",
|
117 |
+
# Other - contamination mostly
|
118 |
+
"DopeorNope/COKAL-v1-70B": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/566",
|
119 |
+
"CultriX/MistralTrix-v1": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/556",
|
120 |
}
|
121 |
|
122 |
# Models which have been requested by orgs to not be submitted on the leaderboard
|
src/submission/check_validity.py
CHANGED
@@ -66,9 +66,10 @@ def is_model_on_hub(model_name: str, revision: str, token: str = None, trust_rem
|
|
66 |
|
67 |
except Exception as e:
|
68 |
traceback.print_exc()
|
|
|
|
|
69 |
return False, "was not found on hub!", None
|
70 |
|
71 |
-
|
72 |
def get_model_size(model_info: ModelInfo, precision: str):
|
73 |
size_pattern = re.compile(r"(\d+\.)?\d+(b|m)")
|
74 |
safetensors = None
|
|
|
66 |
|
67 |
except Exception as e:
|
68 |
traceback.print_exc()
|
69 |
+
if "You are trying to access a gated repo." in str(e):
|
70 |
+
return True, "uses a gated model.", None
|
71 |
return False, "was not found on hub!", None
|
72 |
|
|
|
73 |
def get_model_size(model_info: ModelInfo, precision: str):
|
74 |
size_pattern = re.compile(r"(\d+\.)?\d+(b|m)")
|
75 |
safetensors = None
|
src/submission/submit.py
CHANGED
@@ -73,7 +73,7 @@ def add_new_eval(
|
|
73 |
created_at = ""
|
74 |
if not weight_type == "Adapter":
|
75 |
model_on_hub, error, model_config = is_model_on_hub(model_name=model, revision=revision, test_tokenizer=True)
|
76 |
-
if not model_on_hub:
|
77 |
return styled_error(f'Model "{model}" {error}')
|
78 |
if model_config is not None:
|
79 |
architectures = getattr(model_config, "architectures", None)
|
@@ -100,6 +100,8 @@ def add_new_eval(
|
|
100 |
#return styled_error("Please select a license for your model")
|
101 |
|
102 |
modelcard_OK, error_msg, model_card = check_model_card(model)
|
|
|
|
|
103 |
|
104 |
tags = get_model_tags(model_card, model)
|
105 |
|
|
|
73 |
created_at = ""
|
74 |
if not weight_type == "Adapter":
|
75 |
model_on_hub, error, model_config = is_model_on_hub(model_name=model, revision=revision, test_tokenizer=True)
|
76 |
+
if not model_on_hub or model_config is None:
|
77 |
return styled_error(f'Model "{model}" {error}')
|
78 |
if model_config is not None:
|
79 |
architectures = getattr(model_config, "architectures", None)
|
|
|
100 |
#return styled_error("Please select a license for your model")
|
101 |
|
102 |
modelcard_OK, error_msg, model_card = check_model_card(model)
|
103 |
+
#if not modelcard_OK:
|
104 |
+
# return styled_error(error_msg)
|
105 |
|
106 |
tags = get_model_tags(model_card, model)
|
107 |
|
src/tools/plots.py
CHANGED
@@ -34,7 +34,9 @@ def create_scores_df(raw_data: list[EvalResult]) -> pd.DataFrame:
|
|
34 |
column = task.col_name
|
35 |
for _, row in results_df.iterrows():
|
36 |
current_model = row["full_model"]
|
37 |
-
|
|
|
|
|
38 |
continue
|
39 |
|
40 |
current_date = row["date"]
|
|
|
34 |
column = task.col_name
|
35 |
for _, row in results_df.iterrows():
|
36 |
current_model = row["full_model"]
|
37 |
+
# We ignore models that are flagged/no longer on the hub/not finished
|
38 |
+
to_ignore = not row["still_on_hub"] or row["flagged"] or current_model in FLAGGED_MODELS or row["status"] != "FINISHED"
|
39 |
+
if to_ignore:
|
40 |
continue
|
41 |
|
42 |
current_date = row["date"]
|