eduagarcia committed on
Commit
811ded7
2 Parent(s): f3a1876 a4c11b8

Merge branch 'main' of https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard into merge_original

Browse files
app.py CHANGED
@@ -60,28 +60,29 @@ from src.tools.plots import (
60
  def restart_space():
61
  API.restart_space(repo_id=REPO_ID, token=H4_TOKEN)
62
 
63
- def init_space():
64
- try:
65
- print(EVAL_REQUESTS_PATH)
66
- snapshot_download(
67
- repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30
68
- )
69
- except Exception:
70
- restart_space()
71
- try:
72
- print(DYNAMIC_INFO_PATH)
73
- snapshot_download(
74
- repo_id=DYNAMIC_INFO_REPO, local_dir=DYNAMIC_INFO_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30
75
- )
76
- except Exception:
77
- restart_space()
78
- try:
79
- print(EVAL_RESULTS_PATH)
80
- snapshot_download(
81
- repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30
82
- )
83
- except Exception:
84
- restart_space()
 
85
 
86
  # Init in case of empty
87
  if not os.path.exists(DYNAMIC_INFO_FILE_PATH):
 
60
  def restart_space():
61
  API.restart_space(repo_id=REPO_ID, token=H4_TOKEN)
62
 
63
+ def init_space(full_init: bool = True):
64
+ if full_init:
65
+ try:
66
+ print(EVAL_REQUESTS_PATH)
67
+ snapshot_download(
68
+ repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30
69
+ )
70
+ except Exception:
71
+ restart_space()
72
+ try:
73
+ print(DYNAMIC_INFO_PATH)
74
+ snapshot_download(
75
+ repo_id=DYNAMIC_INFO_REPO, local_dir=DYNAMIC_INFO_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30
76
+ )
77
+ except Exception:
78
+ restart_space()
79
+ try:
80
+ print(EVAL_RESULTS_PATH)
81
+ snapshot_download(
82
+ repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30
83
+ )
84
+ except Exception:
85
+ restart_space()
86
 
87
  # Init in case of empty
88
  if not os.path.exists(DYNAMIC_INFO_FILE_PATH):
requirements.txt CHANGED
@@ -13,7 +13,7 @@ python-dateutil==2.8.2
13
  requests==2.28.2
14
  sentencepiece
15
  tqdm==4.65.0
16
- transformers==4.37.1
17
  tokenizers>=0.15.0
18
  tiktoken>=0.5.2
19
  einops==0.7.0
 
13
  requests==2.28.2
14
  sentencepiece
15
  tqdm==4.65.0
16
+ transformers==4.38.0
17
  tokenizers>=0.15.0
18
  tiktoken>=0.5.2
19
  einops==0.7.0
src/leaderboard/filter_models.py CHANGED
@@ -89,6 +89,34 @@ FLAGGED_MODELS = {
89
  "DopeorNope/SOLARC-MOE-10.7Bx6 ": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/511",
90
  "DopeorNope/SOLARC-MOE-10.7Bx4": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/511",
91
  "gagan3012/MetaModelv2 ": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/511",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
  }
93
 
94
  # Models which have been requested by orgs to not be submitted on the leaderboard
 
89
  "DopeorNope/SOLARC-MOE-10.7Bx6 ": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/511",
90
  "DopeorNope/SOLARC-MOE-10.7Bx4": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/511",
91
  "gagan3012/MetaModelv2 ": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/511",
92
+ "udkai/Turdus": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540",
93
+ "kodonho/Solar-OrcaDPO-Solar-Instruct-SLERP": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540",
94
+ "kodonho/SolarM-SakuraSolar-SLERP": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540",
95
+ "Yhyu13/LMCocktail-10.7B-v1": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540",
96
+ "mlabonne/NeuralMarcoro14-7B": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540",
97
+ "Neuronovo/neuronovo-7B-v0.2": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540",
98
+ "ryandt/MusingCaterpillar": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540",
99
+ "Neuronovo/neuronovo-7B-v0.3": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540",
100
+ "SanjiWatsuki/Lelantos-DPO-7B": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540",
101
+ "bardsai/jaskier-7b-dpo": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540",
102
+ "cookinai/OpenCM-14": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540",
103
+ "bardsai/jaskier-7b-dpo-v2": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540",
104
+ "jan-hq/supermario-v2": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540",
105
+ # MoErges
106
+ "cloudyu/Yi-34Bx2-MoE-60B":"https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540",
107
+ "cloudyu/Mixtral_34Bx2_MoE_60B":"https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540",
108
+ "gagan3012/MetaModel_moe":"https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540",
109
+ "macadeliccc/SOLAR-math-2x10.7b-v0.2":"https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540",
110
+ "cloudyu/Mixtral_7Bx2_MoE":"https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540",
111
+ "macadeliccc/SOLAR-math-2x10.7b":"https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540",
112
+ "macadeliccc/Orca-SOLAR-4x10.7b":"https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540",
113
+ "macadeliccc/piccolo-8x7b":"https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540",
114
+ "cloudyu/Mixtral_7Bx4_MOE_24B":"https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540",
115
+ "macadeliccc/laser-dolphin-mixtral-2x7b-dpo":"https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540",
116
+ "macadeliccc/polyglot-math-4x7b":"https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/540",
117
+ # Other - contamination mostly
118
+ "DopeorNope/COKAL-v1-70B": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/566",
119
+ "CultriX/MistralTrix-v1": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/556",
120
  }
121
 
122
  # Models which have been requested by orgs to not be submitted on the leaderboard
src/submission/check_validity.py CHANGED
@@ -66,9 +66,10 @@ def is_model_on_hub(model_name: str, revision: str, token: str = None, trust_rem
66
 
67
  except Exception as e:
68
  traceback.print_exc()
 
 
69
  return False, "was not found on hub!", None
70
 
71
-
72
  def get_model_size(model_info: ModelInfo, precision: str):
73
  size_pattern = re.compile(r"(\d+\.)?\d+(b|m)")
74
  safetensors = None
 
66
 
67
  except Exception as e:
68
  traceback.print_exc()
69
+ if "You are trying to access a gated repo." in str(e):
70
+ return True, "uses a gated model.", None
71
  return False, "was not found on hub!", None
72
 
 
73
  def get_model_size(model_info: ModelInfo, precision: str):
74
  size_pattern = re.compile(r"(\d+\.)?\d+(b|m)")
75
  safetensors = None
src/submission/submit.py CHANGED
@@ -73,7 +73,7 @@ def add_new_eval(
73
  created_at = ""
74
  if not weight_type == "Adapter":
75
  model_on_hub, error, model_config = is_model_on_hub(model_name=model, revision=revision, test_tokenizer=True)
76
- if not model_on_hub:
77
  return styled_error(f'Model "{model}" {error}')
78
  if model_config is not None:
79
  architectures = getattr(model_config, "architectures", None)
@@ -100,6 +100,8 @@ def add_new_eval(
100
  #return styled_error("Please select a license for your model")
101
 
102
  modelcard_OK, error_msg, model_card = check_model_card(model)
 
 
103
 
104
  tags = get_model_tags(model_card, model)
105
 
 
73
  created_at = ""
74
  if not weight_type == "Adapter":
75
  model_on_hub, error, model_config = is_model_on_hub(model_name=model, revision=revision, test_tokenizer=True)
76
+ if not model_on_hub or model_config is None:
77
  return styled_error(f'Model "{model}" {error}')
78
  if model_config is not None:
79
  architectures = getattr(model_config, "architectures", None)
 
100
  #return styled_error("Please select a license for your model")
101
 
102
  modelcard_OK, error_msg, model_card = check_model_card(model)
103
+ #if not modelcard_OK:
104
+ # return styled_error(error_msg)
105
 
106
  tags = get_model_tags(model_card, model)
107
 
src/tools/plots.py CHANGED
@@ -34,7 +34,9 @@ def create_scores_df(raw_data: list[EvalResult]) -> pd.DataFrame:
34
  column = task.col_name
35
  for _, row in results_df.iterrows():
36
  current_model = row["full_model"]
37
- if current_model in FLAGGED_MODELS:
 
 
38
  continue
39
 
40
  current_date = row["date"]
 
34
  column = task.col_name
35
  for _, row in results_df.iterrows():
36
  current_model = row["full_model"]
37
+ # We ignore models that are flagged/no longer on the hub/not finished
38
+ to_ignore = not row["still_on_hub"] or row["flagged"] or current_model in FLAGGED_MODELS or row["status"] != "FINISHED"
39
+ if to_ignore:
40
  continue
41
 
42
  current_date = row["date"]