hysts HF staff committed on
Commit b5474e9
1 Parent(s): 4138f92

Apply pre-commit

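The diffs below are mechanical cleanups of the kind produced by standard Python formatting hooks: quotes normalized to double quotes, spaces added around operators, long calls wrapped, imports sorted and unused ones dropped, and two blank lines enforced between top-level definitions. As a minimal sketch of running such a pass locally (assuming the repository ships a `.pre-commit-config.yaml`, which is not part of this commit), wrapping the `pre-commit` CLI from Python:

```python
import subprocess

# Run every configured pre-commit hook over the whole repository.
# Assumes `pre-commit` is installed (pip install pre-commit) and that the
# repo contains a .pre-commit-config.yaml -- that file is not in this diff.
result = subprocess.run(["pre-commit", "run", "--all-files"], check=False)
print("hooks passed" if result.returncode == 0 else "hooks rewrote files or reported issues")
```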
README.md CHANGED
@@ -39,7 +39,7 @@ If you encounter problems on the space, don't hesitate to restart it to remove th
39
 
40
  # Code logic for more complex edits
41
 
42
- You'll find
43
  - the main table's column names and properties in `src/display/utils.py`
44
  - the logic to read all results and request files, then convert them into dataframe lines, in `src/leaderboard/read_evals.py` and `src/populate.py`
45
  - the logic to allow or filter submissions in `src/submission/submit.py` and `src/submission/check_validity.py`
 
39
 
40
  # Code logic for more complex edits
41
 
42
+ You'll find
43
  - the main table's column names and properties in `src/display/utils.py`
44
  - the logic to read all results and request files, then convert them into dataframe lines, in `src/leaderboard/read_evals.py` and `src/populate.py`
45
  - the logic to allow or filter submissions in `src/submission/submit.py` and `src/submission/check_validity.py`
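The second bullet above points at the results-to-table flow. A rough sketch of that flow, using `get_raw_eval_results` and `EvalResult.to_dict` as they appear in this commit's `src/leaderboard/read_evals.py`; the DataFrame step itself lives in `src/populate.py`, which is not shown, so `build_leaderboard_df` here is a hypothetical stand-in for it:

```python
import pandas as pd

from src.leaderboard.read_evals import get_raw_eval_results


def build_leaderboard_df(results_path: str, requests_path: str) -> pd.DataFrame:
    """Hypothetical stand-in for the logic in src/populate.py."""
    raw_results = get_raw_eval_results(results_path, requests_path)
    rows = [result.to_dict() for result in raw_results]  # one dict per evaluated model
    return pd.DataFrame.from_records(rows)
```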
src/about.py CHANGED
@@ -41,8 +41,12 @@ class Tasks(Enum):
41
  jsem_exact_match = Task("scores", "jsem_exact_match", "JSEM")
42
  jsick_exact_match = Task("scores", "jsick_exact_match", "JSICK")
43
  jsquad_char_f1 = Task("scores", "jsquad_char_f1", "JSquad")
44
- jsts_pearson = Task("scores", "jsts_pearson", "JSTS (Pearson) - 意味的類似度") # Semantic Textual Similarity - 意味的類似度
45
- jsts_spearman = Task("scores", "jsts_spearman", "JSTS (Spearman) - 意味的類似度") # Semantic Textual Similarity - 意味的類似度
 
 
 
 
46
  kuci_exact_match = Task("scores", "kuci_exact_match", "KUCI")
47
  mawps_exact_match = Task("scores", "mawps_exact_match", "MAWPS")
48
  mmlu_en_exact_match = Task("scores", "mmlu_en_exact_match", "MMLU")
@@ -52,10 +56,14 @@ class Tasks(Enum):
52
  wiki_ner_set_f1 = Task("scores", "wiki_ner_set_f1", "Wiki NER")
53
  wiki_pas_set_f1 = Task("scores", "wiki_pas_set_f1", "Wiki PAS")
54
  wiki_reading_char_f1 = Task("scores", "wiki_reading_char_f1", "Wiki Reading")
55
- wikicorpus_e_to_j_bert_score_ja_f1 = Task("scores", "wikicorpus-e-to-j_bert_score_ja_f1", "WikiCorpus E to J BERT Score")
 
 
56
  wikicorpus_e_to_j_bleu_ja = Task("scores", "wikicorpus-e-to-j_bleu_ja", "WikiCorpus E to J BLEU")
57
  wikicorpus_e_to_j_comet_wmt22 = Task("scores", "wikicorpus-e-to-j_comet_wmt22", "WikiCorpus E to J COMET WMT22")
58
- wikicorpus_j_to_e_bert_score_en_f1 = Task("scores", "wikicorpus-j-to-e_bert_score_en_f1", "WikiCorpus J to E BERT Score")
 
 
59
  wikicorpus_j_to_e_bleu_en = Task("scores", "wikicorpus-j-to-e_bleu_en", "WikiCorpus J to E BLEU")
60
  wikicorpus_j_to_e_comet_wmt22 = Task("scores", "wikicorpus-j-to-e_comet_wmt22", "WikiCorpus J to E COMET WMT22")
61
  xlsum_ja_bert_score_ja_f1 = Task("scores", "xlsum_ja_bert_score_ja_f1", "XL-Sum JA BERT Score")
 
41
  jsem_exact_match = Task("scores", "jsem_exact_match", "JSEM")
42
  jsick_exact_match = Task("scores", "jsick_exact_match", "JSICK")
43
  jsquad_char_f1 = Task("scores", "jsquad_char_f1", "JSquad")
44
+ jsts_pearson = Task(
45
+ "scores", "jsts_pearson", "JSTS (Pearson) - 意味的類似度"
46
+ ) # Semantic Textual Similarity - 意味的類似度
47
+ jsts_spearman = Task(
48
+ "scores", "jsts_spearman", "JSTS (Spearman) - 意味的類似度"
49
+ ) # Semantic Textual Similarity - 意味的類似度
50
  kuci_exact_match = Task("scores", "kuci_exact_match", "KUCI")
51
  mawps_exact_match = Task("scores", "mawps_exact_match", "MAWPS")
52
  mmlu_en_exact_match = Task("scores", "mmlu_en_exact_match", "MMLU")
 
56
  wiki_ner_set_f1 = Task("scores", "wiki_ner_set_f1", "Wiki NER")
57
  wiki_pas_set_f1 = Task("scores", "wiki_pas_set_f1", "Wiki PAS")
58
  wiki_reading_char_f1 = Task("scores", "wiki_reading_char_f1", "Wiki Reading")
59
+ wikicorpus_e_to_j_bert_score_ja_f1 = Task(
60
+ "scores", "wikicorpus-e-to-j_bert_score_ja_f1", "WikiCorpus E to J BERT Score"
61
+ )
62
  wikicorpus_e_to_j_bleu_ja = Task("scores", "wikicorpus-e-to-j_bleu_ja", "WikiCorpus E to J BLEU")
63
  wikicorpus_e_to_j_comet_wmt22 = Task("scores", "wikicorpus-e-to-j_comet_wmt22", "WikiCorpus E to J COMET WMT22")
64
+ wikicorpus_j_to_e_bert_score_en_f1 = Task(
65
+ "scores", "wikicorpus-j-to-e_bert_score_en_f1", "WikiCorpus J to E BERT Score"
66
+ )
67
  wikicorpus_j_to_e_bleu_en = Task("scores", "wikicorpus-j-to-e_bleu_en", "WikiCorpus J to E BLEU")
68
  wikicorpus_j_to_e_comet_wmt22 = Task("scores", "wikicorpus-j-to-e_comet_wmt22", "WikiCorpus J to E COMET WMT22")
69
  xlsum_ja_bert_score_ja_f1 = Task("scores", "xlsum_ja_bert_score_ja_f1", "XL-Sum JA BERT Score")
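The `Task` definition itself sits above this hunk and is not shown; judging from the positional arguments here and from `task.value.metric` / `task.value.col_name` used in `src/leaderboard/read_evals.py` and `src/display/utils.py`, it is presumably a small record along these lines (an assumption, not the file's actual code):

```python
from dataclasses import dataclass
from enum import Enum


# Assumed shape of Task, inferred from the positional calls above and from
# how task.value is consumed elsewhere in this commit.
@dataclass(frozen=True)
class Task:
    benchmark: str  # key in the results JSON, always "scores" here
    metric: str     # metric name under that key, e.g. "jsts_pearson"
    col_name: str   # label of the leaderboard column


class Tasks(Enum):
    # Two representative members; the real enum lists every benchmark above.
    jsts_pearson = Task("scores", "jsts_pearson", "JSTS (Pearson)")
    jsquad_char_f1 = Task("scores", "jsquad_char_f1", "JSquad")


# Each member's value drives both the score lookup and the column header:
for task in Tasks:
    print(task.value.metric, "->", task.value.col_name)
```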
src/display/utils.py CHANGED
@@ -5,6 +5,7 @@ import pandas as pd
5
 
6
  from src.about import Tasks
7
 
 
8
  def fields(raw_class):
9
  return [v for k, v in raw_class.__dict__.items() if k[:2] != "__" and k[-2:] != "__"]
10
 
@@ -21,12 +22,13 @@ class ColumnContent:
21
  never_hidden: bool = False
22
  dummy: bool = False
23
 
 
24
  ## Leaderboard columns
25
  auto_eval_column_dict = []
26
  # Init
27
  auto_eval_column_dict.append(["model_type_symbol", ColumnContent, ColumnContent("T", "str", True, never_hidden=True)])
28
  auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)])
29
- #Scores
30
  # auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Average ⬆️", "number", True)])
31
  for task in Tasks:
32
  auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
@@ -47,6 +49,7 @@ auto_eval_column_dict.append(["dummy", ColumnContent, ColumnContent("model_name_
47
  # We use make_dataclass to dynamically fill the scores from Tasks
48
  AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
49
 
 
50
  ## For the queue columns in the submission tab
51
  @dataclass(frozen=True)
52
  class EvalQueueColumn: # Queue column
@@ -57,12 +60,13 @@ class EvalQueueColumn: # Queue column
57
  weight_type = ColumnContent("weight_type", "str", "Original")
58
  status = ColumnContent("status", "str", True)
59
 
 
60
  ## All the model information that we might need
61
  @dataclass
62
  class ModelDetails:
63
  name: str
64
  display_name: str = ""
65
- symbol: str = "" # emoji
66
 
67
 
68
  class ModelType(Enum):
@@ -87,11 +91,13 @@ class ModelType(Enum):
87
  return ModelType.IFT
88
  return ModelType.Unknown
89
 
 
90
  class WeightType(Enum):
91
  Adapter = ModelDetails("Adapter")
92
  Original = ModelDetails("Original")
93
  Delta = ModelDetails("Delta")
94
 
 
95
  class Precision(Enum):
96
  float16 = ModelDetails("float16")
97
  bfloat16 = ModelDetails("bfloat16")
@@ -104,23 +110,26 @@ class Precision(Enum):
104
  return Precision.bfloat16
105
  return Precision.Unknown
106
 
 
107
  class AddSpecialTokens(Enum):
108
  true = ModelDetails("True")
109
  false = ModelDetails("False")
110
  Unknown = ModelDetails("?")
111
 
 
112
  class NumFewShots(Enum):
113
  shots_0 = ModelDetails("0")
114
  shots_4 = ModelDetails("4")
115
  Unknown = ModelDetails("?")
116
 
117
  def from_str(shots):
118
- if shots=='0':
119
  return NumFewShots.shots_0
120
- if shots=='4':
121
  return NumFewShots.shots_4
122
  return NumFewShots.Unknown
123
 
 
124
  # Column selection
125
  COLS = [c.name for c in fields(AutoEvalColumn) if not c.hidden]
126
  TYPES = [c.type for c in fields(AutoEvalColumn)]
 
5
 
6
  from src.about import Tasks
7
 
8
+
9
  def fields(raw_class):
10
  return [v for k, v in raw_class.__dict__.items() if k[:2] != "__" and k[-2:] != "__"]
11
 
 
22
  never_hidden: bool = False
23
  dummy: bool = False
24
 
25
+
26
  ## Leaderboard columns
27
  auto_eval_column_dict = []
28
  # Init
29
  auto_eval_column_dict.append(["model_type_symbol", ColumnContent, ColumnContent("T", "str", True, never_hidden=True)])
30
  auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)])
31
+ # Scores
32
  # auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Average ⬆️", "number", True)])
33
  for task in Tasks:
34
  auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
 
49
  # We use make_dataclass to dynamically fill the scores from Tasks
50
  AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
51
 
52
+
53
  ## For the queue columns in the submission tab
54
  @dataclass(frozen=True)
55
  class EvalQueueColumn: # Queue column
 
60
  weight_type = ColumnContent("weight_type", "str", "Original")
61
  status = ColumnContent("status", "str", True)
62
 
63
+
64
  ## All the model information that we might need
65
  @dataclass
66
  class ModelDetails:
67
  name: str
68
  display_name: str = ""
69
+ symbol: str = "" # emoji
70
 
71
 
72
  class ModelType(Enum):
 
91
  return ModelType.IFT
92
  return ModelType.Unknown
93
 
94
+
95
  class WeightType(Enum):
96
  Adapter = ModelDetails("Adapter")
97
  Original = ModelDetails("Original")
98
  Delta = ModelDetails("Delta")
99
 
100
+
101
  class Precision(Enum):
102
  float16 = ModelDetails("float16")
103
  bfloat16 = ModelDetails("bfloat16")
 
110
  return Precision.bfloat16
111
  return Precision.Unknown
112
 
113
+
114
  class AddSpecialTokens(Enum):
115
  true = ModelDetails("True")
116
  false = ModelDetails("False")
117
  Unknown = ModelDetails("?")
118
 
119
+
120
  class NumFewShots(Enum):
121
  shots_0 = ModelDetails("0")
122
  shots_4 = ModelDetails("4")
123
  Unknown = ModelDetails("?")
124
 
125
  def from_str(shots):
126
+ if shots == "0":
127
  return NumFewShots.shots_0
128
+ if shots == "4":
129
  return NumFewShots.shots_4
130
  return NumFewShots.Unknown
131
 
132
+
133
  # Column selection
134
  COLS = [c.name for c in fields(AutoEvalColumn) if not c.hidden]
135
  TYPES = [c.type for c in fields(AutoEvalColumn)]
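The `auto_eval_column_dict` entries above are `[attribute_name, annotation, default_instance]` triples fed to `make_dataclass`, which turns each column description into a class attribute of `AutoEvalColumn`. A self-contained sketch of that pattern (the stand-in `ColumnContent` below is trimmed and its field names are approximate; the real one also carries `hidden`, `never_hidden`, and `dummy` flags):

```python
from dataclasses import dataclass, make_dataclass


@dataclass(frozen=True)
class ColumnContent:
    name: str
    type: str
    displayed_by_default: bool = True


# Same shape as auto_eval_column_dict: [attribute_name, annotation, default].
columns = [
    ["model", ColumnContent, ColumnContent("Model", "markdown")],
    ["jsts_pearson", ColumnContent, ColumnContent("JSTS (Pearson)", "number")],
]
AutoEvalColumn = make_dataclass("AutoEvalColumn", columns, frozen=True)

# The defaults become class attributes, so column metadata is reachable as:
print(AutoEvalColumn.model.name)         # -> "Model"
print(AutoEvalColumn.jsts_pearson.type)  # -> "number"
```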
src/envs.py CHANGED
@@ -4,9 +4,9 @@ from huggingface_hub import HfApi
4
 
5
  # Info to change for your repository
6
  # ----------------------------------
7
- TOKEN = os.environ.get("HF_TOKEN") # A read/write token for your org
8
 
9
- OWNER = "llm-jp" # Change to your org - don't forget to create a results and request dataset, with the correct format!
10
  # ----------------------------------
11
 
12
  REPO_ID = f"{OWNER}/open-japanese-llm-leaderboard"
@@ -14,7 +14,7 @@ QUEUE_REPO = f"{OWNER}/requests"
14
  RESULTS_REPO = f"{OWNER}/results"
15
 
16
  # If you set up a cache later, just change HF_HOME
17
- CACHE_PATH=os.getenv("HF_HOME", ".")
18
 
19
  # Local caches
20
  EVAL_REQUESTS_PATH = os.path.join(CACHE_PATH, "eval-queue")
 
4
 
5
  # Info to change for your repository
6
  # ----------------------------------
7
+ TOKEN = os.environ.get("HF_TOKEN") # A read/write token for your org
8
 
9
+ OWNER = "llm-jp" # Change to your org - don't forget to create a results and request dataset, with the correct format!
10
  # ----------------------------------
11
 
12
  REPO_ID = f"{OWNER}/open-japanese-llm-leaderboard"
 
14
  RESULTS_REPO = f"{OWNER}/results"
15
 
16
  # If you set up a cache later, just change HF_HOME
17
+ CACHE_PATH = os.getenv("HF_HOME", ".")
18
 
19
  # Local caches
20
  EVAL_REQUESTS_PATH = os.path.join(CACHE_PATH, "eval-queue")
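A sketch of how these constants are typically consumed at startup: the requests and results datasets are mirrored into the local cache directories so the rest of the code can read them from disk. The call below uses the real `huggingface_hub.snapshot_download` API, but the surrounding app code is not part of this commit, so treat it as an illustration rather than the project's actual startup path:

```python
import os

from huggingface_hub import snapshot_download

TOKEN = os.environ.get("HF_TOKEN")  # read/write token, as in src/envs.py
OWNER = "llm-jp"
QUEUE_REPO = f"{OWNER}/requests"
CACHE_PATH = os.getenv("HF_HOME", ".")
EVAL_REQUESTS_PATH = os.path.join(CACHE_PATH, "eval-queue")

# Mirror the requests dataset locally so pending/finished request files
# can be read straight from EVAL_REQUESTS_PATH.
snapshot_download(
    repo_id=QUEUE_REPO,
    repo_type="dataset",
    local_dir=EVAL_REQUESTS_PATH,
    token=TOKEN,
)
```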
src/leaderboard/read_evals.py CHANGED
@@ -1,37 +1,36 @@
1
  import glob
2
  import json
3
- import math
4
  import os
5
  from dataclasses import dataclass
6
- import dateutil
7
- import numpy as np
8
  from decimal import Decimal
9
 
 
 
10
  from src.display.formatting import make_clickable_model
11
- from src.display.utils import AutoEvalColumn, ModelType, Tasks, Precision, WeightType
12
  from src.submission.check_validity import is_model_on_hub
13
 
14
 
15
  @dataclass
16
  class EvalResult:
17
- """Represents one full evaluation. Built from a combination of the result and request file for a given run.
18
- """
19
- eval_name: str # org_model_precision (uid)
20
- full_model: str # org/model (path on hub)
21
- org: str
22
  model: str
23
- revision: str # commit hash, "" if main
24
  results: dict
25
  # precision: Precision = Precision.Unknown
26
- model_type: ModelType = ModelType.Unknown # Pretrained, fine tuned, ...
27
  precision: str = "Unknown"
28
  # model_type: str = "Unknown"
29
- weight_type: WeightType = WeightType.Original # Original or Adapter
30
- architecture: str = "Unknown"
31
  license: str = "?"
32
  likes: int = 0
33
  num_params: int = 0
34
- date: str = "" # submission date of request file
35
  still_on_hub: bool = False
36
  num_few_shots: str = "0"
37
  add_special_tokens: str = ""
@@ -47,7 +46,7 @@ class EvalResult:
47
  model_config = config.get("model", {})
48
 
49
  # Get model type from metainfo
50
- # model_type_str = metainfo.get("model_type", "")
51
  # model_type = ModelType.from_str(model_type_str)
52
  # model_type = metainfo.get("model_type", "Unknown")
53
 
@@ -59,13 +58,15 @@ class EvalResult:
59
  precision = model_config.get("dtype", "Unknown")
60
 
61
  # Add Special Tokens
62
- add_special_tokens = str(config.get("pipeline_kwargs",{"add_special_tokens":"Unknown"}).get("add_special_tokens"))
 
 
63
 
64
  # Get model and org
65
  # org_and_model = config.get("model_name", config.get("offline_inference").get("model_name", None))
66
  org_and_model = config.get("model_name", config.get("offline_inference", {}).get("model_name", "Unknown"))
67
  org_and_model = org_and_model.split("/", 1)
68
-
69
  # If org_and_model is a list, join it with "/"
70
  if isinstance(org_and_model, list):
71
  full_model = "/".join(org_and_model)
@@ -92,7 +93,7 @@ class EvalResult:
92
  architectures = getattr(model_config, "architectures", None)
93
  if architectures:
94
  architecture = ";".join(architectures)
95
-
96
  if "scores" not in data:
97
  raise KeyError(f"'scores' key not found in JSON file: {json_filepath}")
98
 
@@ -103,7 +104,6 @@ class EvalResult:
103
  score = scores.get(task_value.metric)
104
  results[task_value.metric] = score
105
 
106
-
107
  return self(
108
  eval_name=result_key,
109
  full_model=full_model,
@@ -121,12 +121,6 @@ class EvalResult:
121
  def update_with_request_file(self, requests_path):
122
  """Finds the relevant request file for the current model and updates info with it"""
123
  request_file = get_request_file_for_model(requests_path, self.full_model, self.precision)
124
- if request_file:
125
- with open(request_file, "r") as f:
126
- request_data = json.load(f)
127
- else:
128
- print("No request file found.")
129
-
130
  try:
131
  with open(request_file, "r") as f:
132
  request = json.load(f)
@@ -186,17 +180,15 @@ def get_request_file_for_model(requests_path, model_name, precision):
186
  for tmp_request_file in request_files:
187
  with open(tmp_request_file, "r") as f:
188
  req_content = json.load(f)
189
- if (
190
- req_content["status"] in ["FINISHED"]
191
- and req_content["precision"] == precision.split(".")[-1]
192
- ):
193
  request_file = tmp_request_file
194
  return request_file
195
 
 
196
  def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResult]:
197
  """From the path of the results folder root, extract all needed info for results"""
198
  model_result_filepaths = []
199
-
200
  for root, _, files in os.walk(results_path):
201
  # We should only have json files in model results
202
  if len(files) == 0 or any([not f.endswith(".json") for f in files]):
@@ -210,7 +202,6 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResu
210
 
211
  for file in files:
212
  model_result_filepaths.append(os.path.join(root, file))
213
-
214
 
215
  eval_results = {}
216
  for model_result_filepath in model_result_filepaths:
@@ -225,17 +216,14 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResu
225
  else:
226
  eval_results[eval_name] = eval_result
227
 
228
- data_dict = eval_result.to_dict()
229
-
230
  results = []
231
  for v in eval_results.values():
232
  try:
233
- v.to_dict() # we test if the dict version is complete
234
  results.append(v)
235
  except KeyError: # not all eval values present
236
  continue
237
  # print(f"Processing file: {model_result_filepath}")
238
  # print(f"Eval result: {eval_result.to_dict()}")
239
 
240
-
241
- return results
 
1
  import glob
2
  import json
 
3
  import os
4
  from dataclasses import dataclass
 
 
5
  from decimal import Decimal
6
 
7
+ import dateutil
8
+
9
  from src.display.formatting import make_clickable_model
10
+ from src.display.utils import AutoEvalColumn, ModelType, Tasks, WeightType
11
  from src.submission.check_validity import is_model_on_hub
12
 
13
 
14
  @dataclass
15
  class EvalResult:
16
+ """Represents one full evaluation. Built from a combination of the result and request file for a given run."""
17
+
18
+ eval_name: str # org_model_precision (uid)
19
+ full_model: str # org/model (path on hub)
20
+ org: str
21
  model: str
22
+ revision: str # commit hash, "" if main
23
  results: dict
24
  # precision: Precision = Precision.Unknown
25
+ model_type: ModelType = ModelType.Unknown # Pretrained, fine tuned, ...
26
  precision: str = "Unknown"
27
  # model_type: str = "Unknown"
28
+ weight_type: WeightType = WeightType.Original # Original or Adapter
29
+ architecture: str = "Unknown"
30
  license: str = "?"
31
  likes: int = 0
32
  num_params: int = 0
33
+ date: str = "" # submission date of request file
34
  still_on_hub: bool = False
35
  num_few_shots: str = "0"
36
  add_special_tokens: str = ""
 
46
  model_config = config.get("model", {})
47
 
48
  # Get model type from metainfo
49
+ # model_type_str = metainfo.get("model_type", "")
50
  # model_type = ModelType.from_str(model_type_str)
51
  # model_type = metainfo.get("model_type", "Unknown")
52
 
 
58
  precision = model_config.get("dtype", "Unknown")
59
 
60
  # Add Special Tokens
61
+ add_special_tokens = str(
62
+ config.get("pipeline_kwargs", {"add_special_tokens": "Unknown"}).get("add_special_tokens")
63
+ )
64
 
65
  # Get model and org
66
  # org_and_model = config.get("model_name", config.get("offline_inference").get("model_name", None))
67
  org_and_model = config.get("model_name", config.get("offline_inference", {}).get("model_name", "Unknown"))
68
  org_and_model = org_and_model.split("/", 1)
69
+
70
  # If org_and_model is a list, join it with "/"
71
  if isinstance(org_and_model, list):
72
  full_model = "/".join(org_and_model)
 
93
  architectures = getattr(model_config, "architectures", None)
94
  if architectures:
95
  architecture = ";".join(architectures)
96
+
97
  if "scores" not in data:
98
  raise KeyError(f"'scores' key not found in JSON file: {json_filepath}")
99
 
 
104
  score = scores.get(task_value.metric)
105
  results[task_value.metric] = score
106
 
 
107
  return self(
108
  eval_name=result_key,
109
  full_model=full_model,
 
121
  def update_with_request_file(self, requests_path):
122
  """Finds the relevant request file for the current model and updates info with it"""
123
  request_file = get_request_file_for_model(requests_path, self.full_model, self.precision)
 
 
 
 
 
 
124
  try:
125
  with open(request_file, "r") as f:
126
  request = json.load(f)
 
180
  for tmp_request_file in request_files:
181
  with open(tmp_request_file, "r") as f:
182
  req_content = json.load(f)
183
+ if req_content["status"] in ["FINISHED"] and req_content["precision"] == precision.split(".")[-1]:
 
 
 
184
  request_file = tmp_request_file
185
  return request_file
186
 
187
+
188
  def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResult]:
189
  """From the path of the results folder root, extract all needed info for results"""
190
  model_result_filepaths = []
191
+
192
  for root, _, files in os.walk(results_path):
193
  # We should only have json files in model results
194
  if len(files) == 0 or any([not f.endswith(".json") for f in files]):
 
202
 
203
  for file in files:
204
  model_result_filepaths.append(os.path.join(root, file))
 
205
 
206
  eval_results = {}
207
  for model_result_filepath in model_result_filepaths:
 
216
  else:
217
  eval_results[eval_name] = eval_result
218
 
 
 
219
  results = []
220
  for v in eval_results.values():
221
  try:
222
+ v.to_dict() # we test if the dict version is complete
223
  results.append(v)
224
  except KeyError: # not all eval values present
225
  continue
226
  # print(f"Processing file: {model_result_filepath}")
227
  # print(f"Eval result: {eval_result.to_dict()}")
228
 
229
+ return results
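`EvalResult.init_from_json_file` pieces its fields together from a handful of keys visible in this diff (`model_name` or `offline_inference.model_name`, `model.dtype`, `pipeline_kwargs.add_special_tokens`, and a top-level `scores` dict). A hedged reconstruction of the expected result-file shape follows; the exact nesting of `config` relative to the file root is not shown in the hunk, and all values are placeholders:

```python
# Hypothetical result file, with placeholder values only.
example_result = {
    "config": {
        "model_name": "example-org/example-model",        # or offline_inference.model_name
        "model": {"dtype": "bfloat16"},                    # read as the precision
        "pipeline_kwargs": {"add_special_tokens": False},
    },
    "scores": {
        "jsts_pearson": 0.0,    # placeholder
        "jsquad_char_f1": 0.0,  # placeholder
    },
}

# The per-task loop in init_from_json_file then boils down to roughly:
scores = example_result["scores"]
results = {metric: scores.get(metric) for metric in ("jsts_pearson", "jsquad_char_f1")}
```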
 
src/submission/check_validity.py CHANGED
@@ -1,8 +1,6 @@
1
  import json
2
  import os
3
- import re
4
  from collections import defaultdict
5
- from datetime import datetime, timedelta, timezone
6
 
7
  import huggingface_hub
8
  from huggingface_hub import ModelCard
@@ -10,6 +8,7 @@ from huggingface_hub.hf_api import ModelInfo
10
  from transformers import AutoConfig
11
  from transformers.models.auto.tokenization_auto import AutoTokenizer
12
 
 
13
  def check_model_card(repo_id: str) -> tuple[bool, str]:
14
  """Checks if the model card and license exist and have been filled"""
15
  try:
@@ -31,31 +30,38 @@ def check_model_card(repo_id: str) -> tuple[bool, str]:
31
 
32
  return True, ""
33
 
34
- def is_model_on_hub(model_name: str, revision: str, token: str = None, trust_remote_code=False, test_tokenizer=False) -> tuple[bool, str]:
 
 
 
35
  """Checks if the model model_name is on the hub, and whether it (and its tokenizer) can be loaded with AutoClasses."""
36
  try:
37
- config = AutoConfig.from_pretrained(model_name, revision=revision, trust_remote_code=trust_remote_code, token=token)
 
 
38
  if test_tokenizer:
39
  try:
40
- tk = AutoTokenizer.from_pretrained(model_name, revision=revision, trust_remote_code=trust_remote_code, token=token)
 
 
41
  except ValueError as e:
 
 
42
  return (
43
  False,
44
- f"uses a tokenizer which is not in a transformers release: {e}",
45
- None
46
  )
47
- except Exception as e:
48
- return (False, "'s tokenizer cannot be loaded. Is your tokenizer class in a stable transformers release, and correctly configured?", None)
49
  return True, None, config
50
 
51
  except ValueError:
52
  return (
53
  False,
54
  "needs to be launched with `trust_remote_code=True`. For safety reason, we do not allow these models to be automatically submitted to the leaderboard.",
55
- None
56
  )
57
 
58
- except Exception as e:
59
  return False, "was not found on hub!", None
60
 
61
 
@@ -70,10 +76,12 @@ def get_model_size(model_info: ModelInfo, precision: str):
70
  model_size = size_factor * model_size
71
  return model_size
72
 
 
73
  def get_model_arch(model_info: ModelInfo):
74
  """Gets the model architecture from the configuration"""
75
  return model_info.config.get("architectures", "Unknown")
76
 
 
77
  def already_submitted_models(requested_models_dir: str) -> set[str]:
78
  """Gather a list of already submitted models to avoid duplicates"""
79
  depth = 1
@@ -88,7 +96,7 @@ def already_submitted_models(requested_models_dir: str) -> set[str]:
88
  continue
89
  with open(os.path.join(root, file), "r") as f:
90
  info = json.load(f)
91
- if info['status'] == 'FAILED':
92
  continue
93
  file_names.append(f"{info['model']}_{info['precision']}_{info['add_special_tokens']}")
94
 
 
1
  import json
2
  import os
 
3
  from collections import defaultdict
 
4
 
5
  import huggingface_hub
6
  from huggingface_hub import ModelCard
 
8
  from transformers import AutoConfig
9
  from transformers.models.auto.tokenization_auto import AutoTokenizer
10
 
11
+
12
  def check_model_card(repo_id: str) -> tuple[bool, str]:
13
  """Checks if the model card and license exist and have been filled"""
14
  try:
 
30
 
31
  return True, ""
32
 
33
+
34
+ def is_model_on_hub(
35
+ model_name: str, revision: str, token: str = None, trust_remote_code=False, test_tokenizer=False
36
+ ) -> tuple[bool, str]:
37
  """Checks if the model model_name is on the hub, and whether it (and its tokenizer) can be loaded with AutoClasses."""
38
  try:
39
+ config = AutoConfig.from_pretrained(
40
+ model_name, revision=revision, trust_remote_code=trust_remote_code, token=token
41
+ )
42
  if test_tokenizer:
43
  try:
44
+ AutoTokenizer.from_pretrained(
45
+ model_name, revision=revision, trust_remote_code=trust_remote_code, token=token
46
+ )
47
  except ValueError as e:
48
+ return (False, f"uses a tokenizer which is not in a transformers release: {e}", None)
49
+ except Exception:
50
  return (
51
  False,
52
+ "'s tokenizer cannot be loaded. Is your tokenizer class in a stable transformers release, and correctly configured?",
53
+ None,
54
  )
 
 
55
  return True, None, config
56
 
57
  except ValueError:
58
  return (
59
  False,
60
  "needs to be launched with `trust_remote_code=True`. For safety reason, we do not allow these models to be automatically submitted to the leaderboard.",
61
+ None,
62
  )
63
 
64
+ except Exception:
65
  return False, "was not found on hub!", None
66
 
67
 
 
76
  model_size = size_factor * model_size
77
  return model_size
78
 
79
+
80
  def get_model_arch(model_info: ModelInfo):
81
  """Gets the model architecture from the configuration"""
82
  return model_info.config.get("architectures", "Unknown")
83
 
84
+
85
  def already_submitted_models(requested_models_dir: str) -> set[str]:
86
  """Gather a list of already submitted models to avoid duplicates"""
87
  depth = 1
 
96
  continue
97
  with open(os.path.join(root, file), "r") as f:
98
  info = json.load(f)
99
+ if info["status"] == "FAILED":
100
  continue
101
  file_names.append(f"{info['model']}_{info['precision']}_{info['add_special_tokens']}")
102
 
src/submission/submit.py CHANGED
@@ -3,17 +3,13 @@ import os
3
  from datetime import datetime, timezone
4
 
5
  from src.display.formatting import styled_error, styled_message, styled_warning
6
- from src.envs import API, EVAL_REQUESTS_PATH, TOKEN, QUEUE_REPO
7
- from src.submission.check_validity import (
8
- already_submitted_models,
9
- check_model_card,
10
- get_model_size,
11
- is_model_on_hub,
12
- )
13
 
14
  REQUESTED_MODELS = None
15
  USERS_TO_SUBMISSION_DATES = None
16
 
 
17
  def add_new_eval(
18
  model: str,
19
  revision: str,
 
3
  from datetime import datetime, timezone
4
 
5
  from src.display.formatting import styled_error, styled_message, styled_warning
6
+ from src.envs import API, EVAL_REQUESTS_PATH, QUEUE_REPO, TOKEN
7
+ from src.submission.check_validity import already_submitted_models, check_model_card, get_model_size, is_model_on_hub
 
 
 
 
 
8
 
9
  REQUESTED_MODELS = None
10
  USERS_TO_SUBMISSION_DATES = None
11
 
12
+
13
  def add_new_eval(
14
  model: str,
15
  revision: str,