Clémentine committed on
Commit
9d22eee
1 Parent(s): 7302987

simplified some parts of the code + updated requirements

Browse files
app.py CHANGED
@@ -22,6 +22,8 @@ from src.display.utils import (
22
  AutoEvalColumn,
23
  ModelType,
24
  fields,
 
 
25
  )
26
  from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, H4_TOKEN, IS_PUBLIC, QUEUE_REPO, REPO_ID, RESULTS_REPO
27
  from src.populate import get_evaluation_queue_df, get_leaderboard_df
@@ -37,25 +39,25 @@ from src.tools.plots import (
37
 
38
  def restart_space():
39
  API.restart_space(repo_id=REPO_ID, token=H4_TOKEN)
40
-
41
- try:
42
- print(EVAL_REQUESTS_PATH)
43
- snapshot_download(
44
- repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30
45
- )
46
- except Exception:
47
- restart_space()
48
- try:
49
- print(EVAL_RESULTS_PATH)
50
- snapshot_download(
51
- repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30
52
- )
53
- except Exception:
54
- restart_space()
55
 
56
 
57
  raw_data, original_df = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
58
- update_collections(original_df.copy())
59
  leaderboard_df = original_df.copy()
60
 
61
  plot_df = create_plot_df(create_scores_df(raw_data))
@@ -186,8 +188,8 @@ with demo:
186
  )
187
  filter_columns_precision = gr.CheckboxGroup(
188
  label="Precision",
189
- choices=["torch.float16", "torch.bfloat16", "torch.float32", "8bit", "4bit", "GPTQ"],
190
- value=["torch.float16", "torch.bfloat16", "torch.float32", "8bit", "4bit", "GPTQ"],
191
  interactive=True,
192
  elem_id="filter-columns-precision",
193
  )
@@ -317,7 +319,7 @@ with demo:
317
  revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
318
  private = gr.Checkbox(False, label="Private", visible=not IS_PUBLIC)
319
  model_type = gr.Dropdown(
320
- choices=[t.to_str(" : ") for t in ModelType],
321
  label="Model type",
322
  multiselect=False,
323
  value=None,
@@ -326,14 +328,14 @@ with demo:
326
 
327
  with gr.Column():
328
  precision = gr.Dropdown(
329
- choices=["float16", "bfloat16", "8bit (LLM.int8)", "4bit (QLoRA / FP4)", "GPTQ"],
330
  label="Precision",
331
  multiselect=False,
332
  value="float16",
333
  interactive=True,
334
  )
335
  weight_type = gr.Dropdown(
336
- choices=["Original", "Delta", "Adapter"],
337
  label="Weights type",
338
  multiselect=False,
339
  value="Original",
 
22
  AutoEvalColumn,
23
  ModelType,
24
  fields,
25
+ WeightType,
26
+ Precision
27
  )
28
  from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, H4_TOKEN, IS_PUBLIC, QUEUE_REPO, REPO_ID, RESULTS_REPO
29
  from src.populate import get_evaluation_queue_df, get_leaderboard_df
 
39
 
def restart_space():
    """Restart the Space identified by ``REPO_ID`` through the Hub API, authenticating with ``H4_TOKEN``."""
    API.restart_space(token=H4_TOKEN, repo_id=REPO_ID)
42
# NOTE(review): the `if False:` guard means neither dataset snapshot below is
# downloaded at start-up - confirm that shipping this disabled is intentional.
if False:
    # Requests queue first, then results; a failed download restarts the Space.
    for target_dir, dataset_repo in (
        (EVAL_REQUESTS_PATH, QUEUE_REPO),
        (EVAL_RESULTS_PATH, RESULTS_REPO),
    ):
        try:
            print(target_dir)
            snapshot_download(
                repo_id=dataset_repo, local_dir=target_dir, repo_type="dataset", tqdm_class=None, etag_timeout=30
            )
        except Exception:
            restart_space()
57
 
58
 
59
  raw_data, original_df = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
60
+ # update_collections(original_df.copy())
61
  leaderboard_df = original_df.copy()
62
 
63
  plot_df = create_plot_df(create_scores_df(raw_data))
 
188
  )
189
  filter_columns_precision = gr.CheckboxGroup(
190
  label="Precision",
191
+ choices=[i.value.name for i in Precision],
192
+ value=[i.value.name for i in Precision],
193
  interactive=True,
194
  elem_id="filter-columns-precision",
195
  )
 
319
  revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
320
  private = gr.Checkbox(False, label="Private", visible=not IS_PUBLIC)
321
  model_type = gr.Dropdown(
322
+ choices=[t.to_str(" : ") for t in ModelType if t != ModelType.Unknown],
323
  label="Model type",
324
  multiselect=False,
325
  value=None,
 
328
 
329
  with gr.Column():
330
  precision = gr.Dropdown(
331
+ choices=[i.value.name for i in Precision if i != Precision.Unknown],
332
  label="Precision",
333
  multiselect=False,
334
  value="float16",
335
  interactive=True,
336
  )
337
  weight_type = gr.Dropdown(
338
+ choices=[i.value.name for i in WeightType],
339
  label="Weights type",
340
  multiselect=False,
341
  value="Original",
requirements.txt CHANGED
@@ -15,6 +15,5 @@ python-dateutil==2.8.2
15
  requests==2.28.2
16
  semantic-version==2.10.0
17
  tqdm==4.65.0
18
- git+https://github.com/clefourrier/transformers.git@req-fix#egg=transformers
19
- #transformers==4.35.1
20
  tokenizers>=0.15.0
 
15
  requests==2.28.2
16
  semantic-version==2.10.0
17
  tqdm==4.65.0
18
+ transformers==4.35.2
 
19
  tokenizers>=0.15.0
src/display/formatting.py CHANGED
@@ -7,23 +7,6 @@ from huggingface_hub.hf_api import ModelInfo
7
 
8
  API = HfApi()
9
 
10
- LLAMAS = [
11
- "huggingface/llama-7b",
12
- "huggingface/llama-13b",
13
- "huggingface/llama-30b",
14
- "huggingface/llama-65b",
15
- ]
16
-
17
- KOALA_LINK = "https://huggingface.co/TheBloke/koala-13B-HF"
18
- VICUNA_LINK = "https://huggingface.co/lmsys/vicuna-13b-delta-v1.1"
19
- OASST_LINK = "https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5"
20
- DOLLY_LINK = "https://huggingface.co/databricks/dolly-v2-12b"
21
- MODEL_PAGE = "https://huggingface.co/models"
22
- LLAMA_LINK = "https://ai.facebook.com/blog/large-language-model-llama-meta-ai/"
23
- VICUNA_LINK = "https://huggingface.co/CarperAI/stable-vicuna-13b-delta"
24
- ALPACA_LINK = "https://crfm.stanford.edu/2023/03/13/alpaca.html"
25
-
26
-
27
  def model_hyperlink(link, model_name):
28
  return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'
29
 
@@ -31,44 +14,9 @@ def model_hyperlink(link, model_name):
31
  def make_clickable_model(model_name):
32
  link = f"https://huggingface.co/{model_name}"
33
 
34
- if model_name in LLAMAS:
35
- link = LLAMA_LINK
36
- model_name = model_name.split("/")[1]
37
- elif model_name == "HuggingFaceH4/stable-vicuna-13b-2904":
38
- link = VICUNA_LINK
39
- model_name = "stable-vicuna-13b"
40
- elif model_name == "HuggingFaceH4/llama-7b-ift-alpaca":
41
- link = ALPACA_LINK
42
- model_name = "alpaca-13b"
43
- if model_name == "dolly-12b":
44
- link = DOLLY_LINK
45
- elif model_name == "vicuna-13b":
46
- link = VICUNA_LINK
47
- elif model_name == "koala-13b":
48
- link = KOALA_LINK
49
- elif model_name == "oasst-12b":
50
- link = OASST_LINK
51
-
52
  details_model_name = model_name.replace("/", "__")
53
  details_link = f"https://huggingface.co/datasets/open-llm-leaderboard/details_{details_model_name}"
54
 
55
- if not bool(os.getenv("DEBUG", "False")):
56
- # We only add these checks when not debugging, as they are extremely slow
57
- print(f"details_link: {details_link}")
58
- try:
59
- check_path = list(
60
- API.list_files_info(
61
- repo_id=f"open-llm-leaderboard/details_{details_model_name}",
62
- paths="README.md",
63
- repo_type="dataset",
64
- )
65
- )
66
- print(f"check_path: {check_path}")
67
- except Exception as err:
68
- # No details repo for this model
69
- print(f"No details repo for this model: {err}")
70
- return model_hyperlink(link, model_name)
71
-
72
  return model_hyperlink(link, model_name) + " " + model_hyperlink(details_link, "📑")
73
 
74
 
 
7
 
8
  API = HfApi()
9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
def model_hyperlink(link, model_name):
    """Return an HTML anchor that opens ``link`` in a new tab, with ``model_name`` as its text.

    Both arguments are interpolated as-is; no HTML escaping is performed here.
    """
    anchor_style = "color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"
    return f'<a target="_blank" href="{link}" style="{anchor_style}">{model_name}</a>'
12
 
 
def make_clickable_model(model_name):
    """Return HTML with two links for ``model_name``: its Hub page and its details dataset.

    The details dataset lives under ``open-llm-leaderboard/details_<name>``, where
    every "/" of the model name is replaced by "__". No check is made that the
    details repository actually exists.
    """
    model_page = f"https://huggingface.co/{model_name}"
    details_repo_suffix = model_name.replace("/", "__")
    details_page = f"https://huggingface.co/datasets/open-llm-leaderboard/details_{details_repo_suffix}"

    model_anchor = model_hyperlink(model_page, model_name)
    details_anchor = model_hyperlink(details_page, "📑")
    return model_anchor + " " + details_anchor
21
 
22
 
src/display/utils.py CHANGED
@@ -1,8 +1,26 @@
1
- from dataclasses import dataclass
2
  from enum import Enum
3
 
4
  import pandas as pd
5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
  # These classes are for user facing column names,
8
  # to avoid having to change them all around the code
@@ -16,36 +34,29 @@ class ColumnContent:
16
  never_hidden: bool = False
17
  dummy: bool = False
18
 
19
-
20
- def fields(raw_class):
21
- return [v for k, v in raw_class.__dict__.items() if k[:2] != "__" and k[-2:] != "__"]
22
-
23
-
24
- @dataclass(frozen=True)
25
- class AutoEvalColumn: # Auto evals column
26
- model_type_symbol = ColumnContent("T", "str", True, never_hidden=True)
27
- model = ColumnContent("Model", "markdown", True, never_hidden=True)
28
- average = ColumnContent("Average ⬆️", "number", True)
29
- arc = ColumnContent("ARC", "number", True)
30
- hellaswag = ColumnContent("HellaSwag", "number", True)
31
- mmlu = ColumnContent("MMLU", "number", True)
32
- truthfulqa = ColumnContent("TruthfulQA", "number", True)
33
- winogrande = ColumnContent("Winogrande", "number", True)
34
- gsm8k = ColumnContent("GSM8K", "number", True)
35
- drop = ColumnContent("DROP", "number", True)
36
- model_type = ColumnContent("Type", "str", False)
37
- architecture = ColumnContent("Architecture", "str", False)
38
- weight_type = ColumnContent("Weight type", "str", False, True)
39
- precision = ColumnContent("Precision", "str", False) # , True)
40
- license = ColumnContent("Hub License", "str", False)
41
- params = ColumnContent("#Params (B)", "number", False)
42
- likes = ColumnContent("Hub ❤️", "number", False)
43
- still_on_hub = ColumnContent("Available on the hub", "bool", False)
44
- revision = ColumnContent("Model sha", "str", False, False)
45
- dummy = ColumnContent(
46
- "model_name_for_query", "str", False, dummy=True
47
- ) # dummy col to implement search bar (hidden by custom CSS)
48
-
49
 
50
  @dataclass(frozen=True)
51
  class EvalQueueColumn: # Queue column
@@ -99,17 +110,17 @@ human_baseline_row = {
99
  }
100
 
101
  @dataclass
102
- class ModelTypeDetails:
103
  name: str
104
- symbol: str # emoji
105
 
106
 
107
  class ModelType(Enum):
108
- PT = ModelTypeDetails(name="pretrained", symbol="🟢")
109
- FT = ModelTypeDetails(name="fine-tuned", symbol="🔶")
110
- IFT = ModelTypeDetails(name="instruction-tuned", symbol="⭕")
111
- RL = ModelTypeDetails(name="RL-tuned", symbol="🟦")
112
- Unknown = ModelTypeDetails(name="", symbol="?")
113
 
114
  def to_str(self, separator=" "):
115
  return f"{self.value.symbol}{separator}{self.value.name}"
@@ -126,22 +137,33 @@ class ModelType(Enum):
126
  return ModelType.IFT
127
  return ModelType.Unknown
128
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
129
 
130
- @dataclass
131
- class Task:
132
- benchmark: str
133
- metric: str
134
- col_name: str
135
-
136
-
137
- class Tasks(Enum):
138
- arc = Task("arc:challenge", "acc_norm", AutoEvalColumn.arc.name)
139
- hellaswag = Task("hellaswag", "acc_norm", AutoEvalColumn.hellaswag.name)
140
- mmlu = Task("hendrycksTest", "acc", AutoEvalColumn.mmlu.name)
141
- truthfulqa = Task("truthfulqa:mc", "mc2", AutoEvalColumn.truthfulqa.name)
142
- winogrande = Task("winogrande", "acc", AutoEvalColumn.winogrande.name)
143
- gsm8k = Task("gsm8k", "acc", AutoEvalColumn.gsm8k.name)
144
- drop = Task("drop", "f1", AutoEvalColumn.drop.name)
145
 
146
 
147
  # Column selection
 
1
+ from dataclasses import dataclass, make_dataclass
2
  from enum import Enum
3
 
4
  import pandas as pd
5
 
6
def fields(raw_class):
    """Return the values stored in ``raw_class.__dict__``, skipping dunder-style names.

    An attribute is skipped when its name starts with "__" or ends with "__";
    the remaining values are returned in ``__dict__`` iteration order.
    """
    collected = []
    for attr_name, attr_value in raw_class.__dict__.items():
        if attr_name.startswith("__") or attr_name.endswith("__"):
            continue
        collected.append(attr_value)
    return collected
8
+
9
+
10
@dataclass(frozen=True)
class Task:
    """Description of one benchmark.

    Frozen so that the instances used as ``Tasks`` enum values are immutable
    (and hashable), as enum constants should be.
    """

    benchmark: str  # benchmark identifier, e.g. "arc:challenge"
    metric: str  # metric name for that benchmark, e.g. "acc_norm"
    col_name: str  # column title used for this benchmark, e.g. "ARC"


class Tasks(Enum):
    """The benchmarks known to the leaderboard; each member's value is a ``Task``."""

    arc = Task("arc:challenge", "acc_norm", "ARC")
    hellaswag = Task("hellaswag", "acc_norm", "HellaSwag")
    mmlu = Task("hendrycksTest", "acc", "MMLU")
    truthfulqa = Task("truthfulqa:mc", "mc2", "TruthfulQA")
    winogrande = Task("winogrande", "acc", "Winogrande")
    gsm8k = Task("gsm8k", "acc", "GSM8K")
    drop = Task("drop", "f1", "DROP")
24
 
25
  # These classes are for user facing column names,
26
  # to avoid having to change them all around the code
 
34
  never_hidden: bool = False
35
  dummy: bool = False
36
 
37
# Each entry is a [attribute_name, type, default] triple, the field format
# accepted by dataclasses.make_dataclass.
auto_eval_column_dict = [
    # Init
    ["model_type_symbol", ColumnContent, ColumnContent("T", "str", True, never_hidden=True)],
    ["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)],
    # Scores
    ["average", ColumnContent, ColumnContent("Average ⬆️", "number", True)],
]
# One score column per benchmark, in Tasks declaration order
for task in Tasks:
    auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
auto_eval_column_dict += [
    # Model information
    ["model_type", ColumnContent, ColumnContent("Type", "str", False)],
    ["architecture", ColumnContent, ColumnContent("Architecture", "str", False)],
    ["weight_type", ColumnContent, ColumnContent("Weight type", "str", False, True)],
    ["precision", ColumnContent, ColumnContent("Precision", "str", False)],
    ["license", ColumnContent, ColumnContent("Hub License", "str", False)],
    ["params", ColumnContent, ColumnContent("#Params (B)", "number", False)],
    ["likes", ColumnContent, ColumnContent("Hub ❤️", "number", False)],
    ["still_on_hub", ColumnContent, ColumnContent("Available on the hub", "bool", False)],
    ["revision", ColumnContent, ColumnContent("Model sha", "str", False, False)],
    # Dummy column for the search bar (hidden by the custom CSS)
    ["dummy", ColumnContent, ColumnContent("model_name_for_query", "str", False, dummy=True)],
]

# The class is built with make_dataclass so the score columns follow Tasks automatically.
# NOTE(review): dataclasses rejects unhashable field defaults on Python 3.11+;
# confirm ColumnContent instances are hashable (e.g. a frozen dataclass) on the target runtime.
AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
 
 
 
 
 
 
 
60
 
61
  @dataclass(frozen=True)
62
  class EvalQueueColumn: # Queue column
 
110
  }
111
 
@dataclass(frozen=True)
class ModelDetails:
    """Display details attached to enum members describing a model property.

    Frozen so that instances used as enum values cannot be mutated after the
    enum is defined, and so that they are hashable.
    """

    name: str
    symbol: str = ""  # emoji, only for the model type
116
 
117
 
118
  class ModelType(Enum):
119
+ PT = ModelDetails(name="pretrained", symbol="🟢")
120
+ FT = ModelDetails(name="fine-tuned", symbol="🔶")
121
+ IFT = ModelDetails(name="instruction-tuned", symbol="⭕")
122
+ RL = ModelDetails(name="RL-tuned", symbol="🟦")
123
+ Unknown = ModelDetails(name="", symbol="?")
124
 
125
  def to_str(self, separator=" "):
126
  return f"{self.value.symbol}{separator}{self.value.name}"
 
137
  return ModelType.IFT
138
  return ModelType.Unknown
139
 
140
class WeightType(Enum):
    """Weight-type options; each member's value is a ``ModelDetails`` holding the display name."""

    Adapter = ModelDetails(name="Adapter")
    Original = ModelDetails(name="Original")
    Delta = ModelDetails(name="Delta")
144
+
145
class Precision(Enum):
    """Precision options; each member's value is a ``ModelDetails`` holding the display name."""

    float16 = ModelDetails("float16")
    bfloat16 = ModelDetails("bfloat16")
    qt_8bit = ModelDetails("8bit")
    qt_4bit = ModelDetails("4bit")
    qt_GPTQ = ModelDetails("GPTQ")
    Unknown = ModelDetails("?")

    @staticmethod
    def from_str(precision):
        """Map a precision label to its ``Precision`` member.

        Declared as a static method so it behaves the same whether it is
        reached through the class or through a member.

        Args:
            precision: Precision label; the float types are accepted with or
                without the ``torch.`` prefix (``"torch.float16"`` / ``"float16"``).

        Returns:
            The matching member, or ``Precision.Unknown`` for any other value
            (including a real ``None``, as opposed to the string ``"None"``).
        """
        if precision in ("torch.float16", "float16"):
            return Precision.float16
        if precision in ("torch.bfloat16", "bfloat16"):
            return Precision.bfloat16
        if precision in ("8bit",):
            return Precision.qt_8bit
        if precision in ("4bit",):
            return Precision.qt_4bit
        # The literal string "None" is treated as GPTQ.
        if precision in ("GPTQ", "None"):
            return Precision.qt_GPTQ
        return Precision.Unknown
165
+
166
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
167
 
168
 
169
  # Column selection
src/leaderboard/read_evals.py CHANGED
@@ -10,7 +10,7 @@ from transformers import AutoConfig
10
  import numpy as np
11
 
12
  from src.display.formatting import make_clickable_model
13
- from src.display.utils import AutoEvalColumn, ModelType, Tasks
14
  from src.submission.check_validity import is_model_on_hub
15
 
16
 
@@ -23,9 +23,9 @@ class EvalResult:
23
  model: str
24
  revision: str # commit hash, "" if main
25
  results: dict
26
- precision: str = ""
27
  model_type: ModelType = ModelType.Unknown # Pretrained, fine tuned, ...
28
- weight_type: str = "Original" # Original or Adapter
29
  architecture: str = "Unknown" # From config file
30
  license: str = "?"
31
  likes: int = 0
@@ -43,9 +43,7 @@ class EvalResult:
43
  config = data.get("config", data.get("config_general", None))
44
 
45
  # Precision
46
- precision = config.get("model_dtype")
47
- if precision == "None":
48
- precision = "GPTQ"
49
 
50
  # Get model and org
51
  org_and_model = config.get("model_name", config.get("model_args", None))
@@ -54,15 +52,15 @@ class EvalResult:
54
  if len(org_and_model) == 1:
55
  org = None
56
  model = org_and_model[0]
57
- result_key = f"{model}_{precision}"
58
  else:
59
  org = org_and_model[0]
60
  model = org_and_model[1]
61
- result_key = f"{org}_{model}_{precision}"
62
  full_model = "/".join(org_and_model)
63
 
64
  still_on_hub, error, model_config = is_model_on_hub(
65
- full_model, config.get("model_sha", "main"), trust_remote_code=True
66
  )
67
  architecture = "?"
68
  if model_config is not None:
@@ -112,13 +110,13 @@ class EvalResult:
112
 
113
  def update_with_request_file(self, requests_path):
114
  """Finds the relevant request file for the current model and updates info with it"""
115
- request_file = get_request_file_for_model(requests_path, self.full_model, self.precision)
116
 
117
  try:
118
  with open(request_file, "r") as f:
119
  request = json.load(f)
120
  self.model_type = ModelType.from_str(request.get("model_type", ""))
121
- self.weight_type = request.get("weight_type", "?")
122
  self.license = request.get("license", "?")
123
  self.likes = request.get("likes", 0)
124
  self.num_params = request.get("params", 0)
@@ -131,10 +129,10 @@ class EvalResult:
131
  average = sum([v for v in self.results.values() if v is not None]) / len(Tasks)
132
  data_dict = {
133
  "eval_name": self.eval_name, # not a column, just a save name,
134
- AutoEvalColumn.precision.name: self.precision,
135
  AutoEvalColumn.model_type.name: self.model_type.value.name,
136
  AutoEvalColumn.model_type_symbol.name: self.model_type.value.symbol,
137
- AutoEvalColumn.weight_type.name: self.weight_type,
138
  AutoEvalColumn.architecture.name: self.architecture,
139
  AutoEvalColumn.model.name: make_clickable_model(self.full_model),
140
  AutoEvalColumn.dummy.name: self.full_model,
@@ -167,7 +165,7 @@ def get_request_file_for_model(requests_path, model_name, precision):
167
  with open(tmp_request_file, "r") as f:
168
  req_content = json.load(f)
169
  if (
170
- req_content["status"] in ["FINISHED", "PENDING_NEW_EVAL"]
171
  and req_content["precision"] == precision.split(".")[-1]
172
  ):
173
  request_file = tmp_request_file
 
10
  import numpy as np
11
 
12
  from src.display.formatting import make_clickable_model
13
+ from src.display.utils import AutoEvalColumn, ModelType, Tasks, Precision, WeightType
14
  from src.submission.check_validity import is_model_on_hub
15
 
16
 
 
23
  model: str
24
  revision: str # commit hash, "" if main
25
  results: dict
26
+ precision: Precision = Precision.Unknown
27
  model_type: ModelType = ModelType.Unknown # Pretrained, fine tuned, ...
28
+ weight_type: WeightType = WeightType.Original # Original or Adapter
29
  architecture: str = "Unknown" # From config file
30
  license: str = "?"
31
  likes: int = 0
 
43
  config = data.get("config", data.get("config_general", None))
44
 
45
  # Precision
46
+ precision = Precision.from_str(config.get("model_dtype"))
 
 
47
 
48
  # Get model and org
49
  org_and_model = config.get("model_name", config.get("model_args", None))
 
52
  if len(org_and_model) == 1:
53
  org = None
54
  model = org_and_model[0]
55
+ result_key = f"{model}_{precision.value.name}"
56
  else:
57
  org = org_and_model[0]
58
  model = org_and_model[1]
59
+ result_key = f"{org}_{model}_{precision.value.name}"
60
  full_model = "/".join(org_and_model)
61
 
62
  still_on_hub, error, model_config = is_model_on_hub(
63
+ full_model, config.get("model_sha", "main"), trust_remote_code=True, test_tokenizer=False
64
  )
65
  architecture = "?"
66
  if model_config is not None:
 
110
 
111
  def update_with_request_file(self, requests_path):
112
  """Finds the relevant request file for the current model and updates info with it"""
113
+ request_file = get_request_file_for_model(requests_path, self.full_model, self.precision.value.name)
114
 
115
  try:
116
  with open(request_file, "r") as f:
117
  request = json.load(f)
118
  self.model_type = ModelType.from_str(request.get("model_type", ""))
119
+ self.weight_type = WeightType[request.get("weight_type", "Original")]
120
  self.license = request.get("license", "?")
121
  self.likes = request.get("likes", 0)
122
  self.num_params = request.get("params", 0)
 
129
  average = sum([v for v in self.results.values() if v is not None]) / len(Tasks)
130
  data_dict = {
131
  "eval_name": self.eval_name, # not a column, just a save name,
132
+ AutoEvalColumn.precision.name: self.precision.value.name,
133
  AutoEvalColumn.model_type.name: self.model_type.value.name,
134
  AutoEvalColumn.model_type_symbol.name: self.model_type.value.symbol,
135
+ AutoEvalColumn.weight_type.name: self.weight_type.value.name,
136
  AutoEvalColumn.architecture.name: self.architecture,
137
  AutoEvalColumn.model.name: make_clickable_model(self.full_model),
138
  AutoEvalColumn.dummy.name: self.full_model,
 
165
  with open(tmp_request_file, "r") as f:
166
  req_content = json.load(f)
167
  if (
168
+ req_content["status"] in ["FINISHED"]
169
  and req_content["precision"] == precision.split(".")[-1]
170
  ):
171
  request_file = tmp_request_file
src/submission/check_validity.py CHANGED
@@ -87,8 +87,7 @@ def get_model_size(model_info: ModelInfo, precision: str):
87
  def get_model_arch(model_info: ModelInfo):
88
  return model_info.config.get("architectures", "Unknown")
89
 
90
- def user_submission_permission(submission_name, users_to_submission_dates, rate_limit_period, rate_limit_quota):
91
- org_or_user, _ = submission_name.split("/")
92
  if org_or_user not in users_to_submission_dates:
93
  return True, ""
94
  submission_dates = sorted(users_to_submission_dates[org_or_user])
 
87
  def get_model_arch(model_info: ModelInfo):
88
  return model_info.config.get("architectures", "Unknown")
89
 
90
+ def user_submission_permission(org_or_user, users_to_submission_dates, rate_limit_period, rate_limit_quota):
 
91
  if org_or_user not in users_to_submission_dates:
92
  return True, ""
93
  submission_dates = sorted(users_to_submission_dates[org_or_user])
src/submission/submit.py CHANGED
@@ -30,6 +30,11 @@ def add_new_eval(
30
  if not REQUESTED_MODELS:
31
  REQUESTED_MODELS, USERS_TO_SUBMISSION_DATES = already_submitted_models(EVAL_REQUESTS_PATH)
32
 
 
 
 
 
 
33
 
34
  precision = precision.split(" ")[0]
35
  current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
@@ -38,11 +43,12 @@ def add_new_eval(
38
  return styled_error("Please select a model type.")
39
 
40
  # Is the user rate limited?
41
- user_can_submit, error_msg = user_submission_permission(
42
- model, USERS_TO_SUBMISSION_DATES, RATE_LIMIT_PERIOD, RATE_LIMIT_QUOTA
43
- )
44
- if not user_can_submit:
45
- return styled_error(error_msg)
 
46
 
47
  # Did the model authors forbid its submission to the leaderboard?
48
  if model in DO_NOT_SUBMIT_MODELS or base_model in DO_NOT_SUBMIT_MODELS:
@@ -99,12 +105,6 @@ def add_new_eval(
99
  "license": license,
100
  }
101
 
102
- user_name = ""
103
- model_path = model
104
- if "/" in model:
105
- user_name = model.split("/")[0]
106
- model_path = model.split("/")[1]
107
-
108
  # Check for duplicate submission
109
  if f"{model}_{revision}_{precision}" in REQUESTED_MODELS:
110
  return styled_warning("This model has been already submitted.")
 
30
  if not REQUESTED_MODELS:
31
  REQUESTED_MODELS, USERS_TO_SUBMISSION_DATES = already_submitted_models(EVAL_REQUESTS_PATH)
32
 
33
+ user_name = ""
34
+ model_path = model
35
+ if "/" in model:
36
+ user_name = model.split("/")[0]
37
+ model_path = model.split("/")[1]
38
 
39
  precision = precision.split(" ")[0]
40
  current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
 
43
  return styled_error("Please select a model type.")
44
 
45
  # Is the user rate limited?
46
+ if user_name != "":
47
+ user_can_submit, error_msg = user_submission_permission(
48
+ user_name, USERS_TO_SUBMISSION_DATES, RATE_LIMIT_PERIOD, RATE_LIMIT_QUOTA
49
+ )
50
+ if not user_can_submit:
51
+ return styled_error(error_msg)
52
 
53
  # Did the model authors forbid its submission to the leaderboard?
54
  if model in DO_NOT_SUBMIT_MODELS or base_model in DO_NOT_SUBMIT_MODELS:
 
105
  "license": license,
106
  }
107
 
 
 
 
 
 
 
108
  # Check for duplicate submission
109
  if f"{model}_{revision}_{precision}" in REQUESTED_MODELS:
110
  return styled_warning("This model has been already submitted.")