Apply pre-commit

This commit applies the repository's pre-commit hooks: whitespace and blank-line normalization, import cleanup, line re-wrapping, and removal of some dead code.

Files changed:
- README.md +1 -1
- src/about.py +12 -4
- src/display/utils.py +13 -4
- src/envs.py +3 -3
- src/leaderboard/read_evals.py +24 -36
- src/submission/check_validity.py +20 -12
- src/submission/submit.py +3 -7
README.md
CHANGED

@@ -39,7 +39,7 @@ If you encounter problem on the space, don't hesitate to restart it to remove th
 
 # Code logic for more complex edits
 
-You'll find
+You'll find
 - the main table' columns names and properties in `src/display/utils.py`
 - the logic to read all results and request files, then convert them in dataframe lines, in `src/leaderboard/read_evals.py`, and `src/populate.py`
 - teh logic to allow or filter submissions in `src/submission/submit.py` and `src/submission/check_validity.py`

(The only change in this hunk is whitespace on the "You'll find" line.)
src/about.py
CHANGED

@@ -41,8 +41,12 @@ class Tasks(Enum):
     jsem_exact_match = Task("scores", "jsem_exact_match", "JSEM")
     jsick_exact_match = Task("scores", "jsick_exact_match", "JSICK")
     jsquad_char_f1 = Task("scores", "jsquad_char_f1", "JSquad")
-    jsts_pearson = Task("scores", "jsts_pearson", "JSTS (Pearson) - 意味的類似度")  # Semantic Textual Similarity - 意味的類似度
-    jsts_spearman = Task("scores", "jsts_spearman", "JSTS (Spearman) - 意味的類似度")  # Semantic Textual Similarity - 意味的類似度
+    jsts_pearson = Task(
+        "scores", "jsts_pearson", "JSTS (Pearson) - 意味的類似度"
+    )  # Semantic Textual Similarity - 意味的類似度
+    jsts_spearman = Task(
+        "scores", "jsts_spearman", "JSTS (Spearman) - 意味的類似度"
+    )  # Semantic Textual Similarity - 意味的類似度
     kuci_exact_match = Task("scores", "kuci_exact_match", "KUCI")
     mawps_exact_match = Task("scores", "mawps_exact_match", "MAWPS")
     mmlu_en_exact_match = Task("scores", "mmlu_en_exact_match", "MMLU")

@@ -52,10 +56,14 @@ class Tasks(Enum):
     wiki_ner_set_f1 = Task("scores", "wiki_ner_set_f1", "Wiki NER")
     wiki_pas_set_f1 = Task("scores", "wiki_pas_set_f1", "Wiki PAS")
     wiki_reading_char_f1 = Task("scores", "wiki_reading_char_f1", "Wiki Reading")
-    wikicorpus_e_to_j_bert_score_ja_f1 = Task("scores", "wikicorpus-e-to-j_bert_score_ja_f1", "WikiCorpus E to J BERT Score")
+    wikicorpus_e_to_j_bert_score_ja_f1 = Task(
+        "scores", "wikicorpus-e-to-j_bert_score_ja_f1", "WikiCorpus E to J BERT Score"
+    )
     wikicorpus_e_to_j_bleu_ja = Task("scores", "wikicorpus-e-to-j_bleu_ja", "WikiCorpus E to J BLEU")
     wikicorpus_e_to_j_comet_wmt22 = Task("scores", "wikicorpus-e-to-j_comet_wmt22", "WikiCorpus E to J COMET WMT22")
-    wikicorpus_j_to_e_bert_score_en_f1 = Task("scores", "wikicorpus-j-to-e_bert_score_en_f1", "WikiCorpus J to E BERT Score")
+    wikicorpus_j_to_e_bert_score_en_f1 = Task(
+        "scores", "wikicorpus-j-to-e_bert_score_en_f1", "WikiCorpus J to E BERT Score"
+    )
     wikicorpus_j_to_e_bleu_en = Task("scores", "wikicorpus-j-to-e_bleu_en", "WikiCorpus J to E BLEU")
     wikicorpus_j_to_e_comet_wmt22 = Task("scores", "wikicorpus-j-to-e_comet_wmt22", "WikiCorpus J to E COMET WMT22")
     xlsum_ja_bert_score_ja_f1 = Task("scores", "xlsum_ja_bert_score_ja_f1", "XL-Sum JA BERT Score")
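For orientation, each `Tasks` member above binds a metric key in the results files to a display column name. Below is a minimal self-contained sketch of that pattern; the `Task` field names (`benchmark`, `metric`, `col_name`) are an assumption inferred from how `task.value.col_name` and `task_value.metric` are used elsewhere in this commit.

```python
from dataclasses import dataclass
from enum import Enum


@dataclass
class Task:
    benchmark: str  # which block of the results file to read, here always "scores"
    metric: str     # key inside that block, e.g. "jsts_pearson"
    col_name: str   # column header shown in the leaderboard table


class Tasks(Enum):
    jsts_pearson = Task("scores", "jsts_pearson", "JSTS (Pearson) - 意味的類似度")
    jsts_spearman = Task("scores", "jsts_spearman", "JSTS (Spearman) - 意味的類似度")


for task in Tasks:
    print(task.name, "->", task.value.col_name)
```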
src/display/utils.py
CHANGED

@@ -5,6 +5,7 @@ import pandas as pd
 
 from src.about import Tasks
 
+
 def fields(raw_class):
     return [v for k, v in raw_class.__dict__.items() if k[:2] != "__" and k[-2:] != "__"]
 

@@ -21,12 +22,13 @@ class ColumnContent:
     never_hidden: bool = False
     dummy: bool = False
 
+
 ## Leaderboard columns
 auto_eval_column_dict = []
 # Init
 auto_eval_column_dict.append(["model_type_symbol", ColumnContent, ColumnContent("T", "str", True, never_hidden=True)])
 auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)])
-#Scores
+# Scores
 # auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Average ⬆️", "number", True)])
 for task in Tasks:
     auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])

@@ -47,6 +49,7 @@ auto_eval_column_dict.append(["dummy", ColumnContent, ColumnContent("model_name_
 # We use make dataclass to dynamically fill the scores from Tasks
 AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
 
+
 ## For the queue columns in the submission tab
 @dataclass(frozen=True)
 class EvalQueueColumn: # Queue column

@@ -57,12 +60,13 @@ class EvalQueueColumn: # Queue column
     weight_type = ColumnContent("weight_type", "str", "Original")
     status = ColumnContent("status", "str", True)
 
+
 ## All the model information that we might need
 @dataclass
 class ModelDetails:
     name: str
     display_name: str = ""
-    symbol: str = ""
+    symbol: str = ""  # emoji
 
 
 class ModelType(Enum):

@@ -87,11 +91,13 @@ class ModelType(Enum):
            return ModelType.IFT
        return ModelType.Unknown
 
+
 class WeightType(Enum):
     Adapter = ModelDetails("Adapter")
     Original = ModelDetails("Original")
     Delta = ModelDetails("Delta")
 
+
 class Precision(Enum):
     float16 = ModelDetails("float16")
     bfloat16 = ModelDetails("bfloat16")

@@ -104,23 +110,26 @@ class Precision(Enum):
            return Precision.bfloat16
        return Precision.Unknown
 
+
 class AddSpecialTokens(Enum):
     true = ModelDetails("True")
     false = ModelDetails("False")
     Unknown = ModelDetails("?")
 
+
 class NumFewShots(Enum):
     shots_0 = ModelDetails("0")
     shots_4 = ModelDetails("4")
     Unknown = ModelDetails("?")
 
     def from_str(shots):
-        if shots=="0":
+        if shots == "0":
             return NumFewShots.shots_0
-        if shots=="4":
+        if shots == "4":
             return NumFewShots.shots_4
         return NumFewShots.Unknown
 
+
 # Column selection
 COLS = [c.name for c in fields(AutoEvalColumn) if not c.hidden]
 TYPES = [c.type for c in fields(AutoEvalColumn)]
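The least obvious piece above is the `make_dataclass` call: `AutoEvalColumn` is generated at import time so every `Task` becomes a typed column definition, and `fields()` later walks the class `__dict__` to recover the `ColumnContent` defaults. Below is a self-contained sketch of that mechanism under stated assumptions; the column entries and the `displayed_by_default` field name are illustrative, not the real schema.

```python
from dataclasses import dataclass, make_dataclass


@dataclass(frozen=True)
class ColumnContent:
    name: str
    type: str
    displayed_by_default: bool
    hidden: bool = False
    never_hidden: bool = False
    dummy: bool = False


# [name, annotation, default] triples, exactly like auto_eval_column_dict above
cols = [
    ["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)],
    ["jsts_pearson", ColumnContent, ColumnContent("JSTS (Pearson)", "number", True)],
]
AutoEvalColumn = make_dataclass("AutoEvalColumn", cols, frozen=True)


def fields(raw_class):
    # skip dunder attributes, keep the ColumnContent defaults
    return [v for k, v in raw_class.__dict__.items() if k[:2] != "__" and k[-2:] != "__"]


COLS = [c.name for c in fields(AutoEvalColumn) if not c.hidden]
print(COLS)  # ['Model', 'JSTS (Pearson)']
```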
src/envs.py
CHANGED

@@ -4,9 +4,9 @@ from huggingface_hub import HfApi
 
 # Info to change for your repository
 # ----------------------------------
-TOKEN = os.environ.get("HF_TOKEN")
+TOKEN = os.environ.get("HF_TOKEN")  # A read/write token for your org
 
-OWNER = "llm-jp"
+OWNER = "llm-jp"  # Change to your org - don't forget to create a results and request dataset, with the correct format!
 # ----------------------------------
 
 REPO_ID = f"{OWNER}/open-japanese-llm-leaderboard"

@@ -14,7 +14,7 @@ QUEUE_REPO = f"{OWNER}/requests"
 RESULTS_REPO = f"{OWNER}/results"
 
 # If you setup a cache later, just change HF_HOME
-CACHE_PATH=os.getenv("HF_HOME", ".")
+CACHE_PATH = os.getenv("HF_HOME", ".")
 
 # Local caches
 EVAL_REQUESTS_PATH = os.path.join(CACHE_PATH, "eval-queue")
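For reference, these constants are typically consumed at app start roughly as sketched below. `API` is real (it is imported in `src/submission/submit.py` later in this commit); `EVAL_RESULTS_PATH` and the `snapshot_download` calls are assumptions based on the standard leaderboard template.

```python
import os

from huggingface_hub import HfApi, snapshot_download

TOKEN = os.environ.get("HF_TOKEN")
OWNER = "llm-jp"
QUEUE_REPO = f"{OWNER}/requests"
RESULTS_REPO = f"{OWNER}/results"
CACHE_PATH = os.getenv("HF_HOME", ".")
EVAL_REQUESTS_PATH = os.path.join(CACHE_PATH, "eval-queue")
EVAL_RESULTS_PATH = os.path.join(CACHE_PATH, "eval-results")  # assumed counterpart to the queue path

API = HfApi(token=TOKEN)

# Typical startup step: mirror the request and result datasets locally.
snapshot_download(repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", token=TOKEN)
snapshot_download(repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", token=TOKEN)
```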
src/leaderboard/read_evals.py
CHANGED

@@ -1,37 +1,36 @@
 import glob
 import json
-import math
 import os
 from dataclasses import dataclass
-import dateutil
-import numpy as np
 from decimal import Decimal
 
+import dateutil
+
 from src.display.formatting import make_clickable_model
-from src.display.utils import AutoEvalColumn, ModelType, Tasks,
+from src.display.utils import AutoEvalColumn, ModelType, Tasks, WeightType
 from src.submission.check_validity import is_model_on_hub
 
 
 @dataclass
 class EvalResult:
-    """Represents one full evaluation. Built from a combination of the result and request file for a given run.
-    """
-    eval_name: str
-    full_model: str
-    org: str
+    """Represents one full evaluation. Built from a combination of the result and request file for a given run."""
+
+    eval_name: str  # org_model_precision (uid)
+    full_model: str  # org/model (path on hub)
+    org: str
     model: str
-    revision: str
+    revision: str  # commit hash, "" if main
     results: dict
     # precision: Precision = Precision.Unknown
-    model_type: ModelType = ModelType.Unknown
+    model_type: ModelType = ModelType.Unknown  # Pretrained, fine tuned, ...
     precision: str = "Unknown"
     # model_type: str = "Unknown"
-    weight_type: WeightType = WeightType.Original
-    architecture: str = "Unknown"
+    weight_type: WeightType = WeightType.Original  # Original or Adapter
+    architecture: str = "Unknown"
     license: str = "?"
     likes: int = 0
     num_params: int = 0
-    date: str = ""
+    date: str = ""  # submission date of request file
     still_on_hub: bool = False
     num_few_shots: str = "0"
     add_special_tokens: str = ""

@@ -47,7 +46,7 @@ class EvalResult:
         model_config = config.get("model", {})
 
         # Get model type from metainfo
-        # model_type_str = metainfo.get("model_type", "")
+        # model_type_str = metainfo.get("model_type", "")
         # model_type = ModelType.from_str(model_type_str)
         # model_type = metainfo.get("model_type", "Unknown")
 

@@ -59,13 +58,15 @@ class EvalResult:
         precision = model_config.get("dtype", "Unknown")
 
         # Add Special Tokens
-        add_special_tokens = str(config.get("pipeline_kwargs", {"add_special_tokens": "Unknown"}).get("add_special_tokens"))
+        add_special_tokens = str(
+            config.get("pipeline_kwargs", {"add_special_tokens": "Unknown"}).get("add_special_tokens")
+        )
 
         # Get model and org
         # org_and_model = config.get("model_name", config.get("offline_inference").get("model_name", None))
         org_and_model = config.get("model_name", config.get("offline_inference", {}).get("model_name", "Unknown"))
         org_and_model = org_and_model.split("/", 1)
-
+
         # org_and_modelがリストの場合、"/"で結合
         if isinstance(org_and_model, list):
             full_model = "/".join(org_and_model)

@@ -92,7 +93,7 @@ class EvalResult:
         architectures = getattr(model_config, "architectures", None)
         if architectures:
             architecture = ";".join(architectures)
-
+
         if "scores" not in data:
             raise KeyError(f"'scores' key not found in JSON file: {json_filepath}")
 

@@ -103,7 +104,6 @@ class EvalResult:
             score = scores.get(task_value.metric)
             results[task_value.metric] = score
 
-
         return self(
             eval_name=result_key,
             full_model=full_model,

@@ -121,12 +121,6 @@ class EvalResult:
     def update_with_request_file(self, requests_path):
         """Finds the relevant request file for the current model and updates info with it"""
         request_file = get_request_file_for_model(requests_path, self.full_model, self.precision)
-        if request_file:
-            with open(request_file, "r") as f:
-                request_data = json.load(f)
-        else:
-            print("No request file found.")
-
         try:
             with open(request_file, "r") as f:
                 request = json.load(f)

@@ -186,17 +180,15 @@ def get_request_file_for_model(requests_path, model_name, precision):
     for tmp_request_file in request_files:
         with open(tmp_request_file, "r") as f:
             req_content = json.load(f)
-            if (
-                req_content["status"] in ["FINISHED"]
-                and req_content["precision"] == precision.split(".")[-1]
-            ):
+            if req_content["status"] in ["FINISHED"] and req_content["precision"] == precision.split(".")[-1]:
                 request_file = tmp_request_file
     return request_file
 
+
 def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResult]:
     """From the path of the results folder root, extract all needed info for results"""
     model_result_filepaths = []
-
+
     for root, _, files in os.walk(results_path):
         # We should only have json files in model results
         if len(files) == 0 or any([not f.endswith(".json") for f in files]):

@@ -210,7 +202,6 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResult]:
 
         for file in files:
             model_result_filepaths.append(os.path.join(root, file))
-
 
     eval_results = {}
     for model_result_filepath in model_result_filepaths:

@@ -225,17 +216,14 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResult]:
         else:
             eval_results[eval_name] = eval_result
 
-            data_dict = eval_result.to_dict()
-
     results = []
     for v in eval_results.values():
         try:
-            v.to_dict()
+            v.to_dict()  # we test if the dict version is complete
             results.append(v)
         except KeyError: # not all eval values present
             continue
     # print(f"Processing file: {model_result_filepath}")
     # print(f"Eval result: {eval_result.to_dict()}")
 
-
-    return results
+    return results
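Putting the file together, the flow is: walk the results snapshot, build one `EvalResult` per run, enrich it from the matching request file, and keep only entries whose `to_dict()` yields every expected column. A usage sketch of how `src/populate.py` typically drives these helpers follows; the local paths are hypothetical.

```python
import pandas as pd

from src.leaderboard.read_evals import get_raw_eval_results

# hypothetical local snapshots of the results and requests datasets
raw_results = get_raw_eval_results("eval-results", "eval-queue")

# one row per model, keyed by the AutoEvalColumn names
df = pd.DataFrame.from_records([r.to_dict() for r in raw_results])
print(df.head())
```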
src/submission/check_validity.py
CHANGED

@@ -1,8 +1,6 @@
 import json
 import os
-import re
 from collections import defaultdict
-from datetime import datetime, timedelta, timezone
 
 import huggingface_hub
 from huggingface_hub import ModelCard

@@ -10,6 +8,7 @@ from huggingface_hub.hf_api import ModelInfo
 from transformers import AutoConfig
 from transformers.models.auto.tokenization_auto import AutoTokenizer
 
+
 def check_model_card(repo_id: str) -> tuple[bool, str]:
     """Checks if the model card and license exist and have been filled"""
     try:

@@ -31,31 +30,38 @@ def check_model_card(repo_id: str) -> tuple[bool, str]:
 
     return True, ""
 
-def is_model_on_hub(model_name: str, revision: str, token: str = None, trust_remote_code=False, test_tokenizer=False) -> tuple[bool, str]:
+
+def is_model_on_hub(
+    model_name: str, revision: str, token: str = None, trust_remote_code=False, test_tokenizer=False
+) -> tuple[bool, str]:
     """Checks if the model model_name is on the hub, and whether it (and its tokenizer) can be loaded with AutoClasses."""
     try:
-        config = AutoConfig.from_pretrained(model_name, revision=revision, trust_remote_code=trust_remote_code, token=token)
+        config = AutoConfig.from_pretrained(
+            model_name, revision=revision, trust_remote_code=trust_remote_code, token=token
+        )
         if test_tokenizer:
             try:
-                AutoTokenizer.from_pretrained(model_name, revision=revision, trust_remote_code=trust_remote_code, token=token)
+                AutoTokenizer.from_pretrained(
+                    model_name, revision=revision, trust_remote_code=trust_remote_code, token=token
+                )
             except ValueError as e:
+                return (False, f"uses a tokenizer which is not in a transformers release: {e}", None)
+            except Exception:
                 return (
                     False,
-                    f"uses a tokenizer which is not in a transformers release: {e}",
-                    None
+                    "'s tokenizer cannot be loaded. Is your tokenizer class in a stable transformers release, and correctly configured?",
+                    None,
                 )
-            except Exception as e:
-                return (False, "'s tokenizer cannot be loaded. Is your tokenizer class in a stable transformers release, and correctly configured?", None)
         return True, None, config
 
     except ValueError:
         return (
             False,
             "needs to be launched with `trust_remote_code=True`. For safety reason, we do not allow these models to be automatically submitted to the leaderboard.",
-            None
+            None,
         )
 
-    except Exception
+    except Exception:
         return False, "was not found on hub!", None
 
 

@@ -70,10 +76,12 @@ def get_model_size(model_info: ModelInfo, precision: str):
     model_size = size_factor * model_size
     return model_size
 
+
 def get_model_arch(model_info: ModelInfo):
     """Gets the model architecture from the configuration"""
     return model_info.config.get("architectures", "Unknown")
 
+
 def already_submitted_models(requested_models_dir: str) -> set[str]:
     """Gather a list of already submitted models to avoid duplicates"""
     depth = 1

@@ -88,7 +96,7 @@ def already_submitted_models(requested_models_dir: str) -> set[str]:
                 continue
             with open(os.path.join(root, file), "r") as f:
                 info = json.load(f)
-                if info["status"]=="FAILED":
+                if info["status"] == "FAILED":
                     continue
                 file_names.append(f"{info['model']}_{info['precision']}_{info['add_special_tokens']}")
 
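A usage sketch of the reformatted `is_model_on_hub` follows; the model id is hypothetical, and the returned error strings are phrased to be appended after the model name, which is how the submission flow typically formats them.

```python
from src.submission.check_validity import is_model_on_hub

ok, error, config = is_model_on_hub(
    model_name="llm-jp/llm-jp-13b-v2.0",  # hypothetical submission
    revision="main",
    token=None,
    trust_remote_code=False,
    test_tokenizer=True,
)
if not ok:
    print(f"Model llm-jp/llm-jp-13b-v2.0 {error}")
else:
    print("Architectures:", getattr(config, "architectures", None))
```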
src/submission/submit.py
CHANGED

@@ -3,17 +3,13 @@ import os
 from datetime import datetime, timezone
 
 from src.display.formatting import styled_error, styled_message, styled_warning
-from src.envs import API, EVAL_REQUESTS_PATH,
-from src.submission.check_validity import (
-    already_submitted_models,
-    check_model_card,
-    get_model_size,
-    is_model_on_hub,
-)
+from src.envs import API, EVAL_REQUESTS_PATH, QUEUE_REPO, TOKEN
+from src.submission.check_validity import already_submitted_models, check_model_card, get_model_size, is_model_on_hub
 
 REQUESTED_MODELS = None
 USERS_TO_SUBMISSION_DATES = None
 
+
 def add_new_eval(
     model: str,
     revision: str,