pminervini committed
Commit
1793c69
1 Parent(s): bc4faee
Files changed (1)
  1. src/submission/check_validity.py +15 -28
src/submission/check_validity.py CHANGED
@@ -7,11 +7,14 @@ from datetime import datetime, timedelta, timezone
 import huggingface_hub
 from huggingface_hub import ModelCard
 from huggingface_hub.hf_api import ModelInfo
-from transformers import AutoConfig
+
+from transformers import AutoConfig, AutoTokenizer
 from transformers.models.auto.tokenization_auto import tokenizer_class_from_name, get_tokenizer_config
 
 from src.envs import HAS_HIGHER_RATE_LIMIT
 
+from typing import Optional
+
 
 # ht to @Wauplin, thank you for the snippet!
 # See https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/317
@@ -37,39 +40,23 @@ def check_model_card(repo_id: str) -> tuple[bool, str]:
     return True, ""
 
 
-def is_model_on_hub(model_name: str, revision: str, token: str = None, trust_remote_code=False, test_tokenizer=False) -> tuple[bool, str]:
+def is_model_on_hub(model_name: str, revision: str, token: str = None, trust_remote_code=False, test_tokenizer=False) -> tuple[bool, Optional[str], Optional[AutoConfig]]:
     try:
-        config = AutoConfig.from_pretrained(model_name, revision=revision, trust_remote_code=trust_remote_code, token=token)
+        config = AutoConfig.from_pretrained(model_name, revision=revision, trust_remote_code=trust_remote_code, token=token) #, force_download=True)
         if test_tokenizer:
-            tokenizer_config = get_tokenizer_config(model_name)
-
-            if tokenizer_config is not None:
-                tokenizer_class_candidate = tokenizer_config.get("tokenizer_class", None)
-            else:
-                tokenizer_class_candidate = config.tokenizer_class
-
-            tokenizer_class = None
-            if tokenizer_class_candidate is not None:
-                tokenizer_class = tokenizer_class_from_name(tokenizer_class_candidate)
-
-            if tokenizer_class is None:
-                return (
-                    False,
-                    f"uses {tokenizer_class_candidate}, which is not in a transformers release, therefore not supported at the moment.",
-                    None
-                )
+            try:
+                AutoTokenizer.from_pretrained(model_name, revision=revision, trust_remote_code=trust_remote_code, token=token)
+            except ValueError as e:
+                return False, f"uses a tokenizer which is not in a transformers release: {e}", None
+            except Exception as e:
+                return False, "'s tokenizer cannot be loaded. Is your tokenizer class in a stable transformers release, and correctly configured?", None
         return True, None, config
 
-    except ValueError:
-        return (
-            False,
-            "needs to be launched with `trust_remote_code=True`. For safety reason, we do not allow these models to be automatically submitted to the leaderboard.",
-            None
-        )
+    except ValueError as e:
+        return False, "needs to be launched with `trust_remote_code=True`. For safety reason, we do not allow these models to be automatically submitted to the leaderboard.", None
 
     except Exception as e:
-        print('XXX', e)
-        return False, "was not found on hub!", None
+        return False, f"was not found on hub -- {str(e)}", None
 
 
 def get_model_size(model_info: ModelInfo, precision: str):
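
For reference, a minimal sketch of how the revised is_model_on_hub might be called after this change. The validate_submission wrapper below is hypothetical and not part of this commit; it only assumes the three-value return signature (on_hub flag, error message, loaded config) and the file path shown in the diff above.

    # Hypothetical caller; not part of this commit. Assumes the package layout
    # from the diff (src/submission/check_validity.py) is importable.
    from src.submission.check_validity import is_model_on_hub

    def validate_submission(model_name: str, revision: str = "main", token: str = None) -> str:
        # After this commit, is_model_on_hub returns (on_hub, error_message, config).
        # The error messages are phrased so they can be appended to the model name,
        # e.g. "<model> was not found on hub -- ...".
        on_hub, error, config = is_model_on_hub(
            model_name,
            revision,
            token=token,
            trust_remote_code=False,  # remote-code models are rejected via the ValueError branch
            test_tokenizer=True,      # also try AutoTokenizer.from_pretrained
        )
        if not on_hub:
            return f"Model {model_name} {error}"
        return f"Model {model_name} is valid (model type: {config.model_type})"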