pminervini committed on
Commit
d489aeb
1 Parent(s): 83d660d
app.py CHANGED
File without changes
backend-cli.py CHANGED
@@ -1,3 +1,5 @@
+#!/usr/bin/env python
+
 import os
 import json
 
src/backend/manage_requests.py CHANGED
@@ -82,7 +82,8 @@ def get_eval_requests(job_status: list, local_dir: str, hf_repo: str) -> list[Ev
         # breakpoint()
         data["json_filepath"] = json_filepath
 
-        del data['job_id']
+        if 'job_id' in data:
+            del data['job_id']
 
         eval_request = EvalRequest(**data)
         eval_requests.append(eval_request)
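A minimal equivalent for the guard above (a sketch, not part of the commit): the same effect can be had with a single call,

    data.pop('job_id', None)  # drops the key if present; no-op otherwise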
src/submission/check_validity.py CHANGED
@@ -41,14 +41,17 @@ def is_model_on_hub(model_name: str, revision: str, token: str = None, trust_rem
     try:
         config = AutoConfig.from_pretrained(model_name, revision=revision, trust_remote_code=trust_remote_code, token=token)
         if test_tokenizer:
-            tokenizer_config = get_tokenizer_config(model_name)
+            tokenizer_config = get_tokenizer_config(model_name)
+
             if tokenizer_config is not None:
                 tokenizer_class_candidate = tokenizer_config.get("tokenizer_class", None)
             else:
-                tokenizer_class_candidate = config.tokenizer_class
+                tokenizer_class_candidate = config.tokenizer_class
 
+            tokenizer_class = None
+            if tokenizer_class_candidate is not None:
+                tokenizer_class = tokenizer_class_from_name(tokenizer_class_candidate)
 
-            tokenizer_class = tokenizer_class_from_name(tokenizer_class_candidate)
             if tokenizer_class is None:
                 return (
                     False,
@@ -65,6 +68,7 @@ def is_model_on_hub(model_name: str, revision: str, token: str = None, trust_rem
         )
 
     except Exception as e:
+        print('XXX', e)
         return False, "was not found on hub!", None
 
 
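The guarded tokenizer lookup introduced above can be exercised on its own; a minimal sketch (the model id is illustrative and it assumes `transformers` is installed):

    from transformers import AutoConfig
    from transformers.models.auto.tokenization_auto import get_tokenizer_config, tokenizer_class_from_name

    model_name = "gpt2"  # hypothetical example model
    config = AutoConfig.from_pretrained(model_name)
    tokenizer_config = get_tokenizer_config(model_name)

    if tokenizer_config is not None:
        tokenizer_class_candidate = tokenizer_config.get("tokenizer_class", None)
    else:
        tokenizer_class_candidate = config.tokenizer_class

    # Only resolve the class when a candidate name exists, so
    # tokenizer_class_from_name(None) is never called.
    tokenizer_class = None
    if tokenizer_class_candidate is not None:
        tokenizer_class = tokenizer_class_from_name(tokenizer_class_candidate)

    print(tokenizer_class)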
submit-cli.py ADDED
@@ -0,0 +1,152 @@
+#!/usr/bin/env python
+
+import json
+import os
+
+from datetime import datetime, timezone
+
+from src.envs import API, EVAL_REQUESTS_PATH, H4_TOKEN, QUEUE_REPO
+from src.submission.check_validity import already_submitted_models, check_model_card, get_model_size, is_model_on_hub
+
+
+def add_new_eval(model: str, base_model: str, revision: str, precision: str, private: bool, weight_type: str, model_type: str):
+    REQUESTED_MODELS, USERS_TO_SUBMISSION_DATES = already_submitted_models(EVAL_REQUESTS_PATH)
+
+    user_name = ""
+    model_path = model
+    if "/" in model:
+        tokens = model.split("/")
+        user_name = tokens[0]
+        model_path = tokens[1]
+
+    precision = precision.split(" ")[0]
+    current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
+
+    if model_type is None or model_type == "":
+        return print("Please select a model type.")
+
+    # Does the model actually exist?
+    if revision == "":
+        revision = "main"
+
+    # Is the model on the hub?
+    if weight_type in ["Delta", "Adapter"]:
+        base_model_on_hub, error, _ = is_model_on_hub(model_name=base_model, revision=revision, token=H4_TOKEN, test_tokenizer=True)
+        if not base_model_on_hub:
+            print(f'Base model "{base_model}" {error}')
+            return
+
+    if not weight_type == "Adapter":
+        model_on_hub, error, _ = is_model_on_hub(model_name=model, revision=revision, test_tokenizer=True)
+        if not model_on_hub:
+            print(f'Model "{model}" {error}')
+            return
+
+    # Is the model info correctly filled?
+    try:
+        model_info = API.model_info(repo_id=model, revision=revision)
+    except Exception:
+        print("Could not get your model information. Please fill it up properly.")
+        return
+
+    model_size = get_model_size(model_info=model_info, precision=precision)
+
+    license = 'none'
+    try:
+        license = model_info.cardData["license"]
+    except Exception:
+        print("Please select a license for your model")
+        # return
+
+    # modelcard_OK, error_msg = check_model_card(model)
+    # if not modelcard_OK:
+    #     print(error_msg)
+    #     return
+
+    # Seems good, creating the eval
+    print("Adding new eval")
+
+    eval_entry = {
+        "model": model,
+        "base_model": base_model,
+        "revision": revision,
+        "private": private,
+        "precision": precision,
+        "weight_type": weight_type,
+        "status": "PENDING",
+        "submitted_time": current_time,
+        "model_type": model_type,
+        "likes": model_info.likes,
+        "params": model_size,
+        "license": license,
+    }
+
+    # Check for duplicate submission
+    if f"{model}_{revision}_{precision}" in REQUESTED_MODELS:
+        print("This model has been already submitted.")
+        return
+
+    print("Creating eval file")
+    OUT_DIR = f"{EVAL_REQUESTS_PATH}/{user_name}"
+    os.makedirs(OUT_DIR, exist_ok=True)
+    out_path = f"{OUT_DIR}/{model_path}_eval_request_{private}_{precision}_{weight_type}.json"
+
+    with open(out_path, "w") as f:
+        f.write(json.dumps(eval_entry))
+
+    print("Uploading eval file")
+    API.upload_file(path_or_fileobj=out_path, path_in_repo=out_path.split("eval-queue/")[1],
+                    repo_id=QUEUE_REPO, repo_type="dataset", commit_message=f"Add {model} to eval queue")
+
+    # Remove the local file
+    os.remove(out_path)
+
+    print("Your request has been submitted to the evaluation queue!\nPlease wait for up to an hour for the model to show in the PENDING list.")
+    return
+
+
+def main():
+    from huggingface_hub import HfApi
+
+    api = HfApi()
+    model_lst = api.list_models()
+
+    model_lst = [m for m in model_lst]
+
+    def custom_filter(m) -> bool:
+        return m.pipeline_tag in {'text-generation'} and 'en' in m.tags and m.private is False
+
+    filtered_model_lst = sorted([m for m in model_lst if custom_filter(m)], key=lambda m: m.downloads, reverse=True)
+
+    for i in range(min(50, len(filtered_model_lst))):
+        model = filtered_model_lst[i]
+
+        print(f'Considering {model.id} ..')
+
+        from huggingface_hub import snapshot_download
+        from src.backend.envs import EVAL_REQUESTS_PATH_BACKEND
+        from src.backend.manage_requests import get_eval_requests
+        from src.backend.manage_requests import EvalRequest
+
+        snapshot_download(repo_id=QUEUE_REPO, revision="main", local_dir=EVAL_REQUESTS_PATH_BACKEND, repo_type="dataset", max_workers=60)
+
+        PENDING_STATUS = "PENDING"
+        RUNNING_STATUS = "RUNNING"
+        FINISHED_STATUS = "FINISHED"
+        FAILED_STATUS = "FAILED"
+
+        status = [PENDING_STATUS, RUNNING_STATUS, FINISHED_STATUS, FAILED_STATUS]
+
+        # Get all eval request that are FINISHED, if you want to run other evals, change this parameter
+        eval_requests: list[EvalRequest] = get_eval_requests(job_status=status, hf_repo=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH_BACKEND)
+
+        requested_model_names = {e.model for e in eval_requests}
+
+        if model.id not in requested_model_names:
+            add_new_eval(model=model.id, base_model='', revision='main', precision='float32', private=False, weight_type='Original', model_type='pretrained')
+        else:
+            print(f'Model {model.id} already added, not adding it to the queue again.')
+
+
+if __name__ == "__main__":
+    main()
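For a one-off submission outside the Hub crawl performed by main(), add_new_eval could also be called directly; a hypothetical sketch (the model id is illustrative only):

    add_new_eval(model='EleutherAI/pythia-70m', base_model='', revision='main',
                 precision='float32', private=False, weight_type='Original',
                 model_type='pretrained')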