Spaces:
Runtime error
Runtime error
File size: 5,318 Bytes
d489aeb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 |
#!/usr/bin/env python
import json
import os
from datetime import datetime, timezone
from src.envs import API, EVAL_REQUESTS_PATH, H4_TOKEN, QUEUE_REPO
from src.submission.check_validity import already_submitted_models, check_model_card, get_model_size, is_model_on_hub
def add_new_eval(model: str, base_model: str, revision: str, precision: str, private: bool, weight_type: str, model_type: str) -> None:
    """Validate a model submission and upload its eval request to the queue repo.

    Progress and failures are reported with ``print``. The function always
    returns ``None`` and stops at the first check that fails.

    Args:
        model: Hub repo id of the model, optionally namespaced as ``"user/name"``.
        base_model: Repo id of the base model; only checked when ``weight_type``
            is ``"Delta"`` or ``"Adapter"``.
        revision: Revision to evaluate; an empty string is replaced by ``"main"``.
        precision: Precision label; only the text before the first space is kept.
        private: Stored in the request and embedded in the request file name.
        weight_type: ``"Delta"`` and ``"Adapter"`` trigger the base-model check;
            ``"Adapter"`` skips the Hub check on ``model`` itself.
        model_type: Must be a non-empty string, otherwise the request is rejected.
    """
    # Validate the purely local argument first so an obviously incomplete
    # request is rejected before any filesystem or Hub access happens.
    if model_type is None or model_type == "":
        print("Please select a model type.")
        return

    # Only the set of already requested models is needed here.
    requested_models, _ = already_submitted_models(EVAL_REQUESTS_PATH)

    user_name = ""
    model_path = model
    if "/" in model:
        user_name, model_path = model.split("/")[:2]

    precision = precision.split(" ")[0]
    current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    # An empty revision means the default branch.
    if revision == "":
        revision = "main"

    # Is the base model on the hub? (only relevant for delta / adapter weights)
    if weight_type in ["Delta", "Adapter"]:
        base_model_on_hub, error, _ = is_model_on_hub(model_name=base_model, revision=revision, token=H4_TOKEN, test_tokenizer=True)
        if not base_model_on_hub:
            print(f'Base model "{base_model}" {error}')
            return

    # Is the model itself on the hub?
    if not weight_type == "Adapter":
        # NOTE(review): unlike the base-model check above, no token is passed
        # here -- confirm whether that difference is intentional.
        model_on_hub, error, _ = is_model_on_hub(model_name=model, revision=revision, test_tokenizer=True)
        if not model_on_hub:
            print(f'Model "{model}" {error}')
            return

    # Is the model info correctly filled?
    try:
        model_info = API.model_info(repo_id=model, revision=revision)
    except Exception:
        print("Could not get your model information. Please fill it up properly.")
        return

    model_size = get_model_size(model_info=model_info, precision=precision)

    # NOTE: a missing license is only reported, it does not abort the
    # submission; the model-card check (check_model_card) is not applied here.
    model_license = 'none'
    try:
        model_license = model_info.cardData["license"]
    except (AttributeError, KeyError, TypeError):
        # No card data at all, card data that is not subscriptable, or no
        # "license" entry in it.
        print("Please select a license for your model")

    # Seems good, creating the eval
    print("Adding new eval")
    eval_entry = {
        "model": model,
        "base_model": base_model,
        "revision": revision,
        "private": private,
        "precision": precision,
        "weight_type": weight_type,
        "status": "PENDING",
        "submitted_time": current_time,
        "model_type": model_type,
        "likes": model_info.likes,
        "params": model_size,
        "license": model_license,
    }

    # Check for duplicate submission
    if f"{model}_{revision}_{precision}" in requested_models:
        print("This model has been already submitted.")
        return

    print("Creating eval file")
    out_dir = f"{EVAL_REQUESTS_PATH}/{user_name}"
    os.makedirs(out_dir, exist_ok=True)
    out_path = f"{out_dir}/{model_path}_eval_request_{private}_{precision}_{weight_type}.json"

    with open(out_path, "w", encoding="utf-8") as f:
        json.dump(eval_entry, f)

    print("Uploading eval file")
    try:
        API.upload_file(
            path_or_fileobj=out_path,
            # Path inside the dataset repo: everything after the local "eval-queue/" directory.
            path_in_repo=out_path.split("eval-queue/")[1],
            repo_id=QUEUE_REPO,
            repo_type="dataset",
            commit_message=f"Add {model} to eval queue",
        )
    finally:
        # Remove the local file even when the upload fails, so a failed
        # attempt does not leave a stale request file behind.
        os.remove(out_path)

    print("Your request has been submitted to the evaluation queue!\nPlease wait for up to an hour for the model to show in the PENDING list.")
    return
def main():
    """Submit the most-downloaded public English text-generation models for evaluation.

    Lists the models on the Hub, keeps the public ones tagged ``'en'`` whose
    pipeline tag is ``'text-generation'``, sorts them by download count and
    calls ``add_new_eval`` for each of the top 50 that does not already have
    an eval request in the queue repo.
    """
    from huggingface_hub import HfApi, snapshot_download
    from src.backend.envs import EVAL_REQUESTS_PATH_BACKEND
    from src.backend.manage_requests import EvalRequest, get_eval_requests

    api = HfApi()

    def custom_filter(m) -> bool:
        # `tags` is guarded because the Hub may return no tag list for a model.
        return m.pipeline_tag in {'text-generation'} and 'en' in (m.tags or ()) and m.private is False

    # Filter while streaming the listing instead of copying the whole model
    # list first; a missing download count sorts as 0.
    filtered_model_lst = sorted(
        (m for m in api.list_models() if custom_filter(m)),
        key=lambda m: m.downloads or 0,
        reverse=True,
    )

    # Fetch the queue once up front: the snapshot and the request listing do
    # not depend on the candidate model, so repeating them per model is wasted work.
    snapshot_download(repo_id=QUEUE_REPO, revision="main", local_dir=EVAL_REQUESTS_PATH_BACKEND, repo_type="dataset", max_workers=60)

    PENDING_STATUS = "PENDING"
    RUNNING_STATUS = "RUNNING"
    FINISHED_STATUS = "FINISHED"
    FAILED_STATUS = "FAILED"
    status = [PENDING_STATUS, RUNNING_STATUS, FINISHED_STATUS, FAILED_STATUS]

    # Get the eval requests in every status, so a model that was ever
    # requested is not queued again.
    eval_requests: list[EvalRequest] = get_eval_requests(job_status=status, hf_repo=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH_BACKEND)
    requested_model_names = {e.model for e in eval_requests}

    for model in filtered_model_lst[:50]:
        print(f'Considering {model.id} ..')

        if model.id not in requested_model_names:
            add_new_eval(model=model.id, base_model='', revision='main', precision='float32', private=False, weight_type='Original', model_type='pretrained')
            # Remember the attempt locally, since the queue snapshot is not refreshed inside the loop.
            requested_model_names.add(model.id)
        else:
            print(f'Model {model.id} already added, not adding it to the queue again.')
# Run the submission routine only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|