|
import json |
|
import os |
|
import ast |
|
from datetime import datetime, timezone |
|
|
|
from src.display.formatting import styled_error, styled_message, styled_warning |
|
from src.envs import API, EVAL_REQUESTS_PATH, TOKEN, QUEUE_REPO |
|
from src.submission.check_validity import ( |
|
already_submitted_models, |
|
check_model_card, |
|
get_model_size, |
|
is_model_on_hub, |
|
) |
|
from src.display.utils import PromptTemplateName |
|
|
|
# Lazily populated caches of what is already in the evaluation queue.
# Both stay ``None`` until the first call to ``add_new_eval``, which fills
# them from ``already_submitted_models(EVAL_REQUESTS_PATH)``.
REQUESTED_MODELS = None
USERS_TO_SUBMISSION_DATES = None
|
|
|
# Template for the per-dataset label normalization map: for each dataset, the
# submitter replaces every empty key with their model's own label for the
# clinical entity type on the right-hand side.
# NOTE(review): presumably used as the pre-filled text of the submission form;
# confirm against the UI code.
# Entries are comma-separated so that the filled-in text is a valid Python/JSON
# dict literal, which is what ``add_new_eval`` parses with ``ast.literal_eval``.
PLACEHOLDER_DATASET_WISE_NORMALIZATION_CONFIG = """{
    "NCBI" : {
        "" : "condition"
    },
    "CHIA" : {
        "" : "condition",
        "" : "drug",
        "" : "procedure",
        "" : "measurement"
    },
    "BIORED" : {
        "" : "condition",
        "" : "drug",
        "" : "gene",
        "" : "gene variant"
    },
    "BC5CDR" : {
        "" : "condition",
        "" : "drug"
    }
}

"""
|
|
|
def add_new_eval( |
|
model: str, |
|
|
|
revision: str, |
|
|
|
|
|
model_arch: str, |
|
label_normalization_map: str, |
|
gliner_threshold:str, |
|
gliner_tokenizer_bool:str, |
|
prompt_template_name:str, |
|
model_type: str, |
|
): |
|
""" |
|
Saves request if valid else returns the error. |
|
Validity is checked based on - |
|
- model's existence on hub |
|
- necessary info on the model's card |
|
- label normalization is a valid python dict and contains the keys for all datasets |
|
- threshold for gliner is a valid float |
|
|
|
""" |
|
global REQUESTED_MODELS |
|
global USERS_TO_SUBMISSION_DATES |
|
if not REQUESTED_MODELS: |
|
REQUESTED_MODELS, USERS_TO_SUBMISSION_DATES = already_submitted_models(EVAL_REQUESTS_PATH) |
|
|
|
user_name = "" |
|
model_path = model |
|
if "/" in model: |
|
user_name = model.split("/")[0] |
|
model_path = model.split("/")[1] |
|
|
|
|
|
current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") |
|
|
|
if model_type is None or model_type == "": |
|
return styled_error("Please select a model type.") |
|
|
|
model_type = model_type.split(":")[-1].strip() |
|
|
|
|
|
if revision == "": |
|
revision = "main" |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if not model_arch == "GLiNER Encoder": |
|
model_on_hub, error, _ = is_model_on_hub(model_name=model, revision=revision, token=TOKEN, test_tokenizer=True) |
|
if not model_on_hub: |
|
return styled_error(f'Model "{model}" {error}') |
|
else: |
|
if len(list(API.list_models(model_name=model))) !=1: |
|
return styled_error(f'Model "{model}" does not exist on the hub!') |
|
|
|
|
|
|
|
try: |
|
model_info = API.model_info(repo_id=model, revision=revision) |
|
except Exception: |
|
return styled_error("Could not get your model information. Please fill it up properly.") |
|
|
|
model_size = get_model_size(model_info=model_info) |
|
|
|
|
|
try: |
|
license = model_info.cardData["license"] |
|
except Exception: |
|
return styled_error("Please select a license for your model") |
|
|
|
modelcard_OK, error_msg = check_model_card(model) |
|
if not modelcard_OK: |
|
return styled_error(error_msg) |
|
|
|
|
|
try: |
|
label_normalization_map = ast.literal_eval(label_normalization_map) |
|
except Exception as e: |
|
return styled_error("Please enter a valid json for the labe; normalization map") |
|
|
|
inference_config = { |
|
|
|
"label_normalization_map": label_normalization_map, |
|
} |
|
|
|
match model_arch: |
|
case "Encoder": |
|
pass |
|
case "Decoder": |
|
if not prompt_template_name in [prompt_template.value for prompt_template in PromptTemplateName]: |
|
return styled_error("Prompt template name is invalid") |
|
inference_config = { |
|
**inference_config, |
|
"prompt_template_name": prompt_template_name, |
|
} |
|
case "GLiNER Encoder": |
|
try: |
|
gliner_threshold = float(gliner_threshold) |
|
gliner_tokenizer_bool = ast.literal_eval(gliner_tokenizer_bool) |
|
inference_config = { |
|
**inference_config, |
|
"gliner_threshold": gliner_threshold, |
|
"gliner_tokenizer_bool" : gliner_tokenizer_bool |
|
} |
|
except Exception as e: |
|
return styled_error("Please enter a valid float for the threshold") |
|
case _: |
|
return styled_error("Model Architecture is invalid") |
|
|
|
|
|
print("Adding new eval") |
|
|
|
|
|
eval_entry = { |
|
"model": model, |
|
|
|
"revision": revision, |
|
|
|
|
|
"model_architecture": model_arch, |
|
"status": "PENDING", |
|
"submitted_time": current_time, |
|
"model_type": model_type, |
|
"likes": model_info.likes, |
|
"params": model_size, |
|
"license": license, |
|
"private": False, |
|
"inference_config":inference_config, |
|
} |
|
|
|
|
|
|
|
if f"{model}_{revision}" in REQUESTED_MODELS: |
|
return styled_warning("This model has been already submitted. Add the revision if the model has been updated.") |
|
|
|
print("Creating eval file") |
|
OUT_DIR = f"{EVAL_REQUESTS_PATH}/{user_name}" |
|
os.makedirs(OUT_DIR, exist_ok=True) |
|
out_path = f"{OUT_DIR}/{model_path}_{revision}_eval_request.json" |
|
|
|
with open(out_path, "w") as f: |
|
f.write(json.dumps(eval_entry)) |
|
|
|
print("Uploading eval file") |
|
API.upload_file( |
|
path_or_fileobj=out_path, |
|
path_in_repo=out_path.split("eval-queue/")[1], |
|
repo_id=QUEUE_REPO, |
|
repo_type="dataset", |
|
commit_message=f"Add {model} to eval queue", |
|
) |
|
|
|
|
|
os.remove(out_path) |
|
|
|
return styled_message( |
|
"Your request has been submitted to the evaluation queue!\nPlease wait for up to an hour for the model to show in the PENDING list." |
|
) |
|
|