# open-ko-llm-leaderboard / scripts / update_request_files.py
# Sean Cho
# Big update
# 097981b
# raw history blame
# No virus
# 2.64 kB
import json
import os
import glob
import pprint
import re
from datetime import datetime, timezone
import click
from colorama import Fore
from huggingface_hub import HfApi, snapshot_download
from huggingface_hub.hf_api import ModelInfo
# Shared Hugging Face Hub client, created once at import time; add_model_info
# uses it to look up model metadata.
API = HfApi()
def get_model_size(model_info: ModelInfo, precision: str):
    """Estimate the size of a model, in billions of parameters.

    The safetensors metadata attached to ``model_info`` is preferred. When it
    is missing or unusable, the size is parsed from the last path component of
    the model id (e.g. ``"7b"`` -> 7.0, ``"350m"`` -> 0.35).

    Args:
        model_info: Hub model description; its ``safetensors`` and ``modelId``
            attributes are read.
        precision: Precision label of the request. ``"GPTQ"`` (or ``"gptq"``
            appearing in the model name) multiplies the result by 8.

    Returns:
        The size rounded to 3 decimals and then scaled by the GPTQ factor, or
        ``0`` when no size can be determined.
    """
    size_pattern = re.compile(r"(\d+\.)?\d+(b|m)")

    try:
        safetensors = model_info.safetensors
        try:
            # Mapping-style metadata: {"total": <parameter count>, ...}.
            total_params = safetensors["total"]
        except TypeError:
            # Object-style metadata exposing ``.total``; a ``None`` value ends
            # up here too and raises AttributeError, handled below.
            total_params = safetensors.total
        model_size = round(total_params / 1e9, 3)
    except (AttributeError, KeyError, TypeError):
        # No usable safetensors metadata: fall back to the size advertised in
        # the model name.
        try:
            size_match = re.search(size_pattern, model_info.modelId.split("/")[-1].lower())
            size_text = size_match.group(0)  # AttributeError when nothing matched
            magnitude = float(size_text[:-1])
            # A "b" suffix is already in billions; "m" (millions) is converted.
            model_size = round(magnitude if size_text[-1] == "b" else magnitude / 1e3, 3)
        except AttributeError:
            return 0  # Unknown model sizes are indicated as 0, see NUMERIC_INTERVALS in app.py

    # NOTE(review): the x8 factor for GPTQ presumably compensates for packed
    # quantized weights being counted as fewer parameters — confirm.
    is_gptq = precision == "GPTQ" or "gptq" in model_info.modelId.split("/")[-1].lower()
    size_factor = 8 if is_gptq else 1
    return size_factor * model_size
def update_request_files(requests_path):
    """Refresh every request file found under ``requests_path``.

    Each ``<requests_path>/*/*.json`` file is loaded, passed through
    ``add_model_info``, and rewritten (4-space indented JSON) only when the
    enriched content differs from what was on disk. Files are visited in
    reverse-sorted path order.
    """
    pattern = os.path.join(requests_path, "*/*.json")

    for request_path in sorted(glob.glob(pattern), reverse=True):
        with open(request_path, "r") as handle:
            current = json.load(handle)

        refreshed = add_model_info(current)

        # Leave the file untouched when nothing changed.
        if refreshed == current:
            continue

        serialized = json.dumps(refreshed, indent=4)
        with open(request_path, "w") as handle:
            handle.write(serialized)
def add_model_info(entry):
    """Return a copy of ``entry`` enriched with metadata from the Hub.

    ``entry`` must provide ``"model"`` and ``"revision"``. The copy gains
    ``"params"`` (from ``get_model_size``), ``"likes"`` and, when the model
    card data provides one, ``"license"``. If the Hub lookup fails, a message
    is printed and the original ``entry`` is returned unchanged. The enriched
    entry is also printed as indented JSON.
    """
    model, revision = entry["model"], entry["revision"]

    try:
        model_info = API.model_info(repo_id=model, revision=revision)
    except Exception:
        print(f"Could not get model information for {model} revision {revision}")
        return entry

    new_entry = entry.copy()
    new_entry.update(
        params=get_model_size(model_info=model_info, precision='float16'),
        likes=model_info.likes,
    )

    # The license is optional: any failure to read it from the card data is
    # only reported, not raised.
    try:
        new_entry["license"] = model_info.cardData["license"]
    except Exception:
        print(f"No license for {model} revision {revision}")

    print(json.dumps(new_entry, indent=4))
    return new_entry
if __name__ == "__main__":
    import sys

    # The requests directory may be given as the first command-line argument;
    # without one, the previously hard-coded location is used.
    # Alternative local checkout used before:
    #   /Users/sean/workspace/leaderboard/leaderboard-test-requests
    default_requests_path = "/Volumes/Data-case-sensitive/requests"
    update_request_files(sys.argv[1] if len(sys.argv) > 1 else default_requests_path)