# open-ko-llm-leaderboard / scripts / update_request_files.py
# Sean Cho
# Big update
# 097981b
import json
import os
import glob
import pprint
import re
from datetime import datetime, timezone
import click
from colorama import Fore
from huggingface_hub import HfApi, snapshot_download
from huggingface_hub.hf_api import ModelInfo
# Shared Hugging Face Hub client; add_model_info uses it to look up model metadata.
API = HfApi()
def get_model_size(model_info: ModelInfo, precision: str):
    """Estimate a model's parameter count in billions.

    The count is taken from the Hub's safetensors metadata when available;
    otherwise it is parsed from a size tag such as ``7b`` or ``350m`` in the
    repository name.

    Args:
        model_info: Hub metadata for the model. ``safetensors`` may be a
            mapping with a ``"total"`` key or an object exposing a ``total``
            attribute; ``modelId`` is the ``owner/name`` repository id.
        precision: Precision label of the submission. ``"GPTQ"`` (or a
            ``gptq`` substring in the repository name) multiplies the
            result by 8.

    Returns:
        The size in billions of parameters, rounded to 3 decimals before the
        GPTQ factor is applied, or 0 when the size cannot be determined.
    """
    size_pattern = re.compile(r"(\d+\.)?\d+(b|m)")

    # Accept both the mapping form and the attribute form of the safetensors
    # metadata; a missing "total" key must not abort the whole lookup.
    safetensors = getattr(model_info, "safetensors", None)
    try:
        total_params = safetensors["total"]
    except (AttributeError, KeyError, TypeError):
        total_params = getattr(safetensors, "total", None)

    try:
        model_size = round(total_params / 1e9, 3)
    except TypeError:
        # No usable safetensors count: fall back to the size tag in the name.
        try:
            model_name = model_info.modelId.split("/")[-1].lower()
            size_tag = re.search(size_pattern, model_name).group(0)
            value = float(size_tag[:-1])
            # "b" tags are already in billions; "m" tags are millions.
            model_size = round(value if size_tag[-1] == "b" else value / 1e3, 3)
        except AttributeError:
            return 0  # Unknown model sizes are indicated as 0, see NUMERIC_INTERVALS in app.py

    # NOTE(review): presumably GPTQ checkpoints pack several quantized weights
    # into each stored element, hence the factor of 8 -- confirm.
    is_gptq = precision == "GPTQ" or "gptq" in model_info.modelId.split("/")[-1].lower()
    size_factor = 8 if is_gptq else 1
    return size_factor * model_size
def update_request_files(requests_path):
    """Refresh Hub metadata in every request file under ``requests_path``.

    Each ``<requests_path>/*/*.json`` file is loaded, passed through
    ``add_model_info`` and rewritten only when the result differs from what
    was read.

    Args:
        requests_path: Directory whose immediate subdirectories contain the
            request JSON files.
    """
    pattern = os.path.join(requests_path, "*/*.json")

    # Files are visited in reverse lexicographic path order.
    for request_file in sorted(glob.glob(pattern), reverse=True):
        # Read as UTF-8 explicitly instead of relying on the locale default.
        with open(request_file, "r", encoding="utf-8") as f:
            req_content = json.load(f)

        new_req_content = add_model_info(req_content)

        # if new content is different, update the file
        if new_req_content != req_content:
            # Serialize before opening for writing: opening with "w" truncates
            # the file, so a serialization error must happen first.
            payload = json.dumps(new_req_content, indent=4)
            with open(request_file, "w", encoding="utf-8") as f:
                f.write(payload)
def add_model_info(entry):
    """Return a copy of a request entry enriched with Hub metadata.

    The entry's ``"model"`` and ``"revision"`` values identify the repository
    to query. On success the copy gains ``"params"`` and ``"likes"``, plus
    ``"license"`` when the model card provides one, and is printed as
    indented JSON. If the Hub lookup fails, a message is printed and the
    original entry is returned unchanged.
    """
    repo_id, rev = entry["model"], entry["revision"]

    try:
        hub_info = API.model_info(repo_id=repo_id, revision=rev)
    except Exception:
        print(f"Could not get model information for {repo_id} revision {rev}")
        return entry

    enriched = entry.copy()
    # NOTE(review): precision is fixed to "float16" here rather than read from
    # the entry -- confirm this is intended for quantized submissions.
    enriched["params"] = get_model_size(model_info=hub_info, precision="float16")
    enriched["likes"] = hub_info.likes

    # The license is optional: any failure to read it from the card data
    # leaves the key unset and is only reported.
    try:
        enriched["license"] = hub_info.cardData["license"]
    except Exception:
        print(f"No license for {repo_id} revision {rev}")

    print(json.dumps(enriched, indent=4))
    return enriched
if __name__ == "__main__":
    import sys

    # Default requests checkout; pass a different directory as the first
    # command-line argument to process another location.
    default_requests_path = "/Volumes/Data-case-sensitive/requests"
    update_request_files(sys.argv[1] if len(sys.argv) > 1 else default_requests_path)