File size: 2,637 Bytes
097981b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import json
import os
import glob
import pprint
import re
from datetime import datetime, timezone

import click
from colorama import Fore
from huggingface_hub import HfApi, snapshot_download
from huggingface_hub.hf_api import ModelInfo

# Shared Hugging Face Hub client; add_model_info() uses it to look up model metadata.
API = HfApi()


def get_model_size(model_info: ModelInfo, precision: str):
    """Estimate the size of a model in billions of parameters.

    The safetensors metadata attached to ``model_info`` is preferred. When it
    is missing or unusable, the size is parsed from the last path component of
    ``model_info.modelId`` (for example ``"7b"`` or ``"350m"``).

    Args:
        model_info: Hub metadata for the model. Only its ``safetensors`` and
            ``modelId`` attributes are read.
        precision: Precision label of the model; ``"GPTQ"`` triggers the size
            multiplier described under *Returns*.

    Returns:
        The size in billions of parameters, rounded to three decimals, and
        multiplied by 8 when ``precision`` is ``"GPTQ"`` or the model name
        contains ``"gptq"``. ``0`` is returned when no size can be determined.
    """
    size_pattern = re.compile(r"(\d+\.)?\d+(b|m)")
    try:
        safetensors = model_info.safetensors
        # Accept both a plain dict and an object exposing ``total`` as an
        # attribute.
        # NOTE(review): newer huggingface_hub releases appear to return an
        # object rather than a dict here - confirm against the pinned version.
        if isinstance(safetensors, dict):
            total = safetensors["total"]
        else:
            total = safetensors.total
        model_size = round(total / 1e9, 3)
    except (AttributeError, KeyError, TypeError):
        # Missing, empty or malformed metadata: fall back to the size that is
        # conventionally embedded in the model name.
        try:
            size_match = size_pattern.search(model_info.modelId.split("/")[-1].lower())
            size_text = size_match.group(0)
        except AttributeError:
            return 0  # Unknown model sizes are indicated as 0, see NUMERIC_INTERVALS in app.py
        value = float(size_text[:-1])
        # A trailing "b" means billions; "m" means millions and is converted.
        model_size = round(value if size_text[-1] == "b" else value / 1e3, 3)

    # GPTQ models get their size scaled by 8 (presumably to account for
    # packed quantized weights - TODO confirm).
    is_gptq = precision == "GPTQ" or "gptq" in model_info.modelId.split("/")[-1].lower()
    size_factor = 8 if is_gptq else 1
    return size_factor * model_size


def update_request_files(requests_path):
    """Refresh the request JSON files found one directory below *requests_path*.

    Every file matching ``<requests_path>/*/*.json`` is loaded, passed through
    ``add_model_info`` and written back (indented by four spaces) only when the
    returned content differs from what was read.

    Args:
        requests_path: Directory whose immediate sub-directories hold the
            request ``.json`` files.
    """
    pattern = os.path.join(requests_path, "*/*.json")

    # Files are visited in reverse lexicographic order of their paths.
    for path in sorted(glob.glob(pattern), reverse=True):
        with open(path, "r") as handle:
            original = json.load(handle)
            enriched = add_model_info(original)

        # Identical content: leave the file on disk untouched.
        if enriched == original:
            continue

        with open(path, "w") as handle:
            handle.write(json.dumps(enriched, indent=4))

def add_model_info(entry):
    """Return a copy of *entry* enriched with metadata fetched from the Hub.

    The model named by ``entry["model"]`` at ``entry["revision"]`` is looked
    up through the module-level ``API`` client. On success the copy gains
    ``"params"`` (from ``get_model_size``), ``"likes"`` and, when the model
    card provides one, ``"license"``; the result is also printed as JSON.

    Args:
        entry: Request dictionary with at least ``"model"`` and ``"revision"``.

    Returns:
        The enriched copy, or the original *entry* object unchanged when the
        model information could not be retrieved.
    """
    model = entry["model"]
    revision = entry["revision"]

    try:
        info = API.model_info(repo_id=model, revision=revision)
    except Exception:
        # Best effort: report the failure and hand the entry back untouched.
        print(f"Could not get model information for {model} revision {revision}")
        return entry

    enriched = entry.copy()
    enriched["params"] = get_model_size(model_info=info, precision="float16")
    enriched["likes"] = info.likes

    # The license is optional: it is only present when the model card data
    # exists and declares one.
    try:
        enriched["license"] = info.cardData["license"]
    except Exception:
        print(f"No license for {model} revision {revision}")

    print(json.dumps(enriched, indent=4))
    return enriched


if __name__ == "__main__":
    import sys

    # An optional first command-line argument overrides the default requests
    # directory, so the script can run on machines where the default path
    # does not exist. Without an argument the behaviour is unchanged.
    default_requests_path = "/Volumes/Data-case-sensitive/requests"
    update_request_files(sys.argv[1] if len(sys.argv) > 1 else default_requests_path)