lvkaokao committed
Commit cf7af95 • 1 parent: a16a56e
update model size.

Files changed:
- src/submission/check_validity.py  +53 -0
- src/submission/submit.py  +4 -17
src/submission/check_validity.py
CHANGED
@@ -90,6 +90,59 @@ def get_model_size(model_info: ModelInfo, precision: str):
     # model_size = size_factor * model_size
     return model_size
 
+KNOWN_SIZE_FACTOR = {
+    "gptq": {"4bit": 8, "8bit": 4},
+    "awq": {"4bit": 8},
+    "bitsandbytes": {"4bit": 2}
+}
+
+BYTES = {
+    "I32": 4,
+    "F16": 2,
+    "BF16": 2,
+    "F32": 4,
+    "U8": 1}
+
+def get_quantized_model_parameters_memory(model_info: ModelInfo, quant_method="", bits="4bit"):
+    try:
+        safetensors = get_safetensors_metadata(model_info.id)
+        num_parameters = 0
+        mem = 0
+        for key in safetensors.parameter_count:
+            mem += safetensors.parameter_count[key] * BYTES[key]
+
+            if key in ["I32", "U8"]:
+                num_parameters += safetensors.parameter_count[key] * KNOWN_SIZE_FACTOR[quant_method][bits]
+        params_b = round(num_parameters / 1e9, 2)
+        size_gb = round(mem / 1e9,2)
+        return params_b, size_gb
+    except Exception as e:
+        print(str(e))
+
+    filenames = [sib.rfilename for sib in model_info.siblings]
+    if "pytorch_model.bin" in filenames:
+        url = hf_hub_url(model_info.id, filename="pytorch_model.bin")
+        meta = get_hf_file_metadata(url)
+        params_b = round(meta.size * 2 / 1e9, 2)
+        size_gb = round(meta.size / 1e9, 2)
+        return params_b, size_gb
+
+    if "pytorch_model.bin.index.json" in filenames:
+        index_path = hf_hub_download(model_info.id, filename="pytorch_model.bin.index.json")
+        """
+        {
+          "metadata": {
+            "total_size": 28272820224
+          },....
+        """
+        size = json.load(open(index_path))
+        bytes_per_param = 2
+        if ("metadata" in size) and ("total_size" in size["metadata"]):
+            return round(size["metadata"]["total_size"] / bytes_per_param / 1e9, 2), \
+                   round(size["metadata"]["total_size"] / 1e9, 2)
+
+    return None, None
+
 def get_model_arch(model_info: ModelInfo):
     return model_info.config.get("architectures", "Unknown")
 
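The packing arithmetic behind KNOWN_SIZE_FACTOR can be sanity-checked on its own: a 4-bit GPTQ checkpoint packs eight 4-bit weights into each stored I32 entry, so the parameter count is the I32 count times 8, while the size estimate just sums dtype byte widths. The snippet below is a minimal, self-contained sketch of that first code path only; the function name estimate_params_and_size and the repo id are illustrative and are not part of this commit (it assumes a recent huggingface_hub that exposes get_safetensors_metadata).

from huggingface_hub import get_safetensors_metadata

BYTES = {"I32": 4, "F16": 2, "BF16": 2, "F32": 4, "U8": 1}
PACK_FACTOR = {"gptq": {"4bit": 8, "8bit": 4}, "awq": {"4bit": 8}, "bitsandbytes": {"4bit": 2}}

def estimate_params_and_size(repo_id: str, quant_method: str, bits: str):
    # Reads only the safetensors headers from the Hub, no weight download.
    meta = get_safetensors_metadata(repo_id)
    mem_bytes, params = 0, 0
    for dtype, count in meta.parameter_count.items():
        mem_bytes += count * BYTES.get(dtype, 0)
        if dtype in ("I32", "U8"):
            # Packed quantized weights: unpack the stored integers back
            # into the original parameter count.
            params += count * PACK_FACTOR[quant_method][bits]
    return round(params / 1e9, 2), round(mem_bytes / 1e9, 2)

# Illustrative call; any 4-bit GPTQ repo id would do here.
print(estimate_params_and_size("TheBloke/Llama-2-7B-GPTQ", "gptq", "4bit"))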
src/submission/submit.py
CHANGED
@@ -11,6 +11,7 @@ from src.submission.check_validity import (
     already_submitted_models,
     check_model_card,
     get_model_size,
+    get_quantized_model_parameters_memory,
     is_model_on_hub,
     is_gguf_on_hub,
     user_submission_permission,
@@ -95,10 +96,6 @@ def add_new_eval(
     except Exception:
         return styled_error("Could not get your model information. Please fill it up properly.")
 
-
-    # ToDo: need to chek
-    model_size = get_model_size(model_info=model_info, precision=precision)
-
     # Were the model card and license filled?
     try:
         if model_info.cardData is None:
@@ -146,15 +143,9 @@ def add_new_eval(
     if quant_type is None or quant_type == "":
         return styled_error("Please select a quantization model like GPTQ, AWQ etc.")
 
-
-
-
-    if precision == "4bit":
-        model_size = model_params * 0.5
-
-    if precision == "8bit":
-        model_size = model_params
-
+    model_params, model_size = get_quantized_model_parameters_memory(model_info,
+                                                                      quant_method=quant_type.lower(),
+                                                                      bits=precision)
 
     if quant_type == "llama.cpp":
         hardware = "cpu"
@@ -163,9 +154,6 @@ def add_new_eval(
     else:
         hardware = "gpu"
 
-    # model = "/dataset/llama3_8b_instruct-chat-autoround-w4g128-gpu"
-    # all on gpu
-    # hardware = "gpu"
     if hardware == "gpu" and compute_dtype == "bfloat16":
         compute_dtype = "float16"
 
@@ -201,7 +189,6 @@ def add_new_eval(
         "created_at": created_at
     }
     print(eval_entry)
-    print(supplementary_info)
 
     # ToDo: need open
     # Check for duplicate submission
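For context on the submit.py change: the old precision-based estimate (model_params * 0.5 GB at 4-bit, model_params GB at 8-bit) is replaced by values read from the repository metadata itself. Below is a hedged sketch of how the new call site inside add_new_eval might guard the helper's fallback result; the None check and its error message are assumptions added for illustration, not part of this commit.

model_params, model_size = get_quantized_model_parameters_memory(
    model_info,
    quant_method=quant_type.lower(),  # e.g. "gptq", "awq", "bitsandbytes"
    bits=precision,                   # e.g. "4bit" or "8bit"
)
if model_params is None:
    # Hypothetical guard: the helper falls back to (None, None) when neither
    # safetensors metadata nor a pytorch_model.bin* file could be read.
    return styled_error("Could not determine the model size from the Hub metadata.")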