lvkaokao committed
Commit cf7af95 • 1 parent: a16a56e
update model size.

Files changed:
- src/submission/check_validity.py  +53 -0
- src/submission/submit.py  +4 -17
src/submission/check_validity.py
CHANGED
@@ -90,6 +90,59 @@ def get_model_size(model_info: ModelInfo, precision: str):
     # model_size = size_factor * model_size
     return model_size
 
+KNOWN_SIZE_FACTOR = {
+    "gptq": {"4bit": 8, "8bit": 4},
+    "awq": {"4bit": 8},
+    "bitsandbytes": {"4bit": 2}
+}
+
+BYTES = {
+    "I32": 4,
+    "F16": 2,
+    "BF16": 2,
+    "F32": 4,
+    "U8": 1}
+
+def get_quantized_model_parameters_memory(model_info: ModelInfo, quant_method="", bits="4bit"):
+    try:
+        safetensors = get_safetensors_metadata(model_info.id)
+        num_parameters = 0
+        mem = 0
+        for key in safetensors.parameter_count:
+            mem += safetensors.parameter_count[key] * BYTES[key]
+
+            if key in ["I32", "U8"]:
+                num_parameters += safetensors.parameter_count[key] * KNOWN_SIZE_FACTOR[quant_method][bits]
+        params_b = round(num_parameters / 1e9, 2)
+        size_gb = round(mem / 1e9,2)
+        return params_b, size_gb
+    except Exception as e:
+        print(str(e))
+
+    filenames = [sib.rfilename for sib in model_info.siblings]
+    if "pytorch_model.bin" in filenames:
+        url = hf_hub_url(model_info.id, filename="pytorch_model.bin")
+        meta = get_hf_file_metadata(url)
+        params_b = round(meta.size * 2 / 1e9, 2)
+        size_gb = round(meta.size / 1e9, 2)
+        return params_b, size_gb
+
+    if "pytorch_model.bin.index.json" in filenames:
+        index_path = hf_hub_download(model_info.id, filename="pytorch_model.bin.index.json")
+        """
+        {
+          "metadata": {
+            "total_size": 28272820224
+          },....
+        """
+        size = json.load(open(index_path))
+        bytes_per_param = 2
+        if ("metadata" in size) and ("total_size" in size["metadata"]):
+            return round(size["metadata"]["total_size"] / bytes_per_param / 1e9, 2), \
+                   round(size["metadata"]["total_size"] / 1e9, 2)
+
+    return None, None
+
 def get_model_arch(model_info: ModelInfo):
     return model_info.config.get("architectures", "Unknown")
 
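The packing arithmetic behind KNOWN_SIZE_FACTOR can be sanity-checked on its own: a 4-bit GPTQ checkpoint packs eight 4-bit weights into each stored I32 entry, so the parameter count is the I32 count times 8, while the size estimate just sums dtype byte widths. The snippet below is a minimal, self-contained sketch of that first code path only; the function name estimate_params_and_size and the repo id are illustrative and are not part of this commit (it assumes a recent huggingface_hub that exposes get_safetensors_metadata).

from huggingface_hub import get_safetensors_metadata

BYTES = {"I32": 4, "F16": 2, "BF16": 2, "F32": 4, "U8": 1}
PACK_FACTOR = {"gptq": {"4bit": 8, "8bit": 4}, "awq": {"4bit": 8}, "bitsandbytes": {"4bit": 2}}

def estimate_params_and_size(repo_id: str, quant_method: str, bits: str):
    # Reads only the safetensors headers from the Hub, no weight download.
    meta = get_safetensors_metadata(repo_id)
    mem_bytes, params = 0, 0
    for dtype, count in meta.parameter_count.items():
        mem_bytes += count * BYTES.get(dtype, 0)
        if dtype in ("I32", "U8"):
            # Packed quantized weights: unpack the stored integers back
            # into the original parameter count.
            params += count * PACK_FACTOR[quant_method][bits]
    return round(params / 1e9, 2), round(mem_bytes / 1e9, 2)

# Illustrative call; any 4-bit GPTQ repo id would do here.
print(estimate_params_and_size("TheBloke/Llama-2-7B-GPTQ", "gptq", "4bit"))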
src/submission/submit.py
CHANGED
@@ -11,6 +11,7 @@ from src.submission.check_validity import (
     already_submitted_models,
     check_model_card,
     get_model_size,
+    get_quantized_model_parameters_memory,
     is_model_on_hub,
     is_gguf_on_hub,
     user_submission_permission,
@@ -95,10 +96,6 @@ def add_new_eval(
     except Exception:
         return styled_error("Could not get your model information. Please fill it up properly.")
 
-
-    # ToDo: need to chek
-    model_size = get_model_size(model_info=model_info, precision=precision)
-
     # Were the model card and license filled?
     try:
         if model_info.cardData is None:
@@ -146,15 +143,9 @@ def add_new_eval(
     if quant_type is None or quant_type == "":
         return styled_error("Please select a quantization model like GPTQ, AWQ etc.")
 
-
-
-
-    if precision == "4bit":
-        model_size = model_params * 0.5
-
-    if precision == "8bit":
-        model_size = model_params
-
+    model_params, model_size = get_quantized_model_parameters_memory(model_info,
+                                                                      quant_method=quant_type.lower(),
+                                                                      bits=precision)
 
     if quant_type == "llama.cpp":
         hardware = "cpu"
@@ -163,9 +154,6 @@ def add_new_eval(
     else:
         hardware = "gpu"
 
-    # model = "/dataset/llama3_8b_instruct-chat-autoround-w4g128-gpu"
-    # all on gpu
-    # hardware = "gpu"
     if hardware == "gpu" and compute_dtype == "bfloat16":
         compute_dtype = "float16"
 
@@ -201,7 +189,6 @@ def add_new_eval(
         "created_at": created_at
     }
     print(eval_entry)
-    print(supplementary_info)
 
     # ToDo: need open
     # Check for duplicate submission
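For context on the submit.py change: the old precision-based estimate (model_params * 0.5 GB at 4-bit, model_params GB at 8-bit) is replaced by values read from the repository metadata itself. Below is a hedged sketch of how the new call site inside add_new_eval might guard the helper's fallback result; the None check and its error message are assumptions added for illustration, not part of this commit.

model_params, model_size = get_quantized_model_parameters_memory(
    model_info,
    quant_method=quant_type.lower(),  # e.g. "gptq", "awq", "bitsandbytes"
    bits=precision,                   # e.g. "4bit" or "8bit"
)
if model_params is None:
    # Hypothetical guard: the helper falls back to (None, None) when neither
    # safetensors metadata nor a pytorch_model.bin* file could be read.
    return styled_error("Could not determine the model size from the Hub metadata.")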