from typing import Any

import gradio as gr
import json
import requests
from html.parser import HTMLParser

# Approximate bits per weight for common GGUF quantization types.
quants = {
    "Q2_K": 3.35,
    "Q3_K_S": 3.5,
    "Q3_K_M": 3.91,
    "Q3_K_L": 4.27,
    "Q4_0": 4.55,
    "Q4_K_S": 4.58,
    "Q4_K_M": 4.85,
    "Q5_0": 5.54,
    "Q5_K_S": 5.54,
    "Q5_K_M": 5.69,
    "Q6_K": 6.59,
    "Q8_0": 8.5,
}

class SvelteHydratorExtractor(HTMLParser):
    """Extract the JSON hydration payload from a Svelte `data-props` attribute."""

    def __init__(self):
        self.data = None
        super().__init__()

    def handle_starttag(self, tag, attrs):
        for attr in attrs:
            if attr[0] == "data-props":
                # The attribute value HTML-escapes its quotes; restore them.
                self.data = attr[1].replace("&quot;", '"')

def calc_model_size(parameters: int, quant: float) -> int:
    # Model bytes = parameter count * bits per weight / 8.
    return parameters * quant // 8
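
# Worked example (illustrative): a 7.24B-parameter model at Q4_K_S
# (~4.58 bits per weight) needs 7.24e9 * 4.58 // 8 ≈ 4.14e9 bytes ≈ 4.14 GB.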

def get_model_config(hf_model: str) -> dict[str, Any]:
    config = requests.get(
        f"https://huggingface.co/{hf_model}/raw/main/config.json"
    ).json()
    model_size = 0
    try:
        # Preferred source: the safetensors index lists the total weight size.
        model_size = requests.get(
            f"https://huggingface.co/{hf_model}/raw/main/model.safetensors.index.json"
        ).json()["metadata"]["total_size"]
    except Exception:
        try:
            # Fall back to the PyTorch bin index.
            model_size = requests.get(
                f"https://huggingface.co/{hf_model}/raw/main/pytorch_model.bin.index.json"
            ).json()["metadata"]["total_size"]
        except Exception:
            # Last resort: scrape the parameter count out of the model page's
            # Svelte hydration data.
            model_page = requests.get(f"https://huggingface.co/{hf_model}").text
            param_props_idx = model_page.find('data-target="ModelSafetensorsParams"')
            if param_props_idx != -1:
                param_props_start = model_page.rfind("<div", 0, param_props_idx)
                param_props_end = model_page.find(">", param_props_idx)
                extractor = SvelteHydratorExtractor()
                extractor.feed(model_page[param_props_start:param_props_end + 1])
                model_size = json.loads(extractor.data)["safetensors"]["total"] * 2
            else:
                param_props_idx = model_page.find('data-target="ModelHeader"')
                param_props_start = model_page.rfind("<div", 0, param_props_idx)
                param_props_end = model_page.find(">", param_props_idx)
                extractor = SvelteHydratorExtractor()
                extractor.feed(model_page[param_props_start:param_props_end + 1])
                model_size = (
                    json.loads(extractor.data)["model"]["safetensors"]["total"] * 2
                )
    # Assume fp16 weights: 2 bytes per parameter.
    config["parameters"] = model_size / 2
    return config
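
# For example (illustrative): get_model_config("mistralai/Mistral-7B-v0.1")
# returns the model's config.json fields plus a "parameters" entry holding
# the parameter count derived from the total weight size.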

def calc_input_buffer_size(model_config, context: int) -> float:
    # Heuristic for the input buffers (token/embedding scratch space).
    return 4096 + 2048 * model_config["hidden_size"] + context * 4 + context * 2048

def calc_compute_buffer_size(model_config, context: int) -> float:
    # Heuristic for the compute buffer, which grows with context length.
    return (
        (context / 1024 * 2 + 0.75) * model_config["num_attention_heads"] * 1024 * 1024
    )

def calc_context_size(model_config, context: int) -> float:
    # KV cache size: with grouped-query attention, only num_key_value_heads
    # worth of the hidden dimension is cached per layer.
    n_gqa = model_config["num_attention_heads"] / model_config["num_key_value_heads"]
    n_embd_gqa = model_config["hidden_size"] / n_gqa
    n_elements = n_embd_gqa * (model_config["num_hidden_layers"] * context)
    # 2 bytes per fp16 element, times 2 for the K and V caches.
    return 2 * n_elements * 2
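
# Sanity check (illustrative, Mistral-7B-like numbers: 32 heads, 8 KV heads,
# hidden_size 4096, 32 layers, 8192-token context):
#   n_gqa = 32 / 8 = 4; n_embd_gqa = 4096 / 4 = 1024
#   n_elements = 1024 * 32 * 8192 ≈ 2.68e8 -> 2 * 2 * 2.68e8 ≈ 1.07 GB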

def calc(model_base, context, quant_size):
    model_config = get_model_config(model_base)
    # Accept either a raw bits-per-weight value or a named GGUF quant.
    try:
        quant_bpw = float(quant_size)
    except Exception:
        quant_bpw = quants[quant_size]
    model_size = round(
        calc_model_size(model_config["parameters"], quant_bpw) / 1000 / 1000 / 1000, 2
    )
    context_size = round(
        (
            calc_input_buffer_size(model_config, context)
            + calc_context_size(model_config, context)
            + calc_compute_buffer_size(model_config, context)
        )
        / 1000
        / 1000
        / 1000,
        2,
    )
    return model_size, context_size, round(model_size + context_size, 2)
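
# Example (illustrative): calc("mistralai/Mistral-7B-v0.1", 8192, "Q4_K_S")
# returns (model_gb, context_gb, total_gb), each rounded to two decimals.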

title = "GGUF VRAM Calculator"

with gr.Blocks(title=title, theme=gr.themes.Monochrome()) as app:
    default_model = "mistralai/Mistral-7B-v0.1"
    default_quant = "Q4_K_S"
    default_context = 8192
    default_size = calc(default_model, default_context, default_quant)
    default_model_size = default_size[0]
    default_context_size = default_size[1]
    gr.Markdown(
        f"# {app.title}\n"
        "## This space has been superseded by "
        "[NyxKrage/LLM-Model-VRAM-Calculator](https://huggingface.co/spaces/NyxKrage/LLM-Model-VRAM-Calculator), "
        "which has model search built in and doesn't rely on Gradio.\n"
        "This is meant only as a guide and will not be 100% accurate. It also does "
        "not account for anything running in the background on your system, or for "
        "CUDA system memory fallback on Windows."
    )
    model = gr.Textbox(
        value=default_model,
        label="Enter Unquantized HF Model Name (e.g. mistralai/Mistral-7B-v0.1)",
    )
    context = gr.Number(
        minimum=1, value=default_context, label="Desired Context Size (Tokens)"
    )
    quant = gr.Dropdown(
        choices=list(quants.keys()),
        value=default_quant,
        allow_custom_value=True,
        label="Enter GGUF Quant (e.g. Q4_K_S) or the specific BPW for other quantization schemes such as exl2 (e.g. 4.5)",
    )
    btn = gr.Button(value="Submit", variant="primary")
    btn.click(
        calc,
        inputs=[
            model,
            context,
            quant,
        ],
        outputs=[
            gr.Number(
                label="Model Size (GB)",
                value=default_size[0],
            ),
            gr.Number(
                label="Context Size (GB)",
                value=default_size[1],
            ),
            gr.Number(
                label="Total Size (GB)",
                value=default_size[2],
            ),
        ],
    )

app.launch()
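
# To run locally (assuming this file is saved as app.py and gradio/requests
# are installed): `python app.py`, then open the local URL Gradio prints.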