nyxtestacc committed
Commit 177e22a · 1 Parent(s): c028b5a

Rework to calculate context size rather than relying on linear regression

Files changed:
- app.py +95 -60
- context_sizes/Llama2 13B.csv +0 -12
- context_sizes/Llama2 20B.csv +0 -12
- context_sizes/Llama2 70B.csv +0 -12
- context_sizes/Llama2 7B.csv +0 -12
- context_sizes/Mistral 7B.csv +0 -12
- context_sizes/Mixtral 8x7B.csv +0 -12
- context_sizes/Solar 10.7B_11B.csv +0 -12
- context_sizes/Yi 34B.csv +0 -12
- models.csv +0 -9
- quants.csv +0 -13
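
The reworked app.py no longer interpolates from the per-model measurement tables deleted below; it derives the context cost from fields in the model's config.json (an fp16 K and V entry per layer, scaled down by the grouped-query-attention ratio) plus llama.cpp-style input and compute buffers. A minimal standalone sketch of that estimate, mirroring the calc_context_size, calc_input_buffer_size, and calc_compute_buffer_size functions added in this commit; the config values below are illustrative Mistral-7B-style numbers rather than values fetched from the Hub:

# Sketch of the new config-driven estimate (all intermediate values in bytes).
# Illustrative Mistral-7B-style config; the app reads these fields from config.json.
config = {
    "hidden_size": 4096,
    "num_attention_heads": 32,
    "num_key_value_heads": 8,
    "num_hidden_layers": 32,
}
context = 8192  # tokens

# KV cache: K and V stored as fp16 (2 bytes) per element, shrunk by the GQA ratio.
n_gqa = config["num_attention_heads"] / config["num_key_value_heads"]
n_embd_gqa = config["hidden_size"] / n_gqa
kv_cache = 2 * (n_embd_gqa * config["num_hidden_layers"] * context) * 2

# Input and compute buffers, sized the same way as in the new app.py.
input_buffer = 4096 + 2048 * config["hidden_size"] + context * 4 + context * 2048
compute_buffer = (context / 1024 * 2 + 0.75) * config["num_attention_heads"] * 1024 * 1024

print(round((kv_cache + input_buffer + compute_buffer) / 1024**3, 2), "GB")
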
app.py CHANGED
@@ -1,78 +1,113 @@
+from typing import Any
 import gradio as gr
 import pandas as pd
-…
+import requests
+
+quants = {
+    "Q2_K": 3.35,
+    "Q3_K_S": 3.5,
+    "Q3_K_M": 3.91,
+    "Q3_K_L": 4.27,
+    "Q4_0": 4.55,
+    "Q4_K_S": 4.58,
+    "Q4_K_M": 4.85,
+    "Q5_0": 5.54,
+    "Q5_K_S": 5.54,
+    "Q5_K_M": 5.69,
+    "Q6_K": 6.59,
+    "Q8_0": 8.5,
+}
+
+
+def calc_model_size(parameters: int, quant: float) -> int:
+    return parameters * quant // 8
+
+
+def get_model_config(hf_model: str) -> dict[str, Any]:
+    config = requests.get(
+        f"https://huggingface.co/{hf_model}/raw/main/config.json"
+    ).json()
+    model_index = {}
+    try:
+        model_index = requests.get(
+            f"https://huggingface.co/{hf_model}/raw/main/model.safetensors.index.json"
+        ).json()
+    except:
+        model_index = requests.get(
+            f"https://huggingface.co/{hf_model}/raw/main/pytorch_model.bin.index.json"
+        ).json()
+
+    # assume fp16 weights
+    config["parameters"] = model_index["metadata"]["total_size"] / 2
+    return config
+
+
+def calc_input_buffer_size(model_config, context: int) -> float:
+    return 4096 + 2048 * model_config["hidden_size"] + context * 4 + context * 2048
+
+
+def calc_compute_buffer_size(model_config, context: int) -> float:
+    return (
+        (context / 1024 * 2 + 0.75) * model_config["num_attention_heads"] * 1024 * 1024
     )
 
 
-def linear_regression(xs, ys):
-    …
-    n = len(xs)
-    …
-    a = (sum_y * sum_x2 - sum_x * sum_xy) / (n * sum_x2 - sum_x**2)
-    b = (n * sum_xy - sum_x * sum_y) / (n * sum_x2 - sum_x**2)
-    return a, b
-
-
-def calc_model_size(parameters: float, quant: float) -> float:
-    return round(parameters * quant / 8, 2)
-
-
-def calc_context_size(context, model) -> float:
-    sizes = context_sizes(model)
-    a, b = linear_regression(sizes["context"], sizes["size"])
-    return round((a + b * context) / 1024, 2)
+def calc_context_size(model_config, context: int) -> float:
+    n_gqa = model_config["num_attention_heads"] / model_config["num_key_value_heads"]
+    n_embd_gqa = model_config["hidden_size"] / n_gqa
+    n_elements = n_embd_gqa * (model_config["num_hidden_layers"] * context)
+    return 2 * n_elements * 2
 
 
 def calc(model_base, context, quant_size):
-    …
-    quant_bpw = …
-    …
+    model_config = get_model_config(model_base)
+    quant_bpw = 0
+    try:
+        quant_bpw = float(quant_size)
+    except:
+        quant_bpw = quants[quant_size]
+
+    model_size = round(
+        calc_model_size(model_config["parameters"], quant_bpw) / 1024 / 1024 / 1024, 2
+    )
+    context_size = round(
+        (
+            calc_input_buffer_size(model_config, context)
+            + calc_context_size(model_config, context)
+            + calc_compute_buffer_size(model_config, context)
+        )
+        / 1024
+        / 1024
+        / 1024,
+        2,
+    )
 
-    return model_size, context_size, model_size + context_size
+    return model_size, context_size, round(model_size + context_size, 2)
 
 
 title = "GGUF VRAM Calculator"
 
 with gr.Blocks(title=title, theme=gr.themes.Monochrome()) as app:
-    default_model = "Mistral …
+    default_model = "mistralai/Mistral-7B-v0.1"
     default_quant = "Q4_K_S"
     default_context = 8192
-    …
+    default_size = calc(default_model, default_context, default_quant)
+    default_model_size = default_size[0]
+    default_context_size = default_size[1]
 
-    gr.Markdown(
-        …
+    gr.Markdown(
+        f"# {app.title}\nThis is meant only as a guide and will not be 100% accurate; it also does not account for anything that might be running in the background on your system, or for CUDA system memory fallback on Windows"
+    )
+    model = gr.Textbox(
+        value=default_model,
+        label="Enter Unquantized HF Model Name (e.g. mistralai/Mistral-7B-v0.1)",
+    )
+    context = gr.Number(
+        minimum=1, value=default_context, label="Desired Context Size (Tokens)"
     )
-    …
+    quant = gr.Textbox(
+        value=default_quant,
+        label="Enter GGUF Quant (Q4_K_S) or BPW for other quantization schemes such as exl2",
     )
     btn = gr.Button(value="Submit", variant="primary")
     btn.click(

@@ -85,15 +120,15 @@ with gr.Blocks(title=title, theme=gr.themes.Monochrome()) as app:
         outputs=[
             gr.Number(
                 label="Model Size (GB)",
-                value=…
+                value=default_size[0],
             ),
             gr.Number(
                 label="Context Size (GB)",
-                value=…
+                value=default_size[1],
             ),
             gr.Number(
                 label="Total Size (GB)",
-                value=…
+                value=default_size[2],
             ),
         ],
     )
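
For a quick sanity check outside the UI, the new calc() can be called directly; a minimal sketch, assuming it is run next to app.py (importing app executes the module-level Gradio setup, and get_model_config needs network access to huggingface.co):

# Hypothetical driver script placed alongside app.py; not part of this commit.
from app import calc

# Same defaults the Space uses: unquantized HF repo name, context in tokens, GGUF quant or BPW.
model_gb, context_gb, total_gb = calc("mistralai/Mistral-7B-v0.1", 8192, "Q4_K_S")
print(f"model {model_gb} GB + context {context_gb} GB = {total_gb} GB")
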
context_sizes/Llama2 13B.csv DELETED
@@ -1,12 +0,0 @@
-512,475
-1024,912
-2048,1794
-3072,2676
-4096,3558
-6144,5322
-8192,7086
-12288,10614
-16384,14142
-24576,21198
-32768,28254
-65536,56508

context_sizes/Llama2 20B.csv DELETED
@@ -1,12 +0,0 @@
-512,695
-1024,1352
-2048,2674
-3072,3996
-4096,5318
-6144,7962
-8192,10606
-12288,15894
-16384,21182
-24576,31782.52
-32768,42335.26
-65536,84670.52

context_sizes/Llama2 70B.csv DELETED
@@ -1,12 +0,0 @@
-512,305
-1024,498
-2048,948
-3072,1398
-4096,1848
-6144,2748
-8192,3648
-12288,5448
-16384,7248
-24576,10848
-32768,14448
-65536,28896

context_sizes/Llama2 7B.csv DELETED
@@ -1,12 +0,0 @@
-512,326.5
-1024,602
-2048,1180
-3072,1758
-4096,2336
-6144,3492
-8192,4648
-12288,6960
-16384,9272
-24576,13896
-32768,18520
-65536,37016

context_sizes/Mistral 7B.csv DELETED
@@ -1,12 +0,0 @@
-512,137
-1024,218
-2048,412
-3072,606
-4096,800
-6144,1188
-8192,1576
-12288,2352
-16384,3128
-24576,4680
-32768,6232
-65536,12440

context_sizes/Mixtral 8x7B.csv DELETED
@@ -1,12 +0,0 @@
-512,181.72
-1024,249.22
-2048,443.22
-3072,637.22
-4096,831.22
-6144,1219.22
-8192,1607.22
-12288,2383.22
-16384,3159.22
-24576,4711.22
-32768,6263.22
-65536,12471.22

context_sizes/Solar 10.7B_11B.csv DELETED
@@ -1,12 +0,0 @@
-512,172.19
-1024,285.19
-2048,543.19
-3072,801.19
-4096,1059.19
-6144,1575.19
-8192,2091.19
-12288,3123.19
-16384,4155.19
-24576,6219.19
-32768,8283.19
-65536,16539.19

context_sizes/Yi 34B.csv DELETED
@@ -1,12 +0,0 @@
-512,262.19
-1024,399.19
-2048,753.19
-3072,1107.19
-4096,1461.19
-6144,2169.19
-8192,2877.19
-12288,4293.19
-16384,5709.19
-24576,8541.19
-32768,11373.19
-65536,22701.19

models.csv DELETED
@@ -1,9 +0,0 @@
-model,params
-Llama2 7B,7
-Llama2 13B,13
-Llama2 70B,70
-Mistral 7B,7
-Llama2 20B,20
-Mixtral 8x7B,46.7
-Yi 34B,34
-Solar 10.7B/11B,10.7

quants.csv DELETED
@@ -1,13 +0,0 @@
-quant,bpw
-Q2_K,3.35
-Q3_K_S,3.5
-Q3_K_M,3.91
-Q3_K_L,4.27
-Q4_0,4.55
-Q4_K_S,4.58
-Q4_K_M,4.85
-Q5_0,5.54
-Q5_K_S,5.54
-Q5_K_M,5.69
-Q6_K,6.59
-Q8_0,8.5

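
The deleted data files were the inputs to the old path: models.csv mapped model names to parameter counts, quants.csv held the bits-per-weight table now inlined in app.py, and each context_sizes/*.csv listed measured (context tokens, size in MiB) pairs that the removed linear_regression helper fit by least squares. A rough sketch of that retired lookup, runnable only against the pre-commit tree; the column names and the sum terms are assumptions, since only the closed-form a/b expressions survive in the rendered diff:

# Reconstruction of the removed regression-based estimate (assumptions noted above).
import pandas as pd

def linear_regression(xs, ys):
    # Ordinary least squares for y = a + b*x; the sums are assumed helpers,
    # the a/b expressions are taken from the removed code.
    n = len(xs)
    sum_x, sum_y = sum(xs), sum(ys)
    sum_xy = sum(x * y for x, y in zip(xs, ys))
    sum_x2 = sum(x**2 for x in xs)
    a = (sum_y * sum_x2 - sum_x * sum_xy) / (n * sum_x2 - sum_x**2)
    b = (n * sum_xy - sum_x * sum_y) / (n * sum_x2 - sum_x**2)
    return a, b

# Assumed column names; the CSVs themselves have no header row.
sizes = pd.read_csv("context_sizes/Mistral 7B.csv", names=["context", "size"])
a, b = linear_regression(sizes["context"], sizes["size"])
print(round((a + b * 8192) / 1024, 2), "GB at 8192 tokens")  # size column is MiB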