nyxtestacc committed
Commit 177e22a · 1 Parent(s): c028b5a

Rework to calculate context size rather than relying on linear regression
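
The context footprint is now computed from the model's own config.json on the Hub rather than fit by least squares to per-model CSV measurements: roughly, KV cache bytes = 2 tensors (K and V) × 2 bytes (fp16) × num_hidden_layers × context × hidden_size ÷ (num_attention_heads / num_key_value_heads), plus an input buffer and a compute buffer.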

app.py CHANGED
@@ -1,78 +1,113 @@
+from typing import Any
 import gradio as gr
 import pandas as pd
-
-quants = (
-    pd.read_csv("quants.csv")
-    .applymap(str)
-    .groupby("quant")["bpw"]
-    .apply(float)
-    .to_dict()
-)
-models = (
-    pd.read_csv("models.csv")
-    .applymap(str)
-    .groupby("model")["params"]
-    .apply(float)
-    .to_dict()
-)
-
-
-def context_sizes(model):
-    return pd.read_csv(
-        "context_sizes/" + model.replace("/", "_") + ".csv",
-        header=None,
-        names=["context", "size"],
+import requests
+
+quants = {
+    "Q2_K": 3.35,
+    "Q3_K_S": 3.5,
+    "Q3_K_M": 3.91,
+    "Q3_K_L": 4.27,
+    "Q4_0": 4.55,
+    "Q4_K_S": 4.58,
+    "Q4_K_M": 4.85,
+    "Q5_0": 5.54,
+    "Q5_K_S": 5.54,
+    "Q5_K_M": 5.69,
+    "Q6_K": 6.59,
+    "Q8_0": 8.5,
+}
+
+
+def calc_model_size(parameters: int, quant: float) -> int:
+    return parameters * quant // 8
+
+
+def get_model_config(hf_model: str) -> dict[str, Any]:
+    config = requests.get(
+        f"https://huggingface.co/{hf_model}/raw/main/config.json"
+    ).json()
+    model_index = {}
+    try:
+        model_index = requests.get(
+            f"https://huggingface.co/{hf_model}/raw/main/model.safetensors.index.json"
+        ).json()
+    except:
+        model_index = requests.get(
+            f"https://huggingface.co/{hf_model}/raw/main/pytorch_model.bin.index.json"
+        ).json()
+
+    # assume fp16 weights
+    config["parameters"] = model_index["metadata"]["total_size"] / 2
+    return config
+
+
+def calc_input_buffer_size(model_config, context: int) -> float:
+    return 4096 + 2048 * model_config["hidden_size"] + context * 4 + context * 2048
+
+
+def calc_compute_buffer_size(model_config, context: int) -> float:
+    return (
+        (context / 1024 * 2 + 0.75) * model_config["num_attention_heads"] * 1024 * 1024
     )
 
 
-def linear_regression(xs, ys) -> tuple[float, float]:
-    sum_y = ys.sum()
-    sum_x = sum(xs)
-    sum_xy = sum([x * y for x, y in zip(xs, ys)])
-    sum_x2 = sum([x**2 for x in xs])
-    n = len(xs)
-
-    a = (sum_y * sum_x2 - sum_x * sum_xy) / (n * sum_x2 - sum_x**2)
-    b = (n * sum_xy - sum_x * sum_y) / (n * sum_x2 - sum_x**2)
-    return a, b
-
-
-def calc_model_size(parameters: float, quant: float) -> float:
-    return round(parameters * quant / 8, 2)
-
-
-def calc_context_size(context, model) -> float:
-    sizes = context_sizes(model)
-    a, b = linear_regression(sizes["context"], sizes["size"])
-    return round((a + b * context) / 1024, 2)
+def calc_context_size(model_config, context: int) -> float:
+    n_gqa = model_config["num_attention_heads"] / model_config["num_key_value_heads"]
+    n_embd_gqa = model_config["hidden_size"] / n_gqa
+    n_elements = n_embd_gqa * (model_config["num_hidden_layers"] * context)
+    return 2 * n_elements * 2
 
 
 def calc(model_base, context, quant_size):
-    model_params = models[model_base]
-    quant_bpw = quants[quant_size]
-
-    model_size = calc_model_size(model_params, quant_bpw)
-    context_size = calc_context_size(context, model_base)
+    model_config = get_model_config(model_base)
+    quant_bpw = 0
+    try:
+        quant_bpw = float(quant_size)
+    except:
+        quant_bpw = quants[quant_size]
+
+    model_size = round(
+        calc_model_size(model_config["parameters"], quant_bpw) / 1024 / 1024 / 1024, 2
+    )
+    context_size = round(
+        (
+            calc_input_buffer_size(model_config, context)
+            + calc_context_size(model_config, context)
+            + calc_compute_buffer_size(model_config, context)
+        )
+        / 1024
+        / 1024
+        / 1024,
+        2,
+    )
 
-    return model_size, context_size, model_size + context_size
+    return model_size, context_size, round(model_size + context_size, 2)
 
 
 title = "GGUF VRAM Calculator"
 
 with gr.Blocks(title=title, theme=gr.themes.Monochrome()) as app:
-    default_model = "Mistral 7B"
+    default_model = "mistralai/Mistral-7B-v0.1"
     default_quant = "Q4_K_S"
     default_context = 8192
-    default_model_size = calc_model_size(models[default_model], quants[default_quant])
-    default_context_size = calc_context_size(default_context, default_model)
+    default_size = calc(default_model, default_context, default_quant)
+    default_model_size = default_size[0]
+    default_context_size = default_size[1]
 
-    gr.Markdown(f"# {app.title}")
-    model = gr.Dropdown(
-        list(models.keys()), value=default_model, label="Select Model Base"
+    gr.Markdown(
+        f"# {app.title}\nThis is meant only as a guide and will not be 100% accurate; it also does not account for anything that might be running in the background on your system, or for CUDA system memory fallback on Windows"
+    )
+    model = gr.Textbox(
+        value=default_model,
+        label="Enter Unquantized HF Model Name (e.g. mistralai/Mistral-7B-v0.1)",
+    )
+    context = gr.Number(
+        minimum=1, value=default_context, label="Desired Context Size (Tokens)"
    )
-    context = gr.Number(minimum=1, value=default_context, label="Context Size (Tokens)")
-    quant = gr.Dropdown(
-        list(quants.keys()), value=default_quant, label="Select Quant Size"
+    quant = gr.Textbox(
+        value=default_quant,
+        label="Enter GGUF Quant (Q4_K_S) or BPW for other quantization schemes such as exl2",
    )
     btn = gr.Button(value="Submit", variant="primary")
     btn.click(
@@ -85,15 +120,15 @@ with gr.Blocks(title=title, theme=gr.themes.Monochrome()) as app:
         outputs=[
             gr.Number(
                 label="Model Size (GB)",
-                value=default_model_size,
+                value=default_size[0],
             ),
             gr.Number(
                 label="Context Size (GB)",
-                value=default_context_size,
+                value=default_size[1],
             ),
             gr.Number(
                 label="Total Size (GB)",
-                value=default_model_size + default_context_size,
+                value=default_size[2],
             ),
         ],
     )
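
As a sanity check on the new arithmetic, the following minimal sketch (not part of the commit) reproduces calc_context_size on its own; the hard-coded hyperparameters are assumptions copied from mistralai/Mistral-7B-v0.1's config.json.

# Sketch only: reproduces the KV-cache formula from the new app.py.
# Config values are assumptions taken from mistralai/Mistral-7B-v0.1.
mistral_config = {
    "hidden_size": 4096,
    "num_hidden_layers": 32,
    "num_attention_heads": 32,
    "num_key_value_heads": 8,
}


def calc_context_size(model_config, context: int) -> float:
    # Grouped-query attention: each K (or V) cache row is
    # hidden_size / (n_heads / n_kv_heads) elements wide.
    n_gqa = model_config["num_attention_heads"] / model_config["num_key_value_heads"]
    n_embd_gqa = model_config["hidden_size"] / n_gqa
    # Elements in one cache tensor: row width * layers * context tokens.
    n_elements = n_embd_gqa * (model_config["num_hidden_layers"] * context)
    # Two tensors (K and V) at 2 bytes each (fp16) -> total bytes.
    return 2 * n_elements * 2


# 1024 * 32 * 8192 elements per tensor works out to exactly 1 GiB at 8k context.
print(calc_context_size(mistral_config, 8192) / 1024**3)  # -> 1.0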
context_sizes/Llama2 13B.csv DELETED
@@ -1,12 +0,0 @@
-512,475
-1024,912
-2048,1794
-3072,2676
-4096,3558
-6144,5322
-8192,7086
-12288,10614
-16384,14142
-24576,21198
-32768,28254
-65536,56508
context_sizes/Llama2 20B.csv DELETED
@@ -1,12 +0,0 @@
-512,695
-1024,1352
-2048,2674
-3072,3996
-4096,5318
-6144,7962
-8192,10606
-12288,15894
-16384,21182
-24576,31782.52
-32768,42335.26
-65536,84670.52
context_sizes/Llama2 70B.csv DELETED
@@ -1,12 +0,0 @@
-512,305
-1024,498
-2048,948
-3072,1398
-4096,1848
-6144,2748
-8192,3648
-12288,5448
-16384,7248
-24576,10848
-32768,14448
-65536,28896
context_sizes/Llama2 7B.csv DELETED
@@ -1,12 +0,0 @@
-512,326.5
-1024,602
-2048,1180
-3072,1758
-4096,2336
-6144,3492
-8192,4648
-12288,6960
-16384,9272
-24576,13896
-32768,18520
-65536,37016
context_sizes/Mistral 7B.csv DELETED
@@ -1,12 +0,0 @@
-512,137
-1024,218
-2048,412
-3072,606
-4096,800
-6144,1188
-8192,1576
-12288,2352
-16384,3128
-24576,4680
-32768,6232
-65536,12440
context_sizes/Mixtral 8x7B.csv DELETED
@@ -1,12 +0,0 @@
-512,181.72
-1024,249.22
-2048,443.22
-3072,637.22
-4096,831.22
-6144,1219.22
-8192,1607.22
-12288,2383.22
-16384,3159.22
-24576,4711.22
-32768,6263.22
-65536,12471.22
context_sizes/Solar 10.7B_11B.csv DELETED
@@ -1,12 +0,0 @@
-512,172.19
-1024,285.19
-2048,543.19
-3072,801.19
-4096,1059.19
-6144,1575.19
-8192,2091.19
-12288,3123.19
-16384,4155.19
-24576,6219.19
-32768,8283.19
-65536,16539.19
context_sizes/Yi 34B.csv DELETED
@@ -1,12 +0,0 @@
-512,262.19
-1024,399.19
-2048,753.19
-3072,1107.19
-4096,1461.19
-6144,2169.19
-8192,2877.19
-12288,4293.19
-16384,5709.19
-24576,8541.19
-32768,11373.19
-65536,22701.19
models.csv DELETED
@@ -1,9 +0,0 @@
-model,params
-Llama2 7B,7
-Llama2 13B,13
-Llama2 70B,70
-Mistral 7B,7
-Llama2 20B,20
-Mixtral 8x7B,46.7
-Yi 34B,34
-Solar 10.7B/11B,10.7
quants.csv DELETED
@@ -1,13 +0,0 @@
-quant,bpw
-Q2_K,3.35
-Q3_K_S,3.5
-Q3_K_M,3.91
-Q3_K_L,4.27
-Q4_0,4.55
-Q4_K_S,4.58
-Q4_K_M,4.85
-Q5_0,5.54
-Q5_K_S,5.54
-Q5_K_M,5.69
-Q6_K,6.59
-Q8_0,8.5