SadP0i committed on
Commit
615c77c
1 Parent(s): f38793c

Upload index.html

Browse files

Added a sanity check when HF returns an error instead of model info

Files changed (1) hide show
  1. index.html +485 -575
index.html CHANGED
@@ -1,575 +1,485 @@
1
- <!DOCTYPE html>
2
- <html lang="en">
3
- <head>
4
- <meta charset="UTF-8" />
5
- <meta name="viewport" content="width=device-width, initial-scale=1.0" />
6
- <script>
7
- function strToHtml(str) {
8
- let parser = new DOMParser();
9
- return parser.parseFromString(str, "text/html");
10
- }
11
-
12
- //Short, jQuery-independent function to read html table and write them into an Array.
13
- //Kudos to RobG at StackOverflow
14
- function tableToObj(table) {
15
- var rows = table.rows;
16
- var propCells = rows[0].cells;
17
- var propNames = [];
18
- var results = [];
19
- var obj, row, cells;
20
-
21
- // Use the first row for the property names
22
- // Could use a header section but result is the same if
23
- // there is only one header row
24
- for (var i = 0, iLen = propCells.length; i < iLen; i++) {
25
- propNames.push(
26
- (propCells[i].textContent || propCells[i].innerText).trim()
27
- );
28
- }
29
-
30
- // Use the rows for data
31
- // Could use tbody rows here to exclude header & footer
32
- // but starting from 1 gives required result
33
- for (var j = 1, jLen = rows.length; j < jLen; j++) {
34
- cells = rows[j].cells;
35
- obj = {};
36
-
37
- for (var k = 0; k < iLen; k++) {
38
- obj[propNames[k]] = (
39
- cells[k].textContent || cells[k].innerText
40
- ).trim();
41
- }
42
- results.push(obj);
43
- }
44
- return results;
45
- }
46
-
47
- function formatGpu(gpus) {
48
- return gpus.map(
49
- (g) => `${g["Product Name"]} - ${g["Memory"].split(",")[0]}`
50
- );
51
- }
52
-
53
- const gguf_quants = {
54
- "IQ1_S": 1.56,
55
- "IQ1_M": 1.75,
56
- "IQ2_XXS": 2.06,
57
- "IQ2_XS": 2.31,
58
- "IQ2_S": 2.5,
59
- "IQ3_XXS": 3.06,
60
- "IQ3_XS": 3.3,
61
- "IQ3_S": 3.44,
62
- "IQ3_M": 3.66,
63
- "Q2_K": 3.35,
64
- "Q3_K_S": 3.5,
65
- "Q3_K_M": 3.91,
66
- "Q3_K_L": 4.27,
67
- "IQ4_XS": 4.25,
68
- "Q4_0": 4.55,
69
- "Q4_K_S": 4.58,
70
- "Q4_K_M": 4.85,
71
- "Q5_0": 5.54,
72
- "Q5_K_S": 5.54,
73
- "Q5_K_M": 5.69,
74
- "Q6_K": 6.59,
75
- "Q8_0": 8.5,
76
- }
77
-
78
- async function modelConfig(hf_model) {
79
- let config = await fetch(
80
- `https://huggingface.co/${hf_model}/raw/main/config.json`
81
- ).then(r => r.json())
82
- let model_size = 0
83
- try {
84
- model_size = (await fetch(`https://huggingface.co/${hf_model}/resolve/main/model.safetensors.index.json`).then(r => r.json()))["metadata"]["total_size"] / 2
85
- if (isNaN(model_size)) {
86
- throw new Erorr("no size in safetensors metadata")
87
- }
88
- } catch (e) {
89
- try {
90
- model_size = (await fetch(`https://huggingface.co/${hf_model}/resolve/main/pytorch_model.bin.index.json`).then(r => r.json()))["metadata"]["total_size"] / 2
91
- if (isNaN(model_size)) {
92
- throw new Erorr("no size in pytorch metadata")
93
- }
94
- } catch {
95
- let model_page = await fetch(
96
- "https://corsproxy.io/?" + encodeURIComponent(`https://huggingface.co/${hf_model}`)
97
- ).then(r => r.text())
98
- let el = document.createElement( 'html' );
99
- el.innerHTML = model_page
100
- let params_el = el.querySelector('div[data-target="ModelSafetensorsParams"]')
101
- if (params_el !== null) {
102
- model_size = JSON.parse(params_el.attributes.getNamedItem("data-props").value)["safetensors"]["total"]
103
- } else {
104
- params_el = el.querySelector('div[data-target="ModelHeader"]')
105
- model_size = JSON.parse(params_el.attributes.getNamedItem("data-props").value)["model"]["safetensors"]["total"]
106
- }
107
- }
108
- }
109
- config.parameters = model_size
110
- return config
111
- }
112
-
113
- function inputBuffer(context=8192, model_config, bsz=512) {
114
- /* Calculation taken from github:ggerganov/llama.cpp/llama.cpp:11248
115
- ctx->inp_tokens = ggml_new_tensor_1d(ctx->ctx_input, GGML_TYPE_I32, cparams.n_batch);
116
- ctx->inp_embd = ggml_new_tensor_2d(ctx->ctx_input, GGML_TYPE_F32, hparams.n_embd, cparams.n_batch);
117
- ctx->inp_pos = ggml_new_tensor_1d(ctx->ctx_input, GGML_TYPE_I32, cparams.n_batch);
118
- ctx->inp_KQ_mask = ggml_new_tensor_2d(ctx->ctx_input, GGML_TYPE_F32, cparams.n_ctx, cparams.n_batch);
119
- ctx->inp_K_shift = ggml_new_tensor_1d(ctx->ctx_input, GGML_TYPE_I32, cparams.n_ctx);
120
- ctx->inp_sum = ggml_new_tensor_2d(ctx->ctx_input, GGML_TYPE_F32, 1, cparams.n_batch);
121
-
122
- n_embd is hidden size (github:ggeranov/llama.cpp/convert.py:248)
123
- */
124
- const inp_tokens = bsz
125
- const inp_embd = model_config["hidden_size"] * bsz
126
- const inp_pos = bsz
127
- const inp_KQ_mask = context * bsz
128
- const inp_K_shift = context
129
- const inp_sum = bsz
130
-
131
- return inp_tokens + inp_embd + inp_pos + inp_KQ_mask + inp_K_shift + inp_sum
132
- }
133
-
134
- function computeBuffer(context=8192, model_config, bsz=512) {
135
- if (bsz != 512) {
136
- alert("batch size other than 512 is currently not supported for the compute buffer, using batchsize 512 for compute buffer calculation, end result result will be an overestimatition")
137
- }
138
- return (context / 1024 * 2 + 0.75) * model_config["num_attention_heads"] * 1024 * 1024
139
- }
140
-
141
- function kvCache(context=8192, model_config, cache_bit=16) {
142
- const n_gqa = model_config["num_attention_heads"] / model_config["num_key_value_heads"]
143
- const n_embd_gqa = model_config["hidden_size"] / n_gqa
144
- const n_elements = n_embd_gqa * (model_config["num_hidden_layers"] * context)
145
- const size = 2 * n_elements
146
- return size * (cache_bit / 8)
147
- }
148
-
149
- function contextSize(context=8192, model_config, bsz=512, cache_bit=16) {
150
- return Number.parseFloat((inputBuffer(context, model_config, bsz) + kvCache(context, model_config, cache_bit) + computeBuffer(context, model_config, bsz)).toFixed(2))
151
- }
152
-
153
- function modelSize(model_config, bpw=4.5) {
154
- return Number.parseFloat((model_config["parameters"] * bpw / 8).toFixed(2))
155
- }
156
-
157
- async function calculateSizes(format, context_loc) {
158
-
159
- format = "gguf"
160
-
161
- try {
162
- const model_config = await modelConfig(document.getElementById("modelsearch").value)
163
- const context = parseInt(document.getElementById("contextsize").value)
164
- let bsz = 512
165
- let cache_bit = 16
166
- let bpw = 0
167
- if (format === "gguf") {
168
- bsz = parseInt(document.getElementById("batchsize").value)
169
- bpw = gguf_quants[document.getElementById("quantsize").innerText]
170
-
171
- } else if (format == "exl2") {
172
- cache_bit = Number.parseInt(document.getElementById("kvCache").value)
173
- bpw = Number.parseFloat(document.getElementById("bpw").value)
174
- }
175
-
176
- const model_size = modelSize(model_config, bpw)
177
- const context_size = contextSize(context, model_config, bsz, cache_bit)
178
- const total_size = ((model_size + context_size) / 2**30)
179
- document.getElementById("resultmodel").innerText = (model_size / 2**30).toFixed(2)
180
- document.getElementById("resultcontext").innerText = (context_size / 2**30).toFixed(2)
181
- const result_total_el = document.getElementById("resulttotal");
182
- result_total_el.innerText = total_size.toFixed(2)
183
-
184
- const allocated_vram = Number.parseInt(document.getElementById("maxvram").value);
185
- const vram = allocated_vram
186
- if (vram - total_size > 0.5) {
187
- result_total_el.style.backgroundColor = "#bef264"
188
- } else if (vram - total_size > 0) {
189
- result_total_el.style.backgroundColor = "#facc15"
190
- } else {
191
- result_total_el.style.backgroundColor = "#ef4444"
192
- }
193
-
194
- const layer_size = ((model_size / 2**30) / model_config["num_hidden_layers"])
195
- const layer_size_el = document.getElementById("layersize");
196
- layer_size_el.innerText = layer_size.toFixed(2)
197
-
198
- const context_dealloc = context_loc === "vram" ? (context_size / 2**30) : 0;
199
- const layers_offload = Math.floor((allocated_vram - context_dealloc) / layer_size)
200
-
201
- const layers_offload_el = document.getElementById("layersoffload");
202
- layers_offload_el.innerText = `${layers_offload > model_config["num_hidden_layers"] ? model_config["num_hidden_layers"] : Math.max(0, layers_offload)}/${model_config["num_hidden_layers"]}`
203
-
204
- } catch(e) {
205
- alert(e);
206
- }
207
- }
208
- </script>
209
- <link href="./styles.css" rel="stylesheet">
210
- <title>Can I split it? - GGUF VRAM Calculator</title>
211
- </head>
212
- <body class="p-8">
213
- <div x-data="{ format: 'gguf', context_loc: 'vram' }" class="flex flex-col max-h-screen items-center mt-16 gap-10">
214
- <div style="text-align: center;">
215
- <h1 class="text-xl font-semibold leading-6 text-gray-900">
216
- GGUF Model, Can I split it?
217
- </h1>
218
- <h3 class="font-semibold leading-6 text-gray-900">
219
- Based on <a href="https://huggingface.co/NyxKrage" style="color: blue;">NyxKrage</a>'s <a href="https://huggingface.co/spaces/NyxKrage/LLM-Model-VRAM-Calculator" style="color: blue;">LLM VRAM calculator</a>
220
- </h3>
221
- </div>
222
- <div class="flex flex-col gap-10">
223
- <div class="w-auto flex flex-col gap-4">
224
- <div class="relative">
225
- <label
226
- for="maxvram"
227
- class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900"
228
- >
229
- Max Allocated VRAM
230
- </label>
231
- <input
232
- value="24"
233
- type="number"
234
- name="maxvram"
235
- id="maxvram"
236
- step="1"
237
- class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
238
- />
239
- </div>
240
-
241
- <!-- Model Selector -->
242
-
243
-
244
- <div class="flex flex-row gap-4 relative">
245
- <label
246
- for="contextsize"
247
- class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900"
248
- >
249
- Model (unquantized)
250
- </label>
251
- <div
252
- class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
253
- x-data="{
254
- open: false,
255
- value: 'Nexusflow/Starling-LM-7B-beta',
256
- results: null,
257
- toggle() {
258
- if (this.open) {
259
- return this.close()
260
- }
261
-
262
- this.$refs.input.focus()
263
-
264
- this.open = true
265
- },
266
- close(focusAfter) {
267
- if (! this.open) return
268
-
269
- this.open = false
270
-
271
- focusAfter && focusAfter.focus()
272
- }
273
- }"
274
- x-on:keydown.escape.prevent.stop="close($refs.input)"
275
- x-id="['model-typeahead']"
276
- class="relative"
277
- >
278
- <!-- Input -->
279
- <input
280
- id="modelsearch"
281
- x-ref="input"
282
- x-on:click="toggle()"
283
- @keypress.debounce.150ms="results = (await
284
- fetch('https://huggingface.co/api/quicksearch?type=model&q=' +
285
- encodeURIComponent(value)).then(r => r.json())).models.filter(m => !m.id.includes('GGUF') && !m.id.includes('AWQ') && !m.id.includes('GPTQ') && !m.id.includes('exl2'));"
286
- :aria-expanded="open"
287
- :aria-controls="$id('model-typeahead')"
288
- x-model="value"
289
- class="flex justify-between items-center gap-2 w-full"
290
- />
291
-
292
- <!-- Panel -->
293
- <div
294
- x-ref="panel"
295
- x-show="open"
296
- x-transition.origin.top.left
297
- x-on:click.outside="close($refs.input)"
298
- :id="$id('model-typeahead')"
299
- style="display: none"
300
- class="absolute left-0 mt-4 w-full rounded-md bg-white shadow-sm ring-1 ring-inset ring-gray-300 z-10"
301
- >
302
- <template x-for="result in results">
303
- <a
304
- @click="value = result.id; close($refs.input)"
305
- x-text="result.id"
306
- class="flex cursor-pointer items-center gap-2 w-full first-of-type:rounded-t-md last-of-type:rounded-b-md px-4 py-2.5 text-left text-sm hover:bg-gray-500/5 disabled:text-gray-500"
307
- ></a>
308
- </template>
309
- </div>
310
- </div>
311
- </div>
312
-
313
-
314
- <!-- Context Size Selector -->
315
- <div class="relative">
316
- <label
317
- for="contextsize"
318
- class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900"
319
- >
320
- Context Size
321
- </label>
322
- <input
323
- value="8192"
324
- type="number"
325
- name="contextsize"
326
- id="contextsize"
327
- step="1024"
328
- class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
329
- />
330
- </div>
331
-
332
- <div class="relative">
333
- <label
334
- class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900"
335
- >Context offloaded to</label
336
- >
337
- <fieldset
338
- x-model="context_loc"
339
- class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
340
- >
341
- <legend class="sr-only">Context location</legend>
342
- <div
343
- class="space-y-4 sm:flex sm:items-center sm:space-x-10 sm:space-y-0"
344
- >
345
- <div class="flex items-center">
346
- <input
347
- id="context-vram"
348
- name="context-allocation"
349
- type="radio"
350
- value="vram"
351
- checked
352
- class="h-4 w-4 border-gray-300 text-indigo-600 focus:ring-indigo-600"
353
- />
354
- <label
355
- for="context-vram"
356
- class="ml-3 block text-sm font-medium leading-6 text-gray-900"
357
- >VRAM</label
358
- >
359
- </div>
360
- <div class="flex items-center">
361
- <input
362
- id="context-ram"
363
- name="context-allocation"
364
- type="radio"
365
- value="ram"
366
- class="h-4 w-4 border-gray-300 text-indigo-600 focus:ring-indigo-600"
367
- />
368
- <label
369
- for="context-ram"
370
- class="ml-3 block text-sm font-medium leading-6 text-gray-900"
371
- >RAM</label
372
- >
373
- </div>
374
- </div>
375
- </fieldset>
376
- </div>
377
-
378
- <!-- GGUF Options -->
379
- <div x-show="format === 'gguf'" class="relative">
380
- <div class="flex flex-row gap-4">
381
- <label
382
- for="contextsize"
383
- class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900"
384
- >
385
- Quantization Size
386
- </label>
387
- <div
388
- class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
389
- x-data="{
390
- open: false,
391
- value: '',
392
- toggle() {
393
- if (this.open) {
394
- return this.close()
395
- }
396
-
397
- this.$refs.button.focus()
398
-
399
- this.open = true
400
- },
401
- close(focusAfter) {
402
- if (! this.open) return
403
-
404
- this.open = false
405
-
406
- focusAfter && focusAfter.focus()
407
- }
408
- }"
409
- x-on:keydown.escape.prevent.stop="close($refs.button)"
410
- x-id="['dropdown-button']"
411
- class="relative"
412
- >
413
- <!-- Button -->
414
- <button
415
- x-ref="button"
416
- x-on:click="toggle()"
417
- :aria-expanded="open"
418
- :aria-controls="$id('dropdown-button')"
419
- type="button"
420
- id="quantsize"
421
- x-text="value.length === 0 ? 'Q4_K_S' : value"
422
- class="flex justify-between items-center gap-2 w-full"
423
- >
424
- Q4_K_S
425
-
426
- <!-- Heroicon: chevron-down -->
427
- <svg
428
- xmlns="http://www.w3.org/2000/svg"
429
- class="h-5 w-5 text-gray-400"
430
- viewBox="0 0 20 20"
431
- fill="currentColor"
432
- >
433
- <path
434
- fill-rule="evenodd"
435
- d="M5.293 7.293a1 1 0 011.414 0L10 10.586l3.293-3.293a1 1 0 111.414 1.414l-4 4a1 1 0 01-1.414 0l-4-4a1 1 0 010-1.414z"
436
- clip-rule="evenodd"
437
- />
438
- </svg>
439
- </button>
440
-
441
- <!-- Panel -->
442
- <div
443
- x-data="{ quants: [
444
- 'IQ1_S',
445
- 'IQ1_M',
446
- 'IQ2_XXS',
447
- 'IQ2_XS',
448
- 'IQ2_S',
449
- 'IQ3_XXS',
450
- 'IQ3_XS',
451
- 'IQ3_S',
452
- 'IQ3_M',
453
- 'Q2_K',
454
- 'Q3_K_S',
455
- 'Q3_K_M',
456
- 'Q3_K_L',
457
- 'IQ4_XS',
458
- 'Q4_0',
459
- 'Q4_K_S',
460
- 'Q4_K_M',
461
- 'Q5_0',
462
- 'Q5_K_S',
463
- 'Q5_K_M',
464
- 'Q6_K',
465
- 'Q8_0'
466
- ]}"
467
- x-ref="panel"
468
- x-show="open"
469
- x-transition.origin.top.left
470
- x-on:click.outside="close($refs.button)"
471
- :id="$id('dropdown-button')"
472
- style="display: none"
473
- class="absolute left-0 mt-4 w-full rounded-md bg-white shadow-sm ring-1 ring-inset ring-gray-300 z-10"
474
- >
475
- <template x-for="quant in quants">
476
- <a
477
- @click="value = quant; close($refs.button)"
478
- x-text="quant"
479
- class="flex cursor-pointer items-center gap-2 w-full first-of-type:rounded-t-md last-of-type:rounded-b-md px-4 py-2.5 text-left text-sm hover:bg-gray-500/5 disabled:text-gray-500"
480
- ></a>
481
- </template>
482
- </div>
483
- </div>
484
- <div class="relative">
485
- <label
486
- for="batchsize"
487
- class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900"
488
- >
489
- Batch Size
490
- </label>
491
- <input
492
- value="512"
493
- type="number"
494
- step="128"
495
- id="batchsize"
496
- class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
497
- />
498
- </div>
499
- </div>
500
- </div>
501
- <button
502
- type="button"
503
- class="rounded-md bg-slate-800 px-3 py-2 text-sm font-semibold text-white shadow-sm hover:bg-slate-700 focus-visible:outline focus-visible:outline-2 focus-visible:outline-offset-2 focus-visible:outline-indigo-600"
504
- @click="calculateSizes(format, context_loc)"
505
- >
506
- Submit
507
- </button>
508
- </div>
509
- <div class="w-auto flex flex-col gap-4">
510
- <div class="relative">
511
- <label
512
- class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900"
513
- >
514
- Model Size (GB)
515
- </label>
516
- <div
517
- id="resultmodel"
518
- class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
519
- >4.20</div>
520
- </div>
521
- <div class="relative">
522
- <label
523
- class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900"
524
- >
525
- Context Size (GB)
526
- </label>
527
- <div
528
- id="resultcontext"
529
- class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
530
- >6.90</div>
531
- </div>
532
- <div class="relative">
533
- <label
534
- class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900"
535
- >
536
- Total Size (GB)
537
- </label>
538
- <div
539
- id="resulttotal"
540
- class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
541
- >420.69</div>
542
- </div>
543
- <div class="relative">
544
- <label
545
- class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900"
546
- >
547
- Layer size (GB)
548
- </label>
549
- <div
550
- id="layersize"
551
- class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
552
- >42.69</div>
553
- </div>
554
- <div class="relative">
555
- <label
556
- class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900"
557
- >
558
- Layers offloaded to GPU (out of total)
559
- </label>
560
- <div
561
- id="layersoffload"
562
- class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
563
- >42</div>
564
- </div>
565
- </div>
566
- </div>
567
- </div>
568
- <script
569
- src="https://cdn.jsdelivr.net/npm/alpinejs@3.x.x/dist/cdn.min.js"
570
- ></script>
571
- <script defer>
572
- calculateSizes("gguf", "vram")
573
- </script>
574
- </body>
575
- </html>
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+
4
+ <head>
5
+ <meta charset="UTF-8" />
6
+ <meta name="viewport" content="width=device-width, initial-scale=1.0" />
7
+ <script>
8
// Parse an HTML string into a Document using the browser's DOMParser.
function strToHtml(str) {
  const parser = new DOMParser();
  const doc = parser.parseFromString(str, "text/html");
  return doc;
}
12
+
13
+ //Short, jQuery-independent function to read html table and write them into an Array.
14
+ //Kudos to RobG at StackOverflow
15
// Convert an HTML table into an array of row objects, using the text of
// the first row's cells as property names. Rows 1..n become one object
// each. (Kudos to RobG at StackOverflow.)
function tableToObj(table) {
  const rows = table.rows;
  const headerCells = rows[0].cells;
  // Works for both textContent (standards) and innerText (legacy) cells.
  const cellText = (cell) => (cell.textContent || cell.innerText).trim();

  const propNames = [];
  for (let i = 0; i < headerCells.length; i++) {
    propNames.push(cellText(headerCells[i]));
  }

  // Data rows start at index 1, skipping the header row.
  const results = [];
  for (let j = 1; j < rows.length; j++) {
    const cells = rows[j].cells;
    const obj = {};
    for (let k = 0; k < propNames.length; k++) {
      obj[propNames[k]] = cellText(cells[k]);
    }
    results.push(obj);
  }
  return results;
}
47
+
48
// Render each GPU record as "Product Name - <first Memory field>",
// dropping anything after the first comma in the Memory column.
function formatGpu(gpus) {
  return gpus.map((gpu) => {
    const memory = gpu["Memory"].split(",")[0];
    return `${gpu["Product Name"]} - ${memory}`;
  });
}
53
+
54
// Bits-per-weight for each GGUF quantization type; consumed by
// modelSize() as `parameters * bpw / 8` to estimate file size in bytes.
// Values above the nominal bit width presumably account for quantization
// overhead — TODO confirm against llama.cpp's quantization tables.
const gguf_quants = {
  "IQ1_S": 1.56,
  "IQ1_M": 1.75,
  "IQ2_XXS": 2.06,
  "IQ2_XS": 2.31,
  "IQ2_S": 2.5,
  "IQ3_XXS": 3.06,
  "IQ3_XS": 3.3,
  "IQ3_S": 3.44,
  "IQ3_M": 3.66,
  "Q2_K": 3.35,
  "Q3_K_S": 3.5,
  "Q3_K_M": 3.91,
  "Q3_K_L": 4.27,
  "IQ4_XS": 4.25,
  "Q4_0": 4.55,
  "Q4_K_S": 4.58,
  "Q4_K_M": 4.85,
  "Q5_0": 5.54,
  "Q5_K_S": 5.54,
  "Q5_K_M": 5.69,
  "Q6_K": 6.59,
  "Q8_0": 8.5,
}
78
+
79
// Escape HTML-special characters so untrusted text can be displayed
// safely (e.g. inside an alert sourced from a network error).
function sanitize(string) {
  const replacements = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;',
    "'": '&#x27;',
    "/": '&#x2F;',
  };
  return string.replace(/[&<>"'/]/ig, (ch) => replacements[ch]);
}
91
+
92
// Fetch a model's config.json from Hugging Face and attach its parameter
// count as `config.parameters`. The size is resolved by falling through:
// safetensors index -> pytorch index -> scraping the rendered model page
// through a CORS proxy. If config.json itself cannot be fetched/parsed
// (e.g. HF returned an error page), alerts and returns an empty object.
async function modelConfig(hf_model) {
  let config = {}

  try {
    config = await fetch(
      `https://huggingface.co/${hf_model}/raw/main/config.json`
    ).then(r => r.json());
  } catch (err) {
    // Coerce the Error to a string first: sanitize() calls .replace(),
    // which only exists on strings (was `sanitize(err)` — TypeError).
    alert(sanitize(String(err)));
    return config;
  }

  let model_size = 0
  try {
    // total_size is in bytes; dividing by 2 presumably assumes fp16
    // weights to get a parameter count — TODO confirm.
    model_size = (await fetch(`https://huggingface.co/${hf_model}/resolve/main/model.safetensors.index.json`).then(r => r.json()))["metadata"]["total_size"] / 2
    if (isNaN(model_size)) {
      // was `new Erorr(...)`: the typo raised a ReferenceError instead
      // of the intended Error (fallback still happened, message lost)
      throw new Error("no size in safetensors metadata")
    }
  } catch (e) {
    try {
      model_size = (await fetch(`https://huggingface.co/${hf_model}/resolve/main/pytorch_model.bin.index.json`).then(r => r.json()))["metadata"]["total_size"] / 2
      if (isNaN(model_size)) {
        throw new Error("no size in pytorch metadata")
      }
    } catch {
      // Last resort: scrape the model page for its safetensors params.
      let model_page = await fetch(
        "https://corsproxy.io/?" + encodeURIComponent(`https://huggingface.co/${hf_model}`)
      ).then(r => r.text())
      let el = document.createElement('html');
      el.innerHTML = model_page
      let params_el = el.querySelector('div[data-target="ModelSafetensorsParams"]')
      if (params_el !== null) {
        model_size = JSON.parse(params_el.attributes.getNamedItem("data-props").value)["safetensors"]["total"]
      } else {
        params_el = el.querySelector('div[data-target="ModelHeader"]')
        model_size = JSON.parse(params_el.attributes.getNamedItem("data-props").value)["model"]["safetensors"]["total"]
      }
    }
  }
  config.parameters = model_size
  return config
}
134
+
135
/* Size (in elements) of llama.cpp's input buffers for a given context
   and batch size. Mirrors the tensor allocations at
   github:ggerganov/llama.cpp/llama.cpp:11248:
     inp_tokens, inp_pos, inp_sum : n_batch each
     inp_embd                     : n_embd * n_batch
     inp_KQ_mask                  : n_ctx * n_batch
     inp_K_shift                  : n_ctx
   n_embd is hidden size (github:ggeranov/llama.cpp/convert.py:248) */
function inputBuffer(context = 8192, model_config, bsz = 512) {
  const perBatch =
    model_config["hidden_size"] * bsz // inp_embd
    + bsz                             // inp_tokens
    + bsz                             // inp_pos
    + bsz;                            // inp_sum
  const perContext =
    context * bsz                     // inp_KQ_mask
    + context;                        // inp_K_shift
  return perBatch + perContext;
}
155
+
156
/**
 * Approximate llama.cpp compute-buffer size in bytes for a context/batch.
 * Only calibrated for bsz == 512; other batch sizes warn the user and
 * fall back to the 512 formula (an overestimate).
 * Fixed the alert text: "end result result ... overestimatition" ->
 * "end result ... overestimation".
 */
function computeBuffer(context = 8192, model_config, bsz = 512) {
  if (bsz != 512) {
    alert("batch size other than 512 is currently not supported for the compute buffer, using batchsize 512 for compute buffer calculation, end result will be an overestimation")
  }
  return (context / 1024 * 2 + 0.75) * model_config["num_attention_heads"] * 1024 * 1024
}
162
+
163
// KV-cache size in bytes: K and V (hence the factor 2) each store
// n_embd_gqa elements per position across all layers, at cache_bit bits
// per element.
function kvCache(context = 8192, model_config, cache_bit = 16) {
  const heads = model_config["num_attention_heads"];
  const kvHeads = model_config["num_key_value_heads"];
  const embdGqa = model_config["hidden_size"] / (heads / kvHeads);
  const elements = 2 * (embdGqa * (model_config["num_hidden_layers"] * context));
  return elements * (cache_bit / 8);
}
170
+
171
// Total context memory in bytes: input buffers + KV cache + compute
// buffer, rounded to two decimal places.
function contextSize(context = 8192, model_config, bsz = 512, cache_bit = 16) {
  const total =
    inputBuffer(context, model_config, bsz) +
    kvCache(context, model_config, cache_bit) +
    computeBuffer(context, model_config, bsz);
  return Number.parseFloat(total.toFixed(2));
}
174
+
175
// Model weight size in bytes for a given quantization level
// (bpw = bits per weight), rounded to two decimal places.
function modelSize(model_config, bpw = 4.5) {
  const bytes = model_config["parameters"] * bpw / 8;
  return Number.parseFloat(bytes.toFixed(2));
}
178
+
179
// Read the form inputs, compute model/context/total VRAM requirements,
// and render the results — including a traffic-light colour on the
// total and how many transformer layers fit into the VRAM budget.
async function calculateSizes(format, context_loc) {

  format = "gguf"  // exl2 inputs are not present on this page; force gguf

  try {
    const model_config = await modelConfig(document.getElementById("modelsearch").value)
    const context = parseInt(document.getElementById("contextsize").value)

    let bsz = 512
    let cache_bit = 16
    let bpw = 0
    if (format === "gguf") {
      bsz = parseInt(document.getElementById("batchsize").value)
      bpw = gguf_quants[document.getElementById("quantsize").innerText]
    } else if (format == "exl2") {
      cache_bit = Number.parseInt(document.getElementById("kvCache").value)
      bpw = Number.parseFloat(document.getElementById("bpw").value)
    }

    const model_size = modelSize(model_config, bpw)
    const context_size = contextSize(context, model_config, bsz, cache_bit)
    const total_size = (model_size + context_size) / 2 ** 30

    document.getElementById("resultmodel").innerText = (model_size / 2 ** 30).toFixed(2)
    document.getElementById("resultcontext").innerText = (context_size / 2 ** 30).toFixed(2)
    const result_total_el = document.getElementById("resulttotal")
    result_total_el.innerText = total_size.toFixed(2)

    // Colour the total by remaining headroom against the VRAM budget:
    // green > 0.5 GB spare, yellow > 0 GB, red otherwise.
    const allocated_vram = Number.parseInt(document.getElementById("maxvram").value)
    const headroom = allocated_vram - total_size
    if (headroom > 0.5) {
      result_total_el.style.backgroundColor = "#bef264"
    } else if (headroom > 0) {
      result_total_el.style.backgroundColor = "#facc15"
    } else {
      result_total_el.style.backgroundColor = "#ef4444"
    }

    const layer_size = (model_size / 2 ** 30) / model_config["num_hidden_layers"]
    document.getElementById("layersize").innerText = layer_size.toFixed(2)

    // Context held in VRAM reduces the budget left for model layers.
    const context_dealloc = context_loc === "vram" ? (context_size / 2 ** 30) : 0
    const layers_offload = Math.floor((allocated_vram - context_dealloc) / layer_size)
    const total_layers = model_config["num_hidden_layers"]
    const shown = layers_offload > total_layers ? total_layers : Math.max(0, layers_offload)
    document.getElementById("layersoffload").innerText = `${shown}/${total_layers}`

  } catch (e) {
    alert(e)
  }
}
230
+ </script>
231
+ <link href="./styles.css" rel="stylesheet">
232
+ <title>Can I split it? - GGUF VRAM Calculator</title>
233
+ </head>
234
+
235
+ <body class="p-8">
236
+ <div x-data="{ format: 'gguf', context_loc: 'vram' }" class="flex flex-col max-h-screen items-center mt-16 gap-10">
237
+ <div style="text-align: center;">
238
+ <h1 class="text-xl font-semibold leading-6 text-gray-900">
239
+ GGUF Model, Can I split it?
240
+ </h1>
241
+ <h3 class="font-semibold leading-6 text-gray-900">
242
+ Based on <a href="https://huggingface.co/NyxKrage" style="color: blue;">NyxKrage</a>'s <a
243
+ href="https://huggingface.co/spaces/NyxKrage/LLM-Model-VRAM-Calculator" style="color: blue;">LLM VRAM
244
+ calculator</a>
245
+ </h3>
246
+ </div>
247
+ <div class="flex flex-col gap-10">
248
+ <div class="w-auto flex flex-col gap-4">
249
+ <div class="relative">
250
+ <label for="maxvram"
251
+ class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900">
252
+ Max Allocated VRAM
253
+ </label>
254
+ <input value="24" type="number" name="maxvram" id="maxvram" step="1"
255
+ class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6" />
256
+ </div>
257
+
258
<!-- Model Selector: Alpine-powered typeahead against the HF quicksearch API -->
<div class="flex flex-row gap-4 relative">
  <label for="modelsearch"
    class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900">
    Model (unquantized)
  </label>
  <!-- NOTE: the original element carried two class attributes ('block w-full ...' and
       'relative'); browsers drop the duplicate, so 'relative' is merged in here. -->
  <div
    class="relative block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
    x-data="{
      open: false,
      value: 'Nexusflow/Starling-LM-7B-beta',
      results: null,
      // Open the suggestion panel (or close it if already open) and focus the input.
      toggle() {
        if (this.open) {
          return this.close()
        }

        this.$refs.input.focus()

        this.open = true
      },
      // Close the panel; optionally return focus to the given element.
      close(focusAfter) {
        if (! this.open) return

        this.open = false

        focusAfter && focusAfter.focus()
      }
    }" x-on:keydown.escape.prevent.stop="close($refs.input)" x-id="['model-typeahead']">
    <!-- Search input.
         FIX 1: use @input instead of @keypress so deletions/pastes refresh the list
         and the query reflects the value *after* x-model has updated it.
         FIX 2: sanity-check the HF response - on an error payload there is no
         .models key, so fall back to an empty list instead of throwing. -->
    <input id="modelsearch" x-ref="input" x-on:click="toggle()"
      @input.debounce.150ms="results = ((await
      fetch('https://huggingface.co/api/quicksearch?type=model&q=' +
      encodeURIComponent(value)).then(r => r.json()))?.models ?? []).filter(m => !m.id.includes('GGUF') && !m.id.includes('AWQ') && !m.id.includes('GPTQ') && !m.id.includes('exl2'));"
      :aria-expanded="open" :aria-controls="$id('model-typeahead')" x-model="value"
      class="flex justify-between items-center gap-2 w-full" />

    <!-- Suggestion panel: clicking a result copies its id into the input and closes. -->
    <div x-ref="panel" x-show="open" x-transition.origin.top.left x-on:click.outside="close($refs.input)"
      :id="$id('model-typeahead')" style="display: none"
      class="absolute left-0 mt-4 w-full rounded-md bg-white shadow-sm ring-1 ring-inset ring-gray-300 z-10">
      <template x-for="result in results">
        <a @click="value = result.id; close($refs.input)" x-text="result.id"
          class="flex cursor-pointer items-center gap-2 w-full first-of-type:rounded-t-md last-of-type:rounded-b-md px-4 py-2.5 text-left text-sm hover:bg-gray-500/5 disabled:text-gray-500"></a>
      </template>
    </div>
  </div>
</div>
309
+
310
+
311
<!-- Context Size Selector: prompt context length in tokens, read by calculateSizes() -->
<div class="relative">
  <label for="contextsize"
    class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900">
    Context Size
  </label>
  <input value="8192" type="number" name="contextsize" id="contextsize" step="1024"
    class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6" />
</div>
320
+
321
<!-- Context-allocation radios: choose whether KV-cache memory counts against VRAM or RAM -->
<div class="relative">
  <label class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900">Context
    offloaded to</label>
  <!-- FIX: Alpine's x-model does not bind on <fieldset> (only form controls), so the
       original binding never updated `context_loc`. It now sits on each radio input.
       NOTE(review): assumes `context_loc` is declared in an ancestor x-data scope
       (it is passed to calculateSizes below) - confirm against the enclosing markup. -->
  <fieldset
    class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6">
    <legend class="sr-only">Context location</legend>
    <div class="space-y-4 sm:flex sm:items-center sm:space-x-10 sm:space-y-0">
      <div class="flex items-center">
        <input id="context-vram" name="context-allocation" type="radio" value="vram" checked
          x-model="context_loc"
          class="h-4 w-4 border-gray-300 text-indigo-600 focus:ring-indigo-600" />
        <label for="context-vram" class="ml-3 block text-sm font-medium leading-6 text-gray-900">VRAM</label>
      </div>
      <div class="flex items-center">
        <input id="context-ram" name="context-allocation" type="radio" value="ram"
          x-model="context_loc"
          class="h-4 w-4 border-gray-300 text-indigo-600 focus:ring-indigo-600" />
        <label for="context-ram" class="ml-3 block text-sm font-medium leading-6 text-gray-900">RAM</label>
      </div>
    </div>
  </fieldset>
</div>
341
+
342
<!-- GGUF Options: quantization-level dropdown + batch size, shown only for the GGUF format -->
<div x-show="format === 'gguf'" class="relative">
  <div class="flex flex-row gap-4">
    <!-- FIX: label previously pointed at 'contextsize'; it belongs to the quant button. -->
    <label for="quantsize"
      class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900">
      Quantization Size
    </label>
    <!-- NOTE: the original element carried two class attributes ('block w-full ...' and
         'relative'); browsers drop the duplicate, so 'relative' is merged in here. -->
    <div
      class="relative block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
      x-data="{
        open: false,
        value: '',
        // Open the dropdown (or close it if already open) and focus the button.
        toggle() {
          if (this.open) {
            return this.close()
          }

          this.$refs.button.focus()

          this.open = true
        },
        // Close the dropdown; optionally return focus to the given element.
        close(focusAfter) {
          if (! this.open) return

          this.open = false

          focusAfter && focusAfter.focus()
        }
      }" x-on:keydown.escape.prevent.stop="close($refs.button)" x-id="['dropdown-button']">
      <!-- Dropdown button: shows the chosen quant, falling back to the Q4_K_S default -->
      <button x-ref="button" x-on:click="toggle()" :aria-expanded="open" :aria-controls="$id('dropdown-button')"
        type="button" id="quantsize" x-text="value.length === 0 ? 'Q4_K_S' : value"
        class="flex justify-between items-center gap-2 w-full">
        Q4_K_S

        <!-- Heroicon: chevron-down -->
        <svg xmlns="http://www.w3.org/2000/svg" class="h-5 w-5 text-gray-400" viewBox="0 0 20 20"
          fill="currentColor">
          <path fill-rule="evenodd"
            d="M5.293 7.293a1 1 0 011.414 0L10 10.586l3.293-3.293a1 1 0 111.414 1.414l-4 4a1 1 0 01-1.414 0l-4-4a1 1 0 010-1.414z"
            clip-rule="evenodd" />
        </svg>
      </button>

      <!-- Dropdown panel: the supported GGUF quantization levels -->
      <div x-data="{ quants: [
        'IQ1_S',
        'IQ1_M',
        'IQ2_XXS',
        'IQ2_XS',
        'IQ2_S',
        'IQ3_XXS',
        'IQ3_XS',
        'IQ3_S',
        'IQ3_M',
        'Q2_K',
        'Q3_K_S',
        'Q3_K_M',
        'Q3_K_L',
        'IQ4_XS',
        'Q4_0',
        'Q4_K_S',
        'Q4_K_M',
        'Q5_0',
        'Q5_K_S',
        'Q5_K_M',
        'Q6_K',
        'Q8_0'
      ]}" x-ref="panel" x-show="open" x-transition.origin.top.left x-on:click.outside="close($refs.button)"
        :id="$id('dropdown-button')" style="display: none"
        class="absolute left-0 mt-4 w-full rounded-md bg-white shadow-sm ring-1 ring-inset ring-gray-300 z-10">
        <template x-for="quant in quants">
          <a @click="value = quant; close($refs.button)" x-text="quant"
            class="flex cursor-pointer items-center gap-2 w-full first-of-type:rounded-t-md last-of-type:rounded-b-md px-4 py-2.5 text-left text-sm hover:bg-gray-500/5 disabled:text-gray-500"></a>
        </template>
      </div>
    </div>
    <!-- Batch size input (numeric, read elsewhere by id) -->
    <div class="relative">
      <label for="batchsize"
        class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900">
        Batch Size
      </label>
      <input value="512" type="number" step="128" id="batchsize"
        class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6" />
    </div>
  </div>
</div>
429
<!-- Submit: recompute all sizes from the current form state -->
<button type="button"
  @click="calculateSizes(format, context_loc)"
  class="rounded-md bg-slate-800 px-3 py-2 text-sm font-semibold text-white shadow-sm hover:bg-slate-700 focus-visible:outline focus-visible:outline-2 focus-visible:outline-offset-2 focus-visible:outline-indigo-600">
  Submit
</button>
434
+ </div>
435
<!-- Results panel: the inner divs are filled in by calculateSizes() via their ids;
     the hard-coded numbers are only placeholders shown before the first run. -->
<div class="w-auto flex flex-col gap-4">
  <div class="relative">
    <label class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900">
      Model Size (GB)
    </label>
    <div id="resultmodel"
      class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6">
      4.20</div>
  </div>
  <div class="relative">
    <label class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900">
      Context Size (GB)
    </label>
    <div id="resultcontext"
      class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6">
      6.90</div>
  </div>
  <div class="relative">
    <label class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900">
      Total Size (GB)
    </label>
    <div id="resulttotal"
      class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6">
      420.69</div>
  </div>
  <div class="relative">
    <label class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900">
      Layer size (GB)
    </label>
    <div id="layersize"
      class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6">
      42.69</div>
  </div>
  <div class="relative">
    <label class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900">
      Layers offloaded to GPU (out of total)
    </label>
    <div id="layersoffload"
      class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6">
      42</div>
  </div>
</div>
477
+ </div>
478
+ </div>
479
<script src="https://cdn.jsdelivr.net/npm/alpinejs@3.x.x/dist/cdn.min.js"></script>
<!-- Initial render with defaults. FIX: dropped the `defer` attribute - the HTML spec
     ignores it on inline scripts, so it was a misleading no-op. calculateSizes() is
     defined earlier in <head> and the form markup above is already parsed here. -->
<script>
  calculateSizes("gguf", "vram")
</script>
</body>

</html>