<!-- Repository page residue captured with this file (not part of the app): SadP0i's picture | Upload 3 files | bb71914 verified | raw | history blame | 21.6 kB -->
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<script>
/**
 * Parse an HTML string into a document.
 *
 * @param {string} str - HTML markup to parse.
 * @returns {Document} Whatever `DOMParser.parseFromString` yields for the
 *   "text/html" MIME type.
 */
function strToHtml(str) {
  return new DOMParser().parseFromString(str, "text/html");
}
// Short, jQuery-independent function to read an HTML table into an array.
// Kudos to RobG at StackOverflow.
/**
 * Convert a table into an array of plain objects, one per data row.
 *
 * The first row supplies the property names; every following row becomes an
 * object mapping those names to the trimmed text of the cell in the same
 * column.
 *
 * @param {{rows: ArrayLike<{cells: ArrayLike<object>}>}} table - A table
 *   element (or any object exposing `rows` / `cells` the same way).
 * @returns {Array<Object<string, string>>} One object per row after the
 *   header; an empty array when the table has no rows at all.
 */
function tableToObj(table) {
  // A missing cell (row shorter than the header) or a cell with no text
  // yields "" instead of throwing.
  const cellText = (cell) =>
    cell ? (cell.textContent || cell.innerText || "").trim() : "";

  const rows = Array.from(table.rows);
  if (rows.length === 0) {
    return [];
  }

  const [headerRow, ...dataRows] = rows;
  const propNames = Array.from(headerRow.cells, (cell) => cellText(cell));

  return dataRows.map((row) => {
    const obj = {};
    propNames.forEach((name, column) => {
      obj[name] = cellText(row.cells[column]);
    });
    return obj;
  });
}
/**
 * Build one display label per GPU record.
 *
 * Each label is the record's "Product Name", a " - " separator, and the part
 * of its "Memory" string that precedes the first comma.
 *
 * @param {Array<Object<string, string>>} gpus - Records carrying
 *   "Product Name" and "Memory" keys.
 * @returns {string[]} Labels in the same order as `gpus`.
 */
function formatGpu(gpus) {
  const labels = [];
  for (const gpu of gpus) {
    const [memory] = gpu["Memory"].split(",");
    labels.push(`${gpu["Product Name"]} - ${memory}`);
  }
  return labels;
}
// Bits-per-weight figure for each GGUF quantization name. calculateSizes looks
// the selected name up here and hands the value to modelSize as `bpw`.
const gguf_quants = {
  Q2_K: 3.35,
  Q3_K_S: 3.5,
  Q3_K_M: 3.91,
  Q3_K_L: 4.27,
  Q4_0: 4.55,
  Q4_K_S: 4.58,
  Q4_K_M: 4.85,
  Q5_0: 5.54,
  Q5_K_S: 5.54,
  Q5_K_M: 5.69,
  Q6_K: 6.59,
  Q8_0: 8.5,
};
/**
 * Read a weight index file from the model repository and derive the
 * parameter count from it.
 *
 * The index's `metadata.total_size` is halved.
 * NOTE(review): presumably this converts a byte total for 16-bit weights
 * into a parameter count - confirm for models stored in other dtypes.
 *
 * @param {string} hf_model - Repository id, e.g. "org/name".
 * @param {string} index_file - Index file name inside the repository.
 * @returns {Promise<number>} Positive parameter count.
 * @throws {Error} When the request fails or the index carries no usable size.
 */
async function fetchIndexParameterCount(hf_model, index_file) {
  const response = await fetch(
    `https://huggingface.co/${hf_model}/resolve/main/${index_file}`
  );
  if (!response.ok) {
    throw new Error(`${index_file}: HTTP ${response.status}`);
  }
  const index = await response.json();
  const total_size =
    index && index.metadata ? index.metadata.total_size : undefined;
  const parameters = total_size / 2;
  // Rejects NaN (missing size) as well as zero/negative totals.
  if (!(parameters > 0)) {
    throw new Error(`no size in ${index_file} metadata`);
  }
  return parameters;
}

/**
 * Last-resort lookup: read the parameter total embedded in the model's web
 * page, fetched through the corsproxy.io proxy.
 *
 * The page is parsed with DOMParser rather than assigned to `innerHTML`, so
 * the proxied markup stays in an inert document and cannot run handlers.
 *
 * @param {string} hf_model - Repository id, e.g. "org/name".
 * @returns {Promise<number>} The `safetensors.total` value found on the page
 *   (used as-is, not halved).
 * @throws {Error} When the page cannot be fetched or holds no usable total.
 */
async function scrapeParameterCount(hf_model) {
  const response = await fetch(
    "https://corsproxy.io/?" + encodeURIComponent(`https://huggingface.co/${hf_model}`)
  );
  if (!response.ok) {
    throw new Error(`model page: HTTP ${response.status}`);
  }
  const page = new DOMParser().parseFromString(await response.text(), "text/html");

  // Tried in order; the first element that yields a positive total wins.
  const candidates = [
    ['div[data-target="ModelSafetensorsParams"]', (props) => props["safetensors"]["total"]],
    ['div[data-target="ModelHeader"]', (props) => props["model"]["safetensors"]["total"]],
  ];
  for (const [selector, pickTotal] of candidates) {
    const node = page.querySelector(selector);
    if (node === null) {
      continue;
    }
    try {
      const total = pickTotal(JSON.parse(node.getAttribute("data-props")));
      if (total > 0) {
        return total;
      }
    } catch {
      // Malformed or incomplete data-props: fall through to the next candidate.
    }
  }
  throw new Error(`Could not determine the parameter count of ${hf_model}`);
}

/**
 * Load a model's `config.json` from Hugging Face and attach its parameter
 * count as `config.parameters`.
 *
 * The count is looked up, in order, from the safetensors index, the PyTorch
 * index, and finally the model's web page. A failure of one source silently
 * moves on to the next; only the failure of the last one is reported.
 *
 * @param {string} hf_model - Repository id, e.g. "org/name".
 * @returns {Promise<object>} The parsed config with an added `parameters` field.
 * @throws {Error} When the config cannot be loaded or no source yields a count.
 */
async function modelConfig(hf_model) {
  const response = await fetch(
    `https://huggingface.co/${hf_model}/raw/main/config.json`
  );
  if (!response.ok) {
    throw new Error(
      `Could not load config.json for ${hf_model} (HTTP ${response.status})`
    );
  }
  const config = await response.json();

  let model_size;
  try {
    model_size = await fetchIndexParameterCount(hf_model, "model.safetensors.index.json");
  } catch {
    try {
      model_size = await fetchIndexParameterCount(hf_model, "pytorch_model.bin.index.json");
    } catch {
      model_size = await scrapeParameterCount(hf_model);
    }
  }

  config.parameters = model_size;
  return config;
}
/**
 * Sum the element counts of the llama.cpp input tensors.
 *
 * Tensor list taken from github:ggerganov/llama.cpp/llama.cpp:11248
 *   inp_tokens  : I32 [n_batch]
 *   inp_embd    : F32 [n_embd, n_batch]
 *   inp_pos     : I32 [n_batch]
 *   inp_KQ_mask : F32 [n_ctx, n_batch]
 *   inp_K_shift : I32 [n_ctx]
 *   inp_sum     : F32 [1, n_batch]
 * n_embd is the hidden size (github:ggeranov/llama.cpp/convert.py:248).
 *
 * NOTE(review): the value returned is a count of elements; the tensors above
 * are 32-bit types, so confirm whether a 4-byte multiplier was intended.
 *
 * @param {number} [context=8192] - Context length (n_ctx).
 * @param {object} model_config - Model config; `hidden_size` is read.
 * @param {number} [bsz=512] - Batch size (n_batch).
 * @returns {number} Total number of elements across the six tensors.
 */
function inputBuffer(context=8192, model_config, bsz=512) {
  const n_embd = model_config["hidden_size"];
  const element_counts = [
    bsz,            // inp_tokens
    n_embd * bsz,   // inp_embd
    bsz,            // inp_pos
    context * bsz,  // inp_KQ_mask
    context,        // inp_K_shift
    bsz,            // inp_sum
  ];
  // No seed value: the fold then adds exactly the same operands, in the same
  // left-to-right order, as a plain `a + b + c + ...` chain.
  return element_counts.reduce((running, count) => running + count);
}
/**
 * Estimate the compute buffer size.
 *
 * The formula is `(context / 1024 * 2 + 0.75) * num_attention_heads * 1024 * 1024`
 * and does not depend on the batch size: when `bsz` is not 512 the user is
 * warned and the same 512-batch estimate is returned.
 *
 * @param {number} [context=8192] - Context length.
 * @param {object} model_config - Model config; `num_attention_heads` is read.
 * @param {number} [bsz=512] - Requested batch size (only used for the warning).
 * @returns {number} Estimated compute buffer size.
 */
function computeBuffer(context=8192, model_config, bsz=512) {
  // Number() keeps the loose comparison the check always had ("512" passes).
  if (Number(bsz) !== 512) {
    const message =
      "Batch sizes other than 512 are currently not supported for the compute buffer; " +
      "using batch size 512 for the compute buffer calculation. " +
      "The end result will be an overestimation.";
    // alert() only exists in browsers; fall back to the console elsewhere.
    if (typeof alert === "function") {
      alert(message);
    } else {
      console.warn(message);
    }
  }
  return (context / 1024 * 2 + 0.75) * model_config["num_attention_heads"] * 1024 * 1024;
}
/**
 * Estimate the size of the key/value cache.
 *
 * Computed as `2 * (hidden_size / n_gqa) * num_hidden_layers * context`
 * elements, each `cache_bit / 8` bytes wide, where
 * `n_gqa = num_attention_heads / num_key_value_heads`.
 *
 * Configs that do not declare `num_key_value_heads` are treated as having one
 * key/value head per attention head (n_gqa = 1) instead of producing NaN.
 *
 * @param {number} [context=8192] - Context length.
 * @param {object} model_config - Model config; reads `num_attention_heads`,
 *   `num_key_value_heads` (optional), `hidden_size` and `num_hidden_layers`.
 * @param {number} [cache_bit=16] - Bits per cached element.
 * @returns {number} Cache size in bytes.
 */
function kvCache(context=8192, model_config, cache_bit=16) {
  const n_head = model_config["num_attention_heads"]
  const declared_kv_heads = model_config["num_key_value_heads"]
  // `== null` matches both a missing key and an explicit null.
  const n_head_kv = declared_kv_heads == null ? n_head : declared_kv_heads
  const n_gqa = n_head / n_head_kv
  const n_embd_gqa = model_config["hidden_size"] / n_gqa
  const n_elements = n_embd_gqa * (model_config["num_hidden_layers"] * context)
  // Factor 2: one tensor for keys and one for values.
  const size = 2 * n_elements
  return size * (cache_bit / 8)
}
/**
 * Combined context-dependent memory: input buffer + KV cache + compute
 * buffer, rounded to two decimal places.
 *
 * @param {number} [context=8192] - Context length.
 * @param {object} model_config - Model config forwarded to the three helpers.
 * @param {number} [bsz=512] - Batch size (input and compute buffers).
 * @param {number} [cache_bit=16] - Bits per KV cache element.
 * @returns {number} Sum of the three parts after `toFixed(2)` rounding.
 */
function contextSize(context=8192, model_config, bsz=512, cache_bit=16) {
  const input_part = inputBuffer(context, model_config, bsz);
  const kv_part = kvCache(context, model_config, cache_bit);
  const compute_part = computeBuffer(context, model_config, bsz);
  const combined = input_part + kv_part + compute_part;
  return Number.parseFloat(combined.toFixed(2));
}
/**
 * Size of the quantized weights: `parameters * bpw / 8`, rounded to two
 * decimal places.
 *
 * @param {object} model_config - Model config; `parameters` is read.
 * @param {number} [bpw=4.5] - Bits per weight.
 * @returns {number} Weight size in bytes.
 */
function modelSize(model_config, bpw=4.5) {
  const parameter_count = model_config["parameters"];
  const weight_bytes = parameter_count * bpw / 8;
  return Number.parseFloat(weight_bytes.toFixed(2));
}
/**
 * Read the form, estimate memory use for the selected model, and write the
 * results back into the page.
 *
 * Inputs are read from the elements `modelsearch`, `contextsize`,
 * `batchsize`, `quantsize` and `maxvram`; results are written to
 * `resultmodel`, `resultcontext`, `resulttotal` (also colour-coded against
 * the VRAM budget), `layersize` and `layersoffload`. Sizes are shown in units
 * of 2**30 bytes.
 *
 * Any failure (network error, invalid input, unknown quantization) is shown
 * to the user through `alert`; the returned promise never rejects.
 *
 * @param {string} format - Accepted for compatibility with existing callers.
 *   The calculation is always done for GGUF, the only format this page has
 *   inputs for.
 * @returns {Promise<void>}
 */
async function calculateSizes(format) {
  const GIB = 2 ** 30
  const KV_CACHE_BITS = 16

  // Read a numeric form field, failing loudly instead of propagating NaN.
  const readNumber = (id, label, parse) => {
    const parsed = parse(document.getElementById(id).value)
    if (!Number.isFinite(parsed)) {
      throw new Error(`${label} must be a number`)
    }
    return parsed
  }
  const asInteger = (text) => Number.parseInt(text, 10)

  try {
    const model_config = await modelConfig(document.getElementById("modelsearch").value)
    const context = readNumber("contextsize", "Context size", asInteger)
    const bsz = readNumber("batchsize", "Batch size", asInteger)
    // Fractional budgets such as 23.5 must not be truncated.
    const allocated_vram = readNumber("maxvram", "Max allocated VRAM", Number.parseFloat)

    // The button label is the selected quantization name.
    const quant = document.getElementById("quantsize").innerText.trim()
    if (!Object.prototype.hasOwnProperty.call(gguf_quants, quant)) {
      throw new Error(`Unknown quantization "${quant}"`)
    }
    const bpw = gguf_quants[quant]

    const model_size = modelSize(model_config, bpw)
    const context_size = contextSize(context, model_config, bsz, KV_CACHE_BITS)
    const total_size = (model_size + context_size) / GIB

    document.getElementById("resultmodel").innerText = (model_size / GIB).toFixed(2)
    document.getElementById("resultcontext").innerText = (context_size / GIB).toFixed(2)
    const result_total_el = document.getElementById("resulttotal")
    result_total_el.innerText = total_size.toFixed(2)

    // Green: more than 0.5 spare; yellow: fits with less spare; red: does not fit.
    const headroom = allocated_vram - total_size
    if (headroom > 0.5) {
      result_total_el.style.backgroundColor = "#bef264"
    } else if (headroom > 0) {
      result_total_el.style.backgroundColor = "#facc15"
    } else {
      result_total_el.style.backgroundColor = "#ef4444"
    }

    const n_layers = model_config["num_hidden_layers"]
    const layer_size = (model_size / GIB) / n_layers
    document.getElementById("layersize").innerText = layer_size.toFixed(2)

    // Layers that fit in the budget left after the context, clamped to [0, n_layers].
    const layers_offload = Math.floor((allocated_vram - context_size / GIB) / layer_size)
    document.getElementById("layersoffload").innerText =
      layers_offload > n_layers ? n_layers : Math.max(0, layers_offload)
  } catch(e) {
    alert(e);
  }
}
</script>
<link href="./styles.css" rel="stylesheet">
<title>Can I split it? - GGUF VRAM Calculator</title>
</head>
<body class="p-8">
<div x-data="{ format: 'gguf' }" class="flex flex-col max-h-screen items-center mt-16 gap-10">
<div style="text-align: center;">
<h1 class="text-xl font-semibold leading-6 text-gray-900">
GGUF Model, Can I split it?
</h1>
<h3 class="font-semibold leading-6 text-gray-900">
Based on NyxKrage's LLM VRAM calculator
</h3>
</div>
<div class="flex flex-col gap-10">
<div class="w-auto flex flex-col gap-4">
<div class="relative">
<label
for="maxvram"
class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900"
>
Max Allocated VRAM
</label>
<input
value="24"
type="number"
name="maxvram"
id="maxvram"
step="1"
class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
/>
</div>
<!-- Model Selector -->
<div class="flex flex-row gap-4 relative">
<!-- Label for the model search box (id="modelsearch") -->
<label
  for="modelsearch"
  class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900"
>
  Model (unquantized)
</label>
<!--
  Model typeahead. `value` is the text in the search box, `results` the
  quicksearch matches shown in the panel, `open` whether the panel is visible.
  The element carries a single class attribute: a repeated attribute is
  dropped by the HTML parser, so "relative" lives in the one list below.
-->
<div
  class="relative block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
  x-data="{
    open: false,
    value: 'Nexusflow/Starling-LM-7B-beta',
    results: [],
    toggle() {
      if (this.open) {
        return this.close()
      }
      this.$refs.input.focus()
      this.open = true
    },
    close(focusAfter) {
      if (! this.open) return
      this.open = false
      focusAfter && focusAfter.focus()
    }
  }"
  x-on:keydown.escape.prevent.stop="close($refs.input)"
  x-id="['model-typeahead']"
>
  <!-- Input: searches on every value change (typing, deleting, pasting) -->
  <input
    id="modelsearch"
    x-ref="input"
    x-on:click="toggle()"
    @input.debounce.150ms="results = (await
    fetch('https://huggingface.co/api/quicksearch?type=model&q=' +
    encodeURIComponent(value)).then(r => r.json())).models.filter(m => !m.id.includes('GGUF') && !m.id.includes('AWQ') && !m.id.includes('GPTQ') && !m.id.includes('exl2'));"
    :aria-expanded="open"
    :aria-controls="$id('model-typeahead')"
    x-model="value"
    class="flex justify-between items-center gap-2 w-full"
  />
  <!-- Panel: one entry per search result; clicking an entry selects it -->
  <div
    x-ref="panel"
    x-show="open"
    x-transition.origin.top.left
    x-on:click.outside="close($refs.input)"
    :id="$id('model-typeahead')"
    style="display: none"
    class="absolute left-0 mt-4 w-full rounded-md bg-white shadow-sm ring-1 ring-inset ring-gray-300 z-10"
  >
    <template x-for="result in results">
      <a
        @click="value = result.id; close($refs.input)"
        x-text="result.id"
        class="flex cursor-pointer items-center gap-2 w-full first-of-type:rounded-t-md last-of-type:rounded-b-md px-4 py-2.5 text-left text-sm hover:bg-gray-500/5 disabled:text-gray-500"
      ></a>
    </template>
  </div>
</div>
</div>
<!-- Context Size Selector -->
<div class="relative">
<label
for="contextsize"
class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900"
>
Context Size
</label>
<input
value="8192"
type="number"
name="contextsize"
id="contextsize"
step="1024"
class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
/>
</div>
<!-- GGUF Options -->
<div x-show="format === 'gguf'" class="relative">
<div class="flex flex-row gap-4">
<!-- Label for the quantization dropdown button (id="quantsize") -->
<label
  for="quantsize"
  class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900"
>
  Quantization Size
</label>
<!--
  Quantization dropdown. `value` is the chosen quantization name ('' until the
  user picks one, in which case the button shows Q4_K_S). calculateSizes reads
  the button's text, so every name offered here must be a key of gguf_quants.
  The element carries a single class attribute: a repeated attribute is
  dropped by the HTML parser, so "relative" lives in the one list below.
-->
<div
  class="relative block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
  x-data="{
    open: false,
    value: '',
    toggle() {
      if (this.open) {
        return this.close()
      }
      this.$refs.button.focus()
      this.open = true
    },
    close(focusAfter) {
      if (! this.open) return
      this.open = false
      focusAfter && focusAfter.focus()
    }
  }"
  x-on:keydown.escape.prevent.stop="close($refs.button)"
  x-id="['dropdown-button']"
>
  <!-- Button: x-text replaces the static content below once Alpine has initialised -->
  <button
    x-ref="button"
    x-on:click="toggle()"
    :aria-expanded="open"
    :aria-controls="$id('dropdown-button')"
    type="button"
    id="quantsize"
    x-text="value.length === 0 ? 'Q4_K_S' : value"
    class="flex justify-between items-center gap-2 w-full"
  >
    Q4_K_S
    <!-- Heroicon: chevron-down -->
    <svg
      xmlns="http://www.w3.org/2000/svg"
      class="h-5 w-5 text-gray-400"
      viewBox="0 0 20 20"
      fill="currentColor"
    >
      <path
        fill-rule="evenodd"
        d="M5.293 7.293a1 1 0 011.414 0L10 10.586l3.293-3.293a1 1 0 111.414 1.414l-4 4a1 1 0 01-1.414 0l-4-4a1 1 0 010-1.414z"
        clip-rule="evenodd"
      />
    </svg>
  </button>
  <!-- Panel: one entry per quantization name in the gguf_quants table -->
  <div
    x-data="{ quants: [
      'Q2_K',
      'Q3_K_S',
      'Q3_K_M',
      'Q3_K_L',
      'Q4_0',
      'Q4_K_S',
      'Q4_K_M',
      'Q5_0',
      'Q5_K_S',
      'Q5_K_M',
      'Q6_K',
      'Q8_0'
    ]}"
    x-ref="panel"
    x-show="open"
    x-transition.origin.top.left
    x-on:click.outside="close($refs.button)"
    :id="$id('dropdown-button')"
    style="display: none"
    class="absolute left-0 mt-4 w-full rounded-md bg-white shadow-sm ring-1 ring-inset ring-gray-300 z-10"
  >
    <template x-for="quant in quants">
      <a
        @click="value = quant; close($refs.button)"
        x-text="quant"
        class="flex cursor-pointer items-center gap-2 w-full first-of-type:rounded-t-md last-of-type:rounded-b-md px-4 py-2.5 text-left text-sm hover:bg-gray-500/5 disabled:text-gray-500"
      ></a>
    </template>
  </div>
</div>
<div class="relative">
<label
for="batchsize"
class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900"
>
Batch Size
</label>
<input
value="512"
type="number"
step="128"
id="batchsize"
class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
/>
</div>
</div>
</div>
<button
type="button"
class="rounded-md bg-slate-800 px-3 py-2 text-sm font-semibold text-white shadow-sm hover:bg-slate-700 focus-visible:outline focus-visible:outline-2 focus-visible:outline-offset-2 focus-visible:outline-indigo-600"
@click="calculateSizes(format)"
>
Submit
</button>
</div>
<div class="w-auto flex flex-col gap-4">
<div class="relative">
<label
class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900"
>
Model Size (GB)
</label>
<div
id="resultmodel"
class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
>4.20</div>
</div>
<div class="relative">
<label
class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900"
>
Context Size (GB)
</label>
<div
id="resultcontext"
class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
>6.90</div>
</div>
<div class="relative">
<label
class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900"
>
Total Size (GB)
</label>
<div
id="resulttotal"
class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
>420.69</div>
</div>
<div class="relative">
<label
class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900"
>
Layer size (GB)
</label>
<div
id="layersize"
class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
>42.69</div>
</div>
<div class="relative">
<label
class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900"
>
Layers offloaded to GPU
</label>
<div
id="layersoffload"
class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
>42</div>
</div>
</div>
</div>
</div>
<script
src="https://cdn.jsdelivr.net/npm/alpinejs@3.x.x/dist/cdn.min.js"
></script>
<script>
  // Run the first calculation once the document has been fully parsed.
  // (`defer` does nothing on an inline script, so the wait is explicit.)
  document.addEventListener("DOMContentLoaded", () => {
    calculateSizes("gguf")
  })
</script>
</body>
</html>