piotr25691's picture
conversion with imatrix
050948f verified
#!/usr/bin/env bash
#
# Quantize the F16 GGUF model found in the current directory into a fixed
# list of quantization types by invoking `quantize` once per type.
#
# Usage: run from inside the model directory. The directory must contain a
# *.F16.gguf file, and `quantize` must be on PATH.
#
# Output naming: <dir>.<QUANT>.gguf, where <dir> is the current directory's
# name with its last 5 characters removed.
# NOTE(review): the 5 stripped characters are presumably a "-GGUF" suffix on
# the directory name — confirm against the directory layout actually used.
#
# Producing the F16 input from a PyTorch model (done beforehand, by hand):
#   LLaMA 3: convert --vocab-type bpe --outtype f16 --outfile conv.bin .
#   LLaMA 2: convert --outtype f16 --outfile conv.bin .
#
# Exit status: 0 if every quantization succeeded, 1 otherwise.

# No `-e`: a failed quantization is reported and the remaining types are
# still attempted; failures are collected and turned into the exit status.
set -uo pipefail

# The "TheBloke" set of quantization types to create.
declare -a quants=(Q2_K Q3_K_S Q3_K_M Q3_K_L Q4_0 Q4_K_S Q4_K_M Q5_0 Q5_K_S Q5_K_M Q6_K Q8_0)

# Print an error message to stderr and abort the script.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

main() {
  command -v quantize >/dev/null || die "'quantize' not found on PATH"

  # Output prefix: current directory name minus its last 5 characters.
  # `%?????` is used instead of `${var::-5}` so it also works on bash < 4.2;
  # the length guard rejects names that would leave an empty prefix.
  local currpath=${PWD##*/}
  (( ${#currpath} > 5 )) \
    || die "directory name '${currpath}' is too short to derive an output prefix"
  local prefix=${currpath%?????}

  # Locate the F16 source model with a glob rather than by parsing `ls`.
  # If several files match, the most recently modified one is used.
  local basefile='' candidate
  for candidate in *.F16.gguf; do
    [[ -e "$candidate" ]] || continue # unmatched glob stays literal
    if [[ -z "$basefile" || "$candidate" -nt "$basefile" ]]; then
      basefile=$candidate
    fi
  done
  [[ -n "$basefile" ]] || die "no *.F16.gguf file found in ${PWD}"

  local quant
  local -a failed=()
  for quant in "${quants[@]}"; do
    # basic GGUF
    if ! quantize "$basefile" "${prefix}.${quant}.gguf" "$quant"; then
      printf 'warning: quantize failed for %s\n' "$quant" >&2
      failed+=("$quant")
    fi
    # imatrix GGUF
    # quantize --imatrix model.imatrix "$basefile" "${prefix}.${quant}.gguf" "$quant"
  done

  if (( ${#failed[@]} > 0 )); then
    printf 'error: %d quantization(s) failed: %s\n' \
      "${#failed[@]}" "${failed[*]}" >&2
    return 1
  fi
}

main "$@"