#!/usr/bin/env bash
# Quantize a converted F16 GGUF model into the standard "TheBloke" set of quants.
#
# Prerequisite: a *.F16.gguf file produced by llama.cpp's convert script, e.g.
#   LLaMA 3: convert --vocab-type bpe --outtype f16 --outfile conv.bin .
#   LLaMA 2: convert --outtype f16 --outfile conv.bin .
set -euo pipefail

# TheBloke's usual quantization levels.
readonly quants=(Q2_K Q3_K_S Q3_K_M Q3_K_L Q4_0 Q4_K_S Q4_K_M Q5_0 Q5_K_S Q5_K_M Q6_K Q8_0)

# Output name stem: current directory name minus its last 5 characters
# (assumes a "<model>-GGUF"-style directory name — TODO confirm).
currpath=${PWD##*/}
stem=${currpath::-5}

# Pick the most recently modified F16 GGUF in the current directory.
# (Replaces `ls -Art`, which breaks on multiple matches / unusual filenames.)
basefile=
for f in *.F16.gguf; do
  if [[ ! -e "$f" ]]; then
    echo "error: no *.F16.gguf file found in $PWD" >&2
    exit 1
  fi
  if [[ -z "$basefile" || "$f" -nt "$basefile" ]]; then
    basefile=$f
  fi
done

for quant in "${quants[@]}"; do
  # basic GGUF
  quantize "$basefile" "$stem.$quant.gguf" "$quant"
  # imatrix GGUF
  # quantize --imatrix model.imatrix "$basefile" "$stem.$quant.gguf" "$quant"
done