#!/usr/bin/env bash
#
# Quantize the newest *.F16.gguf model in the current directory into the
# standard TheBloke quant set, producing one <dirname-minus-5-chars>.<quant>.gguf
# per level. Run from inside the model directory.
#
# Convert from PyTorch first (run in the model directory):
#   LLaMA 3: convert --vocab-type bpe --outtype f16 --outfile conv.bin .
#   LLaMA 2: convert --outtype f16 --outfile conv.bin .

set -euo pipefail

# TheBloke's standard quantization levels.
declare -ra quants=(Q2_K Q3_K_S Q3_K_M Q3_K_L Q4_0 Q4_K_S Q4_K_M Q5_0 Q5_K_S Q5_K_M Q6_K Q8_0)

# Output basename: current directory name with its last 5 chars stripped
# (presumably a ".F16"-style suffix in the dir name — confirm against your layout).
currpath=${PWD##*/}

# Select the most recently modified *.F16.gguf via globbing instead of
# parsing `ls` output (SC2012); `ls -Art` breaks on spaces / multiple files.
basefile=
for candidate in *.F16.gguf; do
  [[ -e "$candidate" ]] || continue   # glob did not match anything
  if [[ -z "$basefile" || "$candidate" -nt "$basefile" ]]; then
    basefile=$candidate
  fi
done
if [[ -z "$basefile" ]]; then
  printf 'error: no *.F16.gguf file found in %s\n' "$PWD" >&2
  exit 1
fi

for quant in "${quants[@]}"; do
  # basic GGUF
  quantize "$basefile" "${currpath::-5}.$quant.gguf" "$quant"
  # imatrix GGUF
  # quantize --imatrix model.imatrix "$basefile" "${currpath::-5}.$quant.gguf" "$quant"
done