#!/usr/bin/env bash
# Quantize a converted F16 GGUF model into the standard "TheBloke" set of quants.
#
# Prerequisite: a *.F16.gguf file produced by llama.cpp's convert script, e.g.
#   LLaMA 3: convert --vocab-type bpe --outtype f16 --outfile conv.bin .
#   LLaMA 2: convert --outtype f16 --outfile conv.bin .
set -euo pipefail

# TheBloke's usual quantization levels.
readonly quants=(Q2_K Q3_K_S Q3_K_M Q3_K_L Q4_0 Q4_K_S Q4_K_M Q5_0 Q5_K_S Q5_K_M Q6_K Q8_0)

# Output name stem: current directory name minus its last 5 characters
# (assumes a "<model>-GGUF"-style directory name — TODO confirm).
currpath=${PWD##*/}
stem=${currpath::-5}

# Pick the most recently modified F16 GGUF in the current directory.
# (Replaces `ls -Art`, which breaks on multiple matches / unusual filenames.)
basefile=
for f in *.F16.gguf; do
  if [[ ! -e "$f" ]]; then
    echo "error: no *.F16.gguf file found in $PWD" >&2
    exit 1
  fi
  if [[ -z "$basefile" || "$f" -nt "$basefile" ]]; then
    basefile=$f
  fi
done

for quant in "${quants[@]}"; do
  # basic GGUF
  quantize "$basefile" "$stem.$quant.gguf" "$quant"
  # imatrix GGUF
  # quantize --imatrix model.imatrix "$basefile" "$stem.$quant.gguf" "$quant"
done