#!/usr/bin/env bash
set -eux
cd "$(dirname "$0")"
MODEL_DIR="bench-TriLMs-models"
LLAMA_CPP_PATH="."
sizes=("1.5" "2.4" "3.9")
types=("TQ1_0" "TQ2_0" "Q4_K_M" "Q8_0" "F16" "BF16")
gputypes=("TQ2_0" "Q4_K_M" "Q8_0" "F16")
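
# Download the TQ1_0-F16 base GGUF models from Hugging Face if not already present.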
function gather_models() {
    echo "Gather the models"
    if [ ! -d "$MODEL_DIR" ]; then
        mkdir -p -- "$MODEL_DIR"
    fi
    (
        cd "$MODEL_DIR"
        for sz in "${sizes[@]}"; do
            filename="TriLM_${sz}B_Unpacked-TQ1_0-F16.gguf"
            if [ ! -f "$filename" ]; then
                wget "https://huggingface.co/compilade/quant-tests/resolve/main/${filename}"
            fi
        done
    )
}
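
# Configure and build llama.cpp from a clean build directory; extra arguments
# are passed through to cmake (used below for both the CPU and CUDA builds).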
function build_llama_cpp() {
    echo "Build llama.cpp with: $*"
    (
        cd -- "$LLAMA_CPP_PATH"
        if [ -d build ]; then
            pwd  # show which directory's build tree is about to be removed
            rm -rf build
        fi
        mkdir build
        cd build
        cmake .. "$@"
        make -j llama-bench llama-quantize
    )
}
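
# Quantize the base model of each size into every type under test, skipping
# files that already exist.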
function quantize() {
    echo "Make all model types we'll test"
    (
        for sz in "${sizes[@]}"; do
            for ty in "${types[@]}"; do
                # Brace expansion yields the source (TQ1_0-F16) and target ("$ty") filenames, in that order.
                filenames=("$MODEL_DIR"/TriLM_"${sz}"B_Unpacked-{TQ1_0-F16,"$ty"}.gguf)
                if [ ! -f "${filenames[1]}" ]; then
                    "$LLAMA_CPP_PATH"/build/bin/llama-quantize --allow-requantize "${filenames[@]}" "$ty"
                fi
            done
        done
    )
}
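
# Benchmark every model at 1, 2, 4, and 8 threads. Each llama-bench run prints
# its results as JSON; a comma is printed after each run to separate them.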
function bench() {
    echo "Test each model one by one for different numbers of threads"
    for sz in "${sizes[@]}"; do
        for ty in "$@"; do
            for th in 1 2 4 8; do
                {
                    "$LLAMA_CPP_PATH"/build/bin/llama-bench -v -m "${MODEL_DIR}/TriLM_${sz}B_Unpacked-${ty}.gguf" -t "${th}" -p 512 -n 128 -r 4 -o json
                    printf '%s\n' ","
                }
            done
        done
    done
}
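
# Append results for all CPU types, or for the GPU-supported subset, to the given file.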
function bench_cpu() {
    bench "${types[@]}" >> "$1"
}
function bench_gpu() {
    bench "${gputypes[@]}" >> "$1"
}
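
# Main: timestamped result file plus a system-info file for reproducibility.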
currentTime="$(date +'%s')"
resultFile="results-${currentTime}.json"
infoFile="results-${currentTime}-info.txt"
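# Note: the runs in the result file are separated by the commas printed in
# bench(), so the file is not one valid JSON document as-is. One possible
# cleanup afterwards (assuming the last line is the lone trailing comma):
#   { echo '['; sed '$ d' "$resultFile"; echo ']'; } > "results-${currentTime}-combined.json"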
lscpu > "$infoFile"
gather_models
build_llama_cpp -DGGML_NATIVE=ON -DGGML_CPU=ON
quantize
echo "---" >> "$infoFile"
ls -go "$MODEL_DIR" >> "$infoFile"
bench_cpu "$resultFile"
if [ -x "$(command -v nvidia-smi)" ]; then
    echo "GPU detected, benchmark with that too."
    build_llama_cpp -DGGML_NATIVE=ON -DGGML_CUDA=ON -DGGML_CUDA_F16=ON
    bench_gpu "$resultFile"
fi