#!/bin/bash
#
# Runs model_transform.py / model_deploy.py (chip bm1684x) on the embedding,
# lm_head, greedy_head, penalty_sample_head and block_N / block_cache_N
# networks found under ${folder}/onnx, then merges the resulting bmodels with
# `model_tool --combine`.
#
# Options (each takes exactly one value):
#   --name         qwen-1_8b | qwen-7b | qwen-14b   (required)
#   --seq_length   second dimension of the embedding input shape (required)
#   --mode         int8 (default) | bf16 | int4
#   --num_device   forwarded to model_deploy.py when it is not 1 (default 1)
#   --addr_mode    "io_alone" adds --addr_mode io_alone for block_cache_N
#   --decode_mode  "jacobi" raises guess_len from 1 to 8

# -e: stop at the first failing command; -x: trace every command.
set -ex

# Space-separated list of bmodel paths; every stage appends to it.
models=

# Quantization mode selected with --mode.
mode="int8"

# Root of the working tree (inputs are read from ${folder}/onnx).
folder="tmp"

num_device=1

# Not referenced anywhere else in this file.
mode_args=""

# Extra model_deploy.py arguments derived from the options further down.
device_args=""
quantize_args="--quantize W8BF16"
addr_args=""

name=""

# Highest block index: the block loop runs from 0 to num_layers inclusive.
num_layers=

# The real file name is derived from name/mode/num_device once the options
# have been parsed; computing it here would only ever see an empty $name.
out_model=""

seq_length=

# Second dimension of the embedding_cache input shape.
guess_len=1

# Second dimension of the lm_head input shape.
hidden_size=

# Optional switches; initialised so later tests never read an unset variable.
addr_mode=""
decode_mode=""

#######################################
# Parse the command-line options into the script's global settings.
# Each recognised option takes exactly one value and is stored in the
# variable of the same name (--seq_length VALUE -> seq_length=VALUE).
# Globals:   mode, num_device, name, addr_mode, seq_length, decode_mode
#            (written)
# Arguments: the script's command line
# Outputs:   an error message on stderr for an unknown option or a missing
#            value
# Returns:   0 on success; exits the shell with status 1 on a usage error
#######################################
parse_args() {
  local key
  while [[ $# -gt 0 ]]; do
    key="$1"
    case "$key" in
      --mode | --num_device | --name | --addr_mode | --seq_length | --decode_mode)
        # Without this check a trailing option would only fail inside
        # `shift 2`, with no message telling the user what went wrong.
        if [[ $# -lt 2 ]]; then
          echo "Option $key requires an argument." >&2
          exit 1
        fi
        # Strip the leading "--" to get the variable name to assign.
        printf -v "${key#--}" '%s' "$2"
        shift 2
        ;;
      *)
        echo "Invalid option: $key" >&2
        exit 1
        ;;
    esac
  done
}

parse_args "$@"

# --seq_length has no default, so refuse to continue without it.
if [[ -z "$seq_length" ]]; then
  echo "Error: --seq_length is required." >&2
  exit 1
fi

# The value is spliced into the embedding input shape [[1,N]]; reject anything
# that is not a positive integer here rather than deep inside the toolchain.
if ! [[ "$seq_length" =~ ^[1-9][0-9]*$ ]]; then
  echo "Error: --seq_length must be a positive integer, got '$seq_length'." >&2
  exit 1
fi

# Map the model name onto its dimensions.
#   num_layers  - index of the last block (the block loop runs 0..num_layers)
#   hidden_size - second dimension of the lm_head input shape
case "$name" in
  qwen-1_8b)
    num_layers=23
    hidden_size=2048
    echo "Compile Qwen-1_8B"
    ;;
  qwen-7b)
    num_layers=31
    hidden_size=4096
    echo "Compile Qwen-7B"
    ;;
  qwen-14b)
    num_layers=39
    hidden_size=5120
    echo "Compile Qwen-14B"
    ;;
  *)
    # \033[31m ... \033[0m prints the accepted names in red.
    >&2 echo -e "Error: Invalid name $name, the input name must be \033[31mqwen-1_8b|qwen-7b|qwen-14b\033[0m"
    exit 1
    ;;
esac

# Translate --mode into the quantization arguments passed to model_deploy.py
# for the lm_head and block networks.
case "$mode" in
  int8)
    quantize_args="--quantize W8BF16"
    ;;
  bf16)
    quantize_args="--quantize BF16"
    ;;
  int4)
    quantize_args="--quantize W4BF16 --q_group_size 64"
    ;;
  *)
    # Diagnostics belong on stderr so they are not mixed into normal output.
    echo "Error, unknown quantize mode" >&2
    exit 1
    ;;
esac

#######################################
# Derive the device arguments and the name of the combined output file.
# Globals:   num_device, name, mode (read); device_args, out_model (written)
# Arguments: none
#######################################
derive_device_settings() {
  # --num_device is only forwarded to model_deploy.py for multi-device builds;
  # for a single device device_args keeps whatever value it already had.
  if [[ "$num_device" != "1" ]]; then
    device_args="--num_device $num_device"
  fi

  # Same naming scheme for every device count: <name>_<mode>_<N>dev.bmodel.
  out_model="${name}_${mode}_${num_device}dev.bmodel"
}

derive_device_settings

#######################################
# Apply the optional --addr_mode / --decode_mode switches.
# Any value other than the two recognised ones leaves the settings untouched.
# Globals:   addr_mode, decode_mode (read); addr_args, guess_len (written)
# Arguments: none
#######################################
apply_optional_modes() {
  # addr_args is appended to the model_deploy.py call for block_cache_N only.
  if [[ "$addr_mode" == "io_alone" ]]; then
    addr_args="--addr_mode io_alone"
  fi

  # guess_len is the second dimension of the embedding_cache input shape.
  if [[ "$decode_mode" == "jacobi" ]]; then
    guess_len=8
  fi
}

apply_optional_modes

# --- Stage: embedding ---------------------------------------------------------
# Builds embedding.bmodel (input shape [1,seq_length]) and
# embedding_cache.bmodel (input shape [1,guess_len]) from ../onnx/embedding.pt
# inside ${folder}/embedding and appends both paths to $models.

#######################################
# Convert embedding.pt to MLIR and deploy it as a BF16 bmodel.
# Globals:   device_args (read)
# Arguments: $1 - network name, also used as the .mlir/.bmodel basename
#            $2 - second dimension of the [1,N] int32 input shape
#######################################
compile_embedding() {
  local net="$1" width="$2"

  # The shape is quoted so the shell never treats [[1,N]] as a glob pattern.
  model_transform.py \
    --model_name "$net" \
    --model_def ../onnx/embedding.pt \
    --input_shapes "[[1,${width}]]" \
    --input_types "int32" \
    --mlir "${net}.mlir"

  # $device_args is deliberately unquoted: it is empty or several words.
  # shellcheck disable=SC2086
  model_deploy.py \
    --mlir "${net}.mlir" \
    --quantize BF16 \
    --quant_input \
    --quant_output \
    --chip bm1684x \
    $device_args \
    --model "${net}.bmodel"
}

outdir="${folder}/embedding"
mkdir -p "$outdir"
pushd "$outdir"

compile_embedding embedding "$seq_length"
compile_embedding embedding_cache "$guess_len"

# Remove the .npz files left in this directory; -f keeps `set -e` from
# aborting the build when there are none.
rm -f -- *.npz

models="${models} ${outdir}/embedding.bmodel ${outdir}/embedding_cache.bmodel "

popd

echo "$models"

# --- Stage: lm_head, greedy_head, penalty_sample_head -------------------------
# Works inside ${folder}/<mode>_<num_device>dev/lm_head and reads its inputs
# from ../../onnx.

#######################################
# Convert an ONNX head to MLIR and deploy it with default settings.
# Arguments: $1 - network name, also the .onnx/.mlir/.bmodel basename
#######################################
compile_onnx_head() {
  local net="$1"

  model_transform.py \
    --model_name "$net" \
    --model_def "../../onnx/${net}.onnx" \
    --mlir "${net}.mlir"

  model_deploy.py \
    --mlir "${net}.mlir" \
    --chip bm1684x \
    --model "${net}.bmodel"
}

outdir="${folder}/${mode}_${num_device}dev/lm_head"
mkdir -p "$outdir"
pushd "$outdir"

# The shape is quoted so the shell never treats [[1,N]] as a glob pattern.
model_transform.py \
  --model_name lm_head \
  --model_def ../../onnx/lm_head.pt \
  --input_shapes "[[1,${hidden_size}]]" \
  --mlir lm_head.mlir

# $quantize_args / $device_args are deliberately unquoted: each holds zero or
# more whitespace-separated words.
# shellcheck disable=SC2086
model_deploy.py \
  --mlir lm_head.mlir \
  $quantize_args \
  --quant_input \
  --chip bm1684x \
  $device_args \
  --model lm_head.bmodel

compile_onnx_head greedy_head
compile_onnx_head penalty_sample_head

# Remove the .npz files left in this directory; -f keeps `set -e` from
# aborting the build when there are none.
rm -f -- *.npz

# NOTE(review): greedy_head.bmodel and penalty_sample_head.bmodel are built
# above, but only lm_head.bmodel is added to $models, so they never reach
# `model_tool --combine`. Confirm whether that is intended.
models="${models}${outdir}/lm_head.bmodel "

popd

echo "$models"

# --- Stage: block_N / block_cache_N --------------------------------------------
# Builds block_<i>.bmodel and block_cache_<i>.bmodel for i = 0..num_layers
# inside ${folder}/<mode>_<num_device>dev/block (inputs come from ../../onnx)
# and appends every result to $models.

#######################################
# Convert one block network to MLIR and deploy it as a bmodel.
# Globals:   quantize_args, device_args (read)
# Arguments: $1  - network name, also the .onnx/.mlir/.bmodel basename
#            $2… - extra model_deploy.py arguments placed before --model
#######################################
compile_block_net() {
  local net="$1"
  shift

  model_transform.py \
    --model_name "$net" \
    --model_def "../../onnx/${net}.onnx" \
    --mlir "${net}.mlir"

  # $quantize_args / $device_args are deliberately unquoted: each holds zero
  # or more whitespace-separated words.
  # shellcheck disable=SC2086
  model_deploy.py \
    --mlir "${net}.mlir" \
    $quantize_args \
    --quant_input \
    --quant_output \
    --chip bm1684x \
    $device_args \
    "$@" \
    --model "${net}.bmodel"
}

# Use ${folder} like the other stages instead of a hard-coded "tmp".
outdir="${folder}/${mode}_${num_device}dev/block"
mkdir -p "$outdir"

# No second mkdir after pushd: $outdir is a relative path, so repeating it from
# inside the directory would only create a stray nested copy of the tree.
pushd "$outdir"

for ((i = 0; i <= num_layers; i++)); do

  compile_block_net "block_${i}"

  # Only the cache variant receives $addr_args (unquoted on purpose: it is
  # empty or several words).
  # shellcheck disable=SC2086
  compile_block_net "block_cache_${i}" $addr_args

  # Remove the .npz files left in this directory; -f keeps `set -e` from
  # aborting the build when there are none.
  rm -f -- *.npz

  models="${models}${outdir}/block_${i}.bmodel ${outdir}/block_cache_${i}.bmodel "

done

popd

echo "$models"

# Merge every bmodel collected in $models into the final output file.
# $models is a space-separated path list, so word splitting is required here.
# shellcheck disable=SC2086
model_tool --combine $models -o "$out_model"