Spaces:
Paused
Paused
File size: 635 Bytes
3ec0fd4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 |
#!/bin/bash
#
# Build a TensorRT-LLM engine for teknium/OpenHermes-2.5-Mistral-7B in fp16
# using the llama example's build.py, then publish the engine files to
# /root/scratch-space/models/mistral.
#
# Requires: /root/TensorRT-LLM-examples/llama containing build.py, and a
# `python` on PATH that can run it.

# Options are set here rather than on the shebang line: shebang flags are
# dropped when the script is invoked as `bash script.sh`, which would let a
# failed build fall through to the copy step.
set -euo pipefail

readonly EXAMPLE_DIR=/root/TensorRT-LLM-examples/llama
readonly MODEL_DIR=teknium/OpenHermes-2.5-Mistral-7B
# Relative to EXAMPLE_DIR; shared by the build and the copy so they cannot drift.
readonly ENGINE_DIR=./tmp/mistral/7B/trt_engines/fp16/1-gpu
readonly DEPLOY_DIR=/root/scratch-space/models/mistral

cd "${EXAMPLE_DIR}"

## Build TensorRT for Mistral with `fp16`
python build.py --model_dir "${MODEL_DIR}" \
  --dtype float16 \
  --remove_input_padding \
  --use_gpt_attention_plugin float16 \
  --enable_context_fmha \
  --use_gemm_plugin float16 \
  --output_dir "${ENGINE_DIR}/" \
  --max_input_len 5000 \
  --max_batch_size 1

# Copy the *contents* of the engine directory into DEPLOY_DIR. A plain
# `cp -r SRC DEPLOY_DIR` only does this when DEPLOY_DIR does not exist yet; on a
# re-run it would nest the engine as DEPLOY_DIR/1-gpu and leave the old files in
# place. Creating the directory first and copying `SRC/.` gives the same layout
# on every run.
mkdir -p "${DEPLOY_DIR}"
cp -r "${ENGINE_DIR}/." "${DEPLOY_DIR}/"
|