Spaces:
Paused
Paused
# Build a single-GPU fp16 TensorRT-LLM engine for
# teknium/OpenHermes-2.5-Mistral-7B using the llama example's build.py, then
# copy the resulting engine directory into /root/scratch-space/models/mistral.

# build.py and the relative tmp/ output path below are both resolved from this
# directory, so abort rather than build/copy from the wrong place.
cd /root/TensorRT-LLM-examples/llama || {
  printf 'error: cannot cd to %s\n' '/root/TensorRT-LLM-examples/llama' >&2
  exit 1
}

## Build TensorRT for Mistral with `fp16`
# The trailing backslashes must be the last character on each line; anything
# after them (even a space) breaks the continuation.
python build.py --model_dir teknium/OpenHermes-2.5-Mistral-7B \
  --dtype float16 \
  --remove_input_padding \
  --use_gpt_attention_plugin float16 \
  --enable_context_fmha \
  --use_gemm_plugin float16 \
  --output_dir ./tmp/mistral/7B/trt_engines/fp16/1-gpu/ \
  --max_input_len 5000 \
  --max_batch_size 1 || {
  printf 'error: TensorRT-LLM engine build failed\n' >&2
  exit 1
}

# Publish the freshly built engine. Only reached when the build succeeded, so
# a stale or missing engine directory is never copied.
mkdir -p /root/scratch-space/models || {
  printf 'error: cannot create %s\n' '/root/scratch-space/models' >&2
  exit 1
}

# NOTE: if /root/scratch-space/models/mistral already exists as a directory,
# cp -r places the source inside it (as mistral/1-gpu) instead of replacing it.
cp -r tmp/mistral/7B/trt_engines/fp16/1-gpu /root/scratch-space/models/mistral || {
  printf 'error: cannot copy engine to %s\n' '/root/scratch-space/models/mistral' >&2
  exit 1
}