joostinyi-baseten's picture
Create README.md
9f2b9a7
|
raw
history blame
359 Bytes

# Build a single-GPU (TP=1) FP16 TensorRT-LLM engine from the merged
# Mistral LoRA weights, with in-flight batching and a paged KV cache.
python3 build.py \
    --model_dir ./weights/merged_mistral_lora/ \
    --remove_input_padding \
    --use_gpt_attention_plugin float16 \
    --enable_context_fmha \
    --use_gemm_plugin float16 \
    --output_dir ./mistral_engines/fp16/instruct-lora-merged-1-gpu \
    --max_batch_size 64 \
    --use_inflight_batching \
    --max_input_len 2000 \
    --max_output_len 2000 \
    --paged_kv_cache \
    --world_size 1 \
    --tp_size 1