# Install Git LFS so the large model weight files can be fetched from the Hub.
# Use sudo and -y for consistency with the apt-get invocation further below
# (the original 'apt install git-lfs' would fail for non-root users and
# prompt interactively).
sudo apt-get update && sudo apt-get install -y git-lfs
git lfs install
# Clone the base model. NOTE(review): meta-llama repos are gated — this
# presumably requires a prior 'huggingface-cli login' and an accepted
# license; confirm before documenting further.
git clone https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct


# Install Python 3.10, pip, and OpenMPI (runtime dependencies of TensorRT-LLM).
sudo apt-get update && sudo apt-get -y install \
  python3.10 python3-pip openmpi-bin libopenmpi-dev
# Install the TensorRT-LLM wheel from NVIDIA's package index.
pip3 install tensorrt_llm==0.13.0 --extra-index-url https://pypi.nvidia.com
# Clone the matching source tag — the examples/ scripts used below live here.
git clone -b v0.13.0 https://github.com/NVIDIA/TensorRT-LLM.git

INT4 AWQ quantization and engine build:

# Quantize the HF checkpoint to INT4 AWQ (block size 128, 32 calibration
# samples) and write a single-GPU TensorRT-LLM checkpoint.
python TensorRT-LLM/examples/quantization/quantize.py \
  --model_dir ./Meta-Llama-3.1-8B-Instruct \
  --dtype float16 \
  --qformat int4_awq \
  --batch_size 64 \
  --awq_block_size 128 \
  --output_dir ./tllm_checkpoint_1gpu_int4_awq \
  --calib_size 32
# Build the TensorRT engine from the quantized checkpoint.
# NOTE(review): --max_input_len 1048576 (1M tokens) looks unusually large for
# this model — confirm it is intentional before reuse.
trtllm-build \
  --checkpoint_dir ./tllm_checkpoint_1gpu_int4_awq \
  --output_dir ./tmp/llama/8B/trt_engines/int4_awq/1-gpu \
  --gpt_attention_plugin auto \
  --gemm_plugin auto \
  --max_num_tokens 65536 \
  --max_input_len 1048576 \
  --max_batch_size 64 \
  --gather_generation_logits

Upload the built engine artifacts to the Hugging Face Hub:

# Push the built engine and its config to the Hub repo ss-galileo/llama3.1-8b.
engine_dir=./tmp/llama/8B/trt_engines/int4_awq/1-gpu
huggingface-cli upload ss-galileo/llama3.1-8b "${engine_dir}/rank0.engine" rank0.engine
huggingface-cli upload ss-galileo/llama3.1-8b "${engine_dir}/config.json" config.json

Also upload the "tokenizer.json", "tokenizer_config.json", and "special_tokens_map.json" files from meta-llama/Meta-Llama-3.1-8B-Instruct.

Downloads last month
26
Inference Providers NEW
This model is not currently available via any of the supported Inference Providers.
The model cannot be deployed to the HF Inference API: The model has no library tag.