#!/usr/bin/env bash
# Launch a Text Generation Inference (TGI) server for Llama-2-7b-chat-hf via Docker.
#
# Required env:
#   HUGGINGFACE_AUTH_TOKEN - HF access token (the model is gated; needed to download weights)
#
# The server listens on host port 8081 (container port 80).
set -euo pipefail

# Fail fast with a clear message instead of launching a container with an empty token.
: "${HUGGINGFACE_AUTH_TOKEN:?HUGGINGFACE_AUTH_TOKEN must be set (gated model access)}"

export HUGGINGFACE_MODEL_NAME_OR_PATH="meta-llama/Llama-2-7b-chat-hf"

printf 'Running %s with TGI\n' "$HUGGINGFACE_MODEL_NAME_OR_PATH"

# Share a host volume with the container so model weights persist across runs.
volume="$PWD/data"

docker run \
  -e HUGGING_FACE_HUB_TOKEN="$HUGGINGFACE_AUTH_TOKEN" \
  --shm-size 1g \
  -p 8081:80 \
  -v "$volume":/data \
  ghcr.io/huggingface/text-generation-inference:1.0.0 \
  --model-id "$HUGGINGFACE_MODEL_NAME_OR_PATH"