#!/usr/bin/env bash
#
# Runs text-generation-launcher (TGI) for a fixed model with the launcher's
# ngrok options enabled.
#
# Required variables (validated below before launching):
#   PORT             - value for --port; it is not assigned in this script, so
#                      it must come from the environment or env/ngrok.conf
#   NGROK_AUTHTOKEN  - value for --ngrok-authtoken (expected from env/ngrok.conf)
#   NGROK_EDGE       - value for --ngrok-edge (expected from env/ngrok.conf)
set -euo pipefail

# Print a message to stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Work from the directory containing this script so the relative path
# env/ngrok.conf resolves no matter where the script is invoked from.
BASEDIR=$(dirname -- "$0")
cd -- "$BASEDIR" || die "cannot cd to $BASEDIR"
echo Current Directory:
pwd
uname -a

# for $NGROK_AUTHTOKEN and $NGROK_EDGE
# shellcheck source=/dev/null
source env/ngrok.conf || die "cannot source env/ngrok.conf"

# Fail early with a clear message: an empty value would otherwise make the
# corresponding flag swallow the next option on the launcher command line.
: "${PORT:?PORT must be set}"
: "${NGROK_AUTHTOKEN:?NGROK_AUTHTOKEN must be set (see env/ngrok.conf)}"
: "${NGROK_EDGE:?NGROK_EDGE must be set (see env/ngrok.conf)}"

export MODEL_ID="meta-llama/Llama-2-7b-chat-hf"
# Holds only the quantization scheme; the --quantize flag itself is added
# below. Set to an empty string to launch without quantization.
export QUANTIZE="bitsandbytes-fp4"

echo "Running $MODEL_ID with TGI"

# Build the command as an array so every value stays a single argument.
launcher_args=(
  --model-id "$MODEL_ID"
  --port "$PORT"
  --max-input-length 2048
  --max-total-tokens 4096
  --ngrok
  --ngrok-authtoken "$NGROK_AUTHTOKEN"
  --ngrok-edge "$NGROK_EDGE"
)
if [[ -n "$QUANTIZE" ]]; then
  launcher_args+=(--quantize "$QUANTIZE")
fi

text-generation-launcher "${launcher_args[@]}"