fixed bug
Changed files:
- app_modules/llm_loader.py +2 -2
- tgi.sh +1 -3
app_modules/llm_loader.py
CHANGED
@@ -298,7 +298,7 @@ class LLMLoader:
                 config=config,
                 quantization_config=double_quant_config,
                 trust_remote_code=True,
-
+                use_auth_token=token,
             )
             if is_t5
             else AutoModelForCausalLM.from_pretrained(
@@ -306,7 +306,7 @@ class LLMLoader:
                 config=config,
                 quantization_config=double_quant_config,
                 trust_remote_code=True,
-
+                use_auth_token=token,
             )
         )

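For context, a minimal sketch of the load path this change touches. meta-llama/Llama-2-7b-chat-hf is a gated repository, so from_pretrained() must be given a Hugging Face access token or the download is rejected; that is the bug the added use_auth_token=token addresses. Only the two changed keyword arguments come from the diff; the token source and the quantization settings below are assumptions, not code from this repo:

# Hypothetical sketch; variable names mirror the diff context, everything
# else (token source, quant settings) is an assumption.
import os

import torch
from transformers import AutoConfig, AutoModelForCausalLM, BitsAndBytesConfig

model_name = "meta-llama/Llama-2-7b-chat-hf"
token = os.environ.get("HUGGINGFACE_AUTH_TOKEN")  # assumed env var for the HF token

# The name "double_quant_config" suggests 4-bit loading with nested quantization.
double_quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

config = AutoConfig.from_pretrained(
    model_name, trust_remote_code=True, use_auth_token=token
)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    config=config,
    quantization_config=double_quant_config,
    trust_remote_code=True,
    use_auth_token=token,  # the fix: without this, gated repos return 401/403
)
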
tgi.sh
CHANGED
@@ -9,9 +9,7 @@ uname -a
 
 . env/tgi.conf
 
-export MODEL_ID="meta-llama/Llama-2-7b-chat-hf"
-export QUANTIZE="--quantize bitsandbytes-fp4"
-
 echo Running $MODEL_ID with TGI
 
 text-generation-launcher --model-id $MODEL_ID --port $PORT --max-input-length 2048 --max-total-tokens 4096 --ngrok --ngrok-authtoken $NGROK_AUTHTOKEN --ngrok-edge $NGROK_EDGE $QUANTIZE
+
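The tgi.sh change moves the model and quantization settings out of the launcher script, presumably into the sourced env/tgi.conf. A hypothetical sketch of that config, covering every variable tgi.sh references; the values shown are placeholders, not taken from the repo:

# Hypothetical env/tgi.conf; tgi.sh sources this file, so the MODEL_ID and
# QUANTIZE exports deleted from the script are assumed to live here now.
export MODEL_ID="meta-llama/Llama-2-7b-chat-hf"
export QUANTIZE="--quantize bitsandbytes-fp4"
export PORT=8080                                  # assumed port
export NGROK_AUTHTOKEN="<your-ngrok-authtoken>"   # placeholder
export NGROK_EDGE="<your-ngrok-edge>"             # placeholder

Keeping these in the sourced config rather than hard-coded in tgi.sh means the model or quantization mode can be swapped without editing the launcher script itself.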