yusufs committed on
Commit
266e7dd
·
1 Parent(s): 6bb48e9

fix(runner.sh): explicitly disabling enforce_eager

Browse files
Files changed (3) hide show
  1. run-llama.sh +1 -0
  2. run-sailor.sh +1 -0
  3. runner.sh +1 -0
run-llama.sh CHANGED
@@ -27,4 +27,5 @@ python -u /app/openai_compatible_api_server.py \
27
  --max-num-batched-tokens 32768 \
28
  --max-model-len 32768 \
29
  --dtype float16 \
 
30
  --gpu-memory-utilization 0.85
 
27
  --max-num-batched-tokens 32768 \
28
  --max-model-len 32768 \
29
  --dtype float16 \
30
+ --enforce-eager false \
31
  --gpu-memory-utilization 0.85
run-sailor.sh CHANGED
@@ -29,4 +29,5 @@ python -u /app/openai_compatible_api_server.py \
29
  --max-num-batched-tokens 32768 \
30
  --max-model-len 32768 \
31
  --dtype float16 \
 
32
  --gpu-memory-utilization 0.85
 
29
  --max-num-batched-tokens 32768 \
30
  --max-model-len 32768 \
31
  --dtype float16 \
32
+ --enforce-eager false \
33
  --gpu-memory-utilization 0.85
runner.sh CHANGED
@@ -51,4 +51,5 @@ python -u /app/openai_compatible_api_server.py \
51
  --max-num-batched-tokens 32768 \
52
  --max-model-len 32768 \
53
  --dtype float16 \
 
54
  --gpu-memory-utilization 0.9
 
51
  --max-num-batched-tokens 32768 \
52
  --max-model-len 32768 \
53
  --dtype float16 \
54
+ --enforce-eager false \
55
  --gpu-memory-utilization 0.9