Vezora committed on
Commit
eb87bcf
·
verified ·
1 Parent(s): fb1c114

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +2 -2
README.md CHANGED
@@ -59,7 +59,7 @@ python3 -m vllm.entrypoints.openai.api_server \
59
  --dtype auto \
60
  --api-key token-abc123 \
61
  --quantization compressed-tensors \
62
- --max-num-batched-tokens 32768 \
63
- --max-model-len 32768 \
64
  --tensor-parallel-size 2 \
65
  --gpu-memory-utilization 0.99
 
59
  --dtype auto \
60
  --api-key token-abc123 \
61
  --quantization compressed-tensors \
62
+ --max-num-batched-tokens 16384 \
63
+ --max-model-len 16384 \
64
  --tensor-parallel-size 2 \
65
  --gpu-memory-utilization 0.99