Update README.md
Browse files
README.md
CHANGED
@@ -59,7 +59,7 @@ python3 -m vllm.entrypoints.openai.api_server \
|
|
59 |
--dtype auto \
|
60 |
--api-key token-abc123 \
|
61 |
--quantization compressed-tensors \
|
62 |
-
--max-num-batched-tokens
|
63 |
-
--max-model-len
|
64 |
--tensor-parallel-size 2 \
|
65 |
--gpu-memory-utilization 0.99
|
|
|
59 |
--dtype auto \
|
60 |
--api-key token-abc123 \
|
61 |
--quantization compressed-tensors \
|
62 |
+
--max-num-batched-tokens 16384 \
|
63 |
+
--max-model-len 16384 \
|
64 |
--tensor-parallel-size 2 \
|
65 |
--gpu-memory-utilization 0.99
|