Fix vllm serve command for different versions
README.md CHANGED

````diff
@@ -86,7 +86,13 @@ Start the embedding server once, then route from any process without reloading t
 ```bash
 # Terminal 1: Start vLLM embedding server (runs once, stays alive)
 uv pip install vllm
+
+# vLLM >= 0.8
 vllm serve Qwen/Qwen3-0.6B --task embed --port 8000
+
+# vLLM < 0.8 (use this if the above fails)
+python -m vllm.entrypoints.openai.api_server \
+    --model Qwen/Qwen3-0.6B --task embed --port 8000
 ```
 
 ```python
````
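Either form of the command starts vLLM's OpenAI-compatible HTTP server, so a client in another process reaches it at `/v1/embeddings` on the port given above. A minimal sketch of such a client, assuming defaults from the diff (model `Qwen/Qwen3-0.6B`, port 8000); the helper names here are illustrative, not from the README:

```python
import json
import urllib.request

# Endpoint exposed by the server started in Terminal 1 (port from the command above).
VLLM_URL = "http://localhost:8000/v1/embeddings"

def build_request(texts, model="Qwen/Qwen3-0.6B"):
    """Build the JSON body for the OpenAI-compatible embeddings endpoint."""
    return {"model": model, "input": texts}

def embed(texts):
    """POST the texts to the running server; requires Terminal 1 to be up."""
    body = json.dumps(build_request(texts)).encode()
    req = urllib.request.Request(
        VLLM_URL, data=body, headers={"Content-Type": "application/json"}
    )
    with urllib.request.urlopen(req) as resp:
        data = json.load(resp)
    # Response follows the OpenAI embeddings shape: {"data": [{"embedding": [...]}, ...]}
    return [item["embedding"] for item in data["data"]]

# Inspect the request shape without needing a live server:
print(build_request(["hello world"]))
```

Because the server stays alive, any number of processes can call `embed` without reloading the model.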