Update vllm_plugin/SERVING.md
Browse files
vllm_plugin/SERVING.md +3 -3
vllm_plugin/SERVING.md
CHANGED
|
@@ -38,14 +38,14 @@ uv pip install \
|
|
| 38 |
### Offline inference (quick test)
|
| 39 |
|
| 40 |
```bash
|
| 41 |
- cd
|
| 42 |
python serve.py
|
| 43 |
```
|
| 44 |
|
| 45 |
### OpenAI-compatible API server
|
| 46 |
|
| 47 |
```bash
|
| 48 |
- cd
|
| 49 |
python serve.py --api --port 8000
|
| 50 |
```
|
| 51 |
|
|
@@ -55,7 +55,7 @@ Then query:
|
|
| 55 |
curl http://localhost:8000/v1/completions \
|
| 56 |
-H "Content-Type: application/json" \
|
| 57 |
-d '{
|
| 58 |
- "model": "/
|
| 59 |
"prompt": "The capital of France is",
|
| 60 |
"max_tokens": 64,
|
| 61 |
"temperature": 0.8
|
|
|
|
| 38 |
### Offline inference (quick test)
|
| 39 |
|
| 40 |
```bash
|
| 41 |
+ cd CloverLM/vllm_plugin
|
| 42 |
python serve.py
|
| 43 |
```
|
| 44 |
|
| 45 |
### OpenAI-compatible API server
|
| 46 |
|
| 47 |
```bash
|
| 48 |
+ cd CloverLM/vllm_plugin
|
| 49 |
python serve.py --api --port 8000
|
| 50 |
```
|
| 51 |
|
|
|
|
| 55 |
curl http://localhost:8000/v1/completions \
|
| 56 |
-H "Content-Type: application/json" \
|
| 57 |
-d '{
|
| 58 |
+ "model": "path/to/CloverLM",
|
| 59 |
"prompt": "The capital of France is",
|
| 60 |
"max_tokens": 64,
|
| 61 |
"temperature": 0.8
|