Update README.md
Browse files
README.md
CHANGED
@@ -13,6 +13,11 @@ base_model:
|
|
13 |
- Qwen/Qwen2.5-VL-72B-Instruct
|
14 |
---
|
15 |
|
|
|
|
|
|
|
|
|
|
|
16 |
# Qwen2.5-VL-72B-Instruct
|
17 |
<a href="https://chat.qwenlm.ai/" target="_blank" style="margin: 2px;">
|
18 |
<img alt="Chat" src="https://img.shields.io/badge/%F0%9F%92%9C%EF%B8%8F%20Qwen%20Chat%20-536af5" style="display: inline-block; vertical-align: middle;"/>
|
|
|
13 |
- Qwen/Qwen2.5-VL-72B-Instruct
|
14 |
---
|
15 |
|
16 |
+
# Multi-GPU inference with vLLM
|
17 |
+
```bash
|
18 |
+
docker run -it --name iddt-ben-qwen25vl72 --gpus '"device=0,1"' -v huggingface:/root/.cache/huggingface --shm-size=32g -p 30000:8000 --ipc=host benasd/vllm:latest --model Benasd/Qwen2.5-VL-72B-Instruct-AWQ --dtype float16 --quantization awq -tp 2
|
19 |
+
```
|
20 |
+
|
21 |
# Qwen2.5-VL-72B-Instruct
|
22 |
<a href="https://chat.qwenlm.ai/" target="_blank" style="margin: 2px;">
|
23 |
<img alt="Chat" src="https://img.shields.io/badge/%F0%9F%92%9C%EF%B8%8F%20Qwen%20Chat%20-536af5" style="display: inline-block; vertical-align: middle;"/>
|