# Deploy Qwen2.5-7B-Instruct with the vLLM inference backend, pinned to GPU 0.
# NOTE(review): the client example below targets http://localhost:8000, which is
# presumably the server's default port — confirm against `swift deploy` docs.
CUDA_VISIBLE_DEVICES=0 swift deploy \
  --model Qwen/Qwen2.5-7B-Instruct \
  --infer_backend vllm \
  --served_model_name Qwen2.5-7B-Instruct

# After the server-side deployment above is successful, use the command below
# to perform a client call test.
# curl http://localhost:8000/v1/chat/completions \
#   -H "Content-Type: application/json" \
#   -d '{
#     "model": "Qwen2.5-7B-Instruct",
#     "messages": [{"role": "user", "content": "What is your name?"}],
#     "temperature": 0
#   }'