# Deploy Qwen2.5-7B-Instruct with the vLLM inference backend, pinned to GPU 0.
# NOTE(review): the client example below targets http://localhost:8000, which is
# presumably the server's default port — confirm against `swift deploy` docs.
CUDA_VISIBLE_DEVICES=0 swift deploy \
  --model Qwen/Qwen2.5-7B-Instruct \
  --infer_backend vllm \
  --served_model_name Qwen2.5-7B-Instruct

# After the server-side deployment above is successful, use the command below
# to perform a client call test.
# curl http://localhost:8000/v1/chat/completions \
#   -H "Content-Type: application/json" \
#   -d '{
#     "model": "Qwen2.5-7B-Instruct",
#     "messages": [{"role": "user", "content": "What is your name?"}],
#     "temperature": 0
#   }'