sparse

File size: 544 Bytes

96fe658

# Serve Qwen/Qwen3-8B through `swift deploy` using the sglang inference backend.
#   - CUDA_VISIBLE_DEVICES limits this one process to GPUs 0 and 1; the
#     assignment is scoped to the command and is not exported to the shell.
#   - --sglang_tp_size 2 is presumably the tensor-parallel degree, chosen to
#     match the two visible GPUs -- TODO confirm against the ms-swift docs.
#   - Generation is capped at 2048 new tokens with an 8192 sglang context length.
#   - Clients address the model by its served name, Qwen3-8B.
CUDA_VISIBLE_DEVICES=0,1 swift deploy \
  --model Qwen/Qwen3-8B --infer_backend sglang \
  --max_new_tokens 2048 --sglang_context_length 8192 \
  --sglang_tp_size 2 --served_model_name Qwen3-8B

# Once the server above has started successfully, uncomment and run the command below to send a test request from a client.

# curl http://localhost:8000/v1/chat/completions \
# -H "Content-Type: application/json" \
# -d '{
# "model": "Qwen3-8B",
# "messages": [{"role": "user", "content": "What is your name?"}],
# "temperature": 0
# }'