| # Launch the SGLang server (python3 -m sglang.launch_server), bound to 0.0.0.0:80. | |
| # MODEL_ID, KV_CACHE_DTYPE, TP_SIZE and QUANT_METHOD are expected to be defined | |
| # before this point; TP_SIZE feeds both the tensor- and expert-parallel sizes. | |
| # Expansions are double-quoted so each value is passed as exactly one argument. | |
| python3 -m sglang.launch_server \ | |
| --model-path "$MODEL_ID" \ | |
| --kv-cache-dtype "$KV_CACHE_DTYPE" \ | |
| --tensor-parallel-size "$TP_SIZE" \ | |
| --expert-parallel-size "$TP_SIZE" \ | |
| --quantization "$QUANT_METHOD" \ | |
| --enable-torch-compile \ | |
| --enable-ep-moe \ | |
| --tool-call-parser qwen25 \ | |
| --host 0.0.0.0 \ | |
| --port 80 | |