AutoMR-pangu / generator_server.sh
haifei
Align AutoMR-pangu with MATH-only chat workflow
5d74de9
raw
history blame contribute delete
301 Bytes
# Start the generator model behind vLLM's OpenAI-compatible HTTP server.
#
# Usage: bash generator_server.sh [extra api_server flags...]
#
# The model is loaded from ./openPangu-Embedded-7B, which is resolved against
# the current working directory, so run this from the directory that contains
# the checkpoint. The server listens on localhost:8000. Any arguments given to
# this script are appended to the server command line after the flags below.

# NOTE(review): presumably restricts the process to Ascend device index 1 —
# confirm against the host's device layout.
export ASCEND_RT_VISIBLE_DEVICES=1
# NOTE(review): presumably selects vLLM's V1 engine — confirm against the
# installed vLLM version.
export VLLM_USE_V1=1

# Keep "$@" as the last line: every flag line ends in a continuation backslash,
# so the chain must be closed by a line that does not end in one.
python -m vllm.entrypoints.openai.api_server \
  --model "./openPangu-Embedded-7B" \
  --tensor-parallel-size 1 \
  --port 8000 \
  --host localhost \
  --trust-remote-code \
  --dtype bfloat16 \
  --gpu-memory-utilization 0.90 \
  "$@"