#!/usr/bin/env bash
# =============================================================================
# setup_vllm.sh - One-command vLLM setup on AMD MI300X for CodeSentry
# =============================================================================
set -euo pipefail

echo "============================================================"
echo " CodeSentry - vLLM + Qwen2.5-Coder-32B Setup (AMD MI300X)"
echo "============================================================"

# ── 1. Install vLLM with ROCm backend ─────────────────────────
echo "[1/4] Installing vLLM with ROCm 6.2 support..."
pip install vllm --extra-index-url https://download.pytorch.org/whl/rocm6.2
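
# Optional sanity check (a sketch; assumes rocm-smi is on PATH and that the
# install above pulled in a ROCm-enabled PyTorch wheel). Uncomment to use:
# rocm-smi                                                     # MI300X should be listed
# python -c "import torch; print(torch.cuda.is_available())"   # prints True on ROCm builds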

# ── 2. Install project dependencies ───────────────────────────
echo "[2/4] Installing CodeSentry requirements..."
pip install -r requirements.txt

# ── 3. Start vLLM server ──────────────────────────────────────
echo "[3/4] Starting vLLM server with Qwen2.5-Coder-32B-Instruct..."
echo "  Model: Qwen/Qwen2.5-Coder-32B-Instruct"
echo "  Port: 8080"
echo "  GPU utilisation: 85%"
echo "  Max context: 32768 tokens"

vllm serve Qwen/Qwen2.5-Coder-32B-Instruct \
  --port 8080 \
  --tensor-parallel-size 1 \
  --gpu-memory-utilization 0.85 \
  --max-model-len 32768 \
  --enable-chunked-prefill \
  --trust-remote-code &

VLLM_PID=$!
echo "  vLLM PID: $VLLM_PID"

# ── 4. Wait for vLLM to be ready ──────────────────────────────
echo "[4/4] Waiting for vLLM to be ready..."
MAX_WAIT=300  # 5 minutes max
ELAPSED=0
until curl -sf http://localhost:8080/health > /dev/null 2>&1; do
  if [ "$ELAPSED" -ge "$MAX_WAIT" ]; then
    echo "ERROR: vLLM did not become ready within ${MAX_WAIT}s"
    kill "$VLLM_PID" 2>/dev/null || true
    exit 1
  fi
  echo "  Waiting... (${ELAPSED}s elapsed)"
  sleep 5
  ELAPSED=$((ELAPSED + 5))
done
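
# Optional: confirm the model actually loaded by listing it through vLLM's
# OpenAI-compatible API (vllm serve exposes a /v1/models route). Uncomment to use:
# curl -s http://localhost:8080/v1/models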
| echo "" | |
| echo "============================================================" | |
| echo " vLLM is READY at http://localhost:8080" | |
| echo " Starting CodeSentry API at http://localhost:8000 ..." | |
| echo "============================================================" | |
| echo "" | |
# Start CodeSentry
uvicorn main:app --host 0.0.0.0 --port 8000 --reload