# Arena / launch_models.py
# feat: Add scripts for launching models with vLLM, including dynamic GPU
# allocation and logging functionality for better monitoring and management
# of model processes. (commit 04ffcc8, by Elfsong)
#!/usr/bin/env python3
# coding: utf-8
# Author: Du Mingzhe (dumingzhex@gmail.com)
# Date: 2025-02-03
import os
import subprocess
import time
from pathlib import Path
# Create logs directory (one log file per launched model is written here by
# the launch loop below; exist_ok avoids failing on re-runs).
Path("./logs").mkdir(exist_ok=True)
# Launch models via vLLM.
# Each entry is (gpu_id, iter_num): the CUDA device the server is pinned to
# (via CUDA_VISIBLE_DEVICES) and the training-iteration number of the
# checkpoint to serve (used to build the HF model id and the log file name).
# Commented-out tuples are checkpoints currently disabled.
model_gpu_mapping = [
# (0, 1000),
# (0, 1500),
# (1, 2000),
# (1, 2500),
# (2, 3000),
(1, 3500),
# (1, 4000),
(1, 4500),
# (1, 5000),
(1, 5500),
# (2, 6000),
(3, 6500),
# (3, 7000),
(3, 7500),
]
# Spin up one vLLM OpenAI-compatible server per configured checkpoint.
# Ports are assigned sequentially from 9000; each server is pinned to its
# GPU through CUDA_VISIBLE_DEVICES and logs to ./logs/vllm_<iter>.log.
launched_models = []
for slot, (gpu_id, iter_num) in enumerate(model_gpu_mapping):
    iter_tag = f"{iter_num:07d}"
    model_name = f"Elfsong/VLM_stage_2_iter_{iter_tag}"
    arena_key = f"Local-Model-{iter_num:05d}"
    port = 9000 + slot
    print(f"🚀 Launching {model_name} on port {port} (GPU {gpu_id}) ...")
    # The log file handle must outlive this loop iteration — the child
    # process writes to it for its whole lifetime — so no `with` here;
    # it is closed during shutdown.
    log_file = open(f"./logs/vllm_{iter_tag}.log", "w")
    cmd = [
        "python", "-m", "vllm.entrypoints.openai.api_server",
        "--model", model_name,
        "--port", str(port),
        "--quantization", "bitsandbytes",
        "--gpu-memory-utilization", "0.3",
        "--max-model-len", "4096",
        "--trust-remote-code",
    ]
    # Copy the current environment and pin the child to a single GPU.
    child_env = dict(os.environ, CUDA_VISIBLE_DEVICES=str(gpu_id))
    process = subprocess.Popen(cmd, env=child_env, stdout=log_file, stderr=log_file)
    launched_models.append({
        "process": process,
        "model_name": model_name,
        "port": port,
        "gpu_id": gpu_id,
        "arena_key": arena_key,
        "log_file": log_file,
    })
    time.sleep(10)  # Wait for initialization
print(f"✅ Launched {len(launched_models)} models. Check logs in ./logs/ directory.")
# Keep the script running and monitor processes.
# Fixes over the naive version:
#  - a dead model is reported once, not re-announced every 60 s;
#  - on Ctrl+C, terminated children are reaped with wait(); any child that
#    ignores SIGTERM is hard-killed, so no zombies are left behind.
try:
    print("Models are running. Press Ctrl+C to stop all models.")
    reported_dead = set()  # ports whose death has already been announced
    while True:
        time.sleep(60)
        # Check if any processes have died (poll() returns the exit code
        # once the child has terminated, None while it is still running).
        for model in launched_models:
            if model["process"].poll() is not None and model["port"] not in reported_dead:
                reported_dead.add(model["port"])
                print(f"⚠️ Model {model['model_name']} (port {model['port']}) has stopped.")
except KeyboardInterrupt:
    print("\n🛑 Stopping all models...")
    # First pass: ask every still-running server to shut down.
    for model in launched_models:
        if model["process"].poll() is None:
            print(f"Stopping {model['model_name']} (port {model['port']})...")
            model["process"].terminate()
    # Second pass: reap each child; escalate to SIGKILL if it does not exit
    # within the grace period, then close its log file.
    for model in launched_models:
        try:
            model["process"].wait(timeout=10)
        except subprocess.TimeoutExpired:
            model["process"].kill()
            model["process"].wait()
        model["log_file"].close()
    print("✅ All models stopped.")