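# Launch several vLLM OpenAI-compatible API servers, one per Elfsong/VLM_stage_2
# checkpoint, spread across local GPUs, and keep them running until interrupted.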
import os
import subprocess
import time
from pathlib import Path

Path("./logs").mkdir(exist_ok=True)
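# (gpu_id, checkpoint_iteration) pairs: the iteration selects which
# Elfsong/VLM_stage_2 checkpoint to serve, and gpu_id chooses the GPU it runs on.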
model_gpu_mapping = [
    (1, 3500),
    (1, 4500),
    (1, 5500),
    (3, 6500),
    (3, 7500),
]

launched_models = []
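# Launch each checkpoint as a separate server process on its own port (9000, 9001, ...).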
for index, (gpu_id, iter_num) in enumerate(model_gpu_mapping):
    formatted_iter_num = f"{iter_num:07d}"
    model_name = f"Elfsong/VLM_stage_2_iter_{formatted_iter_num}"
    arena_key = f"Local-Model-{iter_num:05d}"

    port = 9000 + index
| print(f"π Launching {model_name} on port {port} (GPU {gpu_id}) ...") |
    log_file = open(f"./logs/vllm_{formatted_iter_num}.log", "w")
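    # One vLLM OpenAI-compatible server per checkpoint. CUDA_VISIBLE_DEVICES pins
    # the process to its GPU, and the low --gpu-memory-utilization leaves room for
    # the other servers sharing that GPU.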
    process = subprocess.Popen(
        [
            "python", "-m", "vllm.entrypoints.openai.api_server",
            "--model", model_name,
            "--port", str(port),
            "--quantization", "bitsandbytes",
            "--gpu-memory-utilization", "0.3",
            "--max-model-len", "4096",
            "--trust-remote-code",
        ],
        env={**os.environ, "CUDA_VISIBLE_DEVICES": str(gpu_id)},
        stdout=log_file,
        stderr=log_file,
    )
    launched_models.append({
        "process": process,
        "model_name": model_name,
        "port": port,
        "gpu_id": gpu_id,
        "arena_key": arena_key,
        "log_file": log_file,
    })
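    # Give each server a head start before launching the next one.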
    time.sleep(10)
| print(f"β
Launched {len(launched_models)} models. Check logs in ./logs/ directory.") |
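# Smoke test (assumption: the first server comes up on the default host with
# port 9000 as configured above):
#   curl http://localhost:9000/v1/models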
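# Keep the script in the foreground: periodically report any server that has
# exited, and shut everything down cleanly on Ctrl+C.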
try:
    print("Models are running. Press Ctrl+C to stop all models.")
    while True:
        time.sleep(60)
        for model in launched_models:
            if model["process"].poll() is not None:
| print(f"β οΈ Model {model['model_name']} (port {model['port']}) has stopped.") |
except KeyboardInterrupt:
| print("\nπ Stopping all models...") |
    for model in launched_models:
        if model["process"].poll() is None:
            print(f"Stopping {model['model_name']} (port {model['port']})...")
            model["process"].terminate()
        model["log_file"].close()
| print("β
All models stopped.") |