#!/usr/bin/env python3
"""Train a 3-class YOLO detector on an H100 NVL, then evaluate on the test split.

Classes: 0 no-helmet | 1 with-helmet | 2 triple-riding

The script runs top to bottom: report the GPU, fine-tune the pretrained
``yolo26m`` checkpoint, then validate the best checkpoint from that run on
the ``test`` split of the same dataset.
"""
import os

import torch
from ultralytics import YOLO

# Single source of truth for the dataset definition; used by both train and val.
DATA_YAML = '/home/azureuser/clean_merged_data/data.yaml'

# Log which GPU is in use and how much memory it reports (bytes -> GB).
gpu_name = torch.cuda.get_device_name(0)
gpu_mem_gb = torch.cuda.get_device_properties(0).total_memory / 1e9
print(f'GPU: {gpu_name} | {gpu_mem_gb:.1f} GB')

# Start from pretrained yolo26m (auto-downloads if missing)
model = YOLO('yolo26m.pt')

results = model.train(
    data=DATA_YAML,
    epochs=150,
    imgsz=640,
    batch=64,  # H100 NVL has 95GB, can push batch high
    device=0,
    workers=8,
    project='runs_clean',
    name='h100_3class',
    exist_ok=True,  # reuse the same run directory instead of creating h100_3class2, ...
    amp=True,
    cos_lr=True,
    close_mosaic=15,  # turn mosaic off for the final 15 epochs
    # augmentation — important for 10k image dataset
    mosaic=1.0,
    mixup=0.15,
    # Intended to boost with-helmet via cross-image pasting.
    # NOTE(review): Ultralytics documents copy_paste as a segmentation-only
    # augmentation — confirm the labels include polygons, otherwise this
    # setting likely has no effect.
    copy_paste=0.3,
    hsv_h=0.015,
    hsv_s=0.7,
    hsv_v=0.4,
    degrees=5.0,
    translate=0.1,
    scale=0.5,
    fliplr=0.5,
    # loss
    cls=1.0,  # classification loss weight (bump if still confused)
    box=7.5,
    dfl=1.5,
    # regularization
    weight_decay=0.0005,
    dropout=0.0,
    # schedule
    optimizer='auto',
    lr0=0.01,
    patience=40,
    plots=True,
    verbose=True,
)

print('TRAIN DONE — running val on test split')

# Take the checkpoint path from the trainer that just ran rather than
# re-spelling project/name by hand, so it cannot drift from the train() call
# or depend on the current working directory.
best_weights = str(model.trainer.best)
m = YOLO(best_weights)
m.val(data=DATA_YAML, split='test', plots=True, save_json=True)