vivekvar
/

azure-scripts

Model card Files Files and versions

azure-scripts / train_v3.py

vivekvar's picture

azure home scripts: data gen, training, misc

a70eb3d verified 17 days ago

history blame contribute delete

1.3 kB

	#!/usr/bin/env python3
	# v3 max-throughput: yolo26m on 6673-img dataset, batch=128, cache=ram
	from ultralytics import YOLO
	import torch
	# Single source of truth for the dataset config used by training and by every
	# evaluation pass below.
	DATA_YAML = '/home/azureuser/merged_v3/data.yaml'


	def main():
	    """Train yolo26m on the merged_v3 dataset, then evaluate the best checkpoint.

	    Steps, in order:
	      1. Print the name and total memory (in GB, 1e9 bytes) of CUDA device 0.
	      2. Train 'yolo26m.pt' with the v3 hyper-parameters; outputs go under
	         project='runs_v3', name='h100_3class_v3' (exist_ok=True, so the run
	         directory is reused rather than incremented).
	      3. Reload the best checkpoint written by that training run and validate
	         it on the 'val' split, the 'test' split, and the 'test' split again
	         with augment=True (test-time augmentation).
	    """
	    # NOTE: the separator used to be '\|', an invalid escape sequence that
	    # triggers a SyntaxWarning on recent Python versions and prints a stray
	    # backslash; a plain '|' is what was intended.
	    print('GPU:', torch.cuda.get_device_name(0), '|',
	          round(torch.cuda.get_device_properties(0).total_memory / 1e9), 'GB')

	    model = YOLO('yolo26m.pt')
	    model.train(
	        data=DATA_YAML,
	        epochs=200,
	        imgsz=640,
	        batch=128,  # 2x v2 — should hit ~70GB VRAM
	        device=0,
	        workers=16,  # feed data faster
	        cache='ram',  # dataset is ~1GB, fits easily
	        project='runs_v3',
	        name='h100_3class_v3',
	        exist_ok=True,
	        amp=True,
	        cos_lr=True,
	        close_mosaic=20,
	        mosaic=1.0, mixup=0.15, copy_paste=0.3,
	        hsv_h=0.015, hsv_s=0.7, hsv_v=0.4,
	        degrees=5.0, translate=0.1, scale=0.5, fliplr=0.5,
	        cls=1.0, box=7.5, dfl=1.5,
	        weight_decay=0.0005,
	        optimizer='auto',
	        patience=60,
	        plots=True, verbose=True,
	    )

	    # Ask the trainer where it wrote best.pt instead of re-typing the
	    # project/name path by hand: the two can no longer drift apart, and a
	    # mismatch cannot surface only after 200 epochs of training.
	    best_weights = str(model.trainer.best)

	    print('TRAIN DONE — running val + test')
	    m = YOLO(best_weights)

	    # (banner label, extra keyword arguments for m.val) for each evaluation pass.
	    eval_passes = (
	        ('VAL', {'split': 'val'}),
	        ('TEST', {'split': 'test'}),
	        ('TEST + TTA', {'split': 'test', 'augment': True}),
	    )
	    for label, val_kwargs in eval_passes:
	        print(f'--- {label} ---')
	        m.val(data=DATA_YAML, **val_kwargs)


	# Guard the entry point: training requests workers=16, and under spawn-style
	# multiprocessing start methods the main module is re-imported in child
	# processes. Without the guard that re-import would re-run the whole script.
	if __name__ == '__main__':
	    main()