Improved AutoBatch DDP error message (#6568)
Browse files
* Improved AutoBatch DDP error message
* Cleanup
train.py
CHANGED
@@ -522,10 +522,12 @@ def main(opt, callbacks=Callbacks()):
|
|
522 |
# DDP mode
|
523 |
device = select_device(opt.device, batch_size=opt.batch_size)
|
524 |
if LOCAL_RANK != -1:
|
|
|
|
|
|
|
|
|
|
|
525 |
assert torch.cuda.device_count() > LOCAL_RANK, 'insufficient CUDA devices for DDP command'
|
526 |
-
assert opt.batch_size % WORLD_SIZE == 0, '--batch-size must be multiple of CUDA device count'
|
527 |
-
assert not opt.image_weights, '--image-weights argument is not compatible with DDP training'
|
528 |
-
assert not opt.evolve, '--evolve argument is not compatible with DDP training'
|
529 |
torch.cuda.set_device(LOCAL_RANK)
|
530 |
device = torch.device('cuda', LOCAL_RANK)
|
531 |
dist.init_process_group(backend="nccl" if dist.is_nccl_available() else "gloo")
|
|
|
522 |
# DDP mode
|
523 |
device = select_device(opt.device, batch_size=opt.batch_size)
|
524 |
if LOCAL_RANK != -1:
|
525 |
+
msg = 'is not compatible with YOLOv5 Multi-GPU DDP training'
|
526 |
+
assert not opt.image_weights, f'--image-weights {msg}'
|
527 |
+
assert not opt.evolve, f'--evolve {msg}'
|
528 |
+
assert opt.batch_size != -1, f'AutoBatch with --batch-size -1 {msg}, please pass a valid --batch-size'
|
529 |
+
assert opt.batch_size % WORLD_SIZE == 0, f'--batch-size {opt.batch_size} must be multiple of WORLD_SIZE'
|
530 |
assert torch.cuda.device_count() > LOCAL_RANK, 'insufficient CUDA devices for DDP command'
|
|
|
|
|
|
|
531 |
torch.cuda.set_device(LOCAL_RANK)
|
532 |
device = torch.device('cuda', LOCAL_RANK)
|
533 |
dist.init_process_group(backend="nccl" if dist.is_nccl_available() else "gloo")
|