glenn-jocher committed on
Commit
006eb40
1 Parent(s): b57abb1

Improved AutoBatch DDP error message (#6568)

Browse files

* Improved AutoBatch DDP error message

* Cleanup

Files changed (1) hide show
  1. train.py +5 -3
train.py CHANGED
@@ -522,10 +522,12 @@ def main(opt, callbacks=Callbacks()):
522
  # DDP mode
523
  device = select_device(opt.device, batch_size=opt.batch_size)
524
  if LOCAL_RANK != -1:
 
 
 
 
 
525
  assert torch.cuda.device_count() > LOCAL_RANK, 'insufficient CUDA devices for DDP command'
526
- assert opt.batch_size % WORLD_SIZE == 0, '--batch-size must be multiple of CUDA device count'
527
- assert not opt.image_weights, '--image-weights argument is not compatible with DDP training'
528
- assert not opt.evolve, '--evolve argument is not compatible with DDP training'
529
  torch.cuda.set_device(LOCAL_RANK)
530
  device = torch.device('cuda', LOCAL_RANK)
531
  dist.init_process_group(backend="nccl" if dist.is_nccl_available() else "gloo")
 
522
  # DDP mode
523
  device = select_device(opt.device, batch_size=opt.batch_size)
524
  if LOCAL_RANK != -1:
525
+ msg = 'is not compatible with YOLOv5 Multi-GPU DDP training'
526
+ assert not opt.image_weights, f'--image-weights {msg}'
527
+ assert not opt.evolve, f'--evolve {msg}'
528
+ assert opt.batch_size != -1, f'AutoBatch with --batch-size -1 {msg}, please pass a valid --batch-size'
529
+ assert opt.batch_size % WORLD_SIZE == 0, f'--batch-size {opt.batch_size} must be multiple of WORLD_SIZE'
530
  assert torch.cuda.device_count() > LOCAL_RANK, 'insufficient CUDA devices for DDP command'
 
 
 
531
  torch.cuda.set_device(LOCAL_RANK)
532
  device = torch.device('cuda', LOCAL_RANK)
533
  dist.init_process_group(backend="nccl" if dist.is_nccl_available() else "gloo")