glenn-jocher committed on
Commit
e8810a5
1 Parent(s): fbf41e0

Update DDP backend `if dist.is_nccl_available()` (#3705)

Browse files
Files changed (1) hide show
  1. train.py +1 -1
train.py CHANGED
@@ -539,7 +539,7 @@ def main(opt):
539
  assert torch.cuda.device_count() > LOCAL_RANK, 'insufficient CUDA devices for DDP command'
540
  torch.cuda.set_device(LOCAL_RANK)
541
  device = torch.device('cuda', LOCAL_RANK)
542
- dist.init_process_group(backend="gloo", timeout=timedelta(seconds=60))
543
  assert opt.batch_size % WORLD_SIZE == 0, '--batch-size must be multiple of CUDA device count'
544
  assert not opt.image_weights, '--image-weights argument is not compatible with DDP training'
545
 
 
539
  assert torch.cuda.device_count() > LOCAL_RANK, 'insufficient CUDA devices for DDP command'
540
  torch.cuda.set_device(LOCAL_RANK)
541
  device = torch.device('cuda', LOCAL_RANK)
542
+ dist.init_process_group(backend="nccl" if dist.is_nccl_available() else "gloo", timeout=timedelta(seconds=60))
543
  assert opt.batch_size % WORLD_SIZE == 0, '--batch-size must be multiple of CUDA device count'
544
  assert not opt.image_weights, '--image-weights argument is not compatible with DDP training'
545