glenn-jocher committed on
Commit
c687d5c
1 Parent(s): bc1fd13

reorganize train initialization steps

Browse files
Files changed (1) hide show
  1. train.py +19 -22
train.py CHANGED
@@ -161,7 +161,7 @@ def train(hyp, opt, device, tb_writer=None):
161
 
162
  # DDP mode
163
  if cuda and rank != -1:
164
- model = DDP(model, device_ids=[opt.local_rank], output_device=(opt.local_rank))
165
 
166
  # Trainloader
167
  dataloader, dataset = create_dataloader(train_path, imgsz, batch_size, gs, opt,
@@ -171,12 +171,26 @@ def train(hyp, opt, device, tb_writer=None):
171
  nb = len(dataloader) # number of batches
172
  assert mlc < nc, 'Label class %g exceeds nc=%g in %s. Possible class labels are 0-%g' % (mlc, nc, opt.data, nc - 1)
173
 
174
- # Testloader
175
  if rank in [-1, 0]:
176
  ema.updates = start_epoch * nb // accumulate # set EMA updates
177
  testloader = create_dataloader(test_path, imgsz_test, total_batch_size, gs, opt,
178
  hyp=hyp, augment=False, cache=opt.cache_images, rect=True, rank=-1,
179
- world_size=opt.world_size, workers=opt.workers)[0] # only runs on process 0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
180
 
181
  # Model parameters
182
  hyp['cls'] *= nc / 80. # scale coco-tuned hyp['cls'] to current dataset
@@ -186,21 +200,6 @@ def train(hyp, opt, device, tb_writer=None):
186
  model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device) # attach class weights
187
  model.names = names
188
 
189
- # Classes and Anchors
190
- if rank in [-1, 0] and not opt.resume:
191
- labels = np.concatenate(dataset.labels, 0)
192
- c = torch.tensor(labels[:, 0]) # classes
193
- # cf = torch.bincount(c.long(), minlength=nc) + 1. # frequency
194
- # model._initialize_biases(cf.to(device))
195
- plot_labels(labels, save_dir=log_dir)
196
- if tb_writer:
197
- # tb_writer.add_hparams(hyp, {}) # causes duplicate https://github.com/ultralytics/yolov5/pull/384
198
- tb_writer.add_histogram('classes', c, 0)
199
-
200
- # Anchors
201
- if not opt.noautoanchor:
202
- check_anchors(dataset, model=model, thr=hyp['anchor_t'], imgsz=imgsz)
203
-
204
  # Start training
205
  t0 = time.time()
206
  nw = max(3 * nb, 1e3) # number of warmup iterations, max(3 epochs, 1k iterations)
@@ -209,10 +208,8 @@ def train(hyp, opt, device, tb_writer=None):
209
  results = (0, 0, 0, 0, 0, 0, 0) # 'P', 'R', 'mAP', 'F1', 'val GIoU', 'val Objectness', 'val Classification'
210
  scheduler.last_epoch = start_epoch - 1 # do not move
211
  scaler = amp.GradScaler(enabled=cuda)
212
- logger.info('Image sizes %g train, %g test' % (imgsz, imgsz_test))
213
- logger.info('Using %g dataloader workers' % dataloader.num_workers)
214
- logger.info('Starting training for %g epochs...' % epochs)
215
- # torch.autograd.set_detect_anomaly(True)
216
  for epoch in range(start_epoch, epochs): # epoch ------------------------------------------------------------------
217
  model.train()
218
 
 
161
 
162
  # DDP mode
163
  if cuda and rank != -1:
164
+ model = DDP(model, device_ids=[opt.local_rank], output_device=opt.local_rank)
165
 
166
  # Trainloader
167
  dataloader, dataset = create_dataloader(train_path, imgsz, batch_size, gs, opt,
 
171
  nb = len(dataloader) # number of batches
172
  assert mlc < nc, 'Label class %g exceeds nc=%g in %s. Possible class labels are 0-%g' % (mlc, nc, opt.data, nc - 1)
173
 
174
+ # Process 0
175
  if rank in [-1, 0]:
176
  ema.updates = start_epoch * nb // accumulate # set EMA updates
177
  testloader = create_dataloader(test_path, imgsz_test, total_batch_size, gs, opt,
178
  hyp=hyp, augment=False, cache=opt.cache_images, rect=True, rank=-1,
179
+ world_size=opt.world_size, workers=opt.workers)[0] # testloader
180
+
181
+ if not opt.resume:
182
+ labels = np.concatenate(dataset.labels, 0)
183
+ c = torch.tensor(labels[:, 0]) # classes
184
+ # cf = torch.bincount(c.long(), minlength=nc) + 1. # frequency
185
+ # model._initialize_biases(cf.to(device))
186
+ plot_labels(labels, save_dir=log_dir)
187
+ if tb_writer:
188
+ # tb_writer.add_hparams(hyp, {}) # causes duplicate https://github.com/ultralytics/yolov5/pull/384
189
+ tb_writer.add_histogram('classes', c, 0)
190
+
191
+ # Anchors
192
+ if not opt.noautoanchor:
193
+ check_anchors(dataset, model=model, thr=hyp['anchor_t'], imgsz=imgsz)
194
 
195
  # Model parameters
196
  hyp['cls'] *= nc / 80. # scale coco-tuned hyp['cls'] to current dataset
 
200
  model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device) # attach class weights
201
  model.names = names
202
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
203
  # Start training
204
  t0 = time.time()
205
  nw = max(3 * nb, 1e3) # number of warmup iterations, max(3 epochs, 1k iterations)
 
208
  results = (0, 0, 0, 0, 0, 0, 0) # 'P', 'R', 'mAP', 'F1', 'val GIoU', 'val Objectness', 'val Classification'
209
  scheduler.last_epoch = start_epoch - 1 # do not move
210
  scaler = amp.GradScaler(enabled=cuda)
211
+ logger.info('Image sizes %g train, %g test\nUsing %g dataloader workers\nLogging results to %s\n'
212
+ 'Starting training for %g epochs...' % (imgsz, imgsz_test, dataloader.num_workers, log_dir, epochs))
 
 
213
  for epoch in range(start_epoch, epochs): # epoch ------------------------------------------------------------------
214
  model.train()
215