glenn-jocher committed on
Commit
c4addd7
1 Parent(s): 04081f8

Unified '/project/name' results saving (#1377)

Browse files

* Project/name update

* Update ci-testing.yml

* address project with path separator failure mode

* Project/name update

* address project with path separator failure mode

* Update ci-testing.yml

* detect.py default --name bug fix

* missing rstrip PR

* train/exp0 to train/exp

Files changed (7) hide show
  1. .github/workflows/ci-testing.yml +2 -2
  2. README.md +1 -1
  3. detect.py +7 -9
  4. test.py +7 -9
  5. train.py +30 -27
  6. tutorial.ipynb +13 -13
  7. utils/general.py +11 -17
.github/workflows/ci-testing.yml CHANGED
@@ -66,10 +66,10 @@ jobs:
66
  python train.py --img 256 --batch 8 --weights weights/${{ matrix.model }}.pt --cfg models/${{ matrix.model }}.yaml --epochs 1 --device $di
67
  # detect
68
  python detect.py --weights weights/${{ matrix.model }}.pt --device $di
69
- python detect.py --weights runs/train/exp0/weights/last.pt --device $di
70
  # test
71
  python test.py --img 256 --batch 8 --weights weights/${{ matrix.model }}.pt --device $di
72
- python test.py --img 256 --batch 8 --weights runs/train/exp0/weights/last.pt --device $di
73
 
74
  python models/yolo.py --cfg models/${{ matrix.model }}.yaml # inspect
75
  python models/export.py --img 256 --batch 1 --weights weights/${{ matrix.model }}.pt # export
 
66
  python train.py --img 256 --batch 8 --weights weights/${{ matrix.model }}.pt --cfg models/${{ matrix.model }}.yaml --epochs 1 --device $di
67
  # detect
68
  python detect.py --weights weights/${{ matrix.model }}.pt --device $di
69
+ python detect.py --weights runs/train/exp/weights/last.pt --device $di
70
  # test
71
  python test.py --img 256 --batch 8 --weights weights/${{ matrix.model }}.pt --device $di
72
+ python test.py --img 256 --batch 8 --weights runs/train/exp/weights/last.pt --device $di
73
 
74
  python models/yolo.py --cfg models/${{ matrix.model }}.yaml # inspect
75
  python models/export.py --img 256 --batch 1 --weights weights/${{ matrix.model }}.pt # export
README.md CHANGED
@@ -96,7 +96,7 @@ Fusing layers...
96
  Model Summary: 140 layers, 7.45958e+06 parameters, 0 gradients
97
  image 1/2 data/images/bus.jpg: 640x480 4 persons, 1 buss, 1 skateboards, Done. (0.013s)
98
  image 2/2 data/images/zidane.jpg: 384x640 2 persons, 2 ties, Done. (0.013s)
99
- Results saved to runs/detect/exp0
100
  Done. (0.124s)
101
  ```
102
  <img src="https://user-images.githubusercontent.com/26833433/97107365-685a8d80-16c7-11eb-8c2e-83aac701d8b9.jpeg" width="500">
 
96
  Model Summary: 140 layers, 7.45958e+06 parameters, 0 gradients
97
  image 1/2 data/images/bus.jpg: 640x480 4 persons, 1 buss, 1 skateboards, Done. (0.013s)
98
  image 2/2 data/images/zidane.jpg: 384x640 2 persons, 2 ties, Done. (0.013s)
99
+ Results saved to runs/detect/exp
100
  Done. (0.124s)
101
  ```
102
  <img src="https://user-images.githubusercontent.com/26833433/97107365-685a8d80-16c7-11eb-8c2e-83aac701d8b9.jpeg" width="500">
detect.py CHANGED
@@ -10,21 +10,18 @@ from numpy import random
10
  from models.experimental import attempt_load
11
  from utils.datasets import LoadStreams, LoadImages
12
  from utils.general import check_img_size, non_max_suppression, apply_classifier, scale_coords, xyxy2xywh, \
13
- plot_one_box, strip_optimizer, set_logging, increment_dir
14
  from utils.torch_utils import select_device, load_classifier, time_synchronized
15
 
16
 
17
  def detect(save_img=False):
18
- save_dir, source, weights, view_img, save_txt, imgsz = \
19
- Path(opt.save_dir), opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
20
  webcam = source.isnumeric() or source.endswith('.txt') or \
21
  source.lower().startswith(('rtsp://', 'rtmp://', 'http://'))
22
 
23
  # Directories
24
- if save_dir == Path('runs/detect'): # if default
25
- save_dir.mkdir(parents=True, exist_ok=True) # make base
26
- save_dir = Path(increment_dir(save_dir / 'exp', opt.name)) # increment run
27
- (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make new dir
28
 
29
  # Initialize
30
  set_logging()
@@ -156,12 +153,13 @@ if __name__ == '__main__':
156
  parser.add_argument('--view-img', action='store_true', help='display results')
157
  parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
158
  parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels')
159
- parser.add_argument('--save-dir', type=str, default='runs/detect', help='directory to save results')
160
- parser.add_argument('--name', default='', help='name to append to --save-dir: i.e. runs/{N} -> runs/{N}_{name}')
161
  parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --class 0, or --class 0 2 3')
162
  parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
163
  parser.add_argument('--augment', action='store_true', help='augmented inference')
164
  parser.add_argument('--update', action='store_true', help='update all models')
 
 
 
165
  opt = parser.parse_args()
166
  print(opt)
167
 
 
10
  from models.experimental import attempt_load
11
  from utils.datasets import LoadStreams, LoadImages
12
  from utils.general import check_img_size, non_max_suppression, apply_classifier, scale_coords, xyxy2xywh, \
13
+ plot_one_box, strip_optimizer, set_logging, increment_path
14
  from utils.torch_utils import select_device, load_classifier, time_synchronized
15
 
16
 
17
  def detect(save_img=False):
18
+ source, weights, view_img, save_txt, imgsz = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
 
19
  webcam = source.isnumeric() or source.endswith('.txt') or \
20
  source.lower().startswith(('rtsp://', 'rtmp://', 'http://'))
21
 
22
  # Directories
23
+ save_dir = Path(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok)) # increment run
24
+ (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir
 
 
25
 
26
  # Initialize
27
  set_logging()
 
153
  parser.add_argument('--view-img', action='store_true', help='display results')
154
  parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
155
  parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels')
 
 
156
  parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --class 0, or --class 0 2 3')
157
  parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
158
  parser.add_argument('--augment', action='store_true', help='augmented inference')
159
  parser.add_argument('--update', action='store_true', help='update all models')
160
+ parser.add_argument('--project', default='runs/detect', help='save results to project/name')
161
+ parser.add_argument('--name', default='exp', help='save results to project/name')
162
+ parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
163
  opt = parser.parse_args()
164
  print(opt)
165
 
test.py CHANGED
@@ -13,7 +13,7 @@ from models.experimental import attempt_load
13
  from utils.datasets import create_dataloader
14
  from utils.general import coco80_to_coco91_class, check_dataset, check_file, check_img_size, compute_loss, \
15
  non_max_suppression, scale_coords, xyxy2xywh, clip_coords, plot_images, xywh2xyxy, box_iou, output_to_target, \
16
- ap_per_class, set_logging, increment_dir
17
  from utils.torch_utils import select_device, time_synchronized
18
 
19
 
@@ -46,10 +46,8 @@ def test(data,
46
  save_txt = opt.save_txt # save *.txt labels
47
 
48
  # Directories
49
- if save_dir == Path('runs/test'): # if default
50
- save_dir.mkdir(parents=True, exist_ok=True) # make base
51
- save_dir = Path(increment_dir(save_dir / 'exp', opt.name)) # increment run
52
- (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make new dir
53
 
54
  # Load model
55
  model = attempt_load(weights, map_location=device) # load FP32 model
@@ -279,7 +277,6 @@ if __name__ == '__main__':
279
  parser.add_argument('--img-size', type=int, default=640, help='inference size (pixels)')
280
  parser.add_argument('--conf-thres', type=float, default=0.001, help='object confidence threshold')
281
  parser.add_argument('--iou-thres', type=float, default=0.65, help='IOU threshold for NMS')
282
- parser.add_argument('--save-json', action='store_true', help='save a cocoapi-compatible JSON results file')
283
  parser.add_argument('--task', default='val', help="'val', 'test', 'study'")
284
  parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
285
  parser.add_argument('--single-cls', action='store_true', help='treat as single-class dataset')
@@ -287,8 +284,10 @@ if __name__ == '__main__':
287
  parser.add_argument('--verbose', action='store_true', help='report mAP by class')
288
  parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
289
  parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels')
290
- parser.add_argument('--save-dir', type=str, default='runs/test', help='directory to save results')
291
- parser.add_argument('--name', default='', help='name to append to --save-dir: i.e. runs/{N} -> runs/{N}_{name}')
 
 
292
  opt = parser.parse_args()
293
  opt.save_json |= opt.data.endswith('coco.yaml')
294
  opt.data = check_file(opt.data) # check file
@@ -305,7 +304,6 @@ if __name__ == '__main__':
305
  opt.single_cls,
306
  opt.augment,
307
  opt.verbose,
308
- save_dir=Path(opt.save_dir),
309
  save_txt=opt.save_txt,
310
  save_conf=opt.save_conf,
311
  )
 
13
  from utils.datasets import create_dataloader
14
  from utils.general import coco80_to_coco91_class, check_dataset, check_file, check_img_size, compute_loss, \
15
  non_max_suppression, scale_coords, xyxy2xywh, clip_coords, plot_images, xywh2xyxy, box_iou, output_to_target, \
16
+ ap_per_class, set_logging, increment_path
17
  from utils.torch_utils import select_device, time_synchronized
18
 
19
 
 
46
  save_txt = opt.save_txt # save *.txt labels
47
 
48
  # Directories
49
+ save_dir = Path(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok)) # increment run
50
+ (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir
 
 
51
 
52
  # Load model
53
  model = attempt_load(weights, map_location=device) # load FP32 model
 
277
  parser.add_argument('--img-size', type=int, default=640, help='inference size (pixels)')
278
  parser.add_argument('--conf-thres', type=float, default=0.001, help='object confidence threshold')
279
  parser.add_argument('--iou-thres', type=float, default=0.65, help='IOU threshold for NMS')
 
280
  parser.add_argument('--task', default='val', help="'val', 'test', 'study'")
281
  parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
282
  parser.add_argument('--single-cls', action='store_true', help='treat as single-class dataset')
 
284
  parser.add_argument('--verbose', action='store_true', help='report mAP by class')
285
  parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
286
  parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels')
287
+ parser.add_argument('--save-json', action='store_true', help='save a cocoapi-compatible JSON results file')
288
+ parser.add_argument('--project', default='runs/test', help='save to project/name')
289
+ parser.add_argument('--name', default='exp', help='save to project/name')
290
+ parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
291
  opt = parser.parse_args()
292
  opt.save_json |= opt.data.endswith('coco.yaml')
293
  opt.data = check_file(opt.data) # check file
 
304
  opt.single_cls,
305
  opt.augment,
306
  opt.verbose,
 
307
  save_txt=opt.save_txt,
308
  save_conf=opt.save_conf,
309
  )
train.py CHANGED
@@ -27,7 +27,7 @@ from utils.datasets import create_dataloader
27
  from utils.general import (
28
  torch_distributed_zero_first, labels_to_class_weights, plot_labels, check_anchors, labels_to_image_weights,
29
  compute_loss, plot_images, fitness, strip_optimizer, plot_results, get_latest_run, check_dataset, check_file,
30
- check_git_status, check_img_size, increment_dir, print_mutation, plot_evolution, set_logging, init_seeds)
31
  from utils.google_utils import attempt_download
32
  from utils.torch_utils import ModelEMA, select_device, intersect_dicts
33
 
@@ -36,19 +36,20 @@ logger = logging.getLogger(__name__)
36
 
37
  def train(hyp, opt, device, tb_writer=None, wandb=None):
38
  logger.info(f'Hyperparameters {hyp}')
39
- log_dir = Path(tb_writer.log_dir) if tb_writer else Path(opt.logdir) / 'evolve' # logging directory
40
- wdir = log_dir / 'weights' # weights directory
41
- wdir.mkdir(parents=True, exist_ok=True)
 
 
 
42
  last = wdir / 'last.pt'
43
  best = wdir / 'best.pt'
44
- results_file = log_dir / 'results.txt'
45
- epochs, batch_size, total_batch_size, weights, rank = \
46
- opt.epochs, opt.batch_size, opt.total_batch_size, opt.weights, opt.global_rank
47
 
48
  # Save run settings
49
- with open(log_dir / 'hyp.yaml', 'w') as f:
50
  yaml.dump(hyp, f, sort_keys=False)
51
- with open(log_dir / 'opt.yaml', 'w') as f:
52
  yaml.dump(vars(opt), f, sort_keys=False)
53
 
54
  # Configure
@@ -120,8 +121,10 @@ def train(hyp, opt, device, tb_writer=None, wandb=None):
120
 
121
  # Logging
122
  if wandb and wandb.run is None:
123
- id = ckpt.get('wandb_id') if 'ckpt' in locals() else None
124
- wandb_run = wandb.init(config=opt, resume="allow", project="YOLOv5", name=log_dir.stem, id=id)
 
 
125
 
126
  # Resume
127
  start_epoch, best_fitness = 0, 0.0
@@ -188,7 +191,7 @@ def train(hyp, opt, device, tb_writer=None, wandb=None):
188
  c = torch.tensor(labels[:, 0]) # classes
189
  # cf = torch.bincount(c.long(), minlength=nc) + 1. # frequency
190
  # model._initialize_biases(cf.to(device))
191
- plot_labels(labels, save_dir=log_dir)
192
  if tb_writer:
193
  # tb_writer.add_hparams(hyp, {}) # causes duplicate https://github.com/ultralytics/yolov5/pull/384
194
  tb_writer.add_histogram('classes', c, 0)
@@ -215,7 +218,7 @@ def train(hyp, opt, device, tb_writer=None, wandb=None):
215
  scaler = amp.GradScaler(enabled=cuda)
216
  logger.info('Image sizes %g train, %g test\n'
217
  'Using %g dataloader workers\nLogging results to %s\n'
218
- 'Starting training for %g epochs...' % (imgsz, imgsz_test, dataloader.num_workers, log_dir, epochs))
219
  for epoch in range(start_epoch, epochs): # epoch ------------------------------------------------------------------
220
  model.train()
221
 
@@ -296,7 +299,7 @@ def train(hyp, opt, device, tb_writer=None, wandb=None):
296
 
297
  # Plot
298
  if ni < 3:
299
- f = str(log_dir / f'train_batch{ni}.jpg') # filename
300
  result = plot_images(images=imgs, targets=targets, paths=paths, fname=f)
301
  # if tb_writer and result is not None:
302
  # tb_writer.add_image(f, result, dataformats='HWC', global_step=epoch)
@@ -321,7 +324,7 @@ def train(hyp, opt, device, tb_writer=None, wandb=None):
321
  model=ema.ema,
322
  single_cls=opt.single_cls,
323
  dataloader=testloader,
324
- save_dir=log_dir,
325
  plots=epoch == 0 or final_epoch, # plot first and last
326
  log_imgs=opt.log_imgs if wandb else 0)
327
 
@@ -369,7 +372,7 @@ def train(hyp, opt, device, tb_writer=None, wandb=None):
369
  if rank in [-1, 0]:
370
  # Strip optimizers
371
  n = opt.name if opt.name.isnumeric() else ''
372
- fresults, flast, fbest = log_dir / f'results{n}.txt', wdir / f'last{n}.pt', wdir / f'best{n}.pt'
373
  for f1, f2 in zip([wdir / 'last.pt', wdir / 'best.pt', results_file], [flast, fbest, fresults]):
374
  if f1.exists():
375
  os.rename(f1, f2) # rename
@@ -378,7 +381,7 @@ def train(hyp, opt, device, tb_writer=None, wandb=None):
378
  os.system('gsutil cp %s gs://%s/weights' % (f2, opt.bucket)) if opt.bucket else None # upload
379
  # Finish
380
  if not opt.evolve:
381
- plot_results(save_dir=log_dir) # save as results.png
382
  logger.info('%g epochs completed in %.3f hours.\n' % (epoch - start_epoch + 1, (time.time() - t0) / 3600))
383
 
384
  dist.destroy_process_group() if rank not in [-1, 0] else None
@@ -410,11 +413,11 @@ if __name__ == '__main__':
410
  parser.add_argument('--adam', action='store_true', help='use torch.optim.Adam() optimizer')
411
  parser.add_argument('--sync-bn', action='store_true', help='use SyncBatchNorm, only available in DDP mode')
412
  parser.add_argument('--local_rank', type=int, default=-1, help='DDP parameter, do not modify')
413
- parser.add_argument('--logdir', type=str, default='runs/train', help='logging directory')
414
- parser.add_argument('--name', default='', help='name to append to --save-dir: i.e. runs/{N} -> runs/{N}_{name}')
415
  parser.add_argument('--log-imgs', type=int, default=10, help='number of images for W&B logging, max 100')
416
  parser.add_argument('--workers', type=int, default=8, help='maximum number of dataloader workers')
417
-
 
 
418
  opt = parser.parse_args()
419
 
420
  # Set DDP variables
@@ -428,19 +431,19 @@ if __name__ == '__main__':
428
  # Resume
429
  if opt.resume: # resume an interrupted run
430
  ckpt = opt.resume if isinstance(opt.resume, str) else get_latest_run() # specified or most recent path
431
- log_dir = Path(ckpt).parent.parent # runs/train/exp0
432
  assert os.path.isfile(ckpt), 'ERROR: --resume checkpoint does not exist'
433
- with open(log_dir / 'opt.yaml') as f:
434
  opt = argparse.Namespace(**yaml.load(f, Loader=yaml.FullLoader)) # replace
435
  opt.cfg, opt.weights, opt.resume = '', ckpt, True
436
  logger.info('Resuming training from %s' % ckpt)
437
-
438
  else:
439
  # opt.hyp = opt.hyp or ('hyp.finetune.yaml' if opt.weights else 'hyp.scratch.yaml')
440
  opt.data, opt.cfg, opt.hyp = check_file(opt.data), check_file(opt.cfg), check_file(opt.hyp) # check files
441
  assert len(opt.cfg) or len(opt.weights), 'either --cfg or --weights must be specified'
442
  opt.img_size.extend([opt.img_size[-1]] * (2 - len(opt.img_size))) # extend to 2 sizes (train, test)
443
- log_dir = increment_dir(Path(opt.logdir) / 'exp', opt.name) # runs/exp1
 
444
 
445
  # DDP mode
446
  device = select_device(opt.device, batch_size=opt.batch_size)
@@ -466,8 +469,8 @@ if __name__ == '__main__':
466
  tb_writer, wandb = None, None # init loggers
467
  if opt.global_rank in [-1, 0]:
468
  # Tensorboard
469
- logger.info(f'Start Tensorboard with "tensorboard --logdir {opt.logdir}", view at http://localhost:6006/')
470
- tb_writer = SummaryWriter(log_dir=log_dir) # runs/train/exp0
471
 
472
  # W&B
473
  try:
@@ -514,7 +517,7 @@ if __name__ == '__main__':
514
  assert opt.local_rank == -1, 'DDP mode not implemented for --evolve'
515
  opt.notest, opt.nosave = True, True # only test/save final epoch
516
  # ei = [isinstance(x, (int, float)) for x in hyp.values()] # evolvable indices
517
- yaml_file = Path(opt.logdir) / 'evolve' / 'hyp_evolved.yaml' # save best result here
518
  if opt.bucket:
519
  os.system('gsutil cp gs://%s/evolve.txt .' % opt.bucket) # download evolve.txt if exists
520
 
 
27
  from utils.general import (
28
  torch_distributed_zero_first, labels_to_class_weights, plot_labels, check_anchors, labels_to_image_weights,
29
  compute_loss, plot_images, fitness, strip_optimizer, plot_results, get_latest_run, check_dataset, check_file,
30
+ check_git_status, check_img_size, increment_path, print_mutation, plot_evolution, set_logging, init_seeds)
31
  from utils.google_utils import attempt_download
32
  from utils.torch_utils import ModelEMA, select_device, intersect_dicts
33
 
 
36
 
37
  def train(hyp, opt, device, tb_writer=None, wandb=None):
38
  logger.info(f'Hyperparameters {hyp}')
39
+ save_dir, epochs, batch_size, total_batch_size, weights, rank = \
40
+ opt.save_dir, opt.epochs, opt.batch_size, opt.total_batch_size, opt.weights, opt.global_rank
41
+
42
+ # Directories
43
+ wdir = save_dir / 'weights'
44
+ wdir.mkdir(parents=True, exist_ok=True) # make dir
45
  last = wdir / 'last.pt'
46
  best = wdir / 'best.pt'
47
+ results_file = save_dir / 'results.txt'
 
 
48
 
49
  # Save run settings
50
+ with open(save_dir / 'hyp.yaml', 'w') as f:
51
  yaml.dump(hyp, f, sort_keys=False)
52
+ with open(save_dir / 'opt.yaml', 'w') as f:
53
  yaml.dump(vars(opt), f, sort_keys=False)
54
 
55
  # Configure
 
121
 
122
  # Logging
123
  if wandb and wandb.run is None:
124
+ wandb_run = wandb.init(config=opt, resume="allow",
125
+ project='YOLOv5' if opt.project == 'runs/train' else Path(opt.project).stem,
126
+ name=save_dir.stem,
127
+ id=ckpt.get('wandb_id') if 'ckpt' in locals() else None)
128
 
129
  # Resume
130
  start_epoch, best_fitness = 0, 0.0
 
191
  c = torch.tensor(labels[:, 0]) # classes
192
  # cf = torch.bincount(c.long(), minlength=nc) + 1. # frequency
193
  # model._initialize_biases(cf.to(device))
194
+ plot_labels(labels, save_dir=save_dir)
195
  if tb_writer:
196
  # tb_writer.add_hparams(hyp, {}) # causes duplicate https://github.com/ultralytics/yolov5/pull/384
197
  tb_writer.add_histogram('classes', c, 0)
 
218
  scaler = amp.GradScaler(enabled=cuda)
219
  logger.info('Image sizes %g train, %g test\n'
220
  'Using %g dataloader workers\nLogging results to %s\n'
221
+ 'Starting training for %g epochs...' % (imgsz, imgsz_test, dataloader.num_workers, save_dir, epochs))
222
  for epoch in range(start_epoch, epochs): # epoch ------------------------------------------------------------------
223
  model.train()
224
 
 
299
 
300
  # Plot
301
  if ni < 3:
302
+ f = str(save_dir / f'train_batch{ni}.jpg') # filename
303
  result = plot_images(images=imgs, targets=targets, paths=paths, fname=f)
304
  # if tb_writer and result is not None:
305
  # tb_writer.add_image(f, result, dataformats='HWC', global_step=epoch)
 
324
  model=ema.ema,
325
  single_cls=opt.single_cls,
326
  dataloader=testloader,
327
+ save_dir=save_dir,
328
  plots=epoch == 0 or final_epoch, # plot first and last
329
  log_imgs=opt.log_imgs if wandb else 0)
330
 
 
372
  if rank in [-1, 0]:
373
  # Strip optimizers
374
  n = opt.name if opt.name.isnumeric() else ''
375
+ fresults, flast, fbest = save_dir / f'results{n}.txt', wdir / f'last{n}.pt', wdir / f'best{n}.pt'
376
  for f1, f2 in zip([wdir / 'last.pt', wdir / 'best.pt', results_file], [flast, fbest, fresults]):
377
  if f1.exists():
378
  os.rename(f1, f2) # rename
 
381
  os.system('gsutil cp %s gs://%s/weights' % (f2, opt.bucket)) if opt.bucket else None # upload
382
  # Finish
383
  if not opt.evolve:
384
+ plot_results(save_dir=save_dir) # save as results.png
385
  logger.info('%g epochs completed in %.3f hours.\n' % (epoch - start_epoch + 1, (time.time() - t0) / 3600))
386
 
387
  dist.destroy_process_group() if rank not in [-1, 0] else None
 
413
  parser.add_argument('--adam', action='store_true', help='use torch.optim.Adam() optimizer')
414
  parser.add_argument('--sync-bn', action='store_true', help='use SyncBatchNorm, only available in DDP mode')
415
  parser.add_argument('--local_rank', type=int, default=-1, help='DDP parameter, do not modify')
 
 
416
  parser.add_argument('--log-imgs', type=int, default=10, help='number of images for W&B logging, max 100')
417
  parser.add_argument('--workers', type=int, default=8, help='maximum number of dataloader workers')
418
+ parser.add_argument('--project', default='runs/train', help='save to project/name')
419
+ parser.add_argument('--name', default='exp', help='save to project/name')
420
+ parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
421
  opt = parser.parse_args()
422
 
423
  # Set DDP variables
 
431
  # Resume
432
  if opt.resume: # resume an interrupted run
433
  ckpt = opt.resume if isinstance(opt.resume, str) else get_latest_run() # specified or most recent path
434
+ opt.save_dir = Path(ckpt).parent.parent # runs/train/exp
435
  assert os.path.isfile(ckpt), 'ERROR: --resume checkpoint does not exist'
436
+ with open(opt.save_dir / 'opt.yaml') as f:
437
  opt = argparse.Namespace(**yaml.load(f, Loader=yaml.FullLoader)) # replace
438
  opt.cfg, opt.weights, opt.resume = '', ckpt, True
439
  logger.info('Resuming training from %s' % ckpt)
 
440
  else:
441
  # opt.hyp = opt.hyp or ('hyp.finetune.yaml' if opt.weights else 'hyp.scratch.yaml')
442
  opt.data, opt.cfg, opt.hyp = check_file(opt.data), check_file(opt.cfg), check_file(opt.hyp) # check files
443
  assert len(opt.cfg) or len(opt.weights), 'either --cfg or --weights must be specified'
444
  opt.img_size.extend([opt.img_size[-1]] * (2 - len(opt.img_size))) # extend to 2 sizes (train, test)
445
+ opt.name = 'evolve' if opt.evolve else opt.name
446
+ opt.save_dir = Path(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok)) # increment run
447
 
448
  # DDP mode
449
  device = select_device(opt.device, batch_size=opt.batch_size)
 
469
  tb_writer, wandb = None, None # init loggers
470
  if opt.global_rank in [-1, 0]:
471
  # Tensorboard
472
+ logger.info(f'Start Tensorboard with "tensorboard --logdir {opt.project}", view at http://localhost:6006/')
473
+ tb_writer = SummaryWriter(opt.save_dir) # runs/train/exp
474
 
475
  # W&B
476
  try:
 
517
  assert opt.local_rank == -1, 'DDP mode not implemented for --evolve'
518
  opt.notest, opt.nosave = True, True # only test/save final epoch
519
  # ei = [isinstance(x, (int, float)) for x in hyp.values()] # evolvable indices
520
+ yaml_file = opt.save_dir / 'hyp_evolved.yaml' # save best result here
521
  if opt.bucket:
522
  os.system('gsutil cp gs://%s/evolve.txt .' % opt.bucket) # download evolve.txt if exists
523
 
tutorial.ipynb CHANGED
@@ -597,7 +597,7 @@
597
  },
598
  "source": [
599
  "!python detect.py --weights yolov5s.pt --img 640 --conf 0.25 --source data/images/\n",
600
- "Image(filename='runs/detect/exp0/zidane.jpg', width=600)"
601
  ],
602
  "execution_count": null,
603
  "outputs": [
@@ -611,7 +611,7 @@
611
  "Model Summary: 140 layers, 7.45958e+06 parameters, 0 gradients\n",
612
  "image 1/2 /content/yolov5/data/images/bus.jpg: 640x480 4 persons, 1 buss, 1 skateboards, Done. (0.012s)\n",
613
  "image 2/2 /content/yolov5/data/images/zidane.jpg: 384x640 2 persons, 2 ties, Done. (0.012s)\n",
614
- "Results saved to runs/detect/exp0\n",
615
  "Done. (0.113s)\n"
616
  ],
617
  "name": "stdout"
@@ -887,7 +887,7 @@
887
  "source": [
888
  "Train a YOLOv5s model on [COCO128](https://www.kaggle.com/ultralytics/coco128) with `--data coco128.yaml`, starting from pretrained `--weights yolov5s.pt`, or from randomly initialized `--weights '' --cfg yolov5s.yaml`. Models are downloaded automatically from the [latest YOLOv5 release](https://github.com/ultralytics/yolov5/releases), and **COCO, COCO128, and VOC datasets are downloaded automatically** on first use.\n",
889
  "\n",
890
- "All training results are saved to `runs/train/` with incrementing run directories, i.e. `runs/train/exp0`, `runs/train/exp1` etc.\n"
891
  ]
892
  },
893
  {
@@ -969,7 +969,7 @@
969
  "Analyzing anchors... anchors/target = 4.26, Best Possible Recall (BPR) = 0.9946\n",
970
  "Image sizes 640 train, 640 test\n",
971
  "Using 2 dataloader workers\n",
972
- "Logging results to runs/train/exp0\n",
973
  "Starting training for 3 epochs...\n",
974
  "\n",
975
  " Epoch gpu_mem box obj cls total targets img_size\n",
@@ -986,8 +986,8 @@
986
  " 2/2 3.17G 0.04445 0.06545 0.01666 0.1266 149 640: 100% 8/8 [00:01<00:00, 4.33it/s]\n",
987
  " Class Images Targets P R mAP@.5 mAP@.5:.95: 100% 8/8 [00:02<00:00, 2.78it/s]\n",
988
  " all 128 929 0.395 0.766 0.701 0.455\n",
989
- "Optimizer stripped from runs/train/exp0/weights/last.pt, 15.2MB\n",
990
- "Optimizer stripped from runs/train/exp0/weights/best.pt, 15.2MB\n",
991
  "3 epochs completed in 0.005 hours.\n",
992
  "\n"
993
  ],
@@ -1030,7 +1030,7 @@
1030
  "source": [
1031
  "## Local Logging\n",
1032
  "\n",
1033
- "All results are logged by default to `runs/train`, with a new experiment directory created for each new training as `runs/train/exp0`, `runs/train/exp1`, etc. View train and test jpgs to see mosaics, labels, predictions and augmentation effects. Note a **Mosaic Dataloader** is used for training (shown below), a new concept developed by Ultralytics and first featured in [YOLOv4](https://arxiv.org/abs/2004.10934)."
1034
  ]
1035
  },
1036
  {
@@ -1039,9 +1039,9 @@
1039
  "id": "riPdhraOTCO0"
1040
  },
1041
  "source": [
1042
- "Image(filename='runs/train/exp0/train_batch0.jpg', width=800) # train batch 0 mosaics and labels\n",
1043
- "Image(filename='runs/train/exp0/test_batch0_labels.jpg', width=800) # test batch 0 labels\n",
1044
- "Image(filename='runs/train/exp0/test_batch0_pred.jpg', width=800) # test batch 0 predictions"
1045
  ],
1046
  "execution_count": null,
1047
  "outputs": []
@@ -1078,7 +1078,7 @@
1078
  },
1079
  "source": [
1080
  "from utils.utils import plot_results \n",
1081
- "plot_results(save_dir='runs/train/exp0') # plot results.txt as results.png\n",
1082
  "Image(filename='results.png', width=800) "
1083
  ],
1084
  "execution_count": null,
@@ -1170,9 +1170,9 @@
1170
  " for di in 0 cpu # inference devices\n",
1171
  " do\n",
1172
  " python detect.py --weights $x.pt --device $di # detect official\n",
1173
- " python detect.py --weights runs/train/exp0/weights/last.pt --device $di # detect custom\n",
1174
  " python test.py --weights $x.pt --device $di # test official\n",
1175
- " python test.py --weights runs/train/exp0/weights/last.pt --device $di # test custom\n",
1176
  " done\n",
1177
  " python models/yolo.py --cfg $x.yaml # inspect\n",
1178
  " python models/export.py --weights $x.pt --img 640 --batch 1 # export\n",
 
597
  },
598
  "source": [
599
  "!python detect.py --weights yolov5s.pt --img 640 --conf 0.25 --source data/images/\n",
600
+ "Image(filename='runs/detect/exp/zidane.jpg', width=600)"
601
  ],
602
  "execution_count": null,
603
  "outputs": [
 
611
  "Model Summary: 140 layers, 7.45958e+06 parameters, 0 gradients\n",
612
  "image 1/2 /content/yolov5/data/images/bus.jpg: 640x480 4 persons, 1 buss, 1 skateboards, Done. (0.012s)\n",
613
  "image 2/2 /content/yolov5/data/images/zidane.jpg: 384x640 2 persons, 2 ties, Done. (0.012s)\n",
614
+ "Results saved to runs/detect/exp\n",
615
  "Done. (0.113s)\n"
616
  ],
617
  "name": "stdout"
 
887
  "source": [
888
  "Train a YOLOv5s model on [COCO128](https://www.kaggle.com/ultralytics/coco128) with `--data coco128.yaml`, starting from pretrained `--weights yolov5s.pt`, or from randomly initialized `--weights '' --cfg yolov5s.yaml`. Models are downloaded automatically from the [latest YOLOv5 release](https://github.com/ultralytics/yolov5/releases), and **COCO, COCO128, and VOC datasets are downloaded automatically** on first use.\n",
889
  "\n",
890
+ "All training results are saved to `runs/train/` with incrementing run directories, i.e. `runs/train/exp2`, `runs/train/exp3` etc.\n"
891
  ]
892
  },
893
  {
 
969
  "Analyzing anchors... anchors/target = 4.26, Best Possible Recall (BPR) = 0.9946\n",
970
  "Image sizes 640 train, 640 test\n",
971
  "Using 2 dataloader workers\n",
972
+ "Logging results to runs/train/exp\n",
973
  "Starting training for 3 epochs...\n",
974
  "\n",
975
  " Epoch gpu_mem box obj cls total targets img_size\n",
 
986
  " 2/2 3.17G 0.04445 0.06545 0.01666 0.1266 149 640: 100% 8/8 [00:01<00:00, 4.33it/s]\n",
987
  " Class Images Targets P R mAP@.5 mAP@.5:.95: 100% 8/8 [00:02<00:00, 2.78it/s]\n",
988
  " all 128 929 0.395 0.766 0.701 0.455\n",
989
+ "Optimizer stripped from runs/train/exp/weights/last.pt, 15.2MB\n",
990
+ "Optimizer stripped from runs/train/exp/weights/best.pt, 15.2MB\n",
991
  "3 epochs completed in 0.005 hours.\n",
992
  "\n"
993
  ],
 
1030
  "source": [
1031
  "## Local Logging\n",
1032
  "\n",
1033
+ "All results are logged by default to `runs/train`, with a new experiment directory created for each new training as `runs/train/exp2`, `runs/train/exp3`, etc. View train and test jpgs to see mosaics, labels, predictions and augmentation effects. Note a **Mosaic Dataloader** is used for training (shown below), a new concept developed by Ultralytics and first featured in [YOLOv4](https://arxiv.org/abs/2004.10934)."
1034
  ]
1035
  },
1036
  {
 
1039
  "id": "riPdhraOTCO0"
1040
  },
1041
  "source": [
1042
+ "Image(filename='runs/train/exp/train_batch0.jpg', width=800) # train batch 0 mosaics and labels\n",
1043
+ "Image(filename='runs/train/exp/test_batch0_labels.jpg', width=800) # test batch 0 labels\n",
1044
+ "Image(filename='runs/train/exp/test_batch0_pred.jpg', width=800) # test batch 0 predictions"
1045
  ],
1046
  "execution_count": null,
1047
  "outputs": []
 
1078
  },
1079
  "source": [
1080
  "from utils.utils import plot_results \n",
1081
+ "plot_results(save_dir='runs/train/exp') # plot results.txt as results.png\n",
1082
  "Image(filename='results.png', width=800) "
1083
  ],
1084
  "execution_count": null,
 
1170
  " for di in 0 cpu # inference devices\n",
1171
  " do\n",
1172
  " python detect.py --weights $x.pt --device $di # detect official\n",
1173
+ " python detect.py --weights runs/train/exp/weights/last.pt --device $di # detect custom\n",
1174
  " python test.py --weights $x.pt --device $di # test official\n",
1175
+ " python test.py --weights runs/train/exp/weights/last.pt --device $di # test custom\n",
1176
  " done\n",
1177
  " python models/yolo.py --cfg $x.yaml # inspect\n",
1178
  " python models/export.py --weights $x.pt --img 640 --batch 1 # export\n",
utils/general.py CHANGED
@@ -60,7 +60,7 @@ def init_seeds(seed=0):
60
  init_torch_seeds(seed)
61
 
62
 
63
- def get_latest_run(search_dir='./runs'):
64
  # Return path to most recent 'last.pt' in /runs (i.e. to --resume from)
65
  last_list = glob.glob(f'{search_dir}/**/last*.pt', recursive=True)
66
  return max(last_list, key=os.path.getctime) if last_list else ''
@@ -951,23 +951,17 @@ def output_to_target(output, width, height):
951
  return np.array(targets)
952
 
953
 
954
- def increment_dir(dir, comment=''):
955
- # Increments a directory runs/exp1 --> runs/exp2_comment
956
- n = 0 # number
957
- dir = str(Path(dir)) # os-agnostic
958
- if os.path.isdir(dir):
959
- stem = ''
960
- dir += os.sep # removed by Path
961
  else:
962
- stem = Path(dir).stem
963
-
964
- dirs = sorted(glob.glob(dir + '*')) # directories
965
- if dirs:
966
- matches = [re.search(r"%s(\d+)" % stem, d) for d in dirs]
967
- idxs = [int(m.groups()[0]) for m in matches if m]
968
- if idxs:
969
- n = max(idxs) + 1 # increment
970
- return dir + str(n) + ('_' + comment if comment else '')
971
 
972
 
973
  # Plotting functions ---------------------------------------------------------------------------------------------------
 
60
  init_torch_seeds(seed)
61
 
62
 
63
+ def get_latest_run(search_dir='.'):
64
  # Return path to most recent 'last.pt' in /runs (i.e. to --resume from)
65
  last_list = glob.glob(f'{search_dir}/**/last*.pt', recursive=True)
66
  return max(last_list, key=os.path.getctime) if last_list else ''
 
951
  return np.array(targets)
952
 
953
 
954
+ def increment_path(path, exist_ok=True, sep=''):
955
+ # Increment path, i.e. runs/exp --> runs/exp{sep}2, runs/exp{sep}3 etc. (returned unchanged if it does not exist or exist_ok is set)
956
+ path = Path(path) # os-agnostic
957
+ if (path.exists() and exist_ok) or (not path.exists()):
958
+ return str(path)
 
 
959
  else:
960
+ dirs = glob.glob(f"{path}{sep}*") # similar paths
961
+ matches = [re.search(rf"%s{sep}(\d+)" % path.stem, d) for d in dirs]
962
+ i = [int(m.groups()[0]) for m in matches if m] # indices
963
+ n = max(i) + 1 if i else 2 # increment number
964
+ return f"{path}{sep}{n}" # update path
 
 
 
 
965
 
966
 
967
  # Plotting functions ---------------------------------------------------------------------------------------------------