glenn-jocher committed on
Commit
08e97a2
·
1 Parent(s): 9776e70

Update hyperparameters to add lrf, anchors

Browse files
Files changed (3) hide show
  1. data/hyp.finetune.yaml +31 -24
  2. data/hyp.scratch.yaml +3 -1
  3. train.py +9 -5
data/hyp.finetune.yaml CHANGED
@@ -1,27 +1,34 @@
1
- # Hyperparameters for VOC fine-tuning
2
- # python train.py --batch 64 --cfg '' --weights yolov5m.pt --data voc.yaml --img 512 --epochs 50
3
  # See tutorials for hyperparameter evolution https://github.com/ultralytics/yolov5#tutorials
4
 
5
 
6
- lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3)
7
- momentum: 0.94 # SGD momentum/Adam beta1
8
- weight_decay: 0.0005 # optimizer weight decay 5e-4
9
- giou: 0.05 # GIoU loss gain
10
- cls: 0.4 # cls loss gain
11
- cls_pw: 1.0 # cls BCELoss positive_weight
12
- obj: 0.5 # obj loss gain (scale with pixels)
13
- obj_pw: 1.0 # obj BCELoss positive_weight
14
- iou_t: 0.20 # IoU training threshold
15
- anchor_t: 4.0 # anchor-multiple threshold
16
- fl_gamma: 0.0 # focal loss gamma (efficientDet default gamma=1.5)
17
- hsv_h: 0.015 # image HSV-Hue augmentation (fraction)
18
- hsv_s: 0.7 # image HSV-Saturation augmentation (fraction)
19
- hsv_v: 0.4 # image HSV-Value augmentation (fraction)
20
- degrees: 1.0 # image rotation (+/- deg)
21
- translate: 0.1 # image translation (+/- fraction)
22
- scale: 0.6 # image scale (+/- gain)
23
- shear: 1.0 # image shear (+/- deg)
24
- perspective: 0.0 # image perspective (+/- fraction), range 0-0.001
25
- flipud: 0.01 # image flip up-down (probability)
26
- fliplr: 0.5 # image flip left-right (probability)
27
- mixup: 0.2 # image mixup (probability)
 
 
 
 
 
 
 
 
1
+ # Hyperparameters for VOC finetuning
2
+ # python train.py --batch 64 --weights yolov5m.pt --data voc.yaml --img 512 --epochs 50
3
  # See tutorials for hyperparameter evolution https://github.com/ultralytics/yolov5#tutorials
4
 
5
 
6
+ # Hyperparameter Evolution Results
7
+ # Generations: 51
8
+ # P R mAP.5 mAP.5:.95 box obj cls
9
+ # Metrics: 0.625 0.926 0.89 0.677 0.0111 0.00849 0.00124
10
+
11
+ lr0: 0.00447
12
+ lrf: 0.114
13
+ momentum: 0.873
14
+ weight_decay: 0.00047
15
+ giou: 0.0306
16
+ cls: 0.211
17
+ cls_pw: 0.546
18
+ obj: 0.421
19
+ obj_pw: 0.972
20
+ iou_t: 0.2
21
+ anchor_t: 2.26
22
+ # anchors: 5.07
23
+ fl_gamma: 0.0
24
+ hsv_h: 0.0154
25
+ hsv_s: 0.9
26
+ hsv_v: 0.619
27
+ degrees: 0.404
28
+ translate: 0.206
29
+ scale: 0.86
30
+ shear: 0.795
31
+ perspective: 0.0
32
+ flipud: 0.00756
33
+ fliplr: 0.5
34
+ mixup: 0.153
data/hyp.scratch.yaml CHANGED
@@ -4,15 +4,17 @@
4
 
5
 
6
  lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3)
 
7
  momentum: 0.937 # SGD momentum/Adam beta1
8
  weight_decay: 0.0005 # optimizer weight decay 5e-4
9
- giou: 0.05 # GIoU loss gain
10
  cls: 0.5 # cls loss gain
11
  cls_pw: 1.0 # cls BCELoss positive_weight
12
  obj: 1.0 # obj loss gain (scale with pixels)
13
  obj_pw: 1.0 # obj BCELoss positive_weight
14
  iou_t: 0.20 # IoU training threshold
15
  anchor_t: 4.0 # anchor-multiple threshold
 
16
  fl_gamma: 0.0 # focal loss gamma (efficientDet default gamma=1.5)
17
  hsv_h: 0.015 # image HSV-Hue augmentation (fraction)
18
  hsv_s: 0.7 # image HSV-Saturation augmentation (fraction)
 
4
 
5
 
6
  lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3)
7
+ lrf: 0.2 # final OneCycleLR learning rate (lr0 * lrf)
8
  momentum: 0.937 # SGD momentum/Adam beta1
9
  weight_decay: 0.0005 # optimizer weight decay 5e-4
10
+ giou: 0.05 # box loss gain
11
  cls: 0.5 # cls loss gain
12
  cls_pw: 1.0 # cls BCELoss positive_weight
13
  obj: 1.0 # obj loss gain (scale with pixels)
14
  obj_pw: 1.0 # obj BCELoss positive_weight
15
  iou_t: 0.20 # IoU training threshold
16
  anchor_t: 4.0 # anchor-multiple threshold
17
+ # anchors: 0 # anchors per output grid (0 to ignore)
18
  fl_gamma: 0.0 # focal loss gamma (efficientDet default gamma=1.5)
19
  hsv_h: 0.015 # image HSV-Hue augmentation (fraction)
20
  hsv_s: 0.7 # image HSV-Saturation augmentation (fraction)
train.py CHANGED
@@ -53,7 +53,7 @@ def train(hyp, opt, device, tb_writer=None):
53
  cuda = device.type != 'cpu'
54
  init_seeds(2 + rank)
55
  with open(opt.data) as f:
56
- data_dict = yaml.load(f, Loader=yaml.FullLoader) # model dict
57
  with torch_distributed_zero_first(rank):
58
  check_dataset(data_dict) # check
59
  train_path = data_dict['train']
@@ -67,6 +67,8 @@ def train(hyp, opt, device, tb_writer=None):
67
  with torch_distributed_zero_first(rank):
68
  attempt_download(weights) # download if not found locally
69
  ckpt = torch.load(weights, map_location=device) # load checkpoint
 
 
70
  model = Model(opt.cfg or ckpt['model'].yaml, ch=3, nc=nc).to(device) # create
71
  exclude = ['anchor'] if opt.cfg else [] # exclude keys
72
  state_dict = ckpt['model'].float().state_dict() # to FP32
@@ -111,7 +113,7 @@ def train(hyp, opt, device, tb_writer=None):
111
 
112
  # Scheduler https://arxiv.org/pdf/1812.01187.pdf
113
  # https://pytorch.org/docs/stable/_modules/torch/optim/lr_scheduler.html#OneCycleLR
114
- lf = lambda x: (((1 + math.cos(x * math.pi / epochs)) / 2) ** 1.0) * 0.8 + 0.2 # cosine
115
  scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)
116
  # plot_lr_scheduler(optimizer, scheduler, epochs)
117
 
@@ -459,6 +461,7 @@ if __name__ == '__main__':
459
  else:
460
  # Hyperparameter evolution metadata (mutation scale 0-1, lower_limit, upper_limit)
461
  meta = {'lr0': (1, 1e-5, 1e-1), # initial learning rate (SGD=1E-2, Adam=1E-3)
 
462
  'momentum': (0.1, 0.6, 0.98), # SGD momentum/Adam beta1
463
  'weight_decay': (1, 0.0, 0.001), # optimizer weight decay
464
  'giou': (1, 0.02, 0.2), # GIoU loss gain
@@ -468,6 +471,7 @@ if __name__ == '__main__':
468
  'obj_pw': (1, 0.5, 2.0), # obj BCELoss positive_weight
469
  'iou_t': (0, 0.1, 0.7), # IoU training threshold
470
  'anchor_t': (1, 2.0, 8.0), # anchor-multiple threshold
 
471
  'fl_gamma': (0, 0.0, 2.0), # focal loss gamma (efficientDet default gamma=1.5)
472
  'hsv_h': (1, 0.0, 0.1), # image HSV-Hue augmentation (fraction)
473
  'hsv_s': (1, 0.0, 0.9), # image HSV-Saturation augmentation (fraction)
@@ -476,9 +480,9 @@ if __name__ == '__main__':
476
  'translate': (1, 0.0, 0.9), # image translation (+/- fraction)
477
  'scale': (1, 0.0, 0.9), # image scale (+/- gain)
478
  'shear': (1, 0.0, 10.0), # image shear (+/- deg)
479
- 'perspective': (1, 0.0, 0.001), # image perspective (+/- fraction), range 0-0.001
480
- 'flipud': (0, 0.0, 1.0), # image flip up-down (probability)
481
- 'fliplr': (1, 0.0, 1.0), # image flip left-right (probability)
482
  'mixup': (1, 0.0, 1.0)} # image mixup (probability)
483
 
484
  assert opt.local_rank == -1, 'DDP mode not implemented for --evolve'
 
53
  cuda = device.type != 'cpu'
54
  init_seeds(2 + rank)
55
  with open(opt.data) as f:
56
+ data_dict = yaml.load(f, Loader=yaml.FullLoader) # data dict
57
  with torch_distributed_zero_first(rank):
58
  check_dataset(data_dict) # check
59
  train_path = data_dict['train']
 
67
  with torch_distributed_zero_first(rank):
68
  attempt_download(weights) # download if not found locally
69
  ckpt = torch.load(weights, map_location=device) # load checkpoint
70
+ # if hyp['anchors']:
71
+ # ckpt['model'].yaml['anchors'] = round(hyp['anchors']) # force autoanchor
72
  model = Model(opt.cfg or ckpt['model'].yaml, ch=3, nc=nc).to(device) # create
73
  exclude = ['anchor'] if opt.cfg else [] # exclude keys
74
  state_dict = ckpt['model'].float().state_dict() # to FP32
 
113
 
114
  # Scheduler https://arxiv.org/pdf/1812.01187.pdf
115
  # https://pytorch.org/docs/stable/_modules/torch/optim/lr_scheduler.html#OneCycleLR
116
+ lf = lambda x: ((1 + math.cos(x * math.pi / epochs)) / 2) * (1 - hyp['lrf']) + hyp['lrf'] # cosine
117
  scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)
118
  # plot_lr_scheduler(optimizer, scheduler, epochs)
119
 
 
461
  else:
462
  # Hyperparameter evolution metadata (mutation scale 0-1, lower_limit, upper_limit)
463
  meta = {'lr0': (1, 1e-5, 1e-1), # initial learning rate (SGD=1E-2, Adam=1E-3)
464
+ 'lrf': (1, 0.01, 1.0), # final OneCycleLR learning rate (lr0 * lrf)
465
  'momentum': (0.1, 0.6, 0.98), # SGD momentum/Adam beta1
466
  'weight_decay': (1, 0.0, 0.001), # optimizer weight decay
467
  'giou': (1, 0.02, 0.2), # GIoU loss gain
 
471
  'obj_pw': (1, 0.5, 2.0), # obj BCELoss positive_weight
472
  'iou_t': (0, 0.1, 0.7), # IoU training threshold
473
  'anchor_t': (1, 2.0, 8.0), # anchor-multiple threshold
474
+ # 'anchors': (1, 2.0, 10.0), # anchors per output grid (0 to ignore)
475
  'fl_gamma': (0, 0.0, 2.0), # focal loss gamma (efficientDet default gamma=1.5)
476
  'hsv_h': (1, 0.0, 0.1), # image HSV-Hue augmentation (fraction)
477
  'hsv_s': (1, 0.0, 0.9), # image HSV-Saturation augmentation (fraction)
 
480
  'translate': (1, 0.0, 0.9), # image translation (+/- fraction)
481
  'scale': (1, 0.0, 0.9), # image scale (+/- gain)
482
  'shear': (1, 0.0, 10.0), # image shear (+/- deg)
483
+ 'perspective': (0, 0.0, 0.001), # image perspective (+/- fraction), range 0-0.001
484
+ 'flipud': (1, 0.0, 1.0), # image flip up-down (probability)
485
+ 'fliplr': (0, 0.0, 1.0), # image flip left-right (probability)
486
  'mixup': (1, 0.0, 1.0)} # image mixup (probability)
487
 
488
  assert opt.local_rank == -1, 'DDP mode not implemented for --evolve'