glenn-jocher committed on
Commit efe60b5 • 1 Parent(s): 63dd65e

Refactor train.py and val.py `loggers` (#4137)


* Update loggers

* Config

* Update val.py

* cleanup

* fix1

* fix2

* fix3 and reformat

* format sweep.py

* Logger() class

* cleanup

* cleanup2

* wandb package import fix

* wandb package import fix2

* txt fix

* fix4

* fix5

* fix6

* drop wandb into utils/loggers

* fix 7

* rename loggers/wandb_logging to loggers/wandb

* Update message

* Update message

* Update message

* cleanup

* Fix x axis bug

* fix rank 0 issue

* cleanup
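
At a glance, the refactor replaces the ad-hoc `loggers` dict and the hand-wired `wandb_logger` calls with a single `Loggers` object whose callback hooks train.py and val.py invoke at fixed points. The outline below is a condensed sketch of that call order, assembled from the diffs that follow; it is not runnable on its own, and loop variables such as `ni`, `mloss`, `s` and `fi` stand in for the ones train.py actually defines.

    # Sketch only: hook call order introduced by this refactor (names taken from the diffs below)
    loggers = Loggers(save_dir, results_file, weights, opt, hyp, data_dict, LOGGER).start()

    for epoch in range(start_epoch, epochs):
        for ni, (imgs, targets, paths, _) in enumerate(train_loader):
            ...                                                # forward / backward / optimizer step
            loggers.on_train_batch_end(ni, model, imgs)        # TB model graph, W&B mosaic images
        loggers.on_train_epoch_end(epoch)                      # advance the W&B epoch counter
        results, maps, _ = val.run(data_dict, loggers=loggers)  # val.py fires on_val_batch_end / on_val_end
        loggers.on_train_val_end(mloss, results, lr, epoch, s, best_fitness, fi)  # results.txt, TB and W&B scalars
        loggers.on_model_save(last, epoch, final_epoch, best_fitness, fi)         # periodic W&B checkpoint upload
    loggers.on_train_end(last, best)                           # result plots + stripped model artifact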

train.py CHANGED
@@ -10,7 +10,6 @@ import os
import random
import sys
import time
- import warnings
from copy import deepcopy
from pathlib import Path
from threading import Thread
@@ -24,7 +23,6 @@ import yaml
from torch.cuda import amp
from torch.nn.parallel import DistributedDataParallel as DDP
from torch.optim import Adam, SGD, lr_scheduler
- from torch.utils.tensorboard import SummaryWriter
from tqdm import tqdm

FILE = Path(__file__).absolute()
@@ -42,8 +40,9 @@ from utils.google_utils import attempt_download
from utils.loss import ComputeLoss
from utils.plots import plot_images, plot_labels, plot_results, plot_evolution
from utils.torch_utils import ModelEMA, select_device, intersect_dicts, torch_distributed_zero_first, de_parallel
- from utils.wandb_logging.wandb_utils import WandbLogger, check_wandb_resume
+ from utils.loggers.wandb.wandb_utils import check_wandb_resume
from utils.metrics import fitness
+ from utils.loggers import Loggers

LOGGER = logging.getLogger(__name__)
LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1))  # https://pytorch.org/docs/stable/elastic/run.html
@@ -76,37 +75,23 @@ def train(hyp,  # path/to/hyp.yaml or hyp dictionary
    with open(save_dir / 'opt.yaml', 'w') as f:
        yaml.safe_dump(vars(opt), f, sort_keys=False)

-     # Configure
+     # Config
    plots = not evolve  # create plots
    cuda = device.type != 'cpu'
    init_seeds(1 + RANK)
    with open(data) as f:
        data_dict = yaml.safe_load(f)  # data dict
-
-     # Loggers
-     loggers = {'wandb': None, 'tb': None}  # loggers dict
-     if RANK in [-1, 0]:
-         # TensorBoard
-         if plots:
-             prefix = colorstr('tensorboard: ')
-             LOGGER.info(f"{prefix}Start with 'tensorboard --logdir {opt.project}', view at http://localhost:6006/")
-             loggers['tb'] = SummaryWriter(str(save_dir))
-
-         # W&B
-         opt.hyp = hyp  # add hyperparameters
-         run_id = torch.load(weights).get('wandb_id') if weights.endswith('.pt') and os.path.isfile(weights) else None
-         run_id = run_id if opt.resume else None  # start fresh run if transfer learning
-         wandb_logger = WandbLogger(opt, save_dir.stem, run_id, data_dict)
-         loggers['wandb'] = wandb_logger.wandb
-         if loggers['wandb']:
-             data_dict = wandb_logger.data_dict
-             weights, epochs, hyp = opt.weights, opt.epochs, opt.hyp  # may update values if resuming
-
    nc = 1 if single_cls else int(data_dict['nc'])  # number of classes
    names = ['item'] if single_cls and len(data_dict['names']) != 1 else data_dict['names']  # class names
    assert len(names) == nc, f'{len(names)} names found for nc={nc} dataset in {data}'  # check
    is_coco = data.endswith('coco.yaml') and nc == 80  # COCO dataset

+     # Loggers
+     if RANK in [-1, 0]:
+         loggers = Loggers(save_dir, results_file, weights, opt, hyp, data_dict, LOGGER).start()  # loggers dict
+         if loggers.wandb and resume:
+             weights, epochs, hyp, data_dict = opt.weights, opt.epochs, opt.hyp, loggers.wandb.data_dict
+
    # Model
    pretrained = weights.endswith('.pt')
    if pretrained:
@@ -351,16 +336,11 @@ def train(hyp,  # path/to/hyp.yaml or hyp dictionary
                pbar.set_description(s)

                # Plot
-                 if plots and ni < 3:
-                     f = save_dir / f'train_batch{ni}.jpg'  # filename
-                     Thread(target=plot_images, args=(imgs, targets, paths, f), daemon=True).start()
-                     if loggers['tb'] and ni == 0:  # TensorBoard
-                         with warnings.catch_warnings():
-                             warnings.simplefilter('ignore')  # suppress jit trace warning
-                             loggers['tb'].add_graph(torch.jit.trace(de_parallel(model), imgs[0:1], strict=False), [])
-                 elif plots and ni == 10 and loggers['wandb']:
-                     wandb_logger.log({'Mosaics': [loggers['wandb'].Image(str(x), caption=x.name) for x in
-                                                   save_dir.glob('train*.jpg') if x.exists()]})
+                 if plots:
+                     if ni < 3:
+                         f = save_dir / f'train_batch{ni}.jpg'  # filename
+                         Thread(target=plot_images, args=(imgs, targets, paths, f), daemon=True).start()
+                     loggers.on_train_batch_end(ni, model, imgs)

            # end batch ------------------------------------------------------------------------------------------------
@@ -368,13 +348,12 @@ def train(hyp,  # path/to/hyp.yaml or hyp dictionary
        lr = [x['lr'] for x in optimizer.param_groups]  # for loggers
        scheduler.step()

-         # DDP process 0 or single-GPU
        if RANK in [-1, 0]:
            # mAP
+             loggers.on_train_epoch_end(epoch)
            ema.update_attr(model, include=['yaml', 'nc', 'hyp', 'names', 'stride', 'class_weights'])
            final_epoch = epoch + 1 == epochs
            if not noval or final_epoch:  # Calculate mAP
-                 wandb_logger.current_epoch = epoch + 1
                results, maps, _ = val.run(data_dict,
                                           batch_size=batch_size // WORLD_SIZE * 2,
                                           imgsz=imgsz,
@@ -385,29 +364,14 @@ def train(hyp,  # path/to/hyp.yaml or hyp dictionary
                                           save_json=is_coco and final_epoch,
                                           verbose=nc < 50 and final_epoch,
                                           plots=plots and final_epoch,
-                                           wandb_logger=wandb_logger,
+                                           loggers=loggers,
                                           compute_loss=compute_loss)

-             # Write
-             with open(results_file, 'a') as f:
-                 f.write(s + '%10.4g' * 7 % results + '\n')  # append metrics, val_loss
-
-             # Log
-             tags = ['train/box_loss', 'train/obj_loss', 'train/cls_loss',  # train loss
-                     'metrics/precision', 'metrics/recall', 'metrics/mAP_0.5', 'metrics/mAP_0.5:0.95',
-                     'val/box_loss', 'val/obj_loss', 'val/cls_loss',  # val loss
-                     'x/lr0', 'x/lr1', 'x/lr2']  # params
-             for x, tag in zip(list(mloss[:-1]) + list(results) + lr, tags):
-                 if loggers['tb']:
-                     loggers['tb'].add_scalar(tag, x, epoch)  # TensorBoard
-                 if loggers['wandb']:
-                     wandb_logger.log({tag: x})  # W&B
-
            # Update best mAP
            fi = fitness(np.array(results).reshape(1, -1))  # weighted combination of [P, R, mAP@.5, mAP@.5-.95]
            if fi > best_fitness:
                best_fitness = fi
-             wandb_logger.end_epoch(best_result=best_fitness == fi)
+             loggers.on_train_val_end(mloss, results, lr, epoch, s, best_fitness, fi)

            # Save model
            if (not nosave) or (final_epoch and not evolve):  # if save
@@ -418,16 +382,14 @@ def train(hyp,  # path/to/hyp.yaml or hyp dictionary
                        'ema': deepcopy(ema.ema).half(),
                        'updates': ema.updates,
                        'optimizer': optimizer.state_dict(),
-                         'wandb_id': wandb_logger.wandb_run.id if loggers['wandb'] else None}
+                         'wandb_id': loggers.wandb.wandb_run.id if loggers.wandb else None}

                # Save last, best and delete
                torch.save(ckpt, last)
                if best_fitness == fi:
                    torch.save(ckpt, best)
-                 if loggers['wandb']:
-                     if ((epoch + 1) % opt.save_period == 0 and not final_epoch) and opt.save_period != -1:
-                         wandb_logger.log_model(last.parent, opt, epoch, fi, best_model=best_fitness == fi)
                del ckpt
+                 loggers.on_model_save(last, epoch, final_epoch, best_fitness, fi)

        # end epoch ----------------------------------------------------------------------------------------------------
    # end training -----------------------------------------------------------------------------------------------------
@@ -435,10 +397,6 @@ def train(hyp,  # path/to/hyp.yaml or hyp dictionary
        LOGGER.info(f'{epoch - start_epoch + 1} epochs completed in {(time.time() - t0) / 3600:.3f} hours.\n')
        if plots:
            plot_results(save_dir=save_dir)  # save as results.png
-             if loggers['wandb']:
-                 files = ['results.png', 'confusion_matrix.png', *[f'{x}_curve.png' for x in ('F1', 'PR', 'P', 'R')]]
-                 wandb_logger.log({"Results": [loggers['wandb'].Image(str(save_dir / f), caption=f) for f in files
-                                               if (save_dir / f).exists()]})

        if not evolve:
            if is_coco:  # COCO dataset
@@ -458,11 +416,8 @@ def train(hyp,  # path/to/hyp.yaml or hyp dictionary
        for f in last, best:
            if f.exists():
                strip_optimizer(f)  # strip optimizers
-                 if loggers['wandb']:  # Log the stripped model
-                     loggers['wandb'].log_artifact(str(best if best.exists() else last), type='model',
-                                                   name='run_' + wandb_logger.wandb_run.id + '_model',
-                                                   aliases=['latest', 'best', 'stripped'])
-         wandb_logger.finish_run()
+
+         loggers.on_train_end(last, best)

    torch.cuda.empty_cache()
    return results
utils/loggers/__init__.py ADDED
@@ -0,0 +1,129 @@
+ # YOLOv5 experiment logging utils
+
+ import warnings
+
+ import torch
+ from torch.utils.tensorboard import SummaryWriter
+
+ from utils.general import colorstr, emojis
+ from utils.loggers.wandb.wandb_utils import WandbLogger
+ from utils.torch_utils import de_parallel
+
+ LOGGERS = ('txt', 'tb', 'wandb')  # text-file, TensorBoard, Weights & Biases
+
+ try:
+     import wandb
+
+     assert hasattr(wandb, '__version__')  # verify package import not local dir
+ except (ImportError, AssertionError):
+     wandb = None
+
+
+ class Loggers():
+     # YOLOv5 Loggers class
+     def __init__(self, save_dir=None, results_file=None, weights=None, opt=None, hyp=None,
+                  data_dict=None, logger=None, include=LOGGERS):
+         self.save_dir = save_dir
+         self.results_file = results_file
+         self.weights = weights
+         self.opt = opt
+         self.hyp = hyp
+         self.data_dict = data_dict
+         self.logger = logger  # for printing results to console
+         self.include = include
+         for k in LOGGERS:
+             setattr(self, k, None)  # init empty logger dictionary
+
+     def start(self):
+         self.txt = True  # always log to txt
+
+         # Message
+         try:
+             import wandb
+         except ImportError:
+             prefix = colorstr('Weights & Biases: ')
+             s = f"{prefix}run 'pip install wandb' to automatically track and visualize YOLOv5 🚀 runs (RECOMMENDED)"
+             print(emojis(s))
+
+         # TensorBoard
+         s = self.save_dir
+         if 'tb' in self.include and not self.opt.evolve:
+             prefix = colorstr('TensorBoard: ')
+             self.logger.info(f"{prefix}Start with 'tensorboard --logdir {s.parent}', view at http://localhost:6006/")
+             self.tb = SummaryWriter(str(s))
+
+         # W&B
+         try:
+             assert 'wandb' in self.include and wandb
+             run_id = torch.load(self.weights).get('wandb_id') if self.opt.resume else None
+             self.opt.hyp = self.hyp  # add hyperparameters
+             self.wandb = WandbLogger(self.opt, s.stem, run_id, self.data_dict)
+         except:
+             self.wandb = None
+
+         return self
+
+     def on_train_batch_end(self, ni, model, imgs):
+         # Callback runs on train batch end
+         if ni == 0:
+             with warnings.catch_warnings():
+                 warnings.simplefilter('ignore')  # suppress jit trace warning
+                 self.tb.add_graph(torch.jit.trace(de_parallel(model), imgs[0:1], strict=False), [])
+         if self.wandb and ni == 10:
+             files = sorted(self.save_dir.glob('train*.jpg'))
+             self.wandb.log({'Mosaics': [wandb.Image(str(f), caption=f.name) for f in files if f.exists()]})
+
+     def on_train_epoch_end(self, epoch):
+         # Callback runs on train epoch end
+         if self.wandb:
+             self.wandb.current_epoch = epoch + 1
+
+     def on_val_batch_end(self, pred, predn, path, names, im):
+         # Callback runs on train batch end
+         if self.wandb:
+             self.wandb.val_one_image(pred, predn, path, names, im)
+
+     def on_val_end(self):
+         # Callback runs on val end
+         if self.wandb:
+             files = sorted(self.save_dir.glob('val*.jpg'))
+             self.wandb.log({"Validation": [wandb.Image(str(f), caption=f.name) for f in files]})
+
+     def on_train_val_end(self, mloss, results, lr, epoch, s, best_fitness, fi):
+         # Callback runs on validation end during training
+         vals = list(mloss[:-1]) + list(results) + lr
+         tags = ['train/box_loss', 'train/obj_loss', 'train/cls_loss',  # train loss
+                 'metrics/precision', 'metrics/recall', 'metrics/mAP_0.5', 'metrics/mAP_0.5:0.95',
+                 'val/box_loss', 'val/obj_loss', 'val/cls_loss',  # val loss
+                 'x/lr0', 'x/lr1', 'x/lr2']  # params
+         if self.txt:
+             with open(self.results_file, 'a') as f:
+                 f.write(s + '%10.4g' * 7 % results + '\n')  # append metrics, val_loss
+         if self.tb:
+             for x, tag in zip(vals, tags):
+                 self.tb.add_scalar(tag, x, epoch)  # TensorBoard
+         if self.wandb:
+             self.wandb.log({k: v for k, v in zip(tags, vals)})
+             self.wandb.end_epoch(best_result=best_fitness == fi)
+
+     def on_model_save(self, last, epoch, final_epoch, best_fitness, fi):
+         # Callback runs on model save event
+         if self.wandb:
+             if ((epoch + 1) % self.opt.save_period == 0 and not final_epoch) and self.opt.save_period != -1:
+                 self.wandb.log_model(last.parent, self.opt, epoch, fi, best_model=best_fitness == fi)
+
+     def on_train_end(self, last, best):
+         # Callback runs on training end
+         files = ['results.png', 'confusion_matrix.png', *[f'{x}_curve.png' for x in ('F1', 'PR', 'P', 'R')]]
+         files = [(self.save_dir / f) for f in files if (self.save_dir / f).exists()]  # filter
+         if self.wandb:
+             wandb.log({"Results": [wandb.Image(str(f), caption=f.name) for f in files]})
+             wandb.log_artifact(str(best if best.exists() else last), type='model',
+                                name='run_' + self.wandb.wandb_run.id + '_model',
+                                aliases=['latest', 'best', 'stripped'])
+             self.wandb.finish_run()
+
+     def log_images(self, paths):
+         # Log images
+         if self.wandb:
+             self.wandb.log({"Labels": [wandb.Image(str(x), caption=x.name) for x in paths]})
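
A hypothetical standalone use of the class, assuming the snippet is run from a YOLOv5 checkout so the `utils` imports resolve; the paths and the `opt` namespace below are illustrative only, not values the repository defines. Restricting `include` to `('txt',)` makes `start()` skip both the TensorBoard and the W&B branches:

    import logging
    from pathlib import Path
    from types import SimpleNamespace

    from utils.loggers import Loggers

    opt = SimpleNamespace(evolve=False, resume=False)  # placeholder options object
    loggers = Loggers(save_dir=Path('runs/train/exp'),  # illustrative run directory
                      results_file=Path('runs/train/exp/results.txt'),
                      weights='yolov5s.pt', opt=opt, hyp={}, data_dict={},
                      logger=logging.getLogger(__name__),
                      include=('txt',)).start()
    print(loggers.txt, loggers.tb, loggers.wandb)  # -> True None None: only txt logging is active

Because `start()` wraps the W&B setup in a bare `try`/`except`, any failure there silently falls back to `self.wandb = None`, so the training loop never has to guard against a missing wandb installation.
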
utils/{wandb_logging → loggers/wandb}/__init__.py RENAMED
File without changes
utils/{wandb_logging → loggers/wandb}/log_dataset.py RENAMED
File without changes
utils/{wandb_logging → loggers/wandb}/sweep.py RENAMED
@@ -1,12 +1,12 @@
import sys
from pathlib import Path
+
import wandb

FILE = Path(__file__).absolute()
sys.path.append(FILE.parents[2].as_posix())  # add utils/ to path

from train import train, parse_opt
- import test
from utils.general import increment_path
from utils.torch_utils import select_device
utils/{wandb_logging → loggers/wandb}/sweep.yaml RENAMED
@@ -14,7 +14,7 @@
# You can use grid, bayesian and hyperopt search strategy
# For more info on configuring sweeps visit - https://docs.wandb.ai/guides/sweeps/configuration

- program: utils/wandb_logging/sweep.py
+ program: utils/loggers/wandb/sweep.py
method: random
metric:
  name: metrics/mAP_0.5
utils/{wandb_logging → loggers/wandb}/wandb_utils.py RENAMED
@@ -1,4 +1,5 @@
"""Utilities and tools for tracking runs with Weights & Biases."""
+
import logging
import os
import sys
@@ -8,15 +9,18 @@ from pathlib import Path
import yaml
from tqdm import tqdm

- sys.path.append(str(Path(__file__).parent.parent.parent))  # add utils/ to path
+ FILE = Path(__file__).absolute()
+ sys.path.append(FILE.parents[3].as_posix())  # add yolov5/ to path
+
from utils.datasets import LoadImagesAndLabels
from utils.datasets import img2label_paths
- from utils.general import colorstr, check_dataset, check_file
+ from utils.general import check_dataset, check_file

try:
    import wandb
-     from wandb import init, finish
- except ImportError:
+
+     assert hasattr(wandb, '__version__')  # verify package import not local dir
+ except (ImportError, AssertionError):
    wandb = None

RANK = int(os.getenv('RANK', -1))
@@ -106,7 +110,7 @@ class WandbLogger():
        self.data_dict = data_dict
        self.bbox_media_panel_images = []
        self.val_table_path_map = None
-         self.max_imgs_to_log = 16
+         self.max_imgs_to_log = 16
        # It's more elegant to stick to 1 wandb.init call, but useful config data is overwritten in the WandbLogger's wandb.init call
        if isinstance(opt.resume, str):  # checks resume from artifact
            if opt.resume.startswith(WANDB_ARTIFACT_PREFIX):
@@ -134,13 +138,11 @@ class WandbLogger():
                if not opt.resume:
                    wandb_data_dict = self.check_and_upload_dataset(opt) if opt.upload_dataset else data_dict
                    # Info useful for resuming from artifacts
-                     self.wandb_run.config.update({'opt': vars(opt), 'data_dict': wandb_data_dict}, allow_val_change=True)
+                     self.wandb_run.config.update({'opt': vars(opt), 'data_dict': wandb_data_dict},
+                                                  allow_val_change=True)
                self.data_dict = self.setup_training(opt, data_dict)
            if self.job_type == 'Dataset Creation':
                self.data_dict = self.check_and_upload_dataset(opt)
-         else:
-             prefix = colorstr('wandb: ')
-             print(f"{prefix}Install Weights & Biases for YOLOv5 logging with 'pip install wandb' (recommended)")

    def check_and_upload_dataset(self, opt):
        assert wandb, 'Install wandb to upload dataset'
@@ -169,7 +171,7 @@ class WandbLogger():
                                                                                        opt.artifact_alias)
            self.val_artifact_path, self.val_artifact = self.download_dataset_artifact(data_dict.get('val'),
                                                                                       opt.artifact_alias)
-
+
            if self.train_artifact_path is not None:
                train_path = Path(self.train_artifact_path) / 'data/images/'
                data_dict['train'] = str(train_path)
@@ -177,7 +179,6 @@ class WandbLogger():
                val_path = Path(self.val_artifact_path) / 'data/images/'
                data_dict['val'] = str(val_path)

-
            if self.val_artifact is not None:
                self.result_artifact = wandb.Artifact("run_" + wandb.run.id + "_progress", "evaluation")
                self.result_table = wandb.Table(["epoch", "id", "ground truth", "prediction", "avg_confidence"])
@@ -315,9 +316,9 @@ class WandbLogger():
                                             )

    def val_one_image(self, pred, predn, path, names, im):
-         if self.val_table and self.result_table:  # Log Table if Val dataset is uploaded as artifact
+         if self.val_table and self.result_table:  # Log Table if Val dataset is uploaded as artifact
            self.log_training_progress(predn, path, names)
-         else:  # Default to bbox media panelif Val artifact not found
+         else:  # Default to bbox media panelif Val artifact not found
            if len(self.bbox_media_panel_images) < self.max_imgs_to_log and self.current_epoch > 0:
                if self.current_epoch % self.bbox_interval == 0:
                    box_data = [{"position": {"minX": xyxy[0], "minY": xyxy[1], "maxX": xyxy[2], "maxY": xyxy[3]},
@@ -328,7 +329,6 @@ class WandbLogger():
                    boxes = {"predictions": {"box_data": box_data, "class_labels": names}}  # inference-space
                    self.bbox_media_panel_images.append(wandb.Image(im, boxes=boxes, caption=path.name))

-
    def log(self, log_dict):
        if self.wandb_run:
            for key, value in log_dict.items():
utils/plots.py CHANGED
@@ -327,9 +327,8 @@ def plot_labels(labels, names=(), save_dir=Path(''), loggers=None):
    plt.close()

    # loggers
-     for k, v in loggers.items() or {}:
-         if k == 'wandb' and v:
-             v.log({"Labels": [v.Image(str(x), caption=x.name) for x in save_dir.glob('*labels*.jpg')]}, commit=False)
+     if loggers:
+         loggers.log_images(save_dir.glob('*labels*.jpg'))


def plot_evolution(yaml_file='data/hyp.finetune.yaml'):  # from utils.plots import *; plot_evolution()
val.py CHANGED
@@ -26,6 +26,7 @@ from utils.general import coco80_to_coco91_class, check_dataset, check_file, che
from utils.metrics import ap_per_class, ConfusionMatrix
from utils.plots import plot_images, output_to_target, plot_study_txt
from utils.torch_utils import select_device, time_sync
+ from utils.loggers import Loggers


def save_one_txt(predn, save_conf, shape, file):
@@ -97,7 +98,7 @@ def run(data,
        dataloader=None,
        save_dir=Path(''),
        plots=True,
-         wandb_logger=None,
+         loggers=Loggers(),
        compute_loss=None,
        ):
    # Initialize/load model and set device
@@ -215,8 +216,7 @@ def run(data,
                save_one_txt(predn, save_conf, shape, file=save_dir / 'labels' / (path.stem + '.txt'))
            if save_json:
                save_one_json(predn, jdict, path, class_map)  # append to COCO-JSON dictionary
-             if wandb_logger and wandb_logger.wandb_run:
-                 wandb_logger.val_one_image(pred, predn, path, names, img[si])
+             loggers.on_val_batch_end(pred, predn, path, names, img[si])

        # Plot images
        if plots and batch_i < 3:
@@ -253,9 +253,7 @@ def run(data,
    # Plots
    if plots:
        confusion_matrix.plot(save_dir=save_dir, names=list(names.values()))
-         if wandb_logger and wandb_logger.wandb:
-             val_batches = [wandb_logger.wandb.Image(str(f), caption=f.name) for f in sorted(save_dir.glob('val*.jpg'))]
-             wandb_logger.log({"Validation": val_batches})
+         loggers.on_val_end()

    # Save JSON
    if save_json and len(jdict):
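
Worth noting about the val.py change: the new default `loggers=Loggers()` is safe for standalone validation because `__init__` leaves `txt`, `tb` and `wandb` unset (None) and `start()` is never called, so every hook on the default instance is a no-op. A tiny illustration under the same YOLOv5-checkout assumption as above:

    from utils.loggers import Loggers

    loggers = Loggers()            # .start() never called -> loggers.wandb is None
    loggers.on_val_end()           # W&B branch is skipped, nothing is logged
    loggers.on_train_epoch_end(0)  # likewise a no-op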