Refactor `train.py` and `val.py` `loggers` (#4137)
Browse files
* Update loggers
* Config
* Update
* cleanup
* fix1
* fix2
* fix3 and reformat
* format
* Logger() class
* cleanup
* cleanup2
* wandb package import fix
* wandb package import fix2
* txt fix
* fix4
* fix5
* fix6
* drop wandb into utils/loggers
* fix 7
* rename loggers/wandb_logging to loggers/wandb
* Update message
* Update message
* Update message
* cleanup
* Fix x axis bug
* fix rank 0 issue
* cleanup
- +21 -66
- utils/loggers/ +129 -0
- utils/{wandb_logging → loggers/wandb}/ +0 -0
- utils/{wandb_logging → loggers/wandb}/ +0 -0
- utils/{wandb_logging → loggers/wandb}/ +1 -1
- utils/{wandb_logging → loggers/wandb}/sweep.yaml +1 -1
- utils/{wandb_logging → loggers/wandb}/ +14 -14
- utils/ +2 -3
- +4 -6
@@ -10,7 +10,6 @@ import os
10 |
import random
11 |
import sys
12 |
import time
13 |
import warnings
14 |
from copy import deepcopy
15 |
from pathlib import Path
16 |
from threading import Thread
@@ -24,7 +23,6 @@ import yaml
24 |
from torch.cuda import amp
25 |
from torch.nn.parallel import DistributedDataParallel as DDP
26 |
from torch.optim import Adam, SGD, lr_scheduler
27 |
from torch.utils.tensorboard import SummaryWriter
28 |
from tqdm import tqdm
29 |
30 |
FILE = Path(__file__).absolute()
@@ -42,8 +40,9 @@ from utils.google_utils import attempt_download
42 |
from utils.loss import ComputeLoss
43 |
from utils.plots import plot_images, plot_labels, plot_results, plot_evolution
44 |
from utils.torch_utils import ModelEMA, select_device, intersect_dicts, torch_distributed_zero_first, de_parallel
45 |
from utils.
46 |
from utils.metrics import fitness
47 |
48 |
LOGGER = logging.getLogger(__name__)
49 |
LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1)) #
@@ -76,37 +75,23 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary
76 |
with open(save_dir / 'opt.yaml', 'w') as f:
77 |
yaml.safe_dump(vars(opt), f, sort_keys=False)
78 |
79 |
80 |
plots = not evolve # create plots
81 |
cuda = device.type != 'cpu'
82 |
init_seeds(1 + RANK)
83 |
with open(data) as f:
84 |
data_dict = yaml.safe_load(f) # data dict
85 |
86 |
# Loggers
87 |
loggers = {'wandb': None, 'tb': None} # loggers dict
88 |
if RANK in [-1, 0]:
89 |
# TensorBoard
90 |
if plots:
91 |
prefix = colorstr('tensorboard: ')
92 |
-"{prefix}Start with 'tensorboard --logdir {opt.project}', view at http://localhost:6006/")
93 |
loggers['tb'] = SummaryWriter(str(save_dir))
94 |
95 |
# W&B
96 |
opt.hyp = hyp # add hyperparameters
97 |
run_id = torch.load(weights).get('wandb_id') if weights.endswith('.pt') and os.path.isfile(weights) else None
98 |
run_id = run_id if opt.resume else None # start fresh run if transfer learning
99 |
wandb_logger = WandbLogger(opt, save_dir.stem, run_id, data_dict)
100 |
loggers['wandb'] = wandb_logger.wandb
101 |
if loggers['wandb']:
102 |
data_dict = wandb_logger.data_dict
103 |
weights, epochs, hyp = opt.weights, opt.epochs, opt.hyp # may update values if resuming
104 |
105 |
nc = 1 if single_cls else int(data_dict['nc']) # number of classes
106 |
names = ['item'] if single_cls and len(data_dict['names']) != 1 else data_dict['names'] # class names
107 |
assert len(names) == nc, f'{len(names)} names found for nc={nc} dataset in {data}' # check
108 |
is_coco = data.endswith('coco.yaml') and nc == 80 # COCO dataset
109 |
110 |
# Model
111 |
pretrained = weights.endswith('.pt')
112 |
if pretrained:
@@ -351,16 +336,11 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary
351 |
352 |
353 |
# Plot
354 |
if plots
355 |
356 |
357 |
358 |
359 |
warnings.simplefilter('ignore') # suppress jit trace warning
360 |
loggers['tb'].add_graph(torch.jit.trace(de_parallel(model), imgs[0:1], strict=False), [])
361 |
elif plots and ni == 10 and loggers['wandb']:
362 |
wandb_logger.log({'Mosaics': [loggers['wandb'].Image(str(x), for x in
363 |
save_dir.glob('train*.jpg') if x.exists()]})
364 |
365 |
# end batch ------------------------------------------------------------------------------------------------
366 |
@@ -368,13 +348,12 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary
368 |
lr = [x['lr'] for x in optimizer.param_groups] # for loggers
369 |
370 |
371 |
# DDP process 0 or single-GPU
372 |
if RANK in [-1, 0]:
373 |
# mAP
374 |
ema.update_attr(model, include=['yaml', 'nc', 'hyp', 'names', 'stride', 'class_weights'])
375 |
final_epoch = epoch + 1 == epochs
376 |
if not noval or final_epoch: # Calculate mAP
377 |
wandb_logger.current_epoch = epoch + 1
378 |
results, maps, _ =,
379 |
batch_size=batch_size // WORLD_SIZE * 2,
380 |
@@ -385,29 +364,14 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary
385 |
save_json=is_coco and final_epoch,
386 |
verbose=nc < 50 and final_epoch,
387 |
plots=plots and final_epoch,
388 |
389 |
390 |
391 |
# Write
392 |
with open(results_file, 'a') as f:
393 |
f.write(s + '%10.4g' * 7 % results + '\n') # append metrics, val_loss
394 |
395 |
# Log
396 |
tags = ['train/box_loss', 'train/obj_loss', 'train/cls_loss', # train loss
397 |
'metrics/precision', 'metrics/recall', 'metrics/mAP_0.5', 'metrics/mAP_0.5:0.95',
398 |
'val/box_loss', 'val/obj_loss', 'val/cls_loss', # val loss
399 |
'x/lr0', 'x/lr1', 'x/lr2'] # params
400 |
for x, tag in zip(list(mloss[:-1]) + list(results) + lr, tags):
401 |
if loggers['tb']:
402 |
loggers['tb'].add_scalar(tag, x, epoch) # TensorBoard
403 |
if loggers['wandb']:
404 |
wandb_logger.log({tag: x}) # W&B
405 |
406 |
# Update best mAP
407 |
fi = fitness(np.array(results).reshape(1, -1)) # weighted combination of [P, R, mAP@.5, mAP@.5-.95]
408 |
if fi > best_fitness:
409 |
best_fitness = fi
410 |
411 |
412 |
# Save model
413 |
if (not nosave) or (final_epoch and not evolve): # if save
@@ -418,16 +382,14 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary
418 |
'ema': deepcopy(ema.ema).half(),
419 |
'updates': ema.updates,
420 |
'optimizer': optimizer.state_dict(),
421 |
422 |
423 |
# Save last, best and delete
424 |, last)
425 |
if best_fitness == fi:
426 |, best)
427 |
if loggers['wandb']:
428 |
if ((epoch + 1) % opt.save_period == 0 and not final_epoch) and opt.save_period != -1:
429 |
wandb_logger.log_model(last.parent, opt, epoch, fi, best_model=best_fitness == fi)
430 |
del ckpt
431 |
432 |
# end epoch ----------------------------------------------------------------------------------------------------
433 |
# end training -----------------------------------------------------------------------------------------------------
@@ -435,10 +397,6 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary
435 |'{epoch - start_epoch + 1} epochs completed in {(time.time() - t0) / 3600:.3f} hours.\n')
436 |
if plots:
437 |
plot_results(save_dir=save_dir) # save as results.png
438 |
if loggers['wandb']:
439 |
files = ['results.png', 'confusion_matrix.png', *[f'{x}_curve.png' for x in ('F1', 'PR', 'P', 'R')]]
440 |
wandb_logger.log({"Results": [loggers['wandb'].Image(str(save_dir / f), caption=f) for f in files
441 |
if (save_dir / f).exists()]})
442 |
443 |
if not evolve:
444 |
if is_coco: # COCO dataset
@@ -458,11 +416,8 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary
458 |
for f in last, best:
459 |
if f.exists():
460 |
strip_optimizer(f) # strip optimizers
461 |
462 |
463 |
name='run_' + + '_model',
464 |
aliases=['latest', 'best', 'stripped'])
465 |
466 |
467 |
468 |
return results
10 |
import random
11 |
import sys
12 |
import time
13 |
from copy import deepcopy
14 |
from pathlib import Path
15 |
from threading import Thread
23 |
from torch.cuda import amp
24 |
from torch.nn.parallel import DistributedDataParallel as DDP
25 |
from torch.optim import Adam, SGD, lr_scheduler
26 |
from tqdm import tqdm
27 |
28 |
FILE = Path(__file__).absolute()
40 |
from utils.loss import ComputeLoss
41 |
from utils.plots import plot_images, plot_labels, plot_results, plot_evolution
42 |
from utils.torch_utils import ModelEMA, select_device, intersect_dicts, torch_distributed_zero_first, de_parallel
43 |
from utils.loggers.wandb.wandb_utils import check_wandb_resume
44 |
from utils.metrics import fitness
45 |
from utils.loggers import Loggers
46 |
47 |
LOGGER = logging.getLogger(__name__)
48 |
LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1)) #
75 |
with open(save_dir / 'opt.yaml', 'w') as f:
76 |
yaml.safe_dump(vars(opt), f, sort_keys=False)
77 |
78 |
# Config
79 |
plots = not evolve # create plots
80 |
cuda = device.type != 'cpu'
81 |
init_seeds(1 + RANK)
82 |
with open(data) as f:
83 |
data_dict = yaml.safe_load(f) # data dict
84 |
nc = 1 if single_cls else int(data_dict['nc']) # number of classes
85 |
names = ['item'] if single_cls and len(data_dict['names']) != 1 else data_dict['names'] # class names
86 |
assert len(names) == nc, f'{len(names)} names found for nc={nc} dataset in {data}' # check
87 |
is_coco = data.endswith('coco.yaml') and nc == 80 # COCO dataset
88 |
89 |
# Loggers
90 |
if RANK in [-1, 0]:
91 |
loggers = Loggers(save_dir, results_file, weights, opt, hyp, data_dict, LOGGER).start() # loggers dict
92 |
if loggers.wandb and resume:
93 |
weights, epochs, hyp, data_dict = opt.weights, opt.epochs, opt.hyp, loggers.wandb.data_dict
94 |
95 |
# Model
96 |
pretrained = weights.endswith('.pt')
97 |
if pretrained:
336 |
337 |
338 |
# Plot
339 |
if plots:
340 |
if ni < 3:
341 |
f = save_dir / f'train_batch{ni}.jpg' # filename
342 |
Thread(target=plot_images, args=(imgs, targets, paths, f), daemon=True).start()
343 |
loggers.on_train_batch_end(ni, model, imgs)
344 |
345 |
# end batch ------------------------------------------------------------------------------------------------
346 |
348 |
lr = [x['lr'] for x in optimizer.param_groups] # for loggers
349 |
350 |
351 |
if RANK in [-1, 0]:
352 |
# mAP
353 |
354 |
ema.update_attr(model, include=['yaml', 'nc', 'hyp', 'names', 'stride', 'class_weights'])
355 |
final_epoch = epoch + 1 == epochs
356 |
if not noval or final_epoch: # Calculate mAP
357 |
results, maps, _ =,
358 |
batch_size=batch_size // WORLD_SIZE * 2,
359 |
364 |
save_json=is_coco and final_epoch,
365 |
verbose=nc < 50 and final_epoch,
366 |
plots=plots and final_epoch,
367 |
368 |
369 |
370 |
# Update best mAP
371 |
fi = fitness(np.array(results).reshape(1, -1)) # weighted combination of [P, R, mAP@.5, mAP@.5-.95]
372 |
if fi > best_fitness:
373 |
best_fitness = fi
374 |
loggers.on_train_val_end(mloss, results, lr, epoch, s, best_fitness, fi)
375 |
376 |
# Save model
377 |
if (not nosave) or (final_epoch and not evolve): # if save
382 |
'ema': deepcopy(ema.ema).half(),
383 |
'updates': ema.updates,
384 |
'optimizer': optimizer.state_dict(),
385 |
'wandb_id': if loggers.wandb else None}
386 |
387 |
# Save last, best and delete
388 |, last)
389 |
if best_fitness == fi:
390 |, best)
391 |
del ckpt
392 |
loggers.on_model_save(last, epoch, final_epoch, best_fitness, fi)
393 |
394 |
# end epoch ----------------------------------------------------------------------------------------------------
395 |
# end training -----------------------------------------------------------------------------------------------------
397 |'{epoch - start_epoch + 1} epochs completed in {(time.time() - t0) / 3600:.3f} hours.\n')
398 |
if plots:
399 |
plot_results(save_dir=save_dir) # save as results.png
400 |
401 |
if not evolve:
402 |
if is_coco: # COCO dataset
416 |
for f in last, best:
417 |
if f.exists():
418 |
strip_optimizer(f) # strip optimizers
419 |
420 |
loggers.on_train_end(last, best)
421 |
422 |
423 |
return results
@@ -0,0 +1,129 @@
1 |
# YOLOv5 experiment logging utils
2 |
3 |
import warnings
4 |
5 |
import torch
6 |
from torch.utils.tensorboard import SummaryWriter
7 |
8 |
from utils.general import colorstr, emojis
9 |
from utils.loggers.wandb.wandb_utils import WandbLogger
10 |
from utils.torch_utils import de_parallel
11 |
12 |
LOGGERS = ('txt', 'tb', 'wandb') # text-file, TensorBoard, Weights & Biases
13 |
14 |
15 |
import wandb
16 |
17 |
assert hasattr(wandb, '__version__') # verify package import not local dir
18 |
except (ImportError, AssertionError):
19 |
wandb = None
20 |
21 |
22 |
class Loggers():
23 |
# YOLOv5 Loggers class
24 |
def __init__(self, save_dir=None, results_file=None, weights=None, opt=None, hyp=None,
25 |
data_dict=None, logger=None, include=LOGGERS):
26 |
self.save_dir = save_dir
27 |
self.results_file = results_file
28 |
self.weights = weights
29 |
self.opt = opt
30 |
self.hyp = hyp
31 |
self.data_dict = data_dict
32 |
self.logger = logger # for printing results to console
33 |
self.include = include
34 |
for k in LOGGERS:
35 |
setattr(self, k, None) # init empty logger dictionary
36 |
37 |
def start(self):
38 |
self.txt = True # always log to txt
39 |
40 |
# Message
41 |
42 |
import wandb
43 |
except ImportError:
44 |
prefix = colorstr('Weights & Biases: ')
45 |
s = f"{prefix}run 'pip install wandb' to automatically track and visualize YOLOv5 π runs (RECOMMENDED)"
46 |
47 |
48 |
# TensorBoard
49 |
s = self.save_dir
50 |
if 'tb' in self.include and not self.opt.evolve:
51 |
prefix = colorstr('TensorBoard: ')
52 |
+"{prefix}Start with 'tensorboard --logdir {s.parent}', view at http://localhost:6006/")
53 |
self.tb = SummaryWriter(str(s))
54 |
55 |
# W&B
56 |
57 |
assert 'wandb' in self.include and wandb
58 |
run_id = torch.load(self.weights).get('wandb_id') if self.opt.resume else None
59 |
self.opt.hyp = self.hyp # add hyperparameters
60 |
self.wandb = WandbLogger(self.opt, s.stem, run_id, self.data_dict)
61 |
62 |
self.wandb = None
63 |
64 |
return self
65 |
66 |
def on_train_batch_end(self, ni, model, imgs):
67 |
# Callback runs on train batch end
68 |
if ni == 0:
69 |
with warnings.catch_warnings():
70 |
warnings.simplefilter('ignore') # suppress jit trace warning
71 |
self.tb.add_graph(torch.jit.trace(de_parallel(model), imgs[0:1], strict=False), [])
72 |
if self.wandb and ni == 10:
73 |
files = sorted(self.save_dir.glob('train*.jpg'))
74 |
self.wandb.log({'Mosaics': [wandb.Image(str(f), for f in files if f.exists()]})
75 |
76 |
def on_train_epoch_end(self, epoch):
77 |
# Callback runs on train epoch end
78 |
if self.wandb:
79 |
self.wandb.current_epoch = epoch + 1
80 |
81 |
def on_val_batch_end(self, pred, predn, path, names, im):
82 |
# Callback runs on val batch end
83 |
if self.wandb:
84 |
self.wandb.val_one_image(pred, predn, path, names, im)
85 |
86 |
def on_val_end(self):
87 |
# Callback runs on val end
88 |
if self.wandb:
89 |
files = sorted(self.save_dir.glob('val*.jpg'))
90 |
self.wandb.log({"Validation": [wandb.Image(str(f), for f in files]})
91 |
92 |
def on_train_val_end(self, mloss, results, lr, epoch, s, best_fitness, fi):
93 |
# Callback runs on validation end during training
94 |
vals = list(mloss[:-1]) + list(results) + lr
95 |
tags = ['train/box_loss', 'train/obj_loss', 'train/cls_loss', # train loss
96 |
'metrics/precision', 'metrics/recall', 'metrics/mAP_0.5', 'metrics/mAP_0.5:0.95',
97 |
'val/box_loss', 'val/obj_loss', 'val/cls_loss', # val loss
98 |
'x/lr0', 'x/lr1', 'x/lr2'] # params
99 |
if self.txt:
100 |
with open(self.results_file, 'a') as f:
101 |
f.write(s + '%10.4g' * 7 % results + '\n') # append metrics, val_loss
102 |
if self.tb:
103 |
for x, tag in zip(vals, tags):
104 |
self.tb.add_scalar(tag, x, epoch) # TensorBoard
105 |
if self.wandb:
106 |
self.wandb.log({k: v for k, v in zip(tags, vals)})
107 |
self.wandb.end_epoch(best_result=best_fitness == fi)
108 |
109 |
def on_model_save(self, last, epoch, final_epoch, best_fitness, fi):
110 |
# Callback runs on model save event
111 |
if self.wandb:
112 |
if ((epoch + 1) % self.opt.save_period == 0 and not final_epoch) and self.opt.save_period != -1:
113 |
self.wandb.log_model(last.parent, self.opt, epoch, fi, best_model=best_fitness == fi)
114 |
115 |
def on_train_end(self, last, best):
116 |
# Callback runs on training end
117 |
files = ['results.png', 'confusion_matrix.png', *[f'{x}_curve.png' for x in ('F1', 'PR', 'P', 'R')]]
118 |
files = [(self.save_dir / f) for f in files if (self.save_dir / f).exists()] # filter
119 |
if self.wandb:
120 |
wandb.log({"Results": [wandb.Image(str(f), for f in files]})
121 |
wandb.log_artifact(str(best if best.exists() else last), type='model',
122 |
name='run_' + + '_model',
123 |
aliases=['latest', 'best', 'stripped'])
124 |
125 |
126 |
def log_images(self, paths):
127 |
# Log images
128 |
if self.wandb:
129 |
self.wandb.log({"Labels": [wandb.Image(str(x), for x in paths]})
utils/{wandb_logging → loggers/wandb}/
File without changes
utils/{wandb_logging → loggers/wandb}/
File without changes
utils/{wandb_logging → loggers/wandb}/
@@ -1,12 +1,12 @@
1 |
import sys
2 |
from pathlib import Path
3 |
import wandb
4 |
5 |
FILE = Path(__file__).absolute()
6 |
sys.path.append(FILE.parents[2].as_posix()) # add utils/ to path
7 |
8 |
from train import train, parse_opt
9 |
import test
10 |
from utils.general import increment_path
11 |
from utils.torch_utils import select_device
12 |
1 |
import sys
2 |
from pathlib import Path
3 |
4 |
import wandb
5 |
6 |
FILE = Path(__file__).absolute()
7 |
sys.path.append(FILE.parents[2].as_posix()) # add utils/ to path
8 |
9 |
from train import train, parse_opt
10 |
from utils.general import increment_path
11 |
from utils.torch_utils import select_device
12 |
utils/{wandb_logging → loggers/wandb}/sweep.yaml
@@ -14,7 +14,7 @@
14 |
# You can use grid, bayesian and hyperopt search strategy
15 |
# For more info on configuring sweeps visit -
16 |
17 |
program: utils/
18 |
method: random
19 |
20 |
name: metrics/mAP_0.5
14 |
# You can use grid, bayesian and hyperopt search strategy
15 |
# For more info on configuring sweeps visit -
16 |
17 |
program: utils/loggers/wandb/
18 |
method: random
19 |
20 |
name: metrics/mAP_0.5
utils/{wandb_logging → loggers/wandb}/
@@ -1,4 +1,5 @@
1 |
"""Utilities and tools for tracking runs with Weights & Biases."""
2 |
import logging
3 |
import os
4 |
import sys
@@ -8,15 +9,18 @@ from pathlib import Path
8 |
import yaml
9 |
from tqdm import tqdm
10 |
11 |
12 |
from utils.datasets import LoadImagesAndLabels
13 |
from utils.datasets import img2label_paths
14 |
from utils.general import
15 |
16 |
17 |
import wandb
18 |
19 |
20 |
wandb = None
21 |
22 |
RANK = int(os.getenv('RANK', -1))
@@ -106,7 +110,7 @@ class WandbLogger():
106 |
self.data_dict = data_dict
107 |
self.bbox_media_panel_images = []
108 |
self.val_table_path_map = None
109 |
self.max_imgs_to_log = 16
110 |
# It's more elegant to stick to 1 wandb.init call, but useful config data is overwritten in the WandbLogger's wandb.init call
111 |
if isinstance(opt.resume, str): # checks resume from artifact
112 |
if opt.resume.startswith(WANDB_ARTIFACT_PREFIX):
@@ -134,13 +138,11 @@ class WandbLogger():
134 |
if not opt.resume:
135 |
wandb_data_dict = self.check_and_upload_dataset(opt) if opt.upload_dataset else data_dict
136 |
# Info useful for resuming from artifacts
137 |
self.wandb_run.config.update({'opt': vars(opt), 'data_dict': wandb_data_dict},
138 |
self.data_dict = self.setup_training(opt, data_dict)
139 |
if self.job_type == 'Dataset Creation':
140 |
self.data_dict = self.check_and_upload_dataset(opt)
141 |
142 |
prefix = colorstr('wandb: ')
143 |
print(f"{prefix}Install Weights & Biases for YOLOv5 logging with 'pip install wandb' (recommended)")
144 |
145 |
def check_and_upload_dataset(self, opt):
146 |
assert wandb, 'Install wandb to upload dataset'
@@ -169,7 +171,7 @@ class WandbLogger():
169 |
170 |
self.val_artifact_path, self.val_artifact = self.download_dataset_artifact(data_dict.get('val'),
171 |
172 |
173 |
if self.train_artifact_path is not None:
174 |
train_path = Path(self.train_artifact_path) / 'data/images/'
175 |
data_dict['train'] = str(train_path)
@@ -177,7 +179,6 @@ class WandbLogger():
177 |
val_path = Path(self.val_artifact_path) / 'data/images/'
178 |
data_dict['val'] = str(val_path)
179 |
180 |
181 |
if self.val_artifact is not None:
182 |
self.result_artifact = wandb.Artifact("run_" + + "_progress", "evaluation")
183 |
self.result_table = wandb.Table(["epoch", "id", "ground truth", "prediction", "avg_confidence"])
@@ -315,9 +316,9 @@ class WandbLogger():
315 |
316 |
317 |
def val_one_image(self, pred, predn, path, names, im):
318 |
if self.val_table and self.result_table:
319 |
self.log_training_progress(predn, path, names)
320 |
321 |
if len(self.bbox_media_panel_images) < self.max_imgs_to_log and self.current_epoch > 0:
322 |
if self.current_epoch % self.bbox_interval == 0:
323 |
box_data = [{"position": {"minX": xyxy[0], "minY": xyxy[1], "maxX": xyxy[2], "maxY": xyxy[3]},
@@ -328,7 +329,6 @@ class WandbLogger():
328 |
boxes = {"predictions": {"box_data": box_data, "class_labels": names}} # inference-space
329 |
self.bbox_media_panel_images.append(wandb.Image(im, boxes=boxes,
330 |
331 |
332 |
def log(self, log_dict):
333 |
if self.wandb_run:
334 |
for key, value in log_dict.items():
1 |
"""Utilities and tools for tracking runs with Weights & Biases."""
2 |
3 |
import logging
4 |
import os
5 |
import sys
9 |
import yaml
10 |
from tqdm import tqdm
11 |
12 |
FILE = Path(__file__).absolute()
13 |
sys.path.append(FILE.parents[3].as_posix()) # add yolov5/ to path
14 |
15 |
from utils.datasets import LoadImagesAndLabels
16 |
from utils.datasets import img2label_paths
17 |
from utils.general import check_dataset, check_file
18 |
19 |
20 |
import wandb
21 |
22 |
assert hasattr(wandb, '__version__') # verify package import not local dir
23 |
except (ImportError, AssertionError):
24 |
wandb = None
25 |
26 |
RANK = int(os.getenv('RANK', -1))
110 |
self.data_dict = data_dict
111 |
self.bbox_media_panel_images = []
112 |
self.val_table_path_map = None
113 |
self.max_imgs_to_log = 16
114 |
# It's more elegant to stick to 1 wandb.init call, but useful config data is overwritten in the WandbLogger's wandb.init call
115 |
if isinstance(opt.resume, str): # checks resume from artifact
116 |
if opt.resume.startswith(WANDB_ARTIFACT_PREFIX):
138 |
if not opt.resume:
139 |
wandb_data_dict = self.check_and_upload_dataset(opt) if opt.upload_dataset else data_dict
140 |
# Info useful for resuming from artifacts
141 |
self.wandb_run.config.update({'opt': vars(opt), 'data_dict': wandb_data_dict},
142 |
143 |
self.data_dict = self.setup_training(opt, data_dict)
144 |
if self.job_type == 'Dataset Creation':
145 |
self.data_dict = self.check_and_upload_dataset(opt)
146 |
147 |
def check_and_upload_dataset(self, opt):
148 |
assert wandb, 'Install wandb to upload dataset'
171 |
172 |
self.val_artifact_path, self.val_artifact = self.download_dataset_artifact(data_dict.get('val'),
173 |
174 |
175 |
if self.train_artifact_path is not None:
176 |
train_path = Path(self.train_artifact_path) / 'data/images/'
177 |
data_dict['train'] = str(train_path)
179 |
val_path = Path(self.val_artifact_path) / 'data/images/'
180 |
data_dict['val'] = str(val_path)
181 |
182 |
if self.val_artifact is not None:
183 |
self.result_artifact = wandb.Artifact("run_" + + "_progress", "evaluation")
184 |
self.result_table = wandb.Table(["epoch", "id", "ground truth", "prediction", "avg_confidence"])
316 |
317 |
318 |
def val_one_image(self, pred, predn, path, names, im):
319 |
if self.val_table and self.result_table: # Log Table if Val dataset is uploaded as artifact
320 |
self.log_training_progress(predn, path, names)
321 |
else: # Default to bbox media panel if Val artifact not found
322 |
if len(self.bbox_media_panel_images) < self.max_imgs_to_log and self.current_epoch > 0:
323 |
if self.current_epoch % self.bbox_interval == 0:
324 |
box_data = [{"position": {"minX": xyxy[0], "minY": xyxy[1], "maxX": xyxy[2], "maxY": xyxy[3]},
329 |
boxes = {"predictions": {"box_data": box_data, "class_labels": names}} # inference-space
330 |
self.bbox_media_panel_images.append(wandb.Image(im, boxes=boxes,
331 |
332 |
def log(self, log_dict):
333 |
if self.wandb_run:
334 |
for key, value in log_dict.items():
@@ -327,9 +327,8 @@ def plot_labels(labels, names=(), save_dir=Path(''), loggers=None):
327 |
328 |
329 |
# loggers
330 |
331 |
332 |
v.log({"Labels": [v.Image(str(x), for x in save_dir.glob('*labels*.jpg')]}, commit=False)
333 |
334 |
335 |
def plot_evolution(yaml_file='data/hyp.finetune.yaml'): # from utils.plots import *; plot_evolution()
327 |
328 |
329 |
# loggers
330 |
if loggers:
331 |
332 |
333 |
334 |
def plot_evolution(yaml_file='data/hyp.finetune.yaml'): # from utils.plots import *; plot_evolution()
@@ -26,6 +26,7 @@ from utils.general import coco80_to_coco91_class, check_dataset, check_file, che
26 |
from utils.metrics import ap_per_class, ConfusionMatrix
27 |
from utils.plots import plot_images, output_to_target, plot_study_txt
28 |
from utils.torch_utils import select_device, time_sync
29 |
30 |
31 |
def save_one_txt(predn, save_conf, shape, file):
@@ -97,7 +98,7 @@ def run(data,
97 |
98 |
99 |
100 |
101 |
102 |
103 |
# Initialize/load model and set device
@@ -215,8 +216,7 @@ def run(data,
215 |
save_one_txt(predn, save_conf, shape, file=save_dir / 'labels' / (path.stem + '.txt'))
216 |
if save_json:
217 |
save_one_json(predn, jdict, path, class_map) # append to COCO-JSON dictionary
218 |
219 |
wandb_logger.val_one_image(pred, predn, path, names, img[si])
220 |
221 |
# Plot images
222 |
if plots and batch_i < 3:
@@ -253,9 +253,7 @@ def run(data,
253 |
# Plots
254 |
if plots:
255 |
confusion_matrix.plot(save_dir=save_dir, names=list(names.values()))
256 |
257 |
val_batches = [wandb_logger.wandb.Image(str(f), for f in sorted(save_dir.glob('val*.jpg'))]
258 |
wandb_logger.log({"Validation": val_batches})
259 |
260 |
# Save JSON
261 |
if save_json and len(jdict):
26 |
from utils.metrics import ap_per_class, ConfusionMatrix
27 |
from utils.plots import plot_images, output_to_target, plot_study_txt
28 |
from utils.torch_utils import select_device, time_sync
29 |
from utils.loggers import Loggers
30 |
31 |
32 |
def save_one_txt(predn, save_conf, shape, file):
98 |
99 |
100 |
101 |
102 |
103 |
104 |
# Initialize/load model and set device
216 |
save_one_txt(predn, save_conf, shape, file=save_dir / 'labels' / (path.stem + '.txt'))
217 |
if save_json:
218 |
save_one_json(predn, jdict, path, class_map) # append to COCO-JSON dictionary
219 |
loggers.on_val_batch_end(pred, predn, path, names, img[si])
220 |
221 |
# Plot images
222 |
if plots and batch_i < 3:
253 |
# Plots
254 |
if plots:
255 |
confusion_matrix.plot(save_dir=save_dir, names=list(names.values()))
256 |
257 |
258 |
# Save JSON
259 |
if save_json and len(jdict):