File size: 8,041 Bytes
b944fa1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 |
# Copyright (c) OpenMMLab. All rights reserved.
import warnings
from typing import Callable, List, Optional, Union
import torch
from ..dist_utils import master_only
from .hook import HOOKS, Hook
@HOOKS.register_module()
class ProfilerHook(Hook):
"""Profiler to analyze performance during training.
PyTorch Profiler is a tool that allows the collection of the performance
metrics during the training. More details on Profiler can be found at
https://pytorch.org/docs/1.8.1/profiler.html#torch.profiler.profile
Args:
by_epoch (bool): Profile performance by epoch or by iteration.
Default: True.
profile_iters (int): Number of iterations for profiling.
If ``by_epoch=True``, profile_iters indicates that they are the
first profile_iters epochs at the beginning of the
training, otherwise it indicates the first profile_iters
iterations. Default: 1.
activities (list[str]): List of activity groups (CPU, CUDA) to use in
profiling. Default: ['cpu', 'cuda'].
schedule (dict, optional): Config of generating the callable schedule.
if schedule is None, profiler will not add step markers into the
trace and table view. Default: None.
on_trace_ready (callable, dict): Either a handler or a dict of generate
handler. Default: None.
record_shapes (bool): Save information about operator's input shapes.
Default: False.
profile_memory (bool): Track tensor memory allocation/deallocation.
Default: False.
with_stack (bool): Record source information (file and line number)
for the ops. Default: False.
with_flops (bool): Use formula to estimate the FLOPS of specific
operators (matrix multiplication and 2D convolution).
Default: False.
json_trace_path (str, optional): Exports the collected trace in Chrome
JSON format. Default: None.
Example:
>>> runner = ... # instantiate a Runner
>>> # tensorboard trace
>>> trace_config = dict(type='tb_trace', dir_name='work_dir')
>>> profiler_config = dict(on_trace_ready=trace_config)
>>> runner.register_profiler_hook(profiler_config)
>>> runner.run(data_loaders=[trainloader], workflow=[('train', 1)])
"""
def __init__(self,
by_epoch: bool = True,
profile_iters: int = 1,
activities: List[str] = ['cpu', 'cuda'],
schedule: Optional[dict] = None,
on_trace_ready: Optional[Union[Callable, dict]] = None,
record_shapes: bool = False,
profile_memory: bool = False,
with_stack: bool = False,
with_flops: bool = False,
json_trace_path: Optional[str] = None) -> None:
try:
from torch import profiler # torch version >= 1.8.1
except ImportError:
raise ImportError('profiler is the new feature of torch1.8.1, '
f'but your version is {torch.__version__}')
assert isinstance(by_epoch, bool), '``by_epoch`` should be a boolean.'
self.by_epoch = by_epoch
if profile_iters < 1:
raise ValueError('profile_iters should be greater than 0, but got '
f'{profile_iters}')
self.profile_iters = profile_iters
if not isinstance(activities, list):
raise ValueError(
f'activities should be list, but got {type(activities)}')
self.activities = []
for activity in activities:
activity = activity.lower()
if activity == 'cpu':
self.activities.append(profiler.ProfilerActivity.CPU)
elif activity == 'cuda':
self.activities.append(profiler.ProfilerActivity.CUDA)
else:
raise ValueError(
f'activity should be "cpu" or "cuda", but got {activity}')
if schedule is not None:
self.schedule = profiler.schedule(**schedule)
else:
self.schedule = None
self.on_trace_ready = on_trace_ready
self.record_shapes = record_shapes
self.profile_memory = profile_memory
self.with_stack = with_stack
self.with_flops = with_flops
self.json_trace_path = json_trace_path
@master_only
def before_run(self, runner):
if self.by_epoch and runner.max_epochs < self.profile_iters:
raise ValueError('self.profile_iters should not be greater than '
f'{runner.max_epochs}')
if not self.by_epoch and runner.max_iters < self.profile_iters:
raise ValueError('self.profile_iters should not be greater than '
f'{runner.max_iters}')
if callable(self.on_trace_ready): # handler
_on_trace_ready = self.on_trace_ready
elif isinstance(self.on_trace_ready, dict): # config of handler
trace_cfg = self.on_trace_ready.copy()
trace_type = trace_cfg.pop('type') # log_trace handler
if trace_type == 'log_trace':
def _log_handler(prof):
print(prof.key_averages().table(**trace_cfg))
_on_trace_ready = _log_handler
elif trace_type == 'tb_trace': # tensorboard_trace handler
try:
import torch_tb_profiler # noqa: F401
except ImportError:
raise ImportError('please run "pip install '
'torch-tb-profiler" to install '
'torch_tb_profiler')
_on_trace_ready = torch.profiler.tensorboard_trace_handler(
**trace_cfg)
else:
raise ValueError('trace_type should be "log_trace" or '
f'"tb_trace", but got {trace_type}')
elif self.on_trace_ready is None:
_on_trace_ready = None # type: ignore
else:
raise ValueError('on_trace_ready should be handler, dict or None, '
f'but got {type(self.on_trace_ready)}')
if runner.max_epochs > 1:
warnings.warn(f'profiler will profile {runner.max_epochs} epochs '
'instead of 1 epoch. Since profiler will slow down '
'the training, it is recommended to train 1 epoch '
'with ProfilerHook and adjust your setting according'
' to the profiler summary. During normal training '
'(epoch > 1), you may disable the ProfilerHook.')
self.profiler = torch.profiler.profile(
activities=self.activities,
schedule=self.schedule,
on_trace_ready=_on_trace_ready,
record_shapes=self.record_shapes,
profile_memory=self.profile_memory,
with_stack=self.with_stack,
with_flops=self.with_flops)
self.profiler.__enter__()
runner.logger.info('profiler is profiling...')
@master_only
def after_train_epoch(self, runner):
if self.by_epoch and runner.epoch == self.profile_iters - 1:
runner.logger.info('profiler may take a few minutes...')
self.profiler.__exit__(None, None, None)
if self.json_trace_path is not None:
self.profiler.export_chrome_trace(self.json_trace_path)
@master_only
def after_train_iter(self, runner):
self.profiler.step()
if not self.by_epoch and runner.iter == self.profile_iters - 1:
runner.logger.info('profiler may take a few minutes...')
self.profiler.__exit__(None, None, None)
if self.json_trace_path is not None:
self.profiler.export_chrome_trace(self.json_trace_path)
|