import datetime
import logging
import time
from collections import OrderedDict, abc
from contextlib import ExitStack, contextmanager
from typing import List, Union
import torch
from torch import nn
|
|
from detectron2.utils.comm import get_world_size, is_main_process
from detectron2.utils.logger import log_every_n_seconds
|
|
|
|
class DatasetEvaluator:
    """
    Base class for a dataset evaluator.

    The function :func:`inference_on_dataset` runs the model over
    all samples in the dataset, and uses a DatasetEvaluator to process the inputs/outputs.

    This class will accumulate information about the inputs/outputs (by :meth:`process`),
    and produce evaluation results in the end (by :meth:`evaluate`).
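
    A minimal subclass might look like the following (an illustrative sketch;
    ``MyCounter`` is a hypothetical evaluator, not part of detectron2):

    .. code-block:: python

        class MyCounter(DatasetEvaluator):
            def reset(self):
                self.count = 0

            def process(self, inputs, outputs):
                # one prediction is produced per input
                self.count += len(outputs)

            def evaluate(self):
                return {"counting": {"num_outputs": self.count}}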
| """ |
|
|
    def reset(self):
        """
        Preparation for a new round of evaluation.
        Should be called before starting a round of evaluation.
        """
        pass
|
|
    def process(self, inputs, outputs):
        """
        Process the pair of inputs and outputs.
        If they contain batches, the pairs can be consumed one-by-one using `zip`:

        .. code-block:: python

            for input_, output in zip(inputs, outputs):
                # do evaluation on single input/output pair
                ...

        Args:
            inputs (list): the inputs that are used to call the model.
            outputs (list): the return value of `model(inputs)`
        """
        pass
|
|
    def evaluate(self):
        """
        Evaluate/summarize the performance after processing all input/output pairs.

        Returns:
            dict:
                A new evaluator class can return a dict of arbitrary format
                as long as the user can process the results.
                In our train_net.py, we expect the following format:

                * key: the name of the task (e.g., bbox)
                * value: a dict of {metric name: score}, e.g.: {"AP50": 80}
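
                For example (the value is purely illustrative):

                .. code-block:: python

                    {"bbox": {"AP50": 80.0}}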
| """ |
| pass |
|
|
|
|
class DatasetEvaluators(DatasetEvaluator):
    """
    Wrapper class to combine multiple :class:`DatasetEvaluator` instances.

    This class dispatches every evaluation call to
    all of its :class:`DatasetEvaluator` instances.
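
    For example (an illustrative sketch; ``evaluator_a`` and ``evaluator_b`` are
    assumed to be existing :class:`DatasetEvaluator` instances):

    .. code-block:: python

        evaluator = DatasetEvaluators([evaluator_a, evaluator_b])
        # reset/process/evaluate calls are forwarded to both evaluators
        evaluator.reset()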
| """ |
|
|
    def __init__(self, evaluators):
        """
        Args:
            evaluators (list): the evaluators to combine.
        """
        super().__init__()
        self._evaluators = evaluators
|
|
    def reset(self):
        for evaluator in self._evaluators:
            evaluator.reset()
|
|
    def process(self, inputs, outputs):
        for evaluator in self._evaluators:
            evaluator.process(inputs, outputs)
|
|
    def evaluate(self):
        results = OrderedDict()
        for evaluator in self._evaluators:
            result = evaluator.evaluate()
            # merge results on the main process; an evaluator may return None elsewhere
            if is_main_process() and result is not None:
                for k, v in result.items():
                    assert (
                        k not in results
                    ), "Different evaluators produce results with the same key {}".format(k)
                    results[k] = v
        return results
|
|
|
|
def inference_on_dataset(
    model,
    data_loader,
    evaluator: Union[DatasetEvaluator, List[DatasetEvaluator], None],
    callbacks=None,
):
| """ |
| Run model on the data_loader and evaluate the metrics with evaluator. |
| Also benchmark the inference speed of `model.__call__` accurately. |
| The model will be used in eval mode. |
| |
| Args: |
| model (callable): a callable which takes an object from |
| `data_loader` and returns some outputs. |
| |
| If it's an nn.Module, it will be temporarily set to `eval` mode. |
| If you wish to evaluate a model in `training` mode instead, you can |
| wrap the given model and override its behavior of `.eval()` and `.train()`. |
| data_loader: an iterable object with a length. |
| The elements it generates will be the inputs to the model. |
| evaluator: the evaluator(s) to run. Use `None` if you only want to benchmark, |
| but don't want to do any evaluation. |
| callbacks (dict of callables): a dictionary of callback functions which can be |
| called at each stage of inference. |
| |
| Returns: |
| The return value of `evaluator.evaluate()` |
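
    A typical call looks like the following (an illustrative sketch; ``model``,
    ``data_loader`` and ``my_evaluator`` are assumed to be built elsewhere):

    .. code-block:: python

        results = inference_on_dataset(
            model,
            data_loader,
            my_evaluator,
            callbacks={"on_start": lambda: print("starting inference")},
        )
        # `results` has the format produced by `my_evaluator.evaluate()`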
| """ |
    num_devices = get_world_size()
    logger = logging.getLogger(__name__)
    logger.info("Start inference on {} batches".format(len(data_loader)))
|
|
    total = len(data_loader)  # inference data loader must have a fixed length
    if evaluator is None:
        # no evaluator given: create a no-op evaluator so the loop below only benchmarks
        evaluator = DatasetEvaluators([])
    if isinstance(evaluator, abc.MutableSequence):
        evaluator = DatasetEvaluators(evaluator)
    evaluator.reset()
|
|
    # the first few iterations are treated as warmup and excluded from the timings
    num_warmup = min(5, total - 1)
    start_time = time.perf_counter()
    total_data_time = 0
    total_compute_time = 0
    total_eval_time = 0
    with ExitStack() as stack:
        if isinstance(model, nn.Module):
            stack.enter_context(inference_context(model))
        stack.enter_context(torch.no_grad())
|
|
        start_data_time = time.perf_counter()
        dict.get(callbacks or {}, "on_start", lambda: None)()
        for idx, inputs in enumerate(data_loader):
            total_data_time += time.perf_counter() - start_data_time
            if idx == num_warmup:
                # warmup is over: restart all timers
                start_time = time.perf_counter()
                total_data_time = 0
                total_compute_time = 0
                total_eval_time = 0
|
|
            start_compute_time = time.perf_counter()
            dict.get(callbacks or {}, "before_inference", lambda: None)()
            outputs = model(inputs)
            dict.get(callbacks or {}, "after_inference", lambda: None)()
            if torch.cuda.is_available():
                # wait for async CUDA kernels so compute time is measured accurately
                torch.cuda.synchronize()
            total_compute_time += time.perf_counter() - start_compute_time
|
|
            start_eval_time = time.perf_counter()
            evaluator.process(inputs, outputs)
            total_eval_time += time.perf_counter() - start_eval_time
|
|
            # iterations counted since the timers were (re)started
            iters_after_start = idx + 1 - num_warmup * int(idx >= num_warmup)
            data_seconds_per_iter = total_data_time / iters_after_start
            compute_seconds_per_iter = total_compute_time / iters_after_start
            eval_seconds_per_iter = total_eval_time / iters_after_start
            total_seconds_per_iter = (time.perf_counter() - start_time) / iters_after_start
            if idx >= num_warmup * 2 or compute_seconds_per_iter > 5:
                eta = datetime.timedelta(seconds=int(total_seconds_per_iter * (total - idx - 1)))
                log_every_n_seconds(
                    logging.INFO,
                    (
                        f"Inference done {idx + 1}/{total}. "
                        f"Dataloading: {data_seconds_per_iter:.4f} s/iter. "
                        f"Inference: {compute_seconds_per_iter:.4f} s/iter. "
                        f"Eval: {eval_seconds_per_iter:.4f} s/iter. "
                        f"Total: {total_seconds_per_iter:.4f} s/iter. "
                        f"ETA={eta}"
                    ),
                    n=5,
                )
            start_data_time = time.perf_counter()
        dict.get(callbacks or {}, "on_end", lambda: None)()
|
|
    # wall-clock and compute time measured on this worker, excluding warmup iterations
    total_time = time.perf_counter() - start_time
    total_time_str = str(datetime.timedelta(seconds=total_time))
    logger.info(
        "Total inference time: {} ({:.6f} s / iter per device, on {} devices)".format(
            total_time_str, total_time / (total - num_warmup), num_devices
        )
    )
    total_compute_time_str = str(datetime.timedelta(seconds=int(total_compute_time)))
    logger.info(
        "Total inference pure compute time: {} ({:.6f} s / iter per device, on {} devices)".format(
            total_compute_time_str, total_compute_time / (total - num_warmup), num_devices
        )
    )
|
|
    results = evaluator.evaluate()
    # An evaluator may return None when not running on the main process.
    # Replace it with an empty dict to make it easier for downstream code to handle.
    if results is None:
        results = {}
    return results
|
|
|
|
@contextmanager
def inference_context(model):
    """
    A context where the model is temporarily changed to eval mode,
    and restored to the previous mode afterwards.

    Args:
        model: a torch Module
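
    For example (an illustrative sketch; ``model`` and ``inputs`` are assumed to exist):

    .. code-block:: python

        with inference_context(model), torch.no_grad():
            outputs = model(inputs)
        # the previous training/eval mode of `model` is restored here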
| """ |
    training_mode = model.training
    model.eval()
    yield
    model.train(training_mode)
|
|