|
|
|
import copy |
|
import itertools |
|
import json |
|
import logging |
|
import os |
|
import pickle |
|
from collections import OrderedDict |
|
import torch |
|
|
|
import detectron2.utils.comm as comm |
|
from detectron2.config import CfgNode |
|
from detectron2.data import MetadataCatalog |
|
from detectron2.structures import Boxes, BoxMode, pairwise_iou |
|
from detectron2.utils.file_io import PathManager |
|
from detectron2.utils.logger import create_small_table |
|
|
|
from .coco_evaluation import instances_to_coco_json |
|
from .evaluator import DatasetEvaluator |
|
|
|
|
|
class LVISEvaluator(DatasetEvaluator):
    """
    Evaluate object proposal and instance detection/segmentation outputs using
    LVIS's metrics and evaluation API.

    Typical life cycle: :meth:`reset`, then :meth:`process` once per batch, then
    :meth:`evaluate` to compute the metrics from everything that was processed.
    """

    def __init__(
        self,
        dataset_name,
        tasks=None,
        distributed=True,
        output_dir=None,
        *,
        max_dets_per_image=None,
    ):
        """
        Args:
            dataset_name (str): name of the dataset to be evaluated.
                It must have the following corresponding metadata:
                "json_file": the path to the LVIS format annotation
            tasks (tuple[str]): tasks that can be evaluated under the given
                configuration. A task is one of "bbox", "segm".
                By default, will infer this automatically from predictions.
                Passing a ``CfgNode`` here is deprecated; it is ignored (with a
                warning) and the tasks are inferred instead.
            distributed (True): if True, will collect results from all ranks for evaluation.
                Otherwise, will evaluate the results in the current process.
            output_dir (str): optional, an output directory to dump results.
            max_dets_per_image (None or int): limit on maximum detections per image in evaluating AP
                This limit, by default of the LVIS dataset, is 300.
        """
        # Imported lazily so that the `lvis` package is only needed once an
        # evaluator is actually constructed.
        from lvis import LVIS

        self._logger = logging.getLogger(__name__)

        if tasks is not None and isinstance(tasks, CfgNode):
            self._logger.warning(
                "LVIS Evaluator instantiated using config, this is deprecated behavior."
                " Please pass in explicit arguments instead."
            )
            # A config object carries no usable task list; infer tasks later.
            self._tasks = None
        else:
            self._tasks = tasks

        self._distributed = distributed
        self._output_dir = output_dir
        self._max_dets_per_image = max_dets_per_image

        self._cpu_device = torch.device("cpu")

        self._metadata = MetadataCatalog.get(dataset_name)
        json_file = PathManager.get_local_path(self._metadata.json_file)
        self._lvis_api = LVIS(json_file)

        # Metrics can only be computed when the annotation file actually
        # contains annotations; otherwise results are only dumped to disk.
        self._do_evaluation = len(self._lvis_api.get_ann_ids()) > 0

        # Start with an empty buffer so process() is usable even when the
        # caller did not invoke reset() first.
        self._predictions = []

    def reset(self):
        """Discard all predictions accumulated so far."""
        self._predictions = []

    def process(self, inputs, outputs):
        """
        Args:
            inputs: the inputs to a LVIS model (e.g., GeneralizedRCNN).
                It is a list of dict. Each dict corresponds to an image and
                contains keys like "height", "width", "file_name", "image_id".
            outputs: the outputs of a LVIS model. It is a list of dicts with key
                "instances" that contains :class:`Instances`.
        """
        for image_input, output in zip(inputs, outputs):
            image_id = image_input["image_id"]
            prediction = {"image_id": image_id}

            if "instances" in output:
                # Move to CPU before converting to the JSON-style result dicts.
                instances = output["instances"].to(self._cpu_device)
                prediction["instances"] = instances_to_coco_json(instances, image_id)
            if "proposals" in output:
                prediction["proposals"] = output["proposals"].to(self._cpu_device)
            self._predictions.append(prediction)

    def evaluate(self):
        """
        Compute the metrics for everything passed to :meth:`process`.

        Returns:
            A deep copy of the results dict, keyed by task name ("bbox", "segm",
            "box_proposals"). An empty dict when no predictions were received.
            ``None`` on non-main processes when running distributed.
        """
        if self._distributed:
            comm.synchronize()
            predictions = comm.gather(self._predictions, dst=0)
            predictions = list(itertools.chain.from_iterable(predictions))

            # Only the main process holds the gathered predictions.
            if not comm.is_main_process():
                return
        else:
            predictions = self._predictions

        if len(predictions) == 0:
            self._logger.warning("[LVISEvaluator] Did not receive valid predictions.")
            return {}

        if self._output_dir:
            PathManager.mkdirs(self._output_dir)
            file_path = os.path.join(self._output_dir, "instances_predictions.pth")
            with PathManager.open(file_path, "wb") as f:
                torch.save(predictions, f)

        self._results = OrderedDict()
        # The first prediction decides which kinds of output are evaluated.
        if "proposals" in predictions[0]:
            self._eval_box_proposals(predictions)
        if "instances" in predictions[0]:
            self._eval_predictions(predictions)
        # Copy so the caller can freely modify the returned dict.
        return copy.deepcopy(self._results)

    def _tasks_from_predictions(self, predictions):
        """
        Infer the tasks to evaluate from a flat list of result dicts.

        Returns:
            ("bbox", "segm") if any result has a "segmentation" key,
            otherwise ("bbox",).
        """
        if any("segmentation" in pred for pred in predictions):
            return ("bbox", "segm")
        return ("bbox",)

    def _eval_predictions(self, predictions):
        """
        Evaluate predictions. Fill self._results with the metrics of the tasks.

        Args:
            predictions (list[dict]): list of outputs from the model
        """
        self._logger.info("Preparing results in the LVIS format ...")
        # Work on shallow copies: the category ids are rewritten below, and doing
        # that in place would corrupt the stored predictions, so that a second
        # evaluate() call would remap the ids twice.
        lvis_results = [
            dict(result)
            for result in itertools.chain.from_iterable(x["instances"] for x in predictions)
        ]
        tasks = self._tasks or self._tasks_from_predictions(lvis_results)

        if hasattr(self._metadata, "thing_dataset_id_to_contiguous_id"):
            # The metadata provides an explicit dataset-id -> contiguous-id
            # mapping; invert it to translate ids back to dataset ids.
            reverse_id_mapping = {
                v: k for k, v in self._metadata.thing_dataset_id_to_contiguous_id.items()
            }
            for result in lvis_results:
                result["category_id"] = reverse_id_mapping[result["category_id"]]
        else:
            # No explicit mapping: shift every id by one (presumably from
            # 0-indexed model ids to 1-indexed LVIS ids -- confirm).
            for result in lvis_results:
                result["category_id"] += 1

        if self._output_dir:
            file_path = os.path.join(self._output_dir, "lvis_instances_results.json")
            self._logger.info("Saving results to {}".format(file_path))
            with PathManager.open(file_path, "w") as f:
                f.write(json.dumps(lvis_results))
                f.flush()

        if not self._do_evaluation:
            self._logger.info("Annotations are not available for evaluation.")
            return

        self._logger.info("Evaluating predictions ...")
        for task in sorted(tasks):
            res = _evaluate_predictions_on_lvis(
                self._lvis_api,
                lvis_results,
                task,
                max_dets_per_image=self._max_dets_per_image,
                class_names=self._metadata.get("thing_classes"),
            )
            self._results[task] = res

    def _eval_box_proposals(self, predictions):
        """
        Evaluate the box proposals in predictions.
        Fill self._results with the metrics for "box_proposals" task.

        When an output directory is configured, the raw proposals are also
        pickled to "box_proposals.pkl" inside it.
        """
        if self._output_dir:
            # The box mode is stored as the raw enum value next to the boxes
            # (presumably the proposals are in XYXY_ABS mode -- confirm).
            bbox_mode = BoxMode.XYXY_ABS.value
            ids, boxes, objectness_logits = [], [], []
            for prediction in predictions:
                ids.append(prediction["image_id"])
                boxes.append(prediction["proposals"].proposal_boxes.tensor.numpy())
                objectness_logits.append(prediction["proposals"].objectness_logits.numpy())

            proposal_data = {
                "boxes": boxes,
                "objectness_logits": objectness_logits,
                "ids": ids,
                "bbox_mode": bbox_mode,
            }
            with PathManager.open(os.path.join(self._output_dir, "box_proposals.pkl"), "wb") as f:
                pickle.dump(proposal_data, f)

        if not self._do_evaluation:
            self._logger.info("Annotations are not available for evaluation.")
            return

        self._logger.info("Evaluating bbox proposals ...")
        res = {}
        # Maps each area bucket to the suffix used in the metric name.
        areas = {"all": "", "small": "s", "medium": "m", "large": "l"}
        for limit in [100, 1000]:
            for area, suffix in areas.items():
                stats = _evaluate_box_proposals(predictions, self._lvis_api, area=area, limit=limit)
                key = "AR{}@{:d}".format(suffix, limit)
                # Reported as a percentage.
                res[key] = float(stats["ar"].item() * 100)
        self._logger.info("Proposal metrics: \n" + create_small_table(res))
        self._results["box_proposals"] = res
|
|
|
|
|
|
|
|
|
def _evaluate_box_proposals(dataset_predictions, lvis_api, thresholds=None, area="all", limit=None):
    """
    Evaluate detection proposal recall metrics. This function is a much
    faster alternative to the official LVIS API recall evaluation code. However,
    it produces slightly different results.

    Args:
        dataset_predictions (list[dict]): one dict per image with the keys
            "image_id" and "proposals". The proposals object must expose
            ``objectness_logits`` and ``proposal_boxes`` and support indexing.
        lvis_api: object providing ``get_ann_ids(img_ids=...)`` and
            ``load_anns(ann_ids)``; annotations are read via their "bbox"
            (converted from XYWH_ABS) and "area" fields.
        thresholds (None, torch.Tensor or sequence of float): IoU thresholds at
            which recall is computed. Defaults to 0.5 through 0.95 in steps of
            0.05. Non-tensor inputs are converted to a 1-D float32 tensor.
        area (str): name of the ground-truth area bucket to evaluate; must be a
            key of the ``areas`` table below.
        limit (None or int): if given, only the ``limit`` highest scoring
            proposals of each image are considered.

    Returns:
        dict with keys "ar" (mean recall over the thresholds), "recalls",
        "thresholds", "gt_overlaps" (sorted best IoU per matched ground-truth
        slot) and "num_pos" (number of ground-truth boxes counted).
        When ``num_pos`` is 0 the recalls, and therefore "ar", are NaN.
    """
    # Each area name indexes into `area_ranges`, which holds [min, max] areas.
    areas = {
        "all": 0,
        "small": 1,
        "medium": 2,
        "large": 3,
        "96-128": 4,
        "128-256": 5,
        "256-512": 6,
        "512-inf": 7,
    }
    area_ranges = [
        [0**2, 1e5**2],
        [0**2, 32**2],
        [32**2, 96**2],
        [96**2, 1e5**2],
        [96**2, 128**2],
        [128**2, 256**2],
        [256**2, 512**2],
        [512**2, 1e5**2],
    ]
    assert area in areas, "Unknown area range: {}".format(area)
    area_range = area_ranges[areas[area]]
    gt_overlaps = []
    num_pos = 0

    for prediction_dict in dataset_predictions:
        predictions = prediction_dict["proposals"]

        # Sort by descending objectness so that truncating to `limit` below
        # keeps the highest scoring proposals.
        inds = predictions.objectness_logits.sort(descending=True)[1]
        predictions = predictions[inds]

        ann_ids = lvis_api.get_ann_ids(img_ids=[prediction_dict["image_id"]])
        anno = lvis_api.load_anns(ann_ids)
        gt_boxes = [
            BoxMode.convert(obj["bbox"], BoxMode.XYWH_ABS, BoxMode.XYXY_ABS) for obj in anno
        ]
        # reshape keeps a well-formed (0, 4) tensor when there are no annotations.
        gt_boxes = torch.as_tensor(gt_boxes).reshape(-1, 4)
        gt_boxes = Boxes(gt_boxes)
        gt_areas = torch.as_tensor([obj["area"] for obj in anno])

        # Note: such images are skipped before `num_pos` is updated, so their
        # ground truth (if any) does not count towards the recall denominator.
        if len(gt_boxes) == 0 or len(predictions) == 0:
            continue

        valid_gt_inds = (gt_areas >= area_range[0]) & (gt_areas <= area_range[1])
        gt_boxes = gt_boxes[valid_gt_inds]

        num_pos += len(gt_boxes)

        if len(gt_boxes) == 0:
            continue

        if limit is not None and len(predictions) > limit:
            predictions = predictions[:limit]

        overlaps = pairwise_iou(predictions.proposal_boxes, gt_boxes)

        # Greedy one-to-one matching: repeatedly take the (proposal, gt) pair
        # with the highest remaining IoU and retire both.
        _gt_overlaps = torch.zeros(len(gt_boxes))
        for j in range(min(len(predictions), len(gt_boxes))):
            # Best remaining proposal for every ground-truth box.
            max_overlaps, argmax_overlaps = overlaps.max(dim=0)

            # Ground-truth box that is currently covered best.
            gt_ovr, gt_ind = max_overlaps.max(dim=0)
            assert gt_ovr >= 0

            # Proposal that covers it.
            box_ind = argmax_overlaps[gt_ind]

            # Record the IoU achieved for this match.
            _gt_overlaps[j] = overlaps[box_ind, gt_ind]
            assert _gt_overlaps[j] == gt_ovr

            # Mark the matched proposal and ground-truth box as used.
            overlaps[box_ind, :] = -1
            overlaps[:, gt_ind] = -1

        gt_overlaps.append(_gt_overlaps)
    gt_overlaps = (
        torch.cat(gt_overlaps, dim=0) if len(gt_overlaps) else torch.zeros(0, dtype=torch.float32)
    )
    gt_overlaps, _ = torch.sort(gt_overlaps)

    if thresholds is None:
        step = 0.05
        thresholds = torch.arange(0.5, 0.95 + 1e-5, step, dtype=torch.float32)
    elif not torch.is_tensor(thresholds):
        # Accept plain Python sequences (or a single number); tensors are used
        # as given so existing callers see no change.
        thresholds = torch.as_tensor(thresholds, dtype=torch.float32).reshape(-1)
    recalls = torch.zeros_like(thresholds)

    # Recall at each threshold: fraction of counted ground-truth boxes whose
    # matched IoU reaches the threshold.
    for i, t in enumerate(thresholds):
        recalls[i] = (gt_overlaps >= t).float().sum() / float(num_pos)

    ar = recalls.mean()
    return {
        "ar": ar,
        "recalls": recalls,
        "thresholds": thresholds,
        "gt_overlaps": gt_overlaps,
        "num_pos": num_pos,
    }
|
|
|
|
|
def _evaluate_predictions_on_lvis(
    lvis_gt, lvis_results, iou_type, max_dets_per_image=None, class_names=None
):
    """
    Run the LVIS evaluation API on a list of results for one task.

    Args:
        lvis_gt: the ground-truth object handed to ``LVISResults`` and ``LVISEval``.
        lvis_results (list[dict]): results in LVIS format.
        iou_type (str): "bbox" or "segm"; any other value raises ``KeyError``.
        max_dets_per_image (None or int): limit on maximum detections per image in evaluating AP
            This limit, by default of the LVIS dataset, is 300.
        class_names (None or list[str]): accepted for interface compatibility;
            currently unused by this function.

    Returns:
        a dict of {metric name: score}. Scores are scaled by 100. If
        ``lvis_results`` is empty, every score is NaN.
    """
    # Both tasks report the same set of metrics.
    metric_names = ["AP", "AP50", "AP75", "APs", "APm", "APl", "APr", "APc", "APf"]
    metrics = {
        "bbox": metric_names,
        "segm": metric_names,
    }[iou_type]

    logger = logging.getLogger(__name__)

    if len(lvis_results) == 0:
        logger.warning("No predictions from the model!")
        return {metric: float("nan") for metric in metrics}

    if iou_type == "segm":
        # Copy first so the caller's results keep their boxes, then drop the
        # "bbox" field (presumably so that the LVIS API derives the instance
        # area from the mask instead of the box -- confirm).
        lvis_results = copy.deepcopy(lvis_results)
        for c in lvis_results:
            c.pop("bbox", None)

    if max_dets_per_image is None:
        max_dets_per_image = 300

    # Imported lazily: the early return above works without the `lvis` package.
    from lvis import LVISEval, LVISResults

    logger.info(f"Evaluating with max detections per image = {max_dets_per_image}")
    lvis_results = LVISResults(lvis_gt, lvis_results, max_dets=max_dets_per_image)
    lvis_eval = LVISEval(lvis_gt, lvis_results, iou_type)
    lvis_eval.run()
    lvis_eval.print_results()

    # Keep only the metrics listed above, scaled by 100.
    results = lvis_eval.get_results()
    results = {metric: float(results[metric] * 100) for metric in metrics}
    logger.info("Evaluation results for {}: \n".format(iou_type) + create_small_table(results))
    return results
|
|