|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""Helper functions for AVA evaluation.""" |
|
|
|
from __future__ import ( |
|
absolute_import, |
|
division, |
|
print_function, |
|
unicode_literals, |
|
) |
|
import csv |
|
import logging |
|
import numpy as np |
|
import pprint |
|
import time |
|
from collections import defaultdict |
|
from fvcore.common.file_io import PathManager |
|
import timesformer.utils.distributed as du |
|
|
|
from timesformer.utils.ava_evaluation import ( |
|
object_detection_evaluation, |
|
standard_fields, |
|
) |
|
|
|
logger = logging.getLogger(__name__) |
|
|
|
|
|
def make_image_key(video_id, timestamp):
    """Build the "<video_id>,<timestamp>" key that identifies one frame.

    The timestamp is converted with `int()` and zero-padded to at least four
    digits, e.g. ("abc", "904") -> "abc,0904".
    """
    frame_second = int(timestamp)
    return "{},{:04d}".format(video_id, frame_second)
|
|
|
|
|
def read_csv(csv_file, class_whitelist=None, load_score=False):
    """Loads boxes and class labels from a CSV file in the AVA format.

    CSV file format described at https://research.google.com/ava/download.html.

    Args:
        csv_file: Path of the CSV file; it is opened with `PathManager.open`.
        class_whitelist: If provided (and non-empty), boxes corresponding to
            (integer) class labels not in this set are skipped.
        load_score: If True, read each box's score from the eighth column
            when the row has one.

    Returns:
        boxes: A dictionary mapping each unique image key (string) to a list
            of boxes, given as coordinates [y1, x1, y2, x2].
        labels: A dictionary mapping each unique image key (string) to a list
            of integer class labels, matching the corresponding box in
            `boxes`.
        scores: A dictionary mapping each unique image key (string) to a list
            of score values, matching the corresponding label in `labels`.
            If scores are not loaded or not provided in the csv, they
            default to 1.0.

    Raises:
        AssertionError: If a row does not have 7 or 8 columns.
    """
    boxes = defaultdict(list)
    labels = defaultdict(list)
    scores = defaultdict(list)
    with PathManager.open(csv_file, "r") as f:
        reader = csv.reader(f)
        for row in reader:
            # `row` is a list, so it has to be formatted into the message;
            # concatenating it onto a str would raise TypeError instead.
            assert len(row) in [7, 8], "Wrong number of columns: %s" % (row,)
            image_key = make_image_key(row[0], row[1])
            x1, y1, x2, y2 = [float(n) for n in row[2:6]]
            action_id = int(row[6])
            if class_whitelist and action_id not in class_whitelist:
                continue
            score = 1.0
            # The score column is optional: fall back to 1.0 for 7-column
            # rows instead of indexing past the end of the row.
            if load_score and len(row) == 8:
                score = float(row[7])
            # The file stores x1, y1, x2, y2; boxes are kept as y1, x1, y2, x2.
            boxes[image_key].append([y1, x1, y2, x2])
            labels[image_key].append(action_id)
            scores[image_key].append(score)
    return boxes, labels, scores
|
|
|
|
|
def read_exclusions(exclusions_file):
    """Reads a CSV file of excluded timestamps.

    Args:
        exclusions_file: Path of a csv file of video-id,timestamp rows (opened
            with `PathManager.open`), or a falsy value for "no exclusions".

    Returns:
        A set of strings containing excluded image keys, e.g.
        "aaaaaaaaaaa,0904", or an empty set if `exclusions_file` is falsy.

    Raises:
        AssertionError: If a row does not have exactly 2 columns.
    """
    excluded = set()
    if not exclusions_file:
        return excluded
    with PathManager.open(exclusions_file, "r") as f:
        for row in csv.reader(f):
            # `row` is a list, so it has to be formatted into the message;
            # concatenating it onto a str would raise TypeError instead.
            assert len(row) == 2, "Expected only 2 columns, got: %s" % (row,)
            excluded.add(make_image_key(row[0], row[1]))
    return excluded
|
|
|
|
|
def read_labelmap(labelmap_file):
    """Read label map and class ids.

    The file is scanned line by line. A `name:` line supplies the text between
    its first pair of double quotes; an `id:` or `label_id:` line supplies an
    integer id and emits one entry, paired with the most recently seen name.
    Leading and trailing whitespace on each line is ignored, so the result
    does not depend on how the entries are indented.

    Args:
        labelmap_file: Path of the label map file; it is opened with
            `PathManager.open`.

    Returns:
        labelmap: A list of {"id": int, "name": str} dicts, in file order.
        class_ids: The set of all integer ids found.
    """
    labelmap = []
    class_ids = set()
    name = ""
    with PathManager.open(labelmap_file, "r") as f:
        for line in f:
            field = line.strip()
            if field.startswith("name:"):
                name = field.split('"')[1]
            elif field.startswith(("id:", "label_id:")):
                class_id = int(field.split(" ")[-1])
                labelmap.append({"id": class_id, "name": name})
                class_ids.add(class_id)
    return labelmap, class_ids
|
|
|
|
|
def evaluate_ava_from_files(labelmap, groundtruth, detections, exclusions):
    """Run AVA evaluation given annotation/prediction files.

    Args:
        labelmap: Path of the label map file, read with `read_labelmap`.
        groundtruth: Path of the ground-truth csv, read without scores.
        detections: Path of the detections csv, read with scores.
        exclusions: Path of the excluded-timestamps csv, read with
            `read_exclusions`.

    Returns:
        The metrics returned by `run_evaluation`.
    """
    categories, class_whitelist = read_labelmap(labelmap)
    excluded_keys = read_exclusions(exclusions)
    gt_data = read_csv(groundtruth, class_whitelist, load_score=False)
    det_data = read_csv(detections, class_whitelist, load_score=True)
    # Hand the metrics back to the caller instead of discarding them.
    return run_evaluation(categories, gt_data, det_data, excluded_keys)
|
|
|
|
|
def evaluate_ava(
    preds,
    original_boxes,
    metadata,
    excluded_keys,
    class_whitelist,
    categories,
    groundtruth=None,
    video_idx_to_name=None,
    name="latest",
):
    """Run AVA evaluation given numpy arrays.

    The predictions are converted with `get_ava_eval_data`, both detections
    and ground truth are written out with `write_results` (to
    "detections_<name>.csv" and "groundtruth_<name>.csv"), and the two are
    scored with `run_evaluation`.

    Args:
        preds: Per-box class scores, forwarded to `get_ava_eval_data`.
        original_boxes: Boxes matching `preds`, forwarded to
            `get_ava_eval_data`.
        metadata: Per-box metadata, forwarded to `get_ava_eval_data`.
        excluded_keys: Image keys to leave out of the evaluation.
        class_whitelist: Class ids to keep when converting predictions.
        categories: Category list forwarded to `run_evaluation`.
        groundtruth: Tuple `(boxes, labels, scores)` of dicts keyed by image
            key. Required despite the `None` default.
        video_idx_to_name: Lookup from video index to video name, forwarded
            to `get_ava_eval_data`.
        name: Suffix used in the names of the written csv files.

    Returns:
        The "PascalBoxes_Precision/mAP@0.5IOU" entry of the metrics.

    Raises:
        TypeError: If `groundtruth` is None.
    """
    # Fail fast with a readable message; otherwise the first subscript of
    # `groundtruth` below raises an opaque TypeError after work is done.
    if groundtruth is None:
        raise TypeError("evaluate_ava() requires `groundtruth`, got None.")

    eval_start = time.time()

    detections = get_ava_eval_data(
        preds,
        original_boxes,
        metadata,
        class_whitelist,
        video_idx_to_name=video_idx_to_name,
    )

    logger.info("Evaluating with %d unique GT frames.", len(groundtruth[0]))
    logger.info(
        "Evaluating with %d unique detection frames", len(detections[0])
    )

    write_results(detections, "detections_%s.csv" % name)
    write_results(groundtruth, "groundtruth_%s.csv" % name)

    results = run_evaluation(categories, groundtruth, detections, excluded_keys)

    logger.info("AVA eval done in %f seconds.", time.time() - eval_start)
    return results["PascalBoxes_Precision/mAP@0.5IOU"]
|
|
|
|
|
def run_evaluation(
    categories, groundtruth, detections, excluded_keys, verbose=True
):
    """AVA evaluation main logic.

    Feeds every non-excluded ground-truth and detection frame into a
    `PascalDetectionEvaluator` and returns the metrics it computes.

    Args:
        categories: Category list handed to `PascalDetectionEvaluator`.
        groundtruth: Tuple `(boxes, labels, scores)` of dicts keyed by image
            key; the scores entry is ignored.
        detections: Tuple `(boxes, labels, scores)` of dicts keyed by image
            key.
        excluded_keys: Image keys that are skipped in both inputs.
        verbose: Currently unused; kept so existing callers keep working.

    Returns:
        The value returned by the evaluator's `evaluate()`. It is also
        pretty-printed when `du.is_master_proc()` is true.
    """
    pascal_evaluator = object_detection_evaluation.PascalDetectionEvaluator(
        categories
    )

    gt_boxes, gt_labels, _ = groundtruth
    for image_key in gt_boxes:
        if image_key in excluded_keys:
            logger.info(
                (
                    "Found excluded timestamp in ground truth: %s. "
                    "It will be ignored."
                ),
                image_key,
            )
            continue
        pascal_evaluator.add_single_ground_truth_image_info(
            image_key,
            {
                standard_fields.InputDataFields.groundtruth_boxes: np.array(
                    gt_boxes[image_key], dtype=float
                ),
                standard_fields.InputDataFields.groundtruth_classes: np.array(
                    gt_labels[image_key], dtype=int
                ),
                # No ground-truth box is flagged as difficult.
                standard_fields.InputDataFields.groundtruth_difficult: np.zeros(
                    len(gt_boxes[image_key]), dtype=bool
                ),
            },
        )

    det_boxes, det_labels, det_scores = detections
    for image_key in det_boxes:
        if image_key in excluded_keys:
            logger.info(
                (
                    "Found excluded timestamp in detections: %s. "
                    "It will be ignored."
                ),
                image_key,
            )
            continue
        pascal_evaluator.add_single_detected_image_info(
            image_key,
            {
                standard_fields.DetectionResultFields.detection_boxes: np.array(
                    det_boxes[image_key], dtype=float
                ),
                standard_fields.DetectionResultFields.detection_classes: np.array(
                    det_labels[image_key], dtype=int
                ),
                standard_fields.DetectionResultFields.detection_scores: np.array(
                    det_scores[image_key], dtype=float
                ),
            },
        )

    metrics = pascal_evaluator.evaluate()

    if du.is_master_proc():
        pprint.pprint(metrics, indent=2)
    return metrics
|
|
|
|
|
def get_ava_eval_data(
    scores,
    boxes,
    metadata,
    class_whitelist,
    verbose=False,
    video_idx_to_name=None,
):
    """
    Convert our data format into the data format used in official AVA
    evaluation.

    Args:
        scores: Array whose row `i` holds one score per class for box `i`;
            column `c` is reported as class id `c + 1`.
        boxes: Array whose row `i` is the box for `scores[i]`. Elements 1-4
            are emitted in the order [2, 1, 4, 3]; element 0 is dropped.
            NOTE(review): presumably rows are [batch_idx, x1, y1, x2, y2],
            giving [y1, x1, y2, x2] on output - confirm against the caller.
        metadata: Sequence whose row `i` starts with (video index, second);
            both are rounded to the nearest integer.
        class_whitelist: Collection of class ids to keep.
        verbose: Currently unused; kept so existing callers keep working.
        video_idx_to_name: Lookup from video index to video name. Required
            whenever `scores` has at least one row.

    Returns:
        A tuple `(out_boxes, out_labels, out_scores)` of dicts keyed by
        "<video name>,<zero-padded second>", each mapping to a list with one
        entry per (box, whitelisted class) pair.

    Raises:
        TypeError: If there are rows to convert but `video_idx_to_name` is
            None.
    """
    out_scores = defaultdict(list)
    out_labels = defaultdict(list)
    out_boxes = defaultdict(list)

    num_rows = scores.shape[0]
    # Without the lookup, indexing None below would raise an opaque
    # "'NoneType' object is not subscriptable" error.
    if num_rows and video_idx_to_name is None:
        raise TypeError(
            "get_ava_eval_data() requires `video_idx_to_name` to map video "
            "indices to names, got None."
        )

    for i in range(num_rows):
        video_idx = int(np.round(metadata[i][0]))
        sec = int(np.round(metadata[i][1]))
        key = video_idx_to_name[video_idx] + "," + "%04d" % (sec)

        # Drop element 0 and swap each coordinate pair.
        raw_box = boxes[i].tolist()
        eval_box = [raw_box[2], raw_box[1], raw_box[4], raw_box[3]]

        for cls_idx, score in enumerate(scores[i].tolist()):
            class_id = cls_idx + 1
            if class_id in class_whitelist:
                out_scores[key].append(score)
                out_labels[key].append(class_id)
                # Append a copy so entries never alias the same list.
                out_boxes[key].append(list(eval_box))

    return out_boxes, out_labels, out_scores
|
|
|
|
|
def write_results(detections, filename):
    """Dump a `(boxes, labels, scores)` triple to `filename` as csv rows.

    Each output line is the image key followed by box elements 1, 0, 3, 2
    (three decimals each), the integer label and the score (four decimals).
    The time spent is logged once the file has been written.
    """
    started_at = time.time()
    row_template = "%s,%.03f,%.03f,%.03f,%.03f,%d,%.04f\n"

    all_boxes, all_labels, all_scores = detections
    with PathManager.open(filename, "w") as out_file:
        for image_key in all_boxes.keys():
            entries = zip(
                all_boxes[image_key],
                all_labels[image_key],
                all_scores[image_key],
            )
            for coords, class_id, confidence in entries:
                fields = (
                    image_key,
                    coords[1],
                    coords[0],
                    coords[3],
                    coords[2],
                    class_id,
                    confidence,
                )
                out_file.write(row_template % fields)

    logger.info("AVA results wrote to %s" % filename)
    elapsed = time.time() - started_at
    logger.info("\ttook %d seconds." % elapsed)
|
|