|
import json |
|
import os |
|
|
|
import numpy as np |
|
import pandas as pd |
|
import torch |
|
from pycocotools.coco import COCO |
|
from torchvision.ops.boxes import box_convert, box_iou |
|
from tqdm import tqdm |
|
|
|
|
|
class NpEncoder(json.JSONEncoder):
    """JSON encoder that understands NumPy scalars and arrays.

    The standard encoder rejects NumPy types; this subclass converts them to
    the equivalent built-in Python objects before serialization.
    """

    def default(self, obj):
        """Convert a NumPy object to its native Python equivalent.

        Args:
            obj (any): The object the standard encoder could not serialize.

        Returns:
            any: An ``int``, ``float``, ``bool`` or ``list`` that the standard
            encoder can serialize.

        Raises:
            TypeError: Raised by the base class when ``obj`` is not one of
                the handled NumPy types.
        """
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.bool_):
            # NumPy booleans are not subclasses of Python's bool, so the
            # standard encoder hands them to default() like any other scalar.
            return bool(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return super().default(obj)
|
|
|
|
|
class Ensembler:
    """Ensemble COCO-format detections from several object detection models.

    The constructor loads the ground truth file and one prediction file per
    model. `mean_score_nms` merges overlapping same-category detections across
    models, and `save_coco_instances` writes the merged detections as JSON.
    """

    def __init__(
        self, output_dir, dataset_name, grplist, iou_thresh, coco_gt_path=None, coco_instances_results_fname=None
    ):
        """Load the ground truth and every model's predictions.

        Args:
            output_dir (str): The base directory where model outputs and
                ensembled results are stored.
            dataset_name (str): The name of the dataset being evaluated. Only
                used to build the default ground truth path
                ``<output_dir>/<dataset_name>_coco_format.json``.
            grplist (list[str]): Subdirectory names under `output_dir`, one
                per model, each containing that model's prediction file.
            iou_thresh (float): Two boxes are treated as overlapping when
                their IoU is strictly greater than this value.
            coco_gt_path (str, optional): The full path to the ground truth
                COCO JSON file. Defaults to None, which selects the default
                path described under `dataset_name`.
            coco_instances_results_fname (str, optional): The filename of the
                prediction file inside each model's subdirectory. Defaults
                to None, which selects "coco_instances_results.json".
        """
        self.output_dir = output_dir
        self.dataset_name = dataset_name
        self.grplist = grplist
        self.iou_thresh = iou_thresh
        self.n_detectors = len(grplist)

        if coco_gt_path is None:
            fname_gt = os.path.join(output_dir, dataset_name + "_coco_format.json")
        else:
            fname_gt = coco_gt_path

        if coco_instances_results_fname is None:
            fname_dt = "coco_instances_results.json"
        else:
            fname_dt = coco_instances_results_fname

        coco_gt = COCO(fname_gt)

        dtlist = []
        for grp in grplist:
            fname = os.path.join(output_dir, grp, fname_dt)
            dtlist.append(coco_gt.loadRes(fname))
            print("Successfully loaded {} into memory. {} instance detected.\n".format(fname, len(dtlist[-1].anns)))

        self.coco_gt = coco_gt
        self.cats = [cat["id"] for cat in self.coco_gt.dataset["categories"]]
        self.dtlist = dtlist
        self.results = []

        print(
            "Working with {} models, {} categories, and {} images.".format(
                self.n_detectors, len(self.cats), len(self.coco_gt.imgs.keys())
            )
        )

    @staticmethod
    def _merge_intersecting(groups):
        """Union groups that share an element until all groups are disjoint.

        Adapted from Niklas B.,
        https://github.com/rikpg/IntersectionMerge/blob/master/core.py

        Args:
            groups (iterable): Iterables of hashable items. Empty groups are
                discarded.

        Returns:
            list[set]: Pairwise disjoint sets covering every item that
            appeared in a non-empty input group.
        """
        sets = [set(group) for group in groups if group]
        merged = True
        while merged:
            merged = False
            results = []
            while sets:
                common, rest = sets[0], sets[1:]
                sets = []
                for other in rest:
                    if other.isdisjoint(common):
                        sets.append(other)
                    else:
                        merged = True
                        common |= other
                results.append(common)
            # A union made late in a pass can newly intersect a group that
            # was finalised earlier in it, so repeat until nothing merges.
            sets = results
        return sets

    def mean_score_nms(self):
        """Merge overlapping detections across models for every image.

        For each image and category, detections from all models whose boxes
        overlap (IoU strictly above `self.iou_thresh`), directly or through a
        chain of overlaps, form one group. Each group yields one output
        detection: the member with the highest score, with its score replaced
        by the group's score sum divided by
        ``max(self.n_detectors, group size)``. A model that contributes no
        box to a group therefore counts as a score of zero.

        Every output record carries the winning annotation's fields plus a
        ``det`` field holding the index of the model it came from.

        Returns:
            Ensembler: The instance itself, with `self.results` set to the
            list of ensembled detections.
        """
        winning_list = []
        print(
            "Computing mean score non-max suppression ensembling for {} images.".format(len(self.coco_gt.imgs.keys()))
        )
        for img in tqdm(self.coco_gt.imgs.keys()):
            # Build one frame straight from the annotation dicts instead of
            # concatenating per-model frames, some of which may be empty.
            rows = [
                dict(ann, det=det)
                for det, coco_dt in enumerate(self.dtlist)
                for ann in coco_dt.imgToAnns[img]
            ]
            if not rows:
                continue
            df = pd.DataFrame(rows)

            for cat in self.cats:
                dfcat = df[df["category_id"] == cat]
                if dfcat.empty:
                    # No model predicted this category for this image.
                    continue

                # Convert through a plain list: the column holds one
                # [x, y, w, h] list per row and the filtered frame keeps
                # the parent frame's non-contiguous index labels.
                boxes = box_convert(
                    torch.tensor(dfcat["bbox"].tolist(), dtype=torch.float32),
                    in_fmt="xywh",
                    out_fmt="xyxy",
                )
                overlaps = np.array(box_iou(boxes, boxes) > self.iou_thresh)

                # One group per detection, holding the frame labels of every
                # box it overlaps. Groups are rebuilt for each category so
                # labels from different categories never mix. A box whose
                # self-IoU does not exceed the threshold yields an empty
                # group, which the merge step discards.
                groups = {frozenset(dfcat.index[row]) for row in overlaps}

                for members in self._merge_intersecting(groups):
                    dfset = dfcat.loc[list(members)]
                    mean_score = dfset["score"].sum() / max(self.n_detectors, len(members))
                    winning_box = dfset.iloc[dfset["score"].argmax()].to_dict()
                    winning_box["score"] = mean_score
                    winning_list.append(winning_box)

        print("{} resulting instances from NMS".format(len(winning_list)))
        self.results = winning_list
        return self

    def save_coco_instances(self, fname="coco_instances_results.json"):
        """Write the ensembled detections to a JSON file in `output_dir`.

        Nothing is written when `self.results` is empty.

        Args:
            fname (str, optional): The filename for the output JSON file.
                Defaults to "coco_instances_results.json".
        """
        if self.results:
            with open(os.path.join(self.output_dir, fname), "w") as f:
                json.dump(self.results, f, cls=NpEncoder)
|
|
|
|
|
if __name__ == "__main__":
    import argparse

    # Ensembler takes (output_dir, dataset_name, grplist, iou_thresh). The
    # dataset name has no sensible built-in value, so it is read from the
    # command line; the remaining arguments keep their previous values as
    # defaults.
    parser = argparse.ArgumentParser(
        description="Ensemble COCO detections from several models with mean-score NMS."
    )
    parser.add_argument(
        "dataset_name",
        help="dataset name; ground truth is read from <output-dir>/<dataset_name>_coco_format.json",
    )
    parser.add_argument("--output-dir", default="dev", help="base directory holding the per-model subdirectories")
    parser.add_argument(
        "--groups",
        nargs="+",
        default=["fold1", "fold2", "fold3", "fold4", "fold5"],
        help="per-model subdirectory names under the output directory",
    )
    parser.add_argument("--iou-thresh", type=float, default=0.2, help="IoU above which two boxes are merged")
    args = parser.parse_args()

    ens = Ensembler(args.output_dir, args.dataset_name, args.groups, args.iou_thresh)
    ens.mean_score_nms()
|
|