# ------------------------------------------------------------------------
# Modified from OFA (https://github.com/OFA-Sys/OFA)
# Copyright 2022 The OFA-Sys Team.
# All rights reserved.
# This source code is licensed under the Apache 2.0 license
# found in the LICENSE file in the root directory.
# ------------------------------------------------------------------------
# Modifications Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0

import json
import os
from itertools import chain

import numpy as np
import torch
import torch.distributed as dist
from PIL import Image
from skimage import draw
from torchvision.utils import save_image

from utils.vis_utils import overlay_predictions

SMOOTH = 1e-6


def check_length(polygons):
    """Total number of coordinates across all polygons."""
    length = 0
    for polygon in polygons:
        length += len(polygon)
    return length


def eval_refcoco(task, generator, models, sample, **kwargs):
    def _computeIoU(pred_seg, gd_seg):
        # Intersection and union of two binary masks.
        I = np.sum(np.logical_and(pred_seg, gd_seg))
        U = np.sum(np.logical_or(pred_seg, gd_seg))
        return I, U

    def _calculate_ap_score(hyps, refs, thresh=0.5):
        # Boxes are (x1, y1, x2, y2); the intersection box is the max of the
        # top-left corners and the min of the bottom-right corners.
        interacts = torch.cat(
            [torch.where(hyps[:, :2] < refs[:, :2], refs[:, :2], hyps[:, :2]),
             torch.where(hyps[:, 2:] < refs[:, 2:], hyps[:, 2:], refs[:, 2:])],
            dim=1
        )
        area_predictions = (hyps[:, 2] - hyps[:, 0]) * (hyps[:, 3] - hyps[:, 1])
        area_targets = (refs[:, 2] - refs[:, 0]) * (refs[:, 3] - refs[:, 1])
        interacts_w = interacts[:, 2] - interacts[:, 0]
        interacts_h = interacts[:, 3] - interacts[:, 1]
        area_interacts = interacts_w * interacts_h
        ious = area_interacts / (area_predictions + area_targets - area_interacts + 1e-6)
        # A hit requires IoU above the threshold and a non-empty intersection.
        return ((ious >= thresh) & (interacts_w > 0) & (interacts_h > 0)).float()

    def convert_pts(coeffs):
        # Flat (x0, y0, x1, y1, ...) sequence -> (row, col) points for skimage.
        pts = []
        for i in range(len(coeffs) // 2):
            pts.append([coeffs[2 * i + 1], coeffs[2 * i]])  # y, x
        return np.array(pts, np.int32)

    def get_mask_from_codes(codes, img_size):
        # Rasterize each predicted polygon and take the union of the masks.
        masks = [np.zeros(img_size)]
        for code in codes:
            if len(code) > 0:
                try:
                    mask = draw.polygon2mask(img_size, convert_pts(code))
                    mask = np.array(mask, np.uint8)
                except Exception:
                    mask = np.zeros(img_size)
                masks.append(mask)
        mask = sum(masks)
        mask = mask > 0
        return mask.astype(np.uint8)

    def _calculate_score(hyps, hyps_det, refs, sample, n_poly_pred, n_poly_gt, vis=True, vis_dir=None):
        if vis:
            os.makedirs(vis_dir, exist_ok=True)

        def compute_jf(pred_mask, gt_mask):
            # J is the region IoU; F is the harmonic mean of precision and recall.
            I, U = _computeIoU(pred_mask, gt_mask)
            if U == 0:
                this_iou = 0.0
            else:
                this_iou = I * 1.0 / U
            prec = (I + SMOOTH) / (pred_mask.sum() + SMOOTH)
            rec = (I + SMOOTH) / (gt_mask.sum() + SMOOTH)
            this_f = 2 * prec * rec / (prec + rec)
            return this_iou, this_f, I, U

        IoU = []
        F_score = []
        cum_I = []
        cum_U = []
        bboxes = hyps_det
        b = len(hyps)
        bboxes = torch.tensor(np.stack(bboxes, 0))
        bboxes = bboxes.to(sample['w_resize_ratios'].device)
        ap_scores = _calculate_ap_score(bboxes.float(), sample['region_coords'].float())
        for i in range(b):
            hyps_i = hyps[i]
            gt_mask = refs[i]
            pred_mask = get_mask_from_codes(hyps_i, gt_mask.shape[0:2])
            this_iou, this_f, this_I, this_U = compute_jf(pred_mask, gt_mask)
            IoU.append(this_iou)
            F_score.append(this_f)
            cum_I.append(this_I)
            cum_U.append(this_U)

            if vis:
                def pre_caption(caption):
                    # Normalize the referring expression so it is safe to use in filenames.
                    import re
                    caption = caption.lower().lstrip(",.!?*#:;~").replace('-', ' ').replace('/', ' ').replace(
                        '<person>', 'person')
                    caption = re.sub(
                        r"\s{2,}",
                        ' ',
                        caption,
                    )
                    caption = caption.rstrip('\n')
                    return caption

                # Scale boxes by the per-sample resize ratios before drawing.
                gt_box = sample['region_coords'][i].cpu().numpy()
                pred_box = bboxes[i].cpu().numpy()
                pred_box[::2] *= sample['w_resize_ratios'][i].cpu().numpy()
                pred_box[1::2] *= sample['h_resize_ratios'][i].cpu().numpy()
                gt_box[::2] *= sample['w_resize_ratios'][i].cpu().numpy()
                gt_box[1::2] *= sample['h_resize_ratios'][i].cpu().numpy()
                uniq_id = sample["id"][i]
                text = sample["text"][i]
                text = pre_caption(text)
                # Undo the [-1, 1] normalization so the image can be rendered.
                img = sample["net_input"]['patch_images'][i]
                img = (img + 1) / 2
                img_ndarray = img.permute(1, 2, 0).cpu().numpy() * 255
                img_ndarray = img_ndarray.astype(np.uint8)
                gt_overlayed_fn = f"{uniq_id}_{text}_gt_overlayed.png"
                pred_overlayed_fn = f"{uniq_id}_{text}_pred_overlayed.png"
                pred_overlayed = overlay_predictions(img_ndarray, pred_mask, hyps_i, pred_box)
                gt_overlayed = overlay_predictions(img_ndarray, gt_mask, None, gt_box)
                pred_overlayed = Image.fromarray(pred_overlayed.astype(np.uint8))
                pred_overlayed.save(os.path.join(vis_dir, pred_overlayed_fn))
                gt_overlayed = Image.fromarray(gt_overlayed.astype(np.uint8))
                gt_overlayed.save(os.path.join(vis_dir, gt_overlayed_fn))
                img_fn = f"{uniq_id}_{text}.png"
                save_image(img, os.path.join(vis_dir, img_fn))

        return torch.tensor(IoU), torch.tensor(F_score), ap_scores, torch.tensor(cum_I), torch.tensor(cum_U)

    gen_out = task.inference_step(models, sample)
    hyps = []
    hyps_det = []
    n_poly_pred = []
    b = len(gen_out)
    poly_len = []
    for i in range(b):
        gen_out_i = np.array(gen_out[i])
        gen_out_i = gen_out_i[gen_out_i != -1]  # excluding eos and padding indices
        # The first four values are the box coordinates; scale them to image size.
        gen_out_i_det = gen_out_i[:4]
        gen_out_i_det[::2] *= sample['w'][i].cpu().numpy()
        gen_out_i_det[1::2] *= sample['h'][i].cpu().numpy()
        # The remainder is a flat polygon sequence; append a trailing separator
        # so the last polygon is also closed off.
        polygons_pred = gen_out_i[4:]
        polygons_pred = np.append(polygons_pred, [2])
        size = len(polygons_pred)
        idx_list = [idx for idx, val in enumerate(polygons_pred) if val == 2]  # 2 indicates separator token
        polygons_pred *= task.cfg.patch_image_size

        # extract the sequence for each polygon
        polygons = []
        prev_idx = 0
        for idx in idx_list:
            cur_idx = idx
            if prev_idx == cur_idx or prev_idx == size:
                pass
            else:
                polygons.append(polygons_pred[prev_idx: cur_idx])
            prev_idx = cur_idx + 1

        poly_len.append(check_length(polygons))
        n_poly_pred.append(len(polygons))
        hyps.append(polygons)
        hyps_det.append(gen_out_i_det)

    gt = sample['label']
    results = [
        {"uniq_id": sample_id}
        for sample_id in sample["id"].tolist()
    ]
    iou_scores, f_scores, ap_scores, cum_I, cum_U = _calculate_score(
        hyps, hyps_det, gt, sample, n_poly_pred, sample['n_poly'],
        vis=kwargs['vis'], vis_dir=kwargs['vis_dir'])

    # Save per-batch scores so results can be aggregated offline.
    result_dir = kwargs['result_dir']
    os.makedirs(result_dir, exist_ok=True)
    torch.save({"iou_scores": iou_scores,
                "ap_scores": ap_scores,
                "n_poly_pred": n_poly_pred,
                "n_poly_gt": sample['n_poly'],
                "poly_len": poly_len,
                "uniq_id": sample["id"]},
               os.path.join(result_dir, f'{sample["id"][0]}.pt'))
    return results, iou_scores, f_scores, ap_scores, cum_I, cum_U


def eval_step(task, generator, models, sample, **kwargs):
    if task.cfg._name == 'refcoco':
        return eval_refcoco(task, generator, models, sample, **kwargs)
    else:
        raise NotImplementedError


def merge_results(task, cfg, logger, score_cnt, score_sum, f_score_sum=None, ap_det_score_sum=None,
                  prec_score_sum=None, cum_I_sum=None, cum_U_sum=None, results=None):
    if task.cfg._name == 'image_gen':
        if cfg.distributed_training.distributed_world_size > 1:
            dist.all_reduce(score_sum.data)
            dist.all_reduce(score_cnt.data)
        if score_cnt.item() > 0:
            logger.info("score_sum: {}, score_cnt: {}, score: {}".format(
                score_sum, score_cnt, round(score_sum.item() / score_cnt.item(), 4)
            ))
    else:
        gather_results = None
        if cfg.distributed_training.distributed_world_size > 1:
            gather_results = [None for _ in range(dist.get_world_size())]
            # Gather per-rank result lists and reduce the score accumulators.
            dist.all_gather_object(gather_results, results)
            dist.all_reduce(score_sum.data)
            dist.all_reduce(f_score_sum.data)
            dist.all_reduce(cum_I_sum.data)
            dist.all_reduce(cum_U_sum.data)
            for prec_score in prec_score_sum:
                dist.all_reduce(prec_score.data)
            dist.all_reduce(ap_det_score_sum.data)
            dist.all_reduce(score_cnt.data)

        if score_cnt.item() > 0:
            prec_list = [.5, .6, .7, .8, .9]
            txt = "sample_cnt: {}, mIoU score: {}, oIoU score: {}, ap det score: {}, f score: {}, J&F: {}\n".format(
                score_cnt,
                round(score_sum.item() / score_cnt.item(), 4),
                round(cum_I_sum.item() / cum_U_sum.item(), 4),
                round(ap_det_score_sum.item() / score_cnt.item(), 4),
                round(f_score_sum.item() / score_cnt.item(), 4),
                round((f_score_sum.item() + score_sum.item()) / (2 * score_cnt.item()), 4)
            )
            prec_txt = " ".join(
                [f"prec@{prec}: {round(prec_score.item() / score_cnt.item(), 4)}\n"
                 for prec, prec_score in zip(prec_list, prec_score_sum)])
            txt += prec_txt
            logger.info(txt)
            output_path = os.path.join(cfg.common_eval.results_path, "{}_result.txt".format(cfg.dataset.gen_subset))
            os.makedirs(cfg.common_eval.results_path, exist_ok=True)
            with open(output_path, 'w') as f:
                f.write(txt)

        # Only rank 0 writes the merged prediction file.
        if cfg.distributed_training.distributed_world_size == 1 or dist.get_rank() == 0:
            os.makedirs(cfg.common_eval.results_path, exist_ok=True)
            output_path = os.path.join(cfg.common_eval.results_path, "{}_predict.json".format(cfg.dataset.gen_subset))
            gather_results = list(chain(*gather_results)) if gather_results is not None else results
            with open(output_path, 'w') as fw:
                json.dump(gather_results, fw)
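

# ---------------------------------------------------------------------------
# Minimal sanity-check sketch (not part of the original evaluation flow; the
# toy sequence below is a made-up example). It mirrors the separator-splitting
# loop in eval_refcoco to show how a flat prediction is cut into per-polygon
# chunks, reusing the module-level check_length helper.
if __name__ == "__main__":
    # Two toy polygons in normalized coordinates, each terminated by the
    # separator token value 2 (eval_refcoco appends the trailing 2 itself).
    toy = np.array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 2,
                    0.7, 0.8, 0.9, 1.0, 2], dtype=np.float32)
    idx_list = [idx for idx, val in enumerate(toy) if val == 2]
    polygons, prev_idx = [], 0
    for idx in idx_list:
        if prev_idx != idx and prev_idx != len(toy):
            polygons.append(toy[prev_idx:idx])
        prev_idx = idx + 1
    assert len(polygons) == 2 and check_length(polygons) == 10
    print(f"split {check_length(polygons)} coordinates into {len(polygons)} polygons")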