# ------------------------------------------------------------------------
# Modified from OFA (https://github.com/OFA-Sys/OFA)
# Copyright 2022 The OFA-Sys Team.
# All rights reserved.
# This source code is licensed under the Apache 2.0 license
# found in the LICENSE file in the root directory.
# ------------------------------------------------------------------------
# Modifications Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0

import json
import os
from itertools import chain

import numpy as np
import torch
import torch.distributed as dist
from PIL import Image
from skimage import draw
from torchvision.utils import save_image

from utils.vis_utils import overlay_predictions

SMOOTH = 1e-6


def check_length(polygons):
    # Total number of coordinate tokens across all polygons of one instance.
    length = 0
    for polygon in polygons:
        length += len(polygon)
    return length
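

# Minimal usage sketch (not part of the evaluation flow): check_length counts
# the total number of coordinate tokens across all polygons of one instance.
# The flattened (x, y, x, y, ...) sequences below are hypothetical examples.
def _demo_check_length():
    polygons = [np.array([10., 20., 30., 40., 50., 60.]),  # 3 points -> 6 tokens
                np.array([5., 5., 15., 5.])]               # 2 points -> 4 tokens
    assert check_length(polygons) == 10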


def eval_refcoco(task, generator, models, sample, **kwargs):
    def _computeIoU(pred_seg, gd_seg):
        # Intersection and union pixel counts between two binary masks.
        I = np.sum(np.logical_and(pred_seg, gd_seg))
        U = np.sum(np.logical_or(pred_seg, gd_seg))
        return I, U

    def _calculate_ap_score(hyps, refs, thresh=0.5):
        # Intersection box for (x0, y0, x1, y1) boxes: element-wise max of the
        # top-left corners and element-wise min of the bottom-right corners.
        interacts = torch.cat(
            [torch.where(hyps[:, :2] < refs[:, :2], refs[:, :2], hyps[:, :2]),
             torch.where(hyps[:, 2:] < refs[:, 2:], hyps[:, 2:], refs[:, 2:])],
            dim=1
        )
        area_predictions = (hyps[:, 2] - hyps[:, 0]) * (hyps[:, 3] - hyps[:, 1])
        area_targets = (refs[:, 2] - refs[:, 0]) * (refs[:, 3] - refs[:, 1])
        interacts_w = interacts[:, 2] - interacts[:, 0]
        interacts_h = interacts[:, 3] - interacts[:, 1]
        area_interacts = interacts_w * interacts_h
        ious = area_interacts / (area_predictions + area_targets - area_interacts + 1e-6)
        # A prediction counts as a hit only if IoU clears the threshold and
        # the intersection box is non-degenerate.
        return ((ious >= thresh) & (interacts_w > 0) & (interacts_h > 0)).float()

    def convert_pts(coeffs):
        # Convert a flattened (x, y, x, y, ...) sequence into (row, col)
        # points, as expected by skimage.draw.polygon2mask.
        pts = []
        for i in range(len(coeffs) // 2):
            pts.append([coeffs[2 * i + 1], coeffs[2 * i]])  # y, x
        return np.array(pts, np.int32)
    def get_mask_from_codes(codes, img_size):
        # Rasterize each predicted polygon and OR the results into a single
        # binary mask of shape img_size.
        masks = [np.zeros(img_size)]
        for code in codes:
            if len(code) > 0:
                try:
                    mask = draw.polygon2mask(img_size, convert_pts(code))
                    mask = np.array(mask, np.uint8)
                except Exception:
                    # Degenerate polygons fall back to an empty mask.
                    mask = np.zeros(img_size)
                masks.append(mask)
        mask = sum(masks)
        mask = mask > 0
        return mask.astype(np.uint8)
    def _calculate_score(hyps, hyps_det, refs, sample, n_poly_pred, n_poly_gt, vis=True, vis_dir=None):
        if vis:
            os.makedirs(vis_dir, exist_ok=True)

        def compute_jf(pred_mask, gt_mask):
            # J is the region IoU; F is the F-measure computed from smoothed
            # pixel precision and recall (SMOOTH guards against empty masks).
            I, U = _computeIoU(pred_mask, gt_mask)
            if U == 0:
                this_iou = 0.0
            else:
                this_iou = I * 1.0 / U
            prec = (I + SMOOTH) / (pred_mask.sum() + SMOOTH)
            rec = (I + SMOOTH) / (gt_mask.sum() + SMOOTH)
            this_f = 2 * prec * rec / (prec + rec)
            return this_iou, this_f, I, U

        IoU = []
        F_score = []
        cum_I = []
        cum_U = []
        b = len(hyps)
        bboxes = torch.tensor(np.stack(hyps_det, 0))
        bboxes = bboxes.to(sample['w_resize_ratios'].device)
        ap_scores = _calculate_ap_score(bboxes.float(), sample['region_coords'].float())
        for i in range(b):
            hyps_i = hyps[i]
            gt_mask = refs[i]
            pred_mask = get_mask_from_codes(hyps_i, gt_mask.shape[0:2])
            this_iou, this_f, this_I, this_U = compute_jf(pred_mask, gt_mask)
            IoU.append(this_iou)
            F_score.append(this_f)
            cum_I.append(this_I)
            cum_U.append(this_U)
            if vis:
                def pre_caption(caption):
                    # Normalize the referring expression for use in file names.
                    import re
                    caption = caption.lower().lstrip(",.!?*#:;~").replace('-', ' ').replace('/', ' ').replace('<person>', 'person')
                    caption = re.sub(r"\s{2,}", ' ', caption)
                    caption = caption.rstrip('\n')
                    return caption

                # Map predicted and ground-truth boxes back to original image coordinates.
                gt_box = sample['region_coords'][i].cpu().numpy()
                pred_box = bboxes[i].cpu().numpy()
                pred_box[::2] *= sample['w_resize_ratios'][i].cpu().numpy()
                pred_box[1::2] *= sample['h_resize_ratios'][i].cpu().numpy()
                gt_box[::2] *= sample['w_resize_ratios'][i].cpu().numpy()
                gt_box[1::2] *= sample['h_resize_ratios'][i].cpu().numpy()
                uniq_id = sample["id"][i]
                text = pre_caption(sample["text"][i])
                # Un-normalize the input patch image from [-1, 1] to [0, 1].
                img = sample["net_input"]['patch_images'][i]
                img = (img + 1) / 2
                img_ndarray = (img.permute(1, 2, 0).cpu().numpy() * 255).astype(np.uint8)
                gt_overlayed_fn = f"{uniq_id}_{text}_gt_overlayed.png"
                pred_overlayed_fn = f"{uniq_id}_{text}_pred_overlayed.png"
                pred_overlayed = overlay_predictions(img_ndarray, pred_mask, hyps_i, pred_box)
                gt_overlayed = overlay_predictions(img_ndarray, gt_mask, None, gt_box)
                Image.fromarray(pred_overlayed.astype(np.uint8)).save(os.path.join(vis_dir, pred_overlayed_fn))
                Image.fromarray(gt_overlayed.astype(np.uint8)).save(os.path.join(vis_dir, gt_overlayed_fn))
                save_image(img, os.path.join(vis_dir, f"{uniq_id}_{text}.png"))
        return torch.tensor(IoU), torch.tensor(F_score), ap_scores, torch.tensor(cum_I), torch.tensor(cum_U)

    gen_out = task.inference_step(models, sample)
    hyps = []
    hyps_det = []
    n_poly_pred = []
    poly_len = []
    b = len(gen_out)
    for i in range(b):
        gen_out_i = np.array(gen_out[i])
        gen_out_i = gen_out_i[gen_out_i != -1]  # drop eos and padding indices
        # The first four tokens are the normalized box; scale to image size.
        gen_out_i_det = gen_out_i[:4]
        gen_out_i_det[::2] *= sample['w'][i].cpu().numpy()
        gen_out_i_det[1::2] *= sample['h'][i].cpu().numpy()
        # The remaining tokens are polygon coordinates delimited by the
        # separator token 2; append one so the last polygon is terminated.
        polygons_pred = np.append(gen_out_i[4:], [2])
        size = len(polygons_pred)
        idx_list = [idx for idx, val in enumerate(polygons_pred) if val == 2]
        polygons_pred *= task.cfg.patch_image_size
        # Split the flat sequence at the separator positions, one chunk per polygon.
        polygons = []
        prev_idx = 0
        for idx in idx_list:
            if prev_idx != idx and prev_idx != size:
                polygons.append(polygons_pred[prev_idx: idx])
            prev_idx = idx + 1
        poly_len.append(check_length(polygons))
        n_poly_pred.append(len(polygons))
        hyps.append(polygons)
        hyps_det.append(gen_out_i_det)
    gt = sample['label']
    results = [{"uniq_id": sample_id} for sample_id in sample["id"].tolist()]
    iou_scores, f_scores, ap_scores, cum_I, cum_U = _calculate_score(
        hyps, hyps_det, gt, sample, n_poly_pred, sample['n_poly'],
        vis=kwargs['vis'], vis_dir=kwargs['vis_dir'])
    result_dir = kwargs['result_dir']
    os.makedirs(result_dir, exist_ok=True)
    # Cache per-batch statistics for offline analysis, keyed by the first id.
    torch.save({"iou_scores": iou_scores, "ap_scores": ap_scores, "n_poly_pred": n_poly_pred,
                "n_poly_gt": sample['n_poly'], "poly_len": poly_len, "uniq_id": sample["id"]},
               os.path.join(result_dir, f'{sample["id"][0]}.pt'))
    return results, iou_scores, f_scores, ap_scores, cum_I, cum_U
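

# Standalone sketch of the smoothed J (IoU) and F computation performed by the
# nested compute_jf helper above, shown on toy binary masks; the values in the
# comments are what this hypothetical example produces.
def _demo_jf_on_toy_masks():
    pred = np.zeros((4, 4), np.uint8)
    gt = np.zeros((4, 4), np.uint8)
    pred[:2, :2] = 1  # 4 predicted pixels
    gt[:2, :] = 1     # 8 ground-truth pixels, 4 of them overlapping
    I = np.sum(np.logical_and(pred, gt))         # intersection = 4
    U = np.sum(np.logical_or(pred, gt))          # union = 8
    iou = I / U if U > 0 else 0.0                # J = 0.5
    prec = (I + SMOOTH) / (pred.sum() + SMOOTH)  # ~1.0
    rec = (I + SMOOTH) / (gt.sum() + SMOOTH)     # ~0.5
    f = 2 * prec * rec / (prec + rec)            # F ~ 0.667
    return iou, f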


def eval_step(task, generator, models, sample, **kwargs):
    if task.cfg._name == 'refcoco':
        return eval_refcoco(task, generator, models, sample, **kwargs)
    else:
        raise NotImplementedError
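

# Standalone sketch of the box-hit criterion used by _calculate_ap_score:
# IoU must clear the threshold and the intersection must be non-degenerate.
# Boxes are (x0, y0, x1, y1); the tensors below are toy values.
def _demo_box_ap(thresh=0.5):
    hyps = torch.tensor([[0., 0., 10., 10.]])
    refs = torch.tensor([[0., 0., 10., 20.]])
    lt = torch.max(hyps[:, :2], refs[:, :2])  # intersection top-left
    rb = torch.min(hyps[:, 2:], refs[:, 2:])  # intersection bottom-right
    wh = rb - lt
    inter = wh[:, 0] * wh[:, 1]                                     # 100
    area_h = (hyps[:, 2] - hyps[:, 0]) * (hyps[:, 3] - hyps[:, 1])  # 100
    area_r = (refs[:, 2] - refs[:, 0]) * (refs[:, 3] - refs[:, 1])  # 200
    iou = inter / (area_h + area_r - inter + 1e-6)                  # ~0.5
    return ((iou >= thresh) & (wh[:, 0] > 0) & (wh[:, 1] > 0)).float()  # tensor([1.])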


def merge_results(task, cfg, logger, score_cnt, score_sum, f_score_sum=None, ap_det_score_sum=None,
                  prec_score_sum=None, cum_I_sum=None, cum_U_sum=None, results=None):
    if task.cfg._name == 'image_gen':
        if cfg.distributed_training.distributed_world_size > 1:
            dist.all_reduce(score_sum.data)
            dist.all_reduce(score_cnt.data)
        if score_cnt.item() > 0:
            logger.info("score_sum: {}, score_cnt: {}, score: {}".format(
                score_sum, score_cnt, round(score_sum.item() / score_cnt.item(), 4)
            ))
    else:
        gather_results = None
        if cfg.distributed_training.distributed_world_size > 1:
            # Collect per-rank result lists and reduce all accumulated statistics.
            gather_results = [None for _ in range(dist.get_world_size())]
            dist.all_gather_object(gather_results, results)
            dist.all_reduce(score_sum.data)
            dist.all_reduce(f_score_sum.data)
            dist.all_reduce(cum_I_sum.data)
            dist.all_reduce(cum_U_sum.data)
            for prec_score in prec_score_sum:
                dist.all_reduce(prec_score.data)
            dist.all_reduce(ap_det_score_sum.data)
            dist.all_reduce(score_cnt.data)
        if score_cnt.item() > 0:
            prec_list = [.5, .6, .7, .8, .9]
            # mIoU averages per-sample IoU; oIoU is cumulative I over cumulative U.
            txt = "sample_cnt: {}, mIoU score: {}, oIoU score: {}, ap det score: {}, f score: {}, J&F: {}\n".format(
                score_cnt, round(score_sum.item() / score_cnt.item(), 4),
                round(cum_I_sum.item() / cum_U_sum.item(), 4),
                round(ap_det_score_sum.item() / score_cnt.item(), 4),
                round(f_score_sum.item() / score_cnt.item(), 4),
                round((f_score_sum.item() + score_sum.item()) / (2 * score_cnt.item()), 4)
            )
            prec_txt = " ".join(
                [f"prec@{prec}: {round(prec_score.item() / score_cnt.item(), 4)}\n"
                 for prec, prec_score in zip(prec_list, prec_score_sum)])
            txt += prec_txt
            logger.info(txt)
            os.makedirs(cfg.common_eval.results_path, exist_ok=True)
            output_path = os.path.join(cfg.common_eval.results_path, "{}_result.txt".format(cfg.dataset.gen_subset))
            with open(output_path, 'w') as f:
                f.write(txt)
        if cfg.distributed_training.distributed_world_size == 1 or dist.get_rank() == 0:
            os.makedirs(cfg.common_eval.results_path, exist_ok=True)
            output_path = os.path.join(cfg.common_eval.results_path, "{}_predict.json".format(cfg.dataset.gen_subset))
            gather_results = list(chain(*gather_results)) if gather_results is not None else results
            with open(output_path, 'w') as fw:
                json.dump(gather_results, fw)
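

# Sketch of the polygon-to-mask path used in get_mask_from_codes: a flattened
# (x, y, ...) sequence is flipped to (row, col) points and rasterized with
# skimage.draw.polygon2mask. The triangle below is a hypothetical example.
def _demo_polygon_rasterization(img_size=(64, 64)):
    code = [8., 8., 56., 8., 32., 48.]  # x0, y0, x1, y1, x2, y2
    pts = np.array([[code[2 * i + 1], code[2 * i]]
                    for i in range(len(code) // 2)], np.int32)  # (y, x) order
    mask = draw.polygon2mask(img_size, pts).astype(np.uint8)
    return mask  # binary mask with the triangle filled in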