Build error
Build error
""" | |
****************** COPYRIGHT AND CONFIDENTIALITY INFORMATION ****************** | |
Copyright (c) 2018 [Thomson Licensing] | |
All Rights Reserved | |
This program contains proprietary information which is a trade secret/business \ | |
secret of [Thomson Licensing] and is protected, even if unpublished, under \ | |
applicable Copyright laws (including French droit d'auteur) and/or may be \ | |
subject to one or more patent(s). | |
Recipient is to retain this program in confidence and is not permitted to use \ | |
or make copies thereof other than as permitted in a written agreement with \ | |
[Thomson Licensing] unless otherwise expressly allowed by applicable laws or \ | |
by [Thomson Licensing] under express agreement. | |
Thomson Licensing is a company of the group TECHNICOLOR | |
******************************************************************************* | |
This script permits one to reproduce the training and experiments of:
Engilberge, M., Chevallier, L., Pérez, P., & Cord, M. (2018, April). | |
Finding beans in burgers: Deep semantic-visual embedding with localization. | |
In Proceedings of CVPR (pp. 3984-3993) | |
Author: Martin Engilberge | |
""" | |
import numpy as np | |
import cv2 | |
import os | |
from scipy.misc import imresize | |
from pycocotools import mask as maskUtils | |
# ################### Functions for the pointing game evaluation ################### # | |
def regions_scale(x, y, rw, rh, h, w, org_dim, cc=None):
    """Map a box from original-image coordinates into the evaluation frame.

    Args:
        x, y: top-left corner of the box in the original image.
        rw, rh: width and height of the box in the original image.
        h, w: height and width of the original image.
        org_dim: size of the evaluation frame; index 0 is paired with the
            x axis and index 1 with the y axis.
        cc: when ``None`` each axis is scaled independently by
            ``org_dim / image size``. Otherwise everything is scaled by
            ``cc / short image side`` and the corner is shifted back by half
            of the excess over ``org_dim`` (presumably a resize followed by
            a center crop -- confirm against the data loader).

    Returns:
        Tuple ``(fx, fy, srw, srh)``: the scaled corner and the scaled
        width / height of the box.
    """
    frame_w, frame_h = org_dim[0], org_dim[1]

    if cc is None:
        # Independent stretch along each axis.
        return x * frame_w / w, y * frame_h / h, rw * frame_w / w, rh * frame_h / h

    portrait = h > w
    short_side = w if portrait else h
    long_side = h if portrait else w
    aspect = float(long_side) / float(short_side)

    # Uniform scale so that the short image side becomes ``cc``.
    scaled_x = x * cc / short_side
    scaled_y = y * cc / short_side
    srw = rw * cc / short_side
    srh = rh * cc / short_side

    # Size of the uniformly resized image along x and y.
    if portrait:
        resized_w, resized_h = cc, cc * aspect
    else:
        resized_w, resized_h = cc * aspect, cc

    fx = scaled_x - (resized_w - frame_w) / 2
    fy = scaled_y - (resized_h - frame_h) / 2
    return fx, fy, srw, srh
def is_in_region(x, y, bx, by, w, h):
    """Return True when point (x, y) lies strictly inside the given box.

    The box has its corner at (bx, by) and spans ``w`` along x and ``h``
    along y; points on the border are not considered inside.
    """
    inside_x = bx < x < (bx + w)
    inside_y = by < y < (by + h)
    return inside_x and inside_y
def one_img_process(act_map, caps_enc, caps_ori, fc_w, regions, h, w, org_dim, nmax=180, bilinear=False, cc=None, img_id=0,
                    img_dir="/home/atticus/proj/data/vg/VG_100K"):
    """Play the pointing game for every caption of a single image.

    For each caption embedding a heat map is built from its ``nmax`` largest
    dimensions, the arg-max of that map is taken as the pointed location,
    and the point counts as correct when it falls strictly inside the
    matching ground-truth region. As a side effect, overlay images are
    written to the ``heat_map/`` directory (created when missing).

    NOTE(review): the two ``np.dot`` expressions were reconstructed from
    truncated source text, and the source had lost its indentation, so the
    nesting of the coordinate rescaling under the non-bilinear branch is
    inferred. Confirm both against the upstream file.

    Args:
        act_map: activation maps; the first axis is flattened against
            ``fc_w`` and the remaining axes give the heat-map size.
        caps_enc: iterable of caption embeddings, one per region.
        caps_ori: sequence whose items expose a ``phrase`` attribute, used
            only to name the dumped files.
        fc_w: projection weights multiplied with the flattened activations.
        regions: sequence whose items expose ``x``, ``y``, ``width`` and
            ``height``.
        h, w: height and width of the original image.
        org_dim: size of the evaluation frame (index 0 pairs with x).
        nmax: number of top embedding dimensions used for the heat map.
        bilinear: when True the heat map is resized to ``org_dim`` before
            the arg-max is taken, so no manual rescaling is applied.
        cc: forwarded to ``regions_scale``; also selects the offset
            correction of the pointed location.
        img_id: image identifier; ``<img_id>.jpg`` is read from ``img_dir``.
        img_dir: directory holding the source images. Defaults to the
            location that used to be hard-coded.

    Returns:
        Tuple ``(correct, total)`` for this image.
    """
    import warnings  # local import so the block does not depend on file-level changes

    size = act_map.shape[1:]
    act_map = act_map.reshape(act_map.shape[0], -1)
    prod = np.dot(fc_w, act_map)

    if not os.path.exists("heat_map"):
        os.makedirs("heat_map")

    out_w, out_h = int(org_dim[0]), int(org_dim[1])

    # The image is the same for every caption: read and resize it once.
    img_path = os.path.join(img_dir, str(img_id) + ".jpg")
    img_ori = cv2.imread(img_path)
    if img_ori is None:
        # cv2.imread signals failure with None; the score does not depend on
        # the image, so only the visual dumps are skipped.
        warnings.warn("could not read {}; heat-map dumps are skipped".format(img_path))
    else:
        img_ori = cv2.resize(img_ori, (out_w, out_h))

    total = 0
    correct = 0

    for i, cap in enumerate(caps_enc):
        order = np.argsort(cap)[::-1]
        top = order[:nmax]
        cap_ori = caps_ori[i].phrase

        heat_map = np.reshape(np.dot(abs(cap[top]), prod[top]), size)

        if bilinear:
            heat_map = imresize(heat_map, (org_dim[0], org_dim[1]))
            x, y = np.unravel_index(heat_map.T.argmax(), heat_map.T.shape)
        else:
            x, y = np.unravel_index(heat_map.T.argmax(), heat_map.T.shape)
            # NOTE(review): x is a column index but is scaled with size[0]
            # (the row count); this only matters for non-square maps.
            if cc is None:
                x = (org_dim[0] / size[0]) * x
                y = (org_dim[1] / size[1]) * y
            else:
                if h > w:
                    ratio = float(h) / float(w)
                    x = (org_dim[0] / size[0]) * x + (cc - org_dim[0]) / 2
                    y = (org_dim[1] / size[1]) * y + (cc * ratio - org_dim[1]) / 2
                else:
                    ratio = float(w) / float(h)
                    x = (org_dim[0] / size[0]) * x + (cc * ratio - org_dim[0]) / 2
                    y = (org_dim[1] / size[1]) * y + (cc - org_dim[1]) / 2

        region = regions[i]
        fx, fy, srw, srh = regions_scale(
            region.x, region.y, region.width, region.height, h, w, org_dim, cc)

        if img_ori is not None:
            # imresize takes (rows, cols) while cv2.resize takes
            # (width, height); match the resized image so addWeighted works
            # for non-square frames as well.
            vis_map = imresize(heat_map, (out_h, out_w))
            # Convert the feature map to uint8.
            # NOTE(review): vis_map is presumably already uint8 here, so
            # ``255 * vis_map`` would wrap modulo 256; the expression is
            # kept unchanged -- confirm the intended colouring.
            heatmap = np.uint8(255 - 255 * vis_map)
            # Render the feature map as a pseudo-colour image.
            heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET)
            heat_img = cv2.addWeighted(img_ori, 1, heatmap, 0.5, 0)
            heat_ori = cv2.applyColorMap(vis_map, cv2.COLORMAP_JET)
            cv2.imwrite("heat_map/{}-{}-ori.jpg".format(img_id, cap_ori), img_ori)
            cv2.imwrite("heat_map/{}-{}.jpg".format(img_id, cap_ori), heat_img)
            cv2.imwrite("heat_map/{}-{}-heat.jpg".format(img_id, cap_ori), heat_ori)

        if is_in_region(x, y, fx, fy, srw, srh):
            correct += 1
        total += 1

    return correct, total
def compute_pointing_game_acc(imgs_stack, caps_stack, caps_ori, nb_regions, regions, fc_w, org_dim, cc=None, nmax=180):
    """Return the pointing-game accuracy accumulated over all images.

    Args:
        imgs_stack: iterable of per-image activation maps.
        caps_stack: caption embeddings of all images, concatenated in image
            order; ``nb_regions[i]`` consecutive rows belong to image ``i``.
        caps_ori: unused; kept for interface compatibility.
        nb_regions: number of regions (captions) per image.
        regions: per image, a pair whose first item exposes ``height``,
            ``width`` and ``id`` and whose second item is the list of
            regions handed to ``one_img_process``.
        fc_w: projection weights forwarded to ``one_img_process``.
        org_dim: size of the evaluation frame.
        cc: forwarded to ``one_img_process``.
        nmax: forwarded to ``one_img_process``.

    Returns:
        ``correct / total`` as a float, or ``0.0`` when no region was
        evaluated (previously a ZeroDivisionError).
    """
    correct = 0
    total = 0
    # Running start index into caps_stack; avoids re-summing nb_regions[:i]
    # on every iteration.
    offset = 0

    for i, act_map in enumerate(imgs_stack):
        n_reg = nb_regions[i]
        caps_enc = caps_stack[offset:offset + n_reg]
        offset += n_reg

        image_info = regions[i][0]
        region = regions[i][1]

        # The region list doubles as the caption source (it carries the
        # phrase used for file naming) and as the ground-truth boxes.
        c, t = one_img_process(act_map, caps_enc, region, fc_w,
                               region, image_info.height, image_info.width, org_dim,
                               nmax=nmax, cc=cc, img_id=image_info.id)
        correct += c
        total += t

    if total == 0:
        return 0.0
    return float(correct) / float(total)
# ################### Functions for the semantic segmentation evaluation ################### # | |
def generate_heat_map(act_map, caps_enc, fc_w, nmax=180, in_dim=(224, 224)):
    """Build a heat map for one embedding and resize it to ``in_dim``.

    The activations are flattened over their trailing axes and projected
    with ``fc_w``; the ``nmax`` largest dimensions of ``caps_enc`` are then
    combined, weighted by their absolute value, into a single map with the
    spatial size of ``act_map``.

    NOTE(review): the two ``np.dot`` expressions were reconstructed from
    truncated source text -- confirm against the upstream file.
    NOTE: ``scipy.misc.imresize`` is no longer shipped by recent SciPy
    releases; this block keeps using the name imported by the file.

    Args:
        act_map: activation maps; ``act_map.shape[1:]`` is the map size.
        caps_enc: 1-D embedding used to rank and weight the dimensions.
        fc_w: projection weights multiplied with the flattened activations.
        nmax: number of top-ranked dimensions to keep.
        in_dim: output size handed to ``imresize``.

    Returns:
        The resized heat map as returned by ``imresize``.
    """
    size = act_map.shape[1:]
    flat_act = act_map.reshape(act_map.shape[0], -1)
    prod = np.dot(fc_w, flat_act)

    # Indices of the nmax largest embedding dimensions, largest first.
    top = np.argsort(caps_enc)[::-1][:nmax]

    heat_map = np.reshape(np.dot(abs(caps_enc[top]), prod[top]), size)
    return imresize(heat_map, in_dim)
def gen_binary_heat_map(maps, concept, fc_w, c_thresh, in_dim=(400, 400)):
    """Return a binary (int32) mask obtained by thresholding a heat map.

    The heat map comes from ``generate_heat_map`` with ``nmax=10``. A pixel
    is set when its value exceeds ``c_thresh`` times the value range
    (max minus min) of the map.
    """
    heat = generate_heat_map(maps, concept, fc_w, nmax=10, in_dim=in_dim)
    cutoff = c_thresh * (np.max(heat) - np.min(heat))
    return np.int32(heat > cutoff)
def compute_iou(hm, target_mask):
    """Return the intersection-over-union of two masks of equal shape.

    Args:
        hm: predicted mask.
        target_mask: ground-truth mask.

    Returns:
        ``sum(hm * target_mask) / (sum(hm) + sum(target_mask) - overlap)``,
        or ``0.0`` when that denominator is zero (both masks empty). The
        guard avoids a NaN result, which would otherwise disturb any later
        sorting of the scores.
    """
    intersection = np.sum(hm * target_mask)
    union = np.sum(target_mask) + np.sum(hm) - intersection
    if union == 0:
        return 0.0
    return intersection / union
def mask_from_poly(polygons, org_size, in_dim):
    """Rasterise annotations into a binary float32 mask of size ``in_dim``.

    A canvas with ``org_size[1]`` rows and ``org_size[0]`` columns is
    filled from every entry of ``polygons``: an entry whose first item is
    the string ``"rle"`` is decoded with pycocotools and added to the
    canvas, any other entry is read as a flat ``x0, y0, x1, y1, ...`` list
    and filled as a polygon. The canvas is then resized with
    nearest-neighbour interpolation and binarised.
    """
    canvas = np.zeros((org_size[1], org_size[0]))

    for entry in polygons:
        if entry[0] == "rle":
            decoded = maskUtils.decode(entry[1])
            canvas += decoded.squeeze()
            continue
        n_points = int(len(entry) / 2)
        vertices = np.int32(np.array(entry).reshape((n_points, 2)))
        cv2.fillPoly(canvas, [vertices], [1])

    resized = imresize(canvas, in_dim, interp="nearest")
    return np.float32(resized > 0)
def compute_semantic_seg(imgs_stack, sizes_list, target_ann, cats_stack, fc_w, c_thresh, in_dim=(200, 200)):
    """Score semantic segmentation at IoU thresholds 0.3, 0.4 and 0.5.

    For every category in ``cats_stack`` and every image, an
    ``(iou, present)`` pair is recorded: the IoU between the thresholded
    heat map and the ground-truth mask with ``present = 1`` when the
    category is annotated for that image, and ``(0, 0)`` otherwise.

    Returns:
        List with the value of ``get_map_at`` for each of the three
        thresholds, in that order.
    """
    IoUs = {}
    for cat, cat_emb in cats_stack.items():
        scores = []
        for idx in range(imgs_stack.shape[0]):
            if cat not in target_ann[idx]:
                # Category missing from the ground truth: IoU fixed at 0.
                scores.append((0, 0))
                continue
            target_mask = mask_from_poly(target_ann[idx][cat], sizes_list[idx], in_dim)
            heat_map = gen_binary_heat_map(imgs_stack[idx], cat_emb, fc_w, c_thresh, in_dim=in_dim)
            # The second tuple item marks the category as present.
            scores.append((compute_iou(heat_map, target_mask), 1))
        IoUs[cat] = scores

    return [get_map_at(IoUs, th) for th in [0.3, 0.4, 0.5]]
def compute_ap(rec, prec):
    """Return the area under a precision/recall curve.

    The area is accumulated as ``precision * (recall - previous recall)``
    over the points in order, starting from a recall of 0.
    """
    area = 0
    last_recall = 0
    for idx, recall in enumerate(rec):
        area += prec[idx] * (recall - last_recall)
        last_recall = recall
    return area
def get_map_at(IoUs, at):
    """Return the mean over categories of the average precision at ``at``.

    Args:
        IoUs: mapping from category to a list of ``(iou, present)`` tuples.
        at: IoU threshold; a sample is a hit when its IoU is strictly
            greater than ``at``.

    Returns:
        Mean of the per-category values computed by ``compute_ap``.

    NOTE(review): the top-ranked sample of each category is excluded from
    the true/false-positive counts and its precision is forced to 0. This
    mirrors the original loop (``range(1, nd)`` plus ``prec[0] = 0``);
    confirm that this is the intended protocol.
    NOTE(review): a category with no present sample still divides by zero
    in the recall and yields NaN, as before.
    """
    ap = dict()
    for c, scored in IoUs.items():
        # Rank samples by decreasing IoU (stable for ties).
        ranked = sorted(scored, key=lambda tup: tup[0], reverse=True)
        hits = np.array([tup[0] > at for tup in ranked], dtype=bool)
        npos = np.sum([tup[1] for tup in ranked])

        tp = hits.astype(float)
        fp = (~hits).astype(float)
        # Rank 0 is left out of both counts; slicing keeps this safe for an
        # empty list.
        tp[:1] = 0
        fp[:1] = 0

        # Compute precision / recall.
        tp = np.cumsum(tp)
        fp = np.cumsum(fp)
        rec = tp / npos
        seen = tp + fp
        # Precision is 0 where nothing has been counted yet (rank 0); this
        # replaces the 0/0 division that was patched afterwards.
        prec = np.divide(tp, seen, out=np.zeros_like(tp), where=seen > 0)

        ap[c] = compute_ap(rec, prec)

    return np.mean(list(ap.values()))