Spaces:

atticus
/

image-text-retrival-huster

Build error

App Files Files Community

image-text-retrival-huster / misc /localization.py

atticus

completed

30a0ec5 over 2 years ago

raw

history blame

9.04 kB

	"""
	**************** COPYRIGHT AND CONFIDENTIALITY INFORMATION ****************
	Copyright (c) 2018 [Thomson Licensing]
	All Rights Reserved
	This program contains proprietary information which is a trade secret/business \
	secret of [Thomson Licensing] and is protected, even if unpublished, under \
	applicable Copyright laws (including French droit d'auteur) and/or may be \
	subject to one or more patent(s).
	Recipient is to retain this program in confidence and is not permitted to use \
	or make copies thereof other than as permitted in a written agreement with \
	[Thomson Licensing] unless otherwise expressly allowed by applicable laws or \
	by [Thomson Licensing] under express agreement.
	Thomson Licensing is a company of the group TECHNICOLOR
	*******************************************************************************
	This scripts permits one to reproduce training and experiments of:
	Engilberge, M., Chevallier, L., Pérez, P., & Cord, M. (2018, April).
	Finding beans in burgers: Deep semantic-visual embedding with localization.
	In Proceedings of CVPR (pp. 3984-3993)

	Author: Martin Engilberge
	"""

	import numpy as np
	import cv2
	import os

	from scipy.misc import imresize
	from pycocotools import mask as maskUtils


	# ################### Functions for the pointing game evaluation ################### #

	def regions_scale(x, y, rw, rh, h, w, org_dim, cc=None):
	if cc is None:
	fx = x * org_dim[0] / w
	fy = y * org_dim[1] / h
	srw = rw * org_dim[0] / w
	srh = rh * org_dim[1] / h
	else:
	if (h > w):
	r = float(h) / float(w)

	sx = x * cc / w
	sy = y * cc / w

	srw = rw * cc / w
	srh = rh * cc / w

	fx = sx - (cc - org_dim[0]) / 2
	fy = sy - (cc * r - org_dim[1]) / 2
	else:
	r = float(w) / float(h)

	sx = x * cc / h
	sy = y * cc / h

	srw = rw * cc / h
	srh = rh * cc / h

	fy = sy - (cc - org_dim[1]) / 2
	fx = sx - (cc * r - org_dim[0]) / 2

	return fx, fy, srw, srh


	def is_in_region(x, y, bx, by, w, h):
	return (x > bx and x < (bx + w) and y > by and y < (by + h))


	def one_img_process(act_map, caps_enc, caps_ori, fc_w, regions, h, w, org_dim, nmax=180, bilinear=False, cc=None, img_id=0):
	size = act_map.shape[1:]
	act_map = act_map.reshape(act_map.shape[0], -1)
	prod = np.dot(fc_w, act_map)
	if not os.path.exists("heat_map"):
	os.makedirs("heat_map")
	total = 0
	correct = 0
	# caps_ori = caps_ori.strip().split(" ")
	for i, cap in enumerate(caps_enc):
	order = np.argsort(cap)[::-1]
	cap_ori = caps_ori[i].phrase
	heat_map = np.reshape(
	np.dot(np.abs(cap[order[:nmax]]), prod[order[:nmax]]), size)
	# heat_map.save("heat_map/{}.jpg".format(i))
	# print(img_path)
	img_path = os.path.join("/home/atticus/proj/data/vg/VG_100K",
	str(img_id) + ".jpg")
	img_ori = cv2.imread(img_path)

	if bilinear:
	heat_map = imresize(heat_map, (org_dim[0], org_dim[1]))
	x, y = np.unravel_index(heat_map.T.argmax(), heat_map.T.shape)
	else:
	x, y = np.unravel_index(heat_map.T.argmax(), heat_map.T.shape)
	if cc is None:
	x = (org_dim[0] / size[0]) * x
	y = (org_dim[1] / size[1]) * y
	else:
	if (h > w):
	r = float(h) / float(w)
	x = (org_dim[0] / size[0]) * x + (cc - org_dim[0]) / 2
	y = (org_dim[1] / size[1]) * y + (cc * r - org_dim[1]) / 2
	else:
	r = float(w) / float(h)
	x = (org_dim[0] / size[0]) * x + (cc * r - org_dim[0]) / 2
	y = (org_dim[1] / size[1]) * y + (cc - org_dim[1]) / 2

	r = regions[i]
	fx, fy, srw, srh = regions_scale(
	r.x, r.y, r.width, r.height, h, w, org_dim, cc)
	# heatmap = np.uint8(255 * heat_map)
	heat_map = imresize(heat_map, (int(org_dim[0]), int(org_dim[1])))
	img_ori = cv2.resize(img_ori, (int(org_dim[0]), int(org_dim[1])))
	heatmap = np.uint8(255 - 255 * heat_map) # 将特征图转换为uint8格式
	heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET) # 将特征图转为伪彩色图
	heat_img = cv2.addWeighted(img_ori, 1, heatmap, 0.5, 0)
	heat_ori = cv2.applyColorMap(heat_map, cv2.COLORMAP_JET)
	cv2.imwrite("heat_map/{}-{}-ori.jpg".format(img_id, cap_ori), img_ori)
	cv2.imwrite("heat_map/{}-{}.jpg".format(img_id, cap_ori), heat_img)
	cv2.imwrite("heat_map/{}-{}-heat.jpg".format(img_id, cap_ori), heat_ori)
	if is_in_region(x, y, fx, fy, srw, srh):
	correct += 1
	total += 1

	return correct, total


	def compute_pointing_game_acc(imgs_stack, caps_stack, caps_ori, nb_regions, regions, fc_w, org_dim, cc=None, nmax=180):
	correct = 0
	total = 0

	for i, act_map in enumerate(imgs_stack):
	seen_region = sum(nb_regions[:i])
	caps_enc = caps_stack[seen_region:seen_region + nb_regions[i]]
	region = regions[i][1]
	h = regions[i][0].height
	w = regions[i][0].width
	img_id = regions[i][0].id
	c, t = one_img_process(act_map, caps_enc, region, fc_w,
	region, h, w, org_dim, nmax=nmax, cc=cc, img_id=img_id)
	correct += c
	total += t

	# heat_map = generate_heat_map(act_map=act_map, caps_enc=caps_enc, fc_w=fc_w)
	# heat_map.save("heat_map/{}.jpg".format(i))

	return float(correct) / float(total)


	# ################### Functions for the semantic segmentation evaluation ################### #


	def generate_heat_map(act_map, caps_enc, fc_w, nmax=180, in_dim=(224, 224)):
	size = act_map.shape[1:]
	act_map = act_map.reshape(act_map.shape[0], -1)
	prod = np.dot(fc_w, act_map)

	order = np.argsort(caps_enc)[::-1]
	# print order
	heat_map = np.reshape(
	np.dot(np.abs(caps_enc[order[:nmax]]), prod[order[:nmax]]), size)
	# print heat_map

	heat_map = imresize(heat_map, in_dim)

	return heat_map


	def gen_binary_heat_map(maps, concept, fc_w, c_thresh, in_dim=(400, 400)):
	hm = generate_heat_map(maps, concept, fc_w, nmax=10, in_dim=in_dim)

	# hm += abs(np.min(hm))

	def thresh(a, coef):
	return coef * (np.max(a) - np.min(a))

	return np.int32(hm > thresh(hm, c_thresh))


	def compute_iou(hm, target_mask):
	return np.sum(hm * target_mask) / (np.sum(target_mask) + np.sum(hm) - np.sum(hm * target_mask))


	def mask_from_poly(polygons, org_size, in_dim):
	mask_poli = np.zeros((org_size[1], org_size[0]))

	for i in range(len(polygons)):
	if polygons[i][0] == "rle":
	m = maskUtils.decode(polygons[i][1])
	mask_poli += m.squeeze()
	else:
	poly = np.int32(np.array(polygons[i]).reshape(
	(int(len(polygons[i]) / 2), 2)))
	cv2.fillPoly(mask_poli, [poly], [1])

	mask_poli = imresize(mask_poli, in_dim, interp="nearest")

	return np.float32(mask_poli > 0)


	def compute_semantic_seg(imgs_stack, sizes_list, target_ann, cats_stack, fc_w, c_thresh, in_dim=(200, 200)):

	mAp = 0
	IoUs = dict()
	for k in cats_stack.keys():
	IoUs[k] = list()
	for i in range(imgs_stack.shape[0]):
	if k in target_ann[i]:
	target_mask = mask_from_poly(target_ann[i][k], sizes_list[i], in_dim)

	heat_map = gen_binary_heat_map(imgs_stack[i], cats_stack[k], fc_w, c_thresh, in_dim=in_dim)

	iou = compute_iou(heat_map, target_mask)

	# last element of tuple is groundtruth target
	IoUs[k] += [(iou, 1)]
	else:
	# if categorie k is not present in grountruth set iou at 0
	IoUs[k] += [(0, 0)]

	mAp = list()
	for th in [0.3, 0.4, 0.5]:
	mAp.append(get_map_at(IoUs, th))

	return mAp


	def compute_ap(rec, prec):
	ap = 0
	rec_prev = 0
	for k in range(len(rec)):
	prec_c = prec[k]
	rec_c = rec[k]

	ap += prec_c * (rec_c - rec_prev)

	rec_prev = rec_c
	return ap


	def get_map_at(IoUs, at):
	ap = dict()
	for c in IoUs.keys():
	sort_tupe_c = sorted(list(IoUs[c]), key=lambda tup: tup[0], reverse=True)

	y_pred = [float(x[0] > at) for x in sort_tupe_c]
	y_true = [x[1] for x in sort_tupe_c]

	npos = np.sum(y_true)

	nd = len(y_pred)
	tp = np.zeros((nd))
	fp = np.zeros((nd))

	for i in range(1, nd):
	if y_pred[i] == 1:
	tp[i] = 1
	else:
	fp[i] = 1

	# compute precision/recall
	fp = np.cumsum(fp)
	tp = np.cumsum(tp)
	rec = tp / npos
	prec = tp / (fp + tp)

	prec[0] = 0

	ap[c] = compute_ap(rec, prec)

	return np.mean(list(ap.values()))