"""
****************** COPYRIGHT AND CONFIDENTIALITY INFORMATION ******************
Copyright (c) 2018 [Thomson Licensing]
All Rights Reserved
This program contains proprietary information which is a trade secret/business \
secret of [Thomson Licensing] and is protected, even if unpublished, under \
applicable Copyright laws (including French droit d'auteur) and/or may be \
subject to one or more patent(s).
Recipient is to retain this program in confidence and is not permitted to use \
or make copies thereof other than as permitted in a written agreement with \
[Thomson Licensing] unless otherwise expressly allowed by applicable laws or \
by [Thomson Licensing] under express agreement.
Thomson Licensing is a company of the group TECHNICOLOR
*******************************************************************************
This script permits one to reproduce the training and experiments of:
Engilberge, M., Chevallier, L., Pérez, P., & Cord, M. (2018, April).
Finding beans in burgers: Deep semantic-visual embedding with localization.
In Proceedings of CVPR (pp. 3984-3993)
Author: Martin Engilberge
"""
import numpy as np

from misc.utils import flatten
from scripts.postprocess import postprocess


def cosine_sim(A, B):
    """Cosine similarity between every row of A and every row of B."""
    img_norm = np.linalg.norm(A, axis=1)
    caps_norm = np.linalg.norm(B, axis=1)
    scores = np.dot(A, B.T)
    # Outer product of the row norms, used to normalize the dot products.
    norms = np.dot(np.expand_dims(img_norm, 1),
                   np.expand_dims(caps_norm.T, 1).T)
    scores = (scores / norms)
    return scores


def recallTopK(cap_enc, imgs_enc, imgs_path, method, ks=10, scores=None):
    """Return the paths of the ks images most similar to the query caption."""
    if scores is None:
        scores = cosine_sim(cap_enc, imgs_enc)
    # Rank images by decreasing similarity and keep the top ks paths.
    recall_imgs = [imgs_path[i] for i in np.argsort(scores, axis=1)[0][::-1][:ks]]
    postprocess(method, recall_imgs)
    return recall_imgs


def recall_at_k_multi_cap(imgs_enc, caps_enc, ks=[1, 5, 10], scores=None):
    """Recall@k and median rank for caption and image retrieval.

    imgs_enc is expected to hold each image embedding repeated 5 times,
    once per caption, so imgs_enc[::5, :] keeps one row per unique image.
    """
    if scores is None:
        scores = cosine_sim(imgs_enc[::5, :], caps_enc)
    # Caption retrieval: rank of the best of the 5 ground-truth captions
    # for each image.
    ranks = np.array([np.nonzero(np.in1d(row, np.arange(x * 5, x * 5 + 5, 1)))[0][0]
                      for x, row in enumerate(np.argsort(scores, axis=1)[:, ::-1])])
    medr_caps_search = np.median(ranks)
    recall_caps_search = list()
    for k in ks:
        recall_caps_search.append(
            (float(len(np.where(ranks < k)[0])) / ranks.shape[0]) * 100)
    # Image retrieval: rank of the ground-truth image for each caption.
    ranks = np.array([np.nonzero(row == int(x / 5.0))[0][0]
                      for x, row in enumerate(np.argsort(scores.T, axis=1)[:, ::-1])])
    medr_imgs_search = np.median(ranks)
    recall_imgs_search = list()
    for k in ks:
        recall_imgs_search.append(
            (float(len(np.where(ranks < k)[0])) / ranks.shape[0]) * 100)
    return recall_caps_search, recall_imgs_search, medr_caps_search, medr_imgs_search


def avg_recall(imgs_enc, caps_enc):
    """Compute recall averaged over 5-fold splits of 1000 images (5000 rows per fold)."""
    res = list()
    if len(imgs_enc) % 5000 == 0:
        max_iter = len(imgs_enc)
    else:
        max_iter = len(imgs_enc) - 5000
    for i in range(0, max_iter, 5000):
        imgs = imgs_enc[i:i + 5000]
        caps = caps_enc[i:i + 5000]
        res.append(recall_at_k_multi_cap(imgs, caps))
    # Average each of the four returned metrics across folds.
    return [np.sum([x[i] for x in res], axis=0) / len(res) for i in range(len(res[0]))]


def eval_recall(imgs_enc, caps_enc):
    """Stack batched embeddings into single matrices and compute averaged recall."""
    imgs_enc = np.vstack(flatten(imgs_enc))
    caps_enc = np.vstack(flatten(caps_enc))
    res = avg_recall(imgs_enc, caps_enc)
    return res
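

# Illustrative usage sketch (not part of the original script): runs avg_recall on
# random embeddings to show the expected input layout. The names n_imgs, emb_dim
# and the random data below are assumptions for this sketch only; real inputs come
# from the trained visual and textual pipelines, with each image embedding repeated
# 5 times so its rows stay aligned with the 5 captions per image.
if __name__ == "__main__":
    rng = np.random.RandomState(0)
    n_imgs, emb_dim = 1000, 256
    # (5000, emb_dim): 5 identical rows per image, and 5 captions per image.
    imgs_enc = np.repeat(rng.rand(n_imgs, emb_dim), 5, axis=0)
    caps_enc = rng.rand(5 * n_imgs, emb_dim)
    r_caps, r_imgs, medr_caps, medr_imgs = avg_recall(imgs_enc, caps_enc)
    print("Caption retrieval recall@1/5/10:", r_caps, "median rank:", medr_caps)
    print("Image retrieval recall@1/5/10:", r_imgs, "median rank:", medr_imgs)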