from refTools.evaluation.tokenizer.ptbtokenizer import PTBTokenizer
from refTools.evaluation.bleu.bleu import Bleu
from refTools.evaluation.meteor.meteor import Meteor
from refTools.evaluation.rouge.rouge import Rouge
from refTools.evaluation.cider.cider import Cider
""" |
|
Input: refer and Res = [{ref_id, sent}] |
|
|
|
Things of interest |
|
evalRefs - list of ['ref_id', 'CIDEr', 'Bleu_1', 'Bleu_2', 'Bleu_3', 'Bleu_4', 'ROUGE_L', 'METEOR'] |
|
eval - dict of {metric: score} |
|
refToEval - dict of {ref_id: ['ref_id', 'CIDEr', 'Bleu_1', 'Bleu_2', 'Bleu_3', 'Bleu_4', 'ROUGE_L', 'METEOR']} |
|
""" |


class RefEvaluation:

    def __init__(self, refer, Res):
        """
        :param refer: refer class of current dataset
        :param Res: [{'ref_id', 'sent'}]
        """
        self.evalRefs = []
        self.eval = {}
        self.refToEval = {}
        self.refer = refer
        self.Res = Res

    def evaluate(self):
        evalRefIds = [ann['ref_id'] for ann in self.Res]

        # collect the ground-truth sentences for every evaluated ref_id,
        # dropping non-ascii characters that the tokenizer cannot handle
        refToGts = {}
        for ref_id in evalRefIds:
            ref = self.refer.Refs[ref_id]
            gt_sents = [sent['sent'].encode('ascii', 'ignore').decode('ascii') for sent in ref['sentences']]
            refToGts[ref_id] = gt_sents
        # one generated sentence per ref_id, wrapped in a list as the tokenizer expects
        refToRes = {ann['ref_id']: [ann['sent']] for ann in self.Res}

        print('tokenization...')
        tokenizer = PTBTokenizer()
        self.refToRes = tokenizer.tokenize(refToRes)
        self.refToGts = tokenizer.tokenize(refToGts)

        print('setting up scorers...')
        scorers = [
            (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
            (Meteor(), "METEOR"),
            (Rouge(), "ROUGE_L"),
            (Cider(), "CIDEr"),
        ]
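
        # Each scorer follows the coco-caption interface: compute_score(gts, res)
        # returns (overall_score, per_ref_scores); Bleu(4) returns parallel lists
        # with one entry per n-gram order, which is why its method name is a list.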
        for scorer, method in scorers:
            print('computing %s score...' % scorer.method())
            score, scores = scorer.compute_score(self.refToGts, self.refToRes)
            if isinstance(method, list):
                for sc, scs, m in zip(score, scores, method):
                    self.setEval(sc, m)
                    self.setRefToEvalRefs(scs, self.refToGts.keys(), m)
                    print("%s: %0.3f" % (m, sc))
            else:
                self.setEval(score, method)
                self.setRefToEvalRefs(scores, self.refToGts.keys(), method)
                print("%s: %0.3f" % (method, score))
        self.setEvalRefs()

    def setEval(self, score, method):
        self.eval[method] = score

    def setRefToEvalRefs(self, scores, refIds, method):
        for refId, score in zip(refIds, scores):
            if refId not in self.refToEval:
                self.refToEval[refId] = {}
                self.refToEval[refId]["ref_id"] = refId
            self.refToEval[refId][method] = score

    def setEvalRefs(self):
        self.evalRefs = list(self.refToEval.values())


if __name__ == '__main__':

    import os.path as osp
    import sys
    ROOT_DIR = osp.abspath(osp.join(osp.dirname(__file__), '..', '..'))
    sys.path.insert(0, osp.join(ROOT_DIR, 'lib', 'datasets'))
    from refer import REFER

    # load the refer handler for the dataset
    dataset = 'refcoco'
    refer = REFER(dataset, splitBy='google')

    # mimic a Res entry for a single ref
    val_refIds = refer.getRefIds(split='test')
    ref_id = 49767
    print("GT: %s" % refer.Refs[ref_id]['sentences'])
    Res = [{'ref_id': ref_id, 'sent': 'left bottle'}]

    # evaluate the refer expression
    refEval = RefEvaluation(refer, Res)
    refEval.evaluate()

    # print overall scores
    for metric, score in refEval.eval.items():
        print('%s: %.3f' % (metric, score))
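
    # per-ref breakdowns are also available after evaluate(), e.g.:
    #   print(refEval.refToEval[ref_id])  # {'ref_id': 49767, 'Bleu_1': ..., 'CIDEr': ...}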