import copy
import json

import numpy as np
import fire
def evaluate_annotation(key2refs, scorer):
    """Score human annotations against each other, leave-one-out style."""
    if scorer.method() == "Bleu":
        scores = np.array([0.0 for n in range(4)])
    else:
        scores = 0
    # Assume every audio clip has the same number of captions.
    num_cap_per_audio = len(next(iter(key2refs.values())))
    for i in range(num_cap_per_audio):
        if i > 0:
            # Put the caption held out in the previous round back in as a reference.
            for key in key2refs:
                key2refs[key].insert(0, res[key][0])
        # Hold out the last remaining caption of every clip as the "prediction".
        res = {key: [refs.pop()] for key, refs in key2refs.items()}
        score, _ = scorer.compute_score(key2refs, res)
        if scorer.method() == "Bleu":
            scores += np.array(score)
        else:
            scores += score
    score = scores / num_cap_per_audio
    return score
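
# Illustration of the rotation above (a sketch, not executed): with
# key2refs = {"a": [r1, r2, r3]}, the three rounds are
#   i=0: references [r1, r2], held-out "prediction" r3
#   i=1: references [r3, r1], held-out "prediction" r2
#   i=2: references [r2, r3], held-out "prediction" r1
# so every human caption is scored once against the remaining ones, and the
# returned value is the average over all rounds.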
def evaluate_prediction(key2pred, key2refs, scorer):
    """Score system predictions against references, leaving one reference out per round."""
    if scorer.method() == "Bleu":
        scores = np.array([0.0 for n in range(4)])
    else:
        scores = 0
    num_cap_per_audio = len(next(iter(key2refs.values())))
    for i in range(num_cap_per_audio):
        # Drop the i-th reference of every clip, so predictions are scored
        # against num_cap_per_audio - 1 references each round.
        key2refs_i = {}
        for key, refs in key2refs.items():
            key2refs_i[key] = refs[:i] + refs[i + 1:]
        score, _ = scorer.compute_score(key2refs_i, key2pred)
        if scorer.method() == "Bleu":
            scores += np.array(score)
        else:
            scores += score
    score = scores / num_cap_per_audio
    return score
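
# Illustration (a sketch, not executed): with key2refs = {"a": [r1, r2, r3]},
# the same predictions in key2pred are scored against [r2, r3], [r1, r3] and
# [r1, r2] in turn, then averaged. Each round uses num_cap_per_audio - 1
# references, making the result comparable to evaluate_annotation above.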
class Evaluator(object):

    def eval_annotation(self, annotation, output):
        # Expected annotation layout (inferred from the fields read below):
        # {"audios": [{"audio_id": ..., "captions": [{"caption": ...}, ...]}, ...]}
        captions = json.load(open(annotation, "r"))["audios"]
        key2refs = {}
        for audio_idx in range(len(captions)):
            audio_id = captions[audio_idx]["audio_id"]
            key2refs[audio_id] = []
            for caption in captions[audio_idx]["captions"]:
                key2refs[audio_id].append(caption["caption"])

        # FENSE works on raw sentences, so it is computed before tokenization.
        from fense.fense import Fense
        scores = {}
        scorer = Fense()
        scores[scorer.method()] = evaluate_annotation(copy.deepcopy(key2refs), scorer)

        # The coco scorers expect PTB-tokenized captions in the coco format.
        refs4eval = {}
        for key, refs in key2refs.items():
            refs4eval[key] = []
            for idx, ref in enumerate(refs):
                refs4eval[key].append({
                    "audio_id": key,
                    "id": idx,
                    "caption": ref
                })
        from pycocoevalcap.tokenizer.ptbtokenizer import PTBTokenizer
        tokenizer = PTBTokenizer()
        key2refs = tokenizer.tokenize(refs4eval)

        from pycocoevalcap.bleu.bleu import Bleu
        from pycocoevalcap.cider.cider import Cider
        from pycocoevalcap.rouge.rouge import Rouge
        from pycocoevalcap.meteor.meteor import Meteor
        from pycocoevalcap.spice.spice import Spice
        scorers = [Bleu(), Rouge(), Cider(), Meteor(), Spice()]
        for scorer in scorers:
            scores[scorer.method()] = evaluate_annotation(copy.deepcopy(key2refs), scorer)

        spider = 0
        with open(output, "w") as f:
            for name, score in scores.items():
                if name == "Bleu":
                    for n in range(4):
                        f.write("Bleu-{}: {:6.3f}\n".format(n + 1, score[n]))
                else:
                    f.write("{}: {:6.3f}\n".format(name, score))
                if name in ["CIDEr", "SPICE"]:
                    spider += score
            # SPIDEr is the mean of CIDEr and SPICE.
            f.write("SPIDEr: {:6.3f}\n".format(spider / 2))
    def eval_prediction(self, prediction, annotation, output):
        ref_captions = json.load(open(annotation, "r"))["audios"]
        key2refs = {}
        for audio_idx in range(len(ref_captions)):
            audio_id = ref_captions[audio_idx]["audio_id"]
            key2refs[audio_id] = []
            for caption in ref_captions[audio_idx]["captions"]:
                key2refs[audio_id].append(caption["caption"])

        # Expected prediction layout (inferred from the fields read below):
        # {"predictions": [{"filename": ..., "tokens": ...}, ...]}
        pred_captions = json.load(open(prediction, "r"))["predictions"]
        key2pred = {}
        for audio_idx in range(len(pred_captions)):
            item = pred_captions[audio_idx]
            audio_id = item["filename"]
            key2pred[audio_id] = [item["tokens"]]

        # FENSE works on raw sentences, so it is computed before tokenization.
        from fense.fense import Fense
        scores = {}
        scorer = Fense()
        scores[scorer.method()] = evaluate_prediction(key2pred, key2refs, scorer)

        # The coco scorers expect PTB-tokenized captions in the coco format.
        refs4eval = {}
        for key, refs in key2refs.items():
            refs4eval[key] = []
            for idx, ref in enumerate(refs):
                refs4eval[key].append({
                    "audio_id": key,
                    "id": idx,
                    "caption": ref
                })
        preds4eval = {}
        for key, preds in key2pred.items():
            preds4eval[key] = []
            for idx, pred in enumerate(preds):
                preds4eval[key].append({
                    "audio_id": key,
                    "id": idx,
                    "caption": pred
                })
        from pycocoevalcap.tokenizer.ptbtokenizer import PTBTokenizer
        tokenizer = PTBTokenizer()
        key2refs = tokenizer.tokenize(refs4eval)
        key2pred = tokenizer.tokenize(preds4eval)

        from pycocoevalcap.bleu.bleu import Bleu
        from pycocoevalcap.cider.cider import Cider
        from pycocoevalcap.rouge.rouge import Rouge
        from pycocoevalcap.meteor.meteor import Meteor
        from pycocoevalcap.spice.spice import Spice
        scorers = [Bleu(), Rouge(), Cider(), Meteor(), Spice()]
        for scorer in scorers:
            scores[scorer.method()] = evaluate_prediction(key2pred, key2refs, scorer)

        spider = 0
        with open(output, "w") as f:
            for name, score in scores.items():
                if name == "Bleu":
                    for n in range(4):
                        f.write("Bleu-{}: {:6.3f}\n".format(n + 1, score[n]))
                else:
                    f.write("{}: {:6.3f}\n".format(name, score))
                if name in ["CIDEr", "SPICE"]:
                    spider += score
            # SPIDEr is the mean of CIDEr and SPICE.
            f.write("SPIDEr: {:6.3f}\n".format(spider / 2))
if __name__ == "__main__":
    fire.Fire(Evaluator)
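
# Example invocations via python-fire, which exposes the Evaluator methods as
# sub-commands (the script and file names below are placeholders, not taken
# from this repository):
#   python evaluate.py eval_annotation --annotation annotation.json --output ann_scores.txt
#   python evaluate.py eval_prediction --prediction predictions.json \
#       --annotation annotation.json --output pred_scores.txt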