"""Fine-grained caption evaluation against the FineCapEval benchmark.

Compares generated captions to ground-truth caption / background / object /
relation annotations and reports COCO metrics plus word-recall accuracies.
"""
# --- Imports and shared module-level instances ---------------------------
# Standard library
import argparse
import json
import re
from pprint import pprint

# Third-party
import pandas as pd
from tqdm import tqdm

import nltk
from nltk.stem.porter import PorterStemmer
from nltk.tokenize import word_tokenize

import language_evaluation

# One-time NLTK resource downloads; uncomment on the first run.
# nltk.download('punkt')
# nltk.download('wordnet')
# nltk.download('stopwords')

# Reused across the whole run: Porter stemmer for phrase normalization and
# the COCO-style caption evaluator (BLEU/METEOR/CIDEr/...).
p_stemmer = PorterStemmer()
evaluator = language_evaluation.CocoEvaluator()
def nltk_process(text):
    """Normalize *text* for word matching: tokenize, Porter-stem, strip punctuation.

    Args:
        text: Raw phrase or sentence.

    Returns:
        A space-joined string of stemmed, alphanumeric-only tokens.
    """
    # Tokenization
    tokens = word_tokenize(text)
    # Stemming
    stemmed = [p_stemmer.stem(tok) for tok in tokens]
    # Remove punctuation characters from each token.
    cleaned = [re.sub(r'[^a-zA-Z0-9]', '', tok) for tok in stemmed]
    # Drop tokens that became empty (pure-punctuation words); the original
    # kept them, producing doubled spaces in the joined result.
    return " ".join(tok for tok in cleaned if tok)
def _phrase_recall(pred_sent, gt_phrases):
    """Average per-phrase word recall of *gt_phrases* against *pred_sent*.

    For each ground-truth phrase, counts the fraction of its words that occur
    in the predicted sentence, then averages over phrases. NOTE(review): the
    membership test is a substring check (`word in pred_sent`), not a
    token-level match, mirroring the original implementation — "cat" also
    matches inside "category"; confirm this is intended.
    """
    score = 0.0
    for gt_phrase in gt_phrases:
        words = gt_phrase.split()
        if not words:
            continue  # empty phrase contributes nothing (as in the original)
        hits = sum(1 for w in words if w in pred_sent)
        score += hits / len(words)
    return score / len(gt_phrases) if gt_phrases else 0.0


def _report_phrase_recall(name, pred_id2sent, id2phrases):
    """Print the mean phrase-recall accuracy over all ids in *id2phrases*."""
    n_total = 0
    total_score = 0.0
    for ex_id, gt_phrases in id2phrases.items():
        total_score += _phrase_recall(pred_id2sent[ex_id], gt_phrases)
        n_total += 1
    print(name)
    print(f'Acc: {total_score / n_total * 100:.2f}')


def calculate_finegrained_scores(pred_id2sent, id2caption, use_coco_eval=False):
    """Evaluate predicted captions at the caption and phrase level.

    Args:
        pred_id2sent: Mapping of example id -> predicted caption string.
        id2caption: Mapping of example id -> list of ground-truth captions.
        use_coco_eval: When True, also run the COCO caption metrics via the
            module-level ``evaluator``.

    NOTE(review): the background/object/relation dictionaries are read from
    module-level globals (``id2background``, ``id2object``, ``id2relation``)
    populated by the ``__main__`` block, not passed as parameters — confirm
    before reusing this function from another module.
    """
    if use_coco_eval:
        refs = []
        hyps = []
        for ex_id, gt_captions in id2caption.items():
            refs.append(gt_captions)
            hyps.append(pred_id2sent[ex_id])
        print('caption')
        pprint(evaluator.run_evaluation(hyps, refs))

    # Identical scoring logic for the three fine-grained annotation types
    # (was copy-pasted three times in the original).
    _report_phrase_recall('background', pred_id2sent, id2background)
    _report_phrase_recall('object', pred_id2sent, id2object)
    _report_phrase_recall('relation', pred_id2sent, id2relation)
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--finecapeval_path', type=str,
                        default="data/FineCapEval.csv")
    parser.add_argument('--generated_id2caption', type=str,
                        default="FineCapEval_results/mle.json")
    args = parser.parse_args()

    df = pd.read_csv(args.finecapeval_path)
    # Explicit error instead of `assert` (asserts are stripped under -O).
    if df.shape != (5000, 5):
        raise ValueError(f"Unexpected FineCapEval shape: {df.shape}")

    # Context manager so the JSON file handle is closed promptly
    # (the original `json.load(open(...))` left it to the GC).
    with open(args.generated_id2caption, 'r') as f:
        generated_id2caption = json.load(f)

    def _split_phrases(cell):
        """Split an annotation cell into normalized phrases.

        NOTE(review): the delimiter is the literal two-character sequence
        backslash-semicolon — the original used the string '\\;' (an invalid
        escape, which Python keeps as backslash + ';'); confirm it matches
        the CSV format.
        """
        phrases = []
        for phrase in cell.lower().split(r'\;'):
            if len(phrase) > 1:
                phrases.append(nltk_process(phrase))
        return phrases

    print("Preprocessing GT FineCapEval data...")
    # Module-level globals: also read by calculate_finegrained_scores.
    id2caption = {}
    id2background = {}
    id2object = {}
    id2relation = {}
    for row in tqdm(df.itertuples(), total=len(df)):
        # Image filename without extension is the example id
        # (renamed from `id`/`object` to avoid shadowing builtins).
        img_id = row.image.split('.')[0]
        caption = row.caption
        background = row.background
        obj = row.object
        relation = row.relation
        # Skip rows with any missing (NaN) annotation field.
        if not all(isinstance(v, str)
                   for v in (caption, background, obj, relation)):
            continue
        if img_id not in id2caption:
            id2caption[img_id] = []
            id2background[img_id] = []
            id2object[img_id] = []
            id2relation[img_id] = []
        id2caption[img_id].append(caption)
        id2background[img_id].extend(_split_phrases(background))
        id2object[img_id].extend(_split_phrases(obj))
        id2relation[img_id].extend(_split_phrases(relation))

    print("Calculating scores...")
    calculate_finegrained_scores(
        generated_id2caption,
        id2caption,
        use_coco_eval=True)