# CLIP-Caption-Reward / tools/eval_finecapeval.py
# Hugging Face file-viewer header: uploaded by akhaliq (HF staff), commit c80917c ("add files"), 5.44 kB.
from tqdm import tqdm
from pprint import pprint
import pandas as pd
import argparse
import re
import json
import nltk
from nltk.tokenize import word_tokenize
from nltk.stem.porter import PorterStemmer
p_stemmer = PorterStemmer()
# nltk.download('punkt')
# nltk.download('wordnet')
# nltk.download('stopwords')
import language_evaluation
evaluator = language_evaluation.CocoEvaluator()
def nltk_process(text):
    """Normalize ``text`` into a space-joined string of stemmed tokens.

    The text is tokenized with NLTK's ``word_tokenize``, each token is
    stemmed with the module-level Porter stemmer ``p_stemmer``, and every
    character outside ``[a-zA-Z0-9]`` is then removed from each stem.

    Tokens that consisted only of such characters become empty strings but
    are still joined, so the result may contain consecutive spaces.

    Args:
        text: The string to normalize.

    Returns:
        The processed tokens joined by single spaces.
    """
    # Tokenization followed by stemming of every token.
    stems = [p_stemmer.stem(token) for token in word_tokenize(text)]

    # Removing punctuation (anything that is not an ASCII letter or digit).
    cleaned = (re.sub(r'[^a-zA-Z0-9]', '', stem) for stem in stems)

    return " ".join(cleaned)
def _phrase_recall(pred_sent, gt_phrases):
    """Return the mean per-phrase word recall of ``gt_phrases`` in ``pred_sent``.

    For each phrase, the score is the fraction of its whitespace-separated
    words for which ``word in pred_sent`` is true. Phrases without any word
    contribute 0 but still count in the denominator. An empty phrase list
    scores 0.
    """
    if not gt_phrases:
        return 0

    score = 0
    for gt_phrase in gt_phrases:
        gt_words = gt_phrase.split()
        if gt_words:
            n_found = sum(1 for gt_word in gt_words if gt_word in pred_sent)
            score += n_found / len(gt_words)
    return score / len(gt_phrases)


def _category_accuracy(pred_id2sent, id2phrases):
    """Return the average phrase recall over all images, as a percentage."""
    if not id2phrases:
        # Nothing to evaluate: report 0 instead of dividing by zero.
        return 0.0

    total_score = 0
    for image_id, gt_phrases in id2phrases.items():
        total_score += _phrase_recall(pred_id2sent[image_id], gt_phrases)
    return total_score / len(id2phrases) * 100


def _resolve_ground_truth(name, value):
    """Return ``value``, or the module-level global ``name`` when it is None."""
    if value is not None:
        return value
    try:
        return globals()[name]
    except KeyError:
        raise NameError(f"name '{name}' is not defined") from None


def calculate_finegrained_scores(pred_id2sent, id2caption, use_coco_eval=False,
                                 id2background=None, id2object=None,
                                 id2relation=None):
    """Print and return FineCapEval scores for a set of generated captions.

    When ``use_coco_eval`` is true, the generated captions are first scored
    against the reference captions with the module-level ``evaluator`` and
    the result is pretty-printed under the heading ``caption``.

    Then, for each of the ``background``, ``object`` and ``relation``
    categories, an accuracy is printed: for every image, each ground-truth
    phrase is scored by the fraction of its words found in the generated
    caption, phrase scores are averaged per image, and image scores are
    averaged over the category and reported as a percentage.

    NOTE(review): word lookup uses ``gt_word in pred_sent``. If the generated
    caption is a plain string (as loaded from JSON, presumably) this is a
    substring test, not a whole-word match -- confirm this is intended
    before changing it, since it affects reported numbers.

    Args:
        pred_id2sent: Mapping from image id to the generated caption.
        id2caption: Mapping from image id to its reference captions; only
            read when ``use_coco_eval`` is true.
        use_coco_eval: Whether to also run the caption-level evaluator.
        id2background: Mapping from image id to a list of ground-truth
            background phrases. Defaults to the module-level global of the
            same name (the original behaviour).
        id2object: Same as ``id2background`` for object phrases.
        id2relation: Same as ``id2background`` for relation phrases.

    Returns:
        A dict with the keys ``background``, ``object`` and ``relation``
        mapped to their accuracy in percent, plus ``caption`` mapped to the
        evaluator output when ``use_coco_eval`` is true.

    Raises:
        KeyError: If an image id with ground truth has no generated caption.
        NameError: If a ground-truth mapping is neither passed in nor
            defined as a module-level global.
    """
    scores = {}

    if use_coco_eval:
        refs = []
        hyps = []
        for image_id, gt_captions in id2caption.items():
            refs.append(gt_captions)
            hyps.append(pred_id2sent[image_id])

        print('caption')
        results = evaluator.run_evaluation(hyps, refs)
        pprint(results)
        scores['caption'] = results

    categories = (
        ('background', 'id2background', id2background),
        ('object', 'id2object', id2object),
        ('relation', 'id2relation', id2relation),
    )
    # Categories are resolved, scored and printed one at a time so the output
    # order (and partial output on failure) matches the original script.
    for label, global_name, id2phrases in categories:
        id2phrases = _resolve_ground_truth(global_name, id2phrases)
        accuracy = _category_accuracy(pred_id2sent, id2phrases)
        print(label)
        print(f'Acc: {accuracy:.2f}')
        scores[label] = accuracy

    return scores
def _split_phrases(field):
    r"""Split one annotation field into a list of normalized phrases.

    The field is lower-cased and split on the two-character separator
    ``\;`` (a backslash followed by a semicolon). Pieces of length 0 or 1
    are dropped; the rest are normalized with ``nltk_process``.
    """
    return [
        nltk_process(phrase)
        # '\\;' is the same runtime string as the former '\;' literal,
        # written without the invalid escape sequence.
        for phrase in field.lower().split('\\;')
        if len(phrase) > 1
    ]


if __name__ == '__main__':
    # Script entry point: load the FineCapEval ground truth and a JSON file
    # of generated captions, preprocess the ground truth, and print scores.
    parser = argparse.ArgumentParser()
    parser.add_argument('--finecapeval_path', type=str, default="data/FineCapEval.csv")
    parser.add_argument('--generated_id2caption', type=str, default="FineCapEval_results/mle.json")
    args = parser.parse_args()

    df = pd.read_csv(args.finecapeval_path)
    # Explicit check instead of `assert`, which is stripped under `python -O`.
    if df.shape != (5000, 5):
        raise ValueError(
            f"Expected FineCapEval data of shape (5000, 5), got {df.shape}")

    with open(args.generated_id2caption, 'r') as f:
        generated_id2caption = json.load(f)

    print("Preprocessing GT FineCapEval data...")

    # These stay module-level globals: calculate_finegrained_scores reads
    # id2background / id2object / id2relation by name.
    id2caption = {}
    id2background = {}
    id2object = {}
    id2relation = {}

    for row in tqdm(df.itertuples(), total=len(df)):
        # Image id is the file name up to its first '.'.
        image_id = row.image.split('.')[0]
        caption = row.caption
        background = row.background
        obj = row.object
        relation = row.relation

        # Skip rows where any annotation is not a string
        # (presumably NaN from empty CSV cells).
        if not all(isinstance(value, str)
                   for value in (caption, background, obj, relation)):
            continue

        id2caption.setdefault(image_id, []).append(caption)
        id2background.setdefault(image_id, []).extend(_split_phrases(background))
        id2object.setdefault(image_id, []).extend(_split_phrases(obj))
        id2relation.setdefault(image_id, []).extend(_split_phrases(relation))

    print("Calculating scores...")
    calculate_finegrained_scores(
        generated_id2caption,
        id2caption,
        use_coco_eval=True)