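"""Evaluate yes/no answers on the POPE (Polling-based Object Probing Evaluation) benchmark.

For each coco_pope_*.json annotation file, model answers are normalized to 'yes'/'no',
compared against the ground-truth labels, and accuracy, precision, recall, F1 score, and
yes-ratio are reported per category; the F1 score averaged over the splits is printed
and written to a per-model text file at the end.
"""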
import os
import json
import argparse

# Running sum of per-category F1 scores, accumulated across eval_pope() calls.
avg_f1_score = 0


def eval_pope(answers, label_file):
    label_list = [json.loads(q)['label'] for q in open(label_file, 'r')]

    # Collapse each free-form answer to a bare 'yes' / 'no'.
    for answer in answers:
        text = answer['text']

        # Only keep the first sentence
        if text.find('.') != -1:
            text = text.split('.')[0]

        text = text.replace(',', '')
        words = text.split(' ')
        if 'No' in words or 'not' in words or 'no' in words:
            answer['text'] = 'no'
        else:
            answer['text'] = 'yes'

    # Encode ground-truth labels as 1 (yes) / 0 (no).
    for i in range(len(label_list)):
        if label_list[i] == 'no':
            label_list[i] = 0
        else:
            label_list[i] = 1

    pred_list = []
    for answer in answers:
        if answer['text'] == 'no':
            pred_list.append(0)
        else:
            pred_list.append(1)

    pos = 1
    neg = 0
    yes_ratio = pred_list.count(1) / len(pred_list)

    TP, TN, FP, FN = 0, 0, 0, 0
    for pred, label in zip(pred_list, label_list):
        if pred == pos and label == pos:
            TP += 1
        elif pred == pos and label == neg:
            FP += 1
        elif pred == neg and label == neg:
            TN += 1
        elif pred == neg and label == pos:
            FN += 1

    str_to_log = ''
    str_to_log += 'TP\tFP\tTN\tFN\t\n'
    str_to_log += '{}\t{}\t{}\t{}\n'.format(TP, FP, TN, FN)

    precision = float(TP) / float(TP + FP)
    recall = float(TP) / float(TP + FN)
    f1 = 2 * precision * recall / (precision + recall)

    # Accumulate F1 so the main block can report the average over all categories.
    global avg_f1_score
    avg_f1_score += f1

    acc = (TP + TN) / (TP + TN + FP + FN)
    str_to_log += 'Accuracy: {}\n'.format(acc)
    str_to_log += 'Precision: {}\n'.format(precision)
    str_to_log += 'Recall: {}\n'.format(recall)
    str_to_log += 'F1 score: {}\n'.format(f1)
    str_to_log += 'Yes ratio: {}\n'.format(yes_ratio)
    str_to_log += '%.3f, %.3f, %.3f, %.3f, %.3f\n' % (f1, acc, precision, recall, yes_ratio)
    print(str_to_log)
    return str_to_log


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--annotation-dir", type=str)
    parser.add_argument("--question-file", type=str)
    parser.add_argument("--result-file", type=str)
    parser.add_argument("--model-name", type=str, default='')
    args = parser.parse_args()

    questions = [json.loads(line) for line in open(args.question_file)]
    questions = {question['question_id']: question for question in questions}
    answers = [json.loads(q) for q in open(args.result_file)]

    outputs = ''
    for file in os.listdir(args.annotation_dir):
        assert file.startswith('coco_pope_')
        assert file.endswith('.json')
        category = file[10:-5]  # strip the 'coco_pope_' prefix and the '.json' suffix
        cur_answers = [x for x in answers if questions[x['question_id']]['category'] == category]
        outputs += 'Category: {}, # samples: {}\n'.format(category, len(cur_answers))
        print('Category: {}, # samples: {}'.format(category, len(cur_answers)))
        outputs += eval_pope(cur_answers, os.path.join(args.annotation_dir, file))
        print("====================================")

    # The divisor assumes the three standard POPE splits (adversarial, popular, random).
    print(f"Average F1-score: {avg_f1_score/3:.4f}")

    with open(f"/data/naman_deep_singh/project_multimodal/pope_evals/{args.model_name}.txt", 'w') as f:
        f.write(outputs)
        f.write(f"Average F1-score: {avg_f1_score/3:.4f}\n")