zhouyik's picture
Upload folder using huggingface_hub
032e687 verified
import os
import pandas as pd
from mmengine.dist import master_only
from PIL import Image
from xtuner.registry import BUILDER
from mmengine.logging import print_log
from .base_eval_dataset import BaseEvalDataset
from .utils import YOrN_Extraction, load_jsonl
from .utils import custom_data_process
def eval_func(pred_list, label_list):
    """Log binary-classification metrics and return the F1 score.

    Predictions and labels are compared pairwise (``zip``), with ``1``
    treated as the positive ("yes") class and ``0`` as the negative ("no")
    class. Pairs holding any other value are not counted in the confusion
    matrix.

    Args:
        pred_list (list): Predicted classes, each ``1`` or ``0``.
        label_list (list): Ground-truth classes, each ``1`` or ``0``.

    Returns:
        float: The F1 score. Any ratio whose denominator is zero (empty
        input, no positive predictions, no positive labels, ...) is
        reported as ``0.0`` instead of raising ``ZeroDivisionError``.
    """
    pos = 1
    neg = 0

    def _ratio(numerator, denominator):
        # An undefined ratio (zero denominator) is reported as 0.0 so that a
        # degenerate split cannot abort the whole evaluation run.
        return numerator / denominator if denominator else 0.0

    yes_ratio = _ratio(pred_list.count(1), len(pred_list))

    TP, TN, FP, FN = 0, 0, 0, 0
    for pred, label in zip(pred_list, label_list):
        if pred == pos and label == pos:
            TP += 1
        elif pred == pos and label == neg:
            FP += 1
        elif pred == neg and label == neg:
            TN += 1
        elif pred == neg and label == pos:
            FN += 1

    print_log('TP\tFP\tTN\tFN\t', 'current')
    print_log(f'{TP}\t{FP}\t{TN}\t{FN}', 'current')

    precision = _ratio(TP, TP + FP)
    recall = _ratio(TP, TP + FN)
    f1 = _ratio(2 * precision * recall, precision + recall)
    acc = _ratio(TP + TN, TP + TN + FP + FN)

    print_log(f'Accuracy: {acc}', 'current')
    print_log(f'Precision: {precision}', 'current')
    print_log(f'Recall: {recall}', 'current')
    print_log(f'F1 score: {f1}', 'current')
    print_log(f'Yes ratio: {yes_ratio}', 'current')

    return f1
class POPEDataset(BaseEvalDataset):
    """Yes/no question evaluation dataset (``METAINFO`` name ``pope``).

    Each data file is loaded with ``load_jsonl`` and forms one category,
    named after the file's basename without extension. Every sample must
    provide the keys ``question_id``, ``image``, ``text`` and ``label``,
    where ``label`` is ``'yes'`` or ``'no'``.

    Args:
        data_file (str | list[str]): Path, or list of paths, of the jsonl
            annotation files.
        coco_val_path (str): Directory that the samples' ``image`` entries
            are joined onto.
        image_processor: Config handed to ``BUILDER.build`` to create the
            image processor.
        pad_image_to_square (bool): Stored on the instance; not used by the
            code in this class itself (presumably consumed by
            ``custom_data_process`` -- TODO confirm).
        metainfo: Forwarded unchanged to ``BaseEvalDataset.__init__``.
    """

    METAINFO: dict = dict(name='pope')

    def __init__(self, data_file, coco_val_path, image_processor,
                 pad_image_to_square=True,
                 metainfo=None):
        super().__init__(metainfo)
        if isinstance(data_file, str):
            data_file = [data_file]
        self.raw_data = [load_jsonl(f) for f in data_file]
        # One category name per data file: basename without its extension.
        self.name = [
            os.path.splitext(os.path.basename(f))[0] for f in data_file
        ]
        self.coco_val_path = coco_val_path
        self.image_processor = BUILDER.build(image_processor)
        self.pad_image_to_square = pad_image_to_square
        # File name (inside ``work_dir``) that ``evaluate`` writes to.
        self.results_xlsx_path = 'pope-results.xlsx'
        self.data = self.load_data_list()

    def get_image(self, image):
        """Open and return the image ``image`` located in ``coco_val_path``."""
        image = Image.open(os.path.join(self.coco_val_path, image))
        return image

    def __len__(self):
        """Return the total number of samples across all data files."""
        return len(self.data)

    def load_data_list(self):
        """Flatten every loaded data file into a single list of samples.

        Returns:
            list[dict]: One dict per sample with the keys ``img_id`` (running
            position in the flattened list), ``index`` (the sample's
            ``question_id``), ``img``, ``question``, ``answer`` and
            ``category`` (name of the originating data file).
        """
        data_list = []
        for category, samples in zip(self.name, self.raw_data):
            for sample in samples:
                answer = sample['label']
                assert answer in ['yes', 'no'], (
                    f'unexpected label {answer!r} in category {category!r}')
                data_list.append({
                    # Running position in the flattened list; ``evaluate``
                    # uses it to match predictions back to samples.
                    'img_id': len(data_list),
                    'index': sample['question_id'],
                    'img': sample['image'],
                    'question': sample['text'],
                    'answer': answer,
                    'category': category
                })
        return data_list

    def __getitem__(self, idx):
        """Return sample ``idx`` after ``custom_data_process``."""
        data = self.data[idx]
        data_dict = custom_data_process(self, data)
        return data_dict

    @master_only
    def evaluate(self, result, work_dir, show=True):
        """Save the predictions to an xlsx file and log per-category scores.

        Args:
            result (list[dict]): Predictions; every entry needs ``img_id``
                (matching a sample's ``img_id``) and ``prediction``.
            work_dir (str): Directory the results spreadsheet is written to.
            show (bool): Unused by this method.

        Returns:
            float: F1 score averaged over the categories, or ``0.0`` when
            the dataset was built from no data files.

        Raises:
            ValueError: If a prediction's ``img_id`` matches no sample.
        """
        orig_index = [x['img_id'] for x in self.data]
        # Build the id -> position table once instead of running a linear
        # ``list.index`` search for every prediction. ``setdefault`` keeps
        # the first occurrence, exactly like ``list.index`` does.
        position_of = {}
        for position, img_id in enumerate(orig_index):
            position_of.setdefault(img_id, position)

        results = []
        for pred_dict in result:
            index = pred_dict['img_id']
            try:
                new_index = position_of[index]
            except (KeyError, TypeError):
                # Ids that are unhashable, or equal to a stored id without
                # sharing its hash, still resolve through the equality-based
                # search; a truly unknown id raises ValueError here.
                new_index = orig_index.index(index)
            filtered_rows = self.data[new_index]

            results.append({
                'question': filtered_rows.get('question'),
                'prediction': pred_dict['prediction'],
                'category': filtered_rows['category'],
                'index': filtered_rows.get('index'),
                'answer': filtered_rows.get('answer'),
            })

        results_df = pd.DataFrame(results)
        with pd.ExcelWriter(
                os.path.join(work_dir, self.results_xlsx_path),
                engine='openpyxl') as writer:
            results_df.to_excel(writer, index=False)

        score = 0
        for sub_name in self.name:
            sub_results = [x for x in results if x['category'] == sub_name]
            # Reduce free-form text to a binary class: 1 for 'Yes', else 0.
            pred_list = [
                int(YOrN_Extraction(x['prediction']) == 'Yes')
                for x in sub_results
            ]
            label_list = [
                int(YOrN_Extraction(x['answer']) == 'Yes') for x in sub_results
            ]
            print_log('============================================', 'current')
            print_log('Category: {}, # samples: {}'.format(sub_name,
                                                           len(sub_results)), 'current')
            cur_f1 = eval_func(pred_list, label_list)
            score += cur_f1

        # Without any data file there is nothing to average over.
        score = score / len(self.name) if self.name else 0.0
        print_log('============================================', 'current')
        print_log(f'Average F1-score: {score}', 'current')
        print_log('============================================', 'current')
        print_log('POPE successfully finished evaluating', 'current')
        return score