import os

import pandas as pd
from mmengine.dist import master_only
from mmengine.logging import print_log
from PIL import Image

from xtuner.registry import BUILDER

from .base_eval_dataset import BaseEvalDataset
from .utils import YOrN_Extraction, custom_data_process, load_jsonl


def eval_func(pred_list, label_list):
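    """Compute accuracy, precision, recall and F1 for binary yes/no
    predictions (1 = yes, 0 = no), log each metric and return the F1 score."""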
    pos = 1
    neg = 0
    yes_ratio = pred_list.count(1) / len(pred_list)

    TP, TN, FP, FN = 0, 0, 0, 0
    for pred, label in zip(pred_list, label_list):
        if pred == pos and label == pos:
            TP += 1
        elif pred == pos and label == neg:
            FP += 1
        elif pred == neg and label == neg:
            TN += 1
        elif pred == neg and label == pos:
            FN += 1

    print_log('TP\tFP\tTN\tFN', 'current')
    print_log(f'{TP}\t{FP}\t{TN}\t{FN}', 'current')

    # Guard against zero denominators (e.g. a model that never predicts "yes").
    precision = TP / (TP + FP) if TP + FP > 0 else 0.0
    recall = TP / (TP + FN) if TP + FN > 0 else 0.0
    f1 = (2 * precision * recall / (precision + recall)
          if precision + recall > 0 else 0.0)
    acc = (TP + TN) / (TP + TN + FP + FN)
    print_log(f'Accuracy: {acc}', 'current')
    print_log(f'Precision: {precision}', 'current')
    print_log(f'Recall: {recall}', 'current')
    print_log(f'F1 score: {f1}', 'current')
    print_log(f'Yes ratio: {yes_ratio}', 'current')
    return f1


class POPEDataset(BaseEvalDataset):
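    """Evaluation dataset for the POPE (Polling-based Object Probing
    Evaluation) hallucination benchmark.

    Each jsonl annotation file is treated as its own category (e.g. the
    random/popular/adversarial splits); ``evaluate`` logs per-category
    metrics and returns the mean F1 score across categories.
    """
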
    METAINFO: dict = dict(name='pope')

    def __init__(self, data_file, coco_val_path, image_processor,
                 pad_image_to_square=True,
                 metainfo=None):
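        """
        Args:
            data_file (str | list[str]): Path(s) to the POPE jsonl annotation
                file(s). Each file becomes a separate evaluation category.
            coco_val_path (str): Directory containing the images referenced by
                the annotations.
            image_processor (dict): Image processor config, instantiated via
                ``BUILDER``.
            pad_image_to_square (bool): Whether to pad images to a square
                before processing. Defaults to True.
            metainfo (dict, optional): Extra meta information forwarded to
                ``BaseEvalDataset``.
        """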
        super().__init__(metainfo)
        if isinstance(data_file, str):
            data_file = [data_file]
        self.raw_data = [load_jsonl(f) for f in data_file]

        self.name = [
            os.path.splitext(os.path.basename(f))[0] for f in data_file
        ]

        self.coco_val_path = coco_val_path
        self.image_processor = BUILDER.build(image_processor)
        self.pad_image_to_square = pad_image_to_square

        self.results_xlsx_path = 'pope-results.xlsx'
        self.data = self.load_data_list()

    def get_image(self, image):
        image = Image.open(os.path.join(self.coco_val_path, image))
        return image

    def __len__(self):
        return len(self.data)

    def load_data_list(self):
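        # Flatten every jsonl file into a single sample list; `category`
        # records which POPE split (source file) each sample came from.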
        data_list = []
        idx = 0
        for data_idx in range(len(self.raw_data)):
            for sample_idx in range(len(self.raw_data[data_idx])):
                sample = self.raw_data[data_idx][sample_idx]
                index = sample['question_id']
                image_path = sample['image']
                question = sample['text']
                answer = sample['label']
                category = self.name[data_idx]
                assert answer in ['yes', 'no']
                data = {
                    'img_id': idx,
                    'index': index,
                    'img': image_path,
                    'question': question,
                    'answer': answer,
                    'category': category
                }
                data_list.append(data)
                idx += 1
        return data_list

    def __getitem__(self, idx):
        data = self.data[idx]
        data_dict = custom_data_process(self, data)
        return data_dict

    @master_only
    def evaluate(self, result, work_dir, show=True):
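        """Dump per-sample predictions to ``pope-results.xlsx`` under
        ``work_dir``, compute the F1 score per category (one per jsonl file)
        and return the average F1 across categories."""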
        orig_index = [x['img_id'] for x in self.data]
        results = []
        for pred_dict in result:
            index = pred_dict['img_id']
            new_index = orig_index.index(index)
            filtered_rows = self.data[new_index]
            cur_result = {
                'question': filtered_rows['question'],
                'prediction': pred_dict['prediction'],
                'category': filtered_rows['category'],
                'index': filtered_rows['index'],
                'answer': filtered_rows['answer'],
            }
            results.append(cur_result)

        results_df = pd.DataFrame(results)
        with pd.ExcelWriter(
                os.path.join(work_dir, self.results_xlsx_path),
                engine='openpyxl') as writer:
            results_df.to_excel(writer, index=False)

        score = 0
        for sub_name in self.name:
            sub_results = [x for x in results if x['category'] == sub_name]
            pred_list = [
                int(YOrN_Extraction(x['prediction']) == 'Yes')
                for x in sub_results
            ]
            label_list = [
                int(YOrN_Extraction(x['answer']) == 'Yes') for x in sub_results
            ]
            print_log('============================================', 'current')
            print_log(f'Category: {sub_name}, # samples: {len(sub_results)}',
                      'current')
            cur_f1 = eval_func(pred_list, label_list)
            score += cur_f1

        score /= len(self.name)
        print_log('============================================', 'current')
        print_log(f'Average F1-score: {score}', 'current')
        print_log('============================================', 'current')
        print_log('POPE evaluation finished successfully', 'current')
        return score