# Meteor / loader / create_eval_dataset.py
# File-viewer residue kept for provenance:
# BK-Lee's picture | v1 | 6957169 | raw | history blame | 17.2 kB
import os
import json
import math
import glob
from config import *
from PIL import Image
import pandas as pd
import pyarrow.parquet as pq
import torch.nn.functional as F
from eval.utils import *
from torch.utils.data import Dataset
from torchvision.transforms.functional import pil_to_tensor
class CreateEvalDataset(Dataset):
    """Load the evaluation benchmarks and normalize them into lists of dicts.

    ``__init__`` reads every benchmark file below ``DATASET_ROOT`` (file
    names come from ``config``), runs the matching ``*_filtering`` method and
    stores the results in ``self.data``, a dict keyed by benchmark name.

    Benchmarks exposed through ``self.data``:
    - VQAv2
    - GQA
    - SQA-IMG
    - VizWiz
    - TextVQA
    - POPE
    - MME
    - MMBench (test / dev)
    - MMBench-CN (test / dev)
    - QBench
    - MM-Vet
    - MMMU
    - MathVista
    - AI2D
    - HallusionBench
    - ChartQA
    - SEED
    - LLaVA Wild
    - MathVerse
    - MMStar

    BLINK is not loaded yet (``blink_filtering`` is a stub).
    """

    def __init__(self):
        """Read all benchmark files and build ``self.data``.

        Raises whatever the underlying readers raise (e.g. ``OSError`` for a
        missing file); nothing is caught here.
        """
        super().__init__()

        # dataset root path
        self.dataset_root_path = DATASET_ROOT

        def rooted(relative):
            # Every benchmark file name in config is relative to DATASET_ROOT.
            return os.path.join(DATASET_ROOT, relative)

        load_json = self._load_json

        # load test data
        pre_vqav2 = load_json(rooted(VQAV2))
        pre_gqa = load_json(rooted(GQA))
        pre_sqa = load_json(rooted(SQA))
        pre_sqa_split = load_json(rooted(SQA_SPLIT))
        pre_vizwiz = load_json(rooted(VIZWIZ))
        pre_textvqa = load_json(rooted(TEXTVQA))
        pre_textvqa_annotations = load_json(rooted(TEXTVQA_ANNOTATIONS))
        pre_pope_popular = pd.read_json(rooted(POPE_POPULAR), lines=True)
        pre_pope_adversarial = pd.read_json(rooted(POPE_ADVERSARIAL), lines=True)
        pre_pope_random = pd.read_json(rooted(POPE_RANDOM), lines=True)
        pre_mme = load_json(rooted(MME))
        pre_mmbench = pd.read_table(rooted(MMBENCH))
        pre_mmbench_dev = pd.read_table(rooted(MMBENCH_DEV))
        pre_mmbench_cn = pd.read_table(rooted(MMBENCH_CN))
        pre_mmbench_cn_dev = pd.read_table(rooted(MMBENCH_CN_DEV))
        pre_qbench = load_json(rooted(QBENCH))
        pre_qbench_cn = load_json(rooted(QBENCH_CN))
        pre_mmvet = load_json(rooted(MMVET))
        # glob() already returns paths that include DATASET_ROOT, so they are
        # read as-is; joining the root a second time only worked when the
        # root was absolute. Sorting makes the split order reproducible,
        # because glob() returns matches in arbitrary order.
        mmmu_files = sorted(glob.glob(rooted(MMMU)))
        pre_mmmu = [pq.read_pandas(mf).to_pandas() for mf in mmmu_files]
        pre_mathvista1 = pq.read_pandas(rooted(MATHVISTA)).to_pandas()
        pre_ai2d = load_json(rooted(AI2D))
        pre_hallusionbench = load_json(rooted(HALLUSIONBENCH))
        pre_chartqa = load_json(rooted(CHARTQA))
        pre_seed = load_json(rooted(SEED))
        pre_llava = pd.read_json(rooted(LLAVA), lines=True)
        # pre_blink = json.load(open(os.path.join(DATASET_ROOT, BLINK)))
        pre_mathverse = load_json(rooted(MATHVERSE))
        pre_mathverse_text_only = load_json(rooted(MATHVERSE_TEXT_ONLY))
        pre_mmstar = pq.read_pandas(rooted(MMSTAR)).to_pandas()

        # data filtering
        vqav2 = self.vqav2_filtering(pre_vqav2)
        gqa = self.gqa_filtering(pre_gqa)
        sqa = self.sqa_filtering(pre_sqa, pre_sqa_split)
        vizwiz = self.vizwiz_filtering(pre_vizwiz)
        textvqa = self.textvqa_filtering(pre_textvqa, pre_textvqa_annotations)
        # pope_filtering labels its splits positionally as
        # adversarial / popular / random, so pass them in that order
        # (popular and adversarial used to be swapped here).
        pope = self.pope_filtering([pre_pope_adversarial, pre_pope_popular, pre_pope_random])
        mme = self.mme_filtering(pre_mme)
        mmbench = self.mmbench_filtering(pre_mmbench)
        mmbench_dev = self.mmbench_filtering(pre_mmbench_dev)
        mmbench_cn = self.mmbench_filtering(pre_mmbench_cn)
        mmbench_cn_dev = self.mmbench_filtering(pre_mmbench_cn_dev)
        qbench = self.qbench_filtering(pre_qbench)
        # NOTE(review): qbench_cn is built but never added to self.data below;
        # kept as-is so the set of exposed keys does not change - confirm intent.
        qbench_cn = self.qbench_filtering(pre_qbench_cn)
        mmvet = self.mmvet_filtering(pre_mmvet)
        mmmu = self.mmmu_filtering(pre_mmmu)
        mathvista = self.mathvista_filtering(pre_mathvista1)
        ai2d = self.ai2d_filtering(pre_ai2d)
        hallusionbench = self.hallusionbench_filtering(pre_hallusionbench)
        chartqa = self.chartqa_filtering(pre_chartqa)
        seed = self.seed_filtering(pre_seed)
        llava = self.llava_filtering(pre_llava)
        # blink = self.blink_filtering(pre_blink)
        mathverse = self.mathverse_filtering(pre_mathverse, pre_mathverse_text_only)
        mmstar = self.mmstar_filtering(pre_mmstar)

        # merging
        self.data = {
            'vqav2': vqav2,
            'gqa': gqa,
            'sqa': sqa,
            'vizwiz': vizwiz,
            'textvqa': textvqa,
            'pope': pope,
            'mme': mme,
            'mmbench': mmbench,
            'mmbench_dev': mmbench_dev,
            'mmbench_cn': mmbench_cn,
            'mmbench_cn_dev': mmbench_cn_dev,
            'qbench': qbench,
            'mm-vet': mmvet,
            'mmmu': mmmu,
            'mathvista': mathvista,
            'ai2d': ai2d,
            'hallusionbench': hallusionbench,
            'chartqa': chartqa,
            'seed': seed,
            'llava': llava,
            # 'blink': blink,
            'mathverse': mathverse,
            'mmstar': mmstar,
        }

    @staticmethod
    def _load_json(path):
        """Parse the JSON file at ``path`` and close the file afterwards."""
        with open(path, encoding="utf-8") as f:
            return json.load(f)

    def vqav2_filtering(self, pre_data):
        """Return one sample per entry of ``pre_data['questions']``.

        The image path is built from the integer ``image_id`` zero-padded to
        12 digits.
        """
        return [
            {
                'image': f"VQAv2/test2015/COCO_test2015_{x['image_id']:012d}.jpg",
                'question': x['question'],
                'id': x['question_id'],
            }
            for x in pre_data['questions']
        ]

    def gqa_filtering(self, pre_data):
        """Return one sample per ``question id -> record`` item of ``pre_data``."""
        return [
            {
                'image': f"gqa/images/{x['imageId']}.jpg",
                'question': x['question'],
                'id': qid,
            }
            for qid, x in pre_data.items()
        ]

    def sqa_filtering(self, pre_data, pre_sqa_split):
        """Return the ScienceQA test questions that come with an image.

        ``pre_sqa_split['test']`` lists the keys of ``pre_data`` to use.
        Hint, question and lettered choices are joined with newlines.
        """
        data = []
        for qid in pre_sqa_split['test']:
            x = pre_data[qid]
            if x['image'] is None:
                # Text-only questions are skipped.
                continue
            choices = '\n'.join(
                f"{chr(ord('A') + i)}. {choice}" for i, choice in enumerate(x['choices'])
            )
            data.append({
                'image': f"ScienceQA/images/test/{qid}/image.png",
                'question': '\n'.join([x['hint'], x['question'], choices]),
                'id': qid,
                'candidates': x['choices'],
                'gt': x['answer'],
            })
        return data

    def vizwiz_filtering(self, pre_data):
        """Return one sample per entry; the id is the position in ``pre_data``."""
        return [
            {
                'image': f"VizWiz/test/{x['image']}",
                'question': x['question'],
                'id': qid,
            }
            for qid, x in enumerate(pre_data)
        ]

    def textvqa_filtering(self, pre_data, annotations):
        """Pair each question with the answers at the same position.

        NOTE(review): questions and ``annotations['data']`` are matched purely
        by position (``zip``); this assumes both files share the same order -
        confirm against the data files.
        """
        return [
            {
                'image': f"TextVQA/train_images/{x['image']}",
                'question': x['text'],
                'id': x['question_id'],
                'gt': answer['answers'],
            }
            for x, answer in zip(pre_data, annotations['data'])
        ]

    def pope_filtering(self, pre_data):
        """Flatten the POPE splits into one list, tagging each row's category.

        ``pre_data`` must be ordered ``[adversarial, popular, random]``: the
        category label is assigned by position.
        """
        categories = ['adversarial', 'popular', 'random']
        data = []
        for category, split in zip(categories, pre_data):
            for _, x in split.iterrows():
                data.append({
                    'image': f"coco2014/val2014/{x['image']}",
                    'question': x['text'],
                    'id': x['question_id'],
                    'category': category,
                })
        return data

    def mme_filtering(self, pre_data):
        """Return one sample per MME entry, keeping its category."""
        return [
            {
                'image': f"MME_Benchmark_release_version/{x['image']}",
                'question': x['text'],
                'id': x['question_id'],
                'category': x['category'],
            }
            for x in pre_data
        ]

    def mmbench_filtering(self, pre_data):
        """Build MMBench samples from a DataFrame.

        Options among columns A-D whose value is NaN are dropped and the
        remaining ones are re-lettered from 'A'. A non-NaN hint is put in
        front of the question. ``answer`` is ``None`` when the row has no
        ``answer`` column.
        """
        options = ['A', 'B', 'C', 'D']
        data = []
        for _, x in pre_data.iterrows():
            present = [choice for choice in options if not self.is_none(x[choice])]
            choices = '\n'.join(
                f"{chr(ord('A') + i)}. {x[choice]}" for i, choice in enumerate(present)
            )
            question = '\n'.join([x['question'], choices])
            if not self.is_none(x['hint']):
                question = '\n'.join([x['hint'], question])
            data.append({
                'image': x['image'],
                'question': question,
                'id': x['index'],
                'answer': x['answer'] if 'answer' in x else None,
            })
        return data

    def qbench_filtering(self, pre_data):
        """Return Q-Bench samples; the id is the position in ``pre_data``."""
        data = []
        for qid, x in enumerate(pre_data):
            choices = '\n'.join(
                f"{chr(ord('A') + i)}. {choice}" for i, choice in enumerate(x['candidates'])
            )
            data.append({
                'image': f"LLVisionQA-QBench/images/{x['img_path']}",
                'question': '\n'.join([x['question'], choices]),
                'id': qid,
                'candidates': x['candidates'],
                'gt': x['correct_ans'],
            })
        return data

    def mmvet_filtering(self, pre_data):
        """Return one sample per ``id -> record`` item of the MM-Vet dict."""
        return [
            {
                'image': f"mm-vet/images/{x['imagename']}",
                'question': x['question'],
                'id': qid,
                'gt': x['answer'],
                'capability': x['capability'],
            }
            for qid, x in pre_data.items()
        ]

    def mmmu_filtering(self, pre_data):
        """Build MMMU samples from a list of DataFrames (one per parquet file).

        The number of images attached to a sample is whatever
        ``count_unique_image_tokens`` reports for the question text, and the
        stored question is passed through ``replace_image_tokens`` (both
        helpers are imported from ``eval.utils``).
        """
        data = []
        for split in pre_data:
            for _, x in split.iterrows():
                # SECURITY NOTE: eval() executes arbitrary code. The options
                # column is dataset content; only run this on trusted files
                # (ast.literal_eval would be the safe parser if the column
                # only ever holds list literals - TODO confirm and switch).
                index2ans, all_choices = self.get_multi_choice_info(eval(x['options']))
                choices = ' '.join([f"{k}. {v}" for k, v in index2ans.items()])
                question = '\n'.join([x['question'], choices])
                num_images = count_unique_image_tokens(question)
                data.append({
                    'images': [x[f"image_{i+1}"]['bytes'] for i in range(num_images)],
                    'question': replace_image_tokens(question),
                    'id': x['id'],
                    'question_type': x['question_type'],
                    'gt': x['answer'],
                    'index2ans': index2ans,
                    'all_choices': all_choices,
                })
        return data

    def mathvista_filtering(self, pre_data):
        """Build MathVista samples from a DataFrame.

        ``metadata['skills']`` and ``choices`` are converted with
        ``.tolist()``; the metadata dict is updated in place.
        """
        data = []
        for _, x in pre_data.iterrows():
            skills = x['metadata']['skills'].tolist()
            x['metadata']['skills'] = skills
            choices = x['choices'].tolist() if x['choices'] is not None else None
            data.append({
                'image': f"MathVista/{x['image']}",
                'question': x['query'],
                'question_type': x['question_type'],
                'answer': x['answer'],
                'answer_type': x['answer_type'],
                'choices': choices,
                'metadata': x['metadata'],
                'precision': x['precision'],
                'id': x['pid'],
            })
        return data

    def ai2d_filtering(self, pre_data):
        """Build AI2D samples; the image folder depends on ``metadata['abcLabel']``."""
        data = []
        for x in pre_data:
            meta = x['metadata']
            choices = ' '.join(
                f"{chr(ord('A') + i)}. {choice}" for i, choice in enumerate(meta["answerTexts"])
            )
            if meta['abcLabel']:
                image = f"ai2d/abc_images/{x['imageName']}"
            else:
                image = f"ai2d/images/{x['imageName']}"
            data.append({
                'image': image,
                'question': '\n'.join([x['question'], choices]),
                'id': meta['questionId'],
                'gt': meta['correctAnswer'],
            })
        return data

    def hallusionbench_filtering(self, pre_data):
        """Build HallusionBench samples; the id is the position in ``pre_data``.

        Entries without a filename get an empty image path and the bare
        question; the others get the question prefixed with ``<image>``.
        """
        data = []
        for qid, x in enumerate(pre_data):
            if x['filename'] is None:
                img_path = ""
                question = x['question']
            else:
                # The first two characters of the filename are dropped
                # (presumably a leading "./" - verify against the data file).
                # No .format() on the f-string: it would raise on any file
                # name that contains '{' or '}'.
                img_path = f"HallusionBench/hallusion_bench/{x['filename'][2:]}"
                question = "<image>" + x['question']
            data.append({
                'image': img_path,
                'question': question,
                'id': qid,
                'gt': x['gt_answer'],
            })
        return data

    def chartqa_filtering(self, pre_data):
        """Build ChartQA samples.

        NOTE(review): the id is the image name, so two questions on the same
        chart share an id - confirm that downstream code does not need
        unique ids.
        """
        return [
            {
                'image': f"chartqa/test/png/{x['imgname']}",
                'question': x['query'],
                'id': x['imgname'],
                'gt': x['label'],
            }
            for x in pre_data
        ]

    def seed_filtering(self, pre_data):
        """Build SEED samples from ``pre_data['questions']``, images only.

        Every key containing the substring ``'choice'`` is treated as an
        option; options are lettered in key order.
        """
        data = []
        for x in pre_data['questions']:
            if x['data_type'] != 'image':
                continue
            choice_keys = [key for key in x if 'choice' in key]
            choices = '\n'.join(
                f"{chr(ord('A') + i)}. {x[key]}" for i, key in enumerate(choice_keys)
            )
            data.append({
                'image': f"SEED-Bench/SEED-Bench-image/{x['data_id']}",
                'question': '\n'.join([x['question'], choices]),
                'id': x['question_id'],
                'question_type': x['question_type_id'],
                'gt': x['answer'],
            })
        return data

    def llava_filtering(self, pre_data):
        """Build LLaVA-Bench (in the wild) samples from a DataFrame."""
        return [
            {
                'image': f"llava-bench-in-the-wild/images/{x['image']}",
                'question': x['text'],
                'id': x['question_id'],
                "category": x['category'],
            }
            for _, x in pre_data.iterrows()
        ]

    def blink_filtering(self, pre_data):
        """Placeholder for BLINK; currently returns an empty list."""
        data = []
        # TODO
        return data

    def mathverse_filtering(self, pre_data, pre_data_text_only):
        """Build MathVerse samples from the image set and the text-only set.

        Image samples keep their ``sample_index`` as id and get ``<image>``
        prepended to the question. Text-only samples have an empty image
        path and their id shifted by ``len(pre_data)`` (as a string), so the
        two id ranges do not collide.
        """
        def common_fields(x):
            # Fields copied verbatim for both kinds of sample.
            return {
                'problem_index': x['problem_index'],
                'problem_version': x['problem_version'],
                'gt': x['answer'],
                'question_type': x['question_type'],
                'metadata': x['metadata'],
                'query_cot': x['query_cot'],
                'origin_question': x['question'],
            }

        data = []
        for x in pre_data:
            data.append({
                'image': f"MathVerse/images/{x['image']}",
                'question': "<image>" + x['query_wo'],
                # 'question': "<image>" + x['query_cot'],
                'id': x['sample_index'],
                **common_fields(x),
            })

        offset = len(pre_data)
        for x in pre_data_text_only:
            data.append({
                'image': "",
                'question': x['query_wo'],
                # 'question': x['query_cot'],
                'id': str(int(x['sample_index']) + offset),
                **common_fields(x),
            })
        return data

    def is_none(self, value):
        """Return True when ``value`` is a float NaN (pandas' missing marker).

        ``isinstance`` is used so that float subclasses such as
        ``numpy.float64`` are recognised as well. ``None`` itself is not
        treated as missing.
        """
        return isinstance(value, float) and math.isnan(value)

    def get_options(self, row, options):
        """Collect ``row[option]`` for each option, stopping at the first NaN."""
        parsed_options = []
        for option in options:
            option_value = row[option]
            if self.is_none(option_value):
                break
            parsed_options.append(option_value)
        return parsed_options

    def __len__(self):
        """Return the number of benchmarks in ``self.data`` (not of samples)."""
        return len(self.data)

    def get_multi_choice_info(self, options):
        """Letter the given options starting at 'A'.

        Returns ``(index2ans, all_choices)``: a dict mapping each letter to
        its option and the list of letters in order.
        """
        all_choices = [chr(ord('A') + i) for i in range(len(options))]
        index2ans = dict(zip(all_choices, options))
        return index2ans, all_choices

    def mmstar_filtering(self, pre_data):
        """Build MMStar samples from a DataFrame."""
        return [
            {
                'id': x['index'],
                'question': x['question'],
                'answer': x['answer'],
                'category': x['category'],
                'l2_category': x['l2_category'],
                # 'bench': x['bench'],
                'image': x['image'],
            }
            for _, x in pre_data.iterrows()
        ]