Spaces:

BK-Lee
/

Meteor

Sleeping

Meteor / loader /create_eval_dataset.py

6957169 7 months ago

17.2 kB

	import os
	import json
	import math
	import glob
	from config import *
	from PIL import Image
	import pandas as pd
	import pyarrow.parquet as pq
	import torch.nn.functional as F
	from eval.utils import *
	from torch.utils.data import Dataset
	from torchvision.transforms.functional import pil_to_tensor

	class CreateEvalDataset(Dataset):
	    """Load the evaluation benchmarks and normalise them into lists of dicts.

	    After construction, ``self.data`` maps a benchmark key to the list of
	    sample dicts produced by the matching ``*_filtering`` method. Keys stored:

	    vqav2, gqa, sqa, vizwiz, textvqa, pope, mme, mmbench, mmbench_dev,
	    mmbench_cn, mmbench_cn_dev, qbench, mm-vet, mmmu, mathvista, ai2d,
	    hallusionbench, chartqa, seed, llava, mathverse, mmstar

	    BLINK is not loaded: ``blink_filtering`` is still a stub.
	    """

	    def __init__(self):
	        """Read every benchmark file under ``DATASET_ROOT`` and fill ``self.data``.

	        The relative file locations come from the constants star-imported
	        from ``config`` (``VQAV2``, ``GQA``, ...).
	        """
	        super().__init__()

	        # dataset root path
	        self.dataset_root_path = DATASET_ROOT

	        # load test data
	        pre_vqav2 = self._load_json(VQAV2)
	        pre_gqa = self._load_json(GQA)
	        pre_sqa = self._load_json(SQA)
	        pre_sqa_split = self._load_json(SQA_SPLIT)
	        pre_vizwiz = self._load_json(VIZWIZ)
	        pre_textvqa = self._load_json(TEXTVQA)
	        pre_textvqa_annotations = self._load_json(TEXTVQA_ANNOTATIONS)
	        pre_pope_popular = pd.read_json(os.path.join(DATASET_ROOT, POPE_POPULAR), lines=True)
	        pre_pope_adversarial = pd.read_json(os.path.join(DATASET_ROOT, POPE_ADVERSARIAL), lines=True)
	        pre_pope_random = pd.read_json(os.path.join(DATASET_ROOT, POPE_RANDOM), lines=True)
	        pre_mme = self._load_json(MME)
	        pre_mmbench = pd.read_table(os.path.join(DATASET_ROOT, MMBENCH))
	        pre_mmbench_dev = pd.read_table(os.path.join(DATASET_ROOT, MMBENCH_DEV))
	        pre_mmbench_cn = pd.read_table(os.path.join(DATASET_ROOT, MMBENCH_CN))
	        pre_mmbench_cn_dev = pd.read_table(os.path.join(DATASET_ROOT, MMBENCH_CN_DEV))
	        pre_qbench = self._load_json(QBENCH)
	        pre_qbench_cn = self._load_json(QBENCH_CN)
	        pre_mmvet = self._load_json(MMVET)
	        # glob.glob already returns paths that include DATASET_ROOT, so they
	        # are read as-is (re-joining the root breaks when the root is
	        # relative). Sorting makes the split order reproducible.
	        mmmu_files = sorted(glob.glob(os.path.join(DATASET_ROOT, MMMU)))
	        pre_mmmu = [pq.read_pandas(mf).to_pandas() for mf in mmmu_files]
	        pre_mathvista1 = self._load_parquet(MATHVISTA)
	        pre_ai2d = self._load_json(AI2D)
	        pre_hallusionbench = self._load_json(HALLUSIONBENCH)
	        pre_chartqa = self._load_json(CHARTQA)
	        pre_seed = self._load_json(SEED)
	        pre_llava = pd.read_json(os.path.join(DATASET_ROOT, LLAVA), lines=True)
	        # pre_blink = json.load(open(os.path.join(DATASET_ROOT, BLINK)))
	        pre_mathverse = self._load_json(MATHVERSE)
	        pre_mathverse_text_only = self._load_json(MATHVERSE_TEXT_ONLY)
	        pre_mmstar = self._load_parquet(MMSTAR)

	        # data filtering
	        vqav2 = self.vqav2_filtering(pre_vqav2)
	        gqa = self.gqa_filtering(pre_gqa)
	        sqa = self.sqa_filtering(pre_sqa, pre_sqa_split)
	        vizwiz = self.vizwiz_filtering(pre_vizwiz)
	        textvqa = self.textvqa_filtering(pre_textvqa, pre_textvqa_annotations)
	        # Pass the splits keyed by name so every sample is tagged with the
	        # split it was actually read from (a positional list used to pair
	        # the popular file with the 'adversarial' label and vice versa).
	        pope = self.pope_filtering({'popular': pre_pope_popular,
	                                    'adversarial': pre_pope_adversarial,
	                                    'random': pre_pope_random})
	        mme = self.mme_filtering(pre_mme)
	        mmbench = self.mmbench_filtering(pre_mmbench)
	        mmbench_dev = self.mmbench_filtering(pre_mmbench_dev)
	        mmbench_cn = self.mmbench_filtering(pre_mmbench_cn)
	        mmbench_cn_dev = self.mmbench_filtering(pre_mmbench_cn_dev)
	        qbench = self.qbench_filtering(pre_qbench)
	        # NOTE(review): qbench_cn is built but never stored in self.data;
	        # confirm whether a 'qbench_cn' entry is intended.
	        qbench_cn = self.qbench_filtering(pre_qbench_cn)
	        mmvet = self.mmvet_filtering(pre_mmvet)
	        mmmu = self.mmmu_filtering(pre_mmmu)
	        mathvista = self.mathvista_filtering(pre_mathvista1)
	        ai2d = self.ai2d_filtering(pre_ai2d)
	        hallusionbench = self.hallusionbench_filtering(pre_hallusionbench)
	        chartqa = self.chartqa_filtering(pre_chartqa)
	        seed = self.seed_filtering(pre_seed)
	        llava = self.llava_filtering(pre_llava)
	        # blink = self.blink_filtering(pre_blink)
	        mathverse = self.mathverse_filtering(pre_mathverse, pre_mathverse_text_only)
	        mmstar = self.mmstar_filtering(pre_mmstar)

	        # merging
	        self.data = {
	            'vqav2': vqav2,
	            'gqa': gqa,
	            'sqa': sqa,
	            'vizwiz': vizwiz,
	            'textvqa': textvqa,
	            'pope': pope,
	            'mme': mme,
	            'mmbench': mmbench,
	            'mmbench_dev': mmbench_dev,
	            'mmbench_cn': mmbench_cn,
	            'mmbench_cn_dev': mmbench_cn_dev,
	            'qbench': qbench,
	            'mm-vet': mmvet,
	            'mmmu': mmmu,
	            'mathvista': mathvista,
	            'ai2d': ai2d,
	            'hallusionbench': hallusionbench,
	            'chartqa': chartqa,
	            'seed': seed,
	            'llava': llava,
	            # 'blink': blink,
	            'mathverse': mathverse,
	            'mmstar': mmstar,
	        }

	    @staticmethod
	    def _load_json(relative_path):
	        """Parse the JSON file at ``DATASET_ROOT/relative_path`` and close it."""
	        with open(os.path.join(DATASET_ROOT, relative_path)) as handle:
	            return json.load(handle)

	    @staticmethod
	    def _load_parquet(relative_path):
	        """Read the parquet file at ``DATASET_ROOT/relative_path`` into a DataFrame."""
	        return pq.read_pandas(os.path.join(DATASET_ROOT, relative_path)).to_pandas()

	    def vqav2_filtering(self, pre_data):
	        """Return one sample per entry of ``pre_data['questions']``.

	        The image path is built from ``image_id`` zero-padded to 12 digits.
	        """
	        return [{'image': f"VQAv2/test2015/COCO_test2015_{x['image_id']:012d}.jpg",
	                 'question': x['question'],
	                 'id': x['question_id']}
	                for x in pre_data['questions']]

	    def gqa_filtering(self, pre_data):
	        """Return one sample per ``{question_id: record}`` item of ``pre_data``."""
	        return [{'image': f"gqa/images/{x['imageId']}.jpg",
	                 'question': x['question'],
	                 'id': qid}
	                for qid, x in pre_data.items()]

	    def sqa_filtering(self, pre_data, pre_sqa_split):
	        """Return the ScienceQA test questions that have an image.

	        Only ids listed in ``pre_sqa_split['test']`` are considered. The
	        prompt is hint, question and lettered choices joined by newlines.
	        """
	        data = []
	        for qid in pre_sqa_split['test']:
	            x = pre_data[qid]
	            if x['image'] is None:
	                continue
	            choices = '\n'.join(f"{chr(ord('A') + i)}. {choice}" for i, choice in enumerate(x['choices']))
	            question = '\n'.join([x['hint'], x['question'], choices])
	            data.append({'image': f"ScienceQA/images/test/{qid}/image.png",
	                         'question': question,
	                         'id': qid,
	                         'candidates': x['choices'],
	                         'gt': x['answer']})
	        return data

	    def vizwiz_filtering(self, pre_data):
	        """Return one sample per record; the id is the record's list position."""
	        return [{'image': f"VizWiz/test/{x['image']}",
	                 'question': x['question'],
	                 'id': qid}
	                for qid, x in enumerate(pre_data)]

	    def textvqa_filtering(self, pre_data, annotations):
	        """Pair each question with the answers at the same position.

	        NOTE(review): ``pre_data`` and ``annotations['data']`` are matched
	        purely by order (``zip``); this assumes both files list the
	        questions in the same sequence - confirm against the data files.
	        """
	        return [{'image': f"TextVQA/train_images/{x['image']}",
	                 'question': x['text'],
	                 'id': x['question_id'],
	                 'gt': answer['answers']}
	                for x, answer in zip(pre_data, annotations['data'])]

	    def pope_filtering(self, pre_data):
	        """Return the POPE samples, each tagged with its split category.

	        Args:
	            pre_data: either a mapping ``{category: DataFrame}`` (each sample
	                gets its own key as category), or a sequence of DataFrames
	                in the order adversarial, popular, random (legacy form).
	        """
	        if isinstance(pre_data, dict):
	            labelled_splits = pre_data.items()
	        else:
	            labelled_splits = zip(['adversarial', 'popular', 'random'], pre_data)

	        data = []
	        for category, split in labelled_splits:
	            for _, x in split.iterrows():
	                data.append({'image': f"coco2014/val2014/{x['image']}",
	                             'question': x['text'],
	                             'id': x['question_id'],
	                             'category': category})
	        return data

	    def mme_filtering(self, pre_data):
	        """Return one sample per MME record, keeping its category."""
	        return [{'image': f"MME_Benchmark_release_version/{x['image']}",
	                 'question': x['text'],
	                 'id': x['question_id'],
	                 'category': x['category']}
	                for x in pre_data]

	    def mmbench_filtering(self, pre_data):
	        """Return one multiple-choice sample per row of an MMBench table.

	        NaN options are skipped, and a non-NaN hint is placed before the
	        question. ``answer`` is ``None`` when the table has no such column.
	        """
	        option_letters = ['A', 'B', 'C', 'D']
	        data = []
	        for _, x in pre_data.iterrows():
	            present = [letter for letter in option_letters if not self.is_none(x[letter])]
	            # Label each option with its own column letter so the prompt
	            # stays aligned with the answer key even if an option is missing.
	            choices = '\n'.join(f"{letter}. {x[letter]}" for letter in present)
	            question = '\n'.join([x['question'], choices])

	            if not self.is_none(x['hint']):
	                question = '\n'.join([x['hint'], question])

	            data.append({'image': x['image'],
	                         'question': question,
	                         'id': x['index'],
	                         'answer': x['answer'] if 'answer' in x else None})
	        return data

	    def qbench_filtering(self, pre_data):
	        """Return one multiple-choice sample per record; id is the list position."""
	        data = []
	        for qid, x in enumerate(pre_data):
	            choices = '\n'.join(f"{chr(ord('A') + i)}. {choice}" for i, choice in enumerate(x['candidates']))
	            data.append({'image': f"LLVisionQA-QBench/images/{x['img_path']}",
	                         'question': '\n'.join([x['question'], choices]),
	                         'id': qid,
	                         'candidates': x['candidates'],
	                         'gt': x['correct_ans']})
	        return data

	    def mmvet_filtering(self, pre_data):
	        """Return one sample per ``{question_id: record}`` item of ``pre_data``."""
	        return [{'image': f"mm-vet/images/{x['imagename']}",
	                 'question': x['question'],
	                 'id': qid,
	                 'gt': x['answer'],
	                 'capability': x['capability']}
	                for qid, x in pre_data.items()]

	    def mmmu_filtering(self, pre_data):
	        """Return one sample per row across all MMMU DataFrames in ``pre_data``.

	        The number of images attached to a sample is whatever
	        ``count_unique_image_tokens`` reports for its prompt; the prompt
	        itself is passed through ``replace_image_tokens``.
	        """
	        data = []
	        for split in pre_data:
	            for _, x in split.iterrows():
	                # NOTE(review): eval() executes whatever expression the
	                # 'options' column holds; only use with trusted dataset
	                # files (ast.literal_eval would be the safer parser).
	                index2ans, all_choices = self.get_multi_choice_info(eval(x['options']))
	                choices = ' '.join([f"{k}. {v}" for k, v in index2ans.items()])
	                question = '\n'.join([x['question'], choices])
	                num_images = count_unique_image_tokens(question)
	                data.append({'images': [x[f"image_{i+1}"]['bytes'] for i in range(num_images)],
	                             'question': replace_image_tokens(question),
	                             'id': x['id'],
	                             'question_type': x['question_type'],
	                             'gt': x['answer'],
	                             'index2ans': index2ans,
	                             'all_choices': all_choices})
	        return data

	    def mathvista_filtering(self, pre_data):
	        """Return one sample per MathVista row.

	        ``metadata['skills']`` and ``choices`` are converted with
	        ``.tolist()``. The metadata is copied first so the dict held by
	        ``pre_data`` is left untouched.
	        """
	        data = []
	        for _, x in pre_data.iterrows():
	            metadata = dict(x['metadata'])
	            metadata['skills'] = metadata['skills'].tolist()
	            choices = x['choices'].tolist() if x['choices'] is not None else None
	            data.append({'image': f"MathVista/{x['image']}",
	                         'question': x['query'],
	                         'question_type': x['question_type'],
	                         'answer': x['answer'],
	                         'answer_type': x['answer_type'],
	                         'choices': choices,
	                         'metadata': metadata,
	                         'precision': x['precision'],
	                         'id': x['pid']})
	        return data

	    def ai2d_filtering(self, pre_data):
	        """Return one multiple-choice sample per AI2D record.

	        Records whose ``metadata['abcLabel']`` is truthy point at the
	        ``abc_images`` folder, all others at ``images``.
	        """
	        data = []
	        for x in pre_data:
	            meta = x['metadata']
	            choices = ' '.join(f"{chr(ord('A') + i)}. {choice}" for i, choice in enumerate(meta["answerTexts"]))
	            folder = "ai2d/abc_images" if meta['abcLabel'] else "ai2d/images"
	            data.append({'image': f"{folder}/{x['imageName']}",
	                         'question': '\n'.join([x['question'], choices]),
	                         'id': meta['questionId'],
	                         'gt': meta['correctAnswer']})
	        return data

	    def hallusionbench_filtering(self, pre_data):
	        """Return one sample per record; id is the list position.

	        Records without a filename get an empty image path and the bare
	        question; the others drop the first two characters of the filename
	        and get ``<image>`` prepended to the question.
	        """
	        data = []
	        for qid, x in enumerate(pre_data):
	            if x['filename'] is None:
	                img_path = ""
	                question = x['question']
	            else:
	                img_path = f"HallusionBench/hallusion_bench/{x['filename'][2:]}"
	                question = "<image>" + x['question']
	            data.append({'image': img_path,
	                         'question': question,
	                         'id': qid,
	                         'gt': x['gt_answer']})
	        return data

	    def chartqa_filtering(self, pre_data):
	        """Return one sample per ChartQA record; the id is the image name."""
	        return [{'image': f"chartqa/test/png/{x['imgname']}",
	                 'question': x['query'],
	                 'id': x['imgname'],
	                 'gt': x['label']}
	                for x in pre_data]

	    def seed_filtering(self, pre_data):
	        """Return the SEED questions whose ``data_type`` is ``'image'``.

	        Every key containing ``'choice'`` is treated as an option, lettered
	        in the order the keys appear in the record.
	        """
	        data = []
	        for x in pre_data['questions']:
	            if x['data_type'] != 'image':
	                continue
	            choice_list = [key for key in x.keys() if 'choice' in key]
	            choices = '\n'.join(f"{chr(ord('A') + i)}. {x[choice]}" for i, choice in enumerate(choice_list))
	            data.append({'image': f"SEED-Bench/SEED-Bench-image/{x['data_id']}",
	                         'question': '\n'.join([x['question'], choices]),
	                         'id': x['question_id'],
	                         'question_type': x['question_type_id'],
	                         'gt': x['answer']})
	        return data

	    def llava_filtering(self, pre_data):
	        """Return one sample per row of the LLaVA-Bench table."""
	        return [{'image': f"llava-bench-in-the-wild/images/{x['image']}",
	                 'question': x['text'],
	                 'id': x['question_id'],
	                 "category": x['category']}
	                for _, x in pre_data.iterrows()]

	    def blink_filtering(self, pre_data):
	        """Stub: BLINK is not supported yet, so an empty list is returned."""
	        data = []
	        # TODO
	        return data

	    def mathverse_filtering(self, pre_data, pre_data_text_only):
	        """Return the MathVerse image samples followed by the text-only ones.

	        Text-only ids are shifted by ``len(pre_data)`` and stringified so
	        they do not collide with the image sample ids.
	        """
	        def shared_fields(x):
	            # Fields copied unchanged for both sample kinds.
	            return {'problem_index': x['problem_index'],
	                    'problem_version': x['problem_version'],
	                    'gt': x['answer'],
	                    'question_type': x['question_type'],
	                    'metadata': x['metadata'],
	                    'query_cot': x['query_cot'],
	                    'origin_question': x['question']}

	        data = []
	        for x in pre_data:
	            data.append({'image': f"MathVerse/images/{x['image']}",
	                         'question': "<image>" + x['query_wo'],
	                         # 'question': "<image>" + x['query_cot'],
	                         'id': x['sample_index'],
	                         **shared_fields(x)})
	        offset = len(pre_data)
	        for x in pre_data_text_only:
	            data.append({'image': "",
	                         'question': x['query_wo'],
	                         # 'question': x['query_cot'],
	                         'id': str(int(x['sample_index']) + offset),
	                         **shared_fields(x)})

	        return data

	    def is_none(self, value):
	        """Return True when ``value`` is a float NaN (a missing table cell).

	        ``isinstance`` is used so float subclasses such as ``numpy.float64``
	        are recognised as well.
	        """
	        return isinstance(value, float) and math.isnan(value)

	    def get_options(self, row, options):
	        """Collect ``row[option]`` for each option, stopping at the first NaN."""
	        parsed_options = []
	        for option in options:
	            option_value = row[option]
	            if self.is_none(option_value):
	                break
	            parsed_options.append(option_value)
	        return parsed_options

	    def __len__(self):
	        """Return the number of entries in ``self.data`` (benchmarks, not samples)."""
	        return len(self.data)

	    def get_multi_choice_info(self, options):
	        """
	        Given the list of options for multiple choice question
	        Return the index2ans and all_choices
	        """
	        # Letters are assigned consecutively starting at 'A'.
	        index2ans = {chr(ord('A') + i): option for i, option in enumerate(options)}
	        all_choices = list(index2ans)

	        return index2ans, all_choices

	    def mmstar_filtering(self, pre_data):
	        """Return one sample per row of the MMStar table."""
	        return [{'id': x['index'],
	                 'question': x['question'],
	                 'answer': x['answer'],
	                 'category': x['category'],
	                 'l2_category': x['l2_category'],
	                 # 'bench': x['bench'],
	                 'image': x['image']}
	                for _, x in pre_data.iterrows()]