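"""Aggregate out-of-distribution (OOD) evaluation scores for text-to-image and
image-to-text models from per-model summary JSON files."""
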
import json
import os
def ood_t2i_agg(model, result_dir):
"""
Aggregate scores for the given testing models.
Parameters:
model (str): Model name.
result_dir (str): The path to the directory where the results are stored.
Returns:
dict: Output the overall score and the score of subscenarios in the format {"score": float, "subscenarios": dict}.
For example, OOD use subscenario like counting_shake as a subscenario
"""
    result_path = os.path.join(result_dir, "ood_t2i_summary.json")
    with open(result_path, "r") as file:
        results = json.load(file)
    agg_scores = {}
    # Keep only the model base name (e.g. "org/model" -> "model").
    model = model.split("/")[-1]
    results_shake_fidelity = 0
    results_shake_counting = 0
    results_shake_spatial = 0
    results_shake_color = 0
    results_shake_size = 0
    results_paraphrase_fidelity = 0
    results_paraphrase_counting = 0
    results_paraphrase_spatial = 0
    results_paraphrase_color = 0
    results_paraphrase_size = 0
    for trial_id in [0, 1, 2]:
        results_shake_fidelity += results[model][f'trial_{trial_id}']['fidelity']['Shake_']
        results_shake_counting += results[model][f'trial_{trial_id}']['counting']['Shake_']
        results_shake_spatial += results[model][f'trial_{trial_id}']['spatial']['Shake_']
        results_shake_color += results[model][f'trial_{trial_id}']['color']['Shake_']
        results_shake_size += results[model][f'trial_{trial_id}']['size']['Shake_']
        results_paraphrase_fidelity += results[model][f'trial_{trial_id}']['fidelity']['Paraphrase_']
        results_paraphrase_counting += results[model][f'trial_{trial_id}']['counting']['Paraphrase_']
        results_paraphrase_spatial += results[model][f'trial_{trial_id}']['spatial']['Paraphrase_']
        results_paraphrase_color += results[model][f'trial_{trial_id}']['color']['Paraphrase_']
        results_paraphrase_size += results[model][f'trial_{trial_id}']['size']['Paraphrase_']
    # Average over the three trials. Fidelity is rescaled by 100, which appears
    # to bring it onto the same 0-100 scale as the other metrics; the attribute
    # score is the mean of the color and size scores.
    results_shake_fidelity = results_shake_fidelity * 100 / 3
    results_shake_counting /= 3
    results_shake_spatial /= 3
    results_shake_color /= 3
    results_shake_size /= 3
    results_shake_attribute = (results_shake_color + results_shake_size) / 2
    results_paraphrase_fidelity = results_paraphrase_fidelity * 100 / 3
    results_paraphrase_counting /= 3
    results_paraphrase_spatial /= 3
    results_paraphrase_color /= 3
    results_paraphrase_size /= 3
    results_paraphrase_attribute = (results_paraphrase_color + results_paraphrase_size) / 2
    avg_shake = (results_shake_fidelity + results_shake_counting + results_shake_spatial + results_shake_attribute) / 4
    avg_paraphrase = (results_paraphrase_fidelity + results_paraphrase_counting + results_paraphrase_spatial + results_paraphrase_attribute) / 4
    agg_score = (avg_shake + avg_paraphrase) / 2
    agg_scores["score"] = agg_score
    agg_scores["subscenarios"] = {
        "helpfulness_shake": results_shake_fidelity,
        "counting_shake": results_shake_counting,
        "spatial_shake": results_shake_spatial,
        "attribute_shake": results_shake_attribute,
        "helpfulness_rare": results_paraphrase_fidelity,
        "counting_rare": results_paraphrase_counting,
        "spatial_rare": results_paraphrase_spatial,
        "attribute_rare": results_paraphrase_attribute
    }
    return agg_scores

def ood_i2t_agg(model, result_dir):
"""
Aggregate scores for the given testing models.
Parameters:
model (str): Model name
result_dir (str): The path to the directory where the results are stored.
Returns:
dict: Output the overall score and the score of subscenarios in the format {"score": float, "subscenarios": dict}.
For example, OOD use subscenario like counting_trans as a subscenario
"""
    transformations = ["Van_Gogh", "oil_painting", "watercolour_painting"]
    corruptions = ["zoom_blur", "gaussian_noise", "pixelate"]
    result_path = os.path.join(result_dir, "ood_i2t_summary.json")
    with open(result_path, "r") as file:
        results = json.load(file)
    agg_scores = {}
    # Keep only the model base name (e.g. "org/model" -> "model").
    model = model.split("/")[-1]
    # Average each task over the three image corruptions.
    identification_corrupt = sum(results[model]['identification'][corrupt]["Score"] for corrupt in corruptions) / len(corruptions)
    count_corrupt = sum(results[model]['count'][corrupt]["Score"] for corrupt in corruptions) / len(corruptions)
    spatial_corrupt = sum(results[model]['spatial'][corrupt]["Score"] for corrupt in corruptions) / len(corruptions)
    attribute_corrupt = sum(results[model]['attribute'][corrupt]["Score"] for corrupt in corruptions) / len(corruptions)
    avg_corrupt = (identification_corrupt + count_corrupt + spatial_corrupt + attribute_corrupt) / 4
    # Average each task over the three style transformations.
    identification_transform = sum(results[model]['identification'][transform]["Score"] for transform in transformations) / len(transformations)
    count_transform = sum(results[model]['count'][transform]["Score"] for transform in transformations) / len(transformations)
    spatial_transform = sum(results[model]['spatial'][transform]["Score"] for transform in transformations) / len(transformations)
    attribute_transform = sum(results[model]['attribute'][transform]["Score"] for transform in transformations) / len(transformations)
    avg_transform = (identification_transform + count_transform + spatial_transform + attribute_transform) / 4
    # Overall score is the mean of the corruption and transformation averages.
    agg_scores["score"] = (avg_corrupt + avg_transform) / 2
    agg_scores["subscenarios"] = {
        "object_corrupt": identification_corrupt,
        "counting_corrupt": count_corrupt,
        "spatial_corrupt": spatial_corrupt,
        "attribute_corrupt": attribute_corrupt,
        "object_transform": identification_transform,
        "counting_transform": count_transform,
        "spatial_transform": spatial_transform,
        "attribute_transform": attribute_transform
    }
    return agg_scores

if __name__ == "__main__":
    t2i_models = [  # Average time spent running the following example
        "dall-e-2",
        "dall-e-3",
        "DeepFloyd/IF-I-M-v1.0",  # 15.372
        "dreamlike-art/dreamlike-photoreal-2.0",  # 3.526
        "prompthero/openjourney-v4",  # 4.981
        "stabilityai/stable-diffusion-xl-base-1.0",  # 7.463
    ]
    i2t_models = [  # Average time spent running the following example
        "gpt-4-vision-preview",
        "gpt-4o-2024-05-13",
        "llava-hf/llava-v1.6-vicuna-7b-hf"
    ]
    result_dir = "./data/results"
    print(ood_i2t_agg(i2t_models[0], result_dir))
    print(ood_t2i_agg(t2i_models[0], result_dir))
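    # Both functions read their summaries from result_dir, so this example
    # expects ./data/results/ood_i2t_summary.json and
    # ./data/results/ood_t2i_summary.json to exist and to contain entries for
    # the selected models.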