import json
import os
import glob
import argparse
import csv


# Report layout used by compute_scores: (key prefix in the score dict, column label).
# The order here is the column order of the returned table.
_REPORT_DIMS = (
    ("avg", "Avg."),
    ("action", "Action."),
    ("direction", "Direction."),
    ("speed", "Speed."),
    ("order", "Event Order."),
    ("attribute_change", "Attribute Change."),
)

# Dataset names expected in the merged results: (key suffix, column label).
_REPORT_TASKS = (
    ("multi-choice", "Multi-Choice"),
    ("yes_no", "Yes/No"),
    ("caption_matching", "Caption Matching"),
    ("captioning", "Caption Generation"),
)


def _load_merge_data(merge_file, quote_null=False):
    """Decode a UTF-8 bytes blob and evaluate it into a Python object.

    JSON-style ``true``/``false`` (and ``null`` when *quote_null* is set) that
    are followed by a comma are turned into quoted strings first, because the
    text is evaluated as a Python expression where those names are undefined.
    """
    text = merge_file.decode("utf-8")
    text = text.replace(": true,", ": \"true\",")
    text = text.replace(": false,", ": \"false\",")
    if quote_null:
        text = text.replace(": null,", ": \"null\",")
    # SECURITY NOTE(review): eval() executes arbitrary Python contained in the
    # input. If merge_file can come from an untrusted source, this should be
    # replaced with json.loads / ast.literal_eval after confirming that real
    # inputs are strict JSON. Left in place here to keep parsing unchanged.
    return eval(text)


def _percentage(correct, total):
    """Return correct/total as a percentage rounded to 2 places (0.0 if total is 0)."""
    if not total:
        # An empty group previously raised ZeroDivisionError; report 0.0 instead.
        return 0.0
    return round(correct / total * 100, 2)


def chatgpt_json(merge_file):
    """Compute the accuracy percentage of every dataset in a merged results blob.

    Args:
        merge_file: UTF-8 encoded bytes holding a mapping of the form
            ``{dataset_name: {sample_id: {dim: [{"rating": ...}, ...]}}}``.

    Returns:
        dict mapping each dataset name to the sum of its ``rating`` values
        divided by the number of results, times 100, rounded to 2 decimals.
        A dataset without any result scores 0.0.
    """
    merge_data = _load_merge_data(merge_file)

    dataset_scores_dict = {}
    for dataset_name, dataset_results in merge_data.items():
        correct, total_nums = 0, 0
        for dims in dataset_results.values():
            for results in dims.values():
                for result in results:
                    correct += result['rating']
                    total_nums += 1
        dataset_scores_dict[dataset_name] = _percentage(correct, total_nums)
    return dataset_scores_dict


def compute_scores(merge_file):
    """Build a two-row score table (header row, value row) from merged results.

    Ratings are accumulated per dataset and per dimension (plus an ``avg``
    bucket over all dimensions of the dataset), and once more over everything
    for the overall average.

    Args:
        merge_file: UTF-8 encoded bytes holding a mapping of the form
            ``{dataset_name: {sample_id: {dim: [{"rating": ...}, ...]}}}``.

    Returns:
        ``[header, values]`` where both lists have 25 entries: the overall
        average followed by one column per (dimension, dataset) pair.
        Dimensions with no results in a dataset score 0.0.

    Raises:
        KeyError: if a result uses a dimension outside the known set, or if
            one of the datasets ``multi-choice``, ``yes_no``,
            ``caption_matching``, ``captioning`` is missing from the input.
    """
    merge_data = _load_merge_data(merge_file, quote_null=True)

    eval_dims = [dim for dim, _ in _REPORT_DIMS]
    dataset_scores_dict = {}
    total_correct, total_num = 0, 0

    for dataset_name, dataset_results in merge_data.items():
        dataset_correct = {dim: 0 for dim in eval_dims}
        dataset_num = {dim: 0 for dim in eval_dims}
        for dims in dataset_results.values():
            for dim, results in dims.items():
                for result in results:
                    rating = result['rating']
                    dataset_correct['avg'] += rating
                    dataset_correct[dim] += rating
                    dataset_num['avg'] += 1
                    dataset_num[dim] += 1

        total_correct += dataset_correct['avg']
        total_num += dataset_num['avg']
        for dim in eval_dims:
            dataset_scores_dict[f"{dim}_{dataset_name}"] = _percentage(
                dataset_correct[dim], dataset_num[dim]
            )

    dataset_scores_dict["avg_all"] = _percentage(total_correct, total_num)

    # Column order: overall average, then for each dimension one column per dataset.
    header = ["Avg. All"]
    values = [dataset_scores_dict["avg_all"]]
    for dim, dim_label in _REPORT_DIMS:
        for task, task_label in _REPORT_TASKS:
            header.append(f"{dim_label} {task_label}")
            values.append(dataset_scores_dict[f"{dim}_{task}"])

    data = [header, values]
    return data