File size: 8,038 Bytes
9fa3d89
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
import json
import argparse
from icecream import ic
import os
import numpy as np


# Benchmarks read from the "results" mapping of the run's results.json, in
# report order: (display name, task key, metric key, scale before rounding).
_RESULTS_JSON_METRICS = (
    ("Realworld-QA", "realworldqa", "exact_match,flexible-extract", 100),
    ("VizWiz-VQA-Val", "vizwiz_vqa_val", "exact_match,none", 100),
    ("MMMU-Val", "mmmu_val", "mmmu_acc,none", 100),
    ("POPE", "pope", "pope_accuracy,none", 100),
    # NOTE(review): unlike its neighbours this metric key has no ",none"
    # suffix -- confirm against an actual results.json.
    ("MMVet", "mmvet", "gpt_eval_score", 100),
    ("OK-VQA", "ok_vqa", "exact_match,none", 100),
    # Benchmarks that were disabled (commented out) in the original script;
    # move an entry above this comment to report it again.
    # ("MME-Cognition", "mme", "mme_cognition_score,none", 1),
    # ("MME-Perception", "mme", "mme_percetion_score,none", 1),
    # ("SEEDBench-Image", "seedbench", "seed_image,none", 100),
    # ("VQAv2-Val", "vqav2_val", "exact_match,none", 100),
    # ("Science-QA-Img", "scienceqa_img", "exact_match,none", 100),
    # ("MMBench", "mmbench_en_dev", "gpt_eval_score,none", 1),
    # ("NaturalBench", "naturalbench", "mme_score,none", 100),
    # ("GQA", "gqa", "exact_match,none", 100),
    # ("ChartQA", "chartqa", "relaxed_overall,none", 100),
    # ("DocVQA", "docvqa_val", "anls,none", 100),
    # ("TextVQA", "textvqa_val", "exact_match,none", 100),
)

# CV-Bench sub-scores read from cv-bench/merge_score.json:
# (display name, JSON key).
_CV_BENCH_SPLITS = (
    ("CV-Bench (2D)", "2D"),
    ("CV-Bench (3D)", "3D"),
    ("CV-Bench (Count)", "Count"),
    ("CV-Bench (Depth)", "Depth"),
    ("CV-Bench (Relation)", "Relation"),
    ("CV-Bench (Distance)", "Distance"),
)

# Benchmark subsets whose averages are reported after the full table.
_ABL_KEYS = ["CV-Bench", "MMStar", "VizWiz-VQA-Val", "MMVet", "MMVP", "MMMU-Val"]
_SMALL_ABL_KEYS = ["CV-Bench", "MMStar", "OK-VQA", "MMMU-Val"]


def _warn(message):
    """Print a diagnostic to stderr so the score tables on stdout stay clean."""
    import sys  # local import: the file's import header is outside this block

    print(f"[warning] {message}", file=sys.stderr)


def _scaled(value, scale=100):
    """Return ``value * scale`` rounded to one decimal, or None if not numeric."""
    if not isinstance(value, (int, float)):
        return None
    return round(value * scale, 1)


def _load_json(path):
    """Return the JSON object stored at ``path``, or ``{}`` if it is unusable.

    A missing/unreadable file, invalid JSON, or a top-level value that is not
    an object all yield ``{}`` (with a warning), so the affected scores are
    reported as None instead of aborting the whole report.
    """
    try:
        with open(path, "r") as f:
            data = json.load(f)
    except (OSError, ValueError) as err:  # JSONDecodeError is a ValueError
        _warn(f"could not read {path}: {err}")
        return {}
    if not isinstance(data, dict):
        _warn(f"{path} does not contain a JSON object")
        return {}
    return data


def _find_results_json(ckpt_dir, ckpt):
    """Return the path of the run's ``results.json`` under ``ckpt_dir``, or None.

    A run directory is an entry whose name contains ``ckpt`` without being
    contained in it (i.e. the checkpoint name plus extra text). The first such
    entry, in ``os.listdir`` order, that actually holds a ``results.json`` is
    used.
    """
    try:
        entries = os.listdir(ckpt_dir)
    except OSError as err:
        _warn(f"could not list {ckpt_dir}: {err}")
        return None
    for name in entries:
        if ckpt in name and name not in ckpt:
            candidate = os.path.join(ckpt_dir, name, "results.json")
            if os.path.isfile(candidate):
                return candidate
    _warn(f"no run directory with a results.json found in {ckpt_dir}")
    return None


def _available(table, keys):
    """Return the non-None values of ``table`` for ``keys``, in key order."""
    return [table[key] for key in keys if table[key] is not None]


def _mean(values):
    """Return the arithmetic mean of ``values`` rounded to one decimal."""
    return round(float(np.mean(values)), 1)


def _print_table(title, keys, table):
    """Print a banner, the ``keys`` row and the matching value row.

    Missing values are printed as ``None`` so that both rows always have the
    same number of `` & ``-separated columns.
    """
    print(f"\n===================={title}===========================================")
    print(" & ".join(keys))
    print(" & ".join(str(table[key]) for key in keys))


def main(argv=None):
    """Collect benchmark scores for one checkpoint and print them as tables.

    Reads, below ``<results_folder>/<ckpt>/``: ``mmstar/merge_score.json``,
    ``cv-bench/merge_score.json``, ``mmvp/merge_score.json`` and the
    ``results.json`` of the run directory. Scores are scaled to percentages
    and rounded to one decimal; anything that cannot be read is shown as None.

    The categorized-scores report that was commented out in the original
    script is not implemented here.

    Args:
        argv: Command-line arguments; ``None`` means ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--results_folder", type=str, default="./playground/data/eval/results")
    # Every path below is derived from the checkpoint name, so it is mandatory.
    parser.add_argument("--ckpt", type=str, required=True)
    args = parser.parse_args(argv)

    ckpt_dir = os.path.join(args.results_folder, args.ckpt)
    scores = {}

    mmstar = _load_json(os.path.join(ckpt_dir, "mmstar", "merge_score.json"))
    scores["MMStar"] = _scaled(mmstar.get("final score"))

    cv_data = _load_json(os.path.join(ckpt_dir, "cv-bench", "merge_score.json"))
    scores["CV-Bench"] = _scaled(cv_data.get("Overall"))
    cv_scores = {name: _scaled(cv_data.get(key)) for name, key in _CV_BENCH_SPLITS}

    results_path = _find_results_json(ckpt_dir, args.ckpt)
    results = _load_json(results_path).get("results") if results_path else None
    if not isinstance(results, dict):
        results = {}
    for name, task, metric, scale in _RESULTS_JSON_METRICS:
        task_results = results.get(task)
        if isinstance(task_results, dict):
            scores[name] = _scaled(task_results.get(metric), scale)
        else:
            scores[name] = None

    mmvp = _load_json(os.path.join(ckpt_dir, "mmvp", "merge_score.json"))
    scores["MMVP"] = _scaled(mmvp.get("mmvp"))

    _print_table("All-Scores", list(scores), scores)

    for title, label, keys in (
        ("Abl-Scores", "Ablation Avg", _ABL_KEYS),
        ("Small-Abl-Scores", "Small-Ablation Avg", _SMALL_ABL_KEYS),
    ):
        present = _available(scores, keys)
        if present:
            _print_table(title, keys, scores)
            print(f"{label}: {_mean(present)}")
        else:
            print(f"{label}: None")

    cv_keys = [name for name, _ in _CV_BENCH_SPLITS]
    if _available(cv_scores, cv_keys):
        _print_table("CV-Bench-Scores", cv_keys, cv_scores)
        # The overall figure is the mean of the 2D and 3D scores; look them up
        # by name so a missing entry cannot shift another score into the mean.
        overall = _available(cv_scores, ("CV-Bench (2D)", "CV-Bench (3D)"))
        print(f"CV-Bench Overall: {_mean(overall) if overall else None}")
    else:
        print("CV-Bench Avg: None")


if __name__ == "__main__":
    main()