File size: 5,361 Bytes
19d93fe
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
import json
import os
from typing import Any, Dict

import pandas as pd
from huggingface_hub import HfApi, hf_hub_download, metadata_load

from .dataset_handler import DEPRECATED_VIDORE_2_DATASETS_KEYWORDS, DEPRECATED_VIDORE_DATASETS_KEYWORDS, deprecated_get_datasets_nickname

# Hub organizations whose repositories are skipped when crawling for results.
BLOCKLIST = ["impactframes"]


class DeprecatedModelHandler:
    """Collect and aggregate deprecated ViDoRe benchmark results from the Hugging Face Hub.

    Crawls repositories tagged "vidore", downloads their metric JSON files,
    caches the per-model results in ``self.model_infos``, and renders the
    aggregated scores as a pandas DataFrame.
    """

    def __init__(self, model_infos_path="model_infos.json"):
        self.api = HfApi()
        self.model_infos_path = model_infos_path
        self.model_infos = self._load_model_infos()

    def _load_model_infos(self) -> Dict:
        """Load the cached model infos from disk; return {} when no cache exists."""
        if os.path.exists(self.model_infos_path):
            with open(self.model_infos_path) as f:
                return json.load(f)
        return {}

    def _save_model_infos(self):
        """Persist the in-memory model infos to the JSON cache file."""
        with open(self.model_infos_path, "w") as f:
            json.dump(self.model_infos, f)

    def _are_results_in_new_vidore_format(self, results: Dict[str, Any]) -> bool:
        """True when the payload uses the new {"metadata": ..., "metrics": ...} layout."""
        return "metadata" in results and "metrics" in results

    def _is_baseline_repo(self, repo_id: str) -> bool:
        """True for the official baseline-results repository."""
        return repo_id == "vidore/baseline-results"

    def sanitize_model_name(self, model_name):
        """Escape "/" and "." so a model name is usable as a flat dict key.

        Idempotent: sanitizing an already-sanitized name is a no-op.
        """
        return model_name.replace("/", "_").replace(".", "-thisisapoint-")

    def fuze_model_infos(self, model_name, results):
        """Merge ``results`` into the stored results for ``model_name``.

        Only datasets not already present are added, so the existing
        (non-baseline) scores take precedence over the incoming ones.
        """
        stored = self.model_infos[model_name]["results"]
        for dataset, metrics in results.items():
            stored.setdefault(dataset, metrics)

    def get_vidore_data(self, metric="ndcg_at_5"):
        """Populate ``self.model_infos`` with every vidore-tagged model's metrics.

        Non-baseline repositories are processed first so that when a model
        appears both inside and outside the baseline repo, the non-baseline
        results win and the baseline only fills in missing datasets.
        """
        models = self.api.list_models(filter="vidore")
        repositories = [model.modelId for model in models]  # type: ignore

        # False sorts before True, so non-baseline repos are processed first.
        repositories.sort(key=lambda x: self._is_baseline_repo(x))

        for repo_id in repositories:
            org_name = repo_id.split("/")[0]
            if org_name in BLOCKLIST:
                continue
            files = [f for f in self.api.list_repo_files(repo_id) if f.endswith("_metrics.json") or f == "results.json"]
            if not files:
                continue

            for file in files:
                if file.endswith("results.json"):
                    # Repo-level results file: the repo id itself names the model.
                    model_name = self.sanitize_model_name(repo_id)
                else:
                    # Per-model metrics file: the file name (sans suffix) names the model.
                    model_name = self.sanitize_model_name(file.split("_metrics.json")[0])

                try:
                    # README download kept inside the try so a repo without a
                    # README is skipped instead of aborting the whole crawl.
                    readme_path = hf_hub_download(repo_id, filename="README.md")
                    meta = metadata_load(readme_path)

                    result_path = hf_hub_download(repo_id, filename=file)
                    with open(result_path) as f:
                        results = json.load(f)

                    if self._are_results_in_new_vidore_format(results):
                        results = results["metrics"]

                    # Handles the case where the model is both in baseline and
                    # outside of it (prioritizes the non-baseline results).
                    # BUGFIX: previously the merged results were immediately
                    # overwritten by the baseline entry on the next line; the
                    # existing entry is now kept and only extended.
                    if self._is_baseline_repo(repo_id) and model_name in self.model_infos:
                        self.fuze_model_infos(model_name, results)
                    else:
                        self.model_infos[model_name] = {"meta": meta, "results": results}
                except Exception as e:
                    print(f"Error loading {model_name} - {e}")
                    continue

    def filter_models_by_benchmark(self, benchmark_version=1):
        """Return only the models that report results for the requested benchmark version."""
        keywords = DEPRECATED_VIDORE_DATASETS_KEYWORDS if benchmark_version == 1 else DEPRECATED_VIDORE_2_DATASETS_KEYWORDS
        return {
            model: info
            for model, info in self.model_infos.items()
            if any(any(keyword in dataset for keyword in keywords) for dataset in info["results"])
        }

    def render_df(self, metric="ndcg_at_5", benchmark_version=1):
        """Build a models-by-datasets DataFrame of ``metric`` values for the given benchmark.

        Returns an empty DataFrame when no model matches the benchmark.
        """
        filtered = self.filter_models_by_benchmark(benchmark_version)
        if not filtered:
            return pd.DataFrame()

        # Hoisted out of the per-model loop (it is loop-invariant).
        keywords = DEPRECATED_VIDORE_DATASETS_KEYWORDS if benchmark_version == 1 else DEPRECATED_VIDORE_2_DATASETS_KEYWORDS

        model_res = {}
        for model, info in filtered.items():
            dataset_res = {}
            for dataset, metrics in info["results"].items():
                if any(keyword in dataset for keyword in keywords):
                    dataset_res[deprecated_get_datasets_nickname(dataset)] = metrics[metric]
            model_res[model] = dataset_res

        return pd.DataFrame(model_res).T