import json
import os
from dataclasses import dataclass


from src.display.formatting import make_clickable_model
from src.display.utils import AutoEvalColumn, ModelType, Tasks, Precision, WeightType
from src.submission.check_validity import is_model_on_hub


@dataclass
class EvalResult:
    """Represents one full evaluation. Built from a single result file for a given run."""
    eval_name: str  # org_model_precision (uid)
    full_model: str  # org/model (path on hub)
    org: str
    model: str
    revision: str  # commit hash, "" if main
    results: dict
    precision: Precision = Precision.Unknown
    model_type: ModelType = ModelType.Unknown  # Pretrained, fine-tuned, ...
    weight_type: WeightType = WeightType.Original  # Original or Adapter
    architecture: str = "Unknown"
    license: str = "?"
    likes: int = 0
    num_params: int = 0
    date: str = ""  # submission date of request file
    still_on_hub: bool = False

    @classmethod
    def init_from_json_file(cls, json_filepath):
        """Inits the result from the specific model result file"""
        try:
            with open(json_filepath) as fp:
                data = json.load(fp)

            # Extract model information from the JSON data
            full_model_name = data.get('model')
            if not full_model_name:
                raise ValueError("Result file is missing the 'model' field")
            org_and_model = full_model_name.split("/", 1)
            if len(org_and_model) == 2:
                org, model = org_and_model
            else:
                # Some result files carry a bare model name without an org prefix
                org, model = "", org_and_model[0]
            
            # Extract other metadata
            precision_str = data.get('precision', 'Unknown')
            precision = Precision.from_str(precision_str)
            model_type = ModelType.from_str(data.get('model_type', 'Unknown'))
            weight_type = WeightType.from_str(data.get('weight_type', 'Original'))
            revision = data.get('revision', '')
            date = data.get('submitted_at', '')
            
            # Extract results and metadata
            results = data.get('results', {})
            license = data.get('license', '?')
            likes = data.get('likes', 0)
            num_params = data.get('params', 0)
            architecture = data.get('architecture', 'Unknown')
            
            # Check if the model is still on the hub
            still_on_hub, _, _ = is_model_on_hub(full_model_name, revision=revision)

            return cls(
                eval_name=f"{org}_{model}_{precision.value}",
                full_model=full_model_name,
                org=org,
                model=model,
                revision=revision,
                results=results,
                precision=precision,
                model_type=model_type,
                weight_type=weight_type,
                architecture=architecture,
                license=license,
                likes=likes,
                num_params=num_params,
                date=date,
                still_on_hub=still_on_hub
            )
        except Exception as e:
            print(f"Error reading evaluation file {json_filepath}: {str(e)}")
            return None

    def to_dict(self):
        """Converts the Eval Result to a dict compatible with our dataframe display"""
        # Calculate the average score for the leaderboard
        task_metrics = {task.value.metric for task in Tasks}
        scores = [v for k, v in self.results.items() if v is not None and k in task_metrics]
        average = sum(scores) / len(scores) if scores else 0

        auto_eval_column = AutoEvalColumn()
        data_dict = {
            "eval_name": self.eval_name,
            auto_eval_column.precision.name: self.precision.value.name,
            auto_eval_column.model_type.name: self.model_type.value.name,
            auto_eval_column.model_type_symbol.name: self.model_type.value.symbol,
            auto_eval_column.weight_type.name: self.weight_type.value.name,
            auto_eval_column.architecture.name: self.architecture,
            auto_eval_column.model.name: make_clickable_model(self.full_model),
            auto_eval_column.revision.name: self.revision,
            auto_eval_column.average.name: average,
            auto_eval_column.license.name: self.license,
            auto_eval_column.likes.name: self.likes,
            auto_eval_column.params.name: self.num_params,
            auto_eval_column.still_on_hub.name: self.still_on_hub,
        }

        # Dynamically map metric values to their corresponding column names
        for task in Tasks:
            task_metric = task.value.metric
            task_col_name = task.value.col_name
            data_dict[task_col_name] = self.results.get(task_metric)
            
        return data_dict


def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResult]:
    """From the path of the results folder root, extract all needed info for results"""
    model_result_filepaths = []
    # Recursively find all result files
    for root, _, files in os.walk(results_path):
        json_files = [f for f in files if f.endswith(".json")]
        for file in json_files:
            model_result_filepaths.append(os.path.join(root, file))

    eval_results = []
    for model_result_filepath in model_result_filepaths:
        try:
            eval_result = EvalResult.init_from_json_file(model_result_filepath)
            if eval_result is not None:
                eval_results.append(eval_result)
            else:
                print(f"Skipping invalid evaluation file: {model_result_filepath}")
        except Exception as e:
            print(f"Error processing evaluation file {model_result_filepath}: {str(e)}")
            continue
            
    return eval_results
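

# A minimal usage sketch, not part of the original module: it assumes results
# live under an "eval-results" folder with a sibling "eval-queue" folder for
# request files, and that pandas is available to build the display dataframe.
if __name__ == "__main__":
    import pandas as pd

    raw_results = get_raw_eval_results("eval-results", "eval-queue")
    leaderboard_df = pd.DataFrame([result.to_dict() for result in raw_results])
    print(leaderboard_df.head())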