import json
import os
from dataclasses import dataclass
from src.display.formatting import make_clickable_model
from src.display.utils import AutoEvalColumn, ModelType, Tasks, Precision, WeightType
from src.submission.check_validity import is_model_on_hub


@dataclass
class EvalResult:
    """Represents one full evaluation. Built from a single result file for a given run."""

    eval_name: str  # org_model_precision (uid)
    full_model: str  # org/model (path on hub)
    org: str
    model: str
    revision: str  # commit hash, "" if main
    results: dict
    precision: Precision = Precision.Unknown
    model_type: ModelType = ModelType.Unknown  # Pretrained, fine tuned, ...
    weight_type: WeightType = WeightType.Original  # Original or Adapter
    architecture: str = "Unknown"
    license: str = "?"
    likes: int = 0
    num_params: int = 0
    date: str = ""  # submission date of request file
    still_on_hub: bool = False
    @classmethod
    def init_from_json_file(cls, json_filepath):
        """Inits the result from the given model result file."""
        try:
            with open(json_filepath) as fp:
                data = json.load(fp)

            # Extract model information from the JSON data
            full_model_name = data.get('model', '')
            org_and_model = full_model_name.split("/", 1)
            if len(org_and_model) == 2:
                org, model = org_and_model
            else:
                # Model name without an org prefix
                org, model = "", org_and_model[0]

            # Extract other metadata
            precision_str = data.get('precision', 'Unknown')
            precision = Precision.from_str(precision_str)
            model_type = ModelType.from_str(data.get('model_type', 'Unknown'))
            weight_type = WeightType.from_str(data.get('weight_type', 'Original'))
            revision = data.get('revision', '')
            date = data.get('submitted_at', '')

            # Extract results and metadata
            results = data.get('results', {})
            license = data.get('license', '?')
            likes = data.get('likes', 0)
            num_params = data.get('params', 0)
            architecture = data.get('architecture', 'Unknown')

            # Check if the model is still on the hub
            still_on_hub, _, _ = is_model_on_hub(full_model_name, revision=revision)

            return cls(
                eval_name=f"{org}_{model}_{precision.value}",
                full_model=full_model_name,
                org=org,
                model=model,
                revision=revision,
                results=results,
                precision=precision,
                model_type=model_type,
                weight_type=weight_type,
                architecture=architecture,
                license=license,
                likes=likes,
                num_params=num_params,
                date=date,
                still_on_hub=still_on_hub,
            )
        except Exception as e:
            print(f"Error reading evaluation file {json_filepath}: {str(e)}")
            return None
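
    # The keys read above imply a result-file layout roughly like the sketch below.
    # This shape is inferred from the .get() calls in init_from_json_file, not from a
    # published schema, and the values shown are hypothetical examples:
    #
    # {
    #   "model": "org/model-name",
    #   "precision": "float16",
    #   "model_type": "pretrained",
    #   "weight_type": "Original",
    #   "revision": "main",
    #   "submitted_at": "2024-01-01T00:00:00Z",
    #   "license": "apache-2.0",
    #   "likes": 0,
    #   "params": 7,
    #   "architecture": "LlamaForCausalLM",
    #   "results": {"<task metric name>": 0.5}
    # }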

    def to_dict(self):
        """Converts the Eval Result to a dict compatible with our dataframe display"""
        # Calculate the average score for the leaderboard
        task_metrics = [task.value.metric for task in Tasks]
        scores = [v for k, v in self.results.items() if v is not None and k in task_metrics]
        average = sum(scores) / len(scores) if scores else 0

        AutoEvalColumnInstance = AutoEvalColumn()
        data_dict = {
            "eval_name": self.eval_name,
            AutoEvalColumnInstance.precision.name: self.precision.value.name,
            AutoEvalColumnInstance.model_type.name: self.model_type.value.name,
            AutoEvalColumnInstance.model_type_symbol.name: self.model_type.value.symbol,
            AutoEvalColumnInstance.weight_type.name: self.weight_type.value.name,
            AutoEvalColumnInstance.architecture.name: self.architecture,
            AutoEvalColumnInstance.model.name: make_clickable_model(self.full_model),
            AutoEvalColumnInstance.revision.name: self.revision,
            AutoEvalColumnInstance.average.name: average,
            AutoEvalColumnInstance.license.name: self.license,
            AutoEvalColumnInstance.likes.name: self.likes,
            AutoEvalColumnInstance.params.name: self.num_params,
            AutoEvalColumnInstance.still_on_hub.name: self.still_on_hub,
        }

        # Dynamically map metric values to their corresponding column names
        for task in Tasks:
            task_metric = task.value.metric
            task_col_name = task.value.col_name
            data_dict[task_col_name] = self.results.get(task_metric)

        return data_dict


def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResult]:
    """From the path of the results folder root, extract all needed info for results"""
    model_result_filepaths = []

    # Recursively find all result files
    for root, _, files in os.walk(results_path):
        json_files = [f for f in files if f.endswith(".json")]
        for file in json_files:
            model_result_filepaths.append(os.path.join(root, file))

    eval_results = []
    for model_result_filepath in model_result_filepaths:
        try:
            eval_result = EvalResult.init_from_json_file(model_result_filepath)
            if eval_result is not None:
                eval_results.append(eval_result)
            else:
                print(f"Skipping invalid evaluation file: {model_result_filepath}")
        except Exception as e:
            print(f"Error processing evaluation file {model_result_filepath}: {str(e)}")
            continue

    return eval_results
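

# Minimal usage sketch: wires the helpers above together and prints the flat dicts
# consumed by the dataframe display. Both directory arguments below are hypothetical
# placeholder paths, not locations defined elsewhere in this repository.
if __name__ == "__main__":
    raw_results = get_raw_eval_results("eval-results", "eval-queue")  # placeholder paths

    # Convert each EvalResult into its display row
    rows = [result.to_dict() for result in raw_results]
    for row in rows:
        print(row["eval_name"], row)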