# ArmBench-LLM / model_handler.py
# Commit def7cf0 (verified) - Bagratuni
import json
import os
from typing import Any, Dict
import pandas as pd
from huggingface_hub import HfApi, hf_hub_download
from required_categories import required_mmlu_categories, required_unified_exam_categories
class ModelHandler:
    """Collect ArmBench-LLM results from the Hugging Face Hub and tabulate them.

    Downloaded results are cached in a local JSON file holding a list of
    ``{"model_name": ..., "results": ...}`` records, so each repository's
    ``results.json`` is fetched only once.
    """

    def __init__(self, model_infos_path="model_results.json"):
        """Create the Hub client and load the local cache.

        Args:
            model_infos_path: Path of the JSON cache file.
        """
        self.api = HfApi()
        self.model_infos_path = model_infos_path
        self.model_infos = self._load_model_infos()

    def _load_model_infos(self) -> list:
        """Return the cached model records, or an empty list if there are none.

        The rest of the class appends to and iterates over this value as a
        list of records, so a missing cache must yield ``[]`` rather than
        ``{}``. A cache file whose top-level JSON value is not a list (for
        example a stray ``{}``) is treated as empty for the same reason.
        """
        if not os.path.exists(self.model_infos_path):
            return []
        with open(self.model_infos_path, encoding="utf-8") as f:
            cached = json.load(f)
        return cached if isinstance(cached, list) else []

    def _save_model_infos(self):
        """Write the in-memory model records to the cache file as JSON."""
        print("Saving model infos")
        with open(self.model_infos_path, "w", encoding="utf-8") as f:
            json.dump(self.model_infos, f, indent=4)

    def _build_rows(self, results_key, required_categories):
        """Build one table row per cached model that covers every required category.

        Args:
            results_key: Key inside a record's ``"results"`` mapping that holds
                the list of ``{"category": ..., "score": ...}`` entries.
            required_categories: Categories a model must report to be included.

        Returns:
            A list of dicts, each with a ``"Model"`` key plus one key per
            category reported by that model.
        """
        rows = []
        for model in self.model_infos:
            model_name = model["model_name"]
            entries = model.get("results", {}).get(results_key, [])
            if not entries:
                continue
            reported = {entry["category"] for entry in entries}
            if not all(category in reported for category in required_categories):
                continue
            row = {"Model": model_name}
            for entry in entries:
                row[entry["category"]] = entry["score"]
            rows.append(row)
        return rows

    def get_arm_bench_data(self):
        """Refresh the cache from the Hub and build the two result tables.

        Repositories matching the ``"ArmBench-LLM"`` filter that are not yet
        cached have their ``results.json`` downloaded and appended to the
        cache. A failure on one repository is reported and skipped so it does
        not abort the whole refresh.

        Returns:
            A ``(mmlu_df, unified_exam_df)`` tuple of DataFrames built from the
            cached records. Models missing any required category are left out
            of the corresponding table.
        """
        known_names = {model["model_name"] for model in self.model_infos}
        for model in self.api.list_models(filter="ArmBench-LLM"):
            repo_id = model.modelId
            # Skip cached repositories before touching the network again.
            if repo_id in known_names:
                continue
            try:
                if "results.json" not in self.api.list_repo_files(repo_id):
                    continue
                result_path = hf_hub_download(repo_id, filename="results.json")
                with open(result_path, encoding="utf-8") as f:
                    results = json.load(f)
            except Exception as e:
                # Best effort: one broken or inaccessible repo must not stop the rest.
                print(f"Error loading {repo_id} - {e}")
                continue
            self.model_infos.append({"model_name": repo_id, "results": results})
            known_names.add(repo_id)
        self._save_model_infos()

        mmlu_df = pd.DataFrame(
            self._build_rows("mmlu_results", required_mmlu_categories)
        )
        unified_exam_df = pd.DataFrame(
            self._build_rows("unified_exam_results", required_unified_exam_categories)
        )
        return mmlu_df, unified_exam_df