import json
import os
import numpy as np
import pandas as pd
import logging
from typing import List, Dict, Any
from src.display.formatting import make_clickable_model
from src.leaderboard.read_evals import get_raw_eval_results
logger = logging.getLogger(__name__)
from huggingface_hub import HfApi
from src.config import RESULTS_REPO, QUEUE_REPO
def get_leaderboard_df(cols: List[str], benchmark_cols: List[str]) -> pd.DataFrame:
    """Creates a dataframe from all the individual experiment results.

    Loads the results dataset from RESULTS_REPO, maps the raw dataset columns
    to their display names, sorts by security score (descending), and returns
    only the requested display columns with numeric values rounded to 2 dp.

    Args:
        cols: Display column names the returned frame must contain, in order.
        benchmark_cols: Unused here; kept for interface compatibility with callers.

    Returns:
        A DataFrame with exactly `cols` as its columns. Empty (but with the
        right columns) when loading fails or the dataset has no rows.
    """
    logger.info(f"Fetching evaluation results from {RESULTS_REPO}")
    try:
        # Local import so the module stays importable when `datasets` is absent.
        from datasets import load_dataset
        dataset = load_dataset(RESULTS_REPO, split="train")
        logger.debug(f"Loaded dataset with {len(dataset)} rows")
        logger.debug(f"Dataset features: {dataset.features}")
        # Convert dataset rows to plain dicts with only the fields we use.
        all_data_json = [
            {
                "model_id": row["model_id"],
                "revision": row["revision"],
                "precision": row["precision"],
                "security_score": row["security_score"],
                "safetensors_compliant": row["safetensors_compliant"]
            }
            for row in dataset
        ]
        # default=str so a non-JSON-serializable value in a row cannot make
        # this debug log abort the whole load.
        logger.debug(f"Converted dataset to: {json.dumps(all_data_json, indent=2, default=str)}")
    except Exception as e:
        logger.error(f"Error loading dataset from {RESULTS_REPO}: {str(e)}", exc_info=True)
        return pd.DataFrame(columns=cols)  # Return empty DataFrame on error
    logger.info(f"Fetched {len(all_data_json)} results")
    logger.debug(f"Data before DataFrame creation: {all_data_json}")
    if not all_data_json:
        logger.warning("No valid data found!")
        return pd.DataFrame(columns=cols)
    df = pd.DataFrame(all_data_json)
    logger.info(f"Created DataFrame with columns: {df.columns.tolist()}")
    logger.debug(f"DataFrame before filtering:\n{df}")
    # Ensure all required display columns exist so `df[cols]` below cannot raise.
    for col in cols:
        if col not in df.columns:
            logger.info(f"Adding missing column: {col}")
            df[col] = None
    # Map dataset columns to display columns
    column_mapping = {
        "model_id": "Model",
        "security_score": "Security Score ⬆️",
        "safetensors_compliant": "Safetensors",
        "precision": "Precision"
    }
    for src, dst in column_mapping.items():
        if src in df.columns:
            df[dst] = df[src]
            logger.debug(f"Mapped column {src} to {dst}")
    # Sort by Security Score if available
    if "Security Score ⬆️" in df.columns and not df["Security Score ⬆️"].isnull().all():
        df = df.sort_values(by="Security Score ⬆️", ascending=False)
        logger.info("DataFrame sorted by Security Score")
    else:
        logger.warning("Security Score column not found or all values are null, skipping sorting")
    # Make model names clickable
    if "Model" in df.columns:
        df["Model"] = df["Model"].apply(make_clickable_model)
    # Select only the columns we want to display
    df = df[cols]
    # Round numeric columns. select_dtypes already guarantees these columns
    # are numeric, so no per-column to_numeric coercion pass is needed.
    numeric_cols = df.select_dtypes(include=[np.number]).columns
    df[numeric_cols] = df[numeric_cols].round(decimals=2)
    logger.debug(f"DataFrame after column selection and rounding:\n{df}")
    logger.info(f"Final DataFrame has {len(df)} rows")
    return df
def get_evaluation_queue_df(cols: list) -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
    """Creates the different dataframes for the evaluation queue requests.

    Downloads every JSON request file from QUEUE_REPO, formats each request
    via format_eval_data, and buckets the requests by status.

    Args:
        cols: Column names each returned DataFrame must have.

    Returns:
        (finished, running, pending) DataFrames, in that order. All three are
        empty (with `cols` columns) when the repo cannot be read.
    """
    logger.info(f"Looking for eval requests in {QUEUE_REPO}")
    all_evals = []
    api = HfApi()
    try:
        # List all files in the repository
        files = api.list_repo_files(repo_id=QUEUE_REPO, repo_type="dataset")
        # Filter for JSON files
        json_files = [f for f in files if f.endswith('.json')]
        for file in json_files:
            try:
                # Download and read each JSON file; a bad file is logged and
                # skipped rather than aborting the whole queue listing.
                content = api.hf_hub_download(repo_id=QUEUE_REPO, filename=file, repo_type="dataset")
                logger.info(f"Reading JSON file: {file}")
                with open(content, 'r') as fp:
                    data = json.load(fp)
                # A file may hold one request (dict) or several (list).
                if isinstance(data, list):
                    for item in data:
                        all_evals.append(format_eval_data(item))
                else:
                    all_evals.append(format_eval_data(data))
            except Exception as e:
                logger.error(f"Error processing file {file}: {str(e)}", exc_info=True)
    except Exception as e:
        logger.error(f"Error fetching requests from {QUEUE_REPO}: {str(e)}", exc_info=True)
    logger.info(f"Found {len(all_evals)} total eval requests")
    # .get guards against a malformed record without a "status" key.
    pending_list = [e for e in all_evals if e.get("status", "") in ["PENDING", "RERUN"]]
    running_list = [e for e in all_evals if e.get("status", "") == "RUNNING"]
    finished_list = [
        e for e in all_evals
        if e.get("status", "").startswith("FINISHED") or e.get("status", "") == "PENDING_NEW_EVAL"
    ]
    logger.info(f"Pending: {len(pending_list)}, Running: {len(running_list)}, Finished: {len(finished_list)}")
    # from_records(..., columns=cols) already restricts and orders the columns,
    # so no further [cols] re-selection is needed on return.
    df_pending = pd.DataFrame.from_records(pending_list, columns=cols)
    df_running = pd.DataFrame.from_records(running_list, columns=cols)
    df_finished = pd.DataFrame.from_records(finished_list, columns=cols)
    return df_finished, df_running, df_pending
def format_eval_data(data: dict) -> dict:
    """Normalize a raw eval-request record into the display structure.

    Args:
        data: A single request record as loaded from a queue JSON file.

    Returns:
        A dict with the fixed display keys; missing fields fall back to
        sensible defaults ("main" revision, non-private, empty strings).
    """
    raw_model = data.get("model", "")
    # Pass-through fields with their per-key fallbacks.
    fallbacks = {
        "revision": "main",
        "private": False,
        "precision": "",
        "weight_type": "",
        "model_type": "",
        "status": "",
    }
    formatted = {key: data.get(key, default) for key, default in fallbacks.items()}
    # Clickable link for display, plus the raw name so callers can still
    # look the model up by its plain identifier.
    return {"model": make_clickable_model(raw_model), "model_raw": raw_model, **formatted}