Spaces:
Running
Running
import pandas as pd | |
from pathlib import Path | |
from datasets import load_dataset | |
import numpy as np | |
import os | |
import re | |
# From Open LLM Leaderboard | |
def model_hyperlink(link, model_name):
    """Return an HTML anchor labelled ``model_name`` that opens ``link`` in a new tab.

    The special name "random" is returned as the plain string "random"
    instead of being wrapped in a link.
    """
    if model_name != "random":
        return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'
    return "random"
def undo_hyperlink(html_string):
    """Return the first run of text that sits between a ``>`` and the next ``<``.

    When ``html_string`` contains no such run, the literal string
    "No text found" is returned.
    """
    found = re.search(r'>[^<]+<', html_string)
    if found is None:
        return "No text found"
    # The match includes the delimiting '>' and '<'; slice them off.
    return found.group(0)[1:-1]
# Define a function to fetch and process data | |
def load_all_data(data_repo, subdir:str, subsubsets=False):    # use HF api to pull the git repo
    """Load every per-model JSON result file and build the leaderboard table.

    Files are discovered at ``<data_repo>/<subdir>/<org>/<name>.json`` and each
    one is read with ``datasets.load_dataset("json", ...)``.

    Args:
        data_repo: Path to the local checkout that holds the results.
        subdir: Sub-directory of ``data_repo`` containing one folder per org.
        subsubsets: Accepted for call compatibility; not used by this function.

    Returns:
        A ``pandas.DataFrame`` with one row per loaded record where:
        - ``model`` is an HTML link to ``https://huggingface.co/<model>``;
        - every remaining score column is multiplied by 100 and rounded to
          one decimal;
        - ``average`` is the NaN-ignoring mean of the score columns;
        - columns are ordered ``model``, ``model_type`` (if present),
          ``average``, then the rest alphabetically;
        - rows whose ``model_type`` contains "DPO Ref. Free" are removed.

    Raises:
        FileNotFoundError: if no ``.json`` file is found under the org folders
            (or, from ``os.listdir``, if the directory itself does not exist).
    """
    data_dir = Path(data_repo) / subdir

    # Collect "<org>/<file>.json" for every JSON file one level below data_dir.
    models_results = []
    for org in os.listdir(data_dir):
        org_dir = data_dir / org
        if not org_dir.is_dir():
            continue
        models_results.extend(
            f"{org}/{file}"
            for file in os.listdir(org_dir)
            if file.endswith(".json") and (org_dir / file).is_file()
        )

    if not models_results:
        raise FileNotFoundError(f"No .json result files found under {data_dir}")

    # Load the files one by one (they may not all share the same fields) and
    # concatenate once at the end. The path is built from data_dir so it does
    # not depend on data_repo ending with a path separator.
    frames = [
        pd.DataFrame(load_dataset("json", data_files=str(data_dir / model), split="train"))
        for model in models_results
    ]
    # Reversed so the row order matches prepending each new file to the table.
    df = pd.concat(frames[::-1])

    # Remove the chat_template column; tolerate result sets that never had it.
    df = df.drop(columns=["chat_template"], errors="ignore")

    # Sort columns alphabetically, with "model" moved to the front.
    cols = sorted(df.columns)
    cols.insert(0, cols.pop(cols.index("model")))
    df = df.loc[:, cols]

    # Columns removed from the table when present:
    #   model_beaker, ref_model            - not shown
    #   xstest, anthropic, summarize_prompted - outdated data
    #   pku_better, pku_safer              - removed from the leaderboard
    unwanted = (
        "model_beaker",
        "ref_model",
        "xstest",
        "anthropic",
        "summarize_prompted",
        "pku_better",
        "pku_safer",
    )
    df = df.drop(columns=[name for name in unwanted if name in df.columns])

    # Everything left except the identifying columns is treated as a score.
    # model_type may be missing (pref tests may not have it).
    score_cols = [name for name in df.columns if name not in ("model", "model_type")]

    # Scale to percentages, round, and add the per-row average.
    df[score_cols] = (df[score_cols] * 100).round(1)
    df["average"] = np.nanmean(df[score_cols].values, axis=1).round(1)

    # Turn the model name into a link to its Hugging Face page.
    df["model"] = df["model"].apply(lambda x: model_hyperlink(f"https://huggingface.co/{x}", x))

    # Move "average" to second position, then "model_type" in front of it.
    cols = list(df.columns)
    cols.insert(1, cols.pop(cols.index("average")))
    has_model_type = "model_type" in cols
    if has_model_type:
        cols.insert(1, cols.pop(cols.index("model_type")))
    df = df.loc[:, cols]

    # Remove models with DPO Ref. Free as type (future work). Guarded because
    # the column is optional; regex=False so the "." is matched literally.
    if has_model_type:
        df = df[~df["model_type"].str.contains("DPO Ref. Free", na=False, regex=False)]

    return df