Spaces:
Running
Running
File size: 4,090 Bytes
f2a3e70 bd5b131 87ad165 bd5b131 87ad165 bd5b131 87ad165 bd5b131 87ad165 bd5b131 87ad165 bd5b131 f2a3e70 bd5b131 f2a3e70 bd5b131 f2a3e70 bd5b131 f2a3e70 de1d88f bd5b131 f2a3e70 bd5b131 f2a3e70 bd5b131 f2a3e70 bd5b131 87ad165 bd5b131 87ad165 bd5b131 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 |
import json
import re
import gradio as gr
import numpy
import pandas as pd
from src.display.css_html_js import custom_css
from src.about import (
INTRODUCTION_TEXT,
TITLE,
AUTHORS,
)
from src.display.formatting import make_clickable_model
def parse_parseable(x):
    """Return the parseable-question count for one result row.

    When the "Num Questions Parseable" field is the literal 'FAILED', the
    count is instead read from the start of the "Error" message, which is
    expected to look like "<N>.0 questions were parseable"; the digits
    before ".0" are returned. Any other row returns the field unchanged.

    Args:
        x: Row-like mapping with "Num Questions Parseable" and "Error" keys.

    Returns:
        The count exactly as stored in the row or captured from the message,
        or NaN when the row is 'FAILED' and the message is missing or does
        not start with the expected phrase.
    """
    if x["Num Questions Parseable"] != 'FAILED':
        return x["Num Questions Parseable"]
    error = x["Error"]
    # Short CSV rows leave "Error" as None; treat that like "no match"
    # instead of letting re.match raise TypeError.
    m = re.match(r'(\d+)\.0 questions were parseable', error) if isinstance(error, str) else None
    return m.group(1) if m else numpy.nan


def get_params(model_name):
    """Return the `metadata` entry for *model_name*, or NaN when absent.

    Reads the module-level `metadata` dict, which is populated before this
    function is first called.
    """
    return metadata.get(model_name, numpy.nan)


demo = gr.Blocks(css=custom_css)
with demo:
    gr.HTML(TITLE)
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    # The CSV is split by hand instead of using pd.read_csv: every data line
    # is split at most len(header) - 1 times, so any extra commas stay inside
    # the last field (presumably the free-text "Error" column -- confirm).
    with open("benchmark_results.csv", "r", encoding="utf-8") as f:
        header = [h.strip() for h in f.readline().strip().split(",")]
        max_splits = len(header) - 1
        rows = [line.strip().split(",", max_splits) for line in f if line.strip()]

    with open('metadata.json', encoding="utf-8") as metadata_file:
        metadata = json.load(metadata_file)
    # Also register each entry under the part of its key before the first
    # comma, so lookups by that prefix succeed. Iterate over a snapshot
    # because the dict is modified inside the loop.
    for k, v in list(metadata.items()):
        metadata[k.split(",")[0]] = v

    leaderboard_df = pd.DataFrame(rows, columns=header)
    print(header)

    # Keep only the two Polish benchmark versions and the columns that are
    # displayed. The explicit copy makes the column assignments below operate
    # on an independent frame rather than on a slice of the full table.
    is_polish_run = leaderboard_df["Benchmark Version"].isin(["eq-bench_v2_pl", "eq-bench_pl"])
    leaderboard_df = leaderboard_df.loc[
        is_polish_run, ["Model Path", "Benchmark Score", "Num Questions Parseable", "Error"]
    ].copy()

    leaderboard_df["Num Questions Parseable"] = leaderboard_df[["Num Questions Parseable", "Error"]].apply(
        parse_parseable, axis=1)

    # Params must be looked up before "Model Path" is rewritten by
    # make_clickable_model, since metadata is keyed by the raw path.
    leaderboard_df["Params"] = leaderboard_df["Model Path"].apply(get_params)
    leaderboard_df["Model Path"] = leaderboard_df["Model Path"].apply(make_clickable_model)

    # 'FAILED' scores become NaN so both numeric columns can be cast to float.
    leaderboard_df["Benchmark Score"] = leaderboard_df["Benchmark Score"].replace('FAILED', numpy.nan)
    leaderboard_df["Benchmark Score"] = leaderboard_df["Benchmark Score"].astype(float)
    leaderboard_df["Num Questions Parseable"] = leaderboard_df["Num Questions Parseable"].astype(float)

    # Clamp negative scores to 0 (NaN scores are left as NaN).
    leaderboard_df.loc[leaderboard_df["Benchmark Score"] < 0, "Benchmark Score"] = 0

    # Final column order, best score first (ties broken by parseable count),
    # and the display name for the model column.
    leaderboard_df = leaderboard_df[["Model Path", "Params", "Benchmark Score", "Num Questions Parseable", 'Error']]
    leaderboard_df = leaderboard_df.sort_values(by=["Benchmark Score", "Num Questions Parseable"],
                                                ascending=[False, False])
    leaderboard_df = leaderboard_df.rename(columns={"Model Path": "Model"})

    # Colour gradient over the table; "Params" is then restyled with the
    # reversed colormap.
    leaderboard_df_styled = leaderboard_df.style.background_gradient(cmap="RdYlGn")
    leaderboard_df_styled = leaderboard_df_styled.background_gradient(cmap="RdYlGn_r", subset=['Params'])

    rounding = {
        "Benchmark Score": "{:.2f}",
        "Num Questions Parseable": "{:.0f}",
        "Params": "{:.0f}",
    }
    leaderboard_df_styled = leaderboard_df_styled.format(rounding)

    leaderboard_table = gr.components.Dataframe(
        value=leaderboard_df_styled,
        # One datatype per displayed column, in order:
        # Model, Params, Benchmark Score, Num Questions Parseable, Error.
        datatype=['markdown', 'number', 'number', 'number', 'str'],
        elem_id="leaderboard-table",
        interactive=False,
        visible=True,
    )
    gr.Markdown(AUTHORS, elem_classes="markdown-text")

demo.queue(default_concurrency_limit=40).launch()
|