"""Gradio leaderboard for the Polish EQ-Bench results.

Reads ``benchmark_results.csv``, keeps only the ``eq-bench_pl`` /
``eq-bench_v2_pl`` runs, recovers partial results from failed runs, and
renders a styled, sorted leaderboard table.
"""

import re

import gradio as gr
import numpy
import pandas as pd

from src.display.css_html_js import custom_css
from src.about import (
    CITATION_BUTTON_LABEL,
    CITATION_BUTTON_TEXT,
    EVALUATION_QUEUE_TEXT,
    INTRODUCTION_TEXT,
    LLM_BENCHMARKS_TEXT,
    TITLE,
)

demo = gr.Blocks(css=custom_css)
with demo:
    gr.HTML(TITLE)
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    # Parse the results by hand rather than with pd.read_csv: the trailing
    # "Error" field may itself contain commas, so maxsplit=13 keeps
    # everything past the 13th comma in a single column.
    rows = []
    with open("benchmark_results.csv", "r") as f:
        header = [h.strip() for h in f.readline().strip().split(",")]
        for line in f:
            rows.append(line.strip().split(",", 13))
    leaderboard_df = pd.DataFrame(rows, columns=header)

    # Keep only the Polish EQ-Bench runs (v1 and v2).
    leaderboard_df = leaderboard_df[
        leaderboard_df["Benchmark Version"].isin(["eq-bench_v2_pl", "eq-bench_pl"])
    ]

    # Keep only the columns shown on the leaderboard.
    leaderboard_df = leaderboard_df[
        ["Model Path", "Benchmark Score", "Num Questions Parseable", "Error"]
    ]

    def parse_parseable(row):
        """Recover the parseable-question count for FAILED runs.

        When a run failed, "Num Questions Parseable" holds the literal
        string 'FAILED' and the real count is embedded in the "Error"
        message as e.g. "12.0 questions were parseable".  Returns NaN when
        the error text does not contain that phrase (the column is cast to
        float below, so NaN is the natural missing value).
        """
        if row["Num Questions Parseable"] == "FAILED":
            m = re.match(r"(\d+)\.0 questions were parseable", row["Error"])
            return m.group(1) if m else numpy.nan
        return row["Num Questions Parseable"]

    leaderboard_df["Num Questions Parseable"] = leaderboard_df[
        ["Num Questions Parseable", "Error"]
    ].apply(parse_parseable, axis=1)

    # Failed runs have no score: map 'FAILED' to NaN, then coerce both
    # numeric columns from string to float.
    leaderboard_df["Benchmark Score"] = leaderboard_df["Benchmark Score"].replace(
        "FAILED", numpy.nan
    )
    leaderboard_df["Benchmark Score"] = leaderboard_df["Benchmark Score"].astype(float)
    leaderboard_df["Num Questions Parseable"] = leaderboard_df[
        "Num Questions Parseable"
    ].astype(float)

    # Clamp negative scores to 0.
    leaderboard_df.loc[leaderboard_df["Benchmark Score"] < 0, "Benchmark Score"] = 0

    # Best score first; ties broken by the number of parseable questions.
    leaderboard_df = leaderboard_df.sort_values(
        by=["Benchmark Score", "Num Questions Parseable"], ascending=[False, False]
    )

    # Colour-grade the table and round the numeric columns for display.
    leaderboard_df_styled = leaderboard_df.style.background_gradient(cmap="RdYlGn")
    leaderboard_df_styled = leaderboard_df_styled.format(
        {"Benchmark Score": "{:.2f}", "Num Questions Parseable": "{:.0f}"}
    )

    leaderboard_table = gr.components.Dataframe(
        value=leaderboard_df_styled,
        elem_id="leaderboard-table",
        interactive=False,
        visible=True,
    )

demo.queue(default_concurrency_limit=40).launch()