"""A gradio app that renders a static leaderboard. This is used for Hugging Face Space."""
import argparse
import json
from datetime import datetime
import gradio as gr
import pandas as pd
import pytz
from constants import *
from constants import column_names
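# `constants` is expected to also provide ORDERED_COLUMN_NAMES plus the `css`
# and `js_light` assets used below (inferred from usage; not shown in this file).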
# Set when the demo is built (see build_demo) and rendered into the page header.
LAST_UPDATED = None
# with open("_intro.md", "r") as f:
# INTRO_MD = f.read()
INTRO_MD = ""
with open("_header.md", "r") as f:
HEADER_MD = f.read()
# Populated by data_load() before the demo is built.
raw_data = None
original_df = None
def df_filters(mode_selection_radio, show_open_source_model_only):
    # Both arguments are currently unused placeholders for future filtering.
    df = original_df.copy()  # work on a copy so repeated calls don't re-insert the rank column
    df.insert(0, "", range(1, 1 + len(df)))  # 1-based rank column
    return df
def _gstr(text):
    # Helper: a hidden text component, useful for passing static strings around.
    return gr.Text(text, visible=False)
def _tab_leaderboard():
    default_main_df = df_filters("greedy", False)
    leaderboard_table = gr.components.Dataframe(
        value=default_main_df,
        # Per-column display types; "markdown" lets model names render as links.
        datatype=["number", "markdown", "markdown", "number"],
        height=1000,
        elem_id="leaderboard-table",
        interactive=False,
        visible=True,
        # One width per column; assumes ten columns in ORDERED_COLUMN_NAMES.
        column_widths=[50, 150, 150, 100, 120, 120, 100, 100, 110, 100],
        wrap=True,
    )
def _tab_submit():
    markdown_text = """
Please open an issue on our [GitHub](https://github.com/allenai/super-benchmark) repository with your model's output trajectories and results, and we will update the leaderboard accordingly.
"""
    gr.Markdown("## πŸš€ Submit Your Results\n\n" + markdown_text, elem_classes="markdown-text")
def build_demo():
    global LAST_UPDATED
    with gr.Blocks(theme=gr.themes.Soft(), css=css, js=js_light) as demo:
        # Record the build time in US/Pacific and render it into the header.
        LAST_UPDATED = datetime.now(pytz.timezone("US/Pacific")).strftime("%Y-%m-%d %H:%M:%S")
        header_md_text = HEADER_MD.replace("{LAST_UPDATED}", LAST_UPDATED)
        gr.Markdown(header_md_text, elem_classes="markdown-text")
        with gr.Tabs(elem_classes="tab-buttons") as tabs:
            with gr.TabItem("πŸ… Leaderboard", elem_id="od-benchmark-tab-table", id=0):
                _tab_leaderboard()
            with gr.TabItem("πŸš€ Submit Your Results", elem_id="od-benchmark-tab-table", id=3):
                _tab_submit()
    return demo
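# Expected shape of the result file, inferred from the parsing in data_load()
# below; every field name other than "Expert (Accuracy)" is illustrative only:
# [
#   {"Model": "some-model", "Expert (Accuracy)": "46.1", ...},
#   ...
# ]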
def data_load(result_file):
    """Load the results JSON into the module-level raw_data / original_df."""
    global raw_data, original_df
    print(f"Loading {result_file}")
    column_names_main = column_names.copy()
    main_ordered_columns = ORDERED_COLUMN_NAMES  # display order comes from constants
    with open(result_file, "r") as f:
        raw_data = json.load(f)
    # Coerce numeric-looking strings to floats so the sort below compares numbers.
    for d in raw_data:
        for k, v in d.items():
            try:
                d[k] = float(v)
            except (ValueError, TypeError):
                pass
    original_df = pd.DataFrame(raw_data)
    original_df.sort_values(by="Expert (Accuracy)", ascending=False, inplace=True)
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--share", action="store_true")
    parser.add_argument("--result_file", help="Path to results table", default="ZeroEval-main/result_dirs/leaderboard.json")
    args = parser.parse_args()

    data_load(args.result_file)
    demo = build_demo()
    demo.launch(share=args.share, height=3000, width="100%")
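# Example local runs (assuming this file is saved as app.py):
#   python app.py --result_file ZeroEval-main/result_dirs/leaderboard.json
#   python app.py --share   # also creates a public Gradio share link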