Spaces:
Sleeping
Sleeping
"""A gradio app that renders a static leaderboard. This is used for Hugging Face Space.""" | |
import ast | |
import argparse | |
import glob | |
import pickle | |
import plotly | |
import gradio as gr | |
import numpy as np | |
import pandas as pd | |
import gradio as gr | |
import pandas as pd | |
from pathlib import Path | |
import json | |
from constants import BANNER, CITATION_TEXT, WINRATE_HEATMAP, css, js_code, all_task_types, DEFAULT_LP, TASK_TYPE_STR, js_light | |
from datetime import datetime, timezone | |
from data_utils import load_eval_results, sample_an_eval_result, apply_length_penalty, post_processing, add_winrates, add_winrates_tasks | |
# from gradio.themes.utils import colors, fonts, sizes | |
from themes import Seafoam | |
from huggingface_hub import HfApi | |
# from datasets import Dataset, load_dataset, concatenate_datasets | |
import os, uuid | |
from utils_display import model_info | |
# Last-modified time of the results file. It stays None on import and is only
# filled in by the ``__main__`` block, from the mtime of ``--result_file``.
LAST_UPDATED = None

# Static markdown fragments used by the UI. They are decoded explicitly as
# UTF-8 so the text does not depend on the platform's default locale.
with open("_intro.md", "r", encoding="utf-8") as f:
    INTRO_MD = f.read()

with open("_about_us.md", "r", encoding="utf-8") as f:
    ABOUT_MD = f.read()

with open("_header.md", "r", encoding="utf-8") as f:
    HEADER_MD = f.read()

# Result tables read by the slider callbacks; they are replaced with pandas
# DataFrames when the file is run as a script.
original_df, ablation_df = None, None

# Evaluation records that display_chat_history samples from.
eval_results = load_eval_results()

available_models = []  # to be filled in later
def display_chat_history(model_selections):
    """Sample one evaluation record and shape it for the UI outputs.

    Args:
        model_selections: The current model selection, forwarded unchanged to
            ``sample_an_eval_result`` together with the module-level
            ``eval_results``.

    Returns:
        A 4-tuple ``(task, plan_chat, ground_chat, image_html)`` where the two
        chat values are lists of ``(user, assistant)`` tuples and
        ``image_html`` is a centered ``<div>`` that contains an ``<img>`` tag
        only when the record has a non-empty ``"image"`` value.
    """
    record = sample_an_eval_result(eval_results, model_selections)
    task = record["task"]
    # A missing/empty image is normalised to the empty string.
    image_path = record["image"] or ""

    # Pair each user turn with the assistant turn at the same position; any
    # unmatched trailing turn is dropped by zip.
    plan_log = record["plan_history"]
    plan_chat = list(zip(plan_log["user"], plan_log["assistant"]))
    ground_log = record["ground_history"]
    ground_chat = list(zip(ground_log["user"], ground_log["assistant"]))

    if image_path != "":
        image_html = f'<div style="text-align: center;"> <img src="{image_path}" style="height: 250px;"> </div>'
    else:
        image_html = '<div style="text-align: center;"> </div>'
    return task, plan_chat, ground_chat, image_html
def slider_change_main(length_penalty):
    """Build the main results table for a given length penalty.

    Applies ``apply_length_penalty`` to the module-level ``original_df`` and
    ``ablation_df``, keeps the summary columns, orders rows by
    ``"Overall Elo"`` (highest first), passes the table through
    ``add_winrates`` and finally removes the ``"Length"`` column.

    Args:
        length_penalty: Value forwarded to ``apply_length_penalty``.

    Returns:
        The resulting DataFrame.
    """
    summary_columns = ["Model", "Overall Elo", "Task-Avg Elo", "# battles", "Length"]
    table = apply_length_penalty(original_df, ablation_df, length_penalty)
    table = table[summary_columns].sort_values(by="Overall Elo", ascending=False)
    # "Length" is kept until after add_winrates runs and is only dropped
    # from the returned table.
    return add_winrates(table).drop(columns=["Length"])
def slider_change_full(length_penalty, show_winrate):
    """Build the per-task results table for a given length penalty.

    Rows are ordered by ``"Task-Avg Elo"`` (highest first) and the summary
    columns are then removed, leaving the remaining columns of the adjusted
    table.

    Args:
        length_penalty: Value forwarded to ``apply_length_penalty``.
        show_winrate: ``"gpt-3.5"`` or ``"gpt-4"`` to pass the table through
            ``add_winrates_tasks`` with that reference; any other value
            (including ``"none"``) returns the table as is.

    Returns:
        The resulting DataFrame.
    """
    summary_columns = ["Overall Elo", "Task-Avg Elo", "# battles", "Length"]
    table = apply_length_penalty(original_df, ablation_df, length_penalty)
    # Sort first: the sort key is one of the columns removed afterwards.
    table = table.sort_values(by="Task-Avg Elo", ascending=False).drop(columns=summary_columns)
    if show_winrate in ("gpt-3.5", "gpt-4"):
        table = add_winrates_tasks(table, ref=show_winrate)
    return table
# Theme instance created at import time. build_demo below passes
# gr.themes.Soft() to gr.Blocks, so this object is not used in the visible code.
seafoam = Seafoam()


def build_demo(TYPES):
    """Assemble the Gradio Blocks UI and return it without launching.

    The layout has an "Explore" tab (sample button, model picker, task text,
    optional image and two chat panels) and an "About Us" tab, followed by a
    "last updated" line and a collapsible citation box.

    NOTE(review): the source this was reconstructed from had lost its
    indentation, so the nesting of the ``with`` blocks below is inferred from
    statement order -- confirm against the deployed layout.

    Args:
        TYPES: Accepted for interface compatibility; not referenced in the
            body.

    Returns:
        The constructed ``gr.Blocks`` instance.
    """

    def _run_js_on_change(component):
        # Attach the front-end ``js_code`` snippet to the component's change event.
        component.change(lambda x: x, inputs=[], outputs=[], scroll_to_output=False, js=js_code)

    def _history_panel(heading, label):
        # One chat column: a heading followed by a Chatbot with the shared styling.
        gr.Markdown(heading, elem_classes="markdown-text")
        chatbot = gr.Chatbot(
            avatar_images=["human_icon.jpeg", "ai_icon.png"],
            height="auto",
            container=False,
            label=label,
            likeable=False,
            show_share_button=False,
            show_label=True,
            elem_classes="chat-common",
            layout="bubble",
        )
        _run_js_on_change(chatbot)
        return chatbot

    with gr.Blocks(theme=gr.themes.Soft(), css=css, js=js_light) as demo:
        gr.Markdown(HEADER_MD, elem_classes="markdown-text")

        with gr.Tabs(elem_classes="tab-buttons"):
            with gr.TabItem("๐ Explore", elem_id="od-benchmark-tab-table", id=2):
                with gr.Row():
                    btn_show_history = gr.Button(
                        "๐ฒ Click here to sample an example of ๐ช Lumos outputs! ",
                        elem_classes="sample_button",
                    )

                with gr.Row():
                    with gr.Column():
                        with gr.Accordion("Choose models to sample from", open=False, elem_classes="accordion-label"):
                            # Every known model starts out selected.
                            selected_models = gr.CheckboxGroup(
                                available_models,
                                info="",
                                value=available_models,
                                show_label=False,
                                elem_id="select-models",
                            )
                            clear_button = gr.Button("Clear", elem_classes="btn_boderline_gray", scale=1)
                            # "Clear" empties the checkbox selection.
                            clear_button.click(
                                lambda: {selected_models: {"value": [], "__type__": "update"}},
                                inputs=[],
                                outputs=[selected_models],
                            )

                with gr.Row():
                    with gr.Column(scale=1.5):
                        with gr.Accordion("๐ Task Description", open=True, elem_classes="accordion-label"):
                            task = gr.Markdown("", elem_classes="markdown-text-tiny")
                            _run_js_on_change(task)
                    with gr.Column(scale=1):
                        with gr.Accordion("Input Image (optional)", open=True, elem_classes="accordion-label"):
                            image = gr.HTML("", elem_id="input_image")
                            _run_js_on_change(image)

                with gr.Row():
                    with gr.Column():
                        Chatbot_Common_Plan = _history_panel(
                            "## ๐ข Plan Module Process History", "Common Plan History"
                        )
                    with gr.Column():
                        Chatbot_Common_Ground = _history_panel(
                            "## ๐ข Ground Module Process History", "Common Ground History"
                        )

                # Clicking the sample button fills all four output components.
                btn_show_history.click(
                    fn=display_chat_history,
                    inputs=[selected_models],
                    outputs=[task, Chatbot_Common_Plan, Chatbot_Common_Ground, image],
                )

            with gr.TabItem("๐ฎ About Us", elem_id="od-benchmark-tab-table", id=3):
                gr.Markdown(ABOUT_MD, elem_classes="markdown-text")

        gr.Markdown(f"Last updated on **{LAST_UPDATED}**", elem_classes="markdown-text-small")

        with gr.Row():
            with gr.Accordion("๐ Citation", open=False, elem_classes="accordion-label"):
                gr.Textbox(
                    value=CITATION_TEXT,
                    lines=7,
                    label="Copy the BibTeX snippet to cite this source",
                    elem_id="citation-button",
                    show_copy_button=True,
                )

    return demo
if __name__ == "__main__":
    # Script entry point: parse CLI options, load and filter the result
    # tables into the module-level globals, then build and launch the UI.
    parser = argparse.ArgumentParser()
    parser.add_argument("--share", action="store_true")
    parser.add_argument("--result_file", help="Path to results table", default="data_dir/elo_ranks.all.jsonl")
    # NOTE: the "balation" spelling is part of the existing CLI and is kept
    # so that current invocations continue to work.
    parser.add_argument("--length_balation_file", help="Path to results table", default="data_dir/elo_ranks.length_ablation.all.jsonl")
    parser.add_argument("--skip_empty_result_file", help="Path to results table", default="data_dir/elo_ranks.skip_empty.all.jsonl")
    parser.add_argument("--skip_empty_length_balation_file", help="Path to results table", default="data_dir/elo_ranks.skip_empty.length_ablation.all.jsonl")
    args = parser.parse_args()

    # The "last updated" stamp is the results file's modification time in UTC.
    LAST_UPDATED = datetime.fromtimestamp(Path(args.result_file).stat().st_mtime, tz=timezone.utc).strftime("%Y-%m-%d %H:%M:%S")

    original_df = pd.read_json(args.result_file, lines=True)
    ablation_df = pd.read_json(args.length_balation_file, lines=True)
    skip_empty_original_df = pd.read_json(args.skip_empty_result_file, lines=True)
    skip_empty_ablation_df = pd.read_json(args.skip_empty_length_balation_file, lines=True)

    # available_models = sorted(list(set(list(original_df["model name "]))))
    available_models = list(model_info.keys())

    # Remove the rows whose model name is not in available_models. The column
    # key really does end with a space ("model name ").
    original_df = original_df[original_df["model name "].isin(available_models)]
    ablation_df = ablation_df[ablation_df["model name "].isin(available_models)]
    skip_empty_ablation_df = skip_empty_ablation_df[skip_empty_ablation_df["model name "].isin(available_models)]
    skip_empty_original_df = skip_empty_original_df[skip_empty_original_df["model name "].isin(available_models)]

    # Read through a context manager so the file handle is closed
    # deterministically instead of being left to the garbage collector.
    with open("model_len_info.json", "r", encoding="utf-8") as len_info_file:
        model_len_info = json.load(len_info_file)

    original_df = post_processing(original_df, model_len_info)
    ablation_df = post_processing(ablation_df, model_len_info)
    skip_empty_original_df = post_processing(skip_empty_original_df, model_len_info)
    skip_empty_ablation_df = post_processing(skip_empty_ablation_df, model_len_info)

    TYPES = ["markdown", "number"]
    demo = build_demo(TYPES)
    demo.launch(share=args.share, allowed_paths=["."], height=1000)