import gradio as gr  # type: ignore
import pandas as pd

from sotopia_space.constants import MODEL_OPTIONS
from sotopia_space.utils import post_processing

# Module-level state shared across leaderboard callbacks.
LP_MODE = "v2"
original_df, ablation_df = None, None
LP_original_dfs = {}
DEFAULT_LP = 0.5


def benchmark_table():
    global original_df, ablation_df
    global LP_original_dfs, LP_MODE

    gr.Markdown(
        f"**Version**: sotopia (v1.01; 2024.04.22) | **# Examples**: 7200 | **# Models**: {len(MODEL_OPTIONS)} | **# Comparisons**: x",
        elem_classes="markdown-text",
    )

    with gr.TabItem("Vs GPT-3.5", elem_id="od-benchmark-tab-table-ablation", id=0, elem_classes="subtab"):
        # Load per-model results against the GPT-3.5 baseline and rank by goal completion.
        default_main_df = pd.read_json('data_dir/models_vs_gpt35.jsonl', lines=True)
        default_main_df = default_main_df.sort_values(by="GOAL [0, 10]", ascending=False)
        default_main_df = post_processing(default_main_df, None)
        # Add a Rank column as the first column (starting from 1).
        default_main_df.insert(0, "Rank", range(1, 1 + len(default_main_df)))

        with gr.Row():
            with gr.Column(scale=4):
                gr.Markdown(
                    "**Vs GPT3.5**: The interlocutors are compared against GPT-3.5, the baseline model."
                )

        TYPES = ["number", "markdown", "number"]
        leaderboard_table = gr.components.Dataframe(
            value=default_main_df,
            datatype=TYPES,
            # max_rows=None,
            height=1000,
            elem_id="leaderboard-table",
            interactive=False,
            visible=True,
            min_width=60,
        )
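

# A minimal usage sketch (an assumption, not part of the original module):
# benchmark_table() only creates Gradio components, so it must be called inside
# a gr.Blocks() context, and the gr.TabItem it opens needs a surrounding
# gr.Tabs() container. The block below illustrates that wiring only.
if __name__ == "__main__":
    with gr.Blocks() as demo:
        with gr.Tabs():
            benchmark_table()
    demo.launch()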