"""Gradio front end for the LAiW leaderboard.

Builds one radar chart per aggregated category (from ``plot_data()``) and one
table tab per entry of ``tab_data()``, then serves them with Gradio.
"""

import gradio as gr
import plotly.graph_objects as go
from apscheduler.schedulers.background import BackgroundScheduler

from get_data_info import plot_data, tab_data

# Constants
# NOTE(review): TITLE is rendered through gr.HTML but contains no markup, only
# the text surrounded by blank lines -- it looks like heading tags were lost
# at some point; confirm against the deployed version. The text itself is
# kept unchanged (written with escaped newlines so the literal is valid).
TITLE = "\n\n⚖️ LAiW Leaderboard\n\n"

INTRODUCTION_TEXT = (
    "🏆 The LAiW Leaderboard is designed to rigorously track, rank, and "
    "evaluate state-of-the-art Large Language Models in Legal. "
    "💡 Our leaderboard not only covers basic Legal NLP tasks but also "
    "incorporates Legal practice tasks such as similar case matching, "
    "offering a more comprehensive evaluation for real-world Legal "
    "applications. "
    "🌟 Our evaluation metrics include, but are not limited to, Accuracy, "
    "F1 Score, ROUGE score, and Matthews correlation coefficient (MCC), "
    "providing a multidimensional assessment of model performance. "
    "🔗 For more details, refer to our GitHub page "
    "[here](https://github.com/Dai-shen/LAiW). "
)

# Categories drawn as radar charts, grouped by the row they are placed in.
RADAR_ROWS = (
    ("Overall", "Basic Information Retrieval"),
    ("Legal Foundation Inference", "Complex Legal Application"),
)

# Period of the background job, in seconds.
REFRESH_INTERVAL_SECONDS = 3600


def create_data_interface(df):
    """Return a ``gr.components.Dataframe`` showing *df*.

    The first column is declared as ``"str"`` and every remaining column as
    ``"number"``.

    Args:
        df: pandas DataFrame to display.

    Returns:
        The Gradio Dataframe component (it attaches to the active
        ``gr.Blocks`` context when one is open).
    """
    headers = df.columns
    types = ["str"] + ["number"] * (len(headers) - 1)
    return gr.components.Dataframe(
        value=df.values.tolist(),
        headers=list(headers),
        datatype=types,
    )


def plot_radar_chart(df, attributes, category_name):
    """Build a Plotly radar chart with one filled trace per row of *df*.

    Args:
        df: DataFrame with a ``'Model'`` column (used as the trace name) and
            one column per entry of *attributes*.
        attributes: column labels plotted around the polar axis.
        category_name: value used as the figure title.

    Returns:
        A ``plotly.graph_objects.Figure``.
    """
    fig = go.Figure()

    for _, row in df.iterrows():
        fig.add_trace(go.Scatterpolar(
            r=row[attributes].tolist(),
            theta=attributes,
            fill='toself',
            name=row['Model'],
        ))

    fig.update_layout(
        title=f"{category_name}",
        polar=dict(
            radialaxis=dict(
                visible=True,
                range=[0, 100],  # fixed radial range shared by all charts
            ),
        ),
        showlegend=True,
    )
    return fig


def create_data_interface_for_aggregated(df, category_name):
    """Return the radar chart for an aggregated-score DataFrame.

    Every column except the first is treated as a plotted attribute; the
    attribute labels are also printed to stdout.

    Args:
        df: DataFrame passed through to ``plot_radar_chart``.
        category_name: title of the resulting figure.

    Returns:
        The figure produced by ``plot_radar_chart``.
    """
    attributes = df.columns[1:]
    print(f"attributes: {attributes}")
    return plot_radar_chart(df, attributes, category_name)


def reindex_cols(fix_cols, df):
    """Select the columns of *df* named in *fix_cols*, in *fix_cols* order.

    Names in *fix_cols* that are not columns of *df* are skipped.

    Args:
        fix_cols: iterable of column names giving the desired order.
        df: DataFrame to select from.

    Returns:
        A DataFrame holding only the matching columns.
    """
    # Build the lookup once instead of re-listing the columns per candidate.
    present = set(df.columns)
    task_col = [subtask for subtask in fix_cols if subtask in present]
    return df[task_col]


def _render_radar_row(frames, keys):
    """Add one ``gr.Row`` with a radar plot for each *frames* entry in *keys*.

    Plots follow the iteration order of *frames*, not the order of *keys*.
    Must be called inside an open ``gr.Blocks`` context.
    """
    with gr.Row():
        for key, df in frames.items():
            if key not in keys:
                continue
            # Empty-string cells are replaced by 0 before plotting;
            # ``replace`` returns a new frame, so the caller's data is
            # left untouched.
            new_df = df.replace('', 0)
            print(f"{key}: \n{new_df}")
            gr.Plot(create_data_interface_for_aggregated(new_df, key))


def launch_gradio(df1, df2):
    """Build the leaderboard UI and start the Gradio server.

    Args:
        df1: mapping of category name -> DataFrame used for the radar charts;
            only the categories listed in ``RADAR_ROWS`` are drawn.
        df2: mapping of tab name -> DataFrame; each entry becomes a tab
            containing a table.
    """
    demo = gr.Blocks()

    with demo:
        gr.HTML(TITLE)
        gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

        for row_keys in RADAR_ROWS:
            _render_radar_row(df1, row_keys)

        for key, df in df2.items():
            with gr.Tab(key):
                create_data_interface(df)

    demo.launch()


if __name__ == "__main__":
    df1 = plot_data()
    df2 = tab_data()

    scheduler = BackgroundScheduler()
    # Hand the scheduler the callable plus its keyword arguments. Writing
    # ``launch_gradio(...)`` here would run it during argument evaluation and
    # register its return value (None) as the job instead of the function.
    # NOTE(review): each run of this job calls demo.launch() again with the
    # data loaded above -- confirm that a periodic relaunch (rather than a
    # data reload or process restart) is what is wanted.
    scheduler.add_job(
        launch_gradio,
        "interval",
        seconds=REFRESH_INTERVAL_SECONDS,
        kwargs={"df1": df1, "df2": df2},
    )
    scheduler.start()

    # Launch immediately
    launch_gradio(df1=df1, df2=df2)