Spaces:
Sleeping
Sleeping
# matplotlib.use('macosx') | |
import gradio as gr | |
import plotly.graph_objects as go | |
from apscheduler.schedulers.background import BackgroundScheduler | |
from get_data_info import plot_data, tab_data | |
def create_data_interface(df):
    """Wrap the contents of ``df`` in a Gradio ``Dataframe`` component.

    The first column is declared with the ``"str"`` datatype and every
    remaining column with ``"number"``.

    Args:
        df: Table exposing ``columns`` and ``values`` (used here like a
            pandas DataFrame).

    Returns:
        The constructed ``gr.components.Dataframe``.
    """
    column_names = list(df.columns)
    # One leading text column, everything after it numeric.
    column_types = ["str"] + ["number"] * (len(column_names) - 1)
    rows = df.values.tolist()
    return gr.components.Dataframe(
        value=rows,
        headers=column_names,
        datatype=column_types,
    )
def plot_radar_chart(df, attributes, category_name):
    """Build a radar chart with one filled polar trace per row of ``df``.

    Args:
        df: Table with a ``Model`` column plus the columns named in
            ``attributes``.
        attributes: Column labels used as the angular axis; each row's
            values in these columns become the radial values.
        category_name: Value rendered as the figure title.

    Returns:
        The populated ``plotly.graph_objects.Figure``.
    """
    radar = go.Figure()
    for _, record in df.iterrows():
        trace = go.Scatterpolar(
            name=record['Model'],
            r=record[attributes].tolist(),
            theta=attributes,
            fill='toself',
        )
        radar.add_trace(trace)

    # The radial axis is pinned to the 0-100 range for every chart.
    layout = dict(
        title=f"{category_name}",
        polar=dict(radialaxis=dict(visible=True, range=[0, 100])),
        showlegend=True,
    )
    radar.update_layout(**layout)
    return radar
def create_data_interface_for_aggregated(df, category_name):
    """Return the radar chart for ``df``, titled with ``category_name``.

    Every column after the first is treated as a radar attribute; the
    selected attribute labels are printed before plotting.

    Args:
        df: Table whose first column is skipped when choosing attributes.
        category_name: Title passed through to ``plot_radar_chart``.

    Returns:
        Whatever ``plot_radar_chart`` returns for these arguments.
    """
    radar_axes = df.columns[1:]
    print(f"attributes: {radar_axes}")
    return plot_radar_chart(df, radar_axes, category_name)
def reindex_cols(fix_cols, df):
    """Select the columns of ``df`` listed in ``fix_cols``, in that order.

    Names in ``fix_cols`` that are not columns of ``df`` are skipped, and
    columns of ``df`` that are not listed are dropped. ``df`` itself is not
    modified.

    Args:
        fix_cols: Iterable of column labels giving the desired order.
        df: DataFrame to select from.

    Returns:
        A DataFrame containing only the matching columns, ordered as in
        ``fix_cols``.
    """
    # Collect the column labels once instead of rebuilding the list for
    # every candidate name.
    present = set(df.columns)
    ordered = [name for name in fix_cols if name in present]
    return df[ordered]
def _render_radar_row(df1, categories):
    """Render one ``gr.Row`` holding a radar plot per selected category.

    Entries are visited in the iteration order of ``df1``; only keys that
    appear in ``categories`` produce a plot.
    """
    with gr.Row():
        for key, df in df1.items():
            if key not in categories:
                continue
            # Empty-string cells are swapped for 0 before plotting;
            # ``replace`` returns a new frame, so the input stays untouched.
            radar_df = df.replace('', 0)
            print(f"{key}: \n{radar_df}")
            gr.Plot(create_data_interface_for_aggregated(radar_df, key))


def launch_gradio(df1, df2):
    """Build the leaderboard page and launch it.

    The page consists of the title and introduction (read from the
    module-level ``TITLE`` and ``INTRODUCTION_TEXT`` names at call time),
    two rows of radar charts drawn from ``df1``, and one tab per entry of
    ``df2`` showing that entry as a table.

    Args:
        df1: Mapping of category name to the DataFrame used for its radar
            chart.
        df2: Mapping of tab name to the DataFrame shown in that tab.
    """
    demo = gr.Blocks()
    with demo:
        gr.HTML(TITLE)
        gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

        # Two rows of radar charts, two categories per row.
        _render_radar_row(df1, ("Overall", "Basic Information Retrieval"))
        _render_radar_row(
            df1,
            ("Legal Foundation Inference", "Complex Legal Application"),
        )

        for key, df in df2.items():
            with gr.Tab(key):
                create_data_interface(df)

    demo.launch()
# Constants
# Defined at module level because launch_gradio() reads them as globals;
# keeping them here makes that function usable when this module is imported.
TITLE = '<h1 align="center" id="space-title">βοΈ LAiW Leaderboard</h1>'
INTRODUCTION_TEXT = """π The LAiW Leaderboard is designed to rigorously track, rank, and evaluate state-of-the-art Large Language Models in Legal.
π‘ Our leaderboard not only covers basic Legal NLP tasks but also incorporates Legal practice tasks such as similar case matching, offering a more comprehensive evaluation for real-world Legal applications.
π Our evaluation metrics include, but are not limited to, Accuracy, F1 Score, ROUGE score, and Matthews correlation coefficient (MCC), providing a multidimensional assessment of model performance.
π For more details, refer to our GitHub page [here](https://github.com/Dai-shen/LAiW).
"""


if __name__ == "__main__":
    df1 = plot_data()
    df2 = tab_data()

    scheduler = BackgroundScheduler()
    # Pass the callable itself plus its keyword arguments. Writing
    # launch_gradio(...) here would run it on the spot and register its
    # return value (None) instead of a job.
    # NOTE(review): every scheduled run builds and launches a new Blocks
    # app - confirm that is the intended hourly refresh mechanism.
    scheduler.add_job(
        launch_gradio,
        "interval",
        seconds=3600,
        kwargs={"df1": df1, "df2": df2},
    )
    scheduler.start()

    # Launch immediately
    launch_gradio(df1=df1, df2=df2)