SCULAiW / app.py
daishen
update results
af9d904
raw
history blame
3.96 kB
# matplotlib.use('macosx')
import gradio as gr
import plotly.graph_objects as go
from apscheduler.schedulers.background import BackgroundScheduler
from get_data_info import plot_data, tab_data
def create_data_interface(df):
    """Wrap ``df`` in a Gradio ``Dataframe`` component.

    The first column is declared with datatype ``"str"`` and every remaining
    column with ``"number"``. Cell values are handed over as a list of rows
    and the column labels are used as headers.

    Args:
        df: Table to display; its ``columns`` and ``values`` are read.

    Returns:
        The ``gr.components.Dataframe`` built from ``df``.
    """
    column_names = list(df.columns)
    column_types = ["str"] + ["number"] * (len(column_names) - 1)
    return gr.components.Dataframe(
        value=df.values.tolist(),
        headers=column_names,
        datatype=column_types,
        # max_rows=10,
    )
def plot_radar_chart(df, attributes, category_name):
    """Build a radar (polar) figure with one filled trace per row of ``df``.

    Args:
        df: Table with a ``'Model'`` column plus the columns in ``attributes``.
        attributes: Column labels used both as the angular axis categories
            and to pick each row's radial values.
        category_name: Text used as the figure title.

    Returns:
        A ``plotly.graph_objects.Figure`` whose radial axis is fixed to
        ``[0, 100]`` and whose legend is shown.
    """
    radar = go.Figure()

    for _, record in df.iterrows():
        # The trace is labelled with the row's model name in the legend.
        label = record['Model']
        radial_values = record[attributes].tolist()
        trace = go.Scatterpolar(
            r=radial_values,
            theta=attributes,
            fill='toself',
            name=label,
        )
        radar.add_trace(trace)

    # Same radial scale for every chart, regardless of the data plotted.
    radial_axis = dict(visible=True, range=[0, 100])
    radar.update_layout(
        title=f"{category_name}",
        polar=dict(radialaxis=radial_axis),
        showlegend=True,
    )
    return radar
def create_data_interface_for_aggregated(df, category_name):
    """Return the radar chart for one aggregated category.

    Every column of ``df`` except the first is treated as an attribute to
    plot; the selected column labels are printed before the figure is built.

    Args:
        df: Table handed to ``plot_radar_chart``.
        category_name: Title passed through to ``plot_radar_chart``.

    Returns:
        The figure produced by ``plot_radar_chart``.
    """
    metric_columns = df.columns[1:]
    print(f"attributes: {metric_columns}")
    return plot_radar_chart(df, metric_columns, category_name)
def reindex_cols(fix_cols, df):
    """Return ``df`` restricted to the columns named in ``fix_cols``.

    The result keeps the order given by ``fix_cols``. Names in ``fix_cols``
    that are not columns of ``df`` are skipped, and columns of ``df`` that
    are not listed in ``fix_cols`` are dropped. ``df`` itself is not modified.

    Args:
        fix_cols: Iterable of column labels in the desired order.
        df: Table to select columns from.

    Returns:
        A DataFrame holding only the matching columns.
    """
    # Build the lookup once; the column list does not change while filtering,
    # so there is no need to re-materialise it for every candidate label.
    present = set(df.columns.tolist())
    task_col = [subtask for subtask in fix_cols if subtask in present]
    return df[task_col]
def launch_gradio(df1, df2):
    """Build the leaderboard page and start the Gradio server.

    Layout, top to bottom: the HTML title, the Markdown introduction, two
    rows of radar plots taken from ``df1``, and one tab per entry of ``df2``
    holding that entry's table. The title and introduction are read from the
    module-level names ``TITLE`` and ``INTRODUCTION_TEXT``.

    Args:
        df1: Mapping of category name to the table plotted for it. Only the
            categories listed in ``plot_rows`` are drawn; others are ignored.
        df2: Mapping of tab title to the table shown inside that tab.
    """
    # Each inner tuple is one row of radar charts on the page.
    plot_rows = (
        ("Overall", "Basic Information Retrieval"),
        ("Legal Foundation Inference", "Complex Legal Application"),
    )

    demo = gr.Blocks()
    with demo:
        gr.HTML(TITLE)
        gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

        for row_keys in plot_rows:
            with gr.Row():
                # Iterate df1 (not row_keys) so plots appear in df1's order.
                for key, df in df1.items():
                    if key not in row_keys:
                        continue
                    # Empty-string cells are replaced by 0 before plotting;
                    # replace() returns a new table, so df1 is left untouched.
                    new_df = df.replace('', 0)
                    print(f"{key}: \n{new_df}")
                    gr.Plot(create_data_interface_for_aggregated(new_df, key))

        for key, df in df2.items():
            with gr.Tab(key):
                create_data_interface(df)

    demo.launch()
# Constants
# Defined at module level (not under the __main__ guard) because
# launch_gradio() reads them as globals whenever it is called.
# Page heading, rendered as raw HTML.
TITLE = '<h1 align="center" id="space-title">⚖️ LAiW Leaderboard</h1>'
# Introductory Markdown shown below the heading.
INTRODUCTION_TEXT = """🏆 The LAiW Leaderboard is designed to rigorously track, rank, and evaluate state-of-the-art Large Language Models in Legal.
💡 Our leaderboard not only covers basic Legal NLP tasks but also incorporates Legal practice tasks such as similar case matching, offering a more comprehensive evaluation for real-world Legal applications.
🌟 Our evaluation metrics include, but are not limited to, Accuracy, F1 Score, ROUGE score, and Matthews correlation coefficient (MCC), providing a multidimensional assessment of model performance.
🔗 For more details, refer to our GitHub page [here](https://github.com/Dai-shen/LAiW).
"""


if __name__ == "__main__":
    df1 = plot_data()
    df2 = tab_data()

    scheduler = BackgroundScheduler()
    # Hand the scheduler the callable plus its keyword arguments. Writing
    # launch_gradio(df1=..., df2=...) here would run the app on the spot
    # (blocking on demo.launch()) and pass its return value to add_job.
    # NOTE(review): every run rebuilds the UI from the same df1/df2 captured
    # above and calls demo.launch() again - confirm an hourly relaunch (rather
    # than a data refresh) is really what is wanted.
    scheduler.add_job(
        launch_gradio,
        "interval",
        seconds=3600,
        kwargs={"df1": df1, "df2": df2},
    )
    scheduler.start()

    # Launch immediately
    launch_gradio(df1=df1, df2=df2)