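"""Gradio Space for the LAiW Leaderboard: draws radar charts of the aggregated
category scores, shows per-category score tables in tabs, and relaunches the
interface on an hourly schedule."""
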
# matplotlib.use('macosx')
import gradio as gr
import plotly.graph_objects as go
from apscheduler.schedulers.background import BackgroundScheduler
from get_data_info import plot_data, tab_data


def create_data_interface(df):
    """Render a score DataFrame as a Gradio table."""
    headers = df.columns
    # The first column holds the model name; the remaining columns are numeric scores.
    types = ["str"] + ["number"] * (len(headers) - 1)
    return gr.components.Dataframe(
        value=df.values.tolist(),
        headers=list(headers),
        datatype=types,
        # max_rows=10,
    )


def plot_radar_chart(df, attributes, category_name):
    """Draw one radar (Scatterpolar) trace per model over the given attributes."""
    fig = go.Figure()
    for _, row in df.iterrows():
        model = row['Model']
        values = row[attributes].tolist()
        fig.add_trace(go.Scatterpolar(
            r=values,
            theta=attributes,
            fill='toself',
            name=model
        ))
    fig.update_layout(
        title=f"{category_name}",
        polar=dict(
            radialaxis=dict(
                visible=True,
                range=[0, 100]  # scores are plotted on a 0-100 scale
            )),
        showlegend=True
    )
    return fig


def create_data_interface_for_aggregated(df, category_name):
    """Build the radar chart for one aggregated score category."""
    attributes = df.columns[1:]  # every column except 'Model'
    print(f"attributes: {attributes}")
    fig = plot_radar_chart(df, attributes, category_name)
    return fig


def reindex_cols(fix_cols, df):
    """Keep only the columns of fix_cols that exist in df, in that order."""
    task_col = [subtask for subtask in fix_cols if subtask in df.columns.values.tolist()]
    return df[task_col]


def launch_gradio(df1, df2):
    demo = gr.Blocks()
    with demo:
        gr.HTML(TITLE)
        gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

        # Radar charts for the aggregated categories, two per row.
        with gr.Row():
            for key, df in df1.items():
                if key in ("Overall", "Basic Information Retrieval"):
                    df = df.replace('', 0)
                    new_df = df.copy()
                    # new_df = reindex_cols(Task_COLS, new_df)
                    print(f"{key}: \n{new_df}")
                    plot = create_data_interface_for_aggregated(new_df, key)
                    gr.Plot(plot)

        with gr.Row():
            for key, df in df1.items():
                if key in ("Legal Foundation Inference", "Complex Legal Application"):
                    df = df.replace('', 0)
                    new_df = df.copy()
                    # new_df = reindex_cols(Task_COLS, new_df)
                    print(f"{key}: \n{new_df}")
                    plot = create_data_interface_for_aggregated(new_df, key)
                    gr.Plot(plot)

        # One tab of detailed scores per category.
        for key, df in df2.items():
            # if key != "Overall":
            with gr.Tab(key):
                # df = reindex_cols(Task_COLS, df)
                create_data_interface(df)

    demo.launch()


if __name__ == "__main__":
    df1 = plot_data()
    df2 = tab_data()

    # Constants
    TITLE = '<h1 align="center" id="space-title">⚖️ LAiW Leaderboard</h1>'
    INTRODUCTION_TEXT = """The LAiW Leaderboard is designed to rigorously track, rank, and evaluate state-of-the-art Large Language Models in the legal domain.
💡 Our leaderboard not only covers basic legal NLP tasks but also incorporates legal practice tasks such as similar case matching, offering a more comprehensive evaluation for real-world legal applications.
Our evaluation metrics include, but are not limited to, Accuracy, F1 score, ROUGE score, and the Matthews correlation coefficient (MCC), providing a multidimensional assessment of model performance.
For more details, refer to our GitHub page [here](https://github.com/Dai-shen/LAiW).
"""

    # Register the callable and its kwargs (rather than the result of calling it)
    # so the scheduler relaunches the leaderboard every hour.
    scheduler = BackgroundScheduler()
    scheduler.add_job(launch_gradio, "interval", seconds=3600, kwargs={"df1": df1, "df2": df2})
    scheduler.start()

    # Launch immediately
    launch_gradio(df1=df1, df2=df2)