daishen committed on
Commit
c3dcec1
•
1 Parent(s): 9425c6e

add app.py

Browse files
Files changed (4) hide show
  1. app.py +120 -0
  2. get_data_info.py +57 -0
  3. leaderboard.xlsx +0 -0
  4. requirements.txt +11 -0
app.py ADDED
@@ -0,0 +1,120 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # matplotlib.use('macosx')
2
+ import gradio as gr
3
+ import plotly.graph_objects as go
4
+ from apscheduler.schedulers.background import BackgroundScheduler
5
+ from get_data_info import plot_data, tab_data
6
+
7
+
8
def create_data_interface(df):
    """Wrap *df* in a Gradio ``Dataframe`` component.

    The first column is declared with datatype ``"str"`` and every
    remaining column with ``"number"``. The component is created with
    ``max_rows=10``.

    Args:
        df: pandas DataFrame whose values and column labels are shown.

    Returns:
        The constructed ``gr.components.Dataframe``.
    """
    column_names = list(df.columns)

    # One "str" slot for the leading column, "number" for all the others.
    column_types = ["str"]
    column_types.extend(["number"] * (len(column_names) - 1))

    table = gr.components.Dataframe(
        value=df.values.tolist(),
        headers=column_names,
        datatype=column_types,
        max_rows=10,
    )
    return table
18
+
19
+
20
def plot_radar_chart(df, attributes, category_name):
    """Build a radar chart with one filled trace per row of *df*.

    Args:
        df: DataFrame containing a ``'Model'`` column (used as the trace
            name) and the columns listed in *attributes*.
        attributes: Column labels used as the angular axis; the row's
            values for these columns are the radii.
        category_name: Rendered as the figure title.

    Returns:
        A plotly ``go.Figure`` whose radial axis is visible and fixed to
        the range 0..100, with the legend enabled.
    """
    radar = go.Figure()

    for _, record in df.iterrows():
        trace_name = record['Model']
        radii = record[attributes].tolist()
        trace = go.Scatterpolar(
            r=radii,
            theta=attributes,
            fill='toself',
            name=trace_name,
        )
        radar.add_trace(trace)

    radial_axis = {"visible": True, "range": [0, 100]}
    radar.update_layout(
        title=f"{category_name}",
        polar={"radialaxis": radial_axis},
        showlegend=True,
    )
    return radar
44
+
45
+
46
def create_data_interface_for_aggregated(df, category_name):
    """Return the radar chart for an aggregated score table.

    Every column after the first is treated as a plotted attribute; the
    selected attribute labels are printed before the figure is built.

    Args:
        df: DataFrame whose first column is skipped when choosing the
            plotted attributes.
        category_name: Title forwarded to ``plot_radar_chart``.

    Returns:
        The figure produced by ``plot_radar_chart``.
    """
    attributes = df.columns[1:]
    print(f"attributes: {attributes}")
    return plot_radar_chart(df, attributes, category_name)
51
+
52
+
53
def reindex_cols(fix_cols, df):
    """Select the columns of *df* named in *fix_cols*, in *fix_cols* order.

    Labels in *fix_cols* that are not columns of *df* are ignored, and
    columns of *df* that are not listed in *fix_cols* are dropped.

    Args:
        fix_cols: Iterable of column labels giving the desired order.
        df: pandas DataFrame to select from.

    Returns:
        A DataFrame holding only the matching columns, ordered as in
        *fix_cols*.
    """
    # Build the lookup once; the original re-materialised the full column
    # list and scanned it linearly for every entry of fix_cols.
    present = set(df.columns)
    task_col = [subtask for subtask in fix_cols if subtask in present]
    return df[task_col]
58
+
59
+
60
def _render_radar_row(plot_frames, row_keys):
    """Render one ``gr.Row`` holding a radar plot per category in *row_keys*."""
    with gr.Row():
        for key, df in plot_frames.items():
            if key not in row_keys:
                continue
            # replace() already returns a new frame, so no extra copy is
            # needed before handing it to the plotting helper.
            new_df = df.replace('', 0)
            print(f"{key}: \n{new_df}")
            gr.Plot(create_data_interface_for_aggregated(new_df, key))


def launch_gradio(df1, df2):
    """Build the leaderboard page and start the Gradio server.

    Layout: the title and introduction, then two rows of radar charts
    drawn from *df1*, then one tab per entry of *df2* containing its
    table.

    Reads the module-level names ``TITLE`` and ``INTRODUCTION_TEXT``;
    they must be defined before this function is called.

    Args:
        df1: Mapping of category name to the DataFrame used for its radar
            chart. Only the four category names referenced below are
            plotted; iteration order of the mapping decides the order
            inside each row.
        df2: Mapping of tab title to the DataFrame shown in that tab.
    """
    demo = gr.Blocks()

    with demo:
        gr.HTML(TITLE)
        gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

        # The two rows were previously two copy-pasted loops.
        _render_radar_row(df1, ("Overall", "Basic Legal NLP"))
        _render_radar_row(
            df1, ("Basic Legal Application", "Complex Legal Application")
        )

        for key, df in df2.items():
            with gr.Tab(key):
                create_data_interface(df)

    demo.launch()
98
+
99
+
100
# Constants
# Defined at module level because launch_gradio() reads them as globals;
# keeping them under the __main__ guard made the function unusable on import.
TITLE = '<h1 align="center" id="space-title">⚖️ LAiW Leaderboard</h1>'
INTRODUCTION_TEXT = """🏆 The LAiW Leaderboard is designed to rigorously track, rank, and evaluate state-of-the-art Large Language Models in Legal.

💡 Our leaderboard not only covers basic Legal NLP tasks but also incorporates Legal practice tasks such as similar case matching, offering a more comprehensive evaluation for real-world Legal applications.

🌟 Our evaluation metrics include, but are not limited to, Accuracy, F1 Score, ROUGE score, and Matthews correlation coefficient (MCC), providing a multidimensional assessment of model performance.

🔗 For more details, refer to our GitHub page [here](https://github.com/Dai-shen/LAiW).
"""


if __name__ == "__main__":
    df1 = plot_data()
    df2 = tab_data()

    scheduler = BackgroundScheduler()
    # add_job needs the callable itself plus its arguments. The original
    # passed the *result* of launch_gradio(...), which blocked in
    # demo.launch() before the scheduler could start and would then have
    # handed None to add_job.
    # NOTE(review): each scheduled run builds and launches a fresh Blocks
    # app with the frames loaded at start-up; if the goal is an hourly data
    # refresh, reload the spreadsheet inside the job instead - confirm.
    scheduler.add_job(
        launch_gradio,
        "interval",
        seconds=3600,
        kwargs={"df1": df1, "df2": df2},
    )
    scheduler.start()

    # Launch immediately
    launch_gradio(df1=df1, df2=df2)
get_data_info.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import copy
2
+ import pandas as pd
3
+
4
+
5
def plot_data():
    """Load the ``Sheet1`` scores and split them per category for plotting.

    Reads columns A:P and at most 14 data rows of ``leaderboard.xlsx``
    (path relative to the current working directory), using the first row
    as the header.

    Returns:
        dict mapping ``"Overall"``, ``"Basic Legal NLP"``,
        ``"Basic Legal Application"`` and ``"Complex Legal Application"``
        to DataFrames. Each frame holds column 0 followed by that
        category's column range.
    """
    leaderboard_df = pd.read_excel(
        'leaderboard.xlsx',
        sheet_name='Sheet1',
        header=0,
        usecols='A:P',
        nrows=14)
    # fillna() returns a new frame; the original discarded it, so missing
    # cells were never replaced. An empty string is used (as the original
    # comment stated) because the plotting code in app.py turns '' into 0
    # before drawing; a "-" placeholder would reach the chart as text.
    leaderboard_df = leaderboard_df.fillna("")

    # TODO (carried over from the original, no detail given): presumably
    # the hard-coded column ranges below - confirm.
    df_nlp = leaderboard_df.iloc[:, [0] + list(range(2, 7))]
    df_basic = leaderboard_df.iloc[:, [0] + list(range(7, 13))]
    df_complex = leaderboard_df.iloc[:, [0] + list(range(13, 16))]

    # Overall view: column 0 plus every score column (column 1 is skipped).
    df_overall = leaderboard_df.iloc[:, [0] + list(range(2, 16))]
    plot_df_dict = {
        "Overall": df_overall,
        "Basic Legal NLP": df_nlp,
        "Basic Legal Application": df_basic,
        "Complex Legal Application": df_complex,
    }
    return plot_df_dict
28
+
29
+
30
def tab_data():
    """Load the ``Sheet2`` scores and split them per category for the tabs.

    Reads columns A:AS and at most 14 data rows of ``leaderboard.xlsx``
    (path relative to the current working directory), using the first row
    as the header. Missing cells are replaced with ``"-"``.

    Returns:
        dict mapping ``"Overall"``, ``"Basic Legal NLP"``,
        ``"Basic Legal Application"`` and ``"Complex Legal Application"``
        to DataFrames. Each frame holds column 0 followed by that
        category's column range.
    """
    leaderboard_df = pd.read_excel(
        'leaderboard.xlsx',
        sheet_name='Sheet2',
        header=0,
        usecols='A:AS',
        nrows=14)
    # fillna() returns a new frame; the original discarded it, so the "-"
    # placeholder was never applied.
    leaderboard_df = leaderboard_df.fillna("-")

    # TODO (carried over from the original, no detail given): presumably
    # the hard-coded column ranges below - confirm.
    df_nlp = leaderboard_df.iloc[:, [0] + list(range(2, 18))]
    df_basic = leaderboard_df.iloc[:, [0] + list(range(18, 36))]
    df_complex = leaderboard_df.iloc[:, [0] + list(range(36, 45))]

    # Overall view: column 0 plus every score column (column 1 is skipped).
    df_overall = leaderboard_df.iloc[:, [0] + list(range(2, 45))]
    plot_df_dict = {
        "Overall": df_overall,
        "Basic Legal NLP": df_nlp,
        "Basic Legal Application": df_basic,
        "Complex Legal Application": df_complex,
    }
    return plot_df_dict
53
+
54
+
55
if __name__ == "__main__":
    # Script entry: load both sheets, plot frames first, then tab frames.
    df1, df2 = plot_data(), tab_data()
leaderboard.xlsx ADDED
Binary file (16.4 kB). View file
 
requirements.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiofiles==23.1.0
2
+ aiohttp==3.8.4
3
+ aiosignal==1.3.1
4
+ APScheduler==3.10.1
5
+ gradio==3.27.0
6
+ gradio_client==0.1.3
7
+ pandas==2.0.0
8
+ matplotlib
9
+ numpy
10
+ plotly
11
+ openpyxl==3.0.10