Spaces:

daishen
/

SCULAiW

Sleeping

SCULAiW / app.py

daishen

update results

af9d904 9 months ago

3.96 kB

	# matplotlib.use('macosx')
	import gradio as gr
	import plotly.graph_objects as go
	from apscheduler.schedulers.background import BackgroundScheduler
	from get_data_info import plot_data, tab_data


	def create_data_interface(df):
	    """Wrap ``df`` in a Gradio ``Dataframe`` component.

	    The first column is declared with datatype ``"str"`` and every
	    remaining column with datatype ``"number"``.

	    Args:
	        df: Table to display; its ``columns`` become the headers and its
	            ``values`` become the rows.

	    Returns:
	        The constructed ``gr.components.Dataframe``.
	    """
	    column_names = list(df.columns)

	    # One leading text column, then numeric columns for everything else.
	    column_types = ["str"]
	    column_types.extend("number" for _ in column_names[1:])

	    rows = df.values.tolist()
	    return gr.components.Dataframe(
	        value=rows,
	        headers=column_names,
	        datatype=column_types,
	    )


	def plot_radar_chart(df, attributes, category_name):
	    """Draw a radar chart with one filled trace per row of ``df``.

	    Args:
	        df: Table with a ``'Model'`` column (used as the trace name) and
	            the columns listed in ``attributes``.
	        attributes: Column labels plotted around the polar axis.
	        category_name: Text used as the figure title.

	    Returns:
	        The populated ``go.Figure``; the radial axis is fixed to 0-100.
	    """
	    figure = go.Figure()

	    for _, record in df.iterrows():
	        model_name = record['Model']
	        scores = record[attributes].tolist()
	        trace = go.Scatterpolar(
	            r=scores,
	            theta=attributes,
	            fill='toself',
	            name=model_name,
	        )
	        figure.add_trace(trace)

	    radial_axis = dict(visible=True, range=[0, 100])
	    figure.update_layout(
	        title=f"{category_name}",
	        polar=dict(radialaxis=radial_axis),
	        showlegend=True,
	    )

	    return figure


	def create_data_interface_for_aggregated(df, category_name):
	    """Build the radar chart for one aggregated category.

	    Every column of ``df`` after the first is treated as a plotted
	    attribute; the selected labels are printed before plotting.

	    Args:
	        df: Aggregated table passed through to ``plot_radar_chart``.
	        category_name: Title for the resulting figure.

	    Returns:
	        The figure produced by ``plot_radar_chart``.
	    """
	    metric_columns = df.columns[1:]
	    print(f"attributes: {metric_columns}")
	    return plot_radar_chart(df, metric_columns, category_name)


	def reindex_cols(fix_cols, df):
	    """Return ``df`` restricted to the columns named in ``fix_cols``.

	    The result keeps the order given by ``fix_cols``; names that are not
	    columns of ``df`` are skipped silently.

	    Args:
	        fix_cols: Iterable of column labels in the desired order.
	        df: Table to select columns from (not modified).

	    Returns:
	        A DataFrame containing only the matching columns.
	    """
	    # Build the membership lookup once instead of re-creating and scanning
	    # the column list for every candidate label.
	    present = set(df.columns.values.tolist())
	    ordered = [label for label in fix_cols if label in present]
	    return df[ordered]


	def _render_radar_row(frames, wanted_keys):
	    """Render one ``gr.Row`` with a radar plot for each selected category.

	    Categories are visited in the iteration order of ``frames``; only
	    those whose key appears in ``wanted_keys`` are plotted.
	    """
	    with gr.Row():
	        for key, df in frames.items():
	            if key not in wanted_keys:
	                continue
	            # Empty-string cells are replaced with 0 before plotting;
	            # ``replace`` returns a new frame, so no extra copy is needed.
	            cleaned = df.replace('', 0)
	            print(f"{key}: \n{cleaned}")
	            gr.Plot(create_data_interface_for_aggregated(cleaned, key))


	def launch_gradio(df1, df2):
	    """Assemble the leaderboard page and start the Gradio server.

	    Layout: title and introduction, then two rows of radar charts built
	    from ``df1``, then one tab per entry of ``df2`` holding a data table.

	    Reads the module-level names ``TITLE`` and ``INTRODUCTION_TEXT``; in
	    this file they are assigned only under the ``__main__`` guard, so
	    they must exist before this function is called.

	    Args:
	        df1: Mapping of category name to the table used for radar charts.
	        df2: Mapping of tab name to the table shown in that tab.
	    """
	    demo = gr.Blocks()

	    with demo:
	        gr.HTML(TITLE)
	        gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

	        _render_radar_row(
	            df1, ("Overall", "Basic Information Retrieval")
	        )
	        _render_radar_row(
	            df1, ("Legal Foundation Inference", "Complex Legal Application")
	        )

	        for key, df in df2.items():
	            with gr.Tab(key):
	                create_data_interface(df)

	    demo.launch()


	if __name__ == "__main__":
	    # Load the tables for the radar charts (df1) and the per-tab tables (df2).
	    df1 = plot_data()
	    df2 = tab_data()

	    # Constants (read as module globals by launch_gradio)
	    TITLE = '<h1 align="center" id="space-title">⚖️ LAiW Leaderboard</h1>'
	    INTRODUCTION_TEXT = """🏆 The LAiW Leaderboard is designed to rigorously track, rank, and evaluate state-of-the-art Large Language Models in Legal.

	💡 Our leaderboard not only covers basic Legal NLP tasks but also incorporates Legal practice tasks such as similar case matching, offering a more comprehensive evaluation for real-world Legal applications.

	🌟 Our evaluation metrics include, but are not limited to, Accuracy, F1 Score, ROUGE score, and Matthews correlation coefficient (MCC), providing a multidimensional assessment of model performance.

	🔗 For more details, refer to our GitHub page [here](https://github.com/Dai-shen/LAiW).
	"""

	    scheduler = BackgroundScheduler()
	    # Register the callable itself with its keyword arguments. Writing
	    # ``launch_gradio(...)`` here would run the app on the spot and hand
	    # its return value (None) to the scheduler instead of a job function.
	    # NOTE(review): the job re-invokes launch_gradio with the frames loaded
	    # at startup; confirm an hourly relaunch (rather than a data refresh or
	    # a Space restart) is really the intended job.
	    scheduler.add_job(
	        launch_gradio,
	        "interval",
	        seconds=3600,
	        kwargs={"df1": df1, "df2": df2},
	    )
	    scheduler.start()

	    # Launch immediately
	    launch_gradio(df1=df1, df2=df2)