Spaces:

andrewrreed
/

closed-vs-open-arena-elo

Running on CPU Upgrade

App Files Files Community

closed-vs-open-arena-elo / app.py

andrewrreed HF staff

format header markdown

3c49dce 5 months ago

raw

history blame

5.83 kB

	import pickle

	import pandas as pd
	import gradio as gr
	import plotly.express as px

	from utils import (
	KEY_TO_CATEGORY_NAME,
	PROPRIETARY_LICENSES,
	download_latest_data_from_space,
	)

	# with gr.NO_RELOAD:
	###################
	### Load Data
	###################

	# Fetch the newest ELO pickle published in the arena leaderboard Space.
	latest_elo_file_local = download_latest_data_from_space(
	    repo_id="lmsys/chatbot-arena-leaderboard",
	    file_type="pkl",
	)

	# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
	# this is only acceptable while the upstream Space is trusted.
	with open(latest_elo_file_local, "rb") as elo_fh:
	    elo_results = pickle.load(elo_fh)

	# One leaderboard table per category, keyed by its display name; categories
	# missing from the pickle are skipped.
	arena_dfs = {
	    category_name: elo_results[key]["leaderboard_table_df"]
	    for key, category_name in KEY_TO_CATEGORY_NAME.items()
	    if key in elo_results
	}

	# Fetch the newest leaderboard CSV from the same Space.
	latest_leaderboard_file_local = download_latest_data_from_space(
	    repo_id="lmsys/chatbot-arena-leaderboard",
	    file_type="csv",
	)
	leaderboard_df = pd.read_csv(latest_leaderboard_file_local)

	###################
	### Prepare Data
	###################

	# Join every ELO table (on its index) with the leaderboard rows (on "key"),
	# ordered from the highest to the lowest rating.
	merged_dfs = {}
	for category, elo_table in arena_dfs.items():
	    joined = pd.merge(elo_table, leaderboard_df, left_index=True, right_on="key")
	    joined = joined.sort_values("rating", ascending=False)
	    merged_dfs[category] = joined.reset_index(drop=True)

	# Attach the release date of each model; pd.merge defaults to an inner join,
	# so rows without an entry in the mapping file are dropped.
	release_date_mapping = pd.read_json("release_date_mapping.json", orient="records")
	release_dates = release_date_mapping[["key", "Release Date"]]
	merged_dfs = {
	    category: pd.merge(frame, release_dates, on="key")
	    for category, frame in merged_dfs.items()
	}


	def _license_label(license_name):
	    """Collapse a raw license string into one of the two plotted groups."""
	    if license_name in PROPRIETARY_LICENSES:
	        return "Proprietary LLM"
	    return "Open LLM"


	# The rest of the app works on the "Overall" category only.
	df = merged_dfs["Overall"]
	df["rating"] = df["rating"].round()
	df["License"] = df["License"].apply(_license_label)
	df["Release Date"] = pd.to_datetime(df["Release Date"])
	# Monthly period used to group models released in the same month.
	df["Month-Year"] = df["Release Date"].dt.to_period("M")


	###################
	### Plot Data
	###################

	# Text before the first space of the "last updated" timestamp.
	date_updated = elo_results["full"]["last_updated_datetime"].partition(" ")[0]

	# Extremes of the rating column, used as the bounds of the ELO slider.
	max_elo_score = df["rating"].max().round()
	min_elo_score = df["rating"].min().round()

	# Size of the largest (month, license) group, used as the upper bound of the
	# "models per month" slider.
	models_per_group = df.groupby(["Month-Year", "License"])["rating"].count()
	upper_models_per_month = int(models_per_group.max())


	def build_plot(min_score, max_models_per_month, toggle_annotations):
	    """Build the rating-vs-release-date scatter plot for the current controls.

	    Reads the module-level ``df`` and ``date_updated``.

	    Args:
	        min_score: Rows whose ``rating`` is below this value are dropped.
	        max_models_per_month: Maximum number of top-rated rows kept for each
	            ("Month-Year", "License") group. Coerced to ``int`` because UI
	            sliders may deliver the value as a float, while
	            ``DataFrame.nlargest`` expects an integer count.
	        toggle_annotations: When truthy, the highest-rated model of every
	            ("Month-Year", "License") group is labelled on the chart.

	    Returns:
	        The Plotly Express scatter figure (with an OLS trendline requested).
	    """
	    top_n = int(max_models_per_month)

	    # Apply the minimum-rating filter, then keep the top-N rows per group.
	    filtered_df = df[df["rating"] >= min_score]
	    filtered_df = (
	        filtered_df.groupby(["Month-Year", "License"])
	        .apply(lambda group: group.nlargest(top_n, "rating"))
	        .reset_index(drop=True)
	    )

	    fig = px.scatter(
	        filtered_df,
	        x="Release Date",
	        y="rating",
	        color="License",
	        hover_name="Model",
	        hover_data=["Organization", "License"],
	        trendline="ols",
	        title=f"Proprietary vs Open LLMs (LMSYS Arena ELO as of {date_updated})",
	        labels={"rating": "Arena ELO", "Release Date": "Release Date"},
	        height=700,
	        template="seaborn",
	    )

	    fig.update_traces(marker=dict(size=10, opacity=0.6))

	    if toggle_annotations:
	        # Annotate only the highest rated model per month per license.
	        idx_to_annotate = filtered_df.groupby(["Month-Year", "License"])[
	            "rating"
	        ].idxmax()
	        points_to_annotate_df = filtered_df.loc[idx_to_annotate]

	        for _, row in points_to_annotate_df.iterrows():
	            fig.add_annotation(
	                x=row["Release Date"],
	                y=row["rating"],
	                text=row["Model"],
	                showarrow=True,
	                arrowhead=0,
	            )

	    return fig


	# Preferred default for the ELO slider is 800, but the slider bounds come from
	# the data, so clamp the default into [min_elo_score, max_elo_score] to keep
	# the initial value valid whatever the current ratings are.
	default_min_score = min(max(800, min_elo_score), max_elo_score)

	with gr.Blocks(
	    theme=gr.themes.Soft(
	        primary_hue=gr.themes.colors.sky,
	        secondary_hue=gr.themes.colors.green,
	        font=[
	            gr.themes.GoogleFont("Open Sans"),
	            "ui-sans-serif",
	            "system-ui",
	            "sans-serif",
	        ],
	    )
	) as demo:
	    # Page header (the string content is kept exactly as authored).
	    gr.Markdown(
	        """
	<div style="text-align: center; max-width: 650px; margin: auto;">
	<h1 style="font-weight: 900; margin-top: 5px;">🔬 Progress Tracker: Proprietary vs Open LLMs
	</h1>
	<p style="text-align: left; margin-top: 10px; margin-bottom: 10px; line-height: 20px;">
	This app visualizes the progress of proprietary and open-source LLMs in the LMSYS Arena ELO leaderboard. The idea is inspired by <a href="https://www.linkedin.com/posts/maxime-labonne_arena-elo-graph-updated-with-new-models-activity-7187062633735368705-u2jB?utm_source=share&utm_medium=member_desktop">this great work</a> from <a href="https://huggingface.co/mlabonne/">Maxime Labonne</a>.
	</p>
	</div>
	"""
	    )
	    with gr.Row():
	        min_score = gr.Slider(
	            minimum=min_elo_score,
	            maximum=max_elo_score,
	            value=default_min_score,
	            step=50,
	            label="Minimum ELO Score",
	        )
	        max_models_per_month = gr.Slider(
	            value=upper_models_per_month,
	            minimum=1,
	            maximum=upper_models_per_month,
	            step=1,
	            label="Max Models per Month (per License)",
	        )
	        toggle_annotations = gr.Radio(
	            choices=[True, False], label="Overlay Best Model Name", value=False
	        )

	    # Show plot
	    plot = gr.Plot()

	    # Every control feeds the same callback: render once on page load, then
	    # re-render whenever any of the controls changes.
	    controls = [min_score, max_models_per_month, toggle_annotations]
	    demo.load(fn=build_plot, inputs=controls, outputs=plot)
	    for control in controls:
	        control.change(fn=build_plot, inputs=controls, outputs=plot)

	demo.launch()
	# if __name__ == "__main__":