Spaces:

devingulliver
/

subquadratic-llm-leaderboard

Running

App Files Files Community

subquadratic-llm-leaderboard / app.py

devingulliver

Hide incomplete evals...

04b19ad verified 3 months ago

raw

history blame contribute delete

7.16 kB

	import os
	import re

	import gradio as gr
	import huggingface_hub
	import pandas as pd
	import requests

	# Leaderboard rows. dtype="str" makes pandas load every column as text.
	data = pd.read_csv("data.csv", dtype="str")

	# URL that submit_model() POSTs new submissions to; None when WEBHOOK_URL is unset.
	webhook_url = os.getenv("WEBHOOK_URL")

	# Architecture name -> page describing that architecture.
	archlinks = {
	    "Based": "https://arxiv.org/abs/2402.18668",
	    "Griffin": "https://arxiv.org/abs/2402.19427",
	    "H3": "https://arxiv.org/abs/2212.14052",
	    "Hyena": "https://arxiv.org/abs/2302.10866",
	    "M2": "https://arxiv.org/abs/2310.12109",
	    "Mamba": "https://arxiv.org/abs/2312.00752",
	    "Mamba2": "https://arxiv.org/abs/2405.21060",
	    "Jamba": "https://arxiv.org/abs/2403.19887",
	    "RWKV-4": "https://arxiv.org/abs/2305.13048",
	    "RWKV-5": "https://arxiv.org/abs/2404.05892",
	    "RWKV-6": "https://arxiv.org/abs/2404.05892",
	    "StripedHyena": "https://www.together.ai/blog/stripedhyena-7b",  # no paper?
	    "Zamba": "https://arxiv.org/abs/2405.16712",
	}

	def _anchor(href, text):
	    """Return the dotted-underline ``<a>`` tag used for every link in the table."""
	    return f'<a target="_blank" href="{href}" style="color:var(--link-text-color);text-decoration:underline;text-decoration-style:dotted">{text}</a>'


	def filter_table(cols, name, type, arch, size, source=None, links=None):
	    """Filter the leaderboard and decorate it with HTML links for display.

	    Args:
	        cols: Column labels to drop from the result. Dropping happens after
	            "Type" has been renamed to "T".
	        name: Search text matched case-insensitively against "Name". It is
	            used as a regular expression when it compiles, otherwise as a
	            literal substring. None or "" matches every row.
	        type: Values of "Type" to keep. (The parameter shadows the builtin;
	            the name is kept so existing callers are unaffected.)
	        arch: Values of "Architecture" to keep.
	        size: Values of "Model Size" to keep.
	        source: DataFrame to filter; defaults to the module-level ``data``.
	        links: Architecture-name -> URL mapping; defaults to the
	            module-level ``archlinks``.

	    Returns:
	        A new DataFrame. The frame passed in (or ``data``) is not modified.
	    """
	    frame = data if source is None else source
	    link_map = archlinks if links is None else links

	    # Free text from a search box is often not a valid pattern ("(", "c++").
	    # Keep regex search when the text compiles, otherwise match it literally
	    # instead of letting re.error escape.
	    query = name or ""
	    try:
	        re.compile(query)
	        as_regex = True
	    except re.error:
	        as_regex = False

	    # na=False: rows with a missing name simply do not match, rather than
	    # producing a NaN mask that boolean indexing rejects.
	    keep = (
	        frame["Name"].str.contains(query, case=False, regex=as_regex, na=False)
	        & frame["Type"].isin(type)
	        & frame["Architecture"].isin(arch)
	        & frame["Model Size"].isin(size)
	    )
	    # Explicit copy so the column assignments below never write to a view.
	    view = frame[keep].copy()

	    # Only the first character of the type is shown, under the header "T".
	    view["Type"] = view["Type"].str[:1]
	    view = view.rename(columns={"Type": "T"})

	    view["Name"] = view["Name"].apply(
	        lambda repo: _anchor(f"https://huggingface.co/{repo}", repo)
	    )
	    # Architectures without a known link are shown as plain text.
	    view["Architecture"] = view["Architecture"].apply(
	        lambda a: _anchor(link_map[a], a) if a in link_map else a
	    )
	    # "base" marks a model with no parent; it is rendered as an empty cell.
	    view["Base Model"] = view["Base Model"].apply(
	        lambda parent: "" if parent == "base" else _anchor(f"https://huggingface.co/{parent}", parent)
	    )

	    return view.drop(columns=cols)

	def submit_model(name):
	    """Validate a Hub repo id and forward it to the submission webhook.

	    The repo is checked by downloading its ``config.json``; when that
	    succeeds the id is POSTed to ``webhook_url`` as ``{"content": <id>}``.

	    Args:
	        name: Repo id typed by the user. Surrounding whitespace is ignored.

	    Returns:
	        A Markdown heading beginning with "# SUCCESS:" or "# ERROR:". Every
	        failure is reported through the return value; nothing is raised.
	    """
	    repo_id = (name or "").strip()
	    if not repo_id:
	        return "# ERROR: Please enter a model name."

	    # Sanity check the input. The exception classes are taken from the public
	    # huggingface_hub.utils namespace, not the private _errors module.
	    try:
	        huggingface_hub.hf_hub_download(repo_id=repo_id, filename="config.json")
	    except huggingface_hub.utils.EntryNotFoundError:
	        return "# ERROR: Model does not have a config.json file!"
	    except huggingface_hub.utils.RepositoryNotFoundError:
	        return "# ERROR: Model could not be found on the Hugging Face Hub!"
	    except requests.exceptions.RequestException:
	        # Covers HTTP errors as well as connection failures and timeouts.
	        return "# ERROR: Network error while validating model. Please try again later."
	    except Exception as e:
	        print(e)
	        return "# ERROR: Unexpected error. Please try again later."

	    if not webhook_url:
	        print("WEBHOOK_URL is not set; the submission cannot be queued.")
	        return "# ERROR: Submission queue is unavailable. Please try again later."

	    try:
	        response = requests.post(webhook_url, json={"content": repo_id}, timeout=10)
	        # requests does not raise on 4xx/5xx by itself; without this call a
	        # rejected request would be reported to the user as a success.
	        response.raise_for_status()
	    except requests.exceptions.RequestException:
	        return "# ERROR: Network error while contacting queue. Please try again in a few minutes."
	    except Exception as e:
	        print(e)
	        return "# ERROR: Unexpected error. Please try again later."

	    return "# SUCCESS: Please wait up to 24 hours for your model to be added to the queue."

	# Page layout: a title, a call for submissions, and four tabs. The custom CSS
	# caps the container width at 95% and enlarges the tab-button text.
	with gr.Blocks(css=".gradio-container{max-width:95%!important} .tab-buttons button{font-size:1.3em}") as demo:
	gr.HTML('<h1 style="text-align:center"><span style="font-size:1.3em">Subquadratic LLM Leaderboard</span></h1>')
	gr.Markdown("REMEMBER: If you don't see an eligible model here, make sure to submit it! We hope to incentivize subquadratic/attention-free LLM development through friendly competition.")

	with gr.Tabs(elem_classes="tab-buttons") as tabs:
	# Benchmark tab: the interactive filter UI is currently switched off.
	with gr.Tab("🏅 LLM Benchmark"):
	# The triple-quoted string below holds the earlier filter/table UI as inert
	# text: it is a bare string expression, so it creates no components and
	# wires no events. The notice that follows it is what this tab displays.
	"""
	with gr.Row():
	with gr.Column():
	namefilter = gr.Textbox(max_lines=1, placeholder="Search by model name and hit Enter...", show_label=False)
	colfilter = gr.CheckboxGroup(label="Hide columns", choices=list(data.columns)[2:], value=["Architecture","Model Size","Base Model"])
	typefilter = gr.CheckboxGroup(label="Filter by model type", choices=list(data["Type"].unique()), value=[n for n in data["Type"].unique() if n not in ["⏳ Pending"]])

	with gr.Column():
	archfilter = gr.CheckboxGroup(label="Filter by model architecture", choices=list(archlinks.keys()), value=list(archlinks.keys()))
	sizefilter = gr.CheckboxGroup(label="Filter by model size", choices=list(data["Model Size"].unique()), value=list(data["Model Size"].unique()))

	table = gr.Dataframe(filter_table(["Architecture","Model Size","Base Model"],"",[n for n in data["Type"].unique() if n not in ["⏳ Pending"]],list(archlinks.keys()),list(data["Model Size"].unique())), datatype="markdown")

	# actions

	namefilter.submit(filter_table, [colfilter,namefilter,typefilter,archfilter,sizefilter], table)

	for filter in [colfilter,typefilter,archfilter,sizefilter]:
	filter.input(filter_table, [colfilter,namefilter,typefilter,archfilter,sizefilter], table)
	"""
	gr.Markdown("This tab temporarily disabled. If you need archived eval data, check the Files tab for data.csv.")
	# Comparison tab: the rows of data whose Name is in the fixed list below,
	# shown without the Type, Model Size and Base Model columns.
	with gr.Tab("⚖️ Comparison"):
	gr.Markdown("This table is whitelisted to one model per architecture, specifically 1.5B models trained on The Pile for 1 epoch, for a direct comparison of architectures.")
	gr.Dataframe(data[data["Name"].isin(["devingulliver/llama-pile-350b","RWKV/rwkv-4-1b5-pile","state-spaces/mamba-1.4b","state-spaces/mamba2-1.3b"])].drop(["Type","Model Size","Base Model"], axis=1), datatype="markdown") # "danfu09/H3-1.3B"

	# About tab: static Markdown describing the leaderboard.
	with gr.Tab("📝 About"):
	gr.Markdown("""
	The Subquadratic LLM Leaderboard evaluates LLMs with subquadratic/attention-free architectures (i.e. RWKV & Mamba) with the goal of providing open
	evaluation results while the architectures themselves are pending inclusion/release in the 🤗 Transformers library.

	The metrics are the same as the Open LLM Leaderboard: ARC 25-shot, HellaSwag 10-shot, MMLU 5-shot, TruthfulQA zeroshot, Winogrande 5-shot, and GSM8K 5-shot.

	This leaderboard is maintained by Devin Gulliver and is perpetually under construction, check back regularly for further improvements!

	Compute for evaluating RWKV models is generously provided by [Recursal AI](https://recursal.ai).
	""")

	# Submission tab: a repo-id textbox, a Submit button, and a Markdown area
	# that displays whatever submit_model returns.
	with gr.Tab("🚀 Submit here!"):
	with gr.Group():
	with gr.Row():
	model_name = gr.Textbox(max_lines=1, placeholder="Enter model name...", show_label=False, scale=4)
	# NOTE(review): the button is created with interactive=False and nothing
	# visible here enables it again — confirm submissions are meant to be closed.
	submit = gr.Button("Submit", variant="primary", scale=0, interactive=False)

	output = gr.Markdown("Enter a public HF repo id, then hit Submit to add it to the evaluation queue.")

	# Clicking Submit passes the textbox value to submit_model and writes the
	# returned Markdown into output.
	submit.click(fn=submit_model, inputs=model_name, outputs=output)

	# Start the app with show_api=False; data.csv is listed in allowed_paths.
	demo.launch(show_api=False, allowed_paths=["data.csv"])