# File size: 6,980 Bytes
# b414398 38210ee b414398 0fade42 0dcfe74 9fc3aec b414398 91e32b4 f4c1e40 cd7f1fc f03d124 91e32b4 65992ff 42e2af3 5aeb82f 62088f1 b59216c 91e32b4 00e3be0 38210ee 356fe31 aabd0b6 363a6cb 00e3be0 356fe31 06e8c2c 5ce3551 91e32b4 ec51f9d 91e32b4 38210ee b414398 7c67606 0fade42 8300932 7c67606 0fade42 7c67606 59d0aa9 7c67606 59d0aa9 387cdcd b414398 883e413 7285c62 8c47a7a b414398 83d1755 356fe31 cc9ebdf 990bc65 812ef81 339ab1a 356fe31 5719e80 00e3be0 38210ee 5719e80 38210ee e98b562 00e3be0 97d8622 00e3be0 20cfd29 03af7e5 20cfd29 8ac77da b414398 4fd57b2 8c47a7a 4fd57b2 8c47a7a 4fd57b2 8c47a7a 4fd57b2 e13f0c8 b414398 cc9ebdf b414398 38210ee b414398 eb5a1b2 |
# 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 |
import os
import pandas as pd
import requests
import huggingface_hub
import gradio as gr
# Endpoint that model submissions are POSTed to; None when WEBHOOK_URL is unset.
webhook_url = os.getenv("WEBHOOK_URL")

# Leaderboard rows, read from data.csv with dtype "str" for every column.
data = pd.read_csv("data.csv", dtype="str")

# Reference link for each architecture name; the key order is also the order
# in which the architecture filter lists its choices.
archlinks = {
    "Based": "https://arxiv.org/abs/2402.18668",
    "Griffin": "https://arxiv.org/abs/2402.19427",
    "H3": "https://arxiv.org/abs/2212.14052",
    "Hyena": "https://arxiv.org/abs/2302.10866",
    "M2": "https://arxiv.org/abs/2310.12109",
    "Mamba": "https://arxiv.org/abs/2312.00752",
    "Mamba2": "https://arxiv.org/abs/2405.21060",
    "Jamba": "https://arxiv.org/abs/2403.19887",
    "RWKV-4": "https://arxiv.org/abs/2305.13048",
    "RWKV-5": "https://arxiv.org/abs/2404.05892",
    "RWKV-6": "https://arxiv.org/abs/2404.05892",
    "StripedHyena": "https://www.together.ai/blog/stripedhyena-7b",  # blog post; no paper?
    "Zamba": "https://arxiv.org/abs/2405.16712",
}
_LINK_STYLE = "color:var(--link-text-color);text-decoration:underline;text-decoration-style:dotted"


def _link(url, text):
    """Return an anchor tag that opens *url* in a new tab and displays *text*."""
    return f'<a target="_blank" href="{url}" style="{_LINK_STYLE}">{text}</a>'


def filter_table(cols, name, type, arch, size, *, frame=None, links=None):
    """Filter the leaderboard rows and render them for display.

    Args:
        cols: Column names to hide (dropped from the result).
        name: Text to search for in the "Name" column. It is matched as a
            literal, case-insensitive substring; empty or None matches all names.
        type: Allowed values of the "Type" column. (The parameter name shadows
            the builtin; it is kept so existing callers are unaffected.)
        arch: Allowed values of the "Architecture" column.
        size: Allowed values of the "Model Size" column.
        frame: Optional table to filter instead of the module-level ``data``.
        links: Optional architecture -> URL mapping used instead of the
            module-level ``archlinks``.

    Returns:
        A new DataFrame; the input table is never modified. "Type" is reduced
        to its first character and renamed "T". "Name", "Architecture" and
        "Base Model" are rendered as HTML links. Rows without a name are
        dropped.
    """
    source = data if frame is None else frame
    arch_urls = archlinks if links is None else links

    # regex=False: the search box holds free text, so "(" or "+" must not be
    # parsed as a regular expression.  na=False: a missing name is treated as
    # "no match" rather than leaving NaN in the boolean mask.
    keep = (
        source["Name"].str.contains(name or "", case=False, regex=False, na=False)
        & source["Type"].isin(type)
        & source["Architecture"].isin(arch)
        & source["Model Size"].isin(size)
    )
    # Explicit copy so the column assignments below never touch a view of the source.
    view = source[keep].copy()

    # prettify
    view["Type"] = view["Type"].apply(lambda label: label[:1])
    view = view.rename(columns={"Type": "T"})
    view["Name"] = view["Name"].apply(
        lambda repo: _link(f"https://huggingface.co/{repo}", repo)
    )
    # An architecture without a known link is shown as plain text.
    view["Architecture"] = view["Architecture"].apply(
        lambda label: _link(arch_urls[label], label) if label in arch_urls else label
    )
    # "base" marks a model with no parent; a missing value is treated the same way.
    view["Base Model"] = view["Base Model"].apply(
        lambda repo: _link(f"https://huggingface.co/{repo}", repo)
        if isinstance(repo, str) and repo != "base"
        else ""
    )

    # show/hide
    return view.drop(columns=cols)
def submit_model(name):
    """Validate a Hugging Face repo id and forward it to the submission webhook.

    The repo is checked by downloading its ``config.json`` from the Hub; if
    that succeeds the repo id is POSTed as JSON to ``webhook_url``.

    Args:
        name: Repo id typed by the user. Surrounding whitespace is ignored.

    Returns:
        A Markdown heading string describing success or the reason for
        failure. Failures are reported this way; no exception is raised.
    """
    repo_id = (name or "").strip()
    if not repo_id:
        return "# ERROR: Please enter a model name!"

    try:
        huggingface_hub.hf_hub_download(repo_id=repo_id, filename="config.json")  # sanity check input
    # The error classes are taken from the public `huggingface_hub.utils`
    # namespace; the private `_errors` module is not a stable import path.
    except huggingface_hub.utils.EntryNotFoundError:
        return "# ERROR: Model does not have a config.json file!"
    except huggingface_hub.utils.RepositoryNotFoundError:
        return "# ERROR: Model could not be found on the Hugging Face Hub!"
    # RequestException also covers connection failures and timeouts, not only HTTP status errors.
    except requests.exceptions.RequestException:
        return "# ERROR: Network error while validating model. Please try again later."
    except Exception as e:
        print(e)
        return "# ERROR: Unexpected error. Please try again later."

    if not webhook_url:
        print("WEBHOOK_URL is not set; cannot forward the submission.")
        return "# ERROR: Submission queue is unavailable. Please try again later."

    try:
        response = requests.post(webhook_url, json={"content": repo_id}, timeout=10)
        # requests does not raise on 4xx/5xx by itself; without this a rejected
        # POST would be reported to the user as a success.
        response.raise_for_status()
    except requests.exceptions.RequestException:
        return "# ERROR: Network error while contacting queue. Please try again in a few minutes."
    except Exception as e:
        print(e)
        return "# ERROR: Unexpected error. Please try again later."
    return "# SUCCESS: Please wait up to 24 hours for your model to be added to the queue."
# NOTE(review): the nesting of the layout below was reconstructed from a source
# whose indentation was lost — confirm it against the rendered page.

# Initial selections, named once so the widgets and the first render of the table agree.
default_hidden_cols = ["Architecture", "Model Size", "Base Model"]
type_choices = list(data["Type"].unique())
default_types = [kind for kind in type_choices if kind != "⏳ Pending"]
arch_choices = list(archlinks.keys())
size_choices = list(data["Model Size"].unique())

# Models shown side by side in the comparison tab.
comparison_models = [
    "devingulliver/llama-pile-350b",
    "RWKV/rwkv-4-1b5-pile",
    "state-spaces/mamba-1.4b",
    "danfu09/H3-1.3B",
    "state-spaces/mamba2-1.3b",
]

with gr.Blocks(css=".gradio-container{max-width:95%!important} .tab-buttons button{font-size:1.3em}") as demo:
    gr.HTML('<h1 style="text-align:center"><span style="font-size:1.3em">Subquadratic LLM Leaderboard</span></h1>')
    gr.Markdown("**REMEMBER:** If you don't see an eligible model here, make sure to submit it! We hope to incentivize subquadratic/attention-free LLM development through friendly competition.")
    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.Tab("🏅 LLM Benchmark"):
            with gr.Row():
                with gr.Column():
                    namefilter = gr.Textbox(
                        max_lines=1,
                        placeholder="Search by model name and hit Enter...",
                        show_label=False,
                    )
                    colfilter = gr.CheckboxGroup(
                        label="Hide columns",
                        choices=list(data.columns)[2:],
                        value=default_hidden_cols,
                    )
                    typefilter = gr.CheckboxGroup(
                        label="Filter by model type",
                        choices=type_choices,
                        value=default_types,
                    )
                with gr.Column():
                    archfilter = gr.CheckboxGroup(
                        label="Filter by model architecture",
                        choices=arch_choices,
                        value=arch_choices,
                    )
                    sizefilter = gr.CheckboxGroup(
                        label="Filter by model size",
                        choices=size_choices,
                        value=size_choices,
                    )
            # First render uses the same selections the widgets start with.
            table = gr.Dataframe(
                filter_table(default_hidden_cols, "", default_types, arch_choices, size_choices),
                datatype="markdown",
            )
            # actions: the name box refreshes on Enter, the checkbox groups on every change
            filter_inputs = [colfilter, namefilter, typefilter, archfilter, sizefilter]
            namefilter.submit(filter_table, filter_inputs, table)
            for widget in [colfilter, typefilter, archfilter, sizefilter]:
                widget.input(filter_table, filter_inputs, table)
        with gr.Tab("⚖️ Comparison"):
            gr.Markdown("This table is whitelisted to one model per architecture, specifically 1.5B models trained on The Pile for 1 epoch, for a direct comparison of architectures.")
            comparison = data[data["Name"].isin(comparison_models)]
            gr.Dataframe(comparison.drop(columns=["Type", "Model Size", "Base Model"]), datatype="markdown")
        with gr.Tab("📝 About"):
            gr.Markdown("""
The **Subquadratic LLM Leaderboard** evaluates LLMs with subquadratic/attention-free architectures (i.e. RWKV & Mamba) with the goal of providing open
evaluation results while the architectures themselves are pending inclusion/release in the 🤗 Transformers library.
The metrics are the same as the Open LLM Leaderboard: ARC 25-shot, HellaSwag 10-shot, MMLU 5-shot, TruthfulQA zeroshot, Winogrande 5-shot, and GSM8K 5-shot.
This leaderboard is maintained by Devin Gulliver and is perpetually under construction, check back regularly for further improvements!
Compute for evaluating RWKV models is generously provided by [Recursal AI](https://recursal.ai).
""")
        with gr.Tab("🚀 Submit here!"):
            with gr.Group():
                with gr.Row():
                    model_name = gr.Textbox(
                        max_lines=1,
                        placeholder="Enter model name...",
                        show_label=False,
                        scale=4,
                    )
                    submit = gr.Button("Submit", variant="primary", scale=0)
            output = gr.Markdown("Enter a public HF repo id, then hit Submit to add it to the evaluation queue.")
            submit.click(fn=submit_model, inputs=model_name, outputs=output)
# Start the app with the API page hidden and data.csv listed in allowed_paths.
# (The stray trailing "|" that followed this call was a syntax error and is removed.)
demo.launch(show_api=False, allowed_paths=["data.csv"])