"""A gradio app that renders a static leaderboard. This is used for Hugging Face Space.""" import ast import argparse import glob import pickle import gradio as gr import numpy as np import pandas as pd import os from collections import defaultdict from matplotlib.colors import LinearSegmentedColormap def make_default_md(): leaderboard_md = f""" # 🏆 Babilong Leaderboard | [GitHub](https://github.com/booydar/recurrent-memory-transformer/) | [Paper](https://arxiv.org/abs/2402.10790) | [Dataset](https://github.com/booydar/babilong/) | """ return leaderboard_md def make_arena_leaderboard_md(total_models): leaderboard_md = f"""Total #models: **{total_models}**. Last updated: Mar 29, 2024.""" return leaderboard_md def make_model_desc_md(f_len): desc_md = make_arena_leaderboard_md(f_len) models = next(os.walk('info'))[2] for model in models: model_name = model.split('.md')[0] with open(os.path.join('info', model), 'r') as f: description = f.read() desc_md += f"\n\n### {model_name}\n{description}" return desc_md def model_hyperlink(model_name, link): return f'{model_name}' def load_model(folders, tab_name, msg_lengths): results = defaultdict(list) class NA(): def __repr__(self) -> str: return '-' def __float__(self): return 0.0 mean_score = [] for i, folder in enumerate(folders): model_name = folder.split('/')[-1] results['Rank'].append(i) results['Model'].append(model_name) for task in msg_lengths: if not os.path.isfile(f'{folder}/{tab_name}/{task}.csv'): results[msg_lengths[task]].append(NA()) else: df = pd.read_csv(f'{folder}/{tab_name}/{task}.csv') results[msg_lengths[task]].append(int(df['result'].sum() / len(df) * 100)) mean_score.append(-np.mean([float(results[msg_lengths[task]][i]) for task in list(msg_lengths.keys())[:5]])) for rank, i in enumerate(np.argsort(mean_score)): results['Rank'][i] = rank + 1 return pd.DataFrame(results).sort_values(['Rank']) def build_leaderboard_tab(folders): default_md = make_default_md() md_1 = gr.Markdown(default_md, elem_id="leaderboard_markdown") msg_lengths = { '0': '0k', '4000': '4k', '8000': '8k', '16000': '16k', '32000': '32k', '64000': '64k', '128000': '128k', '500000': '500k', '1000000': '1M', '10000000': '10M' } with gr.Tabs() as tabs: for tab_id, tab_name in enumerate(['qa1', 'qa2', 'qa3', 'qa4', 'qa5']): df = load_model(folders, tab_name, msg_lengths) cmap = LinearSegmentedColormap.from_list('ryg', ["red", "yellow", "green"], N=256) df = df.style.background_gradient(cmap=cmap, vmin=0, vmax=100, subset=list(msg_lengths.values())) # arena table with gr.Tab(tab_name, id=tab_id): md = make_arena_leaderboard_md(len(folders)) gr.Markdown(md, elem_id="leaderboard_markdown") gr.Dataframe( headers=[ "Rank", "🤖 Model", ] + list(msg_lengths.values()), datatype=[ "str", "markdown", "str", "str", "str", "str", "str", "str", "str", ], value=df, elem_id="arena_leaderboard_dataframe", height=700, column_widths=[50, 200] + [100] * len(msg_lengths), wrap=True, ) with gr.Tab("Model description", id=tab_id + 1): desc_md = make_model_desc_md(len(folders)) gr.Markdown(desc_md, elem_id="leaderboard_markdown") return [md_1] block_css = """ #notice_markdown { font-size: 104% } #notice_markdown th { display: none; } #notice_markdown td { padding-top: 6px; padding-bottom: 6px; } #leaderboard_markdown { font-size: 104% } #leaderboard_markdown td { padding-top: 6px; padding-bottom: 6px; } #leaderboard_dataframe td { line-height: 0.1em; } footer { display:none !important } .image-container { display: flex; align-items: center; padding: 1px; } .image-container img { margin: 0 30px; height: 20px; max-height: 100%; width: auto; max-width: 20%; } """ def build_demo(folders): text_size = gr.themes.sizes.text_lg with gr.Blocks( title="Babilong leaderboard", theme=gr.themes.Base(text_size=text_size), css=block_css, ) as demo: leader_components = build_leaderboard_tab(folders) return demo if __name__ == "__main__": folders = [f'results/{folders}' for folders in os.listdir('results')] demo = build_demo(folders) demo.launch(share=False)