"""Gradio entry point for the leaderboard app.

Builds a ``gr.Blocks`` UI with one top-level tab per entry in
``MACHINE_TO_HARDWARE``. Each hardware tab holds a control panel, a set of
sub-tabs (leaderboard table and plots) built from that machine's dataframe,
and the callbacks wiring the controls to the table and plots. An "About" tab
and a citation accordion complete the page.
"""

import os

import gradio as gr

from src.control_panel import (
    create_control_panel,
    create_control_callback,
    create_select_callback,
)
from src.latency_score_memory import create_lat_score_mem_plot
from src.quantization_kernels import create_quant_plots
from src.leaderboard import create_leaderboard_table
from src.bettertransformer import create_bt_plots
from src.flashattentionv2 import create_fa2_plots
from src.llm_perf import get_llm_perf_df
from src.assets import custom_css
from src.content import (
    LOGO,
    TITLE,
    ABOUT,
    CITATION_BUTTON,
    CITATION_BUTTON_LABEL,
)


# Maps the machine name passed to `get_llm_perf_df` to the label shown on
# that machine's tab.
MACHINE_TO_HARDWARE = {"hf-dgx-01": "A100-80GB-275W 🖥️", "audace": "RTX4090-24GB-450W 💻"}
# Read from the environment (None when unset); not used anywhere else in this file.
HF_TOKEN = os.environ.get("HF_TOKEN", None)


demo = gr.Blocks(css=custom_css)
with demo:
    gr.HTML(LOGO, elem_classes="logo")
    gr.HTML(TITLE, elem_classes="title")
    ####################### HARDWARE TABS #######################
    with gr.Tabs(elem_classes="tabs"):
        # `tab_id` rather than `id`: this loop runs at module scope, so the
        # loop variable would otherwise shadow the builtin `id()` module-wide.
        for tab_id, (machine, hardware) in enumerate(MACHINE_TO_HARDWARE.items()):
            with gr.TabItem(hardware, id=tab_id):
                ####################### CONTROL PANEL #######################
                # NOTE(review): the panel is created without this tab's
                # `machine`; confirm `machine_textbox` ends up holding the
                # right machine for every hardware tab.
                (
                    filter_button,
                    machine_textbox,
                    score_slider,
                    memory_slider,
                    backend_checkboxes,
                    datatype_checkboxes,
                    optimization_checkboxes,
                    quantization_checkboxes,
                ) = create_control_panel()
                ####################### HARDWARE SUBTABS #######################
                with gr.Tabs(elem_classes="subtabs"):
                    # One dataframe per machine, shared by every sub-tab below.
                    llm_perf_df = get_llm_perf_df(machine=machine)
                    ####################### LEADERBOARD TAB #######################
                    with gr.TabItem("Leaderboard 🏅", id=0):
                        (
                            search_bar,
                            columns_checkboxes,
                            leaderboard_table,
                        ) = create_leaderboard_table(llm_perf_df)
                    ####################### LATENCY / SCORE / MEMORY TAB #######################
                    with gr.TabItem("Find Your Best Model 🧭", id=1):
                        lat_score_mem_plot = create_lat_score_mem_plot(llm_perf_df)
                    ####################### BETTERTRANSFORMER SPEEDUP TAB #######################
                    with gr.TabItem("ScaledDotProductAttention 📈", id=2):
                        bt_prefill_plot, bt_decode_plot = create_bt_plots(llm_perf_df)
                    ####################### FLASHATTENTIONV2 TAB #######################
                    with gr.TabItem("FlashAttentionV2 📈", id=3):
                        fa2_prefill_plot, fa2_decode_plot = create_fa2_plots(llm_perf_df)
                    ####################### QUANTIZATION KERNELS TAB #######################
                    with gr.TabItem("Quantization Kernels 📈", id=4):
                        quant_prefill_plot, quant_decode_plot = create_quant_plots(llm_perf_df)

                ####################### CONTROL CALLBACK #######################
                # Registered inside the loop so each hardware tab wires its
                # own widgets; the names are rebound on every iteration.
                create_control_callback(
                    filter_button,
                    # inputs
                    machine_textbox,
                    score_slider,
                    memory_slider,
                    backend_checkboxes,
                    datatype_checkboxes,
                    optimization_checkboxes,
                    quantization_checkboxes,
                    # interactive
                    columns_checkboxes,
                    search_bar,
                    # outputs
                    leaderboard_table,
                    lat_score_mem_plot,
                    bt_prefill_plot,
                    bt_decode_plot,
                    fa2_prefill_plot,
                    fa2_decode_plot,
                    quant_prefill_plot,
                    quant_decode_plot,
                )

                create_select_callback(
                    # inputs
                    machine_textbox,
                    # interactive
                    columns_checkboxes,
                    search_bar,
                    # outputs
                    leaderboard_table,
                )

        ####################### ABOUT TAB #######################
        # NOTE(review): this hard-coded id shares the ID space of the
        # enumerated hardware tabs above; it would clash once
        # MACHINE_TO_HARDWARE has four or more entries.
        with gr.TabItem("About 📖", id=3):
            gr.Markdown(ABOUT, elem_classes="descriptive-text")

    ####################### CITATION #######################
    with gr.Row():
        with gr.Accordion("📙 Citation", open=False):
            citation_button = gr.Textbox(
                value=CITATION_BUTTON,
                label=CITATION_BUTTON_LABEL,
                elem_id="citation-button",
                show_copy_button=True,
            )


if __name__ == "__main__":
    # Launch demo
    demo.queue().launch()