File size: 4,387 Bytes
c8763bd
134a499
c8763bd
134a499
ab5f5f1
 
 
 
2460b35
 
ab5f5f1
 
6f3a090
 
0f1bf97
ab5f5f1
 
 
 
0f1bf97
 
c8763bd
efc3d5b
9a3f7b4
ab5f5f1
b3a1bf0
 
c8763bd
 
6f3a090
ab5f5f1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4b40065
ab5f5f1
 
 
 
 
 
 
 
 
2460b35
 
 
ab5f5f1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2460b35
 
d19e350
 
 
ab5f5f1
 
 
d19e350
 
 
ab5f5f1
d19e350
 
134a499
 
d19e350
ab5f5f1
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
import os

import gradio as gr

from src.control_panel import create_control_panel, create_control_callback
from src.latency_score_memory import create_lat_score_mem_plot
from src.leaderboard import create_leaderboard_table
from src.bettertransformer import create_bt_plots
from src.flashattentionv2 import create_fa2_plots
from src.exllama import create_exllama_plots
from src.llm_perf import get_llm_perf_df
from src.assets import custom_css
from src.content import (
    LOGO,
    TITLE,
    ABOUT,
    INTRODUCTION,
    EXAMPLE_CONFIG,
    CITATION_BUTTON,
    CITATION_BUTTON_LABEL,
)


# Maps each benchmark machine name to the label shown on its hardware tab.
# The label's emoji had been stored as mis-decoded UTF-8 (mojibake); it is
# restored here so the tab title renders correctly.
MACHINE_TO_HARDWARE = {"hf-dgx-01": "A100-80GB 🖥️"}
# Optional Hugging Face access token read from the environment; None when unset.
HF_TOKEN = os.environ.get("HF_TOKEN")


# Build the whole leaderboard UI: header, one tab per benchmarked machine
# (control panel + result subtabs), an About tab and a citation accordion.
demo = gr.Blocks(css=custom_css)
with demo:
    gr.HTML(LOGO, elem_classes="logo")
    gr.HTML(TITLE, elem_classes="title")
    gr.Markdown(INTRODUCTION, elem_classes="descriptive-text")
    ####################### HARDWARE TABS #######################
    with gr.Tabs(elem_classes="tabs"):
        # `tab_id` instead of `id` so the builtin `id()` is not shadowed.
        for tab_id, (machine, hardware) in enumerate(MACHINE_TO_HARDWARE.items()):
            with gr.TabItem(hardware, id=tab_id):
                ####################### CONTROL PANEL #######################
                (
                    filter_button,
                    machine_textbox,
                    search_bar,
                    score_slider,
                    memory_slider,
                    backend_checkboxes,
                    datatype_checkboxes,
                    optimization_checkboxes,
                    quantization_checkboxes,
                ) = create_control_panel()
                ####################### HARDWARE SUBTABS #######################
                with gr.Tabs(elem_classes="subtabs"):
                    # Fetched once per machine and shared by every subtab below.
                    llm_perf_df = get_llm_perf_df(machine=machine)
                    ####################### LEADERBOARD TAB #######################
                    with gr.TabItem("Leaderboard 🏅", id=0):
                        leaderboard_table = create_leaderboard_table(llm_perf_df)
                    ####################### LAT. vs. SCORE vs. MEM. TAB #######################
                    with gr.TabItem("Latency vs. Score vs. Memory 📊", id=1):
                        lat_score_mem_plot = create_lat_score_mem_plot(llm_perf_df)
                    ####################### BETTERTRANSFORMER SPEEDUP TAB #######################
                    with gr.TabItem("BetterTransformer Speedup 📈", id=2):
                        bt_prefill_plot, bt_decode_plot = create_bt_plots(llm_perf_df)
                    ####################### FLASHATTENTIONV2 SPEEDUP TAB #######################
                    with gr.TabItem("FlashAttentionV2 Speedup 📈", id=3):
                        fa2_prefill_plot, fa2_decode_plot = create_fa2_plots(llm_perf_df)
                    ####################### EXLLAMA SPEEDUP TAB #######################
                    with gr.TabItem("Exllama Speedup 📈", id=4):
                        exllama_prefill_plot, exllama_decode_plot = create_exllama_plots(llm_perf_df)

                ####################### CONTROL CALLBACK #######################
                create_control_callback(
                    filter_button,
                    # inputs
                    machine_textbox,
                    search_bar,
                    score_slider,
                    memory_slider,
                    backend_checkboxes,
                    datatype_checkboxes,
                    optimization_checkboxes,
                    quantization_checkboxes,
                    # outputs
                    leaderboard_table,
                    lat_score_mem_plot,
                    bt_prefill_plot,
                    bt_decode_plot,
                    fa2_prefill_plot,
                    fa2_decode_plot,
                    exllama_prefill_plot,
                    exllama_decode_plot,
                )
        ####################### ABOUT TAB #######################
        # Hardware tabs use ids 0..len-1, so the next free id is the machine
        # count; a fixed id would clash once enough machines are added.
        with gr.TabItem("About 📖", id=len(MACHINE_TO_HARDWARE)):
            gr.HTML(ABOUT, elem_classes="descriptive-text")
            gr.Markdown(EXAMPLE_CONFIG, elem_classes="descriptive-text")
    ####################### CITATION #######################
    with gr.Row():
        with gr.Accordion("📙 Citation", open=False):
            citation_button = gr.Textbox(
                value=CITATION_BUTTON,
                label=CITATION_BUTTON_LABEL,
                elem_id="citation-button",
                show_copy_button=True,
            )

if __name__ == "__main__":
    # Script entry point: set up the queue on the app, then start serving it.
    queued_demo = demo.queue()
    queued_demo.launch()