File size: 5,112 Bytes
c8763bd
134a499
ab5f5f1
76b423c
 
 
003f467
5345cba
 
 
 
76b423c
 
 
0f1bf97
c8763bd
003f467
b3a1bf0
591a3e4
c8763bd
 
6f3a090
ab5f5f1
 
 
003f467
 
504caea
 
 
 
 
 
003f467
ab5f5f1
 
 
003f467
ab5f5f1
 
 
 
 
 
4f5bf6c
39105fc
ab5f5f1
 
003f467
ab5f5f1
4b40065
76b423c
 
 
39105fc
003f467
 
76b423c
 
 
 
 
 
 
 
 
 
2460b35
ab5f5f1
 
 
 
 
003f467
ab5f5f1
 
 
 
 
 
4f5bf6c
0232cf1
a8a6326
0232cf1
ab5f5f1
 
 
76b423c
 
 
 
d19e350
a8a6326
 
 
 
003f467
0232cf1
a8a6326
0232cf1
a8a6326
 
 
 
d19e350
003f467
a8a6326
ab5f5f1
d19e350
 
 
ab5f5f1
d19e350
 
134a499
 
d19e350
ab5f5f1
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
import gradio as gr

from src.assets import custom_css

# from src.attention import create_attn_plots
from src.content import ABOUT, CITATION_BUTTON, CITATION_BUTTON_LABEL, LOGO, TITLE
from src.hardware import load_hardware_configs
from src.leaderboard import create_leaderboard_table
from src.llm_perf import get_llm_perf_df
from src.map import create_lat_score_mem_plot
from src.panel import (
    create_control_callback,
    create_control_panel,
    create_select_callback,
)

# Hardware configurations loaded from "hardware.yml"; one top-level tab is
# built per entry.
configs = load_hardware_configs("hardware.yml")


# Top-level Gradio app: logo/title header, one tab per hardware config (each
# with its own control panel, leaderboard and optional plot), an "About" tab
# and a citation accordion.
demo = gr.Blocks(css=custom_css)
with demo:
    gr.HTML(LOGO, elem_classes="logo")
    gr.HTML(TITLE, elem_classes="title")
    ####################### HARDWARE TABS #######################
    with gr.Tabs(elem_classes="tabs"):
        # `tab_id` (rather than `id`) avoids shadowing the builtin at module scope.
        for tab_id, config in enumerate(configs):
            with gr.TabItem(config.description, id=tab_id):
                ####################### HARDWARE DETAILS #######################
                if config.detail:
                    gr.Markdown(config.detail, elem_classes="descriptive-text")

                ####################### CONTROL PANEL #######################
                (
                    filter_button,
                    machine_textbox,
                    subsets_values,
                    score_slider,
                    memory_slider,
                    backend_checkboxes,
                    datatype_checkboxes,
                    optimization_checkboxes,
                    quantization_checkboxes,
                    kernels_checkboxes,
                ) = create_control_panel(
                    machine=config.machine,
                    subsets=config.subsets,
                    hardware_provider=config.hardware_provider,
                )

                # TODO intel CPU does not measure the memory requirements correctly,
                # so disable the graph feature until we fix the underlying issue
                show_plot_tab = config.hardware_provider != "intel"

                ####################### HARDWARE SUBTABS #######################
                with gr.Tabs(elem_classes="subtabs"):
                    # Fetched once per hardware tab and shared by the subtabs below.
                    open_llm_perf_df = get_llm_perf_df(
                        machine=config.machine, subsets=config.subsets
                    )
                    ####################### LEADERBOARD TAB #######################
                    with gr.TabItem("Leaderboard 🏅", id=0):
                        search_bar, columns_checkboxes, leaderboard_table = (
                            create_leaderboard_table(open_llm_perf_df)
                        )
                    ####################### LAT. vs. SCORE vs. MEM. TAB #######################
                    if show_plot_tab:
                        with gr.TabItem("Find Your Best Model 🧭", id=1):
                            lat_score_mem_plot = create_lat_score_mem_plot(
                                open_llm_perf_df
                            )
                    ###################### ATTENTIONS SPEEDUP TAB #######################
                    # with gr.TabItem("Attention 📈", id=2):
                    #     attn_prefill_plot, attn_decode_plot = create_attn_plots(
                    #         open_llm_perf_df
                    #     )
                    # ####################### KERNELS SPEEDUP TAB #######################
                    # with gr.TabItem("Kernels 📈", id=4):
                    #     quant_krnl_prefill_plot, quant_krnl_decode_plot = (
                    #         create_quant_krnl_plots(llm_perf_df)
                    #     )

                if not show_plot_tab:
                    # The control callback below always lists the plot as an output.
                    # Without a per-tab component, `lat_score_mem_plot` would either
                    # be unbound (NameError if this is the first tab) or still point
                    # at the previous tab's plot, which this tab's filter button
                    # would then overwrite. Build the plot in a hidden row so the
                    # callback has a component owned by this tab while the feature
                    # stays invisible to users.
                    # NOTE(review): assumes create_lat_score_mem_plot tolerates this
                    # hardware's data at build time — confirm.
                    with gr.Row(visible=False):
                        lat_score_mem_plot = create_lat_score_mem_plot(
                            open_llm_perf_df
                        )

                ####################### CONTROL CALLBACK #######################
                create_control_callback(
                    filter_button,
                    # inputs
                    machine_textbox,
                    subsets_values,
                    score_slider,
                    memory_slider,
                    backend_checkboxes,
                    datatype_checkboxes,
                    optimization_checkboxes,
                    quantization_checkboxes,
                    kernels_checkboxes,
                    # interactive
                    columns_checkboxes,
                    search_bar,
                    # outputs
                    leaderboard_table,
                    lat_score_mem_plot,
                    # attn_prefill_plot,
                    # attn_decode_plot,
                    # quant_krnl_prefill_plot,
                    # quant_krnl_decode_plot,
                )

                create_select_callback(
                    # inputs
                    machine_textbox,
                    subsets_values,
                    # interactive
                    columns_checkboxes,
                    search_bar,
                    # outputs
                    leaderboard_table,
                )

        ####################### ABOUT TAB #######################
        # Uses the first id after the hardware tabs (ids 0..len(configs)-1).
        with gr.TabItem("About 📖", id=len(configs)):
            gr.Markdown(ABOUT, elem_classes="descriptive-text")
    ####################### CITATION #######################
    with gr.Row():
        with gr.Accordion("📙 Citation", open=False):
            citation_button = gr.Textbox(
                value=CITATION_BUTTON,
                label=CITATION_BUTTON_LABEL,
                elem_id="citation-button",
                show_copy_button=True,
            )

if __name__ == "__main__":
    # Launch demo
    demo.queue().launch()