|
import gradio as gr |
|
|
|
from src.leaderboard import get_leaderboard_df |
|
from src.llm_perf import get_llm_perf_df |
|
|
|
|
|
|
|
from src.map import get_lat_score_mem_fig |
|
|
|
|
|
def create_control_panel(machine: str):
    """Build the collapsible "Control Panel" filter widgets.

    The ``machine`` identifier is stored in an invisible ``gr.Textbox`` so it
    can be passed to callbacks as a regular input component. All filter
    widgets live inside an accordion that starts closed.

    Returns:
        A 9-tuple, in this order: filter button, hidden machine textbox,
        score slider, memory slider, backend checkboxes, datatype checkboxes,
        optimization (attention) checkboxes, quantization checkboxes,
        kernel checkboxes.
    """
    # NOTE(review): the non-ASCII characters in the labels below look like
    # mis-decoded emoji. They are kept exactly as found - confirm the
    # intended glyphs before changing them.

    # Every checkbox group starts with all of its options ticked, so each
    # option list is declared once and copied for the initial ``value``.
    backend_options = ["pytorch"]
    dtype_options = ["float32", "float16", "bfloat16"]
    attention_options = ["Eager", "SDPA", "FAv2"]
    quantization_options = [
        "Unquantized",
        "BnB.4bit",
        "BnB.8bit",
        "AWQ.4bit",
        "GPTQ.4bit",
    ]
    kernel_options = [
        "No Kernel",
        "GPTQ.ExllamaV1",
        "GPTQ.ExllamaV2",
        "AWQ.GEMM",
        "AWQ.GEMV",
    ]
    # Upper bound (and initial position) of the memory slider, in the MB
    # units announced by its label.
    memory_ceiling = 80 * 1024

    hidden_machine = gr.Textbox(value=machine, visible=False)

    with gr.Accordion("Control Panel ποΈ", open=False, elem_id="control-panel"):
        # Row 1: numeric thresholds and backend selection.
        with gr.Row():
            with gr.Column(scale=2, variant="panel"):
                min_score = gr.Slider(
                    label="Open LLM Score (%) π",
                    info="ποΈ Slide to minimum Open LLM score",
                    value=0,
                    elem_id="threshold-slider",
                )
            with gr.Column(scale=2, variant="panel"):
                max_memory = gr.Slider(
                    label="Peak Memory (MB) π",
                    info="ποΈ Slide to maximum Peak Memory",
                    minimum=0,
                    maximum=memory_ceiling,
                    value=memory_ceiling,
                    elem_id="memory-slider",
                )
            with gr.Column(scale=1, variant="panel"):
                backends = gr.CheckboxGroup(
                    label="Backends π",
                    choices=backend_options,
                    value=list(backend_options),
                    info="βοΈ Select the backends",
                    elem_id="backend-checkboxes",
                )

        # Row 2: precision and attention implementation.
        with gr.Row():
            with gr.Column(scale=1, variant="panel"):
                dtypes = gr.CheckboxGroup(
                    label="Precision π₯",
                    choices=dtype_options,
                    value=list(dtype_options),
                    info="βοΈ Select the load data types",
                    elem_id="dtype-checkboxes",
                )
            with gr.Column(scale=1, variant="panel"):
                attentions = gr.CheckboxGroup(
                    label="Attentions ποΈ",
                    choices=attention_options,
                    value=list(attention_options),
                    info="βοΈ Select the optimization",
                    elem_id="optimization-checkboxes",
                )

        # Row 3: quantization schemes and custom kernels.
        with gr.Row():
            with gr.Column(scale=1, variant="panel"):
                quantizations = gr.CheckboxGroup(
                    label="Quantizations ποΈ",
                    choices=quantization_options,
                    value=list(quantization_options),
                    info="βοΈ Select the quantization schemes",
                    elem_id="quantization-checkboxes",
                    elem_classes="boxed-option",
                )
            with gr.Column(scale=1, variant="panel"):
                kernels = gr.CheckboxGroup(
                    label="Kernels βοΈ",
                    choices=kernel_options,
                    value=list(kernel_options),
                    info="βοΈ Select the custom kernels",
                    elem_id="kernel-checkboxes",
                    elem_classes="boxed-option",
                )

        # Row 4: the button that applies the filters.
        with gr.Row():
            apply_button = gr.Button(
                value="Filter π",
                elem_id="filter-button",
                elem_classes="boxed-option",
            )

    return (
        apply_button,
        hidden_machine,
        min_score,
        max_memory,
        backends,
        dtypes,
        attentions,
        quantizations,
        kernels,
    )
|
|
|
|
|
def filter_rows_fn(
    machine,
    # filter inputs
    score,
    memory,
    backends,
    precisions,
    attentions,
    quantizations,
    kernels,
    # interactive inputs
    columns,
    search,
):
    """Filter the LLM-perf data for ``machine`` and rebuild table and plot.

    A row is kept when its model name matches ``search`` (case-insensitive),
    its backend / precision / attention / quantization / kernel values are
    among the selected options, its Open LLM score is at least ``score`` and
    its memory is at most ``memory``.

    ``search`` is interpreted as a regular expression, as before. If it is
    not a valid pattern (for example ``"("`` or ``"["``), it is matched as
    literal text instead of raising ``re.error`` out of the callback.

    Returns:
        A two-element list: the leaderboard table restricted to ``columns``
        (via ``select_columns_fn``) and the latency/score/memory figure
        built from the filtered rows.
    """
    # Function-scope import keeps this block self-contained.
    import re

    # The search text comes straight from a UI text box. Keep valid patterns
    # untouched, and escape invalid ones so they match literally.
    try:
        re.compile(search)
    except re.error:
        search = re.escape(search)

    llm_perf_df = get_llm_perf_df(machine=machine)

    # NOTE(review): the non-ASCII characters in the column keys look like
    # mis-decoded emoji. They are kept exactly as found, because they must
    # match the columns of the DataFrame returned by get_llm_perf_df.
    filtered_llm_perf_df = llm_perf_df[
        llm_perf_df["Model π€"].str.contains(search, case=False)
        & llm_perf_df["Backend π"].isin(backends)
        & llm_perf_df["Precision π₯"].isin(precisions)
        & llm_perf_df["Attention ποΈ"].isin(attentions)
        & llm_perf_df["Quantization ποΈ"].isin(quantizations)
        & llm_perf_df["Kernel βοΈ"].isin(kernels)
        & (llm_perf_df["Open LLM Score (%)"] >= score)
        & (llm_perf_df["Memory (MB)"] <= memory)
    ]

    # Pass on the sanitized pattern, so the search applied again inside
    # select_columns_fn cannot fail on an invalid pattern either.
    selected_filtered_llm_perf_df = select_columns_fn(
        machine, columns, search, filtered_llm_perf_df
    )
    selected_filtered_lat_score_mem_fig = get_lat_score_mem_fig(filtered_llm_perf_df)

    return [
        selected_filtered_llm_perf_df,
        selected_filtered_lat_score_mem_fig,
    ]
|
|
|
|
|
def create_control_callback(
    # button
    filter_button,
    # fixed
    machine_textbox,
    # filter inputs
    score_slider,
    memory_slider,
    backend_checkboxes,
    datatype_checkboxes,
    optimization_checkboxes,
    quantization_checkboxes,
    kernels_checkboxes,
    # interactive inputs
    columns_checkboxes,
    search_bar,
    # outputs
    leaderboard_table,
    lat_score_mem_plot,
):
    """Register ``filter_rows_fn`` as the click handler of ``filter_button``.

    The input components are listed in the same order as the parameters of
    ``filter_rows_fn`` (machine, score, memory, backends, precisions,
    attentions, quantizations, kernels, columns, search). The two output
    components receive the two items that function returns: the leaderboard
    table and the latency/score/memory plot.
    """
    # Order matters: it has to line up with filter_rows_fn's signature.
    click_inputs = [
        machine_textbox,
        score_slider,
        memory_slider,
        backend_checkboxes,
        datatype_checkboxes,
        optimization_checkboxes,
        quantization_checkboxes,
        kernels_checkboxes,
        columns_checkboxes,
        search_bar,
    ]
    # Order matters here too: it mirrors filter_rows_fn's returned list.
    click_outputs = [leaderboard_table, lat_score_mem_plot]

    filter_button.click(
        fn=filter_rows_fn,
        inputs=click_inputs,
        outputs=click_outputs,
    )
|
|
|
|
|
def select_columns_fn(machine, columns, search, llm_perf_df=None):
    """Return the leaderboard rows matching ``search``, limited to ``columns``.

    Args:
        machine: Machine identifier, used to load the data when
            ``llm_perf_df`` is not supplied.
        columns: Column labels to keep, in the order they should appear.
        search: Text matched case-insensitively against the model-name
            column. It is interpreted as a regular expression. If it is not
            a valid pattern (for example ``"("`` or ``"["``), it is matched
            as literal text instead of raising ``re.error``.
        llm_perf_df: Optional pre-loaded (or pre-filtered) DataFrame. When
            ``None``, the data is loaded with ``get_llm_perf_df``.

    Returns:
        The DataFrame produced by ``get_leaderboard_df``, filtered by model
        name and restricted to ``columns``.
    """
    # Function-scope import keeps this block self-contained.
    import re

    # The search text comes straight from a UI text box. Keep valid patterns
    # untouched, and escape invalid ones so they match literally.
    try:
        re.compile(search)
    except re.error:
        search = re.escape(search)

    if llm_perf_df is None:
        llm_perf_df = get_llm_perf_df(machine=machine)

    selected_leaderboard_df = get_leaderboard_df(llm_perf_df)

    # NOTE(review): the non-ASCII characters in the column key look like a
    # mis-decoded emoji. The key is kept exactly as found, because it must
    # match the DataFrame's column name.
    selected_leaderboard_df = selected_leaderboard_df[
        selected_leaderboard_df["Model π€"].str.contains(search, case=False)
    ]
    selected_leaderboard_df = selected_leaderboard_df[columns]

    return selected_leaderboard_df
|
|
|
|
|
def create_select_callback(
    # fixed
    machine_textbox,
    # interactive inputs
    columns_checkboxes,
    search_bar,
    # outputs
    leaderboard_table,
):
    """Refresh the leaderboard table when the columns or the search change.

    Both ``columns_checkboxes`` and ``search_bar`` get the same ``change``
    handler: ``select_columns_fn`` called with the machine textbox, the
    column selection and the search text, writing its result to
    ``leaderboard_table``.
    """
    # Same registration for both triggers; the column selector is wired
    # first and the search bar second.
    for trigger in (columns_checkboxes, search_bar):
        trigger.change(
            fn=select_columns_fn,
            inputs=[machine_textbox, columns_checkboxes, search_bar],
            outputs=[leaderboard_table],
        )
|
|