from collections import Counter
from streamlit_echarts import st_echarts  # pylint: disable=import-error
import numpy as np
import pandas as pd
import streamlit as st  # pylint: disable=import-error
import plotly.figure_factory as ff
from plotly import graph_objs as go
import plotly.express as px
from statistics import median

colors = {
    "blue": "#5470c6",
    "orange": "#FF7F0E",
    "green": "#94cc74",
    "saffron_mango": "#fac858",
    "red": "#ee6666",
    "light_blue": "#73c0de",
    "ocean_green": "#3ba272",
}

device_colors = {
    "x86": colors["blue"],
    "nvidia": colors["green"],
    "groq": colors["orange"],
}


class StageCount:
    def __init__(self, df: pd.DataFrame) -> None:
        self.all_models = len(df)
        self.base_onnx = int(np.sum(df["base_onnx"]))
        self.optimized_onnx = int(np.sum(df["optimized_onnx"]))
        self.all_ops_supported = int(np.sum(df["all_ops_supported"]))
        self.fp16_onnx = int(np.sum(df["fp16_onnx"]))
        self.compiles = int(np.sum(df["compiles"]))
        self.assembles = int(np.sum(df["assembles"]))


class DeviceStageCount:
    def __init__(self, df: pd.DataFrame) -> None:
        self.all_models = len(df)
        self.base_onnx = int(np.sum(df["onnx_exported"]))
        self.optimized_onnx = int(np.sum(df["onnx_optimized"]))
        self.fp16_onnx = int(np.sum(df["onnx_converted"]))
        self.x86 = df.loc[df.x86_latency != "-", "x86_latency"].count()
        self.nvidia = df.loc[df.nvidia_latency != "-", "nvidia_latency"].count()
        self.groq = df.loc[
            df.groq_estimated_latency != "-", "groq_estimated_latency"
        ].count()


def stages_count_summary(current_df: pd.DataFrame, prev_df: pd.DataFrame) -> None:
    """
    Show count of how many models compile, assemble, etc
    """
    current = StageCount(current_df)
    prev = StageCount(prev_df)

    kpi = st.columns(7)
    kpi[0].metric(
        label="All models",
        value=current.all_models,
        delta=current.all_models - prev.all_models,
    )
    kpi[1].metric(
        label="Converts to ONNX",
        value=current.base_onnx,
        delta=current.base_onnx - prev.base_onnx,
    )
    kpi[2].metric(
        label="Optimizes ONNX file",
        value=current.optimized_onnx,
        delta=current.optimized_onnx - prev.optimized_onnx,
    )
    kpi[3].metric(
        label="Supports all ops",
        value=current.all_ops_supported,
        delta=current.all_ops_supported - prev.all_ops_supported,
    )
    kpi[4].metric(
        label="Converts to FP16",
        value=current.fp16_onnx,
        delta=current.fp16_onnx - prev.fp16_onnx,
    )
    kpi[5].metric(
        label="Compiles",
        value=current.compiles,
        delta=current.compiles - prev.compiles,
    )
    kpi[6].metric(
        label="Assembles",
        value=current.assembles,
        delta=current.assembles - prev.assembles,
    )

    # Show Sankey graph with percentages
    sk_val = {
        "All models": "100%",
        "Converts to ONNX": str(int(100 * current.base_onnx / current.all_models))
        + "%",
        "Optimizes ONNX file": str(
            int(100 * current.optimized_onnx / current.all_models)
        )
        + "%",
        "Supports all ops": str(
            int(100 * current.all_ops_supported / current.all_models)
        )
        + "%",
        "Converts to FP16": str(int(100 * current.fp16_onnx / current.all_models))
        + "%",
        "Compiles": str(int(100 * current.compiles / current.all_models)) + "%",
        "Assembles": str(int(100 * current.assembles / current.all_models)) + "%",
    }

    option = {
        "series": {
            "type": "sankey",
            "animationDuration": 1,
            "top": "0%",
            "bottom": "20%",
            "left": "0%",
            "right": "13.5%",
            "darkMode": "true",
            "nodeWidth": 2,
            "textStyle": {"fontSize": 16},
            "lineStyle": {"curveness": 0},
            "layoutIterations": 0,
            "layout": "none",
            "emphasis": {"focus": "adjacency"},
            "data": [
                {
                    "name": "All models",
                    "value": sk_val["All models"],
                    "itemStyle": {"color": "white", "borderColor": "white"},
                },
                {
                    "name": "Converts to ONNX",
                    "value": sk_val["Converts to ONNX"],
                    "itemStyle": {"color": "white", "borderColor": "white"},
                },
                {
                    "name": "Optimizes ONNX file",
                    "value": sk_val["Optimizes ONNX file"],
                    "itemStyle": {"color": "white", "borderColor": "white"},
                },
                {
                    "name": "Supports all ops",
                    "value": sk_val["Supports all ops"],
                    "itemStyle": {"color": "white", "borderColor": "white"},
                },
                {
                    "name": "Converts to FP16",
                    "value": sk_val["Converts to FP16"],
                    "itemStyle": {"color": "white", "borderColor": "white"},
                },
                {
                    "name": "Compiles",
                    "value": sk_val["Compiles"],
                    "itemStyle": {"color": "white", "borderColor": "white"},
                },
                {
                    "name": "Assembles",
                    "value": sk_val["Assembles"],
                    "itemStyle": {"color": "white", "borderColor": "white"},
                },
            ],
            "label": {
                "position": "insideTopLeft",
                "borderWidth": 0,
                "fontSize": 16,
                "color": "white",
                "textBorderWidth": 0,
                "formatter": "{c}",
            },
            "links": [
                {
                    "source": "All models",
                    "target": "Converts to ONNX",
                    "value": current.base_onnx,
                },
                {
                    "source": "Converts to ONNX",
                    "target": "Optimizes ONNX file",
                    "value": current.optimized_onnx,
                },
                {
                    "source": "Optimizes ONNX file",
                    "target": "Supports all ops",
                    "value": current.all_ops_supported,
                },
                {
                    "source": "Supports all ops",
                    "target": "Converts to FP16",
                    "value": current.fp16_onnx,
                },
                {
                    "source": "Converts to FP16",
                    "target": "Compiles",
                    "value": current.compiles,
                },
                {
                    "source": "Compiles",
                    "target": "Assembles",
                    "value": current.assembles,
                },
            ],
        }
    }
    st_echarts(
        options=option,
        height="50px",
    )


def workload_origin(df: pd.DataFrame) -> None:
    """
    Show pie chart that groups models by author
    """
    all_authors = list(df.loc[:, "author"])
    author_count = {i: all_authors.count(i) for i in all_authors}
    all_models = len(df)

    options = {
        "darkMode": "true",
        "textStyle": {"fontSize": 16},
        "tooltip": {"trigger": "item"},
        "series": [
            {
                # "Invisible" chart, used to show author labels
                "name": "Name of corpus:",
                "type": "pie",
                "radius": ["70%", "70%"],
                "data": [
                    {"value": author_count[k], "name": k} for k in author_count.keys()
                ],
                "label": {
                    "formatter": "{b}\n{d}%",
                },
            },
            {
                # Actual graph where data is shown
                "name": "Name of corpus:",
                "type": "pie",
                "radius": ["50%", "70%"],
                "data": [
                    {"value": author_count[k], "name": k} for k in author_count.keys()
                ],
                "emphasis": {
                    "itemStyle": {
                        "shadowBlur": 10,
                        "shadowOffsetX": 0,
                        "shadowColor": "rgba(0, 0, 0, 0.5)",
                    }
                },
                "label": {
                    "position": "inner",
                    "formatter": "{c}",
                    "color": "black",
                    "textBorderWidth": 0,
                },
            },
            {
                # Show total number of models inside
                "name": "Total number of models:",
                "type": "pie",
                "radius": ["0%", "0%"],
                "data": [{"value": all_models, "name": "Total"}],
                "silent": "true",
                "label": {
                    "position": "inner",
                    "formatter": "{c}",
                    "color": "white",
                    "fontSize": 30,
                    "textBorderWidth": 0,
                },
            },
        ],
    }
    st_echarts(
        options=options,
        height="400px",
    )
def parameter_histogram(df: pd.DataFrame, show_assembled=True) -> None:
    # Add parameters histogram
    all_models = [float(x) / 1000000 for x in df["params"] if x != "-"]

    hist_data = []
    group_labels = []
    if all_models != []:
        hist_data.append(all_models)
        if show_assembled:
            group_labels.append("Models we tried compiling")
        else:
            group_labels.append("All models")

    if show_assembled:
        assembled_models = df[
            df["assembles"] == True  # pylint: disable=singleton-comparison
        ]
        assembled_models = [
            float(x) / 1000000 for x in assembled_models["params"] if x != "-"
        ]
        if assembled_models != []:
            hist_data.append(assembled_models)
            group_labels.append("Assembled models")

    if hist_data:
        fig = ff.create_distplot(
            hist_data,
            group_labels,
            bin_size=25,
            histnorm="",
            colors=list(colors.values()),
            curve_type="normal",
        )
        fig.layout.update(xaxis_title="Parameters in millions")
        fig.layout.update(yaxis_title="count")
        fig.update_xaxes(range=[1, 1000])
        st.plotly_chart(fig, use_container_width=True)
    else:
        st.markdown(
            """At least one model needs to reach the compiler to show this graph 😅"""
        )


def speedup_bar_chart_legacy(df: pd.DataFrame) -> None:
    """
    This function will be removed when we start getting CPU numbers for the daily tests
    """
    # Prepare data
    assembles = np.sum(df["assembles"])
    df = df[["model_name", "groq_nvidia_compute_ratio", "groq_nvidia_e2e_ratio"]]
    df = df.sort_values(by=["model_name"])
    df = df[(df.groq_nvidia_compute_ratio != "-")]
    df = df[(df.groq_nvidia_e2e_ratio != "-")]
    df["groq_nvidia_compute_ratio"] = df["groq_nvidia_compute_ratio"].astype(float)
    df["groq_nvidia_e2e_ratio"] = df["groq_nvidia_e2e_ratio"].astype(float)

    if len(df) == 0 and assembles > 0:
        st.markdown(
            (
                "We do not have GPU numbers for the model(s) mapped to the GroqChip."
                " This is potentially due to lack of out-of-the-box TensorRT support."
            )
        )
    elif assembles == 0:
        st.markdown(
            "Nothing to show here since no models have been successfully assembled."
        )
    else:
        data = [
            go.Bar(
                x=df["model_name"],
                y=df["groq_nvidia_compute_ratio"],
                name="Compute only",
            ),
            go.Bar(
                x=df["model_name"],
                y=df["groq_nvidia_e2e_ratio"],
                name="Compute + estimated I/O",
            ),
        ]
        layout = go.Layout(
            barmode="overlay",
            yaxis_title="Speedup compared to A100 GPU",
            colorway=list(colors.values()),
        )
        fig = dict(data=data, layout=layout)
        st.plotly_chart(fig, use_container_width=True)
        st.markdown(
            (
                "*Estimated I/O does NOT include delays caused by Groq's runtime. "
                "See FAQ for details."
            ),
            unsafe_allow_html=True,
        )


def speedup_text_summary_legacy(df: pd.DataFrame) -> None:  # pylint: disable=line-too-long
    """
    This function will be removed when we start getting CPU numbers for the daily tests
    """
    # Remove empty elements and convert to float
    df = df[(df.groq_nvidia_compute_ratio != "-")]
    df = df[(df.groq_nvidia_e2e_ratio != "-")]
    df["groq_nvidia_compute_ratio"] = df["groq_nvidia_compute_ratio"].astype(float)
    df["groq_nvidia_e2e_ratio"] = df["groq_nvidia_e2e_ratio"].astype(float)

    # Show stats
    st.markdown(
        f"""
Average speedup of GroqChip™ considering compute only:

{round(df["groq_nvidia_compute_ratio"].mean(),2)}x

min {round(df["groq_nvidia_compute_ratio"].min(),2)}x; median {round(median(df["groq_nvidia_compute_ratio"]),2)}x; max {round(df["groq_nvidia_compute_ratio"].max(),2)}x

Average speedup of GroqChip™ considering compute + estimated I/O*:

{round(df["groq_nvidia_e2e_ratio"].mean(),2)}x

min {round(df["groq_nvidia_e2e_ratio"].min(),2)}x; median {round(median(df["groq_nvidia_e2e_ratio"]),2)}x; max {round(df["groq_nvidia_e2e_ratio"].max(),2)}x
""",
        unsafe_allow_html=True,
    )


def process_latency_data(df, baseline):
    df = df[["model_name", "groq_estimated_latency", "nvidia_latency", "x86_latency"]]
    df = df.rename(columns={"groq_estimated_latency": "groq_latency"})
    df = df.sort_values(by=["model_name"])

    df.x86_latency.replace(["-"], [float("inf")], inplace=True)
    df.nvidia_latency.replace(["-"], [float("inf")], inplace=True)
    df.groq_latency.replace(["-"], [float("inf")], inplace=True)

    df["groq_latency"] = df["groq_latency"].astype(float)
    df["nvidia_latency"] = df["nvidia_latency"].astype(float)
    df["x86_latency"] = df["x86_latency"].astype(float)

    df["groq_compute_ratio"] = df[f"{baseline}_latency"] / df["groq_latency"]
    df["nvidia_compute_ratio"] = df[f"{baseline}_latency"] / df["nvidia_latency"]
    df["x86_compute_ratio"] = df[f"{baseline}_latency"] / df["x86_latency"]
    return df


def speedup_bar_chart(df: pd.DataFrame, baseline) -> None:
    if len(df) == 0:
        st.markdown(
            "Nothing to show here since no models have been successfully benchmarked."
        )
    else:
        df = process_latency_data(df, baseline)

        bar_chart = {}
        bar_chart["nvidia"] = go.Bar(
            x=df["model_name"],
            y=df["nvidia_compute_ratio"],
            name="NVIDIA A100",
        )
        bar_chart["groq"] = go.Bar(
            x=df["model_name"],
            y=df["groq_compute_ratio"],
            name="GroqChip 1",
        )
        bar_chart["x86"] = go.Bar(
            x=df["model_name"],
            y=df["x86_compute_ratio"],
            name="Intel(R) Xeon(R)",
        )

        # Draw the baseline first so it sits behind the other bars in the overlay
        plot_sequence = list(bar_chart.keys())
        plot_sequence.insert(0, plot_sequence.pop(plot_sequence.index(baseline)))

        data = [bar_chart[device_type] for device_type in plot_sequence]
        color_sequence = [device_colors[device_type] for device_type in plot_sequence]

        layout = go.Layout(
            barmode="overlay",
            legend={
                "orientation": "h",
                "xanchor": "center",
                "x": 0.5,
                "y": 1.2,
            },
            yaxis_title="Latency Speedup",
            colorway=color_sequence,
            height=500,
        )
        fig = dict(data=data, layout=layout)
        st.plotly_chart(fig, use_container_width=True)
        st.markdown(
            "*Estimated I/O does NOT include delays caused by Groq's runtime.",
            unsafe_allow_html=True,
        )


def kpi_to_markdown(
    compute_ratio, device, num_baseline_models, is_baseline=False, color="blue"
):
    if is_baseline:
        title = f"""

Median {device} Acceleration ({len(compute_ratio)} models):
"""
        return (
            title
            + f"""
{1}x (Baseline)
"""
        )

    title = f"""
Median {device} Acceleration ({len(compute_ratio)}/{num_baseline_models} models):
"""
    if len(compute_ratio) > 0:
        kpi_min, kpi_median, kpi_max = (
            round(compute_ratio.min(), 2),
            round(median(compute_ratio), 2),
            round(compute_ratio.max(), 2),
        )
    else:
        kpi_min, kpi_median, kpi_max = 0, 0, 0

    return (
        title
        + f"""
{kpi_median}x

min {kpi_min}x; max {kpi_max}x
"""
    )


def speedup_text_summary(df: pd.DataFrame, baseline) -> None:
    df = process_latency_data(df, baseline)

    # Some latencies are "infinite" because they could not be calculated
    # To calculate statistics, we remove all elements of df where the baseline latency is inf
    df = df[(df[baseline + "_latency"] != float("inf"))]

    # Setting latencies that could not be calculated to infinity also causes some
    # compute ratios to be zero; we remove those to avoid doing any calculations
    # with infinite latencies
    x86_compute_ratio = df["x86_compute_ratio"].to_numpy()
    nvidia_compute_ratio = df["nvidia_compute_ratio"].to_numpy()
    groq_compute_ratio = df["groq_compute_ratio"].to_numpy()
    x86_compute_ratio = x86_compute_ratio[x86_compute_ratio != 0]
    nvidia_compute_ratio = nvidia_compute_ratio[nvidia_compute_ratio != 0]
    groq_compute_ratio = groq_compute_ratio[groq_compute_ratio != 0]

    num_baseline_models = len(df[f"{baseline}_compute_ratio"])

    x86_text = kpi_to_markdown(
        x86_compute_ratio,
        device="Intel(R) Xeon(R) X40 CPU @ 2.00GHz",
        num_baseline_models=num_baseline_models,
        color="blue",
        is_baseline=baseline == "x86",
    )
    groq_text = kpi_to_markdown(
        groq_compute_ratio,
        device="GroqChip 1 Estimated",
        num_baseline_models=num_baseline_models,
        color="orange",
        is_baseline=baseline == "groq",
    )
    nvidia_text = kpi_to_markdown(
        nvidia_compute_ratio,
        device="NVIDIA A100-PCIE-40GB",
        num_baseline_models=num_baseline_models,
        color="green",
        is_baseline=baseline == "nvidia",
    )

    cols = st.columns(3)
    with cols[0]:
        st.markdown(f"""{x86_text}""", unsafe_allow_html=True)
    with cols[1]:
        st.markdown(f"""{nvidia_text}""", unsafe_allow_html=True)
    with cols[2]:
        st.markdown(f"""{groq_text}""", unsafe_allow_html=True)


def compiler_errors(df: pd.DataFrame) -> None:
    compiler_errors = df[df["compiler_error"] != "-"]["compiler_error"]
    compiler_errors = Counter(compiler_errors)
    if len(compiler_errors) > 0:
        compiler_errors = pd.DataFrame.from_dict(
            compiler_errors, orient="index"
        ).reset_index()
        compiler_errors = compiler_errors.set_axis(["error", "count"], axis=1)
        compiler_errors["error"] = [ce[:80] for ce in compiler_errors["error"]]

        fig = px.bar(
            compiler_errors,
            x="count",
            y="error",
            orientation="h",
            height=400,
        )
        fig.update_traces(marker_color=colors["blue"])
        st.plotly_chart(fig, use_container_width=True)
    else:
        st.markdown("""No compiler errors found :tada:""")


def io_fraction(df: pd.DataFrame) -> None:
    fig = go.Figure()
    for chips in ["1", "2", "4", "8"]:
        tmp = df[[model_entry == chips for model_entry in df["groq_chips_used"]]]
        if len(tmp) == 0:
            continue
        tmp = tmp[[model_entry != "-" for model_entry in tmp["groq_compute_latency"]]]
        if len(tmp) == 0:
            continue
        tmp = tmp[[model_entry != "-" for model_entry in tmp["groq_latency"]]]
        if len(tmp) == 0:
            continue

        compute_latency = tmp["groq_compute_latency"].astype("float")
        e2e_latency = tmp["groq_latency"].astype("float")
        io_fraction = 1 - compute_latency / e2e_latency

        if chips == "1":
            name = f"{chips} GroqChip ({len(tmp)} models)"
        else:
            name = f"{chips} GroqChips \n({len(tmp)} models)"
        fig.add_trace(
            go.Box(
                y=io_fraction,
                name=name,
            )
        )

    fig.layout.update(xaxis_title="Models compiled for X GroqChip Processors")
    fig.layout.update(yaxis_title="Estimated fraction of time spent on I/O")
    fig.layout.update(colorway=list(colors.values()))
    st.plotly_chart(fig, use_container_width=True)
def results_table(df: pd.DataFrame):
    model_name = st.text_input("", placeholder="Filter model by name")
    if model_name != "":
        df = df[[model_name in x for x in df["Model Name"]]]
    st.dataframe(df, height=min((len(df) + 1) * 35, 35 * 21))


def device_funnel_metrics(num_models: int, num_total_models: int) -> str:
    """
    Calculates the percentage of num_models relative to num_total_models
    Avoids ZeroDivisionError when num_total_models is zero
    """
    models_message = f"{num_models} model"
    models_message = models_message + "s" if num_models != 1 else models_message
    percentage_message = ""
    if num_total_models > 0:
        model_ratio = num_models / num_total_models
        if model_ratio < 0.01 and model_ratio != 0:
            percentage_message = " - < 1%"
        else:
            percentage_message = f" - {int(100*num_models / num_total_models)}%"
    return f"{models_message}{percentage_message}"


def device_funnel(df: pd.DataFrame) -> None:
    """
    Show count of how many models compile, assemble, etc
    """
    summ = DeviceStageCount(df)

    stages = [
        "All models",
        "Export to ONNX",
        "Optimize ONNX file",
        "Convert to FP16",
        "Acquire Performance",
    ]
    cols = st.columns(len(stages))
    for idx, stage in enumerate(stages):
        with cols[idx]:
            st.markdown(stage)

    # Show Sankey graph with percentages
    sk_val = {
        "All models": device_funnel_metrics(summ.all_models, summ.all_models),
        "Converts to ONNX": device_funnel_metrics(summ.base_onnx, summ.all_models),
        "Optimizes ONNX file": device_funnel_metrics(
            summ.optimized_onnx, summ.all_models
        ),
        "Converts to FP16": device_funnel_metrics(summ.fp16_onnx, summ.all_models),
        "Acquires Nvidia Perf": device_funnel_metrics(summ.nvidia, summ.all_models)
        + " (Nvidia)",
        "Acquires Groq Perf": device_funnel_metrics(summ.groq, summ.all_models)
        + " (Groq)",
        "Acquires x86 Perf": device_funnel_metrics(summ.x86, summ.all_models)
        + " (x86)",
    }

    # Calculate bar heights for each of the devices
    # Bar height is proportional to the number of models benchmarked by each device
    default_bar_size = 1
    target_combined_height = max(default_bar_size, summ.fp16_onnx)
    device_bar_size = target_combined_height / 3

    option = {
        "series": {
            "type": "sankey",
            "animationDuration": 1,
            "top": "0%",
            "bottom": "20%",
            "left": "0%",
            "right": "19%",
            "darkMode": "true",
            "nodeWidth": 2,
            "textStyle": {"fontSize": 16},
            "nodeAlign": "left",
            "lineStyle": {"curveness": 0},
            "layoutIterations": 0,
            "nodeGap": 12,
            "layout": "none",
            "emphasis": {"focus": "adjacency"},
            "data": [
                {
                    "name": "All models",
                    "value": sk_val["All models"],
                    "itemStyle": {"color": "white", "borderColor": "white"},
                },
                {
                    "name": "Converts to ONNX",
                    "value": sk_val["Converts to ONNX"],
                    "itemStyle": {"color": "white", "borderColor": "white"},
                },
                {
                    "name": "Optimizes ONNX file",
                    "value": sk_val["Optimizes ONNX file"],
                    "itemStyle": {"color": "white", "borderColor": "white"},
                },
                {
                    "name": "Converts to FP16",
                    "value": sk_val["Converts to FP16"],
                    "itemStyle": {"color": "white", "borderColor": "white"},
                },
                {
                    "name": "Acquires Nvidia Perf",
                    "value": sk_val["Acquires Nvidia Perf"],
                    "itemStyle": {
                        "color": device_colors["nvidia"],
                        "borderColor": device_colors["nvidia"],
                    },
                },
                {
                    "name": "Acquires Groq Perf",
                    "value": sk_val["Acquires Groq Perf"],
                    "itemStyle": {
                        "color": device_colors["groq"],
                        "borderColor": device_colors["groq"],
                    },
                },
                {
                    "name": "Acquires x86 Perf",
                    "value": sk_val["Acquires x86 Perf"],
                    "itemStyle": {
                        "color": device_colors["x86"],
                        "borderColor": device_colors["x86"],
                    },
                },
            ],
            "label": {
                "position": "insideTopLeft",
                "borderWidth": 0,
                "fontSize": 16,
                "color": "white",
                "textBorderWidth": 0,
                "formatter": "{c}",
            },
            "links": [
                {
                    "source": "All models",
                    "target": "Converts to ONNX",
                    "value": max(default_bar_size, summ.all_models),
                },
                {
                    "source": "Converts to ONNX",
                    "target": "Optimizes ONNX file",
                    "value": max(default_bar_size, summ.optimized_onnx),
                },
                {
                    "source": "Optimizes ONNX file",
                    "target": "Converts to FP16",
                    "value": max(default_bar_size, summ.fp16_onnx),
                },
                {
                    "source": "Converts to FP16",
                    "target": "Acquires Nvidia Perf",
                    "value": device_bar_size,
                },
                {
                    "source": "Converts to FP16",
                    "target": "Acquires Groq Perf",
                    "value": device_bar_size,
                },
                {
                    "source": "Converts to FP16",
                    "target": "Acquires x86 Perf",
                    "value": device_bar_size,
                },
            ],
        }
    }
    st_echarts(
        options=option,
        height="70px",
    )
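

# Illustrative only -- a minimal sketch of a per-device report page built from the
# helpers above; it is not called by the dashboard. The CSV path and the choice of
# "x86" as the baseline are assumptions; the frame must carry the columns this
# module reads ("onnx_exported", "onnx_optimized", "onnx_converted", "x86_latency",
# "nvidia_latency", "groq_estimated_latency", "model_name").
def _device_report_page_example() -> None:
    df = pd.read_csv("device_report.csv")  # assumed path to the device report
    device_funnel(df)
    speedup_bar_chart(df, baseline="x86")
    speedup_text_summary(df, baseline="x86")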