|
import time |
|
from os import listdir |
|
from os.path import isfile, join |
|
import numpy as np |
|
import pandas as pd |
|
from collections import Counter |
|
import plotly.express as px |
|
from plotly import graph_objs as go |
|
import streamlit as st |
|
import plotly.figure_factory as ff |
|
import numpy as np |
|
from collections import Counter |
|
from streamlit_echarts import st_echarts |
|
import streamlit_toggle as tog |
|
|
|
# Console reminder; this goes to stdout, not to the rendered page.
print("Make sure to activate your VPN before running this script")

st.set_page_config(
    page_title="ML Agility tracker",
    page_icon="⚡",  # restored: the glyph had been mis-decoded into two stray characters
    layout="wide",
)

# Session flag that is created once and then left alone on later runs.
state = st.session_state
if "INFO_CLOSED" not in state:
    state.INFO_CLOSED = False

st.title("ML Agility Tracker ⚡")

# Shared colour palette. Entries 0-2 are also picked by index further down
# in this file (CPU, GroqChip and GPU colours of the benchmark section).
colorway = [
    "#5470c6",
    "#FF7F0E",
    "#94cc74",
    "#92cb75",
    "#fac858",
    "#ee6666",
    "#73c0de",
    "#3ba272",
]

# The link target needs an explicit scheme: a bare "mlagility.org" is a
# relative URL and would resolve against the address of this app.
st.markdown(
    "Machine Learning Agility (MLAgility) measures vendor progress towards providing this turnkey solution to their customers. For more details, please visit [mlagility.org](https://mlagility.org).",
    unsafe_allow_html=True,
)
|
|
|
|
|
def add_filter(
    data_frame_list, name, label, options=None, num_cols=1, last_is_others=True
):
    """Render a group of checkboxes and filter every data frame by the choice.

    Args:
        data_frame_list: List of DataFrames that all have a ``label`` column.
            The last frame supplies the available values. The list is
            updated in place and also returned.
        name: Heading displayed above the checkboxes.
        label: Column to filter on; also the prefix of each checkbox key.
        options: Checkbox labels to offer. When ``None`` they are derived
            from the last frame, ordered by frequency; if more than 8
            distinct values exist only the 7 most frequent are listed,
            followed by an ``"others"`` catch-all.
        num_cols: Number of layout columns the checkboxes are spread over.
        last_is_others: Whether the last entry of a caller-supplied
            ``options`` stands for "every value not listed". Recomputed
            when ``options`` is ``None``.

    Returns:
        ``data_frame_list``, each frame reduced to the rows whose ``label``
        value was selected. Frames are left untouched when nothing is ticked
        or when the column holds no value other than ``"-"``.
    """
    all_options = set(data_frame_list[-1][label])
    all_options.discard("-")  # "-" is never offered as a choice
    if not all_options:
        return data_frame_list

    st.markdown(f"#### {name}")

    if options is None:
        # Most frequent values first.
        value_counts = Counter(data_frame_list[-1][label])
        sorted_options = [
            value
            for value in sorted(value_counts, key=value_counts.get, reverse=True)
            if value != "-"
        ]
        if len(sorted_options) > 8:
            options = sorted_options[:7] + ["others"]
            last_is_others = True
        else:
            options = sorted_options
            last_is_others = False

    # Distribute the checkboxes round-robin over the layout columns.
    cols = st.columns(num_cols)
    instantiated_checkbox = []
    for idx, option in enumerate(options):
        with cols[idx % num_cols]:
            instantiated_checkbox.append(
                st.checkbox(option, False, key=f"{label}_{option}")
            )

    selected_options = {
        option
        for option, checked in zip(options, instantiated_checkbox)
        if checked
    }

    # A ticked catch-all box is replaced by every value that has no checkbox
    # of its own. The emptiness check keeps an empty ``options`` list from
    # raising IndexError on ``[-1]``.
    if last_is_others and instantiated_checkbox and instantiated_checkbox[-1]:
        selected_options.discard(options[-1])
        selected_options.update(all_options.difference(options))

    # No selection means "no filtering", not "hide everything".
    if selected_options:
        for idx, data_frame in enumerate(data_frame_list):
            keep_row = [entry in selected_options for entry in data_frame[label]]
            data_frame_list[idx] = data_frame[keep_row]
    return data_frame_list
|
|
|
|
|
def parameter_filter(data_frame_list):
    """Render a parameter-count range slider and filter every frame by it.

    Args:
        data_frame_list: List of DataFrames with a ``"params"`` column whose
            entries can be converted with ``int()``. The list is updated in
            place and also returned.

    Returns:
        ``data_frame_list``, each frame reduced to the rows whose parameter
        count lies inside the selected range (both ends inclusive).
    """
    st.markdown("#### Parameters")

    start_params, end_params = st.select_slider(
        "Select a range parameters (in millions)",
        options=[str(x) for x in np.arange(0, 1001, 10, dtype=int)],
        value=("0", "1000"),
    )

    # The slider yields strings expressed in millions; convert them once
    # instead of once per row.
    lower_bound = int(start_params) * 1000000
    upper_bound = int(end_params) * 1000000

    for idx, data_frame in enumerate(data_frame_list):
        in_range = [
            lower_bound <= int(model_entry) <= upper_bound
            for model_entry in data_frame["params"]
        ]
        data_frame_list[idx] = data_frame[in_range]

    return data_frame_list
|
|
|
|
|
with st.sidebar:
    st.markdown("# Filters")

    selected_test_type = "mlagility"
    report_folder = "reports/mlagility"

    # Every regular file in the folder is treated as a report. Sorting makes
    # the last file the default choice; presumably the names sort by date,
    # since the selector is labelled "Test date" - confirm.
    reports = sorted(
        f for f in listdir(report_folder) if isfile(join(report_folder, f))
    )

    # Without this guard an empty folder leads to an invalid selectbox index
    # and a failing ``reports.index`` lookup.
    if not reports:
        st.error(f"No report files found in '{report_folder}'.")
        st.stop()

    selected_report = st.selectbox("Test date", reports, index=len(reports) - 1)
    selected_report_idx = reports.index(selected_report)
    # The report just before the selected one; the first report is paired
    # with itself.
    prev_report = reports[max(0, selected_report_idx - 1)]
    mla_report = pd.read_csv(f"{report_folder}/{selected_report}")
    prev_mla_report = pd.read_csv(f"{report_folder}/{prev_report}")

    # "-" entries in these columns are replaced by 0 so that the columns can
    # be stored as integers.
    for p in ["chips_used", "cycles", "params"]:
        mla_report[p] = mla_report[p].replace("-", 0).astype("int64")
        prev_mla_report[p] = prev_mla_report[p].replace("-", 0).astype("int64")

    # Filter by number of parameters.
    mla_report, prev_mla_report = parameter_filter([mla_report, prev_mla_report])

    # A fixed author list is only used for the "monthly" test type; for any
    # other type the options are derived from the data.
    authors = (
        [
            "google",
            "apple",
            "facebook",
            "openai",
            "microsoft",
            "huggingface",
            "CompVis",
            "others",
        ]
        if selected_test_type == "monthly"
        else None
    )
    mla_report, prev_mla_report = add_filter(
        [mla_report, prev_mla_report],
        "Authors",
        label="author",
        options=authors,
        num_cols=2,
    )

    # Task options are derived from the data. A fixed list used to be
    # assigned here and was immediately overwritten with None; it is kept
    # for reference only:
    #   "Image Classification", "Translation", "Image Segmentation",
    #   "Fill-Mask", "Text-to-Image", "Token Classification",
    #   "Sentence Similarity", "Audio Classification", "Question Answering",
    #   "Summarization", "other"
    tasks = None
    mla_report, prev_mla_report = add_filter(
        [mla_report, prev_mla_report], "Tasks", label="task", options=tasks
    )
|
|
|
|
|
def detailed_progress_list(df_new, df_old, filter=None):
    """Do nothing and return ``None``; the progress list is switched off.

    All three arguments are accepted and ignored. ``filter`` shadows the
    builtin of the same name but is part of the call signature.
    """
    return None

    # Disabled implementation, kept for reference. It was never executed
    # because it sat after the return statement. The glyph in front of
    # {model_name} is mis-encoded in this copy of the file.
    #
    # if filter is not None:
    #     df_new = df_new[(df_new[filter] == True)]
    #     df_old = df_old[(df_old[filter] == True)]
    #
    # progress = df_new[~(df_new["hash"].isin(df_old["hash"]))].reset_index(drop=True)
    # regression = df_old[~(df_old["hash"].isin(df_new["hash"]))].reset_index(drop=True)
    #
    # for model_name in progress["model_name"]:
    #     st.markdown(
    #         f'<span style="color:green">β {model_name}</span>',
    #         unsafe_allow_html=True,
    #     )
    # for model_name in regression["model_name"]:
    #     st.markdown(
    #         f'<span style="color:red">β {model_name}</span>',
    #         unsafe_allow_html=True,
    #     )
|
|
|
|
|
|
|
placeholder = st.empty()

with placeholder.container():
    st.markdown("## Summary Results")

    # Totals over the filtered report. In the visible part of this file only
    # ``all_models`` and ``assembles`` are read again.
    all_models = len(mla_report)
    base_onnx = np.sum(mla_report["base_onnx"])
    optimized_onnx = np.sum(mla_report["optimized_onnx"])
    all_ops_supported = np.sum(mla_report["all_ops_supported"])
    fp16_onnx = np.sum(mla_report["fp16_onnx"])
    compiles = np.sum(mla_report["compiles"])
    assembles = np.sum(mla_report["assembles"])

    all_authors = list(mla_report.loc[:, "author"])
    # NOTE(review): the "model_type" breakdown used to be read here and was
    # then unconditionally reset to an empty list, so the inner pie has no
    # data. That net behaviour is kept; confirm whether it is intentional.
    all_sources = []

    # Counter keeps first-seen order, like the per-item ``list.count`` loop
    # it replaces, without the quadratic cost.
    author_count = Counter(all_authors)
    sources_count = Counter(all_sources)
    author_slices = [
        {"value": count, "name": author} for author, count in author_count.items()
    ]
    source_slices = [
        {"value": count, "name": source} for source, count in sources_count.items()
    ]

    cols = st.columns(2)
    with cols[0]:
        st.markdown("""#### Workload origin""")

        # Flags are real booleans: a string such as "false" is truthy once
        # it reaches the JavaScript side.
        options = {
            "darkMode": True,
            "textStyle": {"fontSize": 16},
            "tooltip": {"trigger": "item"},
            "series": [
                {
                    # Inner pie for the model sources (currently empty).
                    "name": "Access From",
                    "type": "pie",
                    "radius": [0, "30%"],
                    "label": {"position": "inner", "fontSize": 14},
                    "labelLine": {"show": False},
                    "data": source_slices,
                },
                {
                    # Zero-width ring that only carries the outer labels
                    # (author name and percentage).
                    "name": "Name of corpus:",
                    "type": "pie",
                    "radius": ["70%", "70%"],
                    "data": author_slices,
                    "label": {
                        "formatter": "{b}\n{d}%",
                    },
                },
                {
                    # Visible ring with the model count written inside
                    # each slice.
                    "name": "Name of corpus:",
                    "type": "pie",
                    "radius": ["50%", "70%"],
                    "data": author_slices,
                    "emphasis": {
                        "itemStyle": {
                            "shadowBlur": 10,
                            "shadowOffsetX": 0,
                            "shadowColor": "rgba(0, 0, 0, 0.5)",
                        }
                    },
                    "label": {
                        "position": "inner",
                        "formatter": "{c}",
                        "color": "black",
                        "textBorderWidth": 0,
                    },
                },
                {
                    # Zero-radius pie used to print the total in the centre.
                    "name": "Total number of models:",
                    "type": "pie",
                    "radius": ["0%", "0%"],
                    "data": [{"value": all_models, "name": "Total"}],
                    "silent": True,
                    "label": {
                        "position": "inner",
                        "formatter": "{c}",
                        "color": "white",
                        "fontSize": 30,
                        "textBorderWidth": 0,
                    },
                },
            ],
        }
        st_echarts(
            options=options,
            height="400px",
        )

    with cols[1]:
        # ``all_models`` is rebound here: from now on it is the list of
        # parameter counts expressed in millions.
        all_models = [float(x) / 1000000 for x in mla_report["params"] if x != "-"]

        hist_data = []
        group_labels = []
        if all_models:
            hist_data.append(all_models)
            group_labels.append("All models")

        st.markdown("""#### Parameter Size Distribution""")

        if hist_data:
            fig = ff.create_distplot(
                hist_data,
                group_labels,
                bin_size=25,
                histnorm="",
                colors=colorway,
                curve_type="normal",
            )
            fig.layout.update(xaxis_title="Parameters in millions")
            fig.layout.update(yaxis_title="count")
            fig.update_xaxes(range=[1, 1000])
            st.plotly_chart(fig, use_container_width=True)
        else:
            # The emoji is restored from mis-decoded bytes that had also
            # split this literal across two lines.
            st.markdown(
                """At least one model needs to reach the compiler to show this graph 😅"""
            )
|
|
|
# The benchmark section is only rendered for reports that carry both ratio
# columns.
if "tsp_gpu_compute_ratio" in mla_report and "tsp_gpu_e2e_ratio" in mla_report:
    cols = st.columns(2)
    # Set once the speed-up columns exist, so the summary on the right is
    # not built from columns that were never computed.
    has_benchmark_data = False

    with cols[0]:
        st.markdown("""#### Benchmark results (latency)""")

        # Keep only models that have both a GroqChip and a GPU latency.
        df = mla_report[
            [
                "model_name",
                "tsp_estimated_e2e_latency",
                "gpu_e2e_latency",
            ]
        ]
        df = df.sort_values(by=["model_name"])
        df = df[(df.tsp_estimated_e2e_latency != "-")]
        df = df[(df.gpu_e2e_latency != "-")]
        df["tsp_estimated_e2e_latency"] = df["tsp_estimated_e2e_latency"].astype(
            float
        )
        df["gpu_e2e_latency"] = df["gpu_e2e_latency"].astype(float)

        if len(df) == 0 and assembles > 0:
            st.markdown(
                (
                    "We do not have GPU numbers for the model(s) mapped to the GroqChip."
                    " This is potentially due to lack of out-of-the-box TensorRT support."
                )
            )
        elif assembles == 0:
            st.markdown(
                "Nothing to show here since no models have been successfully assembled."
            )
        else:
            has_benchmark_data = True

            # NOTE(review): the CPU latency is derived from the other two
            # latencies rather than read from the report - confirm that
            # this placeholder is intended.
            df["cpu_latency"] = (
                df["tsp_estimated_e2e_latency"] + df["gpu_e2e_latency"]
            ) * 10
            df["tsp_cpu_compute_ratio"] = (
                df["cpu_latency"] / df["tsp_estimated_e2e_latency"]
            )
            df["gpu_cpu_compute_ratio"] = df["cpu_latency"] / df["gpu_e2e_latency"]

            data = [
                go.Bar(
                    x=df["model_name"],
                    y=df["gpu_cpu_compute_ratio"],
                    name="NVIDIA A100-PCIE-40GB",
                ),
                go.Bar(
                    x=df["model_name"],
                    y=df["tsp_cpu_compute_ratio"],
                    name="GroqChip 1",
                ),
                go.Bar(
                    x=df["model_name"],
                    # Constant 1.0 for every model: the CPU is the baseline.
                    y=df["cpu_latency"] * 0 + 1,
                    name="Intel(R) Xeon(R) Gold 6338 CPU",
                ),
            ]

            layout = go.Layout(
                barmode="overlay",
                legend={
                    "orientation": "h",
                    "xanchor": "center",
                    "x": 0.5,
                    "y": 1.2,
                },
                yaxis_title="Latency Speedup",
                # Trace order is GPU, GroqChip, CPU.
                colorway=[colorway[2], colorway[1], colorway[0]],
                height=600,
            )

            fig = dict(data=data, layout=layout)
            st.plotly_chart(fig, use_container_width=True)

            # Footnotes for the chart and the summary. The dagger is
            # restored from mis-decoded bytes.
            st.markdown(
                "<sup>*</sup>Estimated I/O does NOT include delays caused by Groq's runtime.",
                unsafe_allow_html=True,
            )
            st.markdown(
                "<sup>†</sup>Baseline corresponds to Intel(R) Xeon(R) Gold 6338 CPU @ 2.00GHz.",
                unsafe_allow_html=True,
            )

    # Reading the ratio columns without data raised KeyError, so the summary
    # is only rendered when they were computed above.
    if has_benchmark_data:
        with cols[1]:
            gpu_ratio = df["gpu_cpu_compute_ratio"]
            tsp_ratio = df["tsp_cpu_compute_ratio"]
            summary_lines = [
                "<br><br><br><br>",
                '<p style="font-family:sans-serif; font-size: 20px;text-align: center;">Intel(R) Xeon(R) Gold 6338 CPU @ 2.00GHz Acceleration:</p>',
                f'<p style="font-family:sans-serif; color:{colorway[0]}; font-size: 26px;text-align: center;"> 1x (Baseline)</p>',
                "<br><br>",
                '<p style="font-family:sans-serif; font-size: 20px;text-align: center;">NVIDIA A100-PCIE-40GB Acceleration:</p>',
                f'<p style="font-family:sans-serif; color:{colorway[2]}; font-size: 26px;text-align: center;"> {round(gpu_ratio.mean(), 2)}x</p>',
                f'<p style="font-family:sans-serif; color:{colorway[2]}; font-size: 20px;text-align: center;"> min {round(gpu_ratio.min(), 2)}x; max {round(gpu_ratio.max(), 2)}x</p>',
                "<br><br>",
                '<p style="font-family:sans-serif; font-size: 20px;text-align: center;">GroqChip 1 Acceleration<sup>*</sup>:</p>',
                f'<p style="font-family:sans-serif; color:{colorway[1]}; font-size: 26px;text-align: center;"> {round(tsp_ratio.mean(), 2)}x</p>',
                f'<p style="font-family:sans-serif; color:{colorway[1]}; font-size: 20px;text-align: center;"> min {round(tsp_ratio.min(), 2)}x; max {round(tsp_ratio.max(), 2)}x</p>',
            ]
            st.markdown("\n".join(summary_lines), unsafe_allow_html=True)
|
|
|
|
|
# Two-column row; the visible code only writes to the left column.
cols = st.columns(2)

# CSS rule that defines the "big-font" class.
big_font_css = (
    "<style>\n"
    ".big-font {\n"
    "font-size:20px !important;\n"
    "}\n"
    "</style>\n"
)

with cols[0]:
    st.markdown(big_font_css, unsafe_allow_html=True)
|
|
|
class Collapsable:
    """Collects (heading, text) pairs and renders them as nested
    ``<details>`` elements through ``st.markdown``."""

    def __init__(self, preamble="", epilogue=""):
        """Store the text placed before and after the rendered sections."""
        self.preamble, self.epilogue = preamble, epilogue
        self.sections = []

    def add_section(self, heading, text):
        """Append one entry; entries are rendered in insertion order."""
        entry = (heading, text)
        self.sections.append(entry)

    def deploy(self):
        """Build the HTML for all sections and write it to the page."""
        small_font = 18
        large_font = 18

        # One collapsible element per section; the text sits in the summary
        # of a second, nested element.
        rendered = []
        for heading, text in self.sections:
            rendered.append(
                f"<details><summary style='font-size:{large_font}px;'>{heading}</summary>"
                f"<blockquote><details><summary style='font-size:{small_font}px;max-width: 80%;'>{text}</summary>"
                f"<blockquote></blockquote></details></blockquote></details>"
            )
        secs = "".join(rendered)

        collapsable_sec = (
            "\n<ol>\n"
            f"{self.preamble}\n"
            f"{secs}\n"
            f"{self.epilogue}\n"
            "</ol>\n"
        )
        st.markdown(collapsable_sec, unsafe_allow_html=True)
|
|
|
# NOTE(review): the FAQ is written into the left column of the two-column
# row created just above; the original nesting is assumed - confirm.
with cols[0]:
    st.markdown("""## About this workload analysis (FAQ)""")

    faq = Collapsable()
    faq.add_section(
        "Model selection",
        'The models that are part of the "ML Agility" set are models that have been internally selected and represent a mix between popular open-source models and models that Groq has historically focused some efforts on (like GNNs).',
    )
    faq.add_section(
        "Experimental Setup",
        "-",
    )
    faq.add_section(
        "Key limitations",
        # Typo fixed in the displayed text ("parametes").
        "This set of workloads does not include models with more than 1B parameters.",
    )
    faq.deploy()

    # The link target needs an explicit scheme: a bare "mlagility.org" is a
    # relative URL and would resolve against the address of this app.
    st.markdown(
        "For more details, please visit [mlagility.org](https://mlagility.org).",
        unsafe_allow_html=True,
    )
|
|
|
st.markdown("## Detailed Data View")

# Optional case-sensitive substring filter on the model name.
model_name = st.text_input("", placeholder="Filter model by name")
if model_name:
    mla_report = mla_report[[model_name in x for x in mla_report["model_name"]]]

# Constant columns for the GPU and CPU entries of the table.
# NOTE(review): the CPU latency and chip count are hard-coded to 0 - confirm
# that these placeholders are intended.
mla_report["chips_used_gpu"] = 1
mla_report["cpu_latency"] = 0
mla_report["chips_used_cpu"] = 0

# Show latencies with three decimals and leave "-" entries as they are.
# Columns that a report does not contain are skipped instead of raising.
for latency_col in ("tsp_estimated_e2e_latency", "gpu_e2e_latency"):
    if latency_col in mla_report:
        mla_report[latency_col] = [
            "-" if x == "-" else f"{float(x):.3f}" for x in mla_report[latency_col]
        ]

# Report column -> table header; the dict order is the column order.
renamed_cols = {
    "model_name": "Model Name",
    "author": "Source",
    "params": "Parameters",
    "model_type": "Framework",
    "tsp_estimated_e2e_latency": "GroqChip 1: Latency (ms)",
    "gpu_e2e_latency": "NVIDIA A100-PCIE-40GB: Latency (ms)",
    "cpu_latency": "Intel(R) Xeon(R) Gold 6338 CPU: Latency (ms)",
    "chips_used": "GroqChip 1: Chips Used",
    "chips_used_gpu": "NVIDIA A100-PCIE-40GB: Chips Used",
    "chips_used_cpu": "Intel(R) Xeon(R) Gold 6338 CPU: Chips Used",
}
mla_report.rename(columns=renamed_cols, inplace=True)

# Only select headers that exist after the rename. Optional report columns
# such as "model_type" may be missing, and selecting them raised KeyError.
selected_cols = [col for col in renamed_cols.values() if col in mla_report.columns]

st.dataframe(
    mla_report[selected_cols],
    # 35 px per row plus the header, capped at 20 visible rows.
    height=min((len(mla_report) + 1) * 35, 35 * 21),
    use_container_width=True,
)
|
|