Spaces:

Groq
/

mlagility

Runtime error

App Files Files Community

mlagility / app.py

danielhn

New layout

e1f2481 about 2 years ago

raw

history blame

20 kB

	import time # to simulate a real time data, time loop
	from os import listdir
	from os.path import isfile, join
	import numpy as np # np mean, np random
	import pandas as pd # read csv, df manipulation
	from collections import Counter
	import plotly.express as px # interactive charts
	from plotly import graph_objs as go
	import streamlit as st # 🎈 data web app development
	import plotly.figure_factory as ff
	import numpy as np
	from collections import Counter
	from streamlit_echarts import st_echarts
	import streamlit_toggle as tog

	# Console reminder for whoever launches the app.
	print("Make sure to activate your VPN before running this script")

	# Page-level configuration for the dashboard.
	st.set_page_config(
	    page_title="ML Agility tracker",
	    page_icon="⚡",
	    layout="wide",
	)


	# Session State variables:
	state = st.session_state
	if "INFO_CLOSED" not in state:
	    state.INFO_CLOSED = False

	# dashboard title
	st.title("ML Agility Tracker ⚡")

	# Custom chart colors (https://plotly.com/python/discrete-color/)
	colorway = [
	    "#5470c6",
	    "#FF7F0E",
	    "#94cc74",
	    "#92cb75",
	    "#fac858",
	    "#ee6666",
	    "#73c0de",
	    "#3ba272",
	]
	# colorway = ["#3366cc", "#FF7F0E"]

	# The link target carries an explicit scheme: a bare "mlagility.org" is
	# interpreted by Markdown as a path relative to the app's own URL.
	st.markdown(
	    "Machine Learning Agility (MLAgility) measures vendor progress towards providing this turnkey solution to their customers. For more details, please visit [mlagility.org](https://mlagility.org).",
	    unsafe_allow_html=True,
	)


	def add_filter(
	    data_frame_list, name, label, options=None, num_cols=1, last_is_others=True
	):
	    """Render a checkbox filter and apply the selection to every data frame.

	    Args:
	        data_frame_list: List of pandas DataFrames that all contain the
	            column ``label``. The LAST frame decides which values are
	            offered as options.
	        name: Heading shown above the checkboxes.
	        label: Name of the column to filter on.
	        options: Explicit checkbox labels. When ``None`` they are derived
	            from the data: all values ordered by frequency if there are at
	            most eight, otherwise the seven most frequent plus "others".
	        num_cols: Number of Streamlit columns the checkboxes are spread over.
	        last_is_others: Whether the last entry of an explicit ``options``
	            list is a catch-all for every value without its own checkbox.
	            Recomputed (the argument is ignored) when ``options`` is None.

	    Returns:
	        A new list with the frames restricted to rows whose ``label`` value
	        was selected. The frames are returned unfiltered when nothing is
	        selected or when the column only holds the "-" placeholder.
	    """
	    # Work on a shallow copy so the caller's list is never modified.
	    frames = list(data_frame_list)

	    # "-" is the placeholder for a missing value and is never offered.
	    column = frames[-1][label]
	    all_options = set(column) - {"-"}
	    if not all_options:
	        return frames

	    st.markdown(f"#### {name}")

	    # Create list of options if selectable options are not provided
	    if options is None:
	        counts = Counter(column)
	        counts.pop("-", None)
	        ranked = sorted(counts, key=counts.get, reverse=True)
	        last_is_others = len(ranked) > 8
	        options = (ranked[:7] + ["others"]) if last_is_others else ranked

	    options = list(options)
	    if not options:
	        # Nothing to render; also avoids indexing an empty checkbox list.
	        return frames

	    cols = st.columns(num_cols)
	    checked = []
	    for idx, option in enumerate(options):
	        with cols[idx % num_cols]:
	            checked.append(st.checkbox(option, False, key=f"{label}_{option}"))

	    selected = [option for option, is_on in zip(options, checked) if is_on]

	    # The last checkbox is the catch-all: swap its own label for every
	    # value that has no dedicated checkbox.
	    if last_is_others and checked[-1]:
	        selected = selected[:-1] + [x for x in all_options if x not in options]

	    if not selected:
	        return frames

	    # Vectorised membership test instead of a per-row Python loop.
	    return [frame[frame[label].isin(selected)] for frame in frames]


	def parameter_filter(data_frame_list):
	    """Render the parameter-count range slider and filter every frame by it.

	    Args:
	        data_frame_list: List of pandas DataFrames, each with a "params"
	            column convertible to integers (absolute parameter counts).

	    Returns:
	        A new list with each frame restricted to rows whose "params" value
	        lies within the selected range, bounds included. The slider works
	        in millions, from 0 to 1000 in steps of 10.
	    """
	    st.markdown("#### Parameters")

	    start_params, end_params = st.select_slider(
	        "Select a range parameters (in millions)",
	        options=[str(x) for x in np.arange(0, 1001, 10, dtype=int)],
	        value=("0", "1000"),
	    )

	    # The slider yields strings in millions; convert once to absolute counts.
	    lower = int(start_params) * 1000000
	    upper = int(end_params) * 1000000

	    # Vectorised, inclusive range test; the caller's list is left untouched.
	    return [
	        frame[frame["params"].astype("int64").between(lower, upper)]
	        for frame in data_frame_list
	    ]


	with st.sidebar:

	    st.markdown("# Filters")

	    selected_test_type = "mlagility"
	    report_folder = "reports/mlagility"
	    # Get ML Agility reports (plain files only), sorted by file name
	    reports = sorted(
	        f for f in listdir(report_folder) if isfile(join(report_folder, f))
	    )
	    if not reports:
	        # Without any report there is nothing to select or display; stop
	        # with a clear message instead of failing on an invalid index.
	        st.error(f"No reports found in '{report_folder}'.")
	        st.stop()

	    # Default to the last report in sorted order
	    selected_report = st.selectbox("Test date", reports, index=len(reports) - 1)
	    selected_report_idx = reports.index(selected_report)
	    # The report just before the selected one; for the first report this is
	    # the selected report itself.
	    prev_report = reports[max(0, selected_report_idx - 1)]
	    mla_report = pd.read_csv(join(report_folder, selected_report))
	    prev_mla_report = pd.read_csv(join(report_folder, prev_report))

	    # Convert numeric columns to int64, mapping the "-" placeholder to 0
	    for p in ["chips_used", "cycles", "params"]:
	        mla_report[p] = mla_report[p].replace("-", 0).astype("int64")
	        prev_mla_report[p] = prev_mla_report[p].replace("-", 0).astype("int64")

	    # Parameter filter
	    mla_report, prev_mla_report = parameter_filter([mla_report, prev_mla_report])

	    # Add author filter: a fixed author list is only used for the "monthly"
	    # test type; otherwise the options are derived from the data.
	    authors = (
	        [
	            "google",
	            "apple",
	            "facebook",
	            "openai",
	            "microsoft",
	            "huggingface",
	            "CompVis",
	            "others",
	        ]
	        if selected_test_type == "monthly"
	        else None
	    )
	    mla_report, prev_mla_report = add_filter(
	        [mla_report, prev_mla_report],
	        "Authors",
	        label="author",
	        options=authors,
	        num_cols=2,
	    )

	    # Add task filter; None lets add_filter derive the options from the data
	    tasks = None
	    mla_report, prev_mla_report = add_filter(
	        [mla_report, prev_mla_report], "Tasks", label="task", options=tasks
	    )


	def detailed_progress_list(df_new, df_old, filter=None):
	    """Disabled stub: ignores its arguments, renders nothing, returns None."""
	    # The former implementation is retained below for reference only; it
	    # is never executed.
	    #
	    # if filter is not None:
	    #     df_new = df_new[(df_new[filter] == True)]
	    #     df_old = df_old[(df_old[filter] == True)]
	    #
	    # progress = df_new[~(df_new["hash"].isin(df_old["hash"]))].reset_index(drop=True)
	    # regression = df_old[~(df_old["hash"].isin(df_new["hash"]))].reset_index(drop=True)
	    #
	    # for model_name in progress["model_name"]:
	    #     st.markdown(
	    #         f'<span style="color:green">↑ {model_name}</span>',
	    #         unsafe_allow_html=True,
	    #     )
	    # for model_name in regression["model_name"]:
	    #     st.markdown(
	    #         f'<span style="color:red">↓ {model_name}</span>',
	    #         unsafe_allow_html=True,
	    #     )
	    return None


	# creating a single-element container
	placeholder = st.empty()

	with placeholder.container():

	st.markdown("## Summary Results")

	# Headline figures for the selected report: the number of rows, plus the
	# column sum of each pipeline-stage column (presumably 0/1 or boolean
	# flags per model -- TODO confirm against the report CSV schema).
	all_models = len(mla_report)
	base_onnx = np.sum(mla_report["base_onnx"])
	optimized_onnx = np.sum(mla_report["optimized_onnx"])
	all_ops_supported = np.sum(mla_report["all_ops_supported"])
	fp16_onnx = np.sum(mla_report["fp16_onnx"])
	compiles = np.sum(mla_report["compiles"])
	assembles = np.sum(mla_report["assembles"])

	# Pie chart for showing origin of models
	# based on https://echarts.apache.org/examples/en/editor.html?c=pie-simple

	all_authors = list(mla_report.loc[:, "author"])
	# "model_type" is treated as optional: a missing column yields no sources.
	try:
	all_sources = list(mla_report.loc[:, "model_type"])
	except KeyError:
	all_sources = []
	# NOTE(review): all_sources is assigned [] a second time on the next line.
	# If that line sits outside the except block it discards the values read
	# above, leaving sources_count empty -- confirm whether that is intended.
	all_sources = []
	# Number of occurrences of each distinct author / source.
	author_count = {i: all_authors.count(i) for i in all_authors}
	sources_count = {i: all_sources.count(i) for i in all_sources}

	# Two-column row: workload-origin pie chart on the left; the right column
	# is filled by the code that follows this block.
	cols = st.columns(2)
	with cols[0]:
	    st.markdown("""#### Workload origin""")

	    # ECharts expects real booleans for flags such as "show" and "silent".
	    # They are passed as Python booleans because a non-empty string like
	    # "false" is truthy once it reaches JavaScript.
	    options = {
	        "darkMode": True,
	        "textStyle": {"fontSize": 16},
	        "tooltip": {"trigger": "item"},
	        "series": [
	            {
	                # Inner disc: one slice per source
	                "name": "Access From",
	                "type": "pie",
	                "radius": [0, "30%"],
	                "label": {"position": "inner", "fontSize": 14},
	                "labelLine": {"show": False},
	                "data": [
	                    {"value": count, "name": source}
	                    for source, count in sources_count.items()
	                ],
	            },
	            {
	                # Zero-width ring at 70%: contributes only the outer
	                # "name / percentage" labels for each author
	                "name": "Name of corpus:",
	                "type": "pie",
	                "radius": ["70%", "70%"],
	                "data": [
	                    {"value": count, "name": author}
	                    for author, count in author_count.items()
	                ],
	                "label": {
	                    "formatter": "{b}\n{d}%",
	                },
	            },
	            {
	                # Visible author ring with the raw count inside each slice
	                "name": "Name of corpus:",
	                "type": "pie",
	                "radius": ["50%", "70%"],
	                "data": [
	                    {"value": count, "name": author}
	                    for author, count in author_count.items()
	                ],
	                "emphasis": {
	                    "itemStyle": {
	                        "shadowBlur": 10,
	                        "shadowOffsetX": 0,
	                        "shadowColor": "rgba(0, 0, 0, 0.5)",
	                    }
	                },
	                "label": {
	                    "position": "inner",
	                    "formatter": "{c}",
	                    "color": "black",
	                    "textBorderWidth": 0,
	                },
	            },
	            {
	                # Show total number of models inside
	                "name": "Total number of models:",
	                "type": "pie",
	                "radius": ["0%", "0%"],
	                "data": [{"value": all_models, "name": "Total"}],
	                "silent": True,
	                "label": {
	                    "position": "inner",
	                    "formatter": "{c}",
	                    "color": "white",
	                    "fontSize": 30,
	                    "textBorderWidth": 0,
	                },
	            },
	        ],
	    }
	    st_echarts(
	        options=options,
	        height="400px",
	    )

	with cols[1]:
	    # Parameter-size histogram; counts are converted to millions.
	    all_models = [float(x) / 1000000 for x in mla_report["params"] if x != "-"]

	    # A single "All models" group, present only when there is data.
	    hist_data = [all_models] if all_models else []
	    group_labels = ["All models"] if all_models else []

	    st.markdown("""#### Parameter Size Distribution""")

	    if not hist_data:
	        st.markdown(
	            """At least one model needs to reach the compiler to show this graph 😅"""
	        )
	    else:
	        fig = ff.create_distplot(
	            hist_data,
	            group_labels,
	            bin_size=25,
	            histnorm="",
	            colors=colorway,
	            curve_type="normal",
	        )
	        fig.layout.update(
	            xaxis_title="Parameters in millions",
	            yaxis_title="count",
	        )
	        fig.update_xaxes(range=[1, 1000])
	        st.plotly_chart(fig, use_container_width=True)

	# Benchmark section: only rendered when the report provides both ratio
	# columns. Note that the code below reads the latency columns, not the
	# ratio columns used in this guard.
	if "tsp_gpu_compute_ratio" in mla_report and "tsp_gpu_e2e_ratio" in mla_report:
	cols = st.columns(2)
	with cols[0]:
	# GPU Acceleration plot
	st.markdown("""#### Benchmark results (latency)""")

	# Prepare data
	# Keep only rows where both latencies were measured ("-" marks a missing
	# value), sorted by model name, and convert the latencies to floats.
	df = mla_report[
	[
	"model_name",
	"tsp_estimated_e2e_latency",
	"gpu_e2e_latency",
	]
	]
	df = df.sort_values(by=["model_name"])
	df = df[(df.tsp_estimated_e2e_latency != "-")]
	df = df[(df.gpu_e2e_latency != "-")]
	df["tsp_estimated_e2e_latency"] = df["tsp_estimated_e2e_latency"].astype(
	float
	)
	df["gpu_e2e_latency"] = df["gpu_e2e_latency"].astype(float)

	# Three cases: models assembled but none has both latencies; nothing
	# assembled at all; otherwise draw the chart.
	if len(df) == 0 and assembles > 0:
	st.markdown(
	(
	"We do not have GPU numbers for the model(s) mapped to the GroqChip."
	" This is potentially due to lack of out-of-the-box TensorRT support."
	)
	)
	elif assembles == 0:
	st.markdown(
	"Nothing to show here since no models have been successfully assembled."
	)
	else:
	# Coming up with artificial data for now
	# The CPU latency is synthetic: ten times the sum of the two measured
	# latencies. Both speedup ratios are derived from that synthetic value.
	df["cpu_latency"] = (
	df["tsp_estimated_e2e_latency"] + df["gpu_e2e_latency"]
	) * 10
	df["tsp_cpu_compute_ratio"] = (
	df["cpu_latency"] / df["tsp_estimated_e2e_latency"]
	)
	df["gpu_cpu_compute_ratio"] = df["cpu_latency"] / df["gpu_e2e_latency"]
	# One bar trace per device, in the order GPU, GroqChip, CPU.
	data = [
	go.Bar(
	x=df["model_name"],
	y=df["gpu_cpu_compute_ratio"],
	name="NVIDIA A100-PCIE-40GB",
	),
	go.Bar(
	x=df["model_name"],
	y=df["tsp_cpu_compute_ratio"],
	name="GroqChip 1",
	),
	# The CPU is the baseline: "* 0 + 1" produces a series of ones that
	# keeps the index of df.
	go.Bar(
	x=df["model_name"],
	y=df["cpu_latency"] * 0 + 1,
	name="Intel(R) Xeon(R) Gold 6338 CPU",
	),
	]

	# The colorway is listed in trace order (GPU, GroqChip, CPU) and uses the
	# same palette entries as the statistics panel further down.
	layout = go.Layout(
	barmode="overlay", # group
	legend={
	"orientation": "h",
	"xanchor": "center",
	"x": 0.5,
	"y": 1.2,
	},
	yaxis_title="Latency Speedup",
	colorway=[colorway[2], colorway[1], colorway[0]],
	height=600,
	)

	fig = dict(data=data, layout=layout)
	st.plotly_chart(fig, use_container_width=True)

	# Footnotes referenced by the markers in the statistics panel.
	st.markdown(
	"<sup>*</sup>Estimated I/O does NOT include delays caused by Groq's runtime.",
	unsafe_allow_html=True,
	)
	st.markdown(
	"<sup>†</sup>Baseline corresponds to Intel(R) Xeon(R) Gold 6338 CPU @ 2.00GHz.",
	unsafe_allow_html=True,
	)

	# NOTE(review): the panel below reads df["gpu_cpu_compute_ratio"] and
	# df["tsp_cpu_compute_ratio"], which are only created in the chart branch
	# above. If this block also runs when one of the "nothing to show"
	# branches was taken, the lookups raise KeyError -- confirm the nesting.
	with cols[1]:
	# Show stats
	# Mean, minimum and maximum speedup per device, as centered HTML.
	st.markdown(
	f"""<br><br><br><br>
	<p style="font-family:sans-serif; font-size: 20px;text-align: center;">Intel(R) Xeon(R) Gold 6338 CPU @ 2.00GHz Acceleration:</p>
	<p style="font-family:sans-serif; color:{colorway[0]}; font-size: 26px;text-align: center;"> {1}x (Baseline)</p>
	<br><br>
	<p style="font-family:sans-serif; font-size: 20px;text-align: center;">NVIDIA A100-PCIE-40GB Acceleration:</p>
	<p style="font-family:sans-serif; color:{colorway[2]}; font-size: 26px;text-align: center;"> {round(df["gpu_cpu_compute_ratio"].mean(),2)}x</p>
	<p style="font-family:sans-serif; color:{colorway[2]}; font-size: 20px;text-align: center;"> min {round(df["gpu_cpu_compute_ratio"].min(),2)}x; max {round(df["gpu_cpu_compute_ratio"].max(),2)}x</p>
	<br><br>
	<p style="font-family:sans-serif; font-size: 20px;text-align: center;">GroqChip 1 Acceleration<sup>*</sup>:</p>
	<p style="font-family:sans-serif; color:{colorway[1]}; font-size: 26px;text-align: center;"> {round(df["tsp_cpu_compute_ratio"].mean(),2)}x</p>
	<p style="font-family:sans-serif; color:{colorway[1]}; font-size: 20px;text-align: center;"> min {round(df["tsp_cpu_compute_ratio"].min(),2)}x; max {round(df["tsp_cpu_compute_ratio"].max(),2)}x</p>""",
	unsafe_allow_html=True,
	)

	# FAQ Block
	# The FAQ is placed in the first of two columns.
	# NOTE(review): cols[1] is not used anywhere in the visible source.
	cols = st.columns(2)
	with cols[0]:

	# Inject a CSS rule that defines a 20px "big-font" class.
	# NOTE(review): no element in the visible source uses this class --
	# confirm it is still needed.
	st.markdown(
	"""<style>
	.big-font {
	font-size:20px !important;
	}
	</style>
	""",
	unsafe_allow_html=True,
	)

	class Collapsable:
	    """Collects (heading, text) pairs and renders them as collapsible HTML.

	    ``preamble`` and ``epilogue`` are raw HTML fragments placed before and
	    after the rendered sections.
	    """

	    def __init__(self, preamble="", epilogue=""):
	        self.preamble, self.epilogue = preamble, epilogue
	        self.sections = []

	    def add_section(self, heading, text):
	        """Queue one section; nothing is rendered until ``deploy``."""
	        entry = (heading, text)
	        self.sections.append(entry)

	    def deploy(self):
	        """Render all queued sections through ``st.markdown`` as raw HTML.

	        Each section is an outer ``<details>`` whose summary is the heading.
	        It wraps a blockquote holding an inner ``<details>`` whose summary
	        is the text.
	        """
	        heading_px = 18
	        text_px = 18
	        rendered = []
	        for heading, text in self.sections:
	            rendered.append(
	                f"<details><summary style='font-size:{heading_px}px;'>{heading}</summary>"
	                f"<blockquote><details><summary style='font-size:{text_px}px;max-width: 80%;'>{text}</summary>"
	                f"<blockquote></blockquote></details></blockquote></details>"
	            )
	        secs = "".join(rendered)
	        collapsable_sec = f"""
	<ol>
	{self.preamble}
	{secs}
	{self.epilogue}
	</ol>
	"""
	        st.markdown(collapsable_sec, unsafe_allow_html=True)

	st.markdown("""## About this workload analysis (FAQ)""")

	# Build the FAQ as collapsible sections and render it in one go.
	faq = Collapsable()
	faq.add_section(
	    "Model selection",
	    'The models that are part of the "ML Agility" set are models that have been internally selected and represent a mix between popular open-source models and models that Groq has historically focused some efforts on (like GNNs).',
	)
	# "-" is a placeholder: no text has been written for this section yet.
	faq.add_section(
	    "Experimental Setup",
	    "-",
	)
	faq.add_section(
	    "Key limitations",
	    "This set of workloads does not include models with more than 1B parameters.",
	)

	faq.deploy()
	# The link target carries an explicit scheme: a bare "mlagility.org" is
	# interpreted by Markdown as a path relative to the app's own URL.
	st.markdown(
	    "For more details, please visit [mlagility.org](https://mlagility.org).",
	    unsafe_allow_html=True,
	)

	st.markdown("## Detailed Data View")

	# Optional case-sensitive substring filter on the model name.
	model_name = st.text_input("", placeholder="Filter model by name")
	if model_name != "":
	    name_matches = (
	        mla_report["model_name"].astype(str).str.contains(model_name, regex=False)
	    )
	    mla_report = mla_report[name_matches]

	# Work on a copy: mla_report is the result of earlier row filters, and
	# adding columns to such a slice triggers pandas' SettingWithCopyWarning.
	mla_report = mla_report.copy()

	# Add columns that do not exist yet
	mla_report["chips_used_gpu"] = 1
	mla_report["cpu_latency"] = 0
	mla_report["chips_used_cpu"] = 0

	# Show latencies with three decimal places; "-" (no measurement) is kept.
	# A report without a latency column is left as is.
	for latency_col in ("tsp_estimated_e2e_latency", "gpu_e2e_latency"):
	    if latency_col in mla_report:
	        mla_report[latency_col] = [
	            "-" if x == "-" else f"{float(x):.3f}" for x in mla_report[latency_col]
	        ]

	# Report column -> header shown in the table, in display order.
	renamed_cols = {
	    "model_name": "Model Name",
	    "author": "Source",
	    "params": "Parameters",
	    "model_type": "Framework",
	    "tsp_estimated_e2e_latency": "GroqChip 1: Latency (ms)",
	    "gpu_e2e_latency": "NVIDIA A100-PCIE-40GB: Latency (ms)",
	    "cpu_latency": "Intel(R) Xeon(R) Gold 6338 CPU: Latency (ms)",
	    "chips_used": "GroqChip 1: Chips Used",
	    "chips_used_gpu": "NVIDIA A100-PCIE-40GB: Chips Used",
	    "chips_used_cpu": "Intel(R) Xeon(R) Gold 6338 CPU: Chips Used",
	}
	mla_report = mla_report.rename(columns=renamed_cols)
	# Only select columns the report actually provides, so a missing optional
	# column (e.g. "model_type") does not raise KeyError.
	selected_cols = [col for col in renamed_cols.values() if col in mla_report.columns]

	# 35 px per row plus one header row, capped at 21 rows' worth of height.
	st.dataframe(
	    mla_report[selected_cols],
	    height=min((len(mla_report) + 1) * 35, 35 * 21),
	    use_container_width=True,
	)