# Scraped from the Hugging Face Hub file viewer (page residue, not program code):
# author: joaogante (HF staff) | commit 74e572f "Add Markdown" | size: 5.76 kB | No virus
import matplotlib
matplotlib.use('Agg')
import functools
import gradio as gr
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
def _per_gpu(t4, rtx_3090, a100):
    """Map one result list per GPU onto the GPU labels used as dictionary keys."""
    return {"T4": t4, "3090": rtx_3090, "A100": a100}


# Greedy-search results. Each list is ordered: pytorch, tf eager, tf xla; units = ms.
# NOTE(review): the all-zero GPTJ-6B entries on T4/3090 presumably mean "not measured" -- confirm.
_GREEDY_SEARCH_RESULTS = {
    "DistilGPT2": _per_gpu(
        [336.22, 3976.23, 115.84],
        [158.38, 1835.82, 46.56],
        [371.49, 4073.84, 60.94],
    ),
    "GPT2": _per_gpu(
        [607.31, 7140.23, 185.12],
        [297.03, 3308.31, 76.68],
        [691.75, 7323.60, 110.72],
    ),
    "OPT-1.3B": _per_gpu(
        [1303.41, 15939.07, 1488.15],
        [428.33, 7259.43, 468.37],
        [1125.00, 16713.63, 384.52],
    ),
    "GPTJ-6B": _per_gpu(
        [0, 0, 0],
        [0, 0, 0],
        [2664.28, 32783.09, 1440.06],
    ),
    "T5 Small": _per_gpu(
        [99.88, 1527.73, 18.78],
        [55.09, 665.70, 9.25],
        [124.91, 1642.07, 13.72],
    ),
    "T5 Base": _per_gpu(
        [416.56, 6095.05, 106.12],
        [223.00, 2503.28, 46.67],
        [550.76, 6504.11, 64.57],
    ),
    "T5 Large": _per_gpu(
        [645.05, 9587.67, 225.17],
        [377.74, 4216.41, 97.92],
        [944.17, 10572.43, 116.52],
    ),
    "T5 3B": _per_gpu(
        [1493.61, 13629.80, 1494.80],
        [694.75, 6316.79, 489.33],
        [1801.68, 16707.71, 411.93],
    ),
}

# Benchmark results keyed by generation type -> model -> GPU.
# "Sample" and "Beam Search" list the same models and GPUs as "Greedy Search",
# but every result list is empty. Each entry gets its own fresh list objects.
BENCHMARK_DATA = {
    "Greedy Search": _GREEDY_SEARCH_RESULTS,
    "Sample": {model: _per_gpu([], [], []) for model in _GREEDY_SEARCH_RESULTS},
    "Beam Search": {model: _per_gpu([], [], []) for model in _GREEDY_SEARCH_RESULTS},
}
def get_plot(model_name, generate_type):
    """Build a grouped bar chart of generation time per GPU and framework.

    Args:
        model_name: Model key inside ``BENCHMARK_DATA[generate_type]``.
        generate_type: Top-level key of ``BENCHMARK_DATA`` (e.g. "Greedy Search").

    Returns:
        The matplotlib figure produced by the seaborn ``catplot`` call.

    Raises:
        KeyError: If ``generate_type`` or ``model_name`` is not in ``BENCHMARK_DATA``.
        ValueError: If any GPU entry for the model does not hold exactly one
            value per framework (e.g. the still-empty result lists).
    """
    # Row labels, in the same order as the values of each per-GPU result list.
    frameworks = ["PyTorch", "TF (Eager Execution)", "TF (XLA)"]

    results = BENCHMARK_DATA[generate_type][model_name]
    # Fail with an explicit message instead of pandas' length-mismatch error
    # when a model has no (or partial) results.
    if any(len(timings) != len(frameworks) for timings in results.values()):
        raise ValueError(
            f"No complete benchmark results for model {model_name!r} "
            f"with generation type {generate_type!r}"
        )

    # Wide table (one column per GPU, one row per framework) -> long format
    # with one row per (framework, GPU) pair, which is what catplot expects.
    df = pd.DataFrame(results)
    df["framework"] = frameworks
    df = pd.melt(df, id_vars=["framework"], value_vars=["T4", "3090", "A100"])

    # NOTE(review): newer seaborn releases appear to replace `ci` with
    # `errorbar` -- confirm against the pinned seaborn version before changing.
    g = sns.catplot(
        data=df, kind="bar",
        x="variable", y="value", hue="framework",
        ci="sd", palette="dark", alpha=.6, height=6
    )
    g.despine(left=True)
    g.set_axis_labels("GPU", "Generation time (ms)")
    g.legend.set_title("Framework")

    fig = plt.gcf()
    # Unregister the figure from pyplot so repeated calls (one per dropdown
    # change) do not pile up open figures; the Figure object itself stays
    # usable by the caller for rendering.
    plt.close(fig)
    return fig
# Gradio UI: an intro text followed by one tab per generation type plus a tab
# describing the benchmark setup. The app is launched at the end of the block.
demo = gr.Blocks()
with demo:
    # Built from single-line literals so the Markdown text does not depend on
    # how this source file is indented.
    gr.Markdown(
        "\n"
        "# TensorFlow XLA Text Generation Benchmark\n"
        "Pick a tab for the type of generation (or other information), and then select a model from the dropdown menu.\n"
        "You can also omit results from TensorFlow Eager Execution, if you wish to better compare the performance of\n"
        "PyTorch to TensorFlow with XLA.\n"
    )
    with gr.Tabs():
        with gr.TabItem("Greedy Search"):
            default_model = "T5 Small"
            model_selector = gr.Dropdown(
                # Offer exactly the models that have an entry in the benchmark
                # data, so the dropdown cannot drift from the data.
                choices=list(BENCHMARK_DATA["Greedy Search"]),
                value=default_model,
                label="Model",
                interactive=True,
            )
            plot_fn = functools.partial(get_plot, generate_type="Greedy Search")
            # Render the default model's plot when the app is initialized.
            plot = gr.Plot(value=plot_fn(default_model))
            # Redraw the plot whenever a different model is selected.
            model_selector.change(fn=plot_fn, inputs=model_selector, outputs=plot)
        # NOTE(review): the buttons in the next two tabs look like placeholder
        # content; BENCHMARK_DATA holds only empty lists for these types.
        with gr.TabItem("Sample"):
            gr.Button("New Tiger")
        with gr.TabItem("Beam Search"):
            gr.Button("New Tiger")
        with gr.TabItem("Benchmark Information"):
            gr.Dataframe(
                headers=["Parameter", "Value"],
                value=[
                    ["Transformers Version", "4.22.dev0"],
                    ["TensorFlow Version", "2.9.1"],
                    ["PyTorch Version", "1.11.0"],
                    ["OS", "22.04 LTS (3090) / Debian 10 (other GPUs)"],
                    ["CUDA", "11.6 (3090) / 11.3 (other GPUs)"],
                    ["Number of Runs", "100 (the first run was discarded to ignore compilation time)"],
                    ["Is there code to reproduce?", "Yes -- https://gist.github.com/gante/f0017e3f13ac11b0c02e4e4db351f52f"],
                ],
            )
demo.launch()