Spaces:

joaogante
/

tf_xla_generate_benchmarks

Running

File size: 5,763 Bytes

1dd0620
 
 
734ca59
28dd0d5
976eb10
1dd0620
2723972
 
976eb10
 
2723972
0dfa35f
734ca59
0dfa35f
241d9b0
 
 
0dfa35f
 
241d9b0
 
 
0dfa35f
 
241d9b0
 
 
0dfa35f
 
da756b5
 
 
0dfa35f
 
2723972
 
 
0dfa35f
 
da756b5
 
 
0dfa35f
 
da756b5
 
 
0dfa35f
 
da756b5
 
 
0dfa35f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
734ca59
2723972
 
 
 
f74e135
2723972
 
 
 
 
 
e3f1090
241d9b0
6d9bae3
7aef7b8
 
 
 
979192e
 
74e572f
 
 
 
979192e
 
7aef7b8
28dd0d5
d63c8d0
0dfa35f
cb89f67
e589f54
 
cb89f67
734ca59
31b8889
3fdc6ff
28dd0d5
 
 
7aef7b8
28dd0d5
7c04b7a
9a8c083
 
 
 
 
7fa6af3
 
241d9b0
7fa6af3
9a8c083
28dd0d5
7aef7b8

import matplotlib
matplotlib.use('Agg')

import functools

import gradio as gr
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd


# Generation timings, indexed as BENCHMARK_DATA[generation type][model][GPU].
# Each innermost list is ordered (PyTorch, TF eager, TF XLA) and expressed in
# milliseconds.
# NOTE(review): the all-zero GPTJ-6B rows for T4 and 3090 presumably mean
# "not measured" rather than a real timing -- confirm.
_GREEDY_SEARCH_MS = {
    "DistilGPT2": {
        "T4": [336.22, 3976.23, 115.84],
        "3090": [158.38, 1835.82, 46.56],
        "A100": [371.49, 4073.84, 60.94],
    },
    "GPT2": {
        "T4": [607.31, 7140.23, 185.12],
        "3090": [297.03, 3308.31, 76.68],
        "A100": [691.75, 7323.60, 110.72],
    },
    "OPT-1.3B": {
        "T4": [1303.41, 15939.07, 1488.15],
        "3090": [428.33, 7259.43, 468.37],
        "A100": [1125.00, 16713.63, 384.52],
    },
    "GPTJ-6B": {
        "T4": [0, 0, 0],
        "3090": [0, 0, 0],
        "A100": [2664.28, 32783.09, 1440.06],
    },
    "T5 Small": {
        "T4": [99.88, 1527.73, 18.78],
        "3090": [55.09, 665.70, 9.25],
        "A100": [124.91, 1642.07, 13.72],
    },
    "T5 Base": {
        "T4": [416.56, 6095.05, 106.12],
        "3090": [223.00, 2503.28, 46.67],
        "A100": [550.76, 6504.11, 64.57],
    },
    "T5 Large": {
        "T4": [645.05, 9587.67, 225.17],
        "3090": [377.74, 4216.41, 97.92],
        "A100": [944.17, 10572.43, 116.52],
    },
    "T5 3B": {
        "T4": [1493.61, 13629.80, 1494.80],
        "3090": [694.75, 6316.79, 489.33],
        "A100": [1801.68, 16707.71, 411.93],
    },
}


def _empty_like(results):
    """Return a table with the model/GPU keys of *results* and no timings.

    Every GPU entry gets its own fresh empty list, so filling one placeholder
    never affects another.
    """
    return {
        model: {gpu: [] for gpu in per_gpu}
        for model, per_gpu in results.items()
    }


BENCHMARK_DATA = {
    "Greedy Search": _GREEDY_SEARCH_MS,
    # No timings are recorded for these generation types; they carry the same
    # model and GPU keys as the greedy-search table, with empty lists.
    "Sample": _empty_like(_GREEDY_SEARCH_MS),
    "Beam Search": _empty_like(_GREEDY_SEARCH_MS),
}


def get_plot(model_name, generate_type):
    """Build the grouped bar chart of generation times for one model.

    Args:
        model_name: Key of the model inside ``BENCHMARK_DATA[generate_type]``.
        generate_type: Top-level key of ``BENCHMARK_DATA``.

    Returns:
        The matplotlib figure created by seaborn, with one group of bars per
        GPU and one bar per framework.

    Raises:
        KeyError: If ``generate_type`` or ``model_name`` is not in
            ``BENCHMARK_DATA``.
        ValueError: If a GPU entry does not hold exactly one timing per
            framework (for example, an empty placeholder entry).
    """
    frameworks = ["PyTorch", "TF (Eager Execution)", "TF (XLA)"]
    timings = BENCHMARK_DATA[generate_type][model_name]

    # Fail with an explicit message instead of an opaque pandas length
    # mismatch when the selected entry has no (or ragged) measurements.
    if any(len(values) != len(frameworks) for values in timings.values()):
        raise ValueError(
            f"No complete benchmark results for {model_name!r} with {generate_type!r}: "
            f"expected {len(frameworks)} timings per GPU."
        )

    df = pd.DataFrame(timings)
    df["framework"] = frameworks
    # Long format: one row per (framework, GPU) pair. The GPU columns are
    # taken from the data so they cannot drift from BENCHMARK_DATA.
    df = pd.melt(df, id_vars=["framework"], value_vars=list(timings))

    # NOTE(review): newer seaborn releases replace `ci` with `errorbar` --
    # confirm against the seaborn version pinned for this app.
    g = sns.catplot(
        data=df, kind="bar",
        x="variable", y="value", hue="framework",
        ci="sd", palette="dark", alpha=.6, height=6
    )
    g.despine(left=True)
    g.set_axis_labels("GPU", "Generation time (ms)")
    g.legend.set_title("Framework")
    # Return the grid's own figure rather than pyplot's global "current
    # figure", which is shared process-wide state.
    return g.fig

# Gradio UI: a short introduction followed by one tab per generation type and
# a final tab describing the benchmark setup. The app is launched at import
# time, when this file is executed.
demo = gr.Blocks()

with demo:
    # NOTE(review): the introduction mentions omitting the TensorFlow Eager
    # Execution results, but no control in this layout offers that option.
    gr.Markdown(
        """
        # TensorFlow XLA Text Generation Benchmark
        Pick a tab for the type of generation (or other information), and then select a model from the dropdown menu.
        You can also omit results from TensorFlow Eager Execution, if you wish to better compare the performance of
        PyTorch to TensorFlow with XLA.
        """
    )
    with gr.Tabs():
        with gr.TabItem("Greedy Search"):
            default_model = "T5 Small"
            model_selector = gr.Dropdown(
                # Offer exactly the models that have greedy-search entries, in
                # the order they are declared in BENCHMARK_DATA.
                choices=list(BENCHMARK_DATA["Greedy Search"]),
                value=default_model,
                label="Model",
                interactive=True,
            )
            plot_fn = functools.partial(get_plot, generate_type="Greedy Search")
            plot = gr.Plot(value=plot_fn(default_model))  # Show plot when the gradio app is initialized
            # Redraw the chart whenever a different model is selected.
            model_selector.change(fn=plot_fn, inputs=model_selector, outputs=plot)
        # TODO: the next two tabs only hold a placeholder button; they have no
        # plot wired up.
        with gr.TabItem("Sample"):
            gr.Button("New Tiger")
        with gr.TabItem("Beam Search"):
            gr.Button("New Tiger")
        with gr.TabItem("Benchmark Information"):
            gr.Dataframe(
                headers=["Parameter", "Value"],
                value=[
                    ["Transformers Version", "4.22.dev0"],
                    ["TensorFlow Version", "2.9.1"],
                    ["Pytorch Version", "1.11.0"],
                    ["OS", "22.04 LTS (3090) / Debian 10 (other GPUs)"],
                    ["CUDA", "11.6 (3090) / 11.3 (other GPUs)"],
                    ["Number of Runs", "100 (the first run was discarded to ignore compilation time)"],
                    ["Is there code to reproduce?", "Yes -- https://gist.github.com/gante/f0017e3f13ac11b0c02e4e4db351f52f"],
                ],
            )

demo.launch()