2024_Leaderboard

Running

2024_Leaderboard

File size: 9,634 Bytes

460fdc7
 
 
42e8f64
 
0bcbe4f
 
 
 
 
b7b78a8
2dc39dd
b7b78a8
 
dae3ac5
 
b7b78a8
dae3ac5
23c7589
b7b78a8
dae3ac5
 
 
 
 
 
e5599c2
f7b4006
0bcbe4f
73b1941
0bcbe4f
 
 
 
 
1aae210
eb5bbd0
0bcbe4f
 
 
 
 
 
 
 
 
 
 
 
8eb2a95
9563b89
8eb2a95
 
0bcbe4f
 
 
d01211c
7ddc374
 
0bcbe4f
 
 
 
 
5136650
0bcbe4f
 
 
 
 
73b1941
0bcbe4f
 
 
 
 
1aae210
eb5bbd0
0bcbe4f
 
 
d087156
7ddc374
78be425
5136650
0bcbe4f
ba0ef01
d4ded0a
0bcbe4f
23c7589
 
 
 
 
 
 
c76229c
7e4fcf8
7022131
f7b4006
7022131
7786ff5
68c7721
655d435
 
7786ff5
655d435
 
 
f7b4006
7022131
f7b4006
2dc39dd
3fe7e68
2dc39dd
5966339
2dc39dd
7022131
f7b4006
2dc39dd
b7b78a8
2dc39dd
097117b
2dc39dd
7022131
f7b4006
d4ded0a
b7b78a8
d4ded0a
097117b
d4ded0a
7022131
f7b4006
2dc39dd
3e19f3e
2dc39dd
097117b
2dc39dd
3fe7e68
 
 
 
 
097117b
3fe7e68
 
 
e513088
3fe7e68
097117b
3fe7e68
 
 
 
 
 
097117b
3fe7e68
 
 
 
 
 
097117b
3fe7e68
 
 
 
 
 
097117b
3fe7e68
7022131
f7b4006
2dc39dd
b7b78a8
2dc39dd
097117b
0bcbe4f
296b387
0bcbe4f
296b387
0bcbe4f
296b387
eb5bbd0
5136650
5c13955
68c7721
 
0bcbe4f
68c7721
 
 
f4a0e9d
 
7022131

import gradio as gr
import pandas as pd
from huggingface_hub import list_models
import plotly.express as px


# CSV file names, one per benchmarked task. The same file name is looked up
# under both data/energy/ and data/params/ by the loaders below. Order matters
# for the "all tasks" views, which concatenate the files in this order.
tasks = [
    'asr.csv',
    'object_detection.csv',
    'text_classification.csv',
    'image_captioning.csv',
    'question_answering.csv',
    'text_generation.csv',
    'image_classification.csv',
    'sentence_similarity.csv',
    'image_generation.csv',
    'summarization.csv',
]

def get_plots(task):
    """Build the energy scatter plot for one task.

    Args:
        task: CSV file name (e.g. ``'asr.csv'``) that exists under both
            ``data/energy/`` and ``data/params/``.

    Returns:
        A Plotly Express scatter figure with one point per merged row: model
        on the x axis, ``total_gpu_energy * 1000`` on the y axis, coloured by
        a three-level star rating.
    """
    # TODO: hover text with energy efficiency number, parameters
    energy_col = 'Total GPU Energy (Wh)'
    star_colors = {"⭐": 'red', "⭐⭐": "yellow", "⭐⭐⭐": "green"}

    energy = pd.read_csv('data/energy/' + task)
    # The params file names its model column "Link"; align it with the energy file.
    params = pd.read_csv('data/params/' + task).rename(columns={"Link": "model"})
    merged = pd.merge(energy, params, on='model')

    # NOTE(review): the x1000 scaling presumably converts kWh to Wh — confirm
    # against the units used in the energy CSVs.
    merged[energy_col] = merged['total_gpu_energy'] * 1000
    merged = merged.sort_values(by=[energy_col])
    merged['parameters'] = merged['parameters'].apply(format_params)
    # Three equal-width bins over the observed range; the lowest-energy bin
    # gets the most stars.
    merged['energy_star'] = pd.cut(merged[energy_col], 3, labels=["⭐⭐⭐", "⭐⭐", "⭐"])

    fig = px.scatter(
        merged,
        x="model",
        y=energy_col,
        custom_data=['parameters'],
        height=500,
        width=800,
        color='energy_star',
        color_discrete_map=star_colors,
    )
    hover_lines = ["Total Energy: %{y}", "Parameters: %{customdata[0]}"]
    fig.update_traces(hovertemplate="<br>".join(hover_lines))
    return fig

def get_all_plots():
    """Build one energy scatter plot covering every task in ``tasks``.

    For each task file the energy and parameter CSVs are merged on the model
    name, and only the ``model``, ``parameters`` and ``total_gpu_energy``
    columns are kept. Rows are not de-duplicated, so a model present in
    several task files contributes several points.

    Returns:
        A Plotly Express scatter figure: model on the x axis,
        ``total_gpu_energy * 1000`` on the y axis, coloured by a three-level
        star rating computed over all tasks together.
    """
    energy_col = 'Total GPU Energy (Wh)'
    keep_cols = ['model', 'parameters', 'total_gpu_energy']

    # Collect the per-task frames and concatenate once. Growing a DataFrame
    # with pd.concat inside the loop re-copies all earlier rows on every
    # iteration, and seeding it with an empty frame relies on pandas'
    # deprecated handling of empty inputs when inferring result dtypes.
    frames = []
    for task in tasks:
        energy = pd.read_csv('data/energy/' + task)
        # The params file names its model column "Link"; align it with the energy file.
        params = pd.read_csv('data/params/' + task).rename(columns={"Link": "model"})
        merged = pd.merge(energy, params, on='model')
        frames.append(merged[keep_cols])
    all_df = pd.concat(frames)

    # NOTE(review): the x1000 scaling presumably converts kWh to Wh — confirm
    # against the units used in the energy CSVs.
    all_df[energy_col] = all_df['total_gpu_energy'] * 1000
    all_df = all_df.sort_values(by=[energy_col])
    all_df['parameters'] = all_df['parameters'].apply(format_params)
    # Three equal-width bins over the observed range; the lowest-energy bin
    # gets the most stars.
    all_df['energy_star'] = pd.cut(all_df[energy_col], 3, labels=["⭐⭐⭐", "⭐⭐", "⭐"])

    fig = px.scatter(
        all_df,
        x="model",
        y=energy_col,
        custom_data=['parameters'],
        height=500,
        width=800,
        color='energy_star',
        color_discrete_map={"⭐": 'red', "⭐⭐": "yellow", "⭐⭐⭐": "green"},
    )
    fig.update_traces(
        hovertemplate="<br>".join([
            "Total Energy: %{y}",
            "Parameters: %{customdata[0]}",
        ])
    )
    return fig

def make_link(mname):
    """Return a Markdown link to a model's page on huggingface.co.

    Args:
        mname: Model id, normally ``"namespace/name"``. Converted with
            ``str()`` first, so non-string values are accepted.

    Returns:
        ``"[label](https://huggingface.co/<mname>)"`` where ``label`` is the
        segment after the first ``/``. Ids with no ``/`` (models without a
        namespace, e.g. ``"gpt2"``) use the whole id as the label instead of
        raising ``IndexError``.
    """
    model_id = str(mname)
    segments = model_id.split('/')
    # Keep the original choice of the second segment when there is one.
    label = segments[1] if len(segments) > 1 else model_id
    return "[" + label + '](https://huggingface.co/' + model_id + ")"

def get_model_names(task):
    """Build the leaderboard table for one task.

    Args:
        task: CSV file name (e.g. ``'asr.csv'``) that exists under both
            ``data/params/`` and ``data/energy/``.

    Returns:
        A DataFrame with the columns ``Model`` (Markdown link), ``Rating``
        (stars), ``Total GPU Energy (Wh)`` (rounded to 2 decimals) and
        ``Parameters`` (formatted count), sorted by ascending energy, with
        one row per distinct model.
    """
    energy_col = 'Total GPU Energy (Wh)'

    # The params file names its model column "Link"; align it with the energy file.
    params = pd.read_csv('data/params/' + task).rename(columns={"Link": "model"})
    energy = pd.read_csv('data/energy/' + task)
    board = pd.merge(params, energy, on='model').drop_duplicates(subset=['model'])

    board['Parameters'] = board['parameters'].apply(format_params)
    board['Model'] = board['model'].apply(make_link)
    # NOTE(review): the x1000 scaling presumably converts kWh to Wh — confirm
    # against the units used in the energy CSVs.
    board[energy_col] = (board['total_gpu_energy'] * 1000).round(2)
    # Three equal-width bins over the observed range; the lowest-energy bin
    # gets the most stars.
    board['Rating'] = pd.cut(board[energy_col], 3, labels=["⭐⭐⭐", "⭐⭐", "⭐"])

    board = board.sort_values(energy_col)
    return board[['Model', 'Rating', energy_col, 'Parameters']]

def get_all_model_names():
    """Build the leaderboard table covering every task in ``tasks``.

    For each task file the parameter and energy CSVs are merged on the model
    name. The per-task results are stacked in ``tasks`` order and then
    de-duplicated by model, keeping the first occurrence.

    Returns:
        A DataFrame with the columns ``Model`` (Markdown link), ``Rating``
        (stars), ``Total GPU Energy (Wh)`` (rounded to 2 decimals) and
        ``Parameters`` (formatted count), sorted by ascending energy.
    """
    # TODO: add link to results in model card of each model
    energy_col = 'Total GPU Energy (Wh)'
    keep_cols = ['model', 'parameters', 'total_gpu_energy']

    # Collect the per-task frames and concatenate once. Growing a DataFrame
    # with pd.concat inside the loop re-copies all earlier rows on every
    # iteration, and seeding it with an empty frame relies on pandas'
    # deprecated handling of empty inputs when inferring result dtypes.
    frames = []
    for task in tasks:
        # The params file names its model column "Link"; align it with the energy file.
        params = pd.read_csv('data/params/' + task).rename(columns={"Link": "model"})
        energy = pd.read_csv('data/energy/' + task)
        merged = pd.merge(params, energy, on='model')
        frames.append(merged[keep_cols])
    all_df = pd.concat(frames)

    all_df = all_df.drop_duplicates(subset=['model'])
    all_df['Parameters'] = all_df['parameters'].apply(format_params)
    all_df['Model'] = all_df['model'].apply(make_link)
    # NOTE(review): the x1000 scaling presumably converts kWh to Wh — confirm
    # against the units used in the energy CSVs.
    all_df[energy_col] = (all_df['total_gpu_energy'] * 1000).round(2)
    # Three equal-width bins over the observed range; the lowest-energy bin
    # gets the most stars.
    all_df['Rating'] = pd.cut(all_df[energy_col], 3, labels=["⭐⭐⭐", "⭐⭐", "⭐"])
    all_df = all_df.sort_values(energy_col)
    return all_df[['Model', 'Rating', energy_col, 'Parameters']]


def format_params(num):
    """Format a parameter count as a short label in millions or billions.

    Args:
        num: Number of parameters.

    Returns:
        ``"<n>B"`` for counts of one billion or more: a whole number when the
        count is an exact multiple of a billion, otherwise rounded to one
        decimal. Smaller counts are returned as ``"<n>M"``, floored to whole
        millions.
    """
    billion = 1_000_000_000
    million = 1_000_000
    # ">=" so that exactly one billion is shown as "1B" rather than "1000M".
    if num >= billion:
        if num % billion == 0:
            return f'{num // billion}B'
        return f'{round(num / billion, 1)}B'
    return f'{num // million}M'



# (tab label, CSV file name) for every per-task tab, in display order.
# Each tab shows the scatter plot and the leaderboard table for its own file.
TASK_TABS = [
    ("Text Generation 💬", 'text_generation.csv'),
    ("Image Generation 📷", 'image_generation.csv'),
    ("Text Classification 🎭", 'text_classification.csv'),
    ("Image Classification 🖼️", 'image_classification.csv'),
    # Previously this tab loaded question_answering.csv (copy-paste slip), so it
    # duplicated the Extractive QA tab instead of showing captioning results.
    ("Image Captioning 📝", 'image_captioning.csv'),
    ("Summarization 📃", 'summarization.csv'),
    ("Automatic Speech Recognition 💬 ", 'asr.csv'),
    ("Object Detection 🚘", 'object_detection.csv'),
    ("Sentence Similarity 📚", 'sentence_similarity.csv'),
    ("Extractive QA ❔", 'question_answering.csv'),
]

demo = gr.Blocks()

with demo:
    gr.Markdown(
        """# Energy Star Leaderboard - v.0 (2024) 🌎 💻 🌟
    ### Welcome to the leaderboard for the [AI Energy Star Project!](https://huggingface.co/EnergyStarAI)
    Click through the tasks below to see how different models measure up in terms of energy efficiency"""
    )
    gr.Markdown(
        """Test your own models via the [submission portal (TODO)]."""
        )
    with gr.Tabs():
        # One tab per task: plot on the left, leaderboard table on the right.
        for tab_label, task_csv in TASK_TABS:
            with gr.TabItem(tab_label):
                with gr.Row():
                    with gr.Column():
                        plot = gr.Plot(get_plots(task_csv))
                    with gr.Column():
                        table = gr.Dataframe(get_model_names(task_csv), datatype="markdown")

        with gr.TabItem("All Tasks 💡"):
            with gr.Row():
                # The functions themselves are passed here (not their results),
                # unlike the per-task tabs above.
                with gr.Column(scale=2):
                    plot = gr.Plot(get_all_plots)
                with gr.Column(scale=1):
                    table = gr.Dataframe(get_all_model_names, datatype="markdown")

    with gr.Accordion("Methodology", open = False):
        gr.Markdown(
        """For each of the ten tasks above, we created a custom dataset with 1,000 entries (see all of the datasets on our [org Hub page](https://huggingface.co/EnergyStarAI)).
        We then tested each of the models from the leaderboard on the appropriate task on Nvidia A100 GPUs, measuring the energy consumed using [Code Carbon](https://mlco2.github.io/codecarbon/), an open-source Python package for tracking the environmental impacts of code.
        We developed and used a [Docker container](https://github.com/huggingface/EnergyStarAI/) to maximize the reproducibility of results, and to enable members of the community to benchmark internal models.
        Reach out to us if you want to collaborate!
        """)
    gr.Markdown(
    """Last updated: September 20th, 2024 by [Sasha Luccioni](https://huggingface.co/sasha)""")
demo.launch()