File size: 5,471 Bytes
393f86d
 
32ac110
 
393f86d
 
db8cc8e
 
 
32ac110
 
 
db8cc8e
32ac110
 
 
9ca002e
32ac110
 
 
 
 
db8cc8e
393f86d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7ff9457
393f86d
9ca002e
393f86d
 
 
bbb131b
393f86d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bbb131b
0e83308
32ac110
 
393f86d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
import streamlit as st
import pandas as pd
import os, csv
from huggingface_hub import hf_hub_download, HfApi

# Hugging Face Hub token taken from the environment; None when the variable
# is not set.
HF_TOKEN = os.environ.get('HUGGING_FACE_HUB_TOKEN')

# Fetch the shared submissions CSV from the dataset repo. The returned local
# path is what write_to_csv() appends to and re-uploads.
CACHED_FILE_PATH = hf_hub_download(
    repo_id="sasha/co2_submissions",
    filename="dynamic_emissions.csv",
    repo_type="dataset",
)

# Hub client used for uploading the updated CSV.
api = HfApi()

def write_to_csv(hardware, training_time, provider, carbon_intensity, dynamic_emissions):
    """Append one submission row to the cached CSV and upload it to the Hub.

    The five arguments are written, in this order, as a single CSV row to
    ``CACHED_FILE_PATH``; the whole file is then uploaded as
    ``dynamic_emissions.csv`` to the ``sasha/co2_submissions`` dataset repo.

    Args:
        hardware: Name of the GPU selected in the calculator form.
        training_time: Total number of GPU hours entered in the form.
        provider: Provider label selected in the form.
        carbon_intensity: Carbon intensity used for the estimate.
        dynamic_emissions: Computed dynamic emissions value.

    Raises:
        OSError: If the cached CSV cannot be opened for appending.
        Any exception raised by ``api.upload_file`` is propagated unchanged.
    """
    row = [hardware, training_time, provider, carbon_intensity, dynamic_emissions]
    # newline='' hands line-ending control to the csv module; the explicit
    # encoding keeps the file identical regardless of the host's locale.
    with open(CACHED_FILE_PATH, 'a', newline='', encoding='utf-8') as f:
        csv.writer(f).writerow(row)
    api.upload_file(
        path_or_fileobj=CACHED_FILE_PATH,
        path_in_repo="dynamic_emissions.csv",
        repo_id="sasha/co2_submissions",
        repo_type="dataset",
        # Authenticate explicitly with the token read from the environment
        # instead of relying on implicit credential discovery. A value of
        # None leaves the library's default lookup in place.
        token=HF_TOKEN,
    )



# Page-level configuration for the Streamlit app.
st.set_page_config(
    page_title="AI Carbon Calculator",
    layout="wide",
)

# Remote CSV sources from the ML CO2 Impact project: GPU power figures and
# per-provider/region compute instances.
tdp_url = "https://raw.githubusercontent.com/mlco2/impact/master/data/gpus.csv"
compute_url = "https://raw.githubusercontent.com/mlco2/impact/master/data/impact.csv"


electricity_url = "https://raw.githubusercontent.com/mlco2/impact/master/data/2021-10-27yearly_averages.csv"

# Google Sheets tabs exported as CSV through the gviz endpoint.
server_sheet_id = "1DqYgQnEDLQVQm5acMAhLgHLD8xXCG9BIrk-_Nv6jF3k"
server_sheet_name = "Server%20Carbon%20Footprint"
server_url = f"https://docs.google.com/spreadsheets/d/{server_sheet_id}/gviz/tq?tqx=out:csv&sheet={server_sheet_name}"


embodied_gpu_sheet_id = "1DqYgQnEDLQVQm5acMAhLgHLD8xXCG9BIrk-_Nv6jF3k"
embodied_gpu_sheet_name = "Scope%203%20Ratios"
embodied_gpu_url = f"https://docs.google.com/spreadsheets/d/{embodied_gpu_sheet_id}/gviz/tq?tqx=out:csv&sheet={embodied_gpu_sheet_name}"

# GPU table; the form below reads its 'name' and 'tdp_watts' columns.
TDP = pd.read_csv(tdp_url)

# Compute-instance table; the form reads 'provider', 'region' and 'impact'.
instances = pd.read_csv(compute_url)

# Label of the extra option for users who are not on a listed provider.
# Kept in one constant so the option list, the comparison in the form and the
# help text ("Local/Private Infrastructure") can never drift apart again.
LOCAL_PROVIDER = 'Local/Private Infrastructure'

# Provider names are shown upper-cased and lower-cased again for look-ups.
providers = [p.upper() for p in instances['provider'].unique().tolist()]
providers.append(LOCAL_PROVIDER)

# Kilograms of CO2 per mile for an average 2021 US car (see the energy.gov
# link rendered next to the result).
kg_per_mile = 0.348

# Loaded for the other sections of the calculator; not used by the
# dynamic-emissions form below.
electricity = pd.read_csv(electricity_url)
servers = pd.read_csv(server_url)
embodied_gpu = pd.read_csv(embodied_gpu_url)


st.title("AI Carbon Calculator")

st.markdown('## Estimate your model\'s CO2 carbon footprint!')

st.markdown('Building on the work of the [ML CO2 Calculator](https://mlco2.github.io/impact/), this tool allows you to consider'
            ' other aspects of your model\'s carbon footprint based on the LCA methodology.')

st.markdown('You can use this tool to calculate different aspects of your model: the dynamic emissions, idle emissions and embodied emissions.')

st.markdown('### Dynamic Emissions')
with st.expander("Calculate the emissions produced by energy consumption of model training"):
    # NOTE(review): widgets inside a form only deliver new values when the
    # form is submitted, so the region list follows the provider choice only
    # after a submit - confirm this is the intended interaction.
    with st.form(key='dynamic_emissions'):
        col1, col2, col3, col4 = st.columns(4)
        with col1:
            hardware = st.selectbox('GPU used', TDP['name'].tolist())
            # TDP in watts of the selected GPU (first matching row).
            gpu_tdp = TDP['tdp_watts'][TDP['name'] == hardware].tolist()[0]
            st.markdown("Different GPUs have different TDP (Thermal Design Power), which impacts how much energy you use.")
        with col2:
            training_time = st.number_input('Total number of GPU hours')
            st.markdown('This is calculated by multiplying the number of GPUs you used by the training time: '
                        'i.e. if you used 100 GPUs for 10 hours, this is equal to 100x10 = 1,000 GPU hours.')
        with col3:
            provider = st.selectbox('Provider used', providers)
            st.markdown('If you can\'t find your provider here, select "Local/Private Infrastructure".')
        with col4:
            if provider != LOCAL_PROVIDER:
                # Known provider: let the user pick one of its regions and
                # take the carbon intensity from the instances table.
                provider_rows = instances['provider'] == provider.lower()
                provider_instances = instances['region'][provider_rows].unique().tolist()
                region = st.selectbox('Region used', provider_instances)
                carbon_intensity = instances['impact'][provider_rows & (instances['region'] == region)].tolist()[0]
            else:
                # Local infrastructure: the user supplies the grid intensity.
                carbon_intensity = st.number_input('Carbon intensity of your energy grid, in grams of CO2 per kWh')
                st.markdown('You can consult a resource like the [IEA](https://www.iea.org/countries) or '
                            ' [Electricity Map](https://app.electricitymaps.com/) to get this information.')

        # watts * hours = Wh; Wh * (g CO2 / kWh) / 1e6 = kg CO2.
        # Assumes the 'impact' column is also in g CO2 per kWh - TODO confirm.
        dynamic_emissions = round(gpu_tdp * training_time * carbon_intensity/1000000)
        st.metric(label="Dynamic emissions", value=str(dynamic_emissions)+' kilograms of CO2eq')
        st.markdown('This is roughly equivalent to '+ str(round(dynamic_emissions/kg_per_mile,1)) + ' miles driven in an average US car'
            ' produced in 2021. [(Source: energy.gov)](https://www.energy.gov/eere/vehicles/articles/fotw-1223-january-31-2022-average-carbon-dioxide-emissions-2021-model-year)')

        # Act on the button's return value rather than an on_click callback:
        # a callback runs before the script is re-executed, so it would see
        # the values from the previous run instead of the ones just submitted.
        share_clicked = st.form_submit_button(
            label="Anonymously share my data",
            help="Share the data from your model anonymously for research purposes!",
        )
        if share_clicked:
            write_to_csv(hardware, training_time, provider, carbon_intensity, dynamic_emissions)


# Idle emissions: only a heading and a prompt are rendered for this section.
st.markdown("### Idle Emissions")
st.markdown("Do you know what the PUE (Power Usage Effectiveness) of your infrastructure is?")


# Embodied emissions: heading plus a short description, no inputs yet.
st.markdown("### Embodied Emissions")
st.markdown(
    "Choose your hardware, runtime and cloud provider/physical infrastructure "
    "to estimate the carbon impact of your research."
)


# Methodology: heading followed by the LCA diagram shipped with the app.
st.markdown("#### More information about our Methodology")

lca_caption = 'The LCA methodology - the parts in green are those we focus on.'
st.image('images/LCA_CO2.png', caption=lca_caption)

# NOTE(review): the options passed here are the GPU table (TDP) although the
# label talks about a model, and `modelname` is not used afterwards in the
# visible code - confirm what this widget is meant to offer.
modelname = st.selectbox(label='Choose a model to test', options=TDP)