"""Streamlit app that estimates the carbon footprint of training an AI model.

The page walks through dynamic, experimental, idle (PUE) and embodied
emissions, and lets users anonymously share their numbers to a Hugging Face
dataset repository.
"""

import csv
import math
import os

import pandas as pd
import streamlit as st
from huggingface_hub import HfApi, hf_hub_download

HF_TOKEN = os.getenv('HUGGING_FACE_HUB_TOKEN')
SUBMISSIONS_REPO_ID = "sasha/co2_submissions"
SUBMISSIONS_FILENAME = "co2_emissions.csv"
CACHED_FILE_PATH = hf_hub_download(
    repo_id=SUBMISSIONS_REPO_ID,
    filename=SUBMISSIONS_FILENAME,
    repo_type="dataset",
)
api = HfApi()

# Single source of truth for the "no cloud provider" option, so the selectbox
# entry, the help text and every comparison below cannot drift apart.
LOCAL_PROVIDER = 'Local/Private Infrastructure'

# Default PUE offered when neither the datacenter nor the provider has one.
INDUSTRY_AVERAGE_PUE = 1.58

# Provider-wide average PUE and its source, used when the selected
# datacenter has no PUE of its own in the instances table.
PROVIDER_AVERAGE_PUE = {
    'AWS': (1.135, "https://www.cloudcarbonfootprint.org/docs/methodology/"),
    'GCP': (1.1, "https://www.google.ca/about/datacenters/efficiency/"),
    'AZURE': (1.185, "https://www.cloudcarbonfootprint.org/docs/methodology/"),
    'OVH': (1.28, "https://corporate.ovhcloud.com/en-ca/sustainability/environment/"),
    'SCALEWAY': (1.35, "https://pue.dc3.scaleway.com/en/"),
}


def write_to_csv(hardware, training_time, provider, carbon_intensity, dynamic_emissions):
    """Append one submission row to the cached CSV and upload it to the Hub.

    Args:
        hardware: Name of the GPU selected by the user.
        training_time: Total number of GPU hours entered by the user.
        provider: Selected provider label.
        carbon_intensity: Carbon intensity used for the estimate.
        dynamic_emissions: Computed dynamic emissions.
    """
    with open(CACHED_FILE_PATH, 'a', newline='') as f:
        csv.writer(f).writerow(
            [hardware, training_time, provider, carbon_intensity, dynamic_emissions]
        )
    # Upload only once the file is closed so the appended row is on disk.
    # HF_TOKEN may be None; NOTE(review): huggingface_hub is then expected to
    # fall back to its own credential lookup -- confirm for the pinned version.
    api.upload_file(
        path_or_fileobj=CACHED_FILE_PATH,
        path_in_repo=SUBMISSIONS_FILENAME,
        repo_id=SUBMISSIONS_REPO_ID,
        repo_type="dataset",
        token=HF_TOKEN,
    )


def _emissions_kg(tdp_watts, hours, carbon_intensity):
    """Return rounded emissions for a TDP, a duration and a grid intensity.

    The product is divided by 1,000,000 and rounded to the nearest integer;
    the result is displayed as kilograms of CO2eq.
    """
    return round(tdp_watts * hours * carbon_intensity / 1000000)


def _lookup_pue(provider, region):
    """Return the PUE listed for ``provider``/``region``, or NaN if there is none.

    ``region`` is None when the user selected local/private infrastructure,
    in which case no row of the instances table can match.
    """
    if region is None:
        return math.nan
    matches = instances['PUE'][
        (instances['provider'] == provider.lower()) & (instances['region'] == region)
    ].tolist()
    return matches[0] if matches else math.nan


st.set_page_config(
    page_title="AI Carbon Calculator",
    layout="wide",
)

tdp_url = "https://raw.githubusercontent.com/mlco2/impact/master/data/gpus.csv"
compute_url = "https://raw.githubusercontent.com/mlco2/impact/master/data/impact.csv"
electricity_url = "https://raw.githubusercontent.com/mlco2/impact/master/data/2021-10-27yearly_averages.csv"

server_sheet_id = "1DqYgQnEDLQVQm5acMAhLgHLD8xXCG9BIrk-_Nv6jF3k"
server_sheet_name = "Server%20Carbon%20Footprint"
server_url = f"https://docs.google.com/spreadsheets/d/{server_sheet_id}/gviz/tq?tqx=out:csv&sheet={server_sheet_name}"

embodied_gpu_sheet_name = "Scope%203%20Ratios"
embodied_gpu_url = f"https://docs.google.com/spreadsheets/d/{server_sheet_id}/gviz/tq?tqx=out:csv&sheet={embodied_gpu_sheet_name}"

TDP = pd.read_csv(tdp_url)
instances = pd.read_csv(compute_url)
providers = [p.upper() for p in instances['provider'].unique().tolist()]
providers.append(LOCAL_PROVIDER)

kg_per_mile = 0.348

# Loaded but not used by any section visible in this script.
electricity = pd.read_csv(electricity_url)
servers = pd.read_csv(server_url)
embodied_gpu = pd.read_csv(embodied_gpu_url)

# NOTE: header image currently disabled.
# st.image('images/MIT_carbon_image_narrow.png', use_column_width=True, caption = 'Image credit: ')
st.title("AI Carbon Calculator")

st.markdown("## Estimate your AI model's CO2 carbon footprint! 🌎🖥️🌎")

st.markdown(
    "##### The calculators below will help you calculate different aspects of your model's carbon footprint, as we did for"
    " BLOOM 🌸, a 176-billion parameter language model [(see our preprint!)](https://arxiv.org/abs/2211.02001)"
)

st.markdown("##### Don't forget to share your data to help us get a better idea of AI model's carbon emissions!")

st.markdown('### Dynamic Emissions 🚀')
st.markdown('##### These are the emissions produced by generating the electricity necessary for powering model training.')
with st.expander("Calculate the dynamic emissions of your model"):
    col1, col2, col3, col4 = st.columns(4)
    with col1:
        hardware = st.selectbox('GPU used', TDP['name'].tolist())
        gpu_tdp = TDP['tdp_watts'][TDP['name'] == hardware].tolist()[0]
        st.markdown("Different GPUs have different TDP (Thermal Design Power), which impacts how much energy you use.")
    with col2:
        training_time = st.number_input('Total number of GPU hours')
        st.markdown(
            'This is calculated by multiplying the number of GPUs you used by the training time: '
            'i.e. if you used 100 GPUs for 10 hours, this is equal to 100x10 = 1,000 GPU hours.'
        )
    with col3:
        provider = st.selectbox('Provider used', providers)
        st.markdown('If you can\'t find your provider here, select "Local/Private Infrastructure".')
    with col4:
        if provider != LOCAL_PROVIDER:
            provider_instances = instances['region'][
                instances['provider'] == provider.lower()
            ].unique().tolist()
            region = st.selectbox('Region used', provider_instances)
            carbon_intensity = instances['impact'][
                (instances['provider'] == provider.lower()) & (instances['region'] == region)
            ].tolist()[0]
        else:
            # No datacenter region applies; later sections check for None.
            region = None
            carbon_intensity = st.number_input('Carbon intensity of your energy grid, in grams of CO2 per kWh')
            st.markdown(
                'You can consult a resource like the [IEA](https://www.iea.org/countries) or '
                '[Electricity Map](https://app.electricitymaps.com/) to get this information.'
            )
    dynamic_emissions = _emissions_kg(gpu_tdp, training_time, carbon_intensity)
    st.metric(label="Dynamic emissions", value=str(dynamic_emissions) + ' kilograms of CO2eq')
    st.markdown(
        'This is roughly equivalent to ' + str(round(dynamic_emissions / kg_per_mile, 1))
        + ' miles driven in an average US car'
        ' produced in 2021. [(Source: energy.gov)](https://www.energy.gov/eere/vehicles/articles/fotw-1223-january-31-2022-average-carbon-dioxide-emissions-2021-model-year)'
    )

st.markdown('### Experimental Emissions 👩‍🔬')
st.markdown(
    '##### These are the emissions produced by generating the electricity necessary for powering the experiments and tests needed to pick your final model architecture '
    'and parameters.'
)
with st.expander("Calculate the experimental emissions of your model"):
    st.markdown('##### Consult your training logs to figure out how many ablations, baselines and experiments were run before converging on the final model.')
    experimentation_time = st.number_input(label='Number of hours of experimentation run', value=training_time)
    st.markdown(
        '##### As a baseline, language models such as [OPT](https://arxiv.org/pdf/2205.01068.pdf) and [BLOOM](https://arxiv.org/abs/2211.02001)'
        ' found that experimentation roughly doubles the amount of compute used by training the model itself.'
    )
    experimental_emissions = _emissions_kg(gpu_tdp, experimentation_time, carbon_intensity)
    # Show the computed value (this metric used to be hard-coded to 0.0).
    st.metric(label="Experimental emissions", value=str(experimental_emissions) + ' kilograms of CO2eq')

st.markdown('### Idle Emissions 🌐')
st.markdown(
    '##### These are the emissions produced by generating the electricity needed to power the rest of the infrastructure'
    ' used for model training -- the datacenter, network, heating/cooling, storage, etc.'
)
with st.expander("Calculate the idle emissions of your model"):
    st.markdown(
        '##### A proxy often used to reflect idle emissions is PUE (Power Usage Effectiveness), which represents '
        'the ratio of energy used for computing overheads like cooling, which varies depending on the data center.'
    )
    pue = _lookup_pue(provider, region)
    if not math.isnan(pue):
        st.markdown('##### The PUE of the datacenter you used is: ')
        st.markdown('#### ' + str(pue))
    elif provider in PROVIDER_AVERAGE_PUE:
        st.markdown("##### The exact information isn't available for this datacenter! We will use your provider's average instead, which is:")
        pue, pue_source = PROVIDER_AVERAGE_PUE[provider]
        st.markdown('#### ' + str(pue) + " [(source)](" + pue_source + ")")
    else:
        # Local infrastructure, or a provider with no known average: ask the
        # user rather than carrying NaN into round() below.
        st.markdown('##### Try to find the PUE of your local infrastructure. Otherwise, you can use the industry average, 1.58:')
        pue = st.number_input('PUE of your infrastructure', value=INDUSTRY_AVERAGE_PUE)
    pue_emissions = round((experimental_emissions + dynamic_emissions) * pue)
    st.metric(label="Emissions considering PUE", value=str(pue_emissions) + ' kilograms of CO2eq')

st.markdown('### Embodied Emissions 🖥️🔨')
st.markdown(
    '##### These are the emissions associated with the materials and processes involved in producing'
    ' the computing equipment needed for AI models.'
)
with st.expander("Calculate the embodied emissions of your model"):
    st.markdown('##### These are the trickiest emissions to track down since a lot of the information needed is missing!')

st.markdown(""" """, unsafe_allow_html=True)

buttoncol1, buttoncol2, buttoncol3 = st.columns(3)
with buttoncol2:
    # Pass the current values through `args` so the callback receives exactly
    # what was on screen when the button was rendered.
    st.button(
        label="Anonymously share my data!",
        on_click=write_to_csv,
        args=(hardware, training_time, provider, carbon_intensity, dynamic_emissions),
    )

st.markdown('### Methodology')
with st.expander("More information about our Methodology"):
    st.markdown(
        'Building on the work of the [ML CO2 Calculator](https://mlco2.github.io/impact/), this tool allows you to consider'
        " other aspects of your model's carbon footprint based on the LCA methodology."
    )
    st.image('images/LCA_CO2.png', caption='The LCA methodology - the parts in green are those we focus on.')