Spaces:

sasha
/

AI_Carbon

Running

App Files Community

sashavor committed on Nov 21, 2022

Commit

ce8bd36

•

1 Parent(s): 795ccdc

lotsa changes

Browse files

Files changed (1) hide show

app.py +75 -30

app.py CHANGED Viewed

@@ -2,10 +2,11 @@ import streamlit as st
 import pandas as pd
 import os, csv
 from huggingface_hub import hf_hub_download, HfApi
 HF_TOKEN = os.getenv('HUGGING_FACE_HUB_TOKEN')
-CACHED_FILE_PATH = hf_hub_download(repo_id="sasha/co2_submissions", filename="dynamic_emissions.csv", repo_type="dataset")
 api = HfApi()
@@ -15,18 +16,18 @@ def write_to_csv(hardware, training_time, provider, carbon_intensity, dynamic_em
         writer.writerow([hardware, training_time, provider, carbon_intensity, dynamic_emissions])
     api.upload_file(
         path_or_fileobj=CACHED_FILE_PATH,
-        path_in_repo="dynamic_emissions.csv",
         repo_id="sasha/co2_submissions",
         repo_type="dataset",
     )
 st.set_page_config(
     page_title="AI Carbon Calculator",
     layout="wide",
 )
 tdp_url = "https://raw.githubusercontent.com/mlco2/impact/master/data/gpus.csv"
 compute_url = "https://raw.githubusercontent.com/mlco2/impact/master/data/impact.csv"
@@ -38,9 +39,8 @@ server_sheet_name = "Server%20Carbon%20Footprint"
 server_url = f"https://docs.google.com/spreadsheets/d/{server_sheet_id}/gviz/tq?tqx=out:csv&sheet={server_sheet_name}"
-embodied_gpu_sheet_id = "1DqYgQnEDLQVQm5acMAhLgHLD8xXCG9BIrk-_Nv6jF3k"
 embodied_gpu_sheet_name = "Scope%203%20Ratios"
-embodied_gpu_url = f"https://docs.google.com/spreadsheets/d/{embodied_gpu_sheet_id}/gviz/tq?tqx=out:csv&sheet={embodied_gpu_sheet_name}"
 TDP =pd.read_csv(tdp_url)
@@ -52,18 +52,21 @@ kg_per_mile = 0.348
 electricity = pd.read_csv(electricity_url)
 servers = pd.read_csv(server_url)
 embodied_gpu = pd.read_csv(embodied_gpu_url)
 #st.image('images/MIT_carbon_image_narrow.png', use_column_width=True, caption = 'Image credit: ')
 st.title("AI Carbon Calculator")
-st.markdown('## Estimate your model\'s CO2 carbon footprint!')
-st.markdown('##### You can use this tool to calculate different aspects of your model\'s carbon footprint.')
-st.markdown('##### Share your data to help us get a better idea of AI model\'s carbon emissions.')
-st.markdown('### Dynamic Emissions')
-st.markdown('##### These are the carbon emissions produced by generating the electricity necessary for powering model training')
 with st.expander("Calculate the dynamic emissions of your model"):
     col1, col2, col3, col4 = st.columns(4)
     with col1:
@@ -91,31 +94,73 @@ with st.expander("Calculate the dynamic emissions of your model"):
     st.metric(label="Dynamic emissions", value=str(dynamic_emissions)+' kilograms of CO2eq')
     st.markdown('This is roughly equivalent to '+ str(round(dynamic_emissions/kg_per_mile,1)) + ' miles driven in an average US car'
     ' produced in 2021. [(Source: energy.gov)](https://www.energy.gov/eere/vehicles/articles/fotw-1223-january-31-2022-average-carbon-dioxide-emissions-2021-model-year)')
-    st.button(label="Anonymously share my data", help="Share the data from your model anonymously for research purposes!",\
-    on_click = lambda *args: write_to_csv(hardware, training_time, provider, carbon_intensity, dynamic_emissions))
-st.markdown('### Idle Emissions')
-st.markdown('##### These are the emissions produced by generating the electricity needed to power the rest of the infrastructure'
-            'used for model training -- the datacenter, network, heating/cooling, storage, etc.')
-st.markdown('### Idle Emissions')
 st.markdown('##### These are the emissions produced by generating the electricity needed to power the rest of the infrastructure'
             'used for model training -- the datacenter, network, heating/cooling, storage, etc.')
 with st.expander("Calculate the idle emissions of your model"):
-    st.markdown('Do you know what the PUE (Power Usage Effectiveness) of your infrastructure is?')
-st.markdown('### Embodied Emissions')
-st.markdown('Choose your hardware, runtime and cloud provider/physical infrastructure to estimate the carbon impact of your research.')
 with st.expander("More information about our Methodology"):
     st.markdown('Building on the work of the [ML CO2 Calculator](https://mlco2.github.io/impact/), this tool allows you to consider'
                 ' other aspects of your model\'s carbon footprint based on the LCA methodology.')
     st.image('images/LCA_CO2.png', caption='The LCA methodology - the parts in green are those we focus on.')

 import pandas as pd
 import os, csv
 from huggingface_hub import hf_hub_download, HfApi
+import math
 HF_TOKEN = os.getenv('HUGGING_FACE_HUB_TOKEN')
+CACHED_FILE_PATH = hf_hub_download(repo_id="sasha/co2_submissions", filename="co2_emissions.csv", repo_type="dataset")
 api = HfApi()
         writer.writerow([hardware, training_time, provider, carbon_intensity, dynamic_emissions])
     api.upload_file(
         path_or_fileobj=CACHED_FILE_PATH,
+        path_in_repo="co2_emissions.csv",
         repo_id="sasha/co2_submissions",
         repo_type="dataset",
     )
 st.set_page_config(
     page_title="AI Carbon Calculator",
     layout="wide",
 )
 tdp_url = "https://raw.githubusercontent.com/mlco2/impact/master/data/gpus.csv"
 compute_url = "https://raw.githubusercontent.com/mlco2/impact/master/data/impact.csv"
 server_url = f"https://docs.google.com/spreadsheets/d/{server_sheet_id}/gviz/tq?tqx=out:csv&sheet={server_sheet_name}"
 embodied_gpu_sheet_name = "Scope%203%20Ratios"
+embodied_gpu_url = f"https://docs.google.com/spreadsheets/d/{server_sheet_id}/gviz/tq?tqx=out:csv&sheet={embodied_gpu_sheet_name}"
 TDP =pd.read_csv(tdp_url)
 electricity = pd.read_csv(electricity_url)
 servers = pd.read_csv(server_url)
+#print(servers.columns)
 embodied_gpu = pd.read_csv(embodied_gpu_url)
+#print(embodied_gpu.columns)
 #st.image('images/MIT_carbon_image_narrow.png', use_column_width=True, caption = 'Image credit: ')
 st.title("AI Carbon Calculator")
+st.markdown('## Estimate your AI model\'s CO2 carbon footprint! 🌎🖥️🌎')
+st.markdown('##### The calculators below will help you calculate different aspects of your model\'s carbon footprint, as we did for'
+            ' BLOOM 🌸, a 176-billion parameter language model [(see our preprint!)](https://arxiv.org/abs/2211.02001)')
+st.markdown('##### Don\'t forget to share your data to help us get a better idea of AI model\'s carbon emissions!')
+st.markdown('### Dynamic Emissions 🚀')
+st.markdown('##### These are the emissions produced by generating the electricity necessary for powering model training.')
 with st.expander("Calculate the dynamic emissions of your model"):
     col1, col2, col3, col4 = st.columns(4)
     with col1:
     st.metric(label="Dynamic emissions", value=str(dynamic_emissions)+' kilograms of CO2eq')
     st.markdown('This is roughly equivalent to '+ str(round(dynamic_emissions/kg_per_mile,1)) + ' miles driven in an average US car'
     ' produced in 2021. [(Source: energy.gov)](https://www.energy.gov/eere/vehicles/articles/fotw-1223-january-31-2022-average-carbon-dioxide-emissions-2021-model-year)')
+st.markdown('### Experimental Emissions 👩‍🔬')
+st.markdown('##### These are the emissions produced by generating the electricity necessary for powering the experiments and tests needed to pick your final model architecture '
+            'and parameters.')
+with st.expander("Calculate the experimental emissions of your model"):
+            st.markdown('##### Consult your training logs to figure out how many ablations, baselines and experiments were run before converging on the final model.')
+            experimentation_time = st.number_input(label='Number of hours of experimentation run', value=training_time)
+            st.markdown('##### As a baseline, language models such as [OPT](https://arxiv.org/pdf/2205.01068.pdf) and [BLOOM](https://arxiv.org/abs/2211.02001)'
+                        ' found that experimentation roughly doubles the amount of compute used by training the model itself.')
+            experimental_emissions = round(gpu_tdp * (experimentation_time) * carbon_intensity/1000000)
+            st.metric(label="Experimental emissions", value=str(0.0)+' kilograms of CO2eq')
+st.markdown('### Idle Emissions 🌐')
 st.markdown('##### These are the emissions produced by generating the electricity needed to power the rest of the infrastructure'
             'used for model training -- the datacenter, network, heating/cooling, storage, etc.')
 with st.expander("Calculate the idle emissions of your model"):
+    st.markdown('##### A proxy often used to reflect idle emissions is PUE (Power Usage Effectiveness), which represents '
+                ' the ratio of energy used for computing overheads like cooling, which varies depending on the data center.')
+    pue = instances['PUE'][(instances['provider'] == provider.lower()) & (instances['region'] == region)].tolist()[0]
+    if math.isnan(pue) == True:
+        if provider != 'Local/Private Infastructure':
+            st.markdown('##### The exact information isn\'t available for this datacenter! We will use your provider\'s average instead, which is:')
+            if provider == 'AWS':
+                pue = 1.135
+                st.markdown('#### ' + str(pue)+ " [(source)](https://www.cloudcarbonfootprint.org/docs/methodology/)")
+            elif provider == 'GCP':
+                pue = 1.1
+                st.markdown('#### ' + str(pue) + " [(source)](https://www.google.ca/about/datacenters/efficiency/)")
+            elif provider == 'AZURE':
+                pue = 1.185
+                st.markdown('#### ' + str(pue) + " [(source)](https://www.cloudcarbonfootprint.org/docs/methodology/)")
+            elif provider == 'OVH':
+                pue = 1.28
+                st.markdown('#### ' + str(pue) + " [(source)](https://corporate.ovhcloud.com/en-ca/sustainability/environment/)")
+            elif provider == 'SCALEWAY':
+                pue = 1.35
+                st.markdown('#### ' +str(pue) + " [(source)](https://pue.dc3.scaleway.com/en/)")
+        else:
+            st.markdown('##### Try to find the PUE of your local infrastructure. Otherwise, you can use the industry average, 1.58:')
+            pue = st.number_input('Total number of GPU hours', value = 1.58)
+    else:
+        st.markdown('##### The PUE of the datacenter you used is: ')
+        st.markdown('#### '+ str(pue))
+    pue_emissions = round((experimental_emissions+ dynamic_emissions)*pue)
+    st.metric(label="Emissions considering PUE", value=str(pue_emissions)+' kilograms of CO2eq')
+st.markdown('### Embodied Emissions 🖥️🔨')
+st.markdown('##### These are the emissions associated with the materials and processes involved in producing'
+            ' the computing equipment needed for AI models.')
+with st.expander("Calculate the embodied emissions of your model"):
+        st.markdown('##### These are the trickiest emissions to track down since a lot of the information needed is missing!')
+m = st.markdown("""
+<style>
+div.stButton > button:first-child {
+    background-color: rgb(80, 200, 120);
+    font-size: 20px;
+    height: 3em;
+}
+</style>""", unsafe_allow_html=True)
+buttoncol1, cuttoncol2, buttoncol3 = st.columns(3)
+with cuttoncol2:
+    st.button(label="Anonymously share my data!", on_click = lambda *args: write_to_csv(hardware, training_time, provider, carbon_intensity, dynamic_emissions))
+st.markdown('### Methodology')
 with st.expander("More information about our Methodology"):
     st.markdown('Building on the work of the [ML CO2 Calculator](https://mlco2.github.io/impact/), this tool allows you to consider'
                 ' other aspects of your model\'s carbon footprint based on the LCA methodology.')
     st.image('images/LCA_CO2.png', caption='The LCA methodology - the parts in green are those we focus on.')