sashavor committed on
Commit
ce8bd36
•
1 Parent(s): 795ccdc

lotsa changes

Browse files
Files changed (1) hide show
  1. app.py +75 -30
app.py CHANGED
@@ -2,10 +2,11 @@ import streamlit as st
2
  import pandas as pd
3
  import os, csv
4
  from huggingface_hub import hf_hub_download, HfApi
 
5
 
6
  HF_TOKEN = os.getenv('HUGGING_FACE_HUB_TOKEN')
7
 
8
- CACHED_FILE_PATH = hf_hub_download(repo_id="sasha/co2_submissions", filename="dynamic_emissions.csv", repo_type="dataset")
9
 
10
  api = HfApi()
11
 
@@ -15,18 +16,18 @@ def write_to_csv(hardware, training_time, provider, carbon_intensity, dynamic_em
15
  writer.writerow([hardware, training_time, provider, carbon_intensity, dynamic_emissions])
16
  api.upload_file(
17
  path_or_fileobj=CACHED_FILE_PATH,
18
- path_in_repo="dynamic_emissions.csv",
19
  repo_id="sasha/co2_submissions",
20
  repo_type="dataset",
21
  )
22
 
23
-
24
-
25
  st.set_page_config(
26
  page_title="AI Carbon Calculator",
27
  layout="wide",
28
  )
29
 
 
 
30
  tdp_url = "https://raw.githubusercontent.com/mlco2/impact/master/data/gpus.csv"
31
  compute_url = "https://raw.githubusercontent.com/mlco2/impact/master/data/impact.csv"
32
 
@@ -38,9 +39,8 @@ server_sheet_name = "Server%20Carbon%20Footprint"
38
  server_url = f"https://docs.google.com/spreadsheets/d/{server_sheet_id}/gviz/tq?tqx=out:csv&sheet={server_sheet_name}"
39
 
40
 
41
- embodied_gpu_sheet_id = "1DqYgQnEDLQVQm5acMAhLgHLD8xXCG9BIrk-_Nv6jF3k"
42
  embodied_gpu_sheet_name = "Scope%203%20Ratios"
43
- embodied_gpu_url = f"https://docs.google.com/spreadsheets/d/{embodied_gpu_sheet_id}/gviz/tq?tqx=out:csv&sheet={embodied_gpu_sheet_name}"
44
 
45
  TDP =pd.read_csv(tdp_url)
46
 
@@ -52,18 +52,21 @@ kg_per_mile = 0.348
52
 
53
  electricity = pd.read_csv(electricity_url)
54
  servers = pd.read_csv(server_url)
 
55
  embodied_gpu = pd.read_csv(embodied_gpu_url)
 
56
 
57
  #st.image('images/MIT_carbon_image_narrow.png', use_column_width=True, caption = 'Image credit: ')
58
  st.title("AI Carbon Calculator")
59
 
60
- st.markdown('## Estimate your model\'s CO2 carbon footprint!')
61
 
62
- st.markdown('##### You can use this tool to calculate different aspects of your model\'s carbon footprint.')
63
- st.markdown('##### Share your data to help us get a better idea of AI model\'s carbon emissions.')
 
64
 
65
- st.markdown('### Dynamic Emissions')
66
- st.markdown('##### These are the carbon emissions produced by generating the electricity necessary for powering model training')
67
  with st.expander("Calculate the dynamic emissions of your model"):
68
  col1, col2, col3, col4 = st.columns(4)
69
  with col1:
@@ -91,31 +94,73 @@ with st.expander("Calculate the dynamic emissions of your model"):
91
  st.metric(label="Dynamic emissions", value=str(dynamic_emissions)+' kilograms of CO2eq')
92
  st.markdown('This is roughly equivalent to '+ str(round(dynamic_emissions/kg_per_mile,1)) + ' miles driven in an average US car'
93
  ' produced in 2021. [(Source: energy.gov)](https://www.energy.gov/eere/vehicles/articles/fotw-1223-january-31-2022-average-carbon-dioxide-emissions-2021-model-year)')
94
- st.button(label="Anonymously share my data", help="Share the data from your model anonymously for research purposes!",\
95
- on_click = lambda *args: write_to_csv(hardware, training_time, provider, carbon_intensity, dynamic_emissions))
96
-
97
- st.markdown('### Idle Emissions')
98
- st.markdown('##### These are the emissions produced by generating the electricity needed to power the rest of the infrastructure'
99
- 'used for model training -- the datacenter, network, heating/cooling, storage, etc.')
100
-
101
-
102
 
103
- st.markdown('### Idle Emissions')
 
 
 
 
 
 
 
 
 
 
 
104
  st.markdown('##### These are the emissions produced by generating the electricity needed to power the rest of the infrastructure'
105
  'used for model training -- the datacenter, network, heating/cooling, storage, etc.')
106
  with st.expander("Calculate the idle emissions of your model"):
107
- st.markdown('Do you know what the PUE (Power Usage Effectiveness) of your infrastructure is?')
108
-
109
-
110
-
111
- st.markdown('### Embodied Emissions')
112
- st.markdown('Choose your hardware, runtime and cloud provider/physical infrastructure to estimate the carbon impact of your research.')
113
-
114
-
 
 
 
 
 
 
 
 
 
 
 
 
 
115
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
  with st.expander("More information about our Methodology"):
117
  st.markdown('Building on the work of the [ML CO2 Calculator](https://mlco2.github.io/impact/), this tool allows you to consider'
118
  ' other aspects of your model\'s carbon footprint based on the LCA methodology.')
119
-
120
-
121
  st.image('images/LCA_CO2.png', caption='The LCA methodology - the parts in green are those we focus on.')
 
2
  import pandas as pd
3
  import os, csv
4
  from huggingface_hub import hf_hub_download, HfApi
5
+ import math
6
 
7
  HF_TOKEN = os.getenv('HUGGING_FACE_HUB_TOKEN')
8
 
9
+ CACHED_FILE_PATH = hf_hub_download(repo_id="sasha/co2_submissions", filename="co2_emissions.csv", repo_type="dataset")
10
 
11
  api = HfApi()
12
 
 
16
  writer.writerow([hardware, training_time, provider, carbon_intensity, dynamic_emissions])
17
  api.upload_file(
18
  path_or_fileobj=CACHED_FILE_PATH,
19
+ path_in_repo="co2_emissions.csv",
20
  repo_id="sasha/co2_submissions",
21
  repo_type="dataset",
22
  )
23
 
 
 
24
  st.set_page_config(
25
  page_title="AI Carbon Calculator",
26
  layout="wide",
27
  )
28
 
29
+
30
+
31
  tdp_url = "https://raw.githubusercontent.com/mlco2/impact/master/data/gpus.csv"
32
  compute_url = "https://raw.githubusercontent.com/mlco2/impact/master/data/impact.csv"
33
 
 
39
  server_url = f"https://docs.google.com/spreadsheets/d/{server_sheet_id}/gviz/tq?tqx=out:csv&sheet={server_sheet_name}"
40
 
41
 
 
42
  embodied_gpu_sheet_name = "Scope%203%20Ratios"
43
+ embodied_gpu_url = f"https://docs.google.com/spreadsheets/d/{server_sheet_id}/gviz/tq?tqx=out:csv&sheet={embodied_gpu_sheet_name}"
44
 
45
  TDP =pd.read_csv(tdp_url)
46
 
 
52
 
53
  electricity = pd.read_csv(electricity_url)
54
  servers = pd.read_csv(server_url)
55
+ #print(servers.columns)
56
  embodied_gpu = pd.read_csv(embodied_gpu_url)
57
+ #print(embodied_gpu.columns)
58
 
59
  #st.image('images/MIT_carbon_image_narrow.png', use_column_width=True, caption = 'Image credit: ')
60
  st.title("AI Carbon Calculator")
61
 
62
+ st.markdown('## Estimate your AI model\'s CO2 carbon footprint! 🌎🖥️🌎')
63
 
64
+ st.markdown('##### The calculators below will help you calculate different aspects of your model\'s carbon footprint, as we did for'
65
+ ' BLOOM 🌸, a 176-billion parameter language model [(see our preprint!)](https://arxiv.org/abs/2211.02001)')
66
+ st.markdown('##### Don\'t forget to share your data to help us get a better idea of AI model\'s carbon emissions!')
67
 
68
+ st.markdown('### Dynamic Emissions 🚀')
69
+ st.markdown('##### These are the emissions produced by generating the electricity necessary for powering model training.')
70
  with st.expander("Calculate the dynamic emissions of your model"):
71
  col1, col2, col3, col4 = st.columns(4)
72
  with col1:
 
94
  st.metric(label="Dynamic emissions", value=str(dynamic_emissions)+' kilograms of CO2eq')
95
  st.markdown('This is roughly equivalent to '+ str(round(dynamic_emissions/kg_per_mile,1)) + ' miles driven in an average US car'
96
  ' produced in 2021. [(Source: energy.gov)](https://www.energy.gov/eere/vehicles/articles/fotw-1223-january-31-2022-average-carbon-dioxide-emissions-2021-model-year)')
 
 
 
 
 
 
 
 
97
 
98
+ st.markdown('### Experimental Emissions 👩‍🔬')
99
+ st.markdown('##### These are the emissions produced by generating the electricity necessary for powering the experiments and tests needed to pick your final model architecture '
100
+ 'and parameters.')
101
+ with st.expander("Calculate the experimental emissions of your model"):
102
+ st.markdown('##### Consult your training logs to figure out how many ablations, baselines and experiments were run before converging on the final model.')
103
+ experimentation_time = st.number_input(label='Number of hours of experimentation run', value=training_time)
104
+ st.markdown('##### As a baseline, language models such as [OPT](https://arxiv.org/pdf/2205.01068.pdf) and [BLOOM](https://arxiv.org/abs/2211.02001)'
105
+ ' found that experimentation roughly doubles the amount of compute used by training the model itself.')
106
+ experimental_emissions = round(gpu_tdp * (experimentation_time) * carbon_intensity/1000000)
107
+ st.metric(label="Experimental emissions", value=str(0.0)+' kilograms of CO2eq')
108
+
109
+ st.markdown('### Idle Emissions 🌐')
110
  st.markdown('##### These are the emissions produced by generating the electricity needed to power the rest of the infrastructure'
111
  'used for model training -- the datacenter, network, heating/cooling, storage, etc.')
112
  with st.expander("Calculate the idle emissions of your model"):
113
+ st.markdown('##### A proxy often used to reflect idle emissions is PUE (Power Usage Effectiveness), which represents '
114
+ ' the ratio of energy used for computing overheads like cooling, which varies depending on the data center.')
115
+ pue = instances['PUE'][(instances['provider'] == provider.lower()) & (instances['region'] == region)].tolist()[0]
116
+ if math.isnan(pue) == True:
117
+ if provider != 'Local/Private Infastructure':
118
+ st.markdown('##### The exact information isn\'t available for this datacenter! We will use your provider\'s average instead, which is:')
119
+ if provider == 'AWS':
120
+ pue = 1.135
121
+ st.markdown('#### ' + str(pue)+ " [(source)](https://www.cloudcarbonfootprint.org/docs/methodology/)")
122
+ elif provider == 'GCP':
123
+ pue = 1.1
124
+ st.markdown('#### ' + str(pue) + " [(source)](https://www.google.ca/about/datacenters/efficiency/)")
125
+ elif provider == 'AZURE':
126
+ pue = 1.185
127
+ st.markdown('#### ' + str(pue) + " [(source)](https://www.cloudcarbonfootprint.org/docs/methodology/)")
128
+ elif provider == 'OVH':
129
+ pue = 1.28
130
+ st.markdown('#### ' + str(pue) + " [(source)](https://corporate.ovhcloud.com/en-ca/sustainability/environment/)")
131
+ elif provider == 'SCALEWAY':
132
+ pue = 1.35
133
+ st.markdown('#### ' +str(pue) + " [(source)](https://pue.dc3.scaleway.com/en/)")
134
 
135
+ else:
136
+ st.markdown('##### Try to find the PUE of your local infrastructure. Otherwise, you can use the industry average, 1.58:')
137
+ pue = st.number_input('Total number of GPU hours', value = 1.58)
138
+ else:
139
+ st.markdown('##### The PUE of the datacenter you used is: ')
140
+ st.markdown('#### '+ str(pue))
141
+ pue_emissions = round((experimental_emissions+ dynamic_emissions)*pue)
142
+ st.metric(label="Emissions considering PUE", value=str(pue_emissions)+' kilograms of CO2eq')
143
+
144
+ st.markdown('### Embodied Emissions 🖥️🔨')
145
+ st.markdown('##### These are the emissions associated with the materials and processes involved in producing'
146
+ ' the computing equipment needed for AI models.')
147
+ with st.expander("Calculate the embodied emissions of your model"):
148
+ st.markdown('##### These are the trickiest emissions to track down since a lot of the information needed is missing!')
149
+
150
+ m = st.markdown("""
151
+ <style>
152
+ div.stButton > button:first-child {
153
+ background-color: rgb(80, 200, 120);
154
+ font-size: 20px;
155
+ height: 3em;
156
+ }
157
+ </style>""", unsafe_allow_html=True)
158
+ buttoncol1, cuttoncol2, buttoncol3 = st.columns(3)
159
+ with cuttoncol2:
160
+ st.button(label="Anonymously share my data!", on_click = lambda *args: write_to_csv(hardware, training_time, provider, carbon_intensity, dynamic_emissions))
161
+
162
+ st.markdown('### Methodology')
163
  with st.expander("More information about our Methodology"):
164
  st.markdown('Building on the work of the [ML CO2 Calculator](https://mlco2.github.io/impact/), this tool allows you to consider'
165
  ' other aspects of your model\'s carbon footprint based on the LCA methodology.')
 
 
166
  st.image('images/LCA_CO2.png', caption='The LCA methodology - the parts in green are those we focus on.')