sashavor committed on
Commit
419df8a
•
1 Parent(s): a23bf4a

big overhaul

Browse files
Files changed (1) hide show
  1. app.py +55 -24
app.py CHANGED
@@ -10,10 +10,10 @@ CACHED_FILE_PATH = hf_hub_download(repo_id="sasha/co2_submissions", filename="co
10
 
11
  api = HfApi()
12
 
13
- def write_to_csv(hardware, training_time, provider, carbon_intensity, dynamic_emissions):
14
  with open(CACHED_FILE_PATH,'a', newline='') as f:
15
  writer = csv.writer(f)
16
- writer.writerow([hardware, training_time, provider, carbon_intensity, dynamic_emissions])
17
  api.upload_file(
18
  path_or_fileobj=CACHED_FILE_PATH,
19
  path_in_repo="co2_emissions.csv",
@@ -48,39 +48,55 @@ instances = pd.read_csv(compute_url)
48
  providers = [p.upper() for p in instances['provider'].unique().tolist()]
49
  providers.append('Local/Private Infastructure')
50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  kg_per_mile = 0.348
 
52
 
53
  electricity = pd.read_csv(electricity_url)
54
  servers = pd.read_csv(server_url)
55
- #print(servers.columns)
56
  embodied_gpu = pd.read_csv(embodied_gpu_url)
57
- #print(embodied_gpu.columns)
58
-
59
  #st.image('images/MIT_carbon_image_narrow.png', use_column_width=True, caption = 'Image credit: ')
60
  st.title("AI Carbon Calculator")
61
 
62
  st.markdown('## Estimate your AI model\'s CO2 carbon footprint! ๐ŸŒŽ๐Ÿ–ฅ๏ธ๐ŸŒŽ')
63
-
64
- st.markdown('##### The calculators below will help you calculate different aspects of your model\'s carbon footprint, as we did for'
65
- ' BLOOM ๐ŸŒธ, a 176-billion parameter language model [(see our preprint!)](https://arxiv.org/abs/2211.02001)')
66
- st.markdown('##### Don\'t forget to share your data to help us get a better idea of AI model\'s carbon emissions!')
67
 
68
  st.markdown('### Dynamic Emissions ๐Ÿš€')
69
  st.markdown('##### These are the emissions produced by generating the electricity necessary for powering model training.')
70
  with st.expander("Calculate the dynamic emissions of your model"):
71
- col1, col2, col3, col4 = st.columns(4)
72
  with col1:
73
- hardware = st.selectbox('GPU used', TDP['name'].tolist())
74
  gpu_tdp = TDP['tdp_watts'][TDP['name'] == hardware].tolist()[0]
75
- st.markdown("Different GPUs have different TDP (Thermal Design Power), which impacts how much energy you use.")
76
  with col2:
77
- training_time = st.number_input('Total number of GPU hours')
78
- st.markdown('This is calculated by multiplying the number of GPUs you used by the training time: '
79
- 'i.e. if you used 100 GPUs for 10 hours, this is equal to 100x10 = 1,000 GPU hours.')
80
  with col3:
 
 
81
  provider = st.selectbox('Provider used', providers)
82
  st.markdown('If you can\'t find your provider here, select "Local/Private Infrastructure".')
83
- with col4:
84
  if provider != 'Local/Private Infastructure':
85
  provider_instances = instances['region'][instances['provider'] == provider.lower()].unique().tolist()
86
  region = st.selectbox('Provider used', provider_instances)
@@ -90,7 +106,7 @@ with st.expander("Calculate the dynamic emissions of your model"):
90
  carbon_intensity = st.number_input('Carbon intensity of your energy grid, in grams of CO2 per kWh')
91
  st.markdown('You can consult a resource like the [IEA](https://www.iea.org/countries) or '
92
  ' [Electricity Map](https://app.electricitymaps.com/) to get this information.')
93
- dynamic_emissions = round(gpu_tdp * training_time * carbon_intensity/1000000)
94
  st.metric(label="Dynamic emissions", value=str(dynamic_emissions)+' kilograms of CO2eq')
95
  st.markdown('This is roughly equivalent to '+ str(round(dynamic_emissions/kg_per_mile,1)) + ' miles driven in an average US car'
96
  ' produced in 2021. [(Source: energy.gov)](https://www.energy.gov/eere/vehicles/articles/fotw-1223-january-31-2022-average-carbon-dioxide-emissions-2021-model-year)')
@@ -106,7 +122,7 @@ with st.expander("Calculate the experimental emissions of your model"):
106
  experimental_emissions = round(gpu_tdp * (experimentation_time) * carbon_intensity/1000000)
107
  st.metric(label="Experimental emissions", value=str(0.0)+' kilograms of CO2eq')
108
 
109
- st.markdown('### Idle Emissions ๐ŸŒ')
110
  st.markdown('##### These are the emissions produced by generating the electricity needed to power the rest of the infrastructure'
111
  'used for model training -- the datacenter, network, heating/cooling, storage, etc.')
112
  with st.expander("Calculate the idle emissions of your model"):
@@ -134,7 +150,7 @@ with st.expander("Calculate the idle emissions of your model"):
134
 
135
  else:
136
  st.markdown('##### Try to find the PUE of your local infrastructure. Otherwise, you can use the industry average, 1.58:')
137
- pue = st.number_input('Total number of GPU hours', value = 1.58)
138
  else:
139
  st.markdown('##### The PUE of the datacenter you used is: ')
140
  st.markdown('#### '+ str(pue))
@@ -145,23 +161,38 @@ st.markdown('### Embodied Emissions ๐Ÿ–ฅ๏ธ๐Ÿ”จ')
145
  st.markdown('##### These are the emissions associated with the materials and processes involved in producing'
146
  ' the computing equipment needed for AI models.')
147
  with st.expander("Calculate the embodied emissions of your model"):
148
- st.markdown('##### These are the trickiest emissions to track down since a lot of the information needed is missing!')
 
 
 
 
 
 
 
 
 
 
 
 
 
149
 
150
  m = st.markdown("""
151
  <style>
152
  div.stButton > button:first-child {
153
  background-color: rgb(80, 200, 120);
154
  background-image: none;
155
- font-size: 20px;
156
  height: 3em;
 
157
  }
158
  </style>""", unsafe_allow_html=True)
159
- buttoncol1, cuttoncol2, buttoncol3 = st.columns(3)
160
- with cuttoncol2:
161
- st.button(label="Anonymously share my data!", on_click = lambda *args: write_to_csv(hardware, training_time, provider, carbon_intensity, dynamic_emissions))
162
 
163
  st.markdown('### Methodology')
164
  with st.expander("More information about our Methodology"):
165
  st.markdown('Building on the work of the [ML CO2 Calculator](https://mlco2.github.io/impact/), this tool allows you to consider'
166
  ' other aspects of your model\'s carbon footprint based on the LCA methodology.')
 
167
  st.image('images/LCA_CO2.png', caption='The LCA methodology - the parts in green are those we focus on.')
 
10
 
11
  api = HfApi()
12
 
13
+ def write_to_csv(hardware, gpu_tdp, num_gpus, training_time, provider, carbon_intensity, dynamic_emissions, experimentation_time, experimental_emissions, pue, pue_emissions, embodied_type, embodied_emissions, model_info):
14
  with open(CACHED_FILE_PATH,'a', newline='') as f:
15
  writer = csv.writer(f)
16
+ writer.writerow([hardware, gpu_tdp, num_gpus, training_time, provider, carbon_intensity, dynamic_emissions, experimentation_time, experimental_emissions, pue, pue_emissions, embodied_type, embodied_emissions, model_info])
17
  api.upload_file(
18
  path_or_fileobj=CACHED_FILE_PATH,
19
  path_in_repo="co2_emissions.csv",
 
48
  providers = [p.upper() for p in instances['provider'].unique().tolist()]
49
  providers.append('Local/Private Infastructure')
50
 
51
+ ### Default values
52
+ hardware = "N/A"
53
+ gpu_tdp = 0
54
+ num_gpus = 0
55
+ training_time = 0.0
56
+ provider = "N/A"
57
+ carbon_intensity = 0.0
58
+ dynamic_emissions = 0.0
59
+ experimentation_time = 0.0
60
+ experimental_emissions = 0.0
61
+ pue = 1.0
62
+ pue_emissions = 0.0
63
+ embodied_type = 0.0
64
+ embodied_emissions = 0.0
65
+ model_info = "N/A"
66
+
67
+ ### Conversion factors
68
  kg_per_mile = 0.348
69
+ embodied_conversion_factor = 0.0289
70
 
71
  electricity = pd.read_csv(electricity_url)
72
  servers = pd.read_csv(server_url)
 
73
  embodied_gpu = pd.read_csv(embodied_gpu_url)
 
 
74
  #st.image('images/MIT_carbon_image_narrow.png', use_column_width=True, caption = 'Image credit: ')
75
  st.title("AI Carbon Calculator")
76
 
77
  st.markdown('## Estimate your AI model\'s CO2 carbon footprint! ๐ŸŒŽ๐Ÿ–ฅ๏ธ๐ŸŒŽ')
78
+ st.markdown('### Calculating the carbon footprint of AI models can be hard... this tool is here to help!')
79
+ st.markdown('##### Use the calculators below to calculate different aspects of your model\'s carbon footprint' \
80
+ 'and don\'t forget to share your data to help the community better understand the carbon emissions of AI!')
 
81
 
82
  st.markdown('### Dynamic Emissions ๐Ÿš€')
83
  st.markdown('##### These are the emissions produced by generating the electricity necessary for powering model training.')
84
  with st.expander("Calculate the dynamic emissions of your model"):
85
+ col1, col2, col3, col4, col5 = st.columns(5)
86
  with col1:
87
+ hardware = st.selectbox('Hardware used', TDP['name'].tolist())
88
  gpu_tdp = TDP['tdp_watts'][TDP['name'] == hardware].tolist()[0]
89
+ st.markdown("Different hardware has different TDP (Thermal Design Power), which impacts how much energy you use.")
90
  with col2:
91
+ num_gpus = st.number_input('Number of GPUs/CPUs/TPUs used', value = 16)
92
+ #st.markdown('This is calculated by multiplying the number of GPUs you used by the training time: '
93
+ # 'i.e. if you used 100 GPUs for 10 hours, this is equal to 100x10 = 1,000 GPU hours.')
94
  with col3:
95
+ training_time = st.number_input('Total training time (in hours)', value = 0.0)
96
+ with col4:
97
  provider = st.selectbox('Provider used', providers)
98
  st.markdown('If you can\'t find your provider here, select "Local/Private Infrastructure".')
99
+ with col5:
100
  if provider != 'Local/Private Infastructure':
101
  provider_instances = instances['region'][instances['provider'] == provider.lower()].unique().tolist()
102
  region = st.selectbox('Provider used', provider_instances)
 
106
  carbon_intensity = st.number_input('Carbon intensity of your energy grid, in grams of CO2 per kWh')
107
  st.markdown('You can consult a resource like the [IEA](https://www.iea.org/countries) or '
108
  ' [Electricity Map](https://app.electricitymaps.com/) to get this information.')
109
+ dynamic_emissions = round(gpu_tdp * num_gpus*training_time * carbon_intensity/1000000)
110
  st.metric(label="Dynamic emissions", value=str(dynamic_emissions)+' kilograms of CO2eq')
111
  st.markdown('This is roughly equivalent to '+ str(round(dynamic_emissions/kg_per_mile,1)) + ' miles driven in an average US car'
112
  ' produced in 2021. [(Source: energy.gov)](https://www.energy.gov/eere/vehicles/articles/fotw-1223-january-31-2022-average-carbon-dioxide-emissions-2021-model-year)')
 
122
  experimental_emissions = round(gpu_tdp * (experimentation_time) * carbon_intensity/1000000)
123
  st.metric(label="Experimental emissions", value=str(0.0)+' kilograms of CO2eq')
124
 
125
+ st.markdown('### Datacenter (Overhead) Emissions ๐ŸŒ')
126
  st.markdown('##### These are the emissions produced by generating the electricity needed to power the rest of the infrastructure'
127
  'used for model training -- the datacenter, network, heating/cooling, storage, etc.')
128
  with st.expander("Calculate the idle emissions of your model"):
 
150
 
151
  else:
152
  st.markdown('##### Try to find the PUE of your local infrastructure. Otherwise, you can use the industry average, 1.58:')
153
+ pue = st.slider('Total number of GPU hours', value = 1.58)
154
  else:
155
  st.markdown('##### The PUE of the datacenter you used is: ')
156
  st.markdown('#### '+ str(pue))
 
161
  st.markdown('##### These are the emissions associated with the materials and processes involved in producing'
162
  ' the computing equipment needed for AI models.')
163
  with st.expander("Calculate the embodied emissions of your model"):
164
+ st.markdown('These are the trickiest emissions to track down since a lot of the information needed is missing.')
165
+ st.markdown('##### Based on the number of GPUs and training time you indicated above, we can estimate that your model\'s embodied emissions are approximately: ')
166
+ hardware_type = TDP['type'][TDP['name'] == hardware].tolist()[0]
167
+ if hardware_type == 'cpu':
168
+ embodied_type = embodied_gpu['Value'][embodied_gpu['Ratio']=='Manufacturing emissions per additional CPU (kgCOโ‚‚eq)'].tolist()[0]
169
+ elif hardware_type == 'gpu' or hardware_type == 'tpu':
170
+ embodied_type = embodied_gpu['Value'][embodied_gpu['Ratio']=='Manufacturing emissions per additionnal GPU Card (kgCOโ‚‚eq)'].tolist()[0]
171
+ embodied_emissions = round(int(embodied_type)*embodied_conversion_factor*num_gpus*training_time/1000,1)
172
+ st.metric(label="Embodied emissions", value=str(embodied_emissions)+' kilograms of CO2eq')
173
+ st.markdown('This is a high-level estimate based on an hourly manufacturing emissions conversion factor (linearly ammortised) of 0.0289 [(source)](https://docs.google.com/spreadsheets/d/1DqYgQnEDLQVQm5acMAhLgHLD8xXCG9BIrk-_Nv6jF3k/).')
174
+
175
+ st.markdown('### Model Information โ„น๏ธ')
176
+ st.markdown('##### If you want to share the link to your model code or paper, please do so below! Otherwise, your submission will be anonymous.')
177
+ model_info = st.text_input(label= "Enter a link to your model (optional)")
178
 
179
  m = st.markdown("""
180
  <style>
181
  div.stButton > button:first-child {
182
  background-color: rgb(80, 200, 120);
183
  background-image: none;
184
+ font-size: 25px;
185
  height: 3em;
186
+ width: 15em;
187
  }
188
  </style>""", unsafe_allow_html=True)
189
+ buttoncol1, buttoncol2, buttoncol3 = st.columns(3)
190
+ with buttoncol2:
191
+ st.button(label="Share my CO2 data!", on_click = lambda *args: write_to_csv(hardware, gpu_tdp, num_gpus, training_time, provider, carbon_intensity, dynamic_emissions, experimentation_time, experimental_emissions, pue, pue_emissions, embodied_type, embodied_emissions, model_info))
192
 
193
  st.markdown('### Methodology')
194
  with st.expander("More information about our Methodology"):
195
  st.markdown('Building on the work of the [ML CO2 Calculator](https://mlco2.github.io/impact/), this tool allows you to consider'
196
  ' other aspects of your model\'s carbon footprint based on the LCA methodology.')
197
+ st.markdown('We considered all of these aspects when calculating the CO2 emissions of BLOOM ๐ŸŒธ, a 176-billion parameter language model [(see our preprint!)](https://arxiv.org/abs/2211.02001)'')')
198
  st.image('images/LCA_CO2.png', caption='The LCA methodology - the parts in green are those we focus on.')