sashavor committed on
Commit
1f6c998
•
1 Parent(s): 11ad112

fixing bugs

Browse files
Files changed (1) hide show
  1. app.py +15 -10
app.py CHANGED
@@ -11,6 +11,7 @@ CACHED_FILE_PATH = hf_hub_download(repo_id="sasha/co2_submissions", filename="co
11
  api = HfApi()
12
 
13
  def write_to_csv(hardware, gpu_tdp, num_gpus, training_time, provider, carbon_intensity, dynamic_emissions, experimentation_time, experimental_emissions, pue, pue_emissions, embodied_type, embodied_emissions, model_info):
 
14
  with open(CACHED_FILE_PATH,'a', newline='') as f:
15
  writer = csv.writer(f)
16
  writer.writerow([hardware, gpu_tdp, num_gpus, training_time, provider, carbon_intensity, dynamic_emissions, experimentation_time, experimental_emissions, pue, pue_emissions, embodied_type, embodied_emissions, model_info])
@@ -67,6 +68,7 @@ model_info = "N/A"
67
  ### Conversion factors
68
  kg_per_mile = 0.348
69
  embodied_conversion_factor = 0.0289
 
70
 
71
  electricity = pd.read_csv(electricity_url)
72
  servers = pd.read_csv(server_url)
@@ -86,47 +88,46 @@ with st.expander("Calculate the dynamic emissions of your model"):
86
  with col1:
87
  hardware = st.selectbox('Hardware used', TDP['name'].tolist())
88
  gpu_tdp = TDP['tdp_watts'][TDP['name'] == hardware].tolist()[0]
89
- st.markdown("Different hardware has different TDP (Thermal Design Power), which impacts how much energy you use.")
90
  with col2:
91
  num_gpus = st.text_input('Number of GPUs/CPUs/TPUs used', value = 16)
92
- #st.markdown('This is calculated by multiplying the number of GPUs you used by the training time: '
93
- # 'i.e. if you used 100 GPUs for 10 hours, this is equal to 100x10 = 1,000 GPU hours.')
94
  with col3:
95
  training_time = st.number_input('Total training time (in hours)', value = 0.0)
 
96
  with col4:
97
  provider = st.selectbox('Provider used', providers)
98
  st.markdown('If you can\'t find your provider here, select "Local/Private Infrastructure".')
99
  with col5:
100
  if provider != 'Local/Private Infastructure':
101
  provider_instances = instances['region'][instances['provider'] == provider.lower()].unique().tolist()
102
- region = st.selectbox('Provider used', provider_instances)
103
  carbon_intensity = instances['impact'][(instances['provider'] == provider.lower()) & (instances['region'] == region)].tolist()[0]
104
-
105
  else:
106
  carbon_intensity = st.number_input('Carbon intensity of your energy grid, in grams of CO2 per kWh')
107
  st.markdown('You can consult a resource like the [IEA](https://www.iea.org/countries) or '
108
  ' [Electricity Map](https://app.electricitymaps.com/) to get this information.')
109
  dynamic_emissions = round(gpu_tdp * float(num_gpus)*training_time * carbon_intensity/1000000)
110
  st.metric(label="Dynamic emissions", value=str(dynamic_emissions)+' kilograms of CO2eq')
111
- st.markdown('This is roughly equivalent to '+ str(round(dynamic_emissions/kg_per_mile,1)) + ' miles driven in an average US car'
112
  ' produced in 2021. [(Source: energy.gov)](https://www.energy.gov/eere/vehicles/articles/fotw-1223-january-31-2022-average-carbon-dioxide-emissions-2021-model-year)')
113
 
114
  st.markdown('### Experimental Emissions 👩‍🔬')
115
  st.markdown('##### These are the emissions produced by generating the electricity necessary for powering the experiments and tests needed to pick your final model architecture '
116
  'and parameters.')
117
  with st.expander("Calculate the experimental emissions of your model"):
118
- st.markdown('##### Consult your training logs to figure out how many ablations, baselines and experiments were run before converging on the final model.')
119
  experimentation_time = st.number_input(label='Number of hours of experimentation run', value=training_time)
120
  st.markdown('##### As a baseline, language models such as [OPT](https://arxiv.org/pdf/2205.01068.pdf) and [BLOOM](https://arxiv.org/abs/2211.02001)'
121
  ' found that experimentation roughly doubles the amount of compute used by training the model itself.')
122
  experimental_emissions = round(gpu_tdp * (experimentation_time) * carbon_intensity/1000000)
123
- st.metric(label="Experimental emissions", value=str(0.0)+' kilograms of CO2eq')
124
 
125
  st.markdown('### Datacenter (Overhead) Emissions 🌐')
126
  st.markdown('##### These are the emissions produced by generating the electricity needed to power the rest of the infrastructure'
127
  'used for model training -- the datacenter, network, heating/cooling, storage, etc.')
128
  with st.expander("Calculate the idle emissions of your model"):
129
- st.markdown('##### A proxy often used to reflect idle emissions is PUE (Power Usage Effectiveness), which represents '
130
  ' the ratio of energy used for computing overheads like cooling, which varies depending on the data center.')
131
  pue = instances['PUE'][(instances['provider'] == provider.lower()) & (instances['region'] == region)].tolist()[0]
132
  if math.isnan(pue) == True:
@@ -186,9 +187,13 @@ div.stButton > button:first-child {
186
  width: 15em;
187
  }
188
  </style>""", unsafe_allow_html=True)
 
189
  buttoncol1, buttoncol2, buttoncol3 = st.columns(3)
190
  with buttoncol2:
191
- st.button(label="Share my CO2 data!", on_click = lambda *args: write_to_csv(hardware, gpu_tdp, num_gpus, training_time, provider, carbon_intensity, dynamic_emissions, experimentation_time, experimental_emissions, pue, pue_emissions, embodied_type, embodied_emissions, model_info))
 
 
 
192
 
193
  st.markdown('### Methodology')
194
  with st.expander("More information about our Methodology"):
 
11
  api = HfApi()
12
 
13
  def write_to_csv(hardware, gpu_tdp, num_gpus, training_time, provider, carbon_intensity, dynamic_emissions, experimentation_time, experimental_emissions, pue, pue_emissions, embodied_type, embodied_emissions, model_info):
14
+ st.session_state["is_shared"] = True
15
  with open(CACHED_FILE_PATH,'a', newline='') as f:
16
  writer = csv.writer(f)
17
  writer.writerow([hardware, gpu_tdp, num_gpus, training_time, provider, carbon_intensity, dynamic_emissions, experimentation_time, experimental_emissions, pue, pue_emissions, embodied_type, embodied_emissions, model_info])
 
68
  ### Conversion factors
69
  kg_per_mile = 0.348
70
  embodied_conversion_factor = 0.0289
71
+ st.session_state["is_shared"] = False
72
 
73
  electricity = pd.read_csv(electricity_url)
74
  servers = pd.read_csv(server_url)
 
88
  with col1:
89
  hardware = st.selectbox('Hardware used', TDP['name'].tolist())
90
  gpu_tdp = TDP['tdp_watts'][TDP['name'] == hardware].tolist()[0]
91
+ st.markdown("Different hardware has different efficiencies, which impacts how much energy you use.")
92
  with col2:
93
  num_gpus = st.text_input('Number of GPUs/CPUs/TPUs used', value = 16)
94
+ st.markdown('If you can\'t find your hardware in the list, select the closest similar model.')
 
95
  with col3:
96
  training_time = st.number_input('Total training time (in hours)', value = 0.0)
97
+ st.markdown('You can find this number in your training logs or TensorBoards')
98
  with col4:
99
  provider = st.selectbox('Provider used', providers)
100
  st.markdown('If you can\'t find your provider here, select "Local/Private Infrastructure".')
101
  with col5:
102
  if provider != 'Local/Private Infastructure':
103
  provider_instances = instances['region'][instances['provider'] == provider.lower()].unique().tolist()
104
+ region = st.selectbox('Region used', provider_instances)
105
  carbon_intensity = instances['impact'][(instances['provider'] == provider.lower()) & (instances['region'] == region)].tolist()[0]
 
106
  else:
107
  carbon_intensity = st.number_input('Carbon intensity of your energy grid, in grams of CO2 per kWh')
108
  st.markdown('You can consult a resource like the [IEA](https://www.iea.org/countries) or '
109
  ' [Electricity Map](https://app.electricitymaps.com/) to get this information.')
110
  dynamic_emissions = round(gpu_tdp * float(num_gpus)*training_time * carbon_intensity/1000000)
111
  st.metric(label="Dynamic emissions", value=str(dynamic_emissions)+' kilograms of CO2eq')
112
+ st.info('This is roughly equivalent to '+ str(round(dynamic_emissions/kg_per_mile,1)) + ' miles driven in an average US car'
113
  ' produced in 2021. [(Source: energy.gov)](https://www.energy.gov/eere/vehicles/articles/fotw-1223-january-31-2022-average-carbon-dioxide-emissions-2021-model-year)')
114
 
115
  st.markdown('### Experimental Emissions 👩‍🔬')
116
  st.markdown('##### These are the emissions produced by generating the electricity necessary for powering the experiments and tests needed to pick your final model architecture '
117
  'and parameters.')
118
  with st.expander("Calculate the experimental emissions of your model"):
119
+ st.info('Consult your training logs to figure out how many ablations, baselines and experiments were run before converging on the final model.')
120
  experimentation_time = st.number_input(label='Number of hours of experimentation run', value=training_time)
121
  st.markdown('##### As a baseline, language models such as [OPT](https://arxiv.org/pdf/2205.01068.pdf) and [BLOOM](https://arxiv.org/abs/2211.02001)'
122
  ' found that experimentation roughly doubles the amount of compute used by training the model itself.')
123
  experimental_emissions = round(gpu_tdp * (experimentation_time) * carbon_intensity/1000000)
124
+ st.metric(label="Experimental emissions", value=str(experimental_emissions)+' kilograms of CO2eq')
125
 
126
  st.markdown('### Datacenter (Overhead) Emissions 🌐')
127
  st.markdown('##### These are the emissions produced by generating the electricity needed to power the rest of the infrastructure'
128
  'used for model training -- the datacenter, network, heating/cooling, storage, etc.')
129
  with st.expander("Calculate the idle emissions of your model"):
130
+ st.info('A proxy often used to reflect idle emissions is PUE (Power Usage Effectiveness), which represents '
131
  ' the ratio of energy used for computing overheads like cooling, which varies depending on the data center.')
132
  pue = instances['PUE'][(instances['provider'] == provider.lower()) & (instances['region'] == region)].tolist()[0]
133
  if math.isnan(pue) == True:
 
187
  width: 15em;
188
  }
189
  </style>""", unsafe_allow_html=True)
190
+
191
  buttoncol1, buttoncol2, buttoncol3 = st.columns(3)
192
  with buttoncol2:
193
+ if st.session_state["is_shared"] == False:
194
+ submitted = st.button(label="Share my CO2 data!", on_click = lambda *args: write_to_csv(hardware, gpu_tdp, num_gpus, training_time, provider, carbon_intensity, dynamic_emissions, experimentation_time, experimental_emissions, pue, pue_emissions, embodied_type, embodied_emissions, model_info))
195
+ else:
196
+ st.info('Thank you! Your data has been shared in https://huggingface.co/datasets/sasha/co2_submissions.')
197
 
198
  st.markdown('### Methodology')
199
  with st.expander("More information about our Methodology"):