mbuuck committed on
Commit
12ca36b
1 Parent(s): f9eaf69

Working calculator

Browse files
Files changed (4) hide show
  1. .gitignore +2 -0
  2. app.py +313 -17
  3. conda_environment.yml +3 -2
  4. indices.yaml +169 -0
.gitignore CHANGED
@@ -1,3 +1,5 @@
1
  .venv
2
  __pycache__/
3
  service_account.json
 
 
 
1
  .venv
2
  __pycache__/
3
  service_account.json
4
+ ee_service_account.json
5
+ md_service_token.txt
app.py CHANGED
@@ -1,17 +1,286 @@
1
  import gradio as gr
2
  import plotly.graph_objects as go
3
- from datasets import load_dataset
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  import ee
5
  # import geemap
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
- # GEE
8
- service_account = 'climatebase-july-2023@ee-geospatialml-aquarry.iam.gserviceaccount.com'
9
- credentials = ee.ServiceAccountCredentials(service_account, 'service_account.json')
10
- ee.Initialize(credentials)
 
 
 
 
 
 
11
 
12
- # Gradio dataset
13
- dataset = load_dataset("gradio/NYC-Airbnb-Open-Data", split="train")
14
- df = dataset.to_pandas()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
  def filter_map(min_price, max_price, boroughs):
17
 
@@ -49,16 +318,43 @@ def filter_map(min_price, max_price, boroughs):
49
  return fig
50
 
51
  with gr.Blocks() as demo:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  with gr.Column():
 
 
 
 
 
 
 
53
  with gr.Row():
54
- min_price = gr.Number(value=250, label="Project Name")
55
- max_price = gr.Number(value=1000, label="Project Description")
56
- boroughs = gr.CheckboxGroup(choices=["Queens", "Brooklyn", "Manhattan", "Bronx", "Staten Island"], value=["Queens", "Brooklyn"], label="Select Methodology:")
57
- btn = gr.Button(value="Update Filter")
58
- btn = gr.Button(value="Save")
59
- btn = gr.Button(value="Run")
60
- map = gr.Plot().style()
61
- demo.load(filter_map, [min_price, max_price, boroughs], map)
62
- btn.click(filter_map, [min_price, max_price, boroughs], map)
 
 
 
 
63
 
64
  demo.launch()
 
1
  import gradio as gr
2
  import plotly.graph_objects as go
3
+ # import ee
4
+ # # import geemap
5
+
6
+ # # GEE
7
+ # service_account = 'climatebase-july-2023@ee-geospatialml-aquarry.iam.gserviceaccount.com'
8
+ # credentials = ee.ServiceAccountCredentials(service_account, 'service_account.json')
9
+ # ee.Initialize(credentials)
10
+
11
+ # # Gradio dataset
12
+ # dataset = load_dataset("gradio/NYC-Airbnb-Open-Data", split="train")
13
+ # df = dataset.to_pandas()
14
+
15
+ import os
16
+ import duckdb
17
+ import pandas as pd
18
+ import datetime
19
  import ee
20
  # import geemap
21
+ import yaml
22
+
23
# ---------------------------------------------------------------------------
# Module-level configuration
# ---------------------------------------------------------------------------

# File holding the MotherDuck service token (read by set_up_duckdb).
MD_SERVICE_TOKEN = 'md_service_token.txt'

# TODO: set up with papermill parameters
DATE = '2020-01-01'
YEAR = 2020
START_YEAR = 2015
END_YEAR = 2022

# Region of interest: a point plus a buffer radius around it.
LOCATION = [-74.653370, 5.845328]
ROI_RADIUS = 20000

# Google Earth Engine service account and its credentials file.
GEE_SERVICE_ACCOUNT = 'climatebase-july-2023@ee-geospatialml-aquarry.iam.gserviceaccount.com'
GEE_SERVICE_ACCOUNT_CREDENTIALS_FILE = 'ee_service_account.json'

# YAML file describing the available indices.
INDICES_FILE = 'indices.yaml'
35
+
36
+ class IndexGenerator:
37
+ """
38
+ A class to generate indices and compute zonal means.
39
+
40
+ Args:
41
+ centroid (tuple): The centroid coordinates (latitude, longitude) of the region of interest.
42
+ year (int): The year for which indices are generated.
43
+ roi_radius (int, optional): The radius (in meters) for creating a buffer around the centroid as the region of interest. Defaults to 20000.
44
+ project_name (str, optional): The name of the project. Defaults to "".
45
+ map (geemap.Map, optional): Map object for mapping. Defaults to None (i.e. no map created)
46
+ """
47
+ def __init__(self,
48
+ centroid,
49
+ roi_radius,
50
+ year,
51
+ indices_file,
52
+ project_name="",
53
+ map = None,
54
+ ):
55
+ self.indices = self._load_indices(indices_file)
56
+ self.centroid = centroid
57
+ self.roi = ee.Geometry.Point(*centroid).buffer(roi_radius)
58
+ self.year = year
59
+ self.start_date = str(datetime.date(self.year, 1, 1))
60
+ self.end_date = str(datetime.date(self.year, 12, 31))
61
+ self.daterange=[self.start_date, self.end_date]
62
+ self.project_name=project_name
63
+ self.map = map
64
+ if self.map is not None:
65
+ self.show = True
66
+ else:
67
+ self.show = False
68
+
69
+ def _cloudfree(self, gee_path):
70
+ """
71
+ Internal method to generate a cloud-free composite.
72
+
73
+ Args:
74
+ gee_path (str): The path to the Google Earth Engine (GEE) image or image collection.
75
+
76
+ Returns:
77
+ ee.Image: The cloud-free composite clipped to the region of interest.
78
+ """
79
+ # Load a raw Landsat ImageCollection for a single year.
80
+ collection = (
81
+ ee.ImageCollection(gee_path)
82
+ .filterDate(*self.daterange)
83
+ .filterBounds(self.roi)
84
+ )
85
+
86
+ # Create a cloud-free composite with custom parameters for cloud score threshold and percentile.
87
+ composite_cloudfree = ee.Algorithms.Landsat.simpleComposite(**{
88
+ 'collection': collection,
89
+ 'percentile': 75,
90
+ 'cloudScoreRange': 5
91
+ })
92
+ return composite_cloudfree.clip(self.roi)
93
+
94
+ def _load_indices(self, indices_file):
95
+ # Read index configurations
96
+ with open(indices_file, 'r') as stream:
97
+ try:
98
+ return yaml.safe_load(stream)
99
+ except yaml.YAMLError as e:
100
+ print(e)
101
+ return None
102
+
103
+ def show_map(self, map=None):
104
+ if map is not None:
105
+ self.map = map
106
+ self.show = True
107
+
108
+ def disable_map(self):
109
+ self.show = False
110
+
111
+ def generate_index(self, index_config):
112
+ """
113
+ Generates an index based on the provided index configuration.
114
+
115
+ Args:
116
+ index_config (dict): Configuration for generating the index.
117
+
118
+ Returns:
119
+ ee.Image: The generated index clipped to the region of interest.
120
+ """
121
+ match index_config["gee_type"]:
122
+ case 'image':
123
+ dataset = ee.Image(index_config['gee_path']).clip(self.roi)
124
+ if index_config.get('select'):
125
+ dataset = dataset.select(index_config['select'])
126
+ case 'image_collection':
127
+ dataset = ee.ImageCollection(index_config['gee_path']).filterBounds(self.roi).map(lambda image: image.clip(self.roi)).mean()
128
+ if index_config.get('select'):
129
+ dataset = dataset.select(index_config['select'])
130
+ case 'feature_collection':
131
+ dataset = ee.Image().float().paint(ee.FeatureCollection(index_config['gee_path']), index_config['select']).clip(self.roi)
132
+ case 'algebraic':
133
+ image = self._cloudfree(index_config['gee_path'])
134
+ dataset = image.normalizedDifference(['B4', 'B3'])
135
+ case _:
136
+ dataset=None
137
+
138
+ if not dataset:
139
+ raise Exception("Failed to generate dataset.")
140
+ if self.show and index_config.get('show'):
141
+ map.addLayer(dataset, index_config['viz'], index_config['name'])
142
+ print(f"Generated index: {index_config['name']}")
143
+ return dataset
144
+
145
+ def zonal_mean_index(self, index_key):
146
+ index_config = self.indices[index_key]
147
+ dataset = self.generate_index(index_config)
148
+ # zm = self._zonal_mean(single, index_config.get('bandname') or 'constant')
149
+ out = dataset.reduceRegion(**{
150
+ 'reducer': ee.Reducer.mean(),
151
+ 'geometry': self.roi,
152
+ 'scale': 200 # map scale
153
+ }).getInfo()
154
+ if index_config.get('bandname'):
155
+ return out[index_config.get('bandname')]
156
+ return out
157
+
158
+ def generate_composite_index_df(self, indices=[]):
159
+ data={
160
+ "metric": indices,
161
+ "year":self.year,
162
+ "centroid": str(self.centroid),
163
+ "project_name": self.project_name,
164
+ "value": list(map(self.zonal_mean_index, indices)),
165
+ "area": roi.area().getInfo(), # m^2
166
+ "geojson": str(roi.getInfo()),
167
+ }
168
+
169
+ print('data', data)
170
+ df = pd.DataFrame(data)
171
+ return df
172
+
173
def set_up_duckdb(service_token_file=None):
    """
    Open a DuckDB connection to the `climatebase` database.

    Args:
        service_token_file (str, optional): Path to a text file containing the
            MotherDuck service token. When given, the token is exported as the
            ``motherduck_token`` environment variable and the connection is
            opened against ``md:climatebase``. When None, a local database is
            opened instead.

    Returns:
        The DuckDB connection, with the `spatial` extension installed and loaded.
    """
    print('setting up duckdb')
    if service_token_file is not None:
        with open(service_token_file, 'r') as f:
            # Strip surrounding whitespace: a trailing newline in the token
            # file would otherwise become part of the token.
            md_service_token = f.read().strip()

        os.environ['motherduck_token'] = md_service_token
        con = duckdb.connect('md:climatebase')
    else:
        # NOTE(review): ':climatebase:' is passed to duckdb.connect unchanged
        # from the original; confirm this is the intended local database name.
        con = duckdb.connect(':climatebase:')

    # use `climatebase` db
    con.sql("USE climatebase;")

    # load extensions
    con.sql("""INSTALL spatial; LOAD spatial;""")

    return con
190
+
191
def authenticate_gee(gee_service_account, gee_service_account_credentials_file):
    """
    Initialize the Earth Engine client with service-account credentials.

    Args:
        gee_service_account (str): The service account identifier.
        gee_service_account_credentials_file (str): Path to the credentials file
            for that service account.
    """
    print('authenticate_gee')
    # TODO: alert if dataset filter date nan
    ee.Initialize(
        ee.ServiceAccountCredentials(
            gee_service_account,
            gee_service_account_credentials_file,
        )
    )
196
+
197
def load_indices(indices_file):
    """
    Read the index configurations from a YAML file.

    Args:
        indices_file (str): Path to the YAML file.

    Returns:
        The parsed YAML content, or None when parsing fails (the parse error
        is printed).
    """
    with open(indices_file, 'r') as config_stream:
        try:
            parsed = yaml.safe_load(config_stream)
        except yaml.YAMLError as parse_error:
            print(parse_error)
            parsed = None
    return parsed
205
 
206
def create_dataframe(years, project_name):
    """
    Compute the composite index table for each requested year.

    Args:
        years (iterable of int): Years to evaluate.
        project_name (str): Project name recorded in every row.

    Returns:
        pandas.DataFrame: The per-year tables concatenated into one.
    """
    print(years)
    index_configs = load_indices(INDICES_FILE)
    yearly_frames = []
    for current_year in years:
        print(current_year)
        generator = IndexGenerator(
            centroid=LOCATION,
            roi_radius=ROI_RADIUS,
            year=current_year,
            indices_file=INDICES_FILE,
            project_name=project_name,
        )
        # Evaluate every index defined in the configuration file.
        yearly_frames.append(generator.generate_composite_index_df(list(index_configs)))
    return pd.concat(yearly_frames)
216
 
217
+ # def preview_table():
218
+ # con.sql("FROM bioindicator;").show()
219
+
220
+ # if __name__ == '__main__':
221
+
222
+
223
+ # Map = geemap.Map()
224
+
225
+
226
+ # # Create a cloud-free composite with custom parameters for cloud score threshold and percentile.
227
+ # composite_cloudfree = ee.Algorithms.Landsat.simpleComposite(**{
228
+ # 'collection': collection,
229
+ # 'percentile': 75,
230
+ # 'cloudScoreRange': 5
231
+ # })
232
+
233
+ # Map.addLayer(composite_cloudfree, {'bands': ['B4', 'B3', 'B2'], 'max': 128}, 'Custom TOA composite')
234
+ # Map.centerObject(roi, 14)
235
+
236
+
237
+ # ig = IndexGenerator(centroid=LOCATION, year=2015, indices_file=INDICES_FILE, project_name='Test Project', map=Map)
238
+ # dataset = ig.generate_index(indices['Air'])
239
+
240
+ # minMax = dataset.clip(roi).reduceRegion(
241
+ # geometry = roi,
242
+ # reducer = ee.Reducer.minMax(),
243
+ # scale= 3000,
244
+ # maxPixels= 10e3,
245
+ # )
246
+
247
+ # minMax.getInfo()
248
def calculate_biodiversity_score(start_year, end_year, project_name):
    """
    Compute scores for years not yet stored, then return the stored scores.

    For every year in ``range(start_year, end_year)`` that has no row in
    `bioindicator` for this project, the indices are generated and the
    aggregated result is written to `_temptable`. Rows are not inserted into
    `bioindicator` here.

    Args:
        start_year (int): First year of the range.
        end_year (int): End of the range.
        project_name (str): Name of the project.

    Returns:
        pandas.DataFrame: Rows of `bioindicator` for the project and year range.
    """
    start_year, end_year = int(start_year), int(end_year)

    # Create `bioindicator` table IF NOT EXISTS. This has to happen before the
    # table is queried below, otherwise the first run fails on a missing table.
    con.sql("""
        USE climatebase;
        CREATE TABLE IF NOT EXISTS bioindicator (year BIGINT, project_name VARCHAR(255), value DOUBLE, area DOUBLE, score DOUBLE, CONSTRAINT unique_year_project_name UNIQUE (year, project_name));
    """)

    # Collect the years that have no stored record yet. Values are bound as
    # query parameters because project_name comes straight from the UI.
    years = []
    for year in range(start_year, end_year):
        row_exists = con.execute(
            "SELECT COUNT(1) FROM bioindicator WHERE (year = ? AND project_name = ?)",
            [year, project_name],
        ).fetchall()[0][0]
        if not row_exists:
            years.append(year)

    if years:
        # The local name `df` is referenced by name inside the SQL below.
        df = create_dataframe(years, project_name)

        # Write score table to `_temptable`
        con.sql('CREATE OR REPLACE TABLE _temptable AS SELECT *, (value * area) AS score FROM (SELECT year, project_name, AVG(value) AS value, area FROM df GROUP BY year, project_name, area ORDER BY project_name)')

    # NOTE(review): the loop above covers start_year..end_year-1, while this
    # filter selects start_year+1..end_year - confirm which range is intended.
    return con.execute(
        "SELECT * FROM bioindicator WHERE (year > ? AND year <= ? AND project_name = ?)",
        [start_year, end_year, project_name],
    ).df()
270
+
271
def view_all():
    """Return every row of the `bioindicator` table as a DataFrame."""
    print('view_all')
    all_rows = con.sql("SELECT * FROM bioindicator")
    return all_rows.df()
274
+
275
def push_to_md():
    """
    Upsert the rows of `_temptable` into `bioindicator`.

    Rows are matched on (year, project_name). On conflict, `value`, `area` and
    `score` are all overwritten so that the stored score stays consistent with
    the stored value and area.
    """
    # UPSERT project record
    con.sql("""
        INSERT INTO bioindicator FROM _temptable
        ON CONFLICT (year, project_name) DO UPDATE SET
            value = excluded.value,
            area = excluded.area,
            score = excluded.score;
    """)
    print('Saved records')
282
+
283
+ # preview_table()
284
 
285
  def filter_map(min_price, max_price, boroughs):
286
 
 
318
  return fig
319
 
320
with gr.Blocks() as demo:
    # Open the database connection and authenticate with Earth Engine before
    # the interface is assembled; `con` is used by the button callbacks.
    con = set_up_duckdb(MD_SERVICE_TOKEN)
    authenticate_gee(GEE_SERVICE_ACCOUNT, GEE_SERVICE_ACCOUNT_CREDENTIALS_FILE)

    with gr.Column():
        # Inputs: year range and project name.
        with gr.Row():
            start_year = gr.Number(value=2017, label="Start Year", precision=0)
            end_year = gr.Number(value=2022, label="End Year", precision=0)
            project_name = gr.Textbox(label='Project Name')

        # Actions.
        with gr.Row():
            calc_btn = gr.Button(value="Calculate!")
            view_btn = gr.Button(value="View all")
            save_btn = gr.Button(value="Save")

        # Output table.
        results_df = gr.Dataframe(
            headers=["Year", "Project Name", "Score"],
            datatype=["number", "str", "number"],
            label="Biodiversity scores by year",
        )

    # Wire the buttons to their callbacks.
    calc_btn.click(
        calculate_biodiversity_score,
        inputs=[start_year, end_year, project_name],
        outputs=results_df,
    )
    view_btn.click(view_all, outputs=results_df)
    save_btn.click(push_to_md)

demo.launch()
conda_environment.yml CHANGED
@@ -1,13 +1,14 @@
1
  name: openbiodiversity_calculator
2
  channels:
3
  - conda-forge
4
- - huggingface
5
  - plotly
6
  dependencies:
7
- - datasets
8
  - geemap
 
9
  - plotly
10
  - segment-geospatial
 
11
  - pip
12
  - pip:
13
  - duckdb==0.8.1
 
1
  name: openbiodiversity_calculator
2
  channels:
3
  - conda-forge
 
4
  - plotly
5
  dependencies:
6
+ - earthengine-api
7
  - geemap
8
+ - geopandas
9
  - plotly
10
  - segment-geospatial
11
+ - pandas
12
  - pip
13
  - pip:
14
  - duckdb==0.8.1
indices.yaml ADDED
@@ -0,0 +1,169 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ Water:
3
+ name: Water
4
+ roi: ''
5
+ gee_path: JRC/GSW1_1/GlobalSurfaceWater
6
+ gee_type: image
7
+ viz:
8
+ min: 0
9
+ max: 100
10
+ palette:
11
+ - ffffff
12
+ - ffbbbb
13
+ - 0000ff
14
+ bandname: occurrence
15
+ select: occurrence
16
+ show: true
17
+ Protected:
18
+ name: Protected
19
+ roi: ''
20
+ gee_path: WCMC/WDPA/current/polygons
21
+ gee_type: feature_collection
22
+ viz:
23
+ palette:
24
+ - 2ed033
25
+ - 5aff05
26
+ - 67b9ff
27
+ - 5844ff
28
+ - 0a7618
29
+ - 2c05ff
30
+ min: 0
31
+ max: 1550000
32
+ opacity: 0.8
33
+ select: REP_AREA
34
+ bandname: constant
35
+ show: true
36
+ Air:
37
+ name: Air
38
+ roi: ''
39
+ gee_path: COPERNICUS/S5P/OFFL/L3_AER_AI
40
+ gee_type: image_collection
41
+ viz:
42
+ min: -1
43
+ max: 2
44
+ palette:
45
+ - black
46
+ - blue
47
+ - purple
48
+ - cyan
49
+ - green
50
+ - yellow
51
+ - red
52
+ bandname: absorbing_aerosol_index
53
+ select: absorbing_aerosol_index
54
+ dates: false
55
+ show: false
56
+ Soil:
57
+ name: Soil
58
+ roi: ''
59
+ gee_path: OpenLandMap/SOL/SOL_ORGANIC-CARBON_USDA-6A1C_M/v02
60
+ gee_type: image
61
+ viz:
62
+ bands:
63
+ - b200
64
+ min: 0
65
+ max: 12
66
+ palette:
67
+ - ffffa0
68
+ - f7fcb9
69
+ - d9f0a3
70
+ - addd8e
71
+ - 78c679
72
+ - 41ab5d
73
+ - '238443'
74
+ - 005b29
75
+ - 004b29
76
+ - 012b13
77
+ - 00120b
78
+ select: b0
79
+ bandname: b0
80
+ show: false
81
+ Temperature:
82
+ name: Temperature
83
+ roi: ''
84
+ gee_path: MODIS/061/MYD21C1
85
+ gee_type: image_collection
86
+ viz:
87
+ min: 216
88
+ max: 348
89
+ palette:
90
+ - '040274'
91
+ - '040281'
92
+ - 0502a3
93
+ - 0502b8
94
+ - 0502ce
95
+ - 0502e6
96
+ - 0602ff
97
+ - 235cb1
98
+ - 307ef3
99
+ - 269db1
100
+ - 30c8e2
101
+ - 32d3ef
102
+ - 3be285
103
+ - 3ff38f
104
+ - 86e26f
105
+ - 3ae237
106
+ - b5e22e
107
+ - d6e21f
108
+ - fff705
109
+ - ffd611
110
+ - ffb613
111
+ - ff8b13
112
+ - ff6e08
113
+ - ff500d
114
+ - ff0000
115
+ - de0101
116
+ - c21301
117
+ - a71001
118
+ - '911003'
119
+ select: LST_Day
120
+ bandname: LST_Day
121
+ dates: true
122
+ show: true
123
+ Habitat:
124
+ name: Habitat
125
+ roi: ''
126
+ gee_path: projects/sat-io/open-datasets/IUCN_HABITAT/iucn_habitatclassification_composite_lvl2_ver004
127
+ gee_type: image
128
+ viz: {}
129
+ bandname: comp_first
130
+ show: true
131
+ NDVI:
132
+ name: NDVI
133
+ roi: ''
134
+ gee_path: LANDSAT/LC08/C02/T1
135
+ gee_type: algebraic
136
+ normalized_difference:
137
+ - B4
138
+ - B3
139
+ viz:
140
+ palette:
141
+ - "#d73027"
142
+ - "#f46d43"
143
+ - "#fdae61"
144
+ - "#fee08b"
145
+ - "#d9ef8b"
146
+ - "#a6d96a"
147
+ - "#66bd63"
148
+ - "#1a9850"
149
+ bandname: nd
150
+ NDWI:
151
+ name: NDWI
152
+ roi: ''
153
+ gee_path: LANDSAT/LC08/C02/T1
154
+ gee_type: algebraic
155
+ normalized_difference:
156
+ - B3
157
+ - B5
158
+ viz:
159
+ palette:
160
+ - "#ece7f2"
161
+ - "#d0d1e6"
162
+ - "#a6bddb"
163
+ - "#74a9cf"
164
+ - "#3690c0"
165
+ - "#0570b0"
166
+ - "#045a8d"
167
+ - "#023858"
168
+ bandname: nd
169
+ show: true