cboettig committed
Commit 25d7546 · 1 parent: f2a920b

and so it begins :seedling:

Files changed (6)
  1. .github/workflows/deploy.yml +20 -0
  2. .gitignore +7 -0
  3. Dockerfile +11 -0
  4. README.md +19 -2
  5. app.py +176 -0
  6. utilities.py +203 -0
.github/workflows/deploy.yml ADDED
@@ -0,0 +1,20 @@
+ name: Sync to Hugging Face hub
+ on:
+   push:
+     branches: [main]
+
+   # to run this workflow manually from the Actions tab
+   workflow_dispatch:
+
+ jobs:
+   sync-to-hub:
+     runs-on: ubuntu-latest
+     steps:
+       - uses: actions/checkout@v3
+         with:
+           fetch-depth: 0
+           lfs: true
+       - name: Push to hub
+         env:
+           HF_TOKEN: ${{ secrets.HF_TOKEN }}
+         run: git push https://cboettig:$HF_TOKEN@huggingface.co/spaces/boettiger-lab/redlining main
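
Before the first sync runs, it is worth confirming that the `HF_TOKEN` repository secret is a valid write token for the Space. A minimal pre-flight sketch, assuming the `huggingface_hub` package is installed and the token is exported in the environment (neither is part of this commit):

```python
# Hypothetical pre-flight check for the HF_TOKEN used by the workflow above.
# Assumes `pip install huggingface_hub` and HF_TOKEN exported in the shell.
import os
from huggingface_hub import HfApi

token = os.environ["HF_TOKEN"]
api = HfApi(token=token)
info = api.whoami()  # raises if the token is invalid
print("token belongs to:", info["name"])
```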
.gitignore CHANGED
@@ -160,3 +160,10 @@ cython_debug/
  # and can be added to the global gitignore or merged into this file. For a more nuclear
  # option (not recommended) you can uncomment the following to ignore the entire idea folder.
  #.idea/
+
+
+ *.parquet
+ *.tiff
+ *.tif
+
+
Dockerfile ADDED
@@ -0,0 +1,11 @@
+ FROM ghcr.io/boettiger-lab/k8s:latest
+ WORKDIR /app
+
+ COPY . .
+
+ # huggingface uses port 7860 by default
+ CMD streamlit run app.py \
+     --server.address 0.0.0.0 \
+     --server.port 7860 \
+     --server.headless true \
+     --server.fileWatcherType none
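
To exercise this container before pushing to the Space, a sketch using the Docker SDK for Python (assumes `pip install docker` and a running Docker daemon; the tag `redlining-app` is an arbitrary placeholder):

```python
# Sketch: build and run this Dockerfile locally via docker-py.
# Assumes a running Docker daemon and `pip install docker`; the tag is arbitrary.
import docker

client = docker.from_env()
image, _logs = client.images.build(path=".", tag="redlining-app")
container = client.containers.run(
    "redlining-app",
    ports={"7860/tcp": 7860},  # same port Hugging Face Spaces expects
    detach=True,
)
print("app at http://localhost:7860, container id:", container.short_id)
```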
README.md CHANGED
@@ -1,2 +1,19 @@
- # redlining-app
- Source code for streamlit app for exploring GBIF distribution & other biodiversity variables by redlined area
+ ---
+ title: Streamlit Demo
+ emoji: 🌍
+ colorFrom: yellow
+ colorTo: indigo
+ sdk: docker
+ pinned: false
+ license: bsd-2-clause
+ ---
+
+ For ESPM-157 students using <https://nature.datahub.berkeley.edu> servers:
+ to preview locally, run
+
+ ```
+ make
+ ```
+
+ and click the link provided.
app.py ADDED
@@ -0,0 +1,176 @@
+ import streamlit as st
+
+ import ibis
+ from ibis import _
+ import pydeck as pdk
+ import altair as alt
+ from utilities import *
+ import leafmap.maplibregl as leafmap
+ import requests
+ import geopandas as gpd
+
+ st.set_page_config(page_title="Redlining & GBIF", layout="wide")
+ st.title("Redlining & GBIF")
+
+ con = ibis.duckdb.connect(extensions=['httpfs', 'spatial', 'h3'])
+ set_secrets(con)  # s3 credentials
+ # set_aws_secrets(con)
+ # set_source_secrets(con)
+
+ distinct_taxa = ""  # default
+
+ col1, col2, col3, col4 = st.columns([1, 3, 3, 3])
+
+ # placed outside the form so that toggling this immediately updates the form options available
+ with col1:
+     st.markdown("#### Start 👇")
+     area_source = st.radio("Area types", ["City", "All"])
+     nunique = st.toggle("unique taxa only", False)
+
+
+ # different default settings for each area type
+ config = {
+     "City": {
+         "names": con.read_parquet("s3://public-gbif/app/city_names.parquet").select("name").execute(),
+         "index": 183,
+         "zoom": 11,
+         "vertical": 0.1,
+         "rank_index": 2,
+         "taxa": "Aves",
+     },
+     "All": {
+         "names": ["All"],
+         "index": 0,
+         "zoom": 9,
+         "vertical": 1.0,
+         "rank_index": 2,
+         "taxa": "Aves",
+     }
+ }
+
+ with st.form("my_form"):
+
+     taxonomic_ranks = ["kingdom", "phylum", "class", "order", "family", "genus", "species"]
+     default = config[area_source]
+
+     with col2:
+         ## Add additional layer toggles here, e.g. SVI?
+         st.markdown("#### 🗺️ Select map layers")
+         gdf_name = st.selectbox("Area", default["names"], index=default["index"])
+
+     with col3:
+         st.markdown("#### 🐦 Select taxonomic groups")
+         ## add support for multiple taxa!
+         rank = st.selectbox("Taxonomic Rank", options=taxonomic_ranks, index=default["rank_index"])
+         taxa = st.text_input("taxa", default["taxa"])
+         if nunique:
+             distinct_taxa = st.selectbox("Count only unique occurrences by:", options=taxonomic_ranks, index=default["rank_index"])
+
+     with col4:
+         st.markdown('''
+         #### 🔎 Set spatial resolution
+         See [H3 cell size by zoom](https://h3geo.org/docs/core-library/restable/#cell-areas)
+         ''')
+         zoom = st.slider("H3 resolution", min_value=1, max_value=11, value=default["zoom"])
+         v_scale = st.number_input("vertical scale", min_value=0.0, value=default["vertical"])
+
+     submitted = st.form_submit_button("Go")
+
+ @st.cache_data
+ def compute_hexes(_gdf, gdf_name, rank, taxa, zoom, distinct_taxa=""):
+
+     # return the cached result from the bucket if this combination was computed before
+     dest = unique_path(gdf_name, rank, taxa, zoom, distinct_taxa)
+     bucket = "public-gbif"
+     url = base_url + f"/{bucket}/" + dest
+
+     response = requests.head(url)
+     if response.status_code == 200:
+         return url
+
+     sel = con.read_parquet("s3://public-gbif/app/redlined_cities_gbif.parquet")
+
+     # restrict to the selected taxon and area; the cache path varies with these
+     sel = sel.filter(_[rank] == taxa)
+     if gdf_name != "All":
+         sel = sel.filter(_.city == gdf_name)
+
+     sel = (sel
+            .rename(hex="h" + str(zoom))  # h3 == 41,150 hexes. h5 == 2,016,830 hexes
+            .group_by(_.hex)
+            )
+
+     if distinct_taxa != "":  # count n unique taxa
+         sel = sel.agg(n=_[distinct_taxa].nunique())
+     else:  # count occurrences
+         sel = sel.agg(n=_.count())
+
+     sel = (sel
+            .filter(_.n > 0)
+            .mutate(logn=_.n.log())
+            .mutate(value=(255 * _.logn / _.logn.max()).cast("int"))  # normalized color-scale
+            )
+
+     # .to_json() doesn't exist in ibis, use SQL
+     query = ibis.to_sql(sel)
+     con.raw_sql(f"COPY ({query}) TO 's3://{bucket}/{dest}' (FORMAT JSON, ARRAY true);")
+
+     return url
+
+
+ @st.cache_data
+ def bar_chart(gdf_name, rank, taxa, zoom, distinct_taxa=""):
+     sel = con.read_parquet("s3://public-gbif/app/redlined_cities_gbif.parquet")
+     sel = sel.filter(_[rank] == taxa)
+
+     if gdf_name != "All":
+         sel = sel.filter(_.city == gdf_name)
+
+     sel = sel.group_by(_.city, _.grade)
+
+     if distinct_taxa != "":  # mean unique taxa per unit area
+         sel = sel.agg(n=_[distinct_taxa].nunique(), area=_.area.sum())
+     else:  # mean occurrence count per unit area
+         sel = sel.agg(n=_.count(), area=_.area.sum())
+
+     sel = (sel
+            .mutate(density=_.n / _.area)
+            .group_by(_.grade)
+            .agg(mean=_.density.mean(), sd=_.density.std())
+            .order_by(_.mean.desc())
+            )
+
+     plt = alt.Chart(sel.execute()).mark_bar().encode(x="grade", y="mean")
+     return st.altair_chart(plt)
+
+ mappinginequality = 'https://data.source.coop/cboettig/us-boundaries/mappinginequality.pmtiles'
+
+ redlines = {'version': 8,
+             'sources': {'source': {'type': 'vector',
+                                    'url': 'pmtiles://' + mappinginequality,
+                                    'attribution': 'PMTiles'}},
+             'layers': [{'id': 'mappinginequality_fill',
+                         'source': 'source',
+                         'source-layer': 'mappinginequality',
+                         'type': 'fill',
+                         'paint': {'fill-color': ["get", "fill"], 'fill-opacity': 0.9}}
+                        ]}
+
+
+ count = "occurrences"
+ if nunique:
+     count = "unique " + distinct_taxa
+
+ mapcol, chartcol = st.columns([4, 1])
+
+ if submitted:
+     gdf = get_polygon(gdf_name, area_source, con)
+     url = compute_hexes(gdf, gdf_name, rank, taxa, zoom, distinct_taxa=distinct_taxa)
+     layer = HexagonLayer(url, v_scale)
+
+     m = leafmap.Map(style=terrain_style, center=[-120, 37.6], zoom=2, pitch=35, bearing=10)
+     if gdf is not None:
+         m.add_gdf(gdf[[gdf.geometry.name]], "fill", paint={"fill-opacity": 0.2})  # adds area of interest & zooms in
+     m.add_pmtiles(mappinginequality, style=redlines, visible=True, opacity=0.9, fit_bounds=False)
+     m.add_deck_layers([layer])
+     m.add_layer_control()
+
+     with mapcol:
+         m.to_streamlit()
+     with chartcol:
+         st.markdown("Mean number of " + count + " by redline grade")
+         bar_chart(gdf_name, rank, taxa, zoom, distinct_taxa=distinct_taxa)
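
The `value` column computed in `compute_hexes` drives both the extrusion height and the fill color of `HexagonLayer`. A toy sketch of the same log-normalization, runnable on its own (the hex IDs and counts here are made up; only ibis with its default duckdb backend is assumed):

```python
# Sketch: the log-scaled normalization from compute_hexes on made-up data.
import ibis
from ibis import _

t = ibis.memtable({
    "hex": ["8928308280fffff", "8928308280bffff", "89283082807ffff"],
    "n": [5, 50, 500],
})
t = (t
     .mutate(logn=_.n.log())
     .mutate(value=(255 * _.logn / _.logn.max()).cast("int")))
print(t.execute())  # the largest count gets value == 255
```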
utilities.py ADDED
@@ -0,0 +1,203 @@
+ import os
+ import hashlib
+
+ import duckdb
+ import ibis
+ from ibis import _
+ import geopandas as gpd
+ import minio
+ import pandas as pd
+ import pydeck as pdk
+ import streamlit as st
+
+ base_url = "https://minio.carlboettiger.info"
+
+ # make sure the h3 extension is installed
+ db = duckdb.connect()
+ db.install_extension("h3", repository="community")
+ db.close()
+
+ # enable ibis to use the built-in function from the h3 extension
+ @ibis.udf.scalar.builtin
+ def h3_cell_to_boundary_wkt(array) -> str:
+     ...
+
+
+ # Configure write access to source.coop
+ def set_source_secrets(con):
+     source_key = st.secrets["SOURCE_KEY"]
+     source_secret = st.secrets["SOURCE_SECRET"]
+
+     query = f'''
+     CREATE OR REPLACE SECRET source (
+         TYPE S3,
+         KEY_ID '{source_key}',
+         SECRET '{source_secret}',
+         ENDPOINT 'data.source.coop',
+         URL_STYLE 'path',
+         SCOPE 's3://cboettig'
+     );
+
+     SET THREADS=100;
+     '''
+     con.raw_sql(query)
+
+
+ def set_aws_secrets(con):
+     query = '''
+     CREATE OR REPLACE SECRET aws (
+         TYPE S3,
+         ENDPOINT 's3.us-west-2.amazonaws.com',
+         SCOPE 's3://overturemaps-us-west-2/release/'
+     );
+     '''
+     con.raw_sql(query)
+
+
+ # or write access to minio
+ def set_secrets(con):
+     minio_key = st.secrets["MINIO_KEY"]
+     minio_secret = st.secrets["MINIO_SECRET"]
+     query = f'''
+     CREATE OR REPLACE SECRET secret2 (
+         TYPE S3,
+         KEY_ID '{minio_key}',
+         SECRET '{minio_secret}',
+         ENDPOINT 'minio.carlboettiger.info',
+         URL_STYLE 'path',
+         SCOPE 's3://public-gbif/'
+     );
+     '''
+     con.raw_sql(query)
+
+
+ def s3_client(type="minio"):
+     minio_key = st.secrets["MINIO_KEY"]
+     minio_secret = st.secrets["MINIO_SECRET"]
+     client = minio.Minio("minio.carlboettiger.info", minio_key, minio_secret)
+     if type == "minio":
+         return client
+
+     source_key = st.secrets["SOURCE_KEY"]
+     source_secret = st.secrets["SOURCE_SECRET"]
+     client = minio.Minio("data.source.coop", source_key, source_secret)
+     return client
+
+
+ def HexagonLayer(data, v_scale=1):
+     return pdk.Layer(
+         "H3HexagonLayer",
+         id="gbif",
+         data=data,
+         extruded=True,
+         get_elevation="value",
+         get_hexagon="hex",
+         elevation_scale=50 * v_scale,
+         elevation_range=[0, 1],
+         pickable=True,
+         auto_highlight=True,
+         get_fill_color="[255 - value, 255, value]",
+     )
+
+
+ def DeckGlobe(layer):
+     view_state = pdk.ViewState(latitude=51.47, longitude=0.45, zoom=0)
+     view = pdk.View(type="_GlobeView", controller=True, width=1000, height=600)
+     COUNTRIES = "https://d2ad6b4ur7yvpq.cloudfront.net/naturalearth-3.3.0/ne_50m_admin_0_scale_rank.geojson"
+
+     layers = [
+         pdk.Layer(
+             "GeoJsonLayer",
+             id="base-map",
+             data=COUNTRIES,
+             stroked=False,
+             filled=True,
+             get_fill_color=[200, 200, 200],
+         ),
+         layer,
+     ]
+     deck = pdk.Deck(
+         views=[view],
+         initial_view_state=view_state,
+         layers=layers,
+         map_provider=None,
+         # Note that this must be set for the globe to be opaque
+         parameters={"cull": True},
+     )
+     return deck
+
+
+ key = st.secrets['MAPTILER_KEY']
+ terrain_style = {
+     "version": 8,
+     "sources": {
+         "osm": {
+             "type": "raster",
+             "tiles": ["https://server.arcgisonline.com/ArcGIS/rest/services/NatGeo_World_Map/MapServer/tile/{z}/{y}/{x}.png"],
+             "tileSize": 256,
+             "attribution": "&copy; National Geographic",
+             "maxzoom": 19,
+         },
+         "terrainSource": {
+             "type": "raster-dem",
+             "url": f"https://api.maptiler.com/tiles/terrain-rgb-v2/tiles.json?key={key}",
+             "tileSize": 256,
+         },
+         "hillshadeSource": {
+             "type": "raster-dem",
+             "url": f"https://api.maptiler.com/tiles/terrain-rgb-v2/tiles.json?key={key}",
+             "tileSize": 256,
+         },
+     },
+     "layers": [
+         {"id": "osm", "type": "raster", "source": "osm"},
+         {
+             "id": "hills",
+             "type": "hillshade",
+             "source": "hillshadeSource",
+             "layout": {"visibility": "visible"},
+             "paint": {"hillshade-shadow-color": "#473B24"},
+         },
+     ],
+     "terrain": {"source": "terrainSource", "exaggeration": 0.1},
+ }
+
+
+ # grab the polygon of a redlined city
+ def get_city(name="Oakland", con=ibis.duckdb.connect()):
+     gdf = (con
+            .read_geo("/vsicurl/https://data.source.coop/cboettig/us-boundaries/mappinginequality.json")
+            .filter(_.city == name)
+            .agg(geom=_.geom.unary_union())
+            ).execute()
+     return gdf
+
+
+ @st.cache_data
+ def get_polygon(name="New Haven",
+                 source="City",
+                 _con=ibis.duckdb.connect()):
+     match source:
+         case 'City':
+             gdf = get_city(name, _con)
+         case 'Custom':
+             gdf = gpd.read_file(name)
+         case _:  # includes "All"
+             gdf = None
+     return gdf
+
+
+ def unique_path(gdf_name, rank, taxa, zoom, distinct_taxa):
+     # gdf_hash = str(pd.util.hash_pandas_object(gdf).sum())
+     text = gdf_name + rank + taxa + str(zoom) + distinct_taxa
+     hash_object = hashlib.sha1(text.encode())
+     sig = hash_object.hexdigest()
+     dest = "cache/gbif_" + sig + ".json"
+     return dest
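
Everything in app.py keys its bucket cache on `unique_path`, so it matters that the path is a pure function of the form inputs. A standalone sketch of the same scheme, re-implemented here so it runs without importing `utilities` (which reads Streamlit secrets at module load):

```python
# Sketch: the sha1-based cache key scheme used by unique_path, inlined.
import hashlib

def cache_path(gdf_name, rank, taxa, zoom, distinct_taxa):
    text = gdf_name + rank + taxa + str(zoom) + distinct_taxa
    return "cache/gbif_" + hashlib.sha1(text.encode()).hexdigest() + ".json"

# identical selections always resolve to the same JSON object in the bucket
assert cache_path("Oakland", "class", "Aves", 8, "") == \
       cache_path("Oakland", "class", "Aves", 8, "")
print(cache_path("Oakland", "class", "Aves", 8, ""))
```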