Spaces:
Sleeping
Sleeping
File size: 3,636 Bytes
4f08a04 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 |
import ibis
from ibis import _
import pydeck
# +
def connect_data():
    """Open a DuckDB backend through ibis and prepare it for the GBIF data.

    The setup SQL installs and loads ``httpfs``, loads the h3 extension from
    a local build path, switches S3 access to path-style URLs against the
    ``minio.carlboettiger.info`` endpoint, and creates the ``gb`` view over
    the parquet files under ``s3://shared-data/gbif_gb/``.

    Returns:
        The configured ibis DuckDB connection.
    """
    # Kept flush-left so the SQL text handed to DuckDB is unchanged.
    setup_sql = '''
INSTALL httpfs;
LOAD httpfs;
LOAD 'build/release/extension/h3ext/h3ext.duckdb_extension';
SET s3_url_style='path';
SET s3_endpoint='minio.carlboettiger.info';
CREATE VIEW gb AS SELECT * FROM read_parquet('s3://shared-data/gbif_gb/**');
'''
    backend = ibis.duckdb.connect()
    backend.raw_sql(setup_sql)
    return backend
# NOTE(review): the bare string below is only an expression statement; its SQL
# is never sent to DuckDB by this file. It appears to be kept as a record of
# how an H3-indexed parquet copy of GBIF was produced (columns h3z1..h3z7,
# partitioned by h3z1). It filters on countrycode = 'US' and writes to
# 's3://shared-data/gbif/US', while connect_data() reads
# 's3://shared-data/gbif_gb/**' -- presumably a GB variant of this statement
# built that dataset; confirm before relying on it.
'''
CREATE VIEW gbif AS SELECT * FROM read_parquet('s3://gbif/*');
INSTALL httpfs;
LOAD httpfs;
SET s3_url_style='path';
SET s3_endpoint='minio.carlboettiger.info';
SET temp_directory='/tmp/duckdb';
SET memory_limit = '150GB';
SET max_memory = '150GB';
COPY
(
SELECT *,
hex(h3_latlng_to_cell(gbif.decimallatitude, gbif.decimallongitude, 1)) as h3z1,
hex(h3_latlng_to_cell(gbif.decimallatitude, gbif.decimallongitude, 2)) as h3z2,
hex(h3_latlng_to_cell(gbif.decimallatitude, gbif.decimallongitude, 3)) as h3z3,
hex(h3_latlng_to_cell(gbif.decimallatitude, gbif.decimallongitude, 4)) as h3z4,
hex(h3_latlng_to_cell(gbif.decimallatitude, gbif.decimallongitude, 5)) as h3z5,
hex(h3_latlng_to_cell(gbif.decimallatitude, gbif.decimallongitude, 6)) as h3z6,
hex(h3_latlng_to_cell(gbif.decimallatitude, gbif.decimallongitude, 7)) as h3z7
FROM gbif
WHERE (NOT((decimallatitude IS NULL))) AND (NOT((decimallongitude IS NULL))) AND (countrycode = 'US')
) TO 's3://shared-data/gbif/US' (FORMAT 'parquet', PARTITION_BY h3z1);
'''
# distinct species observations, keeping the H3 cell columns for resolutions 2 through 7
def richness_data(con):
    """Cache the distinct chordate taxa per cell in ``gb-cache.parquet``.

    Reads the ``gb`` table from *con*, keeps rows whose ``phylum`` equals
    ``"Chordata"``, narrows them to genus, species, class and the
    ``h3z2``..``h3z7`` columns, drops duplicate rows, and writes the result
    to ``gb-cache.parquet``.

    Args:
        con: Connection exposing the ``gb`` table (see ``connect_data``).

    Returns:
        Whatever ``to_parquet`` returns.
    """
    wanted_columns = [
        _.genus,
        _.species,
        _["class"],
        _.h3z2,
        _.h3z3,
        _.h3z4,
        _.h3z5,
        _.h3z6,
        _.h3z7,
    ]
    chordates = con.table("gb").filter(_.phylum == "Chordata")
    unique_rows = chordates.select(*wanted_columns).distinct()
    return unique_rows.to_parquet("gb-cache.parquet")
# Open the connection and write the distinct-taxa cache (gb-cache.parquet).
# `con` stays at module level because zoom_data() reads it as a global.
con = connect_data()
richness_data(con)
# -
# +
def zoom_data(zoom=6):
    """Export per-cell, per-class row counts for one H3 column to CSV.

    Reads ``gb-cache.parquet`` through the module-level ``con``, renames the
    ``h3z<zoom>`` column to ``h3``, counts rows for every (``h3``, ``class``)
    pair, and writes the counts to ``gbif-vert-gb-h3z<zoom>.csv``.

    Args:
        zoom: Suffix selecting which ``h3z*`` column to aggregate on.

    Returns:
        Whatever ``to_csv`` returns.
    """
    cell_column = f"h3z{zoom}"
    cached = con.read_parquet("gb-cache.parquet")
    cells = cached.rename(h3=cell_column)
    counts = cells.group_by([_.h3, _["class"]]).aggregate(n=_.count())
    return counts.to_csv(f"gbif-vert-gb-{cell_column}.csv")
def filterdata(df, year):
    """Return the rows of *df* whose ``year`` attribute equals *year*."""
    year_matches = df.year == year
    return df[year_matches]
# Write one count CSV for each of the h3z4..h3z7 columns, in ascending order.
for resolution in (4, 5, 6, 7):
    zoom_data(resolution)
# +
def load_data(zoom=7):
    """Load the all-classes count per cell for one zoom level.

    Reads ``gbif-vert-gb-h3z<zoom>.csv`` on a fresh DuckDB connection, sums
    ``n`` within each ``h3`` value, and adds a ``color`` column computed as
    ``255 * n / max(n)``, so the largest count maps to 255.

    Args:
        zoom: Suffix of the CSV file to read.

    Returns:
        The result of ``to_pandas()`` with columns ``h3``, ``n`` and ``color``.
    """
    csv_path = f"gbif-vert-gb-h3z{zoom}.csv"
    backend = ibis.duckdb.connect()
    per_class = backend.read_csv(csv_path)
    per_cell = per_class.group_by(_.h3).aggregate(n=_.n.sum())
    shaded = per_cell.mutate(color=255 * _.n / _.n.max())
    return shaded.to_pandas()
def load_class(taxa="Amphibia", zoom=7):
    """Load the per-cell counts of a single class for one zoom level.

    Reads ``gbif-vert-gb-h3z<zoom>.csv`` on a fresh DuckDB connection, keeps
    the rows whose ``class`` equals *taxa*, and adds a ``color`` column
    computed as ``255 * n / max(n)`` over the remaining rows.

    Args:
        taxa: Value of the ``class`` column to keep.
        zoom: Suffix of the CSV file to read.

    Returns:
        The result of ``to_pandas()`` on the filtered, coloured table.
    """
    csv_path = f"gbif-vert-gb-h3z{zoom}.csv"
    backend = ibis.duckdb.connect()
    one_class = backend.read_csv(csv_path).filter(_["class"] == taxa)
    shaded = one_class.mutate(color=255 * _.n / _.n.max())
    return shaded.to_pandas()
# Load the all-classes summary using load_data()'s default zoom (7).
df = load_data()
# Bare expression; presumably here so a notebook cell displays the frame.
df
# +
# Define a layer to display on a map
import pydeck as pdk
# Set the viewport location
# Initial camera for the deck built in map(): longitude -1.415, latitude
# 52.2323, starting at zoom 4 with the zoom range limited to 1..12, and a
# pitched, rotated view (pitch 40.5, bearing -27.36).
view_state = pdk.ViewState(
longitude=-1.415,
latitude=52.2323,
zoom=4,
min_zoom=1,
max_zoom=12,
pitch=40.5,
bearing=-27.36)
def map(data):
    """Render *data* as an extruded H3 hexagon layer and export it to HTML.

    The layer's accessors reference the ``h3`` field for the hexagon id and
    the ``color`` field for both elevation and fill colour. The deck uses the
    module-level ``view_state`` and is written to ``hex_layer.html``.

    NOTE: this function's name shadows the builtin ``map`` from its
    definition onward; the name is kept so existing callers still work.

    Args:
        data: Data source passed straight to ``pdk.Layer``.

    Returns:
        Whatever ``Deck.to_html`` returns.
    """
    hex_options = {
        "pickable": True,
        "stroked": True,
        "filled": True,
        "extruded": True,
        "elevation_scale": 100,
        "get_elevation": 'color',
        "get_hexagon": "h3",
        "get_fill_color": "[color, 30, 255 - color, 160]",
        "get_line_color": [255, 255, 255],
        "line_width_min_pixels": 2,
    }
    hex_layer = pdk.Layer("H3HexagonLayer", data, **hex_options)

    # Render
    deck = pdk.Deck(layers=[hex_layer], initial_view_state=view_state)
    return deck.to_html("hex_layer.html")
# Render the all-classes frame held in `df`; map() writes hex_layer.html.
map(df)
|