Spaces:
Sleeping
Sleeping
use gbif
Browse files- h3_ibis.ipynb +0 -177
- main.py +12 -36
h3_ibis.ipynb
DELETED
@@ -1,177 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"cells": [
|
3 |
-
{
|
4 |
-
"cell_type": "code",
|
5 |
-
"execution_count": null,
|
6 |
-
"id": "ae26cf1d-f639-418a-8c16-365024a8182f",
|
7 |
-
"metadata": {},
|
8 |
-
"outputs": [],
|
9 |
-
"source": [
|
10 |
-
"import ibis\n",
|
11 |
-
"from ibis import _\n",
|
12 |
-
"\n",
|
13 |
-
"con = ibis.duckdb.connect(extensions=[\"h3\"])\n",
|
14 |
-
"\n",
|
15 |
-
"@ibis.udf.scalar.builtin\n",
|
16 |
-
"def h3_latlng_to_cell(lat: float, lng: float, zoom: int) -> int:\n",
|
17 |
-
" ...\n",
|
18 |
-
"\n",
|
19 |
-
"@ibis.udf.scalar.builtin\n",
|
20 |
-
"def hex(array) -> str:\n",
|
21 |
-
" ...\n",
|
22 |
-
"\n",
|
23 |
-
"\n",
|
24 |
-
"parquet = \"https://data.source.coop/cboettig/obis/obis_20240625.parquet\"\n",
|
25 |
-
"parquet = \"/home/rstudio/source.coop/cboettig/obis/obis_20240625.parquet\"\n",
|
26 |
-
"\n",
|
27 |
-
"obis = con.read_parquet(parquet)\n"
|
28 |
-
]
|
29 |
-
},
|
30 |
-
{
|
31 |
-
"cell_type": "code",
|
32 |
-
"execution_count": null,
|
33 |
-
"id": "ee13fff4-fa0d-4c5e-9546-9a788ccdec2e",
|
34 |
-
"metadata": {},
|
35 |
-
"outputs": [],
|
36 |
-
"source": [
|
37 |
-
"%%time \n",
|
38 |
-
"\n",
|
39 |
-
"(obis\n",
|
40 |
-
" .mutate(h1 = hex(h3_latlng_to_cell(_.decimalLatitude, _.decimalLongitude, 1)),\n",
|
41 |
-
" h2 = hex(h3_latlng_to_cell(_.decimalLatitude, _.decimalLongitude, 2)),\n",
|
42 |
-
" h3 = hex(h3_latlng_to_cell(_.decimalLatitude, _.decimalLongitude, 3)),\n",
|
43 |
-
" h4 = hex(h3_latlng_to_cell(_.decimalLatitude, _.decimalLongitude, 4)),\n",
|
44 |
-
" h5 = hex(h3_latlng_to_cell(_.decimalLatitude, _.decimalLongitude, 5)),\n",
|
45 |
-
" h6 = hex(h3_latlng_to_cell(_.decimalLatitude, _.decimalLongitude, 6)),\n",
|
46 |
-
" h7 = hex(h3_latlng_to_cell(_.decimalLatitude, _.decimalLongitude, 7)),\n",
|
47 |
-
" h8 = hex(h3_latlng_to_cell(_.decimalLatitude, _.decimalLongitude, 8)),\n",
|
48 |
-
" h9 = hex(h3_latlng_to_cell(_.decimalLatitude, _.decimalLongitude, 9)))\n",
|
49 |
-
" .to_parquet(\"obis_h3.parquet\")\n",
|
50 |
-
")"
|
51 |
-
]
|
52 |
-
},
|
53 |
-
{
|
54 |
-
"cell_type": "code",
|
55 |
-
"execution_count": null,
|
56 |
-
"id": "68b5b794-91cb-4b98-acb3-008b80516d3c",
|
57 |
-
"metadata": {},
|
58 |
-
"outputs": [],
|
59 |
-
"source": [
|
60 |
-
"obis_h3 = con.read_parquet(\"obis_h3.parquet\")\n"
|
61 |
-
]
|
62 |
-
},
|
63 |
-
{
|
64 |
-
"cell_type": "code",
|
65 |
-
"execution_count": null,
|
66 |
-
"id": "e42f44fd-8668-41f0-b890-25918072f2c2",
|
67 |
-
"metadata": {},
|
68 |
-
"outputs": [],
|
69 |
-
"source": [
|
70 |
-
"\n"
|
71 |
-
]
|
72 |
-
},
|
73 |
-
{
|
74 |
-
"cell_type": "code",
|
75 |
-
"execution_count": null,
|
76 |
-
"id": "47e2ebce-f1cc-42af-a0f9-0ff155143bcc",
|
77 |
-
"metadata": {},
|
78 |
-
"outputs": [],
|
79 |
-
"source": [
|
80 |
-
"import pandas as pd\n",
|
81 |
-
"def get_h3point_df(resolution: float) -> pd.DataFrame:\n",
|
82 |
-
" column = \"h\" + str(resolution)\n",
|
83 |
-
" df = (obis_h3\n",
|
84 |
-
" .rename(cell = column)\n",
|
85 |
-
" .cell.value_counts()\n",
|
86 |
-
" .mutate(v = _.cell_count.log())\n",
|
87 |
-
" .mutate(normalized_values = _.v / _.v.max())\n",
|
88 |
-
" .to_pandas()\n",
|
89 |
-
" )\n",
|
90 |
-
" return df\n",
|
91 |
-
"\n",
|
92 |
-
"\n",
|
93 |
-
"df = get_h3point_df(4)"
|
94 |
-
]
|
95 |
-
},
|
96 |
-
{
|
97 |
-
"cell_type": "code",
|
98 |
-
"execution_count": null,
|
99 |
-
"id": "7c717c3b-ec75-4ffa-857f-bfbe8595847e",
|
100 |
-
"metadata": {},
|
101 |
-
"outputs": [],
|
102 |
-
"source": [
|
103 |
-
"from matplotlib import cm\n",
|
104 |
-
"def viridis_color(x) -> str:\n",
|
105 |
-
" rgb_array = np.round( cm.viridis(x) * 255 ).astype(int).clip(0,255).tolist()\n",
|
106 |
-
" return rgb_array\n",
|
107 |
-
" \n",
|
108 |
-
"df['rgb'] = viridis_color(df.normalized_values)\n",
|
109 |
-
"df"
|
110 |
-
]
|
111 |
-
},
|
112 |
-
{
|
113 |
-
"cell_type": "code",
|
114 |
-
"execution_count": null,
|
115 |
-
"id": "854a8b05-4244-4f6d-b7a5-11cdd108e3d3",
|
116 |
-
"metadata": {},
|
117 |
-
"outputs": [],
|
118 |
-
"source": [
|
119 |
-
"import pydeck as pdk\n",
|
120 |
-
"from typing import List\n",
|
121 |
-
"\n",
|
122 |
-
"def get_coverage_layer(df: pd.DataFrame, line_color: List) -> pdk.Layer:\n",
|
123 |
-
" return pdk.Layer(\n",
|
124 |
-
" \"H3HexagonLayer\",\n",
|
125 |
-
" df,\n",
|
126 |
-
" get_hexagon=\"cell\",\n",
|
127 |
-
" filled=True,\n",
|
128 |
-
" auto_highlight=True,\n",
|
129 |
-
" get_fill_color=\"rgb\",\n",
|
130 |
-
" # get_elevation=\"normalized_values\",\n",
|
131 |
-
" pickable=True,\n",
|
132 |
-
" extruded=False,\n",
|
133 |
-
" line_width_min_pixels=1,\n",
|
134 |
-
" )\n",
|
135 |
-
"\n",
|
136 |
-
"layer_coverage_1 = get_coverage_layer(df, line_color = [36, 191, 242])\n"
|
137 |
-
]
|
138 |
-
},
|
139 |
-
{
|
140 |
-
"cell_type": "code",
|
141 |
-
"execution_count": null,
|
142 |
-
"id": "dee7c1c1-1664-433c-9963-ae85f8d0ef85",
|
143 |
-
"metadata": {},
|
144 |
-
"outputs": [],
|
145 |
-
"source": [
|
146 |
-
"pdk.Deck(map_provider='carto', \n",
|
147 |
-
" map_style='light',\n",
|
148 |
-
" initial_view_state=pdk.ViewState(\n",
|
149 |
-
" latitude=30., longitude=0., zoom=3, height=400\n",
|
150 |
-
" ),\n",
|
151 |
-
" layers=[layer_coverage_1],\n",
|
152 |
-
")"
|
153 |
-
]
|
154 |
-
}
|
155 |
-
],
|
156 |
-
"metadata": {
|
157 |
-
"kernelspec": {
|
158 |
-
"display_name": "Python 3 (ipykernel)",
|
159 |
-
"language": "python",
|
160 |
-
"name": "python3"
|
161 |
-
},
|
162 |
-
"language_info": {
|
163 |
-
"codemirror_mode": {
|
164 |
-
"name": "ipython",
|
165 |
-
"version": 3
|
166 |
-
},
|
167 |
-
"file_extension": ".py",
|
168 |
-
"mimetype": "text/x-python",
|
169 |
-
"name": "python",
|
170 |
-
"nbconvert_exporter": "python",
|
171 |
-
"pygments_lexer": "ipython3",
|
172 |
-
"version": "3.10.12"
|
173 |
-
}
|
174 |
-
},
|
175 |
-
"nbformat": 4,
|
176 |
-
"nbformat_minor": 5
|
177 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main.py
CHANGED
@@ -9,45 +9,19 @@ import ibis
|
|
9 |
from ibis import _
|
10 |
|
11 |
image = Image.open('./favicon.png')
|
12 |
-
st.set_page_config(page_title="
|
13 |
-
st.header("
|
14 |
|
15 |
# st.set_page_config(page_title="H3 in Streamlit", layout="wide")
|
16 |
|
17 |
-
h3_parquet = "https://data.source.coop/cboettig/
|
18 |
-
h3_parquet = "/home/rstudio/source.coop/cboettig/obis/obis_h3.parquet"
|
19 |
|
20 |
con = ibis.duckdb.connect(extensions=["h3"])
|
21 |
obis_h3 = con.read_parquet(h3_parquet)
|
22 |
|
23 |
-
st.subheader("
|
24 |
|
25 |
-
col1, col2 = st.columns([0.7, 0.3])
|
26 |
-
with col1:
|
27 |
-
st.markdown("H3 Discrete Global Grid is a way to divide the world into a grid of hexagonal cells of equal sizes, "
|
28 |
-
"each with a unique identifier (string or integer). It is a hierarchical grid, meaning that cells can be "
|
29 |
-
"aggregated into larger cells, and vice versa. This makes it very efficient for processing geospatial data.")
|
30 |
-
with col2:
|
31 |
-
st.image('https://viennadatasciencegroup.at/post/2019-11-21-h3spark/featured.png',
|
32 |
-
width=180)
|
33 |
-
|
34 |
-
col1, col2, col3, col4 = st.columns(4)
|
35 |
-
with col2:
|
36 |
-
st.write("**String**")
|
37 |
-
st.text('8c274daeb7a0bff')
|
38 |
-
st.text('8c2ab2d9294c5ff')
|
39 |
-
st.text('8c2ab2da36605ff')
|
40 |
-
|
41 |
-
with col3:
|
42 |
-
st.write("**Integer**")
|
43 |
-
st.text('631195381387627519')
|
44 |
-
st.text('631255110006392319')
|
45 |
-
st.text('631255110288541183')
|
46 |
-
|
47 |
-
st.markdown("The lowest resolution is 0, at which the world is divided into 122 hexagons. "
|
48 |
-
"The highest resolution is 15, at which the size of a hexagon is less than a square meter, "
|
49 |
-
"and the world can be divided into approximately 600 trillion hexagons."
|
50 |
-
" You can check different resolutions and play with hierarchy levels using the widget below.")
|
51 |
|
52 |
# ------ Visualisation 1 ---------
|
53 |
def viridis_color(x) -> str:
|
@@ -76,20 +50,22 @@ def get_coverage_layer(df: pd.DataFrame) -> pdk.Layer:
|
|
76 |
filled=True,
|
77 |
auto_highlight=True,
|
78 |
get_fill_color="rgb",
|
79 |
-
|
|
|
|
|
80 |
pickable=True,
|
81 |
-
extruded=
|
82 |
line_width_min_pixels=1,
|
83 |
)
|
84 |
|
85 |
-
min_v_1, max_v_1, v_1, z_1, lon_1, lat_1 = ( 1, 9,
|
86 |
col1, col2 = st.columns([70, 30])
|
87 |
with col1:
|
88 |
h3_resolut_1 = st.slider(
|
89 |
"H3 resolution", min_value=min_v_1, max_value=max_v_1, value=v_1)
|
90 |
|
91 |
with col2:
|
92 |
-
levels_option = st.selectbox("
|
93 |
|
94 |
df = get_h3point_df(h3_resolut_1)
|
95 |
layer_coverage_1 = get_coverage_layer(df)
|
@@ -101,7 +77,7 @@ st.pydeck_chart(
|
|
101 |
initial_view_state=pdk.ViewState(
|
102 |
latitude=lat_1, longitude=lon_1, zoom=z_1, height=400
|
103 |
),
|
104 |
-
tooltip={"html": "<b>ID:</b> {
|
105 |
layers=visible_layers_coverage_1,
|
106 |
)
|
107 |
)
|
|
|
9 |
from ibis import _
|
10 |
|
11 |
image = Image.open('./favicon.png')
|
12 |
+
st.set_page_config(page_title="GBIF Observations in H3", page_icon=image)
|
13 |
+
st.header("GBIF Observations in H3", divider="rainbow")
|
14 |
|
15 |
# st.set_page_config(page_title="H3 in Streamlit", layout="wide")
|
16 |
|
17 |
+
h3_parquet = "https://data.source.coop/cboettig/gbif/gbif_ca_h3.parquet"
|
18 |
+
#h3_parquet = "/home/rstudio/source.coop/cboettig/obis/obis_h3.parquet"
|
19 |
|
20 |
con = ibis.duckdb.connect(extensions=["h3"])
|
21 |
obis_h3 = con.read_parquet(h3_parquet)
|
22 |
|
23 |
+
st.subheader("California total observations")
|
24 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
25 |
|
26 |
# ------ Visualisation 1 ---------
|
27 |
def viridis_color(x) -> str:
|
|
|
50 |
filled=True,
|
51 |
auto_highlight=True,
|
52 |
get_fill_color="rgb",
|
53 |
+
get_elevation="normalized_values",
|
54 |
+
elevation_scale=5000,
|
55 |
+
elevation_range=[0,1],
|
56 |
pickable=True,
|
57 |
+
extruded=True,
|
58 |
line_width_min_pixels=1,
|
59 |
)
|
60 |
|
61 |
+
min_v_1, max_v_1, v_1, z_1, lon_1, lat_1 = ( 1, 9, 4, 4, -120, 40,)
|
62 |
col1, col2 = st.columns([70, 30])
|
63 |
with col1:
|
64 |
h3_resolut_1 = st.slider(
|
65 |
"H3 resolution", min_value=min_v_1, max_value=max_v_1, value=v_1)
|
66 |
|
67 |
with col2:
|
68 |
+
levels_option = st.selectbox("Add filters", ("One", "Two", "Three"))
|
69 |
|
70 |
df = get_h3point_df(h3_resolut_1)
|
71 |
layer_coverage_1 = get_coverage_layer(df)
|
|
|
77 |
initial_view_state=pdk.ViewState(
|
78 |
latitude=lat_1, longitude=lon_1, zoom=z_1, height=400
|
79 |
),
|
80 |
+
tooltip={"html": "<b>ID:</b> {cell_count}", "style": {"color": "white"}},
|
81 |
layers=visible_layers_coverage_1,
|
82 |
)
|
83 |
)
|