hf-streamlit-demo / gbif-app.py
cboettig's picture
drafts
4f08a04
# -*- coding: utf-8 -*-
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""An example of showing geographic data."""
import os
# +
import altair as alt
import numpy as np
import pandas as pd
import pydeck as pdk
import streamlit as st
import ibis
from ibis import _
# -
# PAGE CONFIG: WIDE LAYOUT, BROWSER-TAB TITLE AND FAVICON
st.set_page_config(
    page_title="GBIF Biodiversity Demo",
    page_icon=":butterfly:",
    layout="wide",
)
# LOAD DATA ONCE
@st.cache_resource
def load_data():
    """Open the butterfly occurrence table on a fresh DuckDB connection.

    If ``butterflies_gb.parquet`` exists in the working directory it is read
    as-is, with no further filtering.  Otherwise the GBIF 2023-06-01
    occurrence snapshot on S3 is read and narrowed to rows that:

    * have non-null longitude, latitude and year,
    * belong to the order Lepidoptera with country code ``GB``,
    * were recorded in 2010 or later,

    keeping only the year and coordinate columns.

    Returns:
        The ibis table expression produced by the steps above.  This function
        never calls ``to_pandas``; materialisation is left to the caller.

    Note:
        Nothing in this function writes ``butterflies_gb.parquet``; the local
        file is only used when it is already present.
    """
    con = ibis.duckdb.connect()
    local_copy = "butterflies_gb.parquet"

    if not os.path.isfile(local_copy):
        occurrences = con.read_parquet(
            "s3://anonymous@gbif-open-data-us-east-1/occurrence/2023-06-01/occurrence.parquet"
        )
        located = occurrences.dropna(
            ["decimallongitude", "decimallatitude", "year"], how="any"
        )
        british_lepidoptera = located.filter(
            [_.order == "Lepidoptera", _.countrycode == "GB"]
        )
        recent = british_lepidoptera.filter(_.year >= 2010)
        return recent.select(["year", "decimallongitude", "decimallatitude"])

    return con.read_parquet(local_copy)
def map(data, lat, lon, zoom):
    """Render a pydeck hexagon-bin map of ``data`` into the current container.

    NOTE: this function shadows the builtin ``map``; the name is kept because
    the rest of the script calls it by this name.

    Args:
        data: Records handed to the ``HexagonLayer``; positions are read from
            the ``decimallongitude`` and ``decimallatitude`` fields.
        lat: Latitude of the initial view centre.
        lon: Longitude of the initial view centre.
        zoom: Initial zoom level.
    """
    # Initial view: centred on (lat, lon), tilted with a fixed pitch of 50.
    camera = {
        "latitude": lat,
        "longitude": lon,
        "zoom": zoom,
        "pitch": 50,
    }
    hexagons = pdk.Layer(
        "HexagonLayer",
        data=data,
        get_position=["decimallongitude", "decimallatitude"],
        radius=1000,
        elevation_scale=100,
        pickable=True,
        extruded=True,
    )
    deck = pdk.Deck(
        map_style="mapbox://styles/mapbox/light-v9",
        initial_view_state=camera,
        layers=[hexagons],
    )
    st.write(deck)
# +
# FILTER DATA FOR A SPECIFIC YEAR.
# Not wrapped in @st.cache_data: the ibis lazy table argument is not cache-able.
def filterdata(df, year):
    """Return the rows of ``df`` whose ``year`` equals ``year`` as a pandas DataFrame.

    Args:
        df: ibis table expression with a ``year`` column.
        year: Year to keep.

    Returns:
        The result of ``to_pandas()`` on the filtered expression.
    """
    single_year = df.filter(_.year == year)
    return single_year.to_pandas()
# -
# CALCULATE MIDPOINT FOR GIVEN SET OF DATA
@st.cache_data
def mpoint(lat, lon):
    """Return ``(average of lat, average of lon)`` as a 2-tuple.

    Args:
        lat: Latitude values accepted by ``np.average``.
        lon: Longitude values accepted by ``np.average``.
    """
    centre_lat = np.average(lat)
    centre_lon = np.average(lon)
    return (centre_lat, centre_lon)
# STREAMLIT APP LAYOUT
data = load_data()

# LAYING OUT THE TOP SECTION OF THE APP
row1_1, row1_2 = st.columns((2, 3))

# SEE IF THERE'S A QUERY PARAM IN THE URL (e.g. ?year=2015).
# THIS ALLOWS YOU TO PASS A STATEFUL URL TO SOMEONE WITH A SPECIFIC YEAR SELECTED.
# The URL is only consulted until one successful sync has happened
# ("url_synced"); after that the session state drives the selection.
if not st.session_state.get("url_synced", False):
    try:
        # st.query_params["year"] is already a single string, so it must be
        # converted whole: indexing it with [0] would keep only its first
        # character ("2015" -> "2").
        year = int(st.query_params["year"])
    except (KeyError, ValueError):
        # No "year" in the URL, or it is not an integer: leave the session
        # state untouched so the slider falls back to its own default.
        pass
    else:
        # Only accept years the "Select year" slider can represent
        # (2010-2023); an out-of-range value must not be pushed into the
        # slider's session-state key.
        if 2010 <= year <= 2023:
            st.session_state["year"] = year
            st.session_state["url_synced"] = True
# IF THE SLIDER CHANGES, UPDATE THE QUERY PARAM
def update_query_params():
    """Copy the year held in ``st.session_state["year"]`` into the URL's ``year`` query parameter."""
    st.query_params["year"] = st.session_state["year"]
# LEFT COLUMN: TITLE AND YEAR SLIDER
with row1_1:
    st.title("GBIF Butterfly Occurrences")
    # key="year" stores the slider value under st.session_state["year"];
    # update_query_params runs whenever the user moves the slider.
    year_selected = st.slider(
        "Select year",
        2010,
        2023,
        key="year",
        on_change=update_query_params,
    )

# RIGHT COLUMN: SHORT USAGE HINT
with row1_2:
    st.write(
        "\n##\n"
        "By sliding the slider on the left you can view different slices "
        "of time and explore different trends.\n"
    )
# LAYING OUT THE MIDDLE SECTION OF THE APP WITH THE MAPS
row2_1, row2_2, row2_3, row2_4 = st.columns((2, 1, 1, 1))

# +
# FIXED (latitude, longitude) CENTRE SHARED BY EVERY MAP PANEL.
# Hard-coded rather than computed with mpoint(); presumably chosen to centre
# the view on Great Britain -- TODO confirm.
midpoint = (52.0, -1.0)
# -

# Run the year filter once and reuse the resulting DataFrame: every panel
# shows the same rows and differs only in zoom level, so repeating the query
# per panel would redo identical work on each rerun.
year_df = filterdata(data, year_selected)

# (column, heading, zoom level) for each panel, left to right.
panels = (
    (row2_1, "**Large Map**", 4),
    (row2_2, "**Panel 1**", 3),
    (row2_3, "**Panel 2**", 2),
    (row2_4, "**Panel 3**", 1),
)

for column, heading, zoom in panels:
    with column:
        st.write(heading)
        map(year_df, midpoint[0], midpoint[1], zoom)