cboettig committed on
Commit
14124c0
·
1 Parent(s): c36cda6

so it begins :seedling:

Browse files
Files changed (5) hide show
  1. .github/workflows/deploy.yml +20 -0
  2. Dockerfile +20 -0
  3. LICENSE +24 -0
  4. README.md +14 -1
  5. app.R +278 -0
.github/workflows/deploy.yml ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Sync to Hugging Face hub
2
+ on:
3
+ push:
4
+ branches: [main]
5
+
6
+ # to run this workflow manually from the Actions tab
7
+ workflow_dispatch:
8
+
9
+ jobs:
10
+ sync-to-hub:
11
+ runs-on: ubuntu-latest
12
+ steps:
13
+ - uses: actions/checkout@v3
14
+ with:
15
+ fetch-depth: 0
16
+ lfs: true
17
+ - name: Push to hub
18
+ env:
19
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
20
+ run: git push -f https://cboettig:$HF_TOKEN@huggingface.co/spaces/boettiger-lab/biodiversity-justice main
Dockerfile ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM rocker/geospatial:latest
2
+
3
+ WORKDIR /code
4
+
5
+ RUN install2.r --error \
6
+ bsicons \
7
+ bslib \
8
+ duckdbfs \
9
+ fontawesome \
10
+ gt \
11
+ markdown \
12
+ shiny \
13
+ shinychat \
14
+ tidyverse
15
+
16
+ RUN installGithub.r cboettig/mapgl tidyverse/ellmer
17
+
18
+ COPY . .
19
+
20
+ CMD ["R", "--quiet", "-e", "shiny::runApp(host='0.0.0.0', port=7860)"]
LICENSE ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ BSD 2-Clause License
2
+
3
+ Copyright (c) 2024, Boettiger Lab, UC Berkeley
4
+
5
+ Redistribution and use in source and binary forms, with or without
6
+ modification, are permitted provided that the following conditions are met:
7
+
8
+ 1. Redistributions of source code must retain the above copyright notice, this
9
+ list of conditions and the following disclaimer.
10
+
11
+ 2. Redistributions in binary form must reproduce the above copyright notice,
12
+ this list of conditions and the following disclaimer in the documentation
13
+ and/or other materials provided with the distribution.
14
+
15
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
19
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
22
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
23
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
README.md CHANGED
@@ -1 +1,14 @@
1
- # biodiversity-justice
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Biodiversity Justice
3
+ emoji: 📚
4
+ colorFrom: blue
5
+ colorTo: green
6
+ sdk: docker
7
+ pinned: false
8
+ license: bsd-2-clause
9
+ ---
10
+
11
+ # Demo Shiny App with Maplibre + open LLM interface
12
+
13
+ :hugs: Shiny App on Huggingface: <https://huggingface.co/spaces/boettiger-lab/geo-llm-r>
14
+
app.R ADDED
@@ -0,0 +1,278 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ library(shiny)
2
+ library(bslib)
3
+ library(htmltools)
4
+ library(markdown)
5
+ library(fontawesome)
6
+ library(bsicons)
7
+ library(gt)
8
+ library(glue)
9
+ library(ggplot2)
10
+
11
+ library(mapgl)
12
+ library(dplyr)
13
+ library(duckdbfs)
14
+
15
+ duckdbfs::load_spatial()
16
+
17
+ css <- HTML("<link rel='stylesheet' type='text/css' href='https://demos.creative-tim.com/material-dashboard/assets/css/material-dashboard.min.css?v=3.2.0'>")
18
+
19
+
20
+ # Define the UI
21
# Page layout, top to bottom: a chat box with a send button, the map next to
# two density plots, the query-result table, and a card holding the generated
# SQL, its explanation and the credits. Layer toggles live in the sidebar.
ui <- page_sidebar(
  fillable = FALSE, # do not squeeze to vertical screen space
  tags$head(css),
  titlePanel("Demo App"),

  "This is a proof-of-principle for a simple chat-driven interface to dynamically explore geospatial data.
  ",

  # Chat input: the text box holds the question, the button submits it.
  card(
    layout_columns(
      textInput(
        "chat",
        label = NULL,
        value = "Which counties in California have the highest average social vulnerability?",
        width = "100%"
      ),
      div(
        actionButton(
          "user_msg", "",
          icon = icon("paper-plane"),
          class = "btn-primary btn-sm align-bottom"
        ),
        class = "align-text-bottom"
      ),
      col_widths = c(11, 1)
    ),
    fill = FALSE
  ),

  layout_columns(
    card(maplibreOutput("map")),
    card(
      # markdown() renders inline text; includeMarkdown() expects a file path.
      markdown("## Plot"),
      plotOutput("chart1"),
      plotOutput("chart2")
    ),
    col_widths = c(8, 4),
    row_heights = c("600px"),
    max_height = "700px"
  ),

  gt_output("table"),

  card(
    fill = TRUE,
    card_header(fa("robot")),

    accordion(
      open = FALSE,
      accordion_panel(
        title = "show sql",
        icon = fa("terminal"),
        verbatimTextOutput("sql_code")
      ),
      accordion_panel(
        title = "explain",
        icon = fa("user", prefer_type = "solid"),
        textOutput("explanation")
      )
    ),

    card(
      card_header("Errata"),
      markdown(
"
#### Credits

Developed by Carl Boettiger, UC Berkeley, 2025. BSD License.

Data from the US Census and CDC's [Social Vulnerability Index](https://www.atsdr.cdc.gov/place-health/php/svi/index.html)

#### Technical details

The app is written entirely in R using shiny. The app will translate natural language queries in SQL code using
a small open-weights language model. The SQL code is executed using the duckdb backend against cloud-native
geoparquet snapshot of the Social Vulnerability Index hosted on Source Cooperative. Summary chart data are also
computed in duckdb by streaming, providing responsive updates while needing minimal RAM or disk storage despite
the large size of the data sources.

The map is rendered and updated using MapLibre with PMTiles, which provides responsive rendering for large feature sets.
The PMTiles layer is also hosted on Source cooperative where it can be streamed efficiently.
")
    )
  ),

  sidebar = sidebar(
    # One switch per map layer; the server reads these as input$<id>.
    input_switch("redlines", "Redlined Areas", value = FALSE),
    input_switch("svi", "Social Vulnerability", value = TRUE),
    input_switch("richness", "Biodiversity Richness", value = FALSE),
    input_switch("rsr", "Biodiversity Range Size Rarity", value = FALSE),

    card(
      card_header(bs_icon("github"), "Source code:"),
      a(
        href = "https://github.com/boettiger-lab/geo-llm-r",
        "https://github.com/boettiger-lab/geo-llm-r"
      )
    )
  ),

  theme = bs_theme(version = "5")
)
114
+
115
+
116
+
117
+
118
# Remote data: a PMTiles archive drives the map and a parquet file backs the
# SQL queries; both live under the same repository URL.
repo <- "https://data.source.coop/cboettig/social-vulnerability"
pmtiles <- glue("{repo}/svi2020_us_tract.pmtiles")
parquet <- glue("{repo}/svi2020_us_tract.parquet")

# Register the parquet file under the table name "svi" (the name the
# generated SQL refers to) and keep only rows with a positive overall index.
svi <- open_dataset(parquet, tblname = "svi") |>
  filter(RPL_THEMES > 0)

con <- duckdbfs::cached_connection()
schema <- DBI::dbGetQuery(con, "PRAGMA table_info(svi)")

# glue() recycles over vector-valued expressions: interpolating the schema
# data frame directly yields one copy of the whole prompt per column of the
# PRAGMA output. Collapse it to a single "name (type)" listing first so the
# prompt is one string that states the schema once.
schema_text <- paste0(schema$name, " (", schema$type, ")", collapse = "\n")

system_prompt <- glue::glue('
You are a helpful agent who always replies strictly in JSON-formatted text.
Your task is to translate the users question into a SQL query that will be run
against the "svi" table in a duckdb database. The duckdb database has a
spatial extension which understands PostGIS operations as well.
Include semantically meaningful columns like COUNTY and STATE name.

In the data, each row represents an individual census tract. If asked for
county or state level statistics, be sure to aggregate across all the tracts
in that county or state.

The table schema is
<schema_text>

The column called "RPL_THEMES" corresponds to the overall "Social vulnerability index" number.

Format your answer as follows:

{
"query": "your raw SQL response goes here",
"explanation": "your explanation of the query"
}
', .open = "<", .close = ">")

# Chat client for the hosted model. The API key is read from the
# NRP_API_KEY environment variable; temperature 0 is requested via api_args.
chat <- ellmer::chat_vllm(
  base_url = "https://llm.nrp-nautilus.io/",
  model = "llama3",
  api_key = Sys.getenv("NRP_API_KEY"),
  system_prompt = system_prompt,
  api_args = list(temperature = 0)
)
158
+
159
+ # helper utilities
160
+ # faster/more scalable to pass maplibre the ids to refilter pmtiles,
161
+ # than to pass it the full geospatial/sf object
162
#' Build a MapLibre filter expression for the features in a query result
#'
#' Sending MapLibre only the ids used to re-filter the PMTiles layer is faster
#' and more scalable than sending it the full geospatial object.
#'
#' @param full_data Table containing every feature, including `id_col`.
#' @param filtered_data Data frame of query results. It is joined to
#'   `full_data` on whichever columns the two have in common.
#' @param id_col Name of the feature-id column. Defaults to `"FIPS"`.
#' @return A MapLibre `in` expression as a nested list, or `NULL` (meaning
#'   "no filter") when `filtered_data` has no rows or shares no column with
#'   `full_data`.
filter_column <- function(full_data, filtered_data, id_col = "FIPS") {
  if (nrow(filtered_data) < 1) {
    return(NULL)
  }

  # A natural join needs at least one shared key. Without one inner_join()
  # aborts, so fall back to "no filter" instead of breaking the map render.
  keys <- intersect(colnames(full_data), colnames(filtered_data))
  if (length(keys) < 1) {
    return(NULL)
  }

  values <- full_data |>
    inner_join(filtered_data, by = keys, copy = TRUE) |>
    pull(id_col) |>
    unique()

  # maplibre syntax for the filter of PMTiles
  list("in", list("get", id_col), list("literal", values))
}
170
+
171
# Nation-wide baseline for the first plot: mean index per county, aggregated
# in the database and collected once at startup.
# County names repeat across states, so grouping by COUNTY alone would pool
# unrelated counties into one value; key on STATE as well.
# NOTE(review): assumes the table has a STATE column, as the system prompt
# implies -- confirm against the schema.
chart1_data <- svi |>
  group_by(STATE, COUNTY) |>
  summarise(mean_svi = mean(RPL_THEMES, na.rm = TRUE), .groups = "drop") |>
  collect()

chart1 <- chart1_data |>
  ggplot(aes(mean_svi)) +
  geom_density(fill = "darkred") +
  ggtitle("County-level vulnerability nation-wide")
179
+
180
+
181
+ # Define the server
182
# Server logic.
#
# On each click of the send button the question is passed to the chat model,
# the JSON reply is parsed into a SQL query plus an explanation, the query is
# run against the database, and the table, second plot and map are refreshed
# from the result. The map itself is rebuilt whenever a layer switch or the
# stored query result changes.
server <- function(input, output, session) {
  # Latest query result; read by the map to filter the SVI layer.
  data <- reactiveValues(df = tibble())
  output$chart1 <- renderPlot(chart1)

  observeEvent(input$user_msg, {
    # The reply is model-generated text and the SQL inside it may be invalid,
    # so report any failure to the user rather than letting the error escape
    # the observer.
    # NOTE(review): the generated SQL is executed verbatim; consider a
    # read-only connection if this is exposed to untrusted users.
    result <- tryCatch(
      {
        stream <- chat$chat(input$chat)

        # optional, remember previous discussion
        # chat_append("chat", stream)

        response <- jsonlite::fromJSON(stream)
        if (!is.list(response) || !is.character(response$query)) {
          stop("the model did not return a SQL query", call. = FALSE)
        }
        list(
          response = response,
          df = DBI::dbGetQuery(con, response$query)
        )
      },
      error = function(e) {
        showNotification(
          paste("Could not answer that question:", conditionMessage(e)),
          type = "error"
        )
        NULL
      }
    )
    req(result)

    response <- result$response
    output$sql_code <- renderText(
      stringr::str_wrap(response$query, width = 60)
    )
    output$explanation <- renderText(response$explanation)

    # don't display shape column in render
    df <- result$df |> select(-any_of("Shape"))
    output$table <- render_gt(df, height = 300)

    # Plot the first numeric column the query added on top of the table's own
    # columns (typically the aggregate). A query may add none or several, so
    # pick explicitly instead of renaming whatever happens to be left over.
    new_cols <- setdiff(colnames(df), colnames(svi))
    new_cols <- new_cols[vapply(df[new_cols], is.numeric, logical(1))]
    if (length(new_cols) > 0) {
      value_col <- new_cols[[1]]
      chart2 <- ggplot(df, aes(.data[[value_col]])) +
        geom_density(fill = "darkred") +
        xlim(c(0, 1)) +
        labs(x = "social_vulnerability") +
        ggtitle("Vulnerability of selected areas")
      output$chart2 <- renderPlot(chart2)
    } else {
      # Nothing to plot for this query: clear the previous chart.
      output$chart2 <- renderPlot(NULL)
    }

    # Storing the result invalidates the map so it re-filters the SVI layer.
    data$df <- df
  })

  output$map <- renderMaplibre({
    m <- maplibre(center = c(-92.9, 41.3), zoom = 3, height = "400")

    if (input$redlines) {
      m <- m |>
        add_fill_layer(
          id = "redlines",
          source = list(
            type = "vector",
            url = paste0(
              "pmtiles://",
              "https://data.source.coop/cboettig/us-boundaries/mappinginequality.pmtiles"
            )
          ),
          source_layer = "mappinginequality",
          fill_color = list("get", "fill")
        )
    }

    if (input$richness) {
      m <- m |>
        add_raster_source(
          id = "richness",
          tiles = "https://data.source.coop/cboettig/mobi/tiles/red/species-richness-all/{z}/{x}/{y}.png",
          maxzoom = 11
        ) |>
        add_raster_layer(id = "richness-layer", source = "richness")
    }

    if (input$rsr) {
      # Each layer needs its own id: reusing "richness-layer" here collided
      # with the richness layer when both switches were on.
      m <- m |>
        add_raster_source(
          id = "rsr",
          tiles = "https://data.source.coop/cboettig/mobi/tiles/green/range-size-rarity-all/{z}/{x}/{y}.png",
          maxzoom = 11
        ) |>
        add_raster_layer(id = "rsr-layer", source = "rsr")
    }

    if (input$svi) {
      m <- m |>
        add_fill_layer(
          id = "svi_layer",
          source = list(
            type = "vector",
            url = paste0("pmtiles://", pmtiles)
          ),
          # NOTE(review): layer name says 2000 while the archive is the 2020
          # snapshot -- left as-is; confirm against the PMTiles metadata.
          source_layer = "SVI2000_US_tract",
          filter = filter_column(svi, data$df, "FIPS"),
          fill_opacity = 0.5,
          fill_color = interpolate(
            column = "RPL_THEMES",
            values = c(0, 1),
            stops = c("lightpink", "darkred"),
            na_color = "lightgrey"
          )
        )
    }

    m
  })
}
276
+
277
+ # Run the app
278
+ shinyApp(ui = ui, server = server)