{ "cells": [ { "cell_type": "markdown", "id": "707b1a14", "metadata": {}, "source": [ "Pre-process SVI Data from [CDC portal](https://www.atsdr.cdc.gov/place-health/php/svi/svi-data-documentation-download.html)\n", "\n", "- Tract data for United States from 2022, 2020, 2010, 2000. \n", "- Data documentation" ] }, { "cell_type": "code", "execution_count": 1, "id": "803df305", "metadata": {}, "outputs": [], "source": [ "import ibis\n", "from ibis import _\n", "import streamlit as st\n", "from utilities import generate_pmtiles\n", "\n", "con = ibis.duckdb.connect(\"duck.db\", extensions=['httpfs', 'spatial', 'h3'])\n" ] }, { "cell_type": "code", "execution_count": 13, "id": "7ac648e6", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "781a57e6e9004c5b8b7ae644aea77dbe", "version_major": 2, "version_minor": 0 }, "text/plain": [ "FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "9de6547cfe7e4b32af6852eadf27e53e", "version_major": 2, "version_minor": 0 }, "text/plain": [ "FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stderr", "output_type": "stream", "text": [ "For layer 0, using name \"svi\"\n", "84120 features, 34922477 bytes of geometry, 5150225 bytes of string pool\n", "tile 1/0/0 size is 673414 with detail 12, >500000 \n", "Going to try keeping the sparsest 66.82% of the features to make it fit\n", "tile 1/0/0 size is 654918 with detail 12, >500000 \n", "Going to try keeping the sparsest 45.92% of the features to make it fit\n", "tile 1/0/0 size is 627082 with detail 12, >500000 \n", "Going to try keeping the sparsest 32.95% of the features to make it fit\n", "tile 1/0/0 size is 571221 with detail 12, >500000 \n", "Going to try keeping the sparsest 25.96% of the features to make it fit\n", "tile 1/0/0 size is 515026 with detail 12, >500000 \n", "Going to try keeping the sparsest 22.68% of the features to make it fit\n", "tile 2/0/1 size is 556184 with detail 12, >500000 \n", "Going to try keeping the sparsest 80.91% of the features to make it fit\n", "tile 2/1/1 size is 680483 with detail 12, >500000 \n", "Going to try keeping the sparsest 66.13% of the features to make it fit\n", "tile 2/0/1 size is 544973 with detail 12, >500000 \n", "Going to try keeping the sparsest 66.81% of the features to make it fit\n", "tile 2/1/1 size is 633636 with detail 12, >500000 \n", "Going to try keeping the sparsest 46.96% of the features to make it fit\n", "tile 2/0/1 size is 529976 with detail 12, >500000 \n", "Going to try keeping the sparsest 56.73% of the features to make it fit\n", "tile 2/1/1 size is 562278 with detail 12, >500000 \n", "Going to try keeping the sparsest 37.59% of the features to make it fit\n", "tile 2/0/1 size is 509845 with detail 12, >500000 \n", "Going to try keeping the sparsest 50.07% of the features to make it fit\n", "tile 3/1/3 size is 614365 with detail 12, >500000 \n", "Going to try keeping the sparsest 73.25% of the features to make it fit\n", "tile 3/2/3 size is 828844 with detail 12, >500000 \n", "Going to try keeping the sparsest 54.29% of the features to make it fit\n", "tile 3/1/3 size is 557346 with detail 12, >500000 \n", "Going to try keeping the sparsest 59.14% of the features to make it fit\n", "tile 3/2/3 size is 622365 with detail 12, >500000 \n", "Going to try keeping the sparsest 39.26% of the features to make it fit\n", "tile 3/1/3 size is 507698 with detail 12, >500000 \n", "Going to try keeping the sparsest 52.42% of the features to make it fit\n", "tile 4/4/5 size is 513228 with detail 12, >500000 \n", "Going to try keeping the sparsest 87.68% of the features to make it fit\n", "tile 4/3/6 size is 635333 with detail 12, >500000 \n", "Going to try keeping the sparsest 70.83% of the features to make it fit\n", "tile 4/3/6 size is 515357 with detail 12, >500000 \n", "Going to try keeping the sparsest 61.85% of the features to make it fit\n", "tile 4/4/6 size is 1080604 with detail 12, >500000 \n", "Going to try keeping the sparsest 41.64% of the features to make it fit\n", "tile 4/4/6 size is 614947 with detail 12, >500000 \n", "Going to try keeping the sparsest 30.47% of the features to make it fit\n", "tile 5/8/12 size is 784796 with detail 12, >500000 \n", "Going to try keeping the sparsest 57.34% of the features to make it fit\n", "tile 5/8/12 size is 540488 with detail 12, >500000 \n", "Going to try keeping the sparsest 47.74% of the features to make it fit\n", " 99.9% 12/973/1656 \n", " 100.0% 12/4092/1352 \r" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Successfully generated PMTiles file: svi-data/2022/SVI2022_US_tract.pmtiles\n" ] } ], "source": [ "expr = con.read_geo(\"svi-data/2022/SVI2022_US_tract.gdb\")\n", "expr.to_parquet(\"svi-data/2022/SVI2022_US_tract.parquet\")\n", "\n", "# tippecanoe requires geojson input to create PMTiles. Drop most additional variables in PMTiles creation.\n", "query = ibis.to_sql(expr.select('STATE', 'COUNTY', 'LOCATION', 'FIPS', 'RPL_THEMES', 'Shape'))\n", "con.raw_sql(f\"COPY ({query}) TO '/tmp/svi.json' WITH (FORMAT GDAL, DRIVER 'GeoJSON', LAYER_CREATION_OPTIONS 'WRITE_BBOX=YES');\")\n", "\n", "generate_pmtiles(\"/tmp/svi.json\", \"svi-data/2022/SVI2022_US_tract.pmtiles\")\n" ] }, { "cell_type": "code", "execution_count": 15, "id": "2e29cc6e", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import minio\n", "import re\n", "\n", "minio_key = st.secrets[\"MINIO_KEY\"]\n", "minio_secret = st.secrets[\"MINIO_SECRET\"]\n", "mc = minio.Minio(\"minio.carlboettiger.info\", minio_key, minio_secret)\n", "\n", "mc.fput_object(\"public-data\", \"social-vulnerability/2022/SVI2022_US_tract.pmtiles\", \"svi-data/2022/SVI2022_US_tract.pmtiles\")\n", "mc.fput_object(\"public-data\", \"social-vulnerability/2022/SVI2022_US_tract.parquet\", \"svi-data/2022/SVI2022_US_tract.parquet\")\n" ] }, { "cell_type": "code", "execution_count": 19, "id": "5fcd59bc-72a4-4de7-9cdb-1b6eca9407fb", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "\n", "\n", "\n", "# Local cloud\n", "minio_key = st.secrets[\"MINIO_KEY\"]\n", "minio_secret = st.secrets[\"MINIO_SECRET\"]\n", "query1 = f'''\n", "CREATE OR REPLACE SECRET secret1 (\n", " TYPE S3,\n", " KEY_ID '{minio_key}',\n", " SECRET '{minio_secret}',\n", " ENDPOINT 'minio.carlboettiger.info',\n", " URL_STYLE 'path',\n", " SCOPE \"s3://public-gbif\"\n", "\n", ");\n", "'''\n", "query2 = f'''\n", "CREATE OR REPLACE SECRET secret2 (\n", " TYPE S3,\n", " KEY_ID '{minio_key}',\n", " SECRET '{minio_secret}',\n", " ENDPOINT 'minio.carlboettiger.info',\n", " URL_STYLE 'path',\n", " SCOPE \"s3://public-data\"\n", "\n", ");\n", "'''\n", "# don't scope to a single bucket\n", "# SCOPE 's3://public-gbif'\n", "\n", "con.raw_sql(query1)\n", "con.raw_sql(query2)\n", "## Limits are sometimes good \n", "con.raw_sql(\"SET memory_limit = '20GB';\")\n", "con.raw_sql(\"set threads=40;\")\n", "\n", "# can/should we add explicit spatial index to gbif first? using RTree takes too much memory" ] }, { "cell_type": "code", "execution_count": 20, "id": "dcf50375-75ee-4208-87b2-6ffef6361742", "metadata": {}, "outputs": [], "source": [ "overture = (\n", " con.read_parquet('s3://overturemaps-us-west-2/release/2024-11-13.0/theme=divisions/type=division_area/*', \n", " filename=True, hive_partitioning=1))\n", "usa = overture.filter(_.subtype==\"country\").filter(_.country == \"US\").select(_.geometry).execute()" ] }, { "cell_type": "code", "execution_count": 21, "id": "ce86081b-a46f-426b-9432-9bce588156ee", "metadata": {}, "outputs": [], "source": [ "\n", "gbif = con.read_parquet(\"s3://public-gbif/2024-10-01/**\")\n", "svi = con.read_parquet(\"s3://public-data/social-vulnerability/2022/SVI2022_US_tract.parquet\").rename(geom = \"Shape\")\n" ] }, { "cell_type": "markdown", "id": "3891abb6-3652-4217-8615-106d354ff131", "metadata": {}, "source": [ "We iterate through the city list to do this efficiently. (Should we filter gbif down to US boundary as a one-off first? We will assume it is efficient to filter the full globe state by state)" ] }, { "cell_type": "code", "execution_count": 23, "id": "69bf6dc6-4a13-4830-8c1a-87bb5899eb32", "metadata": {}, "outputs": [], "source": [ "all_states = svi.select(_.ST_ABBR).distinct().order_by(_.ST_ABBR).execute()[\"ST_ABBR\"]\n", "#all_states" ] }, { "cell_type": "code", "execution_count": 26, "id": "32a2b4c1-e08b-4fbb-b891-ac19053a4585", "metadata": {}, "outputs": [], "source": [ "## select from the list we haven't yet written (allows resume).\n", "import minio\n", "import re\n", "\n", "minio_key = st.secrets[\"MINIO_KEY\"]\n", "minio_secret = st.secrets[\"MINIO_SECRET\"]\n", "mc = minio.Minio(\"minio.carlboettiger.info\", minio_key, minio_secret)\n", "obj = mc.list_objects(\"public-gbif\", \"social-vulnerability\", recursive=True)\n", "pattern = r\"social-vulnerability/|\\.parquet$\"\n", "finished = [re.sub(pattern, \"\", i.object_name) for i in obj if not i.is_dir]\n", "remaining = set(all_states) - set(finished)" ] }, { "cell_type": "code", "execution_count": 27, "id": "4ecc58a3", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'AK',\n", " 'AL',\n", " 'AR',\n", " 'AZ',\n", " 'CA',\n", " 'CO',\n", " 'CT',\n", " 'DC',\n", " 'DE',\n", " 'FL',\n", " 'GA',\n", " 'HI',\n", " 'IA',\n", " 'ID',\n", " 'IL',\n", " 'IN',\n", " 'KS',\n", " 'KY',\n", " 'LA',\n", " 'MA',\n", " 'MD',\n", " 'ME',\n", " 'MI',\n", " 'MN',\n", " 'MO',\n", " 'MS',\n", " 'MT',\n", " 'NC',\n", " 'ND',\n", " 'NE',\n", " 'NH',\n", " 'NJ',\n", " 'NM',\n", " 'NV',\n", " 'NY',\n", " 'OH',\n", " 'OK',\n", " 'OR',\n", " 'PA',\n", " 'RI',\n", " 'SC',\n", " 'SD',\n", " 'TN',\n", " 'TX',\n", " 'UT',\n", " 'VA',\n", " 'VT',\n", " 'WA',\n", " 'WI',\n", " 'WV',\n", " 'WY'}" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "remaining" ] }, { "cell_type": "code", "execution_count": null, "id": "c3a4005c-1e8c-4f2a-a93c-1c158c9c26ab", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "NV/Eureka County\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "6fd7b6e6fe1a4e3b9c8d476e0e757644", "version_major": 2, "version_minor": 0 }, "text/plain": [ "FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "NV/Lander County\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "80f08f9cc267481996667dd1e383a3fb", "version_major": 2, "version_minor": 0 }, "text/plain": [ "FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "NV/Clark County\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "872f806c983d4804b399881eac7d3bd9", "version_major": 2, "version_minor": 0 }, "text/plain": [ "FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "NV/Storey County\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "9927ce307cc8413f973891b388c89288", "version_major": 2, "version_minor": 0 }, "text/plain": [ "FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "NV/Churchill County\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "a5d573d2766c41fbbfde5af9ed19ab76", "version_major": 2, "version_minor": 0 }, "text/plain": [ "FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "NV/Esmeralda County\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "bda8ee87bd1340f1b666c61b5d6bc716", "version_major": 2, "version_minor": 0 }, "text/plain": [ "FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "NV/Lyon County\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "460041e4c2a745d5b0ff6dff64f26343", "version_major": 2, "version_minor": 0 }, "text/plain": [ "FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "NV/Nye County\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "7690f211b2e2418e876898369d3b04ef", "version_major": 2, "version_minor": 0 }, "text/plain": [ "FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "NV/Douglas County\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "fb3235545ac04933ae82d95b5783657e", "version_major": 2, "version_minor": 0 }, "text/plain": [ "FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "NV/Elko County\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "9c72bf43115f4c65b9b7bdfb44c2fc67", "version_major": 2, "version_minor": 0 }, "text/plain": [ "FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "NV/Pershing County\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "2e7d1ff419fe4bf4ae07c1d4288d0259", "version_major": 2, "version_minor": 0 }, "text/plain": [ "FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "NV/Washoe County\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "c57a17eaf70941a9968ad2db89e2c98d", "version_major": 2, "version_minor": 0 }, "text/plain": [ "FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "NV/Humboldt County\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "958d00b68de94acaa96d63871daa356b", "version_major": 2, "version_minor": 0 }, "text/plain": [ "FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "NV/Carson City\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "a162c535a70f4dc89b7e708f2fc8633b", "version_major": 2, "version_minor": 0 }, "text/plain": [ "FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "NV/Lincoln County\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "e4ca31b6f6b64667bc8c1e5dfac7ab03", "version_major": 2, "version_minor": 0 }, "text/plain": [ "FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "NV/White Pine County\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "b89cd3441e714d5983daffa6c93cc0ee", "version_major": 2, "version_minor": 0 }, "text/plain": [ "FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "NV/Mineral County\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "c05bb3bbd3434373a068a489410beb4a", "version_major": 2, "version_minor": 0 }, "text/plain": [ "FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "NE/Blaine County\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "f88979e7186546f3be239f901393ab8a", "version_major": 2, "version_minor": 0 }, "text/plain": [ "FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "NE/Butler County\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "c4bfcd9ba81944b29907ccf3c6d3783e", "version_major": 2, "version_minor": 0 }, "text/plain": [ "FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "NE/Custer County\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "6cd52ae08b7b4dea931ff2ffa5d6c7f6", "version_major": 2, "version_minor": 0 }, "text/plain": [ "FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "NE/Dakota County\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "b3a2e7411b69407ba76c06b0d083a961", "version_major": 2, "version_minor": 0 }, "text/plain": [ "FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "NE/Kearney County\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "e0ccf52489a3467ba172afef3e36f2f0", "version_major": 2, "version_minor": 0 }, "text/plain": [ "FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "NE/Keith County\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "e6f8cd9d59284e7aaa9eabe117a69079", "version_major": 2, "version_minor": 0 }, "text/plain": [ "FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "## And here we go, long-running loop over each city\n", "for i in remaining:\n", " counties = svi.filter(_.ST_ABBR == i).select(_.COUNTY).distinct().execute()[\"COUNTY\"].to_numpy()\n", " for county in counties:\n", " gdf = (svi\n", " .filter(_.ST_ABBR == i, _.COUNTY== county)\n", " .mutate(area = _.geom.area())\n", " )\n", "\n", " print(i + \"/\" + county)\n", " \n", " bounds = gdf.execute().total_bounds\n", " points = (gbif\n", " .filter(_.decimallongitude >= bounds[0], \n", " _.decimallongitude < bounds[2], \n", " _.decimallatitude >= bounds[1], \n", " _.decimallatitude < bounds[3])\n", " )\n", " \n", " (gdf\n", " .join(points, gdf.geom.intersects(points.geom))\n", " .to_parquet(f\"s3://public-gbif/social-vulnerability/state={i}/{county}.parquet\")\n", " )\n" ] }, { "cell_type": "markdown", "id": "050a358f-e2de-49bd-a80d-4f8c47e36bab", "metadata": {}, "source": [ "gbif_usa = con.read_parquet(\"s3://cboettig/gbif/svi/**\")\n" ] }, { "cell_type": "code", "execution_count": 43, "id": "9bd1299b-af6b-4d85-97fb-ba83a5c26c70", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
DatabaseTable: ibis_read_parquet_msislo4d7fcgdfh2pyoxvxjkdu\n",
       "  OBJECTID                         int64\n",
       "  ST                               string\n",
       "  STATE                            string\n",
       "  ST_ABBR                          string\n",
       "  STCNTY                           string\n",
       "  COUNTY                           string\n",
       "  FIPS                             string\n",
       "  LOCATION                         string\n",
       "  AREA_SQMI                        float64\n",
       "  E_TOTPOP                         int32\n",
       "  M_TOTPOP                         int32\n",
       "  E_HU                             int32\n",
       "  M_HU                             int32\n",
       "  E_HH                             int32\n",
       "  M_HH                             int32\n",
       "  E_POV150                         int32\n",
       "  M_POV150                         int32\n",
       "  E_UNEMP                          int32\n",
       "  M_UNEMP                          int32\n",
       "  E_HBURD                          int32\n",
       "  M_HBURD                          int32\n",
       "  E_NOHSDP                         int32\n",
       "  M_NOHSDP                         int32\n",
       "  E_UNINSUR                        int32\n",
       "  M_UNINSUR                        int32\n",
       "  E_AGE65                          int32\n",
       "  M_AGE65                          int32\n",
       "  E_AGE17                          int32\n",
       "  M_AGE17                          int32\n",
       "  E_DISABL                         int32\n",
       "  M_DISABL                         int32\n",
       "  E_SNGPNT                         int32\n",
       "  M_SNGPNT                         int32\n",
       "  E_LIMENG                         int32\n",
       "  M_LIMENG                         int32\n",
       "  E_MINRTY                         int32\n",
       "  M_MINRTY                         int32\n",
       "  E_MUNIT                          int32\n",
       "  M_MUNIT                          int32\n",
       "  E_MOBILE                         int32\n",
       "  M_MOBILE                         int32\n",
       "  E_CROWD                          int32\n",
       "  M_CROWD                          int32\n",
       "  E_NOVEH                          int32\n",
       "  M_NOVEH                          int32\n",
       "  E_GROUPQ                         int32\n",
       "  M_GROUPQ                         int32\n",
       "  EP_POV150                        float64\n",
       "  MP_POV150                        float64\n",
       "  EP_UNEMP                         float64\n",
       "  MP_UNEMP                         float64\n",
       "  EP_HBURD                         float64\n",
       "  MP_HBURD                         float64\n",
       "  EP_NOHSDP                        float64\n",
       "  MP_NOHSDP                        float64\n",
       "  EP_UNINSUR                       float64\n",
       "  MP_UNINSUR                       float64\n",
       "  EP_AGE65                         float64\n",
       "  MP_AGE65                         float64\n",
       "  EP_AGE17                         float64\n",
       "  MP_AGE17                         float64\n",
       "  EP_DISABL                        float64\n",
       "  MP_DISABL                        float64\n",
       "  EP_SNGPNT                        float64\n",
       "  MP_SNGPNT                        float64\n",
       "  EP_LIMENG                        float64\n",
       "  MP_LIMENG                        float64\n",
       "  EP_MINRTY                        float64\n",
       "  MP_MINRTY                        float64\n",
       "  EP_MUNIT                         float64\n",
       "  MP_MUNIT                         float64\n",
       "  EP_MOBILE                        float64\n",
       "  MP_MOBILE                        float64\n",
       "  EP_CROWD                         float64\n",
       "  MP_CROWD                         float64\n",
       "  EP_NOVEH                         float64\n",
       "  MP_NOVEH                         float64\n",
       "  EP_GROUPQ                        float64\n",
       "  MP_GROUPQ                        float64\n",
       "  EPL_POV150                       float64\n",
       "  EPL_UNEMP                        float64\n",
       "  EPL_HBURD                        float64\n",
       "  EPL_NOHSDP                       float64\n",
       "  EPL_UNINSUR                      float64\n",
       "  SPL_THEME1                       float64\n",
       "  RPL_THEME1                       float64\n",
       "  EPL_AGE65                        float64\n",
       "  EPL_AGE17                        float64\n",
       "  EPL_DISABL                       float64\n",
       "  EPL_SNGPNT                       float64\n",
       "  EPL_LIMENG                       float64\n",
       "  SPL_THEME2                       float64\n",
       "  RPL_THEME2                       float64\n",
       "  EPL_MINRTY                       float64\n",
       "  SPL_THEME3                       float64\n",
       "  RPL_THEME3                       float64\n",
       "  EPL_MUNIT                        float64\n",
       "  EPL_MOBILE                       float64\n",
       "  EPL_CROWD                        float64\n",
       "  EPL_NOVEH                        float64\n",
       "  EPL_GROUPQ                       float64\n",
       "  SPL_THEME4                       float64\n",
       "  RPL_THEME4                       float64\n",
       "  SPL_THEMES                       float64\n",
       "  RPL_THEMES                       float64\n",
       "  F_POV150                         int16\n",
       "  F_UNEMP                          int16\n",
       "  F_HBURD                          int16\n",
       "  F_NOHSDP                         int16\n",
       "  F_UNINSUR                        int16\n",
       "  F_THEME1                         int16\n",
       "  F_AGE65                          int16\n",
       "  F_AGE17                          int16\n",
       "  F_DISABL                         int16\n",
       "  F_SNGPNT                         int16\n",
       "  F_LIMENG                         int16\n",
       "  F_THEME2                         int16\n",
       "  F_MINRTY                         int16\n",
       "  F_THEME3                         int16\n",
       "  F_MUNIT                          int16\n",
       "  F_MOBILE                         int16\n",
       "  F_CROWD                          int16\n",
       "  F_NOVEH                          int16\n",
       "  F_GROUPQ                         int16\n",
       "  F_THEME4                         int16\n",
       "  F_TOTAL                          int16\n",
       "  E_DAYPOP                         int32\n",
       "  E_NOINT                          int32\n",
       "  M_NOINT                          int32\n",
       "  E_AFAM                           int32\n",
       "  M_AFAM                           int32\n",
       "  E_HISP                           int32\n",
       "  M_HISP                           int32\n",
       "  E_ASIAN                          int32\n",
       "  M_ASIAN                          int32\n",
       "  E_AIAN                           int32\n",
       "  M_AIAN                           int32\n",
       "  E_NHPI                           int32\n",
       "  M_NHPI                           int32\n",
       "  E_TWOMORE                        int32\n",
       "  M_TWOMORE                        int32\n",
       "  E_OTHERRACE                      int32\n",
       "  M_OTHERRACE                      int32\n",
       "  EP_NOINT                         float64\n",
       "  MP_NOINT                         float64\n",
       "  EP_AFAM                          float64\n",
       "  MP_AFAM                          float64\n",
       "  EP_HISP                          float64\n",
       "  MP_HISP                          float64\n",
       "  EP_ASIAN                         float64\n",
       "  MP_ASIAN                         float64\n",
       "  EP_AIAN                          float64\n",
       "  MP_AIAN                          float64\n",
       "  EP_NHPI                          float64\n",
       "  MP_NHPI                          float64\n",
       "  EP_TWOMORE                       float64\n",
       "  MP_TWOMORE                       float64\n",
       "  EP_OTHERRACE                     float64\n",
       "  MP_OTHERRACE                     float64\n",
       "  Shape_Length                     float64\n",
       "  Shape_Area                       float64\n",
       "  geom                             geospatial:geometry\n",
       "  area                             float64\n",
       "  gbifid                           string\n",
       "  datasetkey                       string\n",
       "  occurrenceid                     string\n",
       "  kingdom                          string\n",
       "  phylum                           string\n",
       "  class                            string\n",
       "  order                            string\n",
       "  family                           string\n",
       "  genus                            string\n",
       "  species                          string\n",
       "  infraspecificepithet             string\n",
       "  taxonrank                        string\n",
       "  scientificname                   string\n",
       "  verbatimscientificname           string\n",
       "  verbatimscientificnameauthorship string\n",
       "  countrycode                      string\n",
       "  locality                         string\n",
       "  stateprovince                    string\n",
       "  occurrencestatus                 string\n",
       "  individualcount                  int32\n",
       "  publishingorgkey                 string\n",
       "  decimallatitude                  float64\n",
       "  decimallongitude                 float64\n",
       "  coordinateuncertaintyinmeters    float64\n",
       "  coordinateprecision              float64\n",
       "  elevation                        float64\n",
       "  elevationaccuracy                float64\n",
       "  depth                            float64\n",
       "  depthaccuracy                    float64\n",
       "  eventdate                        timestamp(6)\n",
       "  day                              int32\n",
       "  month                            int32\n",
       "  year                             int32\n",
       "  taxonkey                         int32\n",
       "  specieskey                       int32\n",
       "  basisofrecord                    string\n",
       "  institutioncode                  string\n",
       "  collectioncode                   string\n",
       "  catalognumber                    string\n",
       "  recordnumber                     string\n",
       "  identifiedby                     array<string>\n",
       "  dateidentified                   timestamp(6)\n",
       "  license                          string\n",
       "  rightsholder                     string\n",
       "  recordedby                       array<string>\n",
       "  typestatus                       array<string>\n",
       "  establishmentmeans               string\n",
       "  lastinterpreted                  timestamp(6)\n",
       "  mediatype                        array<string>\n",
       "  issue                            array<string>\n",
       "  geom_right                       geospatial:geometry\n",
       "  h0                               string\n",
       "  h1                               string\n",
       "  h2                               string\n",
       "  h3                               string\n",
       "  h4                               string\n",
       "  h5                               string\n",
       "  h6                               string\n",
       "  h7                               string\n",
       "  h8                               string\n",
       "  h9                               string\n",
       "  h10                              string\n",
       "  h11                              string\n",
       "
\n" ], "text/plain": [ "DatabaseTable: ibis_read_parquet_msislo4d7fcgdfh2pyoxvxjkdu\n", " OBJECTID int64\n", " ST string\n", " STATE string\n", " ST_ABBR string\n", " STCNTY string\n", " COUNTY string\n", " FIPS string\n", " LOCATION string\n", " AREA_SQMI float64\n", " E_TOTPOP int32\n", " M_TOTPOP int32\n", " E_HU int32\n", " M_HU int32\n", " E_HH int32\n", " M_HH int32\n", " E_POV150 int32\n", " M_POV150 int32\n", " E_UNEMP int32\n", " M_UNEMP int32\n", " E_HBURD int32\n", " M_HBURD int32\n", " E_NOHSDP int32\n", " M_NOHSDP int32\n", " E_UNINSUR int32\n", " M_UNINSUR int32\n", " E_AGE65 int32\n", " M_AGE65 int32\n", " E_AGE17 int32\n", " M_AGE17 int32\n", " E_DISABL int32\n", " M_DISABL int32\n", " E_SNGPNT int32\n", " M_SNGPNT int32\n", " E_LIMENG int32\n", " M_LIMENG int32\n", " E_MINRTY int32\n", " M_MINRTY int32\n", " E_MUNIT int32\n", " M_MUNIT int32\n", " E_MOBILE int32\n", " M_MOBILE int32\n", " E_CROWD int32\n", " M_CROWD int32\n", " E_NOVEH int32\n", " M_NOVEH int32\n", " E_GROUPQ int32\n", " M_GROUPQ int32\n", " EP_POV150 float64\n", " MP_POV150 float64\n", " EP_UNEMP float64\n", " MP_UNEMP float64\n", " EP_HBURD float64\n", " MP_HBURD float64\n", " EP_NOHSDP float64\n", " MP_NOHSDP float64\n", " EP_UNINSUR float64\n", " MP_UNINSUR float64\n", " EP_AGE65 float64\n", " MP_AGE65 float64\n", " EP_AGE17 float64\n", " MP_AGE17 float64\n", " EP_DISABL float64\n", " MP_DISABL float64\n", " EP_SNGPNT float64\n", " MP_SNGPNT float64\n", " EP_LIMENG float64\n", " MP_LIMENG float64\n", " EP_MINRTY float64\n", " MP_MINRTY float64\n", " EP_MUNIT float64\n", " MP_MUNIT float64\n", " EP_MOBILE float64\n", " MP_MOBILE float64\n", " EP_CROWD float64\n", " MP_CROWD float64\n", " EP_NOVEH float64\n", " MP_NOVEH float64\n", " EP_GROUPQ float64\n", " MP_GROUPQ float64\n", " EPL_POV150 float64\n", " EPL_UNEMP float64\n", " EPL_HBURD float64\n", " EPL_NOHSDP float64\n", " EPL_UNINSUR float64\n", " SPL_THEME1 float64\n", " RPL_THEME1 float64\n", " EPL_AGE65 float64\n", " EPL_AGE17 float64\n", " EPL_DISABL float64\n", " EPL_SNGPNT float64\n", " EPL_LIMENG float64\n", " SPL_THEME2 float64\n", " RPL_THEME2 float64\n", " EPL_MINRTY float64\n", " SPL_THEME3 float64\n", " RPL_THEME3 float64\n", " EPL_MUNIT float64\n", " EPL_MOBILE float64\n", " EPL_CROWD float64\n", " EPL_NOVEH float64\n", " EPL_GROUPQ float64\n", " SPL_THEME4 float64\n", " RPL_THEME4 float64\n", " SPL_THEMES float64\n", " RPL_THEMES float64\n", " F_POV150 int16\n", " F_UNEMP int16\n", " F_HBURD int16\n", " F_NOHSDP int16\n", " F_UNINSUR int16\n", " F_THEME1 int16\n", " F_AGE65 int16\n", " F_AGE17 int16\n", " F_DISABL int16\n", " F_SNGPNT int16\n", " F_LIMENG int16\n", " F_THEME2 int16\n", " F_MINRTY int16\n", " F_THEME3 int16\n", " F_MUNIT int16\n", " F_MOBILE int16\n", " F_CROWD int16\n", " F_NOVEH int16\n", " F_GROUPQ int16\n", " F_THEME4 int16\n", " F_TOTAL int16\n", " E_DAYPOP int32\n", " E_NOINT int32\n", " M_NOINT int32\n", " E_AFAM int32\n", " M_AFAM int32\n", " E_HISP int32\n", " M_HISP int32\n", " E_ASIAN int32\n", " M_ASIAN int32\n", " E_AIAN int32\n", " M_AIAN int32\n", " E_NHPI int32\n", " M_NHPI int32\n", " E_TWOMORE int32\n", " M_TWOMORE int32\n", " E_OTHERRACE int32\n", " M_OTHERRACE int32\n", " EP_NOINT float64\n", " MP_NOINT float64\n", " EP_AFAM float64\n", " MP_AFAM float64\n", " EP_HISP float64\n", " MP_HISP float64\n", " EP_ASIAN float64\n", " MP_ASIAN float64\n", " EP_AIAN float64\n", " MP_AIAN float64\n", " EP_NHPI float64\n", " MP_NHPI float64\n", " EP_TWOMORE float64\n", " MP_TWOMORE float64\n", " EP_OTHERRACE float64\n", " MP_OTHERRACE float64\n", " Shape_Length float64\n", " Shape_Area float64\n", " geom geospatial:geometry\n", " area float64\n", " gbifid string\n", " datasetkey string\n", " occurrenceid string\n", " kingdom string\n", " phylum string\n", " class string\n", " order string\n", " family string\n", " genus string\n", " species string\n", " infraspecificepithet string\n", " taxonrank string\n", " scientificname string\n", " verbatimscientificname string\n", " verbatimscientificnameauthorship string\n", " countrycode string\n", " locality string\n", " stateprovince string\n", " occurrencestatus string\n", " individualcount int32\n", " publishingorgkey string\n", " decimallatitude float64\n", " decimallongitude float64\n", " coordinateuncertaintyinmeters float64\n", " coordinateprecision float64\n", " elevation float64\n", " elevationaccuracy float64\n", " depth float64\n", " depthaccuracy float64\n", " eventdate timestamp(6)\n", " day int32\n", " month int32\n", " year int32\n", " taxonkey int32\n", " specieskey int32\n", " basisofrecord string\n", " institutioncode string\n", " collectioncode string\n", " catalognumber string\n", " recordnumber string\n", " identifiedby array\n", " dateidentified timestamp(6)\n", " license string\n", " rightsholder string\n", " recordedby array\n", " typestatus array\n", " establishmentmeans string\n", " lastinterpreted timestamp(6)\n", " mediatype array\n", " issue array\n", " geom_right geospatial:geometry\n", " h0 string\n", " h1 string\n", " h2 string\n", " h3 string\n", " h4 string\n", " h5 string\n", " h6 string\n", " h7 string\n", " h8 string\n", " h9 string\n", " h10 string\n", " h11 string" ] }, "execution_count": 43, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gbif_usa" ] }, { "cell_type": "code", "execution_count": null, "id": "a6ce4d65-6f93-4725-87fa-29bf413398ad", "metadata": {}, "outputs": [], "source": [ "The four summary theme ranking variables, detailed in the Data Dictionary below, are:\n", "• Socioeconomic Status - RPL_THEME1\n", "• Household Characteristics - RPL_THEME2\n", "• Racial & Ethnic Minority Status - RPL_THEME3\n", "• Housing Type & Transportation - RPL_THEME4 " ] }, { "cell_type": "code", "execution_count": null, "id": "d2e85529-348b-4f33-b09d-f8424299dc8d", "metadata": {}, "outputs": [], "source": [ "import seaborn.objects as so\n", "\n", "#df = gbif_usa.group_by(_.FIPS).agg(n = _.count().log(), svi = _.RPL_THEMES.mean()).execute()\n", "df = gbif_usa.group_by(_.STATE, _.COUNTY).agg(n = _.count() / _.Shape_Area.sum(), svi1 = _.RPL_THEME1.mean(), svi3 = _.RPL_THEME3.mean()).execute()\n", "\n", "so.Plot(df, x = \"svi1\", y=\"n\", color = \"svi3\").add(so.Dots()).scale(y=\"log\")" ] }, { "cell_type": "code", "execution_count": null, "id": "9030d3dc-e2fb-41b7-8fe9-80ee76739b78", "metadata": {}, "outputs": [], "source": [ "import altair as alt\n", "\n", "alt.Chart(df).mark_point().encode(\n", " x='svi1',\n", " y='n',\n", " color='svi3',\n", " tooltip = ['STATE', 'COUNTY']\n", ")\n" ] } ], "metadata": { "kernelspec": { "display_name": "base", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.8" } }, "nbformat": 4, "nbformat_minor": 5 }