cassiebuhler committed on
Commit
c67e57e
1 Parent(s): e338567

adding municipal data

Browse files
Files changed (3) hide show
  1. preprocess.ipynb +100 -0
  2. requirements.txt +2 -0
  3. static-maps.ipynb +84 -23
preprocess.ipynb ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "id": "3e5756d2-382b-49e9-93b5-2ecf6d0eb812",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "import duckdb\n",
11
+ "\n",
12
+ "con = duckdb.connect()\n",
13
+ "\n",
14
+ "con.execute(\"SET s3_region='us-west-2';\")\n",
15
+ "con.execute(\"LOAD spatial;\")\n",
16
+ "con.execute(\"LOAD httpfs;\")\n",
17
+ "\n",
18
+ "query = \"\"\"\n",
19
+ " COPY (\n",
20
+ " SELECT * \n",
21
+ " FROM read_parquet('s3://overturemaps-us-west-2/release/2024-09-18.0/theme=divisions/*/*')\n",
22
+ " WHERE country = 'US' AND subtype IN ('locality', 'neighborhood')\n",
23
+ " ) TO 'us_localities_neighborhoods.parquet' (FORMAT 'parquet');\n",
24
+ "\"\"\"\n",
25
+ "con.execute(query)\n",
26
+ "\n"
27
+ ]
28
+ },
29
+ {
30
+ "cell_type": "code",
31
+ "execution_count": null,
32
+ "id": "25f62dd7-5539-438b-8f0a-1d85c9bc78ab",
33
+ "metadata": {},
34
+ "outputs": [],
35
+ "source": [
36
+ "import ibis\n",
37
+ "from ibis import _\n",
38
+ "\n",
39
+ "conn = ibis.duckdb.connect(extensions=[\"spatial\"])\n",
40
+ "\n",
41
+ "df = (conn\n",
42
+ " .read_parquet(\"us_localities_neighborhoods.parquet\")\n",
43
+ " .cast({\"geometry\": \"geometry\"})\n",
44
+ " .filter(_[\"type\"] == \"division\")\n",
45
+ " .filter(_[\"subtype\"] == \"locality\")\n",
46
+ " .mutate(name = _.names[\"primary\"])\n",
47
+ " .mutate(state_id = _.region.replace(\"US-\", \"\")) \n",
48
+ " .mutate(county = _.hierarchies[0][2]['name'] )\n",
49
+ " .mutate(key_long = _.name + ibis.literal('-') + _.county + ibis.literal('-') + _.state_id)\n",
50
+ " .select(\"key_long\",\"name\", \"county\",\"state_id\" ,\"geometry\")\n",
51
+ " )\n",
52
+ "\n",
53
+ "\n",
54
+ "## Drop localities whose name and state match but whose counties differ (ambiguous name-state pairs)\n",
55
+ "county_count = (\n",
56
+ " df.group_by([\"name\", \"state_id\"])\n",
57
+ " .aggregate(county_count=_.county.nunique()) # Count unique counties for each group\n",
58
+ ") \n",
59
+ "valid_names = county_count.filter(county_count.county_count == 1).select(\"name\", \"state_id\")\n",
60
+ "df_filtered = df.join(valid_names, [\"name\", \"state_id\"], how=\"inner\")\n",
61
+ "\n",
62
+ "\n",
63
+ "# If two records share the same key_long (name-county-state) but have different geometries, keep only the first one.\n",
64
+ "df_first = (\n",
65
+ " df_filtered.group_by(\"key_long\")\n",
66
+ " .aggregate(\n",
67
+ " name=df_filtered.name.first(),\n",
68
+ " county=df_filtered.county.first(),\n",
69
+ " state_id=df_filtered.state_id.first(),\n",
70
+ " geometry=df_filtered.geometry.first()\n",
71
+ " )\n",
72
+ "\n",
73
+ ")\n",
74
+ "\n",
75
+ "df_first.execute().to_parquet(\"us_localities.parquet\")\n"
76
+ ]
77
+ }
78
+ ],
79
+ "metadata": {
80
+ "kernelspec": {
81
+ "display_name": "Python 3 (ipykernel)",
82
+ "language": "python",
83
+ "name": "python3"
84
+ },
85
+ "language_info": {
86
+ "codemirror_mode": {
87
+ "name": "ipython",
88
+ "version": 3
89
+ },
90
+ "file_extension": ".py",
91
+ "mimetype": "text/x-python",
92
+ "name": "python",
93
+ "nbconvert_exporter": "python",
94
+ "pygments_lexer": "ipython3",
95
+ "version": "3.10.12"
96
+ }
97
+ },
98
+ "nbformat": 4,
99
+ "nbformat_minor": 5
100
+ }
requirements.txt CHANGED
@@ -157,3 +157,5 @@ whiteboxgui==2.3.0
157
  widgetsnbextension==4.0.13
158
  xyzservices==2024.9.0
159
  yarl==1.9.7
 
 
 
157
  widgetsnbextension==4.0.13
158
  xyzservices==2024.9.0
159
  yarl==1.9.7
160
+ overturemaps
161
+ tippecanoe
static-maps.ipynb CHANGED
@@ -18,27 +18,79 @@
18
  "states = conn.read_parquet(state_boundaries).rename(state_id = \"STUSPS\", state = \"NAME\")\n",
19
  "county = conn.read_parquet(county_boundaries).rename(county = \"NAMELSAD\", state = \"STATE_NAME\")\n",
20
  "\n",
21
- "votes = conn.read_csv(\"landvote.csv\")\n",
22
- "votes.count().execute()\n",
23
- "\n",
24
- "vote = (votes\n",
25
- " .filter(_[\"Jurisdiction Type\"] == \"County\")\n",
26
- " .rename(county = \"Jurisdiction Name\", state_id = \"State\")\n",
27
- " .mutate(key = _.county + ibis.literal('-') + _.state_id)\n",
28
- " .rename(amount = 'Conservation Funds at Stake', yes = '% Yes')\n",
29
- " .mutate(amount_n=_.amount.replace('$', '').replace(',', '').cast('float'))\n",
30
- " .mutate(log_amount=_.amount_n.log())\n",
31
- " .mutate(year=_['Date'].year().cast('int32'))\n",
32
- " .select('key', 'Status', 'yes', 'year', 'amount', 'log_amount', )\n",
33
- " )\n",
34
- "df = (county\n",
35
- " .join(states.select(\"state\", \"state_id\"), \"state\")\n",
36
- " .mutate(key = _.county + ibis.literal('-') + _.state_id)\n",
37
- " .select('key', 'geometry')\n",
38
- " .right_join(vote, \"key\")\n",
39
- " .drop('key_right')\n",
40
- " )\n",
41
- "\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  ]
43
  },
44
  {
@@ -54,7 +106,8 @@
54
  "import streamlit as st\n",
55
  "\n",
56
  "login(st.secrets[\"HF_TOKEN\"])\n",
57
- "api = HfApi(add_to_git_credential=False)\n",
 
58
  "\n",
59
  "def hf_upload(file, repo_id):\n",
60
  " info = api.upload_file(\n",
@@ -163,6 +216,14 @@
163
  "#m.add_gdf(gdf, \"fill-extrusion\", paint = paint)\n",
164
  "#m"
165
  ]
 
 
 
 
 
 
 
 
166
  }
167
  ],
168
  "metadata": {
@@ -181,7 +242,7 @@
181
  "name": "python",
182
  "nbconvert_exporter": "python",
183
  "pygments_lexer": "ipython3",
184
- "version": "3.11.10"
185
  }
186
  },
187
  "nbformat": 4,
 
18
  "states = conn.read_parquet(state_boundaries).rename(state_id = \"STUSPS\", state = \"NAME\")\n",
19
  "county = conn.read_parquet(county_boundaries).rename(county = \"NAMELSAD\", state = \"STATE_NAME\")\n",
20
  "\n",
21
+ "localities_boundaries = \"us_localities.parquet\"\n",
22
+ "locality = conn.read_parquet(localities_boundaries)\n",
23
+ "\n",
24
+ "\n",
25
+ "votes = conn.read_csv(\"landvote.csv\")"
26
+ ]
27
+ },
28
+ {
29
+ "cell_type": "code",
30
+ "execution_count": null,
31
+ "id": "ba4d8915-cde3-4ef9-ad8c-7759ed2c8a13",
32
+ "metadata": {},
33
+ "outputs": [],
34
+ "source": [
35
+ "vote_county = (votes\n",
36
+ " .filter(_[\"Jurisdiction Type\"] == \"County\")\n",
37
+ " .rename(county = \"Jurisdiction Name\", state_id = \"State\")\n",
38
+ " .mutate(key = _.county + ibis.literal('-') + _.state_id)\n",
39
+ " .rename(amount = 'Conservation Funds at Stake', yes = '% Yes')\n",
40
+ " .mutate(amount_n=_.amount.replace('$', '').replace(',', '').cast('float'))\n",
41
+ " .mutate(log_amount=_.amount_n.log())\n",
42
+ " .mutate(year=_['Date'].year().cast('int32'))\n",
43
+ " .select('key', 'Status', 'yes', 'year', 'amount', 'log_amount', )\n",
44
+ " )\n",
45
+ "df_county = (county\n",
46
+ " .join(states.select(\"state\", \"state_id\"), \"state\")\n",
47
+ " .mutate(key = _.county + ibis.literal('-') + _.state_id)\n",
48
+ " .select('key', 'geometry')\n",
49
+ " .right_join(vote_county, \"key\")\n",
50
+ " .drop('key_right')\n",
51
+ " .mutate(jurisdiction = ibis.literal(\"County\"))\n",
52
+ " .cast({\"geometry\": \"geometry\"})\n",
53
+ " )\n"
54
+ ]
55
+ },
56
+ {
57
+ "cell_type": "code",
58
+ "execution_count": null,
59
+ "id": "0cce23c9-245c-4c28-9523-0231eb5acc17",
60
+ "metadata": {},
61
+ "outputs": [],
62
+ "source": [
63
+ "vote_local = (votes\n",
64
+ " .filter(_[\"Jurisdiction Type\"] == \"Municipal\")\n",
65
+ " .rename(city = \"Jurisdiction Name\", state_id = \"State\")\n",
66
+ " .mutate(key = _.city + ibis.literal('-') + _.state_id)\n",
67
+ " .rename(amount = 'Conservation Funds at Stake', yes = '% Yes')\n",
68
+ " .mutate(amount_n=_.amount.replace('$', '').replace(',', '').cast('float'))\n",
69
+ " .mutate(log_amount=_.amount_n.log())\n",
70
+ " .mutate(year=_['Date'].year().cast('int32'))\n",
71
+ " .select('key', 'Status', 'yes', 'year', 'amount', 'log_amount', )\n",
72
+ " )\n",
73
+ "\n",
74
+ "df_local = (locality\n",
75
+ " .mutate(key = _.name + ibis.literal('-') + _.state_id)\n",
76
+ " .select('key', 'geometry')\n",
77
+ " .right_join(vote_local, \"key\")\n",
78
+ " .drop('key_right')\n",
79
+ " .mutate(jurisdiction = ibis.literal(\"Municipal\"))\n",
80
+ " .cast({\"geometry\": \"geometry\"})\n",
81
+ " \n",
82
+ " )\n"
83
+ ]
84
+ },
85
+ {
86
+ "cell_type": "code",
87
+ "execution_count": null,
88
+ "id": "a1e81807-8ce3-44bf-9a1c-8563fa33817c",
89
+ "metadata": {},
90
+ "outputs": [],
91
+ "source": [
92
+ "df = df_county.union(df_local)\n",
93
+ "df.execute()"
94
  ]
95
  },
96
  {
 
106
  "import streamlit as st\n",
107
  "\n",
108
  "login(st.secrets[\"HF_TOKEN\"])\n",
109
+ "# api = HfApi(add_to_git_credential=False)\n",
110
+ "api = HfApi()\n",
111
  "\n",
112
  "def hf_upload(file, repo_id):\n",
113
  " info = api.upload_file(\n",
 
216
  "#m.add_gdf(gdf, \"fill-extrusion\", paint = paint)\n",
217
  "#m"
218
  ]
219
+ },
220
+ {
221
+ "cell_type": "code",
222
+ "execution_count": null,
223
+ "id": "5e521f00-1b04-4016-9a6a-71a12e846dd3",
224
+ "metadata": {},
225
+ "outputs": [],
226
+ "source": []
227
  }
228
  ],
229
  "metadata": {
 
242
  "name": "python",
243
  "nbconvert_exporter": "python",
244
  "pygments_lexer": "ipython3",
245
+ "version": "3.10.12"
246
  }
247
  },
248
  "nbformat": 4,