Spaces:
Sleeping
Sleeping
cassiebuhler
committed on
Commit
•
c67e57e
1
Parent(s):
e338567
adding municipal data
Browse files- preprocess.ipynb +100 -0
- requirements.txt +2 -0
- static-maps.ipynb +84 -23
preprocess.ipynb
ADDED
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "code",
|
5 |
+
"execution_count": null,
|
6 |
+
"id": "3e5756d2-382b-49e9-93b5-2ecf6d0eb812",
|
7 |
+
"metadata": {},
|
8 |
+
"outputs": [],
|
9 |
+
"source": [
|
10 |
+
"import duckdb\n",
|
11 |
+
"\n",
|
12 |
+
"con = duckdb.connect()\n",
|
13 |
+
"\n",
|
14 |
+
"con.execute(\"SET s3_region='us-west-2';\")\n",
|
15 |
+
"con.execute(\"LOAD spatial;\")\n",
|
16 |
+
"con.execute(\"LOAD httpfs;\")\n",
|
17 |
+
"\n",
|
18 |
+
"query = \"\"\"\n",
|
19 |
+
" COPY (\n",
|
20 |
+
" SELECT * \n",
|
21 |
+
" FROM read_parquet('s3://overturemaps-us-west-2/release/2024-09-18.0/theme=divisions/*/*')\n",
|
22 |
+
" WHERE country = 'US' AND subtype IN ('locality', 'neighborhood')\n",
|
23 |
+
" ) TO 'us_localities_neighborhoods.parquet' (FORMAT 'parquet');\n",
|
24 |
+
"\"\"\"\n",
|
25 |
+
"con.execute(query)\n",
|
26 |
+
"\n"
|
27 |
+
]
|
28 |
+
},
|
29 |
+
{
|
30 |
+
"cell_type": "code",
|
31 |
+
"execution_count": null,
|
32 |
+
"id": "25f62dd7-5539-438b-8f0a-1d85c9bc78ab",
|
33 |
+
"metadata": {},
|
34 |
+
"outputs": [],
|
35 |
+
"source": [
|
36 |
+
"import ibis\n",
|
37 |
+
"from ibis import _\n",
|
38 |
+
"\n",
|
39 |
+
"conn = ibis.duckdb.connect(extensions=[\"spatial\"])\n",
|
40 |
+
"\n",
|
41 |
+
"df = (conn\n",
|
42 |
+
" .read_parquet(\"us_localities_neighborhoods.parquet\")\n",
|
43 |
+
" .cast({\"geometry\": \"geometry\"})\n",
|
44 |
+
" .filter(_[\"type\"] == \"division\")\n",
|
45 |
+
" .filter(_[\"subtype\"] == \"locality\")\n",
|
46 |
+
" .mutate(name = _.names[\"primary\"])\n",
|
47 |
+
" .mutate(state_id = _.region.replace(\"US-\", \"\")) \n",
|
48 |
+
" .mutate(county = _.hierarchies[0][2]['name'] )\n",
|
49 |
+
" .mutate(key_long = _.name + ibis.literal('-') + _.county + ibis.literal('-') + _.state_id)\n",
|
50 |
+
" .select(\"key_long\",\"name\", \"county\",\"state_id\" ,\"geometry\")\n",
|
51 |
+
" )\n",
|
52 |
+
"\n",
|
53 |
+
"\n",
|
54 |
+
"## Keep only localities whose (name, state) pair maps to exactly one county; names found in several counties of a state are ambiguous and are dropped\n",
|
55 |
+
"county_count = (\n",
|
56 |
+
" df.group_by([\"name\", \"state_id\"])\n",
|
57 |
+
" .aggregate(county_count=_.county.nunique()) # Count unique counties for each group\n",
|
58 |
+
") \n",
|
59 |
+
"valid_names = county_count.filter(county_count.county_count == 1).select(\"name\", \"state_id\")\n",
|
60 |
+
"df_filtered = df.join(valid_names, [\"name\", \"state_id\"], how=\"inner\")\n",
|
61 |
+
"\n",
|
62 |
+
"\n",
|
63 |
+
"# If several records share the same key_long (name-county-state) but have different geometries, keep only the first one.\n",
|
64 |
+
"df_first = (\n",
|
65 |
+
" df_filtered.group_by(\"key_long\")\n",
|
66 |
+
" .aggregate(\n",
|
67 |
+
" name=df_filtered.name.first(),\n",
|
68 |
+
" county=df_filtered.county.first(),\n",
|
69 |
+
" state_id=df_filtered.state_id.first(),\n",
|
70 |
+
" geometry=df_filtered.geometry.first()\n",
|
71 |
+
" )\n",
|
72 |
+
"\n",
|
73 |
+
")\n",
|
74 |
+
"\n",
|
75 |
+
"df_first.execute().to_parquet(\"us_localities.parquet\")\n"
|
76 |
+
]
|
77 |
+
}
|
78 |
+
],
|
79 |
+
"metadata": {
|
80 |
+
"kernelspec": {
|
81 |
+
"display_name": "Python 3 (ipykernel)",
|
82 |
+
"language": "python",
|
83 |
+
"name": "python3"
|
84 |
+
},
|
85 |
+
"language_info": {
|
86 |
+
"codemirror_mode": {
|
87 |
+
"name": "ipython",
|
88 |
+
"version": 3
|
89 |
+
},
|
90 |
+
"file_extension": ".py",
|
91 |
+
"mimetype": "text/x-python",
|
92 |
+
"name": "python",
|
93 |
+
"nbconvert_exporter": "python",
|
94 |
+
"pygments_lexer": "ipython3",
|
95 |
+
"version": "3.10.12"
|
96 |
+
}
|
97 |
+
},
|
98 |
+
"nbformat": 4,
|
99 |
+
"nbformat_minor": 5
|
100 |
+
}
|
requirements.txt
CHANGED
@@ -157,3 +157,5 @@ whiteboxgui==2.3.0
|
|
157 |
widgetsnbextension==4.0.13
|
158 |
xyzservices==2024.9.0
|
159 |
yarl==1.9.7
|
|
|
|
|
|
157 |
widgetsnbextension==4.0.13
|
158 |
xyzservices==2024.9.0
|
159 |
yarl==1.9.7
|
160 |
+
overturemaps
|
161 |
+
tippecanoe
|
static-maps.ipynb
CHANGED
@@ -18,27 +18,79 @@
|
|
18 |
"states = conn.read_parquet(state_boundaries).rename(state_id = \"STUSPS\", state = \"NAME\")\n",
|
19 |
"county = conn.read_parquet(county_boundaries).rename(county = \"NAMELSAD\", state = \"STATE_NAME\")\n",
|
20 |
"\n",
|
21 |
-
"
|
22 |
-
"
|
23 |
-
"\n",
|
24 |
-
"
|
25 |
-
"
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
"
|
36 |
-
"
|
37 |
-
"
|
38 |
-
"
|
39 |
-
"
|
40 |
-
"
|
41 |
-
"\n"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
42 |
]
|
43 |
},
|
44 |
{
|
@@ -54,7 +106,8 @@
|
|
54 |
"import streamlit as st\n",
|
55 |
"\n",
|
56 |
"login(st.secrets[\"HF_TOKEN\"])\n",
|
57 |
-
"api = HfApi(add_to_git_credential=False)\n",
|
|
|
58 |
"\n",
|
59 |
"def hf_upload(file, repo_id):\n",
|
60 |
" info = api.upload_file(\n",
|
@@ -163,6 +216,14 @@
|
|
163 |
"#m.add_gdf(gdf, \"fill-extrusion\", paint = paint)\n",
|
164 |
"#m"
|
165 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
166 |
}
|
167 |
],
|
168 |
"metadata": {
|
@@ -181,7 +242,7 @@
|
|
181 |
"name": "python",
|
182 |
"nbconvert_exporter": "python",
|
183 |
"pygments_lexer": "ipython3",
|
184 |
-
"version": "3.
|
185 |
}
|
186 |
},
|
187 |
"nbformat": 4,
|
|
|
18 |
"states = conn.read_parquet(state_boundaries).rename(state_id = \"STUSPS\", state = \"NAME\")\n",
|
19 |
"county = conn.read_parquet(county_boundaries).rename(county = \"NAMELSAD\", state = \"STATE_NAME\")\n",
|
20 |
"\n",
|
21 |
+
"localities_boundaries = \"us_localities.parquet\"\n",
|
22 |
+
"locality = conn.read_parquet(localities_boundaries)\n",
|
23 |
+
"\n",
|
24 |
+
"\n",
|
25 |
+
"votes = conn.read_csv(\"landvote.csv\")"
|
26 |
+
]
|
27 |
+
},
|
28 |
+
{
|
29 |
+
"cell_type": "code",
|
30 |
+
"execution_count": null,
|
31 |
+
"id": "ba4d8915-cde3-4ef9-ad8c-7759ed2c8a13",
|
32 |
+
"metadata": {},
|
33 |
+
"outputs": [],
|
34 |
+
"source": [
|
35 |
+
"vote_county = (votes\n",
|
36 |
+
" .filter(_[\"Jurisdiction Type\"] == \"County\")\n",
|
37 |
+
" .rename(county = \"Jurisdiction Name\", state_id = \"State\")\n",
|
38 |
+
" .mutate(key = _.county + ibis.literal('-') + _.state_id)\n",
|
39 |
+
" .rename(amount = 'Conservation Funds at Stake', yes = '% Yes')\n",
|
40 |
+
" .mutate(amount_n=_.amount.replace('$', '').replace(',', '').cast('float'))\n",
|
41 |
+
" .mutate(log_amount=_.amount_n.log())\n",
|
42 |
+
" .mutate(year=_['Date'].year().cast('int32'))\n",
|
43 |
+
" .select('key', 'Status', 'yes', 'year', 'amount', 'log_amount', )\n",
|
44 |
+
" )\n",
|
45 |
+
"df_county = (county\n",
|
46 |
+
" .join(states.select(\"state\", \"state_id\"), \"state\")\n",
|
47 |
+
" .mutate(key = _.county + ibis.literal('-') + _.state_id)\n",
|
48 |
+
" .select('key', 'geometry')\n",
|
49 |
+
" .right_join(vote_county, \"key\")\n",
|
50 |
+
" .drop('key_right')\n",
|
51 |
+
" .mutate(jurisdiction = ibis.literal(\"County\"))\n",
|
52 |
+
" .cast({\"geometry\": \"geometry\"})\n",
|
53 |
+
" )\n"
|
54 |
+
]
|
55 |
+
},
|
56 |
+
{
|
57 |
+
"cell_type": "code",
|
58 |
+
"execution_count": null,
|
59 |
+
"id": "0cce23c9-245c-4c28-9523-0231eb5acc17",
|
60 |
+
"metadata": {},
|
61 |
+
"outputs": [],
|
62 |
+
"source": [
|
63 |
+
"vote_local = (votes\n",
|
64 |
+
" .filter(_[\"Jurisdiction Type\"] == \"Municipal\")\n",
|
65 |
+
" .rename(city = \"Jurisdiction Name\", state_id = \"State\")\n",
|
66 |
+
" .mutate(key = _.city + ibis.literal('-') + _.state_id)\n",
|
67 |
+
" .rename(amount = 'Conservation Funds at Stake', yes = '% Yes')\n",
|
68 |
+
" .mutate(amount_n=_.amount.replace('$', '').replace(',', '').cast('float'))\n",
|
69 |
+
" .mutate(log_amount=_.amount_n.log())\n",
|
70 |
+
" .mutate(year=_['Date'].year().cast('int32'))\n",
|
71 |
+
" .select('key', 'Status', 'yes', 'year', 'amount', 'log_amount', )\n",
|
72 |
+
" )\n",
|
73 |
+
"\n",
|
74 |
+
"df_local = (locality\n",
|
75 |
+
" .mutate(key = _.name + ibis.literal('-') + _.state_id)\n",
|
76 |
+
" .select('key', 'geometry')\n",
|
77 |
+
" .right_join(vote_local, \"key\")\n",
|
78 |
+
" .drop('key_right')\n",
|
79 |
+
" .mutate(jurisdiction = ibis.literal(\"Municipal\"))\n",
|
80 |
+
" .cast({\"geometry\": \"geometry\"})\n",
|
81 |
+
" \n",
|
82 |
+
" )\n"
|
83 |
+
]
|
84 |
+
},
|
85 |
+
{
|
86 |
+
"cell_type": "code",
|
87 |
+
"execution_count": null,
|
88 |
+
"id": "a1e81807-8ce3-44bf-9a1c-8563fa33817c",
|
89 |
+
"metadata": {},
|
90 |
+
"outputs": [],
|
91 |
+
"source": [
|
92 |
+
"df = df_county.union(df_local)\n",
|
93 |
+
"df.execute()"
|
94 |
]
|
95 |
},
|
96 |
{
|
|
|
106 |
"import streamlit as st\n",
|
107 |
"\n",
|
108 |
"login(st.secrets[\"HF_TOKEN\"])\n",
|
109 |
+
"# api = HfApi(add_to_git_credential=False)\n",
|
110 |
+
"api = HfApi()\n",
|
111 |
"\n",
|
112 |
"def hf_upload(file, repo_id):\n",
|
113 |
" info = api.upload_file(\n",
|
|
|
216 |
"#m.add_gdf(gdf, \"fill-extrusion\", paint = paint)\n",
|
217 |
"#m"
|
218 |
]
|
219 |
+
},
|
220 |
+
{
|
221 |
+
"cell_type": "code",
|
222 |
+
"execution_count": null,
|
223 |
+
"id": "5e521f00-1b04-4016-9a6a-71a12e846dd3",
|
224 |
+
"metadata": {},
|
225 |
+
"outputs": [],
|
226 |
+
"source": []
|
227 |
}
|
228 |
],
|
229 |
"metadata": {
|
|
|
242 |
"name": "python",
|
243 |
"nbconvert_exporter": "python",
|
244 |
"pygments_lexer": "ipython3",
|
245 |
+
"version": "3.10.12"
|
246 |
}
|
247 |
},
|
248 |
"nbformat": 4,
|