cassiebuhler committed on
Commit
d9b3bb0
·
1 Parent(s): 45d9637

added parties for counties/cities pre-2000

Browse files
Files changed (1) hide show
  1. get_party_2000.ipynb +495 -0
get_party_2000.ipynb ADDED
@@ -0,0 +1,495 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "id": "b8e12b31-956b-46a9-8263-2b0c27bad60a",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "import ibis\n",
11
+ "from ibis import _\n",
12
+ "import streamlit as st\n",
13
+ "import ibis.expr.datatypes as dt \n",
14
+ "\n",
15
+ "conn = ibis.duckdb.connect(extensions=[\"spatial\"])"
16
+ ]
17
+ },
18
+ {
19
+ "cell_type": "markdown",
20
+ "id": "18dc8e53-b775-4931-b86e-18f3b3213dbb",
21
+ "metadata": {},
22
+ "source": [
23
+ "# Adding county/city parties for 1988 - 1996"
24
+ ]
25
+ },
26
+ {
27
+ "cell_type": "code",
28
+ "execution_count": null,
29
+ "id": "27e27c6d-6ad5-4e2c-a780-2bac6ecd7836",
30
+ "metadata": {},
31
+ "outputs": [],
32
+ "source": [
# reshape one election's wide vote table into long format
def convert_data(candidates, year, county):
    """Turn a wide per-county vote table into one row per (county, candidate).

    Parameters
    ----------
    candidates : list of str
        Names of the vote-count columns in ``county``; each name also becomes
        a value of the output ``candidate`` column.
    year : ibis expression or scalar
        Value written to the ``year`` column of every output row.
    county : ibis table
        Wide table with "Geographic Name", "FIPS" and "Geographic Subtype"
        columns plus one column per entry of ``candidates``.

    Returns
    -------
    ibis table
        Columns ``year, GEOID, county, candidate, candidate_votes``, keeping
        only rows whose vote count is non-null and greater than zero.
    """
    # 4-character FIPS codes get a leading "0"; everything else is kept as is.
    padded_geoid = (
        ibis.case()
        .when(_.GEOID.length() == 4, "0" + _.GEOID)
        .else_(_.GEOID)
        .end()
    )
    # Append " County" unless the name already ends with "County".
    county_label = (
        ibis.case()
        .when(_.county.endswith('County'), _.county)
        .else_(_.county + ' County')
        .end()
    )

    wide = county.rename(county="Geographic Name", GEOID="FIPS")
    wide = wide.filter(_.GEOID != "fips")  # drop rows whose FIPS field is the text "fips"
    wide = wide.cast({"GEOID": "string"})
    wide = wide.mutate(GEOID=padded_geoid)
    wide = wide.drop("Geographic Subtype")
    wide = wide.mutate(county=county_label)

    # One copy of every county row per candidate name.
    labelled = ibis.memtable({'candidate': candidates}).cross_join(wide)

    # For each (candidate, county) pair pick that candidate's own vote column.
    head, tail = candidates[0], candidates[1:]
    picker = ibis.case().when(labelled.candidate == head, wide[head])
    for name in tail:
        picker = picker.when(labelled.candidate == name, wide[name])
    votes = picker.end()
    votes_int = votes.cast("int64")

    has_votes = votes.notnull() & (votes_int > 0)
    kept = labelled.mutate(year=year, candidate_votes=votes_int).filter(has_votes)

    return kept.select("year", "GEOID", "county", "candidate", "candidate_votes")
78
+ ]
79
+ },
80
+ {
81
+ "cell_type": "code",
82
+ "execution_count": null,
83
+ "id": "6f59d864-1ff2-49ab-a4c3-f6b268989059",
84
+ "metadata": {},
85
+ "outputs": [],
86
+ "source": [
# pick the top vote-getter in every county
def get_winner(df):
    """Return one row per (year, GEOID) holding the candidate with the most votes.

    ``df`` is expected to carry the columns ``year``, ``GEOID``, ``county``,
    ``candidate`` and ``candidate_votes``. The result has ``year``, ``GEOID``,
    ``candidate`` (the name at the maximum of ``candidate_votes``) and
    ``county`` (the maximum ``county`` value within the group).
    """
    per_county = df.group_by(["year", "GEOID"])
    return per_county.aggregate(
        # candidate name at the row with the highest vote count
        # NOTE(review): how ties are broken is left to the backend - confirm if it matters
        candidate=_.candidate.argmax(_.candidate_votes),
        county=_.county.max(),
    )
95
+ ]
96
+ },
97
+ {
98
+ "cell_type": "markdown",
99
+ "id": "abc6b2d8-4ea9-4542-8e4b-612ee10a444e",
100
+ "metadata": {},
101
+ "source": [
102
+ "# 1988 Election Results"
103
+ ]
104
+ },
105
+ {
106
+ "cell_type": "code",
107
+ "execution_count": null,
108
+ "id": "7cc5793d-fba9-4986-864e-3fdd6d5bddd3",
109
+ "metadata": {},
110
+ "outputs": [],
111
+ "source": [
# Wide table of 1988 returns, one vote column per candidate.
county = conn.read_csv("1988_0_0_2.csv")

# Vote-count columns that convert_data turns into rows.
candidates = [
    'Michael Dukakis',
    'George Bush',
    'Ron Paul',
    'Lenora Fulani',
    'David Duke',
    'Eugene McCarthy',
    'James Griffen',
    'Lyndon LaRouche',
    'William Marra',
    'Write-In',
    'Edward Winn',
    'James Warren',
    'Herbert Lewin',
    'Earl Dodge',
    'Larry Holmes',
    'None o.t. Candidates',
    'Willa Kenoyer',
    'Delmar Dennis',
    'Jack Herer',
    'Louis Youngkeit',
    'John Martin',
]

year = ibis.literal("1988")
year1 = convert_data(candidates, year, county)
winner = get_winner(year1)

# Label each county with the party of its winning candidate;
# a winner other than the two listed below leaves party as NULL.
df_1988 = winner.mutate(
    party=(
        ibis.case()
        .when(winner.candidate == "George Bush", "REPUBLICAN")
        .when(winner.candidate == "Michael Dukakis", "DEMOCRAT")
        .else_(None)
        .end()
    )
)
134
+ "\n"
135
+ ]
136
+ },
137
+ {
138
+ "cell_type": "markdown",
139
+ "id": "e51abd1c-3cba-4beb-b092-82405f8a253a",
140
+ "metadata": {},
141
+ "source": [
142
+ "# 1992 Election Results"
143
+ ]
144
+ },
145
+ {
146
+ "cell_type": "code",
147
+ "execution_count": null,
148
+ "id": "9128ee1b-fcef-4cb3-b03b-14620918c4c0",
149
+ "metadata": {},
150
+ "outputs": [],
151
+ "source": [
152
+ "import ibis\n",
153
+ "from ibis import _\n",
154
+ "import ibis.expr.datatypes as dt \n",
155
+ "\n",
# Wide table of 1992 returns, one vote column per candidate.
county = conn.read_csv("1992_0_0_2.csv")

# Vote-count columns that convert_data turns into rows.
candidates = [
    'William Clinton',
    'George Bush',
    'H. Ross Perot',
    'Andre Marrou',
    'James Bo Gritz',
    'Lenore Fulani',
    'Howard Phillips',
    'Dr. John Hagelin',
    'Ron Daniels',
    'Lyndon LaRouche Jr.',
    'James Warren',
    'Write-ins',
    'Drew Bradford',
    'Jack Herer',
    'J. Quinn Brisben',
    'Helen Halyard',
    'None o.t. Candidates',
    'John Yiamouyiannis',
    'Ehlers',
    'Earl Dodge',
    'Jim Boren',
    'Hem',
    'Isabell Masters',
    'Smith',
    'Gloria LaRiva',
]

year = ibis.literal("1992")
year2 = convert_data(candidates, year, county)
winner = get_winner(year2)

# Label each county with the party of its winning candidate;
# a winner other than the three listed below leaves party as NULL.
# NOTE(review): Perot's 1992 candidacy is labelled "REFORM" here - confirm
# that this is the label downstream consumers expect for that year.
df_1992 = winner.mutate(
    party=(
        ibis.case()
        .when(winner.candidate == "George Bush", "REPUBLICAN")
        .when(winner.candidate == "William Clinton", "DEMOCRAT")
        .when(winner.candidate == "H. Ross Perot", "REFORM")
        .else_(None)
        .end()
    )
)
182
+ "\n"
183
+ ]
184
+ },
185
+ {
186
+ "cell_type": "markdown",
187
+ "id": "652dd5e5-0347-4adf-99ba-65cb61fabb8d",
188
+ "metadata": {},
189
+ "source": [
190
+ "# 1996 Election Results"
191
+ ]
192
+ },
193
+ {
194
+ "cell_type": "code",
195
+ "execution_count": null,
196
+ "id": "b23a92d7-9916-4412-be4f-e4d249ccabca",
197
+ "metadata": {},
198
+ "outputs": [],
199
+ "source": [
200
+ "\n",
# Wide table of 1996 returns, one vote column per candidate.
county = conn.read_csv("1996_0_0_2.csv")

# Vote-count columns that convert_data turns into rows.
candidates = [
    'William Clinton',
    'Robert Dole',
    'H. Ross Perot',
    'Ralph Nader',
    'Harry Browne',
    'Howard Phillips',
    'Dr. John Hagelin',
    'Monica Moorehead',
    'Marsha Feinland',
    'Write-ins',
    'Charles Collins',
    'James Harris',
    'None o.t. Candidates',
    'Dennis Peron',
    'Mary Cal Hollis',
    'Jerome White',
    'Diane Templin',
    'Earl Dodge',
    'A. Peter Crane',
    'Just. Ralph Forbes',
    'John Birrenback',
    'Isabell Masters pHD',
    'Steve Michael',
]

year = ibis.literal("1996")
year3 = convert_data(candidates, year, county)
winner = get_winner(year3)

# Label each county with the party of its winning candidate;
# a winner other than the two listed below leaves party as NULL.
df_1996 = winner.mutate(
    party=(
        ibis.case()
        .when(winner.candidate == "Robert Dole", "REPUBLICAN")
        .when(winner.candidate == "William Clinton", "DEMOCRAT")
        .else_(None)
        .end()
    )
)
228
+ ]
229
+ },
230
+ {
231
+ "cell_type": "markdown",
232
+ "id": "761bdd22-c2b9-431b-a59f-84920a85b841",
233
+ "metadata": {},
234
+ "source": [
235
+ "# Merging with county polygons "
236
+ ]
237
+ },
238
+ {
239
+ "cell_type": "code",
240
+ "execution_count": null,
241
+ "id": "6b084fcb-a502-4058-b141-98d04af17048",
242
+ "metadata": {},
243
+ "outputs": [],
244
+ "source": [
# Stack the three per-election winner tables: 1988 + 1992 first, then 1996.
df_temp = df_1988.union(df_1992)
df_county = df_temp.union(df_1996)
247
+ ]
248
+ },
249
+ {
250
+ "cell_type": "code",
251
+ "execution_count": null,
252
+ "id": "a900e1ff-86ec-480a-8305-54da9fc744bc",
253
+ "metadata": {},
254
+ "outputs": [],
255
+ "source": [
# State table: supplies the postal abbreviation ("STUSPS", renamed to "state").
state_boundaries = "https://data.source.coop/cboettig/us-boundaries/us-state-territory.parquet"
state = (
    conn.read_parquet(state_boundaries)
    .rename(state="STUSPS", state_name="NAME")
    .select("state", "geometry", "state_name", "GEOID")
    .cast({"geometry": "geometry", "GEOID": "string"})
)

# County polygons, joined to the state table on state_name to pick up "state".
county_boundaries = "https://data.source.coop/cboettig/us-boundaries/us-county.parquet"
df_polygons = (
    conn.read_parquet(county_boundaries)
    .rename(county="NAMELSAD", state_name="STATE_NAME")
    .join(state, "state_name", how="inner")
    .select("state", "state_name", "county", "geometry", "GEOID")
)

# Attach the 1988-1996 winners to their county polygon via GEOID and shape
# the rows as county-level records (no municipality, upper-cased county name).
df_county_polygons = (
    df_polygons.join(df_county, ["GEOID"], how="inner")
    .mutate(municipal=None)
    .mutate(jurisdiction=ibis.literal("County"))
    .select("state", "county", "municipal", "jurisdiction", "geometry", "year", "party")
    .cast({"geometry": "geometry", "municipal": "string", "year": "int64"})
    .mutate(county=_.county.upper())
)
285
+ ]
286
+ },
287
+ {
288
+ "cell_type": "markdown",
289
+ "id": "fbeb95fd-eb09-48d3-8883-9966669f8079",
290
+ "metadata": {},
291
+ "source": [
292
+ "# Combine with Elections 2000-2020 (Only Counties)"
293
+ ]
294
+ },
295
+ {
296
+ "cell_type": "code",
297
+ "execution_count": null,
298
+ "id": "a26e28c6-4229-42ad-8548-6617623de30a",
299
+ "metadata": {},
300
+ "outputs": [],
301
+ "source": [
# County rows of the existing party polygons, with the pre-2000 county rows
# appended by a union.
df_2000 = (
    conn.read_parquet("party_polygons.parquet")
    .cast({"geometry": "geometry"})
    .filter(_.jurisdiction == "County")
    .union(df_county_polygons)
)
308
+ ]
309
+ },
310
+ {
311
+ "cell_type": "markdown",
312
+ "id": "5741c5d8-9f7a-45b3-9563-07315d23e77b",
313
+ "metadata": {},
314
+ "source": [
315
+ "# Make PMTiles - only with county level parties "
316
+ ]
317
+ },
318
+ {
319
+ "cell_type": "code",
320
+ "execution_count": null,
321
+ "id": "c7f7689a-e0e5-40b9-9fda-3e39f5c7b08f",
322
+ "metadata": {},
323
+ "outputs": [],
324
+ "source": [
325
+ "import subprocess\n",
326
+ "import os\n",
327
+ "from huggingface_hub import HfApi, login\n",
328
+ "import streamlit as st\n",
329
+ "\n",
330
+ "login(st.secrets[\"HF_TOKEN\"])\n",
331
+ "# api = HfApi(add_to_git_credential=False)\n",
332
+ "api = HfApi()\n",
333
+ "\n",
def hf_upload(file, repo_id):
    """Upload a local file to a Hugging Face dataset repository.

    Parameters
    ----------
    file : str
        Local path of the file to upload; the same string is used as the
        path inside the repository.
    repo_id : str
        Identifier of the target repository.

    Returns
    -------
    Whatever ``api.upload_file`` returns. The original bound this to an
    unused local and returned ``None``; returning it lets callers inspect
    the result of the upload.
    """
    return api.upload_file(
        path_or_fileobj=file,
        path_in_repo=file,
        repo_id=repo_id,
        repo_type="dataset",
    )
def generate_pmtiles(input_file, output_file, max_zoom=12):
    """Build a PMTiles archive from a GeoJSON file by running Tippecanoe.

    Parameters
    ----------
    input_file : str
        Path of the input file; it is passed to Tippecanoe as the source of
        a layer named "county".
    output_file : str
        Path of the archive to write (an existing file is replaced because
        ``--force`` is passed).
    max_zoom : int, optional
        Kept for backward compatibility; it is not used. The command passes
        ``-zg`` instead of an explicit maximum zoom.

    Raises
    ------
    RuntimeError
        If no ``tippecanoe`` executable is found on PATH.
    subprocess.CalledProcessError
        If Tippecanoe exits with a non-zero status. The error is printed and
        then re-raised so that later steps do not upload a missing or stale
        output file.
    """
    # Local import: shutil is not among the notebook's existing imports.
    import shutil

    # shutil.which avoids spawning the external `which` command.
    if shutil.which("tippecanoe") is None:
        raise RuntimeError("Tippecanoe is not installed or not in PATH")

    command = [
        "tippecanoe",
        "-o", output_file,
        "-zg",
        "--extend-zooms-if-still-dropping",
        "--force",
        "--projection", "EPSG:4326",
        "-L", "county:" + input_file,
    ]

    try:
        subprocess.run(command, check=True)
    except subprocess.CalledProcessError as e:
        print(f"Error running Tippecanoe: {e}")
        raise
    print(f"Successfully generated PMTiles file: {output_file}")
362
+ "\n"
363
+ ]
364
+ },
365
+ {
366
+ "cell_type": "code",
367
+ "execution_count": null,
368
+ "id": "6d54069c-49c5-48dc-9cca-c59ded295f16",
369
+ "metadata": {},
370
+ "outputs": [],
371
+ "source": [
# Materialise the county-level table, tag it as EPSG:4326 and write GeoJSON
# for Tippecanoe to read.
gdf = df_2000.execute()
gdf = gdf.set_crs("EPSG:4326")
gdf.to_file("party_polygons_all.geojson")

# Tile the GeoJSON, then publish the resulting archive.
generate_pmtiles("party_polygons_all.geojson", "party_polygons_all.pmtiles")
hf_upload("party_polygons_all.pmtiles", "boettiger-lab/landvote")
377
+ ]
378
+ },
379
+ {
380
+ "cell_type": "code",
381
+ "execution_count": null,
382
+ "id": "1706ec30-9c56-488d-b739-5babed79d8e8",
383
+ "metadata": {},
384
+ "outputs": [],
385
+ "source": [
# Save the county-only table as parquet and publish it.
# NOTE(review): the last cell of this notebook writes and uploads a file with
# this same name from df_2000_all - confirm which version should be published.
(
    df_2000.execute()
    .set_crs("EPSG:4326")
    .to_parquet("party_polygons_all.parquet")
)
hf_upload("party_polygons_all.parquet", "boettiger-lab/landvote")
389
+ ]
390
+ },
391
+ {
392
+ "cell_type": "markdown",
393
+ "id": "51efd1ff-945b-47ec-8ca0-97a8ce43fd2d",
394
+ "metadata": {},
395
+ "source": [
396
+ "# Getting party affiliation for cities 1988-1996"
397
+ ]
398
+ },
399
+ {
400
+ "cell_type": "code",
401
+ "execution_count": null,
402
+ "id": "86b63b03-1f33-4945-85c5-1ecdc2b91907",
403
+ "metadata": {},
404
+ "outputs": [],
405
+ "source": [
406
+ "\n",
407
+ " \n",
# Per-county winners keyed by (state, upper-cased county name), without geometry.
df_state_city = (
    df_polygons.join(df_county, ["GEOID"], how="inner")
    .mutate(county=_.county.upper())
    .select("state", "county", "year", "party")
)

# Locality records with upper-cased names; county names get a " COUNTY"
# suffix unless they already end with "COUNTY", to match df_state_city.
localities_boundaries = "us_localities.parquet"
locality = (
    conn.read_parquet(localities_boundaries)
    .mutate(county=_.county.upper())
    .mutate(municipal=_.municipal.upper())
    .mutate(
        county=ibis.case()
        .when(_.county.endswith('COUNTY'), _.county)
        .else_(_.county + ' COUNTY')
        .end()
    )
)

# Every locality takes the year/party rows of the county it is matched to.
df_city = (
    df_state_city.join(locality, ["state", "county"], how="inner")
    .mutate(jurisdiction=ibis.literal("Municipal"))
    .select("state", "county", "municipal", "jurisdiction", "geometry", "year", "party")
    .cast({"geometry": "geometry", "municipal": "string", "year": "int64"})
)

# All pre-2000 rows: municipal rows followed by county rows.
df_before2000 = df_city.union(df_county_polygons)
436
+ ]
437
+ },
438
+ {
439
+ "cell_type": "markdown",
440
+ "id": "a1db2c44-7b84-42ff-85b9-46dee348650f",
441
+ "metadata": {},
442
+ "source": [
443
+ "# Combine with Elections 2000-2020 (Cities + Counties)"
444
+ ]
445
+ },
446
+ {
447
+ "cell_type": "code",
448
+ "execution_count": null,
449
+ "id": "d478037c-083f-488d-950f-692ff5fc1b83",
450
+ "metadata": {},
451
+ "outputs": [],
452
+ "source": [
453
+ "\n",
# Existing party polygons (all jurisdictions) with every pre-2000 row appended.
df_2000_all = (
    conn.read_parquet("party_polygons.parquet")
    .cast({"geometry": "geometry"})
    .union(df_before2000)
)
459
+ ]
460
+ },
461
+ {
462
+ "cell_type": "code",
463
+ "execution_count": null,
464
+ "id": "c471d9cf-8775-4d88-bc73-d963b8ecfb37",
465
+ "metadata": {},
466
+ "outputs": [],
467
+ "source": [
# Save the combined cities + counties table as parquet and publish it.
(
    df_2000_all.execute()
    .set_crs("EPSG:4326")
    .to_parquet("party_polygons_all.parquet")
)
hf_upload("party_polygons_all.parquet", "boettiger-lab/landvote")
471
+ ]
472
+ }
473
+ ],
474
+ "metadata": {
475
+ "kernelspec": {
476
+ "display_name": "Python 3 (ipykernel)",
477
+ "language": "python",
478
+ "name": "python3"
479
+ },
480
+ "language_info": {
481
+ "codemirror_mode": {
482
+ "name": "ipython",
483
+ "version": 3
484
+ },
485
+ "file_extension": ".py",
486
+ "mimetype": "text/x-python",
487
+ "name": "python",
488
+ "nbconvert_exporter": "python",
489
+ "pygments_lexer": "ipython3",
490
+ "version": "3.12.7"
491
+ }
492
+ },
493
+ "nbformat": 4,
494
+ "nbformat_minor": 5
495
+ }